## Warning: package 'reshape2' was built under R version 3.4.4
## Warning: package 'knitr' was built under R version 3.4.4
This is an individual in-class exercise. At the end of the exercise, please upload html outputs to your Progress Journals.
max_score
programs from each exam_type
.library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.3
## -- Attaching packages ---------------------------------- tidyverse 1.2.1 --
## <U+221A> tibble 1.4.2 <U+221A> purrr 0.2.4
## <U+221A> tidyr 0.8.0 <U+221A> dplyr 0.7.4
## <U+221A> readr 1.1.1 <U+221A> stringr 1.2.0
## <U+221A> tibble 1.4.2 <U+221A> forcats 0.3.0
## Warning: package 'tibble' was built under R version 3.4.3
## Warning: package 'tidyr' was built under R version 3.4.3
## Warning: package 'readr' was built under R version 3.4.3
## Warning: package 'purrr' was built under R version 3.4.3
## Warning: package 'dplyr' was built under R version 3.4.3
## Warning: package 'forcats' was built under R version 3.4.3
## -- Conflicts ------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
load("C:/Users/yagizbarali/Desktop/osym_data_2017_v2.RData")
head(osym_data_2017)
## # A tibble: 6 x 14
## program_id university_name city faculty_name program_name exam_type
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 100110266 ABANT İZZET BAY~ BOLU Bolu Sağlık Y~ Hemşirelik YGS_2
## 2 100110487 ABANT İZZET BAY~ BOLU Bolu Turizm İ~ Gastronomi v~ YGS_4
## 3 100110724 ABANT İZZET BAY~ BOLU Bolu Turizm İ~ Turizm İşlet~ YGS_6
## 4 100130252 ABANT İZZET BAY~ BOLU Bolu Turizm İ~ Turizm İşlet~ YGS_6
## 5 100110433 ABANT İZZET BAY~ BOLU Diş Hekimliği~ Diş Hekimliği MF_3
## 6 100110609 ABANT İZZET BAY~ BOLU Diş Hekimliği~ Diş Hekimliğ~ MF_3
## # ... with 8 more variables: general_quota <chr>, general_placement <chr>,
## # min_score <dbl>, max_score <dbl>, val_quota <dbl>,
## # val_placement <dbl>, val_min_score <dbl>, val_max_score <dbl>
osym_data_2017 %>% group_by(exam_type) %>% summarise(max_of_max = max(max_score))
## # A tibble: 19 x 2
## exam_type max_of_max
## <chr> <dbl>
## 1 DİL_1 543.
## 2 DİL_2 485.
## 3 DİL_3 490.
## 4 MF 408.
## 5 MF_1 537.
## 6 MF_2 527.
## 7 MF_3 564.
## 8 MF_4 575.
## 9 TM_1 561.
## 10 TM_2 458.
## 11 TM_3 558.
## 12 TS_1 527.
## 13 TS_2 536.
## 14 YGS_1 516.
## 15 YGS_2 473.
## 16 YGS_3 315.
## 17 YGS_4 466.
## 18 YGS_5 419.
## 19 YGS_6 519.
colnames(osym_data_2017)
## [1] "program_id" "university_name" "city"
## [4] "faculty_name" "program_name" "exam_type"
## [7] "general_quota" "general_placement" "min_score"
## [10] "max_score" "val_quota" "val_placement"
## [13] "val_min_score" "val_max_score"
library(ggplot2)
osym_data_2017 <- osym_data_2017 %>% mutate(general_quota = as.numeric(general_quota),
general_placement = as.numeric(general_placement))
## Warning: package 'bindrcpp' was built under R version 3.4.3
top_10_iu <- osym_data_2017 %>% filter(university_name == "İSTANBUL ÜNİVERSİTESİ") %>%
arrange(desc(general_quota)) %>% slice(1:10)
ggplot(data = top_10_iu) + geom_bar(aes(x = reorder(program_name,
-general_quota), y = general_quota), stat = "identity") +
theme(axis.text.x = element_text(angle = 90))
general_placement
)/sum(general_quota
)) per city and return the top 10 cities.osym_data_2017 %>% group_by(city) %>% summarise(fillrate = sum(general_placement)/sum(general_quota)) %>%
arrange(desc(fillrate)) %>% slice(1:10)
## # A tibble: 10 x 2
## city fillrate
## <chr> <dbl>
## 1 GEBZE 1.02
## 2 TEKİRDAĞ 1.01
## 3 MANİSA 1.01
## 4 KOCAELİ 1.00
## 5 BURSA 1.00
## 6 SUMGAYIT - AZERBAYCAN 1.00
## 7 DİYARBAKIR 0.999
## 8 DENİZLİ 0.998
## 9 EDİRNE 0.998
## 10 SAKARYA 0.997
general_placement
== general_quota
) Endüstri Mühendisliği programs (use grepl
) and draw a scatterplot of min_score vs max_score. Set transparency parameter (alpha
) to 0.7. Set program colors according to whether it is a foundation university or state university. (Tip: State university programs ids start with 1, foundation 2, KKTC 3 and other abroad 4. You can use substr
function.).osym_data_2017 %>% mutate(prg = ifelse(substr(program_id, 1,
1) == 1, "STATE", ifelse(substr(program_id, 1, 1) == 2, "FOUNDATION",
"OTHER"))) %>% filter("Endüstri Mühendisliği" == substr(program_name,
1, 21)) %>% ggplot() + geom_point(aes(x = min_score, y = max_score,
color = prg), alpha = 0.7)
library(sqldf)
## Warning: package 'sqldf' was built under R version 3.4.4
## Loading required package: gsubfn
## Warning: package 'gsubfn' was built under R version 3.4.4
## Loading required package: proto
## Warning: package 'proto' was built under R version 3.4.4
## Loading required package: RSQLite
## Warning: package 'RSQLite' was built under R version 3.4.4
library(reshape2)
library(ggplot2)
library(DT)
## Warning: package 'DT' was built under R version 3.4.4
quota0 <- sqldf("select faculty_name,general_quota from osym_data_2017 order by general_quota desc")
quota0$general_quota <- as.numeric(quota0$general_quota)
quota1 <- sqldf("select faculty_name,max(general_quota) as general_quota from quota0 group by faculty_name")
quota1 <- sqldf("select * from quota1 order by general_quota desc")
top_ten1 <- head(quota1, 10)
datatable(top_ten1)
geom_crossbar
)medicine <- osym_data_2017 %>% filter(grepl("Tıp", program_name)) %>%
filter(!grepl("Mühendis", program_name)) %>% filter(substr(program_id,
1, 1) == 1) %>% group_by(university_name) %>% summarize(total_quota = sum(general_quota),
maximum_score = max(max_score), minimum_score = min(min_score)) %>%
ungroup() %>% arrange(desc(total_quota))
ggplot(data = medicine) + geom_crossbar(aes(x = reorder(university_name,
-total_quota), y = ((maximum_score + minimum_score)/2), ymax = maximum_score,
ymin = minimum_score, color = total_quota)) + labs(y = "Max and Min Scores") +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1)) +
xlab("University")
Mech <- osym_data_2017 %>% filter(grepl("Makine Mühendisliği",
program_name))
Top10 <- Mech %>% mutate(avg_score = (min_score + max_score)/2) %>%
arrange(desc(avg_score)) %>% slice(1:10)
Civil <- osym_data_2017 %>% filter(grepl("İnşaat Mühendisliği",
program_name))
Civil_top10 <- Civil %>% mutate(avg_score = (min_score + max_score)/2) %>%
arrange(desc(avg_score)) %>% slice(1:10)
ggplot(data = Top10) + geom_bar(aes(x = reorder(university_name,
-avg_score), y = avg_score), stat = "identity") + theme(axis.text.x = element_text(angle = 90))
osym_data_2017 %>% filter(grepl("BİLKENT", university_name) |
grepl("KOÇ ÜNİ", university_name)) %>% group_by(university_name,
faculty_name) %>% summarize(avg_min = mean(min_score), avg_max = mean(max_score))
## # A tibble: 15 x 4
## # Groups: university_name [?]
## university_name faculty_name avg_min avg_max
## <chr> <chr> <dbl> <dbl>
## 1 İHSAN DOĞRAMACI BİLKENT ÜNİVERSİTESİ Fen Fakültesi 393. 454.
## 2 İHSAN DOĞRAMACI BİLKENT ÜNİVERSİTESİ Güzel Sanatlar Ta~ 360. 408.
## 3 İHSAN DOĞRAMACI BİLKENT ÜNİVERSİTESİ Hukuk Fakültesi 477. 515.
## 4 İHSAN DOĞRAMACI BİLKENT ÜNİVERSİTESİ İktisadi, İdari v~ 390. 439.
## 5 İHSAN DOĞRAMACI BİLKENT ÜNİVERSİTESİ İnsani Bilimler v~ 403. 457.
## 6 İHSAN DOĞRAMACI BİLKENT ÜNİVERSİTESİ İşletme Fakültesi 392. 445.
## 7 İHSAN DOĞRAMACI BİLKENT ÜNİVERSİTESİ Mühendislik Fakül~ 467. 511.
## 8 İHSAN DOĞRAMACI BİLKENT ÜNİVERSİTESİ Uygulamalı Teknol~ 328. 422.
## 9 KOÇ ÜNİVERSİTESİ Fen Fakültesi 456. 491.
## 10 KOÇ ÜNİVERSİTESİ Hemşirelik Fakült~ 376. 432.
## 11 KOÇ ÜNİVERSİTESİ Hukuk Fakültesi 472. 528.
## 12 KOÇ ÜNİVERSİTESİ İktisadi ve İdari~ 435. 490.
## 13 KOÇ ÜNİVERSİTESİ İnsani Bilimler v~ 452. 487.
## 14 KOÇ ÜNİVERSİTESİ Mühendislik Fakül~ 462. 511.
## 15 KOÇ ÜNİVERSİTESİ Tıp Fakültesi (İn~ 525. 546.
library(sqldf)
library(reshape2)
library(ggplot2)
freestyle0 <- sqldf("select distinct city,university_name from osym_data_2017")
freestyle <- sqldf("select city,count(*) as count1 from freestyle0 group by city order by count(*) desc")
top_ten <- head(freestyle, 10)
ggplot(data = top_ten) + geom_bar(aes(x = reorder(city, -count1),
y = count1), stat = "identity") + theme(axis.text.x = element_text(angle = 90))