This is an individual in-class exercise. At the end of the exercise, please upload html outputs to your Progress Journals.
library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------- tidyverse 1.2.1 --
## <U+221A> ggplot2 2.2.1 <U+221A> purrr 0.2.4
## <U+221A> tibble 1.4.2 <U+221A> dplyr 0.7.4
## <U+221A> tidyr 0.8.0 <U+221A> stringr 1.2.0
## <U+221A> readr 1.1.1 <U+221A> forcats 0.2.0
## -- Conflicts ----------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
load("/Users/TCTAGUMUS/Documents/R işleri/osym_data_2017_v2.RData")
head(osym_data_2017)
## # A tibble: 6 x 14
## program_id university_name city faculty_name program_name exam_type
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 100110266 ABANT İZZET BAY~ BOLU Bolu Sağlık Y~ Hemşirelik YGS_2
## 2 100110487 ABANT İZZET BAY~ BOLU Bolu Turizm İ~ Gastronomi v~ YGS_4
## 3 100110724 ABANT İZZET BAY~ BOLU Bolu Turizm İ~ Turizm İşlet~ YGS_6
## 4 100130252 ABANT İZZET BAY~ BOLU Bolu Turizm İ~ Turizm İşlet~ YGS_6
## 5 100110433 ABANT İZZET BAY~ BOLU Diş Hekimliği~ Diş Hekimliği MF_3
## 6 100110609 ABANT İZZET BAY~ BOLU Diş Hekimliği~ Diş Hekimliğ~ MF_3
## # ... with 8 more variables: general_quota <chr>, general_placement <chr>,
## # min_score <dbl>, max_score <dbl>, val_quota <dbl>,
## # val_placement <dbl>, val_min_score <dbl>, val_max_score <dbl>
osym_data_2017 <- osym_data_2017 %>% mutate(general_quota = as.numeric(general_quota), general_placement = as.numeric(general_placement))
max_score
programs from each exam_type
.osym_data_2017 %>% group_by(exam_type,program_name) %>% summarize(Max_score_programs = max(max_score)) %>% filter(Max_score_programs == max(Max_score_programs))
## # A tibble: 19 x 3
## # Groups: exam_type [19]
## exam_type program_name Max_score_progra~
## <chr> <chr> <dbl>
## 1 DİL_1 Çeviribilim (İngilizce) 543
## 2 DİL_2 Latin Dili ve Edebiyatı 485
## 3 DİL_3 Japonca Öğretmenliği 490
## 4 MF Ziraat Mühendisliği Programları 408
## 5 MF_1 Matematik (İngilizce) (Tam Burslu) 537
## 6 MF_2 Fizik (İngilizce) 527
## 7 MF_3 Cerrahpaşa Tıp (İngilizce) 564
## 8 MF_4 Elektrik - Elektronik Mühendisliği (İngili~ 575
## 9 TM_1 İktisat (İngilizce) 561
## 10 TM_2 Grafik Tasarım (İngilizce) (Tam Burslu) 458
## 11 TM_3 Hukuk (Tam Burslu) 558
## 12 TS_1 Özel Eğitim Öğretmenliği (Tam Burslu) 527
## 13 TS_2 Medya ve Görsel Sanatlar (İngilizce) (Tam ~ 536
## 14 YGS_1 Bilgisayar Teknolojisi ve Bilişim Sistemle~ 516
## 15 YGS_2 Fizyoterapi ve Rehabilitasyon 473
## 16 YGS_3 Rekreasyon Yönetimi (Tam Burslu) 315
## 17 YGS_4 Gastronomi (Tam Burslu) 466
## 18 YGS_5 Spor Yöneticiliği (Tam Burslu) 419
## 19 YGS_6 Yönetim Bilişim Sistemleri (İngilizce) 519
top_10_iu <- osym_data_2017 %>% filter(university_name == "İSTANBUL ÜNİVERSİTESİ") %>% arrange(desc(general_quota)) %>% slice(1:10)
ggplot(data=top_10_iu) + geom_bar(aes(x=reorder(program_name,-general_quota),y=general_quota),stat="identity") + theme(axis.text.x=element_text(angle=90))
general_placement
)/sum(general_quota
)) per city and return the top 10 cities.osym_data_2017 %>% group_by(city) %>% summarise(fill_rate = sum(general_placement)/sum(general_quota)) %>% ungroup() %>% arrange(desc(fill_rate)) %>% slice(1:10)
## # A tibble: 10 x 2
## city fill_rate
## <chr> <dbl>
## 1 GEBZE 1.02
## 2 TEKİRDAĞ 1.01
## 3 MANİSA 1.01
## 4 KOCAELİ 1.00
## 5 BURSA 1.00
## 6 SUMGAYIT - AZERBAYCAN 1.00
## 7 DİYARBAKIR 0.999
## 8 DENİZLİ 0.998
## 9 EDİRNE 0.998
## 10 SAKARYA 0.997
general_placement
== general_quota
) Endüstri Mühendisliği programs (use grepl
) and draw a scatterplot of min_score vs max_score. Set transparency parameter (alpha
) to 0.7. Set program colors according to whether it is a foundation university or state university. (Tip: State university programs ids start with 1, foundation 2, KKTC 3 and other abroad 4. You can use substr
function.).Endüstri <- osym_data_2017 %>% filter (grepl ('Endüstri Mühendisliği', program_name))
Endüstri_data <- subset( Endüstri ,general_quota == general_placement)
ggplot(Endüstri_data) + geom_point(aes(x=min_score , y =max_score , color = substr(program_id,1,1), alpha = 0.7)) +xlab("min_score") + ylab ("max_score") + labs (color = "University_type") + scale_color_manual( values = c(1,2,3), labels = c("state","foundation","KKTC")) + guides(alpha = "none")
Highest_quotas <- osym_data_2017 %>% group_by(university_name ,faculty_name) %>% summarize (max_quota=max(general_quota)) %>% arrange(desc(max_quota)) %>% mutate(faculty = paste(university_name, faculty_name)) %>% ungroup() %>% select(faculty,max_quota) %>% slice (1:10)
ggplot(Highest_quotas) + geom_col(aes(x= reorder(faculty,-max_quota) ,y = max_quota )) + theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) + xlab("faculty")
geom_crossbar
) tıp <- osym_data_2017 %>% filter(grepl("Tıp", program_name)) %>% filter(!grepl("Mühen",program_name)) %>% filter(substr(program_id,1,1) == 1) %>% group_by(university_name) %>% summarize(total_quotas = sum(general_quota), max_score = max(max_score),min_score = min(min_score) ) %>%ungroup() %>% arrange(desc(total_quotas))
ggplot(tıp) + geom_crossbar(aes(x=reorder(university_name,-total_quotas) , y = ((max_score+min_score)/2) , ymax= max_score , ymin = min_score , color = total_quotas)) + labs (y = "score") + theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) + xlab ("university")
Makine <- osym_data_2017 %>% filter(grepl("Makine Mühendisliği", program_name)) %>% group_by(university_name) %>% summarize(total_quotas = sum(general_quota), max_score = max(max_score),min_score = min(min_score) ) %>%ungroup() %>% arrange(desc(max_score))%>% slice(1:10)
İnşaat <- osym_data_2017 %>% filter(grepl("İnşaat Mühendisliği", program_name)) %>% group_by(university_name) %>% summarize(total_quotas = sum(general_quota), max_score = max(max_score),min_score = min(min_score) ) %>%ungroup %>% filter(university_name %in% Makine$university_name)
ggplot(İnşaat) + geom_crossbar(aes(x=reorder(university_name,-total_quotas) , y = ((max_score+min_score)/2) , ymax= max_score , ymin = min_score , color = "blue")) + labs (y = "score") + theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) + xlab ("university") + geom_crossbar(data = Makine, aes(x=reorder(university_name,-total_quotas) , y = ((max_score+min_score)/2) , ymax= max_score , ymin = min_score, color = "yellow" )) + scale_color_manual(values = c("Blue","Red"), labels = c("İnşaat","Makine") ) + labs(color = "Bölüm")
I pivotted max_score, min_score and fill_rate basically.
osym_data_2017 %>% filter(grepl("BİLKENT ÜNİVERSİTESİ", university_name) | grepl("KOÇ ÜNİVERSİTESİ" , university_name) ) %>% group_by(university_name) %>% summarize (Total_quota = sum(general_quota), max_score = max(max_score), min_score = min (min_score),fill_rate = sum(general_placement)/sum(general_quota))
## # A tibble: 2 x 5
## university_name Total_quota max_score min_score fill_rate
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 İHSAN DOĞRAMACI BİLKENT ÜNİVE~ 2259 566 207 0.990
## 2 KOÇ ÜNİVERSİTESİ 1054 569 261 0.996
Top 10 cities according to total_quota and ordered by total_quota for all “Öğretmenlik” programs and corresponding boundaries in terms of score( max, min). In other terms, you can see minimum score needed for “Öğretmenlik” among top 10 cities that have highest quotas.
Analysis_by_City = osym_data_2017 %>% filter(grepl("Öğretmen", program_name)) %>% group_by(city) %>% summarize(Total_Quotas = sum(general_quota), max_score = max(max_score), min_score = min(min_score)) %>%ungroup() %>% arrange(desc(Total_Quotas)) %>% slice (1:10)
ggplot(Analysis_by_City) + geom_errorbar(aes(x=reorder(city, -Total_Quotas), ymin= min_score, ymax = max_score, color = Total_Quotas)) + xlab("City") + ylab("Score") + theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))