This is an individual in-class exercise. At the end of the exercise, please upload html outputs to your Progress Journals.

library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------- tidyverse 1.2.1 --
## <U+221A> ggplot2 2.2.1     <U+221A> purrr   0.2.4
## <U+221A> tibble  1.4.2     <U+221A> dplyr   0.7.4
## <U+221A> tidyr   0.8.0     <U+221A> stringr 1.2.0
## <U+221A> readr   1.1.1     <U+221A> forcats 0.2.0
## -- Conflicts ----------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
load("/Users/TCTAGUMUS/Documents/R işleri/osym_data_2017_v2.RData")
head(osym_data_2017)
## # A tibble: 6 x 14
##   program_id university_name  city  faculty_name   program_name  exam_type
##   <chr>      <chr>            <chr> <chr>          <chr>         <chr>    
## 1 100110266  ABANT İZZET BAY~ BOLU  Bolu Sağlık Y~ Hemşirelik    YGS_2    
## 2 100110487  ABANT İZZET BAY~ BOLU  Bolu Turizm İ~ Gastronomi v~ YGS_4    
## 3 100110724  ABANT İZZET BAY~ BOLU  Bolu Turizm İ~ Turizm İşlet~ YGS_6    
## 4 100130252  ABANT İZZET BAY~ BOLU  Bolu Turizm İ~ Turizm İşlet~ YGS_6    
## 5 100110433  ABANT İZZET BAY~ BOLU  Diş Hekimliği~ Diş Hekimliği MF_3     
## 6 100110609  ABANT İZZET BAY~ BOLU  Diş Hekimliği~ Diş Hekimliğ~ MF_3     
## # ... with 8 more variables: general_quota <chr>, general_placement <chr>,
## #   min_score <dbl>, max_score <dbl>, val_quota <dbl>,
## #   val_placement <dbl>, val_min_score <dbl>, val_max_score <dbl>
osym_data_2017 <- osym_data_2017 %>% mutate(general_quota = as.numeric(general_quota), general_placement = as.numeric(general_placement))
  1. Get the highest max_score programs from each exam_type.
osym_data_2017 %>% group_by(exam_type,program_name) %>% summarize(Max_score_programs = max(max_score)) %>% filter(Max_score_programs ==  max(Max_score_programs))
## # A tibble: 19 x 3
## # Groups:   exam_type [19]
##    exam_type program_name                                Max_score_progra~
##    <chr>     <chr>                                                   <dbl>
##  1 DİL_1     Çeviribilim (İngilizce)                                   543
##  2 DİL_2     Latin Dili ve Edebiyatı                                   485
##  3 DİL_3     Japonca Öğretmenliği                                      490
##  4 MF        Ziraat Mühendisliği Programları                           408
##  5 MF_1      Matematik (İngilizce) (Tam Burslu)                        537
##  6 MF_2      Fizik (İngilizce)                                         527
##  7 MF_3      Cerrahpaşa Tıp (İngilizce)                                564
##  8 MF_4      Elektrik - Elektronik Mühendisliği (İngili~               575
##  9 TM_1      İktisat (İngilizce)                                       561
## 10 TM_2      Grafik Tasarım (İngilizce) (Tam Burslu)                   458
## 11 TM_3      Hukuk (Tam Burslu)                                        558
## 12 TS_1      Özel Eğitim Öğretmenliği (Tam Burslu)                     527
## 13 TS_2      Medya ve Görsel Sanatlar (İngilizce) (Tam ~               536
## 14 YGS_1     Bilgisayar Teknolojisi ve Bilişim Sistemle~               516
## 15 YGS_2     Fizyoterapi ve Rehabilitasyon                             473
## 16 YGS_3     Rekreasyon Yönetimi (Tam Burslu)                          315
## 17 YGS_4     Gastronomi (Tam Burslu)                                   466
## 18 YGS_5     Spor Yöneticiliği (Tam Burslu)                            419
## 19 YGS_6     Yönetim Bilişim Sistemleri (İngilizce)                    519
  1. Plot the top 10 programs of İSTANBUL ÜNİVERSİTESİ in terms of total quota in a bar chart.
top_10_iu <- osym_data_2017 %>% filter(university_name == "İSTANBUL ÜNİVERSİTESİ") %>% arrange(desc(general_quota)) %>% slice(1:10)

ggplot(data=top_10_iu) + geom_bar(aes(x=reorder(program_name,-general_quota),y=general_quota),stat="identity") + theme(axis.text.x=element_text(angle=90))

  1. Calculate the fill rate (sum(general_placement)/sum(general_quota)) per city and return the top 10 cities.
osym_data_2017 %>% group_by(city) %>% summarise(fill_rate = sum(general_placement)/sum(general_quota)) %>% ungroup() %>% arrange(desc(fill_rate)) %>% slice(1:10)
## # A tibble: 10 x 2
##    city                  fill_rate
##    <chr>                     <dbl>
##  1 GEBZE                     1.02 
##  2 TEKİRDAĞ                  1.01 
##  3 MANİSA                    1.01 
##  4 KOCAELİ                   1.00 
##  5 BURSA                     1.00 
##  6 SUMGAYIT - AZERBAYCAN     1.00 
##  7 DİYARBAKIR                0.999
##  8 DENİZLİ                   0.998
##  9 EDİRNE                    0.998
## 10 SAKARYA                   0.997
  1. Find full (general_placement == general_quota) Endüstri Mühendisliği programs (use grepl) and draw a scatterplot of min_score vs max_score. Set transparency parameter (alpha) to 0.7. Set program colors according to whether it is a foundation university or state university. (Tip: State university programs ids start with 1, foundation 2, KKTC 3 and other abroad 4. You can use substr function.).
Endüstri <- osym_data_2017 %>% filter (grepl ('Endüstri Mühendisliği', program_name)) 
Endüstri_data <- subset( Endüstri ,general_quota == general_placement) 
ggplot(Endüstri_data) + geom_point(aes(x=min_score , y =max_score , color = substr(program_id,1,1), alpha = 0.7)) +xlab("min_score") + ylab ("max_score") + labs (color = "University_type") + scale_color_manual( values = c(1,2,3), labels = c("state","foundation","KKTC")) + guides(alpha = "none")

  1. Find the top 10 faculties with the highest quotas and draw a bar chart. Ignore similar names and typos in faculty names.
Highest_quotas <- osym_data_2017 %>% group_by(university_name ,faculty_name) %>% summarize (max_quota=max(general_quota))  %>% arrange(desc(max_quota)) %>% mutate(faculty = paste(university_name, faculty_name)) %>% ungroup() %>% select(faculty,max_quota) %>% slice (1:10)
ggplot(Highest_quotas) + geom_col(aes(x= reorder(faculty,-max_quota) ,y = max_quota )) + theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) + xlab("faculty")

  1. Find all full medicine programs (Tıp but not Tıp Mühendisliği) of foundation universities group by university calculate total quotas per university and maximum max_score and minimum min_score as bounds, ordered and colored by total quota. (Tip: Use geom_crossbar)
 tıp <- osym_data_2017 %>% filter(grepl("Tıp", program_name)) %>% filter(!grepl("Mühen",program_name)) %>% filter(substr(program_id,1,1) == 1) %>% group_by(university_name) %>% summarize(total_quotas = sum(general_quota), max_score = max(max_score),min_score = min(min_score) ) %>%ungroup() %>% arrange(desc(total_quotas))
ggplot(tıp) + geom_crossbar(aes(x=reorder(university_name,-total_quotas) , y = ((max_score+min_score)/2) , ymax= max_score , ymin = min_score , color = total_quotas)) + labs (y = "score") + theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) + xlab ("university")

  1. Freestyle: Do an analysis that compares the mechanical engineering (Makine Mühendisliği) and civil engineering (İnşaat Mühendisliği) programs.
 Makine <- osym_data_2017 %>% filter(grepl("Makine Mühendisliği", program_name)) %>% group_by(university_name) %>% summarize(total_quotas = sum(general_quota), max_score = max(max_score),min_score = min(min_score) ) %>%ungroup() %>% arrange(desc(max_score))%>% slice(1:10)
İnşaat <- osym_data_2017 %>% filter(grepl("İnşaat Mühendisliği", program_name)) %>% group_by(university_name) %>% summarize(total_quotas = sum(general_quota), max_score = max(max_score),min_score = min(min_score) ) %>%ungroup %>% filter(university_name %in% Makine$university_name)
ggplot(İnşaat) + geom_crossbar(aes(x=reorder(university_name,-total_quotas) , y = ((max_score+min_score)/2) , ymax= max_score , ymin = min_score , color = "blue")) + labs (y = "score") + theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) + xlab ("university") + geom_crossbar(data = Makine, aes(x=reorder(university_name,-total_quotas) , y = ((max_score+min_score)/2) , ymax= max_score , ymin = min_score, color = "yellow" )) + scale_color_manual(values = c("Blue","Red"), labels = c("İnşaat","Makine") ) + labs(color = "Bölüm")

  1. Freestyle: Compare Koç University with Bilkent University.

I pivotted max_score, min_score and fill_rate basically.

osym_data_2017 %>% filter(grepl("BİLKENT ÜNİVERSİTESİ", university_name) | grepl("KOÇ ÜNİVERSİTESİ" , university_name)  ) %>% group_by(university_name) %>% summarize (Total_quota = sum(general_quota), max_score = max(max_score), min_score = min (min_score),fill_rate = sum(general_placement)/sum(general_quota))
## # A tibble: 2 x 5
##   university_name                Total_quota max_score min_score fill_rate
##   <chr>                                <dbl>     <dbl>     <dbl>     <dbl>
## 1 İHSAN DOĞRAMACI BİLKENT ÜNİVE~        2259       566       207     0.990
## 2 KOÇ ÜNİVERSİTESİ                      1054       569       261     0.996
  1. Freestyle: Do your best.

Top 10 cities according to total_quota and ordered by total_quota for all “Öğretmenlik” programs and corresponding boundaries in terms of score( max, min). In other terms, you can see minimum score needed for “Öğretmenlik” among top 10 cities that have highest quotas.

Analysis_by_City = osym_data_2017 %>% filter(grepl("Öğretmen", program_name)) %>% group_by(city) %>% summarize(Total_Quotas = sum(general_quota), max_score = max(max_score), min_score = min(min_score)) %>%ungroup() %>% arrange(desc(Total_Quotas)) %>% slice (1:10)
ggplot(Analysis_by_City) + geom_errorbar(aes(x=reorder(city, -Total_Quotas), ymin= min_score, ymax = max_score, color = Total_Quotas)) + xlab("City") + ylab("Score") + theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))