# Load the saved workspace; it is expected to provide `osym_data_2017`.
# NOTE(review): absolute, machine-specific Windows path.
load("C:\\Tool\\osym_data_2017_v2.RData")

# Coerce the quota and placement columns to numeric so they can be summed
# and compared in the exercises below, then preview the first rows.
osym_data_2017 <- osym_data_2017 %>%
  mutate(
    general_quota = as.numeric(general_quota),
    general_placement = as.numeric(general_placement)
  )
head(osym_data_2017)
## # A tibble: 6 x 14
##   program_id university_name  city  faculty_name   program_name  exam_type
##   <chr>      <chr>            <chr> <chr>          <chr>         <chr>    
## 1 100110266  ABANT İZZET BAY~ BOLU  Bolu Sağlık Y~ Hemşirelik    YGS_2    
## 2 100110487  ABANT Ä°ZZET BAY~ BOLU  Bolu Turizm Ä°~ Gastronomi v~ YGS_4    
## 3 100110724  ABANT Ä°ZZET BAY~ BOLU  Bolu Turizm Ä°~ Turizm Ä°ÅŸlet~ YGS_6    
## 4 100130252  ABANT Ä°ZZET BAY~ BOLU  Bolu Turizm Ä°~ Turizm Ä°ÅŸlet~ YGS_6    
## 5 100110433  ABANT Ä°ZZET BAY~ BOLU  DiÅŸ HekimliÄŸi~ DiÅŸ HekimliÄŸi MF_3     
## 6 100110609  ABANT Ä°ZZET BAY~ BOLU  DiÅŸ HekimliÄŸi~ DiÅŸ HekimliÄŸ~ MF_3     
## # ... with 8 more variables: general_quota <dbl>, general_placement <dbl>,
## #   min_score <dbl>, max_score <dbl>, val_quota <dbl>,
## #   val_placement <dbl>, val_min_score <dbl>, val_max_score <dbl>
  1. Get the highest max_score programs from each exam_type.
# Highest-max_score program for every exam_type.
# Sort all rows by max_score (descending) first; grouping afterwards keeps
# that row order, so the first row of each group is its top-scoring program.
# The result stays grouped by exam_type.
osym_data_2017 %>%
  arrange(desc(max_score)) %>%
  group_by(exam_type) %>%
  slice(1)
## # A tibble: 19 x 14
## # Groups:   exam_type [19]
##    program_id university_name  city   faculty_name program_name  exam_type
##    <chr>      <chr>            <chr>  <chr>        <chr>         <chr>    
##  1 102210108  BOĞAZİÇİ ÜNİVER~ İSTAN~ Fen - Edebi~ Çeviribilim ~ DİL_1    
##  2 101110245  ANKARA ÃœNÄ°VERSÄ°~ ANKARA Dil ve Tari~ Latin Dili v~ DÄ°L_2    
##  3 102710051  ÇANAKKALE ONSEK~ ÇANAK~ Eğitim Fakü~ Japonca Öğre~ DİL_3    
##  4 109210459  SÜLEYMAN DEMİRE~ ISPAR~ Ziraat Fakü~ Ziraat Mühen~ MF       
##  5 203910115  KOÇ ÜNİVERSİTESİ İSTAN~ Fen Fakülte~ Matematik (İ~ MF_1     
##  6 102210126  BOĞAZİÇİ ÜNİVER~ İSTAN~ Fen - Edebi~ Fizik (İngil~ MF_2     
##  7 105610025  Ä°STANBUL ÃœNÄ°VER~ Ä°STAN~ CerrahpaÅŸa ~ CerrahpaÅŸa T~ MF_3     
##  8 102210286  BOĞAZİÇİ ÜNİVER~ İSTAN~ Mühendislik~ Elektrik - E~ MF_4     
##  9 102210214  BOĞAZİÇİ ÜNİVER~ İSTAN~ İktisadi ve~ İktisat (İng~ TM_1     
## 10 202111142  İHSAN DOĞRAMACI~ ANKARA Güzel Sanat~ Grafik Tasar~ TM_2     
## 11 203910796  KOÇ ÜNİVERSİTESİ İSTAN~ Hukuk Fakül~ Hukuk (Tam B~ TM_3     
## 12 203111236  İSTANBUL MEDİPO~ İSTAN~ Eğitim Fakü~ Özel Eğitim ~ TS_1     
## 13 203910724  KOÇ ÜNİVERSİTESİ İSTAN~ İnsani Bili~ Medya ve Gör~ TS_2     
## 14 202111009  İHSAN DOĞRAMACI~ ANKARA Uygulamalı ~ Bilgisayar T~ YGS_1    
## 15 103110645  DOKUZ EYLÃœL ÃœNÄ°~ Ä°ZMÄ°R  Fizik Tedav~ Fizyoterapi ~ YGS_2    
## 16 300211897  GİRNE AMERİKAN ~ KKTC ~ Spor Yüksek~ Rekreasyon Y~ YGS_3    
## 17 202711287  İSTANBUL GELİŞİ~ İSTAN~ Uygulamalı ~ Gastronomi (~ YGS_4    
## 18 204711809  OKAN ÜNİVERSİTE~ İSTAN~ Uygulamalı ~ Spor Yönetic~ YGS_5    
## 19 102210383  BOĞAZİÇİ ÜNİVER~ İSTAN~ Uygulamalı ~ Yönetim Bili~ YGS_6    
## # ... with 8 more variables: general_quota <dbl>, general_placement <dbl>,
## #   min_score <dbl>, max_score <dbl>, val_quota <dbl>,
## #   val_placement <dbl>, val_min_score <dbl>, val_max_score <dbl>
  1. Plot the top 10 programs of İSTANBUL ÜNİVERSİTESİ in terms of total quota in a bar chart.
# Bar chart of the 10 programs with the largest general_quota at the
# selected university.
# NOTE(review): the university-name literal below is kept exactly as written
# in the original; it looks mis-encoded — confirm it matches the values
# actually stored in `university_name`.
osym_data_2017 %>%
  filter(university_name == 'Ä°STANBUL ÃœNÄ°VERSÄ°TESÄ°') %>%
  # Rank by quota (the plotted measure), not by max_score, so the 10 rows
  # kept are really the 10 largest programs.
  arrange(desc(general_quota)) %>%
  slice(1:10) %>%
  ggplot() +
  geom_bar(
    aes(x = reorder(program_name, -general_quota), y = general_quota),
    stat = "identity"
  ) +
  # Rotate the long program names so they stay readable.
  theme(axis.text.x = element_text(angle = 90))

  1. Calculate the fill rate (sum(general_placement)/sum(general_quota)) per city and return the top 10 cities.
# Fill rate per city: total placements divided by total quota.
# The summary column keeps the name `t`; the ten highest rates are returned.
osym_data_2017 %>%
  group_by(city) %>%
  summarise(t = sum(general_placement) / sum(general_quota)) %>%
  arrange(desc(t)) %>%
  head(10)
## # A tibble: 10 x 2
##    city                      t
##    <chr>                 <dbl>
##  1 GEBZE                 1.02 
##  2 TEKÄ°RDAÄž              1.01 
##  3 MANÄ°SA                1.01 
##  4 KOCAELÄ°               1.00 
##  5 BURSA                 1.00 
##  6 SUMGAYIT - AZERBAYCAN 1.00 
##  7 DÄ°YARBAKIR            0.999
##  8 DENÄ°ZLÄ°               0.998
##  9 EDÄ°RNE                0.998
## 10 SAKARYA               0.997
  1. Find full (general_placement == general_quota) Endüstri Mühendisliği programs (use grepl) and draw a scatterplot of min_score vs max_score. Set the transparency parameter (alpha) to 0.7. Color the programs according to whether they belong to a foundation university or a state university. (Tip: State university program IDs start with 1, foundation 2, KKTC 3, and other abroad 4. You can use the substr function.)
# Scatter plot of min_score vs max_score for *full* Endüstri Mühendisliği
# programs, colored by university type derived from the first digit of
# program_id ("1" = STATE, "2" = FOUNDATION, anything else = OTHER).
osym_data_2017 %>%
  mutate(
    id_prefix = substr(program_id, 1, 1),
    # Compare against strings: program_id is a character column.
    prg = ifelse(
      id_prefix == "1", "STATE",
      ifelse(id_prefix == "2", "FOUNDATION", "OTHER")
    )
  ) %>%
  filter(
    # Keep only full programs (every quota seat was placed).
    general_placement == general_quota,
    # Anchored pattern = program name starts with the department name,
    # matching the original prefix comparison without a hard-coded length.
    grepl("^Endüstri Mühendisliği", program_name)
  ) %>%
  ggplot() +
  geom_point(aes(x = min_score, y = max_score, color = prg), alpha = 0.7)

  1. Find the top 10 faculties with the highest quotas and draw a bar chart. Ignore similar names and typos in faculty names.
# Bar chart of the ten faculty names with the largest summed general_quota.
# Faculty names are used as-is (similar names / typos are not merged).
osym_data_2017 %>%
  group_by(faculty_name) %>%
  summarise(tplm = sum(general_quota)) %>%
  arrange(desc(tplm)) %>%
  head(10) %>%
  ggplot(aes(x = reorder(faculty_name, -tplm), y = tplm)) +
  geom_bar(stat = "identity") +
  # Rotate the long faculty names so they stay readable.
  theme(axis.text.x = element_text(angle = 90))

  1. Find all full medicine programs (Tıp but not Tıp Mühendisliği) of foundation universities. Group by university, calculate the total quota per university, and use the maximum max_score and minimum min_score as bounds. Order and color the universities by total quota. (Tip: Use geom_crossbar.)
# Crossbar chart of *full* medicine (Tıp) programs at foundation universities.
# Per university: sum5 = total quota, min5/max5 = score bounds, and
# avg5 = midpoint of those bounds (drawn as the crossbar's middle line).
osym_data_2017 %>%
  filter(
    # Keep only full programs (every quota seat was placed).
    general_placement == general_quota,
    # Program name starts with "Tıp" and sits in a "Tıp Fakültesi" faculty;
    # the faculty condition is what the original used to restrict the match.
    startsWith(program_name, "Tıp"),
    startsWith(faculty_name, "Tıp Fakültesi"),
    # Foundation-university program ids start with "2".
    substr(program_id, 1, 1) == "2"
  ) %>%
  group_by(university_name) %>%
  summarise(
    sum5 = sum(general_quota),
    max5 = max(max_score),
    min5 = min(min_score),
    avg5 = min(min_score) / 2 + max(max_score) / 2
  ) %>%
  # Order universities by descending total quota and color by the same value.
  ggplot(aes(x = reorder(university_name, -sum5), y = avg5)) +
  geom_crossbar(aes(ymin = min5, ymax = max5, color = sum5), stat = "identity") +
  theme(axis.text.x = element_text(angle = 90))