This is an individual in-class exercise. At the end of the exercise, please upload html outputs to your Progress Journals.

library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------- tidyverse 1.2.1 --
## <U+221A> ggplot2 2.2.1     <U+221A> purrr   0.2.4
## <U+221A> tibble  1.4.2     <U+221A> dplyr   0.7.4
## <U+221A> tidyr   0.8.0     <U+221A> stringr 1.2.0
## <U+221A> readr   1.1.1     <U+221A> forcats 0.2.0
## -- Conflicts ----------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
load("C:/Users/Sezgin/Desktop/osym_data_2017_v2.RData")
head(osym_data_2017)
## # A tibble: 6 x 14
##   program_id university_name  city  faculty_name   program_name  exam_type
##   <chr>      <chr>            <chr> <chr>          <chr>         <chr>    
## 1 100110266  ABANT İZZET BAY~ BOLU  Bolu Sağlık Y~ Hemşirelik    YGS_2    
## 2 100110487  ABANT İZZET BAY~ BOLU  Bolu Turizm İ~ Gastronomi v~ YGS_4    
## 3 100110724  ABANT İZZET BAY~ BOLU  Bolu Turizm İ~ Turizm İşlet~ YGS_6    
## 4 100130252  ABANT İZZET BAY~ BOLU  Bolu Turizm İ~ Turizm İşlet~ YGS_6    
## 5 100110433  ABANT İZZET BAY~ BOLU  Diş Hekimliği~ Diş Hekimliği MF_3     
## 6 100110609  ABANT İZZET BAY~ BOLU  Diş Hekimliği~ Diş Hekimliğ~ MF_3     
## # ... with 8 more variables: general_quota <chr>, general_placement <chr>,
## #   min_score <dbl>, max_score <dbl>, val_quota <dbl>,
## #   val_placement <dbl>, val_min_score <dbl>, val_max_score <dbl>
osym_data_2017 <- osym_data_2017 %>% mutate(general_quota= as.numeric(general_quota), general_placement= as.numeric(general_placement))
  1. Get the highest max_score programs from each exam_type.
osym_data_2017 %>% group_by(exam_type) %>% summarise(Max=max(max_score))
## # A tibble: 19 x 2
##    exam_type   Max
##    <chr>     <dbl>
##  1 DİL_1       543
##  2 DİL_2       485
##  3 DİL_3       490
##  4 MF          408
##  5 MF_1        537
##  6 MF_2        527
##  7 MF_3        564
##  8 MF_4        575
##  9 TM_1        561
## 10 TM_2        458
## 11 TM_3        558
## 12 TS_1        527
## 13 TS_2        536
## 14 YGS_1       516
## 15 YGS_2       473
## 16 YGS_3       315
## 17 YGS_4       466
## 18 YGS_5       419
## 19 YGS_6       519
  1. Plot the top 10 programs of Ä°STANBUL ÃœNÄ°VERSÄ°TESÄ° in terms of total quota in a bar chart.
top_10_iu <- osym_data_2017 %>% filter(university_name == "İSTANBUL ÜNİVERSİTESİ") %>% arrange(desc(general_quota)) %>% slice(1:10)

ggplot(data=top_10_iu)  + geom_bar(aes(x=reorder(program_name, -general_quota),y=general_quota), stat="identity") + theme(axis.text.x=element_text(angle=90))

  1. Calculate the fill rate (sum(general_placement)/sum(general_quota)) per city and return the top 10 cities.
osym_data_2017 %>% group_by(city) %>% summarise(Fill_rate= sum(general_placement)/sum(general_quota)) %>% ungroup() %>% arrange(desc(Fill_rate)) %>% slice(1:10)
## # A tibble: 10 x 2
##    city                  Fill_rate
##    <chr>                     <dbl>
##  1 GEBZE                     1.02 
##  2 TEKİRDAĞ                  1.01 
##  3 MANİSA                    1.01 
##  4 KOCAELİ                   1.00 
##  5 BURSA                     1.00 
##  6 SUMGAYIT - AZERBAYCAN     1.00 
##  7 DİYARBAKIR                0.999
##  8 DENİZLİ                   0.998
##  9 EDİRNE                    0.998
## 10 SAKARYA                   0.997
  1. Find full (general_placement == general_quota) Endüstri Mühendisliği programs (use grepl) and draw a scatterplot of min_score vs max_score. Set transparency parameter (alpha) to 0.7. Set program colors according to whether it is a foundation university or state university. (Tip: State university programs ids start with 1, foundation 2, KKTC 3 and other abroad 4. You can use substr function.).
#grepl returns a logical output for each indices in the original vector/data set
#utilizing grepl with dplyr (filter) enables to filter the data set where "Endüstri Mühendisliği" is involved.
Industrial_eng <- osym_data_2017 %>% filter(grepl("Endüstri Mühendisliği", program_name))
#now that we have filtered the Industrial engineering programs, we need to find data where the program is full.
diff_Industrial_eng <- Industrial_eng %>% mutate(difference= general_placement - general_quota)
full_Industrial_eng <- diff_Industrial_eng %>% filter(difference == 0)
ggplot(data=full_Industrial_eng) + geom_point(aes(x=min_score , y =max_score , color = substr(program_id,1,1), alpha = 0.7)) +xlab("min_score") + ylab ("max_score") + labs (color = "University_type") + guides(alpha = "none") + scale_color_manual( values = c(5,2,6), labels = c("state","foundation","KKTC"))

  1. Find the top 10 faculties with the highest quotas and draw a bar chart. Ignore similar names and typos in faculty names.
top_10_faculty <- osym_data_2017 %>% arrange(desc(general_quota)) %>% slice(1:10)
ggplot(data=top_10_faculty) + geom_bar(aes(x=reorder(program_name, -general_quota),y=general_quota), stat="identity") + theme(axis.text.x=element_text(angle=90,size = 8))

  1. Find all full medicine programs (Tıp but not Tıp Mühendisliği) of foundation universities group by university calculate total quotas per university and maximum max_score and minimum min_score as bounds, ordered and colored by total quota. (Tip: Use geom_crossbar)
Medicine <- osym_data_2017 %>% filter(grepl("Tıp", program_name)) %>% filter(!grepl("Mühendis",program_name)) %>% filter(substr(program_id,1,1) == 1) %>% group_by(university_name) %>% summarize(total_quota = sum(general_quota), maximum_score = max(max_score),minimum_score = min(min_score) ) %>%ungroup() %>% arrange(desc(total_quota))
ggplot(data=Medicine) + geom_crossbar(aes(x=reorder(university_name,-total_quota) , y = ((maximum_score+minimum_score)/2) , ymax= maximum_score , ymin = minimum_score , color = total_quota)) + labs (y = "Max and Min Scores") + theme(axis.text.x=element_text(angle=90,hjust=0.5,vjust=0.5)) + xlab ("University")

  1. Freestyle: Do an analysis that compares the mechanical engineering (Makine Mühendisliği) and civil engineering (İnşaat Mühendisliği) programs.

Let’s find average scores for both Mechanical Engineering and Civil Engineering. Then let’s compare to check which program has a higher average score depending on top 10 results.

Mech <- osym_data_2017 %>% filter(grepl("Makine Mühendisliği", program_name))
Avg_Mech_top10 <- Mech %>% mutate(avg_score= (max_score + min_score)/2) %>% arrange(desc(avg_score)) %>% slice(1:10)
Civil <- osym_data_2017 %>% filter(grepl("İnşaat Mühendisliği", program_name))
Avg_Civil_top10 <- Civil %>% mutate(avg_score= (max_score + min_score)/2) %>% arrange(desc(avg_score)) %>% slice(1:10)
ggplot(data= Avg_Mech_top10) + geom_bar(aes(x= reorder(university_name, -avg_score), y= avg_score), stat= "identity") + theme(axis.text.x=element_text(angle=90))

ggplot(data= Avg_Civil_top10) + geom_bar(aes(x= reorder(university_name, -avg_score), y= avg_score), stat= "identity") + theme(axis.text.x=element_text(angle=90))

Boğaziçi University has the highest average score for both Mechanical and Civil engineering programs.

  1. Freestyle: Compare Koç University with Bilkent University.

Let’s compare the average top 10 scores and fill rates of both university’s programmes.

Koc <- osym_data_2017 %>% filter(grepl("KOÇ ÜNİVERSİTESİ", university_name))
Bilkent <- osym_data_2017 %>% filter(grepl("BİLKENT ÜNİVERSİTESİ", university_name))

Koc %>% mutate(avg_score= (max_score + min_score)/2, Fill_rate= general_placement/general_quota) %>% arrange(desc(avg_score)) %>% slice(1:10)
## # A tibble: 10 x 16
##    program_id university_name  city   faculty_name program_name  exam_type
##    <chr>      <chr>            <chr>  <chr>        <chr>         <chr>    
##  1 203910399  KOÇ ÜNİVERSİTESİ İSTAN~ Mühendislik~ Elektrik - E~ MF_4     
##  2 203910699  KOÇ ÜNİVERSİTESİ İSTAN~ Tıp Fakülte~ Tıp (İngiliz~ MF_3     
##  3 203910363  KOÇ ÜNİVERSİTESİ İSTAN~ Mühendislik~ Bilgisayar M~ MF_4     
##  4 203910796  KOÇ ÜNİVERSİTESİ İSTAN~ Hukuk Fakül~ Hukuk (Tam B~ TM_3     
##  5 203910424  KOÇ ÜNİVERSİTESİ İSTAN~ Mühendislik~ Endüstri Müh~ MF_4     
##  6 203910487  KOÇ ÜNİVERSİTESİ İSTAN~ Mühendislik~ Makine Mühen~ MF_4     
##  7 203910072  KOÇ ÜNİVERSİTESİ İSTAN~ İnsani Bili~ İngiliz Dili~ DİL_1    
##  8 203910309  KOÇ ÜNİVERSİTESİ İSTAN~ İktisadi ve~ İşletme (İng~ TM_1     
##  9 203910706  KOÇ ÜNİVERSİTESİ İSTAN~ Tıp Fakülte~ Tıp (İngiliz~ MF_3     
## 10 203910133  KOÇ ÜNİVERSİTESİ İSTAN~ İnsani Bili~ Psikoloji (İ~ TM_3     
## # ... with 10 more variables: general_quota <dbl>,
## #   general_placement <dbl>, min_score <dbl>, max_score <dbl>,
## #   val_quota <dbl>, val_placement <dbl>, val_min_score <dbl>,
## #   val_max_score <dbl>, avg_score <dbl>, Fill_rate <dbl>
Bilkent %>% mutate(avg_score= (max_score + min_score)/2, Fill_rate= general_placement/general_quota) %>% arrange(desc(avg_score)) %>% slice(1:10)
## # A tibble: 10 x 16
##    program_id university_name  city   faculty_name program_name  exam_type
##    <chr>      <chr>            <chr>  <chr>        <chr>         <chr>    
##  1 202110426  İHSAN DOĞRAMACI~ ANKARA Mühendislik~ Elektrik - E~ MF_4     
##  2 202111106  İHSAN DOĞRAMACI~ ANKARA Hukuk Fakül~ Hukuk (Tam B~ TM_3     
##  3 202110408  İHSAN DOĞRAMACI~ ANKARA Mühendislik~ Bilgisayar M~ MF_4     
##  4 202110877  İHSAN DOĞRAMACI~ ANKARA Mühendislik~ Makine Mühen~ MF_4     
##  5 202110992  İHSAN DOĞRAMACI~ ANKARA İnsani Bili~ Mütercim - T~ DİL_1    
##  6 202110744  İHSAN DOĞRAMACI~ ANKARA Mühendislik~ Elektrik - E~ MF_4     
##  7 202110444  İHSAN DOĞRAMACI~ ANKARA Mühendislik~ Endüstri Müh~ MF_4     
##  8 202110735  İHSAN DOĞRAMACI~ ANKARA Mühendislik~ Bilgisayar M~ MF_4     
##  9 202110241  İHSAN DOĞRAMACI~ ANKARA İktisadi, İ~ Psikoloji (İ~ TM_3     
## 10 202111115  İHSAN DOĞRAMACI~ ANKARA Hukuk Fakül~ Hukuk (%50 B~ TM_3     
## # ... with 10 more variables: general_quota <dbl>,
## #   general_placement <dbl>, min_score <dbl>, max_score <dbl>,
## #   val_quota <dbl>, val_placement <dbl>, val_min_score <dbl>,
## #   val_max_score <dbl>, avg_score <dbl>, Fill_rate <dbl>

Both universities have 100% fill rate for their top 10 highest score programmes. Both universities have “Electrical & Electronics Enginnering” as a highest average score programme.

  1. Freestyle: Do your best.

We can calculate the averange difference between maximum and minimum scores for the universities in Istanbul.

chance_to_enroll <- osym_data_2017 %>% filter(city== "İSTANBUL") %>% group_by(university_name, program_name) %>% summarise(Score_diff=round(mean(abs(max_score-min_score)),1)) %>%  arrange(desc(Score_diff)) %>% ungroup() %>% slice(1:10)
ggplot(data=chance_to_enroll) + geom_bar(aes(x= program_name, y= Score_diff), stat="identity") + theme(axis.text.x=element_text(angle=90,size = 8))

Assuming the student wants to study in Istanbul, above calculation shows that German Teaching program in Istanbul University has the highest score difference, could be demonstrating that the student has a higher chance to enroll.