## Warning: package 'reshape2' was built under R version 3.4.4
## Warning: package 'knitr' was built under R version 3.4.4

This is an individual in-class exercise. At the end of the exercise, please upload html outputs to your Progress Journals.

  1. Get the highest max_score programs from each exam_type.
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.3
## -- Attaching packages ---------------------------------- tidyverse 1.2.1 --
## <U+221A> tibble  1.4.2     <U+221A> purrr   0.2.4
## <U+221A> tidyr   0.8.0     <U+221A> dplyr   0.7.4
## <U+221A> readr   1.1.1     <U+221A> stringr 1.2.0
## <U+221A> tibble  1.4.2     <U+221A> forcats 0.3.0
## Warning: package 'tibble' was built under R version 3.4.3
## Warning: package 'tidyr' was built under R version 3.4.3
## Warning: package 'readr' was built under R version 3.4.3
## Warning: package 'purrr' was built under R version 3.4.3
## Warning: package 'dplyr' was built under R version 3.4.3
## Warning: package 'forcats' was built under R version 3.4.3
## -- Conflicts ------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
load("C:/Users/yagizbarali/Desktop/osym_data_2017_v2.RData")
head(osym_data_2017)
## # A tibble: 6 x 14
##   program_id university_name  city  faculty_name   program_name  exam_type
##   <chr>      <chr>            <chr> <chr>          <chr>         <chr>    
## 1 100110266  ABANT İZZET BAY~ BOLU  Bolu Sağlık Y~ Hemşirelik    YGS_2    
## 2 100110487  ABANT İZZET BAY~ BOLU  Bolu Turizm İ~ Gastronomi v~ YGS_4    
## 3 100110724  ABANT İZZET BAY~ BOLU  Bolu Turizm İ~ Turizm İşlet~ YGS_6    
## 4 100130252  ABANT İZZET BAY~ BOLU  Bolu Turizm İ~ Turizm İşlet~ YGS_6    
## 5 100110433  ABANT İZZET BAY~ BOLU  Diş Hekimliği~ Diş Hekimliği MF_3     
## 6 100110609  ABANT İZZET BAY~ BOLU  Diş Hekimliği~ Diş Hekimliğ~ MF_3     
## # ... with 8 more variables: general_quota <chr>, general_placement <chr>,
## #   min_score <dbl>, max_score <dbl>, val_quota <dbl>,
## #   val_placement <dbl>, val_min_score <dbl>, val_max_score <dbl>
osym_data_2017 %>% group_by(exam_type) %>% summarise(max_of_max = max(max_score))
## # A tibble: 19 x 2
##    exam_type max_of_max
##    <chr>          <dbl>
##  1 DİL_1           543.
##  2 DİL_2           485.
##  3 DİL_3           490.
##  4 MF              408.
##  5 MF_1            537.
##  6 MF_2            527.
##  7 MF_3            564.
##  8 MF_4            575.
##  9 TM_1            561.
## 10 TM_2            458.
## 11 TM_3            558.
## 12 TS_1            527.
## 13 TS_2            536.
## 14 YGS_1           516.
## 15 YGS_2           473.
## 16 YGS_3           315.
## 17 YGS_4           466.
## 18 YGS_5           419.
## 19 YGS_6           519.
colnames(osym_data_2017)
##  [1] "program_id"        "university_name"   "city"             
##  [4] "faculty_name"      "program_name"      "exam_type"        
##  [7] "general_quota"     "general_placement" "min_score"        
## [10] "max_score"         "val_quota"         "val_placement"    
## [13] "val_min_score"     "val_max_score"
  1. Plot the top 10 programs of Ä°STANBUL ÃœNÄ°VERSÄ°TESÄ° in terms of total quota in a bar chart.
library(ggplot2)

osym_data_2017 <- osym_data_2017 %>% mutate(general_quota = as.numeric(general_quota), 
    general_placement = as.numeric(general_placement))
## Warning: package 'bindrcpp' was built under R version 3.4.3
top_10_iu <- osym_data_2017 %>% filter(university_name == "İSTANBUL ÜNİVERSİTESİ") %>% 
    arrange(desc(general_quota)) %>% slice(1:10)

ggplot(data = top_10_iu) + geom_bar(aes(x = reorder(program_name, 
    -general_quota), y = general_quota), stat = "identity") + 
    theme(axis.text.x = element_text(angle = 90))

  1. Calculate the fill rate (sum(general_placement)/sum(general_quota)) per city and return the top 10 cities.
osym_data_2017 %>% group_by(city) %>% summarise(fillrate = sum(general_placement)/sum(general_quota)) %>% 
    arrange(desc(fillrate)) %>% slice(1:10)
## # A tibble: 10 x 2
##    city                  fillrate
##    <chr>                    <dbl>
##  1 GEBZE                    1.02 
##  2 TEKİRDAĞ                 1.01 
##  3 MANİSA                   1.01 
##  4 KOCAELİ                  1.00 
##  5 BURSA                    1.00 
##  6 SUMGAYIT - AZERBAYCAN    1.00 
##  7 DİYARBAKIR               0.999
##  8 DENİZLİ                  0.998
##  9 EDİRNE                   0.998
## 10 SAKARYA                  0.997
  1. Find full (general_placement == general_quota) Endüstri Mühendisliği programs (use grepl) and draw a scatterplot of min_score vs max_score. Set transparency parameter (alpha) to 0.7. Set program colors according to whether it is a foundation university or state university. (Tip: State university programs ids start with 1, foundation 2, KKTC 3 and other abroad 4. You can use substr function.).
osym_data_2017 %>% mutate(prg = ifelse(substr(program_id, 1, 
    1) == 1, "STATE", ifelse(substr(program_id, 1, 1) == 2, "FOUNDATION", 
    "OTHER"))) %>% filter("Endüstri Mühendisliği" == substr(program_name, 
    1, 21)) %>% ggplot() + geom_point(aes(x = min_score, y = max_score, 
    color = prg), alpha = 0.7)

  1. Find the top 10 faculties with the highest quotas and draw a bar chart. Ignore similar names and typos in faculty names.
library(sqldf)
## Warning: package 'sqldf' was built under R version 3.4.4
## Loading required package: gsubfn
## Warning: package 'gsubfn' was built under R version 3.4.4
## Loading required package: proto
## Warning: package 'proto' was built under R version 3.4.4
## Loading required package: RSQLite
## Warning: package 'RSQLite' was built under R version 3.4.4
library(reshape2)
library(ggplot2)
library(DT)
## Warning: package 'DT' was built under R version 3.4.4
quota0 <- sqldf("select faculty_name,general_quota from osym_data_2017 order by general_quota desc")
quota0$general_quota <- as.numeric(quota0$general_quota)


quota1 <- sqldf("select faculty_name,max(general_quota) as general_quota from quota0 group by faculty_name")

quota1 <- sqldf("select * from quota1 order by general_quota desc")

top_ten1 <- head(quota1, 10)


datatable(top_ten1)
  1. Find all full medicine programs (Tıp but not Tıp Mühendisliği) of foundation universities group by university calculate total quotas per university and maximum max_score and minimum min_score as bounds, ordered and colored by total quota. (Tip: Use geom_crossbar)
medicine <- osym_data_2017 %>% filter(grepl("Tıp", program_name)) %>% 
    filter(!grepl("Mühendis", program_name)) %>% filter(substr(program_id, 
    1, 1) == 1) %>% group_by(university_name) %>% summarize(total_quota = sum(general_quota), 
    maximum_score = max(max_score), minimum_score = min(min_score)) %>% 
    ungroup() %>% arrange(desc(total_quota))
ggplot(data = medicine) + geom_crossbar(aes(x = reorder(university_name, 
    -total_quota), y = ((maximum_score + minimum_score)/2), ymax = maximum_score, 
    ymin = minimum_score, color = total_quota)) + labs(y = "Max and Min Scores") + 
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1)) + 
    xlab("University")

  1. Freestyle: Do an analysis that compares the mechanical engineering (Makine Mühendisliği) and civil engineering (İnşaat Mühendisliği) programs.
Mech <- osym_data_2017 %>% filter(grepl("Makine Mühendisliği", 
    program_name))
Top10 <- Mech %>% mutate(avg_score = (min_score + max_score)/2) %>% 
    arrange(desc(avg_score)) %>% slice(1:10)
Civil <- osym_data_2017 %>% filter(grepl("İnşaat Mühendisliği", 
    program_name))
Civil_top10 <- Civil %>% mutate(avg_score = (min_score + max_score)/2) %>% 
    arrange(desc(avg_score)) %>% slice(1:10)
ggplot(data = Top10) + geom_bar(aes(x = reorder(university_name, 
    -avg_score), y = avg_score), stat = "identity") + theme(axis.text.x = element_text(angle = 90))

  1. Freestyle: Compare Koç University with Bilkent University.
osym_data_2017 %>% filter(grepl("BİLKENT", university_name) | 
    grepl("KOÇ ÜNİ", university_name)) %>% group_by(university_name, 
    faculty_name) %>% summarize(avg_min = mean(min_score), avg_max = mean(max_score))
## # A tibble: 15 x 4
## # Groups:   university_name [?]
##    university_name                      faculty_name       avg_min avg_max
##    <chr>                                <chr>                <dbl>   <dbl>
##  1 İHSAN DOĞRAMACI BİLKENT ÜNİVERSİTESİ Fen Fakültesi         393.    454.
##  2 İHSAN DOĞRAMACI BİLKENT ÜNİVERSİTESİ Güzel Sanatlar Ta~    360.    408.
##  3 İHSAN DOĞRAMACI BİLKENT ÜNİVERSİTESİ Hukuk Fakültesi       477.    515.
##  4 İHSAN DOĞRAMACI BİLKENT ÜNİVERSİTESİ İktisadi, İdari v~    390.    439.
##  5 İHSAN DOĞRAMACI BİLKENT ÜNİVERSİTESİ İnsani Bilimler v~    403.    457.
##  6 İHSAN DOĞRAMACI BİLKENT ÜNİVERSİTESİ İşletme Fakültesi     392.    445.
##  7 İHSAN DOĞRAMACI BİLKENT ÜNİVERSİTESİ Mühendislik Fakül~    467.    511.
##  8 İHSAN DOĞRAMACI BİLKENT ÜNİVERSİTESİ Uygulamalı Teknol~    328.    422.
##  9 KOÇ ÜNİVERSİTESİ                     Fen Fakültesi         456.    491.
## 10 KOÇ ÜNİVERSİTESİ                     Hemşirelik Fakült~    376.    432.
## 11 KOÇ ÜNİVERSİTESİ                     Hukuk Fakültesi       472.    528.
## 12 KOÇ ÜNİVERSİTESİ                     İktisadi ve İdari~    435.    490.
## 13 KOÇ ÜNİVERSİTESİ                     İnsani Bilimler v~    452.    487.
## 14 KOÇ ÜNİVERSİTESİ                     Mühendislik Fakül~    462.    511.
## 15 KOÇ ÜNİVERSİTESİ                     Tıp Fakültesi (İn~    525.    546.
  1. Freestyle: Do your best.
library(sqldf)
library(reshape2)
library(ggplot2)

freestyle0 <- sqldf("select distinct city,university_name from osym_data_2017")
freestyle <- sqldf("select city,count(*) as count1 from freestyle0 group by city order by count(*) desc")

top_ten <- head(freestyle, 10)

ggplot(data = top_ten) + geom_bar(aes(x = reorder(city, -count1), 
    y = count1), stat = "identity") + theme(axis.text.x = element_text(angle = 90))