load("~/ETM58D/osym_data_2017_v2.RData")
head(osym_data_2017)
## # A tibble: 6 x 14
##   program_id university_name  city  faculty_name   program_name  exam_type
##   <chr>      <chr>            <chr> <chr>          <chr>         <chr>    
## 1 100110266  ABANT İZZET BAY~ BOLU  Bolu Sağlık Y~ Hemşirelik    YGS_2    
## 2 100110487  ABANT İZZET BAY~ BOLU  Bolu Turizm İ~ Gastronomi v~ YGS_4    
## 3 100110724  ABANT İZZET BAY~ BOLU  Bolu Turizm İ~ Turizm İşlet~ YGS_6    
## 4 100130252  ABANT İZZET BAY~ BOLU  Bolu Turizm İ~ Turizm İşlet~ YGS_6    
## 5 100110433  ABANT İZZET BAY~ BOLU  Diş Hekimliği~ Diş Hekimliği MF_3     
## 6 100110609  ABANT İZZET BAY~ BOLU  Diş Hekimliği~ Diş Hekimliğ~ MF_3     
## # ... with 8 more variables: general_quota <chr>, general_placement <chr>,
## #   min_score <dbl>, max_score <dbl>, val_quota <dbl>,
## #   val_placement <dbl>, val_min_score <dbl>, val_max_score <dbl>
osym_data_2017 <- osym_data_2017 %>% mutate(general_quota = as.numeric(general_quota), general_placement = as.numeric(general_placement))

Question 1.

Get the highest max_score programs from each exam_type.

osym_data_2017 %>% group_by(exam_type)  %>% filter(max_score == max(max_score)) %>%  select(university_name,program_name,exam_type,max_score) %>% arrange(desc(max_score))
## # A tibble: 19 x 4
## # Groups:   exam_type [19]
##    university_name                      program_name   exam_type max_score
##    <chr>                                <chr>          <chr>         <dbl>
##  1 BOĞAZİÇİ ÜNİVERSİTESİ                Elektrik - El~ MF_4           575.
##  2 İSTANBUL ÜNİVERSİTESİ                Cerrahpaşa Tı~ MF_3           564.
##  3 BOĞAZİÇİ ÜNİVERSİTESİ                İktisat (İngi~ TM_1           561.
##  4 KOÇ ÜNİVERSİTESİ                     Hukuk (Tam Bu~ TM_3           558.
##  5 BOĞAZİÇİ ÜNİVERSİTESİ                Çeviribilim (~ DİL_1          543.
##  6 KOÇ ÜNİVERSİTESİ                     Matematik (İn~ MF_1           537.
##  7 KOÇ ÜNİVERSİTESİ                     Medya ve Görs~ TS_2           536.
##  8 BOĞAZİÇİ ÜNİVERSİTESİ                Fizik (İngili~ MF_2           527.
##  9 İSTANBUL MEDİPOL ÜNİVERSİTESİ        Özel Eğitim Ö~ TS_1           527.
## 10 BOĞAZİÇİ ÜNİVERSİTESİ                Yönetim Biliş~ YGS_6          519.
## 11 İHSAN DOĞRAMACI BİLKENT ÜNİVERSİTESİ Bilgisayar Te~ YGS_1          516.
## 12 ÇANAKKALE ONSEKİZ MART ÜNİVERSİTESİ  Japonca Öğret~ DİL_3          490.
## 13 ANKARA ÜNİVERSİTESİ                  Latin Dili ve~ DİL_2          485.
## 14 DOKUZ EYLÜL ÜNİVERSİTESİ             Fizyoterapi v~ YGS_2          473.
## 15 İSTANBUL GELİŞİM ÜNİVERSİTESİ        Gastronomi (T~ YGS_4          466.
## 16 İHSAN DOĞRAMACI BİLKENT ÜNİVERSİTESİ Grafik Tasarı~ TM_2           458.
## 17 OKAN ÜNİVERSİTESİ                    Spor Yönetici~ YGS_5          419.
## 18 SÜLEYMAN DEMİREL ÜNİVERSİTESİ        Ziraat Mühend~ MF             408.
## 19 GİRNE AMERİKAN ÜNİVERSİTESİ          Rekreasyon Yö~ YGS_3          315.

Question 2

Plot the top 10 programs of İSTANBUL ÜNİVERSİTESİ in terms of total quota in a bar chart.

top_10_iu <- osym_data_2017 %>% filter(university_name == "İSTANBUL ÜNİVERSİTESİ") %>% arrange(desc(general_quota)) %>% slice(1:10)
library(ggplot2)
ggplot(data=top_10_iu) + geom_bar(aes(x=reorder(program_name,-general_quota),y=general_quota),stat="identity") + theme(axis.text.x=element_text(angle=90))

Question 3

Calculate the fill rate (sum(general_placement)/sum(general_quota)) per city and return the top 10 cities.

osym_data_2017 %>%
  group_by(city) %>%
  summarise(fillrate = sum(general_placement)/sum(general_quota)) %>%
  arrange(desc(fillrate)) %>%
  slice(1:10)
## # A tibble: 10 x 2
##    city                  fillrate
##    <chr>                    <dbl>
##  1 GEBZE                    1.02 
##  2 TEKİRDAĞ                 1.01 
##  3 MANİSA                   1.01 
##  4 KOCAELİ                  1.00 
##  5 BURSA                    1.00 
##  6 SUMGAYIT - AZERBAYCAN    1    
##  7 DİYARBAKIR               0.999
##  8 DENİZLİ                  0.998
##  9 EDİRNE                   0.998
## 10 SAKARYA                  0.997

Question 4

Find full (general_placement == general_quota) Endüstri Mühendisliği programs (use grepl) and draw a scatterplot of min_score vs max_score. Set transparency parameter (alpha) to 0.7. Set program colors according to whether it is a foundation university or state university. (Tip: State university programs ids start with 1, foundation 2, KKTC 3 and other abroad 4. You can use substr function.).

fullEndustri <- filter(osym_data_2017, grepl("Endüstri Mühendisliği", program_name)) %>% filter(general_quota==general_placement)

ggplot(data = fullEndustri, aes(x= min_score, y= max_score)) + geom_point(color = substr(fullEndustri$program_id,1,1), alpha=0.7) + labs(x="Min Score", y="Max Score")   + labs (color = "University_type") + guides(alpha = "none") + scale_color_manual( values = c(5,2,6), labels = c("State","Foundation","KKTC"))

Question 5

Find the top 10 faculties with the highest quotas and draw a bar chart. Ignore similar names and typos in faculty names.

maxFacs <- osym_data_2017 %>% group_by(faculty_name)  %>%   summarise(sumOfQouta = sum( as.double(general_quota)))  %>%   arrange(desc(sumOfQouta)) %>% slice(1:10)
ggplot(data = maxFacs) + geom_bar(aes(x=reorder(faculty_name,-sumOfQouta), y=sumOfQouta), stat = "identity") +
  labs(x="Program Name", y="Quota") + theme(axis.text.x=element_text(angle=90))

Question 6

Find all full medicine programs (Tıp but not Tıp Mühendisliği) of foundation universities group by university calculate total quotas per university and maximum max_score and minimum min_score as bounds, ordered and colored by total quota. (Tip: Use geom_crossbar)

medicene <- osym_data_2017 %>% filter(grepl("Tıp", program_name)) %>% filter(!grepl("Mühendis",program_name))
foundationMed <- filter(medicene,substr(program_id,1,1) == 2)
foundationMedBounds <- foundationMed %>% group_by(university_name) %>% summarise(sumOfQouta = sum( as.double(general_quota)), minScore = min(min_score),  maxScore =max(max_score) )   %>%   arrange(desc(sumOfQouta))

ggplot(data=foundationMedBounds) + geom_crossbar(aes(x=reorder(university_name,-sumOfQouta) , y = ((minScore+maxScore)/2) , ymax= maxScore , ymin = minScore , color = sumOfQouta)) + labs (y = "Score Interval") + theme(axis.text.x=element_text(angle=90,hjust=0.5,vjust=0.5)) + xlab ("University")

Question 7

Freestyle: Do an analysis that compares the mechanical engineering (Makine Mühendisliği) and civil engineering (İnşaat Mühendisliği) programs.

mech <- osym_data_2017 %>% filter(grepl("Makine Mühendis", program_name)) 
civil <- osym_data_2017 %>% filter(grepl("İnşaat Mühendis", program_name)) 

mechAndCivil <- osym_data_2017 %>% filter(grepl("Makine Mühendis|İnşaat Mühendis", program_name))


mechSummary <- mech %>% summarise(sumOfQouta = sum( as.double(general_quota)), minScore = min(min_score),  maxBottomScore =max(min_score), maxTopScore = max(max_score), meanScore= mean(min_score))  %>%   arrange(desc(sumOfQouta))

civilSummary <- civil %>% summarise(sumOfQouta = sum( as.double(general_quota)), minScore = min(min_score),  maxBottomScore =max(min_score), maxTopScore = max(max_score) ,  meanScore= mean(min_score))  %>%   arrange(desc(sumOfQouta))

mechSummary$programType = "mechanichal eng."
civilSummary$programType = "civil eng."

combinedSummaryTable <- rbind(mechSummary, civilSummary)
head(combinedSummaryTable)
## # A tibble: 2 x 6
##   sumOfQouta minScore maxBottomScore maxTopScore meanScore programType    
##        <dbl>    <dbl>          <dbl>       <dbl>     <dbl> <chr>          
## 1      10668      180           522.        540.      296. mechanichal en~
## 2      11858      180           480.        515.      298. civil eng.

Question 8

Freestyle: Compare Koç University with Bilkent University.

Find the mean score of each program that overlaps calculate the winner school on mean scores

kocAndBilkent = osym_data_2017 %>% filter(university_name == "KOÇ ÜNİVERSİTESİ" | university_name == "İHSAN DOĞRAMACI BİLKENT ÜNİVERSİTESİ")
kocAndBilkent$meanScore = (kocAndBilkent$min_score + kocAndBilkent$max_score) / 2
commonPrograms = semi_join( kocAndBilkent %>% filter(university_name == "KOÇ ÜNİVERSİTESİ") %>% select(program_name), kocAndBilkent %>% filter(university_name == "İHSAN DOĞRAMACI BİLKENT ÜNİVERSİTESİ") %>% select(program_name) )
## Joining, by = "program_name"
kocAndBilkent <- kocAndBilkent %>% filter(program_name %in% commonPrograms)
#this filter is no working, I don't know why

In this one I found the common programs that exist in both universities, then I triderd to filter the list with that programs. However filter did not work, I searched online and found same answer always, tried with another dataset it is working. However it does not work on this one. So I’m not able to move forward, if you know why it is not working, please let me know because I spend 3 hours on this.

Quetion 9

Freestyle: Do your best. Find the schools that which min score of the program closest to mean of that programs min score over all universities

result = osym_data_2017 %>% group_by(program_name)  %>%   slice(which.min(abs(min_score - mean(min_score)))) %>% select(university_name,program_name,exam_type,min_score) 
head(result)
## # A tibble: 6 x 4
## # Groups:   program_name [6]
##   university_name                program_name          exam_type min_score
##   <chr>                          <chr>                 <chr>         <dbl>
## 1 MEHMET AKİF ERSOY ÜNİVERSİTESİ Acil Yardım ve Afet ~ YGS_2          267.
## 2 GİRNE AMERİKAN ÜNİVERSİTESİ    Acil Yardım ve Afet ~ MF_3           180 
## 3 GİRNE AMERİKAN ÜNİVERSİTESİ    Acil Yardım ve Afet ~ MF_3           180 
## 4 GİRNE AMERİKAN ÜNİVERSİTESİ    Acil Yardım ve Afet ~ MF_3           180 
## 5 GİRNE AMERİKAN ÜNİVERSİTESİ    Acil Yardım ve Afet ~ MF_3           227.
## 6 GİRNE AMERİKAN ÜNİVERSİTESİ    Acil Yardım ve Afet ~ MF_3           180