load("~/ETM58D/osym_data_2017_v2.RData")
head(osym_data_2017)
## # A tibble: 6 x 14
## program_id university_name city faculty_name program_name exam_type
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 100110266 ABANT İZZET BAY~ BOLU Bolu Sağlık Y~ Hemşirelik YGS_2
## 2 100110487 ABANT İZZET BAY~ BOLU Bolu Turizm İ~ Gastronomi v~ YGS_4
## 3 100110724 ABANT İZZET BAY~ BOLU Bolu Turizm İ~ Turizm İşlet~ YGS_6
## 4 100130252 ABANT İZZET BAY~ BOLU Bolu Turizm İ~ Turizm İşlet~ YGS_6
## 5 100110433 ABANT İZZET BAY~ BOLU Diş Hekimliği~ Diş Hekimliği MF_3
## 6 100110609 ABANT İZZET BAY~ BOLU Diş Hekimliği~ Diş Hekimliğ~ MF_3
## # ... with 8 more variables: general_quota <chr>, general_placement <chr>,
## # min_score <dbl>, max_score <dbl>, val_quota <dbl>,
## # val_placement <dbl>, val_min_score <dbl>, val_max_score <dbl>
osym_data_2017 <- osym_data_2017 %>% mutate(general_quota = as.numeric(general_quota), general_placement = as.numeric(general_placement))
Get the highest max_score
programs from each exam_type
.
osym_data_2017 %>% group_by(exam_type) %>% filter(max_score == max(max_score)) %>% select(university_name,program_name,exam_type,max_score) %>% arrange(desc(max_score))
## # A tibble: 19 x 4
## # Groups: exam_type [19]
## university_name program_name exam_type max_score
## <chr> <chr> <chr> <dbl>
## 1 BOĞAZİÇİ ÜNİVERSİTESİ Elektrik - El~ MF_4 575.
## 2 İSTANBUL ÜNİVERSİTESİ Cerrahpaşa Tı~ MF_3 564.
## 3 BOĞAZİÇİ ÜNİVERSİTESİ İktisat (İngi~ TM_1 561.
## 4 KOÇ ÜNİVERSİTESİ Hukuk (Tam Bu~ TM_3 558.
## 5 BOĞAZİÇİ ÜNİVERSİTESİ Çeviribilim (~ DİL_1 543.
## 6 KOÇ ÜNİVERSİTESİ Matematik (İn~ MF_1 537.
## 7 KOÇ ÜNİVERSİTESİ Medya ve Görs~ TS_2 536.
## 8 BOĞAZİÇİ ÜNİVERSİTESİ Fizik (İngili~ MF_2 527.
## 9 İSTANBUL MEDİPOL ÜNİVERSİTESİ Özel Eğitim Ö~ TS_1 527.
## 10 BOĞAZİÇİ ÜNİVERSİTESİ Yönetim Biliş~ YGS_6 519.
## 11 İHSAN DOĞRAMACI BİLKENT ÜNİVERSİTESİ Bilgisayar Te~ YGS_1 516.
## 12 ÇANAKKALE ONSEKİZ MART ÜNİVERSİTESİ Japonca Öğret~ DİL_3 490.
## 13 ANKARA ÜNİVERSİTESİ Latin Dili ve~ DİL_2 485.
## 14 DOKUZ EYLÜL ÜNİVERSİTESİ Fizyoterapi v~ YGS_2 473.
## 15 İSTANBUL GELİŞİM ÜNİVERSİTESİ Gastronomi (T~ YGS_4 466.
## 16 İHSAN DOĞRAMACI BİLKENT ÜNİVERSİTESİ Grafik Tasarı~ TM_2 458.
## 17 OKAN ÜNİVERSİTESİ Spor Yönetici~ YGS_5 419.
## 18 SÜLEYMAN DEMİREL ÜNİVERSİTESİ Ziraat Mühend~ MF 408.
## 19 GİRNE AMERİKAN ÜNİVERSİTESİ Rekreasyon Yö~ YGS_3 315.
Plot the top 10 programs of İSTANBUL ÜNİVERSİTESİ in terms of total quota in a bar chart.
top_10_iu <- osym_data_2017 %>% filter(university_name == "İSTANBUL ÜNİVERSİTESİ") %>% arrange(desc(general_quota)) %>% slice(1:10)
library(ggplot2)
ggplot(data=top_10_iu) + geom_bar(aes(x=reorder(program_name,-general_quota),y=general_quota),stat="identity") + theme(axis.text.x=element_text(angle=90))
Calculate the fill rate (sum(general_placement
)/sum(general_quota
)) per city and return the top 10 cities.
osym_data_2017 %>%
group_by(city) %>%
summarise(fillrate = sum(general_placement)/sum(general_quota)) %>%
arrange(desc(fillrate)) %>%
slice(1:10)
## # A tibble: 10 x 2
## city fillrate
## <chr> <dbl>
## 1 GEBZE 1.02
## 2 TEKİRDAĞ 1.01
## 3 MANİSA 1.01
## 4 KOCAELİ 1.00
## 5 BURSA 1.00
## 6 SUMGAYIT - AZERBAYCAN 1
## 7 DİYARBAKIR 0.999
## 8 DENİZLİ 0.998
## 9 EDİRNE 0.998
## 10 SAKARYA 0.997
Find full (general_placement
== general_quota
) Endüstri Mühendisliği programs (use grepl
) and draw a scatterplot of min_score vs max_score. Set transparency parameter (alpha
) to 0.7. Set program colors according to whether it is a foundation university or state university. (Tip: State university programs ids start with 1, foundation 2, KKTC 3 and other abroad 4. You can use substr
function.).
fullEndustri <- filter(osym_data_2017, grepl("Endüstri Mühendisliği", program_name)) %>% filter(general_quota==general_placement)
ggplot(data = fullEndustri, aes(x= min_score, y= max_score)) + geom_point(color = substr(fullEndustri$program_id,1,1), alpha=0.7) + labs(x="Min Score", y="Max Score") + labs (color = "University_type") + guides(alpha = "none") + scale_color_manual( values = c(5,2,6), labels = c("State","Foundation","KKTC"))
Find the top 10 faculties with the highest quotas and draw a bar chart. Ignore similar names and typos in faculty names.
maxFacs <- osym_data_2017 %>% group_by(faculty_name) %>% summarise(sumOfQouta = sum( as.double(general_quota))) %>% arrange(desc(sumOfQouta)) %>% slice(1:10)
ggplot(data = maxFacs) + geom_bar(aes(x=reorder(faculty_name,-sumOfQouta), y=sumOfQouta), stat = "identity") +
labs(x="Program Name", y="Quota") + theme(axis.text.x=element_text(angle=90))
Find all full medicine programs (Tıp but not Tıp Mühendisliği) of foundation universities group by university calculate total quotas per university and maximum max_score and minimum min_score as bounds, ordered and colored by total quota. (Tip: Use geom_crossbar
)
medicene <- osym_data_2017 %>% filter(grepl("Tıp", program_name)) %>% filter(!grepl("Mühendis",program_name))
foundationMed <- filter(medicene,substr(program_id,1,1) == 2)
foundationMedBounds <- foundationMed %>% group_by(university_name) %>% summarise(sumOfQouta = sum( as.double(general_quota)), minScore = min(min_score), maxScore =max(max_score) ) %>% arrange(desc(sumOfQouta))
ggplot(data=foundationMedBounds) + geom_crossbar(aes(x=reorder(university_name,-sumOfQouta) , y = ((minScore+maxScore)/2) , ymax= maxScore , ymin = minScore , color = sumOfQouta)) + labs (y = "Score Interval") + theme(axis.text.x=element_text(angle=90,hjust=0.5,vjust=0.5)) + xlab ("University")
Freestyle: Do an analysis that compares the mechanical engineering (Makine Mühendisliği) and civil engineering (İnşaat Mühendisliği) programs.
mech <- osym_data_2017 %>% filter(grepl("Makine Mühendis", program_name))
civil <- osym_data_2017 %>% filter(grepl("İnşaat Mühendis", program_name))
mechAndCivil <- osym_data_2017 %>% filter(grepl("Makine Mühendis|İnşaat Mühendis", program_name))
mechSummary <- mech %>% summarise(sumOfQouta = sum( as.double(general_quota)), minScore = min(min_score), maxBottomScore =max(min_score), maxTopScore = max(max_score), meanScore= mean(min_score)) %>% arrange(desc(sumOfQouta))
civilSummary <- civil %>% summarise(sumOfQouta = sum( as.double(general_quota)), minScore = min(min_score), maxBottomScore =max(min_score), maxTopScore = max(max_score) , meanScore= mean(min_score)) %>% arrange(desc(sumOfQouta))
mechSummary$programType = "mechanichal eng."
civilSummary$programType = "civil eng."
combinedSummaryTable <- rbind(mechSummary, civilSummary)
head(combinedSummaryTable)
## # A tibble: 2 x 6
## sumOfQouta minScore maxBottomScore maxTopScore meanScore programType
## <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 10668 180 522. 540. 296. mechanichal en~
## 2 11858 180 480. 515. 298. civil eng.
Freestyle: Compare Koç University with Bilkent University.
Find the mean score of each program that overlaps calculate the winner school on mean scores
kocAndBilkent = osym_data_2017 %>% filter(university_name == "KOÇ ÜNİVERSİTESİ" | university_name == "İHSAN DOĞRAMACI BİLKENT ÜNİVERSİTESİ")
kocAndBilkent$meanScore = (kocAndBilkent$min_score + kocAndBilkent$max_score) / 2
commonPrograms = semi_join( kocAndBilkent %>% filter(university_name == "KOÇ ÜNİVERSİTESİ") %>% select(program_name), kocAndBilkent %>% filter(university_name == "İHSAN DOĞRAMACI BİLKENT ÜNİVERSİTESİ") %>% select(program_name) )
## Joining, by = "program_name"
kocAndBilkent <- kocAndBilkent %>% filter(program_name %in% commonPrograms)
#this filter is no working, I don't know why
In this one I found the common programs that exist in both universities, then I triderd to filter the list with that programs. However filter did not work, I searched online and found same answer always, tried with another dataset it is working. However it does not work on this one. So I’m not able to move forward, if you know why it is not working, please let me know because I spend 3 hours on this.
Freestyle: Do your best. Find the schools that which min score of the program closest to mean of that programs min score over all universities
result = osym_data_2017 %>% group_by(program_name) %>% slice(which.min(abs(min_score - mean(min_score)))) %>% select(university_name,program_name,exam_type,min_score)
head(result)
## # A tibble: 6 x 4
## # Groups: program_name [6]
## university_name program_name exam_type min_score
## <chr> <chr> <chr> <dbl>
## 1 MEHMET AKİF ERSOY ÜNİVERSİTESİ Acil Yardım ve Afet ~ YGS_2 267.
## 2 GİRNE AMERİKAN ÜNİVERSİTESİ Acil Yardım ve Afet ~ MF_3 180
## 3 GİRNE AMERİKAN ÜNİVERSİTESİ Acil Yardım ve Afet ~ MF_3 180
## 4 GİRNE AMERİKAN ÜNİVERSİTESİ Acil Yardım ve Afet ~ MF_3 180
## 5 GİRNE AMERİKAN ÜNİVERSİTESİ Acil Yardım ve Afet ~ MF_3 227.
## 6 GİRNE AMERİKAN ÜNİVERSİTESİ Acil Yardım ve Afet ~ MF_3 180