# Load the 2017 OSYM data set. The .RData file is expected to provide an
# object named `osym_data_2017`, which is used immediately afterwards.
load("C:\\Tool\\osym_data_2017_v2.RData")

# Convert the general quota and placement columns to numeric so that they can
# be summed and compared.
osym_data_2017 <- mutate(
  osym_data_2017,
  general_quota = as.numeric(general_quota),
  general_placement = as.numeric(general_placement)
)

# Preview the first rows of the prepared data.
head(osym_data_2017)
## # A tibble: 6 x 14
## program_id university_name city faculty_name program_name exam_type
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 100110266 ABANT İZZET BAY~ BOLU Bolu Sağlık Y~ Hemşirelik YGS_2
## 2 100110487 ABANT İZZET BAY~ BOLU Bolu Turizm İ~ Gastronomi v~ YGS_4
## 3 100110724 ABANT İZZET BAY~ BOLU Bolu Turizm İ~ Turizm İşlet~ YGS_6
## 4 100130252 ABANT İZZET BAY~ BOLU Bolu Turizm İ~ Turizm İşlet~ YGS_6
## 5 100110433 ABANT İZZET BAY~ BOLU Diş Hekimliği~ Diş Hekimliği MF_3
## 6 100110609 ABANT İZZET BAY~ BOLU Diş Hekimliği~ Diş Hekimliğ~ MF_3
## # ... with 8 more variables: general_quota <dbl>, general_placement <dbl>,
## # min_score <dbl>, max_score <dbl>, val_quota <dbl>,
## # val_placement <dbl>, val_min_score <dbl>, val_max_score <dbl>
- Get the program with the highest `max_score` for each `exam_type`.
# For every exam type keep the single program with the highest max_score:
# order all rows by max_score (descending), then take the first row of each
# exam_type group. The result stays grouped by exam_type.
osym_data_2017 %>%
  arrange(desc(max_score)) %>%
  group_by(exam_type) %>%
  slice(1)
## # A tibble: 19 x 14
## # Groups: exam_type [19]
## program_id university_name city faculty_name program_name exam_type
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 102210108 BOĞAZİÇİ ÜNİVER~ İSTAN~ Fen - Edebi~ Çeviribilim ~ DİL_1
## 2 101110245 ANKARA ÜNİVERSİ~ ANKARA Dil ve Tari~ Latin Dili v~ DİL_2
## 3 102710051 ÇANAKKALE ONSEK~ ÇANAK~ Eğitim Fakü~ Japonca Öğre~ DİL_3
## 4 109210459 SÜLEYMAN DEMİRE~ ISPAR~ Ziraat Fakü~ Ziraat Mühen~ MF
## 5 203910115 KOÇ ÜNİVERSİTESİ İSTAN~ Fen Fakülte~ Matematik (İ~ MF_1
## 6 102210126 BOĞAZİÇİ ÜNİVER~ İSTAN~ Fen - Edebi~ Fizik (İngil~ MF_2
## 7 105610025 İSTANBUL ÜNİVER~ İSTAN~ Cerrahpaşa ~ Cerrahpaşa T~ MF_3
## 8 102210286 BOĞAZİÇİ ÜNİVER~ İSTAN~ Mühendislik~ Elektrik - E~ MF_4
## 9 102210214 BOĞAZİÇİ ÜNİVER~ İSTAN~ İktisadi ve~ İktisat (İng~ TM_1
## 10 202111142 İHSAN DOĞRAMACI~ ANKARA Güzel Sanat~ Grafik Tasar~ TM_2
## 11 203910796 KOÇ ÜNİVERSİTESİ İSTAN~ Hukuk Fakül~ Hukuk (Tam B~ TM_3
## 12 203111236 İSTANBUL MEDİPO~ İSTAN~ Eğitim Fakü~ Özel Eğitim ~ TS_1
## 13 203910724 KOÇ ÜNİVERSİTESİ İSTAN~ İnsani Bili~ Medya ve Gör~ TS_2
## 14 202111009 İHSAN DOĞRAMACI~ ANKARA Uygulamalı ~ Bilgisayar T~ YGS_1
## 15 103110645 DOKUZ EYLÜL ÜNİ~ İZMİR Fizik Tedav~ Fizyoterapi ~ YGS_2
## 16 300211897 GİRNE AMERİKAN ~ KKTC ~ Spor Yüksek~ Rekreasyon Y~ YGS_3
## 17 202711287 İSTANBUL GELİŞİ~ İSTAN~ Uygulamalı ~ Gastronomi (~ YGS_4
## 18 204711809 OKAN ÜNİVERSİTE~ İSTAN~ Uygulamalı ~ Spor Yönetic~ YGS_5
## 19 102210383 BOĞAZİÇİ ÜNİVER~ İSTAN~ Uygulamalı ~ Yönetim Bili~ YGS_6
## # ... with 8 more variables: general_quota <dbl>, general_placement <dbl>,
## # min_score <dbl>, max_score <dbl>, val_quota <dbl>,
## # val_placement <dbl>, val_min_score <dbl>, val_max_score <dbl>
- Plot the top 10 programs of İSTANBUL ÜNİVERSİTESİ in terms of total quota in a bar chart.
# Bar chart of the 10 programs of İSTANBUL ÜNİVERSİTESİ with the largest
# general quota, with bars ordered from the largest quota to the smallest.
osym_data_2017 %>%
  # The university name is written in proper UTF-8; a mis-encoded literal
  # would not match the names stored in the data.
  filter(university_name == "İSTANBUL ÜNİVERSİTESİ") %>%
  # Rank by quota (the plotted quantity) rather than by max_score, so the
  # selected rows really are the top 10 programs in terms of quota.
  arrange(desc(general_quota)) %>%
  slice(1:10) %>%
  ggplot() +
  geom_bar(
    aes(x = reorder(program_name, -general_quota), y = general_quota),
    stat = "identity"
  ) +
  # Program names are long; rotate the labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90))
- Calculate the fill rate (`sum(general_placement) / sum(general_quota)`) per city and return the top 10 cities.
# Fill rate per city: total general placements divided by total general
# quota (column `t`), keeping the 10 cities with the highest rate.
osym_data_2017 %>%
  group_by(city) %>%
  summarise(t = sum(general_placement) / sum(general_quota)) %>%
  arrange(desc(t)) %>%
  head(10)
## # A tibble: 10 x 2
## city t
## <chr> <dbl>
## 1 GEBZE 1.02
## 2 TEKİRDAĞ 1.01
## 3 MANİSA 1.01
## 4 KOCAELİ 1.00
## 5 BURSA 1.00
## 6 SUMGAYIT - AZERBAYCAN 1.00
## 7 DİYARBAKIR 0.999
## 8 DENİZLİ 0.998
## 9 EDİRNE 0.998
## 10 SAKARYA 0.997
- Find full (`general_placement == general_quota`) Endüstri Mühendisliği programs (use `grepl`) and draw a scatterplot of min_score vs max_score. Set the transparency parameter (`alpha`) to 0.7. Set program colors according to whether the program belongs to a foundation university or a state university. (Tip: State university program ids start with 1, foundation 2, KKTC 3 and other abroad 4. You can use the `substr` function.)
# Scatterplot of min_score vs max_score for full Endüstri Mühendisliği
# programs, colored by university type.
osym_data_2017 %>%
  filter(
    # Program name must start with "Endüstri Mühendisliği".
    grepl("^Endüstri Mühendisliği", program_name),
    # Keep only full programs: every general quota seat was filled.
    general_placement == general_quota
  ) %>%
  mutate(
    # The first character of program_id encodes the university type:
    # "1" = state, "2" = foundation, anything else is grouped as "OTHER".
    prg = ifelse(
      substr(program_id, 1, 1) == "1",
      "STATE",
      ifelse(substr(program_id, 1, 1) == "2", "FOUNDATION", "OTHER")
    )
  ) %>%
  ggplot() +
  geom_point(aes(x = min_score, y = max_score, color = prg), alpha = 0.7)
- Find the top 10 faculties with the highest quotas and draw a bar chart. Ignore similar names and typos in faculty names.
# Bar chart of the 10 faculty names with the largest total general quota.
# Faculty names are used as they appear in the data (no clean-up of similar
# names or typos).
osym_data_2017 %>%
  group_by(faculty_name) %>%
  summarise(tplm = sum(general_quota)) %>%
  arrange(desc(tplm)) %>%
  slice(1:10) %>%
  ggplot() +
  geom_bar(
    aes(x = reorder(faculty_name, -tplm), y = tplm),
    stat = "identity"
  ) +
  theme(axis.text.x = element_text(angle = 90))
- Find all full medicine programs (Tıp but not Tıp Mühendisliği) of foundation universities. Group them by university, calculate the total quota per university, and use the maximum max_score and the minimum min_score as bounds. Plot the result ordered and colored by total quota. (Tip: Use `geom_crossbar`.)
# Crossbar chart of full medicine (Tıp) programs at foundation universities:
# one bar per university spanning its lowest min_score to its highest
# max_score, ordered and colored by the university's total general quota.
osym_data_2017 %>%
  filter(
    # Program name starts with "Tıp" and the faculty name starts with
    # "Tıp Fakültesi"; the faculty restriction is presumably what keeps
    # Tıp Mühendisliği programs out -- verify against the data.
    substr(program_name, 1, 3) == "Tıp",
    substr(faculty_name, 1, 13) == "Tıp Fakültesi",
    # Foundation university program ids start with "2".
    substr(program_id, 1, 1) == "2",
    # Keep only full programs: every general quota seat was filled.
    general_placement == general_quota
  ) %>%
  group_by(university_name) %>%
  summarise(
    sum5 = sum(general_quota),
    max5 = max(max_score),
    min5 = min(min_score),
    # Midpoint of the score range; geom_crossbar draws its middle line here.
    avg5 = (min5 + max5) / 2
  ) %>%
  ggplot(aes(x = reorder(university_name, -sum5), y = avg5)) +
  geom_crossbar(aes(ymin = min5, ymax = max5, color = sum5)) +
  theme(axis.text.x = element_text(angle = 90))