Task 1 Q1

# Load the match results and draw histograms of home goals, away goals and
# the home-minus-away goal difference.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this file exists.
tr_super_league_matches <- read_csv("C:/Users/Sezgin/Desktop/tr_super_league_matches.csv")

# Goal counts are integers. hist()'s default breaks are right-closed and
# include the lowest value, so 0 and 1 would share the first bar. Breaks at
# the half-integers give every integer value its own bar.
integer_breaks <- function(x) {
  rng <- range(x, na.rm = TRUE)
  seq(rng[1] - 0.5, rng[2] + 0.5, by = 1)
}

# Home_Score / Away_Score are taken from the data frame explicitly; as bare
# names they are not visible to hist() unless the data has been attached.
# NOTE(review): assumes both are numeric columns of the CSV -- confirm.
home_score <- tr_super_league_matches$Home_Score
away_score <- tr_super_league_matches$Away_Score
score_diff <- home_score - away_score

hist(
  home_score,
  breaks = integer_breaks(home_score),
  main = "Home Score Distribution",
  xlab = "Home Score",
  ylab = "Number of Games",
  col = "blue"
)

hist(
  away_score,
  breaks = integer_breaks(away_score),
  main = "Away Score Distribution",
  xlab = "Away Score",
  ylab = "Number of Games",
  col = "green"
)

hist(
  score_diff,
  breaks = integer_breaks(score_diff),
  main = "Home Score - Away Score Distribution",
  xlab = "Home Score - Away Score",
  ylab = "Number of Games",
  col = "red"
)

Task 1 Q2

What distribution do you think home and away goals are coming from? Calculate the expected number of games corresponding to each quantile (number of goals) by using sample means as distribution mean and plot these values on the histogram.

Answer: Judging by the histogram of the score difference, the difference appears to follow an approximately normal distribution centred near 0 (roughly symmetric and bell-shaped). The histogram also shows that the most frequent score difference is 0, meaning that a draw is the most common single outcome, which is also consistent with a mean value close to 0. Because draws account for the largest number of games, they have the highest empirical probability. The quantile with the second-highest “probability” is a goal difference of +1, meaning that the home team scores one more goal than the away team.

Task 2

# Load the bookmaker odds and derive outcome probabilities from them.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this file exists.
tr_super_league_odd_details <- read_csv("C:/Users/Sezgin/Desktop/tr_super_league_odd_details.csv")

# The conversion is row-wise, so it is done with mutate() rather than a
# grouped summarise(): a summarise() that returns several rows per group is
# deprecated since dplyr 1.1.0. The inverse odds are computed once and shared
# by both probability tables.
implied_odds <- tr_super_league_odd_details %>%
  mutate(
    inv_home = 1 / HomeOdd,
    inv_away = 1 / AwayOdd,
    inv_tie = 1 / TieOdd,
    # Sum of the three inverse odds; used as the normalising constant below.
    inv_total = inv_home + inv_away + inv_tie
  ) %>%
  # Keep rows ordered by match and bookmaker, mirroring the grouped output.
  arrange(matchid, Bookmaker)

# Calculation of P(home win), P(tie) and P(away win) with formula P(x) = 1/odd
probabilities <- implied_odds %>%
  transmute(
    matchid,
    Bookmaker,
    Phomewin = inv_home,
    Pawaywin = inv_away,
    Ptie = inv_tie
  )

# Calculation of the probabilities via the normalization approach: each
# inverse odd is divided by the sum of the three, so they add up to 1.
probabilities_norm <- implied_odds %>%
  transmute(
    matchid,
    Bookmaker,
    Pnorhomewin = inv_home / inv_total,
    Pnorawaywin = inv_away / inv_total,
    Pnortie = inv_tie / inv_total
  )
# Plot P(home win) - P(away win) against P(tie) for both probability
# calculation approaches: the 1/odd table first, then the normalised table.
calculation1 <- probabilities %>%
  mutate(diff1 = Phomewin - Pawaywin)
calculation2 <- probabilities_norm %>%
  mutate(diff2 = Pnorhomewin - Pnorawaywin)

# geom_point() already uses the identity stat, so it is not spelled out.
ggplot(data = calculation1, aes(x = diff1, y = Ptie)) +
  geom_point() +
  labs(
    x = "P(home win) - P(away win)",
    y = "P(tie)",
    title = "P(home win) - P(away win) vs. P(tie) with First Probability Calculation Approach"
  ) +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 0.5, size = 12)
  )

ggplot(data = calculation2, aes(x = diff2, y = Pnortie)) +
  geom_point() +
  labs(
    x = "P(home win) - P(away win)",
    y = "P(tie)",
    title = "P(home win) - P(away win) vs. P(tie) with Second Probability Calculation Approach"
  ) +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 0.5, size = 12)
  )

# Plotting for each bookmaker

# Scatter plot of P(tie) against P(home win) - P(away win) for one bookmaker.
#   data     data frame holding a `Ptie` column and the difference column
#   diff_col name (string) of the column with P(home win) - P(away win)
#   title    plot title (the bookmaker's name)
# Returns the ggplot object; it is drawn when the call is evaluated at top
# level.
plot_bookmaker_odds <- function(data, diff_col, title) {
  ggplot(data = data, aes(x = .data[[diff_col]], y = Ptie)) +
    geom_point() +
    labs(x = "P(home win) - P(away win)", y = "P(tie)", title = title) +
    theme_bw() +
    theme(
      legend.position = "none",
      axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 0.5, size = 12)
    )
}

bookmarker1 <- probabilities %>%
  filter(Bookmaker == "Betsson") %>%
  mutate(diff3 = Phomewin - Pawaywin)
plot_bookmaker_odds(bookmarker1, "diff3", "Betsson")

bookmarker2 <- probabilities %>%
  filter(Bookmaker == "Pinnacle") %>%
  mutate(diff4 = Phomewin - Pawaywin)
plot_bookmaker_odds(bookmarker2, "diff4", "Pinnacle")

bookmarker3 <- probabilities %>%
  filter(Bookmaker == "bet365") %>%
  mutate(diff5 = Phomewin - Pawaywin)
plot_bookmaker_odds(bookmarker3, "diff5", "bet365")

bookmarker4 <- probabilities %>%
  filter(Bookmaker == "bwin") %>%
  mutate(diff6 = Phomewin - Pawaywin)
plot_bookmaker_odds(bookmarker4, "diff6", "bwin")