library(ggplot2)
library(dplyr)
# Load the match results and the bookmaker odds from CSV files.
# NOTE(review): setwd() ties the script to one machine. It is kept so that the
# relative file names below (and any later relative paths) resolve as before;
# consider running the script from the data directory instead.
setwd("C:\\Tool")

# Match results. `TRUE` is spelled out because `T` is an ordinary variable
# that can be reassigned.
data1 <- read.table(
  file = "tr_super_league_matches.csv",
  header = TRUE,
  sep = ","
)
# read.table() already returns a data.frame, so this conversion is a no-op;
# it is kept because later code refers to `dataframe2`.
dataframe2 <- as.data.frame(data1)

# Bookmaker odds details.
dataodd1 <- read.table(
  file = "tr_super_league_odd_details.csv",
  header = TRUE,
  sep = ","
)
# Same no-op conversion; later code refers to `dfodd1`.
dfodd1 <- as.data.frame(dataodd1)
# Histogram of home-team goals (the Home_Score column of dataframe2).
# Built with ggplot() + geom_histogram() because qplot() is deprecated in
# ggplot2 3.4.0; bin width, colours, transparency and labels are the same as
# in the former qplot() call.
plot1 <- ggplot(dataframe2, aes(x = Home_Score)) +
  geom_histogram(
    binwidth = 0.5,
    fill = "red",
    colour = "black",
    alpha = 0.2
  ) +
  labs(
    title = "Home Goals",
    x = "Home Goals",
    y = "Number of Games"
  )
print(plot1)
# Histogram of away-team goals (the Away_Score column of dataframe2).
# Uses ggplot() + geom_histogram() instead of the deprecated qplot(); the
# visual settings are the same as in the former qplot() call.
plot2 <- ggplot(dataframe2, aes(x = Away_Score)) +
  geom_histogram(
    binwidth = 0.5,
    fill = "blue",
    colour = "black",
    alpha = 0.2
  ) +
  labs(
    title = "Away Goals",
    x = "Away Goals",
    y = "Number of Games"
  )
print(plot2)

# Add the per-match goal difference (home minus away) as ScoreDiff.
# This step does not depend on plot2; it previously sat between building and
# printing the plot.
df2 <- dataframe2 %>%
  mutate(ScoreDiff = Home_Score - Away_Score)
# Histogram of the goal difference (ScoreDiff = Home_Score - Away_Score).
# Uses ggplot() + geom_histogram() instead of the deprecated qplot(); the
# visual settings are the same as in the former qplot() call.
plot3 <- ggplot(df2, aes(x = ScoreDiff)) +
  geom_histogram(
    binwidth = 0.5,
    fill = "red",
    colour = "black",
    alpha = 0.2
  ) +
  labs(
    title = "Home-Away Goals",
    x = "Home-Away Goals",
    y = "Number of Games"
  )
# print() rather than plot(): both draw a ggplot object, and print() matches
# how the other plots in this script are displayed.
print(plot3)

# Convert each quoted odd into a probability by taking its reciprocal.
# These values are not rescaled here, so the three need not sum to 1.
dfodd2 <- dfodd1 %>%
  mutate(
    Phomewin = 1 / HomeOdd,
    Pawaywin = 1 / AwayOdd,
    Ptie = 1 / TieOdd
  )
# Task 1, Question 2
# Single-column copy of the score differences. data.frame() auto-names the
# column `df2.ScoreDiff`; the object is kept unchanged in case later code
# relies on it.
Score_diff <- data.frame(df2$ScoreDiff)

# Density-scaled histogram of the score difference with a normal density
# curve drawn on top. The plot reads ScoreDiff straight from df2 (the same
# values the old `aes(x = df2$ScoreDiff)` pulled in from outside the plot
# data), and after_stat(density) replaces the deprecated `..density..`.
plot8 <- ggplot(df2, aes(x = ScoreDiff)) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 1,
    colour = "yellow",
    fill = "blue"
  ) +
  # NOTE(review): mean and sd are hard-coded; presumably they are the rounded
  # sample mean and standard deviation of ScoreDiff -- confirm, or compute
  # them from the data so the curve follows the input file.
  stat_function(
    fun = dnorm,
    args = list(mean = 0.33, sd = 1.75),
    geom = "line"
  ) +
  xlab(label = "Score Difference")
print(plot8)
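# The score-difference histogram is very similar to a normal distribution
# (the overlaid curve uses mean 0.33 and sd 1.75, so it is not the standard
# normal distribution).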
# Rescale the three outcome probabilities so that, within each row, the
# normalised home/away/tie values sum to 1.
dfodd3 <- dfodd2 %>%
  mutate(
    Phomewinnormal = Phomewin / (Phomewin + Pawaywin + Ptie),
    Pawaywinnormal = Pawaywin / (Phomewin + Pawaywin + Ptie),
    Ptienormal = Ptie / (Phomewin + Pawaywin + Ptie)
  )

# Home-minus-away probability gap, raw and normalised. Computed once here and
# then split per bookmaker; both columns are row-wise, so filtering afterwards
# yields the same frames as mutating each filtered subset separately.
# dfodd3 itself is left without these two columns, as before.
dfodd_gaps <- dfodd3 %>%
  mutate(
    homeaway = Phomewin - Pawaywin,
    homeawaynorm = Phomewinnormal - Pawaywinnormal
  )

# One frame per bookmaker; the names must match the Bookmaker column exactly
# (note the lower-case "bet365" and "bwin").
dfoddbetsson <- dfodd_gaps %>% filter(Bookmaker == "Betsson")
dfoddpinnacle <- dfodd_gaps %>% filter(Bookmaker == "Pinnacle")
dfoddbet365 <- dfodd_gaps %>% filter(Bookmaker == "bet365")
dfoddbwin <- dfodd_gaps %>% filter(Bookmaker == "bwin")
# Build the draw-probability chart for one bookmaker.
#
# odds:  data frame with the columns homeaway, Ptie and Ptienormal.
# title: plot title (the bookmaker's display name).
#
# Returns a ggplot object with two lines plotted against `homeaway`: the raw
# draw probability (Ptie) and the normalised one (Ptienormal).
#
# NOTE(review): both lines share the unnormalised `homeaway` on the x axis,
# and the `homeawaynorm` column is not used here -- confirm that this is
# intended for the normalised line.
plot_draw_probability <- function(odds, title) {
  ggplot(odds, aes(x = homeaway)) +
    geom_line(aes(y = Ptie, colour = "Bookmaker value")) +
    geom_line(aes(y = Ptienormal, colour = "Normalized Value")) +
    ylab(label = "Probability of a draw") +
    xlab(label = "Probability of home team win - probability of away team win") +
    ggtitle(title) +
    # Named values pin each colour to its legend entry instead of relying on
    # the alphabetical order of the labels.
    scale_color_manual(
      values = c("Bookmaker value" = "blue", "Normalized Value" = "red")
    )
}

plot4 <- plot_draw_probability(dfoddbetsson, "Betsson")
print(plot4)

plot5 <- plot_draw_probability(dfoddpinnacle, "Pinnacle")
print(plot5)

plot6 <- plot_draw_probability(dfoddbet365, "Bet365")
print(plot6)

# The former `legend = "TRUE"` argument to ggplot() is dropped: ggplot() has
# no such parameter, and the legend comes from the colour aesthetic anyway.
plot7 <- plot_draw_probability(dfoddbwin, "Bwin")
print(plot7)
# As the graphs show, every bookmaker except Pinnacle adds a significant
# margin to its odds. This is consistent with the expectation that anyone who
# bets constantly will lose in the long term. One possible explanation is
# that Pinnacle is a small company that lowers its margin to gain a
# competitive advantage over its three competitors.
# Website popularity is partially consistent with our theory. However,
# Betsson is less popular than Pinnacle: although Pinnacle takes a lower
# margin, its popularity is almost the same as Betsson's.