First, we computed the implied probability of each match outcome by taking the inverse of the quoted odds. We then normalized these probabilities: a football game has three possible outcomes (home win, tie, away win), so their probabilities must sum to 1. This normalization removes the bookmakers' margins.

# Turn every bookmaker's odds into outcome probabilities: invert each odd and
# rescale the three inverses (home / away / tie) so that they sum to 1.
# Once a bookmaker has been converted, its raw odd columns are dropped.
# The bookmakers are processed in the listed order, so the new columns are
# appended as Phome_*, Paway_*, Ptie_* per bookmaker.
tsl_odddata <- Reduce(
  function(odds, bookmaker) {
    home_odd <- paste0("HomeOdd_", bookmaker)
    away_odd <- paste0("AwayOdd_", bookmaker)
    tie_odd <- paste0("TieOdd_", bookmaker)

    odds %>%
      mutate(
        !!paste0("Phome_", bookmaker) := (1 / .data[[home_odd]]) *
          (1 / (1 / .data[[home_odd]] + 1 / .data[[away_odd]] +
            1 / .data[[tie_odd]])),
        !!paste0("Paway_", bookmaker) := (1 / .data[[away_odd]]) *
          (1 / (1 / .data[[home_odd]] + 1 / .data[[away_odd]] +
            1 / .data[[tie_odd]])),
        !!paste0("Ptie_", bookmaker) := (1 / .data[[tie_odd]]) *
          (1 / (1 / .data[[home_odd]] + 1 / .data[[away_odd]] +
            1 / .data[[tie_odd]]))
      ) %>%
      select(-all_of(c(home_odd, away_odd, tie_odd)))
  },
  c("bet365", "betfair", "betsson", "bwin", "pinnacle"),
  tsl_odddata
)

Then, using our training data, we fitted a decision tree to see the trends in our data and the distribution of the game results.

# Keep only the columns used for modelling: match identifiers, scores, the
# scheduling fields and the betfair probabilities are dropped, and only
# seasons after 2013 are retained.
# (The score-difference column that used to be created here was removed again
# by this very select() without ever being read, so it is no longer computed.)
tsl_odddata <- tsl_odddata %>%
  select(
    -MatchId, -Home_score, -Away_score, -Weekday, -Round, -Hour,
    -Phome_betfair, -Paway_betfair, -Ptie_betfair
  ) %>%
  filter(Season > 2013)

# Training set: every retained season before 2017. Season itself is removed
# so that it is not offered to the tree as a predictor.
tsl_train <- tsl_odddata %>%
  filter(Season < 2017) %>%
  select(-Season)

# Fit a decision tree of Result on all remaining columns and plot it.
tsl_model <- rpart(Result ~ ., data = tsl_train)
rpart.plot(tsl_model)

In the next step, we built our test data, which consists of the matches of the 2017 season, and obtained the predictions.

# Test set: the 2017 season without Season as a column. Matches whose home or
# away team name matches "goztepe" or "yeni-malatyaspor" are left out
# (NOTE(review): presumably because these teams are absent from the training
# seasons - confirm).
tsl_test <- tsl_odddata %>%
  filter(
    Season == 2017,
    !grepl("goztepe|yeni-malatyaspor", Home),
    !grepl("goztepe|yeni-malatyaspor", Away)
  ) %>%
  select(-Season)

# Predict with the fitted tree while the columns still carry the names the
# model was trained on.
tsl_predict <- predict(tsl_model, newdata = tsl_test)

# Only after predicting, rename the team columns.
tsl_test <- tsl_test %>%
  rename(Home_Team = Home, Away_Team = Away)

We combined our test data and the predictions in a single data frame. Then the predicted probabilities and the odds given by the bookmakers were combined. This prepared our data for applying the Kelly Criterion and evaluating our predictions.

We took the average of the odds across the companies and created, for each outcome, a single probability column and a single odds column.

Then, using the Kelly Criterion, our model decides whether or not to bet on a game, based on the index obtained from the Kelly Criterion formula.

# Predicted outcome probabilities as a data frame, ordered Home / Tie / Away.
tsl_predict <- data.frame(tsl_predict) %>%
  select(Home, Tie, Away)

# Average odds per outcome over bet365, betsson, bwin and pinnacle (betfair is
# excluded), restricted with the same season and team filters as the test set.
# NOTE(review): deneme1 is defined outside this section - presumably the odds
# table before the odds columns were converted and dropped; confirm.
deneme <- deneme1 %>%
  filter(
    Season == 2017,
    !grepl("goztepe|yeni-malatyaspor", Home),
    !grepl("goztepe|yeni-malatyaspor", Away)
  ) %>%
  select(-HomeOdd_betfair, -AwayOdd_betfair, -TieOdd_betfair) %>%
  mutate(
    Avg_Home = (HomeOdd_bet365 + HomeOdd_betsson + HomeOdd_bwin +
      HomeOdd_pinnacle) / 4,
    Avg_Tie = (TieOdd_bet365 + TieOdd_betsson + TieOdd_bwin +
      TieOdd_pinnacle) / 4,
    Avg_Away = (AwayOdd_bet365 + AwayOdd_betsson + AwayOdd_bwin +
      AwayOdd_pinnacle) / 4
  ) %>%
  select(Avg_Home, Avg_Tie, Avg_Away)

# Attach predictions and average odds to the test matches. The tables are
# bound column-wise by position, so all three must have the same rows in the
# same order.
tsl_test <- tsl_test %>%
  cbind(tsl_predict) %>%
  cbind(deneme) %>%
  select(Home_Team, Away_Team, Result, Home:Avg_Away) %>%
  rename(PHome = Home, PTie = Tie, PAway = Away) %>%
  # Kelly index per outcome: (probability * odds - 1) / (odds - 1).
  mutate(
    XHome = (PHome * Avg_Home - 1) / (Avg_Home - 1),
    XTie = (PTie * Avg_Tie - 1) / (Avg_Tie - 1),
    XAway = (PAway * Avg_Away - 1) / (Avg_Away - 1)
  ) %>%
  # Advise the outcome whose index is positive and strictly larger than the
  # other two; otherwise advise "NO" (no bet).
  mutate(
    advise = ifelse(
      XHome > 0 & XHome > XTie & XHome > XAway,
      "Home",
      ifelse(
        XAway > 0 & XAway > XHome & XAway > XTie,
        "Away",
        ifelse(XTie > 0 & XTie > XAway & XTie > XHome, "Tie", "NO")
      )
    )
  )

Moreover, we calculated our model's accuracy by comparing what the Kelly Criterion suggests with the actual results of the games, as shown below:

# Accuracy over the games where a bet was advised: count the correct and
# incorrect advices and express each count as a share of all advised games.
tsl_prediction_accuracy <- tsl_test %>%
  filter(!grepl("NO", advise)) %>%
  group_by(correct_class = Result == advise) %>%
  # `.` is the grouped table piped into summarise(), so nrow(.) is the total
  # number of advised games, not the size of the current group.
  summarise(count = n(), percentage = n() / nrow(.))

tsl_prediction_accuracy
## # A tibble: 2 x 3
##   correct_class count percentage
##   <lgl>         <int>      <dbl>
## 1 F                84      0.609
## 2 T                54      0.391

Lastly, we evaluated what happens if a person bets 1 Lira per game according to our suggestions:

Their expense for playing 138 games: 138 TL. Their revenue from correctly predicting the results of 54 games: about 133.6 TL. The strategy therefore ends with a net loss of about 4.4 TL.

# Revenue of a 1-unit stake on every advised game: for each game whose advice
# matched the actual result, collect the average odds of that result, then
# add them up. Games advised as "NO" never match a result, so they drop out.
gain <- tsl_test %>%
  filter(Result == advise) %>%
  mutate(
    final_gain = ifelse(
      Result == "Home",
      Avg_Home,
      ifelse(
        Result == "Away",
        Avg_Away,
        ifelse(Result == "Tie", Avg_Tie, 0)
      )
    )
  ) %>%
  summarise(sum(final_gain))

gain
##   sum(final_gain)
## 1        133.5889