First, we converted the bookmakers’ odds to probabilities for each result.

# Convert every bookmaker's odds into probabilities for home win, away win
# and tie. The inverse odds of the three outcomes are rescaled by their sum so
# that each bookmaker's probabilities add up to one. Once converted, the raw
# odd columns of that bookmaker are dropped.
tsl_odddata <- Reduce(
  function(odds, bookmaker) {
    odd_cols <- paste0(c("HomeOdd_", "AwayOdd_", "TieOdd_"), bookmaker)
    inv_home <- 1 / odds[[odd_cols[1]]]
    inv_away <- 1 / odds[[odd_cols[2]]]
    inv_tie <- 1 / odds[[odd_cols[3]]]
    # One normalising factor shared by the three outcomes of this bookmaker.
    normaliser <- 1 / (inv_home + inv_away + inv_tie)
    odds[[paste0("Phome_", bookmaker)]] <- inv_home * normaliser
    odds[[paste0("Paway_", bookmaker)]] <- inv_away * normaliser
    odds[[paste0("Ptie_", bookmaker)]] <- inv_tie * normaliser
    # Keep every column except the odds that were just converted.
    odds[setdiff(names(odds), odd_cols)]
  },
  c("bet365", "betfair", "betsson", "bwin", "pinnacle"),
  read_excel("tsl_odddata.xlsx")
)

We selected the 2017 season as the test data and the three preceding seasons as the training data.

# Keep the seasons after 2013 and drop the match identifier, score, schedule
# and betfair columns; the teams, the result, the season and the probabilities
# of the other four bookmakers remain.
tsl_odddata <- tsl_odddata %>%
  filter(Season > 2013) %>%
  select(
    -MatchId, -Home_score, -Away_score, -Weekday, -Round, -Hour,
    -Phome_betfair, -Paway_betfair, -Ptie_betfair
  )
tsl_odddata
## # A tibble: 1,098 x 16
##    Home      Away      Result Season Phome_bet365 Paway_bet365 Ptie_bet365
##    <chr>     <chr>     <chr>   <dbl>        <dbl>        <dbl>       <dbl>
##  1 besiktas  galatasa… Away     2014        0.404        0.310       0.286
##  2 gaziante… balikesi… Home     2014        0.531        0.212       0.257
##  3 besiktas  eskisehi… Tie      2014        0.570        0.175       0.255
##  4 gaziante… akhisar-… Home     2014        0.396        0.317       0.287
##  5 basakseh… eskisehi… Tie      2014        0.518        0.221       0.261
##  6 genclerb… besiktas  Away     2014        0.260        0.459       0.281
##  7 genclerb… kayseri-… Tie      2014        0.442        0.276       0.282
##  8 gaziante… kardemir… Home     2014        0.373        0.342       0.284
##  9 kardemir… basakseh… Tie      2014        0.452        0.268       0.280
## 10 genclerb… akhisar-… Tie      2014        0.452        0.268       0.280
## # ... with 1,088 more rows, and 9 more variables: Phome_betsson <dbl>,
## #   Paway_betsson <dbl>, Ptie_betsson <dbl>, Phome_bwin <dbl>,
## #   Paway_bwin <dbl>, Ptie_bwin <dbl>, Phome_pinnacle <dbl>,
## #   Paway_pinnacle <dbl>, Ptie_pinnacle <dbl>
# Seasons before 2017 form the training set; Season is removed so that it is
# not used as a predictor.
tsl_train <- select(filter(tsl_odddata, Season < 2017), -Season)
# Fit a tree for Result on all remaining columns and plot it.
tsl_model <- rpart(Result ~ ., data = tsl_train)
rpart.plot(tsl_model)

The graph shows that 45% of the matches from 2014 to 2016 resulted in a home win. The main split variable is the home-win probability derived from the betsson odds.

Then, we removed the matches of the teams that were new in the 2017 season from the test data, as these teams do not appear in the training data. After that, we applied the trained model to the test data and predicted the probability of each result.

# Test set: 2017 matches, excluding every match that involves one of the two
# teams filtered out by name below (as home or as away side).
tsl_test <- tsl_odddata %>%
  filter(
    Season == 2017,
    !grepl("goztepe|yeni-malatyaspor", Home),
    !grepl("goztepe|yeni-malatyaspor", Away)
  ) %>%
  select(-Season)
# Predicted probability of each result for every test match.
tsl_predict <- predict(tsl_model, newdata = tsl_test)
head(tsl_predict)
##        Away      Home       Tie
## 1 0.1234257 0.6397985 0.2367758
## 2 0.1234257 0.6397985 0.2367758
## 3 0.2689076 0.5126050 0.2184874
## 4 0.5384615 0.2508361 0.2107023
## 5 0.2689076 0.5126050 0.2184874
## 6 0.2758621 0.1206897 0.6034483

We use the Kelly criterion to decide which matches to bet on according to the model. Therefore, we calculated the average bookmakers’ odds and the Kelly fraction of each outcome for each match.

# Rename the team columns so that "Home" and "Away" are free for the predicted
# class probabilities appended by cbind() below.
tsl_test <- tsl_test %>% rename(Home_team = Home, Away_team = Away)
# as_tibble() replaces the deprecated tbl_df().
tsl_predicted <- tsl_test %>%
  cbind(tsl_predict) %>%
  as_tibble()

# Average odds per outcome over bet365, betsson, bwin and pinnacle (betfair is
# left out). The raw file is read again because the odd columns were dropped
# from tsl_odddata; the same season and team filters as for tsl_test are
# applied so that the rows line up.
tsl_rev <- read_excel("tsl_odddata.xlsx") %>%
  filter(Season == 2017) %>%
  filter(!grepl("goztepe|yeni-malatyaspor", Home)) %>%
  filter(!grepl("goztepe|yeni-malatyaspor", Away)) %>%
  mutate(
    Bhome_avg = (HomeOdd_bet365 + HomeOdd_betsson + HomeOdd_bwin +
      HomeOdd_pinnacle) / 4,
    Baway_avg = (AwayOdd_bet365 + AwayOdd_betsson + AwayOdd_bwin +
      AwayOdd_pinnacle) / 4,
    Btie_avg = (TieOdd_bet365 + TieOdd_betsson + TieOdd_bwin +
      TieOdd_pinnacle) / 4
  ) %>%
  select(Bhome_avg, Btie_avg, Baway_avg)

# The odds are attached by row position only, and cbind() would silently
# recycle a shorter table whose row count divides the longer one, so fail
# early if the two tables do not have the same number of rows.
stopifnot(nrow(tsl_rev) == nrow(tsl_predicted))

# Kelly fraction for an outcome with predicted probability P and average
# odds B: (P * B - 1) / (B - 1).
tsl_predicted <- tsl_predicted %>%
  cbind(tsl_rev) %>%
  select(
    Home_team, Away_team, Bhome_avg, Btie_avg, Baway_avg,
    Home, Tie, Away, Result
  ) %>%
  rename(Phome = Home, Ptie = Tie, Paway = Away) %>%
  mutate(
    Xhome = ((Phome * Bhome_avg) - 1) / (Bhome_avg - 1),
    Xtie = ((Ptie * Btie_avg) - 1) / (Btie_avg - 1),
    Xaway = ((Paway * Baway_avg) - 1) / (Baway_avg - 1)
  )
head(tsl_predicted)
##          Home_team        Away_team Bhome_avg Btie_avg Baway_avg     Phome
## 1   genclerbirligi kardemir-karabuk  1.978750 3.355000  3.896250 0.6397985
## 2         besiktas        kasimpasa  1.272526 5.870733  9.607017 0.6397985
## 3      kayserispor        bursaspor  2.145000 3.417500  3.331250 0.5126050
## 4        sivasspor      galatasaray  3.771828 3.670594  1.913446 0.2508361
## 5        kasimpasa        bursaspor  2.411250 3.493750  2.761250 0.5126050
## 6 kardemir-karabuk        kasimpasa  2.253889 3.413310  3.084533 0.1206897
##        Ptie     Paway Result       Xhome        Xtie       Xaway
## 1 0.2367758 0.1234257    Tie  0.27177651 -0.08731088 -0.17923268
## 2 0.2367758 0.1234257   Home -0.68191567  0.08007987  0.02158154
## 3 0.2184874 0.2689076   Home  0.08693259 -0.10478566 -0.04469777
## 4 0.2107023 0.5384615   Home -0.01944178 -0.08484900  0.03318979
## 5 0.2184874 0.2689076    Tie  0.16724103 -0.09490112 -0.14619105
## 6 0.6034483 0.2758621   Away -0.58057690  0.43912970 -0.07152410

We recommend a bet in the “Advice” column based on the Kelly fractions.

# Advise the outcome whose Kelly fraction is positive and strictly larger than
# the other two; when no outcome qualifies the advice is "Do not bet".
tsl_predicted <- tsl_predicted %>%
  mutate(
    home_is_best = Xhome > 0 & Xhome > Xaway & Xhome > Xtie,
    tie_is_best = Xtie > 0 & Xtie > Xhome & Xtie > Xaway,
    away_is_best = Xaway > 0 & Xaway > Xhome & Xaway > Xtie,
    Advice = ifelse(
      home_is_best, "Home",
      ifelse(tie_is_best, "Tie", ifelse(away_is_best, "Away", "Do not bet"))
    )
  ) %>%
  # The helper flags only served to build Advice.
  select(-home_is_best, -tie_is_best, -away_is_best)
head(tsl_predicted)
##          Home_team        Away_team Bhome_avg Btie_avg Baway_avg     Phome
## 1   genclerbirligi kardemir-karabuk  1.978750 3.355000  3.896250 0.6397985
## 2         besiktas        kasimpasa  1.272526 5.870733  9.607017 0.6397985
## 3      kayserispor        bursaspor  2.145000 3.417500  3.331250 0.5126050
## 4        sivasspor      galatasaray  3.771828 3.670594  1.913446 0.2508361
## 5        kasimpasa        bursaspor  2.411250 3.493750  2.761250 0.5126050
## 6 kardemir-karabuk        kasimpasa  2.253889 3.413310  3.084533 0.1206897
##        Ptie     Paway Result       Xhome        Xtie       Xaway Advice
## 1 0.2367758 0.1234257    Tie  0.27177651 -0.08731088 -0.17923268   Home
## 2 0.2367758 0.1234257   Home -0.68191567  0.08007987  0.02158154    Tie
## 3 0.2184874 0.2689076   Home  0.08693259 -0.10478566 -0.04469777   Home
## 4 0.2107023 0.5384615   Home -0.01944178 -0.08484900  0.03318979   Away
## 5 0.2184874 0.2689076    Tie  0.16724103 -0.09490112 -0.14619105   Home
## 6 0.6034483 0.2758621   Away -0.58057690  0.43912970 -0.07152410    Tie

The accuracy of our model is about 39% over the 138 matches for which a bet was advised.

# Among the matches with a betting advice, count how often the advised outcome
# matched the actual result and express both counts as a share of all advised
# matches.
tsl_predicted_accuracy <- tsl_predicted %>%
  filter(!grepl("Do not bet", Advice)) %>%
  mutate(correct_class = (Result == Advice)) %>%
  group_by(correct_class) %>%
  summarise(count = n()) %>%
  # summarise() drops the single grouping level, so sum(count) is the total
  # number of advised matches.
  mutate(percentage = count / sum(count))
tsl_predicted_accuracy
## # A tibble: 2 x 3
##   correct_class count percentage
##   <lgl>         <int>      <dbl>
## 1 FALSE            84      0.609
## 2 TRUE             54      0.391

The total return was calculated assuming a one-unit stake on every advised match. As a result, if we bet 138 units in total, we get about 133.6 units back, i.e. a net loss of about 4.4 units.

# Payout of a one-unit stake on each advised match: the average odds of the
# advised outcome when the advice was right, otherwise 0.
tsl_gain <- tsl_predicted %>%
  filter(!grepl("Do not bet", Advice)) %>%
  mutate(
    bet_won = Result == Advice,
    Gain = ifelse(
      bet_won & Advice == "Home", Bhome_avg,
      ifelse(
        bet_won & Advice == "Away", Baway_avg,
        ifelse(bet_won & Advice == "Tie", Btie_avg, 0)
      )
    )
  ) %>%
  # bet_won is only a helper for computing Gain.
  select(-bet_won)
tsl_gain %>% summarise(sum(Gain))
##   sum(Gain)
## 1  133.5889