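First, we converted the bookmakers’ odds into implied probabilities for each result (home win, away win, tie), normalising them so that each bookmaker’s three probabilities sum to one.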
# Convert each bookmaker's decimal odds into normalised outcome probabilities.
# Starting from the raw spreadsheet, every bookmaker contributes three new
# columns (Phome_*, Paway_*, Ptie_*): the inverse of each odd multiplied by the
# inverse of the sum of the three inverse odds, so the three values sum to one.
# The bookmaker's raw odds columns are dropped once they have been used.
tsl_odddata <- Reduce(
  function(odds_tbl, bookmaker) {
    odd_cols <- paste0(c("HomeOdd_", "AwayOdd_", "TieOdd_"), bookmaker)
    inv_home <- 1 / odds_tbl[[odd_cols[1]]]
    inv_away <- 1 / odds_tbl[[odd_cols[2]]]
    inv_tie <- 1 / odds_tbl[[odd_cols[3]]]
    normaliser <- 1 / (inv_home + inv_away + inv_tie)

    # New columns are appended in home, away, tie order.
    odds_tbl[[paste0("Phome_", bookmaker)]] <- inv_home * normaliser
    odds_tbl[[paste0("Paway_", bookmaker)]] <- inv_away * normaliser
    odds_tbl[[paste0("Ptie_", bookmaker)]] <- inv_tie * normaliser

    # Keep every column except this bookmaker's raw odds, in existing order.
    odds_tbl[setdiff(names(odds_tbl), odd_cols)]
  },
  c("bet365", "betfair", "betsson", "bwin", "pinnacle"),
  read_excel("tsl_odddata.xlsx")
)
We selected the 2017 season as the test data and the three preceding seasons (2014–2016) as the training data.
# Keep only seasons after 2013, then drop the match identifiers, scores,
# scheduling fields and the betfair probabilities so that they are not
# available to the model.
tsl_odddata <- tsl_odddata %>%
  filter(Season > 2013) %>%
  select(-c(
    MatchId, Home_score, Away_score, Weekday, Round, Hour,
    Phome_betfair, Paway_betfair, Ptie_betfair
  ))
tsl_odddata
## # A tibble: 1,098 x 16
## Home Away Result Season Phome_bet365 Paway_bet365 Ptie_bet365
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 besiktas galatasa… Away 2014 0.404 0.310 0.286
## 2 gaziante… balikesi… Home 2014 0.531 0.212 0.257
## 3 besiktas eskisehi… Tie 2014 0.570 0.175 0.255
## 4 gaziante… akhisar-… Home 2014 0.396 0.317 0.287
## 5 basakseh… eskisehi… Tie 2014 0.518 0.221 0.261
## 6 genclerb… besiktas Away 2014 0.260 0.459 0.281
## 7 genclerb… kayseri-… Tie 2014 0.442 0.276 0.282
## 8 gaziante… kardemir… Home 2014 0.373 0.342 0.284
## 9 kardemir… basakseh… Tie 2014 0.452 0.268 0.280
## 10 genclerb… akhisar-… Tie 2014 0.452 0.268 0.280
## # ... with 1,088 more rows, and 9 more variables: Phome_betsson <dbl>,
## # Paway_betsson <dbl>, Ptie_betsson <dbl>, Phome_bwin <dbl>,
## # Paway_bwin <dbl>, Ptie_bwin <dbl>, Phome_pinnacle <dbl>,
## # Paway_pinnacle <dbl>, Ptie_pinnacle <dbl>
# Training set: every season before 2017. Season is removed so that it is not
# used as a predictor.
tsl_train <- select(filter(tsl_odddata, Season < 2017), -Season)

# Fit an rpart tree predicting Result from all remaining columns, then plot it.
tsl_model <- rpart(Result ~ ., data = tsl_train)
rpart.plot(tsl_model)
The plot shows that 45% of the matches from 2014 to 2016 ended in a home win. The main splitting variable is the home-win probability implied by betsson’s odds.
Then we removed the matches involving teams that are new in the 2017 season from the test data, because those teams do not appear in the training data. After that, we applied the trained model to the test data and predicted the probability of each result.
# Test set: 2017 matches, excluding any match whose home or away team name
# matches goztepe or yeni-malatyaspor. Season is dropped afterwards to mirror
# the columns of the training set.
tsl_test <- tsl_odddata %>%
  filter(
    Season == 2017,
    !grepl("goztepe|yeni-malatyaspor", Home),
    !grepl("goztepe|yeni-malatyaspor", Away)
  ) %>%
  select(-Season)

# Predicted probabilities for the test matches (one column per outcome).
tsl_predict <- predict(tsl_model, newdata = tsl_test)
head(tsl_predict)
## Away Home Tie
## 1 0.1234257 0.6397985 0.2367758
## 2 0.1234257 0.6397985 0.2367758
## 3 0.2689076 0.5126050 0.2184874
## 4 0.5384615 0.2508361 0.2107023
## 5 0.2689076 0.5126050 0.2184874
## 6 0.2758621 0.1206897 0.6034483
We used the Kelly criterion to decide which matches to bet on according to the model. To do so, we calculated the average bookmakers’ odds and the Kelly fraction of each outcome for every match.
# Combine the model's predicted probabilities with the average bookmaker odds
# and compute a Kelly fraction for each outcome of every test match.

# Rename the team columns so that they do not clash with the Home/Away
# probability columns of the prediction matrix.
tsl_test <- tsl_test %>% rename(Home_team = Home, Away_team = Away)

# as_tibble() is used here in place of the deprecated tbl_df().
tsl_predicted <- tsl_test %>%
  cbind(tsl_predict) %>%
  as_tibble()

# Re-read the raw odds for the 2017 matches, applying the same team filter that
# was used to build the test set.
tsl_rev <- read_excel("tsl_odddata.xlsx") %>%
  filter(
    Season == 2017,
    !grepl("goztepe|yeni-malatyaspor", Home),
    !grepl("goztepe|yeni-malatyaspor", Away)
  )

# The odds are attached to the predictions purely by row position, so fail
# loudly if the two tables do not list the same matches in the same order.
stopifnot(
  nrow(tsl_rev) == nrow(tsl_predicted),
  all(tsl_rev$Home == tsl_predicted$Home_team),
  all(tsl_rev$Away == tsl_predicted$Away_team)
)

# Average decimal odds per outcome over the four bookmakers kept in the model.
tsl_rev <- tsl_rev %>%
  mutate(
    Bhome_avg = (HomeOdd_bet365 + HomeOdd_betsson + HomeOdd_bwin + HomeOdd_pinnacle) / 4,
    Baway_avg = (AwayOdd_bet365 + AwayOdd_betsson + AwayOdd_bwin + AwayOdd_pinnacle) / 4,
    Btie_avg = (TieOdd_bet365 + TieOdd_betsson + TieOdd_bwin + TieOdd_pinnacle) / 4
  ) %>%
  select(Bhome_avg, Btie_avg, Baway_avg)

# Keep the columns needed downstream; the predicted-probability columns
# Home/Tie/Away become Phome/Ptie/Paway.
tsl_predicted <- tsl_predicted %>%
  cbind(tsl_rev) %>%
  select(
    Home_team, Away_team, Bhome_avg, Btie_avg, Baway_avg,
    Phome = Home, Ptie = Tie, Paway = Away, Result
  )

# Kelly fraction for decimal odds B and win probability P: (P * B - 1) / (B - 1).
tsl_predicted <- tsl_predicted %>%
  mutate(
    Xhome = ((Phome * Bhome_avg) - 1) / (Bhome_avg - 1),
    Xtie = ((Ptie * Btie_avg) - 1) / (Btie_avg - 1),
    Xaway = ((Paway * Baway_avg) - 1) / (Baway_avg - 1)
  )
head(tsl_predicted)
## Home_team Away_team Bhome_avg Btie_avg Baway_avg Phome
## 1 genclerbirligi kardemir-karabuk 1.978750 3.355000 3.896250 0.6397985
## 2 besiktas kasimpasa 1.272526 5.870733 9.607017 0.6397985
## 3 kayserispor bursaspor 2.145000 3.417500 3.331250 0.5126050
## 4 sivasspor galatasaray 3.771828 3.670594 1.913446 0.2508361
## 5 kasimpasa bursaspor 2.411250 3.493750 2.761250 0.5126050
## 6 kardemir-karabuk kasimpasa 2.253889 3.413310 3.084533 0.1206897
## Ptie Paway Result Xhome Xtie Xaway
## 1 0.2367758 0.1234257 Tie 0.27177651 -0.08731088 -0.17923268
## 2 0.2367758 0.1234257 Home -0.68191567 0.08007987 0.02158154
## 3 0.2184874 0.2689076 Home 0.08693259 -0.10478566 -0.04469777
## 4 0.2107023 0.5384615 Home -0.01944178 -0.08484900 0.03318979
## 5 0.2184874 0.2689076 Tie 0.16724103 -0.09490112 -0.14619105
## 6 0.6034483 0.2758621 Away -0.58057690 0.43912970 -0.07152410
We recorded the recommended bet in the “Advice” column: the outcome whose Kelly fraction is positive and larger than the other two, or “Do not bet” otherwise.
# Recommend the outcome whose Kelly fraction is positive and strictly larger
# than the other two; in every other case recommend no bet.
# case_when() treats an NA condition as "not matched", so a match with a
# missing Kelly fraction falls through to "Do not bet" rather than getting an
# NA Advice, which the grepl-based "Do not bet" filters used later would keep.
tsl_predicted <- tsl_predicted %>%
  mutate(
    Advice = case_when(
      Xhome > 0 & Xhome > Xaway & Xhome > Xtie ~ "Home",
      Xtie > 0 & Xtie > Xhome & Xtie > Xaway ~ "Tie",
      Xaway > 0 & Xaway > Xhome & Xaway > Xtie ~ "Away",
      TRUE ~ "Do not bet"
    )
  )
head(tsl_predicted)
## Home_team Away_team Bhome_avg Btie_avg Baway_avg Phome
## 1 genclerbirligi kardemir-karabuk 1.978750 3.355000 3.896250 0.6397985
## 2 besiktas kasimpasa 1.272526 5.870733 9.607017 0.6397985
## 3 kayserispor bursaspor 2.145000 3.417500 3.331250 0.5126050
## 4 sivasspor galatasaray 3.771828 3.670594 1.913446 0.2508361
## 5 kasimpasa bursaspor 2.411250 3.493750 2.761250 0.5126050
## 6 kardemir-karabuk kasimpasa 2.253889 3.413310 3.084533 0.1206897
## Ptie Paway Result Xhome Xtie Xaway Advice
## 1 0.2367758 0.1234257 Tie 0.27177651 -0.08731088 -0.17923268 Home
## 2 0.2367758 0.1234257 Home -0.68191567 0.08007987 0.02158154 Tie
## 3 0.2184874 0.2689076 Home 0.08693259 -0.10478566 -0.04469777 Home
## 4 0.2107023 0.5384615 Home -0.01944178 -0.08484900 0.03318979 Away
## 5 0.2184874 0.2689076 Tie 0.16724103 -0.09490112 -0.14619105 Home
## 6 0.6034483 0.2758621 Away -0.58057690 0.43912970 -0.07152410 Tie
The model’s advice was correct in about 39% (54 of 138) of the matches for which a bet was recommended.
# Among the matches with a recommended bet, count how often the advice matched
# the actual result and express each count as a share of those matches.
# `.` in the final step is the pipe's left-hand side (all recommended matches),
# so nrow(.) is the overall total rather than the size of one group.
tsl_predicted_accuracy <- tsl_predicted %>%
  filter(!grepl("Do not bet", Advice)) %>%
  group_by(correct_class = (Result == Advice)) %>%
  summarise(
    count = n(),
    percentage = n() / nrow(.)
  )
tsl_predicted_accuracy
## # A tibble: 2 x 3
## correct_class count percentage
## <lgl> <int> <dbl>
## 1 FALSE 84 0.609
## 2 TRUE 54 0.391
The total return was calculated assuming a one-unit stake on every recommended match. As a result, staking 138 units in total returns about 133.6 units, i.e. a net loss of roughly 4.4 units.
# Payout of a one-unit stake on each recommended match: the average decimal
# odds of the advised outcome when the advice was right, and 0 otherwise.
tsl_gain <- tsl_predicted %>%
  filter(!grepl("Do not bet", Advice)) %>%
  mutate(
    Gain = ifelse(
      Advice == "Home" & Result == Advice, Bhome_avg,
      ifelse(
        Advice == "Away" & Result == Advice, Baway_avg,
        ifelse(Advice == "Tie" & Result == Advice, Btie_avg, 0)
      )
    )
  )

# Total amount returned over all recommended bets.
summarise(tsl_gain, sum(Gain))
## sum(Gain)
## 1 133.5889