library(bindrcpp)
library(dplyr)
# Read the match-level results and the per-bookmaker odds from disk.
match_data <- read.csv(file = "C://Tool//tr_super_league_matches.csv")
odd_data <- read.csv(file = "C://Tool//tr_super_league_odd_details.csv")

# Preview the first six rows of the match table.
head(x = match_data, n = 6L)
##    matchid season             Home           Away Match_Date Round
## 1 04ouflDc   2010 kardemir karabuk     manisaspor 2010-08-15     1
## 2 0AUXYwsk   2010    eskisehirspor genclerbirligi 2010-08-14     1
## 3 6mVTZJRr   2010    gaziantepspor      kasimpasa 2010-08-14     1
## 4 6NaYWHB1   2010        sivasspor    galatasaray 2010-08-14     1
## 5 8UlmhSsA   2010       ankaragucu    trabzonspor 2010-08-15     1
## 6 jgvhincG   2010         bucaspor       besiktas 2010-08-14     1
##   Match_Hour weekDay month AWA_FLAG Home_Score Away_Score Match_Result
## 1         20       1     8        0          2          1         Home
## 2         21       7     8        0          0          0          Tie
## 3         19       7     8        0          0          0          Tie
## 4         19       7     8        0          2          1         Home
## 5         18       1     8        0          0          2         Away
## 6         21       7     8        0          0          1         Away
# Preview the first six rows of the odds table (one row per match/bookmaker).
head(x = odd_data, n = 6L)
##    matchid        Home        Away Bookmaker HomeOdd AwayOdd TieOdd
## 1 0029LnfI   konyaspor   sivasspor   Betsson   2.825   2.560  3.275
## 2 0029LnfI   konyaspor   sivasspor  Pinnacle   2.730   2.765  3.420
## 3 0029LnfI   konyaspor   sivasspor    bet365   2.750   2.565  3.350
## 4 0029LnfI   konyaspor   sivasspor      bwin   2.725   2.450  3.325
## 5 00ApPQui trabzonspor galatasaray   Betfair   2.525   2.750  3.200
## 6 00ApPQui trabzonspor galatasaray   Betsson   2.175   2.900  3.200
# Lagged-score features.
#
# For every match, look up the scores of the home side's previous HOME match
# and of the away side's previous AWAY match within the same season:
#   prev_home_score     - goals the home side scored in its previous home match
#   prev_home_yed_score - goals its opponent scored in that previous home match
#   prev_home_top_score - total goals in that previous home match
#   prev_away_score     - goals the away side scored in its previous away match
#   prev_away_yed_score - goals its opponent scored in that previous away match
#   prev_away_top_score - total goals in that previous away match
#
# The lags are taken per (season, team) group. Without the grouping, lag()
# runs over the whole sorted table and the first home/away match of each
# team-season silently inherits a score from a different team or season.
# With grouping those rows are NA, which is the honest value.
#
# Rows only need to be in Round order inside each group, so one sort by
# (season, Round) serves both the Home and the Away groupings; mutate() keeps
# the row order of a grouped data frame. The mutate() steps are sequenced so
# the new columns appear in the same order as before, and the result is
# returned as a plain data.frame sorted by (season, Away, Round), matching the
# layout downstream code already sees.
match_data <- match_data %>%
  arrange(season, Round) %>%
  group_by(season, Home) %>%
  mutate(prev_home_score = lag(Home_Score)) %>%
  group_by(season, Away) %>%
  mutate(
    prev_away_score = lag(Away_Score),
    prev_away_yed_score = lag(Home_Score)
  ) %>%
  group_by(season, Home) %>%
  mutate(
    prev_home_yed_score = lag(Away_Score),
    prev_home_top_score = lag(Away_Score) + lag(Home_Score)
  ) %>%
  group_by(season, Away) %>%
  mutate(prev_away_top_score = lag(Away_Score) + lag(Home_Score)) %>%
  ungroup() %>%
  arrange(season, Away, Round) %>%
  as.data.frame()

head(match_data)
##    matchid season           Home       Away Match_Date Round Match_Hour
## 1 pEbxWyR7   2010     manisaspor ankaragucu 2010-08-22     2         21
## 2 YuMTJenQ   2010       besiktas ankaragucu 2010-09-11     4         20
## 3 C0vUlBlp   2010 genclerbirligi ankaragucu 2010-09-25     6         14
## 4 xz8YMkAF   2010    galatasaray ankaragucu 2010-10-17     8         18
## 5 WWB2mS8E   2010  eskisehirspor ankaragucu 2010-10-30    10         14
## 6 A5kUGXRa   2010       bucaspor ankaragucu 2010-11-13    12         13
##   weekDay month AWA_FLAG Home_Score Away_Score Match_Result
## 1       1     8        0          0          3         Away
## 2       7     9        0          4          0         Home
## 3       7     9        0          1          0         Home
## 4       1    10        0          2          4         Away
## 5       7    10        0          0          0          Tie
## 6       7    11        0          0          0          Tie
##   prev_home_score prev_away_score prev_away_yed_score prev_home_yed_score
## 1               1              NA                  NA                   1
## 2               0               3                   0                   2
## 3               2               0                   4                   1
## 4               3               0                   1                   1
## 5               1               4                   2                   0
## 6               0               0                   0                   2
##   prev_home_top_score prev_away_top_score
## 1                   2                  NA
## 2                   2                   3
## 3                   3                   4
## 4                   4                   1
## 5                   1                   6
## 6                   2                   0
# Modelling table: the season, the lagged-score features, and two outcome
# columns derived from the current match's final score:
#   ooru - TRUE when the match produced three or more goals in total
#   gd   - goal difference, home minus away (the regression target)
data2 <- match_data %>%
  mutate(
    ooru = Home_Score + Away_Score >= 3,
    gd = Home_Score - Away_Score
  ) %>%
  select(season, starts_with("prev_"), ooru, gd)

# Seasons before 2017 are used for fitting, season 2017 for evaluation.
football_train <- data2 %>% filter(season < 2017)
football_test <- data2 %>% filter(season == 2017)

# Gaussian GLM (ordinary least squares) of goal difference on the lag features.
# The predictors are listed explicitly instead of `gd ~ . - season` because:
#   * `ooru` is computed from the current match's final score, so using it as
#     a predictor leaks the outcome into the model;
#   * prev_home_top_score and prev_away_top_score are exact sums of two other
#     predictors each, which made the design matrix rank-deficient and
#     triggered the "rank-deficient fit" warning from predict().
# Rows with NA lag features are dropped from the fit under R's default
# na.action and yield NA predictions.
football_model <- glm(
  gd ~ prev_home_score + prev_away_score +
    prev_away_yed_score + prev_home_yed_score,
  family = gaussian(),
  data = football_train
)

test_predict <- predict(football_model, newdata = football_test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
# Show the first 30 predicted goal differences for the held-out rows.
print(head(x = test_predict, n = 30L))
##           1           2           3           4           5           6 
##  0.56431981  0.50644962  0.24350235  0.56232088  0.04341573  0.20240819 
##           7           8           9          10          11          12 
## -0.01287205  0.72664797  0.81028249  0.28329591  0.20677357  0.66603097 
##          13          14          15          16          17          18 
##  0.42205382  0.38446774  0.34250288  0.43872043  0.44857942  0.06671506 
##          19          20          21          22          23          24 
##  0.72664797  0.67918950  0.53001986  0.06639708  0.57284203  0.44724265 
##          25          26          27          28          29          30 
##  0.12863266  0.54479697  0.74691869  0.53417670  0.52122672  0.73980650