Preparing the data:
tr_super_league_matches <- read_excel("tr_super_league_matches.xlsx")
tsl_veri <- tr_super_league_matches %>% select(-matchid, -Match_Date, -Match_Result)
tsl_veri <- tsl_veri %>% mutate(difference = Home_Score - Away_Score) %>%
mutate(ou25 = Home_Score + Away_Score > 2.5) %>%
mutate(ou35 = Home_Score + Away_Score > 3.5)
The data before 2017 is selected as train data for over/under 2.5 goals model.
tsl_train <- tsl_veri %>% filter(season<2017) %>% select(-season, -ou35)
tsl_model <- rpart(ou25 ~., data = tsl_train)
rpart.plot(tsl_model)
New teams of 2017 season is substracted from test data.
tsl_test <- tsl_veri %>% filter(season==2017) %>% select(-season, -ou35) %>%
filter(!grepl("goztepe|yeni malatyaspor",Home)) %>%
filter(!grepl("goztepe|yeni malatyaspor",Away))
tsl_predict <- predict(tsl_model, newdata = tsl_test)
head(tsl_predict)
## 1 2 3 4 5 6
## 0 0 0 1 1 0
The model with train data is applied to test data.
tsl_test_prediction <-
cbind(
tsl_predict %>% tbl_df %>%
transmute(tsl_predict = ifelse(tsl_predict >= 2.5,1,0)),
tsl_test %>% tbl_df %>%
transmute(tsl_actual = ifelse(ou25 ==1,1,0))
) %>%
mutate(correct_class = (tsl_predict == tsl_actual)) %>%
group_by(correct_class) %>%
summarise(count=n(),percentage=n()/nrow(.))
tsl_test_prediction
## # A tibble: 2 x 3
## correct_class count percentage
## <lgl> <int> <dbl>
## 1 FALSE 86 0.610
## 2 TRUE 55 0.390
The accuracy of train data is tested with in sample prediction.
tsl_in_sample <- predict(tsl_model)
tsl_train_prediction <-
cbind(
tsl_in_sample %>% tbl_df %>%
transmute(tsl_predict = ifelse(tsl_in_sample >= 2.5,1,0)),
tsl_train %>% tbl_df %>%
transmute(tsl_actual = ifelse(ou25 == 1,1,0))
) %>%
mutate(correct_class = (tsl_predict == tsl_actual)) %>%
group_by(correct_class) %>%
summarise(count=n(),percentage=n()/nrow(.))
tsl_train_prediction
## # A tibble: 2 x 3
## correct_class count percentage
## <lgl> <int> <dbl>
## 1 FALSE 1108 0.517
## 2 TRUE 1034 0.483
The data before 2017 is selected as train data for over/under 2.5 goals model.
tsl_train <- tsl_veri %>% filter(season<2017) %>% select(-season, -ou25)
tsl_model <- rpart(ou35 ~., data = tsl_train)
rpart.plot(tsl_model)
New teams of 2017 season is substracted from test data.
tsl_test <- tsl_veri %>% filter(season==2017) %>% select(-season, -ou25) %>%
filter(!grepl("goztepe|yeni malatyaspor",Home)) %>%
filter(!grepl("goztepe|yeni malatyaspor",Away))
tsl_predict <- predict(tsl_model, newdata = tsl_test)
The model with train data is applied to test data.
tsl_test_prediction <-
cbind(
tsl_predict %>% tbl_df %>%
transmute(tsl_predict = ifelse(tsl_predict >= 3.5,1,0)),
tsl_test %>% tbl_df %>%
transmute(tsl_actual = ifelse(ou35 ==1,1,0))
) %>%
mutate(correct_class = (tsl_predict == tsl_actual)) %>%
group_by(correct_class) %>%
summarise(count=n(),percentage=n()/nrow(.))
tsl_test_prediction
## # A tibble: 2 x 3
## correct_class count percentage
## <lgl> <int> <dbl>
## 1 FALSE 46 0.326
## 2 TRUE 95 0.674
tsl_in_sample <- predict(tsl_model)
The accuracy of train data is tested with in sample prediction.
tsl_train_prediction <-
cbind(
tsl_in_sample %>% tbl_df %>%
transmute(tsl_predict = ifelse(tsl_in_sample >= 3.5,1,0)),
tsl_train %>% tbl_df %>%
transmute(tsl_actual = ifelse(ou35 == 1,1,0))
) %>%
mutate(correct_class = (tsl_predict == tsl_actual)) %>%
group_by(correct_class) %>%
summarise(count=n(),percentage=n()/nrow(.))
tsl_train_prediction
## # A tibble: 2 x 3
## correct_class count percentage
## <lgl> <int> <dbl>
## 1 FALSE 634 0.296
## 2 TRUE 1508 0.704
The data before 2017 is selected as train data for over/under 2.5 goals model.
tsl_train <- tsl_veri %>% filter(season<2017) %>% select(-season, -ou25, -ou35)
tsl_model <- rpart(difference ~., data = tsl_train)
rpart.plot(tsl_model)
New teams of 2017 season is substracted from test data.
tsl_test <- tsl_veri %>% filter(season==2017) %>% select(-season, -ou25, -ou35) %>%
filter(!grepl("goztepe|yeni malatyaspor",Home)) %>%
filter(!grepl("goztepe|yeni malatyaspor",Away))
tsl_predict <- predict(tsl_model, newdata = tsl_test)
The model with train data is applied to test data.
tsl_test_prediction <-
cbind(
tsl_predict %>% tbl_df %>%
transmute(tsl_predict = ifelse(tsl_predict >= 0,1,0)),
tsl_test %>% tbl_df %>%
transmute(tsl_actual = ifelse(difference ==1,1,0))
) %>%
mutate(correct_class = (tsl_predict == tsl_actual)) %>%
group_by(correct_class) %>%
summarise(count=n(),percentage=n()/nrow(.))
tsl_test_prediction
## # A tibble: 2 x 3
## correct_class count percentage
## <lgl> <int> <dbl>
## 1 FALSE 75 0.532
## 2 TRUE 66 0.468
tsl_in_sample <- predict(tsl_model)
The accuracy of train data is tested with in sample prediction.
tsl_train_prediction <-
cbind(
tsl_in_sample %>% tbl_df %>%
transmute(tsl_predict = ifelse(tsl_in_sample >= 0,1,0)),
tsl_train %>% tbl_df %>%
transmute(tsl_actual = ifelse(difference == 1,1,0))
) %>%
mutate(correct_class = (tsl_predict == tsl_actual)) %>%
group_by(correct_class) %>%
summarise(count=n(),percentage=n()/nrow(.))
tsl_train_prediction
## # A tibble: 2 x 3
## correct_class count percentage
## <lgl> <int> <dbl>
## 1 FALSE 1022 0.477
## 2 TRUE 1120 0.523