library(tidyverse)
## -- Attaching packages ----------------------------------------------- tidyverse 1.2.1 --
## <U+221A> ggplot2 2.2.1 <U+221A> purrr 0.2.4
## <U+221A> tibble 1.4.2 <U+221A> dplyr 0.7.4
## <U+221A> tidyr 0.8.0 <U+221A> stringr 1.3.0
## <U+221A> readr 1.1.1 <U+221A> forcats 0.2.0
## -- Conflicts -------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
# Show the current working directory, then switch to the folder that is
# expected to contain the CSV so the relative file name below resolves.
getwd()
## [1] "D:/Users/SUUSER/Desktop"
setwd(dir = "D:/Users/SUUSER/Desktop")

# Match-level results used by the Task 1 histograms.
veri <- read.csv(file = "tr_super_league_matches.csv")
Task1
a.Home Score (goals)
# Histogram of the Home_Score column (goals scored by the home side).
# Scores are goal counts, so binwidth = 1 gives one bar per score value.
# The default of 30 bins spreads a handful of integer values over mostly
# empty bins and makes stat_bin() ask for a better `binwidth`.
home <- ggplot(data = veri, aes(x = Home_Score)) +
  geom_histogram(
    binwidth = 1,
    colour = "darkgreen",
    fill = "green",
    alpha = 0.3
  ) +
  labs(
    title = "Home Score Histogram",
    x = "Home Score",
    y = "number of events"
  )
home
b.Away Score (goals)
# Histogram of the Away_Score column (goals scored by the away side).
# Scores are goal counts, so binwidth = 1 gives one bar per score value.
# The default of 30 bins spreads a handful of integer values over mostly
# empty bins and makes stat_bin() ask for a better `binwidth`.
away <- ggplot(data = veri, aes(x = Away_Score)) +
  geom_histogram(
    binwidth = 1,
    colour = "darkgreen",
    fill = "green",
    alpha = 0.3
  ) +
  labs(
    title = "Away Score Histogram",
    x = "Away Score",
    y = "number of events"
  )
away
c.Score Difference
# Histogram of the per-match score difference (Home_Score - Away_Score).
# The difference column is derived on the fly and is not stored in `veri`.
# Differences of goal counts are whole numbers (negative when the away side
# scores more), so binwidth = 1 gives one bar per difference value instead of
# the default 30 bins that stat_bin() warns about.
difference <- ggplot(
  data = veri %>% mutate(Difference_Score = Home_Score - Away_Score),
  aes(x = Difference_Score)
) +
  geom_histogram(
    binwidth = 1,
    colour = "darkgreen",
    fill = "green",
    alpha = 0.3
  ) +
  labs(
    title = "Score Difference Histogram",
    x = "Score Difference",
    y = "number of events"
  )
difference
Answer Part1 Q2
The distributions of goals for the home and away teams appear to follow the same pattern, and both show the characteristics of a Poisson distribution. The distribution of the difference between two independent random variables with Poisson distributions is known as the Skellam distribution.
Task2
library(tidyverse)
library(ggplot2)
# Show the current working directory, then switch to the folder that is
# expected to contain the CSV so the relative file name below resolves.
getwd()
## [1] "D:/Users/SUUSER/Desktop"
setwd(dir = "D:/Users/SUUSER/Desktop")

# Odds data for Task 2; note that this replaces the previous contents of `veri`.
veri <- read.csv(file = "tr_super_league_odd_details.csv")
# Raw implied probabilities: the reciprocal of each quoted odd.
# NOTE(review): presumably HomeOdd/AwayOdd/TieOdd are decimal odds - confirm.
veri1 <- veri %>%
  mutate(
    Phome = 1 / HomeOdd,
    Paway = 1 / AwayOdd,
    Ptie = 1 / TieOdd
  )

# Normalised probabilities: each reciprocal odd is scaled by one over the sum
# of the three reciprocals, so the three values add up to 1 for every row.
veri2 <- veri %>%
  mutate(
    Pnhome = (1 / HomeOdd) *
      (1 / ((1 / HomeOdd) + (1 / AwayOdd) + (1 / TieOdd))),
    Pnaway = (1 / AwayOdd) *
      (1 / ((1 / HomeOdd) + (1 / AwayOdd) + (1 / TieOdd))),
    Pntie = (1 / TieOdd) *
      (1 / ((1 / HomeOdd) + (1 / AwayOdd) + (1 / TieOdd)))
  )

# Gap between the raw home and away probabilities, built on veri1
# (the un-normalised values), not on veri2.
veri3 <- mutate(veri1, diff_prob = Phome - Paway)

# Scatter of the raw tie probability against the home-minus-away gap.
ggplot(data = veri3, aes(x = diff_prob, y = Ptie)) +
  geom_point(col = "darkblue", fill = "blue", alpha = .4) +
  labs(x = "Phome - Paway")