library(tidyverse)
## -- Attaching packages ----------------------------------------------- tidyverse 1.2.1 --
## √ ggplot2 2.2.1     √ purrr   0.2.4
## √ tibble  1.4.2     √ dplyr   0.7.4
## √ tidyr   0.8.0     √ stringr 1.3.0
## √ readr   1.1.1     √ forcats 0.2.0
## -- Conflicts -------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
# Folder that holds the input CSV files. The full path is built with
# file.path() so the same file is read as before, but without calling
# setwd(): the session's working directory is no longer changed as a side
# effect of loading the data.
data_dir <- "D:/Users/SUUSER/Desktop"

# Match results; the Task1 histograms read Home_Score and Away_Score from it.
veri <- read.csv(file.path(data_dir, "tr_super_league_matches.csv"))

Task1

a.Home Score (goals)

# Histogram of Home_Score (goals scored by the home side).
# binwidth = 1 is set explicitly: the scores are presumably whole-number goal
# counts, so unit-wide bins give one bar per score, whereas the default of
# 30 bins only fits the data by accident and makes ggplot2 emit its
# "Pick better value with `binwidth`" message.
home <- ggplot(data = veri, aes(x = Home_Score)) +
  geom_histogram(
    binwidth = 1,
    colour = "darkgreen",
    fill = "green",
    alpha = 0.3
  ) +
  labs(
    title = "Home Score Histogram",
    x = "Home Score",
    y = "number of events"
  )
home

b.Away Score (goals)

# Histogram of Away_Score (goals scored by the away side).
# binwidth = 1 is set explicitly: the scores are presumably whole-number goal
# counts, so unit-wide bins give one bar per score, whereas the default of
# 30 bins only fits the data by accident and makes ggplot2 emit its
# "Pick better value with `binwidth`" message.
away <- ggplot(data = veri, aes(x = Away_Score)) +
  geom_histogram(
    binwidth = 1,
    colour = "darkgreen",
    fill = "green",
    alpha = 0.3
  ) +
  labs(
    title = "Away Score Histogram",
    x = "Away Score",
    y = "number of events"
  )

away

c.Score Difference

# Histogram of the per-match score difference (Home_Score - Away_Score).
# The difference column is derived on the fly, so `veri` itself is not modified.
# binwidth = 1 is set explicitly: a difference of two goal counts is presumably
# a whole number, so unit-wide bins give one bar per value instead of relying
# on the default of 30 bins (which makes ggplot2 emit its
# "Pick better value with `binwidth`" message).
difference <- ggplot(
  data = veri %>% mutate(Difference_Score = Home_Score - Away_Score),
  aes(x = Difference_Score)
) +
  geom_histogram(
    binwidth = 1,
    colour = "darkgreen",
    fill = "green",
    alpha = 0.3
  ) +
  labs(
    title = "Score Difference Histogram",
    x = "Score Difference",
    y = "number of events"
  )
difference

  1. What distribution do you think home and away goals are coming from? Calculate the expected number of games corresponding to each quantile (number of goals) by using sample means as distribution mean and plot these values on the histogram

Answer Part1 Q2

The distributions of goals for the home and away teams appear to follow the same pattern, and both show characteristics of a Poisson distribution. The distribution of the difference between two independent Poisson-distributed random variables is known as the Skellam distribution.

Task2

library(tidyverse)
library(ggplot2)
# Folder that holds the input CSV files. The full path is built with
# file.path() so the same file is read as before, but without calling
# setwd(): the session's working directory is no longer changed as a side
# effect of loading the data.
data_dir <- "D:/Users/SUUSER/Desktop"

# Odds data for Task2; note that this replaces the previous contents of `veri`.
# The code that follows reads the HomeOdd, AwayOdd and TieOdd columns from it.
veri <- read.csv(file.path(data_dir, "tr_super_league_odd_details.csv"))
# veri1: the odds table plus the reciprocal of each odd
# (Phome, Paway, Ptie).
veri1 <- mutate(
  veri,
  Phome = 1 / HomeOdd,
  Paway = 1 / AwayOdd,
  Ptie = 1 / TieOdd
)

# veri2: the odds table plus the same reciprocals rescaled by the inverse of
# their row-wise total, so that Pnhome + Pnaway + Pntie equals 1 for each row.
veri2 <- mutate(
  veri,
  Pnhome = (1 / HomeOdd) * (1 / (1 / HomeOdd + 1 / AwayOdd + 1 / TieOdd)),
  Pnaway = (1 / AwayOdd) * (1 / (1 / HomeOdd + 1 / AwayOdd + 1 / TieOdd)),
  Pntie = (1 / TieOdd) * (1 / (1 / HomeOdd + 1 / AwayOdd + 1 / TieOdd))
)

# veri3: veri1 with the gap between the home and away reciprocals.
veri3 <- mutate(veri1, diff_prob = Phome - Paway)

# Scatter plot of Ptie against (Phome - Paway); printed because it is a
# top-level expression.
ggplot(veri3, aes(diff_prob, Ptie)) +
  geom_point(colour = "darkblue", fill = "blue", alpha = 0.4) +
  labs(x = "Phome - Paway")