Assignment 4: Winning Expectancy | Anastashia Pelletier

Assignment 4: Win Expectancy Formulas

For assignment four, we worked on understanding various win expectancy formulas. This assignment allowed us to see how strong the correlation between the winning percentage and the Pythagorean Win Estimation is. We used a linear model and a scatter plot to analyze the correlation, which resulted in a fitted line very close to y = x.

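Since the correlation is very strong, we used logarithms to estimate an exponent to replace the squares: regressing log(W/L) on log(R/RA) gives a slope that serves as the new exponent. This new value was then placed back into the formula to show an even closer model.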

Download

library(readr)
library(tidyverse)
library(ggpubr)

#Part 1.A: Importing File and Creating a Subset with Needed Variables
# Read the full Teams table, then keep only the identifier columns plus the
# win/loss and run totals used by the calculations further down.
Teams <- read_csv("MATH 494/Teams.csv")

teamW <- Teams %>%
  select(yearID, teamID, W, L, R, RA)

#Part 1.B: Calculate Winning Percentage
# Append the actual winning percentage to a team table.
#
# df: data frame with numeric columns W (wins) and L (losses).
# Returns cbind(df, wPer), where wPer = 100 * W / (W + L).
# A row with W + L == 0 yields NaN for wPer.
winF <- function(df){
  stopifnot(is.data.frame(df), all(c("W", "L") %in% names(df)))
  wPer <- with(df, (W/(W+L))*100)
  # End on the value itself rather than on an assignment, so the result is
  # returned visibly (an assignment as the last expression returns invisibly).
  cbind(df, wPer)
}

# Add the wPer column to the working table.
teamW <- teamW %>% winF()

#Part 1.C: Calculate Pythagorean Winning Percentage
# Append the Pythagorean win estimate to a team table.
#
# df: data frame with numeric columns R (runs scored) and RA (runs allowed).
# n:  numeric exponent applied to both R and RA.
# Returns cbind(df, wPyth), where wPyth = 100 * R^n / (R^n + RA^n).
# A row where R^n + RA^n == 0 yields NaN for wPyth.
winPythF <- function(df,n){
  stopifnot(is.data.frame(df), all(c("R", "RA") %in% names(df)),
            is.numeric(n))
  # Pull the columns out explicitly: inside with(df, ...) a column of df that
  # happens to be called `n` would shadow the exponent argument.
  runs_for <- df[["R"]]^n
  runs_against <- df[["RA"]]^n
  wPyth <- (runs_for/(runs_for + runs_against))*100
  # End on the value so the result is returned visibly.
  cbind(df, wPyth)
}

## winPythF is reused later in the assignment with a different exponent

# Classic Pythagorean estimate: exponent fixed at 2.
teamW <- teamW %>% winPythF(2)

#Part 1.D: Create Win Per vs Pyth. Win Per Scatterplot
# Actual winning percentage (y) against the Pythagorean estimate (x), with a
# least-squares line drawn on top of the points.
# The title and x-axis label now spell "Pythagorean" correctly.
WinP_Pyth <- ggplot(teamW, aes(wPyth, wPer)) +
  geom_point(color="#ff99ff") +
  geom_smooth(method = "lm", se=FALSE, color="blue") +
  labs(title="Win Percentage vs Pythagorean Winning Percentage",
       x="Pythagorean Winning Percentage",
       y="Winning Percentage") +
  # Center the title over the panel.
  theme(plot.title = element_text(hjust = 0.5))
WinP_Pyth

#Part 1.E: Create Linear Model
# Regress actual winning percentage on the Pythagorean estimate and pull out
# the rounded coefficients and fit statistics for display.
model1 <- lm(wPer ~ wPyth, data=teamW)
summary(model1)

slope1 <- round(coef(model1)[2], 3)
int1 <- round(coef(model1)[1], 3)
r2_1 <- round(summary(model1)$r.squared, 3)
# Derive R from the unrounded R-squared (taking the root of an already-rounded
# value compounds the rounding error) and give it the sign of the slope, which
# sqrt() alone would discard.
r_1 <- round(unname(sign(coef(model1)[2])) *
               sqrt(summary(model1)$r.squared), 3)

##Display Linear Model
glue::glue("y = {int1} + {slope1}x", "
R = {r_1}", "
R-Squared = {r2_1}")

#Part 2.C: Calculating the Logs of Win/Losses and R/RA
# Keep only rows where every quantity entering a log ratio is positive.
# A zero in W, L, R or RA would make log10(W/L) or log10(R/RA) infinite or NaN,
# and lm() cannot fit a model containing such values.
lteamW <- filter(teamW, W > 0, L > 0, R > 0, RA > 0)

# Append the base-10 log ratios used for the exponent fit.
#
# df: data frame with numeric columns W, L, R and RA.
# Returns cbind(df, lWL, lRRA), where lWL = log10(W / L) and
# lRRA = log10(R / RA). Zero counts are not handled here: they produce
# infinite or NaN entries, so filter them out before calling.
winLogF <- function(df){
  stopifnot(is.data.frame(df),
            all(c("W", "L", "R", "RA") %in% names(df)))
  lWL <- with(df, log10(W/L))
  lRRA <- with(df, log10(R/RA))
  # End on the value so the result is returned visibly rather than invisibly
  # through a trailing assignment.
  cbind(df, lWL, lRRA)
}

# Add the lWL and lRRA columns to the filtered table.
lteamW <- lteamW %>% winLogF()

#Part 2.D: Create a Scatterplot for log(w/l) vs log(r/ra)
# log10(W/L) on the y-axis against log10(R/RA) on the x-axis, with a fitted
# least-squares line over the points.
lWL_lRRA <- ggplot(data = lteamW, mapping = aes(x = lRRA, y = lWL)) +
  geom_point(color="#CC44CC") +
  geom_smooth(method = "lm", se=FALSE, color="green") +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(title="Log of Wins over Losses vs Log of Runs over Runs Allowed") +
  xlab("Log(Runs/Runs Allowed)") +
  ylab("Log(Wins/Losses)")

lWL_lRRA

#Part 2.E: Create Linear Model
# Regress log10(W/L) on log10(R/RA). The rounded slope P is what the later
# win-estimate step passes to winPythF as the exponent.
model2 <- lm(lWL ~ lRRA, data=lteamW)
summary(model2)

P <- round(coef(model2)[2], 3)
Eps <- round(coef(model2)[1], 3)
r2_2 <- round(summary(model2)$r.squared, 3)
# Derive R from the unrounded R-squared (taking the root of an already-rounded
# value compounds the rounding error) and give it the sign of the slope, which
# sqrt() alone would discard.
r_2 <- round(unname(sign(coef(model2)[2])) *
               sqrt(summary(model2)$r.squared), 3)

##Display Linear Model
glue::glue("y = {Eps} + {P}x", "
R = {r_2}", "
R-Squared = {r2_2}")

#Part 3.A and 3.B: Calculate the Razakean Winning Percentage :)

## NOTE: Since I used a subset to calculate P, and P is a static
## variable, I am going to apply it to a copy of the first subset

# Start from the full table without its exponent-2 wPyth column, then
# recompute wPyth with the fitted exponent P via the earlier function.
copyTW <- teamW %>%
  select(yearID, teamID, W, L, R, RA, wPer) %>%
  winPythF(P)

#Part 3.C: Create Scatterplot of Winning % vs Win Estimation %
# Actual winning percentage (y) against the estimate computed with the fitted
# exponent (x), with a least-squares line over the points.
WP_WEP <- ggplot(data = copyTW, mapping = aes(x = wPyth, y = wPer)) +
  geom_point(color="#DD77DD") +
  geom_smooth(method = "lm", se=FALSE, color="red") +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(title="Winning Percentage vs Win Estimation Percentage") +
  xlab("Win Estimation Percentage") +
  ylab("Winning Percentage")

WP_WEP

#Part 3.D: Create Linear Model
# Regress actual winning percentage on the estimate computed with the fitted
# exponent and pull out the rounded coefficients and fit statistics.
model3 <- lm(wPer ~ wPyth, data=copyTW)
summary(model3)

Pslope <- round(coef(model3)[2], 3)
inter2 <- round(coef(model3)[1], 3)
r2_3 <- round(summary(model3)$r.squared, 3)
# Derive R from the unrounded R-squared (taking the root of an already-rounded
# value compounds the rounding error) and give it the sign of the slope, which
# sqrt() alone would discard.
r_3 <- round(unname(sign(coef(model3)[2])) *
               sqrt(summary(model3)$r.squared), 3)

##Display Linear Model
glue::glue("y = {inter2} + {Pslope}x", "
R = {r_3}", "
R-Squared = {r2_3}")

Assignment 4: Win Expectancy Formulas

Download

Assignment 4 Code and Results