

Assignment 3: Normalization and OPS+
For Assignment 3, we worked on understanding normalization and OPS+. To compute OPS+, we first calculated the league-average on-base percentage (OBP) and slugging percentage (SLG) for each year. We then wrapped the OPS+ calculation in a general function so that it can be applied to any time frame.
​
This assignment also required us to compare the same data with each team's winning percentage. The data was displayed in a scatter plot, and a linear model reporting R and R-squared was fitted. While the plot gives a visual comparison, the linear model lets us see whether winning percentage and OPS+ are correlated, and whether the model is predictive.
Download
# Packages used by this script. The invisible zero-width-space characters that
# preceded `library(tidyverse)` and filled the separator line were removed:
# R rejects them as unexpected input.
library(tidyverse)
library(ggpubr)
library(gt)
library(readr)

# Import the team-level data file (path is relative to the working directory).
teamsA3 <- read_csv("MATH 494/Teams (for Assignment 3)(in).csv")

# Keep seasons 2000-2019 and only the columns used later for OBP, SLG and OPS+.
teamSub <- teamsA3 |>
  filter(yearID %in% 2000:2019) |>
  select(
    yearID, teamID, franchID, G, W, L, H, BB, HBP,
    AB, SF, `2B`, `3B`, HR, BPF
  )

# Add on-base percentage (OBP) and slugging average (SLG) columns to a table
# of batting totals, one value per row.
#
# Args:
#   df:     data frame with columns H, BB, HBP, AB, SF, `2B`, `3B` and HR.
#   digits: decimal places kept in OBP and SLG (default 3, the value that was
#           previously hard-coded).
#
# Returns:
#   `df` with OBP and SLG appended as the last two columns.
teamFun <- function(df, digits = 3) {
  needed <- c("H", "BB", "HBP", "AB", "SF", "2B", "3B", "HR")
  absent <- setdiff(needed, names(df))
  if (length(absent) > 0) {
    stop(
      "teamFun: missing column(s): ", paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  # OBP = (H + BB + HBP) / (AB + BB + HBP + SF)
  on_base <- df[["H"]] + df[["BB"]] + df[["HBP"]]
  chances <- df[["AB"]] + df[["BB"]] + df[["HBP"]] + df[["SF"]]
  OBP <- round(on_base / chances, digits)

  # H already counts every hit once, so doubles, triples and home runs only
  # contribute their 1, 2 and 3 extra bases.
  total_bases <- df[["H"]] + df[["2B"]] + 2 * df[["3B"]] + 3 * df[["HR"]]
  SLG <- round(total_bases / df[["AB"]], digits)

  # Finish on the value itself: ending on an assignment returns it invisibly.
  cbind(df, OBP, SLG)
}
teamSub <- teamFun(teamSub)

# Average OBP and SLG for each season: the unweighted mean of the team values
# in that year. Columns are named directly rather than renamed by position,
# and the grouping is dropped so `sumOPS` is a plain one-row-per-year table.
sumOPS <- teamSub |>
  group_by(yearID) |>
  summarise(avgOBP = mean(OBP), avgSLG = mean(SLG), .groups = "drop")

# Attach each season's averages to every team row of that season.
teamSub <- teamSub |>
  left_join(sumOPS, by = join_by(yearID))

# Add a park-adjusted OPS+ column (OPSP) to a table that already holds each
# row's OBP/SLG and the matching averages.
#
#   OPSP = 100 * (OBP / avgOBP + SLG / avgSLG - 1) / (BPF / bpf_base)
#
# Args:
#   df:       data frame with columns OBP, avgOBP, SLG, avgSLG and BPF.
#   bpf_base: value of BPF that means "neutral park". Defaults to 100.
#             NOTE(review): this assumes BPF is on the Lahman-style scale
#             (100 = neutral) -- confirm against the data file. Dividing by
#             the raw BPF, as before, puts an average team near 1 instead of
#             100; pass `bpf_base = 1` to reproduce those numbers.
#
# Returns:
#   `df` with OPSP (rounded to 3 decimals) appended as the last column.
teamOPSF <- function(df, bpf_base = 100) {
  needed <- c("OBP", "avgOBP", "SLG", "avgSLG", "BPF")
  absent <- setdiff(needed, names(df))
  if (length(absent) > 0) {
    stop(
      "teamOPSF: missing column(s): ", paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  # Each component relative to its average (1 = exactly average).
  rel_obp <- df[["OBP"]] / df[["avgOBP"]]
  rel_slg <- df[["SLG"]] / df[["avgSLG"]]

  # Park factor as a ratio around 1 before it is used as a divisor.
  park_ratio <- df[["BPF"]] / bpf_base

  OPSP <- round(100 * (rel_obp + rel_slg - 1) / park_ratio, 3)

  # Finish on the value itself: ending on an assignment returns it invisibly.
  cbind(df, OPSP)
}
teamSub <- teamOPSF(teamSub)

# Table of the team with the highest OPS+ in each season. Ties for the top
# value are kept and missing OPS+ values are ignored. `slice_max()` replaces
# the superseded `top_n()`.
tableSub <- teamSub |>
  select(yearID, teamID, OPSP) |>
  slice_max(OPSP, n = 1, by = yearID, na_rm = TRUE) |>
  gt(rowname_col = "yearID") |>
  tab_header(title = "Highest Team OPS+ by Year") |>
  tab_stubhead(label = "Year") |>
  cols_label(teamID = "Team", OPSP = "OPS+")

# Print/render the table.
tableSub

# Export for Problem 2
write.csv(
  teamSub,
  "C:\\Users\\Anastashia Pelletier\\Documents\\MATH 494\\teamSub.csv",
  row.names = TRUE
)


