top of page

Assignment 3: Normalization and OPS+

For assignment three, we worked on understanding normalization and OPS+. For OPS+, it was important to calculate the league's average on-base percentage and slugging average for each year. Then, using the OPS+ formula, we were able to write a general function, making the calculation usable for any time frame. 

Also, this assignment required us to use the same data and compare it to the winning percentage of the teams. The data were displayed in a scatter plot, and a linear model reporting R and R-squared was created. While the plot gives a visual comparison, the linear model allows us to see whether there is a correlation between winning percentage and OPS+. It also allows us to determine whether the model is predictive.

Download

library(tidyverse)
library(ggpubr)
library(gt)
library(readr)

# Read the full team-season file.
teamsA3 <- read_csv("MATH 494/Teams (for Assignment 3)(in).csv")

# Keep the 2000-2019 seasons and only the columns needed to compute
# OBP, SLG and OPS+ (plus games, wins and losses).
teamSub <- teamsA3 |>
  filter(yearID %in% 2000:2019) |>
  select(
    yearID, teamID, franchID, G, W, L, H, BB, HBP,
    AB, SF, `2B`, `3B`, HR, BPF
  )

# Append on-base percentage (OBP) and slugging average (SLG) columns.
#
# df: data frame with columns H, BB, HBP, AB, SF, 2B, 3B and HR.
# Returns df with two extra columns, OBP and SLG, each rounded to three
# decimal places. The result is returned invisibly; assign it to use it.
teamFun <- function(df) {
  rates <- with(df, {
    times_on_base <- H + BB + HBP
    obp_chances <- AB + BB + HBP + SF
    # H already counts every hit once, so doubles, triples and home runs
    # only contribute their extra bases (1, 2 and 3 respectively).
    total_bases <- H + `2B` + 2 * `3B` + 3 * HR
    list(
      OBP = round(times_on_base / obp_chances, 3),
      SLG = round(total_bases / AB, 3)
    )
  })
  invisible(cbind(df, OBP = rates$OBP, SLG = rates$SLG))
}

teamSub <- teamFun(teamSub)

# Per-season averages of OBP and SLG: the unweighted mean of the team values
# within each yearID. The result stays grouped by yearID (.groups = "keep").
sumOPS <- teamSub |>
  group_by(yearID) |>
  summarise(
    avgOBP = mean(OBP),
    avgSLG = mean(SLG),
    .groups = "keep"
  )

 

# Attach each season's average OBP and SLG to every team row of that season.
teamSub <- left_join(
  teamSub,
  select(sumOPS, yearID, avgOBP, avgSLG),
  by = join_by(yearID)
)

#Get OPS+ for each year and by team
# Append a park-adjusted OPS+ column (OPSP) to a team-season data frame.
#
# OPS+ = 100 * (OBP / avgOBP + SLG / avgSLG - 1) / park factor, so a team that
# matches the season averages in a neutral park scores 100.
#
# df:       data frame with columns OBP, avgOBP, SLG, avgSLG and BPF.
# bpf_base: the BPF value that denotes a neutral park. BPF is divided by this
#           base to turn it into a ratio before it is applied. Dividing by the
#           raw 100-based BPF would put OPS+ near 1 instead of near 100.
#           NOTE(review): assumes the input keeps the Lahman convention of a
#           100-based BPF (e.g. 103 = 3% hitter-friendly) - confirm; pass
#           bpf_base = 1 if BPF is already a ratio around 1.
# Returns df with one extra column, OPSP, rounded to three decimal places.
teamOPSF <- function(df, bpf_base = 100) {
  needed <- c("OBP", "avgOBP", "SLG", "avgSLG", "BPF")
  stopifnot(all(needed %in% names(df)))

  rel_obp <- df[["OBP"]] / df[["avgOBP"]]
  rel_slg <- df[["SLG"]] / df[["avgSLG"]]
  park_ratio <- df[["BPF"]] / bpf_base

  OPSP <- round(100 * (rel_obp + rel_slg - 1) / park_ratio, 3)
  cbind(df, OPSP)
}

teamSub <- teamOPSF(teamSub)

# For every season, keep the team row(s) with the highest OPS+ value;
# ties within a season are all retained.
tableSub <- teamSub |>
  select(yearID, teamID, OPSP) |>
  group_by(yearID) |>
  top_n(1, OPSP) |>
  ungroup()

# Turn the per-season leaders into a display table, using the season as the
# row label (stub) and giving the remaining columns readable headings.
tableSub <- tableSub |>
  select(yearID, teamID, OPSP) |>
  gt(rowname_col = "yearID") |>
  tab_header(title = "Highest Team OPS+ by Year") |>
  tab_stubhead(label = "Year") |>
  cols_label(teamID = "Team", OPSP = "OPS+")

# Render the table.
tableSub

#Export for Problem 2
# The file is written into the same working-directory-relative "MATH 494"
# folder the input was read from, rather than a user-specific absolute path,
# so the script runs on any machine that can run the import step.
# NOTE(review): assumes the working directory is the folder that contains
# "MATH 494" (the import step already requires this) - confirm that the
# Problem 2 script reads the file from the same location.
write.csv(teamSub,
          file.path("MATH 494", "teamSub.csv"),
          row.names = TRUE)

Assignment 3 Code and Results

Anastashia Pelletier

©2022 by Anastashia Pelletier. Proudly created with Wix.com

bottom of page