top of page

Assignment 3: Normalization and OPS+

For Assignment 3, we worked on understanding normalization and OPS+. Computing OPS+ requires the league's average on-base percentage (OBP) and slugging average (SLG) for each year. The OPS+ calculation is then wrapped in a general function so that it can be applied to any time frame.

​

This assignment also required us to compare the same data with each team's winning percentage. The data were displayed in a scatter plot, and a linear model was fitted and its R and R-squared values reported. While the plot gives a visual comparison, the linear model lets us see whether there is a correlation between winning percentage and OPS+. It also lets us judge whether the model is predictive.

Download

library(tidyverse)
library(ggpubr)
library(gt)
library(readr)

​

# Load the raw team-level file supplied for Assignment 3
teamsA3 <- read_csv("MATH 494/Teams (for Assignment 3)(in).csv")

# Keep only the 2000-2019 seasons and the columns needed later on:
# identifiers, games/wins/losses, the batting counts used for OBP and SLG,
# and the BPF column used in the OPS+ step
teamSub <- teamsA3 |>
  filter(yearID %in% 2000:2019) |>
  select(
    yearID, teamID, franchID, G, W, L, H, BB, HBP,
    AB, SF, `2B`, `3B`, HR, BPF
  )

​

#Get OBP and SLG for each year and by team
#
# Appends two columns to a table of team batting totals:
#   OBP = (H + BB + HBP) / (AB + BB + HBP + SF), rounded to 3 decimals
#   SLG = TB / AB, rounded to 3 decimals,
#         where TB = H + 2B + 2 * 3B + 3 * HR
#
# Args:
#   df: data frame with one row per team-season and the columns
#       H, BB, HBP, AB, SF, 2B, 3B and HR.
#
# Returns:
#   df with the columns OBP and SLG bound on at the end.
#
# Stops with an error if any required column is missing.
teamFun <- function(df) {
  # Check the inputs explicitly: a lookup through with() would fall back to
  # the enclosing environment and could silently pick up an unrelated global
  # that happens to share a column's name.
  needed <- c("H", "BB", "HBP", "AB", "SF", "2B", "3B", "HR")
  absent <- setdiff(needed, names(df))
  if (length(absent) > 0) {
    stop(
      "teamFun: missing column(s): ", paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  timesOnBase <- df[["H"]] + df[["BB"]] + df[["HBP"]]
  plateChances <- df[["AB"]] + df[["BB"]] + df[["HBP"]] + df[["SF"]]
  OBP <- round(timesOnBase / plateChances, 3)

  # H already counts every hit once, so each extra-base hit only adds the
  # bases beyond the first: +1 per double, +2 per triple, +3 per home run.
  TB <- df[["H"]] + df[["2B"]] + 2 * df[["3B"]] + 3 * df[["HR"]]
  SLG <- round(TB / df[["AB"]], 3)

  # Last expression is the value itself (not an assignment), so the result
  # is returned visibly.
  cbind(df, OBP, SLG)
}

# Add the OBP and SLG columns to every team-season row
teamSub <- teamFun(teamSub)

#Get averages for OBP and SLG for each year
# Each average is the unweighted mean of that year's per-team values.
# The summary columns are named directly (no renaming by position), and
# .groups = "drop" returns an ungrouped table so no grouping leaks into
# later steps.
sumOPS <- teamSub |>
  group_by(yearID) |>
  summarise(
    avgOBP = mean(OBP),
    avgSLG = mean(SLG),
    .groups = "drop"
  )

# Attach each year's averages to every team row of that year.
# sumOPS has one row per yearID, so many team rows match one summary row;
# stating the relationship makes an accidental row fan-out an error.
teamSub <- teamSub |>
  left_join(
    select(sumOPS, yearID, avgOBP, avgSLG),
    by = join_by(yearID),
    relationship = "many-to-one"
  )

​

#Get OPS+ for each year and by team
#
# Appends the column OPSP to a table that already holds each team's OBP and
# SLG together with the averages they are normalised against:
#   OPSP = 100 * (OBP / avgOBP + SLG / avgSLG - 1) / BPF, rounded to 3 decimals
#
# Args:
#   df: data frame with the columns OBP, avgOBP, SLG, avgSLG and BPF.
#
# Returns:
#   df with the column OPSP bound on at the end.
#
# Stops with an error if any required column is missing.
#
# NOTE(review): BPF is used as a plain divisor. If the input file stores it
# on a 100-based scale (100 = neutral), OPSP will sit near 1 instead of near
# 100 - confirm the scale of BPF in the source data.
teamOPSF <- function(df) {
  # Check the inputs explicitly: a lookup through with() would fall back to
  # the enclosing environment and could silently pick up an unrelated global
  # that happens to share a column's name.
  needed <- c("OBP", "avgOBP", "SLG", "avgSLG", "BPF")
  absent <- setdiff(needed, names(df))
  if (length(absent) > 0) {
    stop(
      "teamOPSF: missing column(s): ", paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  # Each rate relative to its average (1 = exactly average)
  tOBP <- df[["OBP"]] / df[["avgOBP"]]
  tSLG <- df[["SLG"]] / df[["avgSLG"]]

  dOPS <- (tOBP + tSLG - 1) / df[["BPF"]]
  OPSP <- round(dOPS * 100, 3)

  # Last expression is the value itself (not an assignment), so the result
  # is returned visibly.
  cbind(df, OPSP)
}

# Add the OPSP column to every team-season row
teamSub <- teamOPSF(teamSub)

# Build the display table of the team with the highest OPS+ in each year.
# The rank filter keeps every row tied for the top value within a year and
# leaves the rows in their existing order.
tableSub <- teamSub |>
  select(yearID, teamID, OPSP) |>
  group_by(yearID) |>
  filter(min_rank(desc(OPSP)) <= 1) |>
  ungroup() |>
  gt(rowname_col = "yearID") |>
  tab_header(title = "Highest Team OPS+ by Year") |>
  tab_stubhead(label = "Year") |>
  cols_label(teamID = "Team", OPSP = "OPS+")

# Print the finished table
tableSub

​

# Save the enriched team table (with OBP, SLG, yearly averages and OPSP)
# so that Problem 2 can read it back in; row names are written as well.
write.csv(
  x = teamSub,
  file = "C:\\Users\\Anastashia Pelletier\\Documents\\MATH 494\\teamSub.csv",
  row.names = TRUE
)

Assignment 3 Code and Results

Anastashia Pelletier

©2022 by Anastashia Pelletier. Proudly created with Wix.com

bottom of page