View Single Post
Old 09-09-2019, 09:27 PM   #350
stealofhome
Hall Of Famer
 
stealofhome's Avatar
 
Join Date: Apr 2014
Posts: 2,282
Blog Entries: 1
Advanced Batting Stat Calculations

In preparation for finding the new names for the Silver Slugger Awards, I wanted to have the ability to calculate the advanced offensive stats on my own. With the help of cavebutter and his awesome website, I've been able to do that in R. Like cavebutter, I do have some error associated with it, my thought is it is possibly coming from sac hits? I haven't done a huge amount of testing but what I have done shows that my numbers are 2% too low on average.

---
title: "RunEnvironment"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r Import}
league.runs.per.out <- read.csv.sql("~/import_export/csv/players_career_pitching_stats.csv",
sql="select year, league_id,
sum(r) as lg_r,
sum(outs) as outs,
sum(outs) + sum(ha) + sum(bb) + sum(iw) + sum(sh) + sum(sf) as lg_PA
from file group by year, league_id, split_id having split_id = 1 and league_id = 100", eol = "\n")

league.batting <- read.csv.sql("~/import_export/csv/players_career_batting_stats.csv",sql="select year, league_id,sum(ab) as ab,sum(h) as h, sum(d) as d,
sum(t) as t, sum(hr) as hr, sum(sb) as sb, sum(cs) as cs, sum(bb) as bb, sum(ibb) as ibb, sum(sf) as sf, sum(hp) as hp
from file
group by year, league_id, split_id
having split_id = 1 and league_id = 100", eol = "\n")

players_career_batting_stats <- read.csv.sql("~/import_export/csv/players_career_batting_stats.csv",
sql="select player_id, year, league_id, sum(pa) as pa, sum(ab) as ab,sum(h) as h, sum(d) as d,
sum(t) as t, sum(hr) as hr, sum(sb) as sb, sum(cs) as cs, sum(bb) as bb, sum(ibb) as ibb, sum(sf) as sf, sum(hp) as hp from file group by player_id, year, league_id, split_id having league_id = 100 and split_id = 1", eol = "\n")

feed <- read.csv.sql("~/import_export/csv/players_career_batting_stats.csv",
sql="select player_id, level_id, league_id from file", eol = "\n")

players_awards <- read_csv("~/import_export/csv/players_awards.csv")

players <- read_csv("~/import_export/csv/players.csv")
players <- unite(players, name, c("first_name","last_name"), sep = " ") %>%
group_by(player_id) %>%
distinct()
```


```{r League Runs Per Out}
league.batting %<>% mutate(s=h-d-t-hr)


league.runs.per.out %<>%
merge(league.batting, by=c("year","league_id")) %>%
mutate(r.per.out = lg_r/outs, r.per.pa = lg_r/lg_PA, rBB = r.per.out+0.14,
rHB = rBB+.025, r1B = rBB+.155, r2B=r1B+.3, r3B=r2B+0.27, rHR=1.4,
rSB=0.2, rCS=(2*r.per.out)+0.075,
run.minus = (rBB*(bb-ibb)+rHB*hp+r1B*s+r2B*d+r3B*t+1.4*hr+rSB*sb-rCS*cs)/(ab-h+sf),
run.plus = (rBB*(bb-ibb)+rHB*hp+r1B*s+r2B*d+r3B*t+1.4*hr+rSB*sb-rCS*cs)/(bb-ibb+hp+h),
league.wOBA = (h+bb-ibb+hp)/(ab+bb-ibb+hp+sf),
wOBA.scale = (1/(run.plus+run.minus)),
wBB=(rBB+run.minus)*wOBA.scale, wHB=(rHB+run.minus)*wOBA.scale, w1B=(r1B+run.minus)*wOBA.scale, w2B=(r2B+run.minus)*wOBA.scale, w3B=(r3B+run.minus)*wOBA.scale, wHR=(rHR+run.minus)*wOBA.scale, wSB=rSB*wOBA.scale, wCS=rCS*wOBA.scale)
```

```{r Calculate seasonal wOBA}
players_career_batting_stats %<>%
merge(league.runs.per.out[ , c("year", "league_id", "lg_PA","lg_r","league.wOBA","wOBA.scale","wBB ", "wHB", "w1B", "w2B", "w3B", "wHR")], by=c("year","league_id")) %>%
mutate(s=h-d-t-hr, woba=round((wBB*(bb-ibb)+wHB*hp+w1B*s+w2B*d+w3B*t+wHR*hr)/(ab+bb-ibb+sf+hp),3),
wrc=(((woba-league.wOBA)/wOBA.scale)+(lg_r/lg_PA))*pa,
wraa=((woba-league.wOBA)/wOBA.scale)*pa)

career.woba <- players_career_batting_stats %>%
group_by(player_id) %>%
summarise(career.pa = sum(pa), career.woba = weighted.mean(woba,pa),
career.wRC = sum(wrc), career.wraa=sum(wraa))
```

```{r silver slugger awards}
silver_sluggers <- players_awards %>%
filter(award_id=='11') %>%
select(player_id, award_id) %>%
group_by(player_id, award_id) %>%
tally()
```

```{r Find feeder league}
#Find feeder league for each player
feed <- feed %>%
group_by(player_id) %>%
distinct() %>%
top_n(1, level_id)

#merge to career
feed.woba <- career.woba %>%
merge(feed, by="player_id") %>%
merge(silver_sluggers[ ,c("player_id", "n")], by="player_id") %>%
merge(players[ , c("player_id", "name","age")], by = "player_id") %>%
distinct(player_id, .keep_all = TRUE) %>%
arrange(desc(career.wraa)) %>%
group_by(league_id) %>%
top_n(10, career.wraa) %>%
arrange(level_id, league_id, desc(career.wraa))
```

stealofhome is offline   Reply With Quote