Data Science Manager - Accenture
M.S. in Predictive Analytics - DePaul University
Me
Malter Analytics
GitHub
LinkedIn
YouTube Channel
Twitter
Kaggle
Other Work
General Assembly
AriBall
Media
Built In
This analysis takes a single game from Jon Lester’s 2014 season, scrapes its PITCHf/x data, and analyzes it in R. In particular, I used the 6/28 game against the NY Yankees, in which Lester pitched 8 innings, allowing 5 hits and recording 6 strikeouts.
devtools::source_gist(id = "da555f08f3c9ba2c0b8e")
require(mosaic)
require(ggplot2)
require(dplyr)
library(plyr)
require(ggplot2)
require(RSQLite)
require(pitchRx)
require(knitr)
require(animation)
Create a pitchRx database for the date range of interest. It will be stored locally in your working directory.
# Open the SQLite file that backs this analysis (`create = TRUE` allows the
# file to be created) and ask scrape() for the one-day window 2014-06-28,
# handing it the store's connection through `connect`.
game_day <- "2014-06-28"
db20140628 <- src_sqlite(path = "MLB20140628.sqlite3", create = TRUE)
scrape(start = game_day, end = game_day, connect = db20140628$con)
Join the locations and names tables into a new que table, keeping only the rows where the pitcher name is Jon Lester.
# Lazy references to the two database tables used below.
pitch_tbl <- tbl(db20140628, "pitch")
atbat_tbl <- tbl(db20140628, "atbat")

# Per-pitch columns: pitch type, location (px, pz), description, plus the
# at-bat number, game link and inning.
locations <- select(
  pitch_tbl,
  pitch_type, px, pz, des, num, gameday_link, inning
)

# Per-at-bat columns: who pitched and batted, the outcome and batter stance.
names <- select(
  atbat_tbl,
  pitcher_name, batter_name, num, b_height, gameday_link, event, stand
)

# Keep only Jon Lester's at-bats, then attach every pitch that shares the same
# at-bat number and game link.
lester_atbats <- filter(names, pitcher_name == "Jon Lester")
que <- inner_join(locations, lester_atbats, by = c("num", "gameday_link"))

# Run the query and pull the result into memory.
pitchfx <- collect(que)
# Final row of each at-bat: split `pitchfx` by at-bat number (`num`) and keep
# the last row of every piece.
# NOTE(review): this treats the last row of a group as the last pitch thrown,
# which assumes the collected rows are already in pitch order within an at-bat
# -- TODO confirm, no explicit ordering is applied before this point.
last.pitch <- ddply(
  .data = pitchfx,
  .variables = .(num),
  .fun = function(at_bat) at_bat[nrow(at_bat), ]
)
Create a new column with the full pitch type name.
# Two parallel vectors: the pitch-type codes found in `pitch_type` and the
# readable label for each, in the order the factor levels should appear.
pitch_codes <- c("FF", "FC", "SI", "CH", "CU")
pitch_labels <- c(
  "Four-seam FB", "Fastball (cutter)", "Sinker", "Changeup", "Curveball"
)

# Any code not listed in `pitch_codes` becomes NA in the new column.
pitchfx$pitch_type_full <- factor(
  pitchfx$pitch_type,
  levels = pitch_codes,
  labels = pitch_labels
)
Create a table for the count of each type of pitch.
# One-way frequency table of the labelled pitch types.
table(pitchfx[["pitch_type_full"]])
##
##      Four-seam FB Fastball (cutter)            Sinker          Changeup
##                53                30                13                 4
##         Curveball
##                18
Create a table for the count of each type of pitch by batter stand and inning.
# Pitch type (rows) cross-tabulated against batter stance (columns).
table(
  pitch_type_full = pitchfx$pitch_type_full,
  stand = pitchfx$stand
)
##                    stand
## pitch_type_full      L  R
##   Four-seam FB      19 34
##   Fastball (cutter) 13 17
##   Sinker             2 11
##   Changeup           1  3
##   Curveball          6 12
# Pitch type (rows) cross-tabulated against inning (columns).
table(
  pitch_type_full = pitchfx$pitch_type_full,
  inning = pitchfx$inning
)
##                    inning
## pitch_type_full      1  2  3  4  5  6  7  8
##   Four-seam FB      10  5 13  4  3  8  4  6
##   Fastball (cutter)  3  3  4  3  5  6  3  3
##   Sinker             2  1  1  1  1  2  1  4
##   Changeup           0  1  0  1  1  1  0  0
##   Curveball          2  3  1  2  0  3  2  5
Graph the count of pitch type by pitch description. Only ‘Ball’, ‘Swinging Strike’ and ‘Called Strike’ are being used, but many other types of descriptions are recorded.
# Keep only the three pitch descriptions being compared.
call_types <- c("Ball", "Swinging Strike", "Called Strike")
ball.strike <- subset(pitchfx, des %in% call_types)

# Side-by-side bars: one group per pitch type, one bar per description.
ggplot(ball.strike, aes(x = pitch_type_full, fill = des)) +
  geom_bar(position = "dodge") +
  xlab("Pitch Type") +
  ylab("Count") +
  ggtitle("Count of Pitch Type") +
  guides(fill = guide_legend(title = "Call"))
Create a strike zone plot of the pitch types thrown to each batter.
# Strike-zone rectangle drawn on top of the pitch locations.
# Edges are given on the same axes as px (horizontal) and pz (vertical).
inKzone <- -0.95
outKzone <- 0.95
botKzone <- 1.6
topKzone <- 3.5

# Five corner points so the path closes: bottom-left, top-left, top-right,
# bottom-right, and back to bottom-left.
kZone <- data.frame(
  x = c(inKzone, outKzone)[c(1, 1, 2, 2, 1)],
  y = c(botKzone, topKzone)[c(1, 2, 2, 1, 1)]
)

# One panel per batter; points are coloured by pitch type and the zone outline
# is drawn in red in every panel.
ggplot(pitchfx, aes(x = px, y = pz, colour = pitch_type_full)) +
  geom_point() +
  geom_path(aes(x = x, y = y), data = kZone, lwd = 2, colour = "red") +
  facet_wrap(~ batter_name, ncol = 3) +
  ylim(0, 5) +
  scale_colour_discrete(name = "Pitch Type") +
  ggtitle("Pitch Type by Player (Catcher's Perspective)")
Derek Jeter’s pitches broken down
# All pitches thrown to Derek Jeter, and the last row of each of his at-bats.
# NOTE(review): "last row" is taken as "last pitch", which assumes the rows of
# an at-bat are already in pitch order -- TODO confirm.
jeter <- subset(pitchfx, batter_name %in% "Derek Jeter")
jeter.last <- ddply(
  .data = jeter,
  .variables = .(num),
  .fun = function(at_bat) at_bat[nrow(at_bat), ]
)

# Final row of each at-bat, one panel per at-bat event.
ggplot(jeter.last, aes(x = px, y = pz, colour = pitch_type_full)) +
  geom_point() +
  geom_path(aes(x = x, y = y), data = kZone, lwd = 2, colour = "red") +
  facet_wrap(~ event, ncol = 3) +
  ylim(0, 5) +
  scale_colour_discrete(name = "Pitch Type") +
  ggtitle("Derek Jeter's Events by Pitch (Catcher's Perspective)")

# Every pitch to Jeter, one panel per pitch description.
ggplot(jeter, aes(x = px, y = pz, colour = pitch_type_full)) +
  geom_point() +
  geom_path(aes(x = x, y = y), data = kZone, lwd = 2, colour = "red") +
  facet_wrap(~ des, ncol = 3) +
  ylim(0, 5) +
  scale_colour_discrete(name = "Pitch Type") +
  ggtitle("Derek Jeter's Description by All Pitches (Catcher's Perspective)")
All pitches
# Tiled strikeFX plot of every collected pitch, one panel per batter stance.
strikeFX(
  pitchfx,
  geom = "tile",
  layer = facet_grid(. ~ stand)
)
Just Called Strikes
# Same tiled plot, restricted to pitches described as "Called Strike".
lester.called.strikes <- subset(pitchfx, des %in% "Called Strike")
strikeFX(
  lester.called.strikes,
  geom = "tile",
  layer = facet_grid(. ~ stand)
)
Just Balls
# Same tiled plot, restricted to pitches described as "Ball".
lester.balls <- subset(pitchfx, des %in% "Ball")
strikeFX(
  lester.balls,
  geom = "tile",
  layer = facet_grid(. ~ stand)
)
Probability of a strike
require(mgcv)

# Only pitches described as "Ball" or "Called Strike" are kept.
taken_calls <- c("Ball", "Called Strike")
noswing <- subset(pitchfx, des %in% taken_calls)

# Binary response: 1 for a called strike, 0 for a ball.
noswing$strike <- as.numeric(noswing$des == "Called Strike")

# Binomial (logit) additive model: a smooth surface over pitch location
# (px, pz) fitted separately per batter stance, plus a stance main effect.
m1 <- bam(
  strike ~ s(px, pz, by = factor(stand)) + factor(stand),
  data = noswing,
  family = binomial(link = "logit")
)

# Hand the fitted model to strikeFX, one panel per stance.
strikeFX(noswing, model = m1, layer = facet_grid(. ~ stand))
Animation of pitches
# Animate Jon Lester's pitches from the 2014-06-28 game.
# scrape() is called here without `connect`; its return value is kept in
# `data`, and the `atbat` and `pitch` elements are read from it below.
data <- scrape(start = "2014-06-28", end = "2014-06-28")

# Called `pitcher_names` rather than `names`, so that neither base::names()
# nor the at-bat table stored in `names` earlier in the script is clobbered.
pitcher_names <- c("Jon Lester")
atbats <- subset(data$atbat, pitcher_name %in% pitcher_names)

# Attach the pitch rows to Lester's at-bats via at-bat number and game URL.
pitchFX <- plyr::join(atbats, data$pitch, by = c("num", "url"), type = "inner")

# Split into fastball-type codes and off-speed codes; each group is animated
# separately.
pitch.fast <- subset(pitchFX, pitch_type %in% c("FF", "FC", "SI"))
pitch.offspeed <- subset(pitchFX, pitch_type %in% c("CU", "CH"))

require(animation)

# Work inside ~/pitchRx, as before. The directory is created first when it is
# missing, because setwd() stops with an error on a non-existent path.
# The working directory is deliberately left changed afterwards, matching the
# original behaviour.
out_dir <- "~/pitchRx"
dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
setwd(out_dir)

# On-screen previews, panels laid out as pitcher (rows) by stance (columns).
animateFX(
  pitch.fast,
  layer = list(
    facet_grid(pitcher_name ~ stand, labeller = label_both),
    theme_bw(),
    coord_equal()
  )
)
animateFX(
  pitch.offspeed,
  layer = list(
    facet_grid(pitcher_name ~ stand, labeller = label_both),
    theme_bw(),
    coord_equal()
  )
)

# Set the animation package's `interval` option before exporting, so it is in
# effect for both saveHTML() calls (presumably the delay between frames in
# seconds -- confirm against the animation documentation).
ani.options(interval = 0.1)

# HTML exports, panels laid out as pitch type (rows) by stance (columns).
saveHTML(
  {
    animateFX(
      pitch.fast,
      layer = list(
        facet_grid(pitch_type ~ stand, labeller = label_both),
        theme_bw(),
        coord_equal()
      )
    )
  },
  img.name = "lester_fast_pitches"
)
saveHTML(
  {
    animateFX(
      pitch.offspeed,
      layer = list(
        facet_grid(pitch_type ~ stand, labeller = label_both),
        theme_bw(),
        coord_equal()
      )
    )
  },
  img.name = "lester_slow_pitches"
)
Further reading can be seen here