Danny Malter

Data Science Manager - Accenture
M.S. in Predictive Analytics - DePaul University

Danny Malter

Me
Malter Analytics
GitHub
LinkedIn
YouTube Channel
Twitter
Kaggle

Other Work
General Assembly
AriBall

Media
Built In

Jon Lester Pitch Analysis

This analysis takes a specific game from Jon Lester’s 2014 season and uses R to scrape and analyze its PITCHf/x data. In particular, I used the game on 6/28 against the NY Yankees, in which Lester pitched 8 innings, allowing 5 hits and recording 6 strikeouts.

devtools::source_gist(id = "da555f08f3c9ba2c0b8e")
require(mosaic)
require(ggplot2)
require(dplyr)
library(plyr)
require(ggplot2)
require(RSQLite)
require(pitchRx)
require(knitr)
require(animation)

Create a pitchRx database covering the date range. It will be stored locally in your working directory.

# Open (creating it if necessary) the SQLite file that backs the pitchRx
# database, then scrape the games played on 2014-06-28 into it.
db20140628 <- src_sqlite(path = "MLB20140628.sqlite3", create = TRUE)
scrape(
  start = "2014-06-28",
  end = "2014-06-28",
  connect = db20140628$con
)

Join the locations and names tables into a new que table, keeping only the rows where the pitcher name is Jon Lester.

# Columns needed from the "pitch" table (one row per pitch).
locations <- tbl(db20140628, "pitch") %>%
  select(pitch_type, px, pz, des, num, gameday_link, inning)
# Columns needed from the "atbat" table (one row per at-bat).
names <- tbl(db20140628, "atbat") %>%
  select(pitcher_name, batter_name, num, b_height, gameday_link, event, stand)
# Restrict the at-bats to Jon Lester, then attach each pitch to its at-bat
# by matching on at-bat number and game link.
que <- names %>%
  filter(pitcher_name == "Jon Lester") %>%
  inner_join(x = locations, y = ., by = c("num", "gameday_link"))
# Run the query and bring the joined rows into R.
pitchfx <- collect(que)

# Final pitch of every at-bat: split the pitches by at-bat number (`num`)
# and keep the last row of each piece.
last.pitch <- plyr::ddply(
  .data = pitchfx,
  .variables = "num",
  .fun = function(at_bat) at_bat[nrow(at_bat), ]
)

Create a new column with the full pitch type name.

# Translate the pitch-type codes into readable labels. Codes and labels are
# paired by position; a code that is not listed in `levels` becomes NA.
pitchfx[["pitch_type_full"]] <- factor(
  x = pitchfx[["pitch_type"]],
  levels = c("FF", "FC", "SI", "CH", "CU"),
  labels = c(
    "Four-seam FB", "Fastball (cutter)", "Sinker", "Changeup", "Curveball"
  )
)

Create a table for the count of each type of pitch.

# Tally how many pitches of each (labelled) type appear in pitchfx.
table(pitchfx[["pitch_type_full"]])
## 
##      Four-seam FB Fastball (cutter)            Sinker          Changeup 
##                53                30                13                 4 
##         Curveball 
##                18

Create tables for the count of each type of pitch by batter stand (L/R) and by inning.

# Cross-tabulate pitch type against the batter's stand; `dnn` names the two
# dimensions in the printed header.
table(
  pitchfx$pitch_type_full,
  pitchfx$stand,
  dnn = c("pitch_type_full", "stand")
)
##                    stand
## pitch_type_full      L  R
##   Four-seam FB      19 34
##   Fastball (cutter) 13 17
##   Sinker             2 11
##   Changeup           1  3
##   Curveball          6 12
# Cross-tabulate pitch type against inning; `dnn` names the two dimensions
# in the printed header.
table(
  pitchfx$pitch_type_full,
  pitchfx$inning,
  dnn = c("pitch_type_full", "inning")
)
##                    inning
## pitch_type_full      1  2  3  4  5  6  7  8
##   Four-seam FB      10  5 13  4  3  8  4  6
##   Fastball (cutter)  3  3  4  3  5  6  3  3
##   Sinker             2  1  1  1  1  2  1  4
##   Changeup           0  1  0  1  1  1  0  0
##   Curveball          2  3  1  2  0  3  2  5

Graph the count of pitch type by pitch description. Only ‘Ball’, ‘Swinging Strike’ and ‘Called Strike’ are being used, but many other types of descriptions are recorded.

# Keep only the pitches described as a ball, a swinging strike or a called
# strike; every other description is dropped.
ball.strike <- subset(
  pitchfx,
  des %in% c("Ball", "Swinging Strike", "Called Strike")
)
# Bar chart of pitch counts per pitch type, with one side-by-side bar for
# each of the three calls.
ggplot(ball.strike, aes(x = pitch_type_full, fill = des)) +
  geom_bar(position = "dodge") +
  labs(
    title = "Count of Pitch Type",
    x = "Pitch Type",
    y = "Count",
    fill = "Call"
  )

plot of chunk unnamed-chunk-9

Create a strike zone plot showing the pitch types thrown to each batter.

# Strike-zone rectangle to overlay on the pitch-location plots.
# Vertical bounds (top/bottom) and horizontal bounds (in/out) of the zone.
topKzone <- 3.5
botKzone <- 1.6
inKzone <- -0.95
outKzone <- 0.95
# Corners listed in drawing order; the first corner is repeated at the end
# so that a path drawn through the rows closes the rectangle.
kZone <- data.frame(
  x = c(rep(inKzone, 2), rep(outKzone, 2), inKzone),
  y = c(botKzone, rep(topKzone, 2), rep(botKzone, 2))
)

# Pitch locations coloured by pitch type, one panel per batter, with the
# strike-zone outline from kZone drawn over the points in red.
ggplot(pitchfx, aes(x = px, y = pz, colour = pitch_type_full)) +
  geom_point() +
  geom_path(aes(x = x, y = y), data = kZone, lwd = 2, colour = "red") +
  ylim(0, 5) +
  facet_wrap(~ batter_name, ncol = 3) +
  scale_colour_discrete(name = "Pitch Type") +
  labs(title = "Pitch Type by Player (Catcher's Perspective)")

plot of chunk unnamed-chunk-10

Derek Jeter’s pitches broken down

# Every pitch thrown to Derek Jeter.
jeter <- subset(pitchfx, batter_name %in% "Derek Jeter")
# The final pitch of each of his at-bats: split by at-bat number (`num`)
# and keep the last row of each piece.
jeter.last <- plyr::ddply(
  .data = jeter,
  .variables = "num",
  .fun = function(at_bat) at_bat[nrow(at_bat), ]
)

# Location of the last pitch of each Jeter at-bat, coloured by pitch type
# and split into one panel per at-bat event, with the strike zone in red.
ggplot(jeter.last, aes(x = px, y = pz, colour = pitch_type_full)) +
  geom_point() +
  geom_path(aes(x = x, y = y), data = kZone, lwd = 2, colour = "red") +
  ylim(0, 5) +
  facet_wrap(~ event, ncol = 3) +
  scale_colour_discrete(name = "Pitch Type") +
  labs(title = "Derek Jeter's Events by Pitch (Catcher's Perspective)")

plot of chunk unnamed-chunk-11

# Location of every pitch thrown to Jeter, coloured by pitch type and split
# into one panel per pitch description, with the strike zone in red.
ggplot(jeter, aes(x = px, y = pz, colour = pitch_type_full)) +
  geom_point() +
  geom_path(aes(x = x, y = y), data = kZone, lwd = 2, colour = "red") +
  ylim(0, 5) +
  facet_wrap(~ des, ncol = 3) +
  scale_colour_discrete(name = "Pitch Type") +
  labs(
    title = "Derek Jeter's Description by All Pitches (Catcher's Perspective)"
  )

plot of chunk unnamed-chunk-11

Visualize Pitches

All pitches

# strikeFX tile plot of all of Lester's pitches, one panel per batter stand.
strikeFX(
  data = pitchfx,
  geom = "tile",
  layer = facet_grid(. ~ stand)
)

plot of chunk unnamed-chunk-12

Just Called Strikes

# Restrict to pitches described as called strikes, then draw the same tile
# plot, one panel per batter stand.
lester.called.strikes <- subset(pitchfx, des %in% "Called Strike")
strikeFX(
  data = lester.called.strikes,
  geom = "tile",
  layer = facet_grid(. ~ stand)
)

plot of chunk unnamed-chunk-13

Just Balls

# Restrict to pitches described as balls, then draw the same tile plot,
# one panel per batter stand.
lester.balls <- subset(pitchfx, des %in% "Ball")
strikeFX(
  data = lester.balls,
  geom = "tile",
  layer = facet_grid(. ~ stand)
)

plot of chunk unnamed-chunk-14

Probability of a strike

require(mgcv)
# Keep only the pitches described as a ball or a called strike.
noswing <- subset(pitchfx, des == "Ball" | des == "Called Strike")
# Binary response: 1 for a called strike, 0 for a ball.
noswing$strike <- as.numeric(noswing$des == "Called Strike")
# Logistic additive model of the strike indicator on pitch location
# (px, pz), with a separate smooth and a separate intercept for each
# batter stand.
m1 <- bam(
  formula = strike ~ s(px, pz, by=factor(stand)) +
          factor(stand),
  family = binomial(link = "logit"),
  data = noswing
)
# Plot the fitted model with strikeFX, one panel per batter stand.
strikeFX(data = noswing, model = m1, layer = facet_grid(. ~ stand))

plot of chunk unnamed-chunk-16

Animation of pitches

# Scrape the same day again, this time without a database connection, so
# the result is kept in `data` and its tables are read with data$<table>.
data <- scrape(start = "2014-06-28", end = "2014-06-28")
names <- c("Jon Lester")
# At-bats thrown by the listed pitcher(s), inner-joined to their pitches on
# at-bat number and game url.
atbats <- subset(data[["atbat"]], pitcher_name %in% names)
pitchFX <- plyr::join(
  x = atbats,
  y = data[["pitch"]],
  by = c("num", "url"),
  type = "inner"
)
# Split by pitch-type code: FF/FC/SI in one set, CU/CH in the other.
pitch.fast <- pitchFX[pitchFX$pitch_type %in% c("FF", "FC", "SI"), ]
pitch.offspeed <- pitchFX[pitchFX$pitch_type %in% c("CU", "CH"), ]

# library() stops with an error if the package is missing, whereas require()
# only warns and lets the script fail later at the first animation call.
library(animation)

# The animations are rendered from inside ~/pitchRx. A bare setwd() fails
# when that directory does not exist and leaves the session in a different
# directory afterwards, so the directory is created if needed and the
# previous working directory is restored even if a call below errors.
animation_dir <- path.expand("~/pitchRx")
dir.create(animation_dir, showWarnings = FALSE, recursive = TRUE)
previous_wd <- setwd(animation_dir)
tryCatch({
  # On-screen animations, faceted by pitcher name and batter stand.
  animateFX(
    pitch.fast,
    layer = list(
      facet_grid(pitcher_name ~ stand, labeller = label_both),
      theme_bw(),
      coord_equal()
    )
  )
  animateFX(
    pitch.offspeed,
    layer = list(
      facet_grid(pitcher_name ~ stand, labeller = label_both),
      theme_bw(),
      coord_equal()
    )
  )
  # Set the animation package's frame interval before saving to HTML.
  ani.options(interval = 0.1)
  # Saved animations, faceted by pitch type and batter stand.
  saveHTML({
    animateFX(
      pitch.fast,
      layer = list(
        facet_grid(pitch_type ~ stand, labeller = label_both),
        theme_bw(),
        coord_equal()
      )
    )
  }, img.name = "lester_fast_pitches")
  saveHTML({
    animateFX(
      pitch.offspeed,
      layer = list(
        facet_grid(pitch_type ~ stand, labeller = label_both),
        theme_bw(),
        coord_equal()
      )
    )
  }, img.name = "lester_slow_pitches")
}, finally = setwd(previous_wd))

Further reading can be seen here

comments powered by Disqus