1 – Loading data

gas.prices = read.csv("/Users/jake/Desktop/Datasets/Energy/Gas/gas_prices.csv", stringsAsFactors = FALSE)
str(gas.prices)
## 'data.frame':    1257 obs. of  22 variables:
##  $ Date                                                                                               : chr  "Nov 28, 1994" "Dec 05, 1994" "Dec 12, 1994" "Dec 19, 1994" ...
##  $ Weekly.U.S..Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon.                      : num  1.27 1.26 1.24 1.23 1.22 ...
##  $ Weekly.East.Coast.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon.                : num  1.3 1.3 1.29 1.28 1.27 ...
##  $ Weekly.New.England..PADD.1A..Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon.     : num  1.33 1.33 1.32 1.32 1.31 ...
##  $ Weekly.Central.Atlantic..PADD.1B..Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon.: num  1.35 1.36 1.35 1.34 1.34 ...
##  $ Weekly.Lower.Atlantic..PADD.1C..Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon.  : num  1.28 1.28 1.27 1.26 1.25 ...
##  $ Weekly.Midwest.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon.                   : num  1.22 1.19 1.16 1.15 1.14 ...
##  $ Weekly.Gulf.Coast.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon.                : num  1.3 1.29 1.29 1.28 1.27 ...
##  $ Weekly.Rocky.Mountain.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon.            : num  1.31 1.32 1.32 1.3 1.28 ...
##  $ Weekly.West.Coast.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon.                : num  NA NA NA NA NA ...
##  $ Weekly.Colorado.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon.                  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Weekly.Florida.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon.                   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Weekly.Minnesota.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon.                 : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Weekly.New.York.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon.                  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Weekly.Ohio.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon.                      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Weekly.Texas.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon.                     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Weekly.Washington.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon.                : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Weekly.Cleveland..OH.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon.             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Weekly.Denver..CO.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon.                : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Weekly.Miami..FL.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon.                 : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Weekly.Seattle..WA.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon.               : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ X                                                                                                  : logi  NA NA NA NA NA NA ...
elec.prices = read.csv("/Users/jake/Desktop/Datasets/Energy/Electricity/Average_retail_price_of_electricity_cleaned.csv", stringsAsFactors = FALSE)
str(elec.prices)
## 'data.frame':    7 obs. of  214 variables:
##  $ source.key: chr  "ELEC.PRICE.US-ALL.M" "ELEC.PRICE.US-ALL.M" "ELEC.PRICE.US-RES.M" "ELEC.PRICE.US-COM.M" ...
##  $ Jan.2001  : chr  "" "6.75" "7.73" "7.25" ...
##  $ Feb.2001  : chr  "" "6.87" "8.04" "7.51" ...
##  $ Mar.2001  : chr  "" "7.01" "8.32" "7.7" ...
##  $ Apr.2001  : chr  "" "7.02" "8.46" "7.73" ...
##  $ May.2001  : chr  "" "7.17" "8.83" "7.77" ...
##  $ Jun.2001  : chr  "" "7.58" "9.07" "8.13" ...
##  $ Jul.2001  : chr  "" "7.88" "9.03" "8.41" ...
##  $ Aug.2001  : chr  "" "7.84" "9.01" "8.35" ...
##  $ Sep.2001  : chr  "" "7.62" "8.92" "8.22" ...
##  $ Oct.2001  : chr  "" "7.43" "8.84" "8.27" ...
##  $ Nov.2001  : chr  "" "7.02" "8.47" "7.73" ...
##  $ Dec.2001  : chr  "" "7.03" "8.29" "7.66" ...
##  $ Jan.2002  : chr  "" "6.95" "8.07" "7.49" ...
##  $ Feb.2002  : chr  "" "6.97" "8.19" "7.68" ...
##  $ Mar.2002  : chr  "" "6.95" "8.17" "7.72" ...
##  $ Apr.2002  : chr  "" "6.95" "8.37" "7.68" ...
##  $ May.2002  : chr  "" "7.11" "8.64" "7.84" ...
##  $ Jun.2002  : chr  "" "7.45" "8.73" "8.18" ...
##  $ Jul.2002  : chr  "" "7.68" "8.82" "8.33" ...
##  $ Aug.2002  : chr  "" "7.58" "8.72" "8.24" ...
##  $ Sep.2002  : chr  "" "7.36" "8.59" "8" ...
##  $ Oct.2002  : chr  "" "7.2" "8.47" "8" ...
##  $ Nov.2002  : chr  "" "6.94" "8.31" "7.64" ...
##  $ Dec.2002  : chr  "" "6.96" "8.08" "7.59" ...
##  $ Jan.2003  : chr  "" "7" "8" "7.45" ...
##  $ Feb.2003  : chr  "" "7.05" "8.02" "7.65" ...
##  $ Mar.2003  : chr  "" "7.16" "8.35" "7.78" ...
##  $ Apr.2003  : chr  "" "7.27" "8.82" "7.93" ...
##  $ May.2003  : chr  "" "7.42" "8.99" "8.06" ...
##  $ Jun.2003  : chr  "" "7.78" "9.25" "8.44" ...
##  $ Jul.2003  : chr  "" "8.02" "9.21" "8.55" ...
##  $ Aug.2003  : chr  "" "7.97" "9.22" "8.46" ...
##  $ Sep.2003  : chr  "" "7.61" "8.92" "8.15" ...
##  $ Oct.2003  : chr  "" "7.39" "8.85" "8.09" ...
##  $ Nov.2003  : chr  "" "7.19" "8.72" "7.85" ...
##  $ Dec.2003  : chr  "" "7.14" "8.3" "7.7" ...
##  $ Jan.2004  : chr  "" "7.22" "8.24" "7.7" ...
##  $ Feb.2004  : chr  "" "7.25" "8.33" "7.85" ...
##  $ Mar.2004  : chr  "" "7.3" "8.62" "7.92" ...
##  $ Apr.2004  : chr  "" "7.34" "8.93" "7.89" ...
##  $ May.2004  : chr  "" "7.46" "9.07" "7.99" ...
##  $ Jun.2004  : chr  "" "7.93" "9.29" "8.49" ...
##  $ Jul.2004  : chr  "" "8.11" "9.36" "8.63" ...
##  $ Aug.2004  : chr  "" "8.18" "9.5" "8.7" ...
##  $ Sep.2004  : chr  "" "7.97" "9.39" "8.54" ...
##  $ Oct.2004  : chr  "" "7.6" "9.05" "8.23" ...
##  $ Nov.2004  : chr  "" "7.42" "8.96" "8.04" ...
##  $ Dec.2004  : chr  "" "7.36" "8.58" "7.82" ...
##  $ Jan.2005  : chr  "" "7.47" "8.52" "7.99" ...
##  $ Feb.2005  : chr  "" "7.58" "8.76" "8.19" ...
##  $ Mar.2005  : chr  "" "7.59" "8.87" "8.15" ...
##  $ Apr.2005  : chr  "" "7.65" "9.22" "8.25" ...
##  $ May.2005  : chr  "" "7.84" "9.56" "8.41" ...
##  $ Jun.2005  : chr  "" "8.38" "9.79" "8.89" ...
##  $ Jul.2005  : chr  "" "8.6" "9.77" "9" ...
##  $ Aug.2005  : chr  "" "8.71" "9.93" "9.1" ...
##  $ Sep.2005  : chr  "" "8.68" "9.94" "9.18" ...
##  $ Oct.2005  : chr  "" "8.37" "9.76" "8.91" ...
##  $ Nov.2005  : chr  "" "8.21" "9.76" "8.79" ...
##  $ Dec.2005  : chr  "" "8.21" "9.27" "8.79" ...
##  $ Jan.2006  : chr  "" "8.31" "9.55" "8.87" ...
##  $ Feb.2006  : chr  "" "8.49" "9.8" "9.14" ...
##  $ Mar.2006  : chr  "" "8.44" "9.87" "9.06" ...
##  $ Apr.2006  : chr  "" "8.56" "10.32" "9.17" ...
##  $ May.2006  : chr  "" "8.71" "10.61" "9.22" ...
##  $ Jun.2006  : chr  "" "9.3" "10.85" "9.88" ...
##  $ Jul.2006  : chr  "" "9.55" "10.96" "9.97" ...
##  $ Aug.2006  : chr  "" "9.58" "10.94" "10.04" ...
##  $ Sep.2006  : chr  "" "9.32" "10.94" "9.89" ...
##  $ Oct.2006  : chr  "" "8.89" "10.58" "9.51" ...
##  $ Nov.2006  : chr  "" "8.63" "10.18" "9.24" ...
##  $ Dec.2006  : chr  "" "8.55" "9.84" "9.08" ...
##  $ Jan.2007  : chr  "" "8.71" "10.06" "9.12" ...
##  $ Feb.2007  : chr  "" "8.74" "9.89" "9.34" ...
##  $ Mar.2007  : chr  "" "8.8" "10.27" "9.35" ...
##  $ Apr.2007  : chr  "" "8.82" "10.63" "9.38" ...
##  $ May.2007  : chr  "" "8.96" "10.77" "9.51" ...
##  $ Jun.2007  : chr  "" "9.45" "11.09" "9.95" ...
##  $ Jul.2007  : chr  "" "9.64" "11.07" "10.14" ...
##  $ Aug.2007  : chr  "" "9.68" "11.07" "10.07" ...
##  $ Sep.2007  : chr  "" "9.43" "10.96" "9.9" ...
##  $ Oct.2007  : chr  "" "9.17" "10.82" "9.77" ...
##  $ Nov.2007  : chr  "" "8.94" "10.7" "9.5" ...
##  $ Dec.2007  : chr  "" "8.91" "10.33" "9.42" ...
##  $ Jan.2008  : chr  "" "8.92" "10.14" "9.39" ...
##  $ Feb.2008  : chr  "" "8.9" "10.16" "9.46" ...
##  $ Mar.2008  : chr  "" "9.03" "10.45" "9.62" ...
##  $ Apr.2008  : chr  "" "9.22" "10.93" "9.79" ...
##  $ May.2008  : chr  "" "9.47" "11.4" "10.05" ...
##  $ Jun.2008  : chr  "" "10.27" "11.77" "10.87" ...
##  $ Jul.2008  : chr  "" "10.65" "12.07" "11.1" ...
##  $ Aug.2008  : chr  "" "10.59" "12.09" "11.07" ...
##  $ Sep.2008  : chr  "" "10.27" "11.92" "10.77" ...
##  $ Oct.2008  : chr  "" "9.94" "11.81" "10.41" ...
##  $ Nov.2008  : chr  "" "9.67" "11.42" "10.16" ...
##  $ Dec.2008  : chr  "" "9.53" "10.86" "9.92" ...
##  $ Jan.2009  : chr  "" "9.68" "10.98" "9.96" ...
##  $ Feb.2009  : chr  "" "9.78" "11.18" "10.2" ...
##   [list output truncated]
# Location of both datasets:
# https://www.eia.gov

2 – Cleaning data

## Gas Prices Date
gas.prices.clean = gas.prices %>% 
  mutate(date = mdy(Date), weekly.us = Weekly.U.S..Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon., weekly.east = Weekly.East.Coast.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon.) %>% 
  select(date, weekly.us, weekly.east) %>% 
  mutate(month = month(date), day = day(date), year = year(date))

## Electricity Prices Date
elec.prices.clean = t(elec.prices)
elec.prices.clean = data.frame(elec.prices.clean, stringsAsFactors = F)
names(elec.prices.clean) = c("NA","all","residential","commercial","industrial","transit", "other")
elec.prices.clean = elec.prices.clean[-1]
elec.prices.clean = elec.prices.clean[-c(1),]
setDT(elec.prices.clean, keep.rownames = TRUE)[]
##            rn   all residential commercial industrial transit other
##   1: Jan.2001  6.75        7.73       7.25       4.73      --  6.48
##   2: Feb.2001  6.87        8.04       7.51        4.8      --  6.94
##   3: Mar.2001  7.01        8.32        7.7       4.86      --  7.05
##   4: Apr.2001  7.02        8.46       7.73       4.87      --  7.04
##   5: May.2001  7.17        8.83       7.77          5      --  7.14
##  ---                                                               
## 209: May.2018 10.41       13.15      10.49       6.82    9.46    --
## 210: Jun.2018 10.79       13.05      10.82       7.18   10.15    --
## 211: Jul.2018 11.03       13.13      10.97       7.34   10.14    --
## 212: Aug.2018 11.05        13.3      11.01       7.24    9.68    --
## 213: Sep.2018 10.79       13.01      10.91       7.11   10.28    --
names(elec.prices.clean)[1] = "date"

elec.prices.clean = elec.prices.clean %>% 
  mutate(date = mdy(date), year = year(date), month = month(date), date = make_date(year, month, 1))

elec.prices.clean$all = as.numeric(elec.prices.clean$all)
elec.prices.clean$residential = as.numeric(elec.prices.clean$residential)
elec.prices.clean$commercial = as.numeric(elec.prices.clean$commercial)
elec.prices.clean$industrial = as.numeric(elec.prices.clean$industrial)
elec.prices.clean$transit = as.numeric(elec.prices.clean$transit)
## Warning: NAs introduced by coercion
elec.prices.clean$other = as.numeric(elec.prices.clean$other)
## Warning: NAs introduced by coercion

3 – Plotting gasoline prices (U.S.)

gas.prices.clean %>% 
  ggplot() + geom_point(aes(x = date, y = weekly.us)) + ggtitle("U.S. Weekly Gas Prices (1994-2018)")
## Warning: Removed 1 rows containing missing values (geom_point).

gas.month = gas.prices.clean %>% 
  group_by(month, year) %>% 
  summarise(monthly.us = mean(weekly.us, na.rm = T), monthly.east = mean(weekly.east, na.rm = T)) %>% 
  mutate(date = make_date(year, month, 1))

gas.month %>% 
  ggplot() + geom_point(aes(x = date, y = monthly.us)) + ggtitle("U.S. Monthly Gas Prices (1994-2018)")
## Warning: Removed 1 rows containing missing values (geom_point).

gas.prices.clean %>% 
  group_by(month) %>% 
  summarise(monthly.us = mean(weekly.us, na.rm = T), monthly.east = mean(weekly.east, na.rm = T)) %>% 
  ggplot() + geom_point(aes(x = month, y = monthly.us)) + geom_line(aes(x = month, y = monthly.us)) + ggtitle("Average U.S. Monthly Gas Prices (1994-2018)")
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_path).

gas.prices.clean %>% 
  group_by(month, year) %>% 
  summarise(monthly.us = mean(weekly.us, na.rm = T), monthly.east = mean(weekly.east, na.rm = T)) %>% 
  ggplot() + geom_point(aes(x = month, y = monthly.us, color = year)) + ggtitle("U.S. Gas Prices by Month")
## Warning: Removed 1 rows containing missing values (geom_point).

gas.prices.clean %>% 
  group_by(month, year) %>% 
  summarise(monthly.us = mean(weekly.us, na.rm = T), monthly.east = mean(weekly.east, na.rm = T)) %>% 
  mutate(date = make_date(year, month, 1)) %>% 
  ggplot() + geom_line(aes(x = year, y = monthly.us, color = factor(month))) + ggtitle("U.S. Gas Prices Stratified by Month")
## Warning: Removed 1 rows containing missing values (geom_path).

4 – Predicting gasoline prices (U.S.)

lm.us = lm(monthly.us ~ date, data = gas.month)
summary(lm.us)
## 
## Call:
## lm(formula = monthly.us ~ date, data = gas.month)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.23899 -0.37291 -0.03472  0.33581  1.63721 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.543e+00  1.683e-01  -9.171   <2e-16 ***
## date         2.947e-04  1.228e-05  24.005   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5328 on 288 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.6668, Adjusted R-squared:  0.6656 
## F-statistic: 576.3 on 1 and 288 DF,  p-value: < 2.2e-16
gas.month.lm = gas.month
gas.month.lm$predict = predict(lm.us, newdata = gas.month)

gas.month.lm %>% 
  arrange(date) %>% 
  ggplot() + geom_point(aes(x = date, y = monthly.us)) + geom_line(aes(x = date, y = predict), color = "red")
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_path).

5 – Plotting gasoline prices (East Coast)

gas.prices.clean %>% 
  ggplot() + geom_point(aes(x = date, y = weekly.east)) + ggtitle("East Coast Weekly Gas Prices (1994-2018)")
## Warning: Removed 1 rows containing missing values (geom_point).

gas.month = gas.prices.clean %>% 
  group_by(month, year) %>% 
  summarise(monthly.us = mean(weekly.us, na.rm = T), monthly.east = mean(weekly.east, na.rm = T)) %>% 
  mutate(date = make_date(year, month, 1))

gas.month %>% 
  ggplot() + geom_point(aes(x = date, y = monthly.east)) + ggtitle("East Coast Monthly Gas Prices (1994-2018)")
## Warning: Removed 1 rows containing missing values (geom_point).

gas.prices.clean %>% 
  group_by(month) %>% 
  summarise(monthly.us = mean(weekly.us, na.rm = T), monthly.east = mean(weekly.east, na.rm = T)) %>% 
  ggplot() + geom_point(aes(x = month, y = monthly.east)) + geom_line(aes(x = month, y = monthly.east)) + ggtitle("Average East Coast Monthly Gas Prices (1994-2018)")
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_path).

gas.prices.clean %>% 
  group_by(month, year) %>% 
  summarise(monthly.us = mean(weekly.us, na.rm = T), monthly.east = mean(weekly.east, na.rm = T)) %>% 
  ggplot() + geom_point(aes(x = month, y = monthly.east, color = year)) + ggtitle("East Coast Gas Prices Organized by Month")
## Warning: Removed 1 rows containing missing values (geom_point).

gas.prices.clean %>% 
  group_by(month, year) %>% 
  summarise(monthly.us = mean(weekly.us, na.rm = T), monthly.east = mean(weekly.east, na.rm = T)) %>% 
  mutate(date = make_date(year, month, 1)) %>% 
  ggplot() + geom_line(aes(x = year, y = monthly.east, color = factor(month))) + ggtitle("East Coast Gas Prices Stratified by Month")
## Warning: Removed 1 rows containing missing values (geom_path).

6 – Predicting gasoline prices (East Coast)

lm.us = lm(monthly.east ~ date, data = gas.month)
summary(lm.us)
## 
## Call:
## lm(formula = monthly.east ~ date, data = gas.month)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.17794 -0.39736 -0.03787  0.35285  1.63991 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.663e+00  1.707e-01   -9.74   <2e-16 ***
## date         3.052e-04  1.245e-05   24.51   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5404 on 288 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.6759, Adjusted R-squared:  0.6748 
## F-statistic: 600.7 on 1 and 288 DF,  p-value: < 2.2e-16
gas.month.lm = gas.month
gas.month.lm$predict = predict(lm.us, newdata = gas.month)

gas.month.lm %>% 
  arrange(date) %>% 
  ggplot() + geom_point(aes(x = date, y = monthly.east)) + geom_line(aes(x = date, y = predict), color = "red")
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_path).

7 – Plotting electricity prices (U.S.)

elec.prices.clean %>% 
  ggplot() + geom_point(aes(x = date, y = residential)) + ggtitle("Monthly Electricity Prices (2001-2018)")

elec.prices.clean %>% 
  group_by(month) %>% 
  summarise(monthly.us = mean(residential, na.rm = T)) %>% 
  ggplot() + geom_point(aes(x = month, y = monthly.us)) + geom_line(aes(x = month, y = monthly.us)) + ggtitle("Average Electricity Prices by Month (2001-2018)")

elec.prices.clean %>% 
  group_by(month, year) %>% 
  summarise(monthly.us = mean(residential, na.rm = T)) %>% 
  ggplot() + geom_point(aes(x = month, y = monthly.us, color = year)) + ggtitle(" Electricity Prices by Month (2001-2018)")

elec.prices.clean %>% 
  group_by(month, year) %>% 
  summarise(monthly.us = mean(residential, na.rm = T)) %>% 
  mutate(date = make_date(year, month, 1)) %>% 
  ggplot() + geom_line(aes(x = year, y = monthly.us, color = factor(month))) + ggtitle("Electricity Prices Stratified by Month (2001-2018)")

8 – Predicting electricity prices (U.S.)

# Linear Model

lm.elec = lm(residential ~ date, data = elec.prices.clean)
summary(lm.us)
## 
## Call:
## lm(formula = monthly.east ~ date, data = gas.month)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.17794 -0.39736 -0.03787  0.35285  1.63991 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.663e+00  1.707e-01   -9.74   <2e-16 ***
## date         3.052e-04  1.245e-05   24.51   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5404 on 288 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.6759, Adjusted R-squared:  0.6748 
## F-statistic: 600.7 on 1 and 288 DF,  p-value: < 2.2e-16
elec.prices.clean.lm = elec.prices.clean
elec.prices.clean.lm$predict = predict(lm.elec, newdata = elec.prices.clean)

elec.prices.clean.lm %>% 
  arrange(date) %>% 
  ggplot() + geom_point(aes(x = date, y = residential)) + geom_line(aes(x = date, y = predict), color = "red")

# Hodrick-Prescott Filter: This was interesting but not particularly relevent to the analysis

elec.prices.ts = elec.prices.clean %>% 
  select(date, residential) %>% 
  filter(!is.na(residential))

ts.elec = read.zoo(elec.prices.ts, format = "%Y-%m-%d")
ts.elec.m = as.zooreg(aggregate(ts.elec, as.yearmon, mean), freq = 12)
hp.elec = hpfilter(ts.elec.m, freq=300, type="lambda", drift=TRUE)
plot(hp.elec)

9 – Joining data

gas.month.temp = gas.month %>% 
  select(-year)

elec.prices.temp = elec.prices.clean %>% 
  select(-month, -year)

df.joined = left_join(gas.month.temp, elec.prices.temp, by = "date") %>% 
  select(date, month, monthly.east, monthly.us, residential) %>% 
  mutate(year = year(date), gas.east = monthly.east, gas.us = monthly.us, elec.res = residential) %>% 
  select(-residential, -monthly.east, -monthly.us) %>% 
  filter(!is.na(date))

10 – Converting electricity and gasoline to $/mile

gas.mpg = 28.5 # MiniCooper 2018 Countryman
elec.kwhpm = 0.26 # Tesla Model 3

df.joined = df.joined %>% 
  mutate(dpm.gas = gas.us/gas.mpg, dpm.elec = elec.res*elec.kwhpm/100)

df.joined.tidy = df.joined %>% 
  mutate(dpm.gas = gas.us/gas.mpg, dpm.elec = elec.res*elec.kwhpm/100) %>% 
  gather("dpm.gas", "dpm.elec", "gas.east", "gas.us", "elec.res", key = "type", value = "amount")

11 – Graphing $/mile for an electric and gasoline-powered vehicle

df.joined.tidy %>% 
  filter(type == "dpm.gas" | type == "dpm.elec") %>% 
  group_by(month, type) %>% 
  summarise(dollars = mean(amount, na.rm = T)) %>% 
  ggplot() + geom_point(aes(x = month, y = dollars, color = type)) + geom_line(aes(x = month, y = dollars, color = type))

df.joined %>% 
  group_by(month) %>% 
  summarise(elec.savings = mean(dpm.elec, na.rm = T), gas.savings = mean(dpm.gas, na.rm = T)) %>% 
  ggplot() + geom_point(aes(x = month, y = elec.savings - gas.savings)) + ylab("Savings")

12 – Calculating annual savings

df.savings = df.joined %>% 
  group_by(month) %>% 
  summarise(elec.savings = mean(dpm.elec, na.rm = T), gas.savings = mean(dpm.gas, na.rm = T)) %>% 
  mutate(savings = elec.savings - gas.savings)

annual.miles.traveled = 12000
monthly.breakdown = c(10, 10, 6.5, 6.5, 6.5, 6.5, 9, 9, 6.5, 6.5, 6.5, 6.5)

annual.savings = c()

for(i in 1:12){
  annual.savings = c(annual.savings, annual.miles.traveled*monthly.breakdown[i]*df.savings$savings[i]/100)
}

annual.gas = c()

for(i in 1:12){
  annual.gas = c(annual.gas, annual.miles.traveled*monthly.breakdown[i]*df.savings$gas.savings[i]/100)
}

cum.savings = c()

for(i in 1:12){
  if(i == 1){
    cum.savings = c(annual.savings[i])
  }
  if(i != 1){
    cum.savings = c(cum.savings, annual.savings[i] + cum.savings[i-1])
  }
}

df.savings$annual.gas = annual.gas
df.savings$actual.savings = annual.savings
df.savings$cum.savings = cum.savings

13 – Graphing and displaying annual savings

df.savings %>% 
  ggplot() + geom_point(aes(x = month, y = annual.gas)) + ggtitle("Gas Costs per Month ($)")

df.savings %>% 
  ggplot() + geom_point(aes(x = month, y = -cum.savings)) + ggtitle("Cumulative Gas Costs ($)")

df.savings %>% 
  ggplot() + geom_point(aes(x = month, y = -actual.savings)) + ggtitle("Savings per Month ($)")

df.savings %>% 
  ggplot() + geom_point(aes(x = month, y = -cum.savings)) + ggtitle("Cumulative Savings ($)")

paste("Total Average Gas Costs: $",round(sum(annual.gas), 2), sep = "")
## [1] "Total Average Gas Costs: $917.02"
paste("Total Average Annual Savings: $",round(-sum(annual.savings), 2), sep = "")
## [1] "Total Average Annual Savings: $608.51"