1 – Loading data
gas.prices = read.csv("/Users/jake/Desktop/Datasets/Energy/Gas/gas_prices.csv", stringsAsFactors = FALSE)
str(gas.prices)
## 'data.frame': 1257 obs. of 22 variables:
## $ Date : chr "Nov 28, 1994" "Dec 05, 1994" "Dec 12, 1994" "Dec 19, 1994" ...
## $ Weekly.U.S..Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon. : num 1.27 1.26 1.24 1.23 1.22 ...
## $ Weekly.East.Coast.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon. : num 1.3 1.3 1.29 1.28 1.27 ...
## $ Weekly.New.England..PADD.1A..Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon. : num 1.33 1.33 1.32 1.32 1.31 ...
## $ Weekly.Central.Atlantic..PADD.1B..Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon.: num 1.35 1.36 1.35 1.34 1.34 ...
## $ Weekly.Lower.Atlantic..PADD.1C..Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon. : num 1.28 1.28 1.27 1.26 1.25 ...
## $ Weekly.Midwest.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon. : num 1.22 1.19 1.16 1.15 1.14 ...
## $ Weekly.Gulf.Coast.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon. : num 1.3 1.29 1.29 1.28 1.27 ...
## $ Weekly.Rocky.Mountain.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon. : num 1.31 1.32 1.32 1.3 1.28 ...
## $ Weekly.West.Coast.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon. : num NA NA NA NA NA ...
## $ Weekly.Colorado.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon. : num NA NA NA NA NA NA NA NA NA NA ...
## $ Weekly.Florida.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon. : num NA NA NA NA NA NA NA NA NA NA ...
## $ Weekly.Minnesota.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon. : num NA NA NA NA NA NA NA NA NA NA ...
## $ Weekly.New.York.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon. : num NA NA NA NA NA NA NA NA NA NA ...
## $ Weekly.Ohio.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon. : num NA NA NA NA NA NA NA NA NA NA ...
## $ Weekly.Texas.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon. : num NA NA NA NA NA NA NA NA NA NA ...
## $ Weekly.Washington.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon. : num NA NA NA NA NA NA NA NA NA NA ...
## $ Weekly.Cleveland..OH.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon. : num NA NA NA NA NA NA NA NA NA NA ...
## $ Weekly.Denver..CO.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon. : num NA NA NA NA NA NA NA NA NA NA ...
## $ Weekly.Miami..FL.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon. : num NA NA NA NA NA NA NA NA NA NA ...
## $ Weekly.Seattle..WA.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon. : num NA NA NA NA NA NA NA NA NA NA ...
## $ X : logi NA NA NA NA NA NA ...
elec.prices = read.csv("/Users/jake/Desktop/Datasets/Energy/Electricity/Average_retail_price_of_electricity_cleaned.csv", stringsAsFactors = FALSE)
str(elec.prices)
## 'data.frame': 7 obs. of 214 variables:
## $ source.key: chr "ELEC.PRICE.US-ALL.M" "ELEC.PRICE.US-ALL.M" "ELEC.PRICE.US-RES.M" "ELEC.PRICE.US-COM.M" ...
## $ Jan.2001 : chr "" "6.75" "7.73" "7.25" ...
## $ Feb.2001 : chr "" "6.87" "8.04" "7.51" ...
## $ Mar.2001 : chr "" "7.01" "8.32" "7.7" ...
## $ Apr.2001 : chr "" "7.02" "8.46" "7.73" ...
## $ May.2001 : chr "" "7.17" "8.83" "7.77" ...
## $ Jun.2001 : chr "" "7.58" "9.07" "8.13" ...
## $ Jul.2001 : chr "" "7.88" "9.03" "8.41" ...
## $ Aug.2001 : chr "" "7.84" "9.01" "8.35" ...
## $ Sep.2001 : chr "" "7.62" "8.92" "8.22" ...
## $ Oct.2001 : chr "" "7.43" "8.84" "8.27" ...
## $ Nov.2001 : chr "" "7.02" "8.47" "7.73" ...
## $ Dec.2001 : chr "" "7.03" "8.29" "7.66" ...
## $ Jan.2002 : chr "" "6.95" "8.07" "7.49" ...
## $ Feb.2002 : chr "" "6.97" "8.19" "7.68" ...
## $ Mar.2002 : chr "" "6.95" "8.17" "7.72" ...
## $ Apr.2002 : chr "" "6.95" "8.37" "7.68" ...
## $ May.2002 : chr "" "7.11" "8.64" "7.84" ...
## $ Jun.2002 : chr "" "7.45" "8.73" "8.18" ...
## $ Jul.2002 : chr "" "7.68" "8.82" "8.33" ...
## $ Aug.2002 : chr "" "7.58" "8.72" "8.24" ...
## $ Sep.2002 : chr "" "7.36" "8.59" "8" ...
## $ Oct.2002 : chr "" "7.2" "8.47" "8" ...
## $ Nov.2002 : chr "" "6.94" "8.31" "7.64" ...
## $ Dec.2002 : chr "" "6.96" "8.08" "7.59" ...
## $ Jan.2003 : chr "" "7" "8" "7.45" ...
## $ Feb.2003 : chr "" "7.05" "8.02" "7.65" ...
## $ Mar.2003 : chr "" "7.16" "8.35" "7.78" ...
## $ Apr.2003 : chr "" "7.27" "8.82" "7.93" ...
## $ May.2003 : chr "" "7.42" "8.99" "8.06" ...
## $ Jun.2003 : chr "" "7.78" "9.25" "8.44" ...
## $ Jul.2003 : chr "" "8.02" "9.21" "8.55" ...
## $ Aug.2003 : chr "" "7.97" "9.22" "8.46" ...
## $ Sep.2003 : chr "" "7.61" "8.92" "8.15" ...
## $ Oct.2003 : chr "" "7.39" "8.85" "8.09" ...
## $ Nov.2003 : chr "" "7.19" "8.72" "7.85" ...
## $ Dec.2003 : chr "" "7.14" "8.3" "7.7" ...
## $ Jan.2004 : chr "" "7.22" "8.24" "7.7" ...
## $ Feb.2004 : chr "" "7.25" "8.33" "7.85" ...
## $ Mar.2004 : chr "" "7.3" "8.62" "7.92" ...
## $ Apr.2004 : chr "" "7.34" "8.93" "7.89" ...
## $ May.2004 : chr "" "7.46" "9.07" "7.99" ...
## $ Jun.2004 : chr "" "7.93" "9.29" "8.49" ...
## $ Jul.2004 : chr "" "8.11" "9.36" "8.63" ...
## $ Aug.2004 : chr "" "8.18" "9.5" "8.7" ...
## $ Sep.2004 : chr "" "7.97" "9.39" "8.54" ...
## $ Oct.2004 : chr "" "7.6" "9.05" "8.23" ...
## $ Nov.2004 : chr "" "7.42" "8.96" "8.04" ...
## $ Dec.2004 : chr "" "7.36" "8.58" "7.82" ...
## $ Jan.2005 : chr "" "7.47" "8.52" "7.99" ...
## $ Feb.2005 : chr "" "7.58" "8.76" "8.19" ...
## $ Mar.2005 : chr "" "7.59" "8.87" "8.15" ...
## $ Apr.2005 : chr "" "7.65" "9.22" "8.25" ...
## $ May.2005 : chr "" "7.84" "9.56" "8.41" ...
## $ Jun.2005 : chr "" "8.38" "9.79" "8.89" ...
## $ Jul.2005 : chr "" "8.6" "9.77" "9" ...
## $ Aug.2005 : chr "" "8.71" "9.93" "9.1" ...
## $ Sep.2005 : chr "" "8.68" "9.94" "9.18" ...
## $ Oct.2005 : chr "" "8.37" "9.76" "8.91" ...
## $ Nov.2005 : chr "" "8.21" "9.76" "8.79" ...
## $ Dec.2005 : chr "" "8.21" "9.27" "8.79" ...
## $ Jan.2006 : chr "" "8.31" "9.55" "8.87" ...
## $ Feb.2006 : chr "" "8.49" "9.8" "9.14" ...
## $ Mar.2006 : chr "" "8.44" "9.87" "9.06" ...
## $ Apr.2006 : chr "" "8.56" "10.32" "9.17" ...
## $ May.2006 : chr "" "8.71" "10.61" "9.22" ...
## $ Jun.2006 : chr "" "9.3" "10.85" "9.88" ...
## $ Jul.2006 : chr "" "9.55" "10.96" "9.97" ...
## $ Aug.2006 : chr "" "9.58" "10.94" "10.04" ...
## $ Sep.2006 : chr "" "9.32" "10.94" "9.89" ...
## $ Oct.2006 : chr "" "8.89" "10.58" "9.51" ...
## $ Nov.2006 : chr "" "8.63" "10.18" "9.24" ...
## $ Dec.2006 : chr "" "8.55" "9.84" "9.08" ...
## $ Jan.2007 : chr "" "8.71" "10.06" "9.12" ...
## $ Feb.2007 : chr "" "8.74" "9.89" "9.34" ...
## $ Mar.2007 : chr "" "8.8" "10.27" "9.35" ...
## $ Apr.2007 : chr "" "8.82" "10.63" "9.38" ...
## $ May.2007 : chr "" "8.96" "10.77" "9.51" ...
## $ Jun.2007 : chr "" "9.45" "11.09" "9.95" ...
## $ Jul.2007 : chr "" "9.64" "11.07" "10.14" ...
## $ Aug.2007 : chr "" "9.68" "11.07" "10.07" ...
## $ Sep.2007 : chr "" "9.43" "10.96" "9.9" ...
## $ Oct.2007 : chr "" "9.17" "10.82" "9.77" ...
## $ Nov.2007 : chr "" "8.94" "10.7" "9.5" ...
## $ Dec.2007 : chr "" "8.91" "10.33" "9.42" ...
## $ Jan.2008 : chr "" "8.92" "10.14" "9.39" ...
## $ Feb.2008 : chr "" "8.9" "10.16" "9.46" ...
## $ Mar.2008 : chr "" "9.03" "10.45" "9.62" ...
## $ Apr.2008 : chr "" "9.22" "10.93" "9.79" ...
## $ May.2008 : chr "" "9.47" "11.4" "10.05" ...
## $ Jun.2008 : chr "" "10.27" "11.77" "10.87" ...
## $ Jul.2008 : chr "" "10.65" "12.07" "11.1" ...
## $ Aug.2008 : chr "" "10.59" "12.09" "11.07" ...
## $ Sep.2008 : chr "" "10.27" "11.92" "10.77" ...
## $ Oct.2008 : chr "" "9.94" "11.81" "10.41" ...
## $ Nov.2008 : chr "" "9.67" "11.42" "10.16" ...
## $ Dec.2008 : chr "" "9.53" "10.86" "9.92" ...
## $ Jan.2009 : chr "" "9.68" "10.98" "9.96" ...
## $ Feb.2009 : chr "" "9.78" "11.18" "10.2" ...
## [list output truncated]
# Location of both datasets:
# https://www.eia.gov
2 – Cleaning data
## Gas Prices Date
gas.prices.clean = gas.prices %>%
mutate(date = mdy(Date), weekly.us = Weekly.U.S..Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon., weekly.east = Weekly.East.Coast.Premium.Conventional.Retail.Gasoline.Prices...Dollars.per.Gallon.) %>%
select(date, weekly.us, weekly.east) %>%
mutate(month = month(date), day = day(date), year = year(date))
## Electricity Prices Date
elec.prices.clean = t(elec.prices)
elec.prices.clean = data.frame(elec.prices.clean, stringsAsFactors = F)
names(elec.prices.clean) = c("NA","all","residential","commercial","industrial","transit", "other")
elec.prices.clean = elec.prices.clean[-1]
elec.prices.clean = elec.prices.clean[-c(1),]
setDT(elec.prices.clean, keep.rownames = TRUE)[]
## rn all residential commercial industrial transit other
## 1: Jan.2001 6.75 7.73 7.25 4.73 -- 6.48
## 2: Feb.2001 6.87 8.04 7.51 4.8 -- 6.94
## 3: Mar.2001 7.01 8.32 7.7 4.86 -- 7.05
## 4: Apr.2001 7.02 8.46 7.73 4.87 -- 7.04
## 5: May.2001 7.17 8.83 7.77 5 -- 7.14
## ---
## 209: May.2018 10.41 13.15 10.49 6.82 9.46 --
## 210: Jun.2018 10.79 13.05 10.82 7.18 10.15 --
## 211: Jul.2018 11.03 13.13 10.97 7.34 10.14 --
## 212: Aug.2018 11.05 13.3 11.01 7.24 9.68 --
## 213: Sep.2018 10.79 13.01 10.91 7.11 10.28 --
names(elec.prices.clean)[1] = "date"
elec.prices.clean = elec.prices.clean %>%
mutate(date = mdy(date), year = year(date), month = month(date), date = make_date(year, month, 1))
elec.prices.clean$all = as.numeric(elec.prices.clean$all)
elec.prices.clean$residential = as.numeric(elec.prices.clean$residential)
elec.prices.clean$commercial = as.numeric(elec.prices.clean$commercial)
elec.prices.clean$industrial = as.numeric(elec.prices.clean$industrial)
elec.prices.clean$transit = as.numeric(elec.prices.clean$transit)
## Warning: NAs introduced by coercion
elec.prices.clean$other = as.numeric(elec.prices.clean$other)
## Warning: NAs introduced by coercion
3 – Plotting gasoline prices (U.S.)
gas.prices.clean %>%
ggplot() + geom_point(aes(x = date, y = weekly.us)) + ggtitle("U.S. Weekly Gas Prices (1994-2018)")
## Warning: Removed 1 rows containing missing values (geom_point).
gas.month = gas.prices.clean %>%
group_by(month, year) %>%
summarise(monthly.us = mean(weekly.us, na.rm = T), monthly.east = mean(weekly.east, na.rm = T)) %>%
mutate(date = make_date(year, month, 1))
gas.month %>%
ggplot() + geom_point(aes(x = date, y = monthly.us)) + ggtitle("U.S. Monthly Gas Prices (1994-2018)")
## Warning: Removed 1 rows containing missing values (geom_point).
gas.prices.clean %>%
group_by(month) %>%
summarise(monthly.us = mean(weekly.us, na.rm = T), monthly.east = mean(weekly.east, na.rm = T)) %>%
ggplot() + geom_point(aes(x = month, y = monthly.us)) + geom_line(aes(x = month, y = monthly.us)) + ggtitle("Average U.S. Monthly Gas Prices (1994-2018)")
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_path).
gas.prices.clean %>%
group_by(month, year) %>%
summarise(monthly.us = mean(weekly.us, na.rm = T), monthly.east = mean(weekly.east, na.rm = T)) %>%
ggplot() + geom_point(aes(x = month, y = monthly.us, color = year)) + ggtitle("U.S. Gas Prices by Month")
## Warning: Removed 1 rows containing missing values (geom_point).
gas.prices.clean %>%
group_by(month, year) %>%
summarise(monthly.us = mean(weekly.us, na.rm = T), monthly.east = mean(weekly.east, na.rm = T)) %>%
mutate(date = make_date(year, month, 1)) %>%
ggplot() + geom_line(aes(x = year, y = monthly.us, color = factor(month))) + ggtitle("U.S. Gas Prices Stratified by Month")
## Warning: Removed 1 rows containing missing values (geom_path).
4 – Predicting gasoline prices (U.S.)
lm.us = lm(monthly.us ~ date, data = gas.month)
summary(lm.us)
##
## Call:
## lm(formula = monthly.us ~ date, data = gas.month)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.23899 -0.37291 -0.03472 0.33581 1.63721
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.543e+00 1.683e-01 -9.171 <2e-16 ***
## date 2.947e-04 1.228e-05 24.005 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5328 on 288 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6668, Adjusted R-squared: 0.6656
## F-statistic: 576.3 on 1 and 288 DF, p-value: < 2.2e-16
gas.month.lm = gas.month
gas.month.lm$predict = predict(lm.us, newdata = gas.month)
gas.month.lm %>%
arrange(date) %>%
ggplot() + geom_point(aes(x = date, y = monthly.us)) + geom_line(aes(x = date, y = predict), color = "red")
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_path).
5 – Plotting gasoline prices (East Coast)
gas.prices.clean %>%
ggplot() + geom_point(aes(x = date, y = weekly.east)) + ggtitle("East Coast Weekly Gas Prices (1994-2018)")
## Warning: Removed 1 rows containing missing values (geom_point).
gas.month = gas.prices.clean %>%
group_by(month, year) %>%
summarise(monthly.us = mean(weekly.us, na.rm = T), monthly.east = mean(weekly.east, na.rm = T)) %>%
mutate(date = make_date(year, month, 1))
gas.month %>%
ggplot() + geom_point(aes(x = date, y = monthly.east)) + ggtitle("East Coast Monthly Gas Prices (1994-2018)")
## Warning: Removed 1 rows containing missing values (geom_point).
gas.prices.clean %>%
group_by(month) %>%
summarise(monthly.us = mean(weekly.us, na.rm = T), monthly.east = mean(weekly.east, na.rm = T)) %>%
ggplot() + geom_point(aes(x = month, y = monthly.east)) + geom_line(aes(x = month, y = monthly.east)) + ggtitle("Average East Coast Monthly Gas Prices (1994-2018)")
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_path).
gas.prices.clean %>%
group_by(month, year) %>%
summarise(monthly.us = mean(weekly.us, na.rm = T), monthly.east = mean(weekly.east, na.rm = T)) %>%
ggplot() + geom_point(aes(x = month, y = monthly.east, color = year)) + ggtitle("East Coast Gas Prices Organized by Month")
## Warning: Removed 1 rows containing missing values (geom_point).
gas.prices.clean %>%
group_by(month, year) %>%
summarise(monthly.us = mean(weekly.us, na.rm = T), monthly.east = mean(weekly.east, na.rm = T)) %>%
mutate(date = make_date(year, month, 1)) %>%
ggplot() + geom_line(aes(x = year, y = monthly.east, color = factor(month))) + ggtitle("East Coast Gas Prices Stratified by Month")
## Warning: Removed 1 rows containing missing values (geom_path).
6 – Predicting gasoline prices (East Coast)
lm.us = lm(monthly.east ~ date, data = gas.month)
summary(lm.us)
##
## Call:
## lm(formula = monthly.east ~ date, data = gas.month)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.17794 -0.39736 -0.03787 0.35285 1.63991
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.663e+00 1.707e-01 -9.74 <2e-16 ***
## date 3.052e-04 1.245e-05 24.51 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5404 on 288 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6759, Adjusted R-squared: 0.6748
## F-statistic: 600.7 on 1 and 288 DF, p-value: < 2.2e-16
gas.month.lm = gas.month
gas.month.lm$predict = predict(lm.us, newdata = gas.month)
gas.month.lm %>%
arrange(date) %>%
ggplot() + geom_point(aes(x = date, y = monthly.east)) + geom_line(aes(x = date, y = predict), color = "red")
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_path).
7 – Plotting electricity prices (U.S.)
elec.prices.clean %>%
ggplot() + geom_point(aes(x = date, y = residential)) + ggtitle("Monthly Electricity Prices (2001-2018)")
elec.prices.clean %>%
group_by(month) %>%
summarise(monthly.us = mean(residential, na.rm = T)) %>%
ggplot() + geom_point(aes(x = month, y = monthly.us)) + geom_line(aes(x = month, y = monthly.us)) + ggtitle("Average Electricity Prices by Month (2001-2018)")
elec.prices.clean %>%
group_by(month, year) %>%
summarise(monthly.us = mean(residential, na.rm = T)) %>%
ggplot() + geom_point(aes(x = month, y = monthly.us, color = year)) + ggtitle(" Electricity Prices by Month (2001-2018)")
elec.prices.clean %>%
group_by(month, year) %>%
summarise(monthly.us = mean(residential, na.rm = T)) %>%
mutate(date = make_date(year, month, 1)) %>%
ggplot() + geom_line(aes(x = year, y = monthly.us, color = factor(month))) + ggtitle("Electricity Prices Stratified by Month (2001-2018)")
8 – Predicting electricity prices (U.S.)
# Linear Model
lm.elec = lm(residential ~ date, data = elec.prices.clean)
summary(lm.us)
##
## Call:
## lm(formula = monthly.east ~ date, data = gas.month)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.17794 -0.39736 -0.03787 0.35285 1.63991
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.663e+00 1.707e-01 -9.74 <2e-16 ***
## date 3.052e-04 1.245e-05 24.51 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5404 on 288 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6759, Adjusted R-squared: 0.6748
## F-statistic: 600.7 on 1 and 288 DF, p-value: < 2.2e-16
elec.prices.clean.lm = elec.prices.clean
elec.prices.clean.lm$predict = predict(lm.elec, newdata = elec.prices.clean)
elec.prices.clean.lm %>%
arrange(date) %>%
ggplot() + geom_point(aes(x = date, y = residential)) + geom_line(aes(x = date, y = predict), color = "red")
# Hodrick-Prescott Filter: This was interesting but not particularly relevent to the analysis
elec.prices.ts = elec.prices.clean %>%
select(date, residential) %>%
filter(!is.na(residential))
ts.elec = read.zoo(elec.prices.ts, format = "%Y-%m-%d")
ts.elec.m = as.zooreg(aggregate(ts.elec, as.yearmon, mean), freq = 12)
hp.elec = hpfilter(ts.elec.m, freq=300, type="lambda", drift=TRUE)
plot(hp.elec)
9 – Joining data
gas.month.temp = gas.month %>%
select(-year)
elec.prices.temp = elec.prices.clean %>%
select(-month, -year)
df.joined = left_join(gas.month.temp, elec.prices.temp, by = "date") %>%
select(date, month, monthly.east, monthly.us, residential) %>%
mutate(year = year(date), gas.east = monthly.east, gas.us = monthly.us, elec.res = residential) %>%
select(-residential, -monthly.east, -monthly.us) %>%
filter(!is.na(date))
10 – Converting electricity and gasoline to $/mile
gas.mpg = 28.5 # MiniCooper 2018 Countryman
elec.kwhpm = 0.26 # Tesla Model 3
df.joined = df.joined %>%
mutate(dpm.gas = gas.us/gas.mpg, dpm.elec = elec.res*elec.kwhpm/100)
df.joined.tidy = df.joined %>%
mutate(dpm.gas = gas.us/gas.mpg, dpm.elec = elec.res*elec.kwhpm/100) %>%
gather("dpm.gas", "dpm.elec", "gas.east", "gas.us", "elec.res", key = "type", value = "amount")
11 – Graphing $/mile for an electric and gasoline-powered vehicle
df.joined.tidy %>%
filter(type == "dpm.gas" | type == "dpm.elec") %>%
group_by(month, type) %>%
summarise(dollars = mean(amount, na.rm = T)) %>%
ggplot() + geom_point(aes(x = month, y = dollars, color = type)) + geom_line(aes(x = month, y = dollars, color = type))
df.joined %>%
group_by(month) %>%
summarise(elec.savings = mean(dpm.elec, na.rm = T), gas.savings = mean(dpm.gas, na.rm = T)) %>%
ggplot() + geom_point(aes(x = month, y = elec.savings - gas.savings)) + ylab("Savings")
12 – Calculating annual savings
df.savings = df.joined %>%
group_by(month) %>%
summarise(elec.savings = mean(dpm.elec, na.rm = T), gas.savings = mean(dpm.gas, na.rm = T)) %>%
mutate(savings = elec.savings - gas.savings)
annual.miles.traveled = 12000
monthly.breakdown = c(10, 10, 6.5, 6.5, 6.5, 6.5, 9, 9, 6.5, 6.5, 6.5, 6.5)
annual.savings = c()
for(i in 1:12){
annual.savings = c(annual.savings, annual.miles.traveled*monthly.breakdown[i]*df.savings$savings[i]/100)
}
annual.gas = c()
for(i in 1:12){
annual.gas = c(annual.gas, annual.miles.traveled*monthly.breakdown[i]*df.savings$gas.savings[i]/100)
}
cum.savings = c()
for(i in 1:12){
if(i == 1){
cum.savings = c(annual.savings[i])
}
if(i != 1){
cum.savings = c(cum.savings, annual.savings[i] + cum.savings[i-1])
}
}
df.savings$annual.gas = annual.gas
df.savings$actual.savings = annual.savings
df.savings$cum.savings = cum.savings
13 – Graphing and displaying annual savings
df.savings %>%
ggplot() + geom_point(aes(x = month, y = annual.gas)) + ggtitle("Gas Costs per Month ($)")
df.savings %>%
ggplot() + geom_point(aes(x = month, y = -cum.savings)) + ggtitle("Cumulative Gas Costs ($)")
df.savings %>%
ggplot() + geom_point(aes(x = month, y = -actual.savings)) + ggtitle("Savings per Month ($)")
df.savings %>%
ggplot() + geom_point(aes(x = month, y = -cum.savings)) + ggtitle("Cumulative Savings ($)")
paste("Total Average Gas Costs: $",round(sum(annual.gas), 2), sep = "")
## [1] "Total Average Gas Costs: $917.02"
paste("Total Average Annual Savings: $",round(-sum(annual.savings), 2), sep = "")
## [1] "Total Average Annual Savings: $608.51"