Arima_forecast
[Notice] [Arima(p,k,q)]
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# from pandas.plotting import register_matplotlib_converters
# register_matplotlib_converters()
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima_model import ARIMA
# Load the DEXKOUS series from CSV, using the DATE column as a DatetimeIndex.
df = pd.read_csv(
    'https://raw.githubusercontent.com/sm-joo/sm-joo/master/DEXKOUS.csv',
    parse_dates=['DATE'],
    index_col='DATE',
)
df.columns = ['KOUS']
# Entries that are a bare '.' are treated as missing values: turn them into
# NaN, convert the column to numeric, then forward-fill the gaps.
# The result is assigned back to the column instead of calling
# replace/fillna with inplace=True on df['KOUS']: such chained in-place calls
# act on a temporary object and leave df unchanged under pandas Copy-on-Write.
# .ffill() replaces the deprecated fillna(method='ffill').
df['KOUS'] = pd.to_numeric(df['KOUS'].replace('.', np.nan)).ffill()
df.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 1306 entries, 2015-03-13 to 2020-03-13 Data columns (total 1 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 KOUS 1306 non-null float64 dtypes: float64(1) memory usage: 20.4 KB
# Keep only the observations strictly between 2019-01-01 and 2020-01-01.
in_2019 = (df.index > '2019-01-01') & (df.index < '2020-01-01')
df = df[in_2019]
ARIMA(p,k,q) => choosing k (the differencing order)
# Augmented Dickey-Fuller test on the level series (the cell displays the
# returned tuple; its second element is the p-value).
adfuller(df['KOUS'])
(-1.834289925068011, 0.36357542996557135, 2, 257, {'1%': -3.4560535712549925, '10%': -2.5727985212493754, '5%': -2.8728527662442334}, 1453.3457437081727)
# Augmented Dickey-Fuller test on the first-differenced series; the leading
# NaN produced by diff() is dropped before testing.
adfuller(df['KOUS'].diff().dropna())
# Conclusion: k=1 (the first difference is stationary).
(-10.572632524492832, 7.239046680336767e-19, 1, 257, {'1%': -3.4560535712549925, '10%': -2.5727985212493754, '5%': -2.8728527662442334}, 1446.168602565303)
# ARIMA(p,k,q) => choosing p and q
# 2x3 grid of subplots: row 0 shows the original series, row 1 its first
# difference; the columns are the series itself, its ACF and its PACF.
level = df.KOUS
first_diff = level.diff()
figure, axes = plt.subplots(2, 3, figsize=(15, 7))
panels = [
    # (title, series drawn in column 0, NaN-free series for ACF/PACF)
    ('original series', level, level),
    ('1st difference series', first_diff, first_diff.dropna()),
]
for row, (title, plotted, clean) in enumerate(panels):
    axes[row, 0].plot(plotted)
    axes[row, 0].set_title(title)
    plot_acf(clean, axes[row, 1])
    plot_pacf(clean, axes[row, 2])
plt.tight_layout()
plt.show()
# Candidate AR order p: 3 or 1
# Candidate MA order q: 2 or 0
ARIMA predictive modeling
# ARIMA order: (3,1,2) was revised to (2,1,2), which is what is fitted here.
model = ARIMA(df['KOUS'], order=(2, 1, 2), freq='B')
# trend='nc' fits the model without a constant term.
model_fit = model.fit(trend='nc')
fit_summary = model_fit.summary()
print(fit_summary)
ARIMA Model Results ============================================================================== Dep. Variable: D.KOUS No. Observations: 259 Model: ARIMA(2, 1, 2) Log Likelihood -769.208 Method: css-mle S.D. of innovations 4.715 Date: Sun, 19 Apr 2020 AIC 1548.416 Time: 06:45:15 BIC 1566.200 Sample: 01-03-2019 HQIC 1555.566 - 12-31-2019 ================================================================================ coef std err z P>|z| [0.025 0.975] -------------------------------------------------------------------------------- ar.L1.D.KOUS -1.2447 0.243 -5.123 0.000 -1.721 -0.768 ar.L2.D.KOUS -0.7340 0.257 -2.851 0.005 -1.239 -0.229 ma.L1.D.KOUS 1.1778 0.245 4.802 0.000 0.697 1.658 ma.L2.D.KOUS 0.7424 0.254 2.923 0.004 0.245 1.240 Roots ============================================================================= Real Imaginary Modulus Frequency ----------------------------------------------------------------------------- AR.1 -0.8479 -0.8022j 1.1672 -0.3794 AR.2 -0.8479 +0.8022j 1.1672 0.3794 MA.1 -0.7932 -0.8472j 1.1606 -0.3698 MA.2 -0.7932 +0.8472j 1.1606 0.3698 -----------------------------------------------------------------------------
# Draw the fitted model's prediction plot and render it.
prediction_fig = model_fit.plot_predict()
plt.show()
## Split the data into a training set (to fit the model) and a test set
## (to evaluate it): the first 30 rows, then the 5 rows that follow.
train, test = df.iloc[:30], df.iloc[30:35]
test.shape
(5, 1)
# Refit the ARIMA(2,1,2) specification (no constant) on the training rows only.
model = ARIMA(train, order=(2, 1, 2), freq='B')
model_fit = model.fit(trend='nc')

# Forecast exactly as many steps as there are test rows. Deriving the horizon
# from len(test) instead of hard-coding it keeps the forecast aligned with
# test.index below even if the train/test split is changed.
# forecast() is unpacked as (point forecasts, standard errors, confidence
# bounds); alpha=0.05 requests a 95% interval.
fc, se, conf = model_fit.forecast(len(test), alpha=0.05)

# Re-index the raw arrays by the test dates so they plot on the same axis.
fc_series = pd.Series(fc, index=test.index)
lower_series = pd.Series(conf[:, 0], index=test.index)
upper_series = pd.Series(conf[:, 1], index=test.index)

# Plot training data, held-out actuals and the forecast, with the
# confidence band shaded.
plt.figure(figsize=(13, 5))
plt.plot(train, label='training')
plt.plot(test, label='actual')
plt.plot(fc_series, label='forecast')
plt.fill_between(test.index, lower_series, upper_series, color='black', alpha=0.1)
plt.legend(loc='upper left')
plt.show()
댓글남기기