1 분 소요

[Notice] [Arima(p,k,q)]

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# from pandas.plotting import register_matplotlib_converters
# register_matplotlib_converters()

from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima_model import ARIMA
# Load the DEXKOUS CSV, parsing the DATE column and using it as the index.
df = pd.read_csv(
    'https://raw.githubusercontent.com/sm-joo/sm-joo/master/DEXKOUS.csv',
    parse_dates=['DATE'],
    index_col='DATE',
)
df.columns = ['KOUS']
# Cells that are exactly '.' are treated as missing: turn them into NaN,
# convert the column to numbers, then carry the previous value forward.
# The result is assigned back to the frame rather than using inplace=True on
# the selected column, because in-place edits of df['KOUS'] do not reliably
# reach df under pandas copy-on-write, and fillna(method='ffill') is
# deprecated in favour of ffill().
df['KOUS'] = pd.to_numeric(df['KOUS'].replace('.', np.nan)).ffill()
df.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1306 entries, 2015-03-13 to 2020-03-13
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   KOUS    1306 non-null   float64
dtypes: float64(1)
memory usage: 20.4 KB
# Keep the rows dated strictly after 2019-01-01 and strictly before 2020-01-01.
after_start = df.index > '2019-01-01'
before_end = df.index < '2020-01-01'
df = df.loc[after_start & before_end]

ARIMA(p,k,q) => choosing k (the order of differencing)

# Augmented Dickey-Fuller test on the undifferenced KOUS series.
adfuller(df['KOUS'])
(-1.834289925068011,
 0.36357542996557135,
 2,
 257,
 {'1%': -3.4560535712549925,
  '10%': -2.5727985212493754,
  '5%': -2.8728527662442334},
 1453.3457437081727)
# Augmented Dickey-Fuller test on the first difference; the leading NaN that
# diff() produces is dropped before testing.
adfuller(df['KOUS'].diff().dropna())
# Determined as k=1 (the first difference is stationary).
(-10.572632524492832,
 7.239046680336767e-19,
 1,
 257,
 {'1%': -3.4560535712549925,
  '10%': -2.5727985212493754,
  '5%': -2.8728527662442334},
 1446.168602565303)

#ARIMA(p,k,q) => choosing p and q

# 2x3 grid: top row is the original series, bottom row is its first difference.
# Columns are the series itself, its ACF, and its PACF.
figure, axes = plt.subplots(nrows=2, ncols=3, figsize=(15, 7))

# Top row: original series.
axes[0, 0].plot(df['KOUS'])
axes[0, 0].set_title('original series')
plot_acf(df['KOUS'], ax=axes[0, 1])
plot_pacf(df['KOUS'], ax=axes[0, 2])

# Bottom row: first difference. The line plot keeps the leading NaN, while the
# ACF/PACF inputs drop it.
axes[1, 0].plot(df['KOUS'].diff())
axes[1, 0].set_title('1st difference series')
plot_acf(df['KOUS'].diff().dropna(), ax=axes[1, 1])
plot_pacf(df['KOUS'].diff().dropna(), ax=axes[1, 2])

plt.tight_layout()
plt.show()

# Candidate AR order (p): 3 or 1
# Candidate MA order (q): 2 or 0

ARIMA predictive modeling

# ARIMA order: changed from (3,1,2) to (2,1,2).
# NOTE(review): this is the legacy statsmodels.tsa.arima_model API, which was
# removed in later statsmodels releases - confirm the installed version.
model = ARIMA(df['KOUS'], order=(2, 1, 2), freq='B')
# trend='nc' fits the model without a constant term.
model_fit = model.fit(trend='nc')
print(model_fit.summary())
                             ARIMA Model Results                              
==============================================================================
Dep. Variable:                 D.KOUS   No. Observations:                  259
Model:                 ARIMA(2, 1, 2)   Log Likelihood                -769.208
Method:                       css-mle   S.D. of innovations              4.715
Date:                Sun, 19 Apr 2020   AIC                           1548.416
Time:                        06:45:15   BIC                           1566.200
Sample:                    01-03-2019   HQIC                          1555.566
                         - 12-31-2019                                         
================================================================================
                   coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------
ar.L1.D.KOUS    -1.2447      0.243     -5.123      0.000      -1.721      -0.768
ar.L2.D.KOUS    -0.7340      0.257     -2.851      0.005      -1.239      -0.229
ma.L1.D.KOUS     1.1778      0.245      4.802      0.000       0.697       1.658
ma.L2.D.KOUS     0.7424      0.254      2.923      0.004       0.245       1.240
                                    Roots                                    
=============================================================================
                  Real          Imaginary           Modulus         Frequency
-----------------------------------------------------------------------------
AR.1           -0.8479           -0.8022j            1.1672           -0.3794
AR.2           -0.8479           +0.8022j            1.1672            0.3794
MA.1           -0.7932           -0.8472j            1.1606           -0.3698
MA.2           -0.7932           +0.8472j            1.1606            0.3698
-----------------------------------------------------------------------------
# Draw the fitted model's predictions using plot_predict's default arguments,
# then display the figure.
model_fit.plot_predict()
plt.show()

# Hold-out split: the first 30 rows are used for fitting and the following
# 5 rows for evaluating the forecast.
train = df.iloc[:30]
test = df.iloc[30:35]
test.shape
(5, 1)
# Refit ARIMA(2,1,2) without a constant term, this time on the training rows only.
model = ARIMA(train, order=(2, 1, 2), freq='B')
model_fit = model.fit(trend='nc')

# Forecast exactly as many steps as there are test rows, so the forecast always
# lines up with test.index even if the split size changes. forecast() returns
# the point forecasts, their standard errors, and the confidence bounds;
# alpha=0.05 requests a 95% interval.
fc, se, conf = model_fit.forecast(steps=len(test), alpha=0.05)

# Re-index the raw arrays on the test dates so they plot on the same time axis.
fc_series = pd.Series(fc, index=test.index)
lower_series = pd.Series(conf[:, 0], index=test.index)
upper_series = pd.Series(conf[:, 1], index=test.index)

# Plot training data, held-out actuals, and the forecast with its shaded
# confidence band.
plt.figure(figsize=(13, 5))
plt.plot(train, label='training')
plt.plot(test, label='actual')
plt.plot(fc_series, label='forecast')
plt.fill_between(test.index, lower_series, upper_series, color='black', alpha=0.1)
plt.legend(loc='upper left')
plt.show()

댓글남기기