python
阅读原文时间:2021年05月04日阅读:1

%load_ext autoreload
%autoreload 2
%matplotlib inline
%config InlineBackend.figure_format=‘retina‘

from __future__ import absolute_import, division, print_function

import sys
import os

import pandas as pd
import numpy as np

TSA from Statsmodels

import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.tsa.api as smt

Display and Plotting

import matplotlib.pylab as plt
import seaborn as sns

pd.set_option(‘display.float_format‘, lambda x: ‘%.5f‘ % x) # pandas
np.set_printoptions(precision=5, suppress=True) # numpy

pd.set_option(‘display.max_columns‘, 100)
pd.set_option(‘display.max_rows‘, 100)

seaborn plotting style

sns.set(style=‘ticks‘, context=‘poster‘)
filename_ts = ‘aa.csv‘
ts_df = pd.read_csv(filename_ts, index_col=0, parse_dates=[0])

n_sample = ts_df.shape[0]
print(ts_df.shape)
print(ts_df.head())
# Create a training sample and testing sample before analyzing the series

n_train=int(0.95*n_sample)+1

n_train=int(1*n_sample)+1-6
n_forecast=n_sample-n_train
#ts_df

print(ts_df.iloc[n_train][‘Close‘])

ts_train = ts_df.iloc[:n_train][‘Close‘]

#从本地读取数据 本文用的是本地数据,未用接口数据

stock = pd.read_csv(‘aa.csv‘, index_col=0, parse_dates=[0])

stock.head(10)

#下采样 日频数据太多

stock_week = stock[‘Close‘].resample(‘W-MON‘).mean()

#训练数据

stock_train = stock_week[‘2006‘:‘2020‘]

print(stock_train)

ts_test = ts_df.iloc[n_train:][‘Close‘]

print(ts_train.shape)

print(ts_test.shape)

print("Training Series:", "\n", ts_train.tail(), "\n")

print("Testing Series:", "\n", ts_test.head())

print(111)

print(ts_train)

def tsplot(y, lags=None, title=‘‘, figsize=(14, 8)):

fig = plt.figure(figsize=figsize)
layout = (2, 2)
ts\_ax   = plt.subplot2grid(layout, (0, 0))
hist\_ax = plt.subplot2grid(layout, (0, 1))
acf\_ax  = plt.subplot2grid(layout, (1, 0))
pacf\_ax = plt.subplot2grid(layout, (1, 1))

y.plot(ax=ts\_ax) # 折线图
ts\_ax.set\_title(title)
y.plot(ax=hist\_ax, kind=‘hist‘, bins=25) #直方图
hist\_ax.set\_title(‘Histogram‘)
smt.graphics.plot\_acf(y, lags=lags, ax=acf\_ax) # ACF自相关系数

smt.graphics.plot\_pacf(y, lags=lags, ax=pacf\_ax) # 偏自相关系数
\[ax.set\_xlim(0) for ax in \[acf\_ax, pacf\_ax\]\]
sns.despine()
fig.tight\_layout()
return ts\_ax, acf\_ax, pacf\_ax

tsplot(ts_train, title=‘A Given Training Series‘, lags=20);

Fit the model

import warnings # 忽略警告
warnings.filterwarnings(‘ignore‘)

arima200 = sm.tsa.SARIMAX(ts_train, order=(1,0,0)) # ARIMA季节性模型,至于p,d,q需要按照下面的方法选择
model_results = arima200.fit()
# 此处运用BIC(贝叶斯信息准则)进行模型参数选择

另外还可以利用AIC(赤池信息准则),视具体情况而定

import itertools

p_min = 0
d_min = 0
q_min = 0
p_max = 4
d_max = 0
q_max = 4

Initialize a DataFrame to store the results

results_bic = pd.DataFrame(index=[‘AR{}‘.format(i) for i in range(p_min,p_max+1)],
columns=[‘MA{}‘.format(i) for i in range(q_min,q_max+1)])

for p,d,q in itertools.product(range(p_min,p_max+1),
range(d_min,d_max+1),
range(q_min,q_max+1)):
if p==0 and d==0 and q==0:
results_bic.loc[‘AR{}‘.format(p), ‘MA{}‘.format(q)] = np.nan
continue

try:
    model = sm.tsa.SARIMAX(ts\_train, order=(p, d, q),
                           #enforce\_stationarity=False,
                           #enforce\_invertibility=False,
                          )
    results = model.fit() #此处的result包含了很多信息,具体如果用到需要自己去查询

http://www.statsmodels.org/stable/tsa.html

print("results.bic",results.bic)

print("results.aic",results.aic)

    results\_bic.loc\[‘AR{}‘.format(p), ‘MA{}‘.format(q)\] = results.bic
except:
    continue

print(p,d,q)
results_bic = results_bic[results_bic.columns].astype(float)
fig, ax = plt.subplots(figsize=(10, 8))
ax = sns.heatmap(results_bic,
mask=results_bic.isnull(),
ax=ax,
annot=True,
fmt=‘.2f‘,
);
ax.set_title(‘BIC‘);

Alternative model selection method, limited to only searching AR and MA parameters

train_results = sm.tsa.arma_order_select_ic(ts_train, ic=[‘aic‘, ‘bic‘], trend=‘nc‘, max_ar=4, max_ma=4)

print(‘AIC‘, train_results.aic_min_order)
print(‘BIC‘, train_results.bic_min_order)
#残差分析 正态分布 QQ图线性

model_results.plot_diagnostics(figsize=(16, 12));
from statsmodels.tsa.arima_model import ARIMA

print(ts_train)

print(1111)

print(ts_test)

model = ARIMA(ts_train,order=(7,1,0))
model = ARIMA(ts_restored,order=(7,1,0)) #第一种情况,导入ARIMA模型
model = ARIMA(data,order=(7,1,0)) #第二种情况,导入ARIMA模型
result = model.fit(disp=-1)
print(result.summary())
result.conf_int()#模型诊断,可以发现所有的系数置信区间都不为0;即在5%的置信水平下,所有的系数都是显著的,即模型通过检验。

#最后画出时序图

fig, ax = plt.subplots(figsize=(12, 10))

# pred = result.predict(‘20140609‘, ‘20160701‘,dynamic=True, typ=‘levels‘)

ax = ts_train.loc[‘2020‘:].plot(ax=ax) #注意起点是从1901开始

fig = result.plot_predict(20,500) #因为前面是90个数,所以加上预测的10个就是100

plt.show() #数据预测并画图

print(stock_train)

model = ARIMA(stock_train, order=(7, 1, 0))

model_pred = model.fit()

手机扫一扫

移动阅读更方便

阿里云服务器
腾讯云服务器
七牛云服务器

你可能感兴趣的文章