Logo

Out of sample prediction

In [1]: import numpy as np

In [2]: import statsmodels.api as sm

Create some data

In [3]: nsample = 50  # number of observations

In [4]: sig = 0.25  # scale applied to the normal noise added to y_true

In [5]: x1 = np.linspace(0, 20, num=nsample)

In [6]: X = np.column_stack((x1, np.sin(x1), (x1 - 5)**2, np.ones(nsample)))  # last column is the constant term

In [7]: beta = [0.5, 0.5, -0.02, 5.]  # one coefficient per column of X

In [8]: y_true = X.dot(beta)

In [9]: y = y_true + sig * np.random.normal(size=nsample)

Setup and estimate the model

In [10]: olsmod = sm.OLS(y, X)

In [11]: olsres = olsmod.fit()

In [12]: print olsres.params
[ 0.5102  0.4748 -0.0212  4.9435]

In [13]: print olsres.bse
[ 0.0129  0.0507  0.0011  0.0837]

In-sample prediction

In [14]: ypred = olsres.predict(X)

Create a new sample of explanatory variables Xnew, predict and plot

In [15]: x1n = np.linspace(20.5, 25, 10)

In [16]: Xnew = np.c_[x1n, np.sin(x1n), (x1n - 5)**2, np.ones(10)]

In [17]: ynewpred = olsres.predict(Xnew)  # predict out of sample

In [18]: print ypred
[  4.413    4.8928   5.3345   5.7123   6.0097   6.2222   6.358    6.437
   6.487    6.5396   6.6246   6.7656   6.9756   7.2552   7.5923   7.9644
   8.3419   8.6933   8.99     9.2115   9.3483   9.4037   9.3932   9.3421
   9.281    9.2412   9.2491   9.3223   9.4663   9.6742   9.9269  10.1972
  10.4537  10.6661  10.8102  10.8718  10.8491  10.7532  10.6058  10.436
  10.2755  10.153   10.0903  10.0977  10.1733  10.3024  10.4605  10.6174
  10.7417  10.8061]

In [19]: import matplotlib.pyplot as plt

In [20]: plt.figure();

In [21]: plt.plot(x1, y, 'o', x1, y_true, 'b-');  # observed y as markers, noise-free y_true as a blue line

In [22]: plt.plot(np.concatenate((x1, x1n)), np.concatenate((ypred, ynewpred)), 'r');  # in-sample and out-of-sample predictions joined into one red line

In [23]: plt.title('OLS prediction, blue: true and data, fitted/predicted values:red');
../../_images/ols_predict.png

This Page