In [1]: import numpy as np
In [2]: from scipy import stats
In [3]: import statsmodels.api as sm
In [4]: import matplotlib.pyplot as plt
In [5]: from statsmodels.sandbox.regression.predstd import wls_prediction_std
In [6]: from statsmodels.iolib.table import (SimpleTable, default_txt_fmt)
In [7]: np.random.seed(1024)
Artificial data with heteroscedastic errors (two variance groups):
In [8]: nsample = 50
In [9]: x = np.linspace(0, 20, nsample)
In [10]: X = np.c_[x, (x - 5)**2, np.ones(nsample)]
In [11]: beta = [0.5, -0.01, 5.]
In [12]: sig = 0.5  # smaller error variance group
In [13]: w = np.ones(nsample)
In [14]: w[nsample * 6 // 10:] = 3  # last 40% of observations have 3x error std; // keeps the slice index an int
In [15]: y_true = np.dot(X, beta)
In [16]: e = np.random.normal(size=nsample)
In [17]: y = y_true + sig * w * e  # heteroscedastic noise
In [18]: X = X[:, [0, 2]]  # drop the quadratic term: fit a deliberately misspecified linear model
In [19]: mod_wls = sm.WLS(y, X, weights=1. / w)
In [20]: res_wls = mod_wls.fit()
In [21]: print(res_wls.summary())
WLS Regression Results
==============================================================================
Dep. Variable: y R-squared: 0.910
Model: WLS Adj. R-squared: 0.909
Method: Least Squares F-statistic: 487.9
Date: Fri, 16 Aug 2013 Prob (F-statistic): 8.52e-27
Time: 08:00:06 Log-Likelihood: -46.062
No. Observations: 50 AIC: 96.12
Df Residuals: 48 BIC: 99.95
Df Model: 1
==============================================================================
coef std err t P>|t| [95.0% Conf. Int.]
------------------------------------------------------------------------------
x1 0.4379 0.020 22.088 0.000 0.398 0.478
const 5.2726 0.185 28.488 0.000 4.900 5.645
==============================================================================
Omnibus: 5.040 Durbin-Watson: 2.242
Prob(Omnibus): 0.080 Jarque-Bera (JB): 6.431
Skew: 0.024 Prob(JB): 0.0401
Kurtosis: 4.756 Cond. No. 17.0
==============================================================================
Estimate an OLS model for comparison
In [22]: res_ols = sm.OLS(y, X).fit()  # unweighted fit on the same data, for comparison with WLS
Compare the estimated parameters in WLS and OLS
In [23]: print(res_ols.params)
[ 0.4349 5.2426]
In [24]: print(res_wls.params)
[ 0.4379 5.2726]
Compare the WLS standard errors to heteroscedasticity corrected OLS standard errors:
In [25]: se = np.vstack([[res_wls.bse], [res_ols.bse], [res_ols.HC0_se],
....: [res_ols.HC1_se], [res_ols.HC2_se], [res_ols.HC3_se]])
....:
In [26]: se = np.round(se, 4)
In [27]: colnames = 'x1', 'const'
In [28]: rownames = 'WLS', 'OLS', 'OLS_HC0', 'OLS_HC1', 'OLS_HC3', 'OLS_HC3'
In [29]: tabl = SimpleTable(se, colnames, rownames, txt_fmt=default_txt_fmt)
In [30]: print tabl
=====================
x1 const
---------------------
WLS 0.0198 0.1851
OLS 0.0233 0.2707
OLS_HC0 0.0281 0.194
OLS_HC1 0.0287 0.198
OLS_HC2 0.029 0.2003
OLS_HC3 0.03 0.207
---------------------
Calculate OLS prediction interval
In [31]: covb = res_ols.cov_params()  # covariance matrix of the OLS parameter estimates
In [32]: prediction_var = res_ols.mse_resid + (X * np.dot(covb, X.T).T).sum(1)  # var(new obs) = residual var + var(x_i' beta_hat), computed row-wise
In [33]: prediction_std = np.sqrt(prediction_var)
In [34]: tppf = stats.t.ppf(0.975, res_ols.df_resid)  # two-sided 95% critical value from the t distribution
Draw a plot to compare predicted values in WLS and OLS:
In [35]: prstd, iv_l, iv_u = wls_prediction_std(res_wls)
In [36]: plt.figure();
In [37]: plt.plot(x, y, 'o', x, y_true, 'b-');
In [38]: plt.plot(x, res_ols.fittedvalues, 'r--');
In [39]: plt.plot(x, res_ols.fittedvalues + tppf * prediction_std, 'r--');
In [40]: plt.plot(x, res_ols.fittedvalues - tppf * prediction_std, 'r--');
In [41]: plt.plot(x, res_wls.fittedvalues, 'g--.');
In [42]: plt.plot(x, iv_u, 'g--');
In [43]: plt.plot(x, iv_l, 'g--');
In [44]: plt.title('blue: true, red: OLS, green: WLS');
In [45]: resid1 = res_ols.resid[w == 1.]
In [46]: var1 = resid1.var(ddof=int(res_ols.df_model) + 1)
In [47]: resid2 = res_ols.resid[w != 1.]
In [48]: var2 = resid2.var(ddof=int(res_ols.df_model) + 1)
In [49]: w_est = w.copy()
In [50]: w_est[w != 1.] = np.sqrt(var2) / np.sqrt(var1)
In [51]: res_fwls = sm.WLS(y, X, 1. / w_est).fit()
In [52]: print(res_fwls.summary())
WLS Regression Results
==============================================================================
Dep. Variable: y R-squared: 0.914
Model: WLS Adj. R-squared: 0.912
Method: Least Squares F-statistic: 507.1
Date: Fri, 16 Aug 2013 Prob (F-statistic): 3.65e-27
Time: 08:00:06 Log-Likelihood: -43.178
No. Observations: 50 AIC: 90.36
Df Residuals: 48 BIC: 94.18
Df Model: 1
==============================================================================
coef std err t P>|t| [95.0% Conf. Int.]
------------------------------------------------------------------------------
x1 0.4390 0.019 22.520 0.000 0.400 0.478
const 5.2710 0.177 29.828 0.000 4.916 5.626
==============================================================================
Omnibus: 4.076 Durbin-Watson: 2.251
Prob(Omnibus): 0.130 Jarque-Bera (JB): 4.336
Skew: 0.003 Prob(JB): 0.114
Kurtosis: 4.443 Cond. No. 16.5
==============================================================================