Source code for yatsm.regression.recresid

# -*- coding: utf-8 -*-
u""" Recursive residuals computation

Citations:

- Brown, R. L., J. Durbin, and J. M. Evans. 1975. Techniques for Testing the
  Constancy of Regression Relationships over Time. Journal of the Royal
  Statistical Society. Series B (Methodological) 37 (2): 149-192.

- Judge, George G., William E. Griffiths, R. Carter Hill, Helmut Lütkepohl,
  and Tsoung-Chao Lee. 1985. The Theory and Practice of Econometrics.
  New York: Wiley. ISBN: 978-0-471-89530-5

"""
import numpy as np


def recresid(X, y, span=None):
    r""" Return standardized recursive residuals for y ~ X

    For a matrix :math:`X` of :math:`T` total observations of :math:`n`
    variables, the :math:`r` th recursive residual is the forecast prediction
    error for :math:`y_r` using a regression fit on the first :math:`r - 1`
    observations. Recursive residuals are scaled and standardized so they are
    distributed N(0, 1).

    Using notation from Brown, Durbin, and Evans (1975) and Judge et al.
    (1985):

    .. math::

        w_r = \frac{y_r - \boldsymbol{x}_r^{\prime}\boldsymbol{b}_{r-1}}
                   {\sqrt{1 + \boldsymbol{x}_r^{\prime}S_{r-1}\boldsymbol{x}_r}}
            = \frac{y_r - \boldsymbol{x}_r^{\prime}\boldsymbol{b}_r}
                   {\sqrt{1 - \boldsymbol{x}_r^{\prime}S_r\boldsymbol{x}_r}}

        r = k + 1, \ldots, T,

    where :math:`S_r = (X_r^{\prime}X_r)^{-1}` is the inverse cross-product
    matrix after fitting the model on the first :math:`r` observations. A
    quick way of calculating :math:`\boldsymbol{b}_r` and :math:`S_r` is to
    use rank-one update formulas (Equations 4 and 5 in Brown, Durbin, and
    Evans; Equations 5.5.14 and 5.5.15 in Judge et al.):

    .. math::

        \boldsymbol{b}_r = \boldsymbol{b}_{r-1} +
            \frac{S_{r-1}\boldsymbol{x}_r
                  (y_r - \boldsymbol{x}_r^{\prime}\boldsymbol{b}_{r-1})}
                 {1 + \boldsymbol{x}_r^{\prime}S_{r-1}\boldsymbol{x}_r}

    .. math::

        S_r = S_{r-1} -
            \frac{S_{r-1}\boldsymbol{x}_r\boldsymbol{x}_r^{\prime}S_{r-1}}
                 {1 + \boldsymbol{x}_r^{\prime}S_{r-1}\boldsymbol{x}_r}

    This implementation follows `recursive_olsresiduals` in the
    `statsmodels.stats.diagnostic` module.

    Args:
        X (np.ndarray): 2D (n_obs x n_features) design matrix
        y (np.ndarray): 1D dependent variable
        span (int, optional): minimum number of observations for the initial
            regression. If ``span`` is None, use the number of features in
            ``X``

    Returns:
        np.ndarray: array containing recursive residuals standardized by
            their prediction error variances

    """
    nobs, nvars = X.shape
    if span is None:
        span = nvars

    rresid = np.full(nobs, np.nan)
    rvar = np.full(nobs, np.nan)

    # Initial fit on the first `span` observations
    X0 = X[:span]
    y0 = y[:span]
    XTX_j = np.linalg.inv(np.dot(X0.T, X0))
    beta = np.dot(XTX_j, np.dot(X0.T, y0))

    # Seed the recursion at index `span - 1`, following statsmodels'
    # `recursive_olsresiduals` (this entry uses the fit that already
    # includes the observation being predicted)
    rresid[span - 1] = y[span - 1] - np.dot(X[span - 1], beta)
    rvar[span - 1] = 1 + np.dot(X[span - 1], np.dot(XTX_j, X[span - 1]))

    for j in range(span, nobs):
        x_j = X[j]

        # Forecast prediction error using the previous coefficients
        resid_j = y[j] - np.dot(x_j, beta)

        # Rank-one update of the inverse cross-product matrix and betas
        XTXx_j = np.dot(XTX_j, x_j)
        f_t = 1 + np.dot(x_j, XTXx_j)
        XTX_j = XTX_j - np.outer(XTXx_j, XTXx_j) / f_t  # eqn 5.5.15
        beta = beta + XTXx_j * resid_j / f_t  # eqn 5.5.14

        rresid[j] = resid_j
        rvar[j] = f_t

    return rresid / np.sqrt(rvar)
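
As a quick sanity check on the update formulas above, here is a minimal
sketch (illustrative only, not part of the module; the variable names and
simulated data are assumptions) that verifies the rank-one update for
:math:`\boldsymbol{b}_r` against a full refit with `np.linalg.lstsq`:

import numpy as np

rng = np.random.RandomState(0)
T, k = 50, 3
X = np.column_stack([np.ones(T), rng.randn(T, k - 1)])
y = np.dot(X, [1.0, 0.5, -0.25]) + rng.randn(T)

for r in range(k + 1, T + 1):
    # Fit on the first r - 1 observations
    S_prev = np.linalg.inv(np.dot(X[:r - 1].T, X[:r - 1]))
    b_prev = np.dot(S_prev, np.dot(X[:r - 1].T, y[:r - 1]))
    # Rank-one update (eqn 5.5.14) using observation r
    x_r = X[r - 1]
    f = 1 + np.dot(x_r, np.dot(S_prev, x_r))
    b_upd = b_prev + np.dot(S_prev, x_r) * (y[r - 1] - np.dot(x_r, b_prev)) / f
    # The update agrees with a full refit on the first r observations
    b_full = np.linalg.lstsq(X[:r], y[:r], rcond=None)[0]
    assert np.allclose(b_upd, b_full)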
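
And a minimal usage sketch (the simulated data are assumptions; `recresid`
is imported from this module): for a stable linear model, the standardized
recursive residuals should be approximately N(0, 1).

import numpy as np
from yatsm.regression.recresid import recresid

rng = np.random.RandomState(42)
T = 200
X = np.column_stack([np.ones(T), np.arange(T, dtype=float)])
y = 2.0 + 0.1 * np.arange(T) + rng.randn(T)

w = recresid(X, y)              # span defaults to n_features (here, 2)
w = w[~np.isnan(w)]             # drop the leading NaN entries
print(w.mean(), w.std(ddof=1))  # should be near 0 and 1, respectively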