Skip to content

Forecasting

forecast()

Forecasting module for pyStMoMo.

ForStMoMo dataclass

Result of :func:~pystmomo.forecast.forecast.

Attributes:

Name Type Description
fit FitStMoMo

The fitted model that was projected.

h int

Forecast horizon (number of future years).

years_f ndarray

Future year labels, shape (h,).

cohorts_f ndarray

All future cohorts that were forecast (those beyond max(fit.cohorts)), shape (n_new_cohorts,). Empty array when the model has no cohort term.

kt_f ndarray

Central forecast of period indexes, shape (N, h).

kt_f_lower ndarray

Lower confidence band for kt, shape (N, h).

kt_f_upper ndarray

Upper confidence band for kt, shape (N, h).

gc_f ndarray | None

Central forecast of new cohort indexes, shape (n_new_cohorts,). None when the model has no cohort term.

gc_f_lower ndarray | None

Lower band for gc, shape (n_new_cohorts,). None if no cohort.

gc_f_upper ndarray | None

Upper band for gc, shape (n_new_cohorts,). None if no cohort.

rates ndarray

Central forecast mortality rates, shape (n_ages, h).

level float

Confidence level used for intervals.

kt_model object

The fitted MRWD or IndependentArima used to project kt.

gc_model object | None

The fitted model used to project gc, or None.

Source code in src/pystmomo/forecast/forecast_result.py
@dataclass
class ForStMoMo:
    """Result of :func:`~pystmomo.forecast.forecast`.

    Attributes
    ----------
    fit:
        The fitted model that was projected.
    h:
        Forecast horizon (number of future years).
    years_f:
        Future year labels, shape (h,).
    cohorts_f:
        All future cohorts that were forecast (those beyond max(fit.cohorts)),
        shape (n_new_cohorts,).  Empty array when the model has no cohort term.
    kt_f:
        Central forecast of period indexes, shape (N, h).
    kt_f_lower:
        Lower confidence band for kt, shape (N, h).
    kt_f_upper:
        Upper confidence band for kt, shape (N, h).
    gc_f:
        Central forecast of new cohort indexes, shape (n_new_cohorts,).
        None when the model has no cohort term.
    gc_f_lower:
        Lower band for gc, shape (n_new_cohorts,).  None if no cohort.
    gc_f_upper:
        Upper band for gc, shape (n_new_cohorts,).  None if no cohort.
    rates:
        Central forecast mortality rates, shape (n_ages, h).
    level:
        Confidence level used for intervals.
    kt_model:
        The fitted MRWD or IndependentArima used to project kt.
    gc_model:
        The fitted model used to project gc, or None.
    """

    fit: FitStMoMo
    h: int
    years_f: np.ndarray
    cohorts_f: np.ndarray
    kt_f: np.ndarray
    kt_f_lower: np.ndarray
    kt_f_upper: np.ndarray
    gc_f: np.ndarray | None
    gc_f_lower: np.ndarray | None
    gc_f_upper: np.ndarray | None
    rates: np.ndarray
    level: float
    kt_model: object
    gc_model: object | None = field(default=None)

    def __repr__(self) -> str:
        n_ages = self.rates.shape[0]
        return (
            f"ForStMoMo(\n"
            f"  model  = {self.fit.model}\n"
            f"  years  = {self.years_f[0]}{self.years_f[-1]}  (h={self.h})\n"
            f"  n_ages = {n_ages}\n"
            f"  rates  shape = {self.rates.shape}\n"
            f"  level  = {self.level:.0%}\n"
            f")"
        )

IndependentArima

One ARIMA model per row of kt (or per scalar gc).

Parameters:

Name Type Description Default
order tuple[int, int, int]

ARIMA order (p, d, q).

required
include_constant bool

Whether to include a constant/trend term.

required
models list

List of fitted statsmodels ARIMA results, one per series.

required
last_values ndarray

Shape (N,) — last observed values for each series.

required
Source code in src/pystmomo/forecast/arima_fc.py
class IndependentArima:
    """One ARIMA model per row of kt (or per scalar gc).

    Parameters
    ----------
    order:
        ARIMA order (p, d, q).
    include_constant:
        Whether to include a constant/trend term.
    models:
        List of fitted statsmodels ARIMA results, one per series.
    last_values:
        Shape (N,) — last observed values for each series.
    """

    def __init__(
        self,
        order: tuple[int, int, int],
        include_constant: bool,
        models: list,
        last_values: np.ndarray,
    ) -> None:
        self.order = order
        self.include_constant = include_constant
        self.models = models
        self.last_values = last_values

    @classmethod
    def fit(
        cls,
        kt: np.ndarray,
        order: tuple[int, int, int] = (1, 1, 0),
        include_constant: bool = True,
    ) -> IndependentArima:
        """Fit independent ARIMA models to each row of kt.

        Parameters
        ----------
        kt:
            Shape (N, n_years) or (n_years,) for a single series.
        order:
            ARIMA (p, d, q).
        include_constant:
            Include constant/trend term.

        Returns
        -------
        IndependentArima
        """
        from statsmodels.tsa.arima.model import ARIMA

        kt = np.atleast_2d(kt)
        N = kt.shape[0]
        _, d, _ = order
        # statsmodels semantics: when d >= 1, a constant in levels corresponds
        # to a linear trend term ('t'), not 'c'.  'c' is only valid for d=0.
        trend = ("t" if d >= 1 else "c") if include_constant else "n"

        fitted_models = []
        for i in range(N):
            series = kt[i, :]
            # Drop NaN/Inf values that might appear from sparse cohorts
            series = series[np.isfinite(series)]
            if len(series) < max(3, sum(order) + 2):
                raise ValueError(
                    f"Series {i} has too few valid observations ({len(series)}) "
                    f"to fit ARIMA{order}."
                )
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                model = ARIMA(series, order=order, trend=trend)
                result = model.fit()
            fitted_models.append(result)

        last_values = kt[:, -1]
        return cls(
            order=order,
            include_constant=include_constant,
            models=fitted_models,
            last_values=last_values,
        )

    def forecast(
        self,
        h: int,
        level: float = 0.95,
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Point forecast and confidence intervals for h steps ahead.

        Parameters
        ----------
        h:
            Forecast horizon.
        level:
            Confidence level (e.g. 0.95).

        Returns
        -------
        mean, lower, upper
            Each shape (N, h).
        """
        N = len(self.models)
        mean = np.zeros((N, h))
        lower = np.zeros((N, h))
        upper = np.zeros((N, h))
        alpha = 1.0 - level

        for i, result in enumerate(self.models):
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                fc = result.get_forecast(steps=h)
                mean[i, :] = fc.predicted_mean
                ci = fc.conf_int(alpha=alpha)
                # conf_int() may return a DataFrame or a numpy array
                # depending on statsmodels version — handle both
                if hasattr(ci, "iloc"):
                    lower[i, :] = ci.iloc[:, 0].values
                    upper[i, :] = ci.iloc[:, 1].values
                else:
                    ci = np.asarray(ci)
                    lower[i, :] = ci[:, 0]
                    upper[i, :] = ci[:, 1]

        return mean, lower, upper

    def simulate(
        self,
        h: int,
        nsim: int,
        rng: np.random.Generator,
    ) -> np.ndarray:
        """Simulate future trajectories from fitted ARIMA models.

        Parameters
        ----------
        h:
            Forecast horizon.
        nsim:
            Number of simulations.
        rng:
            NumPy random generator (used to set seed for statsmodels).

        Returns
        -------
        np.ndarray
            Shape (N, h, nsim).
        """
        N = len(self.models)
        paths = np.zeros((N, h, nsim))
        seed = int(rng.integers(0, 2**31))

        for i, result in enumerate(self.models):
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                sims = result.simulate(
                    nsimulations=h,
                    repetitions=nsim,
                    anchor="end",
                    random_state=seed + i,
                )
                sims = np.asarray(sims).squeeze()   # collapse extra dims
                if sims.ndim == 1:
                    sims = sims[:, np.newaxis].repeat(nsim, axis=1)
                if sims.shape[0] != h:
                    sims = sims.T
                paths[i, :, :] = sims

        return paths

fit(kt, order=(1, 1, 0), include_constant=True) classmethod

Fit independent ARIMA models to each row of kt.

Parameters:

Name Type Description Default
kt ndarray

Shape (N, n_years) or (n_years,) for a single series.

required
order tuple[int, int, int]

ARIMA (p, d, q).

(1, 1, 0)
include_constant bool

Include constant/trend term.

True

Returns:

Type Description
IndependentArima
Source code in src/pystmomo/forecast/arima_fc.py
@classmethod
def fit(
    cls,
    kt: np.ndarray,
    order: tuple[int, int, int] = (1, 1, 0),
    include_constant: bool = True,
) -> IndependentArima:
    """Fit independent ARIMA models to each row of kt.

    Parameters
    ----------
    kt:
        Shape (N, n_years) or (n_years,) for a single series.
    order:
        ARIMA (p, d, q).
    include_constant:
        Include constant/trend term.

    Returns
    -------
    IndependentArima
    """
    from statsmodels.tsa.arima.model import ARIMA

    kt = np.atleast_2d(kt)
    N = kt.shape[0]
    _, d, _ = order
    # statsmodels semantics: when d >= 1, a constant in levels corresponds
    # to a linear trend term ('t'), not 'c'.  'c' is only valid for d=0.
    trend = ("t" if d >= 1 else "c") if include_constant else "n"

    fitted_models = []
    for i in range(N):
        series = kt[i, :]
        # Drop NaN/Inf values that might appear from sparse cohorts
        series = series[np.isfinite(series)]
        if len(series) < max(3, sum(order) + 2):
            raise ValueError(
                f"Series {i} has too few valid observations ({len(series)}) "
                f"to fit ARIMA{order}."
            )
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            model = ARIMA(series, order=order, trend=trend)
            result = model.fit()
        fitted_models.append(result)

    last_values = kt[:, -1]
    return cls(
        order=order,
        include_constant=include_constant,
        models=fitted_models,
        last_values=last_values,
    )

forecast(h, level=0.95)

Point forecast and confidence intervals for h steps ahead.

Parameters:

Name Type Description Default
h int

Forecast horizon.

required
level float

Confidence level (e.g. 0.95).

0.95

Returns:

Type Description
(mean, lower, upper)

Each shape (N, h).

Source code in src/pystmomo/forecast/arima_fc.py
def forecast(
    self,
    h: int,
    level: float = 0.95,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Point forecast and confidence intervals for h steps ahead.

    Parameters
    ----------
    h:
        Forecast horizon.
    level:
        Confidence level (e.g. 0.95).

    Returns
    -------
    mean, lower, upper
        Each shape (N, h).
    """
    N = len(self.models)
    mean = np.zeros((N, h))
    lower = np.zeros((N, h))
    upper = np.zeros((N, h))
    alpha = 1.0 - level

    for i, result in enumerate(self.models):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            fc = result.get_forecast(steps=h)
            mean[i, :] = fc.predicted_mean
            ci = fc.conf_int(alpha=alpha)
            # conf_int() may return a DataFrame or a numpy array
            # depending on statsmodels version — handle both
            if hasattr(ci, "iloc"):
                lower[i, :] = ci.iloc[:, 0].values
                upper[i, :] = ci.iloc[:, 1].values
            else:
                ci = np.asarray(ci)
                lower[i, :] = ci[:, 0]
                upper[i, :] = ci[:, 1]

    return mean, lower, upper

simulate(h, nsim, rng)

Simulate future trajectories from fitted ARIMA models.

Parameters:

Name Type Description Default
h int

Forecast horizon.

required
nsim int

Number of simulations.

required
rng Generator

NumPy random generator (used to set seed for statsmodels).

required

Returns:

Type Description
ndarray

Shape (N, h, nsim).

Source code in src/pystmomo/forecast/arima_fc.py
def simulate(
    self,
    h: int,
    nsim: int,
    rng: np.random.Generator,
) -> np.ndarray:
    """Simulate future trajectories from fitted ARIMA models.

    Parameters
    ----------
    h:
        Forecast horizon.
    nsim:
        Number of simulations.
    rng:
        NumPy random generator (used to set seed for statsmodels).

    Returns
    -------
    np.ndarray
        Shape (N, h, nsim).
    """
    N = len(self.models)
    paths = np.zeros((N, h, nsim))
    seed = int(rng.integers(0, 2**31))

    for i, result in enumerate(self.models):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            sims = result.simulate(
                nsimulations=h,
                repetitions=nsim,
                anchor="end",
                random_state=seed + i,
            )
            sims = np.asarray(sims).squeeze()   # collapse extra dims
            if sims.ndim == 1:
                sims = sims[:, np.newaxis].repeat(nsim, axis=1)
            if sims.shape[0] != h:
                sims = sims.T
            paths[i, :, :] = sims

    return paths

MultivariateRandomWalkDrift

Multivariate Random Walk with Drift fitted to a matrix of period indexes.

The model is: Δκ_t = drift + ε_t, ε_t ~ MVN(0, Σ)

Forecast mean at step s: last + s * drift Forecast variance at step s: Var(s) = s * Σ + (s²/T) * Σ where T = number of differences (n_years - 1).

The second term accounts for estimation uncertainty in the drift.

Attributes:

Name Type Description
drift

Shape (N,) — mean of first differences.

sigma

Shape (N, N) — sample covariance of first differences.

last

Shape (N,) — last observed κ_t values.

n_years

Number of fitted years (T+1); T = n_years - 1 differences are used.

Source code in src/pystmomo/forecast/mrwd.py
class MultivariateRandomWalkDrift:
    """Multivariate Random Walk with Drift fitted to a matrix of period indexes.

    The model is:
        Δκ_t = drift + ε_t,   ε_t ~ MVN(0, Σ)

    Forecast mean at step s:  last + s * drift
    Forecast variance at step s:
        Var(s) = s * Σ  +  (s²/T) * Σ
    where T = number of differences (n_years - 1).

    The second term accounts for estimation uncertainty in the drift.

    Attributes
    ----------
    drift:
        Shape (N,) — mean of first differences.
    sigma:
        Shape (N, N) — sample covariance of first differences.
    last:
        Shape (N,) — last observed κ_t values.
    n_years:
        Number of fitted years (T+1); T = n_years - 1 differences are used.
    """

    def __init__(
        self,
        drift: np.ndarray,
        sigma: np.ndarray,
        last: np.ndarray,
        n_years: int,
    ) -> None:
        self.drift = drift
        self.sigma = sigma
        self.last = last
        self.n_years = n_years

    @classmethod
    def fit(cls, kt: np.ndarray) -> MultivariateRandomWalkDrift:
        """Fit MRWD to observed period indexes.

        Parameters
        ----------
        kt:
            Period indexes, shape (N, n_years).

        Returns
        -------
        MultivariateRandomWalkDrift
        """
        kt = np.atleast_2d(kt)
        # First differences along years axis: shape (N, n_years-1)
        diff = np.diff(kt, axis=1)
        drift = diff.mean(axis=1)                  # (N,)
        # Covariance of differences: rowvar=True means each row is a variable
        if diff.shape[0] == 1:
            # Single period index: 1x1 covariance
            sigma = np.var(diff, ddof=1, keepdims=False).reshape(1, 1)
        else:
            sigma = np.cov(diff, rowvar=True)      # (N, N)
        last = kt[:, -1]                           # (N,)
        return cls(drift=drift, sigma=sigma, last=last, n_years=kt.shape[1])

    def forecast(
        self,
        h: int,
        level: float = 0.95,
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Point forecast and confidence interval for h steps ahead.

        Parameters
        ----------
        h:
            Forecast horizon.
        level:
            Confidence level (e.g. 0.95).

        Returns
        -------
        mean, lower, upper
            Each shape (N, h).
        """
        N = len(self.drift)
        T = self.n_years - 1          # number of differences used
        z = stats.norm.ppf(0.5 + level / 2.0)

        mean = np.zeros((N, h))
        lower = np.zeros((N, h))
        upper = np.zeros((N, h))

        for s in range(1, h + 1):
            # Forecast mean
            mean[:, s - 1] = self.last + s * self.drift

            # Forecast covariance: s*Σ + (s²/T)*Σ  = (s + s²/T) * Σ
            factor = s + (s ** 2) / T
            var_diag = factor * np.diag(self.sigma)     # (N,)
            sd = np.sqrt(np.maximum(var_diag, 0.0))

            lower[:, s - 1] = mean[:, s - 1] - z * sd
            upper[:, s - 1] = mean[:, s - 1] + z * sd

        return mean, lower, upper

    def simulate(
        self,
        h: int,
        nsim: int,
        rng: np.random.Generator,
    ) -> np.ndarray:
        """Simulate future trajectories using Cholesky-based MVN innovations.

        Parameters
        ----------
        h:
            Forecast horizon.
        nsim:
            Number of simulations.
        rng:
            NumPy random generator.

        Returns
        -------
        np.ndarray
            Shape (N, h, nsim).
        """
        N = len(self.drift)
        # Cholesky decomposition of sigma
        try:
            L = np.linalg.cholesky(self.sigma)
        except np.linalg.LinAlgError:
            # Add small jitter for numerical stability
            L = np.linalg.cholesky(self.sigma + 1e-10 * np.eye(N))

        paths = np.zeros((N, h, nsim))
        kt_prev = np.tile(self.last[:, None], (1, nsim))  # (N, nsim)

        for s in range(h):
            # Standard normals: (N, nsim)
            z = rng.standard_normal((N, nsim))
            # Correlated MVN innovation: L @ z, shape (N, nsim)
            innov = L @ z
            kt_prev = kt_prev + self.drift[:, None] + innov
            paths[:, s, :] = kt_prev

        return paths

fit(kt) classmethod

Fit MRWD to observed period indexes.

Parameters:

Name Type Description Default
kt ndarray

Period indexes, shape (N, n_years).

required

Returns:

Type Description
MultivariateRandomWalkDrift
Source code in src/pystmomo/forecast/mrwd.py
@classmethod
def fit(cls, kt: np.ndarray) -> MultivariateRandomWalkDrift:
    """Fit MRWD to observed period indexes.

    Parameters
    ----------
    kt:
        Period indexes, shape (N, n_years).

    Returns
    -------
    MultivariateRandomWalkDrift
    """
    kt = np.atleast_2d(kt)
    # First differences along years axis: shape (N, n_years-1)
    diff = np.diff(kt, axis=1)
    drift = diff.mean(axis=1)                  # (N,)
    # Covariance of differences: rowvar=True means each row is a variable
    if diff.shape[0] == 1:
        # Single period index: 1x1 covariance
        sigma = np.var(diff, ddof=1, keepdims=False).reshape(1, 1)
    else:
        sigma = np.cov(diff, rowvar=True)      # (N, N)
    last = kt[:, -1]                           # (N,)
    return cls(drift=drift, sigma=sigma, last=last, n_years=kt.shape[1])

forecast(h, level=0.95)

Point forecast and confidence interval for h steps ahead.

Parameters:

Name Type Description Default
h int

Forecast horizon.

required
level float

Confidence level (e.g. 0.95).

0.95

Returns:

Type Description
(mean, lower, upper)

Each shape (N, h).

Source code in src/pystmomo/forecast/mrwd.py
def forecast(
    self,
    h: int,
    level: float = 0.95,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Point forecast and confidence interval for h steps ahead.

    Parameters
    ----------
    h:
        Forecast horizon.
    level:
        Confidence level (e.g. 0.95).

    Returns
    -------
    mean, lower, upper
        Each shape (N, h).
    """
    N = len(self.drift)
    T = self.n_years - 1          # number of differences used
    z = stats.norm.ppf(0.5 + level / 2.0)

    mean = np.zeros((N, h))
    lower = np.zeros((N, h))
    upper = np.zeros((N, h))

    for s in range(1, h + 1):
        # Forecast mean
        mean[:, s - 1] = self.last + s * self.drift

        # Forecast covariance: s*Σ + (s²/T)*Σ  = (s + s²/T) * Σ
        factor = s + (s ** 2) / T
        var_diag = factor * np.diag(self.sigma)     # (N,)
        sd = np.sqrt(np.maximum(var_diag, 0.0))

        lower[:, s - 1] = mean[:, s - 1] - z * sd
        upper[:, s - 1] = mean[:, s - 1] + z * sd

    return mean, lower, upper

simulate(h, nsim, rng)

Simulate future trajectories using Cholesky-based MVN innovations.

Parameters:

Name Type Description Default
h int

Forecast horizon.

required
nsim int

Number of simulations.

required
rng Generator

NumPy random generator.

required

Returns:

Type Description
ndarray

Shape (N, h, nsim).

Source code in src/pystmomo/forecast/mrwd.py
def simulate(
    self,
    h: int,
    nsim: int,
    rng: np.random.Generator,
) -> np.ndarray:
    """Simulate future trajectories using Cholesky-based MVN innovations.

    Parameters
    ----------
    h:
        Forecast horizon.
    nsim:
        Number of simulations.
    rng:
        NumPy random generator.

    Returns
    -------
    np.ndarray
        Shape (N, h, nsim).
    """
    N = len(self.drift)
    # Cholesky decomposition of sigma
    try:
        L = np.linalg.cholesky(self.sigma)
    except np.linalg.LinAlgError:
        # Add small jitter for numerical stability
        L = np.linalg.cholesky(self.sigma + 1e-10 * np.eye(N))

    paths = np.zeros((N, h, nsim))
    kt_prev = np.tile(self.last[:, None], (1, nsim))  # (N, nsim)

    for s in range(h):
        # Standard normals: (N, nsim)
        z = rng.standard_normal((N, nsim))
        # Correlated MVN innovation: L @ z, shape (N, nsim)
        innov = L @ z
        kt_prev = kt_prev + self.drift[:, None] + innov
        paths[:, s, :] = kt_prev

    return paths

ForStMoMo

Result of :func:~pystmomo.forecast.forecast.

Attributes:

Name Type Description
fit FitStMoMo

The fitted model that was projected.

h int

Forecast horizon (number of future years).

years_f ndarray

Future year labels, shape (h,).

cohorts_f ndarray

All future cohorts that were forecast (those beyond max(fit.cohorts)), shape (n_new_cohorts,). Empty array when the model has no cohort term.

kt_f ndarray

Central forecast of period indexes, shape (N, h).

kt_f_lower ndarray

Lower confidence band for kt, shape (N, h).

kt_f_upper ndarray

Upper confidence band for kt, shape (N, h).

gc_f ndarray | None

Central forecast of new cohort indexes, shape (n_new_cohorts,). None when the model has no cohort term.

gc_f_lower ndarray | None

Lower band for gc, shape (n_new_cohorts,). None if no cohort.

gc_f_upper ndarray | None

Upper band for gc, shape (n_new_cohorts,). None if no cohort.

rates ndarray

Central forecast mortality rates, shape (n_ages, h).

level float

Confidence level used for intervals.

kt_model object

The fitted MRWD or IndependentArima used to project kt.

gc_model object | None

The fitted model used to project gc, or None.

Source code in src/pystmomo/forecast/forecast_result.py
@dataclass
class ForStMoMo:
    """Result of :func:`~pystmomo.forecast.forecast`.

    Attributes
    ----------
    fit:
        The fitted model that was projected.
    h:
        Forecast horizon (number of future years).
    years_f:
        Future year labels, shape (h,).
    cohorts_f:
        All future cohorts that were forecast (those beyond max(fit.cohorts)),
        shape (n_new_cohorts,).  Empty array when the model has no cohort term.
    kt_f:
        Central forecast of period indexes, shape (N, h).
    kt_f_lower:
        Lower confidence band for kt, shape (N, h).
    kt_f_upper:
        Upper confidence band for kt, shape (N, h).
    gc_f:
        Central forecast of new cohort indexes, shape (n_new_cohorts,).
        None when the model has no cohort term.
    gc_f_lower:
        Lower band for gc, shape (n_new_cohorts,).  None if no cohort.
    gc_f_upper:
        Upper band for gc, shape (n_new_cohorts,).  None if no cohort.
    rates:
        Central forecast mortality rates, shape (n_ages, h).
    level:
        Confidence level used for intervals.
    kt_model:
        The fitted MRWD or IndependentArima used to project kt.
    gc_model:
        The fitted model used to project gc, or None.
    """

    fit: FitStMoMo
    h: int
    years_f: np.ndarray
    cohorts_f: np.ndarray
    kt_f: np.ndarray
    kt_f_lower: np.ndarray
    kt_f_upper: np.ndarray
    gc_f: np.ndarray | None
    gc_f_lower: np.ndarray | None
    gc_f_upper: np.ndarray | None
    rates: np.ndarray
    level: float
    kt_model: object
    gc_model: object | None = field(default=None)

    def __repr__(self) -> str:
        n_ages = self.rates.shape[0]
        return (
            f"ForStMoMo(\n"
            f"  model  = {self.fit.model}\n"
            f"  years  = {self.years_f[0]}{self.years_f[-1]}  (h={self.h})\n"
            f"  n_ages = {n_ages}\n"
            f"  rates  shape = {self.rates.shape}\n"
            f"  level  = {self.level:.0%}\n"
            f")"
        )

MRWD

Multivariate Random Walk with Drift fitted to a matrix of period indexes.

The model is: Δκ_t = drift + ε_t, ε_t ~ MVN(0, Σ)

Forecast mean at step s: last + s * drift Forecast variance at step s: Var(s) = s * Σ + (s²/T) * Σ where T = number of differences (n_years - 1).

The second term accounts for estimation uncertainty in the drift.

Attributes:

Name Type Description
drift

Shape (N,) — mean of first differences.

sigma

Shape (N, N) — sample covariance of first differences.

last

Shape (N,) — last observed κ_t values.

n_years

Number of fitted years (T+1); T = n_years - 1 differences are used.

Source code in src/pystmomo/forecast/mrwd.py
class MultivariateRandomWalkDrift:
    """Multivariate Random Walk with Drift fitted to a matrix of period indexes.

    The model is:
        Δκ_t = drift + ε_t,   ε_t ~ MVN(0, Σ)

    Forecast mean at step s:  last + s * drift
    Forecast variance at step s:
        Var(s) = s * Σ  +  (s²/T) * Σ
    where T = number of differences (n_years - 1).

    The second term accounts for estimation uncertainty in the drift.

    Attributes
    ----------
    drift:
        Shape (N,) — mean of first differences.
    sigma:
        Shape (N, N) — sample covariance of first differences.
    last:
        Shape (N,) — last observed κ_t values.
    n_years:
        Number of fitted years (T+1); T = n_years - 1 differences are used.
    """

    def __init__(
        self,
        drift: np.ndarray,
        sigma: np.ndarray,
        last: np.ndarray,
        n_years: int,
    ) -> None:
        self.drift = drift
        self.sigma = sigma
        self.last = last
        self.n_years = n_years

    @classmethod
    def fit(cls, kt: np.ndarray) -> MultivariateRandomWalkDrift:
        """Fit MRWD to observed period indexes.

        Parameters
        ----------
        kt:
            Period indexes, shape (N, n_years).

        Returns
        -------
        MultivariateRandomWalkDrift
        """
        kt = np.atleast_2d(kt)
        # First differences along years axis: shape (N, n_years-1)
        diff = np.diff(kt, axis=1)
        drift = diff.mean(axis=1)                  # (N,)
        # Covariance of differences: rowvar=True means each row is a variable
        if diff.shape[0] == 1:
            # Single period index: 1x1 covariance
            sigma = np.var(diff, ddof=1, keepdims=False).reshape(1, 1)
        else:
            sigma = np.cov(diff, rowvar=True)      # (N, N)
        last = kt[:, -1]                           # (N,)
        return cls(drift=drift, sigma=sigma, last=last, n_years=kt.shape[1])

    def forecast(
        self,
        h: int,
        level: float = 0.95,
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Point forecast and confidence interval for h steps ahead.

        Parameters
        ----------
        h:
            Forecast horizon.
        level:
            Confidence level (e.g. 0.95).

        Returns
        -------
        mean, lower, upper
            Each shape (N, h).
        """
        N = len(self.drift)
        T = self.n_years - 1          # number of differences used
        z = stats.norm.ppf(0.5 + level / 2.0)

        mean = np.zeros((N, h))
        lower = np.zeros((N, h))
        upper = np.zeros((N, h))

        for s in range(1, h + 1):
            # Forecast mean
            mean[:, s - 1] = self.last + s * self.drift

            # Forecast covariance: s*Σ + (s²/T)*Σ  = (s + s²/T) * Σ
            factor = s + (s ** 2) / T
            var_diag = factor * np.diag(self.sigma)     # (N,)
            sd = np.sqrt(np.maximum(var_diag, 0.0))

            lower[:, s - 1] = mean[:, s - 1] - z * sd
            upper[:, s - 1] = mean[:, s - 1] + z * sd

        return mean, lower, upper

    def simulate(
        self,
        h: int,
        nsim: int,
        rng: np.random.Generator,
    ) -> np.ndarray:
        """Simulate future trajectories using Cholesky-based MVN innovations.

        Parameters
        ----------
        h:
            Forecast horizon.
        nsim:
            Number of simulations.
        rng:
            NumPy random generator.

        Returns
        -------
        np.ndarray
            Shape (N, h, nsim).
        """
        N = len(self.drift)
        # Cholesky decomposition of sigma
        try:
            L = np.linalg.cholesky(self.sigma)
        except np.linalg.LinAlgError:
            # Add small jitter for numerical stability
            L = np.linalg.cholesky(self.sigma + 1e-10 * np.eye(N))

        paths = np.zeros((N, h, nsim))
        kt_prev = np.tile(self.last[:, None], (1, nsim))  # (N, nsim)

        for s in range(h):
            # Standard normals: (N, nsim)
            z = rng.standard_normal((N, nsim))
            # Correlated MVN innovation: L @ z, shape (N, nsim)
            innov = L @ z
            kt_prev = kt_prev + self.drift[:, None] + innov
            paths[:, s, :] = kt_prev

        return paths

fit(kt) classmethod

Fit MRWD to observed period indexes.

Parameters:

Name Type Description Default
kt ndarray

Period indexes, shape (N, n_years).

required

Returns:

Type Description
MultivariateRandomWalkDrift
Source code in src/pystmomo/forecast/mrwd.py
@classmethod
def fit(cls, kt: np.ndarray) -> MultivariateRandomWalkDrift:
    """Fit MRWD to observed period indexes.

    Parameters
    ----------
    kt:
        Period indexes, shape (N, n_years).

    Returns
    -------
    MultivariateRandomWalkDrift
    """
    kt = np.atleast_2d(kt)
    # First differences along years axis: shape (N, n_years-1)
    diff = np.diff(kt, axis=1)
    drift = diff.mean(axis=1)                  # (N,)
    # Covariance of differences: rowvar=True means each row is a variable
    if diff.shape[0] == 1:
        # Single period index: 1x1 covariance
        sigma = np.var(diff, ddof=1, keepdims=False).reshape(1, 1)
    else:
        sigma = np.cov(diff, rowvar=True)      # (N, N)
    last = kt[:, -1]                           # (N,)
    return cls(drift=drift, sigma=sigma, last=last, n_years=kt.shape[1])

forecast(h, level=0.95)

Point forecast and confidence interval for h steps ahead.

Parameters:

Name Type Description Default
h int

Forecast horizon.

required
level float

Confidence level (e.g. 0.95).

0.95

Returns:

Type Description
(mean, lower, upper)

Each shape (N, h).

Source code in src/pystmomo/forecast/mrwd.py
def forecast(
    self,
    h: int,
    level: float = 0.95,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Point forecast and confidence interval for h steps ahead.

    Parameters
    ----------
    h:
        Forecast horizon.
    level:
        Confidence level (e.g. 0.95).

    Returns
    -------
    mean, lower, upper
        Each shape (N, h).
    """
    N = len(self.drift)
    T = self.n_years - 1          # number of differences used
    z = stats.norm.ppf(0.5 + level / 2.0)

    mean = np.zeros((N, h))
    lower = np.zeros((N, h))
    upper = np.zeros((N, h))

    for s in range(1, h + 1):
        # Forecast mean
        mean[:, s - 1] = self.last + s * self.drift

        # Forecast covariance: s*Σ + (s²/T)*Σ  = (s + s²/T) * Σ
        factor = s + (s ** 2) / T
        var_diag = factor * np.diag(self.sigma)     # (N,)
        sd = np.sqrt(np.maximum(var_diag, 0.0))

        lower[:, s - 1] = mean[:, s - 1] - z * sd
        upper[:, s - 1] = mean[:, s - 1] + z * sd

    return mean, lower, upper

simulate(h, nsim, rng)

Simulate future trajectories using Cholesky-based MVN innovations.

Parameters:

Name Type Description Default
h int

Forecast horizon.

required
nsim int

Number of simulations.

required
rng Generator

NumPy random generator.

required

Returns:

Type Description
ndarray

Shape (N, h, nsim).

Source code in src/pystmomo/forecast/mrwd.py
def simulate(
    self,
    h: int,
    nsim: int,
    rng: np.random.Generator,
) -> np.ndarray:
    """Simulate future trajectories using Cholesky-based MVN innovations.

    Parameters
    ----------
    h:
        Forecast horizon.
    nsim:
        Number of simulations.
    rng:
        NumPy random generator.

    Returns
    -------
    np.ndarray
        Shape (N, h, nsim).
    """
    N = len(self.drift)
    # Cholesky decomposition of sigma
    try:
        L = np.linalg.cholesky(self.sigma)
    except np.linalg.LinAlgError:
        # Add small jitter for numerical stability
        L = np.linalg.cholesky(self.sigma + 1e-10 * np.eye(N))

    paths = np.zeros((N, h, nsim))
    kt_prev = np.tile(self.last[:, None], (1, nsim))  # (N, nsim)

    for s in range(h):
        # Standard normals: (N, nsim)
        z = rng.standard_normal((N, nsim))
        # Correlated MVN innovation: L @ z, shape (N, nsim)
        innov = L @ z
        kt_prev = kt_prev + self.drift[:, None] + innov
        paths[:, s, :] = kt_prev

    return paths

Independent ARIMA

One ARIMA model per row of kt (or per scalar gc).

Parameters:

Name Type Description Default
order tuple[int, int, int]

ARIMA order (p, d, q).

required
include_constant bool

Whether to include a constant/trend term.

required
models list

List of fitted statsmodels ARIMA results, one per series.

required
last_values ndarray

Shape (N,) — last observed values for each series.

required
Source code in src/pystmomo/forecast/arima_fc.py
class IndependentArima:
    """One ARIMA model per row of kt (or per scalar gc).

    Parameters
    ----------
    order:
        ARIMA order (p, d, q).
    include_constant:
        Whether to include a constant/trend term.
    models:
        List of fitted statsmodels ARIMA results, one per series.
    last_values:
        Shape (N,) — last observed values for each series.
    """

    def __init__(
        self,
        order: tuple[int, int, int],
        include_constant: bool,
        models: list,
        last_values: np.ndarray,
    ) -> None:
        self.order = order
        self.include_constant = include_constant
        self.models = models
        self.last_values = last_values

    @classmethod
    def fit(
        cls,
        kt: np.ndarray,
        order: tuple[int, int, int] = (1, 1, 0),
        include_constant: bool = True,
    ) -> IndependentArima:
        """Fit independent ARIMA models to each row of kt.

        Parameters
        ----------
        kt:
            Shape (N, n_years) or (n_years,) for a single series.
        order:
            ARIMA (p, d, q).
        include_constant:
            Include constant/trend term.

        Returns
        -------
        IndependentArima
        """
        from statsmodels.tsa.arima.model import ARIMA

        kt = np.atleast_2d(kt)
        N = kt.shape[0]
        _, d, _ = order
        # statsmodels semantics: when d >= 1, a constant in levels corresponds
        # to a linear trend term ('t'), not 'c'.  'c' is only valid for d=0.
        trend = ("t" if d >= 1 else "c") if include_constant else "n"

        fitted_models = []
        for i in range(N):
            series = kt[i, :]
            # Drop NaN/Inf values that might appear from sparse cohorts
            series = series[np.isfinite(series)]
            if len(series) < max(3, sum(order) + 2):
                raise ValueError(
                    f"Series {i} has too few valid observations ({len(series)}) "
                    f"to fit ARIMA{order}."
                )
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                model = ARIMA(series, order=order, trend=trend)
                result = model.fit()
            fitted_models.append(result)

        last_values = kt[:, -1]
        return cls(
            order=order,
            include_constant=include_constant,
            models=fitted_models,
            last_values=last_values,
        )

    def forecast(
        self,
        h: int,
        level: float = 0.95,
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Point forecast and confidence intervals for h steps ahead.

        Parameters
        ----------
        h:
            Forecast horizon.
        level:
            Confidence level (e.g. 0.95).

        Returns
        -------
        mean, lower, upper
            Each shape (N, h).
        """
        N = len(self.models)
        mean = np.zeros((N, h))
        lower = np.zeros((N, h))
        upper = np.zeros((N, h))
        alpha = 1.0 - level

        for i, result in enumerate(self.models):
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                fc = result.get_forecast(steps=h)
                mean[i, :] = fc.predicted_mean
                ci = fc.conf_int(alpha=alpha)
                # conf_int() may return a DataFrame or a numpy array
                # depending on statsmodels version — handle both
                if hasattr(ci, "iloc"):
                    lower[i, :] = ci.iloc[:, 0].values
                    upper[i, :] = ci.iloc[:, 1].values
                else:
                    ci = np.asarray(ci)
                    lower[i, :] = ci[:, 0]
                    upper[i, :] = ci[:, 1]

        return mean, lower, upper

    def simulate(
        self,
        h: int,
        nsim: int,
        rng: np.random.Generator,
    ) -> np.ndarray:
        """Simulate future trajectories from fitted ARIMA models.

        Parameters
        ----------
        h:
            Forecast horizon.
        nsim:
            Number of simulations.
        rng:
            NumPy random generator (used to set seed for statsmodels).

        Returns
        -------
        np.ndarray
            Shape (N, h, nsim).
        """
        N = len(self.models)
        paths = np.zeros((N, h, nsim))
        seed = int(rng.integers(0, 2**31))

        for i, result in enumerate(self.models):
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                sims = result.simulate(
                    nsimulations=h,
                    repetitions=nsim,
                    anchor="end",
                    random_state=seed + i,
                )
                sims = np.asarray(sims).squeeze()   # collapse extra dims
                if sims.ndim == 1:
                    sims = sims[:, np.newaxis].repeat(nsim, axis=1)
                if sims.shape[0] != h:
                    sims = sims.T
                paths[i, :, :] = sims

        return paths

fit(kt, order=(1, 1, 0), include_constant=True) classmethod

Fit independent ARIMA models to each row of kt.

Parameters:

Name Type Description Default
kt ndarray

Shape (N, n_years) or (n_years,) for a single series.

required
order tuple[int, int, int]

ARIMA (p, d, q).

(1, 1, 0)
include_constant bool

Include constant/trend term.

True

Returns:

Type Description
IndependentArima
Source code in src/pystmomo/forecast/arima_fc.py
@classmethod
def fit(
    cls,
    kt: np.ndarray,
    order: tuple[int, int, int] = (1, 1, 0),
    include_constant: bool = True,
) -> IndependentArima:
    """Fit independent ARIMA models to each row of kt.

    Parameters
    ----------
    kt:
        Shape (N, n_years) or (n_years,) for a single series.
    order:
        ARIMA (p, d, q).
    include_constant:
        Include constant/trend term.

    Returns
    -------
    IndependentArima
    """
    from statsmodels.tsa.arima.model import ARIMA

    kt = np.atleast_2d(kt)
    N = kt.shape[0]
    _, d, _ = order
    # statsmodels semantics: when d >= 1, a constant in levels corresponds
    # to a linear trend term ('t'), not 'c'.  'c' is only valid for d=0.
    trend = ("t" if d >= 1 else "c") if include_constant else "n"

    fitted_models = []
    for i in range(N):
        series = kt[i, :]
        # Drop NaN/Inf values that might appear from sparse cohorts
        series = series[np.isfinite(series)]
        if len(series) < max(3, sum(order) + 2):
            raise ValueError(
                f"Series {i} has too few valid observations ({len(series)}) "
                f"to fit ARIMA{order}."
            )
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            model = ARIMA(series, order=order, trend=trend)
            result = model.fit()
        fitted_models.append(result)

    last_values = kt[:, -1]
    return cls(
        order=order,
        include_constant=include_constant,
        models=fitted_models,
        last_values=last_values,
    )

forecast(h, level=0.95)

Point forecast and confidence intervals for h steps ahead.

Parameters:

Name Type Description Default
h int

Forecast horizon.

required
level float

Confidence level (e.g. 0.95).

0.95

Returns:

Type Description
(mean, lower, upper)

Each shape (N, h).

Source code in src/pystmomo/forecast/arima_fc.py
def forecast(
    self,
    h: int,
    level: float = 0.95,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Point forecast and confidence intervals for h steps ahead.

    Parameters
    ----------
    h:
        Forecast horizon.
    level:
        Confidence level (e.g. 0.95).

    Returns
    -------
    mean, lower, upper
        Each shape (N, h).
    """
    N = len(self.models)
    mean = np.zeros((N, h))
    lower = np.zeros((N, h))
    upper = np.zeros((N, h))
    alpha = 1.0 - level

    for i, result in enumerate(self.models):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            fc = result.get_forecast(steps=h)
            mean[i, :] = fc.predicted_mean
            ci = fc.conf_int(alpha=alpha)
            # conf_int() may return a DataFrame or a numpy array
            # depending on statsmodels version — handle both
            if hasattr(ci, "iloc"):
                lower[i, :] = ci.iloc[:, 0].values
                upper[i, :] = ci.iloc[:, 1].values
            else:
                ci = np.asarray(ci)
                lower[i, :] = ci[:, 0]
                upper[i, :] = ci[:, 1]

    return mean, lower, upper

simulate(h, nsim, rng)

Simulate future trajectories from fitted ARIMA models.

Parameters:

Name Type Description Default
h int

Forecast horizon.

required
nsim int

Number of simulations.

required
rng Generator

NumPy random generator (used to set seed for statsmodels).

required

Returns:

Type Description
ndarray

Shape (N, h, nsim).

Source code in src/pystmomo/forecast/arima_fc.py
def simulate(
    self,
    h: int,
    nsim: int,
    rng: np.random.Generator,
) -> np.ndarray:
    """Simulate future trajectories from fitted ARIMA models.

    Parameters
    ----------
    h:
        Forecast horizon.
    nsim:
        Number of simulations.
    rng:
        NumPy random generator (used to set seed for statsmodels).

    Returns
    -------
    np.ndarray
        Shape (N, h, nsim).
    """
    N = len(self.models)
    paths = np.zeros((N, h, nsim))
    seed = int(rng.integers(0, 2**31))

    for i, result in enumerate(self.models):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            sims = result.simulate(
                nsimulations=h,
                repetitions=nsim,
                anchor="end",
                random_state=seed + i,
            )
            sims = np.asarray(sims).squeeze()   # collapse extra dims
            if sims.ndim == 1:
                sims = sims[:, np.newaxis].repeat(nsim, axis=1)
            if sims.shape[0] != h:
                sims = sims.T
            paths[i, :, :] = sims

    return paths