External Forecasters¶

pyStMoMo's built-in period-index forecasters (MRWD and ARIMA) can be replaced by any external model — LSTM, Transformer, Prophet, XGBoost, Gaussian Process, etc. — by wrapping it in an ExternalKtForecaster.

The wrapper plugs seamlessly into forecast() and simulate() via the kt_method and gc_method parameters, while the GAPC rate reconstruction pipeline remains unchanged.

Interface required¶

Your external model must be expressible as two callables:

def forecast_fn(h: int, *, level: float = 0.95):
    """
    Returns one of:
      - np.ndarray of shape (N, h)              — point forecast only
      - tuple (mean, lower, upper) each (N, h)  — with confidence bands
        (pass None for lower/upper to omit intervals)
    """
    ...

def simulate_fn(h: int, nsim: int, *, rng: np.random.Generator):
    """Returns np.ndarray of shape (N, h, nsim)."""
    ...

N = number of period indexes in the model (fit.model.N).

Model	N	Has cohort (gc)
LC	1	No
CBD	2	No
APC	1	Yes
RH	1	Yes
M6	2	Yes
M7	3	Yes
M8	2	Yes

Example — LC with a custom random walk¶

import numpy as np
import pystmomo as ps
from pystmomo import ExternalKtForecaster

data = ps.load_ew_male()
fit  = ps.lc().fit(data.deaths, data.exposures,
                   ages=data.ages, years=data.years)

kt    = fit.kt[0]             # shape (51,) — the fitted period index
slope = np.diff(kt).mean()
sigma = np.diff(kt).std()

def forecast_fn(h, *, level=0.95):
    from scipy.stats import norm
    z    = norm.ppf((1 + level) / 2)
    mean = (kt[-1] + slope * np.arange(1, h + 1)).reshape(1, h)
    se   = sigma * np.sqrt(np.arange(1, h + 1))
    return mean, mean - z * se, mean + z * se

def simulate_fn(h, nsim, *, rng):
    innov = rng.normal(slope, sigma, (h, nsim))
    paths = kt[-1] + np.cumsum(innov, axis=0)    # (h, nsim)
    return paths[np.newaxis, :, :]                # (1, h, nsim)

ext = ExternalKtForecaster(forecast_fn, simulate_fn)
fc  = ps.forecast(fit, h=20, kt_method=ext)
sim = ps.simulate(fit, nsim=1000, h=20, kt_method=ext, seed=42)

Example — CBD with independent models per index (N=2)¶

CBD has two period indexes: κ_t^(1) (level) and κ_t^(2) (slope-in-age).

fit = ps.cbd().fit(data.deaths, data.exposures,
                   ages=data.ages, years=data.years)

kt     = fit.kt                               # shape (2, 51)
slopes = np.diff(kt, axis=1).mean(axis=1)    # (2,)
sigmas = np.diff(kt, axis=1).std(axis=1)     # (2,)

def forecast_fn(h, *, level=0.95):
    from scipy.stats import norm
    z    = norm.ppf((1 + level) / 2)
    mean = kt[:, -1:] + slopes[:, None] * np.arange(1, h + 1)   # (2, h)
    se   = sigmas[:, None] * np.sqrt(np.arange(1, h + 1))
    return mean, mean - z * se, mean + z * se

def simulate_fn(h, nsim, *, rng):
    paths = np.zeros((2, h, nsim))
    for i in range(2):
        innov      = rng.normal(slopes[i], sigmas[i], (h, nsim))
        paths[i]   = kt[i, -1] + np.cumsum(innov, axis=0)
    return paths

ext = ExternalKtForecaster(forecast_fn, simulate_fn)
fc  = ps.forecast(fit, h=20, kt_method=ext)

Example — APC with separate kt and gc models¶

APC has one period index and a cohort effect. Pass one ExternalKtForecaster for each via kt_method and gc_method.

fit = ps.apc().fit(data.deaths, data.exposures,
                   ages=data.ages, years=data.years)

# ── period index (N=1) ─────────────────────────────────────────────
kt      = fit.kt[0]
slope_k = np.diff(kt).mean();  sigma_k = np.diff(kt).std()

def kt_simulate(h, nsim, *, rng):
    innov = rng.normal(slope_k, sigma_k, (h, nsim))
    return (kt[-1] + np.cumsum(innov, axis=0))[np.newaxis]   # (1,h,nsim)

def kt_forecast(h, *, level=0.95):
    mean = (kt[-1] + slope_k * np.arange(1, h+1)).reshape(1, h)
    return mean, mean, mean

# ── cohort index ───────────────────────────────────────────────────
gc      = fit.gc[np.isfinite(fit.gc)]
slope_g = np.diff(gc).mean();  sigma_g = np.diff(gc).std()

def gc_simulate(h, nsim, *, rng):
    innov = rng.normal(slope_g, sigma_g, (h, nsim))
    return (gc[-1] + np.cumsum(innov, axis=0))[np.newaxis]   # (1,h,nsim)

def gc_forecast(h, *, level=0.95):
    mean = (gc[-1] + slope_g * np.arange(1, h+1)).reshape(1, h)
    return mean, mean, mean

fc = ps.forecast(fit, h=20,
                 kt_method=ExternalKtForecaster(kt_forecast, kt_simulate),
                 gc_method=ExternalKtForecaster(gc_forecast, gc_simulate))

Number of new cohorts ≠ h

The gc forecaster will be called for new cohorts only (those not yet observed), not for all h future years. The number of new cohorts is typically h + len(ages) - 1, not h. Your simulate_fn receives the correct h_new automatically.

Example — LSTM with PyTorch (LC)¶

import torch
import torch.nn as nn
import pystmomo as ps
from pystmomo import ExternalKtForecaster

data = ps.load_ew_male()
fit  = ps.lc().fit(data.deaths, data.exposures,
                   ages=data.ages, years=data.years)
kt   = fit.kt[0]   # (51,)

# ── Normalise ──────────────────────────────────────────────────────
mu_k, sd_k = kt.mean(), kt.std()
kt_n = (kt - mu_k) / sd_k

# ── Build sliding-window dataset ───────────────────────────────────
lookback = 10
X = torch.FloatTensor(
    [kt_n[i:i+lookback] for i in range(len(kt_n) - lookback)]
).unsqueeze(-1)                         # (n, lookback, 1)
y = torch.FloatTensor(kt_n[lookback:]).unsqueeze(-1)   # (n, 1)

# ── Define and train ───────────────────────────────────────────────
class KtLSTM(nn.Module):
    def __init__(self, hidden=32):
        super().__init__()
        self.lstm = nn.LSTM(1, hidden, batch_first=True)
        self.fc   = nn.Linear(hidden, 1)
    def forward(self, x):
        return self.fc(self.lstm(x)[0][:, -1])

model = KtLSTM()
opt   = torch.optim.Adam(model.parameters(), lr=1e-3)
for _ in range(500):
    loss = nn.MSELoss()(model(X), y)
    opt.zero_grad(); loss.backward(); opt.step()

# ── Wrap as ExternalKtForecaster ───────────────────────────────────
def lstm_forecast(h, *, level=0.95):
    model.eval()
    window = torch.FloatTensor(kt_n[-lookback:]).reshape(1, lookback, 1)
    preds  = []
    with torch.no_grad():
        for _ in range(h):
            v = model(window).item()
            preds.append(v)
            window = torch.cat([window[:, 1:], torch.tensor([[[v]]])], dim=1)
    mean = (np.array(preds) * sd_k + mu_k).reshape(1, h)
    return mean, mean, mean   # no CI — provide simulate_fn for uncertainty

def lstm_simulate(h, nsim, *, rng):
    # Simple approach: add Gaussian noise scaled to training residuals
    resid_std = float(nn.MSELoss()(model(X), y).detach().sqrt()) * sd_k
    mean, _, _ = lstm_forecast(h)
    noise = rng.normal(0, resid_std, (1, h, nsim)) * np.sqrt(np.arange(1, h+1))
    return mean[:, :, np.newaxis] + noise

ext = ExternalKtForecaster(lstm_forecast, lstm_simulate)
fc  = ps.forecast(fit, h=20, kt_method=ext)
sim = ps.simulate(fit, nsim=500, h=20, kt_method=ext, seed=42)

Only point forecast, no simulation¶

If you only supply forecast_fn (no simulate_fn), simulate() will repeat the point forecast across all paths — fully deterministic, no parameter uncertainty:

ext = ExternalKtForecaster(forecast_fn)   # no simulate_fn
fc  = ps.forecast(fit, h=20, kt_method=ext)    # works fine
sim = ps.simulate(fit, nsim=500, h=20, kt_method=ext)   # all nsim paths identical

Warning

A deterministic simulate defeats the purpose of Monte Carlo — supply simulate_fn whenever uncertainty quantification matters.