Skip to content

Data

StMoMoData

Container for mortality data.

Parameters:

Name Type Description Default
deaths ndarray

Death counts matrix, shape (n_ages, n_years).

required
exposures ndarray

Central or initial exposures matrix, shape (n_ages, n_years).

required
ages ndarray

Age labels, length n_ages.

required
years ndarray

Calendar year labels, length n_years.

required
type Literal['central', 'initial']

"central" (central death rates μ_xt) or "initial" (initial death probabilities q_xt).

'central'
series str

Description of the population series, e.g. "males".

''
label str

Short identifier for the dataset.

''
Source code in src/pystmomo/data/_loader.py
@dataclass
class StMoMoData:
    """Container for mortality data.

    Parameters
    ----------
    deaths:
        Death counts matrix, shape (n_ages, n_years).
    exposures:
        Central or initial exposures matrix, shape (n_ages, n_years).
    ages:
        Age labels, length n_ages.
    years:
        Calendar year labels, length n_years.
    type:
        ``"central"`` (central death rates μ_xt) or ``"initial"`` (initial
        death probabilities q_xt).
    series:
        Description of the population series, e.g. ``"males"``.
    label:
        Short identifier for the dataset.

    Notes
    -----
    The constructor stores its arguments as given; no shape or type checks
    are performed here.
    """

    deaths: np.ndarray
    exposures: np.ndarray
    ages: np.ndarray
    years: np.ndarray
    type: Literal["central", "initial"] = "central"
    series: str = ""
    label: str = ""

    @property
    def n_ages(self) -> int:
        """Number of age labels (``len(self.ages)``)."""
        return len(self.ages)

    @property
    def n_years(self) -> int:
        """Number of calendar-year labels (``len(self.years)``)."""
        return len(self.years)

    def central2initial(self) -> StMoMoData:
        """Convert central exposures to initial exposures.

        Uses the approximation ``E_xt^0 ≈ E_xt^c + 0.5 * D_xt``, which is
        standard in the HMD methodology.

        Returns
        -------
        StMoMoData
            New object with ``type = "initial"`` and adjusted exposures.
            If this object is already of type ``"initial"`` it is returned
            as-is (the same instance, not a copy).
        """
        if self.type == "initial":
            return self
        new_exp = self.exposures + 0.5 * self.deaths
        # NOTE: ``deaths`` is passed through by reference, whereas the label
        # arrays are copied and the exposures are a freshly computed array.
        return StMoMoData(
            deaths=self.deaths,
            exposures=new_exp,
            ages=self.ages.copy(),
            years=self.years.copy(),
            type="initial",
            series=self.series,
            label=self.label,
        )

    def subset(
        self,
        ages: np.ndarray | list[int] | None = None,
        years: np.ndarray | list[int] | None = None,
    ) -> StMoMoData:
        """Return a subset of the data restricted to given ages and/or years.

        Parameters
        ----------
        ages:
            Age labels to keep; ``None`` keeps every age.
        years:
            Calendar-year labels to keep; ``None`` keeps every year.

        Returns
        -------
        StMoMoData
            New object holding the selected rows/columns.  Selection is by
            membership, so requested labels that are absent from the data
            are ignored and rows/columns keep the order of the stored data.
        """
        age_idx = (
            np.isin(self.ages, ages)
            if ages is not None
            else np.ones(self.n_ages, dtype=bool)
        )
        year_idx = (
            np.isin(self.years, years)
            if years is not None
            else np.ones(self.n_years, dtype=bool)
        )
        return StMoMoData(
            deaths=self.deaths[np.ix_(age_idx, year_idx)],
            exposures=self.exposures[np.ix_(age_idx, year_idx)],
            ages=self.ages[age_idx],
            years=self.years[year_idx],
            type=self.type,
            series=self.series,
            label=self.label,
        )

    @staticmethod
    def _span(labels: np.ndarray) -> str:
        """Format a label vector as ``first-last``, or ``[]`` when empty."""
        if len(labels) == 0:
            # An empty selection (e.g. from ``subset``) must not make repr raise.
            return "[]"
        return f"{labels[0]}-{labels[-1]}"

    def __repr__(self) -> str:  # noqa: D105
        return (
            f"StMoMoData(label={self.label!r}, series={self.series!r}, "
            f"ages={self._span(self.ages)}, "
            f"years={self._span(self.years)}, type={self.type!r})"
        )

central2initial()

Convert central exposures to initial exposures.

Uses the approximation E_xt^0 ≈ E_xt^c + 0.5 * D_xt, which is standard in the HMD methodology.

Returns:

Type Description
StMoMoData

New object with type = "initial" and adjusted exposures. If the data are already of type "initial", the same object is returned unchanged.

Source code in src/pystmomo/data/_loader.py
def central2initial(self) -> StMoMoData:
    """Produce the initial-exposure counterpart of central-exposure data.

    Initial exposures are approximated as ``E_xt^0 ≈ E_xt^c + 0.5 * D_xt``,
    the standard HMD-style adjustment.

    Returns
    -------
    StMoMoData
        An object of ``type = "initial"`` carrying the adjusted exposures.
        Data that are already ``"initial"`` are handed back untouched (the
        very same instance).
    """
    already_initial = self.type == "initial"
    if already_initial:
        return self

    # Half of the deaths are added back onto the central exposures.
    half_deaths = 0.5 * self.deaths
    converted = dict(
        deaths=self.deaths,
        exposures=self.exposures + half_deaths,
        ages=self.ages.copy(),
        years=self.years.copy(),
        type="initial",
        series=self.series,
        label=self.label,
    )
    return StMoMoData(**converted)

subset(ages=None, years=None)

Return a subset of the data restricted to given ages and/or years.

Source code in src/pystmomo/data/_loader.py
def subset(
    self,
    ages: np.ndarray | list[int] | None = None,
    years: np.ndarray | list[int] | None = None,
) -> StMoMoData:
    """Build a new dataset containing only the requested ages and/or years.

    Passing ``None`` for an axis keeps that axis whole.  Selection is by
    membership, so requested labels missing from the data are ignored and
    the result keeps the ordering of the stored data.
    """
    # Boolean keep-mask along the age axis.
    if ages is None:
        keep_age = np.ones(self.n_ages, dtype=bool)
    else:
        keep_age = np.isin(self.ages, ages)

    # Boolean keep-mask along the year axis.
    if years is None:
        keep_year = np.ones(self.n_years, dtype=bool)
    else:
        keep_year = np.isin(self.years, years)

    # One open-mesh index selects the same cells from both matrices.
    cells = np.ix_(keep_age, keep_year)
    return StMoMoData(
        deaths=self.deaths[cells],
        exposures=self.exposures[cells],
        ages=self.ages[keep_age],
        years=self.years[keep_year],
        type=self.type,
        series=self.series,
        label=self.label,
    )

load_ew_male()

Load the bundled England & Wales male mortality dataset.

The dataset contains synthetic deaths and central exposures for English and Welsh males, ages 0–100, calendar years 1961–2011. It was generated to match published Human Mortality Database mortality patterns for illustrative purposes.

Parameters:

Name Type Description Default
ages ndarray | list[int] | None

If provided, restrict to these ages. Defaults to ages 55–89, which are standard for CBD-family models.

None
years ndarray | list[int] | None

If provided, restrict to these calendar years. Defaults to all years (1961–2011).

None

Returns:

Type Description
StMoMoData

Central mortality data for the requested subset.

Examples:

>>> import pystmomo as ps
>>> data = ps.load_ew_male()
>>> data.deaths.shape
(35, 51)
>>> data = ps.load_ew_male(ages=range(55, 90), years=range(1980, 2012))
Source code in src/pystmomo/data/_loader.py
def load_ew_male(
    ages: np.ndarray | list[int] | None = None,
    years: np.ndarray | list[int] | None = None,
) -> StMoMoData:
    """Return the bundled England & Wales male mortality dataset.

    The bundled files hold synthetic deaths and central exposures for
    English and Welsh males (ages 0–100, calendar years 1961–2011),
    generated to match published Human Mortality Database mortality
    patterns for illustrative purposes.

    Parameters
    ----------
    ages:
        Ages to keep.  When omitted, ages 55–89 are used, the usual range
        for CBD-family models.
    years:
        Calendar years to keep.  When omitted, every year in the file
        (1961–2011) is kept.

    Returns
    -------
    StMoMoData
        Central mortality data for the requested subset.

    Examples
    --------
    >>> import pystmomo as ps
    >>> data = ps.load_ew_male()
    >>> data.deaths.shape
    (35, 51)
    >>> data = ps.load_ew_male(ages=range(55, 90), years=range(1980, 2012))
    """
    deaths_table = pd.read_csv(_DATA_DIR / "EWMaleData_Dxt.csv", index_col="age")
    exposures_table = pd.read_csv(_DATA_DIR / "EWMaleData_Ext.csv", index_col="age")

    # Age/year labels are taken from the deaths file only; the exposures
    # file is indexed positionally with the same masks below.
    file_ages = deaths_table.index.to_numpy(dtype=int)
    file_years = deaths_table.columns.to_numpy(dtype=int)

    if ages is None:
        wanted_ages = np.arange(55, 90)
    else:
        wanted_ages = np.asarray(ages, dtype=int)

    if years is None:
        wanted_years = file_years
    else:
        wanted_years = np.asarray(years, dtype=int)

    keep_age = np.isin(file_ages, wanted_ages)
    keep_year = np.isin(file_years, wanted_years)
    cells = np.ix_(keep_age, keep_year)

    return StMoMoData(
        deaths=deaths_table.values[cells].astype(float),
        exposures=exposures_table.values[cells].astype(float),
        ages=file_ages[keep_age],
        years=file_years[keep_year],
        type="central",
        series="males",
        label="EWMale",
    )

load_hmd_csv()

Load mortality data from HMD-format CSV files.

HMD CSV files can be downloaded from https://www.mortality.org after registration. The expected format has columns Year, Age, and Total (or Male / Female).

Parameters:

Name Type Description Default
deaths_file str | Path

Path to the HMD deaths CSV (e.g., Deaths_1x1.txt).

required
exposures_file str | Path

Path to the HMD exposures CSV (e.g., Exposures_1x1.txt).

required
series str

Population series identifier ("total", "male", "female").

''
label str

Short dataset label.

''
exposure_type Literal['central', 'initial']

Whether the exposures file contains central ("central") or initial ("initial") exposures.

'central'

Returns:

Type Description
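StMoMoData

Mortality data restricted to the ages and calendar years present in both the deaths file and the exposures file.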
Source code in src/pystmomo/data/_loader.py
def load_hmd_csv(
    deaths_file: str | Path,
    exposures_file: str | Path,
    *,
    series: str = "",
    label: str = "",
    exposure_type: Literal["central", "initial"] = "central",
) -> StMoMoData:
    """Load mortality data from HMD-format CSV files.

    HMD CSV files can be downloaded from `https://www.mortality.org` after
    registration.  The expected format is whitespace-separated with columns
    ``Year``, ``Age``, and ``Total`` (and/or ``Male`` / ``Female``).  Any
    lines preceding the column header (such as the title line of native HMD
    ``*_1x1.txt`` files) are skipped, as are ``#`` comment lines.

    Parameters
    ----------
    deaths_file:
        Path to the HMD deaths CSV (e.g., ``Deaths_1x1.txt``).
    exposures_file:
        Path to the HMD exposures CSV (e.g., ``Exposures_1x1.txt``).
    series:
        Population series identifier (``"total"``, ``"male"``, ``"female"``;
        case-insensitive, plural forms accepted).  When it names a column
        that exists in the file, that column is read.  Otherwise the first
        available of ``Total``, ``Male``, ``Female`` is used, falling back
        to the last column of the file.
    label:
        Short dataset label.
    exposure_type:
        Whether the exposures file contains central (``"central"``) or initial
        (``"initial"``) exposures.

    Returns
    -------
    StMoMoData
        Data restricted to the ages and years present in both files.
    """
    # Translate the requested series into the HMD column holding it, so that
    # e.g. series="male" does not silently load the "Total" column.
    series_columns = {
        "total": "Total",
        "male": "Male",
        "males": "Male",
        "female": "Female",
        "females": "Female",
    }
    preferred_col = series_columns.get(str(series).strip().lower())

    def _header_offset(path: str | Path) -> int:
        """Return the 0-based line number of the ``Year``/``Age`` header row.

        Returns 0 (parse from the top, the previous behaviour) when the file
        cannot be opened locally or no such line is found.
        """
        try:
            with open(path, encoding="utf-8", errors="replace") as fh:
                for line_no, line in enumerate(fh):
                    tokens = line.split()
                    if "Year" in tokens and "Age" in tokens:
                        return line_no
        except (OSError, TypeError, ValueError):
            pass
        return 0

    def _parse_hmd(path: str | Path) -> pd.DataFrame:
        """Read one HMD file into an Age x Year table of the chosen series."""
        df = pd.read_csv(
            path,
            sep=r"\s+",
            comment="#",
            na_values=".",
            dtype=str,
            # skiprows counts raw file lines, so this lands on the header row.
            skiprows=_header_offset(path),
        )
        df.columns = df.columns.str.strip()
        # Handle "110+" age notation
        df["Age"] = df["Age"].str.replace("+", "", regex=False).astype(int)
        df["Year"] = df["Year"].astype(int)
        col_choices = ["Total", "Male", "Female"]
        if preferred_col is not None:
            # Try the explicitly requested series first.
            col_choices = [preferred_col, *col_choices]
        val_col = next((c for c in col_choices if c in df.columns), df.columns[-1])
        df[val_col] = pd.to_numeric(df[val_col], errors="coerce")
        return df.pivot(index="Age", columns="Year", values=val_col)

    dxt_df = _parse_hmd(deaths_file)
    ext_df = _parse_hmd(exposures_file)

    # Keep only cells available in both tables so the matrices line up.
    common_ages = np.intersect1d(dxt_df.index.values, ext_df.index.values)
    common_years = np.intersect1d(dxt_df.columns.values, ext_df.columns.values)

    dxt_df = dxt_df.loc[common_ages, common_years]
    ext_df = ext_df.loc[common_ages, common_years]

    return StMoMoData(
        deaths=dxt_df.values.astype(float),
        exposures=ext_df.values.astype(float),
        ages=np.asarray(common_ages, dtype=int),
        years=np.asarray(common_years, dtype=int),
        type=exposure_type,
        series=series,
        label=label,
    )