Source code for endogen.adapter_mlforecast

from .utilities import PanelUnits

from mlforecast.forecast import MLForecast
from mlforecast.utils import PredictionIntervals

from typing import Literal

import numpy as np
import pandas as pd
import itertools
import xarray



[docs]
def percentile_hi_lo(interval: float, type: Literal["lo", "hi"]) -> float:
    """Calculate lo/hi percentile from predictive interval

    Parameters
    ----------
    interval : float
        The predictive interval in percent.
    type : Literal["lo", "hi"]
        Whether to return the low or high percentile from the interval.

    Returns
    -------
    float
        A percentile from 0 - 1.
    """
    alpha = (100 - interval) / 100
    if type == "lo":
        return alpha / 2
    if type == "hi":
        return 1 - (alpha / 2)




[docs]
def setup_mlforecast_bins(
    model: MLForecast, levels=list[float]
) -> tuple[dict[str, str], list[float]]:
    """A helper function to rename columns from MLForecast models with PredictiveIntervals. These are named in terms of the prediction interval and
    a lo/hi indicator. This function converts this to percentiles and returns a dictionary for easy renaming in Pandas.

    The idea here is to create an equally binned histogram over the percentiles in a prediction distribution, finding the prediction at the middle of each bin.

    Parameters
    ----------
    model : MLForecast
        A `mlforecast.forecast.MLForecast` model object
    levels : list[float]
        A list of prediction intervals. E.g., [50, 90] gives the 50% and 90% prediction interval

    Returns
    -------
    tuple[dict[str, str], list[float]]
        A tuple with a dictionary with the renaming scheme for use in Pandas and the list of percentiles.
    """

    var_names = {
        f"{m}-{t}-{l}": f"{m}-{percentile_hi_lo(l, t)}"
        for m, t, l in list(
            itertools.product(model.models.keys(), ["lo", "hi"], levels)
        )
    }

    # this is probably not generalizable to any type if integer list. idea is to eavenly spread equally spaced bins across 0-1 and get the percentile for the middle of the bin
    p = [1 / len(var_names)] * len(var_names)
    return var_names, p




[docs]
def forecast_mlforecast(
    t: int,
    s: int,
    model: MLForecast,
    xdata: xarray.Dataset,
    pnames: PanelUnits,
    output_var: str,
    input_vars: list[str],
    levels=list[float],
) -> pd.DataFrame:
    """A prediction function adapter for MLForecast fitted with PredictionIntervals drawing predictions from the (approx./stepwise)
    full predictive distribution.

    Parameters
    ----------
    t : int
        Time index to forecast
    s : int
        Simulation index to forecast
    model : MLForecast
        The MLForecast object
    xdata : xarray.Dataset
        The input data used in forecasting
    pnames : PanelUnits
        The internal index naming convention used.
    output_var : str
        The output variable
    input_vars : list[str]
        The list of input variables
    levels : list[float]
        The list of prediction intervals that the MLForecast model has been fitted with.

    Returns
    -------
    pd.DataFrame
        A properly indexed pandas.DataFrame with a single draw from the full predictive distribution for all units at time t.
    """
    df = (
        xdata[list(itertools.chain([output_var], input_vars))]
        .to_dataframe()
        .loc[t, :, s]
    )
    df = df.rename(columns={output_var: "y"})
    df.reset_index(inplace=True)
    df["ds"] = t

    X = xdata[input_vars].to_dataframe().loc[t, :, s]
    X.reset_index(inplace=True)
    X["ds"] = t + 1

    res = model.predict(h=1, level=levels, new_df=df, X_df=X)
    var_names, p = setup_mlforecast_bins(model, levels)
    res = res.rename(columns=var_names)
    res.set_index(pnames.internal_index, inplace=True)
    res = res.drop(columns=model.models.keys())
    res = res.reindex(sorted(res.columns), axis=1)
    res = res.apply(
        lambda x: np.random.choice(x.tolist(), size=1, p=p),
        axis=1,
    ).explode()
    return res