# from pydantic.dataclasses import dataclass, Field
# from pydantic import validator
from dataclasses import dataclass, field
from typing import List, Optional, Literal, Any, Mapping, Tuple
from .variables import (
VariableTransform,
VariableDifference,
VariableRolling,
VariableLag,
)
[docs]
@dataclass(eq=True, frozen=True)
class Differences:
"""A schema for describing a growth variables and a factory for `variables.VariableDifference` variables.
Can be used separately, or in conjunction with .yaml files and `hydra.initialize`, `hydra.compose`, and `hydra.utils.instantiate`.
Will return output variables named the same as the input variables, only with "_gr" suffixed.
Parameters
----------
type : Literal["growth]
Growth is currently the only difference function available. Note that dividing with zero is a possibility.
input_vars : list[str]
List of input variables to transform.
"""
type: Literal["growth"]
input_vars: List[str]
def get_output_name(self, input_var: str) -> str:
match self.type:
case "growth":
suffix = "_gr"
case _:
raise ValueError(f"Unknown Differences.type: {self.type}")
return f"{input_var}{suffix}"
def get_variables(self) -> List[VariableDifference]:
[
VariableDifference(
output_var=self.get_output_name(input_var),
input_var=input_var,
type=self.type,
subset=0,
)
for input_var in self.input_vars
]
[docs]
@dataclass(eq=True, frozen=True)
class Rolling:
"""A schema for describing variables with "rolling" transformations and a factory for `variables.VariableRolling` variables.
Can be used separately, or in conjunction with .yaml files and `hydra.initialize`, `hydra.compose`, and `hydra.utils.instantiate`.
Will return output variables named the same as the input variables, only with a suffix according to this scheme:
{input_var}_{window_type_suffix}{fun_suffix}{window} where:
=========== =======
window_type suffix
=========== =======
normal \_r
span \_rsp
com \_rc
halflife \_hl
alpha \_ral
=========== =======
==== ======
fun suffix
==== ======
mean m
sum s
==== ======
Parameters
----------
window : int
The window size in time-units.
funs : list[Literal["mean", "sum]]
List of aggregation functions (rolling mean or rolling sum).
input_vars : list[str]
List of input variables to transform.
window_type : Literal["normal", "span", "com", "halflife", "alpha"]
"normal" is equally weighted. See `pandas.DataFrame.ewm` for details on the rest.
"""
window: int
funs: List[Literal["mean", "sum"]]
input_vars: List[str]
window_type: Literal["normal", "span", "com", "halflife", "alpha"] = "normal"
def get_output_name(self, input_var: str, fun: str) -> str:
match self.window_type:
case "normal":
suffix = "_r"
case "span":
suffix = "_rsp"
case "com":
suffix = "_rc"
case "halflife":
suffix = "_hl"
case "alpha":
suffix = "_ral"
case _:
raise ValueError(f"Unknown Rolling.window_type: {self.window_type}")
match fun:
case "mean":
suffix = suffix + "m"
case "sum":
suffix = suffix + "s"
case _:
raise ValueError(f"Unknown function: {fun}, in Rolling.funs")
return f"{input_var}{suffix}{self.window}"
def get_variables(self) -> List[VariableRolling]:
out = []
for input_var in self.input_vars:
for fun in self.funs:
obj = VariableRolling(
output_var=self.get_output_name(input_var, fun),
input_var=input_var,
window=self.window,
fun=fun,
window_type=self.window_type,
subset=0,
)
out.append(obj)
return out
[docs]
@dataclass(eq=True, frozen=True)
class Lags:
    """A schema for describing lagged variables and a factory for `variables.VariableLag` variables.

    Can be used separately, or in conjunction with .yaml files and `hydra.initialize`, `hydra.compose`, and `hydra.utils.instantiate`.

    Will return output variables named the same as the input variables, only with "_l{num_lag}" suffixed.

    Parameters
    ----------
    num_lag : int
        How many time-units to offset. E.g., 1 would lag a time-series 1 time-unit compared to the input_var.
    input_vars : list[str]
        List of input variables to transform.
    """

    num_lag: int
    input_vars: List[str]

    def get_output_name(self, input_var: str) -> str:
        """Return the output variable name for `input_var`.

        Named in the singular to match the `get_output_name` helper on the
        other schema classes in this module.

        Parameters
        ----------
        input_var : str
            Name of the input variable.

        Returns
        -------
        str
            `input_var` with "_l{num_lag}" appended.
        """
        return f"{input_var}_l{self.num_lag}"

    def get_output_names(self, input_var: str) -> str:
        """Backward-compatible alias of `get_output_name`.

        Kept so that existing callers using the original (plural) method
        name continue to work; returns exactly the same single name.
        """
        return self.get_output_name(input_var)

    def get_variables(self) -> List[VariableLag]:
        """Build one `VariableLag` per entry in `input_vars`.

        Returns
        -------
        List[VariableLag]
            One variable per input variable, in the order of `input_vars`.
            Empty when `input_vars` is empty.
        """
        return [
            VariableLag(
                output_var=self.get_output_name(input_var),
                input_var=input_var,
                num_lag=self.num_lag,
                subset=0,
            )
            for input_var in self.input_vars
        ]
[docs]
@dataclass(eq=True, frozen=True)
class ExogenModel:
    """Configuration schema for an exogenous variable used in the endogenous simulation.

    Can be used separately, or in conjunction with .yaml files and `hydra.initialize`, `hydra.compose`, and `hydra.utils.instantiate`.

    An ExogenModel variable is forecast data coming from somewhere else. Currently only deterministic exogenous
    variables are supported. The data has to be a complete set for all units in the simulation system, from the
    start date of simulation to the end date.

    Parameters
    ----------
    output_var : str
        The name of the output variable in question.
    exogen_data : str
        String path to .csv or .parquet file only including time_var, unit_var and output_var.
    subset : int
        This should just always be 1. Might remove this as an option.
    """

    output_var: str
    exogen_data: str
    subset: int = 1

    @property
    def node(self) -> Tuple[str, Mapping[str, Any]]:
        """Node representation that interfaces well with NetworkX graphs.

        Returns
        -------
        Tuple[str, Mapping[str, Any]]
            A pair whose first element is the output variable name (the "node")
            and whose second element is a dictionary of node data holding the
            "subset" value.
        """
        node_data = {"subset": self.subset}
        return self.output_var, node_data
[docs]
@dataclass
class GlobalSimConfig:
    """Configuration schema for the global options of a simulation run.

    Can be used separately, or in conjunction with .yaml files and `hydra.initialize`, `hydra.compose`, and `hydra.utils.instantiate`.

    Parameters
    ----------
    input_data : str
        Path to input data in either .csv or .parquet file format. Used both for training and as initial values in simulation.
    time_var : str
        Name of the variable in the input_data indicating the time dimension. The variable must be integer type.
    unit_var : str
        Name of the variable in the input_data indicating the unit/spatial dimension. The variable must be integer type.
    nsim : int
        Number of independent simulations to run.
    end : int
        The time-unit to end simulation. Since these are fully described endogenous simulations, they can go indefinitely.
    include_past_n : int
        How much of the past to include when fitting statistical models.
    start : int, optional
        The time-unit to start simulation. Must be an integer value found in the time_var series in the input_data.
        Defaults to None.
    vars : list[str], optional
        The subset of variables in the input_data to include. Defaults to a new empty list per instance.
    """

    # Required options.
    input_data: str
    time_var: str
    unit_var: str
    nsim: int
    end: int
    include_past_n: int

    # Optional options.
    start: Optional[int] = None
    vars: Optional[List[str]] = field(default_factory=list)
[docs]
@dataclass
class Config:
"""Global configuration schema for the endogenous simulation.
Can be used separately, or in conjunction with .yaml files and `hydra.initialize`, `hydra.compose`, and `hydra.utils.instantiate`.
Parameters
----------
global_config : GlobalSimConfig
Global simulation configuration options
variables : list[InputModel]
List of configuration schema for input models to include in endogenous simulation. Note restrictions on circularity, etc.
"""
_variable_dict: dict
global_config: GlobalSimConfig
variables: List[InputModel|ExogenModel] = field(default_factory=list)