-
Notifications
You must be signed in to change notification settings - Fork 39
Kalman filter #751
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: research/v4.1.0
Are you sure you want to change the base?
Kalman filter #751
Changes from all commits
828a8d6
742c4bd
5a044fb
b93dfb8
435cc7b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,147 @@ | ||
| # SPDX-FileCopyrightText: 2025 Contributors to the OpenSTEF project <short.term.energy.forecasts@alliander.com> | ||
| # | ||
| # SPDX-License-Identifier: MPL-2.0 | ||
|
|
||
| """Kalman Filter Transforms for Time Series Data pre and post-processing. | ||
|
|
||
| This module provides implementations of Kalman Smoothing as both a preprocessor | |
| and postprocessor for time series datasets. The Kalman Smoother helps reduce noise. | |
| """ | ||
|
|
||
| from collections.abc import Iterable | ||
| from typing import override | ||
|
|
||
| import pandas as pd | ||
| from pydantic import Field | ||
| from sktime.transformations.series.kalman_filter import ( | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add this as a dependency? I don't think we have it already.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. added as dependency of openstef models |
||
| KalmanFilterTransformerFP, | ||
| ) | ||
|
|
||
| from openstef_core.base_model import BaseConfig | ||
| from openstef_core.datasets import ForecastDataset, TimeSeriesDataset | ||
| from openstef_core.mixins import Transform | ||
| from openstef_core.transforms import TimeSeriesTransform | ||
| from openstef_models.utils.feature_selection import FeatureSelection | ||
|
|
||
|
|
||
class BaseKalman(BaseConfig):
    """Shared configuration and filtering helper for the Kalman transforms."""

    # Columns to run the filter on; defaults to ``FeatureSelection.ALL``.
    selection: FeatureSelection = Field(
        default=FeatureSelection.ALL,
        description="Columns to smooth",
    )
    # NOTE(review): ``_run_kalman_filter`` does not read this field; it sizes the
    # filter state from the number of columns it is handed. Confirm whether
    # ``state_dim`` is still meant to be configurable.
    state_dim: int = Field(
        default=1,
        description="Kalman filter state dimension (1 = per-column independent)",
    )

    @staticmethod
    def _run_kalman_filter(df: pd.DataFrame, features: Iterable[str]) -> pd.DataFrame:
        """Filter the given columns of ``df`` with a ``KalmanFilterTransformerFP``.

        Args:
            df: Frame holding the columns to filter; it is never modified.
            features: Names of the columns to filter.

        Returns:
            ``df`` itself when ``features`` is empty; otherwise a deep copy of
            ``df`` in which the ``features`` columns are replaced by the output
            of ``fit_transform``.
        """
        columns = [*features]
        if len(columns) == 0:
            return df

        # The transformer is created with one state dimension per column.
        transformer = KalmanFilterTransformerFP(state_dim=len(columns))
        filtered = df.copy(deep=True)
        filtered[columns] = transformer.fit_transform(X=df[columns])  # type: ignore[assignment]
        return filtered
|
|
||
|
|
||
| class KalmanPreprocessor(BaseKalman, TimeSeriesTransform): | ||
| """Apply Kalman Smoothing to time series data to reduce noise and improve temporal consistency. | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there a specific source you used for the implementation? If so, I think it is nice to add it to the docs.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The implementation was done to mimic other transforms. No additional sources were used |
||
|
|
||
| Example: | ||
| >>> from datetime import timedelta | ||
| >>> import pandas as pd | ||
| >>> from openstef_core.testing import create_timeseries_dataset | ||
| >>> from openstef_models.transforms.general import KalmanPreprocessor | ||
| >>> dataset = create_timeseries_dataset( | ||
| ... index=pd.date_range("2025-01-01", periods=5, freq="1h"), | ||
| ... load=[10.0, 50.0, 100.0, 200.0, 150.0], | ||
| ... sample_interval=timedelta(hours=1), | ||
| ... ) | ||
| >>> transform = KalmanPreprocessor() | ||
| >>> result = transform.fit_transform(dataset) | ||
| >>> result.data | ||
| load | ||
| timestamp | ||
| 2025-01-01 00:00:00 6.666667 | ||
| 2025-01-01 01:00:00 33.750000 | ||
| 2025-01-01 02:00:00 74.761905 | ||
| 2025-01-01 03:00:00 152.181818 | ||
| 2025-01-01 04:00:00 150.833333 | ||
| """ | ||
|
|
||
| @property | ||
| @override | ||
| def is_fitted(self) -> bool: | ||
| return True | ||
|
|
||
@override
def fit(self, data: TimeSeriesDataset) -> None:
    """Do nothing: the transform is stateless, so there is nothing to learn.

    Args:
        data: Ignored.
    """
|
|
||
@override
def transform(self, data: TimeSeriesDataset) -> TimeSeriesDataset:
    """Run the Kalman filter over the selected numeric columns of ``data``.

    Args:
        data: Dataset whose ``data`` frame holds the columns to filter.

    Returns:
        ``data`` itself when none of the selected columns is numeric;
        otherwise the result of ``data.copy_with`` carrying the filtered frame.
    """
    # Only numeric columns are handed to the filter; a set keeps the
    # membership test O(1) per selected feature.
    numeric_columns = set(data.data.select_dtypes(include=["number"]).columns)
    features: list[str] = [
        name for name in self.selection.resolve(data.feature_names) if name in numeric_columns
    ]
    if not features:
        return data

    # ``_run_kalman_filter`` deep-copies the frame before writing to it when
    # ``features`` is non-empty, so copying here as well would only duplicate
    # the whole frame a second time.
    df_filtered = self._run_kalman_filter(data.data, features)
    return data.copy_with(data=df_filtered, is_sorted=True)
|
|
||
| @override | ||
| def features_added(self) -> list[str]: | ||
| # Preprocessor doesn't add columns | ||
| return [] | ||
|
|
||
class KalmanPostprocessor(BaseKalman, Transform[ForecastDataset, ForecastDataset]):
    """Apply Kalman Smoothing to quantile forecasts to reduce noise and improve temporal consistency.

    The quantile columns of the forecast are filtered; other columns (such as
    ``load`` in the example below) are passed through unchanged. When
    ``monotonic`` is set, the filtered quantiles are additionally made
    non-decreasing from the lowest to the highest quantile in every row.

    Example:
        >>> from datetime import timedelta
        >>> import pandas as pd
        >>> import numpy as np
        >>> from openstef_core.datasets.validated_datasets import ForecastDataset
        >>> from openstef_models.transforms.general import KalmanPostprocessor
        >>> forecast_data = pd.DataFrame({
        ...     'load': [100, np.nan],
        ...     'quantile_P10': [90, 95],
        ...     'quantile_P50': [100, 110],
        ...     'quantile_P90': [115, 125]
        ... }, index=pd.date_range('2025-01-01', periods=2, freq='h'))
        >>> dataset = ForecastDataset(forecast_data, timedelta(hours=1))
        >>> transform = KalmanPostprocessor()
        >>> result = transform.fit_transform(dataset)
        >>> result.data
                              load  quantile_P10  quantile_P50  quantile_P90
        timestamp
        2025-01-01 00:00:00  100.0        60.000     66.666667     76.666667
        2025-01-01 01:00:00    NaN        81.875     93.750000    106.875000
    """

    # When True, quantile columns are forced to be non-crossing after filtering.
    monotonic: bool = Field(
        default=True,
        description="Enforce non-crossing quantiles after smoothing",
    )

    @property
    @override
    def is_fitted(self) -> bool:
        """Report the transform as fitted; ``fit`` does nothing.

        Returns:
            Always ``True``.
        """
        return True

    @override
    def fit(self, data: ForecastDataset) -> None:
        """Do nothing; this transform has no fitting step.

        Args:
            data: Ignored.
        """
        return None

    @override
    def transform(self, data: ForecastDataset) -> ForecastDataset:
        """Filter the quantile columns of ``data`` and return a new forecast dataset.

        Args:
            data: Forecast whose quantile columns are filtered.

        Returns:
            A ``ForecastDataset`` built from ``data.copy_with`` carrying the
            filtered (and, if ``monotonic`` is set, non-crossing) quantiles.
        """
        # Ascending quantile order matters for the monotonicity step below.
        quantile_columns = [q.format() for q in sorted(data.quantiles)]

        if quantile_columns:
            # ``_run_kalman_filter`` deep-copies the frame before writing to it,
            # so no extra copy is needed here.
            df_filtered = self._run_kalman_filter(data.data, quantile_columns)
            if self.monotonic:
                # A running row-wise maximum over the ascending quantile columns
                # guarantees each quantile is >= the one below it. NaNs are
                # skipped by ``cummax`` and stay NaN.
                df_filtered[quantile_columns] = df_filtered[quantile_columns].cummax(axis=1)
        else:
            # Nothing to filter; still hand ``copy_with`` a private frame.
            df_filtered = data.data.copy(deep=True)

        return ForecastDataset.from_timeseries(data.copy_with(data=df_filtered, is_sorted=True))
|
|
||
|
|
||
| __all__ = ["BaseKalman", "KalmanPostprocessor", "KalmanPreprocessor"] | ||
Uh oh!
There was an error while loading. Please reload this page.