Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.anomaly_detection.spark.iqr.decomposition_iqr_anomaly_detection
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.anomaly_detection.spark.iqr.iqr_anomaly_detection
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.anomaly_detection.spark.mad.mad_anomaly_detection
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.pandas.chronological_sort
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.pandas.cyclical_encoding
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.pandas.datetime_features
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.pandas.datetime_string_conversion
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.pandas.drop_columns_by_NaN_percentage
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.pandas.drop_empty_columns
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.pandas.lag_features
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.pandas.mad_outlier_detection
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.pandas.mixed_type_separation
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.pandas.one_hot_encoding
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.pandas.rolling_statistics
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.pandas.select_columns_by_correlation
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.chronological_sort
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.cyclical_encoding
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.datetime_features
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.datetime_string_conversion
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.drop_columns_by_NaN_percentage
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.drop_empty_columns
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.lag_features
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.mad_outlier_detection
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.mixed_type_separation
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.rolling_statistics
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.select_columns_by_correlation
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.decomposition.pandas.classical_decomposition
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.decomposition.pandas.mstl_decomposition
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.decomposition.pandas.stl_decomposition
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.decomposition.spark.classical_decomposition
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.decomposition.spark.mstl_decomposition
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.decomposition.spark.stl_decomposition
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.forecasting.prediction_evaluation
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.forecasting.spark.autogluon_timeseries
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.forecasting.spark.catboost_timeseries
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.forecasting.spark.lstm_timeseries
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.forecasting.spark.prophet
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.forecasting.spark.xgboost_timeseries
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.sources.python.azure_blob
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.visualization.matplotlib.anomaly_detection
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.visualization.matplotlib.comparison
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.visualization.matplotlib.decomposition
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.visualization.matplotlib.forecasting
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.visualization.plotly.anomaly_detection
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.visualization.plotly.comparison
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.visualization.plotly.decomposition
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.visualization.plotly.forecasting
11 changes: 11 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,15 @@ dependencies:
- statsmodels>=0.14.1,<0.15.0
- pmdarima>=2.0.4
- scikit-learn>=1.3.0,<1.6.0
# ML/Forecasting dependencies added by AMOS team
- tensorflow>=2.18.0,<3.0.0
- tf-keras>=2.15,<2.19
- xgboost>=2.0.0,<3.0.0
- plotly>=5.0.0
- python-kaleido>=0.2.0
- prophet==1.2.1
- sktime==0.40.1
- catboost==1.2.8
- pip:
# protobuf installed via pip to avoid libabseil conflicts with conda libarrow
- protobuf>=5.29.0,<5.30.0
Expand All @@ -92,3 +101,5 @@ dependencies:
- eth-typing>=5.0.1,<6.0.0
- pandas>=2.0.1,<2.3.0
- moto[s3]>=5.0.16,<6.0.0
# AutoGluon for time series forecasting (AMOS team)
- autogluon.timeseries>=1.1.1,<2.0.0
89 changes: 74 additions & 15 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ nav:
- Delta Sharing: sdk/code-reference/pipelines/sources/python/delta_sharing.md
- ENTSO-E: sdk/code-reference/pipelines/sources/python/entsoe.md
- MFFBAS: sdk/code-reference/pipelines/sources/python/mffbas.md
- Azure Blob: sdk/code-reference/pipelines/sources/python/azure_blob.md
- Transformers:
- Spark:
- Binary To String: sdk/code-reference/pipelines/transformers/spark/binary_to_string.md
Expand Down Expand Up @@ -245,27 +246,85 @@ nav:
- Interval Based: sdk/code-reference/pipelines/data_quality/monitoring/spark/identify_missing_data_interval.md
- Pattern Based: sdk/code-reference/pipelines/data_quality/monitoring/spark/identify_missing_data_pattern.md
- Moving Average: sdk/code-reference/pipelines/data_quality/monitoring/spark/moving_average.md
- Data Manipulation:
- Duplicate Detection: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/duplicate_detection.md
- Out of Range Value Filter: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/out_of_range_value_filter.md
- Flatline Filter: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/flatline_filter.md
- Gaussian Smoothing: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/gaussian_smoothing.md
- Dimensionality Reduction: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/dimensionality_reduction.md
- Interval Filtering: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/interval_filtering.md
- K-Sigma Anomaly Detection: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/k_sigma_anomaly_detection.md
- Missing Value Imputation: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/missing_value_imputation.md
- Normalization:
- Normalization: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/normalization/normalization.md
- Normalization Mean: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/normalization/normalization_mean.md
- Normalization MinMax: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/normalization/normalization_minmax.md
- Normalization ZScore: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/normalization/normalization_zscore.md
- Denormalization: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/normalization/denormalization.md
- Data Manipulation:
- Spark:
- Duplicate Detection: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/duplicate_detection.md
- Out of Range Value Filter: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/out_of_range_value_filter.md
- Flatline Filter: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/flatline_filter.md
- Gaussian Smoothing: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/gaussian_smoothing.md
- Dimensionality Reduction: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/dimensionality_reduction.md
- Interval Filtering: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/interval_filtering.md
- K-Sigma Anomaly Detection: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/k_sigma_anomaly_detection.md
- Missing Value Imputation: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/missing_value_imputation.md
- Chronological Sort: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/chronological_sort.md
- Cyclical Encoding: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/cyclical_encoding.md
- Datetime Features: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/datetime_features.md
- Datetime String Conversion: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/datetime_string_conversion.md
- Lag Features: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/lag_features.md
- MAD Outlier Detection: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/mad_outlier_detection.md
- Mixed Type Separation: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/mixed_type_separation.md
- Rolling Statistics: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/rolling_statistics.md
- Drop Empty Columns: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/drop_empty_columns.md
- Drop Columns by NaN Percentage: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/drop_columns_by_nan_percentage.md
- Select Columns by Correlation: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/select_columns_by_correlation.md
- Normalization:
- Normalization: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/normalization/normalization.md
- Normalization Mean: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/normalization/normalization_mean.md
- Normalization MinMax: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/normalization/normalization_minmax.md
- Normalization ZScore: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/normalization/normalization_zscore.md
- Denormalization: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/normalization/denormalization.md
- Pandas:
- Chronological Sort: sdk/code-reference/pipelines/data_quality/data_manipulation/pandas/chronological_sort.md
- Cyclical Encoding: sdk/code-reference/pipelines/data_quality/data_manipulation/pandas/cyclical_encoding.md
- Datetime Features: sdk/code-reference/pipelines/data_quality/data_manipulation/pandas/datetime_features.md
- Datetime String Conversion: sdk/code-reference/pipelines/data_quality/data_manipulation/pandas/datetime_string_conversion.md
- Lag Features: sdk/code-reference/pipelines/data_quality/data_manipulation/pandas/lag_features.md
- MAD Outlier Detection: sdk/code-reference/pipelines/data_quality/data_manipulation/pandas/mad_outlier_detection.md
- Mixed Type Separation: sdk/code-reference/pipelines/data_quality/data_manipulation/pandas/mixed_type_separation.md
- One-Hot Encoding: sdk/code-reference/pipelines/data_quality/data_manipulation/pandas/one_hot_encoding.md
- Rolling Statistics: sdk/code-reference/pipelines/data_quality/data_manipulation/pandas/rolling_statistics.md
- Drop Empty Columns: sdk/code-reference/pipelines/data_quality/data_manipulation/pandas/drop_empty_columns.md
- Drop Columns by NaN Percentage: sdk/code-reference/pipelines/data_quality/data_manipulation/pandas/drop_columns_by_nan_percentage.md
- Select Columns by Correlation: sdk/code-reference/pipelines/data_quality/data_manipulation/pandas/select_columns_by_correlation.md
- Forecasting:
- Data Binning: sdk/code-reference/pipelines/forecasting/spark/data_binning.md
- Linear Regression: sdk/code-reference/pipelines/forecasting/spark/linear_regression.md
- Arima: sdk/code-reference/pipelines/forecasting/spark/arima.md
- Auto Arima: sdk/code-reference/pipelines/forecasting/spark/auto_arima.md
- K Nearest Neighbors: sdk/code-reference/pipelines/forecasting/spark/k_nearest_neighbors.md
- Prophet: sdk/code-reference/pipelines/forecasting/spark/prophet.md
- LSTM TimeSeries: sdk/code-reference/pipelines/forecasting/spark/lstm_timeseries.md
- XGBoost TimeSeries: sdk/code-reference/pipelines/forecasting/spark/xgboost_timeseries.md
- CatBoost TimeSeries: sdk/code-reference/pipelines/forecasting/spark/catboost_timeseries.md
- AutoGluon TimeSeries: sdk/code-reference/pipelines/forecasting/spark/autogluon_timeseries.md
- Prediction Evaluation: sdk/code-reference/pipelines/forecasting/prediction_evaluation.md
- Decomposition:
- Pandas:
- Classical Decomposition: sdk/code-reference/pipelines/decomposition/pandas/classical_decomposition.md
- STL Decomposition: sdk/code-reference/pipelines/decomposition/pandas/stl_decomposition.md
- MSTL Decomposition: sdk/code-reference/pipelines/decomposition/pandas/mstl_decomposition.md
- Spark:
- Classical Decomposition: sdk/code-reference/pipelines/decomposition/spark/classical_decomposition.md
- STL Decomposition: sdk/code-reference/pipelines/decomposition/spark/stl_decomposition.md
- MSTL Decomposition: sdk/code-reference/pipelines/decomposition/spark/mstl_decomposition.md
- Anomaly Detection:
- Spark:
- IQR:
- IQR Anomaly Detection: sdk/code-reference/pipelines/anomaly_detection/spark/iqr/iqr_anomaly_detection.md
- Decomposition IQR Anomaly Detection: sdk/code-reference/pipelines/anomaly_detection/spark/iqr/decomposition_iqr_anomaly_detection.md
- MAD:
- MAD Anomaly Detection: sdk/code-reference/pipelines/anomaly_detection/spark/mad/mad_anomaly_detection.md
- Visualization:
- Matplotlib:
- Anomaly Detection: sdk/code-reference/pipelines/visualization/matplotlib/anomaly_detection.md
- Model Comparison: sdk/code-reference/pipelines/visualization/matplotlib/comparison.md
- Decomposition: sdk/code-reference/pipelines/visualization/matplotlib/decomposition.md
- Forecasting: sdk/code-reference/pipelines/visualization/matplotlib/forecasting.md
- Plotly:
- Anomaly Detection: sdk/code-reference/pipelines/visualization/plotly/anomaly_detection.md
- Model Comparison: sdk/code-reference/pipelines/visualization/plotly/comparison.md
- Decomposition: sdk/code-reference/pipelines/visualization/plotly/decomposition.md
- Forecasting: sdk/code-reference/pipelines/visualization/plotly/forecasting.md

- Jobs: sdk/pipelines/jobs.md
- Deploy:
Expand Down
13 changes: 13 additions & 0 deletions src/sdk/python/rtdip_sdk/pipelines/anomaly_detection/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright 2025 RTDIP
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
29 changes: 29 additions & 0 deletions src/sdk/python/rtdip_sdk/pipelines/anomaly_detection/interfaces.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright 2025 RTDIP
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import abstractmethod

from great_expectations.compatibility.pyspark import DataFrame

from ..interfaces import PipelineComponentBaseInterface


class AnomalyDetectionInterface(PipelineComponentBaseInterface):
    """Interface for anomaly detection pipeline components.

    Extends ``PipelineComponentBaseInterface`` with two members marked
    abstract: the constructor and ``detect``. Implementations are expected
    to override both.
    """

    @abstractmethod
    def __init__(self):
        """Set up the component; intentionally empty at interface level."""

    @abstractmethod
    def detect(self, df: DataFrame) -> DataFrame:
        """Run anomaly detection over ``df``.

        Parameters:
            df (DataFrame): Input data to inspect.

        Returns:
            DataFrame: Result produced by the concrete implementation.
                NOTE(review): the interface does not say whether this holds
                only the anomalous rows or the full input with a flag
                column - confirm against the implementing components.
        """
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright 2025 RTDIP
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from .iqr_anomaly_detection import IQRAnomalyDetectionComponent
from .decomposition_iqr_anomaly_detection import (
DecompositionIQRAnomalyDetectionComponent,
)

# Names exported by `from <package> import *`; both are the component
# classes imported at the top of this module.
__all__ = [
"IQRAnomalyDetectionComponent",
"DecompositionIQRAnomalyDetectionComponent",
]
Loading