# Source code for conmo.metrics.metric

from abc import ABC, abstractmethod
from os import path
from typing import Iterable

import pandas as pd

from conmo.conf import File, Index, Label


class Metric(ABC):
    """
    Abstract base class for metrics.

    Subclasses must implement :meth:`calculate`. The remaining methods are
    shared helpers: they load the ground-truth labels and the algorithms'
    results from parquet files, inspect the layout of the labels, and write
    the metric's output back to disk.
    """

    @abstractmethod
    def calculate(self, idx: int, algorithms: Iterable[str], last_preprocess_dir: str, algorithms_dir: str, metrics_dir: str) -> None:
        """
        Calculates specific metric for each of the algorithms' results.

        Parameters
        ----------
        idx: int
            Index of the metric in the Experiment. Useful in case you want to calculate several metrics.
        algorithms: Iterable[str]
            List of names of the selected algorithms.
        last_preprocess_dir: str
            Name of the directory where the ground truth is located.
        algorithms_dir: str
            Name of the directory where the results of the algorithms executions are stored.
        metrics_dir: str
            Name of the directory where the results will be stored.
        """

    def problem_label(self, truth: pd.DataFrame) -> str:
        """
        Determines the nature of the problem by identifying the column's name of the labels.

        The checks are applied in order: anomaly column, RUL column, and
        finally an exact match of the full column list against the batteries
        degradation types.

        Parameters
        ----------
        truth: Pandas Dataframe
            Labels file of the dataset.

        Returns
        -------
        str
            Returns the column for the metric.

        Raises
        ------
        RuntimeError
            If the labels of the ground truth are invalid for the problem.
        """
        if Label.ANOMALY in truth.columns:
            return Label.ANOMALY
        elif Label.RUL in truth.columns:
            return Label.RUL
        elif Label.BATTERIES_DEG_TYPES == truth.columns.values.tolist():
            # It's a problem related with batteries degradation.
            # NOTE(review): this value is compared against a list of column
            # names, so it is presumably a list rather than a str -- confirm
            # against conmo.conf.Label and the callers of this method.
            return Label.BATTERIES_DEG_TYPES
        else:
            raise RuntimeError("Invalid labels for ground truth.")

    def labels_per_sequence(self, labels: pd.DataFrame) -> bool:
        """
        Use only with time series datasets. Checks if the labels file of the chosen dataset has an index format with sequences only or sequences and time.
        *This method in future updates will be changed to a specific class for time series.*

        Parameters
        ----------
        labels: Pandas Dataframe
            Labels file of the dataset.

        Returns
        -------
        bool
            True if the labels index has 2 levels and the second one is the sequence,
            or False if it has 3 levels and the second and third ones are sequence and time.

        Raises
        ------
        RuntimeError
            If the index levels do not match either of the two layouts above.
        """
        # First level is FOLD
        if labels.index.nlevels == 2 and labels.index.names[1] == Index.SEQUENCE:
            return True
        elif labels.index.nlevels == 3 and labels.index.names[1] == Index.SEQUENCE and labels.index.names[2] == Index.TIME:
            return False
        else:
            raise RuntimeError("Invalid number of levels for labels.")

    def show_start_message(self) -> None:
        """
        Simple method to print on the terminal the name of the used metric.

        The name printed is the name of the concrete class of the instance.
        """
        print("\n+++ Metric {} +++".format(self.__class__.__name__))

    def load_truth(self, last_preprocess_dir: str) -> pd.DataFrame:
        """
        Load labels from the last preprocess directory.

        Parameters
        ----------
        last_preprocess_dir: str
            Last directory where the labels dataframe was stored.

        Returns
        -------
        Pandas Dataframe
            Dataframe containing the labels.
        """
        return pd.read_parquet(path.join(last_preprocess_dir, File.LABELS))

    def load_results(self, algorithm: str, algorithms_dir: str) -> pd.DataFrame:
        """
        Load results for a specific algorithm.

        The results are read from the parquet file ``<algorithm>.gz`` inside ``algorithms_dir``.

        Parameters
        ----------
        algorithm: str
            Name of the selected algorithm.
        algorithms_dir: str
            Name of the directory where the results of the algorithms executions are stored.

        Returns
        -------
        Pandas Dataframe
            Dataframe containing the results (predictions).
        """
        return pd.read_parquet(path.join(algorithms_dir, "{}.gz".format(algorithm)))

    def save_output(self, metric: pd.DataFrame, idx: int, metrics_dir: str) -> None:
        """
        Save metric's output to disk.

        The dataframe is printed on the terminal and then written, index included,
        as a gzip-compressed parquet file named ``<idx>_<ClassName>.gz``, where
        ``idx`` is zero-padded to two digits.

        Parameters
        ----------
        metric: Pandas Dataframe
            Dataframe containing the metric's results.
        idx: int
            Index of the metric in the Experiment. Useful in case you want to calculate several metrics.
        metrics_dir: str
            Name of the directory where the results will be stored.
        """
        name = "{:02}_{}".format(idx, self.__class__.__name__)
        print(metric)
        metric.to_parquet(path.join(metrics_dir, "{}.gz".format(
            name)), compression="gzip", index=True)