Source code for conmo.preprocesses.binarizer
from typing import Iterable, Union
import pandas as pd
from conmo.conf import Index
from conmo.preprocesses.preprocess import ExtendedPreprocess
class Binarizer(ExtendedPreprocess):
    """Preprocess that replaces column values with the result of ``value < threshold``.

    Each selected cell becomes the outcome of comparing it against the
    configured threshold: true when the value is strictly less than the
    threshold, false when it is equal to or greater than it.
    """

    def __init__(self, to_data: Union[bool, Iterable[str]], to_labels: Union[bool, Iterable[str]], test_set: bool, threshold: Union[int, float]) -> None:
        """Forward the common options to the base class and store the threshold.

        Parameters
        ----------
        to_data:
            Passed unchanged to ``ExtendedPreprocess.__init__``.
            Presumably selects the data columns to process -- confirm
            against the base class.
        to_labels:
            Passed unchanged to ``ExtendedPreprocess.__init__``.
            Presumably selects the label columns to process -- confirm
            against the base class.
        test_set:
            Passed unchanged to ``ExtendedPreprocess.__init__``.
            ``transform`` later reads ``self.test_set``, which is expected
            to be set by the base-class initializer (not visible here).
        threshold:
            Value every selected cell is compared against with ``<``.
        """
        super().__init__(to_data, to_labels, test_set)
        self.threshold = threshold

    def transform(self, df: pd.DataFrame, columns: Iterable[str]) -> pd.DataFrame:
        """Binarize ``columns`` of ``df`` in place and return ``df``.

        Parameters
        ----------
        df:
            Frame to modify. It is mutated in place; no copy is made.
        columns:
            Column labels whose values are replaced.

        Returns
        -------
        pd.DataFrame
            The same ``df`` object that was passed in.
        """
        # Binarize columns: TRUE if the value is less than the threshold,
        # FALSE if the value is equal to or greater than the threshold.
        if self.test_set:
            # Every row of the selected columns is binarized.
            index_slice = pd.IndexSlice[:, columns]
        else:
            # Only TRAIN sequences: any value on the first row-index level,
            # Index.SET_TRAIN on the second.
            # NOTE(review): assumes df has a (at least) two-level row
            # MultiIndex -- confirm against callers.
            index_slice = pd.IndexSlice[(slice(None), Index.SET_TRAIN), columns]

        # The comparison result is written back over the original values.
        df.loc[index_slice] = df.loc[index_slice] < self.threshold
        return df