bikes.core.metrics

Evaluate model performance with metrics.

  1"""Evaluate model performances with metrics."""
  2
  3# %% IMPORTS
  4
  5from __future__ import annotations
  6
  7import abc
  8import typing as T
  9
 10import mlflow
 11import pandas as pd
 12import pydantic as pdt
 13from mlflow.metrics import MetricValue
 14from sklearn import metrics as sklearn_metrics
 15
 16from bikes.core import models, schemas
 17
 18# %% TYPINGS
 19
 20MlflowMetric: T.TypeAlias = MetricValue
 21MlflowThreshold: T.TypeAlias = mlflow.models.MetricThreshold
 22MlflowModelValidationFailedException: T.TypeAlias = (
 23    mlflow.models.evaluation.validation.ModelValidationFailedException
 24)
 25
 26# %% METRICS
 27
 28
 29class Metric(abc.ABC, pdt.BaseModel, strict=True, frozen=True, extra="forbid"):
 30    """Base class for a project metric.
 31
 32    Use metrics to evaluate model performance.
 33    e.g., accuracy, precision, recall, MAE, F1, ...
 34
 35    Parameters:
 36        name (str): name of the metric for the reporting.
 37        greater_is_better (bool): maximize or minimize result.
 38    """
 39
 40    KIND: str
 41
 42    name: str
 43    greater_is_better: bool
 44
 45    @abc.abstractmethod
 46    def score(self, targets: schemas.Targets, outputs: schemas.Outputs) -> float:
 47        """Score the outputs against the targets.
 48
 49        Args:
 50            targets (schemas.Targets): expected values.
 51            outputs (schemas.Outputs): predicted values.
 52
 53        Returns:
 54            float: single result from the metric computation.
 55        """
 56
 57    def scorer(
 58        self, model: models.Model, inputs: schemas.Inputs, targets: schemas.Targets
 59    ) -> float:
 60        """Score model outputs against targets.
 61
 62        Args:
 63            model (models.Model): model to evaluate.
 64            inputs (schemas.Inputs): model inputs values.
 65            targets (schemas.Targets): model expected values.
 66
 67        Returns:
 68            float: single result from the metric computation.
 69        """
 70        outputs = model.predict(inputs=inputs)
 71        score = self.score(targets=targets, outputs=outputs)
 72        return score
 73
 74    def to_mlflow(self) -> MlflowMetric:
 75        """Convert the metric to an Mlflow metric.
 76
 77        Returns:
 78            MlflowMetric: the Mlflow metric.
 79        """
 80
 81        def eval_fn(predictions: pd.Series[int], targets: pd.Series[int]) -> MlflowMetric:
 82            """Evaluation function associated with the mlflow metric.
 83
 84            Args:
 85                predictions (pd.Series): model predictions.
 86                targets (pd.Series | None): model targets.
 87
 88            Returns:
 89                MlflowMetric: the mlflow metric.
 90            """
 91            score_targets = schemas.Targets(
 92                {schemas.TargetsSchema.cnt: targets}, index=targets.index
 93            )
 94            score_outputs = schemas.Outputs(
 95                {schemas.OutputsSchema.prediction: predictions}, index=predictions.index
 96            )
 97            sign = 1 if self.greater_is_better else -1  # reverse the effect
 98            score = self.score(targets=score_targets, outputs=score_outputs)
 99            return MlflowMetric(aggregate_results={self.name: score * sign})
100
101        return mlflow.metrics.make_metric(
102            eval_fn=eval_fn, name=self.name, greater_is_better=self.greater_is_better
103        )
104
105
106class SklearnMetric(Metric):
107    """Compute metrics with sklearn.
108
109    Parameters:
110        name (str): name of the sklearn metric.
111        greater_is_better (bool): maximize or minimize.
112    """
113
114    KIND: T.Literal["SklearnMetric"] = "SklearnMetric"
115
116    name: str = "mean_squared_error"
117    greater_is_better: bool = False
118
119    @T.override
120    def score(self, targets: schemas.Targets, outputs: schemas.Outputs) -> float:
121        metric = getattr(sklearn_metrics, self.name)
122        sign = 1 if self.greater_is_better else -1
123        y_true = targets[schemas.TargetsSchema.cnt]
124        y_pred = outputs[schemas.OutputsSchema.prediction]
125        score = metric(y_pred=y_pred, y_true=y_true) * sign
126        return float(score)
127
128
129MetricKind = SklearnMetric
130MetricsKind: T.TypeAlias = list[T.Annotated[MetricKind, pdt.Field(discriminator="KIND")]]
131
132# %% THRESHOLDS
133
134
135class Threshold(abc.ABC, pdt.BaseModel, strict=True, frozen=True, extra="forbid"):
136    """A project threshold for a metric.
137
138    Use thresholds to monitor model performances.
139    e.g., to trigger an alert when a threshold is met.
140
141    Parameters:
142        threshold (int | float): absolute threshold value.
143        greater_is_better (bool): maximize or minimize result.
144    """
145
146    threshold: int | float
147    greater_is_better: bool
148
149    def to_mlflow(self) -> MlflowThreshold:
150        """Convert the threshold to an mlflow threshold.
151
152        Returns:
153            MlflowThreshold: the mlflow threshold.
154        """
155        return MlflowThreshold(threshold=self.threshold, greater_is_better=self.greater_is_better)
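MetricKind and MetricsKind exist so that metrics can be declared in configuration and parsed by pydantic through the KIND discriminator. A minimal sketch of that round-trip follows, assuming a pydantic v2-style TypeAdapter; in the project, MetricsKind would normally appear as a field on a job or settings model instead.

import pydantic as pdt

from bikes.core import metrics

# Parse a config-style payload into concrete metric objects via the KIND discriminator.
adapter = pdt.TypeAdapter(metrics.MetricsKind)
parsed = adapter.validate_python(
    [{"KIND": "SklearnMetric", "name": "mean_absolute_error", "greater_is_better": False}]
)
print(type(parsed[0]).__name__, parsed[0].name)  # SklearnMetric mean_absolute_error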
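Finally, to_mlflow() on both classes bridges the project types to MLflow's evaluation API. The sketch below shows the intended wiring; the mlflow.evaluate call is commented out, model_uri and eval_data are placeholders, and the exact evaluate/validation signature depends on your MLflow version, so treat that part as an assumption rather than a guaranteed API.

from bikes.core import metrics

metric = metrics.SklearnMetric(name="mean_absolute_error", greater_is_better=False)
threshold = metrics.Threshold(threshold=50, greater_is_better=False)

mlflow_metric = metric.to_mlflow()        # object returned by mlflow.metrics.make_metric
mlflow_threshold = threshold.to_mlflow()  # mlflow.models.MetricThreshold instance

# Hypothetical MLflow 2.x-style wiring (requires `import mlflow`, a logged model,
# and an eval_data DataFrame holding the features plus the "cnt" target column):
# mlflow.evaluate(
#     model=model_uri,
#     data=eval_data,
#     targets="cnt",
#     model_type="regressor",
#     extra_metrics=[mlflow_metric],
#     validation_thresholds={metric.name: mlflow_threshold},
# )
# A failed threshold check raises metrics.MlflowModelValidationFailedException.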