bikes.core.metrics
Evaluate model performances with metrics.
1"""Evaluate model performances with metrics.""" 2 3# %% IMPORTS 4 5from __future__ import annotations 6 7import abc 8import typing as T 9 10import mlflow 11import pandas as pd 12import pydantic as pdt 13from mlflow.metrics import MetricValue 14from sklearn import metrics as sklearn_metrics 15 16from bikes.core import models, schemas 17 18# %% TYPINGS 19 20MlflowMetric: T.TypeAlias = MetricValue 21MlflowThreshold: T.TypeAlias = mlflow.models.MetricThreshold 22MlflowModelValidationFailedException: T.TypeAlias = ( 23 mlflow.models.evaluation.validation.ModelValidationFailedException 24) 25 26# %% METRICS 27 28 29class Metric(abc.ABC, pdt.BaseModel, strict=True, frozen=True, extra="forbid"): 30 """Base class for a project metric. 31 32 Use metrics to evaluate model performance. 33 e.g., accuracy, precision, recall, MAE, F1, ... 34 35 Parameters: 36 name (str): name of the metric for the reporting. 37 greater_is_better (bool): maximize or minimize result. 38 """ 39 40 KIND: str 41 42 name: str 43 greater_is_better: bool 44 45 @abc.abstractmethod 46 def score(self, targets: schemas.Targets, outputs: schemas.Outputs) -> float: 47 """Score the outputs against the targets. 48 49 Args: 50 targets (schemas.Targets): expected values. 51 outputs (schemas.Outputs): predicted values. 52 53 Returns: 54 float: single result from the metric computation. 55 """ 56 57 def scorer( 58 self, model: models.Model, inputs: schemas.Inputs, targets: schemas.Targets 59 ) -> float: 60 """Score model outputs against targets. 61 62 Args: 63 model (models.Model): model to evaluate. 64 inputs (schemas.Inputs): model inputs values. 65 targets (schemas.Targets): model expected values. 66 67 Returns: 68 float: single result from the metric computation. 69 """ 70 outputs = model.predict(inputs=inputs) 71 score = self.score(targets=targets, outputs=outputs) 72 return score 73 74 def to_mlflow(self) -> MlflowMetric: 75 """Convert the metric to an Mlflow metric. 76 77 Returns: 78 MlflowMetric: the Mlflow metric. 79 """ 80 81 def eval_fn(predictions: pd.Series[int], targets: pd.Series[int]) -> MlflowMetric: 82 """Evaluation function associated with the mlflow metric. 83 84 Args: 85 predictions (pd.Series): model predictions. 86 targets (pd.Series | None): model targets. 87 88 Returns: 89 MlflowMetric: the mlflow metric. 90 """ 91 score_targets = schemas.Targets( 92 {schemas.TargetsSchema.cnt: targets}, index=targets.index 93 ) 94 score_outputs = schemas.Outputs( 95 {schemas.OutputsSchema.prediction: predictions}, index=predictions.index 96 ) 97 sign = 1 if self.greater_is_better else -1 # reverse the effect 98 score = self.score(targets=score_targets, outputs=score_outputs) 99 return MlflowMetric(aggregate_results={self.name: score * sign}) 100 101 return mlflow.metrics.make_metric( 102 eval_fn=eval_fn, name=self.name, greater_is_better=self.greater_is_better 103 ) 104 105 106class SklearnMetric(Metric): 107 """Compute metrics with sklearn. 108 109 Parameters: 110 name (str): name of the sklearn metric. 111 greater_is_better (bool): maximize or minimize. 
112 """ 113 114 KIND: T.Literal["SklearnMetric"] = "SklearnMetric" 115 116 name: str = "mean_squared_error" 117 greater_is_better: bool = False 118 119 @T.override 120 def score(self, targets: schemas.Targets, outputs: schemas.Outputs) -> float: 121 metric = getattr(sklearn_metrics, self.name) 122 sign = 1 if self.greater_is_better else -1 123 y_true = targets[schemas.TargetsSchema.cnt] 124 y_pred = outputs[schemas.OutputsSchema.prediction] 125 score = metric(y_pred=y_pred, y_true=y_true) * sign 126 return float(score) 127 128 129MetricKind = SklearnMetric 130MetricsKind: T.TypeAlias = list[T.Annotated[MetricKind, pdt.Field(discriminator="KIND")]] 131 132# %% THRESHOLDS 133 134 135class Threshold(abc.ABC, pdt.BaseModel, strict=True, frozen=True, extra="forbid"): 136 """A project threshold for a metric. 137 138 Use thresholds to monitor model performances. 139 e.g., to trigger an alert when a threshold is met. 140 141 Parameters: 142 threshold (int | float): absolute threshold value. 143 greater_is_better (bool): maximize or minimize result. 144 """ 145 146 threshold: int | float 147 greater_is_better: bool 148 149 def to_mlflow(self) -> MlflowThreshold: 150 """Convert the threshold to an mlflow threshold. 151 152 Returns: 153 MlflowThreshold: the mlflow threshold. 154 """ 155 return MlflowThreshold(threshold=self.threshold, greater_is_better=self.greater_is_better)
MlflowMetric: TypeAlias = mlflow.metrics.base.MetricValue
MlflowThreshold: TypeAlias = mlflow.models.evaluation.validation.MetricThreshold
MlflowModelValidationFailedException: TypeAlias = mlflow.models.evaluation.validation.ModelValidationFailedException
class Metric(abc.ABC, pydantic.main.BaseModel):
```python
class Metric(abc.ABC, pdt.BaseModel, strict=True, frozen=True, extra="forbid"):
    """Base class for a project metric.

    Use metrics to evaluate model performance.
    e.g., accuracy, precision, recall, MAE, F1, ...

    Parameters:
        name (str): name of the metric for the reporting.
        greater_is_better (bool): maximize or minimize result.
    """

    KIND: str

    name: str
    greater_is_better: bool

    @abc.abstractmethod
    def score(self, targets: schemas.Targets, outputs: schemas.Outputs) -> float:
        """Score the outputs against the targets.

        Args:
            targets (schemas.Targets): expected values.
            outputs (schemas.Outputs): predicted values.

        Returns:
            float: single result from the metric computation.
        """

    def scorer(
        self, model: models.Model, inputs: schemas.Inputs, targets: schemas.Targets
    ) -> float:
        """Score model outputs against targets.

        Args:
            model (models.Model): model to evaluate.
            inputs (schemas.Inputs): model inputs values.
            targets (schemas.Targets): model expected values.

        Returns:
            float: single result from the metric computation.
        """
        outputs = model.predict(inputs=inputs)
        score = self.score(targets=targets, outputs=outputs)
        return score

    def to_mlflow(self) -> MlflowMetric:
        """Convert the metric to an Mlflow metric.

        Returns:
            MlflowMetric: the Mlflow metric.
        """

        def eval_fn(predictions: pd.Series[int], targets: pd.Series[int]) -> MlflowMetric:
            """Evaluation function associated with the mlflow metric.

            Args:
                predictions (pd.Series): model predictions.
                targets (pd.Series | None): model targets.

            Returns:
                MlflowMetric: the mlflow metric.
            """
            score_targets = schemas.Targets(
                {schemas.TargetsSchema.cnt: targets}, index=targets.index
            )
            score_outputs = schemas.Outputs(
                {schemas.OutputsSchema.prediction: predictions}, index=predictions.index
            )
            sign = 1 if self.greater_is_better else -1  # reverse the effect
            score = self.score(targets=score_targets, outputs=score_outputs)
            return MlflowMetric(aggregate_results={self.name: score * sign})

        return mlflow.metrics.make_metric(
            eval_fn=eval_fn, name=self.name, greater_is_better=self.greater_is_better
        )
```
Base class for a project metric.
Use metrics to evaluate model performance. e.g., accuracy, precision, recall, MAE, F1, ...
Arguments:
- name (str): name of the metric for the reporting.
- greater_is_better (bool): maximize or minimize result.
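To add a new metric, subclass Metric, declare a KIND literal, and implement score. Below is a minimal sketch, assuming the bikes.core.schemas column names used throughout the source above (cnt for targets, prediction for outputs); the MaxErrorMetric name and its KIND value are hypothetical and not part of the package.

```python
import typing as T

from bikes.core import schemas
from bikes.core.metrics import Metric


class MaxErrorMetric(Metric):  # hypothetical subclass, for illustration only
    """Report the largest absolute error between targets and predictions."""

    KIND: T.Literal["MaxErrorMetric"] = "MaxErrorMetric"

    name: str = "max_error"
    greater_is_better: bool = False  # smaller errors are better

    @T.override
    def score(self, targets: schemas.Targets, outputs: schemas.Outputs) -> float:
        # Select the schema columns used by the rest of the module.
        y_true = targets[schemas.TargetsSchema.cnt]
        y_pred = outputs[schemas.OutputsSchema.prediction]
        return float((y_true - y_pred).abs().max())
```

Because KIND is the pydantic discriminator used by MetricsKind below, a new subclass would also need to be added to the MetricKind union to become selectable from configuration.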
@abc.abstractmethod
def score(self, targets: pandera.typing.pandas.DataFrame[bikes.core.schemas.TargetsSchema], outputs: pandera.typing.pandas.DataFrame[bikes.core.schemas.OutputsSchema]) -> float:
```python
    @abc.abstractmethod
    def score(self, targets: schemas.Targets, outputs: schemas.Outputs) -> float:
        """Score the outputs against the targets.

        Args:
            targets (schemas.Targets): expected values.
            outputs (schemas.Outputs): predicted values.

        Returns:
            float: single result from the metric computation.
        """
```
Score the outputs against the targets.
Arguments:
- targets (schemas.Targets): expected values.
- outputs (schemas.Outputs): predicted values.
Returns:
float: single result from the metric computation.
def scorer(self, model: bikes.core.models.Model, inputs: pandera.typing.pandas.DataFrame[bikes.core.schemas.InputsSchema], targets: pandera.typing.pandas.DataFrame[bikes.core.schemas.TargetsSchema]) -> float:
```python
    def scorer(
        self, model: models.Model, inputs: schemas.Inputs, targets: schemas.Targets
    ) -> float:
        """Score model outputs against targets.

        Args:
            model (models.Model): model to evaluate.
            inputs (schemas.Inputs): model inputs values.
            targets (schemas.Targets): model expected values.

        Returns:
            float: single result from the metric computation.
        """
        outputs = model.predict(inputs=inputs)
        score = self.score(targets=targets, outputs=outputs)
        return score
```
Score model outputs against targets.
Arguments:
- model (models.Model): model to evaluate.
- inputs (schemas.Inputs): model inputs values.
- targets (schemas.Targets): model expected values.
Returns:
float: single result from the metric computation.
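As a usage sketch: scorer bundles prediction and scoring into a single call, which makes a metric usable as a scoring callback during model selection. The model, inputs, and targets objects below are placeholders assumed to come from the rest of the project (e.g. bikes.core.models and the datasets layer).

```python
from bikes.core import metrics

metric = metrics.SklearnMetric()  # defaults to mean_squared_error, minimized

# model: models.Model, inputs: schemas.Inputs, targets: schemas.Targets
# are assumed to be produced by the project's training pipeline.
score = metric.scorer(model=model, inputs=inputs, targets=targets)
print(f"{metric.name}: {score}")
```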
def to_mlflow(self) -> mlflow.metrics.base.MetricValue:
```python
    def to_mlflow(self) -> MlflowMetric:
        """Convert the metric to an Mlflow metric.

        Returns:
            MlflowMetric: the Mlflow metric.
        """

        def eval_fn(predictions: pd.Series[int], targets: pd.Series[int]) -> MlflowMetric:
            """Evaluation function associated with the mlflow metric.

            Args:
                predictions (pd.Series): model predictions.
                targets (pd.Series | None): model targets.

            Returns:
                MlflowMetric: the mlflow metric.
            """
            score_targets = schemas.Targets(
                {schemas.TargetsSchema.cnt: targets}, index=targets.index
            )
            score_outputs = schemas.Outputs(
                {schemas.OutputsSchema.prediction: predictions}, index=predictions.index
            )
            sign = 1 if self.greater_is_better else -1  # reverse the effect
            score = self.score(targets=score_targets, outputs=score_outputs)
            return MlflowMetric(aggregate_results={self.name: score * sign})

        return mlflow.metrics.make_metric(
            eval_fn=eval_fn, name=self.name, greater_is_better=self.greater_is_better
        )
```
Convert the metric to an Mlflow metric.
Returns:
MlflowMetric: the Mlflow metric.
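A sketch of how the converted metric could be passed to MLflow's evaluation API; the model URI, evaluation DataFrame, and target column below are placeholders, and extra_metrics assumes a recent MLflow 2.x release.

```python
import mlflow

from bikes.core import metrics

metric = metrics.SklearnMetric(name="mean_absolute_error", greater_is_better=False)

results = mlflow.evaluate(
    model="models:/bikes/1",  # placeholder model URI
    data=evaluation_data,     # placeholder DataFrame with features and a "cnt" column
    targets="cnt",
    model_type="regressor",
    extra_metrics=[metric.to_mlflow()],
)
print(results.metrics)
```

Per the eval_fn source above, the sign factor reverses the negation that score applies to minimized metrics, so the value logged to MLflow stays on the metric's natural scale while greater_is_better conveys the optimization direction.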
class SklearnMetric(Metric):

```python
class SklearnMetric(Metric):
    """Compute metrics with sklearn.

    Parameters:
        name (str): name of the sklearn metric.
        greater_is_better (bool): maximize or minimize.
    """

    KIND: T.Literal["SklearnMetric"] = "SklearnMetric"

    name: str = "mean_squared_error"
    greater_is_better: bool = False

    @T.override
    def score(self, targets: schemas.Targets, outputs: schemas.Outputs) -> float:
        metric = getattr(sklearn_metrics, self.name)
        sign = 1 if self.greater_is_better else -1
        y_true = targets[schemas.TargetsSchema.cnt]
        y_pred = outputs[schemas.OutputsSchema.prediction]
        score = metric(y_pred=y_pred, y_true=y_true) * sign
        return float(score)
```
Compute metrics with sklearn.
Arguments:
- name (str): name of the sklearn metric.
- greater_is_better (bool): maximize or minimize.
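name must match a function in sklearn.metrics that accepts y_true and y_pred (e.g. mean_absolute_error, r2_score). A small sketch; plain pandas DataFrames stand in for the pandera-typed Targets and Outputs, following the cnt and prediction schema columns:

```python
import pandas as pd

from bikes.core import metrics, schemas

metric = metrics.SklearnMetric(name="mean_absolute_error", greater_is_better=False)

# Placeholder frames; real Targets/Outputs come from the project's datasets
# and from model predictions.
targets = pd.DataFrame({schemas.TargetsSchema.cnt: [100, 150, 120]})
outputs = pd.DataFrame({schemas.OutputsSchema.prediction: [110, 140, 125]})

# greater_is_better=False negates the result so that higher is always better.
print(metric.score(targets=targets, outputs=outputs))  # -> approximately -8.33
```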
@T.override
def score(self, targets: pandera.typing.pandas.DataFrame[bikes.core.schemas.TargetsSchema], outputs: pandera.typing.pandas.DataFrame[bikes.core.schemas.OutputsSchema]) -> float:
```python
    @T.override
    def score(self, targets: schemas.Targets, outputs: schemas.Outputs) -> float:
        metric = getattr(sklearn_metrics, self.name)
        sign = 1 if self.greater_is_better else -1
        y_true = targets[schemas.TargetsSchema.cnt]
        y_pred = outputs[schemas.OutputsSchema.prediction]
        score = metric(y_pred=y_pred, y_true=y_true) * sign
        return float(score)
```
Score the outputs against the targets.
Arguments:
- targets (schemas.Targets): expected values.
- outputs (schemas.Outputs): predicted values.
Returns:
float: single result from the metric computation.
MetricKind = SklearnMetric
MetricsKind: TypeAlias = list[typing.Annotated[SklearnMetric, pydantic.Field(discriminator="KIND")]]
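MetricsKind is intended as a pydantic field type, so that jobs or configuration files can select metrics by their KIND discriminator. A minimal sketch; the EvaluationConfig model is hypothetical and only illustrates the discriminated-list pattern:

```python
import pydantic as pdt

from bikes.core import metrics


class EvaluationConfig(pdt.BaseModel):  # hypothetical container, for illustration
    metrics: metrics.MetricsKind = [metrics.SklearnMetric()]


config = EvaluationConfig.model_validate(
    {"metrics": [{"KIND": "SklearnMetric", "name": "mean_absolute_error"}]}
)
print(config.metrics[0].name)  # -> "mean_absolute_error"
```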
class Threshold(abc.ABC, pydantic.main.BaseModel):
```python
class Threshold(abc.ABC, pdt.BaseModel, strict=True, frozen=True, extra="forbid"):
    """A project threshold for a metric.

    Use thresholds to monitor model performances.
    e.g., to trigger an alert when a threshold is met.

    Parameters:
        threshold (int | float): absolute threshold value.
        greater_is_better (bool): maximize or minimize result.
    """

    threshold: int | float
    greater_is_better: bool

    def to_mlflow(self) -> MlflowThreshold:
        """Convert the threshold to an mlflow threshold.

        Returns:
            MlflowThreshold: the mlflow threshold.
        """
        return MlflowThreshold(threshold=self.threshold, greater_is_better=self.greater_is_better)
```
A project threshold for a metric.
Use thresholds to monitor model performances. e.g., to trigger an alert when a threshold is met.
Arguments:
- threshold (int | float): absolute threshold value.
- greater_is_better (bool): maximize or minimize result.
def to_mlflow(self) -> mlflow.models.evaluation.validation.MetricThreshold:
```python
    def to_mlflow(self) -> MlflowThreshold:
        """Convert the threshold to an mlflow threshold.

        Returns:
            MlflowThreshold: the mlflow threshold.
        """
        return MlflowThreshold(threshold=self.threshold, greater_is_better=self.greater_is_better)
```
Convert the threshold to an mlflow threshold.
Returns:
MlflowThreshold: the mlflow threshold.
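Converted thresholds are typically handed to MLflow's model validation keyed by metric name. A sketch assuming the validation_thresholds argument of mlflow.evaluate available in MLflow 2.x (newer releases move this check into mlflow.validate_evaluation_results); the model URI and evaluation data are placeholders.

```python
import mlflow

from bikes.core import metrics

metric = metrics.SklearnMetric(name="r2_score", greater_is_better=True)
threshold = metrics.Threshold(threshold=0.5, greater_is_better=True)

try:
    mlflow.evaluate(
        model="models:/bikes/1",  # placeholder model URI
        data=evaluation_data,     # placeholder DataFrame
        targets="cnt",
        model_type="regressor",
        extra_metrics=[metric.to_mlflow()],
        validation_thresholds={metric.name: threshold.to_mlflow()},
    )
except metrics.MlflowModelValidationFailedException:
    print("Model performance is below the r2_score threshold.")
```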