bikes.core.metrics
Evaluate model performances with metrics.
1"""Evaluate model performances with metrics.""" 2 3# %% IMPORTS 4 5from __future__ import annotations 6 7import abc 8import typing as T 9 10import mlflow 11import pandas as pd 12import pydantic as pdt 13from mlflow.metrics import MetricValue 14from sklearn import metrics as sklearn_metrics 15 16from bikes.core import models, schemas 17 18# %% TYPINGS 19 20MlflowMetric: T.TypeAlias = MetricValue 21MlflowThreshold: T.TypeAlias = mlflow.models.MetricThreshold 22MlflowModelValidationFailedException: T.TypeAlias = ( 23 mlflow.models.evaluation.validation.ModelValidationFailedException 24) 25 26# %% METRICS 27 28 29class Metric(abc.ABC, pdt.BaseModel, strict=True, frozen=True, extra="forbid"): 30 """Base class for a project metric. 31 32 Use metrics to evaluate model performance. 33 e.g., accuracy, precision, recall, MAE, F1, ... 34 35 Parameters: 36 name (str): name of the metric for the reporting. 37 greater_is_better (bool): maximize or minimize result. 38 """ 39 40 KIND: str 41 42 name: str 43 greater_is_better: bool 44 45 @abc.abstractmethod 46 def score(self, targets: schemas.Targets, outputs: schemas.Outputs) -> float: 47 """Score the outputs against the targets. 48 49 Args: 50 targets (schemas.Targets): expected values. 51 outputs (schemas.Outputs): predicted values. 52 53 Returns: 54 float: single result from the metric computation. 55 """ 56 57 def scorer( 58 self, model: models.Model, inputs: schemas.Inputs, targets: schemas.Targets 59 ) -> float: 60 """Score model outputs against targets. 61 62 Args: 63 model (models.Model): model to evaluate. 64 inputs (schemas.Inputs): model inputs values. 65 targets (schemas.Targets): model expected values. 66 67 Returns: 68 float: single result from the metric computation. 69 """ 70 outputs = model.predict(inputs=inputs) 71 score = self.score(targets=targets, outputs=outputs) 72 return score 73 74 def to_mlflow(self) -> MlflowMetric: 75 """Convert the metric to an Mlflow metric. 76 77 Returns: 78 MlflowMetric: the Mlflow metric. 79 """ 80 81 def eval_fn(predictions: pd.Series[int], targets: pd.Series[int]) -> MlflowMetric: 82 """Evaluation function associated with the mlflow metric. 83 84 Args: 85 predictions (pd.Series): model predictions. 86 targets (pd.Series | None): model targets. 87 88 Returns: 89 MlflowMetric: the mlflow metric. 90 """ 91 score_targets = schemas.Targets( 92 {schemas.TargetsSchema.cnt: targets}, index=targets.index 93 ) 94 score_outputs = schemas.Outputs( 95 {schemas.OutputsSchema.prediction: predictions}, index=predictions.index 96 ) 97 sign = 1 if self.greater_is_better else -1 # reverse the effect 98 score = self.score(targets=score_targets, outputs=score_outputs) 99 return MlflowMetric(aggregate_results={self.name: score * sign}) 100 101 return mlflow.metrics.make_metric( 102 eval_fn=eval_fn, name=self.name, greater_is_better=self.greater_is_better 103 ) 104 105 106class SklearnMetric(Metric): 107 """Compute metrics with sklearn. 108 109 Parameters: 110 name (str): name of the sklearn metric. 111 greater_is_better (bool): maximize or minimize. 
112 """ 113 114 KIND: T.Literal["SklearnMetric"] = "SklearnMetric" 115 116 name: str = "mean_squared_error" 117 greater_is_better: bool = False 118 119 @T.override 120 def score(self, targets: schemas.Targets, outputs: schemas.Outputs) -> float: 121 metric = getattr(sklearn_metrics, self.name) 122 sign = 1 if self.greater_is_better else -1 123 y_true = targets[schemas.TargetsSchema.cnt] 124 y_pred = outputs[schemas.OutputsSchema.prediction] 125 score = metric(y_pred=y_pred, y_true=y_true) * sign 126 return float(score) 127 128 129MetricKind = SklearnMetric 130MetricsKind: T.TypeAlias = list[T.Annotated[MetricKind, pdt.Field(discriminator="KIND")]] 131 132# %% THRESHOLDS 133 134 135class Threshold(abc.ABC, pdt.BaseModel, strict=True, frozen=True, extra="forbid"): 136 """A project threshold for a metric. 137 138 Use thresholds to monitor model performances. 139 e.g., to trigger an alert when a threshold is met. 140 141 Parameters: 142 threshold (int | float): absolute threshold value. 143 greater_is_better (bool): maximize or minimize result. 144 """ 145 146 threshold: int | float 147 greater_is_better: bool 148 149 def to_mlflow(self) -> MlflowThreshold: 150 """Convert the threshold to an mlflow threshold. 151 152 Returns: 153 MlflowThreshold: the mlflow threshold. 154 """ 155 return MlflowThreshold(threshold=self.threshold, greater_is_better=self.greater_is_better)
MlflowMetric: TypeAlias = mlflow.metrics.base.MetricValue
MlflowThreshold: TypeAlias = mlflow.models.evaluation.validation.MetricThreshold
MlflowModelValidationFailedException: TypeAlias = mlflow.models.evaluation.validation.ModelValidationFailedException
class Metric(abc.ABC, pydantic.main.BaseModel):
```python
class Metric(abc.ABC, pdt.BaseModel, strict=True, frozen=True, extra="forbid"):
    """Base class for a project metric.

    Use metrics to evaluate model performance.
    e.g., accuracy, precision, recall, MAE, F1, ...

    Parameters:
        name (str): name of the metric for the reporting.
        greater_is_better (bool): maximize or minimize result.
    """

    KIND: str

    name: str
    greater_is_better: bool

    @abc.abstractmethod
    def score(self, targets: schemas.Targets, outputs: schemas.Outputs) -> float:
        """Score the outputs against the targets.

        Args:
            targets (schemas.Targets): expected values.
            outputs (schemas.Outputs): predicted values.

        Returns:
            float: single result from the metric computation.
        """

    def scorer(
        self, model: models.Model, inputs: schemas.Inputs, targets: schemas.Targets
    ) -> float:
        """Score model outputs against targets.

        Args:
            model (models.Model): model to evaluate.
            inputs (schemas.Inputs): model inputs values.
            targets (schemas.Targets): model expected values.

        Returns:
            float: single result from the metric computation.
        """
        outputs = model.predict(inputs=inputs)
        score = self.score(targets=targets, outputs=outputs)
        return score

    def to_mlflow(self) -> MlflowMetric:
        """Convert the metric to an Mlflow metric.

        Returns:
            MlflowMetric: the Mlflow metric.
        """

        def eval_fn(predictions: pd.Series[int], targets: pd.Series[int]) -> MlflowMetric:
            """Evaluation function associated with the mlflow metric.

            Args:
                predictions (pd.Series): model predictions.
                targets (pd.Series | None): model targets.

            Returns:
                MlflowMetric: the mlflow metric.
            """
            score_targets = schemas.Targets(
                {schemas.TargetsSchema.cnt: targets}, index=targets.index
            )
            score_outputs = schemas.Outputs(
                {schemas.OutputsSchema.prediction: predictions}, index=predictions.index
            )
            sign = 1 if self.greater_is_better else -1  # reverse the effect
            score = self.score(targets=score_targets, outputs=score_outputs)
            return MlflowMetric(aggregate_results={self.name: score * sign})

        return mlflow.metrics.make_metric(
            eval_fn=eval_fn, name=self.name, greater_is_better=self.greater_is_better
        )
```
Base class for a project metric.
Use metrics to evaluate model performance. e.g., accuracy, precision, recall, MAE, F1, ...
Arguments:
- name (str): name of the metric for the reporting.
- greater_is_better (bool): maximize or minimize result.
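To add a new metric, subclass Metric, declare a KIND literal, and implement score. Below is a minimal sketch, assuming the bikes.core.schemas column names used throughout the source above (cnt for targets, prediction for outputs); the MaxErrorMetric name and its KIND value are hypothetical and not part of the package.

```python
import typing as T

from bikes.core import schemas
from bikes.core.metrics import Metric


class MaxErrorMetric(Metric):  # hypothetical subclass, for illustration only
    """Report the largest absolute error between targets and predictions."""

    KIND: T.Literal["MaxErrorMetric"] = "MaxErrorMetric"

    name: str = "max_error"
    greater_is_better: bool = False  # smaller errors are better

    @T.override
    def score(self, targets: schemas.Targets, outputs: schemas.Outputs) -> float:
        # Select the schema columns used by the rest of the module.
        y_true = targets[schemas.TargetsSchema.cnt]
        y_pred = outputs[schemas.OutputsSchema.prediction]
        return float((y_true - y_pred).abs().max())
```

Because KIND is the pydantic discriminator used by MetricsKind below, a new subclass would also need to be added to the MetricKind union to become selectable from configuration.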
@abc.abstractmethod
def score(self, targets: pandera.typing.pandas.DataFrame[bikes.core.schemas.TargetsSchema], outputs: pandera.typing.pandas.DataFrame[bikes.core.schemas.OutputsSchema]) -> float:
```python
    @abc.abstractmethod
    def score(self, targets: schemas.Targets, outputs: schemas.Outputs) -> float:
        """Score the outputs against the targets.

        Args:
            targets (schemas.Targets): expected values.
            outputs (schemas.Outputs): predicted values.

        Returns:
            float: single result from the metric computation.
        """
```
Score the outputs against the targets.
Arguments:
- targets (schemas.Targets): expected values.
- outputs (schemas.Outputs): predicted values.
Returns:
float: single result from the metric computation.
def scorer(self, model: bikes.core.models.Model, inputs: pandera.typing.pandas.DataFrame[bikes.core.schemas.InputsSchema], targets: pandera.typing.pandas.DataFrame[bikes.core.schemas.TargetsSchema]) -> float:
```python
    def scorer(
        self, model: models.Model, inputs: schemas.Inputs, targets: schemas.Targets
    ) -> float:
        """Score model outputs against targets.

        Args:
            model (models.Model): model to evaluate.
            inputs (schemas.Inputs): model inputs values.
            targets (schemas.Targets): model expected values.

        Returns:
            float: single result from the metric computation.
        """
        outputs = model.predict(inputs=inputs)
        score = self.score(targets=targets, outputs=outputs)
        return score
```
Score model outputs against targets.
Arguments:
- model (models.Model): model to evaluate.
- inputs (schemas.Inputs): model inputs values.
- targets (schemas.Targets): model expected values.
Returns:
float: single result from the metric computation.
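As a usage sketch: scorer bundles prediction and scoring into a single call, which makes a metric usable as a scoring callback during model selection. The model, inputs, and targets objects below are placeholders assumed to come from the rest of the project (e.g. bikes.core.models and the datasets layer).

```python
from bikes.core import metrics

metric = metrics.SklearnMetric()  # defaults to mean_squared_error, minimized

# model: models.Model, inputs: schemas.Inputs, targets: schemas.Targets
# are assumed to be produced by the project's training pipeline.
score = metric.scorer(model=model, inputs=inputs, targets=targets)
print(f"{metric.name}: {score}")
```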
def to_mlflow(self) -> mlflow.metrics.base.MetricValue:
```python
    def to_mlflow(self) -> MlflowMetric:
        """Convert the metric to an Mlflow metric.

        Returns:
            MlflowMetric: the Mlflow metric.
        """

        def eval_fn(predictions: pd.Series[int], targets: pd.Series[int]) -> MlflowMetric:
            """Evaluation function associated with the mlflow metric.

            Args:
                predictions (pd.Series): model predictions.
                targets (pd.Series | None): model targets.

            Returns:
                MlflowMetric: the mlflow metric.
            """
            score_targets = schemas.Targets(
                {schemas.TargetsSchema.cnt: targets}, index=targets.index
            )
            score_outputs = schemas.Outputs(
                {schemas.OutputsSchema.prediction: predictions}, index=predictions.index
            )
            sign = 1 if self.greater_is_better else -1  # reverse the effect
            score = self.score(targets=score_targets, outputs=score_outputs)
            return MlflowMetric(aggregate_results={self.name: score * sign})

        return mlflow.metrics.make_metric(
            eval_fn=eval_fn, name=self.name, greater_is_better=self.greater_is_better
        )
```
Convert the metric to an Mlflow metric.
Returns:
MlflowMetric: the Mlflow metric.
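A sketch of how the converted metric could be passed to MLflow's evaluation API; the model URI, evaluation DataFrame, and target column below are placeholders, and extra_metrics assumes a recent MLflow 2.x release.

```python
import mlflow

from bikes.core import metrics

metric = metrics.SklearnMetric(name="mean_absolute_error", greater_is_better=False)

results = mlflow.evaluate(
    model="models:/bikes/1",  # placeholder model URI
    data=evaluation_data,     # placeholder DataFrame with features and a "cnt" column
    targets="cnt",
    model_type="regressor",
    extra_metrics=[metric.to_mlflow()],
)
print(results.metrics)
```

Per the eval_fn source above, the sign factor reverses the negation that score applies to minimized metrics, so the value logged to MLflow stays on the metric's natural scale while greater_is_better conveys the optimization direction.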
class SklearnMetric(Metric):

```python
class SklearnMetric(Metric):
    """Compute metrics with sklearn.

    Parameters:
        name (str): name of the sklearn metric.
        greater_is_better (bool): maximize or minimize.
    """

    KIND: T.Literal["SklearnMetric"] = "SklearnMetric"

    name: str = "mean_squared_error"
    greater_is_better: bool = False

    @T.override
    def score(self, targets: schemas.Targets, outputs: schemas.Outputs) -> float:
        metric = getattr(sklearn_metrics, self.name)
        sign = 1 if self.greater_is_better else -1
        y_true = targets[schemas.TargetsSchema.cnt]
        y_pred = outputs[schemas.OutputsSchema.prediction]
        score = metric(y_pred=y_pred, y_true=y_true) * sign
        return float(score)
```
Compute metrics with sklearn.
Arguments:
- name (str): name of the sklearn metric.
- greater_is_better (bool): maximize or minimize.
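name must match a function in sklearn.metrics that accepts y_true and y_pred (e.g. mean_absolute_error, r2_score). A small sketch; plain pandas DataFrames stand in for the pandera-typed Targets and Outputs, following the cnt and prediction schema columns:

```python
import pandas as pd

from bikes.core import metrics, schemas

metric = metrics.SklearnMetric(name="mean_absolute_error", greater_is_better=False)

# Placeholder frames; real Targets/Outputs come from the project's datasets
# and from model predictions.
targets = pd.DataFrame({schemas.TargetsSchema.cnt: [100, 150, 120]})
outputs = pd.DataFrame({schemas.OutputsSchema.prediction: [110, 140, 125]})

# greater_is_better=False negates the result so that higher is always better.
print(metric.score(targets=targets, outputs=outputs))  # -> approximately -8.33
```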
@T.override
def score(self, targets: pandera.typing.pandas.DataFrame[bikes.core.schemas.TargetsSchema], outputs: pandera.typing.pandas.DataFrame[bikes.core.schemas.OutputsSchema]) -> float:
```python
    @T.override
    def score(self, targets: schemas.Targets, outputs: schemas.Outputs) -> float:
        metric = getattr(sklearn_metrics, self.name)
        sign = 1 if self.greater_is_better else -1
        y_true = targets[schemas.TargetsSchema.cnt]
        y_pred = outputs[schemas.OutputsSchema.prediction]
        score = metric(y_pred=y_pred, y_true=y_true) * sign
        return float(score)
```
Score the outputs against the targets.
Arguments:
- targets (schemas.Targets): expected values.
- outputs (schemas.Outputs): predicted values.
Returns:
float: single result from the metric computation.
MetricKind = SklearnMetric
MetricsKind: TypeAlias = list[typing.Annotated[SklearnMetric, pydantic.Field(discriminator="KIND")]]
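MetricsKind is intended as a pydantic field type, so that jobs or configuration files can select metrics by their KIND discriminator. A minimal sketch; the EvaluationConfig model is hypothetical and only illustrates the discriminated-list pattern:

```python
import pydantic as pdt

from bikes.core import metrics


class EvaluationConfig(pdt.BaseModel):  # hypothetical container, for illustration
    metrics: metrics.MetricsKind = [metrics.SklearnMetric()]


config = EvaluationConfig.model_validate(
    {"metrics": [{"KIND": "SklearnMetric", "name": "mean_absolute_error"}]}
)
print(config.metrics[0].name)  # -> "mean_absolute_error"
```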
class Threshold(abc.ABC, pydantic.main.BaseModel):
```python
class Threshold(abc.ABC, pdt.BaseModel, strict=True, frozen=True, extra="forbid"):
    """A project threshold for a metric.

    Use thresholds to monitor model performances.
    e.g., to trigger an alert when a threshold is met.

    Parameters:
        threshold (int | float): absolute threshold value.
        greater_is_better (bool): maximize or minimize result.
    """

    threshold: int | float
    greater_is_better: bool

    def to_mlflow(self) -> MlflowThreshold:
        """Convert the threshold to an mlflow threshold.

        Returns:
            MlflowThreshold: the mlflow threshold.
        """
        return MlflowThreshold(threshold=self.threshold, greater_is_better=self.greater_is_better)
```
A project threshold for a metric.
Use thresholds to monitor model performances. e.g., to trigger an alert when a threshold is met.
Arguments:
- threshold (int | float): absolute threshold value.
- greater_is_better (bool): maximize or minimize result.
def to_mlflow(self) -> mlflow.models.evaluation.validation.MetricThreshold:
```python
    def to_mlflow(self) -> MlflowThreshold:
        """Convert the threshold to an mlflow threshold.

        Returns:
            MlflowThreshold: the mlflow threshold.
        """
        return MlflowThreshold(threshold=self.threshold, greater_is_better=self.greater_is_better)
```
Convert the threshold to an mlflow threshold.
Returns:
MlflowThreshold: the mlflow threshold.
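Converted thresholds are typically handed to MLflow's model validation keyed by metric name. A sketch assuming the validation_thresholds argument of mlflow.evaluate available in MLflow 2.x (newer releases move this check into mlflow.validate_evaluation_results); the model URI and evaluation data are placeholders.

```python
import mlflow

from bikes.core import metrics

metric = metrics.SklearnMetric(name="r2_score", greater_is_better=True)
threshold = metrics.Threshold(threshold=0.5, greater_is_better=True)

try:
    mlflow.evaluate(
        model="models:/bikes/1",  # placeholder model URI
        data=evaluation_data,     # placeholder DataFrame
        targets="cnt",
        model_type="regressor",
        extra_metrics=[metric.to_mlflow()],
        validation_thresholds={metric.name: threshold.to_mlflow()},
    )
except metrics.MlflowModelValidationFailedException:
    print("Model performance is below the r2_score threshold.")
```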