bikes.core.metrics
Evaluate model performance with metrics.
1"""Evaluate model performances with metrics.""" 2 3# %% IMPORTS 4 5from __future__ import annotations 6 7import abc 8import typing as T 9 10import mlflow 11import pandas as pd 12import pydantic as pdt 13from sklearn import metrics 14 15from bikes.core import models, schemas 16 17# %% TYPINGS 18 19MlflowMetric: T.TypeAlias = mlflow.metrics.MetricValue 20MlflowThreshold: T.TypeAlias = mlflow.models.MetricThreshold 21MlflowModelValidationFailedException: T.TypeAlias = ( 22 mlflow.models.evaluation.validation.ModelValidationFailedException 23) 24 25# %% METRICS 26 27 28class Metric(abc.ABC, pdt.BaseModel, strict=True, frozen=True, extra="forbid"): 29 """Base class for a project metric. 30 31 Use metrics to evaluate model performance. 32 e.g., accuracy, precision, recall, MAE, F1, ... 33 34 Parameters: 35 name (str): name of the metric for the reporting. 36 greater_is_better (bool): maximize or minimize result. 37 """ 38 39 KIND: str 40 41 name: str 42 greater_is_better: bool 43 44 @abc.abstractmethod 45 def score(self, targets: schemas.Targets, outputs: schemas.Outputs) -> float: 46 """Score the outputs against the targets. 47 48 Args: 49 targets (schemas.Targets): expected values. 50 outputs (schemas.Outputs): predicted values. 51 52 Returns: 53 float: single result from the metric computation. 54 """ 55 56 def scorer( 57 self, model: models.Model, inputs: schemas.Inputs, targets: schemas.Targets 58 ) -> float: 59 """Score model outputs against targets. 60 61 Args: 62 model (models.Model): model to evaluate. 63 inputs (schemas.Inputs): model inputs values. 64 targets (schemas.Targets): model expected values. 65 66 Returns: 67 float: single result from the metric computation. 68 """ 69 outputs = model.predict(inputs=inputs) 70 score = self.score(targets=targets, outputs=outputs) 71 return score 72 73 def to_mlflow(self) -> MlflowMetric: 74 """Convert the metric to an Mlflow metric. 75 76 Returns: 77 MlflowMetric: the Mlflow metric. 78 """ 79 80 def eval_fn(predictions: pd.Series[int], targets: pd.Series[int]) -> MlflowMetric: 81 """Evaluation function associated with the mlflow metric. 82 83 Args: 84 predictions (pd.Series): model predictions. 85 targets (pd.Series | None): model targets. 86 87 Returns: 88 MlflowMetric: the mlflow metric. 89 """ 90 score_targets = schemas.Targets( 91 {schemas.TargetsSchema.cnt: targets}, index=targets.index 92 ) 93 score_outputs = schemas.Outputs( 94 {schemas.OutputsSchema.prediction: predictions}, index=predictions.index 95 ) 96 sign = 1 if self.greater_is_better else -1 # reverse the effect 97 score = self.score(targets=score_targets, outputs=score_outputs) 98 return MlflowMetric(aggregate_results={self.name: score * sign}) 99 100 return mlflow.metrics.make_metric( 101 eval_fn=eval_fn, name=self.name, greater_is_better=self.greater_is_better 102 ) 103 104 105class SklearnMetric(Metric): 106 """Compute metrics with sklearn. 107 108 Parameters: 109 name (str): name of the sklearn metric. 110 greater_is_better (bool): maximize or minimize. 
111 """ 112 113 KIND: T.Literal["SklearnMetric"] = "SklearnMetric" 114 115 name: str = "mean_squared_error" 116 greater_is_better: bool = False 117 118 @T.override 119 def score(self, targets: schemas.Targets, outputs: schemas.Outputs) -> float: 120 metric = getattr(metrics, self.name) 121 sign = 1 if self.greater_is_better else -1 122 y_true = targets[schemas.TargetsSchema.cnt] 123 y_pred = outputs[schemas.OutputsSchema.prediction] 124 score = metric(y_pred=y_pred, y_true=y_true) * sign 125 return float(score) 126 127 128MetricKind = SklearnMetric 129 130# %% THRESHOLDS 131 132 133class Threshold(abc.ABC, pdt.BaseModel, strict=True, frozen=True, extra="forbid"): 134 """A project threshold for a metric. 135 136 Use thresholds to monitor model performances. 137 e.g., to trigger an alert when a threshold is met. 138 139 Parameters: 140 threshold (int | float): absolute threshold value. 141 greater_is_better (bool): maximize or minimize result. 142 """ 143 144 threshold: int | float 145 greater_is_better: bool 146 147 def to_mlflow(self) -> MlflowThreshold: 148 """Convert the threshold to an mlflow threshold. 149 150 Returns: 151 MlflowThreshold: the mlflow threshold. 152 """ 153 return MlflowThreshold(threshold=self.threshold, greater_is_better=self.greater_is_better)
MlflowMetric: TypeAlias = mlflow.metrics.base.MetricValue
MlflowThreshold: TypeAlias = mlflow.models.evaluation.validation.MetricThreshold
MlflowModelValidationFailedException: TypeAlias = mlflow.models.evaluation.validation.ModelValidationFailedException
class Metric(abc.ABC, pydantic.main.BaseModel):
Base class for a project metric.
Use metrics to evaluate model performance, e.g., accuracy, precision, recall, MAE, F1, ...
Arguments:
- name (str): name of the metric for the reporting.
- greater_is_better (bool): maximize or minimize result.
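To make the contract concrete, here is a hedged sketch of a custom subclass; the MAPE choice and the class name are illustrative, not part of the module:

import typing as T

from sklearn import metrics as sk_metrics

from bikes.core import metrics, schemas

class MapeMetric(metrics.Metric):
    """Hypothetical metric: mean absolute percentage error."""

    KIND: T.Literal["MapeMetric"] = "MapeMetric"

    name: str = "mean_absolute_percentage_error"
    greater_is_better: bool = False  # lower error is better

    @T.override
    def score(self, targets: schemas.Targets, outputs: schemas.Outputs) -> float:
        y_true = targets[schemas.TargetsSchema.cnt]
        y_pred = outputs[schemas.OutputsSchema.prediction]
        sign = 1 if self.greater_is_better else -1
        return float(sk_metrics.mean_absolute_percentage_error(y_true, y_pred) * sign)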
@abc.abstractmethod
def score(self, targets: pandera.typing.pandas.DataFrame[bikes.core.schemas.TargetsSchema], outputs: pandera.typing.pandas.DataFrame[bikes.core.schemas.OutputsSchema]) -> float:
Score the outputs against the targets.
Arguments:
- targets (schemas.Targets): expected values.
- outputs (schemas.Outputs): predicted values.
Returns:
float: single result from the metric computation.
def scorer(self, model: bikes.core.models.Model, inputs: pandera.typing.pandas.DataFrame[bikes.core.schemas.InputsSchema], targets: pandera.typing.pandas.DataFrame[bikes.core.schemas.TargetsSchema]) -> float:
Score model outputs against targets.
Arguments:
- model (models.Model): model to evaluate.
- inputs (schemas.Inputs): model inputs values.
- targets (schemas.Targets): model expected values.
Returns:
float: single result from the metric computation.
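For example, assuming a fitted model and schema-validated data from the project pipeline (the model, inputs, and targets names are hypothetical):

from bikes.core.metrics import SklearnMetric

# `model`, `inputs` and `targets` are assumed to come from the project pipeline:
metric = SklearnMetric(name="mean_squared_error", greater_is_better=False)
result = metric.scorer(model=model, inputs=inputs, targets=targets)
# Equivalent to: metric.score(targets=targets, outputs=model.predict(inputs=inputs))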
def to_mlflow(self) -> mlflow.metrics.base.MetricValue:
Convert the metric to an Mlflow metric.
Returns:
MlflowMetric: the Mlflow metric.
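A hedged sketch of plugging the converted metric into MLflow model evaluation; the extra_metrics argument exists in MLflow 2.x, while the model URI and eval_data variable are hypothetical:

import mlflow

from bikes.core.metrics import SklearnMetric

metric = SklearnMetric()  # defaults to mean_squared_error
results = mlflow.evaluate(
    model="models:/bikes/1",  # hypothetical model URI
    data=eval_data,  # DataFrame holding the features and the "cnt" target column
    targets="cnt",
    model_type="regressor",
    extra_metrics=[metric.to_mlflow()],
)
print(results.metrics)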
model_fields = {'KIND': FieldInfo(annotation=str, required=True), 'name': FieldInfo(annotation=str, required=True), 'greater_is_better': FieldInfo(annotation=bool, required=True)}
class SklearnMetric(Metric):
Compute metrics with sklearn.
Arguments:
- name (str): name of the sklearn metric.
- greater_is_better (bool): maximize or minimize result.
@T.override
def score(self, targets: pandera.typing.pandas.DataFrame[bikes.core.schemas.TargetsSchema], outputs: pandera.typing.pandas.DataFrame[bikes.core.schemas.OutputsSchema]) -> float:
Score the outputs against the targets.
Arguments:
- targets (schemas.Targets): expected values.
- outputs (schemas.Outputs): predicted values.
Returns:
float: single result from the metric computation.
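Because name is resolved with getattr on sklearn.metrics, any sklearn metric function taking y_true and y_pred keyword arguments can be configured. For instance, reusing the targets and outputs frames from the sketch at the top of the page:

from bikes.core.metrics import SklearnMetric

mse = SklearnMetric()  # name="mean_squared_error", greater_is_better=False
r2 = SklearnMetric(name="r2_score", greater_is_better=True)

# Both follow the "greater is better" convention after the sign flip:
mse_score = mse.score(targets=targets, outputs=outputs)  # negated MSE
r2_score = r2.score(targets=targets, outputs=outputs)  # plain R^2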
model_fields = {'KIND': FieldInfo(annotation=Literal['SklearnMetric'], required=False, default='SklearnMetric'), 'name': FieldInfo(annotation=str, required=False, default='mean_squared_error'), 'greater_is_better': FieldInfo(annotation=bool, required=False, default=False)}
MetricKind = SklearnMetric
class Threshold(abc.ABC, pydantic.main.BaseModel):
A project threshold for a metric.
Use thresholds to monitor model performance, e.g., to trigger an alert when a threshold is met.
Arguments:
- threshold (int | float): absolute threshold value.
- greater_is_better (bool): maximize or minimize result.
def to_mlflow(self) -> mlflow.models.evaluation.validation.MetricThreshold:
Convert the threshold to an mlflow threshold.
Returns:
MlflowThreshold: the mlflow threshold.
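A hedged sketch of wiring a threshold into MLflow model validation; validation_thresholds is the MLflow 2.x mechanism (newer releases move this to mlflow.validate_evaluation_results), and the model URI and eval_data variable are hypothetical. MLflow raises the exception aliased as MlflowModelValidationFailedException when a threshold is not met:

import mlflow

from bikes.core import metrics

metric = metrics.SklearnMetric()  # to_mlflow() reports the raw (positive) MSE
threshold = metrics.Threshold(threshold=100.0, greater_is_better=False)  # require MSE <= 100

try:
    mlflow.evaluate(
        model="models:/bikes/1",  # hypothetical model URI
        data=eval_data,
        targets="cnt",
        model_type="regressor",
        extra_metrics=[metric.to_mlflow()],
        validation_thresholds={metric.name: threshold.to_mlflow()},
    )
except metrics.MlflowModelValidationFailedException:
    print("model did not meet the threshold")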
model_fields = {'threshold': FieldInfo(annotation=Union[int, float], required=True), 'greater_is_better': FieldInfo(annotation=bool, required=True)}