bikes.core.models
Define trainable machine learning models.
"""Define trainable machine learning models."""

# %% IMPORTS

import abc
import typing as T

import pydantic as pdt
import shap
from sklearn import compose, ensemble, pipeline, preprocessing

from bikes.core import schemas

# %% TYPES

# Model params
ParamKey = str
ParamValue = T.Any
Params = dict[ParamKey, ParamValue]

# %% MODELS


class Model(abc.ABC, pdt.BaseModel, strict=True, frozen=False, extra="forbid"):
    """Common interface every project model must implement.

    Wrapping an AI/ML framework behind this interface makes it
    easy to exchange one model implementation for another.
    """

    KIND: str

    def get_params(self, deep: bool = True) -> Params:
        """Collect the public parameters of the model.

        Args:
            deep (bool, optional): accepted but not used.

        Returns:
            Params: dumped fields whose names are neither private nor upper-case.
        """
        return {
            name: setting
            for name, setting in self.model_dump().items()
            if not (name.startswith("_") or name.isupper())
        }

    def set_params(self, **params: ParamValue) -> T.Self:
        """Assign the given params on the model, mutating it.

        Args:
            **params (ParamValue): attribute names mapped to their new values.

        Returns:
            T.Self: the same model instance.
        """
        for name in params:
            setattr(self, name, params[name])
        return self

    @abc.abstractmethod
    def fit(self, inputs: schemas.Inputs, targets: schemas.Targets) -> T.Self:
        """Train the model from inputs and their matching targets.

        Args:
            inputs (schemas.Inputs): model training inputs.
            targets (schemas.Targets): model training targets.

        Returns:
            T.Self: instance of the model.
        """

    @abc.abstractmethod
    def predict(self, inputs: schemas.Inputs) -> schemas.Outputs:
        """Compute the model outputs for the given inputs.

        Args:
            inputs (schemas.Inputs): model prediction inputs.

        Returns:
            schemas.Outputs: model prediction outputs.
        """

    def explain_model(self) -> schemas.FeatureImportances:
        """Explain the internal model structure.

        Raises:
            NotImplementedError: always, unless a subclass overrides this method.

        Returns:
            schemas.FeatureImportances: feature importances.
        """
        raise NotImplementedError()

    def explain_samples(self, inputs: schemas.Inputs) -> schemas.SHAPValues:
        """Explain model outputs on input samples.

        Args:
            inputs (schemas.Inputs): samples to explain.

        Raises:
            NotImplementedError: always, unless a subclass overrides this method.

        Returns:
            schemas.SHAPValues: SHAP values.
        """
        raise NotImplementedError()

    def get_internal_model(self) -> T.Any:
        """Return the internal model in the object.

        Raises:
            NotImplementedError: always, unless a subclass overrides this method.

        Returns:
            T.Any: any internal model (either empty or fitted).
        """
        raise NotImplementedError()


class BaselineSklearnModel(Model):
    """Simple baseline model based on scikit-learn.

    Parameters:
        max_depth (int): maximum depth of the random forest.
        n_estimators (int): number of estimators in the random forest.
        random_state (int, optional): random state of the machine learning pipeline.
    """

    KIND: T.Literal["BaselineSklearnModel"] = "BaselineSklearnModel"

    # params
    max_depth: int = 20
    n_estimators: int = 200
    random_state: int | None = 42
    # private
    _pipeline: pipeline.Pipeline | None = None
    _numericals: list[str] = [
        "yr",
        "mnth",
        "hr",
        "holiday",
        "weekday",
        "workingday",
        "temp",
        "atemp",
        "hum",
        "windspeed",
        "casual",
        # "registered",  # too correlated with target
    ]
    _categoricals: list[str] = [
        "season",
        "weathersit",
    ]

    @T.override
    def fit(self, inputs: schemas.Inputs, targets: schemas.Targets) -> "BaselineSklearnModel":
        """Fit a one-hot encoder + random forest pipeline on the data.

        Categorical columns are one-hot encoded, numerical columns are passed
        through unchanged and every other input column is dropped.

        Args:
            inputs (schemas.Inputs): model training inputs.
            targets (schemas.Targets): model training targets.

        Returns:
            BaselineSklearnModel: this instance, holding the fitted pipeline.
        """
        # subcomponents
        encoder = preprocessing.OneHotEncoder(sparse_output=False, handle_unknown="ignore")
        # components
        transformer = compose.ColumnTransformer(
            [
                ("categoricals", encoder, self._categoricals),
                ("numericals", "passthrough", self._numericals),
            ],
            remainder="drop",
        )
        regressor = ensemble.RandomForestRegressor(
            max_depth=self.max_depth,
            n_estimators=self.n_estimators,
            random_state=self.random_state,
        )
        # pipeline
        candidate = pipeline.Pipeline(
            steps=[
                ("transformer", transformer),
                ("regressor", regressor),
            ]
        )
        candidate.fit(X=inputs, y=targets[schemas.TargetsSchema.cnt])
        # Store the pipeline only once fitting succeeded: a failed fit must not
        # leave an unfitted pipeline behind nor discard a previously fitted one.
        self._pipeline = candidate
        return self

    @T.override
    def predict(self, inputs: schemas.Inputs) -> schemas.Outputs:
        """Predict with the internal pipeline, keeping the index of the inputs.

        Args:
            inputs (schemas.Inputs): model prediction inputs.

        Raises:
            ValueError: if no pipeline has been stored by ``fit`` yet.

        Returns:
            schemas.Outputs: predictions indexed like ``inputs``.
        """
        predictions = self.get_internal_model().predict(inputs)
        return schemas.Outputs(
            {schemas.OutputsSchema.prediction: predictions},
            index=inputs.index,
        )

    @T.override
    def explain_model(self) -> schemas.FeatureImportances:
        """Pair each transformed feature name with the regressor importance.

        Raises:
            ValueError: if no pipeline has been stored by ``fit`` yet.

        Returns:
            schemas.FeatureImportances: "feature" and "importance" columns.
        """
        steps = self.get_internal_model().named_steps
        regressor, transformer = steps["regressor"], steps["transformer"]
        names = transformer.get_feature_names_out()
        return schemas.FeatureImportances(
            data={
                "feature": names,
                "importance": regressor.feature_importances_,
            }
        )

    @T.override
    def explain_samples(self, inputs: schemas.Inputs) -> schemas.SHAPValues:
        """Compute SHAP values of the regressor on the transformed inputs.

        Args:
            inputs (schemas.Inputs): samples to explain.

        Raises:
            ValueError: if no pipeline has been stored by ``fit`` yet.

        Returns:
            schemas.SHAPValues: SHAP values, one column per transformed feature.
        """
        steps = self.get_internal_model().named_steps
        regressor, transformer = steps["regressor"], steps["transformer"]
        features = transformer.transform(X=inputs)
        explainer = shap.TreeExplainer(model=regressor)
        return schemas.SHAPValues(
            data=explainer.shap_values(X=features),
            columns=transformer.get_feature_names_out(),
        )

    @T.override
    def get_internal_model(self) -> pipeline.Pipeline:
        """Return the stored scikit-learn pipeline.

        Raises:
            ValueError: if no pipeline has been stored by ``fit`` yet.

        Returns:
            pipeline.Pipeline: the internal pipeline.
        """
        if self._pipeline is None:
            raise ValueError("Model is not fitted yet!")
        return self._pipeline


ModelKind = BaselineSklearnModel
class Model(abc.ABC, pdt.BaseModel, strict=True, frozen=False, extra="forbid"):
    """Common interface every project model must implement.

    Wrapping an AI/ML framework behind this interface makes it
    easy to exchange one model implementation for another.
    """

    KIND: str

    def get_params(self, deep: bool = True) -> Params:
        """Collect the public parameters of the model.

        Args:
            deep (bool, optional): accepted but not used.

        Returns:
            Params: dumped fields whose names are neither private nor upper-case.
        """
        return {
            name: setting
            for name, setting in self.model_dump().items()
            if not (name.startswith("_") or name.isupper())
        }

    def set_params(self, **params: ParamValue) -> T.Self:
        """Assign the given params on the model, mutating it.

        Args:
            **params (ParamValue): attribute names mapped to their new values.

        Returns:
            T.Self: the same model instance.
        """
        for name in params:
            setattr(self, name, params[name])
        return self

    @abc.abstractmethod
    def fit(self, inputs: schemas.Inputs, targets: schemas.Targets) -> T.Self:
        """Train the model from inputs and their matching targets.

        Args:
            inputs (schemas.Inputs): model training inputs.
            targets (schemas.Targets): model training targets.

        Returns:
            T.Self: instance of the model.
        """

    @abc.abstractmethod
    def predict(self, inputs: schemas.Inputs) -> schemas.Outputs:
        """Compute the model outputs for the given inputs.

        Args:
            inputs (schemas.Inputs): model prediction inputs.

        Returns:
            schemas.Outputs: model prediction outputs.
        """

    def explain_model(self) -> schemas.FeatureImportances:
        """Explain the internal model structure.

        Raises:
            NotImplementedError: always, unless a subclass overrides this method.

        Returns:
            schemas.FeatureImportances: feature importances.
        """
        raise NotImplementedError()

    def explain_samples(self, inputs: schemas.Inputs) -> schemas.SHAPValues:
        """Explain model outputs on input samples.

        Args:
            inputs (schemas.Inputs): samples to explain.

        Raises:
            NotImplementedError: always, unless a subclass overrides this method.

        Returns:
            schemas.SHAPValues: SHAP values.
        """
        raise NotImplementedError()

    def get_internal_model(self) -> T.Any:
        """Return the internal model in the object.

        Raises:
            NotImplementedError: always, unless a subclass overrides this method.

        Returns:
            T.Any: any internal model (either empty or fitted).
        """
        raise NotImplementedError()
Base class for a project model.
Use a model to adapt AI/ML frameworks. e.g., to swap easily one model with another.
def get_params(self, deep: bool = True) -> Params:
    """Collect the public parameters of the model.

    Args:
        deep (bool, optional): accepted but not used.

    Returns:
        Params: dumped fields whose names are neither private nor upper-case.
    """
    return {
        name: setting
        for name, setting in self.model_dump().items()
        if not (name.startswith("_") or name.isupper())
    }
Get the model params.
Arguments:
- deep (bool, optional): ignored.
Returns:
Params: internal model parameters.
def set_params(self, **params: ParamValue) -> T.Self:
    """Assign the given params on the model, mutating it.

    Args:
        **params (ParamValue): attribute names mapped to their new values.

    Returns:
        T.Self: the same model instance.
    """
    for name in params:
        setattr(self, name, params[name])
    return self
Set the model params in place.
Returns:
T.Self: instance of the model.
@abc.abstractmethod
def fit(self, inputs: schemas.Inputs, targets: schemas.Targets) -> T.Self:
    """Train the model from inputs and their matching targets.

    Args:
        inputs (schemas.Inputs): model training inputs.
        targets (schemas.Targets): model training targets.

    Returns:
        T.Self: instance of the model.
    """
Fit the model on the given inputs and targets.
Arguments:
- inputs (schemas.Inputs): model training inputs.
- targets (schemas.Targets): model training targets.
Returns:
T.Self: instance of the model.
@abc.abstractmethod
def predict(self, inputs: schemas.Inputs) -> schemas.Outputs:
    """Compute the model outputs for the given inputs.

    Args:
        inputs (schemas.Inputs): model prediction inputs.

    Returns:
        schemas.Outputs: model prediction outputs.
    """
Generate outputs with the model for the given inputs.
Arguments:
- inputs (schemas.Inputs): model prediction inputs.
Returns:
schemas.Outputs: model prediction outputs.
def explain_model(self) -> schemas.FeatureImportances:
    """Explain the internal model structure.

    Raises:
        NotImplementedError: always, unless a subclass overrides this method.

    Returns:
        schemas.FeatureImportances: feature importances.
    """
    raise NotImplementedError()
Explain the internal model structure.
Raises:
- NotImplementedError: method not implemented.
Returns:
schemas.FeatureImportances: feature importances.
def explain_samples(self, inputs: schemas.Inputs) -> schemas.SHAPValues:
    """Explain model outputs on input samples.

    Args:
        inputs (schemas.Inputs): samples to explain.

    Raises:
        NotImplementedError: always, unless a subclass overrides this method.

    Returns:
        schemas.SHAPValues: SHAP values.
    """
    raise NotImplementedError()
Explain model outputs on input samples.
Raises:
- NotImplementedError: method not implemented.
Returns:
schemas.SHAPValues: SHAP values.
def get_internal_model(self) -> T.Any:
    """Return the internal model in the object.

    Raises:
        NotImplementedError: always, unless a subclass overrides this method.

    Returns:
        T.Any: any internal model (either empty or fitted).
    """
    raise NotImplementedError()
Return the internal model in the object.
Raises:
- NotImplementedError: method not implemented.
Returns:
T.Any: any internal model (either empty or fitted).
Inherited Members
- pydantic.main.BaseModel
- BaseModel
- model_extra
- model_fields_set
- model_construct
- model_copy
- model_dump
- model_dump_json
- model_json_schema
- model_parametrized_name
- model_post_init
- model_rebuild
- model_validate
- model_validate_json
- model_validate_strings
- dict
- json
- parse_obj
- parse_raw
- parse_file
- from_orm
- construct
- copy
- schema
- schema_json
- validate
- update_forward_refs
class BaselineSklearnModel(Model):
    """Simple baseline model based on scikit-learn.

    Parameters:
        max_depth (int): maximum depth of the random forest.
        n_estimators (int): number of estimators in the random forest.
        random_state (int, optional): random state of the machine learning pipeline.
    """

    KIND: T.Literal["BaselineSklearnModel"] = "BaselineSklearnModel"

    # params
    max_depth: int = 20
    n_estimators: int = 200
    random_state: int | None = 42
    # private
    _pipeline: pipeline.Pipeline | None = None
    _numericals: list[str] = [
        "yr",
        "mnth",
        "hr",
        "holiday",
        "weekday",
        "workingday",
        "temp",
        "atemp",
        "hum",
        "windspeed",
        "casual",
        # "registered",  # too correlated with target
    ]
    _categoricals: list[str] = [
        "season",
        "weathersit",
    ]

    @T.override
    def fit(self, inputs: schemas.Inputs, targets: schemas.Targets) -> "BaselineSklearnModel":
        """Fit a one-hot encoder + random forest pipeline on the data.

        Categorical columns are one-hot encoded, numerical columns are passed
        through unchanged and every other input column is dropped.

        Args:
            inputs (schemas.Inputs): model training inputs.
            targets (schemas.Targets): model training targets.

        Returns:
            BaselineSklearnModel: this instance, holding the fitted pipeline.
        """
        # subcomponents
        encoder = preprocessing.OneHotEncoder(sparse_output=False, handle_unknown="ignore")
        # components
        transformer = compose.ColumnTransformer(
            [
                ("categoricals", encoder, self._categoricals),
                ("numericals", "passthrough", self._numericals),
            ],
            remainder="drop",
        )
        regressor = ensemble.RandomForestRegressor(
            max_depth=self.max_depth,
            n_estimators=self.n_estimators,
            random_state=self.random_state,
        )
        # pipeline
        candidate = pipeline.Pipeline(
            steps=[
                ("transformer", transformer),
                ("regressor", regressor),
            ]
        )
        candidate.fit(X=inputs, y=targets[schemas.TargetsSchema.cnt])
        # Store the pipeline only once fitting succeeded: a failed fit must not
        # leave an unfitted pipeline behind nor discard a previously fitted one.
        self._pipeline = candidate
        return self

    @T.override
    def predict(self, inputs: schemas.Inputs) -> schemas.Outputs:
        """Predict with the internal pipeline, keeping the index of the inputs.

        Args:
            inputs (schemas.Inputs): model prediction inputs.

        Raises:
            ValueError: if no pipeline has been stored by ``fit`` yet.

        Returns:
            schemas.Outputs: predictions indexed like ``inputs``.
        """
        predictions = self.get_internal_model().predict(inputs)
        return schemas.Outputs(
            {schemas.OutputsSchema.prediction: predictions},
            index=inputs.index,
        )

    @T.override
    def explain_model(self) -> schemas.FeatureImportances:
        """Pair each transformed feature name with the regressor importance.

        Raises:
            ValueError: if no pipeline has been stored by ``fit`` yet.

        Returns:
            schemas.FeatureImportances: "feature" and "importance" columns.
        """
        steps = self.get_internal_model().named_steps
        regressor, transformer = steps["regressor"], steps["transformer"]
        names = transformer.get_feature_names_out()
        return schemas.FeatureImportances(
            data={
                "feature": names,
                "importance": regressor.feature_importances_,
            }
        )

    @T.override
    def explain_samples(self, inputs: schemas.Inputs) -> schemas.SHAPValues:
        """Compute SHAP values of the regressor on the transformed inputs.

        Args:
            inputs (schemas.Inputs): samples to explain.

        Raises:
            ValueError: if no pipeline has been stored by ``fit`` yet.

        Returns:
            schemas.SHAPValues: SHAP values, one column per transformed feature.
        """
        steps = self.get_internal_model().named_steps
        regressor, transformer = steps["regressor"], steps["transformer"]
        features = transformer.transform(X=inputs)
        explainer = shap.TreeExplainer(model=regressor)
        return schemas.SHAPValues(
            data=explainer.shap_values(X=features),
            columns=transformer.get_feature_names_out(),
        )

    @T.override
    def get_internal_model(self) -> pipeline.Pipeline:
        """Return the stored scikit-learn pipeline.

        Raises:
            ValueError: if no pipeline has been stored by ``fit`` yet.

        Returns:
            pipeline.Pipeline: the internal pipeline.
        """
        if self._pipeline is None:
            raise ValueError("Model is not fitted yet!")
        return self._pipeline
Simple baseline model based on scikit-learn.
Arguments:
- max_depth (int): maximum depth of the random forest.
- n_estimators (int): number of estimators in the random forest.
- random_state (int, optional): random state of the machine learning pipeline.
@T.override
def fit(self, inputs: schemas.Inputs, targets: schemas.Targets) -> "BaselineSklearnModel":
    """Fit a one-hot encoder + random forest pipeline on the data.

    Categorical columns are one-hot encoded, numerical columns are passed
    through unchanged and every other input column is dropped.

    Args:
        inputs (schemas.Inputs): model training inputs.
        targets (schemas.Targets): model training targets.

    Returns:
        BaselineSklearnModel: this instance, holding the fitted pipeline.
    """
    # subcomponents
    encoder = preprocessing.OneHotEncoder(sparse_output=False, handle_unknown="ignore")
    # components
    transformer = compose.ColumnTransformer(
        [
            ("categoricals", encoder, self._categoricals),
            ("numericals", "passthrough", self._numericals),
        ],
        remainder="drop",
    )
    regressor = ensemble.RandomForestRegressor(
        max_depth=self.max_depth,
        n_estimators=self.n_estimators,
        random_state=self.random_state,
    )
    # pipeline
    candidate = pipeline.Pipeline(
        steps=[
            ("transformer", transformer),
            ("regressor", regressor),
        ]
    )
    candidate.fit(X=inputs, y=targets[schemas.TargetsSchema.cnt])
    # Store the pipeline only once fitting succeeded: a failed fit must not
    # leave an unfitted pipeline behind nor discard a previously fitted one.
    self._pipeline = candidate
    return self
Fit the model on the given inputs and targets.
Arguments:
- inputs (schemas.Inputs): model training inputs.
- targets (schemas.Targets): model training targets.
Returns:
T.Self: instance of the model.
@T.override
def predict(self, inputs: schemas.Inputs) -> schemas.Outputs:
    """Predict with the internal pipeline, keeping the index of the inputs.

    Args:
        inputs (schemas.Inputs): model prediction inputs.

    Raises:
        ValueError: if no pipeline has been stored by ``fit`` yet.

    Returns:
        schemas.Outputs: predictions indexed like ``inputs``.
    """
    predictions = self.get_internal_model().predict(inputs)
    return schemas.Outputs(
        {schemas.OutputsSchema.prediction: predictions},
        index=inputs.index,
    )
Generate outputs with the model for the given inputs.
Arguments:
- inputs (schemas.Inputs): model prediction inputs.
Returns:
schemas.Outputs: model prediction outputs.
@T.override
def explain_model(self) -> schemas.FeatureImportances:
    """Pair each transformed feature name with the regressor importance.

    Raises:
        ValueError: if no pipeline has been stored by ``fit`` yet.

    Returns:
        schemas.FeatureImportances: "feature" and "importance" columns.
    """
    steps = self.get_internal_model().named_steps
    regressor, transformer = steps["regressor"], steps["transformer"]
    names = transformer.get_feature_names_out()
    return schemas.FeatureImportances(
        data={
            "feature": names,
            "importance": regressor.feature_importances_,
        }
    )
Explain the internal model structure.
Raises:
- ValueError: the model is not fitted yet.
Returns:
schemas.FeatureImportances: feature importances.
@T.override
def explain_samples(self, inputs: schemas.Inputs) -> schemas.SHAPValues:
    """Compute SHAP values of the regressor on the transformed inputs.

    Args:
        inputs (schemas.Inputs): samples to explain.

    Raises:
        ValueError: if no pipeline has been stored by ``fit`` yet.

    Returns:
        schemas.SHAPValues: SHAP values, one column per transformed feature.
    """
    steps = self.get_internal_model().named_steps
    regressor, transformer = steps["regressor"], steps["transformer"]
    features = transformer.transform(X=inputs)
    explainer = shap.TreeExplainer(model=regressor)
    return schemas.SHAPValues(
        data=explainer.shap_values(X=features),
        columns=transformer.get_feature_names_out(),
    )
Explain model outputs on input samples.
Raises:
- ValueError: the model is not fitted yet.
Returns:
schemas.SHAPValues: SHAP values.
215 @T.override 216 def get_internal_model(self) -> pipeline.Pipeline: 217 model = self._pipeline 218 if model is None: 219 raise ValueError("Model is not fitted yet!") 220 return model
Return the internal model in the object.
Raises:
- ValueError: the model is not fitted yet.
Returns:
pipeline.Pipeline: the internal scikit-learn pipeline.
def init_private_attributes(self: BaseModel, context: Any, /) -> None:
    """Populate the private-attribute storage of a model instance.

    Written to act like a BaseModel method; the ``context`` parameter exists
    because pydantic-core supplies it when invoking this function. Storage
    that is already initialised (not None) is left untouched.

    Args:
        self: The BaseModel instance.
        context: The context.
    """
    if getattr(self, '__pydantic_private__', None) is not None:
        return
    defaults = {}
    for attr_name, attr in self.__private_attributes__.items():
        value = attr.get_default()
        # Attributes without a default are simply not stored.
        if value is not PydanticUndefined:
            defaults[attr_name] = value
    object_setattr(self, '__pydantic_private__', defaults)
This function is meant to behave like a BaseModel method to initialise private attributes.
It takes context as an argument since that's what pydantic-core passes when calling it.
Arguments:
- self: The BaseModel instance.
- context: The context.
Inherited Members
- pydantic.main.BaseModel
- BaseModel
- model_extra
- model_fields_set
- model_construct
- model_copy
- model_dump
- model_dump_json
- model_json_schema
- model_parametrized_name
- model_rebuild
- model_validate
- model_validate_json
- model_validate_strings
- dict
- json
- parse_obj
- parse_raw
- parse_file
- from_orm
- construct
- copy
- schema
- schema_json
- validate
- update_forward_refs