bikes.core.models
Define trainable machine learning models.
1"""Define trainable machine learning models.""" 2 3# %% IMPORTS 4 5import abc 6import typing as T 7 8import pydantic as pdt 9import shap 10from sklearn import compose, ensemble, pipeline, preprocessing 11 12from bikes.core import schemas 13 14# %% TYPES 15 16# Model params 17ParamKey = str 18ParamValue = T.Any 19Params = dict[ParamKey, ParamValue] 20 21# %% MODELS 22 23 24class Model(abc.ABC, pdt.BaseModel, strict=True, frozen=False, extra="forbid"): 25 """Base class for a project model. 26 27 Use a model to adapt AI/ML frameworks. 28 e.g., to swap easily one model with another. 29 """ 30 31 KIND: str 32 33 def get_params(self, deep: bool = True) -> Params: 34 """Get the model params. 35 36 Args: 37 deep (bool, optional): ignored. 38 39 Returns: 40 Params: internal model parameters. 41 """ 42 params: Params = {} 43 for key, value in self.model_dump().items(): 44 if not key.startswith("_") and not key.isupper(): 45 params[key] = value 46 return params 47 48 def set_params(self, **params: ParamValue) -> T.Self: 49 """Set the model params in place. 50 51 Returns: 52 T.Self: instance of the model. 53 """ 54 for key, value in params.items(): 55 setattr(self, key, value) 56 return self 57 58 @abc.abstractmethod 59 def fit(self, inputs: schemas.Inputs, targets: schemas.Targets) -> T.Self: 60 """Fit the model on the given inputs and targets. 61 62 Args: 63 inputs (schemas.Inputs): model training inputs. 64 targets (schemas.Targets): model training targets. 65 66 Returns: 67 T.Self: instance of the model. 68 """ 69 70 @abc.abstractmethod 71 def predict(self, inputs: schemas.Inputs) -> schemas.Outputs: 72 """Generate outputs with the model for the given inputs. 73 74 Args: 75 inputs (schemas.Inputs): model prediction inputs. 76 77 Returns: 78 schemas.Outputs: model prediction outputs. 79 """ 80 81 def explain_model(self) -> schemas.FeatureImportances: 82 """Explain the internal model structure. 83 84 Returns: 85 schemas.FeatureImportances: feature importances. 86 """ 87 raise NotImplementedError() 88 89 def explain_samples(self, inputs: schemas.Inputs) -> schemas.SHAPValues: 90 """Explain model outputs on input samples. 91 92 Returns: 93 schemas.SHAPValues: SHAP values. 94 """ 95 raise NotImplementedError() 96 97 def get_internal_model(self) -> T.Any: 98 """Return the internal model in the object. 99 100 Raises: 101 NotImplementedError: method not implemented. 102 103 Returns: 104 T.Any: any internal model (either empty or fitted). 105 """ 106 raise NotImplementedError() 107 108 109class BaselineSklearnModel(Model): 110 """Simple baseline model based on scikit-learn. 111 112 Parameters: 113 max_depth (int): maximum depth of the random forest. 114 n_estimators (int): number of estimators in the random forest. 115 random_state (int, optional): random state of the machine learning pipeline. 
116 """ 117 118 KIND: T.Literal["BaselineSklearnModel"] = "BaselineSklearnModel" 119 120 # params 121 max_depth: int = 20 122 n_estimators: int = 200 123 random_state: int | None = 42 124 # private 125 _pipeline: pipeline.Pipeline | None = None 126 _numericals: list[str] = [ 127 "yr", 128 "mnth", 129 "hr", 130 "holiday", 131 "weekday", 132 "workingday", 133 "temp", 134 "atemp", 135 "hum", 136 "windspeed", 137 "casual", 138 "registered", # too correlated with target 139 ] 140 _categoricals: list[str] = [ 141 "season", 142 "weathersit", 143 ] 144 145 @T.override 146 def fit(self, inputs: schemas.Inputs, targets: schemas.Targets) -> "BaselineSklearnModel": 147 # subcomponents 148 categoricals_transformer = preprocessing.OneHotEncoder( 149 sparse_output=False, handle_unknown="ignore" 150 ) 151 # components 152 transformer = compose.ColumnTransformer( 153 [ 154 ("categoricals", categoricals_transformer, self._categoricals), 155 ("numericals", "passthrough", self._numericals), 156 ], 157 remainder="drop", 158 ) 159 regressor = ensemble.RandomForestRegressor( 160 max_depth=self.max_depth, 161 n_estimators=self.n_estimators, 162 random_state=self.random_state, 163 ) 164 # pipeline 165 self._pipeline = pipeline.Pipeline( 166 steps=[ 167 ("transformer", transformer), 168 ("regressor", regressor), 169 ] 170 ) 171 self._pipeline.fit(X=inputs, y=targets[schemas.TargetsSchema.cnt]) 172 return self 173 174 @T.override 175 def predict(self, inputs: schemas.Inputs) -> schemas.Outputs: 176 model = self.get_internal_model() 177 prediction = model.predict(inputs) 178 outputs = schemas.Outputs( 179 {schemas.OutputsSchema.prediction: prediction}, index=inputs.index 180 ) 181 return outputs 182 183 @T.override 184 def explain_model(self) -> schemas.FeatureImportances: 185 model = self.get_internal_model() 186 regressor = model.named_steps["regressor"] 187 transformer = model.named_steps["transformer"] 188 feature = transformer.get_feature_names_out() 189 feature_importances = schemas.FeatureImportances( 190 data={ 191 "feature": feature, 192 "importance": regressor.feature_importances_, 193 } 194 ) 195 return feature_importances 196 197 @T.override 198 def explain_samples(self, inputs: schemas.Inputs) -> schemas.SHAPValues: 199 model = self.get_internal_model() 200 regressor = model.named_steps["regressor"] 201 transformer = model.named_steps["transformer"] 202 transformed = transformer.transform(X=inputs) 203 explainer = shap.TreeExplainer(model=regressor) 204 shap_values = schemas.SHAPValues( 205 data=explainer.shap_values(X=transformed), 206 columns=transformer.get_feature_names_out(), 207 ) 208 return shap_values 209 210 @T.override 211 def get_internal_model(self) -> pipeline.Pipeline: 212 model = self._pipeline 213 if model is None: 214 raise ValueError("Model is not fitted yet!") 215 return model 216 217 218ModelKind = BaselineSklearnModel
Base class for a project model.
Use a model to adapt AI/ML frameworks, e.g., to swap one model for another easily.
```python
    def get_params(self, deep: bool = True) -> Params:
        """Get the model params.

        Args:
            deep (bool, optional): ignored.

        Returns:
            Params: internal model parameters.
        """
        params: Params = {}
        for key, value in self.model_dump().items():
            if not key.startswith("_") and not key.isupper():
                params[key] = value
        return params
```
Get the model params.
Arguments:
- deep (bool, optional): ignored.
Returns:
Params: internal model parameters.
```python
    def set_params(self, **params: ParamValue) -> T.Self:
        """Set the model params in place.

        Returns:
            T.Self: instance of the model.
        """
        for key, value in params.items():
            setattr(self, key, value)
        return self
```
Set the model params in place.
Returns:
T.Self: instance of the model.
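For illustration, a short sketch of how these scikit-learn-style accessors might be used with the concrete `BaselineSklearnModel` defined further down; the printed values simply reflect its defaults.

```python
from bikes.core import models

model = models.BaselineSklearnModel()
# KIND and private attributes are filtered out of the dump
print(model.get_params())  # {'max_depth': 20, 'n_estimators': 200, 'random_state': 42}

# update hyperparameters in place and read them back
model.set_params(max_depth=10, n_estimators=50)
assert model.get_params()["max_depth"] == 10
```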
```python
    @abc.abstractmethod
    def fit(self, inputs: schemas.Inputs, targets: schemas.Targets) -> T.Self:
        """Fit the model on the given inputs and targets.

        Args:
            inputs (schemas.Inputs): model training inputs.
            targets (schemas.Targets): model training targets.

        Returns:
            T.Self: instance of the model.
        """
```
Fit the model on the given inputs and targets.
Arguments:
- inputs (schemas.Inputs): model training inputs.
- targets (schemas.Targets): model training targets.
Returns:
T.Self: instance of the model.
```python
    @abc.abstractmethod
    def predict(self, inputs: schemas.Inputs) -> schemas.Outputs:
        """Generate outputs with the model for the given inputs.

        Args:
            inputs (schemas.Inputs): model prediction inputs.

        Returns:
            schemas.Outputs: model prediction outputs.
        """
```
Generate outputs with the model for the given inputs.
Arguments:
- inputs (schemas.Inputs): model prediction inputs.
Returns:
schemas.Outputs: model prediction outputs.
```python
    def explain_model(self) -> schemas.FeatureImportances:
        """Explain the internal model structure.

        Returns:
            schemas.FeatureImportances: feature importances.
        """
        raise NotImplementedError()
```
Explain the internal model structure.
Returns:
schemas.FeatureImportances: feature importances.
```python
    def explain_samples(self, inputs: schemas.Inputs) -> schemas.SHAPValues:
        """Explain model outputs on input samples.

        Returns:
            schemas.SHAPValues: SHAP values.
        """
        raise NotImplementedError()
```
Explain model outputs on input samples.
Returns:
schemas.SHAPValues: SHAP values.
```python
    def get_internal_model(self) -> T.Any:
        """Return the internal model in the object.

        Raises:
            NotImplementedError: method not implemented.

        Returns:
            T.Any: any internal model (either empty or fitted).
        """
        raise NotImplementedError()
```
Return the internal model in the object.
Raises:
- NotImplementedError: method not implemented.
Returns:
T.Any: any internal model (either empty or fitted).
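To make the contract concrete, here is a minimal sketch of a custom subclass; the `ConstantModel` name and its behaviour (always predicting the training mean) are purely illustrative.

```python
import typing as T

from bikes.core import models, schemas


class ConstantModel(models.Model):
    """Hypothetical model that always predicts the mean of the training target."""

    KIND: T.Literal["ConstantModel"] = "ConstantModel"

    _mean: float = 0.0  # learned during fit

    @T.override
    def fit(self, inputs: schemas.Inputs, targets: schemas.Targets) -> "ConstantModel":
        # remember the mean of the target column
        self._mean = float(targets[schemas.TargetsSchema.cnt].mean())
        return self

    @T.override
    def predict(self, inputs: schemas.Inputs) -> schemas.Outputs:
        # broadcast the memorized constant over the input index
        return schemas.Outputs(
            {schemas.OutputsSchema.prediction: self._mean}, index=inputs.index
        )
```

Only `fit` and `predict` must be overridden; `explain_model`, `explain_samples`, and `get_internal_model` have default implementations that raise `NotImplementedError` until a subclass provides them.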
Simple baseline model based on scikit-learn.
Arguments:
- max_depth (int): maximum depth of the random forest.
- n_estimators (int): number of estimators in the random forest.
- random_state (int, optional): random state of the machine learning pipeline.
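A possible construction, assuming the defaults listed above; any argument left out keeps its default value.

```python
from bikes.core import models

# keyword arguments override the defaults listed above
model = models.BaselineSklearnModel(max_depth=10, n_estimators=100, random_state=0)
print(model.KIND)  # "BaselineSklearnModel"
```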
```python
    @T.override
    def fit(self, inputs: schemas.Inputs, targets: schemas.Targets) -> "BaselineSklearnModel":
        # subcomponents
        categoricals_transformer = preprocessing.OneHotEncoder(
            sparse_output=False, handle_unknown="ignore"
        )
        # components
        transformer = compose.ColumnTransformer(
            [
                ("categoricals", categoricals_transformer, self._categoricals),
                ("numericals", "passthrough", self._numericals),
            ],
            remainder="drop",
        )
        regressor = ensemble.RandomForestRegressor(
            max_depth=self.max_depth,
            n_estimators=self.n_estimators,
            random_state=self.random_state,
        )
        # pipeline
        self._pipeline = pipeline.Pipeline(
            steps=[
                ("transformer", transformer),
                ("regressor", regressor),
            ]
        )
        self._pipeline.fit(X=inputs, y=targets[schemas.TargetsSchema.cnt])
        return self
```
Fit the model on the given inputs and targets.
Arguments:
- inputs (schemas.Inputs): model training inputs.
- targets (schemas.Targets): model training targets.
Returns:
T.Self: instance of the model.
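A sketch of training, assuming `inputs_train` and `targets_train` are placeholder data frames that conform to `schemas.InputsSchema` and `schemas.TargetsSchema`:

```python
model = models.BaselineSklearnModel()
model.fit(inputs=inputs_train, targets=targets_train)

# after fitting, the internal scikit-learn pipeline is populated
print(model.get_internal_model().named_steps.keys())  # dict_keys(['transformer', 'regressor'])
```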
```python
    @T.override
    def predict(self, inputs: schemas.Inputs) -> schemas.Outputs:
        model = self.get_internal_model()
        prediction = model.predict(inputs)
        outputs = schemas.Outputs(
            {schemas.OutputsSchema.prediction: prediction}, index=inputs.index
        )
        return outputs
```
Generate outputs with the model for the given inputs.
Arguments:
- inputs (schemas.Inputs): model prediction inputs.
Returns:
schemas.Outputs: model prediction outputs.
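Continuing the training sketch above, prediction with an assumed `inputs_test` frame returns an `Outputs` data frame aligned on the input index:

```python
outputs = model.predict(inputs=inputs_test)
print(outputs[schemas.OutputsSchema.prediction].head())  # one prediction per input row
```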
```python
    @T.override
    def explain_model(self) -> schemas.FeatureImportances:
        model = self.get_internal_model()
        regressor = model.named_steps["regressor"]
        transformer = model.named_steps["transformer"]
        feature = transformer.get_feature_names_out()
        feature_importances = schemas.FeatureImportances(
            data={
                "feature": feature,
                "importance": regressor.feature_importances_,
            }
        )
        return feature_importances
```
Explain the internal model structure.
Returns:
schemas.FeatureImportances: feature importances.
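Assuming the model has been fitted as above, and that `FeatureImportances` behaves like a pandas data frame with `feature` and `importance` columns (as its construction suggests), a quick inspection might look like:

```python
importances = model.explain_model()  # requires a fitted model
top = importances.sort_values("importance", ascending=False).head(10)
print(top)
```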
```python
    @T.override
    def explain_samples(self, inputs: schemas.Inputs) -> schemas.SHAPValues:
        model = self.get_internal_model()
        regressor = model.named_steps["regressor"]
        transformer = model.named_steps["transformer"]
        transformed = transformer.transform(X=inputs)
        explainer = shap.TreeExplainer(model=regressor)
        shap_values = schemas.SHAPValues(
            data=explainer.shap_values(X=transformed),
            columns=transformer.get_feature_names_out(),
        )
        return shap_values
```
Explain model outputs on input samples.
Returns:
schemas.SHAPValues: SHAP values.
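Sample-level SHAP values could be inspected similarly; `inputs_test` is the same assumed frame, and the shape comment assumes one row per sample and one column per transformed feature:

```python
shap_values = model.explain_samples(inputs=inputs_test)
print(shap_values.shape)  # (n_samples, n_transformed_features)
```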
```python
    @T.override
    def get_internal_model(self) -> pipeline.Pipeline:
        model = self._pipeline
        if model is None:
            raise ValueError("Model is not fitted yet!")
        return model
```
Return the internal model in the object.
Raises:
- ValueError: if the model has not been fitted yet.
Returns:
pipeline.Pipeline: the fitted scikit-learn pipeline.
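Because this override raises `ValueError` when the pipeline is missing, callers may want to guard access; a small sketch:

```python
try:
    fitted_pipeline = model.get_internal_model()
except ValueError:
    print("Fit the model before accessing the internal pipeline.")
else:
    regressor = fitted_pipeline.named_steps["regressor"]
    print(type(regressor).__name__)  # "RandomForestRegressor"
```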