bikes.utils.searchers
Find the best hyperparameters for a model.
"""Find the best hyperparameters for a model."""

# %% IMPORTS

import abc
import typing as T

import pandas as pd
import pydantic as pdt
from sklearn import model_selection

from bikes.core import metrics, models, schemas
from bikes.utils import splitters

# %% TYPES

# Search space: every model param key is mapped to its candidate values
Grid = dict[models.ParamKey, list[models.ParamValue]]

# Outcome of a search: (details table, best score, best params)
Results = tuple[
    T.Annotated[pd.DataFrame, "details"],
    T.Annotated[float, "best score"],
    T.Annotated[models.Params, "best params"],
]

# Cross-validation choices accepted by searchers: an int, train/test splits, or a splitter
CrossValidation = int | splitters.TrainTestSplits | splitters.Splitter

# %% SEARCHERS


class Searcher(abc.ABC, pdt.BaseModel, strict=True, frozen=True, extra="forbid"):
    """Abstract base for every searcher.

    A searcher fine-tunes a model,
    i.e., it looks for the best model params.

    Parameters:
        param_grid (Grid): mapping of param key -> candidate values.
    """

    # name of the concrete searcher; subclasses narrow it to a literal
    KIND: str

    param_grid: Grid

    @abc.abstractmethod
    def search(
        self,
        model: models.Model,
        metric: metrics.Metric,
        inputs: schemas.Inputs,
        targets: schemas.Targets,
        cv: CrossValidation,
    ) -> Results:
        """Search for the best model params on the given inputs and targets.

        Args:
            model (models.Model): AI/ML model to fine-tune.
            metric (metrics.Metric): main metric to optimize.
            inputs (schemas.Inputs): model inputs used for the tuning.
            targets (schemas.Targets): model targets used for the tuning.
            cv (CrossValidation): cross-validation choice for the search.

        Returns:
            Results: the search details, the best score, and the best params.
        """


class GridCVSearcher(Searcher):
    """Grid search with cross-fold validation, backed by scikit-learn's GridSearchCV.

    Convention: metric returns higher values for better models.

    Parameters:
        n_jobs (int, optional): number of jobs to run in parallel.
        refit (bool): refit the model once the tuning is done.
        verbose (int): verbosity level of the searcher.
        error_score (str | float): strategy or score value to use on error.
        return_train_score (bool): include the train scores in the results if True.
    """

    KIND: T.Literal["GridCVSearcher"] = "GridCVSearcher"

    n_jobs: int | None = None
    refit: bool = True
    verbose: int = 3
    error_score: str | float = "raise"
    return_train_score: bool = False

    @T.override
    def search(
        self,
        model: models.Model,
        metric: metrics.Metric,
        inputs: schemas.Inputs,
        targets: schemas.Targets,
        cv: CrossValidation,
    ) -> Results:
        """Run a cross-validated grid search and report its outcome.

        Args:
            model (models.Model): AI/ML model passed as the estimator.
            metric (metrics.Metric): metric whose scorer ranks the candidates.
            inputs (schemas.Inputs): model inputs used to fit the search.
            targets (schemas.Targets): model targets used to fit the search.
            cv (CrossValidation): cross-validation choice given to the search.

        Returns:
            Results: cv results as a dataframe, the best score, and the best params.
        """
        # call arguments first, then the options held by this searcher
        settings: dict[str, T.Any] = {
            "estimator": model,
            "scoring": metric.scorer,
            "cv": cv,
            "param_grid": self.param_grid,
            "n_jobs": self.n_jobs,
            "refit": self.refit,
            "verbose": self.verbose,
            "error_score": self.error_score,
            "return_train_score": self.return_train_score,
        }
        grid_search = model_selection.GridSearchCV(**settings)
        grid_search.fit(inputs, targets)
        details = pd.DataFrame(grid_search.cv_results_)
        best_score = grid_search.best_score_
        best_params = grid_search.best_params_
        return details, best_score, best_params


SearcherKind = GridCVSearcher
Grid =
dict[str, list[typing.Any]]
Results =
tuple[typing.Annotated[pandas.core.frame.DataFrame, 'details'], typing.Annotated[float, 'best score'], typing.Annotated[dict[str, typing.Any], 'best params']]
CrossValidation =
typing.Union[int, typing.Iterator[tuple[numpy.ndarray[typing.Any, numpy.dtype[numpy.int64]], numpy.ndarray[typing.Any, numpy.dtype[numpy.int64]]]], bikes.utils.splitters.Splitter]
class
Searcher(abc.ABC, pydantic.main.BaseModel):
class Searcher(abc.ABC, pdt.BaseModel, strict=True, frozen=True, extra="forbid"):
    """Abstract base for every searcher.

    A searcher fine-tunes a model,
    i.e., it looks for the best model params.

    Parameters:
        param_grid (Grid): mapping of param key -> candidate values.
    """

    # name of the concrete searcher
    KIND: str

    param_grid: Grid

    @abc.abstractmethod
    def search(
        self,
        model: models.Model,
        metric: metrics.Metric,
        inputs: schemas.Inputs,
        targets: schemas.Targets,
        cv: CrossValidation,
    ) -> Results:
        """Search for the best model params on the given inputs and targets.

        Args:
            model (models.Model): AI/ML model to fine-tune.
            metric (metrics.Metric): main metric to optimize.
            inputs (schemas.Inputs): model inputs used for the tuning.
            targets (schemas.Targets): model targets used for the tuning.
            cv (CrossValidation): cross-validation choice for the search.

        Returns:
            Results: all the results produced by the searcher execution.
        """
Base class for a searcher.
Use a searcher to fine-tune models, i.e., to find the best model params.
Arguments:
- param_grid (Grid): mapping of param key -> values.
@abc.abstractmethod
def
search( self, model: bikes.core.models.Model, metric: bikes.core.metrics.Metric, inputs: pandera.typing.pandas.DataFrame[bikes.core.schemas.InputsSchema], targets: pandera.typing.pandas.DataFrame[bikes.core.schemas.TargetsSchema], cv: Union[int, Iterator[tuple[numpy.ndarray[Any, numpy.dtype[numpy.int64]], numpy.ndarray[Any, numpy.dtype[numpy.int64]]]], bikes.utils.splitters.Splitter]) -> tuple[typing.Annotated[pandas.core.frame.DataFrame, 'details'], typing.Annotated[float, 'best score'], typing.Annotated[dict[str, typing.Any], 'best params']]:
@abc.abstractmethod
def search(
    self,
    model: models.Model,
    metric: metrics.Metric,
    inputs: schemas.Inputs,
    targets: schemas.Targets,
    cv: CrossValidation,
) -> Results:
    """Search for the best model params on the given inputs and targets.

    Args:
        model (models.Model): AI/ML model to fine-tune.
        metric (metrics.Metric): main metric to optimize.
        inputs (schemas.Inputs): model inputs used for the tuning.
        targets (schemas.Targets): model targets used for the tuning.
        cv (CrossValidation): cross-validation choice for the search.

    Returns:
        Results: all the results produced by the searcher execution.
    """
Search for the best model for the given inputs and targets.
Arguments:
- model (models.Model): AI/ML model to fine-tune.
- metric (metrics.Metric): main metric to optimize.
- inputs (schemas.Inputs): model inputs for tuning.
- targets (schemas.Targets): model targets for tuning.
- cv (CrossValidation): choice for cross-fold validation.
Returns:
Results: all the results of the searcher execution process.
class GridCVSearcher(Searcher):
    """Grid search with cross-fold validation, backed by scikit-learn's GridSearchCV.

    Convention: metric returns higher values for better models.

    Parameters:
        n_jobs (int, optional): number of jobs to run in parallel.
        refit (bool): refit the model once the tuning is done.
        verbose (int): verbosity level of the searcher.
        error_score (str | float): strategy or score value to use on error.
        return_train_score (bool): include the train scores in the results if True.
    """

    KIND: T.Literal["GridCVSearcher"] = "GridCVSearcher"

    n_jobs: int | None = None
    refit: bool = True
    verbose: int = 3
    error_score: str | float = "raise"
    return_train_score: bool = False

    @T.override
    def search(
        self,
        model: models.Model,
        metric: metrics.Metric,
        inputs: schemas.Inputs,
        targets: schemas.Targets,
        cv: CrossValidation,
    ) -> Results:
        """Run a cross-validated grid search and report its outcome.

        Args:
            model (models.Model): AI/ML model passed as the estimator.
            metric (metrics.Metric): metric whose scorer ranks the candidates.
            inputs (schemas.Inputs): model inputs used to fit the search.
            targets (schemas.Targets): model targets used to fit the search.
            cv (CrossValidation): cross-validation choice given to the search.

        Returns:
            Results: cv results as a dataframe, the best score, and the best params.
        """
        # call arguments first, then the options held by this searcher
        settings: dict[str, T.Any] = {
            "estimator": model,
            "scoring": metric.scorer,
            "cv": cv,
            "param_grid": self.param_grid,
            "n_jobs": self.n_jobs,
            "refit": self.refit,
            "verbose": self.verbose,
            "error_score": self.error_score,
            "return_train_score": self.return_train_score,
        }
        grid_search = model_selection.GridSearchCV(**settings)
        grid_search.fit(inputs, targets)
        details = pd.DataFrame(grid_search.cv_results_)
        best_score = grid_search.best_score_
        best_params = grid_search.best_params_
        return details, best_score, best_params
Grid searcher with cross-fold validation.
Convention: metric returns higher values for better models.
Arguments:
- n_jobs (int, optional): number of jobs to run in parallel.
- refit (bool): refit the model after the tuning.
- verbose (int): set the searcher verbosity level.
- error_score (str | float): strategy or value on error.
- return_train_score (bool): include train scores if True.
@T.override
def
search( self, model: bikes.core.models.Model, metric: bikes.core.metrics.Metric, inputs: pandera.typing.pandas.DataFrame[bikes.core.schemas.InputsSchema], targets: pandera.typing.pandas.DataFrame[bikes.core.schemas.TargetsSchema], cv: Union[int, Iterator[tuple[numpy.ndarray[Any, numpy.dtype[numpy.int64]], numpy.ndarray[Any, numpy.dtype[numpy.int64]]]], bikes.utils.splitters.Splitter]) -> tuple[typing.Annotated[pandas.core.frame.DataFrame, 'details'], typing.Annotated[float, 'best score'], typing.Annotated[dict[str, typing.Any], 'best params']]:
@T.override
def search(
    self,
    model: models.Model,
    metric: metrics.Metric,
    inputs: schemas.Inputs,
    targets: schemas.Targets,
    cv: CrossValidation,
) -> Results:
    """Run a cross-validated grid search and report its outcome.

    Args:
        model (models.Model): AI/ML model passed as the estimator.
        metric (metrics.Metric): metric whose scorer ranks the candidates.
        inputs (schemas.Inputs): model inputs used to fit the search.
        targets (schemas.Targets): model targets used to fit the search.
        cv (CrossValidation): cross-validation choice given to the search.

    Returns:
        Results: cv results as a dataframe, the best score, and the best params.
    """
    # call arguments first, then the options held by this searcher
    settings: dict[str, T.Any] = {
        "estimator": model,
        "scoring": metric.scorer,
        "cv": cv,
        "param_grid": self.param_grid,
        "n_jobs": self.n_jobs,
        "refit": self.refit,
        "verbose": self.verbose,
        "error_score": self.error_score,
        "return_train_score": self.return_train_score,
    }
    grid_search = model_selection.GridSearchCV(**settings)
    grid_search.fit(inputs, targets)
    details = pd.DataFrame(grid_search.cv_results_)
    best_score = grid_search.best_score_
    best_params = grid_search.best_params_
    return details, best_score, best_params
Search for the best model for the given inputs and targets.
Arguments:
- model (models.Model): AI/ML model to fine-tune.
- metric (metrics.Metric): main metric to optimize.
- inputs (schemas.Inputs): model inputs for tuning.
- targets (schemas.Targets): model targets for tuning.
- cv (CrossValidation): choice for cross-fold validation.
Returns:
Results: all the results of the searcher execution process.
model_config: ClassVar[pydantic.config.ConfigDict] =
{'strict': True, 'frozen': True, 'extra': 'forbid'}
Configuration for the model; should be a dictionary conforming to [`ConfigDict`][pydantic.config.ConfigDict].
Inherited Members
SearcherKind =
<class 'GridCVSearcher'>