bikes.utils.searchers

Find the best hyperparameters for a model.

View Source

  1"""Find the best hyperparameters for a model."""
  2
  3# %% IMPORTS
  4
  5import abc
  6import typing as T
  7
  8import pandas as pd
  9import pydantic as pdt
 10from sklearn import model_selection
 11
 12from bikes.core import metrics, models, schemas
 13from bikes.utils import splitters
 14
 15# %% TYPES
 16
# Grid of model params: each param key maps to the list of candidate values to try.
Grid = dict[models.ParamKey, list[models.ParamValue]]

# Results of a model search, as a 3-tuple in this order:
# (details of the search, best score, best params).
Results = tuple[
    T.Annotated[pd.DataFrame, "details"],
    T.Annotated[float, "best score"],
    T.Annotated[models.Params, "best params"],
]

# Cross-validation options for searchers: an int, precomputed splits
# (splitters.TrainTestSplits), or a splitter object (splitters.Splitter).
# The value is forwarded unchanged as the `cv` argument of the search.
CrossValidation = int | splitters.TrainTestSplits | splitters.Splitter
 29
 30# %% SEARCHERS
 31
 32
class Searcher(abc.ABC, pdt.BaseModel, strict=True, frozen=True, extra="forbid"):
    """Base class for a searcher.

    Use a searcher to fine-tune models,
    i.e., to find the best model params.

    The pydantic model is configured as strict and frozen, and it forbids extra fields.

    Parameters:
        KIND (str): identifier of the searcher kind.
        param_grid (Grid): mapping of param key -> values.
    """

    # identifier of the searcher kind (declared here without a default)
    KIND: str

    # candidate values to explore for each model param
    param_grid: Grid

    @abc.abstractmethod
    def search(
        self,
        model: models.Model,
        metric: metrics.Metric,
        inputs: schemas.Inputs,
        targets: schemas.Targets,
        cv: CrossValidation,
    ) -> Results:
        """Search the best model for the given inputs and targets.

        Subclasses must implement this method.

        Args:
            model (models.Model): AI/ML model to fine-tune.
            metric (metrics.Metric): main metric to optimize.
            inputs (schemas.Inputs): model inputs for tuning.
            targets (schemas.Targets): model targets for tuning.
            cv (CrossValidation): choice for cross-fold validation.

        Returns:
            Results: tuple of (search details, best score, best params).
        """
 68
 69
 70class GridCVSearcher(Searcher):
 71    """Grid searcher with cross-fold validation.
 72
 73    Convention: metric returns higher values for better models.
 74
 75    Parameters:
 76        n_jobs (int, optional): number of jobs to run in parallel.
 77        refit (bool): refit the model after the tuning.
 78        verbose (int): set the searcher verbosity level.
 79        error_score (str | float): strategy or value on error.
 80        return_train_score (bool): include train scores if True.
 81    """
 82
 83    KIND: T.Literal["GridCVSearcher"] = "GridCVSearcher"
 84
 85    n_jobs: int | None = None
 86    refit: bool = True
 87    verbose: int = 3
 88    error_score: str | float = "raise"
 89    return_train_score: bool = False
 90
 91    @T.override
 92    def search(
 93        self,
 94        model: models.Model,
 95        metric: metrics.Metric,
 96        inputs: schemas.Inputs,
 97        targets: schemas.Targets,
 98        cv: CrossValidation,
 99    ) -> Results:
100        searcher = model_selection.GridSearchCV(
101            estimator=model,
102            scoring=metric.scorer,
103            cv=cv,
104            param_grid=self.param_grid,
105            n_jobs=self.n_jobs,
106            refit=self.refit,
107            verbose=self.verbose,
108            error_score=self.error_score,
109            return_train_score=self.return_train_score,
110        )
111        searcher.fit(inputs, targets)
112        results = pd.DataFrame(searcher.cv_results_)
113        return results, searcher.best_score_, searcher.best_params_
114
115
# Alias for the available searcher kinds; GridCVSearcher is the only one defined in this module.
SearcherKind = GridCVSearcher

Grid = dict[str, list[typing.Any]]

Results = tuple[typing.Annotated[pandas.core.frame.DataFrame, 'details'], typing.Annotated[float, 'best score'], typing.Annotated[dict[str, typing.Any], 'best params']]

CrossValidation = typing.Union[int, typing.Iterator[tuple[numpy.ndarray[typing.Any, numpy.dtype[numpy.int64]], numpy.ndarray[typing.Any, numpy.dtype[numpy.int64]]]], bikes.utils.splitters.Splitter]

class Searcher(abc.ABC, pydantic.main.BaseModel): View Source

class Searcher(abc.ABC, pdt.BaseModel, strict=True, frozen=True, extra="forbid"):
    """Base class for a searcher.

    Use a searcher to fine-tune models,
    i.e., to find the best model params.

    The pydantic model is configured as strict and frozen, and it forbids extra fields.

    Parameters:
        KIND (str): identifier of the searcher kind.
        param_grid (Grid): mapping of param key -> values.
    """

    # identifier of the searcher kind (declared here without a default)
    KIND: str

    # candidate values to explore for each model param
    param_grid: Grid

    @abc.abstractmethod
    def search(
        self,
        model: models.Model,
        metric: metrics.Metric,
        inputs: schemas.Inputs,
        targets: schemas.Targets,
        cv: CrossValidation,
    ) -> Results:
        """Search the best model for the given inputs and targets.

        Subclasses must implement this method.

        Args:
            model (models.Model): AI/ML model to fine-tune.
            metric (metrics.Metric): main metric to optimize.
            inputs (schemas.Inputs): model inputs for tuning.
            targets (schemas.Targets): model targets for tuning.
            cv (CrossValidation): choice for cross-fold validation.

        Returns:
            Results: tuple of (search details, best score, best params).
        """

Base class for a searcher.

Use a searcher to fine-tune models, i.e., to find the best model params.

Arguments:

param_grid (Grid): mapping of param key -> values.

KIND: str

param_grid: dict[str, list[typing.Any]]

@abc.abstractmethod

def search( self, model: bikes.core.models.Model, metric: bikes.core.metrics.Metric, inputs: pandera.typing.pandas.DataFrame[bikes.core.schemas.InputsSchema], targets: pandera.typing.pandas.DataFrame[bikes.core.schemas.TargetsSchema], cv: Union[int, Iterator[tuple[numpy.ndarray[Any, numpy.dtype[numpy.int64]], numpy.ndarray[Any, numpy.dtype[numpy.int64]]]], bikes.utils.splitters.Splitter]) -> tuple[typing.Annotated[pandas.core.frame.DataFrame, 'details'], typing.Annotated[float, 'best score'], typing.Annotated[dict[str, typing.Any], 'best params']]: View Source

    @abc.abstractmethod
    def search(
        self,
        model: models.Model,
        metric: metrics.Metric,
        inputs: schemas.Inputs,
        targets: schemas.Targets,
        cv: CrossValidation,
    ) -> Results:
        """Search the best model for the given inputs and targets.

        Subclasses must implement this method.

        Args:
            model (models.Model): AI/ML model to fine-tune.
            metric (metrics.Metric): main metric to optimize.
            inputs (schemas.Inputs): model inputs for tuning.
            targets (schemas.Targets): model targets for tuning.
            cv (CrossValidation): choice for cross-fold validation.

        Returns:
            Results: tuple of (search details, best score, best params).
        """

Search the best model for the given inputs and targets.

Arguments:

model (models.Model): AI/ML model to fine-tune.
metric (metrics.Metric): main metric to optimize.
inputs (schemas.Inputs): model inputs for tuning.
targets (schemas.Targets): model targets for tuning.
cv (CrossValidation): choice for cross-fold validation.

Returns:

Results: all the results of the searcher execution process.

model_config: ClassVar[pydantic.config.ConfigDict] = {'strict': True, 'frozen': True, 'extra': 'forbid'}

Configuration for the model; it should be a dictionary conforming to `pydantic.config.ConfigDict`.

class GridCVSearcher(Searcher): View Source

 71class GridCVSearcher(Searcher):
 72    """Grid searcher with cross-fold validation.
 73
 74    Convention: metric returns higher values for better models.
 75
 76    Parameters:
 77        n_jobs (int, optional): number of jobs to run in parallel.
 78        refit (bool): refit the model after the tuning.
 79        verbose (int): set the searcher verbosity level.
 80        error_score (str | float): strategy or value on error.
 81        return_train_score (bool): include train scores if True.
 82    """
 83
 84    KIND: T.Literal["GridCVSearcher"] = "GridCVSearcher"
 85
 86    n_jobs: int | None = None
 87    refit: bool = True
 88    verbose: int = 3
 89    error_score: str | float = "raise"
 90    return_train_score: bool = False
 91
 92    @T.override
 93    def search(
 94        self,
 95        model: models.Model,
 96        metric: metrics.Metric,
 97        inputs: schemas.Inputs,
 98        targets: schemas.Targets,
 99        cv: CrossValidation,
100    ) -> Results:
101        searcher = model_selection.GridSearchCV(
102            estimator=model,
103            scoring=metric.scorer,
104            cv=cv,
105            param_grid=self.param_grid,
106            n_jobs=self.n_jobs,
107            refit=self.refit,
108            verbose=self.verbose,
109            error_score=self.error_score,
110            return_train_score=self.return_train_score,
111        )
112        searcher.fit(inputs, targets)
113        results = pd.DataFrame(searcher.cv_results_)
114        return results, searcher.best_score_, searcher.best_params_

Grid searcher with cross-fold validation.

Convention: metric returns higher values for better models.

Arguments:

n_jobs (int, optional): number of jobs to run in parallel.
refit (bool): refit the model after the tuning.
verbose (int): set the searcher verbosity level.
error_score (str | float): strategy or value on error.
return_train_score (bool): include train scores if True.

KIND: Literal['GridCVSearcher']

n_jobs: int | None

refit: bool

verbose: int

error_score: str | float

return_train_score: bool

@T.override

 92    @T.override
 93    def search(
 94        self,
 95        model: models.Model,
 96        metric: metrics.Metric,
 97        inputs: schemas.Inputs,
 98        targets: schemas.Targets,
 99        cv: CrossValidation,
100    ) -> Results:
101        searcher = model_selection.GridSearchCV(
102            estimator=model,
103            scoring=metric.scorer,
104            cv=cv,
105            param_grid=self.param_grid,
106            n_jobs=self.n_jobs,
107            refit=self.refit,
108            verbose=self.verbose,
109            error_score=self.error_score,
110            return_train_score=self.return_train_score,
111        )
112        searcher.fit(inputs, targets)
113        results = pd.DataFrame(searcher.cv_results_)
114        return results, searcher.best_score_, searcher.best_params_

Search the best model for the given inputs and targets.

Arguments:

model (models.Model): AI/ML model to fine-tune.
metric (metrics.Metric): main metric to optimize.
inputs (schemas.Inputs): model inputs for tuning.
targets (schemas.Targets): model targets for tuning.
cv (CrossValidation): choice for cross-fold validation.

Returns:

Results: all the results of the searcher execution process.

model_config: ClassVar[pydantic.config.ConfigDict] = {'strict': True, 'frozen': True, 'extra': 'forbid'}

Configuration for the model; it should be a dictionary conforming to `pydantic.config.ConfigDict`.

Inherited Members

Searcher: param_grid

SearcherKind = <class 'GridCVSearcher'>