# Source code for evalml.tuners.tuner

"""Base Tuner class."""
from abc import ABC, abstractmethod

from skopt.space import Categorical, Integer, Real


class Tuner(ABC):
    """Base Tuner class.

    Tuners implement different strategies for sampling from a search space. They're used in EvalML to search the space of pipeline hyperparameters.

    Args:
        pipeline_hyperparameter_ranges (dict): a set of hyperparameter ranges corresponding to a pipeline's parameters.
        random_seed (int): The random state. Defaults to 0.

    Raises:
        ValueError: If ``pipeline_hyperparameter_ranges`` is not a dict, if any
            component entry in it is not a dict, or if any parameter range is None.
    """

    def __init__(self, pipeline_hyperparameter_ranges, random_seed=0):
        if not isinstance(pipeline_hyperparameter_ranges, dict):
            raise ValueError(
                "pipeline_hyperparameter_ranges must be a dict but is of type {}".format(
                    type(pipeline_hyperparameter_ranges),
                ),
            )

        self._pipeline_hyperparameter_ranges = pipeline_hyperparameter_ranges
        # Maps "component: parameter" flat names back to (component, parameter).
        self._parameter_names_map = {}
        # Parallel lists: flat name of each searchable parameter and its range.
        self._search_space_names = []
        self._search_space_ranges = []
        self.random_seed = random_seed
        self._component_names = list(pipeline_hyperparameter_ranges)

        for component_name, component_ranges in pipeline_hyperparameter_ranges.items():
            if not isinstance(component_ranges, dict):
                raise ValueError(
                    "pipeline_hyperparameter_ranges has invalid entry for {}: {}".format(
                        component_name,
                        component_ranges,
                    ),
                )
            for parameter_name, parameter_range in component_ranges.items():
                if parameter_range is None:
                    raise ValueError(
                        "pipeline_hyperparameter_ranges has invalid dimensions for "
                        "{} parameter {}: None.".format(
                            component_name,
                            parameter_name,
                        ),
                    )
                # Anything that is not a range type is left out of the search space.
                if not isinstance(
                    parameter_range,
                    (Real, Integer, Categorical, list, tuple),
                ):
                    continue
                flat_parameter_name = "{}: {}".format(component_name, parameter_name)
                self._parameter_names_map[flat_parameter_name] = (
                    component_name,
                    parameter_name,
                )
                self._search_space_names.append(flat_parameter_name)
                self._search_space_ranges.append(parameter_range)

    def _convert_to_flat_parameters(self, pipeline_parameters):
        """Convert from pipeline parameters to a flat list of values.

        Args:
            pipeline_parameters (dict): Mapping of component name to a dict of
                parameter name to value.

        Returns:
            list: One value per search-space parameter, in search-space order.

        Raises:
            TypeError: If a search-space parameter is missing from ``pipeline_parameters``.
        """
        flat_parameter_values = []
        for flat_parameter_name in self._search_space_names:
            component_name, parameter_name = self._parameter_names_map[
                flat_parameter_name
            ]
            if (
                component_name not in pipeline_parameters
                or parameter_name not in pipeline_parameters[component_name]
            ):
                raise TypeError(
                    'Pipeline parameters missing required field "{}" for component "{}"'.format(
                        parameter_name,
                        component_name,
                    ),
                )
            flat_parameter_values.append(
                pipeline_parameters[component_name][parameter_name],
            )
        return flat_parameter_values

    def _convert_to_pipeline_parameters(self, flat_parameters):
        """Convert from a flat list of values to a dict of pipeline parameters.

        Args:
            flat_parameters (list): Values in search-space order.

        Returns:
            dict: Mapping of every component name to a dict of its parameter values.
                Components without search-space parameters map to an empty dict.
        """
        pipeline_parameters = {
            component_name: {} for component_name in self._component_names
        }
        for flat_parameter_name, parameter_value in zip(
            self._search_space_names,
            flat_parameters,
        ):
            component_name, parameter_name = self._parameter_names_map[
                flat_parameter_name
            ]
            pipeline_parameters[component_name][parameter_name] = parameter_value
        return pipeline_parameters

    def get_starting_parameters(self, hyperparameter_ranges, random_seed=0):
        """Gets the starting parameters given the pipeline hyperparameter range.

        Values that are not an Integer, Real, Categorical, list or tuple are
        left out of the result.

        Args:
            hyperparameter_ranges (dict): The custom hyperparameter ranges passed in during search. Used to determine the starting parameters.
            random_seed (int): The random seed to use. Defaults to 0.

        Returns:
            dict: The starting parameters, randomly chosen, to initialize a pipeline with.
        """
        starting_parameters = {}
        for name, param_dict in hyperparameter_ranges.items():
            component_parameters = {}
            for param_name, value in param_dict.items():
                if isinstance(value, (Integer, Real)):
                    # get a random value in the space; the sample is the
                    # first element of what rvs() returns
                    component_parameters[param_name] = value.rvs(
                        random_state=random_seed,
                    )[0]
                elif isinstance(value, Categorical):
                    # Categorical: the rvs() result is used as returned
                    component_parameters[param_name] = value.rvs(
                        random_state=random_seed,
                    )
                elif isinstance(value, (list, tuple)):
                    # list value from our internal hyperparameter_ranges
                    component_parameters[param_name] = value[0]
            starting_parameters[name] = component_parameters
        return starting_parameters

    @abstractmethod
    def add(self, pipeline_parameters, score):
        """Register a set of hyperparameters with the score obtained from training a pipeline with those hyperparameters.

        Args:
            pipeline_parameters (dict): a dict of the parameters used to evaluate a pipeline
            score (float): the score obtained by evaluating the pipeline with the provided parameters

        Returns:
            None
        """

    @abstractmethod
    def propose(self):
        """Returns a suggested set of parameters to train and score a pipeline with, based off the search space dimensions and prior samples.

        Returns:
            dict: Proposed pipeline parameters
        """

    def is_search_space_exhausted(self):
        """Optional. If possible search space for tuner is finite, this method indicates whether or not all possible parameters have been scored.

        Returns:
            bool: Returns true if all possible parameters in a search space have been scored.
        """
        return False