Source code for evalml.utils.gen_utils

import importlib
from collections import namedtuple

import numpy as np
from sklearn.utils import check_random_state


[docs]def import_or_raise(library, error_msg=None): """Attempts to import the requested library by name. If the import fails, raises an ImportError. Arguments: library (str): the name of the library error_msg (str): error message to return if the import fails """ try: return importlib.import_module(library) except ImportError: if error_msg is None: error_msg = "" msg = (f"Missing optional dependency '{library}'. Please use pip to install {library}. {error_msg}") raise ImportError(msg) except Exception as ex: msg = (f"An exception occurred while trying to import `{library}`: {str(ex)}") raise Exception(msg)
[docs]def convert_to_seconds(input_str): """Converts a string describing a length of time to its length in seconds.""" hours = {'h', 'hr', 'hour', 'hours'} minutes = {'m', 'min', 'minute', 'minutes'} seconds = {'s', 'sec', 'second', 'seconds'} value, unit = input_str.split() if unit[-1] == 's' and len(unit) != 1: unit = unit[:-1] if unit in seconds: return float(value) elif unit in minutes: return float(value) * 60 elif unit in hours: return float(value) * 3600 else: msg = "Invalid unit. Units must be hours, mins, or seconds. Received '{}'".format(unit) raise AssertionError(msg)
# specifies the min and max values a seed to np.random.RandomState is allowed to take. # these limits were chosen to fit in the numpy.int32 datatype to avoid issues with 32-bit systems # see https://docs.scipy.org/doc/numpy-1.15.0/reference/generated/numpy.random.RandomState.html SEED_BOUNDS = namedtuple('SEED_BOUNDS', ('min_bound', 'max_bound'))(0, 2**31 - 1)
[docs]def get_random_state(seed): """Generates a numpy.random.RandomState instance using seed. Arguments: seed (None, int, np.random.RandomState object): seed to use to generate numpy.random.RandomState. Must be between SEED_BOUNDS.min_bound and SEED_BOUNDS.max_bound, inclusive. Otherwise, an exception will be thrown. """ if isinstance(seed, (int, np.integer)) and (seed < SEED_BOUNDS.min_bound or SEED_BOUNDS.max_bound < seed): raise ValueError('Seed "{}" is not in the range [{}, {}], inclusive'.format(seed, SEED_BOUNDS.min_bound, SEED_BOUNDS.max_bound)) return check_random_state(seed)
[docs]def get_random_seed(random_state, min_bound=SEED_BOUNDS.min_bound, max_bound=SEED_BOUNDS.max_bound): """Given a numpy.random.RandomState object, generate an int representing a seed value for another random number generator. Or, if given an int, return that int. To protect against invalid input to a particular library's random number generator, if an int value is provided, and it is outside the bounds "[min_bound, max_bound)", the value will be projected into the range between the min_bound (inclusive) and max_bound (exclusive) using modular arithmetic. Arguments: random_state (int, numpy.random.RandomState): random state min_bound (None, int): if not default of None, will be min bound when generating seed (inclusive). Must be less than max_bound. max_bound (None, int): if not default of None, will be max bound when generating seed (exclusive). Must be greater than min_bound. Returns: int: seed for random number generator """ if not min_bound < max_bound: raise ValueError("Provided min_bound {} is not less than max_bound {}".format(min_bound, max_bound)) if isinstance(random_state, np.random.RandomState): return random_state.randint(min_bound, max_bound) if random_state < min_bound or random_state >= max_bound: return ((random_state - min_bound) % (max_bound - min_bound)) + min_bound return random_state
class classproperty: """Allows function to be accessed as a class level property. Example: class LogisticRegressionBinaryPipeline: component_graph = ['Simple Imputer', 'Logistic Regression Classifier'] @classproperty def summary(cls): summary = "" for component in cls.component_graph: component = handle_component_class(component) summary += component.name + " + " return summary assert LogisticRegressionBinaryPipeline.summary == "Simple Imputer + Logistic Regression Classifier + " assert LogisticRegressionBinaryPipeline().summary == "Simple Imputer + Logistic Regression Classifier + " """ def __init__(self, func): self.func = func def __get__(self, _, klass): return self.func(klass)