Source code for evalml.automl.progress
"""Progress abstraction holding stopping criteria and progress information."""
import logging
import time

from evalml.utils.logger import get_logger


class Progress:
    """Progress object holding stopping criteria and progress information.
    Args:
        max_time (int): Maximum time to search for pipelines.
        max_iterations (int): Maximum number of iterations to search.
        max_batches (int): The maximum number of batches of pipelines to search. Parameters max_time, and
            max_iterations have precedence over stopping the search.
        patience (int): Number of iterations without improvement to stop search early.
        tolerance (float): Minimum percentage difference to qualify as score improvement for early stopping.
        automl_algorithm (str): The automl algorithm to use. Used to calculate iterations if max_batches is selected as stopping criteria.
        objective (str, ObjectiveBase): The objective used in search.
        verbose (boolean): Whether or not to log out stopping information.
    """
    def __init__(
        self,
        max_time=None,
        max_batches=None,
        max_iterations=None,
        patience=None,
        tolerance=None,
        automl_algorithm=None,
        objective=None,
        verbose=False,
    ):
        self.max_time = max_time
        self.current_time = None
        self.start_time = None
        self.max_batches = max_batches
        self.current_batch = 0
        self.max_iterations = max_iterations
        self.current_iterations = 0
        self.patience = patience
        self.tolerance = tolerance
        self.automl_algorithm = automl_algorithm
        self.objective = objective
        self._best_score = None
        self._without_improvement = 0
        self._last_id = 0
        if verbose:
            self.logger = get_logger(f"{__name__}.verbose")
        else:
            self.logger = logging.getLogger(__name__)

    def start_timing(self):
        """Sets start time to current time."""
        self.start_time = time.time()

    def elapsed(self):
        """Return time elapsed using the start time and current time."""
        return self.current_time - self.start_time

    def should_continue(self, results, interrupted=False, mid_batch=False):
        """Given AutoML results, return whether or not the search should continue.

        Args:
            results (dict): AutoMLSearch results.
            interrupted (bool): Whether AutoMLSearch was given a keyboard interrupt. Defaults to False.
            mid_batch (bool): Whether this method was called while in the middle of a batch. Defaults to False.

        Returns:
            bool: True if the search should continue, False otherwise.
        """
        if interrupted:
            return False
        # update and check max_time, max_iterations, and max_batches
        self.current_time = time.time()
        self.current_iterations = len(results["pipeline_results"])
        self.current_batch = self.automl_algorithm.batch_number
        if self.max_time and self.elapsed() >= self.max_time:
            return False
        elif self.max_iterations and self.current_iterations >= self.max_iterations:
            return False
        elif (
            self.max_batches
            and self.current_batch >= self.max_batches
            and not mid_batch
        ):
            # max_batches only ends the search at a batch boundary, never mid-batch
            return False
        # check for early stopping
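        # An iteration counts as an improvement only when the newest mean CV score
        # both moves in the objective's better direction and changes by more than
        # ``tolerance`` relative to the best score so far. For example, with a
        # greater-is-better objective, a best score of 0.80, and tolerance of 0.01,
        # a new score of 0.82 qualifies: abs((0.82 - 0.80) / 0.80) = 0.025 > 0.01.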
        if self.patience is not None and self.tolerance is not None:
            last_id = results["search_order"][-1]
            curr_score = results["pipeline_results"][last_id]["mean_cv_score"]
            if self._best_score is None:
                self._best_score = curr_score
                return True
            elif last_id > self._last_id:
                self._last_id = last_id
                score_improved = (
                    curr_score > self._best_score
                    if self.objective.greater_is_better
                    else curr_score < self._best_score
                )
                significant_change = (
                    abs((curr_score - self._best_score) / self._best_score)
                    > self.tolerance
                )
                if score_improved and significant_change:
                    self._best_score = curr_score
                    self._without_improvement = 0
                else:
                    self._without_improvement += 1
                if self._without_improvement >= self.patience:
                    self.logger.info(
                        "\n\n{} iterations without improvement. Stopping search early...".format(
                            self.patience,
                        ),
                    )
                    return False
        return True 

    def return_progress(self):
        """Return information about the current and end state of each stopping criterion, in order of priority.

        Returns:
            List[Dict[str, Any]]: List of dictionaries containing information about each stopping criterion.
        """
        progress = []
        if self.max_time:
            progress.append(
                {
                    "stopping_criteria": "max_time",
                    "current_state": self.elapsed(),
                    "end_state": self.max_time,
                    "unit": "seconds",
                },
            )
        if self.max_iterations or self.max_batches:
            max_iterations = (
                self.max_iterations
                if self.max_iterations
                else sum(
                    [
                        self.automl_algorithm.num_pipelines_per_batch(n)
                        for n in range(self.max_batches)
                    ],
                )
            )
            progress.append(
                {
                    "stopping_criteria": "max_iterations",
                    "current_state": self.current_iterations,
                    "end_state": max_iterations,
                    "unit": "iterations",
                },
            )
        if self.max_batches:
            progress.append(
                {
                    "stopping_criteria": "max_batches",
                    "current_state": self.current_batch,
                    "end_state": self.max_batches,
                    "unit": "batches",
                },
            )
        return progress
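
A minimal usage sketch, not part of the module above. The SimpleNamespace objects are hypothetical stand-ins for a real automl algorithm and objective, shown only to illustrate how a search loop is expected to drive Progress:

from types import SimpleNamespace

from evalml.automl.progress import Progress

# Hypothetical stand-ins: a real search would pass its own algorithm and
# objective objects. num_pipelines_per_batch is only consulted when max_batches
# is used as the stopping criterion.
fake_algorithm = SimpleNamespace(batch_number=1, num_pipelines_per_batch=lambda n: 5)
fake_objective = SimpleNamespace(greater_is_better=True)

progress = Progress(
    max_iterations=2,
    patience=1,
    tolerance=0.05,
    automl_algorithm=fake_algorithm,
    objective=fake_objective,
)
progress.start_timing()

# Results mimic the shape should_continue expects: "pipeline_results" keyed by
# pipeline id and "search_order" listing ids in evaluation order.
results = {
    "pipeline_results": {0: {"mean_cv_score": 0.80}},
    "search_order": [0],
}
print(progress.should_continue(results))  # True: 1 of 2 allowed iterations used

results["pipeline_results"][1] = {"mean_cv_score": 0.81}
results["search_order"].append(1)
print(progress.should_continue(results))  # False: max_iterations reached
print(progress.return_progress())  # reports max_iterations progress (max_time is unset)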