import numpy as np
import pandas as pd
from scipy.optimize import minimize_scalar
from .objective_base import ObjectiveBase
from evalml.problem_types import ProblemTypes
class BinaryClassificationObjective(ObjectiveBase):
    """Base class for all binary classification objectives.

    Attributes:
        problem_type (ProblemTypes): Type of problem this objective is. Set to ProblemTypes.BINARY.
        can_optimize_threshold (bool): Determines if threshold used by objective can be optimized or not.
    """

    problem_type = ProblemTypes.BINARY

    @property
    def can_optimize_threshold(self):
        """Returns a boolean determining if we can optimize the binary classification objective threshold.

        This will be false for any objective that works directly with predicted
        probabilities, like log loss and AUC. Otherwise, it will be true.

        Returns:
            bool: The negation of ``self.score_needs_proba``.
        """
        # NOTE(review): `score_needs_proba` is not defined in this class; it is
        # presumably supplied by ObjectiveBase or a subclass -- confirm.
        return not self.score_needs_proba

    def optimize_threshold(self, ypred_proba, y_true, X=None):
        """Learn a binary classification threshold which optimizes the current objective.

        Arguments:
            ypred_proba (list): The classifier's predicted probabilities
            y_true (list): The ground truth for the predictions.
            X (pd.DataFrame, optional): Any extra columns that are needed from training data.

        Returns:
            Optimal threshold for this objective. This is the ``x`` attribute of the
            result returned by ``scipy.optimize.minimize_scalar``. No bracket or
            bounds are passed to the optimizer, so the value is not explicitly
            constrained to the [0, 1] interval.

        Raises:
            RuntimeError: If ``can_optimize_threshold`` is false for this objective.
        """
        if not self.can_optimize_threshold:
            raise RuntimeError("Trying to optimize objective that can't be optimized!")

        def cost(threshold):
            """Score the predictions produced by ``threshold``, signed for minimization."""
            y_predicted = self.decision_function(ypred_proba=ypred_proba, threshold=threshold, X=X)
            score = self.objective_function(y_true, y_predicted, X=X)
            # minimize_scalar always minimizes, so flip the sign when a larger
            # objective value is the better one.
            return -score if self.greater_is_better else score

        optimal = minimize_scalar(cost, method='Golden', options={"maxiter": 100})
        return optimal.x

    def decision_function(self, ypred_proba, threshold=0.5, X=None):
        """Apply a learned threshold to predicted probabilities to get predicted classes.

        Arguments:
            ypred_proba (list): The classifier's predicted probabilities
            threshold (float, optional): Threshold used to make a prediction. Defaults to 0.5.
            X (pd.DataFrame, optional): Any extra columns that are needed from training data.
                Not used by this base implementation.

        Returns:
            predictions: The element-wise result of ``ypred_proba > threshold``
            (strictly greater than), computed on a ``pd.Series``.
        """
        # Wrap anything that is not already a Series so the comparison below is
        # applied element-wise.
        if not isinstance(ypred_proba, pd.Series):
            ypred_proba = pd.Series(ypred_proba)
        return ypred_proba > threshold