Source code for evalml.pipelines.plot_utils

import warnings

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix as sklearn_confusion_matrix
from sklearn.metrics import roc_curve as sklearn_roc_curve
from sklearn.utils.multiclass import unique_labels


def roc_curve(y_true, y_pred_proba):
    """Compute the Receiver Operating Characteristic curve for binary classification.

    Thin wrapper that delegates to scikit-learn's ``roc_curve``.

    Arguments:
        y_true (pd.Series or np.array): true binary labels.
        y_pred_proba (pd.Series or np.array): predictions from a binary classifier,
            before thresholding has been applied.

    Returns:
        (np.array, np.array, np.array): false positive rates, true positive rates,
            and the threshold values used to produce each pair of rates.
    """
    false_pos_rates, true_pos_rates, thresholds = sklearn_roc_curve(y_true, y_pred_proba)
    return false_pos_rates, true_pos_rates, thresholds
def confusion_matrix(y_true, y_predicted):
    """Build a confusion matrix for binary or multiclass classification.

    Arguments:
        y_true (pd.Series or np.array): true class labels.
        y_predicted (pd.Series or np.array): predicted class labels.

    Returns:
        pd.DataFrame: confusion matrix of counts. Columns are named with the
            labels found in ``y_true`` and ``y_predicted``; rows keep the
            default positional index.
    """
    # Collect the labels first so the column names come from both inputs.
    class_labels = unique_labels(y_true, y_predicted)
    counts = sklearn_confusion_matrix(y_true, y_predicted)
    return pd.DataFrame(counts, columns=class_labels)
def normalize_confusion_matrix(conf_mat, option='true'):
    """Normalizes a confusion matrix.

    Arguments:
        conf_mat (pd.DataFrame or np.array): confusion matrix to normalize.
        option ({'true', 'pred', 'all'}): Option to normalize over the rows ('true'),
            columns ('pred') or all ('all') values. Defaults to 'true'.

    Returns:
        A normalized version of the input confusion matrix. A pd.DataFrame input
        yields a pd.DataFrame with the same index and columns; any other input
        yields an np.array.

    Raises:
        ValueError: if ``option`` is not one of 'true', 'pred' or 'all', or if a
            sum along the chosen axis is 0 so that normalization is not possible.
    """
    if option not in ('true', 'pred', 'all'):
        raise ValueError('Invalid value provided for "option": {}'.format(option))

    # Work on a plain float array. This avoids multi-dimensional indexing on a
    # pandas Series (rejected by newer pandas versions) and label alignment
    # surprises when dividing a DataFrame by a Series.
    is_frame = isinstance(conf_mat, pd.DataFrame)
    raw = conf_mat.values if is_frame else np.asarray(conf_mat)
    values = raw.astype('float')

    if option == 'true':
        totals = values.sum(axis=1, keepdims=True)  # one total per row
    elif option == 'pred':
        totals = values.sum(axis=0, keepdims=True)  # one total per column
    else:
        totals = values.sum()

    # Check the denominators explicitly instead of relying on a NumPy runtime
    # warning, whose emission depends on the active floating-point error settings.
    if np.any(totals == 0):
        raise ValueError("Sum of given axis is 0 and normalization is not possible. Please select another option.")

    normalized = values / totals
    if is_frame:
        return pd.DataFrame(normalized, index=conf_mat.index, columns=conf_mat.columns)
    return normalized