Source code for evalml.pipelines.classification.xgboost

import numpy as np
from skopt.space import Integer, Real

from evalml.model_types import ModelTypes
from evalml.pipelines import PipelineBase
from evalml.pipelines.components import (
    OneHotEncoder,
    RFClassifierSelectFromModel,
    SimpleImputer,
    XGBoostClassifier
)
from evalml.problem_types import ProblemTypes


class XGBoostPipeline(PipelineBase):
    """Classification pipeline (binary and multiclass) built around XGBoost.

    The pipeline chains four components, in this order: a one-hot encoder,
    a simple imputer, a random-forest-based feature selector, and the
    XGBoost classifier.
    """

    name = "XGBoost Classifier w/ One Hot Encoder + Simple Imputer + RF Classifier Select From Model"
    model_type = ModelTypes.XGBOOST
    problem_types = [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]

    # skopt dimensions / categorical choices, keyed by the matching
    # ``__init__`` argument name.
    hyperparameters = {
        "eta": Real(0, 1),
        "min_child_weight": Real(1, 10),
        "max_depth": Integer(1, 20),
        "impute_strategy": ["mean", "median", "most_frequent"],
        "percent_features": Real(.01, 1)
    }

    def __init__(self, objective, eta, min_child_weight, max_depth, impute_strategy,
                 percent_features, number_features, n_estimators=10, n_jobs=-1, random_state=0):
        """Build the component list and hand it to ``PipelineBase``.

        Arguments:
            objective: forwarded unchanged to ``PipelineBase.__init__``.
            eta: forwarded to ``XGBoostClassifier``.
            min_child_weight: forwarded to ``XGBoostClassifier``.
            max_depth: forwarded to BOTH the feature selector and
                ``XGBoostClassifier``.
            impute_strategy: forwarded to ``SimpleImputer``.
            percent_features: forwarded to the feature selector.
            number_features: forwarded to the feature selector.
            n_estimators: forwarded to the feature selector only; it is not
                passed to ``XGBoostClassifier``.
            n_jobs: forwarded to the feature selector and to ``PipelineBase``.
            random_state: forwarded to the feature selector, to
                ``XGBoostClassifier`` and to ``PipelineBase``.
        """
        simple_imputer = SimpleImputer(impute_strategy=impute_strategy)
        one_hot = OneHotEncoder()

        # NOTE(review): threshold is pinned to -inf, presumably so that the
        # selection is driven by number_features / percent_features rather
        # than by an importance cutoff -- confirm against the selector.
        rf_selector = RFClassifierSelectFromModel(
            n_estimators=n_estimators,
            max_depth=max_depth,
            number_features=number_features,
            percent_features=percent_features,
            threshold=-np.inf,
            n_jobs=n_jobs,
            random_state=random_state,
        )

        xgb_estimator = XGBoostClassifier(
            random_state=random_state,
            eta=eta,
            max_depth=max_depth,
            min_child_weight=min_child_weight,
        )

        # The encoder is placed ahead of the imputer; this order matches ``name``.
        components = [one_hot, simple_imputer, rf_selector, xgb_estimator]

        super().__init__(
            objective=objective,
            component_list=components,
            n_jobs=n_jobs,
            random_state=random_state,
        )