Skip to content

xgbse._meta.XGBSEBootstrapEstimator

Bootstrap meta-estimator for XGBSE models:

  • allows for confidence interval estimation for XGBSEDebiasedBCE and XGBSEStackedWeibull
  • provides variance stabilization for all models, especially for XGBSEKaplanTree

Performs simple bootstrap with sample size equal to training set size.

Source code in xgbse/_meta.py
class XGBSEBootstrapEstimator(BaseEstimator):
    """
    Bootstrap meta-estimator for XGBSE models:

    *  allows for confidence interval estimation for `XGBSEDebiasedBCE` and `XGBSEStackedWeibull`
    *  provides variance stabilization for all models, especially for `XGBSEKaplanTree`

    Performs simple bootstrap with sample size equal to training set size.

    After `fit`, the fitted replicates are available in the `estimators_` list.
    """

    def __init__(self, base_estimator, n_estimators=10, random_state=42):
        """
        Args:
            base_estimator (XGBSEBaseEstimator): Base estimator for bootstrap procedure.
                It is used as a prototype: `fit` trains copies of it.
            n_estimators (int): Number of estimators to fit in bootstrap procedure
            random_state (int): Random state for resampling function. Replicate `i`
                is resampled with seed `random_state + i`.
        """
        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.random_state = random_state

    def fit(self, X, y, **kwargs):
        """
        Fit several (base) estimators and store them in `self.estimators_`.

        Each replicate is an independent deep copy of `base_estimator` fitted on
        its own resample of `(X, y)`; the `base_estimator` object itself is left
        untouched.

        Args:
            X ([pd.DataFrame, np.array]): Features to be used while fitting
                XGBoost model

            y (structured array(numpy.bool_, numpy.number)): Binary event indicator as first field,
                and time of event or time of censoring as second field.

            **kwargs : Keyword arguments to be passed to .fit() method of base_estimator

        Returns:
            XGBSEBootstrapEstimator: Trained instance of XGBSEBootstrapEstimator

        """

        # start from a clean slate so that refitting discards earlier replicates
        self.estimators_ = []

        for i in range(self.n_estimators):
            # offsetting the seed gives every replicate a different but
            # reproducible resample
            X_sample, y_sample = resample(X, y, random_state=i + self.random_state)

            # train a private copy: fitting `self.base_estimator` directly would
            # mutate the object the caller handed in
            replicate = deepcopy(self.base_estimator)
            self.estimators_.append(replicate.fit(X_sample, y_sample, **kwargs))

        return self

    def predict(self, X, return_ci=False, ci_width=0.683, return_interval_probs=False):
        """
        Predicts survival as the average of the predictions of all fitted base
        estimators. Optionally, lower and upper confidence bounds (quantiles taken
        across the base estimators) are returned as well.

        Predictions of the base estimators are aggregated by index label
        (`groupby(level=0)`), so rows that share a label are pooled together.

        Args:
            X (pd.DataFrame): data frame with samples to generate predictions

            return_ci (Bool): whether to include confidence intervals

            ci_width (Float): width of confidence interval, between 0 and 1. The
                bounds are the `0.5 - ci_width / 2` and `0.5 + ci_width / 2`
                quantiles. Only used when `return_ci` is True.

            return_interval_probs (Bool): forwarded to the base estimators'
                `predict`; when True, interval probabilities are returned instead
                of the cumulative survival probabilities.

        Returns:
            ([(pd.DataFrame, pd.DataFrame, pd.DataFrame), pd.DataFrame]):
            preds_df: A dataframe of survival probabilities
            for all times (columns), from a time_bins array, for all samples of X
            (rows). If return_interval_probs is True, the interval probabilities are returned
            instead of the cumulative survival probabilities.

            upper_ci: Upper confidence interval for the survival
                probability values (only if return_ci is True)

            lower_ci: Lower confidence interval for the survival
                probability values (only if return_ci is True)

        Raises:
            ValueError: if `return_ci` is True and `ci_width` is not in [0, 1].
        """

        # fail early: a width outside [0, 1] would yield swapped or invalid
        # quantile levels
        if return_ci and not 0 <= ci_width <= 1:
            raise ValueError(f"ci_width must be between 0 and 1, got {ci_width!r}")

        preds_list = [
            estimator.predict(X, return_interval_probs=return_interval_probs)
            for estimator in self.estimators_
        ]

        # stack the predictions of all replicates and group them by index label;
        # the grouping is built once and shared by the mean and both quantiles
        grouped = pd.concat(preds_list).groupby(level=0)

        preds_df = grouped.mean()

        if return_ci:
            low_p = 0.5 - ci_width / 2
            high_p = 0.5 + ci_width / 2

            lower_ci = grouped.quantile(low_p)
            upper_ci = grouped.quantile(high_p)

            # note the order: upper bound first, then lower bound
            return preds_df, upper_ci, lower_ci

        return preds_df

__init__(self, base_estimator, n_estimators=10, random_state=42) special

Parameters:

Name Type Description Default
base_estimator XGBSEBaseEstimator

Base estimator for bootstrap procedure

required
n_estimators int

Number of estimators to fit in bootstrap procedure

10
random_state int

Random state for resampling function

42
Source code in xgbse/_meta.py
def __init__(self, base_estimator, n_estimators=10, random_state=42):
    """
    Record the bootstrap configuration on the instance; nothing is validated
    or fitted here.

    Args:
        base_estimator (XGBSEBaseEstimator): Base estimator for bootstrap procedure
        n_estimators (int): Number of estimators to fit in bootstrap procedure
        random_state (int): Random state for resampling function
    """
    # assigned left to right, i.e. in the same order as the signature
    self.base_estimator, self.n_estimators, self.random_state = (
        base_estimator,
        n_estimators,
        random_state,
    )

fit(self, X, y, **kwargs)

Fit several (base) estimators and store them.

Parameters:

Name Type Description Default
X [pd.DataFrame, np.array]

Features to be used while fitting XGBoost model

required
y structured array(numpy.bool_, numpy.number)

Binary event indicator as first field, and time of event or time of censoring as second field.

required
**kwargs

Keyword arguments to be passed to .fit() method of base_estimator

{}

Returns:

Type Description
XGBSEBootstrapEstimator

Trained instance of XGBSEBootstrapEstimator

Source code in xgbse/_meta.py
def fit(self, X, y, **kwargs):
    """
    Fit several (base) estimators and store them in `self.estimators_`.

    Each replicate is an independent deep copy of `base_estimator` fitted on
    its own resample of `(X, y)`; the `base_estimator` object itself is left
    untouched.

    Args:
        X ([pd.DataFrame, np.array]): Features to be used while fitting
            XGBoost model

        y (structured array(numpy.bool_, numpy.number)): Binary event indicator as first field,
            and time of event or time of censoring as second field.

        **kwargs : Keyword arguments to be passed to .fit() method of base_estimator

    Returns:
        XGBSEBootstrapEstimator: Trained instance of XGBSEBootstrapEstimator

    """

    # start from a clean slate so that refitting discards earlier replicates
    self.estimators_ = []

    for i in range(self.n_estimators):
        # offsetting the seed gives every replicate a different but
        # reproducible resample
        X_sample, y_sample = resample(X, y, random_state=i + self.random_state)

        # train a private copy: fitting `self.base_estimator` directly would
        # mutate the object the caller handed in
        replicate = deepcopy(self.base_estimator)
        self.estimators_.append(replicate.fit(X_sample, y_sample, **kwargs))

    return self

predict(self, X, return_ci=False, ci_width=0.683, return_interval_probs=False)

Predicts survival as given by the base estimator. A survival function, its upper and lower confidence intervals can be returned for each sample of the dataframe X.

Parameters:

Name Type Description Default
X pd.DataFrame

data frame with samples to generate predictions

required
return_ci Bool

whether to include confidence intervals

False
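ci_width Float

width of confidence interval

0.683
return_interval_probs Bool

whether to return interval probabilities instead of cumulative survival probabilities

False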

Returns:

Type Description
([(pd.DataFrame, pd.DataFrame, pd.DataFrame), pd.DataFrame])

preds_df: A dataframe of survival probabilities for all times (columns), from a time_bins array, for all samples of X (rows). If return_interval_probs is True, the interval probabilities are returned instead of the cumulative survival probabilities.

upper_ci: Upper confidence interval for the survival probability values

lower_ci: Lower confidence interval for the survival probability values

Source code in xgbse/_meta.py
def predict(self, X, return_ci=False, ci_width=0.683, return_interval_probs=False):
    """
    Predicts survival as the average of the predictions of all fitted base
    estimators. Optionally, lower and upper confidence bounds (quantiles taken
    across the base estimators) are returned as well.

    Predictions of the base estimators are aggregated by index label
    (`groupby(level=0)`), so rows that share a label are pooled together.

    Args:
        X (pd.DataFrame): data frame with samples to generate predictions

        return_ci (Bool): whether to include confidence intervals

        ci_width (Float): width of confidence interval, between 0 and 1. The
            bounds are the `0.5 - ci_width / 2` and `0.5 + ci_width / 2`
            quantiles. Only used when `return_ci` is True.

        return_interval_probs (Bool): forwarded to the base estimators'
            `predict`; when True, interval probabilities are returned instead
            of the cumulative survival probabilities.

    Returns:
        ([(pd.DataFrame, pd.DataFrame, pd.DataFrame), pd.DataFrame]):
        preds_df: A dataframe of survival probabilities
        for all times (columns), from a time_bins array, for all samples of X
        (rows). If return_interval_probs is True, the interval probabilities are returned
        instead of the cumulative survival probabilities.

        upper_ci: Upper confidence interval for the survival
            probability values (only if return_ci is True)

        lower_ci: Lower confidence interval for the survival
            probability values (only if return_ci is True)

    Raises:
        ValueError: if `return_ci` is True and `ci_width` is not in [0, 1].
    """

    # fail early: a width outside [0, 1] would yield swapped or invalid
    # quantile levels
    if return_ci and not 0 <= ci_width <= 1:
        raise ValueError(f"ci_width must be between 0 and 1, got {ci_width!r}")

    preds_list = [
        estimator.predict(X, return_interval_probs=return_interval_probs)
        for estimator in self.estimators_
    ]

    # stack the predictions of all replicates and group them by index label;
    # the grouping is built once and shared by the mean and both quantiles
    grouped = pd.concat(preds_list).groupby(level=0)

    preds_df = grouped.mean()

    if return_ci:
        low_p = 0.5 - ci_width / 2
        high_p = 0.5 + ci_width / 2

        lower_ci = grouped.quantile(low_p)
        upper_ci = grouped.quantile(high_p)

        # note the order: upper bound first, then lower bound
        return preds_df, upper_ci, lower_ci

    return preds_df

set_predict_request(self, *, ci_width='$UNCHANGED$', return_ci='$UNCHANGED$', return_interval_probs='$UNCHANGED$')

Request metadata passed to the predict method.

Note that this method is only relevant if enable_metadata_routing=True (see sklearn.set_config). Please see the scikit-learn User Guide on metadata routing for how the routing mechanism works.

The options for each parameter are:

  • True: metadata is requested, and passed to predict if provided. The request is ignored if metadata is not provided.

  • False: metadata is not requested and the meta-estimator will not pass it to predict.

  • None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.

  • str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.

Added in version 1.3.

Note: This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a sklearn.pipeline.Pipeline. Otherwise it has no effect.

Parameters

ci_width : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED Metadata routing for ci_width parameter in predict.

return_ci : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED Metadata routing for return_ci parameter in predict.

return_interval_probs : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED Metadata routing for return_interval_probs parameter in predict.

Returns

self : object The updated object.

Source code in xgbse/_meta.py
def func(*args, **kw):
    """Record the requested metadata aliases on the target instance.

    As the upstream note says, this docstring is overwritten after the
    function is defined; see REQUESTER_DOC for the user-facing text.
    """
    if not _routing_enabled():
        raise RuntimeError(
            "This method is only available when metadata routing is enabled."
            " You can enable it using"
            " sklearn.set_config(enable_metadata_routing=True)."
        )

    # Reject keyword names outside the accepted key set, when validation is on.
    if self.validate_keys:
        unexpected = set(kw) - set(self.keys)
        if unexpected:
            raise TypeError(
                f"Unexpected args: {unexpected} in {self.name}. "
                f"Accepted arguments are: {set(self.keys)}"
            )

    # When `instance` is set in the enclosing scope it is the target. Otherwise
    # the function is being used unbound (for instance when monkeypatching) and
    # the target arrives as the first positional argument.
    # https://github.com/scikit-learn/scikit-learn/issues/28632
    if instance is not None:
        _instance = instance
    else:
        _instance, args = args[0], args[1:]

    # Mirror python's own error for stray positional arguments: besides the
    # optional unbound `self`, none are accepted.
    if args:
        raise TypeError(
            f"set_{self.name}_request() takes 0 positional argument but"
            f" {len(args)} were given"
        )

    requests = _instance._get_metadata_request()
    method_metadata_request = getattr(requests, self.name)

    for prop, alias in kw.items():
        # UNCHANGED means "keep whatever request is already there"
        if alias is UNCHANGED:
            continue
        method_metadata_request.add_request(param=prop, alias=alias)

    _instance._metadata_request = requests
    return _instance