xgbse._meta.XGBSEBootstrapEstimator¶

Bootstrap meta-estimator for XGBSE models:

allows for confidence interval estimation for XGBSEDebiasedBCE and XGBSEStackedWeibull
provides variance stabilization for all models, especially for XGBSEKaplanTree

Performs simple bootstrap with sample size equal to training set size.

Source code in xgbse/_meta.py

class XGBSEBootstrapEstimator(BaseEstimator):
    """
    Bootstrap meta-estimator for XGBSE models:

    *  allows for confidence interval estimation for `XGBSEDebiasedBCE` and `XGBSEStackedWeibull`
    *  provides variance stabilization for all models, especially for `XGBSEKaplanTree`

    Performs simple bootstrap with sample size equal to training set size.

    """

    def __init__(self, base_estimator, n_estimators=10, random_state=42):
        """
        Args:
            base_estimator (XGBSEBaseEstimator): Base estimator for bootstrap procedure
            n_estimators (int): Number of estimators to fit in bootstrap procedure
            random_state (int): Random state for resampling function
        """
        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.random_state = random_state

    def fit(self, X, y, **kwargs):
        """
        Fit several (base) estimators on bootstrap resamples and store them.

        Each replicate is trained on an independent copy of `base_estimator`,
        so the object passed to the constructor is left untouched by this
        method and fitted replicates never share state.

        Args:
            X ([pd.DataFrame, np.array]): Features to be used while fitting
                XGBoost model

            y (structured array(numpy.bool_, numpy.number)): Binary event indicator as first field,
                and time of event or time of censoring as second field.

            **kwargs : Keyword arguments to be passed to .fit() method of base_estimator

        Returns:
            XGBSEBootstrapEstimator: Trained instance of XGBSEBootstrapEstimator

        """

        # initializing list of estimators (any previous fit is discarded)
        self.estimators_ = []

        # loop for n_estimators
        for i in range(self.n_estimators):
            # offsetting the seed by the replicate number gives every
            # replicate a different, yet reproducible, resample
            X_sample, y_sample = resample(X, y, random_state=i + self.random_state)

            # copy BEFORE fitting: the user-supplied estimator is never
            # mutated and no fitted model has to be copied afterwards
            trained_model = deepcopy(self.base_estimator).fit(
                X_sample, y_sample, **kwargs
            )

            self.estimators_.append(trained_model)

        return self

    def predict(self, X, return_ci=False, ci_width=0.683, return_interval_probs=False):
        """
        Predicts survival as given by the base estimator. A survival function, its upper and lower
        confidence intervals can be returned for each sample of the dataframe X.

        The prediction is the mean over all fitted bootstrap estimators; rows
        coming from different estimators are matched on their index label.

        Args:
            X (pd.DataFrame): data frame with samples to generate predictions

            return_ci (Bool): whether to include confidence intervals

            ci_width (Float): width of confidence interval; the bounds are the
                quantiles at `0.5 - ci_width / 2` and `0.5 + ci_width / 2` of
                the bootstrap predictions

            return_interval_probs (Bool): forwarded to the `predict` method of
                every fitted estimator; if True, interval probabilities are
                returned instead of cumulative survival probabilities

        Returns:
            ([(pd.DataFrame, pd.DataFrame, pd.DataFrame), pd.DataFrame]):
            preds_df: A dataframe of survival probabilities
            for all times (columns), from a time_bins array, for all samples of X
            (rows). If return_interval_probs is True, the interval probabilities are returned
            instead of the cumulative survival probabilities.

            upper_ci: Upper confidence interval for the survival
                probability values (only when return_ci is True)

            lower_ci: Lower confidence interval for the survival
                probability values (only when return_ci is True)
        """

        preds_list = [
            estimator.predict(X, return_interval_probs=return_interval_probs)
            for estimator in self.estimators_
        ]

        # stack all bootstrap predictions and group them by row label; the
        # grouping is built once and reused for the mean and both quantiles
        grouped_preds = pd.concat(preds_list).groupby(level=0)

        preds_df = grouped_preds.mean()

        if return_ci:
            low_p = 0.5 - ci_width / 2
            high_p = 0.5 + ci_width / 2

            lower_ci = grouped_preds.quantile(low_p)
            upper_ci = grouped_preds.quantile(high_p)

            # note the order: upper bound comes before lower bound
            return preds_df, upper_ci, lower_ci

        return preds_df

`__init__(self, base_estimator, n_estimators=10, random_state=42)` `special` ¶

Parameters:

Name	Type	Description	Default
`base_estimator`	`XGBSEBaseEstimator`	Base estimator for bootstrap procedure	required
`n_estimators`	`int`	Number of estimators to fit in bootstrap procedure	`10`
`random_state`	`int`	Random state for resampling function	`42`

Source code in xgbse/_meta.py

def __init__(self, base_estimator, n_estimators=10, random_state=42):
    """
    Store the bootstrap configuration on the instance without modification.

    Args:
        base_estimator (XGBSEBaseEstimator): Base estimator for bootstrap procedure
        n_estimators (int): Number of estimators to fit in bootstrap procedure
        random_state (int): Random state for resampling function
    """
    # plain attribute storage only: no validation or copying happens here
    self.random_state = random_state
    self.n_estimators = n_estimators
    self.base_estimator = base_estimator

`fit(self, X, y, **kwargs)` ¶

Fit several (base) estimators and store them.

Parameters:

Name	Type	Description	Default
`X`	`[pd.DataFrame, np.array]`	Features to be used while fitting XGBoost model	required
`y`	`structured array(numpy.bool_, numpy.number)`	Binary event indicator as first field, and time of event or time of censoring as second field.	required
`**kwargs`		Keyword arguments to be passed to .fit() method of base_estimator	`{}`

Returns:

Type	Description
`XGBSEBootstrapEstimator`	Trained instance of XGBSEBootstrapEstimator

Source code in xgbse/_meta.py

def fit(self, X, y, **kwargs):
    """
    Fit several (base) estimators on bootstrap resamples and store them.

    Each replicate is trained on an independent copy of `base_estimator`,
    so the object passed to the constructor is left untouched by this
    method and fitted replicates never share state.

    Args:
        X ([pd.DataFrame, np.array]): Features to be used while fitting
            XGBoost model

        y (structured array(numpy.bool_, numpy.number)): Binary event indicator as first field,
            and time of event or time of censoring as second field.

        **kwargs : Keyword arguments to be passed to .fit() method of base_estimator

    Returns:
        XGBSEBootstrapEstimator: Trained instance of XGBSEBootstrapEstimator

    """

    # initializing list of estimators (any previous fit is discarded)
    self.estimators_ = []

    # loop for n_estimators
    for i in range(self.n_estimators):
        # offsetting the seed by the replicate number gives every
        # replicate a different, yet reproducible, resample
        X_sample, y_sample = resample(X, y, random_state=i + self.random_state)

        # copy BEFORE fitting: the user-supplied estimator is never
        # mutated and no fitted model has to be copied afterwards
        trained_model = deepcopy(self.base_estimator).fit(
            X_sample, y_sample, **kwargs
        )

        self.estimators_.append(trained_model)

    return self

`predict(self, X, return_ci=False, ci_width=0.683, return_interval_probs=False)` ¶

Predicts survival as given by the base estimator. A survival function, its upper and lower confidence intervals can be returned for each sample of the dataframe X.

Parameters:

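Name	Type	Description	Default
`X`	`pd.DataFrame`	data frame with samples to generate predictions	required
`return_ci`	`Bool`	whether to include confidence intervals	`False`
`ci_width`	`Float`	width of confidence interval	`0.683`
`return_interval_probs`	`Bool`	whether to return interval probabilities instead of the cumulative survival probabilities	`False`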

Returns:

Type Description

([(pd.DataFrame, np.array, np.array), pd.DataFrame])

preds_df: A dataframe of survival probabilities for all times (columns), from a time_bins array, for all samples of X (rows). If return_interval_probs is True, the interval probabilities are returned instead of the cumulative survival probabilities.

upper_ci: Upper confidence interval for the survival probability values

lower_ci: Lower confidence interval for the survival probability values

Source code in xgbse/_meta.py

def predict(self, X, return_ci=False, ci_width=0.683, return_interval_probs=False):
    """
    Average the survival predictions of every fitted bootstrap estimator and,
    on request, also report quantile-based confidence bounds.

    Rows produced by the different estimators are matched on their index label.

    Args:
        X (pd.DataFrame): data frame with samples to generate predictions

        return_ci (Bool): whether to include confidence intervals

        ci_width (Float): width of confidence interval; the bounds are the
            quantiles at `0.5 - ci_width / 2` and `0.5 + ci_width / 2` of
            the bootstrap predictions

        return_interval_probs (Bool): forwarded to the `predict` method of
            every fitted estimator; if True, interval probabilities are
            returned instead of cumulative survival probabilities

    Returns:
        ([(pd.DataFrame, pd.DataFrame, pd.DataFrame), pd.DataFrame]):
        preds_df: A dataframe of survival probabilities
        for all times (columns), from a time_bins array, for all samples of X
        (rows). If return_interval_probs is True, the interval probabilities are returned
        instead of the cumulative survival probabilities.

        upper_ci: Upper confidence interval for the survival
            probability values (only when return_ci is True)

        lower_ci: Lower confidence interval for the survival
            probability values (only when return_ci is True)
    """

    per_model_preds = [
        model.predict(X, return_interval_probs=return_interval_probs)
        for model in self.estimators_
    ]

    # one grouping by row label serves the mean and both quantiles
    by_row = pd.concat(per_model_preds).groupby(level=0)
    mean_preds = by_row.mean()

    if not return_ci:
        return mean_preds

    half_width = ci_width / 2
    lower_bound = by_row.quantile(0.5 - half_width)
    upper_bound = by_row.quantile(0.5 + half_width)

    # note the order: upper bound comes before lower bound
    return mean_preds, upper_bound, lower_bound

`set_predict_request(self, *, ci_width='$UNCHANGED$', return_ci='$UNCHANGED$', return_interval_probs='$UNCHANGED$')` ¶

Request metadata passed to the predict method.

Note that this method is only relevant if `enable_metadata_routing=True` (see `sklearn.set_config`). Please see the scikit-learn User Guide section on metadata routing for how the routing mechanism works.

The options for each parameter are:

True: metadata is requested, and passed to predict if provided. The request is ignored if metadata is not provided.
False: metadata is not requested and the meta-estimator will not pass it to predict.
None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.

.. versionadded:: 1.3

Note: This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a `sklearn.pipeline.Pipeline`. Otherwise it has no effect.

Parameters¶

ci_width : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED Metadata routing for ci_width parameter in predict.

return_ci : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED Metadata routing for return_ci parameter in predict.

return_interval_probs : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED Metadata routing for return_interval_probs parameter in predict.

Returns¶

self : object The updated object.

Source code in xgbse/_meta.py

def func(*args, **kw):
    """Record the metadata-request aliases given as keyword arguments.

    Every keyword whose value is not ``UNCHANGED`` is registered on the
    target estimator's request object for the method named ``self.name``;
    the target estimator is then returned.

    This docstring is overwritten below.
    See REQUESTER_DOC for expected functionality
    """
    if not _routing_enabled():
        raise RuntimeError(
            "This method is only available when metadata routing is enabled."
            " You can enable it using"
            " sklearn.set_config(enable_metadata_routing=True)."
        )

    # Key validation is optional; unknown keywords are rejected only when
    # the enclosing descriptor asks for it.
    if self.validate_keys:
        unexpected = set(kw) - set(self.keys)
        if unexpected:
            raise TypeError(
                f"Unexpected args: {unexpected} in {self.name}. "
                f"Accepted arguments are: {set(self.keys)}"
            )

    # This makes it possible to use the decorated method as an unbound method,
    # for instance when monkeypatching.
    # https://github.com/scikit-learn/scikit-learn/issues/28632
    if instance is not None:
        _instance = instance
    else:
        # unbound call: the first positional argument plays the role of `self`
        _instance, args = args[0], args[1:]

    # Replicating python's behavior when positional args are given other than
    # `self`, and `self` is only allowed if this method is unbound.
    if args:
        raise TypeError(
            f"set_{self.name}_request() takes 0 positional argument but"
            f" {len(args)} were given"
        )

    requests = _instance._get_metadata_request()
    method_metadata_request = getattr(requests, self.name)

    for prop, alias in kw.items():
        if alias is UNCHANGED:
            # keep whatever request was previously stored for this parameter
            continue
        method_metadata_request.add_request(param=prop, alias=alias)

    _instance._metadata_request = requests
    return _instance

xgbse._meta.XGBSEBootstrapEstimator¶

__init__(self, base_estimator, n_estimators=10, random_state=42) special ¶

fit(self, X, y, **kwargs) ¶

predict(self, X, return_ci=False, ci_width=0.683, return_interval_probs=False) ¶

set_predict_request(self, *, ci_width='$UNCHANGED$', return_ci='$UNCHANGED$', return_interval_probs='$UNCHANGED$') ¶

Parameters¶

Returns¶

`__init__(self, base_estimator, n_estimators=10, random_state=42)` `special` ¶

`fit(self, X, y, **kwargs)` ¶

`predict(self, X, return_ci=False, ci_width=0.683, return_interval_probs=False)` ¶

`set_predict_request(self, *, ci_width='$UNCHANGED$', return_ci='$UNCHANGED$', return_interval_probs='$UNCHANGED$')` ¶