xgbse._meta.XGBSEBootstrapEstimator¶
Bootstrap meta-estimator for XGBSE models:
- allows for confidence interval estimation for
XGBSEDebiasedBCE
and XGBSEStackedWeibull
- provides variance stabilization for all models, especially for
XGBSEKaplanTree
Performs simple bootstrap with sample size equal to training set size.
Source code in xgbse/_meta.py
class XGBSEBootstrapEstimator(BaseEstimator):
    """
    Bootstrap meta-estimator for XGBSE models:

    * allows for confidence interval estimation for `XGBSEDebiasedBCE` and `XGBSEStackedWeibull`
    * provides variance stabilization for all models, especially for `XGBSEKaplanTree`

    Performs simple bootstrap with sample size equal to training set size.
    """

    def __init__(self, base_estimator, n_estimators=10, random_state=42):
        """
        Store the bootstrap configuration. Nothing is fitted or validated here.

        Args:
            base_estimator (XGBSEBaseEstimator): Base estimator for bootstrap procedure

            n_estimators (int): Number of estimators to fit in bootstrap procedure

            random_state (int): Random state for resampling function. Replicate `i`
                is drawn with seed `i + random_state`.
        """
        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.random_state = random_state

    def fit(self, X, y, **kwargs):
        """
        Fit several (base) estimators and store them.

        Args:
            X ([pd.DataFrame, np.array]): Features to be used while fitting
                XGBoost model

            y (structured array(numpy.bool_, numpy.number)): Binary event indicator as first field,
                and time of event or time of censoring as second field.

            **kwargs : Keyword arguments to be passed to .fit() method of base_estimator

        Returns:
            XGBSEBootstrapEstimator: Trained instance of XGBSEBootstrapEstimator
        """
        # start from a clean list so that refitting discards earlier replicates
        self.estimators_ = []

        for i in range(self.n_estimators):
            # offsetting the seed by the replicate number gives every replicate
            # a different, yet reproducible, resample
            X_sample, y_sample = resample(X, y, random_state=i + self.random_state)

            # fit a private copy: the estimator handed to __init__ is left exactly
            # as the caller passed it instead of being retrained on every replicate
            trained_model = deepcopy(self.base_estimator).fit(
                X_sample, y_sample, **kwargs
            )
            self.estimators_.append(trained_model)

        return self

    def predict(self, X, return_ci=False, ci_width=0.683, return_interval_probs=False):
        """
        Predicts survival as given by the base estimator. A survival function, its upper and lower
        confidence intervals can be returned for each sample of the dataframe X.

        Args:
            X (pd.DataFrame): data frame with samples to generate predictions

            return_ci (Bool): whether to include confidence intervals

            ci_width (Float): width of confidence interval. The bounds are the
                `0.5 - ci_width / 2` and `0.5 + ci_width / 2` quantiles of the
                predictions across the fitted estimators.

            return_interval_probs (Bool): forwarded unchanged to every fitted
                estimator's `predict`; when True, interval probabilities are
                returned instead of cumulative survival probabilities.

        Returns:
            ([(pd.DataFrame, pd.DataFrame, pd.DataFrame), pd.DataFrame]):
            preds_df: A dataframe of survival probabilities
            for all times (columns), from a time_bins array, for all samples of X
            (rows). If return_interval_probs is True, the interval probabilities are returned
            instead of the cumulative survival probabilities. This is the mean over
            the fitted estimators.

            upper_ci: Upper confidence interval for the survival
            probability values (only when return_ci is True)

            lower_ci: Lower confidence interval for the survival
            probability values (only when return_ci is True)
        """
        # one prediction frame per bootstrap replicate, stacked row-wise
        agg_preds = pd.concat(
            [
                estimator.predict(X, return_interval_probs=return_interval_probs)
                for estimator in self.estimators_
            ]
        )

        # rows that share an index label are treated as the same sample seen by
        # different replicates; the grouping is built once and reused below
        by_sample = agg_preds.groupby(level=0)
        preds_df = by_sample.mean()

        if return_ci:
            low_p = 0.5 - ci_width / 2
            high_p = 0.5 + ci_width / 2
            lower_ci = by_sample.quantile(low_p)
            upper_ci = by_sample.quantile(high_p)
            # note the order: the upper bound comes before the lower bound
            return preds_df, upper_ci, lower_ci

        return preds_df
__init__(self, base_estimator, n_estimators=10, random_state=42)
special
¶
Parameters:
Name | Type | Description | Default |
---|---|---|---|
base_estimator |
XGBSEBaseEstimator |
Base estimator for bootstrap procedure |
required |
n_estimators |
int |
Number of estimators to fit in bootstrap procedure |
10 |
random_state |
int |
Random state for resampling function |
42 |
Source code in xgbse/_meta.py
def __init__(self, base_estimator, n_estimators=10, random_state=42):
    """
    Store the bootstrap configuration. Nothing is fitted or validated here.

    Args:
        base_estimator (XGBSEBaseEstimator): Base estimator for bootstrap procedure
        n_estimators (int): Number of estimators to fit in bootstrap procedure
        random_state (int): Random state for resampling function
    """
    # parameters are stored verbatim under their own names
    self.base_estimator = base_estimator
    self.n_estimators = n_estimators
    self.random_state = random_state
fit(self, X, y, **kwargs)
¶
Fit several (base) estimators and store them.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
X |
[pd.DataFrame, np.array] |
Features to be used while fitting XGBoost model |
required |
y |
structured array(numpy.bool_, numpy.number) |
Binary event indicator as first field, and time of event or time of censoring as second field. |
required |
**kwargs |
Keyword arguments to be passed to .fit() method of base_estimator |
{} |
Returns:
Type | Description |
---|---|
XGBSEBootstrapEstimator |
Trained instance of XGBSEBootstrapEstimator |
Source code in xgbse/_meta.py
def fit(self, X, y, **kwargs):
    """
    Fit several (base) estimators and store them.

    Args:
        X ([pd.DataFrame, np.array]): Features to be used while fitting
            XGBoost model

        y (structured array(numpy.bool_, numpy.number)): Binary event indicator as first field,
            and time of event or time of censoring as second field.

        **kwargs : Keyword arguments to be passed to .fit() method of base_estimator

    Returns:
        XGBSEBootstrapEstimator: Trained instance of XGBSEBootstrapEstimator
    """
    # start from a clean list so that refitting discards earlier replicates
    self.estimators_ = []

    for i in range(self.n_estimators):
        # offsetting the seed by the replicate number gives every replicate
        # a different, yet reproducible, resample
        X_sample, y_sample = resample(X, y, random_state=i + self.random_state)

        # fit a private copy: the estimator handed to __init__ is left exactly
        # as the caller passed it instead of being retrained on every replicate
        trained_model = deepcopy(self.base_estimator).fit(
            X_sample, y_sample, **kwargs
        )
        self.estimators_.append(trained_model)

    return self
predict(self, X, return_ci=False, ci_width=0.683, return_interval_probs=False)
¶
Predicts survival as given by the base estimator. A survival function, its upper and lower confidence intervals can be returned for each sample of the dataframe X.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
X |
pd.DataFrame |
data frame with samples to generate predictions |
required |
return_ci |
Bool |
whether to include confidence intervals |
False |
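ci_width |
Float |
width of confidence interval |
0.683 |
return_interval_probs |
Bool |
whether to return interval probabilities instead of cumulative survival probabilities |
False |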
Returns:
Type | Description |
---|---|
([(pd.DataFrame, np.array, np.array), pd.DataFrame]) |
preds_df: A dataframe of survival probabilities for all times (columns), from a time_bins array, for all samples of X (rows). If return_interval_probs is True, the interval probabilities are returned instead of the cumulative survival probabilities. upper_ci: Upper confidence interval for the survival probability values (returned only when return_ci is True). lower_ci: Lower confidence interval for the survival probability values (returned only when return_ci is True). |
Source code in xgbse/_meta.py
def predict(self, X, return_ci=False, ci_width=0.683, return_interval_probs=False):
    """
    Predicts survival as given by the base estimator. A survival function, its upper and lower
    confidence intervals can be returned for each sample of the dataframe X.

    Args:
        X (pd.DataFrame): data frame with samples to generate predictions

        return_ci (Bool): whether to include confidence intervals

        ci_width (Float): width of confidence interval. The bounds are the
            `0.5 - ci_width / 2` and `0.5 + ci_width / 2` quantiles of the
            predictions across the fitted estimators.

        return_interval_probs (Bool): forwarded unchanged to every fitted
            estimator's `predict`; when True, interval probabilities are
            returned instead of cumulative survival probabilities.

    Returns:
        ([(pd.DataFrame, pd.DataFrame, pd.DataFrame), pd.DataFrame]):
        preds_df: A dataframe of survival probabilities
        for all times (columns), from a time_bins array, for all samples of X
        (rows). If return_interval_probs is True, the interval probabilities are returned
        instead of the cumulative survival probabilities. This is the mean over
        the fitted estimators.

        upper_ci: Upper confidence interval for the survival
        probability values (only when return_ci is True)

        lower_ci: Lower confidence interval for the survival
        probability values (only when return_ci is True)
    """
    # one prediction frame per bootstrap replicate, stacked row-wise
    agg_preds = pd.concat(
        [
            estimator.predict(X, return_interval_probs=return_interval_probs)
            for estimator in self.estimators_
        ]
    )

    # rows that share an index label are treated as the same sample seen by
    # different replicates; the grouping is built once and reused below
    by_sample = agg_preds.groupby(level=0)
    preds_df = by_sample.mean()

    if return_ci:
        low_p = 0.5 - ci_width / 2
        high_p = 0.5 + ci_width / 2
        lower_ci = by_sample.quantile(low_p)
        upper_ci = by_sample.quantile(high_p)
        # note the order: the upper bound comes before the lower bound
        return preds_df, upper_ci, lower_ci

    return preds_df
set_predict_request(self, *, ci_width='$UNCHANGED$', return_ci='$UNCHANGED$', return_interval_probs='$UNCHANGED$')
¶
Request metadata passed to the predict method.
Note that this method is only relevant if enable_metadata_routing=True (see sklearn.set_config).
Please see the User Guide (metadata_routing) on how the routing mechanism works.
The options for each parameter are:
- True: metadata is requested, and passed to predict if provided. The request is ignored if metadata is not provided.
- False: metadata is not requested and the meta-estimator will not pass it to predict.
- None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
- str: metadata should be passed to the meta-estimator with this given alias instead of the original name.
The default (sklearn.utils.metadata_routing.UNCHANGED) retains the
existing request. This allows you to change the request for some
parameters and not others.
.. versionadded:: 1.3
Note:
This method is only relevant if this estimator is used as a
sub-estimator of a meta-estimator, e.g. used inside a
sklearn.pipeline.Pipeline. Otherwise it has no effect.
Parameters¶
ci_width : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
Metadata routing for the ci_width parameter in predict.
return_ci : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
Metadata routing for the return_ci parameter in predict.
return_interval_probs : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
Metadata routing for the return_interval_probs parameter in predict.
Returns¶
self : object The updated object.
Source code in xgbse/_meta.py
def func(*args, **kw):
    """Record the metadata requests given as keyword arguments on an estimator.

    This docstring is overwritten below.
    See REQUESTER_DOC for expected functionality
    """
    # NOTE: `self` and `instance` are not parameters of this function; they are
    # taken from the enclosing scope, which is not part of this listing.
    if not _routing_enabled():
        raise RuntimeError(
            "This method is only available when metadata routing is enabled."
            " You can enable it using"
            " sklearn.set_config(enable_metadata_routing=True)."
        )

    # Keyword names are only checked when key validation is switched on.
    if self.validate_keys:
        if set(kw) - set(self.keys):
            raise TypeError(
                f"Unexpected args: {set(kw) - set(self.keys)} in {self.name}. "
                f"Accepted arguments are: {set(self.keys)}"
            )

    # This makes it possible to use the decorated method as an unbound method,
    # for instance when monkeypatching: without a bound instance, the first
    # positional argument plays the role of the instance.
    # https://github.com/scikit-learn/scikit-learn/issues/28632
    if instance is not None:
        target = instance
    else:
        target, args = args[0], args[1:]

    # Replicating python's behavior when positional args are given other than
    # `self`, and `self` is only allowed if this method is unbound.
    if args:
        raise TypeError(
            f"set_{self.name}_request() takes 0 positional argument but"
            f" {len(args)} were given"
        )

    routing = target._get_metadata_request()
    method_request = getattr(routing, self.name)
    for prop, alias in kw.items():
        # UNCHANGED leaves whatever request was already recorded for this parameter
        if alias is UNCHANGED:
            continue
        method_request.add_request(param=prop, alias=alias)

    # store the updated request object on the instance and hand the instance back
    target._metadata_request = routing
    return target