from copy import deepcopy
import numpy as np
from ...base import BudgetManager
from ...utils import check_random_state, check_scalar
class EstimatedBudgetZliobaite(BudgetManager):
    """Base budget manager tracking an estimate of the recently spent budget.

    The manager maintains ``u_t``, an estimate of how many true labels were
    queried within the last ``w`` steps [1]. The estimate is updated
    incrementally via ``u_t = u_{t-1} * (w - 1) / w + labeling_t``, where
    ``labeling_t`` is 1 if the instance at step ``t`` was queried and 0
    otherwise. Budget is considered to be left while
    ``budget - u_t / w > 0``; the subclasses in this module use that test to
    decide whether an instance may still be queried.

    Parameters
    ----------
    budget : float, optional (default=None)
        Specifies the ratio of instances which are allowed to be queried,
        with 0 <= budget <= 1. See also :class:`BudgetManager`.
    w : int, optional (default=100)
        Size of the memory window, i.e., the number of most recent steps
        over which the spent budget is estimated.

    References
    ----------
    [1] Žliobaitė, I., Bifet, A., Pfahringer, B., & Holmes, G. (2014). Active
        Learning With Drifting Streaming Data. IEEE Transactions on Neural
        Networks and Learning Systems, 25(1), 27-39.
    """

    def __init__(self, budget=None, w=100):
        super().__init__(budget)
        self.w = w

    def update(self, candidates, queried_indices):
        """Update the spent-budget estimate with the latest candidates.

        Parameters
        ----------
        candidates : {array-like, sparse matrix} of shape
            (n_samples, n_features)
            The instances which could be queried. Only ``len(candidates)``
            is used here.
        queried_indices : array-like
            Indices of the instances in `candidates` that have been queried.

        Returns
        -------
        self : EstimatedBudgetZliobaite
            The budget manager itself, after it has been updated.
        """
        # One labeling flag per candidate: 1.0 if it was queried, else 0.0.
        labeling = np.zeros(len(candidates))
        labeling[queried_indices] = 1

        # Validating an empty array makes sure `u_t_` exists before use.
        self._validate_data(np.array([]))

        # Apply u_t = u_{t-1} * (w - 1) / w + labeling_t once per candidate.
        for was_queried in labeling:
            decayed = self.u_t_ * ((self.w - 1) / self.w)
            self.u_t_ = decayed + was_queried
        return self

    def _validate_data(self, utilities):
        """Validate the input data and initialize the budget estimate.

        Parameters
        ----------
        utilities : ndarray of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy.

        Returns
        -------
        utilities : ndarray of shape (n_samples,)
            The utilities as returned by the parent class validation.
        """
        checked = super()._validate_data(utilities)
        # The estimate starts at zero the first time validation runs.
        if not hasattr(self, "u_t_"):
            self.u_t_ = 0
        return checked
class FixedUncertaintyBudgetManager(EstimatedBudgetZliobaite):
    """Budget manager using a fixed confidence threshold.

    An instance is queried if budget is left (``u_t / w < budget``) and its
    confidence, computed as ``1 - utility``, does not exceed the fixed
    threshold ``theta = 1 / num_classes + budget * (1 - 1 / num_classes)``.

    See also :class:`.EstimatedBudgetZliobaite`

    Parameters
    ----------
    budget : float, optional (default=None)
        Specifies the ratio of instances which are allowed to be queried,
        with 0 <= budget <= 1. See also :class:`BudgetManager`.
    w : int, optional (default=100)
        Size of the memory window over which the spent budget is estimated.
    num_classes : int, optional (default=2)
        Number of classes, used to compute the threshold.
    """

    def __init__(self, budget=None, w=100, num_classes=2):
        super().__init__(budget, w)
        self.num_classes = num_classes

    def query_by_utility(self, utilities):
        """Select the candidates that should be queried.

        The internal budget estimate is not modified; a working copy is
        advanced candidate by candidate instead.

        Parameters
        ----------
        utilities : ndarray of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy.

        Returns
        -------
        queried_indices : list of int
            Indices of the utilities whose instances should be queried,
            with 0 <= len(queried_indices) <= n_samples.
        """
        utilities = self._validate_data(utilities)
        confidence = 1 - utilities

        # Fixed threshold derived from the budget and the number of classes.
        # NOTE(review): `budget_` is presumably set by the parent validation.
        theta = 1 / self.num_classes + self.budget_ * (
            1 - 1 / self.num_classes
        )
        below_threshold = np.array(confidence) <= theta

        # Working copy of the estimate; `self.u_t_` only changes in update().
        spent = self.u_t_
        queried_indices = []
        for idx, wants_label in enumerate(below_threshold):
            has_budget = spent / self.w < self.budget_
            if not has_budget:
                wants_label = False
            # u_t = u_{t-1} * (w - 1) / w + labeling_t
            spent = spent * ((self.w - 1) / self.w) + wants_label
            if wants_label:
                queried_indices.append(idx)
        return queried_indices

    def update(self, candidates, queried_indices):
        """Update the budget manager with the latest candidates.

        Parameters
        ----------
        candidates : {array-like, sparse matrix} of shape
            (n_samples, n_features)
            The instances which could be queried.
        queried_indices : array-like
            Indices of the instances in `candidates` that have been queried.

        Returns
        -------
        self : FixedUncertaintyBudgetManager
            The budget manager itself, after it has been updated.
        """
        # Nothing class-specific to track; the parent updates the estimate.
        super().update(candidates, queried_indices)
        return self

    def _validate_data(self, utilities):
        """Validate the input data and the parameters `w`, `num_classes`.

        Parameters
        ----------
        utilities : ndarray of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy.

        Returns
        -------
        utilities : ndarray of shape (n_samples,)
            The utilities as returned by the parent class validation.
        """
        checked = super()._validate_data(utilities)
        # Both parameters must be strictly positive integers.
        for name in ("w", "num_classes"):
            check_scalar(
                getattr(self, name),
                name,
                int,
                min_val=0,
                min_inclusive=False,
            )
        return checked
class VariableUncertaintyBudgetManager(EstimatedBudgetZliobaite):
    """Budget manager using an adaptive confidence threshold.

    An instance is queried if budget is left (``budget - u_t / w > 0``) and
    its confidence, computed as ``1 - utility``, is below the current
    threshold ``theta``. While budget is left, the threshold is multiplied
    by ``1 - s`` after each queried instance and by ``1 + s`` after each
    instance that was not queried.

    ``u_t`` is the estimate of how many true labels were queried within the
    last ``w`` steps, computed recursively as
    ``u_t = u_{t-1} * (w - 1) / w + labeling_t``.

    See also :class:`.EstimatedBudgetZliobaite`

    Parameters
    ----------
    budget : float, optional (default=None)
        Specifies the ratio of instances which are allowed to be queried,
        with 0 <= budget <= 1. See also :class:`BudgetManager`.
    w : int, optional (default=100)
        Size of the memory window over which the spent budget is estimated.
    theta : float, optional (default=1.0)
        Initial threshold below which instances are queried. The working
        value is adapted after each instance.
    s : float, optional (default=0.01)
        Relative step by which theta is decreased or increased, depending
        on whether the label of the current instance was queried.
    """

    def __init__(self, budget=None, w=100, theta=1.0, s=0.01):
        super().__init__(budget, w)
        self.theta = theta
        self.s = s

    def query_by_utility(self, utilities):
        """Select the candidates that should be queried.

        The internal state is not modified; working copies of the budget
        estimate and of the threshold are advanced candidate by candidate.

        Parameters
        ----------
        utilities : ndarray of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy.

        Returns
        -------
        queried_indices : list of int
            Indices of the utilities whose instances should be queried,
            with 0 <= len(queried_indices) <= n_samples.
        """
        utilities = self._validate_data(utilities)
        confidence = 1 - utilities

        queried_indices = []
        # Working copies; the persistent state only changes in update().
        tmp_u_t = self.u_t_
        tmp_theta = self.theta_

        for i, u in enumerate(confidence):
            if self.budget_ > tmp_u_t / self.w:
                sample = u < tmp_theta
                # Tighten theta after a query, relax it otherwise.
                if sample:
                    tmp_theta *= 1 - self.s
                    queried_indices.append(i)
                else:
                    tmp_theta *= 1 + self.s
            else:
                # No budget left: never query and leave theta untouched.
                sample = False
            # u_t = u_{t-1} * (w - 1) / w + labeling_t
            tmp_u_t = tmp_u_t * ((self.w - 1) / self.w) + sample
        return queried_indices

    def update(self, candidates, queried_indices):
        """Update the threshold and the budget estimate.

        The candidates are processed one at a time so that the budget test
        for each candidate sees the estimate produced by its predecessors,
        exactly as simulated in :meth:`query_by_utility`.

        Parameters
        ----------
        candidates : {array-like, sparse matrix} of shape
            (n_samples, n_features)
            The instances which could be queried. Only ``len(candidates)``
            is used here.
        queried_indices : array-like
            Indices of the instances in `candidates` that have been queried.

        Returns
        -------
        self : VariableUncertaintyBudgetManager
            The budget manager itself, after it has been updated.
        """
        self._validate_data(np.array([]))
        queried = np.zeros(len(candidates))
        queried[queried_indices] = 1
        for was_queried in queried:
            # Theta is only adapted while budget is left.
            if self.budget_ > self.u_t_ / self.w:
                if was_queried:
                    self.theta_ *= 1 - self.s
                else:
                    self.theta_ *= 1 + self.s
            # Advance the estimate right away (same recurrence as the parent
            # update) so the next candidate is tested against a fresh value.
            self.u_t_ = self.u_t_ * ((self.w - 1) / self.w) + was_queried
        return self

    def _validate_data(self, utilities):
        """Validate the input data and the parameters `w`, `theta`, `s`.

        Parameters
        ----------
        utilities : ndarray of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy.

        Returns
        -------
        utilities : ndarray of shape (n_samples,)
            The utilities as returned by the parent class validation.
        """
        utilities = super()._validate_data(utilities)
        # w must be a strictly positive integer.
        check_scalar(self.w, "w", int, min_val=0, min_inclusive=False)
        # theta must be a float; also initializes the working copy theta_.
        self._validate_theta()
        # s must be a float greater than 0 and bounded above by 1.
        check_scalar(
            self.s, "s", float, min_val=0, min_inclusive=False, max_val=1
        )
        return utilities

    def _validate_theta(self):
        """Check that theta is a float and initialize `theta_` if missing."""
        check_scalar(self.theta, "theta", float)
        # The working threshold starts from the user-supplied value.
        if not hasattr(self, "theta_"):
            self.theta_ = self.theta
class RandomVariableUncertaintyBudgetManager(EstimatedBudgetZliobaite):
    """Budget manager using a randomized adaptive confidence threshold.

    An instance is queried if budget is left (``budget - u_t / w > 0``) and
    its confidence, computed as ``1 - utility``, is below the current
    threshold ``theta`` multiplied by a random factor drawn from a normal
    distribution with mean 1 and standard deviation ``delta``. While budget
    is left, theta is multiplied by ``1 - s`` after each queried instance
    and by ``1 + s`` after each instance that was not queried.

    ``u_t`` is the estimate of how many true labels were queried within the
    last ``w`` steps, computed recursively as
    ``u_t = u_{t-1} * (w - 1) / w + labeling_t``.

    See also :class:`.EstimatedBudgetZliobaite`

    Parameters
    ----------
    budget : float, optional (default=None)
        Specifies the ratio of instances which are allowed to be queried,
        with 0 <= budget <= 1. See also :class:`BudgetManager`.
    w : int, optional (default=100)
        Size of the memory window over which the spent budget is estimated.
    theta : float, optional (default=1.0)
        Initial threshold below which instances are queried. The working
        value is adapted after each instance.
    s : float, optional (default=0.01)
        Relative step by which theta is decreased or increased, depending
        on whether the label of the current instance was queried.
    delta : float, optional (default=1.0)
        Standard deviation of the normal distribution from which the random
        threshold factor is drawn.
    random_state : int | np.random.RandomState, optional (default=None)
        Random state for candidate selection.
    """

    def __init__(
        self,
        budget=None,
        w=100,
        theta=1.0,
        s=0.01,
        delta=1.0,
        random_state=None,
    ):
        super().__init__(budget, w)
        self.theta = theta
        self.s = s
        self.delta = delta
        self.random_state = random_state

    def query_by_utility(self, utilities):
        """Select the candidates that should be queried.

        The internal state is not modified: working copies of the budget
        estimate and of the threshold are used, and the state of the random
        number generator is restored before returning.

        Parameters
        ----------
        utilities : ndarray of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy.

        Returns
        -------
        queried_indices : list of int
            Indices of the utilities whose instances should be queried,
            with 0 <= len(queried_indices) <= n_samples.
        """
        utilities = self._validate_data(utilities)
        confidence = 1 - utilities

        queried_indices = []
        # Working copies; the persistent state only changes in update().
        tmp_u_t = self.u_t_
        tmp_theta = self.theta_
        prior_random_state = self.random_state_.get_state()

        for i, u in enumerate(confidence):
            if self.budget_ > tmp_u_t / self.w:
                # Randomize the threshold with a factor drawn from N(1, delta).
                eta = self.random_state_.normal(1, self.delta)
                sample = u < tmp_theta * eta
                # Tighten theta after a query, relax it otherwise.
                if sample:
                    tmp_theta *= 1 - self.s
                    queried_indices.append(i)
                else:
                    tmp_theta *= 1 + self.s
            else:
                # No budget left: never query and leave theta untouched.
                sample = False
            # u_t = u_{t-1} * (w - 1) / w + labeling_t
            tmp_u_t = tmp_u_t * ((self.w - 1) / self.w) + sample

        # Rewind the generator so that querying has no lasting side effect.
        self.random_state_.set_state(prior_random_state)
        return queried_indices

    def update(self, candidates, queried_indices):
        """Update the threshold, the budget estimate and the random state.

        The candidates are processed one at a time so that the budget test
        for each candidate sees the estimate produced by its predecessors,
        exactly as simulated in :meth:`query_by_utility`.

        Parameters
        ----------
        candidates : {array-like, sparse matrix} of shape
            (n_samples, n_features)
            The instances which could be queried. Only ``len(candidates)``
            is used here.
        queried_indices : array-like
            Indices of the instances in `candidates` that have been queried.

        Returns
        -------
        self : RandomVariableUncertaintyBudgetManager
            The budget manager itself, after it has been updated.
        """
        self._validate_data(np.array([]))
        queried = np.zeros(len(candidates))
        queried[queried_indices] = 1
        # Advance the generator so that the next query uses fresh draws.
        self.random_state_.random_sample(len(candidates))
        for was_queried in queried:
            # Theta is only adapted while budget is left.
            if self.budget_ > self.u_t_ / self.w:
                if was_queried:
                    self.theta_ *= 1 - self.s
                else:
                    self.theta_ *= 1 + self.s
            # Advance the estimate right away (same recurrence as the parent
            # update) so the next candidate is tested against a fresh value.
            self.u_t_ = self.u_t_ * ((self.w - 1) / self.w) + was_queried
        return self

    def _validate_data(self, utilities):
        """Validate the input data and all parameters of this manager.

        Parameters
        ----------
        utilities : ndarray of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy.

        Returns
        -------
        utilities : ndarray of shape (n_samples,)
            The utilities as returned by the parent class validation.
        """
        utilities = super()._validate_data(utilities)
        # w must be a strictly positive integer.
        check_scalar(self.w, "w", int, min_val=0, min_inclusive=False)
        # theta must be a float; also initializes the working copy theta_.
        self._validate_theta()
        # s must be a float greater than 0 and bounded above by 1.
        check_scalar(
            self.s, "s", float, min_val=0, min_inclusive=False, max_val=1
        )
        # delta must be a strictly positive float.
        check_scalar(
            self.delta, "delta", float, min_val=0, min_inclusive=False
        )
        self._validate_random_state()
        return utilities

    def _validate_theta(self):
        """Check that theta is a float and initialize `theta_` if missing."""
        check_scalar(self.theta, "theta", float)
        # The working threshold starts from the user-supplied value.
        if not hasattr(self, "theta_"):
            self.theta_ = self.theta

    def _validate_random_state(self):
        """Create and check the working random state `random_state_`.

        On first use, `random_state_` is a deep copy of `random_state`. It
        is then passed through `check_random_state` on every call. See also
        :func:`~sklearn.utils.check_random_state`.
        """
        if not hasattr(self, "random_state_"):
            self.random_state_ = deepcopy(self.random_state)
        self.random_state_ = check_random_state(self.random_state_)
class SplitBudgetManager(EstimatedBudgetZliobaite):
    """Budget manager mixing random querying with an adaptive threshold.

    While budget is left (``budget - u_t / w > 0``), each instance is handled
    by one of two rules: with probability ``v`` it is queried at random
    (with probability ``budget``), otherwise it is queried if its
    confidence, computed as ``1 - utility``, is below the adaptive threshold
    ``theta``. Theta is only adapted by the second rule: it is multiplied by
    ``1 - s`` after a query and by ``1 + s`` otherwise.

    ``u_t`` is the estimate of how many true labels were queried within the
    last ``w`` steps, computed recursively as
    ``u_t = u_{t-1} * (w - 1) / w + labeling_t``.

    See also :class:`.EstimatedBudgetZliobaite`

    Parameters
    ----------
    budget : float, optional (default=None)
        Specifies the ratio of instances which are allowed to be queried,
        with 0 <= budget <= 1. See also :class:`BudgetManager`.
    w : int, optional (default=100)
        Size of the memory window over which the spent budget is estimated.
    theta : float, optional (default=1.0)
        Initial threshold below which instances are queried. The working
        value is adapted after each instance handled by the threshold rule.
    s : float, optional (default=0.01)
        Relative step by which theta is decreased or increased, depending
        on whether the label of the current instance was queried.
    v : float, optional (default=0.1)
        Fraction of instances that are handled by the random rule.
    random_state : int | np.random.RandomState, optional (default=None)
        Random state for candidate selection.

    See Also
    --------
    EstimatedBudgetZliobaite : BudgetManager implementing the base class for
        Zliobaite based budget managers
    """

    def __init__(
        self, budget=None, w=100, theta=1.0, s=0.01, v=0.1, random_state=None
    ):
        super().__init__(budget, w)
        self.v = v
        self.theta = theta
        self.s = s
        self.random_state = random_state

    def query_by_utility(self, utilities):
        """Select the candidates that should be queried.

        The internal state is not modified: working copies of the budget
        estimate and of the threshold are used, and the state of the random
        number generator is restored before returning.

        Parameters
        ----------
        utilities : ndarray of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy.

        Returns
        -------
        queried_indices : list of int
            Indices of the utilities whose instances should be queried,
            with 0 <= len(queried_indices) <= n_samples.
        """
        utilities = self._validate_data(utilities)
        confidence = 1 - utilities

        # Working copies; the persistent state only changes in update().
        spent = self.u_t_
        threshold = self.theta_
        rng_snapshot = self.random_state_.get_state()

        queried_indices = []
        for idx, conf in enumerate(confidence):
            has_budget = spent / self.w < self.budget_
            if has_budget:
                # The first draw decides which of the two rules applies.
                if self.v > self.random_state_.random_sample():
                    # Random rule: query with probability `budget_`.
                    draw = self.random_state_.random_sample()
                    sample = draw <= self.budget_
                else:
                    # Threshold rule: query low-confidence instances and
                    # adapt the threshold accordingly.
                    sample = conf < threshold
                    threshold *= (1 - self.s) if sample else (1 + self.s)
                if sample:
                    queried_indices.append(idx)
            else:
                sample = False
            # u_t = u_{t-1} * (w - 1) / w + labeling_t
            spent = spent * ((self.w - 1) / self.w) + sample

        # Rewind the generator so that querying has no lasting side effect.
        self.random_state_.set_state(rng_snapshot)
        return queried_indices

    def update(self, candidates, queried_indices):
        """Update the threshold, the budget estimate and the random state.

        The candidates are replayed one at a time, consuming the same random
        draws as :meth:`query_by_utility` and updating the budget estimate
        after every single candidate.

        Parameters
        ----------
        candidates : {array-like, sparse matrix} of shape
            (n_samples, n_features)
            The instances which could be queried.
        queried_indices : array-like
            Indices of the instances in `candidates` that have been queried.

        Returns
        -------
        self : SplitBudgetManager
            The budget manager itself, after it has been updated.
        """
        self._validate_data(np.array([]))
        labeling = np.zeros(len(candidates))
        labeling[queried_indices] = 1

        for candidate, was_queried in zip(candidates, labeling):
            if self.u_t_ / self.w < self.budget_:
                if self.v > self.random_state_.random_sample():
                    # Random rule: consume its second draw, theta unchanged.
                    self.random_state_.random_sample()
                elif was_queried:
                    self.theta_ *= 1 - self.s
                else:
                    self.theta_ *= 1 + self.s
            # Feed this single candidate to the parent so that `u_t_` is
            # current before the next candidate is examined.
            single_queried = [0] if was_queried else []
            super().update([candidate], single_queried)
        return self

    def _validate_data(self, utilities):
        """Validate the input data and all parameters of this manager.

        Parameters
        ----------
        utilities : ndarray of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy.

        Returns
        -------
        utilities : ndarray of shape (n_samples,)
            The utilities as returned by the parent class validation.
        """
        checked = super()._validate_data(utilities)
        # w must be a strictly positive integer.
        check_scalar(self.w, "w", int, min_val=0, min_inclusive=False)
        # theta must be a float; also initializes the working copy theta_.
        self._validate_theta()
        # s must be a float greater than 0 and bounded above by 1.
        check_scalar(
            self.s, "s", float, min_val=0, min_inclusive=False, max_val=1
        )
        # v must be a float strictly between 0 and 1.
        check_scalar(
            self.v,
            "v",
            float,
            min_val=0,
            min_inclusive=False,
            max_inclusive=False,
            max_val=1,
        )
        self._validate_random_state()
        return checked

    def _validate_theta(self):
        """Check that theta is a float and initialize `theta_` if missing."""
        check_scalar(self.theta, "theta", float)
        # The working threshold starts from the user-supplied value.
        if not hasattr(self, "theta_"):
            self.theta_ = self.theta

    def _validate_random_state(self):
        """Create and check the working random state `random_state_`.

        On first use, `random_state_` is a deep copy of `random_state`. It
        is then passed through `check_random_state` on every call. See also
        :func:`~sklearn.utils.check_random_state`.
        """
        if not hasattr(self, "random_state_"):
            self.random_state_ = deepcopy(self.random_state)
        self.random_state_ = check_random_state(self.random_state_)
class RandomBudgetManager(EstimatedBudgetZliobaite):
    """Budget manager that queries instances at random.

    While budget is left (``budget - u_t / w > 0``), each instance whose
    utility is not NaN is queried with probability ``budget``, independently
    of the value of its utility.

    ``u_t`` is the estimate of how many true labels were queried within the
    last ``w`` steps, computed recursively as
    ``u_t = u_{t-1} * (w - 1) / w + labeling_t``.

    See also :class:`.EstimatedBudgetZliobaite`

    Parameters
    ----------
    budget : float, optional (default=None)
        Specifies the ratio of instances which are allowed to be queried,
        with 0 <= budget <= 1. See also :class:`BudgetManager`.
    w : int, optional (default=100)
        Size of the memory window over which the spent budget is estimated.
    random_state : int | np.random.RandomState, optional (default=None)
        Random state for candidate selection.
    """

    def __init__(self, budget=None, w=100, random_state=None):
        super().__init__(budget, w)
        self.random_state = random_state

    def query_by_utility(self, utilities):
        """Select the candidates that should be queried.

        The internal state is not modified: a working copy of the budget
        estimate is used, and the state of the random number generator is
        restored before returning.

        Parameters
        ----------
        utilities : ndarray of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy. Candidates with a NaN utility are never queried.

        Returns
        -------
        queried_indices : list of int
            Indices of the utilities whose instances should be queried,
            with 0 <= len(queried_indices) <= n_samples.
        """
        utilities = self._validate_data(utilities)
        # Only the number of candidates is taken from the confidences.
        confidence = 1 - utilities

        # Working copy of the estimate; `self.u_t_` only changes in update().
        spent = self.u_t_
        rng_snapshot = self.random_state_.get_state()

        # One uniform draw per candidate decides the random selection.
        draws = self.random_state_.random_sample(len(confidence))
        randomly_chosen = draws <= self.budget_

        queried_indices = []
        for idx, chosen in enumerate(randomly_chosen):
            if not spent / self.w < self.budget_:
                # No budget left: the candidate cannot be queried.
                chosen = False
            label = chosen and not np.isnan(utilities[idx])
            # u_t = u_{t-1} * (w - 1) / w + labeling_t
            spent = spent * ((self.w - 1) / self.w) + label
            if label:
                queried_indices.append(idx)

        # Rewind the generator so that querying has no lasting side effect.
        self.random_state_.set_state(rng_snapshot)
        return queried_indices

    def update(self, candidates, queried_indices):
        """Update the budget estimate and advance the random state.

        Parameters
        ----------
        candidates : {array-like, sparse matrix} of shape
            (n_samples, n_features)
            The instances which could be queried.
        queried_indices : array-like
            Indices of the instances in `candidates` that have been queried.

        Returns
        -------
        self : RandomBudgetManager
            The budget manager itself, after it has been updated.
        """
        self._validate_data(np.array([]))
        # Consume one draw per candidate, as query_by_utility did before
        # rewinding the generator.
        self.random_state_.random_sample(len(candidates))
        super().update(candidates, queried_indices)
        return self

    def _validate_data(self, utilities):
        """Validate the input data, `w` and the random state.

        Parameters
        ----------
        utilities : ndarray of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy.

        Returns
        -------
        utilities : ndarray of shape (n_samples,)
            The utilities as returned by the parent class validation.
        """
        checked = super()._validate_data(utilities)
        # w must be a strictly positive integer.
        check_scalar(self.w, "w", int, min_inclusive=False, min_val=0)
        self._validate_random_state()
        return checked

    def _validate_random_state(self):
        """Create and check the working random state `random_state_`.

        On first use, `random_state_` is a deep copy of `random_state`. It
        is then passed through `check_random_state` on every call. See also
        :func:`~sklearn.utils.check_random_state`.
        """
        if not hasattr(self, "random_state_"):
            self.random_state_ = deepcopy(self.random_state)
        self.random_state_ = check_random_state(self.random_state_)