# Source code for skactiveml.stream.budgetmanager._estimated_budget_zliobaite

from copy import deepcopy

import numpy as np

from ...base import BudgetManager
from ...utils import check_random_state, check_scalar


class EstimatedBudgetZliobaite(BudgetManager):
    """Base class for budget managers using an estimated spent budget.

    The manager keeps a decayed estimate ``u_t_`` of how many true labels
    were queried within the last ``w`` steps [1]. For every processed
    instance the estimate is advanced with the incremental function
    ``u_t = u_t-1 * (w - 1) / w + labeling_t``, where ``labeling_t`` is 1 if
    the instance was queried and 0 otherwise. The subclasses in this module
    consider budget to be left while ``u_t / w < budget``; this base class
    only maintains the estimate and does not decide which instances are
    queried.

    Parameters
    ----------
    budget : float, optional (default=None)
        Specifies the ratio of instances which are allowed to be queried,
        with 0 <= budget <= 1. See also :class:`BudgetManager`.
    w : int, optional (default=100)
        Specifies the size of the memory window, i.e., the budget is
        controlled over the last w steps taken.

    References
    ----------
    [1] Žliobaitė, I., Bifet, A., Pfahringer, B., & Holmes, G. (2014). Active
        Learning With Drifting Streaming Data. IEEE Transactions on Neural
        Networks and Learning Systems, 25(1), 27-39.
    """

    def __init__(self, budget=None, w=100):
        super().__init__(budget)
        self.w = w

    def update(self, candidates, queried_indices):
        """Updates the budget manager.

        Parameters
        ----------
        candidates : {array-like, sparse matrix} of shape
        (n_samples, n_features)
            The instances which could be queried. Only ``len(candidates)``
            is used here.

        queried_indices : array-like
            Indices of the instances in candidates which have been queried.

        Returns
        -------
        self : EstimatedBudgetZliobaite
            The EstimatedBudgetZliobaite returns itself, after it is updated.
        """
        # One flag per candidate: 1 if it was queried, 0 otherwise.
        queried = np.zeros(len(candidates))
        queried[queried_indices] = 1
        # Validation also creates `u_t_` if this is the first call.
        self._validate_data(np.array([]))
        decay = (self.w - 1) / self.w
        # u_t = u_t-1 * (w-1)/w + labeling_t, applied once per candidate.
        for labeling in queried:
            self.u_t_ = self.u_t_ * decay + labeling

        return self

    def _validate_data(self, utilities):
        """Validate input data.

        Delegates to the ``_validate_data`` of the parent class and makes
        sure that the estimate ``u_t_`` exists.

        Parameters
        ----------
        utilities: ndarray of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy.

        Returns
        -------
        utilities : ndarray of shape (n_samples,)
            Checked utilities.
        """
        utilities = super()._validate_data(utilities)

        # Start the estimate of queried labels at zero on first use.
        if not hasattr(self, "u_t_"):
            self.u_t_ = 0

        return utilities


class FixedUncertaintyBudgetManager(EstimatedBudgetZliobaite):
    """FixedUncertaintyBudgetManager

    Budget manager which is optimized for FixedUncertainty. An instance is
    queried if budget is left, i.e., ``u_t / w < budget``, and its
    confidence ``1 - utility`` does not exceed the fixed threshold
    ``theta = 1 / num_classes + budget * (1 - 1 / num_classes)``.
    See also :class:`.EstimatedBudgetZliobaite`

    Parameters
    ----------
    budget : float, optional (default=None)
        Specifies the ratio of instances which are allowed to be queried,
        with 0 <= budget <= 1. See also :class:`BudgetManager`.
    w : int, optional (default=100)
        Specifies the size of the memory window, i.e., the budget is
        controlled over the last w steps taken.
    num_classes : int, optional (default=2)
        Specifies the number of classes.
    """

    def __init__(self, budget=None, w=100, num_classes=2):
        super().__init__(budget, w)
        self.num_classes = num_classes

    def query_by_utility(self, utilities):
        """Ask the budget manager which utilities are sufficient to query the
        corresponding instance.

        The internal state is not modified; the decision is simulated on a
        local copy of the estimate and committed later through `update`.

        Parameters
        ----------
        utilities : ndarray of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy, which are used to determine whether sampling an instance
            is worth it given the budgeting constraint.

        Returns
        -------
        queried_indices : list of int
            The indices of instances represented by utilities which should be
            queried, with 0 <= len(queried_indices) <= n_samples.
        """
        utilities = self._validate_data(utilities)
        confidence = 1 - utilities

        # Fixed threshold derived from the budget and the number of classes.
        theta = 1 / self.num_classes + self.budget_ * (
            1 - 1 / self.num_classes
        )

        # Work on a copy so that querying does not alter the manager.
        tmp_u_t = self.u_t_
        decay = (self.w - 1) / self.w

        queried_indices = []
        below_threshold = np.array(confidence) <= theta
        for i, d in enumerate(below_threshold):
            # Without remaining budget nothing is queried at this step.
            if not (tmp_u_t / self.w < self.budget_):
                d = False
            # u_t = u_t-1 * (w-1)/w + labeling_t
            tmp_u_t = tmp_u_t * decay + d
            if d:
                queried_indices.append(i)

        return queried_indices

    def update(self, candidates, queried_indices):
        """Updates the budget manager.

        Parameters
        ----------
        candidates : {array-like, sparse matrix} of shape
        (n_samples, n_features)
            The instances which could be queried.

        queried_indices : array-like
            Indices of the instances in candidates which have been queried.

        Returns
        -------
        self : FixedUncertaintyBudgetManager
            The FixedUncertaintyBudget returns itself, after it is updated.
        """
        super().update(candidates, queried_indices)
        return self

    def _validate_data(self, utilities):
        """Validate input data.

        In addition to the checks of the parent class, `w` and `num_classes`
        are checked to be ints greater than 0.

        Parameters
        ----------
        utilities: ndarray of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy.

        Returns
        -------
        utilities : ndarray of shape (n_samples,)
            Checked utilities.
        """

        utilities = super()._validate_data(utilities)
        check_scalar(self.w, "w", int, min_val=0, min_inclusive=False)
        check_scalar(
            self.num_classes,
            "num_classes",
            int,
            min_val=0,
            min_inclusive=False,
        )

        return utilities


class VariableUncertaintyBudgetManager(EstimatedBudgetZliobaite):
    """VariableUncertaintyBudgetManager

    Budget manager with an adaptive confidence threshold. An instance is
    queried if budget is left, i.e., ``u_t / w < budget``, and its
    confidence ``1 - utility`` is below the current threshold ``theta_``.
    Whenever budget is left, the threshold is multiplied by ``1 - s`` after
    a query and by ``1 + s`` otherwise.

    u is the estimate of how many true labels were queried within the last
    w steps. The recursive function,
    u_t = u_t-1 * (w-1) / w + labeling_t , is used to calculate u at time t.
    See also :class:`.EstimatedBudgetZliobaite`

    Parameters
    ----------
    budget : float, optional (default=None)
        Specifies the ratio of instances which are allowed to be queried,
        with 0 <= budget <= 1. See also :class:`BudgetManager`.
    w : int, optional (default=100)
        Specifies the size of the memory window, i.e., the budget is
        controlled over the last w steps taken.
    theta : float, optional (default=1.0)
        Specifies the starting threshold below which instances are
        purchased. The working copy ``theta_`` is recalculated after each
        instance.
    s : float, optional (default=0.01)
        Specifies the relative step by which theta is decreased or increased
        based on the purchase of the given label.
    """

    def __init__(self, budget=None, w=100, theta=1.0, s=0.01):
        super().__init__(budget, w)
        self.theta = theta
        self.s = s

    def query_by_utility(self, utilities):
        """Ask the budget manager which utilities are sufficient to query the
        corresponding instance.

        The internal state is not modified; the decision is simulated on
        local copies of the estimate and the threshold and committed later
        through `update`.

        Parameters
        ----------
        utilities : ndarray of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy, which are used to determine whether sampling an instance
            is worth it given the budgeting constraint.

        Returns
        -------
        queried_indices : list of int
            The indices of instances represented by utilities which should be
            queried, with 0 <= len(queried_indices) <= n_samples.
        """
        utilities = self._validate_data(utilities)
        confidence = 1 - utilities

        queried_indices = []
        # Work on copies so that querying does not alter the manager.
        tmp_u_t = self.u_t_
        tmp_theta = self.theta_
        decay = (self.w - 1) / self.w

        for i, u in enumerate(confidence):
            if self.budget_ > tmp_u_t / self.w:
                sample = u < tmp_theta
                # Tighten the threshold after a query, relax it otherwise.
                if sample:
                    tmp_theta *= 1 - self.s
                    queried_indices.append(i)
                else:
                    tmp_theta *= 1 + self.s
            else:
                # No budget left: nothing is queried, theta stays untouched.
                sample = False
            # u_t = u_t-1 * (w-1)/w + labeling_t
            tmp_u_t = tmp_u_t * decay + sample

        return queried_indices

    def update(self, candidates, queried_indices):
        """Updates the budget manager.

        The estimate ``u_t_`` and the threshold ``theta_`` are advanced
        instance by instance, so that the budget check for each instance
        sees the estimate after all previous instances of the batch, exactly
        as it is simulated in `query_by_utility`.

        Parameters
        ----------
        candidates : {array-like, sparse matrix} of shape
        (n_samples, n_features)
            The instances which could be queried. Only ``len(candidates)``
            is used here.

        queried_indices : array-like
            Indices of the instances in candidates which have been queried.

        Returns
        -------
        self : VariableUncertaintyBudgetManager
            The VariableUncertaintyBudget returns itself, after it is updated.
        """
        self._validate_data(np.array([]))

        queried = np.zeros(len(candidates))
        queried[queried_indices] = 1
        decay = (self.w - 1) / self.w
        for was_queried in queried:
            # Theta is only adapted while budget is left at this step.
            if self.budget_ > self.u_t_ / self.w:
                if was_queried:
                    self.theta_ *= 1 - self.s
                else:
                    self.theta_ *= 1 + self.s
            # Same recurrence as EstimatedBudgetZliobaite.update, applied
            # per instance so the next budget check is not stale.
            self.u_t_ = self.u_t_ * decay + was_queried
        return self

    def _validate_data(self, utilities):
        """Validate input data.

        In addition to the checks of the parent class, `w` is checked to be
        an int greater than 0, `theta` to be a float and `s` to be a float
        greater than 0 with an upper bound of 1.

        Parameters
        ----------
        utilities: ndarray of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy.

        Returns
        -------
        utilities : ndarray of shape (n_samples,)
            Checked utilities.
        """

        utilities = super()._validate_data(utilities)
        # Check w
        check_scalar(self.w, "w", int, min_val=0, min_inclusive=False)
        # Check theta
        self._validate_theta()
        # Check s
        check_scalar(
            self.s, "s", float, min_val=0, min_inclusive=False, max_val=1
        )

        return utilities

    def _validate_theta(self):
        """Validate if theta is set as a float and initialize ``theta_``."""
        check_scalar(self.theta, "theta", float)
        # The working threshold starts at the configured theta.
        if not hasattr(self, "theta_"):
            self.theta_ = self.theta


class RandomVariableUncertaintyBudgetManager(EstimatedBudgetZliobaite):
    """RandomVariableUncertaintyBudgetManager

    Budget manager with an adaptive and randomized confidence threshold. An
    instance is queried if budget is left, i.e., ``u_t / w < budget``, and
    its confidence ``1 - utility`` is below ``theta_ * eta``, where ``eta``
    is drawn from a normal distribution with mean 1 and standard deviation
    ``delta``. Whenever budget is left, ``theta_`` is multiplied by
    ``1 - s`` after a query and by ``1 + s`` otherwise.

    u is the estimate of how many true labels were queried within the last
    w steps. The recursive function,
    u_t = u_t-1 * (w-1) / w + labeling_t , is used to calculate u at time t.

    Parameters
    ----------
    budget : float, optional (default=None)
        Specifies the ratio of instances which are allowed to be queried,
        with 0 <= budget <= 1. See also :class:`BudgetManager`.
    w : int, optional (default=100)
        Specifies the size of the memory window, i.e., the budget is
        controlled over the last w steps taken.
    theta : float, optional (default=1.0)
        Specifies the starting threshold below which instances are
        purchased. The working copy ``theta_`` is recalculated after each
        instance.
    s : float, optional (default=0.01)
        Specifies the relative step by which theta is decreased or increased
        based on the purchase of the given label.
    delta : float, optional (default=1.0)
        Specifies the standard deviation of the normal distribution the
        threshold is multiplied with.
    random_state : int | np.random.RandomState, optional (default=None)
        Random state for candidate selection.
    """

    def __init__(
        self,
        budget=None,
        w=100,
        theta=1.0,
        s=0.01,
        delta=1.0,
        random_state=None,
    ):
        super().__init__(budget, w)
        self.theta = theta
        self.s = s
        self.delta = delta
        self.random_state = random_state

    def query_by_utility(self, utilities):
        """Ask the budget manager which utilities are sufficient to query the
        corresponding instance.

        The internal state is not modified: estimate and threshold are
        simulated on local copies and the state of the random number
        generator is restored before returning.

        Parameters
        ----------
        utilities : ndarray of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy, which are used to determine whether sampling an instance
            is worth it given the budgeting constraint.

        Returns
        -------
        queried_indices : list of int
            The indices of instances represented by utilities which should be
            queried, with 0 <= len(queried_indices) <= n_samples.
        """
        utilities = self._validate_data(utilities)
        confidence = 1 - utilities

        queried_indices = []
        # Work on copies so that querying does not alter the manager.
        tmp_u_t = self.u_t_
        tmp_theta = self.theta_
        decay = (self.w - 1) / self.w

        # Remember the generator state; it is restored below so repeated
        # calls without `update` yield the same decisions.
        prior_random_state = self.random_state_.get_state()

        for i, u in enumerate(confidence):
            if self.budget_ > tmp_u_t / self.w:
                # Randomize the threshold for this instance only.
                eta = self.random_state_.normal(1, self.delta)
                sample = u < tmp_theta * eta
                # Tighten the threshold after a query, relax it otherwise.
                if sample:
                    tmp_theta *= 1 - self.s
                    queried_indices.append(i)
                else:
                    tmp_theta *= 1 + self.s
            else:
                # No budget left: nothing is queried and nothing is drawn.
                sample = False
            # u_t = u_t-1 * (w-1)/w + labeling_t
            tmp_u_t = tmp_u_t * decay + sample

        self.random_state_.set_state(prior_random_state)

        return queried_indices

    def update(self, candidates, queried_indices):
        """Updates the budget manager.

        The estimate ``u_t_`` and the threshold ``theta_`` are advanced
        instance by instance, so that the budget check for each instance
        sees the estimate after all previous instances of the batch, exactly
        as it is simulated in `query_by_utility`.

        Parameters
        ----------
        candidates : {array-like, sparse matrix} of shape
        (n_samples, n_features)
            The instances which could be queried. Only ``len(candidates)``
            is used here.

        queried_indices : array-like
            Indices of the instances in candidates which have been queried.

        Returns
        -------
        self : RandomVariableUncertaintyBudgetManager
            The RandomVariableUncertaintyBudget returns itself, after it is
            updated.
        """
        self._validate_data(np.array([]))

        queried = np.zeros(len(candidates))
        queried[queried_indices] = 1
        # Advance the generator so the next query uses fresh random numbers.
        self.random_state_.random_sample(len(candidates))
        decay = (self.w - 1) / self.w
        for was_queried in queried:
            # Theta is only adapted while budget is left at this step.
            if self.budget_ > self.u_t_ / self.w:
                if was_queried:
                    self.theta_ *= 1 - self.s
                else:
                    self.theta_ *= 1 + self.s
            # Same recurrence as EstimatedBudgetZliobaite.update, applied
            # per instance so the next budget check is not stale.
            self.u_t_ = self.u_t_ * decay + was_queried
        return self

    def _validate_data(self, utilities):
        """Validate input data.

        In addition to the checks of the parent class, `w` is checked to be
        an int greater than 0, `theta` to be a float, `s` to be a float
        greater than 0 with an upper bound of 1 and `delta` to be a float
        greater than 0. The random state is initialized as well.

        Parameters
        ----------
        utilities: ndarray of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy.

        Returns
        -------
        utilities : ndarray of shape (n_samples,)
            Checked utilities.
        """

        utilities = super()._validate_data(utilities)
        # Check w
        check_scalar(self.w, "w", int, min_val=0, min_inclusive=False)
        # Check theta
        self._validate_theta()
        # Check s
        check_scalar(
            self.s, "s", float, min_val=0, min_inclusive=False, max_val=1
        )
        # Check delta
        check_scalar(
            self.delta, "delta", float, min_val=0, min_inclusive=False
        )
        self._validate_random_state()

        return utilities

    def _validate_theta(self):
        """Validate if theta is set as a float and initialize ``theta_``."""
        check_scalar(self.theta, "theta", float)
        # The working threshold starts at the configured theta.
        if not hasattr(self, "theta_"):
            self.theta_ = self.theta

    def _validate_random_state(self):
        """Initialize and check ``random_state_``.

        On the first call, ``random_state_`` is created as a deep copy of
        `random_state`. It is then passed through `check_random_state` on
        every call. See also :func:`~sklearn.utils.check_random_state`
        """
        if not hasattr(self, "random_state_"):
            self.random_state_ = deepcopy(self.random_state)
        self.random_state_ = check_random_state(self.random_state_)

class SplitBudgetManager(EstimatedBudgetZliobaite):
    """SplitBudgetManager

    Budget manager which mixes random querying with an adaptive confidence
    threshold. While budget is left, i.e., ``u_t / w < budget``, each
    instance is handled randomly with probability v and according to the
    variable uncertainty threshold otherwise:

    - random case: the instance is queried if a uniform random number does
      not exceed the budget; ``theta_`` is left unchanged,
    - threshold case: the instance is queried if its confidence
      ``1 - utility`` is below ``theta_``; ``theta_`` is multiplied by
      ``1 - s`` after a query and by ``1 + s`` otherwise.

    u is the estimate of how many true labels were queried within the last
    w steps. The recursive function,
    u_t = u_t-1 * (w-1) / w + labeling_t , is used to calculate u at time t.
    See also :class:`.EstimatedBudgetZliobaite`

    Parameters
    ----------
    budget : float, optional (default=None)
        Specifies the ratio of instances which are allowed to be queried,
        with 0 <= budget <= 1. See also :class:`BudgetManager`.
    w : int, optional (default=100)
        Specifies the size of the memory window, i.e., the budget is
        controlled over the last w steps taken.
    theta : float, optional (default=1.0)
        Specifies the starting threshold below which instances are
        purchased. The working copy ``theta_`` is recalculated after each
        instance handled by the threshold case.
    s : float, optional (default=0.01)
        Specifies the relative step by which theta is decreased or increased
        based on the purchase of the given label.
    v : float, optional (default=0.1)
        Specifies the ratio of instances handled randomly, with 0 < v < 1.
    random_state : int | np.random.RandomState, optional (default=None)
        Random state for candidate selection.

    See Also
    --------
    EstimatedBudgetZliobaite : BudgetManager implementing the base class for
        Zliobaite based budget managers
    """

    def __init__(
        self, budget=None, w=100, theta=1.0, s=0.01, v=0.1, random_state=None
    ):
        super().__init__(budget, w)
        self.v = v
        self.theta = theta
        self.s = s
        self.random_state = random_state

    def query_by_utility(self, utilities):
        """Ask the budget manager which utilities are sufficient to query the
        corresponding instance.

        The internal state is not modified: estimate and threshold are
        simulated on local copies and the state of the random number
        generator is restored before returning.

        Parameters
        ----------
        utilities : ndarray of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy, which are used to determine whether sampling an instance
            is worth it given the budgeting constraint.

        Returns
        -------
        queried_indices : list of int
            The indices of instances represented by utilities which should be
            queried, with 0 <= len(queried_indices) <= n_samples.
        """
        utilities = self._validate_data(utilities)
        confidence = 1 - utilities

        queried_indices = []
        # Work on copies so that querying does not alter the manager.
        tmp_u_t = self.u_t_
        tmp_theta = self.theta_
        decay = (self.w - 1) / self.w
        # Remember the generator state; it is restored below and the same
        # draws are replayed in `update`.
        random_state_state = self.random_state_.get_state()

        for i, u in enumerate(confidence):
            if tmp_u_t / self.w < self.budget_:
                # First draw selects the random or the threshold case.
                if self.v > self.random_state_.random_sample():
                    # Random case: second draw decides the query.
                    new_u = self.random_state_.random_sample()
                    sample = new_u <= self.budget_
                else:
                    sample = u < tmp_theta
                    # Tighten theta after a query, relax it otherwise.
                    if sample:
                        tmp_theta *= 1 - self.s
                    else:
                        tmp_theta *= 1 + self.s
                if sample:
                    queried_indices.append(i)
            else:
                # No budget left: nothing is queried and nothing is drawn.
                sample = False

            # u_t = u_t-1 * (w-1)/w + labeling_t
            tmp_u_t = tmp_u_t * decay + sample

        # set the internal state to the previous value
        self.random_state_.set_state(random_state_state)

        return queried_indices

    def update(self, candidates, queried_indices):
        """Updates the budget manager.

        The instances are processed one by one. The random numbers drawn in
        `query_by_utility` are consumed in the same order, and the estimate
        ``u_t_`` is advanced after each instance.

        Parameters
        ----------
        candidates : {array-like, sparse matrix} of shape
        (n_samples, n_features)
            The instances which could be queried.

        queried_indices : array-like
            Indices of the instances in candidates which have been queried.

        Returns
        -------
        self : SplitBudgetManager
            The SplitBudget returns itself, after it is updated.
        """
        self._validate_data(np.array([]))

        queried = np.zeros(len(candidates))
        queried[queried_indices] = 1
        for x_t, q in zip(candidates, queried):
            if self.u_t_ / self.w < self.budget_:
                if self.v > self.random_state_.random_sample():
                    # Random case: consume the draw that decided the query
                    # in `query_by_utility`; theta is not adapted.
                    _ = self.random_state_.random_sample()
                elif q:
                    self.theta_ *= 1 - self.s
                else:
                    self.theta_ *= 1 + self.s
            # Advance the estimate for this single instance.
            super().update([x_t], [0] if q else [])
        return self

    def _validate_data(self, utilities):
        """Validate input data.

        In addition to the checks of the parent class, `w` is checked to be
        an int greater than 0, `theta` to be a float, `s` to be a float
        greater than 0 with an upper bound of 1 and `v` to be a float with
        0 < v < 1. The random state is initialized as well.

        Parameters
        ----------
        utilities: ndarray of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy.

        Returns
        -------
        utilities : ndarray of shape (n_samples,)
            Checked utilities.
        """

        utilities = super()._validate_data(utilities)
        # Check w
        check_scalar(self.w, "w", int, min_val=0, min_inclusive=False)
        # Check theta
        self._validate_theta()
        # Check s
        check_scalar(
            self.s, "s", float, min_val=0, min_inclusive=False, max_val=1
        )
        # Check v
        check_scalar(
            self.v,
            "v",
            float,
            min_val=0,
            min_inclusive=False,
            max_inclusive=False,
            max_val=1,
        )
        # Check random_state
        self._validate_random_state()

        return utilities

    def _validate_theta(self):
        """Validate if theta is set as a float and initialize ``theta_``."""
        check_scalar(self.theta, "theta", float)
        # The working threshold starts at the configured theta.
        if not hasattr(self, "theta_"):
            self.theta_ = self.theta

    def _validate_random_state(self):
        """Initialize and check ``random_state_``.

        On the first call, ``random_state_`` is created as a deep copy of
        `random_state`. It is then passed through `check_random_state` on
        every call. See also :func:`~sklearn.utils.check_random_state`
        """
        if not hasattr(self, "random_state_"):
            self.random_state_ = deepcopy(self.random_state)
        self.random_state_ = check_random_state(self.random_state_)


class RandomBudgetManager(EstimatedBudgetZliobaite):
    """RandomBudgetManager

    Budget manager which queries instances randomly. An instance is queried
    if budget is left, i.e., ``u_t / w < budget``, a uniform random number
    drawn for it does not exceed the budget, and its utility is not NaN.

    u is the estimate of how many true labels were queried within the last
    w steps. The recursive function,
    u_t = u_t-1 * (w-1) / w + labeling_t , is used to calculate u at time t.
    See also :class:`.EstimatedBudgetZliobaite`

    Parameters
    ----------
    budget : float, optional (default=None)
        Specifies the ratio of instances which are allowed to be queried,
        with 0 <= budget <= 1. See also :class:`BudgetManager`.
    w : int, optional (default=100)
        Specifies the size of the memory window, i.e., the budget is
        controlled over the last w steps taken.
    random_state : int | np.random.RandomState, optional (default=None)
        Random state for candidate selection.
    """

    def __init__(self, budget=None, w=100, random_state=None):
        super().__init__(budget, w)
        self.random_state = random_state

    def query_by_utility(self, utilities):
        """Ask the budget manager which utilities are sufficient to query the
        corresponding instance.

        The internal state is not modified: the estimate is simulated on a
        local copy and the state of the random number generator is restored
        before returning.

        Parameters
        ----------
        utilities : ndarray of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy. Only their number and whether they are NaN matters for
            the decision.

        Returns
        -------
        queried_indices : list of int
            The indices of instances represented by utilities which should be
            queried, with 0 <= len(queried_indices) <= n_samples.
        """
        utilities = self._validate_data(utilities)

        queried_indices = []

        # Work on a copy so that querying does not alter the manager.
        tmp_u_t = self.u_t_
        decay = (self.w - 1) / self.w

        # Remember the generator state; it is restored below so repeated
        # calls without `update` yield the same decisions.
        prior_random_state = self.random_state_.get_state()

        # One uniform draw per instance, all drawn up front.
        samples = (
            self.random_state_.random_sample(len(utilities)) <= self.budget_
        )
        for i, d in enumerate(samples):
            budget_left = tmp_u_t / self.w < self.budget_
            # Query only with budget left, a successful draw and a utility
            # that is not NaN.
            query = bool(d) and budget_left and not np.isnan(utilities[i])
            # u_t = u_t-1 * (w-1)/w + labeling_t
            tmp_u_t = tmp_u_t * decay + query
            if query:
                queried_indices.append(i)

        self.random_state_.set_state(prior_random_state)

        return queried_indices

    def update(self, candidates, queried_indices):
        """Updates the budget manager.

        Parameters
        ----------
        candidates : {array-like, sparse matrix} of shape
        (n_samples, n_features)
            The instances which could be queried. Only ``len(candidates)``
            is used here.

        queried_indices : array-like
            Indices of the instances in candidates which have been queried.

        Returns
        -------
        self : RandomBudgetManager
            The RandomBudgetManager returns itself, after it is updated.
        """
        self._validate_data(np.array([]))
        # Consume one draw per candidate, as `query_by_utility` does, so the
        # next query uses fresh random numbers.
        self.random_state_.random_sample(len(candidates))
        super().update(candidates, queried_indices)
        return self

    def _validate_data(self, utilities):
        """Validate input data.

        In addition to the checks of the parent class, `w` is checked to be
        an int greater than 0 and the random state is initialized.

        Parameters
        ----------
        utilities: ndarray of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy.

        Returns
        -------
        utilities : ndarray of shape (n_samples,)
            Checked utilities.
        """

        utilities = super()._validate_data(utilities)
        check_scalar(self.w, "w", int, min_val=0, min_inclusive=False)
        self._validate_random_state()

        return utilities

    def _validate_random_state(self):
        """Initialize and check ``random_state_``.

        On the first call, ``random_state_`` is created as a deep copy of
        `random_state`. It is then passed through `check_random_state` on
        every call. See also :func:`~sklearn.utils.check_random_state`
        """
        if not hasattr(self, "random_state_"):
            self.random_state_ = deepcopy(self.random_state)
        self.random_state_ = check_random_state(self.random_state_)