Source code for skactiveml.stream.budgetmanager._estimated_budget_zliobaite

from copy import deepcopy

import numpy as np

from ...base import BudgetManager
from ...utils import check_random_state, check_scalar
from skactiveml.utils import check_classes


class EstimatedBudgetZliobaite(BudgetManager):
    """EstimatedBudgetZliobaite

    Common base for the budget managers in this module. It keeps track of
    `u_t_`, an estimate [1]_ of how many labels were queried within the last
    `w` steps. Budget is considered to be available as long as the estimated
    spending ratio stays below the budget, i.e., `u_t_ / w < budget`.

    The estimate is maintained incrementally by
    `u_t = u_t-1 * (w-1) / w + labeling_t`, where `labeling_t` is 1 if the
    sample at step `t` was queried and 0 otherwise. This class only
    implements the bookkeeping (`update`); the rule deciding which samples
    are queried is provided by the subclasses.

    Parameters
    ----------
    w : int, default=100
        Specifies the size of the memory window. Controls the `budget` in the
        last `w` steps taken.
    budget : float, default=None
        Specifies the ratio of samples which are allowed to be sampled, with
        `0 <= budget <= 1`. If `budget` is `None`, it is replaced with the
        default budget 0.1.

    References
    ----------
    .. [1] I. Žliobaitė, A. Bifet, B. Pfahringer, and G. Holmes. Active
        Learning With Drifting Streaming Data. IEEE Trans. Neural Netw. Learn.
        Syst., 25(1):27–39, 2014
    """

    def __init__(self, w=100, budget=None):
        super().__init__(budget)
        self.w = w

    def update(self, candidates, queried_indices):
        """Updates the `EstimatedBudgetZliobaite`.

        Every candidate advances the estimate `u_t_` by one step, in the
        order in which the candidates are given.

        Parameters
        ----------
        candidates : {array-like, sparse matrix} of shape\
                (n_samples, n_features)
            The samples which may be queried. Only the number of candidates
            (`len(candidates)`) is used here.
        queried_indices : np.ndarray of shape (n_queried_indices,)
            The indices of samples in candidates whose labels are queried,
            with `0 <= queried_indices < n_candidates`.

        Returns
        -------
        self : EstimatedBudgetZliobaite
            The `EstimatedBudgetZliobaite` returns itself, after it is updated.
        """
        # One flag per candidate: 1.0 if its label was queried, 0.0 otherwise.
        labeling = np.zeros(len(candidates))
        labeling[queried_indices] = 1
        # Validation creates `u_t_` on the first call.
        self._validate_data(np.array([]))

        estimate = self.u_t_
        for flag in labeling:
            # u_t = u_t-1 * (w-1)/w + labeling_t
            estimate = estimate * ((self.w - 1) / self.w) + flag
        self.u_t_ = estimate

        return self

    def _validate_data(self, utilities):
        """Validate input data and initialize the spending estimate.

        Parameters
        ----------
        utilities: array-like of shape (n_samples,)
            The `utilities` provided by the stream-based active learning
            strategy.

        Returns
        -------
        utilities: ndarray of shape (n_samples,)
            Checked `utilities`, as returned by the parent class.
        """
        checked = super()._validate_data(utilities)

        # Start the estimate of queried labels at zero on first use.
        if not hasattr(self, "u_t_"):
            self.u_t_ = 0

        return checked


class FixedUncertaintyBudgetManager(EstimatedBudgetZliobaite):
    """Budget Manager for Fixed Uncertainty Strategy

    Budget manager which implements the budgeting for the Fixed Uncertainty
    Strategy [1]_. If the `budget` is not exhausted, a sample is queried
    when its confidence (`1 - utility`) does not exceed a fixed threshold,
    which is calculated from the `budget` and the number of `classes`:
    `theta = 1 / n_classes + budget * (1 - 1 / n_classes)`. See also
    :class:`.EstimatedBudgetZliobaite`.

    Parameters
    ----------
    classes : array-like of shape (n_classes)
        Holds the label for each class.
    w : int, default=100
        Specifies the size of the memory window. Controls the `budget` in the
        last `w` steps taken.
    budget : float, default=None
        Specifies the ratio of samples which are allowed to be sampled, with
        `0 <= budget <= 1`. If `budget` is `None`, it is replaced with the
        default budget 0.1.

    References
    ----------
    .. [1] I. Žliobaitė, A. Bifet, B. Pfahringer, and G. Holmes. Active
        Learning With Drifting Streaming Data. IEEE Trans. Neural Netw. Learn.
        Syst., 25(1):27–39, 2014
    """

    def __init__(self, classes, w=100, budget=None):
        super().__init__(w=w, budget=budget)
        self.classes = classes

    def query_by_utility(self, utilities):
        """Ask the budget manager which `utilities` are sufficient to query the
        corresponding labels.

        The decision is only simulated: the stored estimate `u_t_` is left
        untouched and has to be committed via `update`.

        Parameters
        ----------
        utilities : array-like of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy, which are used to determine whether querying a sample
            is worth it given the budgeting constraint.

        Returns
        -------
        queried_indices : list of int
            The indices of samples in candidates whose labels are queried,
            with `0 <= queried_indices < n_candidates`.
        """
        utilities = self._validate_data(utilities)
        confidence = 1 - utilities

        # fixed threshold derived from the budget and the number of classes
        n_classes = len(self.classes)
        theta = 1 / n_classes + self.budget_ * (1 - 1 / n_classes)

        # samples that would be queried if budget were unlimited
        below_threshold = np.array(confidence) <= theta

        # work on a copy of the estimate so that querying has no side effects
        estimate = self.u_t_
        queried_indices = []
        for idx, wanted in enumerate(below_threshold):
            has_budget = estimate / self.w < self.budget_
            query = wanted if has_budget else False
            # u_t = u_t-1 * (w-1)/w + labeling_t
            estimate = estimate * ((self.w - 1) / self.w) + query
            if query:
                queried_indices.append(idx)

        return queried_indices

    def update(self, candidates, queried_indices):
        """Updates the budget manager.

        Parameters
        ----------
        candidates : {array-like, sparse matrix} of shape\
                (n_samples, n_features)
            The samples which may be queried. Sparse matrices are accepted
            only if they are supported by the base query strategy.
        queried_indices : np.ndarray of shape (n_queried_indices,)
            The indices of samples in candidates whose labels are queried,
            with `0 <= queried_indices < n_candidates`.

        Returns
        -------
        self : FixedUncertaintyBudgetManager
            The budget manager returns itself, after it is updated.
        """
        # The threshold is fixed, so only the spending estimate of the parent
        # class has to be advanced; the parent returns `self`.
        return super().update(candidates, queried_indices)

    def _validate_data(self, utilities):
        """Validate input data.

        Besides the checks of the parent class, `w` is checked to be a
        positive integer and `classes` is passed to `check_classes`.

        Parameters
        ----------
        utilities: array-like of shape (n_samples,)
            The `utilities` provided by the stream-based active learning
            strategy.

        Returns
        -------
        utilities: ndarray of shape (n_samples,)
            Checked `utilities`.
        """
        checked = super()._validate_data(utilities)
        check_scalar(self.w, "w", int, min_val=0, min_inclusive=False)
        check_classes(self.classes)

        return checked


class VariableUncertaintyBudgetManager(EstimatedBudgetZliobaite):
    """Budget Manager for Variable Uncertainty Strategy

    Budget manager which implements the budgeting for the Variable Uncertainty
    Strategy [1]_. The budget manager checks whether the specified budget
    has been exhausted already. If not, a sample is queried when its
    confidence (`1 - utility`) is lower than `theta_`, which is a
    time-dependent threshold that shrinks by the factor `1 - s` when a sample
    is queried and grows by the factor `1 + s` when it is not. While no
    budget is left, `theta_` is not changed. See also
    :class:`.EstimatedBudgetZliobaite`.

    Parameters
    ----------
    theta : float, default=1.0
        Specifies the initial value for `theta_` that is compared to
        the confidences derived from `utilities`.
    s : float, default=0.01
        Specifies the relative increase or decrease of the threshold if a
        sample is not queried or queried, respectively.
    w : int, default=100
        Specifies the size of the memory window. Controls the `budget` in the
        last `w` steps taken.
    budget : float, default=None
        Specifies the ratio of samples which are allowed to be sampled, with
        `0 <= budget <= 1`. If `budget` is `None`, it is replaced with the
        default budget 0.1.

    References
    ----------
    .. [1] I. Žliobaitė, A. Bifet, B. Pfahringer, and G. Holmes. Active
        Learning With Drifting Streaming Data. IEEE Trans. Neural Netw. Learn.
        Syst., 25(1):27–39, 2014
    """

    def __init__(self, theta=1.0, s=0.01, w=100, budget=None):
        super().__init__(w=w, budget=budget)
        self.theta = theta
        self.s = s

    def query_by_utility(self, utilities):
        """Ask the budget manager which `utilities` are sufficient to query the
        corresponding labels.

        The decision is only simulated on copies of `u_t_` and `theta_`; the
        stored state is committed via `update`.

        Parameters
        ----------
        utilities : array-like of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy, which are used to determine whether querying a sample
            is worth it given the budgeting constraint.

        Returns
        -------
        queried_indices : list of int
            The indices of samples in candidates whose labels are queried,
            with `0 <= queried_indices < n_candidates`.
        """
        utilities = self._validate_data(utilities)
        confidence = 1 - utilities

        queried_indices = []
        # simulate on copies so that querying leaves the stored state as is
        tmp_u_t = self.u_t_
        tmp_theta = self.theta_

        for i, c in enumerate(confidence):
            budget_left = self.budget_ > tmp_u_t / self.w

            sample = False
            if budget_left:
                sample = c < tmp_theta
                # the threshold only adapts while budget is available
                if sample:
                    tmp_theta *= 1 - self.s
                    queried_indices.append(i)
                else:
                    tmp_theta *= 1 + self.s
            # u_t = u_t-1 * (w-1)/w + labeling_t
            tmp_u_t = tmp_u_t * ((self.w - 1) / self.w) + sample

        return queried_indices

    def update(self, candidates, queried_indices):
        """Updates the budget manager.

        The candidates are processed one after another: for each candidate
        the budget is checked against the current estimate `u_t_`, `theta_`
        is adapted if budget is left, and `u_t_` is advanced by one step.
        This is the same order of operations that `query_by_utility`
        simulates, so both stay consistent for batches with more than one
        candidate.

        Parameters
        ----------
        candidates : {array-like, sparse matrix} of shape\
                (n_samples, n_features)
            The samples which may be queried. Only the number of candidates
            (`len(candidates)`) is used here.
        queried_indices : np.ndarray of shape (n_queried_indices,)
            The indices of samples in candidates whose labels are queried,
            with `0 <= queried_indices < n_candidates`.

        Returns
        -------
        self : VariableUncertaintyBudgetManager
            The budget manager returns itself, after it is updated.
        """
        self._validate_data(np.array([]))

        queried = np.zeros(len(candidates))
        queried[queried_indices] = 1
        # `w` was validated to be positive above, so the division is safe.
        decay = (self.w - 1) / self.w
        for was_queried in queried:
            if self.budget_ > self.u_t_ / self.w:
                if was_queried:
                    self.theta_ *= 1 - self.s
                else:
                    self.theta_ *= 1 + self.s
            # Advance the estimate per sample (same recursion as the parent's
            # `update`), so the next budget check sees the spending of all
            # previous samples of this batch instead of a stale value.
            self.u_t_ = self.u_t_ * decay + was_queried
        return self

    def _validate_data(self, utilities):
        """Validate input data.

        Besides the checks of the parent class, `w` is checked to be a
        positive integer, `theta` to be a float and `s` to be a float with
        `0 < s <= 1`. `theta_` is initialized from `theta` on first use.

        Parameters
        ----------
        utilities: array-like of shape (n_samples,)
            The `utilities` provided by the stream-based active learning
            strategy.

        Returns
        -------
        utilities: ndarray of shape (n_samples,)
            Checked `utilities`.
        """
        utilities = super()._validate_data(utilities)
        # Check w
        check_scalar(self.w, "w", int, min_val=0, min_inclusive=False)
        # Check theta
        self._validate_theta()
        # Check s
        check_scalar(
            self.s, "s", float, min_val=0, min_inclusive=False, max_val=1
        )

        return utilities

    def _validate_theta(self):
        """Validate if theta is set as a float and initialize `theta_`."""
        check_scalar(self.theta, "theta", float)
        # the adaptive threshold starts at the user-supplied value
        if not hasattr(self, "theta_"):
            self.theta_ = self.theta


class RandomVariableUncertaintyBudgetManager(EstimatedBudgetZliobaite):
    """Budget Manager for Uncertainty Strategy With Randomization

    Budget manager which implements the budgeting for Uncertainty Strategy With
    Randomization [1]_. The budget manager checks whether the specified
    budget has been exhausted already. If not, a sample is queried when its
    confidence (`1 - utility`) is lower than a randomized time-dependent
    threshold. The threshold is randomized by multiplying `theta_` with a
    random variable following a normal distribution with mean 1 and standard
    deviation `delta`. Similarly to
    :class:`.VariableUncertaintyBudgetManager`, `theta_` shrinks by the factor
    `1 - s` when a sample is queried and grows by the factor `1 + s` when it
    is not. See also :class:`.EstimatedBudgetZliobaite`.

    Parameters
    ----------
    delta : float, default=1.0
        Specifies the standard deviation of the normal distribution used for
        randomization of the threshold.
    theta : float, default=1.0
        Specifies the initial value for `theta_` that is used for calculating
        the threshold.
    s : float, default=0.01
        Specifies the relative increase or decrease of the threshold if a
        sample is not queried or queried, respectively.
    random_state : int or RandomState instance or None, default=None
        Controls the randomness of the budget manager.
    w : int, default=100
        Specifies the size of the memory window. Controls the `budget` in the
        last `w` steps taken.
    budget : float, default=None
        Specifies the ratio of samples which are allowed to be sampled, with
        `0 <= budget <= 1`. If `budget` is `None`, it is replaced with the
        default budget 0.1.

    References
    ----------
    .. [1] I. Žliobaitė, A. Bifet, B. Pfahringer, and G. Holmes. Active
        Learning With Drifting Streaming Data. IEEE Trans. Neural Netw. Learn.
        Syst., 25(1):27–39, 2014
    """

    def __init__(
        self,
        delta=1.0,
        theta=1.0,
        s=0.01,
        random_state=None,
        w=100,
        budget=None,
    ):
        super().__init__(w=w, budget=budget)
        self.delta = delta
        self.theta = theta
        self.s = s
        self.random_state = random_state

    def query_by_utility(self, utilities):
        """Ask the budget manager which `utilities` are sufficient to query the
        corresponding labels.

        The decision is only simulated: `u_t_` and `theta_` are copied and the
        state of the random number generator is restored before returning.

        Parameters
        ----------
        utilities : array-like of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy, which are used to determine whether querying a sample
            is worth it given the budgeting constraint.

        Returns
        -------
        queried_indices : list of int
            The indices of samples in candidates whose labels are queried,
            with `0 <= queried_indices < n_candidates`.
        """
        utilities = self._validate_data(utilities)
        confidence = 1 - utilities

        queried_indices = []
        # simulate on copies so that querying leaves the stored state as is
        tmp_u_t = self.u_t_
        tmp_theta = self.theta_

        prior_random_state = self.random_state_.get_state()

        for i, u in enumerate(confidence):
            budget_left = self.budget_ > tmp_u_t / self.w

            sample = False
            if budget_left:
                # randomize the threshold with a factor drawn from N(1, delta)
                eta = self.random_state_.normal(1, self.delta)
                theta_random = tmp_theta * eta
                sample = u < theta_random
                if sample:
                    tmp_theta *= 1 - self.s
                    queried_indices.append(i)
                else:
                    tmp_theta *= 1 + self.s
            # u_t = u_t-1 * (w-1)/w + labeling_t
            tmp_u_t = tmp_u_t * ((self.w - 1) / self.w) + sample

        # rewind the generator; it is advanced in `update`
        self.random_state_.set_state(prior_random_state)

        return queried_indices

    def update(self, candidates, queried_indices):
        """Updates the budget manager.

        The random number generator is advanced and the candidates are then
        processed one after another: for each candidate the budget is
        checked against the current estimate `u_t_`, `theta_` is adapted if
        budget is left, and `u_t_` is advanced by one step. This is the same
        order of operations that `query_by_utility` simulates, so both stay
        consistent for batches with more than one candidate.

        Parameters
        ----------
        candidates : {array-like, sparse matrix} of shape\
                (n_samples, n_features)
            The samples which may be queried. Only the number of candidates
            (`len(candidates)`) is used here.
        queried_indices : np.ndarray of shape (n_queried_indices,)
            The indices of samples in candidates whose labels are queried,
            with `0 <= queried_indices < n_candidates`.

        Returns
        -------
        self : RandomVariableUncertaintyBudgetManager
            The budget manager returns itself, after it is updated.
        """
        self._validate_data(np.array([]))

        queried = np.zeros(len(candidates))
        queried[queried_indices] = 1
        # Advance the generator so that the next query uses new randomness.
        # NOTE(review): this consumes uniform draws and does not replay the
        # normal draws made in `query_by_utility`; kept unchanged so that
        # seeded runs produce the same random stream as before.
        self.random_state_.random_sample(len(candidates))
        # `w` was validated to be positive above, so the division is safe.
        decay = (self.w - 1) / self.w
        for was_queried in queried:
            if self.budget_ > self.u_t_ / self.w:
                if was_queried:
                    self.theta_ *= 1 - self.s
                else:
                    self.theta_ *= 1 + self.s
            # Advance the estimate per sample (same recursion as the parent's
            # `update`), so the next budget check sees the spending of all
            # previous samples of this batch instead of a stale value.
            self.u_t_ = self.u_t_ * decay + was_queried
        return self

    def _validate_data(self, utilities):
        """Validate input data.

        Besides the checks of the parent class, `w` is checked to be a
        positive integer, `theta` to be a float, `s` to be a float with
        `0 < s <= 1` and `delta` to be a float greater than 0. `theta_` and
        `random_state_` are initialized on first use.

        Parameters
        ----------
        utilities: array-like of shape (n_samples,)
            The `utilities` provided by the stream-based active learning
            strategy.

        Returns
        -------
        utilities: ndarray of shape (n_samples,)
            Checked `utilities`.
        """
        utilities = super()._validate_data(utilities)
        # Check w
        check_scalar(self.w, "w", int, min_val=0, min_inclusive=False)
        # Check theta
        self._validate_theta()
        # Check s
        check_scalar(
            self.s, "s", float, min_val=0, min_inclusive=False, max_val=1
        )
        # Check delta
        check_scalar(
            self.delta, "delta", float, min_val=0, min_inclusive=False
        )
        self._validate_random_state()

        return utilities

    def _validate_theta(self):
        """Validate if theta is set as a float and initialize `theta_`."""
        check_scalar(self.theta, "theta", float)
        # the adaptive threshold starts at the user-supplied value
        if not hasattr(self, "theta_"):
            self.theta_ = self.theta

    def _validate_random_state(self):
        """Creates a copy 'random_state_' if random_state is an instance of
        np.random_state. If not create a new random state. See also
        :func:`~sklearn.utils.check_random_state`
        """
        # copy once so that the user-supplied `random_state` is not mutated
        if not hasattr(self, "random_state_"):
            self.random_state_ = deepcopy(self.random_state)
        self.random_state_ = check_random_state(self.random_state_)


class SplitBudgetManager(EstimatedBudgetZliobaite):
    """Budget Manager for Split Strategy

    Budget manager which implements the budgeting for the Split Strategy [1]_.
    The budget manager checks whether the specified budget has been exhausted
    already. If not, each sample is handled in one of two ways: with
    probability `v` it is queried randomly (with a probability of `budget`);
    otherwise it is queried when its confidence (`1 - utility`) is lower than
    `theta_`, a time-dependent threshold that shrinks by the factor `1 - s`
    when a sample is queried and grows by the factor `1 + s` when it is not.
    `theta_` is only adapted in the second case. See also
    :class:`.VariableUncertaintyBudgetManager` and
    :class:`.EstimatedBudgetZliobaite`.

    Parameters
    ----------
    v : float, default=0.1
        Specifies the ratio of samples that are handled by random querying,
        with `0 < v < 1`.
    theta : float, default=1.0
        Specifies the initial value for `theta_` that is compared to
        the confidences derived from `utilities`.
    s : float, default=0.01
        Specifies the relative increase or decrease of the threshold if a
        sample is not queried or queried, respectively.
    random_state : int or RandomState instance or None, default=None
        Controls the randomness of the budget manager.
    w : int, default=100
        Specifies the size of the memory window. Controls the `budget` in the
        last `w` steps taken.
    budget : float, default=None
        Specifies the ratio of samples which are allowed to be sampled, with
        `0 <= budget <= 1`. If `budget` is `None`, it is replaced with the
        default budget 0.1.

    References
    ----------
    .. [1] I. Žliobaitė, A. Bifet, B. Pfahringer, and G. Holmes. Active
        Learning With Drifting Streaming Data. IEEE Trans. Neural Netw. Learn.
        Syst., 25(1):27–39, 2014
    """

    def __init__(
        self, v=0.1, theta=1.0, s=0.01, random_state=None, w=100, budget=None
    ):
        super().__init__(w=w, budget=budget)
        self.v = v
        self.theta = theta
        self.s = s
        self.random_state = random_state

    def query_by_utility(self, utilities):
        """Ask the budget manager which `utilities` are sufficient to query the
        corresponding labels.

        The decision is only simulated: `u_t_` and `theta_` are copied and the
        state of the random number generator is restored before returning.

        Parameters
        ----------
        utilities : array-like of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy, which are used to determine whether querying a sample
            is worth it given the budgeting constraint.

        Returns
        -------
        queried_indices : list of int
            The indices of samples in candidates whose labels are queried,
            with `0 <= queried_indices < n_candidates`.
        """
        utilities = self._validate_data(utilities)
        confidence = 1 - utilities

        queried_indices = []
        # simulate on copies so that querying leaves the stored state as is
        estimate = self.u_t_
        threshold = self.theta_
        saved_rng_state = self.random_state_.get_state()

        for idx, conf in enumerate(confidence):
            sample = False
            if estimate / self.w < self.budget_:
                # first draw decides between random and uncertainty querying
                if self.v > self.random_state_.random_sample():
                    # random branch: second draw decides about the query
                    sample = self.random_state_.random_sample() <= self.budget_
                else:
                    # uncertainty branch: compare and adapt the threshold
                    sample = conf < threshold
                    threshold *= (1 - self.s) if sample else (1 + self.s)
                if sample:
                    queried_indices.append(idx)

            # u_t = u_t-1 * (w-1)/w + labeling_t
            estimate = estimate * ((self.w - 1) / self.w) + sample

        # rewind the generator; the draws are replayed in `update`
        self.random_state_.set_state(saved_rng_state)

        return queried_indices

    def update(self, candidates, queried_indices):
        """Updates the budget manager.

        The candidates are processed one after another. For each candidate
        the random draws of `query_by_utility` are replayed, `theta_` is
        adapted if the uncertainty branch was taken, and the estimate `u_t_`
        is advanced by one step via the parent class.

        Parameters
        ----------
        candidates : {array-like, sparse matrix} of shape\
                (n_samples, n_features)
            The samples which may be queried. Sparse matrices are accepted
            only if they are supported by the base query strategy.
        queried_indices : np.ndarray of shape (n_queried_indices,)
            The indices of samples in candidates whose labels are queried,
            with `0 <= queried_indices < n_candidates`.

        Returns
        -------
        self : SplitBudgetManager
            The budget manager returns itself, after it is updated.
        """
        self._validate_data(np.array([]))

        labeling = np.zeros(len(candidates))
        labeling[queried_indices] = 1
        for candidate, was_queried in zip(candidates, labeling):
            if self.u_t_ / self.w < self.budget_:
                if self.v > self.random_state_.random_sample():
                    # random branch: consume the second draw, its outcome is
                    # already encoded in `was_queried`
                    self.random_state_.random_sample()
                elif was_queried:
                    self.theta_ *= 1 - self.s
                else:
                    self.theta_ *= 1 + self.s
            # advance the spending estimate by exactly this one sample
            single_index = [0] if was_queried else []
            super().update([candidate], single_index)
        return self

    def _validate_data(self, utilities):
        """Validate input data.

        Besides the checks of the parent class, `w` is checked to be a
        positive integer, `theta` to be a float, `s` to be a float with
        `0 < s <= 1` and `v` to be a float with `0 < v < 1`. `theta_` and
        `random_state_` are initialized on first use.

        Parameters
        ----------
        utilities: array-like of shape (n_samples,)
            The `utilities` provided by the stream-based active learning
            strategy.

        Returns
        -------
        utilities: ndarray of shape (n_samples,)
            Checked `utilities`.
        """
        checked = super()._validate_data(utilities)
        # window size
        check_scalar(self.w, "w", int, min_val=0, min_inclusive=False)
        # initial threshold
        self._validate_theta()
        # adaptation rate of the threshold
        check_scalar(
            self.s, "s", float, min_val=0, min_inclusive=False, max_val=1
        )
        # share of randomly handled samples
        check_scalar(
            self.v,
            "v",
            float,
            min_val=0,
            min_inclusive=False,
            max_inclusive=False,
            max_val=1,
        )
        # random number generator
        self._validate_random_state()

        return checked

    def _validate_theta(self):
        """Validate if theta is set as a float and initialize `theta_`."""
        check_scalar(self.theta, "theta", float)
        # the adaptive threshold starts at the user-supplied value
        if not hasattr(self, "theta_"):
            self.theta_ = self.theta

    def _validate_random_state(self):
        """Creates a copy 'random_state_' if random_state is an instance of
        np.random_state. If not create a new random state. See also
        :func:`~sklearn.utils.check_random_state`
        """
        # copy once so that the user-supplied `random_state` is not mutated
        if not hasattr(self, "random_state_"):
            self.random_state_ = deepcopy(self.random_state)
        self.random_state_ = check_random_state(self.random_state_)


class RandomBudgetManager(EstimatedBudgetZliobaite):
    """RandomBudgetManager

    Budget manager which checks whether the specified budget has been
    exhausted already. If budget is available, samples are queried randomly
    with a probability of `budget`, independently of the value of their
    utility. Samples whose utility is NaN are never queried. See also
    :class:`.EstimatedBudgetZliobaite`.

    Parameters
    ----------
    random_state : int or RandomState instance or None, default=None
        Controls the randomness of the budget manager.
    w : int, default=100
        Specifies the size of the memory window. Controls the `budget` in the
        last `w` steps taken.
    budget : float, default=None
        Specifies the ratio of samples which are allowed to be sampled, with
        `0 <= budget <= 1`. If `budget` is `None`, it is replaced with the
        default budget 0.1.
    """

    def __init__(self, random_state=None, w=100, budget=None):
        super().__init__(w=w, budget=budget)
        self.random_state = random_state

    def query_by_utility(self, utilities):
        """Ask the budget manager which `utilities` are sufficient to query the
        corresponding labels.

        The decision is only simulated: `u_t_` is copied and the state of the
        random number generator is restored before returning.

        Parameters
        ----------
        utilities : array-like of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy. Only their number and whether they are NaN is used.

        Returns
        -------
        queried_indices : list of int
            The indices of samples in candidates whose labels are queried,
            with `0 <= queried_indices < n_candidates`.
        """
        utilities = self._validate_data(utilities)
        confidence = 1 - utilities

        queried_indices = []
        # simulate on a copy so that querying leaves the stored state as is
        estimate = self.u_t_
        saved_rng_state = self.random_state_.get_state()

        # one uniform draw per sample, all taken up front
        draws = self.random_state_.random_sample(len(confidence))
        drawn_flags = draws <= self.budget_

        for idx, drawn in enumerate(drawn_flags):
            has_budget = estimate / self.w < self.budget_
            # query only with budget left, a successful draw and a utility
            # that is not NaN
            label = False
            if has_budget and drawn and not np.isnan(utilities[idx]):
                label = True
                queried_indices.append(idx)
            # u_t = u_t-1 * (w-1)/w + labeling_t
            estimate = estimate * ((self.w - 1) / self.w) + label

        # rewind the generator; it is advanced in `update`
        self.random_state_.set_state(saved_rng_state)

        return queried_indices

    def update(self, candidates, queried_indices):
        """Updates the budget manager.

        Parameters
        ----------
        candidates : {array-like, sparse matrix} of shape\
                (n_samples, n_features)
            The samples which may be queried. Sparse matrices are accepted
            only if they are supported by the base query strategy.
        queried_indices : np.ndarray of shape (n_queried_indices,)
            The indices of samples in candidates whose labels are queried,
            with `0 <= queried_indices < n_candidates`.

        Returns
        -------
        self : RandomBudgetManager
            The budget manager returns itself, after it is updated.
        """
        self._validate_data(np.array([]))
        # consume the draws that `query_by_utility` made and rewound
        n_candidates = len(candidates)
        self.random_state_.random_sample(n_candidates)
        # advance the spending estimate in the parent class
        super().update(candidates, queried_indices)
        return self

    def _validate_data(self, utilities):
        """Validate input data.

        Besides the checks of the parent class, `w` is checked to be a
        positive integer and `random_state_` is initialized on first use.

        Parameters
        ----------
        utilities: array-like of shape (n_samples,)
            The `utilities` provided by the stream-based active learning
            strategy.

        Returns
        -------
        utilities: ndarray of shape (n_samples,)
            Checked `utilities`.
        """
        checked = super()._validate_data(utilities)
        check_scalar(self.w, "w", int, min_val=0, min_inclusive=False)
        self._validate_random_state()

        return checked

    def _validate_random_state(self):
        """Creates a copy 'random_state_' if random_state is an instance of
        np.random_state. If not create a new random state. See also
        :func:`~sklearn.utils.check_random_state`
        """
        # copy once so that the user-supplied `random_state` is not mutated
        if not hasattr(self, "random_state_"):
            self.random_state_ = deepcopy(self.random_state)
        self.random_state_ = check_random_state(self.random_state_)