# Source code for skactiveml.stream.budgetmanager._threshold_budget

import numpy as np
from copy import deepcopy

from skactiveml.base import BudgetManager

from skactiveml.utils import check_scalar, check_random_state


class DensityBasedSplitBudgetManager(BudgetManager):
    """Budget Manager for DBALStream

    This budget manager is an adaptation of
    :class:`.RandomVariableUncertaintyBudgetManager` for DBALStream [1]_. It
    mainly differs in how the available budget is estimated. Instead of the
    estimated budget proposed by Žliobaitė et al. [2]_, this budget manager
    counts the number of queried and seen samples, such that the number of
    available queries is given as
    `n_seen_samples*budget - n_queried_samples`.

    Parameters
    ----------
    theta : float, default=1.0
        Specifies the initial value for `theta_` that is used for calculating
        the threshold.
    s : float, default=0.01
        Specifies the relative increase or decrease of the threshold if a
        sample is queried or not, respectively.
    delta : float, default=1.0
        Specifies the standard deviation of the normal distribution used for
        randomization of the threshold.
    random_state : int or RandomState instance or None, default=None
        Controls the randomness of the budget manager.
    budget : float, default=None
        Specifies the ratio of samples which are allowed to be sampled, with
        `0 <= budget <= 1`. If `budget` is `None`, it is replaced with the
        default budget 0.1.

    References
    ----------
    .. [1] D. Ienco, I. Žliobaitė, and B. Pfahringer. High density-focused
        uncertainty sampling for active learning over evolving stream data.
        In Int. Workshop Big Data Streams Heterog. Source Min. Algorithms
        Syst. Program. Models Appl., pages 133–148, 2014.
    .. [2] I. Žliobaitė, A. Bifet, B. Pfahringer, and G. Holmes. Active
        Learning With Drifting Streaming Data. IEEE Trans. Neural Netw.
        Learn. Syst., 25(1):27–39, 2014
    """

    def __init__(
        self,
        theta=1.0,
        s=0.01,
        delta=1.0,
        random_state=None,
        budget=None,
    ):
        super().__init__(budget)
        self.theta = theta
        self.s = s
        self.delta = delta
        self.random_state = random_state

    def query_by_utility(self, utilities):
        """Ask the budget manager which `utilities` are sufficient to query
        the corresponding labels.

        This method does not alter the state of the budget manager: the
        counters and the threshold are simulated on local copies and the
        state of the random number generator is restored before returning.
        Call :meth:`update` to make the decisions permanent.

        Parameters
        ----------
        utilities : array-like of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy, which are used to determine whether querying a sample
            is worth it given the budgeting constraint.

        Returns
        -------
        queried_indices : list of int
            The indices of samples in candidates whose labels are queried,
            with `0 <= queried_indices < n_samples`.
        """
        utilities = self._validate_data(utilities)
        confidence = 1 - utilities

        # Work on local copies so that querying has no lasting side effects.
        queried_indices = []
        tmp_u = self.u_
        tmp_t = self.t_
        tmp_theta = self.theta_
        prior_random_state = self.random_state_.get_state()

        try:
            for i, conf in enumerate(confidence):
                tmp_t += 1
                # Budget is available while the ratio of queried to seen
                # samples is below the budget.
                budget_left = self.budget_ > tmp_u / tmp_t

                if not budget_left:
                    sample = False
                else:
                    # Randomize the threshold with a factor drawn from
                    # N(1, delta).
                    eta = self.random_state_.normal(1, self.delta)
                    theta_random = tmp_theta * eta
                    sample = conf < theta_random
                    # Shrink the threshold after a query, grow it otherwise.
                    if sample:
                        tmp_theta *= 1 - self.s
                        queried_indices.append(i)
                    else:
                        tmp_theta *= 1 + self.s
                tmp_u += sample
        finally:
            # Restore the generator even if the loop raised, so that a
            # failed call cannot leave the generator advanced.
            self.random_state_.set_state(prior_random_state)

        return queried_indices

    def update(self, candidates, queried_indices):
        """Updates the budget manager.

        Parameters
        ----------
        candidates : {array-like, sparse matrix} of shape\
            (n_samples, n_features)
            The samples which may be queried. Sparse matrices are accepted
            only if they are supported by the base query strategy.
        queried_indices : np.ndarray of shape (n_queried_indices,)
            The indices of samples in candidates whose labels are queried,
            with `0 <= queried_indices < n_samples`.

        Returns
        -------
        self : DensityBasedSplitBudgetManager
            The budget manager returns itself, after it is updated.
        """
        # Only used to make sure all fitted attributes are initialized.
        self._validate_data(np.array([]))

        queried = np.zeros(len(candidates))
        queried[queried_indices] = 1

        # Advance the random number generator so that later calls to
        # `query_by_utility` draw new random numbers.
        self.random_state_.random_sample(len(candidates))

        for was_queried in queried:
            self.t_ += 1
            # The threshold is only adapted while budget is available,
            # mirroring the simulation in `query_by_utility`.
            if self.budget_ > self.u_ / self.t_:
                if was_queried:
                    self.theta_ *= 1 - self.s
                else:
                    self.theta_ *= 1 + self.s
            self.u_ += was_queried

        return self

    def _validate_data(self, utilities):
        """Validate input data.

        Besides checking the hyper-parameters, this method lazily creates
        the fitted attributes `theta_`, `u_` (number of queried samples),
        `t_` (number of seen samples) and `random_state_`.

        Parameters
        ----------
        utilities: array-like of shape (n_samples,)
            The `utilities` provided by the stream-based active learning
            strategy.

        Returns
        -------
        utilities: ndarray of shape (n_samples,)
            Checked `utilities`.
        """
        utilities = super()._validate_data(utilities)

        # Check theta
        self._validate_theta()

        # Check s: float in the interval (0, 1]
        check_scalar(
            self.s, "s", float, min_val=0, min_inclusive=False, max_val=1
        )

        # Check delta: float strictly greater than 0
        check_scalar(
            self.delta, "delta", float, min_val=0, min_inclusive=False
        )

        # Initialize the counters of queried (u_) and seen (t_) samples on
        # first use.
        if not hasattr(self, "u_"):
            self.u_ = 0
        if not hasattr(self, "t_"):
            self.t_ = 0

        self._validate_random_state()

        return utilities

    def _validate_theta(self):
        """Validate if theta is set as a float."""
        check_scalar(self.theta, "theta", float)
        # Initialize the adaptive threshold from `theta` on first use.
        if not hasattr(self, "theta_"):
            self.theta_ = self.theta

    def _validate_random_state(self):
        """Creates a copy 'random_state_' if random_state is an instance of
        np.random_state. If not create a new random state. See also
        :func:`~sklearn.utils.check_random_state`
        """
        if not hasattr(self, "random_state_"):
            # Copy so that the object passed by the user is not modified.
            self.random_state_ = deepcopy(self.random_state)
        self.random_state_ = check_random_state(self.random_state_)