# Source code for skactiveml.stream.budgetmanager._threshold_budget

import numpy as np
from copy import deepcopy

from skactiveml.base import BudgetManager

from skactiveml.utils import check_scalar, check_random_state


class DensityBasedSplitBudgetManager(BudgetManager):
    """Budget manager using a randomized, self-adapting confidence threshold.

    An instance is only considered for querying while the fraction of
    already queried instances is below the budget, i.e., while
    ``budget > u / t``, where ``u`` is the number of queried instances
    within the ``t`` instances observed so far (including the current one).

    If budget is left, the confidence ``1 - utility`` of the instance is
    compared against a randomized threshold ``theta * eta`` with ``eta``
    drawn from a normal distribution with mean 1 and standard deviation
    `delta`. The instance is queried if its confidence is below that
    threshold. After each such decision the threshold is adapted: it is
    multiplied by ``1 - s`` if the instance was queried and by ``1 + s``
    otherwise.

    Parameters
    ----------
    budget : float, optional (default=None)
        Specifies the ratio of instances which are allowed to be queried,
        with 0 <= budget <= 1. See Also :class:`BudgetManager`.
    theta : float, optional (default=1.0)
        Specifies the starting threshold below which instances are
        queried. The working copy of this value (`theta_`) is adapted
        after each decision that is made while budget is left.
    s : float, optional (default=0.01)
        Specifies the relative step by which the threshold is decreased
        (instance queried) or increased (instance not queried). Must be a
        float greater than 0 with an upper bound of 1.
    delta : float, optional (default=1.0)
        Specifies the standard deviation of the normal distribution from
        which the random threshold factor is drawn. Must be a float
        greater than 0.
    random_state : int | np.random.RandomState, optional (default=None)
        Random state used to draw the random threshold factor.

    See Also
    --------
    EstimatedBudgetZliobaite : BudgetManager implementing the base class
        for Zliobaite based budget managers
    SplitBudgetManager : BudgetManager that is using
        EstimatedBudgetZliobaite.
    """

    def __init__(
        self,
        budget=None,
        theta=1.0,
        s=0.01,
        delta=1.0,
        random_state=None,
    ):
        super().__init__(budget)
        self.theta = theta
        self.s = s
        self.delta = delta
        self.random_state = random_state

    def query_by_utility(self, utilities):
        """Ask the budget manager which utilities are sufficient to query
        the corresponding instance.

        The decision is simulated on temporary copies of the counters and
        the threshold, and the state of the random number generator is
        restored afterwards. The counters, the threshold and the generator
        are only advanced by :meth:`update`.

        Parameters
        ----------
        utilities : ndarray of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy, which are used to determine whether sampling an
            instance is worth it given the budgeting constraint.

        Returns
        -------
        queried_indices : list of int
            The indices of instances represented by utilities which should
            be queried, with 0 <= len(queried_indices) <= n_samples.
        """
        utilities = self._validate_data(utilities)
        confidences = 1 - utilities

        # Work on copies so that querying does not alter the fitted state.
        queried_indices = []
        tmp_u = self.u_
        tmp_t = self.t_
        tmp_theta = self.theta_

        prior_random_state = self.random_state_.get_state()
        try:
            for i, confidence in enumerate(confidences):
                tmp_t += 1
                budget_left = self.budget_ > tmp_u / tmp_t
                if budget_left:
                    # Randomize the threshold around its current value.
                    eta = self.random_state_.normal(1, self.delta)
                    sample = confidence < tmp_theta * eta
                    if sample:
                        tmp_theta *= 1 - self.s
                        queried_indices.append(i)
                    else:
                        tmp_theta *= 1 + self.s
                else:
                    # Without budget the threshold is left untouched.
                    sample = False
                tmp_u += sample
        finally:
            # Restore the generator even if the loop fails, so that a
            # repeated call yields the same decisions.
            self.random_state_.set_state(prior_random_state)
        return queried_indices

    def update(self, candidates, queried_indices):
        """Updates the budget manager.

        Parameters
        ----------
        candidates : {array-like, sparse matrix} of shape
            (n_samples, n_features)
            The instances which could be queried. Sparse matrices are
            accepted only if they are supported by the base query strategy.
        queried_indices : array-like
            Indicates which instances from candidates have been queried.

        Returns
        -------
        self : DensityBasedSplitBudgetManager
            The DensityBasedSplitBudgetManager returns itself, after it is
            updated.
        """
        self._validate_data(np.array([]))

        # `len` raises a TypeError for scipy sparse matrices, hence the
        # number of rows is preferred whenever a shape is available.
        if hasattr(candidates, "shape"):
            n_candidates = candidates.shape[0]
        else:
            n_candidates = len(candidates)

        queried = np.zeros(n_candidates)
        queried[queried_indices] = 1

        # Advance the generator by one uniform draw per candidate;
        # presumably so that later queries do not reuse the random numbers
        # consumed (and rolled back) by `query_by_utility`.
        self.random_state_.random_sample(n_candidates)

        for was_queried in queried:
            self.t_ += 1
            # The threshold is only adapted while budget is left, mirroring
            # the simulation in `query_by_utility`.
            if self.budget_ > self.u_ / self.t_:
                if was_queried:
                    self.theta_ *= 1 - self.s
                else:
                    self.theta_ *= 1 + self.s
            self.u_ += was_queried
        return self

    def _validate_data(self, utilities):
        """Validate input data.

        Besides checking the hyperparameters, this lazily creates the
        fitted attributes `theta_`, `u_`, `t_` and `random_state_` if they
        do not exist yet.

        Parameters
        ----------
        utilities: ndarray of shape (n_samples,)
            The utilities provided by the stream-based active learning
            strategy.

        Returns
        -------
        utilities : ndarray of shape (n_samples,)
            Checked utilities.
        """
        utilities = super()._validate_data(utilities)
        # Check theta
        self._validate_theta()
        # Check s
        check_scalar(
            self.s, "s", float, min_val=0, min_inclusive=False, max_val=1
        )
        # Check delta
        check_scalar(
            self.delta, "delta", float, min_val=0, min_inclusive=False
        )

        # Initialize the counters of queried (u_) and observed (t_)
        # instances on first use.
        if not hasattr(self, "u_"):
            self.u_ = 0
        if not hasattr(self, "t_"):
            self.t_ = 0
        self._validate_random_state()

        return utilities

    def _validate_theta(self):
        """Validate if theta is set as a float and initialize `theta_`."""
        check_scalar(self.theta, "theta", float)
        # The working threshold starts at the configured value.
        if not hasattr(self, "theta_"):
            self.theta_ = self.theta

    def _validate_random_state(self):
        """Create the attribute `random_state_` from a copy of
        `random_state` on first use and pass it through
        `check_random_state`.

        See also :func:`~sklearn.utils.check_random_state`
        """
        if not hasattr(self, "random_state_"):
            self.random_state_ = deepcopy(self.random_state)
        self.random_state_ = check_random_state(self.random_state_)