Source code for skactiveml.classifier.multiannotator._crowd_layer_classifier

try:
    import numpy as np
    import torch

    from sklearn.utils.validation import check_array
    from torch import nn
    from torch.nn import CrossEntropyLoss
    from torch.nn import functional as F

    from ...base import SkactivemlClassifier
    from ...utils import (
        MISSING_LABEL,
        check_n_features,
    )
    from ._utils import (
        _SkorchMultiAnnotatorClassifier,
        _MultiAnnotatorClassificationModule,
        _MultiAnnotatorCollate,
    )

    class CrowdLayerClassifier(_SkorchMultiAnnotatorClassifier):
        """Crowd Layer

        Crowd Layer [1]_ is a layer placed on top of a classification
        network so that the network can be trained end-to-end, by plain
        backpropagation, directly on the noisy labels of several
        annotators. For every annotator, the layer applies an
        annotator-specific transformation (a confusion-matrix-like
        mapping) to the shared class prediction, modelling how that
        annotator's labels arise from the underlying class probabilities.
        Because these mappings are learned jointly with the base network,
        systematic annotator biases can be separated from the true label
        signal, and multiple noisy labels per sample can be exploited
        during training.

        Parameters
        ----------
        clf_module : nn.Module or nn.Module.__class__
            PyTorch classification module that outputs logits for the
            input samples. Usually the uninstantiated class is passed,
            although instantiated modules work as well. Its `forward` must
            return the logits as first element and, optionally, sample
            embeddings as second element. Without embeddings, the
            original samples are used instead.
        n_annotators : int, default=None
            Number of annotators. If `None`, it is inferred from `y` when
            `fit` is called.
        neural_net_param_dict : dict, default=None
            Additional arguments for `skorch.net.NeuralNet`. `None` adds
            no arguments. The keys `module`, `criterion`,
            `predict_nonlinearity`, and `train_split` are not allowed.
        sample_dtype : str or type, default=np.float32
            Dtype the input samples are cast to inside the estimator. If
            `None`, the input dtype is preserved.
        classes : array-like of shape (n_classes,), default=None
            Label of each class. If `None`, the classes are determined
            during fitting.
        cost_matrix : array-like of shape (n_classes, n_classes), \
default=None
            Cost matrix where `cost_matrix[i, j]` is the cost of
            predicting class `classes[j]` for a sample of class
            `classes[i]`. Can only be set if `classes` is not `None`.
        missing_label : scalar or string or np.nan or None, default=np.nan
            Value representing a missing label.
        random_state : int or RandomState instance or None, default=None
            Determines random number for the `predict` method. Pass an int
            for reproducible results across multiple method calls.

        References
        ----------
        .. [1] Rodrigues, Filipe, and Francisco Pereira. "Deep Learning
           from Crowds." AAAI Conference on Artificial Intelligence, 2018.
        """

        # Names accepted by the `extra_outputs` argument of `predict` and
        # `predict_proba`.
        _ALLOWED_EXTRA_OUTPUTS = {
            "logits",
            "embeddings",
            "annotator_perf",
            "annotator_class",
        }

        def __init__(
            self,
            clf_module,
            n_annotators=None,
            neural_net_param_dict=None,
            sample_dtype=np.float32,
            classes=None,
            cost_matrix=None,
            missing_label=MISSING_LABEL,
            random_state=None,
        ):
            # The crowd-layer module and the cross-entropy criterion are
            # fixed by this estimator; everything else is forwarded.
            super().__init__(
                multi_annotator_module=_CrowdLayerModule,
                clf_module=clf_module,
                n_annotators=n_annotators,
                criterion=CrossEntropyLoss,
                sample_dtype=sample_dtype,
                classes=classes,
                missing_label=missing_label,
                cost_matrix=cost_matrix,
                random_state=random_state,
                neural_net_param_dict=neural_net_param_dict,
            )

        def predict(
            self,
            X,
            extra_outputs=None,
        ):
            """Return class predictions for the test samples `X`.

            Only the class predictions `y_pred` are returned by default.
            When `extra_outputs` is given, a tuple is returned instead:
            `y_pred` comes first, followed by the requested additional
            forward outputs in the order given by `extra_outputs`.

            Parameters
            ----------
            X : array-like of shape (n_samples, ...)
                Test samples.
            extra_outputs : None or str or sequence of str, default=None
                Names of additional outputs to return next to `y_pred`.
                The names must be a subset of the following keys:

                - "logits" : Additionally return the class-membership
                  logits `L_class` for the samples in `X`.
                - "embeddings" : Additionally return the learned
                  embeddings `X_embed` for the samples in `X`.
                - "annotator_perf" : Additionally return the estimated
                  annotator performance probabilities `P_perf` for each
                  sample-annotator pair.
                - "annotator_class" : Additionally return the
                  annotator-class probability estimates `P_annot` for
                  each sample, annotator, and class.

            Returns
            -------
            y_pred : numpy.ndarray of shape (n_samples,)
                Class predictions of the test samples.
            *extras : numpy.ndarray, optional
                Only returned if `extra_outputs` is not `None`; see above
                for the tuple layout. Potential outputs are:

                - `L_class` : `np.ndarray` of shape
                  `(n_samples, n_classes)`, where `L_class[n, c]` is the
                  logit for the class `classes_[c]` of sample `X[n]`.
                - `X_embed` : `np.ndarray` of shape `(n_samples, ...)`,
                  where `X_embed[n]` is the learned embedding of sample
                  `X[n]`.
                - `P_perf` : `np.ndarray` of shape
                  `(n_samples, n_annotators)`, where `P_perf[n, m]` is
                  the estimated label correctness probability
                  (performance) of annotator `m` when labeling sample
                  `X[n]`.
                - `P_annot` : `np.ndarray` of shape
                  `(n_samples, n_annotators, n_classes)`, where
                  `P_annot[n, m, c]` is the probability that annotator
                  `m` provides the class label `c` for sample `X[n]`.
            """
            # Delegate explicitly to the `SkactivemlClassifier`
            # implementation of `predict`.
            y_pred = SkactivemlClassifier.predict(
                self,
                X=X,
                extra_outputs=extra_outputs,
            )
            return y_pred

        def predict_proba(
            self,
            X,
            extra_outputs=None,
        ):
            """Return class probability estimates for the test samples `X`.

            Only the class probabilities `P` are returned by default.
            When `extra_outputs` is given, a tuple is returned instead:
            `P` comes first, followed by the requested additional forward
            outputs in the order given by `extra_outputs`.

            Parameters
            ----------
            X : array-like of shape (n_samples, ...)
                Test samples.
            extra_outputs : None or str or sequence of str, default=None
                Names of additional outputs to return next to `P`. The
                names must be a subset of the following keys:

                - "logits" : Additionally return the class-membership
                  logits `L_class` for the samples in `X`.
                - "embeddings" : Additionally return the learned
                  embeddings `X_embed` for the samples in `X`.
                - "annotator_perf" : Additionally return the estimated
                  annotator performance probabilities `P_perf` for each
                  sample-annotator pair.
                - "annotator_class" : Additionally return the
                  annotator-class probability estimates `P_annot` for
                  each sample, annotator, and class.

            Returns
            -------
            P : numpy.ndarray of shape (n_samples, n_classes)
                Class probabilities of the test samples. Classes are
                ordered according to `self.classes_`.
            *extras : numpy.ndarray, optional
                Only returned if `extra_outputs` is not `None`; see above
                for the tuple layout. Potential outputs are:

                - `L_class` : `np.ndarray` of shape
                  `(n_samples, n_classes)`, where `L_class[n, c]` is the
                  logit for the class `classes_[c]` of sample `X[n]`.
                - `X_embed` : `np.ndarray` of shape `(n_samples, ...)`,
                  where `X_embed[n]` is the learned embedding of sample
                  `X[n]`.
                - `P_perf` : `np.ndarray` of shape
                  `(n_samples, n_annotators)`, where `P_perf[n, m]` is
                  the estimated label correctness probability
                  (performance) of annotator `m` when labeling sample
                  `X[n]`.
                - `P_annot` : `np.ndarray` of shape
                  `(n_samples, n_annotators, n_classes)`, where
                  `P_annot[n, m, c]` is the probability that annotator
                  `m` provides the class label `c` for sample `X[n]`.
            """
            # Validate the inputs.
            self._validate_data_kwargs()
            X = check_array(X, **self.check_X_dict_)
            first_call = not hasattr(self, "n_features_in_")
            check_n_features(self, X, reset=first_call)
            extra_outputs = self._normalize_extra_outputs(
                extra_outputs=extra_outputs,
                allowed_names=CrowdLayerClassifier._ALLOWED_EXTRA_OUTPUTS,
            )

            # Make sure a module exists even without a preceding `fit`.
            if not hasattr(self, "neural_net_"):
                self.initialize()

            module = self.neural_net_.module_
            previous_forward_return = module.forward_return

            # Map every named output to `(position in the module's forward
            # output, transformation)`. The class logits always sit at
            # position 0 and yield the probabilities via a softmax.
            forward_outputs = {"probas": (0, nn.Softmax(dim=-1))}
            if "logits" in extra_outputs:
                forward_outputs["logits"] = (0, None)

            # Optional outputs in the order the module emits them:
            # (public name, module forward key, apply softmax?).
            forward_returns = ["logits_class"]
            optional_outputs = (
                ("embeddings", "x_embed", False),
                ("annotator_perf", "p_annot_perf", False),
                ("annotator_class", "logits_annot", True),
            )
            for public_name, forward_key, use_softmax in optional_outputs:
                if public_name not in extra_outputs:
                    continue
                transform = nn.Softmax(dim=-1) if use_softmax else None
                # The next free position equals the number of forward
                # keys requested so far.
                forward_outputs[public_name] = (
                    len(forward_returns),
                    transform,
                )
                forward_returns.append(forward_key)

            # Run the forward pass with the temporary output selection
            # and restore the module's previous selection in any case.
            try:
                module.set_forward_return(forward_returns)
                fw_out = self._forward_with_named_outputs(
                    X=X,
                    forward_outputs=forward_outputs,
                    extra_outputs=extra_outputs,
                )
            finally:
                module.set_forward_return(previous_forward_return)

            # Initialize fallbacks if the classifier hasn't been fitted
            # before; the probabilities are the first (or only) output.
            probas = fw_out[0] if isinstance(fw_out, tuple) else fw_out
            self._initialize_fallbacks(probas)
            return fw_out

        def _build_neural_net_param_overrides(self, X, y):
            """Return the neural-net parameters fixed by this estimator.

            Parameters
            ----------
            X : object
                Not used by this implementation.
            y : object
                Not used by this implementation.

            Returns
            -------
            overrides : dict
                Parameter overrides: criterion reduction and ignore index,
                the module's number of classes and annotators, and the
                collate function of the training iterator.
            """
            # -1 is used both as the collate function's missing label and
            # as the index ignored by the criterion.
            return {
                "criterion__reduction": "mean",
                "criterion__ignore_index": -1,
                "module__n_classes": len(self.classes_),
                "module__n_annotators": self.n_annotators_,
                "iterator_train__collate_fn": _MultiAnnotatorCollate(
                    missing_label=-1
                ),
            }

    class _CrowdLayerModule(_MultiAnnotatorClassificationModule):
        """Crowd Layer Module

        Crowd Layer [1]_ is a layer placed on top of a classification
        network so that the network can be trained end-to-end, by plain
        backpropagation, directly on the noisy labels of several
        annotators.

        Parameters
        ----------
        n_classes : int
            Number of classes.
        n_annotators : int
            Number of annotators.
        clf_module : nn.Module or nn.Module.__class__
            Classifier backbone/head that maps `x -> logits_class` or
            `(logits_class, x_embed)`. If it returns only logits,
            `x_embed` is set to the input `x` (or to `None` if `x` is not
            an embedding).
        clf_module_param_dict : dict
            Keyword args for constructing `clf_module` if a class is
            passed.

        References
        ----------
        .. [1] Rodrigues, Filipe, and Francisco Pereira. "Deep Learning
           from Crowds." AAAI Conference on Artificial Intelligence, 2018.
        """

        def __init__(
            self, n_classes, n_annotators, clf_module, clf_module_param_dict
        ):
            super().__init__(
                clf_module=clf_module,
                clf_module_param_dict=clf_module_param_dict,
                default_forward_outputs="logits_annot",
                full_forward_outputs=[
                    "logits_class",
                    "x_embed",
                    "p_annot_perf",
                    "logits_annot",
                ],
            )
            self.n_classes = n_classes
            self.n_annotators = n_annotators

            # Crowd layer: one (n_classes, n_classes) weight matrix per
            # annotator, each initialized as the identity matrix.
            identity_stack = torch.eye(n_classes).repeat(n_annotators, 1, 1)
            self.W_annot = nn.Parameter(identity_stack)

        def forward(self, x, input_ids=None):
            """Run the classification module and, if requested, the crowd
            layer.

            Parameters
            ----------
            x : torch.Tensor of shape (batch_size, ...)
                Input samples.
            input_ids : torch.Tensor of shape (batch_size, 2), default=None
                - If a tensor is given, `input_ids[:, 0]` are sample
                  indices and `input_ids[:, 1]` are annotator indices.
                  One output row is produced per (sample, annotator)
                  pair.
                - If `input_ids=None`, all combinations of samples and
                  annotators are propagated through the crowd layer.

            Returns
            -------
            logits_class : torch.Tensor of shape (batch_size, n_classes)
                Class-membership logits. Only returned if "logits_class"
                in `self.forward_return`.
            x_embed : torch.Tensor of shape (batch_size, ...), optional
                Learned embeddings of samples, detached and flattened from
                the second dimension on. Only returned if "x_embed" in
                `self.forward_return`.
            p_annot_perf : torch.Tensor of shape \
(batch_size, n_annotators), optional
                Estimated performance, i.e., label correctness
                probability, per sample-annotator pair. Only returned if
                "p_annot_perf" in `self.forward_return`.
            logits_annot : torch.Tensor of shape \
(batch_size, n_annotators, n_classes) or (len(input_ids), n_classes), \
optional
                Annotation logits for sample-annotator pairs. Only
                returned if "logits_annot" in `self.forward_return`. The
                shape depends on whether `input_ids` is given or `None`.

            Notes
            -----
            A single requested output is returned as is; otherwise the
            requested outputs are returned as a tuple in the order listed
            above.
            """
            requested = self.forward_return

            # Inference of the classification model.
            logits_class, x_embed = self.clf_module_forward(x)

            # Collect the classifier outputs first.
            outputs = []
            if "logits_class" in requested:
                outputs.append(logits_class)
            if "x_embed" in requested:
                outputs.append(x_embed.detach().flatten(start_dim=1))

            want_perf = "p_annot_perf" in requested
            want_annot = "logits_annot" in requested
            if want_perf or want_annot:
                # Both annotator outputs build on the class probabilities.
                p_class = F.softmax(logits_class, dim=-1)

                if want_perf:
                    # Row-wise softmax turns the crowd-layer weights into
                    # confusion probabilities:
                    # confusion[a, c, o] = P(z=o | y=c, annotator=a).
                    confusion = F.softmax(self.W_annot, dim=-1)

                    # The diagonal holds the per-class correctness:
                    # correct_prob[a, c] = confusion[a, c, c].
                    class_idx = torch.arange(
                        self.n_classes, device=confusion.device
                    )
                    correct_prob = confusion[:, class_idx, class_idx]

                    # Expected performance per sample-annotator pair:
                    # p_annot_perf[n, a]
                    #   = sum_c p_class[n, c] * correct_prob[a, c].
                    p_annot_perf = torch.einsum(
                        "nc,ac->na", p_class, correct_prob
                    )
                    outputs.append(p_annot_perf)

                if want_annot:
                    if isinstance(input_ids, torch.Tensor):
                        # input_ids: (m, 2) -> (sample_idx, annotator_idx);
                        # one row per listed sample-annotator pair.
                        sample_probas = p_class.index_select(
                            0, input_ids[:, 0]
                        )
                        annot_weights = self.W_annot.index_select(
                            0, input_ids[:, 1]
                        )
                        logits_annot = torch.einsum(
                            "mc,mco->mo", sample_probas, annot_weights
                        )
                    else:
                        # All sample-annotator combinations.
                        logits_annot = torch.einsum(
                            "nc,aco->nao", p_class, self.W_annot
                        )
                    outputs.append(logits_annot)

            if len(outputs) == 1:
                return outputs[0]
            return tuple(outputs)

except ImportError:  # pragma: no cover
    pass