Note
Go to the end to download the full example code.
Cognitive Dual-Query Strategy with Variable-Uncertainty#
Note
The generated animation can be found at the bottom of the page.
Google Colab Note: If the notebook fails to run after installing the
needed packages, try to restart the runtime (Ctrl + M) under
Runtime -> Restart session.
Notebook Dependencies
Uncomment the following cell to install all dependencies for this
tutorial.
# !pip install scikit-activeml
import numpy as np
from matplotlib import pyplot as plt, animation
from sklearn.datasets import make_blobs
from skactiveml.utils import MISSING_LABEL
from skactiveml.visualization import (
plot_stream_training_data,
plot_stream_decision_boundary,
)
from skactiveml.classifier import ParzenWindowClassifier
from skactiveml.stream import CognitiveDualQueryStrategyVarUn
random_state = np.random.RandomState(0)
init_size = 0
# Build a dataset.
X, y_true = make_blobs(
n_samples=200 + init_size,
n_features=1,
centers=[[0], [-3], [1], [2], [-0.5]],
cluster_std=0.7,
random_state=random_state,
)
y_true = y_true % 2
X_init = X[:init_size]
y_init = y_true[:init_size]
X_stream = X[init_size:]
y_stream = y_true[init_size:]
# Initialise the classifier.
clf = ParzenWindowClassifier(classes=[0, 1], random_state=random_state)
# Initialise the query strategy.
qs = CognitiveDualQueryStrategyVarUn(budget=0.2)
plot_step = 5
X_train = []
X_train.extend(X_init)
y_train = []
y_train.extend(y_init)
classes = np.unique(y_true)
# Preparation for plotting.
fig, ax = plt.subplots()
feature_bound = [[0, len(X)], [min(X), max(X)]]
ax.set_xlim(0, len(X))
ax.set_ylim(bottom=min(X), top=max(X))
artists = []
X_train = []
X_train.extend(X_init)
y_train = []
y_train.extend(y_init)
queried_indices = [True] * len(y_init)
predictions_list = []
for t_x, (x_t, y_t) in enumerate(zip(X_stream, y_stream)):
X_cand = x_t.reshape([1, -1])
y_cand = y_t
clf.fit(X_train, y_train)
# Check whether to sample the instance or not
sampled_indices, utilities = qs.query(
candidates=X_cand, clf=clf, return_utilities=True
)
budget_manager_param_dict = {"utilities": utilities}
# Update the query strategy and budget_manager to calculate the right budget
qs.update(candidates=X_cand, queried_indices=sampled_indices)
# Label the queried instances.
X_train.append(x_t)
if len(sampled_indices):
y_train.append(y_t)
else:
y_train.append(MISSING_LABEL)
queried_indices.append(len(sampled_indices) > 0)
# Plot the labeled data.
if t_x % plot_step == 0:
coll_old = list(ax.collections)
ax, predictions_list = plot_stream_decision_boundary(
ax, t_x, plot_step, clf, X, predictions_list, res=25
)
data_lines = plot_stream_training_data(
ax, X_train, y_train, queried_indices, classes, feature_bound
)
title_string = (
f"Decision boundary after {t_x} new instances \n"
+ f"with utility: {utilities[0]: .4f} "
+ f"budget is {sum(queried_indices) / (t_x + 1):.4f}"
)
title = ax.text(
x=0.5,
y=1.05,
s=title_string,
size=plt.rcParams["axes.titlesize"],
ha="center",
transform=ax.transAxes,
)
coll_new = list(ax.collections)
coll_new.append(title)
artists.append(
[x for x in coll_new if (x not in coll_old)] + data_lines
)
ani = animation.ArtistAnimation(fig, artists, interval=500, blit=True)

References:
The implementation of this strategy is based on Liu et al.1.
- 1
Sanmin Liu, Shan Xue, Jia Wu, Chuan Zhou, Jian Yang, Zhao Li, and Jie Cao. Online active learning for drifting data streams. IEEE Transactions on Neural Networks and Learning Systems, 34(1):186–200, 2023.
Total running time of the script: (0 minutes 18.053 seconds)