Note
Go to the end to download the full example code.
Fast Active Learning by Contrastive UNcertainty (FALCUN)
Note
The generated animation can be found at the bottom of the page.
Google Colab Note: If the notebook fails to run after installing the
needed packages, try to restart the runtime (Ctrl + M) under
Runtime -> Restart session.
Notebook Dependencies
Uncomment the following cell to install all dependencies for this
tutorial.
# !pip install scikit-activeml
import numpy as np
from matplotlib import pyplot as plt, animation
from sklearn.datasets import make_blobs
from skactiveml.utils import MISSING_LABEL, labeled_indices, unlabeled_indices
from skactiveml.visualization import (
plot_utilities,
plot_decision_boundary,
plot_contour_for_samples,
)
from skactiveml.classifier import ParzenWindowClassifier
from skactiveml.pool import Falcun
# Example script: visualise the batches selected by the Falcun query strategy
# on a 2D toy dataset and collect the plots of every cycle into an animation.

# Set a fixed random state for reproducibility.
random_state = np.random.RandomState(0)

# Build a dataset: five Gaussian blobs whose cluster ids are folded into two
# classes via the modulo operation below.
X, y_true = make_blobs(
    n_samples=200,
    n_features=2,
    centers=[[0, 1], [-3, 0.5], [-1, -1], [2, 1], [1, -0.5]],
    cluster_std=0.7,
    random_state=random_state,
)
y_true = y_true % 2
# Start with every sample unlabeled.
y = np.full(shape=y_true.shape, fill_value=MISSING_LABEL)

# Initialise the classifier.
clf = ParzenWindowClassifier(classes=[0, 1], random_state=random_state)

# Initialise the query strategy.
qs = Falcun(random_state=42)

# Preparation for plotting: create a 2x2 grid of subplots.
fig, axs = plt.subplots(2, 2, constrained_layout=True)
feature_bound = [[min(X[:, 0]), min(X[:, 1])], [max(X[:, 0]), max(X[:, 1])]]

# Each subplot shows `utilities[i]` for one position in the batch, so the
# batch size is tied to the number of subplots instead of being hard-coded.
batch_size = axs.size

# One list of artists per cycle; `n_cycles` is defined first so that the
# container always matches the number of loop iterations.
n_cycles = 5
artists = [[] for _ in range(n_cycles)]

# Active learning cycle.
for cycle in range(n_cycles):
    # Train the classifier with the current labels.
    clf.fit(X, y)

    # Query the next batch of samples; retrieve both indices and utility
    # values.
    query_idx, utilities = qs.query(
        X=X, y=y, clf=clf, batch_size=batch_size, return_utilities=True
    )

    # Samples labeled so far; `y` is only updated at the end of the cycle, so
    # this selection is identical for every subplot of the cycle.
    X_labeled = X[labeled_indices(y)]

    # Plot results on each subplot.
    for i, ax in enumerate(axs.flatten()):
        # Save current collections to identify new plot elements.
        coll_old = list(ax.collections)

        # Plot the utility contour for the current subplot.
        plot_contour_for_samples(
            X,
            utilities[i],
            res=25,
            feature_bound=feature_bound,
            replace_nan=None,
            ax=ax,
        )

        # Scatter all samples with true labels.
        ax.scatter(
            X[:, 0], X[:, 1], c=y_true, cmap="coolwarm", marker=".", zorder=2
        )

        # Highlight the labeled samples.
        ax.scatter(
            X_labeled[:, 0],
            X_labeled[:, 1],
            c="grey",
            alpha=0.8,
            marker=".",
            s=300,
        )

        # Overlay the decision boundary.
        ax = plot_decision_boundary(clf, feature_bound, ax=ax)

        # Set the title indicating the current batch and subplot index.
        # NOTE: the title is set on the axes itself and is not among the
        # artists collected below.
        ax.set_title(f"Batch {cycle + 1}, Utilities[{i}]")

        # Collect new artists (plot elements) added during this cycle.
        artists[cycle].extend(
            coll for coll in ax.collections if coll not in coll_old
        )

    # Update the labels for the queried samples.
    y[query_idx] = y_true[query_idx]

# Create the animation using the collected artists (one frame per cycle).
ani = animation.ArtistAnimation(fig, artists, interval=1000, blit=True)

References:
The implementation of this strategy is based on Gilhuber et al. [1].
[1] Sandra Gilhuber, Anna Beer, Yunpu Ma, and Thomas Seidl. FALCUN: A Simple and Efficient Deep Active Learning Strategy. In Jt. Eur. Conf. Mach. Learn. Knowl. Discov. Databases, 421–439. 2024.
Total running time of the script: (0 minutes 11.026 seconds)