.. DO NOT EDIT. .. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY. .. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE: .. "generated/sphinx_gallery_examples/pool/plot-UncertaintySampling-Dual_Strategy_for_Active_Learning.py" .. LINE NUMBERS ARE GIVEN BELOW. .. only:: html .. note:: :class: sphx-glr-download-link-note :ref:`Go to the end <sphx_glr_download_generated_sphinx_gallery_examples_pool_plot-UncertaintySampling-Dual_Strategy_for_Active_Learning.py>` to download the full example code. .. rst-class:: sphx-glr-example-title .. _sphx_glr_generated_sphinx_gallery_examples_pool_plot-UncertaintySampling-Dual_Strategy_for_Active_Learning.py: Dual Strategy for Active Learning ================================= .. GENERATED FROM PYTHON SOURCE LINES 7-20 .. note:: The generated animation can be found at the bottom of the page. | **Google Colab Note**: If the notebook fails to run after installing the needed packages, try to restart the runtime (Ctrl + M) under Runtime -> Restart session. .. image:: https://colab.research.google.com/assets/colab-badge.svg :target: https://colab.research.google.com/github/scikit-activeml/scikit-activeml.github.io/blob/gh-pages/latest/generated/sphinx_gallery_notebooks//pool/plot-UncertaintySampling-Dual_Strategy_for_Active_Learning.ipynb | **Notebook Dependencies** | Uncomment the following cell to install all dependencies for this tutorial. .. GENERATED FROM PYTHON SOURCE LINES 20-23 .. code-block:: Python # !pip install scikit-activeml .. GENERATED FROM PYTHON SOURCE LINES 24-28 .. raw:: html
.. GENERATED FROM PYTHON SOURCE LINES 30-117

.. code-block:: Python


    import numpy as np
    from matplotlib import pyplot as plt, animation
    from sklearn.datasets import make_blobs

    from skactiveml.utils import MISSING_LABEL, is_labeled, simple_batch
    from skactiveml.visualization import plot_decision_boundary, \
        plot_contour_for_samples

    from sklearn.linear_model import LogisticRegression
    from sklearn.mixture import GaussianMixture
    from skactiveml.classifier import SklearnClassifier
    from skactiveml.pool import UncertaintySampling

    # A single seeded generator is handed to every stochastic component so
    # that the example (and the resulting animation) is reproducible.
    random_state = np.random.RandomState(0)

    # Build a dataset: five Gaussian blobs whose cluster indices are folded
    # into two classes via modulo 2. All labels start out as missing.
    X, y_true = make_blobs(
        n_samples=200, n_features=2,
        centers=[[0, 1], [-3, .5], [-1, -1], [2, 1], [1, -.5]],
        cluster_std=.7, random_state=random_state,
    )
    y_true = y_true % 2
    y = np.full(shape=y_true.shape, fill_value=MISSING_LABEL)

    # Initialise the classifier.
    clf = SklearnClassifier(LogisticRegression(), classes=np.unique(y_true))
    # Initialise the query strategy.
    qs = UncertaintySampling(method='least_confident',
                             random_state=random_state)

    # Estimate the sample density with a Gaussian mixture. The mixture is
    # seeded as well; otherwise its k-means initialisation would make the
    # density weights differ from run to run.
    gmm = GaussianMixture(init_params='kmeans', n_components=5,
                          random_state=random_state)
    gmm.fit(X)
    # `score_samples` yields log-likelihoods, hence the exponentiation.
    density = np.exp(gmm.score_samples(X))

    # Switching criterion: once the utility of the queried sample grows by
    # less than `delta` compared to the previous query, the loop changes
    # from DWUS to the mixed DWUS + US strategy and stays there.
    # NOTE(review): the difference is signed, so any decrease also triggers
    # the switch -- confirm this matches the intended criterion.
    delta = 0.1
    u_max = -np.inf  # Utility of the previously queried sample.
    switching_point = False

    # Preparation for plotting.
    fig, ax = plt.subplots()
    # Bounding box of the data as [[x_min, y_min], [x_max, y_max]].
    feature_bound = np.array([X.min(axis=0), X.max(axis=0)])
    artists = []

    # The active learning cycle:
    n_cycles = 20
    for c in range(n_cycles):
        # Fit the classifier on the labels acquired so far.
        clf.fit(X, y)

        # Get labeled samples.
        X_labeled = X[is_labeled(y)]

        # Query the next sample(s).
        if not switching_point:
            # DWUS: uncertainty sampling weighted by the estimated density.
            query_idx, utils = qs.query(
                X=X, y=y, clf=clf, utility_weight=density,
                return_utilities=True,
            )
            utilities = utils[0]
            u_queried = utilities[query_idx[0]]
            switching_point = u_queried - u_max < delta
            u_max = u_queried
            strategy = "DWUS"
        else:
            # DWUS + US: blend plain uncertainty and density, using the mean
            # (NaN-ignoring) uncertainty as the mixing weight.
            utils_US = qs.query(
                X=X, y=y, clf=clf, return_utilities=True
            )[1][0]
            err = np.nanmean(utils_US)
            utilities = (1 - err) * utils_US + err * density
            query_idx = simple_batch(utilities, random_state)
            strategy = "DWUS + US"

        # Plot the labeled data. The collections present before drawing are
        # remembered so that only the artists of this frame are recorded.
        coll_old = set(ax.collections)
        # A text artist is used as the title so that it can be part of the
        # per-frame artist list.
        title = ax.text(
            0.5, 1.05,
            f"Decision boundary after acquiring {c} labels with {strategy}",
            size=plt.rcParams["axes.titlesize"], ha="center",
            transform=ax.transAxes
        )
        ax = plot_contour_for_samples(
            X, utilities, feature_bound=feature_bound, res=31, ax=ax,
            replace_nan=None,
        )
        ax.scatter(X[:, 0], X[:, 1], c=y_true, cmap="coolwarm", marker=".",
                   zorder=2)
        ax.scatter(X_labeled[:, 0], X_labeled[:, 1], c="grey", alpha=.8,
                   marker=".", s=300)
        ax = plot_decision_boundary(clf, feature_bound, ax=ax)

        # One animation frame = all collections added in this cycle + title.
        frame = [art for art in ax.collections if art not in coll_old]
        frame.append(title)
        artists.append(frame)

        # Label the queried samples.
        y[query_idx] = y_true[query_idx]

    ani = animation.ArtistAnimation(fig, artists, interval=1000, blit=True)


.. container:: sphx-glr-animation

    .. raw:: html
.. GENERATED FROM PYTHON SOURCE LINES 118-123 .. rubric:: References: The implementation of this strategy is based on :footcite:t:`donmez2007dual`. .. footbibliography:: .. rst-class:: sphx-glr-timing **Total running time of the script:** (0 minutes 5.136 seconds) .. _sphx_glr_download_generated_sphinx_gallery_examples_pool_plot-UncertaintySampling-Dual_Strategy_for_Active_Learning.py: .. only:: html .. container:: sphx-glr-footer sphx-glr-footer-example .. container:: sphx-glr-download sphx-glr-download-jupyter :download:`Download Jupyter notebook: plot-UncertaintySampling-Dual_Strategy_for_Active_Learning.ipynb <plot-UncertaintySampling-Dual_Strategy_for_Active_Learning.ipynb>` .. container:: sphx-glr-download sphx-glr-download-python :download:`Download Python source code: plot-UncertaintySampling-Dual_Strategy_for_Active_Learning.py <plot-UncertaintySampling-Dual_Strategy_for_Active_Learning.py>` .. container:: sphx-glr-download sphx-glr-download-zip :download:`Download zipped: plot-UncertaintySampling-Dual_Strategy_for_Active_Learning.zip <plot-UncertaintySampling-Dual_Strategy_for_Active_Learning.zip>` .. only:: html .. rst-class:: sphx-glr-signature `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_