import warnings
import numpy as np
from matplotlib import lines, pyplot as plt
from matplotlib.axes import Axes
from sklearn.base import ClassifierMixin
from sklearn.neighbors import KNeighborsRegressor
from sklearn.utils.validation import (
check_array,
check_consistent_length,
column_or_1d,
)
from ._misc import (
mesh,
check_bound,
_get_boundary_args,
_get_confidence_args,
_get_contour_args,
_get_cmap,
)
from ..base import (
QueryStrategy,
SingleAnnotatorPoolQueryStrategy,
MultiAnnotatorPoolQueryStrategy,
)
from ..exceptions import MappingError
from ..utils import (
check_scalar,
unlabeled_indices,
call_func,
check_type,
check_indices,
)
def plot_utilities(qs, X, y, candidates=None, **kwargs):
    """Plot the utilities of a single-annotator pool query strategy.

    Validates that `qs` is a `SingleAnnotatorPoolQueryStrategy` and then
    delegates all work to `_general_plot_utilities`.

    Parameters
    ----------
    qs : skactiveml.base.SingleAnnotatorPoolQueryStrategy
        The query strategy whose utilities are plotted.
    X : array-like of shape (n_samples, n_features)
        Training data set, usually complete, i.e. including the labeled and
        unlabeled samples. Must have exactly two features.
    y : array-like of shape (n_samples, ) or (n_samples, n_annotators)
        Labels of the training data set (possibly including unlabeled ones
        indicated by the missing label of `qs`).
    candidates : None or array-like of shape (n_candidates,), dtype=int or
        array-like of shape (n_candidates, n_features),
        optional (default=None)
        If `candidates` is None, the utilities are evaluated on a mesh over
        the feature space; if that is not possible, the unlabeled samples
        from (X, y) are used as candidates instead.
        If `candidates` is of shape (n_candidates,) and of type int,
        candidates is considered as the indices of the samples in (X, y).
        If `candidates` is of shape (n_candidates, n_features), the
        candidates are directly given in candidates (not necessarily
        contained in X). This is not supported by all query strategies.

    Other Parameters
    ----------------
    replace_nan : numeric or None, optional (default=0.0)
        Only used when the utilities are plotted over samples instead of a
        mesh. If numeric, nan-utilities are plotted with value
        `replace_nan`. If None, these samples are ignored.
    ignore_undefined_query_params : bool, optional (default=False)
        If True, query parameters that are not defined in the query function
        are ignored and will not raise an exception.
    feature_bound : array-like of shape [[xmin, ymin], [xmax, ymax]], optional
        (default=None)
        Determines the area in which the utilities are plotted. It is
        validated by `check_bound` together with `X`.
    ax : matplotlib.axes.Axes, optional (default=None)
        The axis on which the utility is plotted. Only if y.ndim = 1 (single
        annotator).
    res : int, optional (default=21)
        The resolution of the plot.
    contour_dict : dict, optional (default=None)
        Additional parameters for the utility contour.
    **kwargs
        Remaining keyword arguments are passed to the query function of the
        query strategy.

    Returns
    -------
    ax : matplotlib.axes.Axes
        The axis on which the utilities were plotted.
    """
    check_type(qs, "qs", SingleAnnotatorPoolQueryStrategy)
    # The shared implementation takes the same leading parameters in the
    # same order, so they can be forwarded positionally.
    return _general_plot_utilities(qs, X, y, candidates, **kwargs)
def plot_annotator_utilities(qs, X, y, candidates=None, **kwargs):
    """Plot the utilities of a multi-annotator pool query strategy.

    Validates that `qs` is a `MultiAnnotatorPoolQueryStrategy` and then
    delegates all work to `_general_plot_utilities`.

    Parameters
    ----------
    qs : skactiveml.base.MultiAnnotatorPoolQueryStrategy
        The query strategy whose utilities are plotted.
    X : array-like of shape (n_samples, n_features)
        Training data set, usually complete, i.e. including the labeled and
        unlabeled samples. Must have exactly two features.
    y : array-like of shape (n_samples, ) or (n_samples, n_annotators)
        Labels of the training data set (possibly including unlabeled ones
        indicated by the missing label of `qs`).
    candidates : None or array-like of shape (n_candidates,), dtype=int or
        array-like of shape (n_candidates, n_features),
        optional (default=None)
        If `candidates` is None, the utilities are evaluated on a mesh over
        the feature space; if that is not possible, the unlabeled samples
        from (X, y) are used as candidates instead.
        If `candidates` is of shape (n_candidates,) and of type int,
        candidates is considered as the indices of the samples in (X, y).
        If `candidates` is of shape (n_candidates, n_features), the
        candidates are directly given in candidates (not necessarily
        contained in X). This is not supported by all query strategies.

    Other Parameters
    ----------------
    replace_nan : numeric or None, optional (default=0.0)
        Only used when the utilities are plotted over samples instead of a
        mesh. If numeric, nan-utilities are plotted with value
        `replace_nan`. If None, these samples are ignored.
    ignore_undefined_query_params : bool, optional (default=False)
        If True, query parameters that are not defined in the query function
        are ignored and will not raise an exception.
    feature_bound : array-like of shape [[xmin, ymin], [xmax, ymax]], optional
        (default=None)
        Determines the area in which the utilities are plotted. It is
        validated by `check_bound` together with `X`.
    axes : array-like of matplotlib.axes.Axes, optional (default=None)
        The axes on which the utilities for the annotators are plotted. Only
        supported for y.ndim = 2 (multi annotator).
    res : int, optional (default=21)
        The resolution of the plot.
    contour_dict : dict, optional (default=None)
        Additional parameters for the utility contour.
    plot_annotators : None or array-like of shape (n_annotators_to_plot,),
        optional (default=None)
        Contains the indices of the annotators to be plotted. If it is None,
        all annotators are plotted. Only supported for y.ndim = 2
        (multi annotator).
    **kwargs
        Remaining keyword arguments are passed to the query function of the
        query strategy.

    Returns
    -------
    axes : array-like of shape (n_annotators_to_plot,)
        The axes on which the utilities were plotted.
    """
    check_type(qs, "qs", MultiAnnotatorPoolQueryStrategy)
    # The shared implementation takes the same leading parameters in the
    # same order, so they can be forwarded positionally.
    return _general_plot_utilities(qs, X, y, candidates, **kwargs)
def plot_decision_boundary(
    clf,
    feature_bound,
    ax=None,
    res=21,
    boundary_dict=None,
    confidence=0.75,
    cmap="coolwarm",
    confidence_dict=None,
):
    """Plot the decision boundary of the given classifier.

    Parameters
    ----------
    clf : Sklearn classifier
        The fitted classifier whose decision boundary is plotted. If
        `confidence` is not None, the classifier must implement the
        `predict_proba` function; otherwise a warning is issued and no
        confidence contour is drawn.
    feature_bound : array-like, [[xmin, ymin], [xmax, ymax]]
        Determines the area in which the boundary is plotted.
    ax : matplotlib.axes.Axes, optional (default=None)
        The axis on which the decision boundary is plotted. If None, the
        current axis (`plt.gca()`) is used.
    res : int, optional (default=21)
        The resolution of the plot.
    boundary_dict : dict, optional (default=None)
        Additional parameters for the boundary contour.
    confidence : scalar | None, optional (default=0.75)
        The confidence level plotted as an additional contour per class. It
        is not plotted if `confidence` is None. Must be in the open interval
        (0.5, 1). The contour is drawn where
        `p_class / (p_class + p_best_alternative)` equals `confidence`.
    cmap : str | matplotlib.colors.Colormap, optional (default='coolwarm')
        The colormap for the confidence levels.
    confidence_dict : dict, optional (default=None)
        Additional parameters for the confidence contour. Must not contain a
        colormap because `cmap` is used.

    Returns
    -------
    ax : matplotlib.axes.Axes
        The axis on which the boundary was plotted.

    Raises
    ------
    AttributeError
        If `clf` implements neither `predict_proba` nor `predict`.
    """
    check_type(clf, "clf", ClassifierMixin)
    check_scalar(res, "res", int, min_val=1)
    if ax is None:
        ax = plt.gca()
    check_type(ax, "ax", Axes)
    feature_bound = check_bound(bound=feature_bound)

    # Check and convert the colormap.
    cmap = _get_cmap(cmap)

    if confidence is not None:
        check_scalar(
            confidence,
            "confidence",
            float,
            min_inclusive=False,
            max_inclusive=False,
            min_val=0.5,
            max_val=1,
        )

    # Update additional arguments.
    boundary_args = _get_boundary_args(boundary_dict)
    confidence_args = _get_confidence_args(confidence_dict)

    # Create mesh for plotting.
    X_mesh, Y_mesh, mesh_instances = mesh(feature_bound, res)

    # Calculate predictions as an (n_mesh_instances, n_classes) matrix.
    if hasattr(clf, "predict_proba"):
        predictions = clf.predict_proba(mesh_instances)
        classes = np.arange(predictions.shape[1])
    elif hasattr(clf, "predict"):
        if confidence is not None:
            warnings.warn(
                "The given classifier does not implement "
                "'predict_proba'. Thus, the confidence cannot be "
                "plotted."
            )
            confidence = None
        predicted_classes = np.asarray(clf.predict(mesh_instances))
        # Map every predicted label to the position of that label among the
        # sorted unique labels. Using the raw label as a column index breaks
        # as soon as the predicted labels are not exactly 0, ..., k-1
        # (e.g. {1, 2}, floats or strings).
        unique_labels, label_positions = np.unique(
            predicted_classes, return_inverse=True
        )
        label_positions = label_positions.reshape(-1)
        classes = np.arange(len(unique_labels))
        # One-hot encode the predictions so that they can be handled like
        # class-membership probabilities below.
        predictions = np.zeros((len(label_positions), len(classes)))
        predictions[np.arange(len(label_positions)), label_positions] = 1
    else:
        raise AttributeError(
            "'clf' must implement 'predict' or 'predict_proba'"
        )

    posterior_list = [
        predictions[:, y].reshape(X_mesh.shape) for y in classes
    ]

    norm = plt.Normalize(vmin=classes.min(), vmax=classes.max())

    for y in classes:
        posteriors = posterior_list[y]
        # Element-wise maximum over the posteriors of all other classes;
        # stays zero if there is no other class.
        posteriors_best_alternative = np.zeros_like(posteriors)
        for y2 in np.setdiff1d(classes, [y]):
            posteriors_best_alternative = np.maximum(
                posteriors_best_alternative, posterior_list[y2]
            )

        # Renormalize to a two-class problem (class `y` vs. its strongest
        # competitor) such that 0.5 marks the decision boundary.
        posteriors = posteriors / (posteriors + posteriors_best_alternative)
        ax.contour(X_mesh, Y_mesh, posteriors, [0.5], **boundary_args)
        if confidence is not None:
            ax.contour(
                X_mesh,
                Y_mesh,
                posteriors,
                [confidence],
                colors=[cmap(norm(y))],
                **confidence_args,
            )
    return ax
def plot_contour_for_samples(
    X,
    values,
    replace_nan=0.0,
    feature_bound=None,
    ax=None,
    res=21,
    contour_dict=None,
):
    """Plot sample-wise values as a filled contour over the feature space.

    The values are extended to a regular mesh by assigning each mesh point
    the value of its nearest sample in `X` (1-nearest-neighbor regression).

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Samples for which `values` are given.
    values : array-like of shape (n_samples)
        Values to plot for samples `X` (may contain np.nan, can be replaced
        or ignored, see `replace_nan`). Infinite values are treated like
        np.nan.
    replace_nan : numeric or None, optional (default=0.0)
        If numeric, nan-values in `values` will be replaced by this number.
        If None, these samples will be ignored.
    feature_bound : array-like, [[xmin, ymin], [xmax, ymax]], optional
        (default=None)
        Determines the area in which the contour is plotted. It is validated
        by `check_bound` together with `X`.
    ax : matplotlib.axes.Axes, optional (default=None)
        The axis on which the values are plotted. If no axis is given, the
        current axis (`plt.gca()`) will be used instead.
    res : int, optional (default=21)
        The resolution of the plot.
    contour_dict : dict, optional (default=None)
        Additional parameters for the contour.

    Returns
    -------
    matplotlib.axes.Axes: The axis on which the values were plotted.
    """
    # Keep the validated array: `X` is indexed with a boolean mask below,
    # which does not work for plain array-likes such as lists.
    X = check_array(X, ensure_2d=True)
    # Copy because the values are modified in place.
    values = check_array(
        values, ensure_2d=False, force_all_finite=False, copy=True
    )
    values[np.isinf(values)] = np.nan

    feature_bound = check_bound(bound=feature_bound, X=X)

    X_mesh, Y_mesh, mesh_instances = mesh(feature_bound, res)

    if ax is None:
        ax = plt.gca()

    if replace_nan is None:
        # Drop the samples without a valid value.
        valid_idx = ~np.isnan(values)
        X = X[valid_idx]
        values = values[valid_idx]
    else:
        values = np.nan_to_num(values, nan=replace_nan)

    contour_args = _get_contour_args(contour_dict)

    # Each mesh point gets the value of its nearest sample.
    neighbors = KNeighborsRegressor(n_neighbors=1)
    neighbors.fit(X, values)

    scores = neighbors.predict(mesh_instances).reshape(X_mesh.shape)
    ax.contourf(X_mesh, Y_mesh, scores, **contour_args)
    return ax
def plot_stream_training_data(
    ax,
    X,
    y,
    queried_indices,
    classes,
    feature_bound,
    unlabeled_color="grey",
    cmap="coolwarm",
    alpha=0.2,
    linewidth=3,
    plot_cand_highlight=True,
):
    """Draw the samples of a one-dimensional data stream as horizontal lines.

    Each sample is drawn as a horizontal line at height `X[t]` that starts
    at its position `t` in the stream and ends at `len(X) - 1`. Queried
    samples are colored by their label, all others with `unlabeled_color`.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        The axis to which the lines are added.
    X : array-like of shape (n_samples, 1)
        The samples of the stream in order of arrival.
    y : array-like of shape (n_samples, )
        Labels of the samples (may contain nan).
    queried_indices : array-like of shape (n_samples,)
        Indicates for each sample whether it has been queried.
    classes : array-like of shape (n_classes)
        Holds the label for each class. Its minimum and maximum define the
        color normalization.
    feature_bound : array-like
        Only `feature_bound[0][1]` is read; it is used as the right end of
        the highlight line on the horizontal axis.
        NOTE(review): this suggests a [[tmin, tmax], ...] layout rather
        than [[xmin, ymin], [xmax, ymax]] -- confirm against the callers.
    unlabeled_color : str, optional (default='grey')
        The color for the samples that have not been queried.
    cmap : str | matplotlib.colors.Colormap, optional (default='coolwarm')
        The colormap for the labels of queried samples.
    alpha : scalar, optional (default=0.2)
        Alpha value of non-queried samples and of the highlight line;
        queried samples are drawn with twice this value.
    linewidth : float, optional (default=3)
        Line width in points; the highlight line is twice as wide.
    plot_cand_highlight : bool, optional (default=True)
        Whether to highlight the most recent sample `X[-1]`.

    Returns
    -------
    data_lines : list of matplotlib.lines.Line2D
        The lines that were added to `ax` (the highlight line first, if
        requested).
    """
    # Input validation only; the converted arrays are not used.
    column_or_1d(X)
    check_array(y, ensure_2d=False, force_all_finite="allow-nan")
    check_consistent_length(X, y)
    check_array(queried_indices, ensure_2d=False)
    check_array(classes, ensure_2d=False)
    check_type(unlabeled_color, "unlabeled_color", str)
    check_type(plot_cand_highlight, "plot_cand_highlight", bool)
    check_type(ax, "ax", Axes)

    data_lines = []
    colormap = _get_cmap(cmap)
    class_norm = plt.Normalize(vmin=min(classes), vmax=max(classes))

    # Color of the most recent sample.
    if queried_indices[-1]:
        highlight_color = colormap(class_norm(y[-1]))
    else:
        highlight_color = unlabeled_color

    if plot_cand_highlight:
        newest = X[-1]
        highlight = lines.Line2D(
            [0, feature_bound[0][1]],
            [newest, newest],
            c=highlight_color,
            alpha=alpha,
            linewidth=linewidth * 2,
        )
        data_lines.append(highlight)

    last_step = len(X) - 1
    stream = zip(X, queried_indices, y)
    for step, (sample, was_queried, label) in enumerate(stream):
        if was_queried:
            # Queried samples are drawn on top and more opaque.
            color = colormap(class_norm(label))
            layer = 3
            opacity = alpha * 2
        else:
            color = unlabeled_color
            layer = 2
            opacity = alpha
        sample_line = lines.Line2D(
            [step, last_step],
            [sample, sample],
            zorder=layer,
            color=color,
            alpha=opacity,
            linewidth=linewidth,
        )
        data_lines.append(sample_line)

    for sample_line in data_lines:
        ax.add_line(sample_line)

    return data_lines
def plot_stream_decision_boundary(
    ax,
    t_x,
    plot_step,
    clf,
    X,
    pred_list,
    color="k",
    res=25,
):
    """Plot the decision boundary of a classifier over the course of a stream.

    The classifier is evaluated on `res` equidistant points between the
    minimum and maximum of `X`. The result is appended to `pred_list`, and
    the 0.5 level of all but the first entry of `pred_list` is drawn as a
    contour over time.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        The axis on which the decision boundary is plotted.
    t_x : int
        The position of the newest instance on the x axis.
    plot_step : int
        The interval in which the clf should predict new samples.
    clf : Sklearn classifier
        The fitted classifier whose decision boundary is plotted.
    X : array-like of shape (n_samples, 1)
        Training data set, usually complete, i.e. including the labeled and
        unlabeled samples.
    pred_list : list
        The classifier predictions of the previous steps. The list is
        extended in place by the predictions of the current step.
    color : str | matplotlib.colors.Colormap, optional (default='k')
        The color for the decision boundary.
    res : int, optional (default=25)
        The resolution of the plot.

    Returns
    -------
    ax : matplotlib.axes.Axes
        The axis on which the boundary was plotted.
    pred_list : list
        The list containing the classifier predictions including the ones
        of the current step.
    """
    X = column_or_1d(X)
    check_array(pred_list, ensure_2d=False, ensure_min_samples=0)
    check_scalar(t_x, "t_x", int, min_val=0)
    check_scalar(plot_step, "plot_step", int, min_val=1)
    check_type(ax, "ax", Axes)
    check_type(clf, "clf", ClassifierMixin)

    # Feature values at which the classifier is evaluated.
    feature_grid = np.linspace(np.min(X), np.max(X), res)
    # Time steps plot_step, 2 * plot_step, ... up to t_x.
    n_steps = t_x // plot_step
    time_grid = np.arange(1, n_steps + 1) * plot_step
    t_mesh, x_mesh = np.meshgrid(time_grid, feature_grid)

    current = clf.predict(feature_grid.reshape([-1, 1]))
    pred_list.extend(np.array([current]))

    # A contour needs more than two entries and at least one non-zero
    # prediction.
    drawable = len(pred_list) > 2 and np.sum(pred_list) > 0
    if drawable:
        history = np.array(pred_list[1:]).T
        ax.contour(
            t_mesh,
            x_mesh,
            history,
            levels=[0.5],
            colors=color,
        )
    return ax, pred_list
def _general_plot_utilities(qs, X, y, candidates=None, **kwargs):
    """Plot the utility for the given query strategy.

    Parameters
    ----------
    qs : skactiveml.base.QueryStrategy
        The query strategy for which the utility is plotted.
    X : array-like of shape (n_samples, n_features)
        Training data set, usually complete, i.e. including the labeled and
        unlabeled samples. Must have exactly two features.
    y : array-like of shape (n_samples, ) or (n_samples, n_annotators)
        Labels of the training data set (possibly including unlabeled ones
        indicated by `qs.missing_label`).
    candidates : None or array-like of shape (n_candidates,), dtype=int or
        array-like of shape (n_candidates, n_features),
        optional (default=None)
        If `candidates` is None, a mesh with the specified resolution is
        generated and considered as candidates. If plotting over the mesh
        fails, the unlabeled samples from (X, y) are used as candidates
        instead (with a warning, unless the failure is a `MappingError`).
        If `candidates` is of shape (n_candidates,) and of type int,
        candidates is considered as the indices of the samples in (X, y).
        If `candidates` is of shape (n_candidates, n_features), the
        candidates are directly given in candidates (not necessarily
        contained in X). This is not supported by all query strategies.

    Other Parameters
    ----------------
    replace_nan : numeric or None, optional (default=0.0)
        Only used if the utilities are plotted over samples instead of a
        mesh. If numeric, nan-utilities will be plotted with value
        `replace_nan`. If None, these samples will be ignored.
    ignore_undefined_query_params : bool, optional (default=False)
        If True, query parameters that are not defined in the query function
        are ignored and will not raise an exception.
    feature_bound : array-like of shape [[xmin, ymin], [xmax, ymax]], optional
        (default=None)
        Determines the area in which the utilities are plotted. It is
        validated by `check_bound` together with `X`.
    ax : matplotlib.axes.Axes, optional (default=None)
        The axis on which the utility is plotted. Only if y.ndim = 1 (single
        annotator).
    axes : array-like of matplotlib.axes.Axes, optional (default=None)
        The axes on which the utilities for the annotators are plotted. Only
        supported for y.ndim = 2 (multi annotator).
    res : int, optional (default=21)
        The resolution of the plot.
    contour_dict : dict, optional (default=None)
        Additional parameters for the utility contour.
    plot_annotators : None or array-like of shape (n_annotators_to_plot,),
        optional (default=None)
        Contains the indices of the annotators to be plotted. If it is None,
        all annotators are plotted. Only supported for y.ndim = 2
        (multi annotator).
    **kwargs
        Remaining keyword arguments are passed the query function of the
        query strategy.

    Returns
    -------
    matplotlib.axes.Axes or array-like of shape (n_annotators_to_plot,)
        The single axis (y.ndim = 1) or the axes (y.ndim = 2) on which the
        utilities were plotted.

    Raises
    ------
    ValueError
        If `X` does not have two features, if `ax` is given in the
        multi-annotator setting, or if the number of `axes` does not match
        the number of annotators to plot.
    TypeError
        If `plot_annotators` or `axes` is given in the single-annotator
        setting.
    """
    replace_nan = kwargs.pop("replace_nan", 0.0)
    ignore_undefined_query_params = kwargs.pop(
        "ignore_undefined_query_params", False
    )
    feature_bound = kwargs.pop("feature_bound", None)
    ax = kwargs.pop("ax", None)
    axes = kwargs.pop("axes", None)
    res = kwargs.pop("res", 21)
    contour_dict = kwargs.pop("contour_dict", None)
    plot_annotators = kwargs.pop("plot_annotators", None)

    check_type(qs, "qs", QueryStrategy)
    X = check_array(X, allow_nd=False, ensure_2d=True)
    if X.shape[1] != 2:
        raise ValueError("Samples in `X` must have 2 features.")

    # Check labels
    y = check_array(y, ensure_2d=False, force_all_finite="allow-nan")
    check_consistent_length(X, y)

    # `n_annotators` stays None in the single-annotator setting.
    if y.ndim == 2:
        if plot_annotators is None:
            n_annotators = y.shape[1]
            plot_annotators = np.arange(n_annotators)
        else:
            plot_annotators = column_or_1d(plot_annotators)
            check_indices(plot_annotators, y, dim=1)
            n_annotators = len(plot_annotators)
    else:
        n_annotators = None
        if plot_annotators is not None:
            raise TypeError(
                "`plot_annotator` can be only used in the multi-annotator "
                "setting."
            )
        plot_annotators = np.arange(1)

    if n_annotators is None:
        if axes is not None:
            raise TypeError(
                "`axes` can be only used in the multi-annotator setting. "
                "Use `ax` instead."
            )
        if ax is None:
            axes = np.array([plt.subplots(1, 1)[1]])
        else:
            check_type(ax, "ax", Axes)
            axes = np.array([ax])
    else:
        if ax is not None:
            raise ValueError(
                "`ax` can be only used in the single-annotator setting. "
                "Use `axes` instead."
            )
        if axes is None:
            # `squeeze=False` always yields a 2D array of axes, so a single
            # annotator results in an array with one entry instead of a bare
            # `Axes` object (which has no length and cannot be iterated).
            axes = plt.subplots(1, n_annotators, squeeze=False)[1][0]
        else:
            for ax_ in axes:
                check_type(ax_, "ax", Axes)
        if len(axes) != n_annotators:
            raise ValueError(
                "`axes` must contain one `Axes` object for each "
                "annotator to be plotted (indicated by `plot_annotators`)."
            )

    # ensure that utilities are returned
    kwargs["return_utilities"] = True

    if candidates is None:
        # plot mesh
        try:
            check_scalar(res, "res", int, min_val=1)
            feature_bound = check_bound(bound=feature_bound, X=X)
            X_mesh, Y_mesh, mesh_instances = mesh(feature_bound, res)
            contour_args = _get_contour_args(contour_dict)

            utilities = _compute_utilities(
                qs,
                X,
                y,
                mesh_instances,
                ignore_undefined_query_params,
                kwargs,
            )

            for a_idx, ax_ in zip(plot_annotators, axes):
                utilities_a_idx = _utilities_of_annotator(
                    utilities, a_idx, n_annotators
                )
                utilities_a_idx = utilities_a_idx.reshape(X_mesh.shape)
                ax_.contourf(X_mesh, Y_mesh, utilities_a_idx, **contour_args)

            return axes[0] if n_annotators is None else axes

        except MappingError:
            # Expected for strategies that cannot handle candidates outside
            # of `X`; silently fall back to the unlabeled samples.
            candidates = unlabeled_indices(y, missing_label=qs.missing_label)
        except Exception as err:
            # Deliberately best-effort, but limited to `Exception` such that
            # KeyboardInterrupt and SystemExit are not swallowed.
            warnings.warn(
                f"Unable to create utility plot with mesh because "
                f"of the following error. Trying plotting over "
                f"candidates. \n\n Unexpected {err!r}"
            )
            candidates = unlabeled_indices(y, missing_label=qs.missing_label)

    candidates = check_array(
        candidates,
        allow_nd=False,
        ensure_2d=False,
        force_all_finite="allow-nan",
    )
    if candidates.ndim == 1:
        # Candidates are indices into `X`, so the utilities refer to `X`.
        X_utils = X
        candidates = check_indices(candidates, X)
    else:
        X_utils = candidates

    utilities = _compute_utilities(
        qs, X, y, candidates, ignore_undefined_query_params, kwargs
    )

    for a_idx, ax_ in zip(plot_annotators, axes):
        utilities_a_idx = _utilities_of_annotator(
            utilities, a_idx, n_annotators
        )
        plot_contour_for_samples(
            X_utils,
            utilities_a_idx,
            replace_nan=replace_nan,
            feature_bound=feature_bound,
            ax=ax_,
            res=res,
            contour_dict=contour_dict,
        )

    return axes[0] if n_annotators is None else axes


def _compute_utilities(
    qs, X, y, candidates, ignore_undefined_query_params, query_kwargs
):
    """Call `qs.query` and return only the utilities it reports."""
    if ignore_undefined_query_params:
        # `call_func` is used so that undefined query parameters do not
        # raise an exception.
        _, utilities = call_func(
            qs.query, X=X, y=y, candidates=candidates, **query_kwargs
        )
    else:
        _, utilities = qs.query(X=X, y=y, candidates=candidates, **query_kwargs)
    return utilities


def _utilities_of_annotator(utilities, a_idx, n_annotators):
    """Select the utilities of the first query for annotator `a_idx`."""
    if n_annotators is not None:
        return utilities[0, :, a_idx]
    # Single-annotator setting: there is no annotator axis.
    return utilities[0, :]