Source code for ipsuite.analysis.model.plots
import typing
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from scipy.interpolate import interpn
from scipy.optimize import curve_fit
from scipy.stats import foldnorm, gaussian_kde
[docs]
def density_scatter(ax, x, y, bins, **kwargs) -> None:
"""Create a scatter plot colored by 2d histogram density.
Parameters
----------
ax: matplotlib.axes.Axes
x: np.ndarray
y: np.ndarray
bins: int
kwargs
any kwargs passed to 'ax.scatter'
Returns
-------
References
----------
Adapted from https://stackoverflow.com/a/53865762/10504481
"""
# convert e.g. DataFrame to numpy array values
x = np.array(x)
y = np.array(y)
if "cmap" not in kwargs:
kwargs["cmap"] = "viridis"
data, x_e, y_e = np.histogram2d(x, y, bins=bins, density=True)
points = (0.5 * (x_e[1:] + x_e[:-1]), 0.5 * (y_e[1:] + y_e[:-1]))
xi = np.vstack([x, y]).T
z = interpn(points, data, xi, method="splinef2d", bounds_error=False)
# To be sure to plot all data
z[np.where(np.isnan(z))] = 0.0
# Sort the points by density, so that the densest points are plotted last
idx = z.argsort()
x, y, z = x[idx], y[idx], z[idx]
ax.scatter(x, y, c=z, **kwargs)
[docs]
def get_figure(
true,
prediction,
datalabel: str,
xlabel: str,
ylabel: str,
ymax: typing.Optional[float] = None,
figsize: tuple = (10, 7),
density=True,
) -> plt.Figure:
"""Create a correlation plot for true, prediction values.
Parameters
----------
true: the true values
prediction: the predicted values
datalabel: str, the label for the prediction, e.g. 'MAE: 0.123 eV'
xlabel: str, the xlabel
ylabel: str, the xlabel
figsize: tuple, size of the figure
Returns
-------
plt.Figure
"""
sns.set()
fig, ax = plt.subplots(figsize=figsize)
ax.plot(true, np.zeros_like(true), color="grey", zorder=0)
bins = 25
if true.shape[0] < 20 or not density:
# don't use density for very small datasets
ax.scatter(true, prediction, marker="x", s=20.0, label=datalabel)
else:
density_scatter(
ax, true, prediction, bins=bins, marker="x", s=20.0, label=datalabel
)
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
if ymax:
ax.set_ylim([-ymax, ymax])
ax.legend()
return fig
[docs]
def get_calibration_figure(
error,
std,
markersize: float = 3.0,
datalabel=None,
forces=False,
figsize: tuple = (10, 7),
):
"""Log-log plot of errors vs predicted standard deviations with quantiles
for a linearly increasing noise level.
"""
fig, ax = plt.subplots(1, 1, figsize=figsize, dpi=300)
x = np.linspace(1e-6, 5e3, 5)
noise_level_2 = x
quantiles_lower_01 = [foldnorm.ppf(0.15, 0.0, 0.0, i) for i in noise_level_2]
quantiles_upper_01 = [foldnorm.ppf(0.85, 0.0, 0.0, i) for i in noise_level_2]
quantiles_lower_05 = [foldnorm.ppf(0.05, 0.0, 0.0, i) for i in noise_level_2]
quantiles_upper_05 = [foldnorm.ppf(0.95, 0.0, 0.0, i) for i in noise_level_2]
quantiles_lower_005 = [foldnorm.ppf(0.005, 0.0, 0.0, i) for i in noise_level_2]
quantiles_upper_005 = [foldnorm.ppf(0.995, 0.0, 0.0, i) for i in noise_level_2]
ax.scatter(
std,
error,
s=markersize,
alpha=0.3,
color="tab:blue",
rasterized=True,
linewidth=0.0,
label=datalabel,
)
ax.loglog()
ax.plot(x, quantiles_upper_05, color="gray", alpha=0.5)
ax.plot(x, quantiles_lower_05, color="gray", alpha=0.5)
ax.plot(x, quantiles_upper_01, color="gray", alpha=0.5)
ax.plot(x, quantiles_lower_01, color="gray", alpha=0.5)
ax.plot(x, quantiles_upper_005, color="gray", alpha=0.5)
ax.plot(x, quantiles_lower_005, color="gray", alpha=0.5)
ax.plot(
np.logspace(-10, 100.0), np.logspace(-10, 100.0), linestyle="--", color="grey"
)
xlower = max(np.min(std) / 1.5, 1e-7)
ylower = max(np.min(error) / 1.5, 1e-7)
ax.set_xlim(xlower, np.max(std) * 1.5)
ax.set_ylim(ylower, np.max(error) * 1.5)
if forces:
xlabel = r"$\sigma_{f_{i\alpha}}(A)$ [meV/$\AA$] "
ylabel = r"$|\Delta f_{i\alpha}(A)|$ [meV/$\AA$] "
else:
xlabel = r"$\sigma_{E_{i}}(A)$ [meV/atom] "
ylabel = r"$|\Delta E_{i}(A)|$ [meV/atom] "
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
if datalabel:
ax.legend()
return fig
[docs]
def gauss(x, *p):
m, s = p
return np.exp(-(((x - m) / s) ** 2) * 0.5) / np.sqrt(2 * np.pi * s**2)
[docs]
def slice_ensemble_uncertainty(true, pred_ens, slice_start, slice_end):
pred_mean = np.mean(pred_ens, axis=1)
pred_std = np.std(pred_ens, axis=1)
isel = np.where((slice_start < pred_std) & (pred_std < slice_end))[0]
error_true = np.reshape(true[isel] - pred_mean[isel], -1)
error_pred = np.reshape(pred_ens[isel, :] - pred_mean[isel, np.newaxis], -1)
return error_true, error_pred
[docs]
def slice_uncertainty(true, pred_mean, pred_std, slice_start, slice_end):
isel = np.where((slice_start < pred_std) & (pred_std < slice_end))[0]
error_true = np.reshape(true[isel] - pred_mean[isel], -1)
error_pred = pred_std[isel]
return error_true, error_pred
[docs]
def get_gaussianicity_figure(error_true, error_pred, forces=True):
"""Plots empirical and predicted error distributions.
If possible, it also tries to fit a gaussian to the empirical distribution.
"""
true_kde_sel = gaussian_kde(error_true)
ens_kde_sel = gaussian_kde(error_pred)
bounds = 1.5 * max(np.max(np.abs(error_true)), np.max(np.abs(error_pred)))
fig, ax = plt.subplots()
xgrid = np.linspace(-bounds, bounds, 400)
ax.set_xlim([-bounds, bounds])
ens_sel = ens_kde_sel(xgrid)
true_sel = true_kde_sel(xgrid)
try:
guess = [0.0, 100]
coeff, _ = curve_fit(gauss, xgrid, true_sel, p0=guess)
std = coeff[1]
ax.semilogy(xgrid, gauss(xgrid, 0, std), "k--", label="Gaussian")
except RuntimeError:
print("Curve fit failed, only plotting distributions")
ax.semilogy(xgrid, true_sel, "r-", label="empirical")
ax.semilogy(xgrid, ens_sel, "b-", label="predicted")
ymax = 5 * max(np.max(true_sel), np.max(ens_sel))
ax.set_ylim(1e-6, ymax)
ax.set_yscale("log")
if forces:
xlabel = r"$\Delta (S)$ / meV/Ang"
ylabel = r"$p(\Delta | S)$"
else:
xlabel = r"$\Delta (S)$ / meV/atom"
ylabel = r"$p(\Delta | S)$"
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
ax.legend()
return fig
[docs]
def get_hist(data, label, xlabel, ylabel) -> typing.Tuple[plt.Figure, plt.Axes]:
fig, ax = plt.subplots()
sns.histplot(
data,
ax=ax,
stat="percent",
label=label,
)
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
ax.legend()
return fig, ax
[docs]
def get_histogram_figure(
bin_edges,
counts,
datalabel: str,
xlabel: str,
ylabel: str,
x_lim: typing.Tuple[float, float] = None,
y_lim: typing.Tuple[float, float] = None,
logy_scale=True,
figsize: tuple = (10, 7),
) -> plt.Figure:
"""Creates a Matplotlib figure based on precomputed bin edges and counts.
Parameters
----------
bin_edges: np.array
Edges of the histogram bins.
counts: np.array
Number of occurrences in each bin.
datalabel: str
Labels for the figure legend.
xlabel: str
X-axis label.
ylabel: str
Y-axis label.
x_lim: tuple
X-axis limits.
y_lim: tuple
Y-axis limits.
figsize: tuple
Size of the Matplotlib figure
"""
sns.set()
fig, ax = plt.subplots(figsize=figsize)
ax.stairs(counts, bin_edges, label=datalabel, fill=True)
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
ax.legend()
if logy_scale:
ax.set_yscale("log")
if x_lim is not None:
ax.set_xlim(x_lim)
if y_lim is not None:
ax.set_ylim(y_lim)
return fig