import pathlib
import numpy as np
import pandas as pd
import zntrack
from ipsuite import base
from ipsuite.analysis.model.math import decompose_stress_tensor
from ipsuite.analysis.model.plots import get_histogram_figure
[docs]
class LabelHistogram(base.AnalyseAtoms):
    """Base class for creating a histogram of a dataset.

    Subclasses supply the values to be histogrammed by overriding
    ``get_labels`` and must define the ``datalabel`` and ``xlabel``
    attributes, which are forwarded to the plotting helper.

    Parameters
    ----------
    data: list
        List of Atoms objects.
    bins: int | str
        Number of bins in the histogram, or string indicating how to find the number of bins.
        See https://numpy.org/devdocs/reference/generated/numpy.histogram_bin_edges.html#numpy.histogram_bin_edges
    x_lim: tuple
        Forwarded to ``get_histogram_figure`` as ``x_lim``.
    y_lim: tuple
        Forwarded to ``get_histogram_figure`` as ``y_lim``.
    logy_scale: bool
        Forwarded to ``get_histogram_figure`` as ``logy_scale``.
    """  # noqa: E501

    bins: int | str = zntrack.params("auto")
    x_lim: tuple = zntrack.params(None)
    y_lim: tuple = zntrack.params(None)
    plots_dir: pathlib.Path = zntrack.outs_path(zntrack.nwd / "plots")
    labels_df: pd.DataFrame = zntrack.plots()
    logy_scale: bool = zntrack.params(True)
    # Holds a mapping with the keys "mean", "std", "max" and "min".
    metrics: dict[str, float] = zntrack.metrics()

    def get_labels(self):
        """Return the values to be histogrammed; must be overridden.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError

    def get_hist(self):
        """Compute summary metrics and the histogram of the labels.

        Stores the mean, standard deviation, maximum and minimum of the
        labels in ``self.metrics``.

        Returns
        -------
        counts: np.ndarray
            Number of labels falling into each bin.
        bin_edges: np.ndarray
            Edges of the bins; one element longer than ``counts``.

        Raises
        ------
        ValueError
            If ``get_labels`` returns no values.
        """
        labels = np.asarray(self.get_labels())
        if labels.size == 0:
            # np.max / np.min would fail with an opaque reduction error.
            raise ValueError("Cannot create a histogram from an empty set of labels.")

        # Plain Python floats keep the metrics independent of NumPy scalar types.
        self.metrics = {
            "mean": float(np.mean(labels)),
            "std": float(np.std(labels)),
            "max": float(np.max(labels)),
            "min": float(np.min(labels)),
        }

        bin_edges = np.histogram_bin_edges(labels, bins=self.bins)
        counts, bin_edges = np.histogram(labels, bins=bin_edges)
        return counts, bin_edges

    def get_plots(self, counts, bin_edges):
        """Render the histogram and save it as ``hist.png`` in ``plots_dir``.

        Parameters
        ----------
        counts: np.ndarray
            Histogram counts, as returned by ``get_hist``.
        bin_edges: np.ndarray
            Histogram bin edges, as returned by ``get_hist``.
        """
        # parents=True so a missing parent directory does not abort the run.
        self.plots_dir.mkdir(parents=True, exist_ok=True)

        ylabel = "Occurrences"
        label_hist = get_histogram_figure(
            bin_edges,
            counts,
            datalabel=self.datalabel,
            xlabel=self.xlabel,
            ylabel=ylabel,
            x_lim=self.x_lim,
            y_lim=self.y_lim,
            logy_scale=self.logy_scale,
        )
        label_hist.savefig(self.plots_dir / "hist.png")

    def run(self):
        """Compute the histogram, save the plot and fill ``labels_df``."""
        counts, bin_edges = self.get_hist()
        self.get_plots(counts, bin_edges)
        # bin_edges has one more entry than counts; the first edge is dropped
        # so that both columns have the same length.
        self.labels_df = pd.DataFrame({"bin_edges": bin_edges[1:], "counts": counts})
[docs]
class EnergyHistogram(LabelHistogram):
    """Histogram of the potential energies of all entries in a dataset."""

    datalabel: str = zntrack.params("energy")
    xlabel: str = zntrack.params(r"$E$ / eV")

    def get_labels(self):
        """Return a list with the potential energy of every entry in ``data``."""
        energies = [atoms.get_potential_energy() for atoms in self.data]
        return energies
[docs]
class ForcesHistogram(LabelHistogram):
    """Histogram of the magnitudes of all force labels in a dataset."""

    datalabel: str = zntrack.params("forces")
    xlabel: str = zntrack.params(r"$F$ / eV/Ang")

    def get_labels(self):
        """Return the Euclidean norm of every row of the stacked force arrays."""
        per_entry_forces = [atoms.get_forces() for atoms in self.data]
        stacked = np.concatenate(per_entry_forces, axis=0)
        # The histogram works element-wise on N-D arrays, so each vector is
        # reduced to its magnitude first.
        magnitudes = np.linalg.norm(stacked, ord=2, axis=1)
        return magnitudes
[docs]
class ForcesUncertaintyHistogram(LabelHistogram):
    """Histogram of the magnitudes of all force uncertainties in a prediction."""

    datalabel: str = zntrack.params("forces-uncertainty")
    xlabel: str = zntrack.params(r"$\sigma(F)$ / eV/Ang")

    def get_labels(self):
        """Return the Euclidean norm of every row of the stacked uncertainties.

        The values are read from ``calc.results["forces_uncertainty"]`` of
        each entry in ``data``.
        """
        per_entry = [atoms.calc.results["forces_uncertainty"] for atoms in self.data]
        stacked = np.concatenate(per_entry, axis=0)
        magnitudes = np.linalg.norm(stacked, ord=2, axis=1)
        return magnitudes
[docs]
class EnergyUncertaintyHistogram(LabelHistogram):
    """Histogram of all energy uncertainties in a prediction."""

    datalabel: str = zntrack.params("energy-uncertainty")
    xlabel: str = zntrack.params(r"$\sigma(E)$ / eV")

    def get_labels(self):
        """Return the energy uncertainties of all entries as a flat array.

        The values are read from ``calc.results["energy_uncertainty"]`` of
        each entry in ``data``.
        """
        uncertainties = [atoms.calc.results["energy_uncertainty"] for atoms in self.data]
        # Flatten to 1-D regardless of the shape of the individual values.
        return np.reshape(uncertainties, -1)
[docs]
class DipoleHistogram(LabelHistogram):
    """Histogram of the magnitudes of all dipole labels in a dataset."""

    datalabel: str = zntrack.params("dipole")
    xlabel: str = zntrack.params(r"$\mu$ / eV Ang")

    def get_labels(self):
        """Return the Euclidean norm of the dipole of every entry in ``data``.

        The dipoles are read from ``calc.results["dipole"]``.
        """
        dipoles = np.array([atoms.calc.results["dipole"] for atoms in self.data])
        # The histogram works element-wise on N-D arrays, so each vector is
        # reduced to its magnitude first.
        magnitudes = np.linalg.norm(dipoles, ord=2, axis=1)
        return magnitudes
[docs]
class StressHistogram(base.AnalyseAtoms):
    """Creates histograms for the hydrostatic and
    deviatoric components of the stress tensor.

    Parameters
    ----------
    data: list
        List of Atoms objects.
    bins: int | None
        Number of bins in the histogram. If ``None``, one bin per 100
        entries in ``data`` (rounded up) is used.
    logy_scale: bool
        Forwarded to ``get_histogram_figure`` as ``logy_scale``.
    """

    bins: int | None = zntrack.params(None)
    plots_dir: pathlib.Path = zntrack.outs_path(zntrack.nwd / "plots")
    labels_df: pd.DataFrame = zntrack.plots()
    logy_scale: bool = zntrack.params(True)

    def get_labels(self):
        """Return the stress of every entry in ``data`` as one array.

        Each stress is requested with ``voigt=False``.
        """
        labels = np.array([x.get_stress(voigt=False) for x in self.data])
        return labels

    def get_hist(self):
        """Compute histograms of the hydrostatic and deviatoric stresses.

        Returns
        -------
        counts: tuple
            ``(hydrostatic_counts, deviatoric_counts)``.
        bin_edges: tuple
            ``(hydrostatic_bin_edges, deviatoric_bin_edges)``.
        """
        labels = self.get_labels()
        hydrostatic_stresses, deviatoric_stresses = decompose_stress_tensor(labels)

        # Resolve the default into a local so the declared ``bins`` parameter
        # is not overwritten as a side effect of running the node.
        n_bins = self.bins
        if n_bins is None:
            n_bins = int(np.ceil(len(labels) / 100))

        hydro_counts, hydro_bin_edges = np.histogram(hydrostatic_stresses, n_bins)
        devia_counts, devia_bin_edges = np.histogram(deviatoric_stresses, n_bins)

        counts = (hydro_counts, devia_counts)
        bin_edges = (hydro_bin_edges, devia_bin_edges)
        return counts, bin_edges

    def get_plots(self, counts, bin_edges, hydrostatic=True):
        """Render one histogram and save it into ``plots_dir``.

        Parameters
        ----------
        counts: np.ndarray
            Histogram counts.
        bin_edges: np.ndarray
            Histogram bin edges.
        hydrostatic: bool
            Selects the axis label, data label and file name:
            ``hydrostatic_hist.png`` if true, ``deviatoric_hist.png`` otherwise.
        """
        if hydrostatic:
            xlabel = r"$\pi$ / eV / Ang$^3$"
            datalabel = "hydrostatic stress"
            fname = "hydrostatic_hist.png"
        else:
            xlabel = r"$\sigma_{ij}$ / eV / Ang$^3$"
            datalabel = "deviatoric stress components"
            fname = "deviatoric_hist.png"

        label_hist = get_histogram_figure(
            bin_edges,
            counts,
            datalabel=datalabel,
            xlabel=xlabel,
            ylabel="Occurrences",
            logy_scale=self.logy_scale,
        )
        label_hist.savefig(self.plots_dir / fname)

    def run(self):
        """Compute both histograms, save the plots and fill ``labels_df``."""
        counts, bin_edges = self.get_hist()

        # Tolerate an already existing directory (e.g. on a re-run) and a
        # missing parent, matching LabelHistogram's exist_ok behaviour.
        self.plots_dir.mkdir(parents=True, exist_ok=True)
        self.get_plots(counts[0], bin_edges[0], hydrostatic=True)
        self.get_plots(counts[1], bin_edges[1], hydrostatic=False)

        # Each edge array has one more entry than its counts; the first edge
        # is dropped so that all columns have the same length.
        self.labels_df = pd.DataFrame(
            {
                "hydro_bin_edges": bin_edges[0][1:],
                "hydro_counts": counts[0],
                "deviatoric_bin_edges": bin_edges[1][1:],
                "deviatoric_counts": counts[1],
            }
        )