class Bootstrap(base.IPSNode):
    """Base class for dataset bootstrapping with structural modifications.

    The node picks one configuration from ``data``, seeds a random number
    generator and delegates the actual structure generation to
    :meth:`bootstrap_configs`, which concrete subclasses must override.
    The resulting configurations are written to ``frames_path``.

    Parameters
    ----------
    data : list[ase.Atoms]
        Input atomic configurations to bootstrap from.
    data_id : int, default=-1
        Index of the configuration to use from the data list.
    n_configurations : int
        Number of new configurations to generate.
    maximum : float
        Maximum displacement/rotation/translation magnitude.
    include_original : bool, default=True
        Whether to include the original configuration in output.
    seed : int, default=0
        Random seed for reproducible generation.

    Attributes
    ----------
    frames : list[ase.Atoms]
        Generated atomic configurations after bootstrapping.
    frames_path : Path
        Path to the HDF5 file storing the generated configurations.
    """

    data: list[ase.Atoms] = zntrack.deps()
    data_id: int = zntrack.params(-1)
    n_configurations: int = zntrack.params()
    maximum: float = zntrack.params()
    include_original: bool = zntrack.params(True)
    seed: int = zntrack.params(0)

    frames_path: Path = zntrack.outs_path(zntrack.nwd / "frames.h5")

    def run(self) -> None:
        """Generate the bootstrapped configurations and store them on disk."""
        atoms = self.data[self.data_id]
        rng = default_rng(self.seed)
        atoms_list = self.bootstrap_configs(atoms, rng)

        # Store frames in HDF5 file
        db = znh5md.IO(self.frames_path)
        db.extend(atoms_list)

    def bootstrap_configs(self, atoms, rng):
        """Create the modified configurations; must be overridden.

        ``run`` passes the return value straight to the HDF5 writer and does
        not apply ``include_original`` or ``n_configurations`` itself, so
        implementations are responsible for honouring both.

        Parameters
        ----------
        atoms : ase.Atoms
            The configuration selected via ``data_id``.
        rng : numpy.random.Generator
            Generator seeded with ``seed``.

        Returns
        -------
        list[ase.Atoms]
            The configurations to store.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        # Fail with an explicit message instead of the AttributeError that a
        # missing hook would otherwise trigger inside ``run``.
        raise NotImplementedError(
            f"{type(self).__name__} must implement bootstrap_configs(atoms, rng)"
        )

    @property
    def frames(self) -> list[ase.Atoms]:
        """Load and return the generated atomic configurations."""
        with self.state.fs.open(self.frames_path, "rb") as f, h5py.File(f) as file:
            return znh5md.IO(file_handle=file)[:]
class RattleAtoms(Bootstrap):
    """Bootstrap a dataset by randomly displacing atomic positions.

    Each new configuration is obtained from the selected input structure by
    applying a random displacement to every atom's position.

    Parameters
    ----------
    data : list[ase.Atoms]
        Atomic configurations from which the starting structure is taken.
    data_id : int, default=-1
        Position in ``data`` of the configuration to rattle.
    n_configurations : int
        How many rattled configurations to produce.
    maximum : float
        Upper bound on the displacement magnitude (Ångström) per atom.
    include_original : bool, default=True
        If True, the unmodified configuration is part of the output as well.
    seed : int, default=0
        Seed for the random number generator, making displacements
        reproducible.

    Attributes
    ----------
    frames : list[ase.Atoms]
        The configurations with rattled atomic positions.
    frames_path : Path
        Location of the HDF5 file holding the generated configurations.

    Examples
    --------
    >>> with project:
    ...     data = ips.AddData(file="ethanol.xyz")
    ...     rattled = ips.RattleAtoms(data=data.frames, n_configurations=5, maximum=0.1)
    >>> project.repro()
    >>> print(f"Generated {len(rattled.frames)} rattled configurations")
    Generated 6 rattled configurations
    """
class TranslateMolecules(Bootstrap):
    """Bootstrap a dataset by randomly translating molecular units.

    Each new configuration is obtained by shifting the individual molecular
    units by random translations; the internal structure of every unit is
    preserved. The system must contain distinct molecular entities.

    Parameters
    ----------
    data : list[ase.Atoms]
        Atomic configurations containing molecular units.
    data_id : int, default=-1
        Position in ``data`` of the configuration to modify.
    n_configurations : int
        How many configurations with translated molecules to produce.
    maximum : float
        Upper bound on the translation distance (Ångström) per molecule.
    include_original : bool, default=True
        If True, the unmodified configuration is part of the output as well.
    seed : int, default=0
        Seed for the random number generator, making translations
        reproducible.

    Attributes
    ----------
    frames : list[ase.Atoms]
        The configurations with translated molecular units.
    frames_path : Path
        Location of the HDF5 file holding the generated configurations.

    Examples
    --------
    >>> with project:
    ...     data = ips.AddData(file="ethanol.xyz")
    ...     translated = ips.TranslateMolecules(data=data.frames, n_configurations=5,
    ...                                         maximum=0.5)
    >>> project.repro()
    >>> print(f"Generated {len(translated.frames)} configurations with translated "
    ...       f"molecules")
    Generated 6 configurations with translated molecules
    """
class RotateMolecules(Bootstrap):
    """Generate configurations with randomly rotated molecular units.

    Creates new configurations by applying random rotations to individual
    molecular units around their barycenter while preserving internal bond
    structures. Requires distinct molecular entities in the system.

    Parameters
    ----------
    data : list[ase.Atoms]
        Input atomic configurations containing molecular units.
    data_id : int, default=-1
        Index of the configuration to use from the data list.
    n_configurations : int
        Number of configurations with rotated molecules to generate.
    maximum : float
        Maximum rotation angle (radians) for each molecule. Values larger
        than 2π are capped at 2π and a warning is logged.
    include_original : bool, default=True
        Whether to include the original configuration in output.
    seed : int, default=0
        Random seed for reproducible rotation generation.

    Attributes
    ----------
    frames : list[ase.Atoms]
        Generated configurations with rotated molecular units.
    frames_path : Path
        Path to the HDF5 file storing the generated configurations.

    Examples
    --------
    >>> with project:
    ...     data = ips.AddData(file="ethanol.xyz")
    ...     rotated = ips.RotateMolecules(data=data.frames, n_configurations=5,
    ...                                   maximum=3.14159)
    >>> project.repro()
    >>> print(f"Generated {len(rotated.frames)} configurations with rotated molecules")
    Generated 6 configurations with rotated molecules
    """

    def bootstrap_configs(self, atoms, rng):
        """Build configurations in which every molecule is randomly rotated.

        The structure is split into molecules with ``ips.BarycenterMapping``.
        For each requested configuration, every molecule is rotated about the
        unweighted mean of its atomic positions using three "zyx" Euler
        angles, each drawn uniformly from ``[0, maximum)``. The rotated
        molecules are then concatenated into one structure.

        Parameters
        ----------
        atoms : ase.Atoms
            Configuration to derive the rotated structures from.
        rng : numpy.random.Generator
            Random number generator used to draw the Euler angles.

        Returns
        -------
        list[ase.Atoms]
            ``n_configurations`` rotated structures, preceded by ``atoms``
            itself when ``include_original`` is set.
        """
        atoms_list = [atoms] if self.include_original else []

        # Apply the cap that the warning announces. Work on a local copy so
        # the node parameter itself is never modified.
        maximum = self.maximum
        if maximum > 2 * np.pi:
            log.warning("Setting maximum to 2 Pi.")
            maximum = 2 * np.pi

        mapping = ips.BarycenterMapping()
        # Map a copy so the mapping cannot alter the caller's structure.
        _, molecules = mapping.forward_mapping(atoms.copy())

        for _ in range(self.n_configurations):
            molecule_lst = []
            for molecule in molecules:
                mol = molecule.copy()
                euler_angles = rng.uniform(0, maximum, size=(3,))
                rotate = Rotation.from_euler("zyx", euler_angles, degrees=False)

                pos = mol.positions
                barycenter = np.mean(pos, axis=0)
                # Rotate about the barycenter: shift to the origin, rotate,
                # then shift back.
                pos_rotated = rotate.apply(pos - barycenter)
                mol.positions = barycenter + pos_rotated
                molecule_lst.append(mol)

            # Concatenate the per-molecule copies into a single structure.
            new_atoms = molecule_lst[0]
            for mol in molecule_lst[1:]:
                new_atoms += mol
            atoms_list.append(new_atoms)

        return atoms_list