# -*- coding: utf-8 -*-
"""Copyright 2019 DScribe developers
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import sys
import math
import numpy as np
import scipy.spatial.distance
from sklearn.preprocessing import normalize
import sparse
from ase import Atoms
import ase.data
from dscribe.core import System
from dscribe.descriptors.mbtr import (
check_geometry,
check_weighting,
check_grid,
k1_geometry_functions,
)
from dscribe.descriptors.descriptorlocal import DescriptorLocal
from dscribe.ext import MBTRWrapper
import dscribe.utils.geometry
class LMBTR(DescriptorLocal):
"""
Implementation of the Local Many-body tensor representation.
You can use this descriptor for finite and periodic systems. When dealing
with periodic systems or when using machine learning models that use the
Euclidean norm to measure distance between vectors, it is advisable to use
some form of normalization. This implementation does not support the use of
a non-identity correlation matrix.
Notice that the species of the central atom is not encoded in the output,
but is instead represented by a chemical species X with atomic number 0.
This also allows LMBTR to be used on general positions that do not
correspond to real atoms. The surrounding environment is encoded by the
two- and three-body interactions with neighbouring atoms. If there is a need
to distinguish the central species, one can for example train a separate
model for each central species.
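A minimal usage sketch (the parameter values below are illustrative, not
recommended defaults)::

    from ase.build import molecule
    from dscribe.descriptors import LMBTR

    lmbtr = LMBTR(
        species=["H", "O"],
        geometry={"function": "distance"},
        grid={"min": 0, "max": 5, "sigma": 0.1, "n": 100},
        weighting={"function": "exp", "scale": 0.5, "threshold": 1e-3},
        periodic=False,
        normalization="l2",
    )
    water = molecule("H2O")
    # One output row per requested center, here the oxygen atom (index 0).
    output = lmbtr.create(water, centers=[0])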
"""
def __init__(
self,
geometry=None,
grid=None,
weighting=None,
normalize_gaussians=True,
normalization="none",
species=None,
periodic=False,
sparse=False,
dtype="float64",
):
"""
Args:
geometry (dict): Setup the geometry function.
For example::
"geometry": {"function": "distance"}
The geometry function determines the degree :math:`k` for MBTR.
The order :math:`k` tells how many atoms are involved in the
calculation and thus also heavily influences the computational
time.
The following geometry functions are available:
* :math:`k=2`
* ``"distance"``: Pairwise distance in angstroms.
* ``"inverse_distance"``: Pairwise inverse distance in 1/angstrom.
* :math:`k=3`
* ``"angle"``: Angle in degrees.
* ``"cosine"``: Cosine of the angle.
grid (dict): Setup the discretization grid.
For example::
"grid": {"min": 0.1, "max": 2, "sigma": 0.1, "n": 50}
In the grid setup *min* is the minimum value of the axis, *max*
is the maximum value of the axis, *sigma* is the standard
deviation of the gaussian broadening and *n* is the number of
points sampled on the grid.
weighting (dict): Setup the weighting function and its parameters.
For example::
"weighting" : {"function": "exp", "r_cut": 10, "threshold": 1e-3}
The following weighting functions are available:
* :math:`k=2`
* ``"unity"``: No weighting.
* ``"exp"``: Weighting of the form :math:`e^{-sx}`
* ``"inverse_square"``: Weighting of the form :math:`1/(x^2)`
* :math:`k=3`
* ``"unity"``: No weighting.
* ``"exp"``: Weighting of the form :math:`e^{-sx}`
* ``"smooth_cutoff"``: Weighting of the form :math:`f_{ij}f_{ik}`,
where :math:`f = 1+y(x/r_{cut})^{y+1}-(y+1)(x/r_{cut})^{y}`
The meaning of :math:`x` changes for different terms as follows:
* For :math:`k=2`: :math:`x` = Distance between atoms A and B.
* For :math:`k=3`: :math:`x` = Total distance of the path A->B->C->A.
The exponential weighting is motivated by the exponential decay
of screened Coulombic interactions in solids. In the exponential
weighting the parameter **threshold** determines the value of
the weighting function below which the remaining terms are
ignored. Either the parameter **scale** or **r_cut** can be used
to determine the parameter :math:`s`: **scale** directly
corresponds to this value whereas **r_cut** can be used to
indirectly determine it through :math:`s=-\log(\text{threshold})/r_{cut}`.
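As an illustrative example (the values are not recommendations):
with **r_cut** = 10 and **threshold** = 1e-3, the pairwise
(:math:`k=2`) terms use
:math:`s = -\log(10^{-3})/10 \approx 0.69` in units of 1/angstrom.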
The inverse square and smooth cutoff function weightings use a
cutoff parameter **r_cut**, which is a radial distance after
which the rest of the atoms will be ignored. For the smooth
cutoff function, additional weighting key **sharpness** can be
added, which changes the value of :math:`y`. If a value for it
is not provided, it defaults to `2`.
normalize_gaussians (bool): Determines whether the gaussians are
normalized to an area of 1. Defaults to True. If False, the
normalization factor is dropped and the gaussians have the form
:math:`e^{-(x-\mu)^2/2\sigma^2}`.
normalization (str): Determines the method for normalizing the
output. The available options are:
* ``"none"``: No normalization.
* ``"l2"``: Normalize the Euclidean length of the output to unity.
species (iterable): The chemical species as a list of atomic
numbers or as a list of chemical symbols. Notice that this is not
the set of atomic numbers present in an individual system, but
should contain all the elements that will ever be encountered
when creating descriptors for a set of systems. Keeping the
number of chemical species as low as possible is preferable.
periodic (bool): Set to true if you want the descriptor output to
respect the periodicity of the atomic systems (see the
pbc-parameter in the constructor of ase.Atoms).
sparse (bool): Whether the output should be a sparse matrix or a
dense numpy array.
dtype (str): The data type of the output. Valid options are:
* ``"float32"``: Single precision floating point numbers.
* ``"float64"``: Double precision floating point numbers.
"""
super().__init__(
periodic=periodic,
sparse=sparse,
dtype=dtype,
)
self.system = None
self.geometry = geometry
self.grid = grid
self.weighting = weighting
self.species = species
self.normalization = normalization
self.normalize_gaussians = normalize_gaussians
if self.normalization == "valle_oganov" and not periodic:
raise ValueError(
"Valle-Oganov normalization does not support non-periodic systems."
)
# Initializing .create() level variables
self._interaction_limit = None
@property
def grid(self):
return self._grid
@grid.setter
def grid(self, value):
check_grid(value)
self._grid = value
@property
def geometry(self):
return self._geometry
@geometry.setter
def geometry(self, value):
check_geometry(value)
if value["function"] in k1_geometry_functions:
raise ValueError(
"LMBTR does not support geometry functions for degree k=1."
)
k_map = {
"distance": 2,
"inverse_distance": 2,
"angle": 3,
"cosine": 3,
}
self.k = k_map[value["function"]]
self._geometry = value
@property
def weighting(self):
return self._weighting
@weighting.setter
def weighting(self, value):
check_weighting(self.k, value, self.periodic)
self._weighting = value
@property
def species(self):
return self._species
@species.setter
def species(self, value):
"""Used to check the validity of given atomic numbers and to initialize
the C-memory layout for them.
Args:
value(iterable): Chemical species either as a list of atomic
numbers or list of chemical symbols.
"""
# The species are stored as atomic numbers for internal use.
self._set_species(value)
# The atomic number 0 is reserved for ghost atoms in this
# implementation.
if 0 in self._atomic_number_set:
raise ValueError(
"The atomic number 0 is reserved for the ghost atoms in this "
"implementation."
)
self._atomic_number_set.add(0)
indices = np.searchsorted(self._atomic_numbers, 0)
self._atomic_numbers = np.insert(self._atomic_numbers, indices, 0)
# Setup mappings between atom indices and types together with some
# statistics
self.atomic_number_to_index = {}
self.index_to_atomic_number = {}
for i_atom, atomic_number in enumerate(self._atomic_numbers):
self.atomic_number_to_index[atomic_number] = i_atom
self.index_to_atomic_number[i_atom] = atomic_number
self.n_elements = len(self._atomic_numbers)
self.max_atomic_number = max(self._atomic_numbers)
self.min_atomic_number = min(self._atomic_numbers)
@property
def normalization(self):
return self._normalization
@normalization.setter
def normalization(self, value):
"""Checks that the given normalization is valid. Overrides the
normalization check from original MBTR because the normalization with
respect to number of atoms is not valid for a local descriptor.
Args:
value(str): The normalization method to use.
"""
norm_options = set(("none", "l2"))
if value not in norm_options:
raise ValueError(
"Unknown normalization option given. Please use one of the "
"following: {}.".format(", ".join(sorted(list(norm_options))))
)
self._normalization = value
def create(
self, system, centers=None, n_jobs=1, only_physical_cores=False, verbose=False
):
"""Return the LMBTR output for the given systems and given centers.
Args:
system (:class:`ase.Atoms` or list of :class:`ase.Atoms`): One or
many atomic structures.
centers (list): Centers at which to calculate LMBTR. Can be
provided as cartesian positions or atomic indices. If no
centers are defined, the LMBTR output will be created for all
atoms in the system. When calculating LMBTR for multiple
systems, provide a list of centers for each system.
n_jobs (int): Number of parallel jobs to instantiate. Parallelizes
the calculation across samples. Defaults to serial calculation
with n_jobs=1. If a negative number is given, the number of used
CPUs is calculated as n_cpus + n_jobs, where n_cpus is the
number of CPUs reported by the OS. With only_physical_cores
you can control which types of CPUs are counted in n_cpus.
only_physical_cores (bool): If a negative n_jobs is given,
determines which types of CPUs are used in calculating the
number of jobs. If set to False (default), also virtual CPUs
are counted. If set to True, only physical CPUs are counted.
verbose(bool): Controls whether to print the progress of each job
to the console.
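A minimal sketch of creating output for several systems at once (assumes
an LMBTR instance ``lmbtr`` configured for H and O; the molecules and
centers are illustrative)::

    from ase.build import molecule

    samples = [molecule("H2O"), molecule("H2O2")]
    # One list of centers per system: an atomic index for the first
    # system, a cartesian position for the second.
    output = lmbtr.create(
        samples,
        centers=[[0], [[0.0, 0.0, 1.0]]],
        n_jobs=2,
    )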
Returns:
np.ndarray | sparse.COO: The LMBTR output for the given
systems and centers. The return type depends on the
'sparse'-attribute. The first dimension is determined by the number
of centers and systems and the second dimension is determined by
the get_number_of_features()-function.
"""
# Combine input arguments
if isinstance(system, Atoms):
system = [system]
centers = [centers]
n_samples = len(system)
if centers is None:
inp = [(i_sys,) for i_sys in system]
else:
n_pos = len(centers)
if n_pos != n_samples:
raise ValueError(
"The given number of centers does not match the given"
"number of systems."
)
inp = list(zip(system, centers))
# Determine if the outputs have a fixed size
n_features = self.get_number_of_features()
static_size = None
if centers is None:
n_centers = len(inp[0][0])
else:
first_sample, first_pos = inp[0]
if first_pos is not None:
n_centers = len(first_pos)
else:
n_centers = len(first_sample)
def is_static():
for i_job in inp:
if centers is None:
if len(i_job[0]) != n_centers:
return False
else:
if i_job[1] is not None:
if len(i_job[1]) != n_centers:
return False
else:
if len(i_job[0]) != n_centers:
return False
return True
if is_static():
static_size = [n_centers, n_features]
# Create in parallel
output = self.create_parallel(
inp,
self.create_single,
n_jobs,
static_size,
only_physical_cores,
verbose=verbose,
)
return output
def create_single(
self,
system,
centers=None,
):
"""Return the local many-body tensor representation for the given
system and centers.
Args:
system (:class:`ase.Atoms` | :class:`.System`): Input system.
centers (iterable): Centers for which LMBTR is created. Can be
a list of integer numbers or a list of xyz-coordinates. If
integers are provided, the atoms at those indices are used as centers.
If cartesian positions are provided, new ghost atoms are added at those
positions. If no centers are provided, all atoms in the system
will be used as centers.
Returns:
2D ndarray: The local many-body tensor representation for the given
centers, with one row per center, in the order the centers were
given.
"""
# Validate and normalize system
self.validate_positions(system.get_positions())
atomic_numbers = self.validate_atomic_numbers(system.get_atomic_numbers())
pbc = self.validate_pbc(system.get_pbc())
self.validate_cell(system.get_cell(), pbc)
self._interaction_limit = len(system)
system_positions = system.get_positions()
# Ensure that the atomic number 0 is not present in the system
if 0 in set(atomic_numbers):
raise ValueError(
"Please do not use the atomic number 0 in local MBTR as it "
"is reserved to mark the atoms use as analysis centers."
)
# Form a list of indices, centers and atomic numbers for the local
# centers. k=3 and k=2 use a slightly different approach, so two
# versions are built
i_new = len(system)
indices_k2 = []
new_pos_k2 = []
new_atomic_numbers_k2 = []
indices_k3 = []
new_pos_k3 = []
new_atomic_numbers_k3 = []
n_atoms = len(system)
if centers is not None:
# Check validity of centers definitions and create final cartesian
# position list
if len(centers) == 0:
raise ValueError(
"The argument 'centers' should contain a non-empty set of"
" atomic indices or cartesian coordinates with x, y and z "
"components."
)
for i in centers:
if np.issubdtype(type(i), np.integer):
i_len = len(system)
if i >= i_len or i < 0:
raise ValueError(
"The provided index {} is not valid for the system "
"with {} atoms.".format(i, i_len)
)
indices_k2.append(i)
indices_k3.append(i)
new_pos_k2.append(system_positions[i])
new_atomic_numbers_k2.append(atomic_numbers[i])
elif isinstance(i, (list, tuple, np.ndarray)):
if len(i) != 3:
raise ValueError(
"The argument 'centers' should contain a "
"non-empty set of atomic indices or cartesian "
"coordinates with x, y and z components."
)
new_pos_k2.append(np.array(i))
new_pos_k3.append(np.array(i))
new_atomic_numbers_k2.append(0)
new_atomic_numbers_k3.append(0)
i_new += 1
else:
raise ValueError(
"Create method requires the argument 'centers', a "
"list of atom indices and/or positions."
)
# If centers are not supplied, it is assumed that each atom is used
# as a center
else:
indices_k2 = np.arange(n_atoms)
indices_k3 = np.arange(n_atoms)
new_pos_k2 = system.get_positions()
new_atomic_numbers_k2 = system.get_atomic_numbers()
# Calculate the "raw" output
if self.k == 2:
new_system = System(
symbols=new_atomic_numbers_k2,
positions=new_pos_k2,
)
indices = indices_k2
elif self.k == 3:
new_system = System(
symbols=new_atomic_numbers_k3,
positions=new_pos_k3,
)
indices = indices_k3
mbtr = getattr(self, f"_get_k{self.k}")(system, new_system, indices)
# Handle normalization
if self.normalization == "l2":
    mbtr = sparse.COO.from_scipy_sparse(
        normalize(mbtr.tocsr(), norm="l2", axis=1, copy=False)
    )
# Make into a dense array
result = mbtr.todense()
return result
def get_number_of_features(self):
"""Used to inquire the final number of features that this descriptor
will have.
The number of features for the LMBTR is calculated as follows:
For the pair term (k=2), only pairs where at least one of the atoms is
the central atom (in periodic systems the central atom may connect to
itself) are considered. This means that there are only as many
combinations as there are different elements to pair the central atom
with (n_elem). This number of combinations is then multiplied by the
number of points in the k=2 grid.
For the three-body term (k=3), only triplets where at least one of the
atoms is the central atom (in periodic systems the central atom may
connect to itself) and k >= i (symmetry) are considered. This means
that as k runs from 0 to n-1, where n is the number of elements, there
are n + k combinations that fulfill this rule. The sum becomes
:math:`\sum_{k=0}^{n-1} (n + k) = n^2 + n(n-1)/2`. This number of
combinations is then multiplied by the number of points in the k=3 grid.
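As an illustrative example: with species=["H", "O"] the ghost species X
is added automatically, so n_elem = 3. With n = 50 grid points this
gives 3*50 = 150 features for k=2 and 3*(3*3-1)/2*50 = 600 features
for k=3.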
Returns:
int: Number of features for this descriptor.
"""
n_features = 0
n_elem = self.n_elements
n_grid = self.grid["n"]
if self.k == 2:
n_features = (n_elem) * n_grid
if self.k == 3:
n_features = n_elem * (3 * n_elem - 1) * n_grid / 2
return int(n_features)
def _make_new_klist_local(self, kx_list):
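    """Converts the k-term maps returned by the C++ extension, which use
    comma-separated strings as keys, into dictionaries keyed by integer
    tuples, with the values converted to numpy arrays of the configured
    dtype.
    """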
new_kx_list = []
for item in kx_list:
new_kx_map = {}
item = dict(item)
for key, value in item.items():
new_key = tuple(int(x) for x in key.split(","))
new_kx_map[new_key] = np.array(value, dtype=self.dtype)
new_kx_list.append(new_kx_map)
return new_kx_list
def _get_k2(self, system, new_system, indices):
"""Calculates the second order terms where the scalar mapping is the
inverse distance between atoms.
Returns:
1D ndarray: flattened K2 values.
"""
start = self.grid["min"]
stop = self.grid["max"]
n = self.grid["n"]
sigma = self.grid["sigma"]
# Determine the weighting function and possible radial cutoff
radial_cutoff = None
parameters = {}
if self.weighting is not None:
weighting_function = self.weighting["function"]
if weighting_function == "exponential" or weighting_function == "exp":
scale = self.weighting["scale"]
threshold = self.weighting["threshold"]
if scale != 0:
radial_cutoff = -math.log(threshold) / scale
parameters = {
b"scale": self.weighting["scale"],
b"threshold": self.weighting["threshold"],
}
else:
weighting_function = "unity"
# Determine the geometry function
geom_func_name = self.geometry["function"]
# Calculate extended system
if self.periodic:
centers = new_system.get_positions()
ext_system, cell_indices = dscribe.utils.geometry.get_extended_system(
system,
radial_cutoff,
centers,
return_cell_indices=True,
)
ext_system = System.from_atoms(ext_system)
else:
ext_system = System.from_atoms(system)
cell_indices = np.zeros((len(system), 3), dtype=int)
cmbtr = MBTRWrapper(
self.atomic_number_to_index, self._interaction_limit, cell_indices
)
# If radial cutoff is finite, use it to calculate the sparse distance
# matrix to reduce computational complexity from O(n^2) to O(n log(n)).
# If radial cutoff is not available, calculate full matrix.
n_atoms_ext = len(ext_system)
n_atoms_new = len(new_system)
ext_pos = ext_system.get_positions()
new_pos = new_system.get_positions()
if radial_cutoff is not None:
dmat = new_system.get_distance_matrix_within_radius(
radial_cutoff, pos=ext_pos
)
adj_list = dscribe.utils.geometry.get_adjacency_list(dmat)
dmat_dense = np.full(
(n_atoms_new, n_atoms_ext), sys.float_info.max
) # The non-neighbor values are treated as "infinitely far".
dmat_dense[dmat.row, dmat.col] = dmat.data
else:
dmat_dense = scipy.spatial.distance.cdist(new_pos, ext_pos)
adj_list = np.tile(np.arange(n_atoms_ext), (n_atoms_new, 1))
# Form new indices that include the existing atoms and the newly added
# ones
indices = np.array(
np.append(
indices, [n_atoms_ext + i for i in range(n_atoms_new - len(indices))]
),
dtype=int,
)
k2_list = cmbtr.get_k2_local(
indices,
ext_system.get_atomic_numbers(),
dmat_dense,
adj_list,
geom_func_name.encode(),
weighting_function.encode(),
parameters,
start,
stop,
sigma,
n,
)
k2_list = self._make_new_klist_local(k2_list)
n_elem = self.n_elements
n_loc = len(indices)
k2 = sparse.DOK((n_loc, n_elem * n), dtype=self.dtype)
for i_loc, k2_map in enumerate(k2_list):
for key, gaussian_sum in k2_map.items():
i = key[1]
m = i
start = int(m * n)
end = int((m + 1) * n)
# Denormalize if requested
if not self.normalize_gaussians:
max_val = 1 / (sigma * math.sqrt(2 * math.pi))
gaussian_sum /= max_val
k2[i_loc, start:end] = gaussian_sum
k2 = k2.to_coo()
return k2
def _get_k3(self, system, new_system, indices):
"""Calculates the second order terms where the scalar mapping is the
inverse distance between atoms.
Returns:
1D ndarray: flattened K2 values.
"""
start = self.grid["min"]
stop = self.grid["max"]
n = self.grid["n"]
sigma = self.grid["sigma"]
# Determine the weighting function and possible radial cutoff
radial_cutoff = None
parameters = {}
if self.weighting is not None:
weighting_function = self.weighting["function"]
if weighting_function == "exponential" or weighting_function == "exp":
scale = self.weighting["scale"]
threshold = self.weighting["threshold"]
if scale != 0:
radial_cutoff = -0.5 * math.log(threshold) / scale
parameters = {b"scale": scale, b"threshold": threshold}
else:
weighting_function = "unity"
# Determine the geometry function
geom_func_name = self.geometry["function"]
# Calculate extended system
if self.periodic:
centers_new = new_system.get_positions()
centers_existing = system.get_positions()[indices]
centers = np.concatenate((centers_new, centers_existing), axis=0)
ext_system, cell_indices = dscribe.utils.geometry.get_extended_system(
system,
radial_cutoff,
centers,
return_cell_indices=True,
)
ext_system = System.from_atoms(ext_system)
else:
ext_system = System.from_atoms(system)
cell_indices = np.zeros((len(system), 3), dtype=int)
cmbtr = MBTRWrapper(
self.atomic_number_to_index, self._interaction_limit, cell_indices
)
# If radial cutoff is finite, use it to calculate the sparse
# distance matrix to reduce computational complexity from O(n^2) to
# O(n log(n))
fin_system = ext_system + new_system
n_atoms_ext = len(ext_system)
n_atoms_fin = len(fin_system)
n_atoms_new = len(new_system)
ext_pos = ext_system.get_positions()
new_pos = new_system.get_positions()
if radial_cutoff is not None:
# Calculate distance within the extended system
dmat_ext_to_ext = ext_system.get_distance_matrix_within_radius(
radial_cutoff, pos=ext_pos
)
col = dmat_ext_to_ext.col
row = dmat_ext_to_ext.row
data = dmat_ext_to_ext.data
dmat = scipy.sparse.coo_matrix(
(data, (row, col)), shape=(n_atoms_fin, n_atoms_fin)
)
# Calculate the distances from the new positions to atoms in the
# extended system using the cutoff
if len(new_pos) != 0:
dmat_ext_to_new = ext_system.get_distance_matrix_within_radius(
radial_cutoff, pos=new_pos
)
col = dmat_ext_to_new.col
row = dmat_ext_to_new.row
data = dmat_ext_to_new.data
dmat.col = np.append(dmat.col, col + n_atoms_ext)
dmat.row = np.append(dmat.row, row)
dmat.data = np.append(dmat.data, data)
dmat.col = np.append(dmat.col, row)
dmat.row = np.append(dmat.row, col + n_atoms_ext)
dmat.data = np.append(dmat.data, data)
# Calculate adjacencies and transform to the dense matrix for
# sending information to C++
adj_list = dscribe.utils.geometry.get_adjacency_list(dmat)
dmat_dense = np.full(
(n_atoms_fin, n_atoms_fin), sys.float_info.max
) # The non-neighbor values are treated as "infinitely far".
dmat_dense[dmat.row, dmat.col] = dmat.data
# If no weighting is used, the full distance matrix is calculated
else:
dmat = scipy.sparse.lil_matrix((n_atoms_fin, n_atoms_fin))
# Fill in block for extended system
dmat_ext_to_ext = ext_system.get_distance_matrix()
dmat[0:n_atoms_ext, 0:n_atoms_ext] = dmat_ext_to_ext
# Fill in block for extended system to new system
dmat_ext_to_new = scipy.spatial.distance.cdist(ext_pos, new_pos)
dmat[0:n_atoms_ext, n_atoms_ext : n_atoms_ext + n_atoms_new] = (
dmat_ext_to_new
)
dmat[n_atoms_ext : n_atoms_ext + n_atoms_new, 0:n_atoms_ext] = (
dmat_ext_to_new.T
)
# Calculate adjacencies and the dense version
dmat = dmat.tocoo()
adj_list = dscribe.utils.geometry.get_adjacency_list(dmat)
dmat_dense = np.full(
(n_atoms_fin, n_atoms_fin), sys.float_info.max
) # The non-neighbor values are treated as "infinitely far".
dmat_dense[dmat.row, dmat.col] = dmat.data
# Form new indices that include the existing atoms and the newly added
# ones
indices = np.array(
np.append(indices, [n_atoms_ext + i for i in range(n_atoms_new)]), dtype=int
)
k3_list = cmbtr.get_k3_local(
indices,
fin_system.get_atomic_numbers(),
dmat_dense,
adj_list,
geom_func_name.encode(),
weighting_function.encode(),
parameters,
start,
stop,
sigma,
n,
)
k3_list = self._make_new_klist_local(k3_list)
n_elem = self.n_elements
n_loc = len(indices)
k3 = sparse.DOK(
(n_loc, int((n_elem * (3 * n_elem - 1) * n / 2))), dtype=self.dtype
)
for i_loc, k3_map in enumerate(k3_list):
for key, gaussian_sum in k3_map.items():
i = key[0]
j = key[1]
k = key[2]
# This is the index of the spectrum. It is given by enumerating the
# elements of a three-dimensional array and only considering
# elements for which k>=i and i || j == 0. The enumeration begins
# from [0, 0, 0], and ends at [n_elem-1, n_elem-1, n_elem-1], looping
# the elements in the order k, i, j.
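# Illustrative example: with n_elem = 3, the triplet (i, j, k) = (0, 0, 1)
# gives m = 1, while (i, j, k) = (1, 0, 1) gives m = 1 + 3 - 1 = 3.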
if j == 0:
m = k + i * n_elem - i * (i + 1) / 2
else:
m = n_elem * (n_elem + 1) / 2 + (j - 1) * n_elem + k
start = int(m * n)
end = int((m + 1) * n)
# Denormalize if requested
if not self.normalize_gaussians:
max_val = 1 / (sigma * math.sqrt(2 * math.pi))
gaussian_sum /= max_val
k3[i_loc, start:end] = gaussian_sum
k3 = k3.to_coo()
return k3
def get_location(self, species):
"""Can be used to query the location of a species combination in the
the flattened output.
Args:
species(tuple): A tuple containing a species combination as
chemical symbols or atomic numbers. The central atom is marked as
species "X". The tuple can be for example ("X", "O") or ("X", "O",
"H")
Returns:
slice: slice containing the location of the specified species
combination. The location is given as a python slice-object, that
can be directly used to target ranges in the output.
Raises:
ValueError: If the requested species combination is not in the
output or if invalid species are defined.
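A minimal sketch of slicing the output with the returned location (the
``lmbtr`` instance and ``output`` array are assumed to come from an
earlier ``create()`` call with geometry function "distance")::

    loc = lmbtr.get_location(("X", "O"))
    x_o_part = output[0, loc]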
"""
# Check that the corresponding part is calculated
k = len(species)
if k != self.k:
raise ValueError(
"Cannot retrieve the location for {}, as the term k={} has not "
"been specified.".format(species, k)
)
# Change chemical elements into atomic numbers
numbers = []
for specie in species:
if isinstance(specie, str):
try:
specie = ase.data.atomic_numbers[specie]
except KeyError:
raise ValueError("Invalid chemical species")
numbers.append(specie)
# Check that species exists and that X is included
self.validate_atomic_numbers(numbers)
if 0 not in numbers:
raise ValueError(
"The central species X (atomic number 0) has to be one of the elements."
)
# Change into internal indexing
numbers = [self.atomic_number_to_index[x] for x in numbers]
n_elem = self.n_elements
n = self.grid["n"]
# k=2
if k == 2:
if numbers[0] > numbers[1]:
numbers = list(reversed(numbers))
j = numbers[1]
m = j
start = int(m * n)
end = int((m + 1) * n)
# k=3
if k == 3:
if numbers[0] > numbers[2]:
numbers = list(reversed(numbers))
i = numbers[0]
j = numbers[1]
k = numbers[2]
# This is the index of the spectrum. It is given by enumerating the
# elements of a three-dimensional array and only considering
# elements for which k>=i and i || j == 0. The enumeration begins
# from [0, 0, 0], and ends at [n_elem-1, n_elem-1, n_elem-1], looping
# the elements in the order k, i, j.
if j == 0:
m = k + i * n_elem - i * (i + 1) / 2
else:
m = n_elem * (n_elem + 1) / 2 + (j - 1) * n_elem + k
start = int(m * n)
end = int((m + 1) * n)
return slice(start, end)