Source code for dscribe.descriptors.mbtr

# -*- coding: utf-8 -*-
"""Copyright 2019 DScribe developers

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import sys
import math
import numpy as np

import sparse

from ase import Atoms
import ase.data

from dscribe.core import System
from dscribe.descriptors import Descriptor
from dscribe.ext import MBTRWrapper
import dscribe.utils.geometry


[docs]class MBTR(Descriptor):
    """Implementation of the Many-body tensor representation up to :math:`k=3`.

    You can choose which terms to include by providing a dictionary in the
    k1, k2 or k3 arguments. This dictionary should contain information
    under three keys: "geometry", "grid" and "weighting". See the examples
    below for how to format these dictionaries.

    You can use this descriptor for finite and periodic systems. When dealing
    with periodic systems or when using machine learning models that use the
    Euclidean norm to measure distance between vectors, it is advisable to use
    some form of normalization.

    For the geometry functions the following choices are available:

    * :math:`k=1`:

       * "atomic_number": The atomic numbers.

    * :math:`k=2`:

       * "distance": Pairwise distance in angstroms.
       * "inverse_distance": Pairwise inverse distance in 1/angstrom.

    * :math:`k=3`:

       * "angle": Angle in degrees.
       * "cosine": Cosine of the angle.

    For the weighting the following functions are available:

    * :math:`k=1`:

       * "unity": No weighting.

    * :math:`k=2`:

       * "unity": No weighting.
       * "exp" or "exponential": Weighting of the form :math:`e^{-sx}`

    * :math:`k=3`:

       * "unity": No weighting.
       * "exp" or "exponential": Weighting of the form :math:`e^{-sx}`

    The exponential weighting is motivated by the exponential decay of screened
    Coulombic interactions in solids. In the exponential weighting the
    parameters **cutoff** determines the value of the weighting function after
    which the rest of the terms will be ignored and the parameter **scale**
    corresponds to :math:`s`. The meaning of :math:`x` changes for different
    terms as follows:

    * :math:`k=2`: :math:`x` = Distance between A->B
    * :math:`k=3`: :math:`x` = Distance from A->B->C->A.

    In the grid setup *min* is the minimum value of the axis, *max* is the
    maximum value of the axis, *sigma* is the standard deviation of the
    gaussian broadening and *n* is the number of points sampled on the
    grid.

    If flatten=False, a list of dense np.ndarrays for each k in ascending order
    is returned. These arrays are of dimension (n_elements x n_elements x
    n_grid_points), where the elements are sorted in ascending order by their
    atomic number.

    If flatten=True, a sparse.COO sparse matrix is returned. This sparse matrix
    is of size (n_features,), where n_features is given by
    get_number_of_features(). This vector is ordered so that the different
    k-terms are ordered in ascending order, and within each k-term the
    distributions at each entry (i, j, h) of the tensor are ordered in an
    ascending order by (i * n_elements) + (j * n_elements) + (h * n_elements).

    This implementation does not support the use of a non-identity correlation
    matrix.
    """

[docs]    def __init__(
        self,
        species,
        periodic,
        k1=None,
        k2=None,
        k3=None,
        normalize_gaussians=True,
        normalization="none",
        flatten=True,
        sparse=False,
    ):
        """
        Args:
            species (iterable): The chemical species as a list of atomic
                numbers or as a list of chemical symbols. Notice that this is not
                the atomic numbers that are present for an individual system, but
                should contain all the elements that are ever going to be
                encountered when creating the descriptors for a set of systems.
                Keeping the number of chemical speices as low as possible is
                preferable.
            periodic (bool): Set to true if you want the descriptor output to
                respect the periodicity of the atomic systems (see the
                pbc-parameter in the constructor of ase.Atoms).
            k1 (dict): Setup for the k=1 term. For example::

                k1 = {
                    "geometry": {"function": "atomic_number"},
                    "grid": {"min": 1, "max": 10, "sigma": 0.1, "n": 50}
                }

            k2 (dict): Dictionary containing the setup for the k=2 term.
                Contains setup for the used geometry function, discretization and
                weighting function. For example::

                    k2 = {
                        "geometry": {"function": "inverse_distance"},
                        "grid": {"min": 0.1, "max": 2, "sigma": 0.1, "n": 50},
                        "weighting": {"function": "exp", "scale": 0.75, "cutoff": 1e-2}
                    }

            k3 (dict): Dictionary containing the setup for the k=3 term.
                Contains setup for the used geometry function, discretization and
                weighting function. For example::

                    k3 = {
                        "geometry": {"function": "angle"},
                        "grid": {"min": 0, "max": 180, "sigma": 5, "n": 50},
                        "weighting" : {"function": "exp", "scale": 0.5, "cutoff": 1e-3}
                    }

            normalize_gaussians (bool): Determines whether the gaussians are
                normalized to an area of 1. Defaults to True. If False, the
                normalization factor is dropped and the gaussians have the form.
                :math:`e^{-(x-\mu)^2/2\sigma^2}`
            normalization (str): Determines the method for normalizing the
                output. The available options are:

                * "none": No normalization.
                * "l2_each": Normalize the Euclidean length of each k-term
                  individually to unity.
                * "n_atoms": Normalize the output by dividing it with the number
                  of atoms in the system. If the system is periodic, the number
                  of atoms is determined from the given unit cell.

            flatten (bool): Whether the output should be flattened to a 1D
                array. If False, a dictionary of the different tensors is
                provided, containing the values under keys: "k1", "k2", and
                "k3":
            sparse (bool): Whether the output should be a sparse matrix or a
                dense numpy array.
        """
        if sparse and not flatten:
            raise ValueError(
                "Sparse, non-flattened output is currently not supported. If "
                "you want a non-flattened output, please specify sparse=False "
                "in the MBTR constructor."
            )
        super().__init__(periodic=periodic, flatten=flatten, sparse=sparse)
        self.system = None
        self.k1 = k1
        self.k2 = k2
        self.k3 = k3

        # Setup the involved chemical species
        self.species = species

        self.normalization = normalization
        self.normalize_gaussians = normalize_gaussians

        # Initializing .create() level variables
        self._interaction_limit = None

        # Check that weighting function is specified for periodic systems
        if self.periodic:
            if self.k2 is not None:
                valid = False
                weighting = self.k2.get("weighting")
                if weighting is not None:
                    function = weighting.get("function")
                    if function is not None:
                        if function != "unity":
                            valid = True
                if not valid:
                    raise ValueError(
                        "Periodic systems need to have a weighting function."
                    )

            if self.k3 is not None:
                valid = False
                weighting = self.k3.get("weighting")
                if weighting is not None:
                    function = weighting.get("function")
                    if function is not None:
                        if function != "unity":
                            valid = True

                if not valid:
                    raise ValueError(
                        "Periodic systems need to have a weighting function."
                    )

[docs]    def check_grid(self, grid):
        """Used to ensure that the given grid settings are valid.

        Args:
            grid(dict): Dictionary containing the grid setup.
        """
        msg = "The grid information is missing the value for {}"
        val_names = ["min", "max", "sigma", "n"]
        for val_name in val_names:
            try:
                grid[val_name]
            except Exception:
                raise KeyError(msg.format(val_name))

        # Make the n into integer
        grid["n"] = int(grid["n"])
        if grid["min"] >= grid["max"]:
            raise ValueError("The min value should be smaller than the max value.")

    @property
    def k1(self):
        return self._k1

    @k1.setter
    def k1(self, value):
        if value is not None:

            # Check that only valid keys are used in the setups
            for key in value.keys():
                valid_keys = set(("geometry", "grid", "weighting"))
                if key not in valid_keys:
                    raise ValueError(
                        "The given setup contains the following invalid key: {}".format(
                            key
                        )
                    )

            # Check the geometry function
            geom_func = value["geometry"].get("function")
            if geom_func is not None:
                valid_geom_func = set(("atomic_number",))
                if geom_func not in valid_geom_func:
                    raise ValueError(
                        "Unknown geometry function specified for k=1. Please use one of"
                        " the following: {}".format(valid_geom_func)
                    )

            # Check the weighting function
            weighting = value.get("weighting")
            if weighting is not None:
                valid_weight_func = set(("unity",))
                weight_func = weighting.get("function")
                if weight_func not in valid_weight_func:
                    raise ValueError(
                        "Unknown weighting function specified for k=1. Please use one of"
                        " the following: {}".format(valid_weight_func)
                    )

            # Check grid
            self.check_grid(value["grid"])
        self._k1 = value

    @property
    def k2(self):
        return self._k2

    @k2.setter
    def k2(self, value):
        if value is not None:

            # Check that only valid keys are used in the setups
            for key in value.keys():
                valid_keys = set(("geometry", "grid", "weighting"))
                if key not in valid_keys:
                    raise ValueError(
                        "The given setup contains the following invalid key: {}".format(
                            key
                        )
                    )

            # Check the geometry function
            geom_func = value["geometry"].get("function")
            if geom_func is not None:
                valid_geom_func = set(("distance", "inverse_distance"))
                if geom_func not in valid_geom_func:
                    raise ValueError(
                        "Unknown geometry function specified for k=2. Please use one of"
                        " the following: {}".format(valid_geom_func)
                    )

            # Check the weighting function
            weighting = value.get("weighting")
            if weighting is not None:
                valid_weight_func = set(("unity", "exponential", "exp"))
                weight_func = weighting.get("function")
                if weight_func not in valid_weight_func:
                    raise ValueError(
                        "Unknown weighting function specified for k=2. Please use one of"
                        " the following: {}".format(valid_weight_func)
                    )
                else:
                    if weight_func == "exponential" or weight_func == "exp":
                        needed = ("cutoff", "scale")
                        for pname in needed:
                            param = weighting.get(pname)
                            if param is None:
                                raise ValueError(
                                    "Missing value for '{}' in the k=2 weighting.".format(
                                        key
                                    )
                                )

            # Check grid
            self.check_grid(value["grid"])
        self._k2 = value

    @property
    def k3(self):
        return self._k3

    @k3.setter
    def k3(self, value):
        if value is not None:

            # Check that only valid keys are used in the setups
            for key in value.keys():
                valid_keys = set(("geometry", "grid", "weighting"))
                if key not in valid_keys:
                    raise ValueError(
                        "The given setup contains the following invalid key: {}".format(
                            key
                        )
                    )

            # Check the geometry function
            geom_func = value["geometry"].get("function")
            if geom_func is not None:
                valid_geom_func = set(("angle", "cosine"))
                if geom_func not in valid_geom_func:
                    raise ValueError(
                        "Unknown geometry function specified for k=2. Please use one of"
                        " the following: {}".format(valid_geom_func)
                    )

            # Check the weighting function
            weighting = value.get("weighting")
            if weighting is not None:
                valid_weight_func = set(("unity", "exponential", "exp"))
                weight_func = weighting.get("function")
                if weight_func not in valid_weight_func:
                    raise ValueError(
                        "Unknown weighting function specified for k=2. Please use one of"
                        " the following: {}".format(valid_weight_func)
                    )
                else:
                    if weight_func == "exponential" or weight_func == "exp":
                        needed = ("cutoff", "scale")
                        for pname in needed:
                            param = weighting.get(pname)
                            if param is None:
                                raise ValueError(
                                    "Missing value for '{}' in the k=3 weighting.".format(
                                        key
                                    )
                                )

            # Check grid
            self.check_grid(value["grid"])
        self._k3 = value

    @property
    def species(self):
        return self._species

    @species.setter
    def species(self, value):
        """Used to check the validity of given atomic numbers and to initialize
        the C-memory layout for them.

        Args:
            value(iterable): Chemical species either as a list of atomic
                numbers or list of chemical symbols.
        """
        # The species are stored as atomic numbers for internal use.
        self._set_species(value)

        # Setup mappings between atom indices and types together with some
        # statistics
        self.atomic_number_to_index = {}
        self.index_to_atomic_number = {}
        for i_atom, atomic_number in enumerate(self._atomic_numbers):
            self.atomic_number_to_index[atomic_number] = i_atom
            self.index_to_atomic_number[i_atom] = atomic_number
        self.n_elements = len(self._atomic_numbers)
        self.max_atomic_number = max(self._atomic_numbers)
        self.min_atomic_number = min(self._atomic_numbers)

    @property
    def normalization(self):
        return self._normalization

    @normalization.setter
    def normalization(self, value):
        """Checks that the given normalization is valid.

        Args:
            value(str): The normalization method to use.
        """
        norm_options = set(("l2_each", "none", "n_atoms"))
        if value not in norm_options:
            raise ValueError(
                "Unknown normalization option given. Please use one of the "
                "following: {}.".format(", ".join([str(x) for x in norm_options]))
            )
        self._normalization = value

[docs]    def get_k1_axis(self):
        """Used to get the discretized axis for geometry function of the k=1
        term.

        Returns:
            np.ndarray: The discretized axis for the k=1 term.
        """
        start = self.k1["grid"]["min"]
        stop = self.k1["grid"]["max"]
        n = self.k1["grid"]["n"]

        return np.linspace(start, stop, n)

[docs]    def get_k2_axis(self):
        """Used to get the discretized axis for geometry function of the k=2
        term.

        Returns:
            np.ndarray: The discretized axis for the k=2 term.
        """
        start = self.k2["grid"]["min"]
        stop = self.k2["grid"]["max"]
        n = self.k2["grid"]["n"]

        return np.linspace(start, stop, n)

[docs]    def get_k3_axis(self):
        """Used to get the discretized axis for geometry function of the k=3
        term.

        Returns:
            np.ndarray: The discretized axis for the k=3 term.
        """
        start = self.k3["grid"]["min"]
        stop = self.k3["grid"]["max"]
        n = self.k3["grid"]["n"]

        return np.linspace(start, stop, n)

[docs]    def create(self, system, n_jobs=1, only_physical_cores=False, verbose=False):
        """Return MBTR output for the given systems.

        Args:
            system (:class:`ase.Atoms` or list of :class:`ase.Atoms`): One or many atomic structures.
            n_jobs (int): Number of parallel jobs to instantiate. Parallellizes
                the calculation across samples. Defaults to serial calculation
                with n_jobs=1. If a negative number is given, the used cpus
                will be calculated with, n_cpus + n_jobs, where n_cpus is the
                amount of CPUs as reported by the OS. With only_physical_cores
                you can control which types of CPUs are counted in n_cpus.
            only_physical_cores (bool): If a negative n_jobs is given,
                determines which types of CPUs are used in calculating the
                number of jobs. If set to False (default), also virtual CPUs
                are counted.  If set to True, only physical CPUs are counted.
            verbose(bool): Controls whether to print the progress of each job
                into to the console.

        Returns:
            np.ndarray | sparse.COO | list: MBTR for the
            given systems. The return type depends on the 'sparse' and
            'flatten'-attributes. For flattened output a single numpy array or
            sparse.COO matrix is returned. If the output is not flattened,
            dictionaries containing the MBTR tensors for each k-term are
            returned.
        """
        # If single system given, skip the parallelization
        if isinstance(system, (Atoms, System)):
            return self.create_single(system)
        else:
            self._check_system_list(system)

        # Combine input arguments
        inp = [(i_sys,) for i_sys in system]

        # Determine if the outputs have a fixed size
        if self.flatten:
            static_size = [self.get_number_of_features()]
        else:
            static_size = None

        # Create in parallel
        output = self.create_parallel(
            inp,
            self.create_single,
            n_jobs,
            static_size,
            only_physical_cores,
            verbose=verbose,
        )

        return output

[docs]    def create_single(self, system):
        """Return the many-body tensor representation for the given system.

        Args:
            system (:class:`ase.Atoms` | :class:`.System`): Input system.

        Returns:
            dict | np.ndarray | sparse.COO: The return type is
            specified by the 'flatten' and 'sparse'-parameters. If the output
            is not flattened, a dictionary containing of MBTR outputs as numpy
            arrays is created. Each output is under a "kX" key. If the output
            is flattened, a single concatenated output vector is returned,
            either as a sparse or a dense vector.
        """
        # Transform the input system into the internal System-object
        system = self.get_system(system)

        # Ensuring variables are re-initialized when a new system is introduced
        self.system = system
        self._interaction_limit = len(system)

        # Check that the system does not have elements that are not in the list
        # of atomic numbers
        self.check_atomic_numbers(system.get_atomic_numbers())

        mbtr = {}
        if self.k1 is not None:
            mbtr["k1"] = self._get_k1(system)
        if self.k2 is not None:
            mbtr["k2"] = self._get_k2(system)
        if self.k3 is not None:
            mbtr["k3"] = self._get_k3(system)

        # Handle normalization
        if self.normalization == "l2_each":
            if self.flatten is True:
                for key, value in mbtr.items():
                    i_data = np.array(value.data)
                    i_norm = np.linalg.norm(i_data)
                    mbtr[key] = value / i_norm
            else:
                for key, value in mbtr.items():
                    i_data = value.ravel()
                    i_norm = np.linalg.norm(i_data)
                    mbtr[key] = value / i_norm
        elif self.normalization == "n_atoms":
            n_atoms = len(self.system)
            if self.flatten is True:
                for key, value in mbtr.items():
                    mbtr[key] = value / n_atoms
            else:
                for key, value in mbtr.items():
                    mbtr[key] = value / n_atoms

        # Flatten output if requested
        if self.flatten:
            keys = sorted(mbtr.keys())
            if len(keys) > 1:
                mbtr = sparse.concatenate([mbtr[key] for key in keys], axis=0)
            else:
                mbtr = mbtr[keys[0]]

            # Make into a dense array if requested
            if not self.sparse:
                mbtr = mbtr.todense()

        return mbtr

[docs]    def get_number_of_features(self):
        """Used to inquire the final number of features that this descriptor
        will have.

        Returns:
            int: Number of features for this descriptor.
        """
        n_features = 0
        n_elem = self.n_elements

        if self.k1 is not None:
            n_k1_grid = self.k1["grid"]["n"]
            n_k1 = n_elem * n_k1_grid
            n_features += n_k1
        if self.k2 is not None:
            n_k2_grid = self.k2["grid"]["n"]
            n_k2 = (n_elem * (n_elem + 1) / 2) * n_k2_grid
            n_features += n_k2
        if self.k3 is not None:
            n_k3_grid = self.k3["grid"]["n"]
            n_k3 = (n_elem * n_elem * (n_elem + 1) / 2) * n_k3_grid
            n_features += n_k3

        return int(n_features)

[docs]    def get_location(self, species):
        """Can be used to query the location of a species combination in the
        the flattened output.

        Args:
            species(tuple): A tuple containing a species combination as
            chemical symbols or atomic numbers. The tuple can be for example
            ("H"), ("H", "O") or ("H", "O", "H").

        Returns:
            slice: slice containing the location of the specified species
            combination. The location is given as a python slice-object, that
            can be directly used to target ranges in the output.

        Raises:
            ValueError: If the requested species combination is not in the
                output or if invalid species defined.
        """
        # Check that the corresponding part is calculated
        k = len(species)
        term = getattr(self, "k{}".format(k))
        if term is None:
            raise ValueError(
                "Cannot retrieve the location for {}, as the term k{} has not "
                "been specied.".format(species, k)
            )

        # Change chemical elements into atomic numbers
        numbers = []
        for specie in species:
            if isinstance(specie, str):
                try:
                    specie = ase.data.atomic_numbers[specie]
                except KeyError:
                    raise ValueError("Invalid chemical species: {}".format(specie))
            numbers.append(specie)

        # Change into internal indexing
        numbers = [self.atomic_number_to_index[x] for x in numbers]
        n_elem = self.n_elements

        # k=1
        if len(numbers) == 1:
            n1 = self.k1["grid"]["n"]
            i = numbers[0]
            m = i
            start = int(m * n1)
            end = int((m + 1) * n1)

        # k=2
        if len(numbers) == 2:
            if numbers[0] > numbers[1]:
                numbers = list(reversed(numbers))

            n2 = self.k2["grid"]["n"]
            i = numbers[0]
            j = numbers[1]

            # This is the index of the spectrum. It is given by enumerating the
            # elements of an upper triangular matrix from left to right and top
            # to bottom.
            m = j + i * n_elem - i * (i + 1) / 2

            offset = 0
            if self.k1 is not None:
                n1 = self.k1["grid"]["n"]
                offset += n_elem * n1
            start = int(offset + m * n2)
            end = int(offset + (m + 1) * n2)

        # k=3
        if len(numbers) == 3:
            if numbers[0] > numbers[2]:
                numbers = list(reversed(numbers))

            n3 = self.k3["grid"]["n"]
            i = numbers[0]
            j = numbers[1]
            k = numbers[2]

            # This is the index of the spectrum. It is given by enumerating the
            # elements of a three-dimensional array where for valid elements
            # k>=i. The enumeration begins from [0, 0, 0], and ends at [n_elem,
            # n_elem, n_elem], looping the elements in the order k, i, j.
            m = j * n_elem * (n_elem + 1) / 2 + k + i * n_elem - i * (i + 1) / 2

            offset = 0
            if self.k1 is not None:
                n1 = self.k1["grid"]["n"]
                offset += n_elem * n1
            if self.k2 is not None:
                n2 = self.k2["grid"]["n"]
                offset += (n_elem * (n_elem + 1) / 2) * n2
            start = int(offset + m * n3)
            end = int(offset + (m + 1) * n3)

        return slice(start, end)

    def _make_new_k1map(self, kx_map):
        kx_map = dict(kx_map)
        new_kx_map = {}

        for key, value in kx_map.items():
            new_key = tuple([int(key)])
            new_kx_map[new_key] = np.array(value, dtype=np.float32)

        return new_kx_map

    def _make_new_kmap(self, kx_map):
        kx_map = dict(kx_map)
        new_kx_map = {}

        for key, value in kx_map.items():
            new_key = tuple(int(x) for x in key.split(","))
            new_kx_map[new_key] = np.array(value, dtype=np.float32)

        return new_kx_map

    def _get_k1(self, system):
        """Calculates the second order terms where the scalar mapping is the
        inverse distance between atoms.

        Returns:
            1D ndarray: flattened K2 values.
        """
        grid = self.k1["grid"]
        start = grid["min"]
        stop = grid["max"]
        n = grid["n"]
        sigma = grid["sigma"]

        # Determine the geometry function
        geom_func_name = self.k1["geometry"]["function"]

        cmbtr = MBTRWrapper(
            self.atomic_number_to_index,
            self._interaction_limit,
            np.zeros((len(system), 3), dtype=int),
        )

        k1_map = cmbtr.get_k1(
            system.get_atomic_numbers(),
            geom_func_name.encode(),
            b"unity",
            {},
            start,
            stop,
            sigma,
            n,
        )

        k1_map = self._make_new_k1map(k1_map)

        # Depending on flattening, use either a sparse matrix or a dense one.
        n_elem = self.n_elements
        if self.flatten:
            k1 = sparse.DOK((n_elem * n), dtype=np.float32)
        else:
            k1 = np.zeros((n_elem, n), dtype=np.float32)

        for key, gaussian_sum in k1_map.items():
            i = key[0]

            # Denormalize if requested
            if not self.normalize_gaussians:
                max_val = 1 / (sigma * math.sqrt(2 * math.pi))
                gaussian_sum /= max_val

            if self.flatten:
                start = i * n
                end = (i + 1) * n
                k1[start:end] = gaussian_sum
            else:
                k1[i, :] = gaussian_sum
        if self.flatten:
            k1 = k1.to_coo()

        return k1

    def _get_k2(self, system):
        """Calculates the second order terms where the scalar mapping is the
        inverse distance between atoms.

        Returns:
            1D ndarray: flattened K2 values.
        """
        grid = self.k2["grid"]
        start = grid["min"]
        stop = grid["max"]
        n = grid["n"]
        sigma = grid["sigma"]
        # Determine the weighting function and possible radial cutoff
        radial_cutoff = None
        weighting = self.k2.get("weighting")
        parameters = {}
        if weighting is not None:
            weighting_function = weighting["function"]
            if weighting_function == "exponential" or weighting_function == "exp":
                scale = weighting["scale"]
                cutoff = weighting["cutoff"]
                if scale != 0:
                    radial_cutoff = -math.log(cutoff) / scale
                parameters = {b"scale": scale, b"cutoff": cutoff}
        else:
            weighting_function = "unity"

        # Determine the geometry function
        geom_func_name = self.k2["geometry"]["function"]

        # If needed, create the extended system
        if self.periodic:
            centers = system.get_positions()
            ext_system, cell_indices = dscribe.utils.geometry.get_extended_system(
                system, radial_cutoff, centers, return_cell_indices=True
            )
            ext_system = System.from_atoms(ext_system)
        else:
            ext_system = system
            cell_indices = np.zeros((len(system), 3), dtype=int)

        cmbtr = MBTRWrapper(
            self.atomic_number_to_index, self._interaction_limit, cell_indices
        )

        # If radial cutoff is finite, use it to calculate the sparse
        # distance matrix to reduce computational complexity from O(n^2) to
        # O(n log(n))
        n_atoms = len(ext_system)
        if radial_cutoff is not None:
            dmat = ext_system.get_distance_matrix_within_radius(radial_cutoff)
            adj_list = dscribe.utils.geometry.get_adjacency_list(dmat)
            dmat_dense = np.full(
                (n_atoms, n_atoms), sys.float_info.max
            )  # The non-neighbor values are treated as "infinitely far".
            dmat_dense[dmat.row, dmat.col] = dmat.data
        # If no weighting is used, the full distance matrix is calculated
        else:
            dmat_dense = ext_system.get_distance_matrix()
            adj_list = np.tile(np.arange(n_atoms), (n_atoms, 1))

        k2_map = cmbtr.get_k2(
            ext_system.get_atomic_numbers(),
            dmat_dense,
            adj_list,
            geom_func_name.encode(),
            weighting_function.encode(),
            parameters,
            start,
            stop,
            sigma,
            n,
        )

        k2_map = self._make_new_kmap(k2_map)

        # Depending of flattening, use either a sparse matrix or a dense one.
        n_elem = self.n_elements
        if self.flatten:
            k2 = sparse.DOK((int(n_elem * (n_elem + 1) / 2 * n)), dtype=np.float32)
        else:
            k2 = np.zeros((self.n_elements, self.n_elements, n), dtype=np.float32)

        for key, gaussian_sum in k2_map.items():
            i = key[0]
            j = key[1]

            # This is the index of the spectrum. It is given by enumerating the
            # elements of an upper triangular matrix from left to right and top
            # to bottom.
            m = int(j + i * n_elem - i * (i + 1) / 2)

            # Denormalize if requested
            if not self.normalize_gaussians:
                max_val = 1 / (sigma * math.sqrt(2 * math.pi))
                gaussian_sum /= max_val

            if self.flatten:
                start = m * n
                end = (m + 1) * n
                k2[start:end] = gaussian_sum
            else:
                k2[i, j, :] = gaussian_sum
        if self.flatten:
            k2 = k2.to_coo()

        return k2

    def _get_k3(self, system):
        """Calculates the third order terms.

        Returns:
            1D ndarray: flattened K3 values.
        """
        grid = self.k3["grid"]
        start = grid["min"]
        stop = grid["max"]
        n = grid["n"]
        sigma = grid["sigma"]

        # Determine the weighting function and possible radial cutoff
        radial_cutoff = None
        weighting = self.k3.get("weighting")
        parameters = {}
        if weighting is not None:
            weighting_function = weighting["function"]
            if weighting_function == "exponential" or weighting_function == "exp":
                scale = weighting["scale"]
                cutoff = weighting["cutoff"]
                if scale != 0:
                    radial_cutoff = -0.5 * math.log(cutoff) / scale
                parameters = {b"scale": scale, b"cutoff": cutoff}
        else:
            weighting_function = "unity"

        # Determine the geometry function
        geom_func_name = self.k3["geometry"]["function"]

        # If needed, create the extended system
        if self.periodic:
            centers = system.get_positions()
            ext_system, cell_indices = dscribe.utils.geometry.get_extended_system(
                system, radial_cutoff, centers, return_cell_indices=True
            )
            ext_system = System.from_atoms(ext_system)
        else:
            ext_system = system
            cell_indices = np.zeros((len(system), 3), dtype=int)

        cmbtr = MBTRWrapper(
            self.atomic_number_to_index, self._interaction_limit, cell_indices
        )

        # If radial cutoff is finite, use it to calculate the sparse
        # distance matrix to reduce computational complexity from O(n^2) to
        # O(n log(n))
        n_atoms = len(ext_system)
        if radial_cutoff is not None:
            dmat = ext_system.get_distance_matrix_within_radius(radial_cutoff)
            adj_list = dscribe.utils.geometry.get_adjacency_list(dmat)
            dmat_dense = np.full(
                (n_atoms, n_atoms), sys.float_info.max
            )  # The non-neighbor values are treated as "infinitely far".
            dmat_dense[dmat.col, dmat.row] = dmat.data
        # If no weighting is used, the full distance matrix is calculated
        else:
            dmat_dense = ext_system.get_distance_matrix()
            adj_list = np.tile(np.arange(n_atoms), (n_atoms, 1))

        k3_map = cmbtr.get_k3(
            ext_system.get_atomic_numbers(),
            dmat_dense,
            adj_list,
            geom_func_name.encode(),
            weighting_function.encode(),
            parameters,
            start,
            stop,
            sigma,
            n,
        )

        k3_map = self._make_new_kmap(k3_map)
        # Depending of flattening, use either a sparse matrix or a dense one.
        n_elem = self.n_elements
        if self.flatten:
            k3 = sparse.DOK(
                (int(n_elem * n_elem * (n_elem + 1) / 2 * n)), dtype=np.float32
            )
        else:
            k3 = np.zeros((n_elem, n_elem, n_elem, n), dtype=np.float32)

        for key, gaussian_sum in k3_map.items():
            i = key[0]
            j = key[1]
            k = key[2]

            # This is the index of the spectrum. It is given by enumerating the
            # elements of a three-dimensional array where for valid elements
            # k>=i. The enumeration begins from [0, 0, 0], and ends at [n_elem,
            # n_elem, n_elem], looping the elements in the order j, i, k.
            m = int(j * n_elem * (n_elem + 1) / 2 + k + i * n_elem - i * (i + 1) / 2)

            # Denormalize if requested
            if not self.normalize_gaussians:
                max_val = 1 / (sigma * math.sqrt(2 * math.pi))
                gaussian_sum /= max_val

            if self.flatten:
                start = m * n
                end = (m + 1) * n
                k3[start:end] = gaussian_sum
            else:
                k3[i, j, k, :] = gaussian_sum
        if self.flatten:
            k3 = k3.to_coo()

        return k3