# Source code for lir.transform.distance

import numpy as np

from lir import Transformer
from lir.data.models import FeatureData, InstanceData, PairedFeatureData
from lir.util import check_type



# [docs]
class ElementWiseDifference(Transformer):
    """
    Reduce every pair to the element-wise absolute difference of its two sides.

    Expects:
    - a PairedFeatureData object with n_trace_instances=1 and n_ref_instances=1;

    Returns
    -------
    - a FeatureData object derived from the input (via `replace_as`) whose features are the absolute differences;
      the pair axis (axis 1 of the input features) is consumed, e.g. (n, 2, f) becomes (n, f)
    """

    def apply(self, instances: InstanceData) -> FeatureData:
        """
        Compute the absolute difference between both sides of each pair.

        Parameters
        ----------
        instances : InstanceData
            The pairs to process; passed through `check_type` against PairedFeatureData.

        Returns
        -------
        FeatureData
            The input converted to FeatureData, with the absolute differences as its features.

        Raises
        ------
        ValueError
            If the pairs do not consist of exactly one reference instance and one trace instance.
        """
        pairs = check_type(PairedFeatureData, instances)

        n_ref = pairs.n_ref_instances
        n_trace = pairs.n_trace_instances
        if not (n_ref == 1 and n_trace == 1):
            raise ValueError(
                f'{self.__class__.__name__} must have exactly one reference instance and one trace instance;'
                f' found: n_ref_instances={n_ref}, n_trace_instances={n_trace}'
            )

        # index 0 and index 1 along axis 1 are the two sides of each pair
        first_side = pairs.features[:, 0]
        second_side = pairs.features[:, 1]
        absolute_difference = np.abs(first_side - second_side)

        return pairs.replace_as(FeatureData, features=absolute_difference)





# [docs]
class ManhattanDistance(Transformer):
    """
    Calculate the Manhattan distance between pairs.

    Takes a PairedFeatureData object or a FeatureData object and returns the manhattan distance.

    If the input is a PairedFeatureData object, the distance is computed as the manhattan distance, i.e. the sum of the
    element-wise absolute difference between both sides of the pairs, for all features.

    If the input is a FeatureData object, it is assumed that it contains the element-wise differences, and the sum over
    the absolute values of these differences is calculated. Differences may therefore be signed or already absolute;
    both yield the same result.
    """

    def apply(self, instances: InstanceData) -> FeatureData:
        """
        Calculate the Manhattan distance for every instance (pair) in the data.

        Parameters
        ----------
        instances : InstanceData
            Either paired features (reduced to element-wise absolute differences first) or features that already
            hold element-wise differences.

        Returns
        -------
        FeatureData
            A copy of the (difference) data in which the features of each instance are replaced by a single value:
            the sum of the absolute differences over all feature axes.
        """
        instances = check_type(FeatureData, instances)

        # if the data are paired instances, calculate the element wise difference first
        if isinstance(instances, PairedFeatureData):
            instances = ElementWiseDifference().apply(instances)

        # the feature axes are all axes except the first
        feature_axes = tuple(range(1, len(instances.features.shape)))

        # manhattan distance is the sum of the absolute differences over all feature axes; taking the absolute value
        # here keeps signed differences from cancelling out and is a no-op for differences that are already absolute
        return instances.replace(features=np.sum(np.abs(instances.features), axis=feature_axes))





# [docs]
class EuclideanDistance(Transformer):
    """
    Reduce every pair to the Euclidean distance between its two sides.

    Accepts either a PairedFeatureData object or a FeatureData object.

    For PairedFeatureData, the element-wise difference between both sides of each pair is taken first; the distance is
    then the square root of the sum of the squared differences, over all features.

    For FeatureData, the features are taken to be element-wise differences already, and the square root of the sum of
    their squares is calculated.

    In yaml configurations, it can be used by specifying `euclidean_distance`, e.g.:
    `scoring: euclidean_distance`
    """

    def apply(self, instances: InstanceData) -> FeatureData:
        """
        Compute the Euclidean distance for every instance (pair) in the data.

        Parameters
        ----------
        instances : InstanceData
            Either paired features (reduced to element-wise differences first) or features that already hold
            element-wise differences.

        Returns
        -------
        FeatureData
            A copy of the (difference) data in which the features of each instance are replaced by a single value:
            the square root of the sum of squares over all feature axes.
        """
        data = check_type(FeatureData, instances)

        # paired input is converted to element-wise differences before reducing
        if isinstance(data, PairedFeatureData):
            data = ElementWiseDifference().apply(data)

        differences = data.features

        # reduce over every axis but the leading (instance) axis
        reduce_over = tuple(range(1, len(differences.shape)))

        sum_of_squares = np.sum(differences**2, axis=reduce_over)
        return data.replace(features=np.sqrt(sum_of_squares))