Source code for lir.bounding

import logging
from abc import ABC, abstractmethod
from typing import Any, Self

import numpy as np

from lir import Transformer
from lir.data.models import FeatureData, InstanceData, LLRData
from lir.util import check_type


# Module-level logger, named after this module via ``__name__``; used by the bounders below for info/debug output.
LOG = logging.getLogger(__name__)


class LLRBounder(Transformer, ABC):
    """
    Base class for LLR bounders.

    A bounder updates any LLRs that are out of bounds. Any LLR values within bounds remain unchanged.
    LLR values that are out-of-bounds are updated to the nearest bound.

    Subclasses implement `calculate_bounds()`; `fit()` stores its result and `apply()` clips to it.

    Parameters
    ----------
    lower_llr_bound : float | None
        The lower bound for the LLRs. If `None`, no lower bound is applied.
    upper_llr_bound : float | None
        The upper bound for the LLRs. If `None`, no upper bound is applied.
    """

    def __init__(
        self,
        lower_llr_bound: float | None = None,
        upper_llr_bound: float | None = None,
    ):
        self.lower_llr_bound = lower_llr_bound
        self.upper_llr_bound = upper_llr_bound

    @abstractmethod
    def calculate_bounds(self, llrdata: LLRData) -> tuple[float | None, float | None]:
        """
        Calculate and return appropriate bounds for a set of LLRs and their labels.

        Parameters
        ----------
        llrdata : LLRData
            The LLR data for which to calculate the bounds. This includes the LLRs, their labels, and any other
            relevant information.

        Returns
        -------
        tuple[float | None, float | None]
            The lower and upper LLR bound, in that order. `None` means that the bound is not applied.
        """
        raise NotImplementedError

    @staticmethod
    def _validate(instances: InstanceData) -> LLRData:
        """
        Return the instances as `LLRData`, converting them if necessary.

        Parameters
        ----------
        instances : InstanceData
            The data to validate. It is passed through `check_type(FeatureData, ...)` first.

        Returns
        -------
        LLRData
            The input itself if it already is `LLRData`, otherwise the result of `replace_as(LLRData)`.
        """
        instances = check_type(FeatureData, instances)
        if not isinstance(instances, LLRData):
            # Lazy %-formatting: the message is only built when INFO logging is enabled.
            LOG.info('casting `%s` to `LLRData`', type(instances))
            instances = instances.replace_as(LLRData)
        return instances

    def fit(self, instances: InstanceData) -> Self:
        """
        Configure this bounder by calculating bounds.

        Assumes that y=1 corresponds to Hp and y=0 to Hd.

        Parameters
        ----------
        instances : InstanceData
            The data to fit the bounder on. This should include the LLRs and their corresponding labels.

        Returns
        -------
        Self
            The fitted bounder instance.

        Raises
        ------
        ValueError
            If the data is unlabeled, or if the calculated lower bound exceeds the calculated upper bound.
            In either case the bounds previously stored on this instance are left untouched.
        """
        instances = self._validate(instances)

        if instances.labels is None:
            raise ValueError(f'{type(self)}.fit() requires labeled data')

        # Calculate into locals first, so that a failed sanity check does not leave
        # this bounder configured with inverted bounds.
        lower, upper = self.calculate_bounds(instances)

        # Check the sanity of the bounds; equal bounds are accepted.
        if lower is not None and upper is not None and lower > upper:
            raise ValueError(
                'the lower bound must be lower than the upper bound; '
                f'lower_llr_bound={lower}; upper_llr_bound={upper}'
            )

        self.lower_llr_bound, self.upper_llr_bound = lower, upper
        return self

    def apply(self, instances: InstanceData) -> LLRData:
        """
        Clip the LLRs to the bounds currently stored on this bounder.

        Parameters
        ----------
        instances : InstanceData
            The data to apply the bounder to. Labels are not read by this method.

        Returns
        -------
        LLRData
            A copy of the data in which the LLRs are clipped to the bounds, and in which `llr_lower_bound`
            and `llr_upper_bound` are set to the bounds that were applied.
        """
        instances = self._validate(instances)

        # Clip the LLRs to the bounds, where np.clip handles the None values correctly.
        llrs = np.clip(instances.features, self.lower_llr_bound, self.upper_llr_bound)

        return instances.replace(
            features=llrs,
            llr_upper_bound=self.upper_llr_bound,
            llr_lower_bound=self.lower_llr_bound,
        )
class StaticBounder(LLRBounder):
    """
    Clip LLRs to fixed, caller-supplied limits.

    Both limits are given at construction time. Either of them may be `None`, in which case that side is
    left unbounded.

    Parameters
    ----------
    lower_llr_bound : float | None
        The lower bound for the LLRs. If `None`, no lower bound is applied.
    upper_llr_bound : float | None
        The upper bound for the LLRs. If `None`, no upper bound is applied.
    """

    def __init__(self, lower_llr_bound: float | None, upper_llr_bound: float | None):
        # Unlike the base class, both bounds are required arguments here.
        super().__init__(lower_llr_bound=lower_llr_bound, upper_llr_bound=upper_llr_bound)

    def calculate_bounds(self, llrdata: LLRData) -> tuple[float | None, float | None]:
        """
        Return the bounds that are stored on this instance.

        Parameters
        ----------
        llrdata : LLRData
            Ignored; only present to match the signature of the base class.

        Returns
        -------
        tuple[float | None, float | None]
            The lower and upper LLR bound, in that order.
        """
        bounds = (self.lower_llr_bound, self.upper_llr_bound)
        return bounds
class NSourceBounder(LLRBounder):
    """
    Bound LLRs based on the number of sources.

    This bounder sets the lower LLR bound to -log10(N) and the upper bound to log10(N), where N is the number
    of distinct source IDs in the data. In non-log space, this corresponds to bounding likelihood ratios to
    [1/N, N]. This is a logical consequence of having N sources: no source can provide more than N support
    for one hypothesis over the other.
    """

    def calculate_bounds(self, llrdata: LLRData) -> tuple[float | None, float | None]:
        """
        Calculate and return the lower and upper LLR bounds.

        Parameters
        ----------
        llrdata : LLRData
            The LLR data for which to calculate the bounds. This should include the source IDs.

        Returns
        -------
        tuple[float | None, float | None]
            The lower and upper LLR bounds, i.e. (-log10(N), log10(N)) for N distinct source IDs.

        Raises
        ------
        ValueError
            If the data has no source IDs, or if the source IDs are empty.
        """
        if llrdata.source_ids is None:
            raise ValueError(f'{type(self)} requires source IDs to calculate bounds')

        # Only the number of distinct IDs matters, so their order is irrelevant.
        unique_source_ids = np.unique(llrdata.source_ids, sorted=False)
        n_sources = len(unique_source_ids)

        # log10(0) would be -inf, which yields the inverted bounds (inf, -inf); fail with a clear message instead.
        if n_sources == 0:
            raise ValueError(f'{type(self)} requires at least one source ID to calculate bounds')

        log_n_sources = np.log10(n_sources)

        # Lazy %-formatting: the messages are only built when DEBUG logging is enabled.
        LOG.debug('NSourceBounder: number of sources: N=%s', n_sources)
        LOG.debug('NSourceBounder: calculated bounds: -log(N)=%s, log(N)=%s', -log_n_sources, log_n_sources)

        return -log_n_sources, log_n_sources
# Public API of this module. The entries of `__all__` must be the exported *names* as strings:
# listing the class objects themselves makes a star-import of this module raise a TypeError.
__all__: list[str] = [
    'LLRBounder',
    'StaticBounder',
    'NSourceBounder',
]