Source code for hansken_extraction_plugin.api.extraction_trace

"""
This module contains the different trace APIs.

Note that there are a couple of different traces:
  * The ExtractionTrace and MetaExtractionTrace, which are offered to the process function.
  * ExtractionTraceBuilder, which is a trace that can be built; it does not exist in hansken yet, but it is added after
    building.
  * SearchTrace, which represents an immutable trace which is returned after searching for traces.
"""
from abc import ABC, abstractmethod
from io import BufferedReader, BufferedWriter, TextIOBase
from typing import Any, Literal, Mapping, Optional, Union

from hansken_extraction_plugin.api.tracelet import Tracelet
from hansken_extraction_plugin.api.transformation import Transformation


class ExtractionTraceBuilder(ABC):
    """
    ExtractionTrace that can be built.

    Represents child traces.
    """

    @abstractmethod
    def update(self, key_or_updates: Optional[Union[Mapping, str]] = None, value: Optional[Any] = None,
               data: Optional[Mapping[str, bytes]] = None) -> 'ExtractionTraceBuilder':
        """
        Update or add metadata properties for this `.ExtractionTraceBuilder`.

        Can be used to update the name of the Trace represented by this builder,
        if not already set.

        :param key_or_updates: either a `str` (the metadata property to be
                               updated) or a mapping supplying both keys and
                               values to be updated
        :param value: the value to update metadata property *key* to (used
                      only when *key_or_updates* is a `str`, an exception will
                      be thrown if *key_or_updates* is a mapping)
        :param data: a `dict` mapping data type / stream name to bytes to be
                     added to the trace
        :return: this `.ExtractionTraceBuilder`
        """

    @abstractmethod
    def add_tracelet(self, tracelet: Union[Tracelet, str],
                     value: Optional[Mapping[str, Any]] = None) -> 'ExtractionTraceBuilder':
        """
        Add a `.Tracelet` to this `.ExtractionTraceBuilder`.

        :param tracelet: the Tracelet or tracelet type (supplied as a `str`) to add
        :param value: the tracelet properties to add (only applicable when
                      *tracelet* is a `str`)
        :return: this `.ExtractionTraceBuilder`
        """

    @abstractmethod
    def add_transformation(self, data_type: str, transformation: Transformation) -> 'ExtractionTraceBuilder':
        """
        Update or add transformations for this `.ExtractionTraceBuilder`.

        :param data_type: data type of the Transformation
        :param transformation: the Transformation to add
        :return: this `.ExtractionTraceBuilder`
        """

    @abstractmethod
    def child_builder(self, name: Optional[str] = None) -> 'ExtractionTraceBuilder':
        """
        Create a new `.ExtractionTraceBuilder` to build a child trace to the trace to be represented by this builder.

        .. note::
            Traces should be created and built in depth first order,
            parent before child (pre-order).

        :param name: the name for the child trace being built
        :return: an `.ExtractionTraceBuilder` set up to save a new trace as the
                 child trace of this builder
        """

    def add_data(self, stream: str, data: bytes) -> 'ExtractionTraceBuilder':
        """
        Add data to this trace as a named stream.

        Convenience wrapper that forwards to `update` with a single-entry
        *data* mapping.

        :param stream: name of the data stream to be added
        :param data: data to be attached
        :return: this `.ExtractionTraceBuilder`
        """
        return self.update(data={stream: data})

    @abstractmethod
    def open(self, data_type: Optional[str] = None, offset: int = 0, size: Optional[int] = None,
             mode: Literal['rb', 'wb', 'w', 'wt'] = 'rb', encoding: str = 'utf-8',
             buffer_size: Optional[int] = None) -> Union[BufferedReader, BufferedWriter, TextIOBase]:
        """
        Open a data stream to read or write data from or to the trace being built.

        :param data_type: the data type of the datastream, 'raw' by default
        :param offset: byte offset to start the stream on when reading
        :param size: the number of bytes to make available when reading
        :param mode: 'rb' for reading bytes, 'wb' for writing bytes,
                     'w' or 'wt' for writing text
        :param encoding: encoding for writing text, used to convert `str`
                         values to bytes, only valid for modes 'w' and 'wt'
        :param buffer_size: buffer size for reading (cache read back/ahead) or
                            writing (cache for flush) data
        :return: a file-like object to read or write data from the named stream
        """

    @abstractmethod
    def build(self) -> str:
        """
        Save the trace being built by this builder to remote.

        .. note::
            Building more than once will result in an error being raised.

        :return: the new trace's id
        """
class Trace(ABC):
    """All trace classes should be able to return values."""

    @abstractmethod
    def get(self, key: str, default: Optional[Any] = None) -> Any:
        """
        Return a metadata property of this `.Trace`.

        :param key: the metadata property to be retrieved
        :param default: value returned if property is not set
        :return: the value of the requested metadata property
        """
class SearchTrace(Trace):
    """SearchTraces represent traces returned when searching for traces."""

    @abstractmethod
    def open(self, stream: str = 'raw', offset: int = 0, size: Optional[int] = None,
             buffer_size: Optional[int] = None) -> BufferedReader:
        """
        Open a data stream of this `.SearchTrace` for reading.

        :param stream: data stream of trace to open. defaults to raw. other
                       examples are html, text, etc.
        :param offset: byte offset to start the stream on
        :param size: the number of bytes to make available
        :param buffer_size: buffer size for reading data
        :return: a file-like object to read bytes from the named stream
        """
class MetaExtractionTrace(Trace):
    """
    MetaExtractionTraces contain only metadata.

    This class represents traces during the extraction of an extraction plugin
    without a data stream.
    """

    @abstractmethod
    def update(self, key_or_updates: Optional[Union[Mapping, str]] = None, value: Optional[Any] = None,
               data: Optional[Mapping[str, bytes]] = None) -> None:
        """
        Update or add metadata properties for this `.MetaExtractionTrace`.

        :param key_or_updates: either a `str` (the metadata property to be
                               updated) or a mapping supplying both keys and
                               values to be updated
        :param value: the value to update metadata property *key* to (used
                      only when *key_or_updates* is a `str`, an exception will
                      be thrown if *key_or_updates* is a mapping)
        :param data: a `dict` mapping data type / stream name to bytes to be
                     added to the trace
        """

    @abstractmethod
    def add_tracelet(self, tracelet: Union[Tracelet, str], value: Optional[Mapping[str, Any]] = None) -> None:
        """
        Add a `.Tracelet` to this `.MetaExtractionTrace`.

        :param tracelet: the Tracelet or tracelet type to add
        :param value: the tracelet properties to add (only applicable when
                      *tracelet* is a tracelet type)
        """

    @abstractmethod
    def add_transformation(self, data_type: str, transformation: Transformation) -> None:
        """
        Update or add transformations for this `.MetaExtractionTrace`.

        :param data_type: data type of the Transformation
        :param transformation: the Transformation to add
        """

    @abstractmethod
    def child_builder(self, name: Optional[str] = None) -> ExtractionTraceBuilder:
        """
        Create an `.ExtractionTraceBuilder` to build a trace to be saved as a child of this `.Trace`.

        A new trace will only be added to the index once explicitly saved (e.g.
        through `.ExtractionTraceBuilder.build`).

        .. note::
            Traces should be created and built in depth first order,
            parent before child (pre-order).

        :param name: the name for the trace being built
        :return: an `.ExtractionTraceBuilder` set up to create a child trace of
                 this `.MetaExtractionTrace`
        """
class ExtractionTrace(MetaExtractionTrace):
    """Trace offered to be processed."""

    @abstractmethod
    def open(self, data_type: Optional[str] = None, offset: int = 0, size: Optional[int] = None,
             mode: Literal['rb', 'wb', 'w', 'wt'] = 'rb', encoding: str = 'utf-8',
             buffer_size: Optional[int] = None) -> Union[BufferedReader, BufferedWriter, TextIOBase]:
        """
        Open a data stream to read or write data from or to the `.ExtractionTrace`.

        :param data_type: the data type of the datastream, 'raw' by default
        :param offset: byte offset to start the stream on when reading
        :param size: the number of bytes to make available when reading
        :param mode: 'rb' for reading bytes, 'wb' for writing bytes,
                     'w' or 'wt' for writing text
        :param encoding: encoding for writing text, used to convert `str`
                         values to bytes, only valid for modes 'w' and 'wt'
        :param buffer_size: buffer size for reading (cache read back/ahead) or
                            writing (cache for flush) data
        :return: a file-like object to read or write data from the named stream
        """