Source code for hstrat.frozen_instrumentation._HereditaryStratigraphicAssemblageSpecimen

import functools
import typing

import numpy as np
import pandera as pa

from .._auxiliary_lib import (
    CopyableSeriesItemsIter,
    get_nullable_mask,
    get_nullable_vals,
)

# Annotation alias for the Series objects handled in this module: a
# pandera-typed Series parameterized by one of the unsigned integer dtypes
# (8, 16, 32, or 64 bits wide).
_nullable_unsigned_integer_series_t = typing.Union[
    pa.typing.Series[pa.typing.UINT8()],
    pa.typing.Series[pa.typing.UINT16()],
    pa.typing.Series[pa.typing.UINT32()],
    pa.typing.Series[pa.typing.UINT64()],
]


class HereditaryStratigraphicAssemblageSpecimen:
    """Postprocessing representation of the differentia retained by an extant
    HereditaryStratigraphicColumn, indexed by deposition rank.

    Differentia are stored using a nullable integer representation, which
    allows for inclusion of entries for all ranks retained by any specimen
    within the assemblage, even if that particular rank is not retained by this
    specimen. This allows for more efficient comparisons between specimens, due
    to direct alignment.

    See Also
    --------
    HereditaryStratigraphicSpecimen
        Specimen representation that can contain only ranks retained by that
        specimen.
    HereditaryStratigraphicAssemblage
        Gathers a collection of `HereditaryStratigraphicSpecimen`s and
        facilitates creation of corresponding aligned
        `HereditaryStratigraphicAssemblageSpecimen`s.
    """

    # The two `_num_strata_*` slots memoize derived counts per instance. They
    # are filled lazily on first use, so an instance holds its own cache and
    # nothing at module level keeps the specimen alive.
    __slots__ = (
        "_data",
        "_stratum_differentia_bit_width",
        "_num_strata_deposited",
        "_num_strata_retained",
    )

    # The Series annotation is a string so that the pandera type alias is not
    # evaluated when the class body is executed.
    _data: "_nullable_unsigned_integer_series_t"
    _stratum_differentia_bit_width: int
    _num_strata_deposited: int
    _num_strata_retained: int

    def __init__(
        self: "HereditaryStratigraphicAssemblageSpecimen",
        stratum_differentia_series: "_nullable_unsigned_integer_series_t",
        stratum_differentia_bit_width: int,
    ) -> None:
        """Initialize a HereditaryStratigraphicAssemblageSpecimen object with a
        (potentially sparse) sequence of rank-indexed differentia and a
        differentia bit width.

        Parameters
        ----------
        stratum_differentia_series : pandas Series
            Differentia values indexed by deposition rank. The Series is
            stored directly (no copy is made) and its index is coerced to
            int64 in place.
        stratum_differentia_bit_width : int
            Number of bits used by each differentia value.
        """
        self._data = stratum_differentia_series
        # `Index.astype` returns a new Index instead of converting in place,
        # so the result has to be assigned back for the coercion to apply.
        # With `copy=False` an index that is already int64 is reused as-is.
        self._data.index = self._data.index.astype(np.int64, copy=False)
        self._stratum_differentia_bit_width = stratum_differentia_bit_width

    def GetStratumDifferentiaBitWidth(
        self: "HereditaryStratigraphicAssemblageSpecimen",
    ) -> int:
        """How many bits wide are the differentia of strata?"""
        return self._stratum_differentia_bit_width

    def GetNumStrataDeposited(
        self: "HereditaryStratigraphicAssemblageSpecimen",
    ) -> int:
        """How many strata have been deposited on the column?

        Computed as one more than the rank of the last non-null entry in the
        stored Series.

        Note that a first stratum is deposited on the column during
        initialization.

        Notes
        -----
        The result is computed once and then stored on the instance, so later
        mutation of the underlying Series is not reflected.
        """
        try:
            return self._num_strata_deposited
        except AttributeError:  # slot not filled yet: first call
            pass
        num_deposited = self._data.last_valid_index() + 1
        self._num_strata_deposited = num_deposited
        return num_deposited

    def GetNumStrataRetained(
        self: "HereditaryStratigraphicAssemblageSpecimen",
    ) -> int:
        """How many strata are currently stored within the column?

        Counted as the number of non-null entries in the stored Series.

        May be fewer than the number of strata deposited if strata have been
        discarded as part of the configured stratum retention policy.

        Notes
        -----
        The result is computed once and then stored on the instance, so later
        mutation of the underlying Series is not reflected.
        """
        try:
            return self._num_strata_retained
        except AttributeError:  # slot not filled yet: first call
            pass
        num_retained = len(self._data) - self.GetStratumMask().sum()
        self._num_strata_retained = num_retained
        return num_retained

    def GetNumDiscardedStrata(
        self: "HereditaryStratigraphicAssemblageSpecimen",
    ) -> int:
        """How many deposited strata have been discarded?

        Determined by number of generations elapsed and the configured column
        retention policy.
        """
        return self.GetNumStrataDeposited() - self.GetNumStrataRetained()

    def HasDiscardedStrata(
        self: "HereditaryStratigraphicAssemblageSpecimen",
    ) -> bool:
        """Have any deposited strata been discarded?"""
        return self.GetNumDiscardedStrata() > 0

    def GetData(
        self: "HereditaryStratigraphicAssemblageSpecimen",
    ) -> "_nullable_unsigned_integer_series_t":
        """Get the underlying Pandas Series containing differentia values
        indexed by rank.

        Notes
        -----
        This function directly returns the specimen's underlying Series data,
        so mutation of the returned object will alter or invalidate this
        specimen.
        """
        return self._data

    def GetStratumMask(
        self: "HereditaryStratigraphicAssemblageSpecimen",
    ) -> np.ndarray:
        """Get a boolean mask indicating which entries in the stored Pandas
        NullableInteger Series are null.

        I.e., which ranks does this specimen not retain differentia at?

        Returns
        -------
        mask : np.ndarray
            A 1-dimensional boolean NumPy ndarray. True values indicate
            nullness.

        Notes
        -----
        This function returns the underlying boolean mask used by the stored
        Pandas Series object to represent null values. This mask is a direct
        view into the Series data, so no copy is made. Changes to the mask will
        propagate to the stored Series object, and vice versa.

        If the stored Series is not backed by a nullable integer array, the
        result of `Series.isna()` is returned instead; that is a boolean
        pandas Series rather than an ndarray.
        """
        try:
            return get_nullable_mask(self._data)
        except AttributeError:  # object type (not nullable int) array
            return self._data.isna()

    def GetDifferentiaVals(
        self: "HereditaryStratigraphicAssemblageSpecimen",
    ) -> np.ndarray:
        """Get the integer underlying values in the stored Pandas
        NullableInteger Series.

        Returns
        -------
        differentia : np.ndarray
            A 1-dimensional NumPy integer ndarray containing differentia
            values, including garbage values where the underlying Series is
            null.

        Notes
        -----
        This function returns a direct view into the Series data, so no copy is
        made. Changes to the returned array will propagate to the Series
        object's underlying values, and vice versa.

        If the stored Series is not backed by a nullable integer array, the
        Series' `.array` attribute is returned instead.
        """
        try:
            return get_nullable_vals(self._data)
        except AttributeError:  # object type (not nullable int) array
            return self._data.array

    def GetRankIndex(
        self: "HereditaryStratigraphicAssemblageSpecimen",
    ) -> np.ndarray:
        """Get the integer index in the stored Pandas Series, representing the
        ranks of stratum entries.

        Returns
        -------
        ranks : np.ndarray
            A numpy array containing ranks of differentia entries, including
            null entries for differentia that are not retained.
        """
        return self._data.index.array.to_numpy(copy=False, dtype=np.int64)

    def GetRankAtColumnIndex(
        self: "HereditaryStratigraphicAssemblageSpecimen",
        index: int,
    ) -> int:
        """Map array position to generation of deposition.

        What is the deposition rank of the stratum positioned at index i among
        retained strata? Index order is from most ancient (index 0) to most
        recent.
        """
        return self.GetData().dropna().index[index]

    def IterRetainedRanks(
        self: "HereditaryStratigraphicAssemblageSpecimen",
    ) -> typing.Iterator[int]:
        """Iterate over deposition ranks of strata retained in the specimen."""
        yield from self.GetData().dropna().index

    def IterRetainedDifferentia(
        self: "HereditaryStratigraphicAssemblageSpecimen",
    ) -> typing.Iterator[int]:
        """Iterate over differentia of strata retained in the specimen.

        Differentia yielded from most ancient to most recent.
        """
        yield from self.GetData().dropna()

    def IterRankDifferentiaZip(
        self: "HereditaryStratigraphicAssemblageSpecimen",
        copyable: bool = False,
    ) -> typing.Iterator[typing.Tuple[int, int]]:
        """Iterate over ranks of retained strata and their differentia.

        If `copyable`, return an iterator that can be copied to produce a new
        fully-independent iterator at the same position.

        Equivalent to `zip(specimen.IterRetainedRanks(),
        specimen.IterRetainedDifferentia())`, but may be more efficient.
        """
        if copyable:
            return CopyableSeriesItemsIter(self._data.dropna())
        else:
            return self._data.dropna().items()