Source code for riverine.components

from __future__ import annotations

from abc import ABC, abstractmethod
from math import isnan
from typing import TYPE_CHECKING, Any, Sequence, Tuple, TypeVar, cast

import attrs
import pandas as pd

from .dictstructure import _STRUCTURE_CLASSES, _structure, _unstructure
from .locations import WellPos, _parse_wellpos_optional
from .logging import log
from .printing import TableFormat
from .units import (
    NAN_VOL,
    Q_,
    ZERO_VOL,
    DecimalQuantity,
    _parse_conc_optional,
    _parse_vol_optional,
    nM,
    ureg,
    NAN_CONC
)
import polars as pl

if TYPE_CHECKING:  # pragma: no cover
    from attrs import Attribute

    from .experiments import Experiment
    from .references import Reference


T = TypeVar("T")

__all__ = ["AbstractComponent", "Component", "Strand"]


class AbstractComponent(ABC):
    """Abstract base class for a component in a mix.

    Custom components that don't inherit from a concrete class should inherit
    from this class and implement the abstract members declared here.  The
    non-abstract members supply "nothing known" defaults (no plate, no well,
    NaN volume) that concrete subclasses may override.
    """

    @property
    @abstractmethod
    def name(self) -> str:  # pragma: no cover
        """Name of the component."""
        ...

    @property
    def location(self) -> tuple[str, WellPos | None]:
        """Location as a ``(plate, well)`` pair; ``("", None)`` by default."""
        return ("", None)

    @property
    def plate(self) -> str | None:
        """Plate name; ``None`` by default."""
        return None

    @property
    def is_mix(self) -> bool:
        """Whether this component is a mix; ``False`` by default."""
        return False

    @property
    def well(self) -> WellPos | None:
        """Well position; ``None`` by default."""
        return None

    @property
    def _well_list(self) -> list[WellPos]:
        """The well wrapped in a list, or an empty list when there is no well."""
        return [self.well] if self.well is not None else []

    @property
    def volume(self) -> DecimalQuantity:
        """Volume of the component; ``NAN_VOL`` by default."""
        return NAN_VOL

    @property
    @abstractmethod
    def concentration(self) -> DecimalQuantity:  # pragma: no cover
        """(Source) concentration of the component as a pint Quantity.  NaN if undefined."""
        ...

    @abstractmethod
    def all_components(self) -> pd.DataFrame:  # pragma: no cover
        """A dataframe of all components."""
        ...

    @abstractmethod
    def with_reference(
        self: T, reference: Reference, *, inplace: bool = False
    ) -> T:  # pragma: no cover
        """Return a component of the same type, resolved against ``reference``."""
        ...

    @abstractmethod
    def with_experiment(
        self, experiment: Experiment, *, inplace: bool = True
    ) -> AbstractComponent:  # pragma: no cover
        """Return a component resolved against ``experiment``."""
        ...

    @classmethod
    @abstractmethod
    def _structure(
        cls, d: dict[str, Any], experiment: Experiment | None = None
    ) -> AbstractComponent:  # pragma: no cover
        """Build an instance from its dictionary representation ``d``."""
        ...

    @abstractmethod
    def _unstructure(self, experiment: Experiment | None = None) -> dict[str, Any]:
        """Return a dictionary representation of this component."""
        ...

    def printed_name(self, tablefmt: str | TableFormat) -> str:
        """Name to show in printed tables; this default ignores ``tablefmt``."""
        return self.name

    def _update_volumes(
        self,
        consumed_volumes: dict[str, DecimalQuantity] | None = None,
        made_volumes: dict[str, DecimalQuantity] | None = None,
        _cache_key=None,
    ) -> Tuple[dict[str, DecimalQuantity], dict[str, DecimalQuantity]]:
        """Record this component in the running volume tallies.

        Both mappings are keyed by component name.  A missing mapping is
        replaced by a fresh empty dict.  If this component's name is not yet
        in ``made_volumes`` it is added with ``ZERO_VOL``; if it is already
        there, nothing changes.  ``consumed_volumes`` is never modified here.

        Returns:
            The ``(consumed_volumes, made_volumes)`` pair; the passed-in
            mappings are updated in place and returned.
        """
        consumed = {} if consumed_volumes is None else consumed_volumes
        made = {} if made_volumes is None else made_volumes

        # A name already present means we have been visited before.
        if self.name not in made:
            made[self.name] = ZERO_VOL

        return consumed, made
def norm_nan_for_eq(v1: DecimalQuantity) -> DecimalQuantity:
    """Return a stand-in for NaN quantities so attrs equality can compare them.

    NaN never compares equal to itself, so a NaN-valued quantity is replaced
    by ``-1`` in the same unit; any other quantity is returned unchanged.
    Used as the ``eq=`` key for the quantity fields of `Component`.
    """
    if isnan(v1.m):
        return Q_(-1, v1.u)
    return v1


@attrs.define()
class Component(AbstractComponent):
    """A single named component, potentially with a concentration and location.

    Location is stored in the keyword-only ``plate`` and ``well`` attributes,
    both of which default to ``None``.  ``concentration`` and ``volume`` are
    passed through their converters on construction and on every assignment,
    and default to ``NAN_CONC`` / ``NAN_VOL``.
    """

    name: str  # type: ignore

    concentration: DecimalQuantity = attrs.field(
        converter=_parse_conc_optional,
        default=NAN_CONC,
        on_setattr=attrs.setters.convert,
        eq=norm_nan_for_eq,
    )

    plate: str | None = attrs.field(default=None, kw_only=True)

    well: WellPos | None = attrs.field(
        converter=_parse_wellpos_optional,
        default=None,
        kw_only=True,
        on_setattr=attrs.setters.convert,
    )

    volume: DecimalQuantity = attrs.field(  # type: ignore
        converter=_parse_vol_optional,
        default=NAN_VOL,
        on_setattr=attrs.setters.convert,
        eq=norm_nan_for_eq,
    )

    def _get_name(self, _cache_key=None) -> str:
        """Return `name`; ``_cache_key`` is accepted but unused here."""
        return self.name

    def _get_concentration(self, _cache_key=None) -> DecimalQuantity:
        """Return `concentration`; ``_cache_key`` is accepted but unused here."""
        return self.concentration

    @property
    def location(self) -> tuple[str | None, WellPos | None]:  # type: ignore
        """Location as a ``(plate, well)`` pair."""
        return (self.plate, self.well)

    def all_components_polars(self, _cache_key=None) -> pl.DataFrame:
        """Return a one-row polars frame describing this component.

        Columns are ``name``, ``concentration_nM`` (null when the
        concentration is NaN) and ``component`` (this object itself).
        """
        c = self.concentration.to(nM).magnitude
        if isnan(c):
            # Store an undefined concentration as null in the Decimal column.
            c = None
        return pl.DataFrame(
            {
                'name': [self.name],
                'concentration_nM': [c],
                'component': [self],
            },
            schema={
                'name': pl.String,
                'concentration_nM': pl.Decimal(scale=6),
                'component': pl.Object,
            },
        )

    def all_components(self) -> pd.DataFrame:
        """Return `all_components_polars` as a pandas frame indexed by ``name``."""
        df = self.all_components_polars().to_pandas()
        df.set_index('name', inplace=True)
        return df

    def _unstructure(self, experiment: Experiment | None = None) -> dict[str, Any]:
        """Return a dictionary representation of this component.

        The result always has a ``"class"`` entry holding the class name.
        An attribute is omitted when it is named ``reference``, when its value
        is the very object declared as the attrs default, or when it is a
        NaN-valued quantity.
        """
        d: dict[str, Any] = {"class": self.__class__.__name__}
        for att in cast("Sequence[Attribute]", self.__attrs_attrs__):  # type: ignore
            if att.name in ["reference"]:
                continue
            val = getattr(self, att.name)
            if val is att.default:
                continue
            if isinstance(val, ureg.Quantity) and isnan(val.m):
                continue
            # This is the module-level ``_unstructure`` helper, not this method.
            d[att.name] = _unstructure(val)
        return d

    @classmethod
    def _structure(
        cls, d: dict[str, Any], experiment: Experiment | None = None
    ) -> Component:
        """Build an instance from the dictionary representation ``d``.

        Every value is passed through the module-level ``_structure`` helper
        and the results are used as constructor keyword arguments.  The
        caller's dictionary is left untouched.
        """
        kwargs = {key: _structure(value, experiment) for key, value in d.items()}
        return cls(**kwargs)

    def with_experiment(
        self: Component, experiment: Experiment, inplace: bool = True
    ) -> AbstractComponent:
        """Return the experiment's component of the same name, if it has one.

        Falls back to ``self`` when ``experiment.components`` has no entry for
        this name.  ``inplace`` is accepted but not used.
        """
        if self.name in experiment.components:
            return experiment.components[self.name]  # FIXME: add checks
        else:
            return self

    def with_reference(
        self: Component, reference: Reference, inplace: bool = False
    ) -> Component:
        """Fill in concentration, plate and well from a reference table.

        ``reference.df`` is looked up by the ``"Name"`` column (or by its
        index when the index is already named ``"Name"``).  A row matches when
        it does not contradict any concentration, plate or well already set on
        this component.

        Args:
            reference: Object whose ``df`` has ``"Name"``,
                ``"Concentration (nM)"``, ``"Plate"`` and ``"Well"`` data.
            inplace: When true, update and return ``self``; otherwise return a
                copy made with ``attrs.evolve``.

        Returns:
            The updated component, or ``self`` unchanged when the name is not
            in the reference.

        Raises:
            ValueError: If the name is present but every row contradicts this
                component.
        """
        if reference.df.index.name == "Name":
            ref_by_name = reference.df
        else:
            ref_by_name = reference.df.set_index("Name")
        try:
            ref_comps = ref_by_name.loc[
                [self.name], :
            ]  # using this format to force a dataframe result
        except KeyError:
            return self

        mismatches = []
        matches = []
        for _, ref_comp in ref_comps.iterrows():
            ref_conc = Q_(ref_comp["Concentration (nM)"], nM)
            if not isnan(self.concentration.m) and ref_conc != self.concentration:
                mismatches.append(("Concentration (nM)", ref_comp))
                continue

            ref_plate = ref_comp["Plate"]
            if self.plate and ref_plate != self.plate:
                mismatches.append(("Plate", ref_comp))
                continue

            ref_well = _parse_wellpos_optional(ref_comp["Well"])
            if self.well and self.well != ref_well:
                mismatches.append(("Well", ref_well))
                continue

            matches.append(ref_comp)

        if len(matches) > 1:
            log.warning(
                "Component %s has more than one location: %s. Choosing first.",
                self.name,
                [(x["Plate"], x["Well"]) for x in matches],
            )
        elif (len(matches) == 0) and len(mismatches) > 0:
            raise ValueError(
                f"Component has only mismatched references: {self}, {mismatches}"
            )

        match = matches[0]
        ref_conc = ureg.Quantity(match["Concentration (nM)"], nM)
        ref_plate = match["Plate"]
        ref_well = _parse_wellpos_optional(match["Well"])

        if inplace:
            self.concentration = ref_conc
            self.plate = ref_plate
            self.well = ref_well
            return self
        else:
            return attrs.evolve(
                self,
                name=self.name,
                concentration=ref_conc,
                plate=ref_plate,
                well=ref_well,
            )
@attrs.define()
class Strand(Component):
    """A single named strand, potentially with a concentration, location and sequence."""

    sequence: str | None = None

    def with_reference(
        self: Strand, reference: Reference, inplace: bool = False
    ) -> Strand:
        """Fill in concentration, plate, well and sequence from a reference table.

        ``reference.df`` is looked up by the ``"Name"`` column (or by its
        index when the index is already named ``"Name"``).  A row matches when
        it does not contradict any concentration, plate, well or sequence
        already set on this strand.  Sequences are compared with spaces and
        hyphens removed.

        The comparison works on local copies, so a lookup never rewrites
        ``self.sequence`` as a side effect; ``self`` changes only when
        ``inplace`` is true.

        Args:
            reference: Object whose ``df`` has ``"Name"``,
                ``"Concentration (nM)"``, ``"Plate"``, ``"Well"`` and
                ``"Sequence"`` data.
            inplace: When true, update and return ``self``; otherwise return a
                copy made with ``attrs.evolve``.

        Returns:
            The updated strand, or ``self`` unchanged when the name is not in
            the reference.

        Raises:
            ValueError: If the name is present but every row contradicts this
                strand.
            RuntimeError: If the two sequence values are of types that none of
                the selection rules cover.
        """
        if reference.df.index.name == "Name":
            ref_by_name = reference.df
        else:
            ref_by_name = reference.df.set_index("Name")
        try:
            ref_comps = ref_by_name.loc[
                [self.name], :
            ]  # using this format to force a dataframe result
        except KeyError:
            return self

        # Normalise our own sequence once, without touching self.sequence.
        own_seq_key = (
            self.sequence.replace(" ", "").replace("-", "")
            if isinstance(self.sequence, str)
            else None
        )

        mismatches = []
        matches = []
        for _, ref_comp in ref_comps.iterrows():
            ref_conc = ureg.Quantity(ref_comp["Concentration (nM)"], nM)
            if not isnan(self.concentration.m) and ref_conc != self.concentration:
                mismatches.append(("Concentration (nM)", ref_comp))
                continue

            ref_plate = ref_comp["Plate"]
            if self.plate and ref_plate != self.plate:
                mismatches.append(("Plate", ref_comp))
                continue

            ref_well = _parse_wellpos_optional(ref_comp["Well"])
            if self.well and self.well != ref_well:
                mismatches.append(("Well", ref_well))
                continue

            ref_seq = ref_comp["Sequence"]
            if own_seq_key is not None and isinstance(ref_seq, str):
                ref_seq_key = ref_seq.replace(" ", "").replace("-", "")
                if own_seq_key != ref_seq_key:
                    mismatches.append(("Sequence", ref_seq))
                    continue

            matches.append(ref_comp)

        if len(matches) > 1:
            log.warning(
                "Strand %s has more than one location: %s. Choosing first.",
                self.name,
                [(x["Plate"], x["Well"]) for x in matches],
            )
        elif (len(matches) == 0) and len(mismatches) > 0:
            raise ValueError(
                f"Strand has only mismatched references: {self}, {mismatches}"
            )

        m = matches[0]
        ref_conc = Q_(m["Concentration (nM)"], nM)
        ref_plate = m["Plate"]
        ref_well = _parse_wellpos_optional(m["Well"])

        ss, ms = self.sequence, m["Sequence"]
        if isinstance(ms, float) and isnan(ms):
            # A float NaN cell means "no sequence given" (pandas uses NaN for
            # missing values); treat it like None rather than falling through
            # to the "unreachable" error below.
            ms = None

        if (ss is None) and (ms is None):
            seq = None
        elif isinstance(ss, str) and ((ms is None) or (ms == "")):
            seq = ss
        elif isinstance(ms, str) and ((ss is None) or isinstance(ss, str)):
            # When both are known they already compared equal above, so the
            # reference spelling is used.
            seq = ms
        else:
            raise RuntimeError("should be unreachable")

        if inplace:
            self.concentration = ref_conc
            self.plate = ref_plate
            self.well = ref_well
            self.sequence = seq
            return self
        else:
            return attrs.evolve(
                self,
                name=self.name,
                concentration=ref_conc,
                plate=ref_plate,
                well=ref_well,
                sequence=seq,
            )
def _maybesequence_comps(
    object_or_sequence: Sequence[AbstractComponent | str] | AbstractComponent | str,
) -> list[AbstractComponent]:
    """Coerce a component, a name, or a sequence of either into a list of components.

    Strings become ``Component(name)``; component objects pass through as-is.
    A bare string is handled first because a string is itself a sequence.
    """
    if isinstance(object_or_sequence, str):
        return [Component(object_or_sequence)]

    if not isinstance(object_or_sequence, Sequence):
        # A single, non-sequence component object.
        return [object_or_sequence]

    components = []
    for item in object_or_sequence:
        if isinstance(item, str):
            components.append(Component(item))
        else:
            components.append(item)
    return components


def _empty_components() -> pd.DataFrame:
    """Return an empty components frame.

    The frame has an index named ``name`` and two object-dtype columns,
    ``concentration_nM`` and ``component``.
    """
    frame = pd.DataFrame(
        index=pd.Index([], name="name"),
    )
    for column in ("concentration_nM", "component"):
        frame[column] = pd.Series([], dtype=object)
    return frame


# Register the concrete classes under their class names in the structure map.
for c in (Component, Strand):
    _STRUCTURE_CLASSES[c.__name__] = c