from __future__ import annotations
from abc import ABC, abstractmethod
from math import isnan
from typing import TYPE_CHECKING, Any, Sequence, Tuple, TypeVar, cast
import attrs
import pandas as pd
from .dictstructure import _STRUCTURE_CLASSES, _structure, _unstructure
from .locations import WellPos, _parse_wellpos_optional
from .logging import log
from .printing import TableFormat
from .units import (
NAN_VOL,
Q_,
ZERO_VOL,
DecimalQuantity,
_parse_conc_optional,
_parse_vol_optional,
nM,
ureg,
NAN_CONC
)
import polars as pl
if TYPE_CHECKING: # pragma: no cover
from attrs import Attribute
from .experiments import Experiment
from .references import Reference
T = TypeVar("T")
__all__ = ["AbstractComponent", "Component", "Strand"]
[docs]
class AbstractComponent(ABC):
"""Abstract class for a component in a mix. Custom components that don't inherit from
a concrete class should inherit from this class and implement the methods here.
"""
@property
@abstractmethod
[docs]
def name(self) -> str: # pragma: no cover
"Name of the component."
...
@property
[docs]
def location(self) -> tuple[str, WellPos | None]:
return ("", None)
@property
[docs]
def plate(self) -> str | None:
return None
@property
[docs]
def is_mix(self) -> bool:
return False
@property
[docs]
def well(self) -> WellPos | None:
return None
@property
[docs]
def _well_list(self) -> list[WellPos]:
if self.well is not None:
return [self.well]
return []
@property
[docs]
def volume(self) -> DecimalQuantity:
return NAN_VOL
@property
@abstractmethod
[docs]
def concentration(self) -> DecimalQuantity: # pragma: no cover
"(Source) concentration of the component as a pint Quantity. NaN if undefined."
...
@abstractmethod
[docs]
def all_components(self) -> pd.DataFrame: # pragma: no cover
"A dataframe of all components."
...
@abstractmethod
[docs]
def with_reference(
self: T, reference: Reference, *, inplace: bool = False
) -> T: # pragma: no cover
...
@abstractmethod
[docs]
def with_experiment(
self, experiment: Experiment, *, inplace: bool = True
) -> AbstractComponent: # pragma: no cover
...
@classmethod
@abstractmethod
[docs]
def _structure(
cls, d: dict[str, Any], experiment: Experiment | None = None
) -> AbstractComponent: # pragma: no cover
...
@abstractmethod
[docs]
def _unstructure(self, experiment: Experiment | None = None) -> dict[str, Any]:
...
[docs]
def printed_name(self, tablefmt: str | TableFormat) -> str:
return self.name
[docs]
def _update_volumes(
self,
consumed_volumes: dict[str, DecimalQuantity] | None = None,
made_volumes: dict[str, DecimalQuantity] | None = None,
_cache_key=None,
) -> Tuple[dict[str, DecimalQuantity], dict[str, DecimalQuantity]]:
"""
Given a
"""
if consumed_volumes is None:
consumed_volumes = {}
if made_volumes is None:
made_volumes = {}
if self.name in made_volumes:
# We've already been seen. Ignore our components.
return consumed_volumes, made_volumes
made_volumes[self.name] = ZERO_VOL
return consumed_volumes, made_volumes
def norm_nan_for_eq(v1: DecimalQuantity) -> DecimalQuantity:
if isnan(v1.m):
return Q_(-1, v1.u)
return v1
@attrs.define()
[docs]
class Component(AbstractComponent):
"""A single named component, potentially with a concentration and location.
Location is stored as a `plate` and `well` property. `plate` is
"""
[docs]
name: str # type: ignore
[docs]
def _get_name(self, _cache_key=None) -> str:
return self.name
[docs]
concentration: DecimalQuantity = attrs.field(
converter=_parse_conc_optional,
default=NAN_CONC,
on_setattr=attrs.setters.convert,
eq=norm_nan_for_eq,
)
[docs]
def _get_concentration(self, _cache_key=None) -> DecimalQuantity:
return self.concentration
[docs]
plate: str | None = attrs.field(
default=None,
kw_only=True
)
[docs]
well: WellPos | None = attrs.field(
converter=_parse_wellpos_optional,
default=None,
kw_only=True,
on_setattr=attrs.setters.convert,
)
[docs]
volume: DecimalQuantity = attrs.field( # type: ignore
converter=_parse_vol_optional,
default=NAN_VOL,
on_setattr=attrs.setters.convert,
eq=norm_nan_for_eq,
)
@property
[docs]
def location(self) -> tuple[str | None, WellPos | None]: # type: ignore
return (self.plate, self.well)
[docs]
def all_components_polars(self, _cache_key=None) -> pl.DataFrame:
c = self.concentration.to(nM).magnitude
if isnan(c):
c = None
comp_df = pl.DataFrame(
{
'name': [self.name],
'concentration_nM': [c],
'component': [self]
},
schema={
'name': pl.String,
'concentration_nM': pl.Decimal(scale=6),
'component': pl.Object
}
)
return comp_df
[docs]
def all_components(self) -> pd.DataFrame:
df = self.all_components_polars().to_pandas()
df.set_index('name', inplace=True)
return df
[docs]
def _unstructure(self, experiment: Experiment | None = None) -> dict[str, Any]:
d = {}
d["class"] = self.__class__.__name__
for att in cast("Sequence[Attribute]", self.__attrs_attrs__): # type: ignore
if att.name in ["reference"]:
continue
val = getattr(self, att.name)
if val is att.default:
continue
if isinstance(val, ureg.Quantity) and isnan(val.m):
continue
d[att.name] = _unstructure(val)
return d
@classmethod
[docs]
def _structure(
cls, d: dict[str, Any], experiment: Experiment | None = None
) -> Component:
for k, v in d.items():
d[k] = _structure(v, experiment)
return cls(**d)
[docs]
def with_experiment(
self: Component, experiment: Experiment, inplace: bool = True
) -> AbstractComponent:
if self.name in experiment.components:
return experiment.components[self.name]
# FIXME: add checks
else:
return self
[docs]
def with_reference(
self: Component, reference: Reference, inplace: bool = False
) -> Component:
if reference.df.index.name == "Name":
ref_by_name = reference.df
else:
ref_by_name = reference.df.set_index("Name")
try:
ref_comps = ref_by_name.loc[
[self.name], :
] # using this format to force a dataframe result
except KeyError:
return self
mismatches = []
matches = []
for _, ref_comp in ref_comps.iterrows():
ref_conc = Q_(ref_comp["Concentration (nM)"], nM)
if not isnan(self.concentration.m) and ref_conc != self.concentration:
mismatches.append(("Concentration (nM)", ref_comp))
continue
ref_plate = ref_comp["Plate"]
if self.plate and ref_plate != self.plate:
mismatches.append(("Plate", ref_comp))
continue
ref_well = _parse_wellpos_optional(ref_comp["Well"])
if self.well and self.well != ref_well:
mismatches.append(("Well", ref_well))
continue
matches.append(ref_comp)
if len(matches) > 1:
log.warning(
"Component %s has more than one location: %s. Choosing first.",
self.name,
[(x["Plate"], x["Well"]) for x in matches],
)
elif (len(matches) == 0) and len(mismatches) > 0:
raise ValueError(
f"Component has only mismatched references: {self}, {mismatches}"
)
match = matches[0]
ref_conc = ureg.Quantity(match["Concentration (nM)"], nM)
ref_plate = match["Plate"]
ref_well = _parse_wellpos_optional(match["Well"])
if inplace:
self.concentration = ref_conc
self.plate = ref_plate
self.well = ref_well
return self
else:
return attrs.evolve(
self,
name=self.name,
concentration=ref_conc,
plate=ref_plate,
well=ref_well,
)
@attrs.define()
[docs]
class Strand(Component):
"""A single named strand, potentially with a concentration, location and sequence."""
[docs]
sequence: str | None = None
[docs]
def with_reference(
self: Strand, reference: Reference, inplace: bool = False
) -> Strand:
if reference.df.index.name == "Name":
ref_by_name = reference.df
else:
ref_by_name = reference.df.set_index("Name")
try:
ref_comps = ref_by_name.loc[
[self.name], :
] # using this format to force a dataframe result
except KeyError:
return self
mismatches = []
matches = []
for _, ref_comp in ref_comps.iterrows():
ref_conc = ureg.Quantity(ref_comp["Concentration (nM)"], nM)
if not isnan(self.concentration.m) and ref_conc != self.concentration:
mismatches.append(("Concentration (nM)", ref_comp))
continue
ref_plate = ref_comp["Plate"]
if self.plate and ref_plate != self.plate:
mismatches.append(("Plate", ref_comp))
continue
ref_well = _parse_wellpos_optional(ref_comp["Well"])
if self.well and self.well != ref_well:
mismatches.append(("Well", ref_well))
continue
if isinstance(self.sequence, str) and isinstance(ref_comp["Sequence"], str):
y = ref_comp["Sequence"]
self.sequence = self.sequence.replace(" ", "").replace("-", "")
y = y.replace(" ", "").replace("-", "")
if self.sequence != y:
mismatches.append(("Sequence", ref_comp["Sequence"]))
continue
matches.append(ref_comp)
del ref_comp # Ensure we never use this again
if len(matches) > 1:
log.warning(
"Strand %s has more than one location: %s. Choosing first.",
self.name,
[(x["Plate"], x["Well"]) for x in matches],
)
elif (len(matches) == 0) and len(mismatches) > 0:
raise ValueError(
f"Strand has only mismatched references: {self}, {mismatches}"
)
m = matches[0]
ref_conc = Q_(m["Concentration (nM)"], nM)
ref_plate = m["Plate"]
ref_well = _parse_wellpos_optional(m["Well"])
ss, ms = self.sequence, m["Sequence"]
if (ss is None) and (ms is None):
seq = None
elif isinstance(ss, str) and ((ms is None) or (ms == "")):
seq = ss
elif isinstance(ms, str) and ((ss is None) or isinstance(ss, str)):
seq = ms
else:
raise RuntimeError("should be unreachable")
if inplace:
self.concentration = ref_conc
self.plate = ref_plate
self.well = ref_well
self.sequence = seq
return self
else:
return attrs.evolve(
self,
name=self.name,
concentration=ref_conc,
plate=ref_plate,
well=ref_well,
sequence=seq,
)
def _maybesequence_comps(
object_or_sequence: Sequence[AbstractComponent | str] | AbstractComponent | str,
) -> list[AbstractComponent]:
if isinstance(object_or_sequence, str):
return [Component(object_or_sequence)]
elif isinstance(object_or_sequence, Sequence):
return [Component(x) if isinstance(x, str) else x for x in object_or_sequence]
return [object_or_sequence]
def _empty_components() -> pd.DataFrame:
cps = pd.DataFrame(
index=pd.Index([], name="name"),
)
cps["concentration_nM"] = pd.Series([], dtype=object)
cps["component"] = pd.Series([], dtype=object)
return cps
for c in [Component, Strand]:
_STRUCTURE_CLASSES[c.__name__] = c