# Source code for dnachisel.reports.constraints_reports.constraints_breaches_dataframe
try:
import pandas
PANDAS_AVAILABLE = True
except ImportError:
PANDAS_AVAILABLE = False
from ...DnaOptimizationProblem import DnaOptimizationProblem
from ...Location import Location
def _breaches(constraint, sequence):
    """Return the breach locations of ``constraint`` on ``sequence`` as text.

    The constraint is initialized on a throwaway ``DnaOptimizationProblem``
    built from the sequence, then evaluated. The locations reported by the
    evaluation are merged where they overlap and rendered as a single
    ``", "``-separated string (empty when no location is reported).

    Parameters
    ----------
    constraint
      The specification to evaluate; it must provide
      ``initialized_on_problem`` and the initialized copy must provide
      ``evaluate``.

    sequence
      The sequence handed to ``DnaOptimizationProblem``.
    """
    # NOTE(review): the empty mutation_space presumably avoids computing a
    # real one for this evaluation-only problem -- confirm.
    scratch_problem = DnaOptimizationProblem(sequence, mutation_space={})
    bound_constraint = constraint.initialized_on_problem(
        scratch_problem, role=None
    )
    breach_locations = bound_constraint.evaluate(scratch_problem).locations
    merged_locations = Location.merge_overlapping_locations(breach_locations)
    return ", ".join(str(location) for location in merged_locations)
def _install_extras_message(libname):
return (
"Could not load %s (is it installed ?). You can install it separately "
" with: pip install %s\n\n"
"Install all dependencies for generating DNA Chisel reports with:"
"\n\npip install dnachisel[reports]"
% (libname, libname.lower().replace(" ", "_"))
)
def constraints_breaches_dataframe(
    constraints, sequences, display_constraints_locations=False,
):
    """Return a dataframe summarizing constraints breaches in the sequences.

    Output dataframe schema (cst = constraint):

    =====  ========  ======================
    /      Cst1      Cst2
    =====  ========  ======================
    Seq1   10-50(+)  100-200(+), 300-350(+)
    Seq2
    Seq3   2-10(+)
    Seq4             500-1000(-)
    =====  ========  ======================

    Parameters
    ----------
    constraints
      A list of DNA Chisel Specifications.

    sequences
      Either a list [("name", "sequence")...] or a dict {"name": "sequence"}
      or a list of biopython records whose id is the sequence name.

    display_constraints_locations
      Passed as ``with_location`` to each constraint's ``label()`` when
      building the column names.

    Returns
    -------
    pandas.DataFrame
      One row per sequence (index named ``"sequence"``) and one column per
      constraint label; each cell is the comma-separated list of breach
      locations, or an empty string. An empty ``sequences`` yields a
      dataframe with the constraint columns and no rows.

    Raises
    ------
    ImportError
      If pandas is not installed.

    Examples
    --------
    >>> import dnachisel as dc
    >>> from dnachisel.utils import constraints_breaches_dataframe
    >>> sequences = [
    ...     ("SEQ 1", "ATTGTGCATGTGACAC"),
    ...     ("SEQ 2", "ACATGTGTTGTGACAC"),
    ...     ("SEQ 3", "TTGTGCACACATGTGA"),
    ... ]
    >>> constraints = [
    ...     dc.AvoidPattern('ATTG'),
    ...     dc.EnforceGCContent(0.4, 0.6),
    ...     dc.UniquifyAllKmers(5)
    ... ]
    >>> dataframe = constraints_breaches_dataframe(constraints, sequences)
    >>> dataframe.to_excel('summary_spreadsheet.xlsx')
    """
    if not PANDAS_AVAILABLE:
        raise ImportError(_install_extras_message("Pandas"))

    # Materialize the input once so it can be indexed and iterated safely,
    # even when the caller passes a one-shot iterable.
    if isinstance(sequences, dict):
        sequences = list(sequences.items())
    else:
        sequences = list(sequences)

    # Column labels do not depend on the sequence: compute them once. This
    # also keeps every row complete when `constraints` is a one-shot iterable.
    labeled_constraints = [
        (
            constraint.label(
                use_breach_form=True,
                with_location=display_constraints_locations,
            ),
            constraint,
        )
        for constraint in constraints
    ]

    if not sequences:
        # Nothing to evaluate: return an empty frame with the expected shape
        # instead of failing on sequences[0]. Labels are de-duplicated the
        # same way the per-row dicts below would collapse them.
        unique_labels = list(
            dict.fromkeys(label for label, _ in labeled_constraints)
        )
        return pandas.DataFrame(
            columns=unique_labels,
            index=pandas.Index([], name="sequence"),
        )

    # Record-like objects (anything exposing `.id`) are named after their id;
    # the record itself is kept as the sequence to evaluate.
    if hasattr(sequences[0], "id"):
        sequences = [(record.id, record) for record in sequences]

    dataframe_records = [
        {
            "sequence": name,
            **{
                label: _breaches(constraint, sequence)
                for label, constraint in labeled_constraints
            },
        }
        for name, sequence in sequences
    ]
    return pandas.DataFrame.from_records(dataframe_records, index="sequence")