from flametree import file_tree
import proglog
import pandas
from ..tools import format_data_dicts_records_for_spreadsheet
from ..biotools import write_record
from ..Assembly.AssemblyReportWriter import AssemblyReportWriter
from .plot_leveled_graph import plot_leveled_graph
import matplotlib.pyplot as plt
try:
import pdf_reports
PDF_REPORTS_AVAILABLE = True
except ImportError:
PDF_REPORTS_AVAILABLE = False
from ..reports import write_simulation_pdf_report
class AssemblyPlanSimulation:
    def __init__(
        self,
        assembly_plan,
        assembly_simulations,
        sequence_repository=None,
        cancelled=(),
    ):
        """Class to represent and report on the simulation of a whole plan.

        This is the product of AssemblyPlan.simulate().

        Parameters
        ----------

        assembly_plan
          The AssemblyPlan from which the instance was created.

        assembly_simulations
          List of AssemblySimulation instances, in the same order as the
          Assembly instances in the assembly_plan.

        sequence_repository
          SequenceRepository that provided the part records for the simulation.
          May be None, in which case original part records cannot be exported.

        cancelled
          Sequence of entries describing the assemblies that were cancelled
          because they depended on assemblies for which the simulation errored.
          Each entry must expose ``assembly_name`` and ``failed_dependency``
          attributes (both are written to the cancelled-assemblies
          spreadsheet).
        """
        self.assembly_plan = assembly_plan
        self.assembly_simulations = assembly_simulations
        self.sequence_repository = sequence_repository
        self.cancelled = cancelled

    def compute_all_construct_data_dicts(self):
        """Return the list of data dict for each assembly simulation."""
        return [
            data_dict
            for simulation in self.assembly_simulations
            for data_dict in simulation.compute_all_construct_data_dicts()
        ]

    def compute_summary_dataframe(self):
        """Return a pandas DataFrame with one row per simulated construct.

        Columns are ordered as: a fixed set of leading columns, then any
        extra fields found in the construct data dicts (sorted), then
        ``parts`` last.
        """
        first_columns = [
            "assembly_name",
            "construct_id",
            "assembly_level",
            "construct_size",
            "number_of_parts",
        ]
        construct_data_dicts = self.compute_all_construct_data_dicts()
        # Built once so the membership test is not re-evaluated (and the list
        # not re-concatenated) for every field of every data dict.
        reserved_columns = set(first_columns) | {"parts"}
        extra_data_columns = sorted(
            {
                field
                for data_dict in construct_data_dicts
                for field in data_dict
                if field not in reserved_columns
            }
        )
        columns = first_columns + extra_data_columns + ["parts"]
        data = format_data_dicts_records_for_spreadsheet(construct_data_dicts)
        return pandas.DataFrame(data, columns=columns)

    def compute_stats(self):
        """Return a dictionary of stats.

        For instance {"cancelled_assemblies": 2, "errored_assemblies": 1,
        "valid_assemblies": 5}.
        """
        errored = [s for s in self.assembly_simulations if len(s.errors) > 0]
        valid = [s for s in self.assembly_simulations if len(s.errors) == 0]
        return {
            "cancelled_assemblies": len(self.cancelled),
            "errored_assemblies": len(errored),
            "valid_assemblies": len(valid),
        }

    def write_report(
        self,
        target,
        folder_name="auto",
        assembly_report_writer="default",
        logger="bar",
        include_original_parts_records=True,
    ):
        """Write a comprehensive report to a folder or zip file.

        Parameters
        ----------

        target
          Either a path to a folder, to a zip file, or ``"@memory"`` to write
          into a virtual zip file whose raw data is then returned.

        folder_name
          Name of the folder created inside the target to host the report (yes,
          it is a folder inside a folder, which can be very practical). With
          "auto", the name is derived from the assembly plan's name.

        assembly_report_writer
          Either the "default" or any AssemblyReportWriter instance.

        logger
          Either "bar" for a progress bar, or None, or any Proglog logger.

        include_original_parts_records
          If true, the original provided part records will be included in the
          report (creates larger file sizes, but better for traceability).
          Ignored when the simulation has no sequence repository.

        Returns
        -------

        The raw zip data when ``target`` is ``"@memory"``, otherwise None.

        Raises
        ------

        ImportError
          If the report writer requests a PDF report but ``pdf_reports`` is
          not installed.
        """
        if assembly_report_writer == "default":
            # We'll write all records into one folder for the whole plan
            assembly_report_writer = AssemblyReportWriter(include_part_records=False)
        logger = proglog.default_bar_logger(logger)
        if folder_name == "auto":
            # Same result as `name + "_simulation"` for named plans, but does
            # not fail when the plan has no name.
            folder_name = self._get_file_name("simulation")
        report_root = file_tree(target)._dir(folder_name, replace=True)
        self._write_assembly_reports(report_root, assembly_report_writer, logger=logger)
        self._write_errors_spreadsheet(report_root, error_type="error")
        self._write_errors_spreadsheet(report_root, error_type="warning")
        self._write_all_required_parts(report_root)
        self._write_construct_summary_spreadsheet(report_root)
        self._write_assembly_plan_spreadsheets(report_root)
        self._write_summary_stats(report_root)
        if len(self.cancelled):
            self._write_cancelled_assemblies(report_root)
        # The repository is optional in the constructor, so only export the
        # part records when there is one to read them from.
        if include_original_parts_records and self.sequence_repository is not None:
            self._write_all_required_parts_records(report_root)
        if not self.has_single_level:
            self._plot_assembly_graph(report_root)
        if assembly_report_writer.include_pdf_report:
            if not PDF_REPORTS_AVAILABLE:
                raise ImportError(
                    "Could not load PDF Reports. Install with `pip install pdf_reports`"
                    " to generate a PDF report."
                )
            simulation_info = self._calculate_simulation_info()
            write_simulation_pdf_report(
                report_root._file("Report.pdf"), simulation_info
            )
        if target == "@memory":
            return report_root._close()

    @property
    def has_single_level(self):
        """True if the assembly plan has exactly one level."""
        return len(self.assembly_plan.levels) == 1

    def _get_file_name(self, filename):
        """Prefix ``filename`` with the plan's name (if any) and an underscore."""
        name = self.assembly_plan.name
        prefix = (name + "_") if name else ""
        return prefix + filename

    def _write_summary_stats(self, report_root):
        """Write the ``compute_stats()`` values, one ``key: value`` per line."""
        filename = self._get_file_name("simulation_stats.csv")
        stats = self.compute_stats()
        lines = ["%s: %s" % (k, v) for (k, v) in sorted(stats.items())]
        report_root._file(filename).write("\n".join(lines))

    def _write_cancelled_assemblies(self, report_root):
        """Write a CSV pairing each cancelled assembly with its failed parent."""
        filename = self._get_file_name("cancelled_assemblies.csv")
        columns = ",".join(["cancelled_assembly", "failed_parent_assembly"])
        cancelled = [
            ",".join([c.assembly_name, c.failed_dependency]) for c in self.cancelled
        ]
        report_root._file(filename).write("\n".join([columns] + cancelled))

    def _plot_assembly_graph(self, report_root):
        """Plot the plan's leveled parts/assemblies graph to a PDF file."""

        def parts_sort_key(name):
            # Order parts by the first assembly that uses them; parts used by
            # no assembly are pushed to the end.
            assemblies = enumerate(self.assembly_plan.assemblies)
            indices = [i for i, asm in assemblies if name in asm.parts]
            if indices == []:
                return 1000000
            return indices[0]

        all_parts = self.list_all_original_parts_used() + self.assembly_plan.all_parts
        all_parts = sorted(set(all_parts), key=parts_sort_key)

        def sort_key(name):
            # Order assemblies by their position in the assembly plan.
            assemblies = enumerate(self.assembly_plan.assemblies)
            return [i for i, asm in assemblies if asm.name == name][0]

        levels_dict = self.assembly_plan.levels
        # First "level" of the plot is the parts, followed by one level of
        # assembly names per plan level, in increasing level order.
        levels = [all_parts] + [
            sorted([assembly.name for assembly in assemblies], key=sort_key)
            for _, assemblies in sorted(levels_dict.items())
        ]
        edges = list(self.assembly_plan.graph.edges())

        def draw_node(x, y, node, ax):
            text = node.replace("_", " ")
            ax.text(x, y, text, bbox={"facecolor": "white"})

        _, ax = plot_leveled_graph(levels=levels, edges=edges, draw_node=draw_node)
        target = report_root._file("assembly_plan_graph.pdf")
        ax.figure.savefig(target.open("wb"), format="pdf")
        # Release the figure so repeated reports do not accumulate open figures.
        plt.close(ax.figure)

    def _write_errors_spreadsheet(self, report_root, error_type="error"):
        """Write a semicolon-separated sheet of all errors (or warnings).

        Parameters
        ----------

        report_root
          Directory object in which the spreadsheet file is created.

        error_type
          "error" to list the simulations' errors; any other value lists
          their warnings. Also used to build the file name
          (``<plan>_<error_type>s.csv``).

        No file is written when there is nothing to report.
        """
        all_errors = [
            error
            for simulation in self.assembly_simulations
            for error in (
                simulation.errors if error_type == "error" else simulation.warnings
            )
        ]
        if len(all_errors) == 0:
            return
        columns = ";".join(
            ["assembly_name", "message", "suggestion", "data", "used_in"]
        )
        all_error_rows = [
            ";".join(
                [
                    err.assembly.name,
                    err.message,
                    # This field was missing, which shifted "data" and
                    # "used_in" under the wrong headers. A missing or empty
                    # suggestion yields an empty cell.
                    str(getattr(err, "suggestion", "") or ""),
                    err.data_as_string(),
                    " & ".join(err.assembly.dependencies["used_in"]),
                ]
            )
            for err in all_errors
        ]
        # Same naming helper as the other report files, so an unnamed plan
        # does not produce a "None_..." file name.
        filename = self._get_file_name("%ss.csv" % error_type)
        errors_spreadsheet = report_root._file(filename)
        errors_spreadsheet.write("\n".join([columns] + all_error_rows))

    def _write_assembly_reports(self, report_root, report_writer, logger):
        """Write one report folder per simulation, plus all construct records."""
        all_records_folder = report_root._dir("all_construct_records")
        logger(message="Generating assemblies reports...")
        for simulation in logger.iter_bar(assembly=self.assembly_simulations):
            # TODO: skip cancelled assemblies!
            assembly_folder = report_root._dir(simulation.assembly.name)
            simulation.write_report(
                target=assembly_folder, report_writer=report_writer,
            )
            for record in simulation.construct_records:
                target = all_records_folder._file(record.id + ".gb")
                write_record(record, target.open("w"), "genbank")

    def _write_construct_summary_spreadsheet(self, report_root):
        """Write the summary dataframe as a CSV file."""
        data = self.compute_summary_dataframe()
        file_name = self._get_file_name("summary.csv")
        data.to_csv(report_root._file(file_name).open("w"), index=False)

    def list_all_original_parts_used(self):
        """Return the sorted names of used parts that are not assemblies.

        These are all parts listed by the simulations, minus the names of
        the simulated assemblies themselves.
        """
        all_parts = [
            part
            for simulation in self.assembly_simulations
            for part in simulation.list_all_parts_used()
        ]
        assemblies = [
            simulation.assembly.name for simulation in self.assembly_simulations
        ]
        parts_that_arent_assembled = set(all_parts).difference(set(assemblies))
        return sorted(parts_that_arent_assembled)

    def _write_all_required_parts(self, report_root):
        """Write the names of all original parts used, one per line."""
        all_parts = self.list_all_original_parts_used()
        file_name = self._get_file_name("all_required_parts.txt")
        report_root._file(file_name).write("\n".join(all_parts))

    def _write_all_required_parts_records(self, report_root):
        """Write a Genbank file for each original part used.

        Records are fetched from ``self.sequence_repository``, which must
        therefore not be None.
        """
        all_parts = self.list_all_original_parts_used()
        part_records = self.sequence_repository.get_records(all_parts)
        records_dir = report_root._dir("part_records")
        for part_record in part_records:
            filename = part_record.id + ".gb"
            target = records_dir._file(filename)
            # Pass an open handle, as done for the construct records.
            write_record(part_record, target.open("w"), "genbank")

    def _write_assembly_plan_spreadsheets(self, report_root):
        """Write one ``construct,parts`` CSV file per assembly level."""
        data = self.compute_summary_dataframe()
        for level, subdata in data.groupby("assembly_level"):
            construct_parts = [
                (row.construct_id, row.parts.split(" & "))
                for _, row in subdata.iterrows()
            ]
            if self.has_single_level:
                file_name = self._get_file_name("assembly_plan.csv")
            else:
                file_name = "constructs_level_%s.csv" % level
                file_name = self._get_file_name(file_name)
            f = report_root._file(file_name)
            lines = [",".join([c] + parts) for c, parts in construct_parts]
            f.write("\n".join(["construct,parts"] + lines))

    def _calculate_simulation_info(self):
        """Return the stats as a two-column dataframe for the PDF report."""
        stats_dict = self.compute_stats()
        stats_dict_series = {
            "Outcome": pandas.Series(["Valid", "Cancelled", "Errored"]),
            "Number of assemblies": pandas.Series(
                [
                    stats_dict["valid_assemblies"],
                    stats_dict["cancelled_assemblies"],
                    stats_dict["errored_assemblies"],
                ]
            ),
        }
        stats_df = pandas.DataFrame(stats_dict_series)
        return stats_df