Source code for dnacauldron.utils.insert_parts_on_backbones

"""Useful functions built on top of the DnaCauldron classes to simplify the`````
most common operations."""

import pandas
from ..AssemblyMix import (
    AssemblyMixError,
    generate_type2s_restriction_mix,
    RestrictionLigationMix,
)
from ..biotools import reverse_complement, write_record, autoselect_enzyme
import flametree


class BackboneChoice:
    """Class to represent the result of a backbone autoselection.

    Parameters
    ----------

    record
      Record that was analyzed, containing an insert, to be backbonized, and
      potentially an original backbone too.

    already_on_backbone
      Was a backbone detected in that record ?

    error
      Exception raised while processing the record, or None if processing
      went fine.

    backbone_record
      Record of the backbone that was selected for this record among all
      provided choices. May also be a plain string (e.g. "none found") or
      None when no backbone was selected.

    final_record
      Record featuring the original insert in the given ``record`` cloned into
      the auto-selected backbone.
    """

    def __init__(
        self,
        record,
        already_on_backbone=None,
        error=None,
        backbone_record=None,
        final_record=None,
    ):
        """Initialize."""
        self.record = record
        self.already_on_backbone = already_on_backbone
        self.backbone_record = backbone_record
        self.final_record = final_record
        self.error = error

    def __repr__(self):
        """Write [recordname] (already/inserted on backbone)."""
        if self.already_on_backbone:
            return "%s (already on backbone)" % self.record.id
        if isinstance(self.backbone_record, str):
            return "%s: no backbone found." % self.record.id
        if self.backbone_record is None:
            # Choices created from a failure (``BackboneChoice(rec, error=e)``)
            # have no backbone at all: reading ``.id`` on None would crash.
            if self.error is not None:
                return "%s: error (%s)" % (self.record.id, self.error)
            return "%s: no backbone found." % self.record.id
        return "%s inserted on %s" % (
            self.record.id,
            self.backbone_record.id,
        )

    def to_dict(self):
        """Return the object as a dict. Used for dataframe/spreadsheet.

        The keys are 'original_record', 'already_on_backbone' ("yes"/"no"),
        'detected_backbone' (backbone id, or "" when the backbone has no
        ``id``), 'final_record_length' and 'error' ("" when no error).
        """
        backbone, final_record = self.backbone_record, self.final_record
        detected = backbone.id if hasattr(backbone, "id") else ""
        final_length = len(final_record) if final_record else len(self.record)
        if self.error:
            # Fall back on str(error) when the error carries no arguments
            # (indexing ``args[0]`` would raise IndexError otherwise).
            args = getattr(self.error, "args", ())
            error_message = str(args[0]) if args else str(self.error)
        else:
            error_message = ""
        return dict(
            original_record=self.record.id,
            already_on_backbone="yes" if self.already_on_backbone else "no",
            detected_backbone=detected,
            final_record_length=final_length,
            error=error_message,
        )

    @staticmethod
    def list_to_infos_spreadsheet(choices):
        """Return a pandas dataframe summarizing a list of BackboneChoices.

        The dataframe's columns are 'original_record', 'already_on_backbone',
        'detected_backbone', 'final_record_length', 'error'.
        """
        return pandas.DataFrame.from_records(
            [choice.to_dict() for choice in choices],
            columns=[
                "original_record",
                "already_on_backbone",
                "detected_backbone",
                "final_record_length",
                "error",
            ],
        )

    @staticmethod
    def write_final_records(choices, directory):
        """Write a list of BackboneChoices final records as genbanks.

        One ``<record id>.gb`` file is written in ``directory`` per choice.
        Choices with no record to write (no final record and not already on
        a backbone) are skipped.
        """
        target_dir = flametree.file_tree(directory)
        for choice in choices:
            if choice.already_on_backbone:
                record = choice.record
            else:
                record = choice.final_record
            if record is not None:
                # The written record is renamed after the original part.
                record.id = choice.record.id
                target_file = target_dir._file("%s.gb" % choice.record.id)
                write_record(record, target_file, "genbank")
def _get_insert_from_record(record, enzyme="BsmBI"):
    """Return the record of the one digested fragment without enzyme site.

    Raises a ValueError if the digestion does not yield exactly one such
    fragment.
    """
    mix = generate_type2s_restriction_mix(parts=[record], enzyme=enzyme)
    inserts = [frag for frag in mix.filtered_fragments if not frag.is_reversed]
    if len(inserts) != 1:
        raise ValueError(
            "Expected exactly 1 site-less fragment when digesting record %s "
            "with %s, found %d." % (record.id, enzyme, len(inserts))
        )
    return inserts[0]


def _standardize_overhangs_pair(overhangs):
    """Standardize a pair of overhangs (o1, o2).

    Returns either ``(o1, o2)`` or its reverse complement ``(rev_o2, rev_o1)``,
    whichever is smaller in alphabetical order.
    """
    o1, o2 = overhangs
    ro1, ro2 = [reverse_complement(o) for o in (o1, o2)]
    return min((o1, o2), (ro2, ro1))


def get_overhangs_from_record(record, enzyme="BsmBI", standardize=True):
    """Return a pair (o1, o2) of overhangs found by record digestion.

    If ``standardize=True`` the pair returned is either ``(o1, o2)`` or its
    reverse complement ``(rev_o2, rev_o1)``, whichever is smaller in
    alphabetical order.
    """
    insert = _get_insert_from_record(record, enzyme=enzyme)
    overhangs = str(insert.seq.left_end), str(insert.seq.right_end)
    return _standardize_overhangs_pair(overhangs) if standardize else overhangs


def _records_to_overhangs_dict(
    records, allow_multiple_choices=False, enzyme="BsmBI"
):
    """Return ``{(o1, o2): rec}`` where o1, o2 are standardized overhangs.

    If ``allow_multiple_choices=True`` it will return ``{(o1, o2): [r1, r2]}``
    when several records share the same overhangs pair.

    If ``allow_multiple_choices=False`` and several records share the same
    overhangs pair, a ValueError is raised.

    ``enzyme`` is the name of the enzyme used to digest the records (it
    defaults to "BsmBI", the enzyme that was previously always used).
    """
    result = {}
    for record in records:
        overhangs = get_overhangs_from_record(record, enzyme=enzyme)
        if allow_multiple_choices:
            result.setdefault(overhangs, []).append(record)
        elif overhangs in result:
            raise ValueError(
                "Vector %s has same overhangs as %s"
                % (record.id, result[overhangs].id)
            )
        else:
            result[overhangs] = record
    return result
def record_contains_backbone(record, enzyme="BsmBI", min_backbone_length=500):
    """Return True iff it believes the given record contains a backbone.

    A backbone is detected if, when cutting the circularized record with the
    given enzyme, there is one fragment with no site (the insert), and the
    rest has a total size above the given ``min_backbone_length``.

    Parameters
    ----------

    record
      Record to analyze.

    enzyme
      Name of the enzyme used for the digestion, e.g. 'BsmBI', 'BsaI', ...

    min_backbone_length
      The record is considered to contain a backbone when the sequence left
      outside the insert is strictly longer than this.

    Raises
    ------

    AssemblyMixError
      If the digestion yields no site-less fragment.
    """
    # Digest with the requested enzyme (it used to be hard-coded to "BsmBI",
    # which silently ignored the ``enzyme`` argument).
    mix = generate_type2s_restriction_mix(parts=[record], enzyme=enzyme)
    fragments = [
        frag for frag in mix.filtered_fragments if not frag.is_reversed
    ]
    if not fragments:
        raise AssemblyMixError(
            "No site-less fragment found digesting record " + record.id,
            mix=mix,
        )
    insert = fragments[0]
    return (len(record) - len(insert)) > min_backbone_length
def swap_donor_vector_part(
    donor_vector, insert, enzyme, allow_overhangs_edits=False
):
    """Return the record obtained by cloning an insert into a donor vector.

    Meant for Type-2S assembly standards only (Golden Gate, etc.)

    The donor vector is digested and every fragment still carrying an enzyme
    site is kept (i.e. everything but the part it currently holds). The
    insert record is digested too and its single site-less fragment is
    ligated with the kept donor fragments into one circular construct.

    Parameters
    ----------

    donor_vector
      Biopython record of a donor vector. Must have an insert producing a
      restriction-free fragment.

    insert
      Biopython record of a plasmid or a linear DNA sequence containing an
      insert (i.e. a fragment that is cut out).

    enzyme
      The name of the enzyme to use e.g. 'BsmBI', 'BsaI', ...

    allow_overhangs_edits
      Not used by this function.
    """

    def count_sites(digestion, fragment):
        # The leading "A" is kept from the original implementation
        # (presumably padding so sites at the very start are found).
        sequence = "A" + fragment.seq.to_standard_sequence()
        return len(digestion.enzymes[0].search(sequence))

    # Donor side: keep the fragments which still contain an enzyme site.
    donor_mix = generate_type2s_restriction_mix(
        parts=[donor_vector], enzyme=enzyme
    )
    backbone_fragments = []
    for fragment in donor_mix.fragments:
        if count_sites(donor_mix, fragment) > 0:
            backbone_fragments.append(fragment)
    for fragment in backbone_fragments:
        kept_features = []
        for feature in fragment.features:
            if "source" not in feature.qualifiers:
                kept_features.append(feature)
        fragment.features = kept_features
    # Exactly one donor fragment (the part being replaced) must be site-less.
    assert len(backbone_fragments) == len(donor_mix.fragments) - 1

    # Insert side: there must be a single site-less fragment.
    insert_mix = generate_type2s_restriction_mix(parts=[insert], enzyme=enzyme)
    siteless_fragments = []
    for fragment in insert_mix.fragments:
        if count_sites(insert_mix, fragment) == 0:
            siteless_fragments.append(fragment)
    assert len(siteless_fragments) == 1

    # Ligate the insert with the donor fragments: one construct is expected.
    ligation_mix = RestrictionLigationMix(
        fragments=[siteless_fragments[0]] + backbone_fragments,
        enzymes=[enzyme],
        fragment_filters=(),
    )
    constructs = list(ligation_mix.compute_circular_assemblies())
    assert len(constructs) == 1
    return constructs[0]
def _index_backbones_by_overhangs(backbone_records, enzyme):
    """Return ``{(o1, o2): backbone}`` for overhangs obtained with ``enzyme``."""
    backbones = {}
    for backbone in backbone_records:
        overhangs = get_overhangs_from_record(backbone, enzyme=enzyme)
        if overhangs in backbones:
            raise ValueError(
                "Vector %s has same overhangs as %s"
                % (backbone.id, backbones[overhangs].id)
            )
        backbones[overhangs] = backbone
    return backbones


def insert_parts_on_backbones(
    part_records,
    backbone_records,
    enzyme="autodetect",
    min_backbone_length=500,
    process_parts_with_backbone=False,
    default_backbone_choice=None,
):
    """Autodetect the right backbone for each Golden Gate part.

    This method is meant to process a batch of genbank files, some of which
    might represent a part on a backbone, and some of which represent
    simply a part (and enzyme-flanked overhangs) which needs to be complemented
    with the right backbone.

    It will return, for each part, whether it has already a backbone, and
    if not, which backbone was selected and what the final sequence is.

    Parameters
    ----------

    part_records
      List of genbanks of the parts to put on vectors.

    backbone_records
      Vectors to insert parts in, typically donor vectors for different
      positions of an assembly standard.

    enzyme
      Enzyme to use. Use autodetect for autodetection.

    min_backbone_length
      Minimal length of a backbone. Used to determine if a part is
      represented alone or with a backbone.

    process_parts_with_backbone
      If true, parts will be inserted in an autoselected backbone even when
      they already have a backbone (it will be replaced).

    default_backbone_choice
      Optional function ``record => BackboneChoice`` called for the parts
      whose overhangs match none of the backbones. If None, such parts get
      a BackboneChoice with ``backbone_record="none found"`` and no final
      record.

    Returns
    -------

    backbone_choices
      A list with one BackboneChoice per part record, in the same order.
      Parts whose processing raised an AssemblyMixError get a BackboneChoice
      carrying that error.
    """
    if enzyme == "autodetect":
        enzyme = autoselect_enzyme(
            part_records, ["BsmBI", "BsaI", "BbsI", "SapI", "AarI"]
        )
    # Backbones must be digested with the same enzyme as the parts and the
    # final swap, otherwise the overhangs being compared are unrelated.
    overhangs_dict = _index_backbones_by_overhangs(backbone_records, enzyme)
    backbone_choices = []
    for record in part_records:
        try:
            has_backbone = record_contains_backbone(
                record, enzyme=enzyme, min_backbone_length=min_backbone_length
            )
            if has_backbone and not process_parts_with_backbone:
                choice = BackboneChoice(record, already_on_backbone=True)
            else:
                overhangs = get_overhangs_from_record(record, enzyme=enzyme)
                if overhangs in overhangs_dict:
                    backbone_record = overhangs_dict[overhangs]
                    final_record = swap_donor_vector_part(
                        donor_vector=backbone_record,
                        insert=record,
                        enzyme=enzyme,
                    )
                    choice = BackboneChoice(
                        record=record,
                        already_on_backbone=has_backbone,
                        backbone_record=backbone_record,
                        final_record=final_record,
                    )
                elif default_backbone_choice is not None:
                    choice = default_backbone_choice(record)
                else:
                    choice = BackboneChoice(
                        record=record,
                        already_on_backbone=has_backbone,
                        backbone_record="none found",
                        final_record=None,
                    )
        except AssemblyMixError as e:
            # A failing part is reported in the results rather than aborting
            # the whole batch.
            choice = BackboneChoice(record, error=e)
        backbone_choices.append(choice)
    return backbone_choices