Source code for genedom.PartDomesticator

"""Defines central class BlockFinder."""
from collections import OrderedDict
import pandas
import numpy as np

from Bio import SeqIO, Restriction

from dnachisel import (AvoidPattern, reverse_complement, reverse_translate,
                       DnaOptimizationProblem, EnforceTranslation,
                       annotate_record, CodonOptimize, Location,
                       sequence_to_biopython_record, random_dna_sequence,
                       AvoidChanges, AvoidChanges)

from dnachisel.reports import (optimization_with_report,
                               SpecAnnotationsTranslator)


from dna_features_viewer import BiopythonTranslator
import matplotlib.pyplot as plt
import matplotlib.cm as cm


from .biotools import reverse_complement, sequence_to_record, annotate_record
from .StandardDomesticatorsSet import StandardDomesticatorsSet
from .DomesticationResult import DomesticationResult

def nan_to_empty_string(val):
    """Return the value unless it is NaN, then it returns an empty string."""
    return val if (isinstance(val, str) or not np.isnan(val)) else ''

[docs]class PartDomesticator: """Generic domesticator. Parameters ---------- name Domesticator name as it will appear in reports etc. description Short domesticator description as it will appear in reports etc. left_flank String. Left addition to the sequence (homology arms, enzymes sites etc.) right_flank String. Right addition to the sequence (homology arms, enz. sites etc.) constraints Either Dnachisel constraints or functions (sequence => constraint) to be applied to the sequence for optimization objectives Either Dnachisel objectives or functions (sequence => objective) to be applied to the sequence for optimization. simultaneous_mutations Number of sequences mutations to be applied simulatenously during optimization. A larger number creates more noise but could allow to solve tougher problems. minimize_edits If true, the optimizer will attempt to minimize changes while making sure the constraints hold (each edit incurs a penalty of 1 in the total optimization score). logger A proglog logger or 'bar' or None for no logger at all. """ def __init__(self, name='unnamed domesticator', left_flank='', right_flank='', constraints=(), objectives=(), description=None, simultaneous_mutations=1, minimize_edits=True, logger=None): if isinstance(left_flank, str): left_flank = sequence_to_biopython_record(left_flank) annotate_record(left_flank, label='left flank') if isinstance(right_flank, str): right_flank = sequence_to_biopython_record(right_flank) annotate_record(right_flank, label='right flank') self.name = name self.constraints = constraints self.left_flank = left_flank self.right_flank = right_flank self.constraints = list(constraints) self.objectives = list(objectives) self.description = description self.logger = logger self.simultaneous_mutations = simultaneous_mutations self.minimize_edits = minimize_edits def domesticate(self, dna_sequence=None, protein_sequence=None, is_cds=False, codon_optimization=None, extra_constraints=(), extra_objectives=(), final_record_target=None, edit=False, barcode='', barcode_spacer='AA', report_target=None): """Domesticate a sequence. Parameters ---------- dna_sequence The DNA sequence string to domesticate. protein_sequence Amino-acid sequence of the protein, which will be converted into a DNA sequence string. is_cds If True, sequence edits are restricted to synonymous mutations. codon_optimization Either None for no codon optimization or the name of an organism supported by DnaChisel. extra_constraints List of extra constraints to apply to the domesticated sequences. Each constraint is either a DnaChisel constraint or a function (dna_sequence => DnaChisel constraint). extra_objectives List of extra optimization objectives to apply to the domesticated sequences. Each objective is either a DnaChisel constraint or a function (dna_sequence => DnaChisel constraint). final_record_target Path to the file where to write the final genbank edit Turn to True to allow sequence edits (if it is false and no all constraints are originally satisfied, a failed domestication result (i.e. with attribute ``success`` set to False) will be returned. report_target Target for the sequence optimization report (a folder path, or a zip path) barcode A sequence of DNA that will be added to the left of the sequence once the domestication is done. barcode_spacer Nucleotides to be added between the barcode and the enzyme (optional, the idea here is that they will make sure to avoid the creation of unwanted cutting sites). Returns ------- final_record, edits_record, report_data, success, msg """ if protein_sequence is not None: is_cds = True dna_sequence = reverse_translate(protein_sequence) constraints = [ c(dna_sequence) if hasattr(c, '__call__') else c for c in list(extra_constraints) + self.constraints ] location = Location(len(self.left_flank), len(self.left_flank) + len(dna_sequence)) if is_cds: constraints.append(EnforceTranslation(location=location)) objectives = [ o(dna_sequence) if hasattr(o, '__call__') else o for o in list(extra_objectives) + self.objectives ] if codon_optimization: objectives.append(CodonOptimize(species=codon_optimization, location=location)) if self.minimize_edits: objectives.append(AvoidChanges()) extended_sequence = self.left_flank + dna_sequence + self.right_flank if (not is_cds) and (not edit): constraints.append(AvoidChanges()) problem = DnaOptimizationProblem( extended_sequence, constraints=constraints, objectives=objectives, logger=self.logger ) problem.n_mutations = self.simultaneous_mutations optimization_successful = True message = "" if report_target is not None: (success, message, report_data) = optimization_with_report( target=report_target, problem=problem, project_name=self.name ) optimization_successful = success else: report_data = None try: problem.resolve_constraints() problem.optimize() except Exception as err: message = str(err) optimization_successful = False report_data = None final_record = problem.to_record( with_original_features=True, with_original_spec_features=False, with_constraints=False, with_objectives=False ) edits_record = problem.to_record( with_original_features=True, with_original_spec_features=False, with_constraints=False, with_objectives=False, with_sequence_edits=True ) if final_record_target is not None: SeqIO.write(final_record, final_record_target, 'genbank') return DomesticationResult(problem.sequence_before, final_record, edits_record, report_data, optimization_successful, message) def details_list(self): """List of details for representing the domesticator in reports.""" return [(label, value) for (label, value) in [ ("Name", self.name), ("Description", self.description), ("Left addition", str(self.left_flank.seq)), ("Right addition", str(self.right_flank.seq)), ] if value not in (None, "")] def html_details(self): """HTML representation of the ``details_list``, for reports.""" return "<br />".join([ "<b>%s</b>: %s" % (name, value) for (name, value) in self.details_list() ]) @staticmethod def plot_record(record, ax=None): """Plot the given record with a custom DnaFeaturesViewer plotter.""" translator = SpecAnnotationsTranslator() gr_record = translator.translate_record(record) return gr_record.plot(ax=ax)
[docs]class GoldenGateDomesticator(PartDomesticator): """Special domesticator class for Golden-Gate standards Parameters ---------- left_overhang 4bp overhang to be added on the left right_overhang 4bp overhang to be added on the right left_addition Extra sequence of DNA to be systematically added on the left of each part between the enzyme site and the rest of the sequence. right_addition Extra sequence to be systematically added on the right of each part between the enzyme site and the rest of the sequence. enzyme Enzyme used for the Golden Gate assembly. This enzyme will be added on the flanks of the sequence, and the internal sequence will be protected against sites from this enzyme during optimization. extra_avoided_sites Other enzymes from which the sequence should be protected during optimization in addition to the assembly ``enzyme``. description Description of the domesticator as it will appear in reports. name Name of the domesticator as it will appear in reports constraints Either Dnachisel constraints or functions (sequence => constraint) to be applied to the sequence for optimization objectives Either Dnachisel objectives or functions (sequence => objective) to be applied to the sequence for optimization. """ def __init__(self, left_overhang, right_overhang, left_addition='', right_addition='', enzyme='BsmBI', extra_avoided_sites=(), description='Golden Gate domesticator', name='unnamed_domesticator', constraints=(), objectives=()): self.enzyme = enzyme self.left_overhang = left_overhang left_overhang = sequence_to_biopython_record(left_overhang) self.right_overhang = right_overhang right_overhang = sequence_to_biopython_record(right_overhang) for seq in [left_overhang, right_overhang]: annotate_record(seq, label=str(seq.seq)) enzyme_seq = Restriction.__dict__[enzyme].site enzyme_seq = sequence_to_biopython_record(enzyme_seq) annotate_record(enzyme_seq, label=enzyme) self.enzyme_seq = enzyme_seq left_flank = self.enzyme_seq + "A" + left_overhang + left_addition right_flank = (right_addition + right_overhang + (self.enzyme_seq + "A").reverse_complement()) constraints = list(constraints) + [ (lambda seq: AvoidPattern( enzyme=enzyme, location=Location(len(left_flank), len(left_flank) + len(seq)))) for enz in ([enzyme] + list(extra_avoided_sites)) ] PartDomesticator.__init__(self, left_flank=left_flank, right_flank=right_flank, constraints=constraints, objectives=objectives, description=description, name=name) def __repr__(self): return "GgDomesticator[%s](%s-%s)" % (self.enzyme, self.left_overhang, self.right_overhang) def __str__(self): return "GgDomesticator[%s](%s-%s)" % (self.enzyme, self.left_overhang, self.right_overhang) def details_list(self): return PartDomesticator.details_list(self) + [ ("Enzyme", "%s (%s)" % (self.enzyme, self.enzyme_seq)), ("Left overhang", self.left_overhang), ("Right overhang", self.right_overhang) ] @staticmethod def standard_from_spreadsheet(path=None, dataframe=None, name_prefix=''): """Parse a spreadsheet into a standard with Golden Gate domesticators. The input should be a table with columns names as follows: slot_name, left_overhang, right_overhang, left_addition, right_addition, enzyme, extra_avoided_sites, description. Parameters ---------- path Path to a CSV or XLS(X) file. A dataframe can be provided instead. dataframe A pandas Dataframe which can be provided instead of a path """ if path is not None: if path.lower().endswith(".csv"): dataframe = pandas.read_csv(path) else: dataframe = pandas.read_excel(path) return StandardDomesticatorsSet(OrderedDict([ (row.slot_name, GoldenGateDomesticator( left_overhang=row.left_overhang, right_overhang=row.right_overhang, left_addition=nan_to_empty_string(row.left_addition), right_addition=nan_to_empty_string(row.right_addition), enzyme=row.enzyme, extra_avoided_sites = [ e.strip() for e in row.extra_avoided_sites.split(',') ] if hasattr(row.extra_avoided_sites, 'split') else [], description=row.description, name=name_prefix + row.slot_name )) for i, row in dataframe.iterrows() ]))