Source code for dnachisel.builtin_specifications.codon_optimization.HarmonizeRCA

import numpy as np

from ...Specification.SpecEvaluation import SpecEvaluation
from .BaseCodonOptimizationClass import BaseCodonOptimizationClass


class HarmonizeRCA(BaseCodonOptimizationClass):
    """Codon-harmonize a native sequence for a new host (Claassens method).

    This specification will optimize a Sequence 1 from Host 1 into a
    Sequence 2 for target Host 2. In short, rare Host 1 codons will be
    replaced by rare Host 2 codons, and high-frequency Host 1 codons will get
    replaced by codons that are high-frequency in Host 2.

    More specifically, each codon along Sequence 1 gets replaced by the
    synonymous codon whose Relative Codon Adaptiveness (RCA) in Host 2 is the
    closest to the RCA of the original codon in Host 1.

    A codon's RCA in a given organism is defined by f/fmax where f is the
    codon's frequency in the organism and fmax is the highest frequency of
    all synonymous codons.

    The minimized quantity is sum_i abs(RCA(c_i, H1) - RCA(c'_i, H2)) where
    c_i, c'_i represent the i-th codon before and after optimization.

    This method is taken from Claassens 2017, where they simplify a previous
    algorithm (Angov 2008), which was much more complicated as it involved
    predicting "ribosome pausing" sites in the sequence.

    Warning: always use with an EnforceTranslation constraint.

    Parameters
    ----------
    species
        Name or TaxID of the species for which to optimize the sequence. A
        custom codon_usage_table can be provided instead (or in addition, for
        species names whose codon usage table cannot be imported). A string
        of the form ``"original -> target"`` sets both ``original_species``
        and ``species`` at once.

    codon_usage_table
        Optional - can be provided instead of ``species``. A dict of the form
        ``{'*': {"TGA": 0.112, "TAA": 0.68}, 'K': ...}`` giving the RSCU
        table (relative usage of each codon).

    original_species
        Name or TaxID of the species the original sequence was taken from.
        This information will be used to spot codons which are supposed to be
        rare or common. A codon_usage_table can be provided instead (or in
        addition, for species names whose codon usage table cannot be
        imported).

    original_codon_usage_table
        A dict of the form ``{'*': {"TGA": 0.112, "TAA": 0.68}, 'K': ...}``
        giving the RSCU table (relative usage of each codon).

    location
        Location on which the specification applies.

    boost
        Score multiplicator (=weight) for when the specification is used as
        an optimization objective alongside competing objectives.

    References
    ----------
    Claassens et. al., Improving heterologous membrane protein
    production in Escherichia coli by combining transcriptional tuning and
    codon usage algorithms. PLOS One, 2017
    """

    shorthand_name = "harmonize_rca"

    def __init__(
        self,
        species=None,
        codon_usage_table=None,
        original_species=None,
        original_codon_usage_table=None,
        location=None,
        boost=1,
    ):
        # Shorthand form: ``species="original_host -> target_host"``.
        if isinstance(species, str) and "->" in species:
            original_species, species = species.split("->")
            species = species.strip()
            original_species = original_species.strip()
        BaseCodonOptimizationClass.__init__(
            self,
            species=species,
            codon_usage_table=codon_usage_table,
            location=location,
            boost=boost,
        )
        self.codons_synonyms = self.get_codons_synonyms()
        self.original_species = original_species
        self.original_codon_usage_table = self.get_codons_table(
            original_species, original_codon_usage_table
        )

        # The RCA lookup is stored inside each usage table under the "RCA"
        # key, and only computed when the table does not carry one already.
        for table in (self.codon_usage_table, self.original_codon_usage_table):
            if "RCA" not in table:
                table["RCA"] = self._compute_rca(table)

    @staticmethod
    def _compute_rca(table):
        """Return ``{codon: frequency / max_synonymous_frequency}``.

        Only entries of ``table`` keyed by a one-letter amino-acid symbol are
        used; any other (multi-character) key is skipped before its value is
        inspected.
        """
        rca = {}
        for aa, codons_frequencies in table.items():
            if len(aa) != 1 or not codons_frequencies:
                continue
            # Hoisted: the maximum is shared by all synonymous codons.
            highest_frequency = max(codons_frequencies.values())
            for codon, frequency in codons_frequencies.items():
                rca[codon] = frequency / highest_frequency
        return rca

    def initialized_on_problem(self, problem, role):
        """Return a copy of the specification bound to ``problem``.

        The copy records the codons of the problem's sequence as the
        "original" codons and, for each of them, the smallest RCA discrepancy
        that any synonymous codon can achieve in the target host. ``evaluate``
        uses these lower bounds to tell which positions are not yet optimal.
        """
        new_spec = self._copy_with_full_span_if_no_location(problem)
        new_spec.original_codons = new_spec.get_codons(problem)
        rca = new_spec.codon_usage_table["RCA"]
        rca_o = new_spec.original_codon_usage_table["RCA"]
        # The reference value is the RCA of the *original* codon in the
        # original host; only the target-host side varies over the synonyms.
        # Operand order mirrors ``evaluate`` so that optimal positions yield
        # bit-identical floats there.
        new_spec.smallest_possible_discrepancies = [
            min(
                abs(rca_o[codon] - rca[synonym])
                for synonym in self.codons_synonyms[codon]
            )
            for codon in new_spec.original_codons
        ]
        return new_spec

    def evaluate(self, problem):
        """Return the evaluation of the RCA harmonization on ``problem``.

        The score is minus the sum, over all codons of the location, of the
        absolute difference between the RCA of the original codon (original
        host table) and the RCA of the current codon (target host table).
        Reported locations are the codons whose discrepancy differs from the
        smallest discrepancy achievable at that position.
        """
        codons = self.get_codons(problem)
        target_rca = self.codon_usage_table["RCA"]
        original_rca = self.original_codon_usage_table["RCA"]

        if len(codons) == 1:
            # We are evaluating a single codon. Easy!
            score = -abs(
                target_rca[codons[0]] - original_rca[self.original_codons[0]]
            )
            return SpecEvaluation(
                self,
                problem,
                score=score,
                locations=[] if (score == 0) else [self.location],
                message="Codon harmonization on window %s scored %.02E"
                % (self.location, score),
            )

        rca_in_original_species = np.array(
            [original_rca[codon] for codon in self.original_codons]
        )
        rca_in_target_species = np.array([target_rca[codon] for codon in codons])
        discrepancies = np.abs(rca_in_original_species - rca_in_target_species)

        # Positions whose discrepancy is not the per-position minimum.
        non_optimality = (
            np.asarray(self.smallest_possible_discrepancies) - discrepancies
        )
        nonoptimal_indices = np.nonzero(non_optimality)[0]
        locations = self.codons_indices_to_locations(nonoptimal_indices)
        score = -discrepancies.sum()
        return SpecEvaluation(
            self,
            problem,
            score=score,
            locations=locations,
            message="Codon harmonization on %s scored %.02E"
            % (self.location, score),
        )

    def label_parameters(self):
        """Return the parameters shown in the specification's label."""
        if self.species is None:
            return ["(custom table)"]
        # The original host may have been given only as a custom table, in
        # which case there is no species name to concatenate.
        original = (
            "(custom table)"
            if self.original_species is None
            else self.original_species
        )
        return [original + " -> " + self.species]

    def short_label(self):
        """Return a compact label naming this specification and its target."""
        # Matches ``shorthand_name``; the label previously used here named a
        # different codon-optimization method.
        result = "harmonize-rca"
        if self.species is not None:
            result += " (%s)" % self.species
        return result