Source code for dnachisel.builtin_specifications.codon_optimization.MaximizeCAI

import numpy as np

from .BaseCodonOptimizationClass import BaseCodonOptimizationClass
from ...Specification.SpecEvaluation import SpecEvaluation

class MaximizeCAI(BaseCodonOptimizationClass):
    """Codon-optimize a coding sequence for a given species. Maximizes the CAI.

    To be precise, the score computed by this specification is N*log(CAI),
    where N is the number of codons and CAI is the Codon Adaptation Index.
    Maximizing this score also maximizes the CAI.

    For a sequence with N codons, the CAI is the geometric mean of the
    Relative Codon Adaptiveness (RCA) of the different codons. The RCA of a
    codon is (f_i / fmax_i), where f_i is the frequency of the codon in the
    codon usage table and fmax_i is the maximal frequency among its
    synonymous codons.

    So N*log(CAI) = sum_i ( log(f_i) - log(fmax_i) )

    This score is between -inf. and 0 (0 meaning a perfectly optimal
    sequence). Frequencies equal to zero are replaced by 0.001 before taking
    the logarithm, both for f_i and for fmax_i, so that the score stays
    finite and never exceeds 0.

    Parameters
    ----------
    species
      Species for which the sequence will be codon-optimized.
      Either a TaxID (this requires a web connection as the corresponding
      table will be downloaded from the internet) or the name of the species
      to codon-optimize for (the name must be supported by
      ``python_codon_tables`` e.g. ``e_coli``, ``s_cerevisiae``,
      ``h_sapiens``, ``c_elegans``, ``b_subtilis``, ``d_melanogaster``).
      Note that a ``codon_usage_table`` can be provided instead, or even in
      addition, for species whose codon usage table cannot be auto-imported.

    location
      Either a DnaChisel Location or a tuple of the form (start, end, strand)
      or just (start, end), with strand defaulting to +1, indicating the
      position of the gene to codon-optimize. If not provided, the whole
      sequence is considered as the gene. The location should have a length
      that is a multiple of 3. The location strand is either 1 if the gene is
      encoded on the (+) strand, or -1 for antisense.

    codon_usage_table
      A dict of the form ``{'*': {"TGA": 0.112, "TAA": 0.68}, 'K': ...}``
      giving the RSCU table (relative usage of each codon). Only provide if
      no ``species`` parameter was provided.

    boost
      Score multiplicator (=weight) for when the specification is used as an
      optimization objective alongside competing objectives.

    Examples
    --------

    >>> objective = MaximizeCAI(
    >>>     species="e_coli",
    >>>     location=(150, 300, -1),  # (start, end, strand) of a gene
    >>> )

    """

    shorthand_name = "use_best_codon"

    def __init__(
        self, species=None, location=None, codon_usage_table=None, boost=1.0
    ):
        BaseCodonOptimizationClass.__init__(
            self,
            species=species,
            location=location,
            codon_usage_table=codon_usage_table,
            boost=boost,
        )
        self.codons_translations = self.get_codons_translations()

        # The log tables are cached inside the codon usage table itself
        # (under multi-character keys) and are only computed when absent.
        table = self.codon_usage_table
        # Substitute for null frequencies, so that log() stays finite.
        zero_floor = 0.001
        if "log_best_frequencies" not in table:
            # The same floor is applied here as for the individual codons
            # below; otherwise a codon could score higher than the "best"
            # frequency of its amino acid and yield a positive score.
            table["log_best_frequencies"] = {
                aa: np.log(
                    max(freq or zero_floor for freq in aa_data.values())
                )
                for aa, aa_data in table.items()
                if len(aa) == 1
            }
        if "log_codons_frequencies" not in table:
            # Filter on the amino-acid key BEFORE iterating the inner dict,
            # so cached entries such as "log_best_frequencies" are skipped
            # instead of being walked through and then discarded.
            table["log_codons_frequencies"] = {
                codon: np.log(frequency or zero_floor)
                for aa, frequencies in table.items()
                if len(aa) == 1
                for codon, frequency in frequencies.items()
            }

    def evaluate(self, problem):
        """Return the N*log(CAI) evaluation of the problem's sequence.

        The score is the sum over all codons of
        ``log(frequency) - log(best synonymous frequency)``. The reported
        locations are the ones of the codons whose frequency is not the
        best one for their amino acid.
        """
        codons = self.get_codons(problem)
        translations = self.codons_translations
        log_frequencies = self.codon_usage_table["log_codons_frequencies"]
        log_best = self.codon_usage_table["log_best_frequencies"]

        if len(codons) == 1:
            # We are evaluating a single codon. Easy!
            codon = codons[0]
            freq = log_frequencies[codon]
            optimal = log_best[translations[codon]]
            score = freq - optimal
            return SpecEvaluation(
                self,
                problem,
                score=score,
                locations=[] if (freq == optimal) else [self.location],
                message="Codon opt. on window %s scored %.02E"
                % (self.location, score),
            )

        current_usage = np.array([log_frequencies[codon] for codon in codons])
        optimal_usage = np.array(
            [log_best[translations[codon]] for codon in codons]
        )
        # Zero for optimal codons, strictly positive for the other ones.
        non_optimality = optimal_usage - current_usage
        nonoptimal_indices = np.nonzero(non_optimality)[0]
        locations = self.codons_indices_to_locations(nonoptimal_indices)
        score = -non_optimality.sum()
        return SpecEvaluation(
            self,
            problem,
            score=score,
            locations=locations,
            message="Codon opt. on window %s scored %.02E"
            % (self.location, score),
        )

    def label_parameters(self):
        """Return the species name, or a placeholder for a custom table."""
        return ["(custom table)" if self.species is None else self.species]

    def short_label(self):
        """Return a short label, including the species name if there is one."""
        result = "best-codon-optimize"
        if self.species is not None:
            result += " (%s)" % self.species
        return result