Source code for bandwitch.DigestionProblem.SeparatingDigestionProblem

from collections import OrderedDict
import itertools

import numpy as np

from .DigestionProblem import DigestionProblem
from ..tools import max_min_distance

try:
    import matplotlib.pyplot as plt

    PLOTS_AVAILABLE = True
except ImportError:
    PLOTS_AVAILABLE = False


[docs]class SeparatingDigestionsProblem(DigestionProblem): """Problem: find best digestion(s) to identify constructs. Provided a set of constructs (possibly from a combinatorial assembly), find a list of digestions such that all pair of constructs have different migration patterns for at least one digestion of the list. """ """General class for solving digestions problems. By digestion problem we mean *enzymatic* digestion problems. For other kinds of digestion problems, consult your proctologist. Other digestion problems subclass this problem and implement a computation of coverages which depends on the problem. Parameters ---------- sequences An (ordered) dictionary of the form {sequence_name: sequence} where the sequence is an ATGC string. enzymes List of the names of the enzymes to consider, e.g. ``['EcoRI', 'XbaI']``. ladder A Ladder object representing the ladder used for migrations. linear True for linear sequences, false for circular sequences. max_enzymes_per_digestion Maximal number of enzymes that can go in a single digestion. Experimentally the best is 1, but you can try 2, or 3 in desperate situations. Not sure if more enzymes will work. relative_error Variance of the bands measured during the migration, given as a proportion of the total migration span (difference between the migration of the ladder's smallest and largest bands). """ def __init__( self, enzymes, ladder, sequences=None, categories=None, topology="default_to_linear", max_enzymes_per_digestion=1, min_discrepancy="auto", relative_migration_precision=0.1, ): """Initialize.""" if isinstance(sequences, (list, tuple)): sequences = OrderedDict([(r.id, r) for r in sequences]) if categories is None: categories = OrderedDict( [ (seq_name, {seq_name: sequence}) for seq_name, sequence in sequences.items() ] ) else: sequences = OrderedDict( [ (seq_name, sequence) for category, seqs in categories.items() for seq_name, sequence in seqs.items() ] ) self.categories = categories DigestionProblem.__init__( self, sequences=sequences, enzymes=enzymes, ladder=ladder, topology=topology, max_enzymes_per_digestion=max_enzymes_per_digestion, relative_migration_precision=relative_migration_precision, ) def _compute_elements(self): category_pairs = itertools.combinations(self.categories.values(), 2) return set( (seq1, seq2) for category1, category2 in category_pairs for seq1 in category1 for seq2 in category2 ) def _parameter_element_score(self, digestion, sequences_pair): """See max_patterns_difference.""" sequence1, sequence2 = sequences_pair digestion1, digestion2 = [ self.sequences_digestions[s][digestion] for s in (sequence1, sequence2) ] # If similar pair already computed, return the previous result. if digestion1["same_as"] == digestion2["same_as"]: if digestion1["same_as"] in self.scores[sequences_pair]: return self.scores[sequences_pair][digestion1["same_as"]] mini, maxi = self.migration_min, self.migration_max margin = self.relative_migration_precision * self.migration_span / 2.0 distance = max_min_distance( digestion1["migration"], digestion2["migration"], zone=[mini + margin, maxi - margin], ) return 1.0 * distance / self.migration_span @staticmethod def _score_to_color(score, maxi=0.1): """Transform a similarity score to a green/red color. Parameters ---------- score Value between 0 (perfect similarity, green) and 1 (red). maxi Value of the score above which everything appears completely red. Below this value the color goes progressively from red to green in 0. """ return ( max(0, min(1, score / maxi)), min(1, max(0, 1 - score / maxi)), 0, 0.5, )
[docs] def plot_distances_map(self, digestions, ax=None, target_file=None): """Plot how well the digestions separate each construct pair. Parameters ---------- digestions A list of digestions, eg ``[('EcoRV'), ('XbaI', 'MfeI')]``. ax A matplotlib ax on which to plot, if none is provided, one is created and returned at the end. target_file The name of the (PNG, SVG, JPEG...) file in which to write the plot. Returns ------- axes The axes of the generated figure (if a target file is written to, the figure is closed and None is returned instead). """ if not PLOTS_AVAILABLE: raise ImportError("Plots require Matplotlib/Bandwagon installed.") grid = np.zeros(2 * (len(self.sequences),)) for i, s1 in enumerate(self.sequences): for j, s2 in enumerate(self.sequences): if i >= j: grid[i, j] = np.nan else: scores = [ self._parameter_element_score(digestion, (s1, s2)) for digestion in digestions ] grid[i, j] = max(scores) if ax is None: _, ax = plt.subplots(1, figsize=2 * (0.8 * len(grid),)) ax.imshow( grid[:, ::-1], interpolation="nearest", cmap="OrRd_r", vmin=0, vmax=self.relative_migration_precision, ) for i in range(len(grid)): for j in range(len(grid)): if i > j: ax.text( len(self.sequences) - i - 1, j, "%d%%" % (100 * grid[j, i]), fontdict=dict(color="black", weight="bold", size=14), horizontalalignment="center", verticalalignment="center", ) ax.set_yticks(range(len(grid))) ax.set_yticklabels(list(self.sequences), size=14, fontdict={"weight": "bold"}) ax.set_xticks(range(len(grid))) ax.xaxis.set_ticks_position("top") ax.spines["right"].set_visible(False) ax.spines["top"].set_visible(False) ax.spines["left"].set_visible(False) ax.spines["bottom"].set_visible(False) ax.set_xticklabels( [" " + s for s in list(self.sequences)[::-1]], rotation=90, size=14, fontdict={"weight": "bold"}, ) ax.set_xlim(-0.5, len(self.sequences) - 1.5) ax.set_ylim(len(self.sequences) - 1.5, -0.5) if target_file is not None: ax.figure.savefig(target_file, bbox_inches="tight") plt.close(ax.figure) return ax