Source code for bandwitch.DigestionProblem.SeparatingDigestionProblem

from collections import OrderedDict
import itertools

import numpy as np

from .DigestionProblem import DigestionProblem
from ..tools import max_min_distance

try:
    import matplotlib.pyplot as plt

    PLOTS_AVAILABLE = True
except ImportError:
    PLOTS_AVAILABLE = False


[docs] class SeparatingDigestionsProblem(DigestionProblem): """Problem: find best digestion(s) to identify constructs. Provided a set of constructs (possibly from a combinatorial assembly), find a list of digestions such that all pair of constructs have different migration patterns for at least one digestion of the list. Parameters ---------- enzymes List of the names of the enzymes to consider, e.g. ``['EcoRI', 'XbaI']``. ladder A Ladder object representing the ladder used for migrations. sequences An (ordered) dictionary of the form {sequence_name: sequence} where the sequence is an ATGC string. topology max_enzymes_per_digestion Maximum number of enzymes that can go in a single digestion. Experimentally the best is 1, but you can try 2, or 3 in desperate situations. Not sure if more enzymes will work. min_discrepancy relative_migration_precision Variance of the bands measured during the migration, given as a proportion of the total migration span (difference between the migration of the ladder's smallest and largest bands). """ def __init__( self, enzymes, ladder, sequences=None, categories=None, topology="default_to_linear", max_enzymes_per_digestion=1, min_discrepancy="auto", relative_migration_precision=0.1, ): """Initialize.""" if isinstance(sequences, (list, tuple)): sequences = OrderedDict([(r.id, r) for r in sequences]) if categories is None: categories = OrderedDict( [ (seq_name, {seq_name: sequence}) for seq_name, sequence in sequences.items() ] ) else: sequences = OrderedDict( [ (seq_name, sequence) for category, seqs in categories.items() for seq_name, sequence in seqs.items() ] ) self.categories = categories DigestionProblem.__init__( self, sequences=sequences, enzymes=enzymes, ladder=ladder, topology=topology, max_enzymes_per_digestion=max_enzymes_per_digestion, relative_migration_precision=relative_migration_precision, ) def _compute_elements(self): category_pairs = itertools.combinations(self.categories.values(), 2) return set( (seq1, seq2) for category1, category2 in category_pairs for seq1 in category1 for seq2 in category2 ) def _parameter_element_score(self, digestion, sequences_pair): """See max_patterns_difference.""" sequence1, sequence2 = sequences_pair digestion1, digestion2 = [ self.sequences_digestions[s][digestion] for s in (sequence1, sequence2) ] # If similar pair already computed, return the previous result. if digestion1["same_as"] == digestion2["same_as"]: if digestion1["same_as"] in self.scores[sequences_pair]: return self.scores[sequences_pair][digestion1["same_as"]] mini, maxi = self.migration_min, self.migration_max margin = self.relative_migration_precision * self.migration_span / 2.0 distance = max_min_distance( digestion1["migration"], digestion2["migration"], zone=[mini + margin, maxi - margin], ) return 1.0 * distance / self.migration_span @staticmethod def _score_to_color(score, maxi=0.1): """Transform a similarity score to a green/red color. Parameters ---------- score Value between 0 (perfect similarity, green) and 1 (red). maxi Value of the score above which everything appears completely red. Below this value the color goes progressively from red to green in 0. """ return ( max(0, min(1, score / maxi)), min(1, max(0, 1 - score / maxi)), 0, 0.5, )
[docs] def plot_distances_map(self, digestions, ax=None, target_file=None): """Plot how well the digestions separate each construct pair. Parameters ---------- digestions A list of digestions, eg ``[('EcoRV'), ('XbaI', 'MfeI')]``. ax A matplotlib ax on which to plot, if none is provided, one is created and returned at the end. target_file The name of the (PNG, SVG, JPEG...) file in which to write the plot. Returns ------- axes The axes of the generated figure (if a target file is written to, the figure is closed and None is returned instead). """ if not PLOTS_AVAILABLE: raise ImportError("Plots require Matplotlib/Bandwagon installed.") grid = np.zeros(2 * (len(self.sequences),)) for i, s1 in enumerate(self.sequences): for j, s2 in enumerate(self.sequences): if i >= j: grid[i, j] = np.nan else: scores = [ self._parameter_element_score(digestion, (s1, s2)) for digestion in digestions ] grid[i, j] = max(scores) if ax is None: _, ax = plt.subplots(1, figsize=2 * (0.8 * len(grid),)) ax.imshow( grid[:, ::-1], interpolation="nearest", cmap="OrRd_r", vmin=0, vmax=self.relative_migration_precision, ) for i in range(len(grid)): for j in range(len(grid)): if i > j: ax.text( len(self.sequences) - i - 1, j, "%d%%" % (100 * grid[j, i]), fontdict=dict(color="black", weight="bold", size=14), horizontalalignment="center", verticalalignment="center", ) ax.set_yticks(range(len(grid))) ax.set_yticklabels(list(self.sequences), size=14, fontdict={"weight": "bold"}) ax.set_xticks(range(len(grid))) ax.xaxis.set_ticks_position("top") ax.spines["right"].set_visible(False) ax.spines["top"].set_visible(False) ax.spines["left"].set_visible(False) ax.spines["bottom"].set_visible(False) ax.set_xticklabels( [" " + s for s in list(self.sequences)[::-1]], rotation=90, size=14, fontdict={"weight": "bold"}, ) ax.set_xlim(-0.5, len(self.sequences) - 1.5) ax.set_ylim(len(self.sequences) - 1.5, -0.5) if target_file is not None: ax.figure.savefig(target_file, bbox_inches="tight") plt.close(ax.figure) return ax