from collections import OrderedDict
import itertools
import numpy as np
from .DigestionProblem import DigestionProblem
from ..tools import max_min_distance
try:
import matplotlib.pyplot as plt
PLOTS_AVAILABLE = True
except ImportError:
PLOTS_AVAILABLE = False
[docs]
class SeparatingDigestionsProblem(DigestionProblem):
"""Problem: find best digestion(s) to identify constructs.
Provided a set of constructs (possibly from a combinatorial assembly),
find a list of digestions such that all pair of constructs have different
migration patterns for at least one digestion of the list.
Parameters
----------
enzymes
List of the names of the enzymes to consider, e.g. ``['EcoRI', 'XbaI']``.
ladder
A Ladder object representing the ladder used for migrations.
sequences
An (ordered) dictionary of the form {sequence_name: sequence} where the
sequence is an ATGC string.
topology
max_enzymes_per_digestion
Maximum number of enzymes that can go in a single digestion.
Experimentally the best is 1, but you can try 2, or 3 in desperate
situations. Not sure if more enzymes will work.
min_discrepancy
relative_migration_precision
Variance of the bands measured during the migration, given as a
proportion of the total migration span (difference between the migration
of the ladder's smallest and largest bands).
"""
def __init__(
self,
enzymes,
ladder,
sequences=None,
categories=None,
topology="default_to_linear",
max_enzymes_per_digestion=1,
min_discrepancy="auto",
relative_migration_precision=0.1,
):
"""Initialize."""
if isinstance(sequences, (list, tuple)):
sequences = OrderedDict([(r.id, r) for r in sequences])
if categories is None:
categories = OrderedDict(
[
(seq_name, {seq_name: sequence})
for seq_name, sequence in sequences.items()
]
)
else:
sequences = OrderedDict(
[
(seq_name, sequence)
for category, seqs in categories.items()
for seq_name, sequence in seqs.items()
]
)
self.categories = categories
DigestionProblem.__init__(
self,
sequences=sequences,
enzymes=enzymes,
ladder=ladder,
topology=topology,
max_enzymes_per_digestion=max_enzymes_per_digestion,
relative_migration_precision=relative_migration_precision,
)
def _compute_elements(self):
category_pairs = itertools.combinations(self.categories.values(), 2)
return set(
(seq1, seq2)
for category1, category2 in category_pairs
for seq1 in category1
for seq2 in category2
)
def _parameter_element_score(self, digestion, sequences_pair):
"""See max_patterns_difference."""
sequence1, sequence2 = sequences_pair
digestion1, digestion2 = [
self.sequences_digestions[s][digestion] for s in (sequence1, sequence2)
]
# If similar pair already computed, return the previous result.
if digestion1["same_as"] == digestion2["same_as"]:
if digestion1["same_as"] in self.scores[sequences_pair]:
return self.scores[sequences_pair][digestion1["same_as"]]
mini, maxi = self.migration_min, self.migration_max
margin = self.relative_migration_precision * self.migration_span / 2.0
distance = max_min_distance(
digestion1["migration"],
digestion2["migration"],
zone=[mini + margin, maxi - margin],
)
return 1.0 * distance / self.migration_span
@staticmethod
def _score_to_color(score, maxi=0.1):
"""Transform a similarity score to a green/red color.
Parameters
----------
score
Value between 0 (perfect similarity, green) and 1 (red).
maxi
Value of the score above which everything appears completely red.
Below this value the color goes progressively from red to green in 0.
"""
return (
max(0, min(1, score / maxi)),
min(1, max(0, 1 - score / maxi)),
0,
0.5,
)
[docs]
def plot_distances_map(self, digestions, ax=None, target_file=None):
"""Plot how well the digestions separate each construct pair.
Parameters
----------
digestions
A list of digestions, eg ``[('EcoRV'), ('XbaI', 'MfeI')]``.
ax
A matplotlib ax on which to plot, if none is provided, one is created
and returned at the end.
target_file
The name of the (PNG, SVG, JPEG...) file in which to write the plot.
Returns
-------
axes
The axes of the generated figure (if a target file is written to,
the figure is closed and None is returned instead).
"""
if not PLOTS_AVAILABLE:
raise ImportError("Plots require Matplotlib/Bandwagon installed.")
grid = np.zeros(2 * (len(self.sequences),))
for i, s1 in enumerate(self.sequences):
for j, s2 in enumerate(self.sequences):
if i >= j:
grid[i, j] = np.nan
else:
scores = [
self._parameter_element_score(digestion, (s1, s2))
for digestion in digestions
]
grid[i, j] = max(scores)
if ax is None:
_, ax = plt.subplots(1, figsize=2 * (0.8 * len(grid),))
ax.imshow(
grid[:, ::-1],
interpolation="nearest",
cmap="OrRd_r",
vmin=0,
vmax=self.relative_migration_precision,
)
for i in range(len(grid)):
for j in range(len(grid)):
if i > j:
ax.text(
len(self.sequences) - i - 1,
j,
"%d%%" % (100 * grid[j, i]),
fontdict=dict(color="black", weight="bold", size=14),
horizontalalignment="center",
verticalalignment="center",
)
ax.set_yticks(range(len(grid)))
ax.set_yticklabels(list(self.sequences), size=14, fontdict={"weight": "bold"})
ax.set_xticks(range(len(grid)))
ax.xaxis.set_ticks_position("top")
ax.spines["right"].set_visible(False)
ax.spines["top"].set_visible(False)
ax.spines["left"].set_visible(False)
ax.spines["bottom"].set_visible(False)
ax.set_xticklabels(
[" " + s for s in list(self.sequences)[::-1]],
rotation=90,
size=14,
fontdict={"weight": "bold"},
)
ax.set_xlim(-0.5, len(self.sequences) - 1.5)
ax.set_ylim(len(self.sequences) - 1.5, -0.5)
if target_file is not None:
ax.figure.savefig(target_file, bbox_inches="tight")
plt.close(ax.figure)
return ax