Source code for bandwagon.plot_records_digestions

from copy import deepcopy
from collections import OrderedDict
import itertools

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from matplotlib.backends.backend_pdf import PdfPages

from .Band import Band
from .BandsPattern import BandsPattern
from .BandsPatternsSet import BandsPatternsSet
from .tools import (
    find_cut_sites,
    annotate_record,
    compute_digestion_bands,
    record_is_linear,
)

try:
    from dna_features_viewer import BiopythonTranslator

    class DigestGraphicTranslator(BiopythonTranslator):
[docs] def compute_feature_color(self, feature): if "band_label" in feature.qualifiers: return "#e5d740" elif "source" in feature.qualifiers: return "#ff8282" else: return "#eaedff"
except ImportError:
[docs] class DigestGraphicTranslator: """Not available, install dna_features_viewer. pip install dna_features_viewer """ def __init__(self, *args, **kwargs): raise ImportError( "You need to install dna_features_viewer to plot" " annotated digestion graphics. Use " "'pip install dna_features_viewer'" )
[docs]def annotate_digestion_bands(record, enzymes, ladder): """Annotate the record to indicate the regions corresponding to bands. """ linear = record_is_linear(record, default=False) sequence = record.seq all_cuts = sorted( [0, len(record)] + find_cut_sites(sequence, enzymes, linear=linear) ) bands = list(zip(all_cuts, all_cuts[1:])) if (not linear) and len(bands) > 1: start, end = bands.pop() band0 = [-(end - start), bands[0][1]] if bands == []: bands = [band0] else: bands[0] = band0 sorted_bands = sorted(bands, key=lambda b: b[0] - b[1]) new_record = deepcopy(record) for (band, label) in zip(sorted_bands, "abcdefghijkl"): band_size = abs(band[1] - band[0]) formatted_size = Band._format_dna_size(band_size) annotate_record( new_record, location=band, label="%s - %s" % (label, formatted_size), feature_type="misc_feature", band_label=label, band_size=band_size, ) return new_record
[docs]def plot_record_digestion(record_digestion, ladder, record_label, digestion_label): """Plot the digestion along with a schema of cuts locations in the record. Parameters ---------- record_digestion Biopython record with features indicating bands. ladder record_label Label to use as the title of the record digestion plot. digestion_label Label to use as the title of the bands pattern(s). """ gs = gridspec.GridSpec(4, 10) ax_bands, ax_record, ax_digest = [ plt.subplot(z) for z in (gs[:, 0], gs[:3, 3:], gs[3, 3:]) ] ax_digest.figure.set_size_inches(20, 7) ax_digest.set_title("BANDS") topology = "linear" if record_is_linear(record_digestion) else "circular" ax_record.set_title("%s (%s)" % (record_label, topology), fontsize=22) bands = sorted( [ ( feature.qualifiers["band_label"], feature.qualifiers["band_size"], feature.location.start, ) for feature in record_digestion.features if feature.qualifiers.get("band_label", False) ] ) for _, size, start in bands: for ax in (ax_digest, ax_record): ax.axvline(start, ls=":", color="k", lw=0.5) ax.axvline(start + size, ls=":", color="k", lw=0.5) gr_record, gr_digestion = [ DigestGraphicTranslator([fl]).translate_record(record_digestion) for fl in [ lambda f: ("band_label" not in f.qualifiers) and ("homology" not in f.qualifiers.get("label", [])), lambda f: ("band_label" in f.qualifiers), ] ] gr_digestion.split_overflowing_features_circularly() gr_digestion.plot(ax=ax_digest) gr_record.plot(ax=ax_record, with_ruler=False) linear = record_is_linear(record_digestion, default=False) pattern = BandsPattern( [Band(dnasize, ladder=ladder, label=label) for label, dnasize, _ in bands], ladder=ladder, topology="linear" if linear else "circular", ) patternset = BandsPatternsSet( [pattern], ladder=ladder, ladder_ticks=3, ticks_fontdict=dict(size=12), label=digestion_label, label_fontdict=dict(size=18), ) patternset.plot(ax_bands) return (ax_bands, ax_record, ax_digest)
[docs]def plot_records_digestions( target, ladder, records_and_digestions=None, records=None, digestions=None ): """Plot records digestions in a multipage PDF file. Parameters ---------- records_and_digestions A list [(record, digestion), ...] where record is a Genbank record and digestions a tuple of enzymes. Alternatively, a list of ``records`` and ``digestions`` can be provided. records List of biopython records whose digests need to be digested by the ``digestions``. digestions List of tuples of enzyme names e.g. ``[('BsaI',),`('MfeI', 'BsmBI')...]`` representing the digestions for the provided ``records``. ladder A Ladder object representing the ladder to be used for placing the bands. target path to a PDF file, or file-like object. full_report """ if records_and_digestions is None: records_and_digestions = itertools.product(records, digestions) annotated_records = OrderedDict() with PdfPages(target) as details_pdf: for record, enzymes in records_and_digestions: record_label = record.id if record_label not in annotated_records: annotated_records[record_label] = OrderedDict() enzymes_label = " + ".join(sorted(enzymes)) basename = "%s--%s" % (record_label.replace(" ", "_"), "+".join(enzymes),) record_digestion = annotate_digestion_bands(record, enzymes, ladder) record_digestion.id = basename annotated_records[record_label][enzymes_label] = record_digestion (ax, _, _) = plot_record_digestion( record_digestion, ladder, record_label, enzymes_label ) details_pdf.savefig(ax.figure, bbox_inches="tight") plt.close(ax.figure) return annotated_records
[docs]def plot_all_digestion_patterns( records, digestions, ladder, axes=None, group_by="digestions", show_band_sizes=False, plot_ladder=False, ): """Plot a grid (RECORD x DIGESTION) of predicted records digestions. Parameters ---------- records List of biopython records whose digests need to be digested by the ``digestions``. digestions List of tuples of enzyme names e.g. ``[('BsaI',),`('MfeI', 'BsmBI')...]`` representing the digestions for the provided ``records``. ladder A Ladder object representing the ladder to be used for placing the bands axes. If None, new axes will be created. group_by Either "sequence" or "digestion". Determines the plotting order. show_band_sizes If true, the band sizes will be printed on each band. """ all_patterns = OrderedDict() for record in records: topology = record.annotations.get("topology", "linear") linear = topology == "linear" for enzymes in digestions: enzymes_label = " + ".join(sorted(enzymes)) bands = compute_digestion_bands(record.seq, enzymes, linear=linear) cuts = find_cut_sites(record.seq, enzymes) bands = sorted(bands) if group_by == "digestions": if enzymes_label not in all_patterns: all_patterns[enzymes_label] = OrderedDict() all_patterns[enzymes_label][record.id] = (bands, cuts) else: if record.id not in all_patterns: all_patterns[record.id] = OrderedDict() all_patterns[record.id][enzymes_label] = (bands, cuts) Y = len(all_patterns) X = len(list(all_patterns.values())[0]) if axes is None: _, axes = plt.subplots(Y, 1, figsize=(0.9 * X, 3 * Y)) if Y == 1: axes = [axes] bands_props = {"band_thickness": 2.5} if show_band_sizes: bands_props.update(dict(label="=size", label_fontdict=dict(size=6))) for ax, (cat1, cat2s) in zip(axes, sorted(all_patterns.items())): patterns = [ BandsPattern( _bands, ladder=ladder, label=cat2 if (ax == axes[0]) else None, label_fontdict=dict(rotation=70), global_bands_props=bands_props, band_is_uncut=len(_cuts) == 0, ) for cat2, (_bands, _cuts) in cat2s.items() ] if plot_ladder: patterns = [ladder] + patterns pattern_set = BandsPatternsSet( patterns=patterns, ladder=ladder, ladder_ticks=4, ticks_fontdict=dict(size=9), label=cat1, ) pattern_set.plot(ax) return axes