Source code for genedom.BarcodesCollection

import os
import dnachisel as dc
from collections import OrderedDict
from .biotools import sequence_to_record, annotate_record, write_record

class BarcodesCollection(OrderedDict):
    """Class representing a set of named barcode sequences.

    These barcodes are meant to be annealed with same-sequence primers for
    PCR or sequencing.

    The constructor takes a list ``[(name, barcode), ...]`` as an input
    (any input accepted by ``OrderedDict`` works). Use
    ``BarcodesCollection.from_specs(n_barcodes=25)`` to generate an instance
    with 25 compatible barcodes.
    """

    def __init__(self, barcodes=()):
        # The empty default matters: OrderedDict re-creates instances of
        # subclasses by calling the class without arguments in copy(),
        # copy.deepcopy(), pickling and fromkeys().
        OrderedDict.__init__(self, barcodes)

    @classmethod
    def from_specs(cls, n_barcodes=96, barcode_length=20, spacer='AA',
                   forbidden_enzymes=('BsaI', 'BsmBI', 'BbsI'),
                   barcode_tmin=55, barcode_tmax=70,
                   other_primer_sequences=(), heterodim_tmax=5,
                   max_homology_length=10, include_spacers=True,
                   names_template="B_%03d"):
        """Return a BarcodesCollection object with compatible barcodes.

        All barcodes are designed at once, as consecutive
        ``barcode + spacer`` units of a single DnaChisel optimization
        problem, and the solved sequence is then cut back into units.

        Parameters
        ----------

        n_barcodes
          Number of barcodes to design

        barcode_length
          Length of each barcode

        spacer
          Spacer to place between each barcode during the optimization,
          ideally the same spacer that will be used when adding the barcode
          to a part. May be an empty string.

        include_spacers
          Whether the spacers should be part of the final sequence of the
          barcodes (they still won't be considered part of the annealing
          primer and won't be used for melting temperature computations)

        forbidden_enzymes
          Name of enzymes whose sites should not be in the barcodes.

        barcode_tmin, barcode_tmax
          Interval of acceptable values for the melting temperature

        other_primer_sequences
          External sequences with which the primers should not anneal.
          When empty, no heterodimerization constraint is requested.

        heterodim_tmax
          Max acceptable melting temperature for the annealing of a barcode
          and one of the other_primer_sequences.

        max_homology_length
          Maximal homology between any two barcodes in the sequence.

        names_template
          The template used to name barcode number "i" (numbering starts
          at 1), applied as ``names_template % i``.

        Returns
        -------

        barcodes_collection
          An instance of the class this method is called on, mapping the
          generated names to the barcode sequences, in design order.
        """
        spacer_length = len(spacer)
        unit_length = barcode_length + spacer_length
        seq_len = n_barcodes * unit_length
        units_coordinates = [
            (start, start + unit_length)
            for start in range(0, seq_len, unit_length)
        ]

        # Forward the caller's primers; an empty collection becomes None,
        # which is the value the original code always passed.
        external_primers = list(other_primer_sequences) or None

        constraints = [
            dc.AvoidPattern(enzyme=enzyme)
            for enzyme in forbidden_enzymes
        ]
        for start, end in units_coordinates:
            barcode_location = (start, end - spacer_length)
            constraints.append(
                dc.AllowPrimer(
                    tmin=barcode_tmin,
                    tmax=barcode_tmax,
                    max_homology_length=max_homology_length,
                    avoid_heterodim_with=external_primers,
                    max_heterodim_tm=heterodim_tmax,
                    location=barcode_location
                )
            )
            if spacer:
                # An empty spacer has nothing to enforce.
                constraints.append(
                    dc.EnforceSequence(
                        spacer, location=(end - spacer_length, end)
                    )
                )
            constraints.append(
                dc.EnforceGCContent(mini=0.4, maxi=0.6,
                                    location=barcode_location)
            )

        problem = dc.DnaOptimizationProblem(
            sequence=dc.random_dna_sequence(seq_len),
            constraints=constraints
        )
        problem.logger.ignored_bars.add('location')
        problem.resolve_constraints()

        barcodes = [
            problem.sequence[start: end]
            for (start, end) in units_coordinates
        ]
        if not include_spacers:
            # Slice by barcode length rather than ``b[:-len(spacer)]``:
            # with an empty spacer the latter is ``b[:0]``, i.e. "".
            barcodes = [b[:barcode_length] for b in barcodes]
        names = [names_template % (i + 1) for i in range(len(barcodes))]
        return cls(zip(names, barcodes))

    def to_sequences_list(self):
        """Return a list of sequences ["ATTG...", "TTCTGT..."]"""
        return list(self.values())

    def to_fasta(self, path=None):
        """Return (and optionally write) a fasta string of the barcodes.

        Parameters
        ----------

        path
          Optional path of a file to write the fasta string to. The file
          is overwritten if it exists.

        Returns
        -------

        fasta
          The fasta string, one ``"> name"`` header line followed by the
          sequence for each barcode, entries separated by a blank line.
          It is returned whether or not ``path`` is provided.
        """
        fasta = "\n\n".join(
            "> %s\n%s" % (name, barcode)
            for name, barcode in self.items()
        )
        if path is not None:
            with open(path, "w") as f:
                f.write(fasta)
        return fasta

    def to_records(self, path=None):
        """Return (optionally write) individual Genbanks of the barcodes.

        Parameters
        ----------

        path
          Optional path to a folder. When provided, each record is written
          to ``<path>/<name>.gb``. NOTE(review): the folder is not created
          here, presumably it must already exist - confirm with
          ``write_record``.

        Returns
        -------

        records
          A list with one record per barcode, in the collection's order.
          Each record is built with ``sequence_to_record``, gets the
          barcode name as ``id`` and is annotated with that name as label.
        """
        records = []
        for (name, barcode) in self.items():
            record = sequence_to_record(barcode)
            record.id = name
            annotate_record(record, label=name)
            records.append(record)
        if path is not None:
            for record in records:
                write_record(record, os.path.join(path, "%s.gb" % record.id))
        return records