Source code for dna_features_viewer.biotools
import textwrap
from Bio.Seq import Seq
from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio.PDB.Polypeptide import aa1, aa3
from Bio import SeqIO
try:
from BCBio import GFF
except ImportError:
class GFF:
def parse(*a):
"""Not available. Please install bcbio-gff."""
raise ImportError(
"Please install the bcbio-gff library to parse GFF data"
)
[docs]def complement(dna_sequence):
"""Return the complement of the DNA sequence.
For instance ``complement("ATGCCG")`` returns ``"TACGGC"``.
Uses BioPython for speed.
"""
return str(Seq(dna_sequence).complement())
[docs]def reverse_complement(sequence):
"""Return the reverse-complement of the DNA sequence.
For instance ``complement("ATGCCG")`` returns ``"GCCGTA"``.
Uses BioPython for speed.
"""
return complement(sequence)[::-1]
aa_short_to_long_form_dict = {
_aa1: _aa3[0] + _aa3[1:].lower()
for (_aa1, _aa3) in zip(aa1 + "*", aa3 + ["*"])
}
[docs]def translate(dna_sequence, long_form=False):
"""Translate the DNA sequence into an amino-acids sequence MLKYQT...
If long_form is true, a list of 3-letter amino acid representations
is returned instead (['Ala', 'Ser', ...]).
"""
result = str(Seq(dna_sequence).translate())
if long_form:
result = [aa_short_to_long_form_dict[aa] for aa in result]
return result
[docs]def load_record(path):
"""Load a Genbank file """
if isinstance(path, str):
# Input is a file path
if path.lower().endswith('.gff'):
return list(GFF.parse(path))[0]
else:
return SeqIO.read(path, "genbank")
else:
# Input is a file-like object
try:
return SeqIO.read(path, "genbank")
except:
path.seek(0)
return list(GFF.parse(path))[0]
[docs]def annotate_biopython_record(
seqrecord,
location="full",
feature_type="misc_feature",
margin=0,
**qualifiers
):
"""Add a feature to a Biopython SeqRecord.
Parameters
----------
seqrecord
The biopython seqrecord to be annotated.
location
Either (start, end) or (start, end, strand). (strand defaults to +1)
feature_type
The type associated with the feature
margin
Number of extra bases added on each side of the given location.
qualifiers
Dictionnary that will be the Biopython feature's `qualifiers` attribute.
"""
if location == "full":
location = (margin, len(seqrecord) - margin)
strand = location[2] if len(location) == 3 else 1
seqrecord.features.append(
SeqFeature(
FeatureLocation(location[0], location[1], strand),
qualifiers=qualifiers,
type=feature_type,
)
)
[docs]def find_narrowest_text_wrap(text, max_line_length):
"""Wrap the text into a multi-line text minimizing the longest line length.
This is done by first wrapping the text using max_line_length, then
attempt new wraps by iteratively decreasing the line_length, as long as the
number of lines stays the same as with max_line_length.
"""
narrowest_wrap = textwrap.wrap(text, max_line_length)
narrowest_width = max([len(l) for l in narrowest_wrap])
for line_length in range(max_line_length - 1, 0, -1):
wrap = textwrap.wrap(text, line_length)
if len(wrap) <= len(narrowest_wrap):
width = max([len(l) for l in wrap])
if width < narrowest_width:
narrowest_wrap = wrap
narrowest_width = width
else:
break
return "\n".join(narrowest_wrap)