Module overhang.Overhang
View Source
import minotaor
from .tools import gc_content, order_overhangs, generate_overhang_pairs
class Overhang:
    """An overhang sequence paired with its reverse complement.

    Regardless of which of the two sequences is passed in, the one with the lower
    order (e.g. AATA < TATT) is kept in `Overhang.overhang` and the other one, its
    reverse complement, in `Overhang.overhang_rc`.


    **Parameters**

    **seq**
    > ACGT sequence (`str`).
    """

    def __init__(self, seq):
        self.overhang, self.overhang_rc = order_overhangs(seq)
        # Palindromic means the overhang equals its own reverse complement.
        self.is_palindromic = self.overhang == self.overhang_rc
        # Computed on the input as given: the reverse complement has the same
        # GC content.
        self.gc_content = gc_content(seq)
        self.aa_patterns = minotaor.convert_dna_to_aa_pattern(self.overhang)
        self.count_max_repeat()
        self.find_codons()

    def is_good(self):
        """Summarise attributes and decide whether overhang can be used for assembly."""
        # Palindromicity is the only attribute taken into account here.
        return not self.is_palindromic

    def count_max_repeat(self, repeat=3):
        """Flag the overhang if any letter occurs `repeat` times in a row.

        The outcome is stored in `has_multimer`; nothing is returned.


        **Parameters**

        **repeat**
        > Number of minimum repeats to flag (`int`). For example, 3 checks for AAA etc.
        """
        self.has_multimer = any(
            base * repeat in self.overhang for base in "ATCG"
        )

    def find_codons(self):
        """Record whether start or stop codons occur in the overhang pair.

        Sets `has_start_codon` / `has_stop_codon` for `overhang` and
        `has_rc_start_codon` / `has_rc_stop_codon` for `overhang_rc`.
        This is important information on the suitability of an overhang.
        """
        start_codons = ("ATG",)
        stop_codons = ("TAA", "TAG", "TGA")

        forward, revcomp = self.overhang, self.overhang_rc
        # Start and stop are checked independently: a sequence such as "ATGA"
        # contains both.
        self.has_start_codon = any(codon in forward for codon in start_codons)
        self.has_stop_codon = any(codon in forward for codon in stop_codons)
        self.has_rc_start_codon = any(codon in revcomp for codon in start_codons)
        self.has_rc_stop_codon = any(codon in revcomp for codon in stop_codons)
def generate_all_overhangs(overhang_length=4):
    """Build an `Overhang` instance for every overhang pair of the given length.

    The instances are returned as a list, ordered by the sorted representative
    sequences taken from the pairs.


    **Parameters**

    **overhang_length**
    > Length of overhangs (`int`).
    """
    # One sequence per pair is enough: `Overhang` works out both members of
    # the pair from whichever sequence it is given.
    representatives = sorted(
        next(iter(pair))
        for pair in generate_overhang_pairs(overhang_length=overhang_length)
    )
    return [Overhang(sequence) for sequence in representatives]
def get_overhang_distance(oh1, oh2):
    """Return the smallest number of differing letters between two `Overhang` instances.

    `oh1.overhang` is compared against both `oh2.overhang` and `oh2.overhang_rc`;
    the smaller of the two Hamming distances is returned.


    **Parameters**

    **oh1**
    > An `Overhang` instance.

    **oh2**
    > An `Overhang` instance.
    """
    forward = get_hamming_distance(oh1.overhang, oh2.overhang)
    against_rc = get_hamming_distance(oh1.overhang, oh2.overhang_rc)
    # The closest match is what matters, so keep the lower value.
    return min(forward, against_rc)
def get_hamming_distance(seq1, seq2):
    """Calculate Hamming distance between two overhang sequences.

    The Hamming distance is the number of positions at which the two sequences
    differ; it is only defined for sequences of equal length.


    **Parameters**

    **seq1**
    > ACGT sequence (`str`).

    **seq2**
    > ACGT sequence (`str`).


    **Returns**

    > Number of mismatching positions (`int`).


    **Raises**

    > `ValueError` if the two sequences differ in length.
    """
    # Reject unequal lengths explicitly: a longer `seq2` would otherwise have
    # its extra letters silently ignored, and a shorter one would fail with an
    # unrelated IndexError.
    if len(seq1) != len(seq2):
        raise ValueError(
            "Sequences must have the same length to compute a Hamming distance: "
            "%d != %d" % (len(seq1), len(seq2))
        )
    return sum(letter1 != letter2 for letter1, letter2 in zip(seq1, seq2))
Functions
generate_all_overhangs
def generate_all_overhangs(
overhang_length=4
)
Generate a list of `Overhang` class instances for all overhangs of a given length.
Parameters
overhang_length
Length of overhangs (`int`).
View Source
def generate_all_overhangs(overhang_length=4):
    """Build an `Overhang` instance for every overhang pair of the given length.

    The instances are returned as a list, ordered by the sorted representative
    sequences taken from the pairs.


    **Parameters**

    **overhang_length**
    > Length of overhangs (`int`).
    """
    # One sequence per pair is enough: `Overhang` works out both members of
    # the pair from whichever sequence it is given.
    representatives = sorted(
        next(iter(pair))
        for pair in generate_overhang_pairs(overhang_length=overhang_length)
    )
    return [Overhang(sequence) for sequence in representatives]
get_hamming_distance
def get_hamming_distance(
seq1,
seq2
)
Calculate Hamming distance between two overhang sequences.
Parameters
seq1
ACGT sequence (`str`).
seq2
ACGT sequence (`str`).
View Source
def get_hamming_distance(seq1, seq2):
    """Calculate Hamming distance between two overhang sequences.

    The Hamming distance is the number of positions at which the two sequences
    differ; it is only defined for sequences of equal length.


    **Parameters**

    **seq1**
    > ACGT sequence (`str`).

    **seq2**
    > ACGT sequence (`str`).


    **Returns**

    > Number of mismatching positions (`int`).


    **Raises**

    > `ValueError` if the two sequences differ in length.
    """
    # Reject unequal lengths explicitly: a longer `seq2` would otherwise have
    # its extra letters silently ignored, and a shorter one would fail with an
    # unrelated IndexError.
    if len(seq1) != len(seq2):
        raise ValueError(
            "Sequences must have the same length to compute a Hamming distance: "
            "%d != %d" % (len(seq1), len(seq2))
        )
    return sum(letter1 != letter2 for letter1, letter2 in zip(seq1, seq2))
get_overhang_distance
def get_overhang_distance(
oh1,
oh2
)
Calculate the number of differing letters between two `Overhang` instances.
Parameters
oh1
An `Overhang` instance.
oh2
An `Overhang` instance.
View Source
def get_overhang_distance(oh1, oh2):
    """Return the smallest number of differing letters between two `Overhang` instances.

    `oh1.overhang` is compared against both `oh2.overhang` and `oh2.overhang_rc`;
    the smaller of the two Hamming distances is returned.


    **Parameters**

    **oh1**
    > An `Overhang` instance.

    **oh2**
    > An `Overhang` instance.
    """
    forward = get_hamming_distance(oh1.overhang, oh2.overhang)
    against_rc = get_hamming_distance(oh1.overhang, oh2.overhang_rc)
    # The closest match is what matters, so keep the lower value.
    return min(forward, against_rc)
Classes
Overhang
class Overhang(
seq
)
Class for an overhang and its reverse complement.
Note that the overhang with the lower order (e.g. AATA < TATT) will be stored in
`Overhang.overhang` and the reverse complement in `Overhang.overhang_rc`, regardless
of which was given as parameter.
Parameters
seq
ACGT sequence (`str`).
View Source
class Overhang:
    """An overhang sequence paired with its reverse complement.

    Regardless of which of the two sequences is passed in, the one with the lower
    order (e.g. AATA < TATT) is kept in `Overhang.overhang` and the other one, its
    reverse complement, in `Overhang.overhang_rc`.


    **Parameters**

    **seq**
    > ACGT sequence (`str`).
    """

    def __init__(self, seq):
        self.overhang, self.overhang_rc = order_overhangs(seq)
        # Palindromic means the overhang equals its own reverse complement.
        self.is_palindromic = self.overhang == self.overhang_rc
        # Computed on the input as given: the reverse complement has the same
        # GC content.
        self.gc_content = gc_content(seq)
        self.aa_patterns = minotaor.convert_dna_to_aa_pattern(self.overhang)
        self.count_max_repeat()
        self.find_codons()

    def is_good(self):
        """Summarise attributes and decide whether overhang can be used for assembly."""
        # Palindromicity is the only attribute taken into account here.
        return not self.is_palindromic

    def count_max_repeat(self, repeat=3):
        """Flag the overhang if any letter occurs `repeat` times in a row.

        The outcome is stored in `has_multimer`; nothing is returned.


        **Parameters**

        **repeat**
        > Number of minimum repeats to flag (`int`). For example, 3 checks for AAA etc.
        """
        self.has_multimer = any(
            base * repeat in self.overhang for base in "ATCG"
        )

    def find_codons(self):
        """Record whether start or stop codons occur in the overhang pair.

        Sets `has_start_codon` / `has_stop_codon` for `overhang` and
        `has_rc_start_codon` / `has_rc_stop_codon` for `overhang_rc`.
        This is important information on the suitability of an overhang.
        """
        start_codons = ("ATG",)
        stop_codons = ("TAA", "TAG", "TGA")

        forward, revcomp = self.overhang, self.overhang_rc
        # Start and stop are checked independently: a sequence such as "ATGA"
        # contains both.
        self.has_start_codon = any(codon in forward for codon in start_codons)
        self.has_stop_codon = any(codon in forward for codon in stop_codons)
        self.has_rc_start_codon = any(codon in revcomp for codon in start_codons)
        self.has_rc_stop_codon = any(codon in revcomp for codon in stop_codons)
Methods
count_max_repeat
def count_max_repeat(
self,
repeat=3
)
Check overhang for repeating letters.
Parameters
repeat
Minimum number of repeats to flag (`int`). For example, 3 checks for AAA etc.
View Source
def count_max_repeat(self, repeat=3):
    """Flag the overhang if any letter occurs `repeat` times in a row.

    The outcome is stored in `has_multimer`; nothing is returned.


    **Parameters**

    **repeat**
    > Number of minimum repeats to flag (`int`). For example, 3 checks for AAA etc.
    """
    self.has_multimer = any(
        base * repeat in self.overhang for base in "ATCG"
    )
find_codons
def find_codons(
self
)
Check overhang for presence of start and stop codons.
This is important information on the suitability of an overhang.
View Source
def find_codons(self):
    """Record whether start or stop codons occur in the overhang pair.

    Sets `has_start_codon` / `has_stop_codon` for `overhang` and
    `has_rc_start_codon` / `has_rc_stop_codon` for `overhang_rc`.
    This is important information on the suitability of an overhang.
    """
    start_codons = ("ATG",)
    stop_codons = ("TAA", "TAG", "TGA")

    forward, revcomp = self.overhang, self.overhang_rc
    # Start and stop are checked independently: a sequence such as "ATGA"
    # contains both.
    self.has_start_codon = any(codon in forward for codon in start_codons)
    self.has_stop_codon = any(codon in forward for codon in stop_codons)
    self.has_rc_start_codon = any(codon in revcomp for codon in start_codons)
    self.has_rc_stop_codon = any(codon in revcomp for codon in stop_codons)
is_good
def is_good(
self
)
Summarise attributes and decide whether overhang can be used for assembly.
View Source
def is_good(self):
    """Summarise attributes and decide whether overhang can be used for assembly."""
    # Palindromicity is the only attribute taken into account here.
    return not self.is_palindromic