Source code for dnachisel.SequencePattern.DnaNotationPattern
import itertools
from ..biotools import (
reverse_complement,
NUCLEOTIDE_TO_REGEXPR,
IUPAC_NOTATION,
)
from .SequencePattern import SequencePattern
class DnaNotationPattern(SequencePattern):
    """Sequence pattern written in plain DNA notation (ATTGCCA, GCNNKTA...).

    The pattern is flagged as palindromic when it equals its own reverse
    complement. If the sequence is not palindromic, the pattern will be
    looked for in both strands of sequences.

    Parameters
    ----------
    sequence
      The pattern in DNA notation. Every character must be a key of
      ``NUCLEOTIDE_TO_REGEXPR``, otherwise building the regular expression
      raises a ``KeyError``.

    name
      Optional label, appended in parentheses to the string representation.
    """

    def __init__(self, sequence, name=None):
        """Build the pattern's regular expression and register its metadata."""
        # Computed in the same order as the keyword arguments below so that
        # any error raised on a bad ``sequence`` surfaces at the same step.
        pattern_size = len(sequence)
        pattern_regexpr = self.dna_sequence_to_regexpr(sequence)
        own_reverse_complement = reverse_complement(sequence) == sequence
        SequencePattern.__init__(
            self,
            size=pattern_size,
            expression=pattern_regexpr,
            name=name,
            # "is_palyndromic" (sic) is the keyword used by SequencePattern.
            is_palyndromic=own_reverse_complement,
        )
        self.sequence = sequence

    @staticmethod
    def dna_sequence_to_regexpr(sequence):
        """Return a regular expression matching the DNA-notation sequence.

        Each character is translated through ``NUCLEOTIDE_TO_REGEXPR`` and
        the fragments are concatenated in order.
        """
        return "".join(
            NUCLEOTIDE_TO_REGEXPR[nucleotide] for nucleotide in sequence
        )

    def all_variants(self):
        """Return the list of all sequences obtained by expanding each symbol.

        Each character of ``self.sequence`` is replaced by every option
        listed for it in ``IUPAC_NOTATION``; the result is the cartesian
        product of these options, joined into strings.
        """
        options_per_position = [
            IUPAC_NOTATION[symbol] for symbol in self.sequence
        ]
        variants = []
        for combination in itertools.product(*options_per_position):
            variants.append("".join(combination))
        return variants

    def __repr__(self):
        """Return the sequence, followed by `` (name)`` if a name is set."""
        if self.name is None:
            suffix = ""
        else:
            suffix = " (%s)" % self.name
        return self.sequence + suffix

    def __str__(self):
        """Return the sequence, followed by `` (name)`` if a name is set."""
        if self.name is None:
            return self.sequence + ""
        return self.sequence + " (%s)" % self.name

    @staticmethod
    def from_string(string):
        """Return a DnaNotationPattern for ``string``, or None if not DNA.

        The string qualifies when all of its characters are keys of
        ``NUCLEOTIDE_TO_REGEXPR``; otherwise ``None`` is returned.
        """
        if not set(string).issubset(NUCLEOTIDE_TO_REGEXPR.keys()):
            return None
        return DnaNotationPattern(string)