Source code for dnachisel.SequencePattern.RepeatedKmerPattern

import re
from .SequencePattern import SequencePattern

class RepeatedKmerPattern(SequencePattern):
    """Pattern for a k-mer directly repeated several times in a row.

    Matches direct repeats such as ATT-ATT, ATGC-ATGC-ATGC, etc.

    Shorthand string version: "3x4mer", "5x2mer", etc. The number before the
    "x" is the number of repeats, the number before "mer" is the k-mer size.

    Parameters
    ----------
    n_repeats
      Number of consecutive copies of the k-mer.

    kmer_size
      Length (in nucleotides) of the repeated unit.

    Examples
    --------
    >>> RepeatedKmerPattern(3, 2)  # dimers repeated 3 times
    """

    def __init__(self, n_repeats, kmer_size):
        self.n_repeats = n_repeats
        self.kmer_size = kmer_size

        # One captured k-mer over ATGC, followed by (n_repeats - 1)
        # back-references to that same captured k-mer.
        repeat_regex = r"([ATGC]{%d})\1{%d}" % (kmer_size, n_repeats - 1)
        label = "%d-repeats %d-mers" % (n_repeats, kmer_size)

        SequencePattern.__init__(
            self,
            size=kmer_size * n_repeats,
            expression=repeat_regex,
            name=label,
            is_palyndromic=True,  # a repeat on a strand is also on the other
            lookahead="loop",
        )

    @staticmethod
    def from_string(string):
        """Build a pattern from a shorthand string such as "3x4mer".

        Returns a ``RepeatedKmerPattern`` when ``string`` has the form
        "<n_repeats>x<kmer_size>mer", and ``None`` otherwise.
        """
        parsed = re.match(r"(\d+)x(\d+)mer$", string)
        if parsed is None:
            # Not a repeated-kmer shorthand: signal it with None, no error.
            return None
        repeats, size = (int(group) for group in parsed.groups())
        return RepeatedKmerPattern(repeats, size)

    def __str__(self):
        """Return the shorthand form, e.g. "3x4mer"."""
        return "%sx%smer" % (self.n_repeats, self.kmer_size)