Source code for geneblocks.utils

from .CommonBlocks import CommonBlocks
from .biotools import sequence_to_record


def _turn_sequence_into_record_if_necessary(sequence, record_id="id"):
    if hasattr(sequence, "seq"):
        return sequence
    else:
        return sequence_to_record(sequence, record_id=record_id)


[docs]def sequences_are_circularly_equal(sequences): """Return whether all the sequences represent the same circular sequence. This means that the sequences are differently rotated versions of a same circular sequence, and for any pair s1, s2 in the sequences, there is an index i such that s1 = s2[i:] + s2[:i]. The ``sequences`` parameter should be a list of "ATGC" strings or SeqRecords. """ sequences = [ _turn_sequence_into_record_if_necessary(seq, record_id="REC_%d" % i) for i, seq in enumerate(sequences) ] if len(sequences) < 2: raise ValueError("Provide at least 2 sequences") elif len(sequences) > 2: first_equal = sequences_are_circularly_equal(sequences[:2]) return first_equal and sequences_are_circularly_equal(sequences[1:]) s1, s2 = sequences[:2] if s1.id == s2.id: s1.id += "_b" if len(s1) != len(s2): return False blocks = CommonBlocks.from_sequences(sequences=[s1, s2], min_block_size=2) if len(blocks.common_blocks) > 2: return False potential_pivot_indices = [ index for data in blocks.common_blocks.values() for (origin, (start, end, _)) in data["locations"] for index in [start, end] ] s1, s2 = str(s1.seq), str(s2.seq) for index in potential_pivot_indices: new_s1 = s1[index:] + s1[:index] if new_s1 == s2: return True return False