Skip to content

Module genealloy.genealloy

View Source
from .codontable import (

    codon_to_aa,

    aa_to_codon_extended,

    codon_extended_to_aa,

    complement_table,

    compare_letters,

)

def convert_seq_to_codons(seq):
    """Split a sequence string into consecutive 3-letter chunks (triplets).

    The last chunk is shorter than three letters when ``len(seq)`` is not a
    multiple of three. An empty sequence yields an empty list.
    """
    triplets = []
    for start in range(0, len(seq), 3):
        triplets.append(seq[start : start + 3])
    return triplets

def convert_codonlist_to_tuplelist(seq_codons, codon_to_codon_extended):
    """Map every triplet of a sequence to its swaptable entry.

    The swaptable (``codon_to_codon_extended``) is a dict keyed by triplet;
    each value holds the triplets that may be swapped in at that position.
    The returned list has one swaptable entry per input triplet, in order.

    Raises ``KeyError`` if a triplet is not a key of the swaptable.
    """
    return [codon_to_codon_extended[triplet] for triplet in seq_codons]

def get_next_letter(sequence_tuplelist, current_letter_index):
    """Get next letter in the sequence.

    A letter index is ``[codon, triplet, letter]`` and addresses
    ``sequence_tuplelist[codon][triplet][letter]``.

    If the current letter is the last of its triplet (letter position 2),
    the next letter is the first letter of the first triplet of the next
    codon. Otherwise it is the following letter of the same triplet.

    Returns a ``(letter, letter_index)`` tuple. The returned index is a new
    list; ``current_letter_index`` is never modified.

    Raises ``IndexError`` when the computed position does not exist, for
    example when the sequence has no further codon.
    """
    codon_pos = current_letter_index[0]
    triplet_pos = current_letter_index[1]
    letter_pos = current_letter_index[2]

    if letter_pos == 2:
        # Last letter of the triplet: advance to the next codon.
        next_index = [codon_pos + 1, 0, 0]
    else:
        # Not the last letter: step to the next letter of this triplet.
        next_index = [codon_pos, triplet_pos, letter_pos + 1]

    i, j, k = next_index
    return (sequence_tuplelist[i][j][k], next_index)

def get_letter_in_next_triplet(sequence_tuplelist, current_letter_index):
    """Get first letter in the next triplet.

    A letter index is ``[codon, triplet, letter]``. The codon position is
    kept, the triplet position is advanced by one and the letter position is
    reset to 0.

    Returns a ``(letter, letter_index)`` tuple. The returned index is a new
    list; ``current_letter_index`` is never modified, not even when the
    lookup fails.

    Raises ``IndexError`` when the codon has no further triplet.
    """
    next_index = [current_letter_index[0], current_letter_index[1] + 1, 0]
    i, j, k = next_index
    return (sequence_tuplelist[i][j][k], next_index)

def compare_then_get_letter_recursively(
    host_tuplelist, parasite_tuplelist, host_letter, parasite_letter
):
    """Compare two letters then get next pair of letters, repeatedly.

    Works only for in-frame comparisons. ``host_letter`` and
    ``parasite_letter`` are ``(letter, [codon, triplet, letter_pos])`` tuples
    as returned by ``get_next_letter``; they mark where comparison starts.

    * Letters match: advance both sequences by one letter. If the parasite
      has no next letter, the whole parasite has been matched.
    * Letters differ: try the next alternative parasite triplet and restart
      the current host triplet from its first letter. If the parasite codon
      has no more alternatives, move to the next alternative host triplet
      and reset the parasite to its first triplet. If the host has no more
      alternatives either, there is no match.

    Returns a string describing a match or no match between the sequences.

    Raises ``IndexError`` if the host runs out of letters while the parasite
    still has letters left.

    The name is kept for compatibility. The steps are run in a loop rather
    than by self-calls, so long sequences cannot exceed the interpreter's
    recursion limit.
    """
    while True:
        if compare_letters(host_letter[0], parasite_letter[0]):
            try:
                next_parasite_letter = get_next_letter(
                    parasite_tuplelist, parasite_letter[1]
                )
            except IndexError:
                # Only "ran off the end of the parasite" means success; any
                # other error must propagate instead of reporting a match.
                return str(
                    "Finished parasite sequence, match found! Ending host position:"
                    + str(host_letter[1])
                )
            # Not guarded: a host that ends before the parasite is an error.
            host_letter = get_next_letter(host_tuplelist, host_letter[1])
            parasite_letter = next_parasite_letter
            continue

        # Letters do not match: move on to the next parasite triplet.
        try:
            next_parasite_letter = get_letter_in_next_triplet(
                parasite_tuplelist, parasite_letter[1]
            )
        except IndexError:
            # No more parasite triplets: try the next host triplet instead.
            try:
                host_letter = get_letter_in_next_triplet(
                    host_tuplelist, host_letter[1]
                )
            except IndexError:
                return "No match for this starting codon position"
            # Same parasite codon, reset to first triplet, first letter.
            codon = parasite_letter[1][0]
            parasite_letter = (parasite_tuplelist[codon][0][0], [codon, 0, 0])
        else:
            # Same host codon and triplet, but restart at its first letter
            # because the parasite was reset to the start of a triplet.
            codon, triplet = host_letter[1][0], host_letter[1][1]
            host_letter = (host_tuplelist[codon][triplet][0], [codon, triplet, 0])
            parasite_letter = next_parasite_letter

def walk_seqstep(seqstep):
    """Drive a comparison to completion.

    Calls ``seqstep.advance_step()`` repeatedly and stops as soon as
    ``seqstep.result`` is truthy. Nothing is returned; the outcome is read
    from the ``seqstep`` object itself.
    """
    while True:
        if seqstep.result:
            return
        seqstep.advance_step()

def compare_sequence_tuplelists(parasite_tuplelist, host_tuplelist, frameshift):
    """Compare two sequences' tuplelists for one frame and list the matches.

    Every host codon position at which the parasite still fits inside the
    host is tried as a starting point. The returned list holds the starting
    host codon positions for which the comparison reported a match.
    """
    last_start = len(host_tuplelist) - len(parasite_tuplelist)
    matching_starts = []
    for start in range(last_start + 1):
        walker = SeqStep(
            host_tuplelist,
            parasite_tuplelist,
            start_host_codon=start,
            frameshift=frameshift,
        )
        walk_seqstep(walker)
        if walker.match:
            matching_starts.append(start)
    return matching_starts

def compare_sequence_tuplelists_in_all_frames(
    parasite_tuplelist, host_tuplelist, prefix=""
):
    """Compare two sequences' tuplelists in all three frames.

    Returns a dict with one entry per frameshift (0, 1 and 2). Each key is
    ``prefix`` followed by the frameshift number, and each value is the list
    of matches returned by ``compare_sequence_tuplelists`` for that frame.
    """
    matches_by_frame = {}
    for shift in range(3):
        matches = compare_sequence_tuplelists(
            parasite_tuplelist, host_tuplelist, shift
        )
        matches_by_frame[prefix + str(shift)] = matches
    return matches_by_frame

def find_partial_overlaps(host, parasite, swaptable, verbose=True):
    """Compare sequences after padding the host so the parasite may overhang.

    The host is flanked on both sides with ``len(parasite)`` letters ``"N"``
    and the padded host is passed to ``make_genealloy``. An ``"NNN"`` entry
    is added to the swaptable so that the padding codons can be converted.
    How ``"N"`` compares against other letters is decided by
    ``compare_letters``, which is defined outside this module.

    Parameters
    ----------
    host, parasite
      Sequence strings.
    swaptable
      Dict of triplet: triplets. It is copied, so the caller's dict is left
      untouched.
    verbose
      Forwarded to ``make_genealloy``.

    Returns the dictionary of matches produced by ``make_genealloy``. The
    reported codon positions refer to the padded host.
    """
    flank = "N" * len(parasite)
    padded_host = flank + host + flank

    # Copy before adding the padding codon: the caller's table must not
    # silently gain an "NNN" entry.
    padded_swaptable = dict(swaptable)
    padded_swaptable["NNN"] = ("NNN",)

    return make_genealloy(
        padded_host, parasite, padded_swaptable, verbose=verbose
    )

def make_genealloy(host, parasite, swaptable, verbose=True):
    """Compare two sequence strings and return a dictionary of matches.

    Both strings are cut into triplets and expanded through ``swaptable``.
    The parasite is then compared, in all three frames, against the host
    (keys ``f_0``..``f_2``) and against the reverse complement of the host
    (keys ``rc_0``..``rc_2``). Each value is a list of matching start codon
    positions. With ``verbose`` set, a one-line summary is printed.
    """
    host_tuplelist = convert_codonlist_to_tuplelist(
        convert_seq_to_codons(host), swaptable
    )
    parasite_tuplelist = convert_codonlist_to_tuplelist(
        convert_seq_to_codons(parasite), swaptable
    )

    result = {}
    result.update(
        compare_sequence_tuplelists_in_all_frames(
            parasite_tuplelist, host_tuplelist, prefix="f_"
        )
    )
    result.update(
        compare_sequence_tuplelists_in_all_frames(
            parasite_tuplelist,
            get_reverse_complement_tuplelist(host_tuplelist),
            prefix="rc_",
        )
    )

    if verbose:
        found_any = any(matches != [] for matches in result.values())
        if found_any:
            print("A genealloy can be made using these sequences!")
        else:
            print("These sequences cannot be mixed")

    return result

def get_complement_tuplelist(codon_tuplelist):
    """Complement every triplet of a sequence tuplelist.

    Each of the first three letters of a triplet is translated through
    ``complement_table``. Codon order and the order of triplets inside each
    codon are kept. Returns a new list of tuples.
    """

    def complement_of(triplet):
        first, second, third = triplet[0], triplet[1], triplet[2]
        return (
            complement_table[first]
            + complement_table[second]
            + complement_table[third]
        )

    return [
        tuple(complement_of(triplet) for triplet in codon)
        for codon in codon_tuplelist
    ]

def get_reverse_tuplelist(codon_tuplelist):
    """Reverse a tuplelist: codon order is flipped and each triplet is reversed.

    The order of the alternative triplets inside each codon tuple is kept.
    Returns a new list of tuples.
    """
    flipped = [
        tuple(triplet[::-1] for triplet in codon) for codon in codon_tuplelist
    ]
    flipped.reverse()
    return flipped

def get_reverse_complement_tuplelist(codon_tuplelist):
    """Return the reverse complement of a sequence's tuplelist.

    The tuplelist is complemented first and the complemented tuplelist is
    then reversed.
    """
    return get_reverse_tuplelist(get_complement_tuplelist(codon_tuplelist))

class Duodon:
    """Container holding a pair of triplets.

    The two triplets are available as ``first_triplet`` and
    ``second_triplet``.
    """

    def __init__(self, first_triplet, second_triplet):
        # Both values are stored as given, without validation or copying.
        self.first_triplet, self.second_triplet = first_triplet, second_triplet

class SeqStep:
    """Class for keeping track of sequence comparison

    It aligns a parasite triplet with two consecutive host triplets
    (a duodon), keeps a cursor that marks the position of the comparison
    process, and has methods for generating duodons and comparing them with
    triplets.

    The `advance_step()` method attempts to advance the comparison by one
    codon step. It can (i) advance the cursor or (ii) conclude that there is
    no match between the sequences, or (iii) conclude that there is a match.

    Parameters
    ----------
    host_tuplelist
      A list of tuples. Each tuple stores the allowed triplets for a codon
      position of the host sequence.
    parasite_tuplelist
      A list of tuples. Each tuple stores the allowed triplets for a codon
      position of the parasite sequence.
    frameshift
      An integer (0, 1 or 2) denoting the frameshift between host and parasite.
    start_host_codon
      The host codon position from which the comparison should start.
    """

    def __init__(
        self, host_tuplelist, parasite_tuplelist, frameshift=0, start_host_codon=0
    ):
        self.host_tuplelist = host_tuplelist
        self.parasite_tuplelist = parasite_tuplelist
        self.frameshift = frameshift
        self.start_host_codon = start_host_codon
        # Number of parasite codons matched so far.
        self.cursor = 0
        self.len_parasite = len(parasite_tuplelist)
        self.len_host = len(host_tuplelist)
        # Parasite triplet / host duodon chosen at each matched step.
        self.parasite_path = []
        self.host_path = []
        # `match` becomes True on success; `result` holds the final message.
        self.match = False
        self.result = None

    def generate_duodons(self):
        """Build the candidate duodons for the current cursor position.

        On the first step every allowed triplet of the current host codon is
        a candidate first triplet. On later steps the first triplet is fixed
        to the second triplet of the previously chosen duodon. The second
        triplet ranges over the allowed triplets of the following host codon;
        when the host has no following codon, a single ``"NNN"`` padding
        triplet is used instead.
        """
        host_codon = self.start_host_codon + self.cursor

        if self.host_path == []:  # first time it's made
            first_triplets = self.host_tuplelist[host_codon]
        else:
            # Continue from the duodon chosen in the previous step.
            first_triplets = (self.host_path[-1].second_triplet,)

        try:
            second_triplets = self.host_tuplelist[host_codon + 1]
        except IndexError:
            # Past the end of the host: pad. This also covers the first step,
            # which used to raise when the start codon was the last one.
            second_triplets = ("NNN",)

        return [
            Duodon(triplet_1, triplet_2)
            for triplet_1 in first_triplets
            for triplet_2 in second_triplets
        ]

    def return_all_matching_duodons(self, triplet, duodons, frameshift=0):
        """Return the duodons (in order) that match `triplet` at `frameshift`."""
        return [
            duodon
            for duodon in duodons
            if self.compare_triplet_and_duodon(
                triplet, duodon, frameshift=frameshift
            )
        ]

    def compare_triplet_and_duodon(self, triplet, duodon, frameshift=0):
        """Return True if `triplet` matches the duodon at the given offset.

        The duodon's six letters are read as one string. Letters at positions
        ``frameshift`` to ``frameshift + 2`` are compared with the three
        triplet letters using `compare_letters`, stopping at the first
        mismatch.
        """
        host_letters = duodon.first_triplet + duodon.second_triplet
        return all(
            compare_letters(host_letters[offset + frameshift], triplet[offset])
            for offset in range(3)
        )

    def advance_step(self):
        """Advance the comparison by one parasite codon.

        Returns the final result message once the comparison is decided (the
        same message is stored in `self.result`), or the string
        ``"Codon matched, cursor advanced"`` after a successful step.
        """
        if self.result:
            return self.result

        if self.cursor == self.len_parasite:
            # Every parasite codon has been matched.
            self.match = True
            self.result = (
                "Match found between parasite and host sequence. Start codon was: "
                + str(self.start_host_codon)
            )
            return self.result

        parasite_triplets = self.parasite_tuplelist[self.cursor]
        host_duodons = self.generate_duodons()

        # NOTE(review): the first matching parasite triplet and the first
        # matching duodon are taken and never revisited, so later steps only
        # see the second triplet of that duodon. Confirm whether backtracking
        # over `host_path` / `parasite_path` was intended.
        for parasite_triplet in parasite_triplets:
            matching_duodons = self.return_all_matching_duodons(
                parasite_triplet, host_duodons, frameshift=self.frameshift
            )
            if matching_duodons:
                self.parasite_path.append(parasite_triplet)
                self.host_path.append(matching_duodons[0])
                self.cursor += 1
                return "Codon matched, cursor advanced"

        self.result = "Sequences don't match. Start codon was: " + str(
            self.start_host_codon
        )
        return self.result

Variables

aa_to_codon_extended
codon_extended_to_aa
codon_to_aa
complement_table

Functions

compare_sequence_tuplelists

def compare_sequence_tuplelists(
    parasite_tuplelist,
    host_tuplelist,
    frameshift
)

Compare two sequences' tuplelists for the given frame and return the list of matches.

View Source
def compare_sequence_tuplelists(parasite_tuplelist, host_tuplelist, frameshift):
    """Compare two sequences' tuplelists for one frame and list the matches.

    Every host codon position at which the parasite still fits inside the
    host is tried as a starting point. The returned list holds the starting
    host codon positions for which the comparison reported a match.
    """
    last_start = len(host_tuplelist) - len(parasite_tuplelist)
    matching_starts = []
    for start in range(last_start + 1):
        walker = SeqStep(
            host_tuplelist,
            parasite_tuplelist,
            start_host_codon=start,
            frameshift=frameshift,
        )
        walk_seqstep(walker)
        if walker.match:
            matching_starts.append(start)
    return matching_starts

compare_sequence_tuplelists_in_all_frames

def compare_sequence_tuplelists_in_all_frames(
    parasite_tuplelist,
    host_tuplelist,
    prefix=''
)

Compare two sequences' tuplelists for all frames and return a dict of matches.

View Source
def compare_sequence_tuplelists_in_all_frames(
    parasite_tuplelist, host_tuplelist, prefix=""
):
    """Compare two sequences' tuplelists in all three frames.

    Returns a dict with one entry per frameshift (0, 1 and 2). Each key is
    ``prefix`` followed by the frameshift number, and each value is the list
    of matches returned by ``compare_sequence_tuplelists`` for that frame.
    """
    matches_by_frame = {}
    for shift in range(3):
        matches = compare_sequence_tuplelists(
            parasite_tuplelist, host_tuplelist, shift
        )
        matches_by_frame[prefix + str(shift)] = matches
    return matches_by_frame

compare_then_get_letter_recursively

def compare_then_get_letter_recursively(
    host_tuplelist,
    parasite_tuplelist,
    host_letter,
    parasite_letter
)

Compare two letters then get next pair of letters recursively.

Returns string for match or no match between the sequences.

View Source
def compare_then_get_letter_recursively(
    host_tuplelist, parasite_tuplelist, host_letter, parasite_letter
):
    """Compare two letters then get next pair of letters, repeatedly.

    Works only for in-frame comparisons. ``host_letter`` and
    ``parasite_letter`` are ``(letter, [codon, triplet, letter_pos])`` tuples
    as returned by ``get_next_letter``; they mark where comparison starts.

    * Letters match: advance both sequences by one letter. If the parasite
      has no next letter, the whole parasite has been matched.
    * Letters differ: try the next alternative parasite triplet and restart
      the current host triplet from its first letter. If the parasite codon
      has no more alternatives, move to the next alternative host triplet
      and reset the parasite to its first triplet. If the host has no more
      alternatives either, there is no match.

    Returns a string describing a match or no match between the sequences.

    Raises ``IndexError`` if the host runs out of letters while the parasite
    still has letters left.

    The name is kept for compatibility. The steps are run in a loop rather
    than by self-calls, so long sequences cannot exceed the interpreter's
    recursion limit.
    """
    while True:
        if compare_letters(host_letter[0], parasite_letter[0]):
            try:
                next_parasite_letter = get_next_letter(
                    parasite_tuplelist, parasite_letter[1]
                )
            except IndexError:
                # Only "ran off the end of the parasite" means success; any
                # other error must propagate instead of reporting a match.
                return str(
                    "Finished parasite sequence, match found! Ending host position:"
                    + str(host_letter[1])
                )
            # Not guarded: a host that ends before the parasite is an error.
            host_letter = get_next_letter(host_tuplelist, host_letter[1])
            parasite_letter = next_parasite_letter
            continue

        # Letters do not match: move on to the next parasite triplet.
        try:
            next_parasite_letter = get_letter_in_next_triplet(
                parasite_tuplelist, parasite_letter[1]
            )
        except IndexError:
            # No more parasite triplets: try the next host triplet instead.
            try:
                host_letter = get_letter_in_next_triplet(
                    host_tuplelist, host_letter[1]
                )
            except IndexError:
                return "No match for this starting codon position"
            # Same parasite codon, reset to first triplet, first letter.
            codon = parasite_letter[1][0]
            parasite_letter = (parasite_tuplelist[codon][0][0], [codon, 0, 0])
        else:
            # Same host codon and triplet, but restart at its first letter
            # because the parasite was reset to the start of a triplet.
            codon, triplet = host_letter[1][0], host_letter[1][1]
            host_letter = (host_tuplelist[codon][triplet][0], [codon, triplet, 0])
            parasite_letter = next_parasite_letter

convert_codonlist_to_tuplelist

def convert_codonlist_to_tuplelist(
    seq_codons,
    codon_to_codon_extended
)

Convert a list of triplets into a list of tuples, using a swaptable.

The swaptable is a dict of triplet: triplets, and determines the allowed swaps.

View Source
def convert_codonlist_to_tuplelist(seq_codons, codon_to_codon_extended):
    """Map every triplet of a sequence to its swaptable entry.

    The swaptable (``codon_to_codon_extended``) is a dict keyed by triplet;
    each value holds the triplets that may be swapped in at that position.
    The returned list has one swaptable entry per input triplet, in order.

    Raises ``KeyError`` if a triplet is not a key of the swaptable.
    """
    return [codon_to_codon_extended[triplet] for triplet in seq_codons]

convert_seq_to_codons

def convert_seq_to_codons(
    seq
)

Convert a string (sequence) into a list of 3-letter strings (triplets).

View Source
def convert_seq_to_codons(seq):
    """Split a sequence string into consecutive 3-letter chunks (triplets).

    The last chunk is shorter than three letters when ``len(seq)`` is not a
    multiple of three. An empty sequence yields an empty list.
    """
    triplets = []
    for start in range(0, len(seq), 3):
        triplets.append(seq[start : start + 3])
    return triplets

find_partial_overlaps

def find_partial_overlaps(
    host,
    parasite,
    swaptable,
    verbose=True
)
View Source
def find_partial_overlaps(host, parasite, swaptable, verbose=True):
    """Compare sequences after padding the host so the parasite may overhang.

    The host is flanked on both sides with ``len(parasite)`` letters ``"N"``
    and the padded host is passed to ``make_genealloy``. An ``"NNN"`` entry
    is added to the swaptable so that the padding codons can be converted.
    How ``"N"`` compares against other letters is decided by
    ``compare_letters``, which is defined outside this module.

    Parameters
    ----------
    host, parasite
      Sequence strings.
    swaptable
      Dict of triplet: triplets. It is copied, so the caller's dict is left
      untouched.
    verbose
      Forwarded to ``make_genealloy``.

    Returns the dictionary of matches produced by ``make_genealloy``. The
    reported codon positions refer to the padded host.
    """
    flank = "N" * len(parasite)
    padded_host = flank + host + flank

    # Copy before adding the padding codon: the caller's table must not
    # silently gain an "NNN" entry.
    padded_swaptable = dict(swaptable)
    padded_swaptable["NNN"] = ("NNN",)

    return make_genealloy(
        padded_host, parasite, padded_swaptable, verbose=verbose
    )

get_complement_tuplelist

def get_complement_tuplelist(
    codon_tuplelist
)

Get complement triplets of a sequence tuplelist.

View Source
def get_complement_tuplelist(codon_tuplelist):
    """Complement every triplet of a sequence tuplelist.

    Each of the first three letters of a triplet is translated through
    ``complement_table``. Codon order and the order of triplets inside each
    codon are kept. Returns a new list of tuples.
    """

    def complement_of(triplet):
        first, second, third = triplet[0], triplet[1], triplet[2]
        return (
            complement_table[first]
            + complement_table[second]
            + complement_table[third]
        )

    return [
        tuple(complement_of(triplet) for triplet in codon)
        for codon in codon_tuplelist
    ]

get_letter_in_next_triplet

def get_letter_in_next_triplet(
    sequence_tuplelist,
    current_letter_index
)

Get first letter in the next triplet.

View Source
def get_letter_in_next_triplet(sequence_tuplelist, current_letter_index):
    """Get first letter in the next triplet.

    A letter index is ``[codon, triplet, letter]``. The codon position is
    kept, the triplet position is advanced by one and the letter position is
    reset to 0.

    Returns a ``(letter, letter_index)`` tuple. The returned index is a new
    list; ``current_letter_index`` is never modified, not even when the
    lookup fails.

    Raises ``IndexError`` when the codon has no further triplet.
    """
    next_index = [current_letter_index[0], current_letter_index[1] + 1, 0]
    i, j, k = next_index
    return (sequence_tuplelist[i][j][k], next_index)

get_next_letter

def get_next_letter(
    sequence_tuplelist,
    current_letter_index
)

Get next letter in the sequence.

View Source
def get_next_letter(sequence_tuplelist, current_letter_index):
    """Get next letter in the sequence.

    A letter index is ``[codon, triplet, letter]`` and addresses
    ``sequence_tuplelist[codon][triplet][letter]``.

    If the current letter is the last of its triplet (letter position 2),
    the next letter is the first letter of the first triplet of the next
    codon. Otherwise it is the following letter of the same triplet.

    Returns a ``(letter, letter_index)`` tuple. The returned index is a new
    list; ``current_letter_index`` is never modified.

    Raises ``IndexError`` when the computed position does not exist, for
    example when the sequence has no further codon.
    """
    codon_pos = current_letter_index[0]
    triplet_pos = current_letter_index[1]
    letter_pos = current_letter_index[2]

    if letter_pos == 2:
        # Last letter of the triplet: advance to the next codon.
        next_index = [codon_pos + 1, 0, 0]
    else:
        # Not the last letter: step to the next letter of this triplet.
        next_index = [codon_pos, triplet_pos, letter_pos + 1]

    i, j, k = next_index
    return (sequence_tuplelist[i][j][k], next_index)

get_reverse_complement_tuplelist

def get_reverse_complement_tuplelist(
    codon_tuplelist
)

Get reverse complement of a sequence's tuplelist.

View Source
def get_reverse_complement_tuplelist(codon_tuplelist):
    """Return the reverse complement of a sequence's tuplelist.

    The tuplelist is complemented first and the complemented tuplelist is
    then reversed.
    """
    return get_reverse_tuplelist(get_complement_tuplelist(codon_tuplelist))

get_reverse_tuplelist

def get_reverse_tuplelist(
    codon_tuplelist
)

Get reverse of a tuplelist with reversed triplets.

View Source
def get_reverse_tuplelist(codon_tuplelist):
    """Reverse a tuplelist: codon order is flipped and each triplet is reversed.

    The order of the alternative triplets inside each codon tuple is kept.
    Returns a new list of tuples.
    """
    flipped = [
        tuple(triplet[::-1] for triplet in codon) for codon in codon_tuplelist
    ]
    flipped.reverse()
    return flipped

make_genealloy

def make_genealloy(
    host,
    parasite,
    swaptable,
    verbose=True
)

Compare two sequence strings and return dictionary of matches.

View Source
def make_genealloy(host, parasite, swaptable, verbose=True):
    """Compare two sequence strings and return a dictionary of matches.

    Both strings are cut into triplets and expanded through ``swaptable``.
    The parasite is then compared, in all three frames, against the host
    (keys ``f_0``..``f_2``) and against the reverse complement of the host
    (keys ``rc_0``..``rc_2``). Each value is a list of matching start codon
    positions. With ``verbose`` set, a one-line summary is printed.
    """
    host_tuplelist = convert_codonlist_to_tuplelist(
        convert_seq_to_codons(host), swaptable
    )
    parasite_tuplelist = convert_codonlist_to_tuplelist(
        convert_seq_to_codons(parasite), swaptable
    )

    result = {}
    result.update(
        compare_sequence_tuplelists_in_all_frames(
            parasite_tuplelist, host_tuplelist, prefix="f_"
        )
    )
    result.update(
        compare_sequence_tuplelists_in_all_frames(
            parasite_tuplelist,
            get_reverse_complement_tuplelist(host_tuplelist),
            prefix="rc_",
        )
    )

    if verbose:
        found_any = any(matches != [] for matches in result.values())
        if found_any:
            print("A genealloy can be made using these sequences!")
        else:
            print("These sequences cannot be mixed")

    return result

walk_seqstep

def walk_seqstep(
    seqstep
)

Compare two sequences by calling advance_step until it returns the result.

View Source
def walk_seqstep(seqstep):
    """Drive a comparison to completion.

    Calls ``seqstep.advance_step()`` repeatedly and stops as soon as
    ``seqstep.result`` is truthy. Nothing is returned; the outcome is read
    from the ``seqstep`` object itself.
    """
    while True:
        if seqstep.result:
            return
        seqstep.advance_step()

Classes

Duodon

class Duodon(
    first_triplet,
    second_triplet
)

Class for storing two triplets

View Source
class Duodon:
    """Container holding a pair of triplets.

    The two triplets are available as ``first_triplet`` and
    ``second_triplet``.
    """

    def __init__(self, first_triplet, second_triplet):
        # Both values are stored as given, without validation or copying.
        self.first_triplet, self.second_triplet = first_triplet, second_triplet

SeqStep

class SeqStep(
    host_tuplelist,
    parasite_tuplelist,
    frameshift=0,
    start_host_codon=0
)

Class for keeping track of sequence comparison

It stores a method that aligns a parasite triplet with two consecutive host triplets (duodons), a cursor that marks the position of the comparison process, and methods for generating duodons and comparing them with triplets. The advance_step() method attempts to advance the comparison by one codon step. It can (i) advance the cursor or (ii) conclude that there is no match between the sequences, or (iii) conclude that there is a match.

Parameters

host_tuplelist A list of tuples. Each tuple stores the allowed triplets for a codon position of the host sequence.

parasite_tuplelist A list of tuples. Each tuple stores the allowed triplets for a codon position of the parasite sequence.

frameshift An integer (0, 1 or 2) denoting the frameshift between host and parasite.

start_host_codon The host codon position from which the comparison should start.

View Source
class SeqStep:
    """Class for keeping track of sequence comparison

    It aligns a parasite triplet with two consecutive host triplets
    (a duodon), keeps a cursor that marks the position of the comparison
    process, and has methods for generating duodons and comparing them with
    triplets.

    The `advance_step()` method attempts to advance the comparison by one
    codon step. It can (i) advance the cursor or (ii) conclude that there is
    no match between the sequences, or (iii) conclude that there is a match.

    Parameters
    ----------
    host_tuplelist
      A list of tuples. Each tuple stores the allowed triplets for a codon
      position of the host sequence.
    parasite_tuplelist
      A list of tuples. Each tuple stores the allowed triplets for a codon
      position of the parasite sequence.
    frameshift
      An integer (0, 1 or 2) denoting the frameshift between host and parasite.
    start_host_codon
      The host codon position from which the comparison should start.
    """

    def __init__(
        self, host_tuplelist, parasite_tuplelist, frameshift=0, start_host_codon=0
    ):
        self.host_tuplelist = host_tuplelist
        self.parasite_tuplelist = parasite_tuplelist
        self.frameshift = frameshift
        self.start_host_codon = start_host_codon
        # Number of parasite codons matched so far.
        self.cursor = 0
        self.len_parasite = len(parasite_tuplelist)
        self.len_host = len(host_tuplelist)
        # Parasite triplet / host duodon chosen at each matched step.
        self.parasite_path = []
        self.host_path = []
        # `match` becomes True on success; `result` holds the final message.
        self.match = False
        self.result = None

    def generate_duodons(self):
        """Build the candidate duodons for the current cursor position.

        On the first step every allowed triplet of the current host codon is
        a candidate first triplet. On later steps the first triplet is fixed
        to the second triplet of the previously chosen duodon. The second
        triplet ranges over the allowed triplets of the following host codon;
        when the host has no following codon, a single ``"NNN"`` padding
        triplet is used instead.
        """
        host_codon = self.start_host_codon + self.cursor

        if self.host_path == []:  # first time it's made
            first_triplets = self.host_tuplelist[host_codon]
        else:
            # Continue from the duodon chosen in the previous step.
            first_triplets = (self.host_path[-1].second_triplet,)

        try:
            second_triplets = self.host_tuplelist[host_codon + 1]
        except IndexError:
            # Past the end of the host: pad. This also covers the first step,
            # which used to raise when the start codon was the last one.
            second_triplets = ("NNN",)

        return [
            Duodon(triplet_1, triplet_2)
            for triplet_1 in first_triplets
            for triplet_2 in second_triplets
        ]

    def return_all_matching_duodons(self, triplet, duodons, frameshift=0):
        """Return the duodons (in order) that match `triplet` at `frameshift`."""
        return [
            duodon
            for duodon in duodons
            if self.compare_triplet_and_duodon(
                triplet, duodon, frameshift=frameshift
            )
        ]

    def compare_triplet_and_duodon(self, triplet, duodon, frameshift=0):
        """Return True if `triplet` matches the duodon at the given offset.

        The duodon's six letters are read as one string. Letters at positions
        ``frameshift`` to ``frameshift + 2`` are compared with the three
        triplet letters using `compare_letters`, stopping at the first
        mismatch.
        """
        host_letters = duodon.first_triplet + duodon.second_triplet
        return all(
            compare_letters(host_letters[offset + frameshift], triplet[offset])
            for offset in range(3)
        )

    def advance_step(self):
        """Advance the comparison by one parasite codon.

        Returns the final result message once the comparison is decided (the
        same message is stored in `self.result`), or the string
        ``"Codon matched, cursor advanced"`` after a successful step.
        """
        if self.result:
            return self.result

        if self.cursor == self.len_parasite:
            # Every parasite codon has been matched.
            self.match = True
            self.result = (
                "Match found between parasite and host sequence. Start codon was: "
                + str(self.start_host_codon)
            )
            return self.result

        parasite_triplets = self.parasite_tuplelist[self.cursor]
        host_duodons = self.generate_duodons()

        # NOTE(review): the first matching parasite triplet and the first
        # matching duodon are taken and never revisited, so later steps only
        # see the second triplet of that duodon. Confirm whether backtracking
        # over `host_path` / `parasite_path` was intended.
        for parasite_triplet in parasite_triplets:
            matching_duodons = self.return_all_matching_duodons(
                parasite_triplet, host_duodons, frameshift=self.frameshift
            )
            if matching_duodons:
                self.parasite_path.append(parasite_triplet)
                self.host_path.append(matching_duodons[0])
                self.cursor += 1
                return "Codon matched, cursor advanced"

        self.result = "Sequences don't match. Start codon was: " + str(
            self.start_host_codon
        )
        return self.result

Methods

advance_step
def advance_step(
    self
)
View Source
    def advance_step(self):
        """Try to match the parasite codon at the cursor against the host.

        Returns
        -------
        str
            * The stored ``self.result`` if a final result was already set.
            * A "Match found ..." message (also stored in ``self.result``, with
              ``self.match`` set to True) once the cursor equals
              ``self.len_parasite``.
            * ``"Codon matched, cursor advanced"`` when one of the alternative
              parasite triplets at the cursor fits a host duodon. The triplet
              is then left on ``self.parasite_path``, the duodon is appended
              to ``self.host_path`` and ``self.cursor`` is incremented.
            * A "Sequences don't match ..." message (also stored in
              ``self.result``) when no alternative triplet fits any duodon.
        """
        # A final result was already reached earlier: keep reporting it.
        if self.result:
            return self.result

        # Every parasite codon has been placed: the search succeeded.
        if self.cursor == self.len_parasite:
            self.match = True
            self.result = (
                "Match found between parasite and host sequence. Start codon was: "
                + str(self.start_host_codon)
            )
            return self.result

        parasite_triplets = self.parasite_tuplelist[self.cursor]
        host_duodons = self.generate_duodons()

        for parasite_triplet in parasite_triplets:
            candidate_duodons = self.return_all_matching_duodons(
                parasite_triplet, host_duodons, frameshift=self.frameshift
            )
            # Record the triplet tentatively; it is removed again below if
            # none of the candidate duodons accepts it.
            self.parasite_path.append(parasite_triplet)

            for host_duodon in candidate_duodons:
                if self.compare_triplet_and_duodon(
                    parasite_triplet, host_duodon, frameshift=self.frameshift
                ):
                    self.host_path.append(host_duodon)
                    self.cursor += 1
                    return "Codon matched, cursor advanced"

            self.parasite_path.pop()

        # All alternative triplets were tried without success.
        self.result = "Sequences don't match. Start codon was: " + str(
            self.start_host_codon
        )
        return self.result
compare_triplet_and_duodon
def compare_triplet_and_duodon(
    self,
    triplet,
    duodon,
    frameshift=0
)
View Source
    def compare_triplet_and_duodon(self, triplet, duodon, frameshift=0):
        """Check whether a triplet fits a duodon at the given frameshift.

        The duodon's ``first_triplet`` and ``second_triplet`` are joined, and
        the three letters starting at offset ``frameshift`` are compared with
        the three letters of ``triplet`` using ``compare_letters``.

        Returns True only if all three letter pairs are accepted; checking
        stops at the first rejected pair.
        """
        window = duodon.first_triplet + duodon.second_triplet
        return all(
            compare_letters(window[position + frameshift], triplet[position])
            for position in range(3)
        )
generate_duodons
def generate_duodons(
    self
)
View Source
    def generate_duodons(self):
        """Build the candidate host duodons for the current cursor position.

        The host codon index is ``self.start_host_codon + self.cursor``.

        * If ``self.host_path`` is empty, every combination of a triplet at
          that index with a triplet at the following index is returned.
        * Otherwise the first triplet is fixed to the ``second_triplet`` of
          the last duodon on ``self.host_path`` and paired with each triplet
          at the following index. If the following index is past the end of
          ``self.host_tuplelist``, a single duodon padded with ``"NNN"`` is
          returned instead.

        Returns
        -------
        list
            ``Duodon`` instances, in the order described above.
        """
        host_codon = self.start_host_codon + self.cursor

        if not self.host_path:  # first time it's made
            return [
                Duodon(triplet_1, triplet_2)
                for triplet_1 in self.host_tuplelist[host_codon]
                for triplet_2 in self.host_tuplelist[host_codon + 1]
            ]

        triplet_1 = self.host_path[-1].second_triplet  # of last used duodon
        try:
            next_triplets = self.host_tuplelist[host_codon + 1]
        except IndexError:
            # No host codon after this one: pad the duodon's second half.
            return [Duodon(triplet_1, "NNN")]
        return [Duodon(triplet_1, triplet_2) for triplet_2 in next_triplets]
return_all_matching_duodons
def return_all_matching_duodons(
    self,
    triplet,
    duodons,
    frameshift=0
)
View Source
    def return_all_matching_duodons(self, triplet, duodons, frameshift=0):
        """Return the duodons that the triplet fits at the given frameshift.

        Each duodon is tested with ``self.compare_triplet_and_duodon`` so the
        letter-by-letter comparison is implemented in one place only.

        Parameters
        ----------
        triplet
            Parasite triplet to test.
        duodons
            Iterable of candidate host duodons; it is not modified.
        frameshift
            Offset passed through to ``compare_triplet_and_duodon``.

        Returns
        -------
        list
            A new list with the accepted duodons, in their original order.
        """
        return [
            duodon
            for duodon in duodons
            if self.compare_triplet_and_duodon(triplet, duodon, frameshift=frameshift)
        ]