Skip to content

Module crecombio.crecombio

View Source
import re

from Bio.Seq import Seq

SITES = {

    "Flp": {

        "seq": Seq("GAAGTTCCTATTCtctagaaaGtATAGGAACTTC".upper()),

        "5p": Seq("GAAGTTCCTATTC"),

        "3p": Seq("tctagaaaGtATAGGAACTTC".upper()),

    },

    "Cre": {"seq": Seq("ATAACTTCGTATAATGTATGCTATACGAAGTTAT")},

    "Cre_lox_511": {"seq": Seq("ATAACTTCGTATAATGTATACTATACGAAGTTAT")},

    "Cre_lox_5171": {"seq": Seq("ATAACTTCGTATAATGTGTACTATACGAAGTTAT")},

    "Cre_lox_2272": {"seq": Seq("ATAACTTCGTATAAAGTATCCTATACGAAGTTAT")},

    "Cre_M2": {"seq": Seq("ATAACTTCGTATAAGAAACCATATACGAAGTTAT")},

    "Cre_M3": {"seq": Seq("ATAACTTCGTATATAATACCATATACGAAGTTAT")},

    "Cre_M7": {"seq": Seq("ATAACTTCGTATAAGATAGAATATACGAAGTTAT")},

    "Cre_M11": {"seq": Seq("ATAACTTCGTATACGATACCATATACGAAGTTAT")},

}

def recombine(sequences, recombinase="Flp"):

    """Simulate recombination and return sequences.

    **Parameters**

    **sequences**

    > List of one or two Biopython SeqRecord objects (`list`).

    **recombinases**

    > Recombinase name (`str`). Default Flp.

    """

    if len(sequences) == 1:

        return recombine_one_sequence(sequences, recombinase)

    if len(sequences) == 2:

        return recombine_two_sequences(sequences, recombinase)

    else:

        raise Exception("Only 1- or 2-sequence recombination is supported")

def recombine_one_sequence(sequences, recombinase="Flp"):

    """Simulate recombination of one sequence."""

    recombined_sequences = []

    for sequence in sequences:

        site = SITES[recombinase]["seq"]

        matches, rc_matches = count_number_of_sites(sequence, site)

        if (len(matches) + len(rc_matches)) != 2:

            raise Exception("The sequence must contain 2 recombinase sites")

        # Excision

        if len(matches) == 2:

            new_sequence = sequence[: matches[0]] + sequence[matches[1] :]

            recombined_sequences += [new_sequence]

        # Excision on reverse strand

        if len(rc_matches) == 2:

            new_sequence = sequence[: matches[0]] + sequence[matches[1] :]

            recombined_sequences += [new_sequence]

        # Inversion

        if len(matches) == 1 and len(rc_matches) == 1:

            if matches[0] > rc_matches[0]:

                raise Exception(

                    "Orientation incorrect: reverse complement of recombination site is"

                    " before the site."

                )

                # extend to circular sequences later

            inside_sequence = sequence[

                (matches[0] + len(site)) : rc_matches[0]

            ]  # added length to not include the site

            inverted_sequence = inside_sequence.reverse_complement()

            left_part = sequence[: matches[0] + len(site)]

            right_part = sequence[rc_matches[0] :]

            new_sequence = left_part + inverted_sequence + right_part

            recombined_sequences += [new_sequence]

    return recombined_sequences

def recombine_two_sequences(sequences, recombinase="Flp"):

    """Simulate recombination of two sequences."""

    seq0 = sequences[0]

    seq1 = sequences[1]

    site = SITES[recombinase]["seq"]

    seq0_matches, seq0_rc_matches = count_number_of_sites(seq0, site)

    seq1_matches, seq1_rc_matches = count_number_of_sites(seq1, site)

    if len(seq0_rc_matches) != 0 or len(seq1_rc_matches) != 0:

        print(

            "Simulating recombination of reverse complement sites are not implemented "

            "yet and these sites are ignored"

        )

    recombined_sequences = []

    if len(seq0_matches) == 1 and len(seq1_matches) == 1:

        # Translocate

        seq0_left_part = seq0[: seq0_matches[0]]

        seq0_right_part = seq0[seq0_matches[0] :]  # includes the site

        seq1_left_part = seq1[: seq1_matches[0]]

        seq1_right_part = seq1[seq1_matches[0] :]  # includes the site

        seq2 = seq0_left_part + seq1_right_part

        seq3 = seq1_left_part + seq0_right_part

        recombined_sequences += [seq2, seq3]

    elif len(seq0_matches) == 1 and len(seq1_matches) == 2:

        # Insert from seq1 to seq0

        donor = seq1

        acceptor = seq0

        donor_matches = seq1_matches

        acceptor_matches = seq0_matches

        new_donor = donor[: donor_matches[0]] + donor[donor_matches[1] :]

        excised_seq = donor[donor_matches[0] : donor_matches[1]]

        new_acceptor = (

            acceptor[: acceptor_matches[0]]

            + excised_seq

            + acceptor[acceptor_matches[0] :]

        )

        recombined_sequences += [new_acceptor, new_donor]

    elif len(seq0_matches) == 2 and len(seq1_matches) == 1:

        # Insert from seq0 to seq1

        donor = seq0

        acceptor = seq1

        donor_matches = seq0_matches

        acceptor_matches = seq1_matches

        # Same code as above:

        new_donor = donor[: donor_matches[0]] + donor[donor_matches[1] :]

        excised_seq = donor[donor_matches[0] : donor_matches[1]]

        new_acceptor = (

            acceptor[: acceptor_matches[0]]

            + excised_seq

            + acceptor[acceptor_matches[0] :]

        )

        recombined_sequences += [new_acceptor, new_donor]

    elif len(seq0_matches) == 2 and len(seq1_matches) == 2:

        raise Exception("Both sequences have 2 recombination sites")

    else:

        raise Exception(

            "The number of recombination sites must be 1 or 2 in each sequence"

        )

    return recombined_sequences

def count_number_of_sites(sequence, site):

    """Count the number of recombination sites in a sequence.

    **Parameters**

    **sequence**

    > A Biopython SeqRecord object.

    **site**

    > The site sequence (`str`).

    """

    matches = [

        m.start() for m in re.finditer(re.escape(str(site)), str(sequence.seq).upper())

    ]

    rc_matches = [

        m.start()

        for m in re.finditer(

            re.escape(str(site.reverse_complement())), str(sequence.seq).upper()

        )

    ]

    return matches, rc_matches

Variables

SITES

Functions

count_number_of_sites

def count_number_of_sites(
    sequence,
    site
)

Count the number of recombination sites in a sequence.

Parameters

sequence

A Biopython SeqRecord object.

site

The site sequence (str).

View Source
def count_number_of_sites(sequence, site):

    """Count the number of recombination sites in a sequence.

    **Parameters**

    **sequence**

    > A Biopython SeqRecord object.

    **site**

    > The site sequence (`str`).

    """

    matches = [

        m.start() for m in re.finditer(re.escape(str(site)), str(sequence.seq).upper())

    ]

    rc_matches = [

        m.start()

        for m in re.finditer(

            re.escape(str(site.reverse_complement())), str(sequence.seq).upper()

        )

    ]

    return matches, rc_matches

recombine

def recombine(
    sequences,
    recombinase='Flp'
)

Simulate recombination and return sequences.

Parameters

sequences

List of one or two Biopython SeqRecord objects (list).

recombinases

Recombinase name (str). Default Flp.

View Source
def recombine(sequences, recombinase="Flp"):

    """Simulate recombination and return sequences.

    **Parameters**

    **sequences**

    > List of one or two Biopython SeqRecord objects (`list`).

    **recombinases**

    > Recombinase name (`str`). Default Flp.

    """

    if len(sequences) == 1:

        return recombine_one_sequence(sequences, recombinase)

    if len(sequences) == 2:

        return recombine_two_sequences(sequences, recombinase)

    else:

        raise Exception("Only 1- or 2-sequence recombination is supported")

recombine_one_sequence

def recombine_one_sequence(
    sequences,
    recombinase='Flp'
)

Simulate recombination of one sequence.

View Source
def recombine_one_sequence(sequences, recombinase="Flp"):

    """Simulate recombination of one sequence."""

    recombined_sequences = []

    for sequence in sequences:

        site = SITES[recombinase]["seq"]

        matches, rc_matches = count_number_of_sites(sequence, site)

        if (len(matches) + len(rc_matches)) != 2:

            raise Exception("The sequence must contain 2 recombinase sites")

        # Excision

        if len(matches) == 2:

            new_sequence = sequence[: matches[0]] + sequence[matches[1] :]

            recombined_sequences += [new_sequence]

        # Excision on reverse strand

        if len(rc_matches) == 2:

            new_sequence = sequence[: matches[0]] + sequence[matches[1] :]

            recombined_sequences += [new_sequence]

        # Inversion

        if len(matches) == 1 and len(rc_matches) == 1:

            if matches[0] > rc_matches[0]:

                raise Exception(

                    "Orientation incorrect: reverse complement of recombination site is"

                    " before the site."

                )

                # extend to circular sequences later

            inside_sequence = sequence[

                (matches[0] + len(site)) : rc_matches[0]

            ]  # added length to not include the site

            inverted_sequence = inside_sequence.reverse_complement()

            left_part = sequence[: matches[0] + len(site)]

            right_part = sequence[rc_matches[0] :]

            new_sequence = left_part + inverted_sequence + right_part

            recombined_sequences += [new_sequence]

    return recombined_sequences

recombine_two_sequences

def recombine_two_sequences(
    sequences,
    recombinase='Flp'
)

Simulate recombination of two sequences.

View Source
def recombine_two_sequences(sequences, recombinase="Flp"):

    """Simulate recombination of two sequences."""

    seq0 = sequences[0]

    seq1 = sequences[1]

    site = SITES[recombinase]["seq"]

    seq0_matches, seq0_rc_matches = count_number_of_sites(seq0, site)

    seq1_matches, seq1_rc_matches = count_number_of_sites(seq1, site)

    if len(seq0_rc_matches) != 0 or len(seq1_rc_matches) != 0:

        print(

            "Simulating recombination of reverse complement sites are not implemented "

            "yet and these sites are ignored"

        )

    recombined_sequences = []

    if len(seq0_matches) == 1 and len(seq1_matches) == 1:

        # Translocate

        seq0_left_part = seq0[: seq0_matches[0]]

        seq0_right_part = seq0[seq0_matches[0] :]  # includes the site

        seq1_left_part = seq1[: seq1_matches[0]]

        seq1_right_part = seq1[seq1_matches[0] :]  # includes the site

        seq2 = seq0_left_part + seq1_right_part

        seq3 = seq1_left_part + seq0_right_part

        recombined_sequences += [seq2, seq3]

    elif len(seq0_matches) == 1 and len(seq1_matches) == 2:

        # Insert from seq1 to seq0

        donor = seq1

        acceptor = seq0

        donor_matches = seq1_matches

        acceptor_matches = seq0_matches

        new_donor = donor[: donor_matches[0]] + donor[donor_matches[1] :]

        excised_seq = donor[donor_matches[0] : donor_matches[1]]

        new_acceptor = (

            acceptor[: acceptor_matches[0]]

            + excised_seq

            + acceptor[acceptor_matches[0] :]

        )

        recombined_sequences += [new_acceptor, new_donor]

    elif len(seq0_matches) == 2 and len(seq1_matches) == 1:

        # Insert from seq0 to seq1

        donor = seq0

        acceptor = seq1

        donor_matches = seq0_matches

        acceptor_matches = seq1_matches

        # Same code as above:

        new_donor = donor[: donor_matches[0]] + donor[donor_matches[1] :]

        excised_seq = donor[donor_matches[0] : donor_matches[1]]

        new_acceptor = (

            acceptor[: acceptor_matches[0]]

            + excised_seq

            + acceptor[acceptor_matches[0] :]

        )

        recombined_sequences += [new_acceptor, new_donor]

    elif len(seq0_matches) == 2 and len(seq1_matches) == 2:

        raise Exception("Both sequences have 2 recombination sites")

    else:

        raise Exception(

            "The number of recombination sites must be 1 or 2 in each sequence"

        )

    return recombined_sequences