Module plasmid_assessor.Assessment

View Source

import re

import matplotlib.pyplot as plt

import Bio

import Bio.Restriction

from Bio.Seq import Seq

from Bio.SeqFeature import SeqFeature, FeatureLocation

import dnacauldron as dc

try:
    from dna_features_viewer import BiopythonTranslator
except ImportError:

    class AssessmentTranslator:
        """Please install dna_features_viewer to use this class."""

        def __init__(self, *args, **kwargs):
            # Placeholder used when the optional plotting dependency is absent.
            # Any constructor arguments are accepted so the caller always gets
            # this message, never a TypeError about the signature.
            # ImportError is a subclass of Exception, so existing
            # `except Exception` handlers keep working.
            raise ImportError("Please install dna_features_viewer to use this class.")

else:

    class AssessmentTranslator(BiopythonTranslator):
        """Custom translator for highlighting key features."""

        def compute_feature_color(self, feature):
            """Return the plot color for `feature`.

            The color is chosen from the feature's `plasmid_assessment`
            qualifier: `"enzyme"` is red, `"excised"` is yellow and
            `"backbone"` is cyan. Features without the qualifier, or with any
            other value, get the default dna_features_viewer color.
            """
            assessment_type = feature.qualifiers.get("plasmid_assessment")
            if assessment_type == "enzyme":
                return "red"
            if assessment_type == "excised":
                return "yellow"
            if assessment_type == "backbone":
                return "tab:cyan"
            return "tab:blue"  # default dna_features_viewer color

class Assessment:
    """The plasmid assessment class.

    **Parameters**

    **record**
    > A Biopython `SeqRecord`.

    **enzyme**
    > A restriction enzyme (`str`). A Biopython `RestrictionType` will be looked
    up using the string.
    """

    # Record IDs that `add_name()` treats as "no name given".
    UNKNOWN_IDS = [
        "None",
        "",
        "<unknown id>",
        ".",
        "EXPORTED",
        "<unknown name>",
        "Exported",
    ]

    def __init__(self, record, enzyme):
        self.record = record
        try:
            self.enzyme = Bio.Restriction.__dict__[enzyme]
        except KeyError:
            # Same exception type as a plain lookup, but the message names
            # the enzyme that could not be found.
            raise KeyError(
                "Unknown restriction enzyme: {!r}".format(enzyme)
            ) from None
        self.enzyme_name = str(self.enzyme)
        self.results = {}  # filled in by the assessment methods below

    def assess_plasmid(self, other_enzymes=None):
        """Evaluate plasmid for Golden Gate.

        Runs every assessment step in order and finishes by plotting the
        plasmid. The steps store their outcome in `self.results`.

        **Parameters**

        **other_enzymes**
        > List of enzymes used in higher level assemblies (`list`).
        """
        if other_enzymes:
            # Comma-separated enzyme names; only set when enzymes were given.
            self.other_enzymes = ", ".join(str(enz) for enz in other_enzymes)

        # Later steps read the results and attributes set by earlier ones.
        self.add_name()
        self.check_circularity()
        self.get_number_of_sites()
        self.evaluate_orientation()
        self.digest_plasmid()
        self.count_other_sites(other_enzymes)
        self.check_enzyme_site_locations()
        self.sum_results()
        self.plot_plasmid()

    def add_name(self):
        """Set a name for the assessment."""
        # To display on the report:
        record_id = self.record.id
        if str(record_id).strip() in self.UNKNOWN_IDS:
            self.name = "Unnamed plasmid"
        elif len(record_id) > 16:  # Genbank limit, also for width in report
            self.name = record_id[:16] + "..."
        else:
            self.name = record_id

    def check_circularity(self):
        """Record whether the record is annotated as circular.

        `results["is_circular"]` is True only if the record's `topology`
        annotation equals `"circular"`; a missing annotation counts as not
        circular.
        """
        topology = self.record.annotations.get("topology")
        self.results["is_circular"] = topology == "circular"

    def get_number_of_sites(self):
        """Count the sites of the assessment enzyme and annotate them.

        The search result is kept in `self.analysis_results` and the count in
        `results["number_of_sites"]`. Each site is added to the record as a
        one-base feature for the plot.
        """
        # Search as circular unless circularity was checked and found False.
        is_linear = not self.results.get("is_circular", True)
        restriction_batch = Bio.Restriction.RestrictionBatch([self.enzyme])
        analysis = Bio.Restriction.Analysis(
            restriction_batch, sequence=self.record.seq, linear=is_linear
        )
        self.analysis_results = analysis.full(linear=is_linear)
        self.results["number_of_sites"] = len(self.analysis_results[self.enzyme])

        # Add as features for plot in report:
        for enzyme, sites in self.analysis_results.items():
            for site in sites:
                self._add_feature(site, site + 1, str(enzyme), str(enzyme), "enzyme")

    def evaluate_orientation(self):
        """Check that the enzyme site occurs exactly once on each strand.

        The sequence string is searched for the enzyme site and for the
        reverse complement of the site. `results["is_site_orientation_correct"]`
        is True only if each search gives exactly one match. In that case
        `forward_enzyme` (end index of the site match) and `reverse_enzyme`
        (start index of the reverse-complement match) are set, and the region
        marked "Excised" is annotated on the record.
        """
        self.results["is_site_orientation_correct"] = False  # default
        sequence = str(self.record.seq)

        # Forward strand:
        self.iter_forward = [
            match.end() for match in re.finditer(self.enzyme.site, sequence)
        ]
        if len(self.iter_forward) == 1:
            self.forward_enzyme = self.iter_forward[0]
            rev_complement_site = str(Seq(self.enzyme.site).reverse_complement())
            self.iter_reverse = [
                match.start() for match in re.finditer(rev_complement_site, sequence)
            ]
            if len(self.iter_reverse) == 1:  # 1 site in both strands
                self.results["is_site_orientation_correct"] = True
                self.reverse_enzyme = self.iter_reverse[0]

        if not self.results["is_site_orientation_correct"]:
            return

        enzyme_id = str(self.enzyme)
        if self.reverse_enzyme < self.forward_enzyme:
            self._add_feature(
                self.reverse_enzyme - 1,
                self.forward_enzyme + 1,
                enzyme_id,
                "Excised",
                "excised",
            )
        else:  # put annotation together from two pieces:
            self._add_feature(
                0, self.forward_enzyme + 1, enzyme_id, "Excised", "excised"
            )
            self._add_feature(
                self.reverse_enzyme - 1,
                len(self.record),
                enzyme_id,
                "Excised",
                "excised",
            )

    def digest_plasmid(self):
        """Digest the plasmid and store the fragments and overhangs.

        `results["digest"]` is left empty unless the plasmid is circular and
        the site orientation is correct. Otherwise it receives
        `backbone_seq`, `excised_seq`, `first_overhang` and `last_overhang`.
        """
        # This method has two assumptions:
        # - the sequence has two, correctly oriented enzyme sites.
        # - the sequence is circular.
        # Therefore there will be exactly two fragments, with one containing
        # both sites.
        digest = {}
        self.results["digest"] = digest
        if not (
            self.results["is_circular"]
            and self.results["is_site_orientation_correct"]
        ):
            return

        record_fragments = dc.StickyEndFragment.list_from_record_digestion(
            record=self.record, enzyme=self.enzyme, linear=False
        )
        # The fragment that contains the enzyme site is the excised one.
        if self.enzyme.site in record_fragments[0].to_standard_string():
            excise_index, backbone_index = 0, 1  # there are only two fragments
        else:
            excise_index, backbone_index = 1, 0  # reversed

        digest["backbone_seq"] = record_fragments[backbone_index]
        excised = record_fragments[excise_index]
        digest["excised_seq"] = excised
        # The overhangs are read from the ends of the excised fragment.
        digest["first_overhang"] = str(excised.seq.left_end)
        digest["last_overhang"] = str(excised.seq.right_end)

    def count_other_sites(self, other_enzymes):
        """Find the sites of `other_enzymes` and annotate them on the record.

        `results["other_sites"]` always gets `has_any_other_sites`. When
        `other_enzymes` is not None it also gets `enzyme`, the search result
        for those enzymes.
        """
        other_sites = {"has_any_other_sites": False}
        self.results["other_sites"] = other_sites
        if other_enzymes is None:
            return

        bio_enzymes = [Bio.Restriction.__dict__[enzyme] for enzyme in other_enzymes]
        restriction_batch = Bio.Restriction.RestrictionBatch(bio_enzymes)
        # Work with the assumption that the sequence is circular:
        analysis = Bio.Restriction.Analysis(
            restriction_batch, sequence=self.record.seq, linear=False
        )
        other_sites["enzyme"] = analysis.full(linear=False)

        for enzyme, matches in other_sites["enzyme"].items():
            if not matches:
                continue
            other_sites["has_any_other_sites"] = True
            # Also add as features for plot in report:
            for site in matches:
                self._add_feature(site, site + 1, str(enzyme), str(enzyme), "enzyme")

    def check_enzyme_site_locations(self):
        """Flag enzyme sites that are within the retained backbone.

        Fills `sites_outside_excised_region` (enzyme name -> list of site
        positions as strings) and `sites_outside_excised_region_txt`, the same
        information as one string for the pdf report. If the required results
        are missing, a message is printed and nothing is set.
        """
        try:
            has_other_sites = self.results["other_sites"]["has_any_other_sites"]
            orientation_correct = self.results["is_site_orientation_correct"]
        except KeyError:
            print("Run assessment methods first!")
            return

        self.sites_outside_excised_region = {}
        if not (has_other_sites and orientation_correct):
            # no other sites or orientation not correct
            self.sites_outside_excised_region_txt = ""
            return

        reverse, forward = self.reverse_enzyme, self.forward_enzyme
        excised_is_between_sites = reverse < forward

        for enzyme, sites in self.results["other_sites"]["enzyme"].items():
            if excised_is_between_sites:
                # orientation = reverse -> forward: everything that is not
                # strictly between the two sites is flagged.
                problem_sites = [
                    str(site) for site in sites if not (reverse < site < forward)
                ]
            else:
                # orientation = forward -> reverse: sites strictly between
                # the two sites are within the retained backbone.
                problem_sites = [
                    str(site) for site in sites if forward < site < reverse
                ]
            if problem_sites:
                self.sites_outside_excised_region[str(enzyme)] = problem_sites

        # for the pdf report
        self.sites_outside_excised_region_txt = "".join(
            enzyme + ": " + " ".join(problem_sites) + ";"
            for enzyme, problem_sites in self.sites_outside_excised_region.items()
        )

    def sum_results(self):
        """Set `results["pass"]` from the individual assessment results.

        The plasmid fails if it is not circular, if the site orientation is
        not correct (this implicitly checks the number of sites too), or if
        any site was flagged by `check_enzyme_site_locations()`. The mere
        presence of other enzyme sites does not fail the assessment.
        """
        self.results["pass"] = True
        if (
            self.results["is_circular"] is False
            or self.results["is_site_orientation_correct"] is False
            or self.sites_outside_excised_region_txt
        ):
            self.results["pass"] = False

    def plot_plasmid(self):
        """Plot an outline of the plasmid."""
        fig, ax = plt.subplots(figsize=(7, 4))
        graphic_record = AssessmentTranslator().translate_record(self.record)
        graphic_record.plot(ax=ax, with_ruler=False, strand_in_label_threshold=2)
        self.fig = fig

    def _add_feature(self, start, end, feature_id, label, assessment_type):
        """Append a `misc_feature` tagged for the assessment plot to the record."""
        self.record.features.append(
            SeqFeature(
                FeatureLocation(start, end),
                id=feature_id,
                type="misc_feature",
                qualifiers={
                    "label": label,
                    "plasmid_assessment": assessment_type,
                },
            )
        )

Classes

Assessment

class Assessment(
    record,
    enzyme
)

The plasmid assessment class.

Parameters

record

A Biopython SeqRecord.

enzyme

A restriction enzyme (str). A Biopython RestrictionType will be looked up using the string.

View Source

class Assessment:
    """The plasmid assessment class.

    **Parameters**

    **record**
    > A Biopython `SeqRecord`.

    **enzyme**
    > A restriction enzyme (`str`). A Biopython `RestrictionType` will be looked
    up using the string.
    """

    # Record IDs that `add_name()` treats as "no name given".
    UNKNOWN_IDS = [
        "None",
        "",
        "<unknown id>",
        ".",
        "EXPORTED",
        "<unknown name>",
        "Exported",
    ]

    def __init__(self, record, enzyme):
        self.record = record
        try:
            self.enzyme = Bio.Restriction.__dict__[enzyme]
        except KeyError:
            # Same exception type as a plain lookup, but the message names
            # the enzyme that could not be found.
            raise KeyError(
                "Unknown restriction enzyme: {!r}".format(enzyme)
            ) from None
        self.enzyme_name = str(self.enzyme)
        self.results = {}  # filled in by the assessment methods below

    def assess_plasmid(self, other_enzymes=None):
        """Evaluate plasmid for Golden Gate.

        Runs every assessment step in order and finishes by plotting the
        plasmid. The steps store their outcome in `self.results`.

        **Parameters**

        **other_enzymes**
        > List of enzymes used in higher level assemblies (`list`).
        """
        if other_enzymes:
            # Comma-separated enzyme names; only set when enzymes were given.
            self.other_enzymes = ", ".join(str(enz) for enz in other_enzymes)

        # Later steps read the results and attributes set by earlier ones.
        self.add_name()
        self.check_circularity()
        self.get_number_of_sites()
        self.evaluate_orientation()
        self.digest_plasmid()
        self.count_other_sites(other_enzymes)
        self.check_enzyme_site_locations()
        self.sum_results()
        self.plot_plasmid()

    def add_name(self):
        """Set a name for the assessment."""
        # To display on the report:
        record_id = self.record.id
        if str(record_id).strip() in self.UNKNOWN_IDS:
            self.name = "Unnamed plasmid"
        elif len(record_id) > 16:  # Genbank limit, also for width in report
            self.name = record_id[:16] + "..."
        else:
            self.name = record_id

    def check_circularity(self):
        """Record whether the record is annotated as circular.

        `results["is_circular"]` is True only if the record's `topology`
        annotation equals `"circular"`; a missing annotation counts as not
        circular.
        """
        topology = self.record.annotations.get("topology")
        self.results["is_circular"] = topology == "circular"

    def get_number_of_sites(self):
        """Count the sites of the assessment enzyme and annotate them.

        The search result is kept in `self.analysis_results` and the count in
        `results["number_of_sites"]`. Each site is added to the record as a
        one-base feature for the plot.
        """
        # Search as circular unless circularity was checked and found False.
        is_linear = not self.results.get("is_circular", True)
        restriction_batch = Bio.Restriction.RestrictionBatch([self.enzyme])
        analysis = Bio.Restriction.Analysis(
            restriction_batch, sequence=self.record.seq, linear=is_linear
        )
        self.analysis_results = analysis.full(linear=is_linear)
        self.results["number_of_sites"] = len(self.analysis_results[self.enzyme])

        # Add as features for plot in report:
        for enzyme, sites in self.analysis_results.items():
            for site in sites:
                self._add_feature(site, site + 1, str(enzyme), str(enzyme), "enzyme")

    def evaluate_orientation(self):
        """Check that the enzyme site occurs exactly once on each strand.

        The sequence string is searched for the enzyme site and for the
        reverse complement of the site. `results["is_site_orientation_correct"]`
        is True only if each search gives exactly one match. In that case
        `forward_enzyme` (end index of the site match) and `reverse_enzyme`
        (start index of the reverse-complement match) are set, and the region
        marked "Excised" is annotated on the record.
        """
        self.results["is_site_orientation_correct"] = False  # default
        sequence = str(self.record.seq)

        # Forward strand:
        self.iter_forward = [
            match.end() for match in re.finditer(self.enzyme.site, sequence)
        ]
        if len(self.iter_forward) == 1:
            self.forward_enzyme = self.iter_forward[0]
            rev_complement_site = str(Seq(self.enzyme.site).reverse_complement())
            self.iter_reverse = [
                match.start() for match in re.finditer(rev_complement_site, sequence)
            ]
            if len(self.iter_reverse) == 1:  # 1 site in both strands
                self.results["is_site_orientation_correct"] = True
                self.reverse_enzyme = self.iter_reverse[0]

        if not self.results["is_site_orientation_correct"]:
            return

        enzyme_id = str(self.enzyme)
        if self.reverse_enzyme < self.forward_enzyme:
            self._add_feature(
                self.reverse_enzyme - 1,
                self.forward_enzyme + 1,
                enzyme_id,
                "Excised",
                "excised",
            )
        else:  # put annotation together from two pieces:
            self._add_feature(
                0, self.forward_enzyme + 1, enzyme_id, "Excised", "excised"
            )
            self._add_feature(
                self.reverse_enzyme - 1,
                len(self.record),
                enzyme_id,
                "Excised",
                "excised",
            )

    def digest_plasmid(self):
        """Digest the plasmid and store the fragments and overhangs.

        `results["digest"]` is left empty unless the plasmid is circular and
        the site orientation is correct. Otherwise it receives
        `backbone_seq`, `excised_seq`, `first_overhang` and `last_overhang`.
        """
        # This method has two assumptions:
        # - the sequence has two, correctly oriented enzyme sites.
        # - the sequence is circular.
        # Therefore there will be exactly two fragments, with one containing
        # both sites.
        digest = {}
        self.results["digest"] = digest
        if not (
            self.results["is_circular"]
            and self.results["is_site_orientation_correct"]
        ):
            return

        record_fragments = dc.StickyEndFragment.list_from_record_digestion(
            record=self.record, enzyme=self.enzyme, linear=False
        )
        # The fragment that contains the enzyme site is the excised one.
        if self.enzyme.site in record_fragments[0].to_standard_string():
            excise_index, backbone_index = 0, 1  # there are only two fragments
        else:
            excise_index, backbone_index = 1, 0  # reversed

        digest["backbone_seq"] = record_fragments[backbone_index]
        excised = record_fragments[excise_index]
        digest["excised_seq"] = excised
        # The overhangs are read from the ends of the excised fragment.
        digest["first_overhang"] = str(excised.seq.left_end)
        digest["last_overhang"] = str(excised.seq.right_end)

    def count_other_sites(self, other_enzymes):
        """Find the sites of `other_enzymes` and annotate them on the record.

        `results["other_sites"]` always gets `has_any_other_sites`. When
        `other_enzymes` is not None it also gets `enzyme`, the search result
        for those enzymes.
        """
        other_sites = {"has_any_other_sites": False}
        self.results["other_sites"] = other_sites
        if other_enzymes is None:
            return

        bio_enzymes = [Bio.Restriction.__dict__[enzyme] for enzyme in other_enzymes]
        restriction_batch = Bio.Restriction.RestrictionBatch(bio_enzymes)
        # Work with the assumption that the sequence is circular:
        analysis = Bio.Restriction.Analysis(
            restriction_batch, sequence=self.record.seq, linear=False
        )
        other_sites["enzyme"] = analysis.full(linear=False)

        for enzyme, matches in other_sites["enzyme"].items():
            if not matches:
                continue
            other_sites["has_any_other_sites"] = True
            # Also add as features for plot in report:
            for site in matches:
                self._add_feature(site, site + 1, str(enzyme), str(enzyme), "enzyme")

    def check_enzyme_site_locations(self):
        """Flag enzyme sites that are within the retained backbone.

        Fills `sites_outside_excised_region` (enzyme name -> list of site
        positions as strings) and `sites_outside_excised_region_txt`, the same
        information as one string for the pdf report. If the required results
        are missing, a message is printed and nothing is set.
        """
        try:
            has_other_sites = self.results["other_sites"]["has_any_other_sites"]
            orientation_correct = self.results["is_site_orientation_correct"]
        except KeyError:
            print("Run assessment methods first!")
            return

        self.sites_outside_excised_region = {}
        if not (has_other_sites and orientation_correct):
            # no other sites or orientation not correct
            self.sites_outside_excised_region_txt = ""
            return

        reverse, forward = self.reverse_enzyme, self.forward_enzyme
        excised_is_between_sites = reverse < forward

        for enzyme, sites in self.results["other_sites"]["enzyme"].items():
            if excised_is_between_sites:
                # orientation = reverse -> forward: everything that is not
                # strictly between the two sites is flagged.
                problem_sites = [
                    str(site) for site in sites if not (reverse < site < forward)
                ]
            else:
                # orientation = forward -> reverse: sites strictly between
                # the two sites are within the retained backbone.
                problem_sites = [
                    str(site) for site in sites if forward < site < reverse
                ]
            if problem_sites:
                self.sites_outside_excised_region[str(enzyme)] = problem_sites

        # for the pdf report
        self.sites_outside_excised_region_txt = "".join(
            enzyme + ": " + " ".join(problem_sites) + ";"
            for enzyme, problem_sites in self.sites_outside_excised_region.items()
        )

    def sum_results(self):
        """Set `results["pass"]` from the individual assessment results.

        The plasmid fails if it is not circular, if the site orientation is
        not correct (this implicitly checks the number of sites too), or if
        any site was flagged by `check_enzyme_site_locations()`. The mere
        presence of other enzyme sites does not fail the assessment.
        """
        self.results["pass"] = True
        if (
            self.results["is_circular"] is False
            or self.results["is_site_orientation_correct"] is False
            or self.sites_outside_excised_region_txt
        ):
            self.results["pass"] = False

    def plot_plasmid(self):
        """Plot an outline of the plasmid."""
        fig, ax = plt.subplots(figsize=(7, 4))
        graphic_record = AssessmentTranslator().translate_record(self.record)
        graphic_record.plot(ax=ax, with_ruler=False, strand_in_label_threshold=2)
        self.fig = fig

    def _add_feature(self, start, end, feature_id, label, assessment_type):
        """Append a `misc_feature` tagged for the assessment plot to the record."""
        self.record.features.append(
            SeqFeature(
                FeatureLocation(start, end),
                id=feature_id,
                type="misc_feature",
                qualifiers={
                    "label": label,
                    "plasmid_assessment": assessment_type,
                },
            )
        )

Class variables

UNKNOWN_IDS

Methods

add_name

def add_name(
    self
)

Set a name for the assessment.

View Source

    def add_name(self):
        """Set a name for the assessment.

        Record IDs listed in `UNKNOWN_IDS` give "Unnamed plasmid". IDs longer
        than 16 characters are cut to 16 characters followed by "...".
        """
        # To display on the report:
        record_id = self.record.id
        if str(record_id).strip() in self.UNKNOWN_IDS:
            self.name = "Unnamed plasmid"
            return
        # Genbank limit, also for width in report
        too_long = len(record_id) > 16
        self.name = record_id[:16] + "..." if too_long else record_id

assess_plasmid

def assess_plasmid(
    self,
    other_enzymes=None
)

Evaluate plasmid for Golden Gate.

Parameters

other_enzymes

List of enzymes used in higher level assemblies (list).

View Source

    def assess_plasmid(self, other_enzymes=None):
        """Evaluate plasmid for Golden Gate.

        Runs all assessment steps in a fixed order and plots the plasmid last.

        **Parameters**

        **other_enzymes**
        > List of enzymes used in higher level assemblies (`list`).
        """
        if other_enzymes:
            # Comma-separated enzyme names; only set when enzymes were given.
            self.other_enzymes = ", ".join(map(str, other_enzymes))

        steps_before_other_sites = (
            self.add_name,
            self.check_circularity,
            self.get_number_of_sites,
            self.evaluate_orientation,
            self.digest_plasmid,
        )
        for run_step in steps_before_other_sites:
            run_step()

        self.count_other_sites(other_enzymes)

        steps_after_other_sites = (
            self.check_enzyme_site_locations,
            self.sum_results,
            self.plot_plasmid,
        )
        for run_step in steps_after_other_sites:
            run_step()

check_circularity

def check_circularity(
    self
)

View Source

    def check_circularity(self):
        """Record whether the record is annotated as circular.

        `results["is_circular"]` is True only if the record's `topology`
        annotation equals `"circular"`; a missing annotation counts as not
        circular.
        """
        annotations = self.record.annotations
        is_circular = False
        if "topology" in annotations and annotations["topology"] == "circular":
            is_circular = True
        self.results["is_circular"] = is_circular

check_enzyme_site_locations

def check_enzyme_site_locations(
    self
)

Flag enzyme sites that are within the retained backbone.

View Source

    def check_enzyme_site_locations(self):
        """Flag enzyme sites that are within the retained backbone.

        Fills `sites_outside_excised_region` (enzyme name -> list of site
        positions as strings) and `sites_outside_excised_region_txt`, the same
        information as one string for the pdf report. If the required results
        are missing, a message is printed and nothing is set.
        """
        try:
            has_other_sites = self.results["other_sites"]["has_any_other_sites"]
            orientation_correct = self.results["is_site_orientation_correct"]
        except KeyError:
            print("Run assessment methods first!")
            return

        self.sites_outside_excised_region = {}
        if not (has_other_sites and orientation_correct):
            # no other sites or orientation not correct
            self.sites_outside_excised_region_txt = ""
            return

        reverse, forward = self.reverse_enzyme, self.forward_enzyme
        excised_is_between_sites = reverse < forward

        for enzyme, sites in self.results["other_sites"]["enzyme"].items():
            if excised_is_between_sites:
                # orientation = reverse -> forward: everything that is not
                # strictly between the two sites is flagged.
                problem_sites = [
                    str(site) for site in sites if not (reverse < site < forward)
                ]
            else:
                # orientation = forward -> reverse: sites strictly between
                # the two sites are within the retained backbone.
                problem_sites = [
                    str(site) for site in sites if forward < site < reverse
                ]
            if problem_sites:
                self.sites_outside_excised_region[str(enzyme)] = problem_sites

        # for the pdf report
        self.sites_outside_excised_region_txt = "".join(
            enzyme + ": " + " ".join(problem_sites) + ";"
            for enzyme, problem_sites in self.sites_outside_excised_region.items()
        )

count_other_sites

def count_other_sites(
    self,
    other_enzymes
)

View Source

    def count_other_sites(self, other_enzymes):
        """Find the sites of `other_enzymes` and annotate them on the record.

        `results["other_sites"]` always gets `has_any_other_sites`. When
        `other_enzymes` is not None it also gets `enzyme`, the search result
        for those enzymes, and every site found is added to the record as a
        one-base feature for the plot.
        """
        summary = {"has_any_other_sites": False}
        self.results["other_sites"] = summary
        if other_enzymes is None:
            return

        batch = Bio.Restriction.RestrictionBatch(
            [Bio.Restriction.__dict__[name] for name in other_enzymes]
        )
        # Work with the assumption that the sequence is circular:
        search = Bio.Restriction.Analysis(
            batch, sequence=self.record.seq, linear=False
        )
        summary["enzyme"] = search.full(linear=False)

        for enzyme, matches in summary["enzyme"].items():
            if not matches:
                continue
            summary["has_any_other_sites"] = True
            # Also add as features for plot in report:
            enzyme_label = str(enzyme)
            for site in matches:
                site_feature = SeqFeature(
                    FeatureLocation(site, site + 1),
                    id=enzyme_label,
                    type="misc_feature",
                    qualifiers={
                        "label": enzyme_label,
                        "plasmid_assessment": "enzyme",
                    },
                )
                self.record.features.append(site_feature)

digest_plasmid

def digest_plasmid(
    self
)

View Source

    def digest_plasmid(self):
        """Digest the plasmid with the enzyme and store fragments and overhangs.

        `self.results["digest"]` is always reset to an empty dictionary. It is
        filled only if both `self.results["is_circular"]` and
        `self.results["is_site_orientation_correct"]` are truthy, with the
        keys `"backbone_seq"`, `"excised_seq"`, `"first_overhang"` and
        `"last_overhang"`.

        The method relies on two assumptions: the sequence is circular, and it
        has two correctly oriented enzyme sites. The digestion is therefore
        expected to give exactly two fragments, one of which contains both
        sites.
        """
        digest = {}
        self.results["digest"] = digest

        if not (
            self.results["is_circular"]
            and self.results["is_site_orientation_correct"]
        ):
            return

        fragments = dc.StickyEndFragment.list_from_record_digestion(
            record=self.record, enzyme=self.enzyme, linear=False
        )

        # With only two fragments, the one carrying the recognition site is
        # the excised one and the other one is the backbone.
        site_in_first = self.enzyme.site in fragments[0].to_standard_string()
        excise_index = 0 if site_in_first else 1
        backbone_index = 1 - excise_index

        digest["backbone_seq"] = fragments[backbone_index]
        excised = fragments[excise_index]
        digest["excised_seq"] = excised
        # Both overhangs are read from the ends of the excised fragment.
        digest["first_overhang"] = str(excised.seq.left_end)
        digest["last_overhang"] = str(excised.seq.right_end)

evaluate_orientation

def evaluate_orientation(
    self
)

View Source

    def evaluate_orientation(self):
        """Check that the enzyme has one site on each strand and annotate it.

        The orientation is considered correct when the enzyme's recognition
        site matches exactly once in the record's sequence string and its
        reverse complement also matches exactly once. The verdict is stored in
        `self.results["is_site_orientation_correct"]`.

        The method also sets these attributes:

        - `iter_forward`: end positions of the forward site matches.
        - `forward_enzyme`: the single forward end position (only set when
          there is exactly one forward match).
        - `iter_reverse`: start positions of the reverse-complement matches
          (only set when there is exactly one forward match).
        - `reverse_enzyme`: the single reverse start position (only set when
          the orientation is correct).

        When the orientation is correct, the region that contains both sites
        is appended to `self.record.features` as "Excised" feature(s), padded
        by one position on each side and kept within the record's bounds.
        """
        self.results["is_site_orientation_correct"] = False  # default
        sequence = str(self.record.seq)

        # Forward strand:
        self.iter_forward = [
            match.end() for match in re.finditer(self.enzyme.site, sequence)
        ]
        if len(self.iter_forward) == 1:
            self.forward_enzyme = self.iter_forward[0]
            # The reverse-strand site is found by searching the same (top)
            # strand for the reverse complement of the recognition site.
            rev_complement_site = str(Seq(self.enzyme.site).reverse_complement())
            self.iter_reverse = [
                match.start()
                for match in re.finditer(rev_complement_site, sequence)
            ]
            if len(self.iter_reverse) == 1:  # 1 site in both strands:
                self.results["is_site_orientation_correct"] = True
                self.reverse_enzyme = self.iter_reverse[0]

        if not self.results["is_site_orientation_correct"]:
            return

        record_length = len(self.record)
        if self.reverse_enzyme < self.forward_enzyme:
            # Clamp the one-position padding so that a site touching either
            # end of the sequence cannot give a start of -1 or an end beyond
            # the record.
            spans = [
                (
                    max(0, self.reverse_enzyme - 1),
                    min(record_length, self.forward_enzyme + 1),
                )
            ]
        else:
            # The region spans the origin: put the annotation together from
            # two pieces. Here forward_enzyme < reverse_enzyme <= len, so the
            # padded coordinates are always within bounds.
            spans = [
                (0, self.forward_enzyme + 1),
                (self.reverse_enzyme - 1, record_length),
            ]

        for start, end in spans:
            self.record.features.append(
                SeqFeature(
                    FeatureLocation(start, end),
                    id=str(self.enzyme),
                    type="misc_feature",
                    qualifiers={
                        "label": "Excised",
                        "plasmid_assessment": "excised",
                    },
                )
            )

get_number_of_sites

def get_number_of_sites(
    self
)

View Source

    def get_number_of_sites(self):
        """Count the sites of the enzyme and annotate them on the record.

        A Biopython restriction analysis is run for `self.enzyme` on
        `self.record.seq`. The sequence is treated as linear only when
        `self.results["is_circular"]` is present and falsy; otherwise it is
        treated as circular.

        The full analysis is kept in `self.analysis_results`, the number of
        sites of the enzyme in `self.results["number_of_sites"]`, and every
        site is appended to `self.record.features` for the plot in the report.
        """
        # Without a circularity verdict, fall back to a circular sequence.
        is_linear = not self.results.get("is_circular", True)

        batch = Bio.Restriction.RestrictionBatch([self.enzyme])
        search = Bio.Restriction.Analysis(
            batch, sequence=self.record.seq, linear=is_linear
        )
        self.analysis_results = search.full(linear=is_linear)
        self.results["number_of_sites"] = len(self.analysis_results[self.enzyme])

        # Every site becomes a feature, for the plot in the report:
        for found_enzyme, positions in self.analysis_results.items():
            enzyme_label = str(found_enzyme)
            for position in positions:
                site_feature = SeqFeature(
                    FeatureLocation(position, position + 1),
                    id=enzyme_label,
                    type="misc_feature",
                    qualifiers={
                        "label": enzyme_label,
                        "plasmid_assessment": "enzyme",
                    },
                )
                self.record.features.append(site_feature)

plot_plasmid

def plot_plasmid(
    self
)

Plot an outline of the plasmid.

View Source

    def plot_plasmid(self):
        """Plot an outline of the plasmid.

        The record is translated with `AssessmentTranslator` and drawn on a
        new 7 x 4 Matplotlib figure, which is then stored in `self.fig`.
        """
        figure, axis = plt.subplots(figsize=(7, 4))
        translator = AssessmentTranslator()
        outline = translator.translate_record(self.record)
        outline.plot(ax=axis, with_ruler=False, strand_in_label_threshold=2)
        # The figure is stored only after plotting succeeded.
        self.fig = figure

sum_results

def sum_results(
    self
)

View Source

    def sum_results(self):
        """Combine the individual checks into `self.results["pass"]`.

        The plasmid fails if `self.results["is_circular"]` is `False`, if
        `self.results["is_site_orientation_correct"]` is `False` (which
        implicitly covers the number of sites too), or if
        `self.sites_outside_excised_region_txt` is not empty. The checks are
        made in this order and stop at the first failure.
        """
        self.results["pass"] = True

        # `or` short-circuits, so later checks are not even looked up once an
        # earlier one has failed.
        failed = (
            self.results["is_circular"] is False
            or self.results["is_site_orientation_correct"] is False
            or bool(self.sites_outside_excised_region_txt)
        )
        if failed:
            self.results["pass"] = False

AssessmentTranslator

class AssessmentTranslator(
    features_filters=(),
    features_properties=None
)

Custom translator for highlighting key features.

View Source

    class AssessmentTranslator:
        """Please install dna_features_viewer to use this class.

        Placeholder used when dna_features_viewer cannot be imported:
        creating an instance always fails with an explanatory error.
        """

        def __init__(self, *args, **kwargs):
            # Any arguments are accepted (and ignored) so that callers who
            # pass translator options get the install hint below instead of
            # an unrelated TypeError about unexpected arguments.
            # ImportError is a subclass of Exception, so existing
            # `except Exception` handlers keep working.
            raise ImportError("Please install dna_features_viewer to use this class.")

Ancestors (in MRO)

dna_features_viewer.BiopythonTranslator.BiopythonTranslator.BiopythonTranslator
dna_features_viewer.BiopythonTranslator.BiopythonTranslatorBase.BiopythonTranslatorBase

Class variables

default_feature_color

graphic_record_parameters

ignored_features_types

label_fields

Static methods

quick_class_plot

def quick_class_plot(
    record,
    figure_width=12,
    **kwargs
)

Allows super quick and dirty plotting of Biopython records.

This is really meant for use in a Jupyter/Ipython notebook with the "%matplotlib inline" setting.

    from dna_features_viewer import BiopythonTranslator
    BiopythonTranslator.quick_class_plot(my_record)

View Source

    @classmethod
    def quick_class_plot(cls, record, figure_width=12, **kwargs):
        """Plot a Biopython record in one call, straight from the class.

        A translator is created without arguments, the record is translated
        and plotted, and the first item returned by the plot call (the axes)
        is returned. Extra keyword arguments are passed on to the plot call.

        This is really meant for use in a Jupyter/Ipython notebook with
        the "%matplotlib inline" setting.

        >>> from dna_features_viewer import BiopythonTranslator
        >>> BiopythonTranslator.quick_class_plot(my_record)
        """
        translator = cls()
        axes, _ = translator.translate_record(record).plot(
            figure_width=figure_width, **kwargs
        )
        return axes

Methods

compute_feature_box_color

def compute_feature_box_color(
    self,
    feature
)

Compute a box_color for this feature.

View Source

    def compute_feature_box_color(self, feature):
        """Return the box_color for this feature.

        This implementation ignores the feature and always returns "auto".
        """
        return "auto"

compute_feature_box_linewidth

def compute_feature_box_linewidth(
    self,
    feature
)

Compute a box_linewidth for this feature.

View Source

    def compute_feature_box_linewidth(self, feature):
        """Return the box_linewidth for this feature.

        This implementation ignores the feature and always returns 0.3.
        """
        return 0.3

compute_feature_color

def compute_feature_color(
    self,
    feature
)

Compute a color for this feature.

If the feature has a color qualifier it will be used. Otherwise, the class's default_feature_color is used.

To change the behaviour, create a subclass of BiopythonTranslator and overwrite this method.

View Source

        def compute_feature_color(self, feature):
            """Pick the color of a feature from its "plasmid_assessment" qualifier.

            Features marked "enzyme" are red, "excised" yellow and "backbone"
            cyan. Features with any other value, or without the qualifier,
            get "tab:blue".
            """
            default_color = "tab:blue"  # default dna_features_viewer color

            if "plasmid_assessment" not in feature.qualifiers:
                return default_color

            role = feature.qualifiers["plasmid_assessment"]
            # Compared with == (not looked up in a dict) so that unhashable
            # qualifier values simply fall through to the default.
            role_colors = (
                ("enzyme", "red"),
                ("excised", "yellow"),
                ("backbone", "tab:cyan"),
            )
            for known_role, color in role_colors:
                if role == known_role:
                    return color
            return default_color

compute_feature_fontdict

def compute_feature_fontdict(
    self,
    feature
)

Compute a font dict for this feature.

View Source

    def compute_feature_fontdict(self, feature):
        """Return the font dict for this feature.

        This implementation ignores the feature and always returns None.
        """
        return None

compute_feature_html

def compute_feature_html(
    self,
    feature
)

Gets the 'label' of the feature.

View Source

    def compute_feature_html(self, feature):
        """Return the html text of the feature, which is its label.

        The value is whatever `compute_feature_label` returns for the feature.
        """
        html = self.compute_feature_label(feature)
        return html

compute_feature_label

def compute_feature_label(
    self,
    feature
)

Compute the label of the feature.

View Source

    def compute_feature_label(self, feature):
        """Return the label of the feature.

        The label is the value of the first qualifier among
        `self.label_fields` that is present and not empty. When none is
        found, the feature's type is used. A label that is a list is joined
        into one string with "|".
        """
        text = feature.type
        candidates = (
            feature.qualifiers[field]
            for field in self.label_fields
            if field in feature.qualifiers and len(feature.qualifiers[field])
        )
        # First usable qualifier wins; otherwise keep the feature type.
        text = next(candidates, text)
        return "|".join(text) if isinstance(text, list) else text

compute_feature_label_link_color

def compute_feature_label_link_color(
    self,
    feature
)

Compute the color of the line linking the label to its feature.

View Source

    def compute_feature_label_link_color(self, feature):
        """Return the color of the line linking the label to its feature.

        This implementation ignores the feature and always returns "black".
        """
        return "black"

compute_feature_legend_text

def compute_feature_legend_text(
    self,
    feature
)

View Source

    def compute_feature_legend_text(self, feature):
        """Return the legend text for this feature.

        This implementation ignores the feature and always returns None.
        """
        return None

compute_feature_linewidth

def compute_feature_linewidth(
    self,
    feature
)

Compute the edge width of the feature's arrow/rectangle.

View Source

    def compute_feature_linewidth(self, feature):
        """Return the edge width of the feature's arrow/rectangle.

        This implementation ignores the feature and always returns 1.0.
        """
        return 1.0

compute_filtered_features

def compute_filtered_features(
    self,
    features
)

Return the list of features minus the ignored ones.

By default, the method keeps any feature whose type is not in ignored_features_types and for which all filter(f) pass.

View Source

    def compute_filtered_features(self, features):
        """Return the list of features minus the ignored ones.

        A feature is kept when every filter in `self.features_filters`
        returns a truthy value for it and its type is not in
        `self.ignored_features_types`. The order of the features is kept.
        """
        kept = []
        for feature in features:
            # Every filter is called, even after one of them has failed.
            verdicts = [check(feature) for check in self.features_filters]
            if all(verdicts) and feature.type not in self.ignored_features_types:
                kept.append(feature)
        return kept

quick_plot

def quick_plot(
    self,
    record,
    figure_width=12,
    **kwargs
)

Allows super quick and dirty plotting of Biopython records.

This is really meant for use in a Jupyter/Ipython notebook with the "%matplotlib inline" setting.

    from dna_features_viewer import BiopythonTranslator
    BiopythonTranslator().quick_plot(my_record)

View Source

    def quick_plot(self, record, figure_width=12, **kwargs):
        """Plot a Biopython record in one call, using this translator.

        The record is translated and plotted, and the first item returned by
        the plot call (the axes) is returned. Extra keyword arguments are
        passed on to the plot call.

        This is really meant for use in a Jupyter/Ipython notebook with
        the "%matplotlib inline" setting.

        >>> from dna_features_viewer import BiopythonTranslator
        >>> BiopythonTranslator().quick_plot(my_record)
        """
        axes, _ = self.translate_record(record).plot(
            figure_width=figure_width, **kwargs
        )
        return axes

translate_feature

def translate_feature(
    self,
    feature
)

Translate a Biopython feature into a Dna Features Viewer feature.

View Source

    def translate_feature(self, feature):
        """Translate a Biopython feature into a Dna Features Viewer feature.

        The graphic properties come from the `compute_feature_*` methods and
        are then updated with `self.features_properties` when it is not None.
        If `features_properties` has a `__call__` attribute, it is called with
        the feature and its result is used for the update.

        The start, end and strand are taken from the feature's location.
        """
        properties = {
            "label": self.compute_feature_label(feature),
            "color": self.compute_feature_color(feature),
            "html": self.compute_feature_html(feature),
            "fontdict": self.compute_feature_fontdict(feature),
            "box_linewidth": self.compute_feature_box_linewidth(feature),
            "box_color": self.compute_feature_box_color(feature),
            "linewidth": self.compute_feature_linewidth(feature),
            "label_link_color": self.compute_feature_label_link_color(feature),
            "legend_text": self.compute_feature_legend_text(feature),
        }

        overrides = self.features_properties
        if overrides is not None:
            if hasattr(overrides, "__call__"):
                overrides = overrides(feature)
            # User-provided properties win over the computed ones.
            properties.update(overrides)

        location = feature.location
        return GraphicFeature(
            start=location.start,
            end=location.end,
            strand=location.strand,
            **properties
        )

translate_record

def translate_record(
    self,
    record,
    record_class=None,
    filetype=None
)

Create a new GraphicRecord from a BioPython Record object.

Parameters

record: A BioPython Record object or the path to a Genbank or a GFF file.

record_class: The graphic record class to use, e.g. GraphicRecord (default) or CircularGraphicRecord. Strings 'circular' and 'linear' can also be provided.

filetype: Used only when a Genbank or a GFF file is provided; one of "genbank" or "gff" to be used. Default None infers from file extension.

View Source

    def translate_record(self, record, record_class=None, filetype=None):
        """Create a new GraphicRecord from a BioPython Record object.

        Features are first filtered with `compute_filtered_features`; those
        with a location of None are skipped and the others are translated
        with `translate_feature`.

        Parameters
        ----------

        record
          A BioPython Record object, or a string or an object with a `read`
          attribute (e.g. the path to a Genbank or a GFF file), in which case
          the record is loaded first.

        record_class
          The graphic record class to use, e.g. GraphicRecord (default) or
          CircularGraphicRecord. Strings 'circular' and 'linear' can also be
          provided.

        filetype
          Used only when the record has to be loaded; passed on to the
          loader. One of "genbank" or "gff"; the default None is said to infer
          the type from the file extension.
        """
        known_classes = {
            "linear": GraphicRecord,
            "circular": CircularGraphicRecord,
            None: GraphicRecord,
        }
        # Anything that is not a known shortcut is used as the class itself.
        record_class = known_classes.get(record_class, record_class)

        needs_loading = isinstance(record, str) or hasattr(record, "read")
        if needs_loading:
            record = load_record(record, filetype=filetype)

        retained = self.compute_filtered_features(record.features)
        return record_class(
            sequence_length=len(record),
            sequence=str(record.seq),
            features=[
                self.translate_feature(item)
                for item in retained
                if item.location is not None
            ],
            **self.graphic_record_parameters
        )