Source code for plateo.parsers.plate_from_tables

import re
import os
from copy import deepcopy
import pandas as pd

from plateo.containers import get_plate_class
from ..tools import (
    infer_plate_size_from_wellnames,
    number_to_rowname,
    unit_factors,
)


[docs]def plate_from_dataframe( dataframe, wellname_field="wellname", num_wells="infer", data=None ): """Create a plate from a Pandas dataframe where each row contains the name of a well and data on the well. it is assumed that the dataframe's index is given by the well names. This function is used e.g. in `plate_from_list_spreadsheet`. Parameters ---------- dataframe A Pandas dataframe wellname_field The name of the Pandas dataframe column indicating the name of the wells. num_wells Number of wells in the Plate to be created. If left to default 'infer', the size of the plate will be chosen as the smallest format (out of 96, 384 and 1536 wells) which contains all the well names. data Metadata information for the plate. """ # TODO: infer plate class automatically ? dataframe = dataframe.set_index(wellname_field) wells_data = {well: row.to_dict() for well, row in dataframe.iterrows()} if num_wells == "infer": num_wells = infer_plate_size_from_wellnames(wells_data.keys()) plate_class = get_plate_class(num_wells=num_wells) return plate_class(wells_data=wells_data, data=data)
[docs]def plate_from_list_spreadsheet( filename, sheet_name=0, num_wells="infer", wellname_field="wellname" ): """Create a plate from a Pandas dataframe where each row contains the name of a well and metadata on the well. Parameters ---------- filename Path to the spreadsheet file. sheet_name Index or name of the spreadsheet to use. num_wells Number of wells in the Plate to be created. If left to default 'infer', the size of the plate will be chosen as the smallest format (out of 96, 384 and 1536 wells) which contains all the well names. wellname_field="wellname" Name of the column of the spreadsheet giving the well names """ if ".xls" in filename: # includes xlsx dataframe = pd.read_excel(filename, sheet_name=sheet_name) elif filename.endswith(".csv"): dataframe = pd.read_csv(filename) return plate_from_dataframe( dataframe, wellname_field=wellname_field, num_wells=num_wells, data={"filename": filename}, )
[docs]def plate_from_platemap_spreadsheet( file_handle, file_type="auto", original_filename=None, data_field="info", num_wells="infer", plate_class=None, multiply_by=None, headers=True, sheet_name=0, skiprows=None, ): """Parse spreadsheets representing a plate map. Parameters ---------- file_handle Either a file handle or a file path to a CSV/Excel spreadsheet. If a file handle is provided, then the file_type must be set, or at least the "original_filename". file_type Either "csv" or "excel" or "auto" (at which case the type is determined based on the provided file path in ``file_handle`` or ``original_filename``) original_filename Original filename (optional if file_handle is already a file path or if file_type is specified) data_field Data field of the well under which platemap's information will be stored num_wells Number of wells in the Plate to be created. If left to default 'infer', the size of the plate will be chosen as the smallest format (out of 96, 384 and 1536 wells) which contains all the well names. headers Whether the spreadsheet actually writes the "A" "B", and "1" "2" skiprows Number of rows to skip (= rows before the platemap starts) The spreadsheet should be either a 8 rows x 12 columns csv/excel file, or have headers like this .. code:: bash 1 2 3 4 5 6 7 8 9 10 11 12 A . . . . . . . . . . . . B . . . . . . . . . . . . C . . . . . . . . . . . . D . . . . . . . . . . . . E . . . . . . . . . . . . F . . . . . . . . . . . . G . . . . . . . . . . . . H . . . . . . . . . . . . """ if isinstance(file_handle, str): # The provided file is a file path original_filename = file_handle if file_type == "auto": # Determine the file type based on the file name. base, ext = os.path.splitext(original_filename) ext = ext.lower() if ext == ".csv": file_type = "csv" elif ext in [".xls", ".xlsx"]: file_type = "excel" index_col = 0 if headers else None if file_type == "csv": dataframe = pd.read_csv( file_handle, index_col=index_col, header=index_col, skiprows=skiprows, ) elif file_type == "excel": dataframe = pd.read_excel( file_handle, index_col=index_col, sheet_name=sheet_name, header=index_col, skiprows=skiprows, ) if headers: def compute_value(content, multiply_by, row, column): if multiply_by is None: return content else: try: return content * multiply_by except Exception as err: wellname = row + str(column) raise ValueError(("In well %s: " % wellname) + str(err)) wells_data = { row + str(column): { data_field: compute_value(content, multiply_by, row, column) } for column, column_content in dataframe.to_dict().items() for row, content in column_content.items() if isinstance(row, str) } else: wells_data = { number_to_rowname(row + 1) + str(column + 1): {data_field: content} for column, column_content in dataframe.to_dict().items() for row, content in column_content.items() } if num_wells == "infer": num_wells = infer_plate_size_from_wellnames(wells_data.keys()) if plate_class is None: plate_class = get_plate_class(num_wells=num_wells) return plate_class( wells_data=wells_data, data={"file_source": original_filename} )
def plate_from_content_spreadsheet( spreadsheet_file, headers=True, plate_class=None, original_filename=None, sheet_names="default", ): """Load plate from Excel with 'content', 'volume', 'concentration' sheets. The 'content' sheet contains a platemap of the product contained in each well (e.g. a part name or a strain name) The 'volume' sheet contains a platemap of the volume contained in each well (e.g. a part name or a strain name) in liter The 'concentration' sheet contains a platemap of the concentration contained in each well (e.g. a part name or a strain name) in gram/liter. Parameters ---------- spreadsheet_file Path to the excel spreadsheet, or file-like object. headers Set to True if the spreadsheets represent the platemaps using headers ABCDEF... for columns and 123456... for rows plate_class Class of the plate returned. If set to None, a generic Plate96 or Plate384 or other is returned depending on the autodetected plate size sheet_name A triple of the name of the sheets containing """ # NOTE: We use deepcopy(spreadsheet_file) throughout this function because # Some pandas operations close file-like objects, which prevents from # parsing the spreadsheet several times, as this method requires. if original_filename is None: if isinstance(spreadsheet_file, str): original_filename = spreadsheet_file else: original_filename = "unknown.xlsx" excel = pd.ExcelFile(deepcopy(spreadsheet_file)) sheet_names = excel.sheet_names sheet_name_patterns = { "concentration": r"[cC]oncentration(| \((\S+)-(\S+)\))", "volume": r"[vV]olume(| \((\S+)\))", "content": r"[cC]ontent(| \((\S+)\))", } default_factors = { "concentration": 1e-3, # default is ng/ul = 0.001 g/l "volume": 1e-6, # default is ul "content": "content", } field_data = {} for field, pattern in sheet_name_patterns.items(): sheet_name = None for sheet_name in sheet_names: match = re.fullmatch(pattern, sheet_name) if match is not None: field_data[field] = data = {"sheet_name": sheet_name} groups = list(match.groups()) if groups[0] == "": data["factor"] = default_factors[field] elif len(groups) == 2: if field == "content": data["factor"] = groups[1] else: data["factor"] = unit_factors[groups[1]] elif len(groups) == 3: factor = unit_factors[groups[1]] / unit_factors[groups[2]] data["factor"] = factor else: raise ValueError("Couldn't parse sheet %s" % sheet_name) break else: raise ValueError( ("No sheet found for field %s." "Check your sheet names.") % field ) content_field_name = field_data["content"]["factor"] plate = plate_from_platemap_spreadsheet( deepcopy(spreadsheet_file), headers=headers, sheet_name=field_data["content"]["sheet_name"], data_field=content_field_name, plate_class=plate_class, original_filename=original_filename, ) plate.merge_data_from( plate_from_platemap_spreadsheet( deepcopy(spreadsheet_file), data_field="volume", headers=headers, multiply_by=field_data["volume"]["factor"], sheet_name=field_data["volume"]["sheet_name"], original_filename=original_filename, ) ) plate.merge_data_from( plate_from_platemap_spreadsheet( deepcopy(spreadsheet_file), data_field="concentration", headers=headers, sheet_name=field_data["concentration"]["sheet_name"], multiply_by=field_data["concentration"]["factor"], original_filename=original_filename, ) ) for well in plate.iter_wells(): content = well.data[content_field_name] if str(content) == "nan": well.data[content_field_name] = None continue volume = well.data.volume concentration = well.data.concentration try: well.add_content({content: volume * concentration}, volume=volume) except Exception as err: raise type(err)("Check your data for well %s" % well.name) from err for field in ("volume", "concentration"): well.data.pop(field) return plate