示例#1
0
def readMetadata(metaFile):
    """Yield the records of ``metaFile`` one plate at a time.

    The file is loaded through ``meta.Metadata``; the number of distinct
    ``Metadata_Plate`` values is printed, and then, for every such value
    (in the order returned by ``unique()``), the result of
    ``filterRecords(..., copy=True)`` restricted to that plate is yielded.

    Args:
        metaFile: Passed unchanged to ``meta.Metadata``.

    Yields:
        Whatever ``Metadata.filterRecords`` returns for each plate.
    """
    records = meta.Metadata(metaFile)
    plate_ids = records.data["Metadata_Plate"].unique()
    print("Total plates:", len(plate_ids))
    for plate_id in plate_ids:
        # Keep only the rows whose plate matches the current plate value.
        yield records.filterRecords(
            lambda df: df.Metadata_Plate == plate_id, copy=True)
示例#2
0
def readPlates(metaFile):
    """Generate per-plate subsets of the metadata stored in ``metaFile``.

    Loads the metadata with ``meta.Metadata``, logs the number of distinct
    ``Metadata_Plate`` values through ``utils.logger`` and yields, for each
    of them, the output of ``filterRecords`` (called with ``copy=True``)
    limited to the rows of that plate.

    Args:
        metaFile: Passed unchanged to ``meta.Metadata``.

    Yields:
        One filtered result per distinct plate value.
    """
    source = meta.Metadata(metaFile)
    unique_plates = source.data["Metadata_Plate"].unique()
    plate_count = len(unique_plates)
    utils.logger.info("Total plates: " + str(plate_count))
    for current in unique_plates:

        def on_current_plate(df):
            # Row mask selecting the plate handled in this iteration.
            return df.Metadata_Plate == current

        yield source.filterRecords(on_current_plate, copy=True)
示例#3
0
def readDataset(metaFile, images_dir):
    """Load metadata, split it by replicate and wrap it in a ``ds.Dataset``.

    Records whose ``Allele_Replicate`` is at most 5 are handed to
    ``splitMetadata`` as the training filter, the remaining ones (> 5) as
    the validation filter. Image keys have the form
    ``<Metadata_Plate>/<Metadata_Well>-<Metadata_Site>``.

    Args:
        metaFile: Passed to ``meta.Metadata`` (with ``dtype=None``).
        images_dir: Passed to ``ds.Dataset`` as the images location.

    Returns:
        The ``ds.Dataset`` built on the ``"Allele"`` column and ``CHANNELS``.
    """

    def trainingFilter(df):
        return df["Allele_Replicate"] <= 5

    def validationFilter(df):
        return df["Allele_Replicate"] > 5

    def keyGen(r):
        return "{}/{}-{}".format(r["Metadata_Plate"], r["Metadata_Well"],
                                 r["Metadata_Site"])

    # Read metadata and split data in training and validation
    metadata = meta.Metadata(metaFile, dtype=None)
    metadata.splitMetadata(trainingFilter, validationFilter)

    # Create a dataset
    dataset = ds.Dataset(metadata, "Allele", CHANNELS, images_dir, keyGen)

    # Diagnostic dump of one sample record. Guarded so that metadata with
    # 100 rows or fewer no longer aborts the load with an IndexError.
    if len(metadata.data) > 100:
        print(metadata.data.iloc[100])
    return dataset
示例#4
0
def processMetadata(plate_maps, barcode_file, csv_list, root):
    """Build the image-level metadata table for a compound screen.

    Steps:
      1. Load the plate maps and encode each ``broad_sample`` @
         ``mmoles_per_liter`` combination as an integer ``Treatment`` and
         each ``broad_sample`` as an integer ``Compound``.
      2. Join the per-image CSV data with the barcode table on the plate
         barcode, then rewrite the channel paths via ``relativePaths``.
      3. Join with the plate maps on (plate map name, well) to attach the
         labels to every image.
      4. Number the wells of every treatment (``Treatment_Replicate``,
         starting at 1, in order of appearance).
      5. Write ``metadata.csv``, ``treatments.csv`` and ``compounds.csv``
         to the current working directory.

    Args:
        plate_maps: Passed to ``meta.Metadata(..., "multi", "blanks")``.
        barcode_file: Passed to ``meta.Metadata(..., "single")``.
        csv_list: Passed to ``meta.Metadata(..., "multi")``.
        root: Passed to ``relativePaths`` for every channel.

    Returns:
        None. Results are written to CSV files; progress and summaries are
        printed.
    """
    # Load plate maps data and create labels
    plateMaps = meta.Metadata(plate_maps, "multi", "blanks")
    maps = plateMaps.data
    maps["Treatment"] = maps["broad_sample"] + "@" + maps[
        "mmoles_per_liter"].astype(str)
    maps["Compound"] = 0
    treatments = maps["Treatment"].unique()
    compounds = maps["broad_sample"].unique()

    # Replace every treatment string by its position in `treatments`.
    print("Unique treatments:", len(treatments))
    for i, treatment in enumerate(treatments):
        maps.loc[maps["Treatment"] == treatment, "Treatment"] = i
        utils.printProgress(i + 1, len(treatments), prefix="Treatments")

    # NOTE(review): rows whose broad_sample is missing never match and keep
    # the initial Compound value 0, the same id as the first compound.
    print("Unique compounds:", len(compounds))
    for i, compound in enumerate(compounds):
        maps.loc[maps["broad_sample"] == compound, "Compound"] = i
        utils.printProgress(i + 1, len(compounds), prefix="Compounds")

    # Load barcodes and csv files
    barcodes = meta.Metadata(barcode_file, "single")
    load_data = meta.Metadata(csv_list, "multi")

    # Merge two frames: csvs + barcodes to attach compound layout to each image
    # Only the first 13 columns of the csv data are kept.
    columns = list(load_data.data.columns.values)
    metadata = pd.merge(load_data.data.drop(columns[13:], axis=1),
                        barcodes.data,
                        left_on=["Metadata_Plate"],
                        right_on=["Assay_Plate_Barcode"],
                        how="inner")
    metadata = metadata.drop(
        ["Batch_Number", "Batch_Date", "Assay_Plate_Barcode"], axis=1)
    # Release the source frames before the next large merge.
    del load_data, barcodes

    # Concatenate paths and filenames and make them relative
    for channel in ("RNA", "ER", "AGP", "Mito", "DNA"):
        metadata = relativePaths(metadata, channel,
                                 "PathName_Orig" + channel,
                                 "FileName_Orig" + channel, root)
    # DataFrame.info() prints its report itself and returns None, so it is
    # called directly instead of being wrapped in print().
    metadata.info()

    # Merge two frames: metadata + plateMaps to attach treatment info to each image
    metadata = pd.merge(metadata,
                        maps,
                        left_on=["Plate_Map_Name", "Metadata_Well"],
                        right_on=["plate_map_name", "well_position"],
                        how="left")
    metadata = metadata.drop([
        "plate_map_name", "well_position", "broad_sample", "mg_per_ml",
        "mmoles_per_liter", "solvent"
    ],
                             axis=1)
    # Temporary key identifying a physical well. The plate is cast to str so
    # numeric barcodes work, and a separator keeps plate and well apart.
    metadata["plate_well"] = metadata["Metadata_Plate"].astype(
        str) + "::" + metadata["Metadata_Well"]

    # Find replicate labels
    metadata["Treatment_Replicate"] = 0
    replicateDistribution = {}  # number of wells -> number of treatments
    for i in range(len(treatments)):
        mask1 = metadata["Treatment"] == i
        wells = metadata[mask1]["plate_well"].unique()
        utils.printProgress(i + 1, len(treatments), "Replicates")
        for replicate, well in enumerate(wells, start=1):
            mask2 = metadata["plate_well"] == well
            metadata.loc[mask1 & mask2, "Treatment_Replicate"] = replicate
        wellCount = len(wells)
        replicateDistribution[wellCount] = replicateDistribution.get(
            wellCount, 0) + 1
    metadata = metadata.drop(["plate_well"], axis=1)
    print(replicateDistribution)
    metadata.info()

    # Save resulting metadata
    metadata.to_csv("metadata.csv", index=False)
    dframe = pd.DataFrame({
        "ID": pd.Series(range(len(treatments))),
        "Treatment": pd.Series(treatments)
    })
    dframe.to_csv("treatments.csv", index=False)
    dframe = pd.DataFrame({
        "ID": pd.Series(range(len(compounds))),
        "Compound": pd.Series(compounds)
    })
    dframe.to_csv("compounds.csv", index=False)
示例#5
0
def processMetadata(plate_maps, barcode_file, csv_list, root):
    """Build the image-level metadata table for an allele/gene screen.

    Steps:
      1. Load the plate maps and encode each ``NCBIGeneID`` @
         ``x_mutation_status`` combination as an integer ``Allele`` and
         each ``NCBIGeneID`` as an integer ``Gene``.
      2. Join the per-image CSV data with the barcode table on the plate
         barcode, then rewrite the channel paths via ``relativePaths``.
      3. Join with the plate maps on (plate map name, well) to attach the
         labels to every image.
      4. Number the wells of every allele (``Allele_Replicate``, starting
         at 1, in order of appearance).
      5. Write ``metadata.csv``, ``alleles.csv`` and ``genes.csv`` to the
         current working directory.

    Args:
        plate_maps: Passed to ``meta.Metadata(..., "multi", "tabs")``.
        barcode_file: Passed to ``meta.Metadata(..., "single")``.
        csv_list: Passed to ``meta.Metadata(..., "multi")``.
        root: Passed to ``relativePaths`` for every channel.

    Returns:
        None. Results are written to CSV files; progress and summaries are
        printed.
    """
    # Load plate maps data and create labels
    plateMaps = meta.Metadata(plate_maps, "multi", "tabs")
    maps = plateMaps.data
    maps["Allele"] = maps["NCBIGeneID"].astype(
        str) + "@" + maps["x_mutation_status"]
    maps["Gene"] = 0
    alleles = maps["Allele"].unique()
    genes = maps["NCBIGeneID"].unique()

    # Replace every allele string by its position in `alleles`.
    print("Unique alleles:", len(alleles))
    for i, allele in enumerate(alleles):
        maps.loc[maps["Allele"] == allele, "Allele"] = i
        utils.printProgress(i + 1, len(alleles), prefix="Alleles")

    # Gene ids come from NCBIGeneID, so the match must be made against that
    # column (comparing against broad_sample left every Gene at 0).
    # NOTE(review): rows whose NCBIGeneID is missing never match and keep
    # the initial Gene value 0, the same id as the first gene.
    print("Unique genes:", len(genes))
    for i, gene in enumerate(genes):
        maps.loc[maps["NCBIGeneID"] == gene, "Gene"] = i
        utils.printProgress(i + 1, len(genes), prefix="Genes")

    # Load barcodes and csv files
    barcodes = meta.Metadata(barcode_file, "single")
    load_data = meta.Metadata(csv_list, "multi")

    # Merge two frames: csvs + barcodes to attach gene layout to each image
    # Only the first 13 columns of the csv data are kept.
    columns = list(load_data.data.columns.values)
    metadata = pd.merge(load_data.data.drop(columns[13:], axis=1),
                        barcodes.data,
                        left_on=["Metadata_Plate"],
                        right_on=["Assay_Plate_Barcode"],
                        how="inner")
    # Release the source frames before the next large merge.
    del load_data, barcodes

    # Concatenate paths and filenames and make them relative
    for channel in ("RNA", "ER", "AGP", "Mito", "DNA"):
        metadata = relativePaths(metadata, channel,
                                 "PathName_Orig" + channel,
                                 "FileName_Orig" + channel, root)
    # DataFrame.info() prints its report itself and returns None, so it is
    # called directly instead of being wrapped in print().
    metadata.info()

    # Merge two frames: metadata + plateMaps to attach allele info to each image
    metadata = pd.merge(metadata,
                        maps,
                        left_on=["Plate_Map_Name", "Metadata_Well"],
                        right_on=["plate_map_name", "well_position"],
                        how="left")
    # Plate-map columns that are no longer needed once labels are attached.
    metadata = metadata.drop([
        "plate_map_name", "well_position", "broad_sample", "NCBIGeneID",
        "pert_type", "PublicID", "Transcript", "VirusPlateName",
        "x_mutation_status", "pert_name"
    ],
                             axis=1)
    # Temporary key identifying a physical well (plate + well).
    metadata["plate_well"] = metadata["Metadata_Plate"].astype(
        str) + "::" + metadata["Metadata_Well"]

    # Find replicate labels
    metadata["Allele_Replicate"] = 0
    replicateDistribution = {}  # number of wells -> number of alleles
    for i in range(len(alleles)):
        mask1 = metadata["Allele"] == i
        wells = metadata[mask1]["plate_well"].unique()
        utils.printProgress(i + 1, len(alleles), "Replicates")
        for replicate, well in enumerate(wells, start=1):
            mask2 = metadata["plate_well"] == well
            metadata.loc[mask1 & mask2, "Allele_Replicate"] = replicate
        wellCount = len(wells)
        replicateDistribution[wellCount] = replicateDistribution.get(
            wellCount, 0) + 1
    metadata = metadata.drop(["plate_well"], axis=1)
    print(replicateDistribution)
    metadata.info()

    # Save resulting metadata
    metadata.to_csv("metadata.csv", index=False)
    dframe = pd.DataFrame({
        "ID": pd.Series(range(len(alleles))),
        "Allele": pd.Series(alleles)
    })
    dframe.to_csv("alleles.csv", index=False)
    dframe = pd.DataFrame({
        "ID": pd.Series(range(len(genes))),
        "Gene": pd.Series(genes)
    })
    dframe.to_csv("genes.csv", index=False)