示例#1
0
def import_data_from_cp():
    """Import CellProfiler 2 CSV results and hand them to the importer.

    The function reads its configuration from global names:
    ``image_cp2_file``, ``object_cp2_csv_files``, ``cp2_csv_path``,
    ``csv_delimiter`` and ``csv_extension``.  The file postfixes passed to
    ``import_cp2_csv_results`` are derived from the part of each file's
    base name that follows the last underscore.

    Returns:
        The string 'Finished importing data from CellProfiler'.

    Raises:
        Exception: if ``image_cp2_file`` or ``object_cp2_csv_files`` is
            not defined.
    """

    # import data from files

    # Evaluating a bare name can only fail with NameError, so only that
    # exception is translated into the configuration error below.
    try:
        image_cp2_file
    except NameError:
        raise Exception('You need to specify the CellProfiler 2 image CSV file!')
    try:
        object_cp2_csv_files
    except NameError:
        raise Exception('You need to specify the CellProfiler 2 object CSV files!')

    def file_postfix(path):
        """Return '_' plus the text after the last '_' of the base name."""
        base_name = os.path.splitext(os.path.basename(path))[0]
        return '_' + base_name.split('_')[-1]

    image_file_postfix = file_postfix(image_cp2_file)
    object_file_postfixes = [file_postfix(str(object_file))
                             for object_file in object_cp2_csv_files]

    pdc = import_cp2_csv_results(cp2_csv_path, image_file_postfix, object_file_postfixes, csv_delimiter, csv_extension)

    Importer().set_pdc(pdc)

    utils.update_state(importer.__name__, 'imported')

    # The parenthesised form prints the same text with the Python 2 print
    # statement and is also valid when print is a function.
    print('Finished importing data from CellProfiler')
    return 'Finished importing data from CellProfiler'
示例#2
0
def load_hdf5():
    """Load results from a YACA HDF5 file and hand them to the importer.

    The function reads its configuration from global names:
    ``hdf5_input_file`` (required) and ``optional_hdf5_input_files``
    (optional; an empty list is used when it is not defined).

    Returns:
        The string 'Finished loading HDF5 file'.

    Raises:
        Exception: if ``hdf5_input_file`` is not defined.
    """

    # Evaluating a bare name can only fail with NameError, so only that
    # exception is translated into the configuration error below.
    try:
        hdf5_input_file
    except NameError:
        raise Exception('You need to specify the YACA HDF5 input file!')

    # Additional input files are optional: fall back to no extra files
    # when the setting has not been defined.
    try:
        further_hdf5_input_files = optional_hdf5_input_files
    except NameError:
        further_hdf5_input_files = []

    pdc = import_hdf5_results(hdf5_input_file, further_hdf5_input_files)

    Importer().set_pdc(pdc)

    utils.update_state(__name__, 'imported')

    return 'Finished loading HDF5 file'
示例#3
0
def _register_entity(name, index_by_name, entities, factory, next_index):
    """Return the index registered for `name`, creating the entity if new.

    When `name` is not yet a key of `index_by_name`, an entity is built
    with ``factory(name)``, receives `next_index` as its ``index``, is
    appended to `entities` and recorded in `index_by_name`.

    Returns:
        A tuple ``(index, next_index)``; `next_index` is advanced by one
        when a new entity was created.
    """
    if name not in index_by_name:
        entity = factory(name)
        entity.index = next_index
        entities.append(entity)
        index_by_name[name] = entity.index
        next_index += 1
    return index_by_name[name], next_index


def _append_feature_id(feature_ids, name):
    """Map `name` to the next free column index and return that index."""
    feature_ids[name] = len(feature_ids)
    return feature_ids[name]


def import_data_from_db():
    """Import image and object data from the database into a new container.

    Rows are read from the images table, optionally restricted by the
    plate, well, replicate, position and treatment filters.  For every
    image row the matching rows of the objects table are read as well.
    Everything is collected in a new ``yaca_data_container`` which is
    passed to ``importer.Importer().set_pdc`` before the state is updated
    to 'imported'.

    The configuration (column names, filters, block sizes, ...) is read
    from global names.

    Returns:
        The string 'Finished importing data from database'.

    Raises:
        Exception: if the treatment filter is enabled but the treatment
            filter file could not be read, or if no images or no objects
            were imported.
    """

    from itertools import izip_longest
    from sqlalchemy import select, or_, and_

    # Number of treatments that are combined into a single image query.
    treatment_group_size = 100

    engine = DBConnection().engine
    images_table = DBConnection().images_table
    objects_table = DBConnection().objects_table

    # Reading the treatment filter file is best effort: any failure
    # (including an undefined file name) leaves the list unset.
    try:
        with open(treatment_filter_file) as filter_file:
            treatment_filter_list = [line.strip() for line in filter_file
                                     if line.strip()]
    except Exception:
        treatment_filter_list = None
    if use_treatment_filter and treatment_filter_list is None:
        raise Exception(
            'You need to specify a readable treatment filter file!')

    # Image columns to fetch.  Columns in skip_img_columns are fetched for
    # bookkeeping only and are not turned into image features.
    local_images_db_columns = set(images_db_columns)
    skip_img_columns = set()
    for column in image_files_db_columns:
        local_images_db_columns.add(column)
        skip_img_columns.add(column)
    for column in (plate_db_column, replicate_db_column,
                   well_db_column, treatment_db_column,
                   image_id_db_column):
        if column not in local_images_db_columns:
            local_images_db_columns.add(column)
            skip_img_columns.add(column)

    def img_column_filter(column):
        """Tell whether an image column may become an image feature."""
        return column in local_images_db_columns \
            and column not in skip_img_columns

    # Object columns to fetch.  Only the image-id column is excluded from
    # the object features when it had to be added here.
    local_objects_db_columns = set(objects_db_columns)
    skip_obj_columns = set()
    for column in (position_x_db_column, position_y_db_column,
                   object_img_id_db_column):
        if column not in local_objects_db_columns:
            local_objects_db_columns.add(column)
            if column == object_img_id_db_column:
                skip_obj_columns.add(column)

    def obj_column_filter(column):
        """Tell whether an object column may become an object feature."""
        return column in local_objects_db_columns \
            and column not in skip_obj_columns

    def any_of(column_name, values):
        """Build an OR clause matching any of `values` in an images column."""
        column = images_table.c[column_name]
        return or_(*[column == value for value in values])

    def grouper(iterable, n, fillvalue=None):
        """Yield consecutive n-tuples, padding the last with `fillvalue`."""
        args = [iter(iterable)] * n
        return izip_longest(*args, fillvalue=fillvalue)

    img_index = 0
    obj_index = 0
    pl_index = 0
    repl_index = 0
    wl_index = 0
    tr_index = 0
    pdc = yaca_data_container()

    stmt = select([c for c in images_table.c
                   if c.name in local_images_db_columns])
    and_args = []
    if use_plate_filter:
        and_args.append(any_of(plate_db_column, plate_filter))
    if use_well_filter:
        and_args.append(any_of(well_db_column, well_filter))
    if use_replicate_filter:
        and_args.append(any_of(replicate_db_column, replicate_filter))
    if use_position_filter:
        and_args.append(any_of(position_db_column, position_filter))
    if and_args:
        if len(and_args) > 1:
            whereargs = and_(*and_args)
        else:
            whereargs = and_args[0]
        stmt = stmt.where(whereargs)

    # The total image count is only queried for short treatment lists;
    # otherwise the progress output reports treatments instead.
    if use_treatment_filter and len(treatment_filter_list) < 500:
        count_stmt = stmt.where(
            any_of(treatment_db_column, treatment_filter_list)).count()
        img_result = engine.execute(count_stmt)
        img_count = img_result.fetchone()[0]
        img_result.close()
    else:
        img_count = None
    if not use_treatment_filter:
        # A single placeholder entry yields exactly one unfiltered query.
        treatment_filter_list = [None]

    object_columns = [c for c in objects_table.c
                      if c.name in local_objects_db_columns]
    non_feature_prefixes = ('Metadata_', IMAGE_FILENAME_IDENTIFIER,
                            IMAGE_PATHNAME_IDENTIFIER)

    conn = engine.connect()
    # NOTE(review): in SQLAlchemy versions where
    # Connection.execution_options() returns a modified copy, this call has
    # no effect because the result is discarded -- confirm against the
    # installed version.
    conn.execution_options(autocommit=False)
    try:
        groups = grouper(treatment_filter_list, treatment_group_size)
        for group_index, tr_group in enumerate(groups):
            trans = conn.begin()
            img_stmt = stmt
            # Drop the padding (and the "no filter" placeholder); only add
            # a treatment clause when there is something to match.
            group_treatments = [name for name in tr_group
                                if name is not None]
            if group_treatments:
                img_stmt = img_stmt.where(
                    any_of(treatment_db_column, group_treatments))
            img_result = conn.execute(img_stmt)
            for img_row in img_result:
                if img_count is None:
                    first_tr = group_index * treatment_group_size + 1
                    last_tr = min((group_index + 1) * treatment_group_size,
                                  len(treatment_filter_list))
                    sys.stdout.write(
                        '\rImporting treatment #{}-{} of {}, image #{}'.format(
                            first_tr, last_tr,
                            len(treatment_filter_list), img_index + 1))
                else:
                    sys.stdout.write(
                        '\rImporting image #{} of {}'.format(
                            img_index + 1, img_count))
                sys.stdout.flush()

                img = yaca_data_image()
                img.index = img_index
                plate = img_row[plate_db_column]
                replicate = img_row[replicate_db_column]
                well = img_row[well_db_column]
                treatment = img_row[treatment_db_column]

                # Look up (or create) the treatment, plate, well and
                # replicate of this image and remember their indices.
                img.treatment, tr_index = _register_entity(
                    treatment, pdc.treatmentByName, pdc.treatments,
                    yaca_data_treatment, tr_index)
                img.plate, pl_index = _register_entity(
                    plate, pdc.plateByName, pdc.plates,
                    yaca_data_plate, pl_index)
                img.well, wl_index = _register_entity(
                    well, pdc.wellByName, pdc.wells,
                    yaca_data_well, wl_index)
                img.replicate, repl_index = _register_entity(
                    replicate, pdc.replicateByName, pdc.replicates,
                    yaca_data_replicate, repl_index)

                # Pair up file names and path names that share an entity
                # name to build the full image file paths.
                filenames = {}
                paths = {}
                for entry in img_row.iterkeys():
                    if entry.startswith(IMAGE_FILENAME_IDENTIFIER):
                        entity_name = entry[len(IMAGE_FILENAME_IDENTIFIER):]
                        filenames[entity_name] = img_row[entry]
                    elif entry.startswith(IMAGE_PATHNAME_IDENTIFIER):
                        entity_name = entry[len(IMAGE_PATHNAME_IDENTIFIER):]
                        paths[entity_name] = img_row[entry]
                img.imageFiles = [
                    (entity_name, os.path.join(paths[entity_name], filename))
                    for entity_name, filename in filenames.iteritems()
                    if entity_name in paths]

                # The first image row defines the image feature columns.
                if pdc.imgFeatures is None:
                    for entry in img_row.iterkeys():
                        if not img_column_filter(entry):
                            continue
                        if not entry.startswith(non_feature_prefixes):
                            _append_feature_id(pdc.imgFeatureIds, entry)
                    pdc.imgImageFeatureId = _append_feature_id(
                        pdc.imgFeatureIds, IMAGE_ID_FEATURE_NAME)
                    pdc.imgPlateFeatureId = _append_feature_id(
                        pdc.imgFeatureIds, PLATE_ID_FEATURE_NAME)
                    pdc.imgWellFeatureId = _append_feature_id(
                        pdc.imgFeatureIds, WELL_ID_FEATURE_NAME)
                    pdc.imgReplicateFeatureId = _append_feature_id(
                        pdc.imgFeatureIds, REPLICATE_ID_FEATURE_NAME)
                    pdc.imgTreatmentFeatureId = _append_feature_id(
                        pdc.imgFeatureIds, TREATMENT_ID_FEATURE_NAME)
                    pdc.imgQualityControlFeatureId = _append_feature_id(
                        pdc.imgFeatureIds, QUALITY_CONTROL_FEATURE_NAME)
                    pdc.imgFeatures = numpy.empty(
                        (IMAGE_ARRAY_BLOCKSIZE, len(pdc.imgFeatureIds)))
                # Grow the array by one block when it is full.  The resize
                # is done directly on the attribute: ndarray.resize checks
                # references, so the array must not be aliased or passed
                # to a helper here.
                if not img.index < pdc.imgFeatures.shape[0]:
                    imgFeatureShape = list(pdc.imgFeatures.shape)
                    imgFeatureShape[0] += IMAGE_ARRAY_BLOCKSIZE
                    pdc.imgFeatures.resize(imgFeatureShape)
                for entry in img_row.iterkeys():
                    if entry.startswith('Metadata_') \
                       and not img_column_filter(entry):
                        img.properties[entry] = img_row[entry]
                    elif entry in pdc.imgFeatureIds:
                        pdc.imgFeatures[img.index,
                                        pdc.imgFeatureIds[entry]] \
                            = img_row[entry]
                # Use the indices of *this* image's well/plate/treatment/
                # replicate, not those of the most recently created ones.
                pdc.imgFeatures[img.index, pdc.imgImageFeatureId] = img.index
                pdc.imgFeatures[img.index, pdc.imgWellFeatureId] = img.well
                pdc.imgFeatures[img.index, pdc.imgPlateFeatureId] = img.plate
                pdc.imgFeatures[img.index, pdc.imgTreatmentFeatureId] \
                    = img.treatment
                pdc.imgFeatures[img.index, pdc.imgReplicateFeatureId] \
                    = img.replicate
                pdc.imgFeatures[img.index, pdc.imgQualityControlFeatureId] \
                    = QUALITY_CONTROL_DEFAULT

                obj_stmt = select(
                    object_columns,
                    objects_table.c[object_img_id_db_column]
                    == img_row[image_id_db_column])
                obj_result = conn.execute(obj_stmt)
                for obj_row in obj_result:
                    obj = yaca_data_object()
                    obj.index = obj_index
                    obj.image = img
                    obj.position_x = obj_row[position_x_db_column]
                    obj.position_y = obj_row[position_y_db_column]

                    # The first object row defines the object feature
                    # columns.
                    if pdc.objFeatures is None:
                        for entry in obj_row.iterkeys():
                            if not obj_column_filter(entry):
                                continue
                            if not entry.startswith('Metadata_'):
                                _append_feature_id(pdc.objFeatureIds, entry)
                        pdc.objObjectFeatureId = _append_feature_id(
                            pdc.objFeatureIds, OBJECT_ID_FEATURE_NAME)
                        pdc.objImageFeatureId = _append_feature_id(
                            pdc.objFeatureIds, IMAGE_ID_FEATURE_NAME)
                        pdc.objPlateFeatureId = _append_feature_id(
                            pdc.objFeatureIds, PLATE_ID_FEATURE_NAME)
                        pdc.objWellFeatureId = _append_feature_id(
                            pdc.objFeatureIds, WELL_ID_FEATURE_NAME)
                        pdc.objReplicateFeatureId = _append_feature_id(
                            pdc.objFeatureIds, REPLICATE_ID_FEATURE_NAME)
                        pdc.objTreatmentFeatureId = _append_feature_id(
                            pdc.objFeatureIds, TREATMENT_ID_FEATURE_NAME)
                        pdc.objQualityControlFeatureId = _append_feature_id(
                            pdc.objFeatureIds, QUALITY_CONTROL_FEATURE_NAME)
                        pdc.objFeatures = numpy.empty(
                            (OBJECT_ARRAY_BLOCKSIZE, len(pdc.objFeatureIds)))
                    # Same in-place growth as for the image features.
                    if not obj.index < pdc.objFeatures.shape[0]:
                        objFeatureShape = list(pdc.objFeatures.shape)
                        objFeatureShape[0] += OBJECT_ARRAY_BLOCKSIZE
                        pdc.objFeatures.resize(objFeatureShape)
                    for entry in obj_row.iterkeys():
                        if entry in pdc.objFeatureIds:
                            pdc.objFeatures[obj.index,
                                            pdc.objFeatureIds[entry]] \
                                = obj_row[entry]
                    pdc.objFeatures[obj.index, pdc.objObjectFeatureId] \
                        = obj.index
                    pdc.objFeatures[obj.index, pdc.objImageFeatureId] \
                        = img.index
                    pdc.objFeatures[obj.index, pdc.objWellFeatureId] \
                        = img.well
                    pdc.objFeatures[obj.index, pdc.objPlateFeatureId] \
                        = img.plate
                    pdc.objFeatures[obj.index, pdc.objTreatmentFeatureId] \
                        = img.treatment
                    pdc.objFeatures[obj.index, pdc.objReplicateFeatureId] \
                        = img.replicate
                    pdc.objFeatures[obj.index,
                                    pdc.objQualityControlFeatureId] \
                        = QUALITY_CONTROL_DEFAULT
                    pdc.objects.append(obj)
                    obj_index += 1
                obj_result.close()
                pdc.images.append(img)
                img_index += 1
            img_result.close()
            trans.commit()
    finally:
        # Always release the connection, also when the import fails.
        conn.close()
    sys.stdout.write('\n')

    assert img_index == len(pdc.images)
    assert obj_index == len(pdc.objects)
    assert wl_index == len(pdc.wells)
    assert pl_index == len(pdc.plates)
    assert tr_index == len(pdc.treatments)
    assert repl_index == len(pdc.replicates)

    # actually len(pdc.images) == 0 implies len(pdc.objects) == 0
    if len(pdc.images) == 0 or len(pdc.objects) == 0:
        raise Exception("Failed to import data: no objects")

    # Trim the block-wise allocated arrays to the number of rows used.
    imgFeatureShape = list(pdc.imgFeatures.shape)
    imgFeatureShape[0] = img_index
    pdc.imgFeatures.resize(imgFeatureShape)

    objFeatureShape = list(pdc.objFeatures.shape)
    objFeatureShape[0] = obj_index
    pdc.objFeatures.resize(objFeatureShape)

    importer.Importer().set_pdc(pdc)

    utils.update_state(importer.__name__, 'imported')

    # The parenthesised form prints the same text with the Python 2 print
    # statement and is also valid when print is a function.
    print('Finished importing data from database')
    return 'Finished importing data from database'