def _cp2_file_postfix(path):
    """Return '_' plus the text after the last '_' in the file's base name.

    The directory part and the extension of `path` are ignored.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    return '_' + stem.split('_')[-1]


def import_data_from_cp():
    """Import CellProfiler 2 CSV results and hand them to the Importer.

    Reads the module-level settings `image_cp2_file`, `object_cp2_csv_files`,
    `cp2_csv_path`, `csv_delimiter` and `csv_extension`.  The postfix of
    every CSV file is derived from its base name and passed on to
    `import_cp2_csv_results`; the resulting container is stored through
    `Importer().set_pdc` and the state is updated to 'imported'.

    Returns:
        The status message that is also printed.

    Raises:
        Exception: if `image_cp2_file` or `object_cp2_csv_files` has not
            been defined.
    """
    # The settings are plain module-level names that may not have been
    # defined yet; looking a name up can only fail with NameError, so that
    # is the only exception worth translating into a user-facing message.
    try:
        image_cp2_file
    except NameError:
        raise Exception(
            'You need to specify the CellProfiler 2 image CSV file!')
    try:
        object_cp2_csv_files
    except NameError:
        raise Exception(
            'You need to specify the CellProfiler 2 object CSV files!')

    image_file_postfix = _cp2_file_postfix(image_cp2_file)
    # Object file entries are converted with str() before use.
    object_file_postfixes = [_cp2_file_postfix(str(object_file))
                             for object_file in object_cp2_csv_files]

    pdc = import_cp2_csv_results(cp2_csv_path, image_file_postfix,
                                 object_file_postfixes, csv_delimiter,
                                 csv_extension)
    Importer().set_pdc(pdc)
    utils.update_state(importer.__name__, 'imported')

    message = 'Finished importing data from CellProfiler'
    # Parenthesised single-argument form prints the same text on Python 2
    # and Python 3.
    print(message)
    return message
def load_hdf5():
    """Load a YACA HDF5 result file and hand it to the Importer.

    Reads the module-level setting `hdf5_input_file` and, if it is defined,
    `optional_hdf5_input_files`.  Both are passed to `import_hdf5_results`;
    the resulting container is stored through `Importer().set_pdc` and the
    state is updated to 'imported'.

    Returns:
        The status message 'Finished loading HDF5 file'.

    Raises:
        Exception: if `hdf5_input_file` has not been defined.
    """
    # Looking up an undefined module-level setting can only raise NameError,
    # so nothing broader needs to be caught here.
    try:
        hdf5_input_file
    except NameError:
        raise Exception('You need to specify the YACA HDF5 input file!')

    # The additional input files are optional: fall back to an empty list
    # when the setting does not exist.
    try:
        further_hdf5_input_files = optional_hdf5_input_files
    except NameError:
        further_hdf5_input_files = []

    pdc = import_hdf5_results(hdf5_input_file, further_hdf5_input_files)
    Importer().set_pdc(pdc)
    # NOTE(review): the other import functions pass `importer.__name__`
    # here instead of `__name__` -- confirm both name the same module.
    utils.update_state(__name__, 'imported')
    return 'Finished loading HDF5 file'
def _chunked(items, size):
    """Split the list `items` into consecutive slices of at most `size`."""
    return [items[start:start + size]
            for start in range(0, len(items), size)]


def _register_feature_id(feature_ids, name):
    """Map `name` to the current size of `feature_ids` and return that id."""
    feature_ids[name] = len(feature_ids)
    return feature_ids[name]


def _get_or_create_index(entities, index_by_name, name, factory):
    """Return the index stored for `name`, creating the entity if unknown.

    A new entity is built with `factory(name)`, gets the next position in
    `entities` as its `index`, is appended to `entities` and is recorded in
    `index_by_name`.
    """
    if name not in index_by_name:
        entity = factory(name)
        entity.index = len(entities)
        entities.append(entity)
        index_by_name[name] = entity.index
    return index_by_name[name]


def _collect_image_files(img_row):
    """Return (entity name, full path) pairs for the image files of a row.

    Columns prefixed with IMAGE_FILENAME_IDENTIFIER and
    IMAGE_PATHNAME_IDENTIFIER are matched on the remainder of their name;
    file names without a matching path column are dropped.
    """
    filenames = {}
    paths = {}
    for entry in img_row.iterkeys():
        if entry.startswith(IMAGE_FILENAME_IDENTIFIER):
            filenames[entry[len(IMAGE_FILENAME_IDENTIFIER):]] = img_row[entry]
        elif entry.startswith(IMAGE_PATHNAME_IDENTIFIER):
            paths[entry[len(IMAGE_PATHNAME_IDENTIFIER):]] = img_row[entry]
    return [(entity_name, os.path.join(paths[entity_name], filename))
            for entity_name, filename in filenames.items()
            if entity_name in paths]


def import_data_from_db():
    """Import images and their objects from the database.

    Selects the configured columns of the images table, restricted by the
    enabled plate / well / replicate / position filters, and processes the
    treatments of the treatment filter in groups of 100.  For every image
    row a `yaca_data_image` is created and all objects referring to that
    image are read from the objects table.  Feature values are stored in
    `pdc.imgFeatures` and `pdc.objFeatures`, which grow in blocks and are
    trimmed to the number of imported rows at the end.  The filled container
    is passed to `importer.Importer().set_pdc` and the state is updated to
    'imported'.

    Returns:
        The status message that is also printed.

    Raises:
        Exception: if the treatment filter is enabled but its file could
            not be read, or if no images or no objects were imported.
    """
    from sqlalchemy import select, or_, and_

    treatment_group_size = 100

    engine = DBConnection().engine
    images_table = DBConnection().images_table
    objects_table = DBConnection().objects_table

    # Reading the treatment filter is best effort: the file is only needed
    # when `use_treatment_filter` is enabled.
    try:
        with open(treatment_filter_file) as filter_file:
            treatment_filter_list = [line.strip() for line in filter_file
                                     if line.strip()]
    except (NameError, TypeError, EnvironmentError):
        treatment_filter_list = None
    if use_treatment_filter and treatment_filter_list is None:
        # Previously this surfaced later as a TypeError on len(None).
        raise Exception('Failed to read the treatment filter file!')

    # Image columns to select.  Columns in `skip_img_columns` are selected
    # but rejected by `img_column_filter`, i.e. never become features.
    local_images_db_columns = set(images_db_columns)
    skip_img_columns = set()
    for column in image_files_db_columns:
        local_images_db_columns.add(column)
        skip_img_columns.add(column)
    for column in (plate_db_column, replicate_db_column, well_db_column,
                   treatment_db_column, image_id_db_column):
        if column not in local_images_db_columns:
            local_images_db_columns.add(column)
            skip_img_columns.add(column)

    def img_column_filter(column):
        return (column in local_images_db_columns
                and column not in skip_img_columns)

    # Object columns to select.
    local_objects_db_columns = set(objects_db_columns)
    skip_obj_columns = set()
    for column in (position_x_db_column, position_y_db_column,
                   object_img_id_db_column):
        if column not in local_objects_db_columns:
            local_objects_db_columns.add(column)
            # NOTE(review): the image id column is skipped only when it was
            # added here, mirroring the image column handling -- confirm.
            if column == object_img_id_db_column:
                skip_obj_columns.add(column)

    def obj_column_filter(column):
        return (column in local_objects_db_columns
                and column not in skip_obj_columns)

    object_columns = [c for c in objects_table.c
                      if c.name in local_objects_db_columns]

    stmt = select([c for c in images_table.c
                   if c.name in local_images_db_columns])
    and_args = []
    if use_plate_filter:
        and_args.append(or_(*[images_table.c[plate_db_column] == pl
                              for pl in plate_filter]))
    if use_well_filter:
        and_args.append(or_(*[images_table.c[well_db_column] == wl
                              for wl in well_filter]))
    if use_replicate_filter:
        and_args.append(or_(*[images_table.c[replicate_db_column] == repl
                              for repl in replicate_filter]))
    if use_position_filter:
        and_args.append(or_(*[images_table.c[position_db_column] == pos
                              for pos in position_filter]))
    if and_args:
        if len(and_args) > 1:
            stmt = stmt.where(and_(*and_args))
        else:
            stmt = stmt.where(and_args[0])

    # The total number of images is only counted for short treatment lists;
    # otherwise progress is reported per treatment group.
    img_count = None
    if use_treatment_filter and 0 < len(treatment_filter_list) < 500:
        or_args = [images_table.c[treatment_db_column] == name
                   for name in treatment_filter_list]
        count_stmt = stmt.where(or_(*or_args)).count()
        img_result = engine.execute(count_stmt)
        img_count = img_result.fetchone()[0]
        img_result.close()
    if not use_treatment_filter:
        # A single placeholder group that adds no treatment condition.
        treatment_filter_list = [None]

    pdc = yaca_data_container()
    img_index = 0
    obj_index = 0

    conn = engine.connect()
    # NOTE(review): the return value is discarded as in the original code;
    # confirm whether the option is applied in place by the SQLAlchemy
    # version in use.
    conn.execution_options(autocommit=False)
    try:
        groups = _chunked(treatment_filter_list, treatment_group_size)
        for group_no, tr_group in enumerate(groups):
            trans = conn.begin()
            img_stmt = stmt
            or_args = [images_table.c[treatment_db_column] == name
                       for name in tr_group if name is not None]
            # Only restrict by treatment when there is something to match;
            # an or_() without arguments is not a valid condition.
            if or_args:
                img_stmt = img_stmt.where(or_(*or_args))
            img_result = conn.execute(img_stmt)

            for img_row in img_result:
                if img_count is None:
                    sys.stdout.write(
                        '\rImporting treatment #{}-{} of {}, image #{}'.format(
                            group_no * treatment_group_size + 1,
                            min((group_no + 1) * treatment_group_size,
                                len(treatment_filter_list)),
                            len(treatment_filter_list),
                            img_index + 1))
                else:
                    sys.stdout.write(
                        '\rImporting image #{} of {}'.format(
                            img_index + 1, img_count))
                sys.stdout.flush()

                img = yaca_data_image()
                img.index = img_index
                img.treatment = _get_or_create_index(
                    pdc.treatments, pdc.treatmentByName,
                    img_row[treatment_db_column], yaca_data_treatment)
                img.plate = _get_or_create_index(
                    pdc.plates, pdc.plateByName,
                    img_row[plate_db_column], yaca_data_plate)
                img.well = _get_or_create_index(
                    pdc.wells, pdc.wellByName,
                    img_row[well_db_column], yaca_data_well)
                img.replicate = _get_or_create_index(
                    pdc.replicates, pdc.replicateByName,
                    img_row[replicate_db_column], yaca_data_replicate)
                img.imageFiles = _collect_image_files(img_row)

                if pdc.imgFeatures is None:
                    # First image: fix the feature column layout.
                    for entry in img_row.iterkeys():
                        if not img_column_filter(entry):
                            continue
                        if not entry.startswith((
                                'Metadata_',
                                IMAGE_FILENAME_IDENTIFIER,
                                IMAGE_PATHNAME_IDENTIFIER)):
                            _register_feature_id(pdc.imgFeatureIds, entry)
                    pdc.imgImageFeatureId = _register_feature_id(
                        pdc.imgFeatureIds, IMAGE_ID_FEATURE_NAME)
                    pdc.imgPlateFeatureId = _register_feature_id(
                        pdc.imgFeatureIds, PLATE_ID_FEATURE_NAME)
                    pdc.imgWellFeatureId = _register_feature_id(
                        pdc.imgFeatureIds, WELL_ID_FEATURE_NAME)
                    pdc.imgReplicateFeatureId = _register_feature_id(
                        pdc.imgFeatureIds, REPLICATE_ID_FEATURE_NAME)
                    pdc.imgTreatmentFeatureId = _register_feature_id(
                        pdc.imgFeatureIds, TREATMENT_ID_FEATURE_NAME)
                    pdc.imgQualityControlFeatureId = _register_feature_id(
                        pdc.imgFeatureIds, QUALITY_CONTROL_FEATURE_NAME)
                    pdc.imgFeatures = numpy.empty(
                        (IMAGE_ARRAY_BLOCKSIZE, len(pdc.imgFeatureIds)))

                # The arrays are resized in place, so they are deliberately
                # never bound to a local name or passed to a helper: extra
                # references can make ndarray.resize refuse to run.
                if not img.index < pdc.imgFeatures.shape[0]:
                    imgFeatureShape = list(pdc.imgFeatures.shape)
                    imgFeatureShape[0] += IMAGE_ARRAY_BLOCKSIZE
                    pdc.imgFeatures.resize(imgFeatureShape)

                for entry in img_row.iterkeys():
                    # NOTE(review): only 'Metadata_' columns rejected by the
                    # column filter are kept as properties -- confirm this
                    # is the intended condition.
                    if entry.startswith('Metadata_') \
                            and not img_column_filter(entry):
                        img.properties[entry] = img_row[entry]
                    elif entry in pdc.imgFeatureIds:
                        pdc.imgFeatures[img.index,
                                        pdc.imgFeatureIds[entry]] \
                            = img_row[entry]
                # Use the indices resolved for THIS image; the entity
                # objects are only created for names seen the first time.
                pdc.imgFeatures[img.index, pdc.imgImageFeatureId] = img.index
                pdc.imgFeatures[img.index, pdc.imgWellFeatureId] = img.well
                pdc.imgFeatures[img.index, pdc.imgPlateFeatureId] = img.plate
                pdc.imgFeatures[img.index, pdc.imgTreatmentFeatureId] \
                    = img.treatment
                pdc.imgFeatures[img.index, pdc.imgReplicateFeatureId] \
                    = img.replicate
                pdc.imgFeatures[img.index, pdc.imgQualityControlFeatureId] \
                    = QUALITY_CONTROL_DEFAULT

                obj_stmt = select(
                    object_columns,
                    objects_table.c[object_img_id_db_column]
                    == img_row[image_id_db_column])
                obj_result = conn.execute(obj_stmt)
                for obj_row in obj_result:
                    obj = yaca_data_object()
                    obj.index = obj_index
                    obj.image = img
                    obj.position_x = obj_row[position_x_db_column]
                    obj.position_y = obj_row[position_y_db_column]

                    if pdc.objFeatures is None:
                        # First object: fix the feature column layout.
                        for entry in obj_row.iterkeys():
                            if not obj_column_filter(entry):
                                continue
                            if not entry.startswith('Metadata_'):
                                _register_feature_id(pdc.objFeatureIds, entry)
                        pdc.objObjectFeatureId = _register_feature_id(
                            pdc.objFeatureIds, OBJECT_ID_FEATURE_NAME)
                        pdc.objImageFeatureId = _register_feature_id(
                            pdc.objFeatureIds, IMAGE_ID_FEATURE_NAME)
                        pdc.objPlateFeatureId = _register_feature_id(
                            pdc.objFeatureIds, PLATE_ID_FEATURE_NAME)
                        pdc.objWellFeatureId = _register_feature_id(
                            pdc.objFeatureIds, WELL_ID_FEATURE_NAME)
                        pdc.objReplicateFeatureId = _register_feature_id(
                            pdc.objFeatureIds, REPLICATE_ID_FEATURE_NAME)
                        pdc.objTreatmentFeatureId = _register_feature_id(
                            pdc.objFeatureIds, TREATMENT_ID_FEATURE_NAME)
                        pdc.objQualityControlFeatureId = _register_feature_id(
                            pdc.objFeatureIds, QUALITY_CONTROL_FEATURE_NAME)
                        pdc.objFeatures = numpy.empty(
                            (OBJECT_ARRAY_BLOCKSIZE, len(pdc.objFeatureIds)))

                    if not obj.index < pdc.objFeatures.shape[0]:
                        objFeatureShape = list(pdc.objFeatures.shape)
                        objFeatureShape[0] += OBJECT_ARRAY_BLOCKSIZE
                        pdc.objFeatures.resize(objFeatureShape)

                    for entry in obj_row.iterkeys():
                        if entry in pdc.objFeatureIds:
                            pdc.objFeatures[obj.index,
                                            pdc.objFeatureIds[entry]] \
                                = obj_row[entry]
                    pdc.objFeatures[obj.index, pdc.objObjectFeatureId] \
                        = obj.index
                    pdc.objFeatures[obj.index, pdc.objImageFeatureId] \
                        = img.index
                    pdc.objFeatures[obj.index, pdc.objWellFeatureId] \
                        = img.well
                    pdc.objFeatures[obj.index, pdc.objPlateFeatureId] \
                        = img.plate
                    pdc.objFeatures[obj.index, pdc.objTreatmentFeatureId] \
                        = img.treatment
                    pdc.objFeatures[obj.index, pdc.objReplicateFeatureId] \
                        = img.replicate
                    pdc.objFeatures[obj.index,
                                    pdc.objQualityControlFeatureId] \
                        = QUALITY_CONTROL_DEFAULT

                    pdc.objects.append(obj)
                    obj_index += 1
                obj_result.close()

                pdc.images.append(img)
                img_index += 1

            img_result.close()
            trans.commit()
    finally:
        # Release the connection even when the import fails half-way.
        conn.close()

    sys.stdout.write('\n')

    assert img_index == len(pdc.images)
    assert obj_index == len(pdc.objects)

    # actually len(pdc.images) == 0 implies len(pdc.objects) == 0
    if len(pdc.images) == 0 or len(pdc.objects) == 0:
        raise Exception("Failed to import data: no objects")

    # Trim the block-wise grown arrays to the number of imported rows.
    imgFeatureShape = list(pdc.imgFeatures.shape)
    imgFeatureShape[0] = img_index
    pdc.imgFeatures.resize(imgFeatureShape)

    objFeatureShape = list(pdc.objFeatures.shape)
    objFeatureShape[0] = obj_index
    pdc.objFeatures.resize(objFeatureShape)

    importer.Importer().set_pdc(pdc)
    utils.update_state(importer.__name__, 'imported')

    message = 'Finished importing data from database'
    # Parenthesised single-argument form prints the same text on Python 2
    # and Python 3.
    print(message)
    return message