def get_measurement_columns(self, pipeline):
    '''Describe the measurement columns that this module produces

    pipeline - the current pipeline (not consulted by this method)

    Returns a list of (object name, feature name, column type) tuples:
    FileName_, PathName_ and MD5Digest_ columns for every entry in
    self.images, followed by one Metadata_ column per token found in the
    file-name and / or path metadata expressions of the first entry.
    '''
    columns = []
    for image_settings in self.images:
        image_name = image_settings[FD_IMAGE_NAME].value
        columns.append(
            ('Image', 'FileName_' + image_name,
             cpm.COLTYPE_VARCHAR_FILE_NAME))
        columns.append(
            ('Image', 'PathName_' + image_name,
             cpm.COLTYPE_VARCHAR_PATH_NAME))
        columns.append(
            ('Image', 'MD5Digest_' + image_name,
             cpm.COLTYPE_VARCHAR_FORMAT % 32))

    # Only the first image's metadata settings are examined here.
    first_image = self.images[0]
    choice = first_image[FD_METADATA_CHOICE]
    if choice == M_FILE_NAME or choice == M_BOTH:
        file_tokens = cpm.find_metadata_tokens(
            first_image[FD_FILE_METADATA].value)
        columns.extend(
            [('Image', 'Metadata_' + file_token,
              cpm.COLTYPE_VARCHAR_FILE_NAME)
             for file_token in file_tokens])
    if choice == M_PATH or choice == M_BOTH:
        path_tokens = cpm.find_metadata_tokens(
            first_image[FD_PATH_METADATA].value)
        columns.extend(
            [('Image', 'Metadata_' + path_token,
              cpm.COLTYPE_VARCHAR_PATH_NAME)
             for path_token in path_tokens])
    return columns
def run_objects(self, object_names, file_name, workspace):
    """Write the output file (or files, if there is metadata) for some objects

    object_names - a sequence of object names (or Image or Experiment)
                   telling which objects are combined into each file
    file_name - a file name, possibly containing metadata tags, that
                serves as the output file
    workspace - the measurements are taken from here

    A lone Experiment name produces a single experiment file. Otherwise
    one file is made per metadata group, where the groups come from the
    tags in the file name and, for a custom directory choice, the tags in
    the custom path.
    """
    has_single_name = len(object_names) == 1
    if has_single_name and object_names[0] == EXPERIMENT:
        self.make_experiment_file(file_name, workspace)
        return

    grouping_tags = cpmeas.find_metadata_tokens(file_name)
    if self.directory.is_custom_choice:
        grouping_tags += cpmeas.find_metadata_tokens(
            self.directory.custom_path)

    for group in workspace.measurements.group_by_metadata(grouping_tags):
        if has_single_name and object_names[0] == IMAGE:
            self.make_image_file(file_name, group.image_numbers, workspace)
            if self.wants_genepattern_file.value:
                self.make_gct_file(file_name, group.image_numbers, workspace)
            continue
        if has_single_name and object_names[0] == OBJECT_RELATIONSHIPS:
            self.make_relationships_file(
                file_name, group.image_numbers, workspace)
            continue
        self.make_object_file(
            object_names, file_name, group.image_numbers, workspace)
def run_objects(self, object_names, file_name, workspace):
    """Write the output file (or files, if there is metadata) for some objects

    object_names - a sequence of object names (or Image or Experiment)
                   telling which objects are combined into each file
    file_name - a file name, possibly containing metadata tags, that
                serves as the output file
    workspace - the measurements are taken from here

    A lone Experiment name produces a single experiment file. Otherwise
    one file is made per metadata group, where the groups come from the
    tags in the file name and, for a custom directory choice, the tags in
    the custom path.
    """
    has_single_name = len(object_names) == 1
    if has_single_name and object_names[0] == EXPERIMENT:
        self.make_experiment_file(file_name, workspace)
        return

    grouping_tags = cpmeas.find_metadata_tokens(file_name)
    if self.directory.is_custom_choice:
        grouping_tags += cpmeas.find_metadata_tokens(
            self.directory.custom_path)

    for group in workspace.measurements.group_by_metadata(grouping_tags):
        if has_single_name and object_names[0] == IMAGE:
            self.make_image_file(file_name, group.image_numbers, workspace)
            if self.wants_genepattern_file.value:
                self.make_gct_file(file_name, group.image_numbers, workspace)
            continue
        if has_single_name and object_names[0] == OBJECT_RELATIONSHIPS:
            self.make_relationships_file(
                file_name, group.image_numbers, workspace)
            continue
        self.make_object_file(
            object_names, file_name, group.image_numbers, workspace)
def get_metadata_groups(self, workspace, settings_group=None):
    '''Find the metadata groups that matter when building the file name

    workspace - the workspace whose measurements hold the image set
                metadata elements and grouping measurements
    settings_group - when saving individual objects, the settings group
                     that controls how the files are named

    Returns the result of grouping the workspace measurements by the
    metadata tags found in the file name setting (skipped when there is
    no settings group or it wants an automatic file name) plus, for a
    custom directory choice, the tags found in the custom path.
    '''
    if (settings_group is not None and
            not settings_group.wants_automatic_file_name):
        grouping_tags = cpmeas.find_metadata_tokens(
            settings_group.file_name.value)
    else:
        grouping_tags = []

    if self.directory.is_custom_choice:
        grouping_tags += cpmeas.find_metadata_tokens(
            self.directory.custom_path)

    return workspace.measurements.group_by_metadata(grouping_tags)
def get_metadata_tags(self, fd=None):
    """Find the metadata tags for one image, or for all images

    fd - an image file directory from self.images. When omitted (or
         otherwise false), the tags of every entry in self.images are
         merged, de-duplicated and returned in sorted order.

    Returns a list of the tokens found in the file-name and / or path
    metadata expressions, depending on the metadata choice setting.
    """
    if fd:
        found = []
        choice = fd[FD_METADATA_CHOICE]
        if choice in (M_FILE_NAME, M_BOTH):
            found += cpm.find_metadata_tokens(fd[FD_FILE_METADATA].value)
        if choice in (M_PATH, M_BOTH):
            found += cpm.find_metadata_tokens(fd[FD_PATH_METADATA].value)
        return found

    # No specific image given: take the union over all images.
    unique_tags = set()
    for image_fd in self.images:
        unique_tags.update(self.get_metadata_tags(image_fd))
    return sorted(unique_tags)
def get_measurement_columns(self, pipeline):
    '''Describe the measurement columns that this module produces

    pipeline - the current pipeline (not consulted by this method)

    Returns a list of (object name, feature name, column type) tuples:
    FileName_, PathName_ and MD5Digest_ columns for every entry in
    self.images, followed by one Metadata_ column per token found in the
    file-name and / or path metadata expressions of the first entry.
    '''
    columns = []
    for image_settings in self.images:
        image_name = image_settings[FD_IMAGE_NAME].value
        columns.append(
            ('Image', 'FileName_' + image_name,
             cpm.COLTYPE_VARCHAR_FILE_NAME))
        columns.append(
            ('Image', 'PathName_' + image_name,
             cpm.COLTYPE_VARCHAR_PATH_NAME))
        columns.append(
            ('Image', 'MD5Digest_' + image_name,
             cpm.COLTYPE_VARCHAR_FORMAT % 32))

    # Only the first image's metadata settings are examined here.
    first_image = self.images[0]
    choice = first_image[FD_METADATA_CHOICE]
    if choice == M_FILE_NAME or choice == M_BOTH:
        file_tokens = cpm.find_metadata_tokens(
            first_image[FD_FILE_METADATA].value)
        columns.extend(
            [('Image', 'Metadata_' + file_token,
              cpm.COLTYPE_VARCHAR_FILE_NAME)
             for file_token in file_tokens])
    if choice == M_PATH or choice == M_BOTH:
        path_tokens = cpm.find_metadata_tokens(
            first_image[FD_PATH_METADATA].value)
        columns.extend(
            [('Image', 'Metadata_' + path_token,
              cpm.COLTYPE_VARCHAR_PATH_NAME)
             for path_token in path_tokens])
    return columns
def update_imported_metadata(self):
    '''Rebuild self.imported_metadata to match the extraction settings

    Walks self.extraction_methods in order, accumulating the set of
    known metadata keys (seeded from self.ipd_metadata_keys if present):

    * manual extraction groups contribute the tokens found in their
      file or folder regular expression
    * imported extraction groups whose CSV file exists are paired with
      an ImportedMetadata object: an existing one that fully matches
      (is_match) is reused as-is; failing that, the first existing one
      with the same path is reused and its joiner is reset; failing
      that, a new one is loaded from the CSV file. Groups whose CSV file
      is missing or cannot be loaded are skipped.

    On exit, self.imported_metadata holds only the objects that were
    paired with a group.
    '''
    new_imported_metadata = []
    ipd_metadata_keys = set(getattr(self, "ipd_metadata_keys", []))
    for group in self.extraction_methods:
        if group.extraction_method == X_MANUAL_EXTRACTION:
            if group.source == XM_FILE_NAME:
                regexp = group.file_regexp
            else:
                regexp = group.folder_regexp
            ipd_metadata_keys.update(
                cpmeas.find_metadata_tokens(regexp.value))
        elif group.extraction_method == X_IMPORTED_EXTRACTION:
            joiner = group.csv_joiner
            csv_path = group.csv_location.value
            if not os.path.isfile(csv_path):
                continue
            exact_match = None
            best_match = None
            for i, candidate in enumerate(self.imported_metadata):
                assert isinstance(candidate, self.ImportedMetadata)
                if candidate.is_match(csv_path, joiner,
                                      self.CSV_JOIN_NAME,
                                      self.IPD_JOIN_NAME):
                    exact_match = i
                    break
                elif best_match is None and candidate.path == csv_path:
                    # Remember the first same-path entry as a fallback.
                    best_match = i
            if exact_match is not None:
                # Path and joiner both match: reuse without changes.
                imported_metadata = self.imported_metadata.pop(exact_match)
                new_imported_metadata.append(imported_metadata)
            else:
                if best_match is not None:
                    # Use the remembered index, not the loop variable,
                    # which points at the last entry once the loop ends.
                    imported_metadata = \
                        self.imported_metadata.pop(best_match)
                else:
                    try:
                        imported_metadata = self.ImportedMetadata(csv_path)
                    except Exception:
                        # Best effort: an unreadable CSV skips this group
                        logger.debug(
                            "Failed to load csv file: %s" % csv_path)
                        continue
                new_imported_metadata.append(imported_metadata)
                joiner.entities[self.CSV_JOIN_NAME] = \
                    imported_metadata.get_csv_metadata_keys()
                joiner.entities[self.IPD_JOIN_NAME] = \
                    list(ipd_metadata_keys)
                imported_metadata.set_joiner(joiner,
                                             self.CSV_JOIN_NAME,
                                             self.IPD_JOIN_NAME)
            # Keys from this CSV are available to later groups' joiners.
            ipd_metadata_keys.update(
                imported_metadata.get_csv_metadata_keys())
    self.imported_metadata = new_imported_metadata
def update_imported_metadata(self):
    '''Rebuild self.imported_metadata to match the extraction settings

    Walks self.extraction_methods in order, accumulating the set of
    known metadata keys (seeded from self.ipd_metadata_keys if present):

    * manual extraction groups contribute the tokens found in their
      file or folder regular expression
    * imported extraction groups whose CSV file exists are paired with
      an ImportedMetadata object: an existing one that fully matches
      (is_match) is reused as-is; failing that, the first existing one
      with the same path is reused and its joiner is reset; failing
      that, a new one is loaded from the CSV file. Groups whose CSV file
      is missing or cannot be loaded are skipped.

    On exit, self.imported_metadata holds only the objects that were
    paired with a group.
    '''
    new_imported_metadata = []
    ipd_metadata_keys = set(getattr(self, "ipd_metadata_keys", []))
    for group in self.extraction_methods:
        if group.extraction_method == X_MANUAL_EXTRACTION:
            if group.source == XM_FILE_NAME:
                regexp = group.file_regexp
            else:
                regexp = group.folder_regexp
            ipd_metadata_keys.update(
                cpmeas.find_metadata_tokens(regexp.value))
        elif group.extraction_method == X_IMPORTED_EXTRACTION:
            joiner = group.csv_joiner
            csv_path = group.csv_location.value
            if not os.path.isfile(csv_path):
                continue
            exact_match = None
            best_match = None
            for i, candidate in enumerate(self.imported_metadata):
                assert isinstance(candidate, self.ImportedMetadata)
                if candidate.is_match(csv_path, joiner,
                                      self.CSV_JOIN_NAME,
                                      self.IPD_JOIN_NAME):
                    exact_match = i
                    break
                elif best_match is None and candidate.path == csv_path:
                    # Remember the first same-path entry as a fallback.
                    best_match = i
            if exact_match is not None:
                # Path and joiner both match: reuse without changes.
                imported_metadata = self.imported_metadata.pop(exact_match)
                new_imported_metadata.append(imported_metadata)
            else:
                if best_match is not None:
                    # Use the remembered index, not the loop variable,
                    # which points at the last entry once the loop ends.
                    imported_metadata = \
                        self.imported_metadata.pop(best_match)
                else:
                    try:
                        imported_metadata = self.ImportedMetadata(csv_path)
                    except Exception:
                        # Best effort: an unreadable CSV skips this group
                        logger.debug(
                            "Failed to load csv file: %s" % csv_path)
                        continue
                new_imported_metadata.append(imported_metadata)
                joiner.entities[self.CSV_JOIN_NAME] = \
                    imported_metadata.get_csv_metadata_keys()
                joiner.entities[self.IPD_JOIN_NAME] = \
                    list(ipd_metadata_keys)
                imported_metadata.set_joiner(joiner,
                                             self.CSV_JOIN_NAME,
                                             self.IPD_JOIN_NAME)
            # Keys from this CSV are available to later groups' joiners.
            ipd_metadata_keys.update(
                imported_metadata.get_csv_metadata_keys())
    self.imported_metadata = new_imported_metadata
def validate_module(self, pipeline):
    '''Check the module settings for reserved metadata tags

    pipeline - current pipeline (not consulted by this method)

    For every manual extraction group, the tokens in the file or folder
    regular expression (whichever the group's source selects) are
    compared against cpmeas.RESERVED_METADATA_TAGS. The first reserved
    token found raises a cps.ValidationError attached to the offending
    regular expression setting.
    '''
    for group in self.extraction_methods:
        if not (group.extraction_method == X_MANUAL_EXTRACTION):
            continue
        if group.source == XM_FILE_NAME:
            re_setting = group.file_regexp
        else:
            re_setting = group.folder_regexp
        for token in cpmeas.find_metadata_tokens(re_setting.value):
            if token not in cpmeas.RESERVED_METADATA_TAGS:
                continue
            message = 'The metadata tag, "%s", is reserved for use by CellProfiler. Please use some other tag name.' % token
            raise cps.ValidationError(message, re_setting)
def get_metadata_keys(self):
    """Return a collection of metadata keys to be associated with files

    Refreshes the imported metadata first, then gathers keys from every
    group in self.extraction_methods:

    * manual extraction - the tokens in the file or folder regular
      expression
    * imported extraction - the metadata keys of the group's imported
      metadata; if none is available a warning is logged and the group
      contributes nothing
    * automatic extraction - the T and Z keys

    Returns a list of the distinct keys (in no particular order).
    """
    keys = set()
    self.update_imported_metadata()
    for group in self.extraction_methods:
        if group.extraction_method == X_MANUAL_EXTRACTION:
            if group.source == XM_FILE_NAME:
                regexp = group.file_regexp
            else:
                regexp = group.folder_regexp
            keys.update(cpmeas.find_metadata_tokens(regexp.value))
        elif group.extraction_method == X_IMPORTED_EXTRACTION:
            imported_metadata = self.get_imported_metadata_for_group(group)
            if imported_metadata is None:
                # Nothing to read keys from: warn and move on instead of
                # dereferencing None below.
                logger.warning("Unable to import metadata from %s",
                               group.csv_location.value)
                continue
            keys.update(imported_metadata.metadata_keys)
        elif group.extraction_method == X_AUTOMATIC_EXTRACTION:
            # Assume that automatic extraction will populate T and Z
            keys.add(cpp.ImagePlaneDetails.MD_T)
            keys.add(cpp.ImagePlaneDetails.MD_Z)
    return list(keys)
def get_metadata_keys(self):
    '''Return a collection of metadata keys to be associated with files

    Refreshes the imported metadata first, then gathers keys from every
    group in self.extraction_methods:

    * manual extraction - the tokens in the file or folder regular
      expression
    * imported extraction - the metadata keys of the group's imported
      metadata; if none is available a warning is logged and the group
      contributes nothing
    * automatic extraction - the T and Z keys

    Returns a list of the distinct keys (in no particular order).
    '''
    keys = set()
    self.update_imported_metadata()
    for group in self.extraction_methods:
        if group.extraction_method == X_MANUAL_EXTRACTION:
            if group.source == XM_FILE_NAME:
                regexp = group.file_regexp
            else:
                regexp = group.folder_regexp
            keys.update(cpmeas.find_metadata_tokens(regexp.value))
        elif group.extraction_method == X_IMPORTED_EXTRACTION:
            imported_metadata = self.get_imported_metadata_for_group(group)
            if imported_metadata is None:
                # Nothing to read keys from: warn and move on instead of
                # dereferencing None below.
                logger.warning("Unable to import metadata from %s",
                               group.csv_location.value)
                continue
            keys.update(imported_metadata.metadata_keys)
        elif group.extraction_method == X_AUTOMATIC_EXTRACTION:
            # Assume that automatic extraction will populate T and Z
            keys.add(cpp.ImagePlaneDetails.MD_T)
            keys.add(cpp.ImagePlaneDetails.MD_Z)
    return list(keys)