def run_cv_batch(cv_configs):
    """
    Runs cross-validation for every experimental configuration in 'cv_configs'.
    Jobs whose results file already exists or is locked, or whose input data
    file is missing or locked, are skipped.
    """
    for job_settings in cv_configs:
        results_file = job_settings['results']
        data_file = job_settings['data']
        if os.path.exists(results_file):
            log.warning('Results file %s already exists; skipping' % results_file)
            continue
        if Condorizable.is_locked(results_file):
            log.warning('Results file %s is locked; another job may be writing to this file' % results_file)
            continue
        if not os.path.exists(data_file):
            log.warning('ARFF file %s does not exist; skipping' % data_file)
            continue
        if Condorizable.is_locked(data_file):
            log.warning('Data file %s is locked; another job may be writing to this file' % data_file)
            continue
        CrossValidationTask(kw=job_settings)
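
# A minimal usage sketch for run_cv_batch. Only the 'results' and 'data' keys
# are inspected above; every other key is passed straight through to
# CrossValidationTask, so the 'folds' and 'classifier' entries below are
# hypothetical placeholders, not settings defined in this module.
#
#   cv_configs = [
#       {'results': 'results/gist_cv.txt',   # skipped if it exists or is locked
#        'data': 'data/gist_features.arff',  # skipped if missing or locked
#        'folds': 10,                        # hypothetical pass-through setting
#        'classifier': 'svm'},               # hypothetical pass-through setting
#   ]
#   run_cv_batch(cv_configs)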
class MakeGistCorpusTask(Condorizable):
    binary = Condorizable.path_to_script(__file__)

    def check_args(self, argv):
        parser = ArgumentParser()
        parser.add_argument('file_list', type=str, help='File containing list of images to process')
        parser.add_argument('dest_corpus', type=str, help='Path to write GIST corpus')
        parser.add_argument('--labeler', type=str, help='Labeler to apply')
        parser.add_argument('--color', action='store_true', help='Color GIST?')
        options = parser.parse_args(argv[1:])

        if options.labeler is None:
            log.warning('no labeler provided')
        elif options.labeler not in labelers.registry:
            labeler_names = ', '.join(sorted(labelers.registry.keys()))
            parser.error('Invalid labeler "%s"; available options are %s' % (options.labeler, labeler_names))
        if not os.path.exists(options.file_list):
            parser.error('Input file %s does not exist!' % options.file_list)

        self.add_output_file(options.dest_corpus)
        return options

    def run(self, options):
        labeler = None if options.labeler is None else labelers.registry[options.labeler]
        # Wait to instantiate the corpus writer until we know the
        # dimensionality of the descriptors we'll be writing
        writer = None

        log.info('Writing SAM corpus to %s' % options.dest_corpus)
        filenames = open(options.file_list).readlines()
        for i, filename in enumerate(filenames):
            filename = filename.strip()
            log.info('Processing image %d/%d' % (i + 1, len(filenames)))

            descriptor = color_gist(filename) if options.color else grayscale_gist(filename)
            if writer is None:
                dim = descriptor.size
                writer = CorpusWriter(options.dest_corpus, data_series='sam', dim=dim)

            normalized_descriptor = l2_normalize(descriptor)
            doc_label = labeler(filename) if labeler else None
            writer.write_doc(ascolvector(normalized_descriptor), name=filename, label=doc_label)

        # Guard against an empty file list, in which case no writer was created
        if writer is not None:
            writer.close()
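
# The labeler machinery lives elsewhere; this is a sketch only, assuming
# labelers.registry is a dict-like mapping from a name to a callable that
# takes an image path and returns a string label (consistent with the
# registry lookups above). The 'parent_dir' rule and function name are
# illustrative assumptions, not the project's actual labelers.
def parent_dir_labeler(filename):
    # Label each image by its immediate parent directory,
    # e.g. 'images/beach/img01.jpg' -> 'beach'.
    return os.path.basename(os.path.dirname(filename))

# Hypothetical registration, assuming a plain dict registry:
#   labelers.registry['parent_dir'] = parent_dir_labeler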
class MakeGistArffTask(Condorizable):
    binary = Condorizable.path_to_script(__file__)

    def check_args(self, argv):
        parser = ArgumentParser()
        parser.add_argument('file_list', type=str, help='File containing list of images to process')
        parser.add_argument('dest', type=str, help='Destination ARFF file')
        parser.add_argument('--labeler', type=str, required=True, choices=labelers.registry.keys(),
                            help='Labeler to apply')
        parser.add_argument('--color', action='store_true', help='Color GIST?')
        parser.add_argument('--normalize', action='store_true', help='L2 normalize GIST data?')
        options = parser.parse_args(argv[1:])

        if not os.path.exists(options.file_list):
            parser.error('Input file %s does not exist!' % options.file_list)

        self.add_output_file(options.dest)
        return options

    def run(self, options):
        labeler = labelers.registry[options.labeler]
        # Strip newlines up front so the labeler sees clean paths
        filenames = [line.strip() for line in open(options.file_list)]
        labels = [labeler(each) for each in filenames]
        # The ARFF header needs the full class list, so compute it before writing
        class_list = sorted(set(labels))

        writer = ArffWriter(options.dest, class_list=class_list)
        log.info('Writing GIST data to %s' % options.dest)
        for i, (filename, label) in enumerate(izip(filenames, labels)):
            log.info('Processing image %d/%d' % (i + 1, len(filenames)))
            descriptor = color_gist(filename) if options.color else grayscale_gist(filename)
            if options.normalize:
                descriptor = l2_normalize(descriptor)
            writer.write_example(descriptor, label)
        writer.close()
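
# l2_normalize is imported from elsewhere in the project; a minimal numpy
# sketch of its assumed behavior (scale a descriptor to unit Euclidean
# length, guarding against a zero vector). The name and zero-vector handling
# here are assumptions, not the project's actual implementation.
import numpy as np

def l2_normalize_sketch(descriptor):
    # Divide by the L2 norm so the descriptor lies on the unit sphere
    norm = np.linalg.norm(descriptor)
    return descriptor if norm == 0 else descriptor / norm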
def run_sam_batch(vem_configs):
    """
    Runs SAM on every experimental configuration defined by 'vem_configs'.
    Jobs that have already been run or are currently running (i.e. for which
    the model file already exists, or for which a lock file exists) will be
    skipped.
    """
    for job_settings in vem_configs:
        model_file = job_settings['model']
        if os.path.exists(model_file):
            log.warning('Model %s already exists; skipping' % os.path.basename(model_file))
            continue
        if Condorizable.is_locked(model_file):
            log.warning("Model %s is locked; check that another job isn't writing to this path" %
                        os.path.basename(model_file))
            continue
        VEMTask(kw=job_settings)
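
# A minimal usage sketch for run_sam_batch. 'model' is the only key inspected
# above; the remaining keys are hypothetical examples of settings passed
# through to VEMTask, not options defined in this module.
#
#   vem_configs = [
#       {'model': 'models/sam_gist.pkl',   # skipped if it exists or is locked
#        'corpus': 'corpora/gist.sam',     # hypothetical pass-through setting
#        'T': 50},                         # hypothetical pass-through setting
#   ]
#   run_sam_batch(vem_configs)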