Example 1
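All of these snippets appear to come from a single MongoDB-backed experiment pipeline module. A minimal sketch of the module-level imports they assume follows; DB_NAME and the helpers (remove_existing, createCertificateDict, qsub, wait_and_get_statuses, get_most_recent_files, get_filename, son_escape, reach_in, generate_splits, plus the rendering, filter_generation, svm, v1f, and v1_pyfft modules) are project-internal and not reconstructed here.

import cPickle              # Python 2 pickling for certificates and filterbanks
import bson                 # Examples 9-10 pass document_class=bson.SON
import gridfs               # MongoDB GridFS blob store
import pymongo as pm        # the snippets use the legacy pm.Connection API
import numpy as np          # Example 9: argmax over accuracies
import scipy as sp          # Example 10: sp.row_stack, sp.array
from bson import SON        # ordered documents used as the Mongo document class

DB_NAME = 'v1'              # assumption: Examples 3 and 9 hardcode conn['v1']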
def extract_features(feature_certificate,
                     image_certificate,
                     model_certificate,
                     feature_config,
                     feature_hash,
                     convolve_func_name):

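    # load the certificates written by the upstream image- and
    # model-generation stages; each records the content hash of that
    # stage's output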
    image_certdict = cPickle.load(open(image_certificate))
    image_hash = image_certdict['image_hash']
    image_args = image_certdict['args']

    model_certdict = cPickle.load(open(model_certificate))
    model_hash = model_certdict['model_hash']
    model_args = model_certdict['args']

    conn = pm.Connection(document_class = SON)
    db = conn[DB_NAME]
    f_coll = db['features.files']
    f_fs = gridfs.GridFS(db,'features')
    
    remove_existing(f_coll,f_fs,feature_hash)

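    # serial path: run the core extraction over the full image/model set
    # in-process (extract_features_parallel in Example 2 batches it instead)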
    extract_features_core(image_certificate,
                          model_certificate,
                          feature_hash,
                          image_hash,
                          model_hash,
                          convolve_func_name)
     
    createCertificateDict(feature_certificate,{'feature_hash':feature_hash,
                                               'image_hash':image_hash,
                                               'model_hash':model_hash,
                                               'args':feature_config,})
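Every example finishes by writing a certificate via the project-internal createCertificateDict. Its implementation is not shown here; since certificates are read back with cPickle.load(open(path)) at the top of Examples 1 and 2, a minimal sketch consistent with that usage (hypothetical, not the project's actual code) is:

def createCertificateDict(path, certdict):
    # hypothetical sketch: persist the certificate dict so downstream
    # stages can recover hashes and args via cPickle.load(open(path))
    with open(path, 'wb') as f:
        cPickle.dump(certdict, f)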
Example 2
def extract_features_parallel(feature_certificate,
                              image_certificate,
                              model_certificate,
                              feature_config,
                              feature_hash,
                              convolve_func_name,
                              batch_size):
    
    image_certdict = cPickle.load(open(image_certificate))
    image_hash = image_certdict['image_hash']
    image_args = image_certdict['args']

    model_certdict = cPickle.load(open(model_certificate))
    model_hash = model_certdict['model_hash']
    model_args = model_certdict['args']

    conn = pm.Connection(document_class = SON)
    db = conn[DB_NAME]
    f_coll = db['features.files']
    f_fs = gridfs.GridFS(db,'features')
    
    remove_existing(f_coll,f_fs,feature_hash)
    
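    # partition the image/model cross product into grid-job-sized batches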
    limits = get_feature_batches(image_hash, model_hash,
                                 db['images.files'], db['models.files'],
                                 batch_size=batch_size)
    
    jobids = []

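    # route numpy (CPU) extraction and pyfft (GPU) extraction to
    # separate grid-engine queues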
    if convolve_func_name == 'numpy':
        queueName = 'extraction_cpu.q'
    elif convolve_func_name == 'pyfft':
        queueName = 'extraction_gpu.q'
    else:
        raise ValueError('unknown convolve_func_name: %r' % convolve_func_name)

    opstring = '-l qname=' + queueName + ' -o /home/render -e /home/render'
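    # submit one grid job per batch; each job runs extract_features_core
    # on its slice of the cross product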
    for (ind,limit) in enumerate(limits):
        im_from,im_to,m_from,m_to = limit
        jobid = qsub(extract_features_core,[(image_certificate,
                                             model_certificate,
                                             feature_hash,
                                             image_hash,
                                             model_hash,
                                             convolve_func_name),
                                             {'im_skip':im_from,
                                              'im_limit':im_to-im_from,
                                              'm_skip':m_from,
                                              'm_limit':m_to-m_from
                                              }],
                                            opstring=opstring)
        jobids.append(jobid)

    createCertificateDict(feature_certificate,{'feature_hash':feature_hash,
                                               'image_hash':image_hash,
                                               'model_hash':model_hash,
                                               'args':feature_config})

    return {'child_jobs':jobids}
Example 3
def train_test_loop(outfile,extract_creates,task_config,feature_config_path,hash):

    feature_config = get_config(feature_config_path)
        
    base_query = SON([('__config_hash__',hash)])
    
    image_params = SON([('image',feature_config['image'])])
    models_params = feature_config['models']

    ntrain = task_config['ntrain']
    ntest = task_config['ntest']
    ntrain_pos = task_config.get('ntrain_pos')
    N = task_config.get('N',10)
    query = task_config['query']  
    base_query.update(reach_in('config',task_config.get('universe',SON([]))))
 
    print('\n')
    print('BASE',base_query)
    print('\n')
    
    conn = pm.Connection(document_class=SON)
    db = conn['v1']
    fs = gridfs.GridFS(db, collection = 'model_performance')
    
    cquery = reach_in('config',query)
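    # run a full train/test evaluation per model configuration, restricted
    # to that model's slice of the universe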
    for m in models_params:
        base_query_copy = base_query.copy()
        base_query_copy.update(reach_in('config.model',m))
        splitdata, results = train_test(cquery,'v1','features',ntrain,ntest,
                                        ntrain_pos=ntrain_pos,N=N,
                                        universe=base_query_copy)
        
        splitpickle = cPickle.dumps(splitdata)
        
        data = SON([('feature_config_path',feature_config_path),
                    ('model',m),
                    ('task',son_escape(task_config)),
                    ('image__aggregate__',son_escape(feature_config['image']))])
        filename = get_filename(data)
        data.update(results)
        data['filename'] = filename
        fs.put(splitpickle,**data)
        
    createCertificateDict(outfile,{'task_config':task_config,'feature_config':feature_config,'feature_config_path':feature_config_path})    
Example 4
def generate_images_parallel(outfile,im_hash,config_gen):

    conn = pm.Connection(document_class = SON)
    db = conn[DB_NAME]
    im_coll = db['images.files']
    im_fs = gridfs.GridFS(db,'images')
    
    remove_existing(im_coll,im_fs,im_hash)
    
    X = rendering.config_gen(config_gen)
       
    jobids = []
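    # submit one render job per generated image configuration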
    for (i,x) in enumerate(X):
        x['image']['generator'] = config_gen['images']['generator'] 
        jobid = qsub(generate_and_insert_single_image,(x,im_hash),opstring='-pe orte 2 -l qname=rendering.q -o /home/render -e /home/render')  
        jobids.append(jobid)
        
    createCertificateDict(outfile,{'image_hash':im_hash,'args':config_gen})

    return {'child_jobs':jobids}
Example 5
def extract_and_evaluate(outfile,image_certificate_file,model_certificate_file,cpath,convolve_func_name,task,ext_hash):

    (model_configs, image_config_gen, model_hash, image_hash, task_list,
     perf_col, split_coll, split_fs, splitperf_coll, splitperf_fs) = prepare_extract_and_evaluate(ext_hash,
                                                                                                  image_certificate_file,
                                                                                                  model_certificate_file,
                                                                                                  task)
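    # serial version: extract and evaluate every (model, task, split)
    # combination in-process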
    for m in model_configs: 
        print('Evaluating model',m)
        for task in task_list:  
            print('task',task)
            split_results = []
            splits = generate_splits(task,image_hash,'images') 
            for (ind,split) in enumerate(splits):
                put_in_split(split,image_config_gen,m,task,ext_hash,ind,split_fs)
                res = extract_and_evaluate_core(split,m,convolve_func_name,task,None)    
                put_in_split_result(res,image_config_gen,m,task,ext_hash,ind,splitperf_fs)
                split_results.append(res)
            put_in_performance(split_results,image_config_gen,m,model_hash,image_hash,perf_col,task,ext_hash)

        
    createCertificateDict(outfile,{'image_file':image_certificate_file,'models_file':model_certificate_file})
Example 6
def generate_images(outfile,im_hash,config_gen):

    conn = pm.Connection(document_class = SON)
    db = conn[DB_NAME]
    im_coll = db['images.files']
    im_fs = gridfs.GridFS(db,'images')
    
    remove_existing(im_coll,im_fs,im_hash)
    
    X = rendering.config_gen(config_gen)
    
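    # render each configuration and store the encoded image in GridFS,
    # tagged with its config and the run hash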
    for (i,x) in enumerate(X):
        if i % 100 == 0:
            print(i,x)
        image_string = rendering.render_image(x['image']) 
        y = SON([('config',x)])
        filename = get_filename(x)
        y['filename'] = filename
        y['__hash__'] = im_hash
        im_fs.put(image_string,**y)
        
    createCertificateDict(outfile,{'image_hash':im_hash,'args':config_gen})
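remove_existing is another project-internal helper, called before every stage regenerates its output. Given that each stored file is tagged with a '__hash__' field (as in generate_images above), a plausible sketch (an assumption, not the actual implementation) is:

def remove_existing(coll, fs, file_hash):
    # hypothetical sketch: drop every GridFS file tagged with this run
    # hash so the stage can be re-run from scratch
    for doc in coll.find({'__hash__': file_hash}):
        fs.delete(doc['_id'])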
Example 7
def extract_and_evaluate_parallel(outfile,image_certificate_file,model_certificate_file,cpath,convolve_func_name,task,ext_hash):
        
    (model_configs, image_config_gen, model_hash, image_hash, task_list,
     perf_col, split_coll, split_fs, splitperf_coll, splitperf_fs) = prepare_extract_and_evaluate(ext_hash,
                                                                                                  image_certificate_file,
                                                                                                  model_certificate_file,
                                                                                                  task)

    
    jobids = []
    if convolve_func_name == 'numpy':
        opstring = '-l qname=extraction_cpu.q'
    elif convolve_func_name == 'pyfft':
        opstring = '-l qname=extraction_gpu.q -o /home/render -e /home/render'
    else:
        raise ValueError('unknown convolve_func_name: %r' % convolve_func_name)
        
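    # phase 1: submit one extraction/evaluation job per (model, task, split)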
    for m in model_configs: 
        print('Evaluating model',m)
        for task in task_list:
            classifier_kwargs = task.get('classifier_kwargs',{})    
            print('task',task)
            splits = generate_splits(task,image_hash,'images') 
            for (ind,split) in enumerate(splits):
                put_in_split(split,image_config_gen,m,task,ext_hash,ind,split_fs)  
                jobid = qsub(extract_and_evaluate_parallel_core,(image_config_gen,m,task,ext_hash,ind,convolve_func_name),opstring=opstring)
                jobids.append(jobid)

    print(jobids)
    statuses = wait_and_get_statuses(jobids)
    
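    # phase 2: once all jobs have finished, collect the per-split results
    # and aggregate them into one performance record per (model, task)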
    for m in model_configs: 
        print('Evaluating model',m)
        for task in task_list:
            split_results = get_most_recent_files(splitperf_coll,
                                                  {'__hash__':ext_hash,
                                                   'task':son_escape(task),
                                                   'model':m['config']['model'],
                                                   'images':son_escape(image_config_gen['images'])})
            put_in_performance(split_results,image_config_gen,m,model_hash,image_hash,perf_col,task,ext_hash)

    createCertificateDict(outfile,{'image_file':image_certificate_file,'models_file':model_certificate_file})
Example 8
def generate_models(outfile,m_hash,config_gen):

    conn = pm.Connection(document_class = SON)
    db = conn[DB_NAME]
    m_coll = db['models.files']
    m_fs = gridfs.GridFS(db,'models')
    
    remove_existing(m_coll,m_fs,m_hash)
    
    M = model_config_generator(config_gen)       
    
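    # build each model's filterbank and store the pickled filters in
    # GridFS, tagged with the model config and run hash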
    for (i,m) in enumerate(M):
        filterbank = filter_generation.get_filterbank(m['model']) 
        filterbank_string = cPickle.dumps(filterbank)
        if i % 100 == 0:
            print(i,m)
        
        y = SON([('config',m)])
        filename = get_filename(m)
        y['filename'] = filename
        y['__hash__'] = m_hash
        m_fs.put(filterbank_string,**y)
        
    createCertificateDict(outfile,{'model_hash':m_hash,'args':config_gen})
Example 9
def greedy_optimization(outfile,task,image_certificate_file,initial_model,convolve_func,rep_limit, modifier_args,modifier):

    conn = pm.Connection(document_class=bson.SON)
    db = conn['v1']
    
    opt_fs = gridfs.GridFS(db,'optimized_performance')
    
    image_coll = db['raw_images.files']
    image_fs = gridfs.GridFS(db,'raw_images')
    
    image_certdict = cPickle.load(open(image_certificate_file))
    print('using image certificate', image_certificate_file)
    
    image_hash = image_certdict['run_hash']
    image_args = image_certdict['out_args']

    if convolve_func == v1f.v1like_filter_pyfft:
        v1_pyfft.setup_pyfft()
    
  
    filterbanks = []
    perfs = []
    model_configs = []
    center_config = initial_model
    
    i = 0
    
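    # greedy hill-climbing: each round scores every unseen single-step
    # modification of the current center config and recenters on the best
    # one, stopping when no candidate improves on the incumbent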
    while rep_limit is None or i < rep_limit:
        i += 1
        print('Round', i)
        next_configs = [m for m in get_consistent_deltas(center_config,modifier) if m not in model_configs]

        if next_configs:
            next_results = [get_performance(task,image_hash,image_fs,m,convolve_func) for m in next_configs]
            next_perfs = [x[0] for x in next_results]
            next_filterbanks = [x[1] for x in next_results]
            next_perf_ac_max = np.array([x['test_accuracy'] for x in next_perfs]).max()
            perf_ac_max = max([x['test_accuracy'] for x in perfs]) if perfs else 0
            if next_perf_ac_max > perf_ac_max:
                next_perf_ac_argmax = np.array([x['test_accuracy'] for x in next_perfs]).argmax()
                center_config = next_configs[next_perf_ac_argmax]  
                print('\n\n')
                print('new best performance is', next_perf_ac_max, 'from model', center_config)
                print('\n\n')
                perfs.extend(next_perfs)  
                model_configs.extend(next_configs)
                filterbanks.extend(next_filterbanks)
            else:
                print('Breaking because no further optimization could be done.  Best existing performance was', perf_ac_max, 'while best next performance was', next_perf_ac_max)
                break
            
        else:
            print('Breaking because no next configs')
            break
        

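    # record the best configuration seen across all rounds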
    perfargmax = np.array([p['test_accuracy'] for p in perfs]).argmax()
    best_model = model_configs[perfargmax]
    best_performance = perfs[perfargmax]
        
    out_record = SON([('initial_model',initial_model),
                       ('task',son_escape(task)),
                       ('images',son_escape(image_args)),
                       ('images_hash',image_hash),
                       ('modifier_args',son_escape(modifier_args)),
                       ('modifier',modifier.__class__.__module__ + '.' + modifier.__class__.__name__)
                     ])   
    filename = get_filename(out_record)
    out_record['filename'] = filename
    out_record.update(SON([('performances',perfs)]))
    out_record.update(SON([('best_model',best_model)]))
    out_record.update(SON([('best_performance',best_performance)]))
    out_record.update(SON([('num_steps',len(model_configs))]))
    out_record.update(SON([('models',model_configs)]))
    outdata = cPickle.dumps(filterbanks)
        
    opt_fs.put(outdata,**out_record)
     
    if convolve_func == v1f.v1like_filter_pyfft:
        v1_pyfft.cleanup_pyfft() 
      
    createCertificateDict(outfile,{'image_file':image_certificate_file})
Example 10
def evaluate(outfile,feature_certificate,cpath,task,ext_hash):

    conn = pm.Connection(document_class=bson.SON)
    db = conn[DB_NAME]
    
    perf_fs = gridfs.GridFS(db,'performance')
    perf_coll = db['performance.files']
    
    remove_existing(perf_coll,perf_fs,ext_hash)

    feature_certdict = cPickle.load(open(feature_certificate))
    feature_hash = feature_certdict['feature_hash']
    image_hash = feature_certdict['image_hash']
    model_hash = feature_certdict['model_hash']
    image_config_gen = feature_certdict['args']['images']
    model_col = db['models.files']
    feature_fs = gridfs.GridFS(db,'features')
    feature_col = db['features.files']
    
    stats = ['test_accuracy','ap','auc','mean_ap','mean_auc','train_accuracy']    
       
    if isinstance(task,list):
        task_list = task
    else:
        task_list = [task]
    
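    # evaluate every stored model configuration against every task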
    model_configs = get_most_recent_files(model_col,{'__hash__':model_hash})
    
    for m in model_configs:
        print('Evaluating model',m) 
        for task in task_list:
            task['universe'] = task.get('universe',SON([]))
            task['universe']['model'] = m['config']['model']
            print('task', task)
            classifier_kwargs = task.get('classifier_kwargs',{})    
            split_results = []
            splits = generate_splits(task,feature_hash,'features') 
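            # train and test one classifier per split, then average the
            # summary statistics across splits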
            for (ind,split) in enumerate(splits):
                print('split', ind)
                train_data = split['train_data']
                test_data = split['test_data']
                
                train_filenames = [t['filename'] for t in train_data]
                test_filenames = [t['filename'] for t in test_data]
                assert set(train_filenames).intersection(test_filenames) == set([])
                
                print('train feature extraction ...')
                train_features = sp.row_stack([load_features(f['filename'],feature_fs,m,task) for f in train_data])
                print('test feature extraction ...')
                test_features = sp.row_stack([load_features(f['filename'],feature_fs,m,task) for f in test_data])
                train_labels = split['train_labels']
                test_labels = split['test_labels']
    
                print('classifier ...')
                res = svm.classify(train_features,train_labels,test_features,test_labels,classifier_kwargs)
                print('Split test accuracy', res['test_accuracy'])
                split_results.append(res)
        
            model_results = SON([])
            for stat in stats:
                if stat in split_results[0] and split_results[0][stat] is not None:
                    model_results[stat] = sp.array([split_result[stat] for split_result in split_results]).mean()           
    
            out_record = SON([('model',m['config']['model']),
                              ('model_hash',model_hash), 
                              ('model_filename',m['filename']), 
                              ('images',son_escape(image_config_gen)),
                              ('image_hash',image_hash),
                              ('task',son_escape(task)),
                         ])
                                             
            filename = get_filename(out_record)
            out_record['filename'] = filename
            out_record['config_path'] = cpath
            out_record['__hash__'] = ext_hash
            out_record.update(model_results)
            print('dump out ...')
            out_data = cPickle.dumps(SON([('split_results',split_results),('splits',splits)]))
            
            perf_fs.put(out_data,**out_record)

    createCertificateDict(outfile,{'feature_file':feature_certificate})