def separate_validation_set(data_dir, num_per_class=384, ratio=1):
    '''For comparing the impact of different imbalance ratios: this
    script extracts a perfectly balanced validation set from a
    sequence of nets with strict training set inclusion.
    '''
    if os.path.exists(ojoin(data_dir, 'validation')):
        shutil.rmtree(ojoin(data_dir, 'validation'))
    # sort numerically so the last entry really is the min-ratio net
    nets = sorted([net for net in os.listdir(data_dir)
                   if net.startswith('net_')],
                  key=lambda net: int(net.split('_')[1]))
    os.mkdir(ojoin(data_dir, 'validation'))
    min_ratio_net_dir = ojoin(data_dir, 'net_' + str(len(nets) - 1))
    print 'min_ratio_net_dir: %s' % ('net_' + str(len(nets) - 1))
    removed = extract_validation_set(min_ratio_net_dir, num_per_class, ratio)
    for net in nets[:-1]:
        remove_imgs(ojoin(data_dir, net), removed)
    print "Done. Now on each graphic machine, you need to:"
    print "  1) run batching on validation here on graphic02."
    print "  2) scp the validation-batch dir and a net-raw dir from graphic02"
    print "  3) run batching on net dir"
    print "  4) copy validation-batch dir to each remote net-batch dir"
    print ("  5) copy validation batches to net dir, changing batch numbers "
           "so that they follow from the max batch in net dir. "
           "NOTE: you have a script for this :) merge_validation_batches()")

def imbalance_experiment(data_dir, min_ratio, max_ratio, num_nets):
    '''Given a data directory containing a subdir for each class, a
    range of imbalance ratios to cover, and a number of nets to train,
    creates num_nets directories, each holding a subdir for each
    class, with max_ratio as imbalance for net_0, ..., min_ratio as
    imbalance for net_{num_nets-1}.
    '''
    if min_ratio < 1 or max_ratio < 1:
        print 'Error: ratios must be >= 1.'
        sys.exit(1)
    # compute the common ratio of the geometric sequence that gives
    # the imbalance ratio to aim for in each net
    step = compute_step(min_ratio, max_ratio, num_nets)
    # move contents of data_dir to a new subdir, 'all'
    if os.path.isdir(ojoin(data_dir, 'all')):
        shutil.rmtree(ojoin(data_dir, 'all'))
    all_names = os.listdir(data_dir)
    os.mkdir(ojoin(data_dir, 'all'))
    for name in all_names:
        shutil.move(ojoin(data_dir, name), ojoin(data_dir, 'all', name))
    # recursively make subdirs for each net, preserving strict set
    # inclusion from net[i] to net[i+1]
    nets = ['all'] + ['net_' + str(i) for i in range(num_nets)]
    random_delete_recursive(data_dir, step, nets, ratio=max_ratio, i=0)
    print 'NOTE: net_0 has highest imbalance ratio.'

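# compute_step is called above but not defined in this section. A minimal
# sketch under the assumption implied by the docstring and the recursion in
# random_delete_recursive: imbalance starts at max_ratio for net_0 and is
# divided by a constant step per net, landing on min_ratio at the last net.
def compute_step(min_ratio, max_ratio, num_nets):
    '''Common ratio of the geometric sequence r_i = max_ratio / step**i,
    chosen so that r_{num_nets-1} == min_ratio.
    '''
    if num_nets < 2:
        return 1.0
    return (float(max_ratio) / min_ratio) ** (1.0 / (num_nets - 1))
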
def random_delete_aux(data_dir, ratio):
    '''Randomly deletes as few images from outnumbering class dirs as
    possible such that #biggest / #smallest == ratio.
    '''
    data_dir = os.path.abspath(data_dir)
    dump = raw_input('Do you want a json dump in %s of which files '
                     'were randomly deleted? (Y/any) ' % (data_dir))
    # D is for dict, d is for directory
    D = {}
    os.chdir(data_dir)
    dirs = [d for d in os.listdir(data_dir)
            if os.path.isdir(ojoin(data_dir, d))]
    print 'the directories are: %s' % (dirs)
    for d in dirs:
        D[d] = {}
        D[d]['total'] = len(os.listdir(ojoin(data_dir, d)))
    dirs = [(d, D[d]['total']) for d in D.keys()]
    dirs = sorted(dirs, key=lambda x: x[1])
    print '%s is smallest class with %i images' % (dirs[0][0], dirs[0][1])
    for d in D.keys():
        D[d]['remove'] = max(0, int(D[d]['total'] - (ratio * dirs[0][1])))
        print '%s has %i images so %i will be randomly removed' % (
            d, D[d]['total'], D[d]['remove'])
        if D[d]['remove'] > 0:
            D = random_delete_aux2(data_dir, d, D)
    if dump == 'Y':
        json.dump(D, open(data_dir + '/random_remove_dict.txt', 'w'))
    return D

def random_delete_aux2(data_dir, d, D, delete_hard=False):
    D[d]['deleted'] = random.sample(os.listdir(ojoin(data_dir, d)),
                                    D[d]['remove'])
    print 'successfully condemned images from %s' % (d)
    back = os.getcwd()
    os.chdir(ojoin(data_dir, d))
    for link in D[d]['deleted']:
        os.remove(link)
    os.chdir(back)
    return D

def random_delete_recursive(data_dir, step, nets, ratio, i):
    # os.mkdir(ojoin(data_dir, nets[i+1]))
    if os.path.isdir(ojoin(data_dir, nets[i + 1])):
        shutil.rmtree(ojoin(data_dir, nets[i + 1]))
    shutil.copytree(ojoin(data_dir, nets[i]), ojoin(data_dir, nets[i + 1]),
                    symlinks=True)
    random_delete_aux(ojoin(data_dir, nets[i + 1]), ratio)
    if i + 2 < len(nets):
        random_delete_recursive(data_dir, step, nets,
                                float(ratio) / step, i + 1)

def convert_bboxes(self, p, img_id):
    im = Image.open(ojoin(p, '{}.jpg'.format(img_id)))
    bboxes = self.read_csv(ojoin(p, '{}.csv'.format(img_id)))
    new_bbox = []
    for bbox in bboxes:
        bbox = self.convert(im.size, bbox)
        # Add a zero for the class. Only 1 here
        new_bbox.append([0] + bbox)
    new_bbox = pd.DataFrame(new_bbox)
    # Dump the file to a csv file with a space separator
    fname = ojoin(p, 'labels', '{}.txt'.format(img_id))
    new_bbox.to_csv(fname, index=None, header=None, sep=" ")

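# self.convert is called above but not shown. A minimal sketch of the usual
# pixel-box to YOLO conversion, assuming read_csv yields boxes as
# (xmin, xmax, ymin, ymax) in pixels; that column order is an assumption.
def convert(self, size, box):
    '''Map a pixel bbox to YOLO's normalized (x_center, y_center, w, h).'''
    dw, dh = 1.0 / size[0], 1.0 / size[1]
    x = (box[0] + box[1]) / 2.0   # box centre, pixels
    y = (box[2] + box[3]) / 2.0
    w = box[1] - box[0]           # box width/height, pixels
    h = box[3] - box[2]
    return [x * dw, y * dh, w * dw, h * dh]
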
def extract_validation_set(net_dir, num_per_class, ratio):
    '''Randomly move num_per_class images out of each class dir, and
    into a new sibling dir called validation.
    '''
    classes = os.listdir(net_dir)
    print 'going to extract %i images from: %s' % (num_per_class, classes)
    d = {}
    for c in classes:
        os.mkdir(ojoin(net_dir, '..', 'validation', c))
        d[c] = random.sample(os.listdir(ojoin(net_dir, c)), num_per_class)
        for fname in d[c]:
            shutil.move(ojoin(net_dir, c, fname),
                        ojoin(net_dir, '..', 'validation', c))
    return d

def dump_to_files(Keep, dump, data_info):
    if os.path.exists(data_info):
        rmtree(data_info)
    os.mkdir(data_info)
    dump_fnames = ['train.txt', 'val.txt', 'test.txt']
    for i in xrange(3):
        dfile = open(ojoin(data_info, dump_fnames[i]), 'w')
        dfile.writelines(["%s %i\n" % (f, num) for (f, num) in dump[i]])
        dfile.close()
    # write to read file how to interpret values as classes
    read_file = open(ojoin(data_info, 'read.txt'), 'w')
    read_file.writelines(["%i %s\n" % (num, label)
                          for (num, label) in enumerate(Keep.keys())])
    read_file.close()

def get_train_file(model_dir):
    for fname in os.listdir(model_dir):
        if fname.endswith('train.prototxt'):
            return open(ojoin(model_dir, fname), 'r')
    print 'no train prototxt found'
    sys.exit()

def main():
    config_file = ojoin(CFG_PATH, CFG_FILE)
    if SHOW:
        subs = get_avail_subs(config_file)
        print("Following subscriptions are available in your config:")
        print("\n".join(subs))
    elif ADD_SUB:
        print("A new configuration entry will be created.")
        print("Please provide the appropriate information:")
        sub_name = input("Subscription name: ")
        sub_id = input("Subscription ID: ")
        tenant_id = input("Tenant ID: ")
        client_id = input("Client ID: ")
        client_secret = getpass.getpass("Client Secret: ")
        new_cfg = add_sub_cfg(sub_name, tenant_id, sub_id, client_id,
                              client_secret, config_file)
        with open(config_file, 'w') as fh:
            json.dump(new_cfg, fh, indent=2)
    elif DEL_SUB:
        new_cfg = del_sub_cfg(DEL_SUB, config_file)
        with open(config_file, 'w') as fh:
            json.dump(new_cfg, fh, indent=2)
        print(f"{DEL_SUB} deleted.")
    else:
        variables = get_sub_secrets(SUBS, config_file)
        if variables:
            print(exp_templ.format(**variables))
        else:
            print(f"Subscription {SUBS} does not exist in config file.")
            sys.exit(1)

class Friends(BaseHandler):
    '''Handles the list of friends'''
    url = ojoin(Main.url, "friends")

    @web.authenticated
    def get(self):
        self.render('friendlist.html')

def test_wellintersections_tvdrange_wfilter(loadwells1):
    """Find well crossings using coarser sampling to Fence, with
    wfilter settings.
    """
    wfilter = {
        "parallel": {"xtol": 4.0, "ytol": 4.0, "ztol": 2.0,
                     "itol": 10, "atol": 5.0}
    }
    mywell_list = loadwells1
    mywells = Wells()
    mywells.wells = mywell_list
    print("Limit TVD and downsample...")
    mywells.limit_tvd(1300, 1400)
    mywells.downsample(interval=6)
    print("Limit TVD and downsample...DONE")
    dfr = mywells.wellintersections(wfilter=wfilter)
    dfr.to_csv(ojoin(td, "wells_crossings_filter.csv"))
    print(dfr)

class DeleteTask(BaseHandler):
    url = ojoin(Task.url, "delete")

    @web.authenticated
    def get(self, task_id):
        self.redirect(Tasks)

    @web.authenticated
    @rollback_on_failure
    def post(self, task_id):
        if self.get_argument('delete', 'false') == 'true':
            task = self.session.query(
                orm.Task).filter(orm.Task.task_id == task_id).one()
            if len(task.users) > 1 and self.current_user in task.users:
                task.users.remove(self.current_user)
                self.current_user.notify(
                    'message',
                    "You have been removed from the task '{.name}'".format(
                        task))
            elif len(task.users) == 1 and self.current_user in task.users:
                self.session.delete(task)
                self.current_user.notify(
                    'message',
                    "The task '{.name}' has been deleted.".format(task))
            self.session.commit()
        else:
            print("Didn't get the expected argument delete=true. Hacking?")
        self.redirect(Tasks)

class Logout(BaseHandler):
    url = ojoin(Main.url, 'logout')

    @web.authenticated
    def get(self):
        self.clear_cookie('user')
        self.redirect(Login)

class ShareTask(BaseHandler):
    r'''Handles sharing tasks'''
    url = ojoin(Task.url, "share")

    @web.authenticated
    @rollback_on_failure
    def post(self, task_id):
        try:
            task = self.session.query(
                orm.Task).filter_by(task_id=task_id).one()
            if task not in self.current_user.tasks:
                raise Exception('User does not own this task')
            email = self.get_argument('friend', None)
            if email is None:
                raise Exception('Email argument not given')
            friend = self.session.query(orm.User).filter_by(email=email).one()
            if friend not in self.current_user.friends:
                raise Exception(
                    'Cannot share a task with someone who is not a friend.')
            friend.share_task(task=task, sharer=self.current_user)
            self.session.commit()
        except Exception as e:
            print(str(e))
        finally:
            self.redirect(Tasks)

def rename_classes(to_dir, labels):
    '''Once move_to_dirs is done, may wish to rename classes
    (e.g. so they can fit in preds).
    '''
    more = 'Y'
    while more == 'Y':
        if raw_input('Rename (another) class? (Y/N) ') == 'Y':
            rename = [-1]
            while not all([idx in range(len(labels)) for idx in rename]):
                for elem in enumerate(labels):
                    print elem
                rename = [int(elem) for elem in raw_input(
                    "Name a class number from above: ").split()]
            new_name = raw_input('Rename to: ')
            os.rename(ojoin(to_dir, labels[rename[0]]),
                      ojoin(to_dir, new_name))
            labels = update_labels(labels, rename, new_name)
        else:
            more = 'N'
    return labels

class NewTask(BaseHandler):
    r'''Allows creating a new task'''
    url = ojoin(Tasks.url, 'new')

    def get(self):
        self.render('newtask.html')

class Tasks(BaseHandler):
    '''Allows creation of tasks'''
    url = ojoin(Main.url, "tasks")

    @web.authenticated
    def get(self):
        '''Renders the task list'''
        self.render('tasklist.html')

    @web.authenticated
    @rollback_on_failure
    def post(self):
        '''Adds a task to the current user'''
        t = orm.Task(
            self.get_argument('taskname'),
            int(self.get_argument('length')),
            self.get_argument('firstdue'),
            self.get_argument('allowearly', 'off') == 'on',
            int(self.get_argument('points', 100)),
            int(self.get_argument('decay_length',
                                  self.get_argument('length'))),
            set(self.get_argument('tags', '').replace(',', ' ').split()),
            self.get_argument('notes', None))
        t.user_email = self.current_user.email
        self.current_user.tasks.append(t)
        self.session.commit()
        self.redirect(Tasks)

def test_quickplot_wells(loadwells1):
    """Import wells from file to Wells and quick plot."""
    mywell_list = loadwells1
    mywells = Wells()
    mywells.wells = mywell_list
    mywells.quickplot(filename=ojoin(td, "quickwells.png"))

class Notifications(BaseHandler):
    '''Displays all notifications for a user'''
    url = ojoin(Main.url, 'notifications')

    @web.authenticated
    def get(self):
        self.render('notifications.html')

def write_content_to_deploy_file(model_dir, content):
    model_name = model_dir.split('/')[-1]
    model_name = model_name.split('-fine')[0]
    fname = ojoin(model_dir, model_name + '_deploy.prototxt')
    # print "fname: %s" % (fname)
    deploy_file = open(fname, 'w')
    deploy_file.writelines(content)
    deploy_file.close()

def symlink_dataset(Keep, from_dir, to_dir):
    dump = []
    part = [0, 0.8, 0.87, 1]  # partition into train val test
    if os.path.isdir(to_dir):
        rmtree(to_dir)
    os.mkdir(to_dir)
    for i in xrange(3):
        dump.append([])
        for [num, key] in enumerate(Keep.keys()):
            l = len(Keep[key])
            dump[i] += [[f, num] for f in
                        Keep[key][int(part[i] * l):int(part[i + 1] * l)]]
        random.shuffle(dump[i])
    # cross_val = [np.array(d, dtype=[('x', object), ('y', int)])
    #              for d in dump]
    for d, dname in zip(dump, ['train', 'val', 'test']):
        data_dst_dir = ojoin(to_dir, dname)
        os.mkdir(data_dst_dir)
        for i in xrange(len(d)):
            if os.path.islink(ojoin(data_dst_dir, d[i][0])):
                old = d[i][0]
                while os.path.islink(ojoin(data_dst_dir, d[i][0])):
                    print '%s symlinked already, creating duplicate' % (d[i][0])
                    d[i][0] = d[i][0].split('.')[0] + '_.jpg'
                os.symlink(ojoin(from_dir, old),
                           ojoin(data_dst_dir, d[i][0]))
            else:
                os.symlink(ojoin(from_dir, d[i][0]),
                           ojoin(data_dst_dir, d[i][0]))
    return dump

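# A standalone illustration (not repo code) of how the part = [0, 0.8, 0.87, 1]
# fractions in symlink_dataset carve each class list into train/val/test:
files = ['%03d.jpg' % k for k in range(100)]  # a pretend class of 100 images
part = [0, 0.8, 0.87, 1]
l = len(files)
splits = [files[int(part[i] * l):int(part[i + 1] * l)] for i in xrange(3)]
print [len(s) for s in splits]  # [80, 7, 13]: 80% train, 7% val, 13% test
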
def get_train_file(model_dir):
    # check existence first: os.listdir on a missing dir would raise
    # before the error message below could ever print
    if not os.path.isdir(model_dir):
        print "error:", model_dir, "does not exist"
        sys.exit()
    for fname in os.listdir(model_dir):
        if fname.endswith('train.prototxt'):
            return open(ojoin(model_dir, fname), 'r')
    print 'no train prototxt found in', model_dir
    sys.exit()

def __init__(self, feature_path, split, batch_size=None, seed=None):
    self.feature_path = feature_path
    self.split = split
    self.hf = ojoin(feature_path, 'feature.h5')
    config = json.load(
        open(ojoin(feature_path, 'feature_config.json'), 'r'))
    self.__dict__.update(config)
    if batch_size is not None:
        self.batch_size = batch_size
    self.nb_sample = self.get_nsample(split)
    if self.batch_size > self.nb_sample:
        self.batch_size = self.nb_sample
        print('Set batch_size to {}'.format(self.nb_sample))
    # DON'T CHANGE shuffle - reason in the docstring
    super(H5FeatureIterator, self).__init__(self.nb_sample,
                                            batch_size=self.batch_size,
                                            shuffle=False,
                                            seed=seed)

def dump_to_files(Keep, data_info, task, data_dir):
    '''This function "trusts" you: it will overwrite data lookup files.'''
    dump = []
    part = [0, 0.82, 0.89, 1]  # partition into train val test
    dump_fnames = ['train.txt', 'val.txt', 'test.txt']
    for i in xrange(3):
        dump.append([])
        for [key, num] in [('Default', 0), (task, 1)]:
            l = len(Keep[key])
            dump[i] += [[f, num] for f in
                        Keep[key][int(part[i] * l):int(part[i + 1] * l)]]
        # this is the important shuffle actually
        random.shuffle(dump[i])
        if os.path.isfile(ojoin(data_info, dump_fnames[i])):
            print "WARNING: overwriting", ojoin(data_info, dump_fnames[i])
        with open(ojoin(data_info, dump_fnames[i]), 'w') as dfile:
            dfile.writelines(["%s %i\n" % (ojoin(data_dir, f), num)
                              for (f, num) in dump[i]])

def merge_validation_batches(data_dir):
    '''Assuming the validation-batches dir sits inside the net-batches
    dir, moves the contents of the former into the latter, renaming the
    batches so that batch numbers follow on sequentially and the
    validation batch numbers are highest.
    '''
    names = os.listdir(data_dir)
    train_batches = [name for name in names if name.startswith('data_batch_')]
    names = os.listdir(ojoin(data_dir, 'validation'))
    valid_batches = [name for name in names if name.startswith('data_batch_')]
    maxx = len(train_batches)
    for (i, batch) in enumerate(valid_batches):
        shutil.move(ojoin(data_dir, 'validation', batch),
                    ojoin(data_dir, 'data_batch_' + str(maxx + i + 1)))
    shutil.rmtree(ojoin(data_dir, 'validation'))
    print 'validation batches start at data_batch_%i' % (maxx + 1)
    print ('WARNING: batches.meta for validation thrown away. This might '
           'harm validation performance, because the mean being subtracted '
           'will be the mean over the training set, not over the validation '
           "set. Apart from that, don't think there's a problem.")

def run_model(data_path, out_path):
    rules, hard_rules, _, atoms = ground(data_path)
    results = map_inference(rules, hard_rules)

    reviews = atoms['review']
    with open(ojoin(out_path, 'POSITIVEREVIEW.txt'), 'w') as f:
        for (review, paper), (vid, _) in reviews.items():
            print("'%s'\t'%s'\t%f" % (review, paper, results[vid]), file=f)

    acceptable = atoms['acceptable']
    with open(ojoin(out_path, 'ACCEPTABLE.txt'), 'w') as f:
        for paper, (vid, _) in acceptable.items():
            print("'%s'\t%f" % (paper, results[vid]), file=f)

    presents = atoms['presents']
    with open(ojoin(out_path, 'PRESENTS.txt'), 'w') as f:
        for author, (vid, _) in presents.items():
            print("'%s'\t%f" % (author, results[vid]), file=f)

def test_wellintersections(loadwells1):
    """Find well crossings."""
    mywell_list = loadwells1
    mywells = Wells()
    mywells.wells = mywell_list
    dfr = mywells.wellintersections()
    logger.info(dfr)
    dfr.to_csv(ojoin(td, "wells_crossings.csv"))

class EditTasks(BaseHandler):
    r'''Lists tasks to be edited'''
    url = ojoin(Tasks.url, "edit")

    @web.authenticated
    @rollback_on_failure
    def get(self):
        '''Shows the task edit selection screen'''
        try:
            self.render('edittasks.html')
        except Exception as e:
            print(str(e))

def create_annotations(self):
    '''Create the annotations for yolo.'''
    for split in ['train', 'validation', 'test']:
        print(split)
        p = ojoin(self.data_folder, split)
        if not os.path.isdir(ojoin(p, 'labels')):
            os.mkdir(ojoin(p, 'labels'))
        img_ids = [os.path.splitext(f)[0]
                   for f in os.listdir(p) if f.endswith('.jpg')]
        path_imgs = []
        for img_id in tqdm(img_ids):
            path_imgs.append(ojoin(p, '{}.jpg'.format(img_id)))
            self.convert_bboxes(p, img_id)
        data = pd.DataFrame(path_imgs)
        data.to_csv(ojoin(self.data_folder, '{}.txt'.format(split)),
                    index=None)

def trans_dir_xlsx(in_file_dir):
    from os import listdir
    from os.path import join as ojoin
    from threading import Thread

    file_pathli = []
    for f in listdir(in_file_dir):
        file_pathli.append(ojoin(in_file_dir, f))

    # collect one thread per file, then start and join them all;
    # without the append the threads were created but never run
    thread_list = []
    for f in file_pathli:
        t = Thread(target=trans_to_xlsx, args=(f, f))
        thread_list.append(t)
    for t in thread_list:
        t.start()
    for t in thread_list:
        t.join()

def get_label_dict_knowing(data_dir, task, pos_class):
    '''get_label_dict() knowing exactly which flags to look for and
    how to group them into classes. task is the name of what we're
    learning to detect; pos_class is a list of the actual flag names
    to look for.
    '''
    d = {'Default': [], task: []}
    print 'generating specific dict of class:files from %s...' % (data_dir)
    for filename in os.listdir(data_dir):
        if not filename.endswith('.dat'):
            continue
        with open(ojoin(data_dir, filename)) as f:
            content = [line.strip() for line in f.readlines()]
        if any([label == line for (label, line)
                in itertools.product(pos_class, content)]):
            d[task].append(filename.split('.')[0] + '.jpg')
        else:
            d['Default'].append(filename.split('.')[0] + '.jpg')
    return d

class Completion(BaseHandler):
    url = ojoin(Task.url, "completions", "({})".format(DATE_REGEX))

    @web.authenticated
    @rollback_on_failure
    def post(self, task_id, completed_on):
        completion_date = parsedate(completed_on)
        task = self.session.query(
            orm.Task).filter(orm.Task.task_id == task_id).one()
        if task.last_completed is None or completion_date > task.last_completed:
            task.complete(self.current_user, completion_date)
            self.session.commit()
        else:
            self.current_user.notify(
                'error',
                "You already completed '{}' on {}".format(
                    task.name, date_str(completion_date)),
                task.task_id)
            self.session.commit()
        self.redirect(Main)

class Invite(BaseHandler):
    '''Handles an invitation to become friends'''
    url = ojoin(Main.url, "invite")

    @web.authenticated
    @rollback_on_failure
    def post(self):
        email = self.get_argument('email')
        if email is None:
            self.redirect(Friends)
            return
        potential_friend = self.session.query(
            orm.User).filter_by(email=email).one()
        if potential_friend == self.current_user:
            self.current_user.notify(
                'error', 'Forever Alone: you tried to befriend yourself')
        else:
            potential_friend.befriend(self.current_user)
            self.session.commit()
        self.redirect(Friends)

class EditTask(BaseHandler):
    r'''Handles editing a task'''
    url = ojoin(Task.url, "edit")

    @web.authenticated
    @rollback_on_failure
    def get(self, task_id):
        '''Shows the form for editing a particular task'''
        try:
            task = next(task for task in self.current_user.tasks
                        if str(task.task_id) == task_id)
        except Exception as e:
            self.set_status(404)
        else:
            try:
                self.render('edittask.html', task=task)
            except Exception as e:
                print(str(e))

    @web.authenticated
    @rollback_on_failure
    def post(self, task_id):
        '''Actually updates the task with the edits'''
        try:
            task = self.session.query(
                orm.Task).filter(orm.Task.task_id == task_id).one()
        except Exception as e:
            self.set_status(404)
            return
        task.name = self.get_argument('taskname', task.name)
        task.length = timedelta(
            days=int(self.get_argument('length', task.length)))
        task.allow_early = self.get_argument(
            'allowearly', 'on' if task.allow_early else 'off') == 'on'
        task.points = int(self.get_argument('points', task.points))
        task.tags = self.get_argument('tags', ', '.join(task.tags)).split(',')
        task.notes = self.get_argument('notes', task.notes)
        self.current_user.notify(
            'message', 'The task {.name} has been updated'.format(task))
        self.session.commit()
        self.redirect(Tasks)

def compare(cli_out, py_out):
    print('presents')
    cli_present = read_present(ojoin(cli_out, 'PRESENTS.txt'))
    py_present = read_present(ojoin(py_out, 'PRESENTS.txt'))
    compare_dicts(cli_present, py_present)

    print('acceptable')
    cli_accept = read_accept(ojoin(cli_out, 'ACCEPTABLE.txt'))
    py_accept = read_accept(ojoin(py_out, 'ACCEPTABLE.txt'))
    compare_dicts(cli_accept, py_accept)

    print('positiveReview')
    cli_rev = read_review(ojoin(cli_out, 'POSITIVEREVIEW.txt'))
    py_rev = read_review(ojoin(py_out, 'POSITIVEREVIEW.txt'))
    compare_dicts(cli_rev, py_rev)

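# read_accept and compare_dicts are used above but not defined in this
# section. Minimal sketches, assuming the tab-separated quoted format that
# run_model writes (e.g. "'paper'\t0.73" in ACCEPTABLE.txt); the tolerance
# and reporting style are assumptions.
def read_accept(path):
    '''Parse "'paper'<TAB><float>" lines into {paper: value}.'''
    out = {}
    with open(path) as f:
        for line in f:
            key, val = line.rstrip('\n').split('\t')
            out[key.strip("'")] = float(val)
    return out


def compare_dicts(a, b, tol=1e-4):
    '''Print keys missing from one side or differing by more than tol.'''
    for key in sorted(set(a) | set(b)):
        if key not in a or key not in b:
            print('  only in one output: %s' % key)
        elif abs(a[key] - b[key]) > tol:
            print('  %s: %f vs %f' % (key, a[key], b[key]))
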
class Notification(BaseHandler):
    url = ojoin(Notifications.url, "({})".format(UUID_REGEX))

    @web.authenticated
    @rollback_on_failure
    def post(self, notification_id):
        note = self.session.query(orm.Notification)\
            .filter_by(notification_id=notification_id).one()
        if note not in self.current_user.notifications:
            pass  # will just redirect
        elif self.get_argument('delete', None) == 'true':
            self.current_user.notifications.remove(note)
            self.session.commit()
        elif note.noti_type == 'befriend' and self.get_argument(
                'accept', 'false') == 'true':
            friend = self.session.query(
                orm.User).filter_by(email=note.sender).one()
            self.current_user._followers.append(friend)
            friend.notify(
                'message',
                '{.name} has accepted your friend request'.format(
                    self.current_user))
            self.current_user.notifications.remove(note)
            self.session.commit()
        elif note.noti_type == 'share' and self.get_argument(
                'accept', 'false') == 'true':
            task = self.session.query(
                orm.Task).filter_by(task_id=note.task_id).one()
            sender = self.session.query(
                orm.User).filter_by(email=note.sender).one()
            self.current_user.tasks.append(task)
            self.current_user.notify(
                'message',
                "You have accepted the task '{.name}'".format(task))
            sender.notify(
                'message',
                "{.name} has accepted the task '{.name}'".format(
                    self.current_user, task))
            self.current_user.notifications.remove(note)
            self.session.commit()
        self.redirect(Notifications)

class Google(BaseHandler, auth.GoogleMixin):
    url = ojoin(Main.url, "auth", "google")

    @web.asynchronous
    def get(self):
        if self.get_argument("openid.mode", None):
            self.get_authenticated_user(self.async_callback(self._on_auth))
            return
        self.authenticate_redirect()

    def _on_auth(self, user):
        if not user:
            raise web.HTTPError(500, "Google auth failed")
        self.set_secure_cookie('user', user['email'])
        if not self.session.query(
                orm.User).filter_by(email=user['email']).first():
            usr = orm.User(email=user['email'],
                           name=user.get('name'),
                           firstname=user.get('first_name'),
                           lastname=user.get('last_name'))
            self.session.add(usr)
            self.session.commit()
        self.redirect(Main)

def merge_classes(to_dir, labels):
    '''Once move_to_dirs is done, may wish to merge classes.'''
    more = 'Y'
    while more == 'Y':
        print '%s' % (', '.join(map(str, labels)))
        if raw_input('Merge (more) classes? (Y/N) ') == 'Y':
            merge = [-1]
            while not all([idx in range(len(labels)) for idx in merge]):
                for elem in enumerate(labels):
                    print elem
                merge = [int(elem) for elem in raw_input(
                    "Name two class numbers from above, "
                    "separated by ' ': ").split()]
            print 'moving files...'
            for fname in os.listdir(ojoin(to_dir, labels[merge[1]])):
                shutil.move(ojoin(to_dir, labels[merge[1]], fname),
                            ojoin(to_dir, labels[merge[0]]))
            new_label = raw_input('name of merged class? ')
            os.rmdir(ojoin(to_dir, labels[merge[1]]))
            os.rename(ojoin(to_dir, labels[merge[0]]),
                      ojoin(to_dir, new_label))
            labels = update_labels(labels, merge, new_label)
        else:
            more = 'N'
    return labels

def parse_log(path, keyvars):
    '''Collect the float after "key:" for every key in keyvars, in file
    order. (Header and results initialisation reconstructed from the
    call below.)
    '''
    results = {}
    for line in open(path):
        colon_index = line.find(":")
        if colon_index != -1:
            key = line[:colon_index].strip()
            if key in keyvars:
                value = float(line[colon_index + 1:])
                if key in results:
                    results[key].append(value)
                else:
                    results[key] = [value]
    return [results[key] for key in keyvars]


def min_at(values):
    return min((v, i) for i, v in enumerate(values))


if __name__ == '__main__':
    logfile = sys.argv[1]
    if not logfile.endswith('.txt'):
        logfile = ojoin(logfile, 'log.txt')
    curves = parse_log(logfile, ['valid_approx_cost_class_corr',
                                 'valid_approx_error_rate'])
    valid_cost, valid_i = min_at(curves[0])
    print 'epochs:\t\t', len(curves[0])
    print 'min cost:\t', valid_cost
    print 'ER at min cost:\t', curves[1][valid_i]
    print 'min ER:\t\t', min(curves[1])

raise Exception("Need to specify --task flag") task = optDict["task"] data_info = "/data/ad6813/caffe/data/" + task if not "box" in optDict: raise Exception("Need to specify --box flag\nRed, Blue, RedBlue") data_dir = "/data/ad6813/pipe-data/" + optDict["box"].capitalize() + "box/raw_data/dump" if not "learn" in optDict: raise Exception("Need to specify --learn flag\nlabNum1-labNum2-...-labNumk") pos_class = flag_lookup(optDict["learn"]) target_bad_min = None if "target-bad-min" in optDict: target_bad_min = float(optDict["target-bad-min"]) # baseDir = os.path.abspath("../task/" + task) + "/" # write to read file how to interpret values as classes and might # as well save entire command if not os.path.isdir(data_info): os.mkdir(data_info) with open(ojoin(data_info,'read.txt'), 'w') as read_file: read_file.write(" ".join(sys.argv)+'\n') # do your shit main(data_dir, data_info, task, pos_class, target_bad_min) # still need to automate this # p = subprocess.Popen("./setup_rest.sh " + task + " " + str(num_output), shell=True) # p.wait()
def to_table(
        self,
        rootname="myconfig",
        destination=None,
        template=None,
        entry=None,
        createfolders=False,
        sep=",",
):  # pylint: disable=too-many-arguments
    # pylint: disable=too-many-branches
    """Export a particular entry in the config as text table files; one
    with true values and one with templated variables.

    Args:
        rootname: Root file name without extension. An extension .txt
            will be added for destination, and .txt.tmpl for template
            output.
        destination: The directory path for the destination file. If
            None, then no output will be given.
        template: The directory path for the templated file. If None,
            then no templated output will be given.
        entry (str): One of the specified key/entry sections in the
            master config that holds a table, e.g. 'global.FWL'.
        createfolders (bool): If True then folders will be created if
            they do not exist (default is False).
        sep (str): Table separator, e.g. ' '; default is ','.

    Raises:
        ValueError: If both destination and template output are None,
            or a folder does not exist in advance while
            createfolders=False, or entry is not specified.

    Example:

        >>> config.to_table('fwl', destination='../', entry='global.FWL')
    """
    if not destination and not template:
        raise ValueError("Both destination and template are None. "
                         "At least one of them has to be set!")
    if entry is None:
        raise ValueError('The entry is None; need a value, '
                         'e.g. "global.FWL"')
    if createfolders:
        self._force_create_folders([destination, template])
    else:
        self._check_folders([destination, template])

    keys = entry.split(".")
    if len(keys) == 1:
        cfg = self.config[keys[0]]
    elif len(keys) == 2:
        cfg = self.config[keys[0]][keys[1]]
    elif len(keys) == 3:
        cfg = self.config[keys[0]][keys[1]][keys[2]]
    elif len(keys) == 4:
        cfg = self.config[keys[0]][keys[1]][keys[2]][keys[3]]
    else:
        raise ValueError("Entry with more than 4 sublevels, not supported")

    if destination:
        with open(ojoin(destination, rootname + ".txt"), "w") as dest:
            for row in cfg:
                for col in row:
                    stream = self._get_required_form(str(col),
                                                     template=False)
                    print(str(stream) + sep, file=dest, end="")
                print("", file=dest)

    if template:
        with open(ojoin(template, rootname + ".txt.tmpl"), "w") as tmpl:
            for row in cfg:
                for col in row:
                    stream = self._get_required_form(str(col),
                                                     template=True)
                    print(str(stream) + sep, file=tmpl, end="")
                print("", file=tmpl)

# (header reconstructed from the call in __main__ below; 'path' appears to
# alias the data directory)
def get_label_dict(data_dir):
    path = data_dir
    total_num_images = 0
    d = {'Perfect': []}
    print 'generating dict of label:files from %s...' % (data_dir)
    for filename in os.listdir(path):
        if not filename.endswith('.dat'):
            continue
        total_num_images += 1
        fullname = os.path.join(path, filename)
        with open(fullname) as f:
            content = [line.strip() for line in f.readlines()]
        if content == []:
            d['Perfect'].append(filename.split('.')[0] + '.jpg')
        else:
            for label in content:
                if label not in d.keys():
                    d[label] = []
                d[label].append(filename.split('.')[0] + '.jpg')
    d['total_num_images'] = total_num_images
    json.dump(d, open('label_dict_' + str(date.today()), 'w'))
    return d


if __name__ == '__main__':
    here = os.getcwd()
    data_dir = '/data2/ad6813/pipe-data/Bluebox'
    os.chdir('../scripts/data_preparation')
    d = get_label_dict(data_dir)
    os.chdir(here)
    for label in d.keys():
        if type(d[label]) == list:
            if not os.path.isdir(label):
                os.mkdir(label)
            length = min(20, len(d[label]))
            for f in d[label][:length]:
                shutil.copy(ojoin(data_dir, f), ojoin(label, f))

def test_imbalance_experiment():
    os.mkdir('temp')
    os.mkdir(ojoin('temp', 'class1'))
    os.mkdir(ojoin('temp', 'class2'))

def create_lookup_txtfiles(data_dir, to_dir=None):
    '''data_dir: where raw data is.
    to_dir: where to store .txt files.
    '''
    list_dir = os.listdir(data_dir)  # names of all elements in dir
    lastLabelIsDefault = False
    img_labels = []     # image's labels to train on
    dump = []           # text to write to the .txt files
    case_count = 0      # number of training cases
    tagless_count = 0   # number of images with no flag to train on
    badcase_count = 0   # number of images with multiple flags to train on

    if to_dir is not None:
        train_file = open(ojoin(to_dir, 'train.txt'), 'w')
        val_file = open(ojoin(to_dir, 'val.txt'), 'w')
        test_file = open(ojoin(to_dir, 'test.txt'), 'w')
        read_file = open(ojoin(to_dir, 'read.txt'), 'w')

    # get labels of classes to learn
    labels_read = get_all_pipe_labels(data_dir, save=False)['labels']
    lookup = {}
    for num, label in enumerate(labels_read):
        lookup[label] = num
    for elem in enumerate(labels_read):
        print elem
    labels_read = [labels_read[int(num)] for num in raw_input(
        "Numbers of labels to learn, separated by ' ': ").split()]
    labels_write = labels_read[:]
    lookup, labels_write = merge_classes(lookup, labels_write)
    label_default = raw_input('Default label for all images not containing '
                              'any of given labels? (name/N) ')
    if label_default != 'N':
        lastLabelIsDefault = True
        lookup[label_default] = len(labels_write)
        labels_write.append(label_default)

    print 'sorting images by class label...'
    for fname in list_dir:
        if not fname.endswith('.dat'):
            continue
        case_count += 1
        fullname_dat = os.path.join(data_dir, fname)
        with open(fullname_dat) as f:
            content = [line.strip() for line in f.readlines()]
        img_labels = [label for label in labels_read if label in content]
        # if the last label is a normal label, images with no labels
        # will not be batched
        if not img_labels:
            if lastLabelIsDefault:
                dump.append((fname.split('.')[0] + '.jpg',
                             lookup[label_default]))
            else:
                tagless_count += 1
        else:
            # if an image has multiple flags, it will appear in each
            # flag subdir, each time with only one label. this is very
            # bad for training, so hopefully such cases are very rare.
            if len(img_labels) > 1:
                badcase_count += len(img_labels) - 1
                case_count += len(img_labels) - 1
            for label in img_labels:
                dump.append((fname.split('.')[0] + '.jpg', lookup[label]))

    print "dump has %i elements, looking like %s and %s" % (
        len(dump), dump[0], dump[300])

    # write dump to train and val files: randomise, then hold out 20%
    # of the dataset, of which roughly a third becomes the validation
    # set and the rest the test set
    non_train_dump_size = int(0.2 * len(dump))
    relative_val_size = int(0.34 * non_train_dump_size)
    non_train_dump = random.sample(dump, non_train_dump_size)
    val_dump = random.sample(non_train_dump, relative_val_size)
    # print "val_dump has %i elements, looking like %s and %s" % (
    #     len(val_dump), val_dump[0], val_dump[300])
    test_dump = [elem for elem in non_train_dump if elem not in val_dump]
    train_dump = [elem for elem in dump if elem not in non_train_dump]
    random.shuffle(train_dump)

    if to_dir is not None:
        train_file.writelines(["%s %i\n" % (fname, num)
                               for (fname, num) in train_dump])
        val_file.writelines(["%s %i\n" % (fname, num)
                             for (fname, num) in val_dump])
        test_file.writelines(["%s %i\n" % (fname, num)
                              for (fname, num) in test_dump])
        # write to read file how to interpret values as classes
        read_file.writelines(["%i %s\n" % (lookup[label], label)
                              for label in labels_write])
        train_file.close()
        val_file.close()
        test_file.close()
        read_file.close()

    print 'create_lookup_txtfiles complete. summary stats:'
    print 'badcase_freq: %0.2f' % (float(badcase_count) / case_count)
    print 'tagless_freq: %0.2f' % (float(tagless_count) / case_count)
    return train_dump, val_dump, test_dump

def remove_imgs(net_dir, remove_dic):
    '''remove_dic knows which imgs to remove in each class subdir,
    and does so in net_dir.
    '''
    for c in remove_dic.keys():
        for fname in remove_dic[c]:
            os.remove(ojoin(net_dir, c, fname))

import numpy as np
import matplotlib.pyplot as plt
# %matplotlib inline
import os, sys
from os.path import join as ojoin
from subprocess import call

# Make sure that caffe is on the python path:
caffe_root = '../'  # this file is expected to be in {caffe_root}/examples
imagenet_dir = ojoin(caffe_root, 'examples/imagenet')
sys.path.insert(0, caffe_root + 'python')
import caffe

# Set the right path to your model definition file, pretrained model weights,
# and the image you would like to classify.
MODEL_FILE = ojoin(imagenet_dir, 'imagenet_deploy.prototxt')
PRETRAINED = ojoin(imagenet_dir, 'caffe_reference_imagenet_model')
MEAN_FILE = ojoin(caffe_root, 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
IMAGE_FILE = ojoin(caffe_root, 'examples/images/cat.jpg')

# get PRETRAINED
# if not os.path.isfile(PRETRAINED):
#     call(['./get_caffe_reference_imagenet_model.sh'])

# load network
print os.getcwd()
net = caffe.Classifier(MODEL_FILE, PRETRAINED,
                       image_dims=(256, 256),
                       input_scale=255,
                       mean_file=MEAN_FILE,
                       channel_swap=(2, 1, 0))

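# A minimal sketch of actually classifying IMAGE_FILE with the loaded net,
# using the old pycaffe Classifier API assumed above; plotting the scores is
# illustrative.
input_image = caffe.io.load_image(IMAGE_FILE)  # HxWx3 float array in [0, 1]
prediction = net.predict([input_image])        # list in, (N, n_classes) out
print 'predicted class index:', prediction[0].argmax()
plt.plot(prediction[0])  # per-class scores for the single input
plt.show()
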