def run(args):
    ms.context.set_context(mode=ms.context.GRAPH_MODE, device_target=args.device)
    dataset_sink_mode = False
    download_dataset(args.data_dir)
    # define the loss function
    net_loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    # create the network
    network = LeNet5()
    # define the optimizer
    net_opt = build_optimizer(args, network)
    config_ck = CheckpointConfig(save_checkpoint_steps=1875, keep_checkpoint_max=10)
    # save the network model and parameters for subsequent fine-tuning
    ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet", config=config_ck)
    # group layers into an object with training and evaluation features
    model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
    if args.init_ckpt:
        load_ckpt(network, args.init_ckpt)
    train_net(network, model, args, ckpoint_cb, dataset_sink_mode)

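# Usage sketch (illustrative, not from the original source): building the `args`
# namespace that run() reads. Only the attributes visible above (device, data_dir,
# init_ckpt) are wired up here; build_optimizer() and train_net() may expect more.
import argparse

parser = argparse.ArgumentParser(description='LeNet-5 training example')
parser.add_argument('--device', default='CPU', choices=['CPU', 'GPU', 'Ascend'])
parser.add_argument('--data_dir', default='./datasets/MNIST_Data')
parser.add_argument('--init_ckpt', default='', help='optional checkpoint to load before training')

if __name__ == '__main__':
    run(parser.parse_args())
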
def download(self):
    """
    API call to download OpenImages slice
    :return: None
    """
    download_dataset(dest_dir=self.directory + "/train",
                     meta_dir="./data/create",
                     class_labels=self.labels,
                     exclusions_path=None,
                     limit=self.limit)

def download_field(fname, basedir=None, force=False):
    # check database
    if basedir is None:
        print('No basedir supplied, working in current directory...')
        basedir = '.'
    workdir = basedir + '/' + fname
    with SurveysDB() as sdb:
        result = sdb.get_field(fname)
        if result is None:
            print('Field', fname, 'does not exist in the database')
            sys.exit(1)
        if result['status'] != 'Not started' and result['status'] != 'D/L failed':
            print('Field', fname, 'has status', result['status'])
            if not force:
                return False
        # get the ids of the observations
        sdb.cur.execute(
            'select * from observations where field=%s and (status="DI_processed" or status="Archived")',
            (fname, ))
        obs = sdb.cur.fetchall()
        if len(obs) > 0:
            result['status'] = 'Downloading'
            if not os.path.isdir(workdir):
                os.mkdir(workdir)
            tag_field(sdb, result)
            sdb.set_field(result)
        else:
            print('No downloadable observations for this field')
            sys.exit(3)

    # now do the download for each field
    overall_success = True
    for o in obs:
        print('Downloading observation ID L' + str(o['id']))
        for prefix in ['', 'prefactor_v1.0/', 'prefactor_v3.0/']:
            success = download_dataset('https://lofar-webdav.grid.sara.nl',
                                       '/SKSP/' + prefix + 'L' + str(o['id']) + '/',
                                       workdir=workdir)
            if success:
                break
            else:
                print('URL failed, trying alternative')
        if not success:
            print('Download failed')
        overall_success = overall_success and success

    with SurveysDB() as sdb:
        if overall_success:
            result['status'] = 'Downloaded'
            tag_field(sdb, result, workdir=workdir)
        else:
            result['status'] = 'D/L failed'
            tag_field(sdb, result, workdir=workdir)
        sdb.set_field(result)

    return overall_success

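# Usage sketch (illustrative; the field name and base directory below are invented):
# fetching one field into a scratch area, forcing a retry even if a previous attempt
# left the database status as something other than 'Not started'.
if __name__ == '__main__':
    ok = download_field('P123+45', basedir='/data/lofar', force=True)
    print('download succeeded' if ok else 'download failed or skipped')
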
def load_eval_dataset(ds_name, data_path, categorical=False):
    # download dataset if not exists
    if not os.path.exists(data_path):
        parent_path = os.path.dirname(os.path.dirname(data_path))
        parent_path = os.path.join(parent_path, "")
        download.download_dataset(ds_name, parent_path)

    spec = config.SPECS[ds_name]
    input_size = spec["input_size"]
    category_depth = 1 if spec.get("categorical", False) else 0
    has_val = "n_val" in spec
    if not has_val and "val_portion" not in spec:
        raise NotImplementedError(
            'Either "n_val" or "val_portion" must be specified in dataset configuration'
        )
    n_eval = spec["n_val"] if has_val else spec["n_train"]

    eval_imgs_dir = data_path + "stimuli"
    eval_maps_dir = data_path + "maps"
    eval_fixs_dir = data_path + "fixations"
    if has_val:
        eval_imgs_dir += "/val"
        eval_maps_dir += "/val"
        eval_fixs_dir += "/val"

    eval_x = _get_file_list(eval_imgs_dir, category_depth)
    eval_y = _get_file_list(eval_maps_dir, category_depth)
    eval_fixs = _get_file_list(eval_fixs_dir, category_depth)
    _check_consistency(zip(eval_x, eval_y, eval_fixs), n_eval)

    eval_ds = tf.data.Dataset.from_tensor_slices((eval_x, eval_y, eval_fixs))
    eval_ds = _prepare_image_ds(eval_ds, ds_name, input_size, category_depth,
                                one_at_a_time=True)
    return (eval_ds, n_eval)

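# Usage sketch (illustrative; the dataset name "salicon" and the path are assumptions
# and must exist in config.SPECS): loading the evaluation split and peeking at one element.
eval_ds, n_eval = load_eval_dataset("salicon", "data/salicon/")
print("evaluation samples:", n_eval)
for sample in eval_ds.take(1):
    print(sample)  # stimulus / map / fixation element as produced by _prepare_image_ds
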
def download_field(fname,basedir=None,force=False):
    # check database
    if basedir is None:
        print 'No basedir supplied, working in current directory...'
        basedir='.'
    workdir=basedir+'/'+fname
    with SurveysDB() as sdb:
        result=sdb.get_field(fname)
        if result is None:
            print 'Field',fname,'does not exist in the database'
            sys.exit(1)
        if result['status']!='Not started' and result['status']!='D/L failed':
            print 'Field',fname,'has status',result['status']
            if not force:
                return False
        # get the ids of the observations
        sdb.cur.execute('select * from observations where field=%s and status="DI_processed"',(fname,))
        obs=sdb.cur.fetchall()
        if len(obs)>0:
            result['status']='Downloading'
            if not os.path.isdir(workdir):
                os.mkdir(workdir)
            tag_field(sdb,result)
            sdb.set_field(result)
        else:
            print 'No downloadable observations for this field'
            sys.exit(3)

    # now do the download for each field
    overall_success=True
    for o in obs:
        print 'Downloading observation ID L'+str(o['id'])
        success=download_dataset('https://lofar-webdav.grid.sara.nl','/SKSP/L'+str(o['id'])+'/',workdir=workdir)
        if success==False:
            print 'Download failed'
        overall_success=overall_success and success

    with SurveysDB() as sdb:
        if overall_success:
            result['status']='Downloaded'
            tag_field(sdb,result,workdir=workdir)
        else:
            result['status']='D/L failed'
            tag_field(sdb,result,workdir=workdir)
        sdb.set_field(result)

    return overall_success

def old_parse_data():
    """Parse data from the dataset to fit the frontend.

    If the dataset is 'out of date' (more than one hour old) new data is downloaded.
    The parsed data is then saved as a .json file.
    If the dataset is NOT 'out of date' the function returns immediately.

    Returns the filename of the parsed data.
    """
    filename = "data/api/" + get_todays_date() + "-external.json"
    if os.path.exists(filename):
        file_time = datetime.fromtimestamp(os.path.getmtime(filename))
        now = datetime.now()
        file_lifetime = now - file_time
        if (file_lifetime.total_seconds() / 60) / 60 < 1:
            return filename

    data = download_dataset()
    if data is None:
        with open("data/external/latest.json", "r") as f:
            data = json.load(f)

    stats = {
        "confirmed": data['latest']['confirmed'],
        "deaths": data['latest']['deaths'],
        "recovered": data['latest']['recovered'],
        "countries": len(
            list(
                set([
                    country['country']
                    for country in data['confirmed']['locations']
                ]))),
        "updated": data['confirmed']['last_updated'].split("T")[0]
    }

    cities = []
    for c, d, r in zip(data['confirmed']['locations'],
                       data['deaths']['locations'],
                       data['recovered']['locations']):
        if c['country'] == d['country'] == r['country']:
            if not (c['latest'] == d['latest'] == r['latest'] == 0):
                parsed_city = {
                    "type": "Feature",
                    "properties": {
                        "city": c['country'],
                        "count": c['latest'],
                        "deaths": d['latest'],
                        "recovered": r['latest'],
                        "icon": "theatre"
                    },
                    "geometry": {
                        "type": "Point",
                        "coordinates": [
                            float(c['coordinates']['long']),
                            float(c['coordinates']['lat'])
                        ]
                    }
                }
                cities.append(parsed_city)

    with open(filename, "w+") as json_file:
        json.dump({"cities": cities, "stats": stats},
                  json_file,
                  sort_keys=True,
                  indent=2,
                  separators=(",", ":"))

    return filename

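# Usage sketch (illustrative only; how the frontend actually consumes the file is an
# assumption): calling the parser and reading back the cities/stats payload it wrote.
import json

path = old_parse_data()
with open(path) as f:
    payload = json.load(f)
print(payload["stats"])
print(len(payload["cities"]), "locations with non-zero counts")
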
os.chdir(rootdir)

name = sys.argv[1]
try:
    qsubfile = sys.argv[2]
except:
    qsubfile = '/home/mjh/git/ddf-pipeline/pipeline.qsub'

try:
    os.mkdir(name)
except OSError:
    warn('Working directory already exists')
    pass
os.chdir(name)

report('Downloading data')
if not download_dataset('https://lofar-webdav.grid.sara.nl',
                        '/SKSP/' + name + '/'):
    die('Download failed to get the right number of files')

report('Unpacking data')
unpack()

report('Deleting tar files')
os.system('rm *.tar.gz')

report('Making ms lists')
if make_list():
    report('Submit job')
    os.system('qsub -N ddfp-' + name + ' -v WD=' + rootdir + '/' + name +
              ' ' + qsubfile)
else:
    die('make_list could not construct the MS list')

def do_run_pipeline(name, basedir):
    if name[0] != 'P' and name[0] != 'L':
        die('This code should be used only with field or observation names',
            database=False)
    do_field = (name[0] == 'P')
    try:
        qsubfile = sys.argv[2]
    except:
        qsubfile = '/home/mjh/pipeline-master/ddf-pipeline/torque/pipeline.qsub'
    workdir = basedir + '/' + name
    try:
        os.mkdir(workdir)
    except OSError:
        warn('Working directory already exists')

    report('Downloading data')
    if do_field:
        success = download_field(name, basedir=basedir)
    else:
        success = download_dataset('https://lofar-webdav.grid.sara.nl',
                                   '/SKSP/' + name + '/', basedir=basedir)
    if not success:
        die('Download failed, see earlier errors', database=False)

    report('Unpacking data')
    try:
        unpack(workdir=workdir)
    except RuntimeError:
        if do_field:
            update_status(name, 'List failed', workdir=workdir)
        raise
    if do_field:
        update_status(name, 'Unpacked', workdir=workdir)

    report('Deleting tar files')
    os.system('rm ' + workdir + '/*.tar.gz')
    os.system('rm ' + workdir + '/*.tar')

    averaged = False
    report('Checking structure')
    g = glob.glob(workdir + '/*.ms')
    msl = MSList(None, mss=g)
    uobsids = set(msl.obsids)
    for thisobs in uobsids:
        # check one MS with each ID
        for m, ch, o in zip(msl.mss, msl.channels, msl.obsids):
            if o == thisobs:
                channels = len(ch)
                print 'MS', m, 'has', channels, 'channels'
                if channels > 20:
                    update_status(name, 'Averaging', workdir=workdir)
                    print 'Averaging needed for', thisobs, '!'
                    averaged = True
                    average(wildcard=workdir + '/*' + thisobs + '*')
                    os.system('rm -r ' + workdir + '/*' + thisobs + '*pre-cal.ms')
                break

    report('Making ms lists')
    success = make_list(workdir=workdir)
    if do_field:
        list_db_update(success, workdir=workdir)
    if not success:
        die('make_list could not construct the MS list', database=False)

    report('Creating custom config file from template')
    make_custom_config(name, workdir, do_field, averaged)

    # now run the job
    do_run_job(name, basedir=basedir, qsubfile=None, do_field=do_field)

def load_train_dataset(ds_name, data_path):
    # download dataset if not exists
    if not os.path.exists(data_path):
        parent_path = os.path.dirname(os.path.dirname(data_path))
        parent_path = os.path.join(parent_path, "")
        download.download_dataset(ds_name, parent_path)

    spec = config.SPECS[ds_name]
    input_size = spec["input_size"]
    category_depth = 1 if spec.get("categorical", False) else 0
    n_train = spec["n_train"]
    has_val = "n_val" in spec
    if not has_val and "val_portion" not in spec:
        raise NotImplementedError(
            'Either "n_val" or "val_portion" must be specified in dataset configuration'
        )

    train_imgs_dir = data_path + "stimuli"
    train_maps_dir = data_path + "maps"
    if has_val:
        n_val = spec["n_val"]
        train_imgs_dir += "/train"
        train_maps_dir += "/train"
    else:
        n_val = math.floor(n_train * spec["val_portion"])

    # loading dataset
    train_x = _get_file_list(train_imgs_dir, category_depth)
    train_y = _get_file_list(train_maps_dir, category_depth)
    _check_consistency(zip(train_x, train_y), n_train)
    train_ds = tf.data.Dataset.from_tensor_slices((train_x, train_y))

    # if has a separate validation set: load them
    # else: take a certain portion from train dataset
    if has_val:
        train_ds = train_ds.shuffle(n_train, reshuffle_each_iteration=False)
        val_x = _get_file_list(data_path + "stimuli/val", category_depth)
        val_y = _get_file_list(data_path + "maps/val", category_depth)
        _check_consistency(zip(val_x, val_y), n_val)
        val_ds = tf.data.Dataset.from_tensor_slices((val_x, val_y))
        val_ds = val_ds.shuffle(n_val, reshuffle_each_iteration=False)
    else:
        if category_depth > 0:
            n_category = spec["n_category"]
            n_per_cat = n_train // n_category
            n_val_per_cat = math.floor(n_per_cat * spec["val_portion"])
            n_val = n_val_per_cat * n_category
            for i in range(n_category):
                cat_train_ds = train_ds.take(n_per_cat)
                cat_train_ds = cat_train_ds.shuffle(
                    n_per_cat, reshuffle_each_iteration=False)
                if i == 0:
                    val_ds = cat_train_ds.take(n_val_per_cat)
                    new_train_ds = cat_train_ds.skip(n_val_per_cat)
                else:
                    val_ds = val_ds.concatenate(
                        cat_train_ds.take(n_val_per_cat))
                    new_train_ds = new_train_ds.concatenate(
                        cat_train_ds.skip(n_val_per_cat))
                train_ds = train_ds.skip(n_per_cat)
            train_ds = new_train_ds
        else:
            train_ds = train_ds.shuffle(n_train,
                                        reshuffle_each_iteration=False)
            val_ds = train_ds.take(n_val)
            train_ds = train_ds.skip(n_val)
        n_train -= n_val

    train_ds = _prepare_image_ds(train_ds, ds_name, input_size, category_depth)
    val_ds = _prepare_image_ds(val_ds, ds_name, input_size, category_depth)

    return ((train_ds, n_train), (val_ds, n_val))

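# Usage sketch (illustrative; "salicon" and the path are assumptions): obtaining the
# train/validation splits and checking their sizes before wiring them to a model.
(train_ds, n_train), (val_ds, n_val) = load_train_dataset("salicon", "data/salicon/")
print("train/val sizes:", n_train, n_val)
for sample in train_ds.take(1):
    print(sample)  # whatever _prepare_image_ds yields per element
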
if name[0]!='P' and name[0]!='L':
    die('This code should be used only with field or observation names',database=False)
do_field=(name[0]=='P')

try:
    os.mkdir(name)
except OSError:
    warn('Working directory already exists')
    pass
os.chdir(name)

report('Downloading data')
if do_field:
    success=download_field(name)
else:
    success=download_dataset('https://lofar-webdav.grid.sara.nl','/SKSP/'+name+'/')
if not success:
    die('Download failed, see earlier errors',database=False)

report('Unpacking data')
unpack()
if do_field:
    unpack_db_update()

report('Deleting tar files')
os.system('rm *.tar.gz')

report('Making ms lists')
success=make_list()

def do_run_pipeline(name,basedir,qsubfile=None,do_field=True):
    '''
    set do_field False for the now obsolete behaviour of downloading and imaging a particular observation
    '''
    if qsubfile is None:
        qsubfile='/home/mjh/pipeline-master/ddf-pipeline/torque/pipeline.qsub'

    workdir=basedir+'/'+name
    try:
        os.mkdir(workdir)
    except OSError:
        warn('Working directory already exists')

    report('Downloading data')
    if do_field:
        success=download_field(name,basedir=basedir)
    else:
        success=download_dataset('https://lofar-webdav.grid.sara.nl','/SKSP/'+name+'/',basedir=basedir)
    if not success:
        die('Download failed, see earlier errors',database=False)

    report('Unpacking data')
    try:
        unpack(workdir=workdir)
    except RuntimeError:
        if do_field:
            update_status(name,'Unpack failed',workdir=workdir)
        raise
    if do_field:
        update_status(name,'Unpacked',workdir=workdir)

    report('Deleting tar files')
    os.system('rm '+workdir+'/*.tar.gz')
    os.system('rm '+workdir+'/*.tar')

    averaged=False
    report('Checking structure')
    g=glob.glob(workdir+'/*.ms')
    msl=MSList(None,mss=g)
    dysco=np.any(msl.dysco)
    uobsids=set(msl.obsids)
    for thisobs in uobsids:
        # check one MS with each ID
        for m,ch,o,hc in zip(msl.mss,msl.channels,msl.obsids,msl.hascorrected):
            if o==thisobs:
                if not(hc):
                    print('MS',m,'has no corrected_data column, force use of DATA')
                    averaged=True
                channels=len(ch)
                print('MS',m,'has',channels,'channels')
                if channels>20:
                    update_status(name,'Averaging',workdir=workdir)
                    print('Averaging needed for',thisobs,'!')
                    averaged=True
                    average(wildcard=workdir+'/*'+thisobs+'*')
                    os.system('rm -r '+workdir+'/*'+thisobs+'*pre-cal.ms')
                break

    report('Making ms lists')
    success=make_list(workdir=workdir)
    if do_field:
        list_db_update(success,workdir=workdir)
    if not success:
        die('make_list could not construct the MS list',database=False)

    report('Creating custom config file from template')
    make_custom_config(name,workdir,do_field,averaged)

    # now run the job
    do_run_job(name,basedir=basedir,qsubfile=None,do_field=do_field,dysco=dysco)

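# Usage sketch (illustrative; the field name and base directory are invented): running
# the full download/unpack/average/submit sequence for one field with the default qsub file.
if __name__ == '__main__':
    do_run_pipeline('P123+45', '/beegfs/lofar', do_field=True)
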