def get_vienna_layout(self, data):
    temp_folder = "/tmp/" + str(uuid.uuid4())
    ensure_dir(temp_folder)
    dot_bracket_filepath = temp_folder + "/dotbracket.txt"

    f = open(dot_bracket_filepath, "w")
    f.write(data["sequence"] + "\n" + data["structure"] + "\n")
    f.close()

    # change to tmp folder
    os.chdir(temp_folder)

    # use RNAplot CLI to generate the xrna tab delimited file
    os.system("RNAplot -o xrna < " + dot_bracket_filepath)

    # get the coords out by parsing the file
    coords = []
    with open(temp_folder + "/rna.ss") as f:
        for line in f:
            line = line.strip()
            if line == "" or line[0] == "#":
                continue
            bits = line.split()
            x = float(bits[2])
            y = float(bits[3])
            coords.append([x, y])

    os.system("rm -rf " + temp_folder)
    return coords

def split(self):
    from Bio import SeqIO
    from utils import ensure_dir

    # Count the sequences
    print("Counting sequences...")
    n_seqs = 0
    handle = open(self.sauce_filepath, "r")
    for record in SeqIO.parse(handle, "fasta"):
        n_seqs += 1
    handle.close()
    chunk_size = int(n_seqs / self.n_chunks)
    print("...There are [" + str(n_seqs) + "] sequences")

    # Iterate through chunks of sequences.
    # For each sequence, write a single fasta file in the right location.
    n_seqs = 0
    handle = open(self.sauce_filepath, "r")
    for record in SeqIO.parse(handle, "fasta"):
        chunk_n = int(n_seqs / chunk_size)
        chunk_dir = self.target_dirpath + "/chunk_" + str(chunk_n) + "/" + record.id
        ensure_dir(chunk_dir + "/")
        f = open(chunk_dir + "/seq.fasta", "w")
        SeqIO.write(record, f, "fasta")
        f.close()
        n_seqs += 1
        if n_seqs % 100 == 0:
            print("[" + str(n_seqs) + "] fasta files written")
    handle.close()

def simple_file_logger(name, log_file_name):
    lgr = logging.getLogger(name)
    if lgr.handlers:
        return lgr
    lgr.propagate = False

    log_dir = resolve_path(mbs_config.MBS_LOG_PATH)
    ensure_dir(log_dir)
    lgr.setLevel(logging.INFO)

    formatter = logging.Formatter("%(levelname)8s | %(asctime)s | %(message)s")

    logfile = os.path.join(log_dir, log_file_name)
    fh = TimedRotatingFileHandler(logfile, backupCount=10, when="midnight")
    fh.setFormatter(formatter)
    # add the file handler to this logger
    lgr.addHandler(fh)

    if LOG_TO_STDOUT:
        sh = logging.StreamHandler(sys.stdout)
        sh.setFormatter(formatter)
        lgr.addHandler(sh)

    return lgr

def update_times_idx(self):
    self.times_idx += 1
    ensure_dir(op.join(self.path, str(self.idx), str(self.times_idx)))
    ensure_dir(
        op.join(self.path, str(self.idx), str(self.times_idx), self.prefix))
    self.counter = 0

def plot_snr_figure(self, dirname, snr_acc, snrs):
    """Plot classification accuracy as a function of SNR.

    Args:
        dirname (str): directory in which to store the figure
        snr_acc (1-D array): classification accuracy at each SNR
        snrs (1-D array): the SNR values/labels
    """
    # Plot accuracy curve
    plt.switch_backend('agg')
    now_time = datetime.datetime.now()
    now_time = now_time.strftime("%m-%d_%H-%M")
    util.ensure_dir(dirname)
    plt.plot(snrs, snr_acc)
    plt.xlabel("Signal to Noise Ratio")
    plt.ylabel("Classification Accuracy")
    plt.title("Classification Accuracy On Different SNR")
    plt.savefig(
        os.path.join(
            dirname,
            'Classification Accuracy On Different SNR' + now_time + '.jpg'))
    print(
        "Figure 'Classification Accuracy On Different SNR' generated successfully"
    )

def __init__(self, conf, uuid, namespace=None, service=None, pids_path=None,
             default_cmd_callback=None, cmd_addl_env=None, pid_file=None,
             run_as_root=False):
    self.conf = conf
    self.uuid = uuid
    self.namespace = namespace
    self.default_cmd_callback = default_cmd_callback
    self.cmd_addl_env = cmd_addl_env
    self.pids_path = pids_path or self.conf.external_pids
    self.pid_file = pid_file
    self.run_as_root = run_as_root

    if service:
        self.service_pid_fname = 'pid.' + service
        self.service = service
    else:
        self.service_pid_fname = 'pid'
        self.service = 'default-service'

    utils.ensure_dir(os.path.dirname(self.get_pid_file_name()))

def submit_emmy_experiment(nt, N, tgs, is_dp, procs, ts, kernel, outfile, target_dir):
    import os
    import subprocess
    from string import Template
    from utils import ensure_dir

    job_template = Template(
        """export OMP_NUM_THREADS=10; export I_MPI_PIN_DOMAIN=socket; export KMP_AFFINITY=granularity=fine,scatter; mpirun_rrze -np $procs -npernode 2 -- $exec_path --n-tests 2 --disable-source-point --npx 1 --npy $procs --npz 1 --nx $N --ny $N --nz $N --verbose 1 --halo-concatenate 1 --target-ts $ts --wavefront 1 --nt $nt --target-kernel $kernel --thread-group-size $tgs | tee $outfile""")

    target_dir = os.path.join(os.path.abspath("."), target_dir)
    ensure_dir(target_dir)
    outpath = os.path.join(target_dir, outfile)

    if is_dp == 1:
        exec_path = os.path.join(os.path.abspath("."), "build_dp/mwd_kernel")
    else:
        exec_path = os.path.join(os.path.abspath("."), "build/mwd_kernel")

    job_cmd = job_template.substitute(
        nt=nt,
        N=N,
        tgs=tgs,
        procs=procs,
        ts=ts,
        kernel=kernel,
        outfile=outpath,
        exec_path=exec_path,
        target_dir=target_dir,
    )

    print job_cmd
    sts = subprocess.call(job_cmd, shell=True)

def submit_emmy_experiment(nwf, tgs, wf, is_dp, th, ts, tb, kernel, outfile, target_dir):
    import os
    import subprocess
    from string import Template
    from utils import ensure_dir

    job_template = Template(
        """export OMP_NUM_THREADS=$thn; likwid-perfctr -m -s 0x03 -g MEM -C S0:0-$th $exec_path --n-tests 2 --disable-source-point --npx 1 --npy 1 --npz 1 --nx 960 --ny 960 --nz 960 --verbose 1 --target-ts $ts --wavefront $wf --nt 100 --target-kernel $kernel --t-dim $tb --thread-group-size $tgs --num-wavefronts $nwf --cache-size 0 | tee $outfile""")
    #"""export OMP_NUM_THREADS=$thn; numactl -N 0 $exec_path --n-tests 2 --disable-source-point --npx 1 --npy 1 --npz 1 --nx 480 --ny 480 --nz 480 --verbose 1 --target-ts $ts --wavefront $wf --nt 100 --target-kernel $kernel --t-dim $tb --thread-group-size $tgs | tee $outfile""")

    target_dir = os.path.join(os.path.abspath("."), target_dir)
    ensure_dir(target_dir)
    outpath = os.path.join(target_dir, outfile)

    if is_dp == 1:
        exec_path = os.path.join(os.path.abspath("."), "build_dp/mwd_kernel")
    else:
        exec_path = os.path.join(os.path.abspath("."), "build/mwd_kernel")

    job_cmd = job_template.substitute(nwf=nwf, tgs=tgs, wf=wf, th=(th - 1),
                                      thn=th, ts=ts, tb=tb, kernel=kernel,
                                      outfile=outpath, exec_path=exec_path,
                                      target_dir=target_dir)

    print job_cmd
    sts = subprocess.call(job_cmd, shell=True)

def run(self):
    self.info("Starting up... ")
    self.info("PID is %s" % os.getpid())
    self.info("TEMP DIR is '%s'" % self.temp_dir)
    if self.tags:
        self.info("Tags are: %s" % document_pretty_string(self.tags))
    else:
        self.info("No tags configured")

    ensure_dir(self._temp_dir)
    self._update_pid_file()

    # Start the command server
    self._start_command_server()

    # start the backup processor
    self._backup_processor.start()

    # start the restore processor
    self._restore_processor.start()

    # wait for the backup processor to finish
    self._backup_processor.join()

    # wait for the restore processor to finish
    self._restore_processor.join()

    self.info("Engine completed")
    self._pre_shutdown()

def process_audio_poly(wavdir, outdir, tol, ssm_read_pk, read_pk, n_jobs=4,
                       tonnetz=False):
    utils.ensure_dir(outdir)
    files = glob.glob(os.path.join(wavdir, "*.wav"))
    Parallel(n_jobs=n_jobs)(delayed(process_piece)(
        wav, outdir, tol, ssm_read_pk, read_pk, tonnetz) for wav in files)

def copy_image_subset(source_dir, target_dir, subset_inds):
    frames_list = os.listdir(source_dir)
    frames_list.sort()
    utils.ensure_dir(target_dir, True)
    for i in subset_inds:
        if os.path.exists(os.path.join(source_dir, frames_list[i])):
            shutil.copy2(os.path.join(source_dir, frames_list[i]),
                         os.path.join(target_dir, frames_list[i]))

def parse_sentences(self, entries):
    os.chdir('magyarlanc')
    self.tmp_dir = '../' + self.cfg.get('data', 'tmp_dir')
    ensure_dir(self.tmp_dir)
    with NamedTemporaryFile(dir=self.tmp_dir, delete=False) as in_file:
        json.dump(entries, in_file)
        in_file_name = in_file.name
    with NamedTemporaryFile(dir=self.tmp_dir, delete=False) as out_file:
        out_file_name = out_file.name
    success = self.run_magyarlanc(in_file_name, out_file_name)
    if success:
        print 'magyarlanc ok'
    else:
        print 'magyarlanc nem ok'
    # with open(out_file_name) as out_file:
    #     new_entries = json.load(out_file)
    new_entries = self.parse_output(out_file_name)
    os.chdir('..')
    return new_entries

def recursively_extract_features(path, dest_dir, wl=20, ws=10, nf=24, nceps=19,
                                 fmin=0., fmax=4000., d_w=2, pre=0.97, mel=True):
    for f in os.listdir(path):
        f_location = os.path.join(path, f)
        if utils.is_wav_file(f_location):
            logger.debug('Extract features: ' + f_location)
            stripped_filename = utils.strip_filename(f)
            dest_file = os.path.join(dest_dir, stripped_filename)
            dest_file += '.hdf5'
            mfcc = feature_extraction(f_location, wl, ws, nf, nceps, fmin,
                                      fmax, d_w, pre, mel)
            utils.ensure_dir(dest_dir)
            bob.io.save(mfcc, dest_file)
            logger.debug('saving mfcc to ' + dest_file)
        elif os.path.isdir(f_location):
            logger.debug('Extract features: ' + f_location)
            new_path = os.path.join(path, f)
            new_dest_dir = os.path.join(dest_dir, f)
            recursively_extract_features(new_path, new_dest_dir)
        else:
            logger.info('Unknown file type: ' + str(f_location))
            continue

def __init__(self, cfg):
    self.cfg = cfg
    self.out_fn = self.cfg.get("machine", "ext_definitions")
    ensure_dir(os.path.dirname(self.out_fn))
    dep_map_fn = cfg.get("deps", "dep_map")
    self.read_dep_map(dep_map_fn)
    self.lemmatizer = Lemmatizer(cfg)

def extract_i_vectors(gmm_stats_path, ivec_dir, ivec_machine):
    for f in os.listdir(gmm_stats_path):
        f_location = os.path.join(gmm_stats_path, f)
        logger.debug('extract_i_vectors: ' + f_location)
        if utils.is_hdf5_file(f_location):
            stripped_filename = utils.strip_filename(f)
            dest_file = os.path.join(ivec_dir, stripped_filename)
            dest_file += '.hdf5'
            utils.ensure_dir(ivec_dir)
            gmm_stats_to_ivec(f_location, dest_file, ivec_machine)
            logger.debug('saving ivec ' + dest_file)
        elif os.path.isdir(f_location):
            new_path = os.path.join(gmm_stats_path, f)
            new_dest_dir = os.path.join(ivec_dir, f)
            extract_i_vectors(new_path, new_dest_dir, ivec_machine)
        else:
            logger.info('Unknown file type: ' + str(f_location))
            continue

def recursive_execute_gmms_on_machine(gmm_stats_path, dest_path, machine):
    for f in os.listdir(gmm_stats_path):
        gmm_stats_file = os.path.join(gmm_stats_path, f)
        logger.debug('recursive_execute_gmms_on_machine: ' + gmm_stats_file)
        if utils.is_hdf5_file(gmm_stats_file):
            stripped_filename = utils.strip_filename(f)
            dest_file = os.path.join(dest_path, stripped_filename)
            dest_file += '.hdf5'
            utils.ensure_dir(dest_path)
            # load the GMM stats file
            gmm_stats = bob.machine.GMMStats(bob.io.HDF5File(gmm_stats_file))
            # extract i-vector
            output = machine.forward(gmm_stats)
            # save them!
            bob.io.save(output, dest_file)
            print 'saving ivec ' + dest_file
        elif os.path.isdir(gmm_stats_file):
            new_path = os.path.join(gmm_stats_path, f)
            new_dest_dir = os.path.join(dest_path, f)
            recursive_execute_gmms_on_machine(new_path, new_dest_dir, machine)
        else:
            logger.info('Unknown file type: ' + str(gmm_stats_file))
            continue

def __init__(self, task_type_name):
    super(TaskLogFileSweeper, self).__init__(
        schedule=Schedule(frequency_in_seconds=3600))
    self._logs_dir = task_log_dir(task_type_name)
    self._archive_logs_dir = os.path.join(self._logs_dir, "ARCHIVE")
    ensure_dir(self._logs_dir)
    ensure_dir(self._archive_logs_dir)

def compute_gmm_sufficient_statistics(ubm, src_dir, dest_dir):
    for f in os.listdir(src_dir):
        f_location = os.path.join(src_dir, f)
        logger.debug('compute_gmm_sufficient_statistics: ' + f_location)
        if utils.is_hdf5_file(f_location):
            stripped_filename = utils.strip_filename(f)
            dest_file = os.path.join(dest_dir, stripped_filename)
            dest_file += '.hdf5'
            feature = bob.io.load(f_location)
            gmm_stats = bob.machine.GMMStats(ubm.dim_c, ubm.dim_d)
            ubm.acc_statistics(feature, gmm_stats)
            utils.ensure_dir(dest_dir)
            gmm_stats.save(bob.io.HDF5File(dest_file, 'w'))
            logger.debug('saving gmm_stats to ' + dest_file)
        elif os.path.isdir(f_location):
            new_path = os.path.join(src_dir, f)
            new_dest_dir = os.path.join(dest_dir, f)
            compute_gmm_sufficient_statistics(ubm, new_path, new_dest_dir)
        else:
            logger.info('Unknown file type: ' + str(f_location))
            continue

def main(args):
    utils.ensure_dir(
        [join(args.model_path, args.id), join(args.pred_path, args.id)])

    print('-' * 30)
    print('Loading and preprocessing test data...')
    print('-' * 30)
    try:
        imgs_test, imgs_id_test = utils.load_test_data(args.data_path)
    except FileNotFoundError:
        utils.create_test_data(args.data_path, args.data_path)
        imgs_test, imgs_id_test = utils.load_test_data(args.data_path)

    print('-' * 30)
    print('Loading saved model...')
    print('-' * 30)
    net = model_build.load_model(join(args.model_path, args.id))
    output_shape = net.layers[0].output_shape[1:-1]
    if len(output_shape) == 2:
        imgs_test = utils.preprocess_x(imgs_test, new_shape=output_shape)
    else:
        imgs_test = utils.preprocess_x(imgs_test, new_shape=(96, 96))

    print('-' * 30)
    print('Predicting masks on test data...')
    print('-' * 30)
    imgs_mask_test, presence_test = net.predict(imgs_test, verbose=1)
    print(imgs_mask_test.shape, presence_test.shape)
    np.save(join(args.pred_path, args.id, 'imgs_mask_test.npy'),
            imgs_mask_test)
    np.save(join(args.pred_path, args.id, 'imgs_presence_test.npy'),
            presence_test)

def compileJsFiles(self, rootpath):
    print "**Begin JS minification**"
    minDir = os.path.join(rootpath, 'arches', 'Media', 'js', 'min')
    print "minDir: " + minDir
    mf = os.path.join(minDir, '1231_11_Arches.min.js')
    utils.ensure_dir(mf)
    minfile = open(mf, 'w')
    print "Minifying...."
    buildfiles = JsFiles(debug=False)
    mediapath = os.path.join(rootpath, 'arches', 'Media')
    mindata = self.getDataFromFiles(buildfiles, mediapath)
    mintext = jsmin(mindata.getvalue())
    print "Minification complete. Writing .js file"
    minfile.write(mintext)
    minfile.close()
    print "Writing .gzip file"
    gzipfile = gzip.open(os.path.join(minDir, '1231_11_Arches.min.js.gz'), 'wb')
    gzipfile.writelines(mintext)
    gzipfile.close()
    print "**End JS minification**"

def get_logger():
    global logger, _logging_level
    if logger:
        return logger

    logger = logging.getLogger("MongoctlLogger")

    log_file_name = "mongoctl.log"
    conf_dir = mongoctl_globals.DEFAULT_CONF_ROOT
    log_dir = utils.resolve_path(os.path.join(conf_dir, LOG_DIR))
    utils.ensure_dir(log_dir)

    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter("%(levelname)8s | %(asctime)s | %(message)s")
    logfile = os.path.join(log_dir, log_file_name)
    fh = TimedRotatingFileHandler(logfile, backupCount=50, when="midnight")
    fh.setFormatter(formatter)
    fh.setLevel(logging.DEBUG)
    # add the handler to the root logger
    logging.getLogger().addHandler(fh)

    global _log_to_stdout
    if _log_to_stdout:
        sh = logging.StreamHandler(sys.stdout)
        std_formatter = logging.Formatter("%(message)s")
        sh.setFormatter(std_formatter)
        sh.setLevel(_logging_level)
        logging.getLogger().addHandler(sh)

    return logger

def database_ish():
    ensure_dir(os.getcwd() + "/build/")
    initialise()
    image_store = create_engine(direc)
    Session = sessionmaker(bind=image_store)
    session = Session()
    return session

def main():
    app_settings = settings.get_app_settings()
    api = TwitterAPI.TwitterAPI(**app_settings)
    store_data = store.get_data()

    search_term = 'vnnlp'
    query = {'screen_name': search_term}
    filename = 'user_timeline/{}.yaml'.format(search_term)
    utils.ensure_dir(filename)

    if 'user_timeline' in store_data and 'max_id' in store_data['user_timeline']:
        query['max_id'] = store_data['user_timeline']['max_id'] - 1

    max_id = None
    try:
        with open(filename, 'a') as output_file:
            r = TwitterAPI.TwitterPager(api, 'statuses/user_timeline', query)
            for tweet in r.get_iterator():
                yaml.dump([tweet], output_file, default_flow_style=False)
                if 'id' in tweet:
                    max_id = tweet['id']
    except KeyboardInterrupt:
        pass

    if not 'user_timeline' in store_data:
        store_data['user_timeline'] = {}
    store_data['user_timeline']['max_id'] = max_id
    store.store_data(store_data)

def _diff_w_ct_formula_name(self, name1, name2):
    utils.ensure_dir(FORMULAS_DIR)
    filename = '{}-{}-diff-w_ct-{}.dimacs'.format(self.enc_strategy[2:],
                                                  name1, name2)
    fname = os.path.join(FORMULAS_DIR, filename)
    return fname

def print_edges(self, edge_fn):
    ensure_dir(os.path.dirname(edge_fn))
    logging.info("printing edges to {0}".format(edge_fn))
    with open(edge_fn, 'w') as f:
        for c, i in enumerate(self.coocc[0]):
            j = self.coocc[1][c]
            k = self.coocc[2][c]
            f.write("{0}\t{1}\t{2}\n".format(i, j, k))

def _trojan_formula_name(self, name, label):
    utils.ensure_dir(FORMULAS_DIR)
    filename = '{}-{}-trojan-label_{}.dimacs'.format(
        self.enc_strategy[2:], name, label)
    fname = os.path.join(FORMULAS_DIR, filename)
    return fname

def _robust_formula_name(self, name, perturb_k, perturb_num):
    utils.ensure_dir(FORMULAS_DIR)
    filename = '{}-{}-robustness-perturb_{}-id_{}.dimacs'.format(
        self.enc_strategy[2:], name, perturb_k, perturb_num)
    fname = os.path.join(FORMULAS_DIR, filename)
    return fname

def save_mapped_text(pages_text, path_file):
    dir_save = path_file.replace('.pdf', '/')
    print(dir_save)
    ensure_dir(dir_save)
    for idx, page in enumerate(pages_text):
        page_file = open(dir_save + 'page_' + str(idx) + '.txt',
                         mode="w", encoding="utf-8")
        page_file.write(page)
        page_file.close()

def __init__(self, conf, network, process_monitor, version=None, plugin=None):
    super(DhcpLocalProcess, self).__init__(conf, network, process_monitor,
                                           version, plugin)
    self.confs_dir = self.get_confs_dir(conf)
    self.network_conf_dir = os.path.join(self.confs_dir, network.id)
    utils.ensure_dir(self.network_conf_dir)
    LOG.debug("__init__ DhcpLocalProcess ok")

def __init__(self):
    self._logs_dir = 'logs'
    ensure_dir(self._logs_dir)

    self.c = Config()
    self._run_name = Model._get_run_name()
    self.model = None  # TODO: self._model?
    self._datasets = {}
    self._data_generators = {}

def __init__(self, cfg):
    self.cfg = cfg
    self.text_to_4lang = TextTo4lang(cfg)
    self.graph_dir = self.cfg.get("qa", "graph_dir")
    self.dep_dir = self.cfg.get("qa", "deps_dir")
    ensure_dir(self.graph_dir)
    ensure_dir(self.dep_dir)
    self.word_similarity = WordSimilarity(cfg)

def batch_raw2jpg(in_dir=RAW_DIR, out_dir=IMG_DIR):
    ensure_dir(out_dir)
    for in_f, out_f in files(in_dir=in_dir, out_dir=out_dir,
                             in_ext=".nef", out_ext=".jpg"):
        raw2jpg(in_f, out_f)

def setidx(self, idx):
    self.idx = idx
    ensure_dir(op.join(self.path, str(self.idx)))
    names = os.listdir(op.join(self.path, str(self.idx)))
    names = list(filter(lambda x: x.isdigit(), names))
    names = list(map(int, names))
    self.times_idx = (max(names)) if len(names) > 0 else 0

def exe(input_file):
    clean()
    input(input_file)
    strat_from_cars()
    out = 'data/from_cars/'
    utils.ensure_dir(out)
    save_cars(out + input_file + '_out')
    print('done')

def compute_all_features(audio_file, audio_beats=False):
    """Computes all the features for a specific audio file and its respective
    human annotations.

    Returns
    -------
    features : dict
        Dictionary with the following features:
            mfcc : np.array
                Mel Frequency Cepstral Coefficients representation
            hpcp : np.array
                Harmonic Pitch Class Profiles
            tonnetz : np.array
                Tonal Centroids (or Tonnetz)
    """
    # Makes sure the output features folder exists
    utils.ensure_dir(OUTPUT_FEATURES)
    features_file = os.path.join(OUTPUT_FEATURES,
                                 os.path.basename(audio_file) + ".json")

    # If already precomputed, read and return
    if os.path.exists(features_file):
        with open(features_file, "r") as f:
            features = json.load(f)
        return list_to_array(features)

    # Load Audio
    logging.info("Loading audio file %s" % os.path.basename(audio_file))
    audio = ES.MonoLoader(filename=audio_file, sampleRate=SAMPLE_RATE)()
    duration = len(audio) / float(SAMPLE_RATE)

    # Estimate Beats
    features = {}
    ticks, conf = compute_beats(audio)
    ticks = np.concatenate(([0], ticks, [duration]))  # Add first and last time
    ticks = essentia.array(np.unique(ticks))
    features["beats"] = ticks.tolist()

    # Compute Beat-sync features
    features["mfcc"], features["hpcp"], features["tonnetz"] = \
        compute_beatsync_features(ticks, audio)

    # Save output as audio file
    if audio_beats:
        logging.info("Saving Beats as an audio file")
        marker = ES.AudioOnsetsMarker(onsets=ticks, type='beep',
                                      sampleRate=SAMPLE_RATE)
        marked_audio = marker(audio)
        ES.MonoWriter(filename='beats.wav',
                      sampleRate=SAMPLE_RATE)(marked_audio)

    # Save features
    with open(features_file, "w") as f:
        json.dump(features, f)

    return list_to_array(features)

def _write_egg_info_arcname(self, name, dest):
    ensure_dir(dest)
    source = self.z.open(name)
    try:
        with file(dest, "wb") as target:
            shutil.copyfileobj(source, target)
        self.files.append(dest)
    finally:
        source.close()

def test_load_save_args(self):
    parser = argparse.ArgumentParser()
    args = parser.parse_args(args=[])
    args.__dict__ = {"name": "test", "foo": "bar"}
    path = os.path.join(TMP, "args")
    ensure_dir(path)
    save_args(args, path)
    args_loaded = load_args(path)
    self.assertEqual(args, args_loaded)

def z_travel(self, z3d, v_pts, limit=3.0, int_step_size=0.66,
             save_sample_to=None, save_start_id=0):
    """
    Traverse the latent space to visualise the learnt representations.
    :param z3d: [B, K, D]
    :param v_pts: [B, L, dView]
    :param limit: numerical bounds for the traversal
    :param int_step_size: traversal step size (interpolation gap between traverse points)
    :param save_sample_to: save the output to this dir
    :param save_start_id: save the output as file
    """
    from torchvision.utils import save_image
    save_dir = os.path.join(save_sample_to, 'disen_3d')
    utils.ensure_dir(save_dir)

    B, K, D = z3d.size()
    V = v_pts.size(1)
    v_feat = self.view_encoder(v_pts.reshape(B * V, -1))  # output [B*V, 8]
    v_feat = v_feat.reshape(B, V, -1).unsqueeze(1).repeat(1, K, 1, 1)
    H, W = tuple(self.config.image_size)

    interpolation = torch.arange(-limit, limit + 0.1, int_step_size)
    gifs = []

    # ------ Select the object of interest and the informative latent dimensions here ------
    k = 2  # we select only one object out of K for analysis
    # SPECIFY_DIMENSIONS = [9, 31]
    # D = len(SPECIFY_DIMENSIONS)
    # ---------------------------------------------------------------------------------------

    for d in range(D):
        for int_val in interpolation:
            z = z3d.clone()  # [B, K, D]
            z[:, k, d] += int_val
            for vq in range(V):
                yq = v_feat[:, :, vq, :]
                z_yq = self.projector(torch.cat(
                    (z.reshape(B * K, -1), yq.reshape(B * K, -1)), dim=-1))
                mask_logits, mu_x = self.decode(z_yq)
                gifs.append(torch.sum(
                    torch.softmax(mask_logits, dim=1) * mu_x, dim=1).data)

    gifs = torch.cat(gifs, dim=0)
    gifs = gifs.reshape(D, len(interpolation), V, B, 3, H, W).permute(
        [3, 0, 1, 2, 4, 5, 6])

    for b in range(B):
        save_batch_dir = os.path.join(save_dir, str(save_start_id + b))
        utils.ensure_dir(save_batch_dir)
        b_gifs = gifs[b, ...]
        b_gifs = torch.cat(b_gifs.chunk(V, dim=2), dim=0).squeeze(2)

        key = 'frame'
        for iid in range(len(interpolation)):
            vis.torch_save_image_enhanced(
                tensor=b_gifs[:, iid, ...].cpu(),
                filename=os.path.join(save_batch_dir,
                                      '{}_{:02d}.jpg'.format(key, iid)),
                nrow=D, pad_value=1, enhance=True)

        vis.grid2gif(str(os.path.join(save_batch_dir, key + '*.jpg')),
                     str(os.path.join(save_batch_dir, 'disten3d.gif')),
                     delay=20)
        print(" -- traversed latent space for {} scene samples".format(b + 1))

def __init__(self, model, loss, metrics, optimizer, resume, config):
    """
    :param model:
    :param loss:
    :param metrics: a function list.
    :param optimizer:
    :param resume: bool, if resume from checkpoints.
    :param config:
    """
    self.config = config
    self.logger = logging.getLogger('MC')

    # setup GPU device if available, move model into configured device
    self.device, device_ids = self._prepare_device(config['n_gpu'])
    self.model = model.to(self.device)
    # data parallel
    if len(device_ids) > 1:
        self.model = torch.nn.DataParallel(model, device_ids=device_ids)

    self.loss = loss
    self.metrics = metrics  # function list
    self.optimizer = optimizer

    self.epochs = config['trainer']['epochs']
    self.save_freq = config['trainer']['save_freq']
    self.verbosity = config['trainer']['verbosity']  # how many epochs between log prints

    # configuration to monitor model performance and save best
    self.monitor = config['trainer']['monitor']  # monitor which configured metric
    self.monitor_mode = config['trainer']['monitor_mode']
    assert self.monitor_mode in ['min', 'max', 'off']
    self.monitor_best = math.inf if self.monitor_mode == 'min' else -math.inf
    self.start_epoch = 1

    self.log_step = config['trainer']['log_step']

    # setup directory for checkpoint saving
    # start_time = datetime.datetime.now().strftime('%m%d_%H%M%S')
    self.checkpoint_dir = os.path.join(config['trainer']['save_dir'],
                                       config['arch']['type'],
                                       config["name"])

    # setup visualization writer instance
    # writer_dir = os.path.join(config['visualization']['log_dir'], config['arch']['type'])
    writer_dir = os.path.join(self.checkpoint_dir,
                              config['visualization']['log_dir'])
    self.writer = SummaryWriter(log_dir=writer_dir)

    # Save configuration file into checkpoint directory:
    ensure_dir(self.checkpoint_dir)
    config_save_path = os.path.join(self.checkpoint_dir, 'config.json')
    with open(config_save_path, 'w') as f:
        json.dump(config, f, indent=4, sort_keys=False)

    # if resume from checkpoint
    if resume:
        self._resume_checkpoint(resume)

def get_file(self, filename):
    pkg = utils.Package.from_filename(filename)
    if pkg.filename in self.packages:
        return self.packages[pkg.filename]

    directory = os.path.join('packages', pkg.name.lower())
    utils.ensure_dir(directory)

    filepath = os.path.join(directory, pkg.filename)
    self.packages[pkg.filename] = utils.PackageReadWriter(filepath)
    return self.packages[pkg.filename]

def main(infile, outdir, config):
    outdir = os.path.abspath(outdir)
    ensure_dir(outdir, False)

    cparser = SafeConfigParser()
    cparser.read(config)
    labelfile = cparser.get('configs', 'labelfile')

    infile = _check_labelling(infile, labelfile)
    infile = _verify_valid_distance(infile)
    infile = _filterPredictionsByClass_reformat2gff(infile, outdir)

    bname = os.path.basename(infile)
    pdf_rplots = os.path.join(outdir, bname + '.plots.pdf')

    dat = _read_dat(infile)

    if all(dat['correlation'] == ''):
        ignoreCorr = True
    else:
        ignoreCorr = False

    dat_mirna = _item_findClosestPartner(dat, 'mirna', ignoreCorr)
    dat_tss = _item_findClosestPartner(dat, 'tss', ignoreCorr)

    print '## Generating plot file...'
    grdevices = importr('grDevices')
    grdevices.pdf(file=pdf_rplots)

    _plt_pier(dat, 'All predicted TSS-miRNA pairs', True)
    _plt_distr(dat, 'distance', 'All predicted tss-miRNA pairs', False)
    _plt_distr(dat, 'distance', 'All predicted tss-miRNA pairs')
    if not ignoreCorr:
        _plt_distr(dat, 'correlation', 'All predicted tss-miRNA pairs', False)
        _plt_distr(dat, 'correlation', 'All predicted tss-miRNA pairs')
    _plt_percountr(dat)

    _plt_pier(dat_tss, 'TSS (label from closest miRNA)')
    _plt_distr(dat_tss, 'distance', 'TSS to closest miRNA', False)
    _plt_distr(dat_tss, 'distance', 'TSS to closest miRNA')
    if not ignoreCorr:
        _plt_distr(dat_tss, 'correlation', 'TSS to closest miRNA', False)
        _plt_distr(dat_tss, 'correlation', 'TSS to closest miRNA')

    _plt_pier(dat_mirna, 'miRNA')
    _plt_distr(dat_mirna, 'distance', 'miRNA to closest TSS', False)
    _plt_distr(dat_mirna, 'distance', 'miRNA to closest TSS')
    if not ignoreCorr:
        _plt_distr(dat_mirna, 'correlation', 'miRNA to closest TSS', False)
        _plt_distr(dat_mirna, 'correlation', 'miRNA to closest TSS')

    grdevices.dev_off()

    print '## Plot file:'
    print pdf_rplots
    return pdf_rplots

def generate_datasets(inputs_files, outputs_files, masks_files, config_path):
    cf = imp.load_source('config', config_path)
    num_directions = cf.num_directions
    num_unique_vels = len(cf.unique_vel_idxs)

    train_data = []
    test_data = []
    for idx, maps in enumerate([cf.training_maps, cf.test_maps]):
        inputs = np.empty((0, cf.nn_input_size, cf.nn_input_size))
        if cf.conditional_prob:
            outputs = np.empty((0, cf.nn_output_size, cf.nn_output_size,
                                num_directions, num_directions))
            masks = np.empty((0, cf.nn_output_size, cf.nn_output_size,
                              num_directions, num_directions))
        else:
            outputs = np.empty(
                (0, cf.nn_output_size, cf.nn_output_size, num_unique_vels))
            masks = np.empty(
                (0, cf.nn_output_size, cf.nn_output_size, num_unique_vels))

        for map in maps:
            map_io_folder = cf.data_folder + '/network_io/' + map + '/' + cf.algo_str
            for data in ['inputs', 'outputs', 'masks']:
                f_name = map_io_folder + '/' + data + '.npy'
                if data == 'inputs':
                    #print("inputs shape:{}".format(str(np.array(inputs).shape)))
                    #print("file shape:{}".format(str(np.load(f_name).shape)))
                    inputs = np.concatenate([inputs, np.load(f_name)])
                elif data == 'outputs':
                    outputs = np.concatenate([outputs, np.load(f_name)])
                else:
                    # print("masks shape:{}".format(str(np.load(f_name).shape)))
                    masks = np.concatenate([masks, np.load(f_name)])

        if idx == 0:
            train_data.append(inputs)
            train_data.append(outputs)
            train_data.append(masks)
        else:
            test_data.append(inputs)
            test_data.append(outputs)
            test_data.append(masks)

    # split train_data into train/val data
    num_instances = train_data[0].shape[0]
    print("found {} instances.".format(num_instances))
    idxs = np.random.choice(num_instances, int(0.15 * num_instances),
                            replace=False)
    left_idxs = [idx for idx in range(num_instances) if idx not in idxs]
    val_data = [data[idxs] for data in train_data]
    train_data_ = [data[left_idxs] for data in train_data]

    data_ = list(zip(train_data_, val_data, test_data))
    for i, filenames in enumerate([inputs_files, outputs_files, masks_files]):
        for j, filename in enumerate(filenames):
            ensure_dir(filename)
            np.save(filename, data_[i][j])

def enable(self):
    """Enables DHCP for this network by spawning a local process."""
    if self.active:
        self.restart()
    elif self._enable_dhcp():
        LOG.debug("network_conf_dir:%s", self.network_conf_dir)
        utils.ensure_dir(self.network_conf_dir)
        interface_name = self.device_manager.setup(self.network)
        self.interface_name = interface_name
        self.spawn_process()

def get_jython_paths(self):
    self.jython_path = self.cfg.get('stanford', 'jython')
    if not os.path.exists(self.jython_path):
        raise Exception("cannot find jython executable!")

    self.jython_module = os.path.join(
        os.path.dirname(__file__), "stanford_parser.py")

    self.tmp_dir = self.cfg.get('data', 'tmp_dir')
    ensure_dir(self.tmp_dir)

def fetch_one(api, resource, search_term, query):
    filename = '{0}/{1}.yaml'.format(resource.replace('/', '_'), search_term)
    utils.ensure_dir(filename)
    r = api.request(resource, query)
    items = [item for item in r]
    with open(filename, 'w') as output_file:
        yaml.dump(items, output_file, default_flow_style=False)

def create_task_workspace(self, task):
    """
    """
    # ensure task workspace
    try:
        workspace_dir = self.get_task_workspace_dir(task)
        ensure_dir(workspace_dir)
    except Exception, e:
        raise errors.WorkspaceCreationError("Failed to create workspace: %s" % e)

def _fair_formula_name(self, name, constraints_fname):
    utils.ensure_dir(FORMULAS_DIR)
    constraints_fname = os.path.splitext(
        os.path.basename(constraints_fname))[0]
    filename = '{}-{}-{}-fair.dimacs'.format(self.enc_strategy[2:], name,
                                             constraints_fname)
    fname = os.path.join(FORMULAS_DIR, filename)
    return fname

def __init__(self, args):
    self.args = args
    self.max_change = args.max_change
    self.resize = (int(args.resize.split(',')[0]),
                   int(args.resize.split(',')[1]))
    self.model = bnn.BNNModel.factory('%s_trojan' % args.arch, self.resize,
                                      args.num_classes)
    self.load_exist_model()

    utils.ensure_dir(definitions.ADV_TRAIN_DIR)
    self.trojan_gradient_path = os.path.join(
        definitions.ADV_TRAIN_DIR,
        '%s_%s_prefc1_grad' % (args.dataset, args.arch))
    definitions.TROJAN_PREFC1_PATH = self.trojan_gradient_path

    name = self.model.name
    # filename should be self-explanatory
    filename = '%s-' % args.dataset + str(
        self.resize[0] * self.resize[1]) + '-' + name
    self.filename = filename

    # the trained model is saved in the models directory
    # trained_models_dir = os.path.join(definitions.TRAINED_MODELS_DIR, 'adv_train_%s' % args.dataset)
    # utils.ensure_dir(trained_models_dir)
    # self.saved_model = os.path.join(trained_models_dir, filename + '.pt')
    # # the parameters are saved in the models directory
    # self.model_dir = os.path.join(trained_models_dir, filename + '.params')
    # utils.ensure_dir(self.model_dir)
    # trained_models_cp_dir = os.path.join(definitions.TRAINED_MODELS_CP_DIR, 'adv_train_%s' % args.dataset)
    # utils.ensure_dir(trained_models_cp_dir)
    # self.saved_checkpoint_model = os.path.join(trained_models_cp_dir,
    #                                            filename + '.pt')

    self.name = self.model.name

    kwargs = {'num_workers': 1, 'pin_memory': True} if 'cuda' in args else {}
    if 'cuda' in args:
        # self.optimizer = optim.Adam(self.model.parameters(), lr=args.lr)
        if 'mnist' == args.dataset[:5]:
            self.train_loader, self.test_loader = bnn_dataset.create_mnist_loaders(
                self.resize, args.batch_size, args.test_batch_size, kwargs)
        else:
            self.train_loader, self.test_loader = bnn_dataset.create_data_loaders(
                args.data_folder, args.batch_size, args.test_batch_size,
                kwargs)

    self.criterion = nn.CrossEntropyLoss()

    if 'cuda' in args:
        if args.cuda:
            self.model.cuda()

def __init__(self, network_creator, batch_env, args):
    logging.debug('PAAC init is started')
    self.args = copy.copy(vars(args))
    self.checkpoint_dir = join_path(self.args['debugging_folder'],
                                    self.CHECKPOINT_SUBDIR)
    ensure_dir(self.checkpoint_dir)

    checkpoint = self._load_latest_checkpoint(self.checkpoint_dir)
    self.last_saving_step = checkpoint['last_step'] if checkpoint else 0

    self.final_rewards = []
    self.global_step = self.last_saving_step
    self.network = network_creator()
    self.batch_env = batch_env
    self.optimizer = optim.RMSprop(
        self.network.parameters(),
        lr=self.args['initial_lr'],
        eps=self.args['e'],
    )  # RMSprop defaults: momentum=0., centered=False, weight_decay=0

    if checkpoint:
        logging.info('Restoring agent variables from previous run')
        self.network.load_state_dict(checkpoint['network_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    self.lr_scheduler = LinearAnnealingLR(self.optimizer,
                                          self.args['lr_annealing_steps'])
    # pytorch documentation says:
    # In most cases it's better to use the CUDA_VISIBLE_DEVICES environment variable.
    # Therefore to specify a particular gpu one should use CUDA_VISIBLE_DEVICES.
    self.use_cuda = self.args['device'] == 'gpu'
    # get_initial_state should return the state of the rnn layers
    self.use_rnn = hasattr(self.network, 'get_initial_state')
    self._tensors = torch.cuda if self.use_cuda else torch

    # envs receive actions in one-hot encoding!
    self.action_codes = np.eye(batch_env.num_actions)
    self.gamma = self.args['gamma']  # future rewards discount factor
    self.entropy_coef = self.args['entropy_regularisation_strength']
    self.loss_scaling = self.args['loss_scaling']  # 5.
    self.critic_coef = self.args['critic_coef']  # 0.25
    self.eval_func = None

    if self.args['clip_norm_type'] == 'global':
        self.clip_gradients = nn.utils.clip_grad_norm_
    elif self.args['clip_norm_type'] == 'local':
        self.clip_gradients = utils.clip_local_grad_norm
    elif self.args['clip_norm_type'] == 'ignore':
        self.clip_gradients = lambda params, _: utils.global_grad_norm(params)
    else:
        raise ValueError('Norm type({}) is not recognized'.format(
            self.args['clip_norm_type']))

    logging.debug('Paac init is done')

def download_url(url, local_path, max_retries=3):
    """
    Download a remote URL to the location local_path with retries.

    On download, the file size is first obtained and stored.  When the download
    completes, the file size is compared to the stored file.  This prevents broken
    downloads from contaminating the processing chain.

    :param url: the remote URL
    :param local_path: the path to the local file
    :param max_retries: how many times we may retry to download the file
    """
    logging.info('download_url %s as %s' % (url, local_path))
    r = requests.get(url, stream=True)
    content_size = int(r.headers['Content-Length'])

    # dump the correct file size to an info file next to the grib file
    # when re-using the GRIB2 file, we check its file size against this record
    # to avoid using partial files
    info_path = local_path + '.size'
    open(ensure_dir(info_path), 'w').write(str(content_size))

    # stream the download to file
    with open(ensure_dir(local_path), 'wb') as f:
        for chunk in r.iter_content(1024 * 1024):
            f.write(chunk)

    # does the server accept byte range queries? e.g. the NOMADs server does
    accepts_ranges = 'bytes' in r.headers.get('Accept-Ranges', '')

    retries_available = max_retries
    file_size = osp.getsize(local_path)
    while file_size < content_size:
        if retries_available > 0:
            logging.info('download_url trying again, retries available %d' % retries_available)
            if accepts_ranges:
                # if range queries are supported, try to download only the missing portion of the file
                headers = {'Range': 'bytes=%d-%d' % (file_size, content_size)}
                r = requests.get(url, headers=headers, stream=True)
                with open(local_path, 'ab') as f:
                    for chunk in r.iter_content(1024 * 1024):
                        f.write(chunk)
                retries_available -= 1
                file_size = osp.getsize(local_path)
            else:
                # call the entire function recursively, this will attempt to redownload
                # the entire file and overwrite previously downloaded data
                download_url(url, local_path, max_retries - 1)
                return
        else:
            os.remove(local_path)
            os.remove(info_path)
            raise DownloadError('failed to download file %s' % url)

def convert_to_json(self, input_dir, output_file):
    self._check_format(Format.JSON)
    ensure_dir(os.path.dirname(output_file))
    records = []
    for item in self.iter_from_dir(input_dir):
        record = deepcopy(item['input'])
        for name, value in item['output'].items():
            record[name] = self._prettify(value)
        records.append(record)
    with io.open(output_file, mode='w') as fout:
        json.dump(records, fout, indent=2)

def train(datasets):
    train, train_lbl = datasets['train'], datasets['train_lbl']
    valid, valid_lbl = datasets['valid'], datasets['valid_lbl']

    size = train[0].shape[0]
    nlabels = train_lbl.shape[1]

    # placeholders for input, None means batch of any size
    X = tf.placeholder(tf.float32, shape=(None, size, size, 1), name="X")
    y_ = tf.placeholder(tf.float32, shape=(None, nlabels), name="y_")
    keep = tf.placeholder(tf.float32)

    logits = build_model(X, nlabels, keep)

    # predict from logits using softmax
    predictions = tf.nn.softmax(logits)

    # cross-entropy as the loss
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, y_))

    # create the optimizer to minimize the loss
    optimizer = tf.train.AdamOptimizer(0.005).minimize(loss)

    utils.ensure_dir(MODEL_DIR)
    saver = tf.train.Saver()

    with tf.Session() as session:
        correct_prediction = tf.equal(tf.argmax(predictions, 1),
                                      tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        session.run(tf.initialize_all_variables())

        nbatches = train_lbl.shape[0] / BATCH_SIZE
        for epoch in range(EPOCHS):
            for step in range(nbatches):
                batch_data = train[step*BATCH_SIZE:(step+1)*BATCH_SIZE, :, :, :]
                batch_labels = train_lbl[step*BATCH_SIZE:(step+1)*BATCH_SIZE, :]
                feed_dict = {X: batch_data, y_: batch_labels, keep: 0.5}
                optimizer.run(feed_dict=feed_dict)
                #print 'Batch labels:\n', batch_labels
                #print 'Predictions:\n', preds.eval(feed_dict=feed_dict)
                #print 'Correct pred:\n', correct_prediction.eval(feed_dict=feed_dict)
                if step % int(nbatches / 4) == 0:
                    train_accuracy = accuracy.eval(
                        feed_dict={X: batch_data, y_: batch_labels, keep: 1.0})
                    print("epoch %d, step %d, training accuracy %g"
                          % (epoch+1, step, train_accuracy))
                    valid_accuracy = accuracy.eval(
                        feed_dict={X: valid, y_: valid_lbl, keep: 1.0})
                    print("epoch %d, step %d, valid accuracy %g"
                          % (epoch+1, step, valid_accuracy))

            # save the model
            saver.save(session, os.path.join(
                MODEL_DIR,
                'cnn{}_e{}_s{}.tf'.format(valid[0].shape[0], epoch+1, step)))

def postprocess_cycle(cycle, region_cfg, wksp_path):
    """
    Build rasters from the computed fuel moisture.

    :param cycle: the UTC cycle time
    :param region_cfg: the region configuration
    :param wksp_path: the workspace path
    :return: the postprocessing path
    """
    data_path = compute_model_path(cycle, region_cfg.code, wksp_path)
    year_month = '%04d%02d' % (cycle.year, cycle.month)
    cycle_dir = 'fmda-%s-%04d%02d%02d-%02d' % (region_cfg.code, cycle.year,
                                               cycle.month, cycle.day, cycle.hour)
    postproc_path = osp.join(wksp_path, year_month, cycle_dir)

    # open and read in the fuel moisture values
    d = netCDF4.Dataset(data_path)
    fmc_gc = d.variables['FMC_GC'][:,:,:]
    d.close()

    # read in the longitudes and latitudes
    geo_path = osp.join(wksp_path, '%s-geo.nc' % region_cfg.code)
    d = netCDF4.Dataset(geo_path)
    lats = d.variables['XLAT'][:,:]
    lons = d.variables['XLONG'][:,:]
    d.close()

    fm_wisdom = {
        'native_unit': '-',
        'colorbar': '-',
        'colormap': 'jet_r',
        'scale': [0.0, 0.4]
    }

    esmf_cycle = utc_to_esmf(cycle)
    mf = {"1": {esmf_cycle: {}}}
    manifest_name = 'fmda-%s-%04d%02d%02d-%02d.json' % (region_cfg.code, cycle.year,
                                                        cycle.month, cycle.day, cycle.hour)
    ensure_dir(osp.join(postproc_path, manifest_name))

    for i, name in [(0, '1-hr'), (1, '10-hr'), (2, '100-hr')]:
        fm_wisdom['name'] = '%s fuel moisture' % name
        raster_png, coords, cb_png = scalar_field_to_raster(fmc_gc[:,:,i], lats,
                                                            lons, fm_wisdom)
        raster_name = 'fmda-%s-raster.png' % name
        cb_name = 'fmda-%s-raster-cb.png' % name
        with open(osp.join(postproc_path, raster_name), 'w') as f:
            f.write(raster_png)
        with open(osp.join(postproc_path, cb_name), 'w') as f:
            f.write(cb_png)
        mf["1"][esmf_cycle][name] = {
            'raster': raster_name,
            'coords': coords,
            'colorbar': cb_name
        }

    logging.info('writing manifest file %s' % osp.join(postproc_path, manifest_name))
    json.dump(mf, open(osp.join(postproc_path, manifest_name), 'w'))

    return postproc_path

def compile(self):
    utils.ensure_dir(self._output_prefix + self.output_path)

    string = utils.get_file_contents(self._source).format(
        student_name=self.student.printed_name,
        title='{student_name} - {course_name}'.format(
            student_name=self.student.printed_name,
            course_name=self.course_name
        )
    )

    weasyprint.HTML(
        string=string,
        base_url=self._base_path
    ).write_pdf(self._output_prefix + self.output_file)

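# Every snippet above calls an ensure_dir helper imported from a local "utils"
# (or equivalent) module that is not included in this collection. The exact
# signature clearly differs between projects: some pass an extra flag, one
# passes a list of paths, and one relies on the return value to create a
# file's parent directory. The function below is therefore only a minimal
# illustrative sketch, assuming the helper merely has to create the given
# directory (and its parents) if it does not exist yet and return the path.
import os


def ensure_dir(path):
    """Create directory `path` if it does not already exist and return it."""
    if path and not os.path.exists(path):
        os.makedirs(path)
    return path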