def test_get_dist_matrix(self):
    """Test dnadiff pairwise on multiple bins and get the resulting distance matrix"""
    names = [
        "sample0_gt1000_bin0",
        "sample0_gt1000_bin10",
        "sample0_gt1000_bin11",
        "sample0_gt1000_bin1",
        "sample0_gt1000_bin2",
        "sample1_gt1000_bin51",
        "sample1_gt1000_bin67",
        "sample1_gt1000_bin6",
    ]
    files = [ospj(DATA_PATH, "{}.fa".format(n)) for n in names]
    dnadiff_dist_matrix.run_dnadiff_pairwise(files, names, TMP_BASENAME_DIR)
    for f in [ospj(TMP_BASENAME_DIR, "{}_vs_{}".format(names[i], names[j]),
                   "out.report")
              for i in range(len(names))
              for j in range(i + 1, len(names))]:
        ok_(os.path.exists(f))
    matrix = dnadiff_dist_matrix.get_dist_matrix(TMP_BASENAME_DIR, names, 50)
    matrix_exp = np.genfromtxt(ospj(DATA_PATH, "expected_dist_matrix.tsv"),
                               delimiter="\t")
    np.testing.assert_almost_equal(matrix, matrix_exp, decimal=2)
def render_story(self, story):
    subdir = ospj(OUTPUT_DIR, 'stories', story.author.fs_name)
    makedirs(subdir, exist_ok=True)
    filename = '{}.html'.format(story.fs_name)
    template = self.env.get_template('story.html')
    with open(ospj(subdir, filename), 'w') as f:
        print(template.render(story=story, depth=2), file=f)
def render_story_list_by_author(self, author):
    subdir = ospj(OUTPUT_DIR, 'authors')
    makedirs(subdir, exist_ok=True)
    filename = '{}.html'.format(author.fs_name)
    template = self.env.get_template('stories_by_author.html')
    stories = sorted(author.stories, key=attrgetter('date_sort_key'))
    with open(ospj(subdir, filename), 'w') as f:
        print(template.render(author=author, stories=stories, depth=1), file=f)
def main(args):
    with open(os.path.expanduser(args.config_file), 'r') as fh:
        # safe_load avoids executing arbitrary YAML tags and the bare-load
        # deprecation warning in PyYAML >= 5.1
        config = yaml.safe_load(fh)

    # Create an archive
    dirname = get_dirname(args.repo)
    logger.info("Using repo {}".format(dirname))
    repo = Repo(args.repo)
    assert not repo.is_dirty()
    archive_name = dirname
    git_tag = next((tag for tag in repo.tags if tag.commit == repo.head.commit), None)
    if git_tag:
        # str() is needed: git_tag is a GitPython TagReference, not a string
        archive_name += '_' + str(git_tag)
    else:
        archive_name += '_' + repo.head.object.hexsha
    if args.extra_tag:
        archive_name += '_' + args.extra_tag
    logger.info("Creating repo archive {}".format(archive_name))
    archive = "{0}.tar.gz".format(archive_name)
    archive_path = ospj(args.repo, archive)
    run_dir = os.getcwd()
    os.chdir(args.repo)
    os.system("git-archive-all {}".format(archive))
    os.chdir(run_dir)
    logger.info("Archive created.")

    # Transfer archive to remote
    remote_dir = config['hosts']['irma']['archive_dir']
    Connection('irma').put(archive_path, remote=remote_dir)
    logger.info("Archive successfully transferred to irma")

    # Extract remote archive
    c = Connection('irma')
    remote_archive_path = ospj(remote_dir, archive)
    remote_extracted_path = remote_archive_path.replace('.tar.gz', '')
    c.run('rm -r {} || true'.format(remote_extracted_path))
    c.run('cd {}; tar -xvzf {}'.format(remote_dir, remote_archive_path))

    # Create a link from dev or latest to the new archive
    if args.mode == 'dev':
        link_name = "{}_dev".format(dirname)
    else:
        link_name = "{}_latest".format(dirname)
    c.run('cd {}; ln -sfn {} {}'.format(remote_dir, remote_extracted_path, link_name))
    logger.info("Linking: {} {}".format(remote_extracted_path, link_name))
    logger.info("{} successfully linked as the new {}".format(dirname, link_name))
def render_story_list_all(self):
    filename = 'stories_all.html'
    template = self.env.get_template(filename)
    stories = []
    for author in self.story_data.values():
        stories.extend(author.stories)
    with open(ospj(OUTPUT_DIR, 'stories_all_date.html'), 'w') as f:
        print(template.render(stories=sorted(stories, key=attrgetter('date_sort_key')),
                              depth=0), file=f)
    with open(ospj(OUTPUT_DIR, 'stories_all_title.html'), 'w') as f:
        print(template.render(stories=sorted(stories, key=attrgetter('title_sort_key')),
                              depth=0), file=f)
def setUp(self):
    self.prefs = Prefs()
    work_dir = ospj(dirname(__file__), 'data/purgecounter/dynamic')
    self.filename = ospj(work_dir, PURGE_HISTORY)
    self.prefs._Prefs__data['WORK_DIR'] = work_dir
    self.counter = Counter()
    host = 'host'
    count = 1
    self.counter[host] += count
    self.test_string = '%s:%s\n' % (host, self.counter[host])
def test_verify_del(self):
    out = StringIO()
    verify.print_revertant_mutations_info(
        ospj(DATA_PATH, "to_be_reverted_mutations.txt"),
        ospj(DATA_PATH, "oncotator.del.txt"),
        ospj(DATA_PATH, "BRCA_transcripts.fa"),
        revmuts_file_format='hgvs',
        outfile=out)
    assert_equals(
        open(ospj(DATA_PATH, "output", "oncotator.del.maf.out.tsv")).read(),
        out.getvalue())
def test_validate_mutations(self):
    out = StringIO()
    sufam.__main__.validate_mutations(
        ospj(DATA_PATH, "mutations.vcf"),
        ospj(DATA_PATH, "subset1.bam"),
        ospj(DATA_PATH, "human_g1k_v37_chr17.fa"),
        "test", "matrix", out)
    assert_equals("0\n1\n", out.getvalue())
def test_multi_bam_vcf(self):
    out = StringIO()
    sufam.__main__.validate_mutations(
        ospj(DATA_PATH, "mutations.vcf"),
        [ospj(DATA_PATH, "subset{}.bam".format(i)) for i in range(1, 4)],
        ospj(DATA_PATH, "human_g1k_v37_chr17.fa"),
        ["subset{}".format(i) for i in range(1, 4)],
        "vcf", out)
    assert_equals(open(ospj(DATA_PATH, "multi_bam.vcf")).read(), out.getvalue())
def test_sum_bases_in_bins(self):
    """Test sum_bases_in_bins"""
    scg_tsv = ospj(DATA_PATH, "sample0_gt500_scg.tsv")
    b = sum_bases_in_bins(pd.read_csv(scg_tsv, sep="\t"),
                          ospj(DATA_PATH, "sample0_gt500.fa"))
    assert_equal(12, b)
    df = get_approved_bins(ospj(DATA_PATH, "sample0_gt500_scg.tsv"),
                           max_missing_scg=2, max_multicopy_scg=4)
    b = sum_bases_in_bins(df, ospj(DATA_PATH, "sample0_gt500.fa"))
    assert_equal(4, b)
def run_dnadiff_pairwise(fasta_files, fasta_names, output_folder):
    """Runs MUMmer's dnadiff pairwise for given fasta_files. Uses fasta_names
    to organize output folders for dnadiff as fastaname1_vs_fastaname2."""
    assert len(fasta_files) == len(fasta_names)
    for i in range(len(fasta_files)):
        for j in range(i + 1, len(fasta_files)):
            out_dir = ospj(output_folder, "{fn1}_vs_{fn2}".format(
                fn1=fasta_names[i], fn2=fasta_names[j]))
            dir_utils.mkdir_p(out_dir)
            run_dnadiff(fasta_files[i], fasta_files[j], ospj(out_dir, "out"))
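# Hypothetical usage sketch (not from the original source): the bin names,
# DATA_PATH, and the output directory below are illustrative assumptions only.
#
#   names = ["sample0_gt1000_bin0", "sample0_gt1000_bin1"]
#   files = [ospj(DATA_PATH, "{}.fa".format(n)) for n in names]
#   run_dnadiff_pairwise(files, names, "/tmp/dnadiff_out")
#   # would produce, per pair:
#   #   /tmp/dnadiff_out/sample0_gt1000_bin0_vs_sample0_gt1000_bin1/out.report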
def test_plot_dist_matrix_88_bins(self):
    """Plot a distance matrix with 88 samples"""
    names = ["sample0_gt1000_bin{}".format(i) for i in range(88)]
    matrix = np.genfromtxt(ospj(DATA_PATH, "expected_dist_matrix_88_bins.tsv"),
                           delimiter="\t")
    heatmap = ospj(TMP_BASENAME_DIR, "hclust_heatmap.pdf")
    dendrogram = ospj(TMP_BASENAME_DIR, "hclust_dendrogram.pdf")
    dnadiff_dist_matrix.plot_dist_matrix(matrix, names, heatmap, dendrogram)
    ok_(os.path.exists(heatmap))
    ok_(os.path.exists(dendrogram))
def test_write_approved_bins(self):
    """Test write_approved_bins"""
    df = get_approved_bins(ospj(DATA_PATH, "sample0_gt500_scg.tsv"),
                           max_missing_scg=2, max_multicopy_scg=4)
    assert_equal(2, int(df.Cluster))
    write_approved_bins(df, ospj(DATA_PATH, "sample0_gt500.fa"),
                        TMP_BASENAME_DIR, "sample0_gt500")
    ok_(os.path.exists(ospj(TMP_BASENAME_DIR, "sample0_gt500_bin2.fa")))
    # make sure both have an equal number of records
    assert_equal(
        open(ospj(TMP_BASENAME_DIR, "sample0_gt500_bin2.fa")).read().count(">"),
        open(ospj(DATA_PATH, "sample0_gt500_bin2.fa")).read().count(">"))
def setUp(self):
    self.directory = ospj(dirname(__file__), 'data/deny_hosts')
    self.work_dir = ospj(self.directory, 'work')
    self.logfile = ospj(self.work_dir, 'logfile')
    self.prefs = Prefs()
    self.lock_file = LockFile(ospj(self.directory, 'lockfile'))
    self.lock_file.remove(die_=False)
    self.lock_file.create()
    self.prefs._Prefs__data['ETC_DIR'] = ospj(self.directory, 'etc')
    self.prefs._Prefs__data['WORK_DIR'] = self.work_dir
def test_mummer_report_class(self):
    """Test mummer report class"""
    dnadiff_dist_matrix.run_dnadiff(
        ospj(DATA_PATH, "sample0_gt1000_bin0.fa"),
        ospj(DATA_PATH, "sample0_gt1000_bin1.fa"),
        ospj(TMP_BASENAME_DIR, "out"))
    ok_(os.path.exists(ospj(TMP_BASENAME_DIR, "out.report")))
    mumr = dnadiff_dist_matrix.MUMmerReport(ospj(TMP_BASENAME_DIR, "out.report"))
    assert_equal(mumr.tot_bases[0], 3213)
    assert_equal(mumr.tot_bases[1], 43514)
    assert_equal(mumr.aligned_bases[0], 0)
    assert_equal(mumr.aligned_bases[1], 0)
def test_get_winning_bins(self):
    """Test get_winning_bins"""
    scg_tsvs = [ospj(DATA_PATH, p) for p in
                ["sample0_gt300_scg.tsv", "sample0_gt500_scg.tsv"]]
    fasta_files = [ospj(DATA_PATH, p) for p in
                   ["sample0_gt300.fa", "sample0_gt500.fa"]]
    winning_index, df = get_winning_bins(scg_tsvs, fasta_files,
                                         max_missing_scg=2, max_multicopy_scg=4)
    assert_equal(1, winning_index)
    winning_index, df = get_winning_bins(list(reversed(scg_tsvs)),
                                         list(reversed(fasta_files)),
                                         max_missing_scg=2, max_multicopy_scg=4)
    assert_equal(0, winning_index)
def test_parallel_run_dnadiff_pairwise(self):
    """Test dnadiff pairwise on multiple bins"""
    names = ["bin{0}".format(i) for i in range(3)]
    dnadiff_dist_matrix.parallel_run_dnadiff_pairwise(
        [ospj(DATA_PATH, b) for b in
         ["sample0_gt1000_bin0.fa", "sample0_gt1000_bin1.fa",
          "sample0_gt1000_bin2.fa"]],
        names, TMP_BASENAME_DIR)
    for f in [ospj(TMP_BASENAME_DIR, "{}_vs_{}".format(names[i], names[j]),
                   "out.report")
              for i in range(len(names))
              for j in range(i + 1, len(names))]:
        ok_(os.path.exists(f))
def test_find(self):
    out = StringIO()
    reffa = ospj(DATA_PATH, "human_g1k_v37_chr17.fa")
    mutations_tsv = ospj(DATA_PATH, "germline_mutations", "T1_test_mutation.tsv")
    search_bam = ospj(DATA_PATH, "T1.bam")
    normal_bam = ospj(DATA_PATH, "N1.bam")
    find_revertant_mutations(reffa, mutations_tsv, search_bam, normal_bam, out)
    out.seek(0)
    test = pd.read_csv(out, sep="\t")
    truth = pd.read_csv(ospj(DATA_PATH, "output", "T1_test.tsv"), sep="\t")
    assert_frame_equal(truth.drop("MAF", axis=1), test.drop("MAF", axis=1))
    assert_array_almost_equal(truth.MAF, test.MAF, decimal=6)
def test_write_fasta_names(self):
    names = [
        "sample0_gt1000_bin0",
        "sample0_gt1000_bin10",
        "sample0_gt1000_bin11",
        "sample0_gt1000_bin1",
        "sample0_gt1000_bin2",
        "sample1_gt1000_bin51",
        "sample1_gt1000_bin67",
        "sample1_gt1000_bin6",
    ]
    files = [ospj(DATA_PATH, "{}.fa".format(n)) for n in names]
    dnadiff_dist_matrix.write_fasta_names(
        names, files, ospj(TMP_BASENAME_DIR, "fasta_names.tsv"), "\t")
    ok_(os.path.exists(ospj(TMP_BASENAME_DIR, "fasta_names.tsv")))
def __init__(self, args):
    super().__init__()
    self.args = args
    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    self.nets, self.nets_ema = build_model(args)
    # The setattrs below make the networks children of Solver,
    # e.g., so that self.to(self.device) moves them all
    for name, module in self.nets.items():
        utils.print_network(module, name)
        setattr(self, name, module)
    for name, module in self.nets_ema.items():
        setattr(self, name + '_ema', module)

    if args.mode == 'train':
        self.optims = Munch()
        for net in self.nets.keys():
            if net == 'fan':
                continue
            self.optims[net] = torch.optim.Adam(
                params=self.nets[net].parameters(),
                lr=args.f_lr if net == 'mapping_network' else args.lr,
                betas=[args.beta1, args.beta2],
                weight_decay=args.weight_decay)
        self.ckptios = [
            CheckpointIO(ospj(args.checkpoint_dir, '{:06d}_nets.ckpt'),
                         **self.nets),
            CheckpointIO(ospj(args.checkpoint_dir, '{:06d}_nets_ema.ckpt'),
                         **self.nets_ema),
            CheckpointIO(ospj(args.checkpoint_dir, '{:06d}_optims.ckpt'),
                         **self.optims)]
    else:
        self.ckptios = [
            CheckpointIO(ospj(args.checkpoint_dir, '{:06d}_nets_ema.ckpt'),
                         **self.nets_ema)]

    self.to(self.device)
    for name, network in self.named_children():
        # Do not initialize the FAN parameters
        if ('ema' not in name) and ('fan' not in name):
            print('Initializing %s...' % name)
            network.apply(utils.he_init)

    # modified: load checkpoint here as well (see def sample)
    self._load_checkpoint(args.resume_iter)
def test_plot_dist_matrix_88_bins(self):
    """Plot a distance matrix with 88 samples"""
    names = ["sample0_gt1000_bin{}".format(i) for i in range(88)]
    matrix = np.genfromtxt(ospj(DATA_PATH, "expected_dist_matrix_88_bins.tsv"),
                           delimiter="\t")
    heatmap = ospj(TMP_BASENAME_DIR, "hclust_heatmap.pdf")
    dendrogram = ospj(TMP_BASENAME_DIR, "hclust_dendrogram.pdf")
    clustering = ospj(TMP_BASENAME_DIR, "clustering.tsv")
    clustering_threshold = 0.05
    dnadiff_dist_matrix.plot_dist_matrix(matrix, names, heatmap, dendrogram,
                                         clustering_threshold, clustering)
    ok_(os.path.exists(heatmap))
    ok_(os.path.exists(dendrogram))
    ok_(os.path.exists(clustering))
def write_seqs(self, mytup):
    completename = ospj(self.aln_dir, self.glob_exon + ".unaligned.fasta")
    with open(completename, 'a') as f:
        f.write("{exon}\n{seq}\n".format(exon=mytup[0], seq=mytup[1]))
def check_corenames(self):
    names = self.corenames
    out = []
    for k, v in names.items():
        isreqs = check_reqs(self.int_reqs, v)
        islabel = self.step in v
        if isreqs and not islabel:
            stem = ospj(self.path, k)
            out.append((k, [ospj(stem, i) for i in os.listdir(stem)
                            if re.findall(self.pattern, i)]))
    return out
def export_agent(self, step):
    _dir = ospj(self._t_prof.path_agent_export_storage,
                str(self._t_prof.name), str(step))
    file_util.create_dir_if_not_exist(_dir)

    # """"""""""""""""""""""""""""
    # Deep CFR
    # """"""""""""""""""""""""""""
    if self._AVRG:
        MODE = EvalAgentDeepCFR.EVAL_MODE_AVRG_NET
        t_prof = copy.deepcopy(self._t_prof)
        t_prof.eval_modes_of_algo = [MODE]

        eval_agent = EvalAgentDeepCFR(t_prof=t_prof)
        eval_agent.reset()

        w = {EvalAgentDeepCFR.EVAL_MODE_AVRG_NET: self._pull_avrg_net_eval_strat()}
        eval_agent.update_weights(w)
        eval_agent.set_mode(mode=MODE)
        eval_agent.store_to_disk(path=_dir, file_name="eval_agent" + MODE)

    # """"""""""""""""""""""""""""
    # SD-CFR
    # """"""""""""""""""""""""""""
    if self._SINGLE:
        MODE = EvalAgentDeepCFR.EVAL_MODE_SINGLE
        t_prof = copy.deepcopy(self._t_prof)
        t_prof.eval_modes_of_algo = [MODE]

        eval_agent = EvalAgentDeepCFR(t_prof=t_prof)
        eval_agent.reset()

        # could copy - it's just for the export, so it's ok
        eval_agent._strategy_buffers = self._strategy_buffers
        eval_agent.set_mode(mode=MODE)
        eval_agent.store_to_disk(path=_dir, file_name="eval_agent" + MODE)
def find_html_files(directory):
    # I'm inclined to use a generator for os.walk usage, but listdir
    # already returns a list so there isn't much benefit here
    return [
        ospj(directory, filename)
        for filename in listdir(directory)
        if filename.endswith(HTML_FILE_EXTENSION)
    ]
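# A minimal sketch (an assumption, not part of the original module) of the
# os.walk-based generator variant the comment above alludes to; useful if the
# HTML files may sit in nested subdirectories rather than one flat directory.
# Assumes `import os` and reuses HTML_FILE_EXTENSION from this module.
def iter_html_files(directory):
    """Yield paths of HTML files anywhere under directory, recursively."""
    for root, _dirs, filenames in os.walk(directory):
        for filename in filenames:
            if filename.endswith(HTML_FILE_EXTENSION):
                yield ospj(root, filename)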
def test_send_hosts(self):
    sync = Sync(self.prefs)
    self.assertEqual(sync.receive_new_hosts(), self.test_hosts)
    filename = ospj(self.work_dir, SYNC_RECEIVED_HOSTS)
    with open(filename) as f:
        hosts = [line.strip().split(':')[0] for line in f]
    self.assertEqual(self.test_hosts, hosts)
def export_all(self, iter_nr):
    """Exports all logs of the current run in Tensorboard's format and as
    json files."""
    if self._path_log_storage is not None:
        path_crayon = ospj(self._path_log_storage, str(self._name),
                           str(iter_nr), "crayon")
        path_json = ospj(self._path_log_storage, str(self._name),
                         str(iter_nr), "as_json")
        create_dir_if_not_exist(path=path_crayon)
        create_dir_if_not_exist(path=path_json)
        for e in self._experiments.values():
            e.to_zip(filename=ospj(path_crayon, e.xp_name + ".zip"))
        write_dict_to_file_json(dictionary=self._custom_logs, _dir=path_json,
                                file_name="logs")
def celeba_preprocess(self):
    assert os.path.exists(self.celeba_image_dir), \
        f'Image data directory does not exist: {self.celeba_image_dir}'
    assert os.path.exists(self.celeba_attr_file), \
        f'Attribute file does not exist: {self.celeba_attr_file}'
    with open(self.celeba_attr_file, 'r') as f:
        img_name_attrs_lines = f.readlines()
    all_attr_names = img_name_attrs_lines[1].split()
    for i, attr_name in enumerate(all_attr_names):
        self.attr2idx[attr_name] = i
        self.idx2attr[i] = attr_name

    lines = img_name_attrs_lines[2:]
    random.seed(1234)
    random.shuffle(lines)
    for i, line in enumerate(lines):
        split = line.strip().split()
        filename = split[0]
        values = split[1:]
        label = []
        for attr_name in self.selected_attrs:
            idx = self.attr2idx[attr_name]
            label.append(values[idx] == '1')
        filepath = ospj(self.celeba_image_dir, filename)
        if i < 2000:
            self.celeba_test_dataset.append([filepath, label])
        else:  # 28000
            self.celeba_train_dataset.append([filepath, label])
    print(f'Finished preprocessing the {self.celeba_dataset_name} dataset...')
def setUp(self):
    self.work_dir = mkdtemp()
    self.warned_hosts_filename = ospj(self.work_dir, ALLOWED_WARNED_HOSTS)
    self.prefs = Prefs()
    self.prefs._Prefs__data['WORK_DIR'] = self.work_dir
    self.prefs._Prefs__data['ALLOWED_HOSTS_HOSTNAME_LOOKUP'] = 'false'
    self.allowed_hosts = AllowedHosts(self.prefs)
def test_resolve_fetch_http_basic_auth_get(self):
    logger.info(self.getTestHeader('test resolve fetch http basic auth GET'))
    try:
        patched_requests_get = None

        def mocked_request_auth_get_success(*args, **kwargs):
            args[0].auth = None
            patched_requests_get.stop()
            return BaseTest.MockResponse({}, 200)

        patched_requests_get = mock.patch.multiple(
            "bdbag.fetch.transports.fetch_http.requests.Session",
            get=mocked_request_auth_get_success,
            auth=None,
            create=True)
        patched_requests_get.start()
        bdb.resolve_fetch(self.test_bag_fetch_http_dir,
                          keychain_file=ospj(self.test_config_dir,
                                             'test-keychain-1.json'),
                          cookie_scan=False)
        bdb.validate_bag(self.test_bag_fetch_http_dir, fast=True)
        bdb.validate_bag(self.test_bag_fetch_http_dir, fast=False)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def get_split_info(self):
    '''
    Helper method to read image, attr, obj samples.

    Returns
        train_data, val_data, test_data: lists of [image, attr, obj] triples
    '''
    data = torch.load(ospj(self.root, 'metadata_{}.t7'.format(self.split)))
    train_data, val_data, test_data = [], [], []
    for instance in data:
        image, attr, obj, settype = instance['image'], instance['attr'], \
            instance['obj'], instance['set']
        curr_data = [image, attr, obj]
        if attr == 'NA' or (attr, obj) not in self.pairs or settype == 'NA':
            # Skip incomplete pairs, unknown pairs and unknown set
            continue
        if settype == 'train':
            train_data.append(curr_data)
        elif settype == 'val':
            val_data.append(curr_data)
        else:
            test_data.append(curr_data)
    return train_data, val_data, test_data
def test_read_with_create_default_keychain(self):
    logger.info(self.getTestHeader('read keychain with create default if missing'))
    try:
        keychain_file = ospj(self.test_config_dir, ".bdbag", 'keychain.json')
        keychain.read_keychain(keychain_file=keychain_file)
    except Exception as e:
        self.fail(get_typed_exception(e))
def sample(self, loaders):
    args = self.args
    nets_ema = self.nets_ema
    os.makedirs(args.result_dir, exist_ok=True)
    self._load_checkpoint(args.resume_iter)

    src = next(InputFetcher(loaders.src, None, args.latent_dim, 'test'))
    ref = next(InputFetcher(loaders.ref, None, args.latent_dim, 'test'))

    fname = ospj(args.result_dir, 'reference.jpg')
    print('Working on {}...'.format(fname))
    utils.translate_using_reference(nets_ema, args, src.x, ref.x, ref.y, fname)

    fname = ospj(args.result_dir, 'video_ref.mp4')
    print('Working on {}...'.format(fname))
    utils.video_ref(nets_ema, args, src.x, ref.x, ref.y, fname)
def test_create_bag_duplicate_manifest_entry_from_remote(self):
    logger.info(self.getTestHeader(
        'create bag with fetch.txt entry for local file'))
    try:
        duplicate_file = "test-fetch-http.txt"
        shutil.copy(ospj(self.test_http_dir, duplicate_file),
                    ospj(self.test_data_dir, duplicate_file))
        with self.assertRaises(bdbagit.BagManifestConflict) as ar:
            bdb.make_bag(self.test_data_dir,
                         remote_file_manifest=ospj(self.test_config_dir,
                                                   'test-fetch-manifest.json'))
        logger.error(bdbag.get_typed_exception(ar.exception))
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def add_new_iteration_strategy_model(self, owner, adv_net_state_dict, cfr_iter):
    iter_strat = IterationStrategy(t_prof=self._t_prof,
                                   env_bldr=self._env_bldr,
                                   owner=owner,
                                   device=self._t_prof.device_inference,
                                   cfr_iter=cfr_iter)
    iter_strat.load_net_state_dict(
        self._ray.state_dict_to_torch(adv_net_state_dict,
                                      device=self._t_prof.device_inference))
    self._strategy_buffers[iter_strat.owner].add(iteration_strat=iter_strat)

    # Store to disk
    if self._t_prof.export_each_net:
        path = ospj(self._t_prof.path_strategy_nets, self._t_prof.name)
        file_util.create_dir_if_not_exist(path)
        file_util.do_pickle(obj=iter_strat.state_dict(),
                            path=path,
                            file_name=str(iter_strat.cfr_iteration) + "_P" +
                                      str(iter_strat.owner) + ".pkl")

    if self._t_prof.log_verbose:
        if owner == 1:
            # Logs
            process = psutil.Process(os.getpid())
            self.add_scalar(self._exp_mem_usage, "Debug/Memory Usage/Chief",
                            cfr_iter, process.memory_info().rss)
def ffhq_preprocess(self):
    assert os.path.exists(self.ffhq_image_dir), \
        f'Image data directory does not exist: {self.ffhq_image_dir}'
    assert os.path.exists(self.ffhq_attr_file), \
        f'Attribute file does not exist: {self.ffhq_attr_file}'
    with open(self.ffhq_attr_file, 'r') as f:
        img_name_attrs_lines = f.readlines()
    lines = img_name_attrs_lines[2:]
    for i, line in enumerate(lines):
        split = line.strip().split()
        filename = split[0]
        values = split[1:]
        label = []
        for attr_name in self.selected_attrs:
            idx = self.attr2idx[attr_name]
            label.append(values[idx] == '1')
        img_sub_dir = f'{(i // 1000):02d}000'
        filepath = ospj(self.ffhq_image_dir, img_sub_dir, filename)
        if i >= 66000:  # last 4000 images go to the test set
            self.ffhq_test_dataset.append([filepath, label])
        else:
            self.ffhq_train_dataset.append([filepath, label])
    print(f'Finished preprocessing the {self.ffhq_dataset_name} dataset...')
def test_read_with_update_base_config(self):
    logger.info(self.getTestHeader('read config with auto-upgrade version'))
    try:
        config_file = ospj(self.test_config_dir, 'base-config.json')
        bdbcfg.read_config(config_file=config_file, auto_upgrade=True)
    except Exception as e:
        self.fail(get_typed_exception(e))
def test_create_keychain(self):
    logger.info(self.getTestHeader('create keychain'))
    try:
        keychain_file = ospj(self.test_config_dir, ".bdbag", 'keychain.json')
        keychain.write_keychain(keychain_file=keychain_file)
    except Exception as e:
        self.fail(get_typed_exception(e))
def test_read_with_create_default_config(self):
    logger.info(self.getTestHeader('read config with create default if missing'))
    try:
        config_file = ospj(self.test_config_dir, ".bdbag", 'bdbag.json')
        bdbcfg.read_config(config_file=config_file)
    except Exception as e:
        self.fail(get_typed_exception(e))
def test_create_config(self):
    logger.info(self.getTestHeader('create config'))
    try:
        config_file = ospj(self.test_config_dir, ".bdbag", 'bdbag.json')
        bdbcfg.write_config(config_file=config_file)
    except Exception as e:
        self.fail(get_typed_exception(e))
def __get_last_offset(self):
    offset_file = ospj(self.work_dir, self.offset_file)
    first_line_of_file = ""
    offset = 0
    try:
        # a context manager guarantees the file is closed even if a read fails
        with open(offset_file, 'r') as fh:
            first_line_of_file = self.__get_first_line(fh)
            # readline() returns an empty string at EOF, never None
            offset_line = fh.readline()
            offset = int(offset_line) if offset_line.strip() else 0
    except IOError:
        pass
    self.log_message.send_message(
        'debug',
        '__get_last_offset(): first_line: {} offset: {}'.format(
            first_line_of_file, offset))
    return first_line_of_file, offset
def test_resolve_fetch_http_auth_token_get(self):
    logger.info(self.getTestHeader('test resolve fetch http token auth'))
    try:
        patched_requests_get_auth = None

        def mocked_request_auth_token_get_success(*args, **kwargs):
            args[0].auth = None
            args[0].headers = {}
            patched_requests_get_auth.stop()
            return args[0].get(args[1], **kwargs)

        patched_requests_get_auth = mock.patch.multiple(
            "bdbag.fetch.transports.fetch_http.requests.Session",
            get=mocked_request_auth_token_get_success,
            auth=None,
            create=True)
        patched_requests_get_auth.start()
        bdb.resolve_fetch(self.test_bag_fetch_http_dir,
                          keychain_file=ospj(self.test_config_dir,
                                             'test-keychain-6.json'),
                          cookie_scan=False)
        bdb.validate_bag(self.test_bag_fetch_http_dir, fast=True)
        bdb.validate_bag(self.test_bag_fetch_http_dir, fast=False)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_resolve_fetch_http_auth_token_get_with_disallowed_redirects(self):
    logger.info(self.getTestHeader(
        'test resolve fetch http token auth with disallowed redirects'))
    try:
        patched_requests_get_auth = None

        def mocked_request_auth_token_get_success(*args, **kwargs):
            headers = args[0].headers or {}
            headers.update({"Location": args[1]})
            args[0].auth = None
            args[0].headers = {}
            patched_requests_get_auth.stop()
            return BaseTest.MockResponse({}, 302, headers=headers)

        patched_requests_get_auth = mock.patch.multiple(
            "bdbag.fetch.transports.fetch_http.requests.Session",
            get=mocked_request_auth_token_get_success,
            auth=None,
            create=True)
        patched_requests_get_auth.start()
        bdb.resolve_fetch(self.test_bag_fetch_http_dir,
                          keychain_file=ospj(self.test_config_dir,
                                             'test-keychain-7.json'),
                          cookie_scan=False)
        bdb.validate_bag(self.test_bag_fetch_http_dir, fast=True)
        bdb.validate_bag(self.test_bag_fetch_http_dir, fast=False)
        output = self.stream.getvalue()
        self.assertExpectedMessages(
            ["Authorization bearer token propagation on redirect is disabled"],
            output)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def main(args):
    # Get fasta names
    # Get basename from fasta files and see if those are unique
    fasta_names_ref = [".".join(os.path.basename(f).split(".")[0:-1])
                       for f in args.fasta_files_ref]
    fasta_names_mag = [os.path.basename(f).split(".")[0]
                       for f in args.fasta_files_mag]
    print("{}\t{}\t{}\t{}\t{}\t{}\t{}".format(
        "ref_fasta_name", "mag_fasta_name",
        "aligned_bases_ref", "aligned_perc_ref",
        "aligned_bases_mag", "aligned_perc_mag",
        "avg_identity"))
    for ref_fasta_name in fasta_names_ref:
        for mag_fasta_name in fasta_names_mag:
            repfile = ospj(args.input_dir,
                           "{fn1}_vs_{fn2}.report".format(fn1=ref_fasta_name,
                                                          fn2=mag_fasta_name))
            mumr = MUMmerReport(repfile)
            if mumr.aligned_perc_mag >= args.min_coverage:
                print("{}\t{}\t{}\t{}\t{}\t{}\t{}".format(
                    ref_fasta_name, mag_fasta_name,
                    mumr.aligned_bases_ref, mumr.aligned_perc_ref,
                    mumr.aligned_bases_mag, mumr.aligned_perc_mag,
                    mumr.avg_identity))
def test_update_existing_archive(self):
    args = ARGS + [ospj(self.test_archive_dir, 'test-bag.zip'), '--update']
    logfile.writelines(self.getTestHeader('--update an existing archive file', args))
    self._test_bad_argument_error_handling(
        args, ["Error: An existing bag archive cannot be updated in-place"])
def test_update_keychain_add_multi(self):
    logger.info(self.getTestHeader('test update keychain add multi'))
    keychain_file = ospj(self.test_config_dir, 'test-keychain-8.json')
    added_entries = [
        {
            "uri": "https://foo.bar.com/",
            "auth_type": "http-basic",
            "auth_params": {
                "auth_method": "get",
                "username": "******",
                "password": "******"
            }
        },
        {
            "uri": "https://foo.bar.com/",
            "auth_type": "bearer-token",
            "auth_params": {
                "token": "bar",
                "allow_redirects_with_token": "True",
                "additional_request_headers": {
                    "X-Requested-With": "XMLHttpRequest"
                }
            }
        }
    ]
    try:
        keychain = read_keychain(keychain_file, create_default=False)
        entries = get_auth_entries("https://foo.bar.com/", keychain)
        self.assertFalse(entries)
        updated_keychain = update_keychain(added_entries, keychain_file=keychain_file)
        logger.info("Updated keychain: %s" % json.dumps(updated_keychain))
        entries = get_auth_entries("https://foo.bar.com/", updated_keychain)
        self.assertTrue(len(entries) == 2)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_update_keychain_single(self):
    logger.info(self.getTestHeader('test update keychain single'))
    keychain_file = ospj(self.test_config_dir, 'test-keychain-8.json')
    updated_entry = {
        "uri": "https://raw.githubusercontent.com/",
        "auth_type": "http-basic",
        "auth_params": {
            "auth_method": "get",
            "username": "******",
            "password": "******"
        }
    }
    try:
        updated_keychain = update_keychain(updated_entry, keychain_file=keychain_file)
        logger.info("Updated keychain: %s" % json.dumps(updated_keychain))
        entries = get_auth_entries("https://raw.githubusercontent.com/",
                                   updated_keychain)
        found = False
        for entry in entries:
            if entry["auth_type"] == "http-basic":
                if (entry["auth_params"]["username"] == "foo"
                        and entry["auth_params"]["password"] == "bar!"):
                    found = True
                    break
        self.assertTrue(found)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_correct_mutation(self):
    mutations = mutation.parse_vcf(ospj(DATA_PATH, "mutation_tests.vcf"))
    assert_equals(mutations[0].type, ".")
    assert_equals(mutations[0].change, "G")
    assert_equals(mutations[2].type, "-")
    assert_equals(mutations[2].change, "A")
    assert_equals(mutations[3].type, "+")
    assert_equals(mutations[3].change, "A")
def test_normal_filter(self):
    nmuts = mutation.parse_vcf(ospj(DATA_PATH, "normal_mutation_tests.vcf"))
    for m in nmuts:
        m.ref = "C"
        m.count = 21
        m.cov = 100
    nmuts = mutation.MutationsAtSinglePosition.from_mutation_list(nmuts)
    muts = mutation.parse_vcf(ospj(DATA_PATH, "mutation_tests.vcf"))
    for m in muts:
        m.ref = "C"
        m.count = 21
        m.cov = 100
    muts = mutation.MutationsAtSinglePosition.from_mutation_list(muts)
    filt_muts = muts.filter_against_normal(nmuts)
    assert_equals(len(filt_muts), 1)
def test_mpileup_test2(self):
    test = open(ospj(DATA_PATH, "mpileup_test2.tsv")).read()
    bpdf = sufam.__main__.get_baseparser_extended_df(
        "test", [mpileup_parser.parse(test)], "G", "GAA")
    assert_equals(int(bpdf['cov'].iloc[0]), int(bpdf.G.iloc[0]))
    assert_equals(test.count(",") + test.count("."), int(bpdf['cov'].iloc[0]))
    assert_almost_equals(0.4324, float(bpdf.val_maf.iloc[0]), places=3)
    assert_almost_equals(0.4324, float(bpdf.most_common_indel_maf.iloc[0]),
                         places=3)
    assert_equals("+", bpdf.most_common_indel_type.iloc[0])
def parallel_run_dnadiff_pairwise(fasta_files, fasta_names, output_folder):
    """Runs MUMmer's dnadiff pairwise for given fasta_files using
    multiprocessing. Uses fasta_names to organize output folders for dnadiff
    as fastaname1_vs_fastaname2."""
    assert len(fasta_files) == len(fasta_names)
    pool = Pool()
    args = []
    for i in range(len(fasta_files)):
        for j in range(i + 1, len(fasta_files)):
            out_dir = ospj(output_folder, "{fn1}_vs_{fn2}".format(
                fn1=fasta_names[i], fn2=fasta_names[j]))
            dir_utils.mkdir_p(out_dir)
            args.append((fasta_files[i], fasta_files[j], ospj(out_dir, "out")))
    pool.map(run_dnadiff_star, args)
    pool.close()
    pool.join()
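# run_dnadiff_star is called above but not shown in this excerpt. A minimal
# sketch, assuming it is the usual star-unpacking shim: Pool.map passes each
# element of `args` as a single tuple, which must be fanned out to
# run_dnadiff's three positional parameters.
def run_dnadiff_star(args):
    """Unpack an argument tuple for run_dnadiff (Pool.map passes one arg)."""
    return run_dnadiff(*args)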
def test_mpileup_test3(self):
    test = open(ospj(DATA_PATH, "mpileup_test3.tsv")).read()
    bpdf = sufam.__main__.get_baseparser_extended_df(
        "test", [mpileup_parser.parse(test)], "G", "A")
    assert_equals(int(bpdf['cov'].iloc[0]),
                  int(bpdf.G.iloc[0]) + int(bpdf.A.iloc[0]) + int(bpdf["T"].iloc[0]))
    assert_equals(1, int(bpdf["T"].iloc[0]))
    assert_equals("AA", bpdf.most_common_indel.iloc[0])
    assert_equals("+", bpdf.most_common_indel_type.iloc[0])
    assert_almost_equals(0.0139, float(bpdf.val_maf.iloc[0]), places=3)
    assert_almost_equals(0.0139, float(bpdf.most_common_al_maf.iloc[0]), places=3)
def test_find_best_per_group(self):
    fasta_files = [
        ospj(DATA_PATH, "sample0_gt300.fa"),
        ospj(DATA_PATH, "sample0_gt500.fa"),
    ]
    args = collections.namedtuple(
        'Arguments',
        " ".join(["output_folder", "scg_tsvs", "fasta_files", "names",
                  "max_missing_scg", "max_multicopy_scg", "groups"]))
    groupargs = args(
        output_folder=TMP_BASENAME_DIR,
        scg_tsvs=[os.path.splitext(f)[0] + "_scg.tsv" for f in fasta_files],
        fasta_files=fasta_files,
        names=[os.path.splitext(os.path.basename(f))[0] for f in fasta_files],
        max_missing_scg=2,
        max_multicopy_scg=4,
        groups=("gt300", "gt500"),
    )
    main(groupargs)
def test_extract_bag_archive_tar(self):
    logger.info(self.getTestHeader('extract bag tar format'))
    try:
        bag_path = bdb.extract_bag(ospj(self.test_archive_dir, 'test-bag.tar'),
                                   temp=True)
        self.assertTrue(ospe(bag_path))
        self.assertTrue(bdb.is_bag(bag_path))
        bdb.cleanup_bag(os.path.dirname(bag_path))
    except Exception as e:
        self.fail(bdbag.get_named_exception(e))
def test_validate_profile_serialization(self):
    logger.info(self.getTestHeader('validate profile serialization'))
    try:
        bag_path = ospj(self.test_archive_dir, 'test-bag.zip')
        bdb.validate_bag_serialization(
            bag_path,
            bag_profile_path='https://raw.githubusercontent.com/ini-bdds/bdbag/master/profiles/bdbag-profile.json')
    except Exception as e:
        self.fail(bdbag.get_named_exception(e))
def setUp(self):
    data_dir = ospj(dirname(abspath(__file__)), 'data')
    # Initialize minimal preferences dict: just enough
    # for the AllowedHosts constructor
    prefs = {
        'WORK_DIR': data_dir,
        'ALLOWED_HOSTS_HOSTNAME_LOOKUP': 'false',
    }
    self.allowed_hosts = AllowedHosts(prefs)