예제 #1
0
def evaluate(tree_file, ref_msa, out_dir):
    """Run the raxml binary in ``--evaluate`` mode and return the model file path.

    Both input files must exist (checked through ``util.expect_file_exists``)
    and ``out_dir`` is created through ``util.make_path``. The command runs
    with ``out_dir`` as working directory and its stdout is discarded.

    Args:
        tree_file: Path of the tree handed to ``--tree``.
        ref_msa: Path of the alignment handed to ``--msa``.
        out_dir: Directory in which the run is executed.

    Returns:
        ``<out_dir>/eval.raxml.bestModel``.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    for required in (tree_file, ref_msa):
        util.expect_file_exists(required)
    util.make_path(out_dir)

    run_name = "eval"

    # NOTE: the thread count is read from ``common.iqtree_threads``.
    raxml_call = [
        common.raxml,
        '--evaluate',
        '--msa', ref_msa,
        '--model', common.subst_model,
        '--tree', tree_file,
        '--prefix', run_name,
        '--threads', str(common.iqtree_threads),
        '--blopt', 'nr_safe',
        '--redo',
        '--force', 'perf_threads',
        "--blmin", common.raxml_min_bl,
    ]
    sub.check_call(raxml_call, cwd=out_dir, stdout=sub.DEVNULL)

    return os.path.join(out_dir, run_name + ".raxml.bestModel")
def launch_epa(tree, modelfile, ref_msa, query_msa, out_dir, thorough=True):
  """Run the EPA binary (``common.epa``) to place queries on a reference tree.

  ``out_dir`` is created through ``util.make_path``. The command's stdout is
  discarded. Nothing is returned; results are left in ``out_dir``.

  Args:
    tree: Path passed to ``--tree``.
    modelfile: Path passed to ``--model``.
    ref_msa: Path passed to ``--msa``.
    query_msa: Path passed to ``--query``.
    out_dir: Directory passed to ``--out-dir``.
    thorough: When true, ``--no-heur`` is added to the command line.

  Raises:
    subprocess.CalledProcessError: If the command exits non-zero.
  """
  util.make_path(out_dir)

  # Under slurm a fixed thread count of 4 is used; otherwise all cores
  # reported by ``common.available_cores``.
  if util.is_slurm():
    threads = "4"
  else:
    threads = str(common.available_cores)

  cmd = [
    common.epa,
    "--tree", tree,
    "--model", modelfile,
    "--msa", ref_msa,
    "--query", query_msa,
    "--threads", threads,
  ]
  if thorough:
    cmd.append("--no-heur")
  cmd += [
    "--filter-max", "50",
    "--filter-acc-lwr", "1.0",
    "--out-dir", out_dir,
    "--redo",
    "--verbose",
  ]
  sub.check_call(cmd, stdout=sub.DEVNULL)
def ggplot_lwr_histogram(hist_csv_file, out_dir):
  """Invoke the ``lwr_hist.r`` script on a histogram CSV file.

  The script is looked up in ``common.scripts_dir`` and called with two
  arguments: the input CSV and the output path. The output path is
  ``out_dir`` joined with the CSV's base name, its extension replaced
  by ``.pdf``.

  Args:
    hist_csv_file: Path of the CSV file handed to the script.
    out_dir: Directory for the output file (created if needed).

  Returns:
    The output path that was passed to the script.

  Raises:
    subprocess.CalledProcessError: If the script exits non-zero.
  """
  util.make_path(out_dir)

  plot_script = os.path.join(common.scripts_dir, "lwr_hist.r")

  # Input file name without directory and without extension.
  stem, _ext = os.path.splitext(os.path.basename(hist_csv_file))
  pdf_path = os.path.join(out_dir, stem + ".pdf")

  sub.check_call([plot_script, hist_csv_file, pdf_path])

  return pdf_path
def launch_split4epa(ref_phylip, aln_result, out_dir):
  """Run the EPA binary (``common.epa``) in ``--split`` mode.

  Args:
    ref_phylip: First positional argument after ``--split``.
    aln_result: Second positional argument after ``--split``.
    out_dir: Directory passed to ``--out-dir`` (created if needed).

  Returns:
    A ``(reference, query)`` tuple holding the paths
    ``<out_dir>/reference.fasta`` and ``<out_dir>/query.fasta``.

  Raises:
    subprocess.CalledProcessError: If the command exits non-zero.
  """
  util.make_path(out_dir)

  split_call = [
    common.epa,
    "--split", ref_phylip, aln_result,
    "--out-dir", out_dir,
    "--redo",
  ]
  sub.check_call(split_call)

  reference_out = os.path.join(out_dir, "reference.fasta")
  query_out = os.path.join(out_dir, "query.fasta")
  return reference_out, query_out
예제 #5
0
def launch_raxml(alignment,
                 model,
                 output_dir,
                 seed,
                 starting_trees=1,
                 parsimony=False,
                 bs_trees=0,
                 cores=16,
                 debug=False,
                 parse=False):
    """Assemble an MPI raxml command line and hand it to ``launcher.submit``.

    The run prefix is built from ``output_dir``, the model name (with ``+``
    removed), the starting-tree and bootstrap settings, and the seed, so
    that differently configured runs get distinct prefixes.

    Args:
        alignment: Path of the alignment (``--msa``); must exist.
        model: Model string passed to ``--model``.
        output_dir: Directory for the run (created if needed).
        seed: Value passed to ``--seed``.
        starting_trees: Number of starting trees. ``--tree`` is only added
            when this is greater than zero.
        parsimony: Use ``pars{N}`` instead of ``rand{N}`` starting trees.
        bs_trees: Number of bootstrap trees; when greater than zero,
            ``--bootstrap --bs-trees N`` is added.
        cores: Process count passed to ``mpiexec -np`` and to the launcher.
        debug: Forwarded unchanged to ``launcher.submit``.
        parse: When true, ``--parse`` is added.
    """
    util.expect_file_exists(alignment)
    util.make_path(output_dir)

    prefix = os.path.join(output_dir, "raxml")
    prefix += "_" + model.replace("+", "")

    cmd = [
        "mpiexec", "-np", str(cores),
        common.raxml,
        "--msa", alignment,
        "--model", model,
        "--seed", str(seed),
        "--blmin", common.raxml_min_bl,
    ]
    if parse:
        cmd.append("--parse")

    # Emit "--tree" only together with its value; a bare "--tree" would
    # consume the following option as its argument.
    if starting_trees > 0:
        tree_kind = "pars" if parsimony else "rand"
        cmd.append("--tree")
        cmd.append(tree_kind + "{" + str(starting_trees) + "}")
        prefix += "_" + tree_kind + str(starting_trees)

    if bs_trees > 0:
        cmd.append("--bootstrap")
        cmd.append("--bs-trees")
        cmd.append(str(bs_trees))
        prefix += "_bs" + str(bs_trees)

    prefix += "_seed" + str(seed)
    cmd.append("--prefix")
    cmd.append(prefix)
    cmd.append('--force')
    cmd.append('perf_threads')
    launcher.submit(prefix, cmd, cores, debug)
def outgroup_check(jplace_files, out_dir):
  """Run ``common.genesis_outgroup_check`` on jplace files and capture stdout.

  Every input file must exist (checked through ``util.expect_file_exists``).
  The tool's standard output is written to ``outgroup_check.txt`` inside
  ``out_dir``.

  Args:
    jplace_files: Iterable of file paths passed as positional arguments.
    out_dir: Directory for the captured output (created if needed).

  Returns:
    Path of the file holding the captured output.

  Raises:
    subprocess.CalledProcessError: If the tool exits non-zero.
  """
  for jplace in jplace_files:
    util.expect_file_exists(jplace)
  util.make_path(out_dir)

  check_args = [common.genesis_outgroup_check]
  check_args.extend(jplace_files)

  report_path = os.path.join(out_dir, "outgroup_check.txt")
  with open(report_path, "w+") as report:
    sub.check_call(check_args, stdout=report)

  return report_path
def launch_hmmbuild(ref_msa, out_dir):
  """Run ``hmmbuild`` from ``common.hmmer_dir`` on a reference alignment.

  The command is executed with ``out_dir`` as working directory and writes
  to the relative file name ``reference.hmm``. ``--cpu`` is only passed
  when not running under slurm.

  Args:
    ref_msa: Alignment path passed as the last positional argument.
    out_dir: Working directory of the run (created if needed).

  Returns:
    ``<out_dir>/reference.hmm``.

  Raises:
    subprocess.CalledProcessError: If the command exits non-zero.
  """
  util.make_path(out_dir)

  hmm_name = "reference.hmm"

  build_call = [os.path.join(common.hmmer_dir, "hmmbuild")]
  if not util.is_slurm():
    build_call += ["--cpu", str(common.available_cores)]
  build_call += [hmm_name, ref_msa]

  sub.check_call(build_call, cwd=out_dir)

  return os.path.join(out_dir, hmm_name)
def split_alignment_outgroups(input_msa, outgroup_spec, out_dir):
  """Run ``common.genesis_remove_sequences`` to split an alignment in two.

  The tool is called with ``out_dir`` as working directory and four
  positional arguments: the input alignment, the outgroup specification,
  and the two relative output names ``reference.fasta`` and ``query.fasta``.

  Args:
    input_msa: Path of the input alignment; must exist.
    outgroup_spec: Path of the outgroup specification file; must exist.
    out_dir: Working directory of the run (created if needed).

  Returns:
    A ``(reference, query)`` tuple holding the paths
    ``<out_dir>/reference.fasta`` and ``<out_dir>/query.fasta``.

  Raises:
    subprocess.CalledProcessError: If the tool exits non-zero.
  """
  for required in (input_msa, outgroup_spec):
    util.expect_file_exists(required)
  util.make_path(out_dir)

  reference_name = "reference.fasta"
  query_name = "query.fasta"

  split_call = [
    common.genesis_remove_sequences,
    input_msa,
    outgroup_spec,
    reference_name,
    query_name,
  ]
  sub.check_call(split_call, cwd=out_dir)

  reference_out = os.path.join(out_dir, reference_name)
  query_out = os.path.join(out_dir, query_name)
  return reference_out, query_out
def gappa_examine_lwr(jplace_path, out_dir):
  """Run ``gappa examine lwr`` on all files matching a glob pattern.

  Example of a comparable shell invocation:
    gappa examine lwr --jplace-path ./*/*.jplace --no-list-file --out-dir <dir>

  Args:
    jplace_path: Glob pattern; every match is passed after ``--jplace-path``.
    out_dir: Directory passed to ``--out-dir`` (created if needed).

  Returns:
    ``<out_dir>/lwr_histogram.csv``.

  Raises:
    subprocess.CalledProcessError: If the command exits non-zero.
  """
  util.make_path(out_dir)

  lwr_call = [common.gappa, "examine", "lwr", "--jplace-path"]
  # The pattern is expanded here because no shell is involved in the call.
  lwr_call.extend(glob(jplace_path))
  lwr_call.extend([
    "--no-list-file",
    "--no-compat-check",
    "--allow-file-overwriting",
    "--histogram-bins", "20",
    "--out-dir", out_dir,
  ])
  sub.check_call(lwr_call)

  return os.path.join(out_dir, "lwr_histogram.csv")
예제 #10
0
def setup_new_version( date=None, datasets=None ):
  """Create the directory layout for a new version and return its paths.

  The version name is ``"<date>_<id>"`` where the id comes from
  ``get_current_version_id(date)``. The name is printed to stdout.

  Args:
    date: Date string used in the version name. Defaults to today's date
      formatted as ``YYYY-MM-DD``, evaluated at call time.
    datasets: Iterable of dataset names. Defaults to
      ``["fmsao", "fmsan", "smsao", "smsan"]``.

  Returns:
    A list with one ``common.Paths`` object per dataset, in input order.
  """
  # Defaults are resolved per call: a default computed in the signature
  # would be frozen at import time, and a list default would be shared
  # between calls.
  if date is None:
    date = datetime.datetime.now().strftime("%Y-%m-%d")
  if datasets is None:
    datasets = ["fmsao", "fmsan", "smsao", "smsan"]

  version_id = get_current_version_id( date )

  version = "{}_{}".format( date, version_id )

  # make the base path for the version
  util.make_path( util.versioned_path(version, "") )

  # generate the appropriate paths
  paths = []
  for ds in datasets:
    p = common.Paths([version, ds], 0)
    setup_new_dataset(p)
    paths.append(p)

  print(version)

  return paths
def launch_hmmalign(ref_hmm, ref_msa, query_fasta, out_dir):
  """Run ``hmmalign`` from ``common.hmmer_dir`` and return the output path.

  The command is executed with ``out_dir`` as working directory and writes
  to the relative file name ``both.afa`` using ``--outformat afa``.

  Args:
    ref_hmm: Profile path passed as first positional argument.
    ref_msa: Alignment path passed to ``--mapali``.
    query_fasta: Sequence file passed as last positional argument.
    out_dir: Working directory of the run (created if needed).

  Returns:
    ``<out_dir>/both.afa``.

  Raises:
    subprocess.CalledProcessError: If the command exits non-zero.
  """
  util.make_path(out_dir)

  aligned_name = "both.afa"

  # NOTE: unlike the hmmbuild wrapper, no "--cpu" option is passed here;
  # that code path was disabled in the original implementation.
  align_call = [
    os.path.join(common.hmmer_dir, "hmmalign"),
    "-o", aligned_name,
    "--outformat", "afa",
    "--mapali", ref_msa,
    ref_hmm,
    query_fasta,
  ]
  sub.check_call(align_call, cwd=out_dir)

  return os.path.join(out_dir, aligned_name)
def launch_papara(tree, ref_phylip, query_fasta, out_dir):
  """Run the papara binary (``common.papara``) and return its alignment path.

  The command is executed with ``out_dir`` as working directory and the run
  name ``aln``. ``-j <cores>`` is only passed when not running under slurm.

  Args:
    tree: Path passed to ``-t``.
    ref_phylip: Path passed to ``-s``.
    query_fasta: Path passed to ``-q``.
    out_dir: Working directory of the run (created if needed).

  Returns:
    ``<out_dir>/papara_alignment.aln``.

  Raises:
    subprocess.CalledProcessError: If the command exits non-zero.
  """
  util.make_path(out_dir)

  run_name = "aln"

  papara_call = [
    common.papara,
    "-t", tree,
    "-s", ref_phylip,
    "-q", query_fasta,
  ]
  if not util.is_slurm():
    papara_call += ["-j", str(common.available_cores)]
  papara_call += ["-r", "-n", run_name]

  sub.check_call(papara_call, cwd=out_dir)

  return os.path.join(out_dir, "papara_alignment." + run_name)
예제 #13
0
# Arguments are hard-coded for this run; switch to the commented-out line
# below to read them from the real command line instead.
args = parser.parse_args(
    '--name test --ep best --whichset train_val --batch_id 0 --shuffle'.split(
    ))
# args = parser.parse_args()

# Restrict the visible GPUs before the device is selected.
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_ids
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

dataset = util.DataSet('./dataset/' + args.whichset, args.whichset)
dataloader = DataLoader(dataset,
                        batch_size=args.batch_size,
                        num_workers=args.num_workers,
                        shuffle=args.shuffle)

exp_path, train_path, val_path, infer_path, ckpt_path = util.make_path(args)

# map_location lets a checkpoint saved on a GPU be loaded on the CPU
# fallback chosen above; without it deserialization fails on CPU-only hosts.
# NOTE(review): 'model.archi' is a whole pickled model object; torch.load
# unpickles it, so only load checkpoints from a trusted source.
model = torch.load(ckpt_path + '/' + 'model.archi',
                   map_location=device).to(device)
# model = nn.DataParallel(model).to(device)
model.load_state_dict(
    torch.load(ckpt_path + '/' + 'weight_' + args.ep + '.pth',
               map_location=device)['state_dict'])

# Advance the loader to the batch with index args.batch_id.
dataiter = iter(dataloader)
for _ in range(args.batch_id + 1):
    data2show = next(dataiter)

img, state, gt = data2show
# img, state, gt = util.NaN2Zero(img), util.NaN2Zero(state), util.NaN2Zero(gt)
img, state, gt = img.to(device), state.to(device), gt.to(device)

prediction = model(img, state)
예제 #14
0
def setup_new_dataset(path):
  """Create the data, runs and results directories of a dataset.

  Args:
    path: Object exposing ``data_dir``, ``runs_dir`` and ``results_dir``
      attributes; each one is passed to ``util.make_path`` in that order.
  """
  for dir_attr in ("data_dir", "runs_dir", "results_dir"):
    util.make_path(getattr(path, dir_attr))
예제 #15
0
def setup_directory(directory, subdirectory, version):
  """Create ``<directory>/<version>/<subdirectory>`` through ``util.make_path``.

  Note that ``version`` sits between ``directory`` and ``subdirectory`` in
  the resulting path, although it is the last parameter.
  """
  util.make_path(os.path.join(directory, version, subdirectory))