Exemplo n.º 1
0
def datastore(datastore, listing, remove):
    """
    Manage datastore of expected value calculations.

    Parameters:
        datastore: path to the expected-value datastore.
        listing: when truthy, print a summary table of the datastore.
        remove: name of the entry to delete, or None to skip removal.
    """
    if listing:
        table = get_datastore_info_table(datastore)
        click.secho(table.to_string(), fg="blue")
    elif remove is not None:
        # Deletion is destructive; explain the consequences before prompting.
        warn("""
            The MTSv datastores keep previously collected expected values for 
            faster statistical analysis. Deleting datastores will require
            these to be recalculated. Datastores should be deleted if a new
            sequence database has been build and the expected values
            calculated from the previous database are no longer needed.
            """)

        # Loop until the user gives an explicit y/n answer.
        while True:
            click.secho('Are you sure you want to remove? [y/n] ',
                        nl=False,
                        fg="red")
            c = click.getchar()
            click.echo()
            if c.lower() == 'y':
                remove_datastore(datastore, remove)
                click.secho('Deleting: {0}'.format(remove), fg="red")
                break
            elif c.lower() == 'n':
                click.secho('Abort!', fg="red")
                break
            else:
                click.secho('Invalid input', fg="red")
Exemplo n.º 2
0
def run_command(cmd,
                cmd_name,
                ignore_changed=False,
                final_target=False,
                config=False,
                check_result=False):
    """
    Run an MTSv snakemake command, forcing reruns for targets whose
    parameters have changed since the last run.

    Parameters:
        cmd: snakemake command as a list of arguments.
        cmd_name: human-readable command name used in log messages.
        ignore_changed: skip the changed-parameter check when True.
        final_target: extra target to force a rerun on (it is not
            revaluated after the checkpoint is hit); False to skip.
        config: config values to append to the command, or False for none.
        check_result: passed as `check` to sp.run for passthrough commands.
    """
    if "--unlock" in cmd:
        # Bypass run command if --unlock is passed
        info("Unlocking working directory", color="blue")
        run_subprocess(cmd)
        return

    info("Running MTSv {0}".format(cmd_name))
    if ignore_changed:
        changed_targets = []
        warn("""
            Ignoring parameter changes.
            This may cause down stream analysis to be wrong.
            Do not ignore parameter changes if binning parameters,
            kmer size, or sequence database have been modified.
            """)
    else:
        # try to capture snakemake utility options
        passthrough = set(PASSTHROUGH_ARGS).intersection(set(cmd))
        if passthrough:
            sp.run(cmd, check=check_result)
            return

        info("Checking if parameters have changed.", color="blue")
        changed_targets = get_targets_with_changed_params(cmd)
        if changed_targets:
            warn("Parameters changed for targets: {0}\nRerunning".format(
                "\n".join(changed_targets)))
        else:
            info("No parameters have changed.", color="blue")
    # add filter_candidate_taxa because it is not revaluated after
    # checkpoint is hit.
    if final_target:
        changed_targets.append(final_target)
    cmd = cmd if not changed_targets else add_force_targets(
        cmd, changed_targets)
    if config:
        cmd = add_config(cmd, config)
    dryrun_flag = set(["--dryrun", "--dry-run", "-n"]).intersection(set(cmd))
    if dryrun_flag:
        # don't capture standard out for dryrun
        sp.run(cmd)
    else:
        # result of run_subprocess was previously bound to an unused local
        run_subprocess(cmd)
        info("Finished running MTSv {0}".format(cmd_name))
Exemplo n.º 3
0
def setup_and_run(argv, parser):
    """Setup and run a command"""
    change_wkdir(argv)
    # Config-file mode merges config sections into the parsed arguments;
    # otherwise fall back to plain argparse parsing.
    if '--config' in argv or '-c' in argv:
        parsed, extra, missing = add_cfg_to_args(argv, parser)
        if missing:
            warn("Section(s) missing in config file, "
                 "using defaults: {}".format(", ".join(missing)))
    else:
        parsed, extra = parser.parse_known_args()
    # Derive the log file name from the command class and timestamp.
    parsed.log_file = set_log_file(
        parsed.log_file, parsed.cmd_class.__name__, parsed.timestamp)
    command = parsed.cmd_class(Parameters(parsed, extra))
    command.run()
Exemplo n.º 4
0
def outpath_type(input_path):
    '''Outpath_type creates a directory if one does not exist.
    Throws PermissionError if there are no permissions to create
    directory. If path already exists and it is not empty, a warning
    is issued. Returns absolute path to directory.'''
    input_path = os.path.abspath(input_path)
    try:
        os.mkdir(input_path)
        logger.info("Creating directory: {}".format(input_path))
    except PermissionError:
        error("No permission to make directory: {}".format(input_path))
    except FileExistsError:
        # Was a broad `except OSError`, which mislabeled unrelated failures
        # (e.g. a missing parent directory) as "already exists"; only a
        # pre-existing path should reach this branch.
        logger.info("Directory already exists: {}. ".format(input_path))
        if os.listdir(input_path):
            warn("Files in {} may be overwritten!".format(input_path))
    return input_path
Exemplo n.º 5
0
 def run(self):
     """Run each snakemake rule in self.rules as a subprocess.

     For every rule, builds a `snakemake --snakefile <rule> --config ...`
     command from self.params (skipping None values) plus self.snake_params,
     runs it, and records the parameters on success. On interrupt or a
     failed run, unlocks the working directory and reports the error.
     """
     for rule in self.rules:
         cmd = ["snakemake", "--snakefile", rule, "--config"]
         # Only pass parameters that were actually set (drop None values).
         config = [
             "{0}={1}".format(k,v)
             for k, v in self.params.items() if v is not None]
         cmd += config
         cmd += self.snake_params
         try:
             p = sp.run(cmd,
                     check=True)
             # Persist the parameters only after a successful run.
             self._params.write_parameters()
         except ( KeyboardInterrupt, sp.CalledProcessError) as e:
             # Snakemake leaves the working directory locked on failure;
             # unlock it so later runs are not blocked, then report.
             warn("Unlocking directory after failed snakemake")
             sp.run(cmd + ["--unlock"], check=True )
             error(e)
Exemplo n.º 6
0
def heatmap_figure(analysis_files, output, table_output, kwargs):
    """
    Build a clustermap heatmap from concatenated analysis files.

    Parameters:
        analysis_files: analysis result files to concatenate.
        output: path where the heatmap figure is saved.
        table_output: path where the pivot table CSV is written.
        kwargs: extra options forwarded to draw_figure.
    """
    df = cat_analysis_files(analysis_files)
    # Clustermap requires at least two columns and two rows, so duplicate
    # a sample/taxa when only one is present.
    if len(df['sample'].unique()) == 1:
        msg = """
        Only one sample, a duplicate sample is added for clustermap
        to work.
        """
        # logging.warn is a deprecated alias; logging.warning is the
        # supported spelling.
        logging.warning(msg)
        warn(msg)
        df = duplicate_column(df) # need to have more than one column to work
    if len(df['Scientific_Name'].unique()) == 1:
        msg = """
        Only one taxa is significant, a duplicate of this taxa
        is added for clustermap to work.
        """
        logging.warning(msg)
        warn(msg)
        df = duplicate_row(df) # need to have more than one row to work
    df = get_pivot_table(df)
    df.to_csv(table_output)
    fig = draw_figure(df, kwargs)
    fig.savefig(output, bbox_inches="tight")
Exemplo n.º 7
0
        PRECALC_CANTAXA = '{}'
        if snakemake.params['use_data']:
            LOGGER.info("Removing taxa that already have estimates")
            CANTAXA, PRECALC_CANTAXA = remove_prev_calculated(
                CANTAXA, **snakemake.params['exp_db_params'])
            LOGGER.info("The following taxa still need expected "
                        "value estimates\n{}".format(",".join(
                            [str(c) for c in CANTAXA])))
        write_to_file(CANTAXA, snakemake.output[1])
        with open(snakemake.output[0], 'w') as json_out:
            json_out.write(PRECALC_CANTAXA)
        if len(CANTAXA) > 300:
            warn("The number of candidate taxa is very large "
                 "which may result in a very large query fasta file "
                 "that will take a long time to process and require a "
                 "lot of memory. You may want to rerun with a stricter "
                 "cutoff to reduce the size of the query fasta file "
                 "or break up the candidate taxa into chunks and run them "
                 "individually by passing each chunk into analyze using "
                 "the --can_taxa_list option")
        LOGGER.info("Finished collecting candidate taxa")

    except NameError:
        PARSER = argparse.ArgumentParser(
            prog="MTSv Candidate Taxa",
            description="Get list of candidate taxa from summary.",
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)

        PARSER.add_argument("summary",
                            metavar="SUMMARY_FILE",
                            type=file_type,
                            help="Path to summary output file.")