Example #1
def execute(ssh_client, project_name, analysis_steps, s3_input_files_address,
            sample_list, group_name, s3_output_files_address, email):
    yaml_file = project_name + ".yaml"

    global log_dir
    log_dir = log_dir.format(project_name)

    print("making the yaml file ...")
    YamlFileMaker.make_yaml_file(yaml_file, project_name, analysis_steps,
                                 s3_input_files_address, sample_list,
                                 group_name, s3_output_files_address, "hg19",
                                 "NA")

    print("copying yaml files to remote master node...")
    ConnectionManager.copy_file(ssh_client, yaml_file,
                                workspace + "yaml_examples")
    os.remove(yaml_file)

    #if not email == "":

    print("executing pipeline...")
    ConnectionManager.execute_command(
        ssh_client, "qsub -o /dev/null -e /dev/null " + workspace +
        "scripts/run.sh " + workspace + "yaml_examples/" + yaml_file + "  " +
        log_dir + " " + "WGSPipeline.py")
Example #2
def edit_step_tools_config(ssh_client, new_step_tools_conf, step_name):
    # Read the current tools.yaml from the cluster and parse it.
    tools_conf = yaml.safe_load(
        ConnectionManager.execute_command(
            ssh_client, "cat /shared/workspace/Pipelines/config/tools.yaml"))
    tools_conf[step_name] = new_step_tools_conf
    with open("tools.yaml", "w+") as f:
        f.write(yaml.dump(tools_conf, default_flow_style=False))
    # Back up the existing config on the cluster (-n keeps any previous backup) before uploading the new one.
    ConnectionManager.execute_command(
        ssh_client,
        "mv -n /shared/workspace/Pipelines/config/tools.yaml /shared/workspace/Pipelines/config/tools.yaml.BACKUP"
    )
    ConnectionManager.copy_file(
        ssh_client, "{}/tools.yaml".format(os.getcwd()),
        "/shared/workspace/Pipelines/config/tools.yaml")
Example #3
def upload_script(ssh_client, pipeline, workflow, script_name):
    script_path_cluster = "/shared/workspace/Pipelines/scripts/"

    if pipeline == "all":
        script_path_cluster += script_name
    elif workflow == "all":
        script_path_cluster += "{}/{}".format(pipeline, script_name)
    else:
        script_path_cluster += "{}/{}/{}".format(pipeline, workflow,
                                                 script_name)

    ConnectionManager.execute_command(
        ssh_client, "mv -n {0} {0}.BACKUP".format(script_path_cluster))
    ConnectionManager.copy_file(ssh_client,
                                "{}/{}".format(os.getcwd(), script_name),
                                script_path_cluster)
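A hypothetical call to upload_script, again with an already-connected ssh_client; the pipeline, workflow, and script names are placeholders. With pipeline="all" the script would instead land directly in the top-level scripts/ directory.

# Upload a locally edited script for one specific pipeline/workflow combination;
# the existing copy on the cluster is preserved as <script>.BACKUP by the mv -n above.
upload_script(ssh_client, pipeline="dnaseq", workflow="bwa_gatk", script_name="align.sh")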
Example #4
def execute(ssh_client, project_name, analysis_steps, s3_input_files_address,
            sample_list, group_list, s3_output_files_address):
    yaml_file = project_name + ".yaml"

    print("making the yaml file...")
    YamlFileMaker.make_yaml_file(yaml_file, project_name, analysis_steps,
                                 s3_input_files_address, sample_list,
                                 group_list, s3_output_files_address, "hg19",
                                 "NA")

    print("copying yaml file to remote master node...")
    ConnectionManager.copy_file(ssh_client, yaml_file, workspace + "yaml_examples")

    # Remove the local yaml file
    os.remove(yaml_file)

    print("executing pipeline...")
    ConnectionManager.execute_command(ssh_client, "sh " + workspace + "run.sh "
                                      + workspace + "yaml_examples/" + yaml_file)
Example #5
def execute(ssh_client, project_name, analysis_steps, s3_input_files_address,
            sample_list, group_name, s3_output_files_address):
    yaml_file = project_name + ".yaml"

    print("making the yaml file...")
    YamlFileMaker.make_yaml_file(yaml_file, project_name, analysis_steps,
                                 s3_input_files_address, sample_list,
                                 group_name, s3_output_files_address, "hg19",
                                 "NA")

    print("copying yaml file to remote master node...")
    ConnectionManager.copy_file(ssh_client, yaml_file, workspace + "yaml_examples")

    # Remove the local yaml file
    os.remove(yaml_file)

    print("executing pipeline...")
    ConnectionManager.execute_command(ssh_client, "sh " + workspace + "run.sh "
                                      + workspace + "yaml_examples/" + yaml_file)
Example #6
def edit_step_specific_config(ssh_client, pipeline, workflow,
                              new_extra_bash_args, step_name):
    conf_file_name = "{}_{}.yaml".format(pipeline, workflow)
    # Read the workflow-specific config file from the cluster and parse it.
    spec_conf = yaml.safe_load(
        ConnectionManager.execute_command(
            ssh_client, "cat /shared/workspace/Pipelines/config/{}/{}".format(
                pipeline, conf_file_name)))
    spec_conf[step_name] = new_extra_bash_args

    with open(conf_file_name, "w+") as f:
        f.write(yaml.dump(spec_conf, default_flow_style=False))
    ConnectionManager.execute_command(
        ssh_client,
        "mv -n /shared/workspace/Pipelines/config/{0}/{1} /shared/workspace/Pipelines/config/{0}/{1}.BACKUP"
        .format(pipeline, conf_file_name))
    ConnectionManager.copy_file(
        ssh_client, "{}/{}".format(os.getcwd(), conf_file_name),
        "/shared/workspace/Pipelines/config/{}/{}".format(
            pipeline, conf_file_name))
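A hypothetical call matching the signature above; the pipeline, workflow, step name, and extra bash arguments are placeholders, not values from a real config.

# Override the extra bash arguments for one step in the (placeholder) rnaseq/star_gatk config file.
edit_step_specific_config(ssh_client, pipeline="rnaseq", workflow="star_gatk",
                          new_extra_bash_args="-p 8", step_name="align")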
Example #7
def execute(pipeline, ssh_client, project_name, workflow, analysis_steps,
            s3_input_files_address, sample_list, group_list,
            s3_output_files_address, genome, style, pairs_list):
    """Executes a pipeline.

    The main local-side function for executing a pipeline with all user input from the Jupyter notebook.
    Creates a yaml file summarizing the user input, uploads that file to the cluster, and then
    calls the run.sh shell script on the cluster head node using nohup.

    Args:
        pipeline: name of the pipeline to be run, supported pipelines can be found in CirrusAddons notebook
        ssh_client: a paramiko SSHClient object that connects to the cluster where analysis is run
        project_name: name of the current project, <project_name>.yaml contains all user input to notebook
        workflow: name of the workflow to be run, supported workflows can be found in CirrusAddons notebook
        analysis_steps: set of analysis steps to be run, supported steps can be found in pipeline's notebook
        s3_input_files_address: s3 bucket containing all fastq files for project
        sample_list: list of dictionaries with sample info for each sample
        group_list: list of all groups, shares indices with sample_list (sample_list[0] is in group_list[0], etc.)
        s3_output_files_address: root s3 bucket where analysis results should be uploaded
        genome: reference genome to be used, supported genomes can be found in pipeline's notebook
        style: only for ChIPSeq homer workflow, can be "factor" or "histone"
        pairs_list: dictionary with keys=normal samples, values=experimental samples
            for ChIPSeq the keys=ChIP samples, values=corresponding input regularization samples

    Returns:
        None
    """
    yaml_file = project_name + ".yaml"

    if s3_output_files_address.endswith("/"):
        s3_output_files_address = s3_output_files_address[:-1]
    if s3_input_files_address.endswith("/"):
        s3_input_files_address = s3_input_files_address[:-1]

    logs_dir = "/shared/workspace/logs/{}/{}/{}".format(
        pipeline, workflow, project_name)

    print("making the yaml file...")
    YamlFileMaker.make_yaml_file(yaml_file, pipeline, project_name, workflow,
                                 analysis_steps, s3_input_files_address,
                                 sample_list, group_list,
                                 s3_output_files_address, genome, style,
                                 pairs_list)

    print("copying yaml file to remote master node...")

    # Make sure remote directory exists
    remote_dir = workspace + "yaml_files/" + pipeline + "/" + workflow
    ssh_client.exec_command("mkdir -p " + remote_dir)

    ConnectionManager.copy_file(
        ssh_client, yaml_file,
        "{}yaml_files/{}/{}".format(workspace, pipeline, workflow))

    # Remove the local yaml file
    os.remove(yaml_file)

    print("executing pipeline...")

    ConnectionManager.execute_command(
        ssh_client, "nohup bash " + workspace + "scripts/run.sh " + workspace +
        "yaml_files/{}/{}/{} ".format(pipeline, workflow, yaml_file) +
        logs_dir + " " + pipeline + "_" + workflow)