Example #1
    def test_format_payload(self):
        ainfo = [
            ("Demultiplexed",
             [("fp1", "preprocessed_fasta"), ("fp2", "preprocessed_fastq")],
             True, True)]
        obs = format_payload(True, artifacts_info=ainfo, error_msg="Ignored")
        exp = {'success': True, 'error': '',
               'artifacts': [{'artifact_type': "Demultiplexed",
                              'filepaths': [("fp1", "preprocessed_fasta"),
                                            ("fp2", "preprocessed_fastq")],
                              'can_be_submitted_to_ebi': True,
                              'can_be_submitted_to_vamps': True}]}
        self.assertEqual(obs, exp)
Example #2
def split_libraries_fastq(qclient, job_id, parameters, out_dir):
    """Run split libraries fastq with the given parameters

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values to run split libraries
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    dict
        The results of the job
    """
    # Step 1 get the rest of the information needed to run split libraries
    update_job_step(qclient, job_id, "Step 1 of 4: Collecting information")
    artifact_id = parameters['input_data']
    filepaths, mapping_file, atype = get_artifact_information(
        qclient, artifact_id)

    # Step 2 generate the split libraries fastq command
    update_job_step(qclient, job_id, "Step 2 of 4: Generating command")
    command, sl_out = generate_split_libraries_fastq_cmd(
        filepaths, mapping_file, atype, out_dir, parameters)

    # Step 3 execute split libraries
    update_job_step(
        qclient, job_id,
        "Step 3 of 4: Executing demultiplexing and quality control")
    std_out, std_err, return_value = system_call(command)
    if return_value != 0:
        raise RuntimeError(
            "Error processing files:\nStd output: %s\n Std error:%s"
            % (std_out, std_err))

    # Step 4 generate the demux file
    update_job_step(qclient, job_id, "Step 4 of 4: Generating demux file")
    generate_demux_file(sl_out)

    artifacts_info = generate_artifact_info(sl_out)

    return format_payload(True, artifacts_info=artifacts_info)
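
The helper update_job_step is called throughout these examples as update_job_step(qclient, job_id, message). A minimal sketch of what it could look like, assuming qclient exposes a post() mirroring the get() used in the other examples and that the job-step route follows the /qiita_db/jobs/... pattern (both are assumptions, not confirmed by these snippets):

from json import dumps

def update_job_step(qclient, job_id, new_step):
    """Report the job's current step back to the Qiita server.

    Sketch only: the endpoint and payload shape are assumptions based on
    how qclient is used in the surrounding examples.
    """
    json_payload = dumps({'step': new_step})
    qclient.post("/qiita_db/jobs/%s/step/" % job_id, data=json_payload)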
Example #3
def split_libraries_fastq(qclient, job_id, parameters, out_dir):
    """Run split libraries fastq with the given parameters

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values to run split libraries
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    dict
        The results of the job
    """
    # Step 1 get the rest of the information needed to run split libraries
    update_job_step(qclient, job_id, "Step 1 of 4: Collecting information")
    artifact_id = parameters['input_data']
    filepaths, mapping_file, atype = get_artifact_information(
        qclient, artifact_id)

    # Step 2 generate the split libraries fastq command
    update_job_step(qclient, job_id, "Step 2 of 4: Generating command")
    command, sl_out = generate_split_libraries_fastq_cmd(
        filepaths, mapping_file, atype, out_dir, parameters)

    # Step 3 execute split libraries
    update_job_step(
        qclient, job_id,
        "Step 3 of 4: Executing demultiplexing and quality control")
    std_out, std_err, return_value = system_call(command)
    if return_value != 0:
        raise RuntimeError(
            "Error processing files:\nStd output: %s\n Std error:%s" %
            (std_out, std_err))

    # Step 4 generate the demux file
    update_job_step(qclient, job_id, "Step 4 of 4: Generating demux file")
    generate_demux_file(sl_out)

    artifacts_info = generate_artifact_info(sl_out)

    return format_payload(True, artifacts_info=artifacts_info)
Example #4
def execute_job(server_url, job_id, output_dir):
    """Starts the plugin and executes the assigned task

    Parameters
    ----------
    server_url : str
        The url of the server
    job_id : str
        The job id

    Raises
    ------
    RuntimeError
        If there is a problem gathering the job information
    """
    qclient = QiitaClient(server_url)
    # Request job information
    job_info = qclient.get("/qiita_db/jobs/%s" % job_id)
    # Check if we have received the job information so we can start it
    if job_info and job_info['success']:
        # Starting the heartbeat
        start_heartbeat(qclient, job_id)
        # Execute the given task
        task_name = job_info['command']
        task = TASK_DICT[task_name]

        if not exists(output_dir):
            makedirs(output_dir)
        try:
            payload = task(qclient, job_id, job_info['parameters'],
                           output_dir)
        except Exception:
            exc_str = repr(traceback.format_exception(*sys.exc_info()))
            error_msg = ("Error executing %s:\n%s" % (task_name, exc_str))
            payload = format_payload(False, error_msg=error_msg)
        # The job completed
        complete_job(qclient, job_id, payload)
    else:
        raise RuntimeError("Can't get job (%s) information" % job_id)
Example #5
File: plugin.py  Project: mjunaidi/qiita
def execute_job(server_url, job_id, output_dir):
    """Starts the plugin and executes the assigned task

    Parameters
    ----------
    server_url : str
        The url of the server
    job_id : str
        The job id

    Raises
    ------
    RuntimeError
        If there is a problem gathering the job information
    """
    qclient = QiitaClient(server_url)
    # Request job information
    job_info = qclient.get("/qiita_db/jobs/%s" % job_id)
    # Check if we have received the job information so we can start it
    if job_info and job_info['success']:
        # Starting the heartbeat
        start_heartbeat(qclient, job_id)
        # Execute the given task
        task_name = job_info['command']
        task = TASK_DICT[task_name]

        if not exists(output_dir):
            makedirs(output_dir)
        try:
            payload = task(qclient, job_id, job_info['parameters'], output_dir)
        except Exception:
            exc_str = repr(traceback.format_exception(*sys.exc_info()))
            error_msg = ("Error executing %s:\n%s" % (task_name, exc_str))
            payload = format_payload(False, error_msg=error_msg)
        # The job completed
        complete_job(qclient, job_id, payload)
    else:
        raise RuntimeError("Can't get job (%s) information" % job_id)
Example #6
    def test_format_payload_error(self):
        obs = format_payload(False, error_msg="Some error",
                             artifacts_info=['ignored'])
        exp = {'success': False, 'error': 'Some error', 'artifacts': None}
        self.assertEqual(obs, exp)
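
Together with Example #1, this test pins down the payload contract: on success the artifact tuples are expanded and any error message is blanked; on failure the artifacts are dropped and the error message is kept. A sketch consistent with both tests (not necessarily the project's actual implementation):

def format_payload(success, error_msg=None, artifacts_info=None):
    """Build the result dictionary expected by the Qiita server.

    Reconstructed from the tests above; the real helper may differ in
    details such as argument validation.
    """
    if success:
        # Any error message is ignored on success.
        error_msg = ''
        artifacts = [
            {'artifact_type': atype,
             'filepaths': filepaths,
             'can_be_submitted_to_ebi': ebi,
             'can_be_submitted_to_vamps': vamps}
            for atype, filepaths, ebi, vamps in artifacts_info]
    else:
        # Artifact information is ignored on failure.
        artifacts = None
    return {'success': success,
            'error': error_msg if error_msg else '',
            'artifacts': artifacts}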
Example #7
def split_libraries(qclient, job_id, parameters, out_dir):
    """Run split libraries with the given parameters

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values to run split libraries
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    dict
        The results of the job

    Raises
    ------
    NotImplementedError
        If one of the filepath types attached to the artifact is not recognized
    ValueError
        If the artifact has SFF and fasta files
        If the artifact has qual files but not fasta files
        If the artifact has fasta files but not quals
    RuntimeError
        If there is an error processing an sff file
        If there is an error running split_libraries.py
        If there is an error merging the results
    """
    # Step 1 get the rest of the information needed to run split libraries
    update_job_step(qclient, job_id, "Step 1 of 4: Collecting information")
    artifact_id = parameters['input_data']
    filepaths, mapping_file, atype = get_artifact_information(
        qclient, artifact_id)

    # Step 2 generate the split libraries command
    update_job_step(qclient, job_id, "Step 2 of 4: preparing files")
    sffs = []
    seqs = []
    quals = []
    for fp, fp_type in filepaths:
        if fp_type == 'raw_sff':
            sffs.append(fp)
        elif fp_type == 'raw_fasta':
            seqs.append(fp)
        elif fp_type == 'raw_qual':
            quals.append(fp)
        else:
            raise NotImplementedError("File type not supported %s" % fp_type)

    if seqs and sffs:
        raise ValueError('Cannot have SFF and raw fasta on the same artifact')
    elif quals and not seqs:
        raise ValueError('Cannot have just qual files on the artifact, you '
                         'also need raw fasta files')
    elif seqs and not quals:
        raise ValueError('It is not currently possible to process fasta '
                         'file(s) without qual file(s). This will be '
                         'supported in the future. You can track progress on '
                         'this by following: '
                         'https://github.com/biocore/qiita/issues/953')
    elif seqs:
        seqs = sorted(seqs)
        quals = sorted(quals)
    else:
        cmds, seqs, quals = generate_process_sff_commands(sffs, out_dir)
        len_cmds = len(cmds)
        for i, cmd in enumerate(cmds):
            update_job_step(
                qclient, job_id,
                "Step 2 of 4: preparing files (processing sff file %d of %d)" %
                (i + 1, len_cmds))
            std_out, std_err, return_value = system_call(cmd)
            if return_value != 0:
                raise RuntimeError(
                    "Error processing sff file:\nStd output: %s\n Std error:%s"
                    % (std_out, std_err))

    output_dir = join(out_dir, 'sl_out')

    commands, sl_outs = generate_split_libraries_cmd(seqs, quals, mapping_file,
                                                     output_dir, parameters)

    # Step 3 execute split libraries
    cmd_len = len(commands)
    for i, cmd in enumerate(commands):
        update_job_step(
            qclient, job_id,
            "Step 3 of 4: Executing demultiplexing and quality control "
            "(%d of %d)" % (i, cmd_len))
        std_out, std_err, return_value = system_call(cmd)
        if return_value != 0:
            raise RuntimeError(
                "Error running split libraries:\nStd output: %s\nStd error:%s"
                % (std_out, std_err))

    # Step 4 merging results
    if cmd_len > 1:
        update_job_step(qclient, job_id,
                        "Step 4 of 4: Merging results (concatenating files)")
        to_cat = ['split_library_log.txt', 'seqs.fna']
        if quals:
            to_cat.append('seqs_filtered.qual')
        for tc in to_cat:
            files = [join(x, tc) for x in sl_outs]
            cmd = "cat %s > %s" % (' '.join(files), join(output_dir, tc))
            std_out, std_err, return_value = system_call(cmd)
            if return_value != 0:
                raise RuntimeError(
                    "Error concatenating %s files:\nStd output: %s\n"
                    "Std error:%s" % (tc, std_out, std_err))
    if quals:
        update_job_step(
            qclient, job_id,
            "Step 4 of 4: Merging results (converting fastqual to fastq)")
        cmd = ("convert_fastaqual_fastq.py -f %s -q %s -o %s -F" %
               (join(output_dir, 'seqs.fna'),
                join(output_dir, 'seqs_filtered.qual'), output_dir))
        std_out, std_err, return_value = system_call(cmd)
        if return_value != 0:
            raise RuntimeError(
                "Error converting the fasta/qual files to fastq")
    update_job_step(qclient, job_id,
                    "Step 4 of 4: Merging results (generating demux file)")

    generate_demux_file(output_dir)

    artifacts_info = generate_artifact_info(output_dir)

    return format_payload(True, artifacts_info=artifacts_info)
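
system_call is used in every processing example as std_out, std_err, return_value = system_call(cmd). A minimal subprocess-based sketch matching that contract (the project's own helper may handle environments and encodings differently):

from subprocess import Popen, PIPE

def system_call(cmd):
    """Run cmd through the shell and return (stdout, stderr, return code)."""
    proc = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE,
                 universal_newlines=True)
    std_out, std_err = proc.communicate()
    return std_out, std_err, proc.returncode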
Example #8
def pick_closed_reference_otus(qclient, job_id, parameters, out_dir):
    """Run split libraries fastq with the given parameters

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values to run split libraries
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    dict
        The results of the job

    Raises
    ------
    ValueError
        If there is any error gathering the information from the server
    """
    update_job_step(qclient, job_id, "Step 1 of 3: Collecting information")
    artifact_id = parameters['input_data']
    fps_info = qclient.get("/qiita_db/artifacts/%s/filepaths/" % artifact_id)
    if not fps_info or not fps_info['success']:
        error_msg = "Could not get artifact filepath information: %s"
        if fps_info:
            error_msg = error_msg % fps_info['error']
        else:
            error_msg = error_msg % "could not connect with the server"
        raise ValueError(error_msg)
    fps = fps_info['filepaths']

    reference_id = parameters['reference']
    ref_info = qclient.get("/qiita_db/references/%s/filepaths/" % reference_id)
    if not ref_info or not ref_info['success']:
        error_msg = "Could not get artifact filepath information: %s"
        if ref_info:
            error_msg = error_msg % ref_info['error']
        else:
            error_msg = error_msg % "could not connect with the server"
        raise ValueError(error_msg)
    reference_fps = ref_info['filepaths']

    update_job_step(qclient, job_id, "Step 2 of 3: Generating command")
    command, pick_out = generate_pick_closed_reference_otus_cmd(
        fps, out_dir, parameters, reference_fps)

    update_job_step(qclient, job_id, "Step 3 of 3: Executing OTU picking")
    std_out, std_err, return_value = system_call(command)
    if return_value != 0:
        error_msg = ("Error running OTU picking:\nStd out: %s\nStd err: %s"
                     % (std_out, std_err))
        return format_payload(False, error_msg=error_msg)

    artifacts_info = generate_artifact_info(pick_out)

    return format_payload(True, artifacts_info=artifacts_info)
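
The two retrieval blocks above repeat the same fetch-and-validate pattern for artifact and reference filepaths. A small hypothetical helper (the name _get_filepaths is invented for illustration) could factor it out:

def _get_filepaths(qclient, endpoint, what):
    """Fetch a filepaths listing and raise ValueError on any failure.

    Hypothetical refactoring of the pattern used in
    pick_closed_reference_otus above.
    """
    info = qclient.get(endpoint)
    if not info or not info['success']:
        error = info['error'] if info else "could not connect with the server"
        raise ValueError("Could not get %s filepath information: %s"
                         % (what, error))
    return info['filepaths']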
Example #9
def split_libraries(qclient, job_id, parameters, out_dir):
    """Run split libraries with the given parameters

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values to run split libraries
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    dict
        The results of the job

    Raises
    ------
    NotImplementedError
        If one of the filepath types attached to the artifact is not recognized
    ValueError
        If the artifact has SFF and fasta files
        If the artifact has qual files but not fasta files
        If the artifact has fasta files but not quals
    RuntimeError
        If there is an error processing an sff file
        If there is an error running split_libraries.py
        If there is an error merging the results
    """
    # Step 1 get the rest of the information needed to run split libraries
    update_job_step(qclient, job_id, "Step 1 of 4: Collecting information")
    artifact_id = parameters['input_data']
    filepaths, mapping_file, atype = get_artifact_information(
        qclient, artifact_id)

    # Step 2 generate the split libraries command
    update_job_step(qclient, job_id, "Step 2 of 4: preparing files")
    sffs = []
    seqs = []
    quals = []
    for fp, fp_type in filepaths:
        if fp_type == 'raw_sff':
            sffs.append(fp)
        elif fp_type == 'raw_fasta':
            seqs.append(fp)
        elif fp_type == 'raw_qual':
            quals.append(fp)
        else:
            raise NotImplementedError("File type not supported %s" % fp_type)

    if seqs and sffs:
        raise ValueError('Cannot have SFF and raw fasta on the same artifact')
    elif quals and not seqs:
        raise ValueError('Cannot have just qual files on the artifact, you '
                         'also need raw fasta files')
    elif seqs and not quals:
        raise ValueError('It is not currently possible to process fasta '
                         'file(s) without qual file(s). This will be '
                         'supported in the future. You can track progress on '
                         'this by following: '
                         'https://github.com/biocore/qiita/issues/953')
    elif seqs:
        seqs = sorted(seqs)
        quals = sorted(quals)
    else:
        cmds, seqs, quals = generate_process_sff_commands(sffs, out_dir)
        len_cmds = len(cmds)
        for i, cmd in enumerate(cmds):
            update_job_step(
                qclient, job_id,
                "Step 2 of 4: preparing files (processing sff file %d of %d)"
                % (i + 1, len_cmds))
            std_out, std_err, return_value = system_call(cmd)
            if return_value != 0:
                raise RuntimeError(
                    "Error processing sff file:\nStd output: %s\n Std error:%s"
                    % (std_out, std_err))

    output_dir = join(out_dir, 'sl_out')

    commands, sl_outs = generate_split_libraries_cmd(
        seqs, quals, mapping_file, output_dir, parameters)

    # Step 3 execute split libraries
    cmd_len = len(commands)
    for i, cmd in enumerate(commands):
        update_job_step(
            qclient, job_id,
            "Step 3 of 4: Executing demultiplexing and quality control "
            "(%d of %d)" % (i, cmd_len))
        std_out, std_err, return_value = system_call(cmd)
        if return_value != 0:
            raise RuntimeError(
                "Error running split libraries:\nStd output: %s\nStd error:%s"
                % (std_out, std_err))

    # Step 4 merging results
    if cmd_len > 1:
        update_job_step(
            qclient, job_id,
            "Step 4 of 4: Merging results (concatenating files)")
        to_cat = ['split_library_log.txt', 'seqs.fna']
        if quals:
            to_cat.append('seqs_filtered.qual')
        for tc in to_cat:
            files = [join(x, tc) for x in sl_outs]
            cmd = "cat %s > %s" % (' '.join(files), join(output_dir, tc))
            std_out, std_err, return_value = system_call(cmd)
            if return_value != 0:
                raise RuntimeError(
                    "Error concatenating %s files:\nStd output: %s\n"
                    "Std error:%s" % (tc, std_out, std_err))
    if quals:
        update_job_step(
            qclient, job_id,
            "Step 4 of 4: Merging results (converting fastqual to fastq)")
        cmd = ("convert_fastaqual_fastq.py -f %s -q %s -o %s -F"
               % (join(output_dir, 'seqs.fna'),
                  join(output_dir, 'seqs_filtered.qual'),
                  output_dir))
        std_out, std_err, return_value = system_call(cmd)
        if return_value != 0:
            raise RuntimeError(
                "Error converting the fasta/qual files to fastq")
    update_job_step(
        qclient, job_id,
        "Step 4 of 4: Merging results (generating demux file)")

    generate_demux_file(output_dir)

    artifacts_info = generate_artifact_info(output_dir)

    return format_payload(True, artifacts_info=artifacts_info)