Example #1
def pick_closed_reference_otus(qclient, job_id, parameters, out_dir):
    """Run split libraries fastq with the given parameters

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values to run the OTU picking
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    bool, list, str
        Whether the job was successful, the artifacts information and the
        error message (if any)

    Raises
    ------
    ValueError
        If there is any error gathering the information from the server
    """
    qclient.update_job_step(job_id, "Step 1 of 4: Collecting information")
    artifact_id = parameters['input_data']
    fps_info = qclient.get("/qiita_db/artifacts/%s/filepaths/" % artifact_id)
    fps = fps_info['filepaths']

    reference_id = parameters['reference']
    ref_info = qclient.get("/qiita_db/references/%s/filepaths/" % reference_id)
    reference_fps = ref_info['filepaths']

    qclient.update_job_step(job_id, "Step 2 of 4: Generating command")
    command, pick_out = generate_pick_closed_reference_otus_cmd(
        fps, out_dir, parameters, reference_fps)

    qclient.update_job_step(job_id, "Step 3 of 4: Executing OTU picking")
    std_out, std_err, return_value = system_call(command)
    if return_value != 0:
        error_msg = ("Error running OTU picking:\nStd out: %s\nStd err: %s" %
                     (std_out, std_err))
        return False, None, error_msg

    qclient.update_job_step(job_id,
                            "Step 4 of 4: Generating tgz sortmerna folder")
    try:
        generate_sortmerna_tgz(pick_out)
    except Exception as e:
        error_msg = ("Error while tgz failures:\nError: %s" % str(e))
        return False, None, error_msg

    artifacts_info = generate_artifact_info(pick_out)

    return True, artifacts_info, ""
Example #3
def split_libraries_fastq(qclient, job_id, parameters, out_dir):
    """Run split libraries fastq with the given parameters

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values to run split libraries
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    dict
        The results of the job
    """
    # Step 1 get the rest of the information needed to run split libraries
    update_job_step(qclient, job_id, "Step 1 of 4: Collecting information")
    artifact_id = parameters['input_data']
    filepaths, mapping_file, atype = get_artifact_information(
        qclient, artifact_id)

    # Step 2 generate the split libraries fastq command
    update_job_step(qclient, job_id, "Step 2 of 4: Generating command")
    command, sl_out = generate_split_libraries_fastq_cmd(
        filepaths, mapping_file, atype, out_dir, parameters)

    # Step 3 execute split libraries
    update_job_step(
        qclient, job_id,
        "Step 3 of 4: Executing demultiplexing and quality control")
    std_out, std_err, return_value = system_call(command)
    if return_value != 0:
        raise RuntimeError(
            "Error processing files:\nStd output: %s\n Std error:%s" %
            (std_out, std_err))

    # Step 4 generate the demux file
    update_job_step(qclient, job_id, "Step 4 of 4: Generating demux file")
    generate_demux_file(sl_out)

    artifacts_info = generate_artifact_info(sl_out)

    return format_payload(True, artifacts_info=artifacts_info)
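
Note that this example calls a module-level update_job_step(qclient, job_id,
...) helper, while Examples #1 and #8 call qclient.update_job_step(job_id,
...) directly. The helper is presumably just a thin wrapper along these lines
(a sketch, not the plugin's actual code):

def update_job_step(qclient, job_id, new_step):
    # Forward the human-readable step description to the Qiita server via the
    # client; assumed to be a thin convenience wrapper around the client call.
    qclient.update_job_step(job_id, new_step)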
Example #5
def test_system_call_error(self):
    obs_out, obs_err, obs_val = system_call("IHopeThisCommandDoesNotExist")
    self.assertEqual(obs_out, "")
    self.assertTrue("command not found" in obs_err)
    self.assertEqual(obs_val, 127)
Example #6
def test_system_call(self):
    obs_out, obs_err, obs_val = system_call("pwd")
    self.assertEqual(obs_out, "%s\n" % getcwd())
    self.assertEqual(obs_err, "")
    self.assertEqual(obs_val, 0)
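
The two tests above pin down the contract of system_call: it runs the command
through a shell and returns its stdout, stderr and return value (a
nonexistent command yields the shell's "command not found" message and exit
code 127). A minimal subprocess-based sketch that satisfies these tests,
which may differ from the helper the plugin actually uses:

from subprocess import PIPE, Popen


def system_call(cmd):
    # Run cmd through the shell and capture both output streams as text.
    proc = Popen(cmd, shell=True, universal_newlines=True,
                 stdout=PIPE, stderr=PIPE)
    std_out, std_err = proc.communicate()
    return std_out, std_err, proc.returncode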
Example #7
def split_libraries(qclient, job_id, parameters, out_dir):
    """Run split libraries with the given parameters

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values to run split libraries
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    dict
        The results of the job

    Raises
    ------
    NotImplementedError
        If one of the filepath types attached to the artifact is not recognized
    ValueError
        If the artifact has SFF and fasta files
        If the artifact has qual files but not fasta files
        If the artifact has fasta files but not qual files
    RuntimeError
        If there is an error processing an sff file
        If there is an error running split_libraries.py
        If there is an error merging the results
    """
    # Step 1 get the rest of the information needed to run split libraries
    update_job_step(qclient, job_id, "Step 1 of 4: Collecting information")
    artifact_id = parameters['input_data']
    filepaths, mapping_file, atype = get_artifact_information(
        qclient, artifact_id)

    # Step 2 generate the split libraries command
    update_job_step(qclient, job_id, "Step 2 of 4: preparing files")
    sffs = []
    seqs = []
    quals = []
    for fp, fp_type in filepaths:
        if fp_type == 'raw_sff':
            sffs.append(fp)
        elif fp_type == 'raw_fasta':
            seqs.append(fp)
        elif fp_type == 'raw_qual':
            quals.append(fp)
        else:
            raise NotImplementedError("File type not supported %s" % fp_type)

    if seqs and sffs:
        raise ValueError('Cannot have SFF and raw fasta on the same artifact')
    elif quals and not seqs:
        raise ValueError('Cannot have just qual files on the artifact, you '
                         'also need raw fasta files')
    elif seqs and not quals:
        raise ValueError('It is not currently possible to process fasta '
                         'file(s) without qual file(s). This will be '
                         'supported in the future. You can track progress on '
                         'this by following: '
                         'https://github.com/biocore/qiita/issues/953')
    elif seqs:
        seqs = sorted(seqs)
        quals = sorted(quals)
    else:
        cmds, seqs, quals = generate_process_sff_commands(sffs, out_dir)
        len_cmds = len(cmds)
        for i, cmd in enumerate(cmds):
            update_job_step(
                qclient, job_id,
                "Step 2 of 4: preparing files (processing sff file %d of %d)" %
                (i + 1, len_cmds))
            std_out, std_err, return_value = system_call(cmd)
            if return_value != 0:
                raise RuntimeError(
                    "Error processing sff file:\nStd output: %s\n Std error:%s"
                    % (std_out, std_err))

    output_dir = join(out_dir, 'sl_out')

    commands, sl_outs = generate_split_libraries_cmd(seqs, quals, mapping_file,
                                                     output_dir, parameters)

    # Step 3 execute split libraries
    cmd_len = len(commands)
    for i, cmd in enumerate(commands):
        update_job_step(
            qclient, job_id,
            "Step 3 of 4: Executing demultiplexing and quality control "
            "(%d of %d)" % (i, cmd_len))
        std_out, std_err, return_value = system_call(cmd)
        if return_value != 0:
            raise RuntimeError(
                "Error running split libraries:\nStd output: %s\nStd error:%s"
                % (std_out, std_err))

    # Step 4 merging results
    if cmd_len > 1:
        update_job_step(qclient, job_id,
                        "Step 4 of 4: Merging results (concatenating files)")
        to_cat = ['split_library_log.txt', 'seqs.fna']
        if quals:
            to_cat.append('seqs_filtered.qual')
        for tc in to_cat:
            files = [join(x, tc) for x in sl_outs]
            cmd = "cat %s > %s" % (' '.join(files), join(output_dir, tc))
            std_out, std_err, return_value = system_call(cmd)
            if return_value != 0:
                raise RuntimeError(
                    "Error concatenating %s files:\nStd output: %s\n"
                    "Std error:%s" % (tc, std_out, std_err))
    if quals:
        update_job_step(
            qclient, job_id,
            "Step 4 of 4: Merging results (converting fastqual to fastq)")
        cmd = ("convert_fastaqual_fastq.py -f %s -q %s -o %s -F" %
               (join(output_dir, 'seqs.fna'),
                join(output_dir, 'seqs_filtered.qual'), output_dir))
        std_out, std_err, return_value = system_call(cmd)
        if return_value != 0:
            raise RuntimeError(
                "Error converting the fasta/qual files to fastq")
    update_job_step(qclient, job_id,
                    "Step 4 of 4: Merging results (generating demux file)")

    generate_demux_file(output_dir)

    artifacts_info = generate_artifact_info(output_dir)

    return format_payload(True, artifacts_info=artifacts_info)
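
The merge step above shells out to cat to concatenate the per-command
outputs. The same merge can be done in pure Python, which avoids shell
quoting issues with unusual file names; this is only an alternative sketch,
not what the plugin does:

import shutil
from os.path import join


def concatenate_outputs(sl_outs, output_dir, filename):
    # Append the given file from every split-libraries output directory into
    # a single file in output_dir, equivalent to the "cat" call above.
    with open(join(output_dir, filename), 'wb') as dst:
        for src_dir in sl_outs:
            with open(join(src_dir, filename), 'rb') as src:
                shutil.copyfileobj(src, dst)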
Example #8
def split_libraries(qclient, job_id, parameters, out_dir):
    """Run split libraries with the given parameters

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values to run split libraries
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    dict
        The results of the job

    Raises
    ------
    NotImplementedError
        If one of the filepath types attached to the artifact is not recognized
    ValueError
        If the artifact has SFF and fasta files
        If the artifact has qual files but not fasta files
        If the artifact has fasta files but not qual files
    RuntimeError
        If there is an error processing an sff file
        If there is an error running split_libraries.py
        If there is an error merging the results
    """
    # Step 1 get the rest of the information needed to run split libraries
    qclient.update_job_step(job_id, "Step 1 of 4: Collecting information")
    artifact_id = parameters['input_data']
    filepaths, mapping_file, atype = get_artifact_information(
        qclient, artifact_id)

    # Step 2 generate the split libraries command
    qclient.update_job_step(job_id, "Step 2 of 4: preparing files")
    sffs = []
    seqs = []
    quals = []
    for fp, fp_type in filepaths:
        if fp_type == 'raw_sff':
            sffs.append(fp)
        elif fp_type == 'raw_fasta':
            seqs.append(fp)
        elif fp_type == 'raw_qual':
            quals.append(fp)
        elif fp_type == 'html_summary':
            # Ignore the HTML summary file
            continue
        else:
            raise NotImplementedError("File type not supported %s" % fp_type)

    if seqs and sffs:
        raise ValueError('Cannot have SFF and raw fasta on the same artifact')
    elif quals and not seqs:
        raise ValueError('Cannot have just qual files on the artifact, you '
                         'also need raw fasta files')
    elif seqs and not quals:
        raise ValueError('It is not currently possible to process fasta '
                         'file(s) without qual file(s). This will be '
                         'supported in the future. You can track progress on '
                         'this by following: '
                         'https://github.com/biocore/qiita/issues/953')
    elif seqs:
        seqs = sorted(seqs)
        quals = sorted(quals)
    else:
        cmds, seqs, quals = generate_process_sff_commands(sffs, out_dir)
        len_cmds = len(cmds)
        for i, cmd in enumerate(cmds):
            qclient.update_job_step(
                job_id,
                "Step 2 of 4: preparing files (processing sff file %d of %d)"
                % (i + 1, len_cmds))
            std_out, std_err, return_value = system_call(cmd)
            if return_value != 0:
                raise RuntimeError(
                    "Error processing sff file:\nStd output: %s\n Std error:%s"
                    % (std_out, std_err))

    output_dir = join(out_dir, 'sl_out')

    commands, sl_outs = generate_split_libraries_cmd(
        seqs, quals, mapping_file, output_dir, parameters)

    # Step 3 execute split libraries
    cmd_len = len(commands)
    for i, cmd in enumerate(commands):
        qclient.update_job_step(
            job_id,
            "Step 3 of 4: Executing demultiplexing and quality control "
            "(%d of %d)" % (i, cmd_len))
        std_out, std_err, return_value = system_call(cmd)
        if return_value != 0:
            raise RuntimeError(
                "Error running split libraries:\nStd output: %s\nStd error:%s"
                % (std_out, std_err))

    # Step 4 merging results
    if cmd_len > 1:
        qclient.update_job_step(
            job_id,
            "Step 4 of 4: Merging results (concatenating files)")
        to_cat = ['split_library_log.txt', 'seqs.fna']
        if quals:
            to_cat.append('seqs_filtered.qual')
        for tc in to_cat:
            files = [join(x, tc) for x in sl_outs]
            cmd = "cat %s > %s" % (' '.join(files), join(output_dir, tc))
            std_out, std_err, return_value = system_call(cmd)
            if return_value != 0:
                raise RuntimeError(
                    "Error concatenating %s files:\nStd output: %s\n"
                    "Std error:%s" % (tc, std_out, std_err))
    if quals:
        qclient.update_job_step(
            job_id,
            "Step 4 of 4: Merging results (converting fastqual to fastq)")
        cmd = ("convert_fastaqual_fastq.py -f %s -q %s -o %s -F"
               % (join(output_dir, 'seqs.fna'),
                  join(output_dir, 'seqs_filtered.qual'),
                  output_dir))
        std_out, std_err, return_value = system_call(cmd)
        if return_value != 0:
            raise RuntimeError(
                "Error converting the fasta/qual files to fastq")
    qclient.update_job_step(
        job_id, "Step 4 of 4: Merging results (generating demux file)")

    generate_demux_file(output_dir)

    artifacts_info = generate_artifact_info(output_dir)

    return format_payload(True, artifacts_info=artifacts_info)
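
Example #8 differs from Example #7 mainly in that it silently skips the
'html_summary' filepath type instead of raising. The per-type bucketing can
also be written with a mapping, which keeps the list of supported types in
one place; again, only an illustrative sketch:

def bucket_filepaths(filepaths):
    # Group filepaths by type; skip HTML summaries and reject unknown types,
    # mirroring the behaviour of the loop in Example #8.
    buckets = {'raw_sff': [], 'raw_fasta': [], 'raw_qual': []}
    for fp, fp_type in filepaths:
        if fp_type == 'html_summary':
            continue
        if fp_type not in buckets:
            raise NotImplementedError("File type not supported %s" % fp_type)
        buckets[fp_type].append(fp)
    return buckets['raw_sff'], buckets['raw_fasta'], buckets['raw_qual']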
Example #9
def pick_closed_reference_otus(qclient, job_id, parameters, out_dir):
    """Run split libraries fastq with the given parameters

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values to run the OTU picking
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    dict
        The results of the job

    Raises
    ------
    ValueError
        If there is any error gathering the information from the server
    """
    update_job_step(qclient, job_id, "Step 1 of 3: Collecting information")
    artifact_id = parameters['input_data']
    fps_info = qclient.get("/qiita_db/artifacts/%s/filepaths/" % artifact_id)
    if not fps_info or not fps_info['success']:
        error_msg = "Could not get artifact filepath information: %s"
        if fps_info:
            error_msg = error_msg % fps_info['error']
        else:
            error_msg = error_msg % "could not connect with the server"
        raise ValueError(error_msg)
    fps = fps_info['filepaths']

    reference_id = parameters['reference']
    ref_info = qclient.get("/qiita_db/references/%s/filepaths/" % reference_id)
    if not ref_info or not ref_info['success']:
        error_msg = "Could not get artifact filepath information: %s"
        if ref_info:
            error_msg = error_msg % ref_info['error']
        else:
            error_msg = error_msg % "could not connect with the server"
        raise ValueError(error_msg)
    reference_fps = ref_info['filepaths']

    update_job_step(qclient, job_id, "Step 2 of 3: Generating command")
    command, pick_out = generate_pick_closed_reference_otus_cmd(
        fps, out_dir, parameters, reference_fps)

    update_job_step(qclient, job_id, "Step 3 of 3: Executing OTU picking")
    std_out, std_err, return_value = system_call(command)
    if return_value != 0:
        error_msg = ("Error running OTU picking:\nStd out: %s\nStd err: %s"
                     % (std_out, std_err))
        return format_payload(False, error_msg=error_msg)

    artifacts_info = generate_artifact_info(pick_out)

    return format_payload(True, artifacts_info=artifacts_info)
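
Examples #3, #7, #8 and #9 wrap their result in format_payload, which is not
shown on this page but, judging from the call sites, packs the success flag,
the artifacts information and an optional error message into the dict the
server expects. The key names in the sketch below are assumptions inferred
from how it is called, not the plugin's real payload format:

def format_payload(success, artifacts_info=None, error_msg=""):
    # Hypothetical payload builder; key names are assumed for illustration.
    return {'success': success,
            'error': "" if success else error_msg,
            'artifacts': artifacts_info if success else None}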