Example #1
def multiple_file_checker(filenames, directions):
    # Check that multiple files exist:
    must_exist = compss_file_exists(*filenames)
    compss_delete_file(*filenames)
    must_not_exist = compss_file_exists(*filenames)
    assert all(must_exist), "Files %s that must exist were not found." % str(directions)
    assert not any(must_not_exist), "Files %s that must NOT exist were found." % str(directions)
Example #2
def file_checker(filename, direction):
    # Check if file exists:
    must_exist = compss_file_exists(filename)
    compss_delete_file(filename)
    must_not_exist = compss_file_exists(filename)
    assert must_exist is True, "File %s that must exist was not found." % direction
    assert must_not_exist is False, "File %s that must NOT exist was found." % direction
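
Both checkers assume the files were produced by tasks beforehand. A minimal usage sketch, assuming PyCOMPSs is importable; the producer task name write_file and its content are illustrative, not part of the original snippets:

from pycompss.api.task import task
from pycompss.api.parameter import FILE_OUT


@task(fname=FILE_OUT)
def write_file(fname):
    # Hypothetical producer: runs on a worker and creates the file
    # that file_checker later inspects on the master side.
    with open(fname, "w") as fd:
        fd.write("some content")


# write_file("my_file.txt")
# file_checker("my_file.txt", "OUT")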
Example #3
    def test_default_file_out(self):
        initial_file = "my_out_file.txt"
        i_will_fail_file_out(initial_file)
        with compss_open(initial_file) as f:
            content = f.read()
        assert content == "EMPTY FILE OUT", "ERROR: Wrong file out (%s != EMPTY FILE OUT)" % content
        compss_delete_file(initial_file)
Example #4
    def test_default_file_inout(self):
        initial_file = "my_inout_file.txt"
        with open(initial_file, 'w') as f:
            f.write("INITIAL FILE INOUT")
        i_will_fail_file_inout(initial_file)
        with compss_open(initial_file) as f:
            content = f.read()
        assert content == "EMPTY FILE INOUT", "ERROR: Wrong file inout (%s != EMPTY FILE INOUT)" % content
        compss_delete_file(initial_file)
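
The i_will_fail_file_out / i_will_fail_file_inout helpers are not shown. A plausible sketch, assuming a task that writes the sentinel value the tests expect and then raises, declared with PyCOMPSs's on_failure="IGNORE" option so the application keeps running; the exact failure-handling mechanism of the original suite is an assumption:

from pycompss.api.task import task
from pycompss.api.parameter import FILE_OUT


@task(file_path=FILE_OUT, on_failure="IGNORE")
def i_will_fail_file_out(file_path):
    # Hypothetical sketch: write the sentinel the test expects, then
    # fail; with on_failure="IGNORE" the failure does not abort the app.
    with open(file_path, "w") as fd:
        fd.write("EMPTY FILE OUT")
    raise Exception("Intentional failure")

# The FILE_INOUT variant for i_will_fail_file_inout would be analogous.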
Example #5
    def test_worker_producer_master_consumer_file(self):
        """
        Creates a file in a worker task and checks its content on the master.
        """
        print("Worker produces file, master consumes")
        file_name = "worker_producer_master_consumer"
        create_file_with_content_worker(INITIAL_CONTENT, file_name)
        check_file_with_content_master(INITIAL_CONTENT, file_name)
        compss_barrier()
        compss_delete_file(file_name)
        print("\t OK")
Example #6
    def test_master_producer_worker_consumer_file(self):
        """
        Creates a file on the master and consumes it in a worker task.
        """
        print("Master produces file, worker consumes")
        file_name = "master_producer_worker_consumer"
        create_file_with_content_master(INITIAL_CONTENT, file_name)
        check_file_with_content_worker(INITIAL_CONTENT, file_name)
        compss_barrier()
        compss_delete_file(file_name)
        print("\t OK")
Example #7
    def test_main_to_task(self):
        """
        Creates a file and passes it as an input parameter to a task.
        """
        print("Creating file on main and using it on task")
        file_name = "main_to_task_file"
        with open(file_name, "w") as f_channel:
            f_channel.write(INITIAL_CONTENT)

        check_file_with_content(INITIAL_CONTENT, file_name)
        compss_barrier()
        compss_delete_file(file_name)
        print("\t OK")
Example #8
    def testDeleteFile(self):
        # Check and get parameters
        initial_value = '1'
        counter_name = 'counter_INOUT'  # check that this file does not exist after the execution
        counter_name_IN = 'counter_IN'  # check that this file does not exist after the execution
        counter_name_OUT = 'counter_OUT'  # check that this file does not exist after the execution

        for i in range(3):
            # Write value
            if i <= 1:
                with open(counter_name, 'w') as fos:
                    fos.write(initial_value)
            with open(counter_name_IN, 'w') as fos2:
                fos2.write(initial_value)
            print('Initial counter value is %s' % initial_value)
            # Execute increment
            increment(counter_name)
            increment2(counter_name_IN, counter_name_OUT)
            # Read new value
            print('After sending task')
            if i == 0:
                compss_delete_file(counter_name)
            compss_delete_file(counter_name_IN)
            compss_delete_file(counter_name_OUT)

        with compss_open(counter_name, 'r+') as fis:
            final_value = fis.read()
        print('Final counter value is %s' % final_value)
        self.assertEqual(final_value, '3')
        compss_delete_file(counter_name)
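
The increment tasks are not shown. A minimal sketch consistent with the final value of '3', assuming FILE_INOUT for the in-place counter and a FILE_IN/FILE_OUT pair for increment2; the names mirror the calls above but the bodies are illustrative:

from pycompss.api.task import task
from pycompss.api.parameter import FILE_IN, FILE_INOUT, FILE_OUT


@task(file_path=FILE_INOUT)
def increment(file_path):
    # Read the counter, add one, and write it back to the same file.
    with open(file_path, "r") as fd:
        value = int(fd.read())
    with open(file_path, "w") as fd:
        fd.write(str(value + 1))


@task(file_in=FILE_IN, file_out=FILE_OUT)
def increment2(file_in, file_out):
    # Read the counter from one file and write the incremented value
    # to a separate output file.
    with open(file_in, "r") as fd:
        value = int(fd.read())
    with open(file_out, "w") as fd:
        fd.write(str(value + 1))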
Example #9
    def test_task_to_main(self):
        """
        Creates a file on a task and reads it on the master.
        """
        print("Creating file on task and using it on main")
        file_name = "task_to_main_file"
        create_file_with_content(INITIAL_CONTENT, file_name)
        with compss_open(file_name, "r") as f_channel:
            line = f_channel.readline()
            verify_line(line, INITIAL_CONTENT)
            line = f_channel.readline()
            verify_line(line, None)
        compss_barrier()
        compss_delete_file(file_name)
        print("\t OK")
Example #10
def main():
    hello_file = "/tmp/sharedDisk/hello_world.txt"
    with open(hello_file, 'w') as f:
        f.write(HELLO)

    update_file(hello_file, "test")

    compss_wait_on_file(hello_file)
    with open(hello_file) as f:
        content = f.read()
    if content != WRITTING:
        raise Exception("Wait on file is not working (" + content + ")")

    compss_delete_file(hello_file)
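
update_file and the WRITTING constant are defined elsewhere in this test. A minimal sketch, assuming a FILE_INOUT task that overwrites the file with WRITTING; the unused second argument is kept only to match the call above, and the constant's value here is illustrative:

from pycompss.api.task import task
from pycompss.api.parameter import FILE_INOUT

WRITTING = "This is a test"  # illustrative value; the real test defines its own


@task(file_path=FILE_INOUT)
def update_file(file_path, mode):
    # Overwrite the content created on the master.
    with open(file_path, "w") as fd:
        fd.write(WRITTING)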
Example #11
    def test_basic_types(self):
        """
        Tries primitive types parameter passing.
        """
        print("Running basic types task")
        filename = "basic_types_file"
        b_val = True
        c_val = 'E'
        s_val = "My Test"
        by_val = 7
        sh_val = 77
        i_val = 777
        l_val = 7777
        f_val = 7.7
        d_val = 7.77777
        test_basic_types(filename, b_val, c_val, s_val, by_val, sh_val, i_val,
                         l_val, f_val, d_val)

        expected_lines = [
            "TEST BASIC TYPES\n",
            "- boolean: " + str(b_val) + "\n",
            "- char: " + str(c_val) + "\n",
            "- String: " + str(s_val) + "\n",
            "- byte: " + str(by_val) + "\n",
            "- short: " + str(sh_val) + "\n",
            "- int: " + str(i_val) + "\n",
            "- long: " + str(l_val) + "\n",
            "- float: " + str(f_val) + "\n",
            "- double: " + str(d_val) + "\n",
        ]
        with compss_open(filename, "r") as f_channel:
            for expected in expected_lines:
                verify_line(f_channel.readline(), expected)
            verify_line(f_channel.readline(), None)

        compss_delete_file(filename)
        compss_barrier()
        print("\t OK")
Example #12
    def test_file_dependencies(self):
        """
        Creates a file on a task, verifies its content in a second one,
        a third task updates its value, and, finally, the master checks
        the value.
        """
        file_name = "dependencies_file_1"

        create_file_with_content(INITIAL_CONTENT, file_name)
        check_file_with_content(INITIAL_CONTENT, file_name)
        check_and_update_file_with_content(INITIAL_CONTENT, UPDATED_CONTENT_1, file_name)
        with compss_open(file_name, "r") as f_channel:
            line = f_channel.readline()
            verify_line(line, UPDATED_CONTENT_1)

        compss_barrier()
        compss_delete_file(file_name)
        print("\t OK")
Example #13
    def test_file_dependencies_complex(self):
        """
        Creates a file on a task, verifies its content in a second one,
        a third task updates its value, and, then, the master checks
        the value. After that, the master updates the value, verifies its
        content locally and on a task.

        Later, it updates the value on a task, checks the value on another
        task, and updates twice the value on using two tasks. Finally, the
        value returns to the master so it checks it.
        """
        print("Testing file dependencies - Complex Version")
        file_name = "dependencies_file_2"

        create_file_with_content(INITIAL_CONTENT, file_name)
        check_file_with_content(INITIAL_CONTENT, file_name)
        check_and_update_file_with_content(INITIAL_CONTENT, UPDATED_CONTENT_1, file_name)
        with compss_open(file_name, "r") as f_channel:
            line = f_channel.readline()
            verify_line(line, UPDATED_CONTENT_1)

        # Update File Content on Main
        with compss_open(file_name, "w") as f_channel:
            f_channel.write(UPDATED_CONTENT_2)

        # Verify File update on Main
        with compss_open(file_name, "r") as f_channel:
            line = f_channel.readline()
            verify_line(line, UPDATED_CONTENT_2)
        check_file_with_content(UPDATED_CONTENT_2, file_name)

        check_and_update_file_with_content(UPDATED_CONTENT_2, UPDATED_CONTENT_3, file_name)
        check_file_with_content(UPDATED_CONTENT_3, file_name)
        check_and_update_file_with_content(UPDATED_CONTENT_3, UPDATED_CONTENT_4, file_name)
        check_and_update_file_with_content(UPDATED_CONTENT_4, UPDATED_CONTENT_5, file_name)
        with compss_open(file_name, "r") as f_channel:
            line = f_channel.readline()
            verify_line(line, UPDATED_CONTENT_5)

        compss_barrier()
        compss_delete_file(file_name)
        print("\t OK")
Example #14
def rmdup_analyze_tar(cmd_dir, ref_idx_file, ref_file, contig_sam,
                      contig_content, tar_file):
    # type: (str, str, str, str, str, str) -> str
    """ Remove duplicates and analyze the contig, then append the result to the tar file

    :param cmd_dir: path to the command directory
    :param ref_idx_file: path to reference index file
    :param ref_file: path to reference file
    :param contig_sam: '<contig>.sam'
    :param contig_content: content of the result of the merge of the mapping operations to this contig
    :param tar_file: path to a .tar file to which the results of the analysis to this contig will be applied
    :return: (future object of a string) path to the output .tar file (same as tar_file)
    """

    import os.path
    from pycompss.api.api import compss_delete_file
    '''
    import os
    import errno

    tmp_dir = "ngsa_mini_py_temp"
    try:
        os.makedirs(tmp_dir, mode=0o700)
    except OSError as e:
        if e.errno != errno.EEXIST:
            print("Failed to create Directory[{}].\n".format(tmp_dir))
            raise
    '''

    # Remove duplicates (rmdup) and analyze
    samtools_bin = cmd_dir + "/samtools"
    snp_bin = cmd_dir + "/snp"
    contig = os.path.splitext(contig_sam)[0]
    out_file1 = "ngsa_mini_py_temp/" + contig + '.indel'
    out_file2 = "ngsa_mini_py_temp/" + contig + '.snp'
    out_file3 = "ngsa_mini_py_temp/" + contig + '.sum'
    # print("Calling rmdup_analyze with parameters:")               # dbg
    # print("samtools_bin=", samtools_bin)                          # dbg
    # print("snp_bin=", snp_bin)                                    # dbg
    # print("ref_idx_file=", ref_idx_file)                          # dbg
    # print("ref_file=", ref_file)                                  # dbg
    # print("contig_sam=", contig_sam)                              # dbg
    # print("str(len(contig_content))=", str(len(contig_content)))  # dbg
    # print("out_file1=", out_file1)                                # dbg
    # print("out_file2=", out_file2)                                # dbg
    # print("out_file3=", out_file3)                                # dbg
    rmdup_analyze(samtools_bin, snp_bin, ref_idx_file, ref_file, contig_sam,
                  contig_content, out_file1, out_file2, out_file3)

    # tar files
    # print("Calling tar with parameters:")                         # dbg
    # print("tar_file=", tar_file)                                  # dbg
    # print("out_file1=", out_file1)                                # dbg
    # print("out_file2=", out_file2)                                # dbg
    # print("out_file3=", out_file3)                                # dbg
    tar(tar_file, out_file1, out_file2, out_file3)
    compss_delete_file(out_file1)
    compss_delete_file(out_file2)
    compss_delete_file(out_file3)
    return tar_file
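
rmdup_analyze and tar are separate tasks. A plausible sketch of the tar step, assuming a FILE_INOUT archive and the standard tarfile module; the real task may shell out to the tar command instead:

import tarfile

from pycompss.api.task import task
from pycompss.api.parameter import FILE_IN, FILE_INOUT


@task(tar_path=FILE_INOUT, f1=FILE_IN, f2=FILE_IN, f3=FILE_IN)
def tar(tar_path, f1, f2, f3):
    # Append the three per-contig result files to the archive; the
    # temporaries can then be removed with compss_delete_file.
    with tarfile.open(tar_path, "a") as archive:
        for path in (f1, f2, f3):
            archive.add(path)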
Example #15
def delete_file(file_path):
    compss_delete_file(file_path)
Example #16
def main():
    hello_file = "hello_world.txt"
    with open(hello_file, 'w') as f:
        f.write(HELLO)

    # Check file_exists after creation
    if not compss_file_exists(hello_file):
        raise Exception("compss_file_exists on an existing file is not working")
    print("compss_file_exists on an existing file is CORRECT.")

    # Check wait on a not used object
    a = 20
    a = compss_wait_on(a)
    if a != 20:
        raise Exception("Wait on a not used object is not working (" + str(a) + ")")
    print("Wait on a not used object is CORRECT.")

    # Check wait on a not used file
    compss_wait_on_file(hello_file)
    with compss_open(hello_file) as f:
        content = f.read()
    if content != HELLO:
        raise Exception("Wait on a not used file is not working (" + content + ")")
    print("Wait on a not used file is CORRECT.")

    # Check delete_file on a not used file
    compss_delete_file(hello_file)

    struct_file = "structure.pdb"
    struct_file_2 = "structure2.pdb"
    # Check file_exists before the file exists
    if compss_file_exists(struct_file):
        raise Exception("Struct file shouldn't exist yet, but it does")
    print("compss_file_exists on a non-existing file is CORRECT.")
    gen_file(struct_file)

    # Check file_exists after executing the task
    if not compss_file_exists(struct_file):
        raise Exception("Struct file should exist, but it does not")
    print("compss_file_exists on a used, existing file is CORRECT.")

    # Check file_exists after delete while the task may not have finished
    compss_delete_file(struct_file)
    if compss_file_exists(struct_file):
        raise Exception("Struct file shouldn't exist after delete, but it does")
    print("compss_file_exists after delete is CORRECT.")

    gen_file(struct_file_2)
    compss_wait_on_file(struct_file_2)
    with open(struct_file_2) as f:
        content = f.read()
    if not compss_file_exists(struct_file_2):
        raise Exception("Struct file 2 should exist, but it does not")
    print("compss_file_exists after wait_on is CORRECT.")

    if content != WRITTING:
        raise Exception("Wait on file is not working (" + content + ")")

    compss_delete_file(struct_file_2)
    if compss_file_exists(struct_file_2):
        raise Exception("Struct file 2 shouldn't exist, but it does")
    print("compss_file_exists after delete and wait_on is CORRECT.")
Example #17
    def run(self, input_files, input_metadata, output_files):
        """
        The main function to run the test_writer tool

        Parameters
        ----------
        input_files : dict
            List of input files - In this case there are no input files required
        input_metadata: dict
            Matching metadata for each of the files, plus any additional data
        output_files : dict
            List of the output files that are to be generated

        Returns
        -------
        output_files : dict
            List of files with a single entry.
        output_metadata : dict
            List of matching metadata for the returned files
        """

        rtree_file_dat = "tests/data/test_rmap/rtree_file.dat"
        rtree_file_idx = "tests/data/test_rmap/rtree_file.idx"
        chr_handler = "tests/data/test_baitmap/chr_handler.txt"
        rmap = "tests/data/test_run_chicago/test.rmap"
        baitmap = "tests/data/test_run_chicago/test.baitmap"
        bait_sam = "tests/data/test_baitmap/baits.sam"
        nbpb = "tests/data/test_run_chicago/test.nbpb"
        npb = "tests/data/test_run_chicago/test.npb"
        poe = "tests/data/test_run_chicago/test.poe"
        out_bam = "tests/data/test_baitmap/baits.bam"
        sorted_bam = self.configuration["execution"] + "/" + "sorted_bam"

        if "RMAP" not in input_files:
            input_files["RMAP"] = rmap
        if "BAITMAP" not in input_files:
            input_files["BAITMAP"] = baitmap
        if "nbpb" not in input_files:
            input_files["nbpb"] = nbpb
        if "npb" not in input_files:
            input_files["npb"] = npb
        if "poe" not in input_files:
            input_files["poe"] = poe
        if "chinput" not in input_files:
            input_files["chinput"] = output_files["chinput"]

        if "pychic_binsize" not in self.configuration:
            self.configuration["pychic_binsize"] = \
                int(self.configuration["makeDesignFiles_binsize"])
        else:
            self.configuration["pychic_binsize"] = int(
                self.configuration["pychic_binsize"])

        if "pychic_minFragLen" not in self.configuration:
            self.configuration["pychic_minFragLen"] = \
                int(self.configuration["makeDesignFiles_minFragLen"])
        else:
            self.configuration["pychic_minFragLen"] = int(
                self.configuration["pychic_minFragLen"])

        if "pychic_maxFragLen" not in self.configuration:
            self.configuration["pychic_maxFragLen"] = \
                int(self.configuration["makeDesignFiles_maxFragLen"])
        else:
            self.configuration["pychic_maxFragLen"] = int(
                self.configuration["pychic_maxFragLen"])

        if "pychic_maxLBrownEst" not in self.configuration:
            self.configuration["pychic_maxLBrownEst"] = \
                float(self.configuration["makeDesignFiles_maxLBrownEst"])
        else:
            self.configuration["pychic_maxLBrownEst"] = \
                float(self.configuration["pychic_maxLBrownEst"])

        self.configuration["pychic_removeAdjacent"] = True
        self.configuration["pychic_adjBait2bait"] = True

        if "pychic_bam" not in self.configuration:
            self.configuration["pychic_bam"] = sorted_bam

        pychic_handler = pyCHiC(self.configuration)
        pychic_handler.run(input_files, input_metadata, output_files)

        if "genome_name" in self.configuration:
            files_dir = os.listdir(self.configuration["execution"])
            for file_ in files_dir:
                if file_.startswith("Digest_" +
                                    self.configuration["genome_name"]):
                    os.remove(file_)

        compss_delete_file(rtree_file_idx)
        compss_delete_file(rtree_file_dat)
        compss_delete_file(chr_handler)
        compss_delete_file(rmap)
        compss_delete_file(baitmap)
        compss_delete_file(bait_sam)
        compss_delete_file(npb)
        compss_delete_file(nbpb)
        compss_delete_file(poe)
        compss_delete_file(out_bam)
        compss_delete_file(sorted_bam)

        if "chinput" not in input_metadata:
            input_metadata["chinput"] = input_metadata["genome_fa"]

        output_metadata = {
            "washU_text":
            Metadata(data_type="data_chic",
                     file_type="TXT",
                     file_path=output_files["washU_text"],
                     sources=[],
                     taxon_id=input_metadata["chinput"].taxon_id,
                     meta_data={
                         "tool": "process_CHiC",
                         "tool_description": "run_chicago"
                     }),
            "pdf_examples":
            Metadata(data_type="data_chic",
                     file_type="PDF",
                     file_path=output_files["pdf_examples"],
                     sources=[],
                     taxon_id=input_metadata["chinput"].taxon_id,
                     meta_data={
                         "tool": "process_CHiC",
                         "tool_description": "run_chicago"
                     }),
            "params_out":
            Metadata(data_type="data_chic",
                     file_type="TXT",
                     file_path=output_files["params_out"],
                     sources=[],
                     taxon_id=input_metadata["chinput"].taxon_id,
                     meta_data={
                         "tool": "process_CHiC",
                         "tool_description": "run_chicago"
                     })
        }

        return output_files, output_metadata
Example #18
    def bam_merge(self, in_bam_job_files):  # pylint: disable=too-many-branches
        """
        Wrapper task taking any number of bam files and merging them into a
        single bam file.

        Parameters
        ----------
        bam_job_files : list
            List of the locations of the separate bam files that are to be merged
            The first file in the list will be taken as the output file name
        """
        merge_round = -1

        if len(in_bam_job_files) == 1:
            return in_bam_job_files[0]

        # Work on a copy so the caller's list is not mutated.
        bam_job_files = list(in_bam_job_files)

        cleanup_files = []

        while True:
            merge_round += 1
            if len(bam_job_files) > 1:
                tmp_alignments = []

                if bam_job_files:
                    while len(bam_job_files) >= 10:
                        current_list_len = len(bam_job_files)
                        for i in range(0, current_list_len-9, 10):  # pylint: disable=unused-variable
                            bam_out = bam_job_files[0] + "_merge_" + str(merge_round) + ".bam"
                            tmp_alignments.append(bam_out)
                            cleanup_files.append(bam_out)

                            self.bam_merge_10(
                                bam_job_files.pop(0), bam_job_files.pop(0), bam_job_files.pop(0),
                                bam_job_files.pop(0), bam_job_files.pop(0), bam_job_files.pop(0),
                                bam_job_files.pop(0), bam_job_files.pop(0), bam_job_files.pop(0),
                                bam_job_files.pop(0), bam_out
                            )

                    bam_out = bam_job_files[0] + "_merge_" + str(merge_round) + ".bam"
                    if len(bam_job_files) >= 5:
                        tmp_alignments.append(bam_out)
                        cleanup_files.append(bam_out)
                        self.bam_merge_5(
                            bam_job_files.pop(0), bam_job_files.pop(0), bam_job_files.pop(0),
                            bam_job_files.pop(0), bam_job_files.pop(0), bam_out
                        )
                        if bam_job_files:
                            bam_out = bam_job_files[0] + "_merge_" + str(merge_round) + ".bam"

                    if len(bam_job_files) == 4:
                        tmp_alignments.append(bam_out)
                        cleanup_files.append(bam_out)
                        self.bam_merge_4(
                            bam_job_files.pop(0), bam_job_files.pop(0), bam_job_files.pop(0),
                            bam_job_files.pop(0), bam_out
                        )
                    elif len(bam_job_files) == 3:
                        tmp_alignments.append(bam_out)
                        cleanup_files.append(bam_out)
                        self.bam_merge_3(
                            bam_job_files.pop(0), bam_job_files.pop(0), bam_job_files.pop(0),
                            bam_out
                        )
                    elif len(bam_job_files) == 2:
                        tmp_alignments.append(bam_out)
                        cleanup_files.append(bam_out)
                        self.bam_merge_2(
                            bam_job_files.pop(0), bam_job_files.pop(0), bam_out
                        )
                    elif len(bam_job_files) == 1:
                        tmp_alignments.append(bam_job_files[0])

                barrier()

                bam_job_files = list(tmp_alignments)

            else:
                break

        return_value = self.bam_copy(bam_job_files[0], in_bam_job_files[0])
        for tmp_bam_file in cleanup_files:
            compss_delete_file(tmp_bam_file)

        return return_value
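
The bam_merge_N helpers wrap the actual merge. A minimal sketch of the two-file case, assuming pysam is available and FILE_IN/FILE_OUT parameters; the original implementation may invoke samtools differently:

import pysam

from pycompss.api.task import task
from pycompss.api.parameter import FILE_IN, FILE_OUT


@task(bam_file_1=FILE_IN, bam_file_2=FILE_IN, bam_out=FILE_OUT)
def bam_merge_2(bam_file_1, bam_file_2, bam_out):
    # Merge two sorted bam files into one; -f overwrites bam_out.
    pysam.merge("-f", bam_out, bam_file_1, bam_file_2)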
Example #19
    def run(self, input_files, input_metadata, output_files):  # pylint: disable=too-many-locals,too-many-statements,too-many-branches
        """
        The main function to run MACS 2 for peak calling over a given BAM file
        and matching background BAM file.

        Parameters
        ----------
        input_files : list
            List of input bam file locations where 0 is the bam data file and 1
            is the matching background bam file
        metadata : dict


        Returns
        -------
        output_files : list
            List of locations for the output files.
        output_metadata : list
            List of matching metadata dict objects

        """
        root_name = os.path.split(input_files['bam'])
        name = root_name[1].replace('.bam', '')

        # input and output share most metadata
        output_bed_types = {
            'narrow_peak': "bed4+1",
            'summits': "bed6+4",
            'broad_peak': "bed6+3",
            'gapped_peak': "bed12+3"
        }

        for k in output_bed_types:
            if output_files[k] is None:
                output_files[k] = os.path.join(self.configuration['execution'],
                                               name + "_" + k + ".bed")

        command_params = self.get_macs2_params(self.configuration)

        bam_utils_handle = bamUtilsTask()
        bam_utils_handle.bam_index(input_files['bam'],
                                   input_files['bam'] + '.bai')
        if 'bam_bg' in input_files:
            bam_utils_handle.bam_index(input_files['bam_bg'],
                                       input_files['bam_bg'] + '.bai')

        chr_list = bam_utils_handle.bam_list_chromosomes(input_files['bam'])
        chr_list = compss_wait_on(chr_list)

        logger.info("MACS2 COMMAND PARAMS: " + ", ".join(command_params))

        chr_dict = {}
        for chromosome in chr_list:
            chr_dict[chromosome] = chromosome.replace("|", "_")

        for chromosome in chr_dict:
            if 'bam_bg' in input_files:
                result = self.macs2_peak_calling(
                    name + "." + str(chromosome), str(input_files['bam']),
                    str(input_files['bam']) + '.bai',
                    str(input_files['bam_bg']),
                    str(input_files['bam_bg']) + '.bai', command_params,
                    str(output_files['narrow_peak']) + "." +
                    str(chr_dict[chromosome]),
                    str(output_files['summits']) + "." +
                    str(chr_dict[chromosome]),
                    str(output_files['broad_peak']) + "." +
                    str(chr_dict[chromosome]),
                    str(output_files['gapped_peak']) + "." +
                    str(chr_dict[chromosome]), chromosome)
            else:
                result = self.macs2_peak_calling_nobgd(
                    name + "." + str(chromosome), str(input_files['bam']),
                    str(input_files['bam']) + '.bai', command_params,
                    str(output_files['narrow_peak']) + "." +
                    str(chr_dict[chromosome]),
                    str(output_files['summits']) + "." +
                    str(chr_dict[chromosome]),
                    str(output_files['broad_peak']) + "." +
                    str(chr_dict[chromosome]),
                    str(output_files['gapped_peak']) + "." +
                    str(chr_dict[chromosome]), chromosome)

            if result is False:
                logger.fatal(
                    "MACS2: Something went wrong with the peak calling")

        # Merge the results files into single files.
        with open(output_files['narrow_peak'], 'wb') as file_np_handle, \
                open(output_files['summits'], 'wb') as file_s_handle, \
                open(output_files['broad_peak'], 'wb') as file_bp_handle, \
                open(output_files['gapped_peak'], 'wb') as file_gp_handle:
            for chromosome in chr_dict:
                np_file_chr = "{}.{}".format(output_files['narrow_peak'],
                                             chr_dict[chromosome])
                s_file_chr = "{}.{}".format(output_files['summits'],
                                            chr_dict[chromosome])
                bp_file_chr = "{}.{}".format(output_files['broad_peak'],
                                             chr_dict[chromosome])
                gp_file_chr = "{}.{}".format(output_files['gapped_peak'],
                                             chr_dict[chromosome])
                if hasattr(sys, '_run_from_cmdl') is True:
                    with open(np_file_chr, 'rb') as file_in_handle:
                        file_np_handle.write(file_in_handle.read())
                    with open(s_file_chr, 'rb') as file_in_handle:
                        file_s_handle.write(file_in_handle.read())
                    with open(bp_file_chr, 'rb') as file_in_handle:
                        file_bp_handle.write(file_in_handle.read())
                    with open(gp_file_chr, 'rb') as file_in_handle:
                        file_gp_handle.write(file_in_handle.read())
                else:
                    with compss_open(np_file_chr, 'rb') as file_in_handle:
                        file_np_handle.write(file_in_handle.read())
                    with compss_open(s_file_chr, 'rb') as file_in_handle:
                        file_s_handle.write(file_in_handle.read())
                    with compss_open(bp_file_chr, 'rb') as file_in_handle:
                        file_bp_handle.write(file_in_handle.read())
                    with compss_open(gp_file_chr, 'rb') as file_in_handle:
                        file_gp_handle.write(file_in_handle.read())
                    compss_delete_file(np_file_chr)
                    compss_delete_file(s_file_chr)
                    compss_delete_file(bp_file_chr)
                    compss_delete_file(gp_file_chr)

        output_files_created = {}
        output_metadata = {}
        for result_file in output_files:
            if (os.path.isfile(output_files[result_file]) is True
                    and os.path.getsize(output_files[result_file]) > 0):
                output_files_created[result_file] = output_files[result_file]

                sources = [input_metadata["bam"].file_path]
                if 'bam_bg' in input_files:
                    sources.append(input_metadata["bam_bg"].file_path)

                output_metadata[result_file] = Metadata(
                    data_type="data_chip_seq",
                    file_type="BED",
                    file_path=output_files[result_file],
                    sources=sources,
                    taxon_id=input_metadata["bam"].taxon_id,
                    meta_data={
                        "assembly":
                        input_metadata["bam"].meta_data["assembly"],
                        "tool": "macs2",
                        "bed_type": output_bed_types[result_file],
                        "parameters": command_params
                    })
            else:
                if os.path.isfile(output_files[result_file]):
                    os.remove(output_files[result_file])

        logger.info('MACS2: GENERATED FILES: %s', ' '.join(output_files))

        return (output_files_created, output_metadata)
Example #20
def test_dummy_api():
    import os

    from pycompss.api.dummy.api import compss_start
    from pycompss.api.dummy.api import compss_stop
    from pycompss.api.dummy.api import compss_file_exists
    from pycompss.api.dummy.api import compss_open
    from pycompss.api.dummy.api import compss_delete_file
    from pycompss.api.dummy.api import compss_wait_on_file
    from pycompss.api.dummy.api import compss_wait_on_directory
    from pycompss.api.dummy.api import compss_delete_object
    from pycompss.api.dummy.api import compss_barrier
    from pycompss.api.dummy.api import compss_barrier_group
    from pycompss.api.dummy.api import compss_wait_on
    from pycompss.api.dummy.api import compss_get_number_of_resources
    from pycompss.api.dummy.api import compss_request_resources
    from pycompss.api.dummy.api import compss_free_resources
    from pycompss.api.dummy.api import TaskGroup

    file_name = "simulated_file.txt"
    file_names = ["simulated_file1.txt", "simulated_file2.txt"]
    directory_name = "simulated_directory"
    directory_names = ["simulated_directory1", "simulated_directory2"]
    group_name = "simulated_group"
    obj = [1, 2, 3]
    num_resources = 1

    with open(file_name, "w") as f:
        f.write("some content")
    os.mkdir(directory_name)

    for f_name in file_names:
        with open(f_name, "w") as f:
            f.write("some content")
    for d_name in directory_names:
        os.mkdir(d_name)

    compss_start(log_level="off", interactive=False)
    compss_stop(code=0)
    compss_file_exists(file_name)
    compss_file_exists(*file_names)
    compss_open(file_name, mode="r")
    compss_delete_file(file_name)
    compss_delete_file(*file_names)
    compss_wait_on_file(file_name)
    compss_wait_on_file(*file_names)
    compss_wait_on_directory(directory_name)
    compss_wait_on_directory(*directory_names)
    compss_delete_object(obj)
    compss_delete_object(*obj)
    compss_barrier(no_more_tasks=False)
    compss_barrier_group(group_name)
    compss_wait_on(obj)
    compss_wait_on(*obj)
    compss_get_number_of_resources()
    compss_request_resources(num_resources, group_name)
    compss_free_resources(num_resources, group_name)

    with TaskGroup(group_name, implicit_barrier=True):
        # Empty task group check
        pass

    os.remove(file_name)
    os.rmdir(directory_name)

    for f_name in file_names:
        os.remove(f_name)
    for d_name in directory_names:
        os.rmdir(d_name)