Пример #1
0
 def wfunc(self, fq_list, first_check=1, callback=False, callback_kwargs=None, max_workers=10, pass_if_exists=None, clean_before=None, **kwargs):
     """
     Run ``func`` once per entry of ``fq_list`` on a thread pool.

     ``func`` is a name taken from the enclosing scope (not visible here;
     presumably the wrapped pipeline step). ``fq_list`` is partitioned with
     ``split_list(fq_list, max_workers)`` and every entry ``fq`` of every batch
     is submitted as ``func(self, fq_list=fq, **kwargs)``.

     Without a callback, one pool serves all batches and is joined at the end.
     With a callback, every batch gets its own pool, which is joined before
     ``callback(**callback_kwargs)`` is called.

     Exceptions raised by a worker do not abort the run; they are printed once
     the pool that ran them has been joined.

     first_check: not used by this implementation.
     callback: callable run after each batch, or a falsy value to disable it.
     callback_kwargs: keyword arguments for ``callback`` (None means none).
     max_workers: thread-pool size; also passed to ``split_list``.
     pass_if_exists: list of path templates. If every path in the list exists,
         processing of the corresponding sample is skipped. Templates are
         expanded with ``str.format`` using the mapping returned by
         ``self.parse_fq_list(fq)`` and ``self.context`` (e.g. ``{sample}``).
     clean_before: list of path templates, expanded the same way. Each path
         that exists is removed with ``rm -r`` before the sample is submitted.
     kwargs: forwarded unchanged to ``func``.
     """

     def submit_batch(executor, fqs):
         """Submit every non-skipped sample of one batch; return (fq, future) pairs."""
         submitted = []
         for fq in fqs:
             parsed_fqs = self.parse_fq_list(fq)
             if pass_if_exists:
                 paths = [p.format(**parsed_fqs, **self.context) for p in pass_if_exists]
                 if all_path_exists(paths):
                     continue
             if clean_before:
                 for template in clean_before:
                     path = template.format(**parsed_fqs, **self.context)
                     if os.path.exists(path):
                         # NOTE(review): the path is interpolated into a command
                         # string unquoted; confirm how self.system executes it.
                         self.system("rm -r {}".format(path))
             submitted.append((fq, executor.submit(func, self, fq_list=fq, **kwargs)))
         return submitted

     def report_failures(submitted):
         """Print the exception of every finished job that raised."""
         for fq, future in submitted:
             error = future.exception()
             if error is not None:
                 print("!!!!!! " + str(func) + " failed for {}: {!r}".format(fq, error))

     batches = split_list(fq_list, max_workers)
     print("######################Running " + str(func))
     start_time = time.time()
     if callback:
         for fqs in batches:
             # Leaving the with-block joins the pool (shutdown(wait=True)),
             # even when submitting raised part-way through the batch.
             with ThreadPoolExecutor(max_workers=max_workers) as executor:
                 submitted = submit_batch(executor, fqs)
             report_failures(submitted)
             print("This run done! checking callback...")
             callback(**(callback_kwargs or {}))
     else:
         submitted = []
         with ThreadPoolExecutor(max_workers=max_workers) as executor:
             for fqs in batches:
                 submitted.extend(submit_batch(executor, fqs))
         report_failures(submitted)
     end_time = time.time()
     time_used = (end_time - start_time) / 60
     print("######################" + str(func) + " done; time used: {} min".format(time_used))
Пример #2
0
    def wfunc(self, fq_list, first_check=10, callback=False, callback_kwargs=None, max_workers=10, pass_if_exists=None, clean_before=None, **kwargs):
        """
        Call ``func`` once per entry of ``fq_list`` and wait via ``wait_sge``.

        ``func`` is a name taken from the enclosing scope (not visible here).
        ``fq_list`` is partitioned with ``split_list(fq_list, max_workers)``;
        for every entry ``fq`` of every batch ``func(self, fq_list=fq, **kwargs)``
        is called directly in the calling thread.

        With a callback, ``wait_sge(first_check)`` and then
        ``callback(**callback_kwargs)`` run after each batch. Without one,
        ``wait_sge(first_check)`` runs once after all batches.
        NOTE(review): ``wait_sge`` presumably blocks until the submitted SGE
        jobs have finished -- confirm against its definition.

        first_check: passed to ``wait_sge``.
        callback: callable run after each batch, or a falsy value to disable it.
        callback_kwargs: keyword arguments for ``callback`` (None means none).
        max_workers: batch parameter passed to ``split_list``.
        pass_if_exists: list of path templates. If every expanded path exists,
            the sample is skipped. Templates are expanded with ``str.format``
            using the mapping returned by ``self.parse_fq_list(fq)`` and
            ``self.context``.
        clean_before: list of path templates, expanded the same way. Each path
            that exists is removed with ``rm -r`` before ``func`` is called.
        kwargs: forwarded unchanged to ``func``.
        """
        batches = split_list(fq_list, max_workers)
        print("######################Running " + str(func))
        start_time = time.time()
        for fqs in batches:
            for fq in fqs:
                parsed_fqs = self.parse_fq_list(fq)

                # Skip samples whose expected outputs are all present already.
                if pass_if_exists:
                    paths = [p.format(**parsed_fqs, **self.context) for p in pass_if_exists]
                    if all_path_exists(paths):
                        continue

                # Remove stale outputs so the step starts from a clean state.
                if clean_before:
                    for template in clean_before:
                        path = template.format(**parsed_fqs, **self.context)
                        if os.path.exists(path):
                            self.system("rm -r {}".format(path))

                func(self, fq_list=fq, **kwargs)
            if callback:
                wait_sge(first_check)
                print("This run done! checking callback...")
                callback(**(callback_kwargs or {}))
        if not callback:
            wait_sge(first_check)
        end_time = time.time()
        time_used = (end_time - start_time) / 60
        print("######################" + str(func) + " done; time used: {} min".format(time_used))
Пример #3
0
 def paired_data(self, pattern, each=False):
     """
     Return the R1 and R2 entries of ``self.map_list`` as ``[r1, r2]`` pairs.

     ``self.map_list`` is called once with ``use_direction="R1"`` and once with
     ``use_direction="R2"``. The two lists are paired positionally: the i-th R1
     entry goes with the i-th R2 entry.

     pattern: forwarded to ``self.map_list``.
     each: if truthy, the list of pairs is passed through
         ``split_list(pairs, each)`` before being returned.

     Raises RawDataNotPairedError if the two lists differ in length.
     """
     r1 = self.map_list(pattern=pattern, each=False, use_direction="R1")
     r2 = self.map_list(pattern=pattern, each=False, use_direction="R2")
     if len(r1) != len(r2):
         raise RawDataNotPairedError(
             "The file number of Reads1 and Reads2 is not equal")
     # zip pairs the entries whatever their type. A numpy transpose only does
     # so for scalar entries: list entries make the array 3-D and .T then
     # reverses every axis.
     out = [[read1, read2] for read1, read2 in zip(r1, r2)]
     return split_list(out, each) if each else out
Пример #4
0
 def map_list(self, pattern=False, each=False, use_direction="both"):
     """
     Build a sorted list with one entry per selected record of ``self.fq_info``.

     Each record is unpacked as ``(fq_path, new_id, direction)``. Only records
     whose direction is "R1" or "R2" are kept, and only if ``use_direction``
     is "both" or equal to that direction.

     pattern: if truthy, a format string expanded with ``sample_id``,
         ``direction`` and ``direction_num`` (the trailing digit of the
         direction). Otherwise the entry is ``[new_id, direction]``.
     each: if truthy, the sorted list is passed through
         ``split_list(entries, each)`` before being returned.
     use_direction: "R1", "R2" or "both".
     """
     selected = []
     for _fq_path, sample_id, direction in self.fq_info:
         # The entry is built for every record, before filtering.
         if pattern:
             digit = re.search(r"\d$", direction).group()
             entry = pattern.format(
                 sample_id=sample_id, direction=direction, direction_num=digit)
         else:
             entry = [sample_id, direction]
         is_read = direction == "R1" or direction == "R2"
         if is_read and (use_direction == "both" or use_direction == direction):
             selected.append(entry)
     selected.sort()
     if each:
         return split_list(selected, each)
     return selected