def processfiles(self): self.logger.info("Process Files Inititated") self.counter = 0 self.targets = [] self.masterdf = pd.DataFrame(columns=['seqid', 'position', 'coverage']) while self.running: currenttime = time.time() #for fastqfile, createtime in tqdm(sorted(self.creates.items(), key=lambda x: x[1])): fastqfilelist = list() for fastqfile, createtime in sorted(self.creates.items(), key=lambda x: x[1]): delaytime = 0 # file created 5 sec ago, so should be complete. For simulations we make the time longer. if (int(createtime) + delaytime < time.time()): self.logger.info(fastqfile) del self.creates[fastqfile] self.counter += 1 fastqfilelist.append(fastqfile) #print (fastqfile,md5Checksum(fastqfile), "\n\n\n\n") targets, self.masterdf = parse_fastq_file(fastqfilelist, self.args, logging, self.masterdf) print(targets) print(self.targets) if len(targets) > len(self.targets): updated_targets = set(targets) - set(self.targets) update_message = "Updating targets with {}".format( nice_join(updated_targets, conjunction="and")) self.logger.info(update_message) if not self.args.simulation: send_message(self.connection, update_message, Severity.WARN) write_new_toml(self.args, targets) self.targets = [] self.targets = targets.copy() if self.masterdf.shape[0] > 0 and self.masterdf.shape[0] == len( self.targets): # Every target is covered at the desired coverage level. self.logger.info( "Every target is covered at at least {}x".format( self.args.depth)) if not self.args.simulation: self.connection.protocol.stop_protocol() send_message( self.connection, "Iter Align has stopped the run as all targets should be covered by at least {}x" .format(self.args.depth), Severity.WARN, ) #parse_fastq_file(fastqfile, self.rundict, self.fastqdict, self.args, self.header, self.MinotourConnection) #self.args.files_processed += 1 if currenttime + 5 > time.time(): time.sleep(5)
metavar="THREADS", help= "Set the number of default threads to use for threaded tasks (default {})" .format(DEFAULT_CORES), default=DEFAULT_CORES, type=int, ), ), ( "--log-level", dict( metavar="LOG-LEVEL", action="store", default="info", choices=LOG_LEVELS, help="One of: {}".format(nice_join(LOG_LEVELS)), ), ), ( "--log-format", dict( metavar="LOG-FORMAT", action="store", default=DEFAULT_LOG_FORMAT, help="A standard Python logging format string (default: {!r})". format(DEFAULT_LOG_FORMAT.replace("%", "%%")), ), ), ( "--log-file", dict(
"--experiment-name", dict( metavar="EXPERIMENT-NAME", type=str, help="Describe the experiment being run, enclose in quotes", required=True, ), ), ( "--read-cache", dict( metavar="READ_CACHE", action="store", default=DEFAULT_READ_CACHE, choices=READ_CACHE, help="One of: {} (default: {})".format(nice_join(READ_CACHE), DEFAULT_READ_CACHE), ), ), ( "--workers", dict( metavar="WORKERS", type=int, help="Number of worker threads (default: {})".format( DEFAULT_WORKERS), default=DEFAULT_WORKERS, ), ), ( "--channels",
def processfiles(self): self.logger.info("Process Files Inititated") self.counter = 1 self.targets = [] self.masterdf = pd.DataFrame(columns=['seqid', 'position', 'coverage']) self.taxid_entries = 0 self.downloaded_set = set() self.length_dict = {} self.coverage_sum = {} if self.args.references: logging.info("References argument provided. Will download references genomes.") self.downloaded_set = set(self.args.references) logging.info(self.downloaded_set) self.url_list = url_list_generation(self.args, self.args.references) self.length_dict.update(download_references(self.args, self.url_list, self.downloaded_set)) generate_mmi(self.args, self.counter) while self.running: currenttime = time.time() # for fastqfile, createtime in tqdm(sorted(self.creates.items(), key=lambda x: x[1])): fastqfilelist = list() for fastqfile, createtime in sorted(self.creates.items(), key=lambda x: x[1]): delaytime = 0 # file created 5 sec ago, so should be complete. For simulations we make the time longer. if (int(createtime) + delaytime < time.time()): self.logger.info(fastqfile) del self.creates[fastqfile] self.counter += 1 fastqfilelist.append(fastqfile) # print (fastqfile,md5Checksum(fastqfile), "\n\n\n\n") # as long as there are files within the args.watch directory to parse if fastqfilelist: print(self.downloaded_set) targets, self.downloaded_set, self.taxid_entries, self.coverage_sum = parse_fastq_file(fastqfilelist, self.args, logging, self.length_dict, self.downloaded_set, self.taxid_entries, self.coverage_sum, self.connection) print(targets) print(self.targets) if len(targets) > len(self.targets): updated_targets = set(targets) - set(self.targets) update_message = "Updating targets with {}".format(nice_join(updated_targets, conjunction="and")) self.logger.info(update_message) if not self.args.simulation: #send_message_port(update_message, self.args.host, self.messageport) send_message(self.connection, update_message, Severity.WARN) write_new_toml(self.args, targets) self.targets = [] self.targets = targets.copy() if self.masterdf.shape[0] > 0 and self.masterdf.shape[0] == len(self.targets): # Every target is covered at the desired coverage level. self.logger.info("Every target is covered at at least {}x".format(self.args.depth)) if not self.args.simulation: self.connection.protocol.stop_protocol() #send_message_port( # "Iter Align has stopped the run as all targets should be covered by at least {}x".format( # self.args.depth), self.args.host, self.messageport) send_message(self.connection, "Iter Align has stopped the run as all targets should be covered by at least {}x".format( self.args.depth), Severity.WARN) # parse_fastq_file(fastqfile, self.rundict, self.fastqdict, self.args, self.header, self.MinotourConnection) # self.args.files_processed += 1 if currenttime + 5 > time.time(): time.sleep(5)
dict( metavar="EXPERIMENT-NAME", type=str, help="Describe the experiment being run, enclose in quotes", required=True, ), ), ( "--read-cache", dict( metavar="READ_CACHE", action="store", default=DEFAULT_READ_CACHE, choices=READ_CACHE, help="One of: {} (default: {})".format( nice_join(READ_CACHE), DEFAULT_READ_CACHE ), ), ), ( "--workers", dict( metavar="WORKERS", type=int, help="Number of worker threads (default: {})".format(DEFAULT_WORKERS), default=DEFAULT_WORKERS, ), ), ( "--channels", dict(