else: impactdir = None # Counting and printing out number of records of input file numOfRecords = core.countRecords(copts.input) if not copts.stdout: printNumOfRecords(numOfRecords) if options.args['logfile']: logging.info(str(numOfRecords) + ' records to be annotated.') # Writing header to output file if options.args['outputformat'] == 'VCF': outfile = open(copts.output + '.vcf', 'w') else: outfile = open(copts.output + '.txt', 'w') header = readHeader(copts.input) core.writeHeader(options, '\n'.join(header), outfile, copts.stdout) outfile.close() # Find break points in the input file breaks = findFileBreaks(copts.input, copts.threads) # Initializing annotation processes threadidx = 0 processes = [] for (startline, endline) in breaks: threadidx += 1 processes.append( SingleJob(threadidx, options, copts, startline, endline, genelist, transcriptlist, snplist, impactdir, numOfRecords)) # Running annotation processes
def run(copts, version, default_config_file):
    """Annotate the input file described by ``copts`` from start to finish.

    The function validates the configuration and input paths, reads the
    options, optionally sets up a log file, writes the header of the output
    file, splits the input into line ranges, runs one ``SingleJob`` per range
    and finally merges the per-job temporary files when more than one thread
    was requested.

    Args:
        copts: Command line options object. The attributes ``threads``,
            ``stdout``, ``conf``, ``input`` and ``output`` are read here;
            ``threads`` is converted to ``int`` in place, ``stdout`` is
            forced to ``False`` when ``threads > 1`` and ``conf`` is replaced
            by ``default_config_file`` when it is ``None``.
        version: Version string, used in the start-up message and log entry.
        default_config_file: Configuration path used when ``copts.conf`` is
            ``None``; may itself be ``None``.

    Returns:
        None.

    The function terminates the interpreter through ``quit()`` when no
    configuration file is specified or when the configuration or input file
    does not exist.
    """
    copts.threads = int(copts.threads)

    # Writing to stdout is switched off whenever several threads are requested
    if copts.threads > 1:
        copts.stdout = False

    # Use default path read from the default_config_path file, if -c is not used
    if copts.conf is None:
        copts.conf = default_config_file

    # Check if input and configuration files exist.
    # NOTE(review): quit() is the helper installed by the site module and
    # raises SystemExit without a code, so these error paths exit with
    # status 0 - consider sys.exit(1) if callers need to detect the failure.
    if copts.conf is None:
        print('\nError: no configuration file specified.')
        print('Please use option -c or add the absolute path to the default_config_path file.\n')
        quit()

    if not os.path.isfile(copts.conf):
        print('\nError: configuration file (' + copts.conf + ') cannot be found.\n')
        quit()

    if not os.path.isfile(copts.input):
        print('\nError: input file (' + copts.input + ') cannot be found.\n')
        quit()

    # Reading options from configuration file
    options = Options(copts.conf)
    log_enabled = options.args['logfile']

    # Initializing log file
    if log_enabled:
        logging.basicConfig(filename=copts.output + '.log', filemode='w',
                            format='%(asctime)s %(levelname)s: %(message)s',
                            level=logging.DEBUG)

    # Printing out version information and start time
    if not copts.stdout:
        starttime = printStartInfo(version)

    if log_enabled:
        logging.info('CAVA ' + version + ' started.')

    # Checking if options specified in the configuration file are correct
    core.checkOptions(options)

    # Printing out configuration, input and output file names
    if not copts.stdout:
        printInputFileNames(copts, options)

    # Reading gene, transcript and snp lists from files
    genelist = core.readSet(options, 'genelist')
    transcriptlist = core.readSet(options, 'transcriptlist')
    snplist = core.readSet(options, 'snplist')

    # Parsing @impactdef string: '|' separates groups, ',' separates the
    # names inside a group; every name is mapped to the 1-based position of
    # its group, stored as a string. '.' or an empty string means "not set".
    impactdef = options.args['impactdef']
    if impactdef in ('.', ''):
        impactdir = None
    else:
        impactdir = dict()
        for rank, group in enumerate(impactdef.split('|'), 1):
            for name in group.split(','):
                impactdir[name.strip()] = str(rank)

    # Counting and printing out number of records of input file
    numOfRecords = core.countRecords(copts.input)
    if not copts.stdout:
        printNumOfRecords(numOfRecords)

    if log_enabled:
        logging.info(str(numOfRecords) + ' records to be annotated.')

    # Writing header to output file. The with-block guarantees the handle is
    # closed even if reading or writing the header raises.
    extension = '.vcf' if options.args['outputformat'] == 'VCF' else '.txt'
    with open(copts.output + extension, 'w') as outfile:
        header = readHeader(copts.input)
        core.writeHeader(options, '\n'.join(header), outfile, copts.stdout)

    # Find break points in the input file
    breaks = findFileBreaks(copts.input, copts.threads)

    # Initializing annotation processes, numbered from 1 in break order
    processes = [
        SingleJob(threadidx, options, copts, startline, endline, genelist,
                  transcriptlist, snplist, impactdir, numOfRecords)
        for threadidx, (startline, endline) in enumerate(breaks, 1)
    ]

    # Running annotation processes: start all of them, then wait for all
    for process in processes:
        process.start()

    for process in processes:
        process.join()

    # Merging tmp files
    if copts.threads > 1:
        mergeTmpFiles(copts.output, options.args['outputformat'], copts.threads)

    # Printing out summary information and end time
    if not copts.stdout:
        printEndInfo(options, copts, starttime)