Пример #1
0
Файл: cava.py Проект: wwcrc/CAVA
else:
    impactdir = None

# Counting and printing out number of records of input file
numOfRecords = core.countRecords(copts.input)
if not copts.stdout: printNumOfRecords(numOfRecords)
if options.args['logfile']:
    logging.info(str(numOfRecords) + ' records to be annotated.')

# Writing header to output file
if options.args['outputformat'] == 'VCF':
    outfile = open(copts.output + '.vcf', 'w')
else:
    outfile = open(copts.output + '.txt', 'w')
header = readHeader(copts.input)
core.writeHeader(options, '\n'.join(header), outfile, copts.stdout)
outfile.close()

# Find break points in the input file
breaks = findFileBreaks(copts.input, copts.threads)

# Initializing annotation processes
threadidx = 0
processes = []
for (startline, endline) in breaks:
    threadidx += 1
    processes.append(
        SingleJob(threadidx, options, copts, startline, endline, genelist,
                  transcriptlist, snplist, impactdir, numOfRecords))

# Running annotation processes
Пример #2
0
def _parse_impactdef(impactdef):
    """Turn an @impactdef option string into a class-name -> rank mapping.

    The string is split on '|' into groups and every group on ',' into class
    names. Each whitespace-stripped class name is mapped to the 1-based
    position of its group, stored as a string. Returns None when the option
    is '.' or the empty string.
    """
    if impactdef in ('.', ''):
        return None
    impactdir = dict()
    for rank, group in enumerate(impactdef.split('|'), 1):
        for classname in group.split(','):
            impactdir[classname.strip()] = str(rank)
    return impactdir


def _exit_with_error(*messages):
    """Print each message on stdout, then terminate with exit status 1."""
    # Imported locally so this block does not depend on a file-level import.
    import sys

    for message in messages:
        print(message)
    # sys.exit() instead of quit(): quit() is only injected by the `site`
    # module (absent with `python -S`) and exits with status 0, which hides
    # the failure from calling scripts.
    sys.exit(1)


def run(copts, version, default_config_file):
    """Run the complete annotation workflow for one input file.

    Steps, in order: validate the configuration and input paths, read the
    options, set up logging, read the gene/transcript/SNP sets, parse the
    @impactdef option, write the output header, split the input into one
    range per job, start and join one SingleJob per range, and merge the
    temporary files when more than one thread was requested.

    Parameters:
        copts: command line options object. The attributes read here are
            `threads`, `stdout`, `conf`, `input` and `output`; `threads` is
            converted to int in place, `stdout` is forced to False when
            more than one thread is requested, and `conf` is replaced by
            `default_config_file` when it is None.
        version: version string, concatenated into the start-up log message
            and passed to printStartInfo.
        default_config_file: configuration file path used when `copts.conf`
            is None; may itself be None.

    Raises:
        SystemExit: with status 1 when no configuration file is specified,
            or when the configuration or input file does not exist.
    """
    copts.threads = int(copts.threads)
    if copts.threads > 1:
        copts.stdout = False

    # Use default path read from the default_config_path file, if -c is not used
    if copts.conf is None:
        copts.conf = default_config_file

    # Check if input and configuration files exist
    if copts.conf is None:
        _exit_with_error(
            '\nError: no configuration file specified.',
            'Please use option -c or add the absolute path to the default_config_path file.\n')
    if not os.path.isfile(copts.conf):
        _exit_with_error(
            '\nError: configuration file (' + copts.conf + ') cannot be found.\n')
    if not os.path.isfile(copts.input):
        _exit_with_error(
            '\nError: input file (' + copts.input + ') cannot be found.\n')

    # Reading options from configuration file
    options = Options(copts.conf)
    logging_enabled = options.args['logfile']

    # Initializing log file
    if logging_enabled:
        logging.basicConfig(filename=copts.output + '.log',
                            filemode='w',
                            format='%(asctime)s %(levelname)s: %(message)s',
                            level=logging.DEBUG)

    # Printing out version information and start time.
    # starttime is only consumed by printEndInfo below, under the same
    # `not copts.stdout` condition; it is bound up front so the name always
    # exists.
    starttime = None
    if not copts.stdout:
        starttime = printStartInfo(version)
    if logging_enabled:
        logging.info('CAVA ' + version + ' started.')

    # Checking if options specified in the configuration file are correct
    core.checkOptions(options)

    # Printing out configuration, input and output file names
    if not copts.stdout:
        printInputFileNames(copts, options)

    # Reading gene, transcript and snp lists from files
    genelist = core.readSet(options, 'genelist')
    transcriptlist = core.readSet(options, 'transcriptlist')
    snplist = core.readSet(options, 'snplist')

    # Parsing @impactdef string
    impactdir = _parse_impactdef(options.args['impactdef'])

    # Counting and printing out number of records of input file
    numOfRecords = core.countRecords(copts.input)
    if not copts.stdout:
        printNumOfRecords(numOfRecords)
    if logging_enabled:
        logging.info(str(numOfRecords) + ' records to be annotated.')

    # Writing header to output file; the context manager closes the file
    # even if reading or writing the header raises.
    extension = '.vcf' if options.args['outputformat'] == 'VCF' else '.txt'
    with open(copts.output + extension, 'w') as outfile:
        header = readHeader(copts.input)
        core.writeHeader(options, '\n'.join(header), outfile, copts.stdout)

    # Find break points in the input file
    breaks = findFileBreaks(copts.input, copts.threads)

    # Initializing annotation processes; job indices start at 1
    processes = [
        SingleJob(threadidx, options, copts, startline, endline, genelist,
                  transcriptlist, snplist, impactdir, numOfRecords)
        for threadidx, (startline, endline) in enumerate(breaks, 1)
    ]

    # Running annotation processes: start all of them before waiting on any
    for process in processes:
        process.start()
    for process in processes:
        process.join()

    # Merging tmp files
    if copts.threads > 1:
        mergeTmpFiles(copts.output, options.args['outputformat'],
                      copts.threads)

    # Printing out summary information and end time
    if not copts.stdout:
        printEndInfo(options, copts, starttime)