示例#1
0
def run_files_with_params(files, output_path, params, clusterer=None, min_points=1, retain_ascii_output=True, cleanup_tmp=True):
    """Sort every LMA data file in `files` into flashes and write one '.h5' file per input.

    The header length and an upper bound on the source count are measured once,
    from the largest input file (by on-disk size), and stored in `params` as
    'nhead' and 'n_sources' for use with every file in this run.

    Arguments:
    files -- sequence of paths to LMA data files.
    output_path -- directory in which the output files are created.
    params -- dict of sorting parameters. It is modified in place: 'nhead' and
        'n_sources' are (re)written before any file is processed.
    clusterer -- callable invoked as
        clusterer(a_file, output_path, outfile, params, logger, min_points=...,
                  retain_ascii_output=..., cleanup_tmp=...)
        returning (lmadata, flashes), where lmadata.header is an iterable of
        strings. Defaults to autorun_mflash.cluster.
    min_points, retain_ascii_output, cleanup_tmp -- passed through unchanged
        to the clusterer.

    Returns the list of '.h5' output paths, in the same order as `files`.
    An empty `files` yields an empty list.

    Raises KeyError if no data marker line is found in the largest file and
    `params` does not already provide 'nhead'. Any exception raised while
    processing a file is logged and re-raised.
    """
    if clusterer is None:
        # Imported lazily so the default backend is only required when the
        # caller does not supply a clusterer.
        from autorun_mflash import cluster
        clusterer = cluster

    logger = logging.getLogger('FlashAutorunLogger')

    now = datetime.datetime.now().strftime('Flash autosort started %Y%m%d-%H%M%S')
    logger.info(now)

    if len(files) == 0:
        # Nothing to size the header from and nothing to sort.
        logger.warning('No input files were given; nothing to sort.')
        return []

    # Calculate the number of header lines based on the largest data file.
    f_sizes = [os.path.getsize(f) for f in files]
    largest_file_index = f_sizes.index(max(f_sizes))
    lma_pipe, command, any_input = cat_LMA(files[largest_file_index])
    lma_text, err = lma_pipe.communicate(input=any_input)
    if not isinstance(lma_text, str):
        # On Python 3 a pipe opened in binary mode yields bytes, which can be
        # neither split on '\n' nor searched with a str pattern.
        lma_text = lma_text.decode('utf-8', 'replace')

    isDataLine = r"^.*\*+.*data.*\*+.*" #Search for asterisks, data, and asterisks
    matchDataLine = re.compile(isDataLine, re.IGNORECASE)
    split_lma_text = lma_text.split('\n')
    for lineIdx, line in enumerate(split_lma_text):
        if matchDataLine.search(line):
            params['nhead'] = lineIdx+1
            logger.info("Header is %d lines. This length will be used for all files this run." % (params['nhead'],))
            break
    else:
        # No marker line: fall back to a caller-supplied header length if
        # there is one, otherwise fail with an explanation instead of a bare
        # KeyError further down.
        if 'nhead' not in params:
            raise KeyError("nhead: no data marker line found in %s and no 'nhead' given in params"
                           % (files[largest_file_index],))
        logger.warning("No data marker line found in %s. Using the supplied header length of %s lines."
                       % (files[largest_file_index], params['nhead']))

    # We could parse for the number of sources from the header, but that is wrong sometimes.
    # Instead, set the number of points to the total number of lines in the file minus the header.
    # Add 10% to the total. The largest file might actually not be the largest when uncompressed due to variable packing efficiency
    params['n_sources'] = int(1.10*(len(split_lma_text) - params['nhead']))
    logger.info('Calculated max source count for this run: {0}'.format(params['n_sources']))
    # Release the full text of the largest file before the sorting loop starts.
    del lma_text, split_lma_text

    logger.info('%s', params)

    h5_outfiles = []
    for a_file in files:
        try:
            file_base_name = os.path.split(a_file)[-1].replace('.gz', '')
            outfile = os.path.join(output_path, file_base_name+'.flash')

            # clusterer should use the name outfile as the base for any, e.g., ASCII data it would like to save
            lmadata, flashes = clusterer(a_file, output_path, outfile, params, logger,
                       min_points=min_points, retain_ascii_output=retain_ascii_output, cleanup_tmp=cleanup_tmp )

            header = ''.join(lmadata.header)
            fl_metadata = FlashMetadata(header)
            outfile_with_extension = outfile + '.h5'
            h5_outfiles.append(outfile_with_extension)
            write_output(outfile_with_extension, flashes, a_file, metadata=fl_metadata)

        except Exception:
            # Record which file failed, then let the caller see the original error.
            logger.error("Did not successfully sort %s \n Error was: %s" % (a_file, sys.exc_info()[1]))
            raise
    return h5_outfiles
示例#2
0
def sort_file(filename, directory):
    """Sort one LMA data file into flashes by piping it through the flash program.

    Arguments:
    filename -- path of the LMA data file. It is handed to cat_LMA, and the
        stdout of the process cat_LMA returns becomes the flash program's stdin.
    directory -- directory that contains the flash program (flash_prg_name).

    Returns (out, err) from Popen.communicate(): `out` is the captured stdout
    of the flash program; `err` is None because stderr is not redirected to a
    pipe.
    """
    logger = logging.getLogger('FlashAutorunLogger')

    # command and the_input are returned by cat_LMA but not needed here.
    f, command, the_input = cat_LMA(filename)

    run_cmd = [os.path.join(directory, flash_prg_name)]
    logger.info( 'Running %s' % (run_cmd,))

    try:
        # Remove stdout=subprocess.PIPE to print the program's stdout to the
        # terminal; capturing it here keeps the terminal quiet.
        p = subprocess.Popen(run_cmd, stdin=f.stdout, stdout=subprocess.PIPE)

        # Drop the parent's copy of the pipe's read end so the upstream process
        # receives SIGPIPE if the flash program exits before reading everything.
        if f.stdout is not None:
            f.stdout.close()

        # The communication step is key to not blocking at completion.
        out, err = p.communicate()
    finally:
        # Also runs when Popen itself fails: close the pipe (a no-op if already
        # closed) and reap the upstream process so it is not left as a zombie.
        # NOTE(review): assumes cat_LMA returns a subprocess.Popen - confirm.
        if f.stdout is not None:
            f.stdout.close()
        f.wait()

    return out, err
示例#3
0
def sort_file(filename, directory):
    """Sort one LMA data file into flashes by piping it through the flash program.

    Arguments:
    filename -- path of the LMA data file. It is handed to cat_LMA, and the
        stdout of the process cat_LMA returns becomes the flash program's stdin.
    directory -- directory that contains the flash program (flash_prg_name).

    Returns (out, err) from Popen.communicate(): `out` is the captured stdout
    of the flash program; `err` is None because stderr is not redirected to a
    pipe.
    """
    logger = logging.getLogger('FlashAutorunLogger')

    # command and the_input are returned by cat_LMA but not needed here.
    f, command, the_input = cat_LMA(filename)

    run_cmd = [os.path.join(directory, flash_prg_name)]
    logger.info('Running %s' % (run_cmd, ))

    try:
        # Remove stdout=subprocess.PIPE to print the program's stdout to the
        # terminal; capturing it here keeps the terminal quiet.
        p = subprocess.Popen(run_cmd, stdin=f.stdout,
                             stdout=subprocess.PIPE)

        # Drop the parent's copy of the pipe's read end so the upstream process
        # receives SIGPIPE if the flash program exits before reading everything.
        if f.stdout is not None:
            f.stdout.close()

        # The communication step is key to not blocking at completion.
        out, err = p.communicate()
    finally:
        # Also runs when Popen itself fails: close the pipe (a no-op if already
        # closed) and reap the upstream process so it is not left as a zombie.
        # NOTE(review): assumes cat_LMA returns a subprocess.Popen - confirm.
        if f.stdout is not None:
            f.stdout.close()
        f.wait()

    return out, err