示例#1
0
from easiparse import importer, extractor, cooccurrences

import logging
logging.basicConfig(level=logging.DEBUG, format="%(levelname)-8s %(message)s")


def get_parser():
    """Create the command-line parser used by this script.

    The parser accepts a single option, ``-e`` / ``--execute``, whose value
    is stored on the parsed options object as ``execute``.

    Returns:
        The configured ``OptionParser`` instance.
    """
    execute_flags = ("-e", "--execute")
    cli = OptionParser()
    cli.add_option(*execute_flags, dest="execute", help="execution action")
    return cli

if __name__ == "__main__":
    # Command-line entry point: read the requested action from -e/--execute,
    # load config.yaml from the current working directory and run the
    # matching processing stage with that configuration.
    parser = get_parser()
    (options, args) = parser.parse_args()
    # Formatted explicitly so the output is identical whether ``print`` is
    # parsed as a statement or called as a function.
    print("%s %s" % (options, args))

    # Use a context manager so the config file handle is closed as soon as
    # it has been parsed instead of being left to the garbage collector.
    # NOTE(review): yaml.load can construct arbitrary Python objects; that is
    # acceptable only while config.yaml is a trusted local file. Consider
    # yaml.safe_load if the installed PyYAML version provides it.
    with open("config.yaml", 'rU') as config_file:
        config = yaml.load(config_file)

    # options.execute holds a single value, so at most one stage can match.
    action = options.execute
    if action == 'import':
        importer.main(config)
    elif action == 'extract':
        extractor.main(config)
    elif action == 'cooccurrences':
        cooccurrences.main(config)
    elif action == 'exportcooc':
        cooccurrences.exportcooc(config)
    else:
        # Previously an unrecognised or missing action was a silent no-op,
        # which hid typos on the command line.
        logging.warning("unknown or missing execution action: %r", action)
示例#2
0
def worker(config, input_path, mongodb, limit=None):
    """Run the importer on one input file.

    Opens ``input_path`` for reading and a file with the same base name
    under ``config['output_path']`` for writing (both ASCII, with
    undecodable/unencodable characters replaced), then passes both handles
    to ``importer.main``. Both files are closed before returning, including
    when the importer raises.

    Args:
        config: mapping that provides ``output_path``; it is also forwarded
            unchanged to ``importer.main``.
        input_path: path of the file to read.
        mongodb: database handle forwarded to ``importer.main``.
        limit: forwarded to ``importer.main`` as its ``limit`` keyword.

    Returns:
        None. If the input file cannot be opened the error is logged and
        the function returns without creating the output file.
    """
    try:
        # codecs.open always opens the underlying file in binary mode, so no
        # newline translation takes place; the mode is therefore plain "r".
        isi_file = codecs.open(input_path, "r", encoding="ascii",
            errors="replace")
    except Exception as exc:
        # Best effort: one unreadable input must not abort the other workers.
        logging.error("Error reading file %s: %s", input_path, exc)
        return

    output_path = join(config['output_path'], split(input_path)[1])
    # Nested context managers guarantee that both handles are released even
    # if opening the output file or the import itself fails.
    with isi_file:
        with codecs.open(output_path, "w+", encoding="ascii",
                errors="replace") as output_file:
            subtotal = importer.main(
                isi_file,
                config,
                output_file,
                mongodb,
                limit=limit
            )

    # Report the path rather than the file object, and let logging do the
    # formatting so an unexpected subtotal type cannot raise here.
    logging.debug("extracted %d matching notices in %s", subtotal, input_path)

if __name__ == "__main__":
    # Script entry point: load config.yaml from the current working
    # directory, expand the configured input glob, open the MongoDB database
    # named in the config and schedule one worker call per matching file.

    # Use a context manager so the config file handle is closed as soon as
    # it has been parsed instead of being left to the garbage collector.
    # NOTE(review): yaml.load can construct arbitrary Python objects; that is
    # acceptable only while config.yaml is a trusted local file.
    with open("config.yaml", 'rU') as config_file:
        config = yaml.load(config_file)
    glob_list = glob(config['input_path'])

    mongodb = pymongo.Connection(config['mongo_host'],
        config['mongo_port'])[config['mongo_db_name']]

    # Every worker receives the same config and database handle.
    # NOTE(review): nothing in the visible lines starts the reactor;
    # presumably the scheduled calls only execute once it is running.
    # Confirm that reactor.run() (or equivalent) follows this loop.
    for input_path in glob_list:
        reactor.callInThread(worker, config, input_path, mongodb, limit=None)
        #asyncparser = AsyncParse(config, input_path, mongodb, None)