Example #1
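Builds a Spark Streaming job that reads messages from a Kafka topic through a direct stream, prints each record, and runs until terminated.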
from pyspark import SparkConf, SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils


def echo(record):
    # Assumed helper: the original references echo without defining it.
    print(record)


def start():
    sconf = SparkConf()
    sconf.set('spark.cores.max', 3)
    sc = SparkContext(appName='pysparkdfka', conf=sconf)
    # 5-second micro-batches
    ssc = StreamingContext(sc, 5)
    brokers = "10.12.64.205:9092"
    topic = 'test'
    stream = KafkaUtils.createDirectStream(
        ssc, [topic], kafkaParams={"metadata.broker.list": brokers})
    # Each Kafka record is a (key, value) pair; keep only the value.
    # Renamed from `json` to avoid shadowing the standard-library module.
    messages = stream.map(lambda v: v[1])
    messages.foreachRDD(lambda rdd: rdd.foreach(echo))

    messages.pprint()
    ssc.start()
    ssc.awaitTermination()
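The DStream above carries the raw payloads as JSON strings but never parses them. A minimal sketch of decoding each payload into a dict before processing; the handle callback is hypothetical:

import json

def handle(record):
    # Hypothetical per-record handler; substitute real processing here.
    print(record)

def process(rdd):
    # Decode each Kafka payload from a JSON string into a Python dict.
    rdd.map(json.loads).foreach(handle)

# Wire it into the stream defined in start():
# messages.foreachRDD(process)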
Example #2
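Merges the model section of a saved experiment config into the current one, runs an evaluation, then logs the results and appends them to eval.txt in the output directory.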
import logging
import os
from pprint import pformat

import yaml


def _cli_eval(config, output_dir, args):
    # Load the model config from the previous experiment and let the
    # current config override it.
    with open(os.path.join(output_dir, 'config.yml'), 'r') as f:
        model_config = yaml.safe_load(f)['model']
    model_config.update(config.get('model', {}))
    config['model'] = model_config

    # `evaluate` is assumed to be provided by the surrounding project.
    results = evaluate(config, output_dir, n_iter=config.get('eval_iter'))

    # Log and export the results; pformat returns a string, unlike
    # pprint, which prints to stdout and returns None.
    logging.info('Evaluation results:\n{}'.format(pformat(results, indent=2)))
    with open(os.path.join(output_dir, 'eval.txt'), 'a') as f:
        f.write('Evaluation for {} dataset:\n'.format(config['data']['name']))
        for r, v in results.items():
            f.write('\t{}:\n\t\t{}\n'.format(r, v))
        f.write('\n')
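A sketch of how the function might be invoked, assuming a YAML config on disk; the paths below are hypothetical, and None stands in for the args parameter, which the body above never uses:

import yaml

# Hypothetical config and output paths.
with open('configs/eval.yml', 'r') as f:
    config = yaml.safe_load(f)
_cli_eval(config, output_dir='runs/my_experiment', args=None)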