# Consume a Kafka topic with Spark Streaming and print each message.
from pyspark import SparkConf, SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils


def echo(record):
    # The original references `echo` without defining it; this minimal
    # placeholder just prints the message on the worker.
    print(record)


def start():
    sconf = SparkConf()
    sconf.set('spark.cores.max', 3)
    sc = SparkContext(appName='pysparkdfka', conf=sconf)
    ssc = StreamingContext(sc, 5)  # 5-second micro-batches

    brokers = "10.12.64.205:9092"
    topic = 'test'
    # Direct (receiver-less) Kafka stream; needs the
    # spark-streaming-kafka-0-8 package on the classpath.
    stream = KafkaUtils.createDirectStream(
        ssc, [topic], kafkaParams={"metadata.broker.list": brokers})

    # Each record is a (key, value) pair; keep only the message payload.
    # (Renamed from `json` to avoid shadowing the standard-library module.)
    payloads = stream.map(lambda kv: kv[1])
    payloads.foreachRDD(lambda rdd: rdd.foreach(echo))
    payloads.pprint()

    ssc.start()
    ssc.awaitTermination()
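# A sketch of how this job might be launched, assuming Spark 2.x with
# Scala 2.11 (the exact package version is an assumption; match it to
# your Spark installation):
#
#   spark-submit \
#       --packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.2.0 \
#       kafka_stream.py
if __name__ == '__main__':
    start()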
# CLI handler: evaluate a trained model using the config saved alongside it.
import json
import logging
import os

import yaml

# `evaluate` is assumed to be defined elsewhere in this module.


def _cli_eval(config, output_dir, args):
    # Load model config from previous experiment
    with open(os.path.join(output_dir, 'config.yml'), 'r') as f:
        # safe_load avoids the unsafe, deprecated bare yaml.load
        model_config = yaml.safe_load(f)['model']
    model_config.update(config.get('model', {}))
    config['model'] = model_config

    results = evaluate(config, output_dir, n_iter=config.get('eval_iter'))

    # Print and export results. The original called pprint() here, which
    # prints and returns None; json.dumps matches the indent/default
    # arguments and returns the formatted string for logging.
    logging.info('Evaluation results: \n{}'.format(
        json.dumps(results, indent=2, default=str)))
    with open(os.path.join(output_dir, 'eval.txt'), 'a') as f:
        f.write('Evaluation for {} dataset:\n'.format(config['data']['name']))
        for r, v in results.items():
            f.write('\t{}:\n\t\t{}\n'.format(r, v))
        f.write('\n')
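# A minimal sketch of wiring `_cli_eval` into a command-line interface.
# The 'evaluate' subcommand and argument names are assumptions for
# illustration, not part of the original code.
import argparse


def main():
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='command')
    p_eval = subparsers.add_parser('evaluate')
    p_eval.add_argument('config')      # hypothetical: path to a YAML config
    p_eval.add_argument('output_dir')  # hypothetical: experiment directory
    args = parser.parse_args()

    with open(args.config, 'r') as f:
        config = yaml.safe_load(f)
    _cli_eval(config, args.output_dir, args)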