Пример #1
0
# Register the magichour archive with the SparkContext; the imports below
# come from that package, so this call has to run first.
sc.addPyFile('magichour.zip')
from magichour.api.dist.events.eventEval import event_eval_rdd
# NOTE(review): DistributedLogLine is not referenced in this snippet;
# presumably it is imported so the pickled records can be resolved - confirm.
from magichour.api.local.util.namedtuples import DistributedLogLine

# Event definitions: read the pickled RDD and collect it onto the driver.
eventDefURI = 'hdfs://namenode/magichour/tbird.500.eventsRDD'
eventDefs = sc.pickleFile(eventDefURI).collect()

# Log lines: read the pickled RDD and keep it distributed.
logLineURI = 'hdfs://namenode/magichour/tbird.500.templateEvalRDD'
rddlogLines = sc.pickleFile(logLineURI)

# Window argument handed to the evaluator (presumably seconds, per its name).
windowSeconds = 500
test = event_eval_rdd(
    sc,
    rddlogLines,
    eventDefs,
    windowSeconds,
)

# Bring the evaluation result back to the driver.
test.collect()
Пример #2
0
# Make the magichour archive available through the SparkContext before
# importing anything from it.
sc.addPyFile('magichour.zip')
from magichour.api.dist.events.eventEval import event_eval_rdd
# NOTE(review): DistributedLogLine is unused in the visible code; presumably
# required for unpickling the stored records - verify before removing.
from magichour.api.local.util.namedtuples import DistributedLogLine

# Inputs: the window value given to the evaluator and two pickled datasets
# addressed by HDFS URIs.
windowSeconds = 500
logLineURI = 'hdfs://namenode/magichour/tbird.500.templateEvalRDD'
eventDefURI = 'hdfs://namenode/magichour/tbird.500.eventsRDD'

# The log lines stay an RDD, while the event definitions are collected into
# a driver-side list.
rddlogLines = sc.pickleFile(logLineURI)
eventDefs = sc.pickleFile(eventDefURI).collect()

test = event_eval_rdd(sc, rddlogLines, eventDefs, windowSeconds)

# Materialise the evaluation result on the driver.
test.collect()
Пример #3
0
# Fill the id -> raw string lookup used when printing events below.
# NOTE(review): `templates` and `template_lookup` are created outside this
# snippet; `template_lookup` is assumed to be a dict-like mapping.
for template in templates:
    template_lookup[template.id] = template.raw_str

# Single-argument, parenthesised print produces the same output under the
# Python 2 print statement and the Python 3 print function.
print('***********  Word2Vec  ***********')
word2vec_events = event_gen_word2vec(sc, matched_logline_rdd, window_size=60)

# Remember the final event explicitly instead of relying on the loop
# variable leaking out of the loop (which is undefined for an empty result).
last_event = None
for event in word2vec_events:
    last_event = event
    print('--------Event %d-----------' % event.id)
    for template_id in event.template_ids:
        # Only a missing key means "unknown template"; any other error
        # should surface rather than be swallowed by a bare except.
        try:
            template_text = template_lookup[template_id]
        except KeyError:
            print('%s %s' % ('Unknown Template: ', template_id))
        else:
            print(template_text)

# Save the event definitions locally; `with` closes each file handle even
# if pickling fails part-way.
with open('word2vec_events.pkl', 'wb') as events_file:
    pickle.dump(word2vec_events, events_file)
# The original also pickled the last event seen by the loop; skip that
# when no events were generated rather than failing with a NameError.
if last_event is not None:
    with open('event.pkl', 'wb') as event_file:
        pickle.dump(last_event, event_file)

##################
##  Event Eval  ##
##################
windowSeconds = 500
found_events = event_eval_rdd(sc, matched_logline_rdd, word2vec_events,
                              windowSeconds)

# NOTE(review): two actions (save and take) run on the same RDD, so it is
# computed twice unless event_eval_rdd already persists it - consider
# persisting it if that turns out to be expensive.
event_output_URI = 'hdfs:///magichour/events'
found_events.saveAsPickleFile(event_output_URI)
found_events_local = found_events.take(10000)

print(found_events_local[:10])
Пример #4
0
# Build the template id -> raw string mapping consulted when events are
# printed.
# NOTE(review): `templates` and `template_lookup` come from code outside this
# snippet; `template_lookup` is assumed to behave like a dict.
for template in templates:
    template_lookup[template.id] = template.raw_str

# Parenthesised single-argument prints give identical output whether print
# is a statement (Python 2) or a function (Python 3).
print('***********  Word2Vec  ***********')
word2vec_events = event_gen_word2vec(sc, matched_logline_rdd, window_size=60)

# Track the most recent event so the later dump does not depend on the loop
# variable existing (it would not if no events were produced).
final_event = None
for event in word2vec_events:
    final_event = event
    print('--------Event %d-----------' % event.id)
    for template_id in event.template_ids:
        # Catch only the missing-key case; a bare except would also hide
        # unrelated failures and interrupts.
        try:
            raw_template = template_lookup[template_id]
        except KeyError:
            print('%s %s' % ('Unknown Template: ', template_id))
        else:
            print(raw_template)

# Save the event definitions locally, closing each file deterministically.
with open('word2vec_events.pkl', 'wb') as out_file:
    pickle.dump(word2vec_events, out_file)
# Also keep the last event from the loop, as the original did, but only
# when at least one event exists.
if final_event is not None:
    with open('event.pkl', 'wb') as out_file:
        pickle.dump(final_event, out_file)

##################
##  Event Eval  ##
##################
windowSeconds = 500
found_events = event_eval_rdd(sc, matched_logline_rdd, word2vec_events,
                              windowSeconds)

# NOTE(review): the save and the take are separate actions on the same RDD,
# so it is evaluated twice unless event_eval_rdd persists it - confirm.
event_output_URI = 'hdfs:///magichour/events'
found_events.saveAsPickleFile(event_output_URI)
found_events_local = found_events.take(10000)

print(found_events_local[:10])