示例#1
0
# Tail of a pool-export script: closes the previous output file, dumps the
# unlabeled pool to unlabeledPool.txt, then builds labeledPool.conll.
# NOTE(review): `f`, `unlabeledPool`, `pathToEpic` and `makeConll` are bound
# before this excerpt; `f` is presumably the labeled-pool output file -- confirm.
f.close()
print "******Written labeled*****"
# Query every document whose "random" field lies strictly between 0 and 1.
tmpUnlabeledPool = unlabeledPool.find({"random" : { "$gt": 0, "$lt": 1}})
f = open(os.path.expanduser(pathToEpic + "/data/PoolData/unlabeledPool.txt"),'w')
#f = open("unlabeledPool.txt",'w')
# Write one stringified document per line, printing the index as progress.
# NOTE(review): the range starts at 1, so the document at index 0 is never
# written -- confirm whether skipping it is intentional.
for i in range(1,tmpUnlabeledPool.count()):
	f.write(str(tmpUnlabeledPool[i]))
	f.write("\n")
	print str(i)
	if i% 100 == 0:
		print i

f.close()

print "*****Written unlabeled*****"



# Opened in the default read mode and closed again without reading anything;
# open() raises if labeledPool.conll does not exist.
# NOTE(review): presumably an existence check -- confirm.
tmp_file = open(os.path.expanduser(pathToEpic + '/data/PoolData/labeledPool.conll'))
tmp_file.close()

# Noise defaults to 0.0 and is overridden by sys.argv[3] when more than three
# argv entries are present.
noise = 0.0
if len(sys.argv) > 3:
	noise = float(sys.argv[3])
print "*****Time to make conll*****"
# Convert the labeled pool text file into its CoNLL counterpart.
makeConll(pathToEpic + '/data/PoolData/labeledPool.txt', pathToEpic + '/data/PoolData/labeledPool.conll',noise)

示例#2
0
def relabelBatch(randomIds, noise):
    """Copy the labeled-pool lines matching ``randomIds`` into a batch and re-append them.

    Every line of ``labeledPool.txt`` that contains one of the ids (compared
    as text, with the last two characters of ``str(id)`` dropped) is written
    to ``batch.txt``.  ``makeConll`` then converts the batch into
    ``batchConll.conll`` using ``noise``, and both batch files are appended
    to ``labeledPool.txt`` and ``labeledPool.conll``.  All files live under
    ``<epic root>/data/PoolData``.

    Args:
        randomIds: iterable of ids; each one is converted with ``str``.
        noise: passed through unchanged as the third argument of ``makeConll``.

    Returns:
        The constant string ``"Tmp file: "``.
    """
    # The epic root is the working directory cut off right after its last "epic".
    # NOTE(review): if the cwd does not contain "epic", rfind returns -1 and the
    # root silently becomes the first three characters of the cwd.
    pathToEpic = os.getcwd()
    pathToEpic = pathToEpic[0:pathToEpic.rfind("epic") + 4]
    poolDir = pathToEpic + "/data/PoolData/"
    batchPath = poolDir + "batch.txt"
    batchConllPath = poolDir + "batchConll.conll"
    labeledPath = poolDir + "labeledPool.txt"
    labeledConllPath = poolDir + "labeledPool.conll"

    returnString = "Tmp file: "
    # Message text kept as-is (it names moveBatch) in case the output is parsed.
    print("Inside moveBatch")

    # The truncated id strings do not depend on the line, so build them once.
    # Materialising them also keeps a one-shot iterable usable for every line.
    # NOTE(review): presumably the last two characters are dropped to tolerate
    # differing float representations of the id -- confirm.
    idPrefixes = [str(oneID)[:-2] for oneID in randomIds]

    with open(os.path.expanduser(labeledPath), 'r') as readlabeled:
        lines = readlabeled.readlines()
    print("Labeled openened for rewriting")

    # Collect every labeled line that mentions at least one of the ids.
    with open(os.path.expanduser(batchPath), 'w') as batch:
        for line in lines:
            if any(prefix in line for prefix in idPrefixes):
                batch.write(line)

    # Get Conll of the batch and add it to the conll of the labeled pool.
    makeConll(batchPath, batchConllPath, noise)

    # Open all four files before writing so a missing batch file cannot leave
    # the text pool and the CoNLL pool out of step with each other.
    with open(os.path.expanduser(labeledPath), 'a') as labeledOrig, \
            open(os.path.expanduser(labeledConllPath), 'a') as labeledOrigConll, \
            open(os.path.expanduser(batchPath), 'r') as batchIn, \
            open(os.path.expanduser(batchConllPath), 'r') as batchConllIn:
        labeledOrig.write(batchIn.read())
        labeledOrigConll.write(batchConllIn.read())

    return returnString
示例#3
0
import os
import unicodedata
from makeConllFromDBOutput import makeConll

# Build the CoNLL evaluation test set: convert the positive, negative and
# fake-positive text files, then concatenate the three CoNLL outputs.

# The epic root is the working directory cut off right after its last "epic".
pathToEpic = os.getcwd()
pathToEpic = pathToEpic[0:pathToEpic.rfind("epic") + 4]

# pathToEpic ends in "epic" without a trailing separator, so a "/" is needed
# before "data"; without it the paths pointed at ".../epicdata/...".
evalDir = pathToEpic + "/data/epicEvalutationTestSet"

positiveFile = evalDir + "/positives.txt"
positiveConll = evalDir + "/positives.conll"
negativeFile = evalDir + "/negatives.txt"
negativeConll = evalDir + "/negatives.conll"
positiveFakeFile = evalDir + "/fakePositives.txt"
positiveFakeConll = evalDir + "/fakePositives.conll"

# Noise is fixed at 0.0 for all three conversions.
makeConll(positiveFile, positiveConll, 0.0)
makeConll(negativeFile, negativeConll, 0.0)
makeConll(positiveFakeFile, positiveFakeConll, 0.0)

# Concatenation order: positives, fake positives, negatives.
filenames = [positiveConll, positiveFakeConll, negativeConll]
with open(os.path.expanduser(evalDir + '.conll'), 'w') as outfile:
    for fname in filenames:
        with open(os.path.expanduser(fname)) as infile:
            outfile.write(infile.read())
示例#4
0
#import pymongo
import sys
import os
#from pymongo import MongoClient
from makeConllFromDBOutput import makeConll
#from getJustSentences import getJustSentences

# Dump every document of the malware_negatives collection to
# APInegatives.txt (one stringified document per line) and convert that file
# to CoNLL.

# The epic root is the working directory cut off right after its last "epic".
pathToEpic = os.getcwd()
pathToEpic = pathToEpic[0:pathToEpic.rfind("epic") + 4]

# NOTE(review): the `from pymongo import MongoClient` line at the top of this
# script is commented out, so this call raises NameError until it is restored.
client = MongoClient('mon-entity-event-r13-6.recfut.com:27019')
db = client.rf_entity_curation
allMalware = db.malware_negatives

negatives = allMalware.find()
negativesPath = pathToEpic + "/data/APInegatives.txt"

counter = 0
# The file must be closed (and therefore flushed) before makeConll reads it;
# otherwise the last buffered documents can be missing from the input.
with open(os.path.expanduser(negativesPath), 'w') as negativeFile:
    for i in negatives:
        negativeFile.write(str(i) + "\n")
        print("counter " + str(counter))
        counter += 1


makeConll(negativesPath, pathToEpic + "/data/APInegatives.conll", 0.0)
# Tail of a pool-export script: closes the previous output file, dumps the
# unlabeled pool to unlabeledPool.txt, then builds labeledPool.conll.
# NOTE(review): `f` and `unlabeledPool` are not assigned in the script lines
# directly above; they are presumably bound in a part of the script that is
# not shown here -- confirm.
f.close()
print "******Written labeled*****"
# Query every document whose "random" field lies strictly between 0 and 1.
tmpUnlabeledPool = unlabeledPool.find({"random": {"$gt": 0, "$lt": 1}})
f = open(os.path.expanduser(pathToEpic + "/data/PoolData/unlabeledPool.txt"),
         'w')
#f = open("unlabeledPool.txt",'w')
# Write one stringified document per line, printing the index as progress.
# NOTE(review): the range starts at 1, so the document at index 0 is never
# written -- confirm whether skipping it is intentional.
for i in range(1, tmpUnlabeledPool.count()):
    f.write(str(tmpUnlabeledPool[i]))
    f.write("\n")
    print str(i)
    if i % 100 == 0:
        print i

f.close()

print "*****Written unlabeled*****"

# Opened in the default read mode and closed again without reading anything;
# open() raises if labeledPool.conll does not exist.
# NOTE(review): presumably an existence check -- confirm.
tmp_file = open(
    os.path.expanduser(pathToEpic + '/data/PoolData/labeledPool.conll'))
tmp_file.close()

# Noise defaults to 0.0 and is overridden by sys.argv[3] when more than three
# argv entries are present.
noise = 0.0
if len(sys.argv) > 3:
    noise = float(sys.argv[3])
print "*****Time to make conll*****"
# Convert the labeled pool text file into its CoNLL counterpart.
makeConll(pathToEpic + '/data/PoolData/labeledPool.txt',
          pathToEpic + '/data/PoolData/labeledPool.conll', noise)

print "poop"
示例#6
0
#import pymongo
import sys
import os
#from pymongo import MongoClient
from makeConllFromDBOutput import makeConll
#from getJustSentences import getJustSentences

# Dump every document of the malware_negatives collection to
# APInegatives.txt (one stringified document per line) and convert that file
# to CoNLL.

# The epic root is the working directory cut off right after its last "epic".
pathToEpic = os.getcwd()
pathToEpic = pathToEpic[0:pathToEpic.rfind("epic") + 4]

# NOTE(review): the `from pymongo import MongoClient` line at the top of this
# script is commented out, so this call raises NameError until it is restored.
client = MongoClient('mon-entity-event-r13-6.recfut.com:27019')
db = client.rf_entity_curation
allMalware = db.malware_negatives

negatives = allMalware.find()
negativesPath = pathToEpic + "/data/APInegatives.txt"

counter = 0
# The file must be closed (and therefore flushed) before makeConll reads it;
# otherwise the last buffered documents can be missing from the input.
with open(os.path.expanduser(negativesPath), 'w') as negativeFile:
    for i in negatives:
        negativeFile.write(str(i) + "\n")
        print("counter " + str(counter))
        counter += 1

makeConll(negativesPath, pathToEpic + "/data/APInegatives.conll", 0.0)
示例#7
0
def moveBatch(randomIds, noise):
    """Move the unlabeled-pool lines matching ``randomIds`` into the labeled pool.

    Every line of ``unlabeledPool.txt`` that contains one of the ids
    (compared as text, with the last two characters of ``str(id)`` dropped)
    is removed from that file and written to ``batch.txt``; all other lines
    are written back.  ``makeConll`` then converts the batch into
    ``batchConll.conll`` using ``noise``, and both batch files are appended
    to ``labeledPool.txt`` and ``labeledPool.conll``.  All files live under
    ``<epic root>/data/PoolData``.

    Args:
        randomIds: iterable of ids; each one is converted with ``str``.
        noise: passed through unchanged as the third argument of ``makeConll``.

    Returns:
        The constant string ``"Tmp file: "``.
    """
    # The epic root is the working directory cut off right after its last "epic".
    # NOTE(review): if the cwd does not contain "epic", rfind returns -1 and the
    # root silently becomes the first three characters of the cwd.
    pathToEpic = os.getcwd()
    pathToEpic = pathToEpic[0:pathToEpic.rfind("epic") + 4]
    poolDir = pathToEpic + "/data/PoolData/"
    batchPath = poolDir + "batch.txt"
    batchConllPath = poolDir + "batchConll.conll"
    unlabeledPath = poolDir + "unlabeledPool.txt"
    labeledPath = poolDir + "labeledPool.txt"
    labeledConllPath = poolDir + "labeledPool.conll"

    returnString = "Tmp file: "
    print("Inside moveBatch")

    # The truncated id strings do not depend on the line, so build them once.
    # Materialising them also keeps a one-shot iterable usable for every line.
    # NOTE(review): presumably the last two characters are dropped to tolerate
    # differing float representations of the id -- confirm.
    idPrefixes = [str(oneID)[:-2] for oneID in randomIds]

    with open(os.path.expanduser(unlabeledPath), 'r') as readUnlabeled:
        lines = readUnlabeled.readlines()

    # Partition in memory first: the pool file is only truncated once the
    # complete replacement content is known, so a failure while matching ids
    # cannot wipe the unlabeled pool.
    keptLines = []
    batchLines = []
    for line in lines:
        if any(prefix in line for prefix in idPrefixes):
            batchLines.append(line)
        else:
            keptLines.append(line)

    with open(os.path.expanduser(unlabeledPath), 'w') as writeUnlabeled:
        print("Unlabeled openened for writing")
        writeUnlabeled.writelines(keptLines)
    with open(os.path.expanduser(batchPath), 'w') as batch:
        batch.writelines(batchLines)

    # Get Conll of the batch and add it to the conll of the labeled pool.
    makeConll(batchPath, batchConllPath, noise)

    # Open all four files before writing so a missing batch file cannot leave
    # the text pool and the CoNLL pool out of step with each other.
    with open(os.path.expanduser(labeledPath), 'a') as labeledOrig, \
            open(os.path.expanduser(labeledConllPath), 'a') as labeledOrigConll, \
            open(os.path.expanduser(batchPath), 'r') as batchIn, \
            open(os.path.expanduser(batchConllPath), 'r') as batchConllIn:
        labeledOrig.write(batchIn.read())
        labeledOrigConll.write(batchConllIn.read())

    return returnString
示例#8
0
import os
import unicodedata
from makeConllFromDBOutput import makeConll

# Build the CoNLL evaluation test set: convert the positive, negative and
# fake-positive text files, then concatenate the three CoNLL outputs.

# The epic root is the working directory cut off right after its last "epic".
pathToEpic = os.getcwd()
pathToEpic = pathToEpic[0 : pathToEpic.rfind("epic") + 4]

# pathToEpic ends in "epic" without a trailing separator, so a "/" is needed
# before "data"; without it the paths pointed at ".../epicdata/...".
evalDir = pathToEpic + "/data/epicEvalutationTestSet"

positiveFile = evalDir + "/positives.txt"
positiveConll = evalDir + "/positives.conll"
negativeFile = evalDir + "/negatives.txt"
negativeConll = evalDir + "/negatives.conll"
positiveFakeFile = evalDir + "/fakePositives.txt"
positiveFakeConll = evalDir + "/fakePositives.conll"

# Noise is fixed at 0.0 for all three conversions.
makeConll(positiveFile, positiveConll, 0.0)
makeConll(negativeFile, negativeConll, 0.0)
makeConll(positiveFakeFile, positiveFakeConll, 0.0)

# Concatenation order: positives, fake positives, negatives.
filenames = [positiveConll, positiveFakeConll, negativeConll]
with open(os.path.expanduser(evalDir + ".conll"), "w") as outfile:
    for fname in filenames:
        with open(os.path.expanduser(fname)) as infile:
            outfile.write(infile.read())