示例#1
0
def main(args):
    """Embed the lensing data in 2-D with bh_tsne and export the result.

    The pipeline reads ``data/fullData.json``, extracts features with
    ``featureExtraction.extBagOfWordFeatures``, runs ``bh_tsne`` on them and
    then, depending on the parsed options, renders the points to a PNG
    and/or writes them to a coordinates file.

    Args:
        args: Full argument vector. ``args[0]`` is skipped and the rest is
            parsed by ``_argparse()``. The options read here are
            ``perplexity``, ``theta``, ``verbose``, ``render`` and ``write``.
    """
    argp = _argparse().parse_args(args[1:])

    print("Reading Data")
    lensingJson = featureExtraction.readData('data/fullData.json')

    print("Extracting Features")
    data = featureExtraction.extBagOfWordFeatures(lensingJson)

    # Every record is labelled with its position in the input, as a string.
    titles = [str(i) for i in range(len(lensingJson))]

    # NOTE: an optional PCA reduction (PCA(data, 30)) used to run here; it is
    # currently disabled.

    result = bh_tsne(data, perplexity=argp.perplexity, theta=argp.theta,
                     verbose=argp.verbose)

    # Pair titles with coordinates exactly once. Both the render and the
    # write step reuse this list, so the output stays complete even if
    # bh_tsne hands back a one-shot iterator.
    points = [(title, point[0], point[1])
              for title, point in zip(titles, result)]

    if argp.render:
        print("Rendering Image")
        import render
        render.render(points, "output/lensing500p30-data.rendered.png",
                      width=3000, height=1800)

    if argp.write:
        print("Writing data to file")
        resData = {title: {'x': x, 'y': y} for title, x, y in points}

        # Take the bounds from the data itself. Seeding the trackers with 0
        # misreports min/max whenever every coordinate on an axis lies on
        # one side of the origin. With no points at all, report zeros.
        if points:
            xs = [x for _, x, _ in points]
            ys = [y for _, _, y in points]
            minx, maxx = min(xs), max(xs)
            miny, maxy = min(ys), max(ys)
        else:
            minx = maxx = miny = maxy = 0

        print("creating json")
        print(len(resData))
        jsonStr = json.dumps(resData)
        print("MinX - %s MaxX - %s MinY - %s MaxY - %s" % (minx, maxx, miny, maxy))

        # The payload is written as a single "jsonstr = {...}" line.
        with open('output/coordinateslensing-full-srl-p40.json', 'w') as outFile:
            outFile.write("jsonstr = ")
            outFile.write(jsonStr + '\n')
import unicodedata
import sys

def readCoordinateJson(filename):
    """Load the JSON payload from a ``jsonstr = {...}`` coordinates file.

    Blank lines are skipped. Each remaining line must contain the marker
    ``'jsonstr = '``; everything after its first occurrence is parsed as
    JSON. If several such lines exist, the last one wins.

    Args:
        filename: Path of the file to read.

    Returns:
        The object decoded from the JSON payload.

    Raises:
        ValueError: If a non-blank line lacks the marker, if the file holds
            no payload line at all, or if the payload is not valid JSON.
    """
    marker = 'jsonstr = '
    jsonObjects = None
    found = False
    with open(filename) as inFile:
        for line in inFile:
            line = line.strip('\n\r')
            if not line:
                continue
            # Split on the FIRST marker only: the JSON text may itself
            # contain the marker string inside one of its values.
            _, sep, payload = line.partition(marker)
            if not sep:
                raise ValueError(
                    "%s: line does not contain %r" % (filename, marker))
            jsonObjects = json.loads(payload)
            found = True
    if not found:
        raise ValueError("%s: no %r payload found" % (filename, marker))
    return jsonObjects


# Merge the stored 2-D coordinates with the event label and description of
# each source record; entries are keyed by the record index as a string.
coordinateObjs = readCoordinateJson('output/coordinateslensing-full-srl-p40.json')
titles = range(0, len(coordinateObjs))
# Use only as many source records as there are coordinate entries.
fullDataObjs = featureExtraction.readData('data/fullData.json')[:len(titles)]

finalData = {}
for (fdObj, title) in izip(fullDataObjs, titles):
    coordinateObj = coordinateObjs[str(title)]
    assert coordinateObj
    # First string entry of the record's 'event' field. It is resolved
    # afresh for every record (None when there is no string entry) so that a
    # record can never inherit the event of the one processed before it.
    event = next((e for e in fdObj['event'] if isinstance(e, basestring)), None)
    text = fdObj['description']
    coordinateObj['event'] = event
    coordinateObj['text'] = text
    finalData[str(title)] = coordinateObj

with open('output/coordinate-srl-full-p40.json','w') as outFile: