# Debug script: load the BLESS dependency pairs, split the second word of each
# triple into two lists according to its target label, print both lists and
# stop.
# NOTE(review): this section had lost its line breaks and indentation; the
# nesting below is a reconstruction -- confirm against the original layout.
inputfile = "allBLESS-dependencies.json"
inputpath = os.path.join(parameters['datadir'], inputfile)
print(inputpath)

# Read the file once so the same text can be both echoed and parsed.
with open(inputpath, 'r') as instream:
    raw_lines = instream.readlines()

for line in raw_lines:
    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    print(line)

# Fix: the original called json.loads(inputpath), i.e. tried to parse the
# *path string* as JSON, which always fails.  Parse the file contents instead.
# Assumes the file holds one JSON array of [w1, w2, target] triples, as the
# unpacking loop below requires -- TODO confirm.
pairs = json.loads("".join(raw_lines))

cluster0 = []  # second words of triples whose target is not 1
cluster1 = []  # second words of triples whose target is 1
for (_w1, w2, target) in pairs:
    if target == 1:
        cluster1.append(w2)
    else:
        cluster0.append(w2)

# "%d %s" formatting reproduces the output of `print len(x), x`.
print("%d %s" % (len(cluster0), cluster0))
print("%d %s" % (len(cluster1), cluster1))
exit()

# NOTE(review): everything below is unreachable while the exit() call above is
# in place; kept as-is so the neighbour display can be re-enabled by removing
# that call.
words = ["chicken", "cricket", "jaguar"]
pos = "N"
mythes = Thesaurus("", parameters["simfile"], True, False, parameters["k"], 1, 1, False)
mythes.readsomesims(words)
for word in words:
    mythes.displayneighs((word, pos), 100)
# Driver script: build a Thesaurus from the configured vector file, read its
# vectors, output the similarity of the test pair and call displayneighs for
# every entry of `words`.
# NOTE(review): this section had lost its line breaks and indentation; the
# nesting below is a reconstruction -- confirm in particular that
# allpairssims(), the displayneighs() loop and analyse() are meant to run
# unconditionally rather than inside one of the `simcache` branches.
kdisplay = 10  # passed as the second argument of displayneighs() below
print(sys.argv)

# take command line argument as to whether this is a byblo file or not
Thesaurus.byblo = byblo

# compress is enabled only when the metric is cosine.
compress = (metric == "cosine")

mythes = Thesaurus(
    vectorfilename, simcachefile, simcache, windows, k, adja, adjb, compress
)
mythes.readvectors()

# Disabled alternative kept from the original:
# if simcache:
#     check=True
# else:
#     for wordA in words:
#         for wordB in words:
#             mythes.outputsim(wordA,wordB,metric)

word1, word2 = testpair

# `== False` is kept on purpose: `not simcache` would also fire for None or
# other falsy non-bool values, which the original comparison does not.
if simcache == False:
    mythes.outputsim(word1, word2, metric)
mythes.allpairssims(metric)
if simcache:
    mythes.outputsim(word1, word2, metric)

for word in words:
    mythes.displayneighs(word, kdisplay)
mythes.analyse()