def getMetamapResults(sentences, metaMapBinDir):
    """Run MetaMap over *sentences* and return the parsed phrase findings.

    Writes the sentences to a temp file in MetaMap's single-line delimited
    input format ("id|sentence"), invokes the ``metamap`` binary under
    *metaMapBinDir*, polls for the ``.out`` file MetaMap produces, parses it
    with ``phrases()``, and removes both temp files before returning.

    NOTE(review): when ``sentences is None`` this still dereferences
    ``input_file.name`` and raises AttributeError — confirm callers never
    pass None before hardening this path.
    NOTE(review): ``%r`` wraps each sentence in repr() quotes; preserved as-is
    since the downstream parser may rely on it — verify against MetaMap docs.
    """
    import time  # local import: used only for the polling back-off below

    metaMapExec = metaMapBinDir + 'metamap'
    fileExists = False
    input_file = None
    if sentences is not None:
        input_file = open("temp_input", "w")
        # 1-based ids paired with each sentence, one "id|sentence" per line.
        ids = range(1, len(sentences) + 1)
        for identifier, sentence in zip(ids, sentences):
            input_file.write('%r|%r\n' % (identifier, sentence))
        input_file.flush()
    # BUG FIX: '--prune 2' was a single argv token, which MetaMap's option
    # parser does not recognise; the flag and its value must be separate
    # list elements when shell=False.
    command = [metaMapExec, '--sldiID', '--silent', '--prune', '2']
    command.append(input_file.name)
    metamap_process = subprocess.Popen(command, stdout=subprocess.PIPE)
    # Poll until MetaMap has produced a non-empty output file.  Sleep between
    # checks so the wait does not spin at 100% CPU (original busy-waited).
    while not fileExists:
        if os.path.isfile("temp_input.out"):
            fileExists = os.stat("temp_input.out").st_size != 0
        if not fileExists:
            time.sleep(0.1)
    findings = phrases("temp_input.out")
    # Close before removing (original leaked the open file handle).
    input_file.close()
    os.remove(input_file.name)
    os.remove("temp_input.out")
    return findings
def getMetamapResults(sentences, metaMapBinDir):
    """Run MetaMap over *sentences* and return the parsed phrase findings.

    Same contract as the fixed-filename variant, but names the temp input /
    output files with a random suffix under /tmp so concurrent invocations
    do not clobber each other's files.

    NOTE(review): when ``sentences is None`` this still dereferences
    ``input_file.name`` and raises AttributeError — confirm callers never
    pass None before hardening this path.
    """
    import time  # local import: used only for the polling back-off below

    metaMapExec = metaMapBinDir + 'metamap'
    fileExists = False
    input_file = None
    RANDOM_SUFFIX_RANGE = 1000000
    # BUG FIX: the constant was declared but the literal 1000000 was passed
    # instead of it; use the named constant so the range has one source.
    SUFFIX = random.randint(0, RANDOM_SUFFIX_RANGE)
    if sentences is not None:
        input_file = open("/tmp/sunburst_metamap_%d" % SUFFIX, "w")
        # 1-based ids paired with each sentence, one "id|sentence" per line.
        ids = range(1, len(sentences) + 1)
        for identifier, sentence in zip(ids, sentences):
            input_file.write('%r|%r\n' % (identifier, sentence))
        input_file.flush()
    command = [metaMapExec, '--sldiID', '--silent']
    command.append(input_file.name)
    metamap_process = subprocess.Popen(command, stdout=subprocess.PIPE)
    output_file_name = "/tmp/sunburst_metamap_%d.out" % SUFFIX
    # Poll until MetaMap has produced a non-empty output file.  Sleep between
    # checks so the wait does not spin at 100% CPU (original busy-waited).
    while not fileExists:
        if os.path.isfile(output_file_name):
            fileExists = os.stat(output_file_name).st_size != 0
        if not fileExists:
            time.sleep(0.1)
    findings = phrases(output_file_name)
    # Close before removing (original leaked the open file handle).
    input_file.close()
    os.remove(input_file.name)
    os.remove(output_file_name)
    return findings
# NOTE(review): this chunk is the tail of an `if options.program == ...`
# dispatch branch whose opening `if` lies above this excerpt; the trailing
# `elif` continues that chain.  Indentation reconstructed accordingly;
# tokens unchanged.
    # Trajectory-analysis branch: build an MDAnalysis Universe, find contact
    # "phrases" between receptor and ligand selections, pickle them, and
    # save the pairwise distance matrix before exiting.
    top = options.top            # topology file path
    trj = options.traj           # trajectory file path
    b = options.begin            # first frame to analyse
    e = options.end              # last frame to analyse
    skip = options.skip          # frame stride
    rec_str = options.receptor   # receptor atom-selection string
    lig_str = options.ligand     # ligand atom-selection string
    lig_dist_cutoff = options.dist_cutoff
    min_phrase_len = 3           # minimum phrase length, hard-coded here
    u = MD.Universe(top, trj)
    receptor = u.select_atoms(rec_str)
    ligand = u.select_atoms(lig_str)
    P = phrases.phrases(u, receptor, ligand, lig_dist_cutoff, min_phrase_len)
    P.find_phrases(b, e, skip)
    # Persist the raw phrases with the highest pickle protocol available.
    with open(options.out + '-phrases.dat', 'wb') as output:
        pickle.dump(P.phrases, output, pickle.HIGHEST_PROTOCOL)
    P.calc_dist()
    np.savez(options.out + "-distance.npz", P.D)
    exit()
elif options.program == "anal-phrases":
    # Phrase-analysis branch (body continues beyond this excerpt).
    res0 = options.res0
    threshold = options.jac
    # NOTE(review): `!= None` should idiomatically be `is not None`; left
    # unchanged here since this excerpt is a comment-only update.
    if options.dist != None:
        P = phrases.read_phrases(options.phrases, min_len=3, dist=options.dist)
# NOTE(review): this chunk starts mid-way through a run of option unpacking
# (earlier assignments such as top/trj/b/e lie above this excerpt) and ends
# inside an `if` body that continues below it.  Indentation reconstructed;
# tokens unchanged.
skip = options.skip          # frame stride
cutoff = options.cutoff      # distance cutoff; None means "derive below"
rec_str = options.receptor   # receptor atom-selection string
lig_str = options.ligand     # ligand atom-selection string
res0 = options.res0
threshold = options.jac
lig_dist_cutoff = options.lig_dist
min_phrase_len = 3           # minimum phrase length, hard-coded here
u = MD.Universe(top,trj)
receptor = u.select_atoms(rec_str)
ligand = u.select_atoms(lig_str)
P = phrases.phrases(u,receptor,ligand,lig_dist_cutoff,min_phrase_len)
P.find_phrases(b,e,skip)
# Persist the raw phrases with the highest pickle protocol available.
with open(options.out+'-phrases.dat', 'wb') as output:
    pickle.dump(P.phrases, output, pickle.HIGHEST_PROTOCOL)
P.calc_dist()
np.savez(options.out+"-distance.npz",P.D)
# 1002 evenly spaced percentile points over [0, 100] of the distance matrix.
p = np.linspace(0,100,1002)
perc = np.percentile(P.D,p)
# NOTE(review): `==None` should idiomatically be `is None`; left unchanged
# in this comment-only update.  The `if` body continues past this excerpt.
if cutoff==None:
    n_val = 551
# Build a sparse document x feature matrix ("problem") for 5000 training
# files: columns [0, 3111) are single-word features from sword.set, columns
# [3111, 17173) are phrase features from phrase.set.  The first 2500 files
# are read from pos_dir, the rest from neg_dir.  Result is pickled to
# 'problem.matrix'.
with open('sword.set', 'rb') as f:
    sword_list = load(f)
with open('phrase.set', 'rb') as f:
    phrase_list = load(f)

# PERF FIX: the original ran `token in sword_list` plus `sword_list.index(
# token)` (both O(n) list scans) for every token of every document —
# accidentally quadratic.  Build word -> column lookup tables once instead.
# setdefault keeps the FIRST occurrence's index, matching list.index()
# semantics if either list contains duplicates.
sword_index = {}
for idx, w in enumerate(sword_list):
    sword_index.setdefault(w, idx)
phrase_index = {}
for idx, ph in enumerate(phrase_list):
    phrase_index.setdefault(ph, idx)

problem = lil_matrix((5000, 17173))
n = 0  # NOTE(review): unused in this excerpt; kept in case later code reads it
for i, tfile in enumerate(train_files):
    # First 2500 files are the positive class, the remainder negative.
    fdir = pos_dir if i < 2500 else neg_dir
    with open(fdir + tfile) as f:
        text = f.read()
    tokens = tokenize(text)
    fphrases = phrases(tokens)
    for token in tokens:
        ind = sword_index.get(token)
        if ind is not None:
            problem[i, ind] = 1
    for p in fphrases:
        ind = phrase_index.get(p)
        if ind is not None:
            # Phrase columns start after the 3111 single-word columns.
            problem[i, ind + 3111] = 1
with open('problem.matrix', 'wb') as f:
    dump(problem, f)