def getMetamapResults(sentences, metaMapBinDir):
    """Run the MetaMap binary over *sentences* and return the parsed findings.

    Writes the sentences, one per line as ``<id>|<sentence>`` (both via
    ``%r``, preserving the original on-disk format), to a temp file named
    ``temp_input``, launches ``metamap`` on it, polls until a non-empty
    ``temp_input.out`` appears, parses it with the module-level ``phrases``
    helper and removes both temp files.

    Parameters:
        sentences: iterable of sentence strings; must not be None.
        metaMapBinDir: directory containing the ``metamap`` executable,
            including a trailing path separator (it is concatenated directly).

    Returns:
        Whatever ``phrases("temp_input.out")`` returns.

    Raises:
        ValueError: if *sentences* is None (previously this fell through and
            crashed later with an AttributeError on ``input_file.name``).
    """
    import time  # local import: only needed for the polling sleep below

    metaMapExec = metaMapBinDir + 'metamap'

    if sentences is None:
        raise ValueError("sentences must not be None")

    # Write numbered sentences (1-based ids) and close the file so MetaMap
    # is guaranteed to see the full, flushed contents.
    with open("temp_input", "w") as input_file:
        for identifier, sentence in enumerate(sentences, start=1):
            input_file.write('%r|%r\n' % (identifier, sentence))

    # BUG FIX: '--prune 2' was passed as a single argv element, so MetaMap
    # received one malformed token; the option and its value must be
    # separate list elements when shell=False.
    command = [metaMapExec, '--sldiID', '--silent', '--prune', '2',
               'temp_input']

    metamap_process = subprocess.Popen(command, stdout=subprocess.PIPE)

    # Poll until MetaMap has produced a non-empty output file; sleep between
    # checks so the wait no longer busy-spins a full CPU core.
    while not (os.path.isfile("temp_input.out")
               and os.stat("temp_input.out").st_size != 0):
        time.sleep(0.1)

    findings = phrases("temp_input.out")

    os.remove("temp_input")
    os.remove("temp_input.out")

    return findings
def getMetamapResults(sentences, metaMapBinDir):
    """Run the MetaMap binary over *sentences* and return the parsed findings.

    Same flow as the variant above, but the temp input/output files get a
    random suffix under ``/tmp`` so concurrent invocations do not clobber
    each other's files.

    Parameters:
        sentences: iterable of sentence strings; must not be None.
        metaMapBinDir: directory containing the ``metamap`` executable,
            including a trailing path separator (it is concatenated directly).

    Returns:
        Whatever ``phrases(<output file>)`` returns.

    Raises:
        ValueError: if *sentences* is None (previously this fell through and
            crashed later with an AttributeError on ``input_file.name``).
    """
    import time  # local import: only needed for the polling sleep below

    metaMapExec = metaMapBinDir + 'metamap'

    if sentences is None:
        raise ValueError("sentences must not be None")

    # FIX: the constant was defined but the literal 1000000 was repeated in
    # the randint call; use the named constant as intended.
    RANDOM_SUFFIX_RANGE = 1000000
    suffix = random.randint(0, RANDOM_SUFFIX_RANGE)
    input_file_name = "/tmp/sunburst_metamap_%d" % suffix
    output_file_name = "/tmp/sunburst_metamap_%d.out" % suffix

    # Write numbered sentences (1-based ids) and close the file so MetaMap
    # is guaranteed to see the full, flushed contents.
    with open(input_file_name, "w") as input_file:
        for identifier, sentence in enumerate(sentences, start=1):
            input_file.write('%r|%r\n' % (identifier, sentence))

    command = [metaMapExec, '--sldiID', '--silent', input_file_name]

    metamap_process = subprocess.Popen(command, stdout=subprocess.PIPE)

    # Poll until MetaMap has produced a non-empty output file; sleep between
    # checks so the wait no longer busy-spins a full CPU core.
    while not (os.path.isfile(output_file_name)
               and os.stat(output_file_name).st_size != 0):
        time.sleep(0.1)

    findings = phrases(output_file_name)

    os.remove(input_file_name)
    os.remove(output_file_name)

    return findings
# Exemplo n.º 3
# 0
    # NOTE(review): this span starts mid-branch of an option-dispatch
    # if/elif chain whose opening `if` (and the `options` parser, `MD`,
    # `phrases`, `pickle`, `np` imports) are outside this view.
    # Unpack trajectory and selection settings from the parsed command line.
    top = options.top
    trj = options.traj
    # Frame window: begin, end, and stride for the analysis loop.
    b = options.begin
    e = options.end
    skip = options.skip
    # Atom-selection strings (MDAnalysis selection syntax, presumably).
    rec_str = options.receptor
    lig_str = options.ligand
    lig_dist_cutoff = options.dist_cutoff
    # Minimum number of residues/tokens for a "phrase"; fixed at 3 here.
    min_phrase_len = 3

    # Build the simulation universe from topology + trajectory.
    u = MD.Universe(top, trj)

    receptor = u.select_atoms(rec_str)
    ligand = u.select_atoms(lig_str)

    # Phrase finder over receptor/ligand contacts within the cutoff.
    P = phrases.phrases(u, receptor, ligand, lig_dist_cutoff, min_phrase_len)

    P.find_phrases(b, e, skip)

    # Persist the raw phrases for later analysis.
    with open(options.out + '-phrases.dat', 'wb') as output:
        pickle.dump(P.phrases, output, pickle.HIGHEST_PROTOCOL)

    # Distance matrix between phrases, saved alongside the phrases file.
    P.calc_dist()
    np.savez(options.out + "-distance.npz", P.D)
    exit()

elif options.program == "anal-phrases":
    # Analysis branch: reload previously computed phrases instead of
    # recomputing them from a trajectory.
    res0 = options.res0
    threshold = options.jac
    # NOTE(review): `!= None` should be `is not None` per PEP 8.
    if options.dist != None:
        P = phrases.read_phrases(options.phrases, min_len=3, dist=options.dist)
# Exemplo n.º 4
# 0
# NOTE(review): flat script fragment; `options`, `top`, `trj`, `b`, `e`,
# `MD`, `phrases`, `pickle`, `np` are defined/imported outside this view.
# Unpack analysis settings from the parsed command line.
skip    = options.skip
cutoff  = options.cutoff
rec_str = options.receptor
lig_str = options.ligand
res0    = options.res0
threshold = options.jac
lig_dist_cutoff = options.lig_dist

# Minimum number of residues/tokens for a "phrase"; fixed at 3 here.
min_phrase_len = 3

# Build the simulation universe from topology + trajectory.
u = MD.Universe(top,trj)

receptor = u.select_atoms(rec_str)
ligand = u.select_atoms(lig_str)

# Phrase finder over receptor/ligand contacts within the cutoff.
P = phrases.phrases(u,receptor,ligand,lig_dist_cutoff,min_phrase_len)

P.find_phrases(b,e,skip)


# Persist the raw phrases for later analysis.
with open(options.out+'-phrases.dat', 'wb') as output:
    pickle.dump(P.phrases, output, pickle.HIGHEST_PROTOCOL)

# Distance matrix between phrases, saved alongside the phrases file.
P.calc_dist()
np.savez(options.out+"-distance.npz",P.D)

# Percentiles of the distance matrix on a fine 0-100 grid (1002 points).
p = np.linspace(0,100,1002)
perc = np.percentile(P.D,p)

# Fallback when no cutoff was supplied; 551 presumably indexes into the
# percentile grid above — TODO confirm against the continuation of this
# script (truncated here).
# NOTE(review): `== None` should be `is None` per PEP 8.
if cutoff==None:
    n_val = 551
# NOTE(review): flat script fragment; `train_files`, `pos_dir`, `neg_dir`,
# `tokenize`, `phrases`, `load`, `dump`, and `lil_matrix` are defined or
# imported outside this view (load/dump are presumably pickle's — confirm).
# Load the precomputed vocabularies used as feature indices.
with open('sword.set', 'rb') as f:
    sword_list = load(f)

with open('phrase.set','rb') as f:
    phrase_list = load(f)

# Sparse binary feature matrix: one row per training document, 17173
# feature columns (word features first, phrase features offset by 3111).
problem = lil_matrix((5000, 17173))
n = 0

for i, tfile in enumerate(train_files):
    # First 2500 files are the positive class directory, the rest negative.
    if i < 2500:
        fdir = pos_dir
    else:
        fdir = neg_dir
    with open(fdir+tfile) as f:
        text = f.read()
        tokens = tokenize(text)
        fphrases = phrases(tokens)
    # Set the word-feature columns present in this document.
    # NOTE(review): list.index inside the loop is O(n) per lookup; a
    # token->index dict would be faster, left unchanged here.
    for token in tokens:
        if token in sword_list:
            ind = sword_list.index(token)
            problem[i, ind] = 1
    # Phrase features live in columns starting at offset 3111.
    for p in fphrases:
        if p in phrase_list:
            ind = phrase_list.index(p) + 3111
            problem[i, ind] = 1

# Persist the assembled feature matrix.
with open('problem.matrix', 'wb') as f:
    dump(problem, f)