def main():
    """Run every model configuration over the test corpus and log the results.

    Walks ``../Java/Test/`` and skips any file whose name contains one of the
    blacklisted hashes.  The compilation units of each remaining file are
    extracted with ``e.ExtractCode`` and ``LM.getLL`` is queried once per
    combination of the ``sm`` (mode, parameter), ``vp`` and ``fill`` settings.
    The first 20 ``(call, ll)`` entries of every answer are printed and
    written to ``results.csv`` as semicolon-separated rows.
    """
    par = plyj.parser.Parser()
    corpus_path = "../Java/Test/"
    blacklist = ["5a8beeae20366b5094d0db8148e0563",
                 "3cd87ee90872cfcb72b3cb3b773d8efa"]
    sm = [["levels", 3], ["levels", 2], ["levels", 1], ["levels", "MEMM"],
          ["cfs", 3], ["cfs", 2], ["cfs", 1]]
    vp = [3, 2, 1]
    fill = ["max", "random"]

    # Flattened cross product, in the same order the nested loops would visit.
    settings = [(smod, mp, v, fi)
                for smod, mp in sm
                for v in vp
                for fi in fill]

    # The context manager closes results.csv even when parsing or scoring
    # raises part-way through the run.
    with open("results.csv", 'w') as fout:
        for subdir, _dirs, files in os.walk(corpus_path):
            for f in files:
                if any(h in f for h in blacklist):
                    continue
                cus = e.ExtractCode(par, os.path.join(subdir, f))
                # Both values depend only on the file name, so compute once.
                unk = f.endswith(".java")
                stem = f[:-5]
                for smod, mp, v, fi in settings:
                    for i, cu in cus:
                        # Work on a copy so one query cannot affect the next.
                        cu = copy.deepcopy(cu)
                        ans = LM.getLL(cu, i, smod, mp, v, "pot", fi)
                        print(' '.join(str(x)
                                       for x in (smod, mp, v, "pot", fi)))
                        for call, ll in ans[:20]:
                            print(str(ll) + ': ' + e.nstr(call))
                            # NOTE(review): the original layout was lost; the
                            # row is assumed to be written once per printed
                            # candidate because it uses the loop variable
                            # ``ll`` -- confirm against the original file.
                            fout.write(';'.join([stem, smod, str(mp), str(v),
                                                 fi, str(unk), str(ll)])
                                       + '\n')
def main():
    """Print vocabulary and sentence-length statistics for the Java corpus.

    Walks ``../Java/Corpus/``, processes every ``.java`` file whose name does
    not contain a blacklisted hash, and feeds each extracted compilation unit
    to ``seq.getSents`` in "levels" mode.  Signatures returned by ``getSig``
    are grouped by their first element.  At the end the grouped signatures,
    the vocabulary size, the number of distinct entries in the two lists
    returned alongside the sentences, and the sentence-length histogram are
    printed.
    """
    parser = plyj.parser.Parser()
    corpus_root = "../Java/Corpus/"
    skip_hashes = [
        "5a8beeae20366b5094d0db8148e0563",
        "3cd87ee90872cfcb72b3cb3b773d8efa",
    ]
    grouped_sigs = {}
    length_hist = {}
    sf_all = []
    fields_all = []
    unit_no = 1

    for folder, _subdirs, names in os.walk(corpus_root):
        for name in names:
            if not name.endswith(".java"):
                continue
            if any(bad in name for bad in skip_hashes):
                continue
            units = e.ExtractCode(parser, os.path.join(folder, name))
            for idx, unit in units:
                unit_sf, unit_fields, sentences = seq.getSents(
                    unit, idx, "levels")
                sf_all.extend(unit_sf)
                fields_all.extend(unit_fields)
                print(str(unit_no) + ": " + str(len(sentences)))
                unit_no += 1
                for sentence, var_list in sentences:
                    size = len(sentence)
                    length_hist[size] = length_hist.get(size, 0) + 1
                    for statement in sentence:
                        sig = getSig(statement, var_list)
                        # Key on the first element, keep the remainder.
                        grouped_sigs.setdefault(sig[0], []).append(sig[1:])

    for key in grouped_sigs:
        print(key)
        for resolved in resolveSigs(grouped_sigs[key]):
            print('\t' + e.nstr(resolved))
    print(len(grouped_sigs))
    print(len(set(sf_all)))
    print(len(set(fields_all)))
    print(length_hist)
def main():
    """Dump renamed code and variables for the parser test files.

    Walks ``../Java/ParseTests/`` and prints every file name seen.  For files
    ending in ``test.java`` each extracted compilation unit has its
    ``getUNR()`` entries collected, its variables renamed, and its string
    form and dumped variables printed.  All collected ``getUNR()`` entries are
    printed once the walk is finished.
    """
    parser = plyj.parser.Parser()
    tests_root = "../Java/ParseTests/"
    collected = []

    for folder, _subdirs, names in os.walk(tests_root):
        for name in names:
            print(name)
            if not name.endswith("test.java"):
                continue
            source_path = os.path.join(folder, name)
            for _idx, unit in e.ExtractCode(parser, source_path):
                # Collect before renaming, matching the original call order.
                collected.extend(unit.getUNR())
                unit.renameVars()
                print(unit.getStr())
                for var in unit.dumpVars():
                    print(e.nstr(var))

    for entry in collected:
        print(e.nstr(entry))
def main():
    """Gather and print signature vocabulary statistics over the corpus.

    Every non-blacklisted ``.java`` file below ``../Java/Corpus/`` is parsed;
    each compilation unit is turned into sentences by ``seq.getSents`` using
    the "levels" mode.  The function prints a progress line per unit, then
    the signatures grouped by their first element, followed by the vocabulary
    size, the distinct counts of the two auxiliary lists from ``getSents``,
    and a histogram mapping sentence length to occurrence count.
    """
    java_parser = plyj.parser.Parser()
    root = "../Java/Corpus/"
    banned = ("5a8beeae20366b5094d0db8148e0563",
              "3cd87ee90872cfcb72b3cb3b773d8efa")
    vocabulary = {}
    len_counts = {}
    sf_total = []
    field_total = []
    counter = 1

    for directory, _dirs, filenames in os.walk(root):
        # Keep only Java sources whose name contains no banned hash.
        wanted = [fn for fn in filenames
                  if fn.endswith(".java")
                  and not any(tag in fn for tag in banned)]
        for fn in wanted:
            full_path = os.path.join(directory, fn)
            for index, comp_unit in e.ExtractCode(java_parser, full_path):
                result = seq.getSents(comp_unit, index, "levels")
                sf_part, field_part, sents = result
                sf_total.extend(sf_part)
                field_total.extend(field_part)
                print(str(counter) + ": " + str(len(sents)))
                counter += 1
                for sent, var_list in sents:
                    n = len(sent)
                    if n not in len_counts:
                        len_counts[n] = 0
                    len_counts[n] += 1
                    for stat in sent:
                        signature = getSig(stat, var_list)
                        head, tail = signature[0], signature[1:]
                        if head not in vocabulary:
                            vocabulary[head] = []
                        vocabulary[head].append(tail)

    for head in vocabulary:
        print(head)
        for resolved in resolveSigs(vocabulary[head]):
            print('\t' + e.nstr(resolved))
    print(len(vocabulary))
    print(len(set(sf_total)))
    print(len(set(field_total)))
    print(len_counts)
def main():
    """Write method sentences, variable sentences and the vocabulary to disk.

    The mode is taken from ``sys.argv[1]`` and falls back to "levels" when it
    is missing or not one of "cfs" / "levels".  Every non-blacklisted
    ``.java`` file below ``../Java/Corpus/`` is parsed and passed through
    ``seq.getSents``.  Three files are written to ``../Data/Raw``:

    * ``method_sentences_<mode>.txt`` -- per sentence: ``<S2>``, ``<S1>``, one
      ``signature # context`` line per statement, then ``<END>``.
    * ``variable_sentences_<mode>.txt`` -- same framing, one line per
      statement of each sentence returned by ``seq.getVarSents``.
    * ``vocab_<mode>.txt`` -- each vocabulary key followed by its resolved
      signatures, tab-indented.
    """
    par = plyj.parser.Parser()
    modes = ["cfs", "levels"]
    mode = sys.argv[1] if len(sys.argv) > 1 else "levels"
    if mode not in modes:
        mode = "levels"

    corpus_path = "../Java/Corpus/"
    data_path = "../Data/Raw"
    meth_path = os.path.join(data_path, "method_sentences_" + mode + ".txt")
    var_path = os.path.join(data_path, "variable_sentences_" + mode + ".txt")
    vocab_path = os.path.join(data_path, "vocab_" + mode + ".txt")

    vocab = {}
    ctr = 1
    blacklist = ["5a8beeae20366b5094d0db8148e0563",
                 "3cd87ee90872cfcb72b3cb3b773d8efa"]

    # Context managers guarantee all three files are flushed and closed even
    # if parsing or sentence extraction raises part-way through the corpus.
    with open(meth_path, 'w') as meth_file, \
            open(var_path, 'w') as var_file, \
            open(vocab_path, 'w') as vocab_file:
        for subdir, _dirs, files in os.walk(corpus_path):
            for f in files:
                if not f.endswith(".java"):
                    continue
                if any(h in f for h in blacklist):
                    continue
                cus = e.ExtractCode(par, os.path.join(subdir, f))
                for i, cu in cus:
                    # Only the sentences are needed here; the other two
                    # return values are not used by this script.
                    _sf, _fields, sents = seq.getSents(cu, i, mode)
                    print(str(ctr) + ": " + str(len(sents)))
                    ctr += 1
                    for sent, vl in sents:
                        meth_file.write("<S2>\n")
                        meth_file.write("<S1>\n")
                        for stat, ctx in sent:
                            meth_file.write(
                                e.nstr(t.getSig(stat, vl, False))
                                + ' # ' + e.nstr(ctx) + '\n')
                            s = t.getSig(stat, vl)
                            # Group signatures by their first element.
                            vocab.setdefault(s[0], []).append(s[1:])
                        meth_file.write('<END>\n')
                    for vsent in seq.getVarSents(sents):
                        var_file.write("<S2>\n")
                        var_file.write("<S1>\n")
                        for stat, _ctx in vsent:
                            var_file.write(e.nstr(stat) + '\n')
                        var_file.write('<END>\n')

        for s in vocab:
            vocab_file.write(s + '\n')
            for sig in t.resolveSigs(vocab[s]):
                vocab_file.write('\t' + e.nstr(sig) + '\n')
def main():
    """Export corpus sentences and the signature vocabulary as text files.

    Reads the mode from ``sys.argv[1]``; anything other than "cfs" or
    "levels" (or no argument at all) selects "levels".  Each ``.java`` file
    under ``../Java/Corpus/`` whose name contains no blacklisted hash is
    parsed, and the sentences from ``seq.getSents`` are written to
    ``../Data/Raw``:

    * ``method_sentences_<mode>.txt`` -- ``<S2>`` / ``<S1>`` markers, one
      ``signature # context`` line per statement, then ``<END>``.
    * ``variable_sentences_<mode>.txt`` -- the same markers around the
      statements of each sentence from ``seq.getVarSents``.
    * ``vocab_<mode>.txt`` -- every vocabulary key with its resolved
      signatures on tab-indented lines beneath it.
    """
    par = plyj.parser.Parser()
    modes = ["cfs", "levels"]
    if len(sys.argv) > 1 and sys.argv[1] in modes:
        mode = sys.argv[1]
    else:
        mode = "levels"

    corpus_path = "../Java/Corpus/"
    data_path = "../Data/Raw"
    meth_name = "method_sentences_" + mode + ".txt"
    var_name = "variable_sentences_" + mode + ".txt"
    vocab_name = "vocab_" + mode + ".txt"

    vocab = {}
    ctr = 1
    blacklist = [
        "5a8beeae20366b5094d0db8148e0563",
        "3cd87ee90872cfcb72b3cb3b773d8efa",
    ]

    # Opening the outputs in a ``with`` block closes them on every exit path,
    # including an exception raised while processing the corpus.
    with open(os.path.join(data_path, meth_name), 'w') as meth_file, \
            open(os.path.join(data_path, var_name), 'w') as var_file, \
            open(os.path.join(data_path, vocab_name), 'w') as vocab_file:
        for subdir, _dirs, files in os.walk(corpus_path):
            for f in files:
                blacklisted = any(h in f for h in blacklist)
                if blacklisted or not f.endswith(".java"):
                    continue
                p = os.path.join(subdir, f)
                for i, cu in e.ExtractCode(par, p):
                    # The first two return values are not needed here.
                    sents = seq.getSents(cu, i, mode)[2]
                    print(str(ctr) + ": " + str(len(sents)))
                    ctr += 1
                    for sent, vl in sents:
                        meth_file.write("<S2>\n")
                        meth_file.write("<S1>\n")
                        for stat, ctx in sent:
                            line = (e.nstr(t.getSig(stat, vl, False))
                                    + ' # ' + e.nstr(ctx) + '\n')
                            meth_file.write(line)
                            s = t.getSig(stat, vl)
                            # First element is the key; the rest is stored.
                            vocab.setdefault(s[0], []).append(s[1:])
                        meth_file.write('<END>\n')
                    for vsent in seq.getVarSents(sents):
                        var_file.write("<S2>\n")
                        var_file.write("<S1>\n")
                        for stat, _ctx in vsent:
                            var_file.write(e.nstr(stat) + '\n')
                        var_file.write('<END>\n')

        for s in vocab:
            vocab_file.write(s + '\n')
            for sig in t.resolveSigs(vocab[s]):
                vocab_file.write('\t' + e.nstr(sig) + '\n')