def compare_betweenPair(num):
    """Histogram hash distances between consecutive clone-pair blocks.

    Selects the distinct ``block1_id`` values of the clone pairs whose
    ``clonepair_id`` is below ``num`` (detection window
    ``20150101--20150131``) and walks the result as consecutive pairs
    (row ``i`` with row ``i + 1``).  For every pair two distances are
    computed with ``getHash.compare_hash``: one between the nilsimsa hashes
    of the cleaned source text and one between the nilsimsa hashes of
    whatever ``lexerparser.parse()`` returns after the Java lexer has run.

    Args:
        num: Exclusive upper bound for ``clonepair_id``.

    Returns:
        A tuple ``(source_histogram, token_histogram)`` of ``OrderedDict``
        objects, each mapping a distance value to the number of pairs that
        produced it, in first-seen order.
    """
    pair_sql = (
        'SELECT distinct cc_clonepair.block1_id '
        'from fdroid.cc_clonepair where '
        'clonepair_id<%s and cc_clonepair.detection_tp="20150101--20150131"'
    )
    block_sql = (
        "SELECT block_id, block_code FROM fdroid.cc_block "
        "where detection_id=1 and detection_tp = '20150101--20150131' "
        "and block_id = %s"
    )

    dbconn = MySQLdb.connect('10.141.221.73', 'root', 'root', 'fdroid')
    dbcursor = dbconn.cursor()

    def load_block(block_id):
        """Return (source hash, lexer tokens) for one block id."""
        # Let the driver quote the value instead of splicing it into the SQL.
        dbcursor.execute(block_sql, (block_id,))
        # The raw strings match the literal two-character sequences "\n" and
        # "\t" and the b'...', wrapper text, not real control characters.
        code = (dbcursor.fetchone()[1]
                .replace(r"\n", "").replace(r"\t", "")
                .replace(r"b'", "").replace(r"',", ""))
        source_hash = getHash.get_nilsimsa(code)
        with open('lexer/tem.code', 'w') as f:
            f.write(code)
        # Drain and close the pipe explicitly so the lexer has finished
        # before its result is parsed, rather than relying on garbage
        # collection of an unreferenced pipe object to wait for it.
        pipe = os.popen('java -jar codeLexer.jar lexer/tem.code')
        try:
            pipe.read()
        finally:
            pipe.close()
        return source_hash, lexerparser.parse()

    source_histogram = OrderedDict()
    token_histogram = OrderedDict()
    try:
        dbcursor.execute(pair_sql, (num,))
        rows = dbcursor.fetchall()
        for i, (first, second) in enumerate(zip(rows, rows[1:])):
            print(i)
            hash1, token1 = load_block(first[0])
            hash2, token2 = load_block(second[0])

            dist = getHash.compare_hash(hash1, hash2)
            source_histogram[dist] = source_histogram.get(dist, 0) + 1

            dist = getHash.compare_hash(getHash.get_nilsimsa(token1),
                                        getHash.get_nilsimsa(token2))
            token_histogram[dist] = token_histogram.get(dist, 0) + 1
    finally:
        # Release the database resources even when a query or the lexer
        # step raises.
        dbcursor.close()
        dbconn.close()
    return source_histogram, token_histogram
def token2hash():
    """Histogram token-hash distances between the two blocks of clone pairs.

    Reads every clone pair with ``clonepair_id < 3000`` in the detection
    window ``20150101--20150131``.  For both blocks of a pair the stored
    code is cleaned, written to ``lexer/tem.code``, run through the Java
    lexer, and the result of ``lexerparser.parse()`` is hashed with
    ``getHash.get_nilsimsa``.  The two hashes are compared with
    ``getHash.compare_hash`` and the resulting distance is counted.

    Returns:
        An ``OrderedDict`` mapping each distance value to the number of
        clone pairs that produced it, in first-seen order.
    """
    pair_sql = (
        'SELECT cc_clonepair.clonepair_id, cc_clonepair.block1_id, '
        'cc_clonepair.block2_id from fdroid.cc_clonepair where '
        'clonepair_id<3000 and cc_clonepair.detection_tp="20150101--20150131"'
    )
    block_sql = (
        "SELECT block_id, block_code FROM fdroid.cc_block "
        "where detection_id=1 and detection_tp = '20150101--20150131' "
        "and block_id = %s"
    )

    dbconn = MySQLdb.connect('10.141.221.73', 'root', 'root', 'fdroid')
    dbcursor = dbconn.cursor()

    def token_hash(block_id):
        """Return the nilsimsa hash of the lexer tokens of one block."""
        # Let the driver quote the value instead of splicing it into the SQL.
        dbcursor.execute(block_sql, (block_id,))
        # The raw strings match the literal two-character sequences "\n" and
        # "\t" and the b'...', wrapper text, not real control characters.
        code = (dbcursor.fetchone()[1]
                .replace(r"\n", "").replace(r"\t", "")
                .replace(r"b'", "").replace(r"',", ""))
        with open('lexer/tem.code', 'w') as f:
            f.write(code)
        # Drain and close the pipe explicitly so the lexer has finished
        # before its result is parsed, rather than relying on garbage
        # collection of an unreferenced pipe object to wait for it.
        pipe = os.popen('java -jar codeLexer.jar lexer/tem.code')
        try:
            pipe.read()
        finally:
            pipe.close()
        return getHash.get_nilsimsa(lexerparser.parse())

    distance_histogram = OrderedDict()
    try:
        dbcursor.execute(pair_sql)
        # fetchall() materialises the pair list first, so reusing the same
        # cursor for the per-block lookups below is safe.
        for clonepair_id, block1_id, block2_id in dbcursor.fetchall():
            hash1 = token_hash(block1_id)
            hash2 = token_hash(block2_id)
            dist = getHash.compare_hash(hash1, hash2)
            print(clonepair_id)
            distance_histogram[dist] = distance_histogram.get(dist, 0) + 1
    finally:
        # Release the database resources even when a query or the lexer
        # step raises.
        dbcursor.close()
        dbconn.close()
    return distance_histogram
def putSimhash(tp='#1--20171130', start=106666):
    """Compute and store the ``simhash`` column for blocks of one detection.

    Selects all ``cc_block`` rows with ``detection_id=2`` and the given
    ``detection_tp``, skips the first ``start`` rows, and for each remaining
    row cleans the stored code, runs the Java lexer on it, hashes the result
    of ``lexerparser.parse()`` with ``getHash.get_nilsimsa`` and writes that
    hash back to ``fdroid.cc_block.simhash``.  Each row is committed
    individually and its ``block_id`` is printed as progress output.

    Args:
        tp: Value of ``detection_tp`` selecting the rows to process.
        start: Number of leading result rows to skip.  Defaults to the
            offset that was previously hard-coded; pass ``0`` to process
            every row.
    """
    select_sql = (
        "select detection_tp,block_id,block_code from cc_block where "
        "detection_id=2 and detection_tp=%s"
    )
    update_sql = (
        "update fdroid.cc_block set simhash = %s "
        "where block_id = %s and detection_tp = %s"
    )

    dbconn = MySQLdb.connect('10.141.221.73', 'root', 'root', 'fdroid')
    dbcursor = dbconn.cursor()
    try:
        # Values are passed separately so the driver quotes them; the old
        # string-formatted SQL broke on any value containing a quote.
        dbcursor.execute(select_sql, (tp,))
        rows = dbcursor.fetchall()[start:]
        for detection_tp, block_id, block_code in rows:
            # The raw strings match the literal two-character sequences
            # "\n" and "\t" and the b'...', wrapper text, not real control
            # characters.
            code = (block_code
                    .replace(r"\n", "").replace(r"\t", "")
                    .replace(r"b'", "").replace(r"',", ""))
            with open('lexer/tem.code', 'w') as f:
                f.write(code)
            # Drain and close the pipe explicitly so the lexer has finished
            # before its result is parsed, rather than relying on garbage
            # collection of an unreferenced pipe object to wait for it.
            pipe = os.popen('java -jar codeLexer.jar lexer/tem.code')
            try:
                pipe.read()
            finally:
                pipe.close()
            sim_hash = getHash.get_nilsimsa(lexerparser.parse())
            dbcursor.execute(update_sql, (sim_hash, block_id, detection_tp))
            # Commit per row so an interrupted run keeps its progress.
            dbconn.commit()
            print(block_id)
    finally:
        # Release the database resources even when a query or the lexer
        # step raises.
        dbcursor.close()
        dbconn.close()
def token2md5(codeBody):
    """Return the MD5 hex digest of the lexer output for ``codeBody``.

    Writes ``codeBody`` to ``lexer/tem.code``, runs the Java lexer on that
    file and hashes whatever ``lexerparser.parse()`` returns afterwards.

    Args:
        codeBody: Source text to write to the lexer's input file.

    Returns:
        The hexadecimal MD5 digest of the parsed token data.
    """
    with open('lexer/tem.code', 'w') as f:
        f.write(codeBody)
    # Drain and close the pipe explicitly so the lexer has finished before
    # its result is parsed, rather than relying on garbage collection of an
    # unreferenced pipe object to wait for it.
    pipe = os.popen('java -jar codeLexer.jar lexer/tem.code')
    try:
        pipe.read()
    finally:
        pipe.close()
    token = lexerparser.parse()
    return hashlib.md5(token).hexdigest()
# Re-extract the code fragment of every block belonging to each release row,
# hash its lexer output and write code + hash back through updateSQL.
# NOTE(review): `querysql`, `sql`, `updateSQL`, `changeTag` and `getCodeFrag`
# are not defined in the visible part of the file, and the enclosing `def`
# header (if any) is missing. The nesting below was reconstructed from the
# statement order - confirm it against the original file.
dbconn = MySQLdb.connect('10.141.221.73', 'root', 'root', 'fdroid')
dbcursor = dbconn.cursor()
dbcursor.execute(querysql)
result = dbcursor.fetchall()
# The first 2500 result rows are skipped (presumably already processed by an
# earlier run - verify before re-running from scratch).
for release in result[2500:]:
    dbcursor.execute(sql % (release[0], release[1]))
    tem = dbcursor.fetchall()
    # changeTag is called only once per release, on its first row.
    havaToChangeTag = 1
    for i in tem:
        print i[0], i[2], i[3]
        if havaToChangeTag:
            # The last path component of i[1] and i[2] are handed to
            # changeTag; presumably repository name and tag - TODO confirm.
            changeTag(i[1].split('/')[-1], i[2])
            havaToChangeTag = 0
        code = getCodeFrag('E:/codeclone/' + i[4], i[5], i[6])
        with open('lexer/tem1.code', 'w') as f:
            f.write(code)
        # The returned pipe is discarded; nothing here explicitly waits for
        # the lexer process before lexerparser.parse() is called.
        os.popen('java -jar codeLexer.jar lexer/tem1.code')
        token = lexerparser.parse()
        hash = getHash.get_nilsimsa(token)
        # Debug aid: print code, hash, i[0], i[3], i[2]
        # Inside a SQL string literal a single quote is written as two
        # single quotes (''), and backslashes are doubled as well, because
        # the statement is assembled by string formatting.
        dbcursor.execute(updateSQL % (code.replace("'", "''").replace(
            '\\', '\\\\'), hash, i[0], i[3], i[2]))
        dbconn.commit()
    havaToChangeTag = 1
dbcursor.close()
dbconn.close()
def token2simhash():
    """Return the nilsimsa hash of the current lexer output.

    Hashes whatever ``lexerparser.parse()`` returns with
    ``getSimHash.get_nilsimsa``; the lexer itself is not run here.
    """
    return getSimHash.get_nilsimsa(lexerparser.parse())
def token2sha1():
    """Return the SHA-1 hex digest of the lexer output for ``result/1.txt``.

    Runs the Java lexer on ``result/1.txt`` and hashes whatever
    ``lexerparser.parse()`` returns afterwards.

    Returns:
        The hexadecimal SHA-1 digest of the parsed token data.
    """
    # Drain and close the pipe explicitly so the lexer has finished before
    # its result is parsed, rather than relying on garbage collection of an
    # unreferenced pipe object to wait for it.
    pipe = os.popen('java -jar codeLexer.jar result/1.txt')
    try:
        pipe.read()
    finally:
        pipe.close()
    token = lexerparser.parse()
    return hashlib.sha1(token).hexdigest()
# Compare every pair of adjacent entries in `lines` that end in the same
# class label and print the distance between their token hashes.
# NOTE(review): `f` and `lines` are created before the visible part of the
# file, and the nesting below was reconstructed from the statement order -
# confirm it against the original file.
f.close()
for i in range(len(lines) - 1):
    line1 = lines[i]
    line2 = lines[i + 1]
    # The text after the last ":" is used as the grouping label.
    class1 = line1.split(":")[-1]
    class2 = line2.split(":")[-1]
    if class1 == class2:
        # Each line is split on ":"; the first field (prefixed with ".") and
        # the two comma-separated values of the second field are passed to
        # getCodeFrag - presumably path, start and end position; TODO confirm.
        code1 = getCodeFrag("." + line1.split(":")[0],
                            line1.split(":")[1].split(",")[0],
                            line1.split(":")[1].split(",")[1])
        code2 = getCodeFrag("." + line2.split(":")[0],
                            line2.split(":")[1].split(",")[0],
                            line2.split(":")[1].split(",")[1])
        with open('lexer/tem.code', 'w') as f:
            f.write(code1)
        # The returned pipe is discarded; nothing here explicitly waits for
        # the lexer process before lexerparser.parse() is called.
        os.popen('java -jar codeLexer.jar lexer/tem.code')
        token1 = lexerparser.parse()
        hash1 = getHash.get_nilsimsa(token1)
        with open('lexer/tem.code', 'w') as f:
            f.write(code2)
        os.popen('java -jar codeLexer.jar lexer/tem.code')
        token2 = lexerparser.parse()
        hash2 = getHash.get_nilsimsa(token2)
        print class1, class2, '-------------->', getHash.compare_hash(
            hash1, hash2)
        # Dump both fragments only when compare_hash returns a truthy value.
        if getHash.compare_hash(hash1, hash2):
            print code1, '------------------------------------------------'
            print code2