Пример #1
0
def compare_betweenPair(num):
    """Histogram hash distances between adjacent clone-pair blocks.

    Selects the distinct ``block1_id`` values of clone pairs whose
    ``clonepair_id`` is below *num* (detection period
    ``20150101--20150131``) and walks the result in adjacent pairs
    ``(row[i], row[i + 1])``.  For each pair two distances are computed with
    ``getHash.compare_hash``: one between the nilsimsa hashes of the cleaned
    block source, and one between the nilsimsa hashes of the token streams
    returned by ``lexerparser.parse()`` after running ``codeLexer.jar`` on
    the source.

    Each block is fetched, lexed and hashed only once; its fingerprint is
    carried over to the next pair instead of being recomputed.

    Args:
        num: exclusive upper bound for ``clonepair_id``.

    Returns:
        A tuple ``(SourceCodedistanceDict, CodeTokendistanceDict)``; each is
        an ``OrderedDict`` mapping a distance to the number of adjacent
        pairs that produced it, in first-seen order.
    """
    from contextlib import closing

    # Values are bound by the driver instead of being spliced into the SQL.
    pair_sql = ('SELECT distinct cc_clonepair.block1_id '
                'from fdroid.cc_clonepair where '
                'clonepair_id<%s and '
                'cc_clonepair.detection_tp="20150101--20150131"')
    blocksql = ("SELECT block_id, block_code FROM fdroid.cc_block "
                "where detection_id=1 and "
                "detection_tp = '20150101--20150131' "
                "and block_id = %s")

    def fingerprint(cursor, block_id):
        # Return (source hash, token hash) for one stored block.
        cursor.execute(blocksql, (block_id,))
        code = cursor.fetchone()[1]
        # Drop the literal character sequences \n, \t, b' and ', from the
        # stored text (presumably residue of a stringified bytes value --
        # TODO confirm against the code that populates block_code).
        for marker in (r"\n", r"\t", r"b'", r"',"):
            code = code.replace(marker, "")
        source_hash = getHash.get_nilsimsa(code)

        with open('lexer/tem.code', 'w') as f:
            f.write(code)
        # Drain and close the pipe so the lexer has finished before its
        # result is parsed, rather than relying on implicit finalization.
        lexer = os.popen('java -jar codeLexer.jar lexer/tem.code')
        try:
            lexer.read()
        finally:
            lexer.close()
        token_hash = getHash.get_nilsimsa(lexerparser.parse())
        return source_hash, token_hash

    def bump(histogram, distance):
        # Count one more occurrence of *distance*.
        histogram[distance] = histogram.get(distance, 0) + 1

    SourceCodedistanceDict = OrderedDict()
    CodeTokendistanceDict = OrderedDict()

    # NOTE(review): host and credentials are hard-coded here.
    dbconn = MySQLdb.connect('10.141.221.73', 'root', 'root', 'fdroid')
    with closing(dbconn), closing(dbconn.cursor()) as dbcursor:
        dbcursor.execute(pair_sql, (num,))
        tem = dbcursor.fetchall()

        if len(tem) > 1:
            previous = fingerprint(dbcursor, tem[0][0])
            for i, row in enumerate(tem[1:]):
                print(i)  # progress: index of the pair being compared
                current = fingerprint(dbcursor, row[0])
                bump(SourceCodedistanceDict,
                     getHash.compare_hash(previous[0], current[0]))
                bump(CodeTokendistanceDict,
                     getHash.compare_hash(previous[1], current[1]))
                previous = current

    return SourceCodedistanceDict, CodeTokendistanceDict
Пример #2
0
def token2hash(max_clonepair_id=3000):
    """Histogram token-hash distances between the two blocks of clone pairs.

    For every clone pair with ``clonepair_id`` below *max_clonepair_id*
    (detection period ``20150101--20150131``) both blocks are loaded,
    written to ``lexer/tem.code``, run through ``codeLexer.jar``, and the
    token stream returned by ``lexerparser.parse()`` is hashed with
    ``getHash.get_nilsimsa``.  The distance between the two hashes, as
    reported by ``getHash.compare_hash``, is counted.

    Args:
        max_clonepair_id: exclusive upper bound for ``clonepair_id``.
            Defaults to 3000, the value that used to be hard-coded.

    Returns:
        An ``OrderedDict`` mapping a distance to the number of clone pairs
        that produced it, in first-seen order.
    """
    from contextlib import closing

    # Values are bound by the driver instead of being spliced into the SQL.
    pair_sql = ('SELECT cc_clonepair.clonepair_id, cc_clonepair.block1_id, '
                'cc_clonepair.block2_id from fdroid.cc_clonepair where '
                'clonepair_id<%s and '
                'cc_clonepair.detection_tp="20150101--20150131"')
    blocksql = ("SELECT block_id, block_code FROM fdroid.cc_block "
                "where detection_id=1 and "
                "detection_tp = '20150101--20150131' "
                "and block_id = %s")

    def token_hash(cursor, block_id):
        # Fetch one block, lex it and return the hash of its tokens.
        cursor.execute(blocksql, (block_id,))
        code = cursor.fetchone()[1]
        # Drop the literal character sequences \n, \t, b' and ', from the
        # stored text (presumably residue of a stringified bytes value --
        # TODO confirm against the code that populates block_code).
        for marker in (r"\n", r"\t", r"b'", r"',"):
            code = code.replace(marker, "")
        with open('lexer/tem.code', 'w') as f:
            f.write(code)
        # Drain and close the pipe so the lexer has finished before its
        # result is parsed, rather than relying on implicit finalization.
        lexer = os.popen('java -jar codeLexer.jar lexer/tem.code')
        try:
            lexer.read()
        finally:
            lexer.close()
        return getHash.get_nilsimsa(lexerparser.parse())

    distanceDict = OrderedDict()

    # NOTE(review): host and credentials are hard-coded here.
    dbconn = MySQLdb.connect('10.141.221.73', 'root', 'root', 'fdroid')
    with closing(dbconn), closing(dbconn.cursor()) as dbcursor:
        dbcursor.execute(pair_sql, (max_clonepair_id,))
        # fetchall() materializes the pairs, so the cursor can be reused
        # for the per-block lookups inside the loop.
        for pair_id, blockid1, blockid2 in dbcursor.fetchall():
            hash1 = token_hash(dbcursor, blockid1)
            hash2 = token_hash(dbcursor, blockid2)
            dist = getHash.compare_hash(hash1, hash2)

            print(pair_id)  # progress: clonepair_id just processed
            distanceDict[dist] = distanceDict.get(dist, 0) + 1

    return distanceDict
Пример #3
0
def putSimhash(tp='#1--20171130', start=106666):
    """Compute and store the token hash of every block of one detection run.

    Reads ``detection_tp``, ``block_id`` and ``block_code`` from ``cc_block``
    for ``detection_id=2`` and the given *tp*, skips the first *start* rows
    of the result, and for each remaining row lexes the cleaned code with
    ``codeLexer.jar``, hashes the tokens returned by ``lexerparser.parse()``
    with ``getHash.get_nilsimsa`` and writes the value to the ``simhash``
    column.  Each row is committed individually, so an interrupted run keeps
    the rows already written.

    Args:
        tp: ``detection_tp`` value selecting the rows to process.
        start: number of leading result rows to skip.  Defaults to 106666,
            the offset that used to be hard-coded; pass 0 to process every
            row.  NOTE(review): the select has no ORDER BY, so which rows
            are skipped depends on the order the server returns them in.
    """
    from contextlib import closing

    # Values are bound by the driver instead of being quoted by hand.
    select_sql = ("select detection_tp,block_id,block_code from cc_block "
                  "where detection_id=2 and detection_tp=%s")
    update_sql = ("update fdroid.cc_block set simhash = %s "
                  "where block_id = %s and detection_tp = %s")

    # NOTE(review): host and credentials are hard-coded here.
    dbconn = MySQLdb.connect('10.141.221.73', 'root', 'root', 'fdroid')
    with closing(dbconn), closing(dbconn.cursor()) as dbcursor:
        dbcursor.execute(select_sql, (tp,))
        pending = dbcursor.fetchall()[start:]

        for detection_tp, block_id, block_code in pending:
            # Drop the literal character sequences \n, \t, b' and ', from
            # the stored text (presumably residue of a stringified bytes
            # value -- TODO confirm).
            code = block_code
            for marker in (r"\n", r"\t", r"b'", r"',"):
                code = code.replace(marker, "")

            with open('lexer/tem.code', 'w') as f:
                f.write(code)
            # Drain and close the pipe so the lexer has finished before its
            # result is parsed, rather than relying on implicit finalization.
            lexer = os.popen('java -jar codeLexer.jar lexer/tem.code')
            try:
                lexer.read()
            finally:
                lexer.close()

            digest = getHash.get_nilsimsa(lexerparser.parse())
            dbcursor.execute(update_sql, (digest, block_id, detection_tp))
            dbconn.commit()
            print(block_id)  # progress: block just updated
Пример #4
0
def token2md5(codeBody):
    """Return the MD5 hex digest of the token stream of *codeBody*.

    The code is written to ``lexer/tem.code``, ``codeLexer.jar`` is run on
    that file, and the value returned by ``lexerparser.parse()`` is hashed.

    Args:
        codeBody: source text to lex.

    Returns:
        The hexadecimal MD5 digest of the parsed tokens.
    """
    with open('lexer/tem.code', 'w') as f:
        f.write(codeBody)

    # Drain and close the pipe so the lexer has finished before its result
    # is parsed, rather than relying on implicit finalization of the handle.
    lexer = os.popen('java -jar codeLexer.jar lexer/tem.code')
    try:
        lexer.read()
    finally:
        lexer.close()

    token = lexerparser.parse()
    return hashlib.md5(token).hexdigest()
Пример #5
0
    dbconn = MySQLdb.connect('10.141.221.73', 'root', 'root', 'fdroid')
    dbcursor = dbconn.cursor()

    dbcursor.execute(querysql)
    result = dbcursor.fetchall()

    for release in result[2500:]:
        dbcursor.execute(sql % (release[0], release[1]))
        tem = dbcursor.fetchall()
        havaToChangeTag = 1
        for i in tem:
            print i[0], i[2], i[3]
            if havaToChangeTag:
                changeTag(i[1].split('/')[-1], i[2])
                havaToChangeTag = 0
            code = getCodeFrag('E:/codeclone/' + i[4], i[5], i[6])

            with open('lexer/tem1.code', 'w') as f:
                f.write(code)
            os.popen('java -jar codeLexer.jar lexer/tem1.code')
            token = lexerparser.parse()
            hash = getHash.get_nilsimsa(token)
            # print code, hash, i[0], i[3], i[2]
            # '' in sql means '
            dbcursor.execute(updateSQL % (code.replace("'", "''").replace(
                '\\', '\\\\'), hash, i[0], i[3], i[2]))
        dbconn.commit()
        havaToChangeTag = 1
    dbcursor.close()
    dbconn.close()
Пример #6
0
def token2simhash():
    """Return ``getSimHash.get_nilsimsa`` of ``lexerparser.parse()``'s result."""
    return getSimHash.get_nilsimsa(lexerparser.parse())
Пример #7
0
def token2sha1():
    """Return the SHA-1 hex digest of the token stream of ``result/1.txt``.

    Runs ``codeLexer.jar`` on ``result/1.txt`` and hashes the value returned
    by ``lexerparser.parse()``.

    Returns:
        The hexadecimal SHA-1 digest of the parsed tokens.
    """
    # Drain and close the pipe so the lexer has finished before its result
    # is parsed, rather than relying on implicit finalization of the handle.
    lexer = os.popen('java -jar codeLexer.jar result/1.txt')
    try:
        lexer.read()
    finally:
        lexer.close()

    token = lexerparser.parse()
    return hashlib.sha1(token).hexdigest()
Пример #8
0
    f.close()
    for i in range(len(lines) - 1):
        line1 = lines[i]
        line2 = lines[i + 1]
        class1 = line1.split(":")[-1]
        class2 = line2.split(":")[-1]
        if class1 == class2:
            code1 = getCodeFrag("." + line1.split(":")[0],
                                line1.split(":")[1].split(",")[0],
                                line1.split(":")[1].split(",")[1])
            code2 = getCodeFrag("." + line2.split(":")[0],
                                line2.split(":")[1].split(",")[0],
                                line2.split(":")[1].split(",")[1])

            with open('lexer/tem.code', 'w') as f:
                f.write(code1)
            os.popen('java -jar codeLexer.jar lexer/tem.code')
            token1 = lexerparser.parse()
            hash1 = getHash.get_nilsimsa(token1)

            with open('lexer/tem.code', 'w') as f:
                f.write(code2)
            os.popen('java -jar codeLexer.jar lexer/tem.code')
            token2 = lexerparser.parse()
            hash2 = getHash.get_nilsimsa(token2)
            print class1, class2, '-------------->', getHash.compare_hash(
                hash1, hash2)
            if getHash.compare_hash(hash1, hash2):
                print code1, '------------------------------------------------'
                print code2