示例#1
0
def convert_ccfx_output(pb, proj, lang, is_new):
    """Rewrite ccfx clone results for one project as filtered-diff line ranges.

    Every entry found in the project's filter-output directory gets a
    CCFXMetaData record.  For each record the ccfx prep file and the
    line-map ("conv") file are read to build two lookups: prep-file line
    index -> filtered-diff line, and filtered-diff line -> edit operation.
    The ccfx output is then loaded and each clone's start/end indices are
    translated through those lookups.

    Args:
        pb: path builder supplying the directory and file-name helpers
            called below.
        proj: project identifier, handed to the path builder as-is.
        lang: language identifier, handed to the path builder as-is.
        is_new: flag handed to the path builder to pick the line-map,
            ccfx input, prep and output locations.

    Returns:
        A RepertoireOutput loaded with ``{file index: filter output path}``
        and ``{clone index: (clone, clone)}``.  Each clone is a tuple
        ``(file index, start, end, [(line, op), ...])``; the clone with the
        strictly smaller file index comes first, otherwise the second clone
        reported by ccfx comes first.

    Raises:
        Exception: if the ccfx output names a file that has no metadata.
        ValueError: if a line-map row is not four comma-separated fields
            with integer indices, or a prep line lacks a hex prefix.
    """
    metaDB = CCFXMetaMapping()
    # maps from ccfx input paths to meta objects representing the files;
    # only the single project passed in is scanned
    filter_path = pb.getFilterOutputPath(proj, lang)
    conv_path = pb.getLineMapPath(proj, lang, is_new)
    ccfx_i_path = pb.getCCFXInputPath(proj, lang, is_new)
    ccfx_p_path = pb.getCCXFPrepPath(proj, lang, is_new)
    print("filter_path = " + filter_path)
    print("conv_path = " + conv_path)
    print("ccfx_i_path = " + ccfx_i_path)
    print("ccfx_p_path = " + ccfx_p_path)
    for name in os.listdir(filter_path):
        meta = CCFXMetaData(
            ccfx_i_path + name,
            ccfx_p_path + pb.findPrepFileFor(ccfx_p_path, name),
            conv_path + pb.makeLineMapFileName(name),
            filter_path + name)
        metaDB.addFile(meta)

    print(metaDB)
    # we have our files, now map line numbers in the prep files to input files
    for meta in metaDB.getMetas():

        # per-file diagnostics are only wanted while debugging
        if config.DEBUG is True:
            print("prep file = " + meta.ccfxPrep)
            print("conv file = " + meta.filterConv)

        # context managers close the handles even if reading fails
        with open(meta.ccfxPrep, 'r') as prep_file:
            prep = prep_file.readlines()

        with open(meta.filterConv, 'r') as conv_file:
            conv = conv_file.readlines()

        input2orig = {}
        pidx2orig = {}
        origline2op = {}
        # build a map of line numbers in ccfx_input to filtered diff line;
        # the first two lines of the conv file are never data rows
        for cline in conv[2:]:
            if cline.rstrip().startswith('"'):  # filename --> skip the line
                continue

            # rows are "dst,src,op,changeId"; the change id is not used here
            dst_field, src_field, op, _change_id = cline.split(',')
            src_line = int(src_field)
            input2orig[int(dst_field)] = src_line
            origline2op[src_line] = op

        # ccfx numbers from 1, so enumerate the prep lines starting at 1
        for prep_no, pline in enumerate(prep, 1):
            # the text before the first "." is the ccfx input line in hex
            input_idx = int(pline.partition(".")[0], 16)
            pidx2orig[prep_no] = input2orig.get(input_idx, -1)
        meta.prepIdx2OrigIdx = pidx2orig
        meta.line2op = origline2op

    ccfx_out_path = pb.getCCFXOutputPath() + pb.getCCFXOutputFileName(
        lang, is_new, is_tmp=False)
    ccfx_out = RepertoireOutput()
    ccfx_out.loadFromFile(ccfx_out_path)

    files = {}
    for fileIdx, path in ccfx_out.getFileIter():
        print(fileIdx)
        print(path)
        if not metaDB.hasInputPath(path):
            raise Exception(
                "Couldn't find meta information for file: {0}".format(path))
        print(">>>>>>> " + path)
        files[fileIdx] = metaDB.getMetaForPath(path).filterOutput

    clones = {}

    # rewrite the clone line numbers to index into the filter output files
    for cloneIdx, (clone1, clone2) in ccfx_out.getCloneIter():
        fidx1, start1, end1 = clone1
        fidx2, start2, end2 = clone2
        meta1 = metaDB.getMetaForPath(ccfx_out.getFilePath(fidx1))
        meta2 = metaDB.getMetaForPath(ccfx_out.getFilePath(fidx2))

        # an index missing from the prep map translates to -1
        start1 = meta1.prepIdx2OrigIdx.get(start1 + 1, -1)
        end1 = meta1.prepIdx2OrigIdx.get(end1, -1)
        start2 = meta2.prepIdx2OrigIdx.get(start2 + 1, -1)
        end2 = meta2.prepIdx2OrigIdx.get(end2, -1)

        # lines with no recorded operation are tagged "X"
        op1 = [(line, meta1.line2op.get(line, "X"))
               for line in range(start1, end1 + 1)]
        op2 = [(line, meta2.line2op.get(line, "X"))
               for line in range(start2, end2 + 1)]

        clone1 = (fidx1, start1, end1, op1)
        clone2 = (fidx2, start2, end2, op2)
        if clone1[0] < clone2[0]:
            clones[cloneIdx] = (clone1, clone2)
        else:
            clones[cloneIdx] = (clone2, clone1)

    rep_out = RepertoireOutput()
    rep_out.loadFromData(files, clones)
    return rep_out
def convert_ccfx_output(pb, lang, is_new, debug = False):
    """Rewrite ccfx clone results for both projects as filtered-diff hunks.

    Metadata is collected for every entry in the filter-output directory
    of PathBuilder.PROJ0 and PathBuilder.PROJ1.  For each file the ccfx
    prep file and the line-map ("conv") file are read to build two
    lookups: prep-file line index -> filtered-diff line, and filtered-diff
    line -> edit operation.  The ccfx output is then loaded, each clone
    pair's indices are translated, and the pair is split into hunks with
    split_clone_into_hunks.

    Args:
        pb: path builder supplying the directory and file-name helpers
            called below.
        lang: language identifier, handed to the path builder as-is.
        is_new: flag handed to the path builder to pick the line-map,
            ccfx input, prep and output locations.
        debug: when true, print progress messages and every failed line
            translation; also forwarded to split_clone_into_hunks.

    Returns:
        A RepertoireOutput loaded with ``{file index: filter output path}``
        and a dict of hunk-level ClonePair objects keyed 0, 1, 2, ... in
        the order they were produced.  Clone pairs with an untranslatable
        start or end index are left out.

    Raises:
        Exception: if the ccfx output names a file that has no metadata.
        ValueError: if a line-map row is not four comma-separated fields
            with integer indices, or a prep line lacks a hex prefix.
    """
    metaDB = CCFXMetaMapping()
    # maps from ccfx input paths to meta objects representing the files
    for proj in [PathBuilder.PROJ0, PathBuilder.PROJ1]:
        filter_path = pb.getFilterOutputPath(proj, lang)
        conv_path = pb.getLineMapPath(proj, lang, is_new)
        ccfx_i_path = pb.getCCFXInputPath(proj, lang, is_new)
        ccfx_p_path = pb.getCCXFPrepPath(proj, lang, is_new)
        for name in os.listdir(filter_path):
            meta = CCFXMetaData(
                ccfx_i_path + name,
                ccfx_p_path + pb.findPrepFileFor(ccfx_p_path, name),
                conv_path + pb.makeLineMapFileName(name),
                filter_path + name)
            metaDB.addFile(meta)

    # we have our files, now map line numbers in the prep files to input files
    for meta in metaDB.getMetas():

        if config.DEBUG is True:
            print("prep file = " + meta.ccfxPrep)
            print("conv file = " + meta.filterConv)

        # context managers close the handles even if reading fails
        with open(meta.ccfxPrep, 'r') as prep_file:
            prep = prep_file.readlines()

        with open(meta.filterConv, 'r') as conv_file:
            conv = conv_file.readlines()

        input2orig = {}
        pidx2orig = {}
        origline2op = {}
        # build a map of line numbers in ccfx_input to filtered diff line;
        # the first two lines of the conv file are never data rows
        last_dst = last_src = 0
        for cline in conv[2:]:
            if cline.rstrip().startswith('"'):  # filename --> skip the line
                continue

            # rows are "dst,src,op,changeId"; the change id is not used here
            dst_field, src_field, op, _change_id = cline.split(',')
            src_line = int(src_field)
            dst_line = int(dst_field)
            input2orig[dst_line] = src_line
            origline2op[src_line] = op
            last_dst = dst_line + 1
            last_src = src_line + 1
        # ccfx cares about the end of file, which isn't represented by our
        # mappings, so add one synthetic row just past the last real one
        input2orig[last_dst] = last_src
        origline2op[last_src] = "NOCHANGE"
        for pidx, pline in enumerate(prep):
            # the text before the first "." is the ccfx input line in hex
            inputIdx = int(pline.partition(".")[0], 16)
            # ccfx output has numbers like 0-131, meaning that pidx
            # is meant to be taken from 0
            orig_idx = input2orig.get(inputIdx, -1)
            pidx2orig[pidx] = orig_idx
            if debug and orig_idx == -1:
                print("failed to translate from pidx to original: {0} -> {1}".format(pidx, inputIdx))
                print("    file: " + meta.ccfxInput)

        meta.prepIdx2OrigIdx = pidx2orig
        meta.line2op = origline2op

    ccfx_out_path = pb.getCCFXOutputPath() + pb.getCCFXOutputFileName(
        lang, is_new, is_tmp=False)
    ccfx_out = RepertoireOutput()
    if debug:
        print('loading from ccfx output file: {0}'.format(ccfx_out_path))
    ccfx_out.loadFromFile(ccfx_out_path)
    if debug:
        print("finished loading ccfx output.")

    files = {}
    for fileIdx, path in ccfx_out.getFileIter():
        if not metaDB.hasInputPath(path):
            raise Exception(
                "Couldn't find meta information for file: {0}".format(
                    path))
        files[fileIdx] = metaDB.getMetaForPath(path).filterOutput

    clones = {}

    # rewrite the line numbers to index into filter_output files
    for clone_idx, clone_pair in ccfx_out.getCloneIter():
        # NOTE(review): op1/op2 are the lists unpacked from the loaded clone
        # pair and are appended to in place below -- confirm they start empty
        # and that mutating ccfx_out's data is acceptable.
        fidx1, start1, end1, op1 = clone_pair.clone1
        fidx2, start2, end2, op2 = clone_pair.clone2
        metric = clone_pair.metric
        meta1 = metaDB.getMetaForPath(ccfx_out.getFilePath(fidx1))
        meta2 = metaDB.getMetaForPath(ccfx_out.getFilePath(fidx2))

        # NOTE(review): start indices are shifted by one before the lookup
        # while end indices are not, and the map above is keyed from 0 --
        # confirm this matches the ccfx output format.
        start1 = meta1.prepIdx2OrigIdx.get(start1 + 1, -1)
        end1 = meta1.prepIdx2OrigIdx.get(end1, -1)
        start2 = meta2.prepIdx2OrigIdx.get(start2 + 1, -1)
        end2 = meta2.prepIdx2OrigIdx.get(end2, -1)

        if -1 in (start1, start2, end1, end2):
            if debug:
                print('line translation failed for ' + str(clone_pair))
            # don't even try to translate a clone with bad indices
            # this usually means we somehow dumped an empty file on
            # ccfx and we can't translate the eof token correctly
            # enabling debug should verify this
            continue

        # lines with no recorded operation are tagged "X"
        for line in range(start1, end1 + 1):
            op1.append(Operation(line, meta1.line2op.get(line, "X")))

        for line in range(start2, end2 + 1):
            op2.append(Operation(line, meta2.line2op.get(line, "X")))

        clone1 = Clone(fidx1, start1, end1, op1)
        clone2 = Clone(fidx2, start2, end2, op2)
        # the clone with the strictly smaller file index goes first
        if clone1.fidx < clone2.fidx:
            unsplit_clone = ClonePair(clone1, clone2, metric)
        else:
            unsplit_clone = ClonePair(clone2, clone1, metric)

        # split into hunks, add those hunks into our final output under
        # consecutive keys
        for hunk_pair in split_clone_into_hunks(unsplit_clone, debug):
            clones[len(clones)] = hunk_pair

    rep_out = RepertoireOutput()
    rep_out.loadFromData(files, clones)
    return rep_out
def convert_ccfx_output(pb, proj, lang, is_new):
    """Rewrite ccfx clone results for one project as filtered-diff line ranges.

    Every entry found in the project's filter-output directory gets a
    CCFXMetaData record.  For each record the ccfx prep file and the
    line-map ("conv") file are read to build two lookups: prep-file line
    index -> filtered-diff line, and filtered-diff line -> edit operation.
    The ccfx output is then loaded and each clone's start/end indices are
    translated through those lookups.

    Args:
        pb: path builder supplying the directory and file-name helpers
            called below.
        proj: project identifier, handed to the path builder as-is.
        lang: language identifier, handed to the path builder as-is.
        is_new: flag handed to the path builder to pick the line-map,
            ccfx input, prep and output locations.

    Returns:
        A RepertoireOutput loaded with ``{file index: filter output path}``
        and ``{clone index: (clone, clone)}``.  Each clone is a tuple
        ``(file index, start, end, [(line, op), ...])``; the clone with the
        strictly smaller file index comes first, otherwise the second clone
        reported by ccfx comes first.

    Raises:
        Exception: if the ccfx output names a file that has no metadata.
        ValueError: if a line-map row is not four comma-separated fields
            with integer indices, or a prep line lacks a hex prefix.
    """
    metaDB = CCFXMetaMapping()
    # maps from ccfx input paths to meta objects representing the files;
    # only the single project passed in is scanned
    filter_path = pb.getFilterOutputPath(proj, lang)
    conv_path = pb.getLineMapPath(proj, lang, is_new)
    ccfx_i_path = pb.getCCFXInputPath(proj, lang, is_new)
    ccfx_p_path = pb.getCCXFPrepPath(proj, lang, is_new)
    print("filter_path = " + filter_path)
    print("conv_path = " + conv_path)
    print("ccfx_i_path = " + ccfx_i_path)
    print("ccfx_p_path = " + ccfx_p_path)
    for name in os.listdir(filter_path):
        meta = CCFXMetaData(
            ccfx_i_path + name,
            ccfx_p_path + pb.findPrepFileFor(ccfx_p_path, name),
            conv_path + pb.makeLineMapFileName(name),
            filter_path + name)
        metaDB.addFile(meta)

    print(metaDB)
    # we have our files, now map line numbers in the prep files to input files
    for meta in metaDB.getMetas():

        # per-file diagnostics are only wanted while debugging
        if config.DEBUG is True:
            print("prep file = " + meta.ccfxPrep)
            print("conv file = " + meta.filterConv)

        # context managers close the handles even if reading fails
        with open(meta.ccfxPrep, 'r') as prep_file:
            prep = prep_file.readlines()

        with open(meta.filterConv, 'r') as conv_file:
            conv = conv_file.readlines()

        input2orig = {}
        pidx2orig = {}
        origline2op = {}
        # build a map of line numbers in ccfx_input to filtered diff line;
        # the first two lines of the conv file are never data rows
        for cline in conv[2:]:
            if cline.rstrip().startswith('"'):  # filename --> skip the line
                continue

            # rows are "dst,src,op,changeId"; the change id is not used here
            dst_field, src_field, op, _change_id = cline.split(',')
            src_line = int(src_field)
            input2orig[int(dst_field)] = src_line
            origline2op[src_line] = op

        # ccfx numbers from 1, so enumerate the prep lines starting at 1
        for prep_no, pline in enumerate(prep, 1):
            # the text before the first "." is the ccfx input line in hex
            input_idx = int(pline.partition(".")[0], 16)
            pidx2orig[prep_no] = input2orig.get(input_idx, -1)
        meta.prepIdx2OrigIdx = pidx2orig
        meta.line2op = origline2op

    ccfx_out_path = pb.getCCFXOutputPath() + pb.getCCFXOutputFileName(
        lang, is_new, is_tmp=False)
    ccfx_out = RepertoireOutput()
    ccfx_out.loadFromFile(ccfx_out_path)

    files = {}
    for fileIdx, path in ccfx_out.getFileIter():
        print(fileIdx)
        print(path)
        if not metaDB.hasInputPath(path):
            raise Exception(
                "Couldn't find meta information for file: {0}".format(path))
        print(">>>>>>> " + path)
        files[fileIdx] = metaDB.getMetaForPath(path).filterOutput

    clones = {}

    # rewrite the clone line numbers to index into the filter output files
    for cloneIdx, (clone1, clone2) in ccfx_out.getCloneIter():
        fidx1, start1, end1 = clone1
        fidx2, start2, end2 = clone2
        meta1 = metaDB.getMetaForPath(ccfx_out.getFilePath(fidx1))
        meta2 = metaDB.getMetaForPath(ccfx_out.getFilePath(fidx2))

        # an index missing from the prep map translates to -1
        start1 = meta1.prepIdx2OrigIdx.get(start1 + 1, -1)
        end1 = meta1.prepIdx2OrigIdx.get(end1, -1)
        start2 = meta2.prepIdx2OrigIdx.get(start2 + 1, -1)
        end2 = meta2.prepIdx2OrigIdx.get(end2, -1)

        # lines with no recorded operation are tagged "X"
        op1 = [(line, meta1.line2op.get(line, "X"))
               for line in range(start1, end1 + 1)]
        op2 = [(line, meta2.line2op.get(line, "X"))
               for line in range(start2, end2 + 1)]

        clone1 = (fidx1, start1, end1, op1)
        clone2 = (fidx2, start2, end2, op2)
        if clone1[0] < clone2[0]:
            clones[cloneIdx] = (clone1, clone2)
        else:
            clones[cloneIdx] = (clone2, clone1)

    rep_out = RepertoireOutput()
    rep_out.loadFromData(files, clones)
    return rep_out
示例#4
0
    meta2 = metaDB.getMetaForPath(ccfx_out.getFilePath(fidx2))

    start1 = meta1.prepIdx2OrigIdx.get(start1+1, -1)
    end1 = meta1.prepIdx2OrigIdx.get(end1, -1)
    start2 = meta2.prepIdx2OrigIdx.get(start2+1, -1)
    end2 = end2 = meta2.prepIdx2OrigIdx.get(end2, -1)

    for i in range(start1,end1+1):
        op = meta1.line2op.get(i, "X")
        op1.append((i,op))

    for i in range(start2,end2+1):
        op = meta2.line2op.get(i, "X")
        op2.append((i,op))

    clone1 = (fidx1, start1, end1, op1)
    clone2 = (fidx2, start2, end2, op2)
    if clone1[0] < clone2[0]:
        clone = (clone1, clone2)
    else:
        clone = (clone2, clone1)
    clones[cloneIdx] = clone

# Package the translated results into a fresh RepertoireOutput; `files` and
# `clones` are populated earlier in the script.
rep_out = RepertoireOutput()
rep_out.loadFromData(files, clones)

# rep_out now holds the final results; write them out under a fixed,
# relative file name.
results_file_name = 'inter_group_results.txt'
rep_out.writeToFile(results_file_name)
print "Processing successful!!"