示例#1
0
def search_signatures(indexname, level_filenames, feature_filenames):
    """Search the pickled signature index for each (level, feature) file pair.

    Returns one list per pair: (match_index, compare, pearson) tuples, where
    pearson correlates the smoothed query level signature with the smoothed
    level signature of the matched entry.
    """
    import correlate
    import smooth_avg

    # Cache every level signature once; the search loop reuses them both as
    # queries and as match targets, so re-reading each file (as the original
    # did) would double the I/O for no benefit.
    level_signatures = [signature_read.read_signature_short(fn)
                        for fn in level_filenames]

    # NOTE(review): pickle.loads on file content — only safe for trusted data.
    with open(indexname, "r") as f:
        data = pickle.loads(f.read())

    search_results = []
    for level_signature, feature_filename in zip(level_signatures,
                                                 feature_filenames):
        feature_signature = signature_read.read_signature_byte(
            feature_filename)
        signature = signature_make.signature_make(level_signature,
                                                  feature_signature)
        search_result = signature_make.signature_search_intersect(
            data, signature)
        # Smooth the query once per pair instead of once per hit.
        smoothed_query = smooth_avg.avg_smooth(level_signature)
        search_result = [
            (i, compare,
             correlate.pearson(smoothed_query,
                               smooth_avg.avg_smooth(level_signatures[i])))
            for i, compare in search_result
        ]
        search_results.append(search_result)
    return search_results
def main():
    pattern_length = 100
    pattern_step = 50

    indexname = sys.argv[1]
    filename = sys.argv[2]

    patterns  = pickle.loads(open(indexname, "r").read())

    values = []
    for line in open(filename, "r").readlines():
        value = int(line.strip())
        values.append(value)

    result = []
    queue = deque()
    for i, v in enumerate(values):
        if len(queue) >= pattern_length:
            queue.popleft()
        queue.append(v)

        if len(queue) >= pattern_length and ((i + 1) % pattern_step) == 0:
            rank = []
            for pindex, pattern in enumerate(patterns):
                rv = correlate.pearson(pattern, queue)
                rank.append((rv, pindex))
            rank.sort(key = lambda x: -x[0])
            result.append(str(rank[0][1]))

    print "_".join(result)
示例#3
0
def main():
    pattern_length = 100
    pattern_step = 50

    indexname = sys.argv[1]
    filename = sys.argv[2]

    patterns = pickle.loads(open(indexname, "r").read())

    values = []
    for line in open(filename, "r").readlines():
        value = int(line.strip())
        values.append(value)

    result = []
    queue = deque()
    for i, v in enumerate(values):
        if len(queue) >= pattern_length:
            queue.popleft()
        queue.append(v)

        if len(queue) >= pattern_length and ((i + 1) % pattern_step) == 0:
            rank = []
            for pindex, pattern in enumerate(patterns):
                rv = correlate.pearson(pattern, queue)
                rank.append((rv, pindex))
            rank.sort(key=lambda x: -x[0])
            result.append(str(rank[0][1]))

    print "_".join(result)
def create_graph_correlation(indexfile, filelist):
    index = pickle.loads(open(indexfile, "r").read())
    db, filenames, filevalues = index

    graph = []
    for index1, filename1 in enumerate(filenames):
        edges = []
        for index2, filename2 in enumerate(filenames):
            if filename1 != filename2:
                correlation = correlate.pearson(filevalues[index1], filevalues[index2])
                edges.append((index2, correlation))
        graph.append(edges)

    data = (graph, [filename for filename, filesize in filenames])
    print pickle.dumps(data)
def main():
    listfile = sys.argv[1]
    listfilea = sys.argv[2]
    files = load_listfile(listfile)
    filesa = load_listfile(listfilea)

    data = []
    dataa = []

    for filename in files:
        values = []
        for line in open(filename, "r").readlines():
            vs = line.strip().split()
            value = int(vs[1]) + 2**15
            values.append(value)
        data.append(frozenset(values))

    for filename in filesa:
        values = []
        for line in open(filename, "r").readlines():
            vs = line.strip().split()
            value = int(vs[1]) + 2**15
            values.append(float(value))
        dataa.append(values)

    n = 20
    sarray = [0 for i in xrange(0, n)]
    narray = [0 for i in xrange(0, n)]
    for i in xrange(0, len(data)):
        for j in xrange(i + 1, len(data)):
            similarity_p = correlate.pearson(dataa[i], dataa[j])
            similarity = 2.0 * float(len(data[i].intersection(
                data[j]))) / float(len(data[i]) + len(data[j]))
            index = min(max(int(n * similarity), 0), n - 1)
            if similarity_p > 0.8:
                sarray[index] += 1
            else:
                narray[index] += 1

    index = 0
    for svalue, nvalue in zip(sarray, narray):
        print str(index) + " " + str(svalue) + " " + str(nvalue)
        index += 1
示例#6
0
def main():
    listfile = sys.argv[1]
    listfilea = sys.argv[2]
    files = load_listfile(listfile)
    filesa = load_listfile(listfilea)

    data = []
    dataa = []

    for filename in files:
        values = []
        for line in open(filename, "r").readlines():
            vs = line.strip().split()
            value = int(vs[1]) + 2**15
            values.append(value)
        data.append(frozenset(values))

    for filename in filesa:
        values = []
        for line in open(filename, "r").readlines():
            vs = line.strip().split()
            value = int(vs[1]) + 2**15
            values.append(float(value))
        dataa.append(values)

    n = 20
    sarray = [0 for i in xrange(0, n)]
    narray = [0 for i in xrange(0, n)]
    for i in xrange(0, len(data)):
        for j in xrange(i + 1, len(data)):
            similarity_p = correlate.pearson(dataa[i], dataa[j])
            similarity = 2.0 * float(len(data[i].intersection(data[j]))) / float(len(data[i]) + len(data[j]))
            index = min(max(int(n * similarity), 0), n - 1)
            if similarity_p > 0.8:
                sarray[index] += 1
            else:
                narray[index] += 1

    index = 0
    for svalue, nvalue in zip(sarray, narray):
        print str(index) + " " + str(svalue) + " " + str(nvalue)
        index += 1
def search_signatures(indexname, level_filenames, feature_filenames):
    """Run an intersection search over the pickled index for every
    (level, feature) file pair and annotate each hit with the Pearson
    correlation of the smoothed level signatures."""
    import correlate
    import smooth_avg

    # First pass: collect the level signatures so hits can be scored
    # against any of them by index.
    cached_levels = []
    for lvl_name, _ in zip(level_filenames, feature_filenames):
        cached_levels.append(signature_read.read_signature_short(lvl_name))

    index_data = pickle.loads(open(indexname, "r").read())
    all_results = []
    for lvl_name, feat_name in zip(level_filenames, feature_filenames):
        lvl_sig = signature_read.read_signature_short(lvl_name)
        feat_sig = signature_read.read_signature_byte(feat_name)
        query = signature_make.signature_make(lvl_sig, feat_sig)
        hits = signature_make.signature_search_intersect(index_data, query)
        annotated = []
        for i, compare in hits:
            score = correlate.pearson(smooth_avg.avg_smooth(lvl_sig),
                                      smooth_avg.avg_smooth(cached_levels[i]))
            annotated.append((i, compare, score))
        all_results.append(annotated)
    return all_results
def main():
    threshold = float(sys.argv[2])

    indexname = sys.argv[1]

    patterns = pickle.loads(open(indexname, "r").read())
    patterns = patterns[::int(sys.argv[3])]

    values = []
    matches = []

    for i in xrange(0, len(patterns)):
        matches.append([i])
        for k in xrange(i + 1, len(patterns)):
            value = correlate.pearson(patterns[i], patterns[k])
            values.append((i, k, value))
            if value > threshold:
                matches[i].append(k)

    firstresult = []
    secondresult = []

    rpatterns = []

    for ps in matches:
        if len(ps) <= 1:
            continue

        plen = int(sys.argv[4])
        presult = []
        for k in xrange(0, plen):
            average = 0
            for x in ps:
                average += patterns[x][k]
            average /= len(ps)
            presult.append(average)
        rpatterns.append(presult)

    for i, k, value in values:
        if value > threshold:
            firstresult.extend(patterns[i])
            secondresult.extend(patterns[k])
            print "(" + str(i) + " x " + str(k) + " = " + str(value) + ")"

    print pickle.dumps(rpatterns)

    ff = open(indexname + ".first", "w")
    sf = open(indexname + ".second", "w")
    rf = open(indexname + ".average", "w")

    for rpattern in rpatterns:
        for rvalue in rpattern:
            rf.write(str(rvalue) + "\n")
    rf.close()

    for value in firstresult[:]:
        ff.write(str(value) + "\n")
    ff.close()

    for value in secondresult[:]:
        sf.write(str(value) + "\n")
    sf.close()
def main():
    threshold = float(sys.argv[2])

    indexname = sys.argv[1]

    patterns = pickle.loads(open(indexname, "r").read())
    patterns = patterns[::int(sys.argv[3])]

    values = []
    matches = []

    for i in xrange(0, len(patterns)):
        matches.append([i])
        for k in xrange(i + 1, len(patterns)):
            value = correlate.pearson(patterns[i], patterns[k])
            values.append((i, k, value))
            if value > threshold:
                matches[i].append(k)

    firstresult = []
    secondresult = []

    rpatterns = []

    for ps in matches:
        if len(ps) <= 1:
            continue

        plen = int(sys.argv[4])
        presult = []
        for k in xrange(0, plen):
            average = 0
            for x in ps:
                average += patterns[x][k]
            average /= len(ps)
            presult.append(average)
        rpatterns.append(presult)

    for i, k, value in values:
        if value > threshold:
            firstresult.extend(patterns[i])
            secondresult.extend(patterns[k])
            print "(" + str(i) + " x " + str(k) + " = " + str(value) + ")"

    print pickle.dumps(rpatterns)

    ff = open(indexname + ".first", "w")
    sf = open(indexname + ".second", "w")
    rf = open(indexname + ".average", "w")

    for rpattern in rpatterns:
        for rvalue in rpattern:
            rf.write(str(rvalue) + "\n")
    rf.close()

    for value in firstresult[:]:
        ff.write(str(value) + "\n")
    ff.close()

    for value in secondresult[:]:
        sf.write(str(value) + "\n")
    sf.close()