def handle(school):

    name = school[1]
    match = []

    if name.replace(' ', '').isalpha():
        top = 2900110
    else:
        top = 34050

    school_id = [k for k, v in SCHOOL_UNIVERSITY.iteritems() if v == name]

    if not school_id:
        match = [(i, find_lcs_len(v.encode('utf-8'), name.encode('utf-8')))
                 for i, v in SCHOOL_UNIVERSITY.iteritems() if i <= top]
        match = sorted(match, key=lambda x: x[1], reverse=True)[:10]

        print '\n--------%s--------\n' % name
        get = getIndex('\n'.join([
            '选择:\t' + str(match.index(i)) + ' ' + SCHOOL_UNIVERSITY[i[0]]
            for i in match
        ]))
        if get < 10:
            school_id = match[get][0]
        else:
            school_id = 0
        if school_id:
            print '\n\n++++++%s++++++++' % SCHOOL_UNIVERSITY[school_id]

    else:
        school_id = school_id[0]

    name = school[2]
    match = []
    depDict = defaultdict(str)
    dep_id = 0
    if name.replace(' ', ''):
        if school_id and type(
                school_id
        ) is int and school_id in SCHOOL_UNIVERSITY_DEPARTMENT_ID:
            for id in SCHOOL_UNIVERSITY_DEPARTMENT_ID[school_id]:
                depDict[id] = SCHOOL_UNIVERSITY_DEPARTMENT_ID2NAME[id]
        else:
            depDict = SCHOOL_UNIVERSITY_DEPARTMENT_ID2NAME

        dep_id = []
        for k, v in depDict.iteritems():
            mlen = find_lcs_len(name.encode('utf-8'), v.encode('utf-8'))
            if mlen > 3:
                dep_id.append((k, mlen))

        dep_id.sort(key=lambda x: x[1])

        if not dep_id:
            dep_id = 0
        else:
            dep_id = dep_id[0][0]
            print name, SCHOOL_UNIVERSITY_DEPARTMENT_ID2NAME[dep_id]

    return [school[0], school_id, dep_id]
def handle(school):

    name = school[1]
    match = []

    if name.replace(' ', '').isalpha():
        top = 2900110
    else:
        top = 34050

    school_id = [k for k, v in SCHOOL_UNIVERSITY.iteritems() if v == name]

    if not school_id:
        match = [(i, find_lcs_len(v.encode('utf-8'), name.encode('utf-8'))) for i, v in SCHOOL_UNIVERSITY.iteritems() if i <= top]
        match = sorted(match, key=lambda x:x[1], reverse=True)[:10]

        print '\n--------%s--------\n'% name
        get = getIndex('\n'.join(['选择:\t'+str(match.index(i))+' '+SCHOOL_UNIVERSITY[i[0]] for i in match]))
        if get < 10:
            school_id = match[get][0]
        else:
            school_id = 0
        if school_id:
            print '\n\n++++++%s++++++++'%SCHOOL_UNIVERSITY[school_id]

    else:
        school_id = school_id[0]

    name = school[2]
    match = []
    depDict = defaultdict(str)
    dep_id = 0
    if name.replace(' ', ''):
        if school_id and type(school_id) is int and school_id in SCHOOL_UNIVERSITY_DEPARTMENT_ID:
            for id in SCHOOL_UNIVERSITY_DEPARTMENT_ID[school_id]:
                depDict[id] = SCHOOL_UNIVERSITY_DEPARTMENT_ID2NAME[id]
        else:
            depDict = SCHOOL_UNIVERSITY_DEPARTMENT_ID2NAME

        dep_id = []
        for k,v in depDict.iteritems():
            mlen = find_lcs_len(name.encode('utf-8'),v.encode('utf-8'))
            if mlen > 3:
                dep_id.append((k,mlen))

        dep_id.sort(key=lambda x:x[1])

        if not dep_id:
            dep_id = 0
        else:
            dep_id = dep_id[0][0]
            print name,SCHOOL_UNIVERSITY_DEPARTMENT_ID2NAME[dep_id]

    return [school[0], school_id, dep_id]
示例#3
0
def calculate_similarity(file_name_1, file_name_2):
    """Return the similarity score of two files.

    Each file name is turned into a sequence by
    ``fetch_machine_instruction_sequence``; the length reported by
    ``lcs.find_lcs_len`` for the two sequences is then handed, together
    with both sequences, to ``calculate_jaccard_coefficient``, whose result
    is returned unchanged.
    """
    first = fetch_machine_instruction_sequence(file_name_1)
    second = fetch_machine_instruction_sequence(file_name_2)
    shared_len = lcs.find_lcs_len(first, second)
    return calculate_jaccard_coefficient(shared_len, first, second)
示例#4
0
# Script fragment: for every record in ``data`` find the keys of
# ``_SCHOOL_UNIVERSITY`` that best match the record's cleaned-up name and
# decide whether the match is good enough to keep.
# NOTE(review): this fragment stops right after the acceptance check; the
# code that uses ``f``, ``err`` and ``fcount`` is not visible here.
f = open('out2.txt', 'w')
err = open('logging2', 'w')
fcount = 0
for pos, i in enumerate(data):

    # i[1] is the raw name; replace_name() returns the cleaned form
    # (an empty result means the record is skipped).
    _name = i[1]
    name = replace_name(_name)
    print name
    if not name: continue
    # c collects the keys tied for the best score, maxlen is that score.
    c = []
    maxlen = 0

    for j, id in _SCHOOL_UNIVERSITY.iteritems():
        # Cheap pre-filter: only score keys sharing at least two distinct
        # elements with the name (characters, assuming both are strings).
        if len(set(name) & set(j)) >= 2:
            llen = find_lcs_len(name, j)
            if llen > maxlen:
                # New best score: restart the candidate list.
                c = [j]
                maxlen = llen
            elif llen == maxlen:
                c.append(j)
    ok = False
    if c:
        # Shortest candidate first, so c[0] is the tightest match.
        c.sort(key=len)
        #    print " ".join(c)
        # Accept only when the best score exceeds 60% of both the name's
        # length and the shortest candidate's length.
        if (maxlen /
                float(len(name))) > 0.6 and maxlen / float(len(c[0])) > 0.6:
            ok = True

    if ok:
        # From here on ``name`` holds the UTF-8 encoded third field.
        name = i[2].encode('utf-8')
示例#5
0
f = open('out2.txt', 'w')
err = open('logging2','w')
fcount = 0
for pos, i in enumerate(data):

    _name = i[1]
    name = replace_name(_name)
    print name
    if not name: continue
    c = []
    maxlen = 0

    for j, id in _SCHOOL_UNIVERSITY.iteritems():
        if len(set(name)&set(j)) >= 2:
            llen = find_lcs_len(name, j)
            if llen > maxlen:
                c = [j]
                maxlen = llen
            elif llen == maxlen:
                c.append(j)
    ok = False
    if c:
        c.sort(key=len)
        #    print " ".join(c)
        if (maxlen / float(len(name)) ) > 0.6 and maxlen/float(len(c[0])) > 0.6:
            ok = True

    if ok:
        name = i[2].encode('utf-8')
        p = []