Пример #1
0
def matching(text, command):
    """Return the tokens of every dictionary name that matches ``text``.

    ``command`` selects which dictionary file is loaded through
    ``load_file``: ``'road'`` loads the road abbreviation file and
    ``'busstop'`` loads the bus-stop file (the bus-stop file is expected to
    have its header row removed beforehand).  Any other value means no
    dictionary is loaded and the result is an empty list.

    Each dictionary entry is lowercased and split on ``;`` into alternative
    names.  The first alternative for which ``pattern_match(name, text)``
    returns ``True`` is kept for that entry; later alternatives of the same
    entry are not examined.

    Returns:
        A flat list with the whitespace-separated tokens of every kept name,
        in dictionary order.  Duplicates are not removed.
    """
    data_dir = 'D:/Project/Transportation_SMU-NEC_collaboration/Data'
    if command == 'road':
        entries = load_file(data_dir, 'road_abbrevation_all.csv')
    elif command == 'busstop':
        entries = load_file(data_dir, 'bus_stop_crf.csv')
    else:
        entries = []

    tokens = []
    for entry in entries:
        for name in entry.lower().split(';'):
            # Only an explicit ``True`` counts as a match.
            if pattern_match(name, text) is True:
                tokens.extend(name.split())
                break  # one matching alternative per entry is enough
    return tokens
def extract_road_busstop_expression(list_line, list_dict):
    """Evaluate dictionary matching against labelled records and print metrics.

    Each non-blank item of ``list_line`` is a tab-separated record whose
    fields are, in order: an index, the gold label, a service token
    (``svc``) and a free text.  For every record the lowercased text is
    compared, through ``pattern_match``, with the ``;``-separated
    alternative names of each entry in ``list_dict``.  For each entry, the
    lowercased whitespace-separated tokens of its first matching alternative
    are collected.  The record is predicted ``'TRUE'`` when ``svc`` is one
    of the collected tokens and ``'FALSE'`` otherwise.

    Side effects:
        * prints one progress line per processed record;
        * passes the progress lines to ``write_file('d:/', 'busstop', ...)``;
        * prints the accuracy score, classification report and confusion
          matrix of the predictions against the gold labels.

    Args:
        list_line: iterable of tab-separated record strings.
        list_dict: iterable of dictionary entries, each a string of
            ``;``-separated alternative names.

    Returns:
        None.

    NOTE(review): gold labels are presumably the strings 'TRUE'/'FALSE' so
    that they are comparable with the predictions -- confirm against the
    input data.
    """
    y_label = []   # gold labels, one per processed record
    y_reg = []     # predicted flags, aligned with y_label
    list_write = []
    cnt = 1

    for line in list_line:
        # Skip blank lines (e.g. a trailing newline) instead of failing on
        # the field lookups below.
        if not line.strip():
            continue

        split_line = line.split('\t')
        label = split_line[1].strip()
        svc = split_line[2].strip()
        text = split_line[3].strip().lower()  # text for road or bus stop

        # Tokens of every dictionary name found in the text.  The dictionary
        # passed in as ``list_dict`` is the one searched.
        matched_tokens = set()
        for entry in list_dict:
            for name in entry.split(';'):
                lowered = name.lower()
                # Only an explicit ``True`` counts as a match.
                if pattern_match(lowered, text) is True:
                    matched_tokens.update(lowered.split())
                    break  # one matching alternative per entry is enough

        # NOTE(review): ``svc`` is compared as-is while the collected tokens
        # are lowercased -- confirm the service column is already lowercase.
        flag = 'TRUE' if svc in matched_tokens else 'FALSE'
        y_label.append(label)
        y_reg.append(flag)

        message = '-- finished this line -- %i' % cnt + '\t' + flag
        # Single-argument parenthesised print behaves the same on Python 2
        # and Python 3.
        print(message)
        list_write.append(message)
        cnt += 1

    write_file('d:/', 'busstop', list_write)

    print(metrics.accuracy_score(y_label, y_reg))
    print(metrics.classification_report(y_label, y_reg))
    print(metrics.confusion_matrix(y_label, y_reg))
def match_road(string, list_road):
    """Return the positions of the entries in ``list_road`` that match ``string``.

    Each entry is a string of ``;``-separated alternative names.  An entry
    matches when ``pattern_match(name, string)`` returns ``True`` for at
    least one of its alternatives; alternatives are tried in order and the
    remaining ones are skipped after the first match.

    Returns:
        A list of the zero-based positions of the matching entries, in
        ascending order.
    """
    return [
        position
        for position, entry in enumerate(list_road)
        # ``any`` stops at the first alternative that matches.
        if any(pattern_match(name, string) is True
               for name in entry.split(';'))
    ]