def pattern_tokenText_bussvc(token, text, command):
    """Tell whether ``token`` is among the matches found for ``text``.

    The service dictionary is loaded with ``load_dict('svc')`` and passed,
    together with ``text`` and ``command``, to ``pattern_bus_service_ver2``.

    Args:
        token: Value looked up in the result of ``pattern_bus_service_ver2``.
        text: Text forwarded to ``pattern_bus_service_ver2``.
        command: Extra argument forwarded unchanged to
            ``pattern_bus_service_ver2``.

    Returns:
        True if ``token`` is contained in the returned matches, else False.
    """
    known_services = load_dict('svc')
    matches = pattern_bus_service_ver2(text, known_services, command)
    return token in matches
def check_reg(svc, text, list_sv):
    """Append the marker word ``feature_reg`` to ``text`` when ``svc`` is found.

    ``svc`` counts as found when it appears in the union of the results of
    ``pattern_bus_service_ver2(text, list_sv)`` and
    ``pattern_bus_service(text, list_sv)``.

    Args:
        svc: Service identifier to look for among the extracted services.
        text: Text that is searched and, on a hit, extended with the marker.
        list_sv: Service list forwarded to both extraction helpers.

    Returns:
        ``text + ' feature_reg'`` if ``svc`` is among the extracted services,
        otherwise ``text`` unchanged.
    """
    from_patterns = set(pattern_bus_service_ver2(text, list_sv))
    from_matches = set(pattern_bus_service(text, list_sv))
    found_services = list(from_patterns.union(from_matches))
    return text + ' feature_reg' if svc in found_services else text
def extract_svc_expression(list_line, list_sv):
    """Evaluate the rule-based service extraction against labelled lines.

    Each line is split on tabs; field 1 is taken as the gold label, field 2
    as the service and field 3 as the text (field 0 is not used here).  A
    line is predicted ``'TRUE'`` when its service appears in the union of
    ``pattern_bus_service_ver2(text, list_sv)`` and
    ``pattern_bus_service(text, list_sv)``, otherwise ``'FALSE'``.

    The predictions are printed one per line, followed by the accuracy
    score, classification report and confusion matrix computed by
    ``metrics`` from the gold labels and the predictions.

    Args:
        list_line: Iterable of tab-separated strings with at least four
            fields each.
        list_sv: Service list forwarded to both extraction helpers.

    Returns:
        None.  All results are written to standard output.

    Raises:
        IndexError: If a line has fewer than four tab-separated fields.
    """
    y_label = []
    y_reg = []

    for line in list_line:
        fields = line.split('\t')
        label = fields[1]
        # The service stays a stripped string for the membership test below.
        # NOTE(review): the original comment says services "can be string or
        # int" -- confirm the extraction helpers return strings.
        svc = fields[2].strip()
        text = fields[3].strip()

        y_label.append(label)

        found_services = (set(pattern_bus_service_ver2(text, list_sv))
                          | set(pattern_bus_service(text, list_sv)))
        y_reg.append('TRUE' if svc in found_services else 'FALSE')

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    for value in y_reg:
        print(value)

    print(metrics.accuracy_score(y_label, y_reg))
    print(metrics.classification_report(y_label, y_reg))
    print(metrics.confusion_matrix(y_label, y_reg))