def _check_dir(self, env):
    """Return the subset of the four cardinal moves viable from
    (self.row, self.column).

    A move is kept when the adjacent cell is empty, or when it starts a
    contiguous run of objects of type 3 that ends in an empty cell
    (presumably a pushable run — TODO confirm against the simulation rules).
    Moves that fall off the grid, hit a non-3 object, or hit a run of 3s
    that is blocked are discarded.
    """
    movs = [(-1, 0), (0, 1), (1, 0), (0, -1)]
    # Iterate over a copy so removing from `movs` is safe.
    for mov in movs[:]:
        if is_in(self.row + mov[0], self.column + mov[1], env):
            place = env[self.row + mov[0]][self.column + mov[1]]
            if len(place.objects):
                if 3 in place.objects:
                    # Walk along the run of 3s until it ends.
                    j = 2
                    while True:
                        if is_in(self.row + mov[0] * j,
                                 self.column + mov[1] * j, env):
                            objects = env[self.row + mov[0] * j][
                                self.column + mov[1] * j].objects
                            if len(objects) and 3 in objects:
                                j += 1
                                continue
                            elif not len(objects):
                                # Run ends in an empty cell: move is viable.
                                break
                            # Run ends in a non-3 object: blocked.
                            movs.remove(mov)
                            break
                        else:
                            # BUGFIX: the run of 3s reaches the edge of the
                            # grid; the original had no exit here and spun
                            # in `while True` forever.  Treat as blocked.
                            movs.remove(mov)
                            break
                else:
                    # Adjacent cell occupied by something other than 3.
                    movs.remove(mov)
        else:
            # Adjacent cell is off the grid.
            movs.remove(mov)
    return movs
def _filter_synset(term, not_terms, synsets):
    """Yield the (synset, gloss, lemmas) triples whose gloss or lemmas
    mention `term` and mention none of `not_terms`."""
    for synset, gloss, lemmas in synsets:
        # First require a positive match; only then pay for the negative
        # check (preserves the original short-circuit order).
        if is_in(term, gloss) or is_in(term, lemmas):
            if not (is_in(not_terms, gloss) or is_in(not_terms, lemmas)):
                yield synset, gloss, lemmas
def load_events_file(self):
    """Prompt the user for an events CSV via a Qt file dialog and, when it
    contains the required columns, load it into self.events_df and advance
    to the first image.  Shows an error dialog otherwise."""
    # A bare QWidget acts as parent for the file dialog (PyQt4).
    w = QtGui.QWidget()
    w.resize(320, 240)
    w.setWindowTitle("Hello World!")
    filename = QtGui.QFileDialog.getOpenFileName(
        w, 'Open File', '/', "Text Files (*.txt *.csv)")
    print(filename)
    try:
        df = pd.read_csv(str(filename))
        if is_in(REQUIRED_COLS, df.columns):
            folder = os_path.dirname(str(filename))
            self.events_df = df
            self.events_df[
                "next_img_name"] = folder + self.events_df["next_img_name"]
            self.images_names = df.next_img_name.unique()
            self.count = -1
            # NOTE(review): `foward` (sic) is the keyword update_image expects.
            self.update_image(foward=True)
        else:
            show_message("Dataframe must contain columns:\n\n{0}".format(
                ", ".join(REQUIRED_COLS)))
    except Exception as e:
        # Any parse/IO failure is reported to the user rather than raised.
        print(e)
        show_message("Not a valid dataframe")
def _path_to_closest_object(self, env, obj):
    """Breadth-first search from (self.row, self.column) to the nearest
    cell whose first object equals `obj`.

    Cells containing object 3 (and also object 2 when self.have_child)
    are treated as impassable.  Returns the list of (row, col) positions
    from the start cell to the target inclusive, or None when no target
    is reachable.
    """
    from collections import deque  # stdlib; O(1) pops from the left

    # Pre-mark impassable cells as "already visited" so BFS never enters them.
    if self.have_child:
        checked = [[3 in env[i][j].objects or 2 in env[i][j].objects
                    for j in range(len(env[0]))]
                   for i in range(len(env))]
    else:
        checked = [[3 in env[i][j].objects
                    for j in range(len(env[0]))]
                   for i in range(len(env))]
    moves = [(-1, 0), (0, 1), (1, 0), (0, -1)]
    cola = deque([[(self.row, self.column)]])
    # BUGFIX: mark cells as visited when they are ENQUEUED, not when popped.
    # The original marked on pop, so a cell could be queued many times and
    # the frontier could grow combinatorially; the shortest path found is
    # unchanged because the first enqueue is still via a shortest route.
    checked[self.row][self.column] = True
    while cola:
        path = cola.popleft()
        row, col = path[-1]
        if len(env[row][col].objects) and env[row][col].objects[0] == obj:
            return path
        for direct in moves:
            new_row = row + direct[0]
            new_col = col + direct[1]
            if is_in(new_row, new_col, checked) and not checked[new_row][new_col]:
                checked[new_row][new_col] = True
                cola.append(path + [(new_row, new_col)])
    # Implicitly returns None when `obj` is unreachable (as the original did).
def goal_test(self, state):
    """Return True if the state is a goal. The default method compares the
    state to self.goal or checks for state in self.goal if it is a list, as
    specified in the constructor. Override this method if checking against a
    single self.goal is not enough."""
    goal = self.goal
    return is_in(state, goal) if isinstance(goal, list) else state == goal
def goal_test(self, state):
    """Return True when `state` is a goal: a membership check when
    self.goal is a list, an equality check otherwise, as configured in
    the constructor.  (Docstring translated from Portuguese.)"""
    if not isinstance(self.goal, list):
        return state == self.goal
    return is_in(state, self.goal)
def _filter(file_path):
    """Build an Index from the notations parsed out of `file_path`.

    A notation deeper than one level is marked selected when its
    description mentions an allegory/symbolism stem or its code ends in
    '0'; everything else is added unselected."""
    terms = ['allegori', 'symboli']
    index = Index()
    for notation, data in parse(file_path):
        selected = notation.depth > 1 and (
            is_in(terms, data['desc'].lower())
            or notation.code[-1] == '0')
        index.add(notation, data, select=selected)
    return index
def _query_synset(synset_name):
    """Yield (synset, gloss_tokens, lemmas) for each hypernym of the named
    WordNet synset whose gloss does not read as informal/slang/gossip."""
    root = wordnet.synset(synset_name)
    skip_markers = ['informal', 'slang', 'gossip']
    for synset in _get_hypernyms(root):
        gloss = word_tokenize(synset.definition().lower())
        if is_in(skip_markers, gloss):
            continue
        lemmas = []
        for lemma in synset.lemma_names():
            # Normalize multiword lemmas: underscores -> spaces, lowercase.
            lemmas.append(lemma.replace('_', ' ').lower())
        yield synset, gloss, lemmas
def count_prescript(series_two, list_name, group_list_word):
    """
    Count how many prescriptions contain each symptom cluster, and rank the
    matching prescriptions of each cluster by how many "important" (shared)
    drugs they contain.  Results are written to data/count.csv and pickled
    to group_count.pkl.

    :param series_two: prescription DataFrame (columns include 序号/处方)
    :param list_name: vocabulary used to map words <-> numeric ids
    :param group_list_word: symptom clusters, as words
    :return: the structure produced by utils.write_csv
    """
    # Map the word clusters to numeric-id clusters, and the prescription
    # series to (numeric symptom lists, row-index list).
    group_list = utils.word_2_num(list_name, group_list_word)
    series_list,index_list = series_2_list(series_two, list_name)
    # utils.save_pickle('data.txt',series_list)
    count_list = []
    id_list = []
    for group in group_list:
        count = 0
        item_li = []
        for i,item in enumerate(series_list):
            if (utils.is_in(group, item)):  # the prescription contains the whole cluster
                item_li.append(index_list[i])
                count += 1
        count_list.append(count)
        id_list.append(item_li)
    # Rank the prescriptions matched by each symptom cluster.
    '''对每个症状团对应的药方进行排序'''
    for list_index,item_li in enumerate(id_list):
        # query_rows = series_two.query('序号==' + str(li))
        # Skip clusters matched by fewer than 3 prescriptions.
        if len(item_li)<3:
            continue
        query_rows = series_two.loc[series_two['序号'].isin(item_li)]
        med_all = []
        # Collect every drug appearing in this cluster's prescriptions.
        '''汇总该类药方所有药物'''
        for index,row in query_rows.iterrows():
            med = row['处方'].split('、')
            med_all.extend(med)
        count_result = Counter(med_all)
        med_list = [x for x in count_result if count_result[x]>1]  # drugs appearing more than once
        count_dict = {}
        for index,row in query_rows.iterrows():
            med = row['处方'].split('、')
            # NOTE(review): `id` shadows the builtin; kept for fidelity.
            id = row['序号']
            count_dict[id] = len([x for x in med if x in med_list])  # number of important drugs per prescription
        sorted_id = sorted(count_dict.items(),key=lambda x:x[1],reverse = True)  # rank prescriptions by that count
        id_list[list_index] = [x[0] for x in sorted_id]
    data = utils.write_csv(['聚类', '数字', '数量','id'], 'data/count.csv',
                           group_list_word, group_list, count_list,id_list)
    # The saved csv cannot round-trip the list column, hence the pickle below.
    utils.save_pickle('group_count.pkl',data)
    return data
def group_clean(pkl_file):
    """
    Merge clusters ("friend groups") of different member counts into one
    flat list, dropping any group that is already covered by a previously
    accepted (larger) group.

    :param pkl_file: pickle containing groups bucketed by member count
    :return: flat list of surviving groups, largest member counts first
    """
    group = utils.load_pickle(pkl_file)
    all_list = []
    # Walk from the largest member count downwards.
    # NOTE(review): range stops at 1, so group[0] is never processed —
    # presumably intentional (e.g. singleton bucket), TODO confirm.
    for i in range(len(group) - 1, 0, -1):
        item = list(group[i])
        # NOTE(review): member_num is computed but never used.
        member_num = len(item[0])
        new_item = copy.deepcopy(item)  # copy used for the in-loop removals
        for item_li in item:
            for li in all_list:
                # Drop item_li when an already-kept group covers it.
                if (utils.is_in(item_li, li) and item_li in new_item):
                    new_item.remove(item_li)
                    break
        all_list.extend(new_item)
    return all_list
def calculate(series, list_name, group_list_word):
    """For each prescription, find the symptom clusters it fully contains,
    then write the (indication, clusters) pairs to data/pattern.csv."""
    group_list = utils.word_2_num(list_name, group_list_word)
    series_list, index_list = series_2_list(series, list_name)
    # One cluster list per prescription, preserving cluster order.
    pattern_list = [
        [group for group in group_list if utils.is_in(group, item)]
        for item in series_list
    ]
    # Convert numeric ids back to words before saving.
    series_list = utils.num_2_word(list_name, series_list)
    pattern_list = utils.num_2_word(list_name, pattern_list)
    utils.write_csv(['主治', '功能团'], 'data/pattern.csv', series_list,
                    pattern_list)
def _put_trash(self, env, row, column):
    """Drop a piece of trash (object 4) on a random empty cell among the
    eight neighbours of (row, column) or the cell itself.

    Returns (env, True) on success, or (env, False) when every candidate
    cell is off the grid or occupied."""
    offsets = [(-1, 0), (-1, 1), (0, 1), (1, 1), (1, 0), (1, -1),
               (0, -1), (-1, -1), (0, 0)]
    # Keep only in-grid, empty cells (same order as the offset list, so
    # the random choice distribution matches the original filtering loop).
    candidates = []
    for d_row, d_col in offsets:
        if not is_in(row + d_row, column + d_col, env):
            continue
        if len(env[row + d_row][column + d_col].objects):
            continue
        candidates.append((d_row, d_col))
    if not len(candidates):
        return env, False
    dirt_mov = choice(candidates)
    env[row + dirt_mov[0]][column + dirt_mov[1]].add_object(4)
    return env, True
def test_configure():
    """Exercise metrique.utils.configure: precedence of defaults, options,
    the 'update' template, and on-disk config files (flat and nested)."""
    from metrique.utils import configure
    # No arguments -> empty config.
    assert configure() == {}
    config = dict(debug=100, OK='OK')
    defaults = dict(debug=False, log2file=False)
    options = dict(debug=20, log2file=None)  # when None, should be ignored
    config_file = os.path.join(etc, 'test_conf.json')
    # contents:
    # { "file": true,
    #   "debug": true,
    #   "log2file": true }
    # first, only defaults
    x = configure(defaults=defaults)
    assert is_in(x, 'debug', False)
    assert is_in(x, 'log2file', False)
    # then, where opt is not None, override
    x = configure(defaults=defaults, options=options)
    assert is_in(x, 'debug', 20)
    assert is_in(x, 'log2file', False)  # ignored options:None value
    # update acts as 'template config' in place of {}
    # but options will override values set already...
    # so, except that we have a new key, this should
    # be same as the one above
    x = configure(update=config, defaults=defaults, options=options)
    assert is_in(x, 'debug', 20)
    assert is_in(x, 'log2file', False)  # ignored options:None value
    assert is_in(x, 'OK', 'OK')  # only in the template config
    # first thing loaded is values from disk, then updated
    # with 'update' config template
    # since log2file is set in config_file to True, it will
    # take that value
    x = configure(config_file=config_file, update=config,
                  defaults=defaults, options=options)
    assert is_in(x, 'debug', 20)
    assert is_in(x, 'log2file', True)  # ignored options:None value
    assert is_in(x, 'OK', 'OK')  # only in the template config
    assert is_in(x, 'file', True)  # only in the config_file config
    # cf is loaded first and update config template applied on top
    x = configure(config_file=config_file, update=config)
    assert is_in(x, 'debug', 100)
    assert is_in(x, 'log2file', True)  # ignored options:None value
    assert is_in(x, 'OK', 'OK')  # only in the template config
    assert is_in(x, 'file', True)  # only in the config_file config
    # cf is loaded first and update config template applied on top
    x = configure(config_file=config_file, options=options)
    assert is_in(x, 'debug', 20)
    assert is_in(x, 'log2file', True)  # ignored options:None value
    assert is_in(x, 'file', True)  # only in the config_file config
    # cf is loaded first and where key:values aren't set or set to
    # None defaults will be applied
    x = configure(config_file=config_file, defaults=defaults)
    assert is_in(x, 'debug', True)
    assert is_in(x, 'log2file', True)  # ignored options:None value
    assert is_in(x, 'file', True)  # only in the config_file config
    config_file = os.path.join(etc, 'test_conf_nested.json')
    # Contents are same, but one level nested under key 'metrique'
    x = configure(config_file=config_file, defaults=defaults,
                  section_key='metrique', section_only=True)
    assert is_in(x, 'debug', True)
    assert is_in(x, 'log2file', True)  # ignored options:None value
    assert is_in(x, 'file', True)  # only in the config_file config
    _x = x.copy()
    config_file = os.path.join(etc, 'test_conf_nested.json')
    # Contents are same, but one level nested under key 'metrique'
    x = configure(config_file=config_file, defaults=defaults,
                  section_key='metrique')
    assert is_in(x, 'metrique', _x)
    try:
        # should fail
        x = configure(config_file='I_DO_NOT_EXIST')
    except IOError:
        pass
    else:
        assert False
    # shouldn't fail, but instead, returns empty dict
    x = configure(config_file=config_file,
                  section_key='I_DO_NOT_EXIST', section_only=True)
    assert x == {}
    # Non-dict update/options/defaults must raise AttributeError.
    for arg in ('update', 'options', 'defaults'):
        try:
            x = configure(**{arg: 'I_SHOULD_BE_A_DICT'})
        except AttributeError:
            pass
        else:
            assert False
def gt(item):
    """Return 1 when `item` matches the module-level keywords, else 0."""
    return 1 if is_in(keywords, item) else 0
def goal_test(self, state):
    """True iff `state` matches self.goal (membership when goal is a list,
    equality otherwise)."""
    goal_is_list = isinstance(self.goal, list)
    return is_in(state, self.goal) if goal_is_list else state == self.goal
def _is_male(self):
    """Return True when the terms match the male markers and do not also
    match any female term."""
    # Guard clauses preserve the original short-circuit order.
    if not is_in(self.terms, self._males):
        return False
    if is_in(self.terms, TERMS_FEMALE):
        return False
    return True
def _get_code(self):
    """Return CODE_FEMALE or CODE_MALE for non-excluded terms; None when
    the terms are excluded or neither gender matches."""
    if is_in(self.terms, EXCLUDE_TERMS):
        return None
    if self._is_female():
        return CODE_FEMALE
    if self._is_male():
        return CODE_MALE
    return None
def recomment_main(insert):
    """
    Recommendation entry point: combine the clustering results with a
    whoosh index search to recommend prescriptions for the given symptoms.

    :param insert: list of symptom strings entered by the user
    :return: (mode, result) where mode is 1 (direct match), 2 (cluster
             match plus OR search) or 3 (OR search only)
    """
    # insert = ['带下赤白', '四肢乏力']  (fully contained example)
    # insert = ['头痛', '赤白带下', '腹痛', '饮食减少', '经闭', '风寒']  (cluster example)
    # insert = ['经闭', '头晕', '食症','恶心','风寒']  (not-contained example)
    list_name = utils.load_pickle('list_name.txt')
    series = utils.load_pickle('series.pkl')  # symptom series
    group_list = utils.group_clean2('data/group10.csv.pkl')
    group_count = clus4.count_prescript(series, list_name, group_list)
    # Folder holding the whoosh index.
    indexfile = 'index/'
    # Path of the synonym dictionary.
    dictfile = 'data/clean2.txt'
    # Path of the prescription dataset (index creation is done elsewhere).
    # datasetfile = 'data/symptom_entity.csv'
    # who.get_index(datasetfile, indexfile)
    # User query: AND query over all entered symptoms.
    my_query = who.add_synonym(dictfile, insert)
    result = who.get_recommend(indexfile, my_query)
    # How many prescriptions to take per cluster / from the OR search.
    num_4_group = 2
    num_4_other = 2
    if (result):  # the query matched directly
        print('可以直接匹配')
        flag=0  # set when some cluster also matched
        # Reason over the symptom clusters as well.
        for index, group in enumerate(group_list):
            if (utils.is_in(insert,group) and len(insert)!=len(group)):
                flag=1
                ids = group_count.loc[index, 'id']
                print('症状团',group)
                print_info(ids,4)
        if not flag:  # no symptom cluster matched
            print_info(result,1)
        return 1, result
    else:
        # Initialize the synonym map from file.
        word_map = utils.gene_dic_2('data/clean2.txt')
        # Normalize the input symptoms to their canonical synonyms.
        insert = [word_map[x] for x in insert]
        insert = utils.delete_duplicate(insert)  # dedupe, ignoring repeat weight
        group_new = []
        # Find the clusters fully contained in the input.
        match_symptom = []
        for index, group in enumerate(group_list):
            if (utils.is_in(group, insert)):
                match_symptom.extend(group)
                group_new.append(index)  # indices of the matched clusters
        group_new.sort(reverse=True)
        # NOTE(review): other_symptom is computed but never used.
        other_symptom = list(set(insert)-set(match_symptom))
        series_ids = []  # prescription ids matched via clusters
        group_clean = []
        # Drop clusters already covered by a previously accepted one.
        for index in group_new:
            group = group_count.loc[index, '聚类']
            if (utils.is_in2(group, group_clean)):
                continue
            group_clean.append(group)
            ids = group_count.loc[index, 'id']
            print('药物团', group)
            # series_ids.append((group,ids))
            # Keep at most num_4_group prescriptions per cluster.
            for i,id in enumerate(ids):
                if i>num_4_group-1:
                    break
                series_ids.append(str(id))
        # Some cluster-backed prescriptions exist.
        if (len(series_ids) > 0):
            # OR-search the remaining symptoms via whoosh.
            my_query = who.add_synonym2(dictfile, insert)  # OR query
            recommend_result = who.get_recommend(indexfile, my_query)
            recommend_result = recommend_result[:num_4_other]
            series_ids.extend(recommend_result)
            # series_ids = list(set(series_ids))  (would lose ordering)
            series_ids = utils.delete_duplicate(series_ids)  # order-preserving dedupe
            print('存在症状团匹配')
            print_info(series_ids,2)
            return 2, series_ids
        # No cluster-backed prescriptions at all.
        else:
            print('没有症状团匹配,or搜索')
            my_query = who.add_synonym2(dictfile, insert)  # OR query
            result = who.get_recommend(indexfile, my_query)
            print_info(result,3)
            return 3, result
def test_configure():
    """Exercise metrique.utils.configure precedence rules.

    NOTE(review): this appears to duplicate the earlier test_configure in
    this file — confirm whether both copies are needed."""
    from metrique.utils import configure
    # No arguments -> empty config.
    assert configure() == {}
    config = dict(
        debug=100, OK='OK')
    defaults = dict(
        debug=False, log2file=False)
    options = dict(
        debug=20, log2file=None)  # when None, should be ignored
    config_file = os.path.join(etc, 'test_conf.json')
    # contents:
    # { "file": true,
    #   "debug": true,
    #   "log2file": true }
    # first, only defaults
    x = configure(defaults=defaults)
    assert is_in(x, 'debug', False)
    assert is_in(x, 'log2file', False)
    # then, where opt is not None, override
    x = configure(defaults=defaults, options=options)
    assert is_in(x, 'debug', 20)
    assert is_in(x, 'log2file', False)  # ignored options:None value
    # update acts as 'template config' in place of {}
    # but options will override values set already...
    # so, except that we have a new key, this should
    # be same as the one above
    x = configure(update=config, defaults=defaults, options=options)
    assert is_in(x, 'debug', 20)
    assert is_in(x, 'log2file', False)  # ignored options:None value
    assert is_in(x, 'OK', 'OK')  # only in the template config
    # first thing loaded is values from disk, then updated
    # with 'update' config template
    # since log2file is set in config_file to True, it will
    # take that value
    x = configure(config_file=config_file, update=config,
                  defaults=defaults, options=options)
    assert is_in(x, 'debug', 20)
    assert is_in(x, 'log2file', True)  # ignored options:None value
    assert is_in(x, 'OK', 'OK')  # only in the template config
    assert is_in(x, 'file', True)  # only in the config_file config
    # cf is loaded first and update config template applied on top
    x = configure(config_file=config_file, update=config)
    assert is_in(x, 'debug', 100)
    assert is_in(x, 'log2file', True)  # ignored options:None value
    assert is_in(x, 'OK', 'OK')  # only in the template config
    assert is_in(x, 'file', True)  # only in the config_file config
    # cf is loaded first and update config template applied on top
    x = configure(config_file=config_file, options=options)
    assert is_in(x, 'debug', 20)
    assert is_in(x, 'log2file', True)  # ignored options:None value
    assert is_in(x, 'file', True)  # only in the config_file config
    # cf is loaded first and where key:values aren't set or set to
    # None defaults will be applied
    x = configure(config_file=config_file, defaults=defaults)
    assert is_in(x, 'debug', True)
    assert is_in(x, 'log2file', True)  # ignored options:None value
    assert is_in(x, 'file', True)  # only in the config_file config
    config_file = os.path.join(etc, 'test_conf_nested.json')
    # Contents are same, but one level nested under key 'metrique'
    x = configure(config_file=config_file, defaults=defaults,
                  section_key='metrique', section_only=True)
    assert is_in(x, 'debug', True)
    assert is_in(x, 'log2file', True)  # ignored options:None value
    assert is_in(x, 'file', True)  # only in the config_file config
    _x = x.copy()
    config_file = os.path.join(etc, 'test_conf_nested.json')
    # Contents are same, but one level nested under key 'metrique'
    x = configure(config_file=config_file, defaults=defaults,
                  section_key='metrique')
    assert is_in(x, 'metrique', _x)
    try:
        # should fail
        x = configure(config_file='I_DO_NOT_EXIST')
    except IOError:
        pass
    else:
        assert False
    # shouldn't fail, but instead, returns empty dict
    x = configure(config_file=config_file,
                  section_key='I_DO_NOT_EXIST', section_only=True)
    assert x == {}
    # Non-dict update/options/defaults must raise AttributeError.
    for arg in ('update', 'options', 'defaults'):
        try:
            x = configure(**{arg: 'I_SHOULD_BE_A_DICT'})
        except AttributeError:
            pass
        else:
            assert False
def main_(params):
    """Run the vectorize -> tfidf -> distances -> clustering pipeline over a
    corpus of tokenized source files, resuming from cached .npz artifacts.

    Each stage runs when named in params.stages_to_run, or when a later
    stage needs its missing output.  Results are uploaded via
    upload_to_gcp after each stage.
    """
    # can be called using dictobj.DictionaryObject({'metric': 'euclidean'}) or
    # str_to_params('--output_folder result3 --metric euclidean --input_folder ../codes_short/ --files_limit 100 --max_features 2000')
    # for construction of params object
    list_of_tokens = get_vocab(params.select_top_tokens,
                               'short_sorted_freq_list.txt')
    # Artifact paths under the output folder.
    vector_path = join(params.output_folder, 'vectors/vectors.npz')
    tfidf_path = join(params.output_folder, 'tfidf.npz')
    distances_path = join(params.output_folder, 'distances.npz')
    # NOTE(review): f-string has no placeholder; plain string would do.
    vectors_all_path = os.path.join(params.output_folder, f'vectors_all.npz')
    vectors_folder = join(params.output_folder)
    if params.cores_to_use == -1:
        params.cores_to_use = multiprocessing.cpu_count()
    # Workers get cores_to_use=1; true_cores drives the process pool size.
    true_cores = params.cores_to_use
    params.cores_to_use = 1
    s = params.files_limit_start
    # Cap the end index by a hard limit and the actual corpus size.
    e = min([
        params.files_limit_end, 111000,
        len(os.listdir(os.path.join(params.input_folder, 'tokenized1')))
    ])
    params.select_functions_limit = min(params.select_functions_limit, e)
    q = Queue()
    # --- vectors stage (also triggered when later stages need the file) ---
    if 'vectors' in params.stages_to_run or (
            not os.path.exists(vectors_all_path) and is_in(
                ['tfidf', 'distances', 'clustering'], params.stages_to_run)):
        # Queue only the batch indices whose vector chunk is not cached.
        count = s
        q_len = 0
        while count < e:
            if not os.path.exists(vector_path[:-4] + str(count) + '.npz'):
                q.put(count)
                q_len += 1
            count += params.files_limit_step
        if q_len > 0:
            multi_process_run(UserProcessTask(params, list_of_tokens, q),
                              true_cores)
        bow_matrix, lists, all_ends_raw, gt_values, filenames_list, all_vulnerabilities, all_start_raw, vocab, idf, all_functions_count = load_vectors_iter_folder(
            e, s, params.files_limit_step, vector_path, params.output_folder,
            params.security_keywords,
            indices=params.select_functions_limit)
        np.savez_compressed(vectors_all_path,
                            bow_matrix=bow_matrix,
                            lists=lists,
                            all_start_ends=all_ends_raw,
                            gt_values=gt_values,
                            filenames_list=filenames_list,
                            all_vulnerabilities=all_vulnerabilities,
                            all_start_raw=all_start_raw,
                            idf=idf,
                            all_functions_count=all_functions_count)
        upload_to_gcp(params)
    q.close()
    # --- tfidf stage ---
    if 'tfidf' in params.stages_to_run or (
            not os.path.exists(tfidf_path)
            and is_in(['distances', 'clustering'], params.stages_to_run)):
        if 'vectors' not in params.stages_to_run:
            # Reload vectors produced by an earlier run.
            # data = load_vectors(vector_path)
            # bow_matrix, lists, raw_lists, gt_values, filenames_list,\
            # all_vulnerabilities, all_start_raw, vocab = data
            bow_matrix, lists, all_ends_raw, gt_values, filenames_list, all_vulnerabilities, all_start_raw, vocab, idf, all_functions_count = load_vectors_iter(
                vectors_folder
            )  # load_vectors_iter_folder(e,s, params.files_limit_step, vector_path, indices=params.select_functions_limit)
        # intersting_indices = np.array(list(range(len(lists))))
        # if scipy.sparse.issparse(bow_matrix):
        #     matrix = bow_matrix.toarray()
        # NOTE(review): `matrix` is only assigned under this condition;
        # other vectorizer/matrix_form combinations would raise NameError.
        if params.vectorizer == 'count' and params.matrix_form == 'tfidf':
            matrix = count_to_tfidf(bow_matrix, idf, all_functions_count)
        np.savez_compressed(tfidf_path, matrix=matrix)
        upload_to_gcp(params)
    # --- distances stage ---
    if 'distances' in params.stages_to_run or (
            not os.path.exists(distances_path)
            and is_in(['clustering'], params.stages_to_run)):
        if 'tfidf' not in params.stages_to_run:
            matrix = np.load(tfidf_path)['matrix']
        distances = pdist(matrix, metric=params.metric)
        np.savez_compressed(distances_path, distances=distances)
        upload_to_gcp(params)
    # --- clustering stage ---
    if 'clustering' in params.stages_to_run:
        if 'vectors' not in params.stages_to_run and 'tfidf' not in params.stages_to_run:
            bow_matrix, lists, all_ends_raw, gt_values, filenames_list, all_vulnerabilities, all_start_raw, vocab, idf, all_functions_count = load_vectors_iter(
                vectors_folder)
        if 'tfidf' not in params.stages_to_run:
            matrix = np.load(tfidf_path)['matrix']
        if 'distances' not in params.stages_to_run:
            distances = np.load(distances_path)['distances']
        analyze_functions2(distances, matrix, lists, all_ends_raw, params,
                           gt_values, filenames_list, all_vulnerabilities,
                           all_start_raw)
        upload_to_gcp(params)
    print('finished')
    if params.shutdown:
        subprocess.run('sudo shutdown', shell=True)  # sudo shutdown 0 on aws machines
def goal_test(self, state):
    """Return True if the state is a goal."""
    goal = self.goal
    if isinstance(goal, list):
        return is_in(state, goal)
    return state == goal