def parse(line):
    """Print every parse tree of ``line`` whose root is tagged 'S'.

    The sentence is split on whitespace. Candidate trees are generated lazily
    from the module-level ``lexicon`` and ``grammar`` and handed to
    ``print_tree`` one at a time.
    """
    tokens = line.split()

    def trees_for(lo, hi, tag):
        """Generate each tree labelled ``tag`` that spans tokens[lo:hi]."""
        # A span of exactly one token may be matched by lexicon leaves.
        if hi - lo == 1:
            token = tokens[lo]
            matching = (entry for entry in lexicon
                        if entry.tag == tag and entry.word == token)
            for entry in matching:
                yield entry
        # Tags without grammar rules can only ever be leaves.
        if tag not in grammar:
            return
        for rhs in grammar[tag]:
            for children in branch_lists(lo, hi, rhs):
                yield Tree(tag, children)

    def branch_lists(lo, hi, tags):
        """Generate each list of subtrees matching ``tags`` over tokens[lo:hi]."""
        if len(tags) == 1:
            for only in trees_for(lo, hi, tags[0]):
                yield [only]
            return
        head, tail = tags[0], tags[1:]
        # Leave at least one token for every tag still to be matched.
        last_cut = hi - len(tail)
        for cut in range(lo + 1, last_cut + 1):
            for head_tree in trees_for(lo, cut, head):
                for tail_trees in branch_lists(cut, hi, tail):
                    yield [head_tree] + tail_trees

    for parsed in trees_for(0, len(tokens), 'S'):
        print_tree(parsed)
def max_parses(line, n=1):
    """Print the parse trees of ``line`` that are rooted at 'S'.

    The sentence is split on whitespace. The nested ``expand`` generator is
    wrapped first by ``max_trees(n)`` and then by ``memoize`` (both defined
    elsewhere in the file).
    NOTE(review): presumably ``n`` limits how many trees are kept per call and
    the memoizer caches them -- confirm against those decorators.
    """
    tokens = line.split()

    @memoize
    @max_trees(n)
    def expand(start, end, tag):
        """Generate each tree labelled ``tag`` that spans tokens[start:end]."""
        # A one-token span is a leaf when the word may carry this tag.
        if end - start == 1:
            token = tokens[start]
            if tag in tags_for_word(token):
                yield Leaf(tag, token)
        # Tags without grammar rules can only ever be leaves.
        if tag not in grammar:
            return
        for rhs in grammar[tag]:
            for children in expand_all(start, end, rhs):
                yield Tree(tag, children)

    def expand_all(start, end, tags):
        """Generate each list of subtrees matching ``tags`` over tokens[start:end]."""
        if len(tags) == 1:
            for only in expand(start, end, tags[0]):
                yield [only]
            return
        head, tail = tags[0], tags[1:]
        # Leave at least one token for every tag still to be matched.
        last_cut = end - len(tail)
        for cut in range(start + 1, last_cut + 1):
            for head_tree in expand(start, cut, head):
                for tail_trees in expand_all(cut, end, tail):
                    yield [head_tree] + tail_trees

    for parsed in expand(0, len(tokens), 'S'):
        print_tree(parsed)
def max_parses(line, n=1):
    """Print the parse trees of ``line`` that are rooted at 'S'.

    The sentence is split on whitespace. The nested ``expand`` generator is
    wrapped first by ``max_trees(n)`` and then by ``memoize`` (both defined
    elsewhere in the file).
    NOTE(review): presumably ``n`` limits how many trees are kept per call and
    the memoizer caches them -- confirm against those decorators.
    """
    tokens = line.split()

    @memoize
    @max_trees(n)
    def expand(start, end, tag):
        """Generate each tree labelled ``tag`` that spans tokens[start:end]."""
        # A one-token span is a leaf when the word may carry this tag.
        if end - start == 1:
            token = tokens[start]
            if tag in tags_for_word(token):
                yield Leaf(tag, token)
        # Tags without grammar rules can only ever be leaves.
        if tag not in grammar:
            return
        for rhs in grammar[tag]:
            for children in expand_all(start, end, rhs):
                yield Tree(tag, children)

    def expand_all(start, end, tags):
        """Generate each list of subtrees matching ``tags`` over tokens[start:end]."""
        if len(tags) == 1:
            for only in expand(start, end, tags[0]):
                yield [only]
            return
        head, tail = tags[0], tags[1:]
        # Leave at least one token for every tag still to be matched.
        last_cut = end - len(tail)
        for cut in range(start + 1, last_cut + 1):
            for head_tree in expand(start, cut, head):
                for tail_trees in expand_all(cut, end, tail):
                    yield [head_tree] + tail_trees

    for parsed in expand(0, len(tokens), 'S'):
        print_tree(parsed)
def parse(line):
    """Print every parse tree of ``line`` whose root is tagged 'S'.

    The sentence is split on whitespace. Candidate trees are generated lazily
    from the module-level ``lexicon`` and ``grammar`` and handed to
    ``print_tree`` one at a time.
    """
    tokens = line.split()

    def trees_for(lo, hi, tag):
        """Generate each tree labelled ``tag`` that spans tokens[lo:hi]."""
        # A span of exactly one token may be matched by lexicon leaves.
        if hi - lo == 1:
            token = tokens[lo]
            matching = (entry for entry in lexicon
                        if entry.tag == tag and entry.word == token)
            for entry in matching:
                yield entry
        # Tags without grammar rules can only ever be leaves.
        if tag not in grammar:
            return
        for rhs in grammar[tag]:
            for children in branch_lists(lo, hi, rhs):
                yield Tree(tag, children)

    def branch_lists(lo, hi, tags):
        """Generate each list of subtrees matching ``tags`` over tokens[lo:hi]."""
        if len(tags) == 1:
            for only in trees_for(lo, hi, tags[0]):
                yield [only]
            return
        head, tail = tags[0], tags[1:]
        # Leave at least one token for every tag still to be matched.
        last_cut = hi - len(tail)
        for cut in range(lo + 1, last_cut + 1):
            for head_tree in trees_for(lo, cut, head):
                for tail_trees in branch_lists(cut, hi, tail):
                    yield [head_tree] + tail_trees

    for parsed in trees_for(0, len(tokens), 'S'):
        print_tree(parsed)
if show_hidden_file and len(entry.name) > 1 and entry.name.startswith("."): continue if entry.is_dir(): dir_attr["__children"].extend(make_dir_tree(entry.path)) else: dir_attr["__children"].append({"__id":entry.name}) L.append(dir_attr) return L if __name__ == "__main__": from print_tree import print_tree from sys import argv from argparse import ArgumentParser from argparse import ArgumentDefaultsHelpFormatter ap = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) ap.add_argument("dir", nargs="*", help="1st item.") ap.add_argument("-a", action="store_true", dest="show_hidden_file", help="show hidden files.") opt = ap.parse_args() if len(opt.dir) == 0: top_dir = ["."] else: top_dir = opt.dir try: for d in top_dir: L = make_dir_tree(d) print_tree(L, style=[" ", "|--", "'--", False]) except NotADirectoryError as e: print(f"ERROR: {top_dir} is not a directory.") exit(0)
# NOTE(review): the first if/else is the tail of a function that starts before
# this excerpt; its nesting is reconstructed -- confirm against the full file.
        if v < _sum:
            # v is below the target: take the next item from lo_iter.
            lo = lo_iter.next()
        else:
            # Otherwise take the next item from hi_iter.
            hi = hi_iter.next()

#======================================================================
# Demo driver (Python 2 syntax: print statements and iterator .next()).
if __name__ == "__main__":
    from test import test_root
    from print_tree import print_tree
    root = test_root()
    # Snapshot the current namespace so it can be scanned for candidate functions.
    d = locals().copy()
    # Every callable whose name contains "with_sum" is treated as an
    # implementation to exercise.
    l = [ d[attr] for attr in d if callable(d[attr]) and "with_sum" in attr]
    # No key is given, so the function objects are ordered by their default
    # comparison.
    l.sort()
    for f in l:
        print "\n------------------------------------------------------"
        print f.__doc__
        print_tree(root)
        # Try each target sum against the same tree.
        for _sum in [-1, 1, 2, 3, 4, 4.5, 5, 6, 7, 8, 10, 11, 12, 13, 14, 20]:
            r = f(root, _sum)
            if r is not None:
                x, y = r
                # %d truncates non-integers, so a target such as 4.5 would be
                # printed as 4 here.
                print "%d+%d=%d" % (x, y, _sum)
            else:
                print "no 2 nodes sum to %s" % repr(_sum)
# NOTE(review): the statements before the separator are the tail of a function
# that starts before this excerpt; their nesting is reconstructed -- confirm
# against the full file.
        if v == _sum:
            # Exact match: return the values of the current lo/hi pair.
            return lo.value, hi.value
        if v < _sum:
            # v is below the target: take the next item from lo_iter.
            lo = lo_iter.next()
        else:
            # Otherwise take the next item from hi_iter.
            hi = hi_iter.next()

#======================================================================
# Demo driver (Python 2 syntax: print statements and iterator .next()).
if __name__ == "__main__":
    from test import test_root
    from print_tree import print_tree
    root = test_root()
    # Snapshot the current namespace so it can be scanned for candidate functions.
    d = locals().copy()
    # Every callable whose name contains "with_sum" is treated as an
    # implementation to exercise.
    l = [d[attr] for attr in d if callable(d[attr]) and "with_sum" in attr]
    # No key is given, so the function objects are ordered by their default
    # comparison.
    l.sort()
    for f in l:
        print "\n------------------------------------------------------"
        print f.__doc__
        print_tree(root)
        # Try each target sum against the same tree.
        for _sum in [-1, 1, 2, 3, 4, 4.5, 5, 6, 7, 8, 10, 11, 12, 13, 14, 20]:
            r = f(root, _sum)
            if r is not None:
                x, y = r
                # %d truncates non-integers, so a target such as 4.5 would be
                # printed as 4 here.
                print "%d+%d=%d" % (x, y, _sum)
            else:
                print "no 2 nodes sum to %s" % repr(_sum)
3. Repeat steps 1 and 2 until only one element remains.

    :param total: number of elements still unsorted; the loop below indexes
        array[1] .. array[total]
    :param array: list that is rearranged in place (presumably already a
        max-heap stored from index 1 -- confirm against the caller)
    :return: the same ``array`` object once the loop has finished
    '''
    while total > 1:
        # Swap the heap top (index 1) with the last node of the unsorted range.
        array[1], array[total] = array[total], array[1]
        total -= 1
        # With two elements left, if the last node is already >= the heap top,
        # no further adjustment is needed.
        if total == 2 and array[total] >= array[total - 1]:
            break
        heap_adjust(total, 1, array)
    return array


if __name__ == '__main__':
    # Build the elements to sort:
    # origin = [x * 10 for x in range(1, 10)]
    # random.shuffle(origin)
    # origin.insert(0, 0)
    # A dummy placeholder 0 is put at index 0 so that list indices line up
    # with binary-tree node numbering.
    origin = [0, 20, 10, 40, 70, 50, 60, 90, 30, 80]
    print(origin)
    print_tree.print_tree(origin, True)
    print('=' * 50)
    # Initial number of elements to sort, i.e. n (the placeholder is excluded).
    total = len(origin) - 1
    print_tree.print_tree(sort(total, max_heap(total, origin)))
    print(origin)
def xgboost_train(file, num_class, num_rounds, early_stopping_rounds):
    """Train a multi-class XGBoost model from a CSV file and export its results.

    The CSV is read with pandas and must contain the columns ``Id`` and
    ``Label``; every other column is used as a feature. The rows are split
    70/30 into training and validation sets (``test_size=0.3``,
    ``random_state=1``).

    Side effects: writes the model to ``./model/xgb.model``, three CSV files
    under ``./result/`` and the predictions for the full input to
    ``./model/xgb_submission.csv``. Those directories are not created here.

    :param file: path (or buffer) passed to ``pd.read_csv``
    :param num_class: number of classes, passed to XGBoost as ``num_class``
    :param num_rounds: number of boosting rounds passed to ``xgb.train``
    :param early_stopping_rounds: passed to ``xgb.train`` unchanged
    :return: the four values returned by ``get_xgb_feat_importances(model)``:
        ``(ft_importance, feat_importances_sorted, machine_importance,
        machine_importances_sorted)``
    """
    # Record the start time so the total run time can be reported.
    start_time = time.time()

    # Read the data.
    # train = pd.read_csv('DigitRecognizer/train.csv')
    # tests = pd.read_csv('DigitRecognizer/test.csv')
    train = pd.read_csv(file)

    # Split into training and validation sets (70/30 here; adjust as needed).
    train_xy, val = train_test_split(train, test_size=0.3, random_state=1)

    y = train_xy.Label
    x = train_xy.drop(['Id', 'Label'], axis=1)
    val_y = val.Label
    val_x = val.drop(['Id', 'Label'], axis=1)

    # Build the XGBoost matrices.
    xgb_val = xgb.DMatrix(val_x, label=val_y)
    xgb_train = xgb.DMatrix(x, label=y)
    # xgb_test = xgb.DMatrix(tests)
    # For now, predict on the original samples.
    xgb_test = xgb.DMatrix(train.drop(['Id', 'Label'], axis=1))

    ceate_feature_map(x.columns)

    params = {
        'booster': 'gbtree',
        'objective': 'multi:softmax',  # multi-class classification
        'num_class': num_class,  # number of classes, used with multi:softmax
        'gamma': 0.1,  # pruning control; larger is more conservative
        'max_depth': 12,  # tree depth; larger overfits more easily
        # L2 regularisation weight; larger makes overfitting less likely.
        # (The key was previously misspelled 'lambda:' and therefore ignored.)
        'lambda': 2,
        'subsample': 0.7,  # row subsampling of the training samples
        'colsample_bytree': 0.7,  # column subsampling per tree
        # Minimum sum of hessians required in a leaf (XGBoost default is 1);
        # smaller values overfit more easily. (The key was previously
        # misspelled 'mid_child_weight' and therefore ignored.)
        'min_child_weight': 3,
        'silent': 0,
        # eta acts as the learning rate.
        'eta': 0.007,
        'seed': 1000,
        'nthread': 0,  # number of CPU threads
        # 'eval_metric': 'auc'
    }
    plst = list(params.items())
    # num_rounds = 5000  # number of boosting iterations
    watchlist = [(xgb_train, 'train'), (xgb_val, 'val')]

    # Train and save the model. With a large number of rounds,
    # early_stopping_rounds stops training once the validation metric has not
    # improved for that many rounds.
    save_location = './model/xgb.model'
    model = xgb.train(plst, xgb_train, num_rounds, watchlist,
                      early_stopping_rounds=early_stopping_rounds)
    model.save_model(save_location)  # persist the trained model
    # NOTE(review): best_ntree_limit / ntree_limit are version-dependent in
    # XGBoost -- confirm against the installed release.
    print('best best_ntree_limit', model.best_ntree_limit)
    pt.print_tree(model)

    [
        ft_importance, feat_importances_sorted, machine_importance,
        machine_importances_sorted
    ] = get_xgb_feat_importances(model)

    ft_importance.to_csv('./result/feature_imporatance.csv', index=False)
    machine_importance.to_csv('./result/process_machine_imporatance.csv',
                              index=False)
    machine_importances_sorted.to_csv(
        './result/process_machine_importances_sorted.csv', index=False)

    preds = model.predict(xgb_test, ntree_limit=model.best_ntree_limit)

    # np.savetxt('xgb_submission.csv', np.c_[range(1,len(tests)+1), preds], delimiter=',', header='ImageId, Label', comments='', fmt='%d')
    np.savetxt('./model/xgb_submission.csv',
               np.c_[range(1, len(train) + 1), preds],
               delimiter=',',
               header='ImageId, Label',
               comments='',
               fmt='%d')

    # Report the elapsed time.
    cost_time = time.time() - start_time
    print('xgboost success!', '\n', 'cost time: ', cost_time, '(s)......')

    # # Plot feature importance and the tree diagram
    # xgb.plot_importance(model)
    # xgb.plot_tree(model, fmap='xgb.fmap')
    return ft_importance, feat_importances_sorted, machine_importance, machine_importances_sorted
# NOTE(review): remaining entries of a list literal that starts before this
# excerpt (presumably bound to `L`, which is used right after it). Each dict
# has an "ID" and a "PID"; presumably PID is the parent's ID, with None
# marking a root -- confirm against make_from_bottom_up_tree.
    },
    { "ID": "I", "PID": "H" },
    { "ID": "B", "PID": "C" },
    { "ID": "E", "PID": "A" },
    { "ID": "F", "PID": None },
    { "ID": "K", "PID": "F" },
    { "ID": "D", "PID": "B" },
]
# Randomise the record order before the records are passed on.
shuffle(L)
make_from_bottom_up_tree(L, keys=("ID", "PID"))
# Disabled: printing of the result.
# print_tree(L, keys=("ID", "__children"))
'VP': [['V', 'NP']], 'RP': [['R', 'NP', 'V']], } def expand(tag): """Yield all trees rooted by tag.""" for leaf in lexicon: if tag == leaf.tag: yield leaf if tag in grammar: for tags in grammar[tag]: for branches in expand_all(tags): yield Tree(tag, branches) def expand_all(tags): """Yield all sequences of branches for a sequence of tags.""" if len(tags) == 1: for branch in expand(tags[0]): yield [branch] else: first, rest = tags[0], tags[1:] for first_branch in expand(first): for rest_branches in expand_all(rest): yield [first_branch] + rest_branches for tree in expand('S'): print_tree(tree)
else: new_node.left = build_bracket_tree(bracket_depth - 1, players) new_node.right = build_bracket_tree(bracket_depth - 1, players) return new_node def bracket_builder(tournament_name, tournament_event, top_players): num_top_players = len(top_players) bracket_size = init_sim_bracket(tournament_name, tournament_event) winners_players = top_players[:num_top_players / 2] winners_players.reverse() losers_players = top_players[num_top_players / 2:] losers_players.reverse() winners_bracket = build_bracket_tree(log(bracket_size / 2, 2), winners_players) losers_bracket = build_bracket_tree(log(bracket_size / 2, 2), losers_players) return (winners_bracket, losers_bracket) if __name__ == "__main__": top_players = get_top_players("get-on-my-level-2016", "melee-singles") winners_bracket, losers_bracket = bracket_builder("get-on-my-level-2016", "melee-singles", top_players) print_tree(winners_bracket) print_tree(losers_bracket)