Example #1
def test(**kwargs):
	
	# ---------------------- Update parameters ----------------------
	opt = DefaultConfig()
	opt.update(**kwargs)
	opt.printf()

	# ---------------------- Data processing ----------------------

	# Load data
	train, test = get_test_data(opt)
	gc.collect()
	# Build samples
	# test_sample = get_sample(train, test, load=True)
	# gc.collect()
	# Build features
	# test_feat = get_feat(train, test_sample)
	# gc.collect()

	# Save features to file
	# test_feat.to_hdf('/home/xuwenchao/dyj-storage/all-feat/feat_{}.hdf'.format(test.shape[0]), 'w', complib='blosc', complevel=5)
	test_feat = pd.read_hdf('/home/xuwenchao/dyj-storage/all-feat/feat_{}.hdf'.format(test.shape[0]))
	test_feat = get_feat(train, test_feat)
	gc.collect()
	test_feat.to_hdf('/home/xuwenchao/dyj-storage/all-feat/feat_{}_filter.hdf'.format(test.shape[0]), 'w', complib='blosc', complevel=5)

	# ---------------------- Load model ----------------------

	# opt['model_name'] = 'lgb_1_90_all.pkl'
	# gbm0, use_feat0 = load_model(opt)
	opt['model_name'] = 'lgb_2017-09-23#20:14:52_0.58893.pkl'
	gbm1, use_feat1 = load_model(opt)
	# opt['model_name'] = 'lgb_2_300_top15.pkl'
	# gbm2, use_feat2 = load_model(opt)
	# opt['model_name'] = 'lgb_3_300_top10.pkl'
	# gbm3, use_feat3 = load_model(opt)
	# opt['model_name'] = 'lgb_4_300_top5.pkl'
	# gbm4, use_feat4 = load_model(opt)
    
	# ---------------------- Save predictions -------------------

	# test_feat.loc[:, 'pred'] = gbm0.predict(test_feat[use_feat0])
	# gc.collect()
	# res = test_feat[['orderid', 'geohashed_end_loc', 'pred']].sort_values(by=['orderid', 'pred'], ascending=False).groupby('orderid').head(25)
	# res[['orderid', 'geohashed_end_loc']].to_hdf('/home/xuwenchao/dyj-storage/sample_25_{}_filter_leak_sample.hdf'.format(test.shape[0]), 'w', complib='blosc', complevel=5)
	# gc.collect()

	# test_feat.loc[:, 'pred'] = gbm1.predict(test_feat[use_feat1])
	# test_feat[['orderid', 'geohashed_end_loc', 'pred']].to_hdf('/home/xuwenchao/dyj-storage/pred/pred_{}_0.58820.hdf'.format(test.shape[0]), 'w', complib='blosc', complevel=5)

	res = predict(test_feat, use_feat1, gbm1)
	test_feat[['orderid', 'geohashed_end_loc', 'pred']].to_hdf('/home/xuwenchao/dyj-storage/pred/pred_{}_0.58893.hdf'.format(test.shape[0]), 'w', complib='blosc', complevel=5)
	gc.collect()
	cur_time = datetime.datetime.now().strftime('%Y-%m-%d#%H:%M:%S')
	res_path = '{}/day{}_{}_wc_sample_0.58893.csv'.format(opt['result_dir'], opt['test_startday'], cur_time)
	res.to_csv(res_path, index=False)
	print('Saved test results to:', res_path)
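Every example on this page drives its script through a project-specific `DefaultConfig` object: attribute defaults, `update(**kwargs)` or `parse({...})` overrides, dict-style access such as `opt['model_name']`, and a `printf()` dump. The class itself is not shown in any of these excerpts; the following is only a minimal sketch of an object that would satisfy those calls (the default values are illustrative, not taken from any of the projects).

class DefaultConfig:
    # illustrative defaults only; real projects keep paths, hyper-parameters, etc. here
    result_dir = './result'
    model_dir = './model'
    lgb_leaves = 127
    lgb_lr = 0.05

    def update(self, **kwargs):
        # override defaults with keyword arguments, e.g. opt.update(lgb_lr=0.1)
        for k, v in kwargs.items():
            setattr(self, k, v)

    def parse(self, kwargs):
        # dict flavour of update(), matching opt.parse({...}) in the later examples
        self.update(**kwargs)

    # dict-style access, e.g. opt['model_name'] = 'lgb.pkl'
    def __getitem__(self, key):
        return getattr(self, key)

    def __setitem__(self, key, value):
        setattr(self, key, value)

    def printf(self):
        # dump the effective configuration
        for k in dir(self):
            if not k.startswith('_') and not callable(getattr(self, k)):
                print('{}: {}'.format(k, getattr(self, k)))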
Example #2
def val(**kwargs):

	# ---------------------- Update parameters ----------------------
	opt = DefaultConfig()
	opt.update(**kwargs)
	opt.printf()

	# ---------------------- Data processing ----------------------

	# Load data
	# train1, train2, train_test = get_train_data(opt)
	# Build samples
	# train_sample = get_sample(train1, train2, load=True)
	# Build features
	# train_feat = get_feat(train_test, train_sample)
	# gc.collect()
    
	# train_feat.to_hdf('/home/xuwenchao/dyj-storage/all-feat/feat_{}.hdf'.format(opt['startday']), 'w', complib='blosc', complevel=5)
	train_feat = pd.read_hdf('/home/xuwenchao/dyj-storage/all-feat/feat_23_24_label.hdf')

	# ---------------------- Load models ----------------------
	
	# opt['model_name'] = 'lgb_1_90_all.pkl'
	# gbm0, use_feat0 = load_model(opt)
	opt['model_name'] = 'lgb_1_2017-09-15#19:50:48_0.58820.pkl'
	gbm, use_feat = load_model(opt)
	opt['model_name'] = 'lgb_2017-09-23#20:14:52_0.58893.pkl'
	gbm1, use_feat1 = load_model(opt)
	# gbm2, use_feat2 = load_model(opt)
	# opt['model_name'] = 'lgb_2017-09-03#23:24:26_0.57836.pkl'
	# gbm3, use_feat3 = load_model(opt)
	# opt['model_name'] = ''
	# gbm4, use_feat4 = load_model(opt)

	# ---------------------- Evaluation -------------------------

	train_feat.loc[:, 'pred'] = gbm.predict(train_feat[use_feat])
	gc.collect()
	train_feat[['orderid', 'geohashed_end_loc', 'pred']].to_csv('/home/xuwenchao/dyj-storage/pred/pred_23_24_0.58820.csv', index=None)
	train_feat.loc[:, 'pred'] = gbm1.predict(train_feat[use_feat1])
	gc.collect()
	train_feat[['orderid', 'geohashed_end_loc', 'pred']].to_csv('/home/xuwenchao/dyj-storage/pred/pred_23_24_0.58893.csv', index=None)
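`get_score` and the offline metric are not shown in these excerpts. For this kind of candidate-ranking setup, evaluation usually means keeping the top-3 predictions per `orderid` and scoring the rank of the true end location. The helper below is only a sketch of that idea, assuming a `label` column marks the true candidate; it is not the repository's actual `get_score`.

import pandas as pd

def eval_top3(feat):
    """Sketch of a MAP@3-style score over columns orderid / pred / label."""
    # keep the 3 highest-scoring candidates per order
    top3 = (feat.sort_values(['orderid', 'pred'], ascending=[True, False])
                .groupby('orderid')
                .head(3)
                .copy())
    # rank within each order: 0 for the best candidate
    top3['rank'] = top3.groupby('orderid').cumcount()
    # reciprocal rank of the true candidate; orders whose true location
    # was filtered out of the top 3 contribute 0
    hits = top3[top3['label'] == 1]
    return (1.0 / (hits['rank'] + 1)).sum() / feat['orderid'].nunique()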
Example #3
def train(**kwargs):

	# ---------------------- Update parameters ----------------------
	opt = DefaultConfig()
	opt.update(**kwargs)
	opt.printf()

	# ---------------------- Data processing ----------------------

	# Load data
	# train1, train2 = get_train_data(opt)
	# Build samples
	# train_sample = get_sample(train1, train2, load=True)
	# Build features
	# train_feat = get_feat(train1, train_sample)
	# Build labels
	# train_all = get_label(train_feat, opt)
	# gc.collect()

	# train_all.to_hdf('/home/xuwenchao/dyj-storage/all-feat/feat_23_24_label.hdf', 'w', complib='blosc', complevel=5)
	train_all = pd.read_hdf('/home/xuwenchao/dyj-storage/all-feat/feat_23_24_label.hdf')
	print(train_all.shape)
    
	# Select the features to use
	# opt['model_name'] = 'lgb_1_2017-09-15#19:50:48_0.58820.pkl'
	# gbm, use_feat = load_model(opt)
	# predictors_100 = pd.DataFrame(data={'feature_name': gbm.feature_name(), 'feature_importance': gbm.feature_importance()})
	# predictors_100 = predictors_100.sort_values(by=['feature_importance'], ascending=False)['feature_name'].values[:100]
	# use_feat = list(predictors_100) + ['orderid', 'geohashed_end_loc', 'label'] + ['sloc_eloc_common_eloc_count', 'sloc_eloc_common_sloc_count', 'sloc_eloc_common_conn1_count', 'sloc_eloc_common_conn2_count', 'sloc_eloc_common_eloc_rate', 'sloc_eloc_common_sloc_rate', 'sloc_eloc_common_conn1_rate', 'sloc_eloc_common_conn2_rate', 'user_sloc_eloc_common_eloc_count', 'user_sloc_eloc_common_sloc_count', 'user_sloc_eloc_common_conn1_count', 'user_sloc_eloc_common_conn2_count', 'user_sloc_eloc_common_eloc_rate', 'user_sloc_eloc_common_sloc_rate', 'user_sloc_eloc_common_conn1_rate', 'user_sloc_eloc_common_conn2_rate']
	# train_all = train_all[use_feat]
	# gc.collect()
    
	# -------------------- Train layer 1 ------------------------

	# ********* Prepare data **********
	# Split off a validation set
	train, val = train_test_split(train_all, test_size=0.1)
	# Define which features to use
	# opt['model_name'] = 'lgb_1_2017-09-15#19:50:48_0.58820.pkl'
	# gbm, use_feat = load_model(opt)
	filters = set(['orderid', 'userid', 'biketype', 'geohashed_start_loc', 'bikeid', 'starttime', 'geohashed_end_loc', 'label'])
	predictors = list(filter(lambda x: x not in filters, train_all.columns.tolist()))
	# predictors = pd.DataFrame(data={'feature_name': gbm.feature_name(), 'feature_importance': gbm.feature_importance()})
	# predictors = predictors.sort_values(by=['feature_importance'], ascending=False)['feature_name'].values[:100]
	# use_feat = list(predictors) + ['orderid', 'geohashed_end_loc'] + ['sloc_eloc_common_eloc_count', 'sloc_eloc_common_sloc_count', 'sloc_eloc_common_conn1_count', 'sloc_eloc_common_conn2_count', 'sloc_eloc_common_eloc_rate', 'sloc_eloc_common_sloc_rate', 'sloc_eloc_common_conn1_rate', 'sloc_eloc_common_conn2_rate', 'user_sloc_eloc_common_eloc_count', 'user_sloc_eloc_common_sloc_count', 'user_sloc_eloc_common_conn1_count', 'user_sloc_eloc_common_conn2_count', 'user_sloc_eloc_common_eloc_rate', 'user_sloc_eloc_common_sloc_rate', 'user_sloc_eloc_common_conn1_rate', 'user_sloc_eloc_common_conn2_rate']
	# predictors = list(predictors_100) + ['sloc_eloc_common_eloc_count', 'sloc_eloc_common_sloc_count', 'sloc_eloc_common_conn1_count', 'sloc_eloc_common_conn2_count', 'sloc_eloc_common_eloc_rate', 'sloc_eloc_common_sloc_rate', 'sloc_eloc_common_conn1_rate', 'sloc_eloc_common_conn2_rate', 'user_sloc_eloc_common_eloc_count', 'user_sloc_eloc_common_sloc_count', 'user_sloc_eloc_common_conn1_count', 'user_sloc_eloc_common_conn2_count', 'user_sloc_eloc_common_eloc_rate', 'user_sloc_eloc_common_sloc_rate', 'user_sloc_eloc_common_conn1_rate', 'user_sloc_eloc_common_conn2_rate']
	print('Features used: {} dims\n'.format(len(predictors)), predictors)
	# Build the datasets
	X_train = train[predictors]
	y_train = train['label']
	X_val = val[predictors]
	y_val = val['label']
	del train, val
	gc.collect()

	# ********* LightGBM *********
	# Datasets
	lgb_train = lgb.Dataset(X_train, y_train)
	lgb_val = lgb.Dataset(X_val, y_val, reference=lgb_train)
	# Configuration
	params = {
	    'objective': 'binary',
	    'metric': {'auc', 'binary_logloss'},
	    'is_unbalance': True,
	    'num_leaves': opt['lgb_leaves'],
	    'learning_rate': opt['lgb_lr'],
	    'feature_fraction': 0.886,
	    'bagging_fraction': 0.886,
	    'bagging_freq': 5
	}
	gc.collect()    
	# ********** Start training *********
	gbm1 = lgb.train(
		params,
		lgb_train,
		num_boost_round=1200,
		valid_sets=[lgb_train, lgb_val],
		early_stopping_rounds=5
	)
	gc.collect()
    
	# ********* Save model *********

	cur_time = datetime.datetime.now().strftime('%Y-%m-%d#%H:%M:%S')
	# save_path = '{}/{}_{}_{:.5f}.pkl'.format(opt['model_dir'], 'lgb', cur_time, score[0])
	save_path = '{}/{}_{}.pkl'.format(opt['model_dir'], 'lgb', cur_time)
	with open(save_path, 'wb') as fout:
	    pickle.dump(gbm1, fout)
	print('Saved model:', save_path)
	gc.collect()    

	# ********* Evaluation *********

	# Evaluate on the training set
	del X_train, y_train, X_val, y_val
	gc.collect()    
	score = get_score(train_all, predictors, gbm1, opt)
	print('Training set score: {}'.format(score))
    
	import sys
	sys.exit(0)
    
	# save_path = '{}/{}.pkl'.format(opt['model_dir'], 'lgb_1_300_top25')
	# with open(save_path, 'wb') as fout:
	#     pickle.dump(gbm1, fout)
	# print('Saved model (layer 1):', save_path)

	# ********* save predict *****

	# train_all[['orderid', 'geohashed_end_loc', 'pred']].to_hdf('/home/xuwenchao/dyj-storage/train2324_80_pred_res.hdf', 'w', complib='blosc', complevel=5)
	# print('Save train_pred_res.hdf successful!!!')

	# import sys
	# sys.exit(0)
    
	# -------------------- Train layer 2 ------------------------
    
	# opt['model_name'] = 'lgb_1_300_top25.pkl'
	# gbm1, use_feat1 = load_model(opt)
	# train_all.loc[:, 'pred'] = gbm1.predict(train_all[use_feat1])

	# Drop low-importance features, keep only the top-15 candidates per order, and retrain the model
	# (the model can also be reloaded and fine-tuned later, which helps especially when samples are scarce;
	# even the top 5 could be used, but top 15 covers 99.5% of the original labels and top 10 covers 98%,
	# so those two are probably better; alternatives: 5 (+finetune), 10 (+finetune), 15 (+finetune))
	predictors = pd.DataFrame(data={'feature_name': gbm1.feature_name(), 'feature_importance': gbm1.feature_importance()})
	predictors = predictors[predictors['feature_importance']>0]['feature_name'].values
	print('Features used in layer 2: {} dims\n'.format(len(predictors)), predictors)
	train_all = train_all.sort_values(by=['orderid', 'pred'], ascending=False).groupby('orderid').head(15)
	# train_all = rank(train_all, 'orderid', 'pred', ascending=False)
	del train_all['pred']
	print('Layer 2 data:', train_all.shape)
    
	# ********* Prepare data **********
	# Split off a validation set
	train, val = train_test_split(train_all, test_size=0.1)
    
	# Build the datasets
	X_train = train[predictors]
	y_train = train['label']
	X_val = val[predictors]
	y_val = val['label']
	del train, val
	gc.collect()

	# Datasets
	lgb_train = lgb.Dataset(X_train, y_train)
	lgb_val = lgb.Dataset(X_val, y_val, reference=lgb_train)

	# ********** Start training *********
	gbm2 = lgb.train(
		params,
		lgb_train,
		num_boost_round=1200,
		valid_sets=[lgb_train, lgb_val],
		early_stopping_rounds=5
		# init_model=gbm1 # finetune
	)

	# ********* Evaluation *********

	# Evaluate on the training set
	score = get_score(train_all, predictors, gbm2, opt)
	print('Training set score (layer 2): {}'.format(score))

	# ********* Save model *********

	cur_time = datetime.datetime.now().strftime('%Y-%m-%d#%H:%M:%S')
	save_path = '{}/{}_{}_{:.5f}.pkl'.format(opt['model_dir'], 'lgb_2', cur_time, score[0])
	with open(save_path, 'wb') as fout:
	    pickle.dump(gbm2, fout)
	print('Saved model (layer 2):', save_path)
	# save_path = '{}/{}.pkl'.format(opt['model_dir'], 'lgb_2_300_top15')
	# with open(save_path, 'wb') as fout:
	#     pickle.dump(gbm2, fout)
	# print('Saved model (layer 2):', save_path)
    
	import sys
	sys.exit(0)
    
	# -------------------- Train layer 3 ------------------------
    
	# Keep only the top-10 candidates per order
	predictors = pd.DataFrame(data={'feature_name': gbm2.feature_name(), 'feature_importance': gbm2.feature_importance()})
	predictors = predictors[predictors['feature_importance']>0]['feature_name'].values
	print('Features used in layer 3: {} dims\n'.format(len(predictors)), predictors)
	train_all = train_all.sort_values(by=['orderid', 'pred'], ascending=False).groupby('orderid').head(10)
	# train_all = rank(train_all, 'orderid', 'pred', ascending=False)
	del train_all['pred']
	print('Layer 3 data:', train_all.shape)
    
	# ********* Prepare data **********
	# Split off a validation set
	train, val = train_test_split(train_all, test_size=0.1)
    
	# Build the datasets
	X_train = train[predictors]
	y_train = train['label']
	X_val = val[predictors]
	y_val = val['label']
	del train, val
	gc.collect()

	# Datasets
	lgb_train = lgb.Dataset(X_train, y_train)
	lgb_val = lgb.Dataset(X_val, y_val, reference=lgb_train)

	# ********** Start training *********
	gbm3 = lgb.train(
		params,
		lgb_train,
		num_boost_round=1200,
		valid_sets=[lgb_train, lgb_val],
		early_stopping_rounds=5
		# init_model=gbm2 # finetune
	)

	# ********* Evaluation *********

	# Evaluate on the training set
	score = get_score(train_all, predictors, gbm3, opt)
	print('Training set score (layer 3): {}'.format(score))

	# ********* Save model *********

	cur_time = datetime.datetime.now().strftime('%Y-%m-%d#%H:%M:%S')
	save_path = '{}/{}_{}_{:.5f}.pkl'.format(opt['model_dir'], 'lgb_3', cur_time, score[0])
	with open(save_path, 'wb') as fout:
	    pickle.dump(gbm3, fout)
	print('Saved model (layer 3):', save_path)
	save_path = '{}/{}.pkl'.format(opt['model_dir'], 'lgb_3_300_top10')
	with open(save_path, 'wb') as fout:
	    pickle.dump(gbm3, fout) 
	print('Saved model (layer 3):', save_path)

    
	# -------------------- Train layer 4 ------------------------
    
	# Keep only the top-5 candidates per order
	predictors = pd.DataFrame(data={'feature_name': gbm3.feature_name(), 'feature_importance': gbm3.feature_importance()})
	predictors = predictors[predictors['feature_importance']>0]['feature_name'].values
	print('Features used in layer 4: {} dims\n'.format(len(predictors)), predictors)
	train_all = train_all.sort_values(by=['orderid', 'pred'], ascending=False).groupby('orderid').head(5)
	# train_all = rank(train_all, 'orderid', 'pred', ascending=False)
	del train_all['pred']
	print('Layer 4 data:', train_all.shape)
    
	# ********* Prepare data **********
	# Split off a validation set
	train, val = train_test_split(train_all, test_size=0.1)
    
	# Build the datasets
	X_train = train[predictors]
	y_train = train['label']
	X_val = val[predictors]
	y_val = val['label']
	del train, val
	gc.collect()

	# Datasets
	lgb_train = lgb.Dataset(X_train, y_train)
	lgb_val = lgb.Dataset(X_val, y_val, reference=lgb_train)

	# ********** Start training *********
	gbm4 = lgb.train(
		params,
		lgb_train,
		num_boost_round=1200,
		valid_sets=[lgb_train, lgb_val],
		early_stopping_rounds=5
		# init_model=gbm3 # finetune
	)

	# ********* Evaluation *********

	# Evaluate on the training set
	score = get_score(train_all, predictors, gbm4, opt)
	print('Training set score (layer 4): {}'.format(score))

	# ********* Save model *********

	cur_time = datetime.datetime.now().strftime('%Y-%m-%d#%H:%M:%S')
	save_path = '{}/{}_{}_{:.5f}.pkl'.format(opt['model_dir'], 'lgb_4', cur_time, score[0])
	with open(save_path, 'wb') as fout:
	    pickle.dump(gbm4, fout)
	print('Saved model (layer 4):', save_path)
	save_path = '{}/{}.pkl'.format(opt['model_dir'], 'lgb_4_300_top5')
	with open(save_path, 'wb') as fout:
	    pickle.dump(gbm4, fout) 
	print('Saved model (layer 4):', save_path)
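Layers 2 through 4 above repeat the same recipe: score the current candidates with the previous booster, keep the top-k rows per `orderid`, drop features the previous model never used, and retrain. The loop below is only a compact restatement of that pattern under the same column names and `params`; it is a refactoring sketch, not the author's code (`first_model` would be the layer-1 booster, and the layer numbering assumes it is passed in).

import gc
import lightgbm as lgb
from sklearn.model_selection import train_test_split

def train_cascade(train_all, predictors, params, first_model=None, topk_schedule=(15, 10, 5)):
    """Train a stack of LightGBM models, shrinking the candidate set at each layer."""
    models = []
    gbm = first_model
    for layer, topk in enumerate(topk_schedule, start=2):
        if gbm is not None:
            # filter candidates with the previous layer's model
            train_all = train_all.copy()
            train_all['pred'] = gbm.predict(train_all[predictors])
            train_all = (train_all.sort_values(['orderid', 'pred'], ascending=[True, False])
                                  .groupby('orderid').head(topk)
                                  .drop(columns=['pred']))
            # keep only the features the previous model actually used
            imp = dict(zip(gbm.feature_name(), gbm.feature_importance()))
            predictors = [f for f in predictors if imp.get(f, 0) > 0]
        print('layer {}: {} rows, {} features'.format(layer, len(train_all), len(predictors)))
        train, val = train_test_split(train_all, test_size=0.1)
        lgb_train = lgb.Dataset(train[predictors], train['label'])
        lgb_val = lgb.Dataset(val[predictors], val['label'], reference=lgb_train)
        gbm = lgb.train(params, lgb_train, num_boost_round=1200,
                        valid_sets=[lgb_train, lgb_val], early_stopping_rounds=5)
        models.append(gbm)
        gc.collect()
    return models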
Example #4
        im_scale = im_scale.reshape((-1)).asscalar()
        det_bboxes *= im_scale

        gt_ids = y.slice_axis(axis=-1, begin=4, end=5)
        gt_bboxes = y.slice_axis(axis=-1, begin=0, end=4)
        gt_bboxes *= im_scale
        gt_difficulty = y.slice_axis(axis=-1, begin=5, end=6) if y.shape[-1] > 5 else None

        val_metric.update(det_bboxes, det_ids, det_scores, gt_bboxes, gt_ids, gt_difficulty)

    return val_metric.get()

if __name__ == '__main__':
    
    opt = DefaultConfig()
    opt.parse({'model': 'vgg16_faster_rcnn',
               'env': 'vgg16',
               'lr_decay_epoch': '8, 16',
               'preload': False,
               'special_load': True,
               'lr': 0.001,
               'start_epoch': 0,
               'max_epoch': 20,
               'load_file_path': '',
               'log_file_path': './log/vgg16_faster_rcnn.log'})

    logger = get_logger(opt)
    if opt.special_load and opt.special_load_path is not None:
        model_train = getattr(models, opt.model)(True, opt.special_load_path)
    else:
Example #5
import sys
import traceback
from datetime import datetime
from typing import Dict

from aiohttp import web
from aiohttp.web import Request, Response, json_response
from botbuilder.core import (
    BotFrameworkAdapterSettings,
    TurnContext,
    BotFrameworkAdapter,
)
from botbuilder.schema import Activity, ActivityTypes, ConversationReference

from bots import ProactiveBot
from config import DefaultConfig

CONFIG = DefaultConfig()

# Create adapter.
# See https://aka.ms/about-bot-adapter to learn more about how bots work.
SETTINGS = BotFrameworkAdapterSettings(CONFIG.APP_ID, CONFIG.APP_PASSWORD)
ADAPTER = BotFrameworkAdapter(SETTINGS)


# Catch-all for errors.
async def on_error(context: TurnContext, error: Exception):
    # This check writes out errors to console log .vs. app insights.
    # NOTE: In production environment, you should consider logging this to Azure
    #       application insights.
    print(f"\n [on_turn_error] unhandled error: {error}", file=sys.stderr)
    traceback.print_exc()
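The excerpt ends right after the error handler. In the standard Bot Framework Python samples the handler is then attached to the adapter and an aiohttp route forwards incoming activities to the bot; the sketch below follows that sample layout (the `CONFIG.PORT` attribute and the `ProactiveBot(conversation_references)` constructor are assumptions based on those samples, not something shown in this excerpt).

ADAPTER.on_turn_error = on_error

# Conversation references the proactive bot uses to message users later.
CONVERSATION_REFERENCES: Dict[str, ConversationReference] = dict()
BOT = ProactiveBot(CONVERSATION_REFERENCES)


# Listen for incoming requests on /api/messages.
async def messages(req: Request) -> Response:
    if "application/json" not in req.headers.get("Content-Type", ""):
        return Response(status=415)
    body = await req.json()
    activity = Activity().deserialize(body)
    auth_header = req.headers.get("Authorization", "")
    invoke_response = await ADAPTER.process_activity(activity, auth_header, BOT.on_turn)
    if invoke_response:
        return json_response(data=invoke_response.body, status=invoke_response.status)
    return Response(status=201)


APP = web.Application()
APP.router.add_post("/api/messages", messages)

if __name__ == "__main__":
    web.run_app(APP, host="localhost", port=CONFIG.PORT)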
Example #6
import torch
import torch.nn as nn
import torch.optim as optim
from dataset import Data
from dataset import DataLoaderX
from config import DefaultConfig
from student_model import student_model
from run import run_epoch

opt = DefaultConfig()

if __name__ == '__main__':
    train_dataset = Data(train=True)
    test_dataset = Data(train=False)
    train_loader = DataLoaderX(train_dataset,
                               batch_size=opt.batch_size,
                               num_workers=4,
                               pin_memory=True,
                               shuffle=True)
    test_loader = DataLoaderX(test_dataset,
                              batch_size=opt.batch_size,
                              num_workers=4,
                              pin_memory=True)
    num_skills = train_dataset.max_skill_num + 1

    m = student_model(num_skills=num_skills,
                      state_size=opt.state_size,
                      num_heads=opt.num_heads,
                      dropout=opt.dropout,
                      infer=False)
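The excerpt stops after constructing the model; `run_epoch` is imported but never called in what is shown. A plausible continuation is sketched below, with an assumed `run_epoch(model, loader, criterion, optimizer, train)` signature and assumed `opt.lr` / `opt.num_epochs` fields; none of this is the repository's actual loop.

    # binary "answered correctly" targets, so a logistic loss is a natural choice
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(m.parameters(), lr=opt.lr)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    m = m.to(device)

    for epoch in range(opt.num_epochs):
        # assumed signature: run_epoch(model, loader, criterion, optimizer, train)
        train_loss = run_epoch(m, train_loader, criterion, optimizer, train=True)
        test_loss = run_epoch(m, test_loader, criterion, optimizer, train=False)
        print('epoch {}: train loss {:.4f}, test loss {:.4f}'.format(
            epoch + 1, train_loss, test_loss))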
Example #7
File: app.py  Project: xzwupeng/quay
app = Flask(__name__)
logger = logging.getLogger(__name__)

# Instantiate the configuration.
is_testing = IS_TESTING
is_kubernetes = IS_KUBERNETES
is_building = IS_BUILDING

if is_testing:
    from test.testconfig import TestConfig
    logger.debug('Loading test config.')
    app.config.from_object(TestConfig())
else:
    from config import DefaultConfig
    logger.debug('Loading default config.')
    app.config.from_object(DefaultConfig())
    app.teardown_request(database.close_db_filter)

# Load the override config via the provider.
config_provider.update_app_config(app.config)

# Update any configuration found in the override environment variable.
environ_config = json.loads(os.environ.get(OVERRIDE_CONFIG_KEY, '{}'))
app.config.update(environ_config)

# Fix remote address handling for Flask.
if app.config.get('PROXY_COUNT', 1):
    app.wsgi_app = ProxyFix(app.wsgi_app,
                            num_proxies=app.config.get('PROXY_COUNT', 1))

# Ensure the V3 upgrade key is specified correctly. If not, simply fail.
Example #8
                sess, dev_set, tags)
            # decay learning rate
            cfg.LR *= cfg.LR_DECAY

            print("epoch %d - train loss: %.2f, validation loss: %.2f, accuracy: %.2f  with f1: %.2f , P: %.2f , R: %.2f " % \
                (epoch + 1, train_losses, validation_loss, accuracy * 100, f1 * 100, p * 100, r * 100))


if __name__ == "__main__":
    if len(sys.argv) != 4:
        sys.stderr.write(
            "Usage: %s wikipedia-xxx-mincount-xx-window-x-cbow.bin TRAIN_SET DEV_SET\n"
            % sys.argv[0])
        sys.exit(1)

    cfg = DefaultConfig()

    # check if data not processed, generate tags, words, chars
    if not (os.path.exists(cfg.words_filename) & os.path.exists(
            cfg.tags_filename) & os.path.exists(cfg.chars_filename)):
        print("preprocessed Data not found. processing data...")
        train = cr.conllReader(sys.argv[2])
        test = cr.conllReader(sys.argv[3])

        # get words and tags vocabulary from whole data
        vocab_words, vocab_tags = cr.get_vocabs([train, test])
        # Add unknown token and number to vocab
        vocab_words.add(cfg.UNK)
        vocab_words.add(cfg.NUM)
        # save all words and tags to file
        cr.write_vocab(vocab_tags, cfg.tags_filename)
Example #9
def main(**kwargs):
    config = DefaultConfig()
    config.parse(kwargs)
    config.env = str(config.id)
    vis = Visualizer

    # set random seed
    # cpu and gpu both need to set
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed(config.seed)
    np.random.seed(config.seed)
    random.seed(config.seed)

    if not torch.cuda.is_available():
        config.cuda = False
        config.device = None

    train_iter, test_iter, emb_vectors = utils.load_data(config)
    config.print_config()

    model = getattr(models, config.model)(config, emb_vectors)
    print(model)

    if config.cuda:
        torch.cuda.set_device(config.device)
        model.cuda()

    # Loss function and optimizer
    loss_f = F.cross_entropy
    lr1, lr2 = config.lr1, config.lr2
    optimizer = model.get_optimizer(lr1, lr2)

    model.train()
    for epoch in range(config.max_epochs):
        start_time = time.time()
        total_loss = 0.0
        correct = 0
        total = 0

        for batch_i, batch in enumerate(train_iter):
            text, label = batch.text[0], batch.label
            if config.cuda:
                text, label = text.cuda(), label.cuda()

            optimizer.zero_grad()
            pred = model(text)
            loss = loss_f(pred, label)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            predicted = pred.max(dim=1)[1]
            total += label.size(0)
            correct += predicted.eq(label).sum().item()

            if (batch_i + 1) % (10000 // config.batch_size) == 0:
                # print statistics once every 10000 training samples
                print('[Epoch {}] loss: {:.5f} | Acc: {:.3f}%({}/{})'.format(
                    epoch + 1, total_loss, 100.0 * correct / total, correct,
                    total))

        train_acc, train_acc_n, train_n = val(model, train_iter, config)
        print('Epoch {} time spends : {:.1f}s'.format(epoch + 1,
                                                      time.time() -
                                                      start_time))
        print('Epoch {} Train Acc: {:.2f}%({}/{})'.format(
            epoch + 1, train_acc, train_acc_n, train_n))
        test_acc, test_acc_n, test_n = val(model, test_iter, config)
        print('Epoch {} Test Acc: {:.2f}%({}/{})\n'.format(
            epoch + 1, test_acc, test_acc_n, test_n))
Example #10
import requests
import json
from config import DefaultConfig
config = DefaultConfig()
import string

token_data = {
    'grant_type': 'password',
    'client_id': config.CLIENT_ID,
    'client_secret': config.CLIENT_SECRET,
    'resource': config.RESOURCE,
    'scope': config.RESOURCE,
    'username': config.MAIL,
    'password': config.PASS,
}

token_req = requests.post(config.URL2, data=token_data)
token = token_req.json().get('access_token')
query = config.RESOURCE + config.API_VERSION + '/sites/' + config.SITE_ID
headers = {'Authorization': 'Bearer {}'.format(token)}


def welcome():

    data = json.loads(requests.get(query, headers=headers).text)
    return (data['displayName'])


def getList(intent: string):
    if (intent == "listes_cours"):
        l = 'courses'
Example #11
#coding:utf8
import torch
from torchvision import models
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
from dataset import DogCat
from config import DefaultConfig
import shutil
from tensorboard import SummaryWriter
from train_test import *

opts = DefaultConfig()

if opts.pretrained:
    print('using pretrained model {}'.format(opts.model))
    model = models.__dict__[opts.model](pretrained=True)
    for param in model.parameters():
        param.requires_grad = False
############### applies to alexnet and vgg16 ###############################
    new_classifier = nn.Sequential(*list(model.classifier)[:-1])
    new_classifier.add_module('6',
                              nn.Linear(model.classifier[-1].in_features, 2))
    model.classifier = new_classifier
##################################################################
else:
    print('creating model {}'.format(opts.model))
    model = models.__dict__[opts.model]()
    new_classifier = nn.Sequential(*list(model.classifier)[:-1])
    new_classifier.add_module('6',
                              nn.Linear(model.classifier[-1].in_features, 2))
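The classifier surgery in this example only works for networks that expose a `classifier` Sequential (AlexNet, VGG); for torchvision models with a single fully connected head the equivalent change is one line. A small sketch for a ResNet backbone, keeping the same freeze-then-replace idea and the same two-class output (the ResNet choice is an illustration, not part of this example):

import torch.nn as nn
from torchvision import models

resnet = models.resnet18(pretrained=True)
for param in resnet.parameters():
    param.requires_grad = False            # freeze the pretrained backbone
# ResNet exposes its head as `fc` rather than `classifier`
resnet.fc = nn.Linear(resnet.fc.in_features, 2)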
Example #12
gt_transform = transforms.Compose([
    transforms.TenCrop((crop_height, crop_weight), vertical_flip=False),
    transforms.Lambda(
        lambda crops: [transforms.ToTensor()(crop) for crop in crops]),
    transforms.Lambda(lambda crops: torch.stack(crops))
])

dataset = DatasetConstructor(img_dir, gt_dir, 300, 20, transform_a,
                             gt_transform_a)
train_loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=1)
eval_loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=1)
# obtain the gpu device
assert torch.cuda.is_available()
cuda_device = torch.device("cuda")
args = DefaultConfig()

# model construct
net = DMENet().to(cuda_device)
gt_map_process_model = GroundTruthProcess(1, 1, 8).to(
    cuda_device)  # to keep the same resolution with the prediction

# set optimizer and estimator
criterion = metrics.DMELoss().to(cuda_device)
optimizer = torch.optim.Adam(net.parameters(),
                             lr=args.lr,
                             weight_decay=args.weight_decay)
ae_batch = metrics.AEBatch().to(cuda_device)
se_batch = metrics.SEBatch().to(cuda_device)
for epoch_index in range(args.max_epoch):
    dataset = dataset.train_model().shuffle()
Example #13
    total_num = 0
    correct_num = 0

    for i, (data, label) in enumerate(dataloader):
        if opt.use_gpu:
            data = data.cuda()

        score = model_test(data)

        _, predict = torch.max(score.data, 1)

        total_num += label.size(0)
        correct_num += (predict.cpu() == label).sum()

    return 100 * float(correct_num) / float(total_num)


if __name__ == '__main__':
    print("Initialize starting options")
    opt = DefaultConfig()

    opt.parse({'batch_size': 128, 'num_workers': 4})

    model_test = getattr(model, opt.model)()
    model_test.load("model/SuleymanNET_model_state_dict.pkl")

    testloader = data_loader(opt.root, opt.batch_size, opt.num_workers)

    accuracy = test(model_test, testloader, opt)

    print("Accuracy of Test Set: %.3f" % accuracy)
Example #14
    def __init__(self, model=None, opt=DefaultConfig()):
        self.model = model
        self.criterion = opt.criterion
        self.optimizer = opt.optimizer(model.parameters(), lr=opt.lr, weight_decay=opt.weight_decay)
        self.opt = opt
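This constructor expects the config object itself to carry the loss instance, the optimizer class and its hyper-parameters. A hypothetical config fragment and call that would satisfy it (the `Trainer` name and the specific values are illustrative, not from the project):

import torch

class DefaultConfig:
    lr = 1e-3
    weight_decay = 1e-4
    criterion = torch.nn.CrossEntropyLoss()   # a loss *instance*
    optimizer = torch.optim.Adam              # an optimizer *class*, built with model.parameters()

# trainer = Trainer(model=my_model, opt=DefaultConfig())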
Example #15
                optimizer_g.zero_grad()
                # train by fake image
                # refresh the value of noises
                noises.data.copy_(torch.randn(opt.batch_size, opt.nz, 1, 1))
                fake_img = model_G(noises)
                output = model_D(fake_img)

                loss_g = criterion(output, true_labels)
                loss_g.backward()

                optimizer_g.step()
                loss_G_meter.add(loss_g.item())

            if (ii + 1) % opt.print_freq == 0:
                vis.plot('loss_d', loss_D_meter.value()[0])
                vis.plot('loss_g', loss_G_meter.value()[0])
                fix_fake_img = model_G(fix_noises)
                vis.images(fix_fake_img.data.cpu().numpy()[:64] * 0.5 + 0.5,
                           win='fixfake')

        if (epoch + 1) % 20 == 0:
            model_G.save(opt.save_model_path + opt.G_model + '_' + str(epoch))
            model_D.save(opt.save_model_path + opt.D_model + '_' + str(epoch))


if __name__ == '__main__':
    opt = DefaultConfig()
    opt.parse({'max_epoch': 100})

    train(opt)
Example #16
        total_epoch_loss += loss_val.item()
        total_epoch_recall += recall_val
        total_epoch_acc += acc_val.item()
        total_epoch_pre += pre_val
        total_epoch_f1 += f1_val
        print(f"validation in batch:{bat_num+1}\n")

    model.train()
    return total_epoch_loss / (bat_num + 1), total_epoch_acc / (
        bat_num + 1), total_epoch_f1 / (bat_num + 1), total_epoch_pre / (
            bat_num + 1), total_epoch_recall / (bat_num + 1)


if __name__ == '__main__':
    opt = DefaultConfig()
    # path to the data file
    data_path = opt.data_path
    data = data_process.read_corpus(data_path)
    timestamp = str(int(time.time()))
    outdir = os.path.abspath(
        os.path.join(os.path.curdir, "checkpoints", timestamp))
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    vocab = data_process.load_vocab(opt.vocab_path)

    tag2label = {
        "O": 0,
        "B-W": 1,
        "I-W": 2,
    }
Example #17
            data = data.cuda()

        score = model_train(data)

        confusion_matrix.add(score.data.squeeze(),
                             label.type(torch.LongTensor))
        _, predict = torch.max(score.data, 1)
        total_num += label.size(0)
        correct_num += (predict.cpu() == label).sum()

    model_train.train()
    accuracy = 100 * float(correct_num) / float(total_num)

    return confusion_matrix, accuracy


if __name__ == '__main__':
    # login itchat
    itchat.auto_login()
    itchat_send('start!')

    opt = DefaultConfig()
    opt.parse({
        'model': 'VGG19',
        'max_epoch': 200,
        'weight_decay': 15e-4,
        'lr': 0.1
    })

    train(opt)
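`itchat_send` is not defined in this excerpt; in this notification pattern it is typically a thin wrapper around `itchat.send` that pushes progress messages to the logged-in WeChat account. A minimal sketch (the `filehelper` target is itchat's built-in self-chat; the try/except is only there so a dropped login never interrupts training):

import itchat

def itchat_send(msg):
    # push a progress message to WeChat's "file helper" self-chat
    try:
        itchat.send(str(msg), toUserName='filehelper')
    except Exception:
        pass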