def __init__(self, pretrain=False):
    self.logger = get_logger('baseline')
    self.voca_size = opt.word_voca_size + 2
    self.char_voca_size = opt.char_voca_size + 2
    if pretrain:
        self.logger.info('use pretrained embedding matrix')
        word_embed_matrix = joblib.load('../word_embed_matrix.np')
        char_embed_matrix = joblib.load('../char_embed_matrix.np')
        self.word_embd = Embedding(self.voca_size,
                                   opt.word_embd_size,
                                   weights=[word_embed_matrix],
                                   name='shared_embed',
                                   trainable=True)
        self.char_embd = Embedding(self.char_voca_size,
                                   opt.char_embd_size,
                                   weights=[char_embed_matrix],
                                   name='shared_char_embed',
                                   trainable=True)
    else:
        self.logger.info('do not use pretrained embedding matrix')
        self.word_embd = Embedding(self.voca_size,
                                   opt.word_embd_size,
                                   name='shared_embed',
                                   trainable=True)
        self.char_embd = Embedding(self.char_voca_size,
                                   opt.char_embd_size,
                                   name='shared_char_embed',
                                   trainable=True)
def __init__(self, config, use=False):
    self.logger = get_logger()
    self.meta_path = config['META_PATH']
    self.titles_path = config['TITLES_PATH']
    self.spm_dir_path = config['SPM_DIR_PATH']
    self.spm_wp_path = config['SPM_WP_PATH']
    self.category_path = config['CATEGORY_PATH']
    self.parse_data_path = config['PARSE_DATA_PATH']
    self.doc2vec_dir_path = config['DOC2VEC_DIR_PATH']
    self.use_cols = config['USE_COLS']
    self.use_cate = config['USE_CATE']
    self.n_sample = config['N_SAMPLE']
    self.vocab_size = config['VOCAB_SIZE']
    self.n_shuffle = config['N_SHUFFLE']
    self.cate_depth = config['CATE_DEPTH']
    self.n_log_print = config['N_LOG_PRINT']
    self.doc_vec_size = config['DOC_VEC_SIZE']
    self.doc2vec_epochs = config['DOC2CEC_EPOCHS']
    self.n_workers = config['N_WORKERS']
    self.window_size = config['WINDOW_SIZE']
    self.re_sc = re.compile(r'[\!@#$%\^&\*\(\)=\[\]\{\}\.,/\?~\+\"|\_\-:;]')
    self.stopwords = ['&', '"']
    if use:
        self.load_spm()
        self.load_doc2vec_model()
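# A hypothetical config sketch for this parser (presumably the
# EcommerceDataParser referenced further below). The keys are taken from the
# constructor above; every value here is an illustrative assumption, not the
# project's actual settings.
example_config = {
    'META_PATH': './data/meta.json',
    'TITLES_PATH': './data/titles.txt',
    'SPM_DIR_PATH': './spm/',
    'SPM_WP_PATH': './spm/wp.model',
    'CATEGORY_PATH': './data/category.json',
    'PARSE_DATA_PATH': './data/parsed/',
    'DOC2VEC_DIR_PATH': './doc2vec/',
    'USE_COLS': ['product', 'brand', 'maker'],
    'USE_CATE': True,
    'N_SAMPLE': 100000,
    'VOCAB_SIZE': 32000,
    'N_SHUFFLE': 5,
    'CATE_DEPTH': 4,
    'N_LOG_PRINT': 10000,
    'DOC_VEC_SIZE': 128,
    'DOC2CEC_EPOCHS': 10,
    'N_WORKERS': 4,
    'WINDOW_SIZE': 5,
}
# parser = EcommerceDataParser(example_config, use=False)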
def __init__(self, data_path):
    self.logger = get_logger('EDA')
    self.write_path = './EDA/'
    self.data_path = data_path
    self.cate1 = self.load_catefile(self.data_path + '/' + opt.cate_filename)
def __init__(self, epochs=10, batch_size=128):
    self.logger = get_logger()
    self.data = Data("005930")
    data_len = len(self.data)
    train_num = int(data_len * 0.8)
    valid_num = int(data_len * 0.1)
    test_num = data_len - train_num - valid_num
    train, valid, test = random_split(self.data, [train_num, valid_num, test_num])
    self.train_iter = DataLoader(train, batch_size=batch_size, shuffle=True, num_workers=4)
    self.valid_iter = DataLoader(valid, batch_size=batch_size, shuffle=True, num_workers=4)
    self.test_iter = DataLoader(test, batch_size=batch_size, shuffle=True, num_workers=4)
    self.encoder = Encoder(features=self.data.features, hid_dim=64, layers=2, dropout=0.3)
    self.network = Network(encoder=self.encoder, enc_hid_dim=64, hid_dim=64, device=device).to(device)
    print(self.network)
    self.epochs = epochs
    self.batch_size = batch_size
def get_logger_and_parser():
    parser = argparse.ArgumentParser(description='config')
    parser.add_argument('--config',
                        type=str,
                        default='config/cityscapes_pspnet.yaml',
                        help='Configuration file to use')
    parser.add_argument('--num_of_gpus', type=int, default=0)
    parser.add_argument('opts', help='', default=None, nargs=argparse.REMAINDER)
    args = parser.parse_args()

    assert args.config is not None
    cfg = config.load_cfg_from_cfg_file(args.config)
    if args.opts is not None:
        cfg = config.merge_cfg_from_list(cfg, args.opts)

    args_dict = dict()
    for arg in vars(args):
        args_dict[arg] = getattr(args, arg)
    cfg.update(args_dict)

    run_dir = os.path.join('runs', os.path.basename(args.config)[:-5], cfg['exp_name'])
    check_dir_exists(run_dir)

    run_id = str(int(time.time()))
    logger = get_logger(run_dir, run_id, 'val')
    logger.info('RUNDIR: {}'.format(run_dir))

    return logger, cfg, run_dir
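# Minimal usage sketch (an assumed entry point, not from the original repo):
# the returned logger, cfg, and run_dir would drive a validation run.
if __name__ == '__main__':
    logger, cfg, run_dir = get_logger_and_parser()
    logger.info('validation outputs will be written under {}'.format(run_dir))
    # model construction and evaluation using cfg would follow here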
def __init__(self, num_classes):
    logger = get_logger('textonly')
    max_len = opt['max_len']
    voca_size = opt['unigram_hash_size'] + 1

    t_input = tf.keras.Input(shape=(max_len, ))
    tx = tf.keras.layers.Embedding(voca_size, opt['embd_size'])(t_input)

    w_input = tf.keras.Input(shape=(max_len, ))
    wx = tf.keras.layers.Reshape((max_len, 1))(w_input)

    # weighted sum of token embeddings: dot product along the sequence axis
    x = tf.keras.layers.dot([tx, wx], axes=1)
    x = tf.keras.layers.Reshape((opt['embd_size'], ))(x)
    x = tf.keras.layers.Dense(16, activation=tf.nn.relu)(x)
    outputs = tf.keras.layers.Dense(num_classes, activation=tf.nn.sigmoid)(x)

    model = tf.keras.models.Model(inputs=[t_input, w_input], outputs=outputs)
    model.summary(print_fn=lambda x: logger.info(x))
    model.compile(loss='binary_crossentropy',
                  optimizer=tf.train.AdamOptimizer(),
                  metrics=[top1_acc])
    self.model = model
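# Hedged usage sketch: the model takes hashed-token indices and per-token
# weights, both of length max_len, with multi-label sigmoid targets. All sizes
# and the `TextOnly` wrapper name are illustrative assumptions (max_len is
# assumed to equal opt['max_len']).
import numpy as np

n, max_len, num_classes = 32, 32, 57          # illustrative sizes only
tokens = np.random.randint(1, 1000, size=(n, max_len))
weights = np.random.rand(n, max_len).astype('float32')
labels = np.zeros((n, num_classes), dtype='float32')

clf = TextOnly(num_classes)                   # hypothetical wrapper class
clf.model.fit([tokens, weights], labels, batch_size=8, epochs=1)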
def __init__(self, path, window, stride, minute_after, bandwidth,
             batch_size, train_ratio, verbose=False):
    """
    :param path: data path
    :param window: length of the window in each frame
    :param stride: stride between frames
    :param minute_after: compare the close at the end of the frame with the close `minute_after` minutes later
    :param bandwidth: height of the frame
    :param batch_size: train batch size
    :param train_ratio: ratio of data used for training
    :param verbose: verbose logging
    """
    self.logger = get_logger()
    self.path = path
    self.window = window
    self.stride = stride
    self.minute_after = minute_after
    self.bandwidth = bandwidth
    self.batch_size = batch_size
    self.train_ratio = train_ratio
    self.verbose = verbose
    self.threshold = 0.01
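# Illustrative sketch of the framing/labeling arithmetic implied by the
# parameters above (an assumption for clarity; the class's actual
# implementation is not shown). Frames of length `window` are cut every
# `stride` minutes, and each frame is labeled by comparing its last close
# with the close `minute_after` minutes later against the 1% threshold.
def make_frames(closes, window, stride, minute_after, threshold=0.01):
    frames, labels = [], []
    last_start = len(closes) - window - minute_after
    for start in range(0, last_start + 1, stride):
        frame = closes[start:start + window]
        ref = frame[-1]
        future = closes[start + window - 1 + minute_after]
        frames.append(frame)
        labels.append(1 if (future - ref) / ref > threshold else 0)
    return frames, labels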
def __init__(self, conf="./config/stock_minute.json", **kwargs):
    Status.__init__(self, conf=conf, **kwargs)
    self.logger = get_logger()
    if not os.path.isdir(self.opt.export_to):
        os.mkdir(self.opt.export_to)
    self.stock_chart = None
    self.stock_code = None
def __init__(self):
    self.logger = get_logger('words')
    # placeholder/boilerplate description terms to ignore (e.g. "etc.",
    # "see detail page", "none", "N/A"), plus single uppercase letters A-Z
    self.ignore = [
        '기타', '참조', '상품상세설명', '주', '청구할인', '상세설명참조', '없음',
        '상세정보참조', '해당없음', '품명', '상품상세정보', '상세설명',
        '상세정보별도표시', '알수', '상세페이지', '상세참조', 'ETC',
        '상세내용참조', '기타상세참조', '상세정보', '별도표기', '상세페이지참조',
        '알수없음', '상품상세설명참조'
    ] + [chr(asc) for asc in range(65, 91)]
def __init__(self):
    self.logger = get_logger('data')
    self.word_to_idx = self.load_word_to_idx()

    # make temporary save dir if it does not exist
    tempdir = os.path.dirname(self.tmp_chunk_tpl)
    if not os.path.isdir(tempdir):
        os.makedirs(tempdir)
def __init__(self, enc_emb_dim=128, dec_emb_dim=128, enc_hid_dim=256,
             dec_hid_dim=256, enc_dropout=0.3, dec_dropout=0.3, epochs=15):
    self.logger = get_logger()
    # self.data = Data()
    self.data = FRENDataset()
    data_len = len(self.data)
    train_num = int(data_len * 0.8)
    valid_num = int(data_len * 0.1)
    test_num = data_len - train_num - valid_num
    train, valid, test = random_split(self.data, [train_num, valid_num, test_num])
    self.train_iter = DataLoader(train, batch_size=128, shuffle=True, num_workers=4)
    self.valid_iter = DataLoader(valid, batch_size=128, shuffle=True, num_workers=4)
    self.test_iter = DataLoader(test, batch_size=128, shuffle=True, num_workers=4)
    # self.train_iter, self.valid_iter, self.test_iter = self.data.iterator()
    # self.input_dim = len(self.data.source.vocab)
    # self.output_dim = len(self.data.target.vocab)
    self.input_dim = self.data.source_dim
    self.output_dim = self.data.target_dim
    self.enc_emb_dim = enc_emb_dim
    self.dec_emb_dim = dec_emb_dim
    self.enc_hid_dim = enc_hid_dim
    self.dec_hid_dim = dec_hid_dim
    self.enc_dropout = enc_dropout
    self.dec_dropout = dec_dropout
    self.encoder = Encoder(self.input_dim, self.enc_emb_dim, self.enc_hid_dim,
                           self.dec_hid_dim, self.enc_dropout)
    self.attention = Attention(self.enc_hid_dim, self.dec_hid_dim)
    self.decoder = Decoder(self.output_dim, self.dec_emb_dim, self.enc_hid_dim,
                           self.dec_hid_dim, self.dec_dropout, self.attention)
    self.model = Seq2Seq(self.encoder, self.decoder, device).to(device)
    self.epochs = epochs
    # target_padding_index = self.data.target.vocab.stoi[self.data.target.pad_token]
    # self.criterion = nn.CrossEntropyLoss(ignore_index=target_padding_index)
    self.criterion = nn.CrossEntropyLoss(ignore_index=self.data.end_token_pivot)
def __init__(self):
    self.logger = get_logger('Classifier')
    self.h5 = {
        'train': h5py.File(os.path.join(opt.data_path, 'train.h5'), 'r'),
        'dev': h5py.File(os.path.join(opt.data_path, 'dev.h5'), 'r'),
        'test': h5py.File(os.path.join(opt.data_path, 'test.h5'), 'r')
    }
    self.char_dict = pickle.load(
        open(os.path.join(opt.data_path, 'char_dict.pkl'), 'rb'),
        encoding='utf-8')
def __init__(self, config):
    self.logger = get_logger()
    self.parse_data_path = config['PARSEMETA']['PARSE_DATA_PATH']
    self.category_path = config['PARSEMETA']['CATEGORY_PATH']
    self.n_log_print = config['PARSEMETA']['N_LOG_PRINT']
    self.doc_vec_size = config['PARSEMETA']['DOC_VEC_SIZE']
    self.train_dir_path = config['MAKEDB']['TRAIN_DIR_PATH']
    self.chunk_size = config['MAKEDB']['CHUNK_SIZE']
    self.temp_dir_path = config['MAKEDB']['TEMP_DIR_PATH']
    self.parser = EcommerceDataParser(config['PARSEMETA'], use=True)
def __init__(self):
    self.logger = get_logger()
    self.init_token = '<sos>'
    self.end_token = '<eos>'
    en = self._read_file("small_vocab_en")
    fr = self._read_file("small_vocab_fr")
    assert len(en) == len(fr)
    self.en_sequences, self.en_dict = self._tokenize(en)
    self.fr_sequences, self.fr_dict = self._tokenize(fr)
    self.source_dim = len(self.en_dict) + 1
    self.target_dim = len(self.fr_dict) + 1
    self.end_token_pivot = 2
def __init__(self):
    self.logger = get_logger('preprocessor')
    self.train_df_file = "train_df.csv"
    self.dev_df_file = "dev_df.csv"
    self.test_df_file = "test_df.csv"
    self.train_df_columns = ['bcateid', 'mcateid', 'scateid', 'dcateid',
                             'brand', 'maker', 'model', 'product', 'price',
                             'updttm', 'pid']
    self.dev_df_columns = ['brand', 'maker', 'model', 'product', 'price',
                           'updttm', 'pid']
    self.test_df_columns = ['brand', 'maker', 'model', 'product', 'price',
                            'updttm', 'pid']
    self.data_path_list = opt.train_data_list
    self.stop_words = opt.en_stopwords + opt.ko_stopwords
def __init__(self):
    self.logger = get_logger('Classifier')
    self.num_classes = 0
    self.encoded_dict = {"price_lev": 3}
    self.cate_index_dict = pickle.load(
        open('./data/cate_index_dict.pickle', 'rb'))
    self.predict_encoder = pickle.load(
        open('./data/predict_encoder.pickle', 'rb'))
    self.cate_split_index = {"b": 0, "m": 1, "s": 2, "d": 3}
    self.prev_cate_list = {"m": "b", "s": "m", "d": "s"}
    self.b_model = None
    self.m_model = None
    self.s_model = None
    self.d_model = None
def __init__(self, path="data/ninetoten.h5", verbose=False): self.logger = get_logger() self.path = path self.verbose = verbose self.barrier = 4 self.window = 7 assert self.barrier > 0, "target inference" self.transaction_fee = 0.0035 self.threshold = 0.0001 self.stock_meta = StockMetaData() self.load()
def __init__(self):
    self.logger = get_logger('data')
    self.price_quantile_dict = pickle.load(
        open(self.price_quantile_dict_path, 'rb'))
    self.time_aging_dict = pickle.load(
        open(self.time_aging_dict_path, 'rb'))
    self.b2v_dict = pickle.load(open(self.b2v_dict_path, 'rb'))
    self.b2v_model = Word2Vec.load(self.b2v_model_path)
    self.d2v_model = Doc2Vec.load(self.d2v_model_path)
    # self.df_term_vector = pd.concat([
    #     pd.read_pickle('./data/df_product_train_dataset.pkl'),
    #     pd.read_pickle('./data/df_product_dev_dataset.pkl'),
    #     pd.read_pickle('./data/df_product_test_dataset.pkl')],
    #     axis=0
    # )
    self.term_vector_dict = pickle.load(
        open(self.term_vector_dict_path, 'rb'))
def get_logger_and_parser():
    global cfg, logger

    parser = argparse.ArgumentParser(description='config')
    parser.add_argument(
        '--config',
        type=str,
        default='config/cityscapes_pspnet.yaml',
        help='Configuration file to use',
    )
    parser.add_argument(
        '--local_rank',
        type=int,
        default=0,
        help='Local rank for distributed training',
    )
    args = parser.parse_args()

    assert args.config is not None
    cfg = config.load_cfg_from_cfg_file(args.config)

    args_dict = dict()
    for arg in vars(args):
        args_dict[arg] = getattr(args, arg)
    cfg.update(args_dict)

    run_dir = os.path.join('runs', os.path.basename(args.config)[:-5], cfg['exp_name'])
    if main_process():
        check_dir_exists(run_dir)
        run_id = str(int(time.time()))
        logger = get_logger(run_dir, run_id, 'train')
        logger.info('RUNDIR: {}'.format(run_dir))
        shutil.copy(args.config, run_dir)
    else:
        logger = None

    try:
        cfg['world_size'] = int(os.environ['WORLD_SIZE'])
    except (KeyError, ValueError):
        pass

    return logger, cfg, run_dir
def __init__(self):
    self.logger = get_logger('ShopNet')
    self.N_IMG_FEAT = 2048
    self.max_len = opt.max_len
    # should be 500424+1 (earlier notes: 500458+1, 96778+1, opt.max_embd_words + 1)
    self.voca_size = get_word_idx_size() + 1
    self.embd_size = opt.embd_size
    self.C_idx = dict()
    self.C_idx['b'] = {c: c - 1 for c in range(1, 57 + 1)}
    self.C_idx['m'] = {c: c - 1 for c in range(1, 552 + 1)}
    self.C_idx['s'] = {c: c - 2 for c in range(2, 3190 + 1)}
    self.C_idx['d'] = {c: c - 2 for c in range(2, 404 + 1)}
    self.N_Cb = 57
    self.N_Cm = 552
    self.N_Cs = 3190 - 1
    self.N_Cd = 404 - 1
def __init__(self, stock_code, marketkind=1, verbose=False):
    self.logger = get_logger()
    self.verbose = verbose
    self.barrier = 8
    self.window = 12
    assert self.barrier > 0, "barrier must be positive for target inference"
    self.transaction_fee = 0.0035
    self.threshold = 0.0

    stock_code = str(stock_code)
    if not stock_code.startswith("A"):
        stock_code = "A" + stock_code
    self.stock_code = stock_code
    self.logger.info("target stock code: {}".format(self.stock_code))
    self.marketkind = marketkind
    self.parse()
def __init__(self, report, feat_len, batch_size, train_ratio, verbose=False):
    """
    :param report: report type string
    :param feat_len: number of features per sample
    :param batch_size: train batch size
    :param train_ratio: ratio of data used for training
    :param verbose: verbose logging
    """
    self.logger = get_logger()
    self.report = report
    self.feature_len = feat_len
    self.batch_size = batch_size
    self.train_ratio = train_ratio
    self.verbose = verbose
    self.feature_norm = []
    self.threshold = 0.006
def __init__(self, report, feat_len, batch_size, train_ratio, epochs, verbose=False):
    self.logger = get_logger()
    self.report = report
    self.feat_len = feat_len
    self.batch_size = batch_size
    self.train_ratio = train_ratio
    self.verbose = verbose
    self.data_iter = Data(report=self.report,
                          feat_len=self.feat_len,
                          batch_size=self.batch_size,
                          train_ratio=self.train_ratio)
    self.data_iter.launch()
    self.log_path = "tmp/tensorboard"
    self.checkpoint_path = "tmp/checkpoint"
    self.epochs = epochs
def __init__(self, path, window, stride, minute_after, bandwidth,
             batch_size, train_ratio, epochs, verbose=False):
    self.logger = get_logger()
    self.path = path
    self.window = window
    self.stride = stride
    self.minute_after = minute_after
    self.bandwidth = bandwidth
    self.batch_size = batch_size
    self.train_ratio = train_ratio
    self.verbose = verbose
    self.log_path = "tmp/tensorboard"
    self.checkpoint_path = "tmp/checkpoint"
    self.epochs = epochs
def __init__(self):
    self.logger = get_logger('data')
    self.price_quantile_dict = pickle.load(
        open(self.price_quantile_dict_path, 'rb'))
    self.time_aging_dict = pickle.load(
        open(self.time_aging_dict_path, 'rb'))
    self.valid_tag_dict = pickle.load(open(self.valid_tag_dict_path, 'rb'))
    self.b2v_dict = pickle.load(open(self.b2v_dict_path, 'rb'))
    # self.b2v_model = gensim.models.Word2Vec.load(self.b2v_model_path)
    self.d2v_model = Doc2Vec.load(
        '/workspace/dataset/doc2vec_test/reduced_doc2vec.model')  # TODO: replace absolute path
    self.df_term_vector = pd.concat([
        pd.read_pickle(
            '/workspace/dataset/preprocess_test/df_product_train_datset.pkl'),
        pd.read_pickle(
            '/workspace/dataset/preprocess_test/df_product_dev_datset.pkl'),
        pd.read_pickle(
            '/workspace/dataset/preprocess_test/df_product_test_datset.pkl')
    ], axis=0)
def __init__(self):
    self.logger = get_logger('textonly')
def __init__(self):
    self.logger = get_logger('Model')
def __init__(self):
    self.logger = get_logger('Classifier')
    self.num_classes = 0
def __init__(self):
    self.logger = get_logger('shopnet')
    self.input_size = 2352
# coding=utf-8
from misc import get_logger

logger = get_logger(__file__)

SPADE = '♠'
HEART = '♥'
CLUB = '♣'
DIAMOND = '♦'
SUITS_RANK = {  # higher is better
    SPADE: 3,
    HEART: 2,
    CLUB: 1,
    DIAMOND: 0
}
SUITS = [SPADE, HEART, CLUB, DIAMOND]

# 2, 3, 4, 5, 6, 7, 8, 9, 10, J, Q, K, A
CARDS_RANK = range(2, 15)

ALL_CARDS = set()
for s in SUITS:
    for c in CARDS_RANK:
        ALL_CARDS.add((c, s))
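# Illustrative sketch (an assumed helper, not part of the original module):
# cards are (rank, suit) tuples, so ALL_CARDS holds 52 entries, and ties in
# rank can be broken with SUITS_RANK, where a higher suit value wins.
def beats(card_a, card_b):
    """Return True if card_a beats card_b by rank, then by suit rank."""
    rank_a, suit_a = card_a
    rank_b, suit_b = card_b
    if rank_a != rank_b:
        return rank_a > rank_b
    return SUITS_RANK[suit_a] > SUITS_RANK[suit_b]

assert len(ALL_CARDS) == 52
assert beats((14, SPADE), (14, HEART))  # ace of spades beats ace of hearts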