def options_api(headers: OrderedDict, data: Optional[str], client_cert: SSLCertT) -> HttpResponse:
    info("api_options: {}".format(headers[":path"]))
    headers_extra = OrderedDict()
    headers_extra["Allow"] = "GET, PUT, POST, OPTIONS, HEAD, DELETE"
    http_resp = HttpResponse(HttpStatus.Ok, bytes(), CT_PLAIN, extra_headers=headers_extra)
    return http_resp
def check_login():
    frame = inspect.currentframe()
    current_function_name = inspect.getframeinfo(frame).function
    try:
        # Check that a valid login sets a session cookie
        data = {
            'id': ID,
            'pw': PW,
        }
        r = requests.post('http://{}:{}/login'.format(HOST, PORT),
                          data=data,
                          allow_redirects=False)
        if 'set-cookie' not in r.headers:
            colorlog.error('"{}" failed'.format(current_function_name))
            os._exit(1)

        # Check that an invalid login does not set a session cookie
        data = {'id': 'asjdkfl;as', 'pw': 'qwueioprq'}
        r = requests.post('http://{}:{}/login'.format(HOST, PORT),
                          data=data,
                          allow_redirects=False)
        if 'set-cookie' in r.headers:
            colorlog.error('"{}" failed'.format(current_function_name))
            os._exit(1)
    except Exception:  # a bare except would also swallow KeyboardInterrupt
        colorlog.error('"{}" failed'.format(current_function_name))
        os._exit(2)
    colorlog.info('"{}" passed'.format(current_function_name))
    return
def __init__(self, FLAGS):
    super(TrainingConfig, self).__init__(FLAGS)
    colorlog.info("Training configuration")
    pp(vars(self))
    self.load("training.config")
    self.save("training.config")
def init_param(self):
    super().init_param()
    colorlog.info(
        "[init_param] for {}: bases for label embedding matrices: xavier_uniform_"
        .format(self.__class__.__name__))
    for W in self.BasesList:
        nn.init.xavier_uniform_(W.weight)
def __init__(self, FLAGS):
    super(ModelConfig, self).__init__(FLAGS)

    # Embedding dimensions
    self.img_dim = 2048

    # Memory size
    self.img_memory_size = 1
    #self.max_context_length = 80
    #self.max_output_length = 16
    self.memory_size = (self.img_memory_size + self.max_context_length +
                        self.max_output_length)

    # Memory CNN
    self.context_filter_sizes = [3, 4, 5]
    self.output_filter_sizes = [3, 4, 5]
    self.num_channels_total = self.num_channels * \
        (len(self.context_filter_sizes) + len(self.output_filter_sizes))

    colorlog.info("Model configuration")
    pp(vars(self))
    self.load("model.config")
    self.batch_size = FLAGS.batch_size
    self.save("model.config")
def _preprocess_episodes(self, episodes, dictionary, mode):
    """
    Tokenize all the fields in Wizard-of-Wikipedia
    """
    colorlog.info("Preprocess wizard of wikipedia dataset")
    tokenize = lambda x: ' '.join(
        [str(data_vocab.BERT_CLS_ID)] +
        [str(y) for y in dictionary.convert_tokens_to_ids(dictionary.tokenize(x))] +
        [str(data_vocab.BERT_SEP_ID)])

    new_episodes = []
    for episode_num, episode in enumerate(tqdm(episodes, ncols=70)):
        new_examples = []
        for example_num, example in enumerate(episode):
            # Tokenize inputs and convert to tokens
            context = tokenize(example['text'])
            if mode == "train":
                response = tokenize(example['labels'][0])
            else:
                response = tokenize(example['eval_labels'][0])
            chosen_topic = tokenize(example['chosen_topic'])

            # Set up knowledge
            checked_knowledge = example['title'] + ' __knowledge__ ' + example['checked_sentence']
            knowledges = [checked_knowledge] + \
                [k for k in example['knowledge'].rstrip().split('\n')]
            for idx, k in enumerate(knowledges[1:]):
                if k == checked_knowledge:
                    break
            else:
                # Sometimes, knowledge does not include checked_sentence
                idx = None
                colorlog.warning("Knowledge does not include checked sentence.")
            # A plain "if idx" would skip the removal when the duplicate sits at
            # position 0 of the remaining list, so test for None explicitly
            if idx is not None:
                del knowledges[idx + 1]

            # Tokenize knowledge
            knowledge_sentences = [tokenize(k) for k in knowledges]

            new_example = {
                'context': context,
                'response': response,
                'chosen_topic': chosen_topic,
                'knowledge_sentences': knowledge_sentences,
                'episode_num': episode_num,
                'example_num': example_num
            }
            new_examples.append(new_example)
        new_episodes.append(new_examples)

    if self._datapath:
        episodes_fname = self._get_preprocessed_fname(mode)
        colorlog.info(f"Cache preprocessed dataset to {episodes_fname}")
        with open(episodes_fname, 'w') as fp:
            for episode in new_episodes:
                fp.write(json.dumps(episode) + '\n')

    return new_episodes, dictionary
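# A minimal sketch of what the tokenize lambda above produces, assuming the
# dictionary behaves like BERT's FullTokenizer and hypothetical CLS/SEP ids
# 101 and 102 (a toy vocabulary stands in for the real one):
def _tokenize_demo():
    BERT_CLS_ID, BERT_SEP_ID = 101, 102          # assumed marker ids
    toy_ids = {'hello': 7592, 'world': 2088}     # toy vocabulary
    tokens = 'hello world'.split()
    framed = ' '.join([str(BERT_CLS_ID)] +
                      [str(toy_ids[t]) for t in tokens] +
                      [str(BERT_SEP_ID)])
    assert framed == "101 7592 2088 102"  # space-joined ids framed by CLS/SEP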
def validation(self):
    dataloader = self.dev_dataloader
    N = len(dataloader.dataset)
    B = dataloader.batch_size
    pred_np = np.zeros((N), dtype=np.int64)
    target_np = np.zeros((N), dtype=np.int64)

    with torch.no_grad():
        self.model.eval()
        for i_sample, sample_batch in enumerate(dataloader):
            (text, length, mask, user, product, label) = sample_batch
            pred = self.model(text, length, mask, **{
                'user': user,
                'product': product
            })  # N, 5
            pred = torch.argmax(pred, dim=-1)
            pred_np[i_sample * B:(i_sample + 1) * B] = pred.cpu().data.numpy()
            target_np[i_sample * B:(i_sample + 1) * B] = label.cpu().data.numpy()
        self.model.train()

    acc = (pred_np == target_np).mean()
    rmse = ((pred_np - target_np)**2).mean()**0.5
    print("acc: {:2.2f}%, rmse: {:.3f}".format(acc * 100, rmse))

    if self.engine.state.best_accuracy < acc:
        path = os.path.join(self.args.param_dir, self.args.model_type)
        torch.save(
            {
                'state_dict': self.model.state_dict(),
                'optimizer_state_dict': self.optimizer.state_dict(),
            }, path)
        self.engine.state.best_accuracy = acc
        self.engine.state.best_param_path = path
        colorlog.info(">> parameter saved {}".format(path))

    return acc, rmse
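# A small self-contained sketch of the two metrics computed above, assuming
# integer class labels (e.g. 5-way ratings) so that RMSE on the raw label
# indices is meaningful:
import numpy as np

def _metrics_demo():
    pred_np = np.array([0, 1, 2, 4], dtype=np.int64)
    target_np = np.array([0, 1, 3, 4], dtype=np.int64)
    acc = (pred_np == target_np).mean()                # 0.75
    rmse = ((pred_np - target_np) ** 2).mean() ** 0.5  # 0.5
    return acc, rmse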
def get_file(self, headers: OrderedDict, data: Optional[str], client_cert: SSLCertT) -> HttpResponse:
    # Ordinary file on filesystem
    username = CertHelpers.get_field(client_cert, "emailAddress")
    url_path = headers[":path"].split("?")[0]
    url_path_safe = "".join(
        filter(lambda c: c.isalpha() or c in "/-_.", url_path)).replace("..", "").strip("/")
    file_path = os.path.join(config.CFG.http["DOC_ROOT"], url_path_safe)

    if os.path.isdir(file_path):
        file_path = os.path.join(file_path, config.CFG.http["DOC_DEFAULT_NAME"])

    ctype = mimetypes.guess_type(file_path)[0] or "application/octet-stream"

    try:
        with open(file_path, 'rb') as fd:
            response = fd.read()
    except FileNotFoundError:
        warn("[{}] Cannot open requested file \"{}\"".format(username, file_path))
        http_resp = HttpResponse.empty(HttpStatus.NotFound)
    else:
        info("[{}] Serving ordinary file {} of type \"{}\"".format(username, file_path, ctype))
        http_resp = HttpResponse(HttpStatus.Ok, response, ctype)

    return http_resp
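# A quick standalone check of the whitelist sanitization used above: only
# letters and "/-_." survive, ".." is stripped, and surrounding slashes are
# trimmed, which keeps traversal attempts inside DOC_ROOT. Note that
# isalpha() also drops digits, so e.g. "item2.html" becomes "item.html".
def _sanitize_demo():
    def sanitize(url_path):
        return "".join(
            filter(lambda c: c.isalpha() or c in "/-_.", url_path)
        ).replace("..", "").strip("/")

    assert sanitize("/static/app.css") == "static/app.css"
    assert sanitize("/../../etc/passwd") == "etc/passwd"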
def api_delete_closure(headers: OrderedDict, data: Optional[str], client_cert: SSLCertT) -> HttpResponse:
    username = CertHelpers.get_field(client_cert, "emailAddress")
    info("[{}] api_delete: {}".format(username, headers[":path"]))

    api_pth = headers[":path"][len(API_ROOT_data):]
    http_resp = _delete(ds, api_pth, username)
    return http_resp
def main():
    colorlog.basicConfig(
        filename=None,
        level=logging.INFO,
        format="%(log_color)s[%(levelname)s:%(asctime)s]%(reset)s %(message)s",
    )

    if not os.path.exists(CAPTION_OUTPUT_PATH):
        colorlog.info("Create directory %s" % (CAPTION_OUTPUT_PATH))
        os.makedirs(CAPTION_OUTPUT_PATH)
    if not os.path.exists(HASHTAG_OUTPUT_PATH):
        colorlog.info("Create directory %s" % (HASHTAG_OUTPUT_PATH))
        os.makedirs(HASHTAG_OUTPUT_PATH)

    # Load raw data
    caption_train_json = load_json(CAPTION_TRAIN_JSON_FNAME)
    caption_test1_json = load_json(CAPTION_TEST1_JSON_FNAME)
    caption_test2_json = load_json(CAPTION_TEST2_JSON_FNAME)
    hashtag_train_json = load_json(HASHTAG_TRAIN_JSON_FNAME)
    hashtag_test1_json = load_json(HASHTAG_TEST1_JSON_FNAME)
    hashtag_test2_json = load_json(HASHTAG_TEST2_JSON_FNAME)

    # Tokenize all
    caption_counter, caption_train_tokens, caption_test1_tokens, caption_test2_tokens = tokenize_all(
        caption_train_json, caption_test1_json, caption_test2_json, 'caption')
    hashtag_counter, hashtag_train_tokens, hashtag_test1_tokens, hashtag_test2_tokens = tokenize_all(
        hashtag_train_json, hashtag_test1_json, hashtag_test2_json, 'tags')

    # Create vocabulary
    caption_vocab, caption_rev_vocab = create_vocabulary(
        caption_counter, CAPTION_VOCAB_FNAME, CAPTION_VOCAB_SIZE)
    hashtag_vocab, hashtag_rev_vocab = create_vocabulary(
        hashtag_counter, HASHTAG_VOCAB_FNAME, HASHTAG_VOCAB_SIZE)
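# create_vocabulary (called from main above) is not shown in this section. A
# plausible minimal sketch, assuming it keeps the vocab_size most frequent
# tokens, writes them one per line, and returns token->id and id->token maps
# (the real signature and behavior may differ):
from collections import Counter

def _create_vocabulary_sketch(token_counter: Counter, vocab_fname: str, vocab_size: int):
    most_common = [tok for tok, _ in token_counter.most_common(vocab_size)]
    with open(vocab_fname, 'w') as fp:
        fp.write('\n'.join(most_common))
    vocab = {tok: idx for idx, tok in enumerate(most_common)}
    rev_vocab = most_common  # index -> token
    return vocab, rev_vocab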
def tokenize_all(train_json, test1_json, test2_json, key='caption', topic=False):
    """
    Tokenize sentences in raw dataset

    Args:
        train_json, test1_json, test2_json: raw json object
        key: 'caption' or 'tags'
    """
    colorlog.info("Tokenize %s data" % (key))
    token_counter = Counter()
    train_tokens = {}
    test1_tokens = {}
    test2_tokens = {}

    # Train data
    for user_id, posts in tqdm(train_json.items(), ncols=70, desc="train data"):
        train_tokens[user_id] = {}
        for post_id, post in posts.items():
            post_tokens = tokenize(post[key])
            if topic:
                topic_tokens = tokenize(post['topic'])
                train_tokens[user_id][post_id] = [post_tokens, topic_tokens]
                for post_token in topic_tokens:
                    token_counter[post_token] += 1
            else:
                train_tokens[user_id][post_id] = post_tokens
                for post_token in post_tokens:
                    token_counter[post_token] += 1

    # Test1 data
    for user_id, posts in tqdm(test1_json.items(), ncols=70, desc="test1 data"):
        test1_tokens[user_id] = {}
        for post_id, post in posts.items():
            post_tokens = tokenize(post[key])
            if topic:
                topic_tokens = tokenize(post['topic'])
                test1_tokens[user_id][post_id] = [post_tokens, topic_tokens]
            else:
                test1_tokens[user_id][post_id] = post_tokens

    # Test2 data
    for user_id, posts in tqdm(test2_json.items(), ncols=70, desc="test2 data"):
        test2_tokens[user_id] = {}
        for post_id, post in posts.items():
            post_tokens = tokenize(post[key])
            if topic:
                topic_tokens = tokenize(post['topic'])
                test2_tokens[user_id][post_id] = [post_tokens, topic_tokens]
            else:
                test2_tokens[user_id][post_id] = post_tokens

    return token_counter, train_tokens, test1_tokens, test2_tokens
def evaluation_with_dict(self, pred_answers_dict, method="coco"):
    colorlog.info("Run evaluation...")

    # Dict to list pairs
    predictions = []
    answers = []
    for pred_answers in pred_answers_dict.values():
        prediction = pred_answers['prediction']
        answer = pred_answers['answers']

        # Type checking
        if isinstance(prediction, str):
            prediction = prediction.split()
        if isinstance(answer[0], str):
            answer = [answer_.split() for answer_ in answer]

        predictions.append(prediction)
        answers.append(answer)

    if method == "coco":
        eval_result = self._coco_evaluation(predictions, answers)
    else:
        raise NotImplementedError

    return eval_result
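# The input shape expected by evaluation_with_dict, as implied by the loop
# above: each value holds one predicted sequence and a list of reference
# answers, either as whitespace-joined strings or as pre-split token lists.
# Keys and contents below are illustrative only.
_example_pred_answers_dict = {
    'sample_0': {
        'prediction': 'a dog runs',
        'answers': ['a dog is running', 'the dog runs'],
    },
    'sample_1': {
        'prediction': ['a', 'cat', 'sleeps'],           # already tokenized
        'answers': [['a', 'cat', 'is', 'sleeping']],
    },
}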
def __init__(self):
    # HTTP server init
    self.http_handlers = HttpHandlerList()

    ssl_context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
    ssl_context.options |= (ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1 | ssl.OP_NO_COMPRESSION)
    ssl_context.load_cert_chain(certfile=CONFIG_HTTP["SERVER_SSL_CERT"],
                                keyfile=CONFIG_HTTP["SERVER_SSL_PRIVKEY"])
    try:
        ssl_context.set_alpn_protocols(["h2"])
    except AttributeError:
        info("Python not compiled with ALPN support, using NPN instead.")
        ssl_context.set_npn_protocols(["h2"])

    if not CONFIG_HTTP["DBG_DISABLE_CERTS"]:
        ssl_context.verify_mode = ssl.CERT_REQUIRED
        ssl_context.load_verify_locations(cafile=CONFIG_HTTP["CA_CERT"])

    self.loop = asyncio.get_event_loop()

    # Each client connection will create a new H2Protocol instance
    listener = self.loop.create_server(
        H2Protocol,
        "127.0.0.1" if CONFIG_HTTP["LISTEN_LOCALHOST_ONLY"] else "",
        CONFIG_HTTP["PORT"],
        ssl=ssl_context)
    self.server = self.loop.run_until_complete(listener)
def __init__(self):
    # HTTP server init
    if config.CFG.http["DISABLE_SSL"]:
        ssl_context = None
    else:
        ssl_context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
        ssl_context.options |= (ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1 | ssl.OP_NO_COMPRESSION)
        ssl_context.load_cert_chain(certfile=config.CFG.http["SERVER_SSL_CERT"],
                                    keyfile=config.CFG.http["SERVER_SSL_PRIVKEY"])
        if ssl.HAS_ALPN:
            ssl_context.set_alpn_protocols(["h2"])
        else:
            info("Python not compiled with ALPN support, using NPN instead.")
            ssl_context.set_npn_protocols(["h2"])
        if not config.CFG.http["DBG_DISABLE_CERT"]:
            ssl_context.verify_mode = ssl.CERT_REQUIRED
            ssl_context.load_verify_locations(cafile=config.CFG.http["CA_CERT"])

    self.loop = asyncio.get_event_loop()

    # Each client connection will create a new H2Protocol instance
    listener = self.loop.create_server(
        H2Protocol,
        "127.0.0.1" if config.CFG.http["LISTEN_LOCALHOST_ONLY"] else "",
        config.CFG.http["PORT"],
        ssl=ssl_context
    )
    self.server = self.loop.run_until_complete(listener)
    H2Protocol.LOOP = self.loop
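# A minimal standalone sketch of the TLS setup shared by the two server
# __init__ variants above: an HTTP/2-ready context with old TLS versions and
# compression disabled, negotiated via ALPN. The cert/key paths are
# placeholders.
import ssl

def _make_h2_ssl_context(certfile="server.crt", keyfile="server.key"):
    ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
    ctx.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1 | ssl.OP_NO_COMPRESSION
    ctx.load_cert_chain(certfile=certfile, keyfile=keyfile)
    if ssl.HAS_ALPN:  # mirrors the capability check in the second variant
        ctx.set_alpn_protocols(["h2"])
    return ctx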
def test(self):
    # LOAD PRETRAINED PARAMETERS
    state_dict = torch.load(self.param_dir)
    self.model.load_state_dict(state_dict)

    # EVALUATION
    test_acc, test_rmse = self.evaluation(self.test_dataloader)

    # SAVE FINAL EVALUATION PERFORMANCE
    colorlog.info("""
    " Evaluation with test data set "
    << Model Type : {} >>
    TEST ACCURACY : {:2.2f}%
    TEST RMSE : {:2.4f}
    DEV ACCURACY : {:2.2f}%
    DEV RMSE : {:2.4f}
    """.format(
        self.args.model_type,
        test_acc * 100,
        test_rmse,
        self.engine.state.best_dev_acc * 100,
        self.engine.state.dev_rmse,
    ))
    return
def create_graph(cfg, graph, mode):
    colorlog.info("Build %s graph" % mode)
    with graph.as_default(), tf.container(mode):
        if cfg.data_name == "audiocaps":
            prefetch_data_fn = input_helper.prefetch_dataset
            vocab_fname = os.path.join('data/audiocaps/features/auxiliary',
                                       '{}.vocab'.format(cfg.vocab_size))
        else:
            raise NotImplementedError()

        # Read vocab
        _, index_to_string = vocab_utils.create_vocab_tables(vocab_fname, cfg.vocab_size)

        # Read dataset
        num_data, iterator_init, iterators = prefetch_data_fn(
            cfg.batch_size,
            cfg.bucket_width,
            cfg.buffer_size,
            cfg.random_seed,
            cfg.num_gpus,
            cfg.num_epochs,
            mode,
            cfg.feature_name,
        )
        iters_in_data = int(num_data / cfg.batch_size / cfg.num_gpus)

        # Build model
        model_args = cfg, index_to_string, iterators, iters_in_data, mode == "train"
        if cfg.model_name == "PyramidLSTM":
            model = PyramidLSTM(*model_args)
        else:
            raise NotImplementedError()

    return model, iters_in_data, iterator_init
def load_json(fname):
    colorlog.info(f"Read {fname}")
    jsons = []
    with open(fname, 'r') as fp:
        for line in fp:
            jsons.append(json.loads(line))
    return jsons
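# load_json above expects JSON Lines input (one JSON object per line), not a
# single JSON document. A toy round trip, with a hypothetical file name:
import json

def _jsonl_demo(fname='/tmp/example.jsonl'):
    with open(fname, 'w') as fp:
        for obj in [{'id': 1}, {'id': 2}]:
            fp.write(json.dumps(obj) + '\n')
    return load_json(fname)  # -> [{'id': 1}, {'id': 2}]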
def get_staging_api_closure(headers: OrderedDict, data: Optional[str], client_cert: SSLCertT) -> HttpResponse:
    username = CertHelpers.get_field(client_cert, "emailAddress")
    info("[{}] api_get_staging: {}".format(username, headers[":path"]))

    api_pth = headers[":path"][len(API_ROOT_STAGING_data):]
    http_resp = _get(ds, headers, api_pth, username, staging=True)
    return http_resp
def _load_and_preprocess_all(self, mode: str):
    self._download_data(mode)

    if os.path.exists(self._get_preprocessed_fname(mode)):
        episodes_fname = self._get_preprocessed_fname(mode)
        colorlog.info(f"Load preprocessed holle from {episodes_fname}")
        with open(episodes_fname, 'r') as fp:
            episodes = []
            for line in fp:
                episodes.append(json.loads(line))
        dictionary = tokenization.FullTokenizer(self._vocab_fname)
        return episodes, dictionary

    # Load raw dataset
    raw_fname = os.path.join(self._datapath, f'{mode}_data.json')
    with open(raw_fname, 'r') as fp:
        episodes = json.load(fp)
    if mode != 'test':
        episodes = self._to_wow_format(episodes, mode)
    else:
        multi_fname = os.path.join(self._datapath, 'multi_reference_test.json')
        with open(multi_fname, 'r') as fp:
            multi_responses = json.load(fp)
        episodes = self._to_wow_format_multi(episodes, multi_responses, mode)

    dictionary = tokenization.FullTokenizer(self._vocab_fname)
    return self._preprocess_episodes(episodes, dictionary, mode)
def __init__(self, FLAGS):
    super(ModelConfig, self).__init__(FLAGS)

    # Embedding dimensions
    self.img_dim = 2048

    # Memory size
    self.img_memory_size = 1
    if self.use_user_context:
        self.memory_size = (self.img_memory_size + self.max_context_length +
                            self.max_output_length)
    else:
        self.memory_size = self.img_memory_size + self.max_output_length

    # Memory CNN
    self.context_filter_sizes = [3, 4, 5]
    self.output_filter_sizes = [3, 4, 5]
    if self.use_user_context:
        self.num_channels_total = self.num_channels * (
            len(self.context_filter_sizes) + len(self.output_filter_sizes))
    else:
        self.num_channels_total = self.num_channels * len(self.output_filter_sizes)

    colorlog.info("Model configuration")
    pp(vars(self))
    self.batch_size = FLAGS.BATCH_SIZE
    self.save("model.config")
def check_cart():
    frame = inspect.currentframe()
    current_function_name = inspect.getframeinfo(frame).function
    try:
        data = {
            'id': ID,
            'pw': PW,
        }
        r = requests.post('http://{}:{}/login'.format(HOST, PORT),
                          data=data,
                          allow_redirects=False)
        if 'set-cookie' not in r.headers:
            colorlog.error('"{}" failed'.format(current_function_name))
            os._exit(1)

        cookie = r.headers['set-cookie']
        cookie = [i for i in cookie.split(';') if 'connect.sid' in i][0]
        headers = {
            'Cookie': cookie,
        }

        r = requests.get(
            'http://{}:{}/product/add?product-id=1&product-num=8'.format(HOST, PORT),
            headers=headers)
        if r.status_code != 200:
            colorlog.error('"{}" failed'.format(current_function_name))
            os._exit(1)

        r = requests.get('http://{}:{}/cart'.format(HOST, PORT), headers=headers)
        ids = [i.strip() for i in r.text.split('\n') if 'cart-id' in i]
        for line in ids:
            # map() returns an iterator in Python 3, so materialize it before len()
            id_num = list(map(int, re.findall(r'([0-9]+)', line)))
            if len(id_num) != 1:
                continue
            r = requests.post('http://{}:{}/cart/remove'.format(HOST, PORT),
                              headers=headers,
                              data={'cart_id': id_num[0]})
            if r.status_code != 200:
                colorlog.error('"{}" failed'.format(current_function_name))
                os._exit(1)
    except Exception:
        colorlog.error('"{}" failed'.format(current_function_name))
        os._exit(2)

    colorlog.info('"{}" passed'.format(current_function_name))
    return
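# A quick illustration of the cart-id extraction fixed above, using a
# hypothetical line of the cart page's HTML:
import re

def _extract_id_demo():
    line = '<td class="cart-id">42</td>'
    id_num = list(map(int, re.findall(r'([0-9]+)', line)))
    assert id_num == [42]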
def generate_node(self, node_ii: InstanceRoute, username: str, staging: bool) -> JsonNodeT:
    info("jukebox_example_handler, ii = {}".format(node_ii))
    artist_list_ii = self.ds.parse_ii("/example-jukebox:jukebox/library/artist", PathFormat.URL)
    jb_artists = self.ds.get_data_root().goto(artist_list_ii).value
    return len(jb_artists)
def put_api(self, headers: OrderedDict, data: Optional[str], client_cert: SSLCertT) -> HttpResponse:
    username = ClientHelpers.get_username(client_cert, headers)
    info("[{}] api_put: {}".format(username, headers[":path"]))

    api_pth = headers[":path"][len(config.CFG.api_root_data):]
    http_resp = self._put(api_pth, username, data)
    return http_resp
def init_param(self):
    super().init_param()
    colorlog.info(
        "[init_param] for {}: meta parameters: uniform_ [-0.01, 0.01]".format(
            self.__class__.__name__))
    for name, num_meta in self.args.meta_units:
        colorlog.info("\t {} initialized".format(name))
        nn.init.uniform_(getattr(self, name).weight, -0.01, 0.01)
def init_param(self):
    colorlog.info("[init_param] for {}".format(self.__class__.__name__))
    for p in self.parameters():
        if p.requires_grad:
            if len(p.shape) > 1:
                nn.init.xavier_uniform_(p)
            else:
                nn.init.constant_(p, 0)
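# A self-contained sketch of the initialization rule above: matrices get
# Xavier-uniform, vectors (e.g. biases) get zeros. A toy linear layer stands
# in for the real model.
import torch.nn as nn

def _init_demo():
    layer = nn.Linear(4, 3)
    for p in layer.parameters():
        if p.requires_grad:
            if len(p.shape) > 1:
                nn.init.xavier_uniform_(p)   # weight: shape (3, 4)
            else:
                nn.init.constant_(p, 0)      # bias: shape (3,)
    return layer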
def get_api_running(self, headers: OrderedDict, data: Optional[str], client_cert: SSLCertT) -> HttpResponse:
    username = CertHelpers.get_field(client_cert, "emailAddress")
    info("[{}] api_get_running: {}".format(username, headers[":path"]))

    api_pth = headers[":path"][len(config.CFG.api_root_running_data):]
    http_resp = self._get(headers, api_pth, username, staging=False)
    return http_resp
def delete_api(self, headers: OrderedDict, data: Optional[str], client_cert: SSLCertT) -> HttpResponse:
    username = CertHelpers.get_field(client_cert, "emailAddress")
    info("[{}] api_delete: {}".format(username, headers[":path"]))

    api_pth = headers[":path"][len(config.CFG.api_root_data):]
    http_resp = self._delete(api_pth, username)
    return http_resp
def annotate_all(self):
    """Annotates dataframes after read_annotation"""
    logger.info("Annotating data")
    self.genomic_df = self.genomic_df.merge(self.annotation_df,
                                            how='left',
                                            on=['IDENTIFIER'])
    self.genomic_df = self._string_split(self.genomic_df, 'GENE', ',')
    self.annotate = True
def _delete(ds: BaseDatastore, pth: str, username: str) -> HttpResponse:
    url_split = pth.split("?")
    url_path = url_split[0]

    rpc1 = RpcInfo()
    rpc1.username = username
    rpc1.path = url_path.rstrip("/")

    # Skip NACM check for privileged users
    if username in CONFIG_NACM["ALLOWED_USERS"]:
        rpc1.skip_nacm_check = True

    try:
        ds.lock_data(username)
        try:
            try:
                staging_root = ds.get_data_root_staging(rpc1.username)
            except StagingDataException:
                info("Starting transaction for user \"{}\"".format(rpc1.username))
                ds.make_user_journal(rpc1.username, None)
                staging_root = ds.get_data_root_staging(rpc1.username)
            new_root = ds.delete_node_rpc(staging_root, rpc1)
            ds.add_to_journal_rpc(ChangeType.DELETE, rpc1, None, *new_root)
            http_resp = HttpResponse.empty(HttpStatus.NoContent, status_in_body=False)
        except NacmForbiddenError as e:
            http_resp = HttpResponse.error(HttpStatus.Forbidden,
                                           RestconfErrType.Protocol,
                                           ERRTAG_ACCDENIED,
                                           exception=e)
        except (NonexistentSchemaNode, NonexistentInstance) as e:
            http_resp = HttpResponse.error(HttpStatus.NotFound,
                                           RestconfErrType.Protocol,
                                           ERRTAG_INVVALUE,
                                           exception=e)
        except NoHandlerError as e:
            http_resp = HttpResponse.error(HttpStatus.BadRequest,
                                           RestconfErrType.Protocol,
                                           ERRTAG_OPNOTSUPPORTED,
                                           exception=e)
        except (InstanceValueError, StagingDataException, YangsonException) as e:
            http_resp = HttpResponse.error(HttpStatus.BadRequest,
                                           RestconfErrType.Protocol,
                                           ERRTAG_INVVALUE,
                                           exception=e)
    except DataLockError as e:
        http_resp = HttpResponse.error(HttpStatus.Conflict,
                                       RestconfErrType.Protocol,
                                       ERRTAG_LOCKDENIED,
                                       exception=e)
    finally:
        ds.unlock_data()

    return http_resp
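# Summary of the error mapping implemented by _delete above; the ERRTAG_*
# constants presumably expand to the standard RESTCONF error-tags:
#   NacmForbiddenError                        -> 403 Forbidden   (ERRTAG_ACCDENIED)
#   NonexistentSchemaNode/NonexistentInstance -> 404 Not Found   (ERRTAG_INVVALUE)
#   NoHandlerError                            -> 400 Bad Request (ERRTAG_OPNOTSUPPORTED)
#   InstanceValueError/StagingDataException/
#       YangsonException                      -> 400 Bad Request (ERRTAG_INVVALUE)
#   DataLockError (lock_data failed)          -> 409 Conflict    (ERRTAG_LOCKDENIED)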
def load_model(model, ckpt, session, name):
    start_time = time.time()
    model.saver.restore(session, ckpt)
    session.run(tf.tables_initializer())
    colorlog.info("Loaded %s model from %s, time %.2fs" %
                  (name, ckpt, time.time() - start_time))
    return model