def main():
    logger = create_logger(__name__)

    parser = argparse.ArgumentParser()
    parser.add_argument('--glove_path', type=str, default="data/glove.840B.300d.txt")
    parser.add_argument('--train_path', type=str, default="data/squad_train_v.csv")
    parser.add_argument('--test_path', type=str, default="data/squad_dev_v.csv")
    parser.add_argument('--meta_path', type=str, default="resource/meta.pkl")
    parser.add_argument('--source_path', type=str, default="resource")
    config = parser.parse_args()

    logger.info("loading data...")
    # `columns` (the list of CSV columns to keep) is expected to be defined at
    # module scope; it is not shown in this excerpt.
    train_data = pd.DataFrame(pd.read_csv(config.train_path), columns=columns)
    test_data = pd.DataFrame(pd.read_csv(config.test_path), columns=columns)
    glove_vocab = load_glove_vocab(config.glove_path, dim=300)

    # Build vocabularies over train + test together so no token seen at
    # evaluation time falls outside the vocabulary.
    merged_data = pd.concat([train_data, test_data])
    vocab, tag_vocab, ner_vocab, char_vocab = build_vocab(
        merged_data, glove_vocab, tagner_on=True)
    dump_data(char_vocab, "resource/char_vocab.pkl")

    logger.info("building embedding...")
    glove_embedding = build_embedding(config.glove_path, vocab)

    logger.info("dumping meta data...")
    meta = {"vocab": vocab, "tag_vocab": tag_vocab,
            "ner_vocab": ner_vocab, "embedding": glove_embedding}
    dump_data(meta, config.meta_path)
    vocab, tag_vocab, ner_vocab, char_vocab, embedding = load_meta_(config.meta_path)

    logger.info("building train data...")
    train_input_path = os.path.join(config.source_path, "addexm_train_input.txt")
    build_data(train_data, vocab, tag_vocab, ner_vocab, char_vocab,
               fout=train_input_path)

    logger.info("building test data...")
    test_input_path = os.path.join(config.source_path, "addexm_test_input.txt")
    build_data(test_data, vocab, tag_vocab, ner_vocab, char_vocab,
               fout=test_input_path)
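# Entry-point guard (assumed; not shown in this excerpt) so the preprocessing
# script can be run directly from the command line:
if __name__ == '__main__':
    main()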
def __init__(self, opt):
    self.opt = opt
    self.web_dir = os.path.join(opt.checkpoints_dir, opt.name, 'web')
    self.img_dir = os.path.join(self.web_dir, 'images')
    self.log_dir = os.path.join(opt.log_dir, opt.name)
    util.mkdirs([self.web_dir, self.img_dir, self.log_dir])

    # Timestamped log file so repeated runs do not overwrite each other.
    log_name = 'train{}.log'.format(datetime.now().strftime("%Y%m%d-%H%M%S"))
    self.logger = create_logger(os.path.join(self.log_dir, log_name))
    self.logger.info('============ Initialized logger ============')
    self.logger.info('\n'.join(
        '%s: %s' % (k, str(v)) for k, v in sorted(dict(vars(opt)).items())))

    # TensorBoard-style logger, kept separate from the plain-text log above.
    self.tb_logger = Logger(os.path.join(opt.log_dir, opt.name))
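# A minimal construction sketch for this class (the class name `Visualizer`
# and the option values are hypothetical; the __init__ above only requires
# the attributes it reads: checkpoints_dir, name, and log_dir):
#
#   from argparse import Namespace
#   opt = Namespace(checkpoints_dir='./checkpoints', name='experiment_1',
#                   log_dir='./logs')
#   visualizer = Visualizer(opt)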
from util.text_utils import normalize_text, END, STA
import numpy as np
import spacy
import json
from tqdm import tqdm
from util.logger import create_logger
from allennlp.modules.elmo import batch_to_ids
from allennlp.data.token_indexers.elmo_indexer import ELMoCharacterMapper

logger = create_logger(__name__)
NLP = spacy.load('en', disable=['vectors', 'textcat', 'parser'])


def build_embedding(emb_path, vocab, dim=300):
    vocab_size = len(vocab)
    embedding = np.zeros((vocab_size, dim))
    with open(emb_path, 'r') as f:
        # 2196017 is the line count of glove.840B.300d.txt; it only sizes the
        # progress bar.
        for line in tqdm(f, total=2196017):
            elements = line.split()
            # GloVe tokens may contain spaces, so everything before the last
            # `dim` fields is the token.
            tok = normalize_text(" ".join(elements[:-dim]))
            if tok in vocab:
                embedding[vocab[tok]] = [float(x) for x in elements[-dim:]]
    return embedding


def build_glove_idx(doc, vocab):
    return [vocab[tok.text] for tok in doc if len(tok.text) > 0]


def build_char_idx(doc, vocab):
    idx_list = []
import pandas as pd
from util import logger

logger_obj = logger.create_logger()


def aggregate_df(df, group_param, aggr_param):
    """
    Aggregate values based on group attributes.

    :param df: dataframe on which the operation is to be performed
    :param group_param: argument(s) on which the output will be grouped
    :param aggr_param: argument that will be aggregated
    :return: aggregated dataframe
    """
    output_df = df
    if len(aggr_param) == 0:
        logger_obj.error('Attribute to be aggregated is mandatory for the function')
    else:
        if len(group_param) == 0:
            output_df = output_df.aggregate(aggr_param)
        else:
            output_df = output_df.groupby(group_param, axis=0).agg(aggr_param)
        logger_obj.info('Dataframe aggregated')
        # Reset the dataframe index; groupby aggregation moves the group
        # column(s) into the index.
        output_df = output_df.reset_index()
    return output_df


def join_df(df1, df2, merge_type, join_keys, col_names):
    """
import tensorflow as tf
import numpy as np
import sys
import os
import time
import argparse
import src.facenet as facenet
from src.align import detect_face
import csv
from os.path import isdir, isfile
import random
import logging

from settings import MODEL_DIR, IMG_PATH
from util.logger import create_logger

logger = create_logger('model', logging.DEBUG, 'model.log')
logger.info('Model directory: %s' % MODEL_DIR)

meta_file, ckpt_file = facenet.get_model_filenames(os.path.expanduser(MODEL_DIR))
logger.info('Metagraph file: %s' % meta_file)
logger.info('Checkpoint file: %s' % ckpt_file)

time_check_1 = time.time()
# Set up a facenet session and restore the model into it.
facenet_session = tf.Session()
facenet.load_model_with_session(facenet_session, MODEL_DIR, meta_file, ckpt_file)
time_check_2 = time.time()
logger.info("Loading facenet took {} seconds".format(time_check_2 - time_check_1))
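# Sketch of how the restored graph can be queried for face embeddings. This
# assumes the standard facenet tensor names ("input:0", "embeddings:0",
# "phase_train:0"); `images` is a hypothetical batch of preprocessed
# 160x160 RGB face crops, not defined in this module:
#
#   graph = facenet_session.graph
#   images_placeholder = graph.get_tensor_by_name("input:0")
#   embeddings_tensor = graph.get_tensor_by_name("embeddings:0")
#   phase_train = graph.get_tensor_by_name("phase_train:0")
#   emb = facenet_session.run(embeddings_tensor,
#                             feed_dict={images_placeholder: images,
#                                        phase_train: False})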
def __init__(self):
    self.logger_obj = logger.create_logger()
import os
import configparser
import logging

from util.logger import create_logger

setting_logger = create_logger('settings', logging.DEBUG, 'settings.log')
dir_path = os.path.dirname(os.path.realpath(__file__))

try:
    # app_config.ini is a symbolic link to the config file
    config = configparser.ConfigParser()
    config.read(os.path.join(dir_path, 'app_config.ini'))
    IMG_PATH = config["file_path"]["img_path"]
    MODEL_DIR = config["file_path"]["model_dir"]
except Exception as e:
    setting_logger.exception(e)
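# Expected layout of app_config.ini, inferred from the keys read above
# (the paths are placeholders; actual values are deployment-specific):
#
#   [file_path]
#   img_path = /path/to/uploaded/images
#   model_dir = /path/to/facenet/model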
import logging
import time
import os

from flask import Flask
from flask import request
from flask_cors import cross_origin

from util.logger import create_logger
from util.response import json_response, error
from util.image import save_image
from src import face_match_controller
from settings import IMG_PATH

app = Flask(__name__)
logger = create_logger('app', logging.DEBUG, 'app.log')


@app.route('/facematch/compare', methods=['POST'])
@cross_origin()
def compare_face():
    if 'files' not in request.files:
        return json_response(error(400, "no files sent"), None)
    uploaded_files = request.files.getlist("files")

    # Persist every uploaded image and collect the saved paths.
    faces = []
    for f in uploaded_files:
        file_path = save_image(IMG_PATH, f)
        faces.append(file_path)
    try: