Example #1
def main():
    logger = create_logger(__name__)
    parser = argparse.ArgumentParser()
    parser.add_argument('--glove_path', type=str, default="data/glove.840B.300d.txt")
    parser.add_argument('--train_path', type=str, default="data/squad_train_v.csv")
    parser.add_argument('--test_path', type=str, default="data/squad_dev_v.csv")
    parser.add_argument('--meta_path', type=str, default="resource/meta.pkl")
    parser.add_argument('--source_path', type=str, default="resource")
    config = parser.parse_args()

    logger.info("loading data...")
    train_data = DataFrame(pd.read_csv(config.train_path), columns=columns)
    test_data = DataFrame(pd.read_csv(config.test_path), columns=columns)

    glove_vocab = load_glove_vocab(config.glove_path, dim=300)
    merged_data = pd.concat([train_data, test_data])
    vocab, tag_vocab, ner_vocab, char_vocab = build_vocab(merged_data, glove_vocab, tagner_on=True)
    dump_data(char_vocab, "resource/char_vocab.pkl")

    logger.info("building embedding...")
    glove_embedding = build_embedding(config.glove_path, vocab)

    logger.info("dumping meta data...")
    meta = {"vocab": vocab, 'tag_vocab': tag_vocab, 'ner_vocab': ner_vocab, 'embedding': glove_embedding}
    dump_data(meta, config.meta_path)

    vocab, tag_vocab, ner_vocab, char_vocab, embedding = load_meta_(config.meta_path)
    logger.info("building train data...")
    train_input_path = os.path.join(config.source_path, "addexm_train_input.txt")
    build_data(train_data, vocab, tag_vocab, ner_vocab, char_vocab, fout=train_input_path)

    logger.info("building test data...")
    test_input_path = os.path.join(config.source_path, "addexm_test_input.txt")
    build_data(test_data, vocab, tag_vocab, ner_vocab, char_vocab, fout=test_input_path)
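
The snippet ends without an entry point; since every flag above has a default, a conventional guard (not part of the excerpt) would run the whole preprocessing pipeline as-is:

if __name__ == "__main__":
    main()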
Example #2
    def __init__(self, opt):
        self.opt = opt

        self.web_dir = os.path.join(opt.checkpoints_dir, opt.name, 'web')
        self.img_dir = os.path.join(self.web_dir, 'images')
        self.log_dir = os.path.join(opt.log_dir, opt.name)

        util.mkdirs([self.web_dir, self.img_dir, self.log_dir])

        log_name = 'train{}.log'.format(
            datetime.now().strftime("%Y%m%d-%H%M%S"))
        self.logger = create_logger(os.path.join(self.log_dir, log_name))
        self.logger.info('============ Initialized logger ============')
        self.logger.info('\n'.join(
            '%s: %s' % (k, str(v))
            for k, v in sorted(dict(vars(opt)).items())))

        self.tb_logger = Logger(os.path.join(opt.log_dir, opt.name))
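
For reference, the only attributes this initializer reads from opt are checkpoints_dir, name, and log_dir (plus whatever vars(opt) prints); a minimal stand-in with hypothetical values that would satisfy it:

from argparse import Namespace

# hypothetical option values covering the attributes the initializer uses
opt = Namespace(checkpoints_dir="checkpoints", name="experiment1", log_dir="logs")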
Example #3
from util.text_utils import normalize_text, END, STA
import numpy as np
import spacy
import json
from tqdm import tqdm
from util.logger import create_logger
from allennlp.modules.elmo import batch_to_ids
from allennlp.data.token_indexers.elmo_indexer import ELMoCharacterMapper

logger = create_logger(__name__)
NLP = spacy.load('en', disable=['vectors', 'textcat', 'parser'])


def build_embedding(emb_path, vocab, dim=300):
    vocab_size = len(vocab)
    embedding = np.zeros((vocab_size, dim))
    with open(emb_path, 'r', encoding='utf-8') as f:
        # 2196017 is the line count of glove.840B.300d.txt
        for line in tqdm(f, total=2196017):
            elements = line.split()
            # a GloVe token may itself contain spaces, so everything before
            # the last `dim` fields is the (normalized) token
            tok = normalize_text(" ".join(elements[:-dim]))
            if tok in vocab:
                embedding[vocab[tok]] = [float(x) for x in elements[-dim:]]
    return embedding
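
A minimal usage sketch (hypothetical two-token vocab) for the function above; it assumes emb_path points at a GloVe-format file with one token followed by dim floats per line:

# hypothetical call; vocab maps token -> row index in the returned matrix
vocab = {"the": 0, "cat": 1}
emb = build_embedding("data/glove.840B.300d.txt", vocab, dim=300)
assert emb.shape == (2, 300)  # rows stay zero for tokens not found in the file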


def build_glove_idx(doc, vocab):
    return [vocab[tok.text] for tok in doc if len(tok.text) > 0]


def build_char_idx(doc, vocab):
    idx_list = []
Example #4
import pandas as pd

from util import logger

logger_obj = logger.create_logger()

def aggregate_df(df, group_param, aggr_param):
    """
    function to aggregate values based on group attributes
    :param df: dataframe on which operation is to be performed
    :param group_param: argument(s) on which output will be grouped
    :param aggr_param: argument that will be aggregated
    :return: aggregated dataframe
    """
    output_df = df
    if len(aggr_param) == 0:
        logger_obj.error('Attribute to be summed is mandatory for the function')
    else:
        if len(group_param) == 0:
            output_df = output_df.aggregate(aggr_param)
        else:
            output_df = output_df.groupby(group_param, axis=0).agg(aggr_param)

    logger_obj.info('Dataframe aggregated')

    # Reset dataframe index. Aggregation sets first column as index
    output_df = output_df.reset_index()
    return output_df
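A small usage sketch (hypothetical column names) exercising the grouped path through aggregate_df:

# hypothetical data: sum `sales` per `region`
df = pd.DataFrame({
    "region": ["east", "east", "west"],
    "sales": [10, 20, 5],
})
result = aggregate_df(df, group_param=["region"], aggr_param={"sales": "sum"})
# `region` is a regular column again thanks to the reset_index() above
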

def join_df(df1, df2, merge_type, join_keys, col_names):
    """
Example #5
import tensorflow as tf
import numpy as np
import sys
import os
import time
import argparse
import src.facenet as facenet
from src.align import detect_face
import csv
from os.path import isdir, isfile
import random
import logging
from settings import MODEL_DIR, IMG_PATH
from util.logger import create_logger

logger = create_logger('model', logging.DEBUG, 'model.log')

logger.info('Model directory: %s' % MODEL_DIR)
meta_file, ckpt_file = facenet.get_model_filenames(
    os.path.expanduser(MODEL_DIR))
logger.info('Metagraph file: %s' % meta_file)
logger.info('Checkpoint file: %s' % ckpt_file)

time_check_1 = time.time()
# set a facenet session
facenet_session = tf.Session()
facenet.load_model_with_session(facenet_session, MODEL_DIR, meta_file,
                                ckpt_file)
time_check_2 = time.time()
logger.info("Loading facenet taken {} seconds".format(time_check_2 -
                                                      time_check_1))
Example #6
    def __init__(self):
        self.logger_obj = logger.create_logger()
Example #7
File: settings.py Project: MrXu/facenet
import os
import configparser
import logging
from util.logger import create_logger

setting_logger = create_logger('settings', logging.DEBUG, 'settings.log')
dir_path = os.path.dirname(os.path.realpath(__file__))

try:
    # app_config.ini is a symbolic link to the config file
    config = configparser.ConfigParser()
    config.read(os.path.join(dir_path, 'app_config.ini'))

    IMG_PATH = config["file_path"]["img_path"]
    MODEL_DIR = config["file_path"]["model_dir"]
except Exception as e:
    setting_logger.exception(e)
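
The section and key names below are inferred from the two reads above; a sketch of the layout app_config.ini is expected to have, exercised in memory:

import configparser

# hypothetical paths; [file_path] with img_path and model_dir is all the
# settings module reads
sample = """
[file_path]
img_path = /data/images
model_dir = /models/facenet
"""
cfg = configparser.ConfigParser()
cfg.read_string(sample)
assert cfg["file_path"]["model_dir"] == "/models/facenet"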
Example #8
import logging
import time
from flask import Flask
from flask import request
from flask_cors import cross_origin

from util.logger import create_logger
from util.response import json_response, error
from util.image import save_image
from src import face_match_controller
import os
from settings import IMG_PATH

app = Flask(__name__)

logger = create_logger('app', logging.DEBUG, 'app.log')


@app.route('/facematch/compare', methods=['POST'])
@cross_origin()
def compare_face():
    if 'files' not in request.files:
        return json_response(error(400, "no files sent"), None)
    uploaded_files = request.files.getlist("files")

    faces = []
    for f in uploaded_files:
        file_path = save_image(IMG_PATH, f)
        faces.append(file_path)

    try: