Example #1
    def __init__(self, page_url, html_string=None):
        init_logger(self)
        self.opener = urllib2.build_opener()
        self.opener.addheaders = [('user-agent', USER_AGENT)]

        if html_string:
            self.html = html_string
        else:
            assert page_url
            self.logger.debug(u'Fetching {0}'.format(page_url))
            self.html = self.opener.open(page_url).read()

        self.soup = BeautifulSoup.BeautifulSoup(self.html)
Example #2
 def __init__(self,
              config_path=os.path.join(os.environ['CONFROOT'], 'main.conf'),
              refresh_time=900):
     self.config = {}
     self.update_time = 0
     self.refresh_time = refresh_time
     self.path = config_path
     self.log = utils.init_logger('ConfigParser', {'syslog', 'stream'},
                                  verbosity='1')
     self.log.info('Created instance of Config parser.')
Example #3
    def __init__(self, auth, sub, dry_run=False):
        init_logger(self)
        self.dry_run = dry_run
        self.sub = sub
        self.sleeper = Sleeper(self.logger)

        # prepare
        cookies = cookielib.CookieJar()
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookies))
        self.opener.addheaders = [('user-agent', USER_AGENT)]

        if dry_run:
            return

        # login
        result = self.api_call('/api/login', user=auth.reddit_login, passwd=auth.reddit_password)
        if not any([True for c in cookies if c.name == 'reddit_session']):
            self.logger.error('Login failure, result: {0}'.format(result))
            raise RuntimeError('Login failure')

        self.logger.debug('Successfully logged in')

        me = self.api_call('/api/me.json')
        self.opener.addheaders.append(('x-modhash',  me['data']['modhash']))
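
The init_logger(self) helper used in Examples #1 and #3 is not shown on this page. A minimal sketch of such an instance-level helper, assuming it does nothing more than attach a class-named logger to the object, might be:

import logging

def init_logger(obj, level=logging.DEBUG):
    # Hypothetical sketch: attach a logger named after the object's class.
    logger = logging.getLogger(obj.__class__.__name__)
    if not logger.handlers:  # avoid stacking duplicate handlers on re-instantiation
        handler = logging.StreamHandler()
        handler.setFormatter(
            logging.Formatter('%(asctime)s %(name)s %(levelname)s: %(message)s'))
        logger.addHandler(handler)
    logger.setLevel(level)
    obj.logger = logger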
Example #4
            else:
                pass  # deal with bad lines of text here
    return data


def init_arg():
    parser = argparse.ArgumentParser()
    parser.add_argument("-o", default='.')
    parser.add_argument("--itout", default=5, type=int)
    return parser.parse_args()


##### Main settings
args = init_arg()
odir = 'Synthetic'
logger = utils.init_logger(odir, 'log_cs_marginal_deeppseudo_sum.txt')
OUT_ITERATION = args.itout
data_mode = 'Synthetic'
num_Event = 2  # causes of the event
evalTime = [12, 60]  # evaluation times (for C-index and Brier-Score)
in_path = odir + '/results/'

if not os.path.exists(in_path):
    os.makedirs(in_path)

WEIGHTED_C_INDEX = np.zeros([num_Event, len(evalTime), OUT_ITERATION])
WEIGHTED_BRIER_SCORE = np.zeros([num_Event, len(evalTime), OUT_ITERATION])

for out_itr in range(OUT_ITERATION):

    ## Define a list of continuous columns from the covariates
Example #5
import smtplib
from email.mime.text import MIMEText
from email.header import Header
import time
from sqlalchemy import create_engine, Column, String, Integer
from sqlalchemy.orm import sessionmaker
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
import requests
from lxml import etree
from retrying import retry
import utils
logger = utils.init_logger('data-montior_mail')
import traceback

CONSTR = 'mysql+pymysql://root:[email protected]:3306/xxxxx?charset=utf8'
engine = create_engine(CONSTR, echo=False)
DBSession = sessionmaker(bind=engine)
session = DBSession()
Base = declarative_base()


class Mybase(Base):
    __tablename__ = 'decryption-tools'
    id = Column(Integer, name='Id', primary_key=True, autoincrement=True)
    name = Column(String(255), nullable=False)

    def __repr__(self):
        return "{}".format(self.name)

parser.add_argument("--repeat", type=int, default=5)

args = parser.parse_args()

if __name__ == "__main__":
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    np.random.seed(args.seed)

    args.save = os.path.join("experiments", args.save)
    if os.path.exists(args.save) and args.load == "none":
        raise NameError("previous experiment '{}' already exists!".format(
            args.save))
    os.makedirs(args.save)

    logger = init_logger(logpath=args.save,
                         experiment_name="logs-" + args.model)
    logger.info(args)

    args.device = torch.device(
        "cuda:" + str(args.gpu) if torch.cuda.is_available() else "cpu")
    train_loader, test_loader, train_eval_loader = get_cifar10_loaders(
        data_aug=True, batch_size=args.tbsize)

    model = cifar_model(args.model, layers=args.block, norm_type=args.norm)
    logger.info(model)
    model.to(args.device)

    loader = {
        "train_loader": train_loader,
        "train_eval_loader": train_eval_loader,
        "test_loader": test_loader
Example #7
# -*- coding: UTF-8 -*-
from app import app, db
import re, time, json
from app.models import *
from flask import render_template, redirect, session, url_for, request, g, jsonify, flash, make_response
import traceback
import sys, os
import utils
logger = utils.init_logger('hfs_web')
query_data_url = []
query_data_md5 = []
sys.path.append("..")
from hfs_down import get_hfs_down_file_url
from hfs_down import get_url_id_by_hfs_host_url
from hfs_down import get_host_id_by_host_port
_dir = os.path.dirname(os.getcwd())
white_file = os.path.join(_dir, 'white_list.txt')
white_list = []
if os.path.exists(white_file):
    try:
        with open(white_file, 'r') as fp:
            for i in fp:
                white_list.append(i.strip())
    except:
        logger.error(traceback.format_exc())


@app.route('/', methods=['GET', 'POST'])
def hfs_host_main():
    global query_data_url
    query_data_url = []
Example #8
app.add_processor(web.loadhook(header_html))


def notfound():
    web.ctx.status = '404 Not Found'
    return web.notfound(str(render._404()))
app.notfound = notfound


def internalerror():
    web.ctx.status = '500 Internal Server Error'
    logging.exception("this is an internalerror")
    return web.internalerror(str(render._500()))
app.internalerror = internalerror

# Let sub-applications also use session, global_render, db
def global_hook():
    web.ctx.session = session
    web.ctx.global_render = render
    web.ctx.global_db = db
app.add_processor(web.loadhook(global_hook))

app.add_processor(web.loadhook(utils.filter_input_loadhook))
wsgiapp = app.wsgifunc()

utils.init_logger(config.log_path, config.log_level, console=True)
qqlogin.init(config.qq_app_id, config.qq_app_key, config.qq_callback, on_qq_logined)

if __name__ == '__main__':
    app.run()
Example #9
import tensorflow as tf

config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
sess = tf.Session(config=config)
set_session(
    sess)  # set this TensorFlow session as the default session for Keras

from model import getAState
from model import getPolicy
from model import getConv2DClassifier
from queryStrategy import *

start_time = time.time()
args = utils.get_args()
logger = utils.init_logger()

QUERY = args.query_strategy

policyname = args.policy_path

DATASET_NAME = QUERY + "_transfer_" + args.dataset_name

EPISODES = args.episodes
k_num = args.k
BUDGET = args.annotation_budget

EMBEDDING_SIZE = 32
NUM_CLASSES = 10

policyname = args.policy_path
Example #10
                pass # deal with bad lines of text here    
    return data


def init_arg():
    parser = argparse.ArgumentParser()
    parser.add_argument("-o", default='.')
    parser.add_argument("--itout", default=5, type=int)
    return parser.parse_args()



##### Main settings
args = init_arg()
odir = 'Synthetic'
logger = utils.init_logger(odir, 'log_cs_conditional_deeppseudo_sum.txt')
OUT_ITERATION               = args.itout
data_mode                   = 'Synthetic'
num_Event                   =  2        # causes of the event
evalTime                    = [12, 60]  # evaluation times (for C-index and Brier-Score)
in_path = odir + '/results/'


if not os.path.exists(in_path):
    os.makedirs(in_path)



WEIGHTED_C_INDEX  = np.zeros([num_Event, len(evalTime), OUT_ITERATION])
WEIGHTED_BRIER_SCORE = np.zeros([num_Event, len(evalTime), OUT_ITERATION])
Example #11
import req_query_userid
import req_query_inviter
import req_user_info
import req_update_user_info
import req_update_inviter


urls = (
    '/register', req_register.Handler,
    '/bind_phone', req_bind_phone.Handler,
    '/userid', req_query_userid.Handler,
    '/inviter', req_query_inviter.Handler,
    '/user_info', req_user_info.Handler,
    '/update_user_info', req_update_user_info.Handler,
    '/update_inviter', req_update_inviter.Handler,
)

web.config.debug = True
#web.internalerror = web.debugerror

utils.init_logger('../log/account.log')

app = web.application(urls, globals(), autoreload=False)

if __name__ == '__main__':
    app.run()
else:
    application = app.wsgifunc()


Example #12
import sys
import logging
from Queue import Queue, Empty
import RPi.GPIO as GPIO

from db import DB
from maps_repo import MapsRepo
from prompts_enum import PromptDirn
from audio_driver import AudioDriver
from step_counter import StepCounter
from heading_calculator import HeadingCalculator
from camera import camera
import utils
from utils import CommonLogger, init_logger


LOG_FILENAME = "/home/pi/logs/navi.log"
logger = init_logger(logging.getLogger(__name__), LOG_FILENAME)
sys.stdout = CommonLogger(logger, logging.INFO)
sys.stderr = CommonLogger(logger, logging.ERROR)

STEP_LENGTH = 40.0
ANGLE_THRESHOLD = 10
FOOT_SENSOR_ID = 0
BACK_SENSOR_ID = 1
GPIO_OVERRIDE_PIN = 17
QUEUE = Queue()


class Navigator(object):
    def __init__(self, logger):
        self.log = logger
        self.log.info("Starting navigator...")
Example #13
# -*- coding: utf-8 -*-
"""
    myapp
    ~~~
"""

from flask import Flask

from utils import init_logger

app = Flask(__name__)

init_logger(app)

@app.route('/')
def index():
    return 'Hello, World!'


@app.route('/error')
def err():
    raise Exception('Something bad happened!')
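
Example #13 hands the Flask app itself to init_logger, which is also not shown here. A minimal sketch, assuming it simply attaches a rotating file handler to app.logger (the file name below is a placeholder), could look like:

import logging
from logging.handlers import RotatingFileHandler

def init_logger(app, log_file='app.log'):
    # Hypothetical sketch: route the Flask application's log records to a rotating file.
    handler = RotatingFileHandler(log_file, maxBytes=1024 * 1024, backupCount=3)
    handler.setFormatter(logging.Formatter(
        '%(asctime)s %(levelname)s in %(module)s: %(message)s'))
    handler.setLevel(logging.INFO)
    app.logger.addHandler(handler)
    app.logger.setLevel(logging.INFO)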
Example #14
connections from Pool._connection_pool. After connection is closed
it returns to Pool._connection_pool.
"""
import psycopg2 as psc
import time

from collections import deque
from contextlib import contextmanager
from threading import RLock

from config_parser import Config
from utils import init_logger, Singleton

CONFIGS = Config().get_config()

LOGGER = init_logger('DB', {'stream', 'syslog'}, CONFIGS['logging.verbosity'])


class DBPool(object):

    """DBPool class represents DB pool, which
    handles and manages work with database
    connections.
    """

    __metaclass__ = Singleton

    def __init__(self, configs, pool_size):
        if not isinstance(pool_size, int) or pool_size <= 0:
            raise ValueError('Bad value of POOL_SIZE!')
        if not isinstance(configs['ttl'], int) or configs['ttl'] <= 0:
Example #15
from flask import Flask

from config_parser import Config
from utils import init_logger

CONFIGS = Config().get_config()

app = Flask('vocab_trainer')

app.config['TEMPLATE_DIR'] = CONFIGS['dirs.template_dir']
app.config['SECRET_KEY'] = CONFIGS['server.secret_key']

logger = init_logger('vocab-trainer', ['file'],
                     verbosity=CONFIGS['logging.verbosity'],
                     log_file='/tmp/vocab_trainer.log')
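
Examples #2, #14 and #15 all call init_logger with a logger name, a collection of handler kinds, a verbosity level and, optionally, a log file. A sketch accepting those call signatures (the level mapping and the '/dev/log' syslog socket are assumptions, not taken from the projects above) might be:

import logging
import logging.handlers

_LEVELS = {'0': logging.WARNING, '1': logging.INFO, '2': logging.DEBUG}

def init_logger(name, handlers=('stream',), verbosity='1', log_file=None):
    # Hypothetical sketch: build a named logger with 'stream', 'syslog' and/or 'file' handlers.
    logger = logging.getLogger(name)
    logger.setLevel(_LEVELS.get(str(verbosity), logging.INFO))
    fmt = logging.Formatter('%(asctime)s %(name)s %(levelname)s: %(message)s')
    for kind in handlers:
        if kind == 'stream':
            handler = logging.StreamHandler()
        elif kind == 'syslog':
            handler = logging.handlers.SysLogHandler(address='/dev/log')
        elif kind == 'file' and log_file:
            handler = logging.FileHandler(log_file)
        else:
            continue
        handler.setFormatter(fmt)
        logger.addHandler(handler)
    return logger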
Example #16
    res = []
    i = 0
    for xx, yy in zip(x,y):
        if yy == 'b' and i>0:
            res.append(',')
        res.append(xx)
        i += 1
    return ''.join(res)


if __name__ == '__main__':
    # Combine command-line arguments and yaml file arguments
    opt = opts.model_opts()
    config = yaml.load(open(opt.config, "r"))
    config = Namespace(**config, **vars(opt))
    logger = init_logger("torch", logging_path='')
    logger.info(config.__dict__)

    device, devices_id = misc_utils.set_cuda(config)
    config.device = device

    TEXT = data.Field(sequential=True, use_vocab=False, batch_first=True, unk_token=utils.UNK,
                      include_lengths=True, pad_token=utils.PAD, preprocessing=to_int, )
    # init_token=utils.BOS, eos_token=utils.EOS)
    LABEL = data.Field(sequential=True, use_vocab=False, batch_first=True, unk_token=utils.UNK,
                       include_lengths=True, pad_token=utils.PAD, preprocessing=to_int, )
    # init_token=utils.BOS, eos_token=utils.EOS)

    fields = [("text", TEXT), ("label", LABEL)]
    validDataset = datasets.SequenceTaggingDataset(path=os.path.join(config.data, 'valid.txt'),
                                                   fields=fields)
Example #17
import logging
import environment
import utils

import numpy as np
import random
import matplotlib.pyplot as plt

# The logger
utils.init_logger(logging.DEBUG, fileName="log/app.log")
logger = logging.getLogger('Easy21')

# set the random seed
random.seed(a=None, version=2)

# constants
alpha = 0.01  # step size
epsilon = 0.05  # exploration
lam_range = np.arange(0, 1.1, 0.1)
n_iter = 10000  # number of episodes

# define the indices of the different values
q_hit_index = 0  # q value for action hit
q_stick_index = 1  # q value for action stick
e_hit_index = 2  # eligibility trace for the hit action
e_stick_index = 3  # eligibility trace for the stick action

# initialize the value function approximation, the value is 1 if the state lies within the defined intervals
# x-index: dealer card approximation [1; 4] [4; 7] [7; 10]
# y-index: player sum approximation  [1; 6] [4; 9] [7; 12] [10; 15] [13; 18] [16; 21]
# z        all properties for this state
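
Example #17 configures logging globally and then fetches named loggers with logging.getLogger. A rough sketch of an init_logger(level, fileName=...) helper consistent with that call (the formatter and the directory creation are assumptions) is:

import logging
import os

def init_logger(level=logging.INFO, fileName=None):
    # Hypothetical sketch: configure the root logger once, optionally logging to a file.
    handlers = [logging.StreamHandler()]
    if fileName:
        os.makedirs(os.path.dirname(fileName) or '.', exist_ok=True)
        handlers.append(logging.FileHandler(fileName))
    logging.basicConfig(
        level=level,
        format='%(asctime)s %(name)s %(levelname)s: %(message)s',
        handlers=handlers)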
Example #18
    r'<p class="wr_bookList_item_author"><a href=(.*?)</a>')
# Book title
findTitle = re.compile(r'<p class="wr_bookList_item_title">(.*)</p>')
# Book cover
findCover = re.compile(
    r'<img alt="书籍封面" class="wr_bookCover_img" src="(.*?)"/>')
# Synopsis
findIntro = re.compile(r'<p class="wr_bookList_item_desc">(.*)</p>', re.S)
# Rating
findScore = re.compile(
    r'<span class="wr_bookList_item_starString">([\d+\.]+)</span>')
# Number of readers today
findReaderNumber = re.compile(
    r'<em class="wr_bookList_item_reading_number">([\d+\.]+)</em>')

logger = init_logger(log_file='.\\微信读书.log')


def get_info(url):
    """
    爬取数据
    :param url: 所要爬取的网页的url
    :return: 返回爬取到数据的列表
    """
    datalist = []
    html = requestURL(url)  # 保存网页源码
    logger.info('获取网页源码...')
    soup = BeautifulSoup(html, "html.parser")
    booklist = soup.find('ul', class_="ranking_content_bookList").select('li')
    logger.info('获得书籍列表...')
    for item in booklist:
Example #19
import requests
from socks import SOCKS5
from telethon import TelegramClient
from telethon.events import NewMessage
from telethon.tl.custom import Message
from nltk.corpus import stopwords
from pymystem3 import Mystem
from string import punctuation
from telethon.tl.types import User, Channel
from requests.exceptions import ConnectionError

from config import APP_API_HASH, APP_API_ID, PHONE_NUMBER, SETTINGS_FILE, \
    PROXY_HOST, PROXY_PORT, PROXY_USERNAME, PROXY_PASS
from utils import init_logger

logger = init_logger()

try:
    response = requests.get('https://api.telegram.org')
    proxy = None
except ConnectionError as e:
    proxy = (SOCKS5, PROXY_HOST, PROXY_PORT, True, PROXY_USERNAME, PROXY_PASS)

client = TelegramClient(PHONE_NUMBER.strip('+'),
                        APP_API_ID,
                        APP_API_HASH,
                        proxy=proxy,
                        base_logger=logger)


def get_settings() -> Dict:
Example #20
from datetime import datetime, timedelta
#from datetime import timedelta
from database.models import AWSInstance, AWSPrices, AWSInstancePrice, AWSInstanceWorkLoad, AWSSummary
from database.utils import check_if_exist, simple_query_count, simple_sum, simple_query

from utils import init_logger
import config

logger = init_logger(__name__, testing_mode=config.DEBUG_MODE)


class WriteData(object):

    base_datetime = datetime.now().date()

    def __init__(self, conn, workload_tag):
        self.conn = conn
        self.workload_tag = workload_tag

    # --- INSTANCE ---#
    def save_instance(self, dict_instance_details, workload_profile):
        saved = False
        geoJson = None
        try:
            instance_type = dict_instance_details['deep_details'][
                'Instance_type'],
            instance_id = dict_instance_details['instance_id']
            aws_region = dict_instance_details['aws_region']

            lat = None
            log = None
Example #21
    writer.close()
    if args.adv_save :
        if not os.path.exists(os.path.join(args.load, args.attack+"_"+str(args.eps))) :
            os.makedirs(os.path.join(args.load, args.attack+"_"+str(args.eps)))
        with open(os.path.join(args.load, args.attack+"_"+str(args.eps), "adversary.pkl"), "wb") as f :
            pickle.dump(adv_saver,f)

    logger.info("Attacked Accuracy : {:.4f}".format(adv_acc))
    logger.info("Attacked Loss : {:.4f}".format(adv_loss))
    logger.info("Finished")
    logger.info("="*80)
 
if __name__ == "__main__" :
    args.device = torch.device("cuda:" + str(args.gpu) if torch.cuda.is_available() else "cpu")
    logger = init_logger(logpath=args.load, experiment_name="attack-"+str(args.attack)+"-"+str(args.eps))
    in_channels = 1 if args.eval=="mnist" else 3

    if args.eval == "mnist" or args.eval == "norm" :
        from model.mnist import mnist_model
        model = mnist_model(args.model, layers=args.block, norm_type=args.norm)
    elif args.eval == "cifar10" :
        from model.cifar10 import cifar_model
        model = cifar_model(args.model, layers=args.block, norm_type=args.norm)
    logger.info(args)
    logger.info(model)
    model.to(args.device)

    if args.crit == "acc" :
        model_dict = torch.load(os.path.join(args.load,"model_acc.pt"), map_location=str(args.device))
    elif args.crit == "last" :
Example #22
# classifier arguments
parser.add_argument('--lr', type=float, default=0.)
parser.add_argument('--wd', type=float, default=0.)
parser.add_argument('--batch_size', type=int, default=0)
parser.add_argument('--n_epoch', type=int, default=0)
args = parser.parse_args()

np.set_printoptions(linewidth=150, precision=4, suppress=True)
th.set_printoptions(linewidth=150, precision=4)

FN = th.from_numpy
join = os.path.join
logger = logging.getLogger()

utils.prepare_directory(args.exp_root, force_delete=True)
utils.init_logger(join(args.exp_root, 'program.log'))
utils.write_args(args)

dset = data.XianDataset(args.data_dir,
                        args.mode,
                        feature_norm=args.feature_norm)
_X_s_tr = FN(dset.X_s_tr).to(args.device)
_Y_s_tr = FN(dset.Y_s_tr).to(args.device)
_X_s_te = FN(dset.X_s_te).to(args.device)
_Y_s_te = FN(dset.Y_s_te).to(args.device)
_X_u_te = FN(dset.X_u_te).to(args.device)
_Y_u_te = FN(dset.Y_u_te).to(args.device)
_Cu = FN(dset.Cu).to(args.device)
_Sall = FN(dset.Sall).to(args.device)

train_iter = data.Iterator([_X_s_tr, _Y_s_tr],
Example #23
import os
from uuid import uuid4
from utils import get_config_params, _run, clean_environ, init_logger
import logging

logger = logging.getLogger(__name__)
init_logger(logger)

VYOS_SHELL_API = get_config_params('bin', 'shell_api_path')
VYOS_SBIN_DIR = get_config_params('bin', 'vyos_sbin_dir')
VYOS_SAVE_SCRIPT = 'vyatta-save-config.pl'

# Create/Get the logger object
#logger = init_logger()

class SessionAlreadyExists(Exception): pass
class SetupSessionFailed(Exception): pass
class OperationFailed(Exception): pass
class SessionNotExists(Exception): pass

class Session(object):
    """
    Return the session instance if exists. Else, create new one.
    SessionAlreadyExists exception raised on the second instantiation.
    """
    _ref = None
    def __new__(cls, *args, **kw):
        if cls._ref is not None:
            raise SessionAlreadyExists('A session exist already !')
        cls._ref = super(Session, cls).__new__(cls, *args, **kw)
        return cls._ref
Example #24
from os import path
import argparse
import chainer
import copy
import numpy as np

from dataset import COL_BASIC_FEATURES
from dataset import FactIterator, Vocabulary, read_dataset, get_idx2vec, get_values, replace_by_dic
from models import LinearEnsembler, MLPEnsembler
from utils import init_logger
from utils import set_random_seed
from utils import standardize_vectors
from utils import find_greatest_divisor

verbose = False
logger = init_logger('Ensember')

# Directories
dir_scripts = path.dirname(path.dirname(path.dirname(path.abspath(__file__))))
dir_root = path.dirname(dir_scripts)
dir_data = path.join(dir_root, 'data')


class Classifier(Chain):
    """Calculate loss."""
    def __init__(self,
                 predictor,
                 label2fact,
                 en2ja,
                 idx2vec,
                 margin=1.0,
Example #25
def main():
    # print("Starting DFC2021 baseline training script at %s" % (str(datetime.datetime.now())))
    #-------------------
    # Setup
    #-------------------
    assert os.path.exists(args.train_fn)
    assert os.path.exists(args.valid_fn)

    now_time = datetime.datetime.now()
    time_str = datetime.datetime.strftime(now_time, '%m-%d_%H-%M-%S')
    # output path
    # output_dir = Path(args.output_dir).parent / time_str / Path(args.output_dir).stem
    output_dir = Path(args.output_dir)
    output_dir.mkdir(exist_ok=True, parents=True)
    logger = utils.init_logger(output_dir / 'info.log')
    # if os.path.isfile(args.output_dir):
    #     print("A file was passed as `--output_dir`, please pass a directory!")
    #     return
    #
    # if os.path.exists(args.output_dir) and len(os.listdir(args.output_dir)):
    #     if args.overwrite:
    #         print("WARNING! The output directory, %s, already exists, we might overwrite data in it!" % (args.output_dir))
    #     else:
    #         print("The output directory, %s, already exists and isn't empty. We don't want to overwrite and existing results, exiting..." % (args.output_dir))
    #         return
    # else:
    #     print("The output directory doesn't exist or is empty.")
    #     os.makedirs(args.output_dir, exist_ok=True)

    if args.gpu is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    n_gpu = torch.cuda.device_count()
    device = torch.device('cuda:0' if n_gpu > 0 else 'cpu')
    device_ids = list(range(n_gpu))

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    #-------------------
    # Load input data
    #-------------------

    train_dataframe = pd.read_csv(args.train_fn)
    train_image_fns = train_dataframe["image_fn"].values
    train_label_fns = train_dataframe["label_fn"].values
    train_groups = train_dataframe["group"].values
    train_dataset = StreamingGeospatialDataset(
        imagery_fns=train_image_fns, label_fns=train_label_fns, groups=train_groups, chip_size=CHIP_SIZE,
        num_chips_per_tile=NUM_CHIPS_PER_TILE, transform=transform, nodata_check=nodata_check
    )

    valid_dataframe = pd.read_csv(args.valid_fn)
    valid_image_fns = valid_dataframe["image_fn"].values
    valid_label_fns = valid_dataframe["label_fn"].values
    valid_groups = valid_dataframe["group"].values
    valid_dataset = StreamingValidationDataset(
        imagery_fns=valid_image_fns, label_fns=valid_label_fns, groups=valid_groups, chip_size=CHIP_SIZE,
        stride=CHIP_SIZE, transform=transform, nodata_check=nodata_check
    )

    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        num_workers=NUM_WORKERS,
        pin_memory=True,
    )
    valid_dataloader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=args.batch_size,
        num_workers=NUM_WORKERS,
        pin_memory=True,
    )

    num_training_images_per_epoch = int(len(train_image_fns) * NUM_CHIPS_PER_TILE)
    # print("We will be training with %d batches per epoch" % (num_training_batches_per_epoch))

    #-------------------
    # Setup training
    #-------------------
    # if args.model == "unet":
    #     model = models.get_unet()
    # elif args.model == "fcn":
    #     model = models.get_fcn()
    # else:
    #     raise ValueError("Invalid model")

    model = models.isCNN(args.backbone)

    weights_init(model, seed=args.seed)

    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)

    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.AdamW(trainable_params, lr=INIT_LR, amsgrad=True, weight_decay=5e-4)
    lr_criterion = nn.CrossEntropyLoss(ignore_index=0) # todo
    hr_criterion = hr_loss
    # criterion = balanced_ce_loss
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min", factor=0.5, patience=3, min_lr=0.0000001)
    # factor=0.5, patience=3, min_lr=0.0000001
    logger.info("Trainable parameters: {}".format(utils.count_parameters(model)))

    #-------------------
    # Model training
    #-------------------
    train_loss_total_epochs, valid_loss_total_epochs, epoch_lr = [], [], []
    best_loss = 1e50
    num_times_lr_dropped = 0
    # model_checkpoints = []
    # temp_model_fn = os.path.join(output_dir, "most_recent_model.pt")

    for epoch in range(args.num_epochs):
        lr = utils.get_lr(optimizer)

        train_loss_epoch, valid_loss_epoch = utils.fit(
            model,
            device,
            train_dataloader,
            valid_dataloader,
            num_training_images_per_epoch,
            optimizer,
            lr_criterion,
            hr_criterion,
            epoch,
            logger)

        scheduler.step(valid_loss_epoch)

        if epoch % config.SAVE_PERIOD == 0 and epoch != 0:
            temp_model_fn = output_dir / 'checkpoint-epoch{}.pth'.format(epoch+1)
            torch.save(model.state_dict(), temp_model_fn)

        if valid_loss_epoch < best_loss:
            logger.info("Saving model_best.pth...")
            temp_model_fn = output_dir / 'model_best.pth'
            torch.save(model.state_dict(), temp_model_fn)
            best_loss = valid_loss_epoch

        if utils.get_lr(optimizer) < lr:
            num_times_lr_dropped += 1
            print("")
            print("Learning rate dropped")
            print("")

        train_loss_total_epochs.append(train_loss_epoch)
        valid_loss_total_epochs.append(valid_loss_epoch)
        epoch_lr.append(lr)
Example #26
File: test.py Project: yyht/daga
def main():
    """Main workflow"""
    args = utils.build_test_args(argparse.ArgumentParser())

    suff = ".test"
    if args.report_iw_nll:
        if (
            args.num_iw_samples > args.iw_batch_size
            and args.num_iw_samples % args.iw_batch_size != 0
        ):
            raise RuntimeError("Expected num_iw_samples divisible by iw_batch_size")
        suff = ".test.iw" + str(args.num_iw_samples)

    utils.init_logger(args.model_file + suff)
    logger.info("Config:\n%s", pformat(vars(args)))

    assert torch.cuda.is_available()
    torch.cuda.set_device(args.gpuid)

    utils.init_random(args.seed)

    logger.info("Load parameters from '%s'", args.model_file)
    params = torch.load(args.model_file, map_location=lambda storage, loc: storage)

    utils.set_params(params["args"])

    fields = utils.load_fields_from_vocab(params["vocab"])
    logger.info("Fields: %s", fields.keys())

    model = utils.build_test_model(fields, params)
    logger.info("Model:\n%s", model)

    logger.info("Load %s", args.test_file)
    test_data = LMDataset(fields, args.test_file, args.sent_length_trunc)
    logger.info("Test sentences: %d", len(test_data))

    test_iter = utils.OrderedIterator(
        dataset=test_data,
        batch_size=args.batch_size,
        device=params["args"].device,
        train=False,
        shuffle=False,
        repeat=False,
        sort=False,
        sort_within_batch=True,
    )

    if model.encoder is None:
        args.report_iw_nll = False
        logger.info("Force report_iw_nll to False")

    start_time = time.time()
    logger.info("Start testing")
    if args.report_iw_nll:
        if args.num_iw_samples <= args.iw_batch_size:
            n_iw_iter = 1
        else:
            n_iw_iter = args.num_iw_samples // args.iw_batch_size
            args.num_iw_samples = args.iw_batch_size

        test_stats = report_iw_nll(model, test_iter, n_iw_iter, args.num_iw_samples)
        logger.info(
            "Results: test nll %.2f | test ppl %.2f", test_stats.nll(), test_stats.ppl()
        )
    else:
        test_stats = validate(model, test_iter)
        logger.info(
            "Results: test nll %.2f | test kl %.2f | test ppl %.2f",
            test_stats.nll(),
            test_stats.kl(),
            test_stats.ppl(),
        )

    logger.info("End of testing: time %.1f min", (time.time() - start_time) / 60)
Example #27
"""Calculate Hits@k for all method on all split.

Example:
    nice -n 19 python scripts/twa/eval/hits_wrapper.py /baobab/otani/cckbc/cn/20170814v1/ens/kbc-170816v2/split -v --by-relation
"""

import argparse
import numpy as np
import os

import hits
from utils import init_logger


verbose = False
logger = init_logger('EvalHits')


def main(args):
    global verbose
    verbose = args.verbose

    assert args.label in ['devel', 'test']

    methods = ['pmi', 'kbc', 'trans', 'kbc-trans']
    if args.method:
        methods = [args.method]
    results = {m: [] for m in methods}
    for d in os.listdir(args.dir_split):
        d = os.path.join(args.dir_split, d)
        if not os.path.isdir(d):
Example #28
from configuration import ConfigManager
from execution import ExecEnvironment
from instance.manager import InstanceManager
from snapshot.manager import SnapshotManager
from instance.upgrade import UpgradeManager
from version.manager import VersionManager
from version.parser import VersionParser
from instance.parser import InstanceParser
from snapshot.parser import SnapshotParser

from utils import init_logger
from utils import parse_args

args = parse_args()
init_logger(args.verbose)
logger = logging.getLogger('Main')

cm = ConfigManager()
vm = VersionManager(cm)
im = InstanceManager(cm, vm)
sm = SnapshotManager(cm, vm, im)
um = UpgradeManager(cm, vm, im)
ex = ExecEnvironment()

vp = VersionParser(cm, vm, im, sm, um, ex)
ip = InstanceParser(cm, vm, im, sm, um, ex)
sp = SnapshotParser(cm, vm, im, sm, um, ex)

logger.debug('Arguments : {}'.format(args))
if (args.action in ['list', 'l']):
Example #29
def main():
    args = parse_args()
    subdir = f'train_syn_hop_factor_{args.model_name}_nn_{args.neighbour}_at_{datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")}'
    utils.init_logger('./logs/', subdir, print_log=False)
    logging.info(str(args))
    logdir = f'./tf_logs/{subdir}'
    writer = SummaryWriter(log_dir=logdir)

    nfeature_dim = 2
    if args.model_name == 'mp_nn_factor':
        model = factor_mpnn(nfeature_dim, [nfeature_dim**2, args.hop_order],
                            [64, 64, 128, 128, 256, 256, 128, 128, 64, 64, 2],
                            [16, 16])

        emodel_pw = torch.nn.Sequential(torch.nn.Conv2d(3, 64, 1),
                                        torch.nn.ReLU(inplace=True),
                                        torch.nn.Conv2d(64, 16, 1))
        emodel_high = torch.nn.Sequential(torch.nn.Conv2d(2, 64, 1),
                                          torch.nn.ReLU(inplace=True),
                                          torch.nn.Conv2d(64, 16, 1))

    def get_model_description():
        return str(model) + str(emodel_pw) + str(emodel_high)

    logging.info('model {} created'.format(get_model_description()))

    cap = args.hop_cap

    nn_idx_pw, efeature_pw = generate_pw_factor_table(args.chain_length)
    nn_idx_high, efeature_high = generate_high_factor_table(
        args.chain_length, args.hop_order)

    if args.use_cuda:
        nn_idx_pw = nn_idx_pw.cuda()
        efeature_pw = efeature_pw.cuda()
        nn_idx_high = nn_idx_high.cuda()
        efeature_high = efeature_high.cuda()

        model.cuda()
        emodel_pw.cuda()
        emodel_high.cuda()

    parameters = list(model.parameters()) + \
                list(emodel_pw.parameters()) + \
                list(emodel_high.parameters())

    # train_data_set = lib.data.RandomPGMHop(args.chain_length,
    #                                       ret_efeature_pw=False)

    # dataloader = torch.utils.data.DataLoader(train_data_set,
    #                                          batch_size=args.batch_size,
    #                                          shuffle=True,
    #                                          num_workers=8,
    #                                          worker_init_fn=worker_init_fn)

    train_dataset = lib.data.RandomPGMData(args.train_path,
                                           pgm_type="hops",
                                           size=args.train_size)
    test_dataset = lib.data.RandomPGMData(args.test_path,
                                          pgm_type="hops",
                                          size=args.test_size)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=8,
                                               worker_init_fn=worker_init_fn)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=8,
                                              worker_init_fn=worker_init_fn)

    optimizer = torch.optim.Adam(parameters, lr=3e-3)
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer, lr_lambda=lambda x: max(0.98**x, 1e-6))
    start_epoch = 0
    gcnt = 0
    if os.path.exists(args.model_path):
        ckpt = torch.load(args.model_path)
        model.load_state_dict(ckpt['model_state_dict'])
        emodel_pw.load_state_dict(ckpt['emodel_pw_state_dict'])
        emodel_high.load_state_dict(ckpt['emodel_high_state_dict'])
        optimizer.load_state_dict(ckpt['optimizer_state_dict'])
        scheduler.load_state_dict(ckpt['lr_sche'])

        start_epoch = ckpt['epoch']
        gcnt = ckpt['gcnt']

    def get_model_dict():
        return {
            'model_state_dict': model.state_dict(),
            'emodel_pw_state_dict': emodel_pw.state_dict(),
            'emodel_high_state_dict': emodel_high.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'lr_sche': scheduler.state_dict(),
            'epoch': epoch,
            'gcnt': gcnt
        }

    epoch = 0
    for epoch in tqdm(range(start_epoch, args.train_epoches)):
        torch.save(
            get_model_dict(),
            '{}_nn_factor_{}_epoches_{}.pt'.format(args.model_name,
                                                   args.neighbour, epoch))

        logging.info('save train result to {}'.format(
            '{}_nn_factor_{}_epoches_{}.pt'.format(args.model_name,
                                                   args.neighbour, epoch)))
        scheduler.step()

        loss_seq = []
        acc_seq = []
        acc_lp_seq = []
        for bcnt, (nfeature, pws, hops, nlabel,
                   lp_label) in tqdm(enumerate(train_loader)):
            optimizer.zero_grad()
            if args.use_cuda:
                nfeature, pws, hops, nlabel, lp_label \
                    = nfeature.cuda(), pws.cuda(), hops.cuda(), nlabel.cuda(), lp_label.cuda()
            if len(nfeature.shape) == 3:
                nfeature = nfeature.unsqueeze(-1)

            etype_pw = emodel_pw(efeature_pw)
            etype_high = emodel_high(efeature_high)
            bsize = nfeature.shape[0]

            pred, _ = model(nfeature, [pws, hops],
                            [[
                                nn_idx_pw.repeat(bsize, 1, 1),
                                etype_pw.repeat(bsize, 1, 1, 1)
                            ],
                             [
                                 nn_idx_high.repeat(bsize, 1, 1),
                                 etype_high.repeat(bsize, 1, 1, 1)
                             ]])

            pred = pred.squeeze(-1).permute(0, 2, 1).contiguous()
            loss = torch.nn.functional.cross_entropy(pred.view(-1, 2),
                                                     nlabel.view(-1))
            loss.backward()
            torch.nn.utils.clip_grad_norm(parameters, 1.0)

            optimizer.step()
            loss_seq.append(loss.item())
            gcnt += 1

            pred_int = pred.argmax(dim=-1)
            all_correct = torch.sum(pred_int == nlabel)
            lp_correct = torch.sum(lp_label == nlabel)
            acc = all_correct.item() / np.prod(nlabel.shape)
            lp_acc = lp_correct.item() / np.prod(nlabel.shape)

            acc_lp_seq.append(lp_acc)
            acc_seq.append(acc)

            if gcnt % 10 == 0:
                logging.info(
                    'epoch = {} bcnt = {} loss = {} acc = {} lp_acc={}'.format(
                        epoch, bcnt, np.mean(loss_seq), np.mean(acc_seq),
                        np.mean(acc_lp_seq)))
                writer.add_scalar('syn_train/loss', loss.item(), gcnt)
                writer.add_scalar('syn_train/acc', acc, gcnt)
                writer.add_scalar('syn_train/lp_acc', lp_acc, gcnt)
                loss_seq = []
                acc_seq = []
                acc_lp_seq = []

    if epoch == args.train_epoches - 1:
        epoch = args.train_epoches
        torch.save(
            get_model_dict(),
            '{}_nn_factor_{}_epoches_{}.pt'.format(args.model_name,
                                                   args.neighbour, epoch))

        logging.info('save train result to {}'.format(
            '{}_nn_factor_{}_epoches_{}.pt'.format(args.model_name,
                                                   args.neighbour, epoch)))
        logging.info('training done!')

    loss_seq = []
    acc_seq = []
    acc_lp_seq = []
    acc_global = []
    acc_lp_global = []
    gcnt = 0
    accum_acc = 0
    accum_acc_lp = 0
    model.eval()
    emodel_high.eval()
    emodel_pw.eval()
    for bcnt, (nfeature, pws, hops, nlabel,
               lp_label) in tqdm(enumerate(test_loader)):
        if args.use_cuda:
            nfeature, pws, hops, nlabel, lp_label \
                = nfeature.cuda(), pws.cuda(), hops.cuda(), nlabel.cuda(), lp_label.cuda()
        if len(nfeature.shape) == 3:
            nfeature = nfeature.unsqueeze(-1)

        etype_pw = emodel_pw(efeature_pw)
        etype_high = emodel_high(efeature_high)
        bsize = nfeature.shape[0]

        pred, _ = model(
            nfeature, [pws, hops],
            [[nn_idx_pw.repeat(bsize, 1, 1),
              etype_pw.repeat(bsize, 1, 1, 1)],
             [
                 nn_idx_high.repeat(bsize, 1, 1),
                 etype_high.repeat(bsize, 1, 1, 1)
             ]])

        pred = pred.squeeze(-1).permute(0, 2, 1).contiguous()
        loss = torch.nn.functional.cross_entropy(pred.view(-1, 2),
                                                 nlabel.view(-1))
        torch.nn.utils.clip_grad_norm(parameters, 1.0)

        loss_seq.append(loss.item())
        gcnt += 1

        pred_int = pred.argmax(dim=-1)
        all_correct = torch.sum(pred_int == nlabel)
        lp_correct = torch.sum(lp_label == nlabel)
        acc = all_correct.item() / np.prod(nlabel.shape)
        lp_acc = lp_correct.item() / np.prod(nlabel.shape)
        acc_global.append(acc)
        acc_lp_global.append(lp_acc)

        acc_lp_seq.append(lp_acc)
        acc_seq.append(acc)
        accum_acc += acc
        accum_acc_lp += lp_acc

        if gcnt % 10 == 0:
            logging.info(
                'epoch = {} bcnt = {} loss = {} acc = {} lp_acc={}'.format(
                    epoch, bcnt, np.mean(loss_seq), np.mean(acc_seq),
                    np.mean(acc_lp_seq)))
            writer.add_scalar('syn_test/loss', loss.item(), gcnt)
            writer.add_scalar('syn_test/acc', acc, gcnt)
            writer.add_scalar('syn_test/lp_acc', lp_acc, gcnt)
            loss_seq = []
            acc_seq = []
            acc_lp_seq = []
    logging.info(
        f'testing result: acc = {accum_acc / gcnt}, acc_lp = {accum_acc_lp / gcnt}'
    )
    logging.info(
        f'stddev = {st.stdev(acc_global)}, stddev_lp = {st.stdev(acc_lp_global)}'
    )
Example #30
    argparser.add_argument('--report_step', type=int, default=1000)
    argparser.add_argument('--eval_step', type=int, default=3000)
    argparser.add_argument('--n_epoch', type=int, default=10)
    argparser.add_argument('--init_lr', type=float, default=5e-4)
    argparser.add_argument('--batch_size', type=int, default=64)
    argparser.add_argument('--embed_dim', type=int, default=32)
    argparser.add_argument('--hidden_size', type=int, default=32)
    argparser.add_argument('--n_mem', type=int, default=75135)
    argparser.add_argument('--n_ene', type=int, default=55396)
    argparser.add_argument('--n_group', type=int, default=482)
    argparser.add_argument('--n_topic', type=int, default=17129)
    argparser.add_argument('--n_output', type=int, default=2)
    args = argparser.parse_args()
    dir_check_list = [
        './log',
        './model',
        './model/{}'.format(args.task_name),
    ]
    for dir in dir_check_list:
        if not os.path.exists(dir):
            os.mkdir(dir)
    logger = utils.init_logger('./log/{}.log'.format(args.task_name))

    pt = prettytable.PrettyTable()
    pt.field_names = ['arg', 'val']
    for k, v in vars(args).items():
        pt.add_row([k, v])
    logger.info("\n" + str(pt))

    train()
Example #31
def init_arg():
    parser = argparse.ArgumentParser()
    parser.add_argument("-o")
    parser.add_argument("--it", default=100000,
                        type=int)  #number of iterations. default is 100000
    parser.add_argument(
        "--itout", default=5,
        type=int)  #total number of the set of cross-validation data
    parser.add_argument("--itrs", default=30,
                        type=int)  #number of random searches. default is 30.
    return parser.parse_args()


args = init_arg()
odir = 'Synthetic'  #output directory
logger = utils.init_logger(odir, 'log_marginal_deeppseudo.txt')
data_mode = 'Synthetic'
##Main settings
OUT_ITERATION = args.itout
RS_ITERATION = args.itrs
logger.info('data_mode:{}'.format(data_mode))
iteration = args.it

#Evaluation Times
evalTime = [12, 60]

out_path = odir + '/results/'

for itr in range(OUT_ITERATION):
    if not os.path.exists(out_path + '/itr_' + str(itr) + '/'):
        os.makedirs(out_path + '/itr_' + str(itr) + '/')
Example #32
    def __init__(self,
                 name='OpenEDS',
                 track='Semantic_Segmentation_Dataset',
                 isTrain=True,
                 resizedFactor=0.5,
                 logDir=None):
        self.name = name
        self.track = track

        self.numTrainImgs = 8916
        self.numValImgs = 2403
        self.numTestImgs = 1440

        self.numTrainPersons = 95
        self.numValPersons = 28
        self.numTestPersons = 29
        self.numClasses = 4

        self.decodeImgShape = (int(640 * resizedFactor),
                               int(400 * 2 * resizedFactor), 1)
        self.singleImgShape = (int(640 * resizedFactor),
                               int(400 * resizedFactor), 1)

        # TFrecord path
        self.trainPath = '../../Data/OpenEDS/{}/train_expand/train.tfrecords'.format(
            self.track)
        self.valPath = '../../Data/OpenEDS/{}/validation/validation.tfrecords'.format(
            self.track)
        self.testPath = '../../Data/OpenEDS/{}/test/test.tfrecords'.format(
            self.track)
        self.overfittingPath = '../../Data/OpenEDS/{}/overfitting/overfitting.tfrecords'.format(
            self.track)

        if isTrain:
            self.logger = logging.getLogger(__name__)  # logger
            self.logger.setLevel(logging.INFO)
            utils.init_logger(logger=self.logger,
                              logDir=logDir,
                              isTrain=isTrain,
                              name='dataset')

            self.logger.info('Dataset name: \t\t{}'.format(self.name))
            self.logger.info('Dataset track: \t\t{}'.format(self.track))
            self.logger.info('Num. of training imgs: \t{}'.format(
                self.numTrainImgs))
            self.logger.info('Num. of validation imgs: \t{}'.format(
                self.numValImgs))
            self.logger.info('Num. of test imgs: \t\t{}'.format(
                self.numTestImgs))
            self.logger.info('Num. of training persons: \t{}'.format(
                self.numTrainPersons))
            self.logger.info('Num. of validation persons: \t{}'.format(
                self.numValPersons))
            self.logger.info('Num. of test persons: \t{}'.format(
                self.numTestPersons))
            self.logger.info('Num. of classes: \t\t{}'.format(self.numClasses))
            self.logger.info('Decode image shape: \t{}'.format(
                self.decodeImgShape))
            self.logger.info('Single img shape: \t\t{}'.format(
                self.singleImgShape))
            self.logger.info('Training TFrecord path: \t{}'.format(
                self.trainPath))
            self.logger.info('Validation TFrecord path: \t{}'.format(
                self.valPath))
            self.logger.info('Test TFrecord path: \t\t{}'.format(
                self.testPath))
            self.logger.info('Overfitting TFrecord path: \t\t{}'.format(
                self.overfittingPath))
Example #33
    # setup hyperparams
    train_df = params.train_df
    valid_df = params.valid_df
    pssm_dir = params.pssm_dir
    tert_dir = params.tert_dir
    max_len = int(params.max_len)
    batch_size = int(params.batch_size)
    input_shape = tuple(int(x) for x in params.input_shape.split())
    n_dist_bins = int(params.n_dist_bins)
    n_blocks = int(params.n_blocks)
    n_epochs = int(params.n_epochs)
    lr = float(params.lr)
    name = params.name

    init_logger(name)
    logging.info(pformat(params))

    # to gpu if available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    logging.info(f'device: {device}')

    # setup data iterators and model
    train_dataset = ProteinNetDataset(train_df, pssm_dir, tert_dir, max_len)
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=4,
                              pin_memory=True)

    valid_dataset = ProteinNetDataset(valid_df, pssm_dir, tert_dir, max_len)
Example #34
    optimizer = optim.ASGD(net.parameters(),
                           lr=config.training.lr,
                           t0=0,
                           lambd=0.,
                           weight_decay=config.training.weight_decay)
criterion = nn.CrossEntropyLoss()

save_root = config.data.save_root
save_root = os.path.join(save_root, 'model_1')
if not os.path.exists(save_root):
    os.mkdir(save_root)
# writer_path = os.path.join(save_root, 'writer')
logger_path = os.path.join(save_root, 'lm.log')
ckpt_path = os.path.join(save_root, 'lm.pth')
# writer = SummaryWriter(writer_path)
logger = init_logger(logger_path)

###############################################################################
# Training code
###############################################################################
if args.resume_training:
    ckpt = torch.load(ckpt_path)
    start_epoch = ckpt['epoch'] + 1
    best_dev_loss = ckpt['best_dev_loss']
    net.load_state_dict(ckpt['net_state_dict'])
    logger.info(
        f'resume training from epoch {start_epoch} with best_dev_loss {best_dev_loss:5.2f}'
    )
else:
    start_epoch = 0
    best_dev_loss = float('inf')
Example #35
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.naive_bayes import BernoulliNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import os
import pickle
from utils import init_logger
import logging

init_logger()

# Load data
logging.info("Loading data")
try:
    data = pickle.load(open("./data/sklearn-data.pickle", "rb"))
except FileNotFoundError:
    raise FileNotFoundError("Place data files in ./data/sklearn-data.pickle")

x_train, y_train = data["x_train"], data["y_train"]
x_test, y_test = data["x_test"], data["y_test"]

# Transforms reviews into feature vectors. Removes stop words and only checks whether a word is present or not.
vectorizer = HashingVectorizer(stop_words='english',
                               binary=True,
                               n_features=2**9)

# Load vectorized reviews from file or vectorize them and save for later
x_train_path = './data/x_train_vec.pkl'
y_train_path = "./data/y_train_vec.pkl"
if os.path.isfile(x_train_path) and os.path.isfile(y_train_path):
    logging.info("Loading vectorized data")
Example #36
    args.max_wn_concepts_count = 0
elif args.kb == 'wn':
    args.max_nell_concepts_count = 0
elif args.kb == 'none':
    args.max_wn_concepts_count = 0
    args.max_nell_concepts_count = 0

args.record_path = '{}_{}_{}_{}_{}_{}'.format(
    args.task,
    f'{args.decoder}{"+pos" if args.pos else ""}{"+uni" if args.uni_intent else ""}',
    f'seed{args.seed}' if args.do_train else 'eval-pred',
    f'seq{args.max_seq_len}', f'{args.kb}',
    time.strftime('%Y-%m-%d--%H-%M-%S', time.localtime(time.time())))

if __name__ == '__main__':
    init_logger(args)
    set_seed(args.seed)
    tokenizer = load_tokenizer(args)

    trainer = Trainer(args=args,
                      train_data=load_and_cache_dataset(args,
                                                        tokenizer,
                                                        mode='train'),
                      dev_data=load_and_cache_dataset(args,
                                                      tokenizer,
                                                      mode='dev'),
                      test_data=load_and_cache_dataset(args,
                                                       tokenizer,
                                                       mode='test'))

    if args.do_train:
Example #37
def main():

    parser = BasicConfig()

    model_type = vars(parser.parse_known_args()[0])["model_type"].lower()
    model_class, configs = MODEL_CLASSES[model_type]
    args = configs(parser)
    args = checkoutput_and_setcuda(args)
    logger = init_logger(args)
    logger.info('Dataset collected from {}'.format(args.data_dir))
    # Set seed
    set_seed(args)
    processor = UbuntuCorpus(args)

    logger.info(args)

    model = model_class(args=args)

    # model.to(args.device)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    # Training
    if args.do_train:
        args.train_batch_size = args.per_gpu_train_batch_size * max(
            1, args.n_gpu)
        train_dataloader = processor.create_batch(data_type="train")

        args.eval_batch_size = args.per_gpu_eval_batch_size * max(
            1, args.n_gpu)
        eval_dataloader = processor.create_batch(data_type="eval")

        args.logging_steps = len(
            train_dataloader) // args.gradient_accumulation_steps // 5
        args.valid_steps = len(
            train_dataloader) // args.gradient_accumulation_steps

        trainer_op = trainer(args=args,
                             model=model,
                             optimizer=optimizer,
                             train_iter=train_dataloader,
                             eval_iter=eval_dataloader,
                             logger=logger,
                             num_epochs=args.num_train_epochs,
                             save_dir=args.output_dir,
                             log_steps=args.logging_steps,
                             valid_steps=args.valid_steps,
                             valid_metric_name="+R10@1")
        trainer_op.train()
    print('training complete!')
    # Test
    if args.do_test:
        args.eval_batch_size = args.per_gpu_eval_batch_size * max(
            1, args.n_gpu)
        test_dataloader = processor.create_batch(data_type="eval")

        trainer_op = trainer(args=args,
                             model=model,
                             optimizer=optimizer,
                             train_iter=None,
                             eval_iter=None,
                             logger=logger,
                             num_epochs=args.num_train_epochs,
                             save_dir=args.output_dir,
                             log_steps=None,
                             valid_steps=None,
                             valid_metric_name="+R10@1")

        best_model_file = os.path.join(args.output_dir,
                                       args.fusion_type + "_best.model")
        best_train_file = os.path.join(args.output_dir,
                                       args.fusion_type + "_best.train")

        trainer_op.load(best_model_file, best_train_file)

        evaluate(args, trainer_op.model, test_dataloader, logger)
    print('test complete')
    # TODO: Infer case study
    if args.do_infer:
        # Don't know what to write here; too lazy to think about it.
        pass
Example #38
                        help="systolic array design directory")
    parser.add_argument('--task', type=str, default="mm", help="search task")

    args = parser.parse_args()

    search_obj = args.objective

    # Set up the working directory
    now = datetime.now()
    outdir = args.outdir
    os.makedirs(outdir, exist_ok=True)
    explore_config = ""
    exp_name = f"O_{args.objective}-C_{explore_config}-T_{now.date()}-{now.time()}"
    outdir = f"{outdir}/{exp_name}"
    os.makedirs(outdir, exist_ok=True)
    logger = utils.init_logger(outdir)

    # Load the constraints
    cst = Constraint(f'cst/{args.cst}.json')

    # Set up the searching algorithm stop criteria
    max_epochs = -1
    max_time = -1
    if args.stop_after_epochs > 0:
        max_epochs = args.stop_after_epochs
    elif args.stop_after_time > 0:
        max_time = args.stop_after_time
    else:
        max_time = 60

    # Set up the parallel executor
Example #39
def main():
    args = parse_args()
    subdir = f'raw_nn_{args.neighbour}_at_{datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")}'
    utils.init_logger('./logs/', subdir, print_log=False)
    logging.info(str(args))

    writer = SummaryWriter(log_dir=f'./tf_logs/{subdir}')


    nfeature_dim = 2
    print(nfeature_dim)
    if args.model_name == 'mp_nn':
        model = mp_sequential(
            mp_conv_v2(nfeature_dim,
                       64,
                       16,
                       extension=mp_conv_type.ORIG_WITH_NEIGHBOR),
            mp_conv_residual(64, 64, 16), torch.nn.Conv2d(64, 128, 1),
            torch.nn.BatchNorm2d(128), torch.nn.ReLU(inplace=True),
            mp_conv_residual(128, 64, 16), torch.nn.Conv2d(128, 256, 1),
            torch.nn.BatchNorm2d(256), torch.nn.ReLU(inplace=True),
            mp_conv_residual(256, 64, 16), torch.nn.Conv2d(256, 128, 1),
            torch.nn.BatchNorm2d(128), torch.nn.ReLU(inplace=True),
            mp_conv_residual(128, 64, 16), torch.nn.Conv2d(128, 64, 1),
            torch.nn.BatchNorm2d(64), torch.nn.ReLU(inplace=True),
            mp_conv_residual(64, 64, 16), torch.nn.Conv2d(64, 2, 1))
        emodel = torch.nn.Sequential(torch.nn.Conv2d(1, 64, 1),
                                     torch.nn.ReLU(inplace=True),
                                     torch.nn.Conv2d(64, 16, 1))

    elif args.model_name == 'mp_nn_comp':
        model = mp_sequential(
            mp_conv_v2(nfeature_dim,
                       64,
                       16,
                       extension=mp_conv_type.ORIG_WITH_NEIGHBOR),
            mp_conv_residual(64, 64, 16), torch.nn.Conv2d(64, 128, 1),
            torch.nn.BatchNorm2d(128), torch.nn.ReLU(inplace=True),
            mp_conv_residual(128, 64, 16), torch.nn.Conv2d(128, 256, 1),
            torch.nn.BatchNorm2d(256), torch.nn.ReLU(inplace=True),
            mp_conv_residual(256, 64, 16), mp_conv_residual(256, 64, 16),
            mp_conv_residual(256, 64, 16), mp_conv_residual(256, 64, 16),
            mp_conv_residual(256, 64, 16), torch.nn.Conv2d(256, 128, 1),
            torch.nn.BatchNorm2d(128), torch.nn.ReLU(inplace=True),
            mp_conv_residual(128, 64, 16), torch.nn.Conv2d(128, 64, 1),
            torch.nn.BatchNorm2d(64), torch.nn.ReLU(inplace=True),
            mp_conv_residual(64, 64, 16), torch.nn.Conv2d(64, 2, 1))
        emodel = torch.nn.Sequential(torch.nn.Conv2d(1, 64, 1),
                                     torch.nn.ReLU(inplace=True),
                                     torch.nn.Conv2d(64, 16, 1))

    elif args.model_name == 'simple_gnn':
        model = mp_sequential(
            mp_conv_v2(nfeature_dim,
                       64,
                       16,
                       extension=mp_conv_type.ORIG_WITH_NEIGHBOR),
            mp_conv_residual(64, 64, 16), torch.nn.Conv2d(64, 2, 1))
        emodel = torch.nn.Sequential(torch.nn.Conv2d(1, 64, 1),
                                     torch.nn.ReLU(inplace=True),
                                     torch.nn.Conv2d(64, 16, 1))
    elif args.model_name == 'iid':
        model = mp_sequential(torch.nn.Conv2d(nfeature_dim, 64, 1),
                              torch.nn.ReLU(True), torch.nn.Conv2d(64, 2, 1))
        emodel = torch.nn.Sequential(torch.nn.Conv2d(1, 64, 1),
                                     torch.nn.ReLU(inplace=True),
                                     torch.nn.Conv2d(64, 16, 1))

    logging.info('model {} created'.format(str(model)))

    np.random.seed(23456)
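    # Fixed seed so the four randomly drawn transition values below are identical across runs.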
    cap = args.hop_cap
    transition = list(np.random.randn(2 * 2))


    nn_idx, efeature = generate_knn_table(args.chain_length, args.neighbour)

    if args.use_cuda:
        nn_idx, efeature = nn_idx.cuda(), efeature.cuda()
        model.cuda()
        emodel.cuda()

    # train_data_set = lib.data.RandomPGM(args.chain_length, cap, transition)
    # dataloader = torch.utils.data.DataLoader(train_data_set,
    #                                          batch_size=args.batch_size,
    #                                          shuffle=True,
    #                                          num_workers=8,
    #                                          worker_init_fn=worker_init_fn)

    train_dataset = lib.data.RandomPGMData(args.train_path, pgm_type="raw", size=args.train_size)
    test_dataset = lib.data.RandomPGMData(args.test_path, pgm_type="raw", size=args.test_size)

    train_loader = torch.utils.data.DataLoader(train_dataset, 
                                            batch_size=args.batch_size, 
                                            shuffle=True,
                                            num_workers=8,
                                            worker_init_fn=worker_init_fn)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                            batch_size=args.batch_size,
                                            shuffle=False,
                                            num_workers=8,
                                            worker_init_fn=worker_init_fn)

    optimizer = torch.optim.Adam(list(model.parameters()) +
                                 list(emodel.parameters()),
                                 lr=3e-3)
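    # LambdaLR scales the base LR by max(0.98**epoch, 1e-6); scheduler.step() is called once per epoch below.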
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer, lr_lambda=lambda x: max(0.98**x, 1e-6))
    start_epoch = 0
    gcnt = 0
    if os.path.exists(args.model_path):
        ckpt = torch.load(args.model_path)
        model.load_state_dict(ckpt['model_state_dict'])
        emodel.load_state_dict(ckpt['emodel_state_dict'])
        optimizer.load_state_dict(ckpt['optimizer_state_dict'])
        scheduler.load_state_dict(ckpt['lr_sche'])

        start_epoch = ckpt['epoch']
        gcnt = ckpt['gcnt']

    def get_model_dict():
        return {
                'model_state_dict': model.state_dict(),
                'emodel_state_dict': emodel.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'lr_sche': scheduler.state_dict(),
                'epoch': epoch,
                'gcnt': gcnt
            }

    def get_filename(epoch):
        return f'raw_nn_{args.neighbour}_epoches_{epoch}.pt'

    epoch = 0
    for epoch in tqdm(range(start_epoch, args.train_epoches)):
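        # Note: the checkpoint for epoch N is written before that epoch trains, so it holds the weights left by epoch N-1.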
        torch.save(get_model_dict(), get_filename(epoch))

        logging.info(f'save train result to {get_filename(epoch)}')
        scheduler.step()

        loss_seq = []
        acc_seq = []
        acc_lp_seq = []
        for bcnt, (nfeature, nlabel, lp_label) in tqdm(enumerate(train_loader)):
            optimizer.zero_grad()
            if args.use_cuda:
                nfeature, nlabel, lp_label = nfeature.cuda(), nlabel.cuda(
                ), lp_label.cuda()
            if len(nfeature.shape) == 3:
                nfeature = nfeature.unsqueeze(-1)

            etype = emodel(efeature)
            # print(etype.shape)
            # print(nn_idx.shape)
            pred = model(nfeature, nn_idx.repeat(nfeature.shape[0], 1, 1),
                         etype.repeat(nfeature.shape[0], 1, 1, 1))
            pred = pred.squeeze(-1).permute(0, 2, 1).contiguous()
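            # pred is now (batch, nodes, 2); flatten to (batch*nodes, 2) for per-node cross-entropy against nlabel.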
            loss = torch.nn.functional.cross_entropy(pred.view(-1, 2),
                                                     nlabel.view(-1))
            loss.backward()
            torch.nn.utils.clip_grad_norm_(
                list(model.parameters()) + list(emodel.parameters()), 1.0)

            optimizer.step()
            loss_seq.append(loss.item())
            gcnt += 1

            pred_int = pred.argmax(dim=-1)
            all_correct = torch.sum(pred_int == nlabel)
            lp_correct = torch.sum(lp_label == nlabel)
            acc = all_correct.item() / np.prod(nlabel.shape)
            lp_acc = lp_correct.item() / np.prod(nlabel.shape)

            acc_lp_seq.append(lp_acc)
            acc_seq.append(acc)

            if gcnt % 10 == 0:
                writer.add_scalar('syn_train/loss', loss.item(), gcnt)
                writer.add_scalar('syn_train/acc', acc, gcnt)
                writer.add_scalar('syn_train/lp_acc', lp_acc, gcnt)
                logging.info(
                    'epoch = {} bcnt = {} loss = {} acc = {} lp_acc={}'.format(
                        epoch, bcnt, np.mean(loss_seq), np.mean(acc_seq),
                        np.mean(acc_lp_seq)))
                loss_seq = []
                acc_seq = []
                acc_lp_seq = []

    if epoch == args.train_epoches - 1:
        epoch = args.train_epoches
        torch.save(get_model_dict(), get_filename(epoch))

        logging.info(f'save train result to {get_filename(epoch)}')
        logging.info('training done!')

    loss_seq = []
    acc_seq = []
    acc_lp_seq = []
    acc_global = []
    acc_lp_global = []
    gcnt = 0
    accum_acc = 0
    accum_acc_lp = 0
    model.eval()
    emodel.eval()
    for bcnt, (nfeature, nlabel,
                lp_label) in tqdm(enumerate(test_loader)):
        if args.use_cuda:
            nfeature, nlabel, lp_label \
                = nfeature.cuda(), nlabel.cuda(), lp_label.cuda()
        if len(nfeature.shape) == 3:
            nfeature = nfeature.unsqueeze(-1)

        etype = emodel(efeature)
        pred = model(nfeature, nn_idx.repeat(nfeature.shape[0], 1, 1),
                     etype.repeat(nfeature.shape[0], 1, 1, 1))
        pred = pred.squeeze(-1).permute(0, 2, 1).contiguous()
        loss = torch.nn.functional.cross_entropy(pred.view(-1, 2),
                                                 nlabel.view(-1))

        loss_seq.append(loss.item())
        gcnt += 1

        pred_int = pred.argmax(dim=-1)
        all_correct = torch.sum(pred_int == nlabel)
        lp_correct = torch.sum(lp_label == nlabel)
        acc = all_correct.item() / np.prod(nlabel.shape)
        lp_acc = lp_correct.item() / np.prod(nlabel.shape)
        acc_global.append(acc)
        acc_lp_global.append(lp_acc)

        acc_lp_seq.append(lp_acc)
        acc_seq.append(acc)
        accum_acc += acc
        accum_acc_lp += lp_acc

        if gcnt % 10 == 0:
            logging.info(
                'epoch = {} bcnt = {} loss = {} acc = {} lp_acc={}'.format(
                    epoch, bcnt, np.mean(loss_seq), np.mean(acc_seq),
                    np.mean(acc_lp_seq)))
            writer.add_scalar('syn_test/loss', loss.item(), gcnt)
            writer.add_scalar('syn_test/acc', acc, gcnt)
            writer.add_scalar('syn_test/lp_acc', lp_acc, gcnt)

            loss_seq = []
            acc_seq = []
            acc_lp_seq = []

    logging.info(f'testing result: acc = {accum_acc / gcnt}, acc_lp = {accum_acc_lp / gcnt}')
    logging.info(f'stddev = {st.stdev(acc_global)}, stddev_lp = {st.stdev(acc_lp_global)}')
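
worker_init_fn is passed to both DataLoaders above but is not part of this snippet; a minimal sketch (an assumption, not the repository's actual helper) would seed NumPy separately in each worker so random samples do not repeat across workers:

import numpy as np
import torch

def worker_init_fn(worker_id):
    # Derive a distinct 32-bit NumPy seed for each DataLoader worker from torch's base seed.
    np.random.seed((torch.initial_seed() + worker_id) % 2**32)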
Пример #40
0
    '/0/account/bind_phone', req_bind_phone.Handler,
    '/0/account/bind_phone_confirm', req_bind_phone_confirm.Handler,
    '/0/account/billing_history', req_billing_history.Handler,
    '/0/sms/resend_sms_code', req_resend_sms_code.Handler,
    '/0/task/list', req_task_list.Handler,
    '/0/task/detail', req_task_detail.Handler,
    '/0/task/check-in', req_task_checkin.Handler,
    '/0/task/comment', req_task_comment.Handler,
    '/0/task/download_app', req_task_download_app.Handler,
    '/0/task/domob',  req_task_domob.Handler,
    '/0/task/offerwall',  req_task_offerwall.Handler,
    '/0/order/alipay', req_order_alipay.Handler,
    '/0/order/phone_pay', req_order_phone_pay.Handler,
    '/0/order/exchange_code', req_order_exchange_code.Handler,
    '/0/order/exchange_list', req_order_exchange_list.Handler,
    '/0/order/detail', req_order_detail.Handler
)

web.config.debug = True
#web.internalerror = web.debugerror

utils.init_logger("../log/interface.log")

app = web.application(urls, globals(), autoreload=False)

if __name__ == '__main__':
    app.run()
else:
    application = app.wsgifunc()
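
Each pair in the urls tuple above maps a path to a handler class; a minimal sketch of that web.py convention (a hypothetical handler, not one of the req_* modules) looks like this:

import json
import web

class Handler(object):
    def POST(self):
        params = web.input()  # query/form parameters as a web.Storage mapping
        web.header('Content-Type', 'application/json')
        return json.dumps({'status': 'ok', 'echo': dict(params)})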

Пример #41
0
import os
import random

import db

from config_parser import Config
from utils import init_logger

CONFIGS = Config().get_config()
LOGGER = init_logger('vocab_trainer', {'file'}, CONFIGS['logging.verbosity'])


def add_new_words():
    print 'Enter word to add to your list:'
    word = raw_input()
    if word:
        print 'Enter translations to this word:'
        translations = []
        translation = raw_input()
        if not translation:
            print 'No translation provided!'
            return
        while translation:
            translations.append(translation)
            print 'Another translation?'
            translation = raw_input()
        word_id = db.insert_word(word, user_id=1, memorized=False,
                                 lang='english')
        for translation in translations:
            db.insert_translation(translation, word_id, 'ukrainian')
    else:
Пример #42
0
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# import daemon
import time
import logging

import config
import utils
import sender
import rule_runner

utils.init_logger(config.log_dir, config.log_level, config.log_console)
logging.info("zoro start")

utils.init_for_setup()

zorocfg = utils.load_user_config(config.user_config_path)
sender.init(zorocfg)
rule_runner.init(zorocfg)
rule_runner.runall(zorocfg)

time.sleep(10000)

# TODO
# with daemon.DaemonContext():
#     while True:
#         time.sleep(1)
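
The commented-out TODO above sketches a daemonised variant; with the python-daemon package that intent could look roughly like the following (the loop body and run interval are assumptions):

import time
import daemon

import config
import utils
import sender
import rule_runner

with daemon.DaemonContext():
    # Initialise logging and rules inside the daemon context so file handles survive detaching.
    utils.init_logger(config.log_dir, config.log_level, config.log_console)
    zorocfg = utils.load_user_config(config.user_config_path)
    sender.init(zorocfg)
    rule_runner.init(zorocfg)
    while True:
        rule_runner.runall(zorocfg)
        time.sleep(60)  # assumed interval between rule runs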
Пример #43
0
models_file = join(configs.EMBEDINGS_ROOT, configs.W2V_FILENAME)

if len(argv) == 1:
    print "usage: ./{} <model_name> <enabled (0 or 1)>".format(argv[0])
    print "Chages options in {}".format(models_file)
    print "model_name: feature name or 'all'"
    print "enabled: 0 or 1"
    exit()

model_name = argv[1]
if (argv[2] == "1"):
    enabled = "true"
else:
    enabled = "false"

utils.init_logger()

logging.info("set w2v[{}] = {}".format(model_name, enabled))
with open(models_file, "r") as ff:
    features = json.load(ff)
    if (model_name == 'all'):
        for i in features['w2v_models']:
            enable_feature(features, i, enabled)
    else:
        enable_feature(features, model_name, enabled)

logging.info("save: {}".format(models_file))
with open(models_file, "w") as ff:
    json.dump(features, ff, indent=4, sort_keys=True)
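
enable_feature is called above but not defined in this snippet; one plausible shape, assuming features['w2v_models'] maps each model name to an entry carrying an 'enabled' field, is:

def enable_feature(features, model_name, enabled):
    # Flip the 'enabled' flag ("true"/"false" strings, matching the CLI handling above).
    features['w2v_models'][model_name]['enabled'] = enabled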
Пример #44
0
def main(cli_args):
    # Read from config file and make args
    config_filename = "{}.json".format(cli_args.taxonomy)
    with open(os.path.join("config", config_filename)) as f:
        args = AttrDict(json.load(f))
    logger.info("Training/evaluation parameters {}".format(args))

    args.output_dir = os.path.join(args.ckpt_dir, args.output_dir)

    init_logger()
    set_seed(args)

    processor = GoEmotionsProcessor(args)
    label_list = processor.get_labels()

    config = BertConfig.from_pretrained(
        args.model_name_or_path,
        num_labels=len(label_list),
        finetuning_task=args.task,
        id2label={str(i): label
                  for i, label in enumerate(label_list)},
        label2id={label: i
                  for i, label in enumerate(label_list)})
    tokenizer = BertTokenizer.from_pretrained(args.tokenizer_name_or_path, )
    model = BertForMultiLabelClassification.from_pretrained(
        args.model_name_or_path, config=config)

    # GPU or CPU
    args.device = "cuda" if torch.cuda.is_available(
    ) and not args.no_cuda else "cpu"
    model.to(args.device)

    # Load dataset
    train_dataset = load_and_cache_examples(
        args, tokenizer, mode="train") if args.train_file else None
    dev_dataset = load_and_cache_examples(
        args, tokenizer, mode="dev") if args.dev_file else None
    test_dataset = load_and_cache_examples(
        args, tokenizer, mode="test") if args.test_file else None

    if dev_dataset is None:
        args.evaluate_test_during_training = True  # If there is no dev dataset, only use test dataset

    if args.do_train:
        global_step, tr_loss = train(args, model, tokenizer, train_dataset,
                                     dev_dataset, test_dataset)
        logger.info(" global_step = {}, average loss = {}".format(
            global_step, tr_loss))

    results = {}
    if args.do_eval:
        checkpoints = list(
            os.path.dirname(c) for c in sorted(
                glob.glob(args.output_dir + "/**/" + "pytorch_model.bin",
                          recursive=True)))
        if not args.eval_all_checkpoints:
            checkpoints = checkpoints[-1:]
        else:
            logging.getLogger("transformers.configuration_utils").setLevel(
                logging.WARN)  # Reduce logging
            logging.getLogger("transformers.modeling_utils").setLevel(
                logging.WARN)  # Reduce logging
        logger.info("Evaluate the following checkpoints: %s", checkpoints)
        for checkpoint in checkpoints:
            global_step = checkpoint.split("-")[-1]
            model = BertForMultiLabelClassification.from_pretrained(checkpoint)
            model.to(args.device)
            result = evaluate(args,
                              model,
                              test_dataset,
                              mode="test",
                              global_step=global_step)
            result = dict(
                (k + "_{}".format(global_step), v) for k, v in result.items())
            results.update(result)

        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as f_w:
            for key in sorted(results.keys()):
                f_w.write("{} = {}\n".format(key, str(results[key])))
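
BertForMultiLabelClassification is the project's own classification head; assuming it emits one logit per GoEmotions label, multi-label predictions are usually derived with a sigmoid and a threshold rather than an argmax, along these lines:

import torch

def logits_to_labels(logits, threshold=0.5):
    # Decide each label independently; anything at or above the threshold counts as present.
    return (torch.sigmoid(logits) >= threshold).long()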
Пример #45
0
import os
import shutil

import click
import pandas as pd
from deepsense import neptune
from sklearn.metrics import roc_auc_score

import pipeline_config as cfg
from pipelines import PIPELINES
from utils import init_logger, read_params, create_submission, set_seed, save_evaluation_predictions, \
    read_csv_time_chunks, cut_data_in_time_chunks, data_hash_channel_send, get_submission_hours_index

set_seed(1234)
logger = init_logger()
ctx = neptune.Context()
params = read_params(ctx)


@click.group()
def action():
    pass


@action.command()
def prepare_data():
    logger.info('chunking train')
    train = pd.read_csv(params.raw_train_filepath)
    cut_data_in_time_chunks(train,
                            timestamp_column='click_time',
                            chunks_dir=params.train_chunks_dir,
Пример #46
0
import time

from collections import deque
from contextlib import contextmanager
from threading import RLock

from config_parser import Config
from errors import MySQLPoolSizeError
from utils import init_logger, Singleton

CONFIGS = Config().get_config()
DEFAULT_TRIES = 3
DEFAULT_DELAY = 1


LOGGER = init_logger("DB", {"stream", "syslog"}, CONFIGS["logging.verbosity"])


class DBPool(object):

    """DBPool class represents DB pool, which
    handles and manages work with database
    connections.
    """

    __metaclass__ = Singleton
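    # Python 2 metaclass syntax; Python 3 would declare this as "class DBPool(object, metaclass=Singleton)".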

    def __init__(self, configs, pool_size):
        if not isinstance(pool_size, int) or pool_size <= 0:
            raise ValueError("Bad value of POOL_SIZE!")
        if not isinstance(configs["ttl"], int) or configs["ttl"] <= 0: