def __init__(self, page_url, html_string=None):
    """Build a header-carrying opener, obtain the page HTML and parse it.

    :param page_url: URL to fetch; required when html_string is not given.
    :param html_string: pre-fetched markup; when supplied no request is made.
    """
    init_logger(self)
    opener = urllib2.build_opener()
    opener.addheaders = [('user-agent', USER_AGENT)]
    self.opener = opener
    if not html_string:
        # No pre-fetched markup supplied, so a URL is mandatory.
        assert page_url
        self.logger.debug(u'Fetching {0}'.format(page_url))
        html = self.opener.open(page_url).read()
    else:
        html = html_string
    self.html = html
    self.soup = BeautifulSoup.BeautifulSoup(self.html)
def __init__(self, config_path=None, refresh_time=900):
    """Create the config holder.

    :param config_path: path to the main config file; defaults to
        $CONFROOT/main.conf, resolved lazily at call time.
    :param refresh_time: seconds between config refreshes.
    """
    if config_path is None:
        # Resolved lazily: the original default computed
        # os.environ['CONFROOT'] in the signature, which raised KeyError at
        # class-definition (import) time whenever CONFROOT was unset — even
        # for callers that supplied their own path.
        config_path = os.path.join(os.environ['CONFROOT'], 'main.conf')
    self.config = {}
    self.update_time = 0  # timestamp of the last successful refresh
    self.refresh_time = refresh_time
    self.path = config_path
    self.log = utils.init_logger('ConfigParser', {'syslog', 'stream'}, verbosity='1')
    self.log.info('Created instance of Config parser.')
def __init__(self, auth, sub, dry_run=False):
    """Log in to reddit (unless dry_run) and keep an authenticated opener.

    :param auth: object exposing reddit_login / reddit_password.
    :param sub: target subreddit handle.
    :param dry_run: when True, skip the network login entirely.
    :raises RuntimeError: when the login call yields no session cookie.
    """
    init_logger(self)
    self.dry_run = dry_run
    self.sub = sub
    self.sleeper = Sleeper(self.logger)
    # prepare: cookie-aware opener so the reddit session cookie persists
    cookies = cookielib.CookieJar()
    self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookies))
    self.opener.addheaders = [('user-agent', USER_AGENT)]
    if dry_run:
        return
    # login
    result = self.api_call('/api/login', user=auth.reddit_login, passwd=auth.reddit_password)
    # Generator expression instead of the original throwaway
    # `any([True for c in cookies if ...])` list build.
    if not any(c.name == 'reddit_session' for c in cookies):
        self.logger.error('Login failure, result: {0}'.format(result))
        raise RuntimeError('Login failure')
    self.logger.debug('Successfully logged in')
    # Attach the modhash header required by reddit's write API.
    me = self.api_call('/api/me.json')
    self.opener.addheaders.append(('x-modhash', me['data']['modhash']))
# NOTE(review): starts mid-function — the `else`/`return data` below belong to
# an enclosing loader whose beginning is outside this chunk.
        else:
            pass  # deal with bad lines of text here
    return data


def init_arg():
    # Command-line options: -o output dir, --itout number of outer iterations.
    parser = argparse.ArgumentParser()
    parser.add_argument("-o", default='.')
    parser.add_argument("--itout", default=5, type=int)
    return parser.parse_args()


##### Main settings
args = init_arg()
odir = 'Synthetic'
logger = utils.init_logger(odir, 'log_cs_marginal_deeppseudo_sum.txt')
OUT_ITERATION = args.itout
data_mode = 'Synthetic'
num_Event = 2  #causes of the event
evalTime = [12, 60]  # evaluation times (for C-index and Brier-Score)
in_path = odir + '/results/'
if not os.path.exists(in_path):
    os.makedirs(in_path)
# Metric accumulators indexed (event, eval-time, outer-iteration).
WEIGHTED_C_INDEX = np.zeros([num_Event, len(evalTime), OUT_ITERATION])
WEIGHTED_BRIER_SCORE = np.zeros([num_Event, len(evalTime), OUT_ITERATION])
for out_itr in range(OUT_ITERATION):
    ## Define a list of continuous columns from the covariates
import smtplib
from email.mime.text import MIMEText
from email.header import Header
import time
from sqlalchemy import create_engine, Column, String, Integer
from sqlalchemy.orm import sessionmaker
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
import requests
from lxml import etree
from retrying import retry
import utils

logger = utils.init_logger('data-montior_mail')
import traceback

# SECURITY(review): database credentials are hard-coded in the DSN below —
# move them to configuration/environment before shipping.
CONSTR = 'mysql+pymysql://root:[email protected]:3306/xxxxx?charset=utf8'
engine = create_engine(CONSTR, echo=False)
DBSession = sessionmaker(bind=engine)
session = DBSession()
Base = declarative_base()


class Mybase(Base):
    # ORM row mapped to the `decryption-tools` table.
    __tablename__ = 'decryption-tools'
    id = Column(Integer, name='Id', primary_key=True, autoincrement=True)
    name = Column(String(255), nullable=False)

    def __repr__(self):
        return "{}".format(self.name)
parser.add_argument("--repeat", type=int, default=5)
args = parser.parse_args()

if __name__ == "__main__":
    # Seed all RNG sources for reproducibility.
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    np.random.seed(args.seed)
    args.save = os.path.join("experiments", args.save)
    # Refuse to clobber a previous run unless resuming from a checkpoint.
    if os.path.exists(args.save) and args.load == "none":
        raise NameError("previous experiment '{}' already exists!".format(
            args.save))
    # NOTE(review): when resuming (args.load != "none") into an existing dir,
    # makedirs will raise — confirm intended resume flow.
    os.makedirs(args.save)
    logger = init_logger(logpath=args.save, experiment_name="logs-" + args.model)
    logger.info(args)
    args.device = torch.device(
        "cuda:" + str(args.gpu) if torch.cuda.is_available() else "cpu")
    train_loader, test_loader, train_eval_loader = get_cifar10_loaders(
        data_aug=True, batch_size=args.tbsize)
    model = cifar_model(args.model, layers=args.block, norm_type=args.norm)
    logger.info(model)
    model.to(args.device)
    # NOTE(review): chunk truncated here — the dict literal is cut off.
    loader = {
        "train_loader": train_loader,
        "train_eval_loader": train_eval_loader,
        "test_loader": test_loader
# -*- coding: UTF-8 -*- from app import app, db import re, time, json from app.models import * from flask import render_template, redirect, session, url_for, request, g, jsonify, flash, make_response import traceback import sys, os import utils logger = utils.init_logger('hfs_web') query_data_url = [] query_data_md5 = [] sys.path.append("..") from hfs_down import get_hfs_down_file_url from hfs_down import get_url_id_by_hfs_host_url from hfs_down import get_host_id_by_host_port _dir = os.path.dirname(os.getcwd()) white_file = os.path.join(_dir, 'white_list.txt') white_list = [] if os.path.exists(white_file): try: with open(white_file, 'r') as fp: for i in fp: white_list.append(i.strip()) except: logger.error(traceback.format_exc()) @app.route('/', methods=['GET', 'POST']) def hfs_host_main(): global query_data_url query_data_url = []
app.add_processor(web.loadhook(header_html))


def notfound():
    # Custom 404 page rendered through the shared template renderer.
    web.ctx.status = '404 Not Found'
    return web.notfound(str(render._404()))


app.notfound = notfound


def internalerror():
    # Custom 500 page; logs the active exception with traceback first.
    web.ctx.status = '500 Internal Server Error'
    logging.exception("this is an internalerror")
    return web.internalerror(str(render._500()))


app.internalerror = internalerror


# Let sub-applications also use session, global_render and db.
def global_hook():
    web.ctx.session = session
    web.ctx.global_render = render
    web.ctx.global_db = db


app.add_processor(web.loadhook(global_hook))
app.add_processor(web.loadhook(utils.filter_input_loadhook))
wsgiapp = app.wsgifunc()
utils.init_logger(config.log_path, config.log_level, console=True)
qqlogin.init(config.qq_app_id, config.qq_app_key, config.qq_callback, on_qq_logined)

if __name__ == '__main__':
    app.run()
import tensorflow as tf

# Allocate GPU memory on demand instead of grabbing it all up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
sess = tf.Session(config=config)
set_session(sess)  # set this TensorFlow session as the default session for Keras

from model import getAState
from model import getPolicy
from model import getConv2DClassifier
from queryStrategy import *

start_time = time.time()
args = utils.get_args()
logger = utils.init_logger()

# Experiment configuration pulled from the CLI.
QUERY = args.query_strategy
policyname = args.policy_path  # (duplicate re-assignment at the end removed)
DATASET_NAME = QUERY + "_transfer_" + args.dataset_name
EPISODES = args.episodes
k_num = args.k
BUDGET = args.annotation_budget
EMBEDDING_SIZE = 32
NUM_CLASSES = 10
pass # deal with bad lines of text here return data def init_arg(): parser = argparse.ArgumentParser() parser.add_argument("-o", default='.') parser.add_argument("--itout", default=5, type=int) return parser.parse_args() ##### Main settings args = init_arg() odir = 'Synthetic' logger = utils.init_logger(odir, 'log_cs_conditional_deeppseudo_sum.txt') OUT_ITERATION = args.itout data_mode = 'Synthetic' num_Event = 2 #causes of the event evalTime = [12, 60] # evalution times (for C-index and Brier-Score) in_path = odir + '/results/' if not os.path.exists(in_path): os.makedirs(in_path) WEIGHTED_C_INDEX = np.zeros([num_Event, len(evalTime), OUT_ITERATION]) WEIGHTED_BRIER_SCORE = np.zeros([num_Event, len(evalTime), OUT_ITERATION])
import req_query_userid
import req_query_inviter
import req_user_info
import req_update_user_info
import req_update_inviter

# URL -> handler routing table for the account service.
urls = (
    '/register', req_register.Handler,
    '/bind_phone', req_bind_phone.Handler,
    '/userid', req_query_userid.Handler,
    '/inviter', req_query_inviter.Handler,
    '/user_info', req_user_info.Handler,
    '/update_user_info', req_update_user_info.Handler,
    '/update_inviter', req_update_inviter.Handler,
)
web.config.debug = True
#web.internalerror = web.debugerror
utils.init_logger('../log/account.log')
app = web.application(urls, globals(), autoreload=False)

if __name__ == '__main__':
    # Standalone: run web.py's built-in dev server.
    app.run()
else:
    # Imported by a WSGI container: expose the WSGI callable.
    application = app.wsgifunc()
from Queue import Empty
import RPi.GPIO as GPIO
from db import DB
from maps_repo import MapsRepo
from prompts_enum import PromptDirn
from audio_driver import AudioDriver
from step_counter import StepCounter
from heading_calculator import HeadingCalculator
from camera import camera
import utils
from utils import CommonLogger, init_logger

LOG_FILENAME = "/home/pi/logs/navi.log"
logger = init_logger(logging.getLogger(__name__), LOG_FILENAME)
# Mirror stdout/stderr into the log so prints and tracebacks are captured.
sys.stdout = CommonLogger(logger, logging.INFO)
sys.stderr = CommonLogger(logger, logging.ERROR)

STEP_LENGTH = 40.0  # NOTE(review): units not stated in this chunk — confirm
ANGLE_THRESHOLD = 10
FOOT_SENSOR_ID = 0
BACK_SENSOR_ID = 1
GPIO_OVERRIDE_PIN = 17
# NOTE(review): only `Empty` is imported from Queue above; `Queue` itself must
# be imported earlier in the file — verify.
QUEUE = Queue()


class Navigator(object):
    # NOTE(review): class body is truncated beyond this chunk.
    def __init__(self, logger):
        self.log = logger
        self.log.info("Starting navigator...")
# -*- coding: utf-8 -*- """ myapp ~~~ """ from flask import Flask from utils import init_logger app = Flask(__name__) init_logger(app) @app.route('/') def index(): return 'Hello, World!' @app.route('/error') def err(): raise Exception('Something bad happened!')
connections from Pool._connection_pool. After connection is closed it
returns to Pool._connection_pool.
"""
import psycopg2 as psc
import time
from collections import deque
from contextlib import contextmanager
from threading import RLock

from config_parser import Config
from utils import init_logger, Singleton

CONFIGS = Config().get_config()
LOGGER = init_logger('DB', {'stream', 'syslog'}, CONFIGS['logging.verbosity'])


class DBPool(object):
    """DBPool class represents DB pool, which handles and manages work
    with database connections.
    """
    __metaclass__ = Singleton  # Python 2 metaclass hook (singleton)

    def __init__(self, configs, pool_size):
        # Fail fast on bad pool parameters.
        if not isinstance(pool_size, int) or pool_size <= 0:
            raise ValueError('Bad value of POOL_SIZE!')
        # NOTE(review): chunk truncated mid-validation below.
        if not isinstance(configs['ttl'], int) or configs['ttl'] <= 0:
from flask import Flask

from config_parser import Config
from utils import init_logger

CONFIGS = Config().get_config()

app = Flask('vocab_trainer')
app.config['TEMPLATE_DIR'] = CONFIGS['dirs.template_dir']
app.config['SECRET_KEY'] = CONFIGS['server.secret_key']
# File-based application logger; verbosity comes from the config file.
logger = init_logger('vocab-trainer', ['file'],
                     verbosity=CONFIGS['logging.verbosity'],
                     log_file='/tmp/vocab_trainer.log')
# NOTE(review): starts mid-function — res/x/y belong to a converter whose
# signature is outside this chunk.
    res = []
    i = 0
    for xx, yy in zip(x, y):
        if yy == 'b' and i > 0:
            res.append(',')
        res.append(xx)
        i += 1
    return ''.join(res)


if __name__ == '__main__':
    # Combine command-line arguments and yaml file arguments
    opt = opts.model_opts()
    # SECURITY(review): yaml.load without an explicit Loader is unsafe on
    # untrusted files — prefer yaml.safe_load; confirm the config is trusted.
    config = yaml.load(open(opt.config, "r"))
    config = Namespace(**config, **vars(opt))
    logger = init_logger("torch", logging_path='')
    logger.info(config.__dict__)
    device, devices_id = misc_utils.set_cuda(config)
    config.device = device
    TEXT = data.Field(sequential=True, use_vocab=False, batch_first=True,
                      unk_token=utils.UNK, include_lengths=True,
                      pad_token=utils.PAD, preprocessing=to_int,
                      )  # init_token=utils.BOS, eos_token=utils.EOS)
    LABEL = data.Field(sequential=True, use_vocab=False, batch_first=True,
                       unk_token=utils.UNK, include_lengths=True,
                       pad_token=utils.PAD, preprocessing=to_int,
                       )  # init_token=utils.BOS, eos_token=utils.EOS)
    fields = [("text", TEXT), ("label", LABEL)]
    validDataset = datasets.SequenceTaggingDataset(path=os.path.join(config.data, 'valid.txt'),
                                                   fields=fields)
import logging

import environment
import utils
import numpy as np
import random
import matplotlib.pyplot as plt

# The logger
utils.init_logger(logging.DEBUG, fileName="log/app.log")
logger = logging.getLogger('Easy21')

# set the random seed
random.seed(a=None, version=2)

# constants
alpha = 0.01  # step size
epsilon = 0.05  # exploration
lam_range = np.arange(0, 1.1, 0.1)  # lambda values swept from 0.0 to 1.0
n_iter = 10000  # number of episodes

# define the indices of the different values
q_hit_index = 0  # q value for action hit
q_stick_index = 1  # q value for action stick
e_hit_index = 2  # eligibility trace for the hit action
e_stick_index = 3  # eligibility trace for the stick action

# initialize the value function approximation, the value is 1 if the state lies within the defined intervals
# x-index: dealer card approximation [1; 4] [4; 7] [7; 10]
# y-index: player sum approximation [1; 6] [4; 9] [7; 12] [10; 15] [13; 18] [16; 21]
# z all properties for this state
# NOTE(review): starts mid-statement — the pattern below closes a
# re.compile(...) call whose opening line is outside this chunk.
    r'<p class="wr_bookList_item_author"><a href=(.*?)</a>')
# Book title
findTitle = re.compile(r'<p class="wr_bookList_item_title">(.*)</p>')
# Book cover image
findCover = re.compile(
    r'<img alt="书籍封面" class="wr_bookCover_img" src="(.*?)"/>')
# Synopsis / description
findIntro = re.compile(r'<p class="wr_bookList_item_desc">(.*)</p>', re.S)
# Rating
findScore = re.compile(
    r'<span class="wr_bookList_item_starString">([\d+\.]+)</span>')
# Number of readers today
findReaderNumber = re.compile(
    r'<em class="wr_bookList_item_reading_number">([\d+\.]+)</em>')

logger = init_logger(log_file='.\\微信读书.log')


def get_info(url):
    """Scrape the ranking page.

    :param url: URL of the page to crawl
    :return: list of the scraped records
    """
    datalist = []
    html = requestURL(url)  # fetch the page source
    logger.info('获取网页源码...')
    soup = BeautifulSoup(html, "html.parser")
    booklist = soup.find('ul', class_="ranking_content_bookList").select('li')
    logger.info('获得书籍列表...')
    # NOTE(review): truncated — the per-item parsing loop continues beyond
    # this chunk.
    for item in booklist:
import requests
from socks import SOCKS5
from telethon import TelegramClient
from telethon.events import NewMessage
from telethon.tl.custom import Message
from nltk.corpus import stopwords
from pymystem3 import Mystem
from string import punctuation
from telethon.tl.types import User, Channel
from requests.exceptions import ConnectionError

from config import APP_API_HASH, APP_API_ID, PHONE_NUMBER, SETTINGS_FILE, \
    PROXY_HOST, PROXY_PORT, PROXY_USERNAME, PROXY_PASS
from utils import init_logger

logger = init_logger()

# Probe direct connectivity to Telegram; fall back to SOCKS5 proxy settings
# from config when the direct request fails.
try:
    response = requests.get('https://api.telegram.org')
    proxy = None
except ConnectionError as e:
    # NOTE(review): `response` stays unbound on this path — verify nothing
    # later reads it.
    proxy = (SOCKS5, PROXY_HOST, PROXY_PORT, True, PROXY_USERNAME, PROXY_PASS)

client = TelegramClient(PHONE_NUMBER.strip('+'), APP_API_ID, APP_API_HASH,
                        proxy=proxy, base_logger=logger)


# NOTE(review): definition truncated — body is outside this chunk.
def get_settings() -> Dict:
from datetime import datetime, timedelta #from datetime import timedelta from database.models import AWSInstance, AWSPrices, AWSInstancePrice, AWSInstanceWorkLoad, AWSSummary from database.utils import check_if_exist, simple_query_count, simple_sum, simple_query from utils import init_logger import config logger = init_logger(__name__, testing_mode=config.DEBUG_MODE) class WriteData(object): base_datetime = datetime.now().date() def __init__(self, conn, workload_tag): self.conn = conn self.workload_tag = workload_tag # --- INSTANCE ---# def save_instance(self, dict_instance_details, workload_profile): saved = False geoJson = None try: instance_type = dict_instance_details['deep_details'][ 'Instance_type'], instance_id = dict_instance_details['instance_id'] aws_region = dict_instance_details['aws_region'] lat = None log = None
# NOTE(review): starts mid-function — writer/args/adv_* come from the
# enclosing evaluation routine whose beginning is outside this chunk.
    writer.close()
    if args.adv_save:
        # Persist the generated adversarial examples next to the checkpoint.
        if not os.path.exists(os.path.join(args.load, args.attack + "_" + str(args.eps))):
            os.makedirs(os.path.join(args.load, args.attack + "_" + str(args.eps)))
        with open(os.path.join(args.load, args.attack + "_" + str(args.eps), "adversary.pkl"), "wb") as f:
            pickle.dump(adv_saver, f)
    logger.info("Attacked Accuracy : {:.4f}".format(adv_acc))
    logger.info("Attacked Loss : {:.4f}".format(adv_loss))
    logger.info("Finished")
    logger.info("=" * 80)


if __name__ == "__main__":
    args.device = torch.device("cuda:" + str(args.gpu) if torch.cuda.is_available() else "cpu")
    logger = init_logger(logpath=args.load,
                         experiment_name="attack-" + str(args.attack) + "-" + str(args.eps))
    in_channels = 1 if args.eval == "mnist" else 3
    # Pick the model family matching the evaluation dataset.
    if args.eval == "mnist" or args.eval == "norm":
        from model.mnist import mnist_model
        model = mnist_model(args.model, layers=args.block, norm_type=args.norm)
    elif args.eval == "cifar10":
        from model.cifar10 import cifar_model
        model = cifar_model(args.model, layers=args.block, norm_type=args.norm)
    logger.info(args)
    logger.info(model)
    model.to(args.device)
    # Load either the best-accuracy or the last checkpoint (truncated below).
    if args.crit == "acc":
        model_dict = torch.load(os.path.join(args.load, "model_acc.pt"),
                                map_location=str(args.device))
    elif args.crit == "last":
# classifier arguments parser.add_argument('--lr', type=float, default=0.) parser.add_argument('--wd', type=float, default=0.) parser.add_argument('--batch_size', type=int, default=0) parser.add_argument('--n_epoch', type=int, default=0) args = parser.parse_args() np.set_printoptions(linewidth=150, precision=4, suppress=True) th.set_printoptions(linewidth=150, precision=4) FN = th.from_numpy join = os.path.join logger = logging.getLogger() utils.prepare_directory(args.exp_root, force_delete=True) utils.init_logger(join(args.exp_root, 'program.log')) utils.write_args(args) dset = data.XianDataset(args.data_dir, args.mode, feature_norm=args.feature_norm) _X_s_tr = FN(dset.X_s_tr).to(args.device) _Y_s_tr = FN(dset.Y_s_tr).to(args.device) _X_s_te = FN(dset.X_s_te).to(args.device) _Y_s_te = FN(dset.Y_s_te).to(args.device) _X_u_te = FN(dset.X_u_te).to(args.device) _Y_u_te = FN(dset.Y_u_te).to(args.device) _Cu = FN(dset.Cu).to(args.device) _Sall = FN(dset.Sall).to(args.device) train_iter = data.Iterator([_X_s_tr, _Y_s_tr],
import os
from uuid import uuid4
from utils import get_config_params, _run, clean_environ, init_logger
import logging

logger = logging.getLogger(__name__)
init_logger(logger)

# Paths to the VyOS shell API binaries, read from configuration.
VYOS_SHELL_API = get_config_params('bin', 'shell_api_path')
VYOS_SBIN_DIR = get_config_params('bin', 'vyos_sbin_dir')
VYOS_SAVE_SCRIPT = 'vyatta-save-config.pl'
# Create/Get the logger object
#logger = init_logger()


class SessionAlreadyExists(Exception):
    pass


class SetupSessionFailed(Exception):
    pass


class OperationFailed(Exception):
    pass


class SessionNotExists(Exception):
    pass


class Session(object):
    """
    Return the session instance if exists. Else, create new one.
    SessionAlreadyExists exception raised on the second instantiation.
    """
    _ref = None  # back-reference to the single live Session instance

    def __new__(cls, *args, **kw):
        # Enforce the singleton: a second instantiation is an error.
        if cls._ref is not None:
            raise SessionAlreadyExists('A session exist already !')
        # NOTE(review): forwarding *args/**kw to object.__new__ is rejected on
        # Python 3 — confirm the target interpreter version.
        cls._ref = super(Session, cls).__new__(cls, *args, **kw)
        return cls._ref
from os import path
import argparse

import chainer
import copy
import numpy as np

from dataset import COL_BASIC_FEATURES
from dataset import FactIterator, Vocabulary, read_dataset, get_idx2vec, get_values, replace_by_dic
from models import LinearEnsembler, MLPEnsembler
from utils import init_logger
from utils import set_random_seed
from utils import standardize_vectors
from utils import find_greatest_divisor

verbose = False
logger = init_logger('Ensember')

# Directories
dir_scripts = path.dirname(path.dirname(path.dirname(path.abspath(__file__))))
dir_root = path.dirname(dir_scripts)
dir_data = path.join(dir_root, 'data')


# NOTE(review): `Chain` is not imported in this chunk (only `import chainer`)
# — verify it is brought into scope elsewhere in the file.
class Classifier(Chain):
    """Calculate loss."""

    # NOTE(review): signature truncated beyond this chunk.
    def __init__(self, predictor, label2fact, en2ja, idx2vec, margin=1.0,
def main():
    """Training entry point: setup, data loading, model build, epoch loop.

    NOTE(review): reconstructed from a collapsed chunk; the function may
    continue past the end of this view.
    """
    # print("Starting DFC2021 baseline training script at %s" % (str(datetime.datetime.now())))
    #-------------------
    # Setup
    #-------------------
    assert os.path.exists(args.train_fn)
    assert os.path.exists(args.valid_fn)

    now_time = datetime.datetime.now()
    time_str = datetime.datetime.strftime(now_time, '%m-%d_%H-%M-%S')
    # output path
    # output_dir = Path(args.output_dir).parent / time_str / Path(args.output_dir).stem
    output_dir = Path(args.output_dir)
    output_dir.mkdir(exist_ok=True, parents=True)
    logger = utils.init_logger(output_dir / 'info.log')
    # if os.path.isfile(args.output_dir):
    #     print("A file was passed as `--output_dir`, please pass a directory!")
    #     return
    #
    # if os.path.exists(args.output_dir) and len(os.listdir(args.output_dir)):
    #     if args.overwrite:
    #         print("WARNING! The output directory, %s, already exists, we might overwrite data in it!" % (args.output_dir))
    #     else:
    #         print("The output directory, %s, already exists and isn't empty. We don't want to overwrite and existing results, exiting..." % (args.output_dir))
    #         return
    # else:
    #     print("The output directory doesn't exist or is empty.")
    #     os.makedirs(args.output_dir, exist_ok=True)

    if args.gpu is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    n_gpu = torch.cuda.device_count()
    device = torch.device('cuda:0' if n_gpu > 0 else 'cpu')
    device_ids = list(range(n_gpu))
    # Seed both RNG sources for reproducibility.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    #-------------------
    # Load input data
    #-------------------
    train_dataframe = pd.read_csv(args.train_fn)
    train_image_fns = train_dataframe["image_fn"].values
    train_label_fns = train_dataframe["label_fn"].values
    train_groups = train_dataframe["group"].values
    train_dataset = StreamingGeospatialDataset(
        imagery_fns=train_image_fns, label_fns=train_label_fns, groups=train_groups, chip_size=CHIP_SIZE,
        num_chips_per_tile=NUM_CHIPS_PER_TILE, transform=transform, nodata_check=nodata_check
    )
    valid_dataframe = pd.read_csv(args.valid_fn)
    valid_image_fns = valid_dataframe["image_fn"].values
    valid_label_fns = valid_dataframe["label_fn"].values
    valid_groups = valid_dataframe["group"].values
    valid_dataset = StreamingValidationDataset(
        imagery_fns=valid_image_fns, label_fns=valid_label_fns, groups=valid_groups, chip_size=CHIP_SIZE,
        stride=CHIP_SIZE, transform=transform, nodata_check=nodata_check
    )
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        num_workers=NUM_WORKERS,
        pin_memory=True,
    )
    valid_dataloader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=args.batch_size,
        num_workers=NUM_WORKERS,
        pin_memory=True,
    )
    num_training_images_per_epoch = int(len(train_image_fns) * NUM_CHIPS_PER_TILE)
    # print("We will be training with %d batches per epoch" % (num_training_batches_per_epoch))

    #-------------------
    # Setup training
    #-------------------
    # if args.model == "unet":
    #     model = models.get_unet()
    # elif args.model == "fcn":
    #     model = models.get_fcn()
    # else:
    #     raise ValueError("Invalid model")
    model = models.isCNN(args.backbone)
    weights_init(model, seed=args.seed)
    model = model.to(device)
    if len(device_ids) > 1:
        # Multi-GPU: wrap in DataParallel.
        model = torch.nn.DataParallel(model, device_ids=device_ids)
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.AdamW(trainable_params, lr=INIT_LR, amsgrad=True, weight_decay=5e-4)
    lr_criterion = nn.CrossEntropyLoss(ignore_index=0)  # todo
    hr_criterion = hr_loss
    # criterion = balanced_ce_loss
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min", factor=0.5, patience=3,
                                                     min_lr=0.0000001)  # factor=0.5, patience=3, min_lr=0.0000001
    logger.info("Trainable parameters: {}".format(utils.count_parameters(model)))

    #-------------------
    # Model training
    #-------------------
    train_loss_total_epochs, valid_loss_total_epochs, epoch_lr = [], [], []
    best_loss = 1e50
    num_times_lr_dropped = 0
    # model_checkpoints = []
    # temp_model_fn = os.path.join(output_dir, "most_recent_model.pt")
    for epoch in range(args.num_epochs):
        lr = utils.get_lr(optimizer)
        train_loss_epoch, valid_loss_epoch = utils.fit(
            model, device, train_dataloader, valid_dataloader, num_training_images_per_epoch, optimizer,
            lr_criterion, hr_criterion, epoch, logger)
        scheduler.step(valid_loss_epoch)
        # Periodic checkpoint.
        if epoch % config.SAVE_PERIOD == 0 and epoch != 0:
            temp_model_fn = output_dir / 'checkpoint-epoch{}.pth'.format(epoch + 1)
            torch.save(model.state_dict(), temp_model_fn)
        # Track and save the best validation loss.
        if valid_loss_epoch < best_loss:
            logger.info("Saving model_best.pth...")
            temp_model_fn = output_dir / 'model_best.pth'
            torch.save(model.state_dict(), temp_model_fn)
            best_loss = valid_loss_epoch
        # Count ReduceLROnPlateau drops by comparing against the pre-step lr.
        if utils.get_lr(optimizer) < lr:
            num_times_lr_dropped += 1
            print("")
            print("Learning rate dropped")
            print("")
        train_loss_total_epochs.append(train_loss_epoch)
        valid_loss_total_epochs.append(valid_loss_epoch)
        epoch_lr.append(lr)
def main():
    """Main workflow: load a trained LM checkpoint and report test NLL/PPL
    (optionally the importance-weighted NLL)."""
    args = utils.build_test_args(argparse.ArgumentParser())
    suff = ".test"
    if args.report_iw_nll:
        # IW evaluation: the sample count must tile evenly into IW batches.
        if (
            args.num_iw_samples > args.iw_batch_size
            and args.num_iw_samples % args.iw_batch_size != 0
        ):
            raise RuntimeError("Expected num_iw_samples divisible by iw_batch_size")
        suff = ".test.iw" + str(args.num_iw_samples)
    utils.init_logger(args.model_file + suff)
    logger.info("Config:\n%s", pformat(vars(args)))

    assert torch.cuda.is_available()
    torch.cuda.set_device(args.gpuid)
    utils.init_random(args.seed)

    # Restore model parameters and vocabulary from the checkpoint.
    logger.info("Load parameters from '%s'", args.model_file)
    params = torch.load(args.model_file, map_location=lambda storage, loc: storage)
    utils.set_params(params["args"])
    fields = utils.load_fields_from_vocab(params["vocab"])
    logger.info("Fields: %s", fields.keys())
    model = utils.build_test_model(fields, params)
    logger.info("Model:\n%s", model)

    logger.info("Load %s", args.test_file)
    test_data = LMDataset(fields, args.test_file, args.sent_length_trunc)
    logger.info("Test sentences: %d", len(test_data))
    test_iter = utils.OrderedIterator(
        dataset=test_data,
        batch_size=args.batch_size,
        device=params["args"].device,
        train=False,
        shuffle=False,
        repeat=False,
        sort=False,
        sort_within_batch=True,
    )
    if model.encoder is None:
        # No encoder means no posterior to importance-sample from.
        args.report_iw_nll = False
        logger.info("Force report_iw_nll to False")

    start_time = time.time()
    logger.info("Start testing")
    if args.report_iw_nll:
        # Split the IW samples into memory-sized batches.
        if args.num_iw_samples <= args.iw_batch_size:
            n_iw_iter = 1
        else:
            n_iw_iter = args.num_iw_samples // args.iw_batch_size
            args.num_iw_samples = args.iw_batch_size
        test_stats = report_iw_nll(model, test_iter, n_iw_iter, args.num_iw_samples)
        logger.info(
            "Results: test nll %.2f | test ppl %.2f", test_stats.nll(), test_stats.ppl()
        )
    else:
        test_stats = validate(model, test_iter)
        logger.info(
            "Results: test nll %.2f | test kl %.2f | test ppl %.2f",
            test_stats.nll(),
            test_stats.kl(),
            test_stats.ppl(),
        )
    logger.info("End of testing: time %.1f min", (time.time() - start_time) / 60)
"""Calculate Hits@k for all method on all split.

Example:
nice -n 19 python scripts/twa/eval/hits_wrapper.py /baobab/otani/cckbc/cn/20170814v1/ens/kbc-170816v2/split -v --by-relation
"""
import argparse
import numpy as np
import os

import hits
from utils import init_logger

verbose = False
logger = init_logger('EvalHits')


def main(args):
    global verbose
    verbose = args.verbose
    assert args.label in ['devel', 'test']
    # Evaluate every known method unless one was pinned via --method.
    methods = ['pmi', 'kbc', 'trans', 'kbc-trans']
    if args.method:
        methods = [args.method]
    results = {m: [] for m in methods}
    # Walk the split directories. NOTE(review): truncated beyond this chunk.
    for d in os.listdir(args.dir_split):
        d = os.path.join(args.dir_split, d)
        if not os.path.isdir(d):
from configuration import ConfigManager
from execution import ExecEnvironment
from instance.manager import InstanceManager
from snapshot.manager import SnapshotManager
from instance.upgrade import UpgradeManager
from version.manager import VersionManager
from version.parser import VersionParser
from instance.parser import InstanceParser
from snapshot.parser import SnapshotParser
from utils import init_logger
from utils import parse_args

args = parse_args()
init_logger(args.verbose)
logger = logging.getLogger('Main')

# Wire up the managers; each parser receives the full manager set.
cm = ConfigManager()
vm = VersionManager(cm)
im = InstanceManager(cm, vm)
sm = SnapshotManager(cm, vm, im)
um = UpgradeManager(cm, vm, im)
ex = ExecEnvironment()
vp = VersionParser(cm, vm, im, sm, um, ex)
ip = InstanceParser(cm, vm, im, sm, um, ex)
sp = SnapshotParser(cm, vm, im, sm, um, ex)
logger.debug('Arguments : {}'.format(args))

# Dispatch on the requested action. NOTE(review): truncated beyond this chunk.
if (args.action in ['list', 'l']):
def main():
    """Train and then evaluate the factor-graph MPNN on synthetic hop data.

    NOTE(review): reconstructed from a collapsed chunk; block placement of a
    few statements (e.g. the final-save `if`) was inferred from context.
    """
    args = parse_args()
    subdir = f'train_syn_hop_factor_{args.model_name}_nn_{args.neighbour}_at_{datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")}'
    utils.init_logger('./logs/', subdir, print_log=False)
    logging.info(str(args))
    logdir = f'./tf_logs/{subdir}'
    writer = SummaryWriter(log_dir=logdir)

    nfeature_dim = 2
    if args.model_name == 'mp_nn_factor':
        model = factor_mpnn(nfeature_dim, [nfeature_dim**2, args.hop_order],
                            [64, 64, 128, 128, 256, 256, 128, 128, 64, 64, 2],
                            [16, 16])
        # Edge-feature embedding nets for pairwise and high-order factors.
        emodel_pw = torch.nn.Sequential(torch.nn.Conv2d(3, 64, 1),
                                        torch.nn.ReLU(inplace=True),
                                        torch.nn.Conv2d(64, 16, 1))
        emodel_high = torch.nn.Sequential(torch.nn.Conv2d(2, 64, 1),
                                          torch.nn.ReLU(inplace=True),
                                          torch.nn.Conv2d(64, 16, 1))

    def get_model_description():
        return str(model) + str(emodel_pw) + str(emodel_high)

    logging.info('model {} created'.format(get_model_description()))

    cap = args.hop_cap
    nn_idx_pw, efeature_pw = generate_pw_factor_table(args.chain_length)
    nn_idx_high, efeature_high = generate_high_factor_table(
        args.chain_length, args.hop_order)

    if args.use_cuda:
        nn_idx_pw = nn_idx_pw.cuda()
        efeature_pw = efeature_pw.cuda()
        nn_idx_high = nn_idx_high.cuda()
        efeature_high = efeature_high.cuda()
        model.cuda()
        emodel_pw.cuda()
        emodel_high.cuda()

    # Optimize model and both edge-embedding nets jointly.
    parameters = list(model.parameters()) + \
        list(emodel_pw.parameters()) + \
        list(emodel_high.parameters())

    # train_data_set = lib.data.RandomPGMHop(args.chain_length,
    #                                        ret_efeature_pw=False)
    # dataloader = torch.utils.data.DataLoader(train_data_set,
    #                                          batch_size=args.batch_size,
    #                                          shuffle=True,
    #                                          num_workers=8,
    #                                          worker_init_fn=worker_init_fn)
    train_dataset = lib.data.RandomPGMData(args.train_path,
                                           pgm_type="hops",
                                           size=args.train_size)
    test_dataset = lib.data.RandomPGMData(args.test_path,
                                          pgm_type="hops",
                                          size=args.test_size)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=8,
                                               worker_init_fn=worker_init_fn)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=8,
                                              worker_init_fn=worker_init_fn)

    optimizer = torch.optim.Adam(parameters, lr=3e-3)
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer, lr_lambda=lambda x: max(0.98**x, 1e-6))

    # Resume from checkpoint when one exists.
    start_epoch = 0
    gcnt = 0
    if os.path.exists(args.model_path):
        ckpt = torch.load(args.model_path)
        model.load_state_dict(ckpt['model_state_dict'])
        emodel_pw.load_state_dict(ckpt['emodel_pw_state_dict'])
        emodel_high.load_state_dict(ckpt['emodel_high_state_dict'])
        optimizer.load_state_dict(ckpt['optimizer_state_dict'])
        scheduler.load_state_dict(ckpt['lr_sche'])
        start_epoch = ckpt['epoch']
        gcnt = ckpt['gcnt']

    def get_model_dict():
        # Everything needed to resume training from this point.
        return {
            'model_state_dict': model.state_dict(),
            'emodel_pw_state_dict': emodel_pw.state_dict(),
            'emodel_high_state_dict': emodel_high.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'lr_sche': scheduler.state_dict(),
            'epoch': epoch,
            'gcnt': gcnt
        }

    epoch = 0
    for epoch in tqdm(range(start_epoch, args.train_epoches)):
        # Checkpoint at the start of every epoch.
        torch.save(
            get_model_dict(),
            '{}_nn_factor_{}_epoches_{}.pt'.format(args.model_name,
                                                   args.neighbour, epoch))
        logging.info('save train result to {}'.format(
            '{}_nn_factor_{}_epoches_{}.pt'.format(args.model_name,
                                                   args.neighbour, epoch)))
        scheduler.step()
        loss_seq = []
        acc_seq = []
        acc_lp_seq = []
        for bcnt, (nfeature, pws, hops, nlabel, lp_label) in tqdm(enumerate(train_loader)):
            optimizer.zero_grad()
            if args.use_cuda:
                nfeature, pws, hops, nlabel, lp_label \
                    = nfeature.cuda(), pws.cuda(), hops.cuda(), nlabel.cuda(), lp_label.cuda()
            if len(nfeature.shape) == 3:
                nfeature = nfeature.unsqueeze(-1)
            etype_pw = emodel_pw(efeature_pw)
            etype_high = emodel_high(efeature_high)
            bsize = nfeature.shape[0]
            pred, _ = model(nfeature, [pws, hops], [[
                nn_idx_pw.repeat(bsize, 1, 1),
                etype_pw.repeat(bsize, 1, 1, 1)
            ], [
                nn_idx_high.repeat(bsize, 1, 1),
                etype_high.repeat(bsize, 1, 1, 1)
            ]])
            pred = pred.squeeze(-1).permute(0, 2, 1).contiguous()
            loss = torch.nn.functional.cross_entropy(pred.view(-1, 2),
                                                     nlabel.view(-1))
            loss.backward()
            torch.nn.utils.clip_grad_norm(parameters, 1.0)
            optimizer.step()
            loss_seq.append(loss.item())
            gcnt += 1
            # Compare model prediction and the LP baseline against labels.
            pred_int = pred.argmax(dim=-1)
            all_correct = torch.sum(pred_int == nlabel)
            lp_correct = torch.sum(lp_label == nlabel)
            acc = all_correct.item() / np.prod(nlabel.shape)
            lp_acc = lp_correct.item() / np.prod(nlabel.shape)
            acc_lp_seq.append(lp_acc)
            acc_seq.append(acc)
            if gcnt % 10 == 0:
                logging.info(
                    'epoch = {} bcnt = {} loss = {} acc = {} lp_acc={}'.format(
                        epoch, bcnt, np.mean(loss_seq), np.mean(acc_seq),
                        np.mean(acc_lp_seq)))
                writer.add_scalar('syn_train/loss', loss.item(), gcnt)
                writer.add_scalar('syn_train/acc', acc, gcnt)
                writer.add_scalar('syn_train/lp_acc', lp_acc, gcnt)
                loss_seq = []
                acc_seq = []
                acc_lp_seq = []
        # Final save after the last epoch (epoch bumped past the loop range).
        if epoch == args.train_epoches - 1:
            epoch = args.train_epoches
            torch.save(
                get_model_dict(),
                '{}_nn_factor_{}_epoches_{}.pt'.format(args.model_name,
                                                       args.neighbour, epoch))
            logging.info('save train result to {}'.format(
                '{}_nn_factor_{}_epoches_{}.pt'.format(args.model_name,
                                                       args.neighbour, epoch)))
    logging.info('training done!')

    # Evaluation pass over the test set.
    loss_seq = []
    acc_seq = []
    acc_lp_seq = []
    acc_global = []
    acc_lp_global = []
    gcnt = 0
    accum_acc = 0
    accum_acc_lp = 0
    model.eval()
    emodel_high.eval()
    emodel_pw.eval()
    for bcnt, (nfeature, pws, hops, nlabel, lp_label) in tqdm(enumerate(test_loader)):
        if args.use_cuda:
            nfeature, pws, hops, nlabel, lp_label \
                = nfeature.cuda(), pws.cuda(), hops.cuda(), nlabel.cuda(), lp_label.cuda()
        if len(nfeature.shape) == 3:
            nfeature = nfeature.unsqueeze(-1)
        etype_pw = emodel_pw(efeature_pw)
        etype_high = emodel_high(efeature_high)
        bsize = nfeature.shape[0]
        pred, _ = model(
            nfeature, [pws, hops],
            [[nn_idx_pw.repeat(bsize, 1, 1),
              etype_pw.repeat(bsize, 1, 1, 1)],
             [
                 nn_idx_high.repeat(bsize, 1, 1),
                 etype_high.repeat(bsize, 1, 1, 1)
             ]])
        pred = pred.squeeze(-1).permute(0, 2, 1).contiguous()
        loss = torch.nn.functional.cross_entropy(pred.view(-1, 2),
                                                 nlabel.view(-1))
        # NOTE(review): gradient clipping during eval (no backward) is a no-op
        # carried over from the training loop — confirm intent.
        torch.nn.utils.clip_grad_norm(parameters, 1.0)
        loss_seq.append(loss.item())
        gcnt += 1
        pred_int = pred.argmax(dim=-1)
        all_correct = torch.sum(pred_int == nlabel)
        lp_correct = torch.sum(lp_label == nlabel)
        acc = all_correct.item() / np.prod(nlabel.shape)
        lp_acc = lp_correct.item() / np.prod(nlabel.shape)
        acc_global.append(acc)
        acc_lp_global.append(lp_acc)
        acc_lp_seq.append(lp_acc)
        acc_seq.append(acc)
        accum_acc += acc
        accum_acc_lp += lp_acc
        if gcnt % 10 == 0:
            logging.info(
                'epoch = {} bcnt = {} loss = {} acc = {} lp_acc={}'.format(
                    epoch, bcnt, np.mean(loss_seq), np.mean(acc_seq),
                    np.mean(acc_lp_seq)))
            writer.add_scalar('syn_test/loss', loss.item(), gcnt)
            writer.add_scalar('syn_test/acc', acc, gcnt)
            writer.add_scalar('syn_test/lp_acc', lp_acc, gcnt)
            loss_seq = []
            acc_seq = []
            acc_lp_seq = []
    logging.info(
        f'testing result: acc = {accum_acc / gcnt}, acc_lp = {accum_acc_lp / gcnt}'
    )
    logging.info(
        f'stddev = {st.stdev(acc_global)}, stddev_lp = {st.stdev(acc_lp_global)}'
    )
# NOTE(review): starts mid-script — `argparser` is created earlier, outside
# this chunk.
argparser.add_argument('--report_step', type=int, default=1000)
argparser.add_argument('--eval_step', type=int, default=3000)
argparser.add_argument('--n_epoch', type=int, default=10)
argparser.add_argument('--init_lr', type=float, default=5e-4)
argparser.add_argument('--batch_size', type=int, default=64)
argparser.add_argument('--embed_dim', type=int, default=32)
argparser.add_argument('--hidden_size', type=int, default=32)
argparser.add_argument('--n_mem', type=int, default=75135)
argparser.add_argument('--n_ene', type=int, default=55396)
argparser.add_argument('--n_group', type=int, default=482)
argparser.add_argument('--n_topic', type=int, default=17129)
argparser.add_argument('--n_output', type=int, default=2)
args = argparser.parse_args()

# Ensure the log/model directories exist before training starts.
dir_check_list = [
    './log',
    './model',
    './model/{}'.format(args.task_name),
]
for dir in dir_check_list:  # NOTE(review): `dir` shadows the builtin
    if not os.path.exists(dir):
        os.mkdir(dir)

logger = utils.init_logger('./log/{}.log'.format(args.task_name))

# Echo the full configuration as a table into the log.
pt = prettytable.PrettyTable()
pt.field_names = ['arg', 'val']
for k, v in vars(args).items():
    pt.add_row([k, v])
logger.info("\n" + str(pt))

train()
def init_arg():
    """Parse command-line options for the DeepPseudo marginal run."""
    cli = argparse.ArgumentParser()
    cli.add_argument("-o")
    # --it: number of training iterations (default 100000).
    cli.add_argument("--it", default=100000, type=int)
    # --itout: total number of cross-validation data sets (default 5).
    cli.add_argument("--itout", default=5, type=int)
    # --itrs: number of random hyper-parameter searches (default 30).
    cli.add_argument("--itrs", default=30, type=int)
    return cli.parse_args()


args = init_arg()

# Output directory (doubles as the dataset name).
odir = 'Synthetic'
logger = utils.init_logger(odir, 'log_marginal_deeppseudo.txt')

# -- Main settings ---------------------------------------------------------
data_mode = 'Synthetic'
OUT_ITERATION = args.itout
RS_ITERATION = args.itrs
logger.info('data_mode:{}'.format(data_mode))
iteration = args.it

# Evaluation horizons for C-index / Brier score.
evalTime = [12, 60]

# Pre-create one results directory per outer cross-validation iteration.
out_path = odir + '/results/'
for itr in range(OUT_ITERATION):
    itr_dir = out_path + '/itr_' + str(itr) + '/'
    os.makedirs(itr_dir, exist_ok=True)
def __init__(self, name='OpenEDS', track='Semantic_Segmentation_Dataset', isTrain=True, resizedFactor=0.5, logDir=None):
    """Hold OpenEDS dataset constants: split sizes, image shapes, TFRecord paths.

    Args:
        name: Dataset name, used for logging only.
        track: Sub-track folder name; substituted into every TFRecord path.
        isTrain: When True, configure a logger and echo all metadata to it.
        resizedFactor: Scale factor applied to the assumed 640x400 source
            resolution when computing image shapes (TODO confirm source size).
        logDir: Log directory handed to utils.init_logger (used only if isTrain).
    """
    self.name = name
    self.track = track
    # Hard-coded sizes of the OpenEDS splits (image and person counts).
    self.numTrainImgs = 8916
    self.numValImgs = 2403
    self.numTestImgs = 1440
    self.numTrainPersons = 95
    self.numValPersons = 28
    self.numTestPersons = 29
    self.numClasses = 4
    # Decoded records are twice as wide as a single image (400 * 2), single
    # channel; both shapes scale with resizedFactor.
    self.decodeImgShape = (int(640 * resizedFactor), int(400 * 2 * resizedFactor), 1)
    self.singleImgShape = (int(640 * resizedFactor), int(400 * resizedFactor), 1)
    # TFrecord path (one file per split, all keyed by the track name)
    self.trainPath = '../../Data/OpenEDS/{}/train_expand/train.tfrecords'.format(self.track)
    self.valPath = '../../Data/OpenEDS/{}/validation/validation.tfrecords'.format(self.track)
    self.testPath = '../../Data/OpenEDS/{}/test/test.tfrecords'.format(self.track)
    self.overfittingPath = '../../Data/OpenEDS/{}/overfitting/overfitting.tfrecords'.format(self.track)
    if isTrain:
        self.logger = logging.getLogger(__name__)  # module-level logger
        self.logger.setLevel(logging.INFO)
        utils.init_logger(logger=self.logger, logDir=logDir, isTrain=isTrain, name='dataset')
        # Echo every constant so each run's log records the dataset config.
        self.logger.info('Dataset name: \t\t{}'.format(self.name))
        self.logger.info('Dataset track: \t\t{}'.format(self.track))
        self.logger.info('Num. of training imgs: \t{}'.format(self.numTrainImgs))
        self.logger.info('Num. of validation imgs: \t{}'.format(self.numValImgs))
        self.logger.info('Num. of test imgs: \t\t{}'.format(self.numTestImgs))
        self.logger.info('Num. of training persons: \t{}'.format(self.numTrainPersons))
        self.logger.info('Num. of validation persons: \t{}'.format(self.numValPersons))
        self.logger.info('Num. of test persons: \t{}'.format(self.numTestPersons))
        self.logger.info('Num. of classes: \t\t{}'.format(self.numClasses))
        self.logger.info('Decode image shape: \t{}'.format(self.decodeImgShape))
        self.logger.info('Single img shape: \t\t{}'.format(self.singleImgShape))
        self.logger.info('Training TFrecord path: \t{}'.format(self.trainPath))
        self.logger.info('Validation TFrecord path: \t{}'.format(self.valPath))
        self.logger.info('Test TFrecord path: \t\t{}'.format(self.testPath))
        self.logger.info('Overfitting TFrecord path: \t\t{}'.format(self.overfittingPath))
# setup hyperparams train_df = params.train_df valid_df = params.valid_df pssm_dir = params.pssm_dir tert_dir = params.tert_dir max_len = int(params.max_len) batch_size = int(params.batch_size) input_shape = tuple(int(x) for x in params.input_shape.split()) n_dist_bins = int(params.n_dist_bins) n_blocks = int(params.n_blocks) n_epochs = int(params.n_epochs) lr = float(params.lr) name = params.name init_logger(name) logging.info(pformat(params)) # to gpu if available device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') logging.info(f'device: {device}') # setup data iterators and model train_dataset = ProteinNetDataset(train_df, pssm_dir, tert_dir, max_len) train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True) valid_dataset = ProteinNetDataset(valid_df, pssm_dir, tert_dir, max_len)
# Optimizer / loss for the language model. t0=0 starts ASGD's weight
# averaging immediately; lambd=0. disables ASGD's internal decay term.
optimizer = optim.ASGD(net.parameters(),
                       lr=config.training.lr,
                       t0=0,
                       lambd=0.,
                       weight_decay=config.training.weight_decay)
criterion = nn.CrossEntropyLoss()

# Checkpoint and log locations under <save_root>/model_1.
save_root = config.data.save_root
save_root = os.path.join(save_root, 'model_1')
if not os.path.exists(save_root):
    os.mkdir(save_root)
# writer_path = os.path.join(save_root, 'writer')
logger_path = os.path.join(save_root, 'lm.log')
ckpt_path = os.path.join(save_root, 'lm.pth')
# writer = SummaryWriter(writer_path)
logger = init_logger(logger_path)

###############################################################################
# Training code
###############################################################################
if args.resume_training:
    # Restore training state from the last checkpoint.
    ckpt = torch.load(ckpt_path)
    start_epoch = ckpt['epoch'] + 1
    best_dev_loss = ckpt['best_dev_loss']
    net.load_state_dict(ckpt['net_state_dict'])
    # BUG FIX: the original message interpolated `best_dev_ppl`, which is not
    # defined here (only `best_dev_loss` is restored from the checkpoint), so
    # resuming raised a NameError. Log the restored dev loss instead.
    logger.info(
        f'resume training from epoch {start_epoch} with best_dev_loss {best_dev_loss:5.2f}'
    )
else:
    start_epoch = 0
    best_dev_loss = float('inf')
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.naive_bayes import BernoulliNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import os
import pickle
from utils import init_logger
import logging

init_logger()

# Load data
logging.info("Loading data")
try:
    # FIX: open the pickle inside a context manager so the file handle is
    # closed even if unpickling fails (the original left it open).
    with open("./data/sklearn-data.pickle", "rb") as f:
        data = pickle.load(f)
except FileNotFoundError:
    raise FileNotFoundError("Place data files in ./data/sklearn-data.pickle")
x_train, y_train = data["x_train"], data["y_train"]
x_test, y_test = data["x_test"], data["y_test"]

# Transforms reviews into feature vectors: removes stop words and records only
# whether each word is present (binary), hashed into 2**9 features.
vectorizer = HashingVectorizer(stop_words='english', binary=True, n_features=2**9)

# Load vectorized reviews from file, or vectorize them and save for later.
x_train_path = './data/x_train_vec.pkl'
y_train_path = "./data/y_train_vec.pkl"
if os.path.isfile(x_train_path) and os.path.isfile(y_train_path):
    logging.info("Loading vectorized data")
args.max_wn_concepts_count = 0 elif args.kb == 'wn': args.max_nell_concepts_count = 0 elif args.kb == 'none': args.max_wn_concepts_count = 0 args.max_nell_concepts_count = 0 args.record_path = '{}_{}_{}_{}_{}_{}'.format( args.task, f'{args.decoder}{"+pos" if args.pos else ""}{"+uni" if args.uni_intent else ""}', f'seed{args.seed}' if args.do_train else 'eval-pred', f'seq{args.max_seq_len}', f'{args.kb}', time.strftime('%Y-%m-%d--%H-%M-%S', time.localtime(time.time()))) if __name__ == '__main__': init_logger(args) set_seed(args.seed) tokenizer = load_tokenizer(args) trainer = Trainer(args=args, train_data=load_and_cache_dataset(args, tokenizer, mode='train'), dev_data=load_and_cache_dataset(args, tokenizer, mode='dev'), test_data=load_and_cache_dataset(args, tokenizer, mode='test')) if args.do_train:
def main():
    """Driver for the Ubuntu dialogue retrieval experiment.

    Selects the model class from --model_type, builds dataloaders via
    UbuntuCorpus, then runs training, test evaluation, and (unimplemented)
    inference according to the do_train / do_test / do_infer flags.
    """
    parser = BasicConfig()
    # Peek at --model_type first: it decides which (model, config) pair to use.
    model_type = vars(parser.parse_known_args()[0])["model_type"].lower()
    model_class, configs = MODEL_CLASSES[model_type]
    args = configs(parser)
    args = checkoutput_and_setcuda(args)
    logger = init_logger(args)
    logger.info('Dataset collected from {}'.format(args.data_dir))

    # Set seed
    set_seed(args)
    processor = UbuntuCorpus(args)
    logger.info(args)

    model = model_class(args=args)
    # model.to(args.device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    # Training
    if args.do_train:
        # Effective batch sizes scale with the number of GPUs.
        args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
        train_dataloader = processor.create_batch(data_type="train")
        args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
        eval_dataloader = processor.create_batch(data_type="eval")
        # Log 5 times per epoch; validate once per epoch (in optimizer steps).
        args.logging_steps = len(train_dataloader) // args.gradient_accumulation_steps // 5
        args.valid_steps = len(train_dataloader) // args.gradient_accumulation_steps
        trainer_op = trainer(args=args,
                             model=model,
                             optimizer=optimizer,
                             train_iter=train_dataloader,
                             eval_iter=eval_dataloader,
                             logger=logger,
                             num_epochs=args.num_train_epochs,
                             save_dir=args.output_dir,
                             log_steps=args.logging_steps,
                             valid_steps=args.valid_steps,
                             valid_metric_name="+R10@1")
        trainer_op.train()
        print('training complete!')

    # Test
    if args.do_test:
        args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
        # NOTE(review): testing reuses the "eval" split here — confirm a
        # dedicated "test" split is not expected.
        test_dataloader = processor.create_batch(data_type="eval")
        trainer_op = trainer(args=args,
                             model=model,
                             optimizer=optimizer,
                             train_iter=None,
                             eval_iter=None,
                             logger=logger,
                             num_epochs=args.num_train_epochs,
                             save_dir=args.output_dir,
                             log_steps=None,
                             valid_steps=None,
                             valid_metric_name="+R10@1")
        # Restore the best checkpoint saved during training before evaluating.
        best_model_file = os.path.join(args.output_dir, args.fusion_type + "_best.model")
        best_train_file = os.path.join(args.output_dir, args.fusion_type + "_best.train")
        trainer_op.load(best_model_file, best_train_file)
        evaluate(args, trainer_op.model, test_dataloader, logger)
        print('test complete')

    # TODO: Infer case study
    if args.do_infer:
        # (translated from Chinese) "No idea what to write; too lazy to
        # think about it." — inference is still unimplemented.
        pass
help="systolic array design directory") parser.add_argument('--task', type=str, default="mm", help="search task") args = parser.parse_args() search_obj = args.objective # Set up the working directory now = datetime.now() outdir = args.outdir os.makedirs(outdir, exist_ok=True) explore_config = "" exp_name = f"O_{args.objective}-C_{explore_config}-T_{now.date()}-{now.time()}" outdir = f"{outdir}/{exp_name}" os.makedirs(outdir, exist_ok=True) logger = utils.init_logger(outdir) # Load the constraints cst = Constraint(f'cst/{args.cst}.json') # Set up the searching algorithm stop criteria max_epochs = -1 max_time = -1 if args.stop_after_epochs > 0: max_epochs = args.stop_after_epochs elif args.stop_after_time > 0: max_time = args.stop_after_time else: max_time = 60 # Set up the parallel executor
def main():
    """Train and evaluate a message-passing GNN on random chain-PGM data.

    Builds one of several model variants (--model_name), optionally resumes
    from --model_path, trains for --train_epoches epochs (checkpointing once
    per epoch), then reports test accuracy against the label-propagation
    baseline carried in `lp_label`.
    """
    args = parse_args()
    # Per-run log / tensorboard sub-directory stamped with the start time.
    subdir = f'raw_nn_{args.neighbour}_at_{datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")}'
    utils.init_logger('./logs/', subdir, print_log=False)
    logging.info(str(args))
    writer = SummaryWriter(log_dir=f'./tf_logs/{subdir}')
    nfeature_dim = 2
    print(nfeature_dim)
    # Model zoo: `model` consumes node features, `emodel` embeds the raw
    # edge features into the edge-type tensor fed to the message passing.
    if args.model_name == 'mp_nn':
        model = mp_sequential(
            mp_conv_v2(nfeature_dim, 64, 16, extension=mp_conv_type.ORIG_WITH_NEIGHBOR),
            mp_conv_residual(64, 64, 16), torch.nn.Conv2d(64, 128, 1),
            torch.nn.BatchNorm2d(128), torch.nn.ReLU(inplace=True),
            mp_conv_residual(128, 64, 16), torch.nn.Conv2d(128, 256, 1),
            torch.nn.BatchNorm2d(256), torch.nn.ReLU(inplace=True),
            mp_conv_residual(256, 64, 16), torch.nn.Conv2d(256, 128, 1),
            torch.nn.BatchNorm2d(128), torch.nn.ReLU(inplace=True),
            mp_conv_residual(128, 64, 16), torch.nn.Conv2d(128, 64, 1),
            torch.nn.BatchNorm2d(64), torch.nn.ReLU(inplace=True),
            mp_conv_residual(64, 64, 16), torch.nn.Conv2d(64, 2, 1))
        emodel = torch.nn.Sequential(torch.nn.Conv2d(1, 64, 1),
                                     torch.nn.ReLU(inplace=True),
                                     torch.nn.Conv2d(64, 16, 1))
    elif args.model_name == 'mp_nn_comp':
        # Same as 'mp_nn' but with five residual blocks at width 256.
        model = mp_sequential(
            mp_conv_v2(nfeature_dim, 64, 16, extension=mp_conv_type.ORIG_WITH_NEIGHBOR),
            mp_conv_residual(64, 64, 16), torch.nn.Conv2d(64, 128, 1),
            torch.nn.BatchNorm2d(128), torch.nn.ReLU(inplace=True),
            mp_conv_residual(128, 64, 16), torch.nn.Conv2d(128, 256, 1),
            torch.nn.BatchNorm2d(256), torch.nn.ReLU(inplace=True),
            mp_conv_residual(256, 64, 16), mp_conv_residual(256, 64, 16),
            mp_conv_residual(256, 64, 16), mp_conv_residual(256, 64, 16),
            mp_conv_residual(256, 64, 16), torch.nn.Conv2d(256, 128, 1),
            torch.nn.BatchNorm2d(128), torch.nn.ReLU(inplace=True),
            mp_conv_residual(128, 64, 16), torch.nn.Conv2d(128, 64, 1),
            torch.nn.BatchNorm2d(64), torch.nn.ReLU(inplace=True),
            mp_conv_residual(64, 64, 16), torch.nn.Conv2d(64, 2, 1))
        emodel = torch.nn.Sequential(torch.nn.Conv2d(1, 64, 1),
                                     torch.nn.ReLU(inplace=True),
                                     torch.nn.Conv2d(64, 16, 1))
    elif args.model_name == 'simple_gnn':
        model = mp_sequential(
            mp_conv_v2(nfeature_dim, 64, 16, extension=mp_conv_type.ORIG_WITH_NEIGHBOR),
            mp_conv_residual(64, 64, 16), torch.nn.Conv2d(64, 2, 1))
        emodel = torch.nn.Sequential(torch.nn.Conv2d(1, 64, 1),
                                     torch.nn.ReLU(inplace=True),
                                     torch.nn.Conv2d(64, 16, 1))
    elif args.model_name == 'iid':
        # Per-node MLP baseline: no message passing at all.
        model = mp_sequential(torch.nn.Conv2d(nfeature_dim, 64, 1),
                              torch.nn.ReLU(True), torch.nn.Conv2d(64, 2, 1))
        emodel = torch.nn.Sequential(torch.nn.Conv2d(1, 64, 1),
                                     torch.nn.ReLU(inplace=True),
                                     torch.nn.Conv2d(64, 16, 1))
    logging.info('model {} created'.format(str(model)))
    np.random.seed(23456)
    cap = args.hop_cap
    transition = list(np.random.randn(2 * 2))
    # Static k-NN connectivity for the chain; shared across all batches.
    nn_idx, efeature = generate_knn_table(args.chain_length, args.neighbour)
    if args.use_cuda:
        nn_idx, efeature = nn_idx.cuda(), efeature.cuda()
        model.cuda()
        emodel.cuda()
    # train_data_set = lib.data.RandomPGM(args.chain_length, cap, transition)
    # dataloader = torch.utils.data.DataLoader(train_data_set,
    #                                          batch_size=args.batch_size,
    #                                          shuffle=True,
    #                                          num_workers=8,
    #                                          worker_init_fn=worker_init_fn)
    train_dataset = lib.data.RandomPGMData(args.train_path,
                                           pgm_type="raw",
                                           size=args.train_size)
    test_dataset = lib.data.RandomPGMData(args.test_path,
                                          pgm_type="raw",
                                          size=args.test_size)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=8,
                                               worker_init_fn=worker_init_fn)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=8,
                                              worker_init_fn=worker_init_fn)
    optimizer = torch.optim.Adam(list(model.parameters()) +
                                 list(emodel.parameters()),
                                 lr=3e-3)
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer, lr_lambda=lambda x: max(0.98**x, 1e-6))
    start_epoch = 0
    gcnt = 0
    # Resume model, edge model, optimizer and scheduler state if a
    # checkpoint exists at --model_path.
    if os.path.exists(args.model_path):
        ckpt = torch.load(args.model_path)
        model.load_state_dict(ckpt['model_state_dict'])
        emodel.load_state_dict(ckpt['emodel_state_dict'])
        optimizer.load_state_dict(ckpt['optimizer_state_dict'])
        scheduler.load_state_dict(ckpt['lr_sche'])
        start_epoch = ckpt['epoch']
        gcnt = ckpt['gcnt']

    def get_model_dict():
        # Everything needed to resume training from this point.
        return {
            'model_state_dict': model.state_dict(),
            'emodel_state_dict': emodel.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'lr_sche': scheduler.state_dict(),
            'epoch': epoch,
            'gcnt': gcnt
        }

    def get_filename(epoch):
        return f'raw_nn_{args.neighbour}_epoches_{epoch}.pt'

    epoch = 0
    for epoch in tqdm(range(start_epoch, args.train_epoches)):
        # NOTE(review): checkpoint is written at the *start* of each epoch,
        # i.e. before this epoch's updates — confirm this is intended.
        torch.save(get_model_dict(), get_filename(epoch))
        logging.info(f'save train result to {get_filename(epoch)}')
        scheduler.step()
        loss_seq = []
        acc_seq = []
        acc_lp_seq = []
        for bcnt, (nfeature, nlabel, lp_label) in tqdm(enumerate(train_loader)):
            optimizer.zero_grad()
            if args.use_cuda:
                nfeature, nlabel, lp_label = nfeature.cuda(), nlabel.cuda(
                ), lp_label.cuda()
            if len(nfeature.shape) == 3:
                nfeature = nfeature.unsqueeze(-1)
            etype = emodel(efeature)
            # print(etype.shape)
            # print(nn_idx.shape)
            # Broadcast the shared connectivity / edge types over the batch.
            pred = model(nfeature, nn_idx.repeat(nfeature.shape[0], 1, 1),
                         etype.repeat(nfeature.shape[0], 1, 1, 1))
            pred = pred.squeeze(-1).permute(0, 2, 1).contiguous()
            loss = torch.nn.functional.cross_entropy(pred.view(-1, 2),
                                                     nlabel.view(-1))
            loss.backward()
            # NOTE(review): clip_grad_norm is deprecated in favor of
            # clip_grad_norm_ in newer torch releases.
            torch.nn.utils.clip_grad_norm(
                list(model.parameters()) + list(emodel.parameters()), 1.0)
            optimizer.step()
            loss_seq.append(loss.item())
            gcnt += 1
            pred_int = pred.argmax(dim=-1)
            all_correct = torch.sum(pred_int == nlabel)
            lp_correct = torch.sum(lp_label == nlabel)
            acc = all_correct.item() / np.prod(nlabel.shape)
            lp_acc = lp_correct.item() / np.prod(nlabel.shape)
            acc_lp_seq.append(lp_acc)
            acc_seq.append(acc)
            # Report running means every 10 global steps, then reset.
            if gcnt % 10 == 0:
                writer.add_scalar('syn_train/loss', loss.item(), gcnt)
                writer.add_scalar('syn_train/acc', acc, gcnt)
                writer.add_scalar('syn_train/lp_acc', lp_acc, gcnt)
                logging.info(
                    'epoch = {} bcnt = {} loss = {} acc = {} lp_acc={}'.format(
                        epoch, bcnt, np.mean(loss_seq), np.mean(acc_seq),
                        np.mean(acc_lp_seq)))
                loss_seq = []
                acc_seq = []
                acc_lp_seq = []
    # Save the final model once more after the last epoch completes.
    if epoch == args.train_epoches - 1:
        epoch = args.train_epoches
        torch.save(get_model_dict(), get_filename(epoch))
        logging.info(f'save train result to {get_filename(epoch)}')
    logging.info('training done!')
    # ---- evaluation over the held-out test set ----
    loss_seq = []
    acc_seq = []
    acc_lp_seq = []
    acc_global = []
    acc_lp_global = []
    gcnt = 0
    accum_acc = 0
    accum_acc_lp = 0
    model.eval()
    emodel.eval()
    for bcnt, (nfeature, nlabel, lp_label) in tqdm(enumerate(test_loader)):
        if args.use_cuda:
            nfeature, nlabel, lp_label \
                = nfeature.cuda(), nlabel.cuda(), lp_label.cuda()
        if len(nfeature.shape) == 3:
            nfeature = nfeature.unsqueeze(-1)
        etype = emodel(efeature)
        pred = model(nfeature, nn_idx.repeat(nfeature.shape[0], 1, 1),
                     etype.repeat(nfeature.shape[0], 1, 1, 1))
        pred = pred.squeeze(-1).permute(0, 2, 1).contiguous()
        loss = torch.nn.functional.cross_entropy(pred.view(-1, 2),
                                                 nlabel.view(-1))
        # NOTE(review): gradient clipping here has no effect (no backward
        # pass in evaluation) — looks copy-pasted from the train loop.
        torch.nn.utils.clip_grad_norm(
            list(model.parameters()) + list(emodel.parameters()), 1.0)
        loss_seq.append(loss.item())
        gcnt += 1
        pred_int = pred.argmax(dim=-1)
        all_correct = torch.sum(pred_int == nlabel)
        lp_correct = torch.sum(lp_label == nlabel)
        acc = all_correct.item() / np.prod(nlabel.shape)
        lp_acc = lp_correct.item() / np.prod(nlabel.shape)
        acc_global.append(acc)
        acc_lp_global.append(lp_acc)
        acc_lp_seq.append(lp_acc)
        acc_seq.append(acc)
        accum_acc += acc
        accum_acc_lp += lp_acc
        if gcnt % 10 == 0:
            logging.info(
                'epoch = {} bcnt = {} loss = {} acc = {} lp_acc={}'.format(
                    epoch, bcnt, np.mean(loss_seq), np.mean(acc_seq),
                    np.mean(acc_lp_seq)))
            writer.add_scalar('syn_test/loss', loss.item(), gcnt)
            writer.add_scalar('syn_test/acc', acc, gcnt)
            writer.add_scalar('syn_test/lp_acc', lp_acc, gcnt)
            loss_seq = []
            acc_seq = []
            acc_lp_seq = []
    logging.info(
        f'testing result: acc = {accum_acc / gcnt}, acc_lp = {accum_acc_lp / gcnt}'
    )
    logging.info(
        f'stddev = {st.stdev(acc_global)}, stddev_lp = {st.stdev(acc_lp_global)}'
    )
'/0/account/bind_phone', req_bind_phone.Handler, '/0/account/bind_phone_confirm', req_bind_phone_confirm.Handler, '/0/account/billing_history', req_billing_history.Handler, '/0/sms/resend_sms_code', req_resend_sms_code.Handler, '/0/task/list', req_task_list.Handler, '/0/task/detail', req_task_detail.Handler, '/0/task/check-in', req_task_checkin.Handler, '/0/task/comment', req_task_comment.Handler, '/0/task/download_app', req_task_download_app.Handler, '/0/task/domob', req_task_domob.Handler, '/0/task/offerwall', req_task_offerwall.Handler, '/0/order/alipay', req_order_alipay.Handler, '/0/order/phone_pay', req_order_phone_pay.Handler, '/0/order/exchange_code', req_order_exchange_code.Handler, '/0/order/exchange_list', req_order_exchange_list.Handler, '/0/order/detail', req_order_detail.Handler ) web.config.debug = True #web.internalerror = web.debugerror utils.init_logger("../log/interface.log") app = web.application(urls, globals(), autoreload=False) if __name__ == '__main__': app.run() else: application = app.wsgifunc()
import os import random import db from config_parser import Config from utils import init_logger CONFIGS = Config().get_config() LOGGER = init_logger('vocab_trainer', {'file'}, CONFIGS['logging.verbosity']) def add_new_words(): print 'Enter word to add to your list:' word = raw_input() if word: print 'Enter translations to this word:' translations = [] translation = raw_input() if not translation: print 'No translation provided!' return while translation: translations.append(translation) print 'Another translation?' translation = raw_input() word_id = db.insert_word(word, user_id=1, memorized=False, lang='english') for translation in translations: db.insert_translation(translation, word_id, 'ukrainian') else:
#! /usr/bin/env python # -*- coding: utf-8 -*- # import daemon import time import logging import config import utils import sender import rule_runner utils.init_logger(config.log_dir, config.log_level, config.log_console) logging.info("zoro start") utils.init_for_setup() zorocfg = utils.load_user_config(config.user_config_path) sender.init(zorocfg) rule_runner.init(zorocfg) rule_runner.runall(zorocfg) time.sleep(10000) # TODO # with daemon.DaemonContext(): # while True: # time.sleep(1)
models_file = join(configs.EMBEDINGS_ROOT, configs.W2V_FILENAME) if len(argv) == 1: print "usage: ./{} <model_name> <enabled (0 or 1)>".format(argv[0]) print "Chages options in {}".format(models_file) print "model_name: feature name or 'all'" print "enabled: 0 or 1" exit() model_name = argv[1] if (argv[2] == "1"): enabled = "true" else: enabled = "false" utils.init_logger() logging.info("set w2v[{}] = {}".format(model_name, enabled)) with open(models_file, "r") as ff: features = json.load(ff) if (model_name == 'all'): for i in features['w2v_models']: enable_feature(features, i, enabled) else: enable_feature(features, model_name, enabled) logging.info("save: {}".format(models_file)) with open(models_file, "w") as ff: json.dump(features, ff, indent=4, sort_keys=True)
def main(cli_args):
    """Train / evaluate BERT multi-label classification on GoEmotions.

    Loads config/<taxonomy>.json into an AttrDict, builds the model and
    tokenizer, then runs training (do_train) and/or checkpoint evaluation
    (do_eval), writing eval_results.txt into the output directory.
    """
    # Read from config file and make args
    config_filename = "{}.json".format(cli_args.taxonomy)
    with open(os.path.join("config", config_filename)) as f:
        args = AttrDict(json.load(f))

    # FIX: configure logging and seeding *before* the first logger.info call;
    # previously the parameters were logged before init_logger() ran, so the
    # message could be dropped or unformatted.
    init_logger()
    set_seed(args)
    logger.info("Training/evaluation parameters {}".format(args))

    args.output_dir = os.path.join(args.ckpt_dir, args.output_dir)

    processor = GoEmotionsProcessor(args)
    label_list = processor.get_labels()

    config = BertConfig.from_pretrained(
        args.model_name_or_path,
        num_labels=len(label_list),
        finetuning_task=args.task,
        id2label={str(i): label for i, label in enumerate(label_list)},
        label2id={label: i for i, label in enumerate(label_list)})
    tokenizer = BertTokenizer.from_pretrained(args.tokenizer_name_or_path)
    model = BertForMultiLabelClassification.from_pretrained(
        args.model_name_or_path, config=config)

    # GPU or CPU
    args.device = "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"
    model.to(args.device)

    # Load dataset; each split is optional and stays None when not configured.
    train_dataset = load_and_cache_examples(args, tokenizer, mode="train") if args.train_file else None
    dev_dataset = load_and_cache_examples(args, tokenizer, mode="dev") if args.dev_file else None
    test_dataset = load_and_cache_examples(args, tokenizer, mode="test") if args.test_file else None

    if dev_dataset is None:
        args.evaluate_test_during_training = True  # If there is no dev dataset, only use test dataset

    if args.do_train:
        global_step, tr_loss = train(args, model, tokenizer, train_dataset,
                                     dev_dataset, test_dataset)
        logger.info(" global_step = {}, average loss = {}".format(
            global_step, tr_loss))

    results = {}
    if args.do_eval:
        checkpoints = list(
            os.path.dirname(c) for c in sorted(
                glob.glob(args.output_dir + "/**/" + "pytorch_model.bin",
                          recursive=True)))
        if not args.eval_all_checkpoints:
            checkpoints = checkpoints[-1:]
        else:
            logging.getLogger("transformers.configuration_utils").setLevel(
                logging.WARN)  # Reduce logging
            logging.getLogger("transformers.modeling_utils").setLevel(
                logging.WARN)  # Reduce logging
        logger.info("Evaluate the following checkpoints: %s", checkpoints)

        for checkpoint in checkpoints:
            # Checkpoint directories are named ...-<global_step>.
            global_step = checkpoint.split("-")[-1]
            model = BertForMultiLabelClassification.from_pretrained(checkpoint)
            model.to(args.device)
            result = evaluate(args,
                              model,
                              test_dataset,
                              mode="test",
                              global_step=global_step)
            result = dict(
                (k + "_{}".format(global_step), v) for k, v in result.items())
            results.update(result)

        # Persist all checkpoint metrics in one summary file.
        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as f_w:
            for key in sorted(results.keys()):
                f_w.write("{} = {}\n".format(key, str(results[key])))
import os import shutil import click import pandas as pd from deepsense import neptune from sklearn.metrics import roc_auc_score import pipeline_config as cfg from pipelines import PIPELINES from utils import init_logger, read_params, create_submission, set_seed, save_evaluation_predictions, \ read_csv_time_chunks, cut_data_in_time_chunks, data_hash_channel_send, get_submission_hours_index set_seed(1234) logger = init_logger() ctx = neptune.Context() params = read_params(ctx) @click.group() def action(): pass @action.command() def prepare_data(): logger.info('chunking train') train = pd.read_csv(params.raw_train_filepath) cut_data_in_time_chunks(train, timestamp_column='click_time', chunks_dir=params.train_chunks_dir,
import time from collections import deque from contextlib import contextmanager from threading import RLock from config_parser import Config from errors import MySQLPoolSizeError from utils import init_logger, Singleton CONFIGS = Config().get_config() DEFAULT_TRIES = 3 DEFAULT_DELAY = 1 LOGGER = init_logger("DB", {"stream", "syslog"}, CONFIGS["logging.verbosity"]) class DBPool(object): """DBPool class represents DB pool, which handles and manages work with database connections. """ __metaclass__ = Singleton def __init__(self, configs, pool_size): if not isinstance(pool_size, int) or pool_size <= 0: raise ValueError("Bad value of POOL_SIZE!") if not isinstance(configs["ttl"], int) or configs["ttl"] <= 0: