Example #1
def main():
    parser = argparse.ArgumentParser(
        description='Tool for generating TFRecords from bam files')
    parser.add_argument("--conf", type=str, required=True)
    parser.add_argument("--whitelist", type=str, required=True)
    parser.add_argument("--log-output", type=str, default=None)
    args = parser.parse_args()

    utils.set_logging(fname=args.log_output)

    conf_fname = args.conf
    whitelist_fname = args.whitelist

    conf = utils.get_json(conf_fname)
    samples = get_whitelist(whitelist_fname)

    if not os.path.exists(conf['tensors_path']):
        logging.info("Creating the {} directory".format(conf['tensors_path']))
        os.makedirs(conf['tensors_path'])

    samples_size = len(samples)
    logging.info(
        "There is a total of {} samples to process".format(samples_size))

    n_threads = conf.get("n_threads", 1)

    if n_threads > 1:
        p = multiprocessing.Pool(n_threads)
        p.starmap(process_sample, ((sample, conf) for sample in samples))
    else:
        for sample in samples:
            process_sample(sample, conf)

    logging.info("Finished generating tensors")
Example #2
def main():
    args = load_cli_args_and_config()
    set_logging(args.logging.upper())

    permanent_token = get_permanent_user_token(args.version, args.app_id,
                                               args.app_secret,
                                               args.short_lived_token)
    print_token('User', permanent_token)
Example #3
def main():
    args = load_cli_args_and_config([(('--page_id', ), {
        'help': "Page ID.",
        'required': True
    })])
    print(args)

    set_logging(args.logging.upper())

    permanent_token = get_permanent_page_token(args.version, args.page_id,
                                               args.app_id, args.app_secret,
                                               args.short_lived_token)
    print_token('Page', permanent_token)
Example #4
def main():
    from argparse import ArgumentParser
    from utils import set_logging

    args = init_arguments()
    set_logging(args.verbose - args.quiet)

    opfile = '../features/op.csv.xz'
    binfile = 'op-w2v.bin'

    options(force=args.force, clean=args.clean)
    model = get_model(opfile, binfile)
    print("\n".join(model.vocab))
Example #5
def main():
    # set args
    args = parse_args()
    amqp_url = args.amqp_url
    queue = args.queue
    log_path = args.log_path
    thread_num = args.thread_num

    # set log
    set_logging(log_path)

    # threading
    for i in range(0, thread_num):
        tw = thread_worker.ThreadWorker(amqp_url, queue,
                                        "Thread Number : " + str(i))
        tw.start()
Example #6
class ServerComponent:
    logger = set_logging(config.SERVER_LOGGER_NAME)
    db = MongoConn(
        config.MONGODB['host'],
        config.MONGODB['port'],
        config.MONGODB['db_name'],
        config.MONGODB['username'],
        config.MONGODB['password'],
    ).db
Example #7
class ReporterComponent:
    logger = set_logging(config.REPORTER_LOGGER_NAME)
    message_db = RedisConn(host=config.REDIS_HOST,
                           port=config.REDIS_PORT,
                           db=config.MESSAGE_DB).db
    remote_db = RedisConn(host=config.REDIS_HOST,
                          port=config.REDIS_PORT,
                          db=config.REMOTE_DB).db
    group_db = RedisConn(host=config.REDIS_HOST,
                         port=config.REDIS_PORT,
                         db=config.GROUP_DB).db
Example #8
class ServerComponent:
    logger = set_logging(config.SERVER_LOGGER_NAME)
    session_db = RedisConn(host=config.REDIS_HOST,
                           port=config.REDIS_PORT,
                           db=config.SESSION_DB).db
    message_db = RedisConn(host=config.REDIS_HOST,
                           port=config.REDIS_PORT,
                           db=config.MESSAGE_DB).db
    remote_db = RedisConn(host=config.REDIS_HOST,
                          port=config.REDIS_PORT,
                          db=config.REMOTE_DB).db
Example #9
def processing():
    data = request.data
    dataDict = json.loads(data)

    # set log
    set_logging('server_log.txt')

    userId = dataDict['userId']
    unicodes = dataDict['unicodes']
    count = dataDict['count']
    env = dataDict['env']

    woff_addr = back_processing(userId, count, unicodes, env)
    tempResponse = json.dumps(
        {'woff': woff_addr}
        # {'woff': 'https://s3.ap-northeast-2.amazonaws.com/fontto/example/UhBeeKang-Ja.woff'}
    )
    response = app.response_class(response=tempResponse,
                                  status=200,
                                  mimetype='application/json')
    return response
Example #10
def main() -> None:
    formatted_help, args = read_args()
    set_logging(logging.DEBUG if args.debug else logging.INFO)

    if not args.video_location:
        logging.info(formatted_help)
        sys.exit(-1)
    loc = args.video_location
    file_reader = get_file_reader(loc)
    extend = file_reader.extend  # potential extend
    logging.debug('Potential extend: {}'.format(extend))
    # type checking
    video_type = check_video_type(
        file_reader, potential=VideoTypeEnum.get_type_from_extend(extend))
    parser = type_to_parser(video_type)  # the proper parser from parsers
    # raw video info, always json
    video_info = parser.parse(file_reader)
    # format video info indicated by fmt
    formatted_video_info = format_video_info(
        video_info, fmt='json' if args.json else 'text')
    print(formatted_video_info)
Example #11
def main():
    parser = argparse.ArgumentParser(
            description='Tool for extracting reads from bam files')
    parser.add_argument("--conf", type=str, required=True)
    args = parser.parse_args()
    conf = utils.get_json(args.conf)
    utils.set_logging()

    per_sample = extract_vcf_records(conf["vcfs"])

    n_threads = conf.get("n_threads", 1)
    logging.info("Starting to generate the reads on {} threads".format(n_threads))

    if n_threads > 1:
        p = multiprocessing.Pool(n_threads)
        p.starmap(extract_reads,
                  ((sample, records, conf) for (sample, records) in per_sample.items()))
    else:
        for (sample, records) in per_sample.items():
            extract_reads(sample, records, conf)

    logging.info("Finished extracting the reads")
Example #12
def main():
    args = init_arguments()
    set_logging(args.verbose - args.quiet)

    def file_exists(path):
        return path and os.path.exists(path)

    if args.subcmd == 'train':
        if not file_exists(args.datafile) and not (file_exists(
                args.vulfile) and file_exists(args.opfile)):
            logging.error(
                "Need to specify the data file or vul/op files to read.")
            logging.error(
                " For example: %s train -d %s [ -u %s -o %s ]" %
                (sys.argv[0], 'data.csv', '../run-anlyzers/vuls.csv.xz',
                 '../features/op-ft.csv.xz'))
            sys.exit(1)

        logging.info('training...')
        data = get_vul_op_data(args.vulfile, args.opfile, args.datafile)
        train(data)

    elif args.subcmd == 'predict':
        if not file_exists(args.sol):
            logging.error("No Solidity file")
            sys.exit(1)

        logging.info('predicting...')
        vuls = ALL_VULS
        data = sol_to_data(args.sol)

        preds = predict(data, vuls)
        print_prediction(vuls, preds)

    else:
        logging.error("unknown command '%s'" % args.subcmd)
        sys.exit(1)
Example #13
out_dir = ROOT_DIR / Path('runs/{:d}/'.format(args.experiment_id))

if not out_dir.exists():
    out_dir.mkdir()
else:
    ans = input(
        "Will delete all the files under {:s}. Please enter Yes or No: \n".
        format(out_dir))
    if ans.lower() == "yes" or ans.lower() == "y":
        filelist = [f for f in out_dir.files()]
        for f in filelist:
            print("deleting {:s}".format(f))
            Path.remove(f)

# set up logging
logger = set_logging(out_dir / "model.log")
stderr_write_copy = sys.stderr.write
stdout_write_copy = sys.stdout.write
# redirect print to log
sys.stderr.write = logger.error
sys.stdout.write = logger.info
sys.stderr.write = stderr_write_copy
sys.stdout.write = stdout_write_copy

# set up gpus
print("args:")
print(str(args))
logger.info("  args:")
logger.info("\n " + str(args))
with open(out_dir / "args.pickle", "wb") as pickle_out:
    pickle.dump(args, pickle_out)
Example #14
    parser.add_argument('-a', '--algo', action='store', default="logistic",
            help="specified the algorithm to train or predict: < " + " | ".join(VulnTrainer.algo_trainers.keys()) + " >")
    parser.add_argument("-v", "--verbose",  action="count", default=0, help="increse detail information")
    parser.add_argument("-q", "--quiet",  action="count", default=0, help="decrese detail information")
    parser.add_argument("-f", "--force", action="store_true", default=False, help="force to reload data")
    parser.add_argument("-s", "--sol", action="store", default=False, help="Solidity file")
    parser.add_argument("-t", "--targets", action="append", default=None, help="target vulnerability to train / predict")
    parser.add_argument("-d", "--datafile", action="store", default=None, help="Data file to train")
    parser.add_argument('-u', '--vulfile', action='store', default=None, help="vunlerability data")
    parser.add_argument('-o', '--opfile', action='store', default=None, help="feature data")
    parser.add_argument("-p", "--print-stat", action="store_true", default=False, help="pirnt statistics info")
    parser.add_argument("-z", "--fuzz-match", action="store_true", default=False, help="fuzz to match theopcode seqence")
    return parser.parse_args()

Args = init_arguments()
set_logging(Args.verbose - Args.quiet)

#import const
import w2v_cnn
from trainer import TestTrainer, LogisticTrainer, LinearSvcTrainer, SvcTrainer, RndForestTrainer, DecisionTreeTrainer, KnnTrainer, GradientBoostingTrainer
from common import get_vul_op_data, ALL_VULS
from common import stem, sol_to_ops, sol_to_data
#from common import print_prediction
from common import ModelRepo
from utils import get_pipe_model
from utils import get_pipe_transform
from asm_parser import AsmParser
from loc2linepos import loc_to_linepos
import os
import sys
import logging
Example #15
#TODO: Before running this script please make sure that there are json files with scraped school data in directory 'jedeschule-scraper/data'
#TODO: Before running this script please make sure that there is one json file with shl school data in directory 'jedeschule-scraper/shl/data/in/shl_data.json'

import fuzzywuzzy
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import pandas as pd
import numpy as np
import utils
import json
import datetime
import os
from pathlib import Path

# initialize logger
log = utils.set_logging()

log.info('Loading input data')
### loading scraped schools
df_scrape = utils.load_scraped_data()

### loading shl schools
df_shl_in, df_shl = utils.load_shl_data()

### grab relevant columns for fuzzy matching
df_scrape_fuzzy = df_scrape[[
    'id_scraped', 'name_scraped', 'anschrift_scraped', 'plz_scraped',
    'ort_scraped'
]]
df_shl_fuzzy = df_shl[['id', 'name', 'anschrift', 'plz', 'ort']]
Example #16
def execute(rank, world_size, name, quantized, dataset, output, training_pref,
            ssd_settings, trained_model_path, verbose):

    setup(rank, world_size)

    if rank == 0:
        logname = '{}/{}.log'.format(output['model'], name)
        logger = set_logging('Train_SSD', logname, verbose)

    qbits = 8 if quantized else None
    ssd_settings['n_classes'] += 1
    plot = Plotting(save_dir=output['plots'])

    # Initialize dataset
    train_loader = get_data_loader(dataset['train'][rank],
                                   training_pref['batch_size_train'],
                                   training_pref['workers'],
                                   ssd_settings['input_dimensions'],
                                   ssd_settings['object_size'],
                                   rank,
                                   shuffle=True,
                                   return_pt=True,
                                   qbits=qbits)

    val_loader = get_data_loader(dataset['validation'][rank],
                                 training_pref['batch_size_validation'],
                                 training_pref['workers'],
                                 ssd_settings['input_dimensions'],
                                 ssd_settings['object_size'],
                                 rank,
                                 shuffle=False,
                                 return_pt=True,
                                 qbits=qbits)

    # Build SSD network
    ssd_net = build_ssd(rank, ssd_settings).to(rank)
    if rank == 0:
        logger.debug('SSD architecture:\n{}'.format(str(ssd_net)))

    # Initialize weights
    if trained_model_path:
        ssd_net.load_weights(trained_model_path)
    else:
        ssd_net.mobilenet.apply(weights_init)
        ssd_net.loc.apply(weights_init)
        ssd_net.cnf.apply(weights_init)
        ssd_net.reg.apply(weights_init)

    # Data parallelization
    cudnn.benchmark = True
    net = DDP(ssd_net, device_ids=[rank])

    # Set training objective parameters
    optimizer = optim.SGD(net.parameters(),
                          lr=1e-3,
                          momentum=training_pref['momentum'],
                          weight_decay=training_pref['weight_decay'])
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[20, 30, 50, 60, 70, 80, 90], gamma=0.5)
    if rank == 0:
        cp_es = EarlyStopping(patience=training_pref['patience'],
                              save_path='%s/%s.pth' % (output['model'], name))
    criterion = MultiBoxLoss(rank,
                             ssd_settings['n_classes'],
                             min_overlap=ssd_settings['overlap_threshold'])
    scaler = GradScaler()
    verbose = verbose and rank == 0
    train_loss, val_loss = torch.empty(3, 0), torch.empty(3, 0)

    for epoch in range(1, training_pref['max_epochs'] + 1):

        # Start model training
        if verbose:
            tr = trange(len(train_loader), file=sys.stdout)
        all_epoch_loss = torch.zeros(3)
        net.train()

        # Ternarize weights
        if quantized:
            for m in net.modules():
                if is_first_or_last(m):
                    delta = get_delta(m.weight.data)
                    m.weight.delta = delta
                    m.weight.alpha = get_alpha(m.weight.data, delta)

        for batch_index, (images, targets) in enumerate(train_loader):

            # Ternarize weights
            if quantized:
                for m in net.modules():
                    if is_first_or_last(m):
                        m.weight.org = m.weight.data.clone()
                        m.weight.data = to_ternary(m.weight.data,
                                                   m.weight.delta,
                                                   m.weight.alpha)

            with autocast():
                outputs = net(images)
                l, c, r = criterion(outputs, targets)
                loss = l + c + r
            scaler.scale(loss).backward()

            if quantized:
                for m in net.modules():
                    if is_first_or_last(m):
                        m.weight.data.copy_(m.weight.org)

            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

            if quantized:
                for m in net.modules():
                    if is_first_or_last(m):
                        m.weight.org.copy_(m.weight.data.clamp_(-1, 1))

            all_epoch_loss += torch.tensor([l.item(), c.item(), r.item()])
            av_epoch_loss = all_epoch_loss / (batch_index + 1)

            info = 'Epoch {}, {}'.format(epoch, get_loss_info(av_epoch_loss))
            if verbose:
                tr.set_description(info)
                tr.update(1)

        if rank == 0:
            logger.debug(info)
        train_loss = torch.cat((train_loss, av_epoch_loss.unsqueeze(1)), 1)
        if verbose:
            tr.close()

        # Start model validation
        if verbose:
            tr = trange(len(val_loader), file=sys.stdout)
        all_epoch_loss = torch.zeros(3)
        net.eval()

        with torch.no_grad():

            # Ternarize weights
            if quantized:
                for m in net.modules():
                    if is_first_or_last(m):
                        m.weight.org = m.weight.data.clone()
                        m.weight.data = to_ternary(m.weight.data)

            for batch_index, (images, targets) in enumerate(val_loader):
                outputs = net(images)
                l, c, r = criterion(outputs, targets)
                l, c, r = reduce_tensor(l.data, c.data, r.data)
                all_epoch_loss += torch.tensor([l.item(), c.item(), r.item()])
                av_epoch_loss = all_epoch_loss / (batch_index + 1)
                info = 'Validation, {}'.format(get_loss_info(av_epoch_loss))
                if verbose:
                    tr.set_description(info)
                    tr.update(1)

            if rank == 0:
                logger.debug(info)
            val_loss = torch.cat((val_loss, av_epoch_loss.unsqueeze(1)), 1)
            if verbose:
                tr.close()

            plot.draw_loss(train_loss.cpu().numpy(),
                           val_loss.cpu().numpy(),
                           quantized=quantized)

            if rank == 0 and cp_es(av_epoch_loss.sum(0), ssd_net):
                break

            dist.barrier()

            if quantized:
                for m in net.modules():
                    if is_first_or_last(m):
                        m.weight.org.copy_(m.weight.data)
        scheduler.step()
    cleanup()
Example #17
File: main.py Project: berniebear/trec_is
def main():
    args = get_arguments()
    random.seed(args.random_seed)
    np.random.seed(args.random_seed)  # sklearn use np to generate random value

    # Create folders and set logging format
    args.model_dir = os.path.join(args.out_dir, 'ckpt-{}'.format(args.class_weight_scheme))
    args.log_dir = os.path.join(args.out_dir, 'log')
    args.ensemble_dir = os.path.join(args.out_dir, 'ensemble-{}'.format(args.class_weight_scheme))
    if args.class_weight_scheme == 'customize':
        args.model_dir = os.path.join(args.model_dir, 'weight{}'.format(args.additional_weight))
        args.ensemble_dir = os.path.join(args.ensemble_dir, 'weight{}'.format(args.additional_weight))
    prepare_folders(args)
    logger = set_logging(args)
    logger.info("Here is the arguments of this running:")
    logger.info("{}".format(args))
    utils.check_args_conflict(args)

    # Set files which contain data for training and test. If we use "trecis2019-A", it means we want to tune parameters.
    args.data_prefix = "trecis2019-B"
    # Note that for 2019-B submission, all '2019' means '2019-B' and '2018' means '2018 + 2019-A'
    label_file = os.path.join(args.data_dir, 'ITR-H.types.v{}.json'.format(
        4 if args.data_prefix == "trecis2019-B" else 3))
    tweet_file_list = [os.path.join(args.data_dir, 'all-tweets.txt')]
    tweet_file_list_2019 = [os.path.join(args.data_dir, 'all-tweets-2019.txt')]
    train_file_list = [os.path.join(args.data_dir, 'TRECIS-CTIT-H-Training.json')]
    train_file_list += [os.path.join(args.data_dir, 'TRECIS-2018-TestEvents-Labels',
                                     'assr{}.test'.format(i)) for i in range(1, 7)]
    if args.data_prefix == "trecis2019-B":
        train_file_list += [os.path.join(args.data_dir, '2019ALabels', '2019A-assr{}.json'.format(i)) for i in range(1, 6)]
        train_file_list += [os.path.join(args.data_dir, '2019ALabels', '2019-assr2.json')]
    test_raw_tweets_json_folder = 'download_tweets'
    # Some output files which have been formalized for further use.
    formal_train_file = os.path.join(args.data_dir, 'train.txt{}'.format('_small' if args.sanity_check else ''))
    formal_test_file = os.path.join(args.data_dir, 'test.txt{}')
    tweet_text_out_file = os.path.join(args.out_dir, 'tweets-clean-text.txt')
    tweet_id_out_file = os.path.join(args.out_dir, 'tweets-id.txt')
    tweet_text_out_file_2019 = os.path.join(args.out_dir, 'tweets-clean-text-2019.txt')
    tweet_id_out_file_2019 = os.path.join(args.out_dir, 'tweets-id-2019.txt')
    predict_priority_score_out_file = os.path.join(args.out_dir, 'predict_priority_score.txt')

    # Set files for submission.
    args.model_name = '{0}{1}'.format(args.model, '-event' if args.event_wise else '')
    args.dev_label_file = os.path.join(args.ensemble_dir, 'dev_label.txt')
    args.dev_predict_file = os.path.join(args.ensemble_dir, 'dev_predict_{}.txt'.format(args.model_name))
    args.test_predict_file = os.path.join(args.ensemble_dir, 'test_predict_{}.txt'.format(args.model_name))
    args.submission_folder = utils.prepare_submission_folder(args)
    args.submission_file = os.path.join(args.submission_folder, 'submission_{}'.format(args.model_name))

    # As the original files provided by TREC are quite messy, we formalize them into train and test files.
    utils.formalize_files(train_file_list, formal_train_file, args)
    utils.formalize_test_file(test_raw_tweets_json_folder, formal_test_file, prefix=args.data_prefix)
    logger.info("The training data file is {0} and testing data file is {1}".format(
        formal_train_file, formal_test_file))

    # Step0. Extract some info which can be used later (also useful for generating submission files).
    label2id, majority_label, short2long_label = utils.get_label2id(label_file, formal_train_file, args.cv_num)
    id2label = utils.get_id2label(label2id)
    class_weight = utils.get_class_weight(args, label2id, id2label, formal_train_file)

    # When getting the submission, there is no need to run all of the following steps; we only read the `test_predict_file` and
    # pick some classes as the final output according to the policy (such as top-2 or auto-threshold).
    # You MUST run `--predict_mode` in advance to get the `test_predict_file` prepared.
    if args.get_submission:
        postpro = PostProcess(args, label2id, id2label, class_weight, majority_label, short2long_label,
                              formal_train_file, formal_test_file, test_raw_tweets_json_folder,
                              predict_priority_score_out_file)
        postpro.pick_labels_and_write_final_result()
        quit()

    # Step1. Preprocess and extract features for all tweets
    tweetid_list, tweet_content_list = utils.get_tweetid_content(tweet_file_list)
    utils.write_tweet_and_ids(tweetid_list, tweet_content_list, tweet_text_out_file, tweet_id_out_file)
    tweetid_list_2019, tweet_content_list_2019 = utils.get_tweetid_content(tweet_file_list_2019)
    utils.write_tweet_and_ids(tweetid_list_2019, tweet_content_list_2019, tweet_text_out_file_2019,
                              tweet_id_out_file_2019)
    # Note that before `extract_features()`, we should manually run the `extract_features.sh` in `feature_tools`.
    # quit()  # The `extract_features.sh` only need to be run once for the same dataset.
    preprocess = Preprocess(args, tweetid_list, tweet_content_list, label2id, tweet_id_out_file)
    preprocess.extract_features()
    preprocess_2019 = Preprocess(args, tweetid_list_2019, tweet_content_list_2019, label2id,
                                 tweet_id_out_file_2019, test=True)
    preprocess_2019.extract_features()

    if args.train_regression:
        data_x, data_score = preprocess.extract_train_data(formal_train_file, get_score=True)
        train_regression = TrainRegression(args, data_x, data_score)
        if args.cross_validate:
            train_regression.cross_validate()
            quit()

    if args.cross_validate:
        # Step2. Train and Cross-validation (for tuning hyper-parameters).
        # If we want to do ensemble in the future, we need the prediction on dev data by setting `--cross_validate`.
        if args.event_wise:
            data_x, data_y, event2idx_list, line_num = preprocess.extract_train_data(formal_train_file)
            data_predict_collect = np.zeros([line_num, len(label2id)])
            metrics_collect = []
            metric_names = None
            for event_type in utils.idx2event_type:
                it_data_x, it_data_y = data_x[event_type], data_y[event_type]
                train = Train(args, it_data_x, it_data_y, id2label, preprocess.feature_len, class_weight, event_type)
                metrics, predict_score = train.train()
                for i, idx in enumerate(event2idx_list[event_type]):
                    data_predict_collect[idx] = predict_score[i]
                metrics_collect.append((metrics, it_data_x.shape[0]))
                if metric_names is None:
                    metric_names = train.metric_names
            utils.get_final_metrics(metrics_collect, metric_names)
        else:
            data_x, data_y = preprocess.extract_train_data(formal_train_file)
            train = Train(args, data_x, data_y, id2label, preprocess.feature_len, class_weight)
            _, data_predict_collect = train.train()
        if args.predict_mode:
            utils.write_predict_and_label(args, formal_train_file, label2id, data_predict_collect)

    if args.predict_mode:
        # Step3. Get the 2019 test data, and retrain the model on all training data, then predict on the 2019-test
        if args.event_wise:
            data_x, data_y, _, _ = preprocess.extract_train_data(formal_train_file)
            test_x, event2idx_list, line_num = preprocess_2019.extract_formalized_test_data(formal_test_file)
            test_predict_collect = np.zeros([line_num, len(label2id)])
            for event_type in utils.idx2event_type:
                it_data_x, it_data_y, it_test_x = data_x[event_type], data_y[event_type], test_x[event_type]
                if len(it_test_x) == 0:
                    print("[WARNING] There are no event belongs to {} for the test data".format(event_type))
                    continue
                train = Train(args, it_data_x, it_data_y, id2label,
                              preprocess_2019.feature_len, class_weight, event_type)
                train.train_on_all()
                predict_score = train.predict_on_test(it_test_x)
                for i, idx in enumerate(event2idx_list[event_type]):
                    test_predict_collect[idx] = predict_score[i]
        else:
            data_x, data_y = preprocess.extract_train_data(formal_train_file)
            test_x = preprocess_2019.extract_formalized_test_data(formal_test_file)
            train = Train(args, data_x, data_y, id2label, preprocess_2019.feature_len, class_weight)
            train.train_on_all()
            test_predict_collect = train.predict_on_test(test_x)
        utils.write_predict_res_to_file(args, test_predict_collect)

        if args.train_regression:
            test_x = preprocess_2019.extract_formalized_test_data(formal_test_file)
            if args.event_wise:
                # For the event_wise setting, many additional things are extracted; all we need here is test_x.
                test_x = test_x[0]
            train_regression.train()
            predict_priority_score = train_regression.predict_on_test(test_x)
            utils.write_predict_score_to_file(predict_priority_score, predict_priority_score_out_file)

    if args.ensemble is not None:
        # TODO(junpeiz): Average the priority score for ensemble.
        # Step4 (optional). Do the ensemble of different model
        if args.event_wise:
            raise NotImplementedError("We don't want to ensemble for event-wise models")
        else:
            out_file = os.path.join(args.out_dir, 'ensemble_out.txt')
            # Note the file list contains predictions from all models with and without the '-event' suffix.
            # So, we need to train both event-wise and not event-wise models or just delete those files in the folder.
            dev_predict_file_list = utils.get_predict_file_list(args.ensemble_dir, 'dev_predict_')
            test_predict_file_list = utils.get_predict_file_list(args.ensemble_dir, 'test_predict_')
            train_x = utils.get_ensemble_feature(dev_predict_file_list)
            train_y = utils.get_ensemble_label(args.dev_label_file)
            print("The shape of ensemble train_x is {0}".format(train_x.shape))
            utils.ensemble_cross_validate(train_x, train_y, id2label, train.mlb, args.ensemble)
            test_x = utils.get_ensemble_feature(test_predict_file_list)
            predict = utils.ensemble_train_and_predict(train_x, train.mlb.transform(train_y), test_x,
                                                       id2label, args.ensemble)
            predict = [id2label[x] for x in predict]
            with open(out_file, 'w', encoding='utf8') as f:
                for it_predict in predict:
                    f.write("{}\n".format(it_predict))
            print("The ensemble result has been written to {}".format(out_file))
Example #18
    #svgs2ttf(HASH): convert HASH to ttf file (all svgs in HASH -> ttf)
    logging.info("svgs2ttf start for userID: %s, count: %s" % (userID, count))
    ttf_converted = svgs2ttf(svg_set)

    #ttf2S3(userID, count, ttf, s3key_ttfs): save converted ttf file to S3 (save the finished ttf to S3)
    logging.info("save ttf2S3 start for userID: %s, count: %s" %
                 (userID, count))
    # ttf2S3(env, userID, count, ttf_converted)
    woff_addr = woff2S3(env, userID, count, ttf_converted)

    logging.info("-----END BACKPROCESSING for userID: %s, count: %s-----" %
                 (userID, count))
    logging.info(
        "------------------------------------------------------------")
    return woff_addr


def test():
    test_userID = 'seo'
    test_count = 0
    test_unicodes = ["B9DD"]
    test_env = 'development'
    woff_addr = back_processing(test_userID, test_count, test_unicodes,
                                test_env)
    print(woff_addr)


if __name__ == '__main__':
    set_logging('test_log.txt')
    test()
Example #19
        'preexists': False
    },
    'output_format': {
        'name': 'output_format',
        'flag': 't',
        'type': str,
        'help_text': 'Keys records file output format: choices are `oasis_keys` and `list_keys`',
        'required_on_command_line': False,
        'required_for_script': False
    }
}


if __name__ == '__main__':

    logger = mdk_utils.set_logging()
    logger.info('Console logging set')

    try:
        logging.info('Parsing script arguments')
        args = mdk_utils.parse_script_args(SCRIPT_ARGS_METADICT, desc='Generate Oasis keys file for a model')
        
        if args['config_file_path']:
            logger.info('Loading script resources from config file {}'.format(args['config_file_path']))
            args = mdk_utils.load_script_args_from_config_file(SCRIPT_ARGS_METADICT, args['config_file_path'])
        else:
            args.pop('config_file_path')

        logger.info('Script arguments: {}'.format(args))

        di = SCRIPT_ARGS_METADICT
Example #20
    nline, npos = loc_to_linepos(parser.solbytes, begin)
    text = parser.solbytes[begin:end].decode('utf8')

    return nline, npos, text

def usage():
    print('usage:')
    print('    %s find <solfile> <opseq>' % sys.argv[0])
    print('    %s print <solfile>' % sys.argv[0])


if __name__ == '__main__':
    import sys
    from loc2linepos import loc_to_linepos

    set_logging(0)

    try:
        subcmd = sys.argv[1]

        if subcmd == 'find':
            solfile = sys.argv[2]
            opseq = sys.argv[3].split()

            loc = __find_sol_vul_loc(solfile, opseq, try_dec=True)
            if loc is None:
                logging.error("file '{}' cannot find '{}'".format(solfile, opseq))
            else:
                nline, npos, msg = loc
                print("Find: '{}': Line {}: {}: {}\nFor ({})".format(solfile, nline, npos, msg, opseq))