Example #1
def load():
    """load the classic Norvig big.txt corpus"""
    print("training!")

    models.load_models()

    print("done training!")

    return True
Example #2
def run_server(port_num=8080):
    """little demo server for demo'ing sake"""
    models.load_models()

    debug(True)

    @route('/<wordA>/<wordB>')
    def index(wordA, wordB):
        return dict(predict(wordA, wordB))

    run(host='localhost', port=port_num)
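
Examples #2 and #7 use Bottle's route, debug, and run helpers without showing the imports. Below is a minimal self-contained sketch under that assumption, with a stand-in predict(wordA, wordB) in place of the trained model (the real helper is not shown above):

from bottle import route, run, debug

def predict(word_a, word_b):
    # stand-in for the trained bigram model; assumed to map a word pair
    # to a {completion: score} dict
    return {word_b + 's': 1.0}

def run_server(port_num=8080):
    debug(True)

    @route('/<wordA>/<wordB>')
    def index(wordA, wordB):
        # Bottle serializes a returned dict as a JSON response
        return dict(predict(wordA, wordB))

    run(host='localhost', port=port_num)

if __name__ == '__main__':
    run_server()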
Example #3
def main():
    product_json_fields = 'product_name manufacturer model family announced-date'.split()
    listing_json_fields = 'title manufacturer currency price'.split()

    products = list(load_models('data/products.txt', Product, product_json_fields, 'utf-8'))
    listings = load_models('data/listings.txt', Listing, listing_json_fields, 'utf-8')

    to_review_listings = []
    product_buckets = defaultdict(set)

    # Group products by the words in the manufacturer field
    for product in products:
        for token in product.normalized_manufacturer.split():
            product_buckets[token].add(product)

    for listing in listings:
        # Retrieve candidates
        candidates = set()
        for token in listing.tokens:
            candidates.update(product_buckets[token])

        potential_products = [
            candidate
            for candidate in candidates
            if is_potential_match(candidate, listing)
        ]

        # If there's more than one match using manufacturer and model, try to disambiguate
        # by looking for all product.name tokens in listing.title and listing.manufacturer
        if len(potential_products) > 1:
            potential_products = filter_by_product_name(potential_products, listing)
        if len(potential_products) == 1:
            product = potential_products[0]
            product.listings.append(listing)
        else:
            to_review_listings.append(listing)

    # Remove listings if their price is under 0.3 of the median price (accessory?)
    for product in products:
        product.listings, rejected = filter_accessories(
            product.listings, 0.3, lambda x: x.price_in_cad
        )
        if rejected:
            to_review_listings.extend(rejected)

    # Save results
    save_json_by_line('results.txt', (
        {
            'product_name': product.name,
            'listings': [l.as_dict() for l in product.listings]
        }
        for product in products
    ), encoding='utf-8')
Example #4
def configure_app(app, config):
    CORS(app)  # cross domain

    tools = {
        'auth': configure_auth(app, config),
        'cache': configure_cache(app, config)
    }

    if config.getboolean('hippo', 'mongo'):
        print('connect MongoDB...')
        tools['mongo'] = configure_mongo(config)

    if config.getboolean('hippo', 'pubsub'):
        print('connect Kafka...')
        tools['pubsub'] = configure_kafka(config)

    models = load_models(config, tools)
    apis = load_apis(config, tools, models)

    if 'pubsub' in tools:
        print('register Subscribers...')
        register_subscribers(config, tools, models)

    if config.getboolean('hippo', 'refresh_data'):
        init_tasks(models)
        init_events(models)

    return tools, apis
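
The config.getboolean('hippo', ...) calls suggest a standard configparser object. A minimal sketch of how such a configure_app might be wired up, assuming a hypothetical hippo.ini with a [hippo] section and that the helpers it calls (configure_auth, configure_cache, load_models, load_apis, ...) come from the surrounding project:

from configparser import ConfigParser
from flask import Flask

config = ConfigParser()
config.read('hippo.ini')  # hypothetical config file with a [hippo] section

app = Flask(__name__)
tools, apis = configure_app(app, config)

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=config.getint('hippo', 'port', fallback=5000))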
Example #5
def main():
    logging.basicConfig(level=logging.INFO, format='%(message)s')

    parser = argparse.ArgumentParser(
        description='Create synthetic phrases'
        ' using trained CRF models and lemma grammar')
    parser.add_argument('rev_map', help='reverse inflection map')
    parser.add_argument('models',
                        nargs='+',
                        help='trained models (category:file)')
    parser.add_argument('sgm', help='original sentences + grammar pointers')
    parser.add_argument('sgm_lem',
                        help='original sentences + lemma grammar pointers')
    parser.add_argument('out', help='grammar output directory')
    args = parser.parse_args()

    if not os.path.exists(args.out):
        os.mkdir(args.out)

    logging.info('Loading reverse inflection map')
    with open(args.rev_map) as f:
        rev_map = cPickle.load(f)

    logging.info('Loading inflection prediction models')
    models = load_models(args.models)

    logging.info('Generating extended grammars')
    data = izip(read_sentences(sys.stdin, skip_empty=False),
                read_sgm(args.sgm), read_sgm(args.sgm_lem))
    for (source, _, _), (grm_path, sid, left, right),\
            (lem_grm_path, lem_sid, lem_left, lem_right) in data:
        assert sid == lem_sid and left == lem_left and right == lem_right
        # Create grammar file
        out_path = os.path.join(args.out, 'grammar.{}.gz'.format(sid))
        grammar_file = gzip.open(out_path, 'w')
        # Copy original grammar
        with gzip.open(grm_path) as f:
            for line in f:
                grammar_file.write(line)

        # Generate synthetic phrases from lemma grammar
        for rule in read_grammar(lem_grm_path):
            assert not any(src.startswith('[X,')
                           for src in rule.lhs)  # no gaps, please
            for match in source_match(rule.lhs, source):
                # create (at most) a synthetic rule
                for new_rule in synthetic_rule(rev_map, models, rule, source,
                                               match):
                    grammar_file.write(unicode(new_rule).encode('utf8') + '\n')

        grammar_file.close()
        # Write sgm
        new_left = u'<seg grammar="{}" id="{}">{}</seg>'.format(
            out_path, sid, left)
        print(u' ||| '.join([new_left] + right).encode('utf8'))
Example #6
def main(args):
    # Set the random seed manually for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    ae_args, gan_args, idx2word, autoencoder, gan_gen, gan_disc \
        = load_models(args.ae_args, args.gan_args, args.vocab_file,
                      args.ae_model, args.g_model, args.d_model)
    word2idx = {v: k for k, v in idx2word.items()}
    gan_gen.cuda()
    autoencoder.cuda()
    gan_gen.eval()
    autoencoder.eval()

    with open(args.quiz_file) as f:
        quizzes = json.load(f)

    def gen_batches(data, batch_size=args.batch_size):
        # step through the data in non-overlapping batch_size chunks
        for i in range(0, len(data), batch_size):
            yield data[i:i + batch_size]

    def pad_data(data):
        maxlen = max(map(len, data))
        return [x + (maxlen - len(x)) * [Dictionary.pad] for x in data]

    answers = []
    i = 0
    for batch in tqdm(gen_batches(quizzes),
                      total=int(np.ceil(len(quizzes) / args.batch_size))):
        source_idx = torch.LongTensor(
            pad_data([[Dictionary.sos] + [word2idx[w] for w in quiz]
                      for quiz in batch])).cuda()
        source = autoencoder(Variable(source_idx, volatile=True),
                             noise=False,
                             encode_only=True)
        sentences = []
        for _ in range(args.gen_cnt):
            hidden = gan_gen(source)
            sentences.append(
                generate_from_hidden(autoencoder,
                                     hidden,
                                     vocab=idx2word,
                                     sample=args.sample,
                                     maxlen=args.maxlen))
            source = hidden
        sentences = list(zip(*sentences))
        for quiz, ans in zip(batch, sentences):
            i += 1
            answers.append({'id': i, 'quiz': ''.join(quiz), 'answer': ans})
    with open(args.outf, 'w') as f:
        json.dump(answers, f, ensure_ascii=False, indent=2)
Example #7
def run_server(port_num=8080):
    if not models.load_models():
        #print(os.listdir(os.curdir))
        with open(os.path.join(os.path.dirname(__file__), 'big.txt'),
                  'rb') as f:
            print(os.path.join(os.path.dirname(__file__), 'big.txt'))
            models.train_models(str(f.read()))

    @route('/<wordA>/<wordB>')
    def index(wordA, wordB):
        return dict(autocomplete.predict(wordA, wordB))

    run(host='localhost', port=port_num)
Example #8
def main():
    logging.basicConfig(level=logging.INFO, format='%(message)s')

    parser = argparse.ArgumentParser(description='Create synthetic phrases'
            ' using trained CRF models and lemma grammar')
    parser.add_argument('rev_map', help='reverse inflection map')
    parser.add_argument('models', nargs='+', help='trained models (category:file)')
    parser.add_argument('sgm', help='original sentences + grammar pointers')
    parser.add_argument('sgm_lem', help='original sentences + lemma grammar pointers')
    parser.add_argument('out', help='grammar output directory')
    args = parser.parse_args()

    if not os.path.exists(args.out):
        os.mkdir(args.out)

    logging.info('Loading reverse inflection map')
    with open(args.rev_map) as f:
        rev_map = cPickle.load(f)

    logging.info('Loading inflection prediction models')
    models = load_models(args.models)
    extracted_tags = ''.join(models.keys())
    logging.info('Inflecting categories: {}'.format(extracted_tags))
    lemma_re = re.compile('^(.+)_([' + extracted_tags + '])$')

    logging.info('Generating extended grammars')
    data = izip(read_sentences(sys.stdin, skip_empty=False),
            read_sgm(args.sgm), read_sgm(args.sgm_lem))
    for (source, _, _), (grm_path, sid, left, right),\
            (lem_grm_path, lem_sid, lem_left, lem_right) in data:
        assert sid == lem_sid and left == lem_left and right == lem_right
        # Create grammar file
        out_path = os.path.join(args.out, 'grammar.{}.gz'.format(sid))
        grammar_file = gzip.open(out_path, 'w')
        # Copy original grammar
        with gzip.open(grm_path) as f:
            for line in f:
                grammar_file.write(line)

        # Generate synthetic phrases from lemma grammar
        for rule in read_grammar(lem_grm_path):
            assert not any(src.startswith('[X,') for src in rule.lhs) # no gaps, please
            for match in source_match(rule.lhs, source):
                # create (at most) a synthetic rule
                for new_rule in synthetic_rule(rev_map, models, rule, source, match, lemma_re):
                    grammar_file.write(unicode(new_rule).encode('utf8')+'\n')

        grammar_file.close()
        # Write sgm
        new_left = u'<seg grammar="{}" id="{}">{}</seg>'.format(out_path, sid, left)
        print(u' ||| '.join([new_left] + right).encode('utf8'))
Example #9
def main(args):

    ###########################################################################
    # Load the models
    ###########################################################################

    model_args, idx2word, autoencoder, inverter, gan_gen, gan_disc = \
        load_models(args.load_path)

    # Set the random seed manually for reproducibility.
    random.seed(model_args['seed'])
    np.random.seed(model_args['seed'])
    torch.manual_seed(model_args['seed'])
    if torch.cuda.is_available():
        torch.cuda.manual_seed(model_args['seed'])
    else:
        print("Note that our pre-trained models require CUDA to evaluate.")

    ###########################################################################
    # Load data
    ###########################################################################

    corpus = Corpus(model_args['data_path'],
                    maxlen=model_args['maxlen'],
                    vocab_size=model_args['vocab_size'],
                    lowercase=model_args['lowercase'])
    if args.test:
        eval_batch_size = 1
        test_data = batchify(corpus.test, eval_batch_size, shuffle=False)
    else:
        train_data = batchify(corpus.train, model_args['batch_size'], shuffle=True)

    print("Loaded data!")

    ###########################################################################
    # Perturbations
    ###########################################################################

    ring_rng = np.linspace(0., 1., 100)
    n_rng = len(test_data) if args.test else len(train_data)

    for idx in range(n_rng):
        data_batch = test_data[idx] if args.test else train_data[idx]

        for l, r in zip(ring_rng, ring_rng[1:]):

            flg = perturb(data_batch, autoencoder, idx2word,
                          model_args['sample'], model_args['maxlen'],
                          left=l, right=r, n_samples=5, epoch=idx,
                          gpu=model_args['cuda'])
            if flg: break
Example #10
def main(args):
    # Set the random seed manually for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
    else:
        print("Note that our pre-trained models require CUDA to evaluate.")

    ###########################################################################
    # Load the models
    ###########################################################################

    ae_args, gan_args, idx2word, autoencoder, gan_gen, gan_disc \
        = load_models(args.ae_args, args.gan_args, args.vocab_file,
                      args.ae_model, args.g_model, args.d_model)

    ###########################################################################
    # Generation code
    ###########################################################################

    # Generate sentences
    corpus = Corpus(args.data_path, args.dict_file, vocab_size=len(idx2word))

    source, _ = next(BatchGen(corpus.get_chunks(size=2), args.ngenerations))
    prev_sent = [
        decode_idx(corpus.dictionary, sent) for sent in source.tolist()
    ]
    source = Variable(source, volatile=True)
    sentences = generate(autoencoder,
                         gan_gen,
                         inp=source,
                         vocab=idx2word,
                         sample=args.sample,
                         maxlen=args.maxlen)

    if not args.noprint:
        print("\nSentence generations:\n")
        for prev, sent in zip(prev_sent, sentences):
            print(prev)
            print("    ", sent)
            print("")
    with open(args.outf, "w") as f:
        f.write("Sentence generations:\n\n")
        for prev, sent in zip(prev_sent, sentences):
            f.write(prev + '\n')
            f.write("-> " + sent + '\n\n')
Example #11
def test_load_models(self):
    load_models(MODELS_JSON_FILE)
    self.assertEqual(len(all_models()), 4)
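
The snippet above is a single method lifted out of a test class. A minimal sketch of the surrounding unittest.TestCase, assuming load_models, all_models, and MODELS_JSON_FILE can be imported from the module under test (the import path below is hypothetical):

import unittest

from models import MODELS_JSON_FILE, all_models, load_models  # hypothetical import path

class LoadModelsTest(unittest.TestCase):
    def test_load_models(self):
        load_models(MODELS_JSON_FILE)
        # the fixture file is expected to define exactly 4 models
        self.assertEqual(len(all_models()), 4)

if __name__ == '__main__':
    unittest.main()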
Example #12
def main():
    logging.basicConfig(level=logging.INFO, format="%(message)s")

    parser = argparse.ArgumentParser(description="Predict using trained models")
    parser.add_argument("rev_map", help="reverse inflection map")
    parser.add_argument("models", nargs="+", help="trained models (category:file)")
    parser.add_argument("--ambiguous", action="store_true", help="evaluate only lemmas with multiple inflections")
    args = parser.parse_args()

    logging.info("Loading reverse inflection map")
    with open(args.rev_map) as f:
        rev_map = cPickle.load(f)

    logging.info("Loading inflection prediction models")
    models = load_models(args.models)
    logging.info("Loaded models for %d categories", len(models))

    stats = {cat: [0, 0, 0, 0, 0] for cat in config.EXTRACTED_TAGS}

    for source, target, alignment in read_sentences(sys.stdin):
        for word, features in extract_instances(source, target, alignment):
            gold_inflection, lemma, tag = word
            category = tag[0]
            gold_tag = tag[1:]
            possible_inflections = rev_map.get((lemma, category), [])
            if (gold_tag, gold_inflection) not in possible_inflections:
                print(u"Expected: {} ({}) not found".format(gold_inflection, gold_tag).encode("utf8"))
                continue
            if args.ambiguous and len(possible_inflections) == 1:
                continue

            model = models[category]

            scored_inflections = model.score_all(possible_inflections, features)
            ranked_inflections = sorted(scored_inflections, reverse=True)
            predicted_score, predicted_tag, predicted_inflection = ranked_inflections[0]

            gold_rank = 1 + [tag for _, tag, _ in ranked_inflections].index(gold_tag)
            gold_score = next((score for score, tag, _ in ranked_inflections if tag == gold_tag))

            print(
                u"Expected: {} ({}) r={} score={:.3f} |"
                " Predicted: {} ({}) score={:.3f}".format(
                    gold_inflection,
                    gold_tag,
                    gold_rank,
                    gold_score,
                    predicted_inflection,
                    predicted_tag,
                    predicted_score,
                ).encode("utf8")
            )

            stats[category][0] += 1
            stats[category][1] += 1 / float(gold_rank)
            stats[category][2] += gold_inflection == predicted_inflection
            stats[category][3] += gold_score
            stats[category][4] += len(ranked_inflections)

    for category, (n_instances, rrank_sum, n_correct, total_log_prob, n_inflections) in stats.items():
        if n_instances == 0:
            continue
        mrr = rrank_sum / n_instances
        accuracy = n_correct / float(n_instances)
        ppl = math.exp(-total_log_prob / n_instances)
        avg_inflections = n_inflections / float(n_instances)
        print(
            "Category {}: MRR={:.3f} acc={:.1%} ppl={:.2f} ({} instances; avg #infl={:.2f})".format(
                category, mrr, accuracy, ppl, n_instances, avg_inflections
            )
        )
Example #13
def load_models(device,
                base_folder='./models/BAM/',
                specific="bowling_alley", 
                seed=0, 
                module="layer3",
                experiment="sgd_finetuned",
                ratio="0.5",
                adv=False,
                baseline=False,
                epoch=None,
                post=False,
                multiple=True,
                leakage=False,
                tcav=False,
                force=False,
                dataset='bam',
                args=None,
                ignore_net=False):
    '''
    if dataset == 'coco' and adv:
        class DummyArgs:
            num_object = 79
            finetune=False
            layer='generated_image'
            autoencoder_finetune=True
            finetune=True
        model = balanced_models.ObjectMultiLabelAdv(DummyArgs(), 79, 300, True, 1)
        ok    = torch.load('model_best.pth.tar', encoding='bytes')
        state_dict = {key.decode("utf-8"):ok[b'state_dict'][key] for key in ok[b'state_dict']}
        model.load_state_dict(state_dict)
        model.to(device)
        model.eval()
    '''
    if leakage:
        assert post
    if epoch is not None:
        epoch = "_" + str(epoch)
    else:
        epoch = ""
    if len(args.custom_end) > 0:
        args.custom_end = "_" + str(args.custom_end)
    if baseline:
        model_end = "resnet_base_"+str(ratio)+epoch+'.pt'
        if not post:
            n2v_end   = "resnet_n2v_base_"+str(ratio)+epoch+'.pt'
        else:
            n2v_end   = "resnet_n2v_base_after_"+str(ratio)+epoch+'.pt'
    else:
        if not adv:
            model_end = "resnet_debias_"+str(ratio)+epoch+'.pt'
            if not post:
                n2v_end   = "resnet_n2v_debias_"+str(ratio)+epoch+'.pt'
            else:
                n2v_end   = "resnet_n2v_debias_after_"+str(ratio)+epoch+str(args.custom_end)+'.pt'
        else:
            model_end = "resnet_adv_"+str(ratio)+'.pt'
            if not post:
                n2v_end   = "resnet_n2v_adv_"+str(ratio)+'.pt'
            else:
                n2v_end   = "resnet_n2v_adv_after_"+str(ratio)+epoch+'.pt'
    if dataset != 'bam' and dataset != 'coco':
        model_end = model_end.replace('_'+str(ratio), '')
        n2v_end   = n2v_end.replace('_'+str(ratio), '')
    if dataset == 'bam' or dataset == 'coco':
        model_path, n2v_path = utils.get_paths(
                base_folder,
                seed,
                specific,
                model_end=model_end,
                n2v_end='leakage/' + n2v_end.replace('n2v','mlp') if leakage else n2v_end,
                n2v_module=module,
                experiment=experiment,
                with_n2v=True,
        )
    else:
        model_path = os.path.join(base_folder, str(seed), experiment, module, model_end)
        n2v_path = os.path.join(base_folder, str(seed), experiment, module, 'leakage/' + n2v_end.replace('n2v','mlp') if leakage else n2v_end)
    if dataset == 'bam':
        trainloader, _ = dataload.get_data_loader_SceneBAM(seed=seed,ratio=float(ratio), specific=specific)
        _, testloader = dataload.get_data_loader_SceneBAM(seed=seed,ratio=float(0.5), specific=specific)
    elif dataset == 'coco':
        tmp_args = copy.deepcopy(args)
        tmp_args.ratio = ratio
        if int(ratio) > 0:
            tmp_args.balanced = True
        if leakage:
            tmp_args.gender_balanced = True
        trainloader, testloader = coco_dataload.get_data_loader_coco(
            tmp_args
        )
    else:
        trainloader,testloader = dataload.get_data_loader_idenProf('idenprof',train_shuffle=True,
                                                                   train_batch_size=64,
                                                                   test_batch_size=64,
                                                                   exclusive=True)
    if not (dataset == 'coco' and adv):
        assert os.path.exists(model_path), model_path
    if post:
        # since we have to run a separate script, might not have finished...
        if not leakage:
            model_extra = '_adv' if adv else ('_base' if baseline else '_debias')
            n2v_extra   = model_extra + '_after'
            if tcav:
                pass
            elif force:
                post_train.train_net2vec(trainloader, 
                                        testloader, 
                                        device, 
                                        seed,
                                        specific=specific,
                                        p=ratio,
                                        n_epochs=20,
                                        module=module,
                                        lr=.01,
                                        out_file=None,
                                        base_folder=base_folder,
                                        experiment1=experiment,
                                        experiment2=experiment,
                                        model_extra=model_extra,
                                        n2v_extra=n2v_extra,
                                        with_n2v=True,
                                        nonlinear=False, # might want to change this later
                                        model_custom_end=epoch.replace('_',''),
                                        n2v_custom_end=epoch.replace('_',''),
                                        multiple=multiple,
                                        dataset=dataset
                )
            else:
                raise Exception('Run trial again')
        elif leakage:
            model_extra = '_adv' if adv else ('_base' if baseline else '_debias')
            n2v_extra   = model_extra + '_after'
            if force:
                post_train.train_leakage(trainloader, 
                                        testloader, 
                                        device, 
                                        seed,
                                        specific=specific,
                                        p=ratio,
                                        n_epochs=20,
                                        module=module,
                                        lr=5e-5, # leakage model uses adam
                                        out_file=None,
                                        base_folder=base_folder,
                                        experiment1=experiment,
                                        experiment2=experiment,
                                        model_extra=model_extra,
                                        n2v_extra=n2v_extra,
                                        with_n2v=True,
                                        nonlinear=True, # MLP leakage model
                                        model_custom_end='',
                                        n2v_custom_end='',
                                        dataset=dataset
                )
            else:
                raise Exception('Run trial again')
    else:
        # should've been saved during training if not ported from tianlu
        if not (dataset == 'coco' and adv):
            assert os.path.exists(n2v_path)
    num_attributes = 10 + 9 + 20 if multiple else 12
    num_classes=10
    if dataset == 'coco':
        num_attributes = 81
        num_classes = 79
    model, net, net_forward, activation_probe = models.load_models(
        device,
        None if (dataset == 'coco' and adv) else
        lambda x,y,z: models.resnet_(
            pretrained=True, 
            custom_path=x, 
            device=y,
            initialize=z, 
            num_classes=num_classes,
            size=50 if (dataset == 'bam') or (dataset == 'coco') else 34
        ),
        model_path=model_path,
        net2vec_pretrained=True,
        net2vec_path=n2v_path,
        module='fc' if leakage else module,
        num_attributes=2 if leakage else num_attributes,
        model_init = False,
        n2v_init = False,
        nonlinear = leakage,
        ignore_net = ignore_net
    )
    print(n2v_path)
    return model, net, net_forward, activation_probe
Example #14
                rev[f].add(k)
            all_zs[k] = noise[i]
    pickle.dump((all_sents, all_features, rev, all_zs), open(args.dump, "bw"))


def main(args):
    if args.mode == 'gen':
        dump_samples(args)
    elif args.mode == 'alter':
        alter(args)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='PyTorch experiment')
    parser.add_argument('mode', default='gen',
                        help='choices [gen, alter]')
    parser.add_argument('--load_path', type=str, default='',
                        help='directory to load models from')

    parser.add_argument('--dump', type=str, default="features.pkl",
                        help='path to sample dump')
    parser.add_argument('--nbatches', type=int, default=1000)
    parser.add_argument('--batch_size', type=int, default=1000)
    parser.add_argument('--alter', type=str, default="")
    parser.add_argument('--nsent', type=int, default=100)
    args = parser.parse_args()
    model_args, idx2word, autoencoder, gan_gen, gan_disc \
        = load_models(args.load_path)

    main(args)
Example #15
                        count += 1

                    else:
                        previous_state = state
                        state = STATES["NO_IMAGE"]

                if state == STATES["PREDICT"]:
                    state, pytorch_result, sklearn_result = states.predict(img, pytorch_model, sklearn_model)

                if state == STATES["RESULTS"]:
                    state = states.results(MaskDetectoBot, chat_id, pytorch_result, sklearn_result)
                    new_offset = update_id + 1

                if state == STATES["HELP"]:
                    states.help(MaskDetectoBot, chat_id, previous_state)
                    state = previous_state
                    new_offset = update_id + 1

                elif state == STATES["ERROR"] or state == STATES["NO_IMAGE"]:
                    states.error(MaskDetectoBot, chat_id, state)
                    state = previous_state
                    new_offset = update_id + 1

if __name__ == '__main__':
    # Load the models here
    pytorch_model, sklearn_model = load_models()

    try:
        main(pytorch_model, sklearn_model)
    except KeyboardInterrupt:
        exit()
Example #16
from flask import Flask
from flask_restful import reqparse, abort, Api, Resource
#from flask_cors import CORS

from models import load_models, predict, predict_news_hatespeech

### LOAD APP

application = Flask(__name__)
#CORS(application)
api = Api(application)

### LOAD MODEL

load_models()

### LOAD PARSER

parser = reqparse.RequestParser()
parser.add_argument('content')
parser.add_argument('news_title')

parser.add_argument('headline')
parser.add_argument('model')
parser.add_argument('lang')

parser.add_argument('title_news')
parser.add_argument('lead_news')
parser.add_argument('text_news')
parser.add_argument('authors_news')
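
The snippet cuts off before the Resource classes that consume the parser. A hedged sketch of how one such resource might wire parser into predict; the route, class name, and predict signature are assumptions, not the project's actual API:

class ContentPrediction(Resource):
    def post(self):
        args = parser.parse_args()
        # assumed predict() interface: raw text in, label/score mapping out
        result = predict(args['content'])
        return {'prediction': result}

api.add_resource(ContentPrediction, '/predict')

if __name__ == '__main__':
    application.run(debug=True)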
Example #17
def load_models(device,
                base_folder='./models/BAM/',
                specific="bowling_alley", 
                seed=0, 
                module="layer3",
                experiment="sgd_finetuned",
                ratio="0.5",
                adv=False,
                baseline=False,
                epoch=None,
                post=False,
                multiple=True,
                leakage=False,
                tcav=False,
                force=False,
                dataset='bam'):
    if leakage:
        assert post
    if epoch is not None:
        epoch = "_" + str(epoch)
    else:
        epoch = ""
    if baseline:
        model_end = "resnet_base_"+str(ratio)+epoch+'.pt'
        if not post:
            n2v_end   = "resnet_n2v_base_"+str(ratio)+epoch+'.pt'
        else:
            n2v_end   = "resnet_n2v_base_after_"+str(ratio)+epoch+'.pt'
    else:
        if not adv:
            model_end = "resnet_debias_"+str(ratio)+epoch+'.pt'
            if not post:
                n2v_end   = "resnet_n2v_debias_"+str(ratio)+epoch+'.pt'
            else:
                n2v_end   = "resnet_n2v_debias_after_"+str(ratio)+epoch+'.pt'
        else:
            model_end = "resnet_adv_"+str(ratio)+'.pt'
            if not post:
                n2v_end   = "resnet_n2v_adv_"+str(ratio)+'.pt'
            else:
                n2v_end   = "resnet_n2v_adv_after_"+str(ratio)+epoch+'.pt'
    if dataset != 'bam':
        model_end = model_end.replace('_'+str(ratio), '')
        n2v_end   = n2v_end.replace('_'+str(ratio), '')
    if dataset == 'bam':
        model_path, n2v_path = utils.get_paths(
                base_folder,
                seed,
                specific,
                model_end=model_end,
                n2v_end='leakage/' + n2v_end.replace('n2v','mlp') if leakage else n2v_end,
                n2v_module=module,
                experiment=experiment,
                with_n2v=True,
        )
    else:
        model_path = os.path.join(base_folder, str(seed), experiment, module, model_end)
        n2v_path = os.path.join(base_folder, str(seed), experiment, module, 'leakage/' + n2v_end.replace('n2v','mlp') if leakage else n2v_end)
    if dataset == 'bam':
        trainloader, _ = dataload.get_data_loader_SceneBAM(seed=seed,ratio=float(ratio), specific=specific)
        _, testloader = dataload.get_data_loader_SceneBAM(seed=seed,ratio=float(0.5), specific=specific)
    else:
        trainloader,testloader = dataload.get_data_loader_idenProf('idenprof',train_shuffle=True,
                                                                   train_batch_size=64,
                                                                   test_batch_size=64,
                                                                   exclusive=True)
    assert os.path.exists(model_path), model_path
    if post:
        # since we have to run a separate script, might not have finished...
        if not leakage:
            model_extra = '_adv' if adv else ('_base' if baseline else '_debias')
            n2v_extra   = model_extra + '_after'
            if tcav:
                pass
            elif force:
                post_train.train_net2vec(trainloader, 
                                        testloader, 
                                        device, 
                                        seed,
                                        specific=specific,
                                        p=ratio,
                                        n_epochs=20,
                                        module=module,
                                        lr=.01,
                                        out_file=None,
                                        base_folder=base_folder,
                                        experiment1=experiment,
                                        experiment2=experiment,
                                        model_extra=model_extra,
                                        n2v_extra=n2v_extra,
                                        with_n2v=True,
                                        nonlinear=False, # might want to change this later
                                        model_custom_end=epoch.replace('_',''),
                                        n2v_custom_end=epoch.replace('_',''),
                                        multiple=multiple,
                                        dataset=dataset
                )
            else:
                raise Exception('Run trial again')
        elif leakage:
            model_extra = '_adv' if adv else ('_base' if baseline else '_debias')
            n2v_extra   = model_extra + '_after'
            if force:
                post_train.train_leakage(trainloader, 
                                        testloader, 
                                        device, 
                                        seed,
                                        specific=specific,
                                        p=ratio,
                                        n_epochs=20,
                                        module=module,
                                        lr=5e-5, # leakage model uses adam
                                        out_file=None,
                                        base_folder=base_folder,
                                        experiment1=experiment,
                                        experiment2=experiment,
                                        model_extra=model_extra,
                                        n2v_extra=n2v_extra,
                                        with_n2v=True,
                                        nonlinear=True, # MLP leakage model
                                        model_custom_end='',
                                        n2v_custom_end='',
                                        dataset=dataset
                )
            else:
                raise Exception('Run trial again')
    else:
        # should've been saved during training
        assert os.path.exists(n2v_path)
    num_attributes = 10 + 9 + 20 if multiple else 12
    model, net, net_forward, activation_probe = models.load_models(
        device,
        lambda x,y,z: models.resnet_(pretrained=True, custom_path=x, device=y,initialize=z, size=50 if dataset == 'bam' else 34),
        model_path=model_path,
        net2vec_pretrained=True,
        net2vec_path=n2v_path,
        module='fc' if leakage else module,
        num_attributes=2 if leakage else num_attributes,
        model_init = False,
        n2v_init = False,
        nonlinear = leakage
    )
    return model, net, net_forward, activation_probe
Example #18
def main(args):
    # Set the random seed manually for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
    else:
        print("Note that our pre-trained models require CUDA to evaluate.")

    ###########################################################################
    # Load the models
    ###########################################################################

    model_args, idx2word, autoencoder, gan_gen, gan_disc \
        = load_models(args.load_path)

    ###########################################################################
    # Generation code
    ###########################################################################

    # Generate sentences
    if args.ngenerations > 0:
        noise = torch.ones(args.ngenerations, model_args['z_size'])
        noise.normal_()
        sentences = generate(autoencoder, gan_gen, z=noise,
                             vocab=idx2word, sample=args.sample,
                             maxlen=model_args['maxlen'])

        if not args.noprint:
            print("\nSentence generations:\n")
            for sent in sentences:
                print(sent)
        with open(args.outf, "w") as f:
            f.write("Sentence generations:\n\n")
            for sent in sentences:
                f.write(sent+"\n")

    # Generate interpolations
    if args.ninterpolations > 0:
        noise1 = torch.ones(args.ninterpolations, model_args['z_size'])
        noise1.normal_()
        noise2 = torch.ones(args.ninterpolations, model_args['z_size'])
        noise2.normal_()
        interps = interpolate(autoencoder, gan_gen,
                              z1=noise1,
                              z2=noise2,
                              vocab=idx2word,
                              steps=args.steps,
                              sample=args.sample,
                              maxlen=model_args['maxlen'])

        if not args.noprint:
            print("\nSentence interpolations:\n")
            for interp in interps:
                for sent in interp:
                    print(sent)
                print("")
        with open(args.outf, "a") as f:
            f.write("\nSentence interpolations:\n\n")
            for interp in interps:
                for sent in interp:
                    f.write(sent+"\n")
                f.write('\n')
Example #19
def train(
        trainloader,
        testloader,
        device,
        seed,
        debias_=True,
        specific=None,
        ratio=0.5,  # bias ratio in dataset
        n_epochs=5,
        model_lr=1e-3,
        n2v_lr=1e-3,
        combined_n2v_lr=1e-3,  # metalearning rate for n2v
        alpha=100,  # for debias,
        beta=0.1,  # for adversarial loss
        out_file=None,
        base_folder="",
        results_folder="",
        experiment="sgd",
        momentum=0,
        module="layer4",
        finetuned=False,
        adversarial=False,
        nonlinear=False,
        subset=False,
        subset_ratio=0.1,
        save_every=False,
        model_momentum=0,
        n2v_momentum=0,
        experimental=False,
        multiple=False,
        debias_multiple=False,
        reset=False,
        reset_counter=1,
        n2v_start=False,
        experiment2=None,
        adaptive_alpha=False,
        n2v_adam=False,
        single=False,
        imagenet=False,
        train_batch_size=64,
        constant_resize=False,
        adaptive_resize=False,
        no_class=False,
        gamma=0,
        partial_projection=False,
        norm='l2',
        constant_alpha=False,
        jump_alpha=False,
        linear_alpha=False,
        mean_debias=False,
        no_limit=False,
        dataset='bam',
        parallel=False,
        gpu_ids=[],
        switch_modes=True):
    print("mu", momentum, "debias", debias_, "alpha", alpha, " | ratio:",
          ratio)

    def get_vg(W):
        if single:
            return W[-2, :]
        else:
            return W[-2, :] - W[-1, :]

    if dataset == 'bam' or dataset == 'coco':
        model_init_path, n2v_init_path = utils.get_paths(
            base_folder,
            seed,
            specific,
            model_end="resnet_init" + '.pt',
            n2v_end="resnet_n2v_init" + '.pt',
            n2v_module=module,
            experiment=experiment,
            with_n2v=False)
    else:
        model_init_path = os.path.join(base_folder, str(seed), experiment,
                                       'resnet_init.pt')
        n2v_init_path = os.path.join(base_folder, str(seed), experiment,
                                     module, 'resnet_n2v_init.pt')
    if finetuned:
        if dataset == 'bam' or dataset == 'coco':
            model_init_path = utils.get_model_path(
                base_folder,
                seed,
                specific,
                "resnet_" + str(ratio) + ".pt",
                experiment='post_train'
                if not n2v_start else experiment.split('_finetuned')[0])
        else:
            model_init_path = os.path.join(
                base_folder, str(seed), 'post_train' if not n2v_start else
                experiment.split('_finetuned')[0], 'resnet.pt')
        assert (debias_ and not adversarial) or (
            adversarial and not debias_) or (not adversarial and not debias_)
        if debias_ and n2v_start:
            ext = "_n2v_" if not nonlinear else "_mlp_"
            if dataset == 'bam' or dataset == 'coco':
                n2v_init_path = utils.get_net2vec_path(
                    base_folder,
                    seed,
                    specific,
                    module,
                    "resnet" + str(ext) + str(ratio) + ".pt",
                    experiment=experiment.split('_finetuned')[0])
            else:
                n2v_init_path = os.path.join(base_folder, str(seed),
                                             experiment.split('_finetuned')[0],
                                             module,
                                             'resnet' + ext[:-1] + '.pt')
        # if we're also doing adversarial, make sure to load the matching n2v as init...
        if adversarial:
            ext = "_n2v_" if not nonlinear else "_mlp_"
            if dataset == 'bam' or dataset == 'coco':
                n2v_init_path = utils.get_net2vec_path(base_folder,
                                                       seed,
                                                       specific,
                                                       module,
                                                       "resnet" + str(ext) +
                                                       str(ratio) + ".pt",
                                                       experiment='post_train')
            else:
                n2v_init_path = os.path.join(base_folder, str(seed),
                                             'post_train', module,
                                             'resnet' + ext[:-1] + '.pt')
    num_classes = 10
    num_attributes = 12
    if nonlinear:
        num_attributes = 2
    if multiple:
        num_attributes = 10 + 9 + 2 * 10
    if dataset == 'coco':
        num_classes = 79
        num_attributes = 81
    model, net, net_forward, activation_probe = models.load_models(
        device,
        lambda x, y, z: models.resnet_(pretrained=True,
                                       custom_path=x,
                                       device=y,
                                       initialize=z,
                                       num_classes=num_classes,
                                       size=50 if (dataset == 'bam' or dataset
                                                   == 'coco') else 34),
        model_path=model_init_path,
        net2vec_pretrained=True,
        net2vec_path=n2v_init_path,
        module=module,
        num_attributes=num_attributes,
        # we want to make sure to save the inits if not finetuned...
        model_init=True if not finetuned else False,
        n2v_init=True if not (finetuned and
                              (adversarial or
                               (debias_ and n2v_start))) else False,
        loader=trainloader,
        nonlinear=nonlinear,
        # parameters if we want to initially project probes to have a certain amount of bias
        partial_projection=partial_projection,
        t=gamma)
    print(model_init_path, n2v_init_path)
    model_n2v_combined = models.ProbedModel(model,
                                            net,
                                            module,
                                            switch_modes=switch_modes)
    if n2v_adam:
        combined_optim = torch.optim.Adam(
            [{
                'params': model_n2v_combined.model.parameters()
            }, {
                'params': model_n2v_combined.net.parameters()
            }],
            lr=n2v_lr)
        # TODO: allow for momentum training as well
        n2v_optim = torch.optim.Adam(net.parameters(), lr=n2v_lr)
    else:
        combined_optim = torch.optim.SGD(
            [{
                'params': model_n2v_combined.model.parameters()
            }, {
                'params': model_n2v_combined.net.parameters(),
                'lr': combined_n2v_lr,
                'momentum': n2v_momentum
            }],
            lr=model_lr,
            momentum=model_momentum)

        # TODO: allow for momentum training as well
        n2v_optim = torch.optim.SGD(net.parameters(),
                                    lr=n2v_lr,
                                    momentum=n2v_momentum)
    model_optim = torch.optim.SGD(model.parameters(),
                                  lr=model_lr,
                                  momentum=model_momentum)

    d_losses = []
    adv_losses = []
    n2v_train_losses = []
    n2v_accs = []
    n2v_val_losses = []
    class_train_losses = []
    class_accs = []
    class_val_losses = []
    alpha_log = []
    magnitudes = []
    magnitudes2 = []
    unreduced = []
    bias_grads = []
    loss_shapes = []
    loss_shapes2 = []

    results = {
        "debias_losses": d_losses,
        "n2v_train_losses": n2v_train_losses,
        "n2v_val_losses": n2v_val_losses,
        "n2v_accs": n2v_accs,
        "class_train_losses": class_train_losses,
        "class_val_losses": class_val_losses,
        "class_accs": class_accs,
        "adv_losses": adv_losses,
        "alphas": alpha_log,
        "magnitudes": magnitudes,
        "magnitudes2": magnitudes2,
        "unreduced": unreduced,
        "bias_grads": bias_grads,
        "loss_shapes": loss_shapes,
        "loss_shapes2": loss_shapes2
    }
    if debias_:
        results_end = str(ratio) + "_debias.pck"
    elif adversarial:
        results_end = str(ratio) + "_adv.pck"
        if nonlinear:
            results_end = str(ratio) + "_mlp_adv.pck"
    else:
        results_end = str(ratio) + "_base.pck"

    if dataset == 'bam' or dataset == 'coco':
        results_path = utils.get_net2vec_path(
            results_folder, seed, specific, module, results_end,
            experiment if experiment2 is None else experiment2)
    else:
        results_path = os.path.join(
            results_folder, str(seed),
            experiment if experiment2 is None else experiment2, module,
            results_end)
    if debias_:
        model_end = "resnet_debias_" + str(ratio) + '.pt'
        n2v_end = "resnet_n2v_debias_" + str(ratio) + '.pt'
    elif adversarial:
        if not nonlinear:
            model_end = "resnet_adv_" + str(ratio) + '.pt'
        else:
            model_end = "resnet_adv_nonlinear_" + str(ratio) + '.pt'
        if not nonlinear:
            n2v_end = "resnet_n2v_adv_" + str(ratio) + '.pt'
        else:
            n2v_end = "resnet_mlp_adv_" + str(ratio) + '.pt'
    else:
        model_end = "resnet_base_" + str(ratio) + '.pt'
        n2v_end = "resnet_n2v_base_" + str(ratio) + '.pt'

    if dataset != 'bam' and dataset != 'coco':
        model_end = model_end.replace('_' + str(ratio), '')
        n2v_end = n2v_end.replace('_' + str(ratio), '')

    if dataset == 'bam' or dataset == 'coco':
        model_path, n2v_path = utils.get_paths(
            base_folder,
            seed,
            specific,
            model_end=model_end,
            n2v_end=n2v_end,
            n2v_module=module,
            experiment=experiment if experiment2 is None else experiment2,
            with_n2v=True,
        )
    else:
        model_path = os.path.join(
            base_folder, str(seed),
            experiment if experiment2 is None else experiment2, module,
            model_end)
        n2v_path = os.path.join(
            base_folder, str(seed),
            experiment if experiment2 is None else experiment2, module,
            n2v_end)
    if hasattr(trainloader.dataset, 'idx_to_class'):
        for key in trainloader.dataset.idx_to_class:
            if specific is not None and trainloader.dataset.idx_to_class[
                    key] in specific:
                specific_idx = int(key)
            else:
                specific_idx = 0
    train_labels = None if not nonlinear else [-2, -1]
    d_last = 0
    resize = constant_resize or adaptive_resize
    if imagenet:
        imagenet_trainloaders, _ = dataload.get_imagenet_tz(
            './datasets/imagenet',
            workers=8,
            train_batch_size=train_batch_size // 8,
            resize=resize,
            constant=constant_resize)
        imagenet_trainloader = dataload.process_imagenet_loaders(
            imagenet_trainloaders)

    params = list(model_n2v_combined.parameters())[:-2]
    init_alpha = alpha
    last_e = 0

    # setup training criteria
    if dataset == 'coco':
        object_weights = torch.FloatTensor(
            trainloader.dataset.getObjectWeights())
        gender_weights = torch.FloatTensor(
            trainloader.dataset.getGenderWeights())
        all_weights = torch.cat([object_weights, gender_weights])
        probe_criterion = nn.BCEWithLogitsLoss(weight=all_weights.to(device),
                                               reduction='elementwise_mean')
        downstream_criterion = nn.BCEWithLogitsLoss(
            weight=object_weights.to(device), reduction='elementwise_mean')
    else:
        probe_criterion = None
        downstream_criterion = nn.CrossEntropyLoss()

    for e in range(n_epochs):
        # save results every epoch...
        with open(results_path, 'wb') as f:
            print("saving results", e)
            print(results_path)
            pickle.dump(results, f)

        model.eval()

        with torch.no_grad():
            n2v_acc, n2v_val_loss = utils.net2vec_accuracy(
                testloader, net_forward, device, train_labels)
            n2v_accs.append(n2v_acc)
            n2v_val_losses.append(n2v_val_loss)

            if dataset != 'coco':
                class_acc, class_val_loss = utils.classification_accuracy(
                    testloader, model, device)
                class_accs.append(class_acc)
                class_val_losses.append(class_val_loss)
            else:
                f1, mAP = utils.detection_results(testloader, model, device)
                print("Epoch", e, "| f1:", f1, "| mAP:", mAP)
                class_accs.append([f1, mAP])

        d_initial = 0
        if not adversarial:
            curr_W = net.weight.data.clone()
            if not multiple:
                vg = get_vg(curr_W).reshape(-1, 1)
                d_initial = debias.debias_loss(curr_W[:-2], vg, t=0).item()
                print("Epoch", e, "bias", str(d_initial), " | debias: ",
                      debias_)
            else:
                ds = np.zeros(10)
                for i in range(10):
                    if i == 0:
                        vg = (curr_W[10, :] - curr_W[11, :]).reshape(-1, 1)
                    else:
                        vg = (curr_W[20 + i, :] - curr_W[29 + i, :]).reshape(
                            -1, 1)
                    ds[i] = debias.debias_loss(curr_W[:10], vg, t=0).item()
                print("Epoch", e, "bias", ds, " | debias: ", debias_)
                print("Accuracies:", n2v_acc)
                d_initial = ds[0]
        else:
            print("Epoch", e, "Adversarial", n2v_accs[-1])
        if adaptive_alpha and (e == 0 or ((d_last / d_initial) >=
                                          (5 / 2**(e - 1)) or
                                          (0.8 < (d_last / d_initial) < 1.2))):
            #alpha = alpha
            old_alpha = alpha
            # we don't want to increase too much if it's already decreasing
            if (e == 0 or (d_last / d_initial) >= (5 / 2**(e - 1))):
                alpha = min(
                    alpha * 2, (15 / (2**e)) / (d_initial + 1e-10)
                )  # numerical stability just in case d_initial gets really low
                #if e > 0 and old_alpha >= alpha:
                #    alpha = old_alpha # don't update if we're decreasing...
                print("Option 1")
            if e > 0 and alpha < old_alpha:
                # we want to increase if plateaud
                alpha = max(
                    old_alpha * 1.5, alpha
                )  # numerical stability just in case d_initial gets really low
                print("Option 2")
            # don't want to go over 1000...
            if alpha > 1000:
                alpha = 1000
            d_last = d_initial
        elif not adaptive_alpha and not constant_alpha:
            if dataset == 'coco' and jump_alpha:
                if e < 2:
                    alpha = 5e3
                elif e >= 2 and e < 4:
                    alpha = 1e4
                else:
                    alpha = init_alpha
            elif jump_alpha and (e - last_e) > 2:
                if not mean_debias:
                    if alpha < 100:
                        alpha = min(alpha * 2, 100)
                        last_e = e
                    else:
                        # two jumps
                        # if (e-last_e) >= ((n_epochs - last_e) // 2):
                        #     alpha = 1000
                        # else:
                        alpha = 1000
                else:
                    if alpha < 1000:
                        alpha = min(alpha * 2, 1000)
                        last_e = e
                    else:
                        alpha = 10000
            elif linear_alpha and (e - last_e) > 2:
                if alpha < 100:
                    alpha = min(alpha * 2, 100)
                    last_e = e
                else:
                    alpha += (1000 - 100) / (n_epochs - last_e)
            elif not jump_alpha and not linear_alpha:
                if (e + 1) % 3 == 0:
                    # apply alpha schedule?
                    # alpha = min(alpha * 1.2, max(init_alpha,1000))
                    alpha = alpha * 1.5
        alpha_log.append(alpha)
        print("Current Alpha:,", alpha)
        if save_every and e % 10 == 0 and e > 0 and seed == 0 and debias_:
            torch.save(net.state_dict(),
                       n2v_path.split('.pt')[0] + '_' + str(e) + '.pt')
            torch.save(model.state_dict(),
                       model_path.split('.pt')[0] + '_' + str(e) + '.pt')
        if reset and (e + 1) % reset_counter == 0 and e > 0:
            print("resetting")
            net, net_forward, activation_probe = net2vec.create_net2vec(
                model,
                module,
                num_attributes,
                device,
                pretrained=False,
                initialize=True,
                nonlinear=nonlinear)
            n2v_optim = torch.optim.SGD(net.parameters(),
                                        lr=n2v_lr,
                                        momentum=n2v_momentum)

        model.train()
        ct = 0
        for X, y, genders in trainloader:
            ids = None
            ##### Part 1: Update the Embeddings #####
            model_optim.zero_grad()
            n2v_optim.zero_grad()
            labels = utils.merge_labels(y, genders, device)
            logits = net_forward(X.to(device), switch_modes=switch_modes)
            # Now actually update net2vec embeddings, making sure to use the same batch
            if train_labels is not None:
                if logits.shape[1] == labels.shape[1]:
                    logits = logits[:, train_labels]
                labels = labels[:, train_labels]
            shapes = []
            shapes2 = []
            if dataset == 'coco':
                prelim_loss = probe_criterion(logits, labels)
            else:
                prelim_loss, ids = utils.balanced_loss(logits,
                                                       labels,
                                                       device,
                                                       0.5,
                                                       ids=ids,
                                                       multiple=multiple,
                                                       specific=specific_idx,
                                                       shapes=shapes)
            #print("prelim_loss:", prelim_loss.item())
            prelim_loss.backward()
            # we don't want to update these parameters, just in case
            model_optim.zero_grad()
            n2v_train_losses.append(prelim_loss.item())
            n2v_optim.step()
            try:
                magnitudes.append(
                    torch.norm(net.weight.data, dim=1).data.cpu().numpy())
            except Exception:
                # the probe may not expose a single .weight matrix (e.g. a nonlinear MLP probe)
                pass

            ##### Part 2: Update Conv parameters for classification #####
            model_optim.zero_grad()
            n2v_optim.zero_grad()
            class_logits = model(X.to(device))
            class_loss = downstream_criterion(class_logits, y.to(device))
            class_train_losses.append(class_loss.item())

            if debias_:
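                # monitoring only: record the per-attribute (unreduced) debias loss of the
                # current probe weights against the probe's gender direction; .data is used,
                # so no gradient flows through this bookkeeping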
                W_curr = net.weight.data
                vg = get_vg(W_curr).reshape(-1, 1)
                unreduced.append(
                    debias.debias_loss(W_curr[:-2], vg, t=0,
                                       unreduced=True).data.cpu().numpy())

            loss = class_loss
            #### Part 2a: Debias Loss
            if debias_:
                model_optim.zero_grad()
                n2v_optim.zero_grad()

                labels = utils.merge_labels(y, genders, device)
                o = net.weight.clone()
                combined_optim.zero_grad()
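                # meta-gradient debiasing: take a differentiable SGD step on the probe
                # with `higher`, evaluate the debias loss on the updated probe weights,
                # and later backprop that loss through the inner step into the model's
                # original parameters (see grad_of_grads below)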
                with higher.innerloop_ctx(model_n2v_combined,
                                          combined_optim) as (fn2v,
                                                              diffopt_n2v):
                    models.update_probe(fn2v)
                    logits = fn2v(X.to(device))
                    if dataset == 'coco':
                        prelim_loss = probe_criterion(logits, labels)
                    else:
                        prelim_loss, ids = utils.balanced_loss(
                            logits,
                            labels,
                            device,
                            0.5,
                            ids=ids,
                            multiple=False,
                            specific=specific_idx,
                            shapes=shapes2)
                    diffopt_n2v.step(prelim_loss)
                    weights = list(fn2v.parameters())[-2]
                    vg = get_vg(weights).reshape(-1, 1)
                    d_loss = debias.debias_loss(weights[:-2],
                                                vg,
                                                t=gamma,
                                                norm=norm,
                                                mean=mean_debias)
                    # only want to save the actual bias...
                    d_losses.append(d_loss.item())
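                    # gradient of the scaled debias loss w.r.t. the original (time=0)
                    # parameters, excluding the probe's own weights; it is folded into
                    # each parameter's .grad after loss.backward() below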
                    grad_of_grads = torch.autograd.grad(
                        alpha * d_loss,
                        list(fn2v.parameters(time=0))[:-2],
                        allow_unused=True)

                    del prelim_loss
                    del logits
                    del vg
                    del fn2v
                    del diffopt_n2v
            #### Part 2b: Adversarial Loss
            if adversarial:
                logits = net_forward(
                    None, forward=True)[:, -2:]  # just use activation probe
                labels = genders.type(torch.FloatTensor).reshape(
                    genders.shape[0], -1).to(device)
                adv_loss, _ = utils.balanced_loss(logits,
                                                  labels,
                                                  device,
                                                  0.5,
                                                  ids=ids,
                                                  stable=True)
                adv_losses.append(adv_loss.item())
                # only apply the reversed adversarial term while the gender probe's loss
                # is small; once it grows past 2, skip it and let the probe recover
                if adv_loss < 2:
                    adv_loss = -beta * adv_loss
                    loss += adv_loss
            loss.backward()
            if debias_:
                # custom backward to include the bias regularization....
                max_norm_grad = -1
                param_idx = -1
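                # add the meta-gradient of the debias loss into each parameter's
                # existing .grad, tracking which parameter receives the largest norm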
                for ii in range(len(grad_of_grads)):
                    if (grad_of_grads[ii] is not None
                            and params[ii].grad is not None
                            and torch.isnan(grad_of_grads[ii]).long().sum() <
                            grad_of_grads[ii].reshape(-1).shape[0]):
                        # mask out any NaN entries just in case (tensors that are entirely
                        # NaN are skipped by the check above)
                        not_nan = ~torch.isnan(grad_of_grads[ii])
                        params[ii].grad[not_nan] += grad_of_grads[ii][not_nan]
                        if grad_of_grads[ii][not_nan].norm().item(
                        ) > max_norm_grad:
                            max_norm_grad = grad_of_grads[ii][not_nan].norm(
                            ).item()
                            param_idx = ii
                bias_grads.append((param_idx, max_norm_grad))
                # if the meta-gradient is too large, remove it again from .grad to keep
                # training stable (a rescaling variant is left commented out below)
                if not no_limit and max_norm_grad > 100:
                    for ii in range(len(grad_of_grads)):
                        if (grad_of_grads[ii] is not None
                                and params[ii].grad is not None and
                                torch.isnan(grad_of_grads[ii]).long().sum() <
                                grad_of_grads[ii].reshape(-1).shape[0]):
                            # mask out NaN entries, as above
                            not_nan = ~torch.isnan(grad_of_grads[ii])
                            params[ii].grad[not_nan] -= grad_of_grads[ii][
                                not_nan]
                            # scale accordingly
                            # params[ii].grad[not_nan] += grad_of_grads[ii][not_nan] / max_norm_grad

            loss_shapes.append(shapes)
            loss_shapes2.append(shapes2)
            model_optim.step()
            #magnitudes2.append(
            #    torch.norm(net.weight.data, dim=1).data.cpu().numpy()
            #)
            ct += 1

    # save results every epoch...
    with open(results_path, 'wb') as f:
        print("saving results", e)
        print(results_path)
        pickle.dump(results, f)
    torch.save(net.state_dict(), n2v_path)
    torch.save(model.state_dict(), model_path)
Example #20
0
def main(args):
    # Set the random seed manually for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
    else:
        print("Note that our pre-trained models require CUDA to evaluate.")

    ###########################################################################
    # Load the models
    ###########################################################################

    model_args, idx2word, autoencoder, gan_gen, gan_disc \
        = load_models(args.load_path)

    ###########################################################################
    # Load the data
    ###########################################################################

    corpus = Corpus(args.data_path,
                    maxlen=100,
                    vocab_size=model_args['vocab_size'],
                    lowercase=model_args['lowercase'])
    eval_batch_size = 10
    test_data = batchify(corpus.test, eval_batch_size, shuffle=False)

    ###########################################################################
    # Prediction code
    ###########################################################################

    autoencoder.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary.word2idx)
    all_accuracies = 0
    num_examples = 0
    criterion_ce = torch.nn.CrossEntropyLoss(size_average=False)
    for i, batch in enumerate(test_data):
        source, target, lengths = batch
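        # Variable(..., volatile=True) is the pre-0.4 PyTorch idiom for disabling
        # autograd during evaluation (replaced by torch.no_grad() in later versions)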
        source = Variable(source, volatile=True)
        target = Variable(target, volatile=True)
        #source = to_gpu(args.cuda, Variable(source, volatile=True))
        #target = to_gpu(args.cuda, Variable(target, volatile=True))

        # Generate output.

        # output: batch x seq_len x ntokens
        hidden = autoencoder(source, lengths, noise=False, encode_only=True)
        max_indices = autoencoder.generate(hidden,
                                           model_args['maxlen'],
                                           sample=False)

        # Decode in training mode to compute loss.

        mask = target.gt(0)
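        # target index 0 is presumably the padding token; keep only real tokens in the loss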
        masked_target = target.masked_select(mask)
        # examples x ntokens
        output_mask = mask.unsqueeze(1).expand(mask.size(0), ntokens)

        # output: batch x seq_len x ntokens
        output = autoencoder(source, lengths, noise=False)
        flattened_output = output.view(-1, ntokens)

        masked_output = \
            flattened_output.masked_select(output_mask).view(-1, ntokens)
        total_loss += criterion_ce(masked_output / model_args['temp'],
                                   masked_target).data
        num_examples += source.size()[0]

        aeoutf = args.outf
        with open(aeoutf, "a") as f:
            max_indices = max_indices.view(eval_batch_size,
                                           -1).data.cpu().numpy()
            target = target.view(eval_batch_size, -1).data.cpu().numpy()
            eos = corpus.dictionary.word2idx['<eos>']
            for t, idx in zip(target, max_indices):
                # real sentence
                length = list(t).index(eos) if eos in t else len(t)
                chars = " ".join(
                    [corpus.dictionary.idx2word[x] for x in t[:length]])
                f.write(chars)
                f.write("\t")
                # autoencoder output sentence
                length = list(idx).index(eos) if eos in idx else len(idx)
                chars = " ".join(
                    [corpus.dictionary.idx2word[x] for x in idx[:length]])
                f.write(chars)
                f.write("\n")

    print("Processed {} examples".format(num_examples))
    print("Cross-entropy: {:.4f}".format((total_loss / num_examples)[0]))
Example #21
0
def train_net2vec(trainloader,
                  testloader,
                  device,
                  seed,
                  specific=None,
                  p=0.5,
                  n_epochs=5,
                  module='layer4',
                  lr=0.5,
                  base_folder="",
                  out_file=None,
                  experiment1="",
                  experiment2="",
                  model_extra="",
                  n2v_extra="",
                  with_n2v=False,
                  nonlinear=False,
                  model_custom_end='',
                  n2v_custom_end='',
                  multiple=False,
                  dataset='bam',
                  parallel=False,
                  gpu_ids=[]):
    if out_file is not None:
        f = open(out_file, 'a')
    else:
        f = None
    print("Training N2V | p =", p, file=f)
    if not nonlinear:
        n2v_extra = "n2v" + str(n2v_extra)
    else:
        n2v_extra = "mlp" + str(n2v_extra)
    if len(model_custom_end) > 0:
        model_custom_end = "_" + model_custom_end
    if len(n2v_custom_end) > 0:
        n2v_custom_end = "_" + n2v_custom_end
    # default to class index 0 unless a matching class from `specific` is found
    specific_idx = 0
    if hasattr(trainloader.dataset, 'idx_to_class'):
        for key in trainloader.dataset.idx_to_class:
            if specific is not None and trainloader.dataset.idx_to_class[
                    key] in specific:
                specific_idx = int(key)
    if dataset == 'bam' or dataset == 'coco':
        model_path = utils.get_model_path(base_folder,
                                          seed,
                                          specific,
                                          "resnet" + str(model_extra) + "_" +
                                          str(p) + model_custom_end + ".pt",
                                          experiment=experiment1,
                                          with_n2v=with_n2v,
                                          n2v_module=module)
        n2v_path = utils.get_net2vec_path(
            base_folder,
            seed,
            specific,
            module,
            "resnet_" + str(n2v_extra) + "_" + str(p) + n2v_custom_end + ".pt",
            experiment=experiment2,
        )
    else:
        if with_n2v:
            model_path = os.path.join(
                base_folder, str(seed), experiment1, module,
                "resnet" + str(model_extra) + model_custom_end + ".pt")
        else:
            model_path = os.path.join(
                base_folder, str(seed), experiment1,
                "resnet" + str(model_extra) + model_custom_end + ".pt")
        n2v_path = os.path.join(
            base_folder, str(seed), experiment2, module,
            'resnet_' + str(n2v_extra) + n2v_custom_end + ".pt")
    print(model_path, n2v_path)
    num_attributes = 12
    if nonlinear:
        num_attributes = 2
    if multiple:
        num_attributes = 10 + 9 + 2 * 10
    num_classes = 10
    if dataset == 'coco':
        num_classes = 79
        num_attributes = 81
    model, net, net_forward, activation_probe = models.load_models(
        device,
        # passing None means we load the pretrained weights from Tianlu
        None if (dataset == 'coco') and ('adv' in model_extra) else lambda x,
        y, z: models.resnet_(pretrained=True,
                             custom_path=x,
                             device=y,
                             num_classes=num_classes,
                             initialize=z,
                             size=50 if
                             (dataset == 'bam' or dataset == 'coco') else 34),
        model_path=model_path,
        net2vec_pretrained=False,
        module=module,
        num_attributes=num_attributes,
        model_init=False,  # don't need to initialize a new one
        n2v_init=True,
        loader=trainloader,
        nonlinear=nonlinear,
        parallel=parallel,
        gpu_ids=gpu_ids)
    if dataset == 'coco':
        object_weights = torch.FloatTensor(
            trainloader.dataset.getObjectWeights())
        gender_weights = torch.FloatTensor(
            trainloader.dataset.getGenderWeights())
        all_weights = torch.cat([object_weights, gender_weights])
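        # reduction='elementwise_mean' is the pre-1.0 PyTorch spelling of reduction='mean'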
        criterion = nn.BCEWithLogitsLoss(weight=all_weights.to(device),
                                         reduction='elementwise_mean')
        #criterion = nn.BCEWithLogitsLoss()
    else:
        criterion = None
    net2vec.train_net2vec(
        model,
        net,
        net_forward,
        n_epochs,
        trainloader,
        testloader,
        device,
        lr=lr,
        save_path=n2v_path,
        f=f,
        train_labels=[-2, -1] if nonlinear else None,
        multiple=multiple,
        balanced=False if dataset == 'coco' else True,
        criterion=criterion,
        adam=False,  #True if dataset == 'coco' else False,
        leakage=False)
    if f is not None:
        f.close()
Example #22
0
def train_leakage(trainloader,
                  testloader,
                  device,
                  seed,
                  specific=None,
                  p=0.5,
                  n_epochs=5,
                  module='layer4',
                  lr=0.5,
                  base_folder="",
                  out_file=None,
                  experiment1="",
                  experiment2="",
                  model_extra="",
                  n2v_extra="",
                  with_n2v=False,
                  nonlinear=False,
                  model_custom_end='',
                  n2v_custom_end='',
                  multiple=False,
                  dataset='bam',
                  parallel=False,
                  gpu_ids=[]):
    if out_file is not None:
        f = open(out_file, 'a')
    else:
        f = None
    print("Training Model Leakage | p =", p, file=f)
    if not nonlinear:
        n2v_extra = "n2v" + str(n2v_extra)
    else:
        n2v_extra = "mlp" + str(n2v_extra)
    if len(model_custom_end) > 0:
        model_custom_end = "_" + model_custom_end
    if len(n2v_custom_end) > 0:
        n2v_custom_end = "_" + n2v_custom_end
    # default to class index 0 unless a matching class from `specific` is found
    specific_idx = 0
    if hasattr(trainloader.dataset, 'idx_to_class'):
        for key in trainloader.dataset.idx_to_class:
            if specific is not None and trainloader.dataset.idx_to_class[
                    key] in specific:
                specific_idx = int(key)
    if dataset == 'bam' or dataset == 'coco':
        model_path = utils.get_model_path(base_folder,
                                          seed,
                                          specific,
                                          "resnet" + str(model_extra) + "_" +
                                          str(p) + model_custom_end + ".pt",
                                          experiment=experiment1,
                                          with_n2v=with_n2v,
                                          n2v_module=module)
        n2v_path = utils.get_net2vec_path(
            base_folder,
            seed,
            specific,
            module,
            "leakage/resnet_" + str(n2v_extra) + "_" + str(p) +
            n2v_custom_end + ".pt",
            experiment=experiment2,
        )
    else:
        if with_n2v:
            model_path = os.path.join(
                base_folder, str(seed), experiment1, module,
                "resnet" + str(model_extra) + model_custom_end + ".pt")
        else:
            model_path = os.path.join(
                base_folder, str(seed), experiment1,
                "resnet" + str(model_extra) + model_custom_end + ".pt")
        n2v_path = os.path.join(
            base_folder, str(seed), experiment2, module,
            'leakage/resnet_' + str(n2v_extra) + n2v_custom_end + ".pt")

    if dataset == 'bam':
        if specific is not None and not isinstance(specific, str):
            folder_name = '.'.join(sorted(specific))
        else:
            folder_name = specific
        leakage_folder = os.path.join(str(base_folder), str(seed), folder_name,
                                      str(experiment2), str(module), 'leakage')
    else:
        leakage_folder = os.path.join(str(base_folder), str(seed),
                                      str(experiment2), str(module), 'leakage')
    if not os.path.isdir(leakage_folder):
        os.mkdir(leakage_folder)
    num_classes = 10
    if dataset == 'coco':
        num_classes = 79
    num_attributes = 2
    model, net, net_forward, activation_probe = models.load_models(
        device,
        None if (dataset == 'coco') and ('adv' in model_extra) else lambda x,
        y, z: models.resnet_(pretrained=True,
                             custom_path=x,
                             device=y,
                             num_classes=num_classes,
                             initialize=z,
                             size=50 if
                             (dataset == 'bam' or dataset == 'coco') else 34),
        model_path=model_path,
        net2vec_pretrained=False,
        module='fc',  # leakage will come from the output logits...
        num_attributes=num_attributes,
        model_init=False,  # don't need to initialize a new one
        n2v_init=True,
        loader=trainloader,
        nonlinear=nonlinear,
        parallel=parallel,
        gpu_ids=gpu_ids)

    def criterion(logits, genders):
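        # leakage criterion: predict gender from the model's output logits
        # (genders[:, 1] is presumably the binary gender label)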
        return F.cross_entropy(logits,
                               genders[:, 1].long(),
                               reduction='elementwise_mean')

    net2vec.train_net2vec(model,
                          net,
                          net_forward,
                          n_epochs,
                          trainloader,
                          testloader,
                          device,
                          lr=lr,
                          save_path=n2v_path,
                          f=f,
                          train_labels=[-2, -1],
                          balanced=False,
                          criterion=criterion,
                          specific=specific_idx,
                          adam=True,
                          save_best=True,
                          leakage=True)
    if f is not None:
        f.close()
Example #23
0
"""Command line tool for trying out the prediction models"""

import argparse

from sklearn.externals import joblib
from haliasdata import get_bird_species

from models import prediction_models, load_models

parser = argparse.ArgumentParser(description='Predict traffic disruptions')
parser.add_argument('var1', help='variable 1', type=float)
parser.add_argument('var2', help='variable 2', type=float)
parser.add_argument('var3', help='variable 3', type=float)
parser.add_argument('var4', help='variable 4', type=float)
args = parser.parse_args()

input_scaler = joblib.load('model/scaler.pkl')

species = get_bird_species()

load_models(prediction_models)

for model in prediction_models:
    #print 'Model %s' % (model.model)
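    # scale the raw CLI inputs with the scaler fitted at training time before predicting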
    value_tuple = input_scaler.transform((args.var1, args.var2, args.var3, args.var4))
    prediction = model.predict(value_tuple)
    print('Model %s: %s' % (model.name, prediction))
Example #24
0
import importlib
import random
import time
from sklearn import linear_model
import pickle
import threading
import models
import utils_exomodel
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error
import utils_model_genetic


importlib.reload(utils_exomodel)
importlib.reload(models)
importlib.reload(utils_model_genetic)

SKLEARN_MODELS = models.load_models()
models_list = list(SKLEARN_MODELS.keys())


def save_obj(obj, name ):
    with open( name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name ):
    with open( name + '.pkl', 'rb') as f:
        return pickle.load(f)

def get_lag8columns(df):
  lag_columns = [x for x in df.columns if "LAG"  in x ]
  exclude_columns = list()
  for i in range(0,8):
Example #25
0
                seen.add(sents[i])
                a = sents[0].split()
                b = sents[i].split()
                sm = difflib.SequenceMatcher(a=a, b=b)

                for tag, i1, i2, j1, j2 in sm.get_opcodes():
                    if tag == "equal":
                        print(" ".join(b[j1:j2]), end=" ")
                    if tag == "replace":
                        print(BOLD + " ".join(b[j1:j2]) + ENDC, end=" ")
                        # print("*" + " ".join(b[j1:j2]) + "*", end=" ")
                print()
        print()

def gen(vec):
    "Generate argmax sentence from vector."
    return generate(autoencoder, gan_gen, z=vec,
                    vocab=idx2word, sample=False,
                    maxlen=model_args['maxlen'])


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='PyTorch experiment')
    parser.add_argument('--load_path', type=str,
                        help='directory to load models from')
    args = parser.parse_args()
    model_args, idx2word, autoencoder, gan_gen, gan_disc \
        = load_models(args.load_path)

    main(args)
Example #26
0
def main(args):
    # Set the random seed manually for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
    else:
        print("Note that our pre-trained models require CUDA to evaluate.")

    ###########################################################################
    # Load the models
    ###########################################################################

    model_args, idx2word, autoencoder, gan_gen, gan_disc \
        = load_models(args.load_path)

    ###########################################################################
    # Generation code
    ###########################################################################

    # Generate sentences
    if args.ngenerations > 0:
        noise = torch.ones(args.ngenerations, model_args['z_size'])
        noise.normal_()
        sentences = generate(autoencoder,
                             gan_gen,
                             z=noise,
                             vocab=idx2word,
                             sample=args.sample,
                             maxlen=model_args['maxlen'])

        if not args.noprint:
            print("\nSentence generations:\n")
            for sent in sentences:
                print(sent)
        with open(args.outf, "w") as f:
            f.write("Sentence generations:\n\n")
            for sent in sentences:
                f.write(sent + "\n")

    # Generate interpolations
    if args.ninterpolations > 0:
        noise1 = torch.ones(args.ninterpolations, model_args['z_size'])
        noise1.normal_()
        noise2 = torch.ones(args.ninterpolations, model_args['z_size'])
        noise2.normal_()
        interps = interpolate(autoencoder,
                              gan_gen,
                              z1=noise1,
                              z2=noise2,
                              vocab=idx2word,
                              steps=args.steps,
                              sample=args.sample,
                              maxlen=model_args['maxlen'])

        if not args.noprint:
            print("\nSentence interpolations:\n")
            for interp in interps:
                for sent in interp:
                    print(sent)
                print("")
        with open(args.outf, "a") as f:
            f.write("\nSentence interpolations:\n\n")
            for interp in interps:
                for sent in interp:
                    f.write(sent + "\n")
                f.write('\n')
Example #27
0
corpus = Corpus(datafiles,
                maxlen=args.maxlen,
                vocab_size=args.vocab_size,
                lowercase=args.lowercase,
                vocab=vocabdict)

# save arguments
ntokens = len(corpus.dictionary.word2idx)
print("Vocabulary Size: {}".format(ntokens))
args.ntokens = ntokens

eval_batch_size = 100
en_data = batchify(corpus.data[args.corpus_name],
                   eval_batch_size,
                   shuffle=False)
print(len(en_data))
print("Loaded data!")

model_args, idx2word, autoencoder, gan_gen, gan_disc = load_models(
    args.outf, args.epochs, twodecoders=True)

if args.cuda:
    autoencoder = autoencoder.cuda()
    gan_gen = gan_gen.cuda()
    gan_disc = gan_disc.cuda()

one = to_gpu(args.cuda, torch.FloatTensor([1]))
mone = one * -1

evaluate_generator(1, False)
Example #28
0
                vocab_size=len(vocabdict),
                lowercase=args.lowercase,
                vocab=vocabdict,
                debug=args.debug)

eval_batch_size = args.batch_size
test1_data = batchify(corpus.data['test1'], eval_batch_size, shuffle=False)
test2_data = batchify(corpus.data['test2'], eval_batch_size, shuffle=False)
print("Loaded data!")

###############################################################################
# Build the models
###############################################################################

model_args, idx2word, autoencoder, gan_gen, gan_disc = \
        load_models(args.load_path, args.epoch, twodecoders=True)

ntokens = len(corpus.dictionary.word2idx)

if args.cuda:
    autoencoder = autoencoder.cuda()
    autoencoder.gpu = True

###############################################################################
# Training code
###############################################################################

def evaluate_transfer(whichdecoder, data_source, epoch):
    # Turn on evaluation mode which disables dropout.
    autoencoder.eval()
    ntokens = len(corpus.dictionary.word2idx)
Example #29
0
def main():
    logging.basicConfig(level=logging.INFO, format='%(message)s')

    parser = argparse.ArgumentParser(
        description='Predict using trained models')
    parser.add_argument('rev_map', help='reverse inflection map')
    parser.add_argument('models',
                        nargs='+',
                        help='trained models (category:file)')
    parser.add_argument('--ambiguous',
                        action='store_true',
                        help='evaluate only lemmas with multiple inflections')
    args = parser.parse_args()

    logging.info('Loading reverse inflection map')
    with open(args.rev_map) as f:
        rev_map = cPickle.load(f)

    logging.info('Loading inflection prediction models')
    models = load_models(args.models)
    logging.info('Loaded models for %d categories', len(models))

    stats = {cat: [0, 0, 0, 0, 0] for cat in config.EXTRACTED_TAGS}
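    # per-category stats: [instance count, reciprocal-rank sum, # correct,
    #                      total gold log-prob, total candidate inflections]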

    for source, target, alignment in read_sentences(sys.stdin):
        for word, features in extract_instances(source, target, alignment):
            gold_inflection, lemma, tag = word
            category = tag[0]
            gold_tag = tag[1:]
            possible_inflections = rev_map.get((lemma, category), [])
            if (gold_tag, gold_inflection) not in possible_inflections:
                print(u'Expected: {} ({}) not found'.format(
                    gold_inflection, gold_tag).encode('utf8'))
                continue
            if args.ambiguous and len(possible_inflections) == 1: continue

            model = models[category]

            scored_inflections = model.score_all(possible_inflections,
                                                 features)
            ranked_inflections = sorted(scored_inflections, reverse=True)
            predicted_score, predicted_tag, predicted_inflection = ranked_inflections[
                0]

            gold_rank = 1 + [tag for _, tag, _ in ranked_inflections
                             ].index(gold_tag)
            gold_score = next((score for score, tag, _ in ranked_inflections
                               if tag == gold_tag))

            print(u'Expected: {} ({}) r={} score={:.3f} |'
                  ' Predicted: {} ({}) score={:.3f}'.format(
                      gold_inflection, gold_tag, gold_rank, gold_score,
                      predicted_inflection, predicted_tag,
                      predicted_score).encode('utf8'))

            stats[category][0] += 1
            stats[category][1] += 1 / float(gold_rank)
            stats[category][2] += (gold_inflection == predicted_inflection)
            stats[category][3] += gold_score
            stats[category][4] += len(ranked_inflections)

    for category, (n_instances, rrank_sum, n_correct, total_log_prob,
                   n_inflections) in stats.items():
        if n_instances == 0: continue
        mrr = rrank_sum / n_instances
        accuracy = n_correct / float(n_instances)
        ppl = math.exp(-total_log_prob / n_instances)
        avg_inflections = n_inflections / float(n_instances)
        print(
            'Category {}: MRR={:.3f} acc={:.1%} ppl={:.2f} ({} instances; avg #infl={:.2f})'
            .format(category, mrr, accuracy, ppl, n_instances,
                    avg_inflections))
Example #30
0
"""Command line tool for trying out the prediction models"""

import argparse

from models import prediction_models, load_models

load_models(prediction_models)

parser = argparse.ArgumentParser(description='Predict traffic disruptions')
parser.add_argument('temperature', help='Temperature [C]', type=float)
parser.add_argument('rainfall', help='Precipitation [mm]', type=float)
parser.add_argument('windspeed', help='Wind speed [m/s]', type=float)
parser.add_argument('hour', help='Hour of the day', type=int)
args = parser.parse_args()

for model in prediction_models:
    #print 'Model %s' % (model.model)
    value_tuple = (args.rainfall, args.temperature, args.windspeed, args.hour)
    prediction = model.predict(value_tuple[:model.parameters])
    print('Model %s: %s' % (model.name, prediction))
Example #31
0
"""


def run_server(port_num=8080):
    if not models.load_models():
        #print(os.listdir(os.curdir))
        with open(os.path.join(os.path.dirname(__file__), 'big.txt'),
                  'rb') as f:
            print(os.path.join(os.path.dirname(__file__), 'big.txt'))
            models.train_models(str(f.read()))

    @route('/<wordA>/<wordB>')
    def index(wordA, wordB):
        return dict(autocomplete.predict(wordA, wordB))

    run(host='localhost', port=port_num)


if __name__ == "__main__":
    """load the classic Norvig big.txt corpus"""
    print("training!")

    if not models.load_models():
        #print(os.listdir(os.curdir))
        with open(os.path.join(os.path.dirname(__file__), 'big.txt'),
                  'rb') as f:
            print(os.path.join(os.path.dirname(__file__), 'big.txt'))
            models.train_models(str(f.read()))

    print("done training!")