Example No. 1
def upgrade_pip(config_dir):
    config_instance = Config(config_dir)

    with prefix('workon %s' % config_instance.project_name):
        run('pip install -U pip')
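This snippet assumes Fabric's prefix and run helpers (Fabric 1.x style) and a Config object that exposes a project_name attribute. A self-contained sketch under those assumptions, with a made-up host and config path, might look like:

# Sketch only: the Fabric 1.x API and a Config exposing project_name are assumptions here.
from fabric.api import prefix, run

from utils import Config


def upgrade_pip(config_dir):
    config_instance = Config(config_dir)
    # Activate the project's virtualenvwrapper environment, then upgrade pip inside it.
    with prefix('workon %s' % config_instance.project_name):
        run('pip install -U pip')

# Hypothetical invocation (host and path are made up):
#   fab -H deploy@example.com upgrade_pip:/srv/app/config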
Example No. 2
from utils.ArticlesHandler import ArticlesHandler
from utils import solve, embedding_matrix_2_kNN, get_rate, accuracy, precision, recall, f1_score
from utils.Trainer_graph import TrainerGraph
from utils import Config, accuracy_sentence_based
import time
import numpy as np
# from utils.postprocessing.SelectLabelsPostprocessor import SelectLabelsPostprocessor
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

config = Config('config/')

debut = time.time()
handler = ArticlesHandler(config)

# Save in a pickle file. To open, use the pickle dataloader.
#handler.articles.save("../Dataset/train_fake.pkl")
# Only recompute labels:
# handler.articles.compute_labels()

C = handler.get_tensor()
# select_labels = SelectLabelsPostprocessor(config, handler.articles)
# handler.add_postprocessing(select_labels, "label-selection")
# handler.postprocess()
labels = np.array(handler.articles.labels)
all_labels = np.array(handler.articles.labels_untouched)

if config.learning.method_learning == "FaBP":
    assert max(labels) == 2, "FaBP can only be used for binary classification."
Example No. 3
    def test_loading_same_directory(self):
        c = Config(Path.cwd() / 'utils' / 'config.yml')
        self.assertEqual(c.mysql.host, 'localhost')
        self.assertEqual(c.other.preprocessing_queue[0],
                         'preprocessing.scale_and_center')
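The assertions imply that utils/config.yml holds at least the following structure, shown here as the equivalent Python mapping (the real file may contain more keys):

# Structure implied by the test's assertions; illustrative, not the full file.
expected = {
    'mysql': {'host': 'localhost'},
    'other': {'preprocessing_queue': ['preprocessing.scale_and_center']},
}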
Example No. 4
                te_count += len(th)
                if td0 is None:
                    td0 = diff
                else:
                    td0 = np.concatenate((td0, diff))

            fp.close()

            print("Err {0} {1} {2:.6f} {3:.6f} {4}".format(
                datetime.datetime.now() - T0, a, t_loss / t_count,
                te_loss / te_count, np.median(np.array(td0))))
            saver.save(sess, cfg.netFile)


if __name__ == '__main__':

    config_file = "config_2.json"

    cfg = Config(config_file)
    cfg.num_output = list(map(int, cfg.num_output.split(',')))

    iterations = 100000

    if len(sys.argv) > 1:
        cfg.mode = int(sys.argv[1])

    if len(sys.argv) > 2:
        iterations = int(sys.argv[2])

    run(cfg, iterations)
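Note that num_output is stored in config_2.json as a comma-separated string and is parsed into a list of ints before run() is called. For instance (the value below is made up, since the JSON file is not shown):

num_output = "64,128,10"                             # illustrative string from the config file
num_output = list(map(int, num_output.split(',')))   # -> [64, 128, 10]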
Example No. 5
def run_possibilities(dataset_path, logs_path, possibilities):
    x_train_labeled, x_train_unlabeled, y_train_labeled, x_val, y_val = get_data(
        dataset_path=dataset_path,
        normalization=NORMALIZATION,
        unlabeled_percentage=UNLABELED_PERCENTAGE,
        seed=SEED)
    _, evaluation_mapping, _ = timit.get_phone_mapping()
    n_classes = get_number_of_classes()

    for consistency_loss, schedule, sigma, consistency_scale, stabilization_scale, xi in possibilities:
        hparams = {
            'consistency_loss': consistency_loss,
            'schedule': schedule,
            'sigma': sigma,
            'consistency_scale': consistency_scale,
            'stabilization_scale': stabilization_scale,
            'xi': xi
        }

        for k, v in hparams.items():
            print(f'{k}={v}, ', end='')
        print()

        config = Config(version='mono_directional',
                        n_hidden_layers=N_HIDDEN_LAYERS,
                        n_units=N_UNITS,
                        n_epochs=N_EPOCHS,
                        batch_size=BATCH_SIZE,
                        unlabeled_percentage=UNLABELED_PERCENTAGE,
                        optimizer=OPTIMIZER,
                        consistency_loss=consistency_loss,
                        consistency_scale=consistency_scale,
                        stabilization_scale=stabilization_scale,
                        xi=xi,
                        sigma=sigma,
                        schedule=schedule,
                        schedule_length=SCHEDULE_LENGTH,
                        normalization=NORMALIZATION,
                        seed=SEED)

        logs_path_ = logs_path / str(config)
        if logs_path_.is_dir():  # skip runs that are already done (e.g. in case of crashes)
            print('already done, skipping...')
            continue
        logs_path_.mkdir(parents=True)
        logs_path_ = str(logs_path_)

        model = DualStudent(n_classes=n_classes,
                            n_hidden_layers=config.n_hidden_layers,
                            n_units=config.n_units,
                            consistency_loss=config.consistency_loss,
                            consistency_scale=config.consistency_scale,
                            stabilization_scale=config.stabilization_scale,
                            xi=config.xi,
                            padding_value=PADDING_VALUE,
                            sigma=config.sigma,
                            schedule=config.schedule,
                            schedule_length=config.schedule_length,
                            version=config.version)

        model.compile(optimizer=get_optimizer(config.optimizer))

        model.train(x_labeled=x_train_labeled,
                    x_unlabeled=x_train_unlabeled,
                    y_labeled=y_train_labeled,
                    n_epochs=config.n_epochs,
                    batch_size=config.batch_size,
                    seed=config.seed)

        results = model.test(x=x_val,
                             y=y_val,
                             batch_size=config.batch_size,
                             evaluation_mapping=evaluation_mapping)

        with tf.summary.create_file_writer(logs_path_).as_default():
            hp.hparams(hparams)
            for k, v in results.items():
                tf.summary.scalar(k, v, step=N_EPOCHS)
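The loop unpacks each element of possibilities into six hyperparameters, so the argument is expected to be an iterable of 6-tuples in exactly that order. One way to build it is itertools.product; the values below are illustrative, not the original search grid:

# Illustrative grid only; the real values are defined by the caller of run_possibilities.
from itertools import product

possibilities = list(product(
    ['mse', 'kl'],      # consistency_loss
    ['rampup', None],   # schedule
    [0.05, 0.1],        # sigma
    [10.0, 100.0],      # consistency_scale
    [1.0],              # stabilization_scale
    [0.6],              # xi
))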
Example No. 6
            while i < len(filenames):

                sys.stdout.write('\r>> converting images %d/%d' %
                                 (i + 1, len(filenames)))
                sys.stdout.flush()
                filename = filenames[i]

                input_byte, target_byte, input_shape, target_shape = read_image(
                    dataset_dir, filename)
                example = convert_to_example(input_byte, target_byte,
                                             input_shape, target_shape)
                serialized = example.SerializeToString()
                tfrecord_writer.write(serialized)

                i += 1

    print('\n%s images converted' % mode)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--config',
                        type=str,
                        default='config/pix2pix.yml',
                        help='config file name')
    args = parser.parse_args()

    Config(args.config)
    create_tfrecord()
Example No. 7
from threading import Timer

from lib.HealthMonitor.HealthMonitor.HealthMapper import HealthMapper
from utils import Interaction, TaskThread, Cache, Config

CF = Config()


class HealthDispatcher(object):
    def __init__(self, **kwargs):
        super().__init__()
        self._request_inter = Interaction("receive")
        self._sender_inter = Interaction("sender")

        self._handlers = {
            "alive":
            TaskThread(target=kwargs.get("alive", self._alive), name="alive"),
            "status":
            TaskThread(target=kwargs.get("status", self._status),
                       name="status"),
            "init":
            TaskThread(target=kwargs.get("init", self._init), name="init"),
        }

        self._cache = Cache("cluster_manager")
        self._cache['node_load'] = {}

        self._mapper = HealthMapper()
        # self._cache['fd'] = {}

    def dispatch(self, data, address):
Example No. 8
def main(inputs, paths):
    print """
    """

    task = {
        'KIND_RUN' : 'TEST_CP2K',
        'TEMPLATE_FILE' : 'FSSH_CORE.template',
        'FORCEFIELD_FILE' : 'FSSH_FF.template',
        'TEST' : 'YES'
    }
    inputs.update(task)


    list_propagation = ['FSSH','BORN_OPPENHEIMER', 'TEST_HOP','FROZEN_HAMILTONIAN','CLASSICAL_PATH','GALILEAN']
    #list_propagation = ['FSSH']
    list_decoherences = ['NO_DECO_CORR', 'INSTANT_COLLAPSE', 'DAMPING']
    list_trivial_corr = ['TRIVIAL_HOP_CORRECT', 'UNMODIFIED_SURF_HOP']

    list_analytics   = ['T', 'F']
    #list_analytics = ['T']

#    list_collapse = ['T', 'F']
    #list_analytics = ['T']
    list_com = ['T', 'F']
    #list_collapse = ['T']
    list_first_diabat = [1, 2]
    #list_first_diabat = [1]
    list_rescaling    = ['SIMPLE', 'SIMPLE_QSYS','NACV']
    #list_rescaling = ['NACV']
    list_nacv         = ['TEST', 'CONTRIBUTION','TOTAL','FAST']
    #list_nacv         = ['TOTAL']
    list_reversal     = ['NEVER','ALWAYS','TRUHLAR','SUBOTNIK']
    #list_reversal = ['ALWAYS']

    mega_list = [ { 'PROPAGATION' : prop,
                   # 'COLLAPSE'    : collapse,
                    'DECOHERENCES': deco,
                    'TRIVIAL_CORR' : trivial,
                    'ANALYTICS'   : analytics,
                    'FIRST_DIABAT': diabat,
                    'METHOD_RESCALING' : rescaling,
                    'METHOD_ADIAB_NACV' : nacv,
                    'METHOD_REVERSAL'   : reversal,
                    'CENTER_OF_MASS'    : com
                   }
                  for prop in list_propagation
                  for trivial in list_trivial_corr
                  for deco in list_decoherences
                  for analytics in list_analytics
                  for diabat    in list_first_diabat
                  for rescaling in list_rescaling
                  for nacv      in list_nacv
                  for reversal  in list_reversal
                  for com       in list_com
                  ]

    systems = ['dimer', 'trimer', 'dimer_solvent']



    # Set up the directory and check that every subdirectory is present
    bucket = Bucket(inputs)
    bucket.name()
    paths.update({'bucket': bucket.path})

    task = Dir(inputs.get('INPUT_INFO'))
    paths.update( {'task' : task.path} )

    templates = Dir('templates', paths)
    templates.checkdir()
    templates.clean()

    bin = Dir('bin', paths)
    bin.checkdir()

    ndir= 0
    for system in systems:
        system_input = InputFile( paths.get('task') + system + '/input')
        os.system(' cp -r %s/initial/ %s' % (paths.get('task') + system, paths.get('bucket')))
        initial = Dir( 'initial', paths)
        initial.checkdir()     
        paths.update({'initial' :  initial.path})
        inputs.update(system_input.dict)
        inputs.update({'STEPS': 2})
        dict_prev = {}
        for dict in mega_list:
            if dict.get('PROPAGATION') == 'TEST_HOP':
               inputs.update({'STEPS':1})
            if system_input.dict.get('FILE_INIT') != 'initial_dimer':
               dict.update({'ANALYTICS' : 'F' })

            system = system_input.dict.get('SYSTEM')
            if system == 'CRYSTAL':
                from utils import CP2KOSFSSH as Config
            elif system == 'SOLVENT':
                from utils import CP2KOSwSolventFSSH as Config
            else:
                sys.exit()

            config = Config( inputs, paths, INIT = 1, **dict)
            print "GO FOR RUN %d" % ndir
            ndir = config.run(ndir)
            if os.path.exists('run-%d' % (ndir -1) ):
            #    pass
                os.system('rm -rf run-%d' % (ndir - 1))
            else:
                print " ERROR IN CP2K FOR THOSE PARAMETERS:"
                print dict
                print " TO BE COMPARED WITH:"
                print dict_prev
                sys.exit()
            dict_prev = dict
Example No. 9
    def __init__(self):
        self.__config = Config()
        pathlib.Path(self.__config.get_model_path()).mkdir(
            parents=True, exist_ok=True)
        pathlib.Path(self.__config.get_prediction_path()).mkdir(
            parents=True, exist_ok=True)
Example No. 10
parser.add_argument(
    '--app_prof',
    choices=['dureader_debug', 'cmrc2018_debug', 'dureader', 'cmrc2018'],
    default='cmrc2018',
    help='choose config profile to use')
parser.add_argument('--params_prof',
                    choices=['qanet', 'default'],
                    default='qanet',
                    help='choose params profile to use')
args = parser.parse_args()

dic = {
    '../data/configs.yaml': args.app_prof,
    '../data/params.yaml': args.params_prof
}
config = Config(dic)

server = Server(args, config)

app = Flask(__name__)


@app.route('/')
def index():
    return 'Index Page'


@app.route('/answer', methods=['GET', 'POST'])
def answer():
    args = request.args
    question = args.get('question', None)
Example No. 11
def main(args):
    dataset_config = Config(args.dataset_config)
    model_config = Config(args.model_config)

    exp_dir = Path("experiments") / model_config.type
    exp_dir = exp_dir.joinpath(
        f"epochs_{args.epochs}_batch_size_{args.batch_size}_learning_rate_{args.learning_rate}"
    )

    if not exp_dir.exists():
        exp_dir.mkdir(parents=True)

    if args.fix_seed:
        torch.manual_seed(777)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    tokenizer = get_tokenizer(dataset_config, model_config)
    tr_dl, val_dl = get_data_loaders(dataset_config, tokenizer,
                                     args.batch_size)

    model = VDCNN(num_classes=model_config.num_classes,
                  embedding_dim=model_config.embedding_dim,
                  k_max=model_config.k_max,
                  vocab=tokenizer.vocab)

    loss_fn = nn.CrossEntropyLoss()
    opt = optim.Adam(params=model.parameters(), lr=args.learning_rate)
    scheduler = ReduceLROnPlateau(opt, patience=5)
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)

    writer = SummaryWriter(f'{exp_dir}/runs')
    checkpoint_manager = CheckpointManager(exp_dir)
    summary_manager = SummaryManager(exp_dir)
    best_val_loss = 1e+10

    for epoch in tqdm(range(args.epochs), desc='epochs'):

        tr_loss = 0
        tr_acc = 0

        model.train()
        for step, mb in tqdm(enumerate(tr_dl), desc='steps', total=len(tr_dl)):
            x_mb, y_mb = map(lambda elm: elm.to(device), mb)

            opt.zero_grad()
            y_hat_mb = model(x_mb)
            mb_loss = loss_fn(y_hat_mb, y_mb)
            mb_loss.backward()
            opt.step()

            with torch.no_grad():
                mb_acc = acc(y_hat_mb, y_mb)

            tr_loss += mb_loss.item()
            tr_acc += mb_acc.item()

            if (epoch * len(tr_dl) + step) % args.summary_step == 0:
                val_loss = evaluate(model, val_dl, {'loss': loss_fn},
                                    device)['loss']
                writer.add_scalars('loss', {
                    'train': tr_loss / (step + 1),
                    'val': val_loss
                },
                                   epoch * len(tr_dl) + step)
                model.train()
        else:
            tr_loss /= (step + 1)
            tr_acc /= (step + 1)

            tr_summary = {'loss': tr_loss, 'acc': tr_acc}
            val_summary = evaluate(model, val_dl, {
                'loss': loss_fn,
                'acc': acc
            }, device)
            scheduler.step(val_summary['loss'])
            tqdm.write(
                f"epoch: {epoch+1}\n"
                f"tr_loss: {tr_summary['loss']:.3f}, val_loss: {val_summary['loss']:.3f}\n"
                f"tr_acc: {tr_summary['acc']:.2%}, val_acc: {val_summary['acc']:.2%}"
            )

            val_loss = val_summary['loss']
            is_best = val_loss < best_val_loss

            if is_best:
                state = {
                    'epoch': epoch + 1,
                    'model_state_dict': model.state_dict(),
                    'opt_state_dict': opt.state_dict()
                }
                summary = {'train': tr_summary, 'validation': val_summary}

                summary_manager.update(summary)
                summary_manager.save('summary.json')
                checkpoint_manager.save_checkpoint(state, 'best.tar')

                best_val_loss = val_loss
Example No. 12
                train_mse = np.square(self.calculate_rmse(X))
                train_objective = train_mse * X.shape[0] + self.calc_regularization()
                epoch_convergence = {"train_objective": train_objective,
                                     "train_mse": train_mse}
                self.record(epoch_convergence)
                self.current_epoch += 1
            self.save_params()


        """

        raise NotImplementedError

    def run_epoch(self, data: np.array):
        raise NotImplementedError

    def predict_on_pair(self, user: int, item: int):
        raise NotImplementedError

    def save_params(self):
        raise NotImplementedError


if __name__ == '__main__':
    baseline_config = Config(lr=0.001, gamma=0.001, epochs=10)

    train, validation = get_data()
    baseline_model = Baseline(baseline_config)
    baseline_model.fit(train)
    print(baseline_model.calculate_rmse(validation))
Example No. 13
        },
        'fetch_os': False
    },
    'toucan': {
        'enable': True,
        'api': 'http://toucan:3000',
        'min_retry_time': 5,
        'max_retry_time': 60 * 5,
        'max_retry_count': 20
    },
    'pid_file': 'aucote.pid',
    'default_config': 'aucote_cfg_default.yaml',
}

# global cfg
cfg = Config(_DEFAULT)


async def start_toucan(default_config):
    """
    Initialize Toucan

    Args:
        default_config:

    Returns:
        None

    """
    Toucan.min_retry_time = cfg['toucan.min_retry_time']
    Toucan.max_retry_time = cfg['toucan.max_retry_time']
Example No. 14
import pandas as pd
from pathlib import Path
from utils import Config
from sklearn.model_selection import train_test_split

# dataset
data_dir = Path("data")
train = pd.read_csv(data_dir / "kor_pair_train.csv").filter(
    items=["question1", "question2", "is_duplicate"])

test = pd.read_csv(data_dir / "kor_pair_test.csv").filter(
    items=["question1", "question2", "is_duplicate"])

dataset = pd.concat([train, test], ignore_index=True, sort=False)

train, test = train_test_split(dataset, test_size=.1, random_state=777)
train, validation = train_test_split(train, test_size=.1, random_state=777)

train.to_csv(data_dir / "train.txt", sep="\t", index=False)
validation.to_csv(data_dir / "validation.txt", sep="\t", index=False)
test.to_csv(data_dir / "test.txt", sep="\t", index=False)

config = Config({
    "train": str(data_dir / "train.txt"),
    "validation": str(data_dir / "validation.txt"),
    "test": str(data_dir / "test.txt"),
})
config.save(data_dir / "config.json")
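utils.Config is project-specific and its implementation differs between the repositories sampled here. A minimal sketch that would support this example's pattern of building from a mapping, reading entries as attributes, and saving to JSON (an assumption, not any of the actual implementations) could look like:

import json
from pathlib import Path


class Config:
    """Minimal sketch: a dict-backed config with attribute access and JSON persistence."""

    def __init__(self, mapping=None, **kwargs):
        self._data = dict(mapping or {}, **kwargs)

    def __getattr__(self, name):
        try:
            return self._data[name]
        except KeyError as err:
            raise AttributeError(name) from err

    def __len__(self):
        return len(self._data)

    def save(self, path):
        Path(path).write_text(json.dumps(self._data, indent=2))

An empty Config() built this way would also satisfy the len(config) == 0 check in Example No. 28 below.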
Example No. 15
from model.metric import evaluate, acc
from utils import Config, CheckpointManager, SummaryManager

parser = argparse.ArgumentParser()
parser.add_argument('--data_dir',
                    default='data',
                    help="Directory containing config.json of data")
parser.add_argument('--model_dir',
                    default='experiments/base_model',
                    help="Directory containing config.json of model")

if __name__ == '__main__':
    args = parser.parse_args()
    data_dir = Path(args.data_dir)
    model_dir = Path(args.model_dir)
    data_config = Config(json_path=data_dir / 'config.json')
    model_config = Config(json_path=model_dir / 'config.json')

    # tokenizer
    with open(data_config.vocab, mode='rb') as io:
        vocab = pickle.load(io)
    pad_sequence = PadSequence(length=model_config.length,
                               pad_val=vocab.to_indices(vocab.padding_token))
    tokenizer = Tokenizer(vocab=vocab,
                          split_fn=MeCab().morphs,
                          pad_fn=pad_sequence)

    # model
    model = SenCNN(num_classes=model_config.num_classes, vocab=tokenizer.vocab)

    # training
Example No. 16
                          token_to_idx=ptr_bert_vocab.token_to_idx)

            # save vocab
            with open(ptr_vocab_path.with_suffix('.pkl'), mode="wb") as io:
                pickle.dump(vocab, io)
        else:
            print('You already have pytorch_model_skt_vocab.json!')

        if not ptr_tokenizer_path.exists():
            urlretrieve('https://kobert.blob.core.windows.net/models/kobert/tokenizer/tokenizer_78b3253a26.model',
                        filename=ptr_tokenizer_path)
        else:
            print('You already have pytorch_model_skt_tokenizer.model')

        ptr_config = Config({'config': str(ptr_config_path),
                             'bert': str(ptr_bert_path),
                             'tokenizer': str(ptr_tokenizer_path),
                             'vocab': str(ptr_vocab_path.with_suffix('.pkl'))})
        ptr_config.save(ptr_dir / "config_skt.json")

    if args.type == 'etri':
        # loading BertTokenizer
        ptr_config_path = ptr_dir / 'bert_config_etri.json'
        ptr_tokenizer_path = ptr_dir / "vocab.korean.rawtext.list"
        ptr_bert_path = ptr_dir / "pytorch_model_etri.bin"

        ptr_tokenizer = ETRITokenizer.from_pretrained(
            ptr_tokenizer_path, do_lower_case=False
        )
        # generate vocab
        idx_to_token = list(ptr_tokenizer.vocab.keys())
        token_to_idx = {token: idx for idx, token in enumerate(idx_to_token)}
Example No. 17
For the full list of settings and their values, see
https://docs.djangoproject.com/en/3.1/ref/settings/
"""
import os
from pathlib import Path

from corsheaders.defaults import default_methods, default_headers
from common.admin_site import admin_site
from utils import Config

BASE_DIR = Path(__file__).resolve(strict=True).parent.parent.parent

# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/3.1/howto/deployment/checklist/
CONFIG_FILE = os.path.join(BASE_DIR, '..', 'config.yml')
config = Config(CONFIG_FILE)

admin_site.site_title = config.get('SITE_TITLE', 'Django Template Project')
admin_site.site_header = config.get('SITE_HEADER', 'Django Template Project')
SECRET_KEY = config.get('SECRET_KEY', raise_error=True)
DEBUG = config.get('DEBUG', False, cast=bool)
ALLOWED_HOSTS = config.get('ALLOWED_HOSTS', cast=list)

INSTALLED_APPS = [
    'administration',
    'website',
    'common',
    'rest_framework_swagger',
    'rest_framework',
    'rest_framework.authtoken',
    'drf_yasg',
Example No. 18
from geeteventbus.subscriber import subscriber
from utils import Logger, XEvent, Config
from pluginbase import PluginBase
from functools import partial
import os

here = os.path.abspath(os.path.dirname(__file__))
get_path = partial(os.path.join, here)

plugin_base = PluginBase(package='plugins',
                         searchpath=[
                             os.path.join(os.getcwd(), 'plugins', 'user'),
                             os.path.join(os.getcwd(), 'plugins', 'system')
                         ])

conf = Config()
lg = Logger(__file__)


class Core(subscriber):
    def __init__(self, eb):
        super().__init__()
        self.eb = eb

        self.plugins = plugin_base.make_plugin_source(searchpath=[
            os.path.join(os.getcwd(), 'plugins', 'user'),
            os.path.join(os.getcwd(), 'plugins', 'system')
        ],
                                                      identifier=__name__)

        for plugin_name in self.plugins.list_plugins():
Example No. 19
                cv2.line(image, pt['prev'], pt['curr'], (0, 0, 0), 1)
        cv2.imwrite('tmp.jpg', image)
        image = QtGui.QPixmap('tmp.jpg')
        self.scene.history.append(3)
        self.scene.addPixmap(image)

    def save_img(self):
        if self.output_img is not None:
            fileName, _ = QtWidgets.QFileDialog.getSaveFileName(
                self, "Save File", QtCore.QDir.currentPath())
            cv2.imwrite(fileName + '.jpg', self.output_img)

    def undo(self):
        self.scene.undo()

    def clear(self):
        self.scene.reset_items()
        self.scene.reset()
        if self.image is not None:
            self.scene.addPixmap(self.image)


if __name__ == '__main__':
    config = Config('demo.yaml')
    os.environ["CUDA_VISIBLE_DEVICES"] = str(config.GPU_NUM)
    model = Model(config)

    app = QtWidgets.QApplication(sys.argv)
    ex = Ex(model, config)
    sys.exit(app.exec_())
Example No. 20
def main(log_dir, dataset_dir, out_dir):
    models = os.listdir(log_dir)
    config_path = os.path.join(log_dir, models[0])
    config = Config(config_path, "READ")

    model_path_list = [
        os.path.join(log_dir, each, "xgb.model") for each in models
    ]

    dataset_fmt = "/data/slowmoyang/QGJets/npz/root_{}_{}/dijet_test_set.npz"
    datasets = [
        dataset_fmt.format(min_pt, min_pt + 100)
        for min_pt in range(100, 901, 100)
    ]

    auc_matrix = [[] for min_pt in range(100, 1000, 100)]

    for model_idx, model_path in enumerate(model_path_list):
        clf = load_classifier(model_path)

        model_min_pt, _ = parse_model_path(model_path)
        for dataset_path in datasets:
            _, dataset_min_pt, _ = parse_dataset_path(dataset_path)
            x, y_true, _ = load_dataset(dataset_path,
                                        features=config.feature_names)

            y_score = clf.predict_proba(x)[:, 1]
            auc = metrics.roc_auc_score(y_true=y_true, y_score=y_score)
            auc_matrix[model_idx].append(auc)

    auc_mat = np.array(auc_matrix, np.float32)
    print(auc_mat)

    path_fmt = os.path.join(out_dir, "auc_heatmap.{ext}")
    np.save(path_fmt.format(ext="npy"), auc_mat)

    sns.set(font_scale=1.2)
    fig, ax = plt.subplots()

    fig.set_figheight(8)
    fig.set_figwidth(12)

    labels = [
        "{}\n~ {}\n".format(min_pt, min_pt + 100)
        for min_pt in range(100, 1000, 100)
    ]

    sns.palplot(sns.cubehelix_palette())

    ax = sns.heatmap(auc_mat.transpose(),
                     annot=True,
                     ax=ax,
                     cmap="coolwarm",
                     vmax=0.9,
                     vmin=0.7,
                     xticklabels=labels,
                     yticklabels=labels,
                     fmt=".3f")

    ax.set_title("AUC of BDT", fontsize=24)
    ax.set_ylabel("Test set", fontsize=18)
    ax.set_xlabel("Training set", fontsize=18)
    ax.invert_yaxis()

    fig.savefig(path_fmt.format(ext="png"))
    fig.savefig(path_fmt.format(ext="pdf"), format="pdf")
Example No. 21
    DATA_SIZE = args.datasize # demo, small, large
    
    BATCH_SIZE = 1 # dummy

    test = MINDDataset(path.join(DATA_SIZE,'test/news.tsv'), path.join(DATA_SIZE,'test/behaviors.tsv'), 'all_embeddings.vec', 'large', batch_size=BATCH_SIZE, model=args.pretrained_model, subset='test')
    test.init_news()

    print('finish loading data', flush = True)

    # build the model
    # news_encoder_parameters = {'n_classes': len(train._class2id), 'n_subclasses': len(train._subclass2id), 'class_embedding_dim': 50, 'subclass_embedding_dim': 30, 'news_repr_dim': 400, 'distil_dropout': 0.1, 'class_dropout': 0, 'entity_embedding_dim': 100}
    news_encoder_parameters = {'n_classes': 19, 'n_subclasses': 294, 'class_embedding_dim': 50, 'subclass_embedding_dim': 30, 'news_repr_dim': 400, 'distil_dropout': 0.1, 'class_dropout': 0, 'entity_embedding_dim': 100}
    self_attention_hyperparameters['hidden_size'] = news_encoder_parameters['news_repr_dim']
    print(self_attention_hyperparameters)
    self_attention_config = Config(self_attention_hyperparameters)
    model = NewsRec(self_attention_config, news_encoder_parameters, args.pretrained_model, args.scorer).to(device)
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)

    print('finish building the model', flush = True)

    try:
        print(args.checkpoint_name)
        load_checkpoint(model, map_location = device, path = args.checkpoint_name)
        print('checkpoint loaded', flush = True)
    except Exception:
        print('WARNING: failed to load any checkpoints.', flush = True)

    evaluate(test, model, 1, subset='test')
Example No. 22
        pass
    logger_file_handler = logging.FileHandler(logging_file)
    logger.addHandler(logger_file_handler)
    logger.info('Arguments: {}'.format(args))

    mean, std = get_mean_std(args.dataset)

    if args.dataset in ['MNIST', 'FashionMNIST']:
        input_ch = 1
        padded_im_size = 32
        num_classes = 10
        im_size = 28
        epc_seed = 0
        config = Config(input_ch=input_ch, 
                    padded_im_size=padded_im_size, 
                    num_classes=num_classes,
                    im_size=im_size,
                    epc_seed=epc_seed
                    )
        dataset_sizes = {'train': 6e4, 'test': 1e4}
    elif args.dataset in ['CIFAR10', 'CIFAR100']:
        input_ch = 3
        padded_im_size = 32
        if args.dataset == 'CIFAR10':
            num_classes = 10
        elif args.dataset == 'CIFAR100':
            num_classes = 100
        else:
            raise Exception('Should not have reached here')
        im_size = 32
        epc_seed = 0
        config = Config(input_ch=input_ch, 
Example No. 23
    def __init__(self, client):
        self.client = client
        self.config = Config("config.toml")
Example No. 24
                        intersection_group = set(self.items_dict[first_item].keys()).intersection(set(self.items_dict[second_item].keys()))
                        if len(intersection_group) > 1:
                            if (sim > 0):
                                writer.writerow([np.int16(first_item),np.int16(second_item),np.float32(round(sim, 6))])

    def build_item_to_itm_corr(self, X):
        self.user_map = pd.Series(X[USER_COL_NAME_IN_DATAEST].unique())
        df_movie_features = X.pivot(index=ITEM_COL_NAME_IN_DATASET, columns=USER_COL_NAME_IN_DATAEST, values=RATING_COL_NAME_IN_DATASET).fillna(0)
        self.item_map = pd.Series(df_movie_features.index)
        self.corr_matrix = np.corrcoef(df_movie_features)
        self.corr_matrix = pd.DataFrame(self.corr_matrix).set_index(self.item_map,self.item_map)
        self.corr_matrix.columns = [self.item_map][0].values


    def get_input_item_similarities(self, item):
        input_item_similarities = self.corr_csv[self.corr_csv['item_1'] == item].set_index('item_2', drop=False).reindex(columns=['item_2','sim'])
        input_item_similarities.rename(columns={'item_2': 'item'}, inplace=True)
        input_item_similarities2 = self.corr_csv[self.corr_csv['item_2'] == item].set_index('item_1', drop=False).reindex(columns=['item_1','sim'])
        input_item_similarities2.rename(columns={'item_1': 'item'}, inplace=True)
        input_item_similarities_full = pd.concat([input_item_similarities, input_item_similarities2])
        input_item_similarities_full.drop_duplicates(inplace=True)
        return input_item_similarities_full


if __name__ == '__main__':
    knn_config = Config(k=25)
    train, validation = get_data()
    knn = KnnItemSimilarity(knn_config)
    knn.fit(train)
    print(knn.calculate_rmse(validation))
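In build_item_to_itm_corr, the ratings are pivoted into an item-by-user matrix and passed to np.corrcoef, which correlates rows, so the result is an item-item Pearson correlation matrix. A toy illustration (the column names here are placeholders, not the snippet's *_COL_NAME_* constants):

import numpy as np
import pandas as pd

ratings = pd.DataFrame({
    "user":   [1, 1, 2, 2, 3, 3],
    "item":   [10, 20, 10, 20, 10, 20],
    "rating": [5, 3, 4, 2, 1, 5],
})
matrix = ratings.pivot(index="item", columns="user", values="rating").fillna(0)
corr = np.corrcoef(matrix)  # rows are items, so corr[i, j] relates item i to item j
print(pd.DataFrame(corr, index=matrix.index, columns=matrix.index))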
Example No. 25
import click
import os
import logging
import coloredlogs
import spiffymanagement
from utils import Config

log = logging.getLogger('spiffymanagement')
coloredlogs.install(
    level='DEBUG',
    logger=log,
    fmt='%(asctime)s:%(msecs)03d %(name)s: %(levelname)s %(message)s')

dirp = os.path.dirname(os.path.realpath(__file__))
cfg = Config(f'{dirp}/configs/serverCfgs.toml')


@click.group()
def spiffy():
    cfg._load()


@spiffy.command()
@click.argument('server')
def start(server):
    spiffymanagement.start(cfg, server)


@spiffy.command()
@click.argument('server')
@click.option('-c', '--countdown')
Example No. 26
    def setUp(self):
        super(ToucanConsumerTest, self).setUp()
        self.cfg = Config()
        self.consumer = ToucanConsumer(self.cfg)
Example No. 27
    def test_loading_experiment(self):
        c = Config(Path.cwd() / 'experiments' / 'sample.yml')
        self.assertEqual(c.model.bias, True)
        self.assertEqual(c.training.location, 'aws.google.com')
        self.assertEqual(c.training.alert.every, '20 minutes')
Example No. 28
    def test_empty_config_len(self):
        config = Config()
        self.assertEqual(len(config), 0)
Example No. 29
from torch import device
from utils import Config

vocab_size = 10000
embedding_dim = 200

config = Config({
    "predictor_type": "model.retrival_baseline.Predictor_DualRNNEncoder",
    "num_epoch": 12,
    "vocab_size": vocab_size,
    "save_path": './save/retrival_baseline2.pt'
})
config.train_set = Config({
    "type": "utterance_with_cands",
    "kwargs": {
        "train": True,
    },
    "loader": {
        "batch_size": 30,
        "shuffle": True,
        "num_workers": 8,
    }
})
config.test_set = Config({
    "type": "utterance_with_cands",
    "kwargs": {
        "train": False,
    },
    "loader": {
        "batch_size": 1,
        "shuffle": False,
Example No. 30
import networkx as nx
import re
import pickle
import os
import torch
import yaml
from utils import Voc, Config, dump_pickle

FILEPATH = '/home/lanco/zhaoliang/KB/en_concept_net_extracted.csv'
ROOTPATH = '/home/lanco/zhaoliang/KB/'
edgeList = []
errorList = []
nodeList = set()
relationList = []

config = Config(os.path.join(ROOTPATH, 'config.yml'))

voc = Voc(config)

try:
    with open(FILEPATH, 'r') as file:
        for index, line in enumerate(file):
            if index % 100000 == 0:
                print('processing %d' % index)
            lineSearch = re.search(
                r"/a/\[/r/(.+)/,/c/en/(.+?)/.*,/c/en/(.+)/\]", line)
            if lineSearch is not None and lineSearch.group(
                    1) is not None and lineSearch.group(2) is not None:
                voc.addWord(lineSearch.group(3))
                voc.addWord(lineSearch.group(2))
                if lineSearch.group(1) not in relationList: