Example #1
    def __init__(self,
                 layers,
                 activation,
                 activation_last=None,
                 batch_norm=False,
                 initialize=True,
                 *args,
                 **kwargs):
        super(MLPBlock, self).__init__(*args, **kwargs)
        from utils import get_module

        _layers = []
        for i, node in enumerate(layers):
            if i == len(layers) - 1:
                break
            else:
                _layers.append(nn.Linear(layers[i], layers[i + 1]))

            if batch_norm:
                _layers.append(nn.BatchNorm1d(layers[i + 1]))

            if i == len(layers) - 2:
                if activation_last is None:
                    _layers.append(get_module([nn], 'Identity')())
                else:
                    _layers.append(get_module([nn], activation_last)())
            else:
                _layers.append(get_module([nn], activation)())

        self._layers = nn.Sequential(*_layers)
        if initialize:
            self.apply(self._init_weights)
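The PyTorch block examples in this listing resolve layer classes by name through a small helper. The project's actual `utils.get_module` is not shown here; the following is a minimal sketch, assuming it simply looks the name up as an attribute across a list of namespaces:

# Hypothetical sketch of the helper behind calls like get_module([nn], 'ReLU')().
# Assumption: it returns the first attribute named `name` found in `modules`.
def get_module(modules, name):
    for module in modules:
        if hasattr(module, name):
            return getattr(module, name)
    raise AttributeError(f"{name} not found in any of {modules}")

Called as `get_module([nn], 'ReLU')()` this would instantiate `nn.ReLU`, which matches how Examples #1, #3, #9 and #11 use it.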
Example #2
def main():
    cli_args = read_args()
    param_config = Config(cli_args.config)

    N_TRIES = 25

    np.random.seed(cli_args.seed)

    fn = get_module(cli_args.fn)()

    optimizer = get_module(cli_args.optimizer)(N_TRIES,
                                               param_config,
                                               fn,
                                               plot=cli_args.plot)

    best_input, best_output = optimizer.optimize()

    optimal_in, optimal_out = fn.optimal_input()
    print(
        "max value: {} with input: {}, optimal output: {opt_out} with input: {opt_in} diff: {diff}"
        .format(best_output,
                best_input,
                opt_out=optimal_out,
                opt_in=optimal_in,
                diff=optimal_out - best_output))

    write_output(cli_args, param_config, optimizer, fn)
Example #3
    def initialize(self,
                   model,
                   dataloaders,
                   input_key: str,
                   target_key: str,
                   criterion: str,
                   device='cpu',
                   optimizer: str = 'Adam',
                   do_manual_decay: bool = False,
                   hp_epochs: int = 10,
                   hp_lr: float = 1e-3,
                   lr: float = 1e-3,
                   patience: int = 10,
                   hps: dict = {},
                   **kwargs):
        self._model = model
        self._dataloaders = dataloaders
        self._input_key = input_key
        self._target_key = target_key
        if isinstance(criterion, str):
            from models import MyLoss
            self._criterion = get_module([nn, MyLoss], criterion)()
        else:
            self._criterion = criterion

        self._device = device
        from torch import optim
        self._optimizer = get_module([optim], optimizer)(model.parameters())
        self._do_manual_decay = do_manual_decay
        self._hp_epochs = hp_epochs
        self._hp_lr = hp_lr
        self._lr = lr
        self._patience = patience
        self._hps = hps
Example #4
    def get_module(self, base_url=None, username=None, password=None):
        if base_url is None:
            base_url = self.url
        if username is None:
            username = self.username
        if password is None:
            password = self.password
        utils.get_module(base_url, username, password, str(platform))
Example #5
    def get_module(self, base_url=None, username=None, password=None):
        if base_url is None:
            base_url = self.url
        if username is None:
            username = self.username
        if password is None:
            password = self.password
        user_data_dir = App.get_running_app().user_data_dir
        tar = join(user_data_dir, "swm.tgz")
        ini = join(user_data_dir, "swm.ini")
        swm_json = join(user_data_dir, "swm.json")
        swm_dir = join(user_data_dir, "swm")
        utils.get_module(base_url, username, password, str(platform), tar, ini,
                         swm_json, swm_dir, user_data_dir)
Example #6
def run(interp, line):

    line = line.strip()

    try:
        [(name, module, args)
         ] = re.findall('^([a-z][a-zA-Z_0-9]*) = ([a-z][a-zA-Z_0-9]*)\((.*)\)',
                        line)
    except ValueError:
        print 'Error: failed to parse load command.'
        print
        return

    if module not in available:
        print 'did not recognize loader %r' % module
        return

    m = get_module('load', module)(interp, name)

    try:
        exec 'm.main(%s)' % args
    except SyntaxError as e:
        print 'Syntax error: %s' % e
        return
    return interp.run_agenda()
Example #7
    def __init__(self, dir_path):
        self.dir_path = dir_path
        self.hash = None
        self.file_paths = []
        self.ea_list = []
        self.tags = set()

        if self.event_accumulator is None:
            self.event_accumulator = get_module(
                'tensorboard.backend.event_processing.event_accumulator')

        if not os.path.isdir(dir_path):
            return

        for file in os.listdir(dir_path):
            file_path = os.path.join(self.dir_path, file)
            if os.path.isfile(file_path) and 'tfevents' in file:
                self.file_paths.append(file_path)

        if len(self.file_paths) > 0:
            for file_path in self.file_paths:
                ea_inst = self.event_accumulator.EventAccumulator(
                    file_path,
                    size_guidance={
                        self.event_accumulator.SCALARS: 0,
                    })
                ea_inst.Reload()
                ea_tags = ea_inst.Tags().get('scalars') or []
                self.ea_list.append({
                    'eq': ea_inst,
                    'tags': ea_tags,
                })
                for t in ea_tags:
                    self.tags.add(t)
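Here `get_module` receives a dotted module path ('tensorboard.backend.event_processing.event_accumulator') rather than a list of namespaces, so it behaves like a lazy importer for an optional dependency. A minimal sketch of such a variant, assuming it is a thin wrapper around importlib (the project's real helper may cache or handle failures differently):

# Hypothetical lazy-import variant of get_module(dotted_path).
import importlib

def get_module(dotted_path):
    try:
        return importlib.import_module(dotted_path)
    except ImportError:
        return None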
Example #8
def train_algorithm(request):
    module_id = request.GET.get('module_id')
    scene_id = request.GET.get('scene_id')
    limit = request.GET.get('limit')

    if scene_id and module_id:
        tmp = utils.get_scene_record(module_id,scene_id)
        for i in tmp:
            i['data_length'] = range(len(i['data'][i['data'].keys()[0]]))
            i['resources'] = []
            i['apis'] = []
            i['api_info'] = []
            api_dict = {}
            for k in i['data'].keys():
                if k != 'total' and k.find('#api#') != 0:
                    i['resources'].append(k)
                if k != 'total' and k.find('#api#') == 0:
                    api_dict[k[5:]] = i['data'][k]
                    #this_api_id = utils.get_api_by_name(k[5:])
                    i['api_info'].append(k) # TODO

            for j in i['data_length']:
                current_api_dict = {}
                for k,v  in api_dict.iteritems():
                    current_api_dict[k] = v[j]
                i['apis'].append(current_api_dict)
        if limit and int(limit) > 0:
            ret = {'scene_records' : tmp[:int(limit)]}
        else:
            ret = {'scene_records': tmp}

        ret['module_id'] = module_id
        ret['scene_id'] = scene_id
        scene_api = utils.get_scene_api(module_id, scene_id)

        for s in scene_api:
            s['api_info'] = utils.get_api(s.get('api_id'))
            # get threshold
            if s['api_info']:
                s['api_info']['threholds'] = utils.get_api_resource(s.get('api_id'))
                for th in s['api_info']['threholds'].get('resource_list'):
                    th['name'] = utils.get_resource(th.get('resource_id')).get('name')

        ret['scene_info'] = utils.get_scene(scene_id)
        ret['module_info'] = utils.get_module(module_id)
        ret['scene_api'] = scene_api
        ret['all_resource'] = []

        all_resource_ids = []
        # get all resources needed
        for s in scene_api:
            for id in s.get('api_info').get('threholds').get('resource_id'):
                if not id in all_resource_ids:
                    all_resource_ids.append(id)
                    ret['all_resource'].append(utils.get_resource(id))

        ret["public"] = utils.get_public(request)
        return render(request, 'assess/train_algorithm.html', {'data': ret})
    else:
        return render(request, 'error.html')
Example #9
    def __init__(self,
                 layers,
                 activation=None,
                 batch_norm=False,
                 initialize=True,
                 *args,
                 **kwargs):
        super(LSTMBlock, self).__init__(*args, **kwargs)
        from collections import OrderedDict
        from utils import get_module

        _layers = OrderedDict()
        for i, node in enumerate(layers):
            if i == len(layers) - 1:
                break
            else:
                _layers[f'LSTM{i}'] = nn.LSTM(layers[i], layers[i + 1])

        if batch_norm:
            _layers['batchnorm1d'] = nn.BatchNorm1d(layers[-1])

        if activation is not None:
            _layers[activation] = get_module([nn], activation)()

        self._layers = nn.Sequential(_layers)
        if initialize:
            self.apply(self._init_weights)
Example #10
def iterate_values_det(position_transfers, rewards, discount_factor, convergence_factor):
    length = len(position_transfers)
    width = len(position_transfers[0])
    values = [0 for x in range(0, length)]
    policy = [0 for x in range(0, length)]
    delta = None

    while delta is None or delta >= convergence_factor:
        delta = 0

        for position in range(0, length):
            vs_old = values[position]
            values[position] = None

            for place in range(0, width):
                next_position = position_transfers[position][place] - 1
                next_reward = rewards[position][place]
                value = next_reward + (discount_factor * (values[next_position] or 0))

                if values[position] is None or value > values[position]:
                    values[position] = value
                    policy[position] = place + 1

            delta = max(delta, utils.get_module(values[position] - vs_old))

    return policy
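In this value-iteration routine (and in Example #14 below) `utils.get_module` is applied to a scalar difference, so it evidently computes the absolute value ("module" in the sense of modulus). A one-line sketch under that assumption:

# Hypothetical: in these examples get_module is simply the absolute value of its argument.
def get_module(x):
    return abs(x)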
Example #11
    def __init__(self, layers_conv2d=None, initialize=True, *args, **kwargs):
        super(Conv2DBlock, self).__init__(*args, **kwargs)
        from copy import copy
        from utils import get_module
        _layers = []
        conv2d_args = {"stride": 1, "padding": 0, "activation": 'ReLU'}
        maxpooling2d_args = {"kernel_size": 2, "stride": 2}

        for layer, args in layers_conv2d:
            if layer == 'conv2d':
                layer_args = copy(conv2d_args)
                layer_args.update(args)
                activation = layer_args.pop('activation')
                _layers.append(nn.Conv2d(**layer_args))
                _layers.append(get_module([nn], activation)())
            elif layer == 'maxpooling2d':
                layer_args = copy(maxpooling2d_args)
                layer_args.update(args)
                _layers.append(nn.MaxPool2d(**layer_args))
            else:
                raise ValueError(f"{layer} is not implemented")

        self._layers = nn.Sequential(*_layers)
        if initialize:
            self.apply(self._init_weights)
Example #12
    def do_help(self, line):
        mod = line.split()
        if len(mod) <= 1:
            return super(REPL, self).do_help(line)
        else:
            if len(mod) == 2:
                [cmd, sub] = mod
                if cmd in ('load', 'post'):
                    m = get_module(cmd, sub)
                    if m:
                        print m.__doc__
                    else:
                        print 'No help available for "%s %s"' % (cmd, sub)
                    return
Example #13
File: repl.py Project: nwf/dyna
    def do_help(self, line):
        mod = line.split()
        if len(mod) <= 1:
            return super(REPL, self).do_help(line)
        else:
            if len(mod) == 2:
                [cmd, sub] = mod
                if cmd in ("load", "post"):
                    m = get_module(cmd, sub)
                    if m:
                        print m.__doc__
                    else:
                        print 'No help available for "%s %s"' % (cmd, sub)
                    return
Example #14
def evaluate_policy_det(position_transfers, rewards, policy, discount_factor, convergence_factor):
    length = len(position_transfers)
    values = [0 for x in range(0, length)]
    convergence_delta = None

    while convergence_delta is None or convergence_delta >= convergence_factor:
        values_new = copy.deepcopy(values)
        convergence_delta = 0

        for position in range(0, length):
            policy_action = policy[position][0] - 1
            next_position = position_transfers[position][policy_action] - 1
            next_reward = rewards[position][policy_action]
            values[position] = next_reward + (discount_factor * values_new[next_position])
            convergence_delta = max(convergence_delta, utils.get_module(values[position] - values_new[position]))

    return values
Example #15
def analyze(filepath):
    """
    Analyze the module pointed by `filepath`
    """

    #Get the module as an AST node
    root = get_module(filepath)

    #create symbol table
    #The symbol table creation must be a separate phase from dependency tree creation,
    #since Python does not evaluate, e.g., functions at parse time. Therefore, entities can be
    #used before being defined.
    #The alternative approach would be to have one pass and resolve symbols as
    #soon as they become available; however, the existing solution is closer to how Python works.
    symbol_table = create_symbol_table(root)
    print_symtable(symbol_table)

    #find dependencies
    dependency_tree = create_dependency_tree(root, symbol_table)
Example #17
def analyze(module_path):
    """
    Analyze dependencies starting at `module_path`
    """
    #view the module as an AST node object
    module = get_module(module_path)

    nodes = []
    NodeVisitor().visit(module, nodes)
    
    #Modify main module node to give it a name attr
    if not hasattr(nodes[0][0], "name"):
        nodes[0][0].name = name_from_path(module_path)

    #symbolic_pretty_print(nodes)
    #pretty_print(nodes)

    #create_symbol_table(nodes[0])
    find_dependencies(nodes[0])
Example #18
def run(module_id, requested_widget=None, generate_widget=True):
    module = utils.get_module(module_id)
    widgets = []
    if module is None: return
    if 'widgets' not in module: return
    for i in range(len(module["widgets"])):
        for j in range(len(module["widgets"][i])):
            # for each widget
            widget = module["widgets"][i][j]
            if requested_widget is not None and widget[
                    "widget_id"] != requested_widget:
                continue
            if not widget["enabled"]: continue
            # generate the widget
            if "layout" not in widget: continue
            for k in range(len(widget["layout"])):
                layout = widget["layout"][k]
                chart_generated = True
                if layout["type"] == "sensor_group_summary":
                    if generate_widget:
                        add_sensor_group_summary_chart(layout, widget)
                    break
                elif layout["type"] == "image":
                    if generate_widget: add_sensor_image(layout, widget)
                    break
                elif layout["type"] == "sensor_group_timeline":
                    if generate_widget:
                        add_sensor_group_timeline_chart(layout, widget)
                    break
                elif layout["type"] == "chart_short" or layout[
                        "type"] == "chart_short_inverted":
                    if generate_widget: add_sensor_chart(layout, widget)
                    break
                elif layout["type"] == "map":
                    if generate_widget: add_sensor_map(layout, widget)
                    break
                else:
                    chart_generated = False
                    continue
            if chart_generated: widgets.append(widget["widget_id"])
    return widgets
Example #19
def run(interp, line):

    line = line.strip()

    try:
        [(module, args)] = re.findall('([a-z][a-zA-Z_0-9]*)\((.*)\)$', line)
    except ValueError:
        print 'Error: failed to parse post command.'
        print
        return

    if module not in available:
        print 'did not recognize post-processor %r' % module
        return

    m = get_module('post', module)(interp)

    try:
        exec 'm.main(%s)' % args
    except SyntaxError as e:
        print 'Syntax error: %s' % e
        return
Example #21
    def __call__(self, parser, namespace, value, option_string=None):

        if namespace.init and namespace.create:
            print("Optional arguments --init and --create can't be used together")
            sys.exit(1)

        # Case where no argument is given
        if not namespace.init and not namespace.create:
            namespace.create = True

        if namespace.create:
            if not os.path.isdir(os.path.realpath(value)):
                print("{0} table project doesn't exist yet. \n \tpython myql-cli table -i {0} ".format(value))
                sys.exit(1)

            module_path = os.path.realpath(value)
            module = get_module(module_path)
            tables = [v for k, v in module.__dict__.items() if isinstance(v, TableMeta) and k != 'TableModel']

            for table in tables:
                table_name = table.table.name
                path = os.path.realpath(value)
                table.table.save(name=table_name, path=path)

            sys.exit(0)

        if namespace.init:
            folder = value
            if not create_directory(folder):
                print("This project already exists !!!")
                sys.exit(0)

            create_init_file(folder)
            create_tables_file(folder)

            sys.exit(0)

        sys.exit(1)
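In this CLI action, `get_module(module_path)` loads a module object from the project directory and its attributes are then scanned for TableMeta instances. The helper itself is not shown; the sketch below is only an assumption that it imports a tables.py file found in that directory using standard importlib machinery:

# Hypothetical path-based loader used as module = get_module(module_path).
# Assumption only: the real helper in this project may resolve the file differently.
import os
import importlib.util

def get_module(module_path):
    file_path = os.path.join(module_path, 'tables.py')
    spec = importlib.util.spec_from_file_location('tables', file_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module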
Example #22
def run(interp, line):

    line = line.strip()

    try:
        [(name, module, args)] = re.findall('^([a-z][a-zA-Z_0-9]*) = ([a-z][a-zA-Z_0-9]*)\((.*)\)', line)
    except ValueError:
        print 'Error: failed to parse load command.'
        print
        return

    if module not in available:
        print 'did not recognize loader %r' % module
        return

    m = get_module('load', module)(interp, name)

    try:
        exec 'm.main(%s)' % args
    except SyntaxError as e:
        print 'Syntax error: %s' % e
        return
    return interp.run_agenda()
Example #23
    def fit(self, X, y=None, fasta_path=None):
        """
        Parameters
        ----------
        X : array, (n_samples, 1)
            Contains the index numbers of fasta sequences in the fasta file.
        y : array or list
            Target values.
        fasta_path : str
            File path to the fasta file.

        Returns
        -------
        self
        """
        if fasta_path:
            self.fasta_path = fasta_path

        if not self.fasta_path:
            raise ValueError("`fasta_path` can't be None!")

        pyfaidx = get_module('pyfaidx')
        fasta_file = pyfaidx.Fasta(self.fasta_path)
        # set up the sequence_length from the first entry
        sequence_length = len(fasta_file[int(X[0, 0])])
        if not self.padding:
            for idx in X[:, 0]:
                fasta_record = fasta_file[int(idx)]
                if len(fasta_record) != sequence_length:
                    raise ValueError("The first sequence record contains "
                                     "%d bases, while %s contrain %d bases" %
                                     (sequence_length, repr(fasta_record),
                                      len(fasta_record)))

        self.fasta_file = fasta_file
        self.sequence_length = sequence_length
        return self
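In this scikit-learn-style transformer, `get_module('pyfaidx')` imports an optional dependency on demand so that an import error surfaces only when the feature is actually used. A minimal sketch of that pattern, assuming a plain importlib wrapper with a friendlier error message (an assumption, not the project's code):

# Hypothetical optional-dependency importer; message wording is an assumption.
import importlib

def get_module(name):
    try:
        return importlib.import_module(name)
    except ImportError as err:
        raise ImportError(f"Optional dependency '{name}' is required for this feature") from err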
Example #24
def main(conf: str, seed: int, gpu_index: int, data_path: str):
    global DEVICE
    conf = load_config(conf)
    if seed is not None:
        conf.seed = seed
    if gpu_index is not None and DEVICE == torch.device('cuda'):
        DEVICE = torch.device(f'cuda:{gpu_index}')
    if data_path is not None:
        conf['dataset']['params']['data_path'] = data_path
    logger.info(DEVICE)
    logger.info(conf)

    set_seed(conf.seed)
    from models import sub_task
    tau4vec = set_task(conf.sub_task_params, 'tau4vec', sub_task)
    logger.info('set_task: tau4vec')
    set_seed(conf.seed)
    higgsId = set_task(conf.sub_task_params, 'higgsId', sub_task)
    logger.info('set_task: higgsId')
    from models import MyDataset
    from models import MyMetrics
    set_seed(conf.seed)
    dataset = set_module([MyDataset], conf, 'dataset')
    set_seed(conf.seed)
    dataloader = DataLoader(dataset, batch_size=100, shuffle=True)
    logger.info('set dataloader')
    # #########################################################################
    # pre-train ###############################################################
    # #########################################################################
    logger.info('----- pretrain[0] start -----')
    pretrain_conf = conf.sub_task_params.tau4vec.pretrain
    for i, sub_model in enumerate(tau4vec):
        logger.info(f'pretrain: [0][{i}]')
        set_seed(conf.seed)
        optimizer = set_module([optim],
                               pretrain_conf,
                               'optimizer',
                               params=sub_model.parameters())
        loss_func = set_module([nn, MyLoss], pretrain_conf, 'loss_func')
        metrics = set_module([MyMetrics], pretrain_conf, 'metrics')
        activation = set_module([nn], pretrain_conf, 'activation')
        input_key = pretrain_conf.data.input_key
        target_key = pretrain_conf.data.target_key
        patience = pretrain_conf.patience
        tau4vec[i] = sub_task.pre_train(epochs=pretrain_conf.epochs,
                                        model=sub_model,
                                        dataloader=dataloader,
                                        optimizer=optimizer,
                                        loss_func=loss_func,
                                        input_key=input_key,
                                        target_key=target_key,
                                        device=DEVICE,
                                        patience=patience,
                                        metrics=metrics,
                                        activation=activation)
    logger.info('----- pretrain[0] end -----')
    logger.info('----- pretrain[1] start -----')
    pretrain_conf = conf.sub_task_params.higgsId.pretrain
    for i, sub_model in enumerate(higgsId):
        logger.info(f'pretrain: [1][{i}]')
        set_seed(conf.seed)
        optimizer = set_module([optim],
                               pretrain_conf,
                               'optimizer',
                               params=sub_model.parameters())
        loss_func = set_module([nn], pretrain_conf, 'loss_func')
        metrics = set_module([MyMetrics], pretrain_conf, 'metrics')
        activation = set_module([nn], pretrain_conf, 'activation')
        input_key = pretrain_conf.data.input_key
        target_key = pretrain_conf.data.target_key
        patience = pretrain_conf.patience
        higgsId[i] = sub_task.pre_train(epochs=pretrain_conf.epochs,
                                        model=sub_model,
                                        dataloader=dataloader,
                                        optimizer=optimizer,
                                        loss_func=loss_func,
                                        input_key=input_key,
                                        target_key=target_key,
                                        device=DEVICE,
                                        patience=patience,
                                        metrics=metrics,
                                        activation=activation)
    logger.info('----- pretrain[1] end -----')

    # #########################################################################
    # #########################################################################
    logger.info('copy the pretrain models')
    pre_trained_tau4vec = set_task(conf.sub_task_params, 'tau4vec', sub_task)
    pre_trained_higgsId = set_task(conf.sub_task_params, 'higgsId', sub_task)
    pre_trained_model = [pre_trained_tau4vec, pre_trained_higgsId]
    task = [tau4vec, higgsId]
    for num_task, sub in enumerate(task):
        for num_model in range(len(sub)):
            pre_trained_model[num_task][num_model].load_state_dict(
                deepcopy(task[num_task][num_model].state_dict()))
    # #########################################################################
    # #########################################################################

    logger.info('----- SPOS-NAS start -----')
    sposnas_conf = conf.SPOS_NAS

    def make_output_dict():
        return {
            'X': [],
            'AUC': {
                f'{f}_{s}': []
                for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)
            },
            'LOSS_1ST': {
                f'{f}_{s}': []
                for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)
            },
            'LOSS_2ND': {
                f'{f}_{s}': []
                for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)
            },
            'RATIO': {
                f'{f}_{s}': []
                for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)
            },
            'ONLY_PT_RATIO': {
                f'{f}_{s}': []
                for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)
            },
        }

    # evaluate only pre-train model
    loss_func = [
        set_module([nn, MyLoss], sposnas_conf, 'loss_first'),
        set_module([nn, MyLoss], sposnas_conf, 'loss_second')
    ]
    loss_weight = [0.5, 0.5]
    metrics = get_module([MyMetrics], 'Calc_Auc')()
    from models.SPOS_NAS import SPOS
    model = SPOS(task=task, loss_func=loss_func, loss_weight=loss_weight)
    model.to(DEVICE)
    logger.info('evaluate only pre-train model')
    dummy = make_output_dict()
    for now_choice in product(range(3), range(3)):
        pre_train_result = evaluate(model, conf, dataloader, metrics, dummy,
                                    now_choice)

    output_dict = make_output_dict()
    X_list = [0.0, 0.1, 0.5]
    for X in (np.array(X_list)).round(10):
        output_dict['X'].append(X)
        logger.info(f'loss_ratio: {X:.6f} (loss_1*X + loss_2*(1-X)) start')
        set_seed(conf.seed)

        def initialize_pretrain_weight():
            logger.info('load pretrain models...')
            for num_task, sub in enumerate(task):
                for num_model in range(len(sub)):
                    task[num_task][num_model].load_state_dict(
                        deepcopy(pre_trained_model[num_task]
                                 [num_model].state_dict()))
            logger.info('load pretrain models done')

        logger.info('set model parameters...')
        loss_func = [
            set_module([nn, MyLoss], sposnas_conf, 'loss_first'),
            set_module([nn, MyLoss], sposnas_conf, 'loss_second')
        ]
        loss_weight = [X, 1. - X]
        metrics = get_module([MyMetrics], 'Calc_Auc')()

        for now_choice in product(range(3), range(3)):
            initialize_pretrain_weight()
            model = SPOS(task=task,
                         loss_func=loss_func,
                         loss_weight=loss_weight)
            model.to(DEVICE)
            optimizer = set_module([optim],
                                   sposnas_conf,
                                   'optimizer',
                                   params=model.parameters())
            scheduler = set_module([optim.lr_scheduler],
                                   sposnas_conf,
                                   'scheduler',
                                   optimizer=optimizer)
            logger.info('set model parameters done')
            logger.info('fit model...')
            model.fit(epochs=sposnas_conf.epochs,
                      dataloader=dataloader,
                      device=DEVICE,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      patience=sposnas_conf.patience,
                      choice=now_choice)
            logger.info('fit model done')
            logger.info('eval model...')
            output_dict = evaluate(model, conf, dataloader, metrics,
                                   output_dict, now_choice)
            logger.info('eval model done')

    logger.info(f'seed: {conf.seed}/ pretrain result: {pre_train_result}')
    logger.info(f'seed: {conf.seed}/ final result: {output_dict}')

    logger.info('all train and eval step are done')

    logger.info('plot results...')
    logger.info('plot auc...')
    import matplotlib.pyplot as plt
    plt.style.use('seaborn-darkgrid')
    import pandas as pd
    df = pd.DataFrame(output_dict['AUC'], index=output_dict['X'])
    df = df.rename(
        columns={
            f'{f}_{s}': f'{f}:{s}'
            for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)
        })
    df.plot()
    plt.xlabel('X')
    plt.ylabel('AUC')
    plt.savefig(f'grid_auc_{conf.seed}.png')
    plt.close()

    logger.info('plot loss_2ND...')
    import matplotlib.pyplot as plt
    plt.style.use('seaborn-darkgrid')
    df = pd.DataFrame(output_dict['LOSS_2ND'], index=output_dict['X'])
    df = df.rename(
        columns={
            f'{f}_{s}': f'{f}:{s}'
            for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)
        })
    df.plot()
    plt.xlabel('X')
    plt.ylabel('LOSS_2ND')
    plt.savefig(f'grid_loss_2nd_{conf.seed}.png')
    plt.close()

    logger.info('plot loss_1ST...')
    import matplotlib.pyplot as plt
    plt.style.use('seaborn-darkgrid')
    df = pd.DataFrame(output_dict['LOSS_1ST'], index=output_dict['X'])
    df = df.rename(
        columns={
            f'{f}_{s}': f'{f}:{s}'
            for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)
        })
    df.plot()
    plt.xlabel('X')
    plt.ylabel('LOSS_1ST')
    plt.savefig(f'grid_loss_1st_{conf.seed}.png')
    plt.close()

    logger.info('plot ratios...')
    import matplotlib.pyplot as plt
    plt.style.use('seaborn-darkgrid')
    df = pd.DataFrame(output_dict['ONLY_PT_RATIO'], index=output_dict['X'])
    df = df.rename(
        columns={
            f'{f}_{s}': f'{f}:{s}'
            for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)
        })
    df.plot()
    plt.ylabel('ratio')
    plt.savefig(f'grid_only_pt_ratio_{conf.seed}.png')
    plt.close()
    import matplotlib.pyplot as plt
    plt.style.use('seaborn-darkgrid')
    df = pd.DataFrame(output_dict['RATIO'], index=output_dict['X'])
    df = df.rename(
        columns={
            f'{f}_{s}': f'{f}:{s}'
            for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)
        })
    df.plot()
    plt.ylabel('ratio')
    plt.savefig(f'grid_ratio_{conf.seed}.png')
    plt.close()
    logger.info('plot results done')
Example #25
import sys
from os.path import join
reload(sys)
sys.setdefaultencoding('utf-8')
from kivy.app import App
from kivy.utils import platform

import utils
try:
    user_data_dir = App.get_running_app().user_data_dir
    tar = join(user_data_dir, "swm.tgz")
    ini = join(user_data_dir, "swm.ini")
    swm_json = join(user_data_dir, "swm.json")
    swm_dir = join(user_data_dir, "swm")
    utils.get_module(None, None, None, str(platform), tar, ini, swm_json,
                     swm_dir, user_data_dir)
#utils.get_module()
except:
    print "Unexpected error:", sys.exc_info()[0]
    from portal import PortalApp
    PortalApp().run()

user_data_dir = App.get_running_app().user_data_dir
sys.path.append(user_data_dir)
print "!!!!"
print user_data_dir
print "!!!!"
reload(sys)
from swm.main import SWMApp
SWMApp().run()
Example #26
def get_network_module(name):
    return get_module('.' + name, package='overlay')
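This variant passes a leading-dot relative name plus a package keyword, mirroring the signature of importlib.import_module, so it is presumably a thin wrapper over it (an assumption; the overlay package's helper is not shown):

# Hypothetical: get_module('.tun', package='overlay') would behave like
# importlib.import_module('.tun', package='overlay').
import importlib

def get_module(name, package=None):
    return importlib.import_module(name, package=package)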
Example #27
def main(conf: str, seed: int, gpu_index: int, data_path: str, event: int):
    global DEVICE, FIRST_MODEL_NAME, SECOND_MODEL_NAME, MODELNAME_CHOICE_INDEX
    conf = load_config(conf)
    if seed is not None:
        conf.seed = seed
    if gpu_index is not None and DEVICE == torch.device('cuda'):
        DEVICE = torch.device(f'cuda:{gpu_index}')
    if data_path is not None:
        conf['dataset']['params']['data_path'] = data_path
    if event is not None:
        conf['dataset']['params']['max_events'] = event
    logger.info(DEVICE)
    logger.info(conf)

    FIRST_MODEL_NAME = [
        i['name'].split('_')[-1][:-4] + f'-{num}'
        for num, i in enumerate(conf.sub_task_params.tau4vec.tasks)
    ]
    SECOND_MODEL_NAME = [
        i['name'].split('_')[-1][:-4] + f'-{num}'
        for num, i in enumerate(conf.sub_task_params.higgsId.tasks)
    ]
    MODELNAME_CHOICE_INDEX = {
        f'{n1}_{n2}': v
        for (n1, n2), v in zip(
            product(FIRST_MODEL_NAME, SECOND_MODEL_NAME),
            product(range(len(FIRST_MODEL_NAME)), range(len(
                SECOND_MODEL_NAME))))
    }

    set_seed(conf.seed)
    from models import sub_task
    tau4vec = set_task(conf.sub_task_params, 'tau4vec', sub_task)
    logger.info('set_task: tau4vec')
    set_seed(conf.seed)
    higgsId = set_task(conf.sub_task_params, 'higgsId', sub_task)
    logger.info('set_task: higgsId')
    from models import MyDataset
    from models import MyMetrics
    set_seed(conf.seed)
    dataset = set_module([MyDataset], conf, 'dataset')
    set_seed(conf.seed)
    dataloader = DataLoader(dataset, batch_size=100, shuffle=True)
    logger.info('set dataloader')
    # #########################################################################
    # pre-train ###############################################################
    # #########################################################################
    logger.info('----- pretrain[0] start -----')
    pretrain_conf = conf.sub_task_params.tau4vec.pretrain
    for i, sub_model in enumerate(tau4vec):
        logger.info(f'pretrain: [0][{i}]')
        set_seed(conf.seed)
        optimizer = set_module([optim],
                               pretrain_conf,
                               'optimizer',
                               params=sub_model.parameters())
        loss_func = set_module([nn, MyLoss], pretrain_conf, 'loss_func')
        metrics = set_module([MyMetrics], pretrain_conf, 'metrics')
        activation = set_module([nn], pretrain_conf, 'activation')
        input_key = pretrain_conf.data.input_key
        target_key = pretrain_conf.data.target_key
        patience = pretrain_conf.patience
        tau4vec[i] = sub_task.pre_train(epochs=pretrain_conf.epochs,
                                        model=sub_model,
                                        dataloader=dataloader,
                                        optimizer=optimizer,
                                        loss_func=loss_func,
                                        input_key=input_key,
                                        target_key=target_key,
                                        device=DEVICE,
                                        patience=patience,
                                        metrics=metrics,
                                        activation=activation)
    logger.info('----- pretrain[0] end -----')
    logger.info('----- pretrain[1] start -----')
    pretrain_conf = conf.sub_task_params.higgsId.pretrain
    for i, sub_model in enumerate(higgsId):
        logger.info(f'pretrain: [1][{i}]')
        set_seed(conf.seed)
        optimizer = set_module([optim],
                               pretrain_conf,
                               'optimizer',
                               params=sub_model.parameters())
        loss_func = set_module([nn, MyLoss], pretrain_conf, 'loss_func')
        metrics = set_module([MyMetrics], pretrain_conf, 'metrics')
        activation = set_module([nn], pretrain_conf, 'activation')
        input_key = pretrain_conf.data.input_key
        target_key = pretrain_conf.data.target_key
        patience = pretrain_conf.patience
        higgsId[i] = sub_task.pre_train(epochs=pretrain_conf.epochs,
                                        model=sub_model,
                                        dataloader=dataloader,
                                        optimizer=optimizer,
                                        loss_func=loss_func,
                                        input_key=input_key,
                                        target_key=target_key,
                                        device=DEVICE,
                                        patience=patience,
                                        metrics=metrics,
                                        activation=activation)
    logger.info('----- pretrain[1] end -----')

    # #########################################################################
    # #########################################################################
    logger.info('copy the pretrain models')
    pre_trained_tau4vec = set_task(conf.sub_task_params, 'tau4vec', sub_task)
    pre_trained_higgsId = set_task(conf.sub_task_params, 'higgsId', sub_task)
    pre_trained_model = [pre_trained_tau4vec, pre_trained_higgsId]
    task = [tau4vec, higgsId]
    for num_task, sub in enumerate(task):
        for num_model in range(len(sub)):
            pre_trained_model[num_task][num_model].load_state_dict(
                deepcopy(task[num_task][num_model].state_dict()))
    # #########################################################################
    # #########################################################################

    logger.info('----- SPOS-NAS start -----')
    sposnas_conf = conf.SPOS_NAS

    def make_output_dict():
        return {
            'X': [],
            'AUC': {
                f'{f}_{s}': []
                for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)
            },
            'LOSS_1ST': {f: []
                         for f in FIRST_MODEL_NAME},
            'LOSS_2ND': {
                f'{f}_{s}': []
                for f, s in product(FIRST_MODEL_NAME, SECOND_MODEL_NAME)
            },
            'RATIO': {f: []
                      for f in FIRST_MODEL_NAME},
            'ONLY_PT_RATIO': {f: []
                              for f in FIRST_MODEL_NAME},
        }

    # evaluate only pre-train model
    loss_func = [
        set_module([nn, MyLoss], sposnas_conf, 'loss_first'),
        set_module([nn, MyLoss], sposnas_conf, 'loss_second')
    ]
    loss_weight = [0.5, 0.5]
    metrics = get_module([MyMetrics], 'Calc_Auc')()
    from models.SPOS_NAS import SPOS
    model = SPOS(task=task, loss_func=loss_func, loss_weight=loss_weight)
    model.to(DEVICE)
    logger.info('evaluate only pre-train model')
    dummy = make_output_dict()
    evaluate(model, conf, dataloader, metrics, dummy)

    output_dict = make_output_dict()
    X_list = [i for i in range(11)]
    X_list[1:1] = [0.01, 0.1]
    X_list[-1:-1] = [9.9, 9.99]
    for X in (np.array(X_list) * 0.1).round(10):
        output_dict['X'].append(X)
        logger.info(f'loss_ratio: {X:.6f} (loss_1*X + loss_2*(1-X)) start')
        set_seed(conf.seed)
        logger.info('load pretrain models...')
        for num_task, sub in enumerate(task):
            for num_model in range(len(sub)):
                task[num_task][num_model].load_state_dict(
                    deepcopy(
                        pre_trained_model[num_task][num_model].state_dict()))
        logger.info('load pretrain models done')
        logger.info('set model parameters...')
        loss_func = [
            set_module([nn, MyLoss], sposnas_conf, 'loss_first'),
            set_module([nn, MyLoss], sposnas_conf, 'loss_second')
        ]
        loss_weight = [X, 1. - X]
        metrics = get_module([MyMetrics], 'Calc_Auc')()

        model = SPOS(task=task,
                     loss_func=loss_func,
                     loss_weight=loss_weight,
                     save_dir='SPOS')
        model.to(DEVICE)
        optimizer = set_module([optim],
                               sposnas_conf,
                               'optimizer',
                               params=model.parameters())
        scheduler = set_module([optim.lr_scheduler],
                               sposnas_conf,
                               'scheduler',
                               optimizer=optimizer)
        logger.info('set model parameters done')
        logger.info('fit model...')
        model.fit(epochs=sposnas_conf.epochs,
                  dataloader=dataloader,
                  device=DEVICE,
                  optimizer=optimizer,
                  scheduler=scheduler,
                  patience=sposnas_conf.patience)
        logger.info('fit model done')
        logger.info('eval model...')
        output_dict = evaluate(model, conf, dataloader, metrics, output_dict)
        logger.info('eval model done')

        set_seed(conf.seed)
        logger.info('re-train start')
        selected_model, _ = max(
            {k: v[-1]
             for k, v in output_dict['AUC'].items()}.items(),
            key=lambda x: x[1])
        logger.info(f'selected_model: {selected_model}')
        selected_choice = MODELNAME_CHOICE_INDEX[selected_model]
        model.fit(epochs=sposnas_conf.epochs,
                  dataloader=dataloader,
                  device=DEVICE,
                  optimizer=optimizer,
                  scheduler=scheduler,
                  patience=sposnas_conf.patience,
                  choice=selected_choice)
        logger.info('re-train done')
        dummy = None
        dummy = make_output_dict()
        dummy = evaluate(model, conf, dataloader, metrics, dummy)

        def result_parser(res, selected_model, seed, X):
            AUC = res['AUC'][selected_model][0]
            LOSS_1ST = res['LOSS_1ST'][selected_model.split('_')[0]][0]
            LOSS_2ND = res['LOSS_2ND'][selected_model][0]
            RATIO = res['RATIO'][selected_model.split('_')[0]][0]
            ONLY_PT_RATIO = res['ONLY_PT_RATIO'][selected_model.split('_')
                                                 [0]][0]
            target_result = dict(seed=seed,
                                 X=X,
                                 AUC=AUC,
                                 LOSS_1ST=LOSS_1ST,
                                 LOSS_2ND=LOSS_2ND,
                                 RATIO=RATIO,
                                 ONLY_PT_RATIO=ONLY_PT_RATIO)
            logger.info(f're-train results: {target_result}')

        result_parser(dummy, selected_model, conf.seed, X)

    logger.info('all train and eval step are done')
    logger.info('plot results done')
Example #28
def train(model,
          optimizer,
          train_dataset,
          args,
          checkpoint_data,
          dev_dataset=None,
          unk_dataset=None):
    """ Train model. 

    Args:
    - model: BertModelForLangID
    - optimizer
    - train_dataset: BertDatasetForClassification
    - args
    - checkpoint_data: dict
    - dev_dataset: (optional) BertDatasetForTesting for dev data (required if args.eval_during_training)
    - unk_dataset: (optional) BertDatasetForMLM for unlabeled data


    Returns: None

    """

    assert type(train_dataset) == BertDatasetForClassification
    if args.eval_during_training:
        assert dev_dataset is not None
        assert type(dev_dataset) == BertDatasetForTesting
    if unk_dataset is not None:
        assert type(unk_dataset) == BertDatasetForMLM

    # Where do we save stuff?
    save_to_dir = args.dir_pretrained_model if args.resume else args.dir_output

    # Prepare path of training log
    time_str = datetime.now().strftime("%Y%m%d%H%M%S")
    train_log_name = "%s.%strain.log" % (time_str,
                                         "resume." if args.resume else "")
    train_log_path = os.path.join(save_to_dir, train_log_name)

    # Write header in log. We create a new log whether we are fine-tuning from a pre-trained model or resuming a fine-tuning job.
    header = "GlobalStep\tLossLangID\tAccuracyLangID"
    if not args.no_mlm:
        header += "\tLossMLM\tAccuracyMLM"
    if unk_dataset is not None:
        header += "\tLossUnkMLM\tAccuracyUnkMLM"
    header += "\tGradNorm\tWeightNorm"
    if args.eval_during_training:
        header += "\tDevLoss\tDevF1Track1\tDevF1Track2\tDevF1Track3"
    with open(train_log_path, "w") as f:
        f.write(header + "\n")

    # Make dataloader(s). Note: since BertDatasetForTraining and its
    # subclasses are IterableDatasets (i.e. streams), the loader is an
    # iterable (with no end and no __len__) that we call with iter().
    train_dataloader = get_dataloader(train_dataset, args.train_batch_size,
                                      args.local_rank)
    train_batch_sampler = iter(train_dataloader)
    if unk_dataset is not None:
        unk_dataloader = get_dataloader(unk_dataset, args.train_batch_size,
                                        args.local_rank)
        unk_batch_enum = enumerate(iter(unk_dataloader))

    # Initialize best score
    if not args.resume:
        checkpoint_data["best_score"] = 0
    if args.resume:
        # This should not happen. I added it in between versions...
        if "best_score" not in checkpoint_data:
            checkpoint_data["best_score"] = 0

    # Evaluate model on dev set
    if args.eval_during_training:
        logger.info("Evaluating model on dev set before we %s training" %
                    ("resume" if args.resume else "start"))
        dev_scores = evaluate(model, dev_dataset, args)
        best_score = dev_scores[args.score_to_optimize]
        if args.resume:
            if best_score > checkpoint_data["best_score"]:
                checkpoint_data["best_score"] = best_score
                model_to_save = get_module(model)
                checkpoint_data['best_model_state_dict'] = deepcopy(
                    model_to_save.state_dict())
        else:
            checkpoint_data["best_score"] = best_score
        log_data = []
        log_data.append(str(checkpoint_data["global_step"]))
        log_data += ["", ""]
        if not args.no_mlm:
            log_data += ["", ""]
        if unk_dataset is not None:
            log_data += ["", ""]
        log_data += ["", ""]
        log_data.append("{:.5f}".format(dev_scores["loss"]))
        log_data.append("{:.5f}".format(dev_scores["track1"]))
        log_data.append("{:.5f}".format(dev_scores["track2"]))
        log_data.append("{:.5f}".format(dev_scores["track3"]))
        with open(train_log_path, "a") as f:
            f.write("\t".join(log_data) + "\n")

    # Start training
    logger.info("***** Running training *****")
    for epoch in trange(int(args.num_epochs), desc="Epoch"):
        model.train()
        # Some stats for this epoch
        real_batch_sizes = []
        lid_losses = []
        lid_accs = []
        mlm_losses = []
        mlm_accs = []
        unk_mlm_losses = []
        unk_mlm_accs = []
        grad_norms = []

        # Run training for one epoch
        for step in trange(int(args.num_train_steps_per_epoch),
                           desc="Iteration"):
            batch = next(train_batch_sampler)
            batch = tuple(t.to(args.device) for t in batch)
            input_ids = batch[0]
            input_mask = batch[1]
            segment_ids = batch[2]
            label_ids = batch[3]
            masked_input_ids = batch[4]
            lm_label_ids = batch[5]
            real_batch_sizes.append(len(input_ids))
            lid_scores = model(input_ids=input_ids,
                               input_mask=input_mask,
                               segment_ids=segment_ids)

            if not args.no_mlm:
                # Call BERT encoder to get encoding of masked input sequences
                mlm_outputs = model.encoder.bert(input_ids=masked_input_ids,
                                                 attention_mask=input_mask,
                                                 token_type_ids=segment_ids,
                                                 position_ids=None)
                mlm_last_hidden_states = mlm_outputs[0]

                # Do MLM on last hidden states
                mlm_pred_scores = model.encoder.cls(mlm_last_hidden_states)

            # Do MLM on unk_dataset if present
            if unk_dataset is not None:
                unk_batch_id, unk_batch = next(unk_batch_enum)
                # Make sure the training steps are synced
                assert unk_batch_id == step
                unk_batch = tuple(t.to(args.device) for t in unk_batch)
                xinput_ids, xinput_mask, xsegment_ids, xlm_label_ids = unk_batch
                # Make sure the batch sizes are equal
                assert len(xinput_ids) == len(input_ids)
                unk_mlm_outputs = model.encoder.bert(
                    input_ids=xinput_ids,
                    attention_mask=xinput_mask,
                    token_type_ids=xsegment_ids,
                    position_ids=None)
                unk_last_hidden_states = unk_mlm_outputs[0]
                unk_mlm_pred_scores = model.encoder.cls(unk_last_hidden_states)

            # Compute loss, do backprop. Compute accuracies.
            loss_fct = CrossEntropyLoss(reduction="mean")
            loss = loss_fct(lid_scores, label_ids)
            lid_losses.append(loss.item())
            if not args.no_mlm:
                mlm_loss = loss_fct(
                    mlm_pred_scores.view(-1, model.encoder.config.vocab_size),
                    lm_label_ids.view(-1))
                mlm_losses.append(mlm_loss.item())
                loss = loss + mlm_loss
            if unk_dataset is not None:
                unk_mlm_loss = loss_fct(
                    unk_mlm_pred_scores.view(-1,
                                             model.encoder.config.vocab_size),
                    xlm_label_ids.view(-1))
                loss = loss + unk_mlm_loss
                unk_mlm_losses.append(unk_mlm_loss.item())

            # Backprop
            loss = adjust_loss(loss, args)
            loss.backward()

            # Compute norm of gradient
            training_grad_norm = 0
            for param in model.parameters():
                if param.grad is not None:
                    training_grad_norm += torch.norm(param.grad, p=2).item()
            grad_norms.append(training_grad_norm)

            # Compute accuracies
            lid_acc = accuracy(lid_scores, label_ids)
            lid_accs.append(lid_acc)
            if not args.no_mlm:
                mlm_acc = accuracy(mlm_pred_scores.view(
                    -1, model.encoder.config.vocab_size),
                                   lm_label_ids.view(-1),
                                   ignore_label=NO_MASK_LABEL)
                mlm_accs.append(mlm_acc)
            if unk_dataset is not None:
                unk_mlm_acc = accuracy(unk_mlm_pred_scores.view(
                    -1, model.encoder.config.vocab_size),
                                       xlm_label_ids.view(-1),
                                       ignore_label=NO_MASK_LABEL)
                unk_mlm_accs.append(unk_mlm_acc)

            # Check if we accumulate grad or do an optimization step
            if (step + 1) % args.grad_accum_steps == 0:
                optimizer.step()
                optimizer.zero_grad()
                checkpoint_data["global_step"] += 1
                if checkpoint_data["global_step"] >= checkpoint_data[
                        "max_opt_steps"]:
                    break

        # Compute stats for this epoch
        last_grad_norm = grad_norms[-1]
        avg_lid_loss = weighted_avg(lid_losses, real_batch_sizes)
        avg_lid_acc = weighted_avg(lid_accs, real_batch_sizes)
        if not args.no_mlm:
            avg_mlm_loss = weighted_avg(mlm_losses, real_batch_sizes)
            avg_mlm_acc = weighted_avg(mlm_accs, real_batch_sizes)
        if unk_dataset is not None:
            avg_unk_mlm_loss = weighted_avg(unk_mlm_losses, real_batch_sizes)
            avg_unk_mlm_acc = weighted_avg(unk_mlm_accs, real_batch_sizes)

        # Compute norm of model weights
        weight_norm = 0
        for param in model.parameters():
            weight_norm += torch.norm(param.data, p=2).item()

        # Evaluate model on dev set
        if args.eval_during_training:
            dev_scores = evaluate(model, dev_dataset, args)

        # Write stats for this epoch in log
        log_data = []
        log_data.append(str(checkpoint_data["global_step"]))
        log_data.append("{:.5f}".format(avg_lid_loss))
        log_data.append("{:.5f}".format(avg_lid_acc))
        if not args.no_mlm:
            log_data.append("{:.5f}".format(avg_mlm_loss))
            log_data.append("{:.5f}".format(avg_mlm_acc))
        if unk_dataset is not None:
            log_data.append("{:.5f}".format(avg_unk_mlm_loss))
            log_data.append("{:.5f}".format(avg_unk_mlm_acc))
        log_data.append("{:.5f}".format(last_grad_norm))
        log_data.append("{:.5f}".format(weight_norm))
        if args.eval_during_training:
            log_data.append("{:.5f}".format(dev_scores["loss"]))
            log_data.append("{:.5f}".format(dev_scores["track1"]))
            log_data.append("{:.5f}".format(dev_scores["track2"]))
            log_data.append("{:.5f}".format(dev_scores["track3"]))
        with open(train_log_path, "a") as f:
            f.write("\t".join(log_data) + "\n")

        # Save best model in checkpoint if score has improved
        save = True
        if args.eval_during_training:
            current_score = dev_scores[args.score_to_optimize]
            if current_score > best_score:
                best_score = current_score
                checkpoint_data["best_score"] = best_score
                model_to_save = get_module(model)
                checkpoint_data['best_model_state_dict'] = deepcopy(
                    model_to_save.state_dict())

        # Save datasets in case we need to resume later
        train_dataset.close_files()
        checkpoint_data["train_dataset"] = train_dataset
        if unk_dataset is not None:
            unk_dataset.close_files()
            checkpoint_data["unk_dataset"] = unk_dataset
        if dev_dataset is not None:
            checkpoint_data["dev_dataset"] = dev_dataset

        # Save checkpoint
        model_to_save = get_module(model)
        checkpoint_data['model_state_dict'] = model_to_save.state_dict()
        checkpoint_data['optimizer_state_dict'] = optimizer.state_dict()
        checkpoint_path = os.path.join(save_to_dir, "checkpoint.tar")
        logger.info("Saving checkpoint")
        torch.save(checkpoint_data, checkpoint_path)

        # Reload datasets we had to close
        train_dataset.prep_files_for_streaming()
        if unk_dataset is not None:
            unk_dataset.prep_files_for_streaming()
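In this training loop `get_module(model)` is called right before state dicts are saved, which is the usual trick for unwrapping a DataParallel/DistributedDataParallel wrapper so the checkpoint stores the bare module. A plausible sketch under that assumption (not the project's confirmed implementation):

# Hypothetical unwrap helper for (Distributed)DataParallel-wrapped models.
def get_module(model):
    return model.module if hasattr(model, 'module') else model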
Example #29
#!/usr/bin/python

import sys
reload(sys)
sys.setdefaultencoding('utf-8')

import utils

try:
    utils.get_module()
except:
    from portal import PortalApp
    PortalApp().run()

from swm.main import SWMApp
SWMApp().run()
Example #30
def main(
        conf: str,
        seed: int,
        gpu_index: int,
        data_path: str,
        event: int,
        weight: float,
        n_times_model: int,
        prefix: str,
        is_gp_3dim: bool
):
    global DEVICE, FIRST_MODEL_NAME, SECOND_MODEL_NAME, MODELNAME_CHOICE_INDEX
    start = time.time()
    conf = load_config(conf)
    if seed is not None:
        conf.seed = seed
    if gpu_index is not None and DEVICE == torch.device('cuda'):
        # WARNING: enable the gpu_re_index remapping on gpu02 only
        gpu_re_index = {0: 0, 1: 1, 2: 4, 3: 5, 4: 2, 5: 3, 6: 6, 7: 7}
        gpu_index = gpu_re_index[gpu_index]
        DEVICE = torch.device(f'cuda:{gpu_index}')
    if data_path is not None:
        conf['dataset']['params']['data_path'] = data_path
    if event is not None:
        conf['dataset']['params']['max_events'] = event
    conf['is_gp_3dim'] = is_gp_3dim
    logger.info(DEVICE)
    logger.info(conf)

    model_confs_tau4vec = conf.sub_task_params.tau4vec
    model_confs_tau4vec['tasks'] = model_confs_tau4vec['tasks'] * n_times_model
    model_confs_higgsId = conf.sub_task_params.higgsId
    model_confs_higgsId['tasks'] = model_confs_higgsId['tasks'] * n_times_model
    sub_models_conf = {
        'tau4vec': model_confs_tau4vec,
        'higgsId': model_confs_higgsId
    }
    FIRST_MODEL_NAME = [
        i['name'].split('_')[-1][:-4] + f'-{num}'
        for num, i in enumerate(model_confs_tau4vec['tasks'])
    ]
    SECOND_MODEL_NAME = [
        i['name'].split('_')[-1][:-4] + f'-{num}'
        for num, i in enumerate(model_confs_higgsId['tasks'])
    ]
    MODELNAME_CHOICE_INDEX = {
        f'{n1}_{n2}': v
        for (n1, n2), v in zip(
                product(FIRST_MODEL_NAME,
                        SECOND_MODEL_NAME),
                product(range(len(FIRST_MODEL_NAME)),
                        range(len(SECOND_MODEL_NAME)))
        )
    }

    set_seed(conf.seed)
    from models import sub_task
    tau4vec = set_task(sub_models_conf, 'tau4vec', sub_task)
    logger.info('set_task: tau4vec')
    set_seed(conf.seed)
    higgsId = set_task(sub_models_conf, 'higgsId', sub_task)
    logger.info('set_task: higgsId')
    from models import MyDataset
    from models import MyMetrics
    set_seed(conf.seed)
    dataset = set_module([MyDataset], conf, 'dataset')
    set_seed(conf.seed)
    dataloader = DataLoader(dataset,
                            batch_size=100,
                            shuffle=True)
    logger.info('set dataloader')
    # #########################################################################
    # pre-train ###############################################################
    # #########################################################################
    logger.info('----- pretrain[0] start -----')
    pretrain_conf = model_confs_tau4vec['pretrain']
    for i, sub_model in enumerate(tau4vec):
        logger.info(f'pretrain: [0][{i}]')
        set_seed(conf.seed)
        optimizer = set_module([optim],
                               pretrain_conf,
                               'optimizer',
                               params=sub_model.parameters())
        loss_func = set_module([nn, MyLoss], pretrain_conf, 'loss_func')
        metrics = set_module([MyMetrics], pretrain_conf, 'metrics')
        activation = set_module([nn], pretrain_conf, 'activation')
        input_key = pretrain_conf['data']['input_key']
        target_key = pretrain_conf['data']['target_key']
        patience = pretrain_conf['patience']
        tau4vec[i] = sub_task.pre_train(epochs=pretrain_conf['epochs'],
                                        model=sub_model,
                                        dataloader=dataloader,
                                        optimizer=optimizer,
                                        loss_func=loss_func,
                                        input_key=input_key,
                                        target_key=target_key,
                                        device=DEVICE,
                                        patience=patience,
                                        metrics=metrics,
                                        activation=activation)
    logger.info('----- pretrain[0] end -----')
    logger.info('----- pretrain[1] start -----')
    pretrain_conf = conf.sub_task_params.higgsId.pretrain
    for i, sub_model in enumerate(higgsId):
        logger.info(f'pretrain: [1][{i}]')
        set_seed(conf.seed)
        optimizer = set_module([optim],
                               pretrain_conf,
                               'optimizer',
                               params=sub_model.parameters())
        loss_func = set_module([nn, MyLoss], pretrain_conf, 'loss_func')
        metrics = set_module([MyMetrics], pretrain_conf, 'metrics')
        activation = set_module([nn], pretrain_conf, 'activation')
        input_key = pretrain_conf['data']['input_key']
        target_key = pretrain_conf['data']['target_key']
        patience = pretrain_conf['patience']
        higgsId[i] = sub_task.pre_train(epochs=pretrain_conf['epochs'],
                                        model=sub_model,
                                        dataloader=dataloader,
                                        optimizer=optimizer,
                                        loss_func=loss_func,
                                        input_key=input_key,
                                        target_key=target_key,
                                        device=DEVICE,
                                        patience=patience,
                                        metrics=metrics,
                                        activation=activation)
    logger.info('----- pretrain[1] end -----')

    # #########################################################################
    # #########################################################################
    logger.info('copy the pretrain models')
    pre_trained_tau4vec = set_task(sub_models_conf, 'tau4vec', sub_task)
    pre_trained_higgsId = set_task(sub_models_conf, 'higgsId', sub_task)
    pre_trained_model = [pre_trained_tau4vec, pre_trained_higgsId]
    task = [tau4vec, higgsId]
    for num_task, sub in enumerate(task):
        for num_model in range(len(sub)):
            pre_trained_model[num_task][num_model].load_state_dict(
                deepcopy(task[num_task][num_model].state_dict())
            )
    # #########################################################################
    # #########################################################################

    logger.info('----- SPOS-NAS start -----')
    sposnas_conf = conf.SPOS_NAS

    def make_output_dict():
        return {
            'X': [],
            'AUC': {
                f'{f}_{s}': [] for f, s in product(
                    FIRST_MODEL_NAME, SECOND_MODEL_NAME
                )
            },
            'LOSS_1ST': {
                f: [] for f in FIRST_MODEL_NAME
            },
            'LOSS_2ND': {
                f'{f}_{s}': [] for f, s in product(
                    FIRST_MODEL_NAME, SECOND_MODEL_NAME
                )
            },
            'RATIO': {
                f: [] for f in FIRST_MODEL_NAME
            },
            'ONLY_PT_RATIO': {
                f: [] for f in FIRST_MODEL_NAME
            },
        }

    # SPOS-NAS
    loss_func = [set_module([nn, MyLoss], sposnas_conf, 'loss_first'),
                 set_module([nn, MyLoss], sposnas_conf, 'loss_second')]
    loss_weight = [weight, 1. - weight]
    metrics = get_module([MyMetrics], 'Calc_Auc')()

    model = SPOS(task=task, loss_func=loss_func,
                 loss_weight=loss_weight)
    model.to(DEVICE)

    output_dict = make_output_dict()
    output_dict['X'].append(weight)
    logger.info(f'loss_ratio: {weight:.6f} (loss_1*X + loss_2*(1-X)) start')
    set_seed(conf.seed)
    logger.info('load pretrain models...')
    for num_task, sub in enumerate(task):
        for num_model in range(len(sub)):
            task[num_task][num_model].load_state_dict(
                deepcopy(pre_trained_model[num_task][num_model].state_dict())
            )
    logger.info('load pretrain models done')
    logger.info('set model parameters...')

    optimizer = set_module([optim],
                           sposnas_conf,
                           'optimizer',
                           params=model.parameters())
    scheduler = set_module([optim.lr_scheduler],
                           sposnas_conf,
                           'scheduler',
                           optimizer=optimizer)
    logger.info('set model parameters done')
    logger.info('fit model...')
    model.fit(epochs=sposnas_conf.epochs,
              dataloader=dataloader,
              device=DEVICE,
              optimizer=optimizer,
              scheduler=scheduler,
              patience=sposnas_conf.patience)
    logger.info('fit model done')
    logger.info('eval model...')
    output_dict = evaluate(model, conf, dataloader, metrics, output_dict, is_gp_3dim)
    logger.info('eval model done')

    set_seed(conf.seed)
    logger.info('re-train start')
    selected_model, _ = max(
        {
            k: v[-1] for k, v in output_dict['AUC'].items()
        }.items(), key=lambda x: x[1]
    )
    logger.info(f'selected_model: {selected_model}')
    selected_choice = MODELNAME_CHOICE_INDEX[selected_model]
    model.fit(epochs=sposnas_conf.epochs,
              dataloader=dataloader,
              device=DEVICE,
              optimizer=optimizer,
              scheduler=scheduler,
              patience=sposnas_conf.patience,
              choice=selected_choice)
    logger.info('re-train done')

    elapsed_time = time.time() - start
    events = conf.dataset.params.max_events * 2
    if prefix:
        output_file = (f'result.SPOS_NAS-{prefix}_' +
                       f's{seed}_w{weight}_e{events}_' +
                       f'n{n_times_model*3}.json')
    else:
        output_file = (f'result.SPOS_NAS-s{seed}_w{weight}_e{events}_' +
                       f'n{n_times_model*3}.json')

    with open(os.path.join('logs', output_file), 'w') as fo:
        json.dump(
            [{
                'agent': 'SPOS-NAS',
                'tasks': {
                    'tau4vec': {
                        'weight': weight,
                        'loss_test': -1,
                        'mse_test': -1,
                        'ratio_2sigma_GP_test': -1,
                        'models': FIRST_MODEL_NAME,
                        'model_selected': selected_model.split('_')[0]
                    },
                    'higgsId': {
                        'weight': 1. - weight,
                        'loss_test': -1,
                        'auc_test': -1,
                        'models': SECOND_MODEL_NAME,
                        'model_selected': selected_model.split('_')[1]
                    }
                },
                'loss_test': -1,
                'nevents': conf.dataset.params.max_events * 2,
                'seed': conf.seed,
                'walltime': elapsed_time
            }],
            fo,
            indent=2
        )

    dummy = make_output_dict()
    dummy = evaluate(model, conf, dataloader, metrics, dummy, is_gp_3dim)

    def result_parser(res, selected_model, seed, time):
        AUC = res['AUC'][selected_model][0]
        LOSS_1ST = res['LOSS_1ST'][selected_model.split('_')[0]][0]
        LOSS_2ND = res['LOSS_2ND'][selected_model][0]
        RATIO = res['RATIO'][selected_model.split('_')[0]][0]
        ONLY_PT_RATIO = res[
            'ONLY_PT_RATIO'
        ][selected_model.split('_')[0]][0]
        target_result = dict(
            seed=seed,
            AUC=AUC,
            LOSS_1ST=LOSS_1ST,
            LOSS_2ND=LOSS_2ND,
            RATIO=RATIO,
            ONLY_PT_RATIO=ONLY_PT_RATIO
        )
        logger.info(f're-train results: {target_result}')
        return {
            'agent': 'SPOS-NAS',
            'tasks': {
                'tau4vec': {
                    'weight': weight,
                    'loss_test': target_result['LOSS_1ST'],
                    'mse_test': target_result['LOSS_1ST'] * 10000,
                    'ratio_2sigma_GP_test': target_result['RATIO'],
                    'models': FIRST_MODEL_NAME,
                    'model_selected': selected_model.split('_')[0]
                },
                'higgsId': {
                    'weight': 1. - weight,
                    'loss_test': target_result['LOSS_2ND'],
                    'auc_test': target_result['AUC'],
                    'models': SECOND_MODEL_NAME,
                    'model_selected': selected_model.split('_')[1]
                }
            },
            'loss_test': (weight * target_result['LOSS_1ST']
                          + (1. - weight) * target_result['LOSS_2ND']),
            'nevents': conf.dataset.params.max_events * 2,
            'seed': seed,
            'walltime': time
        }

    with open(os.path.join('logs', output_file), 'w') as fo:
        json.dump(
            [result_parser(dummy, selected_model, conf.seed, elapsed_time)],
            fo,
            indent=2
        )

    logger.info('all train and eval steps are done')
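
Example #30 builds nearly every component (dataset, optimizers, losses, metrics, activations, scheduler) through get_module/set_module, which look a class up by name in a list of modules and instantiate it from the config. The project's actual helpers are not shown on this page; a minimal sketch of how such helpers are commonly written (the exact signatures and config layout are assumptions):

def get_module(modules, name):
    # Return the first attribute called `name` found on any of the given
    # modules (e.g. torch.nn, torch.optim, or a local models package).
    for module in modules:
        if hasattr(module, name):
            return getattr(module, name)
    raise AttributeError(f"'{name}' not found in any of {modules}")


def set_module(modules, conf, key, **kwargs):
    # Instantiate the class named in conf[key]['name'] with the parameters
    # from conf[key]['params'], merged with any extra keyword arguments
    # (e.g. params=model.parameters() when building an optimizer).
    sub_conf = conf[key]
    cls = get_module(modules, sub_conf['name'])
    params = dict(sub_conf.get('params') or {})
    params.update(kwargs)
    return cls(**params)
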
Example #31
def train_algorithm(request):
    module_id = request.GET.get('module_id')
    scene_id = request.GET.get('scene_id')
    limit = request.GET.get('limit')

    if scene_id and module_id:
        tmp = utils.get_scene_record(module_id, scene_id)
        for i in tmp:
            i['data_length'] = range(len(i['data'][i['data'].keys()[0]]))
            i['resources'] = []
            i['apis'] = []
            i['api_info'] = []
            api_dict = {}
            for k in i['data'].keys():
                if k != 'total' and k.find('#api#') != 0:
                    i['resources'].append(k)
                if k != 'total' and k.find('#api#') == 0:
                    api_dict[k[5:]] = i['data'][k]
                    #this_api_id = utils.get_api_by_name(k[5:])
                    i['api_info'].append(k)  # TODO

            for j in i['data_length']:
                current_api_dict = {}
                for k, v in api_dict.iteritems():
                    current_api_dict[k] = v[j]
                i['apis'].append(current_api_dict)
        if limit and int(limit) > 0:
            ret = {'scene_records': tmp[:int(limit)]}
        else:
            ret = {'scene_records': tmp}

        ret['module_id'] = module_id
        ret['scene_id'] = scene_id
        scene_api = utils.get_scene_api(module_id, scene_id)

        for s in scene_api:
            s['api_info'] = utils.get_api(s.get('api_id'))
            # get thresholds
            if s['api_info']:
                s['api_info']['threholds'] = utils.get_api_resource(
                    s.get('api_id'))
                for th in s['api_info']['threholds'].get('resource_list'):
                    th['name'] = utils.get_resource(
                        th.get('resource_id')).get('name')

        ret['scene_info'] = utils.get_scene(scene_id)
        ret['module_info'] = utils.get_module(module_id)
        ret['scene_api'] = scene_api
        ret['all_resource'] = []

        all_resource_ids = []
        # collect all resources needed
        for s in scene_api:
            for id in s.get('api_info').get('threholds').get('resource_id'):
                if id not in all_resource_ids:
                    all_resource_ids.append(id)
                    ret['all_resource'].append(utils.get_resource(id))

        ret["public"] = utils.get_public(request)
        return render(request, 'assess/train_algorithm.html', {'data': ret})
    else:
        return render(request, 'error.html')
Example #32
 def fasta_file(self):
     return get_module('pyfaidx').Fasta(self.fasta_path)
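
In Example #32, get_module acts as a lazy importer: pyfaidx is only imported when the FASTA file is actually opened. A sketch of that style of helper, assuming it simply wraps importlib and fails with a clearer message when the optional package is missing:

import importlib


def get_module(name):
    # Import an optional dependency by name only when it is first needed.
    try:
        return importlib.import_module(name)
    except ImportError as exc:
        raise ImportError(
            f"Optional dependency '{name}' is required for this feature; "
            f"install it first (e.g. pip install {name})."
        ) from exc
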
Example #33
def main(unused_argv=None):
    tf.logging.set_verbosity(FLAGS.log)

    if FLAGS.config is None:
        raise RuntimeError("No config name specified.")

    config = utils.get_module("wavenet." + FLAGS.config).Config(
        FLAGS.train_path)

    logdir = FLAGS.logdir
    tf.logging.info("Saving to %s" % logdir)

    with tf.Graph().as_default():
        total_batch_size = FLAGS.total_batch_size
        assert total_batch_size % FLAGS.worker_replicas == 0
        worker_batch_size = total_batch_size / FLAGS.worker_replicas

        # Run the Reader on the CPU
        cpu_device = "/job:localhost/replica:0/task:0/cpu:0"
        if FLAGS.ps_tasks:
            cpu_device = "/job:worker/cpu:0"

        with tf.device(cpu_device):
            inputs_dict = config.get_batch(worker_batch_size)

        with tf.device(
                tf.train.replica_device_setter(ps_tasks=FLAGS.ps_tasks,
                                               merge_devices=True)):
            global_step = tf.get_variable(
                "global_step", [],
                tf.int32,
                initializer=tf.constant_initializer(0),
                trainable=False)

            # pylint: disable=cell-var-from-loop
            lr = tf.constant(config.learning_rate_schedule[0])
            for key, value in config.learning_rate_schedule.iteritems():
                lr = tf.cond(tf.less(global_step, key), lambda: lr,
                             lambda: tf.constant(value))
            # pylint: enable=cell-var-from-loop
            tf.summary.scalar("learning_rate", lr)

            # build the model graph
            outputs_dict = config.build(inputs_dict, is_training=True)
            loss = outputs_dict["loss"]
            tf.summary.scalar("train_loss", loss)

            worker_replicas = FLAGS.worker_replicas
            ema = tf.train.ExponentialMovingAverage(decay=0.9999,
                                                    num_updates=global_step)
            opt = tf.train.SyncReplicasOptimizer(
                tf.train.AdamOptimizer(lr, epsilon=1e-8),
                worker_replicas,
                total_num_replicas=worker_replicas,
                variable_averages=ema,
                variables_to_average=tf.trainable_variables())

            train_op = opt.minimize(loss,
                                    global_step=global_step,
                                    name="train",
                                    colocate_gradients_with_ops=True)

            session_config = tf.ConfigProto(allow_soft_placement=True)

            is_chief = (FLAGS.task == 0)
            local_init_op = opt.chief_init_op if is_chief else opt.local_step_init_op

            slim.learning.train(
                train_op=train_op,
                logdir=logdir,
                is_chief=is_chief,
                master=FLAGS.master,
                number_of_steps=config.num_iters,
                global_step=global_step,
                log_every_n_steps=250,
                local_init_op=local_init_op,
                save_interval_secs=300,
                sync_optimizer=opt,
                session_config=session_config,
            )
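
The chained tf.cond calls in Example #33 encode a piecewise-constant learning-rate schedule keyed on the global step: the rate starts at learning_rate_schedule[0] and is replaced by each scheduled value once the step reaches the corresponding boundary. The same lookup in plain Python (assuming the schedule maps step boundaries to rates, with 0 as the first key) might read:

def piecewise_lr(global_step, learning_rate_schedule):
    # learning_rate_schedule maps step boundaries to learning rates,
    # e.g. {0: 1e-3, 10000: 1e-4, 50000: 1e-5} (values here are illustrative).
    lr = learning_rate_schedule[0]
    for boundary, value in sorted(learning_rate_schedule.items()):
        if global_step >= boundary:
            lr = value
    return lr
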
Example #34
import sgmllib
import urllib
import random
import re

import	const
import	utils

module = utils.get_module()
exec(utils.get_import(mod=module, from_=['utils']))
try:
	exec(utils.get_import(mod=module, from_=['mounts'],
		import_=['HookMount', 'CommandMount']))
except ImportError, e:
	print e

class UrlParser(sgmllib.SGMLParser):
	"A simple parser class."

	def parse(self, s):
		"Parse the given string 's'."
		self.feed(s)
		self.close()

	def __init__(self, verbose=0):
		"Initialise an object, passing 'verbose' to the superclass."

		sgmllib.SGMLParser.__init__(self, verbose)
		self.hyperlinks = []
		self.descriptions = []
Example #35
def get_module_view(request):
    id = request.GET.get("module_id")
    ret = utils.get_module(id)
    return public.success_result_http(ret)