예제 #1
0
파일: auth.py 프로젝트: csettles/climgur
def log_in(client):
    """Authorize ``client`` to act on behalf of a user account.

    Stored tokens from ``auth.ini`` (section ``credentials``) are reused when
    both are present and non-empty. Otherwise the pin-based authorization URL
    is opened in a browser, the user is prompted for the pin, and the tokens
    returned by ``client.authorize`` are written back through ``save_config``.

    A missing ``auth.ini``, a missing ``credentials`` section, or missing
    options are treated as "no stored tokens" instead of raising.

    Args:
        client: object providing ``set_user_auth``, ``get_auth_url`` and
            ``authorize``.

    Returns:
        The same ``client`` with user authentication set.
    """
    section = 'credentials'
    config = ConfigParser()
    # read() skips files it cannot open, leaving the parser empty.
    config.read('auth.ini')

    def stored(option):
        # has_option() is False when the section itself is absent as well.
        if config.has_option(section, option):
            return config.get(section, option)
        return ''

    access_token = stored('access_token')
    refresh_token = stored('refresh_token')
    if access_token and refresh_token:
        client.set_user_auth(access_token, refresh_token)
        return client

    authorization_url = client.get_auth_url('pin')
    webbrowser.open(authorization_url)
    pin = input('Please input your pin\n>\t')

    credentials = client.authorize(pin)  # grant_type default is 'pin'

    access_token = credentials['access_token']
    refresh_token = credentials['refresh_token']

    # set() raises NoSectionError unless the section already exists.
    if not config.has_section(section):
        config.add_section(section)
    config.set(section, 'access_token', access_token)
    config.set(section, 'refresh_token', refresh_token)

    save_config(config)
    client.set_user_auth(access_token, refresh_token)
    return client
예제 #2
0
 def on_remove_current_blog(self, action, parameter):
     """
     Ask for confirmation, then remove the currently active blog.

     On confirmation the active blog is dropped from the configuration,
     the header subtitle is cleared, the configuration is saved and the
     blog selection menu is rebuilt. When no active blog can be found an
     infobar message is shown instead.
     """
     confirm = Gtk.MessageDialog(
         parent=self.main_window,
         text="Confirm removing blog",
         buttons=(Gtk.STOCK_CANCEL, Gtk.ResponseType.CANCEL,
                  Gtk.STOCK_REMOVE, Gtk.ResponseType.OK)
         )
     # Second action-area child; presumably the Remove button (added second).
     second_button = confirm.get_action_area().get_children()[1]
     second_button.get_style_context().add_class("destructive-action")
     answer = confirm.run()
     # The dialog is finished whichever button was pressed.
     confirm.destroy()
     if answer != Gtk.ResponseType.OK:
         return

     blog = utils.get_blog_by_id(self.config, self.config["active_blog"])
     if not blog:
         infobar = self.main_window.infobar
         infobar.get_content_area().get_children()[0].set_text("No blog to remove")
         infobar.get_action_area().get_children()[1].props.visible = False
         infobar.show()
         return

     self.config["blogs"].remove(blog)
     self.config["active_blog"] = None
     header_bar = self.main_window.get_children()[1]
     header_bar.get_custom_title().get_children()[1].set_text("")
     utils.save_config(self.config)

     # Rebuild the menu from the remaining blogs
     menu = self.main_window.select_blog_menu
     menu.remove_all()
     for remaining in self.config["blogs"]:
         # Create the action and add a matching menu entry
         self.create_select_blog_action(remaining["id"])
         menu.append(remaining["name"], "app.select_blog_%s" % remaining["id"])
예제 #3
0
def main(_):
  """Prepare the run, then train or test a TSP model from the global config.

  Raises:
    Exception: if ``config.task`` does not start with "tsp" (any case), or
      if testing is requested without ``config.load_path``.
  """
  prepare_dirs_and_logger(config)

  task_name = config.task.lower()
  if not task_name.startswith('tsp'):
    raise Exception("[!] Task should starts with TSP")

  # Unset encoder/decoder lengths fall back to the data length.
  for length_attr in ('max_enc_length', 'max_dec_length'):
    if getattr(config, length_attr) is None:
      setattr(config, length_attr, config.max_data_length)

  # Seed both NumPy and TensorFlow from the same configured value.
  rng = np.random.RandomState(config.random_seed)
  tf.set_random_seed(config.random_seed)

  runner = Trainer(config, rng)
  save_config(config.model_dir, config)

  if config.is_train:
    runner.train()
  elif config.load_path:
    runner.test()
  else:
    raise Exception("[!] You should specify `load_path` to load a pretrained model")

  tf.logging.info("Run finished.")
예제 #4
0
 def on_post_button_clicked(self, target):
     """
     Send the post being edited to the active blog's remote server.

     The comma-separated tag entry is split, stripped and converted to
     unicode; blank entries are dropped. Tags the blog does not know yet
     are appended to its stored tag list and the configuration is saved.
     The outcome (or the reason nothing was sent) is shown in the infobar.

     Previously any provider other than "blogger" reached send_post with
     `service` and `tags` unbound and raised NameError; such blogs now get
     an infobar message instead.
     """
     message_label = self.infobar.get_content_area().get_children()[0]
     action_button = self.infobar.get_action_area().get_children()[1]

     config = self.app.config
     blog = utils.get_blog_by_id(config, config["active_blog"])
     if not blog:
         message_label.set_text("No blog is selected")
         # Hide New button
         action_button.props.visible = False
         self.infobar.show()
         return

     if blog["provider"] != "blogger":
         message_label.set_text("Unsupported blog provider: %s" % blog["provider"])
         action_button.props.visible = False
         self.infobar.show()
         return

     service = BloggerProvider(blog["username"], blog["password"])
     raw_tags = [x.strip() for x in self.tag_entry.get_text().split(",")]
     # "".split(",") yields [""]; skip blanks so no empty tag is stored/sent.
     tags = [unicode(x, "utf-8") for x in raw_tags if x]

     # Add new tags to the blog
     new_tags = [tag for tag in tags if tag not in blog["tags"]]
     if new_tags:
         index = config["blogs"].index(blog)
         config["blogs"][index]["tags"].extend(new_tags)
         utils.save_config(config)

     result = service.send_post(blog["id"], self.title_entry.get_text(),
                                self.sourceview.get_buffer().props.text, tags)
     message_label.set_text(result)
     action_button.props.visible = True
     self.infobar.show()
예제 #5
0
파일: main.py 프로젝트: byungsook/vectornet
def _load_batch_manager(dataset):
    """Import and return the ``BatchManager`` class for ``dataset``.

    Imports stay lazy so only the selected data module is loaded.

    Raises:
        ValueError: if ``dataset`` is not one of the known names.
    """
    if dataset == 'line':
        from data_line import BatchManager
    elif dataset == 'ch':
        from data_ch import BatchManager
    elif dataset == 'kanji':
        from data_kanji import BatchManager
    elif dataset in ('baseball', 'cat'):
        from data_qdraw import BatchManager
    else:
        # Previously this fell through and failed later on an unbound name.
        raise ValueError("Unknown dataset: {!r}".format(dataset))
    return BatchManager


def main(config):
    """Prepare the run directories, save the config, then train or test.

    ``config.is_train`` selects between ``Trainer.train()`` and
    ``Tester.test()``; ``config.dataset`` selects the data module that
    supplies ``BatchManager``.

    Raises:
        ValueError: if ``config.dataset`` is not a known dataset name.
    """
    prepare_dirs_and_logger(config)
    save_config(config)

    if config.is_train:
        from trainer import Trainer
        batch_manager = _load_batch_manager(config.dataset)(config)
        trainer = Trainer(config, batch_manager)
        trainer.train()
    else:
        from tester import Tester
        batch_manager = _load_batch_manager(config.dataset)(config)
        tester = Tester(config, batch_manager)
        tester.test()
예제 #6
0
    def run(self, container):
        """Create a container, optionally based on a named template.

        When ``container['template']`` is set, templates are fetched, the one
        matching the requested name is queried, and the truthy values of
        ``container`` are merged over it. Defaults are then normalized, the
        dataset, devices and files are initialized, the container is inserted
        into the datastore and its config is saved next to the VM.

        Returns:
            The datastore id of the new container.

        Raises:
            TaskException: ENOENT when the requested template does not exist.
        """
        if container.get('template'):
            self.join_subtasks(self.run_subtask('container.template.fetch'))
            template_name = container['template'].get('name')
            template = self.dispatcher.call_sync(
                'container.template.query',
                [('template.name', '=', template_name)],
                {'single': True}
            )

            # Check before touching the result: the old code dereferenced
            # `template` first, so a missing template raised TypeError and
            # this error was unreachable.
            if template is None:
                raise TaskException(errno.ENOENT, 'Template {0} not found'.format(template_name))

            # Drop the readme from the template data; it may be absent.
            template['template'].pop('readme', None)

            # Only values the caller actually set override the template.
            result = {key: value for key, value in container.items() if value}
            deep_update(template, result)
            container = template

            self.join_subtasks(self.run_subtask('container.cache.update', container['template']['name']))
        else:
            normalize(container, {
                'config': {},
                'devices': []
            })

        normalize(container, {
            'enabled': True,
            'immutable': False
        })

        normalize(container['config'], {
            'memsize': 512,
            'ncpus': 1
        })

        self.init_dataset(container)
        for res in container['devices']:
            self.create_device(container, res)
        self.init_files(container)

        container_id = self.datastore.insert('containers', container)
        self.dispatcher.dispatch_event('container.changed', {
            'operation': 'create',
            'ids': [container_id]
        })

        # Re-read so the saved config reflects what the datastore holds.
        container = self.datastore.get_by_id('containers', container_id)
        save_config(
            self.dispatcher.call_sync(
                'volume.resolve_path',
                container['target'],
                os.path.join('vm', container['name'])
            ),
            'vm-{0}'.format(container['name']),
            container
        )

        return container_id
예제 #7
0
    def run(self, id, updated_params):
        """Apply ``updated_params`` to an existing container.

        The saved config is removed first (best effort), an updated template
        readme is written to ``README.md`` in the container root, devices are
        updated or created by name, the container is stopped when the update
        disables it, and the merged record is stored and saved again.

        Raises:
            TaskException: EACCES when the container is immutable.
        """
        container = self.datastore.get_by_id('containers', id)
        if container['immutable']:
            raise TaskException(errno.EACCES, 'Cannot modify immutable container {0}.'.format(id))
        try:
            delete_config(
                self.dispatcher.call_sync(
                    'volume.resolve_path',
                    container['target'],
                    os.path.join('vm', container['name'])
                ),
                'vm-{0}'.format(container['name'])
            )
        except (RpcException, OSError):
            # Best effort: the config is written again at the end.
            pass

        if 'template' in updated_params:
            template_update = updated_params['template']
            # A template update need not carry a readme; a bare pop() raised
            # KeyError in that case.
            readme = template_update.pop('readme', None) if template_update else None
            if readme:
                root = self.dispatcher.call_sync('container.get_container_root', container['id'])
                with open(os.path.join(root, 'README.md'), 'w') as readme_file:
                    readme_file.write(readme)

        if 'devices' in updated_params:
            # A container may have no template (creation treats it as
            # optional), so only refresh the cache when a name is known.
            template_name = (container.get('template') or {}).get('name')
            if template_name:
                self.join_subtasks(self.run_subtask('container.cache.update', template_name))
            for res in updated_params['devices']:
                existing = first_or_default(lambda i: i['name'] == res['name'], container['devices'])
                if existing:
                    self.update_device(container, existing, res)
                else:
                    self.create_device(container, res)

        if not updated_params.get('enabled', True):
            self.join_subtasks(self.run_subtask('container.stop', id))

        container.update(updated_params)
        self.datastore.update('containers', id, container)
        self.dispatcher.dispatch_event('container.changed', {
            'operation': 'update',
            'ids': [id]
        })

        # Re-read so the saved config reflects what the datastore holds.
        container = self.datastore.get_by_id('containers', id)
        save_config(
            self.dispatcher.call_sync(
                'volume.resolve_path',
                container['target'],
                os.path.join('vm', container['name'])
            ),
            'vm-{0}'.format(container['name']),
            container
        )
예제 #8
0
    def run(self, id, updated_fields):
        """Update a share and rewrite its backup config file.

        The old backup config is removed (OSError is ignored). A change of
        share type updates the dataset permission type for DATASET targets,
        then deletes the share under the old type and recreates it under the
        new one; otherwise the type-specific update subtask is run. New
        permissions are applied, a ``share.changed`` event is emitted and the
        stored share is saved again. A failed save is reported as a task
        warning rather than an error.
        """
        share = self.datastore.get_by_id('shares', id)
        remove_unchanged(updated_fields, share)

        old_config_dir = self.dispatcher.call_sync('share.get_directory_path', share['id'])
        try:
            delete_config(old_config_dir, '{0}-{1}'.format(share['type'], share['name']))
        except OSError:
            pass

        if 'type' not in updated_fields:
            update_task = 'share.{0}.update'.format(share['type'])
            self.join_subtasks(self.run_subtask(update_task, id, updated_fields))
        else:
            previous_type = share['type']
            supported = self.dispatcher.call_sync('share.supported_types')
            type_info = supported.get(updated_fields['type'])
            if share['target_type'] == 'DATASET':
                _pool, dataset = split_dataset(share['target_path'])
                dataset_changes = {'permissions_type': type_info['perm_type']}
                self.join_subtasks(
                    self.run_subtask('volume.dataset.update', dataset, dataset_changes)
                )

            share.update(updated_fields)
            # Recreate the share under its new type.
            self.join_subtasks(self.run_subtask('share.{0}.delete'.format(previous_type), id))
            self.join_subtasks(self.run_subtask('share.{0}.create'.format(updated_fields['type']), share))

        if 'permissions' in updated_fields:
            share_path = self.dispatcher.call_sync('share.translate_path', id)
            self.join_subtasks(
                self.run_subtask('file.set_permissions', share_path, updated_fields['permissions'])
            )

        self.dispatcher.dispatch_event('share.changed', {
            'operation': 'update',
            'ids': [share['id']]
        })

        stored = self.datastore.get_by_id('shares', id)
        new_config_dir = self.dispatcher.call_sync('share.get_directory_path', stored['id'])
        try:
            save_config(new_config_dir, '{0}-{1}'.format(stored['type'], stored['name']), stored)
        except OSError as err:
            self.add_warning(TaskWarning(errno.ENXIO, 'Cannot save backup config file: {0}'.format(str(err))))
예제 #9
0
 def activate_blog(self, blog_id):
     """
     Make the blog with ``blog_id`` the active one.

     The id is stored in the configuration and saved. If the blog is
     known, the header subtitle becomes a link to it. Name and link are
     escaped first, because unescaped "&", "<" or quotes would produce
     invalid markup.
     """
     from xml.sax.saxutils import escape

     self.config["active_blog"] = blog_id
     utils.save_config(self.config)
     blog = utils.get_blog_by_id(self.config, blog_id)
     if blog:
         window = self.get_windows()[0]
         header_bar = window.get_children()[1]
         text = u'<a href="{href}">{name}</a>'.format(
             # The href sits inside double quotes, so escape those as well.
             href=escape(blog["link"], {'"': "&quot;"}),
             name=escape(blog["name"]))
         subtitle = header_bar.get_custom_title().get_children()[1]
         subtitle.props.use_markup = True
         subtitle.props.track_visited_links = False
         subtitle.set_markup(text)
예제 #10
0
파일: engine.py 프로젝트: IDex/wmal-python
 def unload(self):
     """
     Unload the data handler and mark the engine as not loaded.

     This should be called when closing the client application, or when you're
     sure you're not going to use the engine anymore. It logs the unload,
     unloads the data handler, saves the user configuration and finally
     clears the ``loaded`` flag.
     
     """
     self.msg.info(self.name, "Unloading...")
     self.data_handler.unload()
     
     # Save the user config to its file
     utils.save_config(self.userconfig, self.userconfigfile)
     
     self.loaded = False
    def run(self, id, updated_params):
        """Apply ``updated_params`` to a stored container and re-save its config.

        The existing saved config is deleted first (RPC and OS errors are
        ignored), devices named in the update are updated or created, the
        container is stopped when the update disables it, and the merged
        record is written to the datastore and saved again.
        """
        container = self.datastore.get_by_id('containers', id)
        try:
            old_config_dir = self.dispatcher.call_sync(
                'volume.resolve_path',
                container['target'],
                os.path.join('vm', container['name'])
            )
            delete_config(old_config_dir, 'vm-{0}'.format(container['name']))
        except (RpcException, OSError):
            pass

        if 'devices' in updated_params:
            for device in updated_params['devices']:
                # Devices are matched to existing ones by name.
                match = first_or_default(
                    lambda known: known['name'] == device['name'],
                    container['devices']
                )
                if not match:
                    self.create_device(container, device)
                else:
                    self.update_device(container, match, device)

        if not updated_params.get('enabled', True):
            self.join_subtasks(self.run_subtask('container.stop', id))

        container.update(updated_params)
        self.datastore.update('containers', id, container)
        change_event = {'operation': 'update', 'ids': [id]}
        self.dispatcher.dispatch_event('container.changed', change_event)

        # Save what the datastore now holds, not the local copy.
        container = self.datastore.get_by_id('containers', id)
        new_config_dir = self.dispatcher.call_sync(
            'volume.resolve_path',
            container['target'],
            os.path.join('vm', container['name'])
        )
        save_config(new_config_dir, 'vm-{0}'.format(container['name']), container)
예제 #12
0
        def run_dialog(dialog):
            """
            Run the add-blog dialog, re-running it after a failed attempt.

            On OK the credentials and provider are read from the dialog's
            content widgets and the provider's blogs are fetched. Blogs not
            yet in the configuration are added together with their menu
            entries and the configuration is saved. On an error status the
            error is shown and the dialog is run again. An unsupported
            provider is reported as an error; it used to raise NameError on
            the unbound `service`.
            """
            from xml.sax.saxutils import escape

            response = dialog.run()
            if response != Gtk.ResponseType.OK:
                dialog.destroy()
                return

            content_widgets = dialog.get_content_area().get_children()
            username = content_widgets[0].get_text()
            password = content_widgets[1].get_text()
            provider = content_widgets[2].get_active_id()
            if provider == "blogger":
                data = BloggerProvider(username, password).get_blogs()
            else:
                data = {"status": "error", "error": "Unsupported provider: %s" % provider}

            if data["status"] == "ok":
                # The dialog renders markup, so blog names must be escaped.
                names = "".join(escape(item["name"]) + "\n" for item in data["blogs"])
                message = "<b>Following blogs will be added:</b> \n" + names
                ok_dialog = Gtk.MessageDialog(parent=dialog, text=message, buttons=(Gtk.STOCK_APPLY, Gtk.ResponseType.OK),
                                              use_markup=True)
                ok_dialog.run()
                ok_dialog.destroy()
                dialog.destroy()

                for item in data["blogs"]:
                    if not utils.get_blog_by_id(self.config, item["id"]):
                        self.config["blogs"].append(item)
                        # Create the action and add a new item in the menu
                        self.create_select_blog_action(item["id"])
                        main_window = self.get_windows()[0]
                        main_window.select_blog_menu.append(item["name"], "app.select_blog_%s" % item["id"])

                utils.save_config(self.config)
            elif data["status"] == "error":
                error_dialog = Gtk.MessageDialog(parent=dialog, text=data["error"], buttons=(Gtk.STOCK_APPLY, Gtk.ResponseType.OK))
                error_dialog.run()
                error_dialog.destroy()
                # Give the user another attempt with the same dialog.
                run_dialog(dialog)
def admin_config_view(request):
    """
    Configure default UI parameter settings.

    On a submitted form that passes validation, the service URLs and the
    metadata fields are copied into the configuration and saved. The role
    fields are comma-separated and stored as lists of stripped names. A
    submission that fails validation leaves the configuration untouched, so
    the renderer can show the form errors.

    Returns:
        dict with the form renderer, subject/language/role choices, the
        request and the (possibly updated) configuration.
    """

    check_login(request)
    subjects = ["Arts", "Business", "Humanities", "Mathematics and Statistics",
                "Science and Technology", "Social Sciences"]
    form = Form(request, schema=ConfigSchema)
    config = load_config(request)

    # The result of validate() used to be ignored, which persisted
    # unvalidated input.
    if 'form.submitted' in request.POST and form.validate():
        for key in ['service_document_url', 'workspace_url']:
            config[key] = form.data[key]
        for key in ['title', 'summary', 'subject', 'keywords', 'language', 'google_code']:
            config['metadata'][key] = form.data[key]
        for key in ['authors', 'maintainers', 'copyright', 'editors', 'translators']:
            config['metadata'][key] = [x.strip() for x in form.data[key].split(',')]
        save_config(config, request)

    response = {
        'form': FormRenderer(form),
        'subjects': subjects,
        'languages': languages,
        'roles': [('authors', 'Authors'),
                  ('maintainers', 'Maintainers'),
                  ('copyright', 'Copyright holders'),
                  ('editors', 'Editors'),
                  ('translators', 'Translators'),
                  ],
        'request': request,
        'config': config,
    }
    return response
예제 #14
0
파일: main.py 프로젝트: yanssy/PoseFaceGAN
def main(config):
    """
    Build the trainer selected by ``config.model`` and run training or testing.

    Example ``config``: Namespace(D_arch='DCGAN', batch_size=1, beta1=0.5, beta2=0.999, ckpt_path=None, conv_hidden_num=128,
    d_lr=2e-05, data_dir='data', data_format='NCHW', dataset='DF_train_data', g_lr=2e-05, gamma=0.5, gpu=0,
    grayscale=False, img_H=256, img_W=256, is_train=True, lambda_k=0.001, load_path='', log_dir='logs',
    log_level='INFO', log_step=200, lr_update_step=50000, max_step=80, model=11, model_dir='path_to_directory_of_model',
    num_log_samples=3, num_worker=4, optimizer='adam', pretrained_path=None, random_seed=123, sample_per_image=64,
    save_model_secs=1000, split='train', start_step=0, test_data_path=None, test_one_by_one=False, use_gpu=True,
    z_num=2)

    ``config.data_format`` is always overwritten with 'NHWC'. Model 1 builds
    ``PG2`` and model 11 builds ``PG2_256``.

    Raises:
        ValueError: if ``config.model`` is neither 1 nor 11 (this used to
            surface later as an AttributeError on ``None``).
        Exception: if testing is requested without ``config.load_path``.
    """
    prepare_dirs_and_logger(config)

    if config.gpu > -1:
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
        os.environ["CUDA_VISIBLE_DEVICES"] = str(config.gpu)

    config.data_format = 'NHWC'
    if 1 == config.model:
        print("使用PG2(),即Market-1501 数据库,并初始化")
        trainer = PG2(config)
    elif 11 == config.model:
        print("使用PG2_256(),即DeepFashion数据库,并初始化")
        trainer = PG2_256(config)
    else:
        raise ValueError("Unsupported model id: {!r} (expected 1 or 11)".format(config.model))
    trainer.init_net()

    if config.is_train:
        print("开始训练")
        save_config(config)  # store the parameters in a json file
        trainer.train()
    else:
        print("开始测试")
        if not config.load_path:
            raise Exception("[!] 没有指定 `load_path` 用于读取预训练的模型")
        trainer.test()
예제 #15
0
    def unload(self):
        """
        Unload the data handler and mark the engine as not loaded.

        This should be called when closing the client application, or when you're
        sure you're not going to use the engine anymore. If a show is recorded
        in ``last_show`` its 'playing' attribute is cleared and a 'playing'
        signal is emitted; then the data handler is unloaded, the user
        configuration is saved and the ``loaded`` flag is cleared.
        
        """
        # NOTE(review): the loaded-state guard below is disabled, so calling
        # this on an engine that is not loaded does not raise.
        #if not self.loaded:
        #    raise utils.wmalError("Engine is not loaded.")
        
        # Clear the playing flag of the last show before shutting down
        if self.last_show:
            self.data_handler.set_show_attr(self.last_show, 'playing', False)
            self._emit_signal('playing', self.last_show)
         
        self.msg.info(self.name, "Unloading...")
        self.data_handler.unload()
        
        # Save config file (only the user config; the line for the main
        # config is disabled)
        #utils.save_config(self.config, self.configfile)
        utils.save_config(self.userconfig, self.userconfigfile)
        
        self.loaded = False
예제 #16
0
def train():
    """Train the NER model on the data sets configured through ``FLAGS``.

    Loads the train/dev/test sentences, applies the tag scheme selected by
    ``FLAGS.tag_schema``, builds the character and tag mappings (or loads
    them from ``FLAGS.map_file`` when that file exists), and loads or creates
    the model config. It then runs 100 passes over the training batches.
    After every pass the model is evaluated on dev and test, saved when the
    dev evaluation reports a new best, and exported.
    """
    # load data sets; each sentence is a list of [char, tag] pairs
    train_sentences = load_sentences(FLAGS.train_file, FLAGS.lower,
                                     FLAGS.zeros)
    dev_sentences = load_sentences(FLAGS.dev_file, FLAGS.lower, FLAGS.zeros)
    test_sentences = load_sentences(FLAGS.test_file, FLAGS.lower, FLAGS.zeros)

    # Use selected tagging scheme (IOB / IOBES)
    update_tag_scheme(train_sentences, FLAGS.tag_schema)
    update_tag_scheme(dev_sentences, FLAGS.tag_schema)
    update_tag_scheme(test_sentences, FLAGS.tag_schema)

    # create maps if not exist
    if not os.path.isfile(FLAGS.map_file):
        # create dictionary for word
        if FLAGS.pre_emb:
            dico_chars_train = char_mapping(
                train_sentences, FLAGS.lower)[0]  # character -> count dict
            dico_chars, char_to_id, id_to_char = augment_with_pretrained(
                dico_chars_train.copy(), FLAGS.emb_file,
                list(
                    itertools.chain.from_iterable([[w[0] for w in s]
                                                   for s in test_sentences])))
        else:
            _c, char_to_id, id_to_char = char_mapping(train_sentences,
                                                      FLAGS.lower)

        # Create a dictionary and a mapping for tags
        _t, tag_to_id, id_to_tag = tag_mapping(train_sentences)

        with open(FLAGS.map_file, "wb") as f:
            pickle.dump([char_to_id, id_to_char, tag_to_id, id_to_tag], f)
    else:
        # NOTE: pickle.load runs arbitrary code; only point map_file at a
        # file this program wrote itself.
        with open(FLAGS.map_file, "rb") as f:
            char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)

    # prepare data, get a collection of list containing index
    train_data = prepare_dataset(train_sentences, char_to_id, tag_to_id,
                                 FLAGS.lower)
    dev_data = prepare_dataset(dev_sentences, char_to_id, tag_to_id,
                               FLAGS.lower)
    test_data = prepare_dataset(test_sentences, char_to_id, tag_to_id,
                                FLAGS.lower)
    # Report the real dev size; it was hard-coded to 0 before.
    print("%i / %i / %i sentences in train / dev / test." %
          (len(train_data), len(dev_data), len(test_data)))

    train_manager = BatchManager(train_data, FLAGS.batch_size)
    dev_manager = BatchManager(dev_data, 100)
    test_manager = BatchManager(test_data, 100)
    # make path for store log and model if not exist
    make_path(FLAGS)
    # Reuse a saved model config when present, otherwise build and save one.
    if os.path.isfile(FLAGS.config_file):
        config = load_config(FLAGS.config_file)
    else:
        config = config_model(char_to_id, tag_to_id)
        save_config(config, FLAGS.config_file)

    log_path = os.path.join("log", FLAGS.log_file)
    logger = get_logger(log_path)
    print_config(config, logger)

    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    steps_per_epoch = train_manager.len_data
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        logger.info("start training")
        loss = []
        for _ in range(100):
            for batch in train_manager.iter_batch(shuffle=True):
                step, batch_loss = model.run_step(sess, True, batch)
                loss.append(batch_loss)
                # Log the mean loss since the previous report, then reset.
                if step % FLAGS.steps_check == 0:
                    iteration = step // steps_per_epoch + 1
                    logger.info("iteration:{} step:{}/{}, "
                                "NER loss:{:>9.6f}".format(
                                    iteration, step % steps_per_epoch,
                                    steps_per_epoch, np.mean(loss)))
                    loss = []

            best = evaluate(sess, model, "dev", dev_manager, id_to_tag, logger)
            if best:
                save_model(sess, model, FLAGS.ckpt_path, logger)
            evaluate(sess, model, "test", test_manager, id_to_tag, logger)
            export(model, sess, "ner", "export_model")
예제 #17
0
 def save_configuration(self):
     """Store the debug flag as a boolean in the config, save it and print a notice."""
     self.config['debug'] = bool(self.debug)
     save_config(self.config, CONFIG_FILENAME)
     notice = bcolors.WARNING + "Configuration saved" + bcolors.INFO
     print(notice)
예제 #18
0
def main():
    """Train and evaluate a NeuMF model with MXNet Gluon.

    Parses the command-line arguments, saves the run configuration under
    ``./run/neumf_<data>/<timestamp>``, loads the training and test data,
    builds the model, reports the initial HR/NDCG, then trains for
    ``args.epochs`` epochs with validation after each one.

    Returns:
        0 when ``args.threshold`` is set and the mean hit rate reaches it
        (training stops early); otherwise ``None`` after the last epoch.
    """
    args = parse_args()
    if args.seed is not None:
        print("Using seed = {}".format(args.seed))
        mx.random.seed(seed_state=args.seed)
        np.random.seed(seed=args.seed)

    # Save configuration to file
    config = dict(vars(args))
    # time.time() is the real epoch. datetime.utcnow().timestamp() treats the
    # naive UTC value as local time and is off by the UTC offset.
    config['timestamp'] = "{:.0f}".format(time.time())
    config['local_timestamp'] = str(datetime.now())
    run_dir = "./run/neumf_" + args.data + "/{}".format(config['timestamp'])
    print("Saving config and results to {}".format(run_dir))
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(run_dir, exist_ok=True)
    utils.save_config(config, run_dir)  # defined in utils.py

    # Check that GPUs are actually available
    use_cuda = not args.no_cuda and mx.test_utils.list_gpus()

    t1 = time.time()
    # Load Data
    print('Loading data')
    train_dataset = CFTrainDataset(
        os.path.join(args.data, TRAIN_RATINGS_FILENAME), args.negative_samples)

    # shuffle=True randomizes the sample order
    train_dataloader = mx.gluon.data.DataLoader(dataset=train_dataset,
                                                batch_size=args.batch_size,
                                                shuffle=True,
                                                num_workers=args.workers)

    test_ratings = load_test_ratings(
        os.path.join(args.data, TEST_RATINGS_FILENAME))  # noqa: E501
    test_negs = load_test_negs(os.path.join(args.data, TEST_NEG_FILENAME))
    nb_users, nb_items = train_dataset.nb_users, train_dataset.nb_items

    print('Load data done [%.1f s]. #user=%d, #item=%d, #train=%d, #test=%d' %
          (time.time() - t1, nb_users, nb_items, train_dataset.mat.nnz,
           len(test_ratings)))

    # Always the first device of the chosen kind.
    ctx = mx.gpu(0) if use_cuda else mx.cpu(0)

    # Create model
    model = NeuMF(nb_users,
                  nb_items,
                  mf_dim=args.factors,
                  mf_reg=0.,
                  mlp_layer_sizes=args.layers,
                  mlp_layer_regs=[0. for _ in args.layers],
                  ctx=ctx)
    model.initialize(ctx=ctx)
    model.hybridize()
    print(model)

    # Save model text description
    with open(os.path.join(run_dir, 'model.txt'), 'w') as file:
        file.write(str(model))

    # Create files for tracking training
    valid_results_file = os.path.join(run_dir, 'valid_results.csv')

    # Calculate initial Hit Ratio and NDCG
    hits, ndcgs = val_epoch(model,
                            test_ratings,
                            test_negs,
                            args.topk,
                            processes=args.processes,
                            ctx=ctx)
    print('Initial HR@{K} = {hit_rate:.4f}, NDCG@{K} = {ndcg:.4f}'.format(
        K=args.topk, hit_rate=np.mean(hits), ndcg=np.mean(ndcgs)))

    # Hyperparameters, optimizer and loss
    lr = args.learning_rate
    bs = args.batch_size

    trainer = mx.gluon.Trainer(model.collect_params(), 'adam',
                               {'learning_rate': lr})
    mxnet_criterion = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss()

    # training
    for epoch in range(args.epochs):
        begin = time.time()
        # tqdm shows the progress of the epoch
        loader = tqdm.tqdm(train_dataloader)
        for user, item, label in loader:
            user = nd.array(user, ctx=ctx)
            item = nd.array(item, ctx=ctx)
            label = nd.array(label, ctx=ctx)

            # Record the forward pass so gradients can be computed
            with autograd.record():
                outputs = model(user, item)
                loss = mxnet_criterion(outputs, label.T)

            loss.backward()
            trainer.step(bs)

            # Last entry of the mean-loss list as a plain number (the old
            # loop kept only its last element too).
            loss_number = loss.mean().asnumpy().tolist()[-1]
            description = ('Epoch {}  Loss {:.4f}'.format(epoch, loss_number))
            loader.set_description(description)

        train_time = time.time() - begin
        begin = time.time()
        hits, ndcgs = val_epoch(model,
                                test_ratings,
                                test_negs,
                                args.topk,
                                output=valid_results_file,
                                epoch=epoch,
                                processes=args.processes,
                                ctx=ctx)
        val_time = time.time() - begin
        print(
            'Epoch {epoch}: HR@{K} = {hit_rate:.4f}, NDCG@{K} = {ndcg:.4f},'
            ' train_time = {train_time:.2f}, val_time = {val_time:.2f}'.format(
                epoch=epoch,
                K=args.topk,
                hit_rate=np.mean(hits),
                ndcg=np.mean(ndcgs),
                train_time=train_time,
                val_time=val_time))
        if args.threshold is not None:
            if np.mean(hits) >= args.threshold:
                print("Hit threshold of {}".format(args.threshold))
                # Save model text description after modelling
                with open(os.path.join(run_dir, 'model.txt'), 'w') as file:
                    file.write(str(model))
                return 0
예제 #19
0
파일: main2.py 프로젝트: lzx00000/mynew
def train():
    """Train the NER model and write a checkpoint on every 7th pass.

    Reads the train/dev/test sentence files named by ``FLAGS``, converts
    all three to the configured tagging scheme, builds (or reloads from
    ``FLAGS.map_file``) the char/tag id mappings, and then runs 100 passes
    over the shuffled training batches inside a TensorFlow session. The
    running mean loss is logged every ``FLAGS.steps_check`` steps.
    """
    # load data sets
    train_sentences = load_sentences(FLAGS.train_file, FLAGS.lower,
                                     FLAGS.zeros)  # training set
    dev_sentences = load_sentences(FLAGS.dev_file, FLAGS.lower,
                                   FLAGS.zeros)  # validation set
    test_sentences = load_sentences(FLAGS.test_file, FLAGS.lower,
                                    FLAGS.zeros)  # test set

    # Use selected tagging scheme (IOB / IOBES)
    update_tag_scheme(train_sentences, FLAGS.tag_schema)
    update_tag_scheme(test_sentences, FLAGS.tag_schema)
    update_tag_scheme(dev_sentences, FLAGS.tag_schema)
    # create maps if not exist
    if not os.path.isfile(FLAGS.map_file):
        # create dictionary for word
        if FLAGS.pre_emb:
            # Pretrained embeddings requested: start from the training-set
            # character counts and augment them with the characters that
            # occur in the test sentences.
            dico_chars_train = char_mapping(train_sentences,
                                            FLAGS.lower)[0]
            dico_chars, char_to_id, id_to_char = augment_with_pretrained(
                dico_chars_train.copy(),
                FLAGS.emb_file,
                list(
                    itertools.chain.from_iterable(  # flatten into one list
                        [[w[0] for w in s] for s in test_sentences])
                ))
        else:
            # Character <-> id mappings from the training set only.
            _c, char_to_id, id_to_char = char_mapping(train_sentences,
                                                      FLAGS.lower)

        # Create a dictionary and a mapping for tags
        _t, tag_to_id, id_to_tag = tag_mapping(train_sentences)
        # Persist the mappings so later runs reuse exactly the same ids.
        with open(FLAGS.map_file, "wb") as f:
            pickle.dump([char_to_id, id_to_char, tag_to_id, id_to_tag], f)
    else:
        with open(FLAGS.map_file, "rb") as f:
            char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)

    # prepare data, get a collection of list containing index
    train_data = prepare_dataset(
        train_sentences, char_to_id, tag_to_id, FLAGS.lower)
    dev_data = prepare_dataset(dev_sentences, char_to_id, tag_to_id,
                               FLAGS.lower)
    test_data = prepare_dataset(test_sentences, char_to_id, tag_to_id,
                                FLAGS.lower)
    # Report the real dev-set size (this used to print a hard-coded 0).
    print("%i / %i / %i sentences in train / dev / test." %
          (len(train_data), len(dev_data), len(test_data)))

    train_manager = BatchManager(train_data, FLAGS.batch_size)
    dev_manager = BatchManager(dev_data, 100)
    test_manager = BatchManager(test_data, 100)
    # make path for store log and model if not exist
    make_path(FLAGS)
    if os.path.isfile(FLAGS.config_file):
        config = load_config(FLAGS.config_file)
    else:
        # No saved config yet: derive one from the mappings and store it.
        config = config_model(char_to_id, tag_to_id)
        save_config(config, FLAGS.config_file)
    make_path(FLAGS)

    log_path = os.path.join("log", FLAGS.log_file)
    logger = get_logger(log_path)
    print_config(config, logger)

    # limit GPU memory: allocate on demand instead of reserving it all
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    steps_per_epoch = train_manager.len_data  # number of batches per pass
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        logger.info("start training")
        loss = []
        for i in range(100):
            for batch in train_manager.iter_batch(shuffle=True):
                step, batch_loss = model.run_step(sess, True, batch)
                loss.append(batch_loss)
                if step % FLAGS.steps_check == 0:
                    iteration = step // steps_per_epoch + 1
                    logger.info("iteration:{} step:{}/{}, "
                                "NER loss:{:>9.6f}".format(
                                    iteration, step % steps_per_epoch,
                                    steps_per_epoch, np.mean(loss)))
                    # Start a fresh window for the next mean-loss report.
                    loss = []

            # No dev-set evaluation is run here; instead a checkpoint is
            # written on every 7th pass (i = 0, 7, 14, ...).
            if i % 7 == 0:
                save_model(sess, model, FLAGS.ckpt_path, logger)
예제 #20
0
파일: main.py 프로젝트: zyq11223/NLP_basis

# Load the char/tag id mappings that were pickled to FLAGS.map_file.
with open(FLAGS.map_file, "rb") as f:
    # pickle.load() takes no ``protocol`` argument (the protocol is read
    # from the stream itself), so the former per-version branch that
    # passed protocol=2 raised TypeError. One call works everywhere.
    char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)

# make path for store log and model if not exist
make_path(FLAGS)

if os.path.isfile(FLAGS.config_file):
    config = load_config(FLAGS.config_file)
else:
    # No saved config yet: derive one from the mappings and store it.
    config = config_model(char_to_id, tag_to_id)
    save_config(config, FLAGS.config_file)
make_path(FLAGS)
app = Flask(__name__)
log_path = os.path.join("log", FLAGS.log_file)
logger = get_logger(log_path)
# One module-level session and model, created once at import time.
tf_config = tf.ConfigProto()
sess = tf.Session(config=tf_config)
sess.run(tf.global_variables_initializer())
model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec, config,
                     id_to_char, logger)


@app.route('/', methods=['POST', 'GET'])
def get_text_input():
    """Handle GET/POST requests on ``/`` by reading the ``inputStr`` argument.

    NOTE(review): only the argument lookup is visible here; nothing is
    returned from the portion shown -- confirm against the full handler.
    """
    input_str = request.args.get('inputStr')
예제 #21
0
    def run(self, id, updated_fields, enable_service=False):
        """Update an existing share and refresh its backup config file.

        Args:
            id: Identifier of the share in the ``shares`` datastore collection.
            updated_fields: Dict of share fields to change. A ``permissions``
                entry, if present, is popped from the dict and applied to the
                share's path separately.
            enable_service: When true and the service named after the share
                type is not running, enable it; otherwise only add a warning.

        Raises:
            TaskException: ENOENT if the share or its target path does not
                exist, EACCES if the share is immutable, EEXIST if another
                share already has the resulting type/name pair.
        """
        share = self.datastore.get_by_id('shares', id)
        if not share:
            raise TaskException(errno.ENOENT, 'Share not found')

        if share['immutable']:
            raise TaskException(errno.EACCES, 'Cannot modify immutable share {0}.'.format(id))

        # A rename or type change must not collide with a different share.
        # The changes are merged into the local copy only to run this check;
        # the share is re-read from the datastore further down.
        if 'name' in updated_fields or 'type' in updated_fields:
            share.update(updated_fields)
            if self.datastore.exists(
                'shares',
                ('id', '!=', id),
                ('type', '=', share['type']),
                ('name', '=', share['name'])
            ):
                raise TaskException(errno.EEXIST, 'Share {0} of type {1} already exists'.format(
                    share['name'],
                    share['type']
                ))

        # Validate the target as it will look after the update.
        path_after_update = updated_fields.get('target_path', share['target_path'])
        type_after_update = updated_fields.get('target_type', share['target_type'])
        # Permissions are handled on their own below, so take them out of the
        # field set that is handed to the share subtasks.
        permissions = updated_fields.pop('permissions', None)
        share_path = self.dispatcher.call_sync('share.expand_path', path_after_update, type_after_update)

        if not os.path.exists(share_path):
            raise TaskException(
                errno.ENOENT,
                'Selected share target {0} does not exist'.format(path_after_update)
            )

        # Re-read the stored share (the local copy may have been modified by
        # the collision check above) and drop fields that do not change.
        share = self.datastore.get_by_id('shares', id)
        remove_unchanged(updated_fields, share)

        # Remove the config file kept under the old type/name; best effort,
        # failures are deliberately ignored.
        path = self.dispatcher.call_sync('share.get_directory_path', share['id'])
        try:
            delete_config(
                path,
                '{0}-{1}'.format(share['type'], share['name'])
            )
        except (OSError, ValueError):
            pass

        if 'type' in updated_fields:
            # Type change: switch the dataset's permission type if the target
            # is a dataset, then delete the old-type share and create it anew
            # under the new type.
            old_share_type = share['type']
            new_share_type = self.dispatcher.call_sync('share.supported_types').get(updated_fields['type'])
            if share['target_type'] == 'DATASET':
                pool, dataset = split_dataset(share['target_path'])
                self.join_subtasks(
                    self.run_subtask('volume.dataset.update', dataset, {
                        'permissions_type': new_share_type['perm_type']
                    })
                )

            share.update(updated_fields)
            self.run_subtask_sync('share.{0}.delete'.format(old_share_type), id)
            self.run_subtask_sync('share.{0}.create'.format(updated_fields['type']), share)
        else:
            # Same type: delegate to the type-specific update task.
            self.run_subtask_sync('share.{0}.update'.format(share['type']), id, updated_fields)

        if permissions:
            path = self.dispatcher.call_sync('share.translate_path', id)
            self.run_subtask_sync('file.set_permissions', path, permissions)

        self.dispatcher.dispatch_event('share.changed', {
            'operation': 'update',
            'ids': [share['id']]
        })

        # Write the backup config file for the updated share; a failure here
        # is reported as a warning and does not fail the task.
        updated_share = self.datastore.get_by_id('shares', id)
        path = self.dispatcher.call_sync('share.get_directory_path', updated_share['id'])
        try:
            save_config(
                path,
                '{0}-{1}'.format(updated_share['type'], updated_share['name']),
                updated_share
            )
        except OSError as err:
            self.add_warning(TaskWarning(errno.ENXIO, 'Cannot save backup config file: {0}'.format(str(err))))

        # The service is looked up by the share type name; enable it on
        # request, otherwise tell the user it is not running.
        service_state = self.dispatcher.call_sync('service.query', [('name', '=', share['type'])], {'single': True})
        if service_state['state'] != 'RUNNING':
            if enable_service:
                config = service_state['config']
                config['enable'] = True
                self.run_subtask_sync('service.update', service_state['id'], {'config': config})
            else:
                self.add_warning(TaskWarning(
                    errno.ENXIO, "Share has been updated but the service {0} is not currently running "
                                 "Please enable the {0} service.".format(share['type'])
                ))
예제 #22
0
파일: install.py 프로젝트: Abriko/pyLAMP
def go_centos():
	"""Install and configure Apache, MySQL, PHP and vsftpd on CentOS.

	Waits for the user to confirm, creates the working directories, saves
	the installer configuration (including a generated MySQL root
	password), registers the extra yum repositories for the detected
	CentOS major version, installs and configures the packages, sets up
	the default site, FTP account and firewall rules, enables the services
	at boot and finally hands over to ``finish_install``.
	"""
	raw_input("LAMP Setup script is ready install it\nPress Enter key to continue, Press Ctrl+D to cancel progress\n")

	#generate config file
	os.mkdir('/tmp/lamp')
	os.mkdir('/etc/lamp')
	os.mkdir('/root/lamp_bak')
	os.mkdir('/etc/lamp/ftp_users')
	config = {}
	global version
	config['version'] = version
	config['system'] = 'c'
	config['wwwroot'] = '/var/www'
	config['apache_etc'] = '/etc/httpd'
	config['apache'] = 'httpd'
	config['ftproot'] = '/var/www'
	config['vsftpd_conf_path'] = '/etc/vsftpd/vsftpd.conf'
	config['root_own'] = 'apache:apache'


	# setting mysql password
	mysql_root_pass = utils.gen_random_str()
	config['mysqlrootpass'] = mysql_root_pass
	utils.save_config(config)

	# NOTE(review): this writes the root password to the debug log -- confirm
	# that is acceptable for this installer.
	logging.debug('generate mysql root password : %s', mysql_root_pass)


	logging.info('load yum repo...')
	utils.exec_cmd('yum install yum-priorities -y')
	utils.exec_cmd('wget http://s1b-static.yuki.ws/files/lamp/files.tar.xz -O /tmp/lamp/files.tar.xz')
	utils.exec_cmd('tar xvf /tmp/lamp/files.tar.xz -C /tmp/lamp')

	# Get system detail info
	machine = platform.machine()
	if machine == 'i686':
		machine = 'i386'
	# platform.dist() reports the release as a string (e.g. '6.4'). Comparing
	# that string with the int 6 is always True on Python 2, so the CentOS 5
	# branch could never run. Compare the numeric major version instead.
	release = platform.dist()[1]
	try:
		major_ver = int(release.split('.')[0])
	except ValueError:
		# Unparseable release: keep the branch that was effectively always
		# taken before (EL6).
		major_ver = 6
	if major_ver >= 6:
		utils.exec_cmd('wget http://ftp.riken.jp/Linux/fedora/epel/6/%s/epel-release-6-8.noarch.rpm -O /tmp/lamp/epel-release.rpm' % (machine))
		utils.exec_cmd('wget http://pkgs.repoforge.org/rpmforge-release/rpmforge-release-0.5.3-1.el6.rf.%s.rpm -O /tmp/lamp/rpmforge-release.rpm' % (platform.machine()))
		#download vsftpd
		utils.exec_cmd('wget http://centos.alt.ru/repository/centos/6/%s/vsftpd-3.0.2-2.el6.%s.rpm -O /tmp/lamp/vsftpd.rpm' %(machine, platform.machine()))
	else:
		utils.exec_cmd('wget http://ftp.riken.jp/Linux/fedora/epel/5/%s/epel-release-5-4.noarch.rpm -O /tmp/lamp/epel-release.rpm' % (machine))
		utils.exec_cmd('wget http://pkgs.repoforge.org/rpmforge-release/rpmforge-release-0.5.3-1.el5.rf.%s.rpm -O /tmp/lamp/rpmforge-release.rpm' % (machine))
		utils.exec_cmd('wget http://centos.alt.ru/repository/centos/5/%s/vsftpd-3.0.2-1.el5.%s.rpm -O /tmp/lamp/vsftpd.rpm' %(platform.machine(), platform.machine()))

	utils.exec_cmd('yum localinstall /tmp/lamp/*-release.rpm -y')

	# Change yum priority
	utils.change_conf('/etc/yum.repos.d/CentOS-Base.repo', [{'old':'gpgcheck=1','new':'priority=1\ngpgcheck=1'}])
	utils.change_conf('/etc/yum.repos.d/rpmforge.repo', [{'old':'enabled =','new':'priority = 10\nenabled ='}])
	utils.change_conf('/etc/yum.repos.d/epel.repo', [{'old':'enabled=','new':'priority=11\nenabled='}])

	logging.info('update system, please wait...')
	utils.exec_cmd('yum makecache')

	logging.info('install and config packages, please wait...')
	utils.exec_cmd('yum install axel screen MySQL-python vim pam_mysql httpd mysql-server php php-mysql php-pdo php-mcrypt php-mbstring php-gd php-pecl-imagick php-pecl-memcached php-xcache -y')


	logging.info('setting up web-server...')
	utils.change_conf('/etc/httpd/conf/httpd.conf',
		[
			{'old':'ple.com:80','new':'ple.com:80\nServerName %s' % (platform.uname()[1])},
			{'old':'Timeout 60','new':'Timeout 45'},
			{'old':'MaxKeepAliveRequests 100','new':'MaxKeepAliveRequests 200'},
			{'old':'ServerTokens OS','new':'ServerTokens Prod'},
			{'old':'ServerSignature On','new':'ServerSignature Off'},
			{'old':'/var/www/html','new':'/var/www/public_html'},
			{'old':'#NameVirtualHost \*:80','new':'NameVirtualHost *:80'},
			{'old':'#</VirtualHost>','new':'#</VirtualHost>\nInclude sites-enabled/'}
		]
	)

	# Set apache php.ini
	logging.debug('Setting php.ini')
	utils.change_conf('/etc/php.ini',
		[
			{'old':'post_max_size = 8M','new':'post_max_size = 50M'},
			{'old':'upload_max_filesize = 2M','new':'upload_max_filesize = 50M'},
			{'old':'expose_php = On','new':'expose_php = Off'},
			{'old':';date.timezone =','new':'date.timezone = Asia/Chongqing'},
			{'old':'request_order = "GP"','new':'request_order = "CGP"'}
		]
	)

	# Fix mcrypt.ini error
	utils.change_conf('/etc/php.d/mcrypt.ini',
		[
			{'old':'module.so','new':'mcrypt.so'}
		]
	)

	os.mkdir('/etc/httpd/sites-available')
	os.mkdir('/etc/httpd/sites-enabled')
	utils.exec_cmd('mv /var/www/html /var/www/public_html')

	# Create a default site
	os.mkdir('/var/www/logs')
	utils.change_conf('<APPROOT>/files/vhost_template',
		[
			{'old':'ServerName <ServerName>','new':''},
			{'old':'<ServerName>','new':'default'},
			{'old':'<siteroot>','new':'/var/www'}
		],
	'/etc/httpd/sites-enabled/default')



	# Init phpmyadmin and lamp user pass
	utils.exec_cmd('service mysqld restart')

	utils.exec_cmd('mysqladmin -u root password \'%s\'' % (mysql_root_pass))
	utils.cp('<APPROOT>/files/phpmyadmin_host', '/etc/httpd/conf.d/phpmyadmin.conf')
	lamp_controluser_pass = mysql.init_db(mysql_root_pass)

	# Save the config again now that the control-user credentials exist.
	config['lampuser'] = '******'
	config['lamppass'] = lamp_controluser_pass
	utils.save_config(config)


	utils.exec_cmd(r'echo "<?php phpinfo() ?>" > /var/www/public_html/test.php')

	# Change wwwroot permissions
	utils.exec_cmd('chown -R apache:apache /var/www')
	utils.exec_cmd('chmod -R go-rwx /var/www')
	utils.exec_cmd('chmod -R g+rw /var/www')
	utils.exec_cmd('chmod -R o+r /var/www')

	utils.exec_cmd('service httpd restart')

	logging.info('setting up ftp-server...')


	# Init ftp and create main account
	utils.exec_cmd('yum localinstall /tmp/lamp/vsftpd.rpm -y')
	utils.exec_cmd('mkdir -p /var/run/vsftpd')

	utils.change_conf('<APPROOT>/files/vsftpd_conf', [
		{'old':'<ftpuser>','new':'ftp'},
		{'old':'<guestuser>','new':'apache'}
	], '/etc/vsftpd/vsftpd.conf')

	utils.change_conf('<APPROOT>/files/vsftpd_mysql', [{'old':'<passwd>','new':lamp_controluser_pass}], '/etc/pam.d/vsftpd-mysql')

	args = {}
	args['username'] = '******'
	args['path'] = '/var/www'
	args['site_id'] = 1

	ftp_pass = ftp.create_ftp(args)
	utils.exec_cmd('service vsftpd restart')


	logging.info('setting up system...')
	# Set iptables
	utils.cp('<APPROOT>/files/iptables_rules', '/etc/sysconfig/iptables')
	utils.exec_cmd('service iptables restart')

	#load kernel ip_nat_ftp
	utils.exec_cmd('modprobe nf_nat_ftp')
	utils.exec_cmd('echo "modprobe nf_nat_ftp" >> /etc/sysconfig/modules/nf_nat_ftp.modules')
	utils.exec_cmd('chmod +x /etc/sysconfig/modules/nf_nat_ftp.modules')

	# Add auto start at boot
	utils.exec_cmd('chkconfig httpd on')
	utils.exec_cmd('chkconfig mysqld on')
	utils.exec_cmd('chkconfig vsftpd on')

	finish_install(mysql_root_pass, ftp_pass, 'yum update -y')
예제 #23
0
def train():
    """Run the NER training pipeline configured through ``FLAGS``.

    Loads the three sentence files, builds the char/tag id mappings (or
    reloads them from ``FLAGS.map_file``), batches the data and trains for
    100 passes. After each pass the model is evaluated on the dev set,
    saved when that evaluation returns a truthy result, and then evaluated
    on the test set.
    """
    # Load the data sets.
    train_sentences = load_sentences(FLAGS.train_file, FLAGS.lower,
                                     FLAGS.zeros)
    dev_sentences = load_sentences(FLAGS.dev_file, FLAGS.lower, FLAGS.zeros)
    test_sentences = load_sentences(FLAGS.test_file, FLAGS.lower, FLAGS.zeros)

    # Apply the selected tagging scheme (IOB / IOBES).
    # NOTE(review): dev_sentences is not passed through update_tag_scheme
    # here -- confirm that is intended.
    for sentences in (train_sentences, test_sentences):
        update_tag_scheme(sentences, FLAGS.tag_schema)

    if os.path.isfile(FLAGS.map_file):
        # Reuse the mappings pickled by an earlier run.
        with open(FLAGS.map_file, "rb") as map_in:
            char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(map_in)
    else:
        if FLAGS.pre_emb:
            # Start from the training-set character counts and augment them
            # with the characters that occur in the test sentences.
            train_char_counts = char_mapping(train_sentences, FLAGS.lower)[0]
            test_chars = list(
                itertools.chain.from_iterable(
                    [[w[0] for w in s] for s in test_sentences]))
            _, char_to_id, id_to_char = augment_with_pretrained(
                train_char_counts.copy(), FLAGS.emb_file, test_chars)
        else:
            _, char_to_id, id_to_char = char_mapping(train_sentences,
                                                     FLAGS.lower)

        # Tag dictionary and mappings, then persist everything.
        _, tag_to_id, id_to_tag = tag_mapping(train_sentences)
        with open(FLAGS.map_file, "wb") as map_out:
            pickle.dump([char_to_id, id_to_char, tag_to_id, id_to_tag],
                        map_out)

    # Turn the sentences into id-based data.
    train_data, dev_data, test_data = [
        prepare_dataset(sentences, char_to_id, tag_to_id, FLAGS.lower)
        for sentences in (train_sentences, dev_sentences, test_sentences)
    ]
    sizes = (len(train_data), len(dev_data), len(test_data))
    print("%i / %i / %i sentences in train / dev / test." % sizes)

    train_manager = BatchManager(train_data, FLAGS.batch_size)
    dev_manager = BatchManager(dev_data, 100)
    test_manager = BatchManager(test_data, 100)

    make_path(FLAGS)
    if os.path.isfile(FLAGS.config_file):
        config = load_config(FLAGS.config_file)
    else:
        config = config_model(char_to_id, tag_to_id)
        save_config(config, FLAGS.config_file)
    make_path(FLAGS)

    logger = get_logger(os.path.join("log", FLAGS.log_file))
    print_config(config, logger)

    # GPU settings: grow memory on demand.
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    steps_per_epoch = train_manager.len_data

    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        logger.info("start training")
        recent_losses = []
        for _ in range(100):
            for batch in train_manager.iter_batch(shuffle=True):
                step, batch_loss = model.run_step(sess, True, batch)
                recent_losses.append(batch_loss)
                if step % FLAGS.steps_check != 0:
                    continue
                # Report the mean loss since the previous report, then
                # start a fresh window.
                iteration = step // steps_per_epoch + 1
                logger.info("iteration:{} step:{}/{}, "
                            "NER loss:{:>9.6f}".format(
                                iteration, step % steps_per_epoch,
                                steps_per_epoch, np.mean(recent_losses)))
                recent_losses = []

            if evaluate(sess, model, "dev", dev_manager, id_to_tag, logger):
                save_model(sess, model, FLAGS.ckpt_path, logger)
            evaluate(sess, model, "test", test_manager, id_to_tag, logger)
예제 #24
0
 def events_started(engine):
     """Save the configuration when ``needs_save`` is set.

     ``engine`` is accepted but not used. ``needs_save``, ``config``,
     ``seed`` and ``output_dir_path`` come from the enclosing scope.
     """
     if not needs_save:
         return
     save_config(config, seed, output_dir_path)
예제 #25
0
파일: ncf.py 프로젝트: WeSIG/training
def main():
    """Train and evaluate a NeuMF model on preprocessed data.

    Parses the command-line arguments, saves the run configuration under
    ``./run/neumf/<timestamp>``, loads the cached test ratings, test
    negatives and training positives from ``args.data``, builds the
    evaluation tensors (duplicate mask and real-sample indices), and then
    trains for up to ``args.epochs`` epochs. Hit rate and NDCG are
    computed before training and after every epoch; training stops early
    once the hit rate reaches ``args.threshold``. MLPerf log events are
    emitted throughout.
    """

    args = parse_args()
    if args.seed is not None:
        print("Using seed = {}".format(args.seed))
        torch.manual_seed(args.seed)
        np.random.seed(seed=args.seed)

    # Save configuration to file
    config = {k: v for k, v in args.__dict__.items()}
    config['timestamp'] = "{:.0f}".format(datetime.utcnow().timestamp())
    config['local_timestamp'] = str(datetime.now())
    run_dir = "./run/neumf/{}".format(config['timestamp'])
    print("Saving config and results to {}".format(run_dir))
    if not os.path.exists(run_dir) and run_dir != '':
        os.makedirs(run_dir)
    utils.save_config(config, run_dir)

    # Check that GPUs are actually available
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    # Check where to put data loader
    if use_cuda:
        dataloader_device = 'cpu' if args.cpu_dataloader else 'cuda'
    else:
        dataloader_device = 'cpu'

    # more like load trigger timer now
    mlperf_log.ncf_print(key=mlperf_log.PREPROC_HP_NUM_EVAL, value=args.valid_negative)
    # The default of np.random.choice is replace=True, so does pytorch random_()
    mlperf_log.ncf_print(key=mlperf_log.PREPROC_HP_SAMPLE_EVAL_REPLACEMENT, value=True)
    mlperf_log.ncf_print(key=mlperf_log.INPUT_HP_SAMPLE_TRAIN_REPLACEMENT, value=True)
    mlperf_log.ncf_print(key=mlperf_log.INPUT_STEP_EVAL_NEG_GEN)

    # sync worker before timing.

    if use_cuda:
        torch.cuda.synchronize()

    #===========================================================================
    #== The clock starts on loading the preprocessed data. =====================
    #===========================================================================
    mlperf_log.ncf_print(key=mlperf_log.RUN_START)
    run_start_time = time.time()

    print(datetime.now(), "Loading test ratings.")
    test_ratings = [torch.LongTensor()] * args.user_scaling

    for chunk in range(args.user_scaling):
        test_ratings[chunk] = torch.from_numpy(np.load(args.data + '/testx'
                + str(args.user_scaling) + 'x' + str(args.item_scaling)
                + '_' + str(chunk) + '.npz', encoding='bytes')['arr_0'])

    fn_prefix = args.data + '/' + CACHE_FN.format(args.user_scaling, args.item_scaling)
    sampler_cache = fn_prefix + "cached_sampler.pkl"
    pos_users_cache = fn_prefix + "cached_pos_users.pkl"
    pos_items_cache = fn_prefix + "cached_pos_items.pkl"
    nb_items_cache = fn_prefix + "cached_nb_items.pkl"
    print(datetime.now(), "Loading preprocessed sampler.")
    if os.path.exists(args.data):
        print("Using alias file: {}".format(args.data))
        with open(sampler_cache, "rb") as f:
            sampler = pickle.load(f)
        with open(pos_users_cache, 'rb') as f:
            pos_users = pickle.load(f)
        with open(pos_items_cache, 'rb') as f:
            pos_items = pickle.load(f)
        with open(nb_items_cache, 'rb') as f:
            nb_items = pickle.load(f)
    print(datetime.now(), "Alias table loaded.")

    nb_users = len(sampler.num_regions)
    train_users = torch.from_numpy(pos_users).type(torch.LongTensor)
    train_items = torch.from_numpy(pos_items).type(torch.LongTensor)
    del pos_users
    del pos_items
    # With random negatives the sampler is never used again, so free it.
    # Otherwise it must stay alive for the per-epoch generate_negatives()
    # call below; deleting it unconditionally made that path fail with an
    # unbound-local error.
    if args.random_negatives:
        del sampler

    mlperf_log.ncf_print(key=mlperf_log.INPUT_SIZE, value=len(train_users))
    # produce things not change between epoch
    # mask for filtering duplicates with real sample
    # note: test data is removed before create mask, same as reference
    # create label
    train_label = torch.ones_like(train_users, dtype=torch.float32)
    neg_label = torch.zeros_like(train_label, dtype=torch.float32)
    neg_label = neg_label.repeat(args.negative_samples)
    train_label = torch.cat((train_label,neg_label))
    del neg_label

    test_pos = [l[:,1].reshape(-1,1) for l in test_ratings]
    test_negatives = [torch.LongTensor()] * args.user_scaling
    test_neg_items = [torch.LongTensor()] * args.user_scaling

    print(datetime.now(), "Loading test negatives.")
    for chunk in range(args.user_scaling):
        file_name = (args.data + '/test_negx' + str(args.user_scaling) + 'x'
                + str(args.item_scaling) + '_' + str(chunk) + '.npz')
        raw_data = np.load(file_name, encoding='bytes')
        test_negatives[chunk] = torch.from_numpy(raw_data['arr_0'])
        print(datetime.now(), "Test negative chunk {} of {} loaded ({} users).".format(
              chunk+1, args.user_scaling, test_negatives[chunk].size()))
        test_neg_items[chunk] = test_negatives[chunk][:, 1]
        # Drop the reference to the full chunk once its item column is kept.
        test_negatives[chunk] = None

    # create items with real sample at last position
    test_items = [torch.cat((a.reshape(-1,args.valid_negative), b), dim=1)
            for a, b in zip(test_neg_items, test_pos)]
    del test_ratings, test_neg_items

    # generate dup mask and real indice for exact same behavior on duplication compare to reference
    # here we need a sort that is stable(keep order of duplicates)
    # this is a version works on integer
    sorted_items, indices = zip(*[torch.sort(l) for l in test_items]) # [1,1,1,2], [3,1,0,2]
    sum_item_indices = [a.float()+b.float()/len(b[0])
            for a, b in zip(sorted_items, indices)] #[1.75,1.25,1.0,2.5]
    indices_order = [torch.sort(l)[1] for l in sum_item_indices] #[2,1,0,3]
    stable_indices = [torch.gather(a, 1, b)
            for a, b in zip(indices, indices_order)] #[0,1,3,2]
    # produce -1 mask

    dup_mask = [(l[:,0:-1] == l[:,1:]) for l in sorted_items]
    # The comparison above yields bool tensors, so the zero column that is
    # prepended must be bool as well (it used to be uint8).
    dup_mask = [torch.cat((torch.zeros_like(a, dtype=torch.bool), b),dim=1)
            for a, b in zip(test_pos, dup_mask)]
    dup_mask = [torch.gather(a,1,b.sort()[1])
            for a, b in zip(dup_mask, stable_indices)]
    # produce real sample indices to later check in topk
    # The bool comparison result is converted to uint8 before sorting.
    sorted_items, indices = zip(*[torch.tensor((a != b),dtype=torch.uint8).sort()
            for a, b in zip(test_items, test_pos)])
    sum_item_indices = [(a.float()) + (b.float())/len(b[0])
            for a, b in zip(sorted_items, indices)]
    indices_order = [torch.sort(l)[1] for l in sum_item_indices]
    stable_indices = [torch.gather(a, 1, b)
            for a, b in zip(indices, indices_order)]
    real_indices = [l[:, 0] for l in stable_indices]
    del sorted_items, indices, sum_item_indices, indices_order, stable_indices, test_pos

    # For our dataset, test set is identical to user set, so arange() provides
    # all test users.
    test_users = torch.arange(nb_users, dtype=torch.long)
    test_users = test_users[:, None]
    test_users = test_users + torch.zeros(1+args.valid_negative, dtype=torch.long)
    # test_items needs to be of type Long in order to be used in embedding
    test_items = torch.cat(test_items).type(torch.long)

    dup_mask = torch.cat(dup_mask)
    real_indices = torch.cat(real_indices)

    # make pytorch memory behavior more consistent later
    if use_cuda:
        torch.cuda.empty_cache()

    mlperf_log.ncf_print(key=mlperf_log.INPUT_BATCH_SIZE, value=args.batch_size)
    mlperf_log.ncf_print(key=mlperf_log.INPUT_ORDER)  # we shuffled later with randperm

    print(datetime.now(),
        "Data loading done {:.1f} sec. #user={}, #item={}, #train={}, #test={}".format(
          time.time()-run_start_time, nb_users, nb_items, len(train_users), nb_users))

    # Create model
    model = NeuMF(nb_users, nb_items,
                  mf_dim=args.factors, mf_reg=0.,
                  mlp_layer_sizes=args.layers,
                  mlp_layer_regs=[0. for i in args.layers])
    print(model)
    print("{} parameters".format(utils.count_parameters(model)))

    # Save model text description
    with open(os.path.join(run_dir, 'model.txt'), 'w') as file:
        file.write(str(model))

    # Add optimizer and loss to graph
    params = model.parameters()

    optimizer = torch.optim.Adam(params, lr=args.learning_rate, betas=(args.beta1, args.beta2), eps=args.eps)
    criterion = nn.BCEWithLogitsLoss(reduction = 'none') # use torch.mean() with dim later to avoid copy to host
    mlperf_log.ncf_print(key=mlperf_log.OPT_LR, value=args.learning_rate)
    mlperf_log.ncf_print(key=mlperf_log.OPT_NAME, value="Adam")
    mlperf_log.ncf_print(key=mlperf_log.OPT_HP_ADAM_BETA1, value=args.beta1)
    mlperf_log.ncf_print(key=mlperf_log.OPT_HP_ADAM_BETA2, value=args.beta2)
    mlperf_log.ncf_print(key=mlperf_log.OPT_HP_ADAM_EPSILON, value=args.eps)
    mlperf_log.ncf_print(key=mlperf_log.MODEL_HP_LOSS_FN, value=mlperf_log.BCE)

    if use_cuda:
        # Move model and loss to GPU
        model = model.cuda()
        criterion = criterion.cuda()

    local_batch = args.batch_size
    traced_criterion = torch.jit.trace(criterion.forward, (torch.rand(local_batch,1),torch.rand(local_batch,1)))

    # Create files for tracking training
    valid_results_file = os.path.join(run_dir, 'valid_results.csv')

    # Calculate initial Hit Ratio and NDCG
    samples_per_user = test_items.size(1)
    users_per_valid_batch = args.valid_batch_size // samples_per_user

    test_users = test_users.split(users_per_valid_batch)
    test_items = test_items.split(users_per_valid_batch)
    dup_mask = dup_mask.split(users_per_valid_batch)
    real_indices = real_indices.split(users_per_valid_batch)

    hr, ndcg = val_epoch(model, test_users, test_items, dup_mask, real_indices, args.topk, samples_per_user=samples_per_user,
                         num_user=nb_users, use_cuda=use_cuda)
    print('Initial HR@{K} = {hit_rate:.4f}, NDCG@{K} = {ndcg:.4f}'
          .format(K=args.topk, hit_rate=hr, ndcg=ndcg))
    success = False

    # Training
    mlperf_log.ncf_print(key=mlperf_log.TRAIN_LOOP)
    for epoch in range(args.epochs):

        mlperf_log.ncf_print(key=mlperf_log.TRAIN_EPOCH, value=epoch)
        mlperf_log.ncf_print(key=mlperf_log.INPUT_HP_NUM_NEG, value=args.negative_samples)
        mlperf_log.ncf_print(key=mlperf_log.INPUT_STEP_TRAIN_NEG_GEN)
        begin = time.time()

        st = timeit.default_timer()
        # Negatives are either drawn uniformly at random over all items or
        # generated by the cached sampler.
        if args.random_negatives:
            neg_users = train_users.repeat(args.negative_samples)
            # nb_items is wrapped in a tensor because passing it directly
            # raised "random_() received an invalid combination of
            # arguments - got (int, numpy.int64)".
            neg_items = torch.empty_like(neg_users, dtype=torch.int64).random_(0, torch.tensor(nb_items))
        else:
            negatives = generate_negatives(
                sampler,
                args.negative_samples,
                train_users.numpy())
            negatives = torch.from_numpy(negatives)
            neg_users = negatives[:, 0]
            neg_items = negatives[:, 1]


        # Format the elapsed time into the message (the template used to be
        # printed verbatim next to the raw number).
        print("generate_negatives loop time: {:.2f}".format(timeit.default_timer() - st))

        after_neg_gen = time.time()

        st = timeit.default_timer()
        epoch_users = torch.cat((train_users,neg_users))
        epoch_items = torch.cat((train_items,neg_items))
        del neg_users, neg_items

        # shuffle prepared data and split into batches
        epoch_indices = torch.randperm(len(epoch_users), device=dataloader_device)
        epoch_size = len(epoch_indices)
        epoch_users = epoch_users[epoch_indices]
        epoch_items = epoch_items[epoch_indices]
        epoch_label = train_label[epoch_indices]
        epoch_users_list = epoch_users.split(local_batch)
        epoch_items_list = epoch_items.split(local_batch)
        epoch_label_list = epoch_label.split(local_batch)

        print("shuffle time: {:.2f}".format(timeit.default_timer() - st))

        # only print progress bar on rank 0
        num_batches = (epoch_size + args.batch_size - 1) // args.batch_size
        qbar = tqdm.tqdm(range(num_batches))
        # handle extremely rare case where last batch size < number of worker
        if len(epoch_users_list) < num_batches:
            print("epoch_size % batch_size < number of worker!")
            exit(1)

        after_shuffle = time.time()

        neg_gen_time = (after_neg_gen - begin)
        shuffle_time = (after_shuffle - after_neg_gen)

        for i in qbar:
            # selecting input from prepared data

            if use_cuda:
                user = epoch_users_list[i].cuda()
                item = epoch_items_list[i].cuda()
                label = epoch_label_list[i].view(-1,1).cuda()
            else:
                user = epoch_users_list[i]
                item = epoch_items_list[i]
                label = epoch_label_list[i].view(-1,1)


            # Reset gradients by dropping them instead of zeroing in place.
            for p in model.parameters():
                p.grad = None

            outputs = model(user, item)
            loss = traced_criterion(outputs, label).float()
            loss = torch.mean(loss.view(-1), 0)

            loss.backward()
            optimizer.step()

        del epoch_users, epoch_items, epoch_label, epoch_users_list, epoch_items_list, epoch_label_list, user, item, label
        train_time = time.time() - begin
        begin = time.time()

        mlperf_log.ncf_print(key=mlperf_log.EVAL_START, value=epoch)

        hr, ndcg = val_epoch(model, test_users, test_items, dup_mask, real_indices, args.topk, samples_per_user=samples_per_user,
                             num_user=nb_users, output=valid_results_file, epoch=epoch, loss=loss.data.item(), use_cuda=use_cuda)

        val_time = time.time() - begin
        print('Epoch {epoch}: HR@{K} = {hit_rate:.4f}, NDCG@{K} = {ndcg:.4f},'
                ' train_time = {train_time:.2f}, val_time = {val_time:.2f}, loss = {loss:.4f},'
                ' neg_gen: {neg_gen_time:.4f}, shuffle_time: {shuffle_time:.2f}'
              .format(epoch=epoch, K=args.topk, hit_rate=hr,
                      ndcg=ndcg, train_time=train_time,
                      val_time=val_time, loss=loss.data.item(),
                      neg_gen_time=neg_gen_time, shuffle_time=shuffle_time))

        mlperf_log.ncf_print(key=mlperf_log.EVAL_ACCURACY, value={"epoch": epoch, "value": hr})
        mlperf_log.ncf_print(key=mlperf_log.EVAL_TARGET, value=args.threshold)
        mlperf_log.ncf_print(key=mlperf_log.EVAL_STOP, value=epoch)

        # Stop as soon as the target hit rate is reached.
        if args.threshold is not None:
            if hr >= args.threshold:
                print("Hit threshold of {}".format(args.threshold))
                success = True
                break

    mlperf_log.ncf_print(key=mlperf_log.RUN_STOP, value={"success": success})
    run_stop_time = time.time()
    mlperf_log.ncf_print(key=mlperf_log.RUN_FINAL)

    # easy way of tracking mlperf score
    if success:
        print("mlperf_score", run_stop_time - run_start_time)
def evaluate_testDataSet():
    """Evaluate the NER model on the dev and test splits.

    Loads the train/dev/test sentences named by ``FLAGS``, builds (or reloads
    from ``FLAGS.map_file``) the character and tag mappings, prepares the
    datasets, loads or creates the model configuration and then, inside a
    TensorFlow session, runs 100 rounds of dev + test evaluation, saving the
    model whenever the dev evaluation reports a new best result.
    """
    # Read the three splits.
    train_sentences = load_sentences(FLAGS.train_file, FLAGS.lower,
                                     FLAGS.zeros)
    dev_sentences = load_sentences(FLAGS.dev_file, FLAGS.lower, FLAGS.zeros)
    test_sentences = load_sentences(FLAGS.test_file, FLAGS.lower, FLAGS.zeros)

    # Normalise tags to the selected scheme (IOB / IOBES); only the train
    # and test splits are converted here.
    for split in (train_sentences, test_sentences):
        update_tag_scheme(split, FLAGS.tag_schema)

    # Reuse the pickled mappings when present, otherwise build and store them.
    if os.path.isfile(FLAGS.map_file):
        with open(FLAGS.map_file, "rb") as map_in:
            char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(map_in)
    else:
        if FLAGS.pre_emb:
            # Character dictionary from the training data, extended with
            # pretrained-embedding entries for the test-set characters.
            train_char_counts = char_mapping(train_sentences, FLAGS.lower)[0]
            _, char_to_id, id_to_char = augment_with_pretrained(
                train_char_counts.copy(), FLAGS.emb_file,
                [token[0] for sent in test_sentences for token in sent])
        else:
            _, char_to_id, id_to_char = char_mapping(train_sentences,
                                                     FLAGS.lower)

        # Tag dictionary and mappings come from the training data only.
        _, tag_to_id, id_to_tag = tag_mapping(train_sentences)
        with open(FLAGS.map_file, "wb") as map_out:
            pickle.dump([char_to_id, id_to_char, tag_to_id, id_to_tag],
                        map_out)

    # Convert every split into index-based features, in train/dev/test order.
    train_data, dev_data, test_data = (
        prepare_dataset(split, char_to_id, tag_to_id, FLAGS.lower)
        for split in (train_sentences, dev_sentences, test_sentences))
    print("%i / %i / %i sentences in train / dev / test." %
          (len(train_data), len(dev_data), len(test_data)))

    dev_manager = BatchManager(dev_data, 100)
    test_manager = BatchManager(test_data, 100)

    # Make sure the log/model directories exist, then load or create the
    # model configuration.
    make_path(FLAGS)
    if os.path.isfile(FLAGS.config_file):
        config = load_config(FLAGS.config_file)
    else:
        config = config_model(char_to_id, tag_to_id)
        save_config(config, FLAGS.config_file)
    make_path(FLAGS)

    logger = get_logger(os.path.join("log", FLAGS.log_file))
    print_config(config, logger)

    # Let TensorFlow grow GPU memory on demand instead of grabbing it all.
    session_conf = tf.ConfigProto()
    session_conf.gpu_options.allow_growth = True
    with tf.Session(config=session_conf) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        for _ in range(100):
            is_best = evaluate(sess, model, "dev", dev_manager, id_to_tag,
                               logger)
            if is_best:
                save_model(sess, model, FLAGS.ckpt_path, logger)
            evaluate(sess, model, "test", test_manager, id_to_tag, logger)
예제 #27
0
파일: main.py 프로젝트: chenjie04/LSPM
def main():
    """Train and evaluate the Long_and_Short_term_Preference_Model.

    Parses the command-line arguments, writes the run configuration and a
    text description of the model under
    ``./run/LSPM/<dataset>/<timestamp>``, builds the training ``DataLoader``
    and loads the test ratings/negatives, evaluates the untrained model once,
    then trains for ``args.epochs`` epochs with Adam and
    ``BCEWithLogitsLoss``.  HR@K and NDCG@K are computed after every epoch
    and the best values seen are printed at the end.
    """
    # Note: The run start is in data_preprocess.py

    args = parse_args()
    if args.seed is not None:
        print("Using seed = {}".format(args.seed))
        torch.manual_seed(args.seed)
        np.random.seed(seed=args.seed)

    # Save configuration to file
    dataset = os.path.basename(os.path.normpath(args.data))
    config = {k: v for k, v in args.__dict__.items()}
    config['timestamp'] = "{:.0f}".format(datetime.utcnow().timestamp())
    config['local_timestamp'] = str(datetime.now())
    run_dir = "./run/LSPM/{}/{}".format(dataset, config['timestamp'])
    print("Saving config and results to {}".format(run_dir))
    if not os.path.exists(run_dir) and run_dir != '':
        os.makedirs(run_dir)
    utils.save_config(config, run_dir)

    # Check that GPUs are actually available
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    if use_cuda:
        print("Using cuda ...")
    else:
        print("Using CPU ...")

    t1 = time.time()
    # Load Data
    print('Loading data')
    print(os.path.join(args.data, TRAIN_RATINGS_FILENAME))
    train_dataset = CFTrainDataset(
        os.path.join(args.data, TRAIN_RATINGS_FILENAME), args.negative_samples)

    mlperf_log.ncf_print(key=mlperf_log.INPUT_BATCH_SIZE, value=args.batch_size)
    mlperf_log.ncf_print(key=mlperf_log.INPUT_ORDER)  # set shuffle=True in DataLoader
    train_dataloader = torch.utils.data.DataLoader(
            dataset=train_dataset, batch_size=args.batch_size, shuffle=True,
            num_workers=args.workers, pin_memory=True)
    test_ratings = load_test_ratings(os.path.join(args.data, TEST_RATINGS_FILENAME))  # noqa: E501
    test_negs = load_test_negs(os.path.join(args.data, TEST_NEG_FILENAME))
    nb_users, nb_items = train_dataset.nb_users, train_dataset.nb_items
    print('Load data done [%.1f s]. #user=%d, #item=%d, #train=%d, #test=%d'
          % (time.time()-t1, nb_users, nb_items, train_dataset.mat.nnz,
             len(test_ratings)))

    # Create model
    model = Long_and_Short_term_Preference_Model(nb_users=nb_users, nb_items=nb_items,
                      embed_dim=64,mlp_layer_sizes=args.layers,mlp_layer_regs=[0. for i in args.layers])
    print(model)
    print("{} parameters".format(utils.count_parameters(model)))

    # Save model text description
    with open(os.path.join(run_dir, 'model.txt'), 'w') as file:
        file.write(str(model))

    # Add optimizer and loss to graph
    mlperf_log.ncf_print(key=mlperf_log.OPT_LR, value=args.learning_rate)
    beta1, beta2, epsilon = 0.9, 0.999, 1e-8
    mlperf_log.ncf_print(key=mlperf_log.OPT_NAME, value="Adam")
    mlperf_log.ncf_print(key=mlperf_log.OPT_HP_ADAM_BETA1, value=beta1)
    mlperf_log.ncf_print(key=mlperf_log.OPT_HP_ADAM_BETA2, value=beta2)
    mlperf_log.ncf_print(key=mlperf_log.OPT_HP_ADAM_EPSILON, value=epsilon)
    optimizer = torch.optim.Adam(model.parameters(), betas=(beta1, beta2),
                                 lr=args.learning_rate, eps=epsilon)

    mlperf_log.ncf_print(key=mlperf_log.MODEL_HP_LOSS_FN, value=mlperf_log.BCE)
    criterion = nn.BCEWithLogitsLoss()

    if use_cuda:
        # Move model and loss to GPU
        model = model.cuda()
        criterion = criterion.cuda()

    if args.resume:
        # Load checkpoint.
        # NOTE(review): the assert checks a 'checkpoint' directory, but the
        # file is read from run_dir + <model name> + '.pd' with no path
        # separator, and run_dir is freshly timestamped above -- confirm the
        # intended checkpoint location.
        print('==> Resuming from checkpoint..')
        assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(run_dir + model._get_name() + '.pd')
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        # NOTE(review): start_epoch is never used by the loop below, and
        # best_hit / best_ndcg are overwritten by the initial evaluation.
        start_epoch = checkpoint['epoch']
        best_hit = checkpoint['hit']
        best_ndcg = checkpoint['ndcg']

    # Create files for tracking training
    valid_results_file = os.path.join(run_dir, 'valid_results.csv')

    # Calculate initial Hit Ratio and NDCG
    hits, ndcgs = val_epoch(model, test_ratings, test_negs, args.topk,
                            use_cuda=use_cuda, processes=args.processes)
    print('Initial HR@{K} = {hit_rate:.4f}, NDCG@{K} = {ndcg:.4f}'
          .format(K=args.topk, hit_rate=np.mean(hits), ndcg=np.mean(ndcgs)))

    best_hit, best_ndcg = np.mean(hits), np.mean(ndcgs)
    mlperf_log.ncf_print(key=mlperf_log.TRAIN_LOOP)
    for epoch in range(args.epochs):
        mlperf_log.ncf_print(key=mlperf_log.TRAIN_EPOCH, value=epoch)
        model.train()
        losses = utils.AverageMeter()

        mlperf_log.ncf_print(key=mlperf_log.INPUT_HP_NUM_NEG, value=train_dataset.nb_neg)
        mlperf_log.ncf_print(key=mlperf_log.INPUT_STEP_TRAIN_NEG_GEN)
        begin = time.time()
        loader = tqdm.tqdm(train_dataloader)
        for user, item, history, label in loader:
            # Batches are used as plain tensors; the deprecated
            # torch.autograd.Variable wrapper is not needed for autograd.
            if use_cuda:
                # `async` is a reserved word since Python 3.7, so
                # `.cuda(async=True)` no longer parses; `non_blocking=True`
                # is the keyword for an asynchronous host-to-device copy.
                user = user.cuda(non_blocking=True)
                item = item.cuda(non_blocking=True)
                history = history.cuda(non_blocking=True)
                label = label.cuda(non_blocking=True)

            outputs, _ = model(user, item, history)
            loss = criterion(outputs, label)
            losses.update(loss.data.item(), user.size(0))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Show the running loss on the progress bar
            description = ('Epoch {} Loss {loss.val:.4f} ({loss.avg:.4f})'
                           .format(epoch, loss=losses))
            loader.set_description(description)

        train_time = time.time() - begin
        begin = time.time()
        hits, ndcgs = val_epoch(model, test_ratings, test_negs, args.topk,
                                use_cuda=use_cuda, output=valid_results_file,
                                epoch=epoch, processes=args.processes)
        mlperf_log.ncf_print(key=mlperf_log.EVAL_ACCURACY, value={"epoch": epoch, "value": float(np.mean(hits))})
        mlperf_log.ncf_print(key=mlperf_log.EVAL_STOP)
        val_time = time.time() - begin
        print('Epoch {epoch}: HR@{K} = {hit_rate:.4f}, NDCG@{K} = {ndcg:.4f},'
              ' train_time = {train_time:.2f}, val_time = {val_time:.2f}'
              .format(epoch=epoch, K=args.topk, hit_rate=np.mean(hits),
                      ndcg=np.mean(ndcgs), train_time=train_time,
                      val_time=val_time))
        # Track the best metrics seen so far
        if np.mean(hits) >= best_hit:
            best_hit = np.mean(hits)
        if np.mean(ndcgs) >= best_ndcg:
            best_ndcg = np.mean(ndcgs)

    print("Best hit: ",best_hit)
    print("Best_ndcg: ", best_ndcg)

    mlperf_log.ncf_print(key=mlperf_log.RUN_STOP)
    mlperf_log.ncf_print(key=mlperf_log.RUN_FINAL)
예제 #28
0
def train():
    """Train the NER model and evaluate it after every epoch.

    Loads the train/dev/test sentence folders named by ``FLAGS``, builds (or
    reloads from ``FLAGS.map_file``) the character and tag mappings, prepares
    the datasets, loads or creates the model configuration and then trains
    for ``FLAGS.iterations`` epochs inside a TensorFlow session.  After each
    epoch the dev and test splits are evaluated and the model is saved when
    the dev evaluation reports a new best result.
    """
    # Load data sets.  Each sentence is a list of [char, tag] pairs such as
    # ['在', 'O'], ['厦', 'B-LOC'], ['门', 'I-LOC'].
    train_sentences = loader.load_folder_sentences(FLAGS.train_file,
                                                   FLAGS.lower, FLAGS.zeros)
    dev_sentences = loader.load_folder_sentences(FLAGS.dev_file, FLAGS.lower,
                                                 FLAGS.zeros)
    test_sentences = loader.load_folder_sentences(FLAGS.test_file, FLAGS.lower,
                                                  FLAGS.zeros)

    # Use selected tagging scheme (IOB / IOBES).
    # NOTE(review): only train and test are converted; the dev split is left
    # as loaded -- confirm this is intended.
    loader.update_tag_scheme(train_sentences, FLAGS.tag_schema)
    loader.update_tag_scheme(test_sentences, FLAGS.tag_schema)

    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # Create the mappings if the map file does not exist yet; otherwise
    # reload the pickled char_to_id / id_to_char / tag_to_id / id_to_tag.
    if not os.path.isfile(FLAGS.map_file):
        # create dictionary for word
        if FLAGS.pre_emb:
            dico_chars_train = loader.char_mapping(train_sentences,
                                                   FLAGS.lower)[0]
            dico_chars, char_to_id, id_to_char = loader.augment_with_pretrained(
                dico_chars_train.copy(), FLAGS.emb_file,
                list(
                    itertools.chain.from_iterable([[w[0] for w in s]
                                                   for s in test_sentences])))
        else:
            _c, char_to_id, id_to_char = loader.char_mapping(
                train_sentences, FLAGS.lower)

        # Create a dictionary and a mapping for tags
        _t, tag_to_id, id_to_tag = loader.tag_mapping(train_sentences)

        print('tag_to_id: ', tag_to_id)

        with open(FLAGS.map_file, "wb") as f:
            pickle.dump([char_to_id, id_to_char, tag_to_id, id_to_tag], f)
    else:
        with open(FLAGS.map_file, "rb") as f:
            char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)

    print('tag_to_id: ', tag_to_id)
    # prepare data, get a collection of list containing index
    train_data = loader.prepare_dataset(train_sentences, char_to_id, tag_to_id,
                                        FLAGS.lower)
    dev_data = loader.prepare_dataset(dev_sentences, char_to_id, tag_to_id,
                                      FLAGS.lower)
    test_data = loader.prepare_dataset(test_sentences, char_to_id, tag_to_id,
                                       FLAGS.lower)
    # Report the real dev size; the summary used to print a hard-coded 0.
    print("%i / %i / %i sentences in train / dev / test." %
          (len(train_data), len(dev_data), len(test_data)))

    train_manager = data_utils.BatchManager(train_data, FLAGS.batch_size)
    dev_manager = data_utils.BatchManager(dev_data, 100)
    test_manager = data_utils.BatchManager(test_data, 100)

    # make path for store log and model if not exist
    utils.make_path(FLAGS)
    if os.path.isfile(FLAGS.config_file):
        config = utils.load_config(FLAGS.config_file)
    else:
        config = config_model(char_to_id, tag_to_id)
        utils.save_config(config, FLAGS.config_file)
    utils.make_path(FLAGS)

    log_path = os.path.join("log", FLAGS.log_file)  # ./log/train.log
    logger = utils.get_logger(log_path)
    utils.print_config(config, logger)

    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    steps_per_epoch = train_manager.len_data
    with tf.Session(config=tf_config) as sess:
        model = utils.create_model(sess, Model, FLAGS.ckpt_path,
                                   data_utils.load_word2vec, config,
                                   id_to_char, logger)
        logger.info("start training")
        loss = []

        for i in range(FLAGS.iterations):
            logger.info('epoch: {}'.format(i))
            for batch in train_manager.iter_batch(shuffle=True):
                step, batch_loss = model.run_step(sess, True, batch)
                loss.append(batch_loss)
                if step % FLAGS.steps_check == 0:
                    # Log the mean loss since the last report, then reset.
                    iteration = step // steps_per_epoch + 1
                    logger.info("iteration:{} step:{}/{}, "
                                "NER loss:{:>9.6f}".format(
                                    iteration, step % steps_per_epoch,
                                    steps_per_epoch, np.mean(loss)))
                    loss = []

            best = evaluate(sess, model, "dev", dev_manager, id_to_tag, logger)
            if best:
                utils.save_model(sess, model, FLAGS.ckpt_path, logger)
            evaluate(sess, model, "test", test_manager, id_to_tag, logger)
예제 #29
0
    def run(self, share, dataset_properties=None, enable_service=False):
        """Create a new share and return its id.

        :param share: share definition dict; this method reads its
            ``target_type``, ``target_path``, ``type``, ``name`` and,
            when present, ``permissions`` and ``properties`` keys.
        :param dataset_properties: optional properties passed to
            ``volume.dataset.create`` when the backing dataset has to be
            created.
        :param enable_service: when true and the sharing service is not
            running, enable it; otherwise only a warning is added.
        :returns: id returned by the ``share.<type>.create`` subtask.
        :raises TaskException: ``ENOENT`` if the share target does not exist,
            ``EEXIST`` if a share with the same type and name already exists.
        """
        # A ZVOL target is shareable when its parent dataset exists; any
        # other target when the expanded path (or its parent directory for
        # non-FILE targets) exists on disk.
        if share['target_type'] == 'ZVOL':
            parent_ds = '/'.join(share['target_path'].split('/')[:-1])
            shareable = bool(self.dispatcher.call_sync('volume.dataset.query', [('name', '=', parent_ds)]))
        else:
            share_path = self.dispatcher.call_sync('share.expand_path', share['target_path'], share['target_type'])
            if share['target_type'] != 'FILE':
                share_path = os.path.dirname(share_path)
            shareable = os.path.exists(share_path)

        if not shareable:
            raise TaskException(errno.ENOENT, 'Selected share target {0} does not exist or cannot be created'.format(
                share['target_path']
            ))

        root = self.dispatcher.call_sync('volume.get_volumes_root')
        share_type = self.dispatcher.call_sync('share.supported_types').get(share['type'])

        assert share_type['subtype'] in ('FILE', 'BLOCK'),\
            "Unsupported Share subtype: {0}".format(share_type['subtype'])

        if self.datastore.exists(
            'shares',
            ('type', '=', share['type']),
            ('name', '=', share['name'])
        ):
            raise TaskException(errno.EEXIST, 'Share {0} of type {1} already exists'.format(
                share['name'],
                share['type']
            ))

        # Fill in defaults for keys the caller omitted.
        normalize(share, {
            'enabled': True,
            'immutable': False,
            'description': ''
        })

        if share['target_type'] in ('DATASET', 'ZVOL'):
            dataset = share['target_path']
            pool = share['target_path'].split('/')[0]
            path = os.path.join(root, dataset)

            if not self.dispatcher.call_sync('zfs.dataset.query', [('name', '=', dataset)], {'single': True}):
                # Backing dataset is missing: create a filesystem dataset for
                # FILE shares or a volume for BLOCK shares.
                if share_type['subtype'] == 'FILE':
                    self.run_subtask_sync('volume.dataset.create', {
                        'volume': pool,
                        'id': dataset,
                        'permissions_type': share_type['perm_type'],
                        'properties': dataset_properties or {}
                    })

                if share_type['subtype'] == 'BLOCK':
                    self.run_subtask_sync('volume.dataset.create', {
                        'volume': pool,
                        'id': dataset,
                        'type': 'VOLUME',
                        'volsize': share['properties']['size'],
                        'properties': dataset_properties or {}
                    })
            else:
                if share_type['subtype'] == 'FILE':
                    # NOTE(review): this is the only subtask started with
                    # run_subtask rather than run_subtask_sync -- confirm
                    # that not waiting for it here is intended.
                    self.run_subtask('volume.dataset.update', dataset, {
                        'permissions_type': share_type['perm_type']
                    })

        elif share['target_type'] == 'DIRECTORY':
            # Verify that target directory exists
            path = share['target_path']
            if not os.path.isdir(path):
                raise TaskException(errno.ENOENT, "Target directory {0} doesn't exist".format(path))

        elif share['target_type'] == 'FILE':
            # Verify that target file exists
            path = share['target_path']
            if not os.path.isfile(path):
                raise TaskException(errno.ENOENT, "Target file {0} doesn't exist".format(path))

        else:
            raise AssertionError('Invalid target type')

        if share.get('permissions') and share['target_type'] not in ('ZVOL', 'FILE'):
            self.run_subtask_sync('file.set_permissions', path, share.pop('permissions'))

        # Named share_id rather than id to avoid shadowing the builtin.
        share_id = self.run_subtask_sync('share.{0}.create'.format(share['type']), share)
        self.dispatcher.dispatch_event('share.changed', {
            'operation': 'create',
            'ids': [share_id]
        })

        # Write a backup copy of the share config; failure is only a warning.
        new_share = self.datastore.get_by_id('shares', share_id)
        path = self.dispatcher.call_sync('share.get_directory_path', new_share['id'])
        try:
            save_config(
                path,
                '{0}-{1}'.format(new_share['type'], new_share['name']),
                new_share
            )
        except OSError as err:
            self.add_warning(TaskWarning(errno.ENXIO, 'Cannot save backup config file: {0}'.format(str(err))))

        service_state = self.dispatcher.call_sync('service.query', [('name', '=', share['type'])], {'single': True})
        if service_state['state'] != 'RUNNING':
            if enable_service:
                config = service_state['config']
                config['enable'] = True
                self.run_subtask_sync('service.update', service_state['id'], {'config': config})
            else:
                # The two sentences used to run together without a period.
                self.add_warning(TaskWarning(
                    errno.ENXIO, "Share has been created but the service {0} is not currently running. "
                                 "Please enable the {0} service.".format(share['type'])
                ))

        return share_id
예제 #30
0
파일: main.py 프로젝트: yyht/Chinese-NER
def train():
    """Train the NER model for 100 epochs, evaluating after each one.

    Loads the train/dev/test sentences named by ``FLAGS``, builds (or reloads
    from ``FLAGS.map_file``) the character and tag mappings, prepares the
    datasets, loads or creates the model configuration and trains inside a
    TensorFlow session.  After each epoch the dev and test splits are
    evaluated and the model is saved when the dev evaluation reports a new
    best result.
    """
    # load data sets
    train_sentences = load_sentences(FLAGS.train_file, FLAGS.lower,
                                     FLAGS.zeros)
    dev_sentences = load_sentences(FLAGS.dev_file, FLAGS.lower, FLAGS.zeros)
    test_sentences = load_sentences(FLAGS.test_file, FLAGS.lower, FLAGS.zeros)

    # Use selected tagging scheme (IOB / IOBES)
    # NOTE(review): only train and test are converted; the dev split is left
    # as loaded -- confirm this is intended.
    update_tag_scheme(train_sentences, FLAGS.tag_schema)
    update_tag_scheme(test_sentences, FLAGS.tag_schema)

    # create maps if not exist
    if not os.path.isfile(FLAGS.map_file):
        # create dictionary for word
        if FLAGS.pre_emb:
            dico_chars_train = char_mapping(train_sentences, FLAGS.lower)[0]
            dico_chars, char_to_id, id_to_char = augment_with_pretrained(
                dico_chars_train.copy(), FLAGS.emb_file,
                list(
                    itertools.chain.from_iterable([[w[0] for w in s]
                                                   for s in test_sentences])))
        else:
            _c, char_to_id, id_to_char = char_mapping(train_sentences,
                                                      FLAGS.lower)

        # Create a dictionary and a mapping for tags
        _t, tag_to_id, id_to_tag = tag_mapping(train_sentences)
        with open(FLAGS.map_file, "wb") as f:
            pickle.dump([char_to_id, id_to_char, tag_to_id, id_to_tag], f)
    else:
        with open(FLAGS.map_file, "rb") as f:
            char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)

    # prepare data, get a collection of list containing index
    train_data = prepare_dataset(train_sentences, char_to_id, tag_to_id,
                                 FLAGS.lower)
    dev_data = prepare_dataset(dev_sentences, char_to_id, tag_to_id,
                               FLAGS.lower)
    test_data = prepare_dataset(test_sentences, char_to_id, tag_to_id,
                                FLAGS.lower)
    # Report the real dev size; the summary used to print a hard-coded 0.
    print("%i / %i / %i sentences in train / dev / test." %
          (len(train_data), len(dev_data), len(test_data)))

    train_manager = BatchManager(train_data, FLAGS.batch_size)
    dev_manager = BatchManager(dev_data, 100)
    test_manager = BatchManager(test_data, 100)
    # make path for store log and model if not exist
    make_path(FLAGS)
    if os.path.isfile(FLAGS.config_file):
        config = load_config(FLAGS.config_file)
    else:
        config = config_model(char_to_id, tag_to_id)
        save_config(config, FLAGS.config_file)
    make_path(FLAGS)

    log_path = os.path.join("log", FLAGS.log_file)
    logger = get_logger(log_path)
    print_config(config, logger)

    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    steps_per_epoch = train_manager.len_data
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        logger.info("start training")
        loss = []
        for i in range(100):
            for batch in train_manager.iter_batch(shuffle=True):
                step, batch_loss = model.run_step(sess, True, batch)
                loss.append(batch_loss)
                if step % FLAGS.steps_check == 0:
                    # Log the mean loss since the last report, then reset.
                    iteration = step // steps_per_epoch + 1
                    logger.info("iteration:{} step:{}/{}, "
                                "NER loss:{:>9.6f}".format(
                                    iteration, step % steps_per_epoch,
                                    steps_per_epoch, np.mean(loss)))
                    loss = []

            best = evaluate(sess, model, "dev", dev_manager, id_to_tag, logger)
            if best:
                save_model(sess, model, FLAGS.ckpt_path, logger)
            evaluate(sess, model, "test", test_manager, id_to_tag, logger)
예제 #31
0
# Model and optimisation hyper-parameters taken from the loaded config.
lstm_units, dense_units = config['lstm_units'], config['dense_units']
lr, optimizer, loss = config['lr'], config['optimizer'], config['loss']

# Three outputs when the config's 'grouped' flag is set, otherwise one.
n_outputs = 3 if config['grouped'] else 1

# Debug runs do not persist extracted features; --no_cache disables caching.
save_feature = not args.debug
use_cache = not args.no_cache

# dump config
config_path = os.path.join(result_dir, 'config.json')
utils.save_config(config, config_path)

dataset = VsbSignalDataset(mode='train', debug=args.debug)
normalizer = Normalizer(min_num, max_num)

# Extract the feature matrix and labels from the training dataset.
X, y = feature_extracter.feature_extracter(
    features,
    dataset,
    window_size=window_size,
    stride=stride,
    grouped=grouped,
    normalizer=normalizer,
    use_cache=use_cache,
    save_result=save_feature,
)

print(X.shape, y.shape)
features_path = os.path.join(result_dir, "X.npy")
np.save(features_path, X)
예제 #32
0
        # convert to tag
        tags = []
        for i in range(len(predict)):
            tag_predict = []
            for j in range(len(sent_token[i])):
                tag_predict.append(idx2tag[predict[i][j]])
            tags.append(tag_predict)
        return sent_token, tags


# Script entry point: load the training sentences, build the vocabularies,
# persist them, then build and fit the model.
if __name__ == '__main__':
    print("Đọc dữ liệu...")  # "Reading data..."
    sentences = utils.load_data(PATH_TRAIN)
    # Word/tag lists and both directions of the word<->index and
    # tag<->index mappings, as returned by utils.parse.
    all_words, all_tags, word2idx, idx2word, tag2idx, idx2tag = utils.parse(
        sentences)
    # Turn the sentences into numeric inputs/labels using MAX_LENGTH and the
    # mappings above.
    X_train, Y_train = utils.sentence_to_number(sentences, MAX_LENGTH,
                                                word2idx, tag2idx)
    # Hold out 30% of the data for validation; fixed seed for repeatability.
    X_train, X_val, Y_train, Y_val = train_test_split(X_train,
                                                      Y_train,
                                                      test_size=0.3,
                                                      random_state=0)
    # Persist the vocabularies and mappings to their configured paths.
    utils.save_config(all_words, all_tags, word2idx, idx2word, tag2idx,
                      idx2tag, PATH_ALLWORDS, PATH_ALLTAGS, PATH_WORD2IDX,
                      PATH_IDX2WORD, PATH_TAG2IDX, PATH_IDX2TAG)
    # "Parameters saved successfully! Starting training..."
    print("Lưu tham số thành công! Tiến hành training...")
    # Model dimensions are the number of entries in each mapping.
    VOCAB_SIZE = len(word2idx.items())
    NUM_CLASSES = len(tag2idx.items())
    model = Model(VOCAB_SIZE, NUM_CLASSES)
    model.build_model()
    model.fit(X_train, Y_train, X_val, Y_val, save=True)
예제 #33
0
파일: data.py 프로젝트: AkiraTreize/trackma
 def _save_userconfig(self):
     """Write ``self.userconfig`` to ``self.userconfig_file``.

     A debug message is emitted through ``self.msg`` before saving.
     """
     self.msg.debug(self.name, "Saving userconfig...")
     save = utils.save_config
     settings, destination = self.userconfig, self.userconfig_file
     save(settings, destination)
예제 #34
0
def main(args):
    """Train the autoencoder on MNIST and checkpoint the result.

    The hidden-layer spec comes from ``args.hiddens`` or, when
    ``args.restore`` is set, from the ``config.json`` stored in the restore
    directory (the session is then restored instead of initialised).  The
    effective configuration is written to ``<args.result>/config.json``.
    For each of ``args.epoch`` epochs the mean batch loss is logged and a
    scatter plot of the latent variables is produced under
    ``<args.result>/train``.  A checkpoint is saved at the end, or at the
    last completed epoch if training is interrupted from the keyboard.
    """
    hiddens = args.hiddens
    if args.restore:
        restored_cfg = load_config(os.path.join(args.restore, 'config.json'))
        hiddens = restored_cfg['hiddens']

    # Build the autoencoder and its graph.
    ae = get_network(hiddens, logger=g_logger)
    sess, saver, init_op = build_graph(ae, [None, 784])

    if not args.restore:
        g_logger.info('Initialize the model')
        sess.run(init_op)
    else:
        restore(sess, saver, args.restore)

    # Directory for the per-epoch training plots.
    train_result = os.path.join(args.result, 'train')
    if not os.path.exists(train_result):
        os.makedirs(train_result)

    # Persist the configuration (this writes 'hiddens' back into args).
    settings = vars(args)
    settings['hiddens'] = hiddens
    save_config(settings, os.path.join(args.result, 'config.json'))

    # MNIST is used as the data source.
    mnist = tf.contrib.learn.datasets.load_dataset('mnist')

    figure = plt.figure(figsize=(8, 8))

    scatter_data = {}
    last_epoch = 0
    checkpoint_prefix_for = lambda: args.result + '/checkpoint'
    try:
        fetches = [ae.train, ae.loss, ae.z, ae.x_]
        for epoch in range(1, args.epoch + 1):
            loss_sum = 0
            batches = 0
            for batches, (x, y) in enumerate(next_mnist_data(mnist, 'train'), 1):
                _, batch_loss, z, x_ = sess.run(fetches, feed_dict={ae.x: x})

                # Collect the latent variables (z) with their labels.
                make_scatter_data(scatter_data, z, y)

                loss_sum += batch_loss

            last_epoch = epoch

            g_logger.info('epoch: {}, loss: {}'.format(epoch, loss_sum / batches))
            scatter(scatter_data, train_result, epoch)
            figure.clear()
            scatter_data.clear()

        # Training finished normally: checkpoint at the final epoch.
        saver.save(sess, checkpoint_prefix_for(), global_step=args.epoch)
    except KeyboardInterrupt:
        # Interrupted: checkpoint at the last epoch that completed.
        saver.save(sess, checkpoint_prefix_for(), global_step=last_epoch)
예제 #35
0
def train():
    """Train the NER model for 100 epochs, saving every seventh epoch.

    Loads the train/dev/test sentences named by ``FLAGS``, converts all three
    splits to the selected tag scheme, builds (or reloads from
    ``FLAGS.map_file``) the character and tag mappings, prepares the
    datasets, loads or creates the model configuration and trains inside a
    TensorFlow session.  No evaluation is run; the model is saved whenever
    the epoch index is a multiple of 7.
    """
    # Read the three splits.
    train_sentences = load_sentences(FLAGS.train_file, FLAGS.lower,
                                     FLAGS.zeros)
    dev_sentences = load_sentences(FLAGS.dev_file, FLAGS.lower, FLAGS.zeros)
    test_sentences = load_sentences(FLAGS.test_file, FLAGS.lower, FLAGS.zeros)

    # Check and normalise the tags of every split to the selected scheme
    # (IOB / IOBES).
    for split in (train_sentences, test_sentences, dev_sentences):
        update_tag_scheme(split, FLAGS.tag_schema)

    # char_to_id, id_to_char, tag_to_id and id_to_tag are pickled in the map
    # file; reuse them when present, otherwise build them from the data.
    if os.path.isfile(FLAGS.map_file):
        with open(FLAGS.map_file, "rb") as map_in:
            char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(map_in)
    else:
        if FLAGS.pre_emb:
            train_char_counts = char_mapping(train_sentences, FLAGS.lower)[0]
            # Extend the character dictionary with the pretrained embedding
            # set, then get the character <-> index mappings.
            _, char_to_id, id_to_char = augment_with_pretrained(
                train_char_counts.copy(), FLAGS.emb_file,
                [token[0] for sent in test_sentences for token in sent])
        else:
            _, char_to_id, id_to_char = char_mapping(train_sentences,
                                                     FLAGS.lower)

        # Tag <-> index mappings come from the training data.
        _, tag_to_id, id_to_tag = tag_mapping(train_sentences)

        with open(FLAGS.map_file, "wb") as map_out:
            pickle.dump([char_to_id, id_to_char, tag_to_id, id_to_tag],
                        map_out)

    # Extract index-based features for every split, in train/dev/test order.
    train_data, dev_data, test_data = (
        prepare_dataset(split, char_to_id, tag_to_id, FLAGS.lower)
        for split in (train_sentences, dev_sentences, test_sentences))
    print("%i / %i / %i sentences in train / dev / test." %
          (len(train_data), len(dev_data), len(test_data)))

    # Batch providers for the model.
    train_manager = BatchManager(train_data, FLAGS.batch_size)
    dev_manager = BatchManager(dev_data, 100)
    test_manager = BatchManager(test_data, 100)

    # Make sure the log/model directories exist, then load or create the
    # model configuration.
    make_path(FLAGS)
    if os.path.isfile(FLAGS.config_file):
        config = load_config(FLAGS.config_file)
    else:
        config = config_model(char_to_id, tag_to_id)
        save_config(config, FLAGS.config_file)
    make_path(FLAGS)

    logger = get_logger(os.path.join("log", FLAGS.log_file))
    print_config(config, logger)

    # Let TensorFlow grow GPU memory on demand instead of grabbing it all.
    session_conf = tf.ConfigProto()
    session_conf.gpu_options.allow_growth = True
    # Number of steps needed for one full pass over the training set.
    steps_per_epoch = train_manager.len_data
    with tf.Session(config=session_conf) as sess:

        # Model creation (original author's note: the core of the project).
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        logger.info("start training")
        recent_losses = []
        with tf.device("/gpu:0"):
            for epoch in range(100):
                for batch in train_manager.iter_batch(shuffle=True):
                    step, batch_loss = model.run_step(sess, True, batch)
                    recent_losses.append(batch_loss)
                    if step % FLAGS.steps_check != 0:
                        continue
                    # Log the mean loss since the last report, then reset.
                    iteration = step // steps_per_epoch + 1
                    logger.info("iteration:{} step:{}/{}, "
                                "NER loss:{:>9.6f}".format(
                                    iteration, step % steps_per_epoch,
                                    steps_per_epoch, np.mean(recent_losses)))
                    recent_losses = []

                # Checkpoint on every seventh epoch (0, 7, 14, ...).
                if epoch % 7 == 0:
                    save_model(sess, model, FLAGS.ckpt_path, logger)
예제 #36
0
# Pick the data source, batch size and shuffling depending on whether this
# run trains or tests.
if config.is_train:
    data_path = config.data_path
    batch_size = config.batch_size
    do_shuffle = True
else:
    # Test mode: force config.batch_size to 64 and fall back to the training
    # data path when no separate test path is configured.
    setattr(config, 'batch_size', 64)
    if config.test_data_path is None:
        data_path = config.data_path
    else:
        data_path = config.test_data_path
    # The local batch_size used in test mode is config.sample_per_image,
    # not the 64 set on config just above.
    batch_size = config.sample_per_image
    do_shuffle = False
# trainer = Trainer(config) #, data_loader)
if config.is_train:
    save_config(config)
else:
    if not config.load_path:
        raise Exception("[!] You should specify `load_path` to load a pretrained model")
    # NOTE(review): the `trainer` assignment above is commented out, so
    # unless `trainer` is defined earlier in the file this call raises
    # NameError in test mode -- confirm.
    trainer.test()

##############
# 3 init
##############
# Start from an empty default TensorFlow graph.
tf.reset_default_graph()

#data_loader = data_loader
# Copy frequently used settings out of the config object.
dataset = config.dataset
beta1 = config.beta1
beta2 = config.beta2
optimizer = config.optimizer
예제 #37
0
def main():
    """Train a NeuMF recommender and evaluate it after every epoch.

    Parses the command-line arguments, stores the run configuration and a
    text description of the model in a timestamped directory under
    ``./run/neumf/``, then trains for ``args.epochs`` epochs, computing hit
    ratio and NDCG on the test ratings after each one.

    Returns:
        0 as soon as the mean hit ratio reaches ``args.threshold`` (when a
        threshold is given); otherwise ``None`` once all epochs have run.
    """
    args = parse_args()
    if args.seed is not None:
        print("Using seed = {}".format(args.seed))
        torch.manual_seed(args.seed)
        np.random.seed(seed=args.seed)

    # Save configuration to file
    config = dict(vars(args))
    config['timestamp'] = "{:.0f}".format(datetime.utcnow().timestamp())
    config['local_timestamp'] = str(datetime.now())
    run_dir = "./run/neumf/{}".format(config['timestamp'])
    print("Saving config and results to {}".format(run_dir))
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(run_dir, exist_ok=True)
    utils.save_config(config, run_dir)

    # Check that GPUs are actually available
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    t1 = time.time()
    # Load Data
    print('Loading data')
    train_dataset = CFTrainDataset(
        os.path.join(args.data, TRAIN_RATINGS_FILENAME), args.negative_samples)
    train_dataloader = torch.utils.data.DataLoader(
            dataset=train_dataset, batch_size=args.batch_size, shuffle=True,
            num_workers=8, pin_memory=True)
    test_ratings = load_test_ratings(os.path.join(args.data, TEST_RATINGS_FILENAME))  # noqa: E501
    test_negs = load_test_negs(os.path.join(args.data, TEST_NEG_FILENAME))
    nb_users, nb_items = train_dataset.nb_users, train_dataset.nb_items
    print('Load data done [%.1f s]. #user=%d, #item=%d, #train=%d, #test=%d'
          % (time.time()-t1, nb_users, nb_items, train_dataset.mat.nnz,
             len(test_ratings)))

    # Create model
    model = NeuMF(nb_users, nb_items,
                  mf_dim=args.factors, mf_reg=0.,
                  mlp_layer_sizes=args.layers,
                  mlp_layer_regs=[0. for i in args.layers])
    print(model)
    print("{} parameters".format(utils.count_parameters(model)))

    # Save model text description
    with open(os.path.join(run_dir, 'model.txt'), 'w') as file:
        file.write(str(model))

    # Add optimizer and loss to graph
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
    criterion = nn.BCEWithLogitsLoss()

    if use_cuda:
        # Move model and loss to GPU
        model = model.cuda()
        criterion = criterion.cuda()

    # Create files for tracking training
    valid_results_file = os.path.join(run_dir, 'valid_results.csv')

    # Calculate initial Hit Ratio and NDCG
    hits, ndcgs = val_epoch(model, test_ratings, test_negs, args.topk,
                            use_cuda=use_cuda, processes=args.processes)
    print('Initial HR@{K} = {hit_rate:.4f}, NDCG@{K} = {ndcg:.4f}'
          .format(K=args.topk, hit_rate=np.mean(hits), ndcg=np.mean(ndcgs)))
    for epoch in range(args.epochs):
        model.train()
        losses = utils.AverageMeter()

        begin = time.time()
        loader = tqdm.tqdm(train_dataloader)
        for batch_index, (user, item, label) in enumerate(loader):
            user = torch.autograd.Variable(user, requires_grad=False)
            item = torch.autograd.Variable(item, requires_grad=False)
            label = torch.autograd.Variable(label, requires_grad=False)
            if use_cuda:
                # `async` is a reserved word since Python 3.7, so the old
                # `cuda(async=True)` spelling no longer parses; the keyword
                # is named `non_blocking` in PyTorch >= 0.4. The loader
                # above is created with pin_memory=True.
                user = user.cuda(non_blocking=True)
                item = item.cuda(non_blocking=True)
                label = label.cuda(non_blocking=True)

            outputs = model(user, item)
            loss = criterion(outputs, label)
            losses.update(loss.data.item(), user.size(0))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Show the current and running-average loss on the progress bar
            description = ('Epoch {} Loss {loss.val:.4f} ({loss.avg:.4f})'
                           .format(epoch, loss=losses))
            loader.set_description(description)

        train_time = time.time() - begin
        begin = time.time()
        hits, ndcgs = val_epoch(model, test_ratings, test_negs, args.topk,
                                use_cuda=use_cuda, output=valid_results_file,
                                epoch=epoch, processes=args.processes)
        val_time = time.time() - begin
        print('Epoch {epoch}: HR@{K} = {hit_rate:.4f}, NDCG@{K} = {ndcg:.4f},'
              ' train_time = {train_time:.2f}, val_time = {val_time:.2f}'
              .format(epoch=epoch, K=args.topk, hit_rate=np.mean(hits),
                      ndcg=np.mean(ndcgs), train_time=train_time,
                      val_time=val_time))
        # Stop early once the requested hit ratio has been reached.
        if args.threshold is not None:
            if np.mean(hits) >= args.threshold:
                print("Hit threshold of {}".format(args.threshold))
                return 0
예제 #38
0
 def save_config(self):
     """Save the main configuration, then the user configuration."""
     # Each pair names the attribute holding the settings and the attribute
     # holding the file they are saved to; order matches the save order.
     for settings_attr, target_attr in (("config", "configfile"),
                                        ("userconfig", "userconfigfile")):
         utils.save_config(getattr(self, settings_attr),
                           getattr(self, target_attr))
예제 #39
0
                select_laps_to_render(matched_videos, args.lap_comparison,
                                      args.render_sessions)

    if args.manual_offset or args.force_manual_offset:
        for video in matched_videos:
            has_renderable_laps = False
            for lap in video.matched_laps:
                if lap.get('render'):
                    has_renderable_laps = True
                    break
            if has_renderable_laps and (offsets.get(video.filenames[0]) is None
                                        or args.force_manual_offset):
                offset = video.calibrate_offset()
                offsets[str(video.filenames[0])] = offset
                cfg.offsets = offsets
                save_config(cfg)

    def upload(lapvideo, params, renderer, args):
        """Upload `lapvideo` to YouTube and print the resulting watch URL."""
        print("Uploading %s to youtube..." % lapvideo)
        # Metadata is generated from the same arguments this function got.
        metadata = generate_metadata(lapvideo, params, renderer, args)
        uploaded_id = youtube.upload_video(lapvideo, metadata)

        print("Upload Complete!  Visit at https://www.youtube.com/watch?v=%s" % uploaded_id)

    if args.lap_comparison:
        dual_vids = [v for v in matched_videos if v.renderable_laps()]
        if len(dual_vids) == 1:
            dual_vids.append(None)

        from renderers.dual import DualRenderer
        from renderers.likeharrys import LikeHarrysRenderer
예제 #40
0
        print("重みをロードしました")
        vae.load_weights(save_weights_fname)

    vae.summary()
    save_dict = {
        "n_samples": n_samples,
        "maxlen": maxlen,
        "words_num": len(words_set),
        "intermediate_dim": intermediate_dim,
        "latent_dim": latent_dim,
        "w2v_dim": len(embedding_matrix[1]),
        "is_reversed": str(is_reversed),
        "mecab_lv": mecab_lv,
        "use_conjugated": str(use_conjugated)
    }
    save_config(path=save_config_fname, save_dict=save_dict)
    with open(save_w2i_fname, "wb") as fo:
        pickle.dump([word_to_id, is_reversed], fo)

    sampling_obj = Inference(gen, maxlen, latent_dim, word_to_id, is_reversed)
    es_cb = EarlyStopping(patience=30, verbose=1)
    print_callback = LambdaCallback(on_epoch_end=on_epoch_end)
    model_checkpoint = ModelCheckpoint(filepath=save_callback_weights_fname,
                                       save_weights_only=True,
                                       period=save_weight_period)

    print("Training model...")
    vae.compile(optimizer="adam", loss=vae_model.vae_loss)
    fit = vae.fit([X_enc, X_dec],
                  Y,
                  epochs=epochs,
예제 #41
0
 def save_configuration(self):
     """Store the current debug flag in the config and save the config.

     Returns a short confirmation message.
     """
     # Always store a real boolean, whatever truthy/falsy value self.debug is.
     self.config['debug'] = True if self.debug else False
     save_config(self.config, CONFIG_FILENAME)
     return "Configuration saved"
예제 #42
0
def main():
    """Train and evaluate NeuMF with the data kept on the GPU, emitting MLPerf log events.

    Parses the arguments, saves the run configuration into a timestamped
    directory under ``./run/neumf/``, loads the train/test rating tensors,
    precomputes the validation negatives and duplicate masks, then trains
    for up to ``args.epochs`` epochs. Validation runs after every epoch and
    training stops early once the hit ratio reaches ``args.threshold``.
    """

    args = parse_args()
    args.distributed, args.world_size = init_distributed(args.local_rank)
    if args.seed is not None:
        print("Using seed = {}".format(args.seed))
        torch.manual_seed(args.seed)

    # Save configuration to file
    config = {k: v for k, v in args.__dict__.items()}
    config['timestamp'] = "{:.0f}".format(datetime.utcnow().timestamp())
    config['local_timestamp'] = str(datetime.now())
    # The local rank is part of the directory name, so each worker gets its own run dir.
    run_dir = "./run/neumf/{}.{}".format(config['timestamp'],args.local_rank)
    print("Saving config and results to {}".format(run_dir))
    if not os.path.exists(run_dir) and run_dir != '':
        os.makedirs(run_dir)
    utils.save_config(config, run_dir)

    # Check that GPUs are actually available
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    # more like load trigger timer now
    mlperf_log.ncf_print(key=mlperf_log.PREPROC_HP_NUM_EVAL, value=args.valid_negative)
    # The default of np.random.choice is replace=True, so does pytorch random_()
    mlperf_log.ncf_print(key=mlperf_log.PREPROC_HP_SAMPLE_EVAL_REPLACEMENT, value=True)
    mlperf_log.ncf_print(key=mlperf_log.INPUT_HP_SAMPLE_TRAIN_REPLACEMENT)
    mlperf_log.ncf_print(key=mlperf_log.INPUT_STEP_EVAL_NEG_GEN)

    # sync worker before timing.
    if args.distributed:
        torch.distributed.broadcast(torch.tensor([1], device="cuda"), 0)
    # NOTE(review): called regardless of `use_cuda`; this function appears to
    # assume a CUDA device is present throughout -- confirm.
    torch.cuda.synchronize()

    #===========================================================================
    #== The clock starts on loading the preprocessed data. =====================
    #===========================================================================
    mlperf_log.ncf_print(key=mlperf_log.RUN_START)
    run_start_time = time.time()

    # load not converted data, just separate one for test
    train_ratings = torch.load(args.data+'/train_ratings.pt', map_location=torch.device('cuda:{}'.format(args.local_rank)))
    test_ratings = torch.load(args.data+'/test_ratings.pt', map_location=torch.device('cuda:{}'.format(args.local_rank)))

    # get input data
    # get dims
    # Column 0 is used as the user id and column 1 as the item id; sizes are max id + 1.
    nb_maxs = torch.max(train_ratings, 0)[0]
    nb_users = nb_maxs[0].item()+1
    nb_items = nb_maxs[1].item()+1
    train_users = train_ratings[:,0]
    train_items = train_ratings[:,1]
    del nb_maxs, train_ratings
    mlperf_log.ncf_print(key=mlperf_log.INPUT_SIZE, value=len(train_users))
    # produce things not change between epoch
    # mask for filtering duplicates with real sample
    # note: test data is removed before create mask, same as reference
    # mat is 1 everywhere except at (user, item) pairs seen in training, which are 0.
    mat = torch.cuda.ByteTensor(nb_users, nb_items).fill_(1)
    mat[train_users, train_items] = 0
    # create label
    # Positive labels (ones) come first, followed by args.negative_samples
    # copies of zeros.
    train_label = torch.ones_like(train_users, dtype=torch.float32)
    neg_label = torch.zeros_like(train_label, dtype=torch.float32)
    neg_label = neg_label.repeat(args.negative_samples)
    train_label = torch.cat((train_label,neg_label))
    del neg_label
    if args.fp16:
        train_label = train_label.half()

    # produce validation negative sample on GPU
    all_test_users = test_ratings.shape[0]

    test_users = test_ratings[:,0]
    test_pos = test_ratings[:,1].reshape(-1,1)
    test_negs = generate_neg(test_users, mat, nb_items, args.valid_negative, True)[1]

    # create items with real sample at last position
    test_users = test_users.reshape(-1,1).repeat(1,1+args.valid_negative)
    test_items = torch.cat((test_negs.reshape(-1,args.valid_negative), test_pos), dim=1)
    del test_ratings, test_negs

    # generate dup mask and real indice for exact same behavior on duplication compare to reference
    # here we need a sort that is stable(keep order of duplicates)
    # this is a version works on integer
    sorted_items, indices = torch.sort(test_items) # [1,1,1,2], [3,1,0,2]
    sum_item_indices = sorted_items.float()+indices.float()/len(indices[0]) #[1.75,1.25,1.0,2.5]
    indices_order = torch.sort(sum_item_indices)[1] #[2,1,0,3]
    stable_indices = torch.gather(indices, 1, indices_order) #[0,1,3,2]
    # produce -1 mask
    # An entry is flagged when it equals its left neighbour in sorted order;
    # the gather below maps the flags back to the original column order.
    dup_mask = (sorted_items[:,0:-1] == sorted_items[:,1:])
    dup_mask = torch.cat((torch.zeros_like(test_pos, dtype=torch.uint8), dup_mask),dim=1)
    dup_mask = torch.gather(dup_mask,1,stable_indices.sort()[1])
    # produce real sample indices to later check in topk
    # Same stable-sort trick, applied to "item differs from the positive item".
    sorted_items, indices = (test_items != test_pos).sort()
    sum_item_indices = sorted_items.float()+indices.float()/len(indices[0])
    indices_order = torch.sort(sum_item_indices)[1]
    stable_indices = torch.gather(indices, 1, indices_order)
    real_indices = stable_indices[:,0]
    del sorted_items, indices, sum_item_indices, indices_order, stable_indices, test_pos

    # Each distributed worker keeps only its own chunk of the validation data.
    if args.distributed:
        test_users = torch.chunk(test_users, args.world_size)[args.local_rank]
        test_items = torch.chunk(test_items, args.world_size)[args.local_rank]
        dup_mask = torch.chunk(dup_mask, args.world_size)[args.local_rank]
        real_indices = torch.chunk(real_indices, args.world_size)[args.local_rank]

    # make pytorch memory behavior more consistent later
    torch.cuda.empty_cache()

    mlperf_log.ncf_print(key=mlperf_log.INPUT_BATCH_SIZE, value=args.batch_size)
    mlperf_log.ncf_print(key=mlperf_log.INPUT_ORDER)  # we shuffled later with randperm

    # NOTE(review): the "#test" field is filled with nb_users, not with
    # all_test_users -- confirm this is intended.
    print('Load data done [%.1f s]. #user=%d, #item=%d, #train=%d, #test=%d'
          % (time.time()-run_start_time, nb_users, nb_items, len(train_users),
             nb_users))

    # Create model
    model = NeuMF(nb_users, nb_items,
                  mf_dim=args.factors, mf_reg=0.,
                  mlp_layer_sizes=args.layers,
                  mlp_layer_regs=[0. for i in args.layers])

    if args.fp16:
        model = model.half()

    print(model)
    print("{} parameters".format(utils.count_parameters(model)))

    # Save model text description
    with open(os.path.join(run_dir, 'model.txt'), 'w') as file:
        file.write(str(model))

    # Add optimizer and loss to graph
    # With fp16 the optimizer is given the wrapper's fp32_params instead of
    # the model's own parameters.
    if args.fp16:
        fp_optimizer = Fp16Optimizer(model, args.loss_scale)
        params = fp_optimizer.fp32_params
    else:
        params = model.parameters()

    #optimizer = torch.optim.Adam(params, lr=args.learning_rate, betas=(args.beta1, args.beta2), eps=args.eps)
    # optimizer = AdamOpt(params, lr=args.learning_rate, betas=(args.beta1, args.beta2), eps=args.eps)
    optimizer = FusedAdam(params, lr=args.learning_rate, betas=(args.beta1, args.beta2), eps=args.eps, eps_inside_sqrt=False)
    criterion = nn.BCEWithLogitsLoss(reduction = 'none') # use torch.mean() with dim later to avoid copy to host
    mlperf_log.ncf_print(key=mlperf_log.OPT_LR, value=args.learning_rate)
    mlperf_log.ncf_print(key=mlperf_log.OPT_NAME, value="Adam")
    mlperf_log.ncf_print(key=mlperf_log.OPT_HP_ADAM_BETA1, value=args.beta1)
    mlperf_log.ncf_print(key=mlperf_log.OPT_HP_ADAM_BETA2, value=args.beta2)
    mlperf_log.ncf_print(key=mlperf_log.OPT_HP_ADAM_EPSILON, value=args.eps)
    mlperf_log.ncf_print(key=mlperf_log.MODEL_HP_LOSS_FN, value=mlperf_log.BCE)

    if use_cuda:
        # Move model and loss to GPU
        model = model.cuda()
        criterion = criterion.cuda()

    # args.batch_size is the global batch size; each worker handles an equal share.
    if args.distributed:
        model = DDP(model)
        local_batch = args.batch_size // int(os.environ['WORLD_SIZE'])
    else:
        local_batch = args.batch_size
    # The loss is traced once, using random example inputs of the per-worker batch shape.
    traced_criterion = torch.jit.trace(criterion.forward, (torch.rand(local_batch,1),torch.rand(local_batch,1)))

    # Create files for tracking training
    valid_results_file = os.path.join(run_dir, 'valid_results.csv')
    # Calculate initial Hit Ratio and NDCG
    # Flatten the validation users/items and pre-split them into validation batches.
    test_x = test_users.view(-1).split(args.valid_batch_size)
    test_y = test_items.view(-1).split(args.valid_batch_size)

    hr, ndcg = val_epoch(model, test_x, test_y, dup_mask, real_indices, args.topk, samples_per_user=test_items.size(1),
                         num_user=all_test_users, distributed=args.distributed)
    print('Initial HR@{K} = {hit_rate:.4f}, NDCG@{K} = {ndcg:.4f}'
          .format(K=args.topk, hit_rate=hr, ndcg=ndcg))
    success = False
    mlperf_log.ncf_print(key=mlperf_log.TRAIN_LOOP)
    for epoch in range(args.epochs):

        mlperf_log.ncf_print(key=mlperf_log.TRAIN_EPOCH, value=epoch)
        mlperf_log.ncf_print(key=mlperf_log.INPUT_HP_NUM_NEG, value=args.negative_samples)
        mlperf_log.ncf_print(key=mlperf_log.INPUT_STEP_TRAIN_NEG_GEN)

        begin = time.time()

        # prepare data for epoch
        # Negatives are regenerated every epoch and appended after the
        # positives, matching the layout of train_label.
        neg_users, neg_items = generate_neg(train_users, mat, nb_items, args.negative_samples)
        epoch_users = torch.cat((train_users,neg_users))
        epoch_items = torch.cat((train_items,neg_items))
        del neg_users, neg_items

        # shuffle prepared data and split into batches
        # One permutation indexes users, items and labels so they stay aligned.
        epoch_indices = torch.randperm(len(epoch_users), device='cuda:{}'.format(args.local_rank))
        epoch_users = epoch_users[epoch_indices]
        epoch_items = epoch_items[epoch_indices]
        epoch_label = train_label[epoch_indices]
        if args.distributed:
            epoch_users = torch.chunk(epoch_users, args.world_size)[args.local_rank]
            epoch_items = torch.chunk(epoch_items, args.world_size)[args.local_rank]
            epoch_label = torch.chunk(epoch_label, args.world_size)[args.local_rank]
        epoch_users_list = epoch_users.split(local_batch)
        epoch_items_list = epoch_items.split(local_batch)
        epoch_label_list = epoch_label.split(local_batch)

        # only print progress bar on rank 0
        # Ceiling division of the full epoch size by the global batch size.
        num_batches = (len(epoch_indices) + args.batch_size - 1) // args.batch_size
        if args.local_rank == 0:
            qbar = tqdm.tqdm(range(num_batches))
        else:
            qbar = range(num_batches)
        # handle extremely rare case where last batch size < number of worker
        if len(epoch_users_list) < num_batches:
            print("epoch_size % batch_size < number of worker!")
            exit(1)

        for i in qbar:
            # selecting input from prepared data
            user = epoch_users_list[i]
            item = epoch_items_list[i]
            label = epoch_label_list[i].view(-1,1)

            # Clear gradients by dropping them instead of calling optimizer.zero_grad().
            for p in model.parameters():
                p.grad = None

            outputs = model(user, item)
            loss = traced_criterion(outputs, label).float()
            loss = torch.mean(loss.view(-1), 0)

            if args.fp16:
                fp_optimizer.step(loss, optimizer)
            else:
                loss.backward()
                optimizer.step()

        # NOTE(review): user/item/label only exist if the loop above ran at
        # least once; with zero batches this `del` would raise NameError.
        del epoch_users, epoch_items, epoch_label, epoch_users_list, epoch_items_list, epoch_label_list, user, item, label
        train_time = time.time() - begin
        begin = time.time()

        mlperf_log.ncf_print(key=mlperf_log.EVAL_START)

        hr, ndcg = val_epoch(model, test_x, test_y, dup_mask, real_indices, args.topk, samples_per_user=test_items.size(1),
                             num_user=all_test_users, output=valid_results_file, epoch=epoch, distributed=args.distributed)

        val_time = time.time() - begin
        print('Epoch {epoch}: HR@{K} = {hit_rate:.4f}, NDCG@{K} = {ndcg:.4f},'
              ' train_time = {train_time:.2f}, val_time = {val_time:.2f}'
              .format(epoch=epoch, K=args.topk, hit_rate=hr,
                      ndcg=ndcg, train_time=train_time,
                      val_time=val_time))

        mlperf_log.ncf_print(key=mlperf_log.EVAL_ACCURACY, value={"epoch": epoch, "value": hr})
        mlperf_log.ncf_print(key=mlperf_log.EVAL_TARGET, value=args.threshold)
        mlperf_log.ncf_print(key=mlperf_log.EVAL_STOP)

        # Stop training as soon as the target hit ratio is reached.
        if args.threshold is not None:
            if hr >= args.threshold:
                print("Hit threshold of {}".format(args.threshold))
                success = True
                break

    mlperf_log.ncf_print(key=mlperf_log.RUN_STOP, value={"success": success})
    run_stop_time = time.time()
    mlperf_log.ncf_print(key=mlperf_log.RUN_FINAL)

    # easy way of tracking mlperf score
    if success:
        print("mlperf_score", run_stop_time - run_start_time)
def train():
    """Train the NER model, evaluating on dev every epoch.

    All settings are read from the module-level ``FLAGS``. The tag mappings
    are built from the training sentences and cached in ``FLAGS.map_file``;
    the model configuration is loaded from ``FLAGS.config_file`` or created
    and saved there. From epoch index 8 onwards the model is checkpointed
    and also evaluated on the test set after every epoch.
    """
    # load data sets
    train_sentences = load_sentences(FLAGS.train_file, FLAGS.lower, FLAGS.zeros)
    dev_sentences = load_sentences(FLAGS.dev_file, FLAGS.lower, FLAGS.zeros)
    test_sentences = load_sentences(FLAGS.test_file, FLAGS.lower, FLAGS.zeros)

    # Use selected tagging scheme (IOB / IOBES)
    #update_tag_scheme(train_sentences, FLAGS.tag_schema)
    #update_tag_scheme(test_sentences, FLAGS.tag_schema)

    # create maps if not exist
    if not os.path.isfile(FLAGS.map_file):
        # Create a dictionary and a mapping for tags
        _t, tag_to_id, id_to_tag = tag_mapping(train_sentences)
        with open(FLAGS.map_file, "wb") as f:
            pickle.dump([tag_to_id, id_to_tag], f)
    else:
        # NOTE: pickle.load executes arbitrary code from the file; map_file
        # must only ever be a cache written by this function.
        with open(FLAGS.map_file, "rb") as f:
            tag_to_id, id_to_tag = pickle.load(f)

    # prepare data, get a collection of list containing index
    train_data = prepare_dataset(
        train_sentences, FLAGS.max_seq_len, tag_to_id, FLAGS.lower
    )
    dev_data = prepare_dataset(
        dev_sentences, FLAGS.max_seq_len, tag_to_id, FLAGS.lower
    )
    test_data = prepare_dataset(
        test_sentences, FLAGS.max_seq_len, tag_to_id, FLAGS.lower
    )
    # Report the real dev size (this used to print a hard-coded 0).
    print("%i / %i / %i sentences in train / dev / test." % (
        len(train_data), len(dev_data), len(test_data)))
    train_manager = BatchManager(train_data, FLAGS.batch_size)
    dev_manager = BatchManager(dev_data, FLAGS.batch_size)
    test_manager = BatchManager(test_data, FLAGS.batch_size)
    # make path for store log and model if not exist
    make_path(FLAGS)
    if os.path.isfile(FLAGS.config_file):
        config = load_config(FLAGS.config_file)
    else:
        config = config_model(tag_to_id)
        save_config(config, FLAGS.config_file)
    make_path(FLAGS)

    log_path = os.path.join("log", FLAGS.log_file)
    logger = get_logger(log_path)
    print_config(config, logger)

    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    steps_per_epoch = train_manager.len_data
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, config, logger)

        logger.info("start training")
        # Imported once here rather than on every epoch inside the loop.
        from tqdm import tqdm
        loss = []
        for i in range(FLAGS.max_epoch):
            for batch in tqdm(train_manager.iter_batch(shuffle=True)):
                step, batch_loss = model.run_step(sess, True, batch)
                loss.append(batch_loss)
                if step % FLAGS.steps_check == 0:
                    iteration = step // steps_per_epoch + 1
                    # Log the mean loss since the last report, then reset it.
                    logger.info("iteration:{} step:{}/{}, "
                                "NER loss:{:>9.6f}".format(
                        iteration, step%steps_per_epoch, steps_per_epoch, np.mean(loss)))
                    loss = []
            print("save result epoch:",i," ***************************************************")
            # The dev result does not gate checkpointing here; only the
            # epoch index does.
            evaluate(sess, model, "dev", dev_manager, id_to_tag, logger,i)
            if i >= 8:
                save_model(sess, model, FLAGS.ckpt_path, logger, global_steps=step)
                evaluate(sess, model, "test", test_manager, id_to_tag, logger,i)
예제 #44
0
def train():
    """Train the NER model for 100 epochs, checkpointing on dev improvement.

    All settings are read from the module-level ``FLAGS``. Character and
    tag mappings are built from the training sentences (optionally
    augmented with pretrained embeddings) and cached in ``FLAGS.map_file``;
    the model configuration is loaded from ``FLAGS.config_file`` or created
    and saved there. After every epoch the model is evaluated on dev, saved
    when that evaluation returns a truthy result, and evaluated on test.
    """
    # load data sets
    train_sentences = load_sentences(FLAGS.train_file, FLAGS.lower, FLAGS.zeros)
    dev_sentences = load_sentences(FLAGS.dev_file, FLAGS.lower, FLAGS.zeros)
    test_sentences = load_sentences(FLAGS.test_file, FLAGS.lower, FLAGS.zeros)

    # Use selected tagging scheme (IOB / IOBES)
    update_tag_scheme(train_sentences, FLAGS.tag_schema)
    update_tag_scheme(test_sentences, FLAGS.tag_schema)
    # The dev set is encoded with the train-derived tag_to_id below, so it
    # must be converted to the same tagging scheme as train and test.
    update_tag_scheme(dev_sentences, FLAGS.tag_schema)

    # create maps if not exist
    if not os.path.isfile(FLAGS.map_file):
        # create dictionary for word
        if FLAGS.pre_emb:
            dico_chars_train = char_mapping(train_sentences, FLAGS.lower)[0]
            _d, char_to_id, id_to_char = augment_with_pretrained(
                dico_chars_train.copy(),
                FLAGS.emb_file,
                list(itertools.chain.from_iterable(
                    [[w[0] for w in s] for s in test_sentences])
                )
            )
        else:
            _c, char_to_id, id_to_char = char_mapping(train_sentences, FLAGS.lower)

        # Create a dictionary and a mapping for tags
        _t, tag_to_id, id_to_tag = tag_mapping(train_sentences)
        with open(FLAGS.map_file, "wb") as f:
            pickle.dump([char_to_id, id_to_char, tag_to_id, id_to_tag], f)
    else:
        # NOTE: pickle.load executes arbitrary code from the file; map_file
        # must only ever be a cache written by this function.
        with open(FLAGS.map_file, "rb") as f:
            char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)

    # prepare data, get a collection of list containing index
    train_data = prepare_dataset(
        train_sentences, char_to_id, tag_to_id, FLAGS.lower
    )
    dev_data = prepare_dataset(
        dev_sentences, char_to_id, tag_to_id, FLAGS.lower
    )
    test_data = prepare_dataset(
        test_sentences, char_to_id, tag_to_id, FLAGS.lower
    )
    # Report the real dev size (this used to print a hard-coded 0).
    print("%i / %i / %i sentences in train / dev / test." % (
        len(train_data), len(dev_data), len(test_data)))

    train_manager = BatchManager(train_data, FLAGS.batch_size)
    dev_manager = BatchManager(dev_data, 100)
    test_manager = BatchManager(test_data, 100)
    # make path for store log and model if not exist
    make_path(FLAGS)
    if os.path.isfile(FLAGS.config_file):
        config = load_config(FLAGS.config_file)
    else:
        config = config_model(char_to_id, tag_to_id)
        save_config(config, FLAGS.config_file)
    make_path(FLAGS)

    log_path = os.path.join("log", FLAGS.log_file)
    logger = get_logger(log_path)
    print_config(config, logger)

    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    steps_per_epoch = train_manager.len_data
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec, config, id_to_char, logger)
        logger.info("start training")
        loss = []

        for i in range(100):
            for batch in train_manager.iter_batch(shuffle=True):
                step, batch_loss = model.run_step(sess, True, batch)
                loss.append(batch_loss)
                if step % FLAGS.steps_check == 0:
                    iteration = step // steps_per_epoch + 1
                    # Log the mean loss since the last report, then reset it.
                    logger.info("iteration:{} step:{}/{}, "
                                "NER loss:{:>9.6f}".format(
                        iteration, step%steps_per_epoch, steps_per_epoch, np.mean(loss)))
                    loss = []

            # Checkpoint only when the dev evaluation returns a truthy result.
            best = evaluate(sess, model, "dev", dev_manager, id_to_tag, logger)
            if best:
                save_model(sess, model, FLAGS.ckpt_path, logger)
            evaluate(sess, model, "test", test_manager, id_to_tag, logger)
예제 #45
0
def train():
    """Train the NER model for 75 epochs, checkpointing on dev improvement.

    Settings come from the module-level ``FLAGS``. Character/tag mappings
    are cached in ``FLAGS.map_file`` and the model configuration in
    ``FLAGS.config_file``. After each epoch the model is evaluated on dev,
    saved when that evaluation returns a truthy result, and then evaluated
    on test.
    """
    train_sents = load_sentences(FLAGS.train_file)
    dev_sents = load_sentences(FLAGS.dev_file)
    test_sents = load_sentences(FLAGS.test_file)

    # Convert every split to the configured tagging scheme (train, test, dev).
    for sents in (train_sents, test_sents, dev_sents):
        update_tag_scheme(sents, FLAGS.tag_schema)

    if os.path.isfile(FLAGS.map_file):
        # Reuse the cached mappings.
        with open(FLAGS.map_file, 'rb') as map_fh:
            char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(map_fh)
    else:
        if FLAGS.pre_emb:
            train_char_counts = char_mapping(train_sents, FLAGS.lower)[0]
            # First token of every word of every test sentence, flattened.
            test_chars = [w[0] for s in test_sents for w in s]
            _dico, char_to_id, id_to_char = augment_with_pretrained(
                train_char_counts.copy(), FLAGS.emb_file, test_chars)
        else:
            _c, char_to_id, id_to_char = char_mapping(train_sents,
                                                      FLAGS.lower)

        _t, tag_to_id, id_to_tag = tag_mapping(train_sents)
        with open(FLAGS.map_file, 'wb') as map_fh:
            pickle.dump([char_to_id, id_to_char, tag_to_id, id_to_tag],
                        map_fh)

    train_set = prepare_dataset(train_sents, char_to_id, tag_to_id)
    dev_set = prepare_dataset(dev_sents, char_to_id, tag_to_id)
    test_set = prepare_dataset(test_sents, char_to_id, tag_to_id)

    train_manager = BatchManager(train_set, FLAGS.batch_size, FLAGS.num_steps)
    dev_manager = BatchManager(dev_set, 100, FLAGS.num_steps)
    test_manager = BatchManager(test_set, 100, FLAGS.num_steps)

    make_path(FLAGS)
    if not os.path.isfile(FLAGS.config_file):
        config = config_model(char_to_id, tag_to_id)
        save_config(config, FLAGS.config_file)
    else:
        config = load_config(FLAGS.config_file)
    make_path(FLAGS)

    logger = get_logger(os.path.join("log", FLAGS.log_file))
    print_config(config, logger)

    # Restrict TensorFlow to GPU index 3, with a 0.9 memory fraction and
    # allow_growth enabled.
    os.environ["CUDA_VISIBLE_DEVICES"] = "3"
    session_conf = tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.9))
    session_conf.gpu_options.allow_growth = True

    steps_per_epoch = train_manager.len_data
    with tf.Session(config=session_conf) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        logger.info("start training")
        batch_losses = []
        for _epoch in range(75):
            for batch in train_manager.iter_batch(shuffle=True):
                step, batch_loss = model.run_step(sess, True, batch)
                batch_losses.append(batch_loss)
                if step % FLAGS.steps_check != 0:
                    continue
                # Progress report; the collected losses are reset but not logged.
                logger.info("iteration:{} step:{}/{},".format(
                    step // steps_per_epoch + 1,
                    step % steps_per_epoch,
                    steps_per_epoch))
                batch_losses = []
            if evaluate(sess, model, "dev", dev_manager, id_to_tag, logger):
                save_model(sess, model, FLAGS.ckpt_path, logger)
            evaluate(sess, model, "test", test_manager, id_to_tag, logger)
예제 #46
0
파일: main.py 프로젝트: SiZ-oLab/GPTNeo
def main(args):
    """Run prediction, evaluation, or training for the model in ``args.model``.

    The model parameters are loaded with ``fetch_model_params``, extended with
    run-time settings taken from ``args``, and used to build a
    ``TPUEstimator``. Exactly one of the following modes then runs:

    * ``args.predict``: generate predictions from ``args.prompt`` and return.
    * ``args.eval``: run every task listed in ``params["eval_tasks"]`` and
      return.
    * otherwise train up to ``params["train_steps"]``; when predict steps,
      eval steps or eval tasks are configured, training pauses every
      ``args.steps_per_checkpoint`` steps to run them.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``model``, ``prompt``, ``check_dataset``, ``new``, ``auto_layout``,
            ``auto_layout_and_mesh_shape``, ``tpu``, ``gpu_ids``,
            ``steps_per_checkpoint``, ``predict``, ``eval`` and ``sacred_id``.
    """
    # Setup logging
    logger = setup_logging(args)

    # Read params of model
    params = fetch_model_params(args.model)

    # Fetch appropriate input functions
    input_fn = generic_text
    pred_input_fn = pred_input
    handle_pred_output_fn = handle_pred_output

    if params["mlm_training"]:
        mlm_sample_text_fn = partial(mlm_sample_text, params)
        input_fn = partial(generic_text, sample_text_fn=mlm_sample_text_fn)

    # Fetch encoder per params
    encoder = fetch_encoder(params)

    pred_input_fn = partial(pred_input_fn,
                            path_to_prompt=args.prompt,
                            logger=logger,
                            enc=encoder)

    # Sample from Dataset if check dataset flag is on
    if args.check_dataset:
        check_dataset(input_fn, params)

    # Confirm deletion of checkpoint files if --new flag is set
    if args.new:
        if yes_or_no(
                f"Are you sure you want to remove '{params['model_path']}' to start afresh?"
        ):
            remove_gs_or_filepath(params["model_path"])
        else:
            exit()

    # Save config to logdir for experiment management
    save_config(params, params["model_path"])

    # Add to params: auto_layout, auto_layout_and_mesh_shape, use_tpu, num_cores
    mesh_shape = mtf.convert_to_shape(params["mesh_shape"])
    params["num_cores"] = mesh_shape.size
    params["auto_layout"] = args.auto_layout
    params["auto_layout_and_mesh_shape"] = args.auto_layout_and_mesh_shape
    params["use_tpu"] = args.tpu is not None
    params["gpu_ids"] = args.gpu_ids
    params["steps_per_checkpoint"] = args.steps_per_checkpoint
    # Expand attention types param
    params["attention_types"] = expand_attention_types_params(
        params["attention_types"])
    # The expanded list must contain one entry per layer.
    assert len(params["attention_types"]) == params["n_layer"]
    params["predict_batch_size"] = params.get("predict_batch_size",
                                              1)  # Default to 1
    params["predict"] = args.predict
    # Default model selection to GPT since it's the only option for now
    params['model'] = params.get("model", "GPT")

    # Sample quality of MoE models suffers when using the faster sampling
    # method, so default to slow_sampling if moe layers are present
    params["slow_sampling"] = params["moe_layers"] is not None

    logger.info(f"params = {params}")

    # Get eval tasks from params
    eval_tasks = params.get("eval_tasks", [])
    has_predict_or_eval_steps_or_eval_tasks = (params["predict_steps"] > 0
                                               or params["eval_steps"] > 0
                                               or len(eval_tasks) > 0)

    for t in eval_tasks:
        assert t in task_descriptors, f"Eval task '{t}' is not known"
        task_descriptors[t]["init_fn"](params)

    # Set up TPUs and Estimator. "colab" means: let the resolver discover the
    # TPU on its own instead of passing an explicit name/address.
    if args.tpu == "colab":
        tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
        ) if params["use_tpu"] else None
    else:
        tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
            args.tpu) if params["use_tpu"] else None

    config = tpu_config.RunConfig(
        cluster=tpu_cluster_resolver,
        model_dir=params["model_path"],
        save_checkpoints_steps=None,  # Disable the default saver
        save_checkpoints_secs=None,  # Disable the default saver
        log_step_count_steps=params["iterations"],
        save_summary_steps=params["iterations"],
        tpu_config=tpu_config.TPUConfig(
            num_shards=mesh_shape.size,
            iterations_per_loop=params["iterations"],
            num_cores_per_replica=1,
            per_host_input_for_training=tpu_config.InputPipelineConfig.
            BROADCAST))

    # NOTE(review): this estimator evaluates with train_batch_size while the
    # per-task estimators below use params["eval_batch_size"] - confirm that
    # the difference is intended.
    estimator = tpu_estimator.TPUEstimator(
        use_tpu=params["use_tpu"],
        model_fn=model_fn,
        config=config,
        train_batch_size=params["train_batch_size"],
        eval_batch_size=params["train_batch_size"],
        predict_batch_size=params["predict_batch_size"],
        params=params)

    def _make_task_estimator(task):
        # Each eval task gets its own estimator whose params name the task.
        task_params = params.copy()
        task_params["eval_task"] = task
        return tpu_estimator.TPUEstimator(
            use_tpu=params["use_tpu"],
            model_fn=model_fn,
            config=config,
            train_batch_size=params["train_batch_size"],
            eval_batch_size=params["eval_batch_size"],
            predict_batch_size=params["predict_batch_size"],
            params=task_params)

    eval_task_estimators = {
        task: _make_task_estimator(task)
        for task in eval_tasks
    }

    def _run_eval_tasks():
        # Evaluate every configured eval task with its dedicated estimator.
        for task in eval_tasks:
            logger.info(f"Starting evaluation task '{task}'")
            task_info = task_descriptors[task]["get_task_info_fn"](params)
            task_estimator = eval_task_estimators[task]
            task_input_fn = task_descriptors[task]["input_fn"]
            eval_results = task_estimator.evaluate(input_fn=task_input_fn,
                                                   steps=task_info["n_steps"],
                                                   name=task)
            logger.info(f"Eval task '{task}' results: {eval_results}")

    current_step = int(
        estimator_lib._load_global_step_from_checkpoint_dir(
            params["model_path"]))
    logger.info(f"Current step {current_step}")

    if args.predict:
        # Predict
        predictions = estimator.predict(input_fn=pred_input_fn)
        logger.info("Predictions generated")
        enc = fetch_encoder(params)
        handle_pred_output_fn(
            predictions,
            logger,
            enc,
            params,
            out_name=f"predictions_{args.sacred_id}_{current_step}")
        return

    if args.eval:
        _run_eval_tasks()
        return

    elif has_predict_or_eval_steps_or_eval_tasks:
        # Eval and train - stop and predict and/or eval every checkpoint
        while current_step < params["train_steps"]:
            next_checkpoint = min(current_step + args.steps_per_checkpoint,
                                  params["train_steps"])

            estimator.train(input_fn=partial(input_fn, eval=False),
                            max_steps=next_checkpoint)
            current_step = next_checkpoint

            if params["predict_steps"] > 0:
                logger.info("Running prediction...")
                predictions = estimator.predict(input_fn=pred_input_fn)
                enc = fetch_encoder(params)
                handle_pred_output_fn(
                    predictions,
                    logger,
                    enc,
                    params,
                    out_name=f"predictions_{args.sacred_id}_{current_step}")

            if params["eval_steps"] > 0:
                logger.info("Running evaluation...")
                eval_results = estimator.evaluate(input_fn=partial(input_fn,
                                                                   eval=True),
                                                  steps=params["eval_steps"])
                logger.info(f"Eval results: {eval_results}")

            _run_eval_tasks()
        return
    else:
        # Else, just train without stopping for prediction/evaluation.
        # A train(max_steps=N) call does no work once the checkpointed global
        # step has reached N, so current_step must be refreshed after every
        # call; otherwise this loop never terminates after training finishes.
        while current_step < params["train_steps"]:
            estimator.train(input_fn=partial(input_fn, eval=False),
                            max_steps=params["train_steps"])
            current_step = int(
                estimator_lib._load_global_step_from_checkpoint_dir(
                    params["model_path"]))
예제 #47
0
def main(config):
    """Build the data loaders and a ``Trainer`` from ``config``, then train or test.

    The function first checks the TensorFlow version, prints the manual setup
    requirements and drops into ``pdb`` so the user has to confirm with "c".
    It then creates two loaders over the same data path (the mixture named by
    ``dir_loader`` and the ``train5050`` target) and hands them to ``Trainer``.

    Args:
        config: Run configuration object. Attributes read here: ``random_seed``,
            ``is_train``, ``data_path``, ``test_data_path``, ``batch_size``,
            ``scale_size``, ``data_format``, ``split`` and ``load_path``.
            ``config.pct`` is set here, and ``config.batch_size`` is forced to
            64 when not training.

    Raises:
        Exception: In test mode when ``config.load_path`` is not set.
    """
    # NOTE: Run this in shell first.
    # NOTE(review): the 3-character prefix check also accepts 1.10 - 1.19;
    # confirm whether only TensorFlow 1.1.x is meant.
    if tf.__version__[:3] != '1.1':
        sys.exit(
            '***NOTE!***: FIRST RUN:\n"source ~/began/BEGAN-tensorflow/tf1.1/bin/activate"'
        )
    # NOTE: Other setup requirements.
    print(
        '\nREQUIREMENTS:\n  1. The file "target_num_user_weights.npy" should '
        'contain the user-provided labels for the desired target number.\n  '
        '2. Use the flag --mnist_class to set the class of MNIST digit that '
        'matches that of the user-provided labels.\n  3. In trainer_iwgn.py, '
        '"num_user_labeled" must correspond to the number of labels in '
        '"target_num_user_weights.npy".')
    print('Press "c" to continue.')
    # The debugger prompt doubles as a "confirm before running" gate.
    pdb.set_trace()

    prepare_dirs_and_logger(config)

    tf.set_random_seed(config.random_seed)

    if config.is_train:
        data_path = config.data_path
    else:
        config.batch_size = 64
        if config.test_data_path is None:
            data_path = config.data_path
        else:
            data_path = config.test_data_path

    # Name of the training mixture to load. Other values that were used:
    # 'train8020', 'train6040', 'train4060', 'train1090', 'train0510',
    # 'trainBig0510', 'train_all_1090', 'train_small_5050'.
    dir_loader = 'train2080'
    # The last four characters encode the two percentages, e.g. '2080' -> [20, 80].
    pct_digits = dir_loader[-4:]
    config.pct = [int(pct_digits[:2]), int(pct_digits[2:])]

    dir_target = 'train5050'
    data_loader = get_loader(data_path,
                             config.batch_size,
                             config.scale_size,
                             config.data_format,
                             config.split,
                             target=dir_loader)
    data_loader_target = get_loader(data_path,
                                    config.batch_size,
                                    config.scale_size,
                                    config.data_format,
                                    config.split,
                                    target=dir_target)
    trainer = Trainer(config, data_loader, data_loader_target)

    if config.is_train:
        save_config(config)
        trainer.train()
    else:
        if not config.load_path:
            raise Exception(
                "[!] You should specify `load_path` to load a pretrained model"
            )
        trainer.test()
예제 #48
0
def train():
    """Train the NER model configured through the module-level ``FLAGS``.

    Steps, in order: load the train/dev/test sentences, build (or reload from
    ``FLAGS.map_file``) the char/tag <-> id mappings, convert the sentences to
    id datasets and batch them, load or create the model config, then run 100
    passes over the training batches. After every pass the model is evaluated
    on dev (and saved when ``evaluate`` reports a new best) and on test.
    """
    # Load data sets. Per the original author's note, each sentence is a
    # list of [char, tag] pairs - TODO confirm against load_sentences.
    train_sentences = load_sentences(FLAGS.train_file, FLAGS.lower,
                                     FLAGS.zeros)
    dev_sentences = load_sentences(FLAGS.dev_file, FLAGS.lower, FLAGS.zeros)
    test_sentences = load_sentences(FLAGS.test_file, FLAGS.lower, FLAGS.zeros)

    # Use selected tagging scheme (IOB / IOBES)
    # NOTE(review): dev_sentences is not passed through update_tag_scheme;
    # confirm that this is intended.
    update_tag_scheme(train_sentences, FLAGS.tag_schema)
    update_tag_scheme(test_sentences, FLAGS.tag_schema)

    # The data processing itself is handled by loader.py.

    # create maps if not exist
    if not os.path.isfile(FLAGS.map_file):

        # create dictionary for word
        if FLAGS.pre_emb:  # use pre-trained embeddings
            # Only the first element of char_mapping's result is kept here:
            # the character dictionary built from the training data.
            dico_chars_train = char_mapping(train_sentences, FLAGS.lower)[0]

            # The third argument flattens test_sentences into a single list
            # holding the first field (the character) of every token.
            # Per the original author's note, dico_chars is the training
            # dictionary extended with test-set characters found in the
            # pre-trained embedding file - TODO confirm against
            # augment_with_pretrained.
            dico_chars, char_to_id, id_to_char = augment_with_pretrained(
                dico_chars_train.copy(), FLAGS.emb_file,
                list(
                    itertools.chain.from_iterable([[w[0] for w in s]
                                                   for s in test_sentences]))
            )
        else:
            _c, char_to_id, id_to_char = char_mapping(train_sentences,
                                                      FLAGS.lower)

        # Create a dictionary and a mapping for tags
        _t, tag_to_id, id_to_tag = tag_mapping(train_sentences)
        # Persist the mappings so that later runs reuse exactly the same ids.
        with open(FLAGS.map_file, "wb") as f:
            pickle.dump([char_to_id, id_to_char, tag_to_id, id_to_tag], f)
    else:
        with open(FLAGS.map_file, "rb") as f:
            char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)

    # prepare data, get a collection of list containing index
    train_data = prepare_dataset(train_sentences, char_to_id, tag_to_id,
                                 FLAGS.lower)
    dev_data = prepare_dataset(dev_sentences, char_to_id, tag_to_id,
                               FLAGS.lower)
    test_data = prepare_dataset(test_sentences, char_to_id, tag_to_id,
                                FLAGS.lower)
    # Per the original author's note, every entry describes one sentence as
    # [chars, char ids, feature ids, tag ids] - TODO confirm against
    # prepare_dataset.
    print("%i / %i / %i sentences in train / dev / test." %
          (len(train_data), len(dev_data), len(test_data)))

    # Original note: the default batch_size is 20.
    train_manager = BatchManager(train_data, FLAGS.batch_size)
    dev_manager = BatchManager(dev_data, 100)
    test_manager = BatchManager(test_data, 100)
    # Per the original author's note, a BatchManager holds batch_data
    # (length-sorted batches padded to a common length) and len_data (the
    # number of batches) - TODO confirm against BatchManager.

    # make path for store log and model if not exist
    make_path(FLAGS)
    if os.path.isfile(FLAGS.config_file):
        config = load_config(FLAGS.config_file)
    else:
        config = config_model(char_to_id, tag_to_id)
        save_config(config, FLAGS.config_file)
    # NOTE(review): this second make_path call looks redundant with the one
    # above - confirm and remove.
    make_path(FLAGS)

    log_path = os.path.join("log", FLAGS.log_file)
    logger = get_logger(log_path)
    print_config(config, logger)

    # limit GPU memory: let the session grow its allocation on demand
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    steps_per_epoch = train_manager.len_data
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        logger.info("start training")
        loss = []
        for _ in range(100):
            for batch in train_manager.iter_batch(shuffle=True):
                step, batch_loss = model.run_step(sess, True, batch)
                loss.append(batch_loss)
                if step % FLAGS.steps_check == 0:
                    iteration = step // steps_per_epoch + 1
                    logger.info("iteration:{} step:{}/{}, "
                                "NER loss:{:>9.6f}".format(
                                    iteration, step % steps_per_epoch,
                                    steps_per_epoch, np.mean(loss)))
                    # Start a fresh averaging window after each report.
                    loss = []

            best = evaluate(sess, model, "dev", dev_manager, id_to_tag, logger)
            if best:
                save_model(sess, model, FLAGS.ckpt_path, logger)
            evaluate(sess, model, "test", test_manager, id_to_tag, logger)
예제 #49
0
파일: install.py 프로젝트: Abriko/pyLAMP
def go_debian():
	"""Interactively install and configure the LAMP stack through apt-get.

	After the user confirms with Enter, this function, in order:
	creates the working directories, saves the initial config with a random
	MySQL root password, preseeds debconf, installs the packages, rewrites
	the Apache and PHP settings, initialises the database through
	``mysql.init_db``, installs and configures vsftpd, applies the iptables
	rules, registers the services with update-rc.d and finally calls
	``finish_install``.

	The steps write under /etc, /var/www, /root and /tmp. ``os.mkdir`` raises
	OSError when one of the working directories already exists.
	"""
	global version

	raw_input("LAMP Setup script is ready install it\nPress Enter key to continue, Press Ctrl+D to cancel the installation\n")

	#generate config file
	os.mkdir('/tmp/lamp')
	os.mkdir('/etc/lamp')
	os.mkdir('/root/lamp_bak')
	os.mkdir('/etc/lamp/ftp_users')
	config = {}
	config['version'] = version
	config['system'] = 'u'
	config['wwwroot'] = '/var/www'
	config['apache_etc'] = '/etc/apache2'
	config['apache'] = 'apache2'
	config['ftproot'] = '/var/www'
	config['vsftpd_conf_path'] = '/etc/vsftpd.conf'
	config['root_own'] = 'www-data:www-data'

	# Disabled download step, kept for reference:
	# logging.info('download files ...')
	# utils.exec_cmd('wget http://s1b-static.yuki.ws/files/lamp/files.tar.xz -O /tmp/lamp/files.tar.xz')
	# utils.exec_cmd('tar xvf /tmp/lamp/files.tar.xz -C /tmp/lamp')

	#setting mysql password
	mysql_root_pass = utils.gen_random_str()
	config['mysqlrootpass'] = mysql_root_pass
	utils.save_config(config)

	# NOTE(review): this writes the MySQL root password to the debug log.
	logging.debug('generate mysql root password : %s', mysql_root_pass)
	# Preseed the answers the package installers would otherwise prompt for.
	with open('/tmp/lamp/debconf.tmp', 'w+') as debconf_tmp:
		debconf_tmp.write('mysql-server mysql-server/root_password password %s\nmysql-server mysql-server/root_password_again password %s\n' % (mysql_root_pass, mysql_root_pass))
		debconf_tmp.write('iptables-persistent iptables-persistent/autosave_v4 boolean true\niptables-persistent iptables-persistent/autosave_v6 boolean true\n')
	utils.exec_cmd('debconf-set-selections /tmp/lamp/debconf.tmp')
	# The preseed file contains the password in clear text; remove it again.
	os.remove('/tmp/lamp/debconf.tmp')

	logging.info('update data packages info...')
	utils.exec_cmd('apt-get update')

	logging.info('update system, please wait...')
	#utils.exec_cmd('apt-get upgrade -y')

	logging.info('install and config packages, please wait...')
	utils.exec_cmd('apt-get install -y vim axel curl unzip build-essential python-mysqldb python-software-properties php5 apache2 libapache2-mod-php5 mysql-server php5-mysql php5-curl php5-gd php5-mcrypt php5-imagick php5-memcached php5-sqlite php5-xcache iptables-persistent libpam-mysql')


	logging.info('setting up web-server...')

	logging.debug('setting apache conf')
	# platform.uname()[1] is the network node name of this machine.
	with open('/etc/apache2/httpd.conf', 'w') as httpd_conf:
		httpd_conf.write('ServerName %s\n' % (platform.uname()[1]))

	utils.change_conf('/etc/apache2/apache2.conf',
		[
			{'old':'Timeout 300','new':'Timeout 45'},
			{'old':'MaxKeepAliveRequests 100','new':'MaxKeepAliveRequests 200'}
		]
	)

	utils.change_conf('/etc/apache2/conf.d/security',
		[
			{'old':'ServerTokens OS','new':'ServerTokens Prod'},
			{'old':'ServerSignature On','new':'ServerSignature Off'}
		]
	)

	# Enable mod-rewrite
	utils.exec_cmd('a2enmod rewrite')

	# Set apache php.ini
	logging.debug('Setting php.ini')
	utils.change_conf('/etc/php5/apache2/php.ini',
		[
			{'old':'post_max_size = 8M','new':'post_max_size = 50M'},
			{'old':'upload_max_filesize = 2M','new':'upload_max_filesize = 50M'},
			{'old':'expose_php = On','new':'expose_php = Off'},
			{'old':'display_errors = Off','new':'display_errors = On'},
			{'old':';date.timezone =','new':'date.timezone = Asia/Chongqing'},
			{'old':'request_order = "GP"','new':'request_order = "CGP"'}
		]
	)


	# Change default www dir
	utils.exec_cmd('mkdir -p /var/www/public_html')
	utils.exec_cmd('mv /var/www/index.html /var/www/public_html/')
	utils.change_conf('/etc/apache2/sites-enabled/000-default', [{'old':'/var/www','new':'/var/www/public_html'}])

	# Init phpmyadmin and lamp user pass
	utils.exec_cmd('service mysql restart')
	lamp_controluser_pass = mysql.init_db(mysql_root_pass)

	utils.cp('<APPROOT>/files/phpmyadmin_host', '/etc/apache2/mods-available/phpmyadmin.conf')
	utils.exec_cmd('ln -s /etc/apache2/mods-available/phpmyadmin.conf /etc/apache2/mods-enabled/phpmyadmin.conf')

	config['lampuser'] = '******'
	config['lamppass'] = lamp_controluser_pass
	utils.save_config(config)

	# create test php script
	utils.exec_cmd(r'echo "<?php phpinfo() ?>" > /var/www/public_html/test.php')

	# Change wwwroot permissions
	utils.exec_cmd('chown -R www-data:www-data /var/www')
	utils.exec_cmd('chmod -R go-rwx /var/www')
	utils.exec_cmd('chmod -R g+rw /var/www')
	utils.exec_cmd('chmod -R o+r /var/www')

	utils.exec_cmd('service apache2 restart')

	logging.info('setting up ftp-server...')

	# Init ftp and create main account
	#ftp_pass = ftp.init_ftp()
	if platform.machine() == 'x86_64':
		utils.exec_cmd('axel -q -n 3 -o /tmp/lamp/vsftpd.deb http://ftp.jaist.ac.jp/pub/Linux/ubuntu/pool/main/v/vsftpd/vsftpd_3.0.2-1ubuntu1_amd64.deb')
		#add fix 12.04 pam.d-mysql bugs "libgcc_s.so.1 must be installed for pthread_cancel to work"
		utils.exec_cmd('DEBIAN_FRONTEND=noninteractive apt-get install -qq libpam-ldap')
	else:
		utils.exec_cmd('axel -q -n 3 -o /tmp/lamp/vsftpd.deb http://ftp.jaist.ac.jp/pub/Linux/ubuntu/pool/main/v/vsftpd/vsftpd_3.0.2-1ubuntu1_i386.deb')

	returncode = utils.exec_cmd('dpkg -i /tmp/lamp/vsftpd.deb')
	# NOTE(review): only return code 2 is reported, and only at debug level;
	# confirm which codes utils.exec_cmd can return for a failed dpkg run.
	if returncode == 2:
		logging.debug('install vsftpd failed!')

	utils.change_conf('<APPROOT>/files/vsftpd_conf', [
		{'old':'<ftpuser>','new':'ftp'},
		{'old':'<guestuser>','new':'www-data'}
	], '/etc/vsftpd.conf')


	#add fix 500 OOPS: priv_sock_get_cmd
	if platform.machine() == 'x86_64':
		utils.change_conf('/etc/vsftpd.conf', [{'old':'ftp_users','new':'ftp_users\nseccomp_sandbox=NO'}])

	utils.change_conf('<APPROOT>/files/vsftpd_mysql', [{'old':'<passwd>','new':lamp_controluser_pass}], '/etc/pam.d/vsftpd-mysql')

	#set master ftp account
	args = {}
	args['username'] = '******'
	args['path'] = '/var/www'
	args['site_id'] = 1

	ftp_pass = ftp.create_ftp(args)
	utils.exec_cmd('service vsftpd restart')


	logging.info('setting up system...')
	# Set iptables
	utils.cp('<APPROOT>/files/iptables_rules', '/etc/iptables/rules.v4')
	utils.exec_cmd('service iptables-persistent restart')

	#load kernel ip_nat_ftp
	utils.exec_cmd('modprobe nf_nat_ftp')
	utils.exec_cmd('echo "nf_nat_ftp" >> /etc/modules')


	# Add auto start at boot
	utils.exec_cmd('update-rc.d apache2 defaults')
	utils.exec_cmd('update-rc.d mysql defaults')
	utils.exec_cmd('update-rc.d vsftpd defaults')

	finish_install(mysql_root_pass, ftp_pass, 'apt-get upgrade -y')
예제 #50
0
    "user-read-playback-state user-modify-playback-state user-read-currently-playing",
    "spotify_redirect_uri_playlist":
    "https://spoofy.baka.tokyo/callback_playlist/",
    "spotify_scopes_playlist":
    "playlist-modify-public playlist-read-collaborative",
    "spotify_connect_name": "Spoofy Bot",
    "http_host": "127.0.0.1",
    "http_port": 5000
}

if __name__ == '__main__':
    try:
        config = utils.load_config()
    except FileNotFoundError:
        config = DEFAULT_CONFIG.copy()
        utils.save_config(config)

    # If discord bot token is not present, error out and quit as we cannot do anything useful.
    if 'bot_token' not in config.keys() or config['bot_token'] in [
            "", DEFAULT_CONFIG['bot_token']
    ]:
        config['bot_token'] = DEFAULT_CONFIG['bot_token']
        utils.save_config(config)
        print(
            "Please configure the bot_token by modifying the 'config.json' file!",
            flush=True)
        sys.exit(1)
    bot_token = config['bot_token']

    # If the encryption key is missing, generate it and save it to the config.
    if 'encryption_key_passphrase' not in config.keys(
예제 #51
0
파일: engine.py 프로젝트: Logmytech/trackma
    def save_config(self):
        """Write ``self.config`` to ``self.configfile`` via ``utils.save_config``."""
        settings, destination = self.config, self.configfile
        utils.save_config(settings, destination)
예제 #52
0
def main():
    global msglogger

    script_dir = os.path.dirname(__file__)

    args = parse_args()

    # Distiller loggers
    msglogger = apputils.config_pylogger('logging.conf',
                                         args.name,
                                         output_dir=args.output_dir)
    tflogger = TensorBoardLogger(msglogger.logdir)
    # tflogger.log_gradients = True
    # pylogger = PythonLogger(msglogger)

    if args.seed is not None:
        msglogger.info("Using seed = {}".format(args.seed))
        torch.manual_seed(args.seed)
        np.random.seed(seed=args.seed)

    args.qe_mode = str(args.qe_mode).split('.')[1]
    args.qe_clip_acts = str(args.qe_clip_acts).split('.')[1]

    apputils.log_execution_env_state(sys.argv)

    if args.gpus is not None:
        try:
            args.gpus = [int(s) for s in args.gpus.split(',')]
        except ValueError:
            msglogger.error(
                'ERROR: Argument --gpus must be a comma-separated list of integers only'
            )
            exit(1)
        if len(args.gpus) > 1:
            msglogger.error('ERROR: Only single GPU supported for NCF')
            exit(1)
        available_gpus = torch.cuda.device_count()
        for dev_id in args.gpus:
            if dev_id >= available_gpus:
                msglogger.error(
                    'ERROR: GPU device ID {0} requested, but only {1} devices available'
                    .format(dev_id, available_gpus))
                exit(1)
        # Set default device in case the first one on the list != 0
        torch.cuda.set_device(args.gpus[0])

    # Save configuration to file
    config = {k: v for k, v in args.__dict__.items()}
    config['timestamp'] = "{:.0f}".format(datetime.utcnow().timestamp())
    config['local_timestamp'] = str(datetime.now())
    run_dir = msglogger.logdir
    msglogger.info("Saving config and results to {}".format(run_dir))
    if not os.path.exists(run_dir) and run_dir != '':
        os.makedirs(run_dir)
    utils.save_config(config, run_dir)

    # Check that GPUs are actually available
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    t1 = time.time()
    # Load Data
    training = not (args.eval or args.qe_calibration
                    or args.activation_histograms)
    msglogger.info('Loading data')
    if training:
        train_dataset = CFTrainDataset(
            os.path.join(args.data, TRAIN_RATINGS_FILENAME),
            args.negative_samples)
        train_dataloader = torch.utils.data.DataLoader(
            dataset=train_dataset,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.workers,
            pin_memory=True)
        nb_users, nb_items = train_dataset.nb_users, train_dataset.nb_items
    else:
        train_dataset = None
        train_dataloader = None
        nb_users, nb_items = (138493, 26744)

    test_ratings = load_test_ratings(
        os.path.join(args.data, TEST_RATINGS_FILENAME))  # noqa: E501
    test_negs = load_test_negs(os.path.join(args.data, TEST_NEG_FILENAME))

    msglogger.info(
        'Load data done [%.1f s]. #user=%d, #item=%d, #train=%s, #test=%d' %
        (time.time() - t1, nb_users, nb_items,
         str(train_dataset.mat.nnz) if training else 'N/A', len(test_ratings)))

    # Create model
    model = NeuMF(nb_users,
                  nb_items,
                  mf_dim=args.factors,
                  mf_reg=0.,
                  mlp_layer_sizes=args.layers,
                  mlp_layer_regs=[0. for i in args.layers],
                  split_final=args.split_final)
    if use_cuda:
        model = model.cuda()
    msglogger.info(model)
    msglogger.info("{} parameters".format(utils.count_parameters(model)))

    # Save model text description
    with open(os.path.join(run_dir, 'model.txt'), 'w') as file:
        file.write(str(model))

    compression_scheduler = None
    start_epoch = 0
    optimizer = None
    if args.load:
        if training:
            model, compression_scheduler, optimizer, start_epoch = apputils.load_checkpoint(
                model, args.load)
            if args.reset_optimizer:
                start_epoch = 0
                optimizer = None
        else:
            model = apputils.load_lean_checkpoint(model, args.load)

    # Add loss to graph
    criterion = nn.BCEWithLogitsLoss()

    if use_cuda:
        criterion = criterion.cuda()

    if training and optimizer is None:
        optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
        msglogger.info('Optimizer Type: %s', type(optimizer))
        msglogger.info('Optimizer Args: %s', optimizer.defaults)

    if args.compress:
        compression_scheduler = distiller.file_config(model, optimizer,
                                                      args.compress)
        model.cuda()

    # Create files for tracking training
    valid_results_file = os.path.join(run_dir, 'valid_results.csv')

    if args.qe_calibration or args.activation_histograms:
        calib = {
            'portion':
            args.qe_calibration,
            'desc_str':
            'quantization calibration stats',
            'collect_func':
            partial(distiller.data_loggers.collect_quant_stats,
                    inplace_runtime_check=True,
                    disable_inplace_attrs=True)
        }
        hists = {
            'portion':
            args.activation_histograms,
            'desc_str':
            'activation histograms',
            'collect_func':
            partial(distiller.data_loggers.collect_histograms,
                    activation_stats=None,
                    nbins=2048,
                    save_hist_imgs=True)
        }
        d = calib if args.qe_calibration else hists

        distiller.utils.assign_layer_fq_names(model)
        num_users = int(np.floor(len(test_ratings) * d['portion']))
        msglogger.info(
            "Generating {} based on {:.1%} of the test-set ({} users)".format(
                d['desc_str'], d['portion'], num_users))

        test_fn = partial(val_epoch,
                          ratings=test_ratings,
                          negs=test_negs,
                          K=args.topk,
                          use_cuda=use_cuda,
                          processes=args.processes,
                          num_users=num_users)
        d['collect_func'](model=model,
                          test_fn=test_fn,
                          save_dir=run_dir,
                          classes=None)

        return 0

    if args.eval:
        if args.quantize_eval and args.qe_calibration is None:
            model.cpu()
            quantizer = quantization.PostTrainLinearQuantizer.from_args(
                model, args)
            dummy_input = (torch.tensor([1]), torch.tensor([1]),
                           torch.tensor([True], dtype=torch.bool))
            quantizer.prepare_model(dummy_input)
            model.cuda()

        distiller.utils.assign_layer_fq_names(model)

        if args.eval_fp16:
            model = model.half()

        # Calculate initial Hit Ratio and NDCG
        begin = time.time()
        hits, ndcgs = val_epoch(model,
                                test_ratings,
                                test_negs,
                                args.topk,
                                use_cuda=use_cuda,
                                processes=args.processes)
        val_time = time.time() - begin
        hit_rate = np.mean(hits)
        msglogger.info(
            'Initial HR@{K} = {hit_rate:.4f}, NDCG@{K} = {ndcg:.4f}, val_time = {val_time:.2f}'
            .format(K=args.topk,
                    hit_rate=hit_rate,
                    ndcg=np.mean(ndcgs),
                    val_time=val_time))
        hit_rate = 0

        if args.quantize_eval:
            checkpoint_name = 'quantized'
            apputils.save_checkpoint(0,
                                     'NCF',
                                     model,
                                     optimizer=None,
                                     extras={'quantized_hr@10': hit_rate},
                                     name='_'.join([args.name, 'quantized'])
                                     if args.name else checkpoint_name,
                                     dir=msglogger.logdir)
        return 0

    total_samples = len(train_dataloader.sampler)
    steps_per_epoch = math.ceil(total_samples / args.batch_size)
    best_hit_rate = 0
    best_epoch = 0
    for epoch in range(start_epoch, args.epochs):
        msglogger.info('')
        model.train()
        losses = utils.AverageMeter()

        begin = time.time()

        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch, optimizer)

        loader = tqdm.tqdm(train_dataloader)
        for batch_index, (user, item, label) in enumerate(loader):
            user = torch.autograd.Variable(user, requires_grad=False)
            item = torch.autograd.Variable(item, requires_grad=False)
            label = torch.autograd.Variable(label, requires_grad=False)
            if use_cuda:
                user = user.cuda(async=True)
                item = item.cuda(async=True)
                label = label.cuda(async=True)

            if compression_scheduler:
                compression_scheduler.on_minibatch_begin(
                    epoch, batch_index, steps_per_epoch, optimizer)

            outputs = model(user, item, torch.tensor([False],
                                                     dtype=torch.bool))
            loss = criterion(outputs, label)

            if compression_scheduler:
                compression_scheduler.before_backward_pass(
                    epoch,
                    batch_index,
                    steps_per_epoch,
                    loss,
                    optimizer,
                    return_loss_components=False)

            losses.update(loss.data.item(), user.size(0))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if compression_scheduler:
                compression_scheduler.on_minibatch_end(epoch, batch_index,
                                                       steps_per_epoch,
                                                       optimizer)

            # Save stats to file
            description = (
                'Epoch {} Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                    epoch, loss=losses))
            loader.set_description(description)

            steps_completed = batch_index + 1
            if steps_completed % args.log_freq == 0:
                stats_dict = OrderedDict()
                stats_dict['Loss'] = losses.avg
                stats = ('Performance/Training/', stats_dict)
                params = model.named_parameters(
                ) if args.log_params_histograms else None
                distiller.log_training_progress(stats, params, epoch,
                                                steps_completed,
                                                steps_per_epoch, args.log_freq,
                                                [tflogger])

                tflogger.log_model_buffers(model,
                                           ['tracked_min', 'tracked_max'],
                                           'Quant/Train/Acts/TrackedMinMax',
                                           epoch, steps_completed,
                                           steps_per_epoch, args.log_freq)

        train_time = time.time() - begin
        begin = time.time()
        hits, ndcgs = val_epoch(model,
                                test_ratings,
                                test_negs,
                                args.topk,
                                use_cuda=use_cuda,
                                output=valid_results_file,
                                epoch=epoch,
                                processes=args.processes)
        val_time = time.time() - begin

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        hit_rate = np.mean(hits)
        mean_ndcgs = np.mean(ndcgs)

        stats_dict = OrderedDict()
        stats_dict['HR@{0}'.format(args.topk)] = hit_rate
        stats_dict['NDCG@{0}'.format(args.topk)] = mean_ndcgs
        stats = ('Performance/Validation/', stats_dict)
        distiller.log_training_progress(stats,
                                        None,
                                        epoch,
                                        steps_completed=0,
                                        total_steps=1,
                                        log_freq=1,
                                        loggers=[tflogger])

        msglogger.info(
            'Epoch {epoch}: HR@{K} = {hit_rate:.4f}, NDCG@{K} = {ndcg:.4f}, AvgTrainLoss = {loss.avg:.4f}, '
            'train_time = {train_time:.2f}, val_time = {val_time:.2f}'.format(
                epoch=epoch,
                K=args.topk,
                hit_rate=hit_rate,
                ndcg=mean_ndcgs,
                loss=losses,
                train_time=train_time,
                val_time=val_time))

        is_best = False
        if hit_rate > best_hit_rate:
            best_hit_rate = hit_rate
            is_best = True
            best_epoch = epoch
        extras = {
            'current_hr@10': hit_rate,
            'best_hr@10': best_hit_rate,
            'best_epoch': best_epoch
        }
        apputils.save_checkpoint(epoch,
                                 'NCF',
                                 model,
                                 optimizer,
                                 compression_scheduler,
                                 extras,
                                 is_best,
                                 dir=run_dir)

        if args.threshold is not None:
            if np.mean(hits) >= args.threshold:
                msglogger.info("Hit threshold of {}".format(args.threshold))
                break
예제 #53
0
def main(config):
    """Save the configuration, then build a trainer and run training.

    The configuration is handed to ``save_config`` first. A data loader is
    then created from ``config.batch_size`` and ``config.project_root`` and
    passed, together with ``config``, to ``Trainer``, whose ``train()``
    method is invoked. Nothing is returned.
    """
    save_config(config)
    loader = get_loader(config.batch_size, config.project_root)
    Trainer(config, loader).train()
예제 #54
0
    def run(self, share):
        """Create a share and return the id produced by the create subtask.

        Steps, in order:

        1. Look up the descriptor of ``share['type']`` and refuse to create a
           share whose (type, name) pair already exists in the datastore.
        2. Pass the share through ``normalize`` with defaults for
           ``enabled``, ``immutable`` and ``description``.
        3. Make sure the share target is usable:
           * ``DATASET`` / ``ZVOL``: create the dataset when the
             ``zfs.dataset.query`` lookup finds nothing (FILE subtype: with
             the descriptor's ``perm_type``; BLOCK subtype: as a ``VOLUME``
             sized by ``share['properties']['size']``). When the dataset
             already exists, only FILE shares touch it, updating its
             ``permissions_type``.
           * ``DIRECTORY`` / ``FILE``: the target path must already exist.
        4. Apply ``share['permissions']`` to the target path, if provided.
        5. Run the ``share.<type>.create`` subtask, emit a ``share.changed``
           event and write a backup config file. A failure to write the
           backup is downgraded to a task warning.

        Args:
            share: Share description. Keys read here: ``type``, ``name``,
                ``target_type``, ``target_path``, the optional
                ``permissions`` and, for BLOCK shares on a missing dataset,
                ``properties['size']``.

        Returns:
            The first element of the ``join_subtasks`` result for the
            ``share.<type>.create`` subtask.

        Raises:
            TaskException: ``EINVAL`` when the share type has no descriptor,
                ``EEXIST`` when a share with the same type and name exists,
                ``ENOENT`` when a DIRECTORY/FILE target is missing.
            AssertionError: when the descriptor's subtype is neither FILE nor
                BLOCK, or ``target_type`` is not one of the handled values.
        """
        root = self.dispatcher.call_sync('volume.get_volumes_root')
        share_type = self.dispatcher.call_sync('share.supported_types').get(share['type'])

        # .get() yields None for an unknown type; fail with a clear task error
        # instead of a TypeError on the subscript below.
        if share_type is None:
            raise TaskException(errno.EINVAL, 'Unsupported share type: {0}'.format(share['type']))

        assert share_type['subtype'] in ('FILE', 'BLOCK'),\
            "Unsupported Share subtype: {0}".format(share_type['subtype'])

        if self.datastore.exists(
            'shares',
            ('type', '=', share['type']),
            ('name', '=', share['name'])
        ):
            raise TaskException(errno.EEXIST, 'Share {0} of type {1} already exists'.format(
                share['name'],
                share['type']
            ))

        # NOTE(review): presumably fills in keys that are missing from `share`;
        # confirm against the normalize() helper.
        normalize(share, {
            'enabled': True,
            'immutable': False,
            'description': ''
        })

        if share['target_type'] in ('DATASET', 'ZVOL'):
            dataset = share['target_path']
            # The first path component of the dataset name is used as the volume.
            pool = dataset.split('/')[0]
            path = os.path.join(root, dataset)

            if not self.dispatcher.call_sync('zfs.dataset.query', [('name', '=', dataset)], {'single': True}):
                if share_type['subtype'] == 'FILE':
                    self.join_subtasks(self.run_subtask('volume.dataset.create', {
                        'volume': pool,
                        'id': dataset,
                        'permissions_type': share_type['perm_type'],
                    }))

                if share_type['subtype'] == 'BLOCK':
                    self.join_subtasks(self.run_subtask('volume.dataset.create', {
                        'volume': pool,
                        'id': dataset,
                        'type': 'VOLUME',
                        'volsize': share['properties']['size'],
                    }))
            else:
                if share_type['subtype'] == 'FILE':
                    # Join the update like every other subtask in this method.
                    # Without the join, the permission change and the share
                    # creation below could start before the dataset update
                    # completed, and a failed update would go unnoticed.
                    self.join_subtasks(self.run_subtask('volume.dataset.update', dataset, {
                        'permissions_type': share_type['perm_type']
                    }))

        elif share['target_type'] == 'DIRECTORY':
            # Verify that target directory exists
            path = share['target_path']
            if not os.path.isdir(path):
                raise TaskException(errno.ENOENT, "Target directory {0} doesn't exist".format(path))

        elif share['target_type'] == 'FILE':
            # Verify that target file exists
            path = share['target_path']
            if not os.path.isfile(path):
                raise TaskException(errno.ENOENT, "Target file {0} doesn't exist".format(path))

        else:
            raise AssertionError('Invalid target type')

        if share.get('permissions'):
            self.join_subtasks(self.run_subtask('file.set_permissions', path, share['permissions']))

        ids = self.join_subtasks(self.run_subtask('share.{0}.create'.format(share['type']), share))
        self.dispatcher.dispatch_event('share.changed', {
            'operation': 'create',
            'ids': ids
        })

        new_share = self.datastore.get_by_id('shares', ids[0])
        # Location for the backup config, as reported by the share plugin;
        # kept separate from the target `path` used above.
        config_dir = self.dispatcher.call_sync('share.get_directory_path', new_share['id'])
        try:
            save_config(
                config_dir,
                '{0}-{1}'.format(new_share['type'], new_share['name']),
                new_share
            )
        except OSError as err:
            # The backup is best effort: the share already exists, so only warn.
            self.add_warning(TaskWarning(errno.ENXIO, 'Cannot save backup config file: {0}'.format(str(err))))

        return ids[0]