def _observe_iteration_results(self, num_it: int, epoch: int, loss, mix,
                               est_data, gt_data, aux_data: AuxData,
                               aux_loss=None):
    est_masks, est_labels = self._get_estimations(est_data)
    f1 = None  # multi-task legacy
    # global iteration index across epochs
    iteration = num_it + (len(self.loader.dataset) //
                          self.loader.batch_size) * epoch
    if not (iteration % self.exp_config.evaluation_steps):
        wav_predicted, wav_gt = reconstruct_with_masks(
            est_masks=est_masks,
            stft_all=aux_data.stft,
            log_resample=self.exp_config.log_resample,
            stft_frame=self.exp_config.stft_frame)
        # the metric is a loss (negated si-sdr), so flip the sign back
        sdr = -self.metrics(wav_predicted, wav_gt)
        masks = (gt_data, est_masks)
        logger.info(f'Iteration #{iteration} si-sdr: {sdr:.4f}')
    else:
        sdr, masks, wav_predicted = None, None, None
    self._write_summary(iteration,
                        loss.item(),
                        self.optimizer.param_groups[-1]['lr'],
                        sdr,
                        masks,
                        wav_predicted,
                        aux_loss=aux_loss,
                        aux_metrics=f1)
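# The two hedged sketches below illustrate pieces used above. First, a minimal
# sketch of what `reconstruct_with_masks` is assumed to do: apply the estimated
# masks to the complex mixture STFT and invert each masked spectrogram with
# torch.istft. The `_sketch` suffix, the tensor shapes, and the hop length are
# assumptions; the real implementation (including `log_resample` handling)
# lives elsewhere in the repository.
def reconstruct_with_masks_sketch(est_masks, stft_all, stft_frame):
    '''est_masks: (B, n_src, F, T) real masks; stft_all: (B, F, T) complex STFT.'''
    hop = stft_frame // 4  # assumed 75% overlap
    masked = est_masks * stft_all.unsqueeze(1)  # broadcast the mix over sources
    b, s, f, t = masked.shape
    wav = torch.istft(masked.reshape(b * s, f, t),  # invert one source at a time
                      n_fft=stft_frame, hop_length=hop)
    return wav.reshape(b, s, -1)  # (B, n_src, samples)


# Second, `self.metrics` is treated as a loss above (its sign is flipped before
# logging si-sdr), so a textbook scale-invariant SDR, assuming zero-mean
# signals, would look like this:
def si_sdr_sketch(est, ref, eps=1e-8):
    '''Scale-invariant SDR in dB for (..., samples) tensors.'''
    scale = (est * ref).sum(-1, keepdim=True) / (ref.pow(2).sum(-1, keepdim=True) + eps)
    s_target = scale * ref  # projection of the estimate onto the reference
    e_noise = est - s_target
    return 10 * torch.log10(s_target.pow(2).sum(-1) / (e_noise.pow(2).sum(-1) + eps) + eps)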
def spider(urls, message_file_path):
    '''Sina WeiBo crawler.'''
    ## Launch the driver and set cookies: re-login if the cached cookie is
    ## stale, otherwise reuse the local one.
    if __is_update_cookie():
        driver = __get_and_set_cookie()
    else:
        cookie = __load_cookie()
        driver = __set_cookie(cookie)
    for url in urls:
        time.sleep(5)  # wait for the page to load
        try:
            driver = __grasp(driver, url)
        except Exception:
            logger.warning(f'[chrome] failed to grasp html from {url} !!')
            continue  # skip parsing: page_source would be stale
        try:
            message = __item(driver.page_source)
            with open(message_file_path, 'a+', encoding='utf-8') as file:
                file.writelines(message)
        except Exception:
            logger.warning(f'[chrome] failed to parse html in {url} !!')
    driver.quit()
    logger.info('[chrome] close chrome...')
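# The cookie helpers used above (__is_update_cookie, __load_cookie,
# __dump_cookie), like __grasp and __item, are defined elsewhere in this
# module. A minimal sketch of the cookie trio follows, assuming the cookie
# list is pickled to a local file and refreshed once it is older than a day;
# COOKIE_PATH and the age threshold are assumptions, not the original code.
import pickle  # os and time are already imported by this module

COOKIE_PATH = './weibo_cookie.pkl'  # assumed location of the cached cookies


def __is_update_cookie(max_age_secs=24 * 3600):
    '''Return True when the cached cookie is missing or older than max_age_secs.'''
    if not os.path.exists(COOKIE_PATH):
        return True
    return (time.time() - os.path.getmtime(COOKIE_PATH)) > max_age_secs


def __dump_cookie(cookies):
    '''Pickle the cookie list returned by driver.get_cookies().'''
    with open(COOKIE_PATH, 'wb') as f:
        pickle.dump(cookies, f)


def __load_cookie():
    '''Load the previously pickled cookie list.'''
    with open(COOKIE_PATH, 'rb') as f:
        return pickle.load(f)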
def __launch_driver():
    '''Launch Chrome.'''
    driver = webdriver.Chrome('./chromedriver')
    driver.set_window_size(1440, 960)  # set the browser window size
    driver.get("https://weibo.com/")  # open the Weibo login page
    time.sleep(10)  # wait for the page to load
    logger.info('[chrome] start successfully...')
    return driver
def __set_cookie(cookies):
    '''Launch the browser and set cookies.'''
    driver = __launch_driver()
    ## Clear existing cookies, then install the cached ones
    driver.delete_all_cookies()
    for cookie in cookies:
        driver.add_cookie(cookie)
    logger.info('[cookie] load local cookies...')
    return driver
def run_phase(self, epoch: int):
    running_loss = 0.0
    aux_loss_weight = 1e+5
    for num_it, (inputs, gt_data, *aux_data) in enumerate(self.loader):
        # zero the parameter gradients
        self.optimizer.zero_grad()
        mix, gt_data, visual_or_labels, aux_data = self._input_preprocessing(
            inputs, gt_data, aux_data)

        # forward
        # track history only if in train
        with torch.set_grad_enabled(self._is_training()):
            outputs = self.model(
                mix, visual_or_labels
            ) if self.model.conditioned else self.model(mix)
            loss = self.criterion(outputs, gt_data)
            aux_loss = None
            total_loss = loss

            # backward + optimize only if in training phase
            if self._is_training():
                total_loss.backward()
                if self.exp_config.with_lstm:
                    nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                self.optimizer.step()
                # re-create the Adam optimizer right after the very first
                # optimization step, so training proceeds from a fresh state
                if (epoch == 0) and (num_it == 0):
                    self.optimizer = optim.Adam(self.model.parameters(),
                                                lr=self.exp_config.init_lr,
                                                weight_decay=0.0005)

        if aux_loss is not None:
            aux_loss = aux_loss / aux_loss_weight

        # statistics and outputs
        self._observe_iteration_results(num_it=num_it,
                                        epoch=epoch,
                                        loss=loss,
                                        mix=mix,
                                        est_data=outputs,
                                        gt_data=gt_data,
                                        aux_data=aux_data,
                                        aux_loss=aux_loss)
        running_loss += loss.item() * mix.size(0)

    epoch_loss = running_loss / len(self.loader.dataset)
    logger.info(f'{self.phase} loss: {epoch_loss:.4f}')
    return epoch_loss
def __get_and_set_cookie():
    '''Launch the browser, then fetch and save cookies.'''
    ## Launch Chrome
    driver = __launch_driver()
    ## Log in to obtain fresh cookies
    driver.find_element_by_name("username").send_keys(username)  # enter the username
    driver.find_element_by_name("password").send_keys(password)  # enter the password
    driver.find_element_by_xpath(
        "//a[@node-type='submitBtn']").click()  # click the login button
    time.sleep(10)  # wait for the login to finish
    logger.info('[chrome] relogin ...')
    ## Fetch and save cookies
    time.sleep(5)  # wait for the page to load
    cookies = driver.get_cookies()  # fetch cookies
    __dump_cookie(cookies)  # save cookies
    logger.info('[cookie] save cookie to local...')
    return driver
def train_model(self):
    """
    Full training pipeline:
    - training
    - validation (with external metrics) every 10 iterations
    - full tests every 100 epochs
    :return:
    """
    best_val_loss = np.inf
    curriculum_patience = 0
    for epoch in range(self.num_epochs):
        logger.info(f'Starting epoch {epoch}/{self.num_epochs}.')
        try:
            self._set_train()
            self.run_phase(epoch)

            self._set_eval()
            epoch_loss = self.run_phase(epoch)
            if epoch_loss < best_val_loss - 1e-4:
                best_val_loss = epoch_loss
                curriculum_patience = 0
            else:
                curriculum_patience += 1
        except KeyboardInterrupt:
            torch.save(self.model.state_dict(),
                       os.path.join(self.cp_path, 'INTERRUPTED.pth'))
            logger.info('Saved interrupt')
            sys.exit(0)

        if self.exp_config.curriculum_training:
            if curriculum_patience > self.exp_config.curriculum_patience:
                train_inc = self.train_loader.dataset.increase_n_mix()
                self.val_loader.dataset.increase_n_mix()
                if train_inc:
                    logger.info('Increased number of sources in a mixture')
                curriculum_patience = 0
                best_val_loss = np.inf
            else:
                self.scheduler.step(epoch_loss)
        else:
            self.scheduler.step(epoch_loss)

        if self.save_cp and not (epoch % self.exp_config.save_cp_frequency):
            torch.save(self.model.state_dict(),
                       os.path.join(self.cp_path, f'CP{epoch:04d}.pth'))
            logger.info(f'Checkpoint {epoch} saved !')
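# A hedged sketch of the curriculum contract assumed by `train_model` above:
# the dataset exposes increase_n_mix(), which bumps the number of sources per
# mixture up to a cap and reports whether anything changed. The class name and
# initial values here are assumptions for illustration; `n_mix_max` itself is
# real (it is logged in `_write_summary`).
class CurriculumDatasetSketch:
    def __init__(self, n_mix=2, n_mix_max=4):
        self.n_mix = n_mix          # current number of sources per mixture
        self.n_mix_max = n_mix_max  # hard cap on mixture complexity

    def increase_n_mix(self):
        '''Add one more source per mixture; return True if the count grew.'''
        if self.n_mix < self.n_mix_max:
            self.n_mix += 1
            return True
        return False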
def _write_summary(self, iteration, loss, lr, sdr=None, masks=None,
                   audio=None, aux_loss=None, aux_metrics=None):
    self.writer.add_scalar(f'loss/{self.phase}', loss, iteration)
    logger.info(f'Iteration #{iteration} loss: {loss:.4f}')
    self.writer.add_scalar(f'lr/{self.phase}', lr, iteration)
    self.writer.add_scalar(f'n_max_sources/{self.phase}',
                           self.loader.dataset.n_mix_max, iteration)
    if sdr is not None:
        self.writer.add_scalar(f'sdr/{self.phase}', sdr, iteration)
    if masks is not None:
        gt_masks, predicted_masks = masks
        for ch_idx in range(gt_masks.shape[1]):
            # stack ground-truth and predicted masks side by side
            self.writer.add_images(
                f'gt_vs_predicted_mask_ch_{ch_idx + 1}/{self.phase}',
                torch.cat([
                    gt_masks[:, ch_idx, :, :],
                    predicted_masks[:, ch_idx, :, :]
                ], dim=2).unsqueeze_(1),
                global_step=iteration)
    if audio is not None:
        for piece_idx, source_idx in itertools.product(
                range(audio.shape[0]), range(audio.shape[1])):
            self.writer.add_audio(
                f'reconstructed_audio_{piece_idx}_{source_idx + 1}/{self.phase}',
                audio[piece_idx, source_idx],
                global_step=iteration,
                sample_rate=self.exp_config.expected_sr)
    if aux_loss is not None:
        self.writer.add_scalar(f'aux_loss/{self.phase}', aux_loss, iteration)
        if aux_metrics is not None:
            self.writer.add_scalar(f'aux_f1/{self.phase}', aux_metrics,
                                   iteration)
def runMain(now):
    '''Main entry point.'''
    t_bg = time.perf_counter()
    logger.info('[WeiBo System] start to run...')
    message_file_path = os.path.join(path_message, f'message_{now}.txt')
    spider(urls=[
        f'https://weibo.com/{ii}?is_all=1&stat_date={months}#feedtop'
        for ii in targets
    ],
           message_file_path=message_file_path)
    try:
        send_email(f'Weibo{now}', message_file_path)
        logger.info('[Email] send e-mail successfully...')
    except Exception:
        logger.warning(f'[Email] failed to send {message_file_path}!!!')
    t_ed = time.perf_counter()
    logger.info(f'[WeiBo System] end of run, elapsed {t_ed - t_bg:.2f} secs...')
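# A hedged usage sketch: how `runMain` might be driven. The date format and
# the __main__ guard are assumptions; `targets`, `months`, `path_message`,
# `username`, and `password` are module-level settings defined elsewhere.
if __name__ == '__main__':
    from datetime import datetime
    runMain(datetime.now().strftime('%Y-%m-%d'))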