Example #1
 def get(self, url, headers={}, allow_redirects=True):
     tools.log('GET: %s' % url, 'info')
     request = lambda: self._cfscrape.get(url,
                                          headers=headers,
                                          timeout=self._timeout,
                                          allow_redirects=allow_redirects)
     return self._request_core(request)
Example #2
    def fetch_configs(self):
        try:
            host_obj = models.Host.objects.get(id=self.client_id)  # get() returns a single object
            template_list = list(
                host_obj.templates.select_related())  # all templates bound directly to the host

            for host_group in host_obj.host_groups.select_related():  # iterate over the host's host groups
                # Collect the host's own templates plus every template from its host groups;
                # duplicates are removed later when the dict keys collide.
                template_list.extend(host_group.templates.select_related())
            for template in template_list:
                for service in template.services.select_related():  # loop over each service
                    # Build the client-side configuration file:
                    # each service maps to its plugin name and check interval,
                    # returned in the JSON format shown below.
                    # Metrics from the service index are not added here; for now every
                    # metric reported by the plugin is stored, and the UI can filter
                    # by specific metrics later.
                    """
                    {
                        'services': {
                            'service.name': ['plugin_name', interval]
                        }
                    }
                    """
                    # Duplicate service names are deduplicated here (dict keys overwrite).
                    self.client_configs['services'][service.name] = [
                        service.plugin_name, service.interval
                    ]
            return self.client_configs
        except ObjectDoesNotExist as e:  # objects.get() raises this when no matching host exists
            log("ClientHandler/fetch_configs err", e)
Example #3
        def check_distribution(columns, plot_cols=6):
            plt.style.use('seaborn-white')

            if plot_cols > len(columns) - 2:
                t.log(t.yellow('ERROR: '),
                      f"Can't use more than {len(columns) - 2} columns.")
                plot_cols = len(columns) - 2

            # figure size = (width,height)
            f1 = plt.figure(figsize=(30, len(columns) * 3))

            total_plots = len(columns)
            rows = total_plots - plot_cols

            for idx, y in enumerate(columns):
                if len(set(self.df[y])) >= 3:
                    idx += 1
                    ax1 = f1.add_subplot(rows, plot_cols, idx)
                    ax1.set_xlabel(y)
                    sns.distplot(self.df[y], color='b', hist=False)
                    # parameters for normal distribution
                    x_min = self.df[y].min()
                    x_max = self.df[y].max()
                    mean = self.df[y].mean()
                    std = self.df[y].std()
                    # plotting normal distribution
                    x = np.linspace(x_min, x_max, self.df.shape[0])
                    y = scipy.stats.norm.pdf(x, mean, std)
                    plt.plot(x, y, color='black', linestyle='dashed')
Example #4
def __get_cache_core(query):
    if __cache_results.get(query, '') != '':
        if CACHE_LOG:
            tools.log('get_cache_local', 'notice')
        return __cache_results[query]
    else:
        __cache_results[query] = {}

    if CACHE_LOG:
        tools.log('get_cache_request', 'notice')

    response = __dynamo_get(__map_in_cache(query))

    if response.status_code != 200:
        if CACHE_LOG:
            tools.log('get_cache_err_response', 'notice')
        return __cache_results[query]

    result = __map_out_cache(response.text)
    if result is None:
        if CACHE_LOG:
            tools.log('get_cache_nocache', 'notice')
        return __cache_results[query]

    result['d'] = json.loads(result['d'].replace("'", '"'))

    parsed_result = {}
    cached_results = {}
    for scraper_key in result['d'].keys():
        key = scraper_keys[scraper_key]
        cached_results[key] = []
        for result_key in result['d'][scraper_key].keys():
            scraper_result = result['d'][scraper_key][result_key]
            if len(scraper_result) < 2:
                continue
            cached_results[key].append({
                'hash': result_key,
                'package': package_keys[scraper_result[0]],
                'release_title': decode(scraper_result[1]),
                'size': scraper_result[2],
                'seeds': 0
            })

    parsed_result['cached_results'] = cached_results
    parsed_result['use_cache_only'] = (now() - int(result['t'])) < (3600 * 1000)
    __cache_results[query]['result'] = result
    __cache_results[query]['parsed_result'] = parsed_result

    if CACHE_LOG:
        tools.log('get_cache_result', 'notice')

    return __cache_results[query]
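
The 'd' field parsed above is written by __set_cache_core (Example #22 below), which serializes the results dict and swaps double quotes for single quotes before storage. A minimal standalone sketch of that round trip, with placeholder data:

import json

# Placeholder payload standing in for cached_results.
cached_results = {'scraper_key': {'result_key': ['package_hash', 'encoded_title', 1400]}}

# Encoding as in __set_cache_core: swap " for ' before storing the item.
stored = json.dumps(cached_results).replace('"', "'")

# Decoding as in __get_cache_core: swap ' back to " before parsing.
restored = json.loads(stored.replace("'", '"'))
assert restored == cached_results
# Note: this naive quote swap breaks if any stored string itself contains quotes.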
Example #5
    def _plot(self, columns, df_clean, df_outliers, plot_cols=6):
        """Plots the dataframe and marks the outliers by a red cross.

        Parameters:
        ----------
        columns : list of str
            Column names to plot.

        df_clean : dataframe
            Dataframe without outliers.

        df_outliers : dataframe
            Dataframe of outliers.

        plot_cols : int, default=6
            Determines how many columns the plots will form.

        """
        plt.style.use('seaborn-white')

        if plot_cols > len(columns) - 2:
            t.log(
                t.yellow('ERROR: '),
                f"Can't use more than {len(columns) - 2} columns in one row.")
            plot_cols = len(columns) - 2

        # figure size = (width,height)
        f1 = plt.figure(figsize=(30, len(columns) * 3))

        total_plots = len(columns)
        rows = total_plots - plot_cols

        for idx, y in enumerate(columns):
            idx += 1
            ax1 = f1.add_subplot(rows, plot_cols, idx)
            sns.regplot(
                x=df_clean.index,
                y=y,
                data=df_clean,
                scatter=True,
                fit_reg=False,
                color='lightblue',
            )
            sns.regplot(
                x=df_outliers.index,
                y=y,
                data=df_outliers,
                scatter=True,
                fit_reg=False,
                marker='x',
                color='red',
            )
Example #6
def get_max_length(x, ratio=0.95):
    """
    Get the max length that covers 95% of the data.
    """
    lens = [len(_) for _ in x]
    max_len = max(lens)
    min_len = min(lens)
    lens.sort()
    # TODO need to drop the too short data?
    specified_len = lens[int(len(lens) * ratio)]
    log(f"Max length: {max_len}; Min length {min_len}; 95 length {specified_len}"
        )
    return specified_len
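
A minimal usage sketch of get_max_length, assuming log is a simple print-style logger:

log = print  # assumption: log is a print-style logger

# 100 dummy sequences with lengths 1..100.
sequences = [[0] * n for n in range(1, 101)]
max_len = get_max_length(sequences, ratio=0.95)
# int(100 * 0.95) == 95, so lens[95] == 96 is returned,
# i.e. a length that covers 95% of the samples.
print(max_len)  # 96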
Example #7
    def _head(self, url):
        tools.log('HEAD: %s' % url, 'info')
        request = lambda: self._request.head(url, timeout=self._timeout)
        response = self._request_core(request)
        if self._cfscrape.is_cloudflare_on(response, allow_empty_body=True):
            response = lambda: None
            response.url = url
            response.status_code = 200
            return response

        if response.status_code == 302 or response.status_code == 301:
            redirect_url = response.headers['Location']
            if not redirect_url.endswith(
                    '127.0.0.1') and not redirect_url.endswith('localhost'):
                return self._head(redirect_url)

        return response
Example #8
 def fit(self, train_x, train_y, validation_data_fit, train_loop_num,
         **kwargs):
     val_x, val_y = validation_data_fit
     callbacks = [
         tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
     ]
     epochs = 10 if train_loop_num == 1 else 30
     log(f'train_x: {train_x.shape}; train_y: {train_y.shape}')
     self._model.fit(
         train_x,
         ohe2cat(train_y),
         epochs=epochs,
         callbacks=callbacks,
         validation_data=(val_x, ohe2cat(val_y)),
         verbose=1,  # Logs once per epoch.
         batch_size=32,
         shuffle=True)
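
ohe2cat is not shown in these snippets; a reasonable assumption is that it converts one-hot labels to integer class indices, which is what the sparse-categorical targets passed to Keras here would expect:

import numpy as np

def ohe2cat(labels):
    # Assumed behavior: map one-hot rows to integer class indices.
    return np.argmax(labels, axis=1)

# Example: three one-hot rows -> array([2, 0, 1])
print(ohe2cat(np.array([[0, 0, 1], [1, 0, 0], [0, 1, 0]])))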
Example #9
    def _get_cache(self, query):
        cache_result = get_cache(self.caller_name, query)
        self._cache_result = cache_result
        if cache_result is None:
            return False

        if not check_cache_result(cache_result, self.caller_name):
            return False

        parsed_result = cache_result['parsed_result']
        self._results_from_cache = parsed_result['cached_results'][
            self.caller_name]

        use_cache_only = parsed_result.get('use_cache_only', False)
        if use_cache_only and CACHE_LOG:
            tools.log('cache_direct_result', 'notice')

        return use_cache_only
Example #10
    def _head(self, url):
        tools.log('HEAD: %s' % url, 'info')
        try:
            response = self._request.head(url, timeout=8)
            if is_cloudflare_on(response):
                response = lambda: None
                response.url = url
                response.status_code = 200
                return response

            if response.status_code == 302 or response.status_code == 301:
                redirect_url = response.headers['Location']
                return self._head(redirect_url)
            return response
        except:
            response = lambda: None
            response.status_code = 501
            return response
Example #11
    def _request_core(self, request):
        self.has_timeout_exc = False
        response_err = lambda: None
        response_err.status_code = 501

        try:
            if self._sequental is False:
                return request()

            with self._lock:
                if self._should_wait:
                    time.sleep(self._wait)
                self._should_wait = True
                return request()
        except:
            exc = traceback.format_exc(limit=1)
            if 'ConnectTimeout' in exc or 'ReadTimeout' in exc:
                self.has_timeout_exc = True
                tools.log('%s timed out.' % request.url, 'notice')
            else:
                traceback.print_exc()

            return response_err
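
The response_err = lambda: None trick above (also used by the _head methods) works because any Python function object accepts arbitrary attributes, giving a lightweight stand-in for a requests.Response. A standalone sketch; types.SimpleNamespace is the more idiomatic equivalent:

from types import SimpleNamespace

# Function object used as a cheap attribute container, as in _request_core.
response_err = lambda: None
response_err.status_code = 501
print(response_err.status_code)  # 501

# Equivalent, more idiomatic stub.
response_err = SimpleNamespace(status_code=501)
print(response_err.status_code)  # 501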
Example #12
 def __init__(self):
     # clear_session()
     log('init BilstmAttention')
     self.max_length = None
     self._model = None
     self.is_init = False
Example #13
 def post(self, url, data, headers={}):
     tools.log('POST: %s' % url, 'info')
     request = lambda: cfscrape.CloudflareScraper().post(
         url, data, headers=headers, timeout=self._timeout)
     request.url = url
     return self._request_core(request)
Example #14
def main(args, configs):
    print("Prepare training ...")

    preprocess_config, model_config, train_config = configs

    # Get dataset
    dataset = Dataset("train.txt",
                      preprocess_config,
                      train_config,
                      model_config,
                      sort=True,
                      drop_last=True)
    batch_size = train_config["optimizer"]["batch_size"]
    group_size = 4  # Set this larger than 1 to enable sorting in Dataset
    assert batch_size * group_size < len(dataset)
    loader = DataLoader(
        dataset,
        batch_size=batch_size * group_size,
        shuffle=True,
        collate_fn=dataset.collate_fn,
    )

    # Prepare model
    model, optimizer = get_model(args, configs, device, train=True)
    model = nn.DataParallel(model)
    num_param = get_param_num(model)
    Loss = FastSpeech2Loss(preprocess_config, model_config).to(device)
    print("Number of FastSpeech2 Parameters:", num_param)

    # Load checkpoint if exists
    if args.restore_path is not None and os.path.isfile(args.restore_path):
        checkpoint = torch.load(args.restore_path)
        pretrained_dict = checkpoint['model']
        if not any(key.startswith('module.') for key in pretrained_dict):
            pretrained_dict = {
                'module.' + k: v
                for k, v in pretrained_dict.items()
            }

        dem1 = 0
        dem2 = 0
        model_dict = model.state_dict()
        for k, v in pretrained_dict.items():
            if k in model_dict and v.size() == model_dict[k].size():
                # print('Load weight in ', k)
                dem1 += 1
            else:
                print(f'Module {k} is not same size')
                dem2 += 1
        dem2 += dem1
        print(f'### Load {dem1}/{dem2} modules')
        # 1. filter out unnecessary keys
        pretrained_dict = {
            k: v
            for k, v in pretrained_dict.items()
            if k in model_dict and v.size() == model_dict[k].size()
        }
        # 2. overwrite entries in the existing state dict
        model_dict.update(pretrained_dict)
        # 3. load the new state dict
        model.load_state_dict(model_dict)
        # model.load_state_dict(checkpoint['model'])
        # optimizer.load_state_dict(checkpoint['optimizer'])
        print("\n---Model Restored at Step {}---\n".format(args.restore_step))

    # Load vocoder
    vocoder = get_vocoder(model_config, device)

    # Init logger
    for p in train_config["path"].values():
        os.makedirs(p, exist_ok=True)
    train_log_path = os.path.join(train_config["path"]["log_path"], "train")
    val_log_path = os.path.join(train_config["path"]["log_path"], "val")
    os.makedirs(train_log_path, exist_ok=True)
    os.makedirs(val_log_path, exist_ok=True)
    train_logger = SummaryWriter(train_log_path)
    val_logger = SummaryWriter(val_log_path)

    # Training
    step = args.restore_step + 1
    epoch = 1
    grad_acc_step = train_config["optimizer"]["grad_acc_step"]
    grad_clip_thresh = train_config["optimizer"]["grad_clip_thresh"]
    total_step = train_config["step"]["total_step"]
    log_step = train_config["step"]["log_step"]
    save_step = train_config["step"]["save_step"]
    synth_step = train_config["step"]["synth_step"]
    val_step = train_config["step"]["val_step"]

    outer_bar = tqdm(total=total_step, desc="Training", position=0)
    outer_bar.n = args.restore_step
    outer_bar.update()

    while True:
        inner_bar = tqdm(total=len(loader),
                         desc="Epoch {}".format(epoch),
                         position=1)
        for batchs in loader:
            for batch in batchs:
                batch = to_device(batch, device)

                # Forward
                output = model(*(batch[2:]))

                # Compute loss
                losses = Loss(batch, output)
                total_loss = losses[0]

                # Backward
                total_loss = total_loss / grad_acc_step
                total_loss.backward()
                if step % grad_acc_step == 0:
                    # Clipping gradients to avoid gradient explosion
                    nn.utils.clip_grad_norm_(model.parameters(),
                                             grad_clip_thresh)

                    # Update weights
                    optimizer.step_and_update_lr()
                    optimizer.zero_grad()

                if step % log_step == 0:
                    losses = [l.item() for l in losses]
                    message1 = "Step {}/{}|".format(step, total_step)
                    message2 = "|Total Loss: {:.4f}|Mel Loss: {:.4f}|Mel PostNet Loss: {:.4f}|Pitch Loss: {:.4f}|Energy Loss: {:.4f}|Duration Loss: {:.4f}|".format(
                        *losses)

                    # with open(os.path.join(train_log_path, "log.txt"), "a") as f:
                    #     f.write(message1 + message2 + "\n")

                    outer_bar.write(message1 + message2)

                    log(train_logger, step, losses=losses)

                if step % synth_step == 0:
                    output_prediction = model(*(batch[2:6]))
                    fig, wav_reconstruction, wav_prediction, tag = synth_one_sample(
                        batch,
                        output_prediction,
                        vocoder,
                        model_config,
                        preprocess_config,
                    )
                    log(
                        train_logger,
                        fig=fig,
                        tag="Training/step_{}_{}".format(step, tag),
                    )
                    sampling_rate = preprocess_config["preprocessing"][
                        "audio"]["sampling_rate"]
                    log(
                        train_logger,
                        audio=wav_reconstruction,
                        sampling_rate=sampling_rate,
                        tag="Training/step_{}_{}_reconstructed".format(
                            step, tag),
                    )
                    log(
                        train_logger,
                        audio=wav_prediction,
                        sampling_rate=sampling_rate,
                        tag="Training/step_{}_{}_synthesized".format(
                            step, tag),
                    )

                if step % val_step == 0:
                    model.eval()
                    message = evaluate(model, step, configs, val_logger,
                                       vocoder)
                    # with open(os.path.join(val_log_path, "log.txt"), "a") as f:
                    #     f.write(message + "\n")
                    outer_bar.write(message)

                    model.train()

                if step % save_step == 0:
                    torch.save(
                        {
                            "model": model.module.state_dict(),
                            "optimizer": optimizer._optimizer.state_dict(),
                        },
                        os.path.join(
                            train_config["path"]["ckpt_path"],
                            "{}.pth.tar".format(step),
                        ),
                    )

                if step == total_step:
                    quit()
                step += 1
                outer_bar.update(1)

            inner_bar.update(1)
        epoch += 1
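
The training loop above divides the loss by grad_acc_step and only steps the optimizer every grad_acc_step iterations. The gradient-accumulation pattern in isolation, with a toy model and random data, looks roughly like this:

import torch
import torch.nn as nn

# Toy stand-ins for the FastSpeech2 model and its scheduled optimizer.
model = nn.Linear(10, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()
grad_acc_step = 4

for step in range(1, 101):
    x, target = torch.randn(8, 10), torch.randn(8, 1)
    loss = criterion(model(x), target)

    # Scale so the accumulated gradient matches a single large-batch step.
    (loss / grad_acc_step).backward()

    if step % grad_acc_step == 0:
        # Clip, update, and reset the accumulated gradients.
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        optimizer.zero_grad()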
Example #15
    def get_summary(self,
                    nan=True,
                    formats=True,
                    categorical=True,
                    min_less_0=True,
                    check_normdist=True):
        """Describes the data.

        Parameters:
        ----------
        nan : bool, default=True
            True to report columns that contain NaNs.

        formats : bool, default=True
            True to report the dtypes of the columns.

        categorical : bool, default=True
            True to report variables that could be categorical.
            A variable is considered categorical if it has fewer unique values than num_ifcategorical.

        min_less_0 : bool, default=True
            True to check for variables that have negative values.

        check_normdist : bool, default=True
            True to compare each variable's actual distribution against a normal distribution.
            Plots each variable considered against the normal distribution.

        Returns
        ----------
        A description of the data in text format and plots if check_normdist=True.

        """
        # Checking for NaN
        if nan:
            nans = list(
                pd.DataFrame(self.df.isna().sum()).rename(columns={
                    0: 'NaNs'
                }).reset_index().query("NaNs>0")['index'])
            t.log(t.black('NaNs: '), nans)
        else:
            nans = False

        # Checking for unique formats
        if formats:
            unique_formats = list(self.df.dtypes.unique())
            t.log(t.black('Unique formats: '), unique_formats)
        else:
            formats = False

        # Checking for possible categorical values
        if categorical:
            num_ifcategorical = 10
            possibly_categorical = []
            for col in self.df.columns:
                set_unique = set(self.df[col])
                if len(set_unique) <= num_ifcategorical:
                    possibly_categorical.append(col)
            t.log(
                t.black(
                    f'Possibly categorical variables (<{num_ifcategorical} unique values): '
                ), possibly_categorical)
        else:
            categorical = False

        # Checking if min value is < 0
        if min_less_0:
            lst_less0 = list(
                pd.DataFrame(self.df[self.df < 0].any()).rename(columns={
                    0: 'flag'
                }).query("flag==True").index)
            t.log(t.black(f'Min value < 0: '), lst_less0)
        else:
            min_less_0 = False

        # Plotting actual distributions vs Normal distribution
        def check_distribution(columns, plot_cols=6):
            plt.style.use('seaborn-white')

            if plot_cols > len(columns) - 2:
                t.log(t.yellow('ERROR: '),
                      f"Can't use more than {len(columns) - 2} columns.")
                plot_cols = len(columns) - 2

            # figure size = (width,height)
            f1 = plt.figure(figsize=(30, len(columns) * 3))

            total_plots = len(columns)
            rows = total_plots - plot_cols

            for idx, y in enumerate(columns):
                if len(set(self.df[y])) >= 3:
                    idx += 1
                    ax1 = f1.add_subplot(rows, plot_cols, idx)
                    ax1.set_xlabel(y)
                    sns.distplot(self.df[y], color='b', hist=False)
                    # parameters for normal distribution
                    x_min = self.df[y].min()
                    x_max = self.df[y].max()
                    mean = self.df[y].mean()
                    std = self.df[y].std()
                    # plotting normal distribution
                    x = np.linspace(x_min, x_max, self.df.shape[0])
                    y = scipy.stats.norm.pdf(x, mean, std)
                    plt.plot(x, y, color='black', linestyle='dashed')

        if check_normdist:
            t.log(
                t.black(
                    'Plotting distributions of variables against normal distribution:'
                ))
            check_distribution(self.df.columns, plot_cols=6)

        return nans, formats, categorical, min_less_0
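
The NaN check in get_summary reduces to a single pandas expression; a standalone sketch on a toy frame (column names are placeholders):

import numpy as np
import pandas as pd

df = pd.DataFrame({'a': [1, 2, np.nan], 'b': [4, 5, 6], 'c': [np.nan, np.nan, 9]})

# Same logic as the NaN block above: count NaNs per column,
# keep columns with at least one, and return their names.
nans = list(
    pd.DataFrame(df.isna().sum()).rename(columns={0: 'NaNs'})
    .reset_index().query("NaNs>0")['index'])
print(nans)  # ['a', 'c']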
Example #16
    def sources(self, simple_info, hostDict, hostprDict):
        if simple_info is None:
            return []

        supported_hosts = hostDict + hostprDict
        sources = []

        try:
            query_type = None
            if simple_info.get('title', None) is not None:
                query_type = 'movie'
                query = '%s %s' % (strip_accents(
                    simple_info['title']), simple_info['year'])
            else:
                query_type = 'episode'
                query = '%s S%sE%s' % (strip_accents(
                    simple_info['show_title']),
                                       simple_info['season_number_xx'],
                                       simple_info['episode_number_xx'])

            if len(supported_hosts) > 0:
                url = self.scraper._find_url()

                def search(url):
                    try:
                        result = self.search(url, query)
                        if result is None:
                            raise requests.exceptions.RequestException()
                        return result
                    except requests.exceptions.RequestException:
                        url = self.scraper._find_next_url(url)
                        if url is None:
                            return []
                        return search(url)

                hoster_results = search(url) if url is not None else []
            else:
                hoster_results = []

            for result in hoster_results:
                quality = source_utils.get_quality(result.title)

                if query_type == 'movie' and not source_utils.filter_movie_title(
                        result.title, simple_info['title'],
                        simple_info['year']):
                    continue

                if query_type == 'episode' and not source_utils.filter_single_episode(
                        simple_info, result.title):
                    continue

                for url in result.urls:
                    domain = re.findall(r"https?:\/\/(www\.)?(.*?)\/.*?",
                                        url)[0][1]

                    if domain not in supported_hosts:
                        continue
                    if any(x in url for x in ['.rar', '.zip', '.iso']):
                        continue

                    quality_from_url = source_utils.get_quality(url)
                    if quality_from_url != 'SD':
                        quality = quality_from_url

                    sources.append({
                        'release_title': strip_non_ascii_and_unprintable(result.title),
                        'source': domain,
                        'quality': quality,
                        'language': 'en',
                        'url': url,
                        'info': [],
                        'direct': False,
                        'debridonly': False
                    })

            sources.reverse()

            result_count = len(sources) if len(supported_hosts) > 0 else 'disabled'
            tools.log(
                'a4kScrapers.%s.%s: %s' %
                (query_type, self._caller_name, result_count), 'notice')

            return sources
        except:
            traceback.print_exc()
            return sources
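
The domain check in sources relies on one regex; a small standalone check of that exact pattern (the URL is a placeholder):

import re

url = 'https://www.example.com/some/file.mkv'
# Group 0 captures the optional "www.", group 1 the bare domain.
domain = re.findall(r"https?:\/\/(www\.)?(.*?)\/.*?", url)[0][1]
print(domain)  # example.com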
Example #17
 def _episode_notice(self, caller_name):
     tools.log(
         'btScraper.episode.%s: %s' %
         (caller_name, self._torrent_list_stats(caller_name)), 'notice')
Example #18
def train(train_loader, net, criterion, optimizer, num_epoches, iters):
    if resume:
        CHECKPOINT_FILE = './model/checkpoint/exp/{}.pth'.format(
            model_name + loss_name + times + extra_description)
        # Restore the previous training state
        print("Resume from checkpoint...")
        checkpoint = torch.load(CHECKPOINT_FILE)
        net.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        initepoch = checkpoint['epoch'] + 1
    else:
        initepoch = 1
    for epoch in range(initepoch, num_epoches + 1):
        try:
            # start time
            st = time.time()
            b_dice = 0.0
            t_dice = 0.0
            d_len = 0
            # start training
            for inputs, mask in train_loader:
                X = inputs.to(device)
                y = mask.to(device)
                optimizer.zero_grad()
                output = net(X)
                loss = criterion(output, y)
                output = torch.sigmoid(output)
                output[output < 0.5] = 0
                output[output > 0.5] = 1
                bladder_dice = diceCoeffv2(output[:, 0:1, :],
                                           y[:, 0:1, :],
                                           activation=None).cpu().item()
                tumor_dice = diceCoeffv2(output[:, 1:2, :],
                                         y[:, 1:2, :],
                                         activation=None).cpu().item()
                mean_dice = (bladder_dice + tumor_dice) / 2
                d_len += 1
                b_dice += bladder_dice
                t_dice += tumor_dice
                loss.backward()
                optimizer.step()
                iters += batch_size
                string_print = "Epoch = %d iters = %d Current_Loss = %.4f Mean Dice=%.4f Bladder Dice=%.4f Tumor Dice=%.4f Time = %.2f"\
                            % (epoch, iters, loss.item(), mean_dice,
                                bladder_dice, tumor_dice, time.time() - st)
                tools.log(string_print)
                st = time.time()
                writer.add_scalar('train_main_loss', loss.item(), iters)

            b_dice = b_dice / d_len
            t_dice = t_dice / d_len
            m_dice = (b_dice + t_dice) / 2

            print(
                'Epoch {}/{},Train Mean Dice {:.4}, Bladder Dice {:.4}, Tumor Dice {:.4}'
                .format(epoch, num_epoches, m_dice, b_dice, t_dice))
            if epoch == num_epoches:
                checkpoint = {
                    'model_state_dict': net.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'epoch': epoch,
                }
                torch.save(
                    checkpoint,
                    './model/checkpoint/exp/{}.pth'.format(model_name +
                                                           loss_name + times +
                                                           extra_description))
                writer.close()

        except BaseException as e:
            print(e)
            checkpoint = {
                'model_state_dict': net.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'epoch': epoch,
            }
            torch.save(
                checkpoint,
                './model/checkpoint/exp/{}.pth'.format(model_name + loss_name +
                                                       times +
                                                       extra_description))
            writer.close()
            print('Training stopped')
            return
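
diceCoeffv2 is not included in these snippets; as a rough assumption it computes a soft Dice coefficient per channel, along the lines of the sketch below (the real implementation may differ in smoothing and reduction):

import torch

def dice_coeff(pred, target, eps=1e-5):
    # Assumed form: Dice = 2*|X ∩ Y| / (|X| + |Y|), averaged over the batch.
    pred = pred.contiguous().view(pred.shape[0], -1)
    target = target.contiguous().view(target.shape[0], -1)
    intersection = (pred * target).sum(dim=1)
    return ((2.0 * intersection + eps) /
            (pred.sum(dim=1) + target.sum(dim=1) + eps)).mean()

# Example on a 1x1x2x2 binarized prediction/mask pair -> ~0.667
pred = torch.tensor([[[[1., 1.], [0., 0.]]]])
target = torch.tensor([[[[1., 0.], [0., 0.]]]])
print(dice_coeff(pred, target))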
Example #19
 def get(self, url, headers={}):
     tools.log('GET: %s' % url, 'info')
     request = lambda: self._cfscrape.get(url, headers=headers, timeout=8)
     return self._request_core(request)
Example #20
 def _get_episode_results(self):
     tools.log(
         'a4kScrapers.episode.%s: %s' %
         (self.caller_name, self._sanitize_and_get_status()), 'notice')
     return self._results
Example #21
def main(args, configs):
    print("Prepare training ...")

    preprocess_config, model_config, train_config = configs

    # Get dataset
    dataset = Dataset("train.txt",
                      preprocess_config,
                      train_config,
                      sort=True,
                      drop_last=True)
    batch_size = train_config["optimizer"]["batch_size"]
    group_size = 4  # Set this larger than 1 to enable sorting in Dataset
    assert batch_size * group_size < len(dataset)
    loader = DataLoader(
        dataset,
        batch_size=batch_size * group_size,
        shuffle=True,
        collate_fn=dataset.collate_fn,
    )
    with open(
            os.path.join(preprocess_config["path"]["preprocessed_path"],
                         "stats.json")) as f:
        stats = json.load(f)
        mel_stats = stats["mel"]

    # Prepare model
    model, optimizer = get_model(args, configs, device, train=True)
    model = nn.DataParallel(model)
    num_param = get_param_num(model)
    Loss = ParallelTacotron2Loss(model_config, train_config).to(device)
    print("Number of Parallel Tacotron 2 Parameters:", num_param)

    # Load vocoder
    vocoder = get_vocoder(model_config, device)

    # Init logger
    for p in train_config["path"].values():
        os.makedirs(p, exist_ok=True)
    train_log_path = os.path.join(train_config["path"]["log_path"], "train")
    val_log_path = os.path.join(train_config["path"]["log_path"], "val")
    os.makedirs(train_log_path, exist_ok=True)
    os.makedirs(val_log_path, exist_ok=True)
    train_logger = SummaryWriter(train_log_path)
    val_logger = SummaryWriter(val_log_path)

    # Training
    step = args.restore_step + 1
    epoch = 1
    grad_acc_step = train_config["optimizer"]["grad_acc_step"]
    grad_clip_thresh = train_config["optimizer"]["grad_clip_thresh"]
    total_step = train_config["step"]["total_step"]
    log_step = train_config["step"]["log_step"]
    save_step = train_config["step"]["save_step"]
    synth_step = train_config["step"]["synth_step"]
    val_step = train_config["step"]["val_step"]

    outer_bar = tqdm(total=total_step, desc="Training", position=0)
    outer_bar.n = args.restore_step
    outer_bar.update()

    # with torch.autograd.detect_anomaly():
    while True:
        inner_bar = tqdm(total=len(loader),
                         desc="Epoch {}".format(epoch),
                         position=1)
        for batchs in loader:
            for batch in batchs:
                batch = to_device(batch, device, mel_stats)

                # Forward
                output = model(*(batch[2:]))
                # Compute loss
                losses = Loss(batch, output, step)
                total_loss = losses[0]

                # Backward
                total_loss = total_loss / grad_acc_step
                total_loss.backward()
                if step % grad_acc_step == 0:
                    # Clipping gradients to avoid gradient explosion
                    nn.utils.clip_grad_norm_(model.parameters(),
                                             grad_clip_thresh)

                    # Update weights
                    optimizer.step_and_update_lr()
                    optimizer.zero_grad()

                if step % log_step == 0:
                    losses = [l.item() for l in losses]
                    message1 = "Step {}/{}, ".format(step, total_step)
                    message2 = "Total Loss: {:.4f}, Mel Loss: {:.4f}, Duration Loss: {:.4f}, KL Loss: {:.4f}".format(
                        *losses)

                    with open(os.path.join(train_log_path, "log.txt"),
                              "a") as f:
                        f.write(message1 + message2 + "\n")

                    outer_bar.write(message1 + message2)

                    log(train_logger, step, losses=losses)

                if step % synth_step == 0:
                    fig, wav_reconstruction, wav_prediction, tag = synth_one_sample(
                        batch,
                        output,
                        vocoder,
                        model_config,
                        preprocess_config,
                        mel_stats,
                    )
                    log(
                        train_logger,
                        fig=fig,
                        tag="Training/step_{}_{}".format(step, tag),
                    )
                    sampling_rate = preprocess_config["preprocessing"][
                        "audio"]["sampling_rate"]
                    log(
                        train_logger,
                        audio=wav_reconstruction,
                        sampling_rate=sampling_rate,
                        tag="Training/step_{}_{}_reconstructed".format(
                            step, tag),
                    )
                    log(
                        train_logger,
                        audio=wav_prediction,
                        sampling_rate=sampling_rate,
                        tag="Training/step_{}_{}_synthesized".format(
                            step, tag),
                    )

                if step % val_step == 0:
                    model.eval()
                    message = evaluate(model, step, configs, val_logger,
                                       vocoder, len(losses), mel_stats)
                    with open(os.path.join(val_log_path, "log.txt"), "a") as f:
                        f.write(message + "\n")
                    outer_bar.write(message)

                    model.train()

                if step % save_step == 0:
                    torch.save(
                        {
                            "model": model.module.state_dict(),
                            "optimizer": optimizer._optimizer.state_dict(),
                        },
                        os.path.join(
                            train_config["path"]["ckpt_path"],
                            "{}.pth.tar".format(step),
                        ),
                    )

                if step == total_step:
                    quit()
                step += 1
                outer_bar.update(1)

            inner_bar.update(1)
        epoch += 1
Example #22
def __set_cache_core(scraper, query, results, cached_results):
    if __cache_has_new_results.get(query, '') == '':
        __cache_has_new_results[query] = False

    scraper_key = sha1(scraper)
    if cached_results.get(scraper_key, None) is None:
        cached_results[scraper_key] = {}

    for result in results:
        if result['size'] < 120:
            continue

        result_key = result['hash']

        duplicate = False
        for cached_scraper in cached_results:
            if cached_results[cached_scraper].get(result_key,
                                                  None) is not None:
                duplicate = True
                break

        if duplicate:
            continue

        scraper_result = cached_results[scraper_key]
        try:
            scraper_result[result_key] = [
                sha1(result['package']),
                encode(result['release_title']), result['size']
            ]
            __cache_has_new_results[query] = True
        except:
            traceback.print_exc()
            continue

    try:
        __cache_request_scrapers[query].pop(scraper, None)
        if len(__cache_request_scrapers[query].keys()) > 0:
            if CACHE_LOG:
                tools.log(
                    'set_cache_skip ' +
                    str(__cache_request_scrapers[query].keys()), 'notice')
            return

        if not __cache_has_new_results[query]:
            if CACHE_LOG:
                tools.log('set_cache_skip_no_new_results', 'notice')
            return

        item = {}
        item['q'] = sha256(query)
        item['t'] = now()
        item['d'] = json.dumps(cached_results).replace('"', "'")

        if CACHE_LOG:
            tools.log('set_cache_request', 'notice')

        response = __dynamo_put(__map_in_cache(item))

        __cache_has_new_results[query] = False
    except:
        traceback.print_exc()
Example #23
def evaluate(model, step, configs, logger=None, vocoder=None):
    preprocess_config, model_config, train_config = configs

    # Get dataset
    dataset = Dataset(
        "val.txt", preprocess_config, train_config, sort=False, drop_last=False
    )
    batch_size = train_config["optimizer"]["batch_size"]
    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=dataset.collate_fn,
    )

    # Get loss function
    Loss = ParallelTacotron2Loss(model_config, train_config).to(device)

    # Evaluation
    loss_sums = [0 for _ in range(6)]
    for batchs in loader:
        for batch in batchs:
            batch = to_device(batch, device)
            with torch.no_grad():
                # Forward
                output = model(*(batch[2:]))

                # Cal Loss
                losses = Loss(batch, output, step)

                for i in range(len(losses)):
                    loss_sums[i] += losses[i].item() * len(batch[0])

    loss_means = [loss_sum / len(dataset) for loss_sum in loss_sums]

    message = "Validation Step {}, Total Loss: {:.4f}, Mel Loss: {:.4f}, Duration Loss: {:.4f}, KL Loss: {:.4f}, Attention Loss: {:.4f}".format(
        *([step] + [l for l in loss_means])
    )

    if logger is not None:
        fig, wav_reconstruction, wav_prediction, tag = synth_one_sample(
            batch,
            output,
            vocoder,
            model_config,
            preprocess_config,
        )

        log(logger, step, losses=loss_means)
        log(
            logger,
            fig=fig,
            tag="Validation/step_{}_{}".format(step, tag),
        )
        sampling_rate = preprocess_config["preprocessing"]["audio"]["sampling_rate"]
        log(
            logger,
            audio=wav_reconstruction,
            sampling_rate=sampling_rate,
            tag="Validation/step_{}_{}_reconstructed".format(step, tag),
        )
        log(
            logger,
            audio=wav_prediction,
            sampling_rate=sampling_rate,
            tag="Validation/step_{}_{}_synthesized".format(step, tag),
        )

    return message
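
The evaluation loop weights each batch loss by its batch size (len(batch[0])) and divides by the dataset length, giving a per-sample mean rather than a per-batch mean. A tiny numeric sketch of that averaging:

# (batch_size, mean_loss) pairs standing in for two validation batches.
batches = [(32, 0.50), (8, 0.90)]

loss_sum = sum(size * loss for size, loss in batches)
dataset_len = sum(size for size, _ in batches)

# (32*0.50 + 8*0.90) / 40 = 0.58 -- the per-sample mean.
print(loss_sum / dataset_len)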
Example #24
 def post(self, url, data, headers={}):
     tools.log('POST: %s' % url, 'info')
     request = lambda: self._cfscrape.post(
         url, data, headers=headers, timeout=self._timeout)
     return self._request_core(request)