Пример #1
0
    def __init__(self, config, training=True):
        """Audio dataset backed by a filelist of audio paths.

        :param config: configuration object with ``data_config`` and
            ``training_config`` sections.
        :param training: selects the train vs. test filelist and, when
            True, also reads the training segment length.
        """
        super(AudioDataset, self).__init__()
        self.config = config
        self.training = training
        self.hop_length = config.data_config.hop_length
        self.sample_rate = config.data_config.sample_rate

        # segment_length is only defined for the training split.
        if training:
            self.segment_length = config.training_config.segment_length

        if training:
            self.filelist_path = config.training_config.train_filelist_path
        else:
            self.filelist_path = config.training_config.test_filelist_path
        self.audio_paths = parse_filelist(self.filelist_path)
Пример #2
0
    def add_authors(self, dir=None, format=None):
        """Load author JSON files from *dir* and add one 人物 (person)
        entity per record to the poetry graph.

        :param dir: directory containing author JSON files; ``None`` is a
            no-op and returns immediately.
        :param format: file-extension filter forwarded to
            ``utils.parse_filelist``.
        :return: None
        """
        if dir is None:
            return None

        files = []
        if os.path.isdir(dir):
            files = utils.parse_filelist(dir, format)

        # Class and data property the new entities are typed/labelled with.
        c = self.get_class('人物')
        name = self.get_data_property(c, '姓名')

        for file in files:
            # Files contain Chinese text; force UTF-8 so decoding does not
            # depend on the platform's default locale encoding.
            with open(os.path.join(dir, file), 'r', encoding='utf-8') as f:
                data = json.load(f)
                for v in data:
                    rid = self.new_entity_resource()
                    self.poetry_graph.add((rid, RDF.type, c))
                    self.poetry_graph.add((rid, name, Literal(v['诗人姓名'], lang='zh')))
Пример #3
0
    # Load trained weights; strict=False tolerates checkpoints whose state
    # dict does not exactly match the current model definition.
    model.load_state_dict(torch.load(args.checkpoint_path)['model'],
                          strict=False)

    # Set noise schedule
    # The schedule tensor's last dimension gives the number of diffusion
    # iterations; init_fn simply returns the precomputed schedule.
    noise_schedule = torch.load(args.noise_schedule_path)
    n_iter = noise_schedule.shape[-1]
    init_fn = lambda **kwargs: noise_schedule
    init_kwargs = {'steps': n_iter}
    model.set_new_noise_schedule(init_fn, init_kwargs)

    # Trying to run inference on GPU
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = model.to(device)

    # Inference
    # NOTE(review): rtfs is initialized but never appended to in this view —
    # presumably real-time factors are computed further down; confirm
    # against the full file.
    filelist = parse_filelist(args.mel_filelist)
    rtfs = []
    # Wrap the filelist in a progress bar only in verbose mode.
    for mel_path in (tqdm(filelist, leave=False)
                     if args.verbose else filelist):
        with torch.no_grad():
            # assumes each .pt file holds a single mel spectrogram tensor;
            # a leading batch dimension is added before the forward pass.
            mel = torch.load(mel_path).unsqueeze(0).to(device)

            # Wall-clock timing of a single forward pass.
            start = datetime.now()
            outputs = model.forward(mel, store_intermediate_states=False)
            end = datetime.now()

            outputs = outputs.cpu().squeeze()
            # Derive an output index from the mel filename,
            # e.g. "mel_7.pt" -> "7", and save alongside the input file.
            baseidx = os.path.basename(
                os.path.abspath(mel_path)).split('_')[-1].replace('.pt', '')
            save_path = f'{os.path.dirname(os.path.abspath(mel_path))}/predicted_{baseidx}.wav'
            torchaudio.save(save_path,
Пример #4
0
    def add_poetry(self, dir=None, format=None):
        """Load poem JSON files from *dir* and populate the graph.

        For each poem record this adds a 诗词 (poem) entity, links it to
        its 人物 (author — created on demand), splits the content into
        诗句 (verse) entities, and links each verse to the poem, the
        author, and its neighbouring verses (previous/next).

        :param dir: directory containing poem JSON files; ``None`` is a
            no-op and returns immediately.
        :param format: file-extension filter forwarded to
            ``utils.parse_filelist``.
        :return: None
        """
        if dir is None:
            return None

        files = []
        if os.path.isdir(dir):
            files = utils.parse_filelist(dir, format)

        # Classes involved.
        poetry = self.get_class('诗词')
        verse_class = self.get_class('诗句')
        person = self.get_class('人物')

        # Properties involved.
        title = self.get_data_property(poetry, '题名')
        poem_content = self.get_data_property(poetry, '内容')
        poem_author = self.get_object_property('作者', poetry, person)
        verse_author = self.get_object_property('作者', verse_class, person)
        verse_belongto_poem = self.get_object_property('属于', verse_class, poetry)
        next_verse = self.get_object_property('下一句', verse_class, verse_class)
        prev_verse = self.get_object_property('上一句', verse_class, verse_class)

        for file in files:
            # Files contain Chinese text; force UTF-8 so decoding does not
            # depend on the platform's default locale encoding.
            with open(os.path.join(dir, file), 'r', encoding='utf-8') as f:
                data = json.load(f)
                for v in data:
                    poem = self.new_entity_resource()
                    # Add the poem entity with its title and content.
                    self.poetry_graph.add((poem, RDF.type, poetry))
                    self.poetry_graph.add((poem, title, Literal(v['诗名'], lang='zh')))
                    self.poetry_graph.add((poem, poem_content, Literal(v['内容'], lang='zh')))
                    # Look up the author; create the person entity on demand.
                    personal = self.get_resource(person, '姓名', v['作者'])
                    if not personal:
                        personal = self.new_entity_resource()
                        self.poetry_graph.add((personal, RDF.type, person))
                        self.poetry_graph.add((personal, self.get_data_property(person, '姓名'), Literal(v['作者'], lang='zh')))
                    # BUG FIX: this triple previously sat in an `else`
                    # branch, so poems by newly created authors were never
                    # linked to them; add the link unconditionally.
                    self.poetry_graph.add((poem, poem_author, personal))
                    # Add verse entities and verse/poem/author relations.
                    prev_v = None
                    for verse in re.split(',|。', v['内容']):
                        # Skip empty fragments (e.g. after a trailing 。),
                        # which would otherwise become empty verse entities.
                        if not verse:
                            continue
                        vres = self.new_entity_resource()
                        self.poetry_graph.add((vres, RDF.type, verse_class))
                        self.poetry_graph.add((vres, RDFS.label, Literal(verse, lang='zh')))

                        self.poetry_graph.add((vres, verse_author, personal))
                        self.poetry_graph.add((vres, verse_belongto_poem, poem))
                        # Chain consecutive verses in both directions.
                        if prev_v is not None:
                            self.poetry_graph.add((prev_v, next_verse, vres))
                            self.poetry_graph.add((vres, prev_verse, prev_v))
                        prev_v = vres
Пример #5
0
    # Parse the JSON config into an attribute-access wrapper.
    with open(args.config) as f:
        config = ConfigWrapper(**json.load(f))

    # Mel-spectrogram transform built from the data config; runs on GPU.
    mel_fn = MelSpectrogramFixed(sample_rate=config.data_config.sample_rate,
                                 n_fft=config.data_config.n_fft,
                                 win_length=config.data_config.win_length,
                                 hop_length=config.data_config.hop_length,
                                 f_min=config.data_config.f_min,
                                 f_max=config.data_config.f_max,
                                 n_mels=config.data_config.n_mels,
                                 window_fn=torch.hann_window).cuda()

    # Build the dataset, then override its filelist with the CLI argument.
    dataset = AudioDataset(config, training=True)
    dataset.filelist_path = args.filelist
    dataset.audio_paths = parse_filelist(dataset.filelist_path)

    loader = DataLoader(dataset, batch_size=48)

    # Count NaN/Inf entries in the mel spectrograms of every batch.
    nans, infs = [], []
    for batch in tqdm(loader, total=int(np.ceil(len(dataset) / 48))):
        batch = batch.cuda()
        mels = mel_fn(batch)

        nan_mask = torch.isnan(mels)
        inf_mask = torch.isinf(mels)

        nans.append(nan_mask.sum().cpu())
        infs.append(inf_mask.sum().cpu())

    # NOTE(review): infs is collected but not reported in this view —
    # presumably printed below; confirm against the full file.
    print(f'Dataset has nans: {any([item != 0 for item in nans])}')