Exemplo n.º 1
0
def _collate_fn(batch, token2idx, label_type, LFR_m=1, LFR_n=1):
    """Turn one pre-assembled minibatch into padded tensors.

    Args:
        batch: list, len(batch) = 1. Its single element is handed to
            ``load_inputs_and_targets``. See AudioDataset.__getitem__()
        token2idx: forwarded unchanged to ``load_inputs_and_targets``.
        label_type: forwarded unchanged to ``load_inputs_and_targets``.
        LFR_m: forwarded as the ``LFR_m`` keyword.
        LFR_n: forwarded as the ``LFR_n`` keyword.
    Returns:
        xs_pad: N x Ti x D, torch.Tensor (float), built by
            ``pad_list`` with pad value 0
        ilens : N, torch.Tensor holding ``shape[0]`` of every input
        ys_pad: N x To, torch.Tensor (long), built by ``pad_list``
            with pad value 0
    """
    # the minibatch is expected to arrive wrapped in a one-element list
    assert len(batch) == 1
    xs, ys = load_inputs_and_targets(
        batch[0], token2idx, label_type, LFR_m=LFR_m, LFR_n=LFR_n
    )

    # TODO: perform subsampling

    # length of every input sequence along its first axis
    frame_counts = np.array([feat.shape[0] for feat in xs])

    # numpy -> torch conversion, then padding to a common length
    feature_tensors = [torch.from_numpy(feat).float() for feat in xs]
    xs_pad = pad_list(feature_tensors, 0)
    ilens = torch.from_numpy(frame_counts)
    label_tensors = [torch.from_numpy(label).long() for label in ys]
    ys_pad = pad_list(label_tensors, 0)

    return xs_pad, ilens, ys_pad
Exemplo n.º 2
0
def f_xy_pad(batch):
    """Pad the paired sequences of a minibatch.

    Args:
        batch: sequence whose first element is an iterable of samples;
            ``sample[0]`` is used as the input and ``sample[1]`` as the
            target of each sample.
    Returns:
        (xs_pad, ys_pad): both produced by ``pad_list`` with pad value 0
        from the samples converted to long tensors.
    """
    samples = batch[0]

    input_tensors = [torch.tensor(pair[0]).long() for pair in samples]
    xs_pad = pad_list(input_tensors, 0)

    target_tensors = [torch.tensor(pair[1]).long() for pair in samples]
    ys_pad = pad_list(target_tensors, 0)

    return xs_pad, ys_pad
Exemplo n.º 3
0
def _collate_fn(batch, LFR_m=1, LFR_n=1, model_choose='baseline3'):
    """Turn one pre-assembled minibatch into padded inputs and labels.

    Args:
        batch: list, len(batch) = 1. Its single element is handed to
            ``load_inputs_and_targets``. See AudioDataset.__getitem__()
        LFR_m: forwarded as the ``LFR_m`` keyword.
        LFR_n: forwarded as the ``LFR_n`` keyword.
        model_choose: model name; for 'baseline2' and 'baseline4' the
            reported lengths are ceil(length / 4), otherwise the raw
            lengths are reported.
    Returns:
        xs_pad: N x Ti x D, torch.Tensor (float), built by
            ``pad_list`` with pad value 0
        ilens : N, torch.Tensor of (possibly divided) input lengths
        dialect_labels: the labels returned by
            ``load_inputs_and_targets``, converted with torch.from_numpy
    """
    import math

    # the minibatch is expected to arrive wrapped in a one-element list
    assert len(batch) == 1
    xs, dialect_labels = load_inputs_and_targets(
        batch[0], LFR_m=LFR_m, LFR_n=LFR_n
    )

    # these two models get lengths divided by four (rounded up)
    quartered = model_choose in ['baseline2', 'baseline4']
    frame_counts = np.array([
        int(math.ceil(feat.shape[0] / 4)) if quartered
        else int(math.ceil(feat.shape[0]))
        for feat in xs
    ])

    # numpy -> torch conversion, then padding to a common length
    feature_tensors = [torch.from_numpy(feat).float() for feat in xs]
    xs_pad = pad_list(feature_tensors, 0)

    ilens = torch.from_numpy(frame_counts)
    dialect_labels = torch.from_numpy(dialect_labels)

    return xs_pad, ilens, dialect_labels
Exemplo n.º 4
0
 def preprocess(self, padded_input):
     """Build the decoder input and the decoder target from padded labels.

     Entries equal to IGNORE_ID are stripped from every row first. The
     decoder input is each row with <sos> prepended; the decoder target
     is each row with <eos> appended.

     Returns:
         (ys_in_pad, ys_out_pad): the inputs passed through ``pad_list``
         with ``self.eos_id`` as pad value and the targets passed through
         ``pad_list`` with IGNORE_ID as pad value. Both must have the
         same size.
     """
     # recover the unpadded label sequences
     targets = [row[row != IGNORE_ID] for row in padded_input]

     # one-element tensors created from the first sequence via Tensor.new
     reference = targets[0]
     eos = reference.new([self.eos_id])
     sos = reference.new([self.sos_id])

     decoder_inputs = [torch.cat([sos, seq], dim=0) for seq in targets]
     decoder_targets = [torch.cat([seq, eos], dim=0) for seq in targets]

     # inputs are padded with eos, targets with IGNORE_ID
     ys_in_pad = pad_list(decoder_inputs, self.eos_id)
     ys_out_pad = pad_list(decoder_targets, IGNORE_ID)
     assert ys_in_pad.size() == ys_out_pad.size()
     return ys_in_pad, ys_out_pad
def extract_content_gsc(file_name):
    """Scrape one product page and print the extracted data as JSON.

    The page text comes from ``parse_file(file_name)`` and is parsed with
    ``html.fromstring``. Title, price range, short description, category,
    tags, variation attributes and per-variation prices are read through
    XPath queries, handed to ``generate_json_gsc`` and the result is
    printed with ``json.dumps(..., indent=4)``. Nothing is returned.

    The title and category lookups take the first XPath match, and the
    discount prices need a "Model" column among the variation attributes,
    so a page lacking any of these raises.
    """

    def _glue_pairs(pieces):
        # Concatenate items (0, 1), (2, 3), ...; an unpaired last item is dropped.
        return [f"{pieces[i]}{pieces[i + 1]}" for i in range(0, len(pieces) - 1, 2)]

    tree = html.fromstring(parse_file(file_name))

    title = tree.xpath('//h1[@class="product_title entry-title"]/text()')[0]

    # lower and upper bound of the price range shown in the summary
    price_from = "".join(tree.xpath(
        '//div[@class="summary entry-summary"]/p/span[1]/span/text() | //div[@class="summary entry-summary"]/p/span[1]/text()'))
    price_to = "".join(tree.xpath(
        '//div[@class="summary entry-summary"]/p/span[2]/span/text() | //div[@class="summary entry-summary"]/p/span[2]/text()'))

    description_parts = tree.xpath('//div[@class="woocommerce-product-details__short-description"]/p/text()')
    description = "".join(description_parts).replace("\n", "")

    category = tree.xpath('//span[@class="posted_in"]/a/text()')[0]
    tags = tree.xpath('//span[@class="tagged_as"]/a/text()')

    # column headers of the variations table, then the cell texts per column
    headers = tree.xpath('//table[@class="table table-hover variations"]/thead/tr/th[not(@*)]/text()')
    attributes = [header.strip() for header in headers]
    var_attr = {
        attr: tree.xpath(
            f'//table[@class="table table-hover variations"]/tbody/tr/td[@data-title="{attr}"]/text()')
        for attr in attributes
    }

    # list prices arrive as consecutive text fragments that are glued pairwise
    list_price_fragments = tree.xpath(
        '//span[@class="price"]//span[1]/span/text() | //span[@class="price"]//span[1]/text()')
    separate_list_price = _glue_pairs(list_price_fragments)

    # discount fragments go through pad_list before being glued the same way
    discount_fragments = pad_list(tree.xpath('//ins/span/span/text() | //ins/span/text()'), var_attr["Model"],
                                  "currency_xpath")
    separate_discount_price = _glue_pairs(discount_fragments)

    variations = zip(separate_list_price, separate_discount_price)
    results = generate_json_gsc(title, price_from, price_to, description, category, tags, var_attr, variations)
    print(json.dumps(results, indent=4))
Exemplo n.º 6
0
def f_x_pad(batch):
    """Pad the samples of a minibatch.

    Args:
        batch: sequence whose first element is an iterable of samples.
    Returns:
        The result of ``pad_list`` (pad value 0) applied to the samples
        converted to long tensors.
    """
    samples = batch[0]
    sample_tensors = [torch.tensor(item).long() for item in samples]
    return pad_list(sample_tensors, 0)