Example #1
def rouge_extraction(read_file, write_file):
    rouge = Rouge()
    data = JSON.load(read_file)
    for line in data:
        # score each (comment, reply) pair and attach the result in place
        scores = rouge.get_scores(line["data"]["text_information"]["comment"],
                                  line["data"]["text_information"]["reply"])[0]
        line["data"]["rouge_scores"] = scores
    JSON.dump(data, write_file)
    return data
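
A minimal driver sketch, assuming JSON.load/JSON.dump accept file paths (as the other examples suggest) and that Rouge comes from the third-party rouge package; the file names are placeholders:

from rouge import Rouge  # assumed dependency

# Hypothetical paths; rouge_extraction also returns the scored data.
scored = rouge_extraction("replies.json", "replies_with_rouge.json")
print(scored[0]["data"]["rouge_scores"])  # {'rouge-1': ..., 'rouge-2': ..., 'rouge-l': ...}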
Example #2
def prepare_iter(filename, firstline=True, task=2):
    # load datasets to map into indexes
    if filename.split(".")[-1] == "csv":
        data_iter = CSV.get_iterator(filename,
                                     firstline=firstline,
                                     task=task)
        num_lines = CSV._len(filename)
    elif filename.split(".")[-1] == "json":
        data_iter = JSON.get_iterator(filename, task=task)
        num_lines = JSON._len(filename)
    else:
        raise Exception("Not implemented yet")
    return data_iter, num_lines
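
A consumption sketch, assuming the iterator yields (source, target) pairs as Example #7 suggests; the file name is a placeholder:

data_iter, num_lines = prepare_iter("train.csv", firstline=True, task=2)
print("samples:", num_lines)
for nl, target in data_iter:  # assumed pair layout
    pass  # placeholder for per-sample work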
Example #3
    def pack_batch(self, test_file, batch_size=8):
        ftype = test_file.split(".")[-1]
        if ftype == "json":
            data = JSON.load(test_file)
            # random.shuffle(data)
            entries = []
            for entry in data:
                if len(entries) == batch_size:
                    yield entries
                    entries = []
                entries.append(entry)
            if len(entries) != 0:
                yield entries
        elif ftype == "csv":
            data = CSV.read(test_file)
            entries = []
            for row in data:
                if len(entries) == batch_size:
                    yield entries
                    entries = []
                entry = self.prepare_entry(row[0])
                entry["gold_output"] = CSV.process_target(row[-1])
                entries.append(entry)
            if len(entries) != 0:
                yield entries
        else:
            print("not implemented yet")
            return
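
Because pack_batch is a generator, batches can be consumed lazily; a sketch, where model stands in for whatever object defines pack_batch and prepare_entry, and the file name is a placeholder:

for batch in model.pack_batch("test.json", batch_size=8):
    assert len(batch) <= 8  # the tail batch may be smaller
    run_inference(batch)    # hypothetical per-batch step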
Example #4
def write_csv(data, file_name, title=""):
    # data.sort(key=lambda x: len(x[0]), reverse=False)
    file_type = file_name.split(".")[-1]
    if file_type == "json":
        JSON.dump(data, file_name)
    else:
        with open(file_name, "w", newline='') as f:
            if file_type != "csv":
                if len(title) != 0:
                    f.write(title + "\n")
                for line in data:
                    f.write(line + "\n")
            else:
                writer = csv.writer(f, delimiter=",")
                if len(title) != 0:
                    writer.writerow(title)  # expects a sequence of column names
                writer.writerows(data)
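
A sketch of the three dispatch paths with placeholder file names; note that the CSV branch expects title to be a sequence of column names, while the plain-text branch expects a string:

write_csv([{"id": 1}], "dump.json")                                    # delegates to JSON.dump
write_csv(["line one", "line two"], "notes.txt", title="header")       # plain text, one line per item
write_csv([["hello", "hi"]], "pairs.csv", title=["source", "target"])  # proper CSV rows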
Example #5
def read_data(filename, firstline=True):
    # load datasets to map into indexes
    if filename.split(".")[-1] == "csv":
        data = CSV.read(filename, firstline=firstline, slices=[0, 1])
    elif filename.split(".")[-1] == "txt":
        data = TXT.read(filename, firstline=firstline)
    elif filename.split(".")[-1] == "json":
        data = JSON.load(filename)
    else:
        raise Exception("Not implemented yet")
    return data
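
A call sketch; the file name is a placeholder, and the slices=[0, 1] argument suggests only the first two CSV columns are kept:

pairs = read_data("data.csv", firstline=True)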
Example #6
def load_file(files, firstline=True, task=2):
    datasets = []
    for fname in files:
        # Read input files
        if fname.split(".")[-1] == "csv":
            datasets.append(
                CSV(fname, limit=-1, firstline=firstline, task=task))
        elif fname.split(".")[-1] == "json":
            datasets.append(JSON(fname, limit=-1, task=task))
        else:
            raise Exception("Not implemented yet")
    return datasets
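
A usage sketch; both file names are placeholders, and the (source, target) pair layout is assumed from Example #7:

datasets = load_file(["train.csv", "extra.json"], firstline=True, task=2)
for dataset in datasets:
    for nl, target in dataset:
        pass  # placeholder for per-sample work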
Example #7
    def build(self, files, limit=-1, firstline=True):
        """
        Read a list of file names, return vocabulary
        :param files: list of file names
        :param limit: read number of lines
        """
        swcnt, swl = Counter(), 0
        twcnt, twl = Counter(), 0
        count = 0

        for fname in files:
            # Read input files
            if fname.split(".")[-1] == "csv":
                raw = CSV(fname, limit=limit, firstline=firstline)
            elif fname.split(".")[-1] == "json":
                raw = JSON(fname, source2idx=None, target2idx=None, limit=-1)
            else:
                raise Exception("Not implemented yet")

            for line in raw:
                count += 1
                (nl, target) = line
                nl = Vocab.process_nl(nl)
                target = Vocab.process_target(target)
                swcnt, swl = Vocab.update_sent(nl, swcnt, swl)
                twcnt, twl = Vocab.update_sent(target, twcnt, twl)

        swvocab = Vocab.update_vocab(swcnt, self.swcutoff, sys_tokens)
        twvocab = Vocab.update_vocab(twcnt, self.twcutoff, sys_tokens)
        self.sw2i = swvocab
        self.i2sw = Vocab.reversed_dict(swvocab)
        self.swl = swl if self.swl < 0 else min(swl, self.swl)

        self.tw2i = twvocab
        self.i2tw = Vocab.reversed_dict(twvocab)
        self.twl = twl if self.twl < 0 else min(twl, self.twl)

        print("\t- Extracting vocabulary: %d total samples" % count)

        print("\t\t- Natural Language Side: ")
        print("\t\t\t- %d total words" % (sum(swcnt.values())))
        print("\t\t\t- %d unique words" % (len(swcnt)))
        print("\t\t\t- %d unique words appearing at least %d times" %
              (len(swvocab) - 4, self.swcutoff))
        print("\t\t- Label Side: ")
        print("\t\t\t- %d total words" % (sum(twcnt.values())))
        print("\t\t\t- %d unique words" % (len(twcnt)))
        print("\t\t\t- %d unique words appearing at least %d times" %
              (len(twvocab) - 4, self.twcutoff))
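
Example #11 below shows this method in context; a condensed sketch, where the [-1, 1] parameter pairs are taken verbatim from that example and the file name is a placeholder:

vocab = Vocab([-1, 1], [-1, 1])
vocab.build(["train.json"])
print(len(vocab.sw2i), len(vocab.tw2i))  # source/target vocabulary sizes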
Example #8
def write_dataset(data_file,
                  train_file,
                  val_file,
                  test_file,
                  tr_ratio=0.9,
                  val_ratio=0.95,
                  shuffle=True,
                  readfirstline=False,
                  writefirstline=False):

    title = ""
    file_type = data_file.split(".")[-1]
    if file_type == "json":
        corpus = JSON.load(data_file)
    else:
        corpus = set()
        with open(data_file, "r") as f:
            if file_type == "csv":
                csvreader = csv.reader(f)
                if readfirstline:
                    title = next(csvreader)
                for line in csvreader:
                    corpus.update([tuple([line[0], line[-1]])])
            else:
                if readfirstline:
                    title = next(f)
                for line in f:
                    corpus.update([line.strip()])
            corpus = list(corpus)
    train_len = int(tr_ratio * len(corpus))
    val_len = int(val_ratio * len(corpus))
    if shuffle:
        np.random.shuffle(corpus)
        train, val, test = np.split(corpus, [train_len, val_len])
        train = train.tolist()
        val = val.tolist()
        test = test.tolist()
    else:
        train = corpus[:train_len]
        val = corpus[train_len:val_len]
        test = corpus[val_len:]

    if not writefirstline:
        title = ""
    if len(train) != 0:
        write_csv(train, train_file, title)
    if len(val) != 0:
        write_csv(val, val_file, title)
    if len(test) != 0:
        write_csv(test, test_file, title)
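
A call sketch with placeholder file names; note that tr_ratio and val_ratio are cumulative split points, so 0.9 and 0.95 yield a 90/5/5 train/val/test split:

write_dataset("corpus.csv", "train.csv", "val.csv", "test.csv",
              tr_ratio=0.9, val_ratio=0.95, shuffle=True)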
Example #9
def prepare_entry(task, rv_text, rv_rate, rv_name, rv_title, rv_hotel, rouge_score=None):
    if task == "sentiment":
        prompt_text = rv_text
    elif task == "paraphrase":
        prompt_text = rv_text
    else:
        if rouge_score is not None:
            # prompt_text = " ".join([rouge_score, SENSP, rv_rate.lower(), SENSP, rv_hotel.lower(), SENSP,
            #                         rv_name.lower(), SENSP, rv_title.lower(), SENSP, rv_text.lower(), SENGE])
            prompt_text = " ".join([rouge_score, rv_hotel.lower(), rv_name.lower(), rv_text.lower(), SENGE])
        else:
            # prompt_text = " ".join([rv_rate.lower(), SENSP, rv_hotel.lower(), SENSP, rv_name.lower(), SENSP,
            #                         rv_title.lower(), SENSP, rv_text.lower(), SENGE])
            prompt_text = " ".join([rv_hotel.lower(), rv_name.lower(), rv_text.lower(), SENGE])
    prompt_text = JSON.process_nl(prompt_text)
    return prompt_text
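
A call sketch; the task value "response" is hypothetical (any value other than "sentiment" or "paraphrase" reaches the prompt-building branch), and SENGE plus JSON.process_nl come from the surrounding project:

prompt = prepare_entry("response", rv_text="Great stay!", rv_rate="5",
                       rv_name="Alex", rv_title="Loved it", rv_hotel="Hotel X")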
Example #10
    tg2ids = BPE.tokens2ids(tokenizer,
                            sos=False,
                            eos=False,
                            add_special_tokens=False)

    pad_id = tokenizer.token_to_id(BPAD) or 0  # fall back to 0 when BPAD is absent

    collate_fn = BPE.collate_fn(pad_id, True)

    # load datasets to map into indexes
    if filename.split(".")[-1] == "csv":
        train_data = CSV.get_iterator(filename, firstline=True, task=2)
        num_lines = CSV._len(filename)
    elif filename.split(".")[-1] == "json":
        train_data = JSON.get_iterator(filename, task=2)
        num_lines = JSON._len(filename)
    else:
        raise Exception("Not implemented yet")

    train_iterdataset = IterDataset(train_data,
                                    source2idx=nl2ids,
                                    target2idx=lb2ids,
                                    num_lines=num_lines,
                                    bpe=True)
    train_dataloader = DataLoader(train_iterdataset,
                                  pin_memory=True,
                                  batch_size=8,
                                  collate_fn=collate_fn)

    for i, batch in enumerate(train_dataloader):
Example #11
    filename = "/media/data/review_response/Dev.json"

    s_paras = [-1, 1]
    t_paras = [-1, 1]

    vocab = Vocab(s_paras, t_paras)
    vocab.build([filename])

    nl2ids = vocab.lst2idx(vocab_words=vocab.sw2i, unk_words=True, eos=True)

    tg2ids = vocab.lst2idx(vocab_words=vocab.tw2i,
                           unk_words=False,
                           sos=True,
                           eos=True)

    train_data = JSON(filename, source2idx=nl2ids, target2idx=tg2ids)
    # train_data = Csvfile(filename)

    data_idx = []
    batch = 8
    for d in Vocab.minibatches(train_data, batch):
        data_idx.append(d)
        nl, target = list(zip(*d))

        nl_pad_ids, nl_lens = seqPAD.pad_sequences(nl,
                                                   pad_tok=vocab.sw2i[PAD],
                                                   nlevels=1)
        nl_tensor = Data2tensor.idx2tensor(nl_pad_ids,
                                           dtype=torch.long,
                                           device=device)
        nl_len_tensor = Data2tensor.idx2tensor(nl_lens,