def get_word_count(input_file, filter_stopwords=True, ngrams=1, bigram_dict=None, words_func=None):
    """Count token occurrences over every document in *input_file*.

    Each record's "text" field is tokenized via *words_func* (which
    receives the ngrams / stopword / bigram options), and a single
    frequency table over the whole corpus is returned as a
    defaultdict(int) mapping token -> count.
    """
    counts = collections.defaultdict(int)
    for record in utils.read_json_list(input_file):
        # One tokenizer call per document; tally tokens into the shared table.
        for token in words_func(record["text"],
                                ngrams=ngrams,
                                filter_stopwords=filter_stopwords,
                                bigram_dict=bigram_dict):
            counts[token] += 1
    return counts
def load_doc_topics(input_file, doc_topic_file, threshold=0.01):
    """Load topics in each document.

    Reads documents from *input_file* in lockstep with lines from
    *doc_topic_file*; columns 2+ of each topic line are per-topic
    weights, and a topic index is kept when its weight exceeds
    *threshold*. Stops early if the topic file runs out of lines.
    Returns a list of utils.IdeaArticle records.
    """
    articles = []
    with open(doc_topic_file) as tfin:
        for record in utils.read_json_list(input_file):
            line = tfin.readline()
            if not line:
                # Topic file exhausted before the article list.
                break
            weights = line.strip().split()[2:]
            active = {idx for idx, w in enumerate(weights) if float(w) > threshold}
            articles.append(
                utils.IdeaArticle(fulldate=int(record["date"]), ideas=active))
    return articles
def convert_word_count_mallet(word_dict, input_file, output_file, words_func=None):
    """Write documents to *output_file* in MALLET-ready form.

    Each output line is "<doc_id> <date> <wid wid ...>" where every
    word id from *word_dict* is repeated once per occurrence in the
    document, in ascending id order. Tokens absent from *word_dict*
    are dropped. Document ids start at 1.
    """
    with open(output_file, "w") as fout:
        for doc_id, record in enumerate(utils.read_json_list(input_file), start=1):
            freq = collections.Counter(words_func(record["text"]))
            # Map surviving tokens to (word_id, count), ordered by id.
            indexed = sorted(
                (word_dict[tok], cnt) for tok, cnt in freq.items() if tok in word_dict)
            repeated = [" ".join([str(wid)] * cnt) for wid, cnt in indexed]
            fout.write("%s %s %s\n" % (doc_id, record["date"], " ".join(repeated)))
def load_word_articles(input_file, vocab_file, data_dir, vocab_size=100):
    """Load each article as the set of vocabulary word ids it contains.

    Builds a vocab of size *vocab_size* from *vocab_file*, tokenizes
    every document with the bigram-aware tokenizer (bigram phrases are
    read from "<data_dir>/bigram_phrases.txt"), and keeps only tokens
    present in the vocab. Returns (articles, word_set, word_map) where
    word_set maps word -> id and word_map is the reverse mapping.
    """
    word_map = utils.read_word_dict(vocab_file, vocab_size=vocab_size)
    word_set = utils.get_reverse_dict(word_map)
    bigram_dict = wc.load_bigrams("%s/bigram_phrases.txt" % data_dir)
    tokenize_doc = functools.partial(wc.get_mixed_tokens, bigram_dict=bigram_dict)
    articles = []
    for record in utils.read_json_list(input_file):
        # Keep only in-vocabulary tokens, deduplicated as a set of ids.
        idea_ids = {word_set[tok] for tok in tokenize_doc(record["text"]) if tok in word_set}
        articles.append(
            utils.IdeaArticle(fulldate=int(record["date"]), ideas=idea_ids))
    return articles, word_set, word_map
def convert_word_count_mallet(word_dict, input_file, output_file, words_func=None):
    """Write documents to *output_file* in MALLET-ready form, skipping
    the conversion entirely when the output file already exists.

    Each output line is "<doc_id> <date> <wid wid ...>" with every word
    id from *word_dict* repeated once per occurrence, sorted by id.
    Document ids start at 1; out-of-vocabulary tokens are dropped.
    """
    # Guard clause: don't redo work if a previous run produced the file.
    if os.path.exists(output_file):
        print("convert_word_count_mallet: output file found at: {}, skipping".format(output_file))
        return
    with open(output_file, "w") as fout:
        for doc_id, record in enumerate(utils.read_json_list(input_file), start=1):
            freq = collections.Counter(words_func(record["text"]))
            indexed = sorted(
                (word_dict[tok], cnt) for tok, cnt in freq.items() if tok in word_dict)
            repeated = [" ".join([str(wid)] * cnt) for wid, cnt in indexed]
            fout.write("%s %s %s\n" % (doc_id, record["date"], " ".join(repeated)))
def load_doc_topics(input_file, doc_topic_file, threshold=0.01):
    """Load topics in each document.

    Reads documents from *input_file* in lockstep with lines from
    *doc_topic_file* (one topic line per document). Columns 2+ of a
    topic line are per-topic weights; a topic's index is kept when its
    weight exceeds *threshold*.

    Args:
        input_file: JSON-list file of documents with a "date" field.
        doc_topic_file: whitespace-separated doc-topic weight file.
        threshold: minimum weight for a topic to count as present.

    Returns:
        List of utils.IdeaArticle(fulldate, ideas) records, where
        ideas is the set of active topic indices.
    """
    articles = []
    with open(doc_topic_file) as tfin:
        for data in utils.read_json_list(input_file):
            topic_line = tfin.readline()
            if not topic_line:
                # Topic file ran out before the documents did; stop here.
                break
            ideas = topic_line.strip().split()[2:]
            ideas = set(
                [i for (i, v) in enumerate(ideas) if float(v) > threshold])
            articles.append(
                utils.IdeaArticle(fulldate=int(data["date"]), ideas=ideas))
    return articles
def preprocess_input(input_file, output_file, func=tokenize):
    """Re-tokenize every document's text and write the corpus back out.

    Applies *func* to each record's "text" field, joins the resulting
    tokens with single spaces, and writes the updated records to
    *output_file* as a JSON list.
    """
    processed = []
    for record in utils.read_json_list(input_file):
        tokens = func(record["text"])
        record["text"] = " ".join(tokens)
        processed.append(record)
    utils.write_json_list(output_file, processed)