Пример #1
0
def calculate_popularity(file_path):
    """Tally how often each cited id occurs among the references of a file.

    Args:
        file_path: Passed unchanged to ``ref_gen``, which yields the
            reference objects to count.

    Returns:
        A ``Counter`` keyed by ``cited_id``; each value is the number of
        yielded references carrying that id.
    """
    tally = Counter()
    for position, reference in enumerate(ref_gen(file_path)):
        # Progress marker on every 100th reference, starting with the first.
        if not position % 100:
            print("cp %d" % position)
        tally[reference.cited_id] += 1
    return tally
def calculate_popularity(file_path):
    """Tally how often each cited id occurs among the references of a file.

    Args:
        file_path: Passed unchanged to ``ref_gen``, which yields the
            reference objects to count.

    Returns:
        A ``Counter`` keyed by ``cited_id``; each value is the number of
        yielded references carrying that id.
    """
    tally = Counter()
    for position, reference in enumerate(ref_gen(file_path)):
        # Progress marker on every 100th reference, starting with the first.
        if not position % 100:
            print("cp %d" % position)
        tally[reference.cited_id] += 1
    return tally
Пример #3
0
def count_paragraph_titles_for_citations(file_path):
    """Count, per cited id, the lower-cased titles of its references.

    Args:
        file_path: Passed unchanged to ``ref_gen``, which yields the
            reference objects to inspect.

    Returns:
        A ``defaultdict`` mapping each ``cited_id`` to a ``Counter`` of
        lower-cased ``ref.title`` values, e.g.
        ``{"1233242": {"introduction": 1234, "background": 123}}``.
    """
    titles_by_cited_id = defaultdict(Counter)
    for position, reference in enumerate(ref_gen(file_path)):
        # Progress marker on every 100th reference, starting with the first.
        if not position % 100:
            print(position)
        # Titles are case-folded so differently-cased variants share a count.
        heading = reference.title.lower()
        titles_by_cited_id[reference.cited_id][heading] += 1
    return titles_by_cited_id
Пример #4
0
def find_cits_with_sentiment_words(file_path):
    """Collect references whose sentence contains at least one sentiment word.

    Each reference yielded by ``ref_gen(file_path)`` has the result of
    ``ref.get_sentence()`` tokenized with ``smart_tokenize``; the distinct
    tokens are intersected with the module-level ``sentiment_words``.

    Args:
        file_path: Passed unchanged to ``ref_gen``.

    Returns:
        A set of ``(ref, matched_words)`` pairs, one per reference with a
        non-empty intersection. ``matched_words`` is a sorted tuple of the
        matching tokens.
    """
    ret = set()
    for idx, ref in enumerate(ref_gen(file_path)):
        # Progress marker on every 100th reference, starting with the first.
        if idx % 100 == 0:
            print(idx)
        tokens = set(smart_tokenize(ref.get_sentence()))
        intersect = tokens.intersection(sentiment_words)
        if intersect:
            # set.add() accepts exactly one hashable element, so the reference
            # and its matches are stored as a single tuple; the matches are a
            # tuple (not a list) because set members must be hashable.
            # NOTE(review): assumes reference objects are hashable - confirm.
            ret.add((ref, tuple(sorted(intersect))))
    return ret
def find_cits_with_sentiment_words(file_path):
    """Collect references whose sentence contains at least one sentiment word.

    Each reference yielded by ``ref_gen(file_path)`` has the result of
    ``ref.get_sentence()`` tokenized with ``smart_tokenize``; the distinct
    tokens are intersected with the module-level ``sentiment_words``.

    Args:
        file_path: Passed unchanged to ``ref_gen``.

    Returns:
        A set of ``(ref, matched_words)`` pairs, one per reference with a
        non-empty intersection. ``matched_words`` is a sorted tuple of the
        matching tokens.
    """
    ret = set()
    for idx, ref in enumerate(ref_gen(file_path)):
        # Progress marker on every 100th reference, starting with the first.
        if idx % 100 == 0:
            print(idx)
        tokens = set(smart_tokenize(ref.get_sentence()))
        intersect = tokens.intersection(sentiment_words)
        if intersect:
            # set.add() accepts exactly one hashable element, so the reference
            # and its matches are stored as a single tuple; the matches are a
            # tuple (not a list) because set members must be hashable.
            # NOTE(review): assumes reference objects are hashable - confirm.
            ret.add((ref, tuple(sorted(intersect))))
    return ret
Пример #6
0
def count_words_in_context(file_path, popular_pmids):
    """Count the tokens of citing sentences, grouped by cited id.

    Only references whose ``cited_id`` is contained in ``popular_pmids`` are
    counted; all others are skipped.

    Args:
        file_path: Passed unchanged to ``ref_gen``, which yields the
            reference objects to inspect.
        popular_pmids: Container of cited ids to keep (used with ``in``).

    Returns:
        A ``defaultdict`` mapping each kept ``cited_id`` to a ``Counter`` of
        the tokens that ``smart_tokenize`` produced from
        ``ref.get_sentence(0)``.
    """
    words_by_cited_id = defaultdict(Counter)
    for position, reference in enumerate(ref_gen(file_path)):
        # Progress marker on every 100th reference, starting with the first.
        if not position % 100:
            print(position)
        cited = reference.cited_id
        if cited in popular_pmids:
            sentence_tokens = smart_tokenize(reference.get_sentence(0))
            words_by_cited_id[cited] += Counter(sentence_tokens)
    return words_by_cited_id
def count_words_in_context(file_path, popular_pmids):
    """Count the tokens of citing sentences, grouped by cited id.

    Only references whose ``cited_id`` is contained in ``popular_pmids`` are
    counted; all others are skipped.

    Args:
        file_path: Passed unchanged to ``ref_gen``, which yields the
            reference objects to inspect.
        popular_pmids: Container of cited ids to keep (used with ``in``).

    Returns:
        A ``defaultdict`` mapping each kept ``cited_id`` to a ``Counter`` of
        the tokens that ``smart_tokenize`` produced from
        ``ref.get_sentence(0)``.
    """
    words_by_cited_id = defaultdict(Counter)
    for position, reference in enumerate(ref_gen(file_path)):
        # Progress marker on every 100th reference, starting with the first.
        if not position % 100:
            print(position)
        cited = reference.cited_id
        if cited in popular_pmids:
            sentence_tokens = smart_tokenize(reference.get_sentence(0))
            words_by_cited_id[cited] += Counter(sentence_tokens)
    return words_by_cited_id