Example #1
# Imports assumed by this snippet (they live at module level in the original
# repo); `store_path`, `get_figure_count`, `extract_text`, and the peewee
# model `Paper` are likewise defined elsewhere.
import os
import re
import urllib.request

import arxiv                # legacy arxiv package exposing arxiv.query()
from tabula import wrapper  # tabula-py 1.x exposed read_pdf via its wrapper module


def download_extract(paper, extract_figure=False, extract_table=False):
    # Skip papers whose page and table counts were already extracted.
    if paper.pages >= 0 and paper.table >= 0:
        return False
    api_paper = arxiv.query(id_list=[paper.arvixID])[0]
    if 'pdf_url' not in api_paper:
        return False
    pdf_url = api_paper['pdf_url']
    file_path = os.path.join(store_path, paper.paperId + '.pdf')
    urllib.request.urlretrieve(pdf_url, file_path)

    # Default to the stored value so `table_count` is always defined, even
    # when extract_table is False (the original raised NameError in that case).
    table_count = paper.table
    if extract_table:
        # Let tabula parse every page; only the number of tables is kept.
        df = wrapper.read_pdf(file_path, multiple_tables=True, pages='all')
        table_count = len(df)
        del df

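    # Count figures and pages, persist any missing counts, and remove the
    # downloaded PDF once it has been measured.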
    if extract_figure:
        figure_count, page_count = get_figure_count(file_path)
        modified = False
        if paper.pages == -1:
            modified = True
            paper.pages = page_count
        else:
            page_count = paper.pages
        if paper.table == -1:
            modified = True
            paper.table = table_count
        if os.path.exists(file_path):
            os.remove(file_path)
        if modified:
            Paper.update(table=table_count, pages=page_count).where(
                Paper.arvixID == paper.arvixID).execute()
            return modified
    texts = extract_text(file_path, pdf_url)
    if texts is None:
        print("PDF either do not exists or failed : ", paper.url)
        return False
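    # Harvest affiliation hints from e-mail domains found in the text.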
    affiliation = []
    for text in texts.split():
        if re.match(r"[^@]+@[^@]+\.[^@]+", text):
            domain_name = text.split('@')[-1]
            affiliation.append(domain_name)
    if len(affiliation) > 0:
        Paper.update(affiliation=affiliation).where(
            Paper.arvixID == paper.arvixID).execute()

    return False
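
A minimal usage sketch (hypothetical: assumes a populated peewee Paper table and a writable store_path, neither of which is shown in the snippet above):

for paper in Paper.select().where((Paper.pages == -1) | (Paper.table == -1)):
    if download_extract(paper, extract_figure=True, extract_table=True):
        print('Updated page/table counts for', paper.arvixID)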
Example #2
# Imports assumed by this snippet; `start_index`, `end_index`, the peewee
# model `Paper`, and `get_arvixpaper_semantic_scholar()` are defined at
# module level in the original repo.
import logging
import time

import arxivpy
from tqdm import tqdm


def crawl_category(term='cs.LG'):
    index_iteration = 500  # results fetched per arXiv API call
    logging.info("Crawling category : %s", term)
    for index in range(start_index, end_index, index_iteration):
        logging.info("\nBatch : %d-%d" % (index, index + index_iteration))
        articles = arxivpy.query(search_query=[term],
                                 start_index=index,
                                 max_index=index + index_iteration,
                                 results_per_iteration=index_iteration,
                                 wait_time=0.2,
                                 sort_by='lastUpdatedDate')
        article_batch_count = len(articles)
        if article_batch_count == 0:
            logging.warning('No articles found in batch %d-%d',
                            index, index + index_iteration)
        for idx, article in tqdm(enumerate(articles),
                                 total=article_batch_count):
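            # arXiv IDs carry a version suffix (e.g. '1234.5678v2');
            # strip it so database lookups use the bare ID.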
            arvixID = article['id'].split('v')[0]
            query = Paper.select().where(Paper.arvixID == arvixID)
            if query.exists():
                paper = Paper.get(Paper.arvixID == arvixID)
                categories = paper.category
                if term not in categories:
                    categories.append(term)
                Paper.update(category=categories).where(
                    Paper.arvixID == arvixID).execute()
                continue
            success, article_meta = get_arvixpaper_semantic_scholar(arvixID)
            if not success:
                logging.debug(
                    "Paper does not exist in Semantic Scholar, arvixID: %s",
                    arvixID)
                continue
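            # Semantic Scholar may return null author IDs; -1 marks those.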
            authorIDList = [
                int(author['authorId'])
                if author['authorId'] is not None else -1
                for author in article_meta['authors']
            ]
            authorNames = [article['main_author']]
            authorCount = len(article_meta['authors'])
            if authorCount > 1:
                other_author = [
                    name.strip() for name in article['authors'].split(',')
                    if len(name) > 1 and name != article['main_author']
                ]
                authorNames += other_author
            paper_category = [article['term']]
            if article['term'] != term:
                paper_category.append(term)
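            # Merge the arxivpy record and the Semantic Scholar metadata
            # into a single Paper row.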
            try:
                paper = Paper.create(
                    indexID=idx + index,
                    arvixID=arvixID,
                    paperId=article_meta['paperId'],
                    doiID=str(article_meta['doi']),
                    title=article['title'],
                    summary=article['abstract'],
                    category=paper_category,
                    comments=article['comment'],
                    journal_ref=article['journal_ref'],
                    url=article['url'],
                    authorID=authorIDList,
                    authorName=authorNames,
                    authorCount=authorCount,
                    publishedDate=article['publish_date'],
                    citationVelocity=article_meta['citationVelocity'],
                    referencesCount=len(article_meta['references']),
                    topics=article_meta['topics'],
                    venue=str(article_meta['venue']),
                    year=article_meta['year'],
                    influentialCitationCount=article_meta[
                        'influentialCitationCount'],
                    citationCount=len(article_meta['citations']),
                    citations=article_meta['citations'],
                )
                try:
                    # Best-effort parse of counts such as
                    # "10 pages, 3 figures, 2 tables" from the comment field.
                    for meta in ['page', 'figure', 'table']:
                        if article['comment'] and meta in article['comment']:
                            comment = article['comment'].replace(';', ',')
                            for segment in comment.split(','):
                                if meta in segment:
                                    page_prefix = segment.split(meta)[0]
                                    if meta == 'page':
                                        paper.pages = int(page_prefix.strip())
                                    elif meta == 'figure':
                                        paper.figures = int(
                                            page_prefix.strip())
                                    elif meta == 'table':
                                        paper.table = int(page_prefix.strip())
                                    break
                except Exception:
                    logging.debug("Error parsing page/figure/table counts "
                                  "from comment: %s", article['comment'])
                paper.save()
            except Exception as e:
                logging.warning("Error for arXiv ID %s: %s", arvixID, str(e))
            time.sleep(0.3)  # throttle requests between papers
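
A minimal usage sketch (hypothetical: the module-level start_index and end_index globals and an initialised peewee database are assumed, since neither appears in the snippet above):

logging.basicConfig(level=logging.INFO)
start_index, end_index = 0, 5000
for category in ['cs.LG', 'cs.CV', 'cs.CL']:
    crawl_category(term=category)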