Пример #1
0
def get_all_info(input, verbose=False):
    """
    Return the entry info, references, and PDF link for an article at once.

    This function exists so that all information can be returned together.
    It handles calls to get the PDF link, reference information, and entry
    information, generally in order to limit making a requests call three
    separate times.

    For the Wiley implementation, the number of requests calls can't
    be reduced because the entry and reference information are on
    separate pages. This function still organizes the return values.

    Parameters
    ----------
    input
        Article identifier (link or DOI), passed straight through to
        get_entry_info, get_references, and get_pdf_link.
    verbose : bool, optional
        Forwarded to get_entry_info and get_references.

    Returns
    -------
    dict
        Keys 'entry' (the entry info converted via convert_to_dict),
        'references', and 'pdf_link'.
    """
    entry_info = get_entry_info(input, verbose)
    references = get_references(input, verbose)
    pdf_link = get_pdf_link(input)

    entry_dict = convert_to_dict(entry_info)

    # Fix: the original fell through to `pass`, discarding every value it
    # had just fetched. Organize and return them as the docstring promises.
    return {
        'entry': entry_dict,
        'references': references,
        'pdf_link': pdf_link,
    }
Пример #2
0
import json
import os

# Directory containing this file; used to locate the saved test fixtures.
curpath = str(os.path.dirname(os.path.abspath(__file__)))

# Sample journal article
link = 'http://onlinelibrary.wiley.com/doi/10.1002/biot.201400046/references'
pii = '10.1002/biot.201400046'


# Run scraper on live site
entry = wy.get_entry_info(link)
refs = wy.get_references(pii)

# Make scraped entry into a dict
entry_dict = convert_to_dict(entry)

# Make a list of refs as dict objects.
# Fix: comprehension over the refs themselves instead of the
# index-based `for x in range(len(refs))` append loop.
refs_dicts = [convert_to_dict(ref) for ref in refs]


# Load saved version of content and references.
# Fix: build fixture paths with os.path.join rather than string
# concatenation with a hard-coded '/' separator.
with open(os.path.join(curpath, 'saved_sites', 'wy_entry.txt')) as fe:
    saved_entry = fe.read()

with open(os.path.join(curpath, 'saved_sites', 'wy_references.txt')) as fr:
    saved_refs = fr.read()

# Make the saved versions into dicts
Пример #3
0
    def populate_info(self):
        """
        Populate this object's entry, references, and PDF link.

        Builds the scraper input via self._make_input() and delegates to
        the publisher interface; the entry is normalized to a dict and the
        references to a list via the utils helpers.
        """
        # Fix: renamed from `input`, which shadowed the builtin of the
        # same name. Local only, so the method's interface is unchanged.
        scraper_input = self._make_input()

        self.entry = utils.convert_to_dict(self.publisher_interface.get_entry_info(scraper_input))
        self.references = utils.refs_to_list(self.publisher_interface.get_references(scraper_input))
        self.pdf_link = self.publisher_interface.get_pdf_link(scraper_input)