Пример #1
0
def consulta(lista_isbns, servico):
    """Look up every ISBN in ``lista_isbns`` with the service named ``servico``.

    Recognised values of ``servico``:

    * ``'gbooks'`` -- ``isbnlib.meta`` called with ``service='goob'``;
    * ``'openl'`` -- ``isbnlib.meta`` called with ``service='openl'``;
    * ``'crref'`` -- a ``Works().filter(isbn=...)`` query.

    For the two isbnlib services the metadata of each ISBN is run through
    the ``bibformatters['json']`` formatter and stored in a local dict
    keyed by ``str(isbn)``.  ISBNs for which the service raises a
    "not found" error are reported with ``print`` and skipped.

    Any other value of ``servico`` does nothing.

    NOTE(review): no ``return`` statement is visible, so the collected
    results are not handed back to the caller -- the function looks
    truncated here; confirm against the complete source.
    """
    formatador_json = bibformatters['json']
    isbn_dict = {}

    if servico == 'gbooks':
        for isbn in lista_isbns:
            try:
                data = isbnlib.meta(isbn, service='goob')
                isbn_dict[str(isbn)] = formatador_json(data)
            except (isbnlib.dev.DataNotFoundAtServiceError,
                    isbnlib.dev._exceptions.NoDataForSelectorError):
                # Both errors were handled identically; one clause suffices.
                print("Entrada com ISBN %s não foi encontrada no serviço %s." % (isbn, servico))
    elif servico == 'openl':
        for isbn in lista_isbns:
            try:
                # Query with the stripped value: the original computed the
                # stripped ISBN but then passed the raw one to the service.
                data = isbnlib.meta(isbn.strip(), service='openl')
                isbn_dict[str(isbn)] = formatador_json(data)
            except isbnlib.dev.DataNotFoundAtServiceError:
                print("Entrada com ISBN %s não foi encontrada no serviço %s." % (isbn, servico))
    elif servico == 'crref':
        for isbn in lista_isbns:
            works = Works()
            resultados = works.filter(isbn=str(isbn.strip()))
            # NOTE(review): ``b`` is not used in the visible code; it is
            # kept because iterating the query is what fetches the records
            # and the remainder of the function is presumably cut off.
            b = list(resultados)
Пример #2
0
 def all_articles(self):
     """Return a list of ``Article`` objects for the DOI prefix of this object.

     Queries ``Works`` filtered by ``self.doi_prefix`` and builds one
     ``Article`` per returned item that has a 'published-online' entry
     containing 'date-parts'; items without it are skipped.  The date is
     the first 'date-parts' entry joined with '-', each part zero-padded
     to at least two digits.  An article's status is 'complete' only when
     its PDF link, DOI, title and date are all non-empty, otherwise
     'incomplete'.  The ``jats`` and ``image`` fields are always passed
     as empty strings.

     Raises:
         ServerError: if anything fails while querying or processing the
             results; the original exception is chained as ``__cause__``.
     """
     articles = []
     try:
         works = Works(etiquette=Prompt.etiquette)
         if __debug__:
             log(f'asking Crossref for all works by {self.doi_prefix}')
         for item in works.filter(prefix=self.doi_prefix):
             doi = item.get('DOI', '')
             # ``or ['']`` also covers a 'title' key that is present but
             # empty or None; indexing it directly would raise and abort
             # the whole listing instead of marking one item incomplete.
             title = (item.get('title') or [''])[0]
             online = item.get('published-online', None)
             if not online or 'date-parts' not in online:
                 if __debug__:
                     log(f'skipping {doi} lacking published-online')
                 continue
             date = '-'.join(
                 format(x, '02') for x in online['date-parts'][0])
             if __debug__:
                 log(f'keeping publication {doi} dated {date}')
             pdf = pdf_link(item.get('link', []))
             jats = ''
             image = ''
             basename = tail_of_doi(doi)
             status = 'complete' if all([pdf, doi, title, date
                                         ]) else 'incomplete'
             articles.append(
                 Article(self.issn, doi, date, title, basename, pdf, jats,
                         image, status))
     except Exception as ex:
         if __debug__: log(f'crossref API exception: {str(ex)}')
         # Chain explicitly so the underlying failure stays attached.
         raise ServerError(
             f'Failed to get data from Crossref: {str(ex)}') from ex
     return articles
Пример #3
0
def main(list_):
    """Enrich the books listed in file ``list_`` and export them to Excel.

    Each line of the file ``list_`` (trailing whitespace removed) is turned
    into a book record by ``create_book_metadata_obejct``.  For every book,
    a ``Works`` query filtered by the book's ISBN is run (sample of 100,
    fields 'title', 'ISBN', 'link', 'author').  When a result contains the
    book's ISBN and its title, the result's authors are appended to
    ``book['authors']`` as "given family" strings and ``book['access_link']``
    is set to the URL of the last link in the result.

    A ``KeyError`` raised while handling a result is printed and the rest
    of that result is skipped.  The function pauses 0.5 s between books.

    The records are finally flattened with ``json_normalize`` and written
    to ``results_<list_>.xlsx`` in a sheet named 'books'.
    """
    books = []
    crossref_works = Works()

    with open(list_) as f:
        for line in f:
            books.append(create_book_metadata_obejct(line.rstrip()))

    for book in books:
        query = crossref_works.filter(isbn=book['isbn'])\
                              .sample(100)\
                              .select('title', 'ISBN', 'link', 'author')
        for result in query:
            try:
                if book['isbn'] in result['ISBN']\
                        and book['title'] in result['title']:

                    for author in result.get('author') or []:
                        name = f"{author['given']} {author['family']}"
                        book['authors'].append(name)

                    for link in result.get('link') or []:
                        book['access_link'] = link['URL']

            except KeyError as err:
                print(err)

        sleep(0.5)

    normalised_json = json_normalize(books, sep=',')
    # The former ``verbose=True`` and ``encoding='utf-8'`` keywords are
    # dropped: pandas documented both as unused, deprecated them in 1.5
    # and removed them in 2.0, where passing them raises TypeError.
    # (Assumes ``json_normalize`` returns a pandas DataFrame.)
    normalised_json.to_excel(f'results_{list_}.xlsx',
                             sheet_name='books')
Пример #4
0
class Metadata:
    """
    Retrieve and organise the book and chapter metadata associated with
    the user-supplied ISBN.

    Attributes set by the constructor:
        works: the ``Works`` client used for every query.
        isbn: the ISBN given by the caller.
        book_metadata: result of ``get_book_metadata()``.
        chapters_data: result of ``get_chapters_data()``.
    """

    def __init__(self, isbn):
        """
        Store ``isbn`` and fetch the book-level and chapter-level metadata.

        Raises AssertionError (from ``get_chapters_data``) when no book or
        no chapter record can be found.
        """
        self.works = Works()
        self.isbn = isbn

        # Get book metadata
        self.book_metadata = self.get_book_metadata()
        self.chapters_data = self.get_chapters_data()

    def get_book_metadata(self):
        """
        Get book metadata associated to the supplied ISBN.

        Returns the ``Works`` query filtered by ``self.isbn`` and limited
        to the 'title', 'DOI' and 'type' fields.
        """
        return self.works.filter(isbn=self.isbn).select('title', 'DOI', 'type')

    def get_chapters_data(self):
        """
        Return a python list of dictionaries with the book chapter data.

        The title of the first book record is used to query for works of
        type 'book-chapter' whose container title matches it.

        Raises AssertionError when no book record exists for the ISBN or
        when no chapter record is found.
        """
        book_data = list(self.book_metadata)

        # Fail with an explicit message instead of an IndexError below.
        if not book_data:
            raise AssertionError('Couldn\'t find any book metadata' +
                                 ' for the supplied --isbn value')

        book_title = book_data[0]['title'][0]

        chapters_query = self.works.filter(container_title=book_title,
                                           type='book-chapter') \
                                   .select('DOI', 'license', 'author',
                                           'title', 'type', 'page',
                                           'publisher', 'container-title',
                                           'abstract')

        # Materialise the query: the emptiness check below needs a real
        # sequence (a query object's truthiness need not reflect whether
        # it yields results), and the documented return type is a list.
        chapters_data = list(chapters_query)

        # Assert that at least one DOI has been discovered
        if not chapters_data:
            raise AssertionError('Couldn\'t find any chapter-level DOIs' +
                                 ' for the supplied --isbn value')

        return chapters_data

    def get_doi_suffix(self):
        """
        Return the book DOI suffix (string).

        The DOI is taken from the first record in ``self.book_metadata``
        whose type is 'monograph', 'edited-book' or 'book'.

        Raises AssertionError when no such record exists.
        """

        book_types = ['monograph', 'edited-book', 'book']

        book_doi = [
            item['DOI'] for item in self.book_metadata
            if item['type'] in book_types
        ]

        if not book_doi:
            raise AssertionError('Couldn\'t find book DOI')

        # Split only on the first '/': everything after the prefix is the
        # suffix, even when the suffix itself contains slashes.
        return book_doi[0].split('/', 1)[1]

    @staticmethod
    def get_author_name(data, position):
        """
        Return the "given family" name of the author at ``position``.

        Returns an empty string when ``data`` has no author at that
        position (including when the 'author' key is absent).  If only one
        of 'given' / 'family' is present, that part alone is returned.
        """

        authors = data.get('author') or []
        if len(authors) <= position:
            return ''

        author = authors[position]
        name_parts = (author.get('given'), author.get('family'))
        return ' '.join(part for part in name_parts if part)

    @staticmethod
    def write_metadata(chapter_data, output_file_path):
        """
        Write the chapter metadata to ``output_file_path`` with exiftool.

        Runs ``exiftool -q`` with one ``-Tag=value`` argument per field and
        then prints a confirmation line with the file's base name.  The
        exit status of exiftool is not checked.
        """

        # One timestamp so ModDate and Date cannot straddle midnight.
        now = datetime.now()

        arguments = ['-Title={}'.format(chapter_data['title'][0]),

                     '-Author={}'.format(Metadata
                                         .join_author_names(chapter_data)),

                     # Add publisher to the dc:publisher field
                     '-Publisher={}'.format(chapter_data['publisher']),

                     # '%H:%M:%S' is the portable spelling of '%T', which
                     # is a platform extension not available everywhere.
                     '-ModDate={}'.format(now
                                          .strftime("%Y:%m:%d %H:%M:%S")),

                     # Add Abstract in the dc:description field
                     '-Description={}'.format(chapter_data
                                              .get('abstract', '')),

                     # Add a copyright notice in the dc:rights field
                     '-Copyright={}'.format(Metadata.get_rights(chapter_data)),

                     # Add DOI to the dc:identifier field
                     '-Identifier={}'.format(chapter_data['DOI']),

                     # Add format to the dc:format field
                     '-Format={}'.format('application/pdf'),

                     # Add date to the dc:date field
                     '-Date={}'.format(now.strftime("%Y:%m:%d")),

                     # Add language to the dc:language field
                     '-Language={}'.format('en')]

        cmd = ['exiftool', '-q']
        cmd.extend(arguments)
        cmd.append(output_file_path)

        run(cmd)
        print('{}: Metadata written'.format(path.split(output_file_path)[1]))

    @staticmethod
    def get_rights(chapter_data):
        """
        Compose a simple copyright statement, just like:
        '© John Doe https://creativecommons.org/licenses/by/2.0/'

        Author names and the licence link are pulled out from
        ``chapter_data``; the URL of the first licence entry is used.  When
        no licence (or no URL) is available, 'n.d.' is used in its place.
        """

        # Fall back to a URL-less entry: the previous default ('n.d.' as
        # the licence list itself) was indexed like a list of dicts and
        # raised TypeError whenever the licence was missing.
        licenses = chapter_data.get('license') or [{}]

        data = {
            'authors_names': Metadata.join_author_names(chapter_data),
            'copyright_url': licenses[0].get('URL', 'n.d.')
        }

        rights_str = '© {authors_names} {copyright_url}'.format(**data)

        return rights_str

    @staticmethod
    def join_author_names(chapter_data):
        """
        Return a string with the names of the first three authors,
        separated by '; '.
        """
        # Make a list with author names, i.e. ['John Doe', '', '']
        authors = [
            Metadata.get_author_name(chapter_data, position)
            for position in range(3)
        ]

        # Return a string with the names, filtering empty fields
        return '; '.join(filter(None, authors))