Пример #1
0
def data(verbose=True):
    """
    Get a list of skid pdfs which have authors annotated.
    """
    for filename in iterview(CACHE.glob('*.pdf')):
        d = Document(filename)
        meta = d.parse_notes()
        if meta['author']:
            if verbose:
                ff = ' file://' + filename
                print
                print red % ('#' + '_' * len(ff))
                print red % ('#' + ff)
                print
                print('%s: %s' %
                      (yellow % 'meta', meta['title'])).encode('utf8')
                print('%s: %s' % (yellow % 'meta', ' ; '.join(
                    meta['author']))).encode('utf8')
                print
            try:
                yield (meta, d, pdfminer(filename))
            except Exception:
                # XXX: silently skips examples which cause pdfminer to throw an
                # exception.
                pass
Пример #2
0
def data():
    ix = defaultdict(list)
    docs = []  # documents with authors annotated

    for filename in CACHE.glob('*.pdf'):
        d = Document(filename)
        d.meta = d.parse_notes()
        authors = d.meta['author']
        if authors:
            docs.append(d)
            for x in authors:
                ix[simplify(x)].append(d)

    return ix, docs
Пример #3
0
def data():
    ix = defaultdict(list)
    docs = []  # documents with authors annotated

    for filename in CACHE.glob('*.pdf'):
        d = Document(filename)
        d.meta = d.parse_notes()
        authors = d.meta['author']
        if authors:
            docs.append(d)
            for x in authors:
                ix[simplify(x)].append(d)

    return ix, docs
Пример #4
0
def data(verbose=True):
    """
    Get a list of skid pdfs which have authors annotated.
    """
    for filename in iterview(CACHE.glob('*.pdf')):
        d = Document(filename)
        meta = d.parse_notes()
        if meta['author']:
            if verbose:
                ff = ' file://' + filename
                print
                print red % ('#' + '_' *len(ff))
                print red % ('#' + ff)
                print
                print ('%s: %s' % (yellow % 'meta', meta['title'])).encode('utf8')
                print ('%s: %s' % (yellow % 'meta', ' ; '.join(meta['author']))).encode('utf8')
                print
            yield (meta, d, pdfminer(filename))
Пример #5
0
def data(verbose=True):
    """
    Get a list of skid pdfs which have authors annotated.
    """
    for filename in iterview(CACHE.glob('*.pdf')):
        d = Document(filename)
        meta = d.parse_notes()
        if meta['author']:
            if verbose:
                ff = ' file://' + filename
                print()
                print(colors.red % ('#' + '_' *len(ff)))
                print(colors.red % ('#' + ff))
                print()
                print(('%s: %s' % (colors.yellow % 'meta', meta['title'])).encode('utf8'))
                print(('%s: %s' % (colors.yellow % 'meta', ' ; '.join(meta['author']))).encode('utf8'))
                print()
            try:
                yield (meta, d, pdfminer(filename))
            except Exception:
                # XXX: silently skips examples which cause pdfminer to throw an
                # exception.
                pass