Exemplo n.º 1
0
def pybtex_to_pieberry(key, ent):
    '''Transform a pybtex Entry into a PieObject.

    key -- citation key, stored on the result as BibData_Key
    ent -- the entry to convert; its fields, persons and type are read

    Returns the populated PieObject.

    Raises ValueError when the entry has neither a 'title' nor a
    'booktitle' field, and Exception when it has no 'year' field.

    Side effects on ent: every person gets a .text attribute, and a
    missing 'month' field is filled in with 'January'.
    '''
    obj = PieObject()
    # Only fields that have an entry in bibtexmap are carried over.
    modfields = {}
    for bkey, val in ent.fields.items():
        if bkey in bibtexmap:
            modfields[bibtexmap[bkey]] = ublc(val)
    obj.add_aspect_bibdata(**modfields)
    obj.BibData_Key = key
    if 'title' not in ent.fields:
        if 'booktitle' in ent.fields:
            # Read the fallback from the same mapping the check above
            # consulted (ent.fields), not from ent.items.
            obj.title = ublc(ent.fields['booktitle'])
        else:
            raise ValueError('No title in this entry')
    # The formatter below is run on persons that carry a .text attribute.
    for persons in ent.persons.itervalues():
        for person in persons:
            person.text = unicode(person)
    formatter = Formatter()
    formatted_names = formatter.format_people(ent)
    rendered_names = formatted_names.render(plaintext.Backend()).rstrip('.')
    corpnamehere = re_corpname.match(rendered_names)
    if 'year' not in ent.fields:
        raise Exception("No valid date for this item")
    if 'month' not in ent.fields:
        ent.fields['month'] = 'January'
    # The date is pinned to the first day of the month; the month field
    # has to be something strptime's %B directive accepts.
    datestr = '01 %s %s' % (ent.fields['month'], ent.fields['year'])
    obj.BibData_DatePublished = datetime.datetime.strptime(datestr, '%d %B %Y')
    if corpnamehere:
        # A match of re_corpname is stored as a corporate author, built
        # from the pattern's first two groups.
        rendered_names = '%s%s' % (corpnamehere.group(1), corpnamehere.group(2))
        obj.corpauthor = rendered_names
    else:
        obj.author = rendered_names
    obj.BibData_Type = ent.type
    return obj
Exemplo n.º 2
0
def pypdf_object(fn):
    '''Build a PieObject describing the PDF at path fn.

    On win32 the file is first copied into the 'Workaround' directory
    under CACHEDIR (under an auto-incremented name if one with the same
    basename already exists there) and the copy is used from then on,
    including for the recorded file name.

    Title, author and creation date come from pypdf_metadata().
    '''
    if sys.platform == 'win32':
        workaround_fn = os.path.join(
            CACHEDIR, 'Workaround', os.path.basename(fn))
        if os.path.isfile(workaround_fn):
            workaround_fn = auto_increment_fn(workaround_fn)
        shutil.copyfile(fn, workaround_fn)
        fn = workaround_fn

    meta = pypdf_metadata(fn)
    title = meta['title']
    author = meta['author']
    created = meta['creation_date']

    obj = PieObject(title=title, author=author, date=created)
    obj.FileData_DateCreated = created
    obj.FileData_FileType = 'pdf'
    obj.FileData_FileName = os.path.basename(fn)
    return obj
Exemplo n.º 3
0
def pypdf_object(fn):
    '''Return a PieObject populated from the metadata of the PDF fn.

    When running on win32 the PDF is copied to CACHEDIR/Workaround
    first (picking an auto-incremented name on a clash) and the copy
    replaces fn for the metadata read and the stored file name.
    '''
    if sys.platform == 'win32':
        base = os.path.basename(fn)
        copy_path = os.path.join(CACHEDIR, 'Workaround', base)
        if os.path.isfile(copy_path):
            copy_path = auto_increment_fn(copy_path)
        shutil.copyfile(fn, copy_path)
        fn = copy_path

    info = pypdf_metadata(fn)
    ctor_args = {}
    ctor_args['title'] = info['title']
    ctor_args['author'] = info['author']
    ctor_args['date'] = info['creation_date']
    obj = PieObject(**ctor_args)

    # File-level attributes that accompany the bibliographic ones.
    obj.FileData_DateCreated = info['creation_date']
    obj.FileData_FileType = 'pdf'
    obj.FileData_FileName = os.path.basename(fn)
    return obj
Exemplo n.º 4
0
def pybtex_to_pieberry(key, ent):
    '''Transform a pybtex Entry into a PieObject.

    key -- citation key, stored on the result as BibData_Key
    ent -- the entry to convert; its fields, persons and type are read

    Returns the populated PieObject.

    Raises ValueError when the entry has neither a 'title' nor a
    'booktitle' field, and Exception when it has no 'year' field.

    Side effects on ent: every person gets a .text attribute, and a
    missing 'month' field is filled in with 'January'.
    '''
    obj = PieObject()
    # Only fields that have an entry in bibtexmap are carried over.
    modfields = {}
    for bkey, val in ent.fields.items():
        if bkey in bibtexmap:
            modfields[bibtexmap[bkey]] = ublc(val)
    obj.add_aspect_bibdata(**modfields)
    obj.BibData_Key = key
    if 'title' not in ent.fields:
        if 'booktitle' in ent.fields:
            # Read the fallback from the same mapping the check above
            # consulted (ent.fields), not from ent.items.
            obj.title = ublc(ent.fields['booktitle'])
        else:
            raise ValueError('No title in this entry')
    # The formatter below is run on persons that carry a .text attribute.
    for persons in ent.persons.itervalues():
        for person in persons:
            person.text = unicode(person)
    formatter = Formatter()
    formatted_names = formatter.format_people(ent)
    rendered_names = formatted_names.render(plaintext.Backend()).rstrip('.')
    corpnamehere = re_corpname.match(rendered_names)
    if 'year' not in ent.fields:
        raise Exception("No valid date for this item")
    if 'month' not in ent.fields:
        ent.fields['month'] = 'January'
    # The date is pinned to the first day of the month; the month field
    # has to be something strptime's %B directive accepts.
    datestr = '01 %s %s' % (ent.fields['month'], ent.fields['year'])
    obj.BibData_DatePublished = datetime.datetime.strptime(datestr, '%d %B %Y')
    if corpnamehere:
        # A match of re_corpname is stored as a corporate author, built
        # from the pattern's first two groups.
        rendered_names = '%s%s' % (corpnamehere.group(1),
                                   corpnamehere.group(2))
        obj.corpauthor = rendered_names
    else:
        obj.author = rendered_names
    obj.BibData_Type = ent.type
    return obj
Exemplo n.º 5
0
def get_fake_metadata_object(fn):
    '''Return a PieObject whose metadata comes from get_fake_metadata().

    fn -- full path of the file to describe

    The title and creation date seed the PieObject; the creation and
    modification dates, the detected file type and the file's basename
    are stored as FileData_* attributes.
    '''
    meta = get_fake_metadata(fn)
    title = meta['title']
    created = meta['creation_date']

    obj = PieObject(title=title, date=created)
    obj.FileData_DateCreated = created
    obj.FileData_DateModified = meta['modification_date']
    obj.FileData_FileType = determine_file_type(fn)
    obj.FileData_FileName = os.path.basename(fn)
    return obj
Exemplo n.º 6
0
def pieberry_from_google(gdict, url):
    '''Take a google books dict and produce a PieObject.

    gdict -- dict describing the book. 'title' and 'identifiers' are
             required; 'authors', 'date', 'description', 'publishers',
             'subjects', 'thumbnail' and 'summary' are used if present.
    url   -- stored as WebData_Url

    Returns a PieObject carrying the bibliographic data plus a
    GoogleData dict (google_id and any subjects/thumbnail/summary).

    The publication date is parsed as '%Y-%m-%d', then as '%Y'; if the
    date is missing or matches neither format, today's date is used.
    '''
    bd = {
        'title': unicode(gdict['title'], 'utf8'),
    }
    if gdict.get('authors'):
        bd['author'] = fmt_authors(gdict['authors'])

    # Try the known date formats in turn. Only lookup and parse failures
    # are treated as "no usable date"; anything else (KeyboardInterrupt
    # included) propagates instead of being silently swallowed.
    published = None
    for date_format in ('%Y-%m-%d', '%Y'):
        try:
            published = datetime.datetime.strptime(gdict['date'], date_format)
        except (KeyError, TypeError, ValueError):
            continue
        break
    if published is None:
        published = datetime.datetime.today()
    bd['BibData_DatePublished'] = published

    if gdict.get('description'):
        bd['BibData_Abstract'] = unicode(gdict['description'], 'utf8')
    if gdict.get('publishers'):
        bd['BibData_Publisher'] = u' - '.join(
            [unicode(p, 'utf8') for p in gdict['publishers']])

    # 'identifiers' is iterated as (kind, value) pairs.
    googlekey = ''
    for id_kind, id_value in gdict['identifiers']:
        if id_kind == 'ISBN':
            bd['PhysData_ISBN'] = id_value
        elif id_kind == 'google_id':
            googlekey = id_value

    bd['BibData_Type'] = suggest_type(gdict, bd)
    bd['WebData_Url'] = url

    obj = PieObject()
    obj.GoogleData = {'google_id': googlekey}
    # Optional extras are copied through untouched when present.
    for extra in ('subjects', 'thumbnail', 'summary'):
        if extra in gdict:
            obj.GoogleData[extra] = gdict[extra]
    obj.add_aspect_bibdata(**bd)
    return obj
Exemplo n.º 7
0
def pieberry_from_google(gdict, url):
    '''Take a google books dict and produce a PieObject.

    gdict -- dict describing the book. 'title' and 'identifiers' are
             required; 'authors', 'date', 'description', 'publishers',
             'subjects', 'thumbnail' and 'summary' are used if present.
    url   -- stored as WebData_Url

    Returns a PieObject carrying the bibliographic data plus a
    GoogleData dict (google_id and any subjects/thumbnail/summary).

    The publication date is parsed as '%Y-%m-%d', then as '%Y'; if the
    date is missing or matches neither format, today's date is used.
    '''
    bd = {
        'title': unicode(gdict['title'], 'utf8'),
        }
    if gdict.get('authors'):
        bd['author'] = fmt_authors(gdict['authors'])

    # Only lookup and parse failures mean "no usable date"; a bare
    # except here would also hide KeyboardInterrupt and real bugs.
    try:
        bd['BibData_DatePublished'] = datetime.datetime.strptime(
            gdict['date'], '%Y-%m-%d')
    except (KeyError, TypeError, ValueError):
        try:
            bd['BibData_DatePublished'] = datetime.datetime.strptime(
                gdict['date'], '%Y')
        except (KeyError, TypeError, ValueError):
            bd['BibData_DatePublished'] = datetime.datetime.today()

    if gdict.get('description'):
        bd['BibData_Abstract'] = unicode(gdict['description'], 'utf8')
    if gdict.get('publishers'):
        bd['BibData_Publisher'] = u' - '.join(
            [unicode(p, 'utf8') for p in gdict['publishers']])

    # 'identifiers' is iterated as (kind, value) pairs.
    googlekey = ''
    for i, k in gdict['identifiers']:
        if i == 'ISBN':
            bd['PhysData_ISBN'] = k
        elif i == 'google_id':
            googlekey = k

    bd['BibData_Type'] = suggest_type(gdict, bd)
    bd['WebData_Url'] = url

    obj = PieObject()
    obj.GoogleData = {'google_id': googlekey}
    # Optional extras are copied through untouched when present.
    if 'subjects' in gdict:
        obj.GoogleData['subjects'] = gdict['subjects']
    if 'thumbnail' in gdict:
        obj.GoogleData['thumbnail'] = gdict['thumbnail']
    if 'summary' in gdict:
        obj.GoogleData['summary'] = gdict['summary']
    obj.add_aspect_bibdata(**bd)
    return obj
Exemplo n.º 8
0
def spoof_pieobject(objtype="normal"):
    '''Generate a spoof PieObject filled with random sample data.

    objtype selects what gets populated:
      'normal'  -- title, author, date, plus a file name and root
      'web'     -- url, page url and link text; the title is the link
                   text
      'webfull', 'desktop', 'pdffull'
                -- title, author, date, plus web data, with
                   FileData_Root set to 'cachedir'

    Every object then has MakeBibData() called on it and receives the
    tags 'Test', 'Foo' and 'Bar'.

    Raises ValueError for any other objtype (previously this surfaced
    as an UnboundLocalError on the result variable).
    '''
    if objtype == 'normal':
        t = random.choice(ipsum)
        a = random.choice(namelist)
        d = datetime.datetime.today()
        ro = PieObject(t, a, d)
        ro.FileData_FileName = random.choice(filenamelist)
        ro.FileData_Root = random.choice(rootlist)
    elif objtype == 'web':
        ro = PieObject()
        ro.WebData_Url = random.choice(urllist)
        ro.WebData_PageUrl = ro.WebData_Url
        ro.WebData_LinkText = random.choice(ipsum)
        ro.title = ro.WebData_LinkText
        ro.aspects['onweb'] = True
    elif objtype in ('webfull', 'desktop', 'pdffull'):
        t = random.choice(ipsum)
        a = random.choice(namelist)
        d = datetime.datetime.today()
        ro = PieObject(t, a, d)
        ro.WebData_Url = random.choice(urllist)
        ro.WebData_PageUrl = ro.WebData_Url
        ro.WebData_LinkText = t + ' [link]'
        ro.FileData_Root = 'cachedir'
        ro.aspects['onweb'] = True
    else:
        # Fail with a message naming the bad value rather than falling
        # through to an unbound local below.
        raise ValueError('Unknown spoof object type: %r' % (objtype,))
    ro.MakeBibData()
    ro.add_tag('Test')
    ro.add_tags(('Foo', 'Bar'))
    return ro
Exemplo n.º 9
0
        # md = reader.Info.ModDate.split(':')[1]
        creation_date = datetime.datetime.strptime("%s %s %s %s %s" % (
                cd[0:4], cd[4:6], cd[6:8], cd[8:10], cd[10:12]
                ), "%Y %m %d %H %M")
    else:
        creation_date = fakeobj.FileData_DateCreated
    if reader.Info.Author:
        author = unicode(
            reader.Info.Author, 'utf8', errors=U_ERROR_BEHAV).strip('()')
    else: author = u''
    if reader.Info.Title:
        title = unicode(
            reader.Info.Title, 'utf8', errors=U_ERROR_BEHAV).strip('()')
    else: title = u''
    obj = PieObject(
        title = title,
        author = author,
        date = creation_date)
    obj.FileData_DateCreated = creation_date
    obj.FileData_FileType = 'pdf'
    obj.FileData_FileName = os.path.basename(fn)
    return obj

def pdfrw_metadata(fn):
    reader = PdfReader(fn)
        # assert len(reader.Info.CreationDate) > 0
    cd = reader.Info.CreationDate.split(':')[1] #get the 'good' bit 
        # md = reader.Info.ModDate.split(':')[1]
    creation_date = datetime.datetime.strptime("%s %s %s %s %s" % (
            cd[0:4], cd[4:6], cd[6:8], cd[8:10], cd[10:12]
            ), "%Y %m %d %H %M")
    return {
Exemplo n.º 10
0
def spoof_pieobject(objtype="normal"):
    '''Generate a spoof PieObject filled with random sample data.

    objtype selects what gets populated:
      'normal'  -- title, author, date, plus a file name and root
      'web'     -- url, page url and link text; the title is the link
                   text
      'webfull', 'desktop', 'pdffull'
                -- title, author, date, plus web data, with
                   FileData_Root set to 'cachedir'

    Every object then has MakeBibData() called on it and receives the
    tags 'Test', 'Foo' and 'Bar'.

    Raises ValueError for any other objtype (previously this surfaced
    as an UnboundLocalError on the result variable).
    '''
    if objtype == 'normal':
        t = random.choice(ipsum)
        a = random.choice(namelist)
        d = datetime.datetime.today()
        ro = PieObject(t, a, d)
        ro.FileData_FileName = random.choice(filenamelist)
        ro.FileData_Root = random.choice(rootlist)
    elif objtype == 'web':
        ro = PieObject()
        ro.WebData_Url = random.choice(urllist)
        ro.WebData_PageUrl = ro.WebData_Url
        ro.WebData_LinkText = random.choice(ipsum)
        ro.title = ro.WebData_LinkText
        ro.aspects['onweb'] = True
    elif objtype in ('webfull', 'desktop', 'pdffull'):
        t = random.choice(ipsum)
        a = random.choice(namelist)
        d = datetime.datetime.today()
        ro = PieObject(t, a, d)
        ro.WebData_Url = random.choice(urllist)
        ro.WebData_PageUrl = ro.WebData_Url
        ro.WebData_LinkText = t + ' [link]'
        ro.FileData_Root = 'cachedir'
        ro.aspects['onweb'] = True
    else:
        # Fail with a message naming the bad value rather than falling
        # through to an unbound local below.
        raise ValueError('Unknown spoof object type: %r' % (objtype,))
    ro.MakeBibData()
    ro.add_tag('Test')
    ro.add_tags(('Foo', 'Bar'))
    return ro