def pybtex_to_pieberry(key, ent):
    '''Transform a pybtex Entry into a PieObject.

    Args:
        key: citation key; stored on the result as ``BibData_Key``.
        ent: the pybtex entry. Its ``fields``, ``persons`` and ``type``
            are read. Side effects on ``ent``: every person gets a
            ``text`` attribute, and a missing ``month`` field is filled
            in with ``'January'``.

    Returns:
        The populated PieObject.

    Raises:
        ValueError: the entry has neither a ``title`` nor a ``booktitle``
            field, or ``strptime`` cannot parse the month/year.
        Exception: the entry has no ``year`` field.
    '''
    fields = ent.fields
    obj = PieObject()

    # Only fields that have an entry in bibtexmap are carried across.
    modfields = {}
    for bkey, val in fields.items():
        if bkey in bibtexmap:
            modfields[bibtexmap[bkey]] = ublc(val)
    obj.add_aspect_bibdata(**modfields)
    obj.BibData_Key = key

    if 'title' not in fields:
        if 'booktitle' not in fields:
            raise ValueError('No title in this entry')
        # Fall back on the book title. The value lives in ent.fields like
        # every other field (the previous ent.items[...] lookup could not
        # succeed).
        obj.title = ublc(fields['booktitle'])

    # The formatter below reads person.text, so set it for everyone first.
    for persons in ent.persons.values():
        for person in persons:
            person.text = unicode(person)
    formatter = Formatter()
    formatted_names = formatter.format_people(ent)
    rendered_names = formatted_names.render(plaintext.Backend()).rstrip('.')
    corpnamehere = re_corpname.match(rendered_names)

    if 'year' not in fields:
        raise Exception("No valid date for this item")
    if 'month' not in fields:
        fields['month'] = 'January'
    # Only month and year are known, so the day is pinned to the 1st.
    # '%B' expects a full month name.
    datestr = '01 %s %s' % (fields['month'], fields['year'])
    obj.BibData_DatePublished = datetime.datetime.strptime(
        datestr, '%d %B %Y')

    if corpnamehere:
        # re_corpname matched: store the two captured groups, joined, as a
        # corporate author instead of a personal one.
        rendered_names = '%s%s' % (
            corpnamehere.group(1), corpnamehere.group(2))
        obj.corpauthor = rendered_names
    else:
        obj.author = rendered_names
    obj.BibData_Type = ent.type
    return obj
def pypdf_object(fn):
    '''Create a PieObject for the PDF at fn from pypdf_metadata's output.

    On win32 the file is first copied to CACHEDIR/Workaround (under the
    name returned by auto_increment_fn if a file of that name is already
    there) and the copy is used in place of the original from then on,
    including for the recorded file name.
    '''
    # NOTE(review): this file contains a second, later definition of
    # pypdf_object with the same logic; the later one is the one bound at
    # import time.
    if sys.platform == 'win32':
        workaround_fn = os.path.join(
            CACHEDIR, 'Workaround', os.path.basename(fn))
        if os.path.isfile(workaround_fn):
            workaround_fn = auto_increment_fn(workaround_fn)
        shutil.copyfile(fn, workaround_fn)
        fn = workaround_fn

    meta = pypdf_metadata(fn)
    title = meta['title']
    author = meta['author']
    created = meta['creation_date']

    obj = PieObject(title=title, author=author, date=created)
    obj.FileData_DateCreated = created
    obj.FileData_FileType = 'pdf'
    obj.FileData_FileName = os.path.basename(fn)
    return obj
def pypdf_object(fn):
    '''Return a PieObject built from the PDF metadata of the file fn.

    The metadata comes from pypdf_metadata(fn); its 'title', 'author' and
    'creation_date' entries are used. When running on win32, a copy of the
    file is made under CACHEDIR/Workaround and that copy is read instead.
    '''
    # NOTE(review): an earlier definition of pypdf_object with the same
    # logic also exists in this file; this later one overrides it.
    needs_copy = sys.platform == 'win32'
    if needs_copy:
        base = os.path.basename(fn)
        cached_copy = os.path.join(CACHEDIR, 'Workaround', base)
        # Don't overwrite an existing copy; ask for another name instead.
        if os.path.isfile(cached_copy):
            cached_copy = auto_increment_fn(cached_copy)
        shutil.copyfile(fn, cached_copy)
        fn = cached_copy

    info = pypdf_metadata(fn)
    doc_title, doc_author = info['title'], info['author']
    when_created = info['creation_date']

    pie = PieObject(
        title=doc_title,
        author=doc_author,
        date=when_created,
    )
    pie.FileData_DateCreated = when_created
    pie.FileData_FileType = 'pdf'
    pie.FileData_FileName = os.path.basename(fn)
    return pie
def get_fake_metadata_object(fn):
    '''Build a PieObject from file-system metadata alone.

    fn is the full path of the file. The title and the creation and
    modification dates come from get_fake_metadata(fn); the file type
    comes from determine_file_type(fn).
    '''
    meta = get_fake_metadata(fn)
    title = meta['title']
    created = meta['creation_date']

    pie = PieObject(title=title, date=created)
    pie.FileData_DateCreated = created
    pie.FileData_DateModified = meta['modification_date']
    pie.FileData_FileType = determine_file_type(fn)
    pie.FileData_FileName = os.path.basename(fn)
    return pie
def pieberry_from_google(gdict, url):
    '''Take a google books dict and produce a PieObject.

    Args:
        gdict: dict of Google Books data. 'title' and 'identifiers' are
            required; 'authors', 'date', 'description', 'publishers',
            'subjects', 'thumbnail' and 'summary' are used when present.
            Text values are decoded as UTF-8 (assumes they arrive as
            byte strings - TODO confirm against the caller).
        url: stored on the result as ``WebData_Url``.

    Returns:
        A PieObject carrying the bibliographic data plus a ``GoogleData``
        dict ('google_id' and any of 'subjects'/'thumbnail'/'summary').
    '''
    # NOTE(review): this file contains a second, later definition of
    # pieberry_from_google; the later one is the one bound at import time.
    bd = {
        'title': unicode(gdict['title'], 'utf8'),
    }
    if gdict.get('authors'):
        bd['author'] = fmt_authors(gdict['authors'])

    # Try a full date first, then a bare year; fall back on today when the
    # date is missing or in neither format. Only the failures strptime and
    # the lookup can produce are caught, so KeyboardInterrupt/SystemExit
    # are no longer swallowed as they were by the bare excepts.
    published = None
    for date_format in ('%Y-%m-%d', '%Y'):
        try:
            published = datetime.datetime.strptime(
                gdict['date'], date_format)
        except (KeyError, TypeError, ValueError):
            continue
        break
    if published is None:
        published = datetime.datetime.today()
    bd['BibData_DatePublished'] = published

    if gdict.get('description'):
        bd['BibData_Abstract'] = unicode(gdict['description'], 'utf8')
    if gdict.get('publishers'):
        bd['BibData_Publisher'] = u' - '.join(
            [unicode(p, 'utf8') for p in gdict['publishers']])

    # 'identifiers' is iterated as (kind, value) pairs.
    googlekey = ''
    for kind, value in gdict['identifiers']:
        if kind == 'ISBN':
            bd['PhysData_ISBN'] = value
        elif kind == 'google_id':
            googlekey = value

    # suggest_type sees bd before the type and URL entries are added.
    bd['BibData_Type'] = suggest_type(gdict, bd)
    bd['WebData_Url'] = url

    obj = PieObject()
    obj.GoogleData = {'google_id': googlekey}
    # These are copied whenever the key exists, even if the value is empty.
    for extra in ('subjects', 'thumbnail', 'summary'):
        if extra in gdict:
            obj.GoogleData[extra] = gdict[extra]
    obj.add_aspect_bibdata(**bd)
    return obj
def pieberry_from_google(gdict, url):
    '''Take a google books dict and produce a PieObject.

    Args:
        gdict: dict of Google Books data. 'title' and 'identifiers' must
            be present; 'authors', 'date', 'description', 'publishers',
            'subjects', 'thumbnail' and 'summary' are optional. Text
            values are decoded as UTF-8 (presumably byte strings - verify
            against the caller).
        url: recorded on the result as ``WebData_Url``.

    Returns:
        A PieObject with the bibliographic data added via
        ``add_aspect_bibdata`` and a ``GoogleData`` dict attached.
    '''
    # NOTE(review): an earlier definition of pieberry_from_google also
    # exists in this file; this later one overrides it.

    # Failures expected from a missing or malformed date. The previous
    # bare "except:" clauses also trapped KeyboardInterrupt and SystemExit.
    date_errors = (KeyError, TypeError, ValueError)

    bd = {
        'title': unicode(gdict['title'], 'utf8'),
    }
    if 'authors' in gdict and gdict['authors']:
        bd['author'] = fmt_authors(gdict['authors'])

    # Full date, else bare year, else today.
    try:
        bd['BibData_DatePublished'] = datetime.datetime.strptime(
            gdict['date'], '%Y-%m-%d')
    except date_errors:
        try:
            bd['BibData_DatePublished'] = datetime.datetime.strptime(
                gdict['date'], '%Y')
        except date_errors:
            bd['BibData_DatePublished'] = datetime.datetime.today()

    if 'description' in gdict and gdict['description']:
        bd['BibData_Abstract'] = unicode(gdict['description'], 'utf8')
    if 'publishers' in gdict and gdict['publishers']:
        bd['BibData_Publisher'] = u' - '.join(
            [unicode(p, 'utf8') for p in gdict['publishers']])

    # Each identifier is unpacked as an (id type, id value) pair.
    googlekey = ''
    for id_type, id_value in gdict['identifiers']:
        if id_type == 'ISBN':
            bd['PhysData_ISBN'] = id_value
        elif id_type == 'google_id':
            googlekey = id_value

    # The type suggestion is made before the URL is added to bd.
    bd['BibData_Type'] = suggest_type(gdict, bd)
    bd['WebData_Url'] = url

    obj = PieObject()
    obj.GoogleData = {'google_id': googlekey}
    if 'subjects' in gdict:
        obj.GoogleData['subjects'] = gdict['subjects']
    if 'thumbnail' in gdict:
        obj.GoogleData['thumbnail'] = gdict['thumbnail']
    if 'summary' in gdict:
        obj.GoogleData['summary'] = gdict['summary']
    obj.add_aspect_bibdata(**bd)
    return obj
def spoof_pieobject(objtype="normal"):
    '''Generate a spoof PieObject filled with random sample data.

    Args:
        objtype: which kind of object to fake:
            'normal' - title, author, date, plus a file name and root;
            'web' - only web data, with the link text used as the title;
            'webfull', 'desktop' or 'pdffull' - title, author, date, web
            data, a 'cachedir' file root and bibliographic data.

    Returns:
        The PieObject, tagged 'Test', 'Foo' and 'Bar'.

    Raises:
        ValueError: objtype is not one of the values above. (This used to
            surface as an UnboundLocalError further down.)
    '''
    if objtype == 'normal':
        t = random.choice(ipsum)
        a = random.choice(namelist)
        d = datetime.datetime.today()
        ro = PieObject(t, a, d)
        ro.FileData_FileName = random.choice(filenamelist)
        ro.FileData_Root = random.choice(rootlist)
    elif objtype == 'web':
        ro = PieObject()
        ro.WebData_Url = random.choice(urllist)
        ro.WebData_PageUrl = ro.WebData_Url
        ro.WebData_LinkText = random.choice(ipsum)
        ro.title = ro.WebData_LinkText
        ro.aspects['onweb'] = True
    elif objtype in ('webfull', 'desktop', 'pdffull'):
        t = random.choice(ipsum)
        a = random.choice(namelist)
        d = datetime.datetime.today()
        ro = PieObject(t, a, d)
        ro.WebData_Url = random.choice(urllist)
        ro.WebData_PageUrl = ro.WebData_Url
        ro.WebData_LinkText = t + ' [link]'
        ro.FileData_Root = 'cachedir'
        ro.aspects['onweb'] = True
        # NOTE(review): the original layout was flattened; MakeBibData is
        # taken to belong to this branch only - confirm.
        ro.MakeBibData()
    else:
        raise ValueError('Unknown spoof object type: %r' % (objtype,))
    ro.add_tag('Test')
    ro.add_tags(('Foo', 'Bar'))
    return ro
# md = reader.Info.ModDate.split(':')[1] creation_date = datetime.datetime.strptime("%s %s %s %s %s" % ( cd[0:4], cd[4:6], cd[6:8], cd[8:10], cd[10:12] ), "%Y %m %d %H %M") else: creation_date = fakeobj.FileData_DateCreated if reader.Info.Author: author = unicode( reader.Info.Author, 'utf8', errors=U_ERROR_BEHAV).strip('()') else: author = u'' if reader.Info.Title: title = unicode( reader.Info.Title, 'utf8', errors=U_ERROR_BEHAV).strip('()') else: title = u'' obj = PieObject( title = title, author = author, date = creation_date) obj.FileData_DateCreated = creation_date obj.FileData_FileType = 'pdf' obj.FileData_FileName = os.path.basename(fn) return obj def pdfrw_metadata(fn): reader = PdfReader(fn) # assert len(reader.Info.CreationDate) > 0 cd = reader.Info.CreationDate.split(':')[1] #get the 'good' bit # md = reader.Info.ModDate.split(':')[1] creation_date = datetime.datetime.strptime("%s %s %s %s %s" % ( cd[0:4], cd[4:6], cd[6:8], cd[8:10], cd[10:12] ), "%Y %m %d %H %M") return {