def get_fake_metadata_object(fn):
    '''Build a PieObject using only what the file system knows about fn.

    fn -- full path of the file to describe.

    The title and creation date are taken from get_fake_metadata(fn);
    the file type is whatever determine_file_type(fn) reports and the
    file name is the basename of fn.
    '''
    meta = get_fake_metadata(fn)
    title = meta['title']
    created = meta['creation_date']

    obj = PieObject(title=title, date=created)
    obj.FileData_DateCreated = created
    obj.FileData_DateModified = meta['modification_date']
    obj.FileData_FileType = determine_file_type(fn)
    obj.FileData_FileName = os.path.basename(fn)
    return obj
def pypdf_object(fn):
    '''Return a PieObject filled in from pypdf_metadata(fn).

    fn -- path of the pdf file to read.

    On win32 the file is first copied into the 'Workaround' folder under
    CACHEDIR and the copy is read instead of the original. If a file of
    that name is already there, auto_increment_fn supplies another name.
    '''
    if sys.platform == 'win32':
        base = os.path.basename(fn)
        workaround = os.path.join(CACHEDIR, 'Workaround', base)
        if os.path.isfile(workaround):
            workaround = auto_increment_fn(workaround)
        # NOTE(review): assumes the 'Workaround' directory already exists;
        # shutil.copyfile does not create missing directories - TODO confirm.
        shutil.copyfile(fn, workaround)
        # Everything below, including the reported file name, now refers
        # to the copy rather than to the path that was passed in.
        fn = workaround

    meta = pypdf_metadata(fn)
    title = meta['title']
    author = meta['author']
    created = meta['creation_date']

    obj = PieObject(title=title, author=author, date=created)
    obj.FileData_DateCreated = created
    obj.FileData_FileType = 'pdf'
    obj.FileData_FileName = os.path.basename(fn)
    return obj
# NOTE(review): another definition of pypdf_object with the same behaviour
# appears just before this one in the file; this later one is the binding
# that remains after import.
def pypdf_object(fn):
    '''Create a PieObject describing a pdf, using pypdf_metadata.

    fn -- path of the pdf file.

    When running on win32, fn is copied to CACHEDIR/Workaround first
    (under a name from auto_increment_fn if the plain name is taken) and
    the metadata is read from that copy.
    '''
    if sys.platform == 'win32':
        copy_path = os.path.join(
            CACHEDIR, 'Workaround', os.path.basename(fn))
        if os.path.isfile(copy_path):
            copy_path = auto_increment_fn(copy_path)
        shutil.copyfile(fn, copy_path)
        # The copy replaces fn for the rest of the function, so the file
        # name stored on the object is the copy's name.
        fn = copy_path

    info = pypdf_metadata(fn)
    fields = {}
    fields['title'] = info['title']
    fields['author'] = info['author']
    fields['date'] = info['creation_date']

    pie = PieObject(**fields)
    pie.FileData_DateCreated = fields['date']
    pie.FileData_FileType = 'pdf'
    pie.FileData_FileName = os.path.basename(fn)
    return pie
), "%Y %m %d %H %M") else: creation_date = fakeobj.FileData_DateCreated if reader.Info.Author: author = unicode( reader.Info.Author, 'utf8', errors=U_ERROR_BEHAV).strip('()') else: author = u'' if reader.Info.Title: title = unicode( reader.Info.Title, 'utf8', errors=U_ERROR_BEHAV).strip('()') else: title = u'' obj = PieObject( title = title, author = author, date = creation_date) obj.FileData_DateCreated = creation_date obj.FileData_FileType = 'pdf' obj.FileData_FileName = os.path.basename(fn) return obj def pdfrw_metadata(fn): reader = PdfReader(fn) # assert len(reader.Info.CreationDate) > 0 cd = reader.Info.CreationDate.split(':')[1] #get the 'good' bit # md = reader.Info.ModDate.split(':')[1] creation_date = datetime.datetime.strptime("%s %s %s %s %s" % ( cd[0:4], cd[4:6], cd[6:8], cd[8:10], cd[10:12] ), "%Y %m %d %H %M") return { 'author': unicode(reader.Info.Author, 'utf8', errors=U_ERROR_BEHAV), 'title': unicode(reader.Info.Title, 'utf8', errors=U_ERROR_BEHAV),