Пример #1
0
def get_fake_metadata_object(fn):
    """Build a PieObject described purely by file-system metadata.

    fn is the full path of the file. The title and dates are taken from
    the dict returned by get_fake_metadata(fn); the file type comes from
    determine_file_type(fn) and the file name is the basename of fn.
    """
    meta = get_fake_metadata(fn)
    title = meta['title']
    created = meta['creation_date']

    result = PieObject(title=title, date=created)
    result.FileData_DateCreated = created
    result.FileData_DateModified = meta['modification_date']
    result.FileData_FileType = determine_file_type(fn)
    result.FileData_FileName = os.path.basename(fn)
    return result
Пример #2
0
def pypdf_object(fn):
    """Create a PieObject for a PDF using what pypdf_metadata reports.

    When sys.platform is 'win32' the file is first copied into the
    'Workaround' folder under CACHEDIR, and that copy replaces fn for
    everything that follows (metadata reading and the recorded name).
    """
    if sys.platform == 'win32':
        base_name = os.path.basename(fn)
        workaround_copy = os.path.join(CACHEDIR, 'Workaround', base_name)
        if os.path.isfile(workaround_copy):
            # A file already sits at that path; ask auto_increment_fn
            # for a replacement path (presumably an unused one).
            workaround_copy = auto_increment_fn(workaround_copy)
        shutil.copyfile(fn, workaround_copy)
        fn = workaround_copy

    meta = pypdf_metadata(fn)
    title = meta['title']
    author = meta['author']
    created = meta['creation_date']

    result = PieObject(title=title, author=author, date=created)
    result.FileData_DateCreated = created
    result.FileData_FileType = 'pdf'
    result.FileData_FileName = os.path.basename(fn)
    return result
Пример #3
0
def pypdf_object(fn):
    """Return a PieObject populated from pypdf_metadata(fn).

    On 'win32' the PDF is copied to CACHEDIR/Workaround first and the
    copy is used in place of the original path from then on, so the
    stored file name is the basename of the copy.
    """
    running_on_windows = sys.platform == 'win32'
    if running_on_windows:
        target = os.path.join(
            CACHEDIR, 'Workaround', os.path.basename(fn))
        # Only consult auto_increment_fn when the target path is taken.
        if os.path.isfile(target):
            target = auto_increment_fn(target)
        shutil.copyfile(fn, target)
        fn = target

    info = pypdf_metadata(fn)
    pie = PieObject(title=info['title'],
                    author=info['author'],
                    date=info['creation_date'])
    pie.FileData_DateCreated = info['creation_date']
    pie.FileData_FileType = 'pdf'
    pie.FileData_FileName = os.path.basename(fn)
    return pie
Пример #4
0
                ), "%Y %m %d %H %M")
    else:
        creation_date = fakeobj.FileData_DateCreated
    if reader.Info.Author:
        author = unicode(
            reader.Info.Author, 'utf8', errors=U_ERROR_BEHAV).strip('()')
    else: author = u''
    if reader.Info.Title:
        title = unicode(
            reader.Info.Title, 'utf8', errors=U_ERROR_BEHAV).strip('()')
    else: title = u''
    obj = PieObject(
        title = title,
        author = author,
        date = creation_date)
    obj.FileData_DateCreated = creation_date
    obj.FileData_FileType = 'pdf'
    obj.FileData_FileName = os.path.basename(fn)
    return obj

def pdfrw_metadata(fn):
    reader = PdfReader(fn)
        # assert len(reader.Info.CreationDate) > 0
    cd = reader.Info.CreationDate.split(':')[1] #get the 'good' bit 
        # md = reader.Info.ModDate.split(':')[1]
    creation_date = datetime.datetime.strptime("%s %s %s %s %s" % (
            cd[0:4], cd[4:6], cd[6:8], cd[8:10], cd[10:12]
            ), "%Y %m %d %H %M")
    return {
        'author': unicode(reader.Info.Author, 'utf8', errors=U_ERROR_BEHAV),
        'title': unicode(reader.Info.Title, 'utf8', errors=U_ERROR_BEHAV),