def imgpath2gps(imgpath):
    """Return ``[lat, lon]`` in signed decimal degrees from an image's XMP data.

    The values are read from the ``exif:GPSLatitude`` / ``exif:GPSLongitude``
    properties of the Adobe EXIF namespace. When the namespace or either
    property is missing, both coordinates are returned as NaN.
    """
    from libxmp.utils import file_to_dict

    def lat_lon_str_to_deg(coord):
        """Convert a ``'<deg>,<minutes><N|S|E|W>'`` string to signed degrees."""
        parts = coord.split(',')
        degrees = float(parts[0])
        minutes = float(parts[1][:-1])
        hemisphere = parts[1][-1]
        sign = 1 if hemisphere in ('N', 'E') else -1
        return sign * (degrees + minutes / 60.0)

    try:
        xmp = file_to_dict(imgpath)
        exif = xmp[u'http://ns.adobe.com/exif/1.0/']
        lat_str = [x[1] for x in exif if x[0] == u'exif:GPSLatitude'][0]
        lon_str = [x[1] for x in exif if x[0] == u'exif:GPSLongitude'][0]
        lat = lat_lon_str_to_deg(lat_str)
        lon = lat_lon_str_to_deg(lon_str)
    except (KeyError, IndexError):
        # KeyError: the EXIF namespace is absent.
        # IndexError: the namespace exists but a GPS property does not.
        lat = float('NaN')
        lon = float('NaN')
    return [lat, lon]
def print_png_gif_bmp(file_full_path, color_mode):
    """Print the XMP metadata of a PNG, GIF or BMP file.

    Only the first Dublin Core property is reported: its name, its value and
    its option flags. Output goes through ``cprint`` when *color_mode* is
    true and through plain ``print`` otherwise.
    """
    def emit(text, color=None, attrs=None):
        # A single-argument print(...) behaves identically on Python 2 and 3.
        if color_mode:
            cprint(text, color, attrs=attrs)
        else:
            print(text)

    def emit_pair(label, value):
        # Label and value always end up on the same output line.
        if color_mode:
            cprint(label, "cyan", end="")
            cprint(str(value))
        else:
            print(label + str(value))

    # Header with file path
    emit("\n[+] Metadata for file: %s" % (file_full_path), "green", ["bold"])

    # The handle is not used below; the call is kept so that an unreadable
    # image still raises here, as it did before.
    image = Image.open(file_full_path)

    # Print XMP metadata
    emit("\t-----XMP METADATA-----", "cyan")
    xmp = file_to_dict(file_full_path)
    if not xmp:
        emit("\tNo XMP metadata found", "red")
        return

    # XMP may be present without any Dublin Core property.
    dc = xmp.get(consts.XMP_NS_DC)
    if not dc:
        emit("\tNo Dublin Core XMP metadata found", "red")
        return

    first = dc[0]
    emit("\t-" + first[0], "cyan")
    emit("\t-" + first[1], "cyan")
    for key, value in first[2].items():
        emit_pair("\t-" + key + ": ", value)
def xmp(file_path: str):
    """Print each property of the file's basic XMP namespace as ``key value``."""
    from libxmp.consts import XMP_NS_XMP
    from libxmp.utils import file_to_dict

    properties = file_to_dict(file_path)[XMP_NS_XMP]
    for entry in properties:
        # Each entry is a (key, value, options) triple; options are ignored.
        name, text, _options = entry
        print(name, text)
def save_metadata(img_path: str, file):
    """Write the EXIF fields and XMP properties of an image to *file*.

    Args:
        img_path: Path of the image to inspect.
        file: Writable text stream that receives the report.

    EXIF fields that cannot be read are reported on stdout and skipped.
    """
    file.write(f'\nImage: {img_path.split("/")[-1]}\n\n')

    # Exif tags
    exif_data = Image(img_path)
    # XMP tags
    xmpdata = file_to_dict(img_path)
    # NOTE: the former unused lookup of the DJI namespace was dropped; it
    # raised KeyError for every image without DJI metadata.

    file.write("EXIF properties\n\n")
    for field in dir(exif_data):
        try:
            file.write(f'{field}: {exif_data[field]}\n')
        except Exception:
            # Not every attribute name is a readable EXIF field.
            print(f"No se pudo leer el campo {field}")

    file.write("\nXMP keys and properties\n")
    for key, props in xmpdata.items():
        file.write(f"\n{key}\n\n")
        for prop in props:
            file.write(f"{prop[0]}: {prop[1]}\n")
def get_metadata(self):
    """Return all XMP properties of ``self.kwargs['path']`` as a flat dict.

    Properties from every namespace are merged into one ``{key: value}``
    mapping. On any failure a ``{"Message": <error text>}`` dict is returned
    instead of raising.
    """
    path = self.kwargs.get("path")

    from libxmp.utils import file_to_dict

    try:
        metadata = {}
        for properties in file_to_dict(path).values():
            for key, value, _options in properties:
                metadata[key] = value
        return metadata
    except Exception as e:  # "as" form is valid on Python 2.6+ and 3
        return {"Message": str(e)}
def get_rating_embed(file):
    """Return the embedded ``xmp:Rating`` value of *file*, or 0 if there is none.

    A found rating is returned exactly as stored in the XMP data; the
    fallback is the integer 0.
    """
    xmp = file_to_dict(file) or {}
    # .get() covers files that carry XMP but no xap namespace.
    for tpl in xmp.get("http://ns.adobe.com/xap/1.0/", ()):
        if tpl[0] == "xmp:Rating":
            return tpl[1]
    return 0
def get_xmp(path):
    """Collect the indexed Dublin Core items of a file into a dict.

    Only properties whose name ends in ``]`` are considered. The key is the
    name without namespace prefix and index. A key seen once maps to its
    value; a key seen several times maps to a list of values.

    Returns False when the file has no Dublin Core namespace or when any
    error occurs (the error is printed).
    """
    try:
        namespaces = file_to_dict(file_path=path)
        if consts.XMP_NS_DC not in namespaces:
            return False

        data = {}
        for dc_item in namespaces[consts.XMP_NS_DC]:
            name = dc_item[0]
            if name[-1] != ']':
                continue
            key = name.split('[')[0].split(':')[1]
            value = dc_item[1]
            if key not in data:
                data[key] = value
                continue
            # Second occurrence: promote the stored scalar to a list first.
            if not isinstance(data[key], list):
                data[key] = [data[key]]
            data[key].append(value)
        return data
    except Exception as e:
        print('exeption: ', e)
        return False
def getMovieDate(f):
    """Return the ``xmp:CreateDate`` of *f* as a compact timestamp string.

    The stored value has ``-``, ``:`` and ``Z`` removed and ``T`` replaced by
    ``_``. The first matching property wins. Returns None when the file has
    no such property.
    """
    xmp_info_list = file_to_dict(f)
    for properties in xmp_info_list.values():
        for element in properties:
            # Test the property name only, so that a value which merely
            # mentions "xmp:CreateDate" cannot be mistaken for the date.
            if "xmp:CreateDate" in element[0]:
                created = element[1]
                return (created.replace("-", "")
                               .replace("T", "_")
                               .replace(":", "")
                               .replace("Z", ""))
    return None
def get_metadata(self, dataset, content):
    """Extract XMP metadata for every path in ``self.paths``.

    Returns a 2-tuple: a dict holding the collected ``'@context'``
    vocabulary, and a list of ``(path, metadata)`` pairs for the files that
    yielded XMP data. When *content* is false, ``({}, [])`` is returned.
    """
    if not content:
        return {}, []

    context = {}
    contentmeta = []
    for relpath in self.paths:
        info = file_to_dict(opj(self.ds.path, relpath))
        if not info:
            # got nothing, likely nothing there
            # TODO check if this is an XMP sidecar file, parse that, and
            # assign metadata to the base file
            continue

        # update vocabulary: the prefix of each namespace's first property
        # becomes the term name
        vocab = {
            info[ns][0][0].split(':')[0]: {'@id': ns, 'type': vocabulary_id}
            for ns in info
        }
        # TODO this is dirty and assumes that XMP is internally consistent
        # with the definitions across all files -- which it likely isn't
        context.update(vocab)

        # now pull out the actual metadata
        meta = {}
        for properties in info.values():
            for key, val, props in properties:
                # skip empty values, nested arrays, and array container
                # nodes (the array items themselves are handled below)
                if not val or key.count('[') > 1 or props['VALUE_IS_ARRAY']:
                    continue
                # normalize value; replace non-breaking spaces
                val = assure_unicode(val).replace(u"\xa0", ' ')
                field, idx, qual = xmp_field_re.match(key).groups()
                normkey = u'{}{}'.format(field, qual)
                if '/' in key:
                    normkey = u'{0}<{1}>'.format(*normkey.split('/'))
                if idx:
                    # array item
                    meta.setdefault(normkey, []).append(val)
                else:
                    meta[normkey] = val

        # compact: unwrap single-item lists
        compacted = {}
        for name, value in meta.items():
            if isinstance(value, list) and len(value) == 1:
                compacted[name] = value[0]
            else:
                compacted[name] = value
        contentmeta.append((relpath, compacted))

    return {'@context': context}, contentmeta
def read_metadata(filepath):
    """Build a record of mapped XMP values for *filepath*.

    For each namespace listed in ``MAPPING``, every non-empty property whose
    name starts with a mapped field name is appended to the list stored
    under the mapped record key. The record always holds ``'filepath'``.
    """
    record = {'filepath': filepath}
    namespaces = file_to_dict(filepath)
    for ns in namespaces:
        for name, value, _options in namespaces[ns]:
            for prefix in MAPPING.get(ns, {}):
                if not (name.startswith(prefix) and value):
                    continue
                record.setdefault(MAPPING[ns][prefix], []).append(value)
    return record
def read_metadata(filepath):
    """Return a record dict with the XMP values selected by ``MAPPING``.

    ``MAPPING`` is consulted per namespace: a property contributes its value
    when its name starts with one of the namespace's field names and the
    value is non-empty. Values are grouped into lists by target key.
    """
    record = {'filepath': filepath}
    for namespace, properties in file_to_dict(filepath).items():
        for prop_name, prop_value, _flags in properties:
            matching = [
                prefix
                for prefix in MAPPING.get(namespace, {})
                if prop_name.startswith(prefix) and prop_value
            ]
            for prefix in matching:
                target = MAPPING[namespace][prefix]
                record.setdefault(target, []).append(prop_value)
    return record
def get_pose(image_file):
    """Read position, capture time and gimbal yaw from an image.

    Returns:
        ``(lon, lat, alt, unixtime, yaw_deg)``. Position comes from the EXIF
        GPS block. ``unixtime`` is derived from the EXIF ``DateTime`` tag
        interpreted as local time, or None when the tag is absent.
        ``yaw_deg`` is the ``drone-dji:GimbalYawDegree`` XMP value shifted
        to be non-negative, or None when the tag is absent.
    """
    import time  # local import: only needed for the epoch conversion below

    exif_dict = piexif.load(image_file)
    gps = exif_dict['GPS']

    elat = gps[piexif.GPSIFD.GPSLatitude]
    lat = dms_to_decimal(
        elat[0], elat[1], elat[2],
        gps[piexif.GPSIFD.GPSLatitudeRef].decode('utf-8'))
    elon = gps[piexif.GPSIFD.GPSLongitude]
    lon = dms_to_decimal(
        elon[0], elon[1], elon[2],
        gps[piexif.GPSIFD.GPSLongitudeRef].decode('utf-8'))
    # altitude is stored as a (numerator, denominator) pair
    ealt = gps[piexif.GPSIFD.GPSAltitude]
    alt = ealt[0] / ealt[1]

    if piexif.ImageIFD.DateTime in exif_dict['0th']:
        stamp = exif_dict['0th'][piexif.ImageIFD.DateTime].decode('utf-8')
        strdate, strtime = stamp.split()
        year, month, day = strdate.split(':')
        hour, minute, second = strtime.split(':')
        dt = datetime.datetime(int(year), int(month), int(day),
                               int(hour), int(minute), int(second))
        # strftime('%s') is a platform-specific extension; mktime() gives
        # the same local-time epoch seconds on every platform.
        unixtime = time.mktime(dt.timetuple())
    else:
        unixtime = None

    # check for dji image heading tag: flatten all namespaces into one dict
    xmp = {}
    for properties in file_to_dict(image_file).values():
        for v in properties:
            xmp[v[0]] = v[1]

    if 'drone-dji:GimbalYawDegree' in xmp:
        yaw_deg = float(xmp['drone-dji:GimbalYawDegree'])
        while yaw_deg < 0:
            yaw_deg += 360
    else:
        yaw_deg = None

    return lon, lat, alt, unixtime, yaw_deg
def scan_pictures(path, min_rating):
    """Yield ``(relative_dir, filename, rating)`` for sufficiently rated files.

    Walks *path* recursively and reads ``xmp:Rating`` from each file's basic
    XMP namespace (treated as 0 when the property is absent). Files without
    that namespace are skipped. Only ratings >= *min_rating* are yielded.
    """
    for root, _subdirs, files in os.walk(path):
        for name in files:
            namespaces = file_to_dict(os.path.join(root, name))
            if consts.XMP_NS_XMP not in namespaces:
                continue
            props = dict(
                (entry[0], entry[1]) for entry in namespaces[consts.XMP_NS_XMP]
            )
            rating = int(props.get('xmp:Rating', 0))
            if rating >= min_rating:
                yield os.path.relpath(root, path), name, rating
def xmp_to_vec(fn):
    """Return the desired XMP fields of *fn* as two parallel arrays.

    All (field, value) pairs from every namespace are loaded into a frame
    and inner-joined with ``DESIRED_FIELDS`` on ``"field"``. The result is
    ``(fields, values)`` taken from the joined frame.
    """
    # read in the core data of interest from the XMP file
    rows = []
    for entries in file_to_dict(fn).values():
        rows.extend(entry[:2] for entry in entries)
    frame = pd.DataFrame(rows, columns=["field", "value"])

    # filter down to the desired properties only
    selected = frame.merge(DESIRED_FIELDS, how="inner", on="field")
    return selected["field"].values, selected["value"].values
def parse_xmp_data(fn):
    """Parse the XMP file *fn* into a dict, wrapping it in an xpacket if needed.

    If the first line already mentions ``xpacket`` the file is parsed as is.
    Otherwise its content is copied into a temporary file between
    ``XMP_XPACKET_HEADER`` and ``XMP_XPACKET_FOOTER`` and that copy is
    parsed. The original file is never modified, and the temporary file is
    removed even when writing or parsing fails.
    """
    with open(fn, 'r') as fp:
        header = fp.readline()
    if "xpacket" in header:
        # the file is already in a compatible format for the XMP parser
        return file_to_dict(fn)

    # delete=False: the parser reopens the file by name after we close it
    tmp = NamedTemporaryFile(mode='w', delete=False)
    temp_fn = tmp.name
    try:
        with tmp, open(fn, 'r') as raw_fp:
            tmp.write(XMP_XPACKET_HEADER + "\n")
            for line in raw_fp:
                tmp.write("{line}\n".format(line=line))
            tmp.write(XMP_XPACKET_FOOTER + "\n")
        return file_to_dict(temp_fn)
    finally:
        os.remove(temp_fn)
def print_jpg(file_full_path, color_mode):
    """Print the XMP and EXIF metadata of a JPG/JPEG file.

    XMP: only the first Dublin Core property (name, value, option flags) is
    reported. EXIF: every tag returned by the image is listed, with numeric
    tag ids translated through ``TAGS`` where known. Output goes through
    ``cprint`` when *color_mode* is true and plain ``print`` otherwise.
    """
    def emit(text, color=None, attrs=None):
        # A single-argument print(...) behaves identically on Python 2 and 3.
        if color_mode:
            cprint(text, color, attrs=attrs)
        else:
            print(text)

    def emit_pair(label, value):
        # Label and value share one output line in both modes.
        if color_mode:
            cprint(label, "cyan", end="")
            cprint(str(value))
        else:
            print(label + str(value))

    # Header with file path
    emit("\n[+] Metadata for file: %s" % (file_full_path), "green", ["bold"])

    # Open the file
    image = Image.open(file_full_path)

    # Print XMP metadata
    emit("\t-----XMP METADATA-----", "cyan")
    xmp = file_to_dict(file_full_path)
    dc = xmp.get(consts.XMP_NS_DC) if xmp else None
    if not xmp:
        emit("\tNo XMP metadata found", "red")
    elif not dc:
        # XMP may be present without any Dublin Core property.
        emit("\tNo Dublin Core XMP metadata found", "red")
    else:
        first = dc[0]
        emit("\t-" + first[0], "cyan")
        emit("\t-" + first[1], "cyan")
        for key, value in first[2].items():
            emit_pair("\t-" + key + ": ", value)

    # Print EXIF metadata
    emit("\n\t-----EXIF METADATA-----", "cyan")
    info = image._getexif()
    if not info:
        emit("\tNo EXIF metadata found", "red")
    else:
        for tag, value in info.items():
            # Unknown tags fall back to their integer id, so convert to str
            # before concatenating.
            key = str(TAGS.get(tag, tag))
            emit_pair("\t-" + key + ": ", value)
def __init__(self, pathname):
    """Load the XMP data of *pathname* into one dict attribute per namespace.

    Each attribute maps property name to value and stays an empty dict when
    the corresponding namespace is absent from the file.
    """
    # get an xmp data dictionary from file
    xmp = file_to_dict(pathname)

    # (attribute name, name of the class attribute holding the namespace URI)
    sections = (
        ('iptc', 'XMP_NS_IPTC'),
        ('exif', 'XMP_NS_EXIF'),
        ('aux', 'XMP_NS_AUX'),
        ('lightroom', 'XMP_NS_LR'),
        ('photoshop', 'XMP_NS_PS'),
        ('dublinCore', 'XMP_NS_DC'),
        ('tiff', 'XMP_NS_TIFF'),
        ('xap', 'XMP_NS_XAP'),
        ('rights', 'XMP_NS_RIGHTS'),
        ('usplus', 'XMP_NS_USPLUS'),
        ('iptcExt', 'XMP_NS_IPTC_EXT'),
    )

    # initialise every dictionary with its own empty dict
    for attr, _ in sections:
        setattr(self, attr, {})

    # fill the ones whose namespace is present
    for attr, ns_name in sections:
        namespace = getattr(self, ns_name)
        if namespace in xmp:
            setattr(self, attr, {elem[0]: elem[1] for elem in xmp[namespace]})
def update_xmp(imgpath, keywords):
    """Add *keywords* to the ``dc:subject`` list of an image or its sidecar.

    An existing ``<imgpath>.xmp`` sidecar is used when present. Otherwise,
    formats that cannot embed XMP get a freshly created sidecar. Keywords
    already present in ``dc:subject`` are not added again.

    Returns:
        0 on success.

    Raises:
        Exception: if the XMP packet cannot be written to the target file.
    """
    # NEF requires sidecar
    embeddedXmpFormats = ('jpg', 'png', 'tif', 'dng')

    sidecar = imgpath + '.xmp'
    if os.path.isfile(sidecar):
        # Reuse the existing sidecar. Previously this path fell through to
        # the branch below and created a blank "<name>.xmp.xmp" every time.
        imgpath = sidecar
    elif not imgpath.lower().endswith(embeddedXmpFormats):
        # create and use sidecar file
        imgpath = sidecar
        with open(imgpath, 'w+') as f:
            f.write(blank_xmp())
        print('wrote in' + imgpath)

    xmpfiledict = file_to_dict(imgpath)
    existing_keywords = []
    dc = xmpfiledict.get(consts.XMP_NS_DC)
    if dc is None:
        print('nothing')
    else:
        # Array items appear as "dc:subject[n]"; their values are the keywords.
        existing_keywords = [
            item[1] for item in dc
            if item[0].startswith('dc:subject[') and item[1]
        ]
    print('existing_keywords')
    print(existing_keywords)

    xmpfile = XMPFiles(file_path=imgpath, open_forupdate=True)
    try:
        xmp = xmpfile.get_xmp()
        print(xmp)

        keywords_to_add = [x for x in keywords if x not in existing_keywords]
        print('keywords to add')
        print(keywords_to_add)

        for keyword in keywords_to_add:
            xmp.append_array_item(consts.XMP_NS_DC, u'subject', keyword)

        if not xmpfile.can_put_xmp(xmp):
            raise Exception('Cannot write xmp to ' + imgpath)
        xmpfile.put_xmp(xmp)
    finally:
        # Close the handle on every path, including unexpected errors.
        xmpfile.close_file()
    return 0
def __init__(self, file_path):
    """Resolve *file_path* to a local file, parse its XMP and clean it.

    When ``default_storage`` can provide a local path that opens, that path
    is used directly. Otherwise the stored content is copied into a
    temporary file, which is kept on ``self.tmp_file`` so the path in
    ``self.file_path`` stays usable.
    """
    try:
        # Don't use temporary files if we're using FileSystemStorage
        with open(default_storage.path(file_path), mode='rb') as f:
            self.file_path = f.name
    except Exception:
        # Not a bare "except:": KeyboardInterrupt/SystemExit must propagate.
        self.tmp_file = tempfile.NamedTemporaryFile()
        with default_storage.open(file_path) as f:
            self.tmp_file.write(f.read())
        self.tmp_file.flush()
        self.tmp_file.seek(0)
        self.file_path = self.tmp_file.name

    ns_dict = file_to_dict(self.file_path)
    self.clean(ns_dict)
def print_tiff(file_full_path, color_mode):
    """Print the XMP and EXIF metadata of a TIFF file.

    XMP: only the first Dublin Core property (name, value, option flags) is
    reported. EXIF: all tags found by ``exifread`` are listed except
    thumbnails, the file name and the maker note. Output goes through
    ``cprint`` when *color_mode* is true and plain ``print`` otherwise.
    """
    def emit(text, color=None, attrs=None):
        # A single-argument print(...) behaves identically on Python 2 and 3.
        if color_mode:
            cprint(text, color, attrs=attrs)
        else:
            print(text)

    def emit_pair(label, value):
        # Label and value share one output line in both modes.
        if color_mode:
            cprint(label, "cyan", end="")
            cprint(str(value))
        else:
            print(label + str(value))

    # Header with file path
    emit("\n[+] Metadata for file: %s" % (file_full_path), "green", ["bold"])

    # Print XMP metadata
    emit("\t-----XMP METADATA-----", "cyan")
    xmp = file_to_dict(file_full_path)
    dc = xmp.get(consts.XMP_NS_DC) if xmp else None
    if not xmp:
        emit("\tNo XMP metadata found", "red")
    elif not dc:
        # XMP may be present without any Dublin Core property.
        emit("\tNo Dublin Core XMP metadata found", "red")
    else:
        first = dc[0]
        emit("\t-" + first[0], "cyan")
        emit("\t-" + first[1], "cyan")
        for key, value in first[2].items():
            emit_pair("\t-" + key + ": ", value)

    # Print EXIF metadata
    emit("\n\t-----EXIF METADATA-----", "cyan")
    # Open in binary mode for exifread and close the handle afterwards.
    with open(file_full_path, 'rb') as image:
        tags = exifread.process_file(image)
    skipped = ('JPEGThumbnail', 'TIFFThumbnail', 'Filename', 'EXIF MakerNote')
    for tag in tags.keys():
        if tag not in skipped:
            emit_pair("\t-" + str(tag) + ": ", tags[tag])
def get_image(filename):
    """Pull the docker image referenced in an image file's XMP metadata.

    The Dublin Core ``creator`` properties are searched for a value that
    contains ``im2sim``. That value is parsed as a Python literal and its
    ``'im2sim'`` entry names the docker image. The last match wins.

    Returns:
        The exit status of ``docker pull``, or 1 when no image reference was
        found in the metadata.
    """
    xmp = file_to_dict(filename)
    dc = xmp[libxmp.consts.XMP_NS_DC]

    docker_image = None
    for name, value, _options in dc:
        if 'creator' in name and 'im2sim' in value:
            # literal_eval only evaluates literals, never arbitrary code
            payload = ast.literal_eval(value)
            docker_image = payload['im2sim']

    if docker_image is None:
        # Previously this ran "docker pull None" and returned its failure.
        print('No im2sim docker image found in {}'.format(filename))
        return 1

    print('Pulling docker image {}'.format(docker_image))
    print("To inspect the code, try docker run -ti {} /bin/bash".format(docker_image))

    # The image name comes from file metadata, so pass an argument list
    # without a shell to rule out command injection.
    with open(os.devnull, 'w') as devnull:
        return subprocess.call(['docker', 'pull', str(docker_image)],
                               stdout=devnull)
def file_claims_pdfa(filename):
    """Determine whether the file claims to be PDF/A compliant.

    Checking if a file is a truly compliant PDF/A is a massive undertaking
    that no open source tool does properly. Some commercial tools are
    generally reliable (Acrobat). This only checks whether the XMP metadata
    contains a PDF/A marker.

    Returns:
        A dict with the keys ``'pass'`` (bool), ``'output'`` (``'pdfa'`` or
        ``'pdf'``) and ``'conformance'`` (a description). All three keys are
        present on every return path.
    """
    xmp = file_to_dict(filename)
    if not xmp:
        return {
            'pass': False,
            'output': 'pdf',
            'conformance': 'No XMP metadata',
        }
    if consts.XMP_NS_PDFA_ID not in xmp:
        return {
            'pass': False,
            'output': 'pdf',
            'conformance': 'No PDF/A metadata in XMP',
        }

    pdfa_node = xmp[consts.XMP_NS_PDFA_ID]

    def read_node(node, key):
        # First value stored under *key*, or '' when the key is absent.
        return next((v for k, v, meta in node if k == key), '')

    part = read_node(pdfa_node, 'pdfaid:part')
    conformance = read_node(pdfa_node, 'pdfaid:conformance')
    part_conformance = part + conformance

    valid_part_conforms = {'1A', '1B', '2A', '2B', '2U', '3A', '3B', '3U'}
    claims_pdfa = part_conformance in valid_part_conforms
    return {
        'pass': claims_pdfa,
        'output': 'pdfa' if claims_pdfa else 'pdf',
        'conformance': 'PDF/A-{}'.format(part_conformance),
    }
def _ReadKeywordsFromFile(abspath: str) -> typing.Set[str]:
    """Read the Lightroom keywords of a file.

    Args:
      abspath: Path to the file.

    Returns:
      The set of non-empty values in the file's Lightroom XMP namespace.
      On KeyError the path is logged as an error and an empty set is
      returned.
    """
    try:
        namespaces = xmputils.file_to_dict(abspath)
        lightroom_entries = namespaces['http://ns.adobe.com/lightroom/1.0/']
        return {entry[1] for entry in lightroom_entries if entry[1]}
    except KeyError:
        logging.error(abspath)
        return set()
def get_pose(img_path: str) -> dict:
    """Return the camera pose stored in a DJI image.

    Latitude, longitude and focal length come from the EXIF data. Altitude
    and the gimbal/flight angles come from the DJI XMP namespace.

    Raises:
        KeyError: if the DJI namespace or one of the required DJI
            attributes is missing.
    """
    # Exif tags
    exif_data = Image(img_path)
    # XMP tags
    xmpdata = file_to_dict(img_path)

    # DJI attributes keyed by their name without the namespace prefix
    attrlist = xmpdata["http://www.dji.com/drone-dji/1.0/"]
    dji = {attr[0].split(':')[-1]: attr[1] for attr in attrlist}

    # Exif longitude and latitude to float
    lon, lat = Photo.coordinate_to_float(
        exif_data["gps_longitude"],
        exif_data["gps_latitude"],
        exif_data["gps_longitude_ref"],
        exif_data["gps_latitude_ref"],
    )

    # np.float was only an alias of the builtin float and has been removed
    # from NumPy, so the builtin is used directly.
    pose = {
        "focal_length": exif_data["focal_length"],
        "latitude": lat,
        "longitude": lon,
        "altitude": float(dji["AbsoluteAltitude"]),
        "gimbal_yaw": float(dji["GimbalYawDegree"]),
        "gimbal_pitch": float(dji["GimbalPitchDegree"]),
        "gimbal_roll": float(dji["GimbalRollDegree"]),
        "flight_yaw": float(dji["FlightYawDegree"]),
        "flight_pitch": float(dji["FlightPitchDegree"]),
        "flight_roll": float(dji["FlightRollDegree"]),
    }
    return pose
def copyXMP(path):
    """Dump the XMP properties of every ``*jpg`` file under *path*.

    The report is written to ``<path>/meta.txt``. For each file it holds the
    file path, one ``name : value`` line per XMP property, and a separator
    line of 30 dashes. Each processed file name is also printed.
    """
    separator = "-" * 30
    with open(path+'/meta.txt', 'w') as txt_file:
        for root, _dirs, files in os.walk(path):
            for name in files:
                if not name.endswith('jpg'):
                    continue
                print(name)
                full_path = os.path.join(root, name)
                txt_file.writelines((full_path, '\n'))
                for properties in file_to_dict(full_path).values():
                    for prop in properties:
                        txt_file.writelines((prop[0], ' : ', prop[1], '\n'))
                txt_file.writelines((separator, '\n'))
def read_tags(file_path: str) -> Set[str]:
    """Read the Digikam tags stored in a file's XMP data.

    :param file_path: File where to read the tags from
    :return: Set of non-empty values found in the Digikam namespace; empty
        when the namespace is absent
    """
    sections = file_to_dict(file_path).get('http://www.digikam.org/ns/1.0/', ())
    return {
        section[1]
        for section in sections
        if len(section) != 0 and section[1] != ''
    }
def get_lightroom_keywords(full_path):
    """Build a metadata dict from the Lightroom keywords of a file.

    Keywords are read from the Lightroom XMP namespace, skipping the first
    entry. ``IsBackgroundPhoto`` sets that flag. Every other keyword must
    have the form ``key|value``: ``Category`` values are collected in a
    list, any other key is stored directly. Keywords that cannot be
    processed are reported on stdout and skipped.
    """
    # The metadata we want lives under http://ns.adobe.com/lightroom/1.0/;
    # file_to_dict returns several other namespaces as well.
    lightroom_entries = file_to_dict(full_path)["http://ns.adobe.com/lightroom/1.0/"]

    metadata = {
        "Category": [],
        "Location": "",
        "Gallery": "",
        "ContentType": "",
        "CameraType": "",
        "IsBackgroundPhoto": False,
    }

    # The first entry is always noise so skip it.
    for entry in lightroom_entries[1:]:
        # The keyword itself is the second element of each entry.
        keyword = entry[1]
        try:
            if keyword == "IsBackgroundPhoto":
                metadata["IsBackgroundPhoto"] = True
                continue
            key, value = keyword.split("|")
            if key != "Category":
                metadata[key] = value
            else:
                metadata["Category"].append(value)
        except Exception as e:
            print("Metadata Issue: {}".format(full_path))
            print(e)
    return metadata
def get_pose(image_file):
    """Read position, capture time and gimbal attitude from an image.

    DJI XMP tags are preferred for latitude, longitude and altitude; the
    EXIF GPS block is the fallback for each of them.

    Returns:
        ``(lon_deg, lat_deg, alt_m, unixtime, yaw_deg, pitch_deg, roll_deg)``.
        ``unixtime`` is derived from the EXIF ``DateTime`` tag interpreted
        as local time. It and the three angles are None when their tag is
        absent. A negative yaw is shifted by 360 until it is non-negative.
    """
    import time  # local import: only needed for the epoch conversion below

    # exif data
    exif_dict = piexif.load(image_file)

    # flatten the xmp tags of all namespaces into one dict
    xmp = {}
    for properties in file_to_dict(image_file).values():
        for v in properties:
            xmp[v[0]] = v[1]

    def optional_float(tag):
        # Float value of an XMP tag, or None when the tag is absent.
        return float(xmp[tag]) if tag in xmp else None

    if 'drone-dji:GpsLatitude' in xmp:
        lat_deg = float(xmp['drone-dji:GpsLatitude'])
    else:
        elat = exif_dict['GPS'][piexif.GPSIFD.GPSLatitude]
        lat_deg = dms_to_decimal(
            elat[0], elat[1], elat[2],
            exif_dict['GPS'][piexif.GPSIFD.GPSLatitudeRef].decode('utf-8'))

    if 'drone-dji:GpsLongitude' in xmp:
        lon_deg = float(xmp['drone-dji:GpsLongitude'])
    else:
        elon = exif_dict['GPS'][piexif.GPSIFD.GPSLongitude]
        lon_deg = dms_to_decimal(
            elon[0], elon[1], elon[2],
            exif_dict['GPS'][piexif.GPSIFD.GPSLongitudeRef].decode('utf-8'))

    if 'drone-dji:AbsoluteAltitude' in xmp:
        alt_m = float(xmp['drone-dji:AbsoluteAltitude'])
        if alt_m < 0:
            log("image meta data is reporting negative absolute alitude!")
    else:
        # altitude is stored as a (numerator, denominator) pair
        ealt = exif_dict['GPS'][piexif.GPSIFD.GPSAltitude]
        alt_m = ealt[0] / ealt[1]

    if piexif.ImageIFD.DateTime in exif_dict['0th']:
        stamp = exif_dict['0th'][piexif.ImageIFD.DateTime].decode('utf-8')
        strdate, strtime = stamp.split()
        year, month, day = strdate.split(':')
        hour, minute, second = strtime.split(':')
        dt = datetime.datetime(int(year), int(month), int(day),
                               int(hour), int(minute), int(second))
        # strftime('%s') is a platform-specific extension; mktime() gives
        # the same local-time epoch seconds on every platform.
        unixtime = time.mktime(dt.timetuple())
    else:
        unixtime = None

    yaw_deg = optional_float('drone-dji:GimbalYawDegree')
    if yaw_deg is not None:
        while yaw_deg < 0:
            yaw_deg += 360
    pitch_deg = optional_float('drone-dji:GimbalPitchDegree')
    roll_deg = optional_float('drone-dji:GimbalRollDegree')

    return lon_deg, lat_deg, alt_m, unixtime, yaw_deg, pitch_deg, roll_deg
def get_metadata(self, dataset, content):
    """Extract XMP metadata for every path in ``self.paths``, with progress logs.

    Returns a 2-tuple: a dict holding the collected ``'@context'``
    vocabulary, and a list of ``(path, metadata)`` pairs for the files that
    yielded XMP data. When *content* is false, ``({}, [])`` is returned and
    nothing is logged.
    """
    if not content:
        return {}, []

    context = {}
    contentmeta = []
    log_progress(
        lgr.info,
        'extractorxmp',
        'Start XMP metadata extraction from %s', self.ds,
        total=len(self.paths),
        label='XMP metadata extraction',
        unit=' Files',
    )
    for f in self.paths:
        absfp = opj(self.ds.path, f)
        log_progress(
            lgr.info,
            'extractorxmp',
            'Extract XMP metadata from %s', absfp,
            update=1,
            increment=True)
        info = file_to_dict(absfp)
        if not info:
            # got nothing, likely nothing there
            # TODO check if this is an XMP sidecar file, parse that, and
            # assign metadata to the base file
            continue

        # update vocabulary
        # TODO this is dirty and assumes that XMP is internally consistent
        # with the definitions across all files -- which it likely isn't
        vocab = {}
        for ns in info:
            prefix = info[ns][0][0].split(':')[0]
            vocab[prefix] = {'@id': ns, 'type': vocabulary_id}
        context.update(vocab)

        # now pull out the actual metadata
        meta = {}
        for ns in info:
            for key, raw, props in info[ns]:
                if not raw:
                    # skip everything empty
                    continue
                if key.count('[') > 1:
                    # nested array -- deliberately not handled
                    continue
                if props['VALUE_IS_ARRAY']:
                    # array container; the array items are caught below
                    continue
                # normalize value and replace non-breaking spaces
                text = assure_unicode(raw).replace(u"\xa0", ' ')
                field, idx, qual = xmp_field_re.match(key).groups()
                normkey = u'{}{}'.format(field, qual)
                if '/' in key:
                    normkey = u'{0}<{1}>'.format(*normkey.split('/'))
                if not idx:
                    meta[normkey] = text
                    continue
                # array item
                items = meta.get(normkey, [])
                items.append(text)
                meta[normkey] = items

        # compact: unwrap single-item lists
        meta = dict(
            (k, v[0] if isinstance(v, list) and len(v) == 1 else v)
            for k, v in meta.items()
        )
        contentmeta.append((f, meta))

    log_progress(
        lgr.info,
        'extractorxmp',
        'Finished XMP metadata extraction from %s', self.ds
    )
    return {'@context': context}, contentmeta
def test_xml_metadata_preserved(spoof_tesseract_noop, output_type, resources, outpdf):
    """Check that selected XMP properties of graph.pdf survive processing.

    The test is skipped when libxmp cannot be imported.
    """
    input_file = resources / 'graph.pdf'

    try:
        from libxmp import consts
        from libxmp.utils import file_to_dict
    except Exception:
        pytest.skip("libxmp not available or libexempi3 not installed")

    def flatten(xmpdict):
        # The top level maps namespaces to lists of (key, value, infodict).
        # Merge all namespaces and keep only key -> value.
        return {
            key: value
            for entries in xmpdict.values()
            for key, value, *_ in entries
        }

    before = file_to_dict(str(input_file))
    check_ocrmypdf(
        input_file, outpdf, '--output-type', output_type, env=spoof_tesseract_noop
    )
    after = file_to_dict(str(outpdf))

    equal_properties = [
        'dc:contributor',
        'dc:coverage',
        'dc:creator',
        'dc:description',
        'dc:format',
        'dc:identifier',
        'dc:language',
        'dc:publisher',
        'dc:relation',
        'dc:rights',
        'dc:source',
        'dc:subject',
        'dc:title',
        'dc:type',
        'pdf:keywords',
    ]
    # Listed for reference only; these are not checked below.
    might_change_properties = [
        'dc:date',
        'pdf:pdfversion',
        'pdf:Producer',
        'xmp:CreateDate',
        'xmp:ModifyDate',
        'xmp:MetadataDate',
        'xmp:CreatorTool',
        'xmpMM:DocumentId',
        'xmpMM:DnstanceId',
    ]

    before = flatten(before)
    after = flatten(after)

    for prop in equal_properties:
        if prop in before:
            assert prop in after, f'{prop} dropped from xmp'
            assert before[prop] == after[prop]

        # Certain entries like title appear as dc:title[1], with the
        # possibility of several
        propidx = f'{prop}[1]'
        if propidx in before:
            assert (
                after.get(propidx) == before[propidx]
                or after.get(prop) == before[propidx]
            )
# Report the deletion, then walk the photo tree and print the EXIF tags plus
# the Dublin Core title and keywords of every JPEG found.
# Single-argument print(...) gives the same output on Python 2 and 3.
print("%s photos deleted from database" % result.deleted_count)
numPhotos = 0
for root, dirnames, filenames in os.walk(rootdir):
    for filename in filenames:
        if filename.endswith(('.jpg', '.JPG')):
            fullPath = os.path.join(root, filename)
            print("TCDEBUG: processing file %s" % fullPath)

            # Close the image as soon as the EXIF tags are read; the handle
            # used to stay open for every file visited.
            with open(fullPath, 'rb') as f:
                exifTags = exifread.process_file(f, details=False)
            for tag in exifTags.keys():
                print("%s = %s" % (tag, exifTags[tag]))

            xmpTags = file_to_dict(fullPath)
            title = ''
            keywords = ''
            if consts.XMP_NS_DC in xmpTags:
                # dublin core properties
                dublinCoreProperties = xmpTags[consts.XMP_NS_DC]
                for tag in dublinCoreProperties:
                    print("Tag %s = %s" % (tag[0], tag[1]))
                    tagName = tag[0]
                    tagValue = tag[1]
                    if tagValue != "" and tagName.startswith('dc:title'):
                        title = title + tagValue
                    elif tagValue != "" and tagName.startswith('dc:subject'):
                        keywords = keywords + tagValue + " "
            print("Title %s" % title)
            print("Keywords %s" % keywords)
def test_file_to_dict_noxmp(self):
    """file_to_dict returns an empty dict for the empty.txt fixture."""
    parsed = file_to_dict("fixtures/empty.txt")
    self.assertEqual(parsed, {})
'EXIF FocalLength'].values[0].den teoreticFocalLength = exifTags['EXIF FocalLengthIn35mmFilm'].values[0] #print("GPSAltitude", GPSAltitude) print("realFocalLength", realFocalLength) print("35mmFocalLegth = ", teoreticFocalLength) exifImageWidth = exifTags['EXIF ExifImageWidth'].values[0] exifImageLength = exifTags['EXIF ExifImageLength'].values[0] imageRatio = float(exifImageWidth) / float(exifImageLength) print("exifImageWidth", exifImageWidth) print("exifImageLength", exifImageLength) print("imageRatio", imageRatio) ############################ xmp = file_to_dict(imgpath) for k, v in xmp.items(): #print(k,'--',v) pass # look for drone tags dictKey = None for k in xmp.keys(): if 'drone' in k: dictKey = k break droneMetadata = {} for tup in xmp[dictKey]: droneMetadata[tup[0]] = tup[1]
def get_metadata(self, dataset, content):
    """Extract XMP metadata for the matching paths in ``self.paths``.

    Only files whose name matches the ``datalad.metadata.xmp.fname-match``
    configuration (case-insensitively) are inspected.

    Returns a 2-tuple: a dict holding the collected ``'@context'``
    vocabulary, and a list of ``(path, metadata)`` pairs for the files that
    yielded XMP data. When *content* is false, ``({}, [])`` is returned.
    """
    if not content:
        return {}, []

    context = {}
    contentmeta = []

    # which files to look for
    # The flag belongs to compile(): the second positional argument of
    # Pattern.match() is a start offset, not a flags value.
    fname_match_regex = re.compile(
        self.ds.config.get(
            'datalad.metadata.xmp.fname-match',
            '.*(jpg|jpeg|pdf|gif|tiff|tif|ps|eps|png|mp3|mp4|avi|wav)$'),
        re.IGNORECASE)

    log_progress(
        lgr.info,
        'extractorxmp',
        'Start XMP metadata extraction from %s',
        self.ds,
        total=len(self.paths),
        label='XMP metadata extraction',
        unit=' Files',
    )
    for f in self.paths:
        log_progress(lgr.info,
                     'extractorxmp',
                     'Extract XMP metadata from %s',
                     f,
                     update=1,
                     increment=True)
        # run basic file name filter for performance reasons
        # it is OK to let false-positives through
        if fname_match_regex.match(f) is None:
            continue
        absfp = opj(self.ds.path, f)
        info = file_to_dict(absfp)
        if not info:
            # got nothing, likely nothing there
            # TODO check if this is an XMP sidecar file, parse that, and
            # assign metadata to the base file
            continue

        # update vocabulary
        vocab = {
            info[ns][0][0].split(':')[0]: {
                '@id': ns,
                'type': vocabulary_id
            }
            for ns in info
        }
        # TODO this is dirty and assumes that XMP is internally consistent
        # with the definitions across all files -- which it likely isn't
        context.update(vocab)

        # now pull out actual metadata
        # cannot do simple dict comprehension, because we need to beautify
        # things a little
        meta = {}
        for ns in info:
            for key, val, props in info[ns]:
                if not val:
                    # skip everything empty
                    continue
                if key.count('[') > 1:
                    # this is a nested array
                    # MIH: I do not think it is worth going here
                    continue
                if props['VALUE_IS_ARRAY']:
                    # we'll catch the actual array values later
                    continue
                # normalize value
                val = assure_unicode(val)
                # non-breaking space
                val = val.replace(u"\xa0", ' ')

                field, idx, qual = xmp_field_re.match(key).groups()
                normkey = u'{}{}'.format(field, qual)
                if '/' in key:
                    normkey = u'{0}<{1}>'.format(*normkey.split('/'))
                if idx:
                    # array
                    arr = meta.get(normkey, [])
                    arr.append(val)
                    meta[normkey] = arr
                else:
                    meta[normkey] = val

        # compact: unwrap single-item lists
        meta = {
            k: v[0] if isinstance(v, list) and len(v) == 1 else v
            for k, v in meta.items()
        }

        contentmeta.append((f, meta))

    log_progress(lgr.info, 'extractorxmp',
                 'Finished XMP metadata extraction from %s', self.ds)
    return {
        '@context': context,
    }, \
        contentmeta
def test_file_to_dict_noxmp(self):
    """file_to_dict returns an empty dict for the packaged empty.txt fixture."""
    empty_fixture = pkg_resources.resource_filename(__name__, "fixtures/empty.txt")
    parsed = file_to_dict(empty_fixture)
    self.assertEqual(parsed, {})
def test_file_to_dict(self):
    """Every sample file must yield a non-empty XMP dictionary."""
    # Iterating the dict directly yields its keys on Python 2 and 3 alike,
    # and assertTrue replaces the removed assert_ alias.
    for f in samplefiles:
        self.assertTrue(file_to_dict(f), "Expected dictionary")
def __init__(self, file_path):
    """Remember *file_path*, parse its XMP data and pass it to ``self.clean``."""
    self.file_path = file_path
    self.clean(file_to_dict(file_path))
def test_xml_metadata_preserved(spoof_tesseract_noop, output_type, resources, outpdf):
    """Verify that descriptive XMP properties are kept when graph.pdf is processed.

    Skipped when libxmp cannot be imported.
    """
    input_file = resources / 'graph.pdf'

    try:
        from libxmp import consts
        from libxmp.utils import file_to_dict
    except Exception:
        pytest.skip("libxmp not available or libexempi3 not installed")

    before = file_to_dict(str(input_file))
    check_ocrmypdf(input_file,
                   outpdf,
                   '--output-type',
                   output_type,
                   env=spoof_tesseract_noop)
    after = file_to_dict(str(outpdf))

    equal_properties = [
        'dc:contributor', 'dc:coverage', 'dc:creator', 'dc:description',
        'dc:format', 'dc:identifier', 'dc:language', 'dc:publisher',
        'dc:relation', 'dc:rights', 'dc:source', 'dc:subject', 'dc:title',
        'dc:type', 'pdf:keywords',
    ]
    # Listed for reference only; these are not checked below.
    might_change_properties = [
        'dc:date', 'pdf:pdfversion', 'pdf:Producer', 'xmp:CreateDate',
        'xmp:ModifyDate', 'xmp:MetadataDate', 'xmp:CreatorTool',
        'xmpMM:DocumentId', 'xmpMM:DnstanceId',
    ]

    def as_flat_dict(xmpdict):
        # Namespaces map to lists of (key, value, infodict). Merge every
        # namespace into one key -> value mapping and drop the infodict.
        flat = {}
        for entries in xmpdict.values():
            for k, v, *_ in entries:
                flat[k] = v
        return flat

    before = as_flat_dict(before)
    after = as_flat_dict(after)

    for prop in equal_properties:
        if prop in before:
            assert prop in after, f'{prop} dropped from xmp'
            assert before[prop] == after[prop]

        # Certain entries like title appear as dc:title[1], with the
        # possibility of several
        propidx = f'{prop}[1]'
        if propidx in before:
            expected = before[propidx]
            assert (after.get(propidx) == expected
                    or after.get(prop) == expected)
import json
import os

from libxmp import consts
from libxmp.utils import file_to_dict

# Collect tags, caption and creator from the Dublin Core XMP data of every
# JPEG asset and write the result to data/images.json.
images = []
for image_path in glob('www/assets/*.jpg'):
    image = {
        # basename() does not depend on the number or kind of separators
        'filename': os.path.basename(image_path),
        'tags': [],
    }

    xmp = file_to_dict(image_path)
    # An image without Dublin Core data is listed with no tags.
    for item in xmp.get(consts.XMP_NS_DC, []):
        name, value = item[0], item[1]
        if value == '':
            continue
        if name.startswith('dc:subject'):
            image['tags'].append(value)
        if name.startswith('dc:description') and value != 'x-default':
            image['caption'] = value
        if name.startswith('dc:creator'):
            image['creator'] = value

    images.append(image)

with open('data/images.json', 'w') as f:
    f.write(json.dumps(images))
def test_xml_metadata_preserved(test_file, output_type, resources, outpdf):
    """Check that descriptive XMP properties survive a run of check_ocrmypdf.

    The XMP packet of ``test_file`` is read before processing and the XMP
    packet of the produced file is read afterwards.  Every property listed in
    ``equal_properties`` that is present in the input must be present in the
    output with the same value, and the output may only gain properties that
    are listed in ``acquired_properties``.

    The test is skipped when ``libxmp`` cannot be imported.
    """
    input_file = resources / test_file

    try:
        from libxmp.utils import file_to_dict  # pylint: disable=import-outside-toplevel
    except Exception:  # pylint: disable=broad-except
        pytest.skip("libxmp not available or libexempi3 not installed")

    before = file_to_dict(str(input_file))

    check_ocrmypdf(
        input_file,
        outpdf,
        '--output-type',
        output_type,
        '--skip-text',
        '--plugin',
        'tests/plugins/tesseract_noop.py',
    )

    after = file_to_dict(str(outpdf))

    equal_properties = [
        'dc:contributor',
        'dc:coverage',
        'dc:creator',
        'dc:description',
        'dc:format',
        'dc:identifier',
        'dc:language',
        'dc:publisher',
        'dc:relation',
        'dc:rights',
        'dc:source',
        'dc:subject',
        'dc:title',
        'dc:type',
        'pdf:keywords',
    ]
    # Properties the output is allowed to have even if the input lacked them.
    acquired_properties = ['dc:format']
    # Deliberately not compared, because processing may change them:
    # dc:date, the PDF version and producer, the xmp:* date fields,
    # xmp:CreatorTool and the xmpMM document/instance identifiers.

    def flatten(xmpdict):
        """Collapse {namespace: [(key, value, info), ...]} into {key: value}."""
        flat = {}
        for entries in xmpdict.values():
            # The trailing info dict of each entry is of no interest here.
            for key, value, *_ in entries:
                flat[key] = value
        return flat

    before = flatten(before)
    after = flatten(after)

    for prop in equal_properties:
        if prop in before:
            assert prop in after, f'{prop} dropped from xmp'
            assert before[prop] == after[prop], f'{prop} changed in xmp'

        # libxmp presents multivalued entries (e.g. dc:title) as:
        # 'dc:title': '' <- there's a title
        # 'dc:title[1]': 'The Title' <- the actual title
        # 'dc:title[1]/?xml:lang': 'x-default' <- language info
        propidx = f'{prop}[1]'
        if propidx in before:
            assert (
                after.get(propidx) == before[propidx]
                or after.get(prop) == before[propidx]
            ), f'{propidx} changed in xmp'

        if prop in after and prop not in before:
            assert prop in acquired_properties, (
                f"acquired unexpected property {prop} with value "
                f"{after.get(propidx) or after.get(prop)}"
            )
def test_file_to_dict(self):
    # file_to_dict must return something truthy for every sample file.
    for sample_path in self.samplefiles:
        xmp_dict = file_to_dict(sample_path)
        self.assertTrue(xmp_dict, "Expected dictionary")
list_items(key, value)
# NOTE(review): the call above is the tail of a definition that begins before
# this section; it is reproduced verbatim - confirm its indentation on merge.

# main program
myFiles = sys.argv[1:]

# add new filetypes as needed (matching below is case-insensitive)
file_extensions = [
    '.jpg', '.JPG', '.gif', '.png', '.pdf', '.psd', '.eps', '.tif', '.ai', '.jpeg',
]
# Lower-cased once, outside the loop, so that e.g. ".PNG" is accepted too.
supported_extensions = {ext.lower() for ext in file_extensions}

for eachFile in myFiles:
    print()
    print("= Filename: " + eachFile + " =")

    # The extension is everything from the last dot onwards; a name without
    # a dot has no extension at all.
    pos = eachFile.rfind('.')
    input_file_extension = eachFile[pos:] if pos != -1 else ''

    # let's test for the file extension. If valid then display the xmp,
    # else print "not a valid file type"
    if input_file_extension.lower() in supported_extensions:
        # Parsed only after the extension check, and only through
        # file_to_dict: nothing here needs a handle opened for update.
        xmp_dict = file_to_dict(eachFile)
        list_dict(xmp_dict)
    else:
        print("not a valid file type")
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

import sys
from os import path
from base64 import b64decode
from libxmp.utils import file_to_dict

# Usage: <script> IMAGE
# Writes IMAGE's embedded audio track to <stem>_audio.mp4 and its embedded
# second image to <stem>_righteye.jpg, next to the input file.
img_filename = sys.argv[1]
xmp = file_to_dict(img_filename)

# Both outputs share the input path without its extension.
output_stem = path.splitext(img_filename)[0]

# The payload is taken from the second property ([1]) of the namespace and
# from that property's value field ([1]).
# NOTE(review): presumably the first property is the MIME type - confirm.
audio_b64 = xmp[u'http://ns.google.com/photos/1.0/audio/'][1][1]
# Decode before opening the output so that malformed data does not leave an
# empty file behind.
audio_bytes = b64decode(audio_b64)
with open(output_stem + "_audio.mp4", 'wb') as afh:
    afh.write(audio_bytes)

image_b64 = xmp[u'http://ns.google.com/photos/1.0/image/'][1][1]
image_bytes = b64decode(image_b64)
with open(output_stem + "_righteye.jpg", 'wb') as ifh:
    ifh.write(image_bytes)