示例#1
0
def saveResults(audioMetadata, metadata, children):
  """Copy audio tags into ``metadata`` and hand embedded binaries to ``children``.

  Every key of ``audioMetadata`` that ``in_blacklist`` does not reject is
  processed as follows:

  * A value with a ``data`` attribute is treated as an embedded binary. Its
    bytes are written to a temporary file under ``insiderer.TMP_DIR``, the
    result of ``insiderer.get_metadata`` for that file is appended to
    ``children``, and the temporary file is removed again.
  * Any other value is stored in ``metadata[key]``. Before storing, a value
    without a length but with a ``text`` attribute is replaced by that text,
    and a value of length one is replaced by its only element.

  Args:
    audioMetadata: tag container offering ``keys()`` and item access.
    metadata: dict receiving the plain tag values (mutated in place).
    children: list receiving the results for embedded binaries (mutated in
      place).
  """
  for key in audioMetadata.keys():
    value = audioMetadata[key]
    if in_blacklist(key, value):
      continue

    if hasattr(value, 'data'):
      # An embedded binary; its description (when present) names the child.
      name = getattr(value, 'desc', "cover")
      # NamedTemporaryFile owns the OS-level descriptor and closes it, whereas
      # mkstemp()[1] would leave that descriptor open for every binary.
      tmp_handle = tempfile.NamedTemporaryFile(dir=insiderer.TMP_DIR, delete=False)
      childpath = tmp_handle.name
      try:
        with tmp_handle:
          tmp_handle.write(value.data)
        # The file is closed (and flushed) before it is analysed.
        children.append(insiderer.get_metadata(childpath, name))
      finally:
        insiderer.safedelete(childpath)
      continue

    if not hasattr(value, '__len__') and hasattr(value, 'text'):
      value = value.text
    if hasattr(value, '__len__') and len(value) == 1:
      value = value[0]
    metadata[key] = value
示例#2
0
def saveResults(audioMetadata, metadata, children):
    """Split audio tags into plain values and embedded binaries.

    Keys rejected by ``in_blacklist`` are skipped. For the remaining keys:

    * A value exposing a ``data`` attribute is an embedded binary. Its bytes
      are written to a temporary file inside ``insiderer.TMP_DIR``, that file
      is passed to ``insiderer.get_metadata`` and the result is appended to
      ``children``. The temporary file is always removed afterwards.
    * Every other value ends up in ``metadata[key]``. A value that has no
      length but has a ``text`` attribute is replaced by that text first, and
      a value of length one is replaced by its single element.

    Args:
        audioMetadata: tag container offering ``keys()`` and item access.
        metadata: dict receiving the plain tag values (mutated in place).
        children: list receiving the results for embedded binaries (mutated
            in place).
    """
    for key in audioMetadata.keys():
        value = audioMetadata[key]
        if in_blacklist(key, value):
            continue

        if hasattr(value, 'data'):
            # An embedded binary; fall back to "cover" when it has no description.
            name = getattr(value, 'desc', "cover")
            # mkstemp() returns an already-open descriptor; taking only the
            # path leaks it. NamedTemporaryFile keeps descriptor and path
            # together so the ``with`` block below closes it reliably.
            tmp_handle = tempfile.NamedTemporaryFile(
                dir=insiderer.TMP_DIR, delete=False)
            childpath = tmp_handle.name
            try:
                with tmp_handle:
                    tmp_handle.write(value.data)
                # Analyse only after the handle is closed so all bytes are on disk.
                child = insiderer.get_metadata(childpath, name)
                children.append(child)
            finally:
                insiderer.safedelete(childpath)
            continue

        if not hasattr(value, '__len__') and hasattr(value, 'text'):
            value = value.text
        if hasattr(value, '__len__') and len(value) == 1:
            value = value[0]
        metadata[key] = value
示例#3
0
def image_svg_xml(path, metadata, children):
    """Extract metadata and embedded base64 images from an SVG file.

    The document at ``path`` is parsed and a fixed set of XPath queries is
    run against it. Each result is stored in ``metadata`` under a key that
    was first passed through ``insiderer.de_dup``: attribute/text results
    are stored as strings, element results as the ``xmltodict`` parse of
    their serialized XML. For the wildcard queries (``dca`` and ``rdf``) the
    key is taken from the matched attribute or element name instead of the
    query label.

    Afterwards every ``svg:image`` whose link is a ``data:`` URI containing
    ``base64_prefix`` within its first 100 characters is decoded, written to
    a temporary file in ``insiderer.TMP_DIR`` and analysed with
    ``insiderer.get_metadata``; the result is appended to ``children`` and
    the temporary file is removed.

    Args:
        path: location of the SVG document.
        metadata: dict receiving the extracted values (mutated in place).
        children: list receiving the results for embedded images (mutated in
            place).
    """
    svg = etree.parse(path)
    fields = {
        'export-filename': '//@inkscape:export-filename',
        'docname': '//@sodipodi:docname',
        'description': '//svg:desc',
        'title': '//svg:title',
        # Selecting attributes by namespace prefix doesn't seem to work, so
        # the namespace URI is compared explicitly.
        'dca': '//@*[namespace-uri()="%s"][not(ancestor::dc:*)]' % namespaces["dc"],
        'rdf': '//rdf:*[not(ancestor::rdf:*)]',
    }
    for key, query in fields.items():
        # A wildcard query matches differently named nodes, so each result
        # is keyed by its own name rather than by the query label.
        is_wildcard = '*' in query

        for result in svg.xpath(query, namespaces=namespaces):
            key_name = key
            if is_wildcard:
                if hasattr(result, "attrname"):
                    # Drop the "{namespace-uri}" prefix of the attribute name.
                    key_name = re.sub(r'\{.*?\}', '', result.attrname)
                else:
                    key_name = result.xpath('name()')

            key_name = insiderer.de_dup(key_name, metadata)

            if isinstance(result, etree._ElementUnicodeResult):
                metadata[key_name] = str(result)
            else:
                serialized = etree.tostring(result, encoding='utf8', method='xml')
                metadata[key_name] = xmltodict.parse(serialized.decode('utf-8'))

    xlink_href = '{%s}href' % namespaces["xlink"]
    for image in svg.xpath('//svg:image', namespaces=namespaces):
        # SVG 2 allows a plain ``href``; an image may also carry no link at
        # all, in which case there is nothing to decode.
        href = image.attrib.get(xlink_href) or image.attrib.get('href')
        if not href:
            continue
        if not (href.startswith("data:") and base64_prefix in href[0:100]):
            continue

        name = image.attrib.get('id') or "unknown"
        payload_start = href.find(base64_prefix) + len(base64_prefix)
        image_data = base64.standard_b64decode(href[payload_start:])

        # Create the file before entering ``try`` so the cleanup below never
        # sees an unbound (or stale) path, and let the file object own the
        # descriptor so it is closed instead of leaked.
        image_handle = tempfile.NamedTemporaryFile(
            dir=insiderer.TMP_DIR, delete=False)
        image_path = image_handle.name
        try:
            with image_handle:
                image_handle.write(image_data)
            child = insiderer.get_metadata(image_path, name)
            children.append(child)
        finally:
            insiderer.safedelete(image_path)
示例#4
0
def application_vnd_oasis_opendocument_text(path, metadata, children, from_doc=False):
  """Collect metadata from an OpenDocument text archive.

  The archive at ``path`` is unpacked member by member into a temporary
  directory below ``insiderer.TMP_DIR`` and each member is handled by name:

  * ``meta.xml``: the text of every third-level element is stored in
    ``metadata`` under the element's local name. When ``from_doc`` is
    ``True`` the ``generator`` element is skipped.
  * ``content.xml``: if ``change-info`` elements exist, ``metadata`` gets a
    ``track_changes`` list holding one ``{local-name: text}`` dict per
    element, built from its direct children.
  * Directories and a fixed list of bookkeeping members are ignored, as is
    ``Thumbnails/thumbnail.png`` when ``from_doc`` is ``True``.
  * Anything else is passed to ``insiderer.get_metadata`` and the result is
    appended to ``children``.

  Extracted files are deleted as soon as they have been handled and the
  temporary directory is removed at the end, even on error.

  Args:
    path: location of the archive.
    metadata: dict receiving document-level values (mutated in place).
    children: list receiving the results for embedded files (mutated in
      place).
    from_doc: when exactly ``True``, the generator entry and the thumbnail
      are skipped.
  """
  # Members that are never analysed as children.
  skipped_names = frozenset((
    "mimetype",
    "manifest.rdf",
    "META-INF/manifest.xml",
    "current.xml",
    "styles.xml",
    "settings.xml",
    "",
    "Configurations2/accelerator/current.xml",
    "layout-cache",
  ))

  # The context manager closes the archive handle, which was left open before.
  with zipfile.ZipFile(path) as odtzip:
    tmp_path = tempfile.mkdtemp(dir=insiderer.TMP_DIR)
    try:
      for member in odtzip.infolist():
        name = member.filename
        # extract() returns the sanitised path it actually wrote to. Joining
        # the raw archive name onto tmp_path instead could point outside the
        # temporary directory ("../x", "/abs") and that path would then be
        # read and deleted.
        childpath = odtzip.extract(member, tmp_path)
        try:
          if name == "meta.xml":
            for item in etree.parse(childpath).xpath('/*/*/*'):
              key = item.xpath('local-name()')
              if from_doc is True and key == "generator":
                continue
              metadata[key] = item.text
          elif name == "content.xml":
            trackchanges = etree.parse(childpath).xpath('//*[local-name() = "change-info"]')
            if trackchanges:
              metadata['track_changes'] = [
                {item.xpath('local-name()'): item.text for item in trackchange.xpath('*')}
                for trackchange in trackchanges
              ]
          elif os.path.isdir(childpath) or name in skipped_names:
            pass
          elif from_doc is True and name == "Thumbnails/thumbnail.png":
            pass
          else:
            children.append(insiderer.get_metadata(childpath, name))
        finally:
          if not os.path.isdir(childpath):
            insiderer.safedelete(childpath)
    finally:
      shutil.rmtree(tmp_path)
示例#5
0
def image_svg_xml(path, metadata, children):
  """Extract metadata and embedded base64 images from an SVG file.

  The document at ``path`` is parsed and a fixed set of XPath queries is run
  against it. Each result is stored in ``metadata`` under a key that was
  first passed through ``insiderer.de_dup``: attribute/text results are
  stored as strings, element results as the ``xmltodict`` parse of their
  serialized XML. For the wildcard queries (``dca`` and ``rdf``) the key is
  taken from the matched attribute or element name instead of the query
  label.

  Afterwards every ``svg:image`` whose link is a ``data:`` URI containing
  ``base64_prefix`` within its first 100 characters is decoded, written to a
  temporary file in ``insiderer.TMP_DIR`` and analysed with
  ``insiderer.get_metadata``; the result is appended to ``children`` and the
  temporary file is removed.

  Args:
    path: location of the SVG document.
    metadata: dict receiving the extracted values (mutated in place).
    children: list receiving the results for embedded images (mutated in
      place).
  """
  svg = etree.parse(path)
  fields = {
    'export-filename': '//@inkscape:export-filename',
    'docname':         '//@sodipodi:docname',
    'description':     '//svg:desc',
    'title':           '//svg:title',
    # Selecting attributes by namespace prefix doesn't seem to work, so the
    # namespace URI is compared explicitly.
    'dca':             '//@*[namespace-uri()="%s"][not(ancestor::dc:*)]' % namespaces["dc"],
    'rdf':             '//rdf:*[not(ancestor::rdf:*)]'
  }
  for key, query in fields.items():
    # A wildcard query matches differently named nodes, so each result is
    # keyed by its own name rather than by the query label.
    is_wildcard = '*' in query

    for result in svg.xpath(query, namespaces=namespaces):
      key_name = key
      if is_wildcard:
        if hasattr(result, "attrname"):
          # Drop the "{namespace-uri}" prefix of the attribute name.
          key_name = re.sub(r'\{.*?\}', '', result.attrname)
        else:
          key_name = result.xpath('name()')

      key_name = insiderer.de_dup(key_name, metadata)

      if isinstance(result, etree._ElementUnicodeResult):
        metadata[key_name] = str(result)
      else:
        serialized = etree.tostring(result, encoding='utf8', method='xml')
        metadata[key_name] = xmltodict.parse(serialized.decode('utf-8'))

  xlink_href = '{%s}href' % namespaces["xlink"]
  for image in svg.xpath('//svg:image', namespaces=namespaces):
    # SVG 2 allows a plain ``href``; an image may also carry no link at all,
    # in which case there is nothing to decode.
    href = image.attrib.get(xlink_href) or image.attrib.get('href')
    if not href:
      continue
    if not (href.startswith("data:") and base64_prefix in href[0:100]):
      continue

    name = image.attrib.get('id') or "unknown"
    payload_start = href.find(base64_prefix) + len(base64_prefix)
    image_data = base64.standard_b64decode(href[payload_start:])

    # Create the file before entering ``try`` so the cleanup below never sees
    # an unbound (or stale) path, and let the file object own the descriptor
    # so it is closed instead of leaked.
    image_handle = tempfile.NamedTemporaryFile(dir=insiderer.TMP_DIR, delete=False)
    image_path = image_handle.name
    try:
      with image_handle:
        image_handle.write(image_data)
      child = insiderer.get_metadata(image_path, name)
      children.append(child)
    finally:
      insiderer.safedelete(image_path)
示例#6
0
def application_zip(path, metadata, children, from_doc=False):
    """Analyse every file contained in a ZIP archive.

    Each member of the archive at ``path`` is extracted into a temporary
    directory below ``insiderer.TMP_DIR``. Non-directory members are passed
    to ``insiderer.get_metadata`` together with their archive name and the
    result is appended to ``children``. Extracted files are deleted right
    after being handled and the temporary directory is removed at the end,
    even when an error occurs.

    Args:
        path: location of the archive.
        metadata: not used by this handler.
        children: list receiving the result for each contained file (mutated
            in place).
        from_doc: not used by this handler.
    """
    # The context manager closes the archive handle, which was left open before.
    with zipfile.ZipFile(path) as aZipFile:
        tmp_path = tempfile.mkdtemp(dir=insiderer.TMP_DIR)
        try:
            for member in aZipFile.infolist():
                # extract() returns the sanitised path it actually wrote to.
                # Joining the raw archive name onto tmp_path could instead
                # resolve outside the temporary directory (absolute names,
                # ".." components), and that path would be read and deleted.
                childpath = aZipFile.extract(member, tmp_path)
                try:
                    if not os.path.isdir(childpath):
                        child = insiderer.get_metadata(childpath, member.filename)
                        children.append(child)
                finally:
                    if not os.path.isdir(childpath):
                        insiderer.safedelete(childpath)
        finally:
            shutil.rmtree(tmp_path)
示例#7
0
def application_zip(path, metadata, children, from_doc=False):
  """Analyse every file contained in a ZIP archive.

  Each member of the archive at ``path`` is extracted into a temporary
  directory below ``insiderer.TMP_DIR``. Non-directory members are passed to
  ``insiderer.get_metadata`` together with their archive name and the result
  is appended to ``children``. Extracted files are deleted right after being
  handled and the temporary directory is removed at the end, even when an
  error occurs.

  Args:
    path: location of the archive.
    metadata: not used by this handler.
    children: list receiving the result for each contained file (mutated in
      place).
    from_doc: not used by this handler.
  """
  # The context manager closes the archive handle, which was left open before.
  with zipfile.ZipFile(path) as aZipFile:
    tmp_path = tempfile.mkdtemp(dir=insiderer.TMP_DIR)
    try:
      for member in aZipFile.infolist():
        # extract() returns the sanitised path it actually wrote to. Joining
        # the raw archive name onto tmp_path could instead resolve outside
        # the temporary directory (absolute names, ".." components), and
        # that path would be read and deleted.
        childpath = aZipFile.extract(member, tmp_path)
        try:
          if not os.path.isdir(childpath):
            child = insiderer.get_metadata(childpath, member.filename)
            children.append(child)
        finally:
          if not os.path.isdir(childpath):
            insiderer.safedelete(childpath)
    finally:
      shutil.rmtree(tmp_path)