Exemplo n.º 1
0
    def _file_metadata(self, an_uri, file_descr):
        """Get metadata about the actual file and add it to the FileDescription.

        Stores the owner name (``pw_name``), group name (``gr_name``), size
        (``st_size``) and the ``st_ctime``/``st_mtime`` timestamps (as ISO 8601
        strings) in ``file_descr.metadata``, creating the dict when it is
        ``None``. Afterwards repository metadata is added on a best-effort
        basis: any failure there is logged at debug level and ignored.

        :param an_uri: path of the file passed to ``os.stat``.
        :param file_descr: FileDescription whose ``metadata`` is updated
            in place.
        :raises OSError: if ``an_uri`` cannot be stat'ed.
        :raises KeyError: if the owning uid/gid has no passwd/group entry.
        """
        def convert_time(st_time):
            """Format the POSIX timestamp ``st_time`` as an ISO 8601 string."""
            # Use the argument rather than the enclosing ``stat`` result;
            # otherwise every timestamp would be reported as the mtime.
            return datetime.fromtimestamp(st_time).isoformat()

        stat = os.stat(an_uri)
        if file_descr.metadata is None:
            file_descr.metadata = {}
        file_descr.metadata['pw_name'] = MetaDataValue(
            type=MetaDataType.STRING,
            string_value=pwd.getpwuid(stat.st_uid).pw_name)
        file_descr.metadata['gr_name'] = MetaDataValue(
            type=MetaDataType.STRING,
            string_value=grp.getgrgid(stat.st_gid).gr_name)
        file_descr.metadata['st_size'] = MetaDataValue(type=MetaDataType.INT,
                                                       int_value=stat.st_size)

        file_descr.metadata['st_ctime'] = MetaDataValue(
            type=MetaDataType.STRING, string_value=convert_time(stat.st_ctime))
        file_descr.metadata['st_mtime'] = MetaDataValue(
            type=MetaDataType.STRING, string_value=convert_time(stat.st_mtime))

        # TODO: the repository location is still hard-coded; it should be
        # derived from an_uri or from configuration.
        try:
            from damn_at.repository import Repository
            repo = Repository('/home/sueastside/dev/DAMN/damn-test-files')

            repo.get_meta_data(an_uri, file_descr)
        except Exception as repo_exception:
            # Deliberately best-effort: repository information is optional.
            logger.debug("Unable to extract repository information: %s",
                         str(repo_exception))
Exemplo n.º 2
0
    def analyze(self, an_uri):
        """Describe a text file as a single 'content' asset.

        The asset's mimetype is guessed from the file name, and its metadata
        holds the number of lines (``lines``) and the character encoding
        reported by libmagic (``charset``).

        :param an_uri: path of the text file to analyze.
        :returns: a FileDescription containing one AssetDescription.
        """
        fileid = FileId(filename=os.path.abspath(an_uri))
        file_descr = FileDescription(file=fileid)
        file_descr.assets = []

        text_mimetype = mimetypes.guess_type(an_uri)[0]

        asset_descr = AssetDescription(asset=AssetId(
            subname='content',
            mimetype=text_mimetype,
            file=fileid
        ))

        # Count lines inside a context manager so the file handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(an_uri) as text_file:
            num_lines = sum(1 for _ in text_file)

        with magic.Magic(flags=magic.MAGIC_MIME_ENCODING) as mm:
            charset = mm.id_filename(an_uri)

        asset_descr.metadata = {
            'lines': MetaDataValue(
                type=MetaDataType.INT,
                int_value=num_lines
            ),
            'charset': MetaDataValue(
                type=MetaDataType.STRING,
                string_value=charset
            ),
        }

        file_descr.assets.append(asset_descr)

        return file_descr
Exemplo n.º 3
0
    def analyze(self, an_uri):
        """Describe an image file using the output of ``exiftool``.

        Runs ``exiftool`` on the file and collects every ``key: value`` line
        from the 'MIME Type' line onwards (excluding 'MIME Type' and
        'Image Size' themselves). A mimetype-specific extractor converts the
        known fields; any remaining fields are stored as strings under an
        ``exif-`` prefix.

        :param an_uri: path of the image file to analyze.
        :returns: a FileDescription containing one 'main layer' asset.
        :raises AnalyzerException: if ``exiftool`` cannot be started or exits
            with a non-zero return code.
        """
        fileid = FileId(filename=os.path.abspath(an_uri))
        file_descr = FileDescription(file=fileid)
        file_descr.assets = []

        image_mimetype = mimetypes.guess_type(an_uri)[0]

        asset_descr = AssetDescription(asset=AssetId(
            subname='main layer', mimetype=image_mimetype, file=fileid))

        try:
            pro = subprocess.Popen(['exiftool', an_uri],
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
            out, err = pro.communicate()
        except OSError as e:
            msg = 'ImageAnalyzer failed %s:\n%s' % (an_uri, e)
            LOG.error(msg)
            # The caught exception is an instance and cannot be called;
            # report the failure the same way as a non-zero exit code.
            raise AnalyzerException(msg)

        # The pipes yield bytes on Python 3; str() on bytes would give the
        # "b'...'" repr, so decode explicitly before any text handling.
        if not isinstance(out, str):
            out = out.decode('utf-8', 'replace')
        if not isinstance(err, str):
            err = err.decode('utf-8', 'replace')

        if pro.returncode != 0:
            msg = 'ImageAnalyzer failed %s with error code %d!:\n%s' % (
                an_uri, pro.returncode, err)
            LOG.error(msg)
            raise AnalyzerException(msg)

        meta = {}
        seen_mime_type = False
        for line in out.strip().split('\n'):
            parts = line.split(':', 1)
            if len(parts) == 1:
                parts = parts[0].split('=')
            parts = [part.strip() for part in parts]
            if parts[0] == 'MIME Type':
                seen_mime_type = True
            # Lines without a separator carry no value and are skipped.
            if (seen_mime_type and len(parts) > 1
                    and parts[0] not in ('MIME Type', 'Image Size')):
                meta[parts[0].lower().replace(' ', '_')] = parts[1]

        from damn_at.analyzers.image import metadata

        extractor_map = {
            'image/png': metadata.MetaDataPNG,
            'image/jpeg': metadata.MetaDataJPG,
            'image/x-ms-bmp': metadata.MetaDataBMP,
            'image/x-photoshop': metadata.MetaDataPSD,
            'application/x-xcf': metadata.MetaDataXCF,
        }

        if image_mimetype in extractor_map:
            asset_descr.metadata = extractor_map[image_mimetype].extract(meta)
        else:
            asset_descr.metadata = {}

        # Keep every field the extractor did not claim as a plain string.
        for key, value in meta.items():
            if key not in asset_descr.metadata:
                asset_descr.metadata['exif-' + key] = MetaDataValue(
                    type=MetaDataType.STRING, string_value=value)

        file_descr.assets.append(asset_descr)

        return file_descr
Exemplo n.º 4
0
    def get_meta_data(self, an_uri, file_descr):
        """
        Add git commit metadata to ``file_descr`` and return it.

        The values come from the commit at the head of ``master`` and from the
        ``origin`` remote's configured URL. ``an_uri`` is currently not used:
        the per-file lookup is still commented out below.
        """
        def as_string(value):
            # Wrap a plain value as a STRING-typed metadata entry.
            return MetaDataValue(type=MetaDataType.STRING, string_value=value)

        head_commit = self.repo.heads.master.commit
        #path = os.path.relpath(an_uri, self.path)
        #blob = commit.tree/path

        # Author details.
        file_descr.metadata['git.author.name'] = as_string(
            head_commit.author.name)
        file_descr.metadata['git.author.email'] = as_string(
            head_commit.author.email)

        # Commit message.
        file_descr.metadata['git.message'] = as_string(head_commit.message)

        # Committer details.
        file_descr.metadata['git.committer.name'] = as_string(
            head_commit.committer.name)
        file_descr.metadata['git.committer.email'] = as_string(
            head_commit.committer.email)

        # Revision name and where the repository was cloned from.
        file_descr.metadata['git.name_rev'] = as_string(head_commit.name_rev)
        origin_url = self.repo.remotes.origin.config_reader.get('url')
        file_descr.metadata['git.remotes.origin.url'] = as_string(origin_url)

        return file_descr
Exemplo n.º 5
0
    def analyze(self, an_uri):
        """Describe a video file using the output of ``exiftool``.

        Runs ``exiftool`` on the file and collects every ``key: value`` line
        that follows the 'MIME Type' line. 'Frame Rate' is stored under the
        key ``video_frame_rate``. Known fields are converted by
        ``metadata.MetaDataExif``; the remaining ones are stored as strings
        under an ``Exif-`` prefix.

        :param an_uri: path of the video file to analyze.
        :returns: a FileDescription containing one asset, or ``False`` when
            ``exiftool`` could not be started or exited with a non-zero
            return code.
        """
        fileid = FileId(filename=os.path.abspath(an_uri))
        file_descr = FileDescription(file=fileid)
        file_descr.assets = []
        video_mimetype = mimetypes.guess_type(an_uri)[0]
        asset_descr = AssetDescription(
            asset=AssetId(subname=os.path.basename(an_uri),
                          mimetype=video_mimetype,
                          file=fileid))

        try:
            pro = subprocess.Popen(['exiftool', an_uri],
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
            out, err = pro.communicate()
        except OSError as error:
            # out/err do not exist when the process could not be started,
            # so report the OSError itself.
            LOG.debug("VideoAnalyzer failed %s\n\t%s", an_uri, error)
            return False

        if pro.returncode != 0:
            LOG.debug("VideoAnalyzer failed %s with error code %d\n\t%s\n\t%s",
                      an_uri, pro.returncode, out, err)
            return False

        # The pipe yields bytes on Python 3; decode before splitting on str.
        if not isinstance(out, str):
            out = out.decode('utf-8', 'replace')

        meta = {}
        seen_mime_type = False
        for line in out.strip().split('\n'):
            parts = line.split(':', 1)
            if len(parts) == 1:
                parts = parts[0].split('=')
            parts = [part.strip() for part in parts]
            if parts[0] == 'MIME Type':
                seen_mime_type = True
                continue
            # Skip everything before 'MIME Type' and lines without a value.
            if not seen_mime_type or len(parts) < 2:
                continue
            meta[parts[0].lower().replace(' ', '_')] = parts[1]
            if parts[0] == 'Frame Rate':
                meta['video_frame_rate'] = meta.pop('frame_rate')

        asset_descr.metadata = metadata.MetaDataExif.extract(meta)
        # Keep every field the extractor did not claim as a plain string.
        for key, value in meta.items():
            if key not in asset_descr.metadata:
                asset_descr.metadata['Exif-' + key] = MetaDataValue(
                    type=MetaDataType.STRING, string_value=value)

        file_descr.assets.append(asset_descr)

        return file_descr
Exemplo n.º 6
0
 def extract(cls, context):
     """Build a metadata dict from the field definitions found on ``cls``.

     Every attribute of ``cls`` that is not a dunder and is not one of
     'extract', 'fields' or 'convert' is unpacked as a ``(type, func)`` pair.
     ``func(context)`` supplies the raw value; ``None`` results are skipped,
     everything else is passed through ``cls.convert`` and wrapped in a
     MetaDataValue stored under the attribute's name.
     """
     helper_names = ('extract', 'fields', 'convert')
     collected = {}
     for attr_name in dir(cls):
         if attr_name.startswith('__') or attr_name in helper_names:
             continue
         value_type, getter = getattr(cls, attr_name)
         # E.g. a type named STRING is stored in the 'string_value' slot.
         type_label = MetaDataType._VALUES_TO_NAMES[value_type]
         slot = type_label.lower() + '_value'
         raw_value = getter(context)
         if raw_value is None:  # Ignore None values!
             continue
         arguments = {'type': value_type}
         arguments[slot] = cls.convert(value_type, raw_value)
         collected[attr_name] = MetaDataValue(**arguments)
     return collected
Exemplo n.º 7
0
    def analyze(self, anURI):
        """Describe a sound file using the output of ``sox --i``.

        Every ``key: value`` (or ``key=value``) line of the output becomes a
        metadata candidate, except 'Input File' and 'Comment'. Known fields
        are converted by ``metadata.MetaDataSox``; the remaining ones are
        stored as strings under a ``Sox-`` prefix.

        :param anURI: path of the sound file to analyze.
        :returns: a FileDescription containing one asset, or ``False`` when
            ``sox`` could not be started or exited with a non-zero return
            code.
        """
        fileid = FileId(filename=os.path.abspath(anURI))
        file_descr = FileDescription(file=fileid)
        file_descr.assets = []

        asset_descr = AssetDescription(
            asset=AssetId(subname=os.path.basename(anURI),
                          mimetype=mimetypes.guess_type(anURI, False)[0],
                          file=fileid))

        try:
            pro = subprocess.Popen(['sox', '--i', anURI],
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
            out, err = pro.communicate()
        except OSError as error:
            # out/err do not exist when the process could not be started,
            # so report the OSError itself.
            print(("E: SoundAnalyzer failed %s!" % anURI, error))
            return False

        if pro.returncode != 0:
            print(("E: SoundAnalyzer failed %s with error code %d! " %
                   (anURI, pro.returncode), out, err))
            return False

        meta = {}
        for line in out.decode("utf-8").strip().split('\n'):
            parts = line.split(':', 1)
            if len(parts) == 1:
                parts = parts[0].split('=')
            parts = [part.strip() for part in parts]
            # Blank or separator-less lines carry no value; skip them
            # instead of failing on the missing second element.
            if len(parts) < 2 or parts[0] in ('Input File', 'Comment'):
                continue
            meta[parts[0].lower().replace(' ', '_')] = parts[1]

        asset_descr.metadata = metadata.MetaDataSox.extract(meta)
        for key, value in meta.items():
            # Add non-default metadata.
            if key not in asset_descr.metadata:
                asset_descr.metadata['Sox-' + key] = MetaDataValue(
                    type=MetaDataType.STRING, string_value=value)

        file_descr.assets.append(asset_descr)

        return file_descr
Exemplo n.º 8
0
    def analyze(self, an_uri):
        """Describe a 3D scene file loaded through pyassimp.

        One asset is emitted per embedded texture, per material and per mesh.
        Unnamed items get the fallback names ``texture-<i>``, ``material-<i>``
        and ``mesh-<i>``. Material properties other than 'name' and 'file'
        are stored as strings. A mesh lists its material as a dependency when
        its ``materialindex`` refers to one of the scene's materials.

        :param an_uri: path of the scene file to analyze.
        :returns: a FileDescription containing the discovered assets.
        """
        from damn_at.analyzers.mesh.metadata import (
            MetaDataAssimpTexture,
            MetaDataAssimpMesh
        )

        fileid = FileId(filename=os.path.abspath(an_uri))
        file_descr = FileDescription(file=fileid)
        file_descr.assets = []

        assimp_mimetype = 'application/assimp'

        # Load before entering the try block: if loading fails there is no
        # scene to release, and the loader's own error must not be masked by
        # a failing release(None).
        scene = pyassimp.load(an_uri)
        try:
            for i, texture in enumerate(scene.textures):
                name = texture.name if texture.name else 'texture-'+str(i)
                asset_descr = AssetDescription(asset=AssetId(
                    subname=name,
                    mimetype=assimp_mimetype + ".texture",
                    file=fileid
                ))
                asset_descr.metadata = MetaDataAssimpTexture.extract(texture)
                file_descr.assets.append(asset_descr)

            # Material assets by scene index, for the mesh dependencies.
            materials = {}
            for i, material in enumerate(scene.materials):
                # Copy through items() explicitly, as the original loop did.
                properties = dict(material.properties.items())
                name = properties.get('name', 'material-'+str(i))
                asset_descr = AssetDescription(asset=AssetId(
                    subname=name,
                    mimetype=assimp_mimetype + ".material",
                    file=fileid
                ))
                asset_descr.metadata = {}
                for key, value in properties.items():
                    if key in ('name', 'file'):
                        continue
                    asset_descr.metadata[key] = MetaDataValue(
                        type=MetaDataType.STRING,
                        string_value=str(value)
                    )
                file_descr.assets.append(asset_descr)
                materials[i] = asset_descr

            for i, mesh in enumerate(scene.meshes):
                name = mesh.name if mesh.name else 'mesh-' + str(i)
                asset_descr = AssetDescription(asset=AssetId(
                    subname=name,
                    mimetype=assimp_mimetype + ".mesh",
                    file=fileid
                ))
                asset_descr.metadata = MetaDataAssimpMesh.extract(mesh)
                asset_descr.dependencies = []
                # Dependencies
                material_descr = materials.get(mesh.materialindex)
                if material_descr is not None:
                    asset_descr.dependencies.append(material_descr.asset)
                file_descr.assets.append(asset_descr)
        finally:
            pyassimp.release(scene)

        return file_descr