Example #1
def get_creation_date(_path):
    """
    Simple function to retrieve the creation date from the file's metadata.

    Args:
        _path: the full path to the file.
    """
    # Initialise result
    _creation_date = None

    # Use the hachoir metadata library to retrieve the file metadata
    hachoir_config.quiet = True
    try:
        parser = createParser(unicodeFilename(_path), _path)
        if parser:
            metadata = extractMetadata(parser)
            if metadata:
                _creation_date = metadata.get("creation_date")
    except Exception:
        pass

    # Fall back to the filesystem ctime if no creation date was found
    if not _creation_date:
        _ctime = os.path.getctime(_path)
        _creation_date = datetime.datetime.fromtimestamp(_ctime)

    # Return result
    return _creation_date
Example #2
    def lnkparse(reflectPath, filename):
        """ Return the target filename from a MS-widows link (URL format)
        """
        filename = unicodeFilename(filename)
        try:
            parser = createParser(filename)
            if parser is not None and isinstance(parser, LnkFile):
                #It is a "MS-Windows" link file
                try:
                    for field in parser: pass # trigger parsing
                    lnkpath = parser.getField('relative_path').value
                    # build the complete target path and check that it stays inside the base path
                    if lnkpath.startswith('.\\'):
                        lnkpath = lnkpath[2:]
                    lnkpath = lnkpath.replace('\\','/')
                    filenamePath = os.path.dirname(filename)
                    allLnkpath = os.path.join(reflectPath, filenamePath, lnkpath)
                    allLnkpath = os.path.abspath(allLnkpath) #remove all ..\

                    if allLnkpath.startswith(reflectPath):
                        lnkpath = quote(lnkpath.encode('utf-8'))
                        return 'OK', lnkpath
                    else:
                        return 'ERROR_OUTREFLECTPATH', ''
                except MissingField:
                    # example: link to a network file
                    return 'ERROR_RELPATH', ''
            else:
                return 'NOT_LNKFILE', ''
        except InputStreamError:
            return 'NOT_PARSED', ''
Example #3
def Downloadfile(url):
    infoMeta = []
    file_name = url.split('/')[-1]
    infoMeta.append(file_name)
    u = urllib2.urlopen(url)

    meta = u.info()
    infoMeta.append(meta.headers)
    doc = u.read()
    f = open(file_name, 'wb')
    f.write(doc)
    f.close()

    with open(file_name, 'rb') as p:
        # Slurp the whole file and efficiently convert it to hex all at once
        hexdata = binascii.hexlify(p.read())

    # use hachoir to add the standard metadata
    filename = './' + file_name
    print filename
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename)
    try:
        metalist = metadata.extractMetadata(parser).exportPlaintext()
        infoMeta.append(metalist[1:4])
    except Exception:
        infoMeta.append(["none", "none", "none"])

    p.close()
    # print "Done", file_name, " Info is ", infoMeta
    return file_name, hexdata
Example #4
    def save_response_binaries(self, path, hash_value):
        try:
            flow = Flow.objects.get(hash_value=hash_value)
            flow_details = flow.details
            for detail in flow_details:
                # create the orig file ex: contents_192.168.1.5:42825-62.212.84.227:80_resp.dat
                source_str = ":".join([detail.src_ip, str(detail.sport)])
                destination_str = ":".join([detail.dst_ip, str(detail.dport)])
                flow_str = "-".join([source_str, destination_str])
                resp_file = "_".join(["contents", flow_str,"resp.dat"])
                file_path = "/".join([path, resp_file])
                file_path = str(file_path)

                try:
                    stream = FileInputStream(unicodeFilename(file_path), real_filename=file_path)
                except NullStreamError:
                    continue
                subfile = SearchSubfile(stream, 0, None)
                subfile.loadParsers()
                root = "/".join([path, "html-files"])
                if not os.path.exists(root):
                    os.makedirs(root)
                output = "/".join([root, flow_str])
                output = str(output)
                if not os.path.exists(output):
                    os.mkdir(output)
                subfile.setOutput(output)
                ok = subfile.main()

                # save the files info at the db also

            return True

        except Exception, ex:
            return False
Example #5
def get_file_metadata(path):
    rdata = {}
    if os.path.isfile(path):
        try:
            parser = createParser(unicodeFilename(path), path)
            rdata["size"] = os.stat(path).st_size
            if parser:
                try:
                    metadata = extractMetadata(parser)
                    if metadata:
                        rdata.update(
                            (md.key,
                                md.values[0].value
                                if len(md.values) == 1 else
                                [value.value for value in md.values]
                                )
                            for md in metadata if md.values
                            )
                except HachoirError as e:
                    logging.exception(e)
        except NullStreamError:
            rdata["size"] = 0
        except BaseException as e:
            logging.exception(e)
        finally:
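            # hachoir keeps the underlying file handle open; close it via the stream's (private) _input attribute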
            if parser and parser.stream and parser.stream._input and not parser.stream._input.closed:
                parser.stream._input.close()
    return rdata
Example #6
    def subfile(self, filePath):
        # hachoir-subfile is a tool based on hachoir-parser to find subfiles in any binary stream.
        # Website: http://bitbucket.org/haypo/hachoir/wiki/hachoir-subfile
        # bypass sys.stdout, sys.stderr
        oldStdOut = sys.stdout
        oldStdErr = sys.stderr
        outputStdErr = StringIO.StringIO()
        outputStdOut = StringIO.StringIO()
        sys.stdout = outputStdOut
        sys.stderr = outputStdErr

        stream = FileInputStream(unicodeFilename(filePath),
                                 real_filename=filePath)

        # Search for subfiles
        subfile = SearchSubfile(stream, 0, None)
        subfile.loadParsers(categories=None, parser_ids=None)
        subfile.main()

        # sys.stdout, sys.stderr reset
        sys.stdout = oldStdOut
        sys.stderr = oldStdErr

        # parse stdout, stderr from SearchSubfile
        return self.parse(outputStdOut.getvalue(), outputStdErr.getvalue())
Example #7
def _parallel_task(work_queue, progress, args, run_stats):

    while True:
        try:
            src_file = work_queue.get(True, QUEUE_TIMEOUT_SEC)
            Partition.handle_file(src_file, args.src_dir, args.dest_dir, \
                                  not args.no_dry_run, args.flatten_subdirectories, run_stats)

            run_stats.count_success()
        except Queue.Empty:
            LOG.error("No more files to process. Exiting.")
        except:
            LOG.exception("Unexpected error processing file %s: %s", src_file,
                          sys.exc_info()[0])
            run_stats.count_failure()
        finally:
            work_queue.task_done()

        try:
            #TODO: I think this may break with Unicode filenames
            progress.set_postfix(file=unicodeFilename(
                os.path.basename(src_file)),
                                 refresh=False)
            progress.update(1)
        except:
            LOG.exception("Error updating progress bar for source file %s: %s",
                          src_file,
                          sys.exc_info()[0])
Example #8
def get_file_date(root, file):
    date = ""
    try:
        filename = "{}/{}".format(root,file)
        filename, realname = unicodeFilename(filename), filename
        parser = createParser(filename, realname)
        if not parser:
            print >>stderr, "Unable to parse file {}".format(filename)
            return date
        try:
            actualstderr = sys.stderr
            sys.stderr = open(os.devnull,'w')
            metadata = extractMetadata(parser)
            sys.stderr = actualstderr
        except HachoirError, err:
            print "Metadata extraction error: %s" % unicode(err)
            metadata = None
        if not metadata:
            print "Unable to extract metadata, {}".format(filename)
            return date

        text = metadata.exportPlaintext()
        date = ""
        # Tracer()()
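        # scan the plaintext metadata for a '- Creation ...' line and parse its timestamp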
        for line in text:
            if line[0:10] == "- Creation":
                
                match = re.search('(\d+-\d+-\d+ \d+:\d+:\d+)', line)
                if match:
                    date = time.strptime(match.groups()[0], '%Y-%m-%d %H:%M:%S')
                    return date
    except Exception:
        pass
    return date
Example #9
def classify(path,rootdir): # add an extra argument here to take the root dir  :)

    print 'path given: ', path,' RootDir: ',rootdir
    foo = path.rsplit('/', 1)
    fname = foo[1]

    # defaults audio, video:
    artist = album = genre = 'unknown'

    # defaults image:
    latitude = longitude = 0
    city = state = country = 'unknown'
    year = '1960'
    month = 'January'
    # here we go :
    filename = path
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    if not parser:
        print >>stderr, "Unable to parse file"
        exit(1)
    try:
        metadata = extractMetadata(parser)
    except HachoirError, err:
        print "Metadata extraction error: %s" % unicode(err)
        metadata = None
Example #10
    def lnkparse(reflectPath, filename):
        """ Return the target filename from a MS-widows link (URL format)
        """
        filename = unicodeFilename(filename)
        try:
            parser = createParser(filename)
            if parser is not None and isinstance(parser, LnkFile):
                #It is a "MS-Windows" link file
                try:
                    for field in parser:
                        pass  # trigger parsing
                    lnkpath = parser.getField('relative_path').value
                    # build the complete target path and check that it stays inside the base path
                    if lnkpath.startswith('.\\'):
                        lnkpath = lnkpath[2:]
                    lnkpath = lnkpath.replace('\\', '/')
                    filenamePath = os.path.dirname(filename)
                    allLnkpath = os.path.join(reflectPath, filenamePath,
                                              lnkpath)
                    allLnkpath = os.path.abspath(allLnkpath)  #remove all ..\

                    if allLnkpath.startswith(reflectPath):
                        lnkpath = quote(lnkpath.encode('utf-8'))
                        return 'OK', lnkpath
                    else:
                        return 'ERROR_OUTREFLECTPATH', ''
                except MissingField:
                    # example: link to a network file
                    return 'ERROR_RELPATH', ''
            else:
                return 'NOT_LNKFILE', ''
        except InputStreamError:
            return 'NOT_PARSED', ''
Example #11
def hachm(filename):
    # using this example http://archive.org/details/WorkToFishtestwmv
    try:
        filename, realname = unicodeFilename(filename), filename
    except TypeError:
        filename,realname=filename,filename
    parser = createParser(filename)
    # See what keys you can extract
    tmp = metadata.extractMetadata(parser)
    if tmp is None: return {}
    else: tmp = tmp._Metadata__data.iteritems()
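    # print each raw metadata key and its first value (iterating the name-mangled private __data dict)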
    for k,v in tmp:
        if v.values:
            print v.key, v.values[0].value
    # Turn the tags into a defaultdict
    metalist = metadata.extractMetadata(parser).exportPlaintext()
    meta = defaultdict(defaultdict)
    if not metalist:
        return meta
    for item in metalist[1:]:
        item = [x.strip() for x in item.split('-') if x.strip()][0]
        item = [ x.strip().lower().replace(' ','_') for x in item.split(':') ]

        k,v = item.pop(0),':'.join(item)
        meta[k]=v
    return meta
Example #12
def get_metadata(file_names):
    print ("- Analyzing files metadata.." + "\n")
    file_ = open('results.txt', 'w')
    file_extensions = [".3do",    ".3ds",    ".7z",    ".a",    ".ace",    ".aif",    ".aifc",    ".aiff",    ".ani",    ".apm",    ".asf",    ".au",    ".avi",    ".bin",    ".bmp",    ".bz2",    ".cab",    ".cda",    ".chm",    ".class",    ".cur",    ".deb",    ".der",    ".dll",    ".doc",    ".dot",    ".emf",    ".exe",    ".flv",    ".gif",    ".gz",    ".ico",    ".jar",    ".jpeg",    ".jpg",    ".laf",    ".lnk",    ".m4a",    ".m4b",    ".m4p",    ".m4v",    ".mar",    ".mid",    ".midi",    ".mka",    ".mkv",    ".mod",    ".mov",    ".mp1",    ".mp2",    ".mp3",    ".mp4",    ".mpa",    ".mpe",    ".mpeg",    ".mpg",    ".msi",    ".nst",    ".oct",    ".ocx",    ".odb",    ".odc",    ".odf",    ".odg",    ".odi",    ".odm",    ".odp",    ".ods",    ".odt",    ".ogg",    ".ogm",    ".otg",    ".otp",    ".ots",    ".ott",    ".pcf",    ".pcx",    ".pdf",    ".png",    ".pot",    ".pps",    ".ppt",    ".ppz",    ".psd",    ".ptm", ".pyo",    ".qt",    ".ra",    ".rar",    ".rm",    ".rpm",    ".s3m",    ".sd0",    ".snd",    ".so",    ".stc",    ".std",    ".sti",    ".stw",    ".swf",    ".sxc",    ".sxd",    ".sxg",    ".sxi",    ".sxm",    ".sxw",    ".tar",    ".tga",    ".tif",    ".tiff",    ".torrent",    ".ts",    ".ttf",    ".vob",    ".wav",    ".wma",    ".wmf",    ".wmv",    ".wow",    ".xcf",    ".xla",    ".xls",    ".xm",    ".zip",    ".zs1",    ".zs2",    ".zs3",    ".zs4",    ".zs5",    ".zs6",    ".zs7",    ".zs8",    ".zs9",    ".zst"]
    for filename in file_names:
        print ("- Extracting file metadata: " + filename + "\n")
        extension = os.path.splitext(filename)
        if extension[1] in file_extensions:
            print ("    * The file extension is: " + extension[1] + "\n")
            filename, realname = unicodeFilename(filename), filename
            file_.write('Name: ')
            file_.write(filename)
            file_.write('\n')
            parser = createParser(filename, realname)
            if not parser:
                print >>stderr, "Error, parsing file"
                exit(1)
            try:
                metadata = extractMetadata(parser)
            except Exception as e:
                print ("Error extracting file metadata: " + str(e))
                metadata = None
            if not metadata:
                print ("Metadata can not be extracted")
                exit(1)
            text = metadata.exportPlaintext()
            for line in text:
                file_.write(line)
                file_.write('\n')
            print ("    * Successfull metadata extraction" + "\n" + "\n")
        if not extension[1] in file_extensions:
            print ("    * File extension is unknown or not supported" + "\n" + "\n")
    file_.close()
    return text
Example #13
def classify(path,
             rootdir):  # add an extra argument here to take the root dir  :)

    print 'path given: ', path, ' RootDir: ', rootdir
    foo = path.rsplit('/', 1)
    fname = foo[1]

    # defaults audio, video:
    artist = album = genre = 'unknown'

    # defaults image:
    latitude = longitude = 0
    city = state = country = 'unknown'
    year = '1960'
    month = 'January'
    # here we go :
    filename = path
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    if not parser:
        print >> stderr, "Unable to parse file"
        exit(1)
    try:
        metadata = extractMetadata(parser)
    except HachoirError, err:
        print "Metadata extraction error: %s" % unicode(err)
        metadata = None
Example #14
 def get_meta(self, file_path):
     """ Get the meta information.
     """
     self.check_extension(file_path)
     filename, realname = unicodeFilename(file_path), file_path
     parser = createParser(filename, realname)
     if parser is None:
         if file_path.lower().endswith('.mov'):
             return 'video/quicktime', 'null'
         if file_path.lower().endswith('.mpg'):
             return 'video/mpeg', 'null'
         if file_path.lower().endswith('.jpg'):
             return 'image/jpeg', 'null'
         if file_path.lower().endswith('.bup'):
             return 'video/dvd', 'null'
         if file_path.lower().endswith('.vob'):
             return 'video/dvd', 'null'
         if file_path.lower().endswith('.ifo'):
             return 'video/dvd', 'null'
     metadata = extractMetadata(parser)
     mime_type = parser.mime_type
     info = {}
     for data in sorted(metadata or ()):
         if not data.values:
             continue
         info[data.key] = [item.text for item in data.values]
     return mime_type, json.dumps(info)
Example #15
    def _guess_from_metadata(self):
        parse = lambda s: s.split(":")
        guesses = []
        for filename in self.files:
            filename = get_filename(filename)
            if not isinstance(filename, unicode):
                filename, realname = unicodeFilename(filename), filename
            else:
                realname = filename

            parser = createParser(filename, realname)
            if parser:
                try:
                    metadata = extractMetadata(parser)
                except HachoirError:
                    continue

                for line in metadata.exportPlaintext():
                    entries = dict((parse(normalize(l)) for l in line if 'comment' in l or 'title' in l))
                    entries = dict(((k, guessit.guess_episode_info(v)) for (k, v) in entries.items()))
                    if 'title' in entries:
                        guesses.append(entries['title'])
                    elif 'comment' in entries:
                        guesses.append(entries['comment'])
        return guesses
Example #16
    def _guess_from_metadata(self):
        parse = lambda s: s.split(":")
        guesses = []
        for filename in self.files:
            filename = get_filename(filename)
            if not isinstance(filename, unicode):
                filename, realname = unicodeFilename(filename), filename
            else:
                realname = filename

            parser = createParser(filename, realname)
            if parser:
                try:
                    metadata = extractMetadata(parser)
                except HachoirError:
                    continue

                for line in metadata.exportPlaintext():
                    entries = dict((parse(normalize(l)) for l in line
                                    if 'comment' in l or 'title' in l))
                    entries = dict(((k, guessit.guess_episode_info(v))
                                    for (k, v) in entries.items()))
                    if 'title' in entries:
                        guesses.append(entries['title'])
                    elif 'comment' in entries:
                        guesses.append(entries['comment'])
        return guesses
Example #17
def googlesearch():
    print "Searching google for files..."
    # set up browser
    browse = mechanize.Browser()
    cookiejar = cookielib.LWPCookieJar()
    browse.set_cookiejar(cookiejar)
    browse.set_handle_equiv(True)
    browse.set_handle_redirect(True)
    browse.set_handle_referer(True)
    browse.set_handle_robots(False)
    browse.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
    browse.addheaders = [
        (
            "User-agent",
            "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1",
        )
    ]

    # response = browse.open("https://www.google.com/#q=filetype: %s + %s" % (filetype, domain))
    for filetype in ["doc", "docx", "ppt", "xls"]:
        response = browse.open("https://www.google.com")
        browse.select_form(nr=0)
        browse.form["q"] = "filetype:%s site:%s" % (filetype, domain)
        browse.submit()
        results = browse.response().read()
        soup = BeautifulSoup(results, "lxml")
        sidlist = []
        namelist = []
        typelist = []
        metalist = []
        counter = 1
        for link in soup.find_all("a", href=re.compile("/url")):
            link = link.get("href")
            if link.startswith("/url?q="):
                link = link[len("/url?q=") :]
                link = link.split("." + filetype)[0]
                # print str(link + ".pdf")
                filename = "%s%s.%s" % (domain, counter, filetype)
                try:
                    downfile = browse.retrieve(str(link + "." + filetype), filename)[0]
                    filename = downfile
                    filename, realname = unicodeFilename(filename), filename
                    parser = createParser(filename, realname)
                    metadata = extractMetadata(parser)
                    text = metadata.exportPlaintext()
                    charset = getTerminalCharset()
                    sidlist.append(sid)
                    typelist.append(str(filetype))
                    namelist.append(str(filename))
                    metalist.append(str(text))
                    counter += 1
                except:
                    pass
            for meta in zip(sidlist, typelist, namelist, metalist):
                executor.execute("INSERT INTO metadata VALUES (?,?,?,?)", meta)
                # for line in text:
                #    print makePrintable(line, charset)

    connection.commit()
Example #18
def extract_title(filename):
	filename, realname = unicodeFilename(filename), filename
	parser = createParser(filename, realname)
	metadata = extractMetadata(parser)

	text = metadata.exportPlaintext()

	return metadata.get('title')
Example #19
	def getData(self):
		filename, realname = unicodeFilename(self.filename), self.filename
		parser = createParser(filename, realname)
		try:
			metadata = extractMetadata(parser)
		except HachoirError, err:
			print "Metadata extraction error: %s" % unicode(err)
			metadata = None
Example #20
 def getData(self):
     filename, realname = unicodeFilename(self.filename), self.filename
     parser = createParser(filename, realname)
     try:
         metadata = extractMetadata(parser)
     except HachoirError, err:
         print "Metadata extraction error: %s" % unicode(err)
         metadata = None
Example #21
def getMetadata(filename):
	filename, realname = unicodeFilename(filename), filename
	
	parser = createParser(filename, realname)
	if not parser:
		print "Unable to parse file"
		exit(1)
	metadata = extractMetadata(parser)
	return metadata
Example #22
def file_mimetype(filename):
    if filename and filename != "":
        result = Cigma().identify(filename=filename)
        return result["match"]["mimetype"] if result["match"] else None
        parser = createParser(unicodeFilename(filename), filename)
        return {
            "mimetype": str(parser.mime_type)
        } if parser else {
            "mimetype": "text/plain"
        }
Example #23
    def __get_hd_tag__(self, video):
        result = 104

        file_metadata = extractMetadata(createParser(unicodeFilename(video)))
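        # map the frame width to an HD tag: 1280 (720p) and 1920 (1080p) get dedicated values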
        if file_metadata.get('width') == 1280:
            result = 404
        elif file_metadata.get('width') == 1920:
            result = 1604

        return result
Example #24
    def get_hd_tag(self, video):
        result = 0

        try:
            file_metadata = extractMetadata(createParser(unicodeFilename(video)))
            if file_metadata.get('width') == 1280:
                result = 1
            elif file_metadata.get('width') == 1920:
                result = 2
        finally:
            return result
Example #25
 def extract_metadata(self, file):
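     # set MAX_STR_LENGTH to infinity so long metadata string values are not truncated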
     config.MAX_STR_LENGTH = float("inf")
     try:
         filename = file.name
         if not isinstance(filename, unicode):
             filename = unicodeFilename(filename)
         stream = InputIOStream(file, source="file:%s" % filename, tags=[], filename=filename)
         parser = guessParser(stream)
         return extractMetadata(parser)
     except (HachoirError, TypeError) as e:
         raise MetadataException(e)
Example #26
def extractInicioEFimDoVideo(filename):
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    if not parser:
        print >> stderr, "Falha ao converter arquivo."
        exit(1)
    try:
        metadata = extractMetadata(parser)
    except HachoirError, err:
        print "Falha na extração de metadado do arquivo: %s" % unicode(err)
        metadata = None
Example #27
def metadata_for(filename):
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    if not parser:
        print "Unable to parse file"
        exit(1)
    try:
        metadata = extractMetadata(parser)
    except HachoirError, err:
        print "Metadata extraction error: %s" % unicode(err)
        metadata = None
Example #28
 def loadMetadata(self):
     """Load the metadata, using either Hachoir or mplayer"""
     if len(self.videoFile) != 0:
         filename = OP.join(self.videoPath, self.videoFile)
         filename, realname = unicodeFilename(filename), filename
         myParser = createParser(filename, realname)
         try:
             self.metadata = extractMetadata(myParser)
         except HachoirError, err:
             print "Metadata extraction error: %s" % unicode(err)
             self.metadata = None
Example #29
def metadata_for_video(filename):
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    if not parser:
        print "Unable to parse file"
        exit(1)
    try:
        metadata = extractMetadata(parser)
    except HachoirError, err:
        print "Metadata extraction error: %s" % unicode(err)
        metadata = None
Example #30
def processFile(filename, quality=0.5):
    charset = getTerminalCharset()
    filename, real_filename = unicodeFilename(filename, charset), filename

    # Create parser
    try:
        tags = None
        parser = createParser(filename, real_filename=real_filename, tags=tags)
    except InputStreamError, err:
        error(unicode(err))
        return False
Example #31
def getMetadata(filename):
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    try:
        metadata = extractMetadata(parser)
    except:
        return None

    if metadata is not None:
        metadata = metadata.exportPlaintext()
        return metadata
    return None
Example #32
 def parse(self):
     filename, realname = unicodeFilename(self.filename), self.filename
     parser = hachoir_parser.createParser(filename, realname)
     if not parser:
         sys.stderr.write("Unable to parse file %s/n" % self.filename)
         return
     try:
         ## TODO: this call emits a warning when there is no GPS data
         metadata = hachoir_metadata.extractMetadata(parser)
     except HachoirError, err:
         print "Metadata extraction error: %s" % unicode(err)
         metadata = None
Example #33
 def processFile(self, filename):
     filename, realname = unicodeFilename(filename), filename
     print u"[%s] Process file %s..." % (self.total, filename)
     parser = createParser(filename, realname)
     if not parser:
         print >> stderr, "Unable to parse file"
         return None
     try:
         metadata = extractMetadata(parser)
     except HachoirError, err:
         print >> stderr, "Metadata extraction error: %s" % unicode(err)
         return None
Example #34
 def parse(self):
     filename, realname = unicodeFilename(self.filename), self.filename
     parser = hachoir_parser.createParser(filename, realname)
     if not parser:
         sys.stderr.write("Unable to parse file %s/n"%self.filename)
         return
     try:
         ## TODO: this call emits a warning when there is no GPS data
         metadata = hachoir_metadata.extractMetadata(parser)
     except HachoirError, err:
         print "Metadata extraction error: %s" % unicode(err)
         metadata = None
Example #35
def getMetadata(filename):
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    try:
        metadata = extractMetadata(parser)
    except:
        return None

    if metadata is not None:
        metadata = metadata.exportPlaintext()
        return metadata
    return None
Example #36
 def get_movie_metadata(self, filename):
     filename, realname = unicodeFilename(filename), filename
     # parser = createParser(filename, realname)
     parser = createParser(filename, filename)
     if not parser:
         print >> stderr, "Unable to parse file"
         exit(1)
     try:
         metadata = extractMetadata(parser)
     except HachoirError, err:
         print "Metadata extraction error: %s" % unicode(err)
         metadata = None
Example #37
 def processFile(self, filename):
     filename, realname = unicodeFilename(filename), filename
     print u"[%s] Process file %s..." % (self.total, filename)
     parser = createParser(filename, realname)
     if not parser:
         print >>stderr, "Unable to parse file"
         return None
     try:
         metadata = extractMetadata(parser)
     except HachoirError, err:
         print >>stderr, "Metadata extraction error: %s" % unicode(err)
         return None
Example #38
    def search(self, file_path, strings=None):
        try:
            self.stream = FileInputStream(unicodeFilename(file_path), real_filename=file_path)
        except NullStreamError:
            return False
        patterns = PatternMatching()
        for s in strings:
            patterns.addString(s)

        start = 0
        end = self.stream.size
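        # hachoir stream sizes are in bits, so divide by 8 to read the whole stream as bytes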
        self.data = self.stream.readBytes(start, end//8)
        return patterns.search(self.data)
Example #39
def main(filename="default_64.png"):
    filename, realname = unicodeFilename(filename), filename

    p = hachoir_parser.createParser(filename, realname)
    print "Fields: ", p._fields
    metadata = extractMetadata(p)
    print_metadata(metadata)

    fields = p._fields
    print "fields: ", fields

    for i in print_recursively(fields):
        print i
Example #40
def metadata_for(filename):
    print '\nprinting metadata...\n'

    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    if not parser:
        print("Unable to parse file")
        exit(1)
    try:
        metadata = extractMetadata(parser)
    except HachoirError, err:
        print("Metadata extraction error: %s" % unicode(err))
        metadata = None
Example #41
    def __init__(self, filename, source_directory, destination_directory):
        # Call the Constructor of the super class
        super(VideoFile, self).__init__(filename, source_directory, destination_directory)

        if self.date_created == "":
            # Set the timezone data for processing the movie files
            from_zone = tz.gettz('UTC')
            to_zone = tz.gettz('America/Los_Angeles')

            filename, realname = unicodeFilename((self.source_directory + "/" + self.filename)), (self.source_directory + "/" + self.filename)
            parser = createParser(filename, realname)

            if not parser:
                print "ERROR... unable to parse file!"
                metadata = None
            else:
                try:
                    metadata = extractMetadata(parser)
                except HachoirError, err:
                    print "Metadata extraction error: %s" % unicode(err)
                    metadata = None

            if not metadata:
                print "Unable to extract metadata"
            else:
                text = metadata.exportPlaintext()

                for line in text:
#                   print line
                    current_line  = str(line)[2:15]
                    movie_creation_date_and_time_utc = str(line)[17:len(line)]

                    if current_line == "Creation date":
#                        print "Current line: %s" % current_line
#                        print "Found match... %s" % movie_creation_date_and_time_utc

                        # Process the time extracted from the movie file by converting from
                        #  UTC time (Greenwich Mean Time) to the Pacific time zone
                        utc = datetime.strptime(movie_creation_date_and_time_utc, '%Y-%m-%d %H:%M:%S')
                        utc = utc.replace(tzinfo=from_zone)
                        movie_creation_date_and_time_pacific = utc.astimezone(to_zone)
#                        print "Time/Date: %s" % movie_creation_date_and_time_pacific

                        # Extract the date from the processed Pacific time
                        movie_creation_date = str(movie_creation_date_and_time_pacific)[0:10]
                        self.date_created = movie_creation_date
                        self.destination_directory += self.date_created + '/'
#                        print "Video created on: %s" % self.date_created
#                        print "Video dest dir: %s" % self.destination_directory

        else:
            self.destination_directory += self.date_created + '/'
Example #42
 def extract_metadata(self, file):
     config.MAX_STR_LENGTH = float("inf")
     try:
         filename = file.name
         if not isinstance(filename, unicode):
             filename = unicodeFilename(filename)
         stream = InputIOStream(file,
                                source="file:%s" % filename,
                                tags=[],
                                filename=filename)
         parser = guessParser(stream)
         return extractMetadata(parser)
     except (HachoirError, TypeError) as e:
         raise MetadataException(e)
Example #43
def get_metadata(path):
    # Create a parser for the file
    parser = createParser(unicodeFilename(path), path)
    if not parser:
        raise ValueError('Unable to parse %r' % path)
    # Read the metadata
    try:
        metadata = extractMetadata(parser)
    except HachoirError as e:
        raise ValueError('Metadata extraction error: %s' % e)
    # Check that there really was metadata
    if not metadata:
        raise ValueError('Unable to extract metadata for %r' % path)
    return metadata
Example #44
def get_hachoir_create_date(fname):
    """Get media create date using hachoir library"""
    global log
    retval = None
    filename, realname = unicodeFilename(fname), fname
    parser = createParser(filename, realname)
    if not parser:
        log.critical( 'Unable to parse file ' + fname)
        return retval
    try:
        metadata = extractMetadata(parser)
    except HachoirError, err:
        log.critical( 'Metadata extraction error for ' + fname + ' - '+ unicode(err))
        metadata = None
Example #45
def get_meta(filename):
    from hachoir_core.error import HachoirError
    from hachoir_core.cmd_line import unicodeFilename
    from hachoir_parser import createParser
    from hachoir_core.tools import makePrintable
    from hachoir_metadata import extractMetadata
    from hachoir_core.i18n import getTerminalCharset
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    if not parser:
        print >>sys.stderr, "{}: Unable to parse file".format(filename)
        return None

    return extractMetadata(parser)
Example #46
def processFileReturn(filename, display_filename=False, priority=None, human=True, display=True):
    charset = getTerminalCharset()
    # filename, real_filename = unicode(filename, charset), filename
    if type(filename) == str:
        filename, real_filename = unicodeFilename(filename, charset), filename
    else:
        real_filename = filename.encode(getTerminalCharset())
    try:
        parser = createParser(filename, real_filename=real_filename, tags=None)
    except InputStreamError, err:
        error(unicode(err))
        try: del(parser)
        except: pass
        return False
Example #47
    def convert_gzip_files(self, path, hash_value):
        try:
            flow = Flow.objects.get(hash_value=hash_value)
            flow_details = flow.details
            for detail in flow_details:
                # create the orig file ex: contents_192.168.1.5:42825-62.212.84.227:80_resp.dat
                source_str = ":".join([detail.src_ip, str(detail.sport)])
                destination_str = ":".join([detail.dst_ip, str(detail.dport)])
                flow_str = "-".join([source_str, destination_str])
                resp_file = "_".join(["contents", flow_str,"resp.dat"])
                file_path = "/".join([path, resp_file])
                # path is created as unicode, convert it a regular string for hachoir operation
                file_path = str(file_path)

                try:
                    stream = FileInputStream(unicodeFilename(file_path), real_filename=file_path)
                except NullStreamError:
                    continue
                subfile = SearchSubfile(stream, 0, None)
                subfile.loadParsers()
                root = "/".join([path, "html-files"])
                if not os.path.exists(root):
                    os.makedirs(root)
                output = "/".join([root, flow_str])
                output = str(output)
                subfile.setOutput(output)

                http_details = filter(lambda x: x.flow_details.id == detail.id ,HTTPDetails.objects.filter(http_type="response"))
                file_ext = ".txt"
                for http in http_details:
                    if http.content_type:
                        filename = subfile.output.createFilename(file_ext)
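                        # gunzip the extracted response body and rewrite it as an .html file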
                        if http.content_encoding == "gzip":
                            r = open("/".join([output, filename]), "r")
                            body = r.read()
                            r.close()
                            data = StringIO.StringIO(body)
                            gzipper = gzip.GzipFile(fileobj=data)
                            html = gzipper.read()
                            filename = filename.split(".")[0] + ".html"
                            w = open("/".join([output, filename]), "w")
                            w.write(html)
                            w.close()

            return True

        except Exception, ex:
            print ex
            return False
Example #48
def file_metadata(filename):
    if filename and filename != "":
        parser = createParser(unicodeFilename(filename), filename)
        meta = metadata.extractMetadata(parser) if parser else None
        metalist = meta.exportPlaintext() if meta else []
        meta = collections.defaultdict(collections.defaultdict)
        for item in metalist:
            if item.endswith(":"):
                k = item[:-1]
            else:
                tag, value = item.split(": ", 1)
                tag = tag[2:]
                meta[k][tag] = value
        return unicode_to_string(
            default_to_regular(meta))["Metadata"] if meta else {}
Example #49
def get_hachoir_create_date(fname):
    """Get media create date using hachoir library"""
    global log
    retval = None
    filename, realname = unicodeFilename(fname), fname
    parser = createParser(filename, realname)
    if not parser:
        log.critical('Unable to parse file ' + fname)
        return retval
    try:
        metadata = extractMetadata(parser)
    except HachoirError, err:
        log.critical('Metadata extraction error for ' + fname + ' - ' +
                     unicode(err))
        metadata = None
Example #50
def _read_exif_hachoir(file_name):
    try:

        filename, realname = unicodeFilename(file_name), file_name
        parser = createParser(filename, realname)
        metadata = extractMetadata(parser)

        if metadata and metadata.has('creation_date'):
            exif = {}
            exif['creation_date'] = str(metadata.get('creation_date'))
            return exif
        else:
            LOG.warn('File %s did not have creation_date' % file_name)

        return {}

    except HachoirError, err:
        LOG.exception("Metadata extraction error: %s", unicode(err))
Example #51
def processFile(values,
                filename,
                display_filename=False,
                priority=None,
                human=True,
                display=True):
    charset = getTerminalCharset()
    filename, real_filename = unicodeFilename(filename, charset), filename

    # Create parser
    try:
        if values.force_parser:
            tags = [("id", values.force_parser), None]
        else:
            tags = None
        parser = createParser(filename, real_filename=real_filename, tags=tags)
    except InputStreamError, err:
        error(unicode(err))
        return False
Example #52
def downloadBINARY(url):
    ###########################################################
    # USE TO DOWNLOAD A BINARY FILE LIKE DOC OR XLS           #
    # INPUT: the url of the file.                             #
    # OUTPUT: the hex of the file, and list of some metadata, #
    # from the server and from a hachoir_metadata scan        #
    # SAVES FILE TO: downloaded_docs/doc, or xls/filename     #
    ###########################################################
    infoMeta=[]
    file_name = url.split('/')[-1]
    file_type = file_name.split(".")[-1]
    base_dir = os.path.abspath("../../../downloaded_docs/")
    download_dir = os.path.join(base_dir, file_type)    
    infoMeta.append(file_type)
    infoMeta.append(file_name)
    u = urllib2.urlopen(url)

    meta = u.info()
    infoMeta.append(meta.headers)
    doc= u.read()
    f = open(os.path.join(download_dir,file_name), 'wb')
    f.write(doc)
    f.close()

    with open(os.path.join(download_dir,file_name), 'rb') as p:
    # Slurp the whole file and convert it to hex all at once
        hexdata = binascii.hexlify(p.read())

    # use hachoir to add the standard metadata
    filename = download_dir+ '/'+file_name
    print filename
#    filename = unicodeFilename(filename), filename
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename)
    try:
        metalist = metadata.extractMetadata(parser).exportPlaintext()
        infoMeta.append(metalist[1:4])
    except Exception:
        infoMeta.append(["none","none","none"])

        
    p.close()    
    print "Done", file_name, " Saved to: ", download_dir
    return hexdata, infoMeta
Example #53
def processFileReturn(filename,
                      display_filename=False,
                      priority=None,
                      human=True,
                      display=True):
    charset = getTerminalCharset()
    # filename, real_filename = unicode(filename, charset), filename
    if type(filename) == str:
        filename, real_filename = unicodeFilename(filename, charset), filename
    else:
        real_filename = filename.encode(getTerminalCharset())
    try:
        parser = createParser(filename, real_filename=real_filename, tags=None)
    except InputStreamError, err:
        error(unicode(err))
        try:
            del (parser)
        except:
            pass
        return False
Example #54
def getmeta(tempfile):
    try:

        filename = tempfile
        filename, realname = unicodeFilename(filename), filename
        parser = createParser(filename, realname)
        if not parser:
            print >> stderr, "Unable to parse file"
            return "error"
        try:
            metadata = extractMetadata(parser)
        except HachoirError, err:
            print "Metadata extraction error: %s" % unicode(err)
            metadata = None
        if not metadata:
            print "Unable to extract metadata"
            return "error"

        text = metadata.exportPlaintext()
        charset = getTerminalCharset()
        return text
    except Exception:
        return "error"
Example #55
 def extract(self, file):
     ''' Extract metadata from a file using the hachoir3 library.
     (See more: http://hachoir3.readthedocs.org)
     :param file: the file to extract metadata from
     :return: metadata as a dict on success, None on failure.
     '''
     try:
         filename, realname = unicodeFilename(file), file
         parser = createParser(filename, realname)
         meta_data = extractMetadata(parser)
         meta_data_text = meta_data.exportPlaintext()
         meta_list = dict()
         for i in range(1, len(meta_data_text)):
             meta_split = meta_data_text[i].split(":")
             column = meta_split[0].replace('- ', '')
             value = meta_split[1].lstrip()
             meta_list.update({column: value})
         return meta_list
     except:
         if self.debug:
             print "Something went wrong, meta data of", file, "could not extract."
         return None
Example #56
    def main(self):
        if len(argv) != 2:
            print >>stderr, "usage: %s document.swf" % argv[0]
            exit(1)

        realname = argv[1]
        filename = unicodeFilename(realname)
        parser = createParser(filename, real_filename=realname)

        if parser["signature"].value == "CWS":
            deflate_swf = parser["compressed_data"].getSubIStream()
            parser = guessParser(deflate_swf)

        if "jpg_table/data" in parser:
            # JPEG pictures with common header
            jpeg_header = parser["jpg_table/data"].value[:-2]
            for field in parser.array("def_bits"):
                jpeg_content = field["image"].value[2:]
                if self.verbose:
                    print "Extract JPEG from %s" % field.path
                self.storeJPEG(jpeg_header + jpeg_content)

        # JPEG in format 2/3
        for field in parser.array("def_bits_jpeg2"):
            self.extractFormat2(field)
        for field in parser.array("def_bits_jpeg3"):
            self.extractFormat2(field)

        # Extract sound
        #self.extractSound(parser)
        self.extractSound2(parser)

        # Does it extract anything?
        if self.jpg_index == 1:
            print "No JPEG picture found."
        if self.snd_index == 1:
            print "No sound found."