示例#1
0
def processFileReturn(filename, display_filename=False, priority=None, human=True, display=True):
    """Create a parser for ``filename``.

    ``filename`` may be a byte string or a unicode string.  A byte string is
    kept as the real on-disk name and decoded with unicodeFilename() for
    display; any other value is encoded with the terminal charset to obtain
    the real name.

    Returns False (after reporting through error()) when createParser()
    raises InputStreamError.  The other parameters are not used by the code
    visible here.
    """
    charset = getTerminalCharset()
    if isinstance(filename, str):
        filename, real_filename = unicodeFilename(filename, charset), filename
    else:
        real_filename = filename.encode(charset)
    try:
        parser = createParser(filename, real_filename=real_filename, tags=None)
    except InputStreamError as err:
        # ``parser`` was never bound on this path, so there is nothing to
        # release before bailing out.
        error(unicode(err))
        return False
示例#2
0
def main():
    if len(sys.argv) != 2:
        print >>sys.stderr, "usage: %s directory" % sys.argv[0]
        sys.exit(1)
    charset = getTerminalCharset()
    directory = unicode(sys.argv[1], charset)

    print "Download and check Hachoir testcase."
    print
    print "Use directory: %s" % directory
    ok = testFiles(directory, TESTCASE_URL)
    if not stringMD5("abc"):
        print
        for index in xrange(3):
            print "!!! Warning: Python module md5 is missing, unable to check MD5 hash"
    if ok:
        print
        totalsize = sum( item[1] for item in testcase_files )
        print "Test case is ok (%s files, %s)" % (len(testcase_files), humanFilesize(totalsize))
        sys.exit(0)
    else:
        print
        for index in xrange(3):
            print "!!! ERROR !!!"
        print
        sys.exit(1)
示例#3
0
def unicodeFilename(filename, charset=None):
    """Convert ``filename`` to unicode.

    The terminal charset is used when ``charset`` is falsy.  If decoding
    fails with UnicodeDecodeError, the result of
    makePrintable(..., to_unicode=True) is returned instead.
    """
    encoding = charset or getTerminalCharset()
    try:
        decoded = unicode(filename, encoding)
    except UnicodeDecodeError:
        decoded = makePrintable(filename, encoding, to_unicode=True)
    return decoded
def main():
    setlocale(LC_ALL, "C")
    if len(sys.argv) != 2:
        print >>sys.stderr, "usage: %s testcase_directory" % sys.argv[0]
        sys.exit(1)
    charset = getTerminalCharset()
    directory = unicode(sys.argv[1], charset)

    print "Test hachoir-parser using random data."
    print
    if not testRandom():
        print
        print "If you are really sure there is no error in your code," \
              " increment the 'seed' parameter of testRandom."
        sys.exit(1)
    print "Result: ok"

    print
    print "Test hachoir-parser using testcase."
    print
    print "Testcase is in directory: %s" % directory
    if not testFiles(directory):
        print
        for index in xrange(3):
            print "!!! ERROR !!!"
        print
        sys.exit(1)
    print
    print "Result: ok for the %s files" % len(testcase_files)
示例#5
0
def unicodeFilename(filename, charset=None):
    """Return ``filename`` as a unicode string.

    Decodes with ``charset`` (the terminal charset when it is falsy); on
    UnicodeDecodeError the value produced by
    makePrintable(..., to_unicode=True) is returned.
    """
    if charset:
        encoding = charset
    else:
        encoding = getTerminalCharset()
    try:
        result = unicode(filename, encoding)
    except UnicodeDecodeError:
        result = makePrintable(filename, encoding, to_unicode=True)
    return result
示例#6
0
def main():
    setlocale(LC_ALL, "C")
    if len(sys.argv) != 2:
        print >> sys.stderr, "usage: %s testcase_directory" % sys.argv[0]
        sys.exit(1)
    charset = getTerminalCharset()
    directory = unicode(sys.argv[1], charset)

    print "Test hachoir-parser using random data."
    print
    if not testRandom():
        print
        print "If you are really sure there is no error in your code," \
              " increment the 'seed' parameter of testRandom."
        sys.exit(1)
    print "Result: ok"

    print
    print "Test hachoir-parser using testcase."
    print
    print "Testcase is in directory: %s" % directory
    if not testFiles(directory):
        print
        for index in xrange(3):
            print "!!! ERROR !!!"
        print
        sys.exit(1)
    print
    print "Result: ok for the %s files" % len(testcase_files)
def main():
    if len(sys.argv) != 2:
        print >> sys.stderr, "usage: %s directory" % sys.argv[0]
        sys.exit(1)
    charset = getTerminalCharset()
    directory = unicode(sys.argv[1], charset)

    print "Download and check Hachoir testcase."
    print
    print "Use directory: %s" % directory
    ok = testFiles(directory, TESTCASE_URL)
    if not stringMD5("abc"):
        print
        for index in xrange(3):
            print "!!! Warning: Python module md5 is missing, unable to check MD5 hash"
    if ok:
        print
        totalsize = sum(item[1] for item in testcase_files)
        print "Test case is ok (%s files, %s)" % (len(testcase_files),
                                                  humanFilesize(totalsize))
        sys.exit(0)
    else:
        print
        for index in xrange(3):
            print "!!! ERROR !!!"
        print
        sys.exit(1)
示例#8
0
def FileInputStream(filename, real_filename=None, **args):
    """
    Open a file and return an input stream reading from it.

    filename must be unicode; it is used for the stream source and in error
    messages.  real_filename ('str' or 'unicode') is the name actually
    opened; pass it when filename could only be obtained through a lossy
    conversion (ie. unicode(name, 'replace') or unicode(name, 'ignore')).
    When omitted, filename is opened.

    Optional keywords "offset" and "size" select a sub-stream (both are
    multiplied by 8 before being handed to InputSubStream); every other
    keyword is forwarded to the stream constructors.

    Raise InputStreamError if the file cannot be opened.
    """
    assert isinstance(filename, unicode)
    path = real_filename or filename
    try:
        handle = FileOpener(path, 'rb')
    except IOError as err:
        charset = getTerminalCharset()
        reason = unicode(str(err), charset)
        raise InputStreamError(
            _("Unable to open file %s: %s") % (filename, reason))

    source = "file:" + filename
    offset = args.pop("offset", 0)
    size = args.pop("size", None)

    if not (offset or size):
        # Whole file: record the filename as a tag of the stream.
        args.setdefault("tags", []).append(("filename", filename))
        return InputIOStream(handle, source=source, **args)

    scaled_size = 8 * size if size else size
    whole = InputIOStream(handle, source=source, **args)
    return InputSubStream(whole, 8 * offset, scaled_size, **args)
示例#9
0
def googlesearch():
    print "Searching google for files..."
    # set up browser
    browse = mechanize.Browser()
    cookiejar = cookielib.LWPCookieJar()
    browse.set_cookiejar(cookiejar)
    browse.set_handle_equiv(True)
    browse.set_handle_redirect(True)
    browse.set_handle_referer(True)
    browse.set_handle_robots(False)
    browse.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
    browse.addheaders = [
        (
            "User-agent",
            "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1",
        )
    ]

    # response = browse.open("https://www.google.com/#q=filetype: %s + %s" % (filetype, domain))
    for filetype in ["doc", "docx", "ppt", "xls"]:
        response = browse.open("https://www.google.com")
        browse.select_form(nr=0)
        browse.form["q"] = "filetype:%s site:%s" % (filetype, domain)
        browse.submit()
        results = browse.response().read()
        soup = BeautifulSoup(results, "lxml")
        sidlist = []
        namelist = []
        typelist = []
        metalist = []
        counter = 1
        for link in soup.find_all("a", href=re.compile("/url")):
            link = link.get("href")
            if link.startswith("/url?q="):
                link = link[len("/url?q=") :]
                link = link.split("." + filetype)[0]
                # print str(link + ".pdf")
                filename = "%s%s.%s" % (domain, counter, filetype)
                try:
                    downfile = browse.retrieve(str(link + "." + filetype), filename)[0]
                    filename = downfile
                    filename, realname = unicodeFilename(filename), filename
                    parser = createParser(filename, realname)
                    metadata = extractMetadata(parser)
                    text = metadata.exportPlaintext()
                    charset = getTerminalCharset()
                    sidlist.append(sid)
                    typelist.append(str(filetype))
                    namelist.append(str(filename))
                    metalist.append(str(text))
                    counter += 1
                except:
                    pass
            for meta in zip(sidlist, typelist, namelist, metalist):
                executor.execute("INSERT INTO metadata VALUES (?,?,?,?)", meta)
                # for line in text:
                #    print makePrintable(line, charset)

    connection.commit()
def print_metadata(metadata):
    text = metadata.exportPlaintext()
    charset = getTerminalCharset()
    for line in text:
        pass  #   print makePrintable(line, charset)

    # from http://stackoverflow.com/questions/14546533/hachoir-retrieving-data-from-a-group
    # See what keys you can extract
    for k, v in metadata._Metadata__data.iteritems():
        if v.values:
            print v.key, v.values[0].value
示例#11
0
def processFileReturn(filename,
                      display_filename=False,
                      priority=None,
                      human=True,
                      display=True):
    """Create a parser for ``filename``.

    A byte-string ``filename`` is kept as the real on-disk name and decoded
    with unicodeFilename() for display; any other value is encoded with the
    terminal charset to obtain the real name.

    Returns False (after reporting through error()) when createParser()
    raises InputStreamError.  The other parameters are not used by the code
    visible here.
    """
    charset = getTerminalCharset()
    if isinstance(filename, str):
        filename, real_filename = unicodeFilename(filename, charset), filename
    else:
        real_filename = filename.encode(charset)
    try:
        parser = createParser(filename, real_filename=real_filename, tags=None)
    except InputStreamError as err:
        # ``parser`` is unbound when createParser() fails, so no cleanup is
        # needed here.
        error(unicode(err))
        return False
示例#12
0
def processFile(filename, quality=0.5):
    charset = getTerminalCharset()
    filename, real_filename = unicodeFilename(filename, charset), filename

    # Create parser
    try:
        tags = None
        parser = createParser(filename, real_filename=real_filename, tags=tags)
    except InputStreamError, err:
        error(unicode(err))
        return False
示例#13
0
class metaMs2k:
    """Collect users, paths, software and dates from a document's metadata.

    getData() runs hachoir on ``filename`` and sorts the lines of the
    plaintext export into the list attributes created by __init__.
    """

    def __init__(self, filename):
        self.filename = filename
        self.users = []
        self.paths = []
        self.software = []
        self.modification = []
        self.creationDate = []
        self.lastPrinted = []
        self.raw = ""

    def _parseLine(self, line):
        """Store the value carried by one plaintext metadata line, if any.

        Handles "- <key>: <value>" lines and "- <x>: <subkey>: <value>"
        lines.  Values keep any ':' they contain (timestamps, Windows
        paths).  Lines that lack the parts a rule needs are ignored.
        """
        parts = line.split(":")
        if len(parts) < 2:
            return
        key, subkey = parts[0], parts[1]
        # Everything after the first ':' / after the second ':'.
        value = line.split(":", 1)[1]
        subvalue = line.split(":", 2)[2] if len(parts) > 2 else None

        if key == "- Author":
            self.users.append(value)
        elif subkey == " Author":
            # Compared against " Author:" before, which can never match a
            # piece produced by split(":").
            if subvalue is not None:
                self.users.append(subvalue)
        elif key == "- Producer":
            self.software.append(value)
        elif key == "- Creation date":
            self.creationDate.append(value)
        elif key == "- Last modification":
            self.modification.append(value)
        elif subkey == " Template":
            self.paths.append(line.replace("- Comment: Template:", ""))
        elif subkey == " LastSavedBy":
            if subvalue is not None:
                self.users.append(subvalue)
        elif subkey == " LastPrinted":
            if subvalue is not None:
                self.lastPrinted.append(subvalue)
        elif key == "- Revision history":
            pieces = line.split(",")
            if len(pieces) > 1 and "file " in pieces[1]:
                self.paths.append(pieces[1].split("file ")[1])

    def getData(self):
        """Extract the metadata of self.filename into the list attributes.

        Returns "error" when the parser cannot be created and "ok"
        otherwise, including when no metadata could be extracted (a message
        is printed in that case).
        """
        filename, realname = unicodeFilename(self.filename), self.filename
        try:
            parser = createParser(filename, realname)
        except Exception:
            return "error"
        try:
            metadata = extractMetadata(parser)
        except HachoirError as err:
            print("Metadata extraction error: %s" % unicode(err))
            metadata = None
        if not metadata:
            print("Unable to extract metadata on file: " + self.filename)
        else:
            text = metadata.exportPlaintext()
            for line in text:
                self._parseLine(line)
            if text:
                # Same end state as assigning on every iteration.
                self.raw = text
        return "ok"
示例#14
0
 def run(self):
     """Scan self.adir, descending into sub-directories and updating songs.

     Each sub-directory is handed to a new Walk(...) which is started.
     Each regular file accepted by is_song() gets its Song record loaded
     (or created), and, when has_changed() reports a change, its
     attributes are refreshed from the hachoir metadata before saving.
     """
     for filename in os.listdir(self.adir):
         filename = os.path.join(self.adir, filename)
         if os.path.isdir(filename):
             # NOTE(review): retries until Walk(...).start() stops raising;
             # a persistent failure makes this loop spin forever.
             while 1:
                 try:
                     Walk(filename).start()
                     break
                 except:
                     continue
                     
         elif os.path.isfile(filename) and is_song(filename):
             filename, realname = unicodeFilename(filename), filename
             # Any failure of the lookup (not only "not found") falls back
             # to a new Song named after the file's base name.
             try:
                 song = Song.objects.get(filename = filename)
             except:
                 song = Song(filename = filename, name = os.path.splitext(os.path.basename(filename))[0])  
             if not has_changed(song):
                 continue
             song.stat = stat(filename)
             try:
                 parser = createParser(filename, realname)
             except:
                 parser = None
             if not parser:
                 print >>stderr, "Unable to parse file %s"%filename
                 continue 
             try:
                 metadata = extractMetadata(parser)
             except HachoirError, err:
                 print >>stderr, "Metadata extraction error: %s" % unicode(err)
                 continue
             if not metadata:
                 print >>stderr, "Unable to extract metadata"
                 continue
             else:
                 text = metadata.exportPlaintext()
                 charset = getTerminalCharset()
                 # The first line is skipped (presumably a header -- TODO
                 # confirm against exportPlaintext()).
                 for line in text[1:]:
                     line = makePrintable(line, charset)
                     # Drop the 2-character prefix, take the label before
                     # ': ' and normalise it to an attribute name.
                     key = line[2:].split(': ')[0].replace(' ','_').replace('/','_').lower()
                     if key in COLS:
                         # The value starts after prefix (2) + label + ': ' (2).
                         setattr(song,key,line[len(key)+4:])
             # NOTE(review): same unbounded retry as above, around save().
             while 1:
                 try:
                     song.save()
                     break
                 except:
                     continue
示例#15
0
def processFile(values, filename,
display_filename=False, priority=None, human=True, display=True):
    charset = getTerminalCharset()
    filename, real_filename = unicodeFilename(filename, charset), filename

    # Create parser
    try:
        if values.force_parser:
            tags = [ ("id", values.force_parser), None ]
        else:
            tags = None
        parser = createParser(filename, real_filename=real_filename, tags=tags)
    except InputStreamError, err:
        error(unicode(err))
        return False
示例#16
0
 def __init__(self, input, size=None, **args):
     if not hasattr(input, "seek"):
         if size is None:
             input = InputPipe(input, self._setSize)
         else:
             input = InputPipe(input)
     elif size is None:
         try:
             input.seek(0, 2)
             size = input.tell() * 8
         except IOError, err:
             if err.errno == ESPIPE:
                 input = InputPipe(input, self._setSize)
             else:
                 charset = getTerminalCharset()
                 errmsg = unicode(str(err), charset)
                 source = args.get("source", "<inputio:%r>" % input)
                 raise InputStreamError(_("Unable to get size of %s: %s") % (source, errmsg))
示例#17
0
 def __init__(self, input, size=None, **args):
     if not hasattr(input, "seek"):
         if size is None:
             input = InputPipe(input, self._setSize)
         else:
             input = InputPipe(input)
     elif size is None:
         try:
             input.seek(0, 2)
             size = input.tell() * 8
         except IOError, err:
             if err.errno == ESPIPE:
                 input = InputPipe(input, self._setSize)
             else:
                 charset = getTerminalCharset()
                 errmsg = unicode(str(err), charset)
                 source = args.get("source", "<inputio:%r>" % input)
                 raise InputStreamError(_("Unable to get size of %s: %s") % (source, errmsg))
def FileInputStream(filename, real_filename=None, **args):
    """
    Create an input stream of a file. filename must be unicode.

    real_filename is an optional argument used to specify the real filename,
    its type can be 'str' or 'unicode'. Use real_filename when you are
    not able to convert filename to real unicode string (ie. you have to
    use unicode(name, 'replace') or unicode(name, 'ignore')).
    """
    assert isinstance(filename, unicode)
    if not real_filename:
        real_filename = filename
    try:
        inputio = FileOpener(real_filename, 'rb')
    except IOError, err:
        charset = getTerminalCharset()
        errmsg = unicode(str(err), charset)
        raise InputStreamError(_("Unable to open file %s: %s") % (filename, errmsg))
示例#19
0
def processFile(values,
                filename,
                display_filename=False,
                priority=None,
                human=True,
                display=True):
    charset = getTerminalCharset()
    filename, real_filename = unicodeFilename(filename, charset), filename

    # Create parser
    try:
        if values.force_parser:
            tags = [("id", values.force_parser), None]
        else:
            tags = None
        parser = createParser(filename, real_filename=real_filename, tags=tags)
    except InputStreamError, err:
        error(unicode(err))
        return False
示例#20
0
def main():
    if len(argv) != 2:
        print >>stderr, "usage: %s video.flv" % argv[0]
        exit(1)

    # Open input video
    inputname = unicode(argv[1], getTerminalCharset())
    parser = createParser(inputname)
    if parser["audio[0]/codec"].value != AUDIO_CODEC_MP3:
        print >>stderr, "Unknown audio codec: %s" % parser["audio[0]/codec"].display

    # Extract audio
    print "Extractor audio from: %s" % inputname
    outputname = inputname + ".mp3"
    output = FileOutputStream(outputname)
    for chunk in parser.array("audio"):
        data = chunk["music_data"]
        output.copyBitsFrom(data.parent.stream, data.absolute_address, data.size, data.parent.endian)
    print "Write audio into: %s" % outputname
示例#21
0
def FileInputStream(filename, real_filename=None, **args):
    """
    Create an input stream of a file. filename must be unicode.

    real_filename is an optional argument used to specify the real filename,
    its type can be 'str' or 'unicode'. Use real_filename when you are
    not able to convert filename to real unicode string (ie. you have to
    use unicode(name, 'replace') or unicode(name, 'ignore')).
    """
    assert isinstance(filename, unicode)
    if not real_filename:
        real_filename = filename
    try:
        inputio = FileOpener(real_filename, 'rb')
    except IOError, err:
        charset = getTerminalCharset()
        errmsg = unicode(str(err), charset)
        raise InputStreamError(
            _("Unable to open file %s: %s") % (filename, errmsg))
示例#22
0
文件: hachoir.py 项目: BwRy/sandy
def getmeta(tempfile):
  """Return the plaintext metadata lines of ``tempfile``.

  Returns the string "error" when the file cannot be parsed or no
  metadata can be extracted.
  """
  # NOTE(review): the handler of this ``try`` is not part of the visible
  # snippet.
  try:
    
    filename = tempfile
    # Unicode name for hachoir, original name for opening the file.
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    if not parser:
      print >>stderr, "Unable to parse file"
      return "error"
    try:
      metadata = extractMetadata(parser)
    except HachoirError, err:
      print "Metadata extraction error: %s" % unicode(err)
      metadata = None
    if not metadata:
      print "Unable to extract metadata"
      return "error"

    text = metadata.exportPlaintext()
    # ``charset`` is computed but not used in the visible code.
    charset = getTerminalCharset()
    return text
示例#23
0
def getmeta(tempfile):
    """Return the plaintext metadata lines of ``tempfile``.

    Returns the string "error" when the file cannot be parsed or no
    metadata can be extracted.
    """
    # NOTE(review): the handler of this ``try`` is not part of the visible
    # snippet.
    try:

        filename = tempfile
        # Unicode name for hachoir, original name for opening the file.
        filename, realname = unicodeFilename(filename), filename
        parser = createParser(filename, realname)
        if not parser:
            print >> stderr, "Unable to parse file"
            return "error"
        try:
            metadata = extractMetadata(parser)
        except HachoirError, err:
            print "Metadata extraction error: %s" % unicode(err)
            metadata = None
        if not metadata:
            print "Unable to extract metadata"
            return "error"

        text = metadata.exportPlaintext()
        # ``charset`` is computed but not used in the visible code.
        charset = getTerminalCharset()
        return text
示例#24
0
def main():
    setlocale(LC_ALL, "C")
    if len(sys.argv) != 2:
        print >> sys.stderr, "usage: %s testcase_directory" % sys.argv[0]
        sys.exit(1)
    charset = getTerminalCharset()
    directory = unicode(sys.argv[1], charset)

    print "Test hachoir-metadata using testcase."
    print
    print "Testcase is in directory: %s" % directory
    ok = testFiles(directory)
    if ok:
        print
        print "Result: ok for the %s files" % len(testcase_files)
        sys.exit(0)
    else:
        print
        for index in xrange(3):
            print "!!! ERROR !!!"
        print
        sys.exit(1)
示例#25
0
def main():
    setlocale(LC_ALL, "C")
    if len(sys.argv) != 2:
        print >>sys.stderr, "usage: %s testcase_directory" % sys.argv[0]
        sys.exit(1)
    charset = getTerminalCharset()
    directory = unicode(sys.argv[1], charset)

    print "Test hachoir-metadata using testcase."
    print
    print "Testcase is in directory: %s" % directory
    ok = testFiles(directory)
    if ok:
        print
        print "Result: ok for the %s files" % len(testcase_files)
        sys.exit(0)
    else:
        print
        for index in xrange(3):
            print "!!! ERROR !!!"
        print
        sys.exit(1)
示例#26
0
# (c) Raif Sarcich 2011 GPLv3

# Utility functions for paths

import os, sys, os.path, time, string, re
import urllib2, urlparse
from pieberry.pieconfig.config import PIE_CONFIG
from pieberry.pieconfig.paths import *
from pieberry.pieconfig.schemas import FEXTENSIONS
from pieberry.pieutility.decoding import *
from hachoir_core.i18n import getTerminalCharset

# Terminal charset, looked up once when the module is imported.
charset = getTerminalCharset()

def get_session(source=None):
    '''Return a session code used to group objects.

    The code is "d_<seconds>" when source is 'desktop' and "w_<seconds>"
    for any other source, where <seconds> is the current time.time()
    truncated to an integer.
    '''
    prefix = 'd' if source == 'desktop' else 'w'
    stamp = str(int(time.time()))
    return '%s_%s' % (prefix, stamp)

def auto_increment_fn(fn):
    '''Return a file name derived from fn that does not exist yet.

    fn is returned unchanged when nothing exists at that path.  Otherwise
    "_1", "_2", ... is inserted before the extension until an unused name
    is found.

    Raises RuntimeError once the counter reaches 1000.
    '''
    dn = os.path.dirname(fn)
    bn, ext = os.path.splitext(os.path.basename(fn))
    counter = 0
    while os.path.exists(fn):
        counter += 1
        fn = os.path.join(dn, '%s_%d%s' % (bn, counter, ext))
        if counter == 1000:
            # A plain string is not a valid exception object; raise a real
            # exception carrying the same message.
            raise RuntimeError('auto_increment_fn: Too many files - giving up')
    return fn
示例#27
0
def exploreFieldSet(field_set, args, options={}):
    """Build the urwid (curses) interface used to browse ``field_set``.

    Sets up the screen palette, redirects hachoir log messages into a log
    widget, creates the tabbed body holding a tree of ``field_set`` and
    defines the event loop ``run()``.

    ``args`` supplies ``preload``, ``path`` and ``profile_display``;
    ``options`` is forwarded to every TreeBox.

    NOTE(review): ``options={}`` is a mutable default shared between calls;
    it is only forwarded here.
    NOTE(review): the code that invokes ``run()`` is not part of the
    visible snippet.
    """
    charset = getTerminalCharset()

    ui = urwid.curses_display.Screen()
    ui.register_palette((
        ('focus', 'white', 'dark blue'),
        ('sep', 'white', 'dark red'),
        ('input', 'black', 'light gray'),
    ))

    # msgs[0]: pending (level, prefix, text) tuples queued by logger()
    # msgs[1]: Text widgets displayed by the log ListBox
    # msgs[2]: set to len(msgs[1]) after each screen draw
    msgs = [[],[],0]
    hachoir_log.use_print = False
    def logger(level, prefix, text, ctxt):
        # Prefix the message with the names known for its context, then
        # queue it as unicode for display by run().
        if ctxt is not None:
            c = []
            if hasattr(ctxt, "_logger"):
                c[:0] = [ ctxt._logger() ]
            if issubclass(ctxt.__class__, Field):
                ctxt = ctxt["/"]
            name = logger.objects.get(ctxt)
            if name:
                c[:0] = [ name ]
            if c:
                text = "[%s] %s" % ('|'.join(c), text)
        if not isinstance(text, unicode):
            text = unicode(text, charset)
        msgs[0].append((level, prefix, text))
    # Maps field sets / streams / parsers to the tab name shown in messages.
    logger.objects = WeakKeyDictionary()
    hachoir_log.on_new_message = logger

    preload_fields = 1 + max(0, args.preload)

    # One counter per log level, displayed in the separator.
    log_count = [ 0, 0, 0 ]
    sep = Separator("log: %%u/%%u/%%u  |  %s  " % _("F1: help"))
    sep.set_info(*tuple(log_count))
    body = Tabbed(sep)
    help = ('help', ListBox([ Text(getHelpMessage()) ]))
    logger.objects[field_set] = logger.objects[field_set.stream] = name = u'root'
    body.append((name, TreeBox(charset, Node(field_set, None), preload_fields, args.path, options)))

    # The log starts with a height of 0 rows.
    log = BoxAdapter(ListBox(msgs[1]), 0)
    log.selectable = lambda: False
    wrapped_sep = AttrWrap(sep, 'sep')
    footer = Pile([ ('flow', wrapped_sep), log ])

    # awful way to allow the user to hide the log widget
    log.render = lambda size, focus=False: BoxAdapter.render(log, size[:1], focus)
    footer.render = lambda (maxcol,), focus=False: Pile.render(footer, (maxcol, sep.rows((maxcol,))+log.height), focus)

    top = Frame(body, None, footer)

    # Swap the separator for an input widget while the user is typing, and
    # restore it afterwards.
    def input_enter(w):
        footer.widget_list[0] = w
        footer.set_focus(0)
        top.set_focus('footer')
    def input_leave():
        footer.widget_list[0] = wrapped_sep
        footer.set_focus(0)
        top.set_focus('body')
    input = Input(input_enter, input_leave)

    def run():
        # Event loop: process the pending events, flush queued log
        # messages, redraw, then wait for new input.
        msg = _resize = retry = 0
        # Start with a synthetic resize event so ``size`` gets initialised.
        events = ( "window resize", )
        profile_display = args.profile_display
        while True:
            for e in events:
                try:
                    if e == "window resize":
                        size = ui.get_cols_rows()
                        resize = log.height
                    else:
                        e = top.keypress(size, e)
                        # Keys left unhandled by the widgets are handled here.
                        if e is None:
                            pass
                        elif e in ('f1', '?'):
                            # Select the help tab, adding it if necessary.
                            try:
                                body.select(body.tabs.index(help))
                            except ValueError:
                                body.append(help)
                                resize = log.height
                        elif e in ('esc', 'ctrl w'):
                            # Close the current tab; leave when none remains.
                            body.close()
                            if body.box_widget is None:
                                return
                            resize = log.height
                        elif e == '+':
                            if log.height:
                                resize = log.height - 1
                        elif e == '-':
                            resize = log.height + 1
                        elif e == 'q':
                            return
                #except AssertionError:
                #    hachoir_log.error(getBacktrace())
                except NewTab_Stream, e:
                    # Open the sub-stream of the field in a new tab.
                    stream = e.field.getSubIStream()
                    logger.objects[stream] = e = "%u/%s" % (body.active, e.field.absolute_address)
                    parser = guessParser(stream)
                    if not parser:
                        hachoir_log.error(_("No parser found for %s") % stream.source)
                    else:
                        logger.objects[parser] = e
                        body.append((e, TreeBox(charset, Node(parser, None), preload_fields, None, options)))
                        resize = log.height
                except NeedInput, e:
                    input.do(*e.args)
                if profile_display:
                    # Handle a single event per redraw.
                    events = events[1:]
                    break
            while True:
                if msgs[0]:
                    # Turn queued messages into Text widgets of the log.
                    for level, prefix, text in msgs[0]:
                        log_count[level] += 1
                        txt = Text("[%u]%s %s" % (msg, prefix, text))
                        msg += 1
                        msgs[1].append(txt)
                        _resize += txt.rows(size[:1])
                    # Grow the log so the new messages fit.
                    if log.height < _resize and (resize is None or resize < _resize):
                        resize = _resize
                    log.set_focus(len(msgs[1])-1)
                    sep.set_info(*tuple(log_count))
                    msgs[0] = []
                if resize is not None:
                    # Share the screen rows between body, separator and log,
                    # keeping at least one row for the body.
                    body.height = size[1] - sep.rows(size[:1]) - resize
                    if body.height <= 0:
                        resize += body.height - 1
                        body.height = 1
                    log.height = resize
                    resize = None
                canvas = top.render(size, focus=True)
                # Rendering may have queued new messages; loop once more to
                # show them (a second retry trips the assert).
                if not msgs[0]:
                    _resize = retry = 0
                    break
                assert not retry
                retry += 1
            ui.draw_screen(size, canvas)
            msgs[2] = len(msgs[1])
            if profile_display and events:
                continue
            # Wait until the screen returns some input.
            while True:
                events = ui.get_input()
                if events: break
示例#28
0
    def separate_files(self):
        """Classify every file of self.file_list and process it by type.

        * image: runs the caffe example script on the file.
        * audio: extracts the hachoir metadata, writes it to a ".txt" file
          in the Files directory and records the (text file, source file)
          pair in the "Files" collection of the "MuDaM" database.
        * text: copies the file to the Files directory and records the
          (copy, source) pair in the same collection.
        * video: only counted.

        Afterwards the per-type counts are reported through form2, the R
        clustering script is run and form3 is shown.
        """
        files_dir = "/home/nsk/Desktop/Start/Begin1/Files/"
        self.form2.update_progress("Getting file types", 10)
        #Feature Extraction from images
        self.form2.update_message(
            "Extracting features from images. Please wait")
        images = audios = texts = videos = 0
        for path in self.file_list:
            file_type = self.magic.file(path).split(";")[0]
            if 'image' in file_type:
                images += 1
                self.form2.update_value(1)
                subprocess.call([
                    'python',
                    '/home/nsk/Project/caffe/examples/caffe_example.py', path
                ])
            if 'audio' in file_type:
                audios += 1
                #update while processing
                self.form2.update_value(1)
                filename, realname = unicodeFilename(path), path
                parser = createParser(filename, realname)
                if not parser:
                    print >> stderr, "Unable to parse file"
                    continue
                try:
                    metadata = extractMetadata(parser)
                except Exception as err:
                    print "Metadata extraction error: %s" % unicode(err)
                    metadata = None
                if not metadata:
                    print "Unable to extract metadata"
                    continue

                text = metadata.exportPlaintext()
                #create corresponding text file
                key_string = files_dir + path.split(
                    "/")[-1].split(".")[0] + "." + "txt"
                #write metadata to the file; the handle is closed even if a
                #write fails
                with open(key_string, "wb+") as fp:
                    for line in text:
                        fp.write(line)
                #write the key, value to the database
                key_string_1 = key_string.replace(".", ";")
                value_string_1 = path.replace(".", ";")
                a = Database("MuDaM")
                try:
                    a.set_collection("Files")
                    a.add_entry(key_string_1, value_string_1)
                finally:
                    a.close()
            if 'text' in file_type:
                texts += 1
                dest_path = files_dir + path.split("/")[-1]
                shutil.copy2(path, dest_path)
                key_string = dest_path.replace(".", ";")
                value_string = path.replace(".", ";")
                a = Database("MuDaM")
                try:
                    a.set_collection("Files")
                    a.add_entry(key_string, value_string)
                finally:
                    a.close()
            if 'video' in file_type:
                videos += 1

        self.form2.update_message("Analysed " + str(images) + " images")
        self.form2.update_message("Analysed " + str(audios) + " audio files")
        self.form2.update_message("Detected " + str(texts) + " text files")
        self.form2.update_message("Analysed " + str(videos) + " video files")
        self.form2.update_progress("Creating Database entries", 10)
        self.form2.update_progress("Running clustering alogrithm", 30)
        subprocess.call(['Rscript', '/home/nsk/Desktop/Start/Begin1/Final.r'])
        self.form2.hide()
        self.form3.test()
        self.form3.show()
from hachoir_parser import createParser


def metadata_for(filename):
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    if not parser:
        print "Unable to parse file"
        exit(1)
    try:
        metadata = extractMetadata(parser)
    except HachoirError, err:
        print "Metadata extraction error: %s" % unicode(err)
        metadata = None
    if not metadata:
        print "Unable to extract metadata"
        exit(1)

    text = metadata.exportPlaintext()
    charset = getTerminalCharset()
    for line in text:
        print makePrintable(line, charset)

    return metadata


def extract_data(metadata):
    for data in sorted(metadata):
        if len(data.values) > 0:
            print data.key, data.values[0].value
示例#30
0
 def _parse(self, filename):
     """Create a parser for *filename* and hand it to ``_fields_parse``.

     *filename* is decoded to ``unicode`` with the charset returned by
     ``getTerminalCharset`` before being passed to ``createParser``.  The
     resulting parser is given to ``self._fields_parse`` with ``0`` as the
     first argument.
     """
     charset = getTerminalCharset()
     decoded_name = unicode(filename, charset)
     self._fields_parse(0, createParser(decoded_name))
示例#31
0
 def _parse(self, filename):
     """Parse *filename* and forward the parser to ``_fields_parse``.

     The name is first converted to ``unicode`` using the charset reported by
     ``getTerminalCharset``; ``createParser`` is then called on it and its
     result is passed to ``self._fields_parse`` after a leading ``0``.
     """
     unicode_name = unicode(filename, getTerminalCharset())
     parser = createParser(unicode_name)
     self._fields_parse(0, parser)
示例#32
0
def exploreFieldSet(field_set, args, options={}):
    charset = getTerminalCharset()

    ui = urwid.curses_display.Screen()
    ui.register_palette((
        ('focus', 'white', 'dark blue'),
        ('sep', 'white', 'dark red'),
        ('input', 'black', 'light gray'),
    ))

    msgs = [[],[],0]
    hachoir_log.use_print = False
    def logger(level, prefix, text, ctxt):
        if ctxt is not None:
            c = []
            if hasattr(ctxt, "_logger"):
                c[:0] = [ ctxt._logger() ]
            if issubclass(ctxt.__class__, Field):
                ctxt = ctxt["/"]
            name = logger.objects.get(ctxt)
            if name:
                c[:0] = [ name ]
            if c:
                text = "[%s] %s" % ('|'.join(c), text)
        if not isinstance(text, unicode):
            text = unicode(text, charset)
        msgs[0].append((level, prefix, text))
    logger.objects = WeakKeyDictionary()
    hachoir_log.on_new_message = logger

    preload_fields = 1 + max(0, args.preload)

    log_count = [ 0, 0, 0 ]
    sep = Separator("log: %%u/%%u/%%u  |  %s  " % _("F1: help"))
    sep.set_info(*tuple(log_count))
    body = Tabbed(sep)
    help = ('help', ListBox([ Text(getHelpMessage()) ]))
    logger.objects[field_set] = logger.objects[field_set.stream] = name = u'root'
    body.append((name, TreeBox(charset, Node(field_set, None), preload_fields, args.path, options)))

    log = BoxAdapter(ListBox(msgs[1]), 0)
    log.selectable = lambda: False
    wrapped_sep = AttrWrap(sep, 'sep')
    footer = Pile([ ('flow', wrapped_sep), log ])

    # awful way to allow the user to hide the log widget
    log.render = lambda size, focus=False: BoxAdapter.render(log, size[:1], focus)
    footer.render = lambda (maxcol,), focus=False: Pile.render(footer, (maxcol, sep.rows((maxcol,))+log.height), focus)

    top = Frame(body, None, footer)

    def input_enter(w):
        footer.widget_list[0] = w
        footer.set_focus(0)
        top.set_focus('footer')
    def input_leave():
        footer.widget_list[0] = wrapped_sep
        footer.set_focus(0)
        top.set_focus('body')
    input = Input(input_enter, input_leave)

    def run():
        msg = _resize = retry = 0
        events = ( "window resize", )
        profile_display = args.profile_display
        while True:
            for e in events:
                try:
                    if e == "window resize":
                        size = ui.get_cols_rows()
                        resize = log.height
                    else:
                        e = top.keypress(size, e)
                        if e is None:
                            pass
                        elif e in ('f1', '?'):
                            try:
                                body.select(body.tabs.index(help))
                            except ValueError:
                                body.append(help)
                                resize = log.height
                        elif e in ('esc', 'ctrl w'):
                            body.close()
                            if body.box_widget is None:
                                return
                            resize = log.height
                        elif e == '+':
                            if log.height:
                                resize = log.height - 1
                        elif e == '-':
                            resize = log.height + 1
                        elif e == 'q':
                            return
                #except AssertionError:
                #    hachoir_log.error(getBacktrace())
                except NewTab_Stream, e:
                    stream = e.field.getSubIStream()
                    logger.objects[stream] = e = "%u/%s" % (body.active, e.field.absolute_address)
                    parser = guessParser(stream)
                    if not parser:
                        hachoir_log.error(_("No parser found for %s") % stream.source)
                    else:
                        logger.objects[parser] = e
                        body.append((e, TreeBox(charset, Node(parser, None), preload_fields, None, options)))
                        resize = log.height
                except NeedInput, e:
                    input.do(*e.args)
                if profile_display:
                    events = events[1:]
                    break
            while True:
                if msgs[0]:
                    for level, prefix, text in msgs[0]:
                        log_count[level] += 1
                        txt = Text("[%u]%s %s" % (msg, prefix, text))
                        msg += 1
                        msgs[1].append(txt)
                        _resize += txt.rows(size[:1])
                    if log.height < _resize and (resize is None or resize < _resize):
                        resize = _resize
                    log.set_focus(len(msgs[1])-1)
                    sep.set_info(*tuple(log_count))
                    msgs[0] = []
                if resize is not None:
                    body.height = size[1] - sep.rows(size[:1]) - resize
                    if body.height <= 0:
                        resize += body.height - 1
                        body.height = 1
                    log.height = resize
                    resize = None
                canvas = top.render(size, focus=True)
                if not msgs[0]:
                    _resize = retry = 0
                    break
                assert not retry
                retry += 1
            ui.draw_screen(size, canvas)
            msgs[2] = len(msgs[1])
            if profile_display and events:
                continue
            while True:
                events = ui.get_input()
                if events: break