def execute(indexobj, filename):
    """Extract text from *filename* by running it through ``pdftotext``.

    ``indexobj`` is accepted but not used here. The command template is
    handed to ``filter.execfilter`` together with ``filename``; whatever
    that helper returns is returned unchanged.
    """
    # "-enc UTF-8" and the trailing "-" are pdftotext options
    # (presumably: UTF-8 output, written to stdout -- see its man page).
    command = "pdftotext -enc UTF-8 %s -"
    return filter.execfilter(command, filename)
def execute(indexobj, filename):
    """Extract text from *filename* by running it through ``pdftotext -q``.

    ``indexobj`` is accepted but not used here. The result of
    ``execfilter`` is returned unchanged.
    """
    # The -q switch requests quiet operation (no messages, no errors).
    # Without it, poppler-utils pdftotext on Debian/Etch generates lots of
    # output on stderr (e.g. 10MB), which causes problems in the current
    # execfilter implementation.
    command = "pdftotext -q -enc UTF-8 %s -"
    return execfilter(command, filename)
def execute(indexobj, filename):
    """Extract text from *filename* by running it through ``catppt``.

    ``indexobj`` is accepted but not used here. The result of
    ``execfilter`` is returned unchanged.
    """
    # "-dutf-8" is a catppt option (presumably selects UTF-8 as the
    # output charset -- see its man page).
    return execfilter("catppt -dutf-8 %s", filename)
def execute(indexobj, filename):
    """Extract text from *filename* by running it through ``antiword``.

    ``indexobj`` is accepted but not used here. The result of
    ``filter.execfilter`` is returned unchanged.
    """
    command = "antiword %s"
    return filter.execfilter(command, filename)
def execute(indexobj, filename):
    """Extract text from *filename* by running it through ``xls2csv``.

    ``indexobj`` is accepted but not used here. Commas and double quotes
    in the output of ``execfilter`` are replaced by spaces before the
    text is returned.
    """
    text = execfilter("xls2csv %s", filename)
    # xls2csv uses comma as the field separator and wraps field content in
    # double quotes; blank out both so they do not confuse the indexer.
    for csv_char in (u',', u'"'):
        text = text.replace(csv_char, u' ')
    return text
def execute(indexobj, filename):
    """Extract text from *filename* by running it through ``antiword``.

    ``indexobj`` is accepted but not used here. On POSIX systems the
    command is prefixed with a ``HOME=/tmp`` assignment. The result of
    ``execfilter`` is returned unchanged.
    """
    # Without HOME set, antiword complains (on Linux), so supply one.
    env_prefix = "HOME=/tmp " if os.name == 'posix' else ""
    return execfilter(env_prefix + "antiword %s", filename)
def execute(indexobj, filename):
    """Extract text from *filename* by running it through ``catdoc``.

    ``indexobj`` is accepted but not used here. The result of
    ``filter.execfilter`` is returned unchanged.
    """
    command = "catdoc %s"
    return filter.execfilter(command, filename)
def execute(indexobj, filename):
    """Extract text from *filename* by running it through ``catdoc``.

    ``indexobj`` is accepted but not used here. The result of
    ``execfilter`` is returned unchanged.
    """
    command = "catdoc %s"
    return execfilter(command, filename)