def upload_dir(self, dirname): """upload all RDF files found inside dirname and subdirectories (recursively)""" BATCH_UPLOAD_RDF = configmanager.get_config('BATCH_UPLOAD_RDF', 'no') == 'yes' BATCH_UPLOAD_SIZE = int(configmanager.get_config('BATCH_UPLOAD_SIZE', 100)) retry = [] rdffiles = [] for dirpath, dirnames, filenames in os.walk(dirname): for fn in filenames: if fn.endswith(RDF_GRAPH_FORMAT): if BATCH_UPLOAD_RDF: rdffiles.append(os.path.join(dirpath, fn)) if len(rdffiles) > BATCH_UPLOAD_SIZE: try: self.upload_many(rdffiles) rdffiles = [] except Exception as e: print "problem uploading", len(rdffiles), "files from ", dirname print e retry.extend(rdffiles) rdffiles = [] else: try: self.upload(os.path.join(dirpath, fn)) except: print "problem uploading", fn retry.append(os.path.join(dirpath, fn)) # upload any remaining batch of files if BATCH_UPLOAD_RDF and len(rdffiles) > 0: try: self.upload_many(rdffiles) rdffiles = [] except: print "problem uploading", len(rdffiles), "files from ", dirname retry.extend(rdffiles) if len(retry) > 0: print "Retrying ", len(retry), "uploads" while len(retry) > 0: fn = retry.pop() try: self.upload(fn) except Exception as e: print "problem with retry of ", fn print e
def upload_collectionrecords(server): import glob basedir = configmanager.get_config("CORPUS_BASEDIR") for f in glob.glob(os.path.join(basedir, '*.n3')): print "Uploading", f server.upload(f)
def report(self, files, datasize, linecount):
    """Append one line of upload statistics to the CSV file self.logfile.

    files     -- written to the log as the second column
    datasize  -- written to the log as the third column
    linecount -- used to estimate the size when QUERY_SIZE is not 'yes'

    The size is taken from self.size() when the QUERY_SIZE config value is
    'yes', otherwise it is estimated from linecount.  It is added to
    self.triplecount.  The columns written are: current time, files,
    datasize, seconds since self.starttime, size, self.triplecount.
    """
    query_store = configmanager.get_config('QUERY_SIZE', 'no') == 'yes'
    if not query_store:
        # estimate size from number of lines, 1.172 is a factor
        # measured on AusTalk to account for duplicate triples
        size = linecount/1.172
    else:
        size = self.size()

    self.triplecount += size

    with open(self.logfile, "a") as fd:
        row = (time.time(), files, datasize,
               time.time()-self.starttime, size, self.triplecount)
        fd.write("%f,%d,%d,%f,%d,%d\n" % row)
"""Support for uploading RDF data directly to a Sesame instance""" import urllib, urllib2 import os import json import time import configmanager configmanager.configinit() RDF_GRAPH_FORMAT = configmanager.get_config("RDF_GRAPH_FORMAT", "nt") import tempfile class RequestWithMethod(urllib2.Request): def __init__(self, *args, **kwargs): self._method = kwargs.get('method') if self._method: del kwargs['method'] urllib2.Request.__init__(self, *args, **kwargs) def get_method(self): return self._method if self._method else super(RequestWithMethod, self).get_method() class SesameServer(): """A utility class to support HTTP interaction with a sesame triple store""" def __init__(self, url, logfile='benchmark.csv'): self.url = url self.logfile = logfile
def maus(wavfile, text, language='aus', canonly=False, minpauselen=5,
         startword=0, endword=999999, mausshift=10, insprob=0.0,
         inskantextgrid=True, insorttextgrid=True, usetrn=False,
         outformat='TextGrid', lexicon=None):
    """Send the given wavfile to MAUS for forced alignment

    text is the orthographic transcription
    lexicon, if given, is passed on to load_lexicon()
    the remaining keyword arguments are passed to MAUS as its
    LANGUAGE, CANONLY, MINPAUSLEN, STARTWORD, ENDWORD, MAUSSHIFT,
    INSPROB, INSKANTEXTGRID, INSORTTEXTGRID, USETRN and OUTFORMAT
    parameters

    When the MAUS_LOCAL config value is 'yes' the program named by
    MAUS_PROGRAM is run locally, otherwise the request is posted
    to MAUS_URL.

    returns the text of the textgrid returned by MAUS
    raises MausException if there was an error, the exception
    contains any error text returned by the MAUS web service

    >>> txt = maus("test/bassinette-sample-16.wav", "bassinette")
    >>> txt.find('xmax')
    62
    >>> txt.find('b{s@net')
    896
    >>> txt = maus("test/bassinette-sample-16.wav", "not in the lexicon")
    Traceback (most recent call last):
    MausException: Can't generate phonetic transcription for text 'not in the lexicon'

    # a bad request, send a text file
    >>> maus("annotate/maus.py", "bassinette")
    Traceback (most recent call last):
    MausException: Internal Server Error

    # another bad request, an unknown language
    >>> maus("test/bassinette-sample-16.wav", "bassinette", language='unknown')
    Traceback (most recent call last):
    MausException: Internal Server Error

    #maus("test/bassinette-sample-16.wav", "bassinette", outformat="EMU")
    #something
    """
    lex = load_lexicon() if lexicon is None else load_lexicon(lexicon)

    phb = text_phb(text, lex)
    if phb is None:
        truncated_text = (text[:100] + '...') if len(text) > 100 else text
        raise MausException(
            "Can't generate phonetic transcription for text '%s'" % truncated_text)

    # SIGNAL and BPF are replaced below with whatever the chosen
    # transport (local program or web service) needs
    params = dict((
        ('LANGUAGE', language),
        ('CANONLY', maus_boolean(canonly)),
        ('MINPAUSLEN', str(minpauselen)),
        ('STARTWORD', str(startword)),
        ('ENDWORD', str(endword)),
        ('MAUSSHIFT', str(mausshift)),
        ('INSPROB', str(insprob)),
        ('SIGNAL', wavfile),
        ('BPF', StringIO(phb)),
        ('OUTFORMAT', str(outformat)),
        ('USETRN', maus_boolean(usetrn)),
        ('INSKANTEXTGRID', maus_boolean(inskantextgrid)),
        ('INSORTTEXTGRID', maus_boolean(insorttextgrid)),
    ))

    if configmanager.get_config("MAUS_LOCAL", "no") == "yes":
        # the local program gets the names of the wav file, of a file
        # holding the BPF and of a file to write its result to
        params['SIGNAL'] = wavfile

        bpf = NamedTemporaryFile(prefix='bpf', delete=False)
        try:
            bpf.write(phb)
        finally:
            bpf.close()
        params['BPF'] = bpf.name

        outfile = NamedTemporaryFile(prefix='textgrid', delete=False)
        outfile.close()
        params['OUT'] = outfile.name

        maus_program = configmanager.get_config("MAUS_PROGRAM")
        maus_cmd = [maus_program]
        for key in params:
            maus_cmd.append("%s=%s" % (key, params[key]))
        #print " ".join(maus_cmd)

        result = ""
        try:
            try:
                # send err output to nowhere
                with open(os.devnull, "w") as devnull:
                    # call() blocks until the program exits instead of
                    # spinning on poll() at full CPU
                    subprocess.call(maus_cmd, stdout=devnull, stderr=devnull)
            except Exception:
                # a program that cannot be started leaves the output file
                # empty, which is reported as an error below
                pass

            if os.path.exists(outfile.name):
                # grab the result
                with open(outfile.name) as fh:
                    result = fh.read()
        finally:
            # never leave the temporary files behind
            for tmpname in (bpf.name, outfile.name):
                if os.path.exists(tmpname):
                    os.unlink(tmpname)

        # the output file is created (empty) before MAUS runs, so a failed
        # run shows up as an empty result rather than a missing file
        if not result:
            result = "Error Calling MAUS"
    else:
        handler = MultipartPostHandler.MultipartPostHandler(debuglevel=0)
        opener = urllib2.build_opener(handler)
        MAUS_URL = configmanager.get_config("MAUS_URL")

        # for the web call we open the wav file; binary mode because it is
        # audio data, and it is closed again once the request is done
        signal = open(wavfile, 'rb')
        try:
            params['SIGNAL'] = signal
            params['BPF'] = StringIO(phb)
            try:
                response = opener.open(MAUS_URL, params)
            except urllib2.HTTPError as e:
                raise MausException(e.msg)
            try:
                result = response.read()
            finally:
                response.close()
        finally:
            signal.close()

    if result.startswith('File type = "ooTextFile"'):
        # everything was ok
        return result
    # some kind of error
    raise MausException(result)
'''

import urllib, urllib2
import MultipartPostHandler
import os, sys
import subprocess
from StringIO import StringIO

import configmanager
configmanager.configinit()

from rdflib import Graph, Literal, URIRef
import convert
from convert.namespaces import *
from data import COMPONENT_MAP
from tempfile import NamedTemporaryFile

# value of the OUTPUT_DIR config setting
OUTPUT_DIR = configmanager.get_config('OUTPUT_DIR')
# path of the AUSTALK.lex file in the same directory as this module
LEXICON = os.path.join(os.path.dirname(__file__), "AUSTALK.lex")


# error type of this module; carries the error text as its message
class MausException(Exception):
    pass


def maus_boolean(value):
    """Turn a Python boolean value into a 'true' or 'false' for MAUS"""
    if value:
        return 'true'
    else:
def maus(wavfile, text, language='aus', canonly=False, minpauselen=5,
         startword=0, endword=999999, mausshift=10, insprob=0.0,
         inskantextgrid=True, insorttextgrid=True, usetrn=False,
         outformat='TextGrid', lexicon=None):
    """Send the given wavfile to MAUS for forced alignment

    text is the orthographic transcription
    lexicon, if given, is passed on to load_lexicon()
    the remaining keyword arguments are passed to MAUS as its
    LANGUAGE, CANONLY, MINPAUSLEN, STARTWORD, ENDWORD, MAUSSHIFT,
    INSPROB, INSKANTEXTGRID, INSORTTEXTGRID, USETRN and OUTFORMAT
    parameters

    When the MAUS_LOCAL config value is 'yes' the program named by
    MAUS_PROGRAM is run locally, otherwise the request is posted
    to MAUS_URL.

    returns the text of the textgrid returned by MAUS
    raises MausException if there was an error, the exception
    contains any error text returned by the MAUS web service

    >>> txt = maus("test/bassinette-sample-16.wav", "bassinette")
    >>> txt.find('xmax')
    62
    >>> txt.find('b{s@net')
    896
    >>> txt = maus("test/bassinette-sample-16.wav", "not in the lexicon")
    Traceback (most recent call last):
    MausException: Can't generate phonetic transcription for text 'not in the lexicon'

    # a bad request, send a text file
    >>> maus("annotate/maus.py", "bassinette")
    Traceback (most recent call last):
    MausException: Internal Server Error

    # another bad request, an unknown language
    >>> maus("test/bassinette-sample-16.wav", "bassinette", language='unknown')
    Traceback (most recent call last):
    MausException: Internal Server Error

    #maus("test/bassinette-sample-16.wav", "bassinette", outformat="EMU")
    #something
    """
    lex = load_lexicon() if lexicon is None else load_lexicon(lexicon)

    phb = text_phb(text, lex)
    if phb is None:
        truncated_text = (text[:100] + '...') if len(text) > 100 else text
        raise MausException(
            "Can't generate phonetic transcription for text '%s'" % truncated_text)

    # SIGNAL and BPF are replaced below with whatever the chosen
    # transport (local program or web service) needs
    params = dict((
        ('LANGUAGE', language),
        ('CANONLY', maus_boolean(canonly)),
        ('MINPAUSLEN', str(minpauselen)),
        ('STARTWORD', str(startword)),
        ('ENDWORD', str(endword)),
        ('MAUSSHIFT', str(mausshift)),
        ('INSPROB', str(insprob)),
        ('SIGNAL', wavfile),
        ('BPF', StringIO(phb)),
        ('OUTFORMAT', str(outformat)),
        ('USETRN', maus_boolean(usetrn)),
        ('INSKANTEXTGRID', maus_boolean(inskantextgrid)),
        ('INSORTTEXTGRID', maus_boolean(insorttextgrid)),
    ))

    if configmanager.get_config("MAUS_LOCAL", "no") == "yes":
        # the local program gets the names of the wav file, of a file
        # holding the BPF and of a file to write its result to
        params['SIGNAL'] = wavfile

        bpf = NamedTemporaryFile(prefix='bpf', delete=False)
        try:
            bpf.write(phb)
        finally:
            bpf.close()
        params['BPF'] = bpf.name

        outfile = NamedTemporaryFile(prefix='textgrid', delete=False)
        outfile.close()
        params['OUT'] = outfile.name

        maus_program = configmanager.get_config("MAUS_PROGRAM")
        maus_cmd = [maus_program]
        for key in params:
            maus_cmd.append("%s=%s" % (key, params[key]))
        #print " ".join(maus_cmd)

        result = ""
        try:
            try:
                # send err output to nowhere
                with open(os.devnull, "w") as devnull:
                    # call() blocks until the program exits instead of
                    # spinning on poll() at full CPU
                    subprocess.call(maus_cmd, stdout=devnull, stderr=devnull)
            except Exception:
                # a program that cannot be started leaves the output file
                # empty, which is reported as an error below
                pass

            if os.path.exists(outfile.name):
                # grab the result
                with open(outfile.name) as fh:
                    result = fh.read()
        finally:
            # never leave the temporary files behind
            for tmpname in (bpf.name, outfile.name):
                if os.path.exists(tmpname):
                    os.unlink(tmpname)

        # the output file is created (empty) before MAUS runs, so a failed
        # run shows up as an empty result rather than a missing file
        if not result:
            result = "Error Calling MAUS"
    else:
        handler = MultipartPostHandler.MultipartPostHandler(debuglevel=0)
        opener = urllib2.build_opener(handler)
        MAUS_URL = configmanager.get_config("MAUS_URL")

        # for the web call we open the wav file; binary mode because it is
        # audio data, and it is closed again once the request is done
        signal = open(wavfile, 'rb')
        try:
            params['SIGNAL'] = signal
            params['BPF'] = StringIO(phb)
            try:
                response = opener.open(MAUS_URL, params)
            except urllib2.HTTPError as e:
                raise MausException(e.msg)
            try:
                result = response.read()
            finally:
                response.close()
        finally:
            signal.close()

    if result.startswith('File type = "ooTextFile"'):
        # everything was ok
        return result
    # some kind of error
    raise MausException(result)
''' import urllib, urllib2 import MultipartPostHandler import os, sys import subprocess from StringIO import StringIO import configmanager configmanager.configinit() from rdflib import Graph, Literal, URIRef import convert from convert.namespaces import * from data import COMPONENT_MAP from tempfile import NamedTemporaryFile OUTPUT_DIR = configmanager.get_config('OUTPUT_DIR') LEXICON = os.path.join(os.path.dirname(__file__), "AUSTALK.lex") class MausException(Exception): pass def maus_boolean(value): """Turn a Python boolean value into a 'true' or 'false for MAUS""" if value: return 'true' else: return 'false'