def create_new_morpheme_language_model(data):
    """Create a new morpheme language model.

    :param dict data: the data for the morpheme language model to be created.
    :returns: an SQLAlchemy model object representing the morpheme language
        model.

    """
    # NOTE(review): an identical function is defined again immediately below;
    # one of the two definitions is redundant.
    lm = MorphemeLanguageModel(
        # Server-derived configuration values.
        parent_directory=h.get_OLD_directory_path('morphemelanguagemodels',
                                                  config=config),
        rare_delimiter=h.rare_delimiter,
        start_symbol=h.lm_start,
        end_symbol=h.lm_end,
        morpheme_delimiters=h.get_morpheme_delimiters(type_=u'unicode'),
        UUID=unicode(uuid4()),
        # User-supplied values.
        name=h.normalize(data['name']),
        description=h.normalize(data['description']),
        enterer=session['user'],
        modifier=session['user'],
        datetime_modified=h.now(),
        datetime_entered=h.now(),
        vocabulary_morphology=data['vocabulary_morphology'],
        corpus=data['corpus'],
        toolkit=data['toolkit'],
        order=data['order'],
        smoothing=data['smoothing'],
        categorial=data['categorial'])
    return lm
def create_new_morpheme_language_model(data):
    """Create a new morpheme language model.

    :param dict data: the data for the morpheme language model to be created.
    :returns: an SQLAlchemy model object representing the morpheme language
        model.

    """
    # Assemble the constructor arguments first, then build the model.
    kwargs = {
        'parent_directory': h.get_OLD_directory_path(
            'morphemelanguagemodels', config=config),
        'rare_delimiter': h.rare_delimiter,
        'start_symbol': h.lm_start,
        'end_symbol': h.lm_end,
        'morpheme_delimiters': h.get_morpheme_delimiters(type_=u'unicode'),
        'UUID': unicode(uuid4()),
        'name': h.normalize(data['name']),
        'description': h.normalize(data['description']),
        'enterer': session['user'],
        'modifier': session['user'],
        'datetime_modified': h.now(),
        'datetime_entered': h.now(),
        'vocabulary_morphology': data['vocabulary_morphology'],
        'corpus': data['corpus'],
        'toolkit': data['toolkit'],
        'order': data['order'],
        'smoothing': data['smoothing'],
        'categorial': data['categorial'],
    }
    return MorphemeLanguageModel(**kwargs)
def update_morpheme_language_model(morpheme_language_model, data):
    """Update a morpheme language model.

    :param morpheme_language_model: the morpheme language model model to be
        updated.
    :param dict data: representation of the updated morpheme language model.
    :returns: the updated morpheme language model model or, if ``changed`` has
        not been set to ``True``, ``False``.

    """
    changed = False
    # (attribute, new value) pairs; ``set_attr`` flips ``changed`` to True
    # whenever the new value differs from the current one.
    updates = (
        ('name', h.normalize(data['name'])),
        ('description', h.normalize(data['description'])),
        ('vocabulary_morphology', data['vocabulary_morphology']),
        ('corpus', data['corpus']),
        ('toolkit', data['toolkit']),
        ('order', data['order']),
        ('smoothing', data['smoothing']),
        ('categorial', data['categorial']),
        ('rare_delimiter', h.rare_delimiter),
        ('start_symbol', h.lm_start),
        ('end_symbol', h.lm_end),
    )
    for attr, value in updates:
        changed = morpheme_language_model.set_attr(attr, value, changed)
    if not changed:
        return changed
    session['user'] = Session.merge(session['user'])
    morpheme_language_model.modifier = session['user']
    morpheme_language_model.datetime_modified = h.now()
    return morpheme_language_model
def update_corpus(corpus, data):
    """Update a corpus.

    :param corpus: the corpus model to be updated.
    :param dict data: representation of the updated corpus.
    :returns: the updated corpus model or, if ``changed`` has not been set to
        ``True``, ``False``.

    """
    changed = False
    # Scalar (unicode) attributes.
    for attr, value in (
            ('name', h.normalize(data['name'])),
            ('description', h.normalize(data['description'])),
            ('content', data['content']),
            ('form_search', data['form_search'])):
        changed = corpus.set_attr(attr, value, changed)
    # Many-to-many collections: replace only when membership actually differs.
    new_tags = [t for t in data['tags'] if t]
    new_forms = [f for f in data['forms'] if f]
    if set(new_tags) != set(corpus.tags):
        corpus.tags = new_tags
        changed = True
    if set(new_forms) != set(corpus.forms):
        corpus.forms = new_forms
        changed = True
    if not changed:
        return changed
    session['user'] = Session.merge(session['user'])
    corpus.modifier = session['user']
    corpus.datetime_modified = h.now()
    return corpus
def create_new_morphology(data):
    """Create a new morphology.

    :param dict data: the data for the morphology to be created.
    :returns: an SQLAlchemy model object representing the morphology.

    """
    return Morphology(
        # Server-derived configuration values.
        parent_directory=h.get_OLD_directory_path('morphologies',
                                                  config=config),
        word_boundary_symbol=h.word_boundary_symbol,
        morpheme_delimiters=h.get_morpheme_delimiters(type_=u'unicode'),
        rare_delimiter=h.rare_delimiter,
        UUID=unicode(uuid4()),
        # User-supplied values.
        name=h.normalize(data['name']),
        description=h.normalize(data['description']),
        enterer=session['user'],
        modifier=session['user'],
        datetime_modified=h.now(),
        datetime_entered=h.now(),
        lexicon_corpus=data['lexicon_corpus'],
        rules_corpus=data['rules_corpus'],
        script_type=data['script_type'],
        extract_morphemes_from_rules_corpus=data[
            'extract_morphemes_from_rules_corpus'],
        rules=data['rules'],
        rich_upper=data['rich_upper'],
        rich_lower=data['rich_lower'],
        include_unknowns=data['include_unknowns'])
def create_new_corpus(data):
    """Create a new corpus.

    :param dict data: the data for the corpus to be created.
    :returns: an SQLAlchemy model object representing the corpus.

    .. note::

        I have opted not to complicate corpora by giving meaning to the
        "restricted" tag where they are concerned. Given that a corpus' forms
        can be determined by a form search model and are therefore variable, it
        does not seem practical to toggle restricted status based on the status
        of any number of forms. The corpus files that may be associated to a
        corpus by requesting ``PUT /corpora/id/writetofile`` may, however, be
        restricted if a restricted form is written to file.

    """
    corpus = Corpus()
    corpus.UUID = unicode(uuid4())
    # Normalized user-supplied strings.
    corpus.name = h.normalize(data['name'])
    corpus.description = h.normalize(data['description'])
    # Remaining user-supplied values, stored verbatim.
    corpus.content = data['content']
    corpus.form_search = data['form_search']
    corpus.forms = data['forms']
    corpus.tags = data['tags']
    # OLD-generated metadata.
    corpus.enterer = corpus.modifier = session['user']
    corpus.datetime_modified = corpus.datetime_entered = h.now()
    return corpus
def update_morphological_parser(morphological_parser, data):
    """Update a morphological parser.

    :param morphological_parser: the morphological parser model to be updated.
    :param dict data: representation of the updated morphological parser.
    :returns: the updated morphological parser model or, if ``changed`` has not
        been set to ``True``, ``False``.

    """
    changed = False
    for attr, value in (
            ('name', h.normalize(data['name'])),
            ('description', h.normalize(data['description'])),
            ('phonology', data['phonology']),
            ('morphology', data['morphology']),
            ('language_model', data['language_model'])):
        changed = morphological_parser.set_attr(attr, value, changed)
    if not changed:
        return changed
    session['user'] = Session.merge(session['user'])
    morphological_parser.modifier = session['user']
    morphological_parser.datetime_modified = h.now()
    return morphological_parser
def update_morphology(morphology, data):
    """Update a morphology.

    :param morphology: the morphology model to be updated.
    :param dict data: representation of the updated morphology.
    :returns: the updated morphology model or, if ``changed`` has not been set
        to ``True``, ``False``.

    """
    changed = False
    updates = (
        ('name', h.normalize(data['name'])),
        ('description', h.normalize(data['description'])),
        ('lexicon_corpus', data['lexicon_corpus']),
        ('rules_corpus', data['rules_corpus']),
        ('script_type', data['script_type']),
        ('extract_morphemes_from_rules_corpus',
         data['extract_morphemes_from_rules_corpus']),
        ('rules', data['rules']),
        ('rich_upper', data['rich_upper']),
        ('rich_lower', data['rich_lower']),
        ('include_unknowns', data['include_unknowns']),
        # Server-side settings are re-stamped on every update.
        ('rare_delimiter', h.rare_delimiter),
        ('word_boundary_symbol', h.word_boundary_symbol),
    )
    for attr, value in updates:
        changed = morphology.set_attr(attr, value, changed)
    if not changed:
        return changed
    session['user'] = Session.merge(session['user'])
    morphology.modifier = session['user']
    morphology.datetime_modified = h.now()
    return morphology
def update_speaker(speaker, data):
    """Update a speaker.

    :param speaker: the speaker model to be updated.
    :param dict data: representation of the updated speaker.
    :returns: the updated speaker model or, if ``changed`` has not been set to
        ``True``, ``False``.

    """
    changed = False
    # Unicode Data.  NOTE: the ``html`` value must be computed last because it
    # reads ``speaker.page_content`` and ``speaker.markup_language`` AFTER the
    # preceding set_attr calls have stored the incoming values.
    changed = speaker.set_attr(
        'first_name', h.normalize(data['first_name']), changed)
    changed = speaker.set_attr(
        'last_name', h.normalize(data['last_name']), changed)
    changed = speaker.set_attr(
        'dialect', h.normalize(data['dialect']), changed)
    changed = speaker.set_attr(
        'page_content', h.normalize(data['page_content']), changed)
    changed = speaker.set_attr(
        'markup_language', h.normalize(data['markup_language']), changed)
    changed = speaker.set_attr(
        'html',
        h.get_HTML_from_contents(speaker.page_content,
                                 speaker.markup_language),
        changed)
    if not changed:
        return changed
    speaker.datetime_modified = datetime.datetime.utcnow()
    return speaker
def create_new_tag(data):
    """Create a new tag.

    :param dict data: the data for the tag to be created.
    :returns: an SQLAlchemy model object representing the tag.

    """
    tag = Tag()
    # Normalized user-supplied strings.
    tag.name = h.normalize(data['name'])
    tag.description = h.normalize(data['description'])
    # OLD-generated timestamp.
    tag.datetime_modified = datetime.datetime.utcnow()
    return tag
def create_new_tag(data):
    """Create a new tag.

    :param dict data: the data for the tag to be created.
    :returns: an SQLAlchemy model object representing the tag.

    """
    new_tag = Tag()
    for attr in ('name', 'description'):
        setattr(new_tag, attr, h.normalize(data[attr]))
    new_tag.datetime_modified = datetime.datetime.utcnow()
    return new_tag
def create_new_elicitation_method(data):
    """Create a new elicitation method.

    :param dict data: the elicitation method to be created.
    :returns: an SQLAlchemy model object representing the elicitation method.

    """
    method = ElicitationMethod()
    method.name = h.normalize(data['name'])
    method.description = h.normalize(data['description'])
    method.datetime_modified = datetime.datetime.utcnow()
    return method
def create_new_elicitation_method(data):
    """Create a new elicitation method.

    :param dict data: the elicitation method to be created.
    :returns: an SQLAlchemy model object representing the elicitation method.

    """
    em = ElicitationMethod()
    for attr in ('name', 'description'):
        setattr(em, attr, h.normalize(data[attr]))
    em.datetime_modified = datetime.datetime.utcnow()
    return em
def create_new_syntactic_category(data):
    """Create a new syntactic category.

    :param dict data: the data for the syntactic category to be created.
    :returns: an SQLAlchemy model object representing the syntactic category.

    """
    category = SyntacticCategory()
    # Normalized free-text fields.
    category.name = h.normalize(data['name'])
    category.description = h.normalize(data['description'])
    # ``type`` is stored verbatim (it is a closed vocabulary value).
    category.type = data['type']
    category.datetime_modified = datetime.datetime.utcnow()
    return category
def create_new_syntactic_category(data):
    """Create a new syntactic category.

    :param dict data: the data for the syntactic category to be created.
    :returns: an SQLAlchemy model object representing the syntactic category.

    """
    sc = SyntacticCategory()
    sc.name = h.normalize(data['name'])
    sc.type = data['type']
    sc.description = h.normalize(data['description'])
    sc.datetime_modified = datetime.datetime.utcnow()
    return sc
def create_new_user(data):
    """Create a new user.

    :param dict data: the data for the user to be created.
    :returns: an SQLAlchemy model object representing the user.

    """
    user = User()
    # The salt must be generated before the password is encrypted with it.
    user.salt = h.generate_salt()
    user.password = unicode(
        h.encrypt_password(data['password'], str(user.salt)))
    # Normalized user-supplied strings.
    user.username = h.normalize(data['username'])
    user.first_name = h.normalize(data['first_name'])
    user.last_name = h.normalize(data['last_name'])
    user.email = h.normalize(data['email'])
    user.affiliation = h.normalize(data['affiliation'])
    user.role = h.normalize(data['role'])
    user.markup_language = h.normalize(data['markup_language'])
    user.page_content = h.normalize(data['page_content'])
    user.html = h.get_HTML_from_contents(user.page_content,
                                         user.markup_language)
    # Many-to-One Data: input and output orthographies.
    if data['input_orthography']:
        user.input_orthography = data['input_orthography']
    if data['output_orthography']:
        user.output_orthography = data['output_orthography']
    # OLD-generated Data.
    user.datetime_modified = datetime.datetime.utcnow()
    # Create the user's directory.
    h.create_user_directory(user)
    return user
def create_new_orthography(data):
    """Create a new orthography.

    :param dict data: the data for the orthography to be created.
    :returns: an SQLAlchemy model object representing the orthography.

    """
    model = Orthography()
    # Normalized free-text fields.
    model.name = h.normalize(data['name'])
    model.orthography = h.normalize(data['orthography'])
    # Boolean options, stored verbatim.
    model.lowercase = data['lowercase']
    model.initial_glottal_stops = data['initial_glottal_stops']
    model.datetime_modified = datetime.datetime.utcnow()
    return model
def create_new_user(data):
    """Create a new user.

    :param dict data: the data for the user to be created.
    :returns: an SQLAlchemy model object representing the user.

    """
    user = User()
    # Salt first: the encrypted password depends on it.
    user.salt = h.generate_salt()
    user.password = unicode(
        h.encrypt_password(data['password'], str(user.salt)))
    # Normalized user-supplied strings.
    for attr in ('username', 'first_name', 'last_name', 'email',
                 'affiliation', 'role', 'markup_language', 'page_content'):
        setattr(user, attr, h.normalize(data[attr]))
    user.html = h.get_HTML_from_contents(user.page_content,
                                         user.markup_language)
    # Many-to-One Data: input and output orthographies.
    if data['input_orthography']:
        user.input_orthography = data['input_orthography']
    if data['output_orthography']:
        user.output_orthography = data['output_orthography']
    # OLD-generated Data.
    user.datetime_modified = datetime.datetime.utcnow()
    # Create the user's directory.
    h.create_user_directory(user)
    return user
def create_new_orthography(data):
    """Create a new orthography.

    :param dict data: the data for the orthography to be created.
    :returns: an SQLAlchemy model object representing the orthography.

    """
    ortho = Orthography()
    for attr in ('name', 'orthography'):
        setattr(ortho, attr, h.normalize(data[attr]))
    for attr in ('lowercase', 'initial_glottal_stops'):
        setattr(ortho, attr, data[attr])
    ortho.datetime_modified = datetime.datetime.utcnow()
    return ortho
def create_new_page(data):
    """Create a new page.

    :param dict data: the data for the page to be created.
    :returns: an SQLAlchemy model object representing the page.

    """
    page = Page()
    # Normalized free-text fields.
    page.name = h.normalize(data['name'])
    page.heading = h.normalize(data['heading'])
    page.content = h.normalize(data['content'])
    # ``markup_language`` is stored verbatim and used to render the HTML.
    page.markup_language = data['markup_language']
    page.html = h.get_HTML_from_contents(page.content, page.markup_language)
    page.datetime_modified = datetime.datetime.utcnow()
    return page
def create_new_form_search(data):
    """Create a new form search.

    :param dict data: the form search to be created.
    :returns: an form search model object.

    """
    fs = FormSearch()
    fs.name = h.normalize(data['name'])
    # The search expression itself is purposefully not normalized
    # (reconsider this? ...).
    fs.search = data['search']
    fs.description = h.normalize(data['description'])
    fs.enterer = session['user']
    fs.datetime_modified = datetime.datetime.utcnow()
    return fs
def create_new_page(data):
    """Create a new page.

    :param dict data: the data for the page to be created.
    :returns: an SQLAlchemy model object representing the page.

    """
    new_page = Page()
    for attr in ('name', 'heading', 'content'):
        setattr(new_page, attr, h.normalize(data[attr]))
    new_page.markup_language = data['markup_language']
    new_page.html = h.get_HTML_from_contents(new_page.content,
                                             new_page.markup_language)
    new_page.datetime_modified = datetime.datetime.utcnow()
    return new_page
def apply(self, id, direction): """Call foma apply in the direction of ``direction`` on the input in the request body using a morphology. :param str id: the ``id`` value of the morphology that will be used. :param str direction: the direction of foma application. :Request body: JSON object of the form ``{'transcriptions': [t1, t2, ...]}``. :returns: if the morphology exists and foma is installed, a JSON object of the form ``{t1: [p1t1, p2t1, ...], ...}`` where ``t1`` is a transcription from the request body and ``p1t1``, ``p2t1``, etc. are outputs of ``t1`` after apply up/down. """ morphology = Session.query(Morphology).get(id) if morphology: if h.foma_installed(): morphology_binary_path = morphology.get_file_path('binary') if os.path.isfile(morphology_binary_path): try: inputs = json.loads(unicode(request.body, request.charset)) inputs = MorphemeSequencesSchema.to_python(inputs) inputs = [h.normalize(i) for i in inputs['morpheme_sequences']] return morphology.apply(direction, inputs) except h.JSONDecodeError: response.status_int = 400 return h.JSONDecodeErrorResponse except Invalid, e: response.status_int = 400 return {'errors': e.unpack_errors()} else: response.status_int = 400 return {'error': 'Morphology %d has not been compiled yet.' % morphology.id} else: response.status_int = 400 return {'error': 'Foma and flookup are not installed.'}
def create_new_speaker(data):
    """Create a new speaker.

    :param dict data: the data for the speaker to be created.
    :returns: an SQLAlchemy model object representing the speaker.

    """
    speaker = Speaker()
    # Normalized user-supplied strings; ``html`` is rendered last from the
    # already-assigned page content and markup language.
    for attr in ('first_name', 'last_name', 'dialect', 'page_content',
                 'markup_language'):
        setattr(speaker, attr, h.normalize(data[attr]))
    speaker.html = h.get_HTML_from_contents(speaker.page_content,
                                            speaker.markup_language)
    speaker.datetime_modified = datetime.datetime.utcnow()
    return speaker
def create_new_form_search(data):
    """Create a new form search.

    :param dict data: the form search to be created.
    :returns: an form search model object.

    """
    form_search = FormSearch()
    form_search.name = h.normalize(data['name'])
    # Note that this is purposefully not normalized (reconsider this? ...)
    form_search.search = data['search']
    form_search.description = h.normalize(data['description'])
    form_search.enterer = session['user']
    form_search.datetime_modified = datetime.datetime.utcnow()
    return form_search
def get_probabilities(self, id): """Return the probability of each sequence of morphemes passed in the JSON PUT params. :param list morpheme_sequences: space-delimited morphemes in form|gloss|category format wherer "|" is actually ``h.rare_delimiter``. :returns: a dictionary with morpheme sequences as keys and log probabilities as values. """ lm = Session.query(MorphemeLanguageModel).get(id) if lm: try: schema = MorphemeSequencesSchema() values = json.loads(unicode(request.body, request.charset)) data = schema.to_python(values) morpheme_sequences = [h.normalize(ms) for ms in data['morpheme_sequences']] return lm.get_probabilities(morpheme_sequences) except h.JSONDecodeError: response.status_int = 400 return h.JSONDecodeErrorResponse except Invalid, e: response.status_int = 400 return {'errors': e.unpack_errors()} except Exception: response.status_int = 400 return {'error': 'An error occurred while trying to generate probabilities.'}
def update_subinterval_referencing_file(file):
    """Update a subinterval-referencing file model.

    :param file: a file model object to update.
    :param request.body: a JSON object containing the data for updating the
        file.
    :returns: the file model or, if the file has not been updated, ``False``.

    """
    changed = False
    # Validate the request body against the subinterval-referencing schema.
    schema = FileSubintervalReferencingSchema()
    data = json.loads(unicode(request.body, request.charset))
    data['name'] = data.get('name') or u''
    state = h.State()
    state.full_dict = data
    state.user = session['user']
    data = schema.to_python(data, state)
    # Data unique to referencing subinterval files.  ``parent_file`` must be
    # set before ``name`` because the name defaults to the parent file's
    # filename.
    changed = file.set_attr('parent_file', data['parent_file'], changed)
    changed = file.set_attr(
        'name',
        (h.normalize(data['name']) or file.parent_file.filename),
        changed)
    changed = file.set_attr('start', data['start'], changed)
    changed = file.set_attr('end', data['end'], changed)
    file, changed = update_standard_metadata(file, data, changed)
    if not changed:
        return changed
    file.datetime_modified = datetime.datetime.utcnow()
    return file
def update_elicitation_method(elicitation_method, data):
    """Update an elicitation method.

    :param elicitation_method: the elicitation method model to be updated.
    :param dict data: representation of the updated elicitation method.
    :returns: the updated elicitation method model or, if ``changed`` has not
        been set to ``True``, ``False``.

    """
    changed = False
    for attr in ('name', 'description'):
        changed = elicitation_method.set_attr(
            attr, h.normalize(data[attr]), changed)
    if not changed:
        return changed
    elicitation_method.datetime_modified = datetime.datetime.utcnow()
    return elicitation_method
def applydown(self, id): """Apply-down (i.e., phonologize) the input in the request body using a phonology. :URL: ``PUT /phonologies/applydown/id`` (or ``PUT /phonologies/phonologize/id``) :param str id: the ``id`` value of the phonology that will be used. :Request body: JSON object of the form ``{'transcriptions': [t1, t2, ...]}``. :returns: if the phonology exists and foma is installed, a JSON object of the form ``{t1: [p1t1, p2t1, ...], ...}`` where ``t1`` is a transcription from the request body and ``p1t1``, ``p2t1``, etc. are phonologized outputs of ``t1``. """ phonology = Session.query(Phonology).get(id) if phonology: if h.foma_installed(): binary_path = phonology.get_file_path('binary') if os.path.isfile(binary_path): try: inputs = json.loads(unicode(request.body, request.charset)) inputs = MorphophonemicTranscriptionsSchema.to_python(inputs) inputs = [h.normalize(i) for i in inputs['transcriptions']] return phonology.applydown(inputs) except h.JSONDecodeError: response.status_int = 400 return h.JSONDecodeErrorResponse except Invalid, e: response.status_int = 400 return {'errors': e.unpack_errors()} else: response.status_int = 400 return {'error': 'Phonology %d has not been compiled yet.' % phonology.id} else: response.status_int = 400 return {'error': 'Foma and flookup are not installed.'}
def update_externally_hosted_file(file):
    """Update an externally hosted file model.

    :param file: a file model object to update.
    :param request.body: a JSON object containing the data for updating the
        file.
    :returns: the file model or, if the file has not been updated, ``False``.

    """
    changed = False
    # Validate the request body; a missing password becomes the empty string.
    data = json.loads(unicode(request.body, request.charset))
    data['password'] = data.get('password') or u''
    data = FileExternallyHostedSchema().to_python(data)
    # Data unique to referencing subinterval files
    for attr, value in (
            ('url', data['url']),
            ('name', h.normalize(data['name'])),
            ('password', data['password']),
            ('MIME_type', data['MIME_type'])):
        changed = file.set_attr(attr, value, changed)
    file, changed = update_standard_metadata(file, data, changed)
    if not changed:
        return changed
    file.datetime_modified = datetime.datetime.utcnow()
    return file
def get_probabilities(self, id): """Return the probability of each sequence of morphemes passed in the JSON PUT params. :param list morpheme_sequences: space-delimited morphemes in form|gloss|category format wherer "|" is actually ``h.rare_delimiter``. :returns: a dictionary with morpheme sequences as keys and log probabilities as values. """ lm = Session.query(MorphemeLanguageModel).get(id) if lm: try: schema = MorphemeSequencesSchema() values = json.loads(unicode(request.body, request.charset)) data = schema.to_python(values) morpheme_sequences = [ h.normalize(ms) for ms in data['morpheme_sequences'] ] return lm.get_probabilities(morpheme_sequences) except h.JSONDecodeError: response.status_int = 400 return h.JSONDecodeErrorResponse except Invalid, e: response.status_int = 400 return {'errors': e.unpack_errors()} except Exception: response.status_int = 400 return { 'error': 'An error occurred while trying to generate probabilities.' }
def create_subinterval_referencing_file(data):
    """Create a subinterval-referencing file.

    :param dict data: the data to create the file model.
    :param int data['parent_file']: the ``id`` value of an audio/video file
        model.
    :param float/int data['start']: the start of the interval in seconds.
    :param float/int data['end']: the end of the interval in seconds.
    :returns: an SQLAlchemy model object representing the file.

    A value for ``data['name']`` may also be supplied.

    """
    # Validate against the subinterval-referencing schema; a missing name
    # becomes the empty string.
    data['name'] = data.get('name') or u''
    schema = FileSubintervalReferencingSchema()
    state = h.State()
    state.full_dict = data
    state.user = session['user']
    data = schema.to_python(data, state)
    new_file = File()
    # Data unique to referencing subinterval files.  ``parent_file`` must be
    # assigned first: the name and MIME type default to the parent's.
    new_file.parent_file = data['parent_file']
    new_file.name = h.normalize(data['name']) or new_file.parent_file.filename
    new_file.start = data['start']
    new_file.end = data['end']
    new_file.MIME_type = new_file.parent_file.MIME_type
    new_file = add_standard_metadata(new_file, data)
    new_file = restrict_file_by_forms(new_file)
    return new_file
def update_tag(tag, data):
    """Update a tag.

    :param tag: the tag model to be updated.
    :param dict data: representation of the updated tag.
    :returns: the updated tag model or, if ``changed`` has not been set to
        ``True``, ``False``.

    """
    changed = False
    for attr in ('name', 'description'):
        changed = tag.set_attr(attr, h.normalize(data[attr]), changed)
    if not changed:
        return changed
    tag.datetime_modified = datetime.datetime.utcnow()
    return tag
def create_externally_hosted_file(data):
    """Create an externally hosted file.

    :param dict data: the data to create the file model.
    :param str data['url']: a valid URL where the file data are served.
    :returns: an SQLAlchemy model object representing the file.

    Optional keys of the data dictionary, not including the standard metadata
    ones, are ``name``, ``password`` and ``MIME_type``.

    """
    # A missing password becomes the empty string before validation.
    data['password'] = data.get('password') or u''
    data = FileExternallyHostedSchema().to_python(data)
    new_file = File()
    # User-inputted string data.
    new_file.name = h.normalize(data['name'])
    new_file.password = data['password']
    new_file.MIME_type = data['MIME_type']
    new_file.url = data['url']
    new_file = add_standard_metadata(new_file, data)
    new_file = restrict_file_by_forms(new_file)
    return new_file
def parse(self, id): """Parse the input word transcriptions using the morphological parser with id=``id``. :param str id: the ``id`` value of the morphological parser that will be used. :Request body: JSON object of the form ``{'transcriptions': [t1, t2, ...]}``. :returns: if the morphological parser exists and foma is installed, a JSON object of the form ``{t1: p1, t2: p2, ...}`` where ``t1`` and ``t2`` are transcriptions of words from the request body and ``p1`` and ``p2`` are the most probable morphological parsers of t1 and t2. """ parser = Session.query(MorphologicalParser).get(id) if not parser: response.status_int = 404 return {'error': 'There is no morphological parser with id %s' % id} if not h.foma_installed(): response.status_int = 400 return {'error': 'Foma and flookup are not installed.'} try: inputs = json.loads(unicode(request.body, request.charset)) schema = TranscriptionsSchema inputs = schema.to_python(inputs) inputs = [h.normalize(w) for w in inputs['transcriptions']] parses = parser.parse(inputs) # TODO: allow for a param which causes the candidates to be # returned as well as/instead of only the most probable parse # candidate. return dict((transcription, parse) for transcription, (parse, candidates) in parses.iteritems()) except h.JSONDecodeError: response.status_int = 400 return h.JSONDecodeErrorResponse except Invalid, e: response.status_int = 400 return {'errors': e.unpack_errors()}
def add_standard_metadata(file, data):
    """Add the standard metadata to the file model using the data dictionary.

    :param file: file model object
    :param dict data: dictionary containing file attribute values.
    :returns: the updated file model object.

    """
    file.description = h.normalize(data['description'])
    file.utterance_type = data['utterance_type']
    file.date_elicited = data['date_elicited']
    # Optional many-to-one associations.
    if data['elicitor']:
        file.elicitor = data['elicitor']
    if data['speaker']:
        file.speaker = data['speaker']
    # Drop falsy entries from the association lists.
    file.tags = [t for t in data['tags'] if t]
    file.forms = [f for f in data['forms'] if f]
    timestamp = h.now()
    file.datetime_entered = timestamp
    file.datetime_modified = timestamp
    # Because of SQLAlchemy's uniqueness constraints, we may need to set the
    # enterer to the elicitor.
    elicitor = data['elicitor']
    if elicitor and elicitor.id == session['user'].id:
        file.enterer = elicitor
    else:
        file.enterer = session['user']
    return file
def create_new_keyboard(data):
    """Create a new keyboard.

    :param dict data: the data for the keyboard to be created.
    :returns: an SQLAlchemy model object representing the keyboard.

    """
    kb = Keyboard()
    # Normalized user-supplied strings.
    kb.name = h.normalize(data['name'])
    kb.description = h.normalize(data['description'])
    kb.keyboard = h.normalize(data['keyboard'])
    # OLD-generated Data
    kb.datetime_entered = kb.datetime_modified = h.now()
    kb.enterer = kb.modifier = session['user']
    return kb
def update_tag(tag, data):
    """Update a tag.

    :param tag: the tag model to be updated.
    :param dict data: representation of the updated tag.
    :returns: the updated tag model or, if ``changed`` has not been set to
        ``True``, ``False``.

    """
    changed = False
    changed = tag.set_attr('name', h.normalize(data['name']), changed)
    changed = tag.set_attr(
        'description', h.normalize(data['description']), changed)
    if changed:
        tag.datetime_modified = datetime.datetime.utcnow()
        return tag
    return changed
def create_new_keyboard(data):
    """Create a new keyboard.

    :param dict data: the data for the keyboard to be created.
    :returns: an SQLAlchemy model object representing the keyboard.

    """
    keyboard = Keyboard()
    for attr in ('name', 'description', 'keyboard'):
        setattr(keyboard, attr, h.normalize(data[attr]))
    # OLD-generated Data
    keyboard.datetime_entered = keyboard.datetime_modified = h.now()
    keyboard.enterer = keyboard.modifier = session['user']
    return keyboard
def update_orthography(orthography, data):
    """Update an orthography.

    :param orthography: the orthography model to be updated.
    :param dict data: representation of the updated orthography.
    :returns: the updated orthography model or, if ``changed`` has not been
        set to ``True``, ``False``.

    """
    changed = False
    for attr, value in (
            ('name', h.normalize(data['name'])),
            ('orthography', h.normalize(data['orthography'])),
            ('lowercase', data['lowercase']),
            ('initial_glottal_stops', data['initial_glottal_stops'])):
        changed = orthography.set_attr(attr, value, changed)
    if not changed:
        return changed
    orthography.datetime_modified = datetime.datetime.utcnow()
    return orthography
def update_syntactic_category(syntactic_category, data):
    """Update a syntactic category.

    :param syntactic_category: the syntactic category model to be updated.
    :param dict data: representation of the updated syntactic category.
    :returns: the updated syntactic category model or, if ``changed`` has not
        been set to ``True``, ``False``.

    """
    changed = False
    # Unicode data: every field is normalized before comparison/assignment.
    for attribute in ('name', 'type', 'description'):
        changed = syntactic_category.set_attr(
            attribute, h.normalize(data[attribute]), changed)
    if not changed:
        return changed
    syntactic_category.datetime_modified = datetime.datetime.utcnow()
    return syntactic_category
def update_elicitation_method(elicitation_method, data):
    """Update an elicitation method.

    :param elicitation_method: the elicitation method model to be updated.
    :param dict data: representation of the updated elicitation method.
    :returns: the updated elicitation method model or, if ``changed`` has not
        been set to ``True``, ``False``.

    """
    changed = False
    for field in ('name', 'description'):
        changed = elicitation_method.set_attr(
            field, h.normalize(data[field]), changed)
    if not changed:
        return changed
    elicitation_method.datetime_modified = datetime.datetime.utcnow()
    return elicitation_method
def update_application_settings(application_settings, data):
    """Update an application settings.

    :param application_settings: the application settings model to be updated.
    :param dict data: representation of the updated application settings.
    :returns: the updated application settings model or, if ``changed`` has
        not been set to ``True``, then ``False``.

    """
    changed = False

    def stripped(key):
        # Inventory-like fields are stored whitespace-free and normalized.
        return h.normalize(h.remove_all_white_space(data[key]))

    updates = (
        # Unicode data.
        ('object_language_name', data['object_language_name']),
        ('object_language_id', data['object_language_id']),
        ('metalanguage_name', data['metalanguage_name']),
        ('metalanguage_id', data['metalanguage_id']),
        ('metalanguage_inventory', stripped('metalanguage_inventory')),
        ('orthographic_validation', data['orthographic_validation']),
        ('narrow_phonetic_inventory', stripped('narrow_phonetic_inventory')),
        ('narrow_phonetic_validation', data['narrow_phonetic_validation']),
        ('broad_phonetic_inventory', stripped('broad_phonetic_inventory')),
        ('broad_phonetic_validation', data['broad_phonetic_validation']),
        ('morpheme_break_is_orthographic',
         data['morpheme_break_is_orthographic']),
        ('morpheme_break_validation', data['morpheme_break_validation']),
        ('phonemic_inventory', stripped('phonemic_inventory')),
        ('morpheme_delimiters', h.normalize(data['morpheme_delimiters'])),
        ('punctuation', stripped('punctuation')),
        ('grammaticalities', stripped('grammaticalities')),
        # Many-to-one relations.
        ('storage_orthography', data['storage_orthography']),
        ('input_orthography', data['input_orthography']),
        ('output_orthography', data['output_orthography']),
    )
    for attribute, value in updates:
        changed = application_settings.set_attr(attribute, value, changed)

    # Many-to-many data: unrestricted_users.  If the requested set differs
    # from the current one, replace the collection wholesale.
    unrestricted_users_to_add = [u for u in data['unrestricted_users'] if u]
    if set(unrestricted_users_to_add) != set(
            application_settings.unrestricted_users):
        application_settings.unrestricted_users = unrestricted_users_to_add
        changed = True

    if changed:
        application_settings.datetime_modified = datetime.datetime.utcnow()
        return application_settings
    return changed
def update_user(user, data):
    """Update a user.

    :param user: the user model to be updated.
    :param dict data: representation of the updated user.
    :returns: the updated user model or, if ``changed`` has not been set to
        ``True``, ``False``.

    """
    changed = False

    # Unicode data.  Order matters: ``html`` below is derived from
    # ``user.page_content`` and ``user.markup_language``, which these calls
    # may have just updated.
    for attribute in ('first_name', 'last_name', 'email', 'affiliation',
                      'role', 'page_content', 'markup_language'):
        changed = user.set_attr(attribute, h.normalize(data[attribute]),
                                changed)
    changed = user.set_attr(
        'html',
        h.get_HTML_from_contents(user.page_content, user.markup_language),
        changed)

    # username and password need special treatment: a value of None means
    # that these should not be updated.
    if data['password'] is not None:
        encrypted = unicode(h.encrypt_password(data['password'],
                                               str(user.salt)))
        changed = user.set_attr('password', encrypted, changed)
    if data['username'] is not None:
        username = h.normalize(data['username'])
        if username != user.username:
            # Keep the user's filesystem directory in sync with the new name.
            h.rename_user_directory(user.username, username)
        changed = user.set_attr('username', username, changed)

    # Many-to-one data.
    changed = user.set_attr('input_orthography', data['input_orthography'],
                            changed)
    changed = user.set_attr('output_orthography', data['output_orthography'],
                            changed)

    if not changed:
        return changed
    user.datetime_modified = datetime.datetime.utcnow()
    return user
def update_orthography(orthography, data):
    """Update an orthography.

    :param orthography: the orthography model to be updated.
    :param dict data: representation of the updated orthography.
    :returns: the updated orthography model or, if ``changed`` has not been
        set to ``True``, ``False``.

    """
    changed = False
    # Textual fields are normalized; boolean flags pass through untouched.
    for attribute, needs_normalizing in (
            ('name', True),
            ('orthography', True),
            ('lowercase', False),
            ('initial_glottal_stops', False)):
        if needs_normalizing:
            value = h.normalize(data[attribute])
        else:
            value = data[attribute]
        changed = orthography.set_attr(attribute, value, changed)
    if not changed:
        return changed
    orthography.datetime_modified = datetime.datetime.utcnow()
    return orthography
def create_new_phonology(data):
    """Create a new phonology.

    :param dict data: the data for the phonology to be created.
    :returns: an SQLAlchemy model object representing the phonology.

    """
    return Phonology(
        parent_directory=h.get_OLD_directory_path('phonologies',
                                                  config=config),
        word_boundary_symbol=h.word_boundary_symbol,
        UUID=unicode(uuid4()),
        name=h.normalize(data['name']),
        description=h.normalize(data['description']),
        # Scripts are stored with carriage returns removed.
        script=h.normalize(data['script']).replace(u'\r', u''),
        enterer=session['user'],
        modifier=session['user'],
        datetime_modified=h.now(),
        datetime_entered=h.now())
def update_keyboard(keyboard, data):
    """Update a keyboard.

    :param keyboard: the keyboard model to be updated.
    :param dict data: representation of the updated keyboard.
    :returns: the updated keyboard model or, if ``changed`` has not been set
        to ``True``, ``False``.

    """
    changed = False
    for attribute in ('name', 'description', 'keyboard'):
        changed = keyboard.set_attr(attribute, h.normalize(data[attribute]),
                                    changed)
    if not changed:
        return changed
    keyboard.datetime_modified = h.now()
    # Merge the session user back into the current DB session before
    # assigning it as the modifier.
    session['user'] = Session.merge(session['user'])
    keyboard.modifier = session['user']
    return keyboard
def create_new_morphological_parser(data):
    """Create a new morphological parser.

    :param dict data: the data for the morphological parser to be created.
    :returns: an SQLAlchemy model object representing the morphological
        parser.

    """
    return MorphologicalParser(
        parent_directory=h.get_OLD_directory_path('morphologicalparsers',
                                                  config=config),
        UUID=unicode(uuid4()),
        # User-supplied textual fields, normalized.
        name=h.normalize(data['name']),
        description=h.normalize(data['description']),
        # OLD-generated provenance and timestamps.
        enterer=session['user'],
        modifier=session['user'],
        datetime_modified=h.now(),
        datetime_entered=h.now(),
        # Relations to the parser's component models.
        phonology=data['phonology'],
        morphology=data['morphology'],
        language_model=data['language_model'])
def update_standard_metadata(file, data, changed):
    """Update the standard metadata attributes of the input file.

    :param file: a file model object to be updated.
    :param dict data: the data used to update the file model.
    :param bool changed: indicates whether the file has been changed.
    :returns: a tuple whose first element is the file model and whose second
        is the boolean ``changed``.

    """
    # Scalar fields: textual values are normalized; relations and dates are
    # assigned as given.  ``set_attr`` flips ``changed`` when a value differs.
    changed = file.set_attr('description', h.normalize(data['description']), changed)
    changed = file.set_attr('utterance_type', h.normalize(data['utterance_type']), changed)
    changed = file.set_attr('date_elicited', data['date_elicited'], changed)
    changed = file.set_attr('elicitor', data['elicitor'], changed)
    changed = file.set_attr('speaker', data['speaker'], changed)

    # Many-to-Many Data: tags & forms
    # Update only if the user has made changes.
    forms_to_add = [f for f in data['forms'] if f]
    tags_to_add = [t for t in data['tags'] if t]

    if set(forms_to_add) != set(file.forms):
        file.forms = forms_to_add
        changed = True

        # Cause the entire file to be tagged as restricted if any one of its
        # forms are so tagged.
        # NOTE(review): this restricted-tag propagation only runs when the
        # set of forms has changed -- confirm that is intended.
        tags = [f.tags for f in file.forms]
        tags = [tag for tag_list in tags for tag in tag_list]
        restricted_tags = [tag for tag in tags if tag.name == u'restricted']
        if restricted_tags:
            restricted_tag = restricted_tags[0]
            if restricted_tag not in tags_to_add:
                tags_to_add.append(restricted_tag)

    if set(tags_to_add) != set(file.tags):
        file.tags = tags_to_add
        changed = True
    return file, changed
def update_form_search(form_search, data):
    """Update a form search model.

    :param form_search: the form search model to be updated.
    :param dict data: representation of the updated form search.
    :returns: the updated form search model or, if ``changed`` has not been
        set to ``True``, then ``False``.

    """
    changed = False
    updates = (
        ('name', h.normalize(data['name'])),
        ('search', data['search']),  # assigned without normalization
        ('description', h.normalize(data['description'])),
    )
    for attribute, value in updates:
        changed = form_search.set_attr(attribute, value, changed)
    if changed:
        form_search.datetime_modified = datetime.datetime.utcnow()
        return form_search
    return changed
def update_phonology(phonology, data):
    """Update a phonology.

    :param phonology: the phonology model to be updated.
    :param dict data: representation of the updated phonology.
    :returns: the updated phonology model or, if ``changed`` has not been set
        to ``True``, ``False``.

    """
    changed = False
    updates = (
        ('name', h.normalize(data['name'])),
        ('description', h.normalize(data['description'])),
        ('script', h.normalize(data['script'])),
        # Taken from the helper module's constant, not from user input.
        ('word_boundary_symbol', h.word_boundary_symbol),
    )
    for attribute, value in updates:
        changed = phonology.set_attr(attribute, value, changed)
    if not changed:
        return changed
    # Merge the session user back into the current DB session before
    # assigning it as the modifier.
    session['user'] = Session.merge(session['user'])
    phonology.modifier = session['user']
    phonology.datetime_modified = h.now()
    return phonology
def create_new_application_settings(data):
    """Create a new application settings.

    :param dict data: the application settings to be created.
    :returns: an SQLAlchemy model object representing the application
        settings.

    """
    def stripped(key):
        # Inventory-like fields are stored whitespace-free and normalized.
        return h.normalize(h.remove_all_white_space(data[key]))

    settings = ApplicationSettings()

    # Scalar fields copied straight from the input.
    settings.object_language_name = data['object_language_name']
    settings.object_language_id = data['object_language_id']
    settings.metalanguage_name = data['metalanguage_name']
    settings.metalanguage_id = data['metalanguage_id']
    settings.metalanguage_inventory = stripped('metalanguage_inventory')
    settings.orthographic_validation = data['orthographic_validation']
    settings.narrow_phonetic_inventory = stripped('narrow_phonetic_inventory')
    settings.narrow_phonetic_validation = data['narrow_phonetic_validation']
    settings.broad_phonetic_inventory = stripped('broad_phonetic_inventory')
    settings.broad_phonetic_validation = data['broad_phonetic_validation']
    settings.morpheme_break_is_orthographic = data[
        'morpheme_break_is_orthographic']
    settings.morpheme_break_validation = data['morpheme_break_validation']
    settings.phonemic_inventory = stripped('phonemic_inventory')
    settings.morpheme_delimiters = h.normalize(data['morpheme_delimiters'])
    settings.punctuation = stripped('punctuation')
    settings.grammaticalities = stripped('grammaticalities')

    # Many-to-One: left unset when no value was supplied.
    for relation in ('storage_orthography', 'input_orthography',
                     'output_orthography'):
        if data[relation]:
            setattr(settings, relation, data[relation])

    # Many-to-Many Data: unrestricted_users (empty entries filtered out).
    settings.unrestricted_users = [u for u in data['unrestricted_users'] if u]
    return settings