def compute_perplexity(**kwargs):
    """Evaluate the LM by attempting to calculate its perplexity and changing
    some attribute values to reflect the attempt.

    :param kwargs['settings']: settings handed to
        ``get_dbsession_from_settings`` to obtain a session factory.
    :param kwargs['morpheme_language_model_id']: ``id`` value of the morpheme
        language model to evaluate.
    :param kwargs['timeout']: passed through to the model's
        ``compute_perplexity`` method.
    :param kwargs['user_id']: stored as the model's ``modifier_id``.
    :returns: ``None``; side-effect is to set ``perplexity``,
        ``perplexity_computed``, ``perplexity_attempt``, ``modifier_id`` and
        ``datetime_modified`` on the model and commit them.
    """
    # Create the session *before* entering the ``try``: if this call fails
    # there is nothing to commit/close, and the ``finally`` clause must not
    # mask the real error with a ``NameError`` on ``dbsession``.
    dbsession = get_dbsession_from_settings(kwargs['settings'])()
    try:
        langmod = dbsession.query(
            old_models.MorphemeLanguageModel).get(
                kwargs['morpheme_language_model_id'])
        if langmod is None:
            LOGGER.error(
                'Unable to compute perplexity: there is no morpheme language'
                ' model with id %s',
                kwargs['morpheme_language_model_id'])
            return
        timeout = kwargs['timeout']
        iterations = 5
        try:
            langmod.perplexity = langmod.compute_perplexity(
                timeout, iterations)
        except Exception as error:
            # Best effort: a failed computation is recorded as "no
            # perplexity" rather than aborting the whole attempt.
            LOGGER.error('Exception when calling `compute_perplexity` on'
                         ' language model: %s %s',
                         error.__class__.__name__, error)
            langmod.perplexity = None
        langmod.perplexity_computed = langmod.perplexity is not None
        # A fresh UUID marks that a new attempt has been made.
        langmod.perplexity_attempt = str(uuid4())
        langmod.modifier_id = kwargs['user_id']
        langmod.datetime_modified = h.now()
    finally:
        # Always release the session, even when the commit itself fails.
        try:
            dbsession.commit()
        finally:
            dbsession.close()
def generate_and_compile_morphology(**kwargs):
    """Generate a foma script for a morphology and (optionally) compile it.

    :param kwargs['settings']: settings handed to
        ``get_dbsession_from_settings`` to obtain a session factory.
    :param int kwargs['morphology_id']: id of a morphology.
    :param bool kwargs['compile']: if True (the default), the script will be
        generated *and* compiled.
    :param int kwargs['user_id']: id of the user model performing the
        generation/compilation.
    :param float kwargs['timeout']: how many seconds to wait before killing
        the foma compile process.
    :returns: ``None``; side-effect is to update ``generate_attempt``,
        ``modifier_id`` and ``datetime_modified`` on the morphology and
        commit them.
    """
    # Create the session outside the ``try`` so that a failure here is not
    # masked by a ``NameError`` raised from the ``finally`` clause.
    dbsession = get_dbsession_from_settings(kwargs['settings'])()
    try:
        morphology = dbsession.query(
            old_models.Morphology).get(kwargs['morphology_id'])
        if morphology is None:
            LOGGER.error(
                'Unable to generate morphology: there is no morphology with'
                ' id %s', kwargs['morphology_id'])
            return
        try:
            morphology.write(oldc.UNKNOWN_CATEGORY)
        except Exception as error:
            # Best effort: log and carry on so the attempt is still recorded.
            LOGGER.error('Exception when calling `write` on morphology: %s %s',
                         error.__class__.__name__, error)
        if kwargs.get('compile', True):
            try:
                morphology.compile(kwargs['timeout'])
            except Exception as error:
                LOGGER.error('Exception when calling `compile` on morphology:'
                             ' %s %s', error.__class__.__name__, error)
        # A fresh UUID marks that a new attempt has been made.
        morphology.generate_attempt = str(uuid4())
        morphology.modifier_id = kwargs['user_id']
        morphology.datetime_modified = h.now()
    finally:
        # Always release the session, even when the commit itself fails.
        try:
            dbsession.commit()
        finally:
            dbsession.close()
def _get_create_data(self, data):
    """Return the user-supplied data extended with the values that are set
    automatically on creation.

    The dict returned by ``self._get_user_data(data)`` is updated in place
    with the ``morphologies`` parent directory, the word boundary symbol, the
    rare delimiter, the morpheme delimiters, a new UUID, the logged-in user
    as both enterer and modifier, and a single shared timestamp for the
    entered/modified datetimes.

    :param data: passed unchanged to ``self._get_user_data``.
    :returns: the updated dict.
    """
    create_data = self._get_user_data(data)
    timestamp = h.now()
    creator = self.logged_in_user
    create_data.update(
        parent_directory=h.get_old_directory_path(
            'morphologies', self.request.registry.settings),
        # TODO: the Pylons app implied that this constant could change...
        word_boundary_symbol=oldc.WORD_BOUNDARY_SYMBOL,
        rare_delimiter=oldc.RARE_DELIMITER,
        morpheme_delimiters=self.db.get_morpheme_delimiters(type_='str'),
        UUID=str(uuid4()),
        enterer=creator,
        modifier=creator,
        datetime_modified=timestamp,
        datetime_entered=timestamp,
    )
    return create_data
def _get_create_data(self, data):
    """Return the user-supplied data extended with the values that are set
    automatically on creation.

    The dict returned by ``self._get_user_data(data)`` is updated in place
    with the ``morphemelanguagemodels`` parent directory, the rare delimiter,
    the LM start/end symbols, the morpheme delimiters, a new UUID, the
    logged-in user as both enterer and modifier, and a single shared
    timestamp for the entered/modified datetimes.

    :param data: passed unchanged to ``self._get_user_data``.
    :returns: the updated dict.
    """
    create_data = self._get_user_data(data)
    timestamp = h.now()
    creator = self.logged_in_user
    automatic_values = {
        'parent_directory': h.get_old_directory_path(
            'morphemelanguagemodels', self.request.registry.settings),
        'rare_delimiter': oldc.RARE_DELIMITER,
        'start_symbol': oldc.LM_START,
        'end_symbol': oldc.LM_END,
        'morpheme_delimiters': self.db.get_morpheme_delimiters(type_='str'),
        'UUID': str(uuid4()),
        'enterer': creator,
        'modifier': creator,
        'datetime_modified': timestamp,
        'datetime_entered': timestamp,
    }
    create_data.update(automatic_values)
    return create_data
def update(self):
    """Update a user's remembered forms and return them.

    - URL: ``PUT /rememberedforms/id``
    - Request body: JSON object of the form ``{"forms": [...]}`` where the
      array contains the form ``id`` values that will constitute the user's
      ``remembered_forms`` collection after update.

    :param str id: the ``id`` value of the user model whose
        ``remembered_forms`` attribute is to be updated (read from the
        request's ``matchdict``).
    :returns: the list of remembered forms of the user on success. On
        failure, an error dict with the response status set to 404 (no such
        user) or 400 (undecodable JSON, invalid data, or data that do not
        change the remembered forms).

    .. note::

        Administrators can update any user's remembered forms;
        non-administrators can only update their own.
    """
    id_ = self.request.matchdict['id']
    # ``id_`` comes from the URL and is a string, so it must be logged with
    # ``%s``; ``%d`` makes the logging call fail to format the record.
    LOGGER.info('Attempting to update the forms remembered by user %s.', id_)
    user = self.request.dbsession.query(User).options(
        subqueryload(User.remembered_forms)).get(id_)
    schema = FormIdsSchemaNullable
    if not user:
        self.request.response.status_int = 404
        msg = 'There is no user with id {}'.format(id_)
        LOGGER.warning(msg)
        return {'error': msg}
    try:
        values = json.loads(self.request.body.decode(self.request.charset))
    except ValueError:
        self.request.response.status_int = 400
        LOGGER.warning(JSONDecodeErrorResponse)
        return JSONDecodeErrorResponse
    state = SchemaState(full_dict=values, db=self.db, id=id_)
    try:
        data = schema.to_python(values, state)
    except Invalid as error:
        self.request.response.status_int = 400
        errors = error.unpack_errors()
        LOGGER.warning(errors)
        return {'errors': errors}
    forms = [f for f in data['forms'] if f]
    unrestricted_users = self.db.get_unrestricted_users()
    # Keep only the forms that the *logged-in* user is authorized to access.
    unrestricted_forms = [
        f for f in forms
        if self.logged_in_user.is_authorized_to_access_model(
            f, unrestricted_users)
    ]
    if set(user.remembered_forms) != set(unrestricted_forms):
        user.remembered_forms = unrestricted_forms
        user.datetime_modified = h.now()
        LOGGER.info('Updated the forms remembered by user %s.', id_)
        return user.remembered_forms
    # The submitted set of forms is identical to the current one.
    self.request.response.status_int = 400
    msg = ('The update request failed because the submitted data were not'
           ' new.')
    LOGGER.warning(msg)
    return {'error': msg}
def generate_language_model(**kwargs):
    """Write the requisite files (corpus, vocab, ARPA, LMTrie) of a morpheme
    LM to disk.

    :param kwargs['settings']: settings handed to
        ``get_dbsession_from_settings`` to obtain a session factory.
    :param str kwargs['morpheme_language_model_id']: ``id`` value of a
        morpheme LM.
    :param int/float kwargs['timeout']: seconds to allow for ARPA file
        creation.
    :param str kwargs['user_id']: ``id`` value of an OLD user.
    :returns: ``None``; side-effect is to change relevant attributes of LM
        object.

    Each of the four steps is attempted even when an earlier one failed; a
    failing step logs the error and overwrites ``generate_message``.
    Success is only recorded when ``generate_trie`` raises nothing *and* the
    modification time of the trie file has changed.
    """
    # Create the session outside the ``try`` so that a failure here is not
    # masked by a ``NameError`` raised from the ``finally`` clause.
    dbsession = get_dbsession_from_settings(kwargs['settings'])()
    try:
        langmod = dbsession.query(
            old_models.MorphemeLanguageModel).get(
                kwargs['morpheme_language_model_id'])
        if langmod is None:
            LOGGER.error(
                'Unable to generate language model: there is no morpheme'
                ' language model with id %s',
                kwargs['morpheme_language_model_id'])
            return
        trie_path = langmod.get_file_path('trie')
        # Remember the trie's modification time so that we can tell whether
        # the file was really (re)written below.
        trie_mod_time = langmod.get_modification_time(trie_path)
        langmod.generate_succeeded = False
        try:
            langmod.write_corpus()
        except Exception as error:
            LOGGER.error('Exception when calling `write_corpus` on language'
                         ' model: %s %s', error.__class__.__name__, error)
            langmod.generate_message = (
                'Error writing the corpus file. %s' % error)
        try:
            langmod.write_vocabulary()
        except Exception as error:
            LOGGER.error('Exception when calling `write_vocabulary` on language'
                         ' model: %s %s', error.__class__.__name__, error)
            langmod.generate_message = (
                'Error writing the vocabulary file. %s' % error)
        try:
            langmod.write_arpa(kwargs['timeout'])
        except Exception as error:
            LOGGER.error('Exception when calling `write_arpa` on language'
                         ' model: %s %s', error.__class__.__name__, error)
            langmod.generate_message = (
                'Error writing the ARPA file. %s' % error)
        try:
            langmod.generate_trie()
        except Exception as error:
            LOGGER.error('Exception when calling `generate_trie` on language'
                         ' model: %s %s', error.__class__.__name__, error)
            langmod.generate_message = (
                'Error generating the LMTrie instance. %s' % error)
        else:
            if langmod.get_modification_time(trie_path) != trie_mod_time:
                langmod.generate_succeeded = True
                langmod.generate_message = (
                    'Language model successfully generated.')
            else:
                langmod.generate_message = (
                    'Error generating the LMTrie instance.')
        # A fresh UUID marks that a new attempt has been made.
        langmod.generate_attempt = str(uuid4())
        langmod.modifier_id = kwargs['user_id']
        langmod.datetime_modified = h.now()
    finally:
        # Always release the session, even when the commit itself fails.
        try:
            dbsession.commit()
        finally:
            dbsession.close()
def compile_phonology(**kwargs):
    """Compile the foma script of a phonology and save it to the db with
    values that indicate compilation success.

    :param kwargs['settings']: settings handed to
        ``get_dbsession_from_settings`` to obtain a session factory.
    :param kwargs['phonology_id']: ``id`` value of the phonology to compile.
    :param kwargs['timeout']: passed through to the phonology's ``compile``
        method.
    :param kwargs['user_id']: stored as the phonology's ``modifier_id``.
    :returns: ``None``; whatever has changed on the session is committed,
        even when compilation raises (the exception still propagates).
    """
    # Create the session outside the ``try`` so that a failure here is not
    # masked by a ``NameError`` raised from the ``finally`` clause.
    dbsession = get_dbsession_from_settings(kwargs['settings'])()
    try:
        phonology = dbsession.query(
            old_models.Phonology).get(kwargs['phonology_id'])
        phonology.compile(kwargs['timeout'])
        phonology.datetime_modified = h.now()
        phonology.modifier_id = kwargs['user_id']
    finally:
        # Always release the session, even when the commit itself fails.
        try:
            dbsession.commit()
        finally:
            dbsession.close()
def _add_standard_metadata(self, file_, data):
    """Copy the standard metadata from ``data`` onto the file model.

    :param file_: file model object; it is mutated in place.
    :param dict data: dictionary containing file attribute values. The keys
        read are ``description``, ``utterance_type``, ``date_elicited``,
        ``elicitor``, ``speaker``, ``tags`` and ``forms``.
    :returns: the updated file model object.
    """
    file_.description = h.normalize(data['description'])
    file_.utterance_type = data['utterance_type']
    file_.date_elicited = data['date_elicited']

    # Elicitor and speaker are only assigned when a truthy value is given.
    elicitor = data['elicitor']
    if elicitor:
        file_.elicitor = elicitor
    speaker = data['speaker']
    if speaker:
        file_.speaker = speaker

    # Drop falsy entries from the related collections.
    file_.tags = list(filter(None, data['tags']))
    file_.forms = list(filter(None, data['forms']))

    # Entered and modified share one timestamp.
    timestamp = h.now()
    file_.datetime_entered = timestamp
    file_.datetime_modified = timestamp
    file_.enterer = self.logged_in_user
    return file_
def _get_create_data(self, data):
    """Return the user-supplied data extended with the values that are set
    automatically on creation.

    The dict returned by ``self._get_user_data(data)`` is updated in place
    with the ``morphologicalparsers`` parent directory, a new UUID, the
    logged-in user as both enterer and modifier, and a single shared
    timestamp for the entered/modified datetimes.

    :param data: passed unchanged to ``self._get_user_data``.
    :returns: the updated dict.
    """
    create_data = self._get_user_data(data)
    timestamp = h.now()
    creator = self.logged_in_user
    create_data.update(
        parent_directory=h.get_old_directory_path(
            'morphologicalparsers', self.request.registry.settings),
        UUID=str(uuid4()),
        enterer=creator,
        modifier=creator,
        datetime_modified=timestamp,
        datetime_entered=timestamp,
    )
    return create_data
def generate_and_compile_parser(**kwargs):
    """Write the parser's morphophonology FST script to file and compile it
    if ``kwargs['compile']`` is True (the default).

    :param kwargs['settings']: settings handed to
        ``get_dbsession_from_settings`` and to the parser's ``Cache``.
    :param kwargs['morphological_parser_id']: ``id`` value of the parser.
    :param kwargs['compile']: when falsy, the compile step is skipped.
    :param kwargs['timeout']: passed through to the parser's ``compile``
        method.
    :param kwargs['user_id']: stored as the parser's ``modifier_id``.
    :returns: ``None``.

    NOTE(review): the previous docstring also said "Generate the language
    model and pickle it"; nothing in this function does that directly --
    presumably it happens inside ``parser.write``/``parser.compile``. Confirm.
    """
    # Create the session outside the ``try`` so that a failure here is not
    # masked by a ``NameError`` raised from the ``finally`` clause. The
    # factory is called, as in the sibling worker functions, so that we hold
    # an actual session object.
    dbsession = get_dbsession_from_settings(kwargs['settings'])()
    try:
        parser = dbsession.query(old_models.MorphologicalParser).get(
            kwargs['morphological_parser_id'])
        cache = Cache(parser, kwargs['settings'], get_dbsession_from_settings)
        parser.cache = cache
        parser.changed = False
        parser.write()
        # Persist the result of ``write`` before the compile step runs.
        dbsession.commit()
        if kwargs.get('compile', True):
            parser.compile(kwargs['timeout'])
        parser.modifier_id = kwargs['user_id']
        parser.datetime_modified = h.now()
        # NOTE: ``parser.changed`` is deliberately not forced to True here;
        # per the original author, tests should pass without doing so.
        if parser.changed:
            parser.cache.clear(persist=True)
        dbsession.add(parser)
    finally:
        # Always release the session, even when the commit itself fails.
        try:
            dbsession.commit()
        finally:
            dbsession.close()
def _get_update_data(self, user_data):
    """Stamp ``user_data`` with the current time and the logged-in user.

    :param user_data: dict of values for the update; it is modified in place
        by setting its ``datetime_modified`` and ``modifier`` keys.
    :returns: the same ``user_data`` object.
    """
    timestamp = h.now()
    user_data.update(
        datetime_modified=timestamp,
        modifier=self.logged_in_user,
    )
    return user_data
def _write_to_file(self, corpus, format_):
    """Write the corpus to file in the specified format.

    Write the corpus to a UTF-8 encoded file, create or update a corpus file
    model and associate it to the corpus model (if necessary).

    :param corpus: a corpus model.
    :param str format_: the format of the file to be written; it is used as
        a key into ``oldc.CORPUS_FORMATS``.
    :returns: the corpus modified appropriately on success; on failure, a
        dict with an ``'error'`` key (the response status is set to 400).
    :side effects: may write (a) file(s) to disk and update/create a corpus
        file model.

    .. note::

        It may be desirable/necessary to perform the corpus file writing
        asynchronously using a dedicated corpus-file-worker.
    """

    def error_msg(msg):
        """Build the error dict returned to the client."""
        return {
            'error': 'Unable to write corpus %d to file with format "%s".'
                     ' (%s)' % (corpus.id, format_, msg)
        }

    def update_corpus_file(corpus, filename, modifier, datetime_modified,
                           restricted):
        """Update the corpus file model of ``corpus`` that matches
        ``filename``; raises ``IndexError`` when there is no such model.
        """
        corpus_file = [
            cf for cf in corpus.files if cf.filename == filename
        ][0]
        corpus_file.restricted = restricted
        corpus_file.modifier = modifier
        corpus_file.datetime_modified = corpus.datetime_modified = \
            datetime_modified

    def generate_new_corpus_file(*args):
        """Create a corpus file model with ``filename`` and append it to
        ``corpus.files``.
        """
        (corpus, filename, format_, creator, datetime_created,
         restricted) = args
        corpus_file = CorpusFile()
        corpus_file.restricted = restricted
        corpus.files.append(corpus_file)
        corpus_file.filename = filename
        corpus_file.format = format_
        corpus_file.creator = corpus_file.modifier = creator
        corpus_file.datetime_created = corpus_file.datetime_modified = \
            datetime_created
        corpus.datetime_modified = datetime_created

    def destroy_file(file_path):
        """Best-effort removal of a (partially) written corpus file.

        The path normally names a regular file, which ``rmtree`` cannot
        remove (assuming it is ``shutil.rmtree``), so regular files are
        removed with ``os.remove``. Failures are logged, never raised.
        """
        try:
            if os.path.isdir(file_path):
                rmtree(file_path)
            elif os.path.exists(file_path):
                os.remove(file_path)
        except Exception as error:
            LOGGER.warning('Unable to remove corpus file %s: %s',
                           file_path, error)

    corpus_file_path = self._get_corpus_file_path(corpus, format_)
    update = os.path.exists(corpus_file_path)  # If True, we are updating
    restricted = False

    # Create the corpus file on the filesystem
    try:
        writer = oldc.CORPUS_FORMATS[format_]['writer']
        if corpus.form_search:  # ``form_search`` value negates any content.
            forms_to_write = corpus.forms
        else:
            form_references = corpus.get_form_references(corpus.content)
            forms_by_id = {f.id: f for f in corpus.forms}
            # Lazy, so that an unknown id still surfaces while writing.
            forms_to_write = (forms_by_id[id_] for id_ in form_references)
        with codecs.open(corpus_file_path, 'w', 'utf8') as file_:
            for form in forms_to_write:
                # One restricted form makes the whole corpus file restricted.
                if (not restricted and
                        any(t.name == 'restricted' for t in form.tags)):
                    restricted = True
                file_.write(writer(form))
        gzipped_corpus_file_path = h.compress_file(corpus_file_path)
        _create_tgrep2_corpus_file(gzipped_corpus_file_path, format_)
    except Exception as error:
        destroy_file(corpus_file_path)
        self.request.response.status_int = 400
        response = error_msg(error)
        LOGGER.warning(response['error'])
        return response

    # Update/create the corpus_file object
    try:
        now = h.now()
        user = self.logged_in_user
        corpus_filename = os.path.split(corpus_file_path)[1]
        if update:
            try:
                update_corpus_file(corpus, corpus_filename, user, now,
                                   restricted)
            except Exception:
                # The file existed on disk but no usable corpus file model
                # could be updated: fall back to creating a new one.
                generate_new_corpus_file(corpus, corpus_filename, format_,
                                         user, now, restricted)
        else:
            generate_new_corpus_file(corpus, corpus_filename, format_,
                                     user, now, restricted)
    except Exception as error:
        destroy_file(corpus_file_path)
        self.request.response.status_int = 400
        response = error_msg(error)
        LOGGER.warning(response['error'])
        return response
    LOGGER.info('Wrote corpus %s to a file on disk', corpus.id)
    self.request.dbsession.flush()
    return corpus
def _update_collections_that_reference_this_collection(
        self, collection, **kwargs):
    """Propagate a change in ``collection`` to the collections that
    reference it.

    :param collection: a collection model.
    :param bool kwargs['restricted']: indicates whether the input collection
        has just been tagged as restricted.
    :param bool kwargs['contents_changed']: indicates whether the input
        collection's ``contents`` value has changed.
    :param bool kwargs['deleted']: indicates whether the input collection
        has just been deleted.
    :returns: ``None``

    Nothing happens unless at least one of the three flags is truthy.
    Otherwise the referencing collections are fetched via
    ``self._get_collections_referencing_this_collection`` and, for each:

    - if ``restricted``, the restricted tag is appended to its ``tags``;
    - if ``contents_changed``, ``self._update_contents_unpacked_etc`` is
      called on it;
    - if ``deleted``, ``self._update_contents_unpacked_etc`` is called on it
      with the id of the deleted collection and ``deleted=True``;
    - in all cases its ``datetime_modified`` and ``modifier`` are updated.

    The state of every referencing collection prior to these changes is
    backed up, and the changed collections are added to the session, which
    is then flushed.
    """
    newly_restricted = kwargs.get('restricted', False)
    contents_changed = kwargs.get('contents_changed', False)
    was_deleted = kwargs.get('deleted', False)
    if not (newly_restricted or contents_changed or was_deleted):
        return

    referrers = self._get_collections_referencing_this_collection(collection)
    # Snapshot the referrers *before* mutating them; these are what get
    # backed up further down.
    referrer_snapshots = [referrer.get_full_dict() for referrer in referrers]
    timestamp = h.now()

    if newly_restricted:
        restricted_tag = self.db.get_restricted_tag()
        for referrer in referrers:
            referrer.tags.append(restricted_tag)

    if contents_changed:
        for referrer in referrers:
            self._update_contents_unpacked_etc(referrer)

    if was_deleted:
        for referrer in referrers:
            self._update_contents_unpacked_etc(
                referrer, collection_id=collection.id, deleted=True)

    for referrer in referrers:
        referrer.datetime_modified = timestamp
        referrer.modifier = self.logged_in_user

    for snapshot in referrer_snapshots:
        self._backup_resource(snapshot)

    session = self.request.dbsession
    session.add_all(referrers)
    session.flush()