def create_new_morpheme_language_model(data):
    """Build an (unsaved) morpheme language model from user-supplied data.

    :param dict data: the validated inputs for the new language model.
    :returns: a ``MorphemeLanguageModel`` SQLAlchemy model instance.
    """
    current_user = session['user']
    return MorphemeLanguageModel(
        parent_directory=h.get_OLD_directory_path('morphemelanguagemodels',
                                                  config=config),
        rare_delimiter=h.rare_delimiter,
        start_symbol=h.lm_start,
        end_symbol=h.lm_end,
        morpheme_delimiters=h.get_morpheme_delimiters(type_=u'unicode'),
        UUID=unicode(uuid4()),
        name=h.normalize(data['name']),
        description=h.normalize(data['description']),
        enterer=current_user,
        modifier=current_user,
        datetime_modified=h.now(),
        datetime_entered=h.now(),
        vocabulary_morphology=data['vocabulary_morphology'],
        corpus=data['corpus'],
        toolkit=data['toolkit'],
        order=data['order'],
        smoothing=data['smoothing'],
        categorial=data['categorial'])
def serve_file(id, reduced=False):
    """Serve the content (binary data) of a file.

    :param str id: the ``id`` value of the file whose file data will be served.
    :param bool reduced: toggles serving of file data or reduced-size file data.
    """
    file_mod = Session.query(File).options(
        subqueryload(File.parent_file)).get(id)
    if getattr(file_mod, 'parent_file', None):
        # Serve the parent file's binary data instead.
        file_mod = file_mod.parent_file
    elif getattr(file_mod, 'url', None):
        # URL-only files have no local content to serve.
        response.status_int = 400
        return json.dumps({'error': u'The content of file %s is stored elsewhere at %s' % (id, file_mod.url)})
    if not file_mod:
        response.status_int = 404
        return json.dumps({'error': 'There is no file with id %s' % id})
    files_dir = h.get_OLD_directory_path('files', config=config)
    if reduced:
        filename = getattr(file_mod, 'lossy_filename', None)
        if not filename:
            response.status_int = 404
            return json.dumps({'error': u'There is no size-reduced copy of file %s' % id})
        file_path = os.path.join(files_dir, 'reduced_files', filename)
    else:
        file_path = os.path.join(files_dir, file_mod.filename)
    unrestricted_users = h.get_unrestricted_users()
    if not h.user_is_authorized_to_access_model(
            session['user'], file_mod, unrestricted_users):
        response.status_int = 403
        return json.dumps(h.unauthorized_msg)
    return forward(FileApp(file_path))
def create_new_morphology(data):
    """Build an (unsaved) morphology from user-supplied data.

    :param dict data: the validated inputs for the new morphology.
    :returns: a ``Morphology`` SQLAlchemy model instance.
    """
    current_user = session['user']
    return Morphology(
        parent_directory=h.get_OLD_directory_path('morphologies',
                                                  config=config),
        word_boundary_symbol=h.word_boundary_symbol,
        morpheme_delimiters=h.get_morpheme_delimiters(type_=u'unicode'),
        rare_delimiter=h.rare_delimiter,
        UUID=unicode(uuid4()),
        name=h.normalize(data['name']),
        description=h.normalize(data['description']),
        enterer=current_user,
        modifier=current_user,
        datetime_modified=h.now(),
        datetime_entered=h.now(),
        lexicon_corpus=data['lexicon_corpus'],
        rules_corpus=data['rules_corpus'],
        script_type=data['script_type'],
        extract_morphemes_from_rules_corpus=data['extract_morphemes_from_rules_corpus'],
        rules=data['rules'],
        rich_upper=data['rich_upper'],
        rich_lower=data['rich_lower'],
        include_unknowns=data['include_unknowns'])
def create_new_morpheme_language_model(data):
    """Create a new morpheme language model.

    :param dict data: the data for the morpheme language model to be created.
    :returns: an SQLAlchemy model object representing the morpheme language
        model.
    """
    # Collect constructor arguments first, then instantiate in one step.
    creation_params = {
        'parent_directory': h.get_OLD_directory_path('morphemelanguagemodels', config=config),
        'rare_delimiter': h.rare_delimiter,
        'start_symbol': h.lm_start,
        'end_symbol': h.lm_end,
        'morpheme_delimiters': h.get_morpheme_delimiters(type_=u'unicode'),
        'UUID': unicode(uuid4()),
        'name': h.normalize(data['name']),
        'description': h.normalize(data['description']),
        'enterer': session['user'],
        'modifier': session['user'],
        'datetime_modified': h.now(),
        'datetime_entered': h.now(),
        'vocabulary_morphology': data['vocabulary_morphology'],
        'corpus': data['corpus'],
        'toolkit': data['toolkit'],
        'order': data['order'],
        'smoothing': data['smoothing'],
        'categorial': data['categorial'],
    }
    return MorphemeLanguageModel(**creation_params)
def __setattrs__(self):
    """Set the standard fixture attributes used by the OLD test cases.

    Builds role-specific WSGI ``extra_environ`` dicts for simulated
    authentication, loads the test configuration, and computes filesystem
    paths to the application's data directories and the bundled test data.
    """
    # Simulated-authentication environs, one per user role.
    self.extra_environ_view = {'test.authentication.role': u'viewer'}
    self.extra_environ_contrib = {'test.authentication.role': u'contributor'}
    self.extra_environ_admin = {'test.authentication.role': u'administrator'}
    # Same roles, with 'test.application_settings' enabled.
    self.extra_environ_view_appset = {'test.authentication.role': u'viewer',
        'test.application_settings': True}
    self.extra_environ_contrib_appset = {'test.authentication.role': u'contributor',
        'test.application_settings': True}
    self.extra_environ_admin_appset = {'test.authentication.role': u'administrator',
        'test.application_settings': True}
    self.json_headers = {'Content-Type': 'application/json'}
    # Load the test config; bound both to a local and to self for later use.
    config = self.config = appconfig('config:test.ini', relative_to='.')
    self.here = config['here']
    # Application data directories (resolved from the config).
    self.files_path = h.get_OLD_directory_path('files', config=config)
    self.reduced_files_path = h.get_OLD_directory_path('reduced_files',
        config=config)
    self.test_files_path = os.path.join(self.here,
        'onlinelinguisticdatabase', 'tests', 'data', 'files')
    self.create_reduced_size_file_copies = asbool(config.get(
        'create_reduced_size_file_copies', False))
    self.preferred_lossy_audio_format = config.get(
        'preferred_lossy_audio_format', 'ogg')
    self.corpora_path = h.get_OLD_directory_path('corpora', config=config)
    # Test datasets and scripts shipped with the source tree.
    self.test_datasets_path = os.path.join(self.here,
        'onlinelinguisticdatabase', 'tests', 'data', 'datasets')
    self.test_scripts_path = os.path.join(self.here,
        'onlinelinguisticdatabase', 'tests', 'scripts')
    self.loremipsum100_path = os.path.join(self.test_datasets_path,
        'loremipsum_100.txt')
    self.loremipsum1000_path = os.path.join(self.test_datasets_path,
        'loremipsum_1000.txt')
    self.loremipsum10000_path = os.path.join(self.test_datasets_path,
        'loremipsum_10000.txt')
    self.users_path = h.get_OLD_directory_path('users', config=config)
    self.morphologies_path = h.get_OLD_directory_path('morphologies',
        config=config)
    self.morphological_parsers_path = h.get_OLD_directory_path(
        'morphological_parsers', config=config)
    self.phonologies_path = h.get_OLD_directory_path('phonologies',
        config=config)
    self.morpheme_language_models_path = h.get_OLD_directory_path(
        'morpheme_language_models', config=config)
    # Phonology test-script fixtures.
    self.test_phonologies_path = os.path.join(self.here,
        'onlinelinguisticdatabase', 'tests', 'data', 'phonologies')
    self.test_phonology_script_path = os.path.join(
        self.test_phonologies_path, 'test_phonology.script')
    self.test_malformed_phonology_script_path = os.path.join(
        self.test_phonologies_path, 'test_phonology_malformed.script')
    # NOTE(review): this points at the same file as the malformed-script
    # path above — confirm that reuse is intentional.
    self.test_phonology_no_phonology_script_path = os.path.join(
        self.test_phonologies_path, 'test_phonology_malformed.script')
    self.test_medium_phonology_script_path = os.path.join(
        self.test_phonologies_path, 'test_phonology_medium.script')
    self.test_large_phonology_script_path = os.path.join(
        self.test_phonologies_path, 'test_phonology_large.script')
    self.test_phonology_testless_script_path = os.path.join(
        self.test_phonologies_path, 'test_phonology_no_tests.script')
    self.test_morphologies_path = os.path.join(self.here,
        'onlinelinguisticdatabase', 'tests', 'data', 'morphologies')
    self.test_morphophonologies_path = os.path.join(self.here,
        'onlinelinguisticdatabase', 'tests', 'data', 'morphophonologies')
def delete_file(file):
    """Delete a file model.

    :param file: a file model object to delete.
    :returns: ``None``.

    Removes any binary data stored on the filesystem for the model (the
    original and, if present, the size-reduced copy) and then deletes the
    model from the database.
    """
    filename = getattr(file, 'filename', None)
    if filename:
        os.remove(os.path.join(
            h.get_OLD_directory_path('files', config=config), filename))
    lossy_filename = getattr(file, 'lossy_filename', None)
    if lossy_filename:
        os.remove(os.path.join(
            h.get_OLD_directory_path('reduced_files', config=config),
            lossy_filename))
    Session.delete(file)
    Session.commit()
def create_plain_file():
    """Create a local file using data from a ``Content-Type:
    multipart/form-data`` request.

    :param request.POST['filedata']: a ``cgi.FieldStorage`` object containing
        the file data.
    :param str request.POST['filename']: the name of the binary file.
    :returns: an SQLAlchemy model object representing the file.

    .. note::

        The validator expects ``request.POST`` to encode list input via the
        ``formencode.variabledecode.NestedVariables`` format.  E.g., a list
        of form ``id`` values would be provided as values to keys with names
        like ``'forms-0'``, ``'forms-1'``, ``'forms-2'``, etc.
    """
    values = dict(request.params)
    filedata = request.POST.get('filedata')
    # A multipart upload must arrive as a FieldStorage with a .file handle.
    if not hasattr(filedata, 'file'):
        raise InvalidFieldStorageObjectError
    # Default the stored filename to the basename of the uploaded file.
    if not values.get('filename'):
        values['filename'] = os.path.split(filedata.filename)[-1]
    # Only the first KB of content is handed to the validation schema
    # (presumably for MIME-type detection — confirm against the schema).
    values['filedata_first_KB'] = filedata.value[:1024]
    schema = FileCreateWithFiledataSchema()
    data = schema.to_python(values)
    file = File()
    file.filename = h.normalize(data['filename'])
    file.MIME_type = data['MIME_type']
    # Reserve a guaranteed-unique path on disk; the final filename may
    # therefore differ from the one requested, so reassign it here.
    files_path = h.get_OLD_directory_path('files', config=config)
    file_path = os.path.join(files_path, file.filename)
    file_object, file_path = get_unique_file_path(file_path)
    file.filename = os.path.split(file_path)[-1]
    file.name = file.filename
    # Stream the upload to disk, then record its size.
    shutil.copyfileobj(filedata.file, file_object)
    filedata.file.close()
    file_object.close()
    file.size = os.path.getsize(file_path)
    file = add_standard_metadata(file, data)
    return file
def create_new_phonology(data):
    """Build an (unsaved) phonology from user-supplied data.

    :param dict data: the validated inputs for the new phonology.
    :returns: a ``Phonology`` SQLAlchemy model instance.
    """
    current_user = session['user']
    return Phonology(
        parent_directory=h.get_OLD_directory_path('phonologies',
                                                  config=config),
        word_boundary_symbol=h.word_boundary_symbol,
        UUID=unicode(uuid4()),
        name=h.normalize(data['name']),
        description=h.normalize(data['description']),
        # Carriage returns are stripped from the script; normalize or not?
        script=h.normalize(data['script']).replace(u'\r', u''),
        enterer=current_user,
        modifier=current_user,
        datetime_modified=h.now(),
        datetime_entered=h.now())
def create_new_morphological_parser(data):
    """Build an (unsaved) morphological parser from user-supplied data.

    :param dict data: the validated inputs for the new parser.
    :returns: a ``MorphologicalParser`` SQLAlchemy model instance.
    """
    current_user = session['user']
    return MorphologicalParser(
        parent_directory=h.get_OLD_directory_path('morphologicalparsers',
                                                  config=config),
        UUID=unicode(uuid4()),
        name=h.normalize(data['name']),
        description=h.normalize(data['description']),
        enterer=current_user,
        modifier=current_user,
        datetime_modified=h.now(),
        datetime_entered=h.now(),
        phonology=data['phonology'],
        morphology=data['morphology'],
        language_model=data['language_model'])
def create_new_morphological_parser(data):
    """Create a new morphological parser.

    :param dict data: the data for the morphological parser to be created.
    :returns: an SQLAlchemy model object representing the morphological
        parser.
    """
    # Collect constructor arguments first, then instantiate in one step.
    creation_params = {
        'parent_directory': h.get_OLD_directory_path('morphologicalparsers', config=config),
        'UUID': unicode(uuid4()),
        'name': h.normalize(data['name']),
        'description': h.normalize(data['description']),
        'enterer': session['user'],
        'modifier': session['user'],
        'datetime_modified': h.now(),
        'datetime_entered': h.now(),
        'phonology': data['phonology'],
        'morphology': data['morphology'],
        'language_model': data['language_model'],
    }
    return MorphologicalParser(**creation_params)
def create_base64_file(data):
    """Create a local file using data from a ``Content-Type:
    application/json`` request.

    :param dict data: the data to create the file model.
    :param str data['base64_encoded_file']: Base64-encoded file data.
    :returns: an SQLAlchemy model object representing the file.
    """
    # Placeholder: during validation, the schema will set a proper value
    # based on the base64_encoded_file or filename attribute.
    data['MIME_type'] = u''
    schema = FileCreateWithBase64EncodedFiledataSchema()
    state = h.State()
    state.full_dict = data
    state.user = session['user']
    data = schema.to_python(data, state)
    file = File()
    file.MIME_type = data['MIME_type']
    file.filename = h.normalize(data['filename'])
    file = add_standard_metadata(file, data)
    # Write the file to disk (making sure it's unique and thereby potentially
    # modifying file.filename); and calculate file.size.
    file_data = data['base64_encoded_file']  # base64-decoded during validation
    files_path = h.get_OLD_directory_path('files', config=config)
    file_path = os.path.join(files_path, file.filename)
    file_object, file_path = get_unique_file_path(file_path)
    # get_unique_file_path may have altered the path, so reassign the name.
    file.filename = os.path.split(file_path)[-1]
    file.name = file.filename
    file_object.write(file_data)
    file_object.close()
    # Drop the reference to the (possibly large) decoded payload.
    file_data = None
    file.size = os.path.getsize(file_path)
    file = restrict_file_by_forms(file)
    return file
def get_corpus_dir_path(corpus):
    """Return the directory path holding a corpus's on-disk data.

    :param corpus: a corpus model object with an integer ``id``.
    :returns: a path of the form ``<corpora_dir>/corpus_<id>``.
    """
    corpora_dir = h.get_OLD_directory_path('corpora', config=config)
    return os.path.join(corpora_dir, 'corpus_%d' % corpus.id)
def __setattrs__(self):
    """Initialize the standard test-fixture attributes.

    Sets up role-based WSGI ``extra_environ`` dicts used to simulate
    authenticated requests, loads the test configuration, and resolves the
    filesystem paths used throughout the test suite.
    """
    # One extra_environ per simulated authentication role.
    self.extra_environ_view = {'test.authentication.role': u'viewer'}
    self.extra_environ_contrib = {
        'test.authentication.role': u'contributor'
    }
    self.extra_environ_admin = {
        'test.authentication.role': u'administrator'
    }
    # The same roles with application settings turned on in the environ.
    self.extra_environ_view_appset = {
        'test.authentication.role': u'viewer',
        'test.application_settings': True
    }
    self.extra_environ_contrib_appset = {
        'test.authentication.role': u'contributor',
        'test.application_settings': True
    }
    self.extra_environ_admin_appset = {
        'test.authentication.role': u'administrator',
        'test.application_settings': True
    }
    self.json_headers = {'Content-Type': 'application/json'}
    # Load the test config; bound both to a local and to self for later use.
    config = self.config = appconfig('config:test.ini', relative_to='.')
    self.here = config['here']
    # Application data directories (resolved from the config).
    self.files_path = h.get_OLD_directory_path('files', config=config)
    self.reduced_files_path = h.get_OLD_directory_path('reduced_files',
        config=config)
    self.test_files_path = os.path.join(self.here,
        'onlinelinguisticdatabase', 'tests', 'data', 'files')
    self.create_reduced_size_file_copies = asbool(
        config.get('create_reduced_size_file_copies', False))
    self.preferred_lossy_audio_format = config.get(
        'preferred_lossy_audio_format', 'ogg')
    self.corpora_path = h.get_OLD_directory_path('corpora', config=config)
    # Test datasets and scripts shipped with the source tree.
    self.test_datasets_path = os.path.join(self.here,
        'onlinelinguisticdatabase', 'tests', 'data', 'datasets')
    self.test_scripts_path = os.path.join(self.here,
        'onlinelinguisticdatabase', 'tests', 'scripts')
    self.loremipsum100_path = os.path.join(self.test_datasets_path,
        'loremipsum_100.txt')
    self.loremipsum1000_path = os.path.join(self.test_datasets_path,
        'loremipsum_1000.txt')
    self.loremipsum10000_path = os.path.join(self.test_datasets_path,
        'loremipsum_10000.txt')
    self.users_path = h.get_OLD_directory_path('users', config=config)
    self.morphologies_path = h.get_OLD_directory_path('morphologies',
        config=config)
    self.morphological_parsers_path = h.get_OLD_directory_path(
        'morphological_parsers', config=config)
    self.phonologies_path = h.get_OLD_directory_path('phonologies',
        config=config)
    self.morpheme_language_models_path = h.get_OLD_directory_path(
        'morpheme_language_models', config=config)
    # Phonology test-script fixtures.
    self.test_phonologies_path = os.path.join(self.here,
        'onlinelinguisticdatabase', 'tests', 'data', 'phonologies')
    self.test_phonology_script_path = os.path.join(
        self.test_phonologies_path, 'test_phonology.script')
    self.test_malformed_phonology_script_path = os.path.join(
        self.test_phonologies_path, 'test_phonology_malformed.script')
    # NOTE(review): same file as the malformed-script path above — verify
    # this reuse is intentional.
    self.test_phonology_no_phonology_script_path = os.path.join(
        self.test_phonologies_path, 'test_phonology_malformed.script')
    self.test_medium_phonology_script_path = os.path.join(
        self.test_phonologies_path, 'test_phonology_medium.script')
    self.test_large_phonology_script_path = os.path.join(
        self.test_phonologies_path, 'test_phonology_large.script')
    self.test_phonology_testless_script_path = os.path.join(
        self.test_phonologies_path, 'test_phonology_no_tests.script')
    self.test_morphologies_path = os.path.join(self.here,
        'onlinelinguisticdatabase', 'tests', 'data', 'morphologies')
    self.test_morphophonologies_path = os.path.join(
        self.here, 'onlinelinguisticdatabase', 'tests', 'data',
        'morphophonologies')