Пример #1
0
def create_new_morpheme_language_model(data):
    """Construct a morpheme language model from ``data``.

    :param dict data: values for the new model; the keys read here are
        ``name``, ``description``, ``vocabulary_morphology``, ``corpus``,
        ``toolkit``, ``order``, ``smoothing`` and ``categorial``.
    :returns: an SQLAlchemy model object representing the morpheme language
        model.

    """
    # The current session user is recorded as both enterer and modifier.
    return MorphemeLanguageModel(
        parent_directory=h.get_OLD_directory_path(
            'morphemelanguagemodels', config=config),
        rare_delimiter=h.rare_delimiter,
        start_symbol=h.lm_start,
        end_symbol=h.lm_end,
        morpheme_delimiters=h.get_morpheme_delimiters(type_=u'unicode'),
        UUID=unicode(uuid4()),
        name=h.normalize(data['name']),
        description=h.normalize(data['description']),
        enterer=session['user'],
        modifier=session['user'],
        datetime_modified=h.now(),
        datetime_entered=h.now(),
        vocabulary_morphology=data['vocabulary_morphology'],
        corpus=data['corpus'],
        toolkit=data['toolkit'],
        order=data['order'],
        smoothing=data['smoothing'],
        categorial=data['categorial'],
    )
Пример #2
0
def create_new_morpheme_language_model(data):
    """Build a ``MorphemeLanguageModel`` from the supplied values.

    :param dict data: the data for the morpheme language model to be
        created; ``name`` and ``description`` are normalized, the remaining
        keys are stored as given.
    :returns: an SQLAlchemy model object representing the morpheme language
        model.

    """
    # Entries are listed in the order their values must be evaluated.
    attributes = {
        # Values taken from the helpers module and configuration.
        'parent_directory': h.get_OLD_directory_path(
            'morphemelanguagemodels', config=config),
        'rare_delimiter': h.rare_delimiter,
        'start_symbol': h.lm_start,
        'end_symbol': h.lm_end,
        'morpheme_delimiters': h.get_morpheme_delimiters(type_=u'unicode'),
        'UUID': unicode(uuid4()),
        # Normalized text supplied by the caller.
        'name': h.normalize(data['name']),
        'description': h.normalize(data['description']),
        # Provenance: who created the model and when.
        'enterer': session['user'],
        'modifier': session['user'],
        'datetime_modified': h.now(),
        'datetime_entered': h.now(),
        # Remaining caller-supplied values, stored unchanged.
        'vocabulary_morphology': data['vocabulary_morphology'],
        'corpus': data['corpus'],
        'toolkit': data['toolkit'],
        'order': data['order'],
        'smoothing': data['smoothing'],
        'categorial': data['categorial'],
    }
    return MorphemeLanguageModel(**attributes)
Пример #3
0
def update_morpheme_language_model(morpheme_language_model, data):
    """Apply ``data`` to an existing morpheme language model.

    Each attribute is passed through ``set_attr`` together with the running
    ``changed`` flag; the value returned by ``set_attr`` becomes the new flag.

    :param morpheme_language_model: the morpheme language model to be updated.
    :param dict data: representation of the updated morpheme language model.
    :returns: the updated model when the flag ends up truthy, otherwise the
        flag itself (initially ``False``).

    """
    lm = morpheme_language_model
    changed = False

    # Free-text values are normalized before being stored.
    for attr in ('name', 'description'):
        changed = lm.set_attr(attr, h.normalize(data[attr]), changed)

    # These values are stored exactly as supplied.
    for attr in ('vocabulary_morphology', 'corpus', 'toolkit', 'order',
                 'smoothing', 'categorial'):
        changed = lm.set_attr(attr, data[attr], changed)

    # Re-sync the symbols that come from the helpers module.
    for attr, helper_name in (('rare_delimiter', 'rare_delimiter'),
                              ('start_symbol', 'lm_start'),
                              ('end_symbol', 'lm_end')):
        changed = lm.set_attr(attr, getattr(h, helper_name), changed)

    if not changed:
        return changed

    # Merge the session user into the SQLAlchemy session before recording it
    # as the modifier.
    session['user'] = Session.merge(session['user'])
    lm.modifier = session['user']
    lm.datetime_modified = h.now()
    return lm
Пример #4
0
def update_corpus(corpus, data):
    """Apply ``data`` to an existing corpus.

    :param corpus: the corpus model to be updated.
    :param dict data: representation of the updated corpus.
    :returns: the updated corpus model when something changed, otherwise the
        ``changed`` flag (initially ``False``).

    """
    changed = False

    # Scalar attributes: name and description are normalized, the rest are
    # stored as given.
    for attr in ('name', 'description'):
        changed = corpus.set_attr(attr, h.normalize(data[attr]), changed)
    for attr in ('content', 'form_search'):
        changed = corpus.set_attr(attr, data[attr], changed)

    # Collections: drop falsy entries, then replace the stored collection
    # only when its membership differs (order is ignored by the comparison).
    replacements = (
        ('tags', [tag for tag in data['tags'] if tag]),
        ('forms', [form for form in data['forms'] if form]),
    )
    for attr, incoming in replacements:
        if set(incoming) != set(getattr(corpus, attr)):
            setattr(corpus, attr, incoming)
            changed = True

    if not changed:
        return changed

    session['user'] = Session.merge(session['user'])
    corpus.modifier = session['user']
    corpus.datetime_modified = h.now()
    return corpus
Пример #5
0
def create_new_morphology(data):
    """Construct a morphology from ``data``.

    :param dict data: values for the new morphology; the keys read here are
        ``name``, ``description``, ``lexicon_corpus``, ``rules_corpus``,
        ``script_type``, ``extract_morphemes_from_rules_corpus``, ``rules``,
        ``rich_upper``, ``rich_lower`` and ``include_unknowns``.
    :returns: an SQLAlchemy model object representing the morphology.

    """
    # The current session user is recorded as both enterer and modifier.
    return Morphology(
        parent_directory=h.get_OLD_directory_path(
            'morphologies', config=config),
        word_boundary_symbol=h.word_boundary_symbol,
        morpheme_delimiters=h.get_morpheme_delimiters(type_=u'unicode'),
        rare_delimiter=h.rare_delimiter,
        UUID=unicode(uuid4()),
        name=h.normalize(data['name']),
        description=h.normalize(data['description']),
        enterer=session['user'],
        modifier=session['user'],
        datetime_modified=h.now(),
        datetime_entered=h.now(),
        lexicon_corpus=data['lexicon_corpus'],
        rules_corpus=data['rules_corpus'],
        script_type=data['script_type'],
        extract_morphemes_from_rules_corpus=data[
            'extract_morphemes_from_rules_corpus'],
        rules=data['rules'],
        rich_upper=data['rich_upper'],
        rich_lower=data['rich_lower'],
        include_unknowns=data['include_unknowns'],
    )
Пример #6
0
def create_new_corpus(data):
    """Construct a corpus from ``data``.

    :param dict data: the data for the corpus to be created.
    :returns: an SQLAlchemy model object representing the corpus.

    .. note::

        Corpora deliberately give no meaning to the "restricted" tag.  Since
        a corpus' forms can be determined by a form search model and are
        therefore variable, toggling restricted status based on the status of
        any number of forms does not seem practical.  The corpus files that
        may be associated to a corpus by requesting
        ``PUT /corpora/id/writetofile`` may, however, be restricted if a
        restricted form is written to file.

    """
    corpus = Corpus()
    corpus.UUID = unicode(uuid4())

    # Free-text values are normalized; the rest are stored as given.
    for attr in ('name', 'description'):
        setattr(corpus, attr, h.normalize(data[attr]))
    for attr in ('content', 'form_search', 'forms', 'tags'):
        setattr(corpus, attr, data[attr])

    # The same user and the same timestamp are used for the "entered" and
    # "modified" fields.
    user = session['user']
    corpus.enterer = user
    corpus.modifier = user
    timestamp = h.now()
    corpus.datetime_modified = timestamp
    corpus.datetime_entered = timestamp
    return corpus
Пример #7
0
def update_morphological_parser(morphological_parser, data):
    """Apply ``data`` to an existing morphological parser.

    :param morphological_parser: the morphological parser model to be updated.
    :param dict data: representation of the updated morphological parser.
    :returns: the updated morphological parser model when something changed,
        otherwise the ``changed`` flag (initially ``False``).

    """
    parser = morphological_parser
    changed = False

    # Free-text values are normalized before being stored.
    for attr in ('name', 'description'):
        changed = parser.set_attr(attr, h.normalize(data[attr]), changed)

    # The parser's three components are stored as supplied.
    for attr in ('phonology', 'morphology', 'language_model'):
        changed = parser.set_attr(attr, data[attr], changed)

    if not changed:
        return changed

    session['user'] = Session.merge(session['user'])
    parser.modifier = session['user']
    parser.datetime_modified = h.now()
    return parser
Пример #8
0
def update_morphology(morphology, data):
    """Apply ``data`` to an existing morphology.

    :param morphology: the morphology model to be updated.
    :param dict data: representation of the updated morphology.
    :returns: the updated morphology model when something changed, otherwise
        the ``changed`` flag (initially ``False``).

    """
    changed = False

    # Free-text values are normalized before being stored.
    for attr in ('name', 'description'):
        changed = morphology.set_attr(attr, h.normalize(data[attr]), changed)

    # These values are stored exactly as supplied.
    for attr in ('lexicon_corpus', 'rules_corpus', 'script_type',
                 'extract_morphemes_from_rules_corpus', 'rules',
                 'rich_upper', 'rich_lower', 'include_unknowns'):
        changed = morphology.set_attr(attr, data[attr], changed)

    # Re-sync the symbols that come from the helpers module.
    for attr in ('rare_delimiter', 'word_boundary_symbol'):
        changed = morphology.set_attr(attr, getattr(h, attr), changed)

    if not changed:
        return changed

    session['user'] = Session.merge(session['user'])
    morphology.modifier = session['user']
    morphology.datetime_modified = h.now()
    return morphology
Пример #9
0
def update_speaker(speaker, data):
    """Apply ``data`` to an existing speaker.

    :param speaker: the speaker model to be updated.
    :param dict data: representation of the updated speaker.
    :returns: the updated speaker model when something changed, otherwise the
        ``changed`` flag (initially ``False``).

    """
    changed = False

    # Every user-supplied value is normalized before being stored.
    for attr in ('first_name', 'last_name', 'dialect', 'page_content',
                 'markup_language'):
        changed = speaker.set_attr(attr, h.normalize(data[attr]), changed)

    # The HTML is regenerated from the speaker's page content and markup
    # language as they stand after the updates above.
    html = h.get_HTML_from_contents(speaker.page_content,
                                    speaker.markup_language)
    changed = speaker.set_attr('html', html, changed)

    if not changed:
        return changed
    speaker.datetime_modified = datetime.datetime.utcnow()
    return speaker
Пример #10
0
def create_new_tag(data):
    """Construct a tag from ``data``.

    :param dict data: values for the new tag; ``name`` and ``description``
        are read and normalized.
    :returns: an SQLAlchemy model object representing the tag.

    """
    tag = Tag()
    for attr in ('name', 'description'):
        setattr(tag, attr, h.normalize(data[attr]))
    # Stamp the tag with the current UTC time.
    tag.datetime_modified = datetime.datetime.utcnow()
    return tag
Пример #11
0
def create_new_tag(data):
    """Build a new ``Tag`` from the supplied values.

    :param dict data: the data for the tag to be created (``name`` and
        ``description``).
    :returns: an SQLAlchemy model object representing the tag.

    """
    new_tag = Tag()
    normalize = h.normalize
    new_tag.name = normalize(data['name'])
    new_tag.description = normalize(data['description'])
    # Modification time is the moment of creation, in UTC.
    new_tag.datetime_modified = datetime.datetime.utcnow()
    return new_tag
Пример #12
0
def create_new_elicitation_method(data):
    """Construct an elicitation method from ``data``.

    :param dict data: values for the new elicitation method; ``name`` and
        ``description`` are read and normalized.
    :returns: an SQLAlchemy model object representing the elicitation method.

    """
    method = ElicitationMethod()
    for attr in ('name', 'description'):
        setattr(method, attr, h.normalize(data[attr]))
    # Stamp the record with the current UTC time.
    method.datetime_modified = datetime.datetime.utcnow()
    return method
Пример #13
0
def create_new_elicitation_method(data):
    """Build a new ``ElicitationMethod`` from the supplied values.

    :param dict data: the elicitation method to be created (``name`` and
        ``description``).
    :returns: an SQLAlchemy model object representing the elicitation method.

    """
    new_method = ElicitationMethod()
    normalize = h.normalize
    new_method.name = normalize(data['name'])
    new_method.description = normalize(data['description'])
    # Modification time is the moment of creation, in UTC.
    new_method.datetime_modified = datetime.datetime.utcnow()
    return new_method
Пример #14
0
def create_new_syntactic_category(data):
    """Construct a syntactic category from ``data``.

    :param dict data: values for the new syntactic category; ``name`` and
        ``description`` are normalized, ``type`` is stored as given.
    :returns: an SQLAlchemy model object representing the syntactic category.

    """
    category = SyntacticCategory()
    category.name = h.normalize(data['name'])
    # The type is stored without normalization.
    category.type = data['type']
    category.description = h.normalize(data['description'])
    category.datetime_modified = datetime.datetime.utcnow()
    return category
Пример #15
0
def create_new_syntactic_category(data):
    """Build a new ``SyntacticCategory`` from the supplied values.

    :param dict data: the data for the syntactic category to be created
        (``name``, ``type`` and ``description``).
    :returns: an SQLAlchemy model object representing the syntactic category.

    """
    new_category = SyntacticCategory()
    normalize = h.normalize
    new_category.name = normalize(data['name'])
    new_category.type = data['type']  # stored as given, not normalized
    new_category.description = normalize(data['description'])
    # Modification time is the moment of creation, in UTC.
    new_category.datetime_modified = datetime.datetime.utcnow()
    return new_category
Пример #16
0
def create_new_user(data):
    """Construct a user from ``data`` and create the user's directory.

    :param dict data: the data for the user to be created.
    :returns: an SQLAlchemy model object representing the user.

    """
    user = User()

    # Credentials: a fresh salt is generated and used to encrypt the password.
    user.salt = h.generate_salt()
    encrypted = h.encrypt_password(data['password'], str(user.salt))
    user.password = unicode(encrypted)

    # User-supplied text, normalized before storage.
    for attr in ('username', 'first_name', 'last_name', 'email',
                 'affiliation', 'role', 'markup_language', 'page_content'):
        setattr(user, attr, h.normalize(data[attr]))
    user.html = h.get_HTML_from_contents(user.page_content,
                                         user.markup_language)

    # Many-to-one data: orthographies are only assigned when truthy.
    for attr in ('input_orthography', 'output_orthography'):
        orthography = data[attr]
        if orthography:
            setattr(user, attr, orthography)

    # System-generated data.
    user.datetime_modified = datetime.datetime.utcnow()

    # Create the user's directory.
    h.create_user_directory(user)

    return user
Пример #17
0
def create_new_orthography(data):
    """Construct an orthography from ``data``.

    :param dict data: values for the new orthography; ``name`` and
        ``orthography`` are normalized, ``lowercase`` and
        ``initial_glottal_stops`` are stored as given.
    :returns: an SQLAlchemy model object representing the orthography.

    """
    orthography = Orthography()
    for attr in ('name', 'orthography'):
        setattr(orthography, attr, h.normalize(data[attr]))
    for attr in ('lowercase', 'initial_glottal_stops'):
        setattr(orthography, attr, data[attr])
    # Stamp the record with the current UTC time.
    orthography.datetime_modified = datetime.datetime.utcnow()
    return orthography
Пример #18
0
def create_new_user(data):
    """Build a new ``User`` from the supplied values.

    Besides populating the model, this also calls
    ``h.create_user_directory`` for the new user.

    :param dict data: the data for the user to be created.
    :returns: an SQLAlchemy model object representing the user.

    """
    new_user = User()
    normalize = h.normalize

    # The password is encrypted with a freshly generated salt.
    new_user.salt = h.generate_salt()
    salt = str(new_user.salt)
    new_user.password = unicode(h.encrypt_password(data['password'], salt))

    # Normalized user-supplied text.
    new_user.username = normalize(data['username'])
    new_user.first_name = normalize(data['first_name'])
    new_user.last_name = normalize(data['last_name'])
    new_user.email = normalize(data['email'])
    new_user.affiliation = normalize(data['affiliation'])
    new_user.role = normalize(data['role'])
    new_user.markup_language = normalize(data['markup_language'])
    new_user.page_content = normalize(data['page_content'])
    new_user.html = h.get_HTML_from_contents(
        new_user.page_content, new_user.markup_language)

    # Many-to-one data: input and output orthographies, assigned only when
    # a truthy value was supplied.
    input_orthography = data['input_orthography']
    if input_orthography:
        new_user.input_orthography = input_orthography
    output_orthography = data['output_orthography']
    if output_orthography:
        new_user.output_orthography = output_orthography

    # System-generated data.
    new_user.datetime_modified = datetime.datetime.utcnow()

    # Create the user's directory.
    h.create_user_directory(new_user)

    return new_user
Пример #19
0
def create_new_orthography(data):
    """Build a new ``Orthography`` from the supplied values.

    :param dict data: the data for the orthography to be created.
    :returns: an SQLAlchemy model object representing the orthography.

    """
    new_orthography = Orthography()
    normalize = h.normalize
    new_orthography.name = normalize(data['name'])
    new_orthography.orthography = normalize(data['orthography'])
    # The two settings below are stored as given, without normalization.
    new_orthography.lowercase = data['lowercase']
    new_orthography.initial_glottal_stops = data['initial_glottal_stops']
    new_orthography.datetime_modified = datetime.datetime.utcnow()
    return new_orthography
Пример #20
0
def create_new_page(data):
    """Construct a page from ``data``.

    :param dict data: values for the new page; ``name``, ``heading`` and
        ``content`` are normalized, ``markup_language`` is stored as given.
    :returns: an SQLAlchemy model object representing the page.

    """
    page = Page()
    for attr in ('name', 'heading'):
        setattr(page, attr, h.normalize(data[attr]))
    page.markup_language = data['markup_language']
    page.content = h.normalize(data['content'])
    # The HTML is generated from the content and markup language just stored.
    page.html = h.get_HTML_from_contents(page.content, page.markup_language)
    page.datetime_modified = datetime.datetime.utcnow()
    return page
Пример #21
0
def create_new_form_search(data):
    """Construct a form search from ``data``.

    :param dict data: the form search to be created.
    :returns: a form search model object.

    """
    search_model = FormSearch()
    search_model.name = h.normalize(data['name'])
    # Note that the search itself is purposefully not normalized
    # (reconsider this? ...)
    search_model.search = data['search']
    search_model.description = h.normalize(data['description'])
    # The current session user is recorded as the enterer.
    search_model.enterer = session['user']
    search_model.datetime_modified = datetime.datetime.utcnow()
    return search_model
Пример #22
0
def create_new_page(data):
    """Build a new ``Page`` from the supplied values.

    :param dict data: the data for the page to be created.
    :returns: an SQLAlchemy model object representing the page.

    """
    new_page = Page()
    normalize = h.normalize
    new_page.name = normalize(data['name'])
    new_page.heading = normalize(data['heading'])
    new_page.markup_language = data['markup_language']  # stored as given
    new_page.content = normalize(data['content'])
    # Render the HTML from the content and markup language set above.
    new_page.html = h.get_HTML_from_contents(
        new_page.content, new_page.markup_language)
    new_page.datetime_modified = datetime.datetime.utcnow()
    return new_page
Пример #23
0
    def apply(self, id, direction):
        """Call foma apply in the direction of ``direction`` on the input in the request body using a morphology.

        :param str id: the ``id`` value of the morphology that will be used.
        :param str direction: the direction of foma application.
        :Request body: JSON object that is validated with
            ``MorphemeSequencesSchema``; the inputs are read from its
            ``morpheme_sequences`` key, i.e.
            ``{'morpheme_sequences': [t1, t2, ...]}``.
        :returns: on success, the result of ``morphology.apply``; per the
            original documentation this is a JSON object of the form
            ``{t1: [p1t1, p2t1, ...], ...}`` where ``t1`` is an input from the
            request body and ``p1t1``, ``p2t1``, etc. are outputs of ``t1``
            after apply up/down.  On failure, an error dictionary is returned
            and the response status is set to 404 (no such morphology) or 400
            (foma missing, morphology not compiled, undecodable JSON or
            invalid input).

        """
        morphology = Session.query(Morphology).get(id)
        if not morphology:
            # Previously this case fell through and returned ``None`` with no
            # error status; report it explicitly instead.
            response.status_int = 404
            return {'error': 'There is no morphology with id %s' % id}
        if not h.foma_installed():
            response.status_int = 400
            return {'error': 'Foma and flookup are not installed.'}
        morphology_binary_path = morphology.get_file_path('binary')
        if not os.path.isfile(morphology_binary_path):
            response.status_int = 400
            return {'error': 'Morphology %d has not been compiled yet.' % morphology.id}
        try:
            inputs = json.loads(unicode(request.body, request.charset))
            inputs = MorphemeSequencesSchema.to_python(inputs)
            inputs = [h.normalize(i) for i in inputs['morpheme_sequences']]
            return morphology.apply(direction, inputs)
        except h.JSONDecodeError:
            response.status_int = 400
            return h.JSONDecodeErrorResponse
        except Invalid as e:
            response.status_int = 400
            return {'errors': e.unpack_errors()}
Пример #24
0
def create_new_speaker(data):
    """Construct a speaker from ``data``.

    :param dict data: values for the new speaker; every value read
        (``first_name``, ``last_name``, ``dialect``, ``page_content`` and
        ``markup_language``) is normalized.
    :returns: an SQLAlchemy model object representing the speaker.

    """
    speaker = Speaker()
    for attr in ('first_name', 'last_name', 'dialect', 'page_content'):
        setattr(speaker, attr, h.normalize(data[attr]))
    speaker.datetime_modified = datetime.datetime.utcnow()
    speaker.markup_language = h.normalize(data['markup_language'])
    # The HTML is generated from the page content and markup language just
    # stored.
    speaker.html = h.get_HTML_from_contents(speaker.page_content,
                                            speaker.markup_language)
    return speaker
Пример #25
0
def create_new_form_search(data):
    """Build a new ``FormSearch`` from the supplied values.

    :param dict data: the form search to be created.
    :returns: a form search model object.

    """
    new_search = FormSearch()
    normalize = h.normalize
    new_search.name = normalize(data['name'])
    # Note that this is purposefully not normalized (reconsider this? ...)
    new_search.search = data['search']
    new_search.description = normalize(data['description'])
    new_search.enterer = session['user']
    # Modification time is the moment of creation, in UTC.
    new_search.datetime_modified = datetime.datetime.utcnow()
    return new_search
Пример #26
0
    def get_probabilities(self, id):
        """Return the probability of each sequence of morphemes passed in the JSON PUT params.

        :param id: the ``id`` value of the morpheme language model to use.
        :Request body: JSON object validated with ``MorphemeSequencesSchema``;
            its ``morpheme_sequences`` value is a list of space-delimited
            morphemes in form|gloss|category format where "|" is actually
            ``h.rare_delimiter``.
        :returns: on success, the result of ``lm.get_probabilities``; per the
            original documentation, a dictionary with morpheme sequences as
            keys and log probabilities as values.  On failure, an error
            dictionary is returned and the response status is set to 404 (no
            such language model) or 400 (undecodable JSON, invalid input or
            any other error raised while generating probabilities).

        """
        lm = Session.query(MorphemeLanguageModel).get(id)
        if not lm:
            # Previously this case fell through and returned ``None`` with no
            # error status; report it explicitly instead.
            response.status_int = 404
            return {'error': 'There is no morpheme language model with id %s' % id}
        try:
            schema = MorphemeSequencesSchema()
            values = json.loads(unicode(request.body, request.charset))
            data = schema.to_python(values)
            morpheme_sequences = [h.normalize(ms) for ms in data['morpheme_sequences']]
            return lm.get_probabilities(morpheme_sequences)
        except h.JSONDecodeError:
            response.status_int = 400
            return h.JSONDecodeErrorResponse
        except Invalid as e:
            response.status_int = 400
            return {'errors': e.unpack_errors()}
        except Exception:
            # Deliberate catch-all: any other failure is reported to the
            # client as a generic 400 rather than propagated.
            response.status_int = 400
            return {'error': 'An error occurred while trying to generate probabilities.'}
Пример #27
0
def update_subinterval_referencing_file(file):
    """Update a subinterval-referencing file model from the request body.

    :param file: a file model object to update.
    :param request.body: a JSON object containing the data for updating the file.
    :returns: the file model when something changed, otherwise the
        ``changed`` flag (initially ``False``).

    """
    schema = FileSubintervalReferencingSchema()
    submitted = json.loads(unicode(request.body, request.charset))
    # A missing or falsy name becomes the empty string before validation.
    submitted['name'] = submitted.get('name') or u''

    # The validation state carries the full input and the current user.
    state = h.State()
    state.full_dict = submitted
    state.user = session['user']
    data = schema.to_python(submitted, state)

    # Data unique to subinterval-referencing files.
    changed = file.set_attr('parent_file', data['parent_file'], False)
    # An empty name falls back to the parent file's filename.
    name = h.normalize(data['name']) or file.parent_file.filename
    changed = file.set_attr('name', name, changed)
    for attr in ('start', 'end'):
        changed = file.set_attr(attr, data[attr], changed)

    file, changed = update_standard_metadata(file, data, changed)

    if not changed:
        return changed
    file.datetime_modified = datetime.datetime.utcnow()
    return file
Пример #28
0
def update_elicitation_method(elicitation_method, data):
    """Apply ``data`` to an existing elicitation method.

    :param elicitation_method: the elicitation method model to be updated.
    :param dict data: representation of the updated elicitation method.
    :returns: the updated elicitation method model when something changed,
        otherwise the ``changed`` flag (initially ``False``).

    """
    changed = False
    # Both values are normalized before being stored.
    for attr in ('name', 'description'):
        changed = elicitation_method.set_attr(
            attr, h.normalize(data[attr]), changed)
    if not changed:
        return changed
    elicitation_method.datetime_modified = datetime.datetime.utcnow()
    return elicitation_method
Пример #29
0
    def applydown(self, id):
        """Apply-down (i.e., phonologize) the input in the request body using a phonology.

        :URL: ``PUT /phonologies/applydown/id`` (or ``PUT /phonologies/phonologize/id``)
        :param str id: the ``id`` value of the phonology that will be used.
        :Request body: JSON object of the form ``{'transcriptions': [t1, t2, ...]}``,
            validated with ``MorphophonemicTranscriptionsSchema``.
        :returns: on success, the result of ``phonology.applydown``; per the
            original documentation this is a JSON object of the form
            ``{t1: [p1t1, p2t1, ...], ...}`` where ``t1`` is a transcription
            from the request body and ``p1t1``, ``p2t1``, etc. are
            phonologized outputs of ``t1``.  On failure, an error dictionary
            is returned and the response status is set to 404 (no such
            phonology) or 400 (foma missing, phonology not compiled,
            undecodable JSON or invalid input).

        """
        phonology = Session.query(Phonology).get(id)
        if not phonology:
            # Previously this case fell through and returned ``None`` with no
            # error status; report it explicitly instead.
            response.status_int = 404
            return {'error': 'There is no phonology with id %s' % id}
        if not h.foma_installed():
            response.status_int = 400
            return {'error': 'Foma and flookup are not installed.'}
        binary_path = phonology.get_file_path('binary')
        if not os.path.isfile(binary_path):
            response.status_int = 400
            return {'error': 'Phonology %d has not been compiled yet.' % phonology.id}
        try:
            inputs = json.loads(unicode(request.body, request.charset))
            inputs = MorphophonemicTranscriptionsSchema.to_python(inputs)
            inputs = [h.normalize(i) for i in inputs['transcriptions']]
            return phonology.applydown(inputs)
        except h.JSONDecodeError:
            response.status_int = 400
            return h.JSONDecodeErrorResponse
        except Invalid as e:
            response.status_int = 400
            return {'errors': e.unpack_errors()}
Пример #30
0
def update_externally_hosted_file(file):
    """Update an externally hosted file model from the request body.

    :param file: a file model object to update.
    :param request.body: a JSON object containing the data for updating the file.
    :returns: the file model when something changed, otherwise the
        ``changed`` flag (initially ``False``).

    """
    submitted = json.loads(unicode(request.body, request.charset))
    # A missing or falsy password becomes the empty string before validation.
    submitted['password'] = submitted.get('password') or u''
    data = FileExternallyHostedSchema().to_python(submitted)

    # Data unique to externally hosted files; only the name is normalized.
    changed = file.set_attr('url', data['url'], False)
    changed = file.set_attr('name', h.normalize(data['name']), changed)
    for attr in ('password', 'MIME_type'):
        changed = file.set_attr(attr, data[attr], changed)

    file, changed = update_standard_metadata(file, data, changed)

    if not changed:
        return changed
    file.datetime_modified = datetime.datetime.utcnow()
    return file
Пример #31
0
    def get_probabilities(self, id):
        """Return the probability of each sequence of morphemes passed in the JSON PUT params.

        :param id: the ``id`` value of the morpheme language model to use.
        :Request body: JSON object validated with ``MorphemeSequencesSchema``;
            its ``morpheme_sequences`` value is a list of space-delimited
            morphemes in form|gloss|category format where "|" is actually
            ``h.rare_delimiter``.
        :returns: on success, the result of ``lm.get_probabilities``; per the
            original documentation, a dictionary with morpheme sequences as
            keys and log probabilities as values.  On failure, an error
            dictionary is returned and the response status is set to 404 (no
            such language model) or 400 (undecodable JSON, invalid input or
            any other error raised while generating probabilities).

        """
        lm = Session.query(MorphemeLanguageModel).get(id)
        if not lm:
            # Previously this case fell through and returned ``None`` with no
            # error status; report it explicitly instead.
            response.status_int = 404
            return {
                'error':
                'There is no morpheme language model with id %s' % id
            }
        try:
            schema = MorphemeSequencesSchema()
            values = json.loads(unicode(request.body, request.charset))
            data = schema.to_python(values)
            morpheme_sequences = [
                h.normalize(ms) for ms in data['morpheme_sequences']
            ]
            return lm.get_probabilities(morpheme_sequences)
        except h.JSONDecodeError:
            response.status_int = 400
            return h.JSONDecodeErrorResponse
        except Invalid as e:
            response.status_int = 400
            return {'errors': e.unpack_errors()}
        except Exception:
            # Deliberate catch-all: any other failure is reported to the
            # client as a generic 400 rather than propagated.
            response.status_int = 400
            return {
                'error':
                'An error occurred while trying to generate probabilities.'
            }
Пример #32
0
def create_subinterval_referencing_file(data):
    """Create a subinterval-referencing file.

    :param dict data: the data to create the file model.
    :param int data['parent_file']: the ``id`` value of an audio/video file model.
    :param float/int data['start']: the start of the interval in seconds.
    :param float/int data['end']: the end of the interval in seconds.
    :returns: an SQLAlchemy model object representing the file.

    A value for ``data['name']`` may also be supplied.  Note that the ``name``
    key of the dictionary passed in is set in place before validation.

    """
    # A missing or falsy name becomes the empty string before validation.
    data['name'] = data.get('name') or u''

    # The validation state carries the full input and the current user.
    state = h.State()
    state.full_dict = data
    state.user = session['user']
    data = FileSubintervalReferencingSchema().to_python(data, state)

    file = File()

    # Data unique to subinterval-referencing files.
    file.parent_file = data['parent_file']
    # The name defaults to the parent file's filename if nothing was provided
    # by the user.
    file.name = h.normalize(data['name']) or file.parent_file.filename
    for attr in ('start', 'end'):
        setattr(file, attr, data[attr])
    # The MIME type is copied from the parent file.
    file.MIME_type = file.parent_file.MIME_type

    return restrict_file_by_forms(add_standard_metadata(file, data))
Пример #33
0
def update_tag(tag, data):
    """Apply ``data`` to an existing tag.

    :param tag: the tag model to be updated.
    :param dict data: representation of the updated tag.
    :returns: the updated tag model when something changed, otherwise the
        ``changed`` flag (initially ``False``).

    """
    changed = False
    # Both values are normalized before being stored.
    for attr in ('name', 'description'):
        changed = tag.set_attr(attr, h.normalize(data[attr]), changed)
    if not changed:
        return changed
    tag.datetime_modified = datetime.datetime.utcnow()
    return tag
Пример #34
0
def create_externally_hosted_file(data):
    """Build a file model whose data are served from an external URL.

    :param dict data: the data to create the file model.
    :param str data['url']: a valid URL where the file data are served.
    :returns: an SQLAlchemy model object representing the file.

    Besides the standard metadata keys, ``name``, ``password`` and
    ``MIME_type`` are optional.  A missing or falsy ``password`` is replaced
    by the empty string before validation; this mutates the caller's dict.

    """
    data['password'] = data.get('password') or u''
    cleaned = FileExternallyHostedSchema().to_python(data)

    hosted_file = File()

    # String values supplied by the user; only the name is normalized.
    hosted_file.name = h.normalize(cleaned['name'])
    hosted_file.password = cleaned['password']
    hosted_file.MIME_type = cleaned['MIME_type']
    hosted_file.url = cleaned['url']

    return restrict_file_by_forms(add_standard_metadata(hosted_file, cleaned))
Пример #35
0
    def parse(self, id):
        """Parse word transcriptions with the morphological parser ``id``.

        :param str id: the ``id`` value of the morphological parser that will be used.
        :Request body: JSON object of the form ``{'transcriptions': [t1, t2, ...]}``.
        :returns: on success, a dict of the form ``{t1: p1, t2: p2, ...}``
            mapping each transcription from the request body to the first
            element (the parse) of the pair the parser returned for it.  On
            failure an error dict is returned and the response status is set
            to 404 (no such parser) or 400 (foma not installed, undecodable
            JSON, or invalid input).

        """
        morphological_parser = Session.query(MorphologicalParser).get(id)
        if not morphological_parser:
            response.status_int = 404
            return {'error': 'There is no morphological parser with id %s' % id}
        if not h.foma_installed():
            response.status_int = 400
            return {'error': 'Foma and flookup are not installed.'}
        try:
            payload = json.loads(unicode(request.body, request.charset))
            validated = TranscriptionsSchema.to_python(payload)
            words = [h.normalize(word) for word in validated['transcriptions']]
            results = morphological_parser.parse(words)
            # TODO: allow for a param which causes the candidates to be
            # returned as well as/instead of only the most probable parse
            # candidate.
            best_parses = {}
            for transcription, (best, _candidates) in results.iteritems():
                best_parses[transcription] = best
            return best_parses
        except h.JSONDecodeError:
            response.status_int = 400
            return h.JSONDecodeErrorResponse
        except Invalid as error:
            response.status_int = 400
            return {'errors': error.unpack_errors()}
Пример #36
0
def add_standard_metadata(file, data):
    """Copy the standard metadata from ``data`` onto the file model.

    :param file: file model object
    :param dict data: dictionary containing file attribute values.
    :returns: the updated file model object.

    ``elicitor`` and ``speaker`` are only assigned when truthy; falsy members
    of ``tags`` and ``forms`` are dropped.  Both datetime attributes receive
    the same ``h.now()`` value.

    """
    file.description = h.normalize(data['description'])
    file.utterance_type = data['utterance_type']
    file.date_elicited = data['date_elicited']

    elicitor = data['elicitor']
    if elicitor:
        file.elicitor = elicitor
    speaker = data['speaker']
    if speaker:
        file.speaker = speaker

    file.tags = list(filter(None, data['tags']))
    file.forms = list(filter(None, data['forms']))

    timestamp = h.now()
    file.datetime_entered = timestamp
    file.datetime_modified = timestamp

    # Because of SQLAlchemy's uniqueness constraints, we may need to set the
    # enterer to the elicitor object when both are the same user.
    current_user = session['user']
    same_person = elicitor and (elicitor.id == current_user.id)
    file.enterer = elicitor if same_person else current_user
    return file
Пример #37
0
def create_new_keyboard(data):
    """Build a new keyboard model from ``data``.

    :param dict data: the data for the keyboard to be created; the keys
        ``name``, ``description`` and ``keyboard`` are read and normalized.
    :returns: an SQLAlchemy model object representing the keyboard.

    """
    new_keyboard = Keyboard()
    for attr in ('name', 'description', 'keyboard'):
        setattr(new_keyboard, attr, h.normalize(data[attr]))

    # OLD-generated data: one timestamp and one user are shared by the
    # entered/modified and enterer/modifier pairs.
    timestamp = h.now()
    new_keyboard.datetime_entered = timestamp
    new_keyboard.datetime_modified = timestamp
    current_user = session['user']
    new_keyboard.enterer = current_user
    new_keyboard.modifier = current_user

    return new_keyboard
Пример #38
0
def update_tag(tag, data):
    """Update a tag's name and description.

    :param tag: the tag model to be updated.
    :param dict data: representation of the updated tag.
    :returns: the updated tag model when a change was recorded by
        ``set_attr``; otherwise the falsy ``changed`` flag (``False``).

    """
    dirty = False
    new_name = h.normalize(data['name'])
    dirty = tag.set_attr('name', new_name, dirty)
    new_description = h.normalize(data['description'])
    dirty = tag.set_attr('description', new_description, dirty)

    if dirty:
        # Record when the modification happened (UTC).
        tag.datetime_modified = datetime.datetime.utcnow()
    return tag if dirty else dirty
Пример #39
0
def create_new_keyboard(data):
    """Create a keyboard model populated from ``data``.

    :param dict data: the data for the keyboard to be created (``name``,
        ``description`` and ``keyboard`` are read).
    :returns: an SQLAlchemy model object representing the keyboard.

    """
    result = Keyboard()

    # User-supplied values, normalized.
    result.name = h.normalize(data['name'])
    result.description = h.normalize(data['description'])
    result.keyboard = h.normalize(data['keyboard'])

    # OLD-generated data.
    created_at = h.now()
    result.datetime_entered = created_at
    result.datetime_modified = created_at
    creator = session['user']
    result.enterer = creator
    result.modifier = creator

    return result
Пример #40
0
def update_orthography(orthography, data):
    """Apply new values from ``data`` to an orthography.

    :param orthography: the orthography model to be updated.
    :param dict data: representation of the updated orthography.
    :returns: the updated orthography model if ``set_attr`` reported a
        change; otherwise the falsy ``changed`` flag (``False``).

    """
    modified = False
    # ``name`` and ``orthography`` are normalized before being compared/set.
    for attr in ('name', 'orthography'):
        modified = orthography.set_attr(attr, h.normalize(data[attr]), modified)
    # The remaining values are passed through as supplied.
    for attr in ('lowercase', 'initial_glottal_stops'):
        modified = orthography.set_attr(attr, data[attr], modified)

    if not modified:
        return modified
    orthography.datetime_modified = datetime.datetime.utcnow()
    return orthography
Пример #41
0
def update_syntactic_category(syntactic_category, data):
    """Apply new values from ``data`` to a syntactic category.

    :param syntactic_category: the syntactic category model to be updated.
    :param dict data: representation of the updated syntactic category.
    :returns: the updated syntactic category model if ``set_attr`` reported
        a change; otherwise the falsy ``changed`` flag (``False``).

    """
    modified = False

    # Unicode data: every value is normalized before being compared/set.
    for attr in ("name", "type", "description"):
        modified = syntactic_category.set_attr(
            attr, h.normalize(data[attr]), modified)

    if not modified:
        return modified
    syntactic_category.datetime_modified = datetime.datetime.utcnow()
    return syntactic_category
Пример #42
0
def update_elicitation_method(elicitation_method, data):
    """Apply a new name and description to an elicitation method.

    :param elicitation_method: the elicitation method model to be updated.
    :param dict data: representation of the updated elicitation method.
    :returns: the updated elicitation method model if ``set_attr`` reported
        a change; otherwise the falsy ``changed`` flag (``False``).

    """
    modified = False
    for attr in ('name', 'description'):
        normalized = h.normalize(data[attr])
        modified = elicitation_method.set_attr(attr, normalized, modified)

    if not modified:
        return modified
    elicitation_method.datetime_modified = datetime.datetime.utcnow()
    return elicitation_method
Пример #43
0
def update_application_settings(application_settings, data):
    """Apply new values from ``data`` to an application settings model.

    :param application_settings: the application settings model to be updated.
    :param dict data: representation of the updated application settings.
    :returns: the updated application settings model if anything changed;
        otherwise the falsy ``changed`` flag (``False``).

    """

    def verbatim(value):
        # Value is stored exactly as supplied.
        return value

    def compact(value):
        # Remove all white space, then normalize.
        return h.normalize(h.remove_all_white_space(value))

    # (attribute name, conversion) pairs, processed strictly in this order.
    # The last three entries are the many-to-one orthography references.
    attribute_plan = (
        ('object_language_name', verbatim),
        ('object_language_id', verbatim),
        ('metalanguage_name', verbatim),
        ('metalanguage_id', verbatim),
        ('metalanguage_inventory', compact),
        ('orthographic_validation', verbatim),
        ('narrow_phonetic_inventory', compact),
        ('narrow_phonetic_validation', verbatim),
        ('broad_phonetic_inventory', compact),
        ('broad_phonetic_validation', verbatim),
        ('morpheme_break_is_orthographic', verbatim),
        ('morpheme_break_validation', verbatim),
        ('phonemic_inventory', compact),
        ('morpheme_delimiters', h.normalize),
        ('punctuation', compact),
        ('grammaticalities', compact),
        ('storage_orthography', verbatim),
        ('input_orthography', verbatim),
        ('output_orthography', verbatim),
    )

    modified = False
    for attr, convert in attribute_plan:
        modified = application_settings.set_attr(
            attr, convert(data[attr]), modified)

    # Many-to-many data: unrestricted_users.  Compare as sets; if the
    # membership differs, replace the whole collection.
    new_users = [user for user in data['unrestricted_users'] if user]
    if set(new_users) != set(application_settings.unrestricted_users):
        application_settings.unrestricted_users = new_users
        modified = True

    if not modified:
        return modified
    application_settings.datetime_modified = datetime.datetime.utcnow()
    return application_settings
Пример #44
0
def update_user(user, data):
    """Apply new values from ``data`` to a user.

    :param user: the user model to be updated.
    :param dict data: representation of the updated user.
    :returns: the updated user model if ``set_attr`` reported a change;
        otherwise the falsy ``changed`` flag (``False``).

    A ``None`` value for ``data['password']`` or ``data['username']`` means
    that attribute is left untouched.  When the username changes, the user's
    directory is renamed via ``h.rename_user_directory``.

    """
    modified = False

    # Unicode data, normalized.  Order matters: ``page_content`` and
    # ``markup_language`` must be set before ``html`` is derived from them.
    text_attrs = ('first_name', 'last_name', 'email', 'affiliation', 'role',
                  'page_content', 'markup_language')
    for attr in text_attrs:
        modified = user.set_attr(attr, h.normalize(data[attr]), modified)

    rendered = h.get_HTML_from_contents(user.page_content,
                                        user.markup_language)
    modified = user.set_attr('html', rendered, modified)

    # username and password need special treatment: a value of None means
    # that these should not be updated.
    new_password = data['password']
    if new_password is not None:
        encrypted = h.encrypt_password(new_password, str(user.salt))
        modified = user.set_attr('password', unicode(encrypted), modified)

    new_username = data['username']
    if new_username is not None:
        new_username = h.normalize(new_username)
        if new_username != user.username:
            h.rename_user_directory(user.username, new_username)
        modified = user.set_attr('username', new_username, modified)

    # Many-to-one data.
    for attr in ('input_orthography', 'output_orthography'):
        modified = user.set_attr(attr, data[attr], modified)

    if not modified:
        return modified
    user.datetime_modified = datetime.datetime.utcnow()
    return user
Пример #45
0
def update_orthography(orthography, data):
    """Update an orthography from ``data``.

    :param orthography: the orthography model to be updated.
    :param dict data: representation of the updated orthography.
    :returns: the updated orthography model when ``set_attr`` recorded a
        change; otherwise the falsy ``changed`` flag (``False``).

    """
    dirty = False

    # Normalized string values.
    new_name = h.normalize(data['name'])
    dirty = orthography.set_attr('name', new_name, dirty)
    new_orthography = h.normalize(data['orthography'])
    dirty = orthography.set_attr('orthography', new_orthography, dirty)

    # Values passed through unchanged.
    dirty = orthography.set_attr('lowercase', data['lowercase'], dirty)
    dirty = orthography.set_attr(
        'initial_glottal_stops', data['initial_glottal_stops'], dirty)

    if dirty:
        orthography.datetime_modified = datetime.datetime.utcnow()
    return orthography if dirty else dirty
Пример #46
0
def create_new_phonology(data):
    """Create a new phonology.

    :param dict data: the data for the phonology to be created; the keys
        ``name``, ``description`` and ``script`` are read.
    :returns: an SQLAlchemy model object representing the phonology.

    The script is normalized and has all carriage returns (``\\r``) removed.

    """
    # Take a single timestamp so that a freshly created phonology has
    # identical entered/modified datetimes; two separate ``h.now()`` calls
    # could yield slightly different values.
    now = h.now()
    phonology = Phonology(
        parent_directory=h.get_OLD_directory_path('phonologies', config=config),
        word_boundary_symbol=h.word_boundary_symbol,
        UUID=unicode(uuid4()),
        name=h.normalize(data['name']),
        description=h.normalize(data['description']),
        script=h.normalize(data['script']).replace(u'\r', u''),  # normalize or not?
        enterer=session['user'],
        modifier=session['user'],
        datetime_modified=now,
        datetime_entered=now
    )
    return phonology
Пример #47
0
def update_keyboard(keyboard, data):
    """Apply new values from ``data`` to a keyboard.

    :param keyboard: the keyboard model to be updated.
    :param dict data: representation of the updated keyboard.
    :returns: the updated keyboard model if ``set_attr`` reported a change;
        otherwise the falsy ``changed`` flag (``False``).

    On change, the modification time is set and the session's user becomes
    the keyboard's modifier.

    """
    modified = False
    for attr in ('name', 'description', 'keyboard'):
        modified = keyboard.set_attr(attr, h.normalize(data[attr]), modified)

    if not modified:
        return modified

    keyboard.datetime_modified = h.now()
    # Store the merged user back in the web session before using it as the
    # modifier.
    merged_user = Session.merge(session['user'])
    session['user'] = merged_user
    keyboard.modifier = session['user']
    return keyboard
Пример #48
0
def create_new_morphological_parser(data):
    """Create a new morphological parser.

    :param dict data: the data for the morphological parser to be created;
        the keys ``name``, ``description``, ``phonology``, ``morphology`` and
        ``language_model`` are read.
    :returns: an SQLAlchemy model object representing the morphological parser.

    """
    # Take a single timestamp so that a freshly created parser has identical
    # entered/modified datetimes; two separate ``h.now()`` calls could yield
    # slightly different values.
    now = h.now()
    morphological_parser = MorphologicalParser(
        parent_directory=h.get_OLD_directory_path('morphologicalparsers',
                                                  config=config),
        UUID=unicode(uuid4()),
        name=h.normalize(data['name']),
        description=h.normalize(data['description']),
        enterer=session['user'],
        modifier=session['user'],
        datetime_modified=now,
        datetime_entered=now,
        phonology=data['phonology'],
        morphology=data['morphology'],
        language_model=data['language_model'])
    return morphological_parser
Пример #49
0
def update_standard_metadata(file, data, changed):
    """Update the standard metadata attributes of a file model.

    :param file: a file model object to be updated.
    :param dict data: the data used to update the file model.
    :param bool changed: indicates whether the file has been changed.
    :returns: a tuple whose first element is the file model and whose second is
        the boolean ``changed``.

    """
    # Normalized string values.
    for attr in ('description', 'utterance_type'):
        changed = file.set_attr(attr, h.normalize(data[attr]), changed)
    # Values passed through unchanged.
    for attr in ('date_elicited', 'elicitor', 'speaker'):
        changed = file.set_attr(attr, data[attr], changed)

    # Many-to-many data: tags & forms.  Falsy entries are dropped and the
    # collections are replaced only when their membership differs.
    new_forms = [form for form in data['forms'] if form]
    new_tags = [tag for tag in data['tags'] if tag]

    if set(new_forms) != set(file.forms):
        file.forms = new_forms
        changed = True

        # Cause the entire file to be tagged as restricted if any one of its
        # forms are so tagged.
        form_tags = [tag for form in file.forms for tag in form.tags]
        restricted = [tag for tag in form_tags if tag.name == u'restricted']
        if restricted and restricted[0] not in new_tags:
            new_tags.append(restricted[0])

    if set(new_tags) != set(file.tags):
        file.tags = new_tags
        changed = True

    return file, changed
Пример #50
0
def update_form_search(form_search, data):
    """Apply new values from ``data`` to a form search model.

    :param form_search: the form search model to be updated.
    :param dict data: representation of the updated form search.
    :returns: the updated form search model if ``set_attr`` reported a
        change; otherwise the falsy ``changed`` flag (``False``).

    """
    modified = False

    # ``name`` and ``description`` are normalized; ``search`` is passed
    # through as supplied.
    new_name = h.normalize(data['name'])
    modified = form_search.set_attr('name', new_name, modified)
    modified = form_search.set_attr('search', data['search'], modified)
    new_description = h.normalize(data['description'])
    modified = form_search.set_attr('description', new_description, modified)

    if not modified:
        return modified
    form_search.datetime_modified = datetime.datetime.utcnow()
    return form_search
Пример #51
0
def update_phonology(phonology, data):
    """Update a phonology.

    :param phonology: the phonology model to be updated.
    :param dict data: representation of the updated phonology.
    :returns: the updated phonology model or, if ``changed`` has not been set
        to ``True``, ``False``.

    On change, the session's user becomes the phonology's modifier and the
    modification time is set.

    """
    changed = False
    # Unicode Data
    changed = phonology.set_attr('name', h.normalize(data['name']), changed)
    changed = phonology.set_attr('description', h.normalize(data['description']), changed)
    # Strip carriage returns exactly as create_new_phonology does, so that a
    # script differing only in line endings is neither reported as a change
    # nor stored with ``\r`` characters.
    script = h.normalize(data['script']).replace(u'\r', u'')
    changed = phonology.set_attr('script', script, changed)
    changed = phonology.set_attr('word_boundary_symbol', h.word_boundary_symbol, changed)

    if changed:
        session['user'] = Session.merge(session['user'])
        phonology.modifier = session['user']
        phonology.datetime_modified = h.now()
        return phonology
    return changed
Пример #52
0
def create_new_application_settings(data):
    """Build a new application settings model from ``data``.

    :param dict data: the application settings to be created.
    :returns: an SQLAlchemy model object representing the application settings.

    """

    def verbatim(value):
        # Value is stored exactly as supplied.
        return value

    def compact(value):
        # Remove all white space, then normalize.
        return h.normalize(h.remove_all_white_space(value))

    # (attribute name, conversion) pairs, assigned strictly in this order.
    scalar_plan = (
        ('object_language_name', verbatim),
        ('object_language_id', verbatim),
        ('metalanguage_name', verbatim),
        ('metalanguage_id', verbatim),
        ('metalanguage_inventory', compact),
        ('orthographic_validation', verbatim),
        ('narrow_phonetic_inventory', compact),
        ('narrow_phonetic_validation', verbatim),
        ('broad_phonetic_inventory', compact),
        ('broad_phonetic_validation', verbatim),
        ('morpheme_break_is_orthographic', verbatim),
        ('morpheme_break_validation', verbatim),
        ('phonemic_inventory', compact),
        ('morpheme_delimiters', h.normalize),
        ('punctuation', compact),
        ('grammaticalities', compact),
    )

    settings = ApplicationSettings()
    for attr, convert in scalar_plan:
        setattr(settings, attr, convert(data[attr]))

    # Many-to-one: an orthography is only assigned when a truthy value was
    # supplied.
    for attr in ('storage_orthography', 'input_orthography',
                 'output_orthography'):
        orthography = data[attr]
        if orthography:
            setattr(settings, attr, orthography)

    # Many-to-many data: unrestricted_users, with falsy entries dropped.
    settings.unrestricted_users = [
        user for user in data['unrestricted_users'] if user
    ]

    return settings