示例#1
0
    def compile(self, id):
        """Queue the script of a phonology for compilation as a foma FST.

        :URL: ``PUT /phonologies/compile/id``
        :param str id: the ``id`` value of the phonology whose script will be
            compiled.
        :returns: the phonology model if the phonology exists and foma is
            installed; otherwise an error dict, with the response status set
            to 404 (no such phonology) or 400 (foma not installed).

        .. note::

            This method only places a ``compile_phonology`` task on
            ``foma_worker_q``; ``GET /phonologies/id`` must be polled to
            determine when and how the compilation task has terminated.  See
            :mod:`onlinelinguisticdatabase.lib.foma_worker`.

        """
        target = Session.query(Phonology).get(id)
        if not target:
            response.status_int = 404
            return {'error': 'There is no phonology with id %s' % id}
        if not h.foma_installed():
            response.status_int = 400
            return {'error': 'Foma and flookup are not installed.'}
        # Describe the job for the worker; the salt serves as the task id.
        task = {
            'id': h.generate_salt(),
            'func': 'compile_phonology',
            'args': {
                'phonology_id': target.id,
                'user_id': session['user'].id,
                'timeout': h.phonology_compile_timeout
            }
        }
        foma_worker_q.put(task)
        return target
示例#2
0
文件: users.py 项目: jrwdunham/old
def create_new_user(data):
    """Build a new user model from the supplied data.

    :param dict data: the data for the user to be created.
    :returns: an SQLAlchemy model object representing the user.

    """
    new_user = User()

    # The salt must be set first: the password is encrypted against it.
    new_user.salt = h.generate_salt()
    encrypted = h.encrypt_password(data['password'], str(new_user.salt))
    new_user.password = unicode(encrypted)

    # Textual attributes are passed through ``h.normalize`` before storage.
    normalized_attrs = (
        'username', 'first_name', 'last_name', 'email', 'affiliation',
        'role', 'markup_language', 'page_content')
    for attr in normalized_attrs:
        setattr(new_user, attr, h.normalize(data[attr]))
    new_user.html = h.get_HTML_from_contents(
        new_user.page_content, new_user.markup_language)

    # Many-to-One Data: input and output orthographies (set only if truthy)
    for attr in ('input_orthography', 'output_orthography'):
        if data[attr]:
            setattr(new_user, attr, data[attr])

    # OLD-generated Data
    new_user.datetime_modified = datetime.datetime.utcnow()

    # Create the user's directory
    h.create_user_directory(new_user)

    return new_user
示例#3
0
    def generate(self, id):
        """Queue generation of the files of a morpheme language model.

        The files include, crucially, the one that holds the pickled LM trie.

        :URL: ``PUT /morpheme_language_model/id/generate``
        :param str id: the ``id`` value of the morpheme language model whose
            files will be generated.
        :returns: the morpheme language model, or an error dict (with the
            response status set to 404) when no model has that id.
            ``GET /morpheme_language_model/id`` must be polled to determine
            when the generation task has terminated.

        """
        language_model = Session.query(MorphemeLanguageModel).get(id)
        if language_model:
            task_args = dict(
                morpheme_language_model_id=language_model.id,
                user_id=session['user'].id,
                timeout=h.morpheme_language_model_generate_timeout)
            # The salt serves as the identifier of the queued task.
            task = dict(
                id=h.generate_salt(),
                func='generate_language_model',
                args=task_args)
            foma_worker_q.put(task)
            return language_model
        response.status_int = 404
        message = 'There is no morpheme language model with id %s' % id
        return {'error': message}
示例#4
0
def create_new_user(data):
    """Assemble a new user model from the supplied data.

    :param dict data: the data for the user to be created.
    :returns: an SQLAlchemy model object representing the user.

    """
    account = User()

    # Credentials: the password is encrypted using the freshly made salt.
    account.salt = h.generate_salt()
    salt_as_str = str(account.salt)
    account.password = unicode(
        h.encrypt_password(data['password'], salt_as_str))

    # Attributes stored in normalized form, assigned in this order.
    for field in ('username', 'first_name', 'last_name', 'email',
                  'affiliation', 'role', 'markup_language', 'page_content'):
        setattr(account, field, h.normalize(data[field]))

    # The HTML is derived from the page content and its markup language.
    account.html = h.get_HTML_from_contents(account.page_content,
                                            account.markup_language)

    # Many-to-One Data: input and output orthographies
    for field in ('input_orthography', 'output_orthography'):
        if data[field]:
            setattr(account, field, data[field])

    # OLD-generated Data
    account.datetime_modified = datetime.datetime.utcnow()

    # Create the user's directory
    h.create_user_directory(account)

    return account
示例#5
0
def generate_and_compile_morphological_parser(morphological_parser_id, compile_=True):
    """Queue a morphological parser for generation and, optionally, compilation.

    :param morphological_parser_id: the ``id`` value of the morphological
        parser to generate (and compile).
    :param bool compile_: forwarded to the worker as ``compile``; when true,
        foma must be installed.
    :returns: the morphological parser model once the task has been queued;
        otherwise an error dict, with the response status set to 404 (no such
        parser) or 400 (compilation requested but foma is not installed).

    """
    morphological_parser = Session.query(MorphologicalParser).get(morphological_parser_id)
    if not morphological_parser:
        response.status_int = 404
        # Report the requested id; the bare name ``id`` here would be the
        # builtin function, not the parser's id.
        return {'error': 'There is no morphological parser with id %s' %
                morphological_parser_id}
    if compile_ and not h.foma_installed():
        response.status_int = 400
        return {'error': 'Foma and flookup are not installed.'}
    foma_worker_q.put({
        'id': h.generate_salt(),
        'func': 'generate_and_compile_parser',
        'args': {
            'morphological_parser_id': morphological_parser.id,
            'compile': compile_,
            'user_id': session['user'].id,
            'timeout': h.morphological_parser_compile_timeout
        }
    })
    return morphological_parser
示例#6
0
def generate_and_compile_morphological_parser(morphological_parser_id,
                                              compile_=True):
    """Queue a morphological parser for generation and optional compilation.

    :param morphological_parser_id: the ``id`` value of the morphological
        parser to generate (and compile).
    :param bool compile_: forwarded to the worker as ``compile``; when true,
        foma must be installed.
    :returns: the morphological parser model once the task has been queued;
        otherwise an error dict, with the response status set to 404 (no such
        parser) or 400 (compilation requested but foma is not installed).

    """
    morphological_parser = Session.query(MorphologicalParser).get(
        morphological_parser_id)
    if not morphological_parser:
        response.status_int = 404
        # Interpolate the requested id; the bare name ``id`` would resolve to
        # the builtin function in this scope.
        return {
            'error': 'There is no morphological parser with id %s' %
            morphological_parser_id
        }
    if compile_ and not h.foma_installed():
        response.status_int = 400
        return {'error': 'Foma and flookup are not installed.'}
    foma_worker_q.put({
        'id': h.generate_salt(),
        'func': 'generate_and_compile_parser',
        'args': {
            'morphological_parser_id': morphological_parser.id,
            'compile': compile_,
            'user_id': session['user'].id,
            'timeout': h.morphological_parser_compile_timeout
        }
    })
    return morphological_parser
示例#7
0
    def compute_perplexity(self, id):
        """Queue computation of the perplexity of the LM's corpus.

        The worker randomly divides the corpus into training and test sets
        multiple times, computes the perplexity and averages the results.
        See ``evaluate_morpheme_language_model`` in lib/foma_worker.py.

        :param str id: the ``id`` value of the morpheme language model.
        :returns: the morpheme language model, or an error dict (with the
            response status set to 404) when no model has that id.

        """
        language_model = Session.query(MorphemeLanguageModel).get(id)
        if language_model:
            # NOTE(review): the *generate* timeout is reused here -- confirm
            # that this is intended for perplexity computation.
            task_args = dict(
                morpheme_language_model_id=language_model.id,
                user_id=session['user'].id,
                timeout=h.morpheme_language_model_generate_timeout)
            task = dict(
                id=h.generate_salt(),
                func='compute_perplexity',
                args=task_args)
            foma_worker_q.put(task)
            return language_model
        response.status_int = 404
        message = 'There is no morpheme language model with id %s' % id
        return {'error': message}
示例#8
0
    def generate(self, id):
        """Request generation of a morpheme language model's files.

        Among the generated files is, crucially, the one holding the pickled
        LM trie.

        :URL: ``PUT /morpheme_language_model/id/generate``
        :param str id: the ``id`` value of the morpheme language model whose
            files will be generated.
        :returns: the morpheme language model; if none has the given id, an
            error dict is returned and the response status is set to 404.
            ``GET /morpheme_language_model/id`` must be polled to determine
            when the generation task has terminated.

        """
        model = Session.query(MorphemeLanguageModel).get(id)
        if not model:
            response.status_int = 404
            return dict(
                error='There is no morpheme language model with id %s' % id)
        worker_args = {
            'morpheme_language_model_id': model.id,
            'user_id': session['user'].id,
            'timeout': h.morpheme_language_model_generate_timeout,
        }
        # Hand the job to the worker queue under a freshly generated id.
        job = {'id': h.generate_salt()}
        job['func'] = 'generate_language_model'
        job['args'] = worker_args
        foma_worker_q.put(job)
        return model
示例#9
0
    def compute_perplexity(self, id):
        """Request computation of the perplexity of the LM's corpus.

        The corpus is randomly divided into training and test sets multiple
        times; the perplexity is computed each time and averaged.  See
        ``evaluate_morpheme_language_model`` in lib/foma_worker.py.

        :param str id: the ``id`` value of the morpheme language model.
        :returns: the morpheme language model; if none has the given id, an
            error dict is returned and the response status is set to 404.

        """
        model = Session.query(MorphemeLanguageModel).get(id)
        if not model:
            response.status_int = 404
            return dict(
                error='There is no morpheme language model with id %s' % id)
        # NOTE(review): this uses the *generate* timeout setting -- confirm
        # that sharing it with perplexity computation is intended.
        worker_args = {
            'morpheme_language_model_id': model.id,
            'user_id': session['user'].id,
            'timeout': h.morpheme_language_model_generate_timeout,
        }
        job = {'id': h.generate_salt()}
        job['func'] = 'compute_perplexity'
        job['args'] = worker_args
        foma_worker_q.put(job)
        return model
示例#10
0
文件: corpora.py 项目: jrwdunham/old
    def tgrep2(self, id):
        """Search the corpus-as-treebank using TGrep2.

        :URL: ``SEARCH/POST /corpora/id/tgrep2``.
        :Request body: JSON object with obligatory 'tgrep2pattern' attribute and
            optional 'paginator' and 'order_by' attributes.
        :param str id: the ``id`` value of the corpus.
        :returns: the matching forms: the result of ``h.add_pagination`` when
            there are matches; otherwise an empty list or, if a paginator was
            supplied, a dict holding that paginator (``count`` set to 0) and
            an empty ``items`` list.  On failure an error dict is returned
            and the response status is set to 404 (no such corpus) or 400
            (TGrep2 missing, corpus not written as a treebank, bad request
            body or failed search).

        """
        if not h.command_line_program_installed('tgrep2'):
            response.status_int = 400
            return {'error': 'TGrep2 is not installed.'}
        corpus = Session.query(Corpus).get(id)
        if not corpus:
            response.status_int = 404
            return {'error': 'There is no corpus with id %s' % id}

        # Interpolate the id once so the client never sees a raw placeholder.
        not_written_msg = (
            'Corpus %s has not been written to file as a treebank.' % id)
        try:
            # IndexError (no treebank file) is handled below like any other
            # failure to locate the compiled treebank.
            treebank_corpus_file_object = [
                cf for cf in corpus.files if cf.format == u'treebank'][0]
            corpus_dir_path = get_corpus_dir_path(corpus)
            tgrep2_corpus_file_path = os.path.join(corpus_dir_path,
                    '%s.t2c' % treebank_corpus_file_object.filename)
        except Exception:
            response.status_int = 400
            return {'error': not_written_msg}
        if not os.path.exists(tgrep2_corpus_file_path):
            response.status_int = 400
            return {'error': not_written_msg}
        # NOTE(review): the per-file authorization check below is disabled.
        #if not authorized_to_access_corpus_file(session['user'], treebank_corpus_file_object):
        #    response.status_int = 403
        #    return h.unauthorized_msg
        try:
            request_params = json.loads(unicode(request.body, request.charset))
            # Validate explicitly rather than with ``assert``, which is
            # stripped when Python runs with -O.
            try:
                tgrep2pattern = request_params['tgrep2pattern']
            except (KeyError, TypeError):
                tgrep2pattern = None
            if not isinstance(tgrep2pattern, basestring):
                response.status_int = 400
                return {'errors': {'tgrep2pattern':
                    'A tgrep2pattern attribute must be supplied and must have a unicode/string value'}}
            tmp_path = os.path.join(corpus_dir_path, '%s%s.txt' % (session['user'].username, h.generate_salt()))
            try:
                with open(os.devnull, "w") as fnull:
                    with open(tmp_path, 'w') as stdout:
                        # The -wu option causes TGrep2 to print only the root symbol of each matching tree
                        process = Popen(['tgrep2', '-c', tgrep2_corpus_file_path, '-wu', tgrep2pattern],
                            stdout=stdout, stderr=fnull)
                        process.communicate()
                # Read the output back inside ``with`` so the handle is closed.
                with open(tmp_path, 'r') as tgrep2_output:
                    match_ids = [
                        match for match in
                        map(get_form_ids_from_tgrep2_output_line, tgrep2_output)
                        if match]
            finally:
                # Never leave the scratch file behind, even on failure.
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)
            if match_ids:
                query = h.eagerload_form(Session.query(Form)).filter(Form.id.in_(match_ids))
                query = h.filter_restricted_models('Form', query)
                query = h.add_order_by(query, request_params.get('order_by'), self.query_builder)
                result = h.add_pagination(query, request_params.get('paginator'))
            elif request_params.get('paginator'):
                paginator = request_params['paginator']
                paginator['count'] = 0
                result = {'paginator': paginator, 'items': []}
            else:
                result = []
            return result
        except h.JSONDecodeError:
            response.status_int = 400
            return h.JSONDecodeErrorResponse
        except Invalid as e:
            response.status_int = 400
            return {'errors': e.unpack_errors()}
        except Exception as e:
            response.status_int = 400
            return {'error': 'Unable to perform TGrep2 search: %s.' % e}
示例#11
0
    def tgrep2(self, id):
        """Search the corpus-as-treebank using TGrep2.

        :URL: ``SEARCH/POST /corpora/id/tgrep2``.
        :Request body: JSON object with obligatory 'tgrep2pattern' attribute and
            optional 'paginator' and 'order_by' attributes.
        :param str id: the ``id`` value of the corpus.
        :returns: the matching forms: the result of ``h.add_pagination`` when
            there are matches; otherwise an empty list or, if a paginator was
            supplied, a dict holding that paginator (``count`` set to 0) and
            an empty ``items`` list.  When TGrep2 is missing, the corpus has
            not been written as a treebank, the request body is bad or the
            search fails, an error dict is returned and the response status
            is set to 400.

        """
        if not h.command_line_program_installed('tgrep2'):
            response.status_int = 400
            return {'error': 'TGrep2 is not installed.'}
        corpus = Session.query(Corpus).get(id)
        if corpus:
            # Interpolate the id once so the client never sees a raw
            # placeholder in the message.
            not_written_msg = (
                'Corpus %s has not been written to file as a treebank.' % id)
            try:
                # IndexError (no treebank file) is handled below like any
                # other failure to locate the compiled treebank.
                treebank_corpus_file_object = [
                    cf for cf in corpus.files if cf.format == u'treebank'
                ][0]
                corpus_dir_path = get_corpus_dir_path(corpus)
                tgrep2_corpus_file_path = os.path.join(
                    corpus_dir_path,
                    '%s.t2c' % treebank_corpus_file_object.filename)
            except Exception:
                response.status_int = 400
                return {'error': not_written_msg}
            if not os.path.exists(tgrep2_corpus_file_path):
                response.status_int = 400
                return {'error': not_written_msg}
            # NOTE(review): the per-file authorization check below is disabled.
            #if not authorized_to_access_corpus_file(session['user'], treebank_corpus_file_object):
            #    response.status_int = 403
            #    return h.unauthorized_msg
            try:
                request_params = json.loads(
                    unicode(request.body, request.charset))
                # Validate explicitly rather than with ``assert``, which is
                # stripped when Python runs with -O.
                try:
                    tgrep2pattern = request_params['tgrep2pattern']
                except (KeyError, TypeError):
                    tgrep2pattern = None
                if not isinstance(tgrep2pattern, basestring):
                    response.status_int = 400
                    return {
                        'errors': {
                            'tgrep2pattern':
                            'A tgrep2pattern attribute must be supplied and must have a unicode/string value'
                        }
                    }
                tmp_path = os.path.join(
                    corpus_dir_path,
                    '%s%s.txt' % (session['user'].username, h.generate_salt()))
                try:
                    with open(os.devnull, "w") as fnull:
                        with open(tmp_path, 'w') as stdout:
                            # The -wu option causes TGrep2 to print only the root symbol of each matching tree
                            process = Popen([
                                'tgrep2', '-c', tgrep2_corpus_file_path, '-wu',
                                tgrep2pattern
                            ],
                                            stdout=stdout,
                                            stderr=fnull)
                            process.communicate()
                    # Read the output back inside ``with`` so the handle is
                    # closed deterministically.
                    with open(tmp_path, 'r') as tgrep2_output:
                        match_ids = [
                            match for match in map(
                                get_form_ids_from_tgrep2_output_line,
                                tgrep2_output) if match
                        ]
                finally:
                    # Never leave the scratch file behind, even on failure.
                    if os.path.exists(tmp_path):
                        os.remove(tmp_path)
                if match_ids:
                    query = h.eagerload_form(Session.query(Form)).filter(
                        Form.id.in_(match_ids))
                    query = h.filter_restricted_models('Form', query)
                    query = h.add_order_by(query,
                                           request_params.get('order_by'),
                                           self.query_builder)
                    result = h.add_pagination(query,
                                              request_params.get('paginator'))
                elif request_params.get('paginator'):
                    paginator = request_params['paginator']
                    paginator['count'] = 0
                    result = {'paginator': paginator, 'items': []}
                else:
                    result = []
                return result
            except h.JSONDecodeError:
                response.status_int = 400
                return h.JSONDecodeErrorResponse
            except Invalid as e:
                response.status_int = 400
                return {'errors': e.unpack_errors()}
            except Exception as e:
                response.status_int = 400
                return {'error': 'Unable to perform TGrep2 search: %s.' % e}