def write_result(self, result, analysis_id):
    """Mark an analysis record as complete and store its result.

    Args:
        result: The analysis output to persist on the record.
        analysis_id: Id (ObjectId-compatible) of the analyses document.
    """
    print("Analysis " + str(analysis_id) + " complete. submitting record to DB")

    # Use an atomic partial $set update instead of the previous
    # find_one / modify / whole-document update: one round trip fewer and
    # no race window in which a concurrent writer's changes get clobbered.
    # (A missing id now no-ops instead of raising TypeError on the fetch.)
    DatabaseAdapter.getDB().analyses.update(
        {'_id': ObjectId(analysis_id)},
        {'$set': {'complete': True, 'result': result}})
    def write_result(self, result, analysis_id):
        """Mark an analysis record as complete, store its result, and flag
        this worker as finished.

        Args:
            result: The analysis output to persist on the record.
            analysis_id: Id (ObjectId-compatible) of the analyses document.
        """
        print("Analysis " + str(analysis_id) +
              " complete. submitting record to DB")

        # Atomic partial $set update instead of find_one + whole-document
        # rewrite: avoids the read-modify-write race and an extra round trip.
        DatabaseAdapter.getDB().analyses.update(
            {'_id': ObjectId(analysis_id)},
            {'$set': {'complete': True, 'result': result}})
        self.is_finished = True
Example #3
0
    def parse_json(self, json_data):
        """Populate transaction fields from a JSON payload and load corpora.

        Args:
            json_data: Raw bytes of a JSON object with required keys
                transaction_id, operation, library, corpora_ids and
                optional keys user_id, cleanup and tokenizer.

        Raises:
            TransactionException: If a required key is missing, the JSON
                cannot be parsed, or a corpus id cannot be resolved.
        """
        try:
            input_data = json.loads(json_data.decode())

            print(input_data)
            self.transaction_id = input_data['transaction_id']
            self.operation = input_data['operation']
            self.library = input_data['library']
            # Optional fields: membership test on the dict directly,
            # not on .keys().
            if 'user_id' in input_data:
                self.user_id = input_data['user_id']
            if 'cleanup' in input_data:
                self.cleanups = input_data['cleanup']
            self.corpora_ids = input_data['corpora_ids']
            if 'tokenizer' in input_data:
                self.tokenizer = input_data['tokenizer']
        except KeyError:
            raise TransactionException('Missing property transaction_id, operation, library, tokenizer or corpora_ids.')
        except ValueError:
            raise TransactionException('Could not parse JSON.')
        try:
            # Load corpora from the database. Renamed loop variable so it no
            # longer shadows the builtin `id`.
            corpora = DatabaseAdapter.getDB().corpus
            for corpus_id in self.corpora_ids:
                corpus = corpora.find_one({"_id": ObjectId(corpus_id)})
                print(corpus)
                # find_one returning None raises TypeError on subscripting,
                # which is caught below and reported as a missing corpus.
                self.corpora.append(Corpus(corpus_id, corpus["title"],
                                           corpus["contents"], corpus["tags"]))
        except (TypeError, InvalidId):
            raise TransactionException('Could not find corpus.')
Example #4
0
 def parse_json(self, json_data):
     """Populate transaction fields from a JSON payload and load corpora.

     Args:
         json_data: Raw bytes of a JSON object with required keys
             transaction_id, operation, library, corpora_ids and
             optional keys user_id, cleanup and tokenizer.

     Raises:
         TransactionException: If a required key is missing, the JSON
             cannot be parsed, or a corpus id cannot be resolved.
     """
     try:
         input_data = json.loads(json_data.decode())
         print(input_data)
         self.transaction_id = input_data['transaction_id']
         self.operation = input_data['operation']
         self.library = input_data['library']
         # Optional fields: test membership on the dict itself, not .keys().
         if 'user_id' in input_data:
             self.user_id = input_data['user_id']
         if 'cleanup' in input_data:
             self.cleanups = input_data['cleanup']
         self.corpora_ids = input_data['corpora_ids']
         if 'tokenizer' in input_data:
             self.tokenizer = input_data['tokenizer']
     except KeyError:
         raise TransactionException(
             'Missing property transaction_id, operation, library, tokenizer or corpora_ids.'
         )
     except ValueError:
         raise TransactionException('Could not parse JSON.')
     try:
         # Load corpora from the database; `corpus_id` avoids shadowing the
         # builtin `id`.
         corpora = DatabaseAdapter.getDB().corpus
         for corpus_id in self.corpora_ids:
             corpus = corpora.find_one({"_id": ObjectId(corpus_id)})
             self.corpora.append(
                 Corpus(corpus_id, corpus["title"], corpus["contents"],
                        corpus["tags"]))
     except (TypeError, InvalidId):
         raise TransactionException('Could not find corpus.')
 def read_corpora(self, corpora_ids):
     """Load the given corpora from the database into self.corpora.

     Args:
         corpora_ids: Iterable of corpus ids to fetch.

     Raises:
         TransactionException: If any id is invalid or no corpus exists
             for it.
     """
     try:
         #load corpora from database
         corpora = DatabaseAdapter.getDB().corpus
         # Bug fix: iterate the corpora_ids argument instead of silently
         # ignoring it in favor of self.corpora_ids. Also renamed the loop
         # variable so it no longer shadows the builtin `id`.
         for corpus_id in corpora_ids:
             corpus = corpora.find_one({"_id": ObjectId(corpus_id)})
             # find_one returning None raises TypeError below, reported as
             # a missing corpus.
             self.corpora.append(Corpus(corpus_id, corpus["title"],
                 corpus["contents"], corpus["tags"]))
     except (TypeError, InvalidId):
         raise TransactionException('Could not find corpus.')
 def read_corpora(self, corpora_ids):
     """Fetch each corpus by id and append it to self.corpora.

     Args:
         corpora_ids: Iterable of corpus ids to fetch.

     Raises:
         TransactionException: If any id is invalid or no corpus exists
             for it.
     """
     try:
         #load corpora from database
         collection = DatabaseAdapter.getDB().corpus
         # Bug fix: honor the corpora_ids parameter (was iterating
         # self.corpora_ids, leaving the argument unused).
         for corpus_id in corpora_ids:
             corpus = collection.find_one({"_id": ObjectId(corpus_id)})
             self.corpora.append(
                 Corpus(corpus_id, corpus["title"], corpus["contents"],
                        corpus["tags"]))
     except (TypeError, InvalidId):
         raise TransactionException('Could not find corpus.')
 def create_analysis_record(self):
     """Insert a fresh, incomplete analysis document and return its id."""
     # Build the record incrementally; field order matches the stored shape.
     record = {}
     record['user_id'] = ObjectId(self.user_id)
     record['analysis_name'] = self.analysis_name
     record['corpora_ids'] = self.corpora_ids
     record['cleanup_ids'] = self.cleanups
     record['result'] = ""
     record['tokenizer'] = self.tokenizer
     record['eta'] = self.eta
     record['complete'] = False
     record['time_created'] = self.time_created
     record['analysis'] = self.operation
     return DatabaseAdapter.getDB().analyses.insert(record)
 def create_analysis_record(self):
     """Create a new analysis document (not yet complete) in the DB.

     Returns:
         The id of the inserted analyses document.
     """
     record = dict(user_id=ObjectId(self.user_id),
                   analysis_name=self.analysis_name,
                   corpora_ids=self.corpora_ids,
                   cleanup_ids=self.cleanups,
                   result="",
                   tokenizer=self.tokenizer,
                   eta=self.eta,
                   complete=False,
                   time_created=self.time_created,
                   analysis=self.operation)
     return DatabaseAdapter.getDB().analyses.insert(record)
Example #9
0
    def run(self):
        """Run cleanups, tokenization and the analysis operation, persist the
        result, and return a JSON-encoded response.

        Returns:
            str: JSON object with transaction_id, cleanup_ids, library,
            operation, and the stored analysis record id under 'results'.
        """
        corpora = self.corpora
        tokenized_corpora = []

        # Tokenize the raw corpora when a tokenizer was supplied.
        # NOTE(review): tokenization happens before the cleanups run, so the
        # cleanups below do not affect tokenized_corpora — confirm intended.
        if self.tokenizer is not None:  # idiom fix: was `not ... == None`
            op_handler = linguine.operation_builder.get_operation_handler(
                self.tokenizer)
            tokenized_corpora = op_handler.run(corpora)

        # Apply each cleanup operation, in order, to the raw corpora.
        for cleanup in self.cleanups:
            op_handler = linguine.operation_builder.get_operation_handler(
                cleanup)
            corpora = op_handler.run(corpora)

        op_handler = linguine.operation_builder.get_operation_handler(
            self.operation)

        # Token-based operations consume tokens; everything else consumes the
        # (cleaned) corpora. Choosing the input here removes the previously
        # duplicated analysis-dict literal.
        if self.operation in self.token_based_operations:
            if self.tokenizer is None:
                # No tokenizer configured: fall back to whitespace tokens.
                tokenizer_handler = linguine.operation_builder.get_operation_handler(
                    'word_tokenize_spaces')
                tokenized_corpora = tokenizer_handler.run(corpora)
            operation_input = tokenized_corpora
        else:
            operation_input = corpora

        analysis = {
            'user_id': ObjectId(self.user_id),
            'corpora_ids': self.corpora_ids,
            'cleanup_ids': self.cleanups,
            'result': op_handler.run(operation_input),
            'analysis': self.operation
        }
        print(analysis)
        analysis_id = DatabaseAdapter.getDB().analyses.insert(analysis)
        response = {
            'transaction_id': self.transaction_id,
            'cleanup_ids': self.cleanups,
            'library': self.library,
            'operation': self.operation,
            'results': str(analysis_id)
        }
        print(response)
        return json.JSONEncoder().encode(response)
Example #10
0
    def run(self):
        """Run cleanups, tokenization and the analysis operation, persist the
        result, and return a JSON-encoded response.

        Returns:
            str: JSON object with transaction_id, cleanup_ids, library,
            operation, and the stored analysis record id under 'results'.
        """
        corpora = self.corpora
        tokenized_corpora = []

        # Tokenize up front when a usable (non-empty) tokenizer was supplied.
        # NOTE(review): tokenization happens before the cleanups run, so the
        # cleanups below do not affect tokenized_corpora — confirm intended.
        if self.tokenizer is not None and self.tokenizer != '':
            op_handler = linguine.operation_builder.get_operation_handler(self.tokenizer)
            tokenized_corpora = op_handler.run(corpora)

        for cleanup in self.cleanups:
            op_handler = linguine.operation_builder.get_operation_handler(cleanup)
            corpora = op_handler.run(corpora)

        op_handler = linguine.operation_builder.get_operation_handler(self.operation)

        if self.operation in self.token_based_operations:
            # Bug fix: an empty-string tokenizer previously skipped both the
            # up-front tokenization and this `is None` fallback, feeding the
            # token-based operation an empty token list. Fall back whenever
            # no tokenizer is effectively configured.
            if self.tokenizer is None or self.tokenizer == '':
                tokenizer_handler = linguine.operation_builder.get_operation_handler('word_tokenize_spaces')
                tokenized_corpora = tokenizer_handler.run(corpora)
            operation_input = tokenized_corpora
        else:
            operation_input = corpora

        # Single analysis literal instead of a duplicated dict per branch.
        analysis = {'user_id': ObjectId(self.user_id),
                    'corpora_ids': self.corpora_ids,
                    'cleanup_ids': self.cleanups,
                    'result': op_handler.run(operation_input),
                    'analysis': self.operation}
        print(analysis)
        analysis_id = DatabaseAdapter.getDB().analyses.insert(analysis)
        response = {'transaction_id': self.transaction_id,
                    'cleanup_ids': self.cleanups,
                    'library': self.library,
                    'operation': self.operation,
                    'results': str(analysis_id)}
        print(response)
        return json.JSONEncoder().encode(response)