def write_result(self, result, analysis_id):
    # Mark the analysis record as complete and store its result.
    analysis = DatabaseAdapter.getDB().analyses.find_one(
        {"_id": ObjectId(analysis_id)})
    analysis['complete'] = True
    analysis['result'] = result
    print("Analysis " + str(analysis_id) + " complete. Submitting record to DB.")
    DatabaseAdapter.getDB().analyses.update(
        {'_id': ObjectId(analysis_id)}, analysis)
    self.is_finished = True
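# The .update() and .insert() calls in this module use the legacy PyMongo 1.x/2.x
# collection API, which was deprecated in PyMongo 3.0 and removed in 4.0. A minimal
# sketch of the same write on a newer driver, assuming the same DatabaseAdapter
# helper (this standalone function is illustrative only, not part of the original class):
def _write_result_pymongo3(result, analysis_id):
    analyses = DatabaseAdapter.getDB().analyses
    analysis = analyses.find_one({"_id": ObjectId(analysis_id)})
    analysis['complete'] = True
    analysis['result'] = result
    # replace_one() is the PyMongo 3+ equivalent of the full-document form of
    # the removed Collection.update().
    analyses.replace_one({'_id': ObjectId(analysis_id)}, analysis)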
def parse_json(self, json_data):
    try:
        input_data = json.loads(json_data.decode())
        print(input_data)
        self.transaction_id = input_data['transaction_id']
        self.operation = input_data['operation']
        self.library = input_data['library']
        if 'user_id' in input_data:
            self.user_id = input_data['user_id']
        if 'cleanup' in input_data:
            self.cleanups = input_data['cleanup']
        self.corpora_ids = input_data['corpora_ids']
        if 'tokenizer' in input_data:
            self.tokenizer = input_data['tokenizer']
    except KeyError:
        raise TransactionException(
            'Missing property transaction_id, operation, library or corpora_ids.')
    except ValueError:
        raise TransactionException('Could not parse JSON.')
    try:
        # Load the referenced corpora from the database.
        corpora = DatabaseAdapter.getDB().corpus
        for corpus_id in self.corpora_ids:
            corpus = corpora.find_one({"_id": ObjectId(corpus_id)})
            self.corpora.append(
                Corpus(corpus_id, corpus["title"], corpus["contents"], corpus["tags"]))
    except (TypeError, InvalidId):
        raise TransactionException('Could not find corpus.')
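# A minimal example of the kind of message parse_json() above expects, based on the
# fields it reads. The id strings, operation, library, and cleanup values below are
# made-up placeholders, not values taken from the project; 'word_tokenize_spaces'
# is the fallback tokenizer name used by run().
EXAMPLE_TRANSACTION = json.dumps({
    "transaction_id": "1",
    "operation": "tfidf",                           # placeholder operation name
    "library": "nltk",                              # placeholder library name
    "corpora_ids": ["54f9d18d4582d1dc5c67cd3f"],    # required; looked up in the corpus collection
    "user_id": "54f9d18d4582d1dc5c67cd40",          # optional
    "cleanup": ["remove_punct"],                    # optional; placeholder cleanup name
    "tokenizer": "word_tokenize_spaces"             # optional
}).encode()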
def read_corpora(self, corpora_ids):
    try:
        # Load the requested corpora from the database.
        corpora = DatabaseAdapter.getDB().corpus
        for corpus_id in corpora_ids:
            corpus = corpora.find_one({"_id": ObjectId(corpus_id)})
            self.corpora.append(
                Corpus(corpus_id, corpus["title"], corpus["contents"], corpus["tags"]))
    except (TypeError, InvalidId):
        raise TransactionException('Could not find corpus.')
def create_analysis_record(self):
    # Create a placeholder analyses document with an empty result and return
    # the new record's id.
    analysis = {
        'user_id': ObjectId(self.user_id),
        'analysis_name': self.analysis_name,
        'corpora_ids': self.corpora_ids,
        'cleanup_ids': self.cleanups,
        'result': "",
        'tokenizer': self.tokenizer,
        'eta': self.eta,
        'complete': False,
        'time_created': self.time_created,
        'analysis': self.operation
    }
    return DatabaseAdapter.getDB().analyses.insert(analysis)
def run(self):
    corpora = self.corpora
    tokenized_corpora = []
    analysis = {}
    # Tokenize up front if a tokenizer was requested.
    if self.tokenizer:
        op_handler = linguine.operation_builder.get_operation_handler(self.tokenizer)
        tokenized_corpora = op_handler.run(corpora)
    # Apply each cleanup operation to the corpora in turn.
    for cleanup in self.cleanups:
        op_handler = linguine.operation_builder.get_operation_handler(cleanup)
        corpora = op_handler.run(corpora)
    op_handler = linguine.operation_builder.get_operation_handler(self.operation)
    if self.operation in self.token_based_operations:
        # Token-based operations need tokenized input; fall back to a
        # whitespace tokenizer if none was specified.
        if not self.tokenizer:
            tokenizer_handler = linguine.operation_builder.get_operation_handler(
                'word_tokenize_spaces')
            tokenized_corpora = tokenizer_handler.run(corpora)
        analysis = {
            'user_id': ObjectId(self.user_id),
            'corpora_ids': self.corpora_ids,
            'cleanup_ids': self.cleanups,
            'result': op_handler.run(tokenized_corpora),
            'analysis': self.operation
        }
    else:
        analysis = {
            'user_id': ObjectId(self.user_id),
            'corpora_ids': self.corpora_ids,
            'cleanup_ids': self.cleanups,
            'result': op_handler.run(corpora),
            'analysis': self.operation
        }
    print(analysis)
    analysis_id = DatabaseAdapter.getDB().analyses.insert(analysis)
    response = {
        'transaction_id': self.transaction_id,
        'cleanup_ids': self.cleanups,
        'library': self.library,
        'operation': self.operation,
        'results': str(analysis_id)
    }
    print(response)
    return json.JSONEncoder().encode(response)
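# A rough end-to-end sketch of how these methods fit together, assuming they live
# on a single transaction object created by the surrounding service (that class and
# its constructor are not shown in this file, so the object is passed in here):
def _example_flow(txn, raw_message_bytes):
    txn.parse_json(raw_message_bytes)  # populate ids, operation, cleanups, tokenizer
    response_json = txn.run()          # run cleanups/tokenizer/operation and store the analysis
    # response_json is the JSON string built at the end of run():
    #   {"transaction_id": ..., "cleanup_ids": [...], "library": ..., "operation": ...,
    #    "results": "<string form of the inserted analyses document's ObjectId>"}
    return response_json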