def post_jobs():
    """
    Create a job to create preferences/predictions for a custom sequence using
    the specified model (model_name).

    request['sequence_id'] str: uuid of the custom sequence to process
    request['job_type'] str: see config.DataType properties for values
    request['model_name'] str: name of the model to use
    :return: json response with id of the job
    """
    required_prop_names = ["sequence_id", "job_type", "model_name"]
    (sequence_id, job_type, model_name) = get_required_json_props(request, required_prop_names)
    try:
        seq = SequenceList(sequence_id)
        seq.load(get_db())
    except KeyError:
        # Sequences are purged over time; surface a client-level error instead
        # of leaking the internal KeyError.
        raise ClientException("Unable to find sequence. It may have purged.",
                              ErrorType.SEQUENCE_NOT_FOUND,
                              error_data=sequence_id)
    # Reuse an existing job for the same (type, sequence, model) triple so
    # repeated requests do not queue duplicate work.
    job = CustomJob.find_existing_job(get_db(), job_type, sequence_id, model_name)
    # NOTE(review): status_code is always None here; the original code
    # re-assigned None inside the branch below (dead code, now removed).
    # Presumably the intent was a distinct status (e.g. 201) on creation —
    # confirm before changing make_ok_json_response's default.
    status_code = None
    if not job:
        job = CustomJob.create_job(get_db(), job_type, sequence_id, model_name)
    return make_ok_json_response({'id': job.uuid}, status_code)
def delete_old_items(cur):
    """Purge expired custom lists, jobs, and unattached custom sequences.

    :param cur: database cursor used to run the cleanup statements
    """
    cleanup_steps = [
        ("Deleting old custom lists.",
         lambda: cur.execute(DELETE_OLD_LISTS_SQL, [])),
        ("Deleting old jobs.",
         lambda: CustomJob.delete_old_jobs(cur, DELETE_AFTER_HOURS)),
        ("Deleting old custom sequences.",
         lambda: SequenceList.delete_old_and_unattached(cur, DELETE_AFTER_HOURS)),
    ]
    # Announce each step before running it so progress is visible in the log.
    for message, run_step in cleanup_steps:
        print(message)
        run_step()
def get_custom_sequences_data(sequence_id):
    """
    Get base64 encoded contents and other properties of a custom sequence (DNA).

    :param sequence_id: str: uuid associated with a particular sequence
    :return: json response
    """
    sequence_list = SequenceList(sequence_id)
    sequence_list.load(get_db())
    # NOTE(review): b64encode returns bytes under Python 3; assumes content is
    # bytes-compatible and the JSON layer handles the result — confirm.
    payload = {
        "id": sequence_list.seq_uuid,
        "data": base64.b64encode(sequence_list.content),
        "created": sequence_list.created,
    }
    return make_json_response(payload)
def test_custom_job_no_data(self):
    """A result saved with empty BED data yields one prediction with no values."""
    fasta_data = """>someseq\nAAACCCGGGGTT"""
    db = create_db_connection(TestWithPostgres.config.dbconfig)
    # Upload the FASTA file and create a prediction job for it.
    sequence_list = SequenceList.create_with_content_and_title(db, fasta_data, "somelist")
    job_uuid = CustomJob.create_job(db, DataType.PREDICTION, sequence_list,
                                    model_name='E2f1').uuid
    CustomJob.set_job_running(db, job_uuid)
    # Save a result that contains no BED rows at all.
    result_uuid = CustomResultData.new_uuid()
    empty_result = CustomResultData(db, result_uuid, job_uuid, model_name='E2f1',
                                    bed_data='')
    empty_result.save()
    predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=False,
                                                   limit=None, offset=None)
    # The sequence still shows up, but with no max value and no prediction windows.
    self.assertEqual(1, len(predictions))
    first = predictions[0]
    self.assertEqual('someseq', first['name'])
    self.assertEqual('None', first['max'])
    self.assertEqual([], first['values'])
    self.assertEqual('AAACCCGGGGTT', first['sequence'])
    # Predictions must be JSON-serializable.
    json_version = json.dumps({'data': predictions})
    self.assertEqual('{"data', json_version[:6])
def test_sequence_list(self):
    """Two sequence lists round-trip through the database with content and titles intact."""
    fasta_one = """>HSBGPG Human gene for bone gla protein (BGP)
GGCAGATTCCCCCTAGACCCGCCCGCACCATGGTCAGGCATGCCCCTCCTCATCGCTGGGCACAGCCCAGAGGGT
ATAAACAGTGCTGGAGGCTGGCGGGGCAGGCCAGCTGAGTCCTGAGCAGCAGCCCAGCGCAGCCACCGAGACACC
>HSGLTH1 Human theta 1-globin gene
CCACTGCACTCACCGCACCCGGCCAATTTTTGTGTTTTTAGTAGAGACTAAATACCATATAGTGAACACCTAAGA
CGGGGGGCCTTGGATCCAGGGCGATTCAGAGGGCCCCGGTCGGAGCTGTCGGAGATTGAGCGCGCGCGGTCCCGG"""
    fasta_two = """>stuff
AAACCCGGGG"""
    db = create_db_connection(TestWithPostgres.config.dbconfig)
    # Create two independent lists, then read both back.
    uuid_one = SequenceList.create_with_content_and_title(db, fasta_one, "mystuff")
    uuid_two = SequenceList.create_with_content_and_title(db, fasta_two, "mystuff2")
    loaded_one = SequenceList.read_list(db, uuid_one)
    loaded_two = SequenceList.read_list(db, uuid_two)
    self.assertEqual(fasta_one, loaded_one.content)
    self.assertEqual("mystuff", loaded_one.title)
    self.assertEqual("mystuff2", loaded_two.title)
def test_custom_job_normal_workflow(self):
    """Full workflow: upload FASTA, run job, save BED result, query/sort/page predictions."""
    short_seq = 'AAACCCGGGGTT'
    long_seq = 'AAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTT' \
               'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
    fasta_data = '>someseq\n' + short_seq + '\n' \
                 '>someseq2\n' + long_seq
    db = create_db_connection(TestWithPostgres.config.dbconfig)
    # Upload the FASTA file and start a prediction job against it.
    sequence_list = SequenceList.create_with_content_and_title(db, fasta_data, "sometitle")
    job_uuid = CustomJob.create_job(db, DataType.PREDICTION, sequence_list,
                                    model_name="E2f1").uuid
    CustomJob.set_job_running(db, job_uuid)
    # Save a BED result: one window on someseq, two on someseq2.
    bed_data = """
someseq\t0\t10\t12.5\tAAACCCGGGG
someseq2\t20\t30\t4.5\tGGTTAAACCC
someseq2\t60\t75\t15.5\tAAAAAAAAAAAAAAA
""".strip()
    result_uuid = CustomResultData.new_uuid()
    saved_result = CustomResultData(db, result_uuid, job_uuid, model_name='E2f1',
                                    bed_data=bed_data)
    saved_result.save()
    # The stored BED file contents round-trip.
    self.assertEqual(bed_data, CustomResultData.bed_file_contents(db, result_uuid).strip())
    # Unsorted query returns both sequences in upload order.
    predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=False,
                                                   limit=None, offset=None)
    self.assertEqual(2, len(predictions))
    first = predictions[0]
    self.assertEqual('someseq', first['name'])
    self.assertEqual(12.5, float(first['max']))
    self.assertEqual([{u'start': 0, u'end': 10, u'value': 12.5}], first['values'])
    self.assertEqual(short_seq, first['sequence'])
    second = predictions[1]
    self.assertEqual('someseq2', second['name'])
    self.assertEqual(15.5, float(second['max']))
    self.assertEqual(long_seq, second['sequence'])
    # Sorting by max value flips the order.
    predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=True,
                                                   limit=None, offset=None)
    self.assertEqual(2, len(predictions))
    self.assertEqual(15.5, float(predictions[0]['max']))
    self.assertEqual(12.5, float(predictions[1]['max']))
    # Paging: limit 1, offset 1 returns just the second-highest value.
    predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=True,
                                                   limit=1, offset=1)
    self.assertEqual(1, len(predictions))
    self.assertEqual(12.5, float(predictions[0]['max']))
    # Predictions must be JSON-serializable.
    json_version = json.dumps({'data': predictions})
    self.assertEqual('{"data', json_version[:6])
def test_sequence_too_big(self):
    """Inserting an oversized sequence list raises UPLOADED_DATA_TOO_BIG with HTTP 400."""
    with self.assertRaises(ClientException) as cm:
        oversized = SequenceList('1234')
        oversized.content = make_too_big()
        oversized.title = 'Too big list'
        oversized.insert(db=FakeDB())
    self.assertEqual(ErrorType.UPLOADED_DATA_TOO_BIG, cm.exception.error_type)
    self.assertEqual(400, cm.exception.status_code)
def test_custom_job_normal_workflow(self):
    """Full workflow: upload FASTA, run job, save a 4-column BED result, query predictions.

    NOTE(review): this method has the same name as another
    test_custom_job_normal_workflow in this SOURCE; if both live in the same
    TestCase class, the later definition silently shadows the earlier one —
    verify they belong to different classes/files.
    """
    FASTA_DATA1 = """>someseq\nAAACCCGGGGTT\n>someseq2\nAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTT"""
    db = create_db_connection(TestWithPostgres.config.dbconfig)
    # upload FASTA file
    sequence_list = SequenceList.create_with_content_and_title(db, FASTA_DATA1, "sometitle")
    # create a job to determine predictions for a sequence_list
    job_uuid = CustomJob.create_job(db, DataType.PREDICTION, sequence_list, model_name="E2f1").uuid
    # mark job as running
    CustomJob.set_job_running(db, job_uuid)
    # upload file (4-column BED: name, start, end, value — no sequence column)
    BED_DATA = """
someseq\t0\t10\t12.5
someseq2\t20\t30\t4.5
someseq2\t60\t75\t15.5
""".strip()
    result_uuid = CustomResultData.new_uuid()
    result = CustomResultData(db, result_uuid, job_uuid, model_name='E2f1', bed_data=BED_DATA)
    result.save()
    # stored BED contents round-trip
    self.assertEqual(BED_DATA, CustomResultData.bed_file_contents(db, result_uuid).strip())
    # unsorted query returns both sequences in upload order
    predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=False, limit=None, offset=None)
    self.assertEqual(2, len(predictions))
    first = predictions[0]
    self.assertEqual('someseq', first['name'])
    self.assertEqual(12.5, float(first['max']))
    self.assertEqual([{u'start': 0, u'end': 10, u'value': 12.5}], first['values'])
    self.assertEqual('AAACCCGGGGTT', first['sequence'])
    second = predictions[1]
    self.assertEqual('someseq2', second['name'])
    self.assertEqual(15.5, float(second['max']))
    self.assertEqual('AAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTTAAACCCGGGGTT', second['sequence'])
    # sorting by max value flips the order
    predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=True, limit=None, offset=None)
    self.assertEqual(2, len(predictions))
    self.assertEqual(15.5, float(predictions[0]['max']))
    self.assertEqual(12.5, float(predictions[1]['max']))
    # paging: limit 1 / offset 1 returns just the second-highest value
    predictions = CustomResultData.get_predictions(db, result_uuid, sort_max_value=True, limit=1, offset=1)
    self.assertEqual(1, len(predictions))
    self.assertEqual(12.5, float(predictions[0]['max']))
    # Make sure we can convert predictions to JSON
    json_version = json.dumps({'data': predictions})
    self.assertEqual('{"data', json_version[:6])
def test_customjob(self):
    """Exercise the CustomJob lifecycle: NEW -> RUNNING -> COMPLETE -> ERROR.

    Also verifies find_jobs filtering by status and that a job cannot be set
    to running twice (worker mutual exclusion).
    """
    FASTA_DATA1 = """>stuff\nAAACCCGGGGTT"""
    db = create_db_connection(TestWithPostgres.config.dbconfig)
    # wipe related tables so counts below start from a known-empty state
    update_database(db, """
delete from custom_result_row;
delete from custom_result;
delete from job;
delete from sequence_list_item;
delete from sequence_list;
""", [])
    # start out finding no jobs
    jobs = CustomJob.find_jobs(db, None)
    self.assertEqual(len(jobs), 0)
    # create a new job that should be NEW status
    sequence_list = SequenceList.create_with_content_and_title(db, FASTA_DATA1, "somelist")
    job_uuid = CustomJob.create_job(db, DataType.PREDICTION, sequence_list, model_name="E2f1").uuid
    job = CustomJob.read_job(db, job_uuid)
    self.assertEqual(job_uuid, job.uuid)
    self.assertEqual(JobStatus.NEW, job.status)
    self.assertEqual(DataType.PREDICTION, job.type)
    self.assertEqual(sequence_list, job.sequence_list)
    self.assertIsNotNone(job.created)
    self.assertIsNone(job.finished)
    # find NEW job without filters
    jobs = CustomJob.find_jobs(db, None)
    self.assertEqual(len(jobs), 1)
    self.assertEqual(jobs[0].uuid, job_uuid)
    # find no for RUNNING jobs
    jobs = CustomJob.find_jobs(db, JobStatus.RUNNING)
    self.assertEqual(len(jobs), 0)
    # find 1 for NEW jobs
    jobs = CustomJob.find_jobs(db, JobStatus.NEW)
    self.assertEqual(len(jobs), 1)
    # Jobs can be set to running only once (when in NEW state)
    CustomJob.set_job_running(db, job_uuid)
    job = CustomJob.read_job(db, job_uuid)
    self.assertEqual(JobStatus.RUNNING, job.status)
    self.assertIsNone(job.finished)
    # Disallow setting a job running twice (prevents two workers working on the same job)
    with self.assertRaises(ValueError):
        CustomJob.set_job_running(db, job_uuid)
    # find 0 for NEW jobs
    jobs = CustomJob.find_jobs(db, JobStatus.NEW)
    self.assertEqual(len(jobs), 0)
    # Jobs can be set to complete from RUNNING state
    CustomJob.set_job_complete(db, job_uuid)
    job = CustomJob.read_job(db, job_uuid)
    self.assertEqual(JobStatus.COMPLETE, job.status)
    self.assertIsNotNone(job.finished)
    # find 0 for NEW jobs
    jobs = CustomJob.find_jobs(db, JobStatus.NEW)
    self.assertEqual(len(jobs), 0)
    # Jobs can be set to complete from ERROR state
    CustomJob.set_job_as_error(db, job_uuid, "Something failed.")
    job = CustomJob.read_job(db, job_uuid)
    self.assertEqual(JobStatus.ERROR, job.status)
    self.assertEqual("Something failed.", job.error_msg)
    self.assertIsNotNone(job.finished)
    # find 0 for NEW jobs
    jobs = CustomJob.find_jobs(db, JobStatus.NEW)
    self.assertEqual(len(jobs), 0)
def post_custom_sequences():
    """Create a custom sequence list from base64-encoded content and a title.

    :return: json response with the new sequence list's id
    """
    (data, title) = get_required_json_props(request, ["data", "title"])
    # The client sends the FASTA content base64-encoded to survive JSON transport.
    sequence_content = base64.b64decode(data)
    new_seq_uuid = SequenceList.create_with_content_and_title(get_db(),
                                                              sequence_content,
                                                              title)
    return make_ok_json_response({'id': new_seq_uuid})
def post_custom_sequences():
    """Create a custom sequence list from base64-encoded content and a title.

    NOTE(review): this is an exact duplicate of another post_custom_sequences
    definition in this SOURCE; if both are in the same module the later one
    silently replaces the earlier — confirm they come from different files.

    :return: json response with the new sequence list's id
    """
    (data, title) = get_required_json_props(request, ["data", "title"])
    # content arrives base64-encoded so it survives JSON transport
    decoded_data = base64.b64decode(data)
    seq_uuid = SequenceList.create_with_content_and_title(
        get_db(), decoded_data, title)
    return make_ok_json_response({'id': seq_uuid})
def test_sequence_good_size(self):
    """A normally-sized sequence list inserts without raising."""
    good_list = SequenceList('1234')
    good_list.content = "myseq>\nAACCGGTTAACCGTTTTTAACCTTGGG"
    good_list.title = 'Good list'
    good_list.insert(db=FakeDB())