def convert_to_dict(runs):
    """Collapse a list of run dicts into a dict keyed by ``runId``.

    The first occurrence of each runId wins. Duplicate occurrences are
    validated against the stored run: their fastq pair (assumed to be a
    two-element [R1, R2] list -- TODO confirm with the LIMS contract)
    must match exactly.

    :param runs: list of dicts, each containing at least "runId" and "fastqs"
    :return: dict mapping runId -> run dict
    :raises FailedToFetchSampleException: when a duplicate run reports a
        different fastq path than the stored run
    """
    run_dict = dict()
    for run in runs:
        existing = run_dict.get(run["runId"])
        if not existing:
            run_dict[run["runId"]] = run
            continue
        # BUGFIX: the original logged "Fastq empty" when fastqs WERE present
        # (condition was not inverted) and then indexed into the list anyway,
        # risking IndexError. Now the error fires only when there is nothing
        # to compare against, and the comparison is skipped in that case.
        if not existing.get("fastqs"):
            logger.error("Fastq empty")
            continue
        for idx in (0, 1):
            if existing["fastqs"][idx] != run["fastqs"][idx]:
                logger.error("File %s do not match with %s" % (existing["fastqs"][idx], run["fastqs"][idx]))
                raise FailedToFetchSampleException(
                    "File %s do not match with %s" % (existing["fastqs"][idx], run["fastqs"][idx]))
    return run_dict
def fetch_sample_metadata(sample_id, igocomplete, request_id, request_metadata, redelivery=False, job_group_notifier=None):
    """
    Fetch the LIMS SampleManifest for one sample and register every fastq
    it reports as a File object.

    :param sample_id: IGO sample id; must match the manifest's 'igoId'
    :param igocomplete: IGO-complete flag propagated into each file's metadata
    :param request_id: request the sample belongs to
    :param request_metadata: request-level metadata merged into each file
    :param redelivery: when True, existing files are updated instead of created
    :param job_group_notifier: forwarded to create_or_update_file for notifications
    :raises FailedToFetchSampleException: on an empty manifest response or
        when the LIMS returns a different igoId than requested
    """
    logger.info("Fetch sample metadata for sampleId:%s" % sample_id)
    sampleMetadata = LIMSClient.get_sample_manifest(sample_id)
    try:
        # LIMS returns a list; an empty response means the fetch failed
        data = sampleMetadata[0]
    except Exception as e:
        raise FailedToFetchSampleException(
            "Failed to fetch SampleManifest for sampleId:%s. Invalid response" % sample_id)
    if data['igoId'] != sample_id:
        # logger.info(data)
        logger.info("Failed to fetch SampleManifest for sampleId:%s. LIMS returned %s " % (sample_id, data['igoId']))
        raise FailedToFetchSampleException(
            "Failed to fetch SampleManifest for sampleId:%s. LIMS returned %s " % (sample_id, data['igoId']))
    validate_sample(sample_id, data.get('libraries', []), igocomplete, redelivery)
    # NOTE: the pop() calls below deliberately mutate data/library/run so that
    # only the remaining (non-nested) keys are merged into each file's
    # metadata by create_or_update_file -- preserve this order.
    libraries = data.pop('libraries')
    for library in libraries:
        logger.info("Processing library %s" % library)
        runs = library.pop('runs')
        run_dict = convert_to_dict(runs)
        logger.info("Processing runs %s" % run_dict)
        for run in run_dict.values():
            logger.info("Processing run %s" % run)
            fastqs = run.pop('fastqs')
            for fastq in fastqs:
                logger.info("Adding file %s" % fastq)
                create_or_update_file(fastq, request_id, settings.IMPORT_FILE_GROUP, 'fastq', igocomplete, data,
                                      library, run, request_metadata, R1_or_R2(fastq), update=redelivery,
                                      job_group_notifier=job_group_notifier)
def create_or_update_file(path, request_id, file_group_id, file_type, igocomplete, data, library, run, sample,
                          request_metadata, r, update=False, job_group_notifier=None):
    """
    Create a File object for ``path`` with merged LIMS metadata, or (on
    redelivery) update the existing one and notify about metadata diffs.

    :param path: fastq file path
    :param request_id: request id stored as lims_metadata['requestId']
    :param file_group_id: FileGroup the file is registered under
    :param file_type: FileType name (e.g. 'fastq')
    :param igocomplete: IGO-complete flag stored in metadata
    :param data: sample-level LIMS metadata (deep-copied, not mutated)
    :param library: library-level metadata merged into lims_metadata
    :param run: run-level metadata merged into lims_metadata
    :param sample: sample object forwarded to create_file_object
    :param request_metadata: request-level metadata merged into lims_metadata
    :param r: R1/R2 marker stored as lims_metadata['R']
    :param update: when True, update an existing file instead of failing
    :param job_group_notifier: target for redelivery notifications
    :raises FailedToFetchSampleException: on metadata parse failure or when
        the file already exists and ``update`` is False
    """
    logger.info("Creating file %s " % path)
    try:
        file_group_obj = FileGroup.objects.get(id=file_group_id)
        file_type_obj = FileType.objects.filter(name=file_type).first()
        lims_metadata = copy.deepcopy(data)
        library_copy = copy.deepcopy(library)
        lims_metadata['requestId'] = request_id
        lims_metadata['igocomplete'] = igocomplete
        lims_metadata['R'] = r
        # merge library-, run- and request-level keys; later sources win
        for k, v in library_copy.items():
            lims_metadata[k] = v
        for k, v in run.items():
            lims_metadata[k] = v
        for k, v in request_metadata.items():
            lims_metadata[k] = v
        metadata = format_metadata(lims_metadata)
        # validator = MetadataValidator(METADATA_SCHEMA)
    except Exception as e:
        logger.error("Failed to parse metadata for file %s path" % path)
        raise FailedToFetchSampleException("Failed to create file %s. Error %s" % (path, str(e)))
    try:
        logger.info(lims_metadata)
        # validator.validate(metadata)
    except MetadataValidationException as e:
        logger.error("Failed to create file %s. Error %s" % (path, str(e)))
        raise FailedToFetchSampleException("Failed to create file %s. Error %s" % (path, str(e)))
    else:
        f = FileRepository.filter(path=path).first()
        if not f:
            # NOTE(review): create_file_object is defined in this module with
            # five parameters; this six-argument call (extra `sample`) looks
            # stale -- confirm which signature is current before relying on
            # this code path.
            create_file_object(path, file_group_obj, lims_metadata, metadata, file_type_obj, sample)
            if update:
                message = "File registered: %s" % path
                # BUGFIX: the event dict previously rebound the boolean
                # `update` parameter; use a distinct name.
                update_event = RedeliveryUpdateEvent(job_group_notifier, message).to_dict()
                send_notification.delay(update_event)
        else:
            if update:
                before = f.file.filemetadata_set.order_by('-created_date').count()
                update_file_object(f.file, path, metadata)
                after = f.file.filemetadata_set.order_by('-created_date').count()
                # a new metadata row means something actually changed
                if after != before:
                    all_metadata = f.file.filemetadata_set.order_by('-created_date')
                    ddiff = DeepDiff(all_metadata[1].metadata, all_metadata[0].metadata, ignore_order=True)
                    diff_file_name = "%s_metadata_update.json" % f.file.file_name
                    message = "Updating file metadata: %s, details in file %s\n" % (path, diff_file_name)
                    update_event = RedeliveryUpdateEvent(job_group_notifier, message).to_dict()
                    diff_details_event = LocalStoreFileEvent(job_group_notifier, diff_file_name, str(ddiff)).to_dict()
                    send_notification.delay(update_event)
                    send_notification.delay(diff_details_event)
            else:
                raise FailedToFetchSampleException("File %s already exist with id %s" % (path, str(f.id)))
def create_file_object(path, file_group, lims_metadata, metadata, file_type):
    """
    Persist a File with its formatted metadata, archive the raw LIMS
    payload, and queue a checksum job for the new file.

    :param path: absolute path of the file on disk
    :param file_group: FileGroup instance the file belongs to
    :param lims_metadata: raw merged LIMS metadata (stored as ImportMetadata)
    :param metadata: formatted metadata stored as FileMetadata
    :param file_type: FileType instance
    :raises FailedToFetchSampleException: wrapping any error during creation
    """
    try:
        f = File.objects.create(file_name=os.path.basename(path), path=path, file_group=file_group,
                                file_type=file_type)
        # NOTE: objects.create() already saves; the original's extra f.save()
        # was redundant and has been removed.
        fm = FileMetadata(file=f, metadata=metadata)
        fm.save()
        # queue an async checksum calculation for the new file
        Job.objects.create(
            run=TYPES["CALCULATE_CHECKSUM"],
            args={"file_id": str(f.id), "path": path},
            status=JobStatus.CREATED,
            max_retry=3,
            children=[],
        )
        # keep the raw LIMS payload for audit/debugging (return value unused)
        ImportMetadata.objects.create(file=f, metadata=lims_metadata)
    except Exception as e:
        logger.error("Failed to create file %s. Error %s" % (path, str(e)))
        raise FailedToFetchSampleException(
            "Failed to create file %s. Error %s" % (path, str(e))) from e
def fetch_samples(request_id, import_pooled_normals=True, import_samples=True, job_group=None,
                  job_group_notifier=None, redelivery=False):
    """
    Fetch all samples for a request from the LIMS and create import jobs.

    :param request_id: LIMS request id; must match the response's requestId
    :param import_pooled_normals: also create jobs for pooled normals
    :param import_samples: create per-sample import jobs
    :param job_group: JobGroup id the child jobs are attached to (optional)
    :param job_group_notifier: JobGroupNotifier id for notifications (optional)
    :param redelivery: forwarded to per-sample jobs
    :return: list of created job ids (as strings)
    :raises ErrorInconsistentDataException: when LIMS answers for a different request
    :raises FailedToFetchSampleException: when the request has no samples
    """
    logger.info("Fetching sampleIds for requestId:%s" % request_id)
    jg = None
    jgn = None
    try:
        jg = JobGroup.objects.get(id=job_group)
        logger.debug("JobGroup found")
    except JobGroup.DoesNotExist:
        logger.debug("No JobGroup Found")
    try:
        jgn = JobGroupNotifier.objects.get(id=job_group_notifier)
        # BUGFIX: both debug messages below previously said "JobGroup",
        # copy-pasted from the lookup above.
        logger.debug("JobGroupNotifier found")
    except JobGroupNotifier.DoesNotExist:
        logger.debug("No JobGroupNotifier Found")
    children = set()
    sample_ids = LIMSClient.get_request_samples(request_id)
    if sample_ids['requestId'] != request_id:
        raise ErrorInconsistentDataException(
            "LIMS returned wrong response for request %s. Got %s instead" % (request_id, sample_ids['requestId']))
    request_metadata = {
        "dataAnalystEmail": sample_ids['dataAnalystEmail'],
        "dataAnalystName": sample_ids['dataAnalystName'],
        "investigatorEmail": sample_ids['investigatorEmail'],
        "investigatorName": sample_ids['investigatorName'],
        "labHeadEmail": sample_ids['labHeadEmail'],
        "labHeadName": sample_ids['labHeadName'],
        "otherContactEmails": sample_ids['otherContactEmails'],
        "dataAccessEmails": sample_ids['dataAccessEmails'],
        "qcAccessEmails": sample_ids['qcAccessEmails'],
        "projectManagerName": sample_ids['projectManagerName'],
        "recipe": sample_ids['recipe'],
        "piEmail": sample_ids["piEmail"],
    }
    set_recipe_event = ETLSetRecipeEvent(job_group_notifier, request_metadata['recipe']).to_dict()
    send_notification.delay(set_recipe_event)
    pooled_normals = sample_ids.get("pooledNormals", [])
    if import_pooled_normals and pooled_normals:
        for f in pooled_normals:
            job = get_or_create_pooled_normal_job(f, jg)
            children.add(str(job.id))
    if import_samples:
        if not sample_ids.get('samples', False):
            raise FailedToFetchSampleException("No samples reported for requestId: %s" % request_id)
        for sample in sample_ids.get('samples', []):
            job = create_sample_job(sample['igoSampleId'], sample['igocomplete'], request_id, request_metadata,
                                    redelivery, jg, jgn)
            children.add(str(job.id))
    return list(children)
def get_deliveries(timestamp):
    """
    Query the LIMS REST API for requests delivered since ``timestamp``.

    :param timestamp: epoch timestamp forwarded to the LIMS
    :return: parsed JSON payload of request ids
    :raises FailedToFetchSampleException: on any non-200 response
    """
    url = '%s/LimsRest/api/getDeliveries' % settings.LIMS_URL
    credentials = (settings.LIMS_USERNAME, settings.LIMS_PASSWORD)
    # NOTE: verify=False disables TLS certificate checking for the LIMS host
    response = requests.get(url,
                            params={"timestamp": timestamp},
                            auth=credentials,
                            verify=False)
    if response.status_code != 200:
        raise FailedToFetchSampleException(
            "Failed to fetch new requests, status_code: %s" % response.status_code)
    return response.json()
def get_sample_manifest(sample_id):
    """
    Fetch the SampleManifest JSON for one sample from the LIMS REST API.

    :param sample_id: IGO sample id
    :return: parsed JSON manifest payload
    :raises FailedToFetchSampleException: on any non-200 response
    """
    url = '%s/LimsRest/api/getSampleManifest' % settings.LIMS_URL
    credentials = (settings.LIMS_USERNAME, settings.LIMS_PASSWORD)
    # NOTE: verify=False disables TLS certificate checking for the LIMS host
    response = requests.get(url,
                            params={"igoSampleId": sample_id},
                            auth=credentials,
                            verify=False)
    if response.status_code != 200:
        raise FailedToFetchSampleException(
            "Failed to fetch SampleManifest for sampleId:%s, status_code: %s" % (sample_id, response.status_code))
    return response.json()
def get_request_samples(request_id):
    """
    Fetch the sample listing for a request from the LIMS REST API.

    :param request_id: LIMS request id
    :return: parsed JSON payload with the request's samples
    :raises FailedToFetchSampleException: on any non-200 response
    """
    url = '%s/LimsRest/api/getRequestSamples' % settings.LIMS_URL
    credentials = (settings.LIMS_USERNAME, settings.LIMS_PASSWORD)
    # NOTE: verify=False disables TLS certificate checking for the LIMS host
    response = requests.get(url,
                            params={"request": request_id},
                            auth=credentials,
                            verify=False)
    if response.status_code != 200:
        raise FailedToFetchSampleException(
            "Failed to fetch sampleIds for request %s, status_code: %s" % (request_id, response.status_code))
    return response.json()
def update_file_object(file_object, path, metadata):
    """
    Persist a new path and metadata onto an existing File through
    UpdateFileSerializer, attributed to the ETL user when one exists.

    :param file_object: File instance to update
    :param path: file path to store
    :param metadata: metadata dict to store
    :raises FailedToFetchSampleException: when serializer validation fails
    """
    payload = {
        "path": path,
        "metadata": metadata,
    }
    try:
        etl_user = User.objects.get(username=settings.ETL_USER)
        payload['user'] = etl_user.id
    except User.DoesNotExist:
        # no ETL user configured -- the update is recorded without one
        pass
    serializer = UpdateFileSerializer(file_object, data=payload)
    if not serializer.is_valid():
        logger.error("Failed to update file %s: Error %s" % (path, serializer.errors))
        raise FailedToFetchSampleException(
            "Failed to update metadata for fastq files for %s : %s" % (path, serializer.errors))
    serializer.save()
def create_or_update_file(
    path,
    request_id,
    file_group_id,
    file_type,
    igocomplete,
    data,
    library,
    run,
    request_metadata,
    r,
    update=False,
    job_group_notifier=None,
):
    """
    Create a File object for ``path`` with merged LIMS metadata, copying the
    file to a recipe-remapped location first, or (on redelivery) update the
    existing one and notify about metadata diffs.

    :param path: original fastq file path
    :param request_id: request id stored as lims_metadata["requestId"]
    :param file_group_id: FileGroup the file is registered under
    :param file_type: FileType name (e.g. 'fastq')
    :param igocomplete: IGO-complete flag stored in metadata
    :param data: sample-level LIMS metadata (deep-copied, not mutated)
    :param library: library-level metadata merged into lims_metadata
    :param run: run-level metadata merged into lims_metadata
    :param request_metadata: request-level metadata merged into lims_metadata
    :param r: R1/R2 marker stored as lims_metadata["R"]
    :param update: when True, update an existing file instead of failing
    :param job_group_notifier: target for redelivery notifications
    :raises FailedToFetchSampleException: on metadata parse failure or when
        the file already exists and ``update`` is False
    :raises FailedToCopyFilePermissionDeniedException: copy denied by permissions
    :raises FailedToCopyFileException: any other copy failure
    """
    logger.info("Creating file %s " % path)
    try:
        file_group_obj = FileGroup.objects.get(id=file_group_id)
        file_type_obj = FileType.objects.filter(name=file_type).first()
        lims_metadata = copy.deepcopy(data)
        library_copy = copy.deepcopy(library)
        lims_metadata["requestId"] = request_id
        lims_metadata["igocomplete"] = igocomplete
        lims_metadata["R"] = r
        # merge library-, run- and request-level keys; later sources win
        for k, v in library_copy.items():
            lims_metadata[k] = v
        for k, v in run.items():
            lims_metadata[k] = v
        for k, v in request_metadata.items():
            lims_metadata[k] = v
        metadata = format_metadata(lims_metadata)
        # validator = MetadataValidator(METADATA_SCHEMA)
    except Exception as e:
        logger.error("Failed to parse metadata for file %s path" % path)
        raise FailedToFetchSampleException(
            "Failed to create file %s. Error %s" % (path, str(e)))
    try:
        logger.info(lims_metadata)
        # validator.validate(metadata)
    except MetadataValidationException as e:
        logger.error("Failed to create file %s. Error %s" % (path, str(e)))
        raise FailedToFetchSampleException(
            "Failed to create file %s. Error %s" % (path, str(e)))
    else:
        recipe = metadata.get("recipe", "")
        new_path = CopyService.remap(recipe, path)  # Get copied file path
        f = FileRepository.filter(path=new_path).first()
        if not f:
            try:
                if path != new_path:
                    CopyService.copy(path, new_path)
            except Exception as e:
                if "Permission denied" in str(e):
                    raise FailedToCopyFilePermissionDeniedException(
                        "Failed to copy file %s. Error %s" % (path, str(e)))
                else:
                    raise FailedToCopyFileException(
                        "Failed to copy file %s. Error %s" % (path, str(e)))
            create_file_object(new_path, file_group_obj, lims_metadata, metadata, file_type_obj)
            if update:
                message = "File registered: %s" % path
                # BUGFIX: the event dict previously rebound the boolean
                # `update` parameter; use a distinct name.
                update_event = RedeliveryUpdateEvent(job_group_notifier, message).to_dict()
                send_notification.delay(update_event)
        else:
            if update:
                before = f.file.filemetadata_set.order_by("-created_date").count()
                update_file_object(f.file, f.file.path, metadata)
                after = f.file.filemetadata_set.order_by("-created_date").count()
                # a new metadata row means something actually changed
                if after != before:
                    all_metadata = f.file.filemetadata_set.order_by("-created_date")
                    ddiff = DeepDiff(all_metadata[1].metadata, all_metadata[0].metadata, ignore_order=True)
                    diff_file_name = "%s_metadata_update_%s.json" % (f.file.file_name, all_metadata[0].version)
                    message = "Updating file metadata: %s, details in file %s\n" % (path, diff_file_name)
                    update_event = RedeliveryUpdateEvent(job_group_notifier, message).to_dict()
                    diff_details_event = LocalStoreFileEvent(job_group_notifier, diff_file_name, str(ddiff)).to_dict()
                    send_notification.delay(update_event)
                    send_notification.delay(diff_details_event)
            else:
                raise FailedToFetchSampleException(
                    "File %s already exist with id %s" % (path, str(f.id)))
def fetch_samples(
    request_id,
    import_pooled_normals=True,
    import_samples=True,
    job_group=None,
    job_group_notifier=None,
    redelivery=False,
):
    """
    Fetch all samples for a request from the LIMS, ensure Patient/Sample
    rows exist, and create per-sample import jobs.

    :param request_id: LIMS request id; must match the response's requestId
    :param import_pooled_normals: also create jobs for pooled normals
    :param import_samples: create per-sample import jobs
    :param job_group: JobGroup id the child jobs are attached to (optional)
    :param job_group_notifier: JobGroupNotifier id for notifications (optional)
    :param redelivery: forwarded to pooled-normal and per-sample jobs
    :return: list of created job ids (as strings)
    :raises ErrorInconsistentDataException: when LIMS answers for a different request
    :raises FailedToFetchSampleException: when the request has no samples or a
        sample manifest response is empty
    """
    logger.info("Fetching sampleIds for requestId:%s" % request_id)
    jg = None
    jgn = None
    try:
        jg = JobGroup.objects.get(id=job_group)
        logger.debug("JobGroup found")
    except JobGroup.DoesNotExist:
        logger.debug("No JobGroup Found")
    try:
        jgn = JobGroupNotifier.objects.get(id=job_group_notifier)
        # BUGFIX: both debug messages below previously said "JobGroup",
        # copy-pasted from the lookup above.
        logger.debug("JobGroupNotifier found")
    except JobGroupNotifier.DoesNotExist:
        logger.debug("No JobGroupNotifier Found")
    children = set()
    sample_ids = LIMSClient.get_request_samples(request_id)
    if sample_ids["requestId"] != request_id:
        raise ErrorInconsistentDataException(
            "LIMS returned wrong response for request %s. Got %s instead" % (request_id, sample_ids["requestId"]))
    request_metadata = {
        "dataAnalystEmail": sample_ids["dataAnalystEmail"],
        "dataAnalystName": sample_ids["dataAnalystName"],
        "investigatorEmail": sample_ids["investigatorEmail"],
        "investigatorName": sample_ids["investigatorName"],
        "labHeadEmail": sample_ids["labHeadEmail"],
        "labHeadName": sample_ids["labHeadName"],
        "otherContactEmails": sample_ids["otherContactEmails"],
        "dataAccessEmails": sample_ids["dataAccessEmails"],
        "qcAccessEmails": sample_ids["qcAccessEmails"],
        "projectManagerName": sample_ids["projectManagerName"],
        "recipe": sample_ids["recipe"],
        "piEmail": sample_ids["piEmail"],
    }
    set_recipe_event = ETLSetRecipeEvent(job_group_notifier, request_metadata["recipe"]).to_dict()
    send_notification.delay(set_recipe_event)
    pooled_normals = sample_ids.get("pooledNormals", [])
    if import_pooled_normals and pooled_normals:
        for f in pooled_normals:
            job = get_or_create_pooled_normal_job(f, jg, jgn, redelivery=redelivery)
            children.add(str(job.id))
    if import_samples:
        if not sample_ids.get("samples", False):
            raise FailedToFetchSampleException(
                "No samples reported for requestId: %s" % request_id)
        for sample in sample_ids.get("samples", []):
            sampleMetadata = LIMSClient.get_sample_manifest(sample["igoSampleId"])
            try:
                data = sampleMetadata[0]
            except Exception:
                # BUGFIX: this used to be a bare `pass`, which left `data`
                # unbound (NameError) -- or worse, stale from the previous
                # loop iteration -- on an empty manifest response. Fail
                # loudly instead, matching fetch_sample_metadata.
                raise FailedToFetchSampleException(
                    "Failed to fetch SampleManifest for sampleId:%s. Invalid response" % sample["igoSampleId"])
            patient_id = format_patient_id(data.get("cmoPatientId"))
            if not Patient.objects.filter(patient_id=patient_id):
                Patient.objects.create(patient_id=patient_id)
            sample_name = data.get("cmoSampleName", None)
            specimen_type = data.get("specimenType", None)
            cmo_sample_name = format_sample_name(sample_name, specimen_type)
            if not Sample.objects.filter(sample_id=sample["igoSampleId"], sample_name=sample_name,
                                         cmo_sample_name=cmo_sample_name):
                Sample.objects.create(sample_id=sample["igoSampleId"], sample_name=sample_name,
                                      cmo_sample_name=cmo_sample_name)
            # NOTE(review): the key "igoComplete" here differs in case from
            # the "igocomplete" used by the other fetch_samples variant --
            # confirm which casing the LIMS response actually uses.
            job = create_sample_job(sample["igoSampleId"], sample["igoComplete"], request_id, request_metadata,
                                    redelivery, jg, jgn)
            children.add(str(job.id))
    return list(children)