import os

from flask import current_app
from werkzeug.datastructures import FileStorage
from qiime2 import Artifact
from qiime2.metadata import MetadataFileError

# NOTE: project-local helpers used below (responseIfError, save_filestorage,
# utils, config_generator, make_dir, copy_file, make_output_dir,
# get_denoise_dir, get_taxonomic_classification_dir, check_artifact_type,
# load_metadata, check_column_exists, reformat_manifest_with_run_id,
# validate_manifest, AXIOME3PipelineError) are imported from the project's
# own modules; their import paths are not shown here.

def validate_bubbleplot_input(_id, feature_table_artifact_path, taxonomy_artifact_path,
                              metadata_path=None, fill_variable=None):
    """
    Run prechecks to reduce the chance of the job failing.

    Input:
        - feature_table_artifact_path: feature table in QIIME2 artifact format
          (either a path or a FileStorage object)
        - taxonomy_artifact_path: taxonomy in QIIME2 artifact format
          (either a path or a FileStorage object)
    """
    # Save uploaded files in the docker container
    feature_table_uploaded_path = save_uploaded_file(_id, feature_table_artifact_path)
    taxonomy_uploaded_path = save_uploaded_file(_id, taxonomy_artifact_path)

    if metadata_path is not None:
        metadata_uploaded_path = save_uploaded_file(_id, metadata_path)
    else:
        metadata_uploaded_path = None

    def validate_artifact(feature_table_uploaded_path, taxonomy_uploaded_path):
        # Check artifact types
        try:
            check_artifact_type(feature_table_uploaded_path, "feature_table")
            check_artifact_type(taxonomy_uploaded_path, "taxonomy")
        except AXIOME3PipelineError as err:
            message = str(err)
            return 400, message

        return 200, "Imported data good!"

    def validate_metadata(metadata_uploaded_path, fill_variable):
        # Load metadata via the QIIME2 Metadata API.
        # It verifies metadata validity as well.
        try:
            metadata_df = load_metadata(metadata_uploaded_path)
        except MetadataFileError as err:
            message = str(err)
            return 400, message

        # Check that user-specified columns actually exist in the metadata file
        if fill_variable is not None:
            try:
                check_column_exists(metadata_df, fill_variable)
            except AXIOME3PipelineError as err:
                message = str(err)
                return 400, message

        return 200, "Ok"

    responseIfError(validate_artifact,
                    feature_table_uploaded_path=feature_table_uploaded_path,
                    taxonomy_uploaded_path=taxonomy_uploaded_path)

    if metadata_uploaded_path is not None:
        responseIfError(validate_metadata,
                        metadata_uploaded_path=metadata_uploaded_path,
                        fill_variable=fill_variable)

    return feature_table_uploaded_path, taxonomy_uploaded_path, metadata_uploaded_path
def validate_triplot_input(_id, feature_table_artifact_path, taxonomy_artifact_path,
                           metadata_path, environmental_metadata_path, fill_variable):
    # Save uploaded files in the docker container
    feature_table_uploaded_path = save_uploaded_file(_id, feature_table_artifact_path)
    taxonomy_uploaded_path = save_uploaded_file(_id, taxonomy_artifact_path)
    metadata_uploaded_path = save_uploaded_file(_id, metadata_path)
    environmental_metadata_uploaded_path = save_uploaded_file(_id, environmental_metadata_path)

    def validate_metadata(metadata_path, environmental_metadata_path):
        # Load metadata via the QIIME2 Metadata API.
        # It verifies metadata validity as well.
        try:
            metadata_df = load_metadata(metadata_path)
        except MetadataFileError as err:
            message = str(err)
            return 400, message

        # Loaded only to trigger validation; the dataframe itself is unused here
        try:
            environmental_metadata_df = load_metadata(environmental_metadata_path)
        except MetadataFileError as err:
            message = str(err)
            return 400, message

        # Check that the user-specified column actually exists in the metadata file
        try:
            check_column_exists(metadata_df, fill_variable)
        except AXIOME3PipelineError as err:
            message = str(err)
            return 400, message

        return 200, "Ok"

    def validate_artifact(feature_table_uploaded_path, taxonomy_uploaded_path):
        # Check artifact types
        try:
            check_artifact_type(feature_table_uploaded_path, "feature_table")
            check_artifact_type(taxonomy_uploaded_path, "taxonomy")
        except AXIOME3PipelineError as err:
            message = str(err)
            return 400, message

        return 200, "Imported data good!"

    responseIfError(validate_metadata,
                    metadata_path=metadata_uploaded_path,
                    environmental_metadata_path=environmental_metadata_uploaded_path)
    responseIfError(validate_artifact,
                    feature_table_uploaded_path=feature_table_uploaded_path,
                    taxonomy_uploaded_path=taxonomy_uploaded_path)

    return (feature_table_uploaded_path, taxonomy_uploaded_path,
            metadata_uploaded_path, environmental_metadata_uploaded_path)
def make_log_dir(_id):
    """
    Make a sub-directory in /log to store log files for each request.

    Input:
        - _id: UUID4 in string representation.

    Returns:
        status code and message
    """
    # /log should already exist inside the docker container as a bind mount.
    base_log_dir = "/log"
    log_dir = os.path.join(base_log_dir, _id)

    utils.responseIfError(make_dir, dirpath=log_dir)

    return 200, "OK"
def analysis_precheck(_id, feature_table, rep_seqs, taxonomy, metadata):
    """
    Run prechecks to reduce the chance of the job failing.

    Input:
        - feature_table: QIIME2 artifact of type FeatureTable[Frequency]
        - rep_seqs: QIIME2 artifact of type FeatureData[Sequence]
    """
    feature_table_path = save_uploaded_file(_id, feature_table)
    rep_seqs_path = save_uploaded_file(_id, rep_seqs)
    taxonomy_path = save_uploaded_file(_id, taxonomy)
    metadata_path = save_uploaded_file(_id, metadata)

    def validate_analysis_input(feature_table, rep_seqs, taxonomy):
        """
        Precheck input files prior to running the analysis step.

        Input:
            - feature_table: Path to QIIME2 artifact of type FeatureTable[Frequency]
            - rep_seqs: Path to QIIME2 artifact of type FeatureData[Sequence]
            - taxonomy: Path to QIIME2 artifact of type FeatureData[Taxonomy]
        """
        # Check artifact types
        try:
            feature_table_artifact = Artifact.load(feature_table)
            rep_seqs_artifact = Artifact.load(rep_seqs)
            taxonomy_artifact = Artifact.load(taxonomy)

            if str(feature_table_artifact.type) != "FeatureTable[Frequency]":
                msg = "Input Feature Table is not of type 'FeatureTable[Frequency]'!"
                raise ValueError(msg)

            if str(rep_seqs_artifact.type) != "FeatureData[Sequence]":
                msg = "Input Representative Sequences is not of type 'FeatureData[Sequence]'!"
                raise ValueError(msg)

            # taxonomy is passed to this validator, so type-check it as well
            if str(taxonomy_artifact.type) != "FeatureData[Taxonomy]":
                msg = "Input Taxonomy is not of type 'FeatureData[Taxonomy]'!"
                raise ValueError(msg)
        except ValueError as err:
            message = str(err)
            return 400, message

        return 200, "Imported data good!"

    responseIfError(validate_analysis_input,
                    feature_table=feature_table_path,
                    rep_seqs=rep_seqs_path,
                    taxonomy=taxonomy_path)

    return feature_table_path, rep_seqs_path, taxonomy_path, metadata_path
def taxonomic_classification_precheck(_id, feature_table, rep_seqs, classifier=None):
    """
    Run prechecks to reduce the chance of the job failing.

    Input:
        - feature_table: QIIME2 artifact of type FeatureTable[Frequency]
        - rep_seqs: QIIME2 artifact of type FeatureData[Sequence]
    """
    feature_table_path = save_uploaded_file(_id, feature_table)
    rep_seqs_path = save_uploaded_file(_id, rep_seqs)

    # Fall back to the default classifier if none was uploaded
    default_classifier_path = current_app.config["DEFAULT_CLASSIFIER_PATH"]
    classifier_path = save_uploaded_file(_id, classifier) if classifier is not None else default_classifier_path

    def validate_taxonomic_classification_input(feature_table, rep_seqs):
        """
        Precheck input files prior to running the taxonomic classification step.

        Input:
            - feature_table: Path to QIIME2 artifact of type FeatureTable[Frequency]
            - rep_seqs: Path to QIIME2 artifact of type FeatureData[Sequence]
        """
        # Check artifact types
        try:
            feature_table_artifact = Artifact.load(feature_table)
            rep_seqs_artifact = Artifact.load(rep_seqs)

            if str(feature_table_artifact.type) != "FeatureTable[Frequency]":
                msg = "Input Feature Table is not of type 'FeatureTable[Frequency]'!"
                raise ValueError(msg)

            if str(rep_seqs_artifact.type) != "FeatureData[Sequence]":
                msg = "Input Representative Sequences is not of type 'FeatureData[Sequence]'!"
                raise ValueError(msg)
        except ValueError as err:
            message = str(err)
            return 400, message

        return 200, "Imported data good!"

    responseIfError(validate_taxonomic_classification_input,
                    feature_table=feature_table_path,
                    rep_seqs=rep_seqs_path)

    return feature_table_path, rep_seqs_path, classifier_path
def save_uploaded_file(_id, uploaded_file):
    # Fresh uploads arrive as FileStorage objects and must be written to disk;
    # anything else is assumed to already be a path inside the container.
    if isinstance(uploaded_file, FileStorage):
        upload_path = utils.responseIfError(save_filestorage, _id=_id, _file=uploaded_file)
    else:
        upload_path = uploaded_file

    return upload_path
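# Illustrative sketch only (not called by the pipeline): save_uploaded_file
# accepts either a werkzeug FileStorage from a fresh upload or a plain path
# to a file already inside the container. The form field name and example
# path below are assumptions for illustration.
def _example_save_uploaded_file(request, _id):
    # Fresh multipart upload: written under /input/<_id>/ and the path returned
    fresh_path = save_uploaded_file(_id, request.files["feature_table"])
    # Pre-existing file: the path is passed through unchanged
    reused_path = save_uploaded_file(_id, "/input/{}/table.qza".format(_id))
    return fresh_path, reused_path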
def pipeline_setup(_id):
    """
    Set up directories (output, log, config) prior to running the pipeline.
    This is to be run for every type of request.

    Input:
        - _id: UUID4 in string representation

    Returns:
        - log_config_path: path to the logging configuration file.
    """
    utils.responseIfError(make_output_dir, _id=_id)

    # Make a sub log dir in /log
    utils.responseIfError(make_log_dir, _id=_id)

    # Create the luigi logging config file
    log_config_path = utils.responseIfError(config_generator.make_log_config, _id=_id)

    return log_config_path
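# Illustrative sketch only: the per-request setup order implied by this
# module. luigi_config_generator is defined later in this file, which is fine
# since the name is resolved at call time; any pipeline-specific options
# would be passed through its **kwargs.
def _example_request_setup(_id):
    # Create /output/<_id>, /log/<_id>, and the luigi logging config
    log_config_path = pipeline_setup(_id)
    # Generate the luigi config that references that logging config
    luigi_config_generator(_id, log_config_path)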
def validate_pcoa_input(_id, pcoa_artifact_path, metadata_path, target_primary, target_secondary=None):
    metadata_uploaded_path = save_uploaded_file(_id, metadata_path)

    # Save QIIME2 PCoA artifact
    pcoa_uploaded_path = save_uploaded_file(_id, pcoa_artifact_path)

    def validate_metadata(metadata_path, target_primary, target_secondary):
        # Load metadata via the QIIME2 Metadata API.
        # It verifies metadata validity as well.
        try:
            metadata_df = load_metadata(metadata_path)
        except MetadataFileError as err:
            message = str(err)
            return 400, message

        # Check that user-specified columns actually exist in the metadata file
        try:
            check_column_exists(metadata_df, target_primary, target_secondary)
        except AXIOME3PipelineError as err:
            message = str(err)
            return 400, message

        return 200, "Ok"

    def validate_artifact(pcoa_artifact_path):
        try:
            check_artifact_type(pcoa_artifact_path, "pcoa")
        except AXIOME3PipelineError as err:
            message = str(err)
            return 400, message

        return 200, "OK"

    responseIfError(validate_metadata,
                    metadata_path=metadata_uploaded_path,
                    target_primary=target_primary,
                    target_secondary=target_secondary)
    responseIfError(validate_artifact, pcoa_artifact_path=pcoa_uploaded_path)

    return pcoa_uploaded_path, metadata_uploaded_path
def luigi_config_generator(_id, log_config_path, **kwargs):
    """
    Generate the luigi configuration file for this request.

    Input:
        - _id: UUID4 in string representation
        - log_config_path: path to the logging configuration file.
    """
    config_path = utils.responseIfError(config_generator.make_luigi_config,
                                        _id=_id,
                                        logging_config=log_config_path,
                                        **kwargs)

    return config_path
def analysis_setup(_id, feature_table, rep_seqs, taxonomy):
    denoise_dir = get_denoise_dir(_id)
    feature_table_destination = os.path.join(denoise_dir, "merged_table.qza")
    rep_seqs_destination = os.path.join(denoise_dir, "merged_rep_seqs.qza")

    taxonomy_dir = get_taxonomic_classification_dir(_id)
    taxonomy_destination = os.path.join(taxonomy_dir, "taxonomy.qza")

    # Pre-make input dirs and copy files:
    # the pipeline requires input files to be present in specific dirs.
    responseIfError(make_dir, dirpath=denoise_dir)
    responseIfError(copy_file, source=feature_table, destination=feature_table_destination)
    responseIfError(copy_file, source=rep_seqs, destination=rep_seqs_destination)
    responseIfError(make_dir, dirpath=taxonomy_dir)
    responseIfError(copy_file, source=taxonomy, destination=taxonomy_destination)
def taxonomic_classification_setup(_id, feature_table, rep_seqs):
    denoise_dir = get_denoise_dir(_id)
    feature_table_destination = os.path.join(denoise_dir, "merged_table.qza")
    rep_seqs_destination = os.path.join(denoise_dir, "merged_rep_seqs.qza")

    responseIfError(make_dir, dirpath=denoise_dir)
    responseIfError(copy_file, source=feature_table, destination=feature_table_destination)
    responseIfError(copy_file, source=rep_seqs, destination=rep_seqs_destination)
def save_filestorage(_id, _file):
    """
    Saves uploaded data on the server.

    Input:
        - _id: UUID4 in string representation.
        - _file: FileStorage object

    Returns:
        status code and result
    """
    # /input should already exist as a named volume;
    # only available in the backend and celery services.
    base_input_dir = "/input"
    input_dir = os.path.join(base_input_dir, _id)

    utils.responseIfError(make_dir, dirpath=input_dir)

    # File name to save as
    input_path = os.path.join(input_dir, _file.filename)
    _file.save(input_path)

    return 200, input_path
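# Illustrative sketch only: what a successful save looks like when calling
# save_filestorage directly rather than through responseIfError (which
# unwraps the tuple). The manifest filename is an assumption for illustration.
def _example_save_filestorage(_id, _file):
    status, input_path = save_filestorage(_id, _file)
    # e.g. status == 200 and input_path == "/input/<_id>/manifest.csv"
    return input_path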
def denoise_precheck(_id, sequence_data):
    """
    Run prechecks to reduce the chance of the job failing.

    Input:
        - sequence_data: sequence data in QIIME2 artifact format
    """
    # Save uploaded file in the docker container
    upload_path = save_uploaded_file(_id, sequence_data)

    def validate_denoise_input(sequence_data):
        """
        Precheck input files prior to running the denoise step.

        Input:
            - sequence_data: sequence data in QIIME2 artifact format
        """
        # Check artifact type
        try:
            q2_artifact = Artifact.load(sequence_data)

            if str(q2_artifact.type) != "SampleData[PairedEndSequencesWithQuality]":
                msg = "Input QIIME2 Artifact is not of type 'SampleData[PairedEndSequencesWithQuality]'!"
                raise ValueError(msg)
        except ValueError as err:
            message = str(err)
            return 400, message

        return 200, "Imported data good!"

    responseIfError(validate_denoise_input, sequence_data=upload_path)

    return upload_path
def input_upload_precheck(_id, uploaded_manifest, input_format, is_multiple="no"):
    """
    Run pre-checks to reduce the chance of the job failing.

    Input:
        - _id: UUID4 in string representation.
        - uploaded_manifest: either a FileStorage object or a file path

    Returns:
        - path to the modified manifest file if the input is valid
    """
    # Save the uploaded manifest file in the docker container
    if isinstance(uploaded_manifest, FileStorage):
        manifest_path = responseIfError(save_filestorage, _id=_id, _file=uploaded_manifest)
    else:
        manifest_path = uploaded_manifest

    base_input_dir = "/input"
    input_dir = os.path.join(base_input_dir, _id)

    responseIfError(make_dir, dirpath=input_dir)

    new_manifest_path = responseIfError(reformat_manifest_with_run_id,
                                        _id=_id,
                                        _file=manifest_path,
                                        input_format=input_format,
                                        is_multiple=is_multiple)

    # Only single-run manifests are validated here
    if is_multiple.lower() == "no":
        responseIfError(validate_manifest,
                        manifest_path=new_manifest_path,
                        input_format=input_format)

    return new_manifest_path
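# Illustrative sketch only: validating a single-run manifest upload. The form
# field name and the input_format value are assumptions for illustration; the
# accepted values are whatever reformat_manifest_with_run_id and
# validate_manifest expect.
def _example_input_upload(request, _id):
    new_manifest_path = input_upload_precheck(
        _id=_id,
        uploaded_manifest=request.files["manifest"],
        input_format="PairedEndFastqManifestPhred33",  # assumed example value
        is_multiple="no",
    )
    return new_manifest_path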
def triplot_setup(_id):
    # Make output directory
    responseIfError(make_output_dir, _id=_id)
def denoise_setup(denoise_input, _id):
    destination_dir = os.path.join('/output', _id)
    destination = os.path.join(destination_dir, "paired_end_demux.qza")

    responseIfError(make_dir, dirpath=destination_dir)
    responseIfError(copy_file, source=denoise_input, destination=destination)
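# Illustrative sketch only: how a denoise request might compose the helpers
# in this module end-to-end. The form field name and uuid usage are
# assumptions for illustration; the actual request handling lives elsewhere.
def _example_denoise_request(request):
    import uuid
    _id = str(uuid.uuid4())
    # Validate the uploaded artifact and get its on-disk path
    upload_path = denoise_precheck(_id, request.files["sequence_data"])
    # Create output/log dirs and the luigi logging config
    log_config_path = pipeline_setup(_id)
    # Copy the validated input to where the pipeline expects it
    denoise_setup(upload_path, _id)
    # Generate the luigi config for this run
    luigi_config_generator(_id, log_config_path)
    return 200, "OK"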