示例#1
0
    def generate_unique_name(cls, tantalus_api, jira, version, args,
                             input_datasets, input_results):
        """Build the analysis name from the BAM datasets attached to a ticket.

        The lane hash is computed from the BAM sequence datasets returned by
        Tantalus for the given Jira ticket, library, aligner (prefix match on
        the aligner name) and reference genome -- not from ``input_datasets``.

        Args:
            tantalus_api: Client exposing ``list(resource, **filters)``.
            jira: Jira ticket used to filter the BAM datasets.
            version: Not used by this method.
            args: Mapping that must provide ``'library_id'``, ``'aligner'``
                and ``'ref_genome'``.
            input_datasets: Not used by this method.
            input_results: Not used by this method.

        Returns:
            ``templates.SC_QC_ANALYSIS_NAME_TEMPLATE`` formatted with the
            analysis type, aligner, reference genome, library id and lane
            hash.

        Raises:
            AssertionError: If ``args['aligner']`` is not a supported aligner.
            KeyError: If ``args`` lacks one of the required keys.
        """
        # TODO: control aligner vocabulary elsewhere
        supported_aligners = ('BWA_ALN', 'BWA_MEM')
        aligner = args['aligner']

        # Validate before querying so a bad aligner fails fast. An explicit
        # raise (rather than an ``assert`` statement) keeps the check active
        # under ``python -O`` while preserving the exception type.
        if aligner not in supported_aligners:
            raise AssertionError(
                'Unsupported aligner {!r}; expected one of: {}'.format(
                    aligner, ', '.join(supported_aligners)))

        # Get hash of lane data based on bams from the same ticket
        bam_datasets = tantalus_api.list(
            'sequence_dataset',
            analysis__jira_ticket=jira,
            library__library_id=args['library_id'],
            dataset_type='BAM',
            aligner__name__startswith=aligner,
            reference_genome__name=args['ref_genome'],
        )

        # TODO: check aligner and reference genome against bam dataset

        lanes_hashed = get_datasets_lanes_hash(tantalus_api,
                                               [d['id'] for d in bam_datasets])

        return templates.SC_QC_ANALYSIS_NAME_TEMPLATE.format(
            analysis_type=cls.analysis_type_,
            aligner=aligner,
            ref_genome=args['ref_genome'],
            library_id=args['library_id'],
            lanes_hashed=lanes_hashed,
        )
示例#2
0
    def generate_unique_name(self, jira, version, args, input_datasets,
                             input_results):
        """Return the name for a tenx analysis.

        The name is ``templates.TENX_ANALYSIS_NAME_TEMPLATE`` filled in with
        the fixed analysis type ``"tenx"``, the reference genome and library
        id taken from ``args``, and the lane hash of ``input_datasets``.

        ``jira``, ``version`` and ``input_results`` are accepted but not used.
        ``tantalus_api`` is not a parameter here; it is resolved from the
        enclosing scope.
        """
        # Hash the lanes first, as this is the only call that needs the API.
        lane_digest = get_datasets_lanes_hash(tantalus_api, input_datasets)

        name_template = templates.TENX_ANALYSIS_NAME_TEMPLATE
        name_fields = {
            "analysis_type": "tenx",
            "ref_genome": args['ref_genome'],
            "library_id": args['library_id'],
            "lanes_hashed": lane_digest,
        }
        return name_template.format(**name_fields)
示例#3
0
    def generate_unique_name(cls, tantalus_api, jira, version, args, input_datasets, input_results):
        """Return the analysis name derived from the input datasets' lanes.

        The name is ``templates.SC_QC_ANALYSIS_NAME_TEMPLATE`` filled in with
        ``cls.analysis_type_``, the aligner, reference genome and library id
        from ``args``, and the lane hash of ``input_datasets``.

        ``jira``, ``version`` and ``input_results`` are accepted but not used.
        """
        # Hash the lanes first, as this is the only call that needs the API.
        lane_digest = get_datasets_lanes_hash(tantalus_api, input_datasets)

        name_template = templates.SC_QC_ANALYSIS_NAME_TEMPLATE
        name_fields = {
            "analysis_type": cls.analysis_type_,
            "aligner": args['aligner'],
            "ref_genome": args['ref_genome'],
            "library_id": args['library_id'],
            "lanes_hashed": lane_digest,
        }
        return name_template.format(**name_fields)
示例#4
0
    def generate_unique_name(cls, tantalus_api, jira, version, args,
                             input_datasets, input_results):
        """Build the analysis name from the lanes of the input datasets.

        Args:
            tantalus_api: Client passed through to
                ``get_datasets_lanes_hash``.
            jira: Not used by this method.
            version: Not used by this method.
            args: Mapping that must provide ``'library_id'``, ``'aligner'``
                and ``'ref_genome'``.
            input_datasets: Datasets whose lanes are hashed into the name.
            input_results: Not used by this method.

        Returns:
            ``templates.SC_QC_ANALYSIS_NAME_TEMPLATE`` formatted with the
            analysis type, aligner, reference genome, library id and lane
            hash.

        Raises:
            AssertionError: If ``args['aligner']`` is not a supported aligner.
            KeyError: If ``args`` lacks one of the required keys.
        """
        # TODO: control aligner vocabulary elsewhere
        supported_aligners = ('BWA_ALN', 'BWA_MEM')
        aligner = args['aligner']

        # Validate before hashing so a bad aligner fails fast. An explicit
        # raise (rather than an ``assert`` statement) keeps the check active
        # under ``python -O`` while preserving the exception type.
        if aligner not in supported_aligners:
            raise AssertionError(
                'Unsupported aligner {!r}; expected one of: {}'.format(
                    aligner, ', '.join(supported_aligners)))

        lanes_hashed = get_datasets_lanes_hash(tantalus_api, input_datasets)

        return templates.SC_QC_ANALYSIS_NAME_TEMPLATE.format(
            analysis_type=cls.analysis_type_,
            aligner=aligner,
            ref_genome=args['ref_genome'],
            library_id=args['library_id'],
            lanes_hashed=lanes_hashed,
        )
示例#5
0
    def create_output_datasets(self, tag_name=None, update=False):
        """Register the library's BAM tarball as a BAM sequence dataset.

        Looks up the DNA library and sample for ``self.args["library_id"]``,
        adds ``<storage prefix>/<library_id>/bams.tar.gz`` as a file in the
        ``scrna_bams`` storage, and gets or creates a ``sequence_dataset``
        named ``BAM-<library_id>-SC_RNASEQ-lanes_<hash>-<ref_genome>`` that
        points at that file and at this analysis.

        ``tantalus_api``, ``colossus_api``, ``log`` and ``os`` are not
        parameters; they are resolved from the enclosing scope.

        NOTE(review): ``tag_name`` and ``update`` are never referenced in the
        lines shown, and ``add_file`` is always called with ``update=True``
        regardless of the ``update`` argument -- confirm this is intended.
        """

        # Inputs that identify the library and the reference it was aligned to.
        library_id = self.args["library_id"]
        ref_genome = self.args["ref_genome"]

        dna_library = tantalus_api.get("dna_library", library_id=library_id)

        # The sample id comes from the Colossus 10x library record and is
        # then resolved to the Tantalus sample.
        tenx_library = colossus_api.get("tenxlibrary", name=library_id)

        sample_id = tenx_library["sample"]["sample_id"]
        sample = tantalus_api.get("sample", sample_id=sample_id)

        storage_name = "scrna_bams"
        storage_client = tantalus_api.get_storage(storage_name)

        sequence_lanes = self.get_lane_ids()

        # The lane hash of the analysis' input datasets is embedded in the
        # dataset name below.
        lanes_hashed = get_datasets_lanes_hash(tantalus_api,
                                               self.analysis["input_datasets"])

        # Path of the tarball under the storage prefix; file_instance is not
        # used in the lines shown.
        bam_filepath = os.path.join(storage_client["prefix"], library_id,
                                    "bams.tar.gz")
        file_resource, file_instance = tantalus_api.add_file(storage_name,
                                                             bam_filepath,
                                                             update=True)

        name = "BAM-{}-SC_RNASEQ-lanes_{}-{}".format(library_id, lanes_hashed,
                                                     ref_genome)

        # get_or_create is used rather than a plain create; presumably this
        # reuses an existing matching dataset -- TODO confirm against the
        # Tantalus client.
        sequence_dataset = tantalus_api.get_or_create(
            "sequence_dataset",
            name=name,
            dataset_type="BAM",
            sample=sample["id"],
            library=dna_library["id"],
            sequence_lanes=sequence_lanes,
            file_resources=[file_resource["id"]],
            reference_genome=self.args["ref_genome"],
            aligner=None,
            analysis=self.analysis['id'],
        )

        log.info("Created sequence dataset {}".format(name))