Exemplo n.º 1
0
    def setUp(self):
        """Create the test schema and seed it with fixture records.

        Reads the connection settings from ``data/dbconfig.json``, creates
        all tables on the resulting engine and then stores platform,
        flowcell barcode rule, project, sample, seqrun, experiment and run
        records, in that order, each through its own adaptor session.
        """
        self.dbconfig = 'data/dbconfig.json'
        db_params = read_dbconf_json(self.dbconfig)
        base = BaseAdaptor(**db_params)
        self.engine = base.engine
        self.dbname = db_params['dbname']
        Base.metadata.create_all(self.engine)
        self.session_class = base.get_session_class()

        # Identifiers shared by several fixture tables below.
        project_name = 'IGFQ000472_avik_28-3-2018_RNA'
        seqrun_name = '180518_K00345_0047_BHV2GJBBXX'

        # --- platforms and flowcell barcode rules ---------------------------
        platform_data = [
            {
                'platform_igf_id': platform_id,
                'model_name': model,
                'vendor_name': 'ILLUMINA',
                'software_name': 'RTA',
                'software_version': rta_version,
            }
            for platform_id, model, rta_version in (
                ('M03291', 'MISEQ', 'RTA1.18.54'),
                ('NB501820', 'NEXTSEQ', 'RTA2'),
                ('K00345', 'HISEQ4000', 'RTA2'),
            )
        ]

        flowcell_rule_data = [
            {
                'platform_igf_id': platform_id,
                'flowcell_type': flowcell_type,
                'index_1': index_1_rule,
                'index_2': index_2_rule,
            }
            for platform_id, flowcell_type, index_1_rule, index_2_rule in (
                ('K00345', 'HiSeq 3000/4000 SR', 'NO_CHANGE', 'NO_CHANGE'),
                ('K00345', 'HiSeq 3000/4000 PE', 'NO_CHANGE', 'REVCOMP'),
                ('NB501820', 'NEXTSEQ', 'NO_CHANGE', 'REVCOMP'),
                ('M03291', 'MISEQ', 'NO_CHANGE', 'NO_CHANGE'),
            )
        ]

        platform_adaptor = PlatformAdaptor(session_class=base.session_class)
        platform_adaptor.start_session()
        platform_adaptor.store_platform_data(data=platform_data)
        platform_adaptor.store_flowcell_barcode_rule(data=flowcell_rule_data)
        platform_adaptor.close_session()

        # --- project --------------------------------------------------------
        project_data = [{'project_igf_id': project_name}]
        project_adaptor = ProjectAdaptor(session_class=base.session_class)
        project_adaptor.start_session()
        project_adaptor.store_project_and_attribute_data(data=project_data)
        project_adaptor.close_session()

        # --- samples --------------------------------------------------------
        # Seven samples; the last one ('IGF109797_1') gets no experiment or
        # run record further down.
        sample_data = [
            {
                'project_igf_id': project_name,
                'sample_igf_id': sample_id,
                'expected_read': 40000000,
            }
            for sample_id in (
                'IGF109792',
                'IGF109793',
                'IGF109794',
                'IGF109795',
                'IGF109796',
                'IGF109797',
                'IGF109797_1',
            )
        ]

        sample_adaptor = SampleAdaptor(session_class=base.session_class)
        sample_adaptor.start_session()
        sample_adaptor.store_sample_and_attribute_data(data=sample_data)
        sample_adaptor.close_session()

        # --- seqrun ---------------------------------------------------------
        seqrun_data = [
            {
                'flowcell_id': 'HV2GJBBXX',
                'platform_igf_id': 'K00345',
                'seqrun_igf_id': seqrun_name,
            },
        ]

        seqrun_adaptor = SeqrunAdaptor(session_class=base.session_class)
        seqrun_adaptor.start_session()
        seqrun_adaptor.store_seqrun_and_attribute_data(data=seqrun_data)
        seqrun_adaptor.close_session()

        # --- experiments ----------------------------------------------------
        experiment_data = [
            {
                'experiment_igf_id': '{0}_HISEQ4000'.format(sample_id),
                'library_name': sample_id,
                'platform_name': 'HISEQ4000',
                'project_igf_id': project_name,
                'sample_igf_id': sample_id,
            }
            for sample_id in (
                'IGF109792',
                'IGF109793',
                'IGF109794',
                'IGF109795',
                'IGF109796',
                'IGF109797',
            )
        ]

        experiment_adaptor = \
            ExperimentAdaptor(session_class=base.session_class)
        experiment_adaptor.start_session()
        # The experiment adaptor is driven through a method named
        # store_project_and_attribute_data, exactly as before.
        experiment_adaptor.store_project_and_attribute_data(
            data=experiment_data)
        experiment_adaptor.close_session()

        # --- runs -----------------------------------------------------------
        # NOTE(review): the run id for IGF109796 starts with a tab character.
        # It looks accidental, but it is kept as-is because tests may depend
        # on the stored value - confirm before cleaning it up.
        run_data = [
            {
                'experiment_igf_id': experiment_id,
                'lane_number': '7',
                'run_igf_id': run_id,
                'seqrun_igf_id': seqrun_name,
                'R1_READ_COUNT': r1_read_count,
            }
            for experiment_id, run_id, r1_read_count in (
                ('IGF109792_HISEQ4000',
                 'IGF109792_HISEQ4000_H2N3MBBXY_7',
                 288046541),
                ('IGF109793_HISEQ4000',
                 'IGF109793_HISEQ4000_H2N3MBBXY_7',
                 14666330),
                ('IGF109794_HISEQ4000',
                 'IGF109794_HISEQ4000_H2N3MBBXY_7',
                 5009143),
                ('IGF109795_HISEQ4000',
                 'IGF109795_HISEQ4000_H2N3MBBXY_7',
                 1391747),
                ('IGF109796_HISEQ4000',
                 '\tIGF109796_HISEQ4000_H2N3MBBXY_7',
                 1318008),
                ('IGF109797_HISEQ4000',
                 'IGF109797_HISEQ4000_H2N3MBBXY_7',
                 1216324),
            )
        ]

        run_adaptor = RunAdaptor(session_class=base.session_class)
        run_adaptor.start_session()
        run_adaptor.store_run_and_attribute_data(data=run_data)
        run_adaptor.close_session()
Exemplo n.º 2
0
  def run(self):
    """Build a batch effect report from the input count files and load it.

    All inputs are read through ``self.param_required`` / ``self.param``.
    With three or more input files, every path is checked, the flowcell and
    lane of each run are looked up in the database, the file list is written
    to a temporary JSON file, the report is generated from the template and
    the resulting file is loaded via ``Analysis_collection_utils``.  With
    one or two input files no report is generated and an empty string is
    passed downstream instead.

    The outcome is stored with
    ``self.param('dataflow_params', {'batch_effect_reports': ...})``.

    :raises ValueError: if ``input_files`` is empty
    :raises Exception: any other failure is reported through
                       ``self.warning`` and ``self.post_message_to_slack``
                       and then re-raised unchanged
    """
    project_igf_id = None                                                       # pre-bound so that the error handler below can
    sample_igf_id = None                                                        # always format its message, even on early failure
    try:
      project_igf_id = self.param_required('project_igf_id')
      experiment_igf_id = self.param_required('experiment_igf_id')
      sample_igf_id = self.param_required('sample_igf_id')
      input_files = self.param_required('input_files')
      igf_session_class = self.param_required('igf_session_class')
      template_report_file = self.param_required('template_report_file')
      rscript_path = self.param_required('rscript_path')
      batch_effect_rscript_path = self.param_required('batch_effect_rscript_path')
      base_result_dir = self.param_required('base_result_dir')
      strand_info = self.param('strand_info')
      read_threshold = self.param('read_threshold')
      collection_type = self.param('collection_type')
      collection_table = self.param('collection_table')
      analysis_name = self.param('analysis_name')
      tag_name = self.param('tag_name')
      use_ephemeral_space = self.param('use_ephemeral_space')

      output_file_list = None
      if len(input_files) == 0:
        raise ValueError('No input files found for bactch effect checking')
      elif len(input_files) < 3:
        output_file_list = ''                                                   # can't run batch effect checking on less than 3 lanes
      else:
        for input_file in input_files:
          check_file_path(input_file)                                           # check every input filepath before touching the db

        file_data = list()
        ra = RunAdaptor(**{'session_class':igf_session_class})
        ra.start_session()
        try:
          for input_file in input_files:
            run_igf_id = os.path.basename(input_file).\
                         replace('ReadsPerGene.out.tab','')                     # using simple string match to fetch run igf ids
            flowcell_id, lane_id = \
              ra.fetch_flowcell_and_lane_for_run(run_igf_id=run_igf_id)         # fetch flowcell id and lane info
            file_data.append({'file':input_file,
                              'flowcell':flowcell_id,
                              'lane':lane_id
                            })
        finally:
          ra.close_session()                                                    # always release the db session, even if a lookup fails

        temp_dir = \
          get_temp_dir(use_ephemeral_space=use_ephemeral_space)
        temp_json_file = \
          os.path.join(temp_dir,'star_gene_counts.json')                        # temp json file path
        temp_output_file = \
          os.path.join(\
            temp_dir,
            os.path.basename(template_report_file))                             # temp report file path
        with open(temp_json_file,'w') as jp:
          json.dump(file_data,jp,indent=2)                                      # dumping json output

        br = Batch_effect_report(\
               input_json_file=temp_json_file,
               template_file=template_report_file,
               rscript_path=rscript_path,
               batch_effect_rscript_path=batch_effect_rscript_path,
               strand_info=strand_info,
               read_threshold=read_threshold
             )                                                                  # set up batch effect run
        br.check_lane_effect_and_log_report(\
             project_name=project_igf_id,
             sample_name=sample_igf_id,
             output_file=temp_output_file
           )                                                                    # generate report file
        au = Analysis_collection_utils(\
               dbsession_class=igf_session_class,
               analysis_name=analysis_name,
               base_path=base_result_dir,
               tag_name=tag_name,
               collection_name=experiment_igf_id,
               collection_type=collection_type,
               collection_table=collection_table
             )                                                                  # prepare to load file
        output_file_list = \
          au.load_file_to_disk_and_db(\
               input_file_list=[temp_output_file])                              # load file to db and disk

      self.param('dataflow_params',
                 {'batch_effect_reports':output_file_list})                     # always populated; value is '' when no report was generated
    except Exception as e:
      message = \
        'project: {2}, sample:{3}, Error in {0}: {1}'.\
        format(\
          self.__class__.__name__,
          e,
          project_igf_id,
          sample_igf_id)
      self.warning(message)
      self.post_message_to_slack(message,reaction='fail')                       # post msg to slack for failed jobs
      raise