def create_project(manager: StatsAPI, project_name: str) -> stats_models.Project:
    project: stats_models.Project = manager.Project()
    project.projectname = project_name
    project.time = sqlalchemy.func.now()
    manager.add(project)
    manager.flush()
    LOG.info("Creating new project object %s", project)
    return project
def create_novaseq_flowcell(manager: StatsAPI, demux_results: DemuxResults):
    """Add a novaseq flowcell to CG stats"""
    LOG.info("Adding flowcell information to cgstats")
    support_parameters_id: Optional[int] = find.get_support_parameters_id(
        demux_results=demux_results
    )
    if not support_parameters_id:
        support_parameters: stats_models.Supportparams = create_support_parameters(
            manager=manager, demux_results=demux_results
        )
        support_parameters_id: int = support_parameters.supportparams_id
    else:
        LOG.info("Support parameters already exists")

    datasource_id: Optional[int] = find.get_datasource_id(demux_results=demux_results)
    if not datasource_id:
        datasource_object: stats_models.Datasource = create_datasource(
            manager=manager,
            demux_results=demux_results,
            support_parameters_id=support_parameters_id,
        )
        datasource_id: int = datasource_object.datasource_id
    else:
        LOG.info("Data source already exists")

    flowcell_id: Optional[int] = find.get_flowcell_id(
        flowcell_name=demux_results.flowcell.flowcell_id
    )
    if not flowcell_id:
        flowcell: stats_models.Flowcell = create_flowcell(
            manager=manager, demux_results=demux_results
        )
        flowcell_id: int = flowcell.flowcell_id
    else:
        LOG.info("Flowcell already exists")

    demux_id: Optional[int] = find.get_demux_id(flowcell_object_id=flowcell_id)
    if not demux_id:
        demux_object: stats_models.Demux = create_demux(
            manager=manager,
            demux_results=demux_results,
            flowcell_id=flowcell_id,
            datasource_id=datasource_id,
        )
        demux_id: int = demux_object.demux_id
    else:
        LOG.info("Demux object already exists")

    project_name_to_id = create_projects(manager=manager, project_names=demux_results.projects)
    create_samples(
        manager=manager,
        demux_results=demux_results,
        project_name_to_id=project_name_to_id,
        demux_id=demux_id,
    )
    manager.commit()
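# Illustrative usage sketch (not part of the original module): how a caller might
# drive create_novaseq_flowcell once a live StatsAPI session and a parsed
# DemuxResults object are available. The wrapper name below is hypothetical.
def store_finished_novaseq_run(manager: StatsAPI, demux_results: DemuxResults) -> None:
    """Persist one demultiplexed NovaSeq flowcell, creating any missing parent records."""
    create_novaseq_flowcell(manager=manager, demux_results=demux_results)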
def delete_flowcell(manager: StatsAPI, flowcell_name: str):
    flowcell_id: Optional[int] = get_flowcell_id(flowcell_name=flowcell_name)
    if flowcell_id:
        flowcell: List[models.Flowcell] = manager.Flowcell.query.filter_by(
            flowcell_id=flowcell_id
        ).all()
        for entry in flowcell:
            LOG.info("Removing entry %s from cgstats", entry.flowcellname)
            # Delete each matching entry, not the result list itself
            manager.delete(entry)
        manager.commit()
def create_flowcell(manager: StatsAPI, demux_results: DemuxResults) -> stats_models.Flowcell:
    flowcell = manager.Flowcell()
    flowcell.flowcellname = demux_results.flowcell.flowcell_id
    flowcell.flowcell_pos = demux_results.flowcell.flowcell_position
    flowcell.hiseqtype = "novaseq"
    flowcell.time = sqlalchemy.func.now()
    manager.add(flowcell)
    manager.flush()
    LOG.info("Creating new flowcell object %s", flowcell)
    return flowcell
def create_sample(
    manager: StatsAPI, sample_id: str, barcode: str, project_id: int
) -> stats_models.Sample:
    sample: stats_models.Sample = manager.Sample()
    sample.project_id = project_id
    sample.samplename = sample_id
    sample.limsid = sample_id.split("_")[0]
    sample.barcode = barcode
    sample.time = sqlalchemy.func.now()
    manager.add(sample)
    manager.flush()
    return sample
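# Worked example for create_sample's LIMS id derivation above: the LIMS id is the part
# of the sample id before the first underscore. The sample id used here is made up.
def _example_limsid_derivation() -> str:
    sample_id = "ACC1234A1_S1_L001"
    return sample_id.split("_")[0]  # -> "ACC1234A1"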
def cg_stats_api(self) -> StatsAPI:
    api = self.__dict__.get("cg_stats_api_")
    if api is None:
        LOG.debug("Instantiating cg_stats api")
        api = StatsAPI(config=self.dict())
        self.cg_stats_api_ = api
    return api
def create_datasource(
    manager: StatsAPI, demux_results: DemuxResults, support_parameters_id: int
) -> stats_models.Datasource:
    datasource = manager.Datasource()
    datasource.runname = demux_results.run_name
    datasource.rundate = demux_results.run_date
    datasource.machine = demux_results.machine_name
    datasource.server = demux_results.demux_host
    datasource.document_path = str(demux_results.conversion_stats_path)
    datasource.document_type = demux_results.conversion_stats_path.suffix.strip(PERIOD)
    datasource.time = sqlalchemy.func.now()
    datasource.supportparams_id = support_parameters_id
    manager.add(datasource)
    manager.flush()
    LOG.info("Creating new datasource object %s", datasource)
    return datasource
def create_support_parameters(
    manager: StatsAPI, demux_results: DemuxResults
) -> stats_models.Supportparams:
    logfile_parameters: LogfileParameters = demux_results.get_logfile_parameters()
    support_parameters = manager.Supportparams()
    support_parameters.document_path = str(demux_results.results_dir)  # The unaligned directory
    support_parameters.idstring = logfile_parameters.id_string
    support_parameters.program = logfile_parameters.program
    support_parameters.commandline = logfile_parameters.command_line
    support_parameters.sampleconfig_path = str(demux_results.sample_sheet_path)
    support_parameters.sampleconfig = demux_results.sample_sheet_path.read_text()
    support_parameters.time = logfile_parameters.time
    manager.add(support_parameters)
    manager.flush()
    LOG.info("Creating new support parameters object %s", support_parameters)
    return support_parameters
def create_dragen_unaligned(
    manager: StatsAPI, demux_sample: DragenDemuxSample, sample_id: int, demux_id: int
) -> stats_models.Unaligned:
    """Create an unaligned object in cgstats for a sample demultiplexed with Dragen"""
    unaligned: stats_models.Unaligned = manager.Unaligned()
    unaligned.sample_id = sample_id
    unaligned.demux_id = demux_id
    unaligned.lane = demux_sample.lane
    unaligned.passed_filter_pct = DRAGEN_PASSED_FILTER_PCT
    unaligned.readcounts = _calculate_read_counts(demux_sample)
    unaligned.perfect_indexreads_pct = _calculate_perfect_indexreads_pct(demux_sample)
    unaligned.q30_bases_pct = _calculate_q30_bases_pct(demux_sample)
    unaligned.yield_mb = _calculate_yield(demux_sample)
    unaligned.mean_quality_score = demux_sample.mean_quality_score
    unaligned.time = sqlalchemy.func.now()
    manager.add(unaligned)
    manager.flush()
    return unaligned
def create_unaligned(
    manager: StatsAPI, demux_sample: DemuxSample, sample_id: int, demux_id: int
) -> stats_models.Unaligned:
    unaligned: stats_models.Unaligned = manager.Unaligned()
    unaligned.sample_id = sample_id
    unaligned.demux_id = demux_id
    unaligned.lane = demux_sample.lane
    unaligned.yield_mb = round(int(demux_sample.pass_filter_yield) / 1000000, 2)
    unaligned.passed_filter_pct = demux_sample.pass_filter_yield_pc
    unaligned.readcounts = demux_sample.pass_filter_clusters * 2
    unaligned.raw_clusters_per_lane_pct = demux_sample.raw_clusters_pc
    unaligned.perfect_indexreads_pct = (
        round(demux_sample.perfect_barcodes / demux_sample.barcodes * 100, 5)
        if demux_sample.barcodes
        else 0
    )
    unaligned.q30_bases_pct = demux_sample.pass_filter_Q30
    unaligned.mean_quality_score = demux_sample.pass_filter_qscore
    unaligned.time = sqlalchemy.func.now()
    manager.add(unaligned)
    manager.flush()
    return unaligned
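# Worked example (made-up numbers) of the metric arithmetic in create_unaligned above:
# the yield is converted from bases to megabases and the perfect index-read percentage
# is derived from perfect vs. total barcode counts.
def _example_unaligned_metrics() -> None:
    pass_filter_yield = 1_234_567_890  # hypothetical bases passing filter
    perfect_barcodes = 950_000         # hypothetical perfectly matching index reads
    barcodes = 1_000_000               # hypothetical total index reads

    yield_mb = round(pass_filter_yield / 1_000_000, 2)
    perfect_indexreads_pct = round(perfect_barcodes / barcodes * 100, 5) if barcodes else 0

    assert yield_mb == 1234.57
    assert perfect_indexreads_pct == 95.0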
def create_demux(
    manager: StatsAPI,
    datasource_id: int,
    demux_results: DemuxResults,
    flowcell_id: int,
) -> stats_models.Demux:
    demux: stats_models.Demux = manager.Demux()
    demux.flowcell_id = flowcell_id
    demux.datasource_id = datasource_id
    if demux_results.bcl_converter == "dragen":
        demux.basemask = "{read_length},{index_length},{index_length},{read_length}".format(
            index_length=demux_results.run_info.index_length,
            read_length=demux_results.run_info.read_length,
        )
    else:
        demux.basemask = ""
    demux.time = sqlalchemy.func.now()
    manager.add(demux)
    manager.flush()
    LOG.info("Creating new demux object %s", demux)
    return demux
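# Worked example (hypothetical run geometry) of the Dragen basemask built in
# create_demux above: with a read length of 151 and an index length of 10 the
# stored basemask becomes "151,10,10,151".
def _example_dragen_basemask() -> str:
    return "{read_length},{index_length},{index_length},{read_length}".format(
        index_length=10,
        read_length=151,
    )  # -> "151,10,10,151"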
def fixture_stats_api(project_dir: Path) -> StatsAPI:
    """Setup base CGStats store."""
    _store = StatsAPI({"cgstats": {"database": "sqlite://", "root": "tests/fixtures/DEMUX"}})
    _store.create_all()
    yield _store
    _store.drop_all()
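# Illustrative sketch of how the fixture above might be consumed in a test. It assumes
# the fixture is registered under the name "stats_api"; the test name and assertion
# are made up for illustration.
def test_stats_api_starts_empty(stats_api: StatsAPI):
    assert stats_api.Flowcell.query.count() == 0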
def test_flow_cell_reads_and_q30_summary(
    nipt_stats_api: StatsAPI, flowcell_name: str, sample_id: str
):
    # GIVEN a flow cell with only one sample on it with 90% Q30 and 1200000000 yield
    sample_obj: stats_model.Sample = nipt_stats_api.Sample.query.filter(
        stats_model.Sample.limsid == sample_id
    ).first()
    unaligned_obj: stats_model.Unaligned = (
        nipt_stats_api.Unaligned.query.join(
            stats_model.Flowcell.demux, stats_model.Demux.unaligned
        )
        .filter(stats_model.Unaligned.sample_id == sample_obj.sample_id)
        .first()
    )

    # WHEN retrieving the reads and q30 summary for the flow cell on which the sample ran
    flow_cell_reads_and_q30_summary: Dict[str, Union[int, float]] = (
        nipt_stats_api.flow_cell_reads_and_q30_summary(flow_cell_name=flowcell_name)
    )

    # THEN the number of reads on the flow cell matches the sample's read count
    assert flow_cell_reads_and_q30_summary["reads"] == unaligned_obj.readcounts

    # AND the percent of bases passing q30 is equal to that of the one sample
    assert flow_cell_reads_and_q30_summary["q30"] == float(unaligned_obj.q30_bases_pct)
def base_store_stats(store_stats: StatsAPI, data: dict) -> StatsAPI:
    """Setup CGStats store with sample data."""
    demuxes = {}
    for sample_data in data["samples"]:
        project: stats_models.Project = store_stats.Project(
            projectname="test", time=dt.datetime.now()
        )
        sample: stats_models.Sample = store_stats.Sample(
            samplename=sample_data["name"],
            barcode=sample_data["index"],
            limsid=sample_data["name"],
        )
        sample.project = project
        unaligned: stats_models.Unaligned = store_stats.Unaligned(
            readcounts=300000000, q30_bases_pct=85
        )
        unaligned.sample = sample

        if sample_data["flowcell"] in demuxes:
            demux = demuxes[sample_data["flowcell"]]
        else:
            flowcell: stats_models.Flowcell = store_stats.Flowcell(
                flowcellname=sample_data["flowcell"],
                flowcell_pos="A",
                hiseqtype=sample_data["type"],
                time=dt.datetime.now(),
            )
            supportparams: stats_models.Supportparams = store_stats.Supportparams(
                document_path="NA", idstring="NA"
            )
            datasource: stats_models.Datasource = store_stats.Datasource(
                document_path="NA", document_type="html"
            )
            datasource.supportparams = supportparams
            demux = store_stats.Demux()
            demux.flowcell = flowcell
            demux.datasource = datasource
            demuxes[sample_data["flowcell"]] = demux

        unaligned.demux = demux
        store_stats.add(unaligned)
        store_stats.commit()

    yield store_stats