def process_helper(self, filePath, path_to_GENIE, mafSynId, centerMafSynId,
                   vcf2mafPath, veppath, vepdata, reference=None):
    """
    Annotate a center MAF with maf2maf and store the full and narrow MAFs.

    Args:
        filePath: Path to the input MAF file
        path_to_GENIE: Main GENIE directory (contains per-center subdirs)
        mafSynId: Synapse id of the narrow MAF database table
        centerMafSynId: Synapse id of the center MAF file folder
        vcf2mafPath: Path to the vcf2maf checkout (contains maf2maf.pl)
        veppath: Path to the VEP executable
        vepdata: Path to the VEP cache data
        reference: Optional reference fasta passed to maf2maf

    Returns:
        The input filePath, or "NOTPROCESSED" when maf2maf produced no output
    """
    logger.info('MAF2MAF %s' % filePath)
    fileName = "data_mutations_extended_%s_MAF.txt" % self.center
    newMafPath = os.path.join(path_to_GENIE, self.center, "staging", fileName)
    narrowMafPath = os.path.join(
        path_to_GENIE, self.center, "staging",
        "data_mutations_extended_%s_MAF_narrow.txt" % self.center)
    # inBED is computed downstream, so it is excluded from the narrow columns
    narrowMafColumns = [col['name']
                        for col in self.syn.getTableColumns(mafSynId)
                        if col['name'] != 'inBED']
    # Strips out windows indentations \r
    command = ['dos2unix', filePath]
    subprocess.check_call(command)
    tempdir = os.path.join(path_to_GENIE, self.center)
    commandCall = ["perl", os.path.join(vcf2mafPath, "maf2maf.pl"),
                   "--input-maf", filePath,
                   "--output-maf", newMafPath,
                   "--vep-fork", '8',
                   "--tmp-dir", tempdir,
                   '--vep-path', veppath,
                   '--vep-data', vepdata,
                   "--custom-enst",
                   os.path.join(vcf2mafPath, "data/isoform_overrides_uniprot")]
    if reference is not None:
        commandCall.extend(["--ref-fasta", reference])
    # check_call raises on a non-zero exit; its return value (0) is not a
    # MAF, so don't bind it to a misleadingly named variable
    subprocess.check_call(commandCall)
    process_functions.rmFiles(tempdir, recursive=False)
    # Truncate/create the narrow MAF so a stale file is never left behind
    with open(narrowMafPath, "w"):
        pass
    if os.path.exists(newMafPath):
        # This needs to switch to streaming at some point
        mafDf = pd.read_csv(newMafPath, sep="\t", comment="#")
        mafDf = self.formatMAF(mafDf)
        self.createFinalMaf(mafDf, newMafPath, maf=True)
        narrowMafDf = mafDf[narrowMafColumns]
        self.createFinalMaf(narrowMafDf, narrowMafPath, maf=True)
        # These functions have to be next to each other,
        # because no modifications can happen in between
        # Store Narrow MAF into db
        if self._fileType == "maf":
            self.storeProcessedMaf(
                narrowMafPath, mafSynId, centerMafSynId, isNarrow=True)
        # Store MAF flat file into synapse
        self.storeProcessedMaf(newMafPath, mafSynId, centerMafSynId)
    else:
        logger.error('ERROR PROCESSING %s' % filePath)
        filePath = "NOTPROCESSED"
    return filePath
def input_to_database(syn, center, process, testing, only_validate,
                      vcf2maf_path, vep_path, vep_data,
                      database_to_synid_mappingdf, center_mapping_df,
                      reference=None, delete_old=False, oncotree_link=None,
                      thread=1):
    """
    Validate and process one center's input files into its staging area.

    Args:
        syn: Synapse client object
        center: GENIE center name
        process: Processing type (e.g. 'maf', 'mafSP', 'vcf')
        testing: Use testing databases
        only_validate: Only validate the files; skip processing
        vcf2maf_path: Path to vcf2maf
        vep_path: Path to VEP
        vep_data: Path to VEP cache data
        database_to_synid_mappingdf: Database-to-synapse-id mapping dataframe
        center_mapping_df: Center mapping dataframe
        reference: Optional reference fasta forwarded to MAF processing
        delete_old: Remove previously downloaded center files first
        oncotree_link: Link to oncotree codes
        thread: Number of threads
    """
    if only_validate:
        log_path = os.path.join(process_functions.SCRIPT_DIR,
                                "%s_validation_log.txt" % center)
    else:
        log_path = os.path.join(process_functions.SCRIPT_DIR,
                                "%s_%s_log.txt" % (center, process))

    logFormatter = logging.Formatter(
        "%(asctime)s [%(name)s][%(levelname)s] %(message)s")
    fileHandler = logging.FileHandler(log_path, mode='w')
    fileHandler.setFormatter(logFormatter)
    logger.addHandler(fileHandler)

    if testing:
        logger.info("###########################################")
        logger.info("############NOW IN TESTING MODE############")
        logger.info("###########################################")

    # ----------------------------------------
    # Start input to staging process
    # ----------------------------------------

    # Make the synapsecache dir the genie input folder for now.
    # The main reason for this is because the .synaspecache dir
    # is mounted by batch
    path_to_genie = os.path.expanduser("~/.synapseCache")
    # Create input and staging folders
    if not os.path.exists(os.path.join(path_to_genie, center, "input")):
        os.makedirs(os.path.join(path_to_genie, center, "input"))
    if not os.path.exists(os.path.join(path_to_genie, center, "staging")):
        os.makedirs(os.path.join(path_to_genie, center, "staging"))

    if delete_old:
        process_functions.rmFiles(os.path.join(path_to_genie, center))

    validFiles = validation(syn, center, process, center_mapping_df,
                            database_to_synid_mappingdf, thread,
                            testing, oncotree_link)

    if len(validFiles) > 0 and not only_validate:
        # Reorganize so BED files are always validated and processed first
        validBED = [os.path.basename(i).endswith('.bed')
                    for i in validFiles['path']]
        beds = validFiles[validBED]
        # DataFrame.append was removed in pandas 2.0; concat is equivalent
        validFiles = pd.concat([beds, validFiles])
        validFiles.drop_duplicates(inplace=True)

        # Valid maf, mafsp, vcf and cbs files
        validMAF = [i for i in validFiles['path']
                    if os.path.basename(i) ==
                    "data_mutations_extended_%s.txt" % center]
        validMAFSP = [i for i in validFiles['path']
                      if os.path.basename(i) ==
                      "nonGENIE_data_mutations_extended_%s.txt" % center]
        validVCF = [i for i in validFiles['path']
                    if os.path.basename(i).endswith('.vcf')]
        if process == 'mafSP':
            validMAFs = validMAFSP
        else:
            validMAFs = validMAF

        processTrackerSynId = process_functions.getDatabaseSynId(
            syn, "processTracker",
            databaseToSynIdMappingDf=database_to_synid_mappingdf)
        # Add process tracker for time start
        processTracker = syn.tableQuery(
            "SELECT timeStartProcessing FROM %s where center = '%s' and processingType = '%s'"
            % (processTrackerSynId, center, process))
        processTrackerDf = processTracker.asDataFrame()
        if len(processTrackerDf) == 0:
            new_rows = [[center,
                         str(int(time.time() * 1000)),
                         str(int(time.time() * 1000)),
                         process]]
            syn.store(synapseclient.Table(processTrackerSynId, new_rows))
        else:
            # .loc on the first row label avoids pandas chained assignment,
            # which can silently write to a temporary copy
            processTrackerDf.loc[
                processTrackerDf.index[0], 'timeStartProcessing'
            ] = str(int(time.time() * 1000))
            syn.store(
                synapseclient.Table(processTrackerSynId, processTrackerDf))

        processFiles(syn, validFiles, center, path_to_genie, thread,
                     center_mapping_df, oncotree_link,
                     database_to_synid_mappingdf,
                     validVCF=validVCF, validMAFs=validMAFs,
                     vcf2mafPath=vcf2maf_path,
                     veppath=vep_path, vepdata=vep_data,
                     test=testing, processing=process, reference=reference)

        # Should add in this process end tracking
        # before the deletion of samples
        processTracker = syn.tableQuery(
            "SELECT timeEndProcessing FROM %s where center = '%s' and processingType = '%s'"
            % (processTrackerSynId, center, process))
        processTrackerDf = processTracker.asDataFrame()
        processTrackerDf.loc[
            processTrackerDf.index[0], 'timeEndProcessing'
        ] = str(int(time.time() * 1000))
        syn.store(synapseclient.Table(processTrackerSynId, processTrackerDf))

        logger.info("SAMPLE/PATIENT RETRACTION")
        toRetract.retract(syn, testing)
    else:
        messageOut = \
            "%s does not have any valid files" if not only_validate \
            else "ONLY VALIDATION OCCURED FOR %s"
        logger.info(messageOut % center)

    # Store log file
    syn.store(synapseclient.File(log_path, parentId="syn10155804"))
    # Detach the per-run handler so repeated calls do not accumulate
    # handlers on the module logger, and release the file before removal
    logger.removeHandler(fileHandler)
    fileHandler.close()
    os.remove(log_path)
    logger.info("ALL PROCESSES COMPLETE")
def main(args):
    """
    Convert an existing consortium release into a public GENIE release:
    parameter checks, process-tracking updates, cBioPortal validation,
    link-version creation, dashboard update and data guide generation.

    Args:
        args: Parsed command line arguments (cbioportalPath, processingDate,
            pemFile, debug, test, staging, genieVersion, publicReleaseCutOff)
    """
    cbioValidatorPath = os.path.join(
        args.cbioportalPath, "core/src/main/scripts/importer/validateData.py"
    )
    assert os.path.exists(cbioValidatorPath), "Please specify correct cbioportalPath"
    assert not (
        args.test and args.staging
    ), "You can only specify --test or --staging, not both"
    try:
        processingDate = datetime.datetime.strptime(args.processingDate, "%b-%Y")
    except ValueError:
        raise ValueError(
            "Process date must be in the format "
            "abbreviated_month-YEAR ie. Oct-2017"
        )

    syn = process_functions.synLogin(args.pemFile, debug=args.debug)
    genie_user = os.environ.get("GENIE_USER")
    if args.pemFile is not None:
        genie_pass = process_functions.get_password(args.pemFile)
    else:
        genie_pass = None

    # Get all the possible public releases
    # Get configuration
    if args.test:
        databaseSynIdMappingId = "syn11600968"
        args.genieVersion = "TESTpublic"
    elif args.staging:
        databaseSynIdMappingId = "syn12094210"
    else:
        databaseSynIdMappingId = "syn10967259"
    databaseSynIdMapping = syn.tableQuery(
        "select * from %s" % databaseSynIdMappingId)
    databaseSynIdMappingDf = databaseSynIdMapping.asDataFrame()

    public_synid = databaseSynIdMappingDf["Id"][
        databaseSynIdMappingDf["Database"] == "public"
    ].values[0]
    releaseSynId = databaseSynIdMappingDf["Id"][
        databaseSynIdMappingDf["Database"] == "release"
    ].values[0]

    officialPublic = consortium_to_public.get_public_to_consortium_synid_mapping(
        syn, releaseSynId, test=args.test
    )
    assert (
        args.genieVersion in officialPublic.keys()
    ), "genieVersion must be one of these: {}.".format(", ".join(officialPublic.keys()))

    args.releaseId = officialPublic[args.genieVersion]
    if not args.test and not args.staging:
        processTrackerSynId = databaseSynIdMappingDf["Id"][
            databaseSynIdMappingDf["Database"] == "processTracker"
        ].values[0]
        processTracker = syn.tableQuery(
            "SELECT timeStartProcessing FROM %s where center = 'SAGE' "
            "and processingType = 'public'" % processTrackerSynId
        )
        processTrackerDf = processTracker.asDataFrame()
        # .loc on the first row label avoids pandas chained assignment,
        # which can silently write to a temporary copy
        processTrackerDf.loc[
            processTrackerDf.index[0], "timeStartProcessing"
        ] = str(int(time.time() * 1000))
        syn.store(synapseclient.Table(processTrackerSynId, processTrackerDf))

    caseListEntities, genePanelEntities = consortium_to_public.consortiumToPublic(
        syn,
        processingDate,
        args.genieVersion,
        args.releaseId,
        databaseSynIdMappingDf,
        publicReleaseCutOff=args.publicReleaseCutOff,
    )

    database_to_staging.revise_metadata_files(
        syn, args.staging, public_synid, args.genieVersion
    )

    logger.info("CBIO VALIDATION")
    # Must be exit 0 because the validator sometimes fails,
    # but we still want to capture the output
    command = [
        cbioValidatorPath,
        "-s",
        database_to_staging.GENIE_RELEASE_DIR,
        "-n",
        "; exit 0",
    ]
    cbio_output = subprocess.check_output(" ".join(command), shell=True)
    cbio_decoded_output = cbio_output.decode("utf-8")
    logger.info(cbio_decoded_output)

    if not args.test and not args.staging:
        log_folder_synid = databaseSynIdMappingDf["Id"][
            databaseSynIdMappingDf["Database"] == "logs"
        ].values[0]
        # Use tempfiles
        cbio_log_file = "cbioValidatorLogsPublic_{}.txt".format(args.genieVersion)
        with open(cbio_log_file, "w") as cbioLog:
            cbioLog.write(cbio_decoded_output)
        syn.store(synapseclient.File(cbio_log_file, parentId=log_folder_synid))
        os.remove(cbio_log_file)

    logger.info("REMOVING OLD FILES")
    process_functions.rmFiles(database_to_staging.CASE_LIST_PATH)
    seg_meta_file = "{}/genie_public_meta_cna_hg19_seg.txt".format(
        database_to_staging.GENIE_RELEASE_DIR
    )
    if os.path.exists(seg_meta_file):
        os.unlink(seg_meta_file)

    logger.info("CREATING LINK VERSION")
    folders = database_to_staging.create_link_version(
        syn,
        args.genieVersion,
        caseListEntities,
        genePanelEntities,
        databaseSynIdMappingDf,
        release_type="public",
    )

    # Don't update process tracker is testing or staging
    if not args.test and not args.staging:
        processTracker = syn.tableQuery(
            "SELECT timeEndProcessing FROM %s where center = 'SAGE' and "
            "processingType = 'public'" % processTrackerSynId
        )
        processTrackerDf = processTracker.asDataFrame()
        processTrackerDf.loc[
            processTrackerDf.index[0], "timeEndProcessing"
        ] = str(int(time.time() * 1000))
        syn.store(synapseclient.Table(processTrackerSynId, processTrackerDf))

    if not args.test:
        logger.info("DASHBOARD UPDATE")
        dashboard_table_updater.run_dashboard(
            syn, databaseSynIdMappingDf, args.genieVersion, staging=args.staging
        )
        generate_dashboard_html(
            args.genieVersion,
            staging=args.staging,
            genie_user=genie_user,
            genie_pass=genie_pass,
        )
        logger.info("DASHBOARD UPDATE COMPLETE")
        logger.info("AUTO GENERATE DATA GUIDE")
        onco_link = databaseSynIdMappingDf["Id"][
            databaseSynIdMappingDf["Database"] == "oncotreeLink"
        ].values[0]
        onco_link_ent = syn.get(onco_link)
        oncotree_link = onco_link_ent.externalURL
        oncotree_version = oncotree_link.split("=")[1]
        data_guide_pdf = generate_data_guide(
            args.genieVersion,
            oncotree_version=oncotree_version,
            database_mapping=databaseSynIdMappingId,
            genie_user=genie_user,
            genie_pass=genie_pass,
        )
        data_guide_ent = synapseclient.File(
            data_guide_pdf, parent=folders["release_folder"]
        )
        syn.store(data_guide_ent)
    logger.info("COMPLETED CONSORTIUM TO PUBLIC")
def main(genie_version, processing_date, cbioportal_path,
         oncotree_link=None, consortium_release_cutoff=184,
         pemfile=None, test=False, staging=False, debug=False,
         skip_mutationsincis=False):
    '''
    - Does parameter checks
    - Updates process tracking start
    - initiates database to staging
    - create case lists
    - revise meta files
    - run cBioPortal validation
    - create link versions
    - update process tracking end
    - Create dashboard tables and plots

    Args:
        genie_version: GENIE version,
        processing_date: processing date
        cbioportal_path: Path to cbioportal validator
        oncotree_link: Link to oncotree codes
        consortium_release_cutoff: release cut off value in days
        pemfile: Path to private key file
        test: Test flag, uses test databases
        staging: Staging flag, uses staging databases
        debug: Synapse debug flag
        skip_mutationsincis: Skip mutation in cis filter
    '''
    syn = process_functions.synLogin(pemfile, debug=debug)
    genie_user = os.environ.get('GENIE_USER')
    if pemfile is not None:
        genie_pass = process_functions.get_password(pemfile)
    else:
        genie_pass = None

    if test:
        databaseSynIdMappingId = 'syn11600968'
        genie_version = "TESTING"
    elif staging:
        skip_mutationsincis = True
        databaseSynIdMappingId = 'syn12094210'
    else:
        databaseSynIdMappingId = 'syn10967259'
    # Database/folder syn id mapping
    databaseSynIdMapping = syn.tableQuery(
        'select * from {}'.format(databaseSynIdMappingId))
    databaseSynIdMappingDf = databaseSynIdMapping.asDataFrame()

    if oncotree_link is None:
        oncoLink = databaseSynIdMappingDf['Id'][
            databaseSynIdMappingDf['Database'] == 'oncotreeLink'].values[0]
        oncoLinkEnt = syn.get(oncoLink)
        oncotree_link = oncoLinkEnt.externalURL
    # Check if you can connect to oncotree link,
    # if not then don't run validation / processing
    process_functions.checkUrl(oncotree_link)

    cbioValidatorPath = os.path.join(
        cbioportal_path, "core/src/main/scripts/importer/validateData.py")
    assert os.path.exists(cbioValidatorPath),\
        "Please specify correct cbioportalPath"
    syn.table_query_timeout = 50000

    consortiumSynId = databaseSynIdMappingDf['Id'][
        databaseSynIdMappingDf['Database'] == 'consortium'].values[0]
    processTrackerSynId = databaseSynIdMappingDf['Id'][
        databaseSynIdMappingDf['Database'] == 'processTracker'].values[0]
    # get syn id of case list folder in consortium release
    caseListSynId, _ = database_to_staging.search_and_create_folder(
        syn, consortiumSynId, "case_lists")

    if not staging:
        database_to_staging.update_process_trackingdf(
            syn, processTrackerSynId, 'SAGE', 'dbToStage', start=True)

    centerMappingSynId = databaseSynIdMappingDf['Id'][
        databaseSynIdMappingDf['Database'] == 'centerMapping'].values[0]
    # Only release files where release is true
    center_mapping = syn.tableQuery(
        'SELECT * FROM {} where release is true'.format(centerMappingSynId))
    center_mappingdf = center_mapping.asDataFrame()
    processingDate = datetime.datetime.strptime(processing_date, '%b-%Y')

    logger.info("STAGING TO CONSORTIUM")
    genePanelEntities = database_to_staging.stagingToCbio(
        syn, processingDate, genie_version,
        center_mappingdf, databaseSynIdMappingDf,
        oncotree_url=oncotree_link,
        consortiumReleaseCutOff=consortium_release_cutoff,
        current_release_staging=staging,
        skipMutationsInCis=skip_mutationsincis,
        test=test, genie_user=genie_user, genie_pass=genie_pass)

    # Create case lists files
    logger.info("CREATE CASE LIST FILES")
    # Remove old caselists first
    if not os.path.exists(database_to_staging.CASE_LIST_PATH):
        os.mkdir(database_to_staging.CASE_LIST_PATH)
    caselists = os.listdir(database_to_staging.CASE_LIST_PATH)
    for caselist in caselists:
        os.remove(os.path.join(database_to_staging.CASE_LIST_PATH, caselist))
    clinical_path = os.path.join(
        database_to_staging.GENIE_RELEASE_DIR,
        'data_clinical_{}.txt'.format(genie_version))
    gene_matrix_path = os.path.join(
        database_to_staging.GENIE_RELEASE_DIR,
        "data_gene_matrix_{}.txt".format(genie_version))
    create_case_lists.main(
        clinical_path, gene_matrix_path,
        database_to_staging.CASE_LIST_PATH, "genie_private")

    caseListFiles = os.listdir(database_to_staging.CASE_LIST_PATH)
    caseListEntities = []
    for casePath in caseListFiles:
        casePath = os.path.join(database_to_staging.CASE_LIST_PATH, casePath)
        caseListEntities.append(database_to_staging.store_file(
            syn, casePath, parent=caseListSynId, genieVersion=genie_version))

    logger.info("REMOVING UNNECESSARY FILES")
    genie_files = os.listdir(database_to_staging.GENIE_RELEASE_DIR)
    for genie_file in genie_files:
        if genie_version not in genie_file and \
                "meta" not in genie_file and "case_lists" not in genie_file:
            os.remove(os.path.join(database_to_staging.GENIE_RELEASE_DIR,
                                   genie_file))
    os.remove(clinical_path)

    logger.info("REVISE METADATA FILES")
    database_to_staging.revise_metadata_files(
        syn, staging, consortiumSynId, genie_version)

    logger.info("CBIO VALIDATION")
    # Must be exit 0 because the validator sometimes fails,
    # but we still want to capture the output
    command = [cbioValidatorPath, '-s',
               database_to_staging.GENIE_RELEASE_DIR, '-n', '; exit 0']
    cbioOutput = subprocess.check_output(" ".join(command), shell=True)
    # Decode once and reuse for both logging and the stored log file
    cbio_decoded_output = cbioOutput.decode("utf-8")
    logger.info(cbio_decoded_output)
    cbio_validator_log = \
        "cbioValidatorLogsConsortium_{}.txt".format(genie_version)
    if not test and not staging:
        log_folder_synid = databaseSynIdMappingDf['Id'][
            databaseSynIdMappingDf['Database'] == 'logs'].values[0]
        with open(cbio_validator_log, "w") as cbioLog:
            cbioLog.write(cbio_decoded_output)
        syn.store(synapseclient.File(
            cbio_validator_log, parentId=log_folder_synid))
        os.remove(cbio_validator_log)
    logger.info("REMOVING OLD FILES")

    process_functions.rmFiles(database_to_staging.CASE_LIST_PATH)
    private_cna_meta_path = os.path.join(
        database_to_staging.GENIE_RELEASE_DIR,
        "genie_private_meta_cna_hg19_seg.txt")
    if os.path.exists(private_cna_meta_path):
        os.unlink(private_cna_meta_path)
    logger.info("CREATING LINK VERSION")
    database_to_staging.create_link_version(
        syn, genie_version, caseListEntities,
        genePanelEntities, databaseSynIdMappingDf)

    if not staging:
        database_to_staging.update_process_trackingdf(
            syn, processTrackerSynId, 'SAGE', 'dbToStage', start=False)
    logger.info("COMPLETED DATABASE TO STAGING")

    if not test:
        logger.info("DASHBOARD UPDATE")
        dashboard_table_updater.run_dashboard(
            syn, databaseSynIdMappingDf, genie_version, staging=staging)
        dashboard_markdown_html_commands = [
            'Rscript',
            os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         'dashboard_markdown_generator.R'),
            genie_version]
        if genie_user is not None and genie_pass is not None:
            dashboard_markdown_html_commands.extend(
                ['--syn_user', genie_user, '--syn_pass', genie_pass])
        if staging:
            dashboard_markdown_html_commands.append('--staging')
        subprocess.check_call(dashboard_markdown_html_commands)
        logger.info("DASHBOARD UPDATE COMPLETE")
def main(
    genie_version,
    processing_date,
    cbioportal_path,
    oncotree_link=None,
    consortium_release_cutoff=184,
    pemfile=None,
    test=False,
    staging=False,
    debug=False,
    skip_mutationsincis=False,
):
    """
    - Does parameter checks
    - Updates process tracking start
    - initiates database to staging
    - create case lists
    - revise meta files
    - run cBioPortal validation
    - create link versions
    - update process tracking end
    - Create dashboard tables and plots

    Args:
        genie_version: GENIE version,
        processing_date: processing date
        cbioportal_path: Path to cbioportal validator
        oncotree_link: Link to oncotree codes
        consortium_release_cutoff: release cut off value in days
        pemfile: Path to private key file
        test: Test flag, uses test databases
        staging: Staging flag, uses staging databases
        debug: Synapse debug flag
        skip_mutationsincis: Skip mutation in cis filter
    """
    syn = process_functions.synLogin(pemfile, debug=debug)
    genie_user = os.environ.get("GENIE_USER")
    if pemfile is not None:
        genie_pass = process_functions.get_password(pemfile)
    else:
        genie_pass = None

    if test:
        databaseSynIdMappingId = "syn11600968"
        genie_version = "TESTING"
    elif staging:
        skip_mutationsincis = True
        databaseSynIdMappingId = "syn12094210"
    else:
        databaseSynIdMappingId = "syn10967259"
    # Database/folder syn id mapping
    databaseSynIdMapping = syn.tableQuery(
        "select * from {}".format(databaseSynIdMappingId))
    databaseSynIdMappingDf = databaseSynIdMapping.asDataFrame()

    if oncotree_link is None:
        oncoLink = databaseSynIdMappingDf["Id"][
            databaseSynIdMappingDf["Database"] == "oncotreeLink"].values[0]
        oncoLinkEnt = syn.get(oncoLink)
        oncotree_link = oncoLinkEnt.externalURL
    # Check if you can connect to oncotree link,
    # if not then don't run validation / processing
    process_functions.checkUrl(oncotree_link)

    cbioValidatorPath = os.path.join(
        cbioportal_path, "core/src/main/scripts/importer/validateData.py")
    assert os.path.exists(
        cbioValidatorPath), "Please specify correct cbioportalPath"
    syn.table_query_timeout = 50000

    consortiumSynId = databaseSynIdMappingDf["Id"][
        databaseSynIdMappingDf["Database"] == "consortium"].values[0]
    processTrackerSynId = databaseSynIdMappingDf["Id"][
        databaseSynIdMappingDf["Database"] == "processTracker"].values[0]
    # get syn id of case list folder in consortium release
    caseListSynId, _ = database_to_staging.search_and_create_folder(
        syn, consortiumSynId, "case_lists")

    if not staging:
        database_to_staging.update_process_trackingdf(
            syn, processTrackerSynId, "SAGE", "dbToStage", start=True)

    centerMappingSynId = databaseSynIdMappingDf["Id"][
        databaseSynIdMappingDf["Database"] == "centerMapping"].values[0]
    # Only release files where release is true
    center_mapping = syn.tableQuery(
        "SELECT * FROM {} where release is true".format(centerMappingSynId))
    center_mappingdf = center_mapping.asDataFrame()
    processingDate = datetime.datetime.strptime(processing_date, "%b-%Y")

    logger.info("STAGING TO CONSORTIUM")
    genePanelEntities = database_to_staging.stagingToCbio(
        syn,
        processingDate,
        genie_version,
        center_mappingdf,
        databaseSynIdMappingDf,
        oncotree_url=oncotree_link,
        consortiumReleaseCutOff=consortium_release_cutoff,
        current_release_staging=staging,
        skipMutationsInCis=skip_mutationsincis,
        test=test,
        genie_user=genie_user,
        genie_pass=genie_pass,
    )

    # Create case lists files
    logger.info("CREATE CASE LIST FILES")
    # Remove old caselists first
    if not os.path.exists(database_to_staging.CASE_LIST_PATH):
        os.mkdir(database_to_staging.CASE_LIST_PATH)
    caselists = os.listdir(database_to_staging.CASE_LIST_PATH)
    for caselist in caselists:
        os.remove(os.path.join(database_to_staging.CASE_LIST_PATH, caselist))
    clinical_path = os.path.join(
        database_to_staging.GENIE_RELEASE_DIR,
        "data_clinical_{}.txt".format(genie_version),
    )
    assay_information_path = os.path.join(
        database_to_staging.GENIE_RELEASE_DIR,
        "assay_information_{}.txt".format(genie_version),
    )
    create_case_lists.main(
        clinical_path,
        assay_information_path,
        database_to_staging.CASE_LIST_PATH,
        "genie_private",
    )

    caseListFiles = os.listdir(database_to_staging.CASE_LIST_PATH)
    caseListEntities = []
    for casePath in caseListFiles:
        casePath = os.path.join(database_to_staging.CASE_LIST_PATH, casePath)
        caseListEntities.append(
            database_to_staging.store_file(
                syn, casePath, parent=caseListSynId,
                genieVersion=genie_version))

    logger.info("REMOVING UNNECESSARY FILES")
    genie_files = os.listdir(database_to_staging.GENIE_RELEASE_DIR)
    for genie_file in genie_files:
        if (genie_version not in genie_file and "meta" not in genie_file
                and "case_lists" not in genie_file):
            os.remove(
                os.path.join(database_to_staging.GENIE_RELEASE_DIR,
                             genie_file))
    os.remove(clinical_path)

    logger.info("REVISE METADATA FILES")
    database_to_staging.revise_metadata_files(
        syn, staging, consortiumSynId, genie_version)

    logger.info("CBIO VALIDATION")
    # Must be exit 0 because the validator sometimes fails,
    # but we still want to capture the output
    command = [
        cbioValidatorPath,
        "-s",
        database_to_staging.GENIE_RELEASE_DIR,
        "-n",
        "; exit 0",
    ]
    cbioOutput = subprocess.check_output(" ".join(command), shell=True)
    # Decode once; the same text is logged and written to the stored log
    cbio_decoded_output = cbioOutput.decode("utf-8")
    logger.info(cbio_decoded_output)
    cbio_validator_log = f"cbioValidatorLogsConsortium_{genie_version}.txt"
    if not test and not staging:
        log_folder_synid = databaseSynIdMappingDf["Id"][
            databaseSynIdMappingDf["Database"] == "logs"].values[0]
        with open(cbio_validator_log, "w") as cbio_log:
            cbio_log.write(cbio_decoded_output)
        syn.store(
            synapseclient.File(cbio_validator_log,
                               parentId=log_folder_synid))
        os.remove(cbio_validator_log)
    logger.info("REMOVING OLD FILES")

    process_functions.rmFiles(database_to_staging.CASE_LIST_PATH)
    private_cna_meta_path = os.path.join(
        database_to_staging.GENIE_RELEASE_DIR,
        "genie_private_meta_cna_hg19_seg.txt")
    if os.path.exists(private_cna_meta_path):
        os.unlink(private_cna_meta_path)
    logger.info("CREATING LINK VERSION")
    # Returns release and case list folder
    folders = database_to_staging.create_link_version(
        syn, genie_version, caseListEntities, genePanelEntities,
        databaseSynIdMappingDf)

    if not staging:
        database_to_staging.update_process_trackingdf(
            syn, processTrackerSynId, "SAGE", "dbToStage", start=False)

    if not test:
        logger.info("DASHBOARD UPDATE")
        dashboard_table_updater.run_dashboard(
            syn, databaseSynIdMappingDf, genie_version, staging=staging)
        generate_dashboard_html(
            genie_version, staging=staging,
            genie_user=genie_user, genie_pass=genie_pass)
        logger.info("DASHBOARD UPDATE COMPLETE")
        logger.info("AUTO GENERATE DATA GUIDE")
        oncotree_version = oncotree_link.split("=")[1]
        data_guide_pdf = generate_data_guide(
            genie_version,
            oncotree_version=oncotree_version,
            database_mapping=databaseSynIdMappingId,
            genie_user=genie_user,
            genie_pass=genie_pass,
        )
        database_to_staging.store_file(
            syn,
            data_guide_pdf,
            genieVersion=genie_version,
            parent=folders["release_folder"],
        )
    logger.info("COMPLETED DATABASE TO STAGING")