def interaction_func(self, submission, admin):
    """Archives Project Submissions

    Args:
        submission: Submission object
        admin: Specify Synapse userid/team for archive to be shared with

    Returns:
        archive status dict
    """
    project_entity = Project('Archived {} {} {} {}'.format(
        submission.name.replace("&", "+").replace("'", ""),
        int(round(time.time() * 1000)),
        submission.id,
        submission.entityId))
    new_project_entity = self.syn.store(project_entity)
    permissions.set_entity_permissions(self.syn, new_project_entity,
                                       admin, "admin")
    synapseutils.copy(self.syn, submission.entityId, new_project_entity.id)
    archived = {"archived": new_project_entity.id}
    archive_status = {'valid': True,
                      'annotations': archived,
                      'message': "Archived!"}
    return archive_status
def archive_writeup(syn, evaluation, stat="VALIDATED", reArchive=False):
    """Archive the submissions for the given evaluation queue by copying
    each submitted project into a newly created archive Project.

    :param evaluation: a synapse evaluation queue or its ID
    :param stat:       submission status to archive (defaults to "VALIDATED")
    :param reArchive:  re-archive submissions that already carry an
                       "archived" annotation
    """
    if not isinstance(evaluation, synapseclient.Evaluation):
        evaluation = syn.getEvaluation(evaluation)
    print("\n\nArchiving", evaluation.id, evaluation.name)
    print("-" * 60)

    for sub, status in syn.getSubmissionBundles(evaluation, status=stat):
        # retrieve file into cache and copy it to destination
        checkIfArchived = filter(lambda x: x.get("key") == "archived",
                                 status.annotations['stringAnnos'])
        if len(list(checkIfArchived)) == 0 or reArchive:
            projectEntity = synapseclient.Project(
                'Archived {} {} {} {}'.format(
                    sub.name.replace("&", "+").replace("'", ""),
                    int(round(time.time() * 1000)),
                    sub.id,
                    sub.entityId))
            entity = syn.store(projectEntity)
            adminPriv = [
                'DELETE', 'DOWNLOAD', 'CREATE', 'READ',
                'CHANGE_PERMISSIONS', 'UPDATE', 'MODERATE',
                'CHANGE_SETTINGS'
            ]
            syn.setPermissions(entity, "3324230", adminPriv)
            synapseutils.copy(syn, sub.entityId, entity.id)
            archived = {"archived": entity.id}
            status = utils.update_single_submission_status(status, archived)
            syn.store(status)
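# A hypothetical driver for archive_writeup() above; the evaluation queue ID
# is a placeholder and login assumes cached credentials.
import synapseclient

syn = synapseclient.login()
# Archive every VALIDATED submission, skipping ones already annotated
archive_writeup(syn, 9614112, stat="VALIDATED", reArchive=False)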
def copy(args, syn):
    mappings = synapseutils.copy(syn, args.id, args.destinationId,
                                 copyWikiPage=args.skipCopyWiki,
                                 excludeTypes=args.excludeTypes,
                                 version=args.version,
                                 updateExisting=args.updateExisting,
                                 setProvenance=args.setProvenance)
    print(mappings)
def test_no_copy_types(self):
    """Docker repositories and EntityViews aren't copied"""
    access_requirements = {'results': []}
    permissions = ["DOWNLOAD"]
    with patch.object(syn, "get", return_value=self.project_entity) as patch_syn_get,\
         patch.object(syn, "getPermissions", return_value=permissions) as patch_syn_permissions,\
         patch.object(syn, "restGET", return_value=access_requirements) as patch_restget,\
         patch.object(syn, "getChildren") as patch_get_children:
        copied_file = synapseutils.copy(
            syn, self.project_entity,
            destinationId=self.second_project.id,
            skipCopyWikiPage=True)
        assert_equals(copied_file,
                      {self.project_entity.id: self.second_project.id})
        calls = [
            call(self.project_entity, downloadFile=False),
            call(self.second_project.id)
        ]
        patch_syn_get.assert_has_calls(calls)
        patch_restget.assert_called_once_with(
            '/entity/{}/accessRequirement'.format(self.project_entity.id))
        patch_get_children.assert_called_once_with(
            self.project_entity,
            includeTypes=['folder', 'file', 'table', 'link'])
def archive(evaluation, stat="VALIDATED", reArchive=False):
    """Archive the submissions for the given evaluation queue by copying
    each submitted project into a newly created archive Project.

    :param evaluation: a synapse evaluation queue or its ID
    :param stat:       submission status to archive (defaults to "VALIDATED")
    :param reArchive:  re-archive submissions that already carry an
                       "archived" annotation
    """
    if not isinstance(evaluation, Evaluation):
        evaluation = syn.getEvaluation(evaluation)
    print("\n\nArchiving", evaluation.id, evaluation.name)
    print("-" * 60)
    sys.stdout.flush()

    for submission, status in syn.getSubmissionBundles(evaluation, status=stat):
        # retrieve file into cache and copy it to destination
        checkIfArchived = [anno for anno in status.annotations['stringAnnos']
                           if anno.get("key") == "archived"]
        if len(checkIfArchived) == 0 or reArchive:
            projectEntity = Project('Archived %s %d %s %s' % (
                submission.name, int(round(time.time() * 1000)),
                submission.id, submission.entityId))
            entity = syn.store(projectEntity)
            adminPriv = ['DELETE', 'DOWNLOAD', 'CREATE', 'READ',
                         'CHANGE_PERMISSIONS', 'UPDATE', 'MODERATE',
                         'CHANGE_SETTINGS']
            syn.setPermissions(entity, "3324230", adminPriv)
            syn.setPermissions(entity, "3329874", adminPriv)
            syn.setPermissions(entity, "3356007", ["READ", "DOWNLOAD"])
            copied = synu.copy(syn, submission.entityId, entity.id)
            archived = {"archived": entity.id}
            status = update_single_submission_status(status, archived)
            syn.store(status)
def buildProject(syn, projectName, teamId, adminId, templateId, projectView):
    """
    Copies a synapse project template and adds it to the csbc consortium
    project view.

    :param syn:         logged-in Synapse client
    :param projectName: name for the new project
    :param teamId:      Synapse team given access to the new project
    :param adminId:     Synapse principal given admin rights
    :param templateId:  Synapse ID of the template project to copy
    :param projectView: Synapse ID of the consortium project view
    :return: None
    """
    pc = createProject(syn, project_name=projectName, teamId=teamId,
                       adminId=adminId)
    print("project %s location on synapse is %s" % (projectName, pc.id))
    copied_syn_dict = synapseutils.copy(syn, entity=templateId,
                                        destinationId=pc.id)
    pv = updateProjectViewScope(syn, projectView, pc.id)
    print("Updated csbc project view scope - needs updated annotations\n")
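# Sketch of a buildProject() call; every ID below is a placeholder, and
# createProject/updateProjectViewScope are assumed to be defined alongside it.
buildProject(syn,
             projectName="CSBC Example Project",
             teamId="3379097",           # hypothetical team
             adminId="3324230",          # hypothetical admin principal
             templateId="syn12345678",   # hypothetical template project
             projectView="syn87654321")  # hypothetical project view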
def copy(args, syn):
    mappings = synapseutils.copy(syn, args.id, args.destinationId,
                                 skipCopyWikiPage=args.skipCopyWiki,
                                 skipCopyAnnotations=args.skipCopyAnnotations,
                                 excludeTypes=args.excludeTypes,
                                 version=args.version,
                                 updateExisting=args.updateExisting,
                                 setProvenance=args.setProvenance)
    print(mappings)
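# One plausible argparse wiring for the copy() handler above; the flag names
# mirror its keyword arguments and are assumptions, not the client's
# documented CLI.
import argparse

parser = argparse.ArgumentParser(description="Copy a Synapse entity")
parser.add_argument("id", help="Synapse ID of the entity to copy")
parser.add_argument("destinationId", help="Synapse ID of the destination")
parser.add_argument("--skipCopyWiki", action="store_true")
parser.add_argument("--skipCopyAnnotations", action="store_true")
parser.add_argument("--excludeTypes", nargs="*", default=[])
parser.add_argument("--version", type=int, default=None)
parser.add_argument("--updateExisting", action="store_true")
parser.add_argument("--setProvenance", default="traceback")
parser.set_defaults(func=copy)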
def archive(evaluation, archiveType, destination=None, name=None, query=None):
    """
    Archive the submissions for the given evaluation queue and store them
    in the destination synapse folder.

    :param evaluation:  a synapse evaluation queue or its ID
    :param archiveType: "submission" bundles submitted files into a tarball;
                        any other value copies each submitted project
    :param destination: a synapse folder or its ID
    :param name:        name for the tarball
                        (defaults to submissions_[EVAL_ID].tgz)
    :param query:       a query that will return the desired submissions.
                        At least the ID must be returned. Defaults to
                        'select * from evaluation_[EVAL_ID] where status=="SCORED"'.
    """
    tempdir = tempfile.mkdtemp()
    archive_dirname = 'submissions_%s' % utils.id_of(evaluation)
    if not query:
        query = ('select * from evaluation_%s where status=="SCORED"'
                 % utils.id_of(evaluation))

    # for each submission, download its associated file and write a line
    # of metadata
    results = Query(query=query)
    if 'objectId' not in results.headers:
        raise ValueError("Can't find the required field \"objectId\" in the "
                         "results of the query: \"{0}\"".format(query))

    if archiveType == "submission":
        if not name:
            name = 'submissions_%s.tgz' % utils.id_of(evaluation)
        tar_path = os.path.join(tempdir, name)
        print("creating tar at:", tar_path)
        print(results.headers)
        with tarfile.open(tar_path, mode='w:gz') as archive:
            with open(os.path.join(tempdir, 'submission_metadata.csv'), 'w') as f:
                f.write(','.join(hdr for hdr in
                                 (results.headers + ['filename'])) + '\n')
                for result in results:
                    # retrieve file into cache and copy it to destination
                    submission = syn.getSubmission(
                        result[results.headers.index('objectId')])
                    prefixed_filename = (submission.id + "_" +
                                         os.path.basename(submission.filePath))
                    archive.add(submission.filePath,
                                arcname=os.path.join(archive_dirname,
                                                     prefixed_filename))
                    line = ','.join(str(item) for item in
                                    (result + [prefixed_filename]))
                    print(line)
                    f.write(line + '\n')
            archive.add(
                name=os.path.join(tempdir, 'submission_metadata.csv'),
                arcname=os.path.join(archive_dirname,
                                     'submission_metadata.csv'))
        entity = syn.store(File(tar_path, parent=destination),
                           evaluation_id=utils.id_of(evaluation))
        print("created:", entity.id, entity.name)
        toReturn = entity.id
    else:
        toReturn = {}
        for result in results:
            # retrieve file into cache and copy it to destination
            submission = syn.getSubmission(
                result[results.headers.index('objectId')])
            projectEntity = Project('Archived %s %s %s %s' % (
                time.strftime("%Y%m%d"), submission.id,
                submission.entity.id, submission.entity.name))
            entity = syn.store(projectEntity)
            copied = synu.copy(syn, submission.entity.id, entity.id)
            toReturn.update(copied)
    return toReturn
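# Hypothetical invocations of archive() above, assuming a module-level
# logged-in client `syn`; the evaluation ID and destination folder are
# placeholders. Any archiveType other than "submission" takes the
# project-copy branch.
tarball_id = archive(9614112, "submission", destination="syn55555555")
copied_map = archive(9614112, "project")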
def test_dont_copy_read_permissions(self):
    """Entities with READ permissions not copied"""
    permissions = {'canDownload': False}
    with patch.object(syn, "get", return_value=self.file_ent) as patch_syn_get,\
         patch.object(syn, "restGET", return_value=permissions) as patch_rest_get:
        copied_file = synapseutils.copy(
            syn, self.file_ent,
            destinationId=self.second_project.id,
            skipCopyWikiPage=True)
        assert_equal(copied_file, dict())
        patch_syn_get.assert_called_once_with(self.file_ent,
                                              downloadFile=False)
        rest_call = "/entity/{}/permissions".format(self.file_ent.id)
        patch_rest_get.assert_called_once_with(rest_call)
def test_dont_copy_read_permissions(self):
    """Entities with READ permissions not copied"""
    permissions = ["READ"]
    with patch.object(syn, "get", return_value=self.file_ent) as patch_syn_get,\
         patch.object(syn, "getPermissions", return_value=permissions) as patch_syn_permissions:
        copied_file = synapseutils.copy(
            syn, self.file_ent,
            destinationId=self.second_project.id,
            skipCopyWikiPage=True)
        assert_equals(copied_file, dict())
        patch_syn_get.assert_called_once_with(self.file_ent,
                                              downloadFile=False)
        patch_syn_permissions.assert_called_once_with(
            self.file_ent, syn.username)
def archive_project(syn, submission, admin):
    """
    Make a copy (archive) of the Project submission.

    Args:
        submission - submission ID
        admin - user who will own the archived project
    """
    writeup = syn.getSubmission(submission)
    name = writeup.entity.name.replace("&", "+").replace("'", "")
    curr_time = int(round(time.time() * 1000))
    new_project = Project(f"Archived {name} {curr_time} {writeup.id} "
                          + f"{writeup.entityId}")
    archive = syn.store(new_project)
    permissions.set_entity_permissions(syn, archive, admin, "admin")
    archived = synapseutils.copy(syn, writeup.entityId, archive.id)
    return {"archived": archived.get(writeup.entityId)}
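# Minimal sketch of archiving one submission with archive_project(); the
# submission ID and admin principal are placeholders.
result = archive_project(syn, submission=9703331, admin="3324230")
print(result)  # {"archived": "syn..."} mapping from synapseutils.copy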
def test_copy_entity_access_requirements(self):
    # TEST: Entity with access requirement not copied
    access_requirements = {'results': ["fee", "fi"]}
    permissions = ["DOWNLOAD"]
    with patch.object(syn, "get", return_value=self.file_ent) as patch_syn_get,\
         patch.object(syn, "getPermissions", return_value=permissions) as patch_syn_permissions,\
         patch.object(syn, "restGET", return_value=access_requirements) as patch_restget:
        copied_file = synapseutils.copy(
            syn, self.file_ent,
            destinationId=self.second_project.id,
            skipCopyWikiPage=True)
        assert_equals(copied_file, dict())
        patch_syn_get.assert_called_once_with(self.file_ent,
                                              downloadFile=False)
        patch_restget.assert_called_once_with(
            '/entity/{}/accessRequirement'.format(self.file_ent.id))
def create_team_wikis(syn, synid, templateid, tracker_table_synid):
    """
    Creates wiki pages from a template for each team registered for a
    challenge. Teams that have had a wiki made for them are recorded in a
    tracker table with columns wikiSynId and teamId.

    Args:
        synid: Synapse id of challenge project
        templateid: Synapse id of the template
        tracker_table_synid: Synapse id of the Table that tracks whether
            wiki pages have been made per team
    """
    challenge_ent = syn.get(synid)
    challenge_obj = utils.get_challenge(challenge_ent)
    registered_teams = syn._GET_paginated(
        "/challenge/{}/challengeTeam".format(challenge_obj['id']))
    for i in registered_teams:
        submitted_teams = syn.tableQuery(
            "SELECT * FROM {} where teamId = '{}'".format(
                tracker_table_synid, i['teamId']))
        if len(submitted_teams.asDataFrame()) == 0:
            team = syn.getTeam(i['teamId'])
            # The project name is the challenge project name and team name
            project = syn.store(
                synapseclient.Project("{} {}".format(challenge_ent.name,
                                                     team.name)))
            # Give admin access to the team
            syn.setPermissions(project, i['teamId'], accessType=[
                'DELETE', 'CHANGE_SETTINGS', 'MODERATE', 'CREATE',
                'READ', 'DOWNLOAD', 'UPDATE', 'CHANGE_PERMISSIONS'
            ])
            wiki_copy = synapseutils.copy(syn, templateid, project.id)
            # syn.sendMessage(i[])
            # Store copied synId to tracking table
            tracking_table = synapseclient.Table(
                tracker_table_synid, [[wiki_copy[templateid], i['teamId']]])
            syn.store(tracking_table)
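# Hypothetical invocation of create_team_wikis(); all Synapse IDs are
# placeholders for the challenge project, wiki template, and tracker table.
create_team_wikis(syn,
                  synid="syn11111111",
                  templateid="syn22222222",
                  tracker_table_synid="syn33333333")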
def test_copy_entity_access_requirements(self):
    # TEST: Entity with access requirement not copied
    access_requirements = {'results': ["fee", "fi"]}
    permissions = {'canDownload': True}
    with patch.object(syn, "get", return_value=self.file_ent) as patch_syn_get,\
         patch.object(syn, "restGET",
                      side_effect=[permissions, access_requirements]) as patch_rest_get:
        copied_file = synapseutils.copy(
            syn, self.file_ent,
            destinationId=self.second_project.id,
            skipCopyWikiPage=True)
        assert_equal(copied_file, dict())
        patch_syn_get.assert_called_once_with(self.file_ent,
                                              downloadFile=False)
        calls = [
            call('/entity/{}/accessRequirement'.format(self.file_ent.id)),
            call("/entity/{}/permissions".format(self.file_ent.id))
        ]
        patch_rest_get.assert_has_calls(calls)
def consortiumToPublic(syn, processingDate, genie_version, releaseId,
                       databaseSynIdMappingDf, publicReleaseCutOff=365,
                       staging=False):
    ANONYMIZE_CENTER = syn.tableQuery('SELECT * FROM syn10170510')
    ANONYMIZE_CENTER_DF = ANONYMIZE_CENTER.asDataFrame()
    CNA_PATH = os.path.join(db_to_staging.GENIE_RELEASE_DIR,
                            "data_CNA_%s.txt" % genie_version)
    CLINICAL_PATH = os.path.join(db_to_staging.GENIE_RELEASE_DIR,
                                 'data_clinical_%s.txt' % genie_version)
    CLINICAL_SAMPLE_PATH = os.path.join(
        db_to_staging.GENIE_RELEASE_DIR,
        'data_clinical_sample_%s.txt' % genie_version)
    CLINICAL_PATIENT_PATH = os.path.join(
        db_to_staging.GENIE_RELEASE_DIR,
        'data_clinical_patient_%s.txt' % genie_version)
    DATA_GENE_PANEL_PATH = os.path.join(
        db_to_staging.GENIE_RELEASE_DIR,
        'data_gene_matrix_%s.txt' % genie_version)
    MUTATIONS_PATH = os.path.join(
        db_to_staging.GENIE_RELEASE_DIR,
        'data_mutations_extended_%s.txt' % genie_version)
    FUSIONS_PATH = os.path.join(db_to_staging.GENIE_RELEASE_DIR,
                                'data_fusions_%s.txt' % genie_version)
    SEG_PATH = os.path.join(
        db_to_staging.GENIE_RELEASE_DIR,
        'genie_public_data_cna_hg19_%s.seg' % genie_version)
    COMBINED_BED_PATH = os.path.join(db_to_staging.GENIE_RELEASE_DIR,
                                     'genie_combined_%s.bed' % genie_version)
    if not os.path.exists(db_to_staging.GENIE_RELEASE_DIR):
        os.mkdir(db_to_staging.GENIE_RELEASE_DIR)
    if not os.path.exists(db_to_staging.CASE_LIST_PATH):
        os.mkdir(db_to_staging.CASE_LIST_PATH)

    # if staging:
    #     # public release staging
    #     PUBLIC_RELEASE_PREVIEW = "syn7871696"
    #     PUBLIC_RELEASE_PREVIEW_CASELIST = "syn9689659"
    # else:
    # public release preview
    PUBLIC_RELEASE_PREVIEW = databaseSynIdMappingDf['Id'][
        databaseSynIdMappingDf['Database'] == 'public'].values[0]
    PUBLIC_RELEASE_PREVIEW_CASELIST = db_to_staging.find_caselistid(
        syn, PUBLIC_RELEASE_PREVIEW)

    ###########################################################################
    # Sponsored projects filter
    ###########################################################################
    # if before release date -> go into staging consortium
    # if after date -> go into public
    # sponsoredReleaseDate = syn.tableQuery('SELECT * FROM syn8545108')
    # sponsoredReleaseDateDf = sponsoredReleaseDate.asDataFrame()
    # sponsoredProjectSamples = syn.tableQuery('SELECT * FROM syn8545106')
    # sponsoredProjectSamplesDf = sponsoredProjectSamples.asDataFrame()
    # sponsoredProjectsDf = sponsoredProjectSamplesDf.merge(
    #     sponsoredReleaseDateDf, left_on="sponsoredProject",
    #     right_on="sponsoredProjects")
    # dates = sponsoredProjectsDf['releaseDate'].apply(
    #     lambda date: datetime.datetime.strptime(date, '%b-%Y'))
    # publicReleaseSamples = sponsoredProjectsDf['genieSampleId'][
    #     dates < processingDate]
    ###########################################################################
    # SEQ_DATE filter
    # Jun-2015, given processing date (today) -> public release
    # (processing date - Jun-2015 > 12 months)
    consortiumReleaseWalk = synapseutils.walk(syn, releaseId)
    consortiumRelease = next(consortiumReleaseWalk)
    clinical = [syn.get(synid, followLink=True)
                for filename, synid in consortiumRelease[2]
                if filename == "data_clinical.txt"][0]
    gene_matrix = [syn.get(synid, followLink=True)
                   for filename, synid in consortiumRelease[2]
                   if filename == "data_gene_matrix.txt"][0]
    clinicalDf = pd.read_csv(clinical.path, sep="\t", comment="#")
    gene_matrixdf = pd.read_csv(gene_matrix.path, sep="\t")
    removeForPublicSamples = process_functions.seqDateFilter(
        clinicalDf, processingDate, publicReleaseCutOff)
    # comment back in when public release filter back on
    # publicReleaseSamples = publicReleaseSamples.append(keepForPublicSamples)
    # Make sure all null oncotree codes are removed
    clinicalDf = clinicalDf[~clinicalDf['ONCOTREE_CODE'].isnull()]
    publicReleaseSamples = clinicalDf.SAMPLE_ID[
        ~clinicalDf.SAMPLE_ID.isin(removeForPublicSamples)]
    logger.info("SEQ_DATES for public release: " + ", ".join(set(
        clinicalDf.SEQ_DATE[clinicalDf.SAMPLE_ID.isin(
            publicReleaseSamples)].astype(str))))

    # Clinical release scope filter
    # If consortium -> Don't release to public
    clinicalReleaseScope = syn.tableQuery(
        "SELECT * FROM syn8545211 where releaseScope = 'public'")
    publicRelease = clinicalReleaseScope.asDataFrame()
    allClin = clinicalDf[clinicalDf['SAMPLE_ID'].isin(publicReleaseSamples)]
    allClin.to_csv(CLINICAL_PATH, sep="\t", index=False)
    gene_matrixdf = gene_matrixdf[
        gene_matrixdf['SAMPLE_ID'].isin(publicReleaseSamples)]
    gene_matrixdf.to_csv(DATA_GENE_PANEL_PATH, sep="\t", index=False)
    storeFile(syn, DATA_GENE_PANEL_PATH, PUBLIC_RELEASE_PREVIEW,
              ANONYMIZE_CENTER_DF, genie_version, name="data_gene_matrix.txt")
    storeFile(syn, CLINICAL_PATH, PUBLIC_RELEASE_PREVIEW,
              ANONYMIZE_CENTER_DF, genie_version, name="data_clinical.txt")
    create_case_lists.main(CLINICAL_PATH, DATA_GENE_PANEL_PATH,
                           db_to_staging.CASE_LIST_PATH, "genie_public")
    caseListFiles = os.listdir(db_to_staging.CASE_LIST_PATH)
    caseListEntities = []
    for casePath in caseListFiles:
        casePath = os.path.join(db_to_staging.CASE_LIST_PATH, casePath)
        caseListEntities.append(
            storeFile(syn, casePath, PUBLIC_RELEASE_PREVIEW_CASELIST,
                      ANONYMIZE_CENTER_DF, genie_version))

    # Grab mapping table to fill in clinical headers
    mapping_table = syn.tableQuery('SELECT * FROM syn9621600')
    mapping = mapping_table.asDataFrame()
    genePanelEntities = []
    for entName, entId in consortiumRelease[2]:
        if "data_linear" in entName or "meta_" in entName:
            continue
        elif entName == "data_clinical.txt":
            patientCols = publicRelease['fieldName'][
                publicRelease['level'] == "patient"].tolist()
            sampleCols = ["PATIENT_ID"]
            sampleCols.extend(publicRelease['fieldName'][
                publicRelease['level'] == "sample"].tolist())
            # clinicalDf is defined on line 36
            # clinicalDf['AGE_AT_SEQ_REPORT'] = [
            #     int(math.floor(int(float(i)) / 365.25))
            #     if process.checkInt(i) else i
            #     for i in clinicalDf['AGE_AT_SEQ_REPORT']]
            # clinicalDf['AGE_AT_SEQ_REPORT'][
            #     clinicalDf['AGE_AT_SEQ_REPORT'] == ">32485"] = ">89"
            # clinicalDf['AGE_AT_SEQ_REPORT'][
            #     clinicalDf['AGE_AT_SEQ_REPORT'] == "<6570"] = "<18"
            clinicalDf = clinicalDf[
                clinicalDf['SAMPLE_ID'].isin(publicReleaseSamples)]
            # Delete columns that are private scope
            # for private in privateRelease:
            #     del clinicalDf[private]
            process_functions.addClinicalHeaders(
                clinicalDf, mapping, patientCols, sampleCols,
                CLINICAL_SAMPLE_PATH, CLINICAL_PATIENT_PATH)
            storeFile(syn, CLINICAL_SAMPLE_PATH, PUBLIC_RELEASE_PREVIEW,
                      ANONYMIZE_CENTER_DF, genie_version,
                      name="data_clinical_sample.txt")
            storeFile(syn, CLINICAL_PATIENT_PATH, PUBLIC_RELEASE_PREVIEW,
                      ANONYMIZE_CENTER_DF, genie_version,
                      name="data_clinical_patient.txt")
        elif "mutation" in entName:
            mutation = syn.get(entId, followLink=True)
            mutationDf = pd.read_csv(mutation.path, sep="\t", comment="#")
            mutationDf = commonVariantFilter(mutationDf)
            mutationDf['FILTER'] = "PASS"
            mutationDf = mutationDf[
                mutationDf['Tumor_Sample_Barcode'].isin(publicReleaseSamples)]
            text = process_functions.removeFloat(mutationDf)
            with open(MUTATIONS_PATH, 'w') as f:
                f.write(text)
            storeFile(syn, MUTATIONS_PATH, PUBLIC_RELEASE_PREVIEW,
                      ANONYMIZE_CENTER_DF, genie_version,
                      name="data_mutations_extended.txt")
        elif "fusion" in entName:
            fusion = syn.get(entId, followLink=True)
            fusionDf = pd.read_csv(fusion.path, sep="\t")
            # remove = ["Entrez_Gene_Id", "Method"]
            # fusionDf = fusionDf[fusionDf.columns[~fusionDf.columns.isin(remove)]]
            fusionDf = fusionDf[
                fusionDf['Tumor_Sample_Barcode'].isin(publicReleaseSamples)]
            fusionDf.to_csv(FUSIONS_PATH, sep="\t", index=False)
            storeFile(syn, FUSIONS_PATH, PUBLIC_RELEASE_PREVIEW,
                      ANONYMIZE_CENTER_DF, genie_version,
                      name="data_fusions.txt")
        elif "CNA" in entName:
            cna = syn.get(entId, followLink=True)
            cnaDf = pd.read_csv(cna.path, sep="\t")
            cnaDf = cnaDf[cnaDf.columns[cnaDf.columns.isin(
                publicReleaseSamples.append(pd.Series("Hugo_Symbol")))]]
            text = process_functions.removeFloat(cnaDf)
            text = text.replace("\t\t", "\tNA\t").replace(
                "\t\t", "\tNA\t").replace('\t\n', "\tNA\n")
            with open(CNA_PATH, "w") as cnaFile:
                cnaFile.write(text)
            storeFile(syn, CNA_PATH, PUBLIC_RELEASE_PREVIEW,
                      ANONYMIZE_CENTER_DF, genie_version,
                      name="data_CNA.txt")
        elif entName.endswith(".seg"):
            seg = syn.get(entId, followLink=True)
            segDf = pd.read_csv(seg.path, sep="\t")
            segDf = segDf[segDf['ID'].isin(publicReleaseSamples)]
            text = process_functions.removeFloat(segDf)
            with open(SEG_PATH, "w") as segFile:
                segFile.write(text)
            storeFile(syn, SEG_PATH, PUBLIC_RELEASE_PREVIEW,
                      ANONYMIZE_CENTER_DF, genie_version,
                      name="genie_public_data_cna_hg19.seg")
        elif entName == "data_gene_matrix.txt":
            # This file was processed above because it had to be used for
            # generating caselists
            pass
            # panel = syn.get(entId, followLink=True)
            # panelDf = pd.read_csv(panel.path, sep="\t")
            # panelDf = panelDf[panelDf['SAMPLE_ID'].isin(publicReleaseSamples)]
            # panelDf.to_csv(DATA_GENE_PANEL_PATH, sep="\t", index=False)
            # storeFile(syn, DATA_GENE_PANEL_PATH, PUBLIC_RELEASE_PREVIEW,
            #           ANONYMIZE_CENTER_DF, genie_version,
            #           name="data_gene_matrix.txt")
        elif entName == "genie_combined.bed":
            bed = syn.get(entId, followLink=True)
            bedDf = pd.read_csv(bed.path, sep="\t")
            bedDf = bedDf[bedDf.SEQ_ASSAY_ID.isin(allClin.SEQ_ASSAY_ID)]
            bedDf.to_csv(COMBINED_BED_PATH, sep="\t", index=False)
            storeFile(syn, COMBINED_BED_PATH, PUBLIC_RELEASE_PREVIEW,
                      ANONYMIZE_CENTER_DF, genie_version,
                      name="genie_combined.bed")
        elif entName in ["data_clinical_sample.txt",
                         "data_clinical_patient.txt"] \
                or entName.endswith(".html"):
            continue
        elif entName.startswith("data_gene_panel"):
            genePanel = syn.get(entId, followLink=True)
            # Create new gene panel naming and store
            fileName = os.path.basename(genePanel.path)
            newFileList = fileName.split("_")
            newFileList[-1] = genie_version + ".txt"
            newFileName = "_".join(newFileList)
            genePanelPath = os.path.join(db_to_staging.GENIE_RELEASE_DIR,
                                         newFileName)
            shutil.copy(genePanel.path, genePanelPath)
            del newFileList[-1]
            entName = "_".join(newFileList)
            entName = entName + ".txt"
            genePanelEntities.append(
                storeFile(syn, genePanelPath, PUBLIC_RELEASE_PREVIEW,
                          ANONYMIZE_CENTER_DF, genie_version, name=entName))
        else:
            ent = syn.get(entId, followLink=True, downloadFile=False)
            copiedId = synapseutils.copy(
                syn, ent, PUBLIC_RELEASE_PREVIEW,
                version=ent.versionNumber, updateExisting=True,
                setProvenance=None, skipCopyAnnotations=True)
            copiedEnt = syn.get(copiedId[ent.id], downloadFile=False)
            # Set version comment
            copiedEnt.versionComment = genie_version
            syn.store(copiedEnt, forceVersion=False)
    return (caseListEntities, genePanelEntities)
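# consortiumToPublic() reads a single directory level of the release folder
# via next(synapseutils.walk(...)). A minimal sketch of that pattern, with a
# placeholder folder ID and a logged-in client `syn`:
walked = synapseutils.walk(syn, "syn44444444")
dirpath, dirnames, filenames = next(walked)  # first level only
# each element of filenames is a (name, synapse ID) pair
for filename, synid in filenames:
    print(filename, synid)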
def copy(self, entity, destinationId, skipCopyWikiPage=False,
         skipCopyAnnotations=False, **kwargs):
    return synapseutils.copy(self.syn, entity, destinationId,
                             skipCopyWikiPage, skipCopyAnnotations, **kwargs)
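# The wrapper above forwards its arguments positionally to synapseutils.copy,
# so a call on an instance whose self.syn is a logged-in client is equivalent
# to the direct call below (entity IDs are placeholders):
mapping = synapseutils.copy(syn, "syn123", "syn456",
                            skipCopyWikiPage=True,
                            skipCopyAnnotations=False)
# mapping is the {sourceId: copiedId} dict returned by synapseutils.copy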
def test_copyWiki():
    # Create a Project
    project_entity = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)
    folder_entity = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(folder_entity.id)
    second_folder = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(second_folder.id)
    third_folder = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(third_folder.id)

    filename = utils.make_bogus_data_file()
    attachname = utils.make_bogus_data_file()
    schedule_for_cleanup(filename)
    file_entity = syn.store(File(filename, parent=folder_entity))
    nested_folder = syn.store(
        Folder(name=str(uuid.uuid4()), parent=folder_entity))
    second_file = syn.store(File(filename, parent=nested_folder))
    schedule_for_cleanup(file_entity.id)
    schedule_for_cleanup(nested_folder.id)
    schedule_for_cleanup(second_file.id)

    fileWiki = Wiki(owner=second_file, title='A Test Wiki', markdown="Test")
    fileWiki = syn.store(fileWiki)

    # Create mock wiki
    md = """
This is a test wiki
=======================

Blabber jabber blah blah boo.
%s
%s
""" % (file_entity.id, second_file.id)

    wiki = Wiki(owner=project_entity, title='A Test Wiki', markdown=md,
                attachments=[attachname])
    wiki = syn.store(wiki)

    # Create a Wiki sub-page
    subwiki = Wiki(owner=project_entity, title='A sub-wiki',
                   markdown='%s' % file_entity.id, parentWikiId=wiki.id)
    subwiki = syn.store(subwiki)

    second_md = """
Testing internal links
======================

[test](#!Synapse:%s/wiki/%s)

%s)
""" % (project_entity.id, subwiki.id, second_file.id)

    sub_subwiki = Wiki(owner=project_entity, title='A sub-sub-wiki',
                       markdown=second_md, parentWikiId=subwiki.id,
                       attachments=[attachname])
    sub_subwiki = syn.store(sub_subwiki)

    # Copy wiki to second project
    second_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(second_project.id)
    fileMapping = synapseutils.copy(syn, project_entity, second_project.id,
                                    skipCopyWikiPage=True)

    # Test: copyWikiPage = False
    assert_raises(SynapseHTTPError, syn.getWiki, second_project.id)

    first_headers = syn.getWikiHeaders(project_entity)
    second_headers = synapseutils.copyWiki(syn, project_entity.id,
                                           second_project.id,
                                           entityMap=fileMapping)

    mapping = dict()
    # Test: Check that all wikis were copied correctly with the correct mapping
    for index, info in enumerate(second_headers):
        mapping[first_headers[index]['id']] = info['id']
        assert_equals(first_headers[index]['title'], info['title'])
        if info.get('parentId', None) is not None:
            # Check if parent Ids are mapping correctly in the copied Wikis
            assert_equals(info['parentId'],
                          mapping[first_headers[index]['parentId']])

    # Test: Check that all wikis have the correct attachments and have
    # correct internal synapse link/file mapping
    for index, info in enumerate(second_headers):
        # Check if markdown is the correctly mapped
        orig_wikiPage = syn.getWiki(project_entity, first_headers[index]['id'])
        new_wikiPage = syn.getWiki(second_project, info['id'])
        s = orig_wikiPage.markdown
        for oldWikiId in mapping.keys():
            oldProjectAndWikiId = "%s/wiki/%s" % (project_entity.id, oldWikiId)
            newProjectAndWikiId = "%s/wiki/%s" % (second_project.id,
                                                  mapping[oldWikiId])
            s = re.sub(oldProjectAndWikiId, newProjectAndWikiId, s)
        for oldFileId in fileMapping.keys():
            s = re.sub(oldFileId, fileMapping[oldFileId], s)
        assert_equals(s, new_wikiPage.markdown)
        orig_attach = syn.getWikiAttachments(orig_wikiPage)
        new_attach = syn.getWikiAttachments(new_wikiPage)
        orig_file = [
            i['fileName'] for i in orig_attach
            if i['concreteType'] !=
            "org.sagebionetworks.repo.model.file.PreviewFileHandle"
        ]
        new_file = [
            i['fileName'] for i in new_attach
            if i['concreteType'] !=
            "org.sagebionetworks.repo.model.file.PreviewFileHandle"
        ]
        # check that attachment file names are the same
        assert_equals(orig_file, new_file)

    # Test: copyWikiPage = True (Default)
    # (Should copy all wikis including wikis on files)
    third_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(third_project.id)
    copiedFile = synapseutils.copy(syn, second_file, third_project.id)
    copiedWiki = syn.getWiki(copiedFile[second_file.id])
    assert_equals(copiedWiki.title, fileWiki.title)
    assert_equals(copiedWiki.markdown, fileWiki.markdown)

    # Test: entitySubPageId
    third_header = synapseutils.copyWiki(syn, project_entity.id,
                                         third_project.id,
                                         entitySubPageId=sub_subwiki.id,
                                         destinationSubPageId=None,
                                         updateLinks=False,
                                         updateSynIds=False,
                                         entityMap=fileMapping)
    test_ent_subpage = syn.getWiki(third_project.id, third_header[0]['id'])

    # Test: No internal links updated
    assert_equals(test_ent_subpage.markdown, sub_subwiki.markdown)
    assert_equals(test_ent_subpage.title, sub_subwiki.title)

    # Test: destinationSubPageId
    fourth_header = synapseutils.copyWiki(
        syn, project_entity.id, third_project.id,
        entitySubPageId=subwiki.id,
        destinationSubPageId=test_ent_subpage.id,
        updateLinks=False, updateSynIds=False,
        entityMap=fileMapping)
    temp = syn.getWiki(third_project.id, fourth_header[0]['id'])
    # There are issues where some title pages are blank.
    # This is an issue that needs to be addressed
    assert_equals(temp.title, subwiki.title)
    assert_equals(temp.markdown, subwiki.markdown)
    temp = syn.getWiki(third_project.id, fourth_header[1]['id'])
    assert_equals(temp.title, sub_subwiki.title)
    assert_equals(temp.markdown, sub_subwiki.markdown)
def test_copy(): """Tests the copy function""" # Create a Project project_entity = syn.store(Project(name=str(uuid.uuid4()))) schedule_for_cleanup(project_entity.id) # Create two Folders in Project folder_entity = syn.store( Folder(name=str(uuid.uuid4()), parent=project_entity)) second_folder = syn.store( Folder(name=str(uuid.uuid4()), parent=project_entity)) third_folder = syn.store( Folder(name=str(uuid.uuid4()), parent=project_entity)) schedule_for_cleanup(folder_entity.id) schedule_for_cleanup(second_folder.id) schedule_for_cleanup(third_folder.id) # Annotations and provenance repo_url = 'https://github.com/Sage-Bionetworks/synapsePythonClient' annos = {'test': ['hello_world']} prov = Activity(name="test", used=repo_url) # Create, upload, and set annotations/provenance on a file in Folder filename = utils.make_bogus_data_file() schedule_for_cleanup(filename) file_entity = syn.store(File(filename, parent=folder_entity)) externalURL_entity = syn.store( File(repo_url, name='rand', parent=folder_entity, synapseStore=False)) syn.setAnnotations(file_entity, annos) syn.setAnnotations(externalURL_entity, annos) syn.setProvenance(externalURL_entity.id, prov) schedule_for_cleanup(file_entity.id) schedule_for_cleanup(externalURL_entity.id) # ------------------------------------ # TEST COPY FILE # ------------------------------------ output = synapseutils.copy(syn, file_entity.id, destinationId=project_entity.id) output_URL = synapseutils.copy(syn, externalURL_entity.id, destinationId=project_entity.id, skipCopyAnnotations=True) # Verify that our copied files are identical copied_ent = syn.get(output[file_entity.id]) copied_URL_ent = syn.get(output_URL[externalURL_entity.id], downloadFile=False) copied_ent_annot = syn.getAnnotations(copied_ent) copied_url_annot = syn.getAnnotations(copied_URL_ent) copied_prov = syn.getProvenance(copied_ent) copied_url_prov = syn.getProvenance(copied_URL_ent) schedule_for_cleanup(copied_ent.id) schedule_for_cleanup(copied_URL_ent.id) # TEST: set_Provenance = Traceback assert_equals(copied_prov['used'][0]['reference']['targetId'], file_entity.id) assert_equals(copied_url_prov['used'][0]['reference']['targetId'], externalURL_entity.id) # TEST: Make sure copied files are the same assert_equals(copied_ent_annot, annos) assert_equals(copied_ent.dataFileHandleId, file_entity.dataFileHandleId) # TEST: Make sure copied URLs are the same assert_equals(copied_url_annot, {}) assert_equals(copied_URL_ent.externalURL, repo_url) assert_equals(copied_URL_ent.name, 'rand') assert_equals(copied_URL_ent.dataFileHandleId, externalURL_entity.dataFileHandleId) # TEST: Throw error if file is copied to a folder/project that has a file with the same filename assert_raises(ValueError, synapseutils.copy, syn, project_entity.id, destinationId=project_entity.id) assert_raises(ValueError, synapseutils.copy, syn, file_entity.id, destinationId=project_entity.id) assert_raises(ValueError, synapseutils.copy, syn, file_entity.id, destinationId=third_folder.id, setProvenance="gib") assert_raises(ValueError, synapseutils.copy, syn, file_entity.id, destinationId=file_entity.id) # Test: setProvenance = None output = synapseutils.copy(syn, file_entity.id, destinationId=second_folder.id, setProvenance=None) assert_raises(SynapseHTTPError, syn.getProvenance, output[file_entity.id]) schedule_for_cleanup(output[file_entity.id]) # Test: setProvenance = Existing output_URL = synapseutils.copy(syn, externalURL_entity.id, destinationId=second_folder.id, setProvenance="existing") output_prov = 
syn.getProvenance(output_URL[externalURL_entity.id]) schedule_for_cleanup(output_URL[externalURL_entity.id]) assert_equals(output_prov['name'], prov['name']) assert_equals(output_prov['used'], prov['used']) # ------------------------------------ # TEST COPY LINKS # ------------------------------------ second_file = utils.make_bogus_data_file() # schedule_for_cleanup(filename) second_file_entity = syn.store(File(second_file, parent=project_entity)) link_entity = Link(second_file_entity.id, parent=folder_entity.id) link_entity = syn.store(link_entity) copied_link = synapseutils.copy(syn, link_entity.id, destinationId=second_folder.id) old = syn.get(link_entity.id, followLink=False) new = syn.get(copied_link[link_entity.id], followLink=False) assert_equals(old.linksTo['targetId'], new.linksTo['targetId']) schedule_for_cleanup(second_file_entity.id) schedule_for_cleanup(link_entity.id) schedule_for_cleanup(copied_link[link_entity.id]) time.sleep(3) assert_raises(ValueError, synapseutils.copy, syn, link_entity.id, destinationId=second_folder.id) # ------------------------------------ # TEST COPY TABLE # ------------------------------------ second_project = syn.store(Project(name=str(uuid.uuid4()))) schedule_for_cleanup(second_project.id) cols = [ Column(name='n', columnType='DOUBLE', maximumSize=50), Column(name='c', columnType='STRING', maximumSize=50), Column(name='i', columnType='INTEGER') ] data = [[2.1, 'foo', 10], [2.2, 'bar', 20], [2.3, 'baz', 30]] schema = syn.store( Schema(name='Testing', columns=cols, parent=project_entity.id)) syn.store(RowSet(schema=schema, rows=[Row(r) for r in data])) table_map = synapseutils.copy(syn, schema.id, destinationId=second_project.id) copied_table = syn.tableQuery('select * from %s' % table_map[schema.id]) rows = copied_table.asRowSet()['rows'] # TEST: Check if all values are the same for i, row in enumerate(rows): assert_equals(row['values'], data[i]) assert_raises(ValueError, synapseutils.copy, syn, schema.id, destinationId=second_project.id) schedule_for_cleanup(schema.id) schedule_for_cleanup(table_map[schema.id]) # ------------------------------------ # TEST COPY FOLDER # ------------------------------------ mapping = synapseutils.copy(syn, folder_entity.id, destinationId=second_project.id) for i in mapping: old = syn.get(i, downloadFile=False) new = syn.get(mapping[i], downloadFile=False) assert_equals(old.name, new.name) assert_equals(old.annotations, new.annotations) assert_equals(old.concreteType, new.concreteType) assert_raises(ValueError, synapseutils.copy, syn, folder_entity.id, destinationId=second_project.id) # TEST: Throw error if excludeTypes isn't in file, link and table or isn't a list assert_raises(ValueError, synapseutils.copy, syn, second_folder.id, destinationId=second_project.id, excludeTypes=["foo"]) assert_raises(ValueError, synapseutils.copy, syn, second_folder.id, destinationId=second_project.id, excludeTypes="file") # TEST: excludeType = ["file"], only the folder is created second = synapseutils.copy(syn, second_folder.id, destinationId=second_project.id, excludeTypes=["file", "table", "link"]) copied_folder = syn.get(second[second_folder.id]) assert_equals(copied_folder.name, second_folder.name) assert_equals(len(second), 1) # TEST: Make sure error is thrown if foldername already exists assert_raises(ValueError, synapseutils.copy, syn, second_folder.id, destinationId=second_project.id) # ------------------------------------ # TEST COPY PROJECT # ------------------------------------ third_project = 
syn.store(Project(name=str(uuid.uuid4()))) schedule_for_cleanup(third_project.id) mapping = synapseutils.copy(syn, project_entity.id, destinationId=third_project.id) for i in mapping: old = syn.get(i, downloadFile=False) new = syn.get(mapping[i], downloadFile=False) if not isinstance(old, Project): assert_equals(old.name, new.name) assert_equals(old.annotations, new.annotations) assert_equals(old.concreteType, new.concreteType) # TEST: Can't copy project to a folder assert_raises(ValueError, synapseutils.copy, syn, project_entity.id, destinationId=second_folder.id)
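# Condensed sketch of the core copy behaviors exercised by test_copy() above;
# the IDs are placeholders and `syn` is assumed to be a logged-in client.
import synapseutils

# Default copy: annotations and traceback provenance come along
mapping = synapseutils.copy(syn, "syn111", destinationId="syn222")

# Copy without provenance or annotations
mapping = synapseutils.copy(syn, "syn111", destinationId="syn333",
                            setProvenance=None, skipCopyAnnotations=True)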
def consortiumToPublic(
    syn,
    processingDate,
    genie_version,
    releaseId,
    databaseSynIdMappingDf,
    publicReleaseCutOff=365,
):
    cna_path = os.path.join(database_to_staging.GENIE_RELEASE_DIR,
                            "data_CNA_%s.txt" % genie_version)
    clinical_path = os.path.join(database_to_staging.GENIE_RELEASE_DIR,
                                 "data_clinical_%s.txt" % genie_version)
    clinical_sample_path = os.path.join(
        database_to_staging.GENIE_RELEASE_DIR,
        "data_clinical_sample_%s.txt" % genie_version,
    )
    clinical_patient_path = os.path.join(
        database_to_staging.GENIE_RELEASE_DIR,
        "data_clinical_patient_%s.txt" % genie_version,
    )
    data_gene_panel_path = os.path.join(
        database_to_staging.GENIE_RELEASE_DIR,
        "data_gene_matrix_%s.txt" % genie_version)
    mutations_path = os.path.join(
        database_to_staging.GENIE_RELEASE_DIR,
        "data_mutations_extended_%s.txt" % genie_version,
    )
    fusions_path = os.path.join(database_to_staging.GENIE_RELEASE_DIR,
                                "data_fusions_%s.txt" % genie_version)
    seg_path = os.path.join(
        database_to_staging.GENIE_RELEASE_DIR,
        "genie_public_data_cna_hg19_%s.seg" % genie_version,
    )
    combined_bed_path = os.path.join(database_to_staging.GENIE_RELEASE_DIR,
                                     "genie_combined_%s.bed" % genie_version)
    if not os.path.exists(database_to_staging.GENIE_RELEASE_DIR):
        os.mkdir(database_to_staging.GENIE_RELEASE_DIR)
    if not os.path.exists(database_to_staging.CASE_LIST_PATH):
        os.mkdir(database_to_staging.CASE_LIST_PATH)

    # public release preview
    public_release_preview = databaseSynIdMappingDf["Id"][
        databaseSynIdMappingDf["Database"] == "public"].values[0]
    public_release_preview_caselist = database_to_staging.find_caselistid(
        syn, public_release_preview)

    #######################################################################
    # Sponsored projects filter
    #######################################################################
    # if before release date -> go into staging consortium
    # if after date -> go into public
    # sponsoredReleaseDate = syn.tableQuery('SELECT * FROM syn8545108')
    # sponsoredReleaseDateDf = sponsoredReleaseDate.asDataFrame()
    # sponsoredProjectSamples = syn.tableQuery('SELECT * FROM syn8545106')
    # sponsoredProjectSamplesDf = sponsoredProjectSamples.asDataFrame()
    # sponsoredProjectsDf = sponsoredProjectSamplesDf.merge(
    #     sponsoredReleaseDateDf, left_on="sponsoredProject",
    #     right_on="sponsoredProjects")
    # dates = sponsoredProjectsDf['releaseDate'].apply(
    #     lambda date: datetime.datetime.strptime(date, '%b-%Y'))
    # publicReleaseSamples = sponsoredProjectsDf['genieSampleId'][
    #     dates < processingDate]
    #######################################################################
    # SEQ_DATE filter
    # Jun-2015, given processing date (today) -> public release
    # (processing date - Jun-2015 > 12 months)
    consortiumReleaseWalk = synapseutils.walk(syn, releaseId)
    consortiumRelease = next(consortiumReleaseWalk)
    for filename, synid in consortiumRelease[2]:
        if filename == "data_clinical.txt":
            clinical = syn.get(synid, followLink=True)
        elif filename == "data_gene_matrix.txt":
            gene_matrix = syn.get(synid, followLink=True)
        elif filename == "assay_information.txt":
            assay_info = syn.get(synid, followLink=True)

    clinicalDf = pd.read_csv(clinical.path, sep="\t", comment="#")
    gene_matrixdf = pd.read_csv(gene_matrix.path, sep="\t")

    removeForPublicSamples = process_functions.seqDateFilter(
        clinicalDf, processingDate, publicReleaseCutOff)
    logger.info("SAMPLE CLASS FILTER")
    remove_sc_samples = database_to_staging.sample_class_filter(
        clinical_df=clinicalDf)
    removeForPublicSamples = list(
        set(removeForPublicSamples).union(remove_sc_samples))
    # comment back in when public release filter back on
    # publicReleaseSamples = publicReleaseSamples.append(keepForPublicSamples)
    # Make sure all null oncotree codes are removed
    clinicalDf = clinicalDf[~clinicalDf["ONCOTREE_CODE"].isnull()]
    publicReleaseSamples = clinicalDf.SAMPLE_ID[
        ~clinicalDf.SAMPLE_ID.isin(removeForPublicSamples)]
    existing_seq_dates = clinicalDf.SEQ_DATE[
        clinicalDf.SAMPLE_ID.isin(publicReleaseSamples)]
    logger.info("SEQ_DATES for public release: " +
                ", ".join(set(existing_seq_dates.astype(str))))

    # Clinical release scope filter
    # If consortium -> Don't release to public
    clinicalReleaseScope = syn.tableQuery(
        "SELECT * FROM syn8545211 where releaseScope = 'public'")
    publicRelease = clinicalReleaseScope.asDataFrame()
    allClin = clinicalDf[clinicalDf["SAMPLE_ID"].isin(publicReleaseSamples)]
    allClin.to_csv(clinical_path, sep="\t", index=False)
    gene_matrixdf = gene_matrixdf[
        gene_matrixdf["SAMPLE_ID"].isin(publicReleaseSamples)]
    gene_matrixdf.to_csv(data_gene_panel_path, sep="\t", index=False)
    storeFile(
        syn,
        data_gene_panel_path,
        public_release_preview,
        genie_version,
        name="data_gene_matrix.txt",
    )
    storeFile(
        syn,
        clinical_path,
        public_release_preview,
        genie_version,
        name="data_clinical.txt",
    )
    create_case_lists.main(
        clinical_path,
        assay_info.path,
        database_to_staging.CASE_LIST_PATH,
        "genie_public",
    )
    caseListFiles = os.listdir(database_to_staging.CASE_LIST_PATH)
    caseListEntities = []
    for casePath in caseListFiles:
        casePath = os.path.join(database_to_staging.CASE_LIST_PATH, casePath)
        caseListEntities.append(
            storeFile(syn, casePath, public_release_preview_caselist,
                      genie_version))

    # Grab mapping table to fill in clinical headers
    mapping_table = syn.tableQuery("SELECT * FROM syn9621600")
    mapping = mapping_table.asDataFrame()
    genePanelEntities = []
    for entName, entId in consortiumRelease[2]:
        # skip files to convert
        if (entName.startswith("data_linear")
                or "meta_" in entName
                or entName.endswith(".html")
                or entName in [
                    "data_clinical_sample.txt",
                    "data_gene_matrix.txt",
                    "data_clinical_patient.txt",
                    "data_guide.pdf",
                    "release_notes.pdf",
                    "samples_to_retract.csv",
                    "non_somatic.csv",
                    "snv_as_dnp.csv",
                    "snv_as_onp.csv",
                ]):
            # data_gene_matrix was processed above because it had to be
            # used for generating caselists
            continue
        if entName == "data_clinical.txt":
            patientCols = publicRelease["fieldName"][
                publicRelease["level"] == "patient"].tolist()
            sampleCols = ["PATIENT_ID"]
            sampleCols.extend(publicRelease["fieldName"][
                publicRelease["level"] == "sample"].tolist())
            # clinicalDf is defined on line 127
            clinicalDf = clinicalDf[
                clinicalDf["SAMPLE_ID"].isin(publicReleaseSamples)]
            # Delete columns that are private scope
            # for private in privateRelease:
            #     del clinicalDf[private]
            process_functions.addClinicalHeaders(
                clinicalDf,
                mapping,
                patientCols,
                sampleCols,
                clinical_sample_path,
                clinical_patient_path,
            )
            storeFile(
                syn,
                clinical_sample_path,
                public_release_preview,
                genie_version,
                name="data_clinical_sample.txt",
            )
            storeFile(
                syn,
                clinical_patient_path,
                public_release_preview,
                genie_version,
                name="data_clinical_patient.txt",
            )
        elif "mutation" in entName:
            mutation = syn.get(entId, followLink=True)
            mutationDf = pd.read_csv(mutation.path, sep="\t", comment="#")
            # mutationDf = commonVariantFilter(mutationDf)
            mutationDf["FILTER"] = "PASS"
            mutationDf = mutationDf[
                mutationDf["Tumor_Sample_Barcode"].isin(publicReleaseSamples)]
            text = process_functions.removeFloat(mutationDf)
            with open(mutations_path, "w") as f:
                f.write(text)
            storeFile(
                syn,
                mutations_path,
                public_release_preview,
                genie_version,
                name="data_mutations_extended.txt",
            )
        elif "fusion" in entName:
            fusion = syn.get(entId, followLink=True)
            fusionDf = pd.read_csv(fusion.path, sep="\t")
            fusionDf = fusionDf[
                fusionDf["Tumor_Sample_Barcode"].isin(publicReleaseSamples)]
            fusionDf.to_csv(fusions_path, sep="\t", index=False)
            storeFile(
                syn,
                fusions_path,
                public_release_preview,
                genie_version,
                name="data_fusions.txt",
            )
        elif "CNA" in entName:
            cna = syn.get(entId, followLink=True)
            cnaDf = pd.read_csv(cna.path, sep="\t")
            cna_columns = pd.concat(
                [publicReleaseSamples, pd.Series("Hugo_Symbol")])
            # parse out the CNA columns to keep
            cnaDf = cnaDf[cnaDf.columns[cnaDf.columns.isin(cna_columns)]]
            text = process_functions.removeFloat(cnaDf)
            text = (text.replace("\t\t", "\tNA\t")
                        .replace("\t\t", "\tNA\t")
                        .replace("\t\n", "\tNA\n"))
            with open(cna_path, "w") as cnaFile:
                cnaFile.write(text)
            storeFile(
                syn,
                cna_path,
                public_release_preview,
                genie_version,
                name="data_CNA.txt",
            )
        elif entName.endswith(".seg"):
            seg = syn.get(entId, followLink=True)
            segDf = pd.read_csv(seg.path, sep="\t")
            segDf = segDf[segDf["ID"].isin(publicReleaseSamples)]
            text = process_functions.removeFloat(segDf)
            with open(seg_path, "w") as segFile:
                segFile.write(text)
            storeFile(
                syn,
                seg_path,
                public_release_preview,
                genie_version,
                name="genie_public_data_cna_hg19.seg",
            )
        elif entName == "genomic_information.txt":
            bed = syn.get(entId, followLink=True)
            bedDf = pd.read_csv(bed.path, sep="\t")
            bedDf = bedDf[bedDf.SEQ_ASSAY_ID.isin(allClin.SEQ_ASSAY_ID)]
            bedDf.to_csv(combined_bed_path, sep="\t", index=False)
            storeFile(
                syn,
                combined_bed_path,
                public_release_preview,
                genie_version,
                name="genomic_information.txt",
            )
        elif entName.startswith("data_gene_panel"):
            genePanel = syn.get(entId, followLink=True)
            # Create new gene panel naming and store
            fileName = os.path.basename(genePanel.path)
            newFileList = fileName.split("_")
            newFileList[-1] = genie_version + ".txt"
            newFileName = "_".join(newFileList)
            genePanelPath = os.path.join(
                database_to_staging.GENIE_RELEASE_DIR, newFileName)
            shutil.copy(genePanel.path, genePanelPath)
            del newFileList[-1]
            entName = "_".join(newFileList)
            entName = entName + ".txt"
            genepanel_ent = storeFile(syn, genePanelPath,
                                      public_release_preview,
                                      genie_version, name=entName)
            genePanelEntities.append(genepanel_ent)
        else:
            ent = syn.get(entId, followLink=True, downloadFile=False)
            copiedId = synapseutils.copy(
                syn,
                ent,
                public_release_preview,
                version=ent.versionNumber,
                updateExisting=True,
                setProvenance=None,
                skipCopyAnnotations=True,
            )
            copiedEnt = syn.get(copiedId[ent.id], downloadFile=False)
            # Set version comment
            copiedEnt.versionComment = genie_version
            syn.store(copiedEnt, forceVersion=False)
    return caseListEntities, genePanelEntities
def test_copy(): """Tests the copy function""" # Create a Project project_entity = syn.store(Project(name=str(uuid.uuid4()))) schedule_for_cleanup(project_entity.id) acl = syn.setPermissions( project_entity, other_user['principalId'], accessType=['READ', 'CREATE', 'UPDATE', 'DOWNLOAD']) # Create two Folders in Project folder_entity = syn.store( Folder(name=str(uuid.uuid4()), parent=project_entity)) second_folder = syn.store( Folder(name=str(uuid.uuid4()), parent=project_entity)) third_folder = syn.store( Folder(name=str(uuid.uuid4()), parent=project_entity)) schedule_for_cleanup(folder_entity.id) schedule_for_cleanup(second_folder.id) schedule_for_cleanup(third_folder.id) # Annotations and provenance repo_url = 'https://github.com/Sage-Bionetworks/synapsePythonClient' annots = {'test': ['hello_world']} prov = Activity(name="test", used=repo_url) # Create, upload, and set annotations/provenance on a file in Folder filename = utils.make_bogus_data_file() schedule_for_cleanup(filename) file_entity = syn.store(File(filename, parent=folder_entity)) externalURL_entity = syn.store( File(repo_url, name='rand', parent=folder_entity, synapseStore=False)) syn.setAnnotations(file_entity, annots) syn.setAnnotations(externalURL_entity, annots) syn.setProvenance(externalURL_entity.id, prov) schedule_for_cleanup(file_entity.id) schedule_for_cleanup(externalURL_entity.id) # ------------------------------------ # TEST COPY FILE # ------------------------------------ output = synapseutils.copy(syn, file_entity.id, destinationId=project_entity.id) output_URL = synapseutils.copy(syn, externalURL_entity.id, destinationId=project_entity.id, skipCopyAnnotations=True) #Verify that our copied files are identical copied_ent = syn.get(output[file_entity.id]) copied_URL_ent = syn.get(output_URL[externalURL_entity.id], downloadFile=False) copied_ent_annot = syn.getAnnotations(copied_ent) copied_url_annot = syn.getAnnotations(copied_URL_ent) copied_prov = syn.getProvenance(copied_ent) copied_url_prov = syn.getProvenance(copied_URL_ent) schedule_for_cleanup(copied_ent.id) schedule_for_cleanup(copied_URL_ent.id) # TEST: set_Provenance = Traceback print("Test: setProvenance = Traceback") assert copied_prov['used'][0]['reference']['targetId'] == file_entity.id assert copied_url_prov['used'][0]['reference'][ 'targetId'] == externalURL_entity.id # TEST: Make sure copied files are the same assert copied_ent_annot == annots assert copied_ent.dataFileHandleId == file_entity.dataFileHandleId # TEST: Make sure copied URLs are the same assert copied_url_annot == {} assert copied_URL_ent.externalURL == repo_url assert copied_URL_ent.name == 'rand' assert copied_URL_ent.dataFileHandleId == externalURL_entity.dataFileHandleId # TEST: Throw error if file is copied to a folder/project that has a file with the same filename assert_raises(ValueError, synapseutils.copy, syn, project_entity.id, destinationId=project_entity.id) assert_raises(ValueError, synapseutils.copy, syn, file_entity.id, destinationId=project_entity.id) assert_raises(ValueError, synapseutils.copy, syn, file_entity.id, destinationId=third_folder.id, setProvenance="gib") assert_raises(ValueError, synapseutils.copy, syn, file_entity.id, destinationId=file_entity.id) print("Test: setProvenance = None") output = synapseutils.copy(syn, file_entity.id, destinationId=second_folder.id, setProvenance=None) assert_raises(SynapseHTTPError, syn.getProvenance, output[file_entity.id]) schedule_for_cleanup(output[file_entity.id]) print("Test: setProvenance = Existing") output_URL = 
synapseutils.copy(syn, externalURL_entity.id, destinationId=second_folder.id, setProvenance="existing") output_prov = syn.getProvenance(output_URL[externalURL_entity.id]) schedule_for_cleanup(output_URL[externalURL_entity.id]) assert output_prov['name'] == prov['name'] assert output_prov['used'] == prov['used'] if 'username' not in other_user or 'password' not in other_user: sys.stderr.write( '\nWarning: no test-authentication configured. skipping testing copy function when trying to copy file made by another user.\n' ) return try: print( "Test: Other user copy should result in different data file handle" ) syn_other = synapseclient.Synapse(skip_checks=True) syn_other.login(other_user['username'], other_user['password']) output = synapseutils.copy(syn_other, file_entity.id, destinationId=third_folder.id) new_copied_ent = syn.get(output[file_entity.id]) new_copied_ent_annot = syn.getAnnotations(new_copied_ent) schedule_for_cleanup(new_copied_ent.id) copied_URL_ent.externalURL = "https://www.google.com" copied_URL_ent = syn.store(copied_URL_ent) output = synapseutils.copy(syn_other, copied_URL_ent.id, destinationId=third_folder.id, version=1) new_copied_URL = syn.get(output[copied_URL_ent.id], downloadFile=False) schedule_for_cleanup(new_copied_URL.id) assert new_copied_ent_annot == annots assert new_copied_ent.dataFileHandleId != copied_ent.dataFileHandleId #Test if copying different versions gets you the correct file assert new_copied_URL.versionNumber == 1 assert new_copied_URL.externalURL == repo_url assert new_copied_URL.dataFileHandleId != copied_URL_ent.dataFileHandleId finally: syn_other.logout() # ------------------------------------ # TEST COPY LINKS # ------------------------------------ print("Test: Copy Links") second_file = utils.make_bogus_data_file() #schedule_for_cleanup(filename) second_file_entity = syn.store(File(second_file, parent=project_entity)) link_entity = Link(second_file_entity.id, parent=folder_entity.id) link_entity = syn.store(link_entity) #function under test uses queries which are eventually consistent but not immediately after creating the entities start_time = time.time() while syn.query("select id from entity where id=='%s'" % link_entity.id).get('totalNumberOfResults') <= 0: assert_less(time.time() - start_time, QUERY_TIMEOUT_SEC) time.sleep(2) copied_link = synapseutils.copy(syn, link_entity.id, destinationId=second_folder.id) old = syn.get(link_entity.id, followLink=False) new = syn.get(copied_link[link_entity.id], followLink=False) assert old.linksTo['targetId'] == new.linksTo['targetId'] assert old.linksTo['targetVersionNumber'] == new.linksTo[ 'targetVersionNumber'] schedule_for_cleanup(second_file_entity.id) schedule_for_cleanup(link_entity.id) schedule_for_cleanup(copied_link[link_entity.id]) time.sleep(3) assert_raises(ValueError, synapseutils.copy, syn, link_entity.id, destinationId=second_folder.id) # ------------------------------------ # TEST COPY TABLE # ------------------------------------ second_project = syn.store(Project(name=str(uuid.uuid4()))) schedule_for_cleanup(second_project.id) print("Test: Copy Tables") cols = [ Column(name='n', columnType='DOUBLE', maximumSize=50), Column(name='c', columnType='STRING', maximumSize=50), Column(name='i', columnType='INTEGER') ] data = [[2.1, 'foo', 10], [2.2, 'bar', 20], [2.3, 'baz', 30]] schema = syn.store( Schema(name='Testing', columns=cols, parent=project_entity.id)) row_reference_set = syn.store( RowSet(columns=cols, schema=schema, rows=[Row(r) for r in data])) table_map = 
synapseutils.copy(syn, schema.id, destinationId=second_project.id) copied_table = syn.tableQuery('select * from %s' % table_map[schema.id]) rows = copied_table.asRowSet()['rows'] # TEST: Check if all values are the same for i, row in enumerate(rows): assert row['values'] == data[i] assert_raises(ValueError, synapseutils.copy, syn, schema.id, destinationId=second_project.id) schedule_for_cleanup(schema.id) schedule_for_cleanup(table_map[schema.id]) # ------------------------------------ # TEST COPY FOLDER # ------------------------------------ print("Test: Copy Folder") mapping = synapseutils.copy(syn, folder_entity.id, destinationId=second_project.id) for i in mapping: old = syn.get(i, downloadFile=False) new = syn.get(mapping[i], downloadFile=False) assert old.name == new.name assert old.annotations == new.annotations assert old.concreteType == new.concreteType assert_raises(ValueError, synapseutils.copy, syn, folder_entity.id, destinationId=second_project.id) # TEST: Throw error if excludeTypes isn't in file, link and table or isn't a list assert_raises(ValueError, synapseutils.copy, syn, second_folder.id, destinationId=second_project.id, excludeTypes=["foo"]) assert_raises(ValueError, synapseutils.copy, syn, second_folder.id, destinationId=second_project.id, excludeTypes="file") # TEST: excludeType = ["file"], only the folder is created second = synapseutils.copy(syn, second_folder.id, destinationId=second_project.id, excludeTypes=["file", "table", "link"]) copied_folder = syn.get(second[second_folder.id]) assert copied_folder.name == second_folder.name assert len(second) == 1 # TEST: Make sure error is thrown if foldername already exists start_time = time.time() while syn.query("select id from entity where id=='%s'" % copied_folder.id).get('totalNumberOfResults') <= 0: assert_less(time.time() - start_time, QUERY_TIMEOUT_SEC) time.sleep(2) assert_raises(ValueError, synapseutils.copy, syn, second_folder.id, destinationId=second_project.id) # ------------------------------------ # TEST COPY PROJECT # ------------------------------------ print("Test: Copy Project") third_project = syn.store(Project(name=str(uuid.uuid4()))) schedule_for_cleanup(third_project.id) mapping = synapseutils.copy(syn, project_entity.id, destinationId=third_project.id) for i in mapping: old = syn.get(i, downloadFile=False) new = syn.get(mapping[i], downloadFile=False) if not isinstance(old, Project): assert old.name == new.name assert old.annotations == new.annotations assert old.concreteType == new.concreteType # TEST: Can't copy project to a folder assert_raises(ValueError, synapseutils.copy, syn, project_entity.id, destinationId=second_folder.id)
def test_copyWiki():
    # Create a Project
    project_entity = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)
    folder_entity = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(folder_entity.id)
    second_folder = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(second_folder.id)
    third_folder = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(third_folder.id)

    filename = utils.make_bogus_data_file()
    attachname = utils.make_bogus_data_file()
    schedule_for_cleanup(filename)
    file_entity = syn.store(File(filename, parent=folder_entity))
    nested_folder = syn.store(Folder(name=str(uuid.uuid4()), parent=folder_entity))
    second_file = syn.store(File(filename, parent=nested_folder))
    schedule_for_cleanup(file_entity.id)
    schedule_for_cleanup(nested_folder.id)
    schedule_for_cleanup(second_file.id)

    fileWiki = Wiki(owner=second_file, title='A Test Wiki', markdown="Test")
    fileWiki = syn.store(fileWiki)

    # Create a mock wiki
    md = """
This is a test wiki
=======================

Blabber jabber blah blah boo.
%s
%s
""" % (file_entity.id, second_file.id)
    wiki = Wiki(owner=project_entity, title='A Test Wiki', markdown=md,
                attachments=[attachname])
    wiki = syn.store(wiki)

    # Create a Wiki sub-page
    subwiki = Wiki(owner=project_entity, title='A sub-wiki',
                   markdown='%s' % file_entity.id, parentWikiId=wiki.id)
    subwiki = syn.store(subwiki)

    second_md = """
Testing internal links
======================
[test](#!Synapse:%s/wiki/%s)

%s)
""" % (project_entity.id, subwiki.id, second_file.id)
    sub_subwiki = Wiki(owner=project_entity, title='A sub-sub-wiki',
                       markdown=second_md, parentWikiId=subwiki.id,
                       attachments=[attachname])
    sub_subwiki = syn.store(sub_subwiki)

    # Copy the wiki to a second project
    second_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(second_project.id)
    fileMapping = synu.copy(syn, project_entity, second_project.id, copyWikiPage=False)

    print("Test: copyWikiPage = False")
    assert_raises(SynapseHTTPError, syn.getWiki, second_project.id)

    first_headers = syn.getWikiHeaders(project_entity)
    second_headers = synu.copyWiki(syn, project_entity.id, second_project.id,
                                   entityMap=fileMapping)

    mapping = dict()

    print("Test: Check that all wikis were copied correctly with the correct mapping")
    for index, info in enumerate(second_headers):
        mapping[first_headers[index]['id']] = info['id']
        assert first_headers[index]['title'] == info['title']
        if info.get('parentId', None) is not None:
            # Check that parent IDs are mapped correctly in the copied wikis
            assert info['parentId'] == mapping[first_headers[index]['parentId']]

    print("Test: Check that all wikis have the correct attachments and the correct internal Synapse link/file mapping")
    for index, info in enumerate(second_headers):
        # Check that the markdown is mapped correctly
        orig_wikiPage = syn.getWiki(project_entity, first_headers[index]['id'])
        new_wikiPage = syn.getWiki(second_project, info['id'])
        s = orig_wikiPage.markdown
        for oldWikiId in mapping.keys():
            oldProjectAndWikiId = "%s/wiki/%s" % (project_entity.id, oldWikiId)
            newProjectAndWikiId = "%s/wiki/%s" % (second_project.id, mapping[oldWikiId])
            s = re.sub(oldProjectAndWikiId, newProjectAndWikiId, s)
        for oldFileId in fileMapping.keys():
            s = re.sub(oldFileId, fileMapping[oldFileId], s)
        assert s == new_wikiPage.markdown
        orig_attach = syn.getWikiAttachments(orig_wikiPage)
        new_attach = syn.getWikiAttachments(new_wikiPage)
        # Check that the attachment file names are the same
        assert orig_attach == new_attach

    print("Test: copyWikiPage = True (default; should copy all wikis, including wikis on files)")
    third_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(third_project.id)
    copiedFile = synu.copy(syn, second_file, third_project.id)
    copiedWiki = syn.getWiki(copiedFile[second_file.id])
    assert copiedWiki.title == fileWiki.title
    assert copiedWiki.markdown == fileWiki.markdown

    print("Test: entitySubPageId")
    third_header = synu.copyWiki(syn, project_entity.id, third_project.id,
                                 entitySubPageId=sub_subwiki.id,
                                 destinationSubPageId=None, updateLinks=False,
                                 updateSynIds=False, entityMap=fileMapping)
    test_ent_subpage = syn.getWiki(third_project.id, third_header[0]['id'])

    print("Test: No internal links updated")
    assert test_ent_subpage.markdown == sub_subwiki.markdown
    assert test_ent_subpage.title == sub_subwiki.title

    print("Test: destinationSubPageId")
    fourth_header = synu.copyWiki(syn, project_entity.id, third_project.id,
                                  entitySubPageId=subwiki.id,
                                  destinationSubPageId=test_ent_subpage.id,
                                  updateLinks=False, updateSynIds=False,
                                  entityMap=fileMapping)
    temp = syn.getWiki(third_project.id, fourth_header[0]['id'])
    # Some copied pages currently come back with blank titles, so the title
    # assertion is disabled until that issue is addressed.
    # assert temp.title == subwiki.title
    assert temp.markdown == subwiki.markdown
    temp = syn.getWiki(third_project.id, fourth_header[1]['id'])
    assert temp.title == sub_subwiki.title
    assert temp.markdown == sub_subwiki.markdown
    assert fourth_header[0] == third_header[0]
def test_copy():
    """Tests the copy function"""
    # Create a Project
    project_entity = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)

    # Create three Folders in the Project
    folder_entity = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    second_folder = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    third_folder = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(folder_entity.id)
    schedule_for_cleanup(second_folder.id)
    schedule_for_cleanup(third_folder.id)

    # Annotations and provenance
    repo_url = 'https://github.com/Sage-Bionetworks/synapsePythonClient'
    annos = {'test': ['hello_world']}
    prov = Activity(name="test", used=repo_url)

    # Create, upload, and set annotations/provenance on a file in the Folder
    filename = utils.make_bogus_data_file()
    schedule_for_cleanup(filename)
    file_entity = syn.store(File(filename, parent=folder_entity))
    externalURL_entity = syn.store(File(repo_url, name='rand', parent=folder_entity, synapseStore=False))
    syn.setAnnotations(file_entity, annos)
    syn.setAnnotations(externalURL_entity, annos)
    syn.setProvenance(externalURL_entity.id, prov)
    schedule_for_cleanup(file_entity.id)
    schedule_for_cleanup(externalURL_entity.id)

    # ------------------------------------
    # TEST COPY FILE
    # ------------------------------------
    output = synapseutils.copy(syn, file_entity.id, destinationId=project_entity.id)
    output_URL = synapseutils.copy(syn, externalURL_entity.id, destinationId=project_entity.id,
                                   skipCopyAnnotations=True)

    # Verify that our copied files are identical
    copied_ent = syn.get(output[file_entity.id])
    copied_URL_ent = syn.get(output_URL[externalURL_entity.id], downloadFile=False)

    copied_ent_annot = syn.getAnnotations(copied_ent)
    copied_url_annot = syn.getAnnotations(copied_URL_ent)
    copied_prov = syn.getProvenance(copied_ent)
    copied_url_prov = syn.getProvenance(copied_URL_ent)
    schedule_for_cleanup(copied_ent.id)
    schedule_for_cleanup(copied_URL_ent.id)

    # TEST: setProvenance = traceback (the default)
    assert_equals(copied_prov['used'][0]['reference']['targetId'], file_entity.id)
    assert_equals(copied_url_prov['used'][0]['reference']['targetId'], externalURL_entity.id)

    # TEST: Make sure copied files are the same
    assert_equals(copied_ent_annot, annos)
    assert_equals(copied_ent.dataFileHandleId, file_entity.dataFileHandleId)

    # TEST: Make sure copied URLs are the same
    assert_equals(copied_url_annot, {})
    assert_equals(copied_URL_ent.externalURL, repo_url)
    assert_equals(copied_URL_ent.name, 'rand')
    assert_equals(copied_URL_ent.dataFileHandleId, externalURL_entity.dataFileHandleId)

    # TEST: Throw an error if a file is copied to a folder/project that already has a file with the same name
    assert_raises(ValueError, synapseutils.copy, syn, project_entity.id, destinationId=project_entity.id)
    assert_raises(ValueError, synapseutils.copy, syn, file_entity.id, destinationId=project_entity.id)
    assert_raises(ValueError, synapseutils.copy, syn, file_entity.id, destinationId=third_folder.id,
                  setProvenance="gib")
    assert_raises(ValueError, synapseutils.copy, syn, file_entity.id, destinationId=file_entity.id)

    # TEST: setProvenance = None
    output = synapseutils.copy(syn, file_entity.id, destinationId=second_folder.id, setProvenance=None)
    assert_raises(SynapseHTTPError, syn.getProvenance, output[file_entity.id])
    schedule_for_cleanup(output[file_entity.id])

    # TEST: setProvenance = existing
    output_URL = synapseutils.copy(syn, externalURL_entity.id, destinationId=second_folder.id,
                                   setProvenance="existing")
    output_prov = syn.getProvenance(output_URL[externalURL_entity.id])
    schedule_for_cleanup(output_URL[externalURL_entity.id])
    assert_equals(output_prov['name'], prov['name'])
    assert_equals(output_prov['used'], prov['used'])

    # ------------------------------------
    # TEST COPY LINKS
    # ------------------------------------
    second_file = utils.make_bogus_data_file()
    # schedule_for_cleanup(filename)
    second_file_entity = syn.store(File(second_file, parent=project_entity))
    link_entity = Link(second_file_entity.id, parent=folder_entity.id)
    link_entity = syn.store(link_entity)

    copied_link = synapseutils.copy(syn, link_entity.id, destinationId=second_folder.id)
    old = syn.get(link_entity.id, followLink=False)
    new = syn.get(copied_link[link_entity.id], followLink=False)
    assert_equals(old.linksTo['targetId'], new.linksTo['targetId'])

    schedule_for_cleanup(second_file_entity.id)
    schedule_for_cleanup(link_entity.id)
    schedule_for_cleanup(copied_link[link_entity.id])

    time.sleep(3)
    assert_raises(ValueError, synapseutils.copy, syn, link_entity.id, destinationId=second_folder.id)

    # ------------------------------------
    # TEST COPY TABLE
    # ------------------------------------
    second_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(second_project.id)
    cols = [Column(name='n', columnType='DOUBLE', maximumSize=50),
            Column(name='c', columnType='STRING', maximumSize=50),
            Column(name='i', columnType='INTEGER')]
    data = [[2.1, 'foo', 10],
            [2.2, 'bar', 20],
            [2.3, 'baz', 30]]
    schema = syn.store(Schema(name='Testing', columns=cols, parent=project_entity.id))
    syn.store(RowSet(schema=schema, rows=[Row(r) for r in data]))

    table_map = synapseutils.copy(syn, schema.id, destinationId=second_project.id)
    copied_table = syn.tableQuery('select * from %s' % table_map[schema.id])
    rows = copied_table.asRowSet()['rows']

    # TEST: Check that all copied values match the originals
    for i, row in enumerate(rows):
        assert_equals(row['values'], data[i])
    assert_raises(ValueError, synapseutils.copy, syn, schema.id, destinationId=second_project.id)
    schedule_for_cleanup(schema.id)
    schedule_for_cleanup(table_map[schema.id])

    # ------------------------------------
    # TEST COPY FOLDER
    # ------------------------------------
    mapping = synapseutils.copy(syn, folder_entity.id, destinationId=second_project.id)
    for i in mapping:
        old = syn.get(i, downloadFile=False)
        new = syn.get(mapping[i], downloadFile=False)
        assert_equals(old.name, new.name)
        assert_equals(old.annotations, new.annotations)
        assert_equals(old.concreteType, new.concreteType)
    assert_raises(ValueError, synapseutils.copy, syn, folder_entity.id, destinationId=second_project.id)

    # TEST: Throw an error if excludeTypes isn't a list or contains types other than file, link and table
    assert_raises(ValueError, synapseutils.copy, syn, second_folder.id, destinationId=second_project.id,
                  excludeTypes=["foo"])
    assert_raises(ValueError, synapseutils.copy, syn, second_folder.id, destinationId=second_project.id,
                  excludeTypes="file")
    # TEST: excludeTypes=["file", "table", "link"], only the folder is created
    second = synapseutils.copy(syn, second_folder.id, destinationId=second_project.id,
                               excludeTypes=["file", "table", "link"])
    copied_folder = syn.get(second[second_folder.id])
    assert_equals(copied_folder.name, second_folder.name)
    assert_equals(len(second), 1)
    # TEST: Make sure an error is thrown if the folder name already exists
    assert_raises(ValueError, synapseutils.copy, syn, second_folder.id, destinationId=second_project.id)

    # ------------------------------------
    # TEST COPY PROJECT
    # ------------------------------------
    third_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(third_project.id)
    mapping = synapseutils.copy(syn, project_entity.id, destinationId=third_project.id)
    for i in mapping:
        old = syn.get(i, downloadFile=False)
        new = syn.get(mapping[i], downloadFile=False)
        if not isinstance(old, Project):
            assert_equals(old.name, new.name)
            assert_equals(old.annotations, new.annotations)
            assert_equals(old.concreteType, new.concreteType)

    # TEST: Can't copy a project to a folder
    assert_raises(ValueError, synapseutils.copy, syn, project_entity.id, destinationId=second_folder.id)
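# The setProvenance assertions above cover all three accepted values. A short
# sketch of each, with "syn123" as a hypothetical file ID and three distinct
# hypothetical destination folders (copying the same file twice into one
# folder would raise the duplicate-name ValueError tested above):
import synapseclient
import synapseutils

syn = synapseclient.Synapse()
syn.login()  # assumes cached credentials

# "traceback" (the default): the copy's provenance points back at the source.
synapseutils.copy(syn, "syn123", destinationId="syn456", setProvenance="traceback")
# "existing": the source entity's own provenance record is carried over.
synapseutils.copy(syn, "syn123", destinationId="syn457", setProvenance="existing")
# None: the copy is stored without provenance, so getProvenance on it raises
# SynapseHTTPError, as asserted in the test.
synapseutils.copy(syn, "syn123", destinationId="syn458", setProvenance=None)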
def test_copy():
    """Tests the copy function"""
    # Create a Project
    project_entity = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)
    acl = syn.setPermissions(project_entity, other_user['principalId'],
                             accessType=['READ', 'CREATE', 'UPDATE'])

    # Create three Folders in the Project
    folder_entity = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    second_folder = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    third_folder = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(folder_entity.id)
    schedule_for_cleanup(second_folder.id)
    schedule_for_cleanup(third_folder.id)

    # Annotations and provenance
    repo_url = 'https://github.com/Sage-Bionetworks/synapsePythonClient'
    annots = {'test': ['hello_world']}
    prov = Activity(name="test", used=repo_url)

    # Create, upload, and set annotations/provenance on a file in the Folder
    filename = utils.make_bogus_data_file()
    schedule_for_cleanup(filename)
    file_entity = syn.store(File(filename, parent=folder_entity))
    externalURL_entity = syn.store(File(repo_url, name='rand', parent=folder_entity, synapseStore=False))
    syn.setAnnotations(file_entity, annots)
    syn.setAnnotations(externalURL_entity, annots)
    syn.setProvenance(externalURL_entity.id, prov)
    schedule_for_cleanup(file_entity.id)
    schedule_for_cleanup(externalURL_entity.id)

    # ------------------------------------
    # TEST COPY FILE
    # ------------------------------------
    output = synu.copy(syn, file_entity.id, destinationId=project_entity.id)
    output_URL = synu.copy(syn, externalURL_entity.id, destinationId=project_entity.id)

    # Verify that our copied files are identical
    copied_ent = syn.get(output[file_entity.id])
    copied_URL_ent = syn.get(output_URL[externalURL_entity.id], downloadFile=False)

    copied_ent_annot = syn.getAnnotations(copied_ent)
    copied_url_annot = syn.getAnnotations(copied_URL_ent)
    copied_prov = syn.getProvenance(copied_ent)
    copied_url_prov = syn.getProvenance(copied_URL_ent)
    schedule_for_cleanup(copied_ent.id)
    schedule_for_cleanup(copied_URL_ent.id)

    print("Test: setProvenance = Traceback")
    assert copied_prov['used'][0]['reference']['targetId'] == file_entity.id
    assert copied_url_prov['used'][0]['reference']['targetId'] == externalURL_entity.id

    # TEST: Make sure copied files are the same
    assert copied_ent_annot == annots
    assert copied_ent.dataFileHandleId == file_entity.dataFileHandleId

    # TEST: Make sure copied URLs are the same
    assert copied_url_annot == annots
    assert copied_URL_ent.externalURL == repo_url
    assert copied_URL_ent.name == 'rand'
    assert copied_URL_ent.dataFileHandleId == externalURL_entity.dataFileHandleId

    # TEST: Throw an error if a file is copied to a folder/project that already has a file with the same name
    assert_raises(ValueError, synu.copy, syn, project_entity.id, destinationId=project_entity.id)
    assert_raises(ValueError, synu.copy, syn, file_entity.id, destinationId=project_entity.id)
    assert_raises(ValueError, synu.copy, syn, file_entity.id, destinationId=third_folder.id, setProvenance="gib")

    print("Test: setProvenance = None")
    output = synu.copy(syn, file_entity.id, destinationId=second_folder.id, setProvenance=None)
    assert_raises(SynapseHTTPError, syn.getProvenance, output[file_entity.id])
    schedule_for_cleanup(output[file_entity.id])

    print("Test: setProvenance = Existing")
    output_URL = synu.copy(syn, externalURL_entity.id, destinationId=second_folder.id, setProvenance="existing")
    output_prov = syn.getProvenance(output_URL[externalURL_entity.id])
    schedule_for_cleanup(output_URL[externalURL_entity.id])
    assert output_prov['name'] == prov['name']
    assert output_prov['used'] == prov['used']

    if 'username' not in other_user or 'password' not in other_user:
        sys.stderr.write('\nWarning: no test authentication configured. '
                         'Skipping the test that copies a file made by another user.\n')
        return

    try:
        print("Test: A copy made by another user should get a different data file handle")
        syn_other = synapseclient.Synapse(skip_checks=True)
        syn_other.login(other_user['username'], other_user['password'])

        output = synu.copy(syn_other, file_entity.id, destinationId=third_folder.id)
        new_copied_ent = syn.get(output[file_entity.id])
        new_copied_ent_annot = syn.getAnnotations(new_copied_ent)
        schedule_for_cleanup(new_copied_ent.id)

        copied_URL_ent.externalURL = "https://www.google.com"
        copied_URL_ent = syn.store(copied_URL_ent)
        output = synu.copy(syn_other, copied_URL_ent.id, destinationId=third_folder.id, version=1)
        new_copied_URL = syn.get(output[copied_URL_ent.id], downloadFile=False)
        schedule_for_cleanup(new_copied_URL.id)

        assert new_copied_ent_annot == annots
        assert new_copied_ent.dataFileHandleId != copied_ent.dataFileHandleId
        # Test that copying a specific version gets you the correct file
        assert new_copied_URL.versionNumber == 1
        assert new_copied_URL.externalURL == repo_url
        assert new_copied_URL.dataFileHandleId != copied_URL_ent.dataFileHandleId
    finally:
        syn_other.logout()

    # ------------------------------------
    # TEST COPY LINKS
    # ------------------------------------
    print("Test: Copy Links")
    second_file = utils.make_bogus_data_file()
    # schedule_for_cleanup(filename)
    second_file_entity = syn.store(File(second_file, parent=project_entity))
    link_entity = Link(second_file_entity.id, parent=folder_entity.id)
    link_entity = syn.store(link_entity)

    copied_link = synu.copy(syn, link_entity.id, destinationId=second_folder.id)
    old = syn.get(link_entity.id, followLink=False)
    new = syn.get(copied_link[link_entity.id], followLink=False)
    assert old.linksTo['targetId'] == new.linksTo['targetId']
    assert old.linksTo['targetVersionNumber'] == new.linksTo['targetVersionNumber']

    schedule_for_cleanup(second_file_entity.id)
    schedule_for_cleanup(link_entity.id)
    schedule_for_cleanup(copied_link[link_entity.id])
    assert_raises(ValueError, synu.copy, syn, link_entity.id, destinationId=second_folder.id)

    # ------------------------------------
    # TEST COPY TABLE
    # ------------------------------------
    second_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(second_project.id)
    print("Test: Copy Tables")
    cols = [Column(name='n', columnType='DOUBLE', maximumSize=50),
            Column(name='c', columnType='STRING', maximumSize=50),
            Column(name='i', columnType='INTEGER')]
    data = [[2.1, 'foo', 10],
            [2.2, 'bar', 20],
            [2.3, 'baz', 30]]
    schema = syn.store(Schema(name='Testing', columns=cols, parent=project_entity.id))
    row_reference_set = syn.store(RowSet(columns=cols, schema=schema, rows=[Row(r) for r in data]))

    table_map = synu.copy(syn, schema.id, destinationId=second_project.id)
    copied_table = syn.tableQuery('select * from %s' % table_map[schema.id])
    rows = copied_table.asRowSet()['rows']

    # TEST: Check that all copied values match the originals
    for i, row in enumerate(rows):
        assert row['values'] == data[i]
    assert_raises(ValueError, synu.copy, syn, schema.id, destinationId=second_project.id)
    schedule_for_cleanup(schema.id)
    schedule_for_cleanup(table_map[schema.id])

    # ------------------------------------
    # TEST COPY FOLDER
    # ------------------------------------
    print("Test: Copy Folder")
    mapping = synu.copy(syn, folder_entity.id, destinationId=second_project.id)
    for i in mapping:
        old = syn.get(i, downloadFile=False)
        new = syn.get(mapping[i], downloadFile=False)
        assert old.name == new.name
        assert old.annotations == new.annotations
        assert old.concreteType == new.concreteType
    assert_raises(ValueError, synu.copy, syn, folder_entity.id, destinationId=second_project.id)

    # TEST: Throw an error if excludeTypes isn't a list or contains types other than file, link and table
    assert_raises(ValueError, synu.copy, syn, second_folder.id, excludeTypes=["foo"])
    assert_raises(ValueError, synu.copy, syn, second_folder.id, excludeTypes="file")
    # TEST: excludeTypes=["file", "table", "link"], only the folder is created
    second = synu.copy(syn, second_folder.id, destinationId=second_project.id,
                       excludeTypes=["file", "table", "link"])
    copied_folder = syn.get(second[second_folder.id])
    assert copied_folder.name == second_folder.name
    assert len(second) == 1
    # TEST: Make sure an error is thrown if the folder name already exists
    assert_raises(ValueError, synu.copy, syn, second_folder.id, destinationId=second_project.id)

    # ------------------------------------
    # TEST COPY PROJECT
    # ------------------------------------
    print("Test: Copy Project")
    third_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(third_project.id)
    mapping = synu.copy(syn, project_entity.id, destinationId=third_project.id)
    for i in mapping:
        old = syn.get(i, downloadFile=False)
        new = syn.get(mapping[i], downloadFile=False)
        if not isinstance(old, Project):
            assert old.name == new.name
            assert old.annotations == new.annotations
            assert old.concreteType == new.concreteType

    # TEST: Can't copy a project to a folder
    assert_raises(ValueError, synu.copy, syn, project_entity.id, destinationId=second_folder.id)
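# A short sketch of the version pinning used in the other-user test above:
# passing version=1 copies the file as it existed at version 1 (there, the
# original external URL) rather than the latest revision. "syn123" and
# "syn456" are hypothetical IDs; syn.login() assumes cached credentials.
import synapseclient
import synapseutils

syn = synapseclient.Synapse()
syn.login()

mapping = synapseutils.copy(syn, "syn123", destinationId="syn456", version=1)
copied = syn.get(mapping["syn123"], downloadFile=False)
# The copy starts its own version history at 1, carrying version 1's file handle.
print(copied.versionNumber, copied.dataFileHandleId)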