def uploadToSynapse(f):
    """Extract metadata from a file path and register the file in Synapse.

    The center is derived from the path and the remaining annotations from
    the dot-separated file name.  The file is stored as an external link
    (``synapseStore=False``) to ``URLBASE + f`` under ``DIRS[center]``.

    Args:
        f: '/'-separated path of the file.  Unless one of the special-case
            markers below matches, the fifth path component is used as the
            center name.

    Returns:
        None.  Paths whose center is not recognised are reported on stdout
        and skipped without contacting Synapse.
    """
    center, sample_id, workflow_name, date, call_type, dataType, fileType = [''] * 7

    # Two centers are recognised by a marker in the path rather than by
    # their position in it.
    if 'OICR_BL' in f:
        center = 'oicr_bl'
    elif 'CRG/clindel/somatic' in f:
        center = 'crg_clindel'
    else:
        center = f.split('/')[4]
    filename = f.split('/')[-1]

    if center in ('yale', 'wustl', 'LOHcomplete'):
        # Short naming scheme: <sample_id>.<dataType>.<...>.<fileType>[.gz]
        if filename == 'bd829214-f230-4331-b234-def10bbe7938CNV.vcf.gz':
            # This one file lacks the '.' between sample id and data type,
            # so its fields are supplied by hand.
            sample_id, dataType, fileType = 'bd829214-f230-4331-b234-def10bbe7938', 'cnv', 'vcf'
        else:
            sample_id, dataType = filename.lower().split('.')[:2]
            # Last extension that is not the compression suffix.
            fileType = [i for i in filename.split('.')[2:] if i != 'gz'][-1]
    elif center in ('broad', 'BSC', 'oicr_sga', 'mda_kchen', 'MDA_HGSC',
                    'mcgill_popsv', 'sfu', 'UCSC', 'oicr_bl', 'Synteka_pgm21',
                    'crg_clindel'):
        # Long naming scheme:
        # <sample_id>.<workflow>.<date>.<call_type>.<dataType>.<...>.<fileType>[.gz]
        # 'indels' is normalised to 'indel' before the name is split.
        # (The original passed a bare split(...) as a third argument to
        # replace(), which raised NameError.)
        sample_id, workflow_name, date, call_type, dataType = (
            filename.replace('indels', 'indel').split('.')[:5])
        fileType = [i for i in filename.split('.')[5:] if i != 'gz'][-1]
    else:
        print('Not uploading: %s' % f)
        return

    print('%s %s %s %s %s %s' % (center, workflow_name, date, call_type,
                                 dataType, fileType))

    # The URL is only needed once we know the file will be uploaded.
    url = URLBASE + f
    entity = File(url, parentId=DIRS[center], synapseStore=False)
    entity.center = center.lower()
    entity.sample_id = sample_id
    entity.workflow_name = workflow_name
    entity.date = date
    entity.call_type = call_type
    entity.dataType = 'DNA'
    entity.disease = 'Cancer'
    # The data type parsed from the file name is stored as the sub type.
    entity.dataSubType = dataType
    entity.fileType = fileType
    #entity.analysis_id_tumor = ?????
    syn.store(entity, forceVersion=False)
# Example #2
0
def storeFile(fileName, stagingID, used, center, annotations, meta=False):
    """Annotate a local file and store it in Synapse under ``stagingID``.

    Args:
        fileName: Path of the file to upload.
        stagingID: Synapse id of the parent container.
        used: Provenance passed to ``syn.store`` as ``used``.
        center: Value for the ``center`` annotation.
        annotations: Mapping read with ``.get`` for the optional keys
            ``dataSubType``, ``dataType``, ``fileType``, ``platform`` and
            ``tissueSource``; missing keys become empty strings.
        meta: When true, ``fileType`` and ``dataType`` are overridden with
            ``"txt"`` and ``"meta"``.

    Returns:
        Whatever ``syn.store`` returns for the stored file.
    """
    print("STORING FILES")
    fileEnt = File(fileName, parent=stagingID)
    fileEnt.center = center
    fileEnt.dataSubType = annotations.get("dataSubType", '')
    fileEnt.dataType = annotations.get("dataType", '')
    fileEnt.disease = 'cancer'
    fileEnt.fileType = annotations.get("fileType", '')
    # The literal had been mangled to 'H**o Sapiens' by a text filter.
    fileEnt.organism = 'Homo Sapiens'
    fileEnt.platform = annotations.get("platform", '')
    fileEnt.tissueSource = annotations.get("tissueSource", '')
    fileEnt.consortium = 'GENIE'
    if meta:
        # Metadata files get fixed type annotations regardless of input.
        fileEnt.fileType = "txt"
        fileEnt.dataType = "meta"
    fileEnt.fileStage = "staging"
    # Provenance belongs in the `used` keyword; the original passed it as
    # `annotations=`, which is not a documented store() parameter.
    ent = syn.store(fileEnt, used=used)
    return ent
        'tissueType':
        ['Frontal Pole', 'Superior Temporal Gyrus', 'Parahippocampal Gyrus'],
        'tissueTypeAbrv': ['FP', 'STG', 'PHG'],
        'name':
        'AMP-AD_MSBB_MSSM_IlluminaHiSeq2500_mRNA_normalized-sex-race-age-RIN-PMI-batch-site.corrected.csv'
    },
}

# Re-upload every entity listed in toMove under its new name and parent,
# recording the old entity and this script as provenance.
for id, v in toMove.items():
    ent = syn.get(id)
    print(v['name'])
    # Give the downloaded copy its new name before uploading it.
    os.rename(ent.path, v['name'])

    # The Synapse display name drops the first 7 and last 4 characters of
    # the file name (for the names visible here: the 'AMP-AD_' prefix and
    # the extension).
    f = File(v['name'], parentId=v['parentId'], name=v['name'][7:-4])
    print(f.name)
    f.consortium, f.study, f.center, f.disease = consortium, study, center, disease
    f.dataType = v['dataType']
    # Was misspelled 'platfrom', which created a wrongly named annotation.
    f.platform = v['platform']
    if 'tissueTypeAbrv' in v:
        f.tissueTypeAbrv = v['tissueTypeAbrv']
        f.tissueType = v['tissueType']
    f.fileType = fileType
    f.organism = organism
    f = syn.store(
        f,
        used=[id],
        executed=[
            'https://github.com/Sage-Bionetworks/ampAdScripts/blob/e71bbde262625e6999ea9defd98e10fce8f3c542/Mount-Sinai/migrateMSBBMetaAndRNASeq.py'
        ],
        activityName='Data migration')
# Example #4
0
# Maps the platform token found in the old entity names to the platform
# name used in the new file names and annotations.
PLATFORM_MAP = {'133AB': 'AffymetrixU133AB',
                'Plus2': 'AffymetrixU133Plus2'}

# List the children of the old parent folder and re-upload each one under
# NEWPARENTID with a standardised name and annotations.
query = 'select id, name from entity where parentId=="%s"' % OLDPARENTID
df = synapseHelpers.query2df(syn.chunkedQuery(query))
# NOTE(review): the range starts at 1, so row 0 of the query result is
# never processed - confirm this is intentional.
for i in range(1, df.shape[0]):
    # NOTE(review): DataFrame.ix was removed in pandas 1.0; kept as-is
    # because the index produced by query2df is not visible here.
    row = df.ix[i, :]
    ent = syn.get(row.id)
    # Old names are expected to have exactly five '_'-separated fields;
    # any other shape raises ValueError here.
    fStudy, fTissue, fPlatform, fDatatype, fRest = ent.name.split('_')
    name = 'AMP-AD_MSBB_MSSM_%s_%s_%s' % (PLATFORM_MAP[fPlatform],
                                          TISSUEABRMAP[fTissue][0], fRest)
    print(name)
    # Give the downloaded copy its new name before uploading it.
    os.rename(ent.path, name)

    # The Synapse display name omits the leading 'AMP-AD_' (7 characters).
    f = File(name, parentId=NEWPARENTID, name=name[7:])
    f.consortium = 'AMP-AD'
    f.study = 'MSBB'
    f.center = 'MSSM'
    f.dataType = 'mRNA'
    f.disease = 'Alzheimers Disease'
    # Was misspelled 'platfrom', which created a wrongly named annotation.
    f.platform = PLATFORM_MAP[fPlatform]
    f.tissueTypeAbrv = TISSUEABRMAP[fTissue][1]
    f.tissueType = TISSUEABRMAP[fTissue][0]
    f.dataSubType = 'geneExp'
    f.fileType = 'genomicMatrix'
    f.organism = 'human'
    f = syn.store(f, used=[ent], executed=['https://github.com/Sage-Bionetworks/ampAdScripts/blob/4d7d6b78b1e73058483354a1a18bff7422966a4b/Mount-Sinai/migrateMSBBExpression.py'], activityName='Data migration')
    

# Example #5
0
import os  # os.path.basename is used below; the original read 'of.path'

DST_FOLDER = 'syn3079564'  # test upload folder

# Create the provenance record.  The original call was missing a comma
# after `used` and misspelled the `description` and `executed` keywords.
# NOTE(review): `executed` is filled from meta_data['used'], same as
# `used` - confirm whether a separate 'executed' entry was intended.
provenance = Activity(name=meta_data['activity'],
                      description=meta_data['description'],
                      used=meta_data['used'],
                      executed=meta_data['used'])
#prov = syn.store(prov)

name = os.path.basename(input_path)
# Add metadata to the file to be uploaded.
f = File(input_path, name=name, parentId=DST_FOLDER)
f.dataType = meta_data['dataType']
# NOTE(review): fileType is also read from meta_data['dataType'] - confirm
# whether a dedicated 'fileType' entry was intended.
f.fileType = meta_data['dataType']
f.variant_workflow = meta_data['workflow']
f.variant_workflow_version = meta_data['workflowVersion']
f.call_type = call_type
f.reference_build = meta_data['referenceBuild']
f.center_name = meta_data['center_name']
f.file_md5 = synapseclient.utils.md5_for_file(input_path)
f.study = 'PCAWG 2.0'
f.submitter_donor_id = meta_data['donor_id']
f.alignment_workflow_name = 'Workflow_Bundle_BWA (UCSC Implementation)'
f.alignment_workflow_source_url = 'https://github.com/kellrott/tcga_realign'
f.alignment_workflow_version = '2.6.0'


# Store metadata and file to Synapse (currently disabled).
#f = syn.store(f, activity = provenance)
                path = os.path.join(dirpath, filename)
                stat = os.stat(path)
                if stat.st_size > 0:

                    mtime = stat.st_mtime
                    previous_mtime = previous_uploads.get(path, None)
                    if mtime > previous_mtime:

                        print('Uploading {0}...'.format(path))
                        f = File(path, parent=parents[dirpath], name=filename)

                        #-------------------------------------------------------------                                              
                        # Annotate the file on Synapse:                                                                             
                        #-------------------------------------------------------------                                              
                        for istr2, str2 in enumerate(types):
                            if filename.endswith(str2):
                                f.fileType = type_names[istr2]

                        # Optionally add "syn.store(f, used='http://..)"                                                            
                        # to specify the source location                                                                            
                        syn.store(f)

                        c = conn.cursor()
                        c.execute('INSERT OR REPLACE INTO files (path, mtime) VALUES ("%s", "%s")' % (path, mtime))
                        conn.commit()

finally:
    conn.close()


                  'tissueTypeAbrv': ['FP', 'STG', 'PHG'],
                  'name' :'AMP-AD_MSBB_MSSM_IlluminaHiSeq2500_mRNA_rawCounts.tsv'},
    'syn2920161':{'parentId' :'syn3157743',  #'normalized.sex_race_age_RIN_PMI_batch_site.corrected.csv'
                  'dataType': 'mRNA',                  
                  'platform': 'IlluminaHiSeq2500',
                  'tissueType':['Frontal Pole', 'Superior Temporal Gyrus','Parahippocampal Gyrus'],
                  'tissueTypeAbrv': ['FP', 'STG', 'PHG'],
                  'name' :'AMP-AD_MSBB_MSSM_IlluminaHiSeq2500_mRNA_normalized-sex-race-age-RIN-PMI-batch-site.corrected.csv'},
    }


# Re-upload every entity listed in toMove under its new name and parent,
# recording the old entity and this script as provenance.
for id, v in toMove.items():
    ent = syn.get(id)
    print(v['name'])
    # Give the downloaded copy its new name before uploading it.
    os.rename(ent.path, v['name'])

    # The Synapse display name drops the first 7 and last 4 characters of
    # the file name (for the names visible here: the 'AMP-AD_' prefix and
    # the extension).
    f = File(v['name'], parentId=v['parentId'], name=v['name'][7:-4])
    print(f.name)
    f.consortium, f.study, f.center, f.disease = consortium, study, center, disease
    f.dataType = v['dataType']
    # Was misspelled 'platfrom', which created a wrongly named annotation.
    f.platform = v['platform']
    if 'tissueTypeAbrv' in v:
        f.tissueTypeAbrv = v['tissueTypeAbrv']
        f.tissueType = v['tissueType']
    f.fileType = fileType
    f.organism = organism
    f = syn.store(f, used=[id], executed=['https://github.com/Sage-Bionetworks/ampAdScripts/blob/e71bbde262625e6999ea9defd98e10fce8f3c542/Mount-Sinai/migrateMSBBMetaAndRNASeq.py'],
                  activityName='Data migration')