def test_store_activity():
    # Build a File plus an Activity that records two 'used' URLs
    path = utils.make_bogus_binary_file()
    schedule_for_cleanup(path)
    entity = File(path, name='Hinkle horn honking holes', parent=project)
    honking = Activity(
        name='Hinkle horn honking',
        description='Nettlebed Cave is a limestone cave located on the South Island of New Zealand.')
    for flickr_url in ('http://www.flickr.com/photos/bevanbfree/3482259379/',
                       'http://www.flickr.com/photos/bevanbfree/3482185673/'):
        honking.used(flickr_url)

    # Storing the entity does not populate the Activity's ID...
    entity = syn.store(entity, activity=honking)

    # ...but fetching provenance returns the persisted record, ID included
    honking = syn.getProvenance(entity.id)

    # Check that the stored Activity round-tripped intact
    assert honking['name'] == 'Hinkle horn honking'
    assert len(honking['used']) == 2
    for used_item in honking['used']:
        assert used_item['concreteType'] == 'org.sagebionetworks.repo.model.provenance.UsedURL'
        assert used_item['wasExecuted'] == False
    assert honking['used'][0]['url'].startswith('http://www.flickr.com/photos/bevanbfree/3482')

    # A second entity stored with the same Activity shares its ID
    entity = File('http://en.wikipedia.org/wiki/File:Nettlebed_cave.jpg',
                  name='Nettlebed Cave', parent=project, synapseStore=False)
    entity = syn.store(entity, activity=honking)

    honking2 = syn.getProvenance(entity)
    assert honking['id'] == honking2['id']
def test_store_activity():
    """Test storing entities with Activities"""
    project = create_project()

    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)

    f = File(path, name='Hinkle horn honking holes', parent=project)

    # Provenance record pointing at two external URLs
    description = ('Nettlebed Cave is a limestone cave located on the '
                   'South Island of New Zealand.')
    honking = Activity(name='Hinkle horn honking', description=description)
    honking.used('http://www.flickr.com/photos/bevanbfree/3482259379/')
    honking.used('http://www.flickr.com/photos/bevanbfree/3482185673/')

    ## store() alone does not give the Activity an ID...
    f = syn.store(f, activity=honking)

    ## ...fetching provenance back from Synapse does
    honking = syn.getProvenance(f.id)

    assert honking['name'] == 'Hinkle horn honking'
    used = honking['used']
    assert len(used) == 2
    url_type = 'org.sagebionetworks.repo.model.provenance.UsedURL'
    assert used[0]['concreteType'] == url_type
    assert used[0]['wasExecuted'] == False
    assert used[0]['url'].startswith('http://www.flickr.com/photos/bevanbfree/3482')
    assert used[1]['concreteType'] == url_type
    assert used[1]['wasExecuted'] == False

    ## a second entity stored with the same activity shares the provenance id
    f2 = File('http://en.wikipedia.org/wiki/File:Nettlebed_cave.jpg',
              name='Nettlebed Cave', parent=project)
    f2 = syn.store(f2, activity=honking)

    honking2 = syn.getProvenance(f2)

    assert honking['id'] == honking2['id']
Exemplo n.º 3
0
def loadOneSample(a):
    """Goes through a single json annotation file a and:
        1) Finds the parent Folder where to store the file (or makes directories)
        2) Fetches the md5 of any existing file and compares
        3) If new or different md5 upload file.

    Relies on module-level ``syn`` (Synapse client) and ``args`` (CLI options).
    """
    logging.debug("Loading:" + a)
    with open(a) as handle:
        meta = json.load(handle)
    # Data file path = annotation path with the trailing '.json' removed
    dpath = re.sub(r'.json$', '', a)
    #Skip the rest of the loop if data file is empty or we are not doing the current acronyms
    if os.stat(dpath).st_size == 0 or (
            args.acronym != meta['annotations']['acronym']
            and args.acronym is not None):
        return

    parentId = getParentFolder(syn, args.project, meta)
    #Determine if we are updating an existing file and if we should update based on md5
    query = "select id from entity where parentId=='%s' and name=='%s'" % (
        parentId, meta['name'])
    res = list(syn.chunkedQuery(query))
    if len(res) != 0:
        # Existing entity: re-upload only when the stored md5 differs
        tmp_ent = syn.get(res[0]['entity.id'], downloadFile=False)
        upload = (tmp_ent.md5 != meta['annotations']['md5'])
        logging.debug("\tFound: %s and upload (MD5 %s match)" %
                      (tmp_ent.id, 'DOESN\'T' if upload else 'does'))
    else:
        logging.debug("\tNot found:" + meta['name'])
        upload = True
    #Prepare the entity for upload
    # Without --push this is a dry run: just report what would be uploaded
    if upload and not args.push:
        logging.info("\tWILL UPLOAD: %s" % meta['name'])
    if upload and args.push:
        entity = File(dpath,
                      name=meta['name'],
                      parentId=parentId,
                      annotations=meta['annotations'])
        if 'provenance' in meta:
            #Fix labels for urls
            # used-URL entries need a 'name'; default it to the url itself
            for u in meta['provenance']['used']:
                if 'name' not in u and 'url' in u:
                    u['name'] = u['url']
            prov = Activity(data=meta['provenance'])
            prov.executed('https://github.com/Sage-Bionetworks/tcgaImport')

        else:
            prov = None
        logging.debug('\tUploading:%s' % entity.name)
        entity = syn.store(entity, activity=prov)
        logging.debug('\tCreated/Updated: **** %s ****' % entity.id)
Exemplo n.º 4
0
def test_activity_creation_from_dict():
    """test that activities are created correctly from a dictionary"""
    used_entry = {
        'reference': {'targetId': 'syn12345', 'versionNumber': 42},
        'wasExecuted': True,
    }
    a = Activity(data={'name': 'Project Fuzz',
                       'description': 'hipster beard dataset',
                       'used': [used_entry]})

    # Top-level fields are copied straight through
    assert a['name'] == 'Project Fuzz'
    assert a['description'] == 'hipster beard dataset'

    # The single 'used' record survives with its reference intact
    assert len(a['used']) == 1
    u = a['used'][0]
    assert u['wasExecuted']
    assert u['reference']['targetId'] == 'syn12345'
    assert u['reference']['versionNumber'] == 42
def test_store_activity():
    """Test storing entities with Activities"""
    project = create_project()

    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)

    f = File(path, name='Hinkle horn honking holes', parent=project)

    honking = Activity(
        name='Hinkle horn honking',
        description='Nettlebed Cave is a limestone cave located on the South Island of New Zealand.'
    )
    urls = [
        'http://www.flickr.com/photos/bevanbfree/3482259379/',
        'http://www.flickr.com/photos/bevanbfree/3482185673/',
    ]
    for url in urls:
        honking.used(url)

    ## storing the file does not set the activity's id ...
    f = syn.store(f, activity=honking)

    ## ... but the provenance fetched back from Synapse carries one
    honking = syn.getProvenance(f.id)

    assert honking['name'] == 'Hinkle horn honking'
    assert len(honking['used']) == 2
    expected_type = 'org.sagebionetworks.repo.model.provenance.UsedURL'
    first, second = honking['used']
    assert first['concreteType'] == expected_type
    assert first['wasExecuted'] == False
    assert first['url'].startswith('http://www.flickr.com/photos/bevanbfree/3482')
    assert second['concreteType'] == expected_type
    assert second['wasExecuted'] == False

    ## a second entity stored with the same activity should share its id
    f2 = syn.store(
        File('http://en.wikipedia.org/wiki/File:Nettlebed_cave.jpg',
             name='Nettlebed Cave', parent=project),
        activity=honking)

    honking2 = syn.getProvenance(f2)

    assert honking['id'] == honking2['id']
def loadOneSample(a):
    """Goes through a single json annotation file a and:
        1) Finds the parent Folder where to store the file (or makes directories)
        2) Fetches the md5 of any existing file and compares
        3) If new or different md5 upload file.

    Uses module-level ``syn`` (Synapse client) and ``args`` (CLI options).
    """
    logging.debug( "Loading:" + a )
    with open(a) as handle:
        meta = json.load(handle)
    # Data file path = annotation path with the trailing '.json' removed
    dpath = re.sub(r'.json$', '', a)
    #Skip the rest of the loop if data file is empty or we are not doing the current acronyms
    if os.stat(dpath).st_size==0 or (args.acronym != meta['annotations']['acronym'] and args.acronym is not None):
        return 

    parentId= getParentFolder(syn, args.project, meta)
    #Determine if we are updating an existing file and if we should update based on md5
    query = "select id from entity where parentId=='%s' and name=='%s'" % (parentId, meta['name'])
    res = list(syn.chunkedQuery(query))
    if len(res) != 0:
        # Existing entity found: re-upload only when the md5 differs
        tmp_ent = syn.get(res[0]['entity.id'], downloadFile=False)
        upload = (tmp_ent.md5 != meta['annotations']['md5'])
        logging.debug( "\tFound: %s and upload (MD5 %s match)" %(tmp_ent.id, 'DOESN\'T' if upload else 'does'))
    else:
        logging.debug("\tNot found:" + meta['name'])
        upload = True
    #Prepare the entity for upload
    # Without --push this is a dry run: just report what would be uploaded
    if upload and not args.push:
        logging.info( "\tWILL UPLOAD: %s" %meta['name'])
    if upload and args.push: 
        entity = File(dpath, name=meta['name'], parentId=parentId, annotations=meta['annotations'])
        if 'provenance' in meta:
            #Fix labels for urls
            # used-URL entries need a 'name'; default it to the url itself
            for u in meta['provenance']['used']:
                if 'name' not in u and 'url' in u:
                    u['name'] = u['url']
            prov = Activity(data=meta['provenance'])
            prov.executed('https://github.com/Sage-Bionetworks/tcgaImport')

        else:
            prov=None
        logging.debug('\tUploading:%s' %entity.name)
        entity = syn.store(entity, activity=prov)
        logging.debug('\tCreated/Updated: **** %s ****' %entity.id)
Exemplo n.º 7
0
def setProvenance(args, syn):
    """Set provenance information on a synapse entity.

    Builds an Activity from the CLI args, resolves each used/executed item
    via _convertProvenanceList, attaches the record to entity args.id, and
    optionally writes the JSON result to args.output (or stdout).
    Python 2 code (print statement at the end).
    """

    activity = Activity(name=args.name, description=args.description)

    if args.used:
        # Resolve each reference (synapse id / url / local path) before adding
        for item in _convertProvenanceList(args.used, args.limitSearch, syn):
            activity.used(item)
    if args.executed:
        for item in _convertProvenanceList(args.executed, args.limitSearch,
                                           syn):
            # 'executed' resources are 'used' resources flagged wasExecuted
            activity.used(item, wasExecuted=True)
    activity = syn.setProvenance(args.id, activity)

    # Display the activity record, if -o or -output specified
    if args.output:
        if args.output == 'STDOUT':
            sys.stdout.write(json.dumps(activity))
            sys.stdout.write('\n')
        else:
            with open(args.output, 'w') as f:
                f.write(json.dumps(activity))
                f.write('\n')
    else:
        print 'Set provenance record %s on entity %s\n' % (str(
            activity['id']), str(args.id))
Exemplo n.º 8
0
def setProvenance(args, syn):
    """Set provenance information on a synapse entity.

    Builds an Activity from the CLI args, attaches it to entity args.id,
    and optionally writes the JSON record to args.output (or stdout).
    Python 2 code (print statement at the end).
    """
    
    activity = Activity(name=args.name, description=args.description)
    if args.used:
        for item in args.used:
            activity.used(item)
    if args.executed:
        for item in args.executed:
            # 'executed' resources are 'used' resources flagged wasExecuted
            activity.used(item, wasExecuted=True)
    activity = syn.setProvenance(args.id, activity)

    # Display the activity record, if -o or -output specified
    if args.output:
        if args.output=='STDOUT':
            sys.stdout.write(json.dumps(activity))
            sys.stdout.write('\n')
        else:
            with open(args.output, 'w') as f:
                f.write(json.dumps(activity))
                f.write('\n')
    else:
        print 'Set provenance record %s on entity %s\n' % (str(activity['id']), str(args.id))
Exemplo n.º 9
0
def test_activity_parameter_errors():
    """Test error handling in Activity.used()"""
    a = Activity(name='Foobarbat', description='Apply foo to a bar and a bat')

    # A list argument may not be combined with a url keyword
    with pytest.raises(SynapseMalformedEntityError):
        a.used(['syn12345', 'http://google.com'], url='http://amazon.com')

    # A synapse-id target conflicts with a url keyword
    with pytest.raises(SynapseMalformedEntityError):
        a.used('syn12345', url='http://amazon.com')

    # A url target cannot carry a targetVersion
    with pytest.raises(SynapseMalformedEntityError):
        a.used('http://amazon.com', targetVersion=1)
Exemplo n.º 10
0
def add_workflow_step_to_synapse(inFilePath,
                                 stepDict,
                                 step='1',
                                 software=None,
                                 parentid=None,
                                 syn=None,
                                 stepIDs=None,
                                 inFilename=None):
    '''Uploads files with provenance and annotations to Synapse.

    Builds an Activity from the step's 'used'/'depends'/'executed' entries,
    stores the file under parentid, applies any annotations, and returns the
    new entity id.  Python 2 code (print statement near the end).
    '''
    usedList = None
    if not inFilename:
        inFilename = os.path.basename(inFilePath.strip())
    if not software:
        software = stepDict['softwareName']
    # 'used' is a comma-separated string; 'depends' adds the id of a prior step
    if 'used' in stepDict:
        usedList = stepDict['used'].strip().split(',')
        if 'depends' in stepDict:
            usedList.append(stepIDs[stepDict['depends']])
    elif 'depends' in stepDict:
        # NOTE(review): here usedList becomes a bare id rather than a list —
        # the asymmetry with the branch above looks accidental; confirm that
        # Activity.used() accepts both forms before changing it
        usedList = stepIDs[stepDict['depends']]
    execList = stepDict['executed'].strip().split(';')

    act = Activity(name=stepDict['actName'],
                   description=stepDict['description'])
    if usedList is not None:
        act.used(usedList)
    # Each executed item is 'target[,version]'; URLs get a display name,
    # synapse ids get a target version (default 1)
    for item in execList:
        splitItem = item.split(',')
        target = splitItem[0]
        version = 1
        if (len(splitItem) > 1):
            version = splitItem[1]
        if target.startswith('http'):
            act.executed(url=target, name=os.path.basename(target))
        else:
            act.executed(target=target, targetVersion=version)

    step_file = File(path=inFilePath,
                     name=inFilename,
                     description=stepDict['fileDescription'],
                     parentId=parentid,
                     synapseStore=str2bool(stepDict['store']))
    step_file = syn.store(step_file, activity=act, forceVersion=False)
    if 'annotations' in stepDict:
        syn.setAnnotations(step_file, annotations=stepDict['annotations'])
    print 'new entity id %s' % step_file.id
    return (step_file.id)
Exemplo n.º 11
0
 def tmp(self, path, setter, parent):
     """Upload *path* to Synapse under *parent*, record provenance and study
     annotations, hand the new synapse id to *setter*, then delete the local
     file.  Returns 0 on failure; returns None on success.  All outcomes are
     logged via self.__log_it."""
     try:
         result = syn.store(File(path, parentId=parent))
         synid = result.properties['id']
         # Give the caller-supplied setter the new id (e.g. to persist it)
         setter(synid)
         syn.setProvenance(
             synid, activity=Activity(name='gTap Archive Manager'))
         syn.setAnnotations(synid,
                            annotations={
                                'study_id': self.consent.study_id,
                                'internal_id': self.consent.internal_id
                            })
         self.__log_it(f'uploaded {path} data as {synid}')
         # Local copy is removed only after a fully successful upload
         os.remove(path)
     except Exception as e:
         # Broad catch is deliberate: any failure is logged, not raised
         self.__log_it(f'uploading {path} data failed with <{str(e)}>')
         return 0
def test_provenance(syn, project, schedule_for_cleanup):
    """End-to-end provenance round trip: set, get, update, and delete an
    Activity on a stored data entity."""
    # Create a File Entity
    fname = utils.make_bogus_data_file()
    schedule_for_cleanup(fname)
    data_entity = syn.store(File(fname, parent=project['id']))

    # Create a File Entity of Code
    fd, path = tempfile.mkstemp(suffix=".py")
    with os.fdopen(fd, 'w') as f:
        f.write(
            utils.normalize_lines("""
            ## Chris's fabulous random data generator
            ############################################################
            import random
            random.seed(12345)
            data = [random.gauss(mu=0.0, sigma=1.0) for i in range(100)]
            """))
    schedule_for_cleanup(path)
    code_entity = syn.store(File(path, parent=project['id']))

    # Create a new Activity asserting that the Code Entity was 'used'
    activity = Activity(name='random.gauss',
                        description='Generate some random numbers')
    activity.used(code_entity, wasExecuted=True)
    # A 'used' resource can also be an arbitrary named URL
    activity.used(
        {
            'name': 'Superhack',
            'url': 'https://github.com/joe_coder/Superhack'
        },
        wasExecuted=True)
    activity = syn.setProvenance(data_entity, activity)

    # Retrieve and verify the saved Provenance record
    retrieved_activity = syn.getProvenance(data_entity)
    assert retrieved_activity == activity

    # Test Activity update
    new_description = 'Generate random numbers like a gangsta'
    retrieved_activity['description'] = new_description
    updated_activity = syn.updateActivity(retrieved_activity)
    assert updated_activity['name'] == retrieved_activity['name']
    assert updated_activity['description'] == new_description

    # Test delete
    syn.deleteProvenance(data_entity)
    # After deletion, fetching provenance must raise SynapseHTTPError
    pytest.raises(SynapseHTTPError, syn.getProvenance, data_entity['id'])
def update_figure_and_table(sources,
                            script_commit_url=None,
                            replace_table=False,
                            force_update=False,
                            dry_run=False):
    """Refresh the Pilot-63 progress table and bar-chart figure in Synapse.

    Rebuilds the metadata frame from *sources* and, when rows changed (or
    *force_update* is set), refreshes provenance and regenerates the progress
    plot.  With *dry_run* nothing is written to Synapse.  Python 2 code
    (print statement).  Uses module-level ``syn`` and *_SYNAPSE_ID constants.
    """
    df_all, df, df_progress = create_metadata_df(sources)
    print df_progress.groupby(["source", "variant_type"])["synapse_id"].count()

    table = add_new_rows_to_table(df_progress, replace_table, dry_run=dry_run)

    if table or force_update:
        script_entity = syn.get(THIS_SCRIPT_SYNAPSE_ID, downloadFile=False)
        if script_commit_url and not dry_run:
            # Point the stored script entity at the exact commit that ran
            script_entity.externalURL = script_commit_url
            fileHandle = syn._addURLtoFileHandleService(
                script_commit_url, mimetype="text/x-python")
            script_entity.dataFileHandleId = fileHandle['id']
            script_entity = syn.store(script_entity)

        activity = Activity(
            name='Pilot-63-progress',
            description=
            'Track VCF files uploaded for the PCAWG Pilot-63 project',
            used=list(set(source.folder_id for source in sources)),
            executed=[script_entity])

        if not dry_run:
            activity = syn.setProvenance(TABLE_SYNAPSE_ID, activity)

        image_filename = "pilot-63-progress.png"
        plot_progress(df_progress, sources, image_filename)

        # Re-upload the chart image with the refreshed provenance attached
        bar_chart = syn.get(BAR_CHART_SYNAPSE_ID, downloadFile=False)
        bar_chart.path = "pilot-63-progress.png"
        bar_chart.synapseStore = True
        if not dry_run:
            bar_chart = syn.store(bar_chart, activity=activity)
Exemplo n.º 14
0
def test_activity_creation_by_constructor():
    """test activity creation adding used entities by the constructor"""

    # Three flavors of 'used' specification: a reference dict, an
    # entity-like dict, and a bare synapse id
    reference_usage = {
        'reference': {'targetId': 'syn101', 'targetVersionNumber': 42},
        'wasExecuted': False,
    }
    entity_usage = {
        'id': 'syn102',
        'versionNumber': 2,
        'concreteType': 'org.sagebionetworks.repo.model.FileEntity',
    }
    bare_id_usage = 'syn103'

    a = Activity(name='Fuzz',
                 description='hipster beard dataset',
                 used=[reference_usage, bare_id_usage],
                 executed=[entity_usage])

    def find(target):
        # Locate the normalized 'used' record for a given target id
        return utils._find_used(
            a, lambda res: res['reference']['targetId'] == target)

    used_syn101 = find('syn101')
    assert used_syn101 is not None
    assert used_syn101['reference']['targetVersionNumber'] == 42
    assert not used_syn101['wasExecuted']

    used_syn102 = find('syn102')
    assert used_syn102 is not None
    assert used_syn102['reference']['targetVersionNumber'] == 2
    assert used_syn102['wasExecuted']

    assert find('syn103') is not None
Exemplo n.º 15
0
def test_activity_used_execute_methods():
    """test activity creation and used and execute methods"""
    a = Activity(name='Fuzz', description='hipster beard dataset')
    a.used({
        'id': 'syn101',
        'versionNumber': 42,
        'concreteType': 'org.sagebionetworks.repo.model.FileEntity'
    })
    a.executed('syn102', targetVersion=1)
    usedEntities = a['used']
    # BUG FIX: this line previously read `len(usedEntities), 2` — a bare
    # tuple expression that asserted nothing.
    assert len(usedEntities) == 2

    assert a['name'] == 'Fuzz'
    assert a['description'] == 'hipster beard dataset'

    # used() records a non-executed reference...
    used_syn101 = utils._find_used(
        a, lambda res: res['reference']['targetId'] == 'syn101')
    assert used_syn101['reference']['targetVersionNumber'] == 42
    assert not used_syn101['wasExecuted']

    # ...while executed() records a wasExecuted reference
    used_syn102 = utils._find_used(
        a, lambda res: res['reference']['targetId'] == 'syn102')
    assert used_syn102['reference']['targetVersionNumber'] == 1
    assert used_syn102['wasExecuted']
Exemplo n.º 16
0
	existingBAMDict[BAMentity.path] = BAMentity


# Check for new BAM and submit
BAMDir = os.listdir(args.bam)
for dir in BAMDir:
	filesList = os.listdir(os.path.join(args.bam, dir)):
	for file in filesList:
		if file.endswith('.bam'):
			if file not in existingBAMDict:		
### For qsub, the evaluation code for this submission will have to contain the provenance calls.
				filePath = os.path.join(args.bam, dir, file)
				newFile = File(filePath, description = 'BAM file of aligned reads.', parentId = foldersDict['BAM'], synapseStore = False)
				
				## Try to extract this code to a function in seq_loading
				act = Activity(name='Alignment',  description='Align reads to genome.')
				#act.executed(target='tophatid', targetVersion=version)

				newFile = syn.store(newFile, activity = act)
				submission = syn.submit(entity=newFile, evaluation = evalID,  name = 'submissionTest', teamName = profile['displayName'])
				print 'Submitted %s to %s' % (newFile.name, countEval.name)

				### Could have multiple other submissions here: count, fusion, 


## Notes
# Can evaluations be used or executed entities in an activity?
# -->> Prefer to use/execute the code behind the eval?
# How to break up data between projects? i.e. could data be generated for more than one project within the window that the cron job runs?
# --> ?? Don't know yet...hope it doesn't come to parsing the sample sheet. If cron is hard-coded to check a specific directory per project, that will work. Not so much if data is dumped into same directory generically for all projects.
# How are evaluations exposed to users? i.e. how will they know the eval id? 
Exemplo n.º 17
0
import synapseclient
from synapseclient import File, Activity

syn = synapseclient.Synapse()
syn.login()

### Ensembl raw counts
# Annotations describing the uploaded count matrix
annotDict = dict()
annotDict['fileType'] = 'count'
annotDict['normalized'] = 'no'
annotDict['summaryLevel'] = 'gene'

# Provenance: counts produced by HTSeq from the BAM + GTF inputs
act = Activity(name='Counting', description='Raw gene counts using HTSeq.')
act.used(['syn2290932', 'syn2215531'])  # syn2290932 is BAM, syn2215531 is GTF
act.executed('syn2243147')  # syn2243147 is htseq
# BUG FIX: the assignment was split as `counts =` / `File(...)` across two
# lines (a SyntaxError), and the implicitly-concatenated string pieces were
# missing separating spaces ('BAMssummarized', 'HTSeqare').
counts = File(
    path='/projects/CommonMind/data/FROM_CORE/Production/readCounts/CMC.'
         'DataFreeze.CountMatrix_V7.ensemble.Clean.txt',
    name='PFC_CountMatrix_ensembl.txt',
    description='Gene counts for all BAMs '
                'summarized using Ensembl gene models. QC counts (e.g. \"ambiguous\") from HTSeq '
                'are not included.',
    parentId='syn2290933',
    synapseStore=True)
counts = syn.store(counts, activity=act)
syn.setAnnotations(counts, annotations=annotDict)


# Need to check:
# - annotations: standards?
# - synapseclient: auto-return synapse id on upload
# - syn.store: specify activity independently?
# - Activity.executed: executable file, or just script?
import synapseclient
from synapseclient import File, Activity

syn = synapseclient.Synapse()
syn.login()

### Ensembl raw counts
# Annotations describing the uploaded count matrix
annotDict = {
    'fileType': 'count',
    'normalized': 'no',
    'summaryLevel': 'gene',
}

# Provenance: counts produced by HTSeq from the BAM + GTF inputs
act = Activity(name='Counting', description='Raw gene counts using HTSeq.')
act.used(['syn2290932', 'syn2215531'])  # syn2290932 is BAM, syn2215531 is GTF
act.executed('syn2243147')  # syn2243147 is htseq

counts = File(
    path=
    '/projects/CommonMind/data/FROM_CORE/Production/readCounts/CMC.DataFreeze.CountMatrix_V7.ensemble.Clean.txt',
    name='PFC_CountMatrix_ensembl.txt',
    description=
    'Gene counts for all BAMs summarized using Ensembl gene models. QC counts (e.g. \"ambiguous\") from HTSeq are not included.',
    parentId='syn2290933',
    synapseStore=True)
counts = syn.store(counts, activity=act)
syn.setAnnotations(counts, annotations=annotDict)
Exemplo n.º 19
0
import os
import sys
import json
import synapseclient
from synapseclient import File, Activity, Wiki
syn = synapseclient.login()

input_path = sys.argv[1]

# Metadata lives next to the data file as '<input_path>.json'
with open(input_path + ".json") as handle:
    meta_data = json.loads(handle.read())

DST_FOLDER = 'syn3079564' #test upload folder

#Create Provenance log
# BUG FIX: the original Activity() call was missing commas between keyword
# arguments (a SyntaxError) and misspelled 'description'/'executed'.
# NOTE(review): 'executed' is populated from meta_data['used'] — this looks
# like a copy/paste slip, but no 'executed' key is visible to confirm against.
provenance = Activity(name=meta_data['activity'],
                      description=meta_data['description'],
                      used=meta_data['used'],
                      executed=meta_data['used'])
#prov = syn.store(prov)

# BUG FIX: 'of.path.basename' -> 'os.path.basename' (and 'os' is now imported)
name = os.path.basename(input_path)
#Add metadata to files to be uploaded
f = File(input_path, name=name, parentId=DST_FOLDER)
f.dataType = meta_data['dataType']
# NOTE(review): fileType is set from meta_data['dataType'] — possibly meant
# to be a 'fileType' key; left as written.
f.fileType = meta_data['dataType']
f.variant_workflow = meta_data['workflow']
f.variant_workflow_version = meta_data['workflowVersion']
# TODO(review): 'call_type' is undefined at this point in the original;
# presumably it should come from meta_data — left as-is pending confirmation.
f.call_type = call_type
f.reference_build = meta_data['referenceBuild']
f.center_name = meta_data['center_name']
f.center_name = meta_data['center_name']
def test_syncFromSynase__manifest(syn):
    """Verify that we generate manifest files when syncing to a location outside of the cache.

    ('Synase' in the test name is a pre-existing typo; renaming would change
    how the test is selected by name, so it is kept.)
    """

    # Mock hierarchy: project -> {file1, folder -> file2}
    project = Project(name="the project", parent="whatever", id="syn123")
    path1 = '/tmp/foo'
    file1 = File(name="file1", parent=project, id="syn456", path=path1)
    path2 = '/tmp/afolder/bar'
    file2 = File(name="file2",
                 parent=project,
                 id="syn789",
                 parentId='syn098',
                 path=path2)
    folder = Folder(name="afolder", parent=project, id="syn098")
    entities = {
        file1.id: file1,
        file2.id: file2,
        folder.id: folder,
    }

    # syn.get is patched to serve entities from the dict above
    def syn_get_side_effect(entity, *args, **kwargs):
        return entities[id_of(entity)]

    # file1 has an anonymous activity; file2's activity has a name/description
    file_1_provenance = Activity(data={
        'used': '',
        'executed': '',
    })
    file_2_provenance = Activity(data={
        'used': '',
        'executed': '',
        'name': 'foo',
        'description': 'bar',
    })

    provenance = {
        file1.id: file_1_provenance,
        file2.id: file_2_provenance,
    }

    def getProvenance_side_effect(entity, *args, **kwargs):
        return provenance[id_of(entity)]

    # Expected TSV manifests (tab-separated; empty columns are intentional)
    expected_project_manifest = \
        f"""path\tparent\tname\tsynapseStore\tcontentType\tused\texecuted\tactivityName\tactivityDescription
{path1}\tsyn123\tfile1\tTrue\t\t\t\t\t
{path2}\tsyn098\tfile2\tTrue\t\t\t\tfoo\tbar
"""

    expected_folder_manifest = \
        f"""path\tparent\tname\tsynapseStore\tcontentType\tused\texecuted\tactivityName\tactivityDescription
{path2}\tsyn098\tfile2\tTrue\t\t\t\tfoo\tbar
"""

    expected_synced_files = [file2, file1]

    with tempfile.TemporaryDirectory() as sync_dir:

        # getChildren returns the project's children first, then the folder's
        with patch.object(syn, "getChildren", side_effect=[[folder, file1], [file2]]),\
                patch.object(syn, "get", side_effect=syn_get_side_effect),\
                patch.object(syn, "getProvenance") as patch_syn_get_provenance:

            patch_syn_get_provenance.side_effect = getProvenance_side_effect

            synced_files = synapseutils.syncFromSynapse(syn,
                                                        project,
                                                        path=sync_dir)
            assert sorted([id_of(e) for e in expected_synced_files
                           ]) == sorted([id_of(e) for e in synced_files])

            # we only expect two calls to provenance even though there are three rows of provenance data
            # in the manifests (two in the outer project, one in the folder)
            # since one of the files is repeated in both manifests we expect only the single get provenance call
            assert len(
                expected_synced_files) == patch_syn_get_provenance.call_count

            # we should have two manifest files, one rooted at the project and one rooted in the sub folder

            _compareCsv(
                expected_project_manifest,
                os.path.join(sync_dir, synapseutils.sync.MANIFEST_FILENAME))
            _compareCsv(
                expected_folder_manifest,
                os.path.join(sync_dir, folder.name,
                             synapseutils.sync.MANIFEST_FILENAME))
Exemplo n.º 21
0
	cmd = ' '.join(['featureCounts -p -t exon -g gene_id -a', gtf, '-o', outputFile, '-s', args.strand, '-T', args.thread, localBAMfilePath])
print 'featurecounts start %s' % time.asctime()
print >> commandsFile, '%s' % cmd
subprocess.call(cmd, shell = True)
print 'featurecounts end %s' % time.asctime()
	


## Load results to synapse
# Python 2 script tail: cfPath, outputFile, commandsFile, prefix, submission,
# args and syn are all defined earlier in the original script.

# Set up provenance.
print 'Loading %s to Synapse.' % cfPath
commandsFile.close()
cf = File(path=cfPath, description='Job commands.', parentId=args.out, synapseStore=True)
# Store the command log first so the count file's activity can reference it
cf = syn.store(cf, activityName='count_evaluation', executed=['https://github.com/Sage-Bionetworks/synapse-seq/blob/master/scripts/eval_counts_featurecounts.py'])
act = Activity(name='Read counting', description='Counting aligned reads to GTF features using featurecounts.', executed=['syn2807330', cf.id])
act.used(target=submission.entityId, targetVersion=submission.versionNumber)
act.used(args.gtf) 

# Load raw count file
print 'Loading %s to Synapse.' % outputFile
quantEntity = File(path=outputFile, name=prefix+'_gene_counts.txt', description='Read counts summarized at gene level.', parentId=args.out, synapseStore=True)
quantEntity = syn.store(quantEntity, forceVersion=False, activity=act)
syn.setAnnotations(quantEntity, annotations=dict(fileType='count',normalized='no',summaryLevel='gene',biasCorrection='False'))
print 'new entity id %s' % quantEntity.id



## Use this code after related JIRA is resolved
## Load metrics to Synapse table
# table = syn.get(syncfg.featurecountsMetricsTable)
def test_copy():
    """Tests the copy function: files, external URLs, links, tables, folders
    and whole projects, including provenance handling and cross-user copies."""
    # Create a Project
    project_entity = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)
    # Grant the secondary test user access so the cross-user copy test below
    # can read and copy entities from this project.
    syn.setPermissions(
        project_entity,
        other_user['principalId'],
        accessType=['READ', 'CREATE', 'UPDATE', 'DOWNLOAD'])
    # Create three Folders in Project
    folder_entity = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    second_folder = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    third_folder = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(folder_entity.id)
    schedule_for_cleanup(second_folder.id)
    schedule_for_cleanup(third_folder.id)

    # Annotations and provenance
    repo_url = 'https://github.com/Sage-Bionetworks/synapsePythonClient'
    annots = {'test': ['hello_world']}
    prov = Activity(name="test", used=repo_url)
    # Create, upload, and set annotations/provenance on a file in Folder
    filename = utils.make_bogus_data_file()
    schedule_for_cleanup(filename)
    file_entity = syn.store(File(filename, parent=folder_entity))
    externalURL_entity = syn.store(
        File(repo_url, name='rand', parent=folder_entity, synapseStore=False))
    syn.setAnnotations(file_entity, annots)
    syn.setAnnotations(externalURL_entity, annots)
    syn.setProvenance(externalURL_entity.id, prov)
    schedule_for_cleanup(file_entity.id)
    schedule_for_cleanup(externalURL_entity.id)
    # ------------------------------------
    # TEST COPY FILE
    # ------------------------------------
    output = synapseutils.copy(syn,
                               file_entity.id,
                               destinationId=project_entity.id)
    output_URL = synapseutils.copy(syn,
                                   externalURL_entity.id,
                                   destinationId=project_entity.id,
                                   skipCopyAnnotations=True)

    # Verify that our copied files are identical
    copied_ent = syn.get(output[file_entity.id])
    copied_URL_ent = syn.get(output_URL[externalURL_entity.id],
                             downloadFile=False)

    copied_ent_annot = syn.getAnnotations(copied_ent)
    copied_url_annot = syn.getAnnotations(copied_URL_ent)
    copied_prov = syn.getProvenance(copied_ent)
    copied_url_prov = syn.getProvenance(copied_URL_ent)
    schedule_for_cleanup(copied_ent.id)
    schedule_for_cleanup(copied_URL_ent.id)

    # TEST: setProvenance = Traceback (copies point back at their source)
    print("Test: setProvenance = Traceback")
    assert copied_prov['used'][0]['reference']['targetId'] == file_entity.id
    assert copied_url_prov['used'][0]['reference'][
        'targetId'] == externalURL_entity.id

    # TEST: Make sure copied files are the same
    assert copied_ent_annot == annots
    assert copied_ent.dataFileHandleId == file_entity.dataFileHandleId

    # TEST: Make sure copied URLs are the same
    assert copied_url_annot == {}
    assert copied_URL_ent.externalURL == repo_url
    assert copied_URL_ent.name == 'rand'
    assert copied_URL_ent.dataFileHandleId == externalURL_entity.dataFileHandleId

    # TEST: Throw error if file is copied to a folder/project that has a file with the same filename
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  project_entity.id,
                  destinationId=project_entity.id)
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  file_entity.id,
                  destinationId=project_entity.id)
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  file_entity.id,
                  destinationId=third_folder.id,
                  setProvenance="gib")
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  file_entity.id,
                  destinationId=file_entity.id)

    print("Test: setProvenance = None")
    output = synapseutils.copy(syn,
                               file_entity.id,
                               destinationId=second_folder.id,
                               setProvenance=None)
    assert_raises(SynapseHTTPError, syn.getProvenance, output[file_entity.id])
    schedule_for_cleanup(output[file_entity.id])

    print("Test: setProvenance = Existing")
    output_URL = synapseutils.copy(syn,
                                   externalURL_entity.id,
                                   destinationId=second_folder.id,
                                   setProvenance="existing")
    output_prov = syn.getProvenance(output_URL[externalURL_entity.id])
    schedule_for_cleanup(output_URL[externalURL_entity.id])
    assert output_prov['name'] == prov['name']
    assert output_prov['used'] == prov['used']

    if 'username' not in other_user or 'password' not in other_user:
        sys.stderr.write(
            '\nWarning: no test-authentication configured. skipping testing copy function when trying to copy file made by another user.\n'
        )
        return

    # Construct the second client *outside* the try block: otherwise a failure
    # in construction would leave syn_other unbound and the finally clause
    # would raise NameError, masking the original error.
    syn_other = synapseclient.Synapse(skip_checks=True)
    try:
        print(
            "Test: Other user copy should result in different data file handle"
        )
        syn_other.login(other_user['username'], other_user['password'])

        output = synapseutils.copy(syn_other,
                                   file_entity.id,
                                   destinationId=third_folder.id)
        new_copied_ent = syn.get(output[file_entity.id])
        new_copied_ent_annot = syn.getAnnotations(new_copied_ent)
        schedule_for_cleanup(new_copied_ent.id)

        copied_URL_ent.externalURL = "https://www.google.com"
        copied_URL_ent = syn.store(copied_URL_ent)
        output = synapseutils.copy(syn_other,
                                   copied_URL_ent.id,
                                   destinationId=third_folder.id,
                                   version=1)
        new_copied_URL = syn.get(output[copied_URL_ent.id], downloadFile=False)
        schedule_for_cleanup(new_copied_URL.id)

        assert new_copied_ent_annot == annots
        assert new_copied_ent.dataFileHandleId != copied_ent.dataFileHandleId
        # Test if copying different versions gets you the correct file
        assert new_copied_URL.versionNumber == 1
        assert new_copied_URL.externalURL == repo_url
        assert new_copied_URL.dataFileHandleId != copied_URL_ent.dataFileHandleId
    finally:
        syn_other.logout()

    # ------------------------------------
    # TEST COPY LINKS
    # ------------------------------------
    print("Test: Copy Links")
    second_file = utils.make_bogus_data_file()
    schedule_for_cleanup(second_file)
    second_file_entity = syn.store(File(second_file, parent=project_entity))
    link_entity = Link(second_file_entity.id, parent=folder_entity.id)
    link_entity = syn.store(link_entity)

    # function under test uses queries which are eventually consistent but not immediately after creating the entities
    start_time = time.time()
    while syn.query("select id from entity where id=='%s'" %
                    link_entity.id).get('totalNumberOfResults') <= 0:
        assert_less(time.time() - start_time, QUERY_TIMEOUT_SEC)
        time.sleep(2)

    copied_link = synapseutils.copy(syn,
                                    link_entity.id,
                                    destinationId=second_folder.id)
    old = syn.get(link_entity.id, followLink=False)
    new = syn.get(copied_link[link_entity.id], followLink=False)
    assert old.linksTo['targetId'] == new.linksTo['targetId']
    assert old.linksTo['targetVersionNumber'] == new.linksTo[
        'targetVersionNumber']

    schedule_for_cleanup(second_file_entity.id)
    schedule_for_cleanup(link_entity.id)
    schedule_for_cleanup(copied_link[link_entity.id])

    time.sleep(3)

    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  link_entity.id,
                  destinationId=second_folder.id)

    # ------------------------------------
    # TEST COPY TABLE
    # ------------------------------------
    second_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(second_project.id)
    print("Test: Copy Tables")
    cols = [
        Column(name='n', columnType='DOUBLE', maximumSize=50),
        Column(name='c', columnType='STRING', maximumSize=50),
        Column(name='i', columnType='INTEGER')
    ]
    data = [[2.1, 'foo', 10], [2.2, 'bar', 20], [2.3, 'baz', 30]]

    schema = syn.store(
        Schema(name='Testing', columns=cols, parent=project_entity.id))
    row_reference_set = syn.store(
        RowSet(columns=cols, schema=schema, rows=[Row(r) for r in data]))

    table_map = synapseutils.copy(syn,
                                  schema.id,
                                  destinationId=second_project.id)
    copied_table = syn.tableQuery('select * from %s' % table_map[schema.id])
    rows = copied_table.asRowSet()['rows']
    # TEST: Check if all values are the same
    for i, row in enumerate(rows):
        assert row['values'] == data[i]

    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  schema.id,
                  destinationId=second_project.id)

    schedule_for_cleanup(schema.id)
    schedule_for_cleanup(table_map[schema.id])

    # ------------------------------------
    # TEST COPY FOLDER
    # ------------------------------------
    print("Test: Copy Folder")
    mapping = synapseutils.copy(syn,
                                folder_entity.id,
                                destinationId=second_project.id)
    for i in mapping:
        old = syn.get(i, downloadFile=False)
        new = syn.get(mapping[i], downloadFile=False)
        assert old.name == new.name
        assert old.annotations == new.annotations
        assert old.concreteType == new.concreteType

    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  folder_entity.id,
                  destinationId=second_project.id)
    # TEST: Throw error if excludeTypes isn't in file, link and table or isn't a list
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  second_folder.id,
                  destinationId=second_project.id,
                  excludeTypes=["foo"])
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  second_folder.id,
                  destinationId=second_project.id,
                  excludeTypes="file")
    # TEST: excludeTypes=["file", "table", "link"] -- only the folder itself is copied
    second = synapseutils.copy(syn,
                               second_folder.id,
                               destinationId=second_project.id,
                               excludeTypes=["file", "table", "link"])

    copied_folder = syn.get(second[second_folder.id])
    assert copied_folder.name == second_folder.name
    assert len(second) == 1
    # TEST: Make sure error is thrown if foldername already exists
    start_time = time.time()
    while syn.query("select id from entity where id=='%s'" %
                    copied_folder.id).get('totalNumberOfResults') <= 0:
        assert_less(time.time() - start_time, QUERY_TIMEOUT_SEC)
        time.sleep(2)

    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  second_folder.id,
                  destinationId=second_project.id)

    # ------------------------------------
    # TEST COPY PROJECT
    # ------------------------------------
    print("Test: Copy Project")
    third_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(third_project.id)

    mapping = synapseutils.copy(syn,
                                project_entity.id,
                                destinationId=third_project.id)
    for i in mapping:
        old = syn.get(i, downloadFile=False)
        new = syn.get(mapping[i], downloadFile=False)
        if not isinstance(old, Project):
            assert old.name == new.name
        assert old.annotations == new.annotations
        assert old.concreteType == new.concreteType

    # TEST: Can't copy project to a folder
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  project_entity.id,
                  destinationId=second_folder.id)
Exemplo n.º 23
0
def test_activity_used_url():
    """Activity creation with UsedURL resources, supplied both through the
    constructor (used/executed lists) and the used()/executed() mutators."""
    xkcd = 'http://xkcd.com'
    onion = {'name': 'The Onion', 'url': 'http://theonion.com'}
    solver_pinned = {
        'name': 'Seriously advanced code',
        'url':
        'https://github.com/cbare/Pydoku/blob/ef88069f70823808f3462410e941326ae7ffbbe0/solver.py',
        'wasExecuted': True
    }
    solver_master = {
        'name': 'Heavy duty algorithm',
        'url': 'https://github.com/cbare/Pydoku/blob/master/solver.py'
    }

    activity = Activity(name='Foobarbat',
                        description='Apply foo to a bar and a bat',
                        used=[xkcd, onion, solver_pinned],
                        executed=[solver_pinned, solver_master])

    # Resources may also be added after construction.
    activity.executed(
        url='http://cran.r-project.org/web/packages/glmnet/index.html',
        name='glm.net')
    activity.used(
        url='http://earthquake.usgs.gov/earthquakes/feed/geojson/2.5/day',
        name='earthquakes')

    def find_by_name(name):
        # Locate the used/executed resource whose display name matches.
        return utils._find_used(
            activity, lambda res: 'name' in res and res['name'] == name)

    # Bare URL string: recorded as a non-executed UsedURL.
    found = utils._find_used(
        activity, lambda res: 'url' in res and res['url'] == xkcd)
    assert found is not None
    assert found['url'] == xkcd
    assert not found['wasExecuted']

    # Dict without wasExecuted in the used list: defaults to not executed.
    found = find_by_name('The Onion')
    assert found is not None
    assert found['url'] == 'http://theonion.com'
    assert not found['wasExecuted']

    # Dict carrying an explicit wasExecuted flag is preserved.
    found = find_by_name('Seriously advanced code')
    assert found is not None
    assert found['url'] == solver_pinned['url']
    assert found['wasExecuted'] == solver_pinned['wasExecuted']

    # Dict in the executed list: flagged as executed.
    found = find_by_name('Heavy duty algorithm')
    assert found is not None
    assert found['url'] == solver_master['url']
    assert found['wasExecuted']

    # Resources added via executed()/used() keep the corresponding flag.
    found = find_by_name('glm.net')
    assert found is not None
    assert found[
        'url'] == 'http://cran.r-project.org/web/packages/glmnet/index.html'
    assert found['wasExecuted']

    found = find_by_name('earthquakes')
    assert found is not None
    assert found[
        'url'] == 'http://earthquake.usgs.gov/earthquakes/feed/geojson/2.5/day'
    assert not found['wasExecuted']
Exemplo n.º 24
0
 def get_activity(self, entity: Entity, version=None) -> Activity:
     """Return the provenance Activity for *entity* (optionally at *version*),
     or a fresh empty Activity when none can be retrieved."""
     try:
         return self.getProvenance(entity, version)
     except (SynapseHTTPError, ValueError):
         # No provenance recorded (or bad input): fall back to an empty record.
         return Activity()