Пример #1
0
def test_entity_view_add_annotation_columns(syn, project,
                                            schedule_for_cleanup):
    """Store annotation-column entity views scoped to two annotated folders.

    Three views are stored over the same scope. They differ only in how the
    view type is selected: the deprecated ``type`` field alone, ``type``
    together with ``includeEntityTypes``, and ``includeEntityTypes`` alone.
    """
    first_folder = Folder(
        name=str(uuid.uuid4()) +
        'test_entity_view_add_annotation_columns_proj1',
        parent=project,
        annotations={
            'strAnno': 'str1',
            'intAnno': 1,
            'floatAnno': 1.1
        })
    first_folder = syn.store(first_folder)

    second_folder = Folder(
        name=str(uuid.uuid4()) +
        'test_entity_view_add_annotation_columns_proj2',
        parent=project,
        annotations={
            'dateAnno': datetime.now(),
            'strAnno': 'str2',
            'intAnno': 2
        })
    second_folder = syn.store(second_folder)

    stored_folders = (first_folder, second_folder)
    for stored_folder in stored_folders:
        schedule_for_cleanup(stored_folder)
    folder_ids = [utils.id_of(stored_folder) for stored_folder in stored_folders]

    # The first two variants ensure that user code which uses the deprecated
    # field `type` continues to work.
    # TODO: remove those two cases in Synapse Python client 2.0
    view_type_variants = (
        {'type': 'project'},
        {'type': 'file', 'includeEntityTypes': [EntityViewType.PROJECT]},
        {'includeEntityTypes': [EntityViewType.PROJECT]},
    )
    for variant in view_type_variants:
        view_schema = EntityViewSchema(name=str(uuid.uuid4()),
                                       scopeIds=folder_ids,
                                       addDefaultViewColumns=False,
                                       addAnnotationColumns=True,
                                       parent=project,
                                       **variant)
        syn.store(view_schema)
Пример #2
0
    def _view_setup(cls):
        """Create a folder with two files and store a view scoped to it.

        Returns:
            The result of storing an EntityViewSchema that has two STRING
            columns (``foo`` and ``bar``), no default view columns, and the
            new folder as its only scope.
        """
        # set up a file view
        view_folder = Folder(name="PartialRowTestFolder" + str(uuid.uuid4()),
                             parent=project)
        view_folder = syn.store(view_folder)

        # Both files are stored with synapseStore=False.
        for file_path, file_name in (("~/path/doesnt/matter", "f1"),
                                     ("~/path/doesnt/matter/again", "f2")):
            syn.store(
                File(file_path,
                     name=file_name,
                     parent=view_folder,
                     synapseStore=False))

        view_columns = [
            Column(name='foo', columnType='STRING', maximumSize=1000),
            Column(name='bar', columnType='STRING')
        ]
        view_schema = EntityViewSchema(
            name='PartialRowTestViews' + str(uuid.uuid4()),
            columns=view_columns,
            addDefaultViewColumns=False,
            parent=project,
            scopes=[view_folder])
        return syn.store(view_schema)
Пример #3
0
def test_entity_view_add_annotation_columns():
    """Store one annotation-column view scoped to two annotated folders.

    The view is created with ``type='project'``, without default view
    columns, and with ``addAnnotationColumns`` enabled.
    """
    first_folder = Folder(
        name=str(uuid.uuid4()) +
        'test_entity_view_add_annotation_columns_proj1',
        parent=project,
        annotations={
            'strAnno': 'str1',
            'intAnno': 1,
            'floatAnno': 1.1
        })
    first_folder = syn.store(first_folder)

    second_folder = Folder(
        name=str(uuid.uuid4()) +
        'test_entity_view_add_annotation_columns_proj2',
        parent=project,
        annotations={
            'dateAnno': datetime.now(),
            'strAnno': 'str2',
            'intAnno': 2
        })
    second_folder = syn.store(second_folder)

    stored_folders = (first_folder, second_folder)
    for stored_folder in stored_folders:
        schedule_for_cleanup(stored_folder)
    folder_ids = [utils.id_of(stored_folder) for stored_folder in stored_folders]

    view_schema = EntityViewSchema(name=str(uuid.uuid4()),
                                   scopeIds=folder_ids,
                                   addDefaultViewColumns=False,
                                   addAnnotationColumns=True,
                                   type='project',
                                   parent=project)
    syn.store(view_schema)
Пример #4
0
def test_table_file_view_csv_update_annotations__includeEntityEtag():
    """Update a file's annotations through a view, via rowset and via CSV.

    A folder containing one annotated file is created, together with a view
    scoped to that folder that exposes the two annotations as STRING columns.
    The first annotation is changed by storing a modified rowset, the second
    by storing a modified DataFrame. The test then polls the file entity
    until its annotations match the expected values, failing once the wait
    exceeds ``QUERY_TIMEOUT_SEC``.
    """
    folder = syn.store(
        synapseclient.Folder(name="updateAnnoFolder" + str(uuid.uuid4()),
                             parent=project))
    anno1_name = "annotationColumn1"
    anno2_name = "annotationColumn2"
    initial_annotations = {
        anno1_name: "initial_value1",
        anno2_name: "initial_value2"
    }
    file_entity = syn.store(
        File(name=
             "test_table_file_view_csv_update_annotations__includeEntityEtag",
             path="~/fakepath",
             synapseStore=False,
             parent=folder,
             annotations=initial_annotations))

    annotation_columns = [
        Column(name=anno1_name, columnType='STRING'),
        Column(name=anno2_name, columnType='STRING')
    ]
    entity_view = syn.store(
        EntityViewSchema(name="TestEntityViewSchemaUpdateAnnotation" +
                         str(uuid.uuid4()),
                         parent=project,
                         scopes=[folder],
                         columns=annotation_columns))

    query_str = "SELECT {anno1}, {anno2} FROM {proj_id}".format(
        anno1=anno1_name, anno2=anno2_name, proj_id=utils.id_of(entity_view))

    # modify first annotation using rowset
    rowset_query_result = syn.tableQuery(query_str, resultsAs="rowset")
    rowset = rowset_query_result.asRowSet()
    rowset_changed_anno_value = "rowset_value_change"
    rowset.rows[0].values[0] = rowset_changed_anno_value
    syn.store(rowset)

    # modify second annotation using csv
    csv_query_result = syn.tableQuery(query_str, resultsAs="csv")
    dataframe = csv_query_result.asDataFrame()
    csv_changed_anno_value = "csv_value_change"
    # DataFrame.ix was removed in pandas 1.0. Address the first row by
    # position and resolve the column label to its position explicitly.
    anno2_position = dataframe.columns.get_loc(anno2_name)
    dataframe.iloc[0, anno2_position] = csv_changed_anno_value
    syn.store(Table(utils.id_of(entity_view), dataframe))

    # check annotations in the file entity. Annotations may not be
    # immediately updated so we wait in a while loop
    expected_annotations = {
        anno1_name: [rowset_changed_anno_value],
        anno2_name: [csv_changed_anno_value]
    }
    start_time = time.time()
    while (expected_annotations != file_entity.annotations):
        assert_less(time.time() - start_time, QUERY_TIMEOUT_SEC)
        time.sleep(2)
        file_entity = syn.get(file_entity, downloadFile=False)
Пример #5
0
    def __init__(self,
                 datasetId: str,
                 synapse: Synapse,
                 name: str = None,
                 temporary: bool = True,
                 parentId: str = None) -> None:
        """Create a file view scoped to a dataset folder.

        The view is stored in Synapse as part of construction, and when
        ``temporary`` is true its deletion is registered with ``atexit``.

        Args:
            datasetId (str): Synapse ID for a dataset folder/project.
            synapse (Synapse): Used for Synapse requests.
            name (str, optional): Name of the file view (temporary or not).
                Defaults to a name derived from ``datasetId``.
            temporary (bool): Whether to delete the file view on exit
                of either a 'with' statement or Python entirely. A random
                UID suffix is appended to the name of temporary views.
            parentId (str, optional): Synapse ID specifying where to
                store the file view. Defaults to datasetId.
        """

        self.datasetId = datasetId
        self.synapse = synapse
        self.is_temporary = temporary

        # Always assign self.name. Previously it was only set when `name`
        # was None, so a caller-supplied name led to an AttributeError on
        # the first read of self.name below.
        if name is None:
            name = f"schematic annotation file view for {self.datasetId}"
        self.name = name

        if self.is_temporary:
            uid = secrets.token_urlsafe(5)
            self.name = f"{self.name} - UID {uid}"

        # TODO: Allow a DCC admin to configure a "universal parent"
        #       Such as a Synapse project writeable by everyone.
        self.parentId = datasetId if parentId is None else parentId

        # TODO: Create local sharing setting to hide from everyone else
        view_schema = EntityViewSchema(
            name=self.name,
            parent=self.parentId,
            scopes=self.datasetId,
            includeEntityTypes=[EntityViewType.FILE, EntityViewType.FOLDER],
            addDefaultViewColumns=False,
            addAnnotationColumns=True)

        # TODO: Handle failure due to insufficient permissions by
        #       creating a temporary new project to store view
        self.view_schema = self.synapse.store(view_schema)

        # These are filled in after calling `self.query()`
        self.results = None
        self.table = None

        # Ensure deletion of the file view (last resort)
        if self.is_temporary:
            atexit.register(self.delete)
Пример #6
0
    def get_or_create_view(self, **kwargs) -> EntityViewSchema:
        """Return a view schema, fetching an existing one or creating it.

        A schema object is built from the keyword arguments and handed to
        ``_find_by_obj_or_create``; the outcome is logged at INFO level.

        Args:
            **kwargs: Same arguments as synapseclient.EntityViewSchema.

        Returns:
            A synapseclient.EntityViewSchema.

        """
        schema = self._find_by_obj_or_create(EntityViewSchema(**kwargs))
        message = '{} View {} ({})'.format(self._update_str, schema.name,
                                           schema.id)
        self.logger.info(message)
        return schema
Пример #7
0
def test_create_and_update_file_view(syn, project, schedule_for_cleanup):
    """Create a file view, read it back as CSV, edit it and verify the change.

    Steps:
      1. Store a folder containing one annotated file.
      2. Store a 'file' view scoped to that folder whose columns are the
         default view columns plus one STRING column per annotation.
      3. Query the view and check the annotations are present both in the
         view and on the file entity.
      4. Change ``fileFormat`` to ``PNG`` in a CSV copy, store it as a Table,
         and poll the view until the change is visible or
         ``QUERY_TIMEOUT_SEC`` is exceeded.
    """

    def read_csv_rows(csv_path):
        # Read the whole CSV inside a `with` block so the file handle is
        # closed deterministically instead of being leaked.
        with io.open(csv_path, encoding="utf-8", newline='') as csv_file:
            return list(csv.DictReader(csv_file))

    # Create a folder
    folder = Folder(str(uuid.uuid4()),
                    parent=project,
                    description='creating a file-view')
    folder = syn.store(folder)

    # Create dummy file with annotations in our folder
    path = utils.make_bogus_data_file()
    file_annotations = dict(fileFormat='jpg',
                            dataType='image',
                            artist='Banksy',
                            medium='print',
                            title='Girl With Ballon')
    schedule_for_cleanup(path)
    a_file = File(path, parent=folder, annotations=file_annotations)
    a_file = syn.store(a_file)
    schedule_for_cleanup(a_file)

    # Add new columns for the annotations on this file and get their IDs
    my_added_cols = [
        syn.store(Column(name=k, columnType="STRING"))
        for k in file_annotations.keys()
    ]
    my_added_cols_ids = [c['id'] for c in my_added_cols]
    view_default_ids = [
        c['id'] for c in syn._get_default_view_columns(
            "entityview", EntityViewType.FILE.value)
    ]
    col_ids = my_added_cols_ids + view_default_ids
    scopeIds = [folder['id'].lstrip('syn')]

    # Create an empty entity-view with defined scope as folder

    entity_view = EntityViewSchema(name=str(uuid.uuid4()),
                                   scopeIds=scopeIds,
                                   addDefaultViewColumns=True,
                                   addAnnotationColumns=False,
                                   type='file',
                                   columns=my_added_cols,
                                   parent=project)

    entity_view = syn.store(entity_view)
    schedule_for_cleanup(entity_view)

    assert set(scopeIds) == set(entity_view.scopeIds)
    assert set(col_ids) == set(entity_view.columnIds)
    assert EntityViewType.FILE.value == entity_view.viewTypeMask

    # get the current view-schema
    view = syn.tableQuery("select * from %s" % entity_view.id)
    schedule_for_cleanup(view.filepath)

    view_dict = read_csv_rows(view.filepath)

    # check that all of the annotations were retrieved from the view
    assert set(file_annotations.keys()).issubset(set(view_dict[0].keys()))

    updated_a_file = syn.get(a_file.id, downloadFile=False)

    # Check that the values are the same as what was set
    # Both in the view and on the entity itself
    for k, v in file_annotations.items():
        assert view_dict[0][k] == v
        assert updated_a_file.annotations[k][0] == v

    # Make a change to the view and store
    view_dict[0]['fileFormat'] = 'PNG'

    # Reserve a temp file name; the file itself is rewritten just below.
    with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as temp:
        schedule_for_cleanup(temp.name)
        temp_filename = temp.name

    with io.open(temp_filename, mode='w', encoding="utf-8",
                 newline='') as temp_file:
        dw = csv.DictWriter(temp_file,
                            fieldnames=view_dict[0].keys(),
                            quoting=csv.QUOTE_NONNUMERIC,
                            lineterminator=str(os.linesep))
        dw.writeheader()
        dw.writerows(view_dict)
        temp_file.flush()
    syn.store(Table(entity_view.id, temp_filename))
    # Sanity check on the local CSV that was just uploaded.
    new_view_dict = read_csv_rows(temp_filename)
    assert new_view_dict[0]['fileFormat'] == 'PNG'

    # query for the change
    start_time = time.time()

    new_view_results = syn.tableQuery("select * from %s" % entity_view.id)
    schedule_for_cleanup(new_view_results.filepath)
    new_view_dict = read_csv_rows(new_view_results.filepath)
    # query until change is seen.
    while new_view_dict[0]['fileFormat'] != 'PNG':
        # check timeout
        assert time.time() - start_time < QUERY_TIMEOUT_SEC
        # query again
        new_view_results = syn.tableQuery("select * from %s" % entity_view.id)
        new_view_dict = read_csv_rows(new_view_results.filepath)
    # paranoid check
    assert new_view_dict[0]['fileFormat'] == 'PNG'
Пример #8
0
def partial_rowset_test_state(syn, project):
    """Build the shared state used by partial-rowset tests.

    Stores a two-column INTEGER table holding two rows, a folder with two
    files, and a view over that folder with the same two INTEGER columns.
    The stored schemas are returned together with the partial changes to
    apply and the cell values expected afterwards.

    Returns:
        A ``TestState`` instance exposing ``syn``, ``project``,
        ``table_schema``, ``view_schema``, ``table_changes``,
        ``view_changes``, ``expected_table_cells`` and
        ``expected_view_cells``.
    """

    def integer_columns():
        # Fresh Column objects on every call; the table and the view each
        # get their own pair.
        return [
            Column(name='foo', columnType='INTEGER'),
            Column(name='bar', columnType='INTEGER')
        ]

    stored_table = syn.store(
        Schema(name='PartialRowTest' + str(uuid.uuid4()),
               columns=integer_columns(),
               parent=project))
    initial_rows = [[1, None], [None, 2]]
    syn.store(
        RowSet(schema=stored_table,
               rows=[Row(values) for values in initial_rows]))

    # set up a file view
    view_folder = syn.store(
        Folder(name="PartialRowTestFolder" + str(uuid.uuid4()),
               parent=project))
    for file_path, file_name in (("~/path/doesnt/matter", "f1"),
                                 ("~/path/doesnt/matter/again", "f2")):
        syn.store(
            File(file_path,
                 name=file_name,
                 parent=view_folder,
                 synapseStore=False))

    stored_view = syn.store(
        EntityViewSchema(name='PartialRowTestViews' + str(uuid.uuid4()),
                         columns=integer_columns(),
                         addDefaultViewColumns=False,
                         parent=project,
                         scopes=[view_folder]))

    changes_for_table = [{'foo': 4}, {'bar': 5}]
    changes_for_view = [{'bar': 6}, {'foo': 7}]

    nan = float('NaN')
    table_cells = pd.DataFrame({'foo': [4.0, nan], 'bar': [nan, 5.0]})
    view_cells = pd.DataFrame({'foo': [nan, 7.0], 'bar': [6.0, nan]})

    class TestState:
        def __init__(self):
            self.syn = syn
            self.project = project
            self.table_schema = stored_table
            self.view_schema = stored_view
            self.table_changes = changes_for_table
            self.view_changes = changes_for_view
            self.expected_table_cells = table_cells
            self.expected_view_cells = view_cells

    return TestState()
Пример #9
0
def test_entity_view_add_annotation_columns():
    """Check that storing a view adds one column per annotation in scope.

    Two annotated folders form the scope. After storing, the view's columns
    must match the expected name-to-type mapping and the
    ``addAnnotationColumns`` flag must read as false. A further annotation
    is then added to the first folder and the view is re-stored until
    exactly one more column appears. Both waits fail once
    ``QUERY_TIMEOUT_SEC`` is exceeded.
    """

    def column_types_of(view):
        # Map each column name of the view to its column type.
        return {
            column['name']: column['columnType']
            for column in syn.getColumns(view)
        }

    folder1 = Folder(
        name=str(uuid.uuid4()) +
        'test_entity_view_add_annotation_columns_proj1',
        parent=project,
        annotations={
            'strAnno': 'str1',
            'intAnno': 1,
            'floatAnno': 1.1
        })
    folder1 = syn.store(folder1)

    folder2 = Folder(
        name=str(uuid.uuid4()) +
        'test_entity_view_add_annotation_columns_proj2',
        parent=project,
        annotations={
            'dateAnno': datetime.now(),
            'strAnno': 'str2',
            'intAnno': 2
        })
    folder2 = syn.store(folder2)

    schedule_for_cleanup(folder1)
    schedule_for_cleanup(folder2)
    scope_ids = [utils.id_of(folder1), utils.id_of(folder2)]

    view = EntityViewSchema(name=str(uuid.uuid4()),
                            scopeIds=scope_ids,
                            addDefaultViewColumns=False,
                            addAnnotationColumns=True,
                            type='project',
                            parent=project)
    assert_true(view['addAnnotationColumns'])

    # For some reason this call is eventually consistent but not immediately
    # consistent, so we just wait until the size returned is correct.
    expected_types = {
        'dateAnno': 'DATE',
        'intAnno': 'INTEGER',
        'strAnno': 'STRING',
        'floatAnno': 'DOUBLE'
    }
    annotation_columns = syn._get_annotation_entity_view_columns(
        scope_ids, 'project')

    wait_started = time.time()
    while len(annotation_columns) != len(expected_types):
        assert_less(time.time() - wait_started, QUERY_TIMEOUT_SEC)

        annotation_columns = syn._get_annotation_entity_view_columns(
            scope_ids, 'project')
        time.sleep(2)

    view = syn.store(view)
    assert_false(view['addAnnotationColumns'])

    assert_dict_equal(expected_types, column_types_of(view))

    # Add another annotation to the folder and make sure that
    # EntityViewSchema only adds one more column.
    folder1['anotherAnnotation'] = 'I need healing!'
    folder1 = syn.store(folder1)

    target_column_count = len(list(view.columnIds)) + 1
    # Sometimes annotation columns are not immediately updated, so we wait
    # for the update in a loop.
    wait_started = time.time()
    while len(view.columnIds) != target_column_count:
        assert_less(time.time() - wait_started, QUERY_TIMEOUT_SEC)
        view.addAnnotationColumns = True
        view = syn.store(view)

    expected_types['anotherAnnotation'] = 'STRING'
    assert_dict_equal(expected_types, column_types_of(view))
Пример #10
0
def test_entity_view_add_annotation_columns():
    """Check that storing a project view adds one column per annotation.

    Two annotated projects form the scope. The test waits until the
    annotation-column lookup returns the expected number of columns, stores
    the view, and compares the stored columns with the expected
    name-to-type mapping. It then adds one more annotation to the first
    project, re-stores the view with ``addAnnotationColumns`` re-enabled,
    and expects exactly that one extra column.
    """
    proj1 = syn.store(
        Project(name=str(uuid.uuid4()) +
                'test_entity_view_add_annotation_columns_proj1',
                annotations={
                    'strAnno': 'str1',
                    'intAnno': 1,
                    'floatAnno': 1.1
                }))
    proj2 = syn.store(
        Project(name=str(uuid.uuid4()) +
                'test_entity_view_add_annotation_columns_proj2',
                annotations={
                    'dateAnno': datetime.now(),
                    'strAnno': 'str2',
                    'intAnno': 2
                }))
    schedule_for_cleanup(proj1)
    schedule_for_cleanup(proj2)
    scopeIds = [utils.id_of(proj1), utils.id_of(proj2)]

    entity_view = EntityViewSchema(name=str(uuid.uuid4()),
                                   scopeIds=scopeIds,
                                   addDefaultViewColumns=False,
                                   addAnnotationColumns=True,
                                   type='project',
                                   parent=project)
    assert_true(entity_view['addAnnotationColumns'])

    # For some reason this call is eventually consistent but not immediately
    # consistent, so we just wait until the size returned is correct.
    expected_column_types = {
        'dateAnno': 'DATE',
        'intAnno': 'INTEGER',
        'strAnno': 'STRING',
        'floatAnno': 'DOUBLE',
        'concreteType': 'STRING'
    }
    columns = syn._get_annotation_entity_view_columns(scopeIds, 'project')
    # Bound the wait so that a lookup which never becomes consistent fails
    # the test instead of hanging it forever.
    start_time = time.time()
    while len(columns) != len(expected_column_types):
        assert time.time() - start_time < QUERY_TIMEOUT_SEC, \
            "timed out waiting for annotation columns to become consistent"
        columns = syn._get_annotation_entity_view_columns(scopeIds, 'project')
        time.sleep(2)

    entity_view = syn.store(entity_view)
    assert_false(entity_view['addAnnotationColumns'])

    view_column_types = {
        column['name']: column['columnType']
        for column in syn.getColumns(entity_view.columnIds)
    }
    assert_dict_equal(expected_column_types, view_column_types)

    # Add another annotation to the project and make sure that
    # EntityViewSchema only adds one more column.
    proj1['anotherAnnotation'] = 'I need healing!'
    proj1 = syn.store(proj1)

    entity_view.addAnnotationColumns = True
    entity_view = syn.store(entity_view)

    expected_column_types.update({'anotherAnnotation': 'STRING'})
    view_column_types = {
        column['name']: column['columnType']
        for column in syn.getColumns(entity_view.columnIds)
    }
    assert_dict_equal(expected_column_types, view_column_types)
Пример #11
0
    def _ensure_syt_view(self):
        """
        Ensure the syt table/view exists for the project.

        First tries to fetch a view named ``SYT_VIEW_NAME`` under the
        project. If ``self._syt_view`` is still ``None`` afterwards, a new
        view scoped to the project is built with an explicit
        ``viewTypeMask``, the base columns and the check-out annotation
        columns, and is stored. The result is kept on ``self._syt_view``.
        """
        try:
            # This will fail if the schema doesn't exist. This is a synapseclient bug.
            self._syt_view = self._synapse_client.get(EntityViewSchema(
                name=self.SYT_VIEW_NAME, parent=self._project),
                                                      downloadFile=False)
        except Exception:
            # Best-effort lookup: a failure here is treated as "no view
            # yet". Catching Exception rather than using a bare `except`
            # lets KeyboardInterrupt and SystemExit propagate.
            pass

        if self._syt_view is None:
            evs = EntityViewSchema(name=self.SYT_VIEW_NAME,
                                   parent=self._project,
                                   scopes=[self._project],
                                   properties={'viewTypeMask': 9})

            # Delete the 'type' property so we can set our own viewTypeMask to Files and Folders.
            evs.pop('type')

            # Since we removed 'type' we have to manually populate the base
            # columns; the check-out annotation columns follow them.
            column_specs = (
                {'name': 'id', 'columnType': 'ENTITYID'},
                {'name': 'parentId', 'columnType': 'ENTITYID'},
                {'name': 'projectId', 'columnType': 'ENTITYID'},
                {'name': 'type', 'columnType': 'STRING'},
                {'name': 'name', 'columnType': 'STRING', 'maximumSize': 256},
                {'name': self.ANNO_CHECKED_OUT_BY_ID, 'columnType': 'STRING'},
                {'name': self.ANNO_CHECKED_OUT_BY_NAME, 'columnType': 'STRING'},
                {'name': self.ANNO_CHECKED_OUT_DATE, 'columnType': 'DATE'},
            )
            for column_spec in column_specs:
                evs.addColumn(Column(**column_spec))

            self._syt_view = self._synapse_client.store(evs)