Example #1
    def __init__(self, profile_data, model_data):

        # 1. load meta info: store all settings in Meta class as _meta attribute
        meta = Meta( profile_data, model_data )
        self._meta = meta

        # 2. init Cache and Remote backend
        self._cache = Cache( meta )
        self._remote = Remote( meta )
        self._remote.open() # authenticate at the remote

        # 3. load odML terminologies
        # TODO make odML load terms into our cache folder, not the default /tmp
        terms = terminology.terminologies.load(profile_data['odml_repository'])
        self.terminologies = terms.sections

        # 4. attach supported models
        self.models = dict( models_map )

        warnings.simplefilter('ignore', tb.NaturalNameWarning)
        print "Session initialized."
Example #2
class Session( Browser ):
    """ Object to handle connection and client-server data transfer """

    def __init__(self, profile_data, model_data):

        # 1. load meta info: store all settings in Meta class as _meta attribute
        meta = Meta( profile_data, model_data )
        self._meta = meta

        # 2. init Cache and Remote backend
        self._cache = Cache( meta )
        self._remote = Remote( meta )
        self._remote.open() # authenticate at the remote

        # 3. load odML terminologies
        # TODO make odML load terms into our cache folder, not the default /tmp
        terms = terminology.terminologies.load(profile_data['odml_repository'])
        self.terminologies = terms.sections

        # 4. attach supported models
        self.models = dict( models_map )

        warnings.simplefilter('ignore', tb.NaturalNameWarning)
        print "Session initialized."


    def clear_cache(self):
        """ removes all objects from the cache """
        self._cache.clear_cache()


    @activate_remote
    def select(self, model_name, params={}, data_load=False, mode='obj'):
        """ requests objects of a given type from server in bulk mode. 

        caching:    caches files only
        cascade:    no
        data_load:  yes/no

        Arguments:

        model_name: type of the object (like 'block', 'segment' or 'section').
        params:     dict that can contain several categories of key-value pairs
        data_load:  fetch the data or not (applied if mode == 'obj')
        mode:       'obj' or 'json' - return mode, python object or JSON

        Params can be:

        1. filters, like:
            'owner__username': '******'
            'segment__id__in': [19485,56223,89138]
            'n_definition__icontains': 'blafoo' # negative filter! (has 'n_')

        2. common params, like
            'at_time': '2013-02-22 15:34:57'
            'offset': 50
            'max_results': 20

        3. data params, to get only parts of the original object(s). These only 
            work for the data-related objects (like 'analogsignal' or 
            'spiketrain').

            start_time - start time of the required range (calculated
                using the same time unit as the t_start of the signal)
            end_time - end time of the required range (calculated using
                the same time unit as the t_start of the signal)
            duration - duration of the required range (calculated using
                the same time unit as the t_start of the signal)
            start_index - start index of the required datarange (an index
                of the starting datapoint)
            end_index - end index of the required range (an index of the
                end datapoint)
            samples_count - number of datapoints in the required range
            downsample - number of datapoints. This parameter is used to
                indicate whether downsampling is needed. The downsampling
                is applied on top of the selected data range using other
                parameters (if specified)

        Examples:
        select('analogsignal', params={'id__in': [38551], 'downsample': 100})
        select('analogsignal', params={'segment__id': 93882, 'start_time': 500.0})
        select('section', params={'odml_type': 'experiment', 'date_created': '2013-02-22'})

        """
        if model_name in self._meta.cls_aliases.values(): # TODO put into model_safe decorator
            model_name = [k for k, v in self._meta.cls_aliases.items() if v==model_name][0]

        if not model_name in self._meta.models_map.keys():
            raise TypeError('Objects of that type are not supported.')

        # fetch from remote + save in cache if possible
        json_objs = self._remote.get_list( model_name, params )

        if mode == 'json':
            # return pure JSON (no data) if requested
            objects = json_objs

        else:
            # convert to objects in 'obj' mode
            app = self._meta.app_prefix_dict[ model_name ]
            model = models_map[ model_name ]

            objects = []
            for json_obj in json_objs:
                data_refs = {} # is a dict like {'signal': <array...>, ...}
                if data_load:
                    data_refs = self.__parse_data_from_json( json_obj )

                obj = Serializer.deserialize( json_obj, self._meta, data_refs )
                objects.append( obj )

        self._cache.save_data_map() # updates on-disk cache with new datafiles
        self._cache.save_h5_map()

        return objects


    @activate_remote
    def pull(self, location, params={}, cascade=True, data_load=True):
        """ pulls object from the specified location on the server. 

        caching:    yes
        cascade:    True/False
        data_load:  True/False

        Arguments:

        location:   object location, either as a full URL like
                    'http://<host>/metadata/section/2394/', a bare location
                    '/metadata/section/2394', or a stripped version like
                    '/mtd/sec/2394'
        params:     dict that can contain several categories of key-value pairs
        cascade:    fetch related objects recursively (True/False)
        data_load:  fetch the data (True/False)

        Params can be:

        1. common params, like
            'at_time': '2013-02-22 15:34:57'

        2. data params, to get only parts of the original object(s). These only 
            work for the data-related objects (like 'analogsignal' or 
            'spiketrain').

            start_time - start time of the required range (calculated
                using the same time unit as the t_start of the signal)
            end_time - end time of the required range (calculated using
                the same time unit as the t_start of the signal)
            duration - duration of the required range (calculated using
                the same time unit as the t_start of the signal)
            start_index - start index of the required datarange (an index
                of the starting datapoint)
            end_index - end index of the required range (an index of the
                end datapoint)
            samples_count - number of datapoints in the required range
            downsample - number of datapoints. This parameter is used to
                indicate whether downsampling is needed. The downsampling
                is applied on top of the selected data range using other
                parameters (if specified)

        """
        location = self._meta.parse_location( location )
        supp_models = [k for k in models_map.keys() if \
            not k in ['property', 'value']]
        if not location[1] in supp_models:
            raise TypeError('Objects of that type are not pull-supported.')

        processed = {} # collector of processed objects like
                       # {"metadata/section/2394/": <object..>, ...}
        to_clean = [] # collector of ids of objects to clean parent
        stack = [ location ] # a stack of objects to sync

        while len( stack ) > 0:
            loc = stack[0]

            # find object in cache
            etag = None
            cached_obj = self._cache.get_obj_by_location( loc )
            if not type(cached_obj) == type(None):
                obj_descr = self._meta.get_gnode_descr(cached_obj)
                if obj_descr and obj_descr['fields'].has_key('guid'):
                    etag = obj_descr['fields']['guid']

            # request object from the server (with ETag)
            json_obj = self._remote.get(loc, params, etag)

            if json_obj == 304: # get object from cache
                obj = cached_obj
                print_status('%s loaded from cache.' % str(loc))

            else: # request from server

                # download related data
                data_refs = {} # is a dict like {'signal': <array...>, ...}
                if data_load:
                    data_refs = self.__parse_data_from_json( json_obj )

                # parse json (+data) into python object
                obj = Serializer.deserialize( json_obj, self._meta, data_refs )

                # put metadata in the stack
                #if json_obj['fields'].has_key('metadata'):
                #    for value in json_obj['fields']['metadata']:
                #        cl_value = self._meta.clean_location( value )
                #        stack.append( cl_value )

                # or just download attached metadata here?
                # metadata = self._fetch_metadata_by_json(cls, json_obj)
                print_status("%s fetched from server." % loc)

            stack.remove( loc ) # remove the processed object from the stack
            processed[ str(loc) ] = obj # add it to processed

            app, cls, lid = loc[0], loc[1], loc[2]
            children = self._meta.app_definitions[cls]['children'] # child object types
            obj_descr = self._meta.get_gnode_descr(obj)
            if cascade and children and obj_descr:
                for child in children: # 'child' is like 'segment', 'event' etc.

                    field_name = child + '_set'
                    if obj_descr['fields'].has_key( field_name ) and \
                        obj_descr['fields'][ field_name ]:

                        for rel_link in obj_descr['fields'][ field_name ]:
                            cl_link = self._meta.parse_location( rel_link )

                            if not str(cl_link) in processed.keys() and not \
                                str(cl_link) in [str(o) for o in stack]:
                                stack.insert( 0, cl_link )

        # building relationships for python objects
        for key, obj in processed.items():
            # TODO make some iterator below to avoid duplicate code
            loc = self._meta.parse_location( key )
            app, cls, lid = loc[0], loc[1], loc[2]
            children = self._meta.app_definitions[cls]['children']
            obj_descr = self._meta.get_gnode_descr(obj)
            if cascade and children and obj_descr:  
                for child in children: # 'child' is like 'segment', 'event' etc.

                    field_name = child + '_set'
                    if obj_descr['fields'].has_key( field_name ) and \
                        obj_descr['fields'][ field_name ]:

                            rel_objs = []

                            for rel_link in obj_descr['fields'][ field_name ]:
                                cl_link = self._meta.parse_location( rel_link )
                                rel_objs.append( processed[str(cl_link)] )

                            if rel_objs: # parse children into parent attrs
                                # a way to assign kids depends on object type
                                self.__assign_child( child, obj, rel_objs )

        """ TODO add metadata to objects 
        # parse related metadata
        if not json_obj['fields'].has_key('metadata') or \
            not json_obj['fields']['metadata']:

        else:
            mobj = Metadata()
            for p, v in raw_json['metadata']:
                prp = Serializer.deserialize(p, self)
                val = Serializer.deserialize(v, self)
                prp.append( val )

                # save both objects to cache
                self._cache.add_object( prp )
                self._cache.add_object( val )

                setattr( mobj, prp.name, prp )
        """

        print_status( 'Object(s) loaded.\n' )

        obj = processed[ str(location) ]
        self._cache.add_object(obj)
        self._cache.save_data_map()
        self._cache.save_h5_map()

        return obj


    @activate_remote
    def sync(self, obj_to_sync, cascade=False):
        """ syncs a given object to the server (updates or creates a new one).

        cascade:    True/False

        Arguments:

        obj_to_sync: a python object to sync. If the object has a gnode
                    attribute, it will be updated on the server; otherwise a
                    new object will be submitted.
        cascade:    sync all children recursively (True/False)
        """
        supp_models = [m for k, m in models_map.items() if \
            not k in ['property', 'value']]
        if not obj_to_sync.__class__ in supp_models:
            raise TypeError('Objects of that type are not supported.')

        processed = [] # collector of permalinks of processed objects
        to_clean = [] # collector of ids of objects to clean parent
        stack = [ obj_to_sync ] # a stack of objects to sync

        self._cache.add_object(obj_to_sync) # if not yet there

        while len( stack ) > 0:

            obj = stack[0] # take first object from stack
            success = False # flag to indicate success of the syncing
            cls = self._meta.get_type_by_obj( obj ) # type of the object, like 'segment'

            # bloody workaround for duplications because of NEO
            obj_descr = self._meta.get_gnode_descr(obj)
            if obj_descr and obj_descr['permalink'] in processed:
                stack.remove( obj )
                continue

            # 1. validate class type
            if not obj.__class__ in supported_models:
                # skip this object completely
                stack.remove( obj )
                print_status('Object %s is not supported.\n' % cut_to_render( obj.__repr__() ))
                continue

            # 2. pre-push new/changed array data to the server (+put in cache)
            # data_refs is a dict like {'signal': 'http://host:/neo/signal/148348', ...}
            try:
                data_refs = self.__push_data_from_obj( obj )
            except (errors.FileUploadError, errors.UnitsError), e:
                # skip this object completely
                stack.remove( obj )
                print_status('%s skipped: %s\n' % (cut_to_render(obj.__repr__(), 15), str(e)))
                continue

            # 3. pre-sync related metadata if exists (+put in cache)
            if hasattr(obj, 'metadata'):

                metadata = getattr(obj, 'metadata')
                if isinstance(metadata, Metadata):

                    to_sync = []
                    for name, prp in metadata.__dict__.items():
                        if prp.value:
                            if not self._meta.get_gnode_descr(prp.value):
                                to_sync.insert(0, prp.value) # sync value if never synced

                            if not self._meta.get_gnode_descr(prp):
                                to_sync.insert(0, prp) # sync property if never synced
                                if not prp.parent:
                                    print_status('Cannot sync %s for %s: section is not defined.\n' % \
                                        (name, cut_to_render( obj.__repr__() )))
                                    stack.remove( prp )
                                    continue # move to other property

                                if not self._meta.get_gnode_descr(prp.parent):
                                    to_sync.insert(0, prp.parent) # sync parent section

                    if to_sync: # sync what's needed first
                        stack = to_sync + stack
                        continue

            # 4. sync main object
            try:
                json_obj = Serializer.serialize(obj, self._meta, data_refs)

                # TODO ideally the JSON object representation should be unique
                # and this code excluded
                for k in list( json_obj['fields'].keys() ):
                    if k.endswith('_set') or k == 'shared_with':
                        json_obj['fields'].pop( k, None )

                raw_json = self._remote.save( json_obj )

                if not raw_json == 304:
                    # update local in-memory object with newly acquired params
                    self._meta.set_gnode_descr(obj, raw_json)

                # a list of children in the gnode attribute in all parent 
                # objects for obj must be updated with a newly synced child. it 
                # should be done here, not at the end of the sync, to keep 
                # objects updated in case the sync fails.
                Serializer.update_parent_children(obj, self._meta)

                success = True
                obj_descr = self._meta.get_gnode_descr(obj)
                processed.append( obj_descr['permalink'] )
                print_status('Object at %s synced.' % obj_descr['location'])

            except (errors.UnitsError, errors.ValidationError, \
                errors.SyncFailed, errors.BadRequestError), e:
                print_status('%s skipped: %s\n' % (cut_to_render(obj.__repr__(), 20), str(e)))

            stack.remove( obj ) # remove the processed object from the stack

            # 5. if cascade put children objects to the stack to sync
            children = self._meta.app_definitions[cls]['children'] # child object types
            obj_descr = self._meta.get_gnode_descr(obj)
            if cascade and children and obj_descr:
                for child in children: # 'child' is like 'segment', 'event' etc.

                    # cached children references
                    child_link_set = list( obj_descr['fields'][ child + '_set' ] )

                    for rel in getattr(obj, get_children_field_name( child )):

                        # detect children of a given type that were removed (using cache)
                        rel_descr = self._meta.get_gnode_descr(rel)
                        if rel_descr and rel_descr['permalink'] in child_link_set:
                            child_link_set.remove( rel_descr['permalink'] )

                        # important to skip already scheduled or processed objs
                        if not (not (rel_descr == None) and rel_descr['permalink'] in processed):
                            # and not obj in stack:
                            # stupid NEO!! NEO object can't be compared with any
                            # other object type (error), so the workaround
                            # would be to check if the object was processed 
                            # before processing
                            stack.append( rel )

                    par_name = get_parent_field_name(cls, child)
                    # collect permalinks of removed objects as (link, par_field_name)
                    to_clean += [(x, par_name) for x in child_link_set]