Пример #1
0
    def dframe(self,
               query_args=None,
               keep_parent_ids=False,
               padded=False,
               index=False,
               reload_=False,
               keep_mongo_keys=False):
        """Fetch the dframe for this dataset.

        The result is cached on the instance only when it is built with the
        default options. Any option that changes the contents of the returned
        frame bypasses the cache, both for reading and for writing.

        :param query_args: An optional QueryArgs to hold the query arguments.
        :param keep_parent_ids: Do not remove parent IDs from the dframe,
            default False.
        :param padded: Used for joining, default False.
        :param index: Return the index with dframe, default False.
        :param reload_: Force refresh of data, default False.
        :param keep_mongo_keys: Used for updating documents, default False.

        :returns: Return DataFrame with contents based on query parameters
            passed to MongoDB. DataFrame will not have parent ids if
            `keep_parent_ids` is False. When `query_args.distinct` is set the
            observations are wrapped in a DataFrame and returned as is,
            skipping decoding, key removal, padding and caching.
        """
        # Bypass the cache whenever a specific version is requested. `index`
        # and `keep_mongo_keys` alter the columns of the result, so they must
        # not be served from, or stored into, the default cached copy.
        cacheable = not (query_args or keep_parent_ids or padded or index
                         or keep_mongo_keys)

        # use cached copy if we have already fetched it
        if cacheable and not reload_ and self.__is_cached:
            return self.__dframe

        query_args = query_args or QueryArgs()
        observations = self.observations(query_args, as_cursor=True)

        if query_args.distinct:
            return DataFrame(observations)

        # `query_args.distinct` is necessarily falsy here because of the
        # early return above; it is still forwarded to keep the call intact.
        dframe = Observation.batch_read_dframe_from_cursor(
            self, observations, query_args.distinct, query_args.limit)

        dframe = df_mongo_decode(dframe, keep_mongo_keys=keep_mongo_keys)

        # Reserved keys the caller asked to keep. Build a real list so the
        # helper can iterate or test membership more than once, also on
        # Python 3 where `filter` would return a one-shot iterator.
        candidates = [keep_parent_ids and PARENT_DATASET_ID, index and INDEX]
        excluded = [key for key in candidates if key]
        dframe = remove_reserved_keys(dframe, excluded)

        if index:
            # Expose the internal index column under the public name.
            dframe.rename(columns={INDEX: 'index'}, inplace=True)

        dframe = self.__maybe_pad(dframe, padded)

        if cacheable:
            self.__dframe = dframe

        return dframe
Пример #2
0
    def dframe(self, query_args=None, keep_parent_ids=False, padded=False,
               index=False, reload_=False, keep_mongo_keys=False):
        """Fetch the dframe for this dataset.

        Only the frame built with default options is cached on the instance.
        Requests that change what the frame contains never read from or
        write to that cache.

        :param query_args: An optional QueryArgs to hold the query arguments.
        :param keep_parent_ids: Do not remove parent IDs from the dframe,
            default False.
        :param padded: Used for joining, default False.
        :param index: Return the index with dframe, default False.
        :param reload_: Force refresh of data, default False.
        :param keep_mongo_keys: Used for updating documents, default False.

        :returns: Return DataFrame with contents based on query parameters
            passed to MongoDB. DataFrame will not have parent ids if
            `keep_parent_ids` is False. If `query_args.distinct` is set, a
            DataFrame built directly from the observations is returned
            without decoding, reserved-key removal, padding or caching.
        """
        # A specific version was requested if any content-changing option is
        # set. That includes `index` and `keep_mongo_keys`: both change the
        # resulting columns, so caching them would corrupt the default copy.
        wants_specific_version = (query_args or keep_parent_ids or padded
                                  or index or keep_mongo_keys)
        cacheable = not wants_specific_version

        # use cached copy if we have already fetched it
        if cacheable and not reload_ and self.__is_cached:
            return self.__dframe

        query_args = query_args or QueryArgs()
        observations = self.observations(query_args, as_cursor=True)

        if query_args.distinct:
            return DataFrame(observations)

        # The early return above means `query_args.distinct` is falsy at
        # this point; it is passed through unchanged.
        dframe = Observation.batch_read_dframe_from_cursor(
            self, observations, query_args.distinct, query_args.limit)

        dframe = df_mongo_decode(dframe, keep_mongo_keys=keep_mongo_keys)

        # Collect the reserved keys that were explicitly requested. A list
        # (not a lazy `filter` object) is handed to the helper so that it
        # behaves the same on Python 2 and Python 3.
        excluded = [
            key
            for key in (keep_parent_ids and PARENT_DATASET_ID,
                        index and INDEX)
            if key
        ]
        dframe = remove_reserved_keys(dframe, excluded)

        if index:
            # Rename the internal index column to the public 'index' name.
            dframe.rename(columns={INDEX: 'index'}, inplace=True)

        dframe = self.__maybe_pad(dframe, padded)

        if cacheable:
            self.__dframe = dframe

        return dframe
Пример #3
0
 def test_decode_reserved_keys(self):
     """The MONGO_ID column is absent after df_mongo_decode."""
     # Precondition: the fixture frame carries the MONGO_ID column.
     self.assertIn(MONGO_ID, self.dframe.columns)
     dframe = df_mongo_decode(self.dframe)
     # With default arguments the decoded frame must not expose it.
     self.assertNotIn(MONGO_ID, dframe.columns)
Пример #4
0
 def test_decode_reserved_keys(self):
     """df_mongo_decode drops the MONGO_ID column from the frame."""
     # The fixture frame is expected to start out with the MONGO_ID column.
     self.assertIn(MONGO_ID, self.dframe.columns)
     decoded = df_mongo_decode(self.dframe)
     # Decoding with default arguments must leave no MONGO_ID column.
     self.assertNotIn(MONGO_ID, decoded.columns)