Пример #1
0
    def tables(self):
        if self._tables is None:
            from couchexport.export import get_headers

            headers = get_headers(self.schema, separator=".")
            self._tables = [(index, row[0]) for index, row in headers]
        return self._tables
Пример #2
0
    def get_export_files(self, format='', previous_export_id=None, filter=None,
                         use_cache=True, max_column_size=2000, separator='|', process=None, **kwargs):
        # the APIs of how these methods are broken down suck, but at least
        # it's DRY
        from couchexport.export import get_writer, get_export_components, get_headers, get_formatted_rows
        from django.core.cache import cache
        import hashlib

        export_tag = self.index

        CACHE_TIME = 1 * 60 * 60 # cache for 1 hour, in seconds

        def _build_cache_key(tag, prev_export_id, format, max_column_size):
            def _human_readable_key(tag, prev_export_id, format, max_column_size):
                return "couchexport_:%s:%s:%s:%s" % (tag, prev_export_id, format, max_column_size)
            return hashlib.md5(_human_readable_key(tag, prev_export_id,
                format, max_column_size)).hexdigest()

        # check cache, only supported for filterless queries, currently
        cache_key = _build_cache_key(export_tag, previous_export_id, format, max_column_size)
        if use_cache and filter is None:
            cached_data = cache.get(cache_key)
            if cached_data:
                (tmp, checkpoint) = cached_data
                return ExportFiles(tmp, checkpoint)

        fd, path = tempfile.mkstemp()
        with os.fdopen(fd, 'wb') as tmp:
            schema_index = export_tag
            config, updated_schema, export_schema_checkpoint = get_export_components(schema_index,
                                                                                     previous_export_id, filter)
            if config:
                writer = get_writer(format)

                # get cleaned up headers
                formatted_headers = self.remap_tables(get_headers(updated_schema, separator=separator))
                writer.open(formatted_headers, tmp, max_column_size=max_column_size)

                total_docs = len(config.potentially_relevant_ids)
                if process:
                    DownloadBase.set_progress(process, 0, total_docs)
                for i, doc in config.enum_docs():
                    if self.transform:
                        doc = self.transform(doc)

                    writer.write(self.remap_tables(get_formatted_rows(
                        doc, updated_schema, include_headers=False,
                        separator=separator)))
                    if process:
                        DownloadBase.set_progress(process, i + 1, total_docs)
                writer.close()

            checkpoint = export_schema_checkpoint

        if checkpoint:
            if use_cache:
                cache.set(cache_key, (path, checkpoint), CACHE_TIME)
            return ExportFiles(path, checkpoint)

        return None
Пример #3
0
    def get_export_files(self, format='', previous_export_id=None, filter=None,
                         use_cache=True, max_column_size=2000, separator='|', process=None, **kwargs):
        # the APIs of how these methods are broken down suck, but at least
        # it's DRY
        from couchexport.export import get_writer, get_export_components, get_headers, get_formatted_rows
        from django.core.cache import cache
        import hashlib

        export_tag = self.index

        CACHE_TIME = 1 * 60 * 60 # cache for 1 hour, in seconds

        def _build_cache_key(tag, prev_export_id, format, max_column_size):
            def _human_readable_key(tag, prev_export_id, format, max_column_size):
                return "couchexport_:%s:%s:%s:%s" % (tag, prev_export_id, format, max_column_size)
            return hashlib.md5(_human_readable_key(tag, prev_export_id,
                format, max_column_size)).hexdigest()

        # check cache, only supported for filterless queries, currently
        cache_key = _build_cache_key(export_tag, previous_export_id, format, max_column_size)
        if use_cache and filter is None:
            cached_data = cache.get(cache_key)
            if cached_data:
                (tmp, checkpoint) = cached_data
                return ExportFiles(tmp, checkpoint)

        fd, path = tempfile.mkstemp()
        with os.fdopen(fd, 'wb') as tmp:
            schema_index = export_tag
            config, updated_schema, export_schema_checkpoint = get_export_components(schema_index,
                                                                                     previous_export_id, filter)
            if config:
                writer = get_writer(format)

                # get cleaned up headers
                formatted_headers = self.remap_tables(get_headers(updated_schema, separator=separator))
                writer.open(formatted_headers, tmp, max_column_size=max_column_size)

                total_docs = len(config.potentially_relevant_ids)
                if process:
                    DownloadBase.set_progress(process, 0, total_docs)
                for i, doc in config.enum_docs():
                    if self.transform:
                        doc = self.transform(doc)

                    writer.write(self.remap_tables(get_formatted_rows(
                        doc, updated_schema, include_headers=False,
                        separator=separator)))
                    if process:
                        DownloadBase.set_progress(process, i + 1, total_docs)
                writer.close()

            checkpoint = export_schema_checkpoint

        if checkpoint:
            if use_cache:
                cache.set(cache_key, (path, checkpoint), CACHE_TIME)
            return ExportFiles(path, checkpoint)

        return None
Пример #4
0
 def generate_table_headers(self, schemas, checkpoints):
     headers = []
     for i, schema in enumerate(schemas):
         if not checkpoints[i]:
             continue
         header = self.export_objects[i].parse_headers(get_headers(schema, separator=self.separator))
         headers.extend(header)
     return headers
Пример #5
0
 def generate_table_headers(self, schemas, checkpoints):
     headers = []
     for i, schema in enumerate(schemas):
         if not checkpoints[i]:
             continue
         header = self.export_objects[i].parse_headers(get_headers(schema, separator=self.separator))
         headers.extend(header)
     return headers
Пример #6
0
 def tables(self):
     if self._tables is None:
         from couchexport.export import get_headers
         headers = get_headers(self.schema, separator=".")
         self._tables = [(index, row[0]) for index, row in headers]
     return self._tables