示例#1
0
    def load(self, truncate=False, skip=False):
        """Fill every table from ``FIAS_TABLES`` that has no ``Status`` row yet.

        Tables missing from ``self.tables`` are skipped with a debug message.
        A table that already has a ``Status`` row is not reloaded; a warning
        is logged instead.

        :param truncate: forwarded to the table loader's ``load`` call.
        :param skip: accepted for interface compatibility; not used here.
        """
        for name in FIAS_TABLES:
            try:
                table = self.tables[name]
            except KeyError:
                log.debug('Table `{0}` not found in archive'.format(name))
                continue

            try:
                existing = Status.objects.get(table=name)
            except Status.DoesNotExist:
                existing = None

            if existing is not None:
                # Already loaded once: never overwrite silently.
                log.warning('Table `{0}` has version `{1}`. '
                            'Please use --force-replace for replace '
                            'all tables. Skipping...'.format(existing.table, existing.ver))
                continue

            log.info('Filling table `{0}` to ver. {1}...'.format(table.full_name, self._version.ver))

            loader(table).load(truncate=truncate, update=False)

            # Record the freshly loaded version only after the load returned.
            Status(table=table.full_name, ver=self._version).save()

            self._process_deleted_table(name)
示例#2
0
    def load(self, truncate=False, skip=False):
        """Update every known table whose stored version is older than ours.

        Candidates are the ``Status`` rows with ``ver.ver`` lower than
        ``self._version.ver``, intersected with ``FIAS_TABLES``. Tables absent
        from ``self.tables`` are skipped with a debug message.

        :param truncate: accepted for interface compatibility; the loader is
            always called with ``truncate=False``.
        :param skip: when true, a broken XML file is logged and the table's
            status is left unchanged; otherwise ``BadArchiveError`` is raised.
        :raises BadArchiveError: on ``XMLSyntaxError`` when ``skip`` is false.
        """
        outdated = {s.table for s in Status.objects.filter(ver__ver__lt=self._version.ver)}

        for name in outdated & set(FIAS_TABLES):
            try:
                table = self.tables[name]
            except KeyError:
                log.debug('Table `{0}` not found in archive'.format(name))
                continue

            status = Status.objects.get(table=table.full_name)

            log.info('Updating table `{0}` from {1} to {2}...'.format(
                table.full_name, status.ver.ver, self._version.ver))

            try:
                loader(table).load(truncate=False, update=True)
            except XMLSyntaxError:
                msg = 'XML file for table `{0}` is broken. Data not loaded!'.format(table.full_name)
                if not skip:
                    raise BadArchiveError(msg)
                log.error(msg)
            else:
                # Bump the stored version only after a successful load.
                status.ver = self._version
                status.save()

            # Runs for skipped failures as well, exactly like the success path.
            self._process_deleted_table(name)
示例#3
0
    def load(self, truncate=False, skip=False):
        """Apply an update archive to the tables that lag behind its version.

        Only tables that have a ``Status`` row with ``ver.ver`` below
        ``self._version.ver`` and that are listed in ``FIAS_TABLES`` are
        considered. A table missing from ``self.tables`` is skipped.

        :param truncate: not used; the loader always gets ``truncate=False``.
        :param skip: log a broken XML file instead of raising.
        :raises BadArchiveError: on ``XMLSyntaxError`` when ``skip`` is false.
        """
        stale_rows = Status.objects.filter(ver__ver__lt=self._version.ver)
        stale_names = [row.table for row in stale_rows]

        for name in set(stale_names) & set(FIAS_TABLES):
            try:
                table = self.tables[name]
            except KeyError:
                log.debug(
                    'Table `{0}` not found in archive'.format(name))
                continue

            status = Status.objects.get(table=table.full_name)

            progress = 'Updating table `{0}` from {1} to {2}...'.format(
                table.full_name, status.ver.ver, self._version.ver)
            log.info(progress)

            table_loader = loader(table)
            try:
                table_loader.load(truncate=False, update=True)
            except XMLSyntaxError:
                msg = 'XML file for table `{0}` is broken. Data not loaded!'.format(
                    table.full_name)
                if not skip:
                    raise BadArchiveError(msg)
                log.error(msg)
            else:
                # The stored version moves forward only on success.
                status.ver = self._version
                status.save()

            # Executed whether or not the load above succeeded.
            self._process_deleted_table(name)
示例#4
0
    def load(self, truncate=False, skip=False):
        for table_name in FIAS_TABLES:
            try:
                table = self.tables[table_name]
            except KeyError:
                log.debug(
                    'Table `{0}` not found in archive'.format(table_name))
                continue

            try:
                status = Status.objects.get(table=table_name)
            except Status.DoesNotExist:

                log.info('Filling table `{0}` to ver. {1}...'.format(
                    table.full_name, self._version.ver))

                ldr = loader(table)
                ldr.load(truncate=truncate, update=False)

                status = Status(table=table.full_name, ver=self._version)
                status.save()

                self._process_deleted_table(table_name)
            else:
                log.warning('Table `{0}` has version `{1}`. '
                            'Please use --force-replace for replace '
                            'all tables. Skipping...'.format(
                                status.table, status.ver))
示例#5
0
    def push(self, raw_data, related_attrs=None):
        """Queue one parsed row for creation, or update the stored row.

        The row's attributes come from ``raw_data.attrib`` (passed through
        ``self._lower_keys``) and are optionally extended with
        ``related_attrs``.

        * In ``'fill'`` mode, or when no row with the same primary key exists,
          a new model instance is appended to ``self.objects``. Every 10000
          queued rows the batch is flushed through ``self._create()``.
        * Otherwise, if ``self.upd_field`` is set and present in the data, the
          stored row is overwritten when its ``upd_field`` date is older than
          the incoming one.

        :param raw_data: object exposing the row attributes as ``.attrib``.
        :param related_attrs: optional dict merged over the row attributes.
        """
        data = self._lower_keys(raw_data.attrib)

        if isinstance(related_attrs, dict):
            data.update(related_attrs)

        lookup = {self.pk: data[self.pk]}

        if self.mode == 'fill' or not self.model.objects.filter(**lookup).exists():
            self.objects.append(self.model(**data))
            self.counter += 1

            # Flush only right after the counter moved. Checking on every
            # call would re-run _create() (and log a duplicate message) for
            # each update-path row while the counter rests on a multiple
            # of 10000.
            if self.counter and self.counter % 10000 == 0:
                self._create()
                log.info('Created {0} objects'.format(self.counter))
        elif self.upd_field is not None and self.upd_field in data:
            old_obj = self.model.objects.get(**lookup)
            # The incoming value is a 'YYYY-MM-DD' string; convert it to a date
            # so it can be compared with, and stored on, the existing row.
            data[self.upd_field] = datetime.datetime.strptime(data[self.upd_field], "%Y-%m-%d").date()

            if getattr(old_obj, self.upd_field) < data[self.upd_field]:
                for k, v in data.items():
                    setattr(old_obj, k, v)
                old_obj.save()
                self.upd_counter += 1

            # Updating issues a great many SELECT queries, which also eat a
            # fair amount of memory, so clear the query log often.
            if settings.DEBUG:
                db.reset_queries()
示例#6
0
    def push(self, raw_data, related_attrs=None):
        """Queue one parsed row for creation, or update the stored row.

        The row's attributes are built with
        ``dict(self._lower_keys_empty_uuids_to_none(raw_data.attrib))`` and
        are optionally extended with ``related_attrs``.

        * In ``'fill'`` mode, or when no row with the same primary key exists,
          a new model instance is appended to ``self.objects``. Every 10000
          queued rows the batch is flushed through ``self._create()``.
        * Otherwise, if ``self.upd_field`` is set and present in the data, the
          stored row is overwritten when its ``upd_field`` date is older than
          the incoming one.

        :param raw_data: object exposing the row attributes as ``.attrib``.
        :param related_attrs: optional dict merged over the row attributes.
        """
        data = dict(self._lower_keys_empty_uuids_to_none(raw_data.attrib))

        if isinstance(related_attrs, dict):
            data.update(related_attrs)

        lookup = {self.pk: data[self.pk]}

        if self.mode == 'fill' or not self.model.objects.filter(**lookup).exists():
            self.objects.append(self.model(**data))
            self.counter += 1

            # Flush only right after the counter moved. Checking on every
            # call would re-run _create() (and log a duplicate message) for
            # each update-path row while the counter rests on a multiple
            # of 10000.
            if self.counter and self.counter % 10000 == 0:
                self._create()
                log.info('Created {0} objects'.format(self.counter))
        elif self.upd_field is not None and self.upd_field in data:
            old_obj = self.model.objects.get(**lookup)
            # The incoming value is a 'YYYY-MM-DD' string; convert it to a date
            # so it can be compared with, and stored on, the existing row.
            data[self.upd_field] = datetime.datetime.strptime(data[self.upd_field], "%Y-%m-%d").date()

            if getattr(old_obj, self.upd_field) < data[self.upd_field]:
                for k, v in data.items():
                    setattr(old_obj, k, v)
                old_obj.save()
                self.upd_counter += 1

            # Updating issues a great many SELECT queries, which also eat a
            # fair amount of memory, so clear the query log often.
            if settings.DEBUG:
                db.reset_queries()
示例#7
0
    def load(self, truncate=False, update=False):
        """Parse the table's XML and pass it to ``self.process_row``.

        :param truncate: call ``self._truncate()`` before loading.
        :param update: put the bulk helper into ``'update'`` mode and reset
            its counters; otherwise it runs in ``'fill'`` mode.
        """
        if truncate:
            self._truncate()

        self._bulk.mode = 'update' if update else 'fill'
        if update:
            self._bulk.reset_counters()

        # Workaround for "XMLSyntaxError: Document is empty, line 1, column 1":
        # consume the first three bytes; if they are the BOM header, parsing
        # continues right after them, otherwise the stream is opened anew so
        # parsing starts from the first byte.
        # NOTE(review): the first handle is not closed when reopening -
        # confirm whether the stream type needs an explicit close().
        stream = self._table.open()
        if stream.read(3) == _bom_header:
            log.info('Fixed wrong BOM header')
        else:
            stream = self._table.open()

        _fast_iter(context=etree.iterparse(stream), func=self.process_row)

        self._bulk.finish()

        table_label = self._table.full_name
        log.info('Processing table `{0}` is finished'.format(table_label))
示例#8
0
    def load(self, truncate=False, update=False):
        """Stream the table's XML through ``self.process_row``.

        :param truncate: when true, ``self._truncate()`` runs first.
        :param update: selects the bulk helper mode: ``'update'`` (with
            counters reset) when true, ``'fill'`` otherwise.
        """
        if truncate:
            self._truncate()

        bulk = self._bulk
        if not update:
            bulk.mode = 'fill'
        else:
            bulk.mode = 'update'
            bulk.reset_counters()

        # Workaround for "XMLSyntaxError: Document is empty, line 1, column 1".
        # The first three bytes are read up front: a BOM header is thereby
        # dropped from the stream, anything else forces a fresh open so that
        # no data is lost.
        source = self._table.open()
        head = source.read(3)
        if head == _bom_header:
            log.info('Fixed wrong BOM header')
        else:
            source = self._table.open()

        parse_events = etree.iterparse(source)
        _fast_iter(context=parse_events, func=self.process_row)

        self._bulk.finish()

        log.info('Processing table `{0}` is finished'.format(self._table.full_name))
示例#9
0
def update_data(path=None, version=None, skip=False, data_format='xml', limit=1000, tables=None, tempdir=None):
    """Update already-loaded tables to the version of the given archive.

    For every name returned by ``get_table_names(tables)`` that is present in
    the archive, the stored ``Status`` is compared with the archive version.
    Tables that are already at (or past) that version are skipped; the others
    are loaded with ``TableUpdater`` and their status is bumped.

    :param path: forwarded to ``get_tablelist``.
    :param version: forwarded to ``get_tablelist``.
    :param skip: log ``BadTableError`` and carry on instead of re-raising.
    :param data_format: forwarded to ``get_tablelist``.
    :param limit: forwarded to ``TableUpdater``.
    :param tables: forwarded to ``get_table_names``.
    :param tempdir: forwarded to ``get_tablelist``.
    :raises BadTableError: when a table fails to load and ``skip`` is false.
    """
    tablelist = get_tablelist(path=path, version=version, data_format=data_format, tempdir=tempdir)

    for tbl in get_table_names(tables):
        # Skip tables that are absent from the archive.
        if tbl not in tablelist.tables:
            continue

        st = Status.objects.get(table=tbl)

        if st.ver.ver >= tablelist.version.ver:
            # The relation printed in the message mirrors the condition
            # above: stored version >= archive version.
            log.info('Update of the table `{0}` is not needed [{1} >= {2}]. Skipping...'.format(
                tbl, st.ver.ver, tablelist.version.ver
            ))
            continue

        for table in tablelist.tables[tbl]:
            loader = TableUpdater(limit=limit)
            try:
                loader.load(tablelist=tablelist, table=table)
            except BadTableError as e:
                if skip:
                    log.error(str(e))
                else:
                    raise

        # NOTE(review): the version is bumped even when a BadTableError was
        # skipped above - confirm that this is intended.
        st.ver = tablelist.version
        st.save()
示例#10
0
    def _retrieve(self, version=None, path=None):
        """Open the RAR archive, downloading it first when no path is given.

        Without ``path`` the location is read from ``version`` via
        ``self.field_name`` and fetched with ``urlretrieve``. The opened
        archive is stored on ``self._archive``, and ``self._version`` is
        filled from ``self._get_version()`` if it is still ``None``.

        :param version: version object; used for the download location and
            in the error message.
        :param path: local archive path, or ``None`` to download.
        :returns: the opened ``rarfile.RarFile``.
        :raises BadArchiveError: if the file is not a readable RAR archive.
        """
        origin = path
        self._path = path
        if self._path is None:
            origin = getattr(version, self.field_name)
            log.info('Downloading file: {0}'.format(origin))
            downloaded = urlretrieve(origin)
            self._path = downloaded[0]

        try:
            self._archive = rarfile.RarFile(self._path)
        except (rarfile.NotRarFile, rarfile.BadRarFile):
            shown_version = version or 'unknown'
            raise BadArchiveError('Archive: `{0}`, ver: `{1}` corrupted'
                                  ' or is not rar-archive'.format(origin, shown_version))

        if self._version is None:
            self._version = self._get_version()

        return self._archive
示例#11
0
    def _retrieve(self, version=None, path=None):
        """Return the opened RAR archive for ``path`` or for ``version``.

        If ``path`` is ``None``, the attribute named ``self.field_name`` is
        read from ``version`` and downloaded with ``urlretrieve``; the
        resulting local file name becomes ``self._path``.

        :param version: version object; also shown in the error message.
        :param path: local archive path, or ``None`` to download.
        :returns: ``self._archive``, a ``rarfile.RarFile``.
        :raises BadArchiveError: when ``rarfile`` rejects the file.
        """
        self._path = path
        location = path
        if self._path is None:
            location = getattr(version, self.field_name)
            log.info('Downloading file: {0}'.format(location))
            self._path = urlretrieve(location)[0]

        try:
            archive = rarfile.RarFile(self._path)
        except (rarfile.NotRarFile, rarfile.BadRarFile):
            raise BadArchiveError('Archive: `{0}`, ver: `{1}` corrupted'
                                  ' or is not rar-archive'.format(location, version or 'unknown'))
        self._archive = archive

        # Determine the version from the archive only if it is not known yet.
        if self._version is None:
            self._version = self._get_version()

        return self._archive
示例#12
0
def update_data(path=None,
                version=None,
                skip=False,
                data_format='xml',
                limit=1000,
                tables=None,
                tempdir=None):
    """Update already-loaded tables to the version of the given archive.

    For every name returned by ``get_table_names(tables)`` that is present in
    the archive, the stored ``Status`` is compared with the archive version.
    Tables that are already at (or past) that version are skipped; the others
    are loaded with ``TableUpdater`` and their status is bumped.

    :param path: forwarded to ``get_tablelist``.
    :param version: forwarded to ``get_tablelist``.
    :param skip: log ``BadTableError`` and carry on instead of re-raising.
    :param data_format: forwarded to ``get_tablelist``.
    :param limit: forwarded to ``TableUpdater``.
    :param tables: forwarded to ``get_table_names``.
    :param tempdir: forwarded to ``get_tablelist``.
    :raises BadTableError: when a table fails to load and ``skip`` is false.
    """
    tablelist = get_tablelist(path=path,
                              version=version,
                              data_format=data_format,
                              tempdir=tempdir)

    for tbl in get_table_names(tables):
        # Skip tables that are absent from the archive.
        if tbl not in tablelist.tables:
            continue

        st = Status.objects.get(table=tbl)

        if st.ver.ver >= tablelist.version.ver:
            # The relation printed in the message mirrors the condition
            # above: stored version >= archive version.
            log.info(
                'Update of the table `{0}` is not needed [{1} >= {2}]. Skipping...'
                .format(tbl, st.ver.ver, tablelist.version.ver))
            continue

        for table in tablelist.tables[tbl]:
            loader = TableUpdater(limit=limit)
            try:
                loader.load(tablelist=tablelist, table=table)
            except BadTableError as e:
                if skip:
                    log.error(str(e))
                else:
                    raise

        # NOTE(review): the version is bumped even when a BadTableError was
        # skipped above - confirm that this is intended.
        st.ver = tablelist.version
        st.save()
示例#13
0
    def finish(self):
        """Flush rows still queued in ``self.objects`` and report updates.

        Calls ``self._create()`` when the queue is non-empty, then logs the
        number of updated objects if ``self.upd_counter`` is non-zero.
        """
        pending = self.objects
        if pending:
            self._create()

        updated = self.upd_counter
        if updated:
            log.info('Updated {0} objects'.format(updated))
示例#14
0
    def finish(self):
        """Complete the batch: create leftover rows, then log the update count.

        ``self._create()`` runs only when ``self.objects`` is non-empty; the
        log line is emitted only when ``self.upd_counter`` is non-zero.
        """
        has_leftovers = bool(self.objects)
        if has_leftovers:
            self._create()

        update_total = self.upd_counter
        if not update_total:
            return
        log.info('Updated {0} objects'.format(update_total))