def load(self, truncate=False, skip=False):
    """Perform the initial fill of every known FIAS table found in the archive.

    Tables that already have a ``Status`` row are considered loaded and are
    skipped with a warning (use --force-replace to reload them).

    Args:
        truncate: when True, empty each destination table before filling it.
        skip: when True, a broken XML file is logged and skipped instead of
            aborting the whole import with ``BadArchiveError``.  (Previously
            this parameter was accepted but silently ignored; the update-path
            loader already honoured it, so the fill path now matches.)
    """
    for table_name in FIAS_TABLES:
        try:
            table = self.tables[table_name]
        except KeyError:
            log.debug('Table `{0}` not found in archive'.format(table_name))
            continue

        try:
            status = Status.objects.get(table=table_name)
        except Status.DoesNotExist:
            log.info('Filling table `{0}` to ver. {1}...'.format(
                table.full_name, self._version.ver))
            ldr = loader(table)
            try:
                ldr.load(truncate=truncate, update=False)
            except XMLSyntaxError:
                msg = 'XML file for table `{0}` is broken. Data not loaded!'.format(table.full_name)
                if skip:
                    log.error(msg)
                    continue
                raise BadArchiveError(msg)
            # NOTE(review): the lookup above queries by `table_name` while the
            # row is saved with `table.full_name` — confirm the two always
            # match, otherwise the saved status is never found again.
            status = Status(table=table.full_name, ver=self._version)
            status.save()
            self._process_deleted_table(table_name)
        else:
            log.warning('Table `{0}` has version `{1}`. '
                        'Please use --force-replace for replace '
                        'all tables. Skipping...'.format(status.table, status.ver))
def load(self, truncate=False, skip=False):
    """Update every FIAS table whose stored version is older than the
    archive's version.

    Args:
        truncate: accepted for interface symmetry with the fill loader; the
            update path never truncates (``ldr.load`` is always called with
            ``truncate=False``).
        skip: when True, a broken XML file is logged and skipped instead of
            raising ``BadArchiveError``.
    """
    to_update = [s.table
                 for s in Status.objects.filter(ver__ver__lt=self._version.ver)]
    for table_name in set(to_update) & set(FIAS_TABLES):
        try:
            table = self.tables[table_name]
        except KeyError:
            log.debug('Table `{0}` not found in archive'.format(table_name))
            continue

        # NOTE(review): statuses are collected by `s.table` (matched against
        # table names) but re-fetched here by `table.full_name` — confirm the
        # two naming schemes agree.
        status = Status.objects.get(table=table.full_name)
        log.info('Updating table `{0}` from {1} to {2}...'.format(
            table.full_name, status.ver.ver, self._version.ver))
        ldr = loader(table)
        try:
            ldr.load(truncate=False, update=True)
        except XMLSyntaxError:
            # The exception instance was previously bound as `e` but never
            # used; the message below carries all the needed context.
            msg = 'XML file for table `{0}` is broken. Data not loaded!'.format(table.full_name)
            if skip:
                log.error(msg)
            else:
                raise BadArchiveError(msg)
        else:
            # Only advance the stored version after a successful load.
            status.ver = self._version
            status.save()
            self._process_deleted_table(table_name)
def load(self, truncate=False, skip=False):
    """Bring every stale table (stored version older than the archive's)
    up to the archive version, loading row updates table by table."""
    stale = Status.objects.filter(ver__ver__lt=self._version.ver)
    candidates = {s.table for s in stale} & set(FIAS_TABLES)

    for name in candidates:
        if name not in self.tables:
            log.debug('Table `{0}` not found in archive'.format(name))
            continue
        table = self.tables[name]

        status = Status.objects.get(table=table.full_name)
        log.info('Updating table `{0}` from {1} to {2}...'.format(
            table.full_name, status.ver.ver, self._version.ver))

        tbl_loader = loader(table)
        try:
            tbl_loader.load(truncate=False, update=True)
        except XMLSyntaxError:
            message = 'XML file for table `{0}` is broken. Data not loaded!'.format(
                table.full_name)
            if not skip:
                raise BadArchiveError(message)
            log.error(message)
        else:
            status.ver = self._version
            status.save()
            self._process_deleted_table(name)
def load(self, truncate=False, skip=False):
    """Perform the initial fill of all FIAS tables present in the archive;
    tables that already have a Status row are skipped with a warning."""
    for name in FIAS_TABLES:
        if name not in self.tables:
            log.debug(
                'Table `{0}` not found in archive'.format(name))
            continue
        table = self.tables[name]

        try:
            existing = Status.objects.get(table=name)
        except Status.DoesNotExist:
            pass
        else:
            # Already loaded once — require an explicit force-replace.
            log.warning('Table `{0}` has version `{1}`. '
                        'Please use --force-replace for replace '
                        'all tables. Skipping...'.format(
                            existing.table, existing.ver))
            continue

        log.info('Filling table `{0}` to ver. {1}...'.format(
            table.full_name, self._version.ver))
        loader(table).load(truncate=truncate, update=False)
        Status(table=table.full_name, ver=self._version).save()
        self._process_deleted_table(name)
def push(self, raw_data, related_attrs=None):
    """Queue one parsed XML row for bulk creation, or update an existing row.

    In 'fill' mode (or when no row with this primary key exists) the object
    is buffered for bulk creation; otherwise, if an update-tracking field is
    configured and present, the stored row is refreshed when the incoming
    date is newer.

    Args:
        raw_data: parsed XML element; its ``.attrib`` mapping holds the
            row's field values.
        related_attrs: optional dict of extra field values merged into the
            row (e.g. foreign-key attributes resolved by the caller).
    """
    data = self._lower_keys(raw_data.attrib)
    if isinstance(related_attrs, dict):
        data.update(related_attrs)

    key = data[self.pk]
    if self.mode == 'fill' or not self.model.objects.filter(**{self.pk: key}).exists():
        self.objects.append(self.model(**data))
        self.counter += 1
    elif self.upd_field is not None and self.upd_field in data:
        old_obj = self.model.objects.get(**{self.pk: key})
        data[self.upd_field] = datetime.datetime.strptime(
            data[self.upd_field], "%Y-%m-%d").date()
        if getattr(old_obj, self.upd_field) < data[self.upd_field]:
            for k, v in data.items():
                setattr(old_obj, k, v)
            old_obj.save()
            self.upd_counter += 1

    # Updating fires a great many SELECT queries; with DEBUG on, Django
    # keeps every query in memory, so flush the log frequently.
    # (This note was previously a bare triple-quoted string — a no-op
    # expression statement, not a comment — now a real comment.)
    if settings.DEBUG:
        db.reset_queries()

    del data

    # Flush the buffer every 10k objects to bound memory usage.
    if self.counter and self.counter % 10000 == 0:
        self._create()
        log.info('Created {0} objects'.format(self.counter))
def push(self, raw_data, related_attrs=None):
    """Buffer a normalized row for bulk insert, or apply it as an in-place
    update when a newer version of an existing row arrives."""
    row = dict(self._lower_keys_empty_uuids_to_none(raw_data.attrib))
    if isinstance(related_attrs, dict):
        row.update(related_attrs)

    pk_value = row[self.pk]
    filling = self.mode == 'fill'
    if filling or not self.model.objects.filter(**{self.pk: pk_value}).exists():
        self.objects.append(self.model(**row))
        self.counter += 1
    elif self.upd_field is not None and self.upd_field in row:
        current = self.model.objects.get(**{self.pk: pk_value})
        row[self.upd_field] = datetime.datetime.strptime(
            row[self.upd_field], "%Y-%m-%d").date()
        if getattr(current, self.upd_field) < row[self.upd_field]:
            for field, value in row.items():
                setattr(current, field, value)
            current.save()
            self.upd_counter += 1

    # Updating issues very many SELECT queries; under DEBUG Django records
    # each of them, which eats memory — clear the query log often.
    if settings.DEBUG:
        db.reset_queries()

    del row

    if self.counter and self.counter % 10000 == 0:
        self._create()
        log.info('Created {0} objects'.format(self.counter))
def load(self, truncate=False, update=False):
    """Stream the table's XML file through iterparse and bulk-load its rows.

    Args:
        truncate: when True, empty the destination table first.
        update: run the bulk loader in 'update' mode (refresh existing rows)
            instead of 'fill' mode.
    """
    if truncate:
        self._truncate()

    if update:
        self._bulk.mode = 'update'
        self._bulk.reset_counters()
    else:
        self._bulk.mode = 'fill'

    # Workaround for "XMLSyntaxError: Document is empty, line 1, column 1":
    # probe the first three bytes for a BOM.  If a BOM is present, keep the
    # advanced handle so parsing starts just past it; otherwise reopen so
    # parsing starts at byte 0.
    xml = self._table.open()
    bom = xml.read(3)
    if bom != _bom_header:
        # Close the probe handle before reopening — previously it was
        # simply dropped, leaking an open stream per table.
        xml.close()
        xml = self._table.open()
    else:
        log.info('Fixed wrong BOM header')

    context = etree.iterparse(xml)
    _fast_iter(context=context, func=self.process_row)

    self._bulk.finish()
    log.info('Processing table `{0}` is finished'.format(
        self._table.full_name))
def load(self, truncate=False, update=False):
    """Parse the table's XML stream and feed each row to the bulk loader."""
    if truncate:
        self._truncate()

    self._bulk.mode = 'update' if update else 'fill'
    if update:
        self._bulk.reset_counters()

    # Workaround for "XMLSyntaxError: Document is empty, line 1, column 1":
    # peek at the first three bytes.  With a BOM present we keep the advanced
    # handle (skipping the BOM); without one we reopen from byte 0.
    stream = self._table.open()
    if stream.read(3) == _bom_header:
        log.info('Fixed wrong BOM header')
    else:
        stream = self._table.open()

    _fast_iter(context=etree.iterparse(stream), func=self.process_row)

    self._bulk.finish()
    log.info('Processing table `{0}` is finished'.format(self._table.full_name))
def update_data(path=None, version=None, skip=False, data_format='xml',
                limit=1000, tables=None, tempdir=None):
    """Update previously imported FIAS tables from a newer archive.

    Args:
        path: local path to the archive; downloaded from `version` when None.
        version: archive version object used to locate/download the data.
        skip: when True, a bad table is logged and skipped instead of raised.
        data_format: archive data format, 'xml' by default.
        limit: bulk-operation batch size passed to TableUpdater.
        tables: iterable of table names to restrict the update to.
        tempdir: directory for temporary extraction files.
    """
    tablelist = get_tablelist(path=path, version=version,
                              data_format=data_format, tempdir=tempdir)
    for tbl in get_table_names(tables):
        # Skip tables that are not present in the archive.
        if tbl not in tablelist.tables:
            continue

        st = Status.objects.get(table=tbl)
        if st.ver.ver >= tablelist.version.ver:
            # Message now matches the comparison actually performed —
            # it previously printed "<=" while the guard tests ">=".
            log.info('Update of the table `{0}` is not needed [{1} >= {2}]. Skipping...'.format(
                tbl, st.ver.ver, tablelist.version.ver
            ))
            continue

        for table in tablelist.tables[tbl]:
            # Renamed from `loader` to avoid shadowing the module-level
            # loader factory.
            tbl_loader = TableUpdater(limit=limit)
            try:
                tbl_loader.load(tablelist=tablelist, table=table)
            except BadTableError as e:
                if skip:
                    log.error(str(e))
                else:
                    raise

        # Record the new version only after all files for this table loaded.
        st.ver = tablelist.version
        st.save()
def _retrieve(self, version=None, path=None):
    """Obtain (downloading if necessary) and open the FIAS rar archive.

    Args:
        version: version object whose URL attribute (named by
            ``self.field_name``) points at the archive; only consulted when
            `path` is not supplied.
        path: local filesystem path to an already-downloaded archive.

    Returns:
        The opened ``rarfile.RarFile``.

    Raises:
        BadArchiveError: when the file is corrupted or not a rar archive.
    """
    self._path = path
    if self._path is None:
        path = getattr(version, self.field_name)
        log.info('Downloading file: {0}'.format(path))
        self._path = urlretrieve(path)[0]

    try:
        self._archive = rarfile.RarFile(self._path)
    except (rarfile.NotRarFile, rarfile.BadRarFile) as e:
        # Chain the original rarfile error so the root cause stays visible
        # (it was previously bound as `e` but discarded).
        raise BadArchiveError('Archive: `{0}`, ver: `{1}` corrupted'
                              ' or is not rar-archive'.format(path, version or 'unknown')) from e

    if self._version is None:
        self._version = self._get_version()

    return self._archive
def update_data(path=None, version=None, skip=False, data_format='xml',
                limit=1000, tables=None, tempdir=None):
    """Refresh every requested FIAS table that the given archive can bring
    to a newer version; tables absent from the archive are ignored."""
    tablelist = get_tablelist(path=path, version=version,
                              data_format=data_format, tempdir=tempdir)
    for name in get_table_names(tables):
        # Skip tables that are missing from the archive.
        if name not in tablelist.tables:
            continue

        status = Status.objects.get(table=name)
        if status.ver.ver >= tablelist.version.ver:
            log.info(
                'Update of the table `{0}` is not needed [{1} <= {2}]. Skipping...'
                .format(name, status.ver.ver, tablelist.version.ver))
            continue

        for table in tablelist.tables[name]:
            updater = TableUpdater(limit=limit)
            try:
                updater.load(tablelist=tablelist, table=table)
            except BadTableError as err:
                if not skip:
                    raise
                log.error(str(err))

        status.ver = tablelist.version
        status.save()
def finish(self):
    """Flush any still-buffered objects and report the update counter."""
    if self.objects:
        self._create()
    if self.upd_counter:
        log.info('Updated {0} objects'.format(self.upd_counter))