def connect(self, reconnect=False, dbname='russtat', user='postgres', password=None, host='127.0.0.1', port='5432'):
    # assumption: psycopg2 driver and 'postgres' default user (the original defaults and
    # connection call were redacted in the source and are reconstructed here)
    if self.con is not None and not reconnect:
        return True
    if password is None:
        password = input('>> Enter password:')
    try:
        self._connparams = (dbname, user, password, host, port)
        self.con = psycopg2.connect(dbname=dbname, user=user, password=password, host=host, port=port)
        report(f'Connected to {self._connparams[0]} as {self._connparams[1]} at {self._connparams[3]}:{self._connparams[4]}')
        return True
    except Exception as err:
        self.con = None
        print(err)
        return False
def disconnect(self):
    if self.con is None:
        return True
    try:
        self.con.commit()
        self.con.close()
        self.con = None
        report(f'Disconnected from {self._connparams[0] if self._connparams else "DB"}')
        return True
    except Exception:
        pass
    return False
def add_data(self, data_json, disable_triggers=False, on_error=print):
    if not data_json:
        report('NONE data!', force=True)
        return None
    triggers_disabled = False
    if disable_triggers:
        triggers_disabled = self.disable_triggers(on_error=on_error)
    cur = self.exec(f"select * from public.add_data($${data_json}$$::text);",
                    commit=True, on_error=on_error)
    if cur:
        res = cur.fetchone()
        if triggers_disabled:
            self.enable_triggers(on_error=on_error)
        return res
    else:
        raise Exception(self.dbmessages)
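# Hypothetical usage sketch, not part of the original source: the wrapper instance name
# `db` and its constructor are assumptions for illustration only. add_data() expects the
# dataset serialized as a JSON string; that string is embedded verbatim into a
# dollar-quoted ($$...$$) literal and handed to the public.add_data() stored procedure.
#
#   db = RusstatDB()                              # hypothetical name for the class above
#   if db.connect(dbname='russtat'):
#       result = db.add_data(json.dumps(ds, ensure_ascii=False, default=str),
#                            disable_triggers=True)
#       print(result)
#       db.disconnect()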
def find_datasets(self, pattern, regex=False, case_sense=False, fullmatch=False):
    results = []
    if regex:
        regexp = re.compile(pattern) if case_sense else re.compile(pattern, re.I)
    if not self.datasets:
        self.update_dataset_list()
    for item in self.datasets:
        if 'title' not in item:
            continue
        title = item['title']
        if regex:
            if (fullmatch and regexp.fullmatch(title)) or (not fullmatch and regexp.search(title)):
                results.append(item)
        else:
            if (fullmatch and ((case_sense and title == pattern) or
                               (not case_sense and title.lower() == pattern.lower()))) or \
               (not fullmatch and ((case_sense and pattern in title) or
                                   (not case_sense and pattern.lower() in title.lower()))):
                results.append(item)
    report(f"Found {len(results)} matches for query '{pattern}'")
    return results
def get_many(self, datasets=0, xmlfilenames='auto', overwrite=True, del_xml=True,
             save2json='auto', loadfromjson='auto', processes='auto', wait=True,
             on_dataset=None, on_dataset_kwargs=None, on_results_ready=None,
             on_error=None, on_stopcheck=None):
    args = []
    if datasets is None and loadfromjson != 'auto' and loadfromjson is not None:
        # load everything straight from the given JSON file(s), no dataset list needed
        if is_iterable(loadfromjson):
            for json_file in loadfromjson:
                args.append((None, None, False, False, None, json_file,
                             on_dataset, on_dataset_kwargs))
        else:
            args.append((None, None, False, False, None, loadfromjson,
                         on_dataset, on_dataset_kwargs))
    else:
        if not self.datasets:
            self.update_dataset_list()
        if datasets is None:
            datasets = self.datasets
        elif isinstance(datasets, str):
            datasets = self.find_datasets(datasets)
        elif isinstance(datasets, int):
            datasets = [self.datasets[datasets]]
        elif is_iterable(datasets):
            if len(datasets) == 0:
                report('Empty datasets parameter!', True)
                return None
            if isinstance(datasets[0], (int, str)):
                datasets = [self[k] for k in datasets]
        else:
            report('Bad type: datasets', True)
            return None
        if not datasets:
            report('No datasets matching your request.', True)
            return None
        # prepare args for worker function
        for i, ds in enumerate(datasets):
            try:
                xmlfilename = xmlfilenames[i] if is_iterable(xmlfilenames) else xmlfilenames
                save2json_ = save2json[i] if is_iterable(save2json) else save2json
                loadfromjson_ = loadfromjson[i] if is_iterable(loadfromjson) else loadfromjson
                args.append((ds, xmlfilename, overwrite, del_xml, save2json_, loadfromjson_,
                             on_dataset, on_dataset_kwargs))
            except Exception as err:
                report(err, True)
                return None
    if processes == 'auto':
        processes = None
    with Pool(processes=processes) as pool:
        try:
            result = pool.starmap_async(self.get_one, args, callback=on_results_ready,
                                        error_callback=on_error)
            pool.close()
            if wait:
                pool.join()
            return result
        except Exception as err:
            report(err, True)
            return None
def get_one(self, dataset, xmlfilename='auto', overwrite=True, del_xml=True,
            save2json='auto', loadfromjson='auto', on_dataset=None, on_dataset_kwargs=None):
    if loadfromjson is None or loadfromjson == 'auto':
        # resolve the dataset parameter into a dataset dict
        if isinstance(dataset, str):
            datasets = self.find_datasets(dataset)
            if not datasets:
                report(f"No datasets match query '{dataset}'")
                return None
            dataset = datasets[0]
        elif isinstance(dataset, int):
            try:
                dataset = self[dataset]
            except Exception as err:
                report(err)
                return None
        elif not isinstance(dataset, dict):
            report(f"Bad data type for 'dataset': {type(dataset)}")
            return None

    if loadfromjson:
        if loadfromjson == 'auto':
            loadfromjson = os.path.join(self.root_folder, dataset.get('identifier', 'dataset') + '.json')
        ds = None
        try:
            with open(os.path.abspath(loadfromjson), 'r', encoding='utf-8') as infile:
                ds = json.load(infile, object_hook=Russtat.json_hook)
        except Exception as err:
            report(f"{err} Importing from XML...")
            return self.get_one(dataset, xmlfilename, overwrite, del_xml, save2json, None,
                                on_dataset, on_dataset_kwargs)
        else:
            report(f'Loaded from JSON ({loadfromjson})')
            if on_dataset:
                if on_dataset_kwargs:
                    on_dataset(ds, **on_dataset_kwargs)
                else:
                    on_dataset(ds)
            return ds

    if 'link' not in dataset:
        report('Dataset has no "link" object!')
        #return None
    if xmlfilename == 'auto':
        xmlfilename = dataset.get('identifier', 'dataset') + '.xml'
    outputfile = os.path.abspath(os.path.join(self.root_folder, xmlfilename))
    if not os.path.exists(outputfile) or overwrite:
        try:
            os.remove(outputfile)
            report(f'Deleted existing XML ({outputfile})')
        except Exception as err:
            report(err)
        try:
            res = requests.get(dataset['link'], timeout=self.connection_timeout)
            if not res:
                report(f"Could not retrieve dataset from {dataset['link']}")
                #return None
            with open(outputfile, 'wb') as outfile:
                outfile.write(res.content)
            report(f"Downloaded XML from {dataset['link']} to {outputfile}")
        except Exception as err:
            report(err)
            #return None

    # default (empty) dataset structure, filled from the XML below
    ds = {'prepared': dt.now(), 'id': dataset['identifier'], 'agency_id': '', 'codes': {},
          'full_name': dataset['title'], 'unit': '',
          'periodicity': {'value': '', 'releases': '', 'next': dt.fromisoformat('1900-01-01')},
          'data_range': (-1, -1), 'updated': dt.fromisoformat('1900-01-01'),
          'methodology': '', 'agency_name': '', 'agency_dept': '',
          'classifier': {'id': '', 'path': ''},
          'prepared_by': {'name': '', 'contacts': ''}, 'data': []}
    try:
        tree = ET.parse(outputfile, ET.XMLParser(encoding='utf-8'))
        ds_rootnode = tree.getroot()
        # Header
        node_hdr = ds_rootnode.find('message:Header', XML_NS)
        ds['prepared'] = dt.fromisoformat(self._get_text(node_hdr, 'message:Prepared', '1900-01-01')) - timedelta(hours=3)
        ds['id'] = self._get_text(node_hdr, 'message:DataSetID')
        ds['agency_id'] = self._get_text(node_hdr, 'message:DataSetAgency')
        # Codes
        ds['codes'] = self._get_codes(ds_rootnode)
        # Description
        node_desc = ds_rootnode.find('message:Description', XML_NS).find('message:Indicator', XML_NS)
        ds['full_name'] = ' '.join(self._get_attr(node_desc, 'name').split())
        ds['unit'] = self._get_attr(node_desc, 'value', ['message:Units', 'message:Unit'])
        ds['periodicity']['value'] = self._get_attr(node_desc, 'value', ['message:Periodicities', 'message:Periodicity'])
        ds['periodicity']['releases'] = self._get_attr(node_desc, 'releases', ['message:Periodicities', 'message:Periodicity'])
        ds['periodicity']['next'] = dt.strptime(self._get_attr(node_desc, 'next-release', ['message:Periodicities', 'message:Periodicity'], '01.01.1900'), '%d.%m.%Y') - timedelta(hours=3)
        ds['data_range'] = tuple(int(self._get_attr(node_desc, x, 'message:DataRange', '0')) for x in ('start', 'end'))
        ds['updated'] = dt.fromisoformat(self._get_attr(node_desc, 'value', 'message:LastUpdate', '1900-01-01')) - timedelta(hours=3)
        ds['methodology'] = ' '.join(self._get_attr(node_desc, 'value', 'message:Methodology').split())
        ds['agency_name'] = self._get_attr(node_desc, 'value', 'message:Organization')
        ds['agency_dept'] = self._get_attr(node_desc, 'value', 'message:Department')
        ds['classifier']['id'] = self._get_attr(node_desc, 'id', ['message:Allocations', 'message:Allocation'])
        ds['classifier']['path'] = self._get_text(node_desc, ['message:Allocations', 'message:Allocation', 'message:Name'])
        ds['prepared_by']['name'] = self._get_text(node_desc, ['message:Responsible', 'message:Name'])
        ds['prepared_by']['contacts'] = self._get_text(node_desc, ['message:Responsible', 'message:Contacts'])
        # Data
        ds['data'] = self._get_data(ds_rootnode, ds['codes'])

        if save2json:
            if save2json == 'auto':
                save2json = dataset.get('identifier', 'dataset') + '.json'
            try:
                json_file = os.path.abspath(os.path.join(self.root_folder, save2json))
                with open(json_file, 'w', encoding='utf-8') as outfile:
                    json.dump(ds, outfile, ensure_ascii=False, indent=4, default=str)
                report(f'Saved to JSON ({json_file})')
            except Exception as err:
                report(err)
        if del_xml:
            try:
                os.remove(outputfile)
                report(f'Deleted XML ({outputfile})')
            except Exception as err:
                report(err)
        if on_dataset:
            if on_dataset_kwargs:
                on_dataset(ds, **on_dataset_kwargs)
            else:
                on_dataset(ds)
    except Exception as err:
        report(err)
        # try to process empty dataset
        if on_dataset:
            try:
                if on_dataset_kwargs:
                    on_dataset(ds, **on_dataset_kwargs)
                else:
                    on_dataset(ds)
            except Exception:
                pass
        if del_xml:
            try:
                os.remove(outputfile)
                report(f'Deleted XML ({outputfile})')
            except Exception as err2:
                report(err2)
        return ds
    return ds
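# Hypothetical usage sketch, not part of the original source (the instance name `rs`
# and the query string are assumptions). get_one() resolves a dataset by title query,
# index or dict, downloads and parses its XML (optionally caching to JSON), and passes
# the resulting dict to the on_dataset callback; get_many() feeds the same worker to a
# multiprocessing Pool via starmap_async().
#
#   rs = Russtat()
#   ds = rs.get_one(0, save2json='auto', del_xml=True)       # first dataset in the list
#   if ds:
#       print(ds['full_name'], len(ds['data']))
#
#   res = rs.get_many(datasets='demography', wait=True,      # title substring query
#                     on_dataset=lambda d: print(d['full_name']))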
def _get_data(self, ds_rootnode, codes, max_row=-1):
    n = 0
    dataset = ds_rootnode.find('message:DataSet', XML_NS)
    if not dataset:
        return []
    # each record appended below is a tuple: (classifier name, class value, unit, period, year, value)
    data = []
    for item in dataset.iterfind('generic:Series', XML_NS):
        try:
            # period and unit
            per, ei = ('', '')
            try:
                for attr in item.find('generic:Attributes', XML_NS).iterfind('generic:Value', XML_NS):
                    concept = self._get_attr(attr, 'concept')
                    val = self._get_attr(attr, 'value')
                    if concept == 'EI':
                        ei = val
                    elif concept == 'PERIOD':
                        per = val
            except Exception:
                per, ei = ('', '')
            # year
            try:
                tim = int(self._get_text(item, ['generic:Obs', 'generic:Time'], '0'))
            except Exception:
                tim = 0
            # value
            try:
                val = float(self._get_attr(item, 'value', ['generic:Obs', 'generic:ObsValue'], '0.0').replace(',', '.').replace(' ', ''))
            except Exception:
                val = 0.0
            # classifier and class
            try:
                for key_item in item.find('generic:SeriesKey', XML_NS).iterfind('generic:Value', XML_NS):
                    key_concept = self._get_attr(key_item, 'concept')
                    key_key = self._get_attr(key_item, 'value')
                    classifier, cl = ('', '')
                    for code in codes:
                        if code == key_concept:
                            classifier = codes[code]['name']
                            for cval in codes[code]['values']:
                                if cval[0] == key_key:
                                    cl = cval[1]
                                    break
                            break
                    data.append((classifier, cl, ei, per, tim, val))
                    n += 1
                    if max_row > 0 and n > max_row:
                        break
            except Exception:
                data.append(('', '', ei, per, tim, val))
                n += 1
                if max_row > 0 and n > max_row:
                    break
        except Exception as err:
            report(err)
            break
    return data
def update_dataset_list(self, xmlfilename='list.xml', xml_only=True, overwrite=False, del_xml=True,
                        save2json='list_json.json', loadfromjson='list_json.json'):
    self.datasets = []
    if loadfromjson:
        try:
            json_file = os.path.abspath(os.path.join(self.root_folder, loadfromjson))
            with open(json_file, 'r', encoding='utf-8') as infile:
                self.datasets = json.load(infile)
            report(f'Loaded from JSON ({json_file}): {len(self.datasets)} datasets')
            return
        except Exception as err:
            # fall back to importing the list from XML
            report(f"{err} Importing from XML...")
            return self.update_dataset_list(xmlfilename, xml_only, overwrite, del_xml, save2json, None)
    outputfile = os.path.abspath(os.path.join(self.root_folder, xmlfilename))
    if not os.path.exists(outputfile) or overwrite:
        try:
            os.remove(outputfile)
            report(f'Deleted existing XML ({outputfile})')
        except Exception as err:
            report(err)
        try:
            res = requests.get(URL_EMISS_LIST, timeout=self.connection_timeout)
            if not res:
                report(f'Could not retrieve dataset list from {URL_EMISS_LIST}')
                return
            with open(outputfile, 'wb') as outfile:
                outfile.write(res.content)
            report(f'Downloaded XML from {URL_EMISS_LIST} to {outputfile}')
        except Exception as err:
            report(err)
            return
    tree = ET.parse(outputfile, ET.XMLParser(encoding='utf-8'))
    root_el = tree.getroot()
    for item in root_el.find('meta').iter('item'):
        if xml_only and item.find('format').text != 'xml':
            continue
        self.datasets.append({child.tag: child.text.strip('"').strip() for child in item})
    report(f'Loaded from XML ({outputfile}): {len(self.datasets)} datasets')
    if del_xml:
        try:
            os.remove(outputfile)
            report(f'Deleted XML ({outputfile})')
        except Exception as err:
            report(err)
    if save2json:
        try:
            json_file = os.path.abspath(os.path.join(self.root_folder, save2json))
            with open(json_file, 'w', encoding='utf-8') as outfile:
                json.dump(self.datasets, outfile, ensure_ascii=False, indent=4)
            report(f'Saved to JSON ({json_file})')
        except Exception as err:
            report(err)
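# Hypothetical usage sketch, not part of the original source (`rs` is an assumed Russtat
# instance). update_dataset_list() prefers the cached JSON list and only downloads the
# EMISS catalogue XML (URL_EMISS_LIST) when the cache is missing or unreadable;
# find_datasets() then filters the cached items by title.
#
#   rs.update_dataset_list(loadfromjson=None, overwrite=True)   # force a fresh download
#   hits = rs.find_datasets(r'price', regex=True)               # case-insensitive by default
#   print(len(hits), hits[0]['title'] if hits else None)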