def addXYModels(self, attrs, t0=None, t1=None):
    """
    Convert model, dates to
    'tgarch://alba03.cells.es:10000/sr/id/scw01/pressure?db=*;t0=2019-11-11T11:41:59;t1=2020-01-03T09:03:03;ts'
    """
    c = self.cursor()
    self.setCursor(Qt.Qt.WaitCursor)
    attrs = fn.toList(attrs)
    if not t0 and not t1 and not self.t0 and not self.t1:
        t0, t1 = self.tc.getTimes()
    if t0 and t1:
        t0 = t0 if fn.isNumber(t0) else fn.str2time(t0, relative=True)
        t1 = t1 if fn.isNumber(t1) else fn.str2time(t1, relative=True)
        self.t0, self.t1 = fn.time2str(t0, iso=1), fn.time2str(t1, iso=1)
        self.t0 = self.t0.replace(' ', 'T')
        self.t1 = self.t1.replace(' ', 'T')
    ms = []
    for attr in attrs:
        attr = fn.tango.get_full_name(attr, fqdn=True)
        attr = attr.replace('tango://', '')
        q = 'tgarch://%s?db=*;t0=%s;t1=%s' % (attr, self.t0, self.t1)
        m = (q + ';ts', q)
        ms.append(m)
    self.plot.onAddXYModel(ms)
    self.setCursor(c)
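# Example (a sketch based on the docstring above; host and attribute are
# illustrative): for 'sr/id/scw01/pressure' with the t0/t1 shown, the pair
# appended to ms would be:
#   ('tgarch://alba03.cells.es:10000/sr/id/scw01/pressure?db=*;'
#    't0=2019-11-11T11:41:59;t1=2020-01-03T09:03:03;ts',
#    'tgarch://alba03.cells.es:10000/sr/id/scw01/pressure?db=*;'
#    't0=2019-11-11T11:41:59;t1=2020-01-03T09:03:03')
# i.e. the ';ts'-suffixed model provides the timestamps (X axis) and the
# plain model the values (Y axis) passed to plot.onAddXYModel().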
def get_partition_time_by_name(self, partition):
    m = fn.clsearch('[0-9].*', partition)
    if m:
        d = fn.str2time(m.group(), cad='%Y%m%d')
        return d
    else:
        return fn.END_OF_TIME
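# Usage sketch (assumes partitions follow the usual <prefix><YYYYMMDD>
# naming convention, e.g. 'sdr20191101'):
#   self.get_partition_time_by_name('sdr20191101')
#   -> epoch seconds for 2019-11-01 00:00:00
# Names without a parseable date return fn.END_OF_TIME instead.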
def load_last_values(self, attributes=None, n=1, epoch=None, tref=90 * 86400):
    """
    load_last_values provided to comply with Reader API
    get_last_attribute_values provided to comply with CommonAPI

    load_last_values returns a dictionary {attr: (last_time, last_value)}

    attributes: attribute name or list
    n: the number of last values to be returned
    tref: time from which to start searching values (90 days by default)
    epoch: end of the window to search values (now by default)
    """
    if attributes is None:
        attributes = self.get_archived_attributes()
    if epoch is not None:
        epoch = fn.str2time(epoch) if fn.isString(epoch) else epoch
        kwargs = {
            'epoch': epoch,
            'period': (epoch - tref) if tref > 1e9 else abs(tref)
        }
    else:
        kwargs = {}
    vals = dict((a, self.get_last_attribute_values(a, n=n, **kwargs))
                for a in fn.toList(attributes))
    return vals
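# Usage sketch (reader object and attribute name are illustrative):
#   reader.load_last_values('sr/di/dcct/current')
#   -> {'sr/di/dcct/current': (1578042183.0, 249.3)}
# Passing epoch='2020-01-03 09:00:00' bounds the search window, which then
# extends tref seconds (90 days by default) back from that epoch.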
def getStartDate(self):
    try:
        t = str(self.xEditStart.text())
        if t == self.DEFAULT_START:
            return None
        return str2time(t)
    except:
        traceback.print_exc()
def onRefreshButton(self):
    self._trend.applyNewDates()
    try:
        date = str2time(str(self.xEditStart.text()))
    except:
        try:
            date = getTrendBounds(self._trend)[0]
            self.xEditStart.setText(time2str(date))
        except:
            traceback.print_exc()
def checkSchema(k, schema, attribute='', start=None, stop=None):
    schema = k.getSchema(schema)
    if not schema:
        return False
    f = schema.get('check')
    if not f:
        print('%s has no check function' % str(schema))
        return True
    try:
        now = time.time()
        start = (str2time(start) if fn.isString(start)
                 else fn.notNone(start, now - 1))
        stop = (str2time(stop) if fn.isString(stop)
                else fn.notNone(stop, now))
        k.LOCALS.update({
            'attribute': attribute.lower(),
            'match': clmatch, 'clmatch': clmatch,
            'overlap': overlap,
            'time2str': time2str, 'str2time': str2time,
            't2s': time2str, 's2t': str2time,
            'start': start, 'stop': stop, 'now': now,
            'begin': start, 'end': stop, 'NOW': now,
            'reader': schema.get('reader', schema.get('api')),
            'schema': schema.get('schema'),
            'dbname': schema.get('dbname', schema.get('schema', '')),
        })
        if 'reader' in f:
            k.getReader(schema)
        if 'api' in f:
            k.getApi(schema)
        v = fn.evalX(f, k.LOCALS, k.MODULES)
    except:
        print('checkSchema(%s,%s) failed!' % (schema, attribute))
        traceback.print_exc()
        v = False
    return v
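# Sketch of a 'check' expression as consumed by fn.evalX() above; the
# expression itself is hypothetical, but it may use any of the names
# injected into k.LOCALS:
#   check = "start > now - 365*86400 and clmatch('bl09*', attribute)"
# i.e. accept this schema only for queries within the last year and for
# attributes matching 'bl09*'.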
def refreshAction(self):
    self._trend.applyNewDates()
    try:
        date = str2time(str(self.xEditStart.text()))
    except:
        try:
            date = getTrendBounds(self._trend)[0]
            self.xEditStart.setText(time2str(date))
        except:
            traceback.print_exc()
def mysqldump_by_date(schema, user, passwd, folder, start, stop,
                      compress=True, delete=True):
    """
    This method creates a backup between the selected dates for each table
    of the selected database. All dump files are exported to the same folder,
    and a compressed file is created at the end.

    Deletion of the temporary files created (folder/*.dmp) must be done manually.
    """
    print('mysqldump_by_date(%s,,,folder=%s,%s,%s,compress=%s,delete=%s)'
          % (schema, folder, start, stop, compress, delete))
    db = FriendlyDB(schema, user=user, passwd=passwd)
    t, e = start, stop
    print(t, e)
    start = start if fn.isString(start) else fn.time2str(start)
    stop = stop if fn.isString(stop) else fn.time2str(stop)
    tables = db.getTables()
    print('mysqldump_by_date(%s): %d tables to backup between %s and %s'
          % (schema, len(tables), start, stop))
    if not os.path.isdir(folder):
        print('mkdir %s' % folder)
        os.mkdir(folder)
    for t in tables:
        filename = ('%s/%s-%s-%s-%s.dmp'
                    % (folder, schema, t, start.split()[0], stop.split()[0]))
        cols = db.getTableCols(t)
        col = [c for c in ('time', 'data_time') if c in cols]
        if col:
            where = " %s >= '%s' and %s < '%s' " % (col[0], start, col[0], stop)
        else:
            where = ""
        mysqldump(schema, user, passwd, filename, t, where)
    ext = ('part.' if fn.str2time(stop) > fn.now() else '') + 'tgz'
    if compress:
        filename = ('%s/%s-%s-%s.%s'
                    % (folder, schema, start.split()[0], stop.split()[0], ext))
        cmd = 'tar zcvf %s %s/*.dmp' % (filename, folder)
        print(cmd)
        fn.linos.shell_command(cmd)
    if compress and delete:
        cmd = 'rm -rf %s/*.dmp' % folder
        print(cmd)
        fn.linos.shell_command(cmd)
    return filename
def get_last_attribute_values(self, table, n=1, check_table=False, epoch=None):
    start = None  # ensures start is defined when an absolute epoch is passed
    if epoch is None:
        start, epoch = None, fn.now() + 600
    elif epoch < 0:
        start, epoch = fn.now() + epoch, fn.now() + 600
    if start is None:
        # Rounding to the last month partition
        start = fn.str2time(
            fn.time2str().split()[0].rsplit('-', 1)[0] + '-01')
    vals = self.get_attribute_values(table, N=n, human=True, desc=True,
                                     start_date=start, stop_date=epoch)
    if len(vals):
        return vals[0] if abs(n) == 1 else vals
    else:
        return vals
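# Usage sketch (table name is illustrative): with epoch=None values are
# searched from the start of the current month up to now; a negative epoch
# is relative, e.g. the last hour:
#   self.get_last_attribute_values('att_scalar_devdouble_ro', n=1, epoch=-3600)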
def get_last_attribute_values(self, table, n, check_table=False,
                              epoch=fn.END_OF_TIME):
    """
    check_table is set to False because sometimes the order of insertion
    is not the same as expected; BE CAREFUL WITH THIS ARGUMENT!
    """
    query, where = table, ''
    if check_table:
        table_size = self.getTableSize(table)
        if table_size > 1e3:
            x = max((2 * n, 20))
            query = '(select * from %s limit %d,%d)' % (table, table_size - x, x)
    epoch = fn.str2time(epoch) if fn.isString(epoch) else epoch
    if epoch not in (None, fn.END_OF_TIME):
        where = " where T.time < '%s' " % (fn.time2str(epoch))
    what = 'SELECT time'
    what += (',value', ',read_value')['read_value' in self.getTableCols(table)]
    return self.Query('%s from %s T %s order by T.time desc limit %d'
                      % (what, query, where, n))
def str2mysqlsecs(self, date):
    """ converts the given date to its int MySQL seconds() value """
    rt = fn.str2time(date)
    return int(rt + self.get_mysqlsecsdiff(date))
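# Usage sketch; get_mysqlsecsdiff() is assumed to return the offset between
# the python epoch and MySQL's seconds-based counter for that date:
#   self.str2mysqlsecs('2020-01-03 09:03:03')
#   -> int comparable against a seconds-based time column in MySQL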
#!/usr/bin/env python

import sys
import time
import re

import fandango as fd
import PyTangoArchiving
import PyTangoArchiving.hdbpp.config as ptahc
import PyTangoArchiving.utils as ptau

args = sys.argv[1:]
tt0 = time.time()

try:
    db_name = args[0]  # e.g. 'hdbmiras'
    tstart = fd.time2str(fd.str2time(args[1]))
    tend = fd.time2str(fd.str2time(args[2]))
    data_types = args[3:]  # e.g. ['scalar_devdouble_ro']
except:
    print('Usage: PyTangoArchiving/hdbpp/decimate.py '
          'db_name tstart tend [data_types]')
    sys.exit(1)

api = ptahc.HDBpp(db_name=db_name, user='******', passwd='manager')

if not data_types:
    data_types = [r[0] for r in
                  api.Query('select data_type from att_conf_data_type')]
else:
    data_types = [d.replace('att_', '') for d in data_types]

print('Decimating %s types between %s and %s: %s'
      % (db_name, tstart, tend, data_types))

for data_type in data_types:
    attrs = api.Query(
        'select att_conf_id from att_conf,att_conf_data_type '
        'where att_conf.att_conf_data_type_id = '
        'att_conf_data_type.att_conf_data_type_id '
        'and data_type = "%s"' % data_type)
def transfer_table(db, db2, table, bunch=16 * 16 * 1024, is_str=False,
                   per_value=60, min_tdelta=0.2, ids=[]):
    t0 = fn.now()
    tq = 0
    cols = db.getTableCols(table)
    has_int = 'int_time' in cols
    cols = sorted(c for c in cols
                  if c not in ('recv_time', 'insert_time', 'int_time'))
    it, iv, ii = (cols.index('data_time'), cols.index('value_r'),
                  cols.index('att_conf_id'))
    ix = cols.index('idx') if 'idx' in cols else None
    is_float = 'double' in table or 'float' in table
    #if is_array:
        #print("%s: THIS METHOD IS NOT SUITABLE YET FOR ARRAYS!" % table)
        ## dim_x/dim_y dim_x_r/dim_y_r columns should be taken into account
        ## when should an array be stored? only when the value changes,
        ## or on a time/fixed basis?
        #return
    lasts = dict()
    qcols = (','.join(cols)).replace(
        'data_time', 'CAST(UNIX_TIMESTAMP(data_time) AS DOUBLE)')
    query = 'select %s from %s' % (qcols, table)
    if has_int:
        where = " where int_time >= %d and int_time < %d "
    else:
        where = " where data_time >= '%s'"
        where += " and data_time < '%s'"
    order = ' order by data_time'
    if has_int:
        #order = ' order by int_time'  # It may put NULL/error values FIRST!!
        if min_tdelta > 1:
            order = ' group by int_time DIV %d' % int(min_tdelta) + order
    else:
        if min_tdelta > 1:
            order = ' group by data_time DIV %d' % int(min_tdelta) + order
    limit = ' limit %s' % bunch
    print('inserting data ...')
    count, done, changed, periodic = 0, 0, 0, 0
    attr_ids = get_table_attr_ids(db, table)
    for aii, ai in enumerate(attr_ids):
        if ids and ai not in ids:
            continue
        print('attr: %s (%s/%s)' % (ai, aii, len(attr_ids)))
        print('getting limits ...')
        last = db2.Query('select UNIX_TIMESTAMP(data_time) from %s '
                         ' where att_conf_id = %d order by '
                         'att_conf_id, data_time desc limit 1' % (table, ai))
        last = last and last[0][0] or 0
        if not last:
            last = db.Query(
                'select CAST(UNIX_TIMESTAMP(data_time) AS DOUBLE) from %s '
                ' where att_conf_id = %d '
                'order by att_conf_id,data_time limit 1' % (table, ai))
            last = last and last[0][0] or 0
        last = fn.time2str(last)
        print(last)
        end = db.Query(
            'select CAST(UNIX_TIMESTAMP(data_time) AS DOUBLE) from %s '
            ' where att_conf_id = %d '
            'order by att_conf_id,data_time desc limit 1' % (table, ai))
        end = end and end[0][0] or fn.now()
        if end > fn.now():
            end = fn.now()
        end = fn.time2str(end, us=True)
        print(end)
        while True:
            print('attr: %s (%s/%s)' % (ai, aii, len(attr_ids)))
            values = ''
            prev = last
            print('last: %s' % last)
            nxt = fn.time2str(fn.str2time(last) + 4 * 86400)
            if fn.str2time(last) >= fn.now() or fn.str2time(nxt) >= fn.now():
                break
            if fn.str2time(last) + 60 >= fn.str2time(end):
                break
            if has_int:
                qr = query + (where % (int(str2time(last)), int(str2time(nxt))))
            else:
                qr = query + (where % (last, nxt))
            qr += ' and att_conf_id = %s' % ai
            qr += order + limit
            print(qr)
            tq = fn.now()
            cursor = db.Query(qr, export=False)
            print(fn.now() - tq)
            v = cursor.fetchone()
            if v is None:
                last = nxt
            else:
                last = fn.time2str(v[it], us=True)
            # It must be checked before and after querying
            if fn.str2time(last) + 60 >= fn.str2time(end):
                break
            if v is None:
                continue
            curr = 0
            for _i in range(bunch):
                curr += 1
                count += 1
                i, t, w = v[ii], v[it], v[iv]
                x = v[ix] if ix is not None else None
                last = fn.time2str(t, us=True)
                if i not in lasts:
                    diff = True
                elif t < lasts[i][0] + min_tdelta:
                    diff = False
                else:
                    diff = (w != lasts[i][1])
                    if is_float:
                        if w and None not in (w, lasts[i][1]):
                            diff = diff and abs((w - lasts[i][1]) / w) > 1e-12
                if ix is None and diff:
                    # changed scalar value
                    lasts[i] = (t, w)
                    v = list(map(str, v))
                    v[2] = repr(last)
                    if values:
                        values += ','
                    values += '(%s)' % ','.join(v)
                    changed += 1
                    done += 1
                    v = cursor.fetchone()
                    if v is None:
                        break
                elif ix is None and (t - lasts[i][0]) >= per_value:
                    # periodic scalar value
                    lasts[i] = (t, w)
                    v = list(map(str, v))
                    v[2] = repr(last)
                    if values:
                        values += ','
                    values += '(%s)' % ','.join(v)
                    periodic += 1
                    done += 1
                    v = cursor.fetchone()
                    if v is None:
                        break
                elif ix is not None and ((i, x) not in lasts
                        or (t - lasts[(i, x)][0]) >= per_value):
                    # periodic array value
                    lasts[(i, x)] = (t, w)
                    v = list(map(str, v))
                    v[2] = repr(last)
                    if values:
                        values += ','
                    values += '(%s)' % ','.join(v)
                    done += 1
                    v = cursor.fetchone()
                    if v is None:
                        break
                else:
                    v = cursor.fetchone()
                    if v is None:
                        break
            if values:
                values = values.replace('None', 'NULL')
                insert = "insert into %s (%s) VALUES %s" % (
                    table, ','.join(cols), values)
                print(insert[:80], insert[-80:])
                db2.Query(insert)
            #else:
                #print('NO VALUES TO INSERT')
                #break
            print(curr, changed, periodic, done, count)
            if last == prev:
                last = nxt
            if fn.str2time(last) >= fn.now():
                break
    print('%d/%d values inserted in %d seconds' % (done, count, fn.now() - t0))
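# Usage sketch (schemas and table are illustrative; FriendlyDB as used
# elsewhere in this module):
#   db = FriendlyDB('hdbpp', user='...', passwd='...')
#   db2 = FriendlyDB('hdbpp_new', user='...', passwd='...')
#   transfer_table(db, db2, 'att_scalar_devdouble_ro',
#                  per_value=60,    # keep at least one value per minute
#                  min_tdelta=0.2)  # skip values closer than 200 ms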
def mysqldump_by_date(schema, user, passwd, folder, start, stop,
                      options='', tables=None, compress=True, delete=True):
    """
    This method creates a backup between the selected dates for each table
    of the selected database. All dump files are exported to the same folder,
    and a compressed file is created at the end.

    Deletion of the temporary files created (folder/*.dmp) must be done manually.
    """
    print('mysqldump_by_date(%s,,,folder=%s,%s,%s,compress=%s,delete=%s)'
          % (schema, folder, start, stop, compress, delete))
    db = FriendlyDB(schema, user=user, passwd=passwd)
    t, e = start, stop
    print(t, e)
    start = start if fn.isString(start) else fn.time2str(start)
    stop = stop if fn.isString(stop) else fn.time2str(stop)
    tables = tables or db.getTables()
    print('mysqldump_by_date(%s): %d tables to backup between %s and %s'
          % (schema, len(tables), start, stop))
    if not os.path.isdir(folder):
        print('mkdir %s' % folder)
        os.mkdir(folder)
    t0 = fn.now()
    filenames = []
    for t in sorted(tables):
        currops = options
        filename = ('%s/%s-%s-%s-%s.dmp'
                    % (folder, schema, t, start.split()[0], stop.split()[0]))
        cols = db.getTableCols(t)
        col = [c for c in ('int_time', 'time', 'data_time') if c in cols]
        if col and col[0] == 'int_time':
            where = " %s >= %s and %s < %s " % (
                col[0], fn.str2time(start), col[0], fn.str2time(stop))
        elif col:
            where = " %s >= '%s' and %s < '%s' " % (col[0], start, col[0], stop)
        else:
            where = ""
        if t in CONFIG_TABLES:
            currops += " --add-drop-table "
        mysqldump(schema, user, passwd, filename, t, where, options=currops)
        filenames.append(filename)
    t1 = fn.now()
    ext = ('part.' if fn.str2time(stop) > fn.now() else '') + 'tgz'
    dext = '.dmp'
    if compress:
        # compressing separate files could be done instead:
        #for f in filenames:
            #cmd = 'tar zcvf %s.tgz %s' % (f, f)
            #print(cmd)
            #fn.linos.shell_command(cmd)
        #dext += '.tgz'
        filename = ('%s/%s-%s-%s.%s'
                    % (folder, schema, start.split()[0], stop.split()[0], ext))
        cmd = 'tar zcvf %s %s/*%s' % (filename, folder, dext)
        print(cmd)
        fn.linos.shell_command(cmd)
    if compress and delete:
        cmd = 'rm -rf %s/*.dmp*' % folder
        print(cmd)
        fn.linos.shell_command(cmd)
    t2 = fn.now()
    print('Backup took %d seconds' % int(t1 - t0))
    print('Compression took %d seconds' % int(t2 - t1))
    return filename
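# Usage sketch (credentials, folder and dates are illustrative):
#   mysqldump_by_date('hdbpp', 'manager', '...', '/backups/hdbpp',
#                     '2019-11-01', '2019-12-01')
# dumps every table restricted to November 2019 timestamps and returns
# '/backups/hdbpp/hdbpp-2019-11-01-2019-12-01.tgz'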
def decimate(db_name, keys, tstart, tend, period=10, dry=False):
    """
    Time arguments are strings.
    BUT! this method does not seem to free space in TDB anymore.
    Maybe a mysqld restart is needed; but up to now space is not freed.
    """
    api = pta.api(db_name)
    if '/' in keys[0]:
        print('Decimating by attribute names')
        tables = fn.defaultdict(list)
        for a in keys:
            api.get_attr_id_type_table(a)
            tables[api[a].table].append(a)
        print('tables: %s' % (tables.keys()))
        for table, attrs in tables.items():
            for a in attrs:
                pta.dbs.decimate_db_table_by_time(
                    api, table, api[a].id, tstart, tend, period,
                    optimize=(a == attrs[-1]))
    if '/' not in keys[0]:
        print('Decimating by data_type')
        data_types = keys
        if not data_types:
            data_types = [r[0] for r in
                api.Query('select data_type from att_conf_data_type')]
        else:
            data_types = [d.replace('att_', '') for d in data_types]
        print('Decimating %s types between %s and %s: %s'
              % (db_name, tstart, tend, data_types))
        for data_type in data_types:
            attrs = api.Query(
                'select att_conf_id from att_conf,att_conf_data_type '
                'where att_conf.att_conf_data_type_id = '
                'att_conf_data_type.att_conf_data_type_id '
                'and data_type = "%s"' % data_type)
            attrs = [r[0] for r in attrs]
            q = ("select partition_name,table_name"
                 " from information_schema.partitions where"
                 " partition_name is not NULL"
                 " and table_schema = '%s'" % db_name
                 + " and table_name like '%" + data_type + "'")
            print(q)
            partitions = api.Query(q)
            if partitions:
                table = partitions[0][1]
            else:
                table = 'att_' + data_type
            print('%s has %d attributes in %d partitions'
                  % (table, len(attrs), len(partitions)))
            c0 = api.Query('select count(*) from %s ' % table)
            import re
            intervals = []
            for p in partitions:
                p = p[0]
                r = '(?P<year>[0-9][0-9][0-9][0-9])(?P<month>[0-9][0-9])'
                md = re.search(r, p).groupdict()
                t0 = '%s-%s-01 00:00:00' % (md['year'], md['month'])
                m, y = int(md['month']), int(md['year'])
                if m == 12:
                    m, y = 1, y + 1
                else:
                    m += 1
                t1 = '%04d-%02d-01 00:00:00' % (y, m)
                if (fn.str2time(t0) < fn.str2time(tend)
                        and fn.str2time(t1) > fn.str2time(tstart)):
                    intervals.append((t0, t1, p))
            if not partitions:
                ts, te = fn.str2time(tstart), fn.str2time(tend)
                tinc = (te - ts) / 10.
                for i in range(1, 11):
                    intervals.append((fn.time2str(ts + (i - 1) * tinc),
                                      fn.time2str(ts + i * tinc), None))
            print('%d intervals in %s' % (len(intervals), table))
            for t0, t1, p in intervals:
                print((t0, t1))
                if dry:
                    continue
                for a in attrs:
                    c0 = api.getTableSize(table)
                    pta.dbs.decimate_db_table(
                        db=api, table=table,
                        start=fn.str2time(t0), end=fn.str2time(t1),
                        period=600 if 'string' in table else 300,
                        condition=' att_conf_id = %s ' % a,
                        iteration=2000, cols=['data_time', 'value_r'],
                        us=True, repeated=True)
                if p:
                    api.Query('alter table %s optimize partition %s'
                              % (table, p))
            if not dry:
                q = 'repair table %s;' % table
                print('\n' + q)
                api.Query(q)
                c1 = api.getTableSize(table)
                print('\n\n%s size reduced from %s to %s' % (table, c0, c1))
    print('elapsed %d seconds' % (time.time() - tt0))
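# Usage sketch (names are illustrative); keys may be attribute names or
# data types, and dry=True only prints the intervals to be decimated:
#   decimate('hdbpp', ['sr/di/dcct/current'], '2019-01-01', '2019-06-01',
#            period=10, dry=True)
#   decimate('hdbpp', ['scalar_devdouble_ro'], '2019-01-01', '2019-06-01')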
def archiving_check(schema, csvpath=''):
    api = PyTangoArchiving.ArchivingAPI(schema)
    states = api.servers.states()
    values = api.load_last_values()  # time consuming on HDB
    shouldbe = sorted(a for a in api if values[a] and
        fandango.date2time(values[a][0][0]) > time.time() - 2*30*3600*24)
    active = api.get_archived_attributes()
    updated = sorted(a for a in active if values[a] and
        fandango.date2time(values[a][0][0]) > time.time() - 3*3600)
    missing = sorted(a for a in shouldbe if a not in active)
    lost = sorted(a for a in active if a not in updated)
    loadarchivers = defaultdict(list)
    loadservers = defaultdict(list)
    lostarchivers = defaultdict(list)
    lostservers = defaultdict(list)
    for a in active:
        arch = api[a].archiver.lower()
        server = api.servers.get_device_server(arch).lower()
        loadarchivers[arch].append(a)
        loadservers[server].append(a)
        if a in lost:
            lostarchivers[arch].append(a)
            lostservers[server].append(a)
    emptyarchivers = [a for a, v in loadarchivers.items() if not len(v)]
    lostrate = dict(
        (a, len(v) and len([a for a in v if a in lost]) / float(len(v)))
        for a, v in loadarchivers.items())
    lostserversrate = dict(
        (a, len(v) and len([a for a in v if a in lost]) / float(len(v)))
        for a, v in loadservers.items())
    dedi = api.load_dedicated_archivers()
    dediattrs = defaultdict(list)
    [dediattrs[a.lower()].append(d) for d, v in dedi.items() for a in v]
    dmult = [a for a, v in dediattrs.items() if len(v) > 1]
    wrongnames = [a for a in dediattrs if not attribute_name_check(a)]
    wrongarchivers = set(k.lower() for k, v in dedi.items()
        if any(a.lower() in map(str.lower, v) for a in wrongnames))
    wrongattrs = [a for a, v in dediattrs.items()
        if a in api and api[a].archiver.lower() != v[0].lower()]
    deleteattrs = [a for a in dediattrs if a not in shouldbe]
    fnames = GetConfigFiles(csvpath) if csvpath else GetConfigFiles()
    csvs = dict((f, pta.ParseCSV(f, schema)) for f in fnames)
    csvattrs = defaultdict(list)
    [csvattrs[a.lower().strip()].append(f) for f, v in csvs.items() for a in v]
    stats = sorted([
        (len(v), len(v) and len([a for a in v if a in lost]) / float(len(v)))
        for v in loadservers.values()])
    stats = [(x, fandango.avg(t[1] for t in stats if t[0] == x))
             for x in sorted(set(v[0] for v in stats))]
    # pylab.plot([t[0] for t in stats], [t[1] for t in stats]); pylab.show()
    exported = dict(
        (d, fandango.str2time(fandango.get_device_info(d).started,
                              '%dst %B %Y at %H:%M:%S'))
        for d in api.get_archivers())
    first = min(exported.values())
    # SLOWER SPEEDS ALWAYS HAVE MORE LOST ATTRIBUTES
    # Let's try a different approach to restart, much less aggressive
    # than fandango.start_servers()!
    # It seems that there's a lock when so many devices are restarted at once!
    torestart = list(reversed(sorted(
        (len(v), k) for k, v in lostservers.items())))
    for n, k in torestart:
        print('Restarting %s' % k)
        fandango.Astor(k).stop_servers()
        time.sleep(20.)
        fandango.Astor(k).start_servers(wait=240.)
    allattrs = sorted(set([a for a in csvattrs if a in api] + shouldbe + active))
    for i, r in enumerate(vals[1:]):
        if r[1] != vals[i][1]:
            if vals[i] != diffs[-1]:
                diffs.append(vals[i])
            diffs.append(r)
    print('At least, %d rows will be kept' % len(diffs))

    if float(len(diffs)) / len(vals) < 0.7:
        if 'd' in flags:
            sys.exit(0)
        for i, d in enumerate(diffs[1:]):
            t0 = fn.time2str(diffs[i][0] + 1)
            t1 = fn.time2str(d[0] - 1)
            if fn.str2time(t1) - fn.str2time(t0) >= abs(
                    (int(tdiff) or int(mlimit)) - 2):
                q = ("delete from %s where time between '%s' and '%s'"
                     % (table, t0, t1))
                query(q, aid)
    else:
        print("standard decimation doesn't pay off")
        if tdiff:
            print('decimating t < %s' % tdiff)
            tfirst = vals[0][0]  # query(q)[0][0]
            trange = 3600 * 12
            for tt in range(int(tfirst), int(fn.str2time(tend)), int(trange)):
                q = ("select count(*) from %s where "
                     "(UNIX_TIMESTAMP(%s) between %s and %s) "
                     "and UNIX_TIMESTAMP(%s) %% %d <= %d")
                q = q % (table, tcol, tt, tt + trange,
def decimate_db_table(db, table, host='', user='', passwd='', start=0, end=0,
                      period=300, iteration=1000, condition='', cols=None,
                      us=True, test=False, repeated=False):
    """
    This method removes all values from a MySQL table that seem duplicated
    in time or value. All values with a difference in time lower than period
    will be kept.

    To use it with hdb++:

    decimate_db_table('hdbpp', user='******', passwd='...',
        table='att_scalar_devdouble_ro',
        start=0,
        end=now() - 600*86400,
        period=60,                      # Keep a value every 60s
        condition='att_conf_id = XX',
        iteration=1000,
        cols=['data_time', 'value_r'],
        us=True,
        )
    """
    print('Decimating all repeated values in %s(%s) with less '
          'than %d seconds in between.' % (table, condition, period))

    db = (FriendlyDB(db, host, user, passwd)
          if not isinstance(db, FriendlyDB) else db)
    #rw = 'write_value' in ','.join(
    #    [l[0] for l in db.Query("describe %s" % table)]).lower()
    #date, column = 'read_value,write_value' if rw else 'value'
    columns = cols or ['time', 'value']
    date, column = columns[0], columns[1:]

    start = time2date(start) if isNumber(start) else time2date(str2time(start))
    t0, vw0, now = start, None, time2date(time.time())
    end = time2date(end) if isNumber(end) else time2date(str2time(end))
    removed, pool, reps = 0, [], []
    count = 0

    ## WHY ARE T0 AND END DATES!?!? : to be easy to compare against read values
    while t0 < (end or now):
        query = "select %s,%s from %s where" % (date, ','.join(column), table)
        query += " '%s' < %s" % (date2str(t0, us=True), date)
        if condition:
            query += ' and %s' % condition
        query += ' order by %s' % date
        query += ' limit %d' % iteration
        values = db.Query(query)
        if not values:
            break
        for i, v in enumerate(values):
            count += 1
            t1, vw1 = v[0], v[1:1 + len(column)]
            # millisecs
            e0 = 1e-3 * int(1e3 * date2time(t0))
            e1 = 1e-3 * int(1e3 * date2time(t1))
            tdelta = e1 - e0
            is_last = i >= (len(values) - 1) or t1 >= end
            buff = len(pool)
            if is_last or tdelta >= period or vw0 != vw1:
                # End of repeated values, apply decimation ...
                if buff:
                    # Don't apply remove on windows < 1 second
                    e1 = date2time(values[i - 1][0])  # previous value
                    if not test:
                        # Don't use the between syntax!!
                        q = "delete from %s where " % table
                        if condition:
                            q += condition + ' and '
                        # t0 should not be removed!
                        q += "%s > '%s' and " % (date, time2str(e0, us=us))
                        q += "%s < '%s'" % (date, time2str(e1, us=us))
                        db.Query(q)
                if reps:
                    if not test:
                        q = "delete from %s where " % (table)
                        if condition:
                            q += condition + ' and '
                        q += "%s = '%s' limit %d" % (
                            date, date2str(reps[-1], us=us), len(reps))
                        db.Query(q)
                pool, reps = [], []
                t0, vw0 = t1, vw1
            else:
                # repeated values with tdiff < period will be removed
                # in a single query;
                # if the timestamp is repeated the condition t < d < t is
                # useless, so repeated timestamps are removed directly
                if repeated and not tdelta:
                    reps.append(t1)
                elif vw0 == vw1:
                    pool.append(t1)
                else:
                    pass
            if is_last:
                break

    query = "select count(*) from %s where" % (table)
    query += " '%s' < %s and %s < '%s'" % (
        date2str(start, us=us), date, date, date2str(end, us=us))
    if condition:
        query += ' and %s' % condition
    cur = db.Query(query)[0][0]
    removed = count - cur
    print('decimate_db_table(%s,%s) took %d seconds to remove %d = %d - %d values'
          % (table, condition, time.time() - date2time(now),
             removed, count, cur))
    return removed
def checkSchema(k, schema, attribute='', start=None, stop=None):
    if not isinstance(schema, SchemaDict):
        schema = k.getSchema(schema)
    if not schema:
        return False
    f = schema.get('check')
    if not f:
        print('%s has no check function' % str(schema))
        return True
    try:
        now = time.time()
        start = (str2time(start) if fn.isString(start)
                 else fn.notNone(start, now - 1))
        stop = (str2time(stop) if fn.isString(stop)
                else fn.notNone(stop, now))
        xmatch = lambda e, a: clmatch(e, a, extend=True)
        k.LOCALS.update({
            'attr': attribute.lower(),
            'attribute': attribute.lower(),
            'device': attribute.lower().rsplit('/', 1)[0],
            'match': lambda r: xmatch(r, attribute),
            'clmatch': xmatch,
            'overlap': overlap,
            'time2str': time2str, 'str2time': str2time,
            't2s': time2str, 's2t': str2time,
            'start': start, 'stop': stop, 'now': now,
            'begin': start, 'end': stop, 'NOW': now,
            'reader': schema.get('reader', schema.get('api')),
            'schema': schema.get('schema'),
            'dbname': schema.get('dbname',
                schema.get('db_name', schema.get('schema', ''))),
        })
        if 'reader' in f:
            k.getReader(schema.get('schema'))
        if 'api' in f:
            k.getApi(schema.get('schema'))
        v = fn.evalX(f, k.LOCALS, k.MODULES)
    except:
        print('checkSchema(%s,%s) failed!' % (schema, attribute))
        traceback.print_exc()
        v = False
    return v
def getRange(self):
    try:
        return str2time(str(self.xRangeCB.currentText()))
    except:
        traceback.print_exc()
def check_db_schema(schema, attributes=None, values=None, tref=-12*3600,
                    n=1, filters='*', export='json',
                    restart=False, subscribe=False):
    """
    tref is the time that is considered updated (e.g. now()-86400)
    n is used to consider multiple values

    attrs: all attributes in db
    on: archived
    off: in db but not currently archived
    ok: updated

    known error causes (attrs not lost but not updated):

    nok: attributes are not currently readable
    noevs: attributes not sending events
    novals: attributes never recorded a value
    stall: not updated, but current value matches archiving
    lost: not updated, and values don't match with current
    """
    t0 = fn.now()
    if hasattr(schema, 'schema'):
        api, schema = schema, schema.schema
    else:
        api = pta.api(schema)

    r = fn.Struct(api=api, schema=schema)
    if isString(tref):
        tref = fn.str2time(tref)
    r.tref = fn.now() + tref if tref < 0 else tref
    r.attrs = [a for a in (attributes or api.get_attributes())
               if fn.clmatch(filters, a)]
    print('check_db_schema(%s,attrs[%s],tref="%s",export as %s)'
          % (schema, len(r.attrs), fn.time2str(r.tref), export))

    if restart and schema != 'hdbpc':
        archs = [a for a in api.get_archivers() if not fn.check_device(a)]
        if archs:
            try:
                print('Restarting archivers: %s' % str(archs))
                astor = fn.Astor(archs)
                astor.stop_servers()
                astor.start_servers()
            except:
                traceback.print_exc()
        stopped = api.get_stopped_attributes()
        print('Restarting %d stopped attributes' % len(stopped))
        api.restart_attributes(stopped)

    r.on = [a for a in api.get_archived_attributes() if a in r.attrs]
    r.off = [a for a in r.attrs if a not in r.on]
    r.archs = fn.defaultdict(list)
    r.pers = fn.defaultdict(list)
    r.values = load_schema_values(api, r.on, values, n, tref=tref)

    if schema in ('tdb', 'hdb'):
        [r.archs[api[k].archiver].append(k) for k in r.on]
    else:
        r.rvals = r.values
        r.freq, r.values = {}, {}
        for k, v in r.rvals.items():
            try:
                if n > 1:
                    v = v[0] if isSequence(v) and len(v) else v
                    r.values[k] = v[0] if isSequence(v) and len(v) else v
                    r.freq[k] = v and float(len(v)) / abs(v[0][0] - v[-1][0])
                else:
                    r.values[k] = v
            except Exception as e:
                print(k, v)
                print(fn.except2str())
        for k in api.get_archivers():
            r.archs[k] = api.get_archiver_attributes(k)
        for k in api.get_periodic_archivers():
            r.pers[k] = api.get_periodic_archivers_attributes(k)

    # Get all updated attributes
    r.ok = [a for a, v in r.values.items() if v and v[0] > r.tref]
    # Try to read not-updated attributes
    r.check = dict((a, fn.check_attribute(a)) for a in r.on if a not in r.ok)
    #r.novals = [a for a, v in r.values.items() if not v]
    r.nok, r.stall, r.noevs, r.lost, r.novals, r.evs, r.rem = \
        [], [], [], [], [], {}, []

    # Method to compare numpy values
    for a, v in r.check.items():
        state = check_archived_attribute(
            a, v, default=CheckState.LOST, cache=r, tref=r.tref,
            check_events=subscribe and not api.is_periodic_archived(a))
        {
            #CheckState.ON: r.on,
            #CheckState.OFF: r.off,
            CheckState.OK: r.ok,  # Shouldn't be any ok in check list
            CheckState.NO_READ: r.nok,
            CheckState.STALL: r.stall,
            CheckState.NO_EVENTS: r.noevs,
            CheckState.LOST: r.lost,
            CheckState.UNK: r.novals,
        }[state].append(a)

    # SUMMARY
    r.summary = schema + '\n'
    r.summary += ','.join(
"""on: archived
off: not archived
ok: updated
nok: not readable
noevs: no events
novals: no values
stall: not changing
lost: not updated
""".split('\n')) + '\n'

    getline = lambda k, v, l: '\t%s:\t:%d\t(%s)' % (k, len(v), l)
    r.summary += '\n\t%s:\t:%d\tok+stall: %2.1f %%' % (
        'attrs', len(r.attrs),
        (100. * (len(r.ok) + len(r.stall)) / (len(r.on) or 1e12)))
    r.summary += '\n\t%s/%s:\t:%d/%d' % (
        'on', 'off', len(r.on), len(r.off))
    #if len(r.off) > 20: r.summary += ' !!!'
    r.summary += '\n\t%s/%s:\t:%d/%d' % (
        'ok', 'nok', len(r.ok), len(r.nok))
    if len(r.nok) > 10:
        r.summary += ' !!!'
    r.summary += '\n\t%s/%s:\t:%d/%d' % (
        'noevs', 'novals', len(r.noevs), len(r.novals))
    if len(r.novals) > 1:
        r.summary += ' !!!'
    r.summary += '\n\t%s/%s:\t:%d/%d' % (
        'lost', 'stall', len(r.lost), len(r.stall))
    if len(r.lost) > 1:
        r.summary += ' !!!'
    r.summary += '\n'

    r.archivers = dict.fromkeys(api.get_archivers())
    for d in sorted(r.archivers):
        r.archivers[d] = api.get_archiver_attributes(d)
        novals = [a for a in r.archivers[d] if a in r.novals]
        lost = [a for a in r.archivers[d] if a in r.lost]
        if (len(novals) + len(lost)) > 2:
            r.summary += ('\n%s (all/novals/lost): %s/%s/%s'
                          % (d, len(r.archivers[d]), len(novals), len(lost)))

    if hasattr(api, 'get_periodic_archivers'):
        r.periodics = dict.fromkeys(api.get_periodic_archivers())
        for d in sorted(r.periodics):
            r.periodics[d] = api.get_periodic_archiver_attributes(d)
            novals = [a for a in r.periodics[d] if a in r.novals]
            lost = [a for a in r.periodics[d] if a in r.lost]
            if len(novals) + len(lost) > 2:
                r.summary += ('\n%s (all/novals/lost): %s/%s/%s'
                              % (d, len(r.periodics[d]),
                                 len(novals), len(lost)))

    r.perattrs = [a for a in r.on if a in api.get_periodic_attributes()]
    r.notper = [a for a in r.on if a not in r.perattrs]

    r.summary += '\nfinished in %d seconds\n\n' % (fn.now() - t0)
    print(r.summary)

    if restart:
        try:
            retries = r.lost + r.novals + r.nok
            print('restarting %d attributes' % len(retries))
            api.restart_attributes(retries)
        except:
            traceback.print_exc()

    if export is not None:
        if export is True:
            export = 'txt'
        for x in (export.split(',') if isString(export) else export):
            if x in ('json', 'pck', 'pickle', 'txt'):
                x = '/tmp/%s.%s' % (schema, x)
            print('Saving %s file with keys:\n%s' % (x, r.keys()))
            if 'json' in x:
                fn.dict2json(r.dict(), x)
            else:
                f = open(x, 'w')
                if 'pck' in x or 'pickle' in x:
                    pickle.dump(r.dict(), f)
                else:
                    f.write(fn.dict2str(r.dict()))
                f.close()

    for k, v in r.items():
        if fn.isSequence(v):
            r[k] = sorted(v)

    return r
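# Usage sketch (schema name is illustrative):
#   r = check_db_schema('hdbpp', tref=-12*3600, export='json')
#   print(len(r.ok), len(r.lost), len(r.novals))
# with export='json' the resulting Struct is also dumped to /tmp/hdbpp.json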
if __name__ == '__main__':
    if not args or args[0] not in ('help', 'check', 'decimate'):
        print(__doc__)
        sys.exit(-1)
    action, args = args[0], args[1:]

    if action == 'help':
        print(__doc__)
    elif action == 'check':
        db_name = args[0]
        if len(args) == 1:
            check_db(db_name)
        else:
            tstart = fn.str2time(args[1])
            tend = fn.str2time(args[2])
            table = args[3]
            check_table(db_name, table, tstart, tend)
        sys.exit(0)
    elif action == 'decimate':
        db_name = args[0]  # e.g. 'hdbmiras'
        tstart = fn.time2str(fn.str2time(args[1]))
        tend = fn.time2str(fn.str2time(args[2]))
        period = int(args[3])
        keys = args[4:]  # e.g. ['scalar_devdouble_ro']
        decimate(db_name, keys, tstart, tend, period)