def get_attributes_values(self, tables='', start_date=None, stop_date=None,
                          desc=False, N=0, unixtime=True,
                          extra_columns='quality', decimate=0, human=False):
    """Query values for several attributes/tables at once.

    Normalizes the date interval, expands *tables* into a list of archived
    attributes when a non-sequence (e.g. a filter string) is given, and
    returns a dict mapping each table to its get_attribute_values() result.
    """
    if start_date or stop_date:
        # Normalize both bounds into SQL-compatible date strings
        start_date, start_time, stop_date, stop_time = (
            Reader.get_time_interval(start_date, stop_date))
    if not fn.isSequence(tables):
        # A plain string is treated as a pattern of archived attributes
        tables = self.get_archived_attributes(tables)
    return {
        table: self.get_attribute_values(
            table, start_date, stop_date, desc, N,
            unixtime, extra_columns, decimate, human)
        for table in tables}
def get_attribute_values(self, table, start_date=None, stop_date=None,
                         desc=False, N=0, unixtime=True,
                         extra_columns='quality', decimate=0, human=False,
                         as_double=True, **kwargs):
    """
    Return values between dates from a given table.

    If stop_date is not given, then anything above start_date is returned.
    desc controls the sorting of values.

    unixtime = True enhances the speed of querying by a 60%!!!!
    (due to MySQLdb implementation of datetime)

    If N is specified:

     * Query will return last N values if there's no stop_date
     * If there is, then it will return the first N values (windowing?)
     * If N is negative, it will return the last N values instead

    start_date and stop_date must be in a format valid for SQL.
    """
    t0 = time.time()
    self.warning('HDBpp.get_attribute_values(%s,%s,%s,%s,decimate=%s,%s)'
                 % (table, start_date, stop_date, N, decimate, kwargs))

    # table may already be resolved as an (aid, tid, table) triple
    # (used by the as_double=False retry below)
    if fn.isSequence(table):
        aid, tid, table = table
    else:
        aid, tid, table = self.get_attr_id_type_table(table)

    human = kwargs.get('asHistoryBuffer', human)

    # Build the SELECT column list
    what = 'UNIX_TIMESTAMP(data_time)' if unixtime else 'data_time'
    if as_double:
        what = 'CAST(%s as DOUBLE)' % what
    if 'array' in table:
        what += ",idx"
    what += ',value_r' if 'value_r' in self.getTableCols(table) \
        else ',value'
    if extra_columns:
        what += ',' + extra_columns

    # Build the WHERE clause; aid None means "any attribute in the table"
    interval = 'where att_conf_id = %s' % aid if aid is not None \
        else 'where att_conf_id >= 0 '
    if start_date or stop_date:
        start_date, start_time, stop_date, stop_time = \
            Reader.get_time_interval(start_date, stop_date)
        if start_date and stop_date:
            interval += (" and data_time between '%s' and '%s'"
                         % (start_date, stop_date))
        elif start_date and fandango.str2epoch(start_date):
            interval += " and data_time > '%s'" % start_date

    query = 'select %s from %s %s order by data_time' \
            % (what, table, interval)
    if N == 1:
        human = 1
    if N < 0 or desc:
        query += " desc"
    if N:
        query += ' limit %s' % abs(N)

    ######################################################################
    # QUERY
    self.debug(query)
    try:
        result = self.Query(query)
    except MySQLdb.ProgrammingError as e:
        if 'DOUBLE' in str(e) and "as DOUBLE" in query:
            # CAST(... as DOUBLE) is not valid for this column type;
            # retry the exact same query without the cast.
            return self.get_attribute_values(
                (aid, tid, table), start_date, stop_date, desc, N,
                unixtime, extra_columns, decimate, human,
                as_double=False, **kwargs)
        else:
            # FIX: previously the exception was silently swallowed here,
            # which left 'result' unbound and raised a confusing NameError
            # below; propagate the real database error instead.
            raise

    self.debug('read [%d] in %f s' % (len(result), time.time() - t0))
    t0 = time.time()
    if not result or not result[0]:
        return []

    ######################################################################
    # Array attributes: rows are (time, idx, value, ...); regroup them by
    # timestamp into (time, [v0, v1, ...]) pairs.
    if 'array' in table:
        data = fandango.dicts.defaultdict(list)
        for t in result:
            data[float(t[0])].append(t[1:])
        result = []
        for k, v in sorted(data.items()):
            l = [0] * (1 + max(t[0] for t in v))
            for i, t in enumerate(v):
                if None in t:
                    # A null index/value invalidates the whole array row
                    l = None
                    break
                l[t[0]] = t[1]  # ignoring extra columns (e.g. quality)
            result.append((k, l))
        if N > 0:
            result = result[-N:]
        self.warning('array arranged [%d] in %f s'
                     % (len(result), time.time() - t0))
        t0 = time.time()

    # NOTE: timestamps are returned as emitted by the DB driver (Decimal
    # when cast fails); Decimal->float conversion and decimation are the
    # responsibility of the Reader object.
    self.warning('timestamp arranged [%d] in %f s'
                 % (len(result), time.time() - t0))
    t0 = time.time()

    if human:
        # Append a human-readable date string to every row
        result = [list(t) + [fn.time2str(t[0])] for t in result]

    if not desc and ((not stop_date and N > 0) or (N < 0)):
        # Applied only when the LAST N values are asked in ascending order:
        # the query used "desc limit N", so restore chronological order.
        result = list(reversed(result))
    else:
        # why? (kept from original: switches to a server-side cursor)
        self.getCursor(klass=MySQLdb.cursors.SSCursor)

    self.warning('result arranged [%d]' % len(result))
    return result
def get_attribute_values(
        self, attribute, start_date=None, stop_date=None,
        desc=False, N=0, unixtime=True,
        extra_columns='', #quality',
        decimate=0, human=False,
        as_double=True,
        aggregate='', #'MAX',
        int_time=True,
        what='',
        where='',
        **kwargs):
    """
    Returns archived values between dates for a given table/attribute.

    Parameters
    ----------
    attribute
        attribute or table
    start_date/stop_date
        if not stop_date, anything between start_date and now()
        start_date and stop_date float or str in a format valid for SQL
    desc
        controls the sorting of values
    N
        If 0, None or False, has no effect
        Query will return last N values if there's no stop_date
        If there is, then it will return the first N values (windowing?)
        If N is negative, it will return the last N values instead
    unixtime
        if True forces conversion of datetime to unix timestamp
        at query time. It speeds querying by a 60%!!!!
    extra_columns
        adds columns to result ('quality' by default)
    decimate
        period or aggregation methods
        0 by default (the method will choose)
        if None (RAW), no decimation is done at all
    """
    t0 = time.time()
    # legacy alias: N may also be passed lowercase as n
    N = N or kwargs.get('n', 0)
    self.info(
        'HDBpp.get_attribute_values(%s,%s,%s,N=%s,decimate=%s,'
        'int_time=%s,%s)'
        % (attribute, start_date, stop_date, N, decimate, int_time, kwargs))
    aid, tid, table, index = self.get_attribute_indexes(attribute)
    if not all((aid, tid, table)):
        self.warning('%s is not archived' % table)
        return []
    human = kwargs.get('asHistoryBuffer', human)
    if start_date or stop_date:
        # Normalize both bounds; start_time/stop_time (epoch floats) are
        # used below to estimate sample density
        start_date,start_time,stop_date,stop_time = \
            Reader.get_time_interval(start_date,stop_date)
    # SQL string is built by a separate, testable method
    query = self.get_attribute_values_query(
        attribute, start_date, stop_date, desc, N, unixtime,
        extra_columns, decimate, human, as_double, aggregate, int_time,
        what, where, **kwargs)
    ######################################################################
    # QUERY
    t0 = time.time()
    is_array = 'array' in table
    try:
        result = []
        lasts = {}  # last kept row per array index (None key for scalars)
        cursor = self.Query(query, export=False)
        while True:
            # Fetching/decimating data in blocks of 1024 rows
            v = cursor.fetchmany(1024)
            if v is None:
                break
            # span: seconds covered by this block; density: rows/second
            span = ((v[-1][0] - v[0][0]) if len(v) > 1 else 0)
            density = len(v) / (span or 1)
            limit = (128 if is_array else 1) * MAX_QUERY_SIZE
            # NOTE(review): stop_time/start_time are only bound when dates
            # were given; this branch presumably assumes dated queries —
            # TODO confirm
            if decimate != RAW and span and (
                    density * (stop_time - start_time)) > limit:
                if not decimate or type(decimate) not in (int, float):
                    # choose a decimation period that caps the result size
                    decimate = float(stop_time - start_time) / limit
                    self.warning(
                        'density=%s values/s!: enforce decimate every %s seconds'
                        % (density, decimate))
                for l in v:
                    ix = l[2] if is_array else None
                    # keep row if: first for this index, value transitions
                    # to/from None, or decimation period elapsed
                    if ((ix not in lasts)
                            or (None in (l[1], lasts[ix][1]))
                            or (l[0] >= (lasts[ix][0] + decimate))):
                        result.append(l)
                        lasts[ix] = l
            else:
                result.extend(v)
            if len(v) < 1024:
                break
        self.debug('read [%d] in %f s: %s'
                   % (len(result), time.time() - t0, len(result) > 1
                      and (result[0], result[1], result[-1])))
    except MySQLdb.ProgrammingError as e:
        result = []
        if 'DOUBLE' in str(e) and "as DOUBLE" in query:
            # CAST as DOUBLE failed for this column type; retry uncasted
            return self.get_attribute_values((aid, tid, table), start_date,
                stop_date, desc, N, unixtime, extra_columns, decimate,
                human, as_double=False, **kwargs)
        else:
            traceback.print_exc()
    if not result or not result[0]:
        return []
    ######################################################################
    t0 = time.time()
    # Array reassembly: rows are (time, value, idx, ...); regroup by
    # timestamp into (time, [values]) carrying forward previous values
    if is_array and (not index or not self.INDEX_IN_QUERY):
        max_ix = 0
        data = fandango.dicts.defaultdict(list)
        for t in result:
            data[float(t[0])].append(t[1:])
            if t[2] is not None and max_ix < t[2]:
                max_ix = t[2]
        result = []
        last_arrs = [None] * (1 + max_ix)
        for k, v in sorted(data.items()):
            # it forces all lines to be equal in length
            l = last_arrs[:]
            for i, t in enumerate(v):
                if None in t:
                    l = None
                    break
                # t[1] is index, t[0] is value
                l[t[1]] = t[0]  #Ignoring extra columns (e.g. quality)
                last_arrs[t[1]] = t[0]
            result.append((k, l))
        if N > 0:
            result = result[-N:]
        if N < 0 or desc:
            result = list(reversed(result))
        if index is not None:
            # Caller asked for a single cell of the array
            nr = []
            for i, r in enumerate(result):
                try:
                    nr.append(
                        (r[0], r[1][index] if r[1] is not None else r[1]))
                except:
                    print(index, r)
                    traceback.print_exc()
                    break
            result = nr
        self.debug('array arranged [%d][%s] in %f s'
                   % (len(result), index, time.time() - t0))
    # Decimation to be done in Reader object, after caching
    if human:
        # Append a human-readable date string to every row
        result = [list(t) + [fn.time2str(t[0])] for t in result]
    if not desc and ((not stop_date and N > 0) or (N < 0)):
        #THIS WILL BE APPLIED ONLY WHEN LAST N VALUES ARE ASKED
        self.debug('reversing ...')
        result = list(reversed(result))
    self.debug('result arranged [%d]: %s, %s'
               % (len(result), result[0], result[-1]))
    return result
def get_attribute_values_query(
        self, attribute, start_date=None, stop_date=None,
        desc=False, N=0, unixtime=True,
        extra_columns='', #quality',
        decimate=0, human=False,
        as_double=True,
        aggregate='', #'MAX',
        int_time=True,
        what='',
        where='',
        group_by='',
        **kwargs):
    """Build the SQL query string used by get_attribute_values().

    attribute may be an attribute name or directly a table name.
    what/where/group_by allow the caller to override the generated
    SELECT columns, WHERE clause and GROUP BY clause respectively.
    Returns the query as a plain string; nothing is executed here.
    """
    if attribute in self.getTables():
        # attribute is directly a table name: query all its attributes
        aid, tid, table, index = None, None, attribute, None
    else:
        aid, tid, table, index = self.get_attribute_indexes(attribute)

    if not what:
        # Build the SELECT column list: timestamp, value[, idx][, extras]
        what = 'UNIX_TIMESTAMP(data_time)' if unixtime else 'data_time'
        if as_double:
            what = 'CAST(%s as DOUBLE)' % what
        #what += ' AS DTS'
        value = 'value_r' if 'value_r' in self.getTableCols(table) \
            else 'value'
        if decimate and aggregate in ('AVG', 'MAX', 'MIN'):
            # server-side aggregation per decimation bucket
            value = '%s(%s)' % (aggregate, value)
        what += ', ' + value
        if 'array' in table:
            what += ", idx"
        if extra_columns:
            what += ',' + extra_columns #quality!

    if where:
        where = where + ' and '
    if 'where' not in where:
        where = 'where ' + where

    interval = 'att_conf_id = %s' % aid if aid is not None \
        else 'att_conf_id >= 0 '
    if index and self.INDEX_IN_QUERY:
        interval += ' and idx = %s ' % index

    # int_time is an optional integer-epoch column; filtering on it is
    # faster than on the datetime column when available
    int_time = int_time and 'int_time' in self.getTableCols(table)
    #if int_time: self.debug('Using int_time indexing for %s' % table)

    if start_date or stop_date:
        start_date, start_time, stop_date, stop_time = \
            Reader.get_time_interval(start_date, stop_date)
        if int_time:
            if start_date and stop_date:
                interval += (" and int_time between %d and %d"
                             % (self.str2mysqlsecs(start_date),
                                self.str2mysqlsecs(stop_date)))
            elif start_date and fandango.str2epoch(start_date):
                # FIX: was "% self.str2mysqlsecs" (the bound method was
                # formatted instead of being called -> TypeError)
                interval += (" and int_time > %d"
                             % self.str2mysqlsecs(start_date))
        else:
            if start_date and stop_date:
                interval += (" and data_time between '%s' and '%s'"
                             % (start_date, stop_date))
            elif start_date and fandango.str2epoch(start_date):
                interval += " and data_time > '%s'" % start_date

    where = where + interval
    query = 'select %s from %s %s' % (what, table, where)
    #self.warning('decimate = %s = %s' % (str(decimate),bool(decimate)))

    if decimate:
        if isinstance(decimate, (int, float)):
            d = int(decimate) or 1
        else:
            # auto-choose a period capping the result to MAX_QUERY_SIZE
            d = int((stop_time - start_time) / MAX_QUERY_SIZE) or 1

        def next_power_of_2(x):
            return 1 if x == 0 else 2**int(x - 1).bit_length()
        #d = next_power_of_2(d) #(d/2 or 1)

        # decimation on server side: one row per d-seconds bucket
        if not group_by:
            group_by = 'att_conf_id,' if 'att_conf_id' in str(what) else ''
            if int_time:
                group_by += '(%s DIV %d)' % ('int_time', d)
            else:
                group_by += '(FLOOR(%s/%d))' % (
                    'UNIX_TIMESTAMP(data_time)', d)
            if 'array' in table:
                group_by += ',idx'
        query += " and value_r is not NULL group by %s" % group_by

    query += ' order by %s' % (
        'int_time' #, DTS' # much slower!
        if int_time else 'data_time')
    if N == 1:
        human = 1
    if N < 0 or desc:
        query += " desc"  # or (not stop_date and N>0):
    if N:
        # FIX: was "N * 1024" for arrays, which produced a negative (and
        # invalid) LIMIT when N < 0; use abs(N) in both cases.
        # Arrays need N*1024 rows as each array unfolds into many rows.
        query += ' limit %s' % (abs(N) if 'array' not in table
                                else abs(N) * 1024)
    # too dangerous to remove always data by default, and bunching does not work
    #else:
        #query+=' limit %s' % (MAX_QUERY_SIZE)
    return query