Пример #1
0
    def get_attributes_values(self,
                              tables='',
                              start_date=None,
                              stop_date=None,
                              desc=False,
                              N=0,
                              unixtime=True,
                              extra_columns='quality',
                              decimate=0,
                              human=False):

        if start_date or stop_date:
            start_date,start_time,stop_date,stop_time = \
                Reader.get_time_interval(start_date,stop_date)

        if not fn.isSequence(tables):
            tables = self.get_archived_attributes(tables)

        return dict((t,
                     self.get_attribute_values(
                         t, start_date, stop_date, desc, N, unixtime,
                         extra_columns, decimate, human)) for t in tables)
Пример #2
0
    def get_attribute_values(self,
                             table,
                             start_date=None,
                             stop_date=None,
                             desc=False,
                             N=0,
                             unixtime=True,
                             extra_columns='quality',
                             decimate=0,
                             human=False,
                             as_double=True,
                             **kwargs):
        """
        This method returns values between dates from a given table.
        If stop_date is not given, then anything above start_date is returned.
        desc controls the sorting of values
        
        unixtime = True enhances the speed of querying by a 60%!!!! 
            #(due to MySQLdb implementation of datetime)
        
        If N is specified:
        
            * Query will return last N values if there's no stop_date
            * If there is, then it will return the first N values (windowing?)
            * IF N is negative, it will return the last N values instead
            
        start_date and stop_date must be in a format valid for SQL
        """
        t0 = time.time()
        self.warning('HDBpp.get_attribute_values(%s,%s,%s,%s,decimate=%s,%s)' %
                     (table, start_date, stop_date, N, decimate, kwargs))
        if fn.isSequence(table):
            aid, tid, table = table
        else:
            aid, tid, table = self.get_attr_id_type_table(table)
        human = kwargs.get('asHistoryBuffer', human)

        what = 'UNIX_TIMESTAMP(data_time)' if unixtime else 'data_time'
        if as_double:
            what = 'CAST(%s as DOUBLE)' % what
        if 'array' in table: what += ",idx"
        what += ',value_r' if 'value_r' in self.getTableCols(table) \
                                else ',value'
        if extra_columns: what += ',' + extra_columns
        interval = 'where att_conf_id = %s'%aid if aid is not None \
                                                else 'where att_conf_id >= 0 '

        if start_date or stop_date:
            start_date,start_time,stop_date,stop_time = \
                Reader.get_time_interval(start_date,stop_date)
            if start_date and stop_date:
                interval += (" and data_time between '%s' and '%s'" %
                             (start_date, stop_date))
            elif start_date and fandango.str2epoch(start_date):
                interval += " and data_time > '%s'" % start_date

        query = 'select %s from %s %s order by data_time' \
                        % (what,table,interval)

        if N == 1:
            human = 1
        if N < 0 or desc:
            query += " desc"  # or (not stop_date and N>0):
        if N:
            query += ' limit %s' % abs(N)  # if 'array' not in table else 1024)

        ######################################################################
        # QUERY
        self.debug(query)
        try:
            result = self.Query(query)
        except MySQLdb.ProgrammingError as e:
            if 'DOUBLE' in str(e) and "as DOUBLE" in query:
                return self.get_attribute_values((aid, tid, table),
                                                 start_date,
                                                 stop_date,
                                                 desc,
                                                 N,
                                                 unixtime,
                                                 extra_columns,
                                                 decimate,
                                                 human,
                                                 as_double=False,
                                                 **kwargs)

        self.debug('read [%d] in %f s' % (len(result), time.time() - t0))
        t0 = time.time()
        if not result or not result[0]: return []
        ######################################################################

        if 'array' in table:
            data = fandango.dicts.defaultdict(list)
            for t in result:
                data[float(t[0])].append(t[1:])
            result = []
            for k, v in sorted(data.items()):
                l = [0] * (1 + max(t[0] for t in v))
                for i, t in enumerate(v):
                    if None in t:
                        l = None
                        break
                    l[t[0]] = t[1]  #Ignoring extra columns (e.g. quality)
                result.append((k, l))
            if N > 0:
                #for k,l in result:
                #print((k,l and len(l)))
                result = result[-N:]
            self.warning('array arranged [%d] in %f s' %
                         (len(result), time.time() - t0))
            t0 = time.time()

        # Converting the timestamp from Decimal to float
        # Weird results may appear in filter_array comparison if not done
        # Although it is INCREDIBLY SLOW!!!
        #result = []
        #nr = []
        #if len(result[0]) == 2:
        #for i,t in enumerate(result):
        #result[i] = (float(t[0]),t[1])
        #elif len(result[0]) == 3:
        #for i,t in enumerate(result):
        #result[i] = (float(t[0]),t[1],t[2])
        #elif len(result[0]) == 4:
        #for i,t in enumerate(result):
        #result[i] = ((float(t[0]),t[1],t[2],t[3]))
        #else:
        #for i,t in enumerate(result):
        #result[i] = ([float(t[0])]+t[1:])

        self.warning('timestamp arranged [%d] in %f s' %
                     (len(result), time.time() - t0))
        t0 = time.time()

        # Decimation to be done in Reader object
        #if decimate:
        ## When called from trends, decimate may be the decimation method
        ## or the maximum sample number
        #try:
        #N = int(decimate)
        ##decimate = data_has_changed
        #decimate =
        #result = PyTangoArchiving.reader.decimation(
        #result,decimate,window=0,N=N)
        #except:
        ##N = 1080
        #result = PyTangoArchiving.reader.decimation(result,decimate)

        if human:
            result = [list(t) + [fn.time2str(t[0])] for t in result]

        if not desc and ((not stop_date and N > 0) or (N < 0)):
            #THIS WILL BE APPLIED ONLY WHEN LAST N VALUES ARE ASKED
            #self.warning('reversing ...' )
            result = list(reversed(result))
        else:
            # why?
            self.getCursor(klass=MySQLdb.cursors.SSCursor)

        self.warning('result arranged [%d]' % len(result))
        return result
Пример #3
0
    def get_attribute_values(
            self,
            attribute,
            start_date=None,
            stop_date=None,
            desc=False,
            N=0,
            unixtime=True,
            extra_columns='',  #quality',
            decimate=0,
            human=False,
            as_double=True,
            aggregate='',  #'MAX',
            int_time=True,
            what='',
            where='',
            **kwargs):
        """ 
        Returns archived values between dates for a given table/attribute.
        
        Parameters
        ----------
        attribute
            attribute or table
        start_date/stop_date
            if not stop_date, anything between start_date and now()
            start_date and stop_date float or str in a format valid for SQL
        desc
            controls the sorting of values
        N
            If 0, None or False, has no effect
            Query will return last N values if there's no stop_date
            If there is, then it will return the first N values (windowing?)
            If N is negative, it will return the last N values instead
        unixtime
            if True forces conversion of datetime to unix timestamp
            at query time. It speeds querying by a 60%!!!! 
        extra_columns
            adds columns to result ('quality' by default)
        decimate
            period or aggregation methods
            0 by default (the method will choose)
            if None (RAW), no decimation is done at all
            
        """
        t0 = time.time()
        N = N or kwargs.get('n', 0)
        self.info(
            'HDBpp.get_attribute_values(%s,%s,%s,N=%s,decimate=%s,'
            'int_time=%s,%s)' %
            (attribute, start_date, stop_date, N, decimate, int_time, kwargs))

        aid, tid, table, index = self.get_attribute_indexes(attribute)

        if not all((aid, tid, table)):
            self.warning('%s is not archived' % table)
            return []

        human = kwargs.get('asHistoryBuffer', human)

        if start_date or stop_date:
            start_date,start_time,stop_date,stop_time = \
                Reader.get_time_interval(start_date,stop_date)

        query = self.get_attribute_values_query(attribute, start_date,
                                                stop_date, desc, N, unixtime,
                                                extra_columns, decimate, human,
                                                as_double, aggregate, int_time,
                                                what, where, **kwargs)

        ######################################################################
        # QUERY

        t0 = time.time()
        is_array = 'array' in table

        try:
            result = []
            lasts = {}
            cursor = self.Query(query, export=False)
            while True:
                # Fetching/decimating data in blocks of 1024 rows
                v = cursor.fetchmany(1024)
                if v is None:
                    break
                span = ((v[-1][0] - v[0][0]) if len(v) > 1 else 0)
                density = len(v) / (span or 1)
                limit = (128 if is_array else 1) * MAX_QUERY_SIZE
                if decimate != RAW and span and (
                        density * (stop_time - start_time)) > limit:
                    if not decimate or type(decimate) not in (int, float):
                        decimate = float(stop_time - start_time) / limit
                        self.warning(
                            'density=%s values/s!: enforce decimate every %s seconds'
                            % (density, decimate))
                    for l in v:
                        ix = l[2] if is_array else None
                        if ((ix not in lasts) or (None in (l[1], lasts[ix][1]))
                                or (l[0] >= (lasts[ix][0] + decimate))):
                            result.append(l)
                            lasts[ix] = l
                else:
                    result.extend(v)

                if len(v) < 1024:
                    break

            self.debug('read [%d] in %f s: %s' %
                       (len(result), time.time() - t0, len(result) > 1 and
                        (result[0], result[1], result[-1])))

        except MySQLdb.ProgrammingError as e:
            result = []
            if 'DOUBLE' in str(e) and "as DOUBLE" in query:
                return self.get_attribute_values((aid, tid, table),
                                                 start_date,
                                                 stop_date,
                                                 desc,
                                                 N,
                                                 unixtime,
                                                 extra_columns,
                                                 decimate,
                                                 human,
                                                 as_double=False,
                                                 **kwargs)
            else:
                traceback.print_exc()

        if not result or not result[0]:
            return []
        ######################################################################

        t0 = time.time()

        if is_array and (not index or not self.INDEX_IN_QUERY):
            max_ix = 0
            data = fandango.dicts.defaultdict(list)
            for t in result:
                data[float(t[0])].append(t[1:])
                if t[2] is not None and max_ix < t[2]:
                    max_ix = t[2]

            result = []
            last_arrs = [None] * (1 + max_ix)
            for k, v in sorted(data.items()):
                # it forces all lines to be equal in length
                l = last_arrs[:]
                for i, t in enumerate(v):
                    if None in t:
                        l = None
                        break
                    # t[1] is index, t[0] is value
                    l[t[1]] = t[0]  #Ignoring extra columns (e.g. quality)
                    last_arrs[t[1]] = t[0]
                result.append((k, l))

            if N > 0:
                #for k,l in result:
                #print((k,l and len(l)))
                result = result[-N:]
            if N < 0 or desc:
                result = list(reversed(result))

            if index is not None:
                nr = []
                for i, r in enumerate(result):
                    try:
                        nr.append(
                            (r[0], r[1][index] if r[1] is not None else r[1]))
                    except:
                        print(index, r)
                        traceback.print_exc()
                        break
                result = nr

            self.debug('array arranged [%d][%s] in %f s' %
                       (len(result), index, time.time() - t0))

        # Decimation to be done in Reader object, after caching
        if human:
            result = [list(t) + [fn.time2str(t[0])] for t in result]

        if not desc and ((not stop_date and N > 0) or (N < 0)):
            #THIS WILL BE APPLIED ONLY WHEN LAST N VALUES ARE ASKED
            self.debug('reversing ...')
            result = list(reversed(result))

        self.debug('result arranged [%d]: %s, %s' %
                   (len(result), result[0], result[-1]))
        return result
Пример #4
0
    def get_attribute_values_query(
            self,
            attribute,
            start_date=None,
            stop_date=None,
            desc=False,
            N=0,
            unixtime=True,
            extra_columns='',  #quality',
            decimate=0,
            human=False,
            as_double=True,
            aggregate='',  #'MAX',
            int_time=True,
            what='',
            where='',
            group_by='',
            **kwargs):

        if attribute in self.getTables():
            aid, tid, table, index = None, None, attribute, None
        else:
            aid, tid, table, index = self.get_attribute_indexes(attribute)

        if not what:
            what = 'UNIX_TIMESTAMP(data_time)' if unixtime else 'data_time'
            if as_double:
                what = 'CAST(%s as DOUBLE)' % what
            #what += ' AS DTS'

            value = 'value_r' if 'value_r' in self.getTableCols(table) \
                                    else 'value'

            if decimate and aggregate in ('AVG', 'MAX', 'MIN'):
                value = '%s(%s)' % (aggregate, value)

            what += ', ' + value
            if 'array' in table:
                what += ", idx"

            if extra_columns:
                what += ',' + extra_columns  #quality!

        if where:
            where = where + ' and '
        if 'where' not in where:
            where = 'where ' + where

        interval = 'att_conf_id = %s'%aid if aid is not None \
                                                else 'att_conf_id >= 0 '

        if index and self.INDEX_IN_QUERY:
            interval += ' and idx = %s ' % index

        int_time = int_time and 'int_time' in self.getTableCols(table)
        #if int_time: self.debug('Using int_time indexing for %s' % table)

        if start_date or stop_date:
            start_date,start_time,stop_date,stop_time = \
                Reader.get_time_interval(start_date,stop_date)

            if int_time:

                if start_date and stop_date:
                    interval += (" and int_time between %d and %d" %
                                 (self.str2mysqlsecs(start_date),
                                  self.str2mysqlsecs(stop_date)))

                elif start_date and fandango.str2epoch(start_date):
                    interval += (" and int_time > %d" % self.str2mysqlsecs)

            else:
                if start_date and stop_date:
                    interval += (" and data_time between '%s' and '%s'" %
                                 (start_date, stop_date))

                elif start_date and fandango.str2epoch(start_date):
                    interval += " and data_time > '%s'" % start_date

        where = where + interval
        query = 'select %s from %s %s' % (what, table, where)

        #self.warning('decimate = %s = %s' % (str(decimate),bool(decimate)))
        if decimate:

            if isinstance(decimate, (int, float)):
                d = int(decimate) or 1
            else:
                d = int((stop_time - start_time) / MAX_QUERY_SIZE) or 1

            def next_power_of_2(x):
                return 1 if x == 0 else 2**int(x - 1).bit_length()

            #d = next_power_of_2(d) #(d/2 or 1)

            # decimation on server side
            if not group_by:
                group_by = 'att_conf_id,' if 'att_conf_id' in str(what) else ''
                if int_time:
                    group_by += '(%s DIV %d)' % ('int_time', d)
                else:
                    group_by += '(FLOOR(%s/%d))' % (
                        'UNIX_TIMESTAMP(data_time)', d)

                if 'array' in table:
                    group_by += ',idx'

            query += " and value_r is not NULL group by %s" % group_by

        query += ' order by %s' % (
            'int_time'  #, DTS' # much slower!
            if int_time else 'data_time')

        if N == 1:
            human = 1
        if N < 0 or desc:
            query += " desc"  # or (not stop_date and N>0):
        if N:
            query += ' limit %s' % (abs(N) if 'array' not in table else N *
                                    1024)

        # too dangerous to remove always data by default, and bunching does not work
        #else:
        #query+=' limit %s' % (MAX_QUERY_SIZE)

        return query