def update_from_aggregate(self, min_last_update=None, max_rate=None, max_rate_callback=None): """Update this aggregate from another aggregate.""" # LAST_UPDATE points to the last step updated step = self.agg.metadata['STEP'] steps_needed = step // self.ancestor.metadata['STEP'] # XXX what to do if our step isn't divisible by ancestor steps? last_update = self.agg.metadata['LAST_UPDATE'] + \ self.ancestor.metadata['STEP'] if min_last_update and min_last_update > last_update: last_update = min_last_update data = self.ancestor.select( begin=last_update, end=self.ancestor.max_valid_timestamp()) # get all timestamps since the last update # fill as many bins as possible work = list(itertools.islice(data, 0, steps_needed)) slot = None while len(work) == steps_needed: slot = ((work[0].timestamp / step) * step) #+ step # assert work[-1].timestamp == slot valid = 0 row = Aggregate(slot, ROW_VALID, delta=0, average=None, min=None, max=None) for datum in work: if datum.flags & ROW_VALID: valid += 1 row.delta += datum.delta if isNaN(row.min) or datum.delta < row.min: row.min = datum.delta if isNaN(row.max) or datum.delta > row.max: row.max = datum.delta row.average = row.delta / float(step) valid_ratio = float(valid)/float(len(work)) if valid_ratio < self.agg.metadata['VALID_RATIO']: row.invalidate() self.agg.insert(row) work = list(itertools.islice(data, 0, steps_needed)) if slot is not None: self.agg.metadata['LAST_UPDATE'] = slot self.agg.flush()
def handle_aggregates(self, parts):
    """Dispatch an aggregate-database request from split URL parts.

    0 parts: list aggregate sets; 1 part: list variables in the set;
    2 parts: return [timestamp, value] data for set/variable over the
    requested time range (default: the last hour).
    """
    # Drop the empty component left by a trailing slash.
    if len(parts) > 0 and parts[-1] == '':
        parts = parts[:-1]

    if len(parts) == 0:
        # Root: every aggregate set is a non-leaf child.
        r = [dict(name=s, uri="%s/%s" % (SNMP_URI, s), leaf=False)
             for s in self.agg_db.list_sets()]
    elif len(parts) == 1:
        # One component: the set's variables are leaf children.
        r = [dict(name=v, uri="%s/%s/%s" % (SNMP_URI, parts[0], v), leaf=True)
             for v in self.agg_db.get_set(parts[0]).list_vars()]
    elif len(parts) == 2:
        # Two components: fetch the data itself.
        args = parse_query_string()

        if args.has_key('begin'):
            begin = args['begin']
        else:
            begin = int(time.time() - 3600)

        if args.has_key('end'):
            end = args['end']
        else:
            end = int(time.time())

        path = "/".join(parts)
        try:
            v = self.agg_db.get_var(path)
        except TSDBVarDoesNotExistError:
            print "ERR> var doesn't exist: %s" % path
            return web.notfound()  # Requested variable does not exist
        except InvalidMetaData:
            print "ERR> invalid metadata: %s" % path
            return web.notfound()

        print v
        data = v.select(begin=begin, end=end)
        data = [d for d in data]
        r = []

        for datum in data:
            d = [datum.timestamp, datum.value]
            # NaN has no JSON representation; send null instead.
            if isNaN(d[1]):
                d[1] = None
            r.append(d)

        # NOTE(review): agg is hardwired to "30" -- presumably the
        # aggregation period in seconds; confirm against the database.
        result = dict(data=r, begin_time=begin, end_time=end, agg="30")

        return result
    else:
        print "ERR> too many parts in handle_aggregates"
        return web.notfound()

    return dict(children=r)
def isNaN(val):
    """Return True if ``val`` (or ``float(val)``) is an IEEE-754 NaN.

    On Python < 2.5 or Windows, defer to the fpconst package, whose
    bit-level test is reliable where float formatting is not.
    """
    if sys.version_info < (2, 5) or sys.platform.startswith('win'):
        import fpconst
        return fpconst.isNaN(val)
    else:
        # BUG FIX: comparing str(float(val)) to 'nan' misses platform
        # spellings such '-nan' and 'NaN'.  NaN is the only value not
        # equal to itself, which is platform independent.
        v = float(val)
        return v != v
def isNaN(val):
    """Return True if ``val`` (or ``float(val)``) is an IEEE-754 NaN.

    Uses the fpconst package when the platform's native float
    handling is known to be broken (see is_float_broken()).
    """
    if is_float_broken():
        import fpconst
        return fpconst.isNaN(val)
    else:
        # BUG FIX: the old test compared str(float(val)) against
        # str(1e300000/1e300000) ('nan'), which misses platform
        # spellings such as '-nan'.  NaN is the only value not equal
        # to itself, which is platform independent.
        v = float(val)
        return v != v
def __check_if_all_value_are_numbers(self, wf_controlled_data):
    """Check if all the values in the forecast are numbers.

    Raises Metro_data_error for an invalid (NaN) value in a standard
    column; logs a fatal message for an invalid extended column.
    Time columns are skipped, and 'CC' is tolerated as long as both
    SF and IR are configured.
    """
    for sElement in wf_controlled_data.get_matrix_col_list():
        # Skip time-related columns (names containing 'TIME').
        if sElement.find('TIME') > 0:
            continue
        npElement = wf_controlled_data.get_matrix_col(sElement)
        # In the case of 'CC', only return an error if both SF and IR
        # are not given
        # BUG FIX: used ``is 'CC'`` (identity comparison with a string
        # literal, which only works by CPython interning accident);
        # use equality instead.
        if sElement == 'CC':
            if metro_config.get_value('SF') and metro_config.get_value(
                    'IR'):
                continue
        for fElement in npElement:
            if fpconst.isNaN(fElement):
                if wf_controlled_data.is_standardCol(sElement):
                    sMessage = _("Value in forecast file must be valid.\n") \
                        + _("A value for the element <%s> is invalid")\
                        % (sElement.lower())+\
                        _(" in the file\n'%s'") %\
                        (metro_config.get_value(\
                            "FILE_FORECAST_IN_FILENAME"))
                    raise metro_error.Metro_data_error(sMessage)
                else:
                    sMessage = _("A value for the extended element <%s> is invalid") % (sElement.lower())+\
                        _(" in the file\n'%s'") % (metro_config.get_value("FILE_FORECAST_IN_FILENAME"))
                    metro_logger.print_message(metro_logger.LOGGER_MSG_STOP,\
                                               sMessage)
def writeDouble(self, d):
    """Serialize the float ``d``, routing IEEE specials to their
    dedicated writers and normal values to the tagged text form."""
    if isNaN(d):
        self.writeNaN()
        return
    if isInf(d):
        self.writeInfinity(isPosInf(d))
        return
    emit = self.stream.write
    emit(HproseTags.TagDouble)
    emit(str(d).encode('utf-8'))
    emit(HproseTags.TagSemicolon)
def __check_if_all_value_are_numbers(self, wf_controlled_data):
    """Check if all the values in the forecast are numbers.

    Raises Metro_data_error for an invalid (NaN) value in a standard
    column; logs a fatal message for an invalid extended column.
    Time columns are skipped, and 'CC' is tolerated as long as both
    SF and IR are configured.
    """
    for sElement in wf_controlled_data.get_matrix_col_list():
        # Skip time-related columns (names containing 'TIME').
        if sElement.find('TIME') > 0:
            continue
        npElement = wf_controlled_data.get_matrix_col(sElement)
        # In the case of 'CC', only return an error if both SF and IR
        # are not given
        # BUG FIX: used ``is 'CC'`` (identity comparison with a string
        # literal, which only works by CPython interning accident);
        # use equality instead.
        if sElement == 'CC':
            if metro_config.get_value('SF') and metro_config.get_value('IR'):
                continue
        for fElement in npElement:
            if fpconst.isNaN(fElement):
                if wf_controlled_data.is_standardCol(sElement):
                    sMessage = _("Value in forecast file must be valid.\n") \
                        + _("A value for the element <%s> is invalid")\
                        % (sElement.lower())+\
                        _(" in the file\n'%s'") %\
                        (metro_config.get_value(\
                            "FILE_FORECAST_IN_FILENAME"))
                    raise metro_error.Metro_data_error(sMessage)
                else:
                    sMessage = _("A value for the extended element <%s> is invalid") % (sElement.lower())+\
                        _(" in the file\n'%s'") % (metro_config.get_value("FILE_FORECAST_IN_FILENAME"))
                    metro_logger.print_message(metro_logger.LOGGER_MSG_STOP,\
                                               sMessage)
def get_all_data(self, path, args): if args.has_key('begin'): begin = args['begin'] else: begin = int(time.time() - 3600) if args.has_key('end'): end = args['end'] else: end = int(time.time()) if args.has_key('cf'): cf = args['cf'] else: cf = 'raw' print "DBG> path is %s" % path if 'ALUSAPPoll' in path: path += "/TSDBAggregates/11/" if args.has_key('cf'): cf = args['cf'] else: cf = 'average' try: v = self.db.get_var(path) except TSDBVarDoesNotExistError: print "ERR> var doesn't exist: %s" % path return web.notfound() # Requested variable does not exist except InvalidMetaData: print "ERR> invalid metadata: %s" % path return web.notfound() print "MIN: %d MAX %d" % (v.min_timestamp(recalculate=True), v.max_timestamp(recalculate=True)) data = v.select(begin=begin, end=end) data = [d for d in data] r = [] for datum in data: if cf != 'raw': d = [datum.timestamp, getattr(datum, cf)] else: d = [datum.timestamp, datum.value] if isNaN(d[1]) or datum.flags != tsdb.row.ROW_VALID: d[1] = None r.append(d) if len(r): agg = r[1][0] - r[0][ 0] # not really the best way to guess the agg. result = dict(data=r[:-1], begin_time=begin, end_time=end, agg=agg, scale=0) else: result = dict(data=[], begin_time=begin, end_time=end, agg=agg, scale=0) return result
def _serialize_float(d):
    """Serialize a float as ``d:<value>;``, spelling IEEE special
    values as NAN / INF / -INF."""
    specials = ((fpconst.isNaN, 'NAN'),
                (fpconst.isPosInf, 'INF'),
                (fpconst.isNegInf, '-INF'))
    for predicate, text in specials:
        if predicate(d):
            return 'd:%s;' % text
    return 'd:%s;' % d
def write_double_workaround(self, d):
    """Write an IEEE-754 double, special-casing NaN and infinities.

    Emits the fixed big-endian byte patterns for the special values
    and delegates ordinary doubles to the original implementation,
    which the patching code stashes on this function object as
    ``old_func`` (not visible here -- set up elsewhere).
    """
    if fpconst.isNaN(d):
        # Quiet NaN, big-endian.
        self.write('\xff\xf8\x00\x00\x00\x00\x00\x00')
    elif fpconst.isNegInf(d):
        self.write('\xff\xf0\x00\x00\x00\x00\x00\x00')
    elif fpconst.isPosInf(d):
        self.write('\x7f\xf0\x00\x00\x00\x00\x00\x00')
    else:
        # Normal value: fall back to the saved original writer.
        write_double_workaround.old_func(self, d)
def test_infinites(self):
    """IEEE special doubles encode and decode to the expected bytes."""
    import fpconst
    # Encoding: each infinity must serialize to the marker byte 0x00
    # followed by its big-endian IEEE-754 pattern.
    self._run([(fpconst.NegInf, '\x00\xff\xf0\x00\x00\x00\x00\x00\x00')])
    self._run([(fpconst.PosInf, '\x00\x7f\xf0\x00\x00\x00\x00\x00\x00')])

    # Decoding: a quiet-NaN byte pattern must come back as NaN.
    self.buf.truncate()
    self.buf.write('\x00\xff\xf8\x00\x00\x00\x00\x00\x00')
    self.buf.seek(0)
    x = self.decoder.readElement()
    self.assertTrue(fpconst.isNaN(x))
def test_nan(self):
    """read_double decodes the IEEE special byte patterns correctly."""
    import fpconst
    cases = (
        ('\xff\xf8\x00\x00\x00\x00\x00\x00', fpconst.isNaN),
        ('\xff\xf0\x00\x00\x00\x00\x00\x00', fpconst.isNegInf),
        ('\x7f\xf0\x00\x00\x00\x00\x00\x00', fpconst.isPosInf),
    )
    for raw, check in cases:
        stream = ByteStream(raw)
        self.assertTrue(check(stream.read_double()))
def handle_aggregates(self, parts):
    """Handle an aggregate-database request given the split URL parts.

    With no parts, lists the aggregate sets; with one, lists that
    set's variables; with two, returns the [timestamp, value] data
    for set/variable over the requested window (default last hour).
    """
    # A trailing slash yields an empty final component; drop it.
    if len(parts) > 0 and parts[-1] == '':
        parts = parts[:-1]

    if len(parts) == 0:
        # List all aggregate sets (non-leaf nodes).
        r = [dict(name=s, uri="%s/%s" % (SNMP_URI, s), leaf=False)
             for s in self.agg_db.list_sets()]
    elif len(parts) == 1:
        # List the variables inside the requested set (leaves).
        r = [dict(name=v, uri="%s/%s/%s" % (SNMP_URI, parts[0], v), leaf=True)
             for v in self.agg_db.get_set(parts[0]).list_vars()]
    elif len(parts) == 2:
        # Return data for the set/variable pair.
        args = parse_query_string()

        if args.has_key('begin'):
            begin = args['begin']
        else:
            begin = int(time.time() - 3600)

        if args.has_key('end'):
            end = args['end']
        else:
            end = int(time.time())

        path = "/".join(parts)
        try:
            v = self.agg_db.get_var(path)
        except TSDBVarDoesNotExistError:
            print "ERR> var doesn't exist: %s" % path
            return web.notfound()  # Requested variable does not exist
        except InvalidMetaData:
            print "ERR> invalid metadata: %s" % path
            return web.notfound()

        print v
        data = v.select(begin=begin, end=end)
        data = [d for d in data]
        r = []

        for datum in data:
            d = [datum.timestamp, datum.value]
            # Replace NaN with None so it can be JSON-encoded.
            if isNaN(d[1]):
                d[1] = None
            r.append(d)

        # NOTE(review): the aggregation period is hardcoded to "30"
        # here; presumably it should reflect the variable's metadata.
        result = dict(data=r, begin_time=begin, end_time=end, agg="30")

        return result
    else:
        print "ERR> too many parts in handle_aggregates"
        return web.notfound()

    return dict(children=r)
def get_all_data(self, path, args): if args.has_key('begin'): begin = args['begin'] else: begin = int(time.time() - 3600) if args.has_key('end'): end = args['end'] else: end = int(time.time()) if args.has_key('cf'): cf = args['cf'] else: cf = 'raw' print "DBG> path is %s" % path if 'ALUSAPPoll' in path: path += "/TSDBAggregates/11/" if args.has_key('cf'): cf = args['cf'] else: cf = 'average' try: v = self.db.get_var(path) except TSDBVarDoesNotExistError: print "ERR> var doesn't exist: %s" % path return web.notfound() # Requested variable does not exist except InvalidMetaData: print "ERR> invalid metadata: %s" % path return web.notfound() print "MIN: %d MAX %d"%(v.min_timestamp(recalculate=True),v.max_timestamp(recalculate=True)) data = v.select(begin=begin, end=end) data = [d for d in data] r = [] for datum in data: if cf != 'raw': d = [datum.timestamp, getattr(datum, cf)] else: d = [datum.timestamp, datum.value] if isNaN(d[1]) or datum.flags != tsdb.row.ROW_VALID: d[1] = None r.append(d) if len(r): agg = r[1][0]-r[0][0] # not really the best way to guess the agg. result = dict(data=r[:-1], begin_time=begin, end_time=end,agg=agg,scale=0) else: result = dict(data=[], begin_time=begin, end_time=end,agg=agg,scale=0) return result
def ic(pwm, bg):
    """Calculate the information content of the PWM against the
    background, in nats.

    Both ``pwm`` and ``bg`` are callables returning the log
    likelihood for an index vector; every index combination is
    visited via inc_index.
    """
    total = 0.0
    index = N.zeros(pwm.K, dtype=int)
    more = True
    while more:
        log_p = pwm(index)
        # Cells with zero probability (log -inf) contribute nothing.
        if not fpconst.isNegInf(log_p):
            contribution = N.exp(log_p) * (log_p - bg(index))
            assert not fpconst.isNaN(contribution)
            total += contribution
        more = inc_index(index)
    return total
def ic(pwm, bg):
    """Information content (nats) of the PWM relative to the background.

    ``pwm`` and ``bg`` must be callables returning log likelihoods
    for an index vector; inc_index enumerates all index combinations.
    """
    result = 0.
    idx = N.zeros(pwm.K, dtype=int)
    while True:
        log_like = pwm(idx)
        if fpconst.isNegInf(log_like):
            # Zero-probability cell: no contribution.
            pass
        else:
            term = N.exp(log_like) * (log_like - bg(idx))
            assert not fpconst.isNaN(term)
            result += term
        if not inc_index(idx):
            break
    return result
def get_sap(self, device, rest): if not rest: result = dict(children=[],leaf=False) path = '/%s/ALUSAPPoll' % device.name for v in self.db.get_set(path).list_vars(): result['children'].append(dict( leaf=False, speed=0, uri="%s/%s/sap/" % (SNMP_URI, device.name, rest), name = s, descr = '')) return result path = "/%s/ALUSAPPoll/%s" % (device.name, rest) print ">>", path try: v = self.db.get_var(path) except TSDBVarDoesNotExistError: self.log.error("not found: %s" % path) return web.notfound() args = parse_query_string() if args.has_key('begin'): begin = args['begin'] else: begin = int(time.time() - 3600) if args.has_key('end'): end = args['end'] else: end = int(time.time()) data = v.select(begin=begin, end=end) data = [d for d in data] r = [] for datum in data: d = [datum.timestamp, datum.value] if isNaN(d[1]): d[1] = None r.append(d) result = dict(data=r, begin_time=begin, end_time=end) return result
def get_sap(self, device, rest): if not rest: result = dict(children=[], leaf=False) path = '/%s/ALUSAPPoll' % device.name for v in self.db.get_set(path).list_vars(): result['children'].append( dict(leaf=False, speed=0, uri="%s/%s/sap/" % (SNMP_URI, device.name, rest), name=s, descr='')) return result path = "/%s/ALUSAPPoll/%s" % (device.name, rest) print ">>", path try: v = self.db.get_var(path) except TSDBVarDoesNotExistError: self.log.error("not found: %s" % path) return web.notfound() args = parse_query_string() if args.has_key('begin'): begin = args['begin'] else: begin = int(time.time() - 3600) if args.has_key('end'): end = args['end'] else: end = int(time.time()) data = v.select(begin=begin, end=end) data = [d for d in data] r = [] for datum in data: d = [datum.timestamp, datum.value] if isNaN(d[1]): d[1] = None r.append(d) result = dict(data=r, begin_time=begin, end_time=end) return result
def test_rrd_gap1():
    """Test that we handle gaps in a similar fashion to RRDTool.

    We aren't identical though because we will optimistically report
    partial results for the last timestep.  RRDTool keeps some state
    to make sure it's got full data for a timestep before reporting
    anything.
    """
    db = TSDB(TESTDB)
    var = db.add_var("foo", Counter32, 30, YYYYMMDDChunkMapper)
    var.add_aggregate("30s", YYYYMMDDChunkMapper, ["average", "delta"],
                      metadata=dict(HEARTBEAT=90))
    # Fixed, reproducible starting timestamp (~20 years of seconds).
    begin = 3600*24*365*20
    rrd_file = make_rrd(var, begin-60, TESTRRD, heartbeat=90)

    # (offset, counter value) pairs; deliberate gaps test heartbeat
    # handling on both sides of the 90s threshold.
    data = (
        (0, 0),
        # miss poll in slot 30, gap smaller than heartbeat
        (75, 75000),
        # miss poll at slots 90 and 120, gap larger than heartbeat
        (166, 166000),
        (195, 195000),
        (225, 225000),
    )

    # Feed identical samples to both TSDB and RRDTool.
    for (t, v) in data:
        var.insert(Counter32(begin+t, ROW_VALID, v))
        u = "%d:%d" % (begin+t, v)
        rrdtool.update(rrd_file, u)
        print u

    var.update_all_aggregates()
    agg = var.get_aggregate("30s")

    # Compare each 30s slot against RRDTool's answer.
    for t in range(0, 210, 30):
        t += begin
        args = [rrd_file, "AVERAGE", "-r 30", "-s", str(t), "-e", str(t)]
        a = agg.get(t)
        r = rrdtool.fetch(*args)[-1][0][0]
        print t-begin, a, r
        if r is None and isNaN(a.average):
            # Both sides agree the slot is unknown.
            assert True
        elif t-begin == 150 and a.average == 1000 and r == None:
            # RRD takes one step longer to recover
            assert True
        else:
            assert a.average == r
def set_peak(self, peak):
    """Store ``peak`` and refresh the label's text and background."""
    self.peak = peak

    if fpconst.isNaN(peak):
        # Dark red background flags an invalid (NaN) reading.
        self.modify_bg(gtk.STATE_NORMAL,
                       gtk.gdk.Color(int(65535 * 0.7), 0, 0))
        self.label.set_text("NaN")
        return

    if peak > 0:
        # Positive peak: orange warning background.
        colour = gtk.gdk.Color(int(65535 * 0.8), int(65535 * 0.3), 0)
    else:
        # Non-positive peak: restore the theme's normal background.
        colour = self.label.style.bg[gtk.STATE_NORMAL]

    self.modify_bg(gtk.STATE_NORMAL, colour)
    self.label.set_text("%+.1f" % peak)
def dump_float(self, obj, tag, typed = 1, ns_map = {}):
    """Serialize a Python float as a SOAP ``double`` element.

    NOTE(review): the mutable default ``ns_map={}`` is shared across
    calls; it looks like it is only read here, but confirm.
    """
    if Config.debug: print "In dump_float."
    tag = tag or self.gentag()

    if Config.strict_range:
        # Range-checks obj against the xsd:double limits.
        doubleType(obj)

    # IEEE special values have fixed XML Schema spellings.
    if fpconst.isPosInf(obj):
        obj = "INF"
    elif fpconst.isNegInf(obj):
        obj = "-INF"
    elif fpconst.isNaN(obj):
        obj = "NaN"
    else:
        obj = str(obj)

    # Note: python 'float' is actually a SOAP 'double'.
    self.out.append(self.dumper(None, "double", obj, tag, typed, ns_map,
                                self.genroot(ns_map)))
def dump_float(self, obj, tag, typed = 1, ns_map = {}):
    """Serialize a Python float as a SOAP ``double`` element.

    NOTE(review): the mutable default ``ns_map={}`` is shared across
    calls; it looks like it is only read here, but confirm.
    """
    if Config.debug: print "In dump_float."
    tag = tag or self.gentag()

    tag = toXMLname(tag) # convert from SOAP 1.2 XML name encoding

    if Config.strict_range:
        # Range-checks obj against the xsd:double limits.
        doubleType(obj)

    # IEEE special values have fixed XML Schema spellings.
    if fpconst.isPosInf(obj):
        obj = "INF"
    elif fpconst.isNegInf(obj):
        obj = "-INF"
    elif fpconst.isNaN(obj):
        obj = "NaN"
    else:
        obj = str(obj)

    # Note: python 'float' is actually a SOAP 'double'.
    self.out.append(self.dumper(None, "double", obj, tag, typed, ns_map,
                                self.genroot(ns_map)))
def get_firewall(self, device, rest):
    """Return Juniper firewall counter data for ``device``/``rest``.

    Produces a dict with [timestamp, value] pairs over the requested
    time window (default: the last hour); NaN values become null.
    """
    path = "/%s/JnxFirewall/counter/%s" % (device.name, rest)
    print ">>", path

    try:
        v = self.db.get_var(path)
    except TSDBVarDoesNotExistError:
        self.log.error("not found: %s" % path)
        return web.notfound()

    args = parse_query_string()

    if args.has_key('begin'):
        begin = args['begin']
    else:
        begin = int(time.time() - 3600)

    if args.has_key('end'):
        end = args['end']
    else:
        end = int(time.time())

    data = v.select(begin=begin, end=end)
    data = [d for d in data]
    r = []

    for datum in data:
        d = [datum.timestamp, datum.value]
        # NaN has no JSON representation; emit null instead.
        if isNaN(d[1]):
            d[1] = None
        r.append(d)

    result = dict(data=r, begin_time=begin, end_time=end)

    return result
def points_as_string( self, missing_data = "9999", no_per_line = 3, linefeed = "unix"):
    """Format self.as_list as padded columns, ``no_per_line`` values
    per row.

    NaN entries are replaced by ``missing_data``.  ``linefeed``
    selects Windows (CRLF) or Unix (LF) line endings.

    NOTE(review): the '%3f' format looks like it may have been meant
    as '%.3f'; kept as-is to preserve output.
    """
    eol = '\r\n' if linefeed == "windows" else '\n'
    pieces = []

    for position, value in enumerate(self.as_list, 1):
        if fpconst.isNaN(value):
            formatted = missing_data
        else:
            formatted = "%3f" % value

        if position % no_per_line != 0:
            # Pad mid-row values out to a 15-character column.
            pieces.append(formatted + ' ' * (15 - len(formatted)))
        else:
            # End of a row.
            pieces.append(formatted + eol)

    text = ''.join(pieces)
    if len(self.as_list) > 3:
        text += eol
    return text
def cyymmdd_to_ordinal(x):
    """Convert a CYYMMDD-encoded value to a proleptic ordinal day.

    NaN inputs are passed through unchanged (missing-value marker).
    """
    if not isNaN(x):
        return cyymmdd_to_date(x).toordinal()
    return x
def get_interface_data(self, devicename, iface, dataset, rest):
    """Returns a JSON object representing counter data for an interface.

    This is obtained by doing a GET of one of the follwing URIs:

    /snmp/DEVICE_NAME/interface/INTERFACE_NAME/in
    /snmp/DEVICE_NAME/interface/INTERFACE_NAME/out
    /snmp/DEVICE_NAME/interface/INTERFACE_NAME/error/in
    /snmp/DEVICE_NAME/interface/INTERFACE_NAME/error/out
    /snmp/DEVICE_NAME/interface/INTERFACE_NAME/discard/in
    /snmp/DEVICE_NAME/interface/INTERFACE_NAME/discard/out

    For in and out get_interface_data accepts several query parameters:

        begin -- expressed a seconds since the epoch
        end -- expressed a seconds since the epoch
        agg -- use a precomputed aggregate for data, defaults to highest
               available resolution
        cf -- consolidation function. defaults to average
        calc -- calculate an aggregate, see below for more details
        calc_func --
        oidset -- specifically specify an oidset, see below

    agg specifies which precomputed aggregate to use.  Aggregates are
    represented as rates (eg. bytes/sec) and are calculated for the
    base rate at the time the data is persisted to disk.  This is
    specified as the number of seconds in the aggregation period or as
    'raw'.  'raw' returns the counter data as collected from the device
    without any processing.  Currently there is only the aggreagate for
    the base polling interval and as a result this is rarely used.

    cf determines how datapoints are agreggated into a single datapoint.
    By default the datapoints are averaged but the maximum and minimum
    can also be used.  valid options for this parameter are 'min',
    'max' and 'average'.  This applies to precomputed aggregates that
    are greater than the base polling frequency.

    calc requests that the database dynamically generate an aggregate
    from the base aggregate for this counter.  The parameter is set to
    the numberof seconds to be used in the aggregation period.  The
    function used to consolidate each group of data points into a
    single data in the aggregate is controlled by the calc_func
    parameter.

    calc_func specifies the function to use when calculating an
    aggregate.  It may be one of 'average', 'min', or 'max' and
    defaults to 'average'.

    oidset allows the query to specify a specific oidset to get the
    data from rather than using the usual method for locating the
    oidset.  This is very rarely used.

    An interface data JSON object has the following fields:

    :param data: a list of tuples. each tuple is [timestamp, value]
    :param begin_time: the requested begin_time
    :param end_time: the requested end_time
    :param agg: the requested aggregation period
    :param cf: the requestion consolidation function

    Example:

    {"agg": "30",
     "end_time": 1254350090,
     "data": [[1254349980, 163.0],
              [1254350010, 28.133333333333333],
              [1254350040, 96.966666666666669],
              [1254350070, 110.03333333333333]],
     "cf": "average",
     "begin_time": 1254350000}
    """
    next = None
    if rest:
        next, rest = split_url(rest)
        if next == 'aggs':
            # XXX list actual aggs
            return dict(aggregates=[30], cf=['average'])
        elif dataset not in ['error', 'discard'] and next not in ['in', 'out']:
            return web.notfound("nope")

    args = parse_query_string()

    if args.has_key('begin'):
        begin = args['begin']
    else:
        begin = int(time.time() - 3600)

    if args.has_key('end'):
        end = args['end']
    else:
        end = int(time.time())

    if args.has_key('cf'):
        cf = args['cf']
    else:
        cf = 'average'

    if args.has_key('oidset'):
        traffic_oidset = args['oidset']
        # FastPoll uses 32-bit counters; everything else is HC (64-bit).
        if traffic_oidset == 'FastPoll':
            traffic_mod = ''
        else:
            traffic_mod = 'HC'
    else:
        traffic_oidset, traffic_mod = get_traffic_oidset(devicename)

    # Work out which TSDB path suffix and aggregation period to use.
    if args.has_key('agg'):
        agg = args['agg']
        suffix = 'TSDBAggregates/%s/' % (args['agg'], )
    else:
        if cf == 'raw':
            suffix = ''
            agg = ''
        else:
            if traffic_oidset != 'SuperFastPollHC':
                suffix = 'TSDBAggregates/30/'
                agg = '30'
            else:
                suffix = 'TSDBAggregates/10/'
                agg = '10'

    if dataset in ['in', 'out']: # traffic
        begin, end = int(begin), int(end)

        # Infinera devices expose octet counters under different OIDs.
        if traffic_oidset == 'InfFastPollHC':
            path = '%s/%s/gigeClientCtpPmReal%sOctets/%s/%s' % (
                devicename, traffic_oidset, DATASET_INFINERA_MAP[dataset],
                remove_metachars(iface), suffix)
        else:
            path = '%s/%s/if%s%sOctets/%s/%s' % (
                devicename, traffic_oidset, traffic_mod,
                dataset.capitalize(), remove_metachars(iface), suffix)
    elif dataset in ['error', 'discard']:
        # XXX set agg to delta rather than average
        path = '%s/Errors/if%s%ss/%s' % (devicename, next.capitalize(),
                                         dataset.capitalize(),
                                         remove_metachars(iface))
        path += '/TSDBAggregates/300/'
        agg = '300'
    else:
        print "ERR> can't resolve path"
        return web.notfound()  # Requested variable does not exist

    try:
        v = self.db.get_var(path)
    except TSDBVarDoesNotExistError:
        print "ERR> var doesn't exist: %s" % path
        return web.notfound()  # Requested variable does not exist
    except InvalidMetaData:
        print "ERR> invalid metadata: %s" % path
        return web.notfound()

    try:
        data = v.select(begin=begin, end=end)
    except TSDBVarEmpty:
        print "ERR> var has no data: %s" % path
        return web.notfound()

    data = [d for d in data]
    r = []

    for datum in data:
        if cf != 'raw':
            d = [datum.timestamp, getattr(datum, cf)]
        else:
            d = [datum.timestamp, datum.value]

        # NaN has no JSON representation; emit null instead.
        if isNaN(d[1]):
            d[1] = None

        r.append(d)

    result = dict(data=r, begin_time=begin, end_time=end, cf=cf, agg=agg)

    if args.has_key('calc'):
        if args.has_key('calc_func'):
            calc_func = args['calc_func']
        else:
            calc_func = 'average'

        r = self.calculate(args['calc'], agg, calc_func, r)
        if isinstance(r, HTTPError):
            return r

        result['data'] = r
        result['calc'] = args['calc']
        result['calc_func'] = calc_func

        # these don't make sense if we're using calc
        del result['agg']
        del result['cf']

    return result
def pljulian_to_date(x):
    """Convert a PLearn Julian Number to a Python date."""
    assert not isNaN(x)
    # 1721425 is the fixed offset between Julian day numbering and
    # Python's proleptic Gregorian ordinal.
    ordinal = int(x - 1721425)
    return date.fromordinal(ordinal)
def main():
    """Aggregate per-base scores over genomic intervals.

    Reads a score source and an interval file, then writes each
    interval back out with its average, minimum and maximum score
    appended.  Column indices and file names come from the command
    line (parsed via doc_optparse against the module docstring).
    """
    # Parse command line
    options, args = doc_optparse.parse( __doc__ )
    try:
        score_fname = args[0]
        interval_fname = args[1]
        chrom_col = args[2]
        start_col = args[3]
        stop_col = args[4]
        if len( args ) > 5:
            out_file = open( args[5], 'w' )
        else:
            out_file = sys.stdout
        binned = bool( options.binned )
        mask_fname = options.mask
    except:
        doc_optparse.exit()

    if score_fname == 'None':
        stop_err( 'This tool works with data from genome builds hg16, hg17 or hg18.  Click the pencil icon in your history item to set the genome build if appropriate.' )

    # Convert 1-based user columns to 0-based indices.
    try:
        chrom_col = int(chrom_col) - 1
        start_col = int(start_col) - 1
        stop_col = int(stop_col) - 1
    except:
        stop_err( 'Chrom, start & end column not properly set, click the pencil icon in your history item to set these values.' )

    if chrom_col < 0 or start_col < 0 or stop_col < 0:
        stop_err( 'Chrom, start & end column not properly set, click the pencil icon in your history item to set these values.' )

    # Load scores either from a binned-array directory or a wiggle.
    if binned:
        scores_by_chrom = load_scores_ba_dir( score_fname )
    else:
        try:
            chrom_buffer = int( options.chrom_buffer )
        except:
            chrom_buffer = 3
        scores_by_chrom = load_scores_wiggle( score_fname, chrom_buffer )

    if mask_fname:
        masks = binned_bitsets_from_file( open( mask_fname ) )
    else:
        masks = None

    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = ''

    for i, line in enumerate( open( interval_fname )):
        valid = True
        line = line.rstrip('\r\n')
        if line and not line.startswith( '#' ):
            fields = line.split()
            try:
                chrom, start, stop = fields[chrom_col], int( fields[start_col] ), int( fields[stop_col] )
            except:
                valid = False
                skipped_lines += 1
                if not invalid_line:
                    first_invalid_line = i + 1
                    invalid_line = line
            if valid:
                total = 0
                count = 0
                min_score = 100000000
                max_score = -100000000
                for j in range( start, stop ):
                    if chrom in scores_by_chrom:
                        try:
                            # Skip if base is masked
                            if masks and chrom in masks:
                                if masks[chrom][j]:
                                    continue
                            # Get the score, only count if not 'nan'
                            score = scores_by_chrom[chrom][j]
                            if not isNaN( score ):
                                total += score
                                count += 1
                                max_score = max( score, max_score )
                                min_score = min( score, min_score )
                        except:
                            continue
                if count > 0:
                    avg = total/count
                else:
                    # No scored bases: report 'nan' for all stats.
                    avg = "nan"
                    min_score = "nan"
                    max_score = "nan"

                # Build the resulting line of data
                out_line = []
                for k in range(0, len(fields)):
                    out_line.append(fields[k])
                out_line.append(avg)
                out_line.append(min_score)
                out_line.append(max_score)

                print >> out_file, "\t".join( map( str, out_line ) )
            else:
                # NOTE(review): invalid lines appear to be counted
                # here as well as in the except above -- possible
                # double counting; confirm intent.
                skipped_lines += 1
                if not invalid_line:
                    first_invalid_line = i + 1
                    invalid_line = line
        elif line.startswith( '#' ):
            # We'll save the original comments
            print >> out_file, line

    out_file.close()

    if skipped_lines > 0:
        print 'Data issue: skipped %d invalid lines starting at line #%d which is "%s"' % ( skipped_lines, first_invalid_line, invalid_line )
        if skipped_lines == i:
            print 'Consider changing the metadata for the input dataset by clicking on the pencil icon in the history item.'
def convertToBasicTypes(self, d, t, attrs, config=Config):
    """Convert a SOAP-encoded string ``d`` of schema type ``t`` to a
    Python value.

    ``t`` is a (namespace, typename) pair; ``attrs`` are the element's
    XML attributes.  Raises Underflow/Overflow/UnknownTypeError for
    out-of-range or unrecognized types.
    """
    dnn = d or ''

    if t[0] in NS.EXSD_L:
        if t[1] == "integer":
            # Unbounded integer: prefer int, fall back to long.
            try:
                d = int(d)
                if len(attrs):
                    d = long(d)
            except:
                d = long(d)
            return d
        if self.intlimits.has_key(t[1]):  # integer types
            l = self.intlimits[t[1]]
            try:
                d = int(d)
            except:
                d = long(d)
            # Enforce the declared min/max for the bounded int type.
            if l[1] != None and d < l[1]:
                raise UnderflowError, "%s too small" % d
            if l[2] != None and d > l[2]:
                raise OverflowError, "%s too large" % d
            if l[0] or len(attrs):
                return long(d)
            return d
        if t[1] == "string":
            if len(attrs):
                return unicode(dnn)
            try:
                return str(dnn)
            except:
                return dnn
        if t[1] == "boolean":
            d = d.strip().lower()
            if d in ('0', 'false'):
                return 0
            if d in ('1', 'true'):
                return 1
            raise AttributeError, "invalid boolean value"
        if t[1] in ('double', 'float'):
            l = self.floatlimits[t[1]]
            s = d.strip().lower()

            d = float(s)

            if config.strict_range:
                if d < l[1]:
                    raise UnderflowError
                if d > l[2]:
                    raise OverflowError
            else:
                # some older SOAP impementations (notably SOAP4J,
                # Apache SOAP) return "infinity" instead of "INF"
                # so check the first 3 characters for a match.
                if s == "nan":
                    return fpconst.NaN
                elif s[0:3] in ("inf", "+inf"):
                    return fpconst.PosInf
                elif s[0:3] == "-inf":
                    return fpconst.NegInf

            # Cross-check the parsed value against its spelling to
            # catch silent over/underflow during float().
            if fpconst.isNaN(d):
                if s != 'nan':
                    raise ValueError, "invalid %s: %s" % (t[1], s)
            elif fpconst.isNegInf(d):
                if s != '-inf':
                    raise UnderflowError, "%s too small: %s" % (t[1], s)
            elif fpconst.isPosInf(d):
                if s != 'inf':
                    raise OverflowError, "%s too large: %s" % (t[1], s)
            elif d < 0 and d < l[1]:
                raise UnderflowError, "%s too small: %s" % (t[1], s)
            elif d > 0 and (d < l[0] or d > l[2]):
                raise OverflowError, "%s too large: %s" % (t[1], s)
            elif d == 0:
                # A nonzero spelling that parsed to 0.0 underflowed.
                if type(self.zerofloatre) == StringType:
                    self.zerofloatre = re.compile(self.zerofloatre)

                if self.zerofloatre.search(s):
                    raise UnderflowError, "invalid %s: %s" % (t[1], s)
            return d

        if t[1] in ("dateTime", "date", "timeInstant", "time"):
            return self.convertDateTime(d, t[1])
        if t[1] == "decimal":
            return float(d)
        if t[1] in ("language", "QName", "NOTATION", "NMTOKEN", "Name",
                    "NCName", "ID", "IDREF", "ENTITY"):
            return collapseWhiteSpace(d)
        if t[1] in ("IDREFS", "ENTITIES", "NMTOKENS"):
            d = collapseWhiteSpace(d)
            return d.split()

    if t[0] in NS.XSD_L:
        if t[1] in ("base64", "base64Binary"):
            if d:
                return base64.decodestring(d)
            else:
                return ''
        if t[1] == "hexBinary":
            if d:
                return decodeHexString(d)
            else:
                return
        if t[1] == "anyURI":
            return urllib.unquote(collapseWhiteSpace(d))
        if t[1] in ("normalizedString", "token"):
            return collapseWhiteSpace(d)

    if t[0] == NS.ENC:
        if t[1] == "base64":
            if d:
                return base64.decodestring(d)
            else:
                return ''

    if t[0] == NS.XSD:
        if t[1] == "binary":
            # Legacy 1999-schema binary with explicit encoding attr.
            try:
                e = attrs[(None, 'encoding')]

                if d:
                    if e == 'hex':
                        return decodeHexString(d)
                    elif e == 'base64':
                        return base64.decodestring(d)
                else:
                    return ''
            except:
                pass

            raise Error, "unknown or missing binary encoding"
        if t[1] == "uri":
            return urllib.unquote(collapseWhiteSpace(d))
        if t[1] == "recurringInstant":
            return self.convertDateTime(d, t[1])

    if t[0] in (NS.XSD2, NS.ENC):
        if t[1] == "uriReference":
            return urllib.unquote(collapseWhiteSpace(d))
        if t[1] == "timePeriod":
            return self.convertDateTime(d, t[1])
        if t[1] in ("century", "year"):
            return self.convertDateTime(d, t[1])

    if t[0] in (NS.XSD, NS.XSD2, NS.ENC):
        if t[1] == "timeDuration":
            return self.convertDateTime(d, t[1])

    if t[0] == NS.XSD3:
        if t[1] == "anyURI":
            return urllib.unquote(collapseWhiteSpace(d))
        if t[1] in ("gYearMonth", "gMonthDay"):
            return self.convertDateTime(d, t[1])
        if t[1] == "gYear":
            return self.convertDateTime(d, t[1])
        if t[1] == "gMonth":
            return self.convertDateTime(d, t[1])
        if t[1] == "gDay":
            return self.convertDateTime(d, t[1])
        if t[1] == "duration":
            return self.convertDateTime(d, t[1])

    if t[0] in (NS.XSD2, NS.XSD3):
        if t[1] == "token":
            return collapseWhiteSpace(d)
        if t[1] == "recurringDate":
            return self.convertDateTime(d, t[1])
        if t[1] == "month":
            return self.convertDateTime(d, t[1])
        if t[1] == "recurringDay":
            return self.convertDateTime(d, t[1])

    if t[0] == NS.XSD2:
        if t[1] == "CDATA":
            return collapseWhiteSpace(d)

    raise UnknownTypeError, "unknown type `%s'" % (t[0] + ':' + t[1])
def get_interface_data(self, devicename, iface, dataset, rest): """Returns a JSON object representing counter data for an interface. This is obtained by doing a GET of one of the follwing URIs: /snmp/DEVICE_NAME/interface/INTERFACE_NAME/in /snmp/DEVICE_NAME/interface/INTERFACE_NAME/out /snmp/DEVICE_NAME/interface/INTERFACE_NAME/error/in /snmp/DEVICE_NAME/interface/INTERFACE_NAME/error/out /snmp/DEVICE_NAME/interface/INTERFACE_NAME/discard/in /snmp/DEVICE_NAME/interface/INTERFACE_NAME/discard/out For in and out get_interface_data accepts several query parameters: begin -- expressed a seconds since the epoch end -- expressed a seconds since the epoch agg -- use a precomputed aggregate for data, defaults to highest available resolution cf -- consolidation function. defaults to average calc -- calculate an aggregate, see below for more details calc_func -- oidset -- specifically specify an oidset, see below agg specifies which precomputed aggregate to use. Aggregates are represented as rates (eg. bytes/sec) and are calculated for the base rate at the time the data is persisted to disk. This is specified as the number of seconds in the aggregation period or as 'raw'. 'raw' returns the counter data as collected from the device without any processing. Currently there is only the aggreagate for the base polling interval and as a result this is rarely used. cf determines how datapoints are agreggated into a single datapoint. By default the datapoints are averaged but the maximum and minimum can also be used. valid options for this parameter are 'min', 'max' and 'average'. This applies to precomputed aggregates that are greater than the base polling frequency. calc requests that the database dynamically generate an aggregate from the base aggregate for this counter. The parameter is set to the numberof seconds to be used in the aggregation period. 
The function used to consolidate each group of data points into a single data in the aggregate is controlled by the calc_func parameter. calc_func specifies the function to use when calculating an aggregate. It may be one of 'average', 'min', or 'max' and defaults to 'average'. oidset allows the query to specify a specific oidset to get the data from rather than using the usual method for locating the oidset. This is very rarely used. An interface data JSON object has the following fields: :param data: a list of tuples. each tuple is [timestamp, value] :param begin_time: the requested begin_time :param end_time: the requested end_time :param agg: the requested aggregation period :param cf: the requestion consolidation function Example: {"agg": "30", "end_time": 1254350090, "data": [[1254349980, 163.0], [1254350010, 28.133333333333333], [1254350040, 96.966666666666669], [1254350070, 110.03333333333333]], "cf": "average", "begin_time": 1254350000} """ next = None if rest: next, rest = split_url(rest) if next == 'aggs': # XXX list actual aggs return dict(aggregates=[30], cf=['average']) elif dataset not in ['error', 'discard'] and next not in ['in', 'out']: return web.notfound("nope") args = parse_query_string() if args.has_key('begin'): begin = args['begin'] else: begin = int(time.time() - 3600) if args.has_key('end'): end = args['end'] else: end = int(time.time()) if args.has_key('cf'): cf = args['cf'] else: cf = 'average' if args.has_key('oidset'): traffic_oidset = args['oidset'] if traffic_oidset == 'FastPoll': traffic_mod = '' else: traffic_mod = 'HC' else: traffic_oidset, traffic_mod = get_traffic_oidset(devicename) if args.has_key('agg'): agg = args['agg'] suffix = 'TSDBAggregates/%s/' % (args['agg'], ) else: if cf == 'raw': suffix = '' agg = '' else: if traffic_oidset != 'SuperFastPollHC': suffix = 'TSDBAggregates/30/' agg = '30' else: suffix = 'TSDBAggregates/10/' agg = '10' if dataset in ['in', 'out']: # traffic begin, end = int(begin), int(end) if 
traffic_oidset == 'InfFastPollHC': path = '%s/%s/gigeClientCtpPmReal%sOctets/%s/%s' % (devicename, traffic_oidset, DATASET_INFINERA_MAP[dataset], remove_metachars(iface), suffix) else: path = '%s/%s/if%s%sOctets/%s/%s' % (devicename, traffic_oidset, traffic_mod, dataset.capitalize(), remove_metachars(iface), suffix) elif dataset in ['error', 'discard']: # XXX set agg to delta rather than average path = '%s/Errors/if%s%ss/%s' % (devicename, next.capitalize(), dataset.capitalize(), remove_metachars(iface)) path += '/TSDBAggregates/300/' agg = '300' else: print "ERR> can't resolve path" return web.notfound() # Requested variable does not exist try: v = self.db.get_var(path) except TSDBVarDoesNotExistError: print "ERR> var doesn't exist: %s" % path return web.notfound() # Requested variable does not exist except InvalidMetaData: print "ERR> invalid metadata: %s" % path return web.notfound() try: data = v.select(begin=begin, end=end) except TSDBVarEmpty: print "ERR> var has no data: %s" % path return web.notfound() data = [d for d in data] r = [] for datum in data: if cf != 'raw': d = [datum.timestamp, getattr(datum, cf)] else: d = [datum.timestamp, datum.value] if isNaN(d[1]): d[1] = None r.append(d) result = dict(data=r, begin_time=begin, end_time=end, cf=cf, agg=agg) if args.has_key('calc'): if args.has_key('calc_func'): calc_func = args['calc_func'] else: calc_func = 'average' r = self.calculate(args['calc'], agg, calc_func, r) if isinstance(r, HTTPError): return r result['data'] = r result['calc'] = args['calc'] result['calc_func'] = calc_func # these don't make sense if we're using calc del result['agg'] del result['cf'] return result
if seq[0] == cand: del seq[0] if not isinstance(C, (types.ClassType, types.ObjectType)): raise TypeError, 'class type expected' if hasattr(C, '__mro__'): return C.__mro__ return merge([[C]] + map(get_mro, C.__bases__) + [list(C.__bases__)]) # workaround for python2.4's shortcomings with exceptional floats # see: http://blog.pyamf.org/archives/when-is-nan-not-a-number-with-python-24 import fpconst if not fpconst.isNaN(struct.unpack("!d", '\xff\xf8\x00\x00\x00\x00\x00\x00')[0]): def read_double_workaround(self): bytes = self._read(8) if bytes == '\xff\xf8\x00\x00\x00\x00\x00\x00': return fpconst.NaN if bytes == '\xff\xf0\x00\x00\x00\x00\x00\x00': return fpconst.NegInf if bytes == '\x7f\xf0\x00\x00\x00\x00\x00\x00': return fpconst.PosInf return struct.unpack("%sd" % self.endian, bytes)[0] DataTypeMixIn.read_double = read_double_workaround
def main():
    """Aggregate per-base scores over genomic intervals.

    Reads a score source (binned-array dir or wiggle file) and an interval
    file, then appends average/min/max of the scores covered by each
    interval and writes the result to the output file (or stdout).
    """
    # Parse command line
    options, args = doc_optparse.parse(__doc__)
    try:
        score_fname = args[0]
        interval_fname = args[1]
        chrom_col = args[2]
        start_col = args[3]
        stop_col = args[4]
        if len(args) > 5:
            out_file = open(args[5], "w")
        else:
            out_file = sys.stdout
        binned = bool(options.binned)
        mask_fname = options.mask
    except:
        # Any missing/bad argument -> print usage and exit.
        doc_optparse.exit()
    if score_fname == "None":
        stop_err(
            "This tool works with data from genome builds hg16, hg17 or hg18. Click the pencil icon in your history item to set the genome build if appropriate."
        )
    try:
        # Column arguments are 1-based on the command line; convert to 0-based.
        chrom_col = int(chrom_col) - 1
        start_col = int(start_col) - 1
        stop_col = int(stop_col) - 1
    except:
        stop_err(
            "Chrom, start & end column not properly set, click the pencil icon in your history item to set these values."
        )
    if chrom_col < 0 or start_col < 0 or stop_col < 0:
        stop_err(
            "Chrom, start & end column not properly set, click the pencil icon in your history item to set these values."
        )
    # Load the score data, either from a directory of binned arrays or a
    # wiggle file buffered chrom_buffer chromosomes at a time.
    if binned:
        scores_by_chrom = load_scores_ba_dir(score_fname)
    else:
        try:
            chrom_buffer = int(options.chrom_buffer)
        except:
            chrom_buffer = 3
        scores_by_chrom = load_scores_wiggle(score_fname, chrom_buffer)
    if mask_fname:
        masks = binned_bitsets_from_file(open(mask_fname))
    else:
        masks = None
    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = ""
    for i, line in enumerate(open(interval_fname)):
        valid = True
        line = line.rstrip("\r\n")
        if line and not line.startswith("#"):
            fields = line.split()
            try:
                chrom, start, stop = fields[chrom_col], int(fields[start_col]), int(fields[stop_col])
            except:
                valid = False
                skipped_lines += 1
                if not invalid_line:
                    first_invalid_line = i + 1
                    invalid_line = line
            # NOTE(review): a parse failure is counted both here (above) and
            # again in the else branch below, so skipped_lines appears to
            # double-count invalid lines -- confirm intended.
            if valid:
                total = 0
                count = 0
                # Sentinels outside any realistic score range.
                min_score = 100000000
                max_score = -100000000
                for j in range(start, stop):
                    if chrom in scores_by_chrom:
                        try:
                            # Skip if base is masked
                            if masks and chrom in masks:
                                if masks[chrom][j]:
                                    continue
                            # Get the score, only count if not 'nan'
                            score = scores_by_chrom[chrom][j]
                            if not isNaN(score):
                                total += score
                                count += 1
                                max_score = max(score, max_score)
                                min_score = min(score, min_score)
                        except:
                            # Out-of-range base or lookup failure: ignore it.
                            continue
                if count > 0:
                    avg = total / count
                else:
                    # No usable scores in the interval.
                    avg = "nan"
                    min_score = "nan"
                    max_score = "nan"
                # Build the resulting line of data
                out_line = []
                for k in range(0, len(fields)):
                    out_line.append(fields[k])
                out_line.append(avg)
                out_line.append(min_score)
                out_line.append(max_score)
                print >> out_file, "\t".join(map(str, out_line))
            else:
                skipped_lines += 1
                if not invalid_line:
                    first_invalid_line = i + 1
                    invalid_line = line
        elif line.startswith("#"):
            # We'll save the original comments
            print >> out_file, line
    out_file.close()
    if skipped_lines > 0:
        print 'Data issue: skipped %d invalid lines starting at line #%d which is "%s"' % (
            skipped_lines,
            first_invalid_line,
            invalid_line,
        )
        # NOTE(review): i is the 0-based index of the last line, so this
        # comparison looks off by one for "every line skipped" -- confirm.
        if skipped_lines == i:
            print "Consider changing the metadata for the input dataset by clicking on the pencil icon in the history item."
for at in attrs: obj_attrs[at] = getattr(obj, at) if obj_attrs is None: obj_attrs = get_attrs(obj) return obj_attrs if sys.version_info < (2, 5) or sys.platform.startswith('win'): # workaround for python2.4's shortcomings with exceptional floats # see: http://blog.pyamf.org/archives/when-is-nan-not-a-number-with-python-24 import fpconst if not fpconst.isNaN( struct.unpack("!d", '\xff\xf8\x00\x00\x00\x00\x00\x00')[0]): def read_double_workaround(self): bytes = self._read(8) if bytes == '\xff\xf8\x00\x00\x00\x00\x00\x00': return fpconst.NaN if bytes == '\xff\xf0\x00\x00\x00\x00\x00\x00': return fpconst.NegInf if bytes == '\x7f\xf0\x00\x00\x00\x00\x00\x00': return fpconst.PosInf return struct.unpack("%sd" % self.endian, bytes)[0]
def writeDouble(self, d):
    """Write the float ``d`` to the stream, tagged as an hprose double."""
    # NaN and the infinities get dedicated representations; bail out early.
    if isNaN(d):
        self.writeNaN()
        return
    if isInf(d):
        self.writeInfinity(isPosInf(d))
        return
    self.stream.write('%c%s%c' % (HproseTags.TagDouble, d,
                                  HproseTags.TagSemicolon))
def writeDouble(self, d):
    """Serialize the floating point value ``d`` onto the output stream."""
    if isNaN(d):
        # Not-a-number has its own writer.
        self.writeNaN()
    elif isInf(d):
        # Signed infinity; isPosInf selects which one.
        self.writeInfinity(isPosInf(d))
    else:
        tagged = '%c%s%c' % (HproseTags.TagDouble, d, HproseTags.TagSemicolon)
        self.stream.write(tagged)
def convertToBasicTypes(self, d, t, attrs, config=Config):
    """Convert parsed element text *d* into a Python value per XSD type *t*.

    *t* is a ``(namespace, typename)`` pair, *attrs* holds the element's
    XML attributes, and ``config.strict_range`` enables range enforcement
    for numeric types.  Raises ``UnknownTypeError`` if the type is not
    recognized.
    """
    dnn = d or ''
    #if Config.debug:
        #print "convertToBasicTypes:"
        #print " requested_type=", t
        #print " data=", d
    # print "convertToBasicTypes:"
    # print " requested_type=", t
    # print " data=", d
    # print " attrs=", attrs
    # print " t[0]=", t[0]
    # print " t[1]=", t[1]
    # print " in?", t[0] in NS.EXSD_L
    if t[0] in NS.EXSD_L:
        if t[1] == "integer":  # unbounded integer type
            try:
                d = int(d)
                if len(attrs):
                    d = long(d)
            except:
                d = long(d)
            return d
        if self.intlimits.has_key(t[1]):  # range-bounded integer types
            l = self.intlimits[t[1]]
            try:
                d = int(d)
            except:
                d = long(d)
            if l[1] != None and d < l[1]:
                raise UnderflowError, "%s too small" % d
            if l[2] != None and d > l[2]:
                raise OverflowError, "%s too large" % d
            if l[0] or len(attrs):
                return long(d)
            return d
        if t[1] == "string":
            if len(attrs):
                return unicode(dnn)
            try:
                return str(dnn)
            except:
                return dnn
        if t[1] in ("bool", "boolean"):
            d = d.strip().lower()
            if d in ('0', 'false'):
                return False
            if d in ('1', 'true'):
                return True
            raise AttributeError, "invalid boolean value"
        if t[1] in ('double', 'float'):
            l = self.floatlimits[t[1]]
            s = d.strip().lower()
            # Explicitly check for NaN and Infinities
            if s == "nan":
                d = fpconst.NaN
            elif s[0:2] == "inf" or s[0:3] == "+inf":
                d = fpconst.PosInf
            elif s[0:3] == "-inf":
                d = fpconst.NegInf
            else:
                d = float(s)
            if config.strict_range:
                # Reject non-canonical spellings and out-of-range values.
                if fpconst.isNaN(d):
                    if s[0:2] != 'nan':
                        raise ValueError, "invalid %s: %s" % (t[1], s)
                elif fpconst.isNegInf(d):
                    if s[0:3] != '-inf':
                        raise UnderflowError, "%s too small: %s" % (t[1], s)
                elif fpconst.isPosInf(d):
                    if s[0:2] != 'inf' and s[0:3] != '+inf':
                        raise OverflowError, "%s too large: %s" % (t[1], s)
                elif d < 0 and d < l[1]:
                    raise UnderflowError, "%s too small: %s" % (t[1], s)
                elif d > 0 and (d < l[0] or d > l[2]):
                    raise OverflowError, "%s too large: %s" % (t[1], s)
                elif d == 0:
                    # Compile the zero-underflow pattern lazily on first use.
                    if type(self.zerofloatre) == StringType:
                        self.zerofloatre = re.compile(self.zerofloatre)
                    if self.zerofloatre.search(s):
                        raise UnderflowError, "invalid %s: %s" % (t[1], s)
            return d
        if t[1] in ("dateTime", "date", "timeInstant", "time"):
            return self.convertDateTime(d, t[1])
        if t[1] == "decimal":
            return float(d)
        if t[1] in ("language", "QName", "NOTATION", "NMTOKEN", "Name",
                    "NCName", "ID", "IDREF", "ENTITY"):
            return collapseWhiteSpace(d)
        if t[1] in ("IDREFS", "ENTITIES", "NMTOKENS"):
            d = collapseWhiteSpace(d)
            return d.split()
    if t[0] in NS.XSD_L:
        if t[1] in ("base64", "base64Binary"):
            if d:
                return base64.decodestring(d)
            else:
                return ''
        if t[1] == "hexBinary":
            if d:
                return decodeHexString(d)
            else:
                return
        if t[1] == "anyURI":
            return urllib.unquote(collapseWhiteSpace(d))
        if t[1] in ("normalizedString", "token"):
            return collapseWhiteSpace(d)
    if t[0] == NS.ENC:
        if t[1] == "base64":
            if d:
                return base64.decodestring(d)
            else:
                return ''
    if t[0] == NS.XSD:
        if t[1] == "binary":
            try:
                e = attrs[(None, 'encoding')]
                if d:
                    if e == 'hex':
                        return decodeHexString(d)
                    elif e == 'base64':
                        return base64.decodestring(d)
                else:
                    return ''
            except:
                pass
            raise Error, "unknown or missing binary encoding"
        if t[1] == "uri":
            return urllib.unquote(collapseWhiteSpace(d))
        if t[1] == "recurringInstant":
            return self.convertDateTime(d, t[1])
    if t[0] in (NS.XSD2, NS.ENC):
        if t[1] == "uriReference":
            return urllib.unquote(collapseWhiteSpace(d))
        if t[1] == "timePeriod":
            return self.convertDateTime(d, t[1])
        if t[1] in ("century", "year"):
            return self.convertDateTime(d, t[1])
    if t[0] in (NS.XSD, NS.XSD2, NS.ENC):
        if t[1] == "timeDuration":
            return self.convertDateTime(d, t[1])
    if t[0] == NS.XSD3:
        if t[1] == "anyURI":
            return urllib.unquote(collapseWhiteSpace(d))
        if t[1] in ("gYearMonth", "gMonthDay"):
            return self.convertDateTime(d, t[1])
        if t[1] == "gYear":
            return self.convertDateTime(d, t[1])
        if t[1] == "gMonth":
            return self.convertDateTime(d, t[1])
        if t[1] == "gDay":
            return self.convertDateTime(d, t[1])
        if t[1] == "duration":
            return self.convertDateTime(d, t[1])
    if t[0] in (NS.XSD2, NS.XSD3):
        if t[1] == "token":
            return collapseWhiteSpace(d)
        if t[1] == "recurringDate":
            return self.convertDateTime(d, t[1])
        if t[1] == "month":
            return self.convertDateTime(d, t[1])
        if t[1] == "recurringDay":
            return self.convertDateTime(d, t[1])
    if t[0] == NS.XSD2:
        if t[1] == "CDATA":
            return collapseWhiteSpace(d)
    raise UnknownTypeError, "unknown type `%s'" % (str(t[0]) + ':' + t[1])
def convertToBasicTypes(self, d, t, attrs, config=Config):
    """Convert parsed element text *d* into a Python value per XSD type *t*.

    Older variant of the converter: booleans map to 0/1 and non-finite
    float spellings are only special-cased when ``config.strict_range``
    is off.  Raises ``UnknownTypeError`` if the type is not recognized.
    """
    dnn = d or ''
    #if Config.debug:
        #print "convertToBasicTypes:"
        #print " requested_type=", t
        #print " data=", d
    if t[0] in NS.EXSD_L:
        if t[1] == "integer":
            try:
                d = int(d)
                if len(attrs):
                    d = long(d)
            except:
                d = long(d)
            return d
        if self.intlimits.has_key(t[1]):  # integer types
            l = self.intlimits[t[1]]
            try:
                d = int(d)
            except:
                d = long(d)
            if l[1] != None and d < l[1]:
                raise UnderflowError, "%s too small" % d
            if l[2] != None and d > l[2]:
                raise OverflowError, "%s too large" % d
            if l[0] or len(attrs):
                return long(d)
            return d
        if t[1] == "string":
            if len(attrs):
                return unicode(dnn)
            try:
                return str(dnn)
            except:
                return dnn
        if t[1] == "boolean":
            d = d.strip().lower()
            if d in ('0', 'false'):
                return 0
            if d in ('1', 'true'):
                return 1
            raise AttributeError, "invalid boolean value"
        if t[1] in ('double', 'float'):
            l = self.floatlimits[t[1]]
            s = d.strip().lower()
            d = float(s)
            if config.strict_range:
                if d < l[1]:
                    raise UnderflowError
                if d > l[2]:
                    raise OverflowError
            else:
                # some older SOAP impementations (notably SOAP4J,
                # Apache SOAP) return "infinity" instead of "INF"
                # so check the first 3 characters for a match.
                if s == "nan":
                    return fpconst.NaN
                elif s[0:3] in ("inf", "+inf"):
                    return fpconst.PosInf
                elif s[0:3] == "-inf":
                    return fpconst.NegInf
            # NOTE(review): this validation chain runs regardless of
            # strict_range per the statement order above -- confirm intended.
            if fpconst.isNaN(d):
                if s != 'nan':
                    raise ValueError, "invalid %s: %s" % (t[1], s)
            elif fpconst.isNegInf(d):
                if s != '-inf':
                    raise UnderflowError, "%s too small: %s" % (t[1], s)
            elif fpconst.isPosInf(d):
                if s != 'inf':
                    raise OverflowError, "%s too large: %s" % (t[1], s)
            elif d < 0 and d < l[1]:
                raise UnderflowError, "%s too small: %s" % (t[1], s)
            elif d > 0 and (d < l[0] or d > l[2]):
                raise OverflowError, "%s too large: %s" % (t[1], s)
            elif d == 0:
                # Compile the zero-underflow pattern lazily on first use.
                if type(self.zerofloatre) == StringType:
                    self.zerofloatre = re.compile(self.zerofloatre)
                if self.zerofloatre.search(s):
                    raise UnderflowError, "invalid %s: %s" % (t[1], s)
            return d
        if t[1] in ("dateTime", "date", "timeInstant", "time"):
            return self.convertDateTime(d, t[1])
        if t[1] == "decimal":
            return float(d)
        if t[1] in ("language", "QName", "NOTATION", "NMTOKEN", "Name",
                    "NCName", "ID", "IDREF", "ENTITY"):
            return collapseWhiteSpace(d)
        if t[1] in ("IDREFS", "ENTITIES", "NMTOKENS"):
            d = collapseWhiteSpace(d)
            return d.split()
    if t[0] in NS.XSD_L:
        if t[1] in ("base64", "base64Binary"):
            if d:
                return base64.decodestring(d)
            else:
                return ''
        if t[1] == "hexBinary":
            if d:
                return decodeHexString(d)
            else:
                return
        if t[1] == "anyURI":
            return urllib.unquote(collapseWhiteSpace(d))
        if t[1] in ("normalizedString", "token"):
            return collapseWhiteSpace(d)
    if t[0] == NS.ENC:
        if t[1] == "base64":
            if d:
                return base64.decodestring(d)
            else:
                return ''
    if t[0] == NS.XSD:
        if t[1] == "binary":
            try:
                e = attrs[(None, 'encoding')]
                if d:
                    if e == 'hex':
                        return decodeHexString(d)
                    elif e == 'base64':
                        return base64.decodestring(d)
                else:
                    return ''
            except:
                pass
            raise Error, "unknown or missing binary encoding"
        if t[1] == "uri":
            return urllib.unquote(collapseWhiteSpace(d))
        if t[1] == "recurringInstant":
            return self.convertDateTime(d, t[1])
    if t[0] in (NS.XSD2, NS.ENC):
        if t[1] == "uriReference":
            return urllib.unquote(collapseWhiteSpace(d))
        if t[1] == "timePeriod":
            return self.convertDateTime(d, t[1])
        if t[1] in ("century", "year"):
            return self.convertDateTime(d, t[1])
    if t[0] in (NS.XSD, NS.XSD2, NS.ENC):
        if t[1] == "timeDuration":
            return self.convertDateTime(d, t[1])
    if t[0] == NS.XSD3:
        if t[1] == "anyURI":
            return urllib.unquote(collapseWhiteSpace(d))
        if t[1] in ("gYearMonth", "gMonthDay"):
            return self.convertDateTime(d, t[1])
        if t[1] == "gYear":
            return self.convertDateTime(d, t[1])
        if t[1] == "gMonth":
            return self.convertDateTime(d, t[1])
        if t[1] == "gDay":
            return self.convertDateTime(d, t[1])
        if t[1] == "duration":
            return self.convertDateTime(d, t[1])
    if t[0] in (NS.XSD2, NS.XSD3):
        if t[1] == "token":
            return collapseWhiteSpace(d)
        if t[1] == "recurringDate":
            return self.convertDateTime(d, t[1])
        if t[1] == "month":
            return self.convertDateTime(d, t[1])
        if t[1] == "recurringDay":
            return self.convertDateTime(d, t[1])
    if t[0] == NS.XSD2:
        if t[1] == "CDATA":
            return collapseWhiteSpace(d)
    raise UnknownTypeError, "unknown type `%s'" % (str(t[0]) + ':' + t[1])
def convertToBasicTypes(self, d, t, attrs, config=Config):
    """Convert parsed element text *d* into a Python value per XSD type *t*.

    *t* is a ``(namespace, typename)`` pair, *attrs* holds the element's
    XML attributes, and ``config.strict_range`` enables range enforcement
    for numeric types.  Raises ``UnknownTypeError`` if the type is not
    recognized.
    """
    dnn = d or ""
    # if Config.debug:
    #     print "convertToBasicTypes:"
    #     print "   requested_type=", t
    #     print "   data=", d
    # print "convertToBasicTypes:"
    # print "   requested_type=", t
    # print "   data=", d
    # print "   attrs=", attrs
    # print "   t[0]=", t[0]
    # print "   t[1]=", t[1]
    # print "   in?", t[0] in NS.EXSD_L
    if t[0] in NS.EXSD_L:
        if t[1] == "integer":  # unbounded integer type
            try:
                d = int(d)
                if len(attrs):
                    d = long(d)
            except:
                d = long(d)
            return d
        if self.intlimits.has_key(t[1]):  # range-bounded integer types
            l = self.intlimits[t[1]]
            try:
                d = int(d)
            except:
                d = long(d)
            if l[1] != None and d < l[1]:
                raise UnderflowError, "%s too small" % d
            if l[2] != None and d > l[2]:
                raise OverflowError, "%s too large" % d
            if l[0] or len(attrs):
                return long(d)
            return d
        if t[1] == "string":
            if len(attrs):
                return unicode(dnn)
            try:
                return str(dnn)
            except:
                return dnn
        if t[1] in ("bool", "boolean"):
            d = d.strip().lower()
            if d in ("0", "false"):
                return False
            if d in ("1", "true"):
                return True
            raise AttributeError, "invalid boolean value"
        if t[1] in ("double", "float"):
            l = self.floatlimits[t[1]]
            s = d.strip().lower()
            # Explicitly check for NaN and Infinities
            if s == "nan":
                d = fpconst.NaN
            elif s[0:2] == "inf" or s[0:3] == "+inf":
                d = fpconst.PosInf
            elif s[0:3] == "-inf":
                d = fpconst.NegInf
            else:
                d = float(s)
            if config.strict_range:
                # Reject non-canonical spellings and out-of-range values.
                if fpconst.isNaN(d):
                    if s[0:2] != "nan":
                        raise ValueError, "invalid %s: %s" % (t[1], s)
                elif fpconst.isNegInf(d):
                    if s[0:3] != "-inf":
                        raise UnderflowError, "%s too small: %s" % (t[1], s)
                elif fpconst.isPosInf(d):
                    if s[0:2] != "inf" and s[0:3] != "+inf":
                        raise OverflowError, "%s too large: %s" % (t[1], s)
                elif d < 0 and d < l[1]:
                    raise UnderflowError, "%s too small: %s" % (t[1], s)
                elif d > 0 and (d < l[0] or d > l[2]):
                    raise OverflowError, "%s too large: %s" % (t[1], s)
                elif d == 0:
                    # Compile the zero-underflow pattern lazily on first use.
                    if type(self.zerofloatre) == StringType:
                        self.zerofloatre = re.compile(self.zerofloatre)
                    if self.zerofloatre.search(s):
                        raise UnderflowError, "invalid %s: %s" % (t[1], s)
            return d
        if t[1] in ("dateTime", "date", "timeInstant", "time"):
            return self.convertDateTime(d, t[1])
        if t[1] == "decimal":
            return float(d)
        if t[1] in ("language", "QName", "NOTATION", "NMTOKEN", "Name",
                    "NCName", "ID", "IDREF", "ENTITY"):
            return collapseWhiteSpace(d)
        if t[1] in ("IDREFS", "ENTITIES", "NMTOKENS"):
            d = collapseWhiteSpace(d)
            return d.split()
    if t[0] in NS.XSD_L:
        if t[1] in ("base64", "base64Binary"):
            if d:
                return base64.decodestring(d)
            else:
                return ""
        if t[1] == "hexBinary":
            if d:
                return decodeHexString(d)
            else:
                return
        if t[1] == "anyURI":
            return urllib.unquote(collapseWhiteSpace(d))
        if t[1] in ("normalizedString", "token"):
            return collapseWhiteSpace(d)
    if t[0] == NS.ENC:
        if t[1] == "base64":
            if d:
                return base64.decodestring(d)
            else:
                return ""
    if t[0] == NS.XSD:
        if t[1] == "binary":
            try:
                e = attrs[(None, "encoding")]
                if d:
                    if e == "hex":
                        return decodeHexString(d)
                    elif e == "base64":
                        return base64.decodestring(d)
                else:
                    return ""
            except:
                pass
            raise Error, "unknown or missing binary encoding"
        if t[1] == "uri":
            return urllib.unquote(collapseWhiteSpace(d))
        if t[1] == "recurringInstant":
            return self.convertDateTime(d, t[1])
    if t[0] in (NS.XSD2, NS.ENC):
        if t[1] == "uriReference":
            return urllib.unquote(collapseWhiteSpace(d))
        if t[1] == "timePeriod":
            return self.convertDateTime(d, t[1])
        if t[1] in ("century", "year"):
            return self.convertDateTime(d, t[1])
    if t[0] in (NS.XSD, NS.XSD2, NS.ENC):
        if t[1] == "timeDuration":
            return self.convertDateTime(d, t[1])
    if t[0] == NS.XSD3:
        if t[1] == "anyURI":
            return urllib.unquote(collapseWhiteSpace(d))
        if t[1] in ("gYearMonth", "gMonthDay"):
            return self.convertDateTime(d, t[1])
        if t[1] == "gYear":
            return self.convertDateTime(d, t[1])
        if t[1] == "gMonth":
            return self.convertDateTime(d, t[1])
        if t[1] == "gDay":
            return self.convertDateTime(d, t[1])
        if t[1] == "duration":
            return self.convertDateTime(d, t[1])
    if t[0] in (NS.XSD2, NS.XSD3):
        if t[1] == "token":
            return collapseWhiteSpace(d)
        if t[1] == "recurringDate":
            return self.convertDateTime(d, t[1])
        if t[1] == "month":
            return self.convertDateTime(d, t[1])
        if t[1] == "recurringDay":
            return self.convertDateTime(d, t[1])
    if t[0] == NS.XSD2:
        if t[1] == "CDATA":
            return collapseWhiteSpace(d)
    raise UnknownTypeError, "unknown type `%s'" % (str(t[0]) + ":" + t[1])