def date_pql_string(date):
    if date is None:
        return '_end == None'
    if date == '~':
        return ''
    before = lambda d: '_start <= %f' % dt2ts(d)
    after = lambda d: '(_end >= %f or _end == None)' % dt2ts(d)
    split = date.split('~')
    # replace all occurrences of 'T' with ' '; this handles datetimes
    # passed in as YYYY-MM-DDTHH:MM:SS instead of the expected
    # YYYY-MM-DD HH:MM:SS, and drop any trailing '+HH:MM' timezone offset
    split = [re.sub(r'\+\d\d:\d\d', '', d.replace('T', ' ')) for d in split]
    if len(split) == 1:  # 'dt'
        return '%s and %s' % (before(split[0]), after(split[0]))
    elif split[0] == '':  # '~dt'
        return before(split[1])
    elif split[1] == '':  # 'dt~'
        return after(split[0])
    else:  # 'dt~dt'
        return '%s and %s' % (before(split[1]), after(split[0]))
def test_date_pql_string():
    from metriqued.utils import date_pql_string as _

    assert _(None) == '_end == None'
    assert _('~') == ''

    d1 = datetime(2000, 1, 1, 0, 0, 0)
    d1_ts = dt2ts(d1)
    ba = '_start <= %f and (_end >= %f or _end == None)' % (d1_ts, d1_ts)

    d1_str = str(d1)
    # test no T b/w date/time
    # test passing only a date (no ~ separator)
    assert _(d1_str) == ba

    d1_iso = d1.isoformat()
    # test with T b/w date/time
    # test 'before()'
    assert _('~%s' % d1_iso) == '_start <= %f' % d1_ts

    # test 'after()'
    d1_tz = d1.replace(tzinfo=pytz.UTC).isoformat()
    # test with timezone
    assert _('%s~' % d1_tz) == '(_end >= %f or _end == None)' % d1_ts

    d1_date = '2000-01-01'  # without time
    assert _('~%s' % d1_date) == '_start <= %f' % d1_ts

    # test 'date~date' date range, passing in raw datetime objects
    d1 = datetime(2000, 1, 1, 0, 0, 0)
    d1_ts = dt2ts(d1)
    d2 = datetime(2000, 1, 2, 0, 0, 0)
    d2_ts = dt2ts(d2)
    ba = '_start <= %f and (_end >= %f or _end == None)' % (d2_ts, d1_ts)
    assert _('%s~%s' % (d1, d2)) == ba
def date_pql_string(date):
    '''
    Generate a new pql date query component that can be used to
    query for date (range) specific data in cubes.

    :param date: metrique date (range) to apply to pql query

    If date is None, the resulting query will be a current value
    only query (_end == None)

    The tilde '~' symbol is used as a date range separator.

    A tilde by itself means 'all date ranges possible' and will
    therefore search all objects regardless of their _end date
    timestamp.

    A date on the left with a tilde but no date on the right will
    generate a query where the date range starts at the date provided
    and ends 'today'. ie, from date -> now.

    A date on the right with a tilde but no date on the left will
    generate a query where the date range starts from the first date
    available in the past (oldest) and ends on the date provided.
    ie, from beginning of known time -> date.

    A date on both the left and right will be a simple date range
    query where the date range starts from the date on the left and
    ends on the date on the right. ie, from date to date.
    '''
    if date is None:
        return '_end == None'
    if date == '~':
        return ''
    before = lambda d: '_start <= %f' % dt2ts(d)
    after = lambda d: '(_end >= %f or _end == None)' % dt2ts(d)
    split = date.split('~')
    # replace all occurrences of 'T' with ' '; this handles datetimes
    # passed in as YYYY-MM-DDTHH:MM:SS instead of the expected
    # YYYY-MM-DD HH:MM:SS, and drop any trailing '+HH:MM' timezone offset
    split = [re.sub(r'\+\d\d:\d\d', '', d.replace('T', ' ')) for d in split]
    if len(split) == 1:  # 'dt'
        return '%s and %s' % (before(split[0]), after(split[0]))
    elif split[0] == '':  # '~dt'
        return before(split[1])
    elif split[1] == '':  # 'dt~'
        return after(split[0])
    else:  # 'dt~dt'
        return '%s and %s' % (before(split[1]), after(split[0]))
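
# A minimal usage sketch of the query strings built above; assumes
# dt2ts('2000-01-01 00:00:00') == 946684800.0 (UTC epoch), as exercised
# by test_date_pql_string.
date_pql_string(None)           # '_end == None'
date_pql_string('~')            # ''  (all date ranges)
date_pql_string('~2000-01-01')  # '_start <= 946684800.000000'
date_pql_string('2000-01-01~')  # '(_end >= 946684800.000000 or _end == None)'
date_pql_string('2000-01-01~2000-01-02')
# '_start <= 946771200.000000 and (_end >= 946684800.000000 or _end == None)'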
def test_dt2ts():
    from metriqueu.utils import dt2ts

    # FIXME: millisecond precision, better?
    now_time = int(time())
    now_date = dt.utcfromtimestamp(now_time)
    now_date_iso = now_date.isoformat()

    assert dt2ts(now_time) == now_time
    assert dt2ts(now_date) == now_time
    assert dt2ts(now_date_iso) == now_time
def obj_hook(dct):
    _dct = {}
    for k, v in dct.items():
        _k = str(k).replace('.', '_')
        if k == 'timestamp':
            try:
                # convert milliseconds to seconds
                v = v / 1000. if v else v
            except (TypeError, ValueError):
                # in some cases timestamp is a datetime str
                v = dt2ts(v)
        if k == 'date':
            v = dt2ts(v)
        _dct[_k] = v
    return _dct
def _linreg_future(self, series, since, days=20):
    '''
    Predicts future using linear regression.

    :param pandas.Series series:
        A series in which the values will be placed.
        The index will not be touched.
        Only the values on dates > `since` will be predicted.
    :param datetime since:
        The starting date from which the future will be predicted.
    :param integer days:
        Specifies how many past days should be used in the linear regression.
    '''
    last_days = pd.date_range(end=since, periods=days)
    hist = self.history(last_days)

    # fit y = w[0] * x + w[1] over the last `days` days of history,
    # using epoch timestamps as the x values
    xi = np.array(map(dt2ts, hist.index))
    A = np.array([xi, np.ones(len(hist))])
    y = hist.values
    w = np.linalg.lstsq(A.T, y)[0]

    for d in series.index[series.index > since]:
        series[d] = w[0] * dt2ts(d) + w[1]
        # predicted values can't go negative; clamp to zero
        series[d] = 0 if series[d] < 0 else series[d]
    return series
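
# A standalone sketch of the same least-squares fit used in _linreg_future,
# with made-up numbers: the x values stand in for epoch timestamps returned
# by dt2ts. Note that newer numpy releases want rcond passed explicitly.
import numpy as np

xi = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
y = np.array([10.0, 12.0, 13.0, 15.0, 16.0])
A = np.array([xi, np.ones(len(xi))])
w = np.linalg.lstsq(A.T, y, rcond=None)[0]   # w[0] = slope, w[1] = intercept
prediction = max(0, w[0] * 7.0 + w[1])       # extrapolate to x=7, clamp negatives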
def json_encode(obj):
    '''
    Convert datetime.datetime to timestamp
    '''
    if isinstance(obj, datetime):
        return dt2ts(obj)
    else:
        return json_encoder.default(obj)
def json_encode(obj):
    """
    Convert datetime.datetime to timestamp

    :param obj: value to (possibly) convert
    """
    if isinstance(obj, datetime):
        return dt2ts(obj)
    else:
        return json_encoder.default(obj)
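
# A minimal usage sketch; json.dumps calls json_encode for any value it
# can't serialize natively, so datetimes are emitted as epoch timestamps
# (the exact numeric form depends on what dt2ts returns).
import json
from datetime import datetime

doc = {'name': 'metrique', 'mtime': datetime(2000, 1, 1)}
json.dumps(doc, default=json_encode)
# the 'mtime' datetime comes out as an epoch value, e.g. 946684800.0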
def test_query_add_date():
    from metriqued.utils import query_add_date as _

    d1 = datetime(2000, 1, 1, 0, 0, 0)
    d1_ts = dt2ts(d1)

    q = 'i_heart == "metrique"'
    _pql = '_start <= %f' % d1_ts
    assert _(q, '~') == q
    assert _(q, None) == '%s and _end == None' % q
    assert _(q, '~%s' % d1) == '%s and %s' % (q, _pql)
def obj_hook(dct):
    '''
    JSON decoder. Converts the following:
        * 'timestamp' from epoch milliseconds to epoch seconds
        * 'date' strings to epoch seconds
    '''
    _dct = {}
    for k, v in dct.items():
        _k = str(k).replace('.', '_')
        if k == 'timestamp':
            try:
                # convert milliseconds to seconds
                v = v / 1000. if v else v
            except (TypeError, ValueError):
                # in some cases timestamp is a datetime str
                v = dt2ts(v)
        if k == 'date':
            v = dt2ts(v)
        _dct[_k] = v
    return _dct
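
# A minimal usage sketch of the decoder hook above: keys containing '.'
# are rewritten with '_', millisecond timestamps become seconds, and the
# 'date' string is converted to an epoch timestamp via dt2ts.
import json

raw = '{"user.name": "kejbaly2", "timestamp": 946684800000, "date": "2000-01-01"}'
json.loads(raw, object_hook=obj_hook)
# -> {'user_name': 'kejbaly2', 'timestamp': 946684800.0, 'date': <epoch seconds>}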
def get_objects(self):
    '''
    Run `rpm -q` command on a {local, remote} system to get back
    details of installed RPMs.

    Default rpm details extracted are as follows:
        * name
        * version
        * release
        * arch
        * nvra
        * license
        * os
        * packager
        * platform
        * sourcepackage
        * sourcerpm
        * summary
    '''
    fmt = ':::'.join('%%{%s}' % f for f in self.fields)
    if self.ssh_host:
        output = self._ssh_cmd(fmt)
    else:
        output = self._local_cmd(fmt)
    if isinstance(output, basestring):
        output = output.strip().split('\n')
    lines = [l.strip().split(':::') for l in output]
    now = dt2ts(datetime.now())
    host = self.ssh_host or socket.gethostname()
    objects = []
    for line in lines:
        obj = {'host': host, '_start': now}
        for i, item in enumerate(line):
            if item == '(none)':
                item = None
            obj[self.fields[i]] = item
        obj['_oid'] = '%s__%s' % (host, obj['nvra'])
        objects.append(obj)
    objects = self.normalize(objects)
    return objects
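
# An illustration of the query-format string built above; assuming the
# _local_cmd/_ssh_cmd helpers hand it to something like `rpm -qa --qf <fmt>`,
# each installed package is printed as one ':::'-delimited line that the
# parser splits back into fields.
fields = ['name', 'version', 'release', 'arch', 'nvra']
fmt = ':::'.join('%%{%s}' % f for f in fields)
# fmt == '%{name}:::%{version}:::%{release}:::%{arch}:::%{nvra}'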
def _activity_import_doc(self, time_doc, activities):
    '''
    Import activities for a single document into timeline.
    '''
    batch_updates = [time_doc]
    # compare tz aware/naive depending if acts 'when' is tz_aware or not
    tz_aware = True if activities and activities[0][0].tzinfo else False
    # We want to consider only activities that happened before time_doc
    # do not move this, because time_doc._start changes
    # time_doc['_start'] is a timestamp, whereas act[0] is a datetime
    td_start = ts2dt(time_doc['_start'], tz_aware=tz_aware)
    activities = filter(lambda act: (act[0] < td_start and
                                     act[1] in time_doc), activities)
    incon_log_type = self.config.get('incon_log_type')
    creation_field = self.get_property('cfield')
    # make sure that activities are sorted by when descending
    activities.sort(reverse=True, key=lambda o: o[0])
    new_doc = {}
    for when, field, removed, added in activities:
        when = dt2ts(when)
        last_doc = batch_updates.pop()
        # check if this activity happened at the same time as the last
        # one; if it did then we need to group them together
        if last_doc['_end'] == when:
            new_doc = deepcopy(last_doc)
            last_doc = batch_updates.pop()
        else:
            new_doc = deepcopy(last_doc)
            new_doc['_start'] = when
            new_doc['_end'] = when
            last_doc['_start'] = when
        last_val = last_doc[field]
        # FIXME: pass in field and call _type() within _activity_backwards?
        # for added/removed?
        new_val, inconsistent = self._activity_backwards(new_doc[field],
                                                         removed, added)
        new_doc[field] = new_val
        # Check if the object has the correct field value.
        if inconsistent:
            self._log_inconsistency(last_doc, last_val, field,
                                    removed, added, when, incon_log_type)
            new_doc.setdefault('_corrupted', {})
            # set the corrupted field value to the value that was added
            # and continue processing as if that issue didn't exist
            new_doc['_corrupted'][field] = added
        # Add the objects to the batch
        batch_updates.extend([last_doc, new_doc])
    # try to set the _start of the first version to the creation time
    try:
        # set start to creation time if available
        last_doc = batch_updates[-1]
        if creation_field:
            creation_ts = dt2ts(last_doc[creation_field])
            if creation_ts < last_doc['_start']:
                last_doc['_start'] = creation_ts
        elif len(batch_updates) == 1:
            # we have only one version, that we did not change
            return []
    except Exception as e:
        logger.error('Error updating creation time; %s' % e)
    return batch_updates
def _activity_import_doc(self, time_doc, activities):
    '''
    Import activities for a single document into timeline.
    '''
    batch_updates = [time_doc]
    # We want to consider only activities that happened before time_doc
    # do not move this, because time_doc._start changes
    # time_doc['_start'] is a timestamp, whereas act[0] is a datetime
    td_start = time_doc['_start'] = ts2dt(time_doc['_start'])
    activities = filter(lambda act: (act[0] < td_start and
                                     act[1] in time_doc), activities)
    # make sure that activities are sorted by when descending
    activities = sorted(activities, reverse=True)
    for when, field, removed, added in activities:
        removed = dt2ts(removed) if isinstance(removed, datetime) else removed
        added = dt2ts(added) if isinstance(added, datetime) else added
        last_doc = batch_updates.pop()
        # check if this activity happened at the same time as the last
        # one; if it did then we need to group them together
        if last_doc['_end'] == when:
            new_doc = last_doc
            last_doc = batch_updates.pop()
        else:
            new_doc = deepcopy(last_doc)
            new_doc.pop('_id') if '_id' in new_doc else None
            new_doc['_start'] = when
            new_doc['_end'] = when
            last_doc['_start'] = when
        last_val = last_doc[field]
        new_val, inconsistent = _activity_backwards(new_doc[field],
                                                    removed, added)
        new_doc[field] = new_val
        # Check if the object has the correct field value.
        if inconsistent:
            incon = {'oid': last_doc['_oid'],
                     'field': field,
                     'removed': removed,
                     'removed_type': str(type(removed)),
                     'added': added,
                     'added_type': str(type(added)),
                     'last_val': last_val,
                     'last_val_type': str(type(last_val))}
            self.logger.error(json.dumps(incon))
            if '_corrupted' not in new_doc:
                new_doc['_corrupted'] = {}
            new_doc['_corrupted'][field] = added
        # Add the objects to the batch
        batch_updates.append(last_doc)
        batch_updates.append(new_doc)
    # try to set the _start of the first version to the creation time
    try:
        # set start to creation time if available
        last_doc = batch_updates[-1]
        creation_field = self.get_property('cfield')
        creation_ts = ts2dt(last_doc[creation_field])
        if creation_ts < last_doc['_start']:
            last_doc['_start'] = creation_ts
        elif len(batch_updates) == 1:
            # we have only one version, that we did not change
            return []
    except Exception as e:
        self.logger.error('Error updating creation time; %s' % e)
    return batch_updates
def save_objects(self, owner, cube, objects, mtime=None):
    '''
    :param str owner: target owner's cube
    :param str cube: target cube (collection) to save objects to
    :param list objects: list of dictionary-like objects to be stored
    :param datetime mtime: datetime to apply as mtime for objects
    :rtype: list - list of object ids saved

    Get a list of dictionary objects from client and insert
    or save them to the timeline.

    Apply the given mtime to all objects or apply utcnow(). _mtime
    is used to support timebased 'delta' updates.
    '''
    self.cube_exists(owner, cube)
    self.requires_owner_write(owner, cube)
    mtime = dt2ts(mtime) if mtime else utcnow()
    current_mtime = self.get_cube_last_start(owner, cube)
    if current_mtime and mtime and current_mtime > mtime:
        # don't fail, but make sure the issue is logged!
        # likely, a ntp time sync is required
        logger.warn(
            "object mtime is < server mtime; %s < %s; " % (mtime,
                                                           current_mtime))
    _cube = self.timeline(owner, cube, admin=True)

    olen_r = len(objects)
    logger.debug('[%s.%s] Received %s objects' % (owner, cube, olen_r))

    objects = self.prepare_objects(_cube, objects, mtime)

    logger.debug('[%s.%s] %s objects match their current version in db' % (
        owner, cube, olen_r - len(objects)))

    if not objects:
        logger.debug('[%s.%s] No NEW objects to save' % (owner, cube))
        return []
    else:
        logger.debug('[%s.%s] Saving %s objects' % (owner, cube,
                                                    len(objects)))
        # End the most recent versions in the db of those objects that
        # have newer versions (newest version must have _end == None,
        # activity import saves objects for which this might not be true):
        to_snap = dict([(o['_oid'], o['_start']) for o in objects
                        if o['_end'] is None])
        if to_snap:
            db_versions = _cube.find({'_oid': {'$in': to_snap.keys()},
                                      '_end': None},
                                     fields={'_id': 1, '_oid': 1})
            snapped = 0
            for doc in db_versions:
                _cube.update({'_id': doc['_id']},
                             {'$set': {'_end': to_snap[doc['_oid']]}},
                             multi=False)
                snapped += 1
            logger.debug('[%s.%s] Updated %s OLD versions' %
                         (owner, cube, snapped))
        # Insert all new versions:
        insert_bulk(_cube, objects)
        logger.debug('[%s.%s] Saved %s NEW versions' % (owner, cube,
                                                        len(objects)))
        # return object ids saved
        return [o['_oid'] for o in objects]
def get_objects(self, repo_fullname=DEFAULT_REPO, since=None):
    '''
    Given valid github credentials and a repository name, generate
    a list of github issue objects for all existing issues in the
    repository.

    All issues are returned, including open and closed.

    :param repo_fullname: github repository name (ie, 'user/repo')
    :param since: date; only return issues updated since date

    An example repo_fullname is 'kejbaly2/metrique'.

    Issue objects contain the following properties:
        * _oid (issue id)
        * assignee
        * body
        * closed_at
        * closed_by
        * created_at
        * labels
        * milestone
        * name
        * number
        * repo url
        * state
        * title
        * updated_at
        * full github url
        * user (reported by)
    '''
    repo = self.proxy.get_repo(repo_fullname)
    if not repo:
        raise ValueError("invalid repo: %s" % repo)

    if isinstance(since, basestring):
        since = dt_parse(since)

    if since:
        _open = repo.get_issues(since=since)
        _closed = repo.get_issues(state='closed', since=since)
    else:
        _open = repo.get_issues()
        _closed = repo.get_issues(state='closed')

    objects = []
    for i in chain(_open, _closed):
        obj = {
            '_oid': i.id,
            'assignee': getattr(i.assignee, 'login', None),
            'body': i.body,
            'closed_at': dt2ts(i.closed_at),
            'closed_by': getattr(i.closed_by, 'login', None),
            'created_at': dt2ts(i.created_at),
            'labels': [l.name for l in i.labels],
            'milestone': getattr(i.milestone, 'title', None),
            'name': repo_fullname,
            'number': i.number,
            'repo': i.repository.url,
            'state': i.state,
            'title': i.title,
            'updated_at': dt2ts(i.updated_at),
            'url': i.url,
            'user': i.user.name,
        }
        objects.append(obj)
    objects = self.normalize(objects)
    return objects
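
# A hypothetical usage sketch; the cube class wrapping get_objects is not
# shown here, so `issue_cube` and how it is constructed are illustrative only.
issue_cube = Issue()  # assumes an already-configured cube instance with github credentials
objects = issue_cube.get_objects(repo_fullname='kejbaly2/metrique',
                                 since='2013-01-01')
# `since` strings are parsed with dt_parse; each returned dict carries the
# fields listed in the docstring, with datetimes converted via dt2ts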