def _decode(self, json_response):
    obj = json.loads(json_response)
    # TimeStamp is always present
    obj['TimeStamp'] = duparse(obj['TimeStamp'])
    # Transaction response
    if 'Transactions' in obj:
        for t in obj['Transactions']:
            for key in ('InsDate', 'ClearanceDate'):
                if t[key]:
                    # Parse the field being iterated, not always InsDate
                    t[key] = duparse(t[key])
    return obj
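# A minimal, hypothetical round trip through _decode above. The payload shape
# (a TimeStamp plus an optional Transactions list) is inferred from the code;
# the client class name is a placeholder, not part of the source.
import json
from dateutil.parser import parse as duparse

sample = json.dumps({
    'TimeStamp': '2021-01-06T12:00:00Z',
    'Transactions': [{'InsDate': '2021-01-05', 'ClearanceDate': None}],
})
# decoded = SomeClient()._decode(sample)  # SomeClient is hypothetical
# decoded['TimeStamp'] is now a datetime, and so is each truthy
# InsDate/ClearanceDate; falsy values such as None are left untouched.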
def parse_datetime(self, text):
    if text is None or not text.strip():
        return None
    try:
        return duparse(text)
    except (ValueError, OverflowError):
        # Catch only parser errors instead of a bare except, which would
        # also swallow KeyboardInterrupt and SystemExit
        return None
def convert_datetime(s):
    if s == '':
        raise ValueError
    try:
        return duparse(s)
    except TypeError:
        raise ValueError
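# A quick sketch of convert_datetime's contract, assuming duparse is
# dateutil.parser.parse as elsewhere in this collection:
from dateutil.parser import parse as duparse

convert_datetime('2021-01-06 12:30')  # -> datetime.datetime(2021, 1, 6, 12, 30)
try:
    convert_datetime('')              # empty strings are rejected up front
except ValueError:
    pass
# Non-string input (e.g. None) also ends as ValueError via the TypeError branch.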
from dateutil.parser import parse as duparse
# Row is assumed to be pyspark.sql.Row, as suggested by the Row(**...) calls
# in the sibling extractors.
from pyspark.sql import Row


def extract_member(m):
    """Extract Rows of MemberEvents and their associated fields from a
    gharchive.org event dict"""
    # Out members...
    out_m = {
        'id': m['id'],
        'type': 'MemberEvent',
        'created_at': duparse(m['created_at']),
        'public': m['public']
    }
    actor = m['actor']
    out_m['actor_id'] = actor['id']
    out_m['actor_user_name'] = actor['login']
    payload = m['payload']
    out_m['action'] = payload['action']
    member = payload['member']
    out_m['member_id'] = member['id']
    out_m['member_name'] = member['login']
    out_m['site_admin'] = member['site_admin']
    repo = m['repo']
    out_m['repo_id'] = repo['id']
    out_m['repo_name'] = repo['name']
    # Return a Row, for consistency with the docstring and the other extractors
    return Row(**out_m)
def extract_delete(d):
    """Extract Rows of DeleteEvents and their associated fields from a
    gharchive.org event dict"""
    # Out deletes...
    out_d = {
        'id': d['id'],
        'type': 'DeleteEvent',
        'created_at': duparse(d['created_at'])
    }
    actor = d['actor']
    out_d['actor_id'] = actor['id']
    out_d['actor_user_name'] = actor['login']
    repo = d['repo']
    out_d['repo_id'] = repo['id']
    out_d['repo_name'] = repo['name']
    org = d.get('org', {})
    out_d['org_id'] = org.get('id', '')
    out_d['org_name'] = org.get('login', '')
    out_d['public'] = d['public']
    return Row(**out_d)
def parse_date_from_text(text):
    if text is None or not text.strip():
        return None
    try:
        return duparse(text).date()
    except (ValueError, OverflowError):
        # Catch only parser errors instead of a bare except
        return None
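# Hedged examples for the lenient parsers above: blank or unparseable input
# maps to None instead of raising, which suits optional free-text fields.
parse_date_from_text('2021-01-06T12:30:00')  # -> datetime.date(2021, 1, 6)
parse_date_from_text('   ')                  # -> None (blank input)
parse_date_from_text('not a date')           # -> None (swallowed parse error)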
def extract_fork(f):
    """Extracts Rows of ForkEvents and their associated fields from a
    gharchive.org event dict"""
    # Out forks...
    out_f = {
        'id': f['id'],
        'created_at': duparse(f['created_at']),
        'type': 'ForkEvent',
        'public': f['public']
    }
    actor = f['actor']
    out_f['actor_user_id'] = actor['id']
    out_f['actor_user_name'] = actor['login']
    org = f.get('org', {})
    out_f['from_org_id'] = org.get('id', '')
    out_f['from_org_login'] = org.get('login', '')
    repo = f['repo']
    out_f['from_repo_id'] = repo['id']
    out_f['from_repo_name'] = repo['name']
    payload = f['payload']
    forkee = payload['forkee']
    owner = forkee['owner']
    out_f['to_user_id'] = owner['id']
    out_f['to_user_name'] = owner['login']
    out_f['to_repo_created_at'] = duparse(forkee['created_at'])
    out_f['to_repo_updated_at'] = duparse(forkee['updated_at'])
    out_f['to_repo_pushed_at'] = duparse(forkee['pushed_at'])
    out_f['to_repo_size'] = forkee['size']
    out_f['to_repo_stargazer_count'] = forkee['stargazers_count']
    out_f['to_repo_watcher_count'] = forkee['watchers_count']
    out_f['to_repo_forks_count'] = forkee['forks_count']
    # 'license' may be absent or null, so guard before indexing into it
    license = forkee['license'] if isinstance(forkee.get('license'), dict) else {}
    out_f['to_license_key'] = license.get('key', '')
    out_f['to_license_name'] = license.get('name', '')
    return Row(**out_f)
def convert_datetime(s, date_formats=DATE_FORMATS, time_formats=TIME_FORMATS):
    if sys.version < '3.5':
        if duparse:
            try:
                dt = duparse(s)
                if dt.time():
                    return duparse(s)
            except TypeError:
                # parse may throw this in py3
                raise ValueError
    # Fall back to trying every date/time format combination
    for df in date_formats:
        for tf in time_formats:
            for sep in DATE_TIME_SEPS:
                f = '{0}{1}{2}'.format(df, sep, tf)
                try:
                    return datetime.strptime(s, f)
                except ValueError:
                    pass
    raise ValueError
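# The format constants above are defined elsewhere in the module; the values
# below are illustrative assumptions only, showing the shape the fallback
# expects rather than the actual lists from the source.
DATE_FORMATS = ['%Y-%m-%d', '%d/%m/%Y']
TIME_FORMATS = ['%H:%M:%S', '%H:%M']
DATE_TIME_SEPS = [' ', 'T']

# With these, the strptime fallback would match e.g.
# '2021-01-06T12:30:00' against '%Y-%m-%dT%H:%M:%S'.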
def extract_issue(i):
    """Extract Rows of IssueEvents and their associated fields from a
    gharchive.org event dict"""
    # Out issues...
    out_i = {
        'id': i['id'],
        'type': 'IssuesEvent',
        'created_at': duparse(i['created_at']),
        'public': i['public']
    }
    actor = i['actor']
    out_i['actor_id'] = actor['id']
    out_i['actor_user_name'] = actor['login']
    repo = i['repo']
    out_i['repo_id'] = repo['id']
    out_i['repo_name'] = repo['name']
    payload = i['payload']
    out_i['action'] = payload['action']
    issue = payload['issue']
    out_i['assignee'] = issue['assignee']
    out_i['assignees'] = issue['assignees']
    out_i['body'] = issue['body']
    out_i['closed_at'] = duparse(issue['closed_at']) if issue['closed_at'] else None
    out_i['comments'] = issue['comments']
    out_i['issue_id'] = issue['id']
    out_i['labels'] = issue['labels']
    out_i['locked'] = issue['locked']
    out_i['number'] = issue['number']
    out_i['title'] = issue['title']
    # Parse updated_at like the other timestamps
    out_i['updated_at'] = duparse(issue['updated_at']) if issue['updated_at'] else None
    user = issue['user']
    out_i['user_id'] = user['id']
    out_i['user_name'] = user['login']
    return Row(**out_i)
def convert_date(s, date_formats=DATE_FORMATS):
    if duparse:
        try:
            return duparse(s).date()
        except TypeError:
            # parse may throw this in py3
            raise ValueError
    for f in date_formats:
        try:
            return datetime.strptime(s, f).date()
        except ValueError:
            pass
    raise ValueError
def extract_push(p):
    """Extracts Rows of PushEvents and their associated Commits from a
    gharchive.org event dict"""
    # Out pushes...
    out_p = {
        'type': 'PushEvent',
        'id': p['id'],
        'created_at': duparse(p['created_at']),
        'public': p['public']
    }
    # Who pushed it?
    actor = p['actor']
    out_p['actor_id'] = actor['id']
    out_p['actor_user_name'] = actor['login']
    # To what repo?
    repo = p['repo']
    out_p['repo_id'] = repo['id']
    out_p['repo_name'] = repo['name']
    # What did they push?
    payload = p['payload']
    out_p['push_id'] = payload['push_id']
    out_p['push_size'] = payload['size']
    out_p['push_ref'] = payload['ref']
    out_p['push_head'] = payload['head']
    out_p['push_before'] = payload['before']
    # Out commits...
    out_cs = []
    for c in payload['commits']:
        out_c = {
            'type': 'Commit',
            'sha': c['sha'],
            'repo_id': out_p['repo_id'],
            'repo_name': out_p['repo_name'],
            'push_id': out_p['push_id'],
            'actor_id': out_p['actor_id'],
            'actor_user_name': out_p['actor_user_name'],
            'author_name': c['author']['name'],
            'url': c['url'],
            'message': c['message'],
            'push_created_at': out_p['created_at'],
            'public': out_p['public']
        }
        out_cs.append(Row(**out_c))
    return [Row(**out_p)] + out_cs
def extract_create(c):
    """Extract Rows of CreateEvents and their associated fields from a
    gharchive.org event dict"""
    # Out creates...
    out_c = {
        'id': c['id'],
        'created_at': duparse(c['created_at']),
        'type': 'CreateEvent',
    }
    actor = c['actor']
    out_c['actor_id'] = actor['id']
    out_c['actor_user_name'] = actor['login']
    repo = c['repo']
    out_c['repo_id'] = repo['id']
    out_c['repo_name'] = repo['name']
    out_c['public'] = c['public']
    return Row(**out_c)
import datetime
import json

import requests
from dateutil import tz as dutz
from dateutil.parser import parse as duparse


def check_jupyter(port=8888):
    now = datetime.datetime.utcnow().replace(tzinfo=dutz.tzutc())  # bah humbug
    # print("NOW:", now)
    recent = datetime.datetime(2021, 1, 6, tzinfo=dutz.tzutc())
    r = requests.get(f"http://127.0.0.1:{port}/api/kernels")
    if r.status_code == 200:
        j = json.loads(r.content)
        # pprint(j)
        for k in j:
            if k['execution_state'] == 'busy':
                recent = now
                break
            last = duparse(k['last_activity'])
            # print("LAST:", last)
            if last > recent:
                recent = last
        seconds = (now - recent).total_seconds()
        # print(f"last activity {seconds} seconds ago")
        return seconds
    else:
        print("Error:", r.status_code, r.content)
        return False
update_file = 'last_update.txt'
update_path = '/var/log/domotica'
update_file_path = os.path.join(update_path, update_file)

if os.path.isfile(update_file_path):
    with open(update_file_path, 'r') as f:
        last_update = f.read()
else:
    # Just make some fake earlier date..
    last_update = '1999-12-31 01:01:01.01+02:00'
    # And create the file..
    with open(update_file_path, 'w') as f:
        f.write(last_update)

# With this we parse the date that was found in the file
last_update = duparse(last_update)

# Here we loop over all the news sources and add all the items to ElasticSearch,
# but only if their publication date is after our latest update
for news_source in news_source_list:
    # Keep track of items per news source
    new_items = 0
    old_items = 0
    news_content = res_text[news_source]
    for i_content in news_content:
        content_date = duparse(i_content['date'])
        if content_date > last_update:
            # A check to see if our date comparison is correct
some_test('Melissa', ['content', 'date'])

s_match[:10].execute()
res = s.execute()
for i in res:
    print(i)

s = Search(using=es, index="dutch_news")
max_count = s.count()
res = s[0:max_count].execute()
res.hits.total
len(res.hits.hits)

from dateutil.parser import parse as duparse

res_time = [duparse(x['_source']['date']) for x in res.hits.hits]

import numpy as np
import matplotlib.pyplot as plt

plt.plot(np.array(res_time))
plt.hist(res_time)

import collections

for i in collections.Counter([x.hour for x in res_time]).items():
    print(i)

z = np.array(list(collections.Counter([x.hour for x in res_time]).values()))
z = np.array(list(collections.Counter([x.day for x in res_time]).values()))
z = np.array(list(collections.Counter([x.month for x in res_time]).values()))
plt.plot(z)
def extract_pull(p):
    """Extract Rows of PullRequestEvents and their associated fields from a
    gharchive.org event dict"""
    # Out pull requests...
    out_p = {
        'id': p['id'],
        'type': 'PullRequestEvent',
        'created_at': duparse(p['created_at']),
        'public': p['public']
    }
    actor = p['actor']
    out_p['actor_id'] = actor['id']
    out_p['actor_user_name'] = actor['login']
    org = p.get('org', {})
    out_p['org_id'] = org.get('id')
    out_p['org_name'] = org.get('login')
    payload = p['payload']
    out_p['action'] = payload['action']
    out_p['number'] = payload['number']

    pull_request = payload['pull_request']
    out_p['additions'] = pull_request['additions']
    out_p['assignee'] = pull_request['assignee']
    out_p['assignees'] = pull_request['assignees']
    out_p['author_association'] = pull_request['author_association']

    # The base side of the pull request
    base = pull_request['base']
    out_p['base_label'] = base['label']
    out_p['base_ref'] = base['ref']
    base_repo = base['repo']
    out_p['base_repo_created_at'] = duparse(base_repo['created_at'])
    out_p['base_repo_default_branch'] = base_repo.get('default_branch')
    out_p['base_repo_description'] = base_repo['description']
    out_p['base_repo_fork'] = base_repo['fork']
    out_p['base_repo_forks'] = base_repo['forks']
    out_p['base_repo_full_name'] = base_repo['full_name']
    out_p['base_repo_id'] = base_repo['id']
    out_p['base_repo_language'] = base_repo['language']
    # 'license' may be null, so guard before indexing into it
    license = base_repo['license'] if isinstance(base_repo['license'], dict) else {}
    out_p['base_repo_license_key'] = license.get('key')
    out_p['base_repo_license_name'] = license.get('name')
    out_p['base_repo_name'] = base_repo['name']
    out_p['base_repo_open_issues'] = base_repo['open_issues']
    owner = base_repo['owner']
    out_p['base_repo_owner_id'] = owner['id']
    out_p['base_repo_owner_user_name'] = owner['login']
    out_p['base_repo_owner_site_admin'] = owner['site_admin']
    out_p['base_repo_private'] = base_repo['private']
    out_p['base_repo_pushed_at'] = duparse(base_repo['pushed_at'])
    out_p['base_repo_size'] = base_repo['size']
    out_p['base_repo_stargazers_count'] = base_repo['stargazers_count']
    out_p['base_repo_updated_at'] = duparse(base_repo['updated_at'])
    out_p['base_repo_watchers'] = base_repo['watchers']
    out_p['base_sha'] = base['sha']
    base_user = base['user']
    out_p['base_user_id'] = base_user['id']
    out_p['base_user_user_name'] = base_user['login']
    out_p['base_user_site_admin'] = base_user['site_admin']

    out_p['body'] = pull_request['body']
    out_p['changed_files'] = pull_request['changed_files']
    out_p['closed_at'] = duparse(pull_request['closed_at']) if pull_request['closed_at'] else None
    out_p['comments'] = pull_request['comments']
    out_p['commits'] = pull_request['commits']
    # Overwrites the event-level created_at with the pull request's own
    out_p['created_at'] = duparse(pull_request['created_at'])
    out_p['deletions'] = pull_request['deletions']

    # The head side of the pull request; its repo may be missing or null
    # (e.g. when the fork has been deleted)
    head = pull_request['head']
    out_p['head_label'] = head['label']
    out_p['head_ref'] = head['ref']
    head_repo = head['repo'] if isinstance(head.get('repo'), dict) else {}
    out_p['head_repo_created_at'] = duparse(head_repo['created_at']) if head_repo.get('created_at') else None
    out_p['head_repo_default_branch'] = head_repo.get('default_branch')
    out_p['head_repo_description'] = head_repo.get('description')
    out_p['head_repo_fork'] = head_repo.get('fork')
    out_p['head_repo_forks'] = head_repo.get('forks')
    out_p['head_repo_full_name'] = head_repo.get('full_name')
    out_p['head_repo_id'] = head_repo.get('id')
    out_p['head_repo_language'] = head_repo.get('language')
    out_p['head_repo_languages'] = head_repo.get('languages', '')
    head_repo_license = head_repo['license'] if isinstance(head_repo.get('license'), dict) else {}
    out_p['head_repo_license_key'] = head_repo_license.get('key')
    out_p['head_repo_license_name'] = head_repo_license.get('name')
    out_p['head_repo_name'] = head_repo.get('name')
    out_p['head_repo_open_issues'] = head_repo.get('open_issues')
    head_repo_owner = head_repo.get('owner', {})
    out_p['head_repo_owner_id'] = head_repo_owner.get('id')
    out_p['head_repo_owner_user_name'] = head_repo_owner.get('login')
    out_p['head_repo_owner_site_admin'] = head_repo_owner.get('site_admin')
    out_p['head_repo_private'] = head_repo.get('private')
    # Guard against present-but-null timestamps before calling duparse
    out_p['head_repo_pushed_at'] = duparse(head_repo['pushed_at']) if head_repo.get('pushed_at') else None
    out_p['head_repo_size'] = head_repo.get('size')
    out_p['head_repo_stargazers_count'] = head_repo.get('stargazers_count')
    out_p['head_repo_updated_at'] = duparse(head_repo['updated_at']) if head_repo.get('updated_at') else None
    out_p['head_repo_watchers'] = head_repo.get('watchers')
    out_p['head_sha'] = head['sha']
    head_user = head['user']
    out_p['head_user_id'] = head_user['id']
    out_p['head_user_name'] = head_user['login']
    out_p['head_user_site_admin'] = head_user['site_admin']

    # Overwrites the event-level id with the pull request's own
    out_p['id'] = pull_request['id']
    # out_p['labels'] = pull_request['labels']
    out_p['locked'] = pull_request['locked']
    out_p['merge_commit_sha'] = pull_request['merge_commit_sha']
    out_p['mergeable'] = pull_request['mergeable']
    out_p['merged'] = pull_request['merged']
    out_p['merged_at'] = duparse(pull_request['merged_at']) if pull_request.get('merged_at') else None
    out_p['merged_by'] = pull_request['merged_by']
    out_p['milestone'] = pull_request['milestone']
    out_p['number'] = pull_request['number']
    out_p['rebaseable'] = pull_request['rebaseable']
    out_p['requested_reviewers'] = pull_request['requested_reviewers']
    out_p['requested_teams'] = pull_request['requested_teams']
    out_p['review_comments'] = pull_request['review_comments']
    out_p['state'] = pull_request['state']
    out_p['title'] = pull_request['title']
    out_p['updated_at'] = duparse(pull_request['updated_at']) if pull_request.get('updated_at') else None
    user = pull_request['user']
    out_p['user_id'] = user['id']
    out_p['user_name'] = user['login']
    out_p['user_site_admin'] = user['site_admin']
    out_p['public'] = p['public']
    repo = p['repo']
    out_p['repo_id'] = repo['id']
    out_p['repo_name'] = repo['name']
    return Row(**out_p)
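# A hedged driver sketch tying the extract_* functions in this collection
# together. Assumptions: Row is pyspark.sql.Row, events arrive one JSON object
# per line as in gharchive.org hourly dumps, and the file name is a placeholder.
import gzip
import json

EXTRACTORS = {
    'CreateEvent': extract_create,
    'DeleteEvent': extract_delete,
    'ForkEvent': extract_fork,
    'IssuesEvent': extract_issue,
    'MemberEvent': extract_member,
    'PullRequestEvent': extract_pull,
    'PushEvent': extract_push,
}

rows = []
with gzip.open('2021-01-06-12.json.gz', 'rt') as f:  # placeholder file name
    for line in f:
        event = json.loads(line)
        extract = EXTRACTORS.get(event['type'])
        if extract is None:
            continue  # event types without an extractor are skipped
        out = extract(event)
        # extract_push returns a list (the push plus its commits);
        # the other extractors return a single Row.
        rows.extend(out if isinstance(out, list) else [out])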
def get_xmltv():
    """
    Download the XMLTV feed and store channels and programs in the database.

    http://wiki.xmltv.org/index.php/Main_Page/xmltvfileformat.html

    :return: None
    """
    import urllib2
    import gzip
    import StringIO
    import xmltv

    url = cfg.TVGURL

    # Download the gzipped XMLTV source
    out_file_path = url.split("/")[-1][:-3]
    print('Downloading TV program from: {}'.format(url))
    response = urllib2.urlopen(url)
    compressed_file = StringIO.StringIO(response.read())
    decompressed_file = gzip.GzipFile(fileobj=compressed_file)

    # Extract the XMLTV file
    with open(out_file_path, 'w') as outfile:
        outfile.write(decompressed_file.read())

    # Print the XMLTV header
    xmltv_data = xmltv.read_data(open(out_file_path, 'r'))
    ic(xmltv_data)

    # Read the channels
    xmlchannels = xmltv.read_channels(open(out_file_path, 'r'))
    print("Got {} channels from XMLTV source".format(len(xmlchannels)))

    # Drop the content of XMLChannel
    XMLChannel.query.delete()
    db.session.commit()

    # Populate XMLChannel with channels from the XMLTV source
    for xc in xmlchannels:
        xmlchannel = XMLChannel(id=int(xc['id']),
                                label=xc['display-name'][0][0].strip())
        db.session.add(xmlchannel)
    db.session.commit()

    programs = xmltv.read_programmes(open(out_file_path, 'r'))

    # Commit in chunks to keep the session small
    chunk = 1024
    index = 0
    for pr in programs:
        desc = ""
        try:
            desc = pr['desc'][0][0]
        except KeyError:
            pass
        a_category = Category.query.filter(
            Category.name == pr['category'][0][0]).first()
        if a_category:
            p = Program(channel=int(pr['channel']),
                        title=pr['title'][0][0],
                        start=duparse(pr['start']),
                        stop=duparse(pr['stop']),
                        desc=desc,
                        category_id=a_category.id)
            db.session.add(p)
        else:
            py = Category(name=pr['category'][0][0])
            # The Program is attached to the new Category and cascades in with it
            Program(channel=int(pr['channel']),
                    title=pr['title'][0][0],
                    start=duparse(pr['start']),
                    stop=duparse(pr['stop']),
                    desc=desc,
                    category=py)
            db.session.add(py)
        index += 1
        if index % chunk == 0:
            db.session.commit()
    db.session.commit()

    categories = [x.name for x in Category.query.all()]
    ic(u', '.join(categories))
for pty in pts[:-1]:  # the last tty is ours
    # print("PTY:", pty)
    proc = subprocess.Popen(
        ["docker", "exec", "-ti", ID, "stat", f"/dev/pts/{pty}"],
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    lines = proc.stdout.read().strip().split(b"\n")
    for out in lines:
        out = out.decode()
        columns = out.split()
        # print("COLS:", columns)
        if len(columns) == 4 and columns[0] in ["Access:", "Modify:", "Change:"]:
            t = duparse(f"{columns[1]} {columns[2]} {columns[3]}")
            # print("STAT:", t, recent)
            if t > recent:
                print(f"tty activity {(now - t).total_seconds()} seconds ago")
                really_busy = True
                break
    if really_busy:
        break

if really_busy:
    break

# check for jupyter activity
if juport:
    sec = check_jupyter(juport)