def filter_by_date_interval(self, start_time, end_time):
    """Return a new ASADBDataSet containing only the rows whose start
    timestamp (column 5) lies in the half-open interval [start_time, end_time).

    :param start_time: inclusive lower bound, as a parse_date()-compatible string
    :param end_time: exclusive upper bound, as a parse_date()-compatible string
    :return: filtered ASADBDataSet
    """
    # PERF: hoist the loop-invariant bound parsing; previously both bounds
    # were re-parsed for every row in the comprehension.
    start = parse_date(start_time)
    end = parse_date(end_time)
    return ASADBDataSet([row for row in self.data
                         if start <= parse_date(row[5]) < end])
    # NOTE(review): an additional end-time filter was left commented out:
    # and parse_date(start_time) < parse_date(row[6]) < parse_date(end_time)
def read_admins(rng, sheet_id, sheet_api):
    """
    Reads in admin data from the specified range and google sheet and returns a dictionary of admins
    :param rng: Range to be read from sheet_id
    :param sheet_id: Google sheet_id for the sheet to be read
    :param sheet_api: Google sheets API used to read the sheet
    :return: tuple (admins, admin_emails) where admins has (k,v) = (id, Admin() object)
             and admin_emails has (k,v) = (lower-cased email, admin id)
    """
    admins = {}
    admin_emails = {}
    data = get_range(rng, sheet_id, sheet_api)
    # data[0] is the header row; actual admin rows follow
    for admin in data[1:]:
        try:
            # columns 6-8 hold up to three contact email addresses
            emails = [email.lower() for email in admin[6:9]]
            name = admin[1]
            org = admin[0]
            check_in = util.parse_date(admin[4])
            last_contact = util.parse_date(admin[5])
            new = Admin(name, org, last_contact, check_in, emails)
            admins[new.id] = new
            # reverse index: every email maps back to its admin's id
            for email in emails:
                admin_emails[email] = new.id
        except IndexError:
            # a short row means required columns are missing; abort the run
            print "The following admin does not have a complete set of data on the " + rng + " tab." \
                  "This is likely the result of a missing phone number. " \
                  "Please update the member and rerun the script\n" + admin
            exit()
    return admins, admin_emails
def comp_data_list(a, b):
    """Assert that two lists of data rows are pairwise equivalent: same
    length, equal parsed dates, and (almost) equal min/avg/max values.

    NOTE: relies on `self` (a TestCase) from the enclosing scope.
    """
    self.assertEqual(len(a), len(b))
    for left, right in zip(a, b):
        self.assertEqual(parse_date(left["date"]), parse_date(right["date"]))
        for field in ("value_min", "value_avg", "value_max"):
            self.assertAlmostEqual(float(left[field]), float(right[field]))
def get_menus(self, page: html.Element, location: str):
    """Parse every daily menu found on *page* for the given canteen location.

    :param page: parsed HTML page containing the daily menus
    :param location: canteen location identifier, forwarded to the dish parser
    :return: dict mapping each menu's date to its Menu object (empty if none parsed)
    """
    # initialize empty dictionary
    menus: Dict[date, Menu] = dict()
    # get all available daily menus
    daily_menus: html.Element = self.__get_daily_menus_as_html(page)

    # iterate through daily menus
    for daily_menu in daily_menus:
        # get html representation of current menu
        menu_html = html.fromstring(html.tostring(daily_menu))
        # get the date of the current menu; some string modifications are necessary
        current_menu_date_str = menu_html.xpath("//strong/text()")[0]
        # parse date
        try:
            current_menu_date: date = util.parse_date(
                current_menu_date_str)
        except ValueError:
            print(
                "Warning: Error during parsing date from html page. Problematic date: %s"
                % current_menu_date_str)
            # continue and parse subsequent menus
            continue
        # parse dishes of current menu
        dishes: List[Dish] = self.__parse_dishes(menu_html, location)
        # create menu object
        menu: Menu = Menu(current_menu_date, dishes)
        # add menu object to dictionary using the date as key
        menus[current_menu_date] = menu

    # return all parsed menus keyed by their date
    return menus
def get_perims(self, fire):
    """! Return perimeters that match a specific fire
    @param self Pointer to this
    @param fire Fire to find perimeters for
    @return Dictionary of dates to perimeters (or None if no usable files)
    """
    # results are memoized per fire in self.by_fire
    # FIX: dict.has_key() was removed in Python 3; `in` works on both 2 and 3
    if fire not in self.by_fire:
        shps = {'by_date': {}}
        max_date = None
        by_date = shps['by_date']
        for s in self.all_shps:
            if fire in s:
                # "_F_" marks the explicitly-final perimeter file
                if "_F_" in s:
                    shps['Final'] = s
                cur_date = parse_date(s, self.year)
                # HACK: import here to avoid dependency loop
                from gis import GetFeatureCount
                # don't use if there aren't any features in the file
                if cur_date and 0 < GetFeatureCount(s):
                    by_date[cur_date] = s
                    if not max_date or cur_date > max_date:
                        max_date = cur_date
        if 0 == len(by_date):
            # no usable perimeter files at all
            shps = None
        elif 'Final' not in shps:
            # no explicit final perimeter; fall back to the most recent one
            shps['Final'] = by_date[max_date]
        self.by_fire[fire] = shps
    return self.by_fire[fire]
def __init__(self, message):
    """
    :param message: GMail message (mapping of RFC-822 style headers) used to
        build the Message object. Missing headers map to empty-string/None
        defaults so attributes are always defined.
    """
    self.thread_id = message['X-GM-THRID']
    if self.thread_id is None:
        self.thread_id = ""  # TODO Log?
    self.to = message['To']
    if self.to is None:
        self.to = ""
    subject = message['Subject']
    if subject is None:
        self.subject = ""
    else:
        # Remove whitespace which can appear in messages with a long list of labels.
        self.subject = subject.replace('\n', '').replace('\r', '')
    from_address = message['From']
    if from_address is None:
        self.from_address = ""
    elif '<' not in from_address or '>' not in from_address:
        self.from_address = from_address.lower()
    else:
        # Extract email address from string containing "<email>"
        self.from_address = from_address[from_address.find("<") + 1:from_address.find(">")].lower()
    self.labels = message['X-Gmail-Labels']
    date = message['Date']
    if date is not None:
        self.date = util.parse_date(date)
    else:
        # BUG FIX: self.date was previously left undefined when the Date
        # header was missing, raising AttributeError on later access.
        self.date = None
def timetree(tree=None, aln=None, seq_meta=None, keeproot=False, confidence=False,
             resolve_polytomies=True, max_iter=2, infer_gtr=True,
             Tc=0.01, reroot='best', use_marginal=False, **kwarks):
    """Infer a time-scaled tree with TreeTime from a tree, alignment and
    per-sequence date metadata.

    :param tree: input tree (passed through to TreeTime)
    :param aln: alignment (passed through to TreeTime)
    :param seq_meta: dict of name -> metadata; the "date" field is parsed
        with the module-level `date_fmt`
    :param confidence: if truthy, compute confidence intervals for node dates
    :param use_marginal: with `confidence`, assign marginal ML times to nodes
    :return: the TreeTime instance after running
    """
    from treetime import TreeTime
    dates = {}
    for name, data in seq_meta.items():
        num_date = parse_date(data["date"], date_fmt)
        if num_date is not None:
            dates[name] = num_date
    tt = TreeTime(tree=tree, aln=aln, dates=dates, gtr='JC69')

    if confidence and use_marginal:
        # estimate confidence intervals via marginal ML and assign marginal ML times to nodes
        marginal = 'assign'
    else:
        marginal = confidence

    tt.run(infer_gtr=infer_gtr, root=reroot, Tc=Tc, time_marginal=marginal,
           resolve_polytomies=resolve_polytomies, max_iter=max_iter, **kwarks)

    # BUG FIX: the previous code iterated `T.find_clades()` where `T` is
    # undefined in this function; iterate the tree held by the TreeTime run.
    for n in tt.tree.find_clades():
        n.num_date = n.numdate  # treetime convention is different from augur...
        # get 90% max posterior region
        if confidence:
            n.num_date_confidence = list(tt.get_max_posterior_region(n, 0.9))
    return tt
def log_file():
    """HTTP handler: generate a git log for the requested date range and run
    the selected code-maat style analysis on it.

    Query parameters: `analysis` (defaults to 'summary'), `start-date`,
    `end-date` (both YYYY-MM-DD).
    :return: JSON analysis result, or an (error-message, 400) tuple
    """
    analysis = request.args.get('analysis')
    start_date = request.args.get('start-date')
    end_date = request.args.get('end-date')

    # Parameter parsing — any failure (bad format, missing value) is a 400.
    # FIX: narrowed from a bare `except:` which also swallowed SystemExit etc.
    try:
        start_date = parse_date(start_date)
    except Exception:
        return ("ERROR: Start date can't be parsed by YYYY-MM-DD format.", 400)
    try:
        end_date = parse_date(end_date)
    except Exception:
        return ("ERROR: End date can't be parsed by YYYY-MM-DD format.", 400)

    # Validate
    if start_date > end_date:
        return ("ERROR: Start date can't be ahead of the end date.", 400)

    # Logic
    log_file = cli.generate_log_file(start_date, end_date)

    # dispatch table replaces the long if/elif chain
    generators = {
        'summary': cli.generate_summary_file,
        'revision': cli.generate_revision_file,
        'coupling': cli.generate_coupling_file,
        'age': cli.generate_age_file,
        'abs-churn': cli.generate_absolute_churn_file,
        'author-churn': cli.generate_author_churn_file,
        'entity-churn': cli.generate_entity_churn_file,
        'entity-ownership': cli.generate_entity_ownership_file,
        'entity-effort': cli.generate_entity_effort_file,
    }
    if analysis is None:
        analysis = 'summary'  # default analysis, as before
    generator = generators.get(analysis)
    if generator is None:
        return ("ERROR: Analysis type not in selection.", 400)
    return jsonify(read_csv_file(generator(log_file)))
def parse_date(self, datestring):
    """Date is a parsable string and we need to return a date object
    using the datestring and the year from the statement pdf.

    If we're in a December PDF and we see a January date, it's the next
    year. If we're in January and we see a December date, it's the
    previous year.
    """
    parsed = u.parse_date(datestring)
    year = self.statement.year
    # December statement showing a January date rolls into the next year
    if self.statement.month == 12 and parsed.month == 1:
        year = self.statement.year + 1
    # January statement showing a December date belongs to the previous year
    elif self.statement.month == 1 and parsed.month == 12:
        year = self.statement.year - 1
    return u.parse_date("%s-%s" % (year, parsed.strftime("%m-%d")))
def get_gps(self, id):
    """Fetch an activity and its GPS track.

    Returns the activity dict plus a list of (absolute unix time, latlng,
    altitude) tuples; missing stream columns are treated as empty.
    """
    raw_streams = self._client.get_streams(id)
    activity = self.get_activity(int(id))
    start_time = int(parse_date(activity['start_date']).timestamp())
    # pull the three stream columns, substituting [] when a column is absent
    columns = [raw_streams[key]['data'] if key in raw_streams else []
               for key in ('time', 'latlng', 'altitude')]
    # stream 'time' values are offsets relative to the activity start
    track = [(offset + start_time, point, altitude)
             for offset, point, altitude in zip(*columns)]
    return activity, track
def timetree(tree=None, aln=None, ref=None, seq_meta=None, keeproot=False,
             confidence=False, resolve_polytomies=True, max_iter=2,
             dateLimits=None, infer_gtr=True, Tc=0.01, reroot='best',
             use_marginal=False, **kwarks):
    """Infer a time-scaled tree with TreeTime (reference/VCF-aware variant).

    :param ref: optional reference sequence; when given, equilibrium
        frequencies are fixed to avoid overestimating mutations to gaps
    :param dateLimits: optional pair of year bounds passed to parse_date
    :return: the TreeTime instance after running
    """
    from treetime import TreeTime

    dL_int = None
    if dateLimits:
        dL_int = [int(x) for x in dateLimits]
        dL_int.sort()

    dates = {}
    for name, data in seq_meta.items():
        num_date = parse_date(data["date"], date_fmt, dL_int)
        if num_date is not None:
            dates[name] = num_date

    # send ref, if is None, does no harm
    tt = TreeTime(tree=tree, aln=aln, ref=ref, dates=dates, gtr='JC69')

    if confidence and use_marginal:
        # estimate confidence intervals via marginal ML and assign marginal ML times to nodes
        marginal = 'assign'
    else:
        marginal = confidence

    # Length of VCF files means GTR model with gaps causes overestimation of mutation TO gaps
    # so gaps appear in internal nodes when no gaps at tips! To prevent....
    pi = None
    if ref is not None:  # if VCF, fix pi  (FIX: `is not None` instead of `!= None`)
        pi = np.array([0.1618, 0.3188, 0.3176, 0.1618, 0.04])  # from real runs (Walker 2018)

    tt.run(infer_gtr=infer_gtr, root=reroot, Tc=Tc, time_marginal=marginal,
           resolve_polytomies=resolve_polytomies, max_iter=max_iter,
           fixed_pi=pi, **kwarks)

    # BUG FIX: the previous code iterated `T.find_clades()` where `T` is
    # undefined in this function; iterate the tree held by the TreeTime run.
    for n in tt.tree.find_clades():
        n.num_date = n.numdate  # treetime convention is different from augur...
        # get 90% max posterior region
        if confidence:
            n.num_date_confidence = list(tt.get_max_posterior_region(n, 0.9))
    return tt
def main():
    """CLI entry point: list canteens, or parse a location's menus and emit
    them as plain text, JSON files, or an OpenMensa feed depending on args."""
    # get command line args
    args = cli.parse_cli_args()

    # print canteens
    if args.locations:
        with open("canteens.json", 'r') as canteens:
            print(json.dumps(json.load(canteens)))
        return

    # get location from args
    location = args.location

    # get required parser
    parser = get_menu_parsing_strategy(location)
    if parser is None:
        print("The selected location '%s' does not exist." % location)
        # BUG FIX: bail out here; previously execution fell through and
        # called parser.parse(location) on None, raising AttributeError.
        return

    # parse menu
    menus = parser.parse(location)

    # if date has been explicitly specified, try to parse it
    menu_date = None
    if args.date is not None:
        try:
            menu_date = util.parse_date(args.date)
        except ValueError:
            print("Error during parsing date from command line: %s" % args.date)
            print("Required format: %s" % util.cli_date_format)
            return

    # print menu
    if menus is None:
        print("Error. Could not retrieve menu(s)")
    # jsonify argument is set
    elif args.jsonify is not None:
        weeks = Week.to_weeks(menus)
        if not os.path.exists(args.jsonify):
            os.makedirs(args.jsonify)
        jsonify(weeks, args.jsonify, location, args.combine)
    elif args.openmensa is not None:
        weeks = Week.to_weeks(menus)
        if not os.path.exists(args.openmensa):
            os.makedirs(args.openmensa)
        openmensa(weeks, args.openmensa)
    # date argument is set
    elif args.date is not None:
        if menu_date not in menus:
            print("There is no menu for '%s' on %s!" % (location, menu_date))
            return
        menu = menus[menu_date]
        print(menu)
    # else, print weeks
    else:
        weeks = Week.to_weeks(menus)
        for calendar_week in weeks:
            print(weeks[calendar_week])
def from_dict(cls, dct):
    # type: (dict) -> ScheduleHashList
    """Build a ScheduleHashList from a nested mapping.

    :param dct: {station_id: {schedule_date: hash_dict}} mapping
    :return: new list instance with one ScheduleHash per (station, date)
    """
    hashes = []
    for station_id in dct:
        per_station = dct[station_id]
        for schedule_date in per_station:
            hashes.append(ScheduleHash.from_dict(per_station[schedule_date],
                                                 station_id,
                                                 parse_date(schedule_date)))
    return cls(hashes)
def read_members(rng, sheet_id, sheet_api, header_index, short_name_range):
    """
    Reads in member data from the specified range and google sheet and returns a dictionary of members
    and statistic labels that will be counted
    :param rng: Range to be read from sheet_id
    :param sheet_id: Google sheet_id for the sheet to be read
    :param sheet_api: Google sheets API used to read the sheet
    :param header_index: Start index (inclusive) of values that will be returned from header
    :param short_name_range: Named Range in Retention sheet for short names. If mem_stats_sheet does
    not contain a name in this range it will be added to the mem_stats_sheet
    :return: member dictionary with (k,v) = (name, Member() object), header[header_index:] from rng
    """
    data = get_range(rng, sheet_id, sheet_api)
    # statistic labels live in the header row, past the fixed columns
    stat_header = data[0][header_index:]
    members = {}
    for member in data[1:]:
        try:
            name = member[0]
            contact = util.parse_date(member[1])
            check_in = util.parse_date(member[2])
            # remaining columns are per-member statistic counts
            mem_stats = member[3:]
            parsed_stats = []
            for s in mem_stats:
                parsed_stats.append(int(s))
            members[name] = Member(name, contact, check_in, parsed_stats)
        except IndexError:
            # a short row means required columns are missing; abort the run
            print "The following member does not have a complete set of data on the " + rng + " tab." \
                  "Please update the member and rerun the script\n" + member
            exit()
    # Add any members not in the sheet
    for member in get_range(short_name_range, sheet_id, sheet_api):
        if member[0] not in members.keys():
            # If the member is listed on the Retention tab but not Member Stats tab add a row of blank info
            members[member[0]] = Member(member[0], None, None, [0] * len(stat_header))
    return members, stat_header
def parse(self):
    """The account summary is in two halves. The left side is payment
    info and the right is account summary. Get it all. Not every line
    has entries in both columns. Every statement should have an account
    summary.
    """
    if not self.text:
        self.err("No lines to parse for block %s" % self.name)
    lines = self.text.split("\n")
    # index-based loop because some matches need to peek at lines[i+1]/[i+2]
    for i in range(len(lines)):
        line = lines[i]
        # columns are separated by runs of blanks (rx["two_blanks"])
        parts = rx["two_blanks"].split(line.strip())
        if len(parts) >= 2:
            for k in [
                    "Available Credit",
                    "Cash Advances",
                    "Credit Limit",
                    "Fees Charged",
                    "Interest Charged",
                    "New Balance",
                    "Other Credits",
                    "Previous Balance",
                    "Payments",
                    "Transactions",
            ]:
                # label is in the second-to-last column, value in the last
                if parts[-2].startswith(k):
                    # strip a leading sign/equals marker before the amount
                    if parts[-1][0] in "-+=":
                        parts[-1] = parts[-1][1:].strip()
                    if parts[-1][0] == "$":
                        parts[-1] = u.parse_money(parts[-1])
                    self[k] = parts[-1]
            # the due date value appears two lines below its label
            if parts[0] == "Payment Due Date":
                self["Payment Due Date"] = u.parse_date(
                    rx["two_blanks"].split(lines[i + 2].strip())[0]
                )
            # paired headers: values are on the immediately following line
            if parts[0] == "New Balance" and parts[1] == "Minimum Payment Due":
                parts_next = rx["two_blanks"].split(lines[i + 1].strip())
                self["New Balance Left"] = u.parse_money(parts_next[0])
                self["Minimum Payment Due"] = u.parse_money(parts_next[1])
def parse(self):
    """Parse the CSV statement text in self.text into Transaction objects
    appended to self.

    The first five reader rows are preamble/header and are skipped.
    """
    sh = io.StringIO(self.text)
    fieldnames = ["date", "narration", "category", "amount"]
    reader = csv.DictReader(sh, fieldnames)
    rows = list(reader)[5:]
    for row in rows:
        row["category"] = row["category"].lower()
        # BUG FIX: collapse runs of spaces in the narration. The previous
        # loop replaced a single space with a single space, which never
        # terminates once any space is present.
        while "  " in row["narration"]:
            row["narration"] = row["narration"].replace("  ", " ")
        # Make tx
        tx = Transaction()
        tx["category"] = row["category"]
        tx["date"] = u.parse_date(row["date"])
        tx["amount"] = u.parse_money(row["amount"])
        tx["note"] = row["narration"]
        self.append(tx)
def parse_transation_info(userspeak, cut_text):
    """Extract trading information from a user message.

    Returns:
        tuple: (result, info) where result is "items", "fail" or "success"
    """
    product_type_no = parse_product(cut_text)
    if isinstance(product_type_no, list):
        return ("items", product_type_no)
    if isinstance(product_type_no, tuple):
        product_type, product_no = product_type_no
        y, m, d = parse_date(userspeak.replace(" ", ""))
        # zero-pad month/day to two digits
        month = f"0{m}" if m <= 9 else m
        day = f"0{d}" if d <= 9 else d
        date = f"{y}/{month}/{day}"
        market_name = parse_market(cut_text, product_type)
        if not market_name:
            market_name = "台北一"
        market_no = MARKET_NO_NAME[product_type][market_name]
        # 搜尋
        content = get_transaction_info(date, type_=product_type,
                                       market_no=market_no,
                                       product_no=product_no)
        if isinstance(content, str):
            # send_text_message(uid, content)
            return ("fail", content)
        output = f"日期:{date}\n市場:{market_name}\n"
        for item in content:
            output += f'{item[0]}:{item[1]}\n'
        return ("success", output)
def favorites_received(message):
    """Event-bus handler: retweet qualifying favorites.

    :param message: bus message whose JSON body holds a 'favorites' list of tweets
    """
    global retweets
    data = json.loads(message.body)
    for tweet in data['favorites']:
        if tweet['id'] in retweets:
            # already retweeted earlier in this process
            continue
        created_at = parse_date(tweet['created_at'])
        if created_at <= aday:
            # tweet was created on or before the `aday` cutoff — presumably
            # "one day ago", i.e. only recent tweets qualify (TODO confirm
            # aday's definition; the original comment said the opposite)
            continue
        if tweet['user']['id'] not in friends:
            # Tweet is not by an account we follow
            continue
        if tweet['text'][0] == '@':
            # Tweet is a reply
            # Of course there is: tweet['in_reply_to_screen_name'] but a reply may be a valid candidate for a retweet, as
            # an old school RT creates this type of tweet
            continue
        # TODO: make this list persistent
        retweets.append(tweet['id'])
        EventBus.send('log.event', "retweet.create")
        EventBus.send("retweet.create", tweet['id'])
def static_file(filename, root, guessmime=True, mimetype=None, download=False):
    """Serve a file located under *root*.

    Resolves the path safely, guesses or applies a content type, optionally
    sets a download disposition, and honours If-Modified-Since. Returns an
    HTTPResponse on success or an HTTPError for access/lookup failures.
    """
    root = os.path.abspath(root) + os.sep
    filename = os.path.abspath(os.path.join(root, filename.strip('/\\')))
    headers = dict()

    # path escaping root, missing file, unreadable file -> error responses
    if not filename.startswith(root):
        return HTTPError(403, "Access denied.")
    if not (os.path.exists(filename) and os.path.isfile(filename)):
        return HTTPError(404, "File does not exist.")
    if not os.access(filename, os.R_OK):
        return HTTPError(403, "You do not have permission to access this file.")

    # explicit mimetype wins; otherwise guess or fall back to text/plain
    if mimetype:
        headers['Content-Type'] = mimetype
    elif guessmime:
        headers['Content-Type'] = mimetypes.guess_type(filename)[0]
    else:
        headers['Content-Type'] = 'text/plain'

    if download == True:
        download = os.path.basename(filename)
    if download:
        headers['Content-Disposition'] = 'attachment; filename="%s"' % download

    stats = os.stat(filename)
    headers['Last-Modified'] = time.strftime(
        "%a, %d %b %Y %H:%M:%S GMT", time.gmtime(stats.st_mtime))

    ims = request.environ.get('HTTP_IF_MODIFIED_SINCE')
    if ims:
        ims = parse_date(ims.split(";")[0].strip())
        if ims is not None and ims >= int(stats.st_mtime):
            headers['Date'] = time.strftime("%a, %d %b %Y %H:%M:%S GMT",
                                            time.gmtime())
            return HTTPResponse(status=304, header=headers)

    headers['Content-Length'] = stats.st_size
    if request.method == 'HEAD':
        return HTTPResponse('', header=headers)
    return HTTPResponse(open(filename, 'rb'), header=headers)
def from_dict(cls, dct):
    # type: (dict) -> ProgramEventDetails
    """Build a ProgramEventDetails from its JSON dictionary.

    Consumed keys are pop()'d; any leftovers are logged as unprocessed.

    :param dct: JSON dictionary for the event details
    :return: populated ProgramEventDetails instance
    """
    ped = cls()
    if "venue100" in dct:
        ped.venue = dct.pop("venue100")
    if "gameDate" in dct:
        ped.game_date = parse_date(dct.pop("gameDate"))
    if "teams" in dct:
        for team in dct.pop("teams"):
            ped.teams.append(EventTeam.from_dict(team))
    if len(dct) != 0:
        # FIX: logging.warn is a deprecated alias of logging.warning
        logging.warning("Key(s) not processed for ProgramEventDetails: %s",
                        ", ".join(dct.keys()))
    return ped
def from_dict(cls, dct):
    # type: (dict) -> ScheduleMetadata
    """Build a ScheduleMetadata from its JSON dictionary.

    :param dct: JSON dictionary with required "modified", "md5", "startDate"
        keys and an optional "code"; consumed keys are pop()'d and any
        leftovers are logged as unprocessed
    :type dct: dict
    :return: populated metadata object
    :rtype: ScheduleMetadata
    """
    schedule_metadata = cls()
    schedule_metadata.modified = parse_datetime(dct.pop("modified"))
    schedule_metadata.md5 = dct.pop("md5")
    schedule_metadata.start_date = parse_date(dct.pop("startDate"))
    # optional
    if "code" in dct:
        schedule_metadata.code = dct.pop("code")
    if len(dct) != 0:
        # FIX: logging.warn is a deprecated alias of logging.warning
        logging.warning("Key(s) not processed for ScheduleMetadata: %s",
                        ", ".join(dct.keys()))
    return schedule_metadata
def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False):
    """Commit the given tree, creating a commit object.

    :param repo: Repo object the commit should be part of
    :param tree: Tree object or hex or bin sha the tree of the new commit
    :param message: Commit message. It may be an empty string if no message is provided.
        It will be converted to a string in any case.
    :param parent_commits:
        Optional Commit objects to use as parents for the new commit.
        If empty list, the commit will have no parents at all and become
        a root commit.
        If None , the current head commit will be the parent of the
        new commit object
    :param head:
        If True, the HEAD will be advanced to the new commit automatically.
        Else the HEAD will remain pointing on the previous commit. This could
        lead to undesired results when diffing files.
    :return: Commit object representing the new commit
    :note:
        Additional information about the committer and Author are taken from the
        environment or from the git configuration, see git-commit-tree for more information"""
    # NOTE(review): `parents` is never read again after this assignment
    parents = parent_commits
    if parent_commits is None:
        try:
            parent_commits = [repo.head.commit]
        except ValueError:
            # empty repositories have no head commit
            parent_commits = list()
        # END handle parent commits
    # END if parent commits are unset

    # retrieve all additional information, create a commit object, and
    # serialize it
    # Generally:
    # * Environment variables override configuration values
    # * Sensible defaults are set according to the git documentation

    # COMMITER AND AUTHOR INFO
    cr = repo.config_reader()
    env = os.environ

    # user identity defaults derived from the system user id
    default_email = get_user_id()
    default_name = default_email.split('@')[0]

    conf_name = cr.get_value('user', cls.conf_name, default_name)
    conf_email = cr.get_value('user', cls.conf_email, default_email)

    # environment overrides configuration
    author_name = env.get(cls.env_author_name, conf_name)
    # NOTE(review): falls back to default_email rather than conf_email,
    # unlike the committer_email line below — looks inconsistent; confirm.
    author_email = env.get(cls.env_author_email, default_email)

    committer_name = env.get(cls.env_committer_name, conf_name)
    committer_email = env.get(cls.env_committer_email, conf_email)

    # PARSE THE DATES
    unix_time = int(time())
    offset = altzone

    author_date_str = env.get(cls.env_author_date, '')
    if author_date_str:
        author_time, author_offset = parse_date(author_date_str)
    else:
        author_time, author_offset = unix_time, offset
    # END set author time

    committer_date_str = env.get(cls.env_committer_date, '')
    if committer_date_str:
        committer_time, committer_offset = parse_date(committer_date_str)
    else:
        committer_time, committer_offset = unix_time, offset
    # END set committer time

    # assume utf8 encoding
    enc_section, enc_option = cls.conf_encoding.split('.')
    conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding)

    author = Actor(author_name, author_email)
    committer = Actor(committer_name, committer_email)

    # if the tree is no object, make sure we create one - otherwise
    # the created commit object is invalid
    if isinstance(tree, str):
        tree = repo.tree(tree)
    # END tree conversion

    # CREATE NEW COMMIT
    new_commit = cls(repo, cls.NULL_BIN_SHA, tree,
                     author, author_time, author_offset,
                     committer, committer_time, committer_offset,
                     message, parent_commits, conf_encoding)

    # serialize into the object database to obtain the binary sha
    stream = StringIO()
    new_commit._serialize(stream)
    streamlen = stream.tell()
    stream.seek(0)

    istream = repo.odb.store(IStream(cls.type, streamlen, stream))
    new_commit.binsha = istream.binsha

    if head:
        try:
            repo.head.commit = new_commit
        except ValueError:
            # head is not yet set to the ref our HEAD points to
            # Happens on first commit
            import git.refs
            master = git.refs.Head.create(repo, repo.head.ref, commit=new_commit)
            repo.head.reference = master
        # END handle empty repositories
    # END advance head handling

    return new_commit
def sum_timedeltas(data):
    """Total the per-row durations: end timestamp (column 6) minus start
    timestamp (column 5), summed across all rows as a timedelta."""
    return sum((parse_date(row[6]) - parse_date(row[5]) for row in data),
               timedelta())
def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False):
    """Commit the given tree, creating a commit object.

    :param repo: Repo object the commit should be part of
    :param tree: Tree object or hex or bin sha the tree of the new commit
    :param message: Commit message. It may be an empty string if no message is provided.
        It will be converted to a string in any case.
    :param parent_commits:
        Optional Commit objects to use as parents for the new commit.
        If empty list, the commit will have no parents at all and become
        a root commit.
        If None , the current head commit will be the parent of the
        new commit object
    :param head:
        If True, the HEAD will be advanced to the new commit automatically.
        Else the HEAD will remain pointing on the previous commit. This could
        lead to undesired results when diffing files.
    :return: Commit object representing the new commit
    :note:
        Additional information about the committer and Author are taken from the
        environment or from the git configuration, see git-commit-tree for more information"""
    # NOTE(review): `parents` is never read again after this assignment
    parents = parent_commits
    if parent_commits is None:
        try:
            parent_commits = [ repo.head.commit ]
        except ValueError:
            # empty repositories have no head commit
            parent_commits = list()
        # END handle parent commits
    # END if parent commits are unset

    # retrieve all additional information, create a commit object, and
    # serialize it
    # Generally:
    # * Environment variables override configuration values
    # * Sensible defaults are set according to the git documentation

    # COMMITER AND AUTHOR INFO
    cr = repo.config_reader()
    env = os.environ

    # identities are resolved by the Actor helpers (env + config fallback)
    committer = Actor.committer(cr)
    author = Actor.author(cr)

    # PARSE THE DATES
    unix_time = int(time())
    offset = altzone

    author_date_str = env.get(cls.env_author_date, '')
    if author_date_str:
        author_time, author_offset = parse_date(author_date_str)
    else:
        author_time, author_offset = unix_time, offset
    # END set author time

    committer_date_str = env.get(cls.env_committer_date, '')
    if committer_date_str:
        committer_time, committer_offset = parse_date(committer_date_str)
    else:
        committer_time, committer_offset = unix_time, offset
    # END set committer time

    # assume utf8 encoding
    enc_section, enc_option = cls.conf_encoding.split('.')
    conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding)

    # if the tree is no object, make sure we create one - otherwise
    # the created commit object is invalid
    if isinstance(tree, str):
        tree = repo.tree(tree)
    # END tree conversion

    # CREATE NEW COMMIT
    new_commit = cls(repo, cls.NULL_BIN_SHA, tree,
                     author, author_time, author_offset,
                     committer, committer_time, committer_offset,
                     message, parent_commits, conf_encoding)

    # serialize into the object database to obtain the binary sha
    stream = StringIO()
    new_commit._serialize(stream)
    streamlen = stream.tell()
    stream.seek(0)

    istream = repo.odb.store(IStream(cls.type, streamlen, stream))
    new_commit.binsha = istream.binsha

    if head:
        # need late import here, importing git at the very beginning throws
        # as well ...
        import git.refs
        try:
            repo.head.set_commit(new_commit, logmsg="commit: %s" % message)
        except ValueError:
            # head is not yet set to the ref our HEAD points to
            # Happens on first commit
            import git.refs
            master = git.refs.Head.create(repo, repo.head.ref, new_commit,
                                          logmsg="commit (initial): %s" % message)
            repo.head.set_reference(master, logmsg='commit: Switching to %s' % master)
        # END handle empty repositories
    # END advance head handling

    return new_commit
def _parse_date(self, value):
    """Parse the activity's start date, honouring the UTC/local setting."""
    key = 'start_date' if self._utc else 'start_date_local'
    return util.parse_date(value[key])
def _get_latest_timestamp(self, activities):
    """Return the unix timestamp of the most recent activity start date,
    or 0 when the list is empty."""
    if not activities:
        return 0
    latest = max(parse_date(item['start_date']) for item in activities)
    return int(latest.timestamp())
def from_dict(cls, dct):
    # type: (dict) -> ScheduleHashList
    """Construct a ScheduleHashList out of a {station_id: {date: hash}}
    nested dictionary, parsing each schedule date on the way in."""
    entries = []
    for station_id, dates in dct.items():
        entries.extend(
            ScheduleHash.from_dict(dates[schedule_date], station_id,
                                   parse_date(schedule_date))
            for schedule_date in dates)
    return cls(entries)
def from_dict(dct):
    # type: (dict) -> Program
    """Build a Program from its JSON dictionary.

    Consumed keys are pop()'d; anything left over is logged as unprocessed.

    :param dct: JSON dictionary for one program. Returned unchanged when it
        does not look like a program payload (missing programID/md5).
    :return: populated Program instance (or the original dict)
    """
    if "programID" not in dct or "md5" not in dct:
        # not a program payload: hand the dict back untouched
        return dct
    program = Program()
    program.program_id = dct.pop("programID")
    # "EP..." ids encode the episode number in their last four digits
    if program.program_id[:2] == "EP":
        program.episode_num = int(program.program_id[-4:])
    program.titles = ProgramTitles.from_iterable(dct.pop("titles"))
    program.md5 = dct.pop("md5")
    if "eventDetails" in dct:
        program.event_details = ProgramEventDetails.from_dict(
            dct.pop("eventDetails"))
    if "descriptions" in dct:
        program.descriptions = ProgramDescriptionList.from_dict(
            dct.pop("descriptions"))
    if "originalAirDate" in dct:
        program.original_air_date = parse_date(dct.pop("originalAirDate"))
    if "genres" in dct:
        program.genres = dct.pop("genres")
    if "episodeTitle150" in dct:
        program.episode_title = dct.pop("episodeTitle150")
    if "metadata" in dct:
        program.metadata = ProgramMetadata.from_iterable(
            dct.pop("metadata"))
    if "cast" in dct:
        program.cast = ProgramCast.from_iterable(dct.pop("cast"))
    if "crew" in dct:
        program.crew = ProgramCrew.from_iterable(dct.pop("crew"))
    if "showType" in dct:
        program.show_type = dct.pop("showType")
    if "hasImageArtwork" in dct:
        program.has_image_artwork = dct.pop("hasImageArtwork")
    if "contentRating" in dct:
        program.content_ratings = ProgramContentRating.from_iterable(
            dct.pop("contentRating"))
    if "contentAdvisory" in dct:
        program.content_advisories = dct.pop("contentAdvisory")
    if "recommendations" in dct:
        program.recommendations = ProgramRecommendation.from_iterable(
            dct.pop("recommendations"))
    if "movie" in dct:
        program.movie = ProgramMovie.from_dict(dct.pop("movie"))
    if "animation" in dct:
        program.animation = dct.pop("animation")
    if "audience" in dct:
        program.audience = dct.pop("audience")
    if "holiday" in dct:
        program.holiday = dct.pop("holiday")
    if "keyWords" in dct:
        program.keywords = ProgramKeywords.from_dict(dct.pop("keyWords"))
    if "officialURL" in dct:
        program.official_url = dct.pop("officialURL")
    if "entityType" in dct:
        program.entity_type = dct.pop("entityType")
    if "resourceID" in dct:
        program.resource_id = dct.pop("resourceID")
    if "episodeImage" in dct:
        program.episode_image = Image.from_dict(dct.pop("episodeImage"))
    if "duration" in dct:
        program.duration = dct.pop("duration")
    if "awards" in dct:
        program.awards = ProgramAward.from_iterable(dct.pop("awards"))
    if len(dct) != 0:
        # FIX: logging.warn is a deprecated alias of logging.warning
        logging.warning("Key(s) not processed for Program: %s",
                        ", ".join(dct.keys()))
    return program
def filter_by_date_interval(self, start_time, end_time):
    """Select the rows whose start timestamp (column 5) falls within the
    half-open interval [start_time, end_time).

    :param start_time: inclusive lower bound, as a parse_date()-compatible string
    :param end_time: exclusive upper bound, as a parse_date()-compatible string
    :return: a new ASADBDataSet with only the matching rows
    """
    # PERF: parse the interval bounds once instead of once per row
    lower = parse_date(start_time)
    upper = parse_date(end_time)
    return ASADBDataSet([row for row in self.data
                         if lower <= parse_date(row[5]) < upper])
    # NOTE(review): an additional end-time filter was left commented out:
    # and parse_date(start_time) < parse_date(row[6]) < parse_date(end_time)
def __init__(self, name, db_dataset, xls_data, date, questionnaire_data):
    """
    Combines the data from the database with that obtained from the xls file,
    to create an instance of ASAExperimentGroup

    :param name: group name
    :param db_dataset: database dataset with rows
        [[id, resource_type, resource_id, operation, username, time_started, time_ended]]
    :param xls_data: per-task rows; columns used are
        row[0]=user, row[1]=task, row[2]=start time, row[3]=end time, row[4]=completion flag
    :param date: date string prefixed to the xls times to build full timestamps
    :param questionnaire_data: stored untouched on the instance
    """
    self.name = name
    self.users = set()
    self.tasks = set()
    self.timespans = {}  # {(user,task): (start_time,end_time)}
    self.task_durations = []  # (user, task, duration)
    self.activity_times = []  # (user, activity_type, duration)
    self.transposed_activity_times = [
    ]  # (user, activity1, activity2, activity3, ...)
    self.db_dataset = db_dataset  # [[id, resource_type, resource_id, operation, username, time_started, time_ended]]
    self.questionnaire_data = questionnaire_data

    # process XLS data
    # latest end time across the whole group; used as a stand-in end for
    # uncompleted tasks below
    group_end_time = max([row[3] for row in xls_data])
    for row in xls_data:
        self.users.add(row[0])
        self.tasks.add(row[1])
        assert (
            not (row[0], row[1]) in self.timespans
        )  # finding duplicate tasks for the same user means something went wrong...
        if row[4] in ("yes", "partial"):  # only account for completed tasks
            self.timespans[(row[0], row[1])] = ("%s %s:00.0" % (date, row[2]),
                                                "%s %s:00.0" % (date, row[3]))
            self.task_durations.append(
                (row[0], row[1],
                 parse_date("%s %s:00.0" % (date, row[3])) -
                 parse_date("%s %s:00.0" % (date, row[2]))))
        else:
            if not CONSIDER_ONLY_COMPLETED_TASKS:
                # make uncompleted tasks take up the rest of the available time
                if not row[2] == '':
                    self.timespans[(row[0], row[1])] = (
                        "%s %s:00.0" % (date, row[2]),
                        "%s %s:00.0" % (date, group_end_time))
                    self.task_durations.append(
                        (row[0], row[1],
                         parse_date("%s %s:00.0" % (date, group_end_time)) -
                         parse_date("%s %s:00.0" % (date, row[2]))))

    # Process DB data (needs refactoring)
    # slice the dataset by resource type / operation and aggregate durations
    # over columns (1, 3, 4) = (resource_type, operation, username)
    stats_wiki = self.db_dataset.filter_by_resource_type(
        ResourceTypes.wiki)
    stats_wiki_view = stats_wiki.filter_by_operation(
        "view").aggregate_timedeltas((1, 3, 4))
    stats_wiki_edit = stats_wiki.filter_by_operation(
        "edit").aggregate_timedeltas((1, 3, 4))
    stats_search = self.db_dataset.filter_by_resource_type(
        ResourceTypes.search).aggregate_timedeltas((1, 3, 4))
    stats_asa_artifact = self.db_dataset.filter_by_resource_type(
        ResourceTypes.asa_artifact)
    stats_asa_artifact_view = stats_asa_artifact.filter_by_operation(
        "view").aggregate_timedeltas((1, 3, 4))
    stats_asa_artifact_edit = stats_asa_artifact.filter_by_operation(
        "edit").aggregate_timedeltas((1, 3, 4))
    stats_asa_index = self.db_dataset.filter_by_resource_type(
        ResourceTypes.asa_index).aggregate_timedeltas((1, 3, 4))
    stats_asa_search = self.db_dataset.filter_by_resource_type(
        ResourceTypes.asa_search).aggregate_timedeltas((1, 3, 4))

    # tag each aggregated collection with its Measurements label and collect
    activity_times = []
    for collection, value_type in [
            (stats_wiki_view, Measurements.wiki_view),
            (stats_wiki_edit, Measurements.wiki_edit),
            (stats_search, Measurements.search),
            (stats_asa_artifact_view, Measurements.asa_artifact_view),
            (stats_asa_artifact_edit, Measurements.asa_artifact_edit),
            (stats_asa_index, Measurements.asa_index),
            (stats_asa_search, Measurements.asa_search)
    ]:
        activity_times.extend(
            collection.delete_columns(
                (0, 1)).insert_column(1, "activity", value_type).data)
    self.activity_times.extend(activity_times)
    self.transposed_activity_times.extend(
        self._transpose_activity_times(activity_times))
def _process_extra(rec, uri, key, value):
    """Translate one metadata "extra" (key, value) pair into triples on rec.

    :param rec: triple container with an ``add((subject, predicate, object))``
        method — presumably an rdflib Graph; verify against callers
    :param uri: subject URI the triples are attached to
    :param key: extra's key; known keys map to specific vocabulary terms,
        unknown keys are stored as a labelled blank node
    :param value: extra's value; strings equal to '' or containing
        "unknown"/"not specified" are treated as absent and skipped
    """
    if isinstance(value, basestring):  # Python 2: covers str and unicode
        value = value.strip()
        lval = value.lower()
        # Placeholder values mean "no data" — drop them below.
        if "unknown" in lval or "not specified" in lval:
            value = None
    if value is None or value == '':
        return
    if key.startswith("links:"):
        # Link-set extras have their own handler.
        _process_linkset(rec, uri, key, value)
    elif key == "triples":
        rec.add((uri, RDF["type"], VOID["Dataset"]))
        rec.add((uri, VOID["triples"], Literal(int(value))))
    elif key == "shortname":
        rec.add((uri, RDFS["label"], Literal(value)))
    elif key == "license_link":
        rec.add((uri, DC["rights"], URIRef(value)))
    elif key == "date_created":
        rec.add((uri, DC["created"], Literal(value)))
    elif key == "date_published":
        rec.add((uri, DC["available"], Literal(value)))
    elif key == "date_listed":
        # Same predicate as date_published.
        rec.add((uri, DC["available"], Literal(value)))
    elif key == "update_frequency":
        # Frequency is modelled as a blank node carrying value and label.
        freq = BNode()
        rec.add((uri, DC["accrualPeriodicity"], freq))
        rec.add((freq, RDF["value"], Literal(value)))
        rec.add((freq, RDFS["label"], Literal(value)))
    elif key == "unique_id":
        rec.add((uri, DC["identifier"], Literal(value)))
    elif key in ("geospatial_coverage", "geographic_coverage",
                 "geographical_coverage"):
        rec.add((uri, DC["spatial"], Literal(value)))
    elif key == "temporal_coverage":
        rec.add((uri, DC["temporal"], Literal(value)))
    elif key in ("precision", "granularity", "temporal_granularity",
                 "geospatial_granularity", "geographic_granularity",
                 "geographical_granularity"):
        rec.add((uri, DCAT["granularity"], Literal(value)))
    elif key in ("date_released",):
        # Only the date_* keys below go through parse_date; the ones above
        # are stored as raw literals.
        rec.add((uri, DC["issued"], parse_date(value)))
    elif key in ("date_modified", "date_updated"):
        rec.add((uri, DC["modified"], parse_date(value)))
    elif key in ("agency", "department"):
        dept = BNode()
        rec.add((uri, DC["source"], dept))
        rec.add((dept, RDFS["label"], Literal(value)))
    elif key == "import_source":
        rec.add((uri, DC["source"], Literal(value)))
    elif key == "external_reference":
        rec.add((uri, SKOS["notation"], Literal(value)))
    elif key == "categories":
        # Normalise to a list of category strings.
        if isinstance(value, (list, tuple)):
            pass
        elif isinstance(value, basestring):
            value = value.split(',')
        else:
            value = [value]
        for cat in [x.strip() for x in value]:
            rec.add((uri, DCAT["theme"], Literal(cat)))
    else:
        # Unrecognised key: keep it as a blank node labelled with the key.
        extra = BNode()
        rec.add((uri, DC.relation, extra))
        rec.add((extra, RDF.value, Literal(value)))
        #TODO Is this correct?
        #rec.add((extra, RDF.label, Literal(key)))
        rec.add((extra, RDFS["label"], Literal(key)))
def __init__(self, name, db_dataset, xls_data, date, questionnaire_data):
    """
    Combines the data from the database with that obtained from the xls file,
    to create an instance of ASAExperimentGroup.
    """
    self.name = name
    self.users = set()
    self.tasks = set()
    # {(user, task): (start_time, end_time)}
    self.timespans = {}
    # (user, task, duration)
    self.task_durations = []
    # (user, activity_type, duration)
    self.activity_times = []
    # (user, activity1, activity2, activity3, ...)
    self.transposed_activity_times = []
    # rows: [id, resource_type, resource_id, operation, username, time_started, time_ended]
    self.db_dataset = db_dataset
    self.questionnaire_data = questionnaire_data

    def stamp(clock):
        # Build the full timestamp string for a clock value on this date.
        return "%s %s:00.0" % (date, clock)

    # --- XLS data ---------------------------------------------------------
    group_end_time = max(row[3] for row in xls_data)
    for row in xls_data:
        user, task, started, finished, completed = (
            row[0], row[1], row[2], row[3], row[4])
        self.users.add(user)
        self.tasks.add(task)
        # A duplicate (user, task) pair means the input sheet is inconsistent.
        assert (user, task) not in self.timespans
        if completed in ("yes", "partial"):
            # Completed tasks keep their recorded span.
            self.timespans[(user, task)] = (stamp(started), stamp(finished))
            self.task_durations.append(
                (user, task,
                 parse_date(stamp(finished)) - parse_date(stamp(started))))
        elif not CONSIDER_ONLY_COMPLETED_TASKS and started != '':
            # Uncompleted tasks are stretched to the end of the group session.
            self.timespans[(user, task)] = (
                stamp(started), stamp(group_end_time))
            self.task_durations.append(
                (user, task,
                 parse_date(stamp(group_end_time)) - parse_date(stamp(started))))

    # --- DB data (needs refactoring) --------------------------------------
    wiki = self.db_dataset.filter_by_resource_type(ResourceTypes.wiki)
    wiki_view = wiki.filter_by_operation("view").aggregate_timedeltas((1, 3, 4))
    wiki_edit = wiki.filter_by_operation("edit").aggregate_timedeltas((1, 3, 4))
    search = self.db_dataset.filter_by_resource_type(
        ResourceTypes.search).aggregate_timedeltas((1, 3, 4))
    asa_artifact = self.db_dataset.filter_by_resource_type(
        ResourceTypes.asa_artifact)
    asa_artifact_view = asa_artifact.filter_by_operation(
        "view").aggregate_timedeltas((1, 3, 4))
    asa_artifact_edit = asa_artifact.filter_by_operation(
        "edit").aggregate_timedeltas((1, 3, 4))
    asa_index = self.db_dataset.filter_by_resource_type(
        ResourceTypes.asa_index).aggregate_timedeltas((1, 3, 4))
    asa_search = self.db_dataset.filter_by_resource_type(
        ResourceTypes.asa_search).aggregate_timedeltas((1, 3, 4))

    # Tag every aggregated collection with its measurement label and flatten
    # into (user, activity, duration) rows.
    labelled_stats = (
        (wiki_view, Measurements.wiki_view),
        (wiki_edit, Measurements.wiki_edit),
        (search, Measurements.search),
        (asa_artifact_view, Measurements.asa_artifact_view),
        (asa_artifact_edit, Measurements.asa_artifact_edit),
        (asa_index, Measurements.asa_index),
        (asa_search, Measurements.asa_search),
    )
    activity_times = []
    for stats, label in labelled_stats:
        activity_times.extend(
            stats.delete_columns((0, 1)).insert_column(
                1, "activity", label).data)
    self.activity_times.extend(activity_times)
    self.transposed_activity_times.extend(
        self._transpose_activity_times(activity_times))
def from_dict(dct):
    # type: (dict) -> Program
    """Build a Program from a decoded JSON dictionary.

    Recognised keys are popped from ``dct`` as they are consumed; any keys
    remaining afterwards are reported with a logging warning so new schema
    fields are noticed rather than silently dropped.

    :param dct: decoded JSON object
    :return: a populated Program, or ``dct`` unchanged when it lacks the
        "programID"/"md5" keys that identify a program record
    """
    # Not a program payload — hand the dict back untouched so nested
    # structures keep flowing through object hooks.
    if "programID" not in dct or "md5" not in dct:
        return dct
    program = Program()
    program.program_id = dct.pop("programID")
    # IDs beginning with "EP" carry the episode number in the last 4 digits.
    if program.program_id.startswith("EP"):
        program.episode_num = int(program.program_id[-4:])
    program.titles = ProgramTitles.from_iterable(dct.pop("titles"))
    program.md5 = dct.pop("md5")
    if "eventDetails" in dct:
        program.event_details = ProgramEventDetails.from_dict(dct.pop("eventDetails"))
    if "descriptions" in dct:
        program.descriptions = ProgramDescriptionList.from_dict(dct.pop("descriptions"))
    if "originalAirDate" in dct:
        program.original_air_date = parse_date(dct.pop("originalAirDate"))
    if "genres" in dct:
        program.genres = dct.pop("genres")
    if "episodeTitle150" in dct:
        program.episode_title = dct.pop("episodeTitle150")
    if "metadata" in dct:
        program.metadata = ProgramMetadata.from_iterable(dct.pop("metadata"))
    if "cast" in dct:
        program.cast = ProgramCast.from_iterable(dct.pop("cast"))
    if "crew" in dct:
        program.crew = ProgramCrew.from_iterable(dct.pop("crew"))
    if "showType" in dct:
        program.show_type = dct.pop("showType")
    if "hasImageArtwork" in dct:
        program.has_image_artwork = dct.pop("hasImageArtwork")
    if "contentRating" in dct:
        program.content_ratings = ProgramContentRating.from_iterable(dct.pop("contentRating"))
    if "contentAdvisory" in dct:
        program.content_advisories = dct.pop("contentAdvisory")
    if "recommendations" in dct:
        program.recommendations = ProgramRecommendation.from_iterable(dct.pop("recommendations"))
    if "movie" in dct:
        program.movie = ProgramMovie.from_dict(dct.pop("movie"))
    if "animation" in dct:
        program.animation = dct.pop("animation")
    if "audience" in dct:
        program.audience = dct.pop("audience")
    if "holiday" in dct:
        program.holiday = dct.pop("holiday")
    if "keyWords" in dct:
        program.keywords = ProgramKeywords.from_dict(dct.pop("keyWords"))
    if "officialURL" in dct:
        program.official_url = dct.pop("officialURL")
    if "entityType" in dct:
        program.entity_type = dct.pop("entityType")
    if "resourceID" in dct:
        program.resource_id = dct.pop("resourceID")
    if "episodeImage" in dct:
        program.episode_image = Image.from_dict(dct.pop("episodeImage"))
    if "duration" in dct:
        program.duration = dct.pop("duration")
    if "awards" in dct:
        program.awards = ProgramAward.from_iterable(dct.pop("awards"))
    if len(dct) != 0:
        # logging.warn() is a deprecated alias of warning().
        logging.warning("Key(s) not processed for Program: %s", ", ".join(dct.keys()))
    return program