from time import time


def bootstrap(f_produce, partition_count, schema, database, table, config):
    # Stream a full-table bootstrap into Kafka, framed by start/complete
    # marker messages. DISPLAY_PROGRESS_PERIOD_MILLIS is assumed to be a
    # module-level constant (in milliseconds).
    start_time_millis = time() * 1000.0
    inserted_rows = 0
    total_rows = int(float(
        config['mysql']['schemas'][schema]['tables'][table][database]['size']))
    topic = config['mysql']['schemas'][schema]['tables'][table][database]['topic']
    # Partition on the database name so bootstrap rows land on the same
    # partition as live replication events for that database.
    partition = abs(java_string_hashcode(database) % partition_count)
    produce(f_produce, partition,
            *bootstrap_start_message(topic, schema, database, table, config))
    last_display_progress = time()
    for _, key, value in bootstrap_insert_messages(topic, schema, database,
                                                   table, config, total_rows):
        produce(f_produce, partition, topic, key, value)
        inserted_rows += 1
        # Throttle progress output to once per display period.
        if time() - last_display_progress > (DISPLAY_PROGRESS_PERIOD_MILLIS / 1000.0):
            display_progress(total_rows, inserted_rows, start_time_millis)
            last_display_progress = time()
    produce(f_produce, partition,
            *bootstrap_complete_message(topic, schema, database, table, config))
    display_line("")
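# Every snippet in this listing partitions with a `java_string_hashcode`
# helper that is not shown in the source. A minimal sketch, assuming it
# mirrors Java's String.hashCode so Python-side partitioning agrees with
# Java consumers:
def java_string_hashcode(s):
    # h = 31 * h + code_unit, wrapped to a signed 32-bit integer like Java.
    h = 0
    for c in s:
        h = (31 * h + ord(c)) & 0xFFFFFFFF
    return ((h + 0x80000000) & 0xFFFFFFFF) - 0x80000000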
import json


def consume(key, value):
    # Compute the target partition from the database name and emit the
    # record as compact JSON on stdout.
    partition = abs(java_string_hashcode(key['database']) % partition_count)
    output = {"partition": partition, "key": key, "message": value}
    print(json.dumps(output, separators=(',', ':')))
import json


def consume(key, value):
    # Serialize the key and value as compact JSON and send them to Kafka,
    # pinning the record to the partition derived from the database name.
    database = key['database']
    key_str = json.dumps(key, separators=(',', ':'))
    value_str = json.dumps(value, separators=(',', ':'))
    partition = abs(java_string_hashcode(database) % partition_count)
    kafka_producer.send(topic, key=key_str, value=value_str,
                        partition=partition)
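# The variant above assumes module-level `kafka_producer`, `topic`, and
# `partition_count` globals. A minimal setup sketch using kafka-python; the
# broker address, topic name, and partition count are assumptions:
from kafka import KafkaProducer

topic = 'mypipe.events'
partition_count = 8
kafka_producer = KafkaProducer(
    bootstrap_servers='localhost:9092',
    # consume() passes JSON strings, so encode them to bytes here.
    key_serializer=lambda s: s.encode('utf-8'),
    value_serializer=lambda s: s.encode('utf-8'),
)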
from time import time


def bootstrap(f_produce, partition_count, schema, database, table, config):
    # Variant of bootstrap() that publishes to a single topic taken from
    # the Kafka config rather than a per-table topic.
    topic = config['kafka']['topic']
    start_time_millis = time() * 1000.0
    inserted_rows = 0
    total_rows = int(float(
        config['mysql']['schemas'][schema]['tables'][table][database]['size']))
    partition = abs(java_string_hashcode(database) % partition_count)
    produce(f_produce, topic, partition,
            *bootstrap_start_message(schema, database, table, config))
    last_display_progress = time()
    for key, value in bootstrap_insert_messages(schema, database, table,
                                                config, total_rows):
        produce(f_produce, topic, partition, key, value)
        inserted_rows += 1
        # Throttle progress output to once per display period.
        if time() - last_display_progress > (DISPLAY_PROGRESS_PERIOD_MILLIS / 1000.0):
            display_progress(total_rows, inserted_rows, start_time_millis)
            last_display_progress = time()
    produce(f_produce, topic, partition,
            *bootstrap_complete_message(schema, database, table, config))
    display_line("")
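# `display_progress` and `display_line` are not shown in the source. A
# plausible sketch of the progress helper, assuming it reports percent
# complete and throughput (the output format is an assumption):
import sys
from time import time

def display_progress(total_rows, inserted_rows, start_time_millis):
    elapsed_s = time() - (start_time_millis / 1000.0)
    rate = inserted_rows / elapsed_s if elapsed_s > 0 else 0.0
    pct = 100.0 * inserted_rows / total_rows if total_rows else 100.0
    sys.stdout.write("\r%d/%d rows (%.1f%%), %.0f rows/s"
                     % (inserted_rows, total_rows, pct, rate))
    sys.stdout.flush()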
import argparse
import json
import random
from typing import Dict, List


def main() -> None:
    parser = argparse.ArgumentParser(
        description="Prints the contents of a config file to standard out.")
    parser.add_argument("sfile", type=str, help="student name file")
    parser.add_argument("base_config", type=str, help="base config template")
    args = parser.parse_args()

    with open(args.base_config, 'r') as f:
        conf = json.load(f)

    # Seed the RNG from the assignment name so shuffles are reproducible.
    seed = java_string_hashcode(conf["assignment_name"])
    random.seed(seed)

    # Read student groups from the input file, one comma-separated group
    # per line.
    groups: List[List[str]] = []
    with open(args.sfile) as f:
        for line in f:
            groups.append(line.rstrip().split(","))

    # "Randomly" (deterministically, given the seed) shuffle the group list.
    random.shuffle(groups)

    # Pair each student with a synthesized repository name.
    repo_map: Dict[str, str] = {}
    for group in groups:
        repo = group2repo(conf["course"], conf["assignment_name"], group)
        for student in group:
            repo_map[student] = repo

    # Print the augmented config.
    conf["repository_map"] = repo_map
    print(json.dumps(conf, indent=4, sort_keys=True))
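# `group2repo` is referenced but not defined in this listing. A hypothetical
# sketch that synthesizes a repository name from the course, assignment, and
# group members; the naming scheme is an assumption:
def group2repo(course, assignment, group):
    # e.g., ("cs334", "hw1", ["alice", "bob"]) -> "cs334-hw1-alice-bob"
    return "-".join([course, assignment] + group)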
import json
import random
from typing import Dict, List


def __init__(self, json_conf_file: str, verbosity: bool):
    # Open and parse the JSON config file.
    with open(json_conf_file, 'r') as f:
        conf = json.loads(f.read())
    self.jsondict = conf
    "A dictionary object for the parsed configuration file."

    # Declare/init fields.
    self.verbose: bool = verbosity
    "Flag to enable verbose output."

    self.hostname: str = conf["hostname"]
    """
    The name of your SSH `config` host to use for script interaction.
    This allows you to use a different GitHub identity for managing course
    scripts because, presently, PyGithub does not support two-factor
    authentication.
    """

    self.user2repo: Dict[str, str] = {}
    "A dictionary mapping each user to their repository name."

    self.repo2group: Dict[str, List[str]] = {}
    """A dictionary mapping a repo name to the list of students (assuming a
    group assignment). The list will have length 1 for an individual
    assignment.
    """

    self.ta_assignments: Dict[str, str] = {}
    "A dictionary mapping each repository to its grading TA."

    self.course: str = conf["course"]
    "The name of the course."

    self.assignment_name: str = conf["assignment_name"]
    "The name of the assignment."

    self.starter_repo: str = conf["starter_repo"]
    """
    Path to the starter repo. Starter code is distributed by setting each
    student repository as a "remote" for the starter repository and then
    `push`ing. Student repositories _must_ be empty (i.e., have no `main`
    branch), otherwise the `push` will fail.
    """

    self.github_org: str = conf["github_org"]
    "Name of the GitHub organization to use."

    self.archive_path: str = conf["archive_path"]
    """Path to a folder intended as the deanonymized repository of student
    submissions for Academic Honor Code cases."""

    self.submission_path: str = conf["submission_path"]
    """Path to the faculty-only staging area for squashing and modifying TA
    feedback before issuing pull requests."""

    self.ta_path: str = conf["ta_path"]
    """Path to the TA staging area where anonymized student submissions are
    copied."""

    self.feedback_branch: str = conf["feedback_branch"]
    """Branch on which TA/instructor feedback is committed. Pull requests
    are issued from this branch."""

    self.default_branch: str = conf["default_branch"] \
        if "default_branch" in conf else "main"
    "Branch that students commit to. Defaults to `main` if not specified."

    if "do_not_accept_changes_after_due_date_timestamp" in conf:
        # TODO: type? Best guess is int.
        self.due_date = \
            conf["do_not_accept_changes_after_due_date_timestamp"]
        "A UNIX timestamp representing the due date in the local timezone."

    self.anonymize_sub_path: bool = conf["anonymize_sub_path"] \
        if "anonymize_sub_path" in conf else True
    """Whether the contents of the `submissions` folder, which is viewable
    only by faculty (not TAs), are anonymized."""

    self.rsync_excludes: List[str] = conf["rsync_excludes"]
    """List of files and directories to be excluded from rsync when copying
    to the TA folder."""

    # Populate the mappings (user2repo, repo2group).
    for student in conf["repository_map"].keys():
        self.add_mapping(student, conf["repository_map"][student])

    # Read the TA list; sorting ensures that TA order is deterministic.
    tas: List[str] = conf["TAs"]
    tas.sort()

    # Generate the TA map with a deterministic, seeded shuffle.
    random.seed(java_string_hashcode(conf["assignment_name"]))
    repos = self.repositories
    random.shuffle(repos)
    self.ta_assignments = round_robin_map(tas, repos)
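# `round_robin_map` is referenced but not shown. A plausible sketch that
# deals the shuffled repositories out to TAs in round-robin order and
# returns a repo -> TA mapping; the exact signature is an assumption:
from typing import Dict, List

def round_robin_map(tas: List[str], repos: List[str]) -> Dict[str, str]:
    return {repo: tas[i % len(tas)] for i, repo in enumerate(repos)}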