def estimate(
    row: Tuple[str, str, str],
    *,
    data: Optional[Dict] = None,
    db: str,
    area_spec: Dict,
    run_id: str = '',
) -> Optional[int]:
    """Return the estimate reported by an estimate-only ``run`` for one row.

    Args:
        row: ``(stnum, catalog, code)`` key identifying the record.
        data: Student input data. When falsy (``None`` or empty), the
            ``input_data`` JSON is read from the ``server_data`` table instead.
        db: Passed to ``sqlite_connect``; only used when ``data`` is falsy.
        area_spec: Area specification handed to ``run`` unchanged.
        run_id: Accepted but not used by this function.

    Returns:
        The ``estimate`` attribute of the first ``EstimateMsg`` yielded by
        ``run``, or ``None`` if ``run`` yields nothing at all.

    Raises:
        AssertionError: if no matching row exists in ``server_data``, or if
            ``run`` yields anything other than an ``EstimateMsg``.
    """
    stnum, catalog, code = row

    if data:
        student = data
    else:
        with sqlite_connect(db, readonly=True) as conn:
            results = conn.execute(
                ' SELECT input_data FROM server_data'
                ' WHERE (stnum, catalog, code) = (?, ?, ?) ',
                [stnum, catalog, code],
            )
            record = results.fetchone()
            # Raised explicitly instead of via `assert` so the check is not
            # stripped when Python runs with -O.
            if record is None:
                raise AssertionError(f'no server_data row for {stnum} {catalog} {code}')
            student = json.loads(record['input_data'])

    for message in run(args=Arguments(estimate_only=True), student=student, area_spec=area_spec):
        if isinstance(message, EstimateMsg):
            return message.estimate
        # Anything other than an EstimateMsg is treated as a hard failure;
        # an explicit raise keeps that true under -O as well.
        raise AssertionError(type(message))

    return None
def audit(
    row: Tuple[str, str, str],
    *,
    data: Optional[Dict] = None,
    db: str,
    area_spec: Dict,
    run_id: str = '',
    timeout: Optional[float] = None,
) -> Optional[Dict]:
    """Run a full audit for one row and return a flat summary of the result.

    Args:
        row: ``(stnum, catalog, code)`` key identifying the record.
        data: Student input data. When falsy (``None`` or empty), the
            ``input_data`` JSON is read from the ``server_data`` table instead.
        db: Passed to ``sqlite_connect`` and to ``estimate``.
        area_spec: Area specification handed to ``estimate`` and ``run``.
        run_id: Stored under ``"run"`` in the returned dict and under
            ``"branch"`` in the keys attached to a ``TimeoutError``.
        timeout: Optional limit in seconds (measured with
            ``time.perf_counter``). It is only checked when ``run`` yields a
            message that is not a ``ResultMsg``.

    Returns:
        A dict describing the first ``ResultMsg`` yielded by ``run``, or
        ``None`` if ``run`` finishes without yielding one.

    Raises:
        AssertionError: if no matching row exists in ``server_data`` or the
            estimate comes back as ``None``.
        TimeoutError: if ``timeout`` is exceeded; ``args`` is
            ``(message, db_keys)``.
    """
    stnum, catalog, code = row

    if data:
        student = data
    else:
        with sqlite_connect(db, readonly=True) as conn:
            results = conn.execute(
                ' SELECT input_data FROM server_data'
                ' WHERE (stnum, catalog, code) = (?, ?, ?) ',
                [stnum, catalog, code],
            )
            record = results.fetchone()
            # Raised explicitly instead of via `assert` so the check is not
            # stripped when Python runs with -O.
            if record is None:
                raise AssertionError(f'no server_data row for {stnum} {catalog} {code}')
            student = json.loads(record['input_data'])

    estimate_count = estimate((stnum, catalog, code), data=student, db=db, area_spec=area_spec)
    if estimate_count is None:
        raise AssertionError(f'no estimate produced for {stnum} {catalog} {code}')

    # Attached to the TimeoutError so the caller can identify the cancelled row.
    db_keys = {'stnum': stnum, 'catalog': catalog, 'code': code, 'estimate': estimate_count, 'branch': run_id}

    start_time = time.perf_counter()

    for message in run(args=Arguments(), student=student, area_spec=area_spec):
        if isinstance(message, ResultMsg):
            result = message.result.to_dict()
            return {
                "run": run_id,
                "stnum": stnum,
                "catalog": catalog,
                "code": code,
                "iterations": message.iters,
                "duration": message.elapsed_ms / 1000,
                "gpa": result["gpa"],
                "ok": result["ok"],
                "rank": result["rank"],
                "max_rank": result["max_rank"],
                "result": json.dumps(result, sort_keys=True),
                "status": result["status"],
                "version": message.version,
            }

        if timeout:
            # Measure once so the reported duration is the one that tripped
            # the limit.
            elapsed = time.perf_counter() - start_time
            if elapsed >= timeout:
                raise TimeoutError(f'cancelling {" ".join(row)} after {elapsed}', db_keys)

    return None
def audit(
    *,
    area_spec: Dict,
    area_code: str,
    area_catalog: str,
    student: Dict,
    run_id: int,
    curs: psycopg2.extensions.cursor,
) -> None:
    """Audit one student against one area, recording progress in ``result``.

    A row flagged ``in_progress`` is inserted first; its id is then used to
    update the row on every ``ProgressMsg`` and to store the final outcome on
    ``ResultMsg``. Any exception raised while consuming ``run`` is reported to
    sentry and written to the row's ``error`` column instead of propagating.
    """
    args = Arguments()
    stnum = student['stnum']

    logger.info("auditing #%s against %s %s", stnum, area_catalog, area_code)

    with sentry_sdk.configure_scope() as scope:
        scope.user = dict(id=stnum)

    insert_sql = (
        " INSERT INTO result ( student_id, area_code, catalog, run, input_data, in_progress)"
        " VALUES (%(student_id)s, %(area_code)s, %(catalog)s, %(run)s, %(student)s , true )"
        " RETURNING id "
    )
    insert_params = {
        "student_id": stnum,
        "area_code": area_code,
        "catalog": area_catalog,
        "run": run_id,
        "student": json.dumps(student),
    }
    curs.execute(insert_sql, insert_params)

    inserted = curs.fetchone()
    result_id: int = cast(int, inserted[0])
    logger.info(f"result id = {result_id}")

    # Tag subsequent sentry events with enough context to find this row.
    with sentry_sdk.configure_scope() as scope:
        scope.user = {"id": stnum}
        scope.set_tag("area_code", area_code)
        scope.set_tag("catalog", area_catalog)
        scope.set_extra("result_id", result_id)

    progress_sql = (
        " UPDATE result"
        " SET iterations = %(count)s, duration = interval %(elapsed)s"
        " WHERE id = %(result_id)s "
    )
    finish_sql = (
        " UPDATE result"
        " SET iterations = %(total_count)s"
        " , duration = interval %(elapsed)s"
        " , per_iteration = interval %(avg_iter_time)s"
        " , rank = %(rank)s"
        " , max_rank = %(max_rank)s"
        " , result = %(result)s::jsonb"
        " , ok = %(ok)s"
        " , ts = %(now)s"
        " , gpa = %(gpa)s"
        " , in_progress = false"
        " , claimed_courses = %(claimed_courses)s::jsonb"
        " WHERE id = %(result_id)s "
    )
    error_sql = (
        " UPDATE result"
        " SET in_progress = false, error = %(error)s"
        " WHERE id = %(result_id)s "
    )

    try:
        for msg in run(args, area_spec=area_spec, student=student):
            if isinstance(msg, NoAuditsCompletedMsg):
                logger.critical('no audits completed')
                continue

            if isinstance(msg, EstimateMsg):
                continue

            if isinstance(msg, ProgressMsg):
                avg_iter_time = pretty_ms(msg.avg_iter_ms, format_sub_ms=True)
                progress_params = {
                    "result_id": result_id,
                    "count": msg.iters,
                    "elapsed": f"{msg.elapsed_ms}ms",
                }
                curs.execute(progress_sql, progress_params)
                logger.info(f"{msg.iters:,} at {avg_iter_time} per audit")
                continue

            if isinstance(msg, ResultMsg):
                result = msg.result.to_dict()
                finish_params = {
                    "result_id": result_id,
                    "total_count": msg.iters,
                    "elapsed": f"{msg.elapsed_ms}ms",
                    "avg_iter_time": f"{msg.avg_iter_ms}ms",
                    "result": json.dumps(result),
                    "claimed_courses": json.dumps(msg.result.keyed_claims()),
                    "rank": result["rank"],
                    "max_rank": result["max_rank"],
                    "gpa": result["gpa"],
                    "ok": result["ok"],
                    # we insert a Python now() instead of using the now() psql function
                    # because sql's now() is the start time of the transaction, and we
                    # want this to be the end of the transaction
                    "now": datetime.datetime.now(),
                }
                curs.execute(finish_sql, finish_params)
                continue

            logger.critical('unknown message %s', msg)

    except Exception as ex:
        sentry_sdk.capture_exception(ex)
        error_params = {
            "result_id": result_id,
            "error": json.dumps({"error": str(ex)}),
        }
        curs.execute(error_sql, error_params)
def audit(
    *,
    area_spec: Dict,
    area_code: str,
    area_catalog: str,
    student: Dict,
    run_id: int,
    expires_at: Optional[str],
    link_only: bool,
    curs: psycopg2.extensions.cursor,
) -> Optional[int]:
    """Audit one student against one area and store the outcome in ``result``.

    On the first ``ResultMsg`` a new row is inserted and its id returned.
    Unless ``link_only`` is set, identical earlier results for the same
    student/catalog/area are deleted and the remaining active rows for the
    student/area are deactivated before the insert.

    Returns ``None`` when ``run`` ends without a ``ResultMsg`` or when any
    exception is raised along the way (the exception is logged, not re-raised).
    """
    args = Arguments()
    stnum = student['stnum']

    logger.info("auditing #%s against %s %s", stnum, area_catalog, area_code)

    delete_sql = (
        " DELETE FROM result"
        " WHERE student_id = %(student_id)s"
        " AND catalog = %(area_catalog)s"
        " AND area_code = %(area_code)s"
        " AND result = %(result)s::jsonb "
    )
    deactivate_sql = (
        " UPDATE result"
        " SET is_active = false"
        " WHERE student_id = %(student_id)s"
        " AND area_code = %(area_code)s"
        " AND is_active = true "
    )
    insert_sql = (
        " INSERT INTO result ("
        " student_id, area_code, catalog, run, input_data,"
        " expires_at, link_only, result_version,"
        " iterations, duration, per_iteration,"
        " rank, max_rank, result, ok, ts, gpa,"
        " claimed_courses, status, is_active, revision,"
        " student_classification, student_class, student_name, student_name_sort"
        " ) VALUES ("
        " %(student_id)s, %(area_code)s, %(catalog)s, %(run)s, %(input_data)s,"
        " %(expires_at)s, %(link_only)s, %(result_version)s,"
        " %(total_count)s, interval %(elapsed)s, interval %(avg_iter_time)s,"
        " %(rank)s, %(max_rank)s, %(result)s::jsonb, %(ok)s, clock_timestamp(), %(gpa)s,"
        " %(claimed_courses)s::jsonb, %(status)s, %(is_active)s,"
        " coalesce((SELECT max(revision) FROM result WHERE student_id = %(student_id)s AND area_code = %(area_code)s), 0) + 1,"
        " %(student_classification)s, %(student_class)s,"
        " nullif(%(student_name)s, ''), nullif(%(student_name_sort)s, '')"
        " ) RETURNING id "
    )

    try:
        for msg in run(args, area_spec=area_spec, student=student):
            if isinstance(msg, NoAuditsCompletedMsg):
                logger.critical('no audits completed')
                continue

            # Estimates and progress reports are deliberately ignored here.
            if isinstance(msg, (EstimateMsg, ProgressMsg)):
                continue

            if not isinstance(msg, ResultMsg):
                logger.critical('unknown message %s', msg)
                continue

            result = msg.result.to_dict()
            result_str = json.dumps(result)

            if not link_only:
                # delete any old copies of this exact result
                curs.execute(delete_sql, {
                    "student_id": stnum,
                    "area_catalog": area_catalog,
                    "area_code": area_code,
                    "result": result_str,
                })

                # deactivate all existing records
                curs.execute(deactivate_sql, {
                    "student_id": stnum,
                    "area_code": area_code,
                })

            # we use clock_timestamp() instead of now() here, because
            # now() is the start time of the transaction, and we instead
            # want the time when the computation was finished.
            # see https://stackoverflow.com/a/24169018
            student_class = student["class"] if student["class"] != "None" else None
            insert_params = {
                "student_id": stnum,
                "area_code": area_code,
                "catalog": area_catalog,
                "run": run_id,
                "input_data": json.dumps(student),
                "expires_at": expires_at,
                "link_only": link_only,
                "is_active": not link_only,
                "total_count": msg.iters,
                "elapsed": f"{msg.elapsed_ms}ms",
                "avg_iter_time": f"{msg.avg_iter_ms}ms",
                "result": result_str,
                "claimed_courses": json.dumps(msg.result.keyed_claims()),
                "rank": result["rank"],
                "max_rank": result["max_rank"],
                "gpa": result["gpa"],
                "ok": result["ok"],
                "status": result["status"],
                "student_name": student["name"],
                "student_name_sort": student["name_sort"],
                "student_classification": student["classification"],
                "student_class": student_class,
                "result_version": result["version"],
            }
            curs.execute(insert_sql, insert_params)

            result_id: int = curs.fetchone()[0]
            return result_id

    except Exception as ex:
        logger.error("error with student #%s, catalog %s, area %s: %s", stnum, area_catalog, area_code, ex)

    return None
def main() -> int:  # noqa: C901
    """Audit every ``stnum catalog area_code`` triple read from stdin.

    Each non-blank stdin line is split on whitespace into a
    ``(stnum, catalog, area_code)`` triple. Triples are de-duplicated, sorted,
    and repeated ``-n`` times. For each one the student file
    ``<--dir>/<stnum>.json`` and the matching area spec are loaded and passed
    to ``run``; results are printed as a summary or, with ``--table``, as CSV
    rows on stdout.

    Returns:
        ``0`` on success; ``1`` when there is no input, no area spec is found,
        an unknown message is received, or an audit raises; ``2`` when an
        audit reports that no audits completed.
    """
    DEFAULT_DIR = os.getenv('DP_STUDENT_DIR')

    parser = argparse.ArgumentParser()
    parser.add_argument('-w', '--workers', help="the number of worker processes to spawn", default=os.cpu_count())
    parser.add_argument('--dir', default=DEFAULT_DIR)
    parser.add_argument('--areas-dir', default=os.path.expanduser('~/Projects/degreepath-areas'))
    parser.add_argument("--transcript", action='store_true')
    parser.add_argument("--invocation", action='store_true')
    parser.add_argument("-q", "--quiet", action='store_true')
    parser.add_argument("--paths", dest='show_paths', action='store_const', const=True, default=True)
    parser.add_argument("--no-paths", dest='show_paths', action='store_const', const=False)
    parser.add_argument("--ranks", dest='show_ranks', action='store_const', const=True, default=True)
    parser.add_argument("--no-ranks", dest='show_ranks', action='store_const', const=False)
    parser.add_argument("--table", action='store_true')
    parser.add_argument("-n", default=1, type=int)
    cli_args = parser.parse_args()

    # deduplicate, then duplicate if requested.
    # Blank lines are skipped: they would otherwise become an empty tuple and
    # crash the three-way unpacking in the loop below.
    data = sorted({
        tuple(stnum_code.split())
        for stnum_code in sys.stdin
        if stnum_code.strip()
    }) * cli_args.n

    if not data:
        print('expects a list of "stnum catalog-year areacode" to stdin', file=sys.stderr)
        return 1

    if cli_args.table:
        print('stnum,catalog,area_code,gpa,rank,max', flush=True)

    for stnum, catalog, area_code in data:
        student_file = os.path.join(cli_args.dir, f"{stnum}.json")

        args = Arguments(print_all=False, transcript_only=cli_args.transcript)

        area_file = find_area(
            root=pathlib.Path(cli_args.areas_dir),
            area_catalog=int(catalog.split('-')[0]),
            area_code=area_code,
        )

        if not area_file:
            # print() does not interpolate %-style arguments, so format the
            # message explicitly; it goes to stderr to keep --table output clean.
            print(
                'could not find area spec for %s at or below catalog %s, under %s'
                % (area_code, catalog, cli_args.areas_dir),
                file=sys.stderr,
            )
            return 1

        if cli_args.invocation:
            print(f"python3 dp.py --student '{student_file}' --area '{area_file}'")
            continue

        student = load_student(student_file)
        area_spec = load_area(area_file)

        if not cli_args.quiet and not cli_args.table:
            print(f"auditing #{student['stnum']} against {area_file}", file=sys.stderr)

        try:
            for msg in run(args, area_spec=area_spec, student=student):
                if isinstance(msg, NoAuditsCompletedMsg):
                    print('no audits completed', file=sys.stderr)
                    return 2

                elif isinstance(msg, EstimateMsg):
                    print("estimate completed", file=sys.stderr)

                elif isinstance(msg, ProgressMsg):
                    if not cli_args.quiet:
                        avg_iter_time = pretty_ms(msg.avg_iter_ms, format_sub_ms=True)
                        print(f"{msg.iters:,} at {avg_iter_time} per audit (best: {msg.best_rank})", file=sys.stderr)

                elif isinstance(msg, ResultMsg):
                    # Round-trip through JSON so the summary sees plain JSON types.
                    result = json.loads(json.dumps(msg.result.to_dict()))

                    if cli_args.table:
                        print(','.join([
                            stnum,
                            catalog,
                            area_code,
                            str(round(float(result['gpa']), 2)),
                            str(round(float(result['rank']), 2)),
                            str(round(float(result['max_rank']))),
                        ]), flush=True)
                    else:
                        print("\n" + "".join(summarize(
                            result=result,
                            transcript=msg.transcript,
                            count=msg.iters,
                            avg_iter_ms=msg.avg_iter_ms,
                            elapsed=pretty_ms(msg.elapsed_ms),
                            show_paths=cli_args.show_paths,
                            show_ranks=cli_args.show_ranks,
                            claims=msg.result.keyed_claims(),
                        )))

                else:
                    if not cli_args.quiet:
                        print('unknown message %s' % msg, file=sys.stderr)
                    return 1

        except Exception as ex:
            print(f"error during audit of #{student['stnum']} against {area_file}", file=sys.stderr)
            print(ex, file=sys.stderr)
            return 1

    return 0
def main() -> int:  # noqa: C901
    """Audit a single student against a single area from the command line.

    Loads the first student from ``--student`` and the first area from
    ``--area``, feeds them to ``run``, and reports each message it yields.
    The ``--tracemalloc-*`` flags collect memory snapshots (at the first
    progress message, at every progress message, and/or at the end) and dump
    them as tab-separated values once the audit finishes.

    Returns:
        ``0`` on success, ``1`` on an unknown message type, ``2`` when ``run``
        reports that no audits completed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--area", dest="area_file")
    parser.add_argument("--student", dest="student_file")
    parser.add_argument("--loglevel", dest="loglevel", choices=("warn", "debug", "info", "critical"), default="info")
    parser.add_argument("--json", action='store_true')
    parser.add_argument("--csv", action='store_true')
    parser.add_argument("--print-all", action='store_true')
    parser.add_argument("--stop-after", action='store', type=int)
    parser.add_argument("--progress-every", action='store', type=int, default=1_000)
    parser.add_argument("--estimate", action='store_true')
    parser.add_argument("--transcript", action='store_true')
    parser.add_argument("--gpa", action='store_true')
    parser.add_argument("--quiet", "-q", action='store_true')
    parser.add_argument("--tracemalloc-init", action='store_true')
    parser.add_argument("--tracemalloc-end", action='store_true')
    parser.add_argument("--tracemalloc-each", action='store_true')
    parser.add_argument("--paths", dest='show_paths', action='store_const', const=True, default=True)
    parser.add_argument("--no-paths", dest='show_paths', action='store_const', const=False)
    parser.add_argument("--ranks", dest='show_ranks', action='store_const', const=True, default=True)
    parser.add_argument("--no-ranks", dest='show_ranks', action='store_const', const=False)
    cli_args = parser.parse_args()

    loglevel = getattr(logging, cli_args.loglevel.upper())
    logging.basicConfig(level=loglevel, format=logformat)

    if cli_args.estimate:
        os.environ['DP_ESTIMATE'] = '1'

    has_tracemalloc = cli_args.tracemalloc_init or cli_args.tracemalloc_end or cli_args.tracemalloc_each

    args = Arguments(
        gpa_only=cli_args.gpa,
        print_all=cli_args.print_all,
        progress_every=cli_args.progress_every,
        stop_after=cli_args.stop_after,
        transcript_only=cli_args.transcript,
        estimate_only=cli_args.estimate,
    )

    if has_tracemalloc:
        import tracemalloc
        tracemalloc.start()

    first_progress_message = True
    # key from process_top -> {snapshot index -> value}
    top_mem_items: Dict[str, Dict[int, float]] = defaultdict(dict)
    tracemalloc_index = 0

    student = load_students(cli_args.student_file)[0]
    area_spec = load_areas(cli_args.area_file)[0]

    if not cli_args.quiet:
        print(f"auditing #{student['stnum']} against {cli_args.area_file}", file=sys.stderr)

    for msg in run(args, student=student, area_spec=area_spec):
        if isinstance(msg, NoAuditsCompletedMsg):
            logger.critical('no audits completed')
            return 2

        elif isinstance(msg, EstimateMsg):
            if not cli_args.quiet:
                print(f"{msg.estimate:,} estimated solution{'s' if msg.estimate != 1 else ''}", file=sys.stderr)

        elif isinstance(msg, ProgressMsg):
            if (cli_args.tracemalloc_init and first_progress_message) or cli_args.tracemalloc_each:
                snapshot = tracemalloc.take_snapshot()
                for k, v in process_top(snapshot):
                    top_mem_items[k][tracemalloc_index] = v
                tracemalloc_index += 1
                first_progress_message = False

            # Progress is still printed in quiet mode when memory tracing at
            # progress points was requested.
            if not cli_args.quiet or (cli_args.tracemalloc_init or cli_args.tracemalloc_each):
                avg_iter_time = pretty_ms(msg.avg_iter_ms, format_sub_ms=True)
                print(f"{msg.iters:,} at {avg_iter_time} per audit (best: {msg.best_rank})", file=sys.stderr)

        elif isinstance(msg, ResultMsg):
            if not cli_args.quiet:
                print(result_str(
                    msg,
                    as_json=cli_args.json,
                    as_csv=cli_args.csv,
                    gpa_only=cli_args.gpa,
                    show_paths=cli_args.show_paths,
                    show_ranks=cli_args.show_ranks,
                ))

        else:
            if not cli_args.quiet:
                logger.critical('unknown message %s', msg)
            return 1

    if cli_args.tracemalloc_end:
        snapshot = tracemalloc.take_snapshot()
        for k, v in process_top(snapshot):
            top_mem_items[k][tracemalloc_index] = v

    if has_tracemalloc:
        # default=-1 keeps this from raising ValueError when no snapshot was
        # ever recorded (e.g. --tracemalloc-init with no progress messages);
        # the ranges below are then empty and nothing is printed.
        longest = max(
            (index for item in top_mem_items.values() for index in item),
            default=-1,
        )

        # NOTE(review): the header row is not newline-terminated, so the first
        # data row continues on the same output line — confirm this is intended.
        for tracemalloc_index in range(0, longest + 1):
            print(tracemalloc_index * 10_000, end='\t')

        for file, datapoints in top_mem_items.items():
            print(file, end='\t')
            for i in range(0, longest + 1):
                print(f"{datapoints.get(i, 0):.1f}", end='\t')
            print()

    return 0