def log_input(filename, source):
    """Log input to the database.

    Called by patched functions that do some sort of input (reading from a
    file etc) with the filename and some sort of information about the source.

    ``filename`` may be a path string, an object exposing the path through a
    ``name`` attribute (e.g. an open file), or a list of either; every element
    of a list is logged separately.

    Unless the 'input_hashes' option of the 'ignored metadata' section is set,
    the stored record is a ``(path, hash)`` tuple; otherwise only the absolute
    path is stored.

    Note: the source parameter itself is not stored in the database; only the
    result of ``get_version(source)`` is appended to the run's "libraries".
    """
    # Some packages, e.g., xarray, accept a list of files as input argument
    if isinstance(filename, list):
        for f in filename:
            log_input(f, source)
        return

    if not isinstance(filename, six.string_types):
        # File-like objects carry their path in ``.name``; anything without
        # that attribute is passed on unchanged.
        filename = getattr(filename, 'name', filename)
    filename = os.path.abspath(filename)

    if option_set('ignored metadata', 'input_hashes'):
        record = filename
    else:
        record = (filename, hash_file(filename))

    if option_set('general', 'debug'):
        print("Input from %s using %s" % (record, source))

    # Update object in DB
    version = get_version(source)
    db = open_or_create_db()
    try:
        db.update(append("inputs", record, no_duplicates=True), eids=[RUN_ID])
        db.update(append("libraries", version, no_duplicates=True),
                  eids=[RUN_ID])
    finally:
        # Release the database even when an update fails.
        db.close()
def log_output(filename, source):
    """Log output to the database.

    Called by patched functions that do some sort of output (writing to a
    file etc) with the filename and some sort of information about the source.

    ``filename`` may be a path string, an object exposing the path through a
    ``name`` attribute (e.g. an open file), or a list of either; every element
    of a list is logged separately.

    When the 'file_diff_outputs' option of the 'data' section is set and the
    target already exists as a non-binary file, a copy of its current content
    is saved to a temporary file and handed to ``add_file_diff_to_db``.

    Note: the source parameter itself is not stored in the database; only the
    result of ``get_version(source)`` is appended to the run's "libraries".
    """
    if isinstance(filename, list):
        for f in filename:
            log_output(f, source)
        return

    if not isinstance(filename, six.string_types):
        # File-like objects carry their path in ``.name``; anything without
        # that attribute is passed on unchanged.
        filename = getattr(filename, 'name', filename)
    filename = os.path.abspath(filename)

    version = get_version(source)
    db = open_or_create_db()
    try:
        if option_set('data', 'file_diff_outputs') \
                and os.path.isfile(filename) and not is_binary(filename):
            # Only the temporary *path* is needed. Close the handle right
            # away so it is not leaked and so the copy below can reopen the
            # file (required on Windows). delete=False keeps the file on disk.
            with tempfile.NamedTemporaryFile(delete=False) as tf:
                backup_path = tf.name
            shutil.copy2(filename, backup_path)
            add_file_diff_to_db(filename, backup_path, db)

        if option_set('general', 'debug'):
            print("Output to %s using %s" % (filename, source))

        # Update object in DB
        # data hash will be hashed at script exit, if enabled
        db.update(append("outputs", filename, no_duplicates=True),
                  eids=[RUN_ID])
        db.update(append("libraries", version, no_duplicates=True),
                  eids=[RUN_ID])
    finally:
        # Release the database even when an update fails.
        db.close()
def log_input(filename, source):
    """Log input to the database.

    Called by patched functions that do some sort of input (reading from a
    file etc) with the filename and some sort of information about the source.

    ``filename`` may be a path string or an object exposing the path through
    a ``name`` attribute (e.g. an open file).

    When the 'hash_inputs' option of the 'data' section is set, the stored
    record is a ``(path, hash)`` tuple; otherwise only the absolute path is
    stored.

    Note: the source parameter itself is not stored in the database; only the
    result of ``get_version(source)`` is appended to the run's "libraries".
    """
    if not isinstance(filename, str):
        # File-like objects carry their path in ``.name``; anything without
        # that attribute is passed on unchanged.
        filename = getattr(filename, 'name', filename)
    filename = os.path.abspath(filename)

    if option_set('data', 'hash_inputs'):
        record = (filename, hash_file(filename))
    else:
        record = filename

    if option_set('general', 'debug'):
        print("Input from %s using %s" % (record, source))

    # Update object in DB
    db = open_or_create_db()
    try:
        db.update(append("inputs", record, no_duplicates=True), eids=[RUN_ID])
        db.update(append("libraries", get_version(source),
                         no_duplicates=True), eids=[RUN_ID])
    finally:
        # Release the database even when an update fails.
        db.close()
def log_output(filename, source):
    """Log output to the database.

    Called by patched functions that do some sort of output (writing to a
    file etc) with the filename and some sort of information about the source.

    ``filename`` may be a path string or an object exposing the path through
    a ``name`` attribute (e.g. an open file).

    When the 'file_diff_outputs' option of the 'data' section is set and the
    target already exists as a file, a copy of its current content is saved
    to a temporary file and handed to ``add_file_diff_to_db``.

    Note: the source parameter itself is not stored in the database; only the
    result of ``get_version(source)`` is appended to the run's "libraries".
    """
    if not isinstance(filename, str):
        # File-like objects carry their path in ``.name``; anything without
        # that attribute is passed on unchanged.
        filename = getattr(filename, 'name', filename)
    filename = os.path.abspath(filename)

    db = open_or_create_db()
    try:
        if option_set('data', 'file_diff_outputs') \
                and os.path.isfile(filename):
            # Only the temporary *path* is needed. Close the handle right
            # away so it is not leaked and so the copy below can reopen the
            # file (required on Windows). delete=False keeps the file on disk.
            with tempfile.NamedTemporaryFile(delete=False) as tf:
                backup_path = tf.name
            shutil.copy2(filename, backup_path)
            add_file_diff_to_db(filename, backup_path, db)

        if option_set('general', 'debug'):
            print("Output to %s using %s" % (filename, source))

        # Update object in DB
        # data hash will be hashed at script exit, if enabled
        db.update(append("outputs", filename, no_duplicates=True),
                  eids=[RUN_ID])
        db.update(append("libraries", get_version(source),
                         no_duplicates=True), eids=[RUN_ID])
    finally:
        # Release the database even when an update fails.
        db.close()
def log_input(filename, source):
    """Log input to the database.

    Called by patched functions that do some sort of input (reading from a
    file etc) with the filename and some sort of information about the source.

    ``filename`` may be a path string or an object exposing the path through
    a ``name`` attribute (e.g. an open file).

    When the 'hash_inputs' option of the 'data' section is set, the stored
    record is a ``(path, hash)`` tuple; otherwise only the absolute path is
    stored.

    Note: the source parameter itself is not stored in the database; only the
    result of ``get_version(source)`` is appended to the run's "libraries".
    """
    if not isinstance(filename, str):
        # File-like objects carry their path in ``.name``; anything without
        # that attribute is passed on unchanged.
        filename = getattr(filename, 'name', filename)
    filename = os.path.abspath(filename)

    if option_set('data', 'hash_inputs'):
        record = (filename, hash_file(filename))
    else:
        record = filename

    if option_set('general', 'debug'):
        print("Input from %s using %s" % (record, source))

    # Update object in DB
    db = open_or_create_db()
    try:
        db.update(append("inputs", record, no_duplicates=True), eids=[RUN_ID])
        db.update(append("libraries", get_version(source),
                         no_duplicates=True), eids=[RUN_ID])
    finally:
        # Release the database even when an update fails.
        db.close()
def log_init(notebookName=None):
    """Do the initial logging for a new run.

    Works out what script has been run, creates a new unique run ID,
    and gets the basic metadata.

    This is called when running `import recipy`.

    In notebook mode the call is a no-op until ``notebookName`` is supplied;
    the notebook name is then recorded as the script and no git/svn
    information is collected.

    Side effects: sets the module-level ``RUN_ID`` to the ID of the inserted
    run and installs ``log_exception`` as ``sys.excepthook``.
    """
    global RUN_ID

    notebookMode = get_notebook_mode()
    if notebookMode and notebookName is None:
        # Avoid first call without Notebook name
        return

    if notebookMode:
        scriptpath = notebookName
        cmd_args = sys.argv[1:]
    # Get the path of the script we're running
    # When running python -m recipy ..., during the recipy import argument 0
    # is -c (for Python 2) or -m (for Python 3) and the script is argument 1
    elif sys.argv[0] in ['-c', '-m']:
        # Has the user called python -m recipy without further arguments?
        if len(sys.argv) < 2:
            return
        scriptpath = os.path.realpath(sys.argv[1])
        cmd_args = sys.argv[2:]
    else:
        scriptpath = os.path.realpath(sys.argv[0])
        cmd_args = sys.argv[1:]

    # Open the database
    db = open_or_create_db()
    try:
        # Create the unique ID for this run
        guid = str(uuid.uuid4())

        # Get general metadata, environment info, etc
        run = {
            "unique_id": guid,
            "author": getpass.getuser(),
            "description": "",
            "inputs": [],
            "outputs": [],
            "script": scriptpath,
            "command": sys.executable,
            "environment": [platform.platform(),
                            "python " + sys.version.split('\n')[0]],
            "date": datetime.datetime.utcnow(),
            "command_args": " ".join(cmd_args),
            "warnings": [],
            "libraries": [get_version('recipy')],
            "custom_values": {},
        }

        # Version-control metadata is only gathered for regular scripts
        if not notebookName and not option_set('ignored metadata', 'git'):
            add_git_info(run, scriptpath)

        if not notebookName and not option_set('ignored metadata', 'svn'):
            add_svn_info(run, scriptpath)

        # Put basics into DB
        RUN_ID = db.insert(run)

        # Print message
        if not option_set('general', 'quiet'):
            print("recipy run inserted, with ID %s" % (guid))

        # check whether patched modules were imported before recipy was
        # imported
        patches = db.table('patches')
        for p in patches.all():
            if p['modulename'] in sys.modules:
                msg = 'not tracking inputs and outputs for {}; recipy was ' \
                      'imported after this module'.format(p['modulename'])
                warnings.warn(msg, stacklevel=3)
    finally:
        # Close the database even if collecting metadata, inserting the run
        # or emitting a warning (e.g. with warnings turned into errors) fails.
        db.close()

    # Register exception hook so exceptions can be logged
    sys.excepthook = log_exception
def test_get_version_recipy():
    """get_version('recipy') yields 'recipy v' followed by __version__."""
    reported = libraryversions.get_version('recipy')
    expected = 'recipy v{}'.format(__version__)
    assert_equal(reported, expected)
def test_get_version_unknown_library():
    """get_version for the name 'unknown' yields the 'v?' placeholder."""
    reported = libraryversions.get_version('unknown')
    expected = 'unknown v?'
    assert_equal(reported, expected)
def test_get_version_recipy():
    """get_version('recipy') yields 'recipy v' followed by __version__."""
    reported = libraryversions.get_version('recipy')
    expected = 'recipy v{}'.format(__version__)
    assert_equal(reported, expected)
def test_get_version_unknown_library():
    """get_version for the name 'unknown' yields the 'v?' placeholder."""
    reported = libraryversions.get_version('unknown')
    expected = 'unknown v?'
    assert_equal(reported, expected)
def log_init():
    """Do the initial logging for a new run.

    Works out what script has been run, creates a new unique run ID,
    and gets the basic metadata.

    This is called when running `import recipy`.

    Side effects: sets the module-level ``RUN_ID`` to the ID of the inserted
    run and installs ``log_exception`` as ``sys.excepthook``.
    """
    global RUN_ID

    # Get the path of the script we're running
    # When running python -m recipy ..., during the recipy import argument 0
    # is -c (for Python 2) or -m (for Python 3) and the script is argument 1
    if sys.argv[0] in ['-c', '-m']:
        # Has the user called python -m recipy without further arguments?
        if len(sys.argv) < 2:
            return
        scriptpath = os.path.realpath(sys.argv[1])
        cmd_args = sys.argv[2:]
    else:
        scriptpath = os.path.realpath(sys.argv[0])
        cmd_args = sys.argv[1:]

    # Open the database
    db = open_or_create_db()
    try:
        # Create the unique ID for this run
        guid = str(uuid.uuid4())

        # Get general metadata, environment info, etc
        run = {
            "unique_id": guid,
            "author": getpass.getuser(),
            "description": "",
            "inputs": [],
            "outputs": [],
            "script": scriptpath,
            "command": sys.executable,
            "environment": [platform.platform(),
                            "python " + sys.version.split('\n')[0]],
            "date": datetime.datetime.utcnow(),
            "command_args": " ".join(cmd_args),
            "warnings": [],
            "libraries": [get_version('recipy')],
        }

        if not option_set('ignored metadata', 'git'):
            try:
                repo = Repo(scriptpath, search_parent_directories=True)
                run["gitrepo"] = repo.working_dir
                run["gitcommit"] = repo.head.commit.hexsha
                run["gitorigin"] = get_origin(repo)

                if not option_set('ignored metadata', 'diff'):
                    # Concatenate the patches of all uncommitted changes,
                    # each preceded by three newlines.
                    diffs = repo.index.diff(None, create_patch=True)
                    run['diff'] = ''.join(
                        "\n\n\n" + diff.diff.decode("utf-8")
                        for diff in diffs)
            except (InvalidGitRepositoryError, ValueError):
                # We can't store git info for some reason, so just skip it
                pass

        # Put basics into DB
        RUN_ID = db.insert(run)

        # Print message
        if not option_set('general', 'quiet'):
            print("recipy run inserted, with ID %s" % (guid))

        # check whether patched modules were imported before recipy was
        # imported
        patches = db.table('patches')
        for p in patches.all():
            if p['modulename'] in sys.modules:
                msg = 'not tracking inputs and outputs for {}; recipy was ' \
                      'imported after this module'.format(p['modulename'])
                warnings.warn(msg, stacklevel=3)
    finally:
        # Close the database even if collecting metadata, inserting the run
        # or emitting a warning (e.g. with warnings turned into errors) fails.
        db.close()

    # Register exception hook so exceptions can be logged
    sys.excepthook = log_exception
def log_init():
    """Do the initial logging for a new run.

    Works out what script has been run, creates a new unique run ID,
    and gets the basic metadata.

    This is called when running `import recipy`.

    Side effects: sets the module-level ``RUN_ID`` to the ID of the inserted
    run and installs ``log_exception`` as ``sys.excepthook``.
    """
    global RUN_ID

    # Get the path of the script we're running
    # When running python -m recipy ..., during the recipy import argument 0
    # is -c (for Python 2) or -m (for Python 3) and the script is argument 1
    if sys.argv[0] in ['-c', '-m']:
        # Has the user called python -m recipy without further arguments?
        if len(sys.argv) < 2:
            return
        scriptpath = os.path.realpath(sys.argv[1])
        cmd_args = sys.argv[2:]
    else:
        scriptpath = os.path.realpath(sys.argv[0])
        cmd_args = sys.argv[1:]

    # Open the database
    db = open_or_create_db()
    try:
        # Create the unique ID for this run
        guid = str(uuid.uuid4())

        # Get general metadata, environment info, etc
        run = {
            "unique_id": guid,
            "author": getpass.getuser(),
            "description": "",
            "inputs": [],
            "outputs": [],
            "script": scriptpath,
            "command": sys.executable,
            "environment": [platform.platform(),
                            "python " + sys.version.split('\n')[0]],
            "date": datetime.datetime.utcnow(),
            "command_args": " ".join(cmd_args),
            "warnings": [],
            "libraries": [get_version('recipy')],
        }

        if not option_set('ignored metadata', 'git'):
            try:
                repo = Repo(scriptpath, search_parent_directories=True)
                run["gitrepo"] = repo.working_dir
                run["gitcommit"] = repo.head.commit.hexsha
                run["gitorigin"] = get_origin(repo)

                if not option_set('ignored metadata', 'diff'):
                    # Concatenate the patches of all uncommitted changes,
                    # each preceded by three newlines.
                    diffs = repo.index.diff(None, create_patch=True)
                    run['diff'] = ''.join(
                        "\n\n\n" + diff.diff.decode("utf-8")
                        for diff in diffs)
            except (InvalidGitRepositoryError, ValueError):
                # We can't store git info for some reason, so just skip it
                pass

        # Put basics into DB
        RUN_ID = db.insert(run)

        # Print message
        if not option_set('general', 'quiet'):
            print("recipy run inserted, with ID %s" % (guid))

        # check whether patched modules were imported before recipy was
        # imported
        patches = db.table('patches')
        for p in patches.all():
            if p['modulename'] in sys.modules:
                msg = 'not tracking inputs and outputs for {}; recipy was ' \
                      'imported after this module'.format(p['modulename'])
                warnings.warn(msg, stacklevel=3)
    finally:
        # Close the database even if collecting metadata, inserting the run
        # or emitting a warning (e.g. with warnings turned into errors) fails.
        db.close()

    # Register exception hook so exceptions can be logged
    sys.excepthook = log_exception