Пример #1
0
def log_input(filename, source):
    """Log input to the database.

    Called by patched functions that do some sort of input (reading from a file
    etc) with the filename and some sort of information about the source.

    ``filename`` may be a path string, an object exposing its path as a
    ``name`` attribute, or a list of either (each entry is logged
    separately).

    Note: ``source`` itself is not stored; it is only passed to
    ``get_version`` and the result is appended to the run's "libraries".
    """
    # Some packages, e.g., xarray, accept a list of files as input argument
    if isinstance(filename, list):
        for f in filename:
            log_input(f, source)
        return
    if not isinstance(filename, six.string_types):
        # Use the object's ``name`` when it has one; otherwise pass the
        # object on unchanged. Only a missing attribute is tolerated.
        filename = getattr(filename, 'name', filename)
    filename = os.path.abspath(filename)

    # The hash is recorded alongside the path unless hashing is disabled.
    if option_set('ignored metadata', 'input_hashes'):
        record = filename
    else:
        record = (filename, hash_file(filename))

    if option_set('general', 'debug'):
        print("Input from %s using %s" % (record, source))

    # Update object in DB
    version = get_version(source)
    db = open_or_create_db()
    try:
        db.update(append("inputs", record, no_duplicates=True), eids=[RUN_ID])
        db.update(append("libraries", version, no_duplicates=True),
                  eids=[RUN_ID])
    finally:
        # Release the database even when an update fails.
        db.close()
Пример #2
0
def log_output(filename, source):
    """Log output to the database.

    Called by patched functions that do some sort of output (writing to a file
    etc) with the filename and some sort of information about the source.

    ``filename`` may be a path string, an object exposing its path as a
    ``name`` attribute, or a list of either (each entry is logged
    separately).

    Note: ``source`` itself is not stored; it is only passed to
    ``get_version`` and the result is appended to the run's "libraries".
    """
    if isinstance(filename, list):
        for f in filename:
            log_output(f, source)
        return
    if not isinstance(filename, six.string_types):
        # Use the object's ``name`` when it has one; otherwise pass the
        # object on unchanged. Only a missing attribute is tolerated.
        filename = getattr(filename, 'name', filename)
    filename = os.path.abspath(filename)

    version = get_version(source)
    db = open_or_create_db()
    try:
        if option_set('data', 'file_diff_outputs') \
           and os.path.isfile(filename) and not is_binary(filename):
            # Keep a copy of the existing file so it can be diffed later.
            # The handle is closed straight away (the file itself stays on
            # disk because delete=False) so it is not leaked and so that
            # copy2 can reopen the path on platforms that forbid a second
            # open of a still-open temporary file.
            tf = tempfile.NamedTemporaryFile(delete=False)
            tf.close()
            shutil.copy2(filename, tf.name)
            add_file_diff_to_db(filename, tf.name, db)

        if option_set('general', 'debug'):
            print("Output to %s using %s" % (filename, source))

        # Update object in DB
        # data hash will be hashed at script exit, if enabled
        db.update(append("outputs", filename, no_duplicates=True),
                  eids=[RUN_ID])
        db.update(append("libraries", version, no_duplicates=True),
                  eids=[RUN_ID])
    finally:
        # Release the database even when something above fails.
        db.close()
Пример #3
0
def log_input(filename, source):
    """Log input to the database.

    Called by patched functions that do some sort of input (reading from a file
    etc) with the filename and some sort of information about the source.

    ``filename`` may be a path string, an object exposing its path as a
    ``name`` attribute, or a list of either (each entry is logged
    separately).

    Note: ``source`` itself is not stored; it is only passed to
    ``get_version`` and the result is appended to the run's "libraries".
    """
    # Some callers hand over several files at once; log them one by one.
    if isinstance(filename, list):
        for f in filename:
            log_input(f, source)
        return
    if not isinstance(filename, str):
        # Use the object's ``name`` when it has one; otherwise pass the
        # object on unchanged. Only a missing attribute is tolerated.
        filename = getattr(filename, 'name', filename)
    filename = os.path.abspath(filename)

    # The hash is recorded alongside the path only when hashing is enabled.
    if option_set('data', 'hash_inputs'):
        record = (filename, hash_file(filename))
    else:
        record = filename

    if option_set('general', 'debug'):
        print("Input from %s using %s" % (record, source))

    # Update object in DB
    db = open_or_create_db()
    try:
        db.update(append("inputs", record, no_duplicates=True), eids=[RUN_ID])
        db.update(append("libraries", get_version(source), no_duplicates=True),
                  eids=[RUN_ID])
    finally:
        # Release the database even when an update fails.
        db.close()
Пример #4
0
def log_output(filename, source):
    """Log output to the database.

    Called by patched functions that do some sort of output (writing to a file
    etc) with the filename and some sort of information about the source.

    ``filename`` may be a path string, an object exposing its path as a
    ``name`` attribute, or a list of either (each entry is logged
    separately).

    Note: ``source`` itself is not stored; it is only passed to
    ``get_version`` and the result is appended to the run's "libraries".
    """
    # Some callers hand over several files at once; log them one by one.
    if isinstance(filename, list):
        for f in filename:
            log_output(f, source)
        return
    if not isinstance(filename, str):
        # Use the object's ``name`` when it has one; otherwise pass the
        # object on unchanged. Only a missing attribute is tolerated.
        filename = getattr(filename, 'name', filename)
    filename = os.path.abspath(filename)

    db = open_or_create_db()
    try:
        if option_set('data', 'file_diff_outputs') \
           and os.path.isfile(filename):
            # Keep a copy of the existing file so it can be diffed later.
            # The handle is closed straight away (the file itself stays on
            # disk because delete=False) so it is not leaked and so that
            # copy2 can reopen the path on platforms that forbid a second
            # open of a still-open temporary file.
            tf = tempfile.NamedTemporaryFile(delete=False)
            tf.close()
            shutil.copy2(filename, tf.name)
            add_file_diff_to_db(filename, tf.name, db)

        if option_set('general', 'debug'):
            print("Output to %s using %s" % (filename, source))

        # Update object in DB
        # data hash will be hashed at script exit, if enabled
        db.update(append("outputs", filename, no_duplicates=True),
                  eids=[RUN_ID])
        db.update(append("libraries", get_version(source), no_duplicates=True),
                  eids=[RUN_ID])
    finally:
        # Release the database even when something above fails.
        db.close()
Пример #5
0
def log_input(filename, source):
    """Log input to the database.

    Called by patched functions that do some sort of input (reading from a file
    etc) with the filename and some sort of information about the source.

    ``filename`` may be a path string, an object exposing its path as a
    ``name`` attribute, or a list of either (each entry is logged
    separately).

    Note: ``source`` itself is not stored; it is only passed to
    ``get_version`` and the result is appended to the run's "libraries".
    """
    # Some callers hand over several files at once; log them one by one.
    if isinstance(filename, list):
        for entry in filename:
            log_input(entry, source)
        return
    if not isinstance(filename, str):
        # Use the object's ``name`` when it has one; otherwise pass the
        # object on unchanged. Only a missing attribute is tolerated.
        filename = getattr(filename, 'name', filename)
    filename = os.path.abspath(filename)

    # The hash is recorded alongside the path only when hashing is enabled.
    if option_set('data', 'hash_inputs'):
        record = (filename, hash_file(filename))
    else:
        record = filename

    if option_set('general', 'debug'):
        print("Input from %s using %s" % (record, source))

    # Update object in DB
    db = open_or_create_db()
    try:
        db.update(append("inputs", record, no_duplicates=True), eids=[RUN_ID])
        db.update(append("libraries", get_version(source), no_duplicates=True),
                  eids=[RUN_ID])
    finally:
        # Release the database even when an update fails.
        db.close()
Пример #6
0
def log_init(notebookName=None):
    """Do the initial logging for a new run.

    Works out what script has been run, creates a new unique run ID,
    and gets the basic metadata.

    This is called when running `import recipy`.

    Args:
        notebookName: Name recorded as the script when running in notebook
            mode. In notebook mode a call without a name does nothing.
            When a name is given, git and svn metadata are not collected.

    Side effects:
        Inserts the run into the database and stores the returned ID in the
        module-level ``RUN_ID``, warns about every module listed in the
        'patches' table that is already present in ``sys.modules``, and
        installs ``log_exception`` as ``sys.excepthook``.
    """
    global RUN_ID

    notebookMode = get_notebook_mode()
    if notebookMode and notebookName is None:
        # Avoid first call without Notebook name
        return

    if notebookMode:
        scriptpath = notebookName
        cmd_args = sys.argv[1:]
    # Get the path of the script we're running
    # When running python -m recipy ..., during the recipy import argument 0
    # is -c (for Python 2) or -m (for Python 3) and the script is argument 1
    elif sys.argv[0] in ['-c', '-m']:
        # Has the user called python -m recipy without further arguments?
        if len(sys.argv) < 2:
            return
        scriptpath = os.path.realpath(sys.argv[1])
        cmd_args = sys.argv[2:]
    else:
        scriptpath = os.path.realpath(sys.argv[0])
        cmd_args = sys.argv[1:]

    # Open the database
    db = open_or_create_db()
    try:
        # Create the unique ID for this run
        guid = str(uuid.uuid4())

        # Get general metadata, environment info, etc
        run = {
            "unique_id": guid,
            "author": getpass.getuser(),
            "description": "",
            "inputs": [],
            "outputs": [],
            "script": scriptpath,
            "command": sys.executable,
            "environment": [platform.platform(),
                            "python " + sys.version.split('\n')[0]],
            "date": datetime.datetime.utcnow(),
            "command_args": " ".join(cmd_args),
            "warnings": [],
            "libraries": [get_version('recipy')],
            "custom_values": {}
        }

        # Version-control metadata is only gathered when no notebook name
        # was supplied.
        if not notebookName and not option_set('ignored metadata', 'git'):
            add_git_info(run, scriptpath)

        if not notebookName and not option_set('ignored metadata', 'svn'):
            add_svn_info(run, scriptpath)

        # Put basics into DB
        RUN_ID = db.insert(run)

        # Print message
        if not option_set('general', 'quiet'):
            print("recipy run inserted, with ID %s" % (guid))

        # check whether patched modules were imported before recipy was
        # imported
        patches = db.table('patches')

        for p in patches.all():
            if p['modulename'] in sys.modules:
                msg = 'not tracking inputs and outputs for {}; recipy was ' \
                      'imported after this module'.format(p['modulename'])
                warnings.warn(msg, stacklevel=3)
    finally:
        # Release the database even if collecting metadata, inserting the
        # run, or emitting a warning raised.
        db.close()

    # Register exception hook so exceptions can be logged
    sys.excepthook = log_exception
Пример #7
0
def test_get_version_recipy():
    """get_version('recipy') must equal 'recipy v' plus __version__."""
    reported = libraryversions.get_version('recipy')
    expected = 'recipy v{}'.format(__version__)
    assert_equal(reported, expected)
Пример #8
0
def test_get_version_unknown_library():
    """get_version('unknown') must return the 'unknown v?' placeholder."""
    reported = libraryversions.get_version('unknown')
    assert_equal(reported, 'unknown v?')
Пример #9
0
def test_get_version_recipy():
    """Check the version string reported for recipy against __version__."""
    actual = libraryversions.get_version('recipy')
    wanted = 'recipy v{}'.format(__version__)
    assert_equal(actual, wanted)
Пример #10
0
def test_get_version_unknown_library():
    """Check that the name 'unknown' is reported as 'unknown v?'."""
    actual = libraryversions.get_version('unknown')
    wanted = 'unknown v?'
    assert_equal(actual, wanted)
Пример #11
0
def log_init():
    """Do the initial logging for a new run.

    Works out what script has been run, creates a new unique run ID,
    and gets the basic metadata.

    This is called when running `import recipy`.

    Side effects:
        Inserts the run into the database and stores the returned ID in the
        module-level ``RUN_ID``, warns about every module listed in the
        'patches' table that is already present in ``sys.modules``, and
        installs ``log_exception`` as ``sys.excepthook``. Nothing happens
        when ``sys.argv`` is just ``-c``/``-m`` with no further argument.
    """
    global RUN_ID

    # Get the path of the script we're running
    # When running python -m recipy ..., during the recipy import argument 0
    # is -c (for Python 2) or -m (for Python 3) and the script is argument 1
    if sys.argv[0] in ['-c', '-m']:
        # Has the user called python -m recipy without further arguments?
        if len(sys.argv) < 2:
            return
        scriptpath = os.path.realpath(sys.argv[1])
        cmd_args = sys.argv[2:]
    else:
        scriptpath = os.path.realpath(sys.argv[0])
        cmd_args = sys.argv[1:]

    # Open the database
    db = open_or_create_db()
    try:
        # Create the unique ID for this run
        guid = str(uuid.uuid4())

        # Get general metadata, environment info, etc
        run = {
            "unique_id": guid,
            "author": getpass.getuser(),
            "description": "",
            "inputs": [],
            "outputs": [],
            "script": scriptpath,
            "command": sys.executable,
            "environment": [platform.platform(),
                            "python " + sys.version.split('\n')[0]],
            "date": datetime.datetime.utcnow(),
            "command_args": " ".join(cmd_args),
            "warnings": [],
            "libraries": [get_version('recipy')]
        }

        if not option_set('ignored metadata', 'git'):
            try:
                repo = Repo(scriptpath, search_parent_directories=True)
                run["gitrepo"] = repo.working_dir
                run["gitcommit"] = repo.head.commit.hexsha
                run["gitorigin"] = get_origin(repo)

                if not option_set('ignored metadata', 'diff'):
                    # Diff of the index against the working tree; every
                    # patch is prefixed by three newlines.
                    diffs = repo.index.diff(None, create_patch=True)
                    run['diff'] = "".join(
                        "\n\n\n" + diff.diff.decode("utf-8")
                        for diff in diffs)
            except (InvalidGitRepositoryError, ValueError):
                # We can't store git info for some reason, so just skip it
                pass

        # Put basics into DB
        RUN_ID = db.insert(run)

        # Print message
        if not option_set('general', 'quiet'):
            print("recipy run inserted, with ID %s" % (guid))

        # check whether patched modules were imported before recipy was
        # imported
        patches = db.table('patches')

        for p in patches.all():
            if p['modulename'] in sys.modules:
                msg = 'not tracking inputs and outputs for {}; recipy was ' \
                      'imported after this module'.format(p['modulename'])
                warnings.warn(msg, stacklevel=3)
    finally:
        # Release the database even if collecting metadata, inserting the
        # run, or emitting a warning raised.
        db.close()

    # Register exception hook so exceptions can be logged
    sys.excepthook = log_exception
Пример #12
0
def _add_git_metadata(run, scriptpath):
    """Add git repository, commit, origin and (optionally) diff to ``run``.

    Does nothing beyond what was already stored when the repository cannot
    be opened or inspected.
    """
    try:
        repo = Repo(scriptpath, search_parent_directories=True)
        run["gitrepo"] = repo.working_dir
        run["gitcommit"] = repo.head.commit.hexsha
        run["gitorigin"] = get_origin(repo)

        if not option_set('ignored metadata', 'diff'):
            # Diff of the index against the working tree; every patch is
            # prefixed by three newlines.
            diffs = repo.index.diff(None, create_patch=True)
            run['diff'] = "".join(
                "\n\n\n" + diff.diff.decode("utf-8") for diff in diffs)
    except (InvalidGitRepositoryError, ValueError):
        # We can't store git info for some reason, so just skip it
        pass


def log_init():
    """Do the initial logging for a new run.

    Works out what script has been run, creates a new unique run ID,
    and gets the basic metadata.

    This is called when running `import recipy`.

    Side effects:
        Inserts the run into the database and stores the returned ID in the
        module-level ``RUN_ID``, warns about every module listed in the
        'patches' table that is already present in ``sys.modules``, and
        installs ``log_exception`` as ``sys.excepthook``. Nothing happens
        when ``sys.argv`` is just ``-c``/``-m`` with no further argument.
    """
    global RUN_ID

    # Get the path of the script we're running
    # When running python -m recipy ..., during the recipy import argument 0
    # is -c (for Python 2) or -m (for Python 3) and the script is argument 1
    if sys.argv[0] in ['-c', '-m']:
        # Has the user called python -m recipy without further arguments?
        if len(sys.argv) < 2:
            return
        scriptpath = os.path.realpath(sys.argv[1])
        cmd_args = sys.argv[2:]
    else:
        scriptpath = os.path.realpath(sys.argv[0])
        cmd_args = sys.argv[1:]

    # Open the database
    db = open_or_create_db()
    try:
        # Create the unique ID for this run
        guid = str(uuid.uuid4())

        # Get general metadata, environment info, etc
        run = {
            "unique_id": guid,
            "author": getpass.getuser(),
            "description": "",
            "inputs": [],
            "outputs": [],
            "script": scriptpath,
            "command": sys.executable,
            "environment": [platform.platform(),
                            "python " + sys.version.split('\n')[0]],
            "date": datetime.datetime.utcnow(),
            "command_args": " ".join(cmd_args),
            "warnings": [],
            "libraries": [get_version('recipy')]
        }

        if not option_set('ignored metadata', 'git'):
            _add_git_metadata(run, scriptpath)

        # Put basics into DB
        RUN_ID = db.insert(run)

        # Print message
        if not option_set('general', 'quiet'):
            print("recipy run inserted, with ID %s" % (guid))

        # check whether patched modules were imported before recipy was
        # imported
        patches = db.table('patches')

        for p in patches.all():
            if p['modulename'] in sys.modules:
                msg = 'not tracking inputs and outputs for {}; recipy was ' \
                      'imported after this module'.format(p['modulename'])
                warnings.warn(msg, stacklevel=3)
    finally:
        # Release the database even if collecting metadata, inserting the
        # run, or emitting a warning raised.
        db.close()

    # Register exception hook so exceptions can be logged
    sys.excepthook = log_exception