示例#1
0
def load_by_counter(counter, exp_id):
    """
    Load a dataset given its counter in one experiment
    Args:
        counter: Counter of the dataset
        exp_id:  Experiment the dataset belongs to

    Returns:
        the dataset
    """
    conn = connect(get_DB_location())
    sql = """
    SELECT run_id
    FROM
      runs
    WHERE
      result_counter= ? AND
      exp_id = ?
    """
    c = transaction(conn, sql, counter, exp_id)
    run_id = one(c, 'run_id')
    conn.close()
    d = DataSet(get_DB_location(), run_id=run_id)

    return d
示例#2
0
def experiments() -> List[Experiment]:
    """
    List all the experiments in the container (database file from config)

    Returns:
        All the experiments in the container
    """
    log.info("loading experiments from {}".format(get_DB_location()))
    rows = get_experiments(connect(get_DB_location(), get_DB_debug()))
    experiments = []
    for row in rows:
        experiments.append(load_experiment(row['exp_id']))
    return experiments
示例#3
0
def load_by_counter(counter: int,
                    exp_id: int,
                    conn: Optional[ConnectionPlus] = None) -> DataSet:
    """
    Load a dataset given its counter in a given experiment

    Lookup is performed in the database file that is specified in the config.

    Args:
        counter: counter of the dataset within the given experiment
        exp_id: id of the experiment where to look for the dataset
        conn: connection to the database to load from. If not provided, a
          connection to the DB file specified in the config is made

    Returns:
        dataset of the given counter in the given experiment
    """
    conn = conn or connect(get_DB_location())
    sql = """
    SELECT run_id
    FROM
      runs
    WHERE
      result_counter= ? AND
      exp_id = ?
    """
    c = transaction(conn, sql, counter, exp_id)
    run_id = one(c, 'run_id')

    d = DataSet(conn=conn, run_id=run_id)
    return d
示例#4
0
def load_by_guid(guid: str, conn: Optional[ConnectionPlus] = None) -> DataSet:
    """
    Load a dataset by its GUID

    If no connection is provided, lookup is performed in the database file that
    is specified in the config.

    Args:
        guid: guid of the dataset
        conn: connection to the database to load from

    Returns:
        dataset with the given guid

    Raises:
        NameError: if no run with the given GUID exists in the database
        RuntimeError: if several runs with the given GUID are found
    """
    conn = conn or connect(get_DB_location())

    # this function raises a RuntimeError if more than one run matches the GUID
    run_id = get_runid_from_guid(conn, guid)

    if run_id == -1:
        raise NameError(f'No run with GUID: {guid} found in database.')

    return DataSet(run_id=run_id, conn=conn)
def load_or_create_experiment(
        experiment_name: str,
        sample_name: Optional[str] = None,
        conn: Optional[ConnectionPlus] = None) -> Experiment:
    """
    Find and return an experiment with the given name and sample name,
    or create one if not found.

    Args:
        experiment_name: Name of the experiment to find or create
        sample_name: Name of the sample
        conn: Connection to the database. If not supplied, a new connection
          to the DB file specified in the config is made

    Returns:
        The found or created experiment
    """
    conn = conn or connect(get_DB_location())
    try:
        experiment = load_experiment_by_name(experiment_name,
                                             sample_name,
                                             conn=conn)
    except ValueError as exception:
        if "Experiment not found" in str(exception):
            experiment = new_experiment(experiment_name,
                                        sample_name,
                                        conn=conn)
        else:
            raise exception
    return experiment
def load_experiment_by_name(
        name: str,
        sample: Optional[str] = None,
        conn: Optional[ConnectionPlus] = None) -> Experiment:
    """
    Try to load experiment with the specified name.

    Nothing stops you from having many experiments with the same name and
    sample_name. In that case this won't work. And warn you.

    Args:
        name: the name of the experiment
        sample: the name of the sample
        conn: connection to the database. If not supplied, a new connection
          to the DB file specified in the config is made

    Returns:
        the requested experiment

    Raises:
        ValueError if the name is not unique and sample name is None.
    """
    conn = conn or connect(get_DB_location())

    if sample:
        sql = """
        SELECT
            *
        FROM
            experiments
        WHERE
            sample_name = ? AND
            name = ?
        """
        c = transaction(conn, sql, sample, name)
    else:
        sql = """
        SELECT
            *
        FROM
            experiments
        WHERE
            name = ?
        """
        c = transaction(conn, sql, name)
    rows = c.fetchall()
    if len(rows) == 0:
        raise ValueError("Experiment not found")
    elif len(rows) > 1:
        _repr = []
        for row in rows:
            s = (f"exp_id:{row['exp_id']} ({row['name']}-{row['sample_name']})"
                 f" started at ({row['start_time']})")
            _repr.append(s)
        _repr_str = "\n".join(_repr)
        raise ValueError(f"Many experiments matching your request"
                         f" found:\n{_repr_str}")
    else:
        e = Experiment(exp_id=rows[0]['exp_id'], conn=conn)
    return e
示例#7
0
def new_data_set(name,
                 exp_id: Optional[int] = None,
                 specs: SPECS = None,
                 values=None,
                 metadata=None,
                 conn=None) -> DataSet:
    """ Create a new dataset.
    If exp_id is not specified the last experiment will be loaded by default.

    Args:
        name: the name of the new dataset
        exp_id:  the id of the experiments this dataset belongs to
            defaults to the last experiment
        specs: list of parameters to create this data_set with
        values: the values to associate with the parameters
        metadata:  the values to associate with the dataset
    """
    path_to_db = get_DB_location()
    d = DataSet(path_to_db, conn=conn)

    if exp_id is None:
        if len(get_experiments(d.conn)) > 0:
            exp_id = get_last_experiment(d.conn)
        else:
            raise ValueError("No experiments found."
                             "You can start a new one with:"
                             " new_experiment(name, sample_name)")
    d._new(name, exp_id, specs, values, metadata)
    return d
示例#8
0
def test_has_attributes_after_init():
    """
    Ensure that all attributes are populated after __init__ in BOTH cases
    (run_id is None / run_id is not None)
    """

    attrs = ['path_to_db', '_path_to_db', 'conn', '_run_id', 'run_id',
             '_debug', 'subscribers', '_completed', 'name', 'table_name',
             'guid', 'number_of_results', 'counter', 'parameters',
             'paramspecs', 'exp_id', 'exp_name', 'sample_name',
             'run_timestamp_raw', 'completed_timestamp_raw', 'completed',
             'snapshot', 'snapshot_raw']

    path_to_db = get_DB_location()
    ds = DataSet(path_to_db, run_id=None)

    for attr in attrs:
        assert hasattr(ds, attr)
        getattr(ds, attr)

    ds = DataSet(path_to_db, run_id=1)

    for attr in attrs:
        assert hasattr(ds, attr)
        getattr(ds, attr)
示例#9
0
def new_experiment(name: str,
                   sample_name: str,
                   format_string: Optional[str] = "{}-{}-{}") -> Experiment:
    """ Create a new experiment

    Args:
        name: the name of the experiment
        sample_name: the name of the current sample
        format_string: basic format string for table-name
            must contain 3 placeholders.
    Returns:
        the new experiment
    """
    log.info("creating new experiment in {}".format(get_DB_location()))
    e = Experiment(get_DB_location())
    e._new(name, sample_name, format_string)
    return e
示例#10
0
def test_load_last_experiment(empty_temp_db):
    # test in case of no experiments
    with pytest.raises(ValueError, match='There are no experiments in the '
                                         'database file'):
        _ = load_last_experiment()

    # create 2 experiments
    exp1 = Experiment(exp_id=None)
    exp2 = Experiment(exp_id=None)
    assert get_DB_location() == exp1.path_to_db
    assert get_DB_location() == exp2.path_to_db

    # load last and assert that its the 2nd one that was created
    last_exp = load_last_experiment()
    assert last_exp.exp_id == exp2.exp_id
    assert last_exp.exp_id != exp1.exp_id
    assert last_exp.path_to_db == exp2.path_to_db
示例#11
0
def load_last_experiment() -> Experiment:
    """
    Load last experiment

    Returns:
        last experiment
    """
    conn = connect(get_DB_location())
    return Experiment(exp_id=get_last_experiment(conn))
示例#12
0
    def __init__(self,
                 path_to_db: Optional[str] = None,
                 exp_id: Optional[int] = None,
                 name: Optional[str] = None,
                 sample_name: Optional[str] = None,
                 format_string: str = "{}-{}-{}",
                 conn: Optional[ConnectionPlus] = None) -> None:
        """
        Create or load an experiment. If exp_id is None, a new experiment is
        created. If exp_id is not None, an experiment is loaded.

        Args:
            path_to_db: The path of the database file to create in/load from.
              If a conn is passed together with path_to_db, an exception is
              raised
            exp_id: The id of the experiment to load
            name: The name of the experiment to create. Ignored if exp_id is
              not None
            sample_name: The sample name for this experiment. Ignored if exp_id
              is not None
            format_string: The format string used to name result-tables.
              Ignored if exp_id is not None.
            conn: connection to the database. If not supplied, the constructor
              first tries to use path_to_db to figure out where to connect to.
              If path_to_db is not supplied either, a new connection
              to the DB file specified in the config is made
        """

        if path_to_db is not None and conn is not None:
            raise ValueError('Received BOTH conn and path_to_db. Please '
                             'provide only one or the other.')

        self._path_to_db = path_to_db or get_DB_location()
        self.conn = conn or connect(self.path_to_db, get_DB_debug())

        max_id = len(get_experiments(self.conn))

        if exp_id is not None:
            if exp_id not in range(1, max_id + 1):
                raise ValueError('No such experiment in the database')
            self._exp_id = exp_id
        else:

            # it is better to catch an invalid format string earlier than later
            try:
                # the sqlite_base will try to format
                # (name, exp_id, run_counter)
                format_string.format("name", 1, 1)
            except Exception as e:
                raise ValueError("Invalid format string. Can not format "
                                 "(name, exp_id, run_counter)") from e

            log.info("creating new experiment in {}".format(self.path_to_db))

            name = name or f"experiment_{max_id+1}"
            sample_name = sample_name or "some_sample"
            self._exp_id = ne(self.conn, name, sample_name, format_string)
示例#13
0
def load_last_experiment() -> Experiment:
    """
    Load last experiment

    Returns:
        last experiment
    """
    e = Experiment(get_DB_location())
    e.exp_id = get_last_experiment(e.conn)
    return e
示例#14
0
def load_last_experiment() -> Experiment:
    """
    Load last experiment (from database file from config)

    Returns:
        last experiment
    """
    last_exp_id = get_last_experiment(connect(get_DB_location()))
    if last_exp_id is None:
        raise ValueError('There are no experiments in the database file')
    return Experiment(exp_id=last_exp_id)
示例#15
0
def load_by_id(run_id)->DataSet:
    """ Load dataset by id

    Args:
        run_id: id of the dataset

    Returns:
        the datasets

    """
    d = DataSet(get_DB_location(), run_id=run_id)
    return d
示例#16
0
def test_runs_table_columns(empty_temp_db):
    """
    Ensure that the column names of a pristine runs table are what we expect
    """
    colnames = mut.RUNS_TABLE_COLUMNS.copy()
    conn = mut.connect(get_DB_location())
    query = "PRAGMA table_info(runs)"
    cursor = conn.cursor()
    for row in cursor.execute(query):
        colnames.remove(row['name'])

    assert colnames == []
示例#17
0
def load_experiment_by_name(name: str,
                            sample: Optional[str] = None) -> Experiment:
    """
    Try to load experiment with the specified name.
    Nothing stops you from having many experiments with
    the same name and sample_name.
    In that case this won't work. And warn you.
    Args:
        name: the name of the experiment
        sample: the name of the sample

    Returns:
        the requested experiment

    Raises:
        ValueError if the name is not unique and sample name is None.
    """
    e = Experiment(get_DB_location())
    if sample:
        sql = """
        SELECT
            *
        FROM
            experiments
        WHERE
            sample_name = ? AND
            name = ?
        """
        c = transaction(e.conn, sql, sample, name)
    else:
        sql = """
        SELECT
            *
        FROM
            experiments
        WHERE
            name = ?
        """
        c = transaction(e.conn, sql, name)
    rows = c.fetchall()
    if len(rows) == 0:
        raise ValueError("Experiment not found \n")
    elif len(rows) > 1:
        _repr = []
        for row in rows:
            s = f"exp_id:{row['exp_id']} ({row['name']}-{row['sample_name']}) started at({row['start_time']})"
            _repr.append(s)
        _repr_str = "\n".join(_repr)
        raise ValueError(
            f"Many experiments matching your request found {_repr_str}")
    else:
        e.exp_id = rows[0]['exp_id']
    return e
示例#18
0
def load_experiment(exp_id: int) -> Experiment:
    """
    Load experiment with the specified id
    Args:
        exp_id: experiment id

    Returns:
        experiment with the specified id
    """
    e = Experiment(get_DB_location())
    e.exp_id = exp_id
    return e
示例#19
0
def new_data_set(name,
                 exp_id: Optional[int] = None,
                 specs: SPECS = None,
                 values=None,
                 metadata=None,
                 conn=None) -> DataSet:
    """ Create a new dataset.
    If exp_id is not specified the last experiment will be loaded by default.

    Args:
        name: the name of the new dataset
        exp_id:  the id of the experiments this dataset belongs to
            defaults to the last experiment
        specs: list of parameters to create this data_set with
        values: the values to associate with the parameters
        metadata:  the values to associate with the dataset
    """
    path_to_db = get_DB_location()
    if conn is None:
        tempcon = True
        conn = connect(get_DB_location())
    else:
        tempcon = False

    if exp_id is None:
        if len(get_experiments(conn)) > 0:
            exp_id = get_last_experiment(conn)
        else:
            raise ValueError("No experiments found."
                             "You can start a new one with:"
                             " new_experiment(name, sample_name)")
    # This is admittedly a bit weird. We create a dataset, link it to some
    # run in the DB and then (using _new) change what it's linked to
    if tempcon:
        conn.close()
        conn = None
    d = DataSet(path_to_db, run_id=None, conn=conn)
    d._new(name, exp_id, specs, values, metadata)

    return d
示例#20
0
    def __init__(self, path_to_db: Optional[str]=None,
                 exp_id: Optional[int]=None,
                 name: Optional[str]=None,
                 sample_name: Optional[str]=None,
                 format_string: Optional[str]="{}-{}-{}") -> None:
        """
        Create or load an experiment. If exp_id is None, a new experiment is
        created. If exp_id is not None, an experiment is loaded.

        Args:
            path_to_db: The path of the database file to create in/load from
            exp_id: The id of the experiment to load
            name: The name of the experiment to create. Ignored if exp_id is
              not None
            sample_name: The sample name for this experiment. Ignored if exp_id
              is not None
            format_string: The format string used to name result-tables.
              Ignored if exp_id is not None.
        """

        self.path_to_db = path_to_db or get_DB_location()
        self.conn = connect(self.path_to_db, get_DB_debug())

        max_id = len(get_experiments(self.conn))

        if exp_id:
            if exp_id not in range(1, max_id+1):
                raise ValueError('No such experiment in the database')
            self._exp_id = exp_id
        else:

            # it is better to catch an invalid format string earlier than later
            try:
                # the sqlite_base will try to format
                # (name, exp_id, run_counter)
                format_string.format("name", 1, 1)
            except Exception as e:
                raise ValueError("Invalid format string. Can not format "
                                "(name, exp_id, run_counter)") from e

            log.info("creating new experiment in {}".format(self.path_to_db))

            name = name or f"experiment_{max_id+1}"
            sample_name = sample_name or "some_sample"
            self._exp_id = ne(self.conn, name, sample_name, format_string)
            self.format_string = format_string
示例#21
0
def test_is_run_id_in_db(empty_temp_db):
    conn = mut.connect(get_DB_location())
    mut.new_experiment(conn, 'test_exp', 'no_sample')

    for _ in range(5):
        ds = DataSet(conn=conn, run_id=None)

    # there should now be run_ids 1, 2, 3, 4, 5 in the database
    good_ids = [1, 2, 3, 4, 5]
    try_ids = [1, 3, 9999, 23, 0, 1, 1, 3, 34]

    sorted_try_ids = np.unique(try_ids)

    expected_dict = {tid: (tid in good_ids) for tid in sorted_try_ids}

    acquired_dict = mut.is_run_id_in_database(conn, *try_ids)

    assert expected_dict == acquired_dict
示例#22
0
def load_by_id(run_id: int, conn: Optional[ConnectionPlus] = None) -> DataSet:
    """
    Load dataset by run id

    If no connection is provided, lookup is performed in the database file that
    is specified in the config.

    Args:
        run_id: run id of the dataset
        conn: connection to the database to load from

    Returns:
        dataset with the given run id
    """
    if run_id is None:
        raise ValueError('run_id has to be a positive integer, not None.')

    conn = conn or connect(get_DB_location())

    d = DataSet(conn=conn, run_id=run_id)
    return d
示例#23
0
def new_experiment(name: str,
                   sample_name: Optional[str],
                   format_string: str = "{}-{}-{}",
                   conn: Optional[ConnectionPlus] = None) -> Experiment:
    """
    Create a new experiment (in the database file from config)

    Args:
        name: the name of the experiment
        sample_name: the name of the current sample
        format_string: basic format string for table-name
            must contain 3 placeholders.
        conn: connection to the database. If not supplied, a new connection
          to the DB file specified in the config is made
    Returns:
        the new experiment
    """
    conn = conn or connect(get_DB_location())
    return Experiment(name=name,
                      sample_name=sample_name,
                      format_string=format_string,
                      conn=conn)
示例#24
0
    def __init__(self, path_to_db: str=None,
                 run_id: Optional[int]=None,
                 conn=None,
                 exp_id=None,
                 name: str=None,
                 specs: SPECS=None,
                 values=None,
                 metadata=None) -> None:
        """
        Create a new DataSet object. The object can either hold a new run or
        an already existing run. If a run_id is provided, then an old run is
        looked up, else a new run is created.

        Args:
            path_to_db: path to the sqlite file on disk. If not provided, the
              path will be read from the config.
            run_id: provide this when loading an existing run, leave it
              as None when creating a new run
            conn: connection to the DB
            exp_id: the id of the experiment in which to create a new run.
              Ignored if run_id is provided.
            name: the name of the dataset. Ignored if run_id is provided.
            specs: paramspecs belonging to the dataset. Ignored if run_id is
              provided.
            values: values to insert into the dataset. Ignored if run_id is
              provided.
            metadata: metadata to insert into the dataset. Ignored if run_id
              is provided.
        """
        # TODO: handle fail here by defaulting to
        # a standard db
        self.path_to_db = path_to_db or get_DB_location()
        if conn is None:
            self.conn = connect(self.path_to_db)
        else:
            self.conn = conn

        self.run_id = run_id
        self._debug = False
        self.subscribers: Dict[str, _Subscriber] = {}
        if run_id:
            if not run_exists(self.conn, run_id):
                raise ValueError(f"Run with run_id {run_id} does not exist in "
                                 f"the database")
            self._completed = completed(self.conn, self.run_id)
        else:

            if exp_id is None:
                if len(get_experiments(self.conn)) > 0:
                    exp_id = get_last_experiment(self.conn)
                else:
                    raise ValueError("No experiments found."
                                     "You can start a new one with:"
                                     " new_experiment(name, sample_name)")

            # Actually perform all the side effects needed for
            # the creation of a new dataset.

            name = name or "dataset"

            _, run_id, __ = create_run(self.conn, exp_id, name,
                                       generate_guid(),
                                       specs, values, metadata)

            # this is really the UUID (an ever increasing count in the db)
            self.run_id = run_id
            self._completed = False
示例#25
0
def test_get_last_experiment_no_experiments(empty_temp_db):
    conn = mut.connect(get_DB_location())
    assert None is mut.get_last_experiment(conn)
示例#26
0
    def __init__(self,
                 path_to_db: str = None,
                 run_id: Optional[int] = None,
                 conn: Optional[ConnectionPlus] = None,
                 exp_id=None,
                 name: str = None,
                 specs: Optional[SpecsOrInterDeps] = None,
                 values=None,
                 metadata=None) -> None:
        """
        Create a new DataSet object. The object can either hold a new run or
        an already existing run. If a run_id is provided, then an old run is
        looked up, else a new run is created.

        Args:
            path_to_db: path to the sqlite file on disk. If not provided, the
              path will be read from the config.
            run_id: provide this when loading an existing run, leave it
              as None when creating a new run
            conn: connection to the DB; if provided and `path_to_db` is
              provided as well, then a ValueError is raised (this is to
              prevent the possibility of providing a connection to a DB
              file that is different from `path_to_db`)
            exp_id: the id of the experiment in which to create a new run.
              Ignored if run_id is provided.
            name: the name of the dataset. Ignored if run_id is provided.
            specs: paramspecs belonging to the dataset. Ignored if run_id is
              provided.
            values: values to insert into the dataset. Ignored if run_id is
              provided.
            metadata: metadata to insert into the dataset. Ignored if run_id
              is provided.
        """
        if path_to_db is not None and conn is not None:
            raise ValueError("Both `path_to_db` and `conn` arguments have "
                             "been passed together with non-None values. "
                             "This is not allowed.")
        self._path_to_db = path_to_db or get_DB_location()

        self.conn = make_connection_plus_from(conn) if conn is not None else \
            connect(self.path_to_db)

        self._run_id = run_id
        self._debug = False
        self.subscribers: Dict[str, _Subscriber] = {}
        self._interdeps: InterDependencies_

        if run_id is not None:
            if not run_exists(self.conn, run_id):
                raise ValueError(f"Run with run_id {run_id} does not exist in "
                                 f"the database")
            self._completed = completed(self.conn, self.run_id)
            run_desc = self._get_run_description_from_db()
            if run_desc._old_style_deps:
                # TODO: what if the old run had invalid interdep.s?
                old_idps: InterDependencies = cast(InterDependencies,
                                                   run_desc.interdeps)
                self._interdeps = old_to_new(old_idps)
            else:
                new_idps: InterDependencies_ = cast(InterDependencies_,
                                                    run_desc.interdeps)
                self._interdeps = new_idps
            self._metadata = get_metadata_from_run_id(self.conn, run_id)
            self._started = self.run_timestamp_raw is not None

        else:
            # Actually perform all the side effects needed for the creation
            # of a new dataset. Note that a dataset is created (in the DB)
            # with no parameters; they are written to disk when the dataset
            # is marked as started
            if exp_id is None:
                if len(get_experiments(self.conn)) > 0:
                    exp_id = get_last_experiment(self.conn)
                else:
                    raise ValueError("No experiments found."
                                     "You can start a new one with:"
                                     " new_experiment(name, sample_name)")
            name = name or "dataset"
            _, run_id, __ = create_run(self.conn,
                                       exp_id,
                                       name,
                                       generate_guid(),
                                       parameters=None,
                                       values=values,
                                       metadata=metadata)
            # this is really the UUID (an ever increasing count in the db)
            self._run_id = run_id
            self._completed = False
            self._started = False
            if isinstance(specs, InterDependencies_):
                self._interdeps = specs
            elif specs is not None:
                self._interdeps = old_to_new(InterDependencies(*specs))
            else:
                self._interdeps = InterDependencies_()
            self._metadata = get_metadata_from_run_id(self.conn, self.run_id)