Пример #1
0
def query_results(n_query, mol):
    """Issue n_query individual Result lookups (benchmark for per-query cost)."""
    for idx in range(n_query):
        # mol = Molecule.objects.first()  # --> the overhead in this query
        # option = Keywords.objects(program='Psi4').first()  # or [0], throws ex
        search = dict(
            molecule=mol,
            method=str(idx),
            basis="B1",
            keywords=None,
            program="P1",
            driver="energy",
        )
        Result.objects(**search)  # second DB access
Пример #2
0
    def _clear_db(self, db_name: str):
        """Dangerous, make sure you are deleting the right DB"""

        # Guard clause: refuse to touch anything unless the currently
        # connected database matches the requested name.
        if get_db().name != db_name:
            return

        logging.info('Clearing database: {}'.format(db_name))
        # Drop every document collection, then the database itself.
        for document_cls in (Result, Molecule, Options, Collection,
                             TaskQueue, Procedure, User):
            document_cls.drop_collection()

        self.client.drop_database(db_name)
Пример #3
0
def test_add_task_queue():
    """
        Simple test of adding a task using the ME classes
        in QCFractal, tasks should be added using storage_socket
    """

    assert TaskQueue.objects.count() == 0
    TaskQueue.objects().delete()

    # Add one task per referenced document type: a plain Result, an
    # Optimization Procedure, and a Torsiondrive Procedure.
    base_docs = (
        Result.objects().first(),
        OptimizationProcedure.objects().first(),
        TorsiondriveProcedure.objects().first(),
    )

    for expected_count, base_doc in enumerate(base_docs, start=1):
        TaskQueue(base_result=base_doc).save()
        assert TaskQueue.objects().count() == expected_count
Пример #4
0
    def add_results(self, data: List[dict], update_existing: bool=False, return_json=True):
        """
        Add results from a given dict. The dict should have all the required
        keys of a result.

        Parameters
        ----------
        data : list of dict
            Each dict must have:
            program, driver, method, basis, options, molecule
            Where molecule is the molecule id in the DB
            In addition, it should have the other attributes that it needs
            to store
        update_existing : bool (default False)
            Update existing results
        return_json : bool (default True)
            NOTE(review): currently unused in this body — confirm intent

        Returns
        -------
        dict
            Dict with keys: data, meta
            Data is the ids of the inserted/updated/existing docs
        """

        # Lowercase the indexed string fields so lookups are
        # case-insensitive; None values are left untouched.
        for d in data:
            for i in self._lower_results_index:
                if d[i] is None:
                    continue

                d[i] = d[i].lower()

        meta = storage_utils.add_metadata()

        results = []
        for d in data:
            # search by index keywords not by all keys, much faster
            # BUG FIX: the driver value was matched against the 'name' field;
            # it must match the 'driver' field, consistent with the documented
            # required keys and the queries used elsewhere in this module.
            doc = Result.objects(program=d['program'], driver=d['driver'],
                                 method=d['method'], basis=d['basis'],
                                 options=d['options'], molecule=d['molecule'])

            if doc.count() == 0 or update_existing:
                # upsert_one needs a real ObjectId for the reference field
                if not isinstance(d['molecule'], ObjectId):
                    d['molecule'] = ObjectId(d['molecule'])
                doc = doc.upsert_one(**d)
                results.append(str(doc.id))
                meta['n_inserted'] += 1
            else:
                meta['duplicates'].append(self._doc_to_tuples(doc.first(), with_ids=False))  # TODO
                # If new or duplicate, add the id to the return list
                results.append(str(doc.first().id))
        meta["success"] = True

        ret = {"data": results, "meta": meta}
        return ret
Пример #5
0
    def get_results_by_ids(self, ids: List[str]=None, projection=None, return_json=True,
                           with_ids=True):
        """
        Get list of Results using the given list of Ids

        Parameters
        ----------
        ids : List of str
            Ids of the results in the DB
        projection : list/set/tuple of keys, default is None
            The fields to return, default to return all
        return_json : bool, default is True
            Return the results as a list of json instead of objects
        with_ids: bool, default is True
            Include the ids in the returned objects/dicts

        Returns
        -------
        Dict with keys: data, meta
            Data is the objects found
        """

        meta = storage_utils.get_metadata()

        # Build the queryset once, narrowing the fields only when a
        # projection was requested, then cap the result size.
        queryset = Result.objects(id__in=ids)
        if projection:
            queryset = queryset.only(*projection)
        data = queryset.limit(self._max_limit)

        meta["n_found"] = data.count()
        meta["success"] = True

        if return_json:
            rdata = [self._doc_to_json(doc, with_ids) for doc in data]
        else:
            rdata = data

        return {"data": rdata, "meta": meta}
Пример #6
0
def duplicate_results(n_results, mol):
    """Half the documents are duplicates"""

    tosave_results = []
    for i in range(n_results):
        # mol = Molecule.objects.first()  # one DB access
        # option = Keywords.objects().first()
        data = {
            "molecule": mol,
            "method": str(i + int(n_results / 2)),
            "basis": "Bulk",
            "keywords": None,
            "program": "P1",
            "driver": "energy",
            "other_data": 5,
        }
        found = Result.objects(**data).first()
        if not found:
            tosave_results.append(Result(**data))

    # BUG FIX: guard the bulk insert — insert() on an empty document list
    # raises (pymongo rejects empty bulk writes), which happens whenever
    # every candidate was a duplicate.
    if tosave_results:
        Result.objects.insert(tosave_results)
    # BUG FIX: corrected the misspelled "Duplciates" in the output.
    print("Duplicates: ", len(tosave_results))
Пример #7
0
def test_results(storage_socket):
    """
        Handling results through the ME classes
    """

    assert Result.objects().count() == 0
    assert Options.objects().count() == 0

    water_mols = Molecule.objects(molecular_formula='H4O2')
    assert water_mols.count() == 2

    # Both pages share the same spec; only the molecule and payload differ.
    shared_spec = {
        "method": "M1",
        "basis": "B1",
        "options": None,
        "program": "P1",
        "driver": "energy",
    }
    page1 = dict(shared_spec, molecule=water_mols[0], other_data=5)
    page2 = dict(shared_spec, molecule=water_mols[1], other_data=10)

    result = Result(**page1)
    result.save()
    # print('Result After save: ', result.to_json())
    assert result.molecule.molecular_formula == 'H4O2'
Пример #8
0
def bulk_insert_results(n_results, mol):
    """Build n_results Result documents and store them with one bulk insert."""

    # mol = Molecule.objects.first()  # one DB access
    # option = Keywords.objects().first()
    docs = [
        Result(
            molecule=mol,
            method=str(i),
            basis="Bulk",
            keywords=None,
            program="P1",
            driver="energy",
            other_data=5,
        )
        for i in range(n_results)
    ]
    return Result.objects.insert(docs)
Пример #9
0
def insert_results(n_results, mol):
    """Insert n_results Result documents one save() at a time.

    Returns the last document saved, or None when n_results is 0.
    """

    # BUG FIX: 'results' was unbound when n_results == 0, making the final
    # return raise NameError.
    results = None

    # repeat searching for the molecule
    for i in range(n_results):
        # mol = Molecule.objects.first()  # one DB access
        # option = Keywords.objects().first()
        data = {
            "molecule": mol.id,
            "method": str(i),
            "basis": "B1",
            "keywords": None,
            "program": "P1",
            "driver": "energy",
            "other_data": 5,
        }
        results = Result(**data).save()  # second DB access
    return results
Пример #10
0
    def del_results(self, ids: List[str]):
        """
        Removes results from the database using their ids
        (Should be cautious! other tables maybe referencing results)

        Parameters
        ----------
        ids : list of str
            The Ids of the results to be deleted

        Returns
        -------
        int
            number of results deleted
        """

        # Convert the string ids to ObjectIds before querying.
        object_ids = list(map(ObjectId, ids))

        return Result.objects(id__in=object_ids).delete()
Пример #11
0
    def get_results(self,
                    program: str=None,
                    method: str=None,
                    basis: str=None,
                    molecule: str=None,
                    driver: str=None,
                    options: str=None,
                    status: str='COMPLETE',
                    projection=None,
                    limit: int=None,
                    skip: int=None,
                    return_json=True,
                    with_ids=True):
        """
        Query Results by any combination of the indexed fields.

        Parameters
        ----------
        program : str
        method : str
        basis : str
        molecule : str
            Molecule id in the DB
        driver : str
        options : str
            The id of the option in the DB
        status : bool, default is 'COMPLETE'
            The status of the result: 'COMPLETE', 'INCOMPLETE', or 'ERROR'
        projection : list/set/tuple of keys, default is None
            The fields to return, default to return all
        limit : int, default is None
            maximum number of results to return
            if 'limit' is greater than the global setting self._max_limit,
            the self._max_limit will be returned instead
            (This is to avoid overloading the server)
        skip : int, default is None TODO
            skip the first 'skip' results. Used to paginate
            NOTE(review): 'skip' is never applied in this body — confirm intent
        return_json : bool, default is True
            Return the results as a list of json instead of objects
        with_ids : bool, default is True
            Include the ids in the returned objects/dicts

        Returns
        -------
        Dict with keys: data, meta
            Data is the objects found
        """

        meta = storage_utils.get_metadata()
        # Collect only the filters the caller actually supplied (falsy
        # values, including empty strings, are silently skipped).
        query = {}
        parsed_query = {}
        if program:
            query['program'] = program
        if method:
            query['method'] = method
        if basis:
            query['basis'] = basis
        if molecule:
            query['molecule'], _ = _str_to_indices_with_errors(molecule)
        if driver:
            query['driver'] = driver
        if options:
            query['options'] = options
        if status:
            query['status'] = status

        # Translate into mongoengine keyword filters: molecule ids become an
        # __in filter, status is matched verbatim (it is upper-case by
        # convention), and every other string field is lowercased to match
        # the normalization done at insert time.
        for key, value in query.items():
            if key == "molecule":
                parsed_query[key + "__in"] = query[key]
            elif key == "status":
                parsed_query[key] = value
            elif isinstance(value, (list, tuple)):
                parsed_query[key + "__in"] = [v.lower() for v in value]
            else:
                parsed_query[key] = value.lower()

        # Cap the requested limit at the server-wide maximum.
        q_limit = limit if limit and limit < self._max_limit else self._max_limit

        data = []
        try:
            if projection:
                data = Result.objects(**parsed_query).only(*projection).limit(q_limit)
            else:
                data = Result.objects(**parsed_query).limit(q_limit)

            meta["n_found"] = data.count()
            meta["success"] = True
        except Exception as err:
            meta['error_description'] = str(err)

        if return_json:
            rdata = []
            for d in data:
                d = self._doc_to_json(d, with_ids)
                # Flatten the molecule reference to its plain ObjectId string.
                if "molecule" in d:
                    d["molecule"] = d["molecule"]["$oid"]
                rdata.append(d)

        else:
            rdata = data

        return {"data": rdata, "meta": meta}
Пример #12
0
    def queue_submit(self, data: List[Dict]):
        """Submit a list of tasks to the queue.
        Tasks are unique by their base_result, which should be inserted into
        the DB first before submitting its corresponding task to the queue
        (with result.status='INCOMPLETE' as the default)
        The default task.status is 'WAITING'

        Duplicate tasks should be a rare case.
        Hooks are merged if the task already exists

        Parameters
        ----------
        data : list of tasks (dict)
            A task is a dict, with the following fields:
            - hash_index: idx, not used anymore
            - spec: dynamic field (dict-like), can have any structure
            - hooks: list of any objects representing listeners (for now)
            - tag: str
            - base_results: tuple (required), first value is the class type
             of the result, {'results' or 'procedure'). The second value is
             the ID of the result in the DB. Example:
             "base_result": ('results', result_id)

        Returns
        -------
        dict (data and meta)
            'data' is a list of the IDs of the tasks IN ORDER, including
            duplicates. An errored task has 'None' in its ID
            meta['duplicates'] has the duplicate tasks
        """

        meta = storage_utils.add_metadata()

        results = []
        for d in data:
            try:
                if not isinstance(d['base_result'], tuple):
                    raise Exception("base_result must be a tuple not {}."
                                    .format(type(d['base_result'])))

                # If saved as DBRef, then use raw query to retrieve (avoid this)
                # if d['base_result'][0] in ('results', 'procedure'):
                #     base_result = DBRef(d['base_result'][0], d['base_result'][1])

                # Build a stub document carrying only the id, used as the
                # reference value for task.base_result.
                result_obj = None
                if d['base_result'][0] == 'results':
                    result_obj = Result(id=d['base_result'][1])
                elif d['base_result'][0] == 'procedure':
                    result_obj = Procedure(id=d['base_result'][1])
                else:
                    raise TypeError("Base_result type must be 'results' or 'procedure',"
                                    " {} is given.".format(d['base_result'][0]))
                task = TaskQueue(**d)
                task.base_result = result_obj
                task.save()
                results.append(str(task.id))
                meta['n_inserted'] += 1
            except mongoengine.errors.NotUniqueError as err:  # rare case
                # If results is stored as DBRef, get it with:
                # task = TaskQueue.objects(__raw__={'base_result': base_result}).first()  # avoid

                # If base_result is stored as a Result or Procedure class, get it with:
                task = TaskQueue.objects(base_result=result_obj).first()
                # BUG FIX: logging uses lazy %-style arguments; the original
                # call had no placeholder, so the document was never shown.
                self.logger.warning('queue_submit got a duplicate task: %s', task.to_mongo())
                if d['hooks']:  # merge hooks
                    task.hooks.extend(d['hooks'])
                    task.save()
                results.append(str(task.id))
                meta['duplicates'].append(self._doc_to_tuples(task, with_ids=False))  # TODO
            except Exception as err:
                meta["success"] = False
                meta["errors"].append(str(err))
                results.append(None)

        # BUG FIX: success was unconditionally reset to True here, masking
        # any failure recorded above. Only report success when no item errored.
        if not meta["errors"]:
            meta["success"] = True

        ret = {"data": results, "meta": meta}
        return ret