Exemplo n.º 1
0
def test_json_store_load(jsonstore, test_dir):
    """Check that a JSONStore yields 20 documents, from the fixture and from a .json.gz file.

    The ``jsonstore`` fixture is connected and queried first; then a second
    store is built from ``test_set/c.json.gz`` under ``test_dir`` and must
    return the same number of documents.
    """
    expected_count = 20

    jsonstore.connect()
    fixture_docs = list(jsonstore.query())
    assert len(fixture_docs) == expected_count

    gz_path = test_dir / "test_set" / "c.json.gz"
    gz_store = JSONStore(gz_path)
    gz_store.connect()
    gz_docs = list(gz_store.query())
    assert len(gz_docs) == expected_count
Exemplo n.º 2
0
class MPWorksCompatibilityBuilderTest(unittest.TestCase):
    """Tests for MPWorksCompatibilityBuilder and the MPWorks document converters."""

    def setUp(self):
        """Create and connect the source (JSON) and target (in-memory) stores."""
        # NOTE(review): `test_tasks` is a module-level name, presumably the
        # path to the JSON task fixture -- confirm against the module header.
        self.test_tasks = JSONStore([test_tasks])
        self.elasticity = MemoryStore("atomate_tasks")
        self.test_tasks.connect()
        self.elasticity.connect()

    def test_builder(self):
        """Smoke test: run get_items -> process_item -> update_targets end to end."""
        mpw_builder = MPWorksCompatibilityBuilder(self.test_tasks,
                                                  self.elasticity,
                                                  incremental=False)
        items = mpw_builder.get_items()
        processed = [mpw_builder.process_item(item) for item in items]
        mpw_builder.update_targets(processed)

    def test_convert_mpworks_to_atomate(self):
        """Converted documents must carry 'hubbards' under their 'input' key."""
        # The same check is applied to one document per task_type pattern.
        for pattern in ("deformed", "(2x)"):
            doc = self.test_tasks.collection.find_one(
                {"task_type": {
                    "$regex": pattern
                }})
            new_doc = convert_mpworks_to_atomate(doc)
            # assertIn reports the missing key and the container on failure,
            # unlike assertTrue which only reports "False is not true".
            self.assertIn('hubbards', new_doc['input'])

    def test_update_mpworks_schema(self):
        """Smoke test: a schema-updated document can still be converted."""
        doc = self.test_tasks.query(criteria={"task_id": "mp-612"})[0]
        doc = update_mpworks_schema(doc)
        # Only checks that conversion does not raise; the result is not inspected.
        convert_mpworks_to_atomate(doc)
Exemplo n.º 3
0
class MPDispatcher(Dispatcher):
    """
    Dispatcher to use with Materials Project data. Reads from a JSON file.
    """
    def __init__(self, data_file_path):
        """
        Assumes that data_file_path contains data from
        an mpquery request in JSON format.

        Builds a JSONStore over the file, connects it, and initialises the
        per-objective bookkeeping dictionaries (all keyed by objective id).
        """
        self._dataset = JSONStore(data_file_path)
        # objective_id -> (training_data, candidate_data)
        self._objective_partitioned_data = {}
        # objective_id -> weight
        self._objective_weights = {}
        # objective_id -> Objective instance
        self._objective_ids = {}
        self._dataset.connect()

    def add_objective(self, objective, weight):
        """
        Register an objective with its weight and partition the dataset for it.

        Args:
            objective: an Objective instance; stored under its objective_id.
            weight: a float weight associated with the objective.

        Both arguments are type-checked with assert statements.
        """
        assert isinstance(objective, Objective)
        assert isinstance(weight, float)

        objective_id = objective.objective_id
        self._objective_weights[objective_id] = weight
        self._objective_ids[objective_id] = objective
        self._objective_partitioned_data[objective_id] = (
            self.partition_data(objective))

    def partition_data(self, objective):
        """
        Query the dataset for the training and candidate data of an objective.

        Training data are documents in which every model input (plus
        "task_id") and every model response property exists. Candidate data
        are documents in which the inputs exist but the response properties
        do not.

        Returns:
            A (training_data, candidate_data) tuple of query results.
        """
        model_inputs = objective.model_input_property_names() | {"task_id"}
        model_outputs = objective.model_response_property_names()
        all_props = model_inputs | model_outputs

        # Inputs must exist in both partitions.
        training_criteria = {prop: {"$exists": 1} for prop in model_inputs}
        candidate_criteria = {prop: {"$exists": 1} for prop in model_inputs}
        # Responses must exist for training and be absent for candidates.
        # Applied after the inputs so a property listed as both input and
        # response is treated as a response for the candidate query.
        training_criteria.update(
            {prop: {"$exists": 1} for prop in model_outputs})
        candidate_criteria.update(
            {prop: {"$exists": 0} for prop in model_outputs})

        training_data = self._dataset.query(criteria=training_criteria,
                                            properties=list(all_props))
        candidate_data = self._dataset.query(criteria=candidate_criteria,
                                             properties=list(model_inputs))
        return (training_data, candidate_data)

    def rank_wflows(self, n):
        """
        Return the first n (workflow, score) tuples.

        Every registered objective is run, but only the result of the last
        one iterated is returned, and the stored weights are not applied.
        Returns an empty list when no objective has been registered.

        TODO: needs to be rewritten to combine multiple objectives.
        """
        # Bound up front so an empty objective set yields [] rather than
        # raising UnboundLocalError at the return statement.
        score_mapping = []
        for objective_id in self._objective_weights:
            partition = self._objective_partitioned_data[objective_id]
            training_data = partition[0]
            candidates = partition[1]
            score_mapping = self.run_objective(objective_id, training_data,
                                               candidates)
        return score_mapping[:n]

    def run_objective(self, objective_id, training_data, candidates):
        """
        Returns a list of (workflow, score) tuples.

        Trains the objective's model on training_data, scores the candidates,
        and instantiates each of the objective's workflows once per scored
        candidate, using the candidate's "task_id".
        """
        objective = self._objective_ids[objective_id]
        objective.train_model(training_data)
        print("done training")
        score_mapping = objective.return_scores(candidates)
        workflow_scores = []
        for pair in score_mapping:
            # Each pair is indexed as (candidate document, score).
            candidate_mp_id = pair[0]["task_id"]
            score = pair[1]
            workflow_scores.extend(
                (wflow(candidate_mp_id), score) for wflow in objective.wflows)
        return workflow_scores

    def update(self):
        """No-op placeholder."""
        pass