def _get_pifquery(self, composition):
    """
    Search Citrine for records whose chemical formula equals a composition.

    Args:
        composition: value used as the ``equal`` filter on the chemical
            formula field of the query

    Returns: the value stored under the 'hits' key of the search result
        dictionary

    Raises:
        KeyError: if the search result dictionary has no 'hits' key
    """
    pif_query = PifQuery(system=SystemQuery(
        chemical_formula=ChemicalFieldQuery(filter=ChemicalFilter(
            equal=composition))))
    # Run the search once and reuse the response: repeating the request
    # just to read 'hits' doubles the API calls and could return a
    # different payload than the one that was checked.
    result = self.client.search(pif_query).as_dictionary()
    if 'hits' not in result:
        raise KeyError('No results found!')
    return result['hits']
示例#2
0
 def _get_pifquery(self, composition):
     """
     Search Citrine for records whose chemical formula equals a composition.

     Args:
         composition: value used as the ``equal`` filter on the chemical
             formula field of the query

     Returns: the value stored under the 'hits' key of the search result
         dictionary

     Raises:
         KeyError: if the search result dictionary has no 'hits' key
     """
     # TODO: does this stop csv generation on first invalid composition?
     # TODO: Is there a way to send many compositions in one call to citrine?
     pif_query = PifQuery(system=SystemQuery(
         chemical_formula=ChemicalFieldQuery(filter=ChemicalFilter(
             equal=composition))))
     # Run the search once and reuse the response: repeating the request
     # just to read 'hits' doubles the API calls and could return a
     # different payload than the one that was checked.
     result = self.client.search(pif_query).as_dictionary()
     if 'hits' not in result:
         raise KeyError('No results found!')
     return result['hits']
示例#3
0
    def get_data(self,
                 formula=None,
                 prop=None,
                 data_type=None,
                 reference=None,
                 min_measurement=None,
                 max_measurement=None,
                 from_record=None,
                 data_set_id=None,
                 max_results=None):
        """
        Gets raw api data from Citrine in json format. See api_link for more
        information on input parameters

        Results are fetched page by page (at most 100 records per request),
        pausing 3 seconds between consecutive requests, until either a short
        page is returned or max_results records have been collected.

        Args:
            formula: (str) filter for the chemical formula field; only those
                results that have chemical formulas that contain this string
                will be returned
            prop: (str) name of the property to search for
            data_type: (str) 'EXPERIMENTAL'/'COMPUTATIONAL'/'MACHINE_LEARNING';
                filter for properties obtained from experimental work,
                computational methods, or machine learning.
            reference: (str) filter for the reference field; only those
                results that have contributors that contain this string
                will be returned
            min_measurement: (str/num) minimum of the property value range
            max_measurement: (str/num) maximum of the property value range
            from_record: (int) index of first record to return (indexed from 0)
            data_set_id: (int) id of the particular data set to search on
            max_results: (int) number of records to limit the results to

        Returns: (list) of jsons/pifs returned by Citrine's API

        Raises:
            KeyError: if a search response does not contain a "hits" entry
        """

        json_data = []
        start = from_record if from_record else 0
        per_page = 100
        refresh_time = 3  # seconds to wait between search calls

        # The request size does not change between pages, so compute it once.
        # Ask for fewer records when the limit is below a full page,
        # e.g. max_results=68 < 100.
        page_size = per_page
        if max_results and max_results < per_page:
            page_size = max_results

        # Construct all of the relevant queries from input args
        formula_query = ChemicalFieldQuery(filter=ChemicalFilter(
            equal=formula))
        prop_query = PropertyQuery(
            name=FieldQuery(filter=Filter(equal=prop)),
            value=FieldQuery(
                filter=Filter(min=min_measurement, max=max_measurement)),
            data_type=FieldQuery(filter=Filter(equal=data_type)))
        ref_query = ReferenceQuery(doi=FieldQuery(filter=Filter(
            equal=reference)))

        system_query = PifSystemQuery(chemical_formula=formula_query,
                                      properties=prop_query,
                                      references=ref_query)
        dataset_query = DatasetQuery(id=Filter(equal=data_set_id))
        data_query = DataQuery(system=system_query, dataset=dataset_query)

        while True:
            pif_query = PifSystemReturningQuery(query=data_query,
                                                from_index=start,
                                                size=page_size)

            # Issue each page request exactly once and reuse the response;
            # searching again just to read "hits" doubles the API traffic.
            result = self.client.search.pif_search(pif_query).as_dictionary()

            # Check if any results found
            if "hits" not in result:
                raise KeyError("No results found!")

            data = result["hits"]
            size = len(data)
            start += size
            json_data.extend(data)

            # Stop as soon as the limit is reached. Using >= (not >) avoids
            # sleeping and fetching one more page whose records would all be
            # discarded when max_results is an exact multiple of the page size.
            if max_results and len(json_data) >= max_results:
                json_data = json_data[:max_results]
                break
            if size < per_page:  # a short page means no more results
                break
            time.sleep(refresh_time)
        return json_data