def _get_pifquery(self, composition):
    """
    Searches Citrine for records whose chemical formula equals the given
    composition.

    Args:
        composition: value passed to the `equal` chemical-formula filter

    Returns:
        the value stored under 'hits' in the search response dictionary

    Raises:
        KeyError: if the search response has no 'hits' entry
    """
    pif_query = PifQuery(system=SystemQuery(
        chemical_formula=ChemicalFieldQuery(filter=ChemicalFilter(
            equal=composition))))

    # Run the search once and reuse the response; every search call is a
    # separate request to the client, so the check and the read must share it.
    response = self.client.search(pif_query).as_dictionary()

    # Check if any results found
    if 'hits' not in response:
        raise KeyError('No results found!')
    return response['hits']
def _get_pifquery(self, composition):
    """
    Searches Citrine for records whose chemical formula equals the given
    composition.

    Args:
        composition: value passed to the `equal` chemical-formula filter

    Returns:
        the value stored under 'hits' in the search response dictionary

    Raises:
        KeyError: if the search response has no 'hits' entry
    """
    # TODO: does this stop csv generation on first invalid composition?
    # TODO: Is there a way to send many compositions in one call to citrine?
    pif_query = PifQuery(system=SystemQuery(
        chemical_formula=ChemicalFieldQuery(filter=ChemicalFilter(
            equal=composition))))

    # Run the search once and reuse the response; every search call is a
    # separate request to the client, so the check and the read must share it.
    response = self.client.search(pif_query).as_dictionary()

    # Check if any results found
    if 'hits' not in response:
        raise KeyError('No results found!')
    return response['hits']
def get_data(self, formula=None, prop=None, data_type=None,
             reference=None, min_measurement=None, max_measurement=None,
             from_record=None, data_set_id=None, max_results=None):
    """
    Gets raw api data from Citrine in json format. See api_link for more
    information on input parameters

    Results are fetched page by page (at most 100 records per request) with
    a short pause between requests, until either the requested number of
    records has been collected or a page comes back short.

    Args:
        formula: (str) chemical formula to search for; applied as an
            `equal` filter on the chemical formula field
        prop: (str) name of the property to search for
        data_type: (str) 'EXPERIMENTAL'/'COMPUTATIONAL'/'MACHINE_LEARNING';
            filter for properties obtained from experimental work,
            computational methods, or machine learning.
        reference: (str) applied as an `equal` filter on the reference DOI
        min_measurement: (str/num) minimum of the property value range
        max_measurement: (str/num) maximum of the property value range
        from_record: (int) index of first record to return (indexed from 0)
        data_set_id: (int) id of the particular data set to search on
        max_results: (int) number of records to limit the results to; a
            falsy value (None or 0) means no limit

    Returns:
        (list) of jsons/pifs returned by Citrine's API

    Raises:
        KeyError: if a search response has no "hits" entry
    """
    json_data = []
    start = from_record if from_record else 0
    per_page = 100
    refresh_time = 3  # seconds to wait between search calls

    # Page size never changes between iterations, so compute it once.
    # Use max_results as the page size when it is smaller than a full page,
    # eg: in case of max_results=68 < 100
    page_size = min(per_page, max_results) if max_results else per_page

    # Construct all of the relevant queries from input args
    formula_query = ChemicalFieldQuery(filter=ChemicalFilter(
        equal=formula))
    prop_query = PropertyQuery(
        name=FieldQuery(filter=Filter(equal=prop)),
        value=FieldQuery(
            filter=Filter(min=min_measurement, max=max_measurement)),
        data_type=FieldQuery(filter=Filter(equal=data_type)))
    ref_query = ReferenceQuery(doi=FieldQuery(filter=Filter(
        equal=reference)))

    system_query = PifSystemQuery(chemical_formula=formula_query,
                                  properties=prop_query,
                                  references=ref_query)
    dataset_query = DatasetQuery(id=Filter(equal=data_set_id))
    data_query = DataQuery(system=system_query, dataset=dataset_query)

    while True:
        pif_query = PifSystemReturningQuery(query=data_query,
                                            from_index=start,
                                            size=page_size)

        # Issue the search once per page and reuse the response for both the
        # "hits" check and the data extraction.
        response = self.client.search.pif_search(pif_query).as_dictionary()

        # Check if any results found
        if "hits" not in response:
            raise KeyError("No results found!")

        data = response["hits"]
        size = len(data)
        start += size
        json_data.extend(data)

        # Stop as soon as the limit is reached (>=, not >): otherwise a
        # max_results that is an exact multiple of the page size would cost
        # one more request and sleep only to have its records sliced away.
        if max_results and len(json_data) >= max_results:
            json_data = json_data[:max_results]
            break

        # A short page means there is nothing further to fetch.
        if size < page_size:
            break

        time.sleep(refresh_time)

    return json_data