def retrieve_remaining_records(self):
    """Fetch the result pages not retrieved yet and append them to the corpus.

    The method doubles as a stop switch: when a retrieval is already in
    progress (``self.query_running`` is set) a call only clears that flag,
    which makes the running loop exit before its next request.

    Pages are requested from index ``self.num_retrieved // 10`` up to
    ``min(ceil(self.all_hits / 10), 100)`` (the code assumes 10 records per
    page, hence at most 1000 records).  After every response that was not
    served from the cache the loop sleeps for one second.  On an
    ``HTTPError`` or ``URLError`` the error is shown and retrieval stops.

    Records collected before a stop or an error are still added to
    ``self.output_corpus`` and sent on.  When no page could be parsed at
    all, the corpus is left untouched and nothing is sent.
    """
    self.error(1)

    # If a query is running, stop it.
    if self.query_running:
        self.query_running = False
        return

    if self.nyt_api:
        num_steps = min(math.ceil(self.all_hits / 10), 100)

        # Update buttons.
        self.retrieve_other_button.setText('Stop retrieving')
        self.open_set_api_key_dialog_button.setEnabled(False)
        self.run_query_button.setEnabled(False)

        # Accumulate remaining results in these containers.
        remaining_docs = []
        num_metas = len(self.output_corpus.domain.metas)
        remaining_metas = np.empty((0, num_metas), dtype=object)
        remaining_classes = []
        # Stays None until a page has been parsed.  Without this default the
        # name was unbound whenever the loop produced no result (first
        # request failed, immediate stop, empty page range) and the corpus
        # update below raised UnboundLocalError.
        meta_vars = None

        self.query_running = True
        self.progressBarInit()
        for i in range(self.num_retrieved // 10, num_steps):
            # Stop querying if the flag is not set.
            if not self.query_running:
                break

            # Update the progress bar.
            self.progressBarSet(100.0 * (i / num_steps))

            res, cached, error = self.nyt_api.execute_query(i)

            if res:
                docs, metas, meta_vars, class_values = parse_record_json(
                    res, self.nyt_api.includes_fields)
                remaining_docs += docs
                remaining_metas = np.vstack(
                    (remaining_metas, np.array(metas)))
                remaining_classes += class_values

                # Update the info label.
                self.num_retrieved += len(res["response"]["docs"])
                info_label = "Records: {}\nRetrieved: {}".format(
                    self.all_hits, self.num_retrieved)
                if self.all_hits > 1000:
                    info_label += " (max 1000)"
                self.query_info_label.setText(info_label)

                if not cached:  # Only wait if an actual request was made.
                    sleep(1)
            else:
                if error:
                    # HTTPError is tested first because it is a subclass
                    # of URLError.
                    if isinstance(error, HTTPError):
                        self.error(
                            1,
                            "An error occurred(HTTP {})".format(error.code))
                    elif isinstance(error, URLError):
                        self.error(
                            1,
                            "An error occurred(URL {})".format(error.reason))
                    break
        self.progressBarFinished()
        self.query_running = False

        # Update the corpus, but only if at least one page was parsed.
        if meta_vars is not None:
            self.output_corpus.extend_corpus(
                remaining_docs, remaining_metas, remaining_classes,
                meta_vars)
            self.send(Output.CORPUS, self.output_corpus)

        if self.num_retrieved == min(self.all_hits, 1000):
            self.retrieve_other_button.setText(
                'All available records retrieved')
            self.retrieve_other_button.setEnabled(False)
        else:
            self.retrieve_other_button.setText(
                'Retrieve remaining records ({})'.format(
                    min(self.all_hits, 1000) - self.num_retrieved))
            self.retrieve_other_button.setFocus()
        self.open_set_api_key_dialog_button.setEnabled(True)
        self.run_query_button.setEnabled(True)
def retrieve_remaining_records(self):
    """Fetch the result pages not retrieved yet and append them to the corpus.

    The method doubles as a stop switch: when a retrieval is already in
    progress (``self.query_running`` is set) a call only clears that flag,
    which makes the running loop exit before its next request.

    Pages are requested from index ``self.num_retrieved // 10`` up to
    ``min(ceil(self.all_hits / 10), 100)`` (the code assumes 10 records per
    page, hence at most 1000 records).  After every response that was not
    served from the cache the loop sleeps for one second.  On an
    ``HTTPError`` or ``URLError`` the error is shown and retrieval stops.

    Records collected before a stop or an error are still added to
    ``self.output_corpus`` and sent on.  When no page could be parsed at
    all, the corpus is left untouched and nothing is sent.
    """
    self.error(1)

    # If a query is running, stop it.
    if self.query_running:
        self.query_running = False
        return

    if self.nyt_api:
        num_steps = min(math.ceil(self.all_hits / 10), 100)

        # Update buttons.
        self.retrieve_other_button.setText('Stop retrieving')
        self.open_set_api_key_dialog_button.setEnabled(False)
        self.run_query_button.setEnabled(False)

        # Accumulate remaining results in these containers.
        remaining_docs = []
        num_metas = len(self.output_corpus.domain.metas)
        remaining_metas = np.empty((0, num_metas), dtype=object)
        remaining_classes = []
        # Stays None until a page has been parsed.  Without this default the
        # name was unbound whenever the loop produced no result (first
        # request failed, immediate stop, empty page range) and the corpus
        # update below raised UnboundLocalError.
        meta_vars = None

        self.query_running = True
        self.progressBarInit()
        for i in range(self.num_retrieved // 10, num_steps):
            # Stop querying if the flag is not set.
            if not self.query_running:
                break

            # Update the progress bar.
            self.progressBarSet(100.0 * (i / num_steps))

            res, cached, error = self.nyt_api.execute_query(i)

            if res:
                docs, metas, meta_vars, class_values = parse_record_json(
                    res, self.nyt_api.includes_fields)
                remaining_docs += docs
                remaining_metas = np.vstack(
                    (remaining_metas, np.array(metas)))
                remaining_classes += class_values

                # Update the info label.
                self.num_retrieved += len(res["response"]["docs"])
                info_label = "Records: {}\nRetrieved: {}".format(
                    self.all_hits, self.num_retrieved)
                if self.all_hits > 1000:
                    info_label += " (max 1000)"
                self.query_info_label.setText(info_label)

                if not cached:  # Only wait if an actual request was made.
                    sleep(1)
            else:
                if error:
                    # HTTPError is tested first because it is a subclass
                    # of URLError.
                    if isinstance(error, HTTPError):
                        self.error(
                            1,
                            "An error occurred(HTTP {})".format(error.code))
                    elif isinstance(error, URLError):
                        self.error(
                            1,
                            "An error occurred(URL {})".format(error.reason))
                    break
        self.progressBarFinished()
        self.query_running = False

        # Update the corpus, but only if at least one page was parsed.
        if meta_vars is not None:
            self.output_corpus.extend_corpus(
                remaining_docs, remaining_metas, remaining_classes,
                meta_vars)
            self.send(Output.CORPUS, self.output_corpus)

        if self.num_retrieved == min(self.all_hits, 1000):
            self.retrieve_other_button.setText(
                'All available records retrieved')
            self.retrieve_other_button.setEnabled(False)
        else:
            self.retrieve_other_button.setText(
                'Retrieve remaining records ({})'.format(
                    min(self.all_hits, 1000) - self.num_retrieved))
            self.retrieve_other_button.setFocus()
        self.open_set_api_key_dialog_button.setEnabled(True)
        self.run_query_button.setEnabled(True)
def run_initial_query(self):
    """Request the first page of a new query and publish the corpus.

    ``self.warning(1)`` and ``self.error(1)`` are called without a message
    first (presumably clearing earlier messages with id 1).  Nothing else
    happens when no API object is set.  If none of the five text-field
    flags is ``True`` a warning is shown and the method returns.

    Otherwise the query URL is set from the combo box text, the year range
    and the field flags, and page 0 is requested.  On success a corpus with
    the class variable ``section_name`` is built, stored and sent; the hit
    counters, the info label and the "retrieve remaining" button are
    refreshed; and the query text is put at the front of the history unless
    it is already there.  Without a result, an ``HTTPError`` or ``URLError``
    returned by the API object is reported through ``self.error``.
    """
    self.warning(1)
    self.error(1)

    # Safety lock: without the NYT object there is nothing to query.
    if not self.nyt_api:
        return

    keywords = self.query_combo.currentText()

    selected_fields = [
        self.includes_headline,
        self.includes_lead_paragraph,
        self.includes_snippet,
        self.includes_abstract,
        self.includes_keywords,
    ]
    if True not in selected_fields:
        self.warning(1, "You must select at least one text field.")
        return

    self.nyt_api.set_query_url(
        keywords, self.year_from, self.year_to, selected_fields)
    response, _cached, failure = self.nyt_api.execute_query(0)

    if not response:
        if failure:
            # HTTPError goes first: it is a subclass of URLError.
            if isinstance(failure, HTTPError):
                self.error(
                    1, "An error occurred(HTTP {})".format(failure.code))
            elif isinstance(failure, URLError):
                self.error(
                    1, "An error occurred(URL {})".format(failure.reason))
        return

    # Build the output corpus from the parsed records.
    documents, metas, meta_vars, class_values = parse_record_json(
        response, selected_fields)
    section_var = DiscreteVariable(
        "section_name", values=list(set(class_values)))
    encoded = [section_var.to_val(value) for value in class_values]
    Y = np.array(encoded)[:, np.newaxis]
    Y[np.isnan(Y)] = 0
    domain = Domain([], class_vars=[section_var], metas=meta_vars)

    self.output_corpus = Corpus(documents, None, Y, metas, domain)
    self.send(Output.CORPUS, self.output_corpus)

    # Refresh the hit counters and the info label.
    payload = response["response"]
    self.all_hits = payload["meta"]["hits"]
    self.num_retrieved = len(payload["docs"])
    summary = "Records: {}\nRetrieved: {}".format(
        self.all_hits, self.num_retrieved)
    if self.all_hits > 1000:
        summary += " (max 1000)"
    self.query_info_label.setText(summary)

    # Offer the remaining records, capped at 1000, if there are any.
    available = min(self.all_hits, 1000)
    if self.num_retrieved >= available:
        self.retrieve_other_button.setText('All records retrieved')
        self.retrieve_other_button.setEnabled(False)
    else:
        self.retrieve_other_button.setText(
            'Retrieve remaining records ({})'.format(
                available - self.num_retrieved))
        self.retrieve_other_button.setEnabled(True)
        self.retrieve_other_button.setFocus()

    # Remember the query in the history.
    if keywords not in self.recent_queries:
        self.recent_queries.insert(0, keywords)
def run_initial_query(self):
    """Request the first page of a new query and publish the corpus.

    ``self.warning(1)`` and ``self.error(1)`` are called without a message
    first (presumably clearing earlier messages with id 1).  Nothing else
    happens when no API object is set.  If none of the five text-field
    flags is ``True`` a warning is shown and the method returns.

    Otherwise the query URL is set from the combo box text, the year range
    and the field flags, and page 0 is requested.  On success a corpus with
    the class variable ``section_name`` is built, stored and sent; the hit
    counters, the info label and the "retrieve remaining" button are
    refreshed; and the query text is put at the front of the history unless
    it is already there.  Without a result, an ``HTTPError`` or ``URLError``
    returned by the API object is reported through ``self.error``.
    """
    self.warning(1)
    self.error(1)

    # Safety lock: without the NYT object there is nothing to query.
    if not self.nyt_api:
        return

    keywords = self.query_combo.currentText()

    selected_fields = [
        self.includes_headline,
        self.includes_lead_paragraph,
        self.includes_snippet,
        self.includes_abstract,
        self.includes_keywords,
    ]
    if True not in selected_fields:
        self.warning(1, "You must select at least one text field.")
        return

    self.nyt_api.set_query_url(
        keywords, self.year_from, self.year_to, selected_fields)
    response, _cached, failure = self.nyt_api.execute_query(0)

    if not response:
        if failure:
            # HTTPError goes first: it is a subclass of URLError.
            if isinstance(failure, HTTPError):
                self.error(
                    1, "An error occurred(HTTP {})".format(failure.code))
            elif isinstance(failure, URLError):
                self.error(
                    1, "An error occurred(URL {})".format(failure.reason))
        return

    # Build the output corpus from the parsed records.
    documents, metas, meta_vars, class_values = parse_record_json(
        response, selected_fields)
    section_var = DiscreteVariable(
        "section_name", values=list(set(class_values)))
    encoded = [section_var.to_val(value) for value in class_values]
    Y = np.array(encoded)[:, np.newaxis]
    Y[np.isnan(Y)] = 0
    domain = Domain([], class_vars=[section_var], metas=meta_vars)

    self.output_corpus = Corpus(documents, None, Y, metas, domain)
    self.send(Output.CORPUS, self.output_corpus)

    # Refresh the hit counters and the info label.
    payload = response["response"]
    self.all_hits = payload["meta"]["hits"]
    self.num_retrieved = len(payload["docs"])
    summary = "Records: {}\nRetrieved: {}".format(
        self.all_hits, self.num_retrieved)
    if self.all_hits > 1000:
        summary += " (max 1000)"
    self.query_info_label.setText(summary)

    # Offer the remaining records, capped at 1000, if there are any.
    available = min(self.all_hits, 1000)
    if self.num_retrieved >= available:
        self.retrieve_other_button.setText('All records retrieved')
        self.retrieve_other_button.setEnabled(False)
    else:
        self.retrieve_other_button.setText(
            'Retrieve remaining records ({})'.format(
                available - self.num_retrieved))
        self.retrieve_other_button.setEnabled(True)
        self.retrieve_other_button.setFocus()

    # Remember the query in the history.
    if keywords not in self.recent_queries:
        self.recent_queries.insert(0, keywords)