# Module-level imports assumed by the helpers below; project-local names
# (Source, Category, get_or_create_language, get_or_create_country,
# api_country_codes, api_key, CHORO_MAP_ROOT, GeoDataManager) are defined
# elsewhere in this repo.
import json
import logging
import os
from datetime import datetime
from http import HTTPStatus
from typing import List, Optional, Tuple, Union

import folium
import geopandas as gp
import numpy as np
import pandas as pd
import requests
from django.contrib.staticfiles.storage import staticfiles_storage
from requests.exceptions import MissingSchema

log = logging.getLogger(__name__)  # assumed module logger


def build_country_src_data(src_data, alpha2_iso_code, src_cat):
    cat, _ = Category.objects.get_or_create(name=src_cat)
    for article in src_data:
        src = check_for_source(article["source"]["name"])
        language_alpha2 = api_country_codes.get(alpha2_iso_code).get("language")
        language = get_or_create_language(language_alpha2)
        country = get_or_create_country(alpha2_iso_code)
        if src is None:
            if country and language:
                new_src = Source.objects.create(
                    name=article["source"]["name"], publishing_country=country
                )
                new_src.languages.add(language)
                new_src.categories.add(cat)
        elif isinstance(src, Source):
            src.categories.add(cat)
            src.languages.add(language)  # M2M add() is idempotent
            # compare by equality, not identity: separately fetched model
            # instances for the same row are distinct objects
            if src.publishing_country != country:
                src.readership_countries.add(country)
        else:
            log.error(f"{type(src)} passed when expecting Source or None")

def get_geo_data(self) -> bool:
    try:
        if "ON_HEROKU" in os.environ:
            geo_data_url = staticfiles_storage.url("js/geo_data.json")
            response = requests.get(geo_data_url)
            self.json_data = response.json()
            return True
        with open("mtn_web/static/js/geo_data.json") as geo_data_json:
            self.json_data = json.load(geo_data_json)
        if self.json_data is None:
            raise FileNotFoundError
        return True
    except (FileNotFoundError, TypeError, IOError, MissingSchema):
        # local/static lookup failed; fall back to the remote GeoJSON URL
        try:
            self.req_data = requests.get(os.getenv("GEO_DATA_URL"))
            if self.req_data.status_code == 200:
                self.json_data = self.req_data.json()
                return True
            return False
        except requests.exceptions.RequestException as e:
            log.error(f"Error fetching mapping json: {e}")
            return False

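# Sketch of the lookup order above (Heroku static storage, then the local
# static file, then the GEO_DATA_URL env var). Hypothetical usage, assuming
# get_geo_data() is a method of the GeoDataManager referenced in
# build_choropleth below and that geo_data.json is a GeoJSON FeatureCollection:
#
#     gdm = GeoDataManager()
#     if gdm.get_geo_data():
#         country_ids = {feature["id"] for feature in gdm.json_data["features"]}
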
def choro_to_file(choro_html: str, filename: str) -> bool:
    try:
        with open(CHORO_MAP_ROOT + filename, "w") as file:
            file.write(choro_html)
        return True
    except FileNotFoundError as e:
        log.error(f"Error writing choropleth HTML to file: {e}")
        return False

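# Minimal usage sketch for the helper above (hypothetical filename; assumes
# CHORO_MAP_ROOT names a writable directory and global_map is a folium.Map):
#
#     html = global_map.get_root().render()
#     if choro_to_file(html, "example_choropleth_map.html"):
#         log.info("choropleth written to disk")
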
def check_for_source(src_name: str) -> Optional[Source]:
    if not src_name:
        return None
    try:
        return Source.objects.get(name=src_name)
    except (AttributeError, Source.DoesNotExist) as err:
        log.error(f"{err} in check_for_source({src_name})")
        return None

def req_top_src_data():
    response = requests.get(f"https://newsapi.org/v2/sources?apiKey={api_key}")
    try:
        if response.status_code == HTTPStatus.OK:
            return response.json()["sources"]
        response.raise_for_status()  # TODO msg/log
        return None
    except requests.exceptions.HTTPError as err:
        log.error(f"{err} in req_top_src_data()")
        return None

def verify_source(source_name: str) -> Union[Source, bool]:
    if source_name:
        try:
            source = Source.objects.get(name=source_name)
        except (AttributeError, Source.DoesNotExist) as e:
            log.error(
                f"{e} propagating from constructor.verify_source({source_name})"
            )
            source = False
    else:
        log.error(f"{source_name} retrieval failed.")
        source = False
    return source

def req_country_src_data(alpha2_iso_code, src_cat=None):
    if src_cat is None:
        endpoint = (
            f"https://newsapi.org/v2/top-headlines"
            f"?country={alpha2_iso_code}&apiKey={api_key}"
        )
    else:
        endpoint = (
            f"https://newsapi.org/v2/top-headlines"
            f"?country={alpha2_iso_code}&category={src_cat}&apiKey={api_key}"
        )
    try:
        response = requests.get(endpoint)
        if response.status_code == HTTPStatus.OK:
            return response.json()["articles"]
        response.raise_for_status()  # TODO msg/log
        return None
    except requests.exceptions.HTTPError as err:
        log.error(f"{err} in req_country_src_data({alpha2_iso_code}, {src_cat})")
        return None

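# Hedged end-to-end sketch tying req_country_src_data to build_country_src_data
# ("us" and "technology" are illustrative values; assumes api_country_codes has
# an entry for "us" and api_key holds a valid NewsAPI key):
#
#     articles = req_country_src_data("us", src_cat="technology")
#     if articles is not None:
#         build_country_src_data(articles, "us", "technology")
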
def build_choropleth(
    self, argument, focus, geo_data_manager: GeoDataManager
) -> Optional[Tuple[folium.Map, str]]:
    try:
        world_df = gp.read_file("mtn_web/static/js/geo_data.json")
        global_map = folium.Map(location=[0, 0], tiles="OpenStreetMap", zoom_start=3)
        articles_per_country = pd.Series(geo_data_manager.result_dict)
        world_df["article_count"] = world_df["id"].map(articles_per_country)
        threshold_scale = self.get_threshold(articles_per_country)
        folium.Choropleth(
            geo_data=geo_data_manager.json_data,
            name="choropleth",
            data=world_df,
            columns=["id", "article_count"],
            key_on="feature.id",
            fill_color="Dark2_r",
            # https://github.com/python-visualization/folium/issues/1130
            bins=[float(x) for x in threshold_scale],
            fill_opacity=0.8,
            line_opacity=0.2,
            nan_fill_color="#1a2b29",
            nan_fill_opacity=0.7,
            highlight=True,
        ).add_to(global_map)
        # ----------------------------------------------------------#
        # Alternative fill_color options                             #
        # ----------------------------------------------------------#
        # YlGrBu - RdYlGn - YlOrBr - RdYlBu - PuBuGn - YlOrRd        #
        # Oranges - Greens - Purples - Reds - Greys - Blues          #
        # Pastel1 - Pastel2 - Spectral - Set1 - Set2 - Set3 - Dark2  #
        # ----------------------------------------------------------#
        # TODO offer fill color options for user selection
        folium.TileLayer("stamenwatercolor", attr="attr").add_to(global_map)
        folium.TileLayer("stamenterrain", attr="attr").add_to(global_map)
        folium.TileLayer("cartodbpositron", attr="attr").add_to(global_map)
        folium.TileLayer("OpenStreetMap", attr="attr").add_to(global_map)
        folium.LayerControl().add_to(global_map)
        filename = (
            f"{datetime.ctime(datetime.now()).replace(' ', '_').replace(':', '-')}"
            f"_{focus}_query_{argument}_choropleth_map.html"
        )
        global_map.save(CHORO_MAP_ROOT + filename)
        # TODO offer user download of map html file
        return global_map, filename
    except FileNotFoundError as e:
        log.error(f"Error fetching mapping json: {e}")
        return None

@staticmethod  # invoked as self.get_threshold(...) in build_choropleth
def get_threshold(articles_per_country: pd.Series) -> List[int]:
    max_count = articles_per_country.values.max()
    if max_count <= 5:
        threshold_scale = [0, 1, 2, 3, 4, 5]
    elif 5 < max_count <= 16:
        # two fixed low bins, then four evenly spaced integer stops up to max+1
        threshold_scale = [0, 1] + np.linspace(
            2, max_count + 1, 4, dtype=int
        ).tolist()
    elif 16 < max_count <= 160:
        threshold_scale = [0, 1, 3, 7, 15, max_count + 1]
    elif max_count > 160:
        threshold_scale = [0, 1, 5, 13, 29, max_count + 1]
    else:
        log.error(
            "threshold-scale not being set in choropleth by articles_per_country.max"
        )
        threshold_scale = [0, 1, 2, 3, 4, 5]
    return threshold_scale

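# Worked example for the 5 < max <= 16 branch (a sketch; "Analyst" is a
# hypothetical name for the owning class): a max count of 12 yields [0, 1] plus
# np.linspace(2, 13, 4, dtype=int) -> [2, 5, 9, 13].
#
#     counts = pd.Series({"us": 12, "gb": 4, "fr": 1})
#     Analyst.get_threshold(counts)  # -> [0, 1, 2, 5, 9, 13]
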
def execute_query(self) -> Tuple[Union[List[dict], bool], Union[int, bool]]:
    response = requests.get(self.endpoint)
    if response.json()["status"] == "error":
        log.error(response.json())
        return False, False
    log.info(f"response = \n\n{response}")
    article_count = int(response.json()["totalResults"])
    article_data = list(response.json()["articles"])
    # ** DO NOT UNCOMMENT CODE BELOW UNLESS YOU HAVE A PAID SUBSCRIPTION FOR NEWSAPI
    # free version is limited to first 100 results (cannot use multiple requests to page results),
    # you will burn up your api calls fast fast fast if using the below w/ free api
    # BELOW IS FOR PAGING THROUGH MORE THAN 100 RESULTS **
    """
    if article_count > 100:
        pages = article_count // 100
        if pages > 5:
            pages = 5
        # 1st page processed already; +2 accounts for the exclusive range and the
        # remaining page of <100 articles left after floor division
        for p in range(2, pages + 2):
            try:
                page = requests.get(f'{self.endpoint}&page={p}')
                article_data.extend(page.json()['articles'])
            except requests.exceptions.RequestException as req_err:
                log.error(f'RequestException while getting article_data @ page {p}: {req_err}')
                continue
            except KeyError as key_err:
                log.error(f'KeyError while getting article_data on page {p}: {key_err}')
                continue
    """
    return article_data, article_count

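# Hypothetical usage of execute_query (sketch only; "QueryManager" and its
# constructor argument are invented here, and the real owner of self.endpoint
# lives elsewhere in the repo):
#
#     qm = QueryManager(
#         endpoint=f"https://newsapi.org/v2/everything?q=water&apiKey={api_key}"
#     )
#     articles, count = qm.execute_query()
#     if articles is not False:
#         log.info(f"fetched {count} total results, {len(articles)} in first page")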