Example #1
def build_country_src_data(src_data, alpha2_iso_code, src_cat):
    cat, created = Category.objects.get_or_create(name=src_cat)
    for article in src_data:
        src = check_for_source(article["source"]["name"])
        language_alpha2 = api_country_codes.get(alpha2_iso_code).get("language")
        language = get_or_create_language(language_alpha2)
        country = get_or_create_country(alpha2_iso_code)
        if src is None:
            # No existing Source record: create one for the publishing country.
            if country and language:
                new_src = Source.objects.create(name=article["source"]["name"],
                                                publishing_country=country)
                new_src.languages.add(language)
                new_src.categories.add(cat)
        elif isinstance(src, Source):
            # Existing Source: attach the category and language, and record any
            # additional readership country beyond the publishing country.
            src.categories.add(cat)
            src.languages.add(language)
            if src.publishing_country != country:
                src.readership_countries.add(country)
        else:
            log.error(
                f"{type(src)} passed when expecting Source or None"
            )
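For context, a minimal driver sketch (not part of the source) showing how Example #7's req_country_src_data could feed this function; the helper name and the category tuple are assumptions for illustration, and api_country_codes is assumed to be a dict keyed by alpha-2 ISO codes, as the lookup above implies.

# Hypothetical driver loop, not from the source: fetch top headlines per
# country/category pair and hand the articles to build_country_src_data.
def refresh_country_sources(categories=("business", "technology")):
    for alpha2_iso_code in api_country_codes:
        for src_cat in categories:
            articles = req_country_src_data(alpha2_iso_code, src_cat)
            if articles:
                build_country_src_data(articles, alpha2_iso_code, src_cat)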
Example #2
def get_geo_data(self) -> bool:
    try:
        if "ON_HEROKU" in os.environ:
            # On Heroku the GeoJSON is served from the staticfiles storage URL.
            geo_data_url = staticfiles_storage.url("js/geo_data.json")
            response = requests.get(geo_data_url)
            self.json_data = response.json()
            return True
        else:
            # Local development: read the GeoJSON straight from the static directory.
            with open("mtn_web/static/js/geo_data.json") as geo_data_json:
                self.json_data = json.load(geo_data_json)
                if self.json_data is None:
                    raise FileNotFoundError
            return True
    except (FileNotFoundError, TypeError, IOError, MissingSchema):
        # Fall back to fetching the GeoJSON from a configured remote URL.
        try:
            self.req_data = requests.get(os.getenv("GEO_DATA_URL"))
            if self.req_data.status_code == 200:
                self.json_data = self.req_data.json()
                return True
            else:
                return False
        except requests.exceptions.RequestException as e:
            log.error(f"Error fetching mapping json: {e}")
            return False
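The class that owns get_geo_data is not shown in this snippet; Example #8's type hint suggests it is GeoDataManager. A hedged usage sketch, assuming that class can be constructed without arguments:

# Assumption: the owning class is GeoDataManager (per Example #8's type hint)
# and takes no constructor arguments.
geo_data_manager = GeoDataManager()
if geo_data_manager.get_geo_data():
    # json_data now holds the GeoJSON that build_choropleth (Example #8) consumes
    log.info("geo data loaded")
else:
    log.error("GeoJSON could not be read locally or fetched remotely")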
Example #3
def choro_to_file(choro_html: str, filename: str) -> bool:
    try:
        with open(CHORO_MAP_ROOT + filename, "w") as file:
            file.write(choro_html)
            return True
    except FileNotFoundError as e:
        log.error(f"Error writing choropleth HTML to file: {e}")
        return False
Example #4
def check_for_source(src_name: str) -> Optional[Source]:
    if src_name:
        try:
            return Source.objects.get(name=src_name)
        except (AttributeError, Source.DoesNotExist) as err:
            log.error(f"{err} in check_for_source({src_name})")
            return None
    else:
        return None
Example #5
def req_top_src_data():
    response = requests.get(f"https://newsapi.org/v2/sources?apiKey={api_key}")
    try:
        if response.status_code == HTTPStatus.OK:
            return response.json()["sources"]
        else:
            response.raise_for_status()
            # TODO msg/log
            return None
    except requests.exceptions.HTTPError as err:
        log.error(f"{err} in req_top_src_data()")
        return None
Example #6
def verify_source(source_name: str) -> Union[Source, bool]:
    if source_name:
        try:
            source = Source.objects.get(name=source_name)
        except (AttributeError, Source.DoesNotExist) as e:
            log.error(
                f"{e} propagating from constructor.verify_source({source_name})"
            )
            source = False
    else:
        log.error(f"verify_source({source_name}) called with an empty source name.")
        source = False
    return source
Example #7
def req_country_src_data(alpha2_iso_code, src_cat=None):
    endpoint = f"https://newsapi.org/v2/top-headlines?country={alpha2_iso_code}&apiKey={api_key}"
    if src_cat is not None:
        endpoint += f"&category={src_cat}"
    try:
        response = requests.get(endpoint)
        if response.status_code == HTTPStatus.OK:
            return response.json()["articles"]
        else:
            response.raise_for_status()
            # TODO msg/log
            return None
    except requests.exceptions.HTTPError as err:
        log.error(
            f"{err} in req_country_src_data({alpha2_iso_code}, {src_cat})")
        return None
Example #8
    def build_choropleth(self, argument, focus, geo_data_manager: GeoDataManager) -> Optional[Tuple[folium.Map, str]]:
        try:
            world_df = gp.read_file("mtn_web/static/js/geo_data.json")
            global_map = folium.Map(location=[0, 0], tiles="OpenStreetMap", zoom_start=3)
            articles_per_country = pd.Series(geo_data_manager.result_dict)
            world_df["article_count"] = world_df["id"].map(articles_per_country)
            threshold_scale = self.get_threshold(articles_per_country)
            folium.Choropleth(
                geo_data=geo_data_manager.json_data,
                name="choropleth",
                data=world_df,
                columns=["id", "article_count"],
                key_on="feature.id",
                fill_color="Dark2_r",
                bins=[float(x) for x in threshold_scale],  # https://github.com/python-visualization/folium/issues/1130
                fill_opacity=0.8,
                line_opacity=0.2,
                nan_fill_color="#1a2b29",
                nan_fill_opacity=0.7,
                highlight=True,
            ).add_to(global_map)
            # ----------------------------------------------------------#
            #             Alternative fill_color options                #
            # ----------------------------------------------------------#
            #    YlGrBu - RdYlGn - YlOrBr - RdYlBu - PuBuGn - YlOrRd    #
            #    Oranges - Greens - Purples - Reds - Greys - Blues      #
            # Pastel1 - Pastel2 - Spectral - Set1 - Set2 - Set3 - Dark2 #
            # ----------------------------------------------------------#
            #  TODO offer fill color options for user selection
            folium.TileLayer("stamenwatercolor", attr="attr").add_to(global_map)
            folium.TileLayer("stamenterrain", attr="attr").add_to(global_map)
            folium.TileLayer("cartodbpositron", attr="attr").add_to(global_map)
            folium.TileLayer("OpenStreetMap", attr="attr").add_to(global_map)
            folium.LayerControl().add_to(global_map)
            filename = f"{datetime.ctime(datetime.now()).replace(' ', '_').replace(':', '-')}_{focus}_query_{argument}_choropleth_map.html"
            global_map.save(CHORO_MAP_ROOT + filename)  # TODO offer user download of map html file
            return (global_map, filename) if global_map and filename else None

        except FileNotFoundError as e:
            log.error(f"Error fetching mapping json: {e}")
            return None
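A hedged call-site sketch (not from the source): the owning instance is named "constructor" and the argument/focus values are placeholders; the point is that the Optional return must be checked before unpacking.

# "constructor" and the argument values below are placeholders for illustration.
# build_choropleth returns None when the GeoJSON file cannot be read.
result = constructor.build_choropleth("climate", "country", geo_data_manager)
if result is not None:
    global_map, filename = result
    log.info(f"choropleth saved as {filename}")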
Example #9
    @staticmethod
    def get_threshold(articles_per_country: pd.Series) -> List[int]:
        # Bucket the per-country article counts into six bin edges for the choropleth legend.
        max_count = articles_per_country.values.max()
        if max_count <= 5:
            threshold_scale = [0, 1, 2, 3, 4, 5]
        elif 5 < max_count <= 16:
            threshold_scale = [0, 1] + np.linspace(2, max_count + 1, 4, dtype=int).tolist()
        elif 16 < max_count <= 160:
            threshold_scale = [0, 1, 3, 7, 15, max_count + 1]
        elif max_count > 160:
            threshold_scale = [0, 1, 5, 13, 29, max_count + 1]
        else:
            log.error("threshold_scale not set by articles_per_country max in get_threshold")
            threshold_scale = [0, 1, 2, 3, 4, 5]
        return threshold_scale
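A quick illustration of the bucketing on a small per-country count Series (the input type matches the pd.Series built in Example #8); the owning class is not shown in these snippets, so "Constructor" below is a placeholder name.

# "Constructor" is a placeholder for whatever class owns get_threshold.
counts = pd.Series({"us": 12, "gb": 7, "fr": 1})
print(Constructor.get_threshold(counts))
# max 12 falls in the 5 < max <= 16 branch:
# [0, 1] + linspace(2, 13, 4, dtype=int) -> [0, 1, 2, 5, 9, 13]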
Example #10
    def execute_query(self) -> Tuple[List[dict], int]:
        response = requests.get(self.endpoint)
        payload = response.json()
        if payload["status"] == "error":
            # NewsAPI reports request problems in the response body rather than
            # via the HTTP status; return a (False, False) sentinel to the caller.
            log.error(payload)
            return False, False
        log.info(f"response = \n\n{response}")
        article_count = int(payload["totalResults"])
        article_data = list(payload["articles"])

        # ** DO NOT UNCOMMENT CODE BELOW UNLESS YOU HAVE A PAID SUBSCRIPTION FOR NEWSAPI
        # free version is limited to first 100 results (cannot use multiple requests to page results),
        # you will burn up your api calls fast fast fast if using the below w/ free api
        # BELOW IS FOR PAGING THROUGH MORE THAN 100 RESULTS**
        """
        if article_count > 100:
            pages = article_count//100
            if pages > 5:
                pages = 5

            for p in range(2, pages+2):  # 1st page processed already, +2 to account for exclusive range and the remaining page of <100 articles left after floor division
                try:
                    page = requests.get(f'{self.endpoint}&page={p}')
                    print(f'len(page.json()[articles])={len(page.json()["articles"])}')
                    article_data.extend(page.json()['articles'])

                except requests.exceptions.RequestException as rE:
                    logger.log(level=logging.INFO, msg=f'RequestException while getting article_data @ page # {p}')
                    logger.log(level=logging.ERROR, msg=logger.exception(rE))
                    continue

                except builtins.KeyError as kE:
                    logger.log(level=logging.INFO, msg=f'KeyErrorException while getting article_data on {p}')
                    logger.log(level=logging.ERROR, msg=logger.exception(kE))
                    continue
        """
        return article_data, article_count
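A hedged caller sketch (assumption: "query_manager" names an instance of the class that owns execute_query); the (False, False) sentinel has to be checked before treating the first element as a list of articles.

# "query_manager" is a placeholder instance name for illustration.
article_data, article_count = query_manager.execute_query()
if article_data is False:
    log.error("query failed; see the NewsAPI error payload logged above")
else:
    log.info(f"fetched {article_count} total results ({len(article_data)} returned)")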