Пример #1
0
    def _post_info(
            self, shortcode_or_model: Union[str,
                                            InstagramPost]) -> InstagramPost:
        """Resolve a shortcode (or an existing model) to an ``InstagramPost``.

        :param shortcode_or_model: Either an ``InstagramPost``, which is
            returned unchanged, or a shortcode passed to the web API client's
            ``media_info2``.
        :return: The post model built from the ``media_info2`` response.
        """
        if isinstance(shortcode_or_model, InstagramPost):
            # Already a model: nothing to look up.
            return shortcode_or_model

        info = Addict(self._web_api_client.media_info2(shortcode_or_model))

        # ``x or None`` collapses falsy values (including the empty Addict
        # produced by a missing key) to None.
        fields = {
            "post_num_id": info.id or None,  # todo: this is actually a str
            "shortcode": info.shortcode or None,
            "img_height": _to_int(info.dimensions.height),
            "img_width": _to_int(info.dimensions.width),
            "display_url": info.display_url or None,
            "is_video": _to_bool(info.is_video),
            "caption_is_edited": _to_bool(info.caption_is_edited),
            "created_at": _timestamp_to_datetime(info.taken_at_timestamp),
            "like_count": _to_int(info.likes.count),
            "comment_count": _to_int(info.comments.count),
            # Location fields are only dereferenced when a location is set.
            "location_id": _to_int(info.location and info.location.id),
            "location_name": (info.location and info.location.name) or None,
            "location_address_json":
            (info.location and info.location.address_json) or None,
            "owner_id": _to_int(info.owner.id),
            "owner_username": info.owner.username or None,
            "owner_full_name": info.owner.full_name or None,
            "is_ad": _to_bool(info.is_ad),
            "caption": info.caption.text or None,
            "users_in_photo": [tagged.user for tagged in info.users_in_photo],
            "hashtags": _get_hashtags(info.caption.text),
            "mentions": _get_mentions(info.caption.text),
        }
        return InstagramPost(**fields)
Пример #2
0
    def fit(self, df: pd.DataFrame) -> None:
        """Fit one encoder per column listed in ``self.columns_map``.

        Stores the frame's column list in ``self.columns``, the fitted
        encoders in ``self.encoders[category_type][column]`` and records each
        column's category in ``self.column2type``.

        Args:
            df: Frame holding every column named in ``self.columns_map``.
        """
        self.columns = list(df.columns)
        self.encoders = Addict()

        for category_type, columns in self.columns_map.items():
            for column in columns:
                encoder_class, defaults = self.type2encoder[category_type]

                # Copy the defaults so a per-column setting (the cyclical
                # amplitude) never leaks into the shared ``type2encoder``
                # entry.
                parameters = dict(defaults)

                if category_type == "cyclical":
                    # Cyclical entries are indexed as (column, amplitude).
                    parameters["amplitude"] = column[1]
                    column = column[0]

                encoder = encoder_class(**parameters)

                x = df[column].values

                if category_type == "numerical":
                    # Numerical encoders are fed a single-feature 2-D array.
                    x = x.reshape(-1, 1)

                encoder.fit(x)

                self.encoders[category_type][column] = encoder
                self.column2type[column] = category_type
Пример #3
0
def main(config_file):
    """Turn the raw supermarket search dump into a CSV of supermarkets.

    Reads the YAML config at ``config_file``, loads the JSON file named by
    ``output.existing_supermarkets_raw``, flattens each record, drops rows
    that share the same ``lat``/``lng``, logs how many rows are of type
    ``"grocery"`` and writes the rows of type ``"supermarket"`` to
    ``output.existing_supermarkets_data``.

    Args:
        config_file: Path to the YAML configuration file.
    """
    # Use a context manager so the config file handle is always closed.
    with open(config_file, 'r') as config_fh:
        conf = Addict(yaml.safe_load(config_fh))
    if conf.get("logging") is not None:
        logging.config.dictConfig(conf["logging"])
    else:
        logging.basicConfig(level=logging.INFO,
                            format="%(asctime)s - %(levelname)s - %(message)s")

    raw_file = conf.get("output").get("existing_supermarkets_raw")
    with open(raw_file, encoding='utf-8') as f:
        raw_records = json.load(f)

    supermarkets = []
    for supermarket_raw in raw_records:
        geocode = supermarket_raw.get("geometry").get("location")
        supermarkets.append({
            "name": supermarket_raw.get("name"),
            "addr": supermarket_raw.get("formatted_address"),
            "lat": geocode.get("lat"),
            "lng": geocode.get("lng"),
            "type": supermarket_raw.get("place_type"),
        })

    supermarkets_df = pd.DataFrame(supermarkets)
    logging.info("%s supermarkets are located in the city",
                 supermarkets_df.shape[0])
    supermarkets_df = supermarkets_df.drop_duplicates(subset=["lat", "lng"])
    logging.info("There are %s supermarkets left after duplicates removed",
                 supermarkets_df.shape[0])

    # The grocery subset is only counted for the log; it is not exported.
    grocery_df = supermarkets_df.loc[supermarkets_df["type"] == "grocery"]
    logging.info("%s of the results are grocery", grocery_df.shape[0])

    # reset_index() keeps the previous index as an "index" column in the CSV.
    supermarkets_df = supermarkets_df.reset_index()
    supermarkets_df = supermarkets_df.loc[supermarkets_df["type"] ==
                                          "supermarket"]
    output_fp = conf.get("output").get("existing_supermarkets_data")
    supermarkets_df.to_csv(output_fp, index=False)
    logging.info("Information of existing supermarkets written to %s",
                 output_fp)
Пример #4
0
    def immutable(cls, d):
        """Return ``d`` passed through ``LockedDict.lock``.

        ``d`` is wrapped in ``Addict`` first unless it already is one. Useful
        for configs and other static declarations.
        """
        wrapped = d if isinstance(d, Addict) else Addict(d)
        return LockedDict.lock(wrapped)
Пример #5
0
def all_pages(page_elem, iter_elem, func, *args, **kwargs):
    """Call ``func`` page by page and gather every item into one list.

    ``func`` is invoked with ``*args``, ``**kwargs`` and an increasing
    ``page`` keyword starting at 1. For each response the container under
    ``page_elem`` is read; its ``iter_elem`` entries are accumulated, and its
    ``page``/``pages`` fields decide whether another call is needed.

    Returns:
        list: The accumulated ``iter_elem`` entries of all pages.
    """
    collected = []
    page_number = 1
    while True:
        response = Addict(func(*args, **kwargs, page=page_number))
        current = response[page_elem]
        collected.extend(current[iter_elem])

        if int(current.page) < int(current.pages):
            page_number += 1
            continue

        return collected
Пример #6
0
def _get_album_page(page, acc):
    """Append ``(title, creation datetime, id)`` for each album to ``acc``.

    Starts at ``page`` and keeps requesting ``flickr.photosets.getList``
    pages until the response reports that the last page was reached.

    Args:
        page: Number of the first page to request.
        acc: List that receives one tuple per album; it is mutated in place.

    Returns:
        The same ``acc`` list.
    """
    # Iterating instead of recursing keeps the call stack flat for accounts
    # with many pages.
    while True:
        albums = Addict(flickr.photosets.getList(page=page)).photosets

        for album in albums.photoset:
            created = datetime.fromtimestamp(int(album.date_create),
                                             timezone.utc)
            acc.append((album.title._content, created, album.id))

        # Compare numerically: if the API delivers these fields as strings,
        # a plain ``<`` would order them lexicographically ("10" < "9").
        if int(albums.page) >= int(albums.pages):
            return acc

        page += 1
Пример #7
0
def main(config_file):
    """Query the driving time from every city grid to every supermarket.

    Reads the YAML config at ``config_file``, loads the supermarket and grid
    CSV files named under ``input``, runs one ``DistAPIWorker`` per
    grid/supermarket pair and, when there is at least one response, dumps all
    responses as JSON to ``output.grid_to_supermarket_dist_raw``.

    Args:
        config_file: Path to the YAML configuration file.
    """
    # Context managers below make sure no file handle is left open.
    with open(config_file, 'r') as config_fh:
        conf = Addict(yaml.safe_load(config_fh))
    if conf.get("logging") is not None:
        logging.config.dictConfig(conf["logging"])
    else:
        logging.basicConfig(level=logging.INFO,
                            format="%(asctime)s - %(levelname)s - %(message)s")

    def load_rows(path):
        """Read a CSV file into a list of dicts keyed by its header row."""
        with open(path) as csv_fh:
            reader = csv.reader(csv_fh)
            header = next(reader)
            return [dict(zip(header, row)) for row in reader]

    supermarkets_file = conf.get("input").get("supermarkets_file")
    logging.info("Loading geocode of supermarkets from %s", supermarkets_file)
    supermarkets = load_rows(supermarkets_file)
    logging.info("%s supermarkets located in the city.", len(supermarkets))

    grid_geocode_file = conf.get("input").get("grid_geocode_file")
    logging.info("Loading geocode of city grids from %s", grid_geocode_file)
    grids = load_rows(grid_geocode_file)
    logging.info("The city is covered by %s 1km x 1km grids.", len(grids))

    api_key = conf.get("API").get("KEY")
    gmaps = googlemaps.Client(key=api_key)
    results = []
    counter = 0
    logging.info("Start querying driving time from city grid to supermarkets ...")
    start_time = time.time()
    for grid in grids:
        for supermarket in supermarkets:
            logging.debug("Processing grid: %s - supermarket: %s", grid, supermarket)
            dist_api_worker = DistAPIWorker(gmaps, grid, supermarket)
            results.append(dist_api_worker.run())
            counter += 1
            # Progress report every 1000 pairs.
            if counter % 1000 == 0:
                logging.info("%s grid-supermarket pair processed ... Elapsed time %s seconds",
                             counter, round(time.time() - start_time, 4))

    # Export query responses to file
    if results:
        results_fp = conf.get("output").get("grid_to_supermarket_dist_raw")
        with open(results_fp, 'w') as output_file:
            json.dump(results, output_file, indent=4)
        logging.info("%s query responses dumped to %s", len(results), results_fp)
Пример #8
0
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        """Call the wrapped function and convert dict results to ``Addict``.

        Args:
            args: Positional arguments forwarded to the wrapped function.
            kwargs: Keyword arguments forwarded to the wrapped function.

        Returns:
            An ``Addict`` for a dict result; for a list result, a new list in
            which every dict element is converted to ``Addict``; any other
            result is returned unchanged.

        """
        result = func(*args, **kwargs)

        if isinstance(result, list):
            return [
                Addict(item) if isinstance(item, dict) else item
                for item in result
            ]

        if isinstance(result, dict):
            return Addict(result)

        return result
Пример #9
0
 def _paginate_thumb_feed(
         self, feed_name: str, feed_kwargs: dict,
         media_path: Iterator[str]) -> Iterator[InstagramPostThumb]:
     """Yield post thumbnails from every page of a cursor-paginated feed.

     :param feed_name: Name of the method to call on the web API client.
     :param feed_kwargs: Keyword arguments passed to that method on each
         call, in addition to ``end_cursor``.
     :param media_path: Keys to follow, in order, from the response root to
         the object holding ``page_info`` and ``edges``.
     :return: Iterator of thumbnails built by ``_node_to_post_thumb``.
     """
     # Materialise the path once: it is walked again for every page, and a
     # one-shot iterator would be exhausted after the first page, which
     # silently ended pagination.
     path = tuple(media_path)
     fetch_page = getattr(self._web_api_client, feed_name)

     has_next_page = True
     end_cursor = None
     while has_next_page:
         media = Addict(fetch_page(**feed_kwargs, end_cursor=end_cursor))
         for key in path:
             media = media[key]

         # Read the cursor before yielding so the next request is ready when
         # the consumer resumes the generator.
         has_next_page = media.page_info.has_next_page
         end_cursor = media.page_info.end_cursor
         for edge in media.edges:
             yield self._node_to_post_thumb(edge.node)
Пример #10
0
def _get_page_of_images_in_album(flickr, album_id, page, acc, output=False):
    """Fetch one page of an album's photos and append them to ``acc``.

    Args:
        flickr: API client exposing ``photosets.getPhotos``.
        album_id: Value sent as ``photoset_id``.
        page: Page number to request.
        acc: List extended in place with the page's ``photo`` entries.
        output: When true, log the album title and total photo count.

    Returns:
        The ``photoset`` part of the response, so callers can read the
        album's own data.
    """
    response = flickr.photosets.getPhotos(
        photoset_id=album_id,
        page=page,
        extras="url_m,date_taken,geo",
    )
    album_info = Addict(response).photoset

    if output:
        message = (
            f"Processing album '{album_info.title}' with {album_info.total} "
            "photos...")
        logger.info(message)

    acc.extend(album_info.photo)

    return album_info
Пример #11
0
 def _node_to_post_thumb(cls, data: dict) -> InstagramPostThumb:
     """Build an ``InstagramPostThumb`` from a feed node dictionary.

     :param data: The node mapping; it is wrapped in ``Addict`` so missing
         keys read as empty values instead of raising.
     :return: The thumbnail model for this node.
     """
     node = Addict(data)
     caption = _get_caption(node)

     fields = {
         "post_num_id": node.id,
         "owner_num_id": _to_int(node.owner.id),
         "caption": caption,
         "shortcode": node.shortcode or None,
         "comment_count": _to_int(node.edge_media_to_comment.count),
         "like_count": _to_int(node.edge_media_preview_like.count),
         "created_at": _timestamp_to_datetime(node.taken_at_timestamp),
         "img_height": _to_int(node.dimensions.height),
         "img_width": _to_int(node.dimensions.width),
         "img_url": node.display_url or None,
         "is_video": _to_bool(node.is_video),
         # Hashtags and mentions are both derived from the caption.
         "hashtags": _get_hashtags(caption),
         "mentions": _get_mentions(caption),
     }
     return InstagramPostThumb(**fields)
    def __init__(self, columns_map):
        """Set up encoder configuration from a column map.

        Args:
            columns_map: Mapping from category type (``"numerical"``,
                ``"categorical"`` or ``"cyclical"``) to the columns of that
                type. An optional ``"joined_encoders"`` entry maps a column
                name to the sub-columns that share its category type.

        Raises:
            ValueError: If ``columns_map`` contains a key other than the
                supported category types (and ``"joined_encoders"``).
        """
        # Work on a shallow copy so that removing "joined_encoders" does not
        # alter the caller's mapping; otherwise a second instance built from
        # the same mapping would silently lose its joined encoders.
        columns_map = columns_map.copy()
        self.joined_encoders = columns_map.pop("joined_encoders", {})
        self.columns_map = columns_map

        self.type2encoder = {
            "numerical": (MinMaxScaler, {
                "feature_range": (-1, 1)
            }),
            "categorical": (LabelEncoderUnseen, {}),
            "cyclical": (CyclicEncoder, {}),
        }

        # Every remaining key must be one of the supported category types.
        unsupported = set(columns_map.keys()) - set(self.type2encoder.keys())
        if unsupported:
            raise ValueError(
                f"Wrong column names in columns_map {columns_map}.")

        self.category_types = set(self.columns_map.keys())

        self.column2type = self.get_columns2type()

        self.column_classes = Addict()

        for category_type in self.category_types:
            for column in columns_map[category_type]:
                self.column_classes[column] = Column(
                    column, category_type=category_type)

        # Sub-columns take the category type of the column they are joined
        # to.
        for column_name, column_names in self.joined_encoders.items():
            category_type = self.column2type[column_name]
            for subcolumn_name in column_names:
                self.column_classes[subcolumn_name] = Column(
                    subcolumn_name, category_type=category_type)

        self.numerical_columns = self._get_columns("numerical")
        self.categorical_columns = self._get_columns("categorical")
        self.cyclical_columns = self._get_columns("cyclical")
Пример #13
0
def main(config_file):
    """Search the places API for supermarkets in Penang and save them to CSV.

    Reads the YAML config at ``config_file``, issues one text-search request
    per place type, flattens the results, drops rows that share the same
    ``lat``/``lng`` and writes the remainder to ``output.filename``.

    Args:
        config_file: Path to the YAML configuration file.
    """
    # Use a context manager so the config file handle is always closed.
    with open(config_file, 'r') as config_fh:
        conf = Addict(yaml.safe_load(config_fh))
    if conf.get("logging") is not None:
        logging.config.dictConfig(conf["logging"])
    else:
        logging.basicConfig(level=logging.INFO,
                            format="%(asctime)s - %(levelname)s - %(message)s")
    API_KEY = conf.get("API").get("KEY")
    base_url = conf.get("API").get("URL")
    # place_types = ["supermarkets", "convenience_store", "department_store", "store", "grocery"]
    place_types = ["supermarkets", "grocery"]
    locations = []
    for place in place_types:
        logging.info("Searching for %s in Penang", place)
        query = "query=" + place + "+in+Penang"
        keyless_url = base_url + query
        url = keyless_url + "&key=" + API_KEY
        # Log the request without the key so the credential never ends up in
        # log output.
        logging.info("url of API query: %s", keyless_url)
        response = requests.get(url)
        # A payload without "results" is treated as zero hits instead of
        # failing on len(None).
        results = json.loads(response.text).get("results") or []
        logging.info("%s results are found.", len(results))
        for result in results:
            geocode = result.get("geometry").get("location")
            locations.append({
                "name": result.get("name"),
                "addr": result.get("formatted_address"),
                "lat": geocode.get("lat"),
                "lng": geocode.get("lng"),
                "type": place,
            })

    places_df = pd.DataFrame(locations)
    places_df = places_df.drop_duplicates(subset=["lat", "lng"])
    # reset_index() keeps the previous index as an "index" column in the CSV.
    places_df = places_df.reset_index()
    output_fp = conf.get("output").get("filename")
    places_df.to_csv(output_fp, index=False)
    logging.info("%s supermarkets are located in the city", places_df.shape[0])
    logging.info("Information of existing supermarkets written to %s",
                 output_fp)
Пример #14
0
    def _user_info(self, u: Union[str, int]) -> InstagramUser:
        """Fetch profile details for a user.

        :param u: Username, or a user id (an int or an all-digit string).
            Prefer the username (fewer gets): a user id is first resolved to
            a username through the owner of the user's first feed post.
        :return: The ``InstagramUser`` built from the ``user_info2`` response.
        """

        # todo: username <-> user_id bidict cache

        if not isinstance(u, str) or u.isdigit():
            # Input is a user_id, so the username has to be looked up via the
            # first post in the user's feed.
            # todo: potential problem if user has no posts, how can we get username?
            user_id = _to_int(u)
            first_thumb = self.user_feed(user_id).limit(1).to_list()[0]
            first_post = self._post_info(first_thumb.shortcode)
            u = first_post.owner_username

        username = u

        d = Addict(self._web_api_client.user_info2(user_name=username))
        # ``x or None`` collapses falsy values (including the empty Addict
        # produced by a missing key) to None.
        return InstagramUser(
            biography=d.biography or None,
            website=d.website or None,
            followed_by_count=_to_int(d.counts.followed_by),
            follows_count=_to_int(d.counts.follows),
            full_name=d.full_name or None,
            user_id=_to_int(d.id),
            is_business_account=_to_bool(d.is_business_account),
            is_joined_recently=_to_bool(d.is_joined_recently),
            is_private=_to_bool(d.is_private),
            is_verified=_to_bool(d.is_verified),
            profile_pic_url=d.profile_pic_url or None,
            username=d.username or None,
            connected_fb_page=d.connected_fb_page or None,
            media_count=_to_int(d.counts.media),
        )
Пример #15
0
# Script: walk the photos of one hard-coded album and flag when the photo
# with id ``start_id`` has been seen, stopping at the photo with id
# ``end_id``.
flickr = auth_flickr()

# Alternative photo source (tag search), currently disabled:
# search_result = Addict(
#     flickr.photos.search(
#         user_id="me",
#         tags="andonnee",
#         tag_mode="all",
#         min_uploaded_date="2021-10-25 00:00:00",
#         per_page=500,
#         extras="tags,machine_tags",
#     )
# )

# Only one page of up to 500 photos is requested.
album = Addict(
    flickr.photosets.getPhotos(
        photoset_id="72157720209505213",
        per_page=500,
    ))

# Ids are compared as strings against ``photo.id`` below.
start_id = "51730055105"
end_id = "51730863735"

is_process = False
# TODO replace with walk https://stuvel.eu/flickrapi-doc/7-util.html
for photo in album.photoset.photo:
    # The end photo itself is excluded: the loop stops before handling it.
    if photo.id == end_id:
        break

    # From the start photo onwards the flag stays set.
    if photo.id == start_id:
        is_process = True
from addict import Dict as Addict

from api_auth import auth_flickr

# Script: print the page URL and title of the authenticated user's photos
# taken on one hard-coded day.
flickr = auth_flickr()

taken_date = "2020-11-14"

# The search window covers the whole day; at most 5 results are requested.
search_result = Addict(
    flickr.photos.search(
        user_id="me",
        min_taken_date=f"{taken_date} 00:00:00",
        max_taken_date=f"{taken_date} 23:59:59",
        per_page=5,
    ))

print(f"Searching {taken_date}...")

# TODO replace with walk https://stuvel.eu/flickrapi-doc/7-util.html
for photo in search_result.photos.photo:
    # The search hit is replaced by the full record from photos.getInfo,
    # which supplies the title and the owner's nsid used below.
    photo = Addict(flickr.photos.getInfo(photo_id=photo.id)).photo
    title = photo.title._content
    owner = photo.owner.nsid
    id_ = photo.id
    url = f"https://flickr.com/photos/{owner}/{id_}"
    print(f"{url} {title}")
Пример #17
0
def gpx2flickr(
    ctx,
    gpx_filepath,
    flickr_album,
    delta,
    delta_tz,
    tolerance,
    is_clear,
    kml_output_path,
    kml_thumbnail_size,
    is_update_images,
    api_key,
    api_secret,
):
    """Add location information to Flickr images based on a GPX file"""
    # NOTE: the docstring above is left untouched; presumably it is shown as
    # command help text - confirm before rewording it.

    try:
        logger.info("Parsing time shift...")
        delta = process_delta(delta)
        # The optional timezone shift is added on top of the base shift.
        delta_total = (delta + process_delta([delta_tz])
                       if delta_tz else delta)

        # tz delta no different from delta for flickr (update time not supported)
        # so do not print intermediate time deltas like for images
        print_delta(delta, "Time")

        tolerance = process_tolerance(tolerance)
        gpx_segments = process_gpx(gpx_filepath)

        token_cache_location = click.get_app_dir(DEFAULT_APP_DIR)

        logger.info("Logging in to Flickr...")
        flickr = create_flickr_api(
            api_key,
            api_secret,
            token_cache_location=token_cache_location,
        )

        lookup = flickr.urls.lookupUser(url=flickr_album.url)
        user = Addict(lookup).user

        logger.info("Synching Flickr Geo tags to GPX...")
        if not is_update_images:
            logger.warning(
                "The images will not be updated with the positions!")

        debug_enabled = ctx.obj["DEBUG"]
        positions = synch_gps_flickr(
            flickr,
            user,
            flickr_album,
            gpx_segments,
            delta_total,
            tolerance,
            is_clear,
            is_update_images,
            debug_enabled,
        )

        # Callbacks handed to process_kml along with the positions.
        def image_src(x):
            return x.url_m

        def image_name(x):
            return create_photopage_url(x, user)

        process_kml(positions, kml_output_path, kml_thumbnail_size, image_src,
                    image_name)

    except Exception as ex:
        logger.error("*** An unrecoverable error occured ***")
        # In debug mode the traceback is logged as well.
        report = logger.exception if ctx.obj["DEBUG"] else logger.error
        report(str(ex))
        sys.exit(1)
Пример #18
0
def main(config_file):
    """Distribute district population over the city grid and export it.

    The pipeline runs in five logged parts: load the grid layer and write the
    grid-centre geocodes, assign residential buildings to grids, sum floor
    area per grid, spread the configured district population over the grids,
    and merge the result with the grid shape file for export as CSV and as a
    shape file.

    Args:
        config_file: Path to the YAML configuration file.
    """
    # Use a context manager so the config file handle is always closed.
    with open(config_file, 'r') as config_fh:
        conf = Addict(yaml.safe_load(config_fh))
    if conf.get("logging") is not None:
        logging.config.dictConfig(conf["logging"])
    else:
        logging.basicConfig(level=logging.INFO,
                            format="%(asctime)s - %(levelname)s - %(message)s")

    start_time = time.time()
    logging.info("Part I Load city grid layer")
    grid_fp = conf.get("input").get("grid_file")
    grid_df = pd.read_csv(grid_fp)
    # Grid ids are used as string index labels from here on.
    grid_df["id"] = grid_df["id"].apply(str)
    grid_df = grid_df.set_index("id")
    grid_df = grid_df.dropna()

    logging.info("Converting UTM coordinate system to geocode ...")
    # NOTE(review): the ``init=`` form is reportedly deprecated in newer
    # pyproj releases - confirm against the installed version.
    inProj = pyproj.Proj(init='epsg:3857')
    outProj = pyproj.Proj(init='epsg:4326')
    grid_df[["left_lng", "top_lat"]] = grid_df.apply(lambda row: convert_utm_coords(row[["left", "top"]], inProj, outProj), axis=1)
    grid_df[["right_lng", "bottom_lat"]] = grid_df.apply(lambda row: convert_utm_coords(row[["right", "bottom"]], inProj, outProj), axis=1)
    grid_df["center_lng"] = (grid_df["left_lng"] + grid_df["right_lng"]) / 2
    grid_df["center_lat"] = (grid_df["top_lat"] + grid_df["bottom_lat"]) / 2
    logging.info("Write grid center geocode to file")
    grid_geocode_df = grid_df[["center_lng", "center_lat"]]
    grid_geocode_file = conf.get("output").get("grid_geocode_file")
    grid_geocode_df.to_csv(grid_geocode_file, index=True)
    logging.info("Elapsed time %s seconds ...",
                 round(time.time() - start_time, 4))

    logging.info("Part II Assign residential buildings to grids")
    grid_dict = grid_df.to_dict("index")
    buildings_fp = conf.get("input").get("residential_buildings_file")
    buildings_df = pd.read_csv(buildings_fp)
    logging.info("Range of longitude: %s - %s",
                 buildings_df["center_lng"].min(),
                 buildings_df["center_lng"].max())
    logging.info("Range of latitude: %s - %s",
                 buildings_df["center_lat"].min(),
                 buildings_df["center_lat"].max())
    buildings_df["grid"] = buildings_df.apply(lambda row: assign_grid(row[["center_lng", "center_lat"]], grid_dict), axis=1)
    buildings_df = buildings_df.set_index("id")
    logging.info("Elapsed time: %s seconds ...",
                 round(time.time() - start_time, 4))

    logging.info("Part III Compute gridwise total floor area")
    logging.info("Residential building types: %s",
                 buildings_df["type"].unique())
    buildings_df[["area", "area_bungalow"]] = buildings_df.apply(lambda row: check_bungalow(row["type"], row["area"]), axis=1)
    # Select group-by columns with a list: the tuple form
    # ``[...]['area', 'area_bungalow']`` is rejected by current pandas.
    area_columns = ["area", "area_bungalow"]
    area_df = buildings_df.groupby(['grid'])[area_columns].agg('sum')
    area_df = pd.merge(area_df, grid_df, left_index=True, right_index=True)
    area_df = area_df.drop(["left", "right", "top", "bottom",
                            "left_lng", "top_lat", "right_lng", "bottom_lat",
                            "center_lng", "center_lat"], axis=1)
    area_df = area_df.reset_index()
    logging.info("Shape of area_df: %s", area_df.shape)
    logging.info(area_df.head())
    logging.info("Elapsed time: %s seconds ...",
                 round(time.time() - start_time, 4))

    logging.info("Part IV Distribute city population into grids")
    district_df = area_df.groupby(["district"])[area_columns].agg('sum')
    district_df["total_population"] = conf.get("district_population")
    # 5% of the district population is attributed to bungalows.
    district_df["bungalow_population"] = district_df["total_population"] / 100 * 5
    district_df["apartment_population"] = district_df["total_population"] - district_df["bungalow_population"]
    district_df = district_df.reset_index()
    logging.info(district_df)
    population_df = pd.merge(area_df, district_df[["district", "area", "apartment_population"]], on='district')
    # The merge suffixes the clashing "area" columns with _x (grid level) and
    # _y (district level); give them descriptive names.
    population_df = population_df.rename(columns={
            "index": "grid_id",
            "area_x": "area",
            "area_bungalow_x": "area_bungalow",
            "area_y": "area_apartment_district",
            "apartment_population": "apartment_population_district"
    })
    population_df["population"] = population_df["apartment_population_district"] / population_df["area_apartment_district"] * population_df["area"] + \
                                        population_df["area_bungalow"] / 100 * 5
    population_df["grid_id"] = population_df["grid_id"].apply(int)
    logging.info("Shape of population_df: %s", population_df.shape)
    logging.info(population_df.head())

    logging.info("Part V Incorporate grid population with shape file")
    grid_shape = conf.get("input").get("grid_shape_file")
    sf = shp.Reader(grid_shape)
    shp_df = read_shapefile(sf)
    logging.info("Shape of shp_df: %s", shp_df.shape)
    logging.info(shp_df.head())
    population_shp_df = pd.merge(shp_df, population_df[["grid_id", "population"]],
                                 left_on='id', right_on='grid_id', how='outer')
    # Assign the filled column back: an in-place fillna on a column selection
    # is not guaranteed to update the parent frame.
    population_shp_df["population"] = population_shp_df["population"].fillna(0)
    population_shp_df = population_shp_df.drop(["grid_id", "coords"], axis=1)
    logging.info("Export grid population to text file")
    grid_population_file = conf.get("output").get("grid_population_file")
    population_shp_df.to_csv(grid_population_file, index=False)
    gdf = gpd.read_file(grid_shape)
    gdf = gdf.to_crs({'init': 'epsg:3857'})
    # Rows are matched by index position between the two frames.
    gdf["population"] = population_shp_df["population"]
    grid_population_shape_file = conf.get("output").get("grid_population_shape_file")
    gdf.to_file(grid_population_shape_file)
    logging.info("Population info added to the shape file of city grid layer")
Пример #19
0
import json
from pathlib import Path
import webbrowser

from addict import Dict as Addict
import flickrapi

# Load the Flickr credentials from "api_key.json" in the current working
# directory; this runs at import time.
with open("api_key.json") as f:
    flickr_key = Addict(json.load(f))

# Module-level credentials read by auth_flickr(); taken from the "key" and
# "secret" entries of the JSON file.
api_key = flickr_key.key
api_secret = flickr_key.secret


def auth_flickr():
    """Return a ``flickrapi.FlickrAPI`` client holding a valid write token.

    The client is created with the module-level ``api_key``/``api_secret``
    and a token cache under ``./.flickr``. When no valid write token is
    available, the authorisation URL is opened in a browser tab and the
    verifier code is read from standard input.
    """
    cache_dir = Path("./.flickr").resolve()
    client = flickrapi.FlickrAPI(
        api_key,
        api_secret,
        format="parsed-json",
        token_cache_location=cache_dir,
    )

    if client.token_valid(perms="write"):
        return client

    # Out-of-band flow: the user copies the verifier code from the browser.
    client.get_request_token(oauth_callback="oob")
    webbrowser.open_new_tab(client.auth_url(perms="write"))
    client.get_access_token(input("Verifier code: "))

    return client
Пример #20
0
from api_auth import auth_flickr
from flickr_utils import get_photos


def make_flickr_photo_url(photo, user_id):
    """Build the Flickr photo page URL for ``photo``.

    The user segment of the URL is the photo's ``pathalias`` when it is
    truthy, otherwise ``user_id``.
    """
    owner_segment = photo.pathalias or user_id
    return f"https://www.flickr.com/photos/{owner_segment}/{photo.id}"


# Script: index the photos of one hard-coded album by their "datetaken"
# value.
flickr = auth_flickr()

# Token check result for the authenticated account.
user = Addict(flickr.auth.oauth.checkToken())

album_id = "72157719125798869"
# Local folder and file pattern; not used within this section of the script.
folder_path = (
    "/Users/guilhem/Pictures/camera/___uploaded/20210508_foretlanfon_ok/autocorrect2"
)
file_pattern = "DSCF*.JPG"

photos = list(
    get_photos(flickr, album_id, extras="date_taken,url_o,path_alias"))

# Map each "datetaken" value to its photo; photos sharing the same value
# overwrite one another, so the last one wins.
flickr_time_index = {}
for photo in photos:
    date_taken = photo.datetaken
    flickr_time_index[date_taken] = photo
Пример #21
0
from addict import Dict as Addict
import dateutil
from dateutil.relativedelta import relativedelta

from api_auth import auth_flickr

# Script: shift the taken date of every photo matched by the search below
# and write the corrected date back to Flickr.

# ``import dateutil`` alone does not load the ``parser`` submodule on
# python-dateutil releases without lazy submodule imports, so import it
# explicitly before ``dateutil.parser.isoparse`` is used.
import dateutil.parser

flickr = auth_flickr()

search_result = Addict(
    flickr.photos.search(
        user_id="me",
        tags="summer,ete,2020",
        tag_mode="all",
        min_taken_date="2019-01-16 00:00:00",
        max_taken_date="2019-01-17 00:00:00",
        extras="date_taken",
        per_page=500,
    ))

# The same correction applies to every photo, so build it once.
delta = relativedelta(years=1, months=8, hours=-8)

for photo in search_result.photos.photo:
    taken = photo.datetaken
    print(f"Original: {taken}")
    taken_dt = dateutil.parser.isoparse(taken)
    taken_dt_corrected = taken_dt + delta
    taken_corrected = taken_dt_corrected.strftime("%Y-%m-%d %H:%M:%S")
    print(f"Corrected: {taken_corrected}")
    # The response of setDates is not needed.
    flickr.photos.setDates(photo_id=photo.id,
                           date_taken=taken_corrected)
    def fit(self, df: pd.DataFrame) -> None:
        """Fit the encoders for all configured columns.

        Joined encoders are fitted first and shared between a column and its
        sub-columns; every remaining column in ``self.columns_map`` then gets
        its own encoder. Fitted encoders are stored in ``self.encoders``
        keyed by column name.

        Args:
            df: Frame holding every configured column.

        Raises:
            TypeError: If a categorical column does not have ``object`` dtype.
            ValueError: If a column or sub-column appears in more than one
                joined encoder.
        """
        for column in self.categorical_columns:
            if df[column].dtype != "object":
                raise TypeError(
                    f"We can process only string columns as categorical. "
                    f"We got {df[column].dtype} for {column}. "
                    f"Please cast 'str' on the column.")

        self.columns = [x for x in df.columns if x in self.column2type]
        self.encoders = Addict()

        for column_name, subcolumn_names in self.joined_encoders.items():
            if column_name in self.encoders:
                raise ValueError(
                    f"We should not have same column in two joined columns! "
                    f"But we got it for {column_name}")

            category_type = self.column2type[column_name]
            encoder_class, parameters = self.type2encoder[category_type]

            encoder = encoder_class(**parameters)

            x = [df[column_name].values]

            if category_type == "categorical":
                # One shared encoder must see the values of the main column
                # and of all its sub-columns.
                for subcolumn_name in subcolumn_names:
                    x += [df[subcolumn_name].values]

                x = np.concatenate(x)

            # NOTE(review): for non-categorical joined columns ``x`` stays a
            # one-element list holding the value array - confirm that this is
            # the input the encoder expects.
            encoder.fit(x)

            self.encoders[column_name] = encoder

            for subcolumn_name in subcolumn_names:
                if subcolumn_name in self.encoders:
                    raise ValueError(
                        f"We should not have same subcolumn in two joined columns! "
                        f"But we got it for {subcolumn_name}")

                self.encoders[subcolumn_name] = encoder

        for category_type, column_names in self.columns_map.items():
            encoder_class, parameters = self.type2encoder[category_type]

            for column_name in column_names:
                # Columns already covered by a joined encoder are skipped.
                if column_name in self.encoders:
                    continue

                # Copy the defaults so the per-column amplitude never leaks
                # into the shared ``type2encoder`` entry.
                column_parameters = dict(parameters)

                if category_type == "cyclical":
                    # Cyclical entries are indexed as (column, amplitude).
                    column_parameters["amplitude"] = column_name[1]

                    column = column_name[0]
                else:
                    column = column_name

                encoder = encoder_class(**column_parameters)

                x = df[column].values

                encoder.fit(x)
                self.encoders[column] = encoder
Пример #23
0
def main(config_file):
    """Compute supermarket density per grid and export it.

    Flattens the raw distance-query responses into a CSV, counts the
    supermarkets each grid catches within ``max_driving_time``, derives a
    density per grid from the simulated population, and writes the result
    both as CSV and as a shape file.

    Args:
        config_file: Path to the YAML configuration file.
    """
    # Use a context manager so the config file handle is always closed.
    with open(config_file, 'r') as config_fh:
        conf = Addict(yaml.safe_load(config_fh))
    if conf.get("logging") is not None:
        logging.config.dictConfig(conf["logging"])
    else:
        logging.basicConfig(level=logging.INFO,
                            format="%(asctime)s - %(levelname)s - %(message)s")
    raw_file = conf.get("output").get("grid_to_supermarket_dist_raw")
    with open(raw_file, encoding='utf-8') as f:
        raw_records = json.load(f)

    dist_data = []
    for dist_raw in raw_records:
        dist_obj = {
            "grid_id": dist_raw["grid_id"],
            "supermarket_id": dist_raw["supermarket_id"],
            "status": dist_raw["status"],
            # Defaults cover every path on which the response carries no
            # usable element, including "rows" present but "elements" empty,
            # which previously left both keys out of the record.
            "distance": None,
            "driving_time": None,
        }
        rows = dist_raw.get("rows")
        elements = rows[0].get("elements") if rows else None
        if elements:
            # Only the first element of the first row is used.
            element = elements[0]
            if element.get("distance"):
                dist_obj["distance"] = element.get("distance").get("value")
            if element.get("duration"):
                dist_obj["driving_time"] = element.get("duration").get("value")
        dist_data.append(dist_obj)

    dist_df = pd.DataFrame(dist_data)
    output_file = conf.get("output").get("grid_to_supermarket_dist_data")
    dist_df.to_csv(output_file, index=False)
    logging.info("%s distance query results written to %s",
                 len(dist_data), output_file)

    supermarket_counts = {}
    max_driving_time = int(conf.get("max_driving_time"))
    for grid_id in dist_df["grid_id"].unique():
        supermarket_counts[grid_id] = catch_supermarkets(grid_id, dist_df, max_driving_time)

    population_file = conf.get("input").get("grid_population_file")
    logging.info("Loading simulated population of city grids from %s", population_file)
    population_df = pd.read_csv(population_file)
    population_df["density"] = population_df.apply(lambda pop: \
        compute_density(pop["id"], pop["population"], supermarket_counts), axis=1)
    density_df = population_df[["id", "density"]]

    grid_shape = conf.get("input").get("grid_shape_file")
    sf = shp.Reader(grid_shape)
    shp_df = read_shapefile(sf)
    logging.info("Shape of shp_df: %s", shp_df.shape)
    logging.info(shp_df.head())
    density_shp_df = pd.merge(shp_df, density_df, left_on='id', right_on='id', how='outer')
    density_shp_df = density_shp_df.drop("coords", axis=1)
    logging.info("Export supermarket density to text file")
    supermarket_density_file = conf.get("output").get("supermarket_density_file")
    density_shp_df.to_csv(supermarket_density_file, index=False)
    logging.info(density_shp_df.head())
    gdf = gpd.read_file(grid_shape)
    gdf = gdf.to_crs({'init': 'epsg:3857'})
    # Rows are matched by index position between the two frames.
    gdf["density"] = density_shp_df["density"]
    supermarket_density_shape_file = conf.get("output").get("supermarket_density_shape_file")
    gdf.to_file(supermarket_density_shape_file)
    logging.info("Supermarket density added to the shape file of city grid layer")