Пример #1
0
def process_post(store: Minio, post: CleanPost, data_dir: str) -> CleanPost:
    """Download, crop and upload a post's image; return the updated post.

    The image at ``post.image_uri`` is downloaded to
    ``<data_dir>/<post.id>.jpg``, passed through ``crop_image`` and the
    result is uploaded to the S3-compatible store under
    ``<S3_DATA_DIR_NAME>/<post.id>.jpg``.

    Environment variables read:
        S3_BUCKET: target bucket name (defaults to an empty string).
        S3_ENDPOINT: host used to build the public link (defaults to an
            empty string).
        S3_DATA_DIR_NAME: key prefix inside the bucket (defaults to
            ``"laciudadinvisible"``).

    Args:
        store: Minio client used for the upload.
        post: The post to process; it is not modified.
        data_dir: Local directory the image is downloaded into.

    Returns:
        A new ``CleanPost`` carrying the same fields as ``post`` except
        ``relevance`` (recomputed from likes and comments) and
        ``image_uri`` (pointing at the uploaded object).

    Note:
        This function does not remove the downloaded file from
        ``data_dir``.
    """
    likes = post.likes
    comments = post.comments
    post_id = post.id

    rel = relevance(likes, comments)

    print(f"~ [{post_id}]: L: {likes}, C: {comments}, D: {post.date}, R: {rel}")

    bucket = os.getenv("S3_BUCKET", "")
    endpoint = os.getenv("S3_ENDPOINT", "")
    folder_name = os.getenv("S3_DATA_DIR_NAME", "laciudadinvisible")

    destination = f"{folder_name}/{post_id}.jpg"
    local_image = path.join(data_dir, post_id) + ".jpg"

    download_pic(local_image, post.image_uri)

    # crop_image's return value is what gets uploaded; presumably it is the
    # path of the processed file -- confirm against its definition.
    cropped_image = crop_image(local_image)

    # "image/jpeg" is the registered media type for JPEG files; the previous
    # "image/jpg" is not a registered type.
    store.fput_object(bucket,
                      destination,
                      cropped_image,
                      content_type="image/jpeg",
                      metadata={"x-amz-acl": "public-read"})

    link = f"https://{bucket}.{endpoint}/{destination}"

    return CleanPost(
        id=post_id,
        date=post.date,
        likes=likes,
        comments=comments,
        hashtags=post.hashtags,
        mentions=post.mentions,
        relevance=rel,
        image_uri=link,
        description=post.description,
        comments_content=post.comments_content,
    )
Пример #2
0
def process_post(loader: Instaloader, post: Post, store: Minio,
                 data_dir: str) -> CleanPost:
    """Download a post's picture, upload it and build a ``CleanPost``.

    The picture at ``post.url`` is downloaded through ``loader`` using
    ``<data_dir>/<post.shortcode>`` as the base filename, and the file
    ``<data_dir>/<post.shortcode>.jpg`` is then uploaded to the
    S3-compatible store under ``<S3_DATA_DIR_NAME>/<post.shortcode>.jpg``.

    Environment variables read:
        S3_BUCKET: target bucket name (defaults to an empty string).
        S3_ENDPOINT: host used to build the public link (defaults to an
            empty string).
        S3_DATA_DIR_NAME: key prefix inside the bucket (defaults to
            ``"laciudadinvisible"``).

    Args:
        loader: Instaloader instance used to download the picture.
        post: The source post.
        store: Minio client used for the upload.
        data_dir: Local directory the picture is downloaded into.

    Returns:
        A ``CleanPost`` built from the post's fields, its computed
        relevance, the link to the uploaded picture and the text of every
        comment yielded by ``post.get_comments()``.

    Note:
        This function does not remove the downloaded file from
        ``data_dir``.
    """
    likes = post.likes
    comments = post.comments
    date = post.date_local
    shortcode = post.shortcode

    rel = relevance(likes, comments)

    print(f"\n~ [{shortcode}]: L: {likes}, C: {comments}, D: {date}, R: {rel}")

    filepath = path.join(data_dir, shortcode)

    # The upload below expects the downloaded file at filepath + ".jpg";
    # this assumes the loader adds a ".jpg" extension -- confirm for
    # non-JPEG URLs.
    loader.download_pic(filepath, post.url, post.date)
    print("")

    bucket = os.getenv("S3_BUCKET", "")
    endpoint = os.getenv("S3_ENDPOINT", "")
    folder_name = os.getenv("S3_DATA_DIR_NAME", "laciudadinvisible")

    destination = f"{folder_name}/{shortcode}.jpg"
    full_filepath = filepath + ".jpg"

    # "image/jpeg" is the registered media type for JPEG files; the previous
    # "image/jpg" is not a registered type.
    store.fput_object(bucket,
                      destination,
                      full_filepath,
                      content_type="image/jpeg",
                      metadata={"x-amz-acl": "public-read"})

    link = f"https://{bucket}.{endpoint}/{destination}"

    full_comments: List[str] = [
        comment.text for comment in post.get_comments()
    ]

    return CleanPost(
        id=shortcode,
        date=str(date),
        likes=likes,
        comments=comments,
        hashtags=post.caption_hashtags,
        mentions=post.caption_mentions,
        relevance=rel,
        image_uri=link,
        description=post.caption,
        comments_content=full_comments,
    )