def get_posts(hashtag):
    """Collect every page of posts for ``hashtag`` and append them to files.

    Writes the posts, users and links of each feed page as JSON appended to
    per-day files, then follows the pagination cursor until the feed reports
    its last page.
    """
    print("Starting collection of posts with hashtag %s" % hashtag)
    # Stagger start-up so parallel workers do not hit the API simultaneously.
    time.sleep(random.randint(1, 20))
    # Retry the initial request until it succeeds.
    while True:
        try:
            data = parler.hashtags_feed(hashtag, 100, cursor="")
            feed = models.FeedSchema().load(data)
            break
        except Exception:
            # BUG FIX: narrowed from a bare ``except`` so SystemExit and
            # KeyboardInterrupt are no longer swallowed; back off and retry.
            time.sleep(20)
    while not feed.last:
        time.sleep(random.randint(1, 20))
        today = str(datetime.date.today())
        # NOTE: appending json.dump output produces concatenated JSON
        # documents; downstream tooling must split them.
        with open(filename % (hashtag, today, "posts"), mode="a") as posts:
            json.dump(feed.items, posts)
        with open(filename % (hashtag, today, "users"), mode="a") as users:
            json.dump(feed.users, users)
        with open(filename % (hashtag, today, "links"), mode="a") as links:
            json.dump(feed.links, links)

        # Retry the next-page request until it succeeds.
        while True:
            try:
                data = parler.hashtags_feed(hashtag, 100, cursor=feed.next)
                feed = models.FeedSchema().load(data)
                break
            except Exception:
                time.sleep(20)
Exemplo n.º 2
0
def user_items(username, expiration=None, delete_active=False):
    """Retrieve posts and comments created by ``username`` and process them.

    Pages through up to 20 pages per item type, handing each item to
    ``process_item``.

    Args:
        username: Parler username whose created items are fetched.
        expiration: forwarded to ``process_item`` (semantics defined there).
        delete_active: forwarded to ``process_item``.
    """
    for item_type in ['post', 'comment']:
        data = parler.created_items(item_type, username, limit=100, cursor="")
        if len(data[item_type + 's']) == 0:
            # BUG FIX: was ``break``, which skipped the remaining item types
            # entirely (comments were never fetched for a user with no posts).
            continue
        feed = models.FeedSchema().load(data)
        count = 0
        while count < 20:  # hard cap of 20 pages per item type
            try:
                for item in feed.items:
                    process_item(item_type, item, expiration, delete_active)
                if feed.last:
                    break
                data = parler.created_items(item_type,
                                            username,
                                            limit=100,
                                            cursor=feed.next)
                feed = models.FeedSchema().load(data)
                count += 1
            except Exception:
                # BUG FIX: narrowed from a bare ``except`` which also caught
                # SystemExit/KeyboardInterrupt; log and retry the same page.
                traceback.print_exc()
                time.sleep(5)
            finally:
                # Constant pacing between pages regardless of outcome.
                time.sleep(1)
Exemplo n.º 3
0
def get_posts(hashtag):
    """Collect every page of posts for ``hashtag``, appending rows as CSV.

    The first page is written with CSV headers; subsequent pages append
    header-less rows for posts, users and links until the feed reports its
    last page.
    """
    print("Starting collection of posts with hashtag %s" % hashtag)
    # Stagger start-up so parallel workers do not hit the API at once.
    time.sleep(random.randint(1, max_sleep_limit))
    # Retry the initial request; a TypeError indicates an unusable response,
    # so abort instead of retrying forever.
    while True:
        try:
            data = parler.hashtags_feed(hashtag, 100, cursor="")
            feed = models.FeedSchema().load(data)
            break
        except Exception as e:
            # BUG FIX: the original compared __class__.__name__ against
            # "TypeError:" (trailing colon), which can never match a class
            # name, so the intended bail-out never ran.
            if isinstance(e, TypeError):
                sys.exit()
            time.sleep(max_sleep_limit)
    today = str(datetime.date.today())
    with open(filename % (hashtag, today, "posts"),
              mode="a",
              encoding="utf-8") as posts:
        exputils.writetocsv(posts, feed.items, insert_headers=True)
    with open(filename % (hashtag, today, "users"),
              mode="a",
              encoding="utf-8") as users:
        exputils.writetocsv(users, feed.users, insert_headers=True)
    with open(filename % (hashtag, today, "links"),
              mode="a",
              encoding="utf-8") as links:
        exputils.writetocsv(links, feed.links, insert_headers=True)
    while True:
        if feed.last:
            logging.info("Exiting, all done.")
            break
        # BUG FIX: random.randint requires ints; ``max_sleep_limit / 2`` is a
        # float under true division and raises at runtime.
        time.sleep(random.randint(max_sleep_limit // 2, max_sleep_limit))
        try:
            data = parler.hashtags_feed(hashtag, 100, cursor=feed.next)
            feed = models.FeedSchema().load(data)
        except Exception as e:
            if isinstance(e, TypeError):
                sys.exit()
            time.sleep(max_sleep_limit)
        else:
            # BUG FIX: the original wrote in ``finally``, re-appending the
            # stale previous page after every failed fetch (duplicate rows);
            # write only after a successful fetch.
            logging.info("Writing to file...")
            today = str(datetime.date.today())
            with open(filename % (hashtag, today, "posts"),
                      mode="a",
                      encoding="utf-8") as posts:
                exputils.writetocsv(posts, feed.items, insert_headers=False)
            with open(filename % (hashtag, today, "users"),
                      mode="a",
                      encoding="utf-8") as users:
                exputils.writetocsv(users, feed.users, insert_headers=False)
            with open(filename % (hashtag, today, "links"),
                      mode="a",
                      encoding="utf-8") as links:
                exputils.writetocsv(links, feed.links, insert_headers=False)
Exemplo n.º 4
0
def get_posts(username, output_dir):
    """Collect all posts from ``username``'s feed, including comment threads.

    Appends posts and links as CSV under ``output_dir`` and recurses into
    each thread's comments via ``get_comments``.
    """
    logging.info("Starting collection of posts from user %s" % username)

    # Retry the initial profile/feed fetch until it succeeds.
    while True:
        try:
            userdetails = parler.profile(username)
            data = parler.user_feed(userdetails.get("_id"), 100, cursor="")
            feed = models.FeedSchema().load(data)
            break
        except Exception as e:
            # BUG FIX: the original compared __class__.__name__ against
            # "TypeError:" (trailing colon), which never matches, so the
            # intended bail-out on TypeError never ran.
            if isinstance(e, TypeError):
                sys.exit()
            time.sleep(max_sleep_limit)

    today = str(datetime.date.today())
    with open(filename % (output_dir, username, today, "posts"),
              mode="a", encoding="utf-8") as posts:
        exputils.writetocsv(posts, feed.items, insert_headers=True)
    with open(filename % (output_dir, username, today, "links"),
              mode="a", encoding="utf-8") as links:
        exputils.writetocsv(links, feed.links, insert_headers=True)
    for thread in feed.items:
        get_comments(username=username, thread_id=thread.get("Id"),
                     output_dir=output_dir)

    while True:
        logging.info("is last? %s", feed.last)
        if feed.last:
            logging.info("Exiting, all done.")
            break
        time.sleep(random.randint(1, max_sleep_limit))

        try:
            data = parler.user_feed(userdetails.get("_id"), 100,
                                    cursor=feed.next)
            feed = models.FeedSchema().load(data)
        except Exception as e:
            traceback.print_exc()
            if isinstance(e, TypeError):
                sys.exit()
            time.sleep(max_sleep_limit)
        else:
            # BUG FIX: writing in ``finally`` re-appended the stale page and
            # re-collected its comments after every failed fetch; write only
            # after a successful one.
            today = str(datetime.date.today())
            with open(filename % (output_dir, username, today, "posts"),
                      mode="a", encoding="utf-8") as posts:
                exputils.writetocsv(posts, feed.items)
            with open(filename % (output_dir, username, today, "links"),
                      mode="a", encoding="utf-8") as links:
                exputils.writetocsv(links, feed.links)
            for thread in feed.items:
                get_comments(username=username, thread_id=thread.get("Id"),
                             output_dir=output_dir)
Exemplo n.º 5
0
def get_comments(username, thread_id, output_dir):
    """Fetch every comment page for ``thread_id`` and append rows as CSV.

    Pages through the comments feed, writing users and links of each page to
    per-day files under ``output_dir``.
    """
    data = parler.comments(thread_id, cursor="")
    feed = models.FeedSchema().load(data)
    # BUG FIX: the original only wrote pages fetched inside the loop, so the
    # first page was never persisted (and stale pages were re-written after
    # failed fetches). Write each page exactly once, starting with the first.
    _write_comment_page(username, thread_id, output_dir, feed)
    while not feed.last:
        # BUG FIX: random.randint requires ints; max_sleep_limit / 2 is a
        # float under true division.
        time.sleep(random.randint(max_sleep_limit // 2, max_sleep_limit))
        try:
            data = parler.comments(thread_id, cursor=feed.next)
            feed = models.FeedSchema().load(data)
        except Exception as e:
            # BUG FIX: "TypeError:" (with colon) never matches a class name;
            # compare the exception type directly.
            if isinstance(e, TypeError):
                sys.exit()
            time.sleep(max_sleep_limit)
            continue  # retry the same cursor without duplicating writes
        _write_comment_page(username, thread_id, output_dir, feed)
    logging.info("Exiting, all done.")


def _write_comment_page(username, thread_id, output_dir, feed):
    # Append one page of comment data (users and links) as header-less CSV.
    logging.info("Writing to file...")
    stem = username + "_" + thread_id
    today = str(datetime.date.today())
    with open(filename % (output_dir, stem, today, "users"),
              mode="a", encoding="utf-8") as users:
        exputils.writetocsv(users, feed.users, insert_headers=False)
    # NOTE(review): the "comments" file receives feed.links, mirroring the
    # original code — confirm feed.items was not intended here.
    with open(filename % (output_dir, stem, today, "comments"),
              mode="a", encoding="utf-8") as links:
        exputils.writetocsv(links, feed.links, insert_headers=False)
Exemplo n.º 6
0
def user_items(username, item_types, expiration=None, delete_active=False):
    """Retrieve posts and/or comments created by ``username`` and process them.

    Pages through up to 20 pages per item type, handing each item to
    ``process_item``.

    Args:
        username: Parler username whose created items are fetched.
        item_types: iterable of item types ('post', 'comment'); falsy means
            both.
        expiration: forwarded to ``process_item``.
        delete_active: forwarded to ``process_item``.
    """
    if not item_types:
        item_types = ['post', 'comment']
    for item_type in item_types:
        data = parler.created_items(item_type, username, limit=100, cursor="")
        item_key = item_type + 's'
        if item_key not in data or len(data[item_key]) == 0:
            print('*** no', item_key, 'available for', username)
            continue  # skip processing if no posts/comments
        print('***', item_key, 'for', username)
        # The feed schema only understands "posts", so relabel comment
        # payloads before loading (same trick as below for later pages).
        if "comments" in data:
            data["posts"] = data["comments"].copy()
            del data["comments"]
        feed = models.FeedSchema().load(data)
        count = 0
        while count < 20:  # hard cap of 20 pages per item type
            try:
                for item in feed.items:
                    process_item(item_type, item, expiration, delete_active)
                if feed.last:
                    break
                data = parler.created_items(item_type,
                                            username,
                                            limit=100,
                                            cursor=feed.next)
                if "comments" in data:
                    data["posts"] = data["comments"].copy()
                    del data["comments"]
                feed = models.FeedSchema().load(data)
                count += 1
            except Exception:
                # BUG FIX: narrowed from a bare ``except`` which also caught
                # SystemExit/KeyboardInterrupt; log and retry the same page.
                traceback.print_exc()
                time.sleep(5)
            finally:
                # Constant pacing between pages regardless of outcome.
                time.sleep(1)
Exemplo n.º 7
0
import sys
import signal
import os
import json
import time

sys.path.insert(1, os.path.join(sys.path[0], ".."))
from Parler import Parler, models

# BUG FIX: ``os``, ``json`` and ``time`` were used below without being
# imported in this snippet, which raises NameError at runtime.
# NOTE(review): ``load_dotenv`` must be provided by python-dotenv
# (``from dotenv import load_dotenv``) elsewhere — confirm the import exists.
load_dotenv(dotenv_path=".parler.env")
parler = Parler(jst=os.getenv("JST"), mst=os.getenv("MST"), debug=False)

# Seconds to wait between successive feed pages.
interval = 2
# Output file; can be overridden by the first CLI argument.
filename = "data/stopthesteal_11_09-10.json"
if len(sys.argv) > 1:
    filename = sys.argv[1]

data = parler.hashtags_feed("stopthesteal", 100, cursor="")
feed = models.FeedSchema().load(data)

while not feed.last:
    time.sleep(interval)
    # NOTE: appending json.dump output yields concatenated JSON documents;
    # the file needs post-processing (see final message).
    with open(filename, mode="a") as posts:
        json.dump(feed.items, posts)
    try:
        data = parler.hashtags_feed("stopthesteal", 100, cursor=feed.next)
        feed = models.FeedSchema().load(data)
    except Exception:
        # BUG FIX: narrowed from a bare ``except``. One back-off retry; a
        # second consecutive failure propagates.
        time.sleep(20)
        data = parler.hashtags_feed("stopthesteal", 100, cursor=feed.next)
        feed = models.FeedSchema().load(data)
print("File needs cleaning up....")
Exemplo n.º 8
0
def get_posts(search_hit):
    """Collect all posts for ``search_hit`` (a hashtag, or an ``@user``).

    Pages through the matching feed, appending posts/users/links as CSV and
    downloading any media hosted on Parler's own CDNs via ``save_file``.
    """
    print("Starting collection of posts with hashtag %s" % search_hit)
    time.sleep(random.randint(1, max_sleep_limit))
    userdetails = None  # populated only for "@user" searches
    # Retry the initial request until it succeeds.
    while True:
        try:
            if "@" not in search_hit:
                data = parler.hashtags_feed(search_hit, 100, cursor="")
            else:
                userdetails = parler.profile(search_hit.replace("@", ""))
                data = parler.user_feed(userdetails.get("_id"), 100,
                                        cursor="")
            feed = models.FeedSchema().load(data)
            break
        except Exception:
            # BUG FIX: narrowed from a bare ``except``.
            traceback.print_exc()
            time.sleep(max_sleep_limit)
    today = str(datetime.date.today())
    with open(filename % (search_hit, today, "posts"), mode="a") as posts:
        exputils.writetocsv(posts, feed.items, insert_headers=True)
    with open(filename % (search_hit, today, "users"), mode="a") as users:
        exputils.writetocsv(users, feed.users, insert_headers=True)
    with open(filename % (search_hit, today, "links"), mode="a") as links:
        exputils.writetocsv(links, feed.links, insert_headers=True)

    _save_parler_media(feed.links)
    while True:
        if feed.last:
            logging.info("Exiting, all done.")
            break
        # BUG FIX: random.randint requires ints; max_sleep_limit / 2 is a
        # float under true division.
        time.sleep(random.randint(max_sleep_limit // 2, max_sleep_limit))
        try:
            # BUG FIX: the original inverted the "@" test here (hashtag
            # searches called user_feed and vice versa) AND always passed
            # cursor="", so the same first page was fetched forever.
            if "@" not in search_hit:
                data = parler.hashtags_feed(search_hit, 100,
                                            cursor=feed.next)
            else:
                data = parler.user_feed(userdetails.get("_id"), 100,
                                        cursor=feed.next)
            feed = models.FeedSchema().load(data)
        except Exception:
            traceback.print_exc()
            time.sleep(max_sleep_limit)
        else:
            # BUG FIX: writing in ``finally`` re-appended the stale page
            # (and re-downloaded its media) after every failed fetch; write
            # only after a successful fetch.
            logging.info("Writing to file...")
            today = str(datetime.date.today())
            with open(filename % (search_hit, today, "posts"),
                      mode="a") as posts:
                exputils.writetocsv(posts, feed.items, insert_headers=False)
            with open(filename % (search_hit, today, "users"),
                      mode="a") as users:
                exputils.writetocsv(users, feed.users, insert_headers=False)
            with open(filename % (search_hit, today, "links"),
                      mode="a") as links:
                exputils.writetocsv(links, feed.links, insert_headers=False)
            _save_parler_media(feed.links)


def _save_parler_media(links):
    # Download media hosted by Parler itself (image CDN and video host).
    # Two independent checks mirror the original behavior exactly.
    for link in links:
        if "image-cdn.parler.com" in link.get("Long"):  # hosted by parler!
            save_file(link.get("Long"), link.get("Id"))
        if "video.parler.com" in link.get("Long"):  # hosted by parler!
            save_file(link.get("Long"), link.get("Id"))