def main():
    # get the station dataset for a single artist and cluster it
    db = DatabaseWrapper()
    cluster = ClusterModule()
    dataset = db.getStationSetForArtist("Coldplay")
    output = cluster.cluster(dataset)
def selectTopStationTags(self, data):
    """
    selects top tags when given a list of stations

    parameters
    ----------
    data: a list of stations

    returns: up to 3 distinguishing tags for each of the first three stations
    """
    db = DatabaseWrapper()
    taglist = []
    output = []
    for station in data:
        topTags = db.getTagsForStation(station[0])
        taglist.append(topTags)
    if len(taglist) > 2:
        # calculate set differences so each station keeps the tags
        # the other two stations don't have
        output.append(list(set(taglist[0]) - (set(taglist[1] + taglist[2])))[:3])
        # if the first set difference is empty, just return the first three tags
        if output[0] == []:
            output[0] = taglist[0][:3]
        output.append(list(set(taglist[1]) - (set(output[0] + taglist[2])))[:3])
        output.append(list(set(taglist[2]) - (set(output[1] + output[0])))[:3])
    else:
        print "Not enough tags to calculate tag difference"
        output = [[], [], []]
    return output
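# Minimal sketch of the tag-differencing step above, with hard-coded tag
# lists standing in for the db.getTagsForStation() results.
taglist = [
    ["rock", "pop", "indie", "british"],
    ["pop", "dance", "electronic"],
    ["rock", "metal", "pop"],
]
# Tags unique to station 0 relative to stations 1 and 2.
distinctive = list(set(taglist[0]) - set(taglist[1] + taglist[2]))[:3]
print(distinctive)  # e.g. ['indie', 'british'] (set order varies)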
def get_translations():
    fields_to_retrieve = [
        'uid',
        'status',
        'text',
        'source_language',
        'target_language'
    ]
    return DatabaseWrapper().get_translations(fields_to_retrieve)
def getPlayingStations(self, artist):
    """
    gets the currently playing stations from last.fm and the stations that
    have historically played the artist from the database. Then merges the
    two sets and gets the artists for each station.

    parameters
    ----------
    artist: name of the artist being played

    returns
    -------
    dataset of stations that have played the artist: a dictionary with two keys:
        'labels' contains the set of station 3-tuples
        'data' contains the list of artists for each station
    """
    sr = ShoutcastWrapper()
    db = DatabaseWrapper()
    mergedict = {}
    mergelist = []
    artistsetlist = []

    # gets the set of currently playing stations
    playingStationSet = sr.getStationPlayingArtist(artist)
    # gets the set of historically played stations
    historicalStationSet = db.getStationTuplesForArtist(artist)

    # merges the two sets of stations, while preserving order of
    # listen count

    # add all of the historically played stations
    itemcount = 0
    for item in historicalStationSet:
        itemId = item[1]
        itemName = item[0]
        mergedict[itemId] = itemcount
        mergelist.append((itemId, itemName, False))
        # mergelist.append(item)
        itemcount = itemcount + 1

    # add only the unique stations from now playing
    for item in playingStationSet:
        itemId = item[2]
        itemName = item[0]
        itemLC = item[1]
        itemCT = item[3]
        # if the station is already in the list, change
        # status to playing
        if itemId in mergedict:
            itemnumber = mergedict[itemId]
            mergelist[itemnumber] = item
        # else append the station to the top of the list
        # and add the station to the db
        else:
            # mergelist.insert(0, (itemId, itemName, True, itemCT))
            mergelist.insert(0, item)
            db.addStationForArtist(artist, (itemName, itemId, itemLC))

    # get set of artists for each station
    # NOTE: entries copied straight from playingStationSet keep their
    # original shape, so item[0] is the station name for those entries,
    # not the id.
    for item in mergelist:
        stationID = item[0]
        artistset = db.getArtistsForStationID(stationID)
        artistsetlist.append(artistset)

    return {"data": artistsetlist, "labels": mergelist}
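# Toy illustration of the merge order implemented above, with made-up
# values in the tuple shapes the code reads: historical items are
# (name, id), now-playing items are (name, listencount, id, codec).
historical = [("KEXP", 101), ("WFMU", 102)]
playing = [("Radio X", 250, 102, "mp3"), ("NewWave FM", 80, 103, "aac")]

mergedict, mergelist = {}, []
for count, (name, station_id) in enumerate(historical):
    mergedict[station_id] = count
    mergelist.append((station_id, name, False))

for item in playing:
    if item[2] in mergedict:
        mergelist[mergedict[item[2]]] = item  # already known: mark as playing
    else:
        mergelist.insert(0, item)             # new stations go on top

print(mergelist)
# [('NewWave FM', 80, 103, 'aac'), (101, 'KEXP', False), ('Radio X', 250, 102, 'mp3')]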
def get_top_stories_with_comments():
    api_client = APIClient()
    db_wrapper = DatabaseWrapper()
    json_response = api_client.get_top_stories_with_comments()
    db_wrapper.upsert_stories(json_response['stories'])
    db_wrapper.upsert_comments(json_response['comments'])
def __init__(self, db_wrapper=DatabaseWrapper()):
    # NOTE: this default is evaluated once, when the function is defined,
    # so every instance created without an explicit argument shares the
    # same DatabaseWrapper.
    self.db_wrapper = db_wrapper
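# If a fresh wrapper per instance is the intent, the usual idiom is a None
# default; a minimal sketch (class context assumed):
def __init__(self, db_wrapper=None):
    # Each instance gets its own DatabaseWrapper unless one is injected,
    # e.g. for tests.
    self.db_wrapper = db_wrapper if db_wrapper is not None else DatabaseWrapper()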
# Requires the third-party 'regex' package; SDParser, DatabaseWrapper and
# load_tag are assumed to come from the project's own modules.
import regex


class DictionaryBuilder:

    def __init__(self, **kwargs):
        self._source_parser = SDParser(filename=kwargs.pop('filepath'),
                                       sep=kwargs.pop('sep'))
        self._wordbase_builder = DatabaseWrapper(kwargs.pop('db_uri'))
        self.primary_col_index = -1

    def __del__(self):
        # __del__ (not the descriptor slot __delete__), so this actually
        # runs when the instance is garbage-collected.
        del self._source_parser
        del self._wordbase_builder

    def build(self, table_name: str, column_infos: dict, language: str,
              start=0, end=0):
        columns = self._wordbase_builder.generate_columns(
            column_infos, self.primary_col_index)
        self._wordbase_builder.create_table(table_name, columns)
        self._parse(table_name, columns, language, start, end)
        print("Database build for \"%s.%s\" finished."
              % (self._wordbase_builder.get_dbname(), table_name))

    def resume(self, table_name: str, column_infos: dict, language: str,
               primary_key=0, end=0):
        columns = self._wordbase_builder.generate_columns(
            column_infos, self.primary_col_index)
        start = self._wordbase_builder.resume_table(table_name, primary_key,
                                                    columns)
        self._parse(table_name, columns, language, start, end)

    def read(self, table_name, row_num=-1, col_num=-1):
        return self._wordbase_builder.fetch_row(table_name, row_num, col_num)

    def _variable_row_values(self, language, rows: dict, row_values: str,
                             columns: list, start_index: int) -> None:
        translated_values = self.translate_tag(load_tag(language), row_values)
        index = start_index
        for value in translated_values:
            rows[columns[index].name] = value
            index += 1

    def _parse(self, table_name, columns, language, start=0, end=0):
        parsed = self._source_parser.parse_lines(0, (2, 3), start=start, end=end)
        for words, paradigms in zip(parsed[0].values(), parsed[1].values()):
            for word, paradigm in zip(words, paradigms):
                row_values = {'word': word}
                self._variable_row_values(language, row_values, paradigm,
                                          columns, 2)
                self._wordbase_builder.insert_values(table_name, row_values)

    @staticmethod
    def translate_tag(dictionary: dict, row: str):
        values = row.split(' ')
        translated_values = list()
        index = 0
        for value in values:
            for key, cmp_value in zip(dictionary.keys(), dictionary.values()):
                if regex.match(r'<?' + cmp_value + r'(\d?|>?)',
                               value, regex.I) is not None \
                        and key not in translated_values:
                    translated_values.insert(index, key)
                    index += 1
        return translated_values
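# Hypothetical usage of DictionaryBuilder; the file path, separator, DB URI
# and the shape of column_infos are placeholders, not taken from the
# original code.
builder = DictionaryBuilder(filepath='wordlist.sd', sep='\t',
                            db_uri='sqlite:///wordbase.db')
builder.build(table_name='nouns',
              column_infos={'word': 'TEXT', 'case': 'TEXT', 'number': 'TEXT'},
              language='en')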
class DataManager:
    """
    Class for manipulating data specific to this project's database.
    It essentially wraps around the DatabaseWrapper() class.

    This creates and communicates with the ornus database, which has the
    following structure:

    Tables:
        tweets: table with all the tweets from each coin
        twitter_users: table with all the twitter users that were found
            while collecting tweets
        hashtags: table with all the hashtags found in tweets
        tweet_hashtag: many-to-many relationship between tweets and hashtags
        cryptocurrencies: a table of all the cryptocurrencies

    Each cryptocurrency additionally has its own table storing its daily
    market data, so there are an additional 30-100 tables for all the
    cryptocurrencies currently being collected.
    """

    def __init__(self, coins):
        self.coins = coins
        self._database = DatabaseWrapper()

    def insert_hashtag(self, hashtag):
        """Insert a hashtag into the hashtags table"""
        _dict = {"name": hashtag}
        self._database.insert_into_table(_dict, "hashtags")

    def insert_twitter_user(self, twitter_user):
        """
        Insert a user into the 'twitter_users' table, which has these columns:
            "id": "BIGINT UNSIGNED UNIQUE PRIMARY KEY NOT NULL",
            "date_created": "DATE",
            "followers": "INT UNSIGNED",
            "friends": "INT UNSIGNED",
        """
        self._database.insert_into_table(twitter_user, "twitter_users")

    def insert_tweet(self, tweet: dict):
        """
        Insert a tweet into the 'tweets' table, which has these columns:
            "id": "BIGINT UNSIGNED UNIQUE PRIMARY KEY NOT NULL",
            "date": "DATE",
            "content": "VARCHAR(1120) CHARACTER SET utf8 COLLATE utf8_unicode_ci",
            "coin_id": "INT UNSIGNED NOT NULL",
            "sentiment": "FLOAT",
            "user_id": "BIGINT UNSIGNED NOT NULL",
            "retweets": "INT UNSIGNED",

        Also adds the tweet's hashtags and its twitter user to the database.
        """
        self.insert_twitter_user(tweet["user"])
        formatted_tweet = {
            "id": tweet["id"],
            "date": tweet["date"],
            "content": tweet["text"],
            "coin_id": self.get_coin_id(tweet["coin"]),
            "sentiment": tweet["sentiment"],
            "user_id": tweet["user"]["id"],
            "retweets": tweet["retweets"]
        }
        if formatted_tweet["coin_id"] is not None:
            # The try/except silently skips tweets that are not properly
            # encoded and therefore cannot be inserted.
            try:
                self._database.insert_into_table(formatted_tweet, "tweets")
            except Exception:
                return

            # Insert the hashtags into the hashtags table and into the
            # tweet_hashtag table for the many-to-many relationship
            # between tweets and hashtags
            for hashtag in tweet["hashtags"]:
                self.insert_hashtag(hashtag)
                tweet_hashtag = {
                    "tweet_id": tweet["id"],
                    "hashtag_id": self.get_hashtag_id(hashtag),
                }
                if None not in tweet_hashtag.values():
                    self._database.insert_into_table(tweet_hashtag,
                                                     "tweet_hashtag")

    def get_hashtag_id(self, hashtag: str):
        """
        Returns the id of the hashtag in the hashtags table; returns None
        if the hashtag is not in the table

        :param hashtag: str of the hashtag
        """
        # NOTE: string interpolation into SQL is injection-prone; a
        # parameterized query would be safer here.
        try:
            sql = "SELECT id FROM hashtags WHERE name = '{0}'".format(hashtag)
            result = self._database.query(sql)
        except Exception:
            return None
        if result == []:
            return None
        return result[0][0]

    def get_coin_id(self, coin: str):
        """
        Returns the id of the coin in the cryptocurrencies table; returns
        None if the coin is not in the table

        :param coin: str of the name of the coin, note: not the ticker
        """
        sql = "SELECT id FROM cryptocurrencies WHERE name = '{0}'".format(coin)
        result = self._database.query(sql)
        if result == []:
            return None
        return result[0][0]

    def fill_cryptocurrency_table(self):
        """
        Populates the cryptocurrencies table in the database with
        everything from self.coins
        """
        for coin in self.coins:
            self._database.insert_into_table(entry=coin.schema(),
                                             table="cryptocurrencies")

    def fill_market_data_tables(self, sentiment_data: dict, verbose=False):
        """
        Populate the table for each individual cryptocurrency with its
        daily market data

        :param sentiment_data: dict storing all the twitter sentiment
            values for each coin, so its structure should be:
            {"coin1": [ ... ], "coin2": [ ... ], ... }
        :param verbose: bool on whether to periodically notify the user
            how much has been completed
        """
        for index, coin in enumerate(self.coins):
            average_sentiment = sentiment_data[
                coin.name]["sum"] / sentiment_data[coin.name]["length"]
            pos_percentage = sentiment_data[coin.name][
                "pos_sentiment"] / sentiment_data[coin.name]["length"]
            neg_percentage = sentiment_data[coin.name][
                "neg_sentiment"] / sentiment_data[coin.name]["length"]

            coin_data = coin.current_market_data()
            market_data = {
                "date": coin_data["date"],
                "open": coin_data["open"],
                "high": coin_data["high"],
                "low": coin_data["low"],
                "close": coin_data["close"],
                "volume": coin_data["volume"],
                "num_trades": coin_data["num_trades"],
                "positive_tweet_sentiment": pos_percentage,
                "negative_tweet_sentiment": neg_percentage,
                "average_tweet_sentiment": average_sentiment,
            }
            self._database.insert_into_table(market_data, coin.name)

            if (index + 1) % 10 == 0 and verbose:
                print("Processed market data for", (index + 1), "of",
                      len(self.coins), "coins.", end=" ")
                print("Percent Complete: {:0.2f}".format(
                    100 * (index + 1) / len(self.coins)))

    def create_tables(self):
        """
        Creates all the tables necessary for the data; if a table
        already exists it does nothing
        """
        cryptocurrency_table_schema = {
            "id": "INT UNSIGNED AUTO_INCREMENT PRIMARY KEY NOT NULL",
            "name": "VARCHAR(30) UNIQUE NOT NULL",
            "ticker": "VARCHAR(10) UNIQUE NOT NULL",
        }
        self._database.create_table("cryptocurrencies",
                                    cryptocurrency_table_schema)

        specific_crypto_schema = {
            "date": "DATE UNIQUE PRIMARY KEY NOT NULL",
            "open": "FLOAT",
            "high": "FLOAT",
            "low": "FLOAT",
            "close": "FLOAT",
            "volume": "FLOAT",
            "num_trades": "INT UNSIGNED",
            "positive_tweet_sentiment": "FLOAT",
            "negative_tweet_sentiment": "FLOAT",
            "average_tweet_sentiment": "FLOAT",
        }
        for coin in self.coins:
            self._database.create_table(coin.name, specific_crypto_schema)

        twitter_users_schema = {
            "id": "BIGINT UNSIGNED UNIQUE PRIMARY KEY NOT NULL",
            "date_created": "DATE",
            "followers": "INT UNSIGNED",
            "friends": "INT UNSIGNED",
        }
        self._database.create_table("twitter_users", twitter_users_schema)

        tweets_schema = {
            "id": "BIGINT UNSIGNED UNIQUE PRIMARY KEY NOT NULL",
            "date": "DATE",
            "content": "VARCHAR(1120) CHARACTER SET utf8 COLLATE utf8_unicode_ci",
            "coin_id": "INT UNSIGNED NOT NULL",
            "sentiment": "FLOAT",
            "user_id": "BIGINT UNSIGNED NOT NULL",
            "retweets": "INT UNSIGNED",
        }
        tweets_foreign_keys = {
            "coin_id": ("cryptocurrencies", "id"),
            "user_id": ("twitter_users", "id"),
        }
        self._database.create_table("tweets", tweets_schema,
                                    tweets_foreign_keys)

        hashtag_schema = {
            "id": "INT UNSIGNED AUTO_INCREMENT PRIMARY KEY NOT NULL",
            "name": "VARCHAR(50) UNIQUE NOT NULL",
        }
        self._database.create_table("hashtags", hashtag_schema)

        if "tweet_hashtag" not in self._database.show_tables():
            sql_for_tweet_hashtag = """
            CREATE TABLE tweet_hashtag (
                tweet_id BIGINT UNSIGNED NOT NULL,
                hashtag_id INTEGER UNSIGNED NOT NULL,
                FOREIGN KEY (tweet_id) REFERENCES tweets (id)
                    ON DELETE RESTRICT ON UPDATE CASCADE,
                FOREIGN KEY (hashtag_id) REFERENCES hashtags (id)
                    ON DELETE RESTRICT ON UPDATE CASCADE,
                PRIMARY KEY (tweet_id, hashtag_id)
            );
            """
            self._database.execute(sql_for_tweet_hashtag)

        reddit_comments_schema = {
            "id": "VARCHAR(20) UNIQUE PRIMARY KEY NOT NULL",
            "date": "DATE",
            "content": "VARCHAR(8000) CHARACTER SET utf8 COLLATE utf8_unicode_ci",
            "coin_id": "INT UNSIGNED NOT NULL",
            "sentiment": "FLOAT",
            "user_id": "BIGINT UNSIGNED NOT NULL",
            "score": "INT UNSIGNED",
            "parent_id": "BIGINT UNSIGNED",
            "permalink": "VARCHAR(100)",
            "submission_id": "VARCHAR(15)",
        }
        self._database.create_table("reddit_comments", reddit_comments_schema)

        reddit_user_schema = {
            "id": "VARCHAR(20) UNIQUE PRIMARY KEY NOT NULL",
            "username": "******",
            "date_created": "DATE",
            "link_karma": "INT UNSIGNED",
            "comment_karma": "INT UNSIGNED",
            "subreddit_1_id": "VARCHAR(15)",
            "subreddit_2_id": "VARCHAR(15)",
            "subreddit_3_id": "VARCHAR(15)",
        }
        self._database.create_table("reddit_users", reddit_user_schema)

        reddit_post_schema = {
            "id": "VARCHAR(20) UNIQUE PRIMARY KEY NOT NULL",
            "date": "DATE",
            "title": "VARCHAR(500) CHARACTER SET utf8 COLLATE utf8_unicode_ci",
            "content": "VARCHAR(10000) CHARACTER SET utf8 COLLATE utf8_unicode_ci",
            "coin_id": "INT UNSIGNED NOT NULL",
            "sentiment": "FLOAT",
            "user_id": "VARCHAR(20)",
            "score": "INT UNSIGNED",
            "num_comments": "INT UNSIGNED",
            "upvote_percentage": "FLOAT UNSIGNED",
            "subreddit_id": "BIGINT UNSIGNED NOT NULL",
            "link": "VARCHAR(100)",
        }
        self._database.create_table("reddit_posts", reddit_post_schema)

        subreddit_schema = {
            "id": "INT UNSIGNED AUTO_INCREMENT PRIMARY KEY NOT NULL",
            "name": "VARCHAR(20) UNIQUE NOT NULL",
            "subscribers": "INT UNSIGNED",
            "date_created": "DATE",
        }
        self._database.create_table("subreddits", subreddit_schema)
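# Hypothetical usage of DataManager; load_coins() and every literal value
# below are placeholders, with the tweet dict shaped the way insert_tweet()
# reads it.
manager = DataManager(coins=load_coins())
manager.create_tables()
manager.fill_cryptocurrency_table()
manager.insert_tweet({
    "id": 1450000000000000000,
    "date": "2021-10-01",
    "text": "btc to the moon #bitcoin",
    "coin": "bitcoin",
    "sentiment": 0.8,
    "retweets": 3,
    "hashtags": ["bitcoin"],
    "user": {"id": 12345, "date_created": "2015-06-01",
             "followers": 10, "friends": 20},
})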
def db_wrapper():
    return DatabaseWrapper(database_name='test')
class InstaBot:
    """
    Instagram bot v 1.1.0

    like_per_day=1000 - How many likes the bot sets in one day.

    media_max_like=0 - Don't like media (photo or video) if it has more
    than media_max_like likes.

    media_min_like=0 - Don't like media (photo or video) if it has less
    than media_min_like likes.

    tag_list = ['cat', 'car', 'dog'] - Tag list to like.

    max_like_for_one_tag=5 - Like 1 to max_like_for_one_tag times in a row.

    log_mod = 0 - Log mode: log_mod = 0 logs to console, log_mod = 1 logs
    to file, log_mod = 2 disables logging.

    total_run_time=0 - (in sec) the bot will automatically exit after the
    total run time is met. Will run indefinitely if 0.

    https://github.com/LevPasha/instabot.py
    """

    url = 'https://www.instagram.com/'
    url_tag = 'https://www.instagram.com/explore/tags/'
    url_likes = 'https://www.instagram.com/web/likes/%s/like/'
    url_unlike = 'https://www.instagram.com/web/likes/%s/unlike/'
    url_comment = 'https://www.instagram.com/web/comments/%s/add/'
    url_follow = 'https://www.instagram.com/web/friendships/%s/follow/'
    url_unfollow = 'https://www.instagram.com/web/friendships/%s/unfollow/'
    url_login = '******'
    url_logout = 'https://www.instagram.com/accounts/logout/'
    url_media_detail = 'https://www.instagram.com/p/%s/?__a=1'
    url_user_detail = 'https://www.instagram.com/%s/?__a=1'

    user_agent = ("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/48.0.2564.103 Safari/537.36")
    accept_language = 'ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4'

    # If instagram bans you - queries return a 400 error.
    error_400 = 0
    # If you get 3 400 errors in a row - it looks like you are banned.
    error_400_to_ban = 3
    # If InstaBot thinks you are banned - it goes to sleep.
    ban_sleep_time = 2 * 60 * 60

    # All counters.
    bot_mode = 0
    like_counter = 0
    follow_counter = 0
    unfollow_counter = 0
    comments_counter = 0
    current_user = '******'
    current_index = 0
    current_id = 'abcds'
    # List of user_id that the bot follows
    bot_follow_list = []
    user_info_list = []
    user_list = []
    ex_user_list = []
    unwanted_username_list = []
    is_checked = False
    is_selebgram = False
    is_fake_account = False
    is_active_user = False
    is_following = False
    is_follower = False
    is_rejected = False
    is_self_checking = False
    is_by_tag = False
    is_follower_number = 0
    unfollow_on_close = True
    end_at = None

    self_following = 0
    self_follower = 0

    # Database settings
    database_name = 'instabot'
    database_host = 'localhost'
    database_wrapper = DatabaseWrapper(database_name=database_name,
                                       database_host=database_host)

    # Log settings.
    log_file_path = ''
    log_file = 0

    # Other.
    user_id = 0
    media_by_tag = 0
    media_on_feed = []
    media_by_user = []
    login_status = False

    # For new_auto_mod
    next_iteration = {"Like": 0, "Follow": 0, "Unfollow": 0, "Comments": 0}

    def __init__(self, login, password,
                 like_per_day=1000,
                 media_max_like=50,
                 media_min_like=0,
                 follow_per_day=0,
                 follow_time=5 * 60 * 60,
                 unfollow_per_day=0,
                 comments_per_day=0,
                 tag_list=['cat', 'car', 'dog'],
                 max_like_for_one_tag=5,
                 unfollow_break_min=15,
                 unfollow_break_max=30,
                 log_mod=0,
                 proxy="",
                 user_blacklist={},
                 tag_blacklist=[],
                 unwanted_username_list=[],
                 unfollow_on_close=True,
                 total_run_time=0):

        self.bot_start = datetime.datetime.now()
        self.unfollow_break_min = unfollow_break_min
        self.unfollow_break_max = unfollow_break_max
        self.user_blacklist = user_blacklist
        self.tag_blacklist = tag_blacklist
        self.time_in_day = 24 * 60 * 60
        # Like
        self.like_per_day = like_per_day
        if self.like_per_day != 0:
            self.like_delay = self.time_in_day / self.like_per_day
        # Follow
        self.follow_time = follow_time
        self.follow_per_day = follow_per_day
        if self.follow_per_day != 0:
            self.follow_delay = self.time_in_day / self.follow_per_day
        # Unfollow
        self.unfollow_per_day = unfollow_per_day
        if self.unfollow_per_day != 0:
            self.unfollow_delay = self.time_in_day / self.unfollow_per_day
        # Comment
        self.comments_per_day = comments_per_day
        if self.comments_per_day != 0:
            self.comments_delay = self.time_in_day / self.comments_per_day
        # Run time
        if total_run_time > 0:
            self.end_at = datetime.datetime.now() + \
                datetime.timedelta(0, total_run_time)
        # Don't like if media has more than n likes.
        self.media_max_like = media_max_like
        # Don't like if media has less than n likes.
        self.media_min_like = media_min_like
        # Auto mod settings:
        # Default list of tags.
        self.tag_list = tag_list
        # Get a random tag from tag_list, and like (1 to n) times.
        self.max_like_for_one_tag = max_like_for_one_tag
        # log_mod: 0 to console, 1 to file
        self.log_mod = log_mod
        self.s = requests.Session()
        # if you need a proxy make something like this:
        # self.s.proxies = {"https" : "http://proxyip:proxyport"}
        # by @ageorgios
        if proxy != "":
            proxies = {
                'http': 'http://' + proxy,
                'https': 'http://' + proxy,
            }
            self.s.proxies.update(proxies)

        # try:
        #     self.db_connection = psycopg2.connect("dbname='" + self.database_name + "' host='" + self.database_host + "'")
        #     self.db_cursor = self.db_connection.cursor()
        #     self.write_log("Connected to database: '" + self.database_name + "'")
        # except:
        #     self.write_log("I am unable to connect to database: '" + self.database_name + "'")
        #     sys.exit(1)
        if self.database_wrapper.connect():
            self.write_log("Connected to database: '" + self.database_name + "'")
        else:
            self.write_log("I am unable to connect to database: '" +
                           self.database_name + "'")
            sys.exit(1)

        # Convert login to lower case
        self.user_login = login.lower()
        self.user_password = password
        self.bot_mode = 0
        self.media_by_tag = []
        self.media_on_feed = []
        self.media_by_user = []
        self.unwanted_username_list = unwanted_username_list
        now_time = datetime.datetime.now()
        log_string = 'Instabot v1.1.0 started at %s' % \
            (now_time.strftime("%d.%m.%Y %H:%M:%S"))
        self.write_log(log_string)
        if self.end_at is not None:
            self.write_log('Instabot set to run until %s' %
                           (self.end_at.strftime("%d.%m.%Y %H:%M:%S")))
        self.login()
        self.populate_user_blacklist()
        self.unfollow_on_close = unfollow_on_close
        signal.signal(signal.SIGTERM, self.cleanup)
        atexit.register(self.cleanup)

    def populate_user_blacklist(self):
        for user in self.user_blacklist:
            user_id_url = self.url_user_detail % (user)
            info = self.s.get(user_id_url)
            all_data = json.loads(info.text)
            id_user = all_data['user']['media']['nodes'][0]['owner']['id']
            # Update the user_name with the user_id
            self.user_blacklist[user] = id_user
            log_string = "Blacklisted user %s added with ID: %s" % (user, id_user)
            self.write_log(log_string)
            time.sleep(5 * random.random())
        log_string = "Completed populating user blacklist with IDs"
        self.write_log(log_string)

    def login(self):
        log_string = 'Trying to login as %s...\n' % (self.user_login)
        self.write_log(log_string)
        self.s.cookies.update({'sessionid': '', 'mid': '', 'ig_pr': '1',
                               'ig_vw': '1920', 'csrftoken': '',
                               's_network': '', 'ds_user_id': ''})
        self.login_post = {'username': self.user_login,
                           'password': self.user_password}
        self.s.headers.update({'Accept-Encoding': 'gzip, deflate',
                               'Accept-Language': self.accept_language,
                               'Connection': 'keep-alive',
                               'Content-Length': '0',
                               'Host': 'www.instagram.com',
                               'Origin': 'https://www.instagram.com',
                               'Referer': 'https://www.instagram.com/',
                               'User-Agent': self.user_agent,
                               'X-Instagram-AJAX': '1',
                               'X-Requested-With': 'XMLHttpRequest'})
        r = self.s.get(self.url)
        self.s.headers.update({'X-CSRFToken': r.cookies['csrftoken']})
        time.sleep(5 * random.random())
        login = self.s.post(self.url_login, data=self.login_post,
                            allow_redirects=True)
        self.s.headers.update({'X-CSRFToken': login.cookies['csrftoken']})
        self.csrftoken = login.cookies['csrftoken']
        time.sleep(5 * random.random())
        if login.status_code == 200:
            r = self.s.get('https://www.instagram.com/')
            finder = r.text.find(self.user_login)
            if finder != -1:
                ui = UserInfo()
                self.user_id = ui.get_user_id_by_login(self.user_login)
                self.login_status = True
                log_string = '%s login success!' % (self.user_login)
                self.write_log(log_string)
            else:
                self.login_status = False
                self.write_log('Login error! Check your login data!')
                exit()
        else:
            self.write_log('Login error! Connection error!')

    def logout(self):
        now_time = datetime.datetime.now()
        log_string = 'Logout: likes - %i, follow - %i, unfollow - %i, comments - %i.' % \
            (self.like_counter, self.follow_counter,
             self.unfollow_counter, self.comments_counter)
        self.write_log(log_string)
        work_time = datetime.datetime.now() - self.bot_start
        log_string = 'Bot work time: %s' % (work_time)
        self.write_log(log_string)
        try:
            logout_post = {'csrfmiddlewaretoken': self.csrftoken}
            logout = self.s.post(self.url_logout, data=logout_post)
            self.write_log("Logout success!")
            self.login_status = False
        except:
            self.write_log("Logout error!")

    def cleanup(self, *_):
        # Unfollow everyone the bot followed this session
        if self.unfollow_on_close:
            self.write_log("Unfollow on close requested!")
            self.write_log("Attempting to unfollow all users followed in this session...")
            if self.follow_counter >= self.unfollow_counter:
                for f in self.bot_follow_list:
                    log_string = "Trying to unfollow: %s" % (f[0])
                    self.write_log(log_string)
                    self.unfollow_on_cleanup(f[0])
                    sleeptime = random.randint(self.unfollow_break_min,
                                               self.unfollow_break_max)
                    log_string = "Pausing for %i seconds... %i of %i" % (
                        sleeptime, self.unfollow_counter, self.follow_counter)
                    self.write_log(log_string)
                    time.sleep(sleeptime)
                    # self.bot_follow_list.remove(f)
        else:
            self.write_log("Unfollow on close not requested. Exiting...")

        # Logout
        if (self.login_status):
            self.logout()
        exit(0)

    def cleanup_from_database(self):
        # Unfollow everyone the bot followed, using the database records
        unfollow_list = self.database_wrapper.all_currently_following(self.user_id)
        self.follow_counter = len(unfollow_list)
        for f in unfollow_list:
            log_string = "Trying to unfollow: %s" % (f)
            self.write_log(log_string)
            self.unfollow_on_cleanup(f)
            sleeptime = random.randint(self.unfollow_break_min,
                                       self.unfollow_break_max)
            log_string = "Pausing for %i seconds... %i of %i" % (
                sleeptime, self.unfollow_counter, self.follow_counter)
            self.write_log(log_string)
            time.sleep(sleeptime)

    def get_media_id_by_tag(self, tag):
        """ Get media ID set, by your hashtag """
        if (self.login_status):
            log_string = "Get media id by tag: %s" % (tag)
            self.write_log(log_string)
            if self.login_status == 1:
                url_tag = '%s%s%s' % (self.url_tag, tag, '/')
                try:
                    r = self.s.get(url_tag)
                    text = r.text
                    finder_text_start = ('<script type="text/javascript">'
                                         'window._sharedData = ')
                    finder_text_start_len = len(finder_text_start) - 1
                    finder_text_end = ';</script>'
                    all_data_start = text.find(finder_text_start)
                    all_data_end = text.find(finder_text_end, all_data_start + 1)
                    json_str = text[(all_data_start + finder_text_start_len + 1)
                                    : all_data_end]
                    all_data = json.loads(json_str)
                    self.media_by_tag = list(all_data['entry_data']['TagPage'][0]
                                             ['tag']['media']['nodes'])
                    # pdb.set_trace()
                except:
                    self.media_by_tag = []
                    self.write_log("Except on get_media!")
            else:
                return 0

    def like_all_exist_media(self, media_size=-1, delay=True):
        """ Like all media IDs in self.media_by_tag """
        if (self.login_status):
            if self.media_by_tag != 0:
                i = 0
                for d in self.media_by_tag:
                    # Media count by this tag.
                    if media_size > 0 or media_size < 0:
                        media_size -= 1
                        l_c = self.media_by_tag[i]['likes']['count']
                        if ((l_c <= self.media_max_like and
                                l_c >= self.media_min_like) or
                                (self.media_max_like == 0 and
                                 l_c >= self.media_min_like) or
                                (self.media_min_like == 0 and
                                 l_c <= self.media_max_like) or
                                (self.media_min_like == 0 and
                                 self.media_max_like == 0)):
                            for blacklisted_user_name, blacklisted_user_id in \
                                    self.user_blacklist.items():
                                if (self.media_by_tag[i]['owner']['id'] ==
                                        blacklisted_user_id):
                                    self.write_log(
                                        "Not liking media owned by blacklisted user: " +
                                        blacklisted_user_name)
                                    return False
                            if self.media_by_tag[i]['owner']['id'] == self.user_id:
                                self.write_log("Keep calm - It's your own media ;)")
                                return False
                            try:
                                caption = self.media_by_tag[i]['caption'].encode(
                                    'ascii', errors='ignore')
                                tag_blacklist = set(self.tag_blacklist)
                                if sys.version_info[0] == 3:
                                    tags = {str.lower((tag.decode('ASCII')).strip('#'))
                                            for tag in caption.split()
                                            if (tag.decode('ASCII')).startswith("#")}
                                else:
                                    tags = {unicode.lower((tag.decode('ASCII')).strip('#'))
                                            for tag in caption.split()
                                            if (tag.decode('ASCII')).startswith("#")}
                                if tags.intersection(tag_blacklist):
                                    matching_tags = ', '.join(
                                        tags.intersection(tag_blacklist))
                                    self.write_log(
                                        "Not liking media with blacklisted tag(s): " +
                                        matching_tags)
                                    return False
                            except:
                                self.write_log("Couldn't find caption - not liking")
                                return False
                            log_string = "Trying to like media: %s" % \
                                (self.media_by_tag[i]['id'])
                            # Additional logging
                            log_string += "\n >>> URL: instagram.com/p/%s" % \
                                self.media_by_tag[i]['code']
                            log_string += "\n >>> Caption: %s" % \
                                self.media_by_tag[i]['caption'].rstrip('\r\n').replace('\n', ' ')[:100]
                            log_string += "\n >>> Likes: %s" % \
                                self.media_by_tag[i]['likes']['count']
                            log_string += "\n >>> Comments: %s" % \
                                self.media_by_tag[i]['comments']['count']
                            self.write_log(log_string)
                            like = self.like(self.media_by_tag[i]['id'])
                            # comment = self.comment(self.media_by_tag[i]['id'], 'Cool!')
                            # follow = self.follow(self.media_by_tag[i]["owner"]["id"])
                            if like != 0:
                                if like.status_code == 200:
                                    # Like, all ok!
                                    self.error_400 = 0
                                    self.like_counter += 1
                                    log_string = "Liked: %s. Like #%i." % \
                                        (self.media_by_tag[i]['id'],
                                         self.like_counter)
                                    self.write_log(log_string)
                                elif like.status_code == 400:
                                    log_string = "Not liked: %i" % (like.status_code)
                                    self.write_log(log_string)
                                    # Some error. If repeated - can be a ban!
                                    if self.error_400 >= self.error_400_to_ban:
                                        # Looks like you are banned!
                                        time.sleep(self.ban_sleep_time)
                                    else:
                                        self.error_400 += 1
                                else:
                                    log_string = "Not liked: %i" % (like.status_code)
                                    self.write_log(log_string)
                                    return False
                                    # Some error.
                                i += 1
                                if delay:
                                    time.sleep(self.like_delay * 0.9 +
                                               self.like_delay * 0.2 * random.random())
                                else:
                                    return True
                            else:
                                return False
                        else:
                            return False
                    else:
                        return False
            else:
                self.write_log("No media to like!")

    def like(self, media_id):
        """ Send http request to like media by ID """
        if (self.login_status):
            url_likes = self.url_likes % (media_id)
            try:
                like = self.s.post(url_likes)
                last_liked_media_id = media_id
            except:
                self.write_log("Exception on like!")
                like = 0
            return like

    def unlike(self, media_id):
        """ Send http request to unlike media by ID """
        if (self.login_status):
            url_unlike = self.url_unlike % (media_id)
            try:
                unlike = self.s.post(url_unlike)
            except:
                self.write_log("Exception on unlike!")
                unlike = 0
            return unlike

    def comment(self, media_id, comment_text):
        """ Send http request to comment """
        if (self.login_status):
            comment_post = {'comment_text': comment_text}
            url_comment = self.url_comment % (media_id)
            try:
                comment = self.s.post(url_comment, data=comment_post)
                if comment.status_code == 200:
                    self.comments_counter += 1
                    log_string = 'Write: "%s". #%i.' % (comment_text,
                                                        self.comments_counter)
                    self.write_log(log_string)
                return comment
            except:
                self.write_log("Except on comment!")
        return False

    def follow(self, user_id):
        """ Send http request to follow """
        if (self.login_status):
            url_follow = self.url_follow % (user_id)
            try:
                follow = self.s.post(url_follow)
                if follow.status_code == 200:
                    self.follow_counter += 1
                    log_string = "Followed: %s #%i." % (user_id,
                                                        self.follow_counter)
                    self.write_log(log_string)
                return follow
            except:
                self.write_log("Except on follow!")
        return False

    def unfollow(self, user_id):
        """ Send http request to unfollow """
        if (self.login_status):
            url_unfollow = self.url_unfollow % (user_id)
            try:
                unfollow = self.s.post(url_unfollow)
                if unfollow.status_code == 200:
                    self.unfollow_counter += 1
                    log_string = "Unfollow: %s #%i." % (user_id,
                                                        self.unfollow_counter)
                    self.write_log(log_string)
                return unfollow
            except:
                self.write_log("Except on unfollow!")
        return False

    def unfollow_on_cleanup(self, user_to_unfollow_id):
        """ Unfollow on cleanup by @rjmayott """
        if (self.login_status):
            url_unfollow = self.url_unfollow % (user_to_unfollow_id)
            try:
                unfollow = self.s.post(url_unfollow)
                if unfollow.status_code == 200:
                    self.unfollow_counter += 1
                    log_string = "Unfollow: %s #%i of %i." % (
                        user_to_unfollow_id, self.unfollow_counter,
                        self.follow_counter)
                    self.write_log(log_string)
                    self.database_wrapper.add_unfollow_record(
                        self.user_id, user_to_unfollow_id)
                else:
                    log_string = "Slow down - pausing for 5 minutes so we don't get banned!"
                    self.write_log(log_string)
                    time.sleep(300)
                    unfollow = self.s.post(url_unfollow)
                    if unfollow.status_code == 200:
                        self.unfollow_counter += 1
                        log_string = "Unfollow: %s #%i of %i." % (
                            user_to_unfollow_id, self.unfollow_counter,
                            self.follow_counter)
                        self.write_log(log_string)
                        self.database_wrapper.add_unfollow_record(
                            self.user_id, user_to_unfollow_id)
                    else:
                        log_string = "Still no good :( Skipping and pausing for another 5 minutes"
                        self.write_log(log_string)
                        time.sleep(300)
                        return False
                return unfollow
            except:
                log_string = "Except on unfollow... Looks like a network error"
                self.write_log(log_string)
        return False

    def auto_mod(self):
        """ Start the loop that gets media IDs from your tag list and likes them """
        if (self.login_status):
            while True:
                random.shuffle(self.tag_list)
                self.get_media_id_by_tag(random.choice(self.tag_list))
                self.like_all_exist_media(random.randint(1, self.max_like_for_one_tag))

    def new_auto_mod(self):
        while (self.end_at is None or datetime.datetime.now() < self.end_at):
            # ------------------- Get media_id -------------------
            if len(self.media_by_tag) == 0:
                self.get_media_id_by_tag(random.choice(self.tag_list))
                self.this_tag_like_count = 0
                self.max_tag_like_count = random.randint(1, self.max_like_for_one_tag)
            # ------------------- Like -------------------
            self.new_auto_mod_like()
            # ------------------- Follow -------------------
            self.new_auto_mod_follow()
            # ------------------- Unfollow -------------------
            self.new_auto_mod_unfollow()
            # ------------------- Comment -------------------
            self.new_auto_mod_comments()
            # Bot iteration delay
            time.sleep(3)
            # print("Tic!")
        log_string = 'Specified run time reached. Starting shutdown at %s' % \
            (datetime.datetime.now().strftime("%d.%m.%Y %H:%M:%S"))
        self.write_log(log_string)

    def new_auto_mod_like(self):
        if time.time() > self.next_iteration["Like"] and self.like_per_day != 0 \
                and len(self.media_by_tag) > 0:
            # You have media_id to like:
            if self.like_all_exist_media(media_size=1, delay=False):
                # If we liked, go to sleep:
                self.next_iteration["Like"] = time.time() + \
                    self.add_time(self.like_delay)
                # Count this tag's likes:
                self.this_tag_like_count += 1
                if self.this_tag_like_count >= self.max_tag_like_count:
                    self.media_by_tag = [0]
            # Del first media_id
            del self.media_by_tag[0]

    def new_auto_mod_follow(self):
        if time.time() > self.next_iteration["Follow"] and \
                self.follow_per_day != 0 and len(self.media_by_tag) > 0:
            if self.media_by_tag[0]["owner"]["id"] == self.user_id:
                self.write_log("Keep calm - It's your own profile ;)")
                return
            log_string = "Trying to follow: %s" % (self.media_by_tag[0]["owner"]["id"])
            self.write_log(log_string)
            if self.follow(self.media_by_tag[0]["owner"]["id"]) != False:
                self.bot_follow_list.append(
                    [self.media_by_tag[0]["owner"]["id"], time.time()])
                self.database_wrapper.add_follow_record(
                    self.user_id, self.media_by_tag[0]["owner"]["id"])
                self.next_iteration["Follow"] = time.time() + \
                    self.add_time(self.follow_delay)

    # def new_auto_mod_unfollow(self):
    #     # unfollows a random person from your follow list based on rules
    #     # in auto_unfollow()
    #     if time.time() > self.next_iteration["Unfollow"] and \
    #             self.unfollow_per_day != 0 and len(self.bot_follow_list) > 0:
    #         if (self.bot_mode == 0):
    #             for f in self.bot_follow_list:
    #                 if time.time() > (f[1] + self.follow_time):
    #                     log_string = "Trying to unfollow #%s: " % f[0]
    #                     self.write_log(log_string)
    #                     self.auto_unfollow()
    #                     self.bot_follow_list.remove(f)
    #                     self.next_iteration["Unfollow"] = time.time() + \
    #                         self.add_time(self.unfollow_delay)
    #         if (self.bot_mode == 1):
    #             unfollow_protocol(self)

    def new_auto_mod_unfollow(self):
        # TODO: This will get stuck if there is a problem unfollowing someone
        # (i.e. they have already been unfollowed); we should skip the
        # problematic entries somehow.
        # Unfollows the oldest person that the bot has followed, based on
        # the database records.
        if time.time() > self.next_iteration["Unfollow"] and self.unfollow_per_day != 0:
            f = self.database_wrapper.oldest_follow_id_and_follow_date(self.user_id)
            if f is not None:
                # NOTE: comparing a bare gmtime() (a struct_time) to a float
                # is wrong; convert it to a timestamp first.
                if time.mktime(gmtime()) > (time.mktime(f[1].timetuple()) +
                                            self.follow_time):
                    # if time.time() > (f[1] + self.follow_time):
                    log_string = "Trying to unfollow %s: " % f[0]
                    self.write_log(log_string)
                    url_unfollow = self.url_unfollow % (f[0])
                    try:
                        unfollow = self.s.post(url_unfollow)
                        if unfollow.status_code == 200:
                            self.unfollow_counter += 1
                            log_string = "Unfollow: %s #%i of %i." % (
                                f[0], self.unfollow_counter, self.follow_counter)
                            self.write_log(log_string)
                            self.database_wrapper.add_unfollow_record(
                                self.user_id, f[0])
                        else:
                            log_string = "Could not unfollow :( Skipping until next cycle"
                            self.write_log(log_string)
                    except:
                        log_string = "Except on unfollow... Looks like a network error"
                        self.write_log(log_string)
            self.next_iteration["Unfollow"] = time.time() + \
                self.add_time(self.unfollow_delay)

    def new_auto_mod_comments(self):
        if time.time() > self.next_iteration["Comments"] and self.comments_per_day != 0 \
                and len(self.media_by_tag) > 0 \
                and self.check_existing_comment(self.media_by_tag[0]['code']) == False:
            comment_text = self.generate_comment()
            log_string = "Trying to comment: %s" % (self.media_by_tag[0]['id'])
            self.write_log(log_string)
            if self.comment(self.media_by_tag[0]['id'], comment_text) != False:
                self.next_iteration["Comments"] = time.time() + \
                    self.add_time(self.comments_delay)

    def add_time(self, time):
        """ Add some randomness for the next iteration """
        return time * 0.9 + time * 0.2 * random.random()

    def generate_comment(self):
        c_list = list(itertools.product(
            ["this", "the", "your"],
            ["photo", "picture", "pic", "shot", "snapshot"],
            ["is", "looks", "feels", "is really"],
            ["great", "super", "good", "very good", "good", "wow",
             "WOW", "cool", "GREAT", "magnificent", "magical",
             "very cool", "stylish", "so stylish", "beautiful",
             "so beautiful", "so stylish", "so professional",
             "lovely", "so lovely", "very lovely", "glorious",
             "so glorious", "very glorious", "adorable", "excellent",
             "amazing"],
            [".", "..", "...", "!", "!!", "!!!"]))

        repl = [(" ", " "), (" .", "."), (" !", "!")]
        res = " ".join(random.choice(c_list))
        for s, r in repl:
            res = res.replace(s, r)
        return res.capitalize()

    def check_existing_comment(self, media_code):
        url_check = self.url_media_detail % (media_code)
        check_comment = self.s.get(url_check)
        all_data = json.loads(check_comment.text)
        if all_data['media']['owner']['id'] == self.user_id:
            self.write_log("Keep calm - It's your own media ;)")
            # Del media to avoid looping on it
            del self.media_by_tag[0]
            return True
        comment_list = list(all_data['media']['comments']['nodes'])
        for d in comment_list:
            if d['user']['id'] == self.user_id:
                self.write_log("Keep calm - Media already commented ;)")
                # Del media to avoid looping on it
                del self.media_by_tag[0]
                return True
        return False

    # Unfollows random users you are following, based on a set of rules that
    # try to determine if they are likely to follow you back.
    # This may select ANY user you are following.
    def auto_unfollow(self):
        chooser = 1
        current_user = '******'
        current_id = '12345'
        self.media_on_feed = []
        if len(self.media_on_feed) < 1:
            self.get_media_id_recent_feed()
        if len(self.media_on_feed) != 0:
            chooser = random.randint(0, len(self.media_on_feed) - 1)
            current_id = self.media_on_feed[chooser]["owner"]["id"]
            current_user = self.media_on_feed[chooser]["owner"]["username"]
        if (self.login_status):
            now_time = datetime.datetime.now()
            log_string = "%s : Get user info \n%s" % (
                self.user_login, now_time.strftime("%Y%d%m-%H:%M"))
            self.write_log(log_string)
            if self.login_status == 1:
                url_tag = 'https://www.instagram.com/%s/' % (current_user)
                try:
                    r = self.s.get(url_tag)
                    text = r.text
                    finder_text_start = ('<script type="text/javascript">'
                                         'window._sharedData = ')
                    finder_text_start_len = len(finder_text_start) - 1
                    finder_text_end = ';</script>'
                    all_data_start = text.find(finder_text_start)
                    all_data_end = text.find(finder_text_end, all_data_start + 1)
                    json_str = text[(all_data_start + finder_text_start_len + 1)
                                    : all_data_end]
                    all_data = json.loads(json_str)
                    self.user_info = list(all_data['entry_data']['ProfilePage'])
                    i = 0
                    log_string = "Checking user info..."
                    self.write_log(log_string)
                    while i < 1:
                        follows = self.user_info[0]['user']['follows']['count']
                        follower = self.user_info[0]['user']['followed_by']['count']
                        media = self.user_info[0]['user']['media']['count']
                        follow_viewer = self.user_info[0]['user']['follows_viewer']
                        followed_by_viewer = \
                            self.user_info[0]['user']['followed_by_viewer']
                        requested_by_viewer = \
                            self.user_info[0]['user']['requested_by_viewer']
                        has_requested_viewer = \
                            self.user_info[0]['user']['has_requested_viewer']
                        log_string = "Followers: %i" % (follower)
                        self.write_log(log_string)
                        log_string = "Following: %s" % (follows)
                        self.write_log(log_string)
                        log_string = "Media: %i" % (media)
                        self.write_log(log_string)
                        if follower / follows > 2:
                            self.is_selebgram = True
                            self.is_fake_account = False
                            print('   >>>This is probably a Selebgram account')
                        elif follows / follower > 2:
                            self.is_fake_account = True
                            self.is_selebgram = False
                            print('   >>>This is probably a fake account')
                        else:
                            self.is_selebgram = False
                            self.is_fake_account = False
                            print('   >>>This is a normal account')
                        if follows / media < 10 and follower / media < 10:
                            self.is_active_user = True
                            print('   >>>This user is active')
                        else:
                            self.is_active_user = False
                            print('   >>>This user is passive')
                        if follow_viewer or has_requested_viewer:
                            self.is_follower = True
                            print("   >>>This account is following you")
                        else:
                            self.is_follower = False
                            print('   >>>This account is NOT following you')
                        if followed_by_viewer or requested_by_viewer:
                            self.is_following = True
                            print('   >>>You are following this account')
                        else:
                            self.is_following = False
                            print('   >>>You are NOT following this account')
                        i += 1
                except:
                    media_on_feed = []
                    self.write_log("Except on get_info!")
                    time.sleep(20)
                    return 0
            else:
                return 0
        if self.is_selebgram is not False or self.is_fake_account is not False \
                or self.is_active_user is not True or self.is_follower is not True:
            print("Unfollowing: %s" % current_user)
            self.unfollow(current_id)
            try:
                del self.media_on_feed[chooser]
            except:
                self.media_on_feed = []
        else:
            print("Not unfollowing: %s" % current_user)
            self.media_on_feed = []

    def get_media_id_recent_feed(self):
        if (self.login_status):
            now_time = datetime.datetime.now()
            log_string = "%s : Get media id on recent feed" % (self.user_login)
            self.write_log(log_string)
            if self.login_status == 1:
                url_tag = 'https://www.instagram.com/#'
                try:
                    r = self.s.get(url_tag)
                    text = r.text
                    finder_text_start = ('<script type="text/javascript">'
                                         'window._sharedData = ')
                    finder_text_start_len = len(finder_text_start) - 1
                    finder_text_end = ';</script>'
                    all_data_start = text.find(finder_text_start)
                    all_data_end = text.find(finder_text_end, all_data_start + 1)
                    json_str = text[(all_data_start + finder_text_start_len + 1)
                                    : all_data_end]
                    all_data = json.loads(json_str)
                    self.media_on_feed = list(
                        all_data['entry_data']['FeedPage'][0]
                        ['feed']['media']['nodes'])
                    log_string = "Media in recent feed = %i" % (
                        len(self.media_on_feed))
                    self.write_log(log_string)
                except:
                    self.media_on_feed = []
                    self.write_log("Except on get_media!")
                    time.sleep(20)
                    return 0
            else:
                return 0

    def write_log(self, log_text):
        """ Write a log entry via print() or the logger """
        log_text = "[%s] %s" % (
            datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), log_text)
        if self.log_mod == 0:
            try:
                print(log_text)
            except UnicodeEncodeError:
                print("Your text has a unicode problem!")
        elif self.log_mod == 1:
            # Create the log file if it does not exist yet.
            if self.log_file == 0:
                self.log_file = 1
                now_time = datetime.datetime.now()
                self.log_full_path = '%s%s_%s.log' % (
                    self.log_file_path, self.user_login,
                    now_time.strftime("%d.%m.%Y_%H:%M"))
                formatter = logging.Formatter('%(asctime)s - %(name)s '
                                              '- %(message)s')
                self.logger = logging.getLogger(self.user_login)
                self.hdrl = logging.FileHandler(self.log_full_path, mode='w')
                self.hdrl.setFormatter(formatter)
                self.logger.setLevel(level=logging.INFO)
                self.logger.addHandler(self.hdrl)
            # Log to the log file.
            try:
                self.logger.info(log_text)
            except UnicodeEncodeError:
                print("Your text has a unicode problem!")
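# Hypothetical launch script for InstaBot; the credentials, tags and rates
# are placeholders.
bot = InstaBot(login='my_login', password='my_password',
               like_per_day=600,
               tag_list=['sunset', 'travel', 'food'],
               follow_per_day=100,
               unfollow_per_day=100,
               total_run_time=8 * 60 * 60)  # exit after 8 hours
bot.new_auto_mod()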
def test_db_connection():
    try:
        db = DatabaseWrapper()
        # server_info() round-trips to the MongoDB server, so this fails
        # fast if it is unreachable.
        return json.dumps(db.client.server_info())
    except:
        return 'Failed to connect to mongo database'
""" for the given list, gets the stations for a given list of artists, then records the stations and the predicted artists in a machine-readable file """ import sys sys.path.insert(0, "/Users/meuse/Meuse/env/meuse/meuse/meuse") import json import pickle from cluster_module import ClusterModule from database_wrapper import DatabaseWrapper from random import shuffle cm = ClusterModule() db = DatabaseWrapper() stations_output_file = "stations.pickle" artists_output_file = "artistsandstations.pickle" stationslist = [] stationstoartist = {} # dict where stations are mapped to predicted and seed artists artistsforstation = [] artistsforotherstations = [] # define a list of artists, select 100 from db allartists = db.get100ArtistFromTop500() # select 100 artists randomly # shuffle(allartists) artist_list = allartists[:100]
""" does quality control on the tags, checking which ones should be active for a tag to be active it has to have at least a threshold level of artists linking to it """ import sys sys.path.insert(0, '/Users/meuse/Meuse/env/meuse/meuse/meuse') from database_wrapper import DatabaseWrapper threshold = 5 #minimum number of artists linking to a tag to make it valid db = DatabaseWrapper() #get tag data from database data = db.getA2TCount() tagstoactivate = [] for element in data: count = element[2] name = element[1] tagid = element[0] if count>threshold: tagstoactivate.append(tagid) print name for tagID in tagstoactivate: db.activateTag(tagID) print tagstoactivate