def load_train_data(categories, size=SIZE, localization=True, verbose=False):
    x = []
    y = []
    log('Status', 'Processing... ' + str(categories))
    for t in categories:
        folder = TRAIN_FOLDER + t
        files = os.listdir(folder)
        log(t, len(files), suffix='files')
        for filename in files:
            img = load_image(folder + '/' + filename, size=size, expand_dims=False)
            x.append(img)
            y.append(t)
    log('Status', 'DONE')
    X = normalize(np.array(x))
    log('X shape', X.shape)
    log('X size', bytesto(X.nbytes, 'm'), suffix='MB')
    Y = preprocessing.LabelEncoder().fit_transform(np.array(y))
    log('Y shape', Y.shape)
    log('Y size', bytesto(Y.nbytes, 'k'), suffix='KB')
    return X, Y
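# Every snippet in this collection reports sizes through a bytesto() helper that
# is never shown here. Below is a minimal sketch of such a converter; the exact
# unit labels and rounding are assumptions, not the original implementation.
def bytesto(num_bytes, unit, bsize=1024):
    """Convert a raw byte count to 'k', 'm', 'g' or 't' units (base-1024)."""
    exponents = {'k': 1, 'm': 2, 'g': 3, 't': 4}
    return num_bytes / (bsize ** exponents[unit])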
def load_regression_data(categories, size=SIZE, verbose=False):
    x = []
    y = []
    log('Status', 'Processing... ' + str(categories))
    for t in categories:
        folder = TRAIN_FOLDER + t
        files = os.listdir(folder)
        log(t, len(files), suffix='files')
        for filename in files:
            img = load_image(folder + '/' + filename, size=size, expand_dims=False)
            a = annotations.for_image(filename, t)
            if a is not None:
                x.append(img)
                y.append([a['x'], a['y']])
    log('Status', 'DONE')
    X = normalize(np.array(x))
    log('X shape', X.shape)
    log('X size', bytesto(X.nbytes, 'm'), suffix='MB')
    Y = np.array(y)
    log('Y shape', Y.shape)
    log('Y size', bytesto(Y.nbytes, 'k'), suffix='KB')
    return X, Y
def get_db_size(human=False):
    # The most recent "db_size" record wins, so sort by timestamp in both cases.
    size = db_common.search(Common.data == "db_size")
    size = sorted(size, key=lambda k: k['db_size_timestamp'])
    if human:
        return bytesto(size[-1].get("value") if size else 0, "m")
    return size[-1].get("value") if size else 0
def set_db_size():
    if os.path.isfile(MAIN_DB):
        size = os.path.getsize(MAIN_DB)
        db_common.insert({
            "data": "db_size",
            "value": size,
            "value_mb": bytesto(size, "m"),
            "db_size_timestamp": int(time.time()),
            "timestamp": int(time.time()),
        })
        # Drop size records older than 24 hours.
        hours_ago_24 = int(time.time()) - DAY
        db_common.remove(Common.db_size_timestamp < hours_ago_24)
    else:
        log.info('no database found in: {}'.format(MAIN_DB))
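# get_db_size()/set_db_size() assume a TinyDB table named db_common and a Query
# object named Common. A minimal sketch of that wiring is shown below; the table
# name, state file and constant values are assumptions inferred from how they are
# used above, not taken from the original project.
from tinydb import TinyDB, Query

MAIN_DB = "db/brain.db"           # assumed path of the brain database being measured
DAY = 60 * 60 * 24                # seconds in a day

db = TinyDB("db/bot_state.json")  # assumed state file, separate from MAIN_DB
db_common = db.table("common")
Common = Query()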
def load_test_data(folder):
    t = []
    files = os.listdir(folder)
    log('Status', 'Processing... ' + str(len(files)) + ' files')
    for filename in files:
        path = folder + '/' + filename
        img = load_image(path, expand_dims=False)
        t.append(img)
    log('Status', 'DONE')
    T = normalize(np.array(t))
    log('Shape', T.shape)
    log('Size', bytesto(T.nbytes, 'm'), suffix='MB')
    return T, files
def learn(subreddit=None):
    log.info("trying to learn")
    if os.path.isfile(MAIN_DB):
        size = os.path.getsize(MAIN_DB)
        log.info("db size: " + str(bytesto(size, "m")))
    else:
        size = 0

    if size > MAIN_DB_MAX_SIZE:  # stop learning once the database is full
        log.info("DB size has reached limit: {}".format(bytesto(MAIN_DB_MAX_SIZE, "m")))
        return

    try:
        if subreddit:
            # learn from the supplied subreddit
            if isinstance(subreddit, str):
                log.info("learning from: " + subreddit)
                sub = reddit.api.subreddit(subreddit)
            else:
                log.info('I have no idea what subreddit you gave me. not going to learn.')
                return
        elif SUBREDDIT_LIST:
            # learn from one of the filtered subreddits
            sub_name = random.choice(SUBREDDIT_LIST)
            log.info("SUBREDDIT_LIST is active")
            log.info("learning from: {}".format(sub_name))
            sub = reddit.api.subreddit(sub_name)
        else:
            # no subreddit supplied, so we learn from a random one
            subok = False
            while not subok:
                sub = reddit.api.subreddit("random")
                if not sub.over18:  # we don't want an nsfw sub
                    if sub.subscribers > 100000:  # easier to get away with stuff on big subs
                        log.info("checking if subreddit is blacklisted")
                        with open('src/banned_subreddits.txt') as blacklisted_subreddits:
                            list_of_blacklisted_subreddits = blacklisted_subreddits.read().splitlines()
                        if str(sub) not in list_of_blacklisted_subreddits:
                            log.info("not blacklisted")
                            log.info("found: " + str(sub.display_name))
                            subok = True
                        else:
                            log.info("r/" + str(sub) + " is blacklisted")

        sub_db = "{}/{}.db".format(DB_DIR, str(sub.display_name))
        log.info("active db : {}".format(sub_db))
        sub_brain = Brain(sub_db)
        sub_hot = sub.hot()
        log.info("looping through submissions")
        # Loop through each submission
        for submission in sub_hot:
            log.info("checking submission")
            # Replace the "MoreComments" objects with the actual replies
            submission.comments.replace_more(limit=0)
            # Get a flattened list of all the comments
            comments = submission.comments.list()
            log.debug("looping through comments")
            # Loop through each comment from the submission
            for comment in comments:
                if len(comment.body) < 240:  # long text tends to turn into noise
                    log.debug("comment score: {}".format(comment.score))
                    if comment.score > 20:  # we only want to learn things people like to hear
                        if comment.author != submission.author:  # only learn comments made as an observer
                            comment_body = comment.body
                            for dis_word in DISALLOWED_WORDS:
                                if dis_word in comment_body:
                                    # str.replace returns a new string, so keep the result
                                    comment_body = comment_body.replace(dis_word, "")
                            if LOG_LEARNED_COMMENTS:
                                log.info("learning comment. score: {}; comment: {}".format(
                                    comment.score, comment_body.encode("utf8")))
                            base_brain.learn(comment_body.encode("utf8"))  # tell the bot to learn this comment
                            sub_brain.learn(comment_body.encode("utf8"))
        log.info("done learning")
    except Exception as e:
        # If any errors occur, just log them
        log.info(e, exc_info=True)
def download_photos(self):
    total_photos_from_neighborhood = 0
    total_size_downloaded_from_neighborhood = 0
    page = 1
    while True:
        print "\n-> PAGE: %s" % (str(page))
        total_photos_in_page = 0
        total_size_downloaded_in_page = 0
        url_params = {'idciudad': Constants.CITY_ID_TO_DOWNLOAD,
                      'idzona': Constants.NEIGHBORHOOD_ID_TO_DOWNLOAD,
                      'pagina': page}
        request_response = requests.get(Constants.IMOVELWEB_API_QUERY_URL,
                                        params=url_params,
                                        headers=Constants.DEFAULT_IMOVELWEB_API_HEADER)
        json_response = request_response.json()
        ads = json_response['data']['avisos']
        for ad in ads:
            number_of_photos_from_ad = 0
            total_size_downloaded_from_ad = 0
            for attribute, value in ad.iteritems():
                if attribute == 'fotos':
                    for photo in value:
                        photo_url = photo.get('url').replace("580x465", Constants.PHOTO_SIZE_TO_DOWNLOAD)  # request smaller images
                        local_dir_saved_photo = Constants.LOCAL_DIR_SAVE_PHOTO + "/" + str(ad.get('idpropiedad'))
                        local_path_to_save_photo = local_dir_saved_photo + "/" + photo_url.rpartition('/')[2]
                        if not os.path.exists(local_dir_saved_photo):
                            os.mkdir(local_dir_saved_photo)  # create a per-ad directory for the photos
                        urllib.urlretrieve(photo_url, local_path_to_save_photo)  # download the photo
                        size = format(utils.bytesto(os.path.getsize(local_path_to_save_photo), 'm'), '.4f')  # size of each photo in megabytes
                        number_of_photos_from_ad += 1
                        total_size_downloaded_from_ad += float(size)
            print "[#AD] Photos from %s: %s | Size: %s\n" % (ad.get('idpropiedad'), str(number_of_photos_from_ad), str(total_size_downloaded_from_ad))
            total_photos_in_page += number_of_photos_from_ad
            total_size_downloaded_in_page += total_size_downloaded_from_ad
        print "[#PAGE] Photos from page %s: %s | Size: %s\n" % (page, str(total_photos_in_page), str(total_size_downloaded_in_page))
        total_photos_from_neighborhood += total_photos_in_page
        total_size_downloaded_from_neighborhood += total_size_downloaded_in_page
        # stop when we reach the end of the listings
        if len(ads) < Constants.NUMBER_OF_ADS_RETURNED_API:
            break
        page += 1
    print "[#TOTAL] Photos from all pages: %s | Size: %s\n" % (str(total_photos_from_neighborhood), str(total_size_downloaded_from_neighborhood))
from logger import log
from requests import get

try:
    ip = get('https://api.ipify.org').text
    print 'My public IP address is:', ip
except Exception as e:
    print "could not check external ip"

limit = 52428800  # 50 MB

log.info('------------new bot run--------------')
log.info("user is " + str(reddit.api.user.me()))

if __name__ == '__main__':
    log.info('db size to start replying: ' + str(bytesto(limit, 'm')))
    while True:
        if os.path.isfile(MAIN_DB):
            size = os.path.getsize(MAIN_DB)
            log.info('db size: ' + str(bytesto(size, 'm')))
        else:
            size = 0
        if size < limit:  # learn faster early on
            log.info('fast learning')
            learn()
            try:
                log.info('new db size: ' + str(bytesto(os.path.getsize(MAIN_DB), 'm')))
            except:
                pass
def learn(subreddit=None):
    log.info("trying to learn")
    if os.path.isfile(MAIN_DB):
        size = os.path.getsize(MAIN_DB)
        log.info("db size: " + str(bytesto(size, "m")))
    else:
        size = 0

    if size > MAIN_DB_MAX_SIZE:  # stop learning once the database is full
        log.info("DB size has reached limit: {}".format(bytesto(MAIN_DB_MAX_SIZE, "m")))
        return

    try:
        if subreddit:
            log.info("learning from: " + subreddit)
            sub = reddit.api.subreddit(subreddit)
        else:
            # no subreddit supplied, so we learn from a random one
            subok = False
            while not subok:
                sub = reddit.api.subreddit("random")
                if not sub.over18:  # we don't want an nsfw sub
                    if sub.subscribers > 100000:  # easier to get away with stuff on big subs
                        log.info("found: " + str(sub.display_name))
                        subok = True

        sub_db = "{}/{}.db".format(DB_DIR, str(sub.display_name))
        log.info("active db : {}".format(sub_db))
        sub_brain = Brain(sub_db)
        sub_hot = sub.hot()
        log.info("looping through submissions")
        # Loop through each submission
        for submission in sub_hot:
            log.info("checking submission")
            # Replace the "MoreComments" objects with the actual replies
            submission.comments.replace_more(limit=0)
            # Get a flattened list of all the comments
            comments = submission.comments.list()
            log.debug("looping through comments")
            # Loop through each comment from the submission
            for comment in comments:
                if len(comment.body) < 240:  # long text tends to turn into noise
                    log.debug("comment score: {}".format(comment.score))
                    if comment.score > 20:  # we only want to learn things people like to hear
                        if comment.author != submission.author:  # only learn comments made as an observer
                            log.info("learning comment. score: {}; comment: {}".format(
                                comment.score, comment.body.encode("utf8")))
                            base_brain.learn(comment.body.encode("utf8"))  # tell the bot to learn this comment
                            sub_brain.learn(comment.body.encode("utf8"))
        log.info("done learning")
    except Exception as e:
        # If any errors occur, just log them
        log.info(e, exc_info=True)
def init():
    log.info("db size to start replying: " + str(bytesto(MAIN_DB_MIN_SIZE, "m")))
    reddit.shadow_check()
    # check if this is the first time running the bot
    set_user_info()
    check_first_run()
    set_db_size()
    while True:
        if get_db_size() < MAIN_DB_MIN_SIZE and not COMMENTS_DISABLED:  # learn faster early on
            log.info("""
            THE BOT IS WORKING. IT WILL TAKE ABOUT 8 HOURS FOR IT TO LEARN AND START COMMENTING.
            """)
            log.info("fast learning")
            learn()
            try:
                log.info("new db size: " + str(bytesto(get_db_size(), "m")))
            except:
                pass
            set_db_size()
            countdown(2)
        if get_db_size() > MAIN_DB_MIN_SIZE or COMMENTS_DISABLED:
            # once we learn enough, start submissions and replies
            log.info("database size is big enough")
            if USE_SLEEP_SCHEDULE:
                while should_we_sleep():
                    log.info("zzzzzzzz :snore:")
                    time.sleep(60)
            for i, action in enumerate(reddit_bot):
                if action.rate_limit_unlock_epoch != 0:
                    if action.rate_limit_unlock_epoch > get_current_epoch():
                        log.info("{} hit RateLimit recently, we need to wait {} seconds with this".format(
                            action.name,
                            action.rate_limit_unlock_epoch - get_current_epoch(),
                        ))
                        continue
                    else:
                        # _replace returns a new namedtuple, so store it back in the list
                        reddit_bot[i] = action._replace(rate_limit_unlock_epoch=0)
                else:
                    if prob(action.probability):
                        log.info("making a random {}".format(action.name))
                        try:
                            action.action()
                        except praw.exceptions.APIException as e:
                            secs_to_wait = get_seconds_to_wait(str(e))
                            reddit_bot[i] = action._replace(
                                rate_limit_unlock_epoch=get_current_epoch() + secs_to_wait)
                            log.info("{} hit RateLimit, need to sleep for {} seconds".format(
                                action.name, secs_to_wait))
                        except Exception as e:
                            log.error("something weird happened, {}".format(e), exc_info=True)
            if prob(PROBABILITIES["LEARN"]):  # chance we'll learn more
                log.info("going to learn")
                learn()
            # Wait 10 minutes to comment and post because of reddit rate limits
            countdown(1)
        log.info("end main loop")
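# The init() loop above and the reddit_bot list below rely on a reddit_bot_action
# record and a prob() helper defined elsewhere in the project. The sketch below
# shows what they could look like; the field names follow the attribute accesses
# above, but the details (especially the probability scale) are assumptions.
import random
from collections import namedtuple

# name: label for logging, action: callable to run, probability: chance per loop,
# rate_limit_unlock_epoch: epoch seconds until the action may run again (0 = not limited)
reddit_bot_action = namedtuple(
    "reddit_bot_action",
    ["name", "action", "probability", "rate_limit_unlock_epoch"],
)

def prob(probability):
    """Return True with the given probability (assumed to be in the range 0.0-1.0)."""
    return random.random() < probability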
log.error("counld not check external ip") RATE_LIMIT = 0 NEED_TO_WAIT = 0 log.info("------------new bot run--------------") log.info("user is " + str(reddit.api.user.me())) reddit_bot = [ reddit_bot_action("reply", reddit.random_reply, PROBABILITIES["REPLY"], 0), reddit_bot_action("delete", reddit.delete_comments, PROBABILITIES["DELETE"], 0), ] if __name__ == "__main__": log.info("db size size to start replying:" + str(bytesto(MAIN_DB_MIN_SIZE, "m"))) while True: if os.path.isfile(MAIN_DB): size = os.path.getsize(MAIN_DB) log.info("db size: " + str(bytesto(size, "m"))) else: size = 0 if size < MAIN_DB_MIN_SIZE: # learn faster early on log.info("fast learning") learn() try: log.info("new db size: " + str(bytesto(os.path.getsize(MAIN_DB), "m"))) except: