def send(self):
    """Send the email described by ``self.emailClass`` through Gmail's SMTP server.

    Reads the sender address and password from the configuration keys
    ``this_account_email`` and ``this_account_email_password``. The
    ``emailClass`` object supplies ``recipient``, ``subject``, ``message`` and
    ``attachmentAvailable``; when the latter is truthy, ``attachment`` (a file
    path) and ``attachmentName`` are used to attach the file.

    Raises:
        ScrapeExceptions.EmailNotificationError: if building or sending the
            message fails for any reason. The original error is chained as
            the cause.
    """
    try:
        gmailUser = config().get("this_account_email")
        gmailPassword = config().get("this_account_email_password")
        recipient = self.emailClass.recipient

        msg = MIMEMultipart()
        msg['From'] = gmailUser
        msg['To'] = recipient
        msg['Subject'] = self.emailClass.subject
        msg.attach(MIMEText(self.emailClass.message))

        if self.emailClass.attachmentAvailable:
            attachmentName = self.emailClass.attachmentName
            with open(self.emailClass.attachment, "rb") as file:
                # The second positional argument of MIMEApplication is the
                # MIME subtype, not the file name, so pass the name as a
                # Content-Type parameter instead.
                part = MIMEApplication(file.read(), Name=attachmentName)
            part['Content-Disposition'] = 'attachment; filename="{}"'.format(
                attachmentName)
            msg.attach(part)

        # The context manager closes the connection even when login or
        # sendmail raises.
        with smtplib.SMTP('smtp.gmail.com', 587) as mailServer:
            mailServer.ehlo()
            mailServer.starttls()
            mailServer.ehlo()
            mailServer.login(gmailUser, gmailPassword)
            mailServer.sendmail(gmailUser, recipient, msg.as_string())
    except Exception as error:
        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit still propagate unchanged.
        raise ScrapeExceptions.EmailNotificationError() from error
def numberOfUsersToActuallyUnfollow(self):
    """Return how many users the current unfollow batch may process.

    The result is the smaller of:

    * the number of unfollows still available today (``unfollow_per_day``
      minus the ``unfollowed`` count from the daily report, never below 0), and
    * the per-batch share (``unfollow_per_day`` / ``unfollow_batches``).

    Returns:
        int: the batch size, truncated towards zero.
    """
    response = Request().get("/report/daily-report/fetch")
    # assumes the report's "unfollowed" value is numeric -- TODO confirm
    usersUnfollowedToday = response.json()["unfollowed"]

    unfollowPerDay = config().get("unfollow_per_day")
    # Previously the fetched count was ignored, so the daily limit was never
    # reduced by what had already been unfollowed today.
    usersToUnfollowTillLimit = max(unfollowPerDay - usersUnfollowedToday, 0)
    usersToUnfollowPerBatch = unfollowPerDay / config().get("unfollow_batches")

    return int(min(usersToUnfollowTillLimit, usersToUnfollowPerBatch))
def createLogDirectoryForThisAccountIfItDoesNotExist(self):
    """Ensure the log directory exists and (re)create an empty log file.

    The log file location comes from the ``log_path`` constant. Its parent
    directory is created when missing, then the file is opened in write mode,
    which creates it or truncates existing content, and is closed again
    immediately.
    """
    logPath = config().getConstant("log_path")

    # Derive the directory from the path itself instead of stripping a
    # hard-coded "/errors.log" suffix.
    directoryPath = os.path.dirname(logPath)
    if directoryPath:
        os.makedirs(directoryPath, exist_ok=True)

    # Same two open attempts as before, but the handle is now closed instead
    # of being leaked.
    try:
        with open(logPath, 'wb'):
            pass
    except OSError:
        with open(logPath, 'w'):
            pass
def random_date(self):
    """Return a random time of today inside the configured activity window.

    The window is built from the configuration values ``start_at_h``,
    ``start_at_m``, ``end_at_h`` and ``end_at_m``, which are concatenated as
    strings into ``'%Y-%m-%d %H:%M'`` timestamps for today's date.

    Returns:
        str: a local time formatted as ``'%Y-%m-%d %H:%M'``.

    Raises:
        ValueError: if the window is empty (end not after start) or the
            configured values do not parse as a time.
    """
    todayYMD = datetime.datetime.today().strftime('%Y-%m-%d')
    start = todayYMD + " " + config().get("start_at_h") + ":" + config().get(
        "start_at_m")
    end = todayYMD + " " + config().get("end_at_h") + ":" + config().get(
        "end_at_m")

    # time.mktime returns a float; randrange requires integers (float
    # arguments are deprecated since Python 3.10 and rejected from 3.12).
    start_timestamp = int(time.mktime(time.strptime(start, '%Y-%m-%d %H:%M')))
    end_timestamp = int(time.mktime(time.strptime(end, '%Y-%m-%d %H:%M')))

    randomTimestamp = randrange(start_timestamp, end_timestamp)
    return time.strftime('%Y-%m-%d %H:%M', time.localtime(randomTimestamp))
def fillInForm(self):
    """Type the configured credentials into the login form and submit it.

    Reads ``username`` and ``password`` from the configuration, fills the
    inputs with those names, clicks the "Log in" button and then waits
    four seconds.
    """
    credentials = (config().get("username"), config().get("password"))
    inputs = (
        self.browser.find_element_by_name("username"),
        self.browser.find_element_by_name("password"),
    )
    for field, value in zip(inputs, credentials):
        field.send_keys(value)

    self.browser.find_element_by_xpath(
        '//form/span/button[text()="Log in"]').click()
    sleep(4)
class EvaluatePosts:
    """Email payload summarising today's scraping and pointing to the review page.

    Instances expose ``recipient``, ``subject``, ``message`` and
    ``attachmentAvailable``; ``subject`` and ``message`` are filled in when
    the instance is created.
    """

    # Class-level defaults; ``recipient`` is read from the configuration when
    # the class body is executed.
    recipient = config().get("master_email")
    subject = ""
    message = ""
    attachmentAvailable = False

    def __init__(self):
        self.generateMessage()
        self.generateSubject()

    def generateMessage(self):
        """Fetch the daily post statistics and store the message body."""
        stats = Request().get("/post/daily-stats").json()
        evaluateUrl = (constants.base_website + "/evaluate/" +
                       config().get("bot_account_id"))

        scrapedLine = "{} posts were scraped today.".format(
            stats["scrapedToday"])
        backlogLine = "{} posts are in the backlog.".format(stats["inBacklog"])
        # Draw attention to a backlog of fewer than 10 posts.
        if stats["inBacklog"] < 10:
            backlogWarning = "!!!!"
        else:
            backlogWarning = ""
        approvalLine = "{} posts have yet to be approved.".format(
            stats["toBeApproved"])

        # NOTE(review): template whitespace is reproduced exactly as found;
        # confirm whether line breaks were intended between the placeholders.
        template = " {} {} {} {} To evaluate posts, visit: {} "
        self.message = template.format(scrapedLine, backlogLine,
                                       backlogWarning, approvalLine,
                                       evaluateUrl)

    def generateSubject(self):
        """Store the subject line containing the bot account id and today's date."""
        accountId = config().get("bot_account_id")
        today = datetime.date.today()
        self.subject = "Daily Scraping Report For {}, for the {}".format(
            accountId, today)
def instantiateBrowser(self):
    """Create the Chrome WebDriver for this account if none exists yet.

    The browser emulates an iPhone 7, stores its profile under the
    ``session_path`` constant, runs headless when ``constants.headless`` is
    set, and is routed through the configured proxy when ``use_proxy`` is
    truthy. After the driver is created, ``AutoLogin`` is run on it.
    """
    # NOTE(review): the login is assumed to belong to the "browser not yet
    # created" branch -- confirm against the original layout.
    if self.browser is None:
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument("--disable-setuid-sandbox")
        chrome_options.add_experimental_option(
            "mobileEmulation", {"deviceName": "iPhone 7"})
        chrome_options.add_argument('--disable-extensions')

        path = config().getConstant("session_path")
        chrome_options.add_argument("user-data-dir={}".format(path))

        if constants.headless:
            chrome_options.add_argument('--headless')
            chrome_options.add_argument(
                '--disable-gpu')  # Last I checked this was necessary.

        # Work on a copy: DesiredCapabilities.CHROME is a shared class-level
        # dict, and adding proxy settings to it would leak into every other
        # driver created in this process.
        capabilities = DesiredCapabilities.CHROME.copy()

        if config().get("use_proxy"):
            print("using proxy")
            proxy_address = config().get("proxy_address")
            proxy = Proxy()
            proxy.socksPassword = config().get("proxy_password")
            proxy.socksUsername = config().get("proxy_username")
            proxy.ftpProxy = proxy_address
            proxy.httpProxy = proxy_address
            proxy.sslProxy = proxy_address
            proxy.proxy_type = {'ff_value': 1, 'string': 'MANUAL'}
            proxy.add_to_capabilities(capabilities)
        else:
            capabilities.pop('proxy', None)

        self.browser = webdriver.Chrome(chrome_options=chrome_options,
                                        desired_capabilities=capabilities)

        # Debug output of the effective capabilities (useful for checking
        # the proxy set-up).
        print(capabilities)

        AutoLogin(self.browser).login()
def hashtagComponent(self):
    """Build the hashtag part of a caption from the ``post_hashtag`` setting.

    Every configured hashtag is prefixed with ``#`` and followed by a single
    space. The resulting string is printed and returned.
    """
    tags = config().get("post_hashtag")
    joined = "".join("#" + tag + " " for tag in tags)
    print("hashtag string: " + joined)
    return joined
def postIsOnHashtagBlacklist(self):
    """Return True when the open post carries a blacklisted hashtag.

    Hashtags are taken from the text of every link on the current page whose
    text contains ``#`` (with the ``#`` removed) and compared against the
    ``hashtag_blacklist`` setting.
    """
    blacklist = config().get("hashtag_blacklist")
    hashtagLinks = self.browser.find_elements_by_partial_link_text('#')
    postHashtags = [link.text.replace("#", "") for link in hashtagLinks]

    for tag in postHashtags:
        if tag in blacklist:
            return True
    return False
def download(self, postInstance):
    """Download the image of ``postInstance`` into the local image directory.

    The file is stored as ``<local_path><instaId>.jpg``.

    Returns:
        The ``instaId`` of the post.

    Raises:
        ScrapeExceptions.FileSaveError: if building the path or retrieving
            the image fails; the underlying error is logged first.
    """
    self.createScrapedImageDirectoryForThisAccountIfItDoesNotExist()
    try:
        targetPath = "".join(
            (config().getConstant("local_path"), postInstance.instaId, ".jpg"))
        urllib.request.urlretrieve(postInstance.imageUrl, targetPath)
    except Exception as error:
        MyLogger().log(error)
        raise ScrapeExceptions.FileSaveError()
    else:
        return postInstance.instaId
def follow(self):
    """Follow users taken from the follower lists of the configured accounts.

    The batch total comes from ``numberOfUsersToActuallyFollow``. It is split
    into chunks of at most ``number_of_people_to_follow_per_user``, and each
    chunk is followed from a different, not yet used source account. The loop
    stops early when no source accounts are left; a source account that does
    not exist is skipped (its chunk still counts towards the total).
    """
    self.usersWhoHaveHadFollowersStolen = []
    target = self.numberOfUsersToActuallyFollow()
    assigned = 0

    while assigned < target:
        remaining = target - assigned
        perUser = config().get("number_of_people_to_follow_per_user")

        # Use a full per-user quota unless that would reach or exceed the
        # total; in that case only take what is left.
        if assigned + perUser < target:
            quota = perUser
        else:
            quota = remaining
        assigned += quota

        try:
            sourceUsername = self.unscrapedUserToStealFollowersFrom()
            self.AutoFollow.followFromUser(sourceUsername, quota)
        except FollowExceptions.NoMoreAccountsToStealFollowersFrom:
            break
        except FollowExceptions.UsernameDoesNotExist:
            continue
def usersToUnfollow(self):
    """Return the users that should be unfollowed in the current batch.

    Requests all unfollow candidates from ``/users-to-unfollow`` (passing the
    ``follow_time`` setting as ``unfollowUsersAfter``) and cuts the list down
    to the size given by ``numberOfUsersToActuallyUnfollow``.
    """
    payload = {"unfollowUsersAfter": config().get("follow_time")}
    candidates = Request().post("/users-to-unfollow", payload).json()

    batchSize = self.numberOfUsersToActuallyUnfollow()
    available = len(candidates)
    if not batchSize < available:
        batchSize = available

    return candidates[:batchSize]
def __init__(self):
    """Prepare the log file and configure logging to both file and console.

    The root logger is configured to write DEBUG output to the ``log_path``
    file, and the ``'foo'`` logger gets a console handler with a detailed
    format.
    """
    self.createLogDirectoryForThisAccountIfItDoesNotExist()
    logging.basicConfig(filename=config().getConstant("log_path"),
                        level=logging.DEBUG)

    logger = logging.getLogger('foo')
    logger.setLevel(logging.DEBUG)

    # logging.getLogger returns the same logger object on every call, so
    # adding a handler on each instantiation would print every record once
    # per instance created. Only attach the console handler the first time.
    hasConsoleHandler = any(
        type(handler) is logging.StreamHandler for handler in logger.handlers)
    if not hasConsoleHandler:
        stream_handler = logging.StreamHandler()
        formatter = logging.Formatter(
            "%(levelname)s : %(pathname)s:%(lineno)s - %(msg)s --- %(asctime)s"
        )
        stream_handler.setFormatter(formatter)
        logger.addHandler(stream_handler)
def unscrapedUserToStealFollowersFrom(self):
    """Pick a random source account that has not been used in this run.

    Candidates are the ``account_to_scrape_followers_from`` setting minus
    the names already recorded in ``self.usersWhoHaveHadFollowersStolen``.
    The chosen name is recorded there before it is returned.

    Raises:
        FollowExceptions.NoMoreAccountsToStealFollowersFrom: when every
            configured account has already been used.
    """
    configured = set(config().get("account_to_scrape_followers_from"))
    alreadyUsed = set(self.usersWhoHaveHadFollowersStolen)
    candidates = list(configured - alreadyUsed)
    random.shuffle(candidates)

    if not candidates:
        raise FollowExceptions.NoMoreAccountsToStealFollowersFrom()

    chosen = candidates[0]
    self.usersWhoHaveHadFollowersStolen.append(chosen)
    return chosen
def generateMessage(self):
    """Fetch the daily post statistics and store the message body in ``self.message``.

    The message reports the posts scraped today, the backlog size (followed
    by ``!!!!`` when fewer than 10 posts are in the backlog), the number of
    posts awaiting approval and the URL of the evaluation page.
    """
    stats = Request().get("/post/daily-stats").json()
    evaluateUrl = (constants.base_website + "/evaluate/" +
                   config().get("bot_account_id"))

    scrapedLine = "{} posts were scraped today.".format(stats["scrapedToday"])
    backlogLine = "{} posts are in the backlog.".format(stats["inBacklog"])
    if stats["inBacklog"] < 10:
        backlogWarning = "!!!!"
    else:
        backlogWarning = ""
    approvalLine = "{} posts have yet to be approved.".format(
        stats["toBeApproved"])

    # NOTE(review): template whitespace is reproduced exactly as found;
    # confirm whether line breaks were intended between the placeholders.
    template = " {} {} {} {} To evaluate posts, visit: {} "
    self.message = template.format(scrapedLine, backlogLine, backlogWarning,
                                   approvalLine, evaluateUrl)
def dailyReport(self):
    """Read the account's follower/following counts, report and email them.

    Opens the profile page of ``instagram_username``, reads the counter
    elements, posts both counts to ``/report/follower-count`` and sends a
    ``DailyReport`` email.
    """
    self.browser.get("https://www.instagram.com/" +
                     config().get("instagram_username") + "/")

    # The plural find_elements_* call is required here: the result is indexed
    # below, and the singular variant returns one element, not a list.
    counters = self.browser.find_elements_by_css_selector(
        "ul._h9luf li span._fd86t")
    # Index 1 is used as the follower count and index 2 as the following
    # count (presumably index 0 is the post count -- TODO confirm).
    followerCount = counters[1].text.replace(",", "")
    followingCount = counters[2].text.replace(",", "")
    # Alternative selectors kept for reference:
    #   "a[href*='followers'] span" / "a[href*='following'] span"

    print(followerCount)
    print(followingCount)

    Request().post("/report/follower-count", {
        "followerCount": followerCount,
        "followingCount": followingCount
    })
    Email(DailyReport(followerCount, followingCount)).send()
def unfollowFromSelf(self, limit):
    """Click up to ``limit`` unfollow buttons on the account's own profile.

    Opens the profile page of ``instagram_username``, clicks the link whose
    href contains ``followers`` and then clicks the matching buttons in
    page order.

    Args:
        limit: maximum number of buttons to click; values below 1 click
            nothing.
    """
    self.browser.get("https://www.instagram.com/" +
                     config().get("instagram_username") + "/")
    sleep(2)

    # Use the instance's driver; a bare ``browser`` name is not defined in
    # this method.
    followerButton = self.browser.find_element_by_css_selector(
        "a[href*='followers']")
    followerButton.click()
    sleep(2)

    unfollowButtons = self.browser.find_elements_by_css_selector(
        "button._qv64e._t78yp._4tgw8._njrw0")

    # Take the first ``limit`` buttons. The previous ``range(1, maxLimit)``
    # skipped the first button and clicked one fewer than requested.
    for unfollowButton in unfollowButtons[:max(limit, 0)]:
        unfollowButton.click()
class ErrorLogEmail:
    """Email payload that sends the error log file as an attachment.

    Instances expose ``recipient``, ``subject``, ``message``,
    ``attachmentAvailable``, ``attachmentName`` and ``attachment``.
    """

    # Class-level defaults; ``recipient`` is read from the configuration when
    # the class body is executed.
    recipient = config().get("master_email")
    subject = ""
    message = ""
    attachmentAvailable = True

    def __init__(self):
        self.generateMessage()
        self.generateSubject()
        self.generateAttachment()

    def generateMessage(self):
        """Store the message body, which names today's date."""
        today = datetime.date.today()
        self.message = "Error log for {}".format(today)

    def generateSubject(self):
        """Store the subject line containing the bot account id and today's date."""
        accountId = config().get("bot_account_id")
        today = datetime.date.today()
        self.subject = "Error log For {}, for the {}".format(accountId, today)

    def generateAttachment(self):
        """Point the attachment at the log file and name it ``error.txt``."""
        self.attachment = config().getConstant("log_path")
        self.attachmentName = "error.txt"
class DailyReport:
    """Email payload summarising today's bot activity and the account totals.

    Args:
        totalFollowerCount: follower total to include in the message.
        totalFollowingCount: following total to include in the message.
    """

    # Class-level defaults; ``recipient`` is read from the configuration when
    # the class body is executed.
    recipient = config().get("master_email")
    subject = ""
    message = ""
    attachmentAvailable = False

    def __init__(self, totalFollowerCount, totalFollowingCount):
        self.totalFollowerCount = totalFollowerCount
        self.totalFollowingCount = totalFollowingCount
        self.generateMessage()
        self.generateSubject()

    def generateMessage(self):
        """Fetch the daily report and store one line per statistic in ``self.message``."""
        stats = Request().get("/report/daily-report/fetch").json()

        lines = [
            "{} posts were liked today.".format(stats["liked"]),
            "{} posts were commented on today.".format(stats["commented"]),
            "{} users have been followed today.".format(stats["followed"]),
            "{} users have been unfollowed today.".format(stats["unfollowed"]),
            "{} pictures were posted today".format(stats["posted"]),
            "Total follower count is: {}".format(self.totalFollowerCount),
            "Total following count is: {}".format(self.totalFollowingCount),
        ]
        # Every line, including the last one, ends with a newline.
        self.message = "\n".join(lines) + "\n"

    def generateSubject(self):
        """Store the subject line containing the bot account id and today's date."""
        accountId = config().get("bot_account_id")
        today = datetime.date.today()
        self.subject = "Daily Report For {}, for the {}".format(
            accountId, today)
def generateNewVars(self):
    """Generate and save today's randomised schedule for every activity.

    For each of ``follow``, ``unfollow``, ``post`` and ``like`` a list of
    ``{"complete": 0, "time": <random time>}`` entries is created, one per
    batch. Batch counts come from ``follow_batches``, ``unfollow_batches``
    and ``posts_per_day``; the like count is ``like_per_day`` divided by
    ``like_per_batch``, rounded. The result, tagged with ``self.today()``,
    is passed to ``self.save_obj``.
    """

    def scheduledBatches(count):
        # One pending entry with its own random time per batch.
        return [{
            "complete": 0,
            "time": self.random_date()
        } for _ in range(count)]

    dailyTimes = {
        "vars": {
            "follow": [],
            "unfollow": [],
            "post": [],
            "like": [],
        },
        "date": self.today(),
    }
    schedule = dailyTimes["vars"]

    schedule["follow"] = scheduledBatches(config().get("follow_batches"))
    schedule["unfollow"] = scheduledBatches(config().get("unfollow_batches"))
    schedule["post"] = scheduledBatches(config().get("posts_per_day"))

    likePerDay = config().get("like_per_day")
    likePerBatch = config().get("like_per_batch")
    # Guard on the divisor only: a zero batch size with a non-zero daily
    # count used to raise ZeroDivisionError. A zero daily count still
    # yields zero batches through the rounding below.
    if likePerBatch:
        likeBatches = int(round(likePerDay / likePerBatch))
        schedule["like"] = scheduledBatches(likeBatches)

    self.save_obj(dailyTimes)
def filePath(self):
    """Return the value of the ``daily_vars_path`` constant."""
    dailyVarsPath = config().getConstant("daily_vars_path")
    return dailyVarsPath
def headers(self):
    """Return the header dict: the bot account id plus JSON content/accept types."""
    requestHeaders = {"bot": config().get("bot_account_id")}
    requestHeaders['CONTENT_TYPE'] = 'application/json'
    requestHeaders['Accept'] = 'application/json'
    return requestHeaders
def scrapingIncomplete(self):
    """Return True while the post backlog is below the ``post_backlog`` setting."""
    currentBacklog = Request().get("/post/backlog/amount").json()
    wantedBacklog = config().get("post_backlog")
    return currentBacklog < wantedBacklog
def get(self, filePath):
    """Return ``<local_path><filePath>.jpg``, built by plain string concatenation.

    Args:
        filePath: file name without extension.
    """
    directory = config().getConstant("local_path")
    fullPath = directory + filePath
    fullPath = fullPath + ".jpg"
    return fullPath
def empty(self):
    """Truncate the log file at ``log_path`` to zero length (creating it if missing)."""
    logPath = config().getConstant("log_path")
    # Opening in "w" mode already discards the existing content.
    with open(logPath, "w"):
        pass
def deleteIfExists(self, filePath):
    """Delete ``<local_path><filePath>.jpg`` when it exists; otherwise do nothing.

    Args:
        filePath: file name without extension.
    """
    directory = config().getConstant("local_path")
    target = directory + filePath + ".jpg"
    if not os.path.exists(target):
        return
    os.remove(target)
def uploadLimitNotReached(self):
    """Return True while fewer posts than ``posts_per_day`` were posted today.

    Today's count is the ``posted`` value of the daily report.
    """
    dailyLimit = config().get("posts_per_day")
    report = Request().get("/report/daily-report/fetch").json()
    return report["posted"] < dailyLimit
def generateSubject(self):
    """Store the subject line (bot account id and today's date) in ``self.subject``."""
    accountId = config().get("bot_account_id")
    today = datetime.date.today()
    self.subject = "Daily Scraping Report For {}, for the {}".format(
        accountId, today)
def __init__(self, browser):
    """Store the browser and load the scraping settings.

    Args:
        browser: the driver object kept as ``self.browser``.
    """
    self.browser = browser

    # Settings read once at construction time.
    sourceAccounts = config().get("accounts_to_scrape_posts_from")
    postsPerDay = config().get("posts_per_day")
    self.possibleUsersToScrapeFrom = sourceAccounts
    self.numberOfPostToScrape = postsPerDay

    # Starts empty for every new instance.
    self.usersScrapingHasBeenAttemptedOn = []
def creditComponent(self):
    """Return the ``via @<owner>`` credit text, or ``""`` when credit is disabled.

    The owner is ``self.record["owner_username"]``; crediting is controlled
    by the ``caption_give_credit`` setting.
    """
    # Read the owner first, so a missing key fails regardless of the setting.
    sourceUsername = self.record["owner_username"]
    if not config().get("caption_give_credit"):
        return ""
    return "via @" + sourceUsername