def TwitterScan(encoded_url):
    """Return the Twitter interaction count for ``encoded_url``.

    Requests the ``urls/count.json`` endpoint and returns the value stored
    under the ``count`` key of the JSON response.

    Every failure -- the request raising, the body not being valid JSON, or
    the JSON not containing ``count`` -- is saved in the ``Error`` model and
    reported by returning ``False``, so the caller can tell the lookup failed.

    Args:
        encoded_url: URL to look up. It is inserted into the query string
            as-is; no extra quoting is applied here.

    Returns:
        The ``count`` value from the API response, or ``False`` on failure.
    """
    # Request the API. Any exception raised by urlopen is recorded, not propagated.
    try:
        twitter_req = urllib.urlopen(
            "http://urls.api.twitter.com/1/urls/count.json?url={url}".format(url=encoded_url)
        )
    except Exception as e:
        Error(
            error="URL: {url}\nTwitter API error: {error}".format(error=e, url=encoded_url),
            created_by="social_sync.py",
        ).save()
        # Parenthesised single-argument print behaves the same as the print statement.
        print(" This one failed! Automatically saved in errors model")
        # Return False so the Command function can know it failed.
        return False

    # The request succeeded; decode the body. A response that is not JSON, is
    # not a JSON object, or lacks "count" is treated like a failed request
    # instead of crashing the caller.
    try:
        return json.load(twitter_req)["count"]
    except (ValueError, KeyError, TypeError) as e:
        Error(
            error="URL: {url}\nTwitter json.load() error: {error}".format(error=e, url=encoded_url),
            created_by="social_sync.py",
        ).save()
        print(" This one failed! Automatically saved in errors model")
        return False
    finally:
        # Release the underlying connection whether or not decoding worked.
        twitter_req.close()
def contact_view(request):
    """Store a contact-form submission in today's ``Error`` record.

    Only POST requests are processed. A valid form contributes one
    ``{"city", "language", "email"}`` entry to the ``user_data`` list kept in
    the ``data`` field of the ``Error`` row whose ``timestamp`` is today; the
    row is created when none exists yet.

    Args:
        request: The incoming Django request.

    Returns:
        A redirect to ``accounts:login`` for non-POST requests, otherwise a
        redirect to ``accounts:update`` (with a success message when the form
        was valid).
    """
    if request.method != "POST":
        return redirect("accounts:login")

    contact_form = ContactForm(request.POST or None)
    if not contact_form.is_valid():
        return redirect("accounts:update")

    cleaned = contact_form.cleaned_data
    entry = {
        "city": cleaned.get("city"),
        "language": cleaned.get("language"),
        "email": cleaned.get("email"),
    }

    # first() returns None when there is no row for today, so a separate
    # exists() query is not needed.
    err = Error.objects.filter(timestamp=dt.date.today()).first()
    if err is not None:
        # NOTE(review): assumes Error.data holds a dict (JSON-style field) -- confirm.
        submissions = err.data.get("user_data", [])
        submissions.append(entry)
        err.data["user_data"] = submissions
        err.save()
    else:
        # Store a dict, not a formatted string: the branch above reads this
        # field back with err.data.get("user_data", ...), which a string
        # value would break on the next submission of the day.
        Error(data={"user_data": [entry]}).save()

    # User-facing message (Russian for "Data sent").
    messages.success(request, 'Данные отправлены')
    return redirect("accounts:update")
def _record_facebook_error(message):
    """Save ``message`` in the Error model, attributed to social_sync.py."""
    Error(error=message, created_by="social_sync.py").save()


def FacebookScan(encoded_url, use_proxies):
    """Return the Facebook share count for ``encoded_url``.

    Args:
        encoded_url: URL to look up. It is passed through ``urllib.quote``
            before being placed in the Graph API path, so characters such as
            ``?`` are not interpreted by the API as its own parameters.
        use_proxies: When true, the request goes through the proxy returned
            by ``OldestProxyDic()`` and a 15 second default socket timeout is
            set; otherwise no proxy is selected.

    Returns:
        * the ``share_count`` reported by the API on success;
        * ``0`` when the response has no ``share`` key, so the caller keeps
          the entry and can still collect other networks' numbers;
        * ``False`` when the proxy lookup, the request, the JSON decoding or
          the API itself failed. Apart from the proxy-lookup failure, each of
          these is saved in the ``Error`` model.
    """
    # Stays None when no proxy is in use so the error handlers below can tell
    # whether there is a proxy to blame / disable.
    oldest_proxy = None

    if use_proxies:
        # Maximum time in seconds to wait for urllib.urlopen() below.
        # Note: this sets the process-wide default socket timeout.
        socket.setdefaulttimeout(15)

        # Proxy that was used longest ago, plus the mapping to hand to urlopen.
        oldest_proxy, oldest_proxy_dic = OldestProxyDic()

        # Getting a proxy failed; without one this function cannot continue.
        if oldest_proxy is False:
            return False
    else:
        # Placeholder mapping for the "no proxy" case.
        # NOTE(review): the original author states this makes urlopen ignore
        # the proxies argument and connect directly -- confirm.
        oldest_proxy_dic = {"": ""}

    # Quote the URL: otherwise things like "?X=" would be read by the
    # Facebook API as parameters addressed to it.
    encoded_url = urllib.quote(encoded_url)

    # IMPORTANT NOTE: proxies were meant to bypass the API request limit.
    # Combining proxies with an access_token defeats that purpose, but the
    # token is required for now: without it the API returns a differently
    # shaped JSON document that this function does not handle.
    try:
        facebook_req = urllib.urlopen(
            "https://graph.facebook.com/v2.3/{url}?access_token="
            "CONFIGURE-ME: Add your own Facebook access token".format(url=encoded_url),
            proxies=oldest_proxy_dic,
        )
    except Exception as e:
        _record_facebook_error(
            "URL: {url}\nurllib.urlopen() error: {error}".format(error=e, url=encoded_url)
        )

        # These messages indicate the proxy itself is broken; mark it as
        # obsolete -- but only when a proxy was actually used.
        reason = str(e)
        proxy_failed = "[Errno socket error]" in reason or "http protocol error" in reason
        if oldest_proxy is not None and proxy_failed:
            DisableProxy(oldest_proxy, e)

        # Parenthesised single-argument print behaves the same as the print statement.
        print(" This one failed! Automatically saved in errors model")
        return False

    # Check that the response body really is JSON. If it is, the proxy works;
    # if it is not, the proxy is considered obsolete.
    try:
        facebook_json = json.load(facebook_req)
    except Exception as e:
        proxy_ip = oldest_proxy.ip if oldest_proxy is not None else None
        _record_facebook_error(
            ("URL: {url}\n"
             "json.load() error: {error}\n"
             "Proxy used during error: {proxy_ip}").format(
                error=e, url=encoded_url, proxy_ip=proxy_ip
            )
        )

        # Mark the proxy as obsolete in the FreeProxy model, if one was used.
        if oldest_proxy is not None:
            DisableProxy(oldest_proxy, e)

        print(" This one failed! Automatically saved in errors model")
        return False
    finally:
        # Release the connection whether or not decoding worked.
        facebook_req.close()

    # An "error" key means the Facebook API itself reported a failure.
    if "error" in facebook_json:
        api_message = facebook_json["error"]["message"]
        print(" Facebook API returned exception:")
        print(" {exception}".format(exception=api_message))
        _record_facebook_error(
            "URL: {url}\nFacebook API returned exception:\n{error}".format(
                error=api_message, url=encoded_url
            )
        )
        return False

    # PLEASE READ BEFORE ADDING MORE CODE BELOW:
    # the "share" check must stay last. It returns 0 rather than False, so
    # every check that detects a real error has to run before it.
    if "share" not in facebook_json:
        # The API returned nothing for this URL; known to happen when the URL
        # belongs to Facebook itself. This may fill the Error model with
        # repeated entries for a few days, which is accepted.
        _record_facebook_error(
            "URL: {url}\n"
            "The Facebook API returned no shares.\n"
            "This is known to happen if the URL in question is from Facebook itself".format(url=encoded_url)
        )
        print(" The Facebook API returned no shares, setting it to 0")
        # 0 (not False) keeps the social interaction entry alive so the
        # Twitter numbers can still be collected for this URL.
        return 0

    # Everything went fine; return the total interactions.
    return facebook_json["share"]["share_count"]
# Persist every collected vacancy, echoing each one to stdout.
for vacancy in vacs:
    v = Vacancy(**vacancy)
    print(vacancy)
    try:
        v.save()
    except DatabaseError:
        # Deliberate best effort: a vacancy the database rejects is skipped so
        # the remaining ones are still saved.
        # NOTE(review): presumably these are duplicate rows -- confirm.
        pass

# Attach the collected errors to today's Error record, creating it if needed.
if errors:
    # first() returns None when no row exists for today. (The previous
    # `if qs.exists:` tested the bound method itself, which is always truthy,
    # so the create branch could never run.)
    err = Error.objects.filter(timestamp=dt.date.today()).first()
    if err is not None:
        # NOTE(review): assumes Error.data holds a dict (JSON-style field) -- confirm.
        err.data.update({'errors': errors})
        err.save()
    else:
        # Store a dict so later runs can call err.data.update(...) on it;
        # a formatted string would not support that.
        Error(data={'errors': errors}).save()

# Elapsed time since `start`, in seconds.
print(time.time() - start)

# Manual debugging scratch, kept for reference:
# city = City.objects.filter(slug="moscow").first()
# language = Language.objects.filter(slug="python").first()
# vacs_hh, er_hh = hh()
# vacs_jooble, er_job = jooble()
# vacs_indeed, er_ind = indeed()
# vacs = vacs_hh + vacs_jooble + vacs_indeed
# errs = er_hh + er_job + er_ind
# print(vacs)
# print(errs)
# print(er_hh)
# with open("../vacs_hh.txt", "w", encoding="utf8") as f:
#     f.write("{}".format(vacs_hh))