Example #1
def test_url_shortener(self):
    # The shortened URL must be strictly shorter than the original
    url = "http://www.vnit.ac.in/index.php?option=com_content&view=article&id=448&Itemid=214"
    self.assertTrue(len(utils.url_shortener(url)) < len(url))
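
The test only asserts that `utils.url_shortener` returns something shorter than its input. A minimal sketch of what such a helper could look like, assuming the public TinyURL create endpoint (the project's actual shortener backend is not shown in these examples):

import urllib


def url_shortener(url):
    # Sketch only: ask TinyURL for a short alias and return its plain-text
    # response. Any shortener with a similar "create" endpoint would do.
    api = "http://tinyurl.com/api-create.php?" + urllib.urlencode({"url": url})
    return urllib.urlopen(api).read()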
Example #2
# Imports assumed from the surrounding project: `utils` and the `Urls`/`Posts`
# models are project-local, and the memcache.get/delete calls suggest the
# App Engine memcache API.
import json
import logging
import urlparse  # Python 2 stdlib; this code predates urllib.parse

from django.http import HttpResponse
from google.appengine.api import memcache  # assumption: App Engine memcache

import utils  # project-local: get_page, get_all_links, url_shortener, tweet
from models import Posts, Urls  # project-local Django models


def cron(request):
    """
    Fetches the Student_Notifications_Url page.
    Grabs all the relevant links & their titles.
    Stores every new link & tweets it out!
    """

    initialize = False
    testing = False
    if request.GET.get('initialize', "").lower() == 'true':
        initialize = True
    if request.GET.get('testing', "").lower() == 'true':
        testing = True
    response = []

    for stud_url in Urls.objects.all():
        cur_response = {"status": "Something's funny.", "links": list()}
        if not testing:
            # fetch page
            notifs = utils.get_page(stud_url.url)
            # Now, extract the content from the page
            content = notifs[notifs.find('<!-- BEGIN: CONTENT -->'):
                             notifs.find('<!-- END: CONTENT -->')]
            cur_links = utils.get_all_links('http://www.vnit.ac.in', content)
            # cur_links.update(utils.get_marquee_links('https://www.vnit.ac.in',
            # notifs))
            # logging.info(cur_links)
        else:
            cur_links = dict()

        # Gather pre-existing posts data
        All_Posts = memcache.get("all_posts")
        if not All_Posts:
            All_Posts = Posts.objects.all()
        All_Links = dict()
        All_Urls = set()
        All_Titles = set()
        for each_post in All_Posts:
            All_Links[each_post.title] = each_post.url
            All_Urls.add(each_post.url)
            All_Titles.add(each_post.title)

        # Check for "new posts" from the fetched content
        new_links = {}
        for title, url in cur_links.items():
            cur_title = " ".join(title.strip().split())
            cur_url = url.strip()
            if cur_title not in All_Titles:
                new_links[cur_title] = cur_url
            elif cur_url not in All_Urls:
                # Same title but a new URL: disambiguate the key with the
                # last path segment of the URL
                suffix = urlparse.urlsplit(cur_url).path.split('/')[-1]
                new_links["{0} ({1}) ".format(cur_title, suffix)] = cur_url

        if new_links:
            cur_response["status"] = "New links found on {0}".format(
                stud_url.url)
            memcache.delete("all_posts")
            memcache.delete("latest_posts")
        else:
            cur_response["status"] = "No new links found on {0}".format(
                stud_url.url)
        for title, url in new_links.items():
            # Normalize the URL and force plain http; replace only the
            # scheme, not every "https" substring in the URL
            url = urlparse.urlunparse(urlparse.urlparse(url))
            if url.startswith("https://"):
                url = "http://" + url[len("https://"):]
            All_Links[title] = url
            logging.info('Title -- %s Url -- %s', title, url)
            # Save the post
            Posts.objects.create(url=url, title=title)
            # Shorten the url
            shortened_url = utils.url_shortener(url)
            # Tweet the Post
            shortened_title = title[:100]
            utils.tweet(' - '.join((shortened_title, shortened_url)),
                        initialize)
            # Display the new post in the response
            cur_response["links"].append([title, url])
        response.append(cur_response)

    if not response:
        response = {"status": "Did not execute", "links": list()}
    # str(response) would emit Python repr (single quotes), not valid JSON
    return HttpResponse(json.dumps(response), content_type="application/json")
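
One low-risk way to exercise this view is Django's test client together with the `testing` flag the view already checks; the `/cron` path below is an assumption about how the project wires its urlconf:

from django.test import Client


def smoke_test_cron():
    # testing=true makes the view skip the page fetch, so nothing is
    # stored or tweeted; each stored Url still gets a status entry
    # (or a single "Did not execute" status if none exist).
    resp = Client().get('/cron', {'testing': 'true'})
    assert resp.status_code == 200
    print resp.content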