Python main示例，date_subtracter.main Python示例

示例#1

0

显示文件

文件： grabber_utah_foundation.py 项目： johndpope/Choose_Your_Polotics

def main(current_time):
    """Collect Utah Foundation news links together with their timestamps.

    Downloads the news index, reads the date printed inside each entry and
    keeps the entries for which ``date_subtracter.main`` reports ``1`` in the
    first element of its result (presumably "recent enough" -- confirm
    against ``date_subtracter``).

    Args:
        current_time: Reference timestamp indexed as ``[date][time]``. Its
            time part (index 1) is copied into every entry's timestamp
            because the page only exposes a date.

    Returns:
        dict mapping absolute article URL to a two-row list: the three date
        tokens scraped from the page, then the three time fields taken from
        ``current_time``.
    """
    site_root = "http://www.utahfoundation.org"
    page = Article('http://www.utahfoundation.org/news/')
    page.download()
    soup = BeautifulSoup(page.html)
    now = current_time[:]
    recent = {}

    for entry in soup.find_all("article", class_="row news-entry col-sm-11"):
        # Links on the index are site-relative, hence the root prefix.
        link = site_root + entry.a["href"]
        tokens = entry.i.text.split()
        stamp = [
            # The second token loses its last character (presumably the
            # comma after the day number).
            [tokens[0], tokens[1][:-1], tokens[2]],
            [now[1][0][:], now[1][1][:], now[1][2][:]],
        ]
        if date_subtracter.main(now, stamp)[0] == 1:
            recent[link] = stamp

    return recent

示例#2

0

显示文件

def main(current_time):
    """Collect Utah Datapoints post links together with their timestamps.

    Every ``div.entry-meta`` block on the front page supplies both the post
    link and the text the date is read from. A post is kept when
    ``date_subtracter.main`` reports ``1`` in the first element of its result.

    Args:
        current_time: Reference timestamp indexed as ``[date][time]``; the
            time part (index 1) is reused for every post.

    Returns:
        dict mapping post URL to ``[[date tokens], [time fields]]``.
    """
    page = Article('http://utahdatapoints.com/')
    page.download()
    soup = BeautifulSoup(page.html)
    now = current_time[:]
    recent = {}

    for meta in soup.find_all("div", class_="entry-meta"):
        link = meta.a["href"]
        tokens = meta.text.split()
        # The date starts at the third whitespace-separated token; the
        # middle date token loses its trailing character.
        scraped_date = [tokens[2], tokens[3][:-1], tokens[4]]
        borrowed_time = [now[1][0][:], now[1][1][:], now[1][2][:]]
        stamp = [scraped_date, borrowed_time]

        verdict = date_subtracter.main(now, stamp)
        if verdict[0] == 1:
            recent[link] = stamp

    return recent

示例#3

0

显示文件

def main(current_time):
    """Collect Utah Political Capitol executive-branch links with timestamps.

    Article links and publication dates are scraped in two separate passes
    over the same page and paired purely by position. An article is kept when
    ``date_subtracter.main`` reports ``1`` in the first element of its result.

    Args:
        current_time: Reference timestamp indexed as ``[date][time]``. The
            year (``[0][2]``) and the whole time part (``[1]``) are copied
            from it because they are not taken from the page.

    Returns:
        dict mapping article URL to ``[[date fields], [time fields]]``.
    """
    # print "EXECUTIVE BRANCH:"
    article = Article('http://utahpoliticalcapitol.com/category/on-the-hill/executive-branch/')
    article.download()
    # The same HTML is parsed twice: once for headlines, once for dates.
    soups = BeautifulSoup(article.html)
    soupTime = BeautifulSoup(article.html)
    tempListud = {}
    articleTime = current_time[:]
    tempList = []
    # Index into tempList of the headline matching the current date element.
    count = 0
    # The strip() below removes the leading "h" of "http" together with the
    # 'href="' wrapper, so it is put back here.
    prefix = "h"
    letters = soups.find_all("h2", class_="entry-title taggedlink")
    soupDate = soupTime.find_all("time", class_="published")
    for element in letters:
        # The fourth whitespace-separated token of the rendered tag is used
        # as the href attribute; strip() removes any of the characters
        # h, r, e, f, =, ", > from both ends of it.
        url = (prefix + ((str(element).split())[3]).encode('utf-8').strip('href=">'))
        tempList.append(url)
    for pub in soupDate:
        dateTimeForm = [["0", "0", "0"],["0", "0", "0"]]
        # Render the <time> tag, turn ">" into a separator and tokenise it.
        pub = pub.encode('utf-8').replace(">", " ").split()
        dateTimeForm[0][0] = pub[3][:]
        # Drop the last character of the fifth token (presumably a comma).
        dateTimeForm[0][1] = pub[4][:-1]
        # Year and time of day come from the reference time.
        dateTimeForm[0][2] = articleTime[0][2][:]
        dateTimeForm[1][0] = articleTime[1][0][:]
        dateTimeForm[1][1] = articleTime[1][1][:]
        dateTimeForm[1][2] = articleTime[1][2][:]
        dateDiff = date_subtracter.main(articleTime,dateTimeForm)
        if dateDiff[0] == 1:
            # NOTE(review): raises IndexError if the page has more <time>
            # elements than headlines.
            tempListud[tempList[count]] = dateTimeForm
        count = count + 1

    # for article in tempListud:
    #     print article, tempListud[article]
    return tempListud

示例#4

0

显示文件

文件： grabber_utah_reps.py 项目： johndpope/Choose_Your_Polotics

def main(current_time):
    """Collect Utah Reps article links together with their timestamps.

    Walks every ``<article>`` element on the home page, skips the one that
    links to the "ourperspective" page and keeps the rest when
    ``date_subtracter.main`` reports ``1`` in the first element of its result.

    Args:
        current_time: Reference timestamp indexed as ``[date][time]``; the
            time part (index 1) is reused for every article.

    Returns:
        dict mapping article URL to ``[[date tokens], [time fields]]``.
    """
    skipped_link = "http://www.utahreps.net/ourperspective"

    page = Article('http://www.utahreps.net/')
    page.download()
    soup = BeautifulSoup(page.html)
    now = current_time[:]
    recent = {}

    for entry in soup.find_all("article"):
        link = entry.a["href"]
        if link == skipped_link:
            continue

        # The date is read from the first paragraph of the entry; the second
        # token loses its trailing character.
        tokens = entry.p.text.split()
        stamp = [
            [tokens[0], tokens[1][:-1], tokens[2]],
            [now[1][0][:], now[1][1][:], now[1][2][:]],
        ]
        if date_subtracter.main(now, stamp)[0] == 1:
            recent[link] = stamp

    return recent

示例#5

0

显示文件

def main(current_time):
    """Collect Daily Utah Chronicle politics links with their timestamps.

    The category page only supplies links, so each linked article is
    downloaded in turn and its first ``span.td-post-date`` is read. An
    article is kept when ``date_subtracter.main`` reports ``1`` in the first
    element of its result.

    Args:
        current_time: Reference timestamp indexed as ``[date][time]``; the
            time part (index 1) is reused for every article.

    Returns:
        dict mapping article URL to ``[[date tokens], [time fields]]``.
    """
    index_page = Article('http://dailyutahchronicle.com/category/news/politics/')
    index_page.download()
    index_soup = BeautifulSoup(index_page.html)
    now = current_time[:]
    recent = {}

    for thumb in index_soup.find_all("div", class_="td-module-thumb"):
        link = thumb.a["href"]

        # Fetch the article itself to get at its publication date.
        story = Article(link)
        story.download()
        story_soup = BeautifulSoup(story.html)
        date_spans = story_soup.find_all("span", class_="td-post-date")
        tokens = date_spans[0].text.split()

        stamp = [
            [tokens[0], tokens[1][:-1], tokens[2]],
            [now[1][0][:], now[1][1][:], now[1][2][:]],
        ]
        if date_subtracter.main(now, stamp)[0] == 1:
            recent[link] = stamp

    return recent

示例#6

0

显示文件

文件： grabber_stgeorge.py 项目： johndpope/Choose_Your_Polotics

def main(current_time):
    """Collect St George News politics links together with their timestamps.

    Each ``div.recent-story`` block supplies the link and, at the end of its
    text, the tokens the date is read from. A story is kept when
    ``date_subtracter.main`` reports ``1`` in the first element of its result.

    Args:
        current_time: Reference timestamp indexed as ``[date][time]``; the
            time part (index 1) is reused for every story.

    Returns:
        dict mapping story URL to ``[[date tokens], [time fields]]``.
    """
    page = Article(
        'https://www.stgeorgeutah.com/news/archive/category/news/politics/')
    page.download()
    soup = BeautifulSoup(page.html)
    now = current_time[:]
    recent = {}

    for story in soup.find_all("div", class_="recent-story"):
        link = story.a["href"]
        tokens = story.text.split()
        # The date occupies the 4th-, 3rd- and 2nd-from-last tokens; the
        # middle one loses its last three characters (presumably an ordinal
        # suffix plus comma, e.g. "th,").
        scraped_date = [tokens[-4], tokens[-3][:-3], tokens[-2]]
        borrowed_time = [now[1][0][:], now[1][1][:], now[1][2][:]]
        stamp = [scraped_date, borrowed_time]

        if date_subtracter.main(now, stamp)[0] == 1:
            recent[link] = stamp

    return recent

示例#7

0

显示文件

def main(current_time):
    """Collect KUOW Washington-legislature links together with their dates.

    The listing shows either an absolute date or a relative stamp such as
    "5 minutes ago" / "3 hours ago". Relative stamps are converted to a date
    using ``current_time``; an article is kept when ``date_subtracter.main``
    reports ``1`` in the first element of its result.

    Args:
        current_time: Reference timestamp in the layout
            ``[[month, day, year], [hour, min, sec]]``, every field a string.

    Returns:
        dict mapping absolute article URL to a timestamp in the same layout
        (the time part is always copied from ``current_time``).
    """
    article = Article('http://kuow.org/term/washington-state-legislature')
    article.download()
    soups = BeautifulSoup(article.html)
    tempListud = {}
    articleTime = current_time[:]
    prefix = "http://kuow.org"

    articles = soups.find_all("div", class_="large-12 columns")
    for element in articles:
        url = prefix + element.find("div", class_="title-info").a['href']
        pub_date = element.find("span", class_="pub-date").text.replace(":", " ").split()

        if pub_date[2] == "ago":
            # Decide whether "<n> <unit> ago" falls on the previous day.
            if pub_date[1] in ("minute", "minutes"):
                previous_day = (articleTime[1][0] == "0" and
                                int(articleTime[1][1]) - int(pub_date[0]) <= 0)
            elif pub_date[-2] in ("hour", "hours"):
                previous_day = int(articleTime[1][0]) - int(pub_date[0]) <= 0
            else:
                # Unknown unit: leave the token untouched, as before.
                previous_day = None

            if previous_day is not None:
                if previous_day:
                    # NOTE(review): yields day "0" on the first of a month;
                    # month rollover is not handled here.
                    day = str(int(articleTime[0][1]) - 1)
                else:
                    # Same day: use the current *day* of month. The original
                    # code used articleTime[1][1] (the minute) here.
                    day = str(articleTime[0][1])
                # The trailing comma mimics the absolute "Mon D, YYYY" form
                # so that the [:-1] slice below applies to both formats.
                pub_date[1] = day + ","

            pub_date[0] = articleTime[0][0]
            pub_date[2] = articleTime[0][2]

        dateTimeForm = [["0", "0", "0"], ["0", "0", "0"]]

        # Date Time in the format [Month,Day,Year][Hour,Min,Sec]
        # Date [Month,Day,Year]
        dateTimeForm[0][0] = pub_date[0][:]
        dateTimeForm[0][1] = pub_date[1][:-1]
        dateTimeForm[0][2] = pub_date[2][:]
        # Time [Hour,Min,Sec] -- always taken from the reference time.
        dateTimeForm[1][0] = articleTime[1][0][:]
        dateTimeForm[1][1] = articleTime[1][1][:]
        dateTimeForm[1][2] = articleTime[1][2][:]
        dateDiff = date_subtracter.main(articleTime, dateTimeForm)
        if dateDiff[0] == 1:
            tempListud[url] = dateTimeForm

    return tempListud

示例#8

0

显示文件

文件： grabber_kitsap_sun.py 项目： johndpope/Choose_Your_Polotics

def main(current_time):
    """Collect Kitsap Sun politics links together with their timestamps.

    Headline links are gathered from three kinds of elements on the politics
    page; every linked article is then downloaded to read its
    ``span.asset-metabar-time``. An article is kept when
    ``date_subtracter.main`` reports ``1`` in the first element of its result.

    Args:
        current_time: Reference timestamp in the layout
            ``[[month, day, year], [hour, min, sec]]``; only the seconds
            field is copied into the scraped timestamps.

    Returns:
        dict mapping absolute article URL to a timestamp in the same layout.
    """
    article = Article('http://www.kitsapsun.com/news/politics/')
    article.download()
    soups = BeautifulSoup(article.html)
    tempListud = {}
    articleTime = current_time[:]
    prefix = "http://www.kitsapsun.com"

    articles = soups.find_all(
        "h1",
        class_="hero-hed hero-headline-pack-hed hero-text-hed placeholder-hide"
    )
    articles = articles + soups.find_all("li", class_="hero-list-item")
    articles = articles + soups.find_all("li", class_="hgpm-item")
    for element in articles:
        url = prefix + element.a['href']
        article = Article(url)
        article.download()
        soups = BeautifulSoup(article.html)

        time_tag = soups.find("span", class_="asset-metabar-time")
        if time_tag is None:
            # No timestamp on the page (failed download, different layout):
            # skip this article instead of aborting the whole run.
            continue

        # Only the part before the first "|" is used; ":" becomes a
        # separator so hour and minute are separate tokens.
        pub_date = time_tag.text.split("|")
        pub_date = pub_date[0].replace(":", " ").split()

        # Convert to 24-hour time. 12 p.m. is already noon and must not be
        # shifted to hour 24.
        # NOTE(review): 12 a.m. is still passed through as hour 12.
        if pub_date[3] == "p.m." and pub_date[1] != "12":
            pub_date[1] = str(int(pub_date[1]) + 12)
        # Drop the period of an abbreviated month name.
        if pub_date[5].endswith("."):
            pub_date[5] = pub_date[5][:-1]

        dateTimeForm = [["0", "0", "0"], ["0", "0", "0"]]

        # Date Time in the format [Month,Day,Year][Hour,Min,Sec]
        # Date [Month,Day,Year]
        dateTimeForm[0][0] = pub_date[5][:]
        dateTimeForm[0][1] = pub_date[6][:-1]
        dateTimeForm[0][2] = pub_date[7][:]
        # Time [Hour,Min,Sec]
        dateTimeForm[1][0] = pub_date[1][:]
        dateTimeForm[1][1] = pub_date[2][:]
        dateTimeForm[1][2] = articleTime[1][2][:]
        dateDiff = date_subtracter.main(articleTime, dateTimeForm)
        if dateDiff[0] == 1:
            tempListud[url] = dateTimeForm

    return tempListud

示例#9

0

显示文件

def main(current_time):
    """Collect King 5 politics links together with their timestamps.

    Headline links are gathered from three kinds of elements on the politics
    page; every linked article is then downloaded to read its
    ``span.author__date``. An article is kept when ``date_subtracter.main``
    reports ``1`` in the first element of its result.

    Args:
        current_time: Reference timestamp in the layout
            ``[[month, day, year], [hour, min, sec]]``; only the seconds
            field is copied into the scraped timestamps.

    Returns:
        dict mapping absolute article URL to a timestamp in the same layout,
        or ``None`` when any step of the scrape failed (an error line is
        printed in that case).
    """
    try:
        article = Article('http://www.king5.com/politics')
        article.download()
        soups = BeautifulSoup(article.html)
        tempListud = {}
        articleTime = current_time[:]
        prefix = "http://www.king5.com"

        letters = soups.find_all("div", "story-snapshot-with-abstract__headline")
        letters = letters + soups.find_all("li", class_="headline-list-with-abstract__item")
        letters = letters + soups.find_all("div", class_="text-only-headline-list__headline")
        for element in letters:
            url = prefix + element.a["href"]

            article = Article(url)
            article.download()
            soups = BeautifulSoup(article.html)
            # ":" becomes a separator so hour and minute are separate tokens.
            pub_date = soups.find("span", class_="author__date").text.replace(":", " ").split()

            # Convert to 24-hour time. 12 PM is already noon and must not be
            # shifted to hour 24.
            # NOTE(review): 12 AM is still passed through as hour 12.
            if pub_date[2] == "PM" and pub_date[0] != "12":
                pub_date[0] = str(int(pub_date[0]) + 12)

            dateTimeForm = [["0", "0", "0"], ["0", "0", "0"]]

            # Date Time in the format [Month,Day,Year][Hour,Min,Sec]
            # Date [Month,Day,Year]
            dateTimeForm[0][0] = pub_date[4][:]
            dateTimeForm[0][1] = pub_date[5][:-1]
            dateTimeForm[0][2] = pub_date[6][:]
            # Time [Hour,Min,Sec]
            dateTimeForm[1][0] = pub_date[0][:]
            dateTimeForm[1][1] = pub_date[1][:]
            dateTimeForm[1][2] = articleTime[1][2][:]
            dateDiff = date_subtracter.main(articleTime, dateTimeForm)
            if dateDiff[0] == 1:
                tempListud[url] = dateTimeForm

        return tempListud

    except Exception:
        # Best-effort grabber: report and give up on this source. Catching
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate. The single-argument parenthesised form prints the same
        # text under Python 2 and Python 3.
        print("ERROR:       An error occured while grabber for articles in \n King 5 \n")
        return None

示例#10

0

显示文件

def main(current_time):
    """Collect Q13 Fox politics links together with their timestamps.

    Headline links come from ``h2``/``h4`` ``entry-title`` elements; every
    linked article is then downloaded to read its ``span.posted-time``. An
    article is kept when ``date_subtracter.main`` reports ``1`` in the first
    element of its result.

    Args:
        current_time: Reference timestamp in the layout
            ``[[month, day, year], [hour, min, sec]]``; only the seconds
            field is copied into the scraped timestamps.

    Returns:
        dict mapping article URL to a timestamp in the same layout.
    """
    article = Article('http://q13fox.com/category/news/politics/')
    article.download()
    soups = BeautifulSoup(article.html)
    tempListud = {}
    articleTime = current_time[:]

    articles = soups.find_all("h2", class_="entry-title")
    articles = articles + soups.find_all("h4", class_="entry-title")
    for element in articles:
        url = element.a['href']

        article = Article(url)
        article.download()
        soups = BeautifulSoup(article.html)

        # ":" becomes a separator so hour and minute are separate tokens.
        pub_date = soups.find("span",
                              class_="posted-time").text.replace(":",
                                                                 " ").split()

        # Convert to 24-hour time. The tokens are strings, so the hour must
        # go through int() (the previous `pub_date[1] += 12` raised
        # TypeError); 12 PM is already noon and is left alone.
        # NOTE(review): 12 AM is still passed through as hour 12.
        if pub_date[3] == "PM" and pub_date[1] != "12":
            pub_date[1] = str(int(pub_date[1]) + 12)

        dateTimeForm = [["0", "0", "0"], ["0", "0", "0"]]

        # Date Time in the format [Month,Day,Year][Hour,Min,Sec]
        # Date [Month,Day,Year]
        dateTimeForm[0][0] = pub_date[4][:]
        dateTimeForm[0][1] = pub_date[5][:-1]
        # The year token also loses its last character (presumably trailing
        # punctuation -- confirm against the live page).
        dateTimeForm[0][2] = pub_date[6][:-1]
        # Time [Hour,Min,Sec]
        dateTimeForm[1][0] = pub_date[1][:]
        dateTimeForm[1][1] = pub_date[2][:]
        dateTimeForm[1][2] = articleTime[1][2][:]
        dateDiff = date_subtracter.main(articleTime, dateTimeForm)
        if dateDiff[0] == 1:
            tempListud[url] = dateTimeForm

    return tempListud

示例#11

0

显示文件

文件： grabber_fox13.py 项目： johndpope/Choose_Your_Polotics

def main(current_time):
    """Collect Fox 13 politics links together with their timestamps.

    Links are cut out of the rendered ``h4.entry-title`` tags plus the first
    ``h2.entry-title`` (the featured story, when present). Every linked
    article is then downloaded to read its ``span.posted-time``. An article
    is kept when ``date_subtracter.main`` reports ``1`` in the first element
    of its result.

    Args:
        current_time: Reference timestamp indexed as ``[date][time]``. The
            year (``[0][2]``) and seconds (``[1][2]``) are copied from it.

    Returns:
        dict mapping article URL to ``[[date fields], [time fields]]``.
    """
    article = Article('http://fox13now.com/category/news/politics/')
    article.download()
    soups = BeautifulSoup(article.html)
    tempList = []
    tempListud = {}
    articleTime = current_time[:]
    # strip() below removes the leading "h" of "http" together with the
    # 'href="' wrapper, so it is put back here.
    prefix = "h"
    feature_letters = soups.find_all("h2", class_="entry-title")
    letters = soups.find_all("h4", class_="entry-title")

    # Regular headlines first, then the featured one, as before.
    headline_tags = list(letters)
    if feature_letters:
        # Only the first featured headline is used; an empty result used to
        # raise IndexError.
        headline_tags.append(feature_letters[0])

    for element in headline_tags:
        # Turn the angle brackets into separators and take the fourth token
        # of the rendered tag as the href attribute.
        element = str(element).replace("<", " ")
        element = element.replace(">", " ")
        tempList.append((prefix + ((element.split())[3]).encode('utf-8').strip('href=">')))

    for url in tempList:
        article = Article(url)
        article.download()
        soups = BeautifulSoup(article.html)
        publishDate = soups.find_all("span", class_="posted-time")
        dateTimeForm = [["0", "0", "0"], ["0", "0", "0"]]
        # Tokenise the rendered result list; positions are fixed by the
        # site's markup.
        pub = str(publishDate).encode('utf-8').split()
        pub[5] = pub[5][:-1]
        # Split "H:MM" into [hour, minute].
        pub[2] = pub[2].encode('utf-8').replace(":", " ").split()
        # Convert to 24-hour time. 12 pm is already noon and must not be
        # shifted to hour 24.
        # NOTE(review): 12 am is still passed through as hour 12.
        if pub[3] == "pm" and pub[2][0] != "12":
            pub[2][0] = str(int(pub[2][0]) + 12)
        dateTimeForm[0][0] = pub[4][:]
        dateTimeForm[0][1] = pub[5][:]
        dateTimeForm[0][2] = articleTime[0][2][:]
        dateTimeForm[1][0] = pub[2][0][:]
        dateTimeForm[1][1] = pub[2][1][:]
        dateTimeForm[1][2] = articleTime[1][2][:]
        dateDiff = date_subtracter.main(articleTime, dateTimeForm)
        if dateDiff[0] == 1:
            tempListud[url] = dateTimeForm

    return tempListud

示例#12

0

显示文件

def main(current_time):
    """Collect TVW "Capitol Record" blog links together with their dates.

    The date is read from the text of each ``h2.entry-title`` heading.
    Headings whose fourth token is exactly ``"Nov.1st,"`` are skipped; the
    rest are kept when ``date_subtracter.main`` reports ``1`` in the first
    element of its result.

    Args:
        current_time: Reference timestamp in the layout
            ``[[month, day, year], [hour, min, sec]]``; the time part is
            reused for every post.

    Returns:
        dict mapping absolute post URL to a timestamp in the same layout.
    """
    site_root = "https://www.tvw.org"

    page = Article('https://www.tvw.org/blog/')
    page.download()
    soup = BeautifulSoup(page.html)
    now = current_time[:]
    recent = {}

    for heading in soup.find_all("h2", class_="entry-title"):
        link = site_root + heading.a['href']
        tokens = heading.text.split()

        # Trim a period after the month and a colon after the year.
        if tokens[3].endswith("."):
            tokens[3] = tokens[3][:-1]
        if tokens[5].endswith(":"):
            tokens[5] = tokens[5][:-1]

        # One heading format is deliberately ignored.
        if tokens[3] == "Nov.1st,":
            continue

        stamp = [
            # [Month, Day, Year]; the day token loses its last three
            # characters (presumably ordinal suffix plus comma).
            [tokens[3], tokens[4][:-3], tokens[5]],
            # [Hour, Min, Sec] from the reference time.
            [now[1][0][:], now[1][1][:], now[1][2][:]],
        ]
        if date_subtracter.main(now, stamp)[0] == 1:
            recent[link] = stamp

    return recent

示例#13

0

显示文件

def main(current_time):
    """Collect Spokesman-Review government links together with their dates.

    Link and date are both read from the ``header.mb3`` of each article card.
    A card is kept when ``date_subtracter.main`` reports ``1`` in the first
    element of its result.

    Args:
        current_time: Reference timestamp in the layout
            ``[[month, day, year], [hour, min, sec]]``; the time part is
            reused for every card.

    Returns:
        dict mapping absolute article URL to a timestamp in the same layout.
    """
    site_root = "http://www.spokesman.com"

    page = Article('http://www.spokesman.com/washington-government/')
    page.download()
    soup = BeautifulSoup(page.html)
    now = current_time[:]
    recent = {}

    for card in soup.find_all("article", class_="mb5 cf cb pb5 bb b--black-10"):
        header = card.find("header", class_="mb3")
        link = site_root + header.a['href']
        tokens = header.find(
            "p", class_="mt0 f6 tu gray sans-serif ").text.split()

        # An "UPDATED:" marker shifts everything by one token.
        if tokens[0] == "UPDATED:":
            tokens = tokens[1:]
        # Trim a comma that may follow the year.
        if tokens[3].endswith(","):
            tokens[3] = tokens[3][:-1]

        # [Month, Day, Year]; month and day tokens each lose a trailing
        # character. tokens[0] is not used.
        scraped_date = [tokens[1][:-1], tokens[2][:-1], tokens[3]]
        # [Hour, Min, Sec] from the reference time.
        borrowed_time = [now[1][0][:], now[1][1][:], now[1][2][:]]
        stamp = [scraped_date, borrowed_time]

        if date_subtracter.main(now, stamp)[0] == 1:
            recent[link] = stamp

    return recent

示例#14

0

显示文件

文件： grabber_olympian.py 项目： johndpope/Choose_Your_Polotics

def main(current_time):
    """Collect Olympian politics links together with their dates.

    Every headline link on the section page is downloaded and its
    ``p.published-date`` read. Articles without that element are skipped;
    the others are kept when ``date_subtracter.main`` reports ``1`` in the
    first element of its result.

    Args:
        current_time: Reference timestamp in the layout
            ``[[month, day, year], [hour, min, sec]]``; the time part is
            reused for every article.

    Returns:
        dict mapping article URL to a timestamp in the same layout.
    """
    section = Article('http://www.theolympian.com/news/politics-government/')
    section.download()
    section_soup = BeautifulSoup(section.html)
    now = current_time[:]
    recent = {}

    for headline in section_soup.find_all("h4", class_="title "):
        link = headline.a['href']

        story = Article(link)
        story.download()
        date_tag = BeautifulSoup(story.html).find("p", class_="published-date")
        if not date_tag:
            continue

        # ":" is turned into a separator before tokenising; only the first
        # three tokens are used.
        tokens = date_tag.text.replace(":", " ").split()
        stamp = [
            # [Month, Day, Year]
            [tokens[0], tokens[1][:-1], tokens[2]],
            # [Hour, Min, Sec] from the reference time.
            [now[1][0][:], now[1][1][:], now[1][2][:]],
        ]
        if date_subtracter.main(now, stamp)[0] == 1:
            recent[link] = stamp

    return recent

示例#15

0

显示文件

文件： grabber_news_tribune.py 项目： johndpope/Choose_Your_Polotics

def main(current_time):
    """Collect News Tribune politics links together with their timestamps.

    Only ``article`` elements classed ``politics_government media`` on the
    local-news page are considered. Each linked story is downloaded and its
    ``p.published-date`` parsed into date and 24-hour time. A story is kept
    when ``date_subtracter.main`` reports ``1`` in the first element of its
    result.

    Args:
        current_time: Reference timestamp in the layout
            ``[[month, day, year], [hour, min, sec]]``; only the seconds
            field is copied into the scraped timestamps.

    Returns:
        dict mapping story URL to a timestamp in the same layout.
    """
    listing = Article('http://www.thenewstribune.com/news/local/')
    listing.download()
    listing_soup = BeautifulSoup(listing.html)
    now = current_time[:]
    recent = {}

    for card in listing_soup.find_all("article", class_="politics_government media "):
        link = card.find("h4", class_="title ").a['href']

        story = Article(link)
        story.download()
        story_soup = BeautifulSoup(story.html)
        # ":" becomes a separator so hour and minute are separate tokens.
        tokens = story_soup.find("p", class_="published-date").text.replace(":", " ").split()

        # [Month, Day, Year]
        scraped_date = [tokens[0], tokens[1][:-1], tokens[2]]

        # Afternoon hours move to 24-hour form; 12 PM stays as it is.
        hour = tokens[3]
        if tokens[5] == "PM" and hour != "12":
            hour = str(int(hour) + 12)
        # [Hour, Min, Sec]
        scraped_time = [hour, tokens[4], now[1][2][:]]

        stamp = [scraped_date, scraped_time]
        if date_subtracter.main(now, stamp)[0] == 1:
            recent[link] = stamp

    return recent

示例#16

0

显示文件

def main(current_time):
    """Collect KSL politics links together with their timestamps.

    Headlines (``div.headline``) and short date stamps (``span.short``) are
    read from the same page and paired by position. A headline is kept when
    ``date_subtracter.main`` reports ``1`` in the first element of its result.

    Args:
        current_time: Reference timestamp indexed as ``[date][time]``. The
            year (``[0][2]``) and seconds (``[1][2]``) are copied from it.

    Returns:
        dict mapping absolute article URL to ``[[date fields], [time fields]]``.
    """
    site_root = "https://www.ksl.com/"

    html = urllib.urlopen('http://www.ksl.com/?nid=599').read()
    soup = BeautifulSoup(html)
    headlines = soup.find_all("div", class_="headline")
    stamps = soup.find_all("span", class_="short")
    now = current_time[:]
    recent = {}

    for position, headline in enumerate(headlines):
        link = site_root + headline.a["href"]

        # "-" and ":" both act as separators in the short stamp.
        parts = stamps[position].text.replace("-", " ").replace(":", " ").split()
        # The second token loses its last two characters (presumably an
        # ordinal suffix such as "th").
        parts[1] = parts[1][:-2]
        # The minute token carries the am/pm marker; afternoon hours other
        # than 12 move to 24-hour form.
        if parts[3][2:] == "pm" and int(parts[2]) != 12:
            parts[2] = str(int(parts[2]) + int("12"))
        # Remove the letters of the am/pm marker from the minute token.
        parts[3] = parts[3].replace("a", "").replace("m", "").replace("p", "")

        stamp = [
            [parts[0], parts[1], now[0][2][:]],
            [parts[2], parts[3], now[1][2][:]],
        ]
        if date_subtracter.main(now, stamp)[0] == 1:
            recent[link] = stamp

    return recent

示例#17

0

显示文件

文件： grabber_washington_education_association.py 项目： johndpope/Choose_Your_Polotics

def main(current_time):
    """Collect Washington Education Association posts with their dates.

    The featured post and the list items on the "ourvoice" page each carry a
    ``span.icon.date`` whose slash-separated text is split into three date
    fields. A post is kept when ``date_subtracter.main`` reports ``1`` in the
    first element of its result.

    Args:
        current_time: Reference timestamp in the layout
            ``[[month, day, year], [hour, min, sec]]``; the time part is
            reused for every post.

    Returns:
        dict mapping post URL to a timestamp in the same layout.
    """
    page = Article('https://www.washingtonea.org/ourvoice/')
    page.download()
    soup = BeautifulSoup(page.html)
    now = current_time[:]
    recent = {}

    posts = soup.find_all("div", class_="featured-post rtecontent")
    posts = posts + soup.find_all("li", class_="list_item")

    for post in posts:
        link = post.a['href']
        date_text = post.find("span", class_="icon date").text
        fields = date_text.replace("/", " ").split()

        stamp = [
            # [Month, Day, Year] taken as-is from the slash-separated text.
            [fields[0], fields[1], fields[2]],
            # [Hour, Min, Sec] from the reference time.
            [now[1][0][:], now[1][1][:], now[1][2][:]],
        ]
        if date_subtracter.main(now, stamp)[0] == 1:
            recent[link] = stamp

    return recent

示例#18

0

显示文件

def main(current_time):
    """Collect Salt Lake Tribune politics links together with their timestamps.

    Each ``div.extras`` block supplies a link and the text the timestamp is
    read from. At most 24 blocks with a non-empty link are processed. A link
    is kept when ``date_subtracter.main`` reports ``1`` in the first element
    of its result.

    Args:
        current_time: Reference timestamp indexed as ``[date][time]``; only
            the seconds field (``[1][2]``) is copied into the scraped
            timestamps.

    Returns:
        dict mapping article URL (with its last 14 characters removed) to
        ``[[date fields], [time fields]]``.
    """
    article = Article('http://www.sltrib.com/news/politics/')
    article.download()
    soups = BeautifulSoup(article.html)
    tempListud = {}
    # Upper bound on the number of linked entries to look at.
    article_total = 24
    article_count = 0
    articleTime = current_time[:]
    postdate = soups.find_all("div", class_="extras")

    for element in postdate:
        if element.a["href"]:
            # Drop a fixed-length 14-character tail from the link
            # (presumably a comments anchor -- confirm against the page).
            element_url = element.a["href"][:-14]
            article_count = article_count + 1
            # ":" becomes a separator so hour and minute are separate tokens.
            pub_date = element.text.replace(":", " ").split()
            # Convert to 24-hour time. 12 pm is already noon and must not be
            # shifted to hour 24.
            # NOTE(review): 12 am is still passed through as hour 12.
            if pub_date[6] == "pm" and pub_date[4] != "12":
                pub_date[4] = str(int(pub_date[4]) + 12)
            dateTimeForm = [["0", "0", "0"], ["0", "0", "0"]]
            dateTimeForm[0][0] = pub_date[1][:]
            dateTimeForm[0][1] = pub_date[2][:]
            dateTimeForm[0][2] = pub_date[3][:]
            dateTimeForm[1][0] = pub_date[4][:]
            dateTimeForm[1][1] = pub_date[5][:]
            dateTimeForm[1][2] = articleTime[1][2][:]
            dateDiff = date_subtracter.main(articleTime, dateTimeForm)
            if dateDiff[0] == 1:
                tempListud[element_url] = dateTimeForm
        if article_count == article_total:
            break

    return tempListud

示例#19

0

显示文件

文件： grabber_washington_state_democrats.py 项目： johndpope/Choose_Your_Polotics

def main(current_time):
    """Collect Washington State Democrats news links with their dates.

    Titles (``h2.node-title``) and dates (``p.submitted``) are scraped
    separately from the news page and paired by position. An item is kept
    when ``date_subtracter.main`` reports ``1`` in the first element of its
    result.

    Args:
        current_time: Reference timestamp in the layout
            ``[[month, day, year], [hour, min, sec]]``; the time part is
            reused for every item.

    Returns:
        dict mapping absolute article URL to a timestamp in the same layout.
    """
    site_root = "https://www.wa-democrats.org"

    page = Article('https://www.wa-democrats.org/news')
    page.download()
    soup = BeautifulSoup(page.html)
    now = current_time[:]
    recent = {}

    title_tags = soup.find_all("h2", class_="node-title")
    date_tags = soup.find_all("p", class_="submitted")
    links = [site_root + title.a['href'] for title in title_tags]

    for position, date_tag in enumerate(date_tags):
        tokens = date_tag.text.split()
        stamp = [
            # [Month, Day, Year]; the day token loses its trailing character.
            [tokens[0], tokens[1][:-1], tokens[2]],
            # [Hour, Min, Sec] from the reference time.
            [now[1][0][:], now[1][1][:], now[1][2][:]],
        ]
        if date_subtracter.main(now, stamp)[0] == 1:
            # The n-th date belongs to the n-th title.
            recent[links[position]] = stamp

    return recent

示例#20

0

显示文件

文件： grabber_senate_site.py 项目： johndpope/Choose_Your_Polotics

def main(current_time):
    """Collect Senate Site blog links together with their timestamps.

    Post titles and ``span.updated`` stamps are scraped separately and
    paired by position, with the stamps offset by one (the first stamp on
    the page is never used). Stamps look like ISO date-times: the text is cut
    at "T" and "+", then split on "-" and ":".

    NOTE(review): a post is kept when ``date_subtracter.main`` reports ``0``
    in the first element of its result. Confirm whether ``== 0`` is intended
    here; it may be a filter that is the opposite of what was meant.

    Args:
        current_time: Reference timestamp passed to ``date_subtracter.main``;
            none of its fields are copied into the scraped timestamps.

    Returns:
        dict mapping absolute post URL to
        ``[[month, day, year], [hour, min, sec]]`` as read from the stamp.
    """
    site_root = "http://www.senatesite.com"

    page = Article('http://www.senatesite.com/2017/blog/')
    page.download()
    soup = BeautifulSoup(page.html)
    now = current_time[:]
    selected = {}

    titles = soup.find_all("h2",
                           class_="blog-shortcode-post-title entry-title")
    stamps = soup.find_all("span", class_="updated")

    # Stamp indices start at 1: the n-th title uses the (n+1)-th stamp.
    for stamp_index, title in enumerate(titles, 1):
        link = site_root + title.a["href"]

        pieces = stamps[stamp_index].text.replace("T", " ").replace("+", " ").split()
        ymd = pieces[0].replace("-", " ").split()
        hms = pieces[1].replace(":", " ").split()

        stamp = [
            # Reordered from year-month-day to month, day, year.
            [ymd[1], ymd[2], ymd[0]],
            [hms[0], hms[1], hms[2]],
        ]
        if date_subtracter.main(now, stamp)[0] == 0:
            selected[link] = stamp

    return selected

示例#21

0

显示文件

文件： grabber_utah_policy.py 项目： johndpope/Choose_Your_Polotics

def main(current_time):
    """Collect Utah Policy "Today at Utah Policy" links with their dates.

    Links are cut out of the rendered ``td.list-title`` cells; every linked
    article is then downloaded and each of its ``dd.create`` elements parsed
    for a date. An article is kept when ``date_subtracter.main`` reports
    ``1`` in the first element of its result.

    Args:
        current_time: Reference timestamp indexed as ``[date][time]``; the
            time part (index 1) is reused for every article.

    Returns:
        dict mapping absolute article URL to ``[[date fields], [time fields]]``.
    """
    # print "\n"
    # print "\n"
    # print "Utah Policy"
    # print "\n"
    article = Article(
        'http://utahpolicy.com/index.php/features/today-at-utah-policy')
    article.download()
    soups = BeautifulSoup(article.html)
    # Every URL visited is appended here; the list is not read afterwards.
    tempList = []
    tempListud = {}
    articleTime = current_time[:]
    prefix = "https://utahpolicy.com"
    letters = soups.find_all("td", class_="list-title")
    for element in letters:
        # The fourth whitespace-separated token of the rendered cell is used
        # as the href attribute; strip() removes any of the characters
        # h, r, e, f, =, ", > from both ends of it.
        url = (prefix +
               ((str(element).split())[3]).encode('utf-8').strip('href=">'))
        # print url
        tempList.append(url)
        # From here on `article` and `soups` refer to the linked article,
        # not the listing page; `letters` was already collected above.
        article = Article(url)
        article.download()
        soups = BeautifulSoup(article.html)
        publishDate = soups.find_all("dd", class_="create")
        for pub in publishDate:
            dateTimeForm = [["0", "0", "0"], ["0", "0", "0"]]
            # Tokenise the rendered <dd>; positions are fixed by the site's
            # markup.
            pub = pub.encode('utf-8').split()
            # Token 9 fills the first date slot and token 8 the second
            # (presumably the page prints day before month -- confirm).
            dateTimeForm[0][0] = pub[9][:]
            dateTimeForm[0][1] = pub[8][:]
            dateTimeForm[0][2] = pub[10][:]
            # Time of day comes from the reference time.
            dateTimeForm[1][0] = articleTime[1][0][:]
            dateTimeForm[1][1] = articleTime[1][1][:]
            dateTimeForm[1][2] = articleTime[1][2][:]
            dateDiff = date_subtracter.main(articleTime, dateTimeForm)
            if dateDiff[0] == 1:
                tempListud[url] = dateTimeForm

    # for article in tempListud:
    #     print article, tempListud[article]
    return tempListud

示例#22

0

显示文件

def main(current_time):
    """Collect WSRP press-release links whose publish date passes the check.

    Scrapes https://wsrp.org/media/press-releases/ and pairs each
    ``h2.green-text`` link with the ``div.postmonth`` date at the same
    position on the page.

    Args:
        current_time: Sequence that is shallow-copied and passed to
            ``date_subtracter.main``; ``current_time[1][0]``..``[1][2]``
            supply the [Hour, Min, Sec] half of every record.

    Returns:
        dict mapping press-release URL to its
        ``[[Month, Day, Year], [Hour, Min, Sec]]`` record, for each record
        where ``date_subtracter.main(...)[0] == 1``.
        NOTE(review): the meaning of that ``1`` lives in date_subtracter.
    """
    article = Article('https://wsrp.org/media/press-releases/')
    article.download()
    soups = BeautifulSoup(article.html)
    tempListud = {}
    articleTime = current_time[:]

    url_list = [heading.a['href']
                for heading in soups.find_all("h2", class_="green-text")]
    dates = soups.find_all("div", class_="postmonth")

    # zip() pairs links and dates by position and stops at the shorter
    # list, so a page with more date blocks than headings no longer raises
    # IndexError when looking up the URL.
    for url, element_dates in zip(url_list, dates):
        pub_date = element_dates.text.split()

        # Date Time in the format [Month, Day, Year][Hour, Min, Sec].
        # The first token loses its trailing character ([:-1]).
        dateTimeForm = [
            [pub_date[0][:-1], pub_date[1], pub_date[2]],
            [articleTime[1][0][:],
             articleTime[1][1][:],
             articleTime[1][2][:]],
        ]
        dateDiff = date_subtracter.main(articleTime, dateTimeForm)
        if dateDiff[0] == 1:
            tempListud[url] = dateTimeForm

    return tempListud

示例#23

0

显示文件

文件： grabber_nw_news_network.py 项目： johndpope/Choose_Your_Polotics

def main(current_time):
    """Collect Northwest News Network politics links that pass the date check.

    Scrapes the government-and-politics category page and reads each
    ``div.title-info`` block for its link and ``span.pub-date`` text.

    Args:
        current_time: Sequence that is shallow-copied and passed to
            ``date_subtracter.main``; ``current_time[1][0]``..``[1][2]``
            supply the [Hour, Min, Sec] half of every record.

    Returns:
        dict mapping absolute article URL to its
        ``[[Month, Day, Year], [Hour, Min, Sec]]`` record, for each record
        where ``date_subtracter.main(...)[0] == 1``.
    """
    base = "http://nwnewsnetwork.org"
    listing = Article(
        'http://nwnewsnetwork.org/category/government-and-politics')
    listing.download()
    soup = BeautifulSoup(listing.html)
    now = current_time[:]
    recent = {}

    for block in soup.find_all("div", class_="title-info"):
        link = base + block.a['href']
        stamp = block.find("span", class_="pub-date").text.split()

        # Record layout: [Month, Day, Year], [Hour, Min, Sec].
        # The second date token loses its trailing character ([:-1]).
        record = [
            [stamp[0], stamp[1][:-1], stamp[2]],
            [now[1][0][:], now[1][1][:], now[1][2][:]],
        ]
        verdict = date_subtracter.main(now, record)
        if verdict[0] == 1:
            recent[link] = record

    return recent

示例#24

0

显示文件

def main(current_time):
    """Collect Seattle PI local-politics links that pass the date check.

    Scrapes http://www.seattlepi.com/local/politics/ and reads each story
    wrapper for its link and ``span.story-date`` text.

    Args:
        current_time: Sequence that is shallow-copied and passed to
            ``date_subtracter.main``; ``current_time[1][0]``..``[1][2]``
            supply the [Hour, Min, Sec] half of every record.

    Returns:
        dict mapping story URL (as found in the page) to its
        ``[[Month, Day, Year], [Hour, Min, Sec]]`` record, for each record
        where ``date_subtracter.main(...)[0] == 1``.
    """
    page = Article('http://www.seattlepi.com/local/politics/')
    page.download()
    soup = BeautifulSoup(page.html)
    now = current_time[:]
    recent = {}

    wrappers = soup.find_all("div", class_="story-content-wrapper equal-group")
    for wrapper in wrappers:
        link = wrapper.find("div", class_="story-content").a["href"]

        # Colons become spaces before splitting; only the first three
        # tokens of the result are used below.
        date_text = wrapper.find("span", class_="story-date").text
        stamp = date_text.replace(":", " ").split()

        # Record layout: [Month, Day, Year], [Hour, Min, Sec].
        # The second date token loses its trailing character ([:-1]).
        record = [
            [stamp[0], stamp[1][:-1], stamp[2]],
            [now[1][0][:], now[1][1][:], now[1][2][:]],
        ]
        verdict = date_subtracter.main(now, record)
        if verdict[0] == 1:
            recent[link] = record

    return recent

示例#25

0

显示文件

文件： grabber_upc_flagged_bill_status.py 项目： johndpope/Choose_Your_Polotics

def main(current_time):
    """Collect flagged-bill links from Utah Political Capitol by publish date.

    Scrapes the "2017 session status of flagged bills" page, takes one link
    from every ``td.column-7`` cell and pairs it, by position, with the
    ``time.published`` elements found on the same page.

    Args:
        current_time: Sequence that is shallow-copied and passed to
            ``date_subtracter.main``; ``current_time[1][0]``..``[1][2]``
            are copied into the second half of every record.

    Returns:
        dict mapping link URL to its ``[[d0, d1, d2], [t0, t1, t2]]``
        record, for each record where ``date_subtracter.main(...)[0] == 1``.
        NOTE(review): the meaning of that ``1`` lives in date_subtracter.
    """
    article = Article(
        'http://utahpoliticalcapitol.com/2017-session-status-of-flagged-bills/'
    )
    article.download()
    # One parse serves both queries; the page was previously parsed twice.
    soups = BeautifulSoup(article.html)
    tempListud = {}
    articleTime = current_time[:]
    prefix = "h"

    # The 4th whitespace-separated token of each cell's markup is taken as
    # the href attribute. strip() removes any of h, r, e, f, =, ", > from
    # both ends, so the single-letter prefix is added back in front.
    # NOTE(review): presumably restores the "h" of "http" -- confirm.
    # str(element) is already a plain string, so it is not re-encoded.
    tempList = [
        prefix + str(element).split()[3].strip('href=">')
        for element in soups.find_all("td", class_="column-7")
    ]
    soupDate = soups.find_all("time", class_="published")

    # zip() pairs links and dates by position and stops at the shorter
    # list, so extra <time> tags no longer raise IndexError.
    for url, pub in zip(tempList, soupDate):
        # Encode the tag once, then blank out the angle brackets so the
        # tag text splits into separate tokens. Encoding the byte string a
        # second time would force an implicit ASCII decode.
        tokens = pub.encode('utf-8').replace(">", " ").replace("<", " ").split()
        dateTimeForm = [
            [tokens[3], tokens[4][:-1], tokens[5]],
            [articleTime[1][0][:],
             articleTime[1][1][:],
             articleTime[1][2][:]],
        ]
        dateDiff = date_subtracter.main(articleTime, dateTimeForm)
        if dateDiff[0] == 1:
            tempListud[url] = dateTimeForm

    return tempListud

示例#26

0

显示文件

def main(current_time):
    """Collect Crosscut politics links that pass the date check.

    Scrapes http://crosscut.com/category/politics/ and reads each
    ``div.inner`` block; blocks whose first link is exactly the site root
    are ignored.

    Args:
        current_time: Sequence that is shallow-copied and passed to
            ``date_subtracter.main``; ``current_time[1][0]``..``[1][2]``
            supply the [Hour, Min, Sec] half of every record.

    Returns:
        dict mapping article URL to its
        ``[[Month, Day, Year], [Hour, Min, Sec]]`` record, for each record
        where ``date_subtracter.main(...)[0] == 1``.
    """
    page = Article('http://crosscut.com/category/politics/')
    page.download()
    soup = BeautifulSoup(page.html)
    now = current_time[:]
    recent = {}

    for block in soup.find_all("div", class_="inner"):
        link = block.a['href']
        if link == "http://crosscut.com":
            # A block pointing at the home page is not treated as an article.
            continue

        # The first three whitespace-separated tokens of the block text
        # are used as the date.
        stamp = block.text.split()[:3]

        # Record layout: [Month, Day, Year], [Hour, Min, Sec].
        # The second date token loses its trailing character ([:-1]).
        record = [
            [stamp[0], stamp[1][:-1], stamp[2]],
            [now[1][0][:], now[1][1][:], now[1][2][:]],
        ]
        verdict = date_subtracter.main(now, record)
        if verdict[0] == 1:
            recent[link] = record

    return recent

示例#27

0

显示文件

def main(current_time):
    """Collect Utah House Democrats news links that pass the date check.

    Scrapes http://www.utahhousedemocrats.org/news/ and matches the n-th
    ``h1.entry-title`` link with the n-th ``header.entry-header`` element,
    whose text supplies the date.

    Args:
        current_time: Sequence that is shallow-copied and passed to
            ``date_subtracter.main``; ``current_time[1][0]``..``[1][2]``
            are copied into the second half of every record.

    Returns:
        dict mapping absolute post URL to its
        ``[[d0, d1, d2], [t0, t1, t2]]`` record, for each record where
        ``date_subtracter.main(...)[0] == 1``.

    Raises:
        IndexError: if the page has fewer entry headers than entry titles.
    """
    page = Article('http://www.utahhousedemocrats.org/news/')
    page.download()
    soup = BeautifulSoup(page.html)
    recent = {}
    now = current_time[:]
    base = "http://www.utahhousedemocrats.org"
    entry_headers = soup.find_all("header", class_="entry-header")
    titles = soup.find_all("h1", class_="entry-title p-name")

    for position, title in enumerate(titles):
        link = base + title.a["href"]

        # Tokens 2..4 of the matching header text are used as the date.
        # NOTE(review): presumably month, day and year -- confirm on a live page.
        stamp = entry_headers[position].text.split()[2:5]
        record = [
            [stamp[0], stamp[1][:-1], stamp[2]],
            [now[1][0][:], now[1][1][:], now[1][2][:]],
        ]
        verdict = date_subtracter.main(now, record)
        if verdict[0] == 1:
            recent[link] = record

    return recent

示例#28

0

显示文件

def main(current_time):
    """Collect Public News Service (Washington) links that pass the date check.

    Scrapes http://www.publicnewsservice.org/state-washington/WA and reads
    each ``div.group_info`` block for its title link and ``div.rundate``
    text.

    Args:
        current_time: Sequence that is shallow-copied and passed to
            ``date_subtracter.main``; ``current_time[1][0]``..``[1][2]``
            supply the [Hour, Min, Sec] half of every record.

    Returns:
        dict mapping absolute story URL to its
        ``[[Month, Day, Year], [Hour, Min, Sec]]`` record, for each record
        where ``date_subtracter.main(...)[0] == 1``.
    """
    page = Article('http://www.publicnewsservice.org/state-washington/WA')
    page.download()
    soup = BeautifulSoup(page.html)
    now = current_time[:]
    base = "http://www.publicnewsservice.org"
    recent = {}

    for group in soup.find_all("div", class_="group_info"):
        link = base + group.find("div", class_="title").a['href']
        stamp = group.find("div", class_="rundate").text.split()

        # Record layout: [Month, Day, Year], [Hour, Min, Sec].
        # The date starts at the second token of the run-date text; the
        # day token loses its trailing character ([:-1]).
        record = [
            [stamp[1], stamp[2][:-1], stamp[3]],
            [now[1][0][:], now[1][1][:], now[1][2][:]],
        ]
        verdict = date_subtracter.main(now, record)
        if verdict[0] == 1:
            recent[link] = record

    return recent

示例#29

0

显示文件

文件： grabber_senate_democrats.py 项目： johndpope/Choose_Your_Polotics

def main(current_time):
    """Collect Utah Senate Democrats blog links that pass the date check.

    Scrapes http://www.utahsenatedemocrats.org/home/main/blog and matches
    the n-th ``h4.post-title`` link with the n-th ``div.post-footer``
    element, whose text supplies the date.

    Args:
        current_time: Sequence that is shallow-copied and passed to
            ``date_subtracter.main``; ``current_time[1][0]``..``[1][2]``
            are copied into the second half of every record.

    Returns:
        dict mapping post URL (as found in the page) to its
        ``[[d0, d1, d2], [t0, t1, t2]]`` record, for each record where
        ``date_subtracter.main(...)[0] == 1``.

    Raises:
        IndexError: if the page has fewer post footers than post titles.
    """
    page = Article('http://www.utahsenatedemocrats.org/home/main/blog')
    page.download()
    soup = BeautifulSoup(page.html)
    recent = {}
    now = current_time[:]
    titles = soup.find_all("h4", class_="post-title")
    footers = soup.find_all("div", class_="post-footer")

    for position, title in enumerate(titles):
        link = title.a["href"]
        stamp = footers[position].text.split()

        # The first two footer tokens are swapped when building the date
        # part: token 1 goes first, then token 0, then token 2.
        # NOTE(review): presumably the footer reads "day month year" --
        # confirm on a live page.
        record = [
            [stamp[1], stamp[0], stamp[2]],
            [now[1][0][:], now[1][1][:], now[1][2][:]],
        ]
        verdict = date_subtracter.main(now, record)
        if verdict[0] == 1:
            recent[link] = record

    return recent