Example #1
    def downloadChart(self):
        """
        Fetch user chart.
        """
        u = url.url(
            "https://www.odesk.com/api/team/v1/snapshots/"
            + self.settings.company
            + "/"
            + self.settings.username
            + "/"
            + str(int(time.time()))
            + ".xml"
        )
        u.addParam("api_key", self.settings.apiKey)
        u.addParam("api_token", self.settings.token)
        u.addParam("api_sig", self.apiSig(u))
        c = urllib2.urlopen(u.asString())
        xml = minidom.parse(c)

        onlinePresence = xml.getElementsByTagName("online_presence")[0].firstChild.nodeValue
        chartUrl = (
            "http://chart.apis.google.com/chart?chs=70x22&cht=ls&chco=00FF00"
            + "&chf=bg,lg,90,000000&chm=B,00FF00,0,0,0&chd=t:"
            + onlinePresence
        )

        c = urllib2.urlopen(chartUrl)
        chart = open("/tmp/chart.png", "wb")  # binary mode: the response is PNG data
        chart.write(c.read())
        chart.close()
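
The oDesk snippets on this page (this one and Examples #4 and #5 below) build every request through a small url module whose url class they instantiate as url.url(...), with addParam() and asString(). That module is not shown anywhere in these examples; the following is a minimal sketch of what they appear to assume, so the parameter storage, sorting, and encoding are guesses rather than the original implementation:

import urllib

class url:
    """Hypothetical reconstruction of the helper the oDesk examples assume."""

    def __init__(self, base):
        self.base = base
        self.params = {}

    def addParam(self, name, value):
        # Collect a query parameter; serialization happens in asString().
        self.params[name] = value

    def asString(self):
        # Assemble base?key=value&... with URL-encoded, name-sorted parameters.
        query = urllib.urlencode(sorted(self.params.items()))
        return self.base + "?" + query if query else self.base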
Example #2
def crawl_web(seeds, search_type):

    snippet_lookup = dict()
    index = dict()
    graph = dict()
    total_pages = 40

    for item in seeds:
        to_crawl = [item[0]]

        max_pages = item[1]

        crawled = []

        while to_crawl:

            current_domain = ""

            if search_type == "BFS":
                current_page = to_crawl[0]
                to_crawl = to_crawl[1:]
                if 'https://' in current_page or 'http://' in current_page:
                    o = urlparse(current_page)
                    current_domain = o.scheme + "://" + o.netloc

            crawled.append(current_page)
            print "crawling" + str(len(crawled))
            print current_page
            if len(crawled) - 1 >= max_pages:
                break
            content = get_page(current_page)
            if content:
                soup = BeautifulSoup(content)
                title = soup.title.string
                snippet = get_snippet(current_page, soup)
                snippet_lookup[current_page] = (title, snippet)
                add_page_to_index(index, current_page, content)
                outlinks = get_all_links(content)
                graph[current_page] = outlinks
                for link in outlinks:
                    if not link in crawled and not link == "#":

                        # absolute links go straight onto the queue
                        if 'https://' in link or 'http://' in link:
                            to_crawl.append(link)

                        # site-relative links are resolved against the current
                        # domain before being queued
                        if link[0] == "/" and "//" not in link and url.url(current_domain + link):
                            to_crawl.append(current_domain + link)
    return index, graph, snippet_lookup
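
A call to the crawler above might look like the following; the seed URL and the per-seed page limit are illustrative values, not taken from the original project:

# Each seed is a (start_url, max_pages) pair; "BFS" selects the
# breadth-first branch implemented in crawl_web above.
index, graph, snippets = crawl_web([("http://example.com", 10)], "BFS")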
Example #3
def beginProcess(info, identifier, gdbm_files, filter_file, path, category):
    log = logging.getLogger('classify')
    log.debug("classify.beginProcess()")

    if identifier == "w":
        url_obj = url.url(gdbm_files, filter_file, path, category)
        url_obj.processHTML(info)
    elif identifier == "f":
        textfile_obj = textfile.textfile(gdbm_files, filter_file, path, category)
        textfile_obj.processUTFFile(info)
    elif identifier == "h":
        textfile_obj = textfile.textfile(gdbm_files, filter_file, path, category)
        textfile_obj.processHTMLFile(info)
    else:
        log.debug("identifier value is not valid")
        return

    log.debug("program terminated")
    return
Example #4
    def daySnapshots(self, date):
        """
        Returns the number of snapshots in specified date
        """
        u = url.url(
            "https://www.odesk.com/api/team/v1/workdiaries/"
            + self.settings.company
            + "/"
            + self.settings.username
            + "/"
            + date
            + ".xml"
        )
        u.addParam("api_key", self.settings.apiKey)
        u.addParam("api_token", self.settings.token)
        u.addParam("api_sig", self.apiSig(u))
        c = urllib2.urlopen(u.asString())
        xml = minidom.parse(c)

        return len(xml.getElementsByTagName("snapshot"))
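
The snippet does not show what format the date argument takes; oDesk work-diary endpoints conventionally took a YYYYMMDD string, but treat both that format and the instance name below as assumptions:

# Hypothetical usage; "team" stands for an instance of the class above.
count = team.daySnapshots("20110315")  # YYYYMMDD format is an assumption
print "snapshots taken that day:", count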
Example #5
    def getToken(self):
        # TODO: finish it
        u = url.url("https://www.odesk.com/api/auth/v1/keys/frobs.xml")
        u.addParam("api_key", self.settings.apiKey)
        u.addParam("api_sig", self.apiSig(u))
        handler = urllib2.urlopen(u.asString())
        xml = minidom.parse(handler)
        frob = xml.getElementsByTagName("frob")[0].firstChild.nodeValue

        u = url("https://www.odesk.com/services/api/auth/")
        u.addParam("api_key", self.settings.apiKey)
        u.addParam("frob", frob)
        u.addParam("api_sig", self.apiSig(u))
        handler = urllib2.urlopen(u.asString())

        u = url("https://www.odesk.com/api/auth/v1/keys/tokens.xml")
        u.addParam("api_key", self.settings.apiKey)
        u.addParam("frob", frob)
        u.addParam("api_sig", self.apiSig(u))
        handler = urllib2.urlopen(u.asString())
        xml = minidom.parse(handler)
        return xml.getElementsByTagName("token")[0].firstChild.nodeValue
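
Every request in these oDesk examples is signed with self.apiSig(u), which none of the snippets define. The frob/token flow above mirrors the Flickr-style scheme, where the signature is an MD5 hash of a shared secret followed by the parameter names and values sorted by name; here is a sketch under that assumption (both the getParams() accessor and self.settings.secret are hypothetical names):

    def apiSig(self, u):
        import hashlib
        # Assumed Flickr-style signing: md5(secret + name1 value1 name2 value2 ...)
        # over the parameters sorted by name. Not the confirmed oDesk scheme.
        raw = self.settings.secret
        for name, value in sorted(u.getParams().items()):
            raw += name + str(value)
        return hashlib.md5(raw).hexdigest()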
Example #6
import re
import time
from selenium import webdriver  # missing in the original snippet; needed for webdriver.Chrome()
from companyList import company
from url import url

browser = webdriver.Chrome()
header = ['时间','公司名称','新闻标题','新闻正文','URL','详细时间','新闻来源']  # time, company, headline, body, URL, detail time, source
today = time.strftime('%Y-%m-%d',time.localtime(time.time()))
news_list = []
companyObj = company()
companyListTest = companyObj.testList
companyList = companyObj.list
xpath_title = '/html/body/div[2]/div[4]/div/div[2]/div[3]/div'

# load the target address
urlObj = url()
url = urlObj.url  # note: this rebinds the imported url name to the address itself


def info(x):

    # news title
    if el.xpath(xpath_title + '[{}]/h3/a/text()'.format(x)):
        title = el.xpath(xpath_title + '[{}]/h3/a/text()'.format(x))
        title = ''.join(str(i) for i in title)
        title = re.sub('[\n]', '', title)
        print(title)
    else:
        title = {}

    # news URL
Example #7
    def setUp(self):
        super(TestURL, self).setUp()
        self.url_obj = url.url(self.bot)
Example #8
from gpiozero import MotionSensor
import time
import os
import numpy as np
import cv2
from url import url
from facedetection import facedetect

pir = MotionSensor(4)

while True:
    pir.wait_for_motion()
    print("Motion detected")
    url()
    facedetect()
    print("back in main")
    pir.wait_for_no_motion()
Example #9
    def __init__(self):
        self.o_urls = url.url()
        self.o_download = downlload.htmldowmloader()
        self.output = output.htmloutput()
        self.parser = parser_.parse1()
Example #10
    def __init__(self):
        self.url = url.url()
        self.store = store.store()
        self.pursueHtml = pursueHtml.pursueHtml()