Example #1
File: metro.py Project: igoral5/synchro
 def process_racecoord(self, mv_id):
     handler = parsers.RaceCoordXMLParser()
     util.http_request('/getRaceCoord.php?mv_id=%d' % mv_id,
                       handler,
                       conf,
                       logger=logger)
     self.route['race_coord'] = handler.race_coord
Example #2
File: metro.py Project: igoral5/synchro
 def process_marshvariants(self):
     handler = parsers.MarshVariantsXMLParser(current_time)
     util.http_request('/getMarshVariants.php',
                       handler,
                       conf,
                       logger=logger)
     self.marsh_variants = handler.marsh_variants
Example #3
 def load(self):
     handler = parsers.StopsSubXMLParser()
     util.http_request('/station.php', handler, conf, logger)
     for st_id in handler.stations:
         station = handler.stations[st_id]
         station['in_moscow_region'] = self.in_moscow_region(station['location'])
         self.stations[st_id] = station
     self.load_old()
Example #4
File: metro.py Project: igoral5/synchro
 def process_racecard(self, mr_id, mv_id):
     handler = parsers.RaceCardXMLParser()
     util.http_request('/getRaceCard.php?mv_id=%d' % mv_id, handler, conf, logger=logger)
     self.route['race_card'] = handler.race_card
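     # apply manual race card additions and changes defined in self.json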
     for item in self.json['add_racecard']:
         if mr_id == item['mr_id']:
             self.route['race_card'][item['direction']].insert(item['index'], item['item'])
     for item in self.json['change_racecard']:
         if mr_id == item['mr_id']:
             self.route['race_card'][item['direction']][item['index']]['st_id'] = item['st_id']
Example #5
 def check_res(new_node):
     new_ip = new_node.split("@")[1]
     code, data = http_request(new_ip, cbase_port, (cbase_user, cbase_pwd),
                               'GET', '/pools/default/rebalanceProgress',
                               {})
     assert code == 200
     obj = json.loads(data)
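     # poll rebalanceProgress until the rebalance leaves the 'none' state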
     while obj["status"] == "none":
         _, data = http_request(new_ip, cbase_port, (cbase_user, cbase_pwd),
                                'GET', '/pools/default/rebalanceProgress',
                                {})
         obj = json.loads(data)
Example #6
File: metro.py Project: igoral5/synchro
 def process_stops(self):
     handler = parsers.StopsXMLParser()
     util.http_request('/getStops.php', handler, conf, logger=logger)
     self.stations = handler.stations
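     # add extra stations listed in self.json['add_stations']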
     for item in self.json['add_stations']:
         self.stations[item['id']] = {
             'name': item['name'],
             'location': {
                 'lat': item['lat'],
                 'long': item['long']
             },
             'tags': set()
         }
Example #7
File: metro.py Project: igoral5/synchro
 def process_racecard(self, mr_id, mv_id):
     handler = parsers.RaceCardXMLParser()
     util.http_request('/getRaceCard.php?mv_id=%d' % mv_id,
                       handler,
                       conf,
                       logger=logger)
     self.route['race_card'] = handler.race_card
     for item in self.json['add_racecard']:
         if mr_id == item['mr_id']:
             self.route['race_card'][item['direction']].insert(
                 item['index'], item['item'])
     for item in self.json['change_racecard']:
         if mr_id == item['mr_id']:
             self.route['race_card'][item['direction']][
                 item['index']]['st_id'] = item['st_id']
Example #8
def update_keywords_in_article(article, article_data):
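    # fetch the article body if it was not supplied, then score it against the keyword dictionary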
    
    if article_data is None:
        article_data = util.http_request(article.url)
        
    keyword_count_list = count_keywords_in_text(article_data, logic.getKeywordDict())
    article.keywords = create_list_of_keywords(keyword_count_list)
    article.weight = calculate_keywords_weight(keyword_count_list)
Example #9
def test_http_request2(url):
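    # issue an authenticated GET with query parameters and return the status code plus body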
    method = 'GET'
    params = {'key1': 'aaa', 'key2': 'bbb', 'key3': ['a1', 'a2', 'a3']}
    #params = None
    headers = {'User-Agent': 'Mozilla/5.0 (x128) TestAgent/1.0'}
    res = util.http_request(url, method, params, 'user1', '1111', headers)
    text = str(res.getcode()) + '\n'
    text += res.read().decode('utf-8')
    return text
Example #10
 def check_container():
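     # check each container's status and start any that report 'stopped'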
     for name in name_ips:
         ip = get_container_host(name)
         _, data = http_request(ip,
                                PORT,
                                user=(USER, PASSWORD),
                                method='GET',
                                path='/container/status/%s' % name)
         res = eval(data)
         if res["response"]["status"] == "stopped":
             _, data = http_request(ip,
                                    PORT,
                                    user=(USER, PASSWORD),
                                    method='POST',
                                    path='/container/start',
                                    data={"containerName": name})
             res = eval(data)
             if res["status"] != "started":
                 raise Exception("%s container cannot start" % name)
Example #11
    def add_node(cluster_name):
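        # register the new container with the cluster via the addNode controller API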
        new_container = get_new_container_name(cluster_name)
        new_ip = name_ips[new_container]
        old_ip = name_ips[list(get_old_container_name())[0]]

        data = {"hostname": new_ip, "user": cbase_user, "password": cbase_pwd}

        code, data = http_request(old_ip, cbase_port, (cbase_user, cbase_pwd),
                                  'POST', '/controller/addNode', data)
        assert code == 200
        update_old_container_names(new_container)
        obj = json.loads(data)
        return obj["otpNode"]
Example #12
    def rebalance(new_node):
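        # build the knownNodes list from the existing containers and trigger a rebalance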
        pair = new_node.split("@")
        node_flag = pair[0]
        new_ip = pair[1]

        names = get_old_container_name()
        nodes = []
        for name in names:
            nodes.append("%s@%s" % (node_flag, name_ips[name]))
        str_nodes = ','.join(nodes)
        data = {"knownNodes": str_nodes}
        code, data = http_request(new_ip, cbase_port, (cbase_user, cbase_pwd),
                                  'POST', '/controller/rebalance', data)
        assert code == 200
Example #13
    def check_res(cluster_name):
        import time
        time.sleep(5)
        _, data = http_request(MAIN_HOST,
                               PORT,
                               user=(USER, PASSWORD),
                               method='GET',
                               path='/containerCluster/createResult/%s' %
                               cluster_name)
        obj = json.loads(data)
        retry_count = 20
        while obj['response']['code'] != '000000' and retry_count > 0:
            time.sleep(3)
            _, data = http_request(MAIN_HOST,
                                   PORT,
                                   user=(USER, PASSWORD),
                                   method='GET',
                                   path='/containerCluster/createResult/%s' %
                                   cluster_name)
            obj = json.loads(data)
            retry_count -= 1

        if retry_count <= 0:
            raise Exception("expand container failed")
Example #14
def _has_talkback(url, data=None):
    if data is None:
        data = http_request(url)
    # one method for determining talkback is by looking at the text of the <a> tags
    links = get_link_texts(data)
    for link in links:
        con = "".join(link)
        con = con.lower()
        for x in ["comments","talkback","trackback","talk-back","track-back","comentarios"]:
            if ((-1 != con.find(x)) and (-1 == con.find("commentary")) and (-1 == con.find("commented"))):
                logging.debug("TalkBack: Found '%s' in '%s'. Has talkbacks."%(x,con))
                return True
    # second method is looking for a <form> with ID "comment" or "something-back"
    if (search_comment_forms(data)):
        return True
    return False
Example #15
def fetchByUrl(NEWS_URL, language):
    articles = []
    dom = minidom.parseString(util.http_request(NEWS_URL))
    for node in dom.getElementsByTagName('item'):
        # the source is the last part and the rest is the title
        arr = node.getElementsByTagName('title')[0].firstChild.data.rsplit('-',1) 
        newArticle = common.Article()
        newArticle.title = arr[0].strip()
        logging.debug("Parsing article: '%s'"%newArticle.title)
        # save the part after the last '-' as the article source
        newArticle.source = arr[1].strip()
        url = node.getElementsByTagName('link')[0].firstChild.data
        newArticle.url = url 

        # Here we test if this article is already in the DB, and if so we continue:
        if (isURLInDB(url)):
            logging.debug("Article already in the DB, skipping")
            continue

        #extract text
        rawDescription = node.getElementsByTagName('description')[0].firstChild.data
        #newArticle.raw = rawDescription # used for debugging
        description = util.extract_text(rawDescription).split("...")[0].strip() # get all the text before the ...
        if (description.find(newArticle.source) > -1):
            newArticle.desc = description.split(newArticle.source)[1]
        else:
            newArticle.desc = description
        datestring = node.getElementsByTagName('pubDate')[0].firstChild.data
        if datestring != '':
            newArticle.created = datetime.datetime.strptime(datestring, '%a, %d %b %Y %H:%M:%S GMT+00:00' )
        
        soup = BeautifulSoup(rawDescription)
        thumbnail = soup.find('img')
        if thumbnail:
            try:
                newArticle.pic_url = thumbnail['src']
            except KeyError:
                pass 
        #@@ari
        newArticle.language = language
        articles.append(newArticle)
    
    return articles
Example #16
    def check_cbase():
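        # ask the first reachable node for /nodeStatuses and collect any nodes that are not healthy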
        port = 8091
        user = '******'
        password = cluster_name

        bad_nodes = []
        for ip in name_ips.values():
            code, data = http_request(ip,
                                      port,
                                      user=(user, password),
                                      method='GET',
                                      path='/nodeStatuses')
            if code == 200:
                res = eval(data)
                for k, v in res.items():
                    if v['status'] != 'healthy':
                        bad_nodes.append(k)
                break
        print 'bad nodes:'
        print bad_nodes
Example #17
def main(argv):
    remoteIP, filename, path, remotehost = util.get_remotehost(argv[1])
    print 'remote: ',remoteIP, filename, path, remotehost
    s = RawSocket()
    # 3 handshake
    s.connect(remoteIP, 80)
    # download file
    print 'tcp handshake finished, start http request......'
    s.send(util.http_request(path, remotehost))
    data = s.receive()
    # extract the body from the HTTP response
    res = data.split("\r\n\r\n", 1)[-1]

    if not data.startswith("HTTP/1.1 200 OK"):
        print 'http request failed'
        s.close('send')
        sys.exit(1)
    
    with open(filename, "w") as f:
        f.write(res)
Example #18
def expand_container(cluster_name):
    def check_res(cluster_name):
        import time
        time.sleep(5)
        _, data = http_request(MAIN_HOST,
                               PORT,
                               user=(USER, PASSWORD),
                               method='GET',
                               path='/containerCluster/createResult/%s' %
                               cluster_name)
        obj = json.loads(data)
        retry_count = 20
        while obj['response']['code'] != '000000' and retry_count > 0:
            time.sleep(3)
            _, data = http_request(MAIN_HOST,
                                   PORT,
                                   user=(USER, PASSWORD),
                                   method='GET',
                                   path='/containerCluster/createResult/%s' %
                                   cluster_name)
            obj = json.loads(data)
            retry_count -= 1

        if retry_count <= 0:
            raise Exception("expand container failed")

    data = {
        "containerClusterName": cluster_name,
        "componentType": "cbase",
        "networkMode": "ip",
        "nodeCount": "1",
        "image": "10.160.140.32:5000/lihanlin1/cbase:V4",
    }
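    # request one extra cbase node, then poll check_res until the expansion finishes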
    code, data = http_request(MAIN_HOST, PORT, (USER, PASSWORD), 'POST',
                              '/containerCluster/node', data)
    assert code == 200

    check_res(cluster_name)
Example #19
def fetchByUrlYT(NEWS_URL, language):
    articles = []
    dom = minidom.parseString(util.http_request(NEWS_URL))
    for node in dom.getElementsByTagName('entry'):
        # the source is the last part and the rest is the title
        newArticle = common.Article()
        newArticle.title = node.getElementsByTagName('title')[0].firstChild.data
        logging.debug("Parsing article: '[YouTube] %s'"%newArticle.title)
        # the source for this feed is always YouTube
        newArticle.source = "YouTube"
        url = node.getElementsByTagName('link')[0].getAttribute('href')
        newArticle.url = url

        # Here we test if this article is already in the DB, and if so we continue:
        if (isURLInDB(url)):
            logging.debug("Article already in the DB, skipping")
            continue

        #extract text
        rawDescription = node.getElementsByTagName('content')[0].firstChild.data
        #newArticle.raw = rawDescription # used for debugging
        #description = rawDescription.split('tyle="font-size: 12px; margin: 3px 0px;"&gt;&lt;span&gt;')[1].split('&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;')[0].strip() # get all the text before the ...
        description = util.extract_text(rawDescription).strip()[len(newArticle.title):]
        newArticle.desc = description
        datestring = node.getElementsByTagName('updated')[0].firstChild.data
        if datestring != '':
            newArticle.created = datetime.datetime.strptime(datestring, '%Y-%m-%dT%H:%M:%S.000Z' )

        thumbnail = re.findall(r'img alt="" src="(http://i.ytimg.com/[^"]+)"',rawDescription)
        if len(thumbnail) > 0:
            newArticle.pic_url = thumbnail[0]

        #@@ari
        newArticle.language = language
        articles.append(newArticle)

    return articles
Example #20
def process_article(article):
    logging.debug("Got article: %s (%s)"%(article.source,article.url))
    
    has_talkbacks = False
    volatile = False
    create_new_source = True
    article_data = None
    
    #Check if article has a known source
    source = db.GqlQuery("SELECT * FROM Sources WHERE name=:source",source=article.source).get()

    # if this is a known source
    if source is not None:
        # Reuters and the Guardian are large enough sources to warrant special handling
        if (source.name.lower() == 'reuters'):
            logging.debug("A reuters article")
            article_data = http_request(article.url)
            has_talkbacks = _has_talkback_reuters(article.url, article_data)
            create_new_source = False
        elif (source.name.lower().find('guardian') != -1):
            logging.debug("A guardian article")
            article_data = http_request(article.url)
            has_talkbacks = _has_talkback_guardian(article.url, article_data)
            create_new_source = False
        else:
            if not source.volatile:
                logging.debug("known source, has talkbacks = %s"%source.has_tkbks)
                article.source_weight = source.weight
                has_talkbacks = source.has_tkbks
                create_new_source = False
            else:
                logging.debug("volatile source (%s)"%source.name)
                volatile = True

    if (create_new_source):
        source = Sources(name=article.source)
        article_data = http_request(article.url) 
        has_talkbacks = _has_talkback(url = article.url, data = article_data)
        logging.debug("A new source = %s, has talkbacks = %s"%(article.source,source.has_tkbks))
        #If this is not a volatile source, we should add it to the db
        if (not volatile):
            source.has_tkbks = has_talkbacks
            source.put()
    
    #If the source has no talkbacks, we shouldn't process the article
    if not has_talkbacks:
        logging.debug("Article has no talkbacks")
        return False
    
    article.source_weight = source.weight
    #Process the keywords in the article
    keywords.update_keywords_in_article(article, article_data)
    
    logging.debug("Article with src_weight = %d and keywords_weight = %d"%(article.source_weight, article.weight))
    
    if (article.weight > 0):
        logging.debug("Article was added")
        return True
    else:
        logging.debug("No keywords. Article was not added")
        return False   
Example #21
 def load(self):
     handler = parsers.MarshesSubXMLParser(logger)
     util.http_request('/', handler, conf, logger)
     self.marshes = handler.marshes
     self.load_old()
Example #22
 def process_stops(self):
     handler = parsers.StopsXMLParser()
     util.http_request('/getStops.php', handler, conf, logger)
     self.stations = handler.stations
Example #23
 def process_marshes(self):
     handler = parsers.MarshesXMLParser(set([7]), logger=logger)
     util.http_request('/getMarshes.php', handler, conf, logger=logger)
     self.marshes = handler.marshes
Example #24
File: metro.py Project: igoral5/synchro
 def process_stops(self):
     handler = parsers.StopsXMLParser()
     util.http_request('/getStops.php', handler, conf, logger=logger)
     self.stations = handler.stations
     for item in self.json['add_stations']:
         self.stations[item['id']] = {'name': item['name'], 'location': {'lat': item['lat'], 'long': item['long'] }, 'tags': set() }
Example #25
 def process_racecoord(self, mv_id):
     handler = parsers.RaceCoordXMLParser()
     util.http_request('/getRaceCoord.php?mv_id=%d' % mv_id, handler, conf, logger)
     self.route['race_coord'] = handler.race_coord
Example #26
 def process_marshes(self):
     handler = parsers.MarshesXMLParser(set([7]), logger=logger)
     util.http_request('/getMarshes.php', handler, conf, logger=logger)
     self.marshes = handler.marshes
Example #27
 def process_marshvariants(self):
     handler = parsers.MarshVariantsXMLParser(current_time)
     util.http_request('/getMarshVariants.php', handler, conf, logger)
     self.marsh_variants = handler.marsh_variants
Example #28
def _has_talkback_guardian(url, data):
    if data is None:
        data = http_request(url)
    return (-1 != data.find('Comments in chronological order'))
Example #29
 def process_stops(self):
     handler = parsers.StopsXMLParser()
     util.http_request('/getStops.php', handler, conf, logger)
     self.stations = handler.stations
Example #30
def _has_talkback_reuters(url, data):
    if data is None:
        data = http_request(url)
    logging.debug("Reuters returning %s"%(-1 != data.find('/articles/comments/')))
    return (-1 != data.find('/article/comments/'))