示例#1
0
def crawlPerson(index):
  """Crawl the person record at *index* and persist the result.

  On a 'page_not_found' error the index is invalid and an exception is
  raised.  On an 'end of database' error the persisted search position is
  reset to 1 (in both memcache and the datastore) so the crawl restarts
  from the beginning.  Otherwise the crawled result is stored via
  putResult().
  """
  logging.info("In CrawlPerson")
  result = Crawler().getMap(index)

  if 'error' in result:
    # Lazy %-style args avoid building the message when the level is off.
    logging.warning("error at index %s, error is %s", index, result['error'])
    if result['error'] == 'page_not_found':
      logging.warning("Invalid index: %s", index)
      raise Exception("page_not_found at index %s" % index)
    if result['error'] == 'end of database':
      logging.warning("Index out of range: %s", index)
      # Reset the crawl position in both the cache and the datastore
      # (86400 s = 1 day memcache TTL).
      memcache.set("index", 1, 86400)
      SearchPosition(key_name="index", position=1).put()
  else:
    logging.info("putting results")
    putResult(result)
示例#2
0
def crawlPerson(index):
    """Crawl the person at *index*, or at the next persisted position.

    When *index* is truthy it is crawled directly with no shared state.
    When it is falsy, the crawl position is read from (and advanced in)
    the SearchPosition datastore entity, guarded by a mutex so concurrent
    crawlers do not fetch the same index.

    Raises an Exception on a 'page_not_found' result; on 'end of database'
    the persisted position wraps back to 1.
    """
    logging.info("In CrawlPerson")

    # Explicit index: crawl it directly, no lock or datastore involved.
    if index:
        result = Crawler().getMap(index)
        logging.info(str(result))
        putResult(result)
        return

    mutex = Mutex('mutex lock')
    # Acquire BEFORE the try so the finally never unlocks a mutex we
    # failed to take.
    mutex.lock()
    try:
        index_from_ds = SearchPosition.get_by_id("index")
        if index_from_ds:
            index = index_from_ds.position
        else:
            # First run: seed the persisted position at 1.
            index_from_ds = SearchPosition(id='index', position=1)
            index_from_ds.put()
            index = 1

        result = Crawler().getMap(index)
        logging.info(str(result))

        if 'error' in result:
            logging.warning("error at index %s, error is %s",
                            index, result['error'])
            if result['error'] == 'page_not_found':
                logging.warning("Invalid index: %s", index)
                raise Exception("page_not_found at index %s" % index)
            if result['error'] == 'end of database':
                logging.warning("Index out of range: %s", index)
                # Wrap around: restart the crawl from the beginning.
                index_from_ds.position = 1
                index_from_ds.put()
        else:
            logging.info("putting results")
            putResult(result)
            # Advance the persisted position for the next crawl.
            index_from_ds.position = int(index) + 1
            logging.info("INCREMENT %s", index)
            index_from_ds.put()
    finally:
        # Release exactly once, whatever path was taken.  (The original
        # success branch also called unlock(), releasing the mutex twice.)
        mutex.unlock()
示例#3
0
def crawlPerson(index):
    """Crawl the person at *index*, or at the next persisted position.

    A truthy *index* is crawled immediately with no shared state.  A falsy
    one means: read the current position from the SearchPosition datastore
    entity under a mutex, crawl it, and advance the position on success.

    Raises an Exception on a 'page_not_found' result; on 'end of database'
    the persisted position is reset to 1 so crawling wraps around.
    """
    logging.info("In CrawlPerson")

    # Direct crawl of a caller-supplied index: no locking needed.
    if index:
        result = Crawler().getMap(index)
        logging.info(str(result))
        putResult(result)
        return

    mutex = Mutex('mutex lock')
    # Lock before entering the try block so the finally clause only ever
    # unlocks a mutex that was actually acquired.
    mutex.lock()
    try:
        index_from_ds = SearchPosition.get_by_id("index")
        if index_from_ds:
            index = index_from_ds.position
        else:
            # No stored position yet: initialize it to 1.
            index_from_ds = SearchPosition(id='index', position=1)
            index_from_ds.put()
            index = 1

        result = Crawler().getMap(index)
        logging.info(str(result))

        if 'error' in result:
            logging.warning("error at index %s, error is %s",
                            index, result['error'])
            if result['error'] == 'page_not_found':
                logging.warning("Invalid index: %s", index)
                raise Exception("page_not_found at index %s" % index)
            if result['error'] == 'end of database':
                logging.warning("Index out of range: %s", index)
                # End of data reached: wrap the position back to 1.
                index_from_ds.position = 1
                index_from_ds.put()
        else:
            logging.info("putting results")
            putResult(result)
            # Persist the next position for the following crawl.
            index_from_ds.position = int(index) + 1
            logging.info("INCREMENT %s", index)
            index_from_ds.put()
    finally:
        # Single release point — the extra unlock() that previously sat in
        # the success branch caused a double unlock via this finally.
        mutex.unlock()