Exemplo n.º 1
0
def main():
    """Print the expanded URL of every heavily redirected URL record.

    Opens a session on the connection returned by ``sqa.connect_db()``,
    loads every ``sqa.UrlRecords`` row whose ``num_redir`` compares
    ``>= '5'`` and prints each row's ``exp_url``, one per line.

    The session is closed even when the query or the printing fails, so
    the underlying connection is not leaked.
    """
    db_conn = sqa.connect_db()
    Session = sessionmaker(bind=db_conn)
    session = Session()

    try:
        # NOTE(review): the threshold is passed as the string '5', as in the
        # original code. If num_redir is a text column this compares
        # lexicographically -- confirm the column type against the model.
        records = (
            session.query(sqa.UrlRecords)
            .filter(sqa.UrlRecords.num_redir >= '5')
            .all()
        )
        for record in records:
            # A single parenthesised argument prints identically under
            # Python 2 (print statement) and Python 3 (print function).
            print(record.exp_url)
    finally:
        session.close()
Exemplo n.º 2
0
def main():
    """Run the URL dump and report how many ``UrlRecords`` rows it added.

    Counts the ``sqa.UrlRecords`` rows, runs ``FileIoUpd`` with the two
    command-line arguments (``sys.argv[1]`` and ``sys.argv[2]``) and a
    fresh queue, counts the rows again and prints the difference.

    Each count uses its own short-lived session that is always closed.
    The original closed the session after the first count and then reused
    it for the second one, leaving it open when the function returned.

    Raises:
        IndexError: if fewer than two command-line arguments were given.
    """
    logging.debug("main called")

    db_conn = sqa.connect_db()
    Session = sessionmaker(bind=db_conn)

    def count_records():
        # A new session per count: nothing stays open while the dump runs,
        # and the second count reads the state left by the dump.
        session = Session()
        try:
            return session.query(sqa.UrlRecords).count()
        finally:
            session.close()

    count1 = count_records()

    in_queue = Queue()
    # pt = ParseTwitter(sys.argv[1], sys.argv[2], in_queue)
    # pt.parse()
    url_dump = FileIoUpd(sys.argv[1], sys.argv[2], in_queue)
    url_dump.run()

    count2 = count_records()

    # The Python 2 statement `print "Records Added: ", n` emitted the label,
    # a separator space and the number; the double space below reproduces
    # that output while also being valid on Python 3.
    print("Records Added:  %s" % (count2 - count1))
Exemplo n.º 3
0
def redirect_urls(inputq, total_q, indb_q, todb_q):
    """ Updated: input remains the same, however the output gets
    written to the database """
    """ Takes the list of Url's as input and writes
    the input record and the number of redirects to a file"""
    
    db_conn = sqa.connect_db()
    Session = sessionmaker(bind=db_conn) 
    session = Session()

    while True:
        with lock4:
            try:
                item = inputq.get_nowait()
            except Exception, e:
                print "Empty exception", e
                session.close()
                # return
                sys.exit(0)

        with lock1:
            total_q.put(total_q.get() + 1)
        try:
            rec = json.loads(item)
        except:
            print "json error"
        tc_url = rec['twit_url']
        ex_url = rec['expanded_url']
        print "item", ex_url

        try:
            ex_url = ex_url.lower()  # Expect the encoding errors here, unicode string
        except Exception, e:
            logging.debug("terminating %s" %exp_url)
            inputq.task_done()
            continue