示例#1
0
文件: db.py 项目: hos/eksitools
def crawlListe(engineUrl, listePath, maxPages=None, drop=False, delay=10):
    """Crawl one liste and persist its basliks and their entries.

    The liste row is merged and committed first, then all basliks and
    liste-baslik rows are stored, and finally the entries of every
    liste-baslik are fetched and stored one baslik at a time (committing
    after each one, so an interrupted crawl keeps what was already fetched).

    Args:
        engineUrl: Database URL handed to ``create_engine``.
        listePath: Path of the liste, passed to ``et.Liste(path=...)``.
        maxPages: Forwarded to ``getAllEntries``; ``None`` by default.
        drop: When true, drop every table of ``et.Base.metadata`` before
            (re)creating the schema.
        delay: Forwarded to ``getAllListeBasliks`` and ``getAllEntries``
            (presumably a pause between requests; confirm in ``et``).

    Returns:
        None.
    """
    print("Creating sqlite engine for %s"%(engineUrl))
    # NOTE(review): the ``encoding`` argument is deprecated in SQLAlchemy 1.4
    # and rejected by 2.0; drop it when upgrading (utf-8 is the default).
    eng = create_engine(engineUrl, encoding='utf-8')

    if drop:
        et.Base.metadata.drop_all(eng)
    et.Base.metadata.create_all(eng)

    session = sessionmaker(bind=eng)()
    try:
        liste = et.Liste(path=listePath)
        liste.setTimestamp(datetime.now())
        # merge() returns the session-attached instance; keep using that one.
        liste = session.merge(liste)
        session.commit()

        basliks, listeBasliks = liste.getAllListeBasliks(delay=delay)
        helper.insert_or_replace_all(session, basliks)
        helper.insert_or_replace_all(session, listeBasliks)
        session.commit()

        total = len(listeBasliks)
        for position, listeBaslik in enumerate(listeBasliks, 1):
            logging.info("Getting entries from baslik %d/%d: %s",
                         position, total, listeBaslik.path)

            entries = listeBaslik.getAllEntries(maxPages=maxPages, delay=delay)
            minTime, maxTime = getMinMaxTimestamp(entries)
            listeBaslik.firstEntryTimestamp = minTime
            listeBaslik.lastEntryTimestamp = maxTime
            helper.insert_or_replace(session, listeBaslik)
            helper.insert_or_replace_all(session, entries)
            session.commit()
    finally:
        # Always hand the connection back, even when a fetch or write fails.
        session.close()
示例#2
0
def main():
    """Fetch a single baslik or a whole liste into an SQLite database.

    Reads the module-level ``args``: ``args.output`` is the SQLite file,
    ``args.baslik`` selects a single baslik whose entries are stored, and
    ``args.liste`` selects a liste whose basliks and liste-baslik rows are
    stored. When neither is given, the parser's help text is printed.
    """
    from sqlalchemy import create_engine
    from sqlalchemy.orm import sessionmaker

    eng = create_engine("sqlite:///"+args.output)
    et.Base.metadata.create_all(eng)
    session = sessionmaker(bind=eng)()

    try:
        if args.baslik:
            baslik = et.Baslik()
            baslik.setPath(args.baslik)
            entries = baslik.getAllEntries()

            helper.insert_or_replace_all(session, entries)
            # Commit explicitly, as crawlListe does after the same helper;
            # otherwise pending writes may be discarded on exit.
            session.commit()

        elif args.liste:
            basliks, listeBasliks = et.Liste(name=args.liste).getAllListeBasliks()

            helper.insert_or_replace_all(session, basliks)
            helper.insert_or_replace_all(session, listeBasliks)
            session.commit()

        else:
            parser.print_help()
    finally:
        # Release the connection on both the success and the error path.
        session.close()