def crawlListe(engineUrl, listePath, maxPages=None, drop=False, delay=10):
    """Crawl one liste and persist its basliks and their entries.

    Steps, in order:

    1. Create an engine for ``engineUrl`` and make sure the ``et.Base``
       schema exists (dropping it first when ``drop`` is true).
    2. Merge a freshly timestamped ``et.Liste`` for ``listePath`` into the
       session and commit it.
    3. Store everything returned by ``Liste.getAllListeBasliks``.
    4. For every liste-baslik, fetch its entries, record the first/last
       entry timestamps on it, and store both the baslik and the entries.

    Args:
        engineUrl: Database URL handed to ``create_engine``.
        listePath: Path used to construct the ``et.Liste``.
        maxPages: Forwarded unchanged to ``getAllEntries``.
        drop: When true, drop all ``et.Base`` tables before recreating them.
        delay: Forwarded unchanged to ``getAllListeBasliks`` and
            ``getAllEntries`` (presumably a pause between requests --
            confirm against ``et``).

    Returns:
        None. Results are written to the database only.
    """
    print("Creating sqlite engine for %s" % (engineUrl))
    # NOTE(review): the ``encoding`` keyword was deprecated in SQLAlchemy 1.4
    # and removed in 2.0. It is kept because the installed version is not
    # visible from here -- confirm before upgrading SQLAlchemy.
    eng = create_engine(engineUrl, encoding='utf-8')
    if drop:
        et.Base.metadata.drop_all(eng)
    et.Base.metadata.create_all(eng)

    session = sessionmaker(bind=eng)()
    try:
        liste = et.Liste(path=listePath)
        liste.setTimestamp(datetime.now())
        # merge() returns the session-attached instance; use that one from
        # here on.
        liste = session.merge(liste)
        session.commit()

        basliks, listeBasliks = liste.getAllListeBasliks(delay=delay)
        helper.insert_or_replace_all(session, basliks)
        helper.insert_or_replace_all(session, listeBasliks)
        session.commit()

        total = len(listeBasliks)
        for position, listeBaslik in enumerate(listeBasliks, 1):
            # Lazy %-args: the message is only built if INFO is enabled.
            logging.info("Getting entries from baslik %d/%d: %s",
                         position, total, listeBaslik.path)
            entries = listeBaslik.getAllEntries(maxPages=maxPages, delay=delay)
            minTime, maxTime = getMinMaxTimestamp(entries)
            listeBaslik.firstEntryTimestamp = minTime
            listeBaslik.lastEntryTimestamp = maxTime
            helper.insert_or_replace(session, listeBaslik)
            helper.insert_or_replace_all(session, entries)
            # Commit per baslik so already-crawled data is kept if a later
            # fetch fails.
            session.commit()
    finally:
        # Always release the connection, including when a crawl step raises.
        session.close()
def main():
    """Command-line entry point: crawl a single baslik or a liste into SQLite.

    Reads the module-level ``args``:

    * ``args.output`` -- path of the SQLite file to write.
    * ``args.baslik`` -- when set, fetch all entries of that baslik and
      store them.
    * ``args.liste`` -- otherwise, when set, fetch the basliks and
      liste-basliks of that liste and store them.

    When neither ``args.baslik`` nor ``args.liste`` is set, the parser help
    is printed and nothing is stored. The schema is created in every case,
    before the arguments are inspected.
    """
    from sqlalchemy import create_engine
    from sqlalchemy.orm import sessionmaker

    eng = create_engine("sqlite:///" + args.output)
    et.Base.metadata.create_all(eng)

    session = sessionmaker(bind=eng)()
    try:
        if args.baslik:
            baslik = et.Baslik()
            baslik.setPath(args.baslik)
            helper.insert_or_replace_all(session, baslik.getAllEntries())
        elif args.liste:
            basliks, listeBasliks = et.Liste(name=args.liste).getAllListeBasliks()
            helper.insert_or_replace_all(session, basliks)
            helper.insert_or_replace_all(session, listeBasliks)
        else:
            parser.print_help()
            return

        # crawlListe commits explicitly after the same helper calls, so the
        # helper presumably does not commit on its own; without this commit
        # the inserted rows would be rolled back when the session goes away.
        session.commit()
    finally:
        # Release the connection on every path, including errors and the
        # help-only branch.
        session.close()