import base64
import time

# Database, Parser, connect_to_tor, PATH and TORBUNDLEHEADER are defined
# elsewhere in this project and are used here unchanged.


def crawl(urloc: str) -> tuple[str, list]:
    """Fetch a single URL over Tor, persist the result, and return
    (urloc, list of newly discovered .onion URLs to crawl next)."""
    db = Database(PATH)
    parser = Parser()
    session = connect_to_tor()

    # Select here to find if the URL is already in the db.
    try:
        if len(db.isCrawled(urloc)) > 0:
            # URL already crawled; nothing new to report.
            return urloc, []
    except Exception as e:
        print(e)

    def store(insert_data: dict) -> None:
        # Insert the record; if the URL already exists (UNIQUE constraint),
        # update the existing row instead.
        try:
            db.insert(insert_data)
        except Exception as e:
            if "UNIQUE constraint failed" in str(e):
                try:
                    db.update(insert_data)
                except Exception:
                    pass

    try:
        try:
            r = session.get(urloc, headers=TORBUNDLEHEADER, timeout=20)
            r.raise_for_status()
        except Exception as err:
            # Record the failure so the URL is not immediately retried.
            store({
                "protocol": "Error",
                "url": urloc,
                "data": base64.b64encode(str(err).encode()),
                "lastvisit": int(time.time()),
            })
            # Return an empty link list on fetch failure so callers
            # always receive a (str, list) tuple.
            return urloc, []

        # Successful fetch: store the page and extract its outgoing links.
        urls = parser.urlExtractor(urloc, r.text)
        store({
            "protocol": urloc.split("://")[0],
            "url": urloc,
            "data": base64.b64encode(r.content),
            "lastvisit": int(time.time()),
        })

        retUrls = []
        for key, links in urls.items():
            if key in ("http", "https"):
                # Crawl only http(s) links that point at .onion sites.
                for url in links:
                    if parser.tldExtractor(url) == "onion":
                        retUrls.append(url)
        return urloc, retUrls
    except Exception:
        return urloc, []
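
# A minimal sketch of how crawl() could drive a breadth-first crawl loop.
# The seed URL and the single-threaded frontier/seen bookkeeping below are
# assumptions for illustration, not part of the original module.
if __name__ == "__main__":
    seed = "http://exampleonionaddress.onion"  # hypothetical seed URL
    frontier = [seed]
    seen = set(frontier)
    while frontier:
        current = frontier.pop(0)
        _, new_urls = crawl(current)
        # Enqueue only URLs we have not scheduled before.
        for url in new_urls:
            if url not in seen:
                seen.add(url)
                frontier.append(url)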