Example #1
def getBooksV2(booknames, store):
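    # Scrape price listings for each title in booknames from the given store
    # ("jam" or "san"), reusing the per-store cache unless flag_nocache is set.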
    print("[V2] Queries: " + str(booknames))
    result = {}
    for book in booknames:
        scraper.kirjat_scrape_err = ""
        bookname = book.replace("\r", "").replace("\n", "")

        cacheToUse = cache
        if store == "san":
            cacheToUse = cache_san
        usedCache = False
        if not bookname in cacheToUse.keys() or flag_nocache:
            print("[V2] \"" + bookname + "\" for the store \"" + store +
                  "\" not in cache, scraping...")
            if store == "jam":
                books = scrape_jam(bookname)
            elif store == "san":
                books = scrape_san(bookname)
            else:
                books = []
                print(f"[V2] Invalid store \"{store}\" specified")
            err = scraper.clean(scraper.kirjat_scrape_err)
            cacheToUse[bookname] = (books, err)
        else:
            usedCache = True
            print("[V2] \"" + bookname + "\" for the store \"" + store +
                  "\" in cache.")
            books, err = cacheToUse[bookname]
            scraper.kirjat_scrape_err = ""
        result[book] = {
            "books": booklistTodictList(books),
            "result_was_cached": usedCache,
            "errors": err
        }
    return result
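A minimal usage sketch for getBooksV2, assuming the module-level names it relies on (cache, cache_san, flag_nocache and the scrape_* helpers) are defined as in the example above; the book titles and the store code are placeholders:

titles = ["Kalevala", "Seitsemän veljestä"]   # hypothetical queries
prices = getBooksV2(titles, "jam")            # "jam" or "san", per the branches above

for title, entry in prices.items():
    print(title, "cached:", entry["result_was_cached"], "errors:", entry["errors"])
    for book in entry["books"]:
        print("   ", book)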
Example #2
def get_data_TOI(data_path):
    """ Get training data from the articles folder. """
    all_data = []
    print(CATEGORIES.keys())
    for path in glob(os.path.join(data_path, 'toi.json')):
        with open(path, 'r') as jsonfile:
            data = json.loads(jsonfile.read())
            for article in data.get('articles'):
                all_data.extend([scraper.clean(article['content'])])
    return Bunch(categories=CATEGORIES.keys(), values=None, data=all_data)
Example #3
def get_data(data_path):

    all_data = []

    for path in glob(os.path.join(data_path, '*.json')):
        with open(path, 'r') as jsonfile:
            data = json.loads(jsonfile.read())
            for article in data.get('articles'):
                all_data.extend([scraper.clean(article['content'])])

    return Bunch(categories=scraper.CATEGORIES.keys(),
                 values=None,
                 data=all_data)
Example #4
def get_data(data_path):
    """ Get training data from the articles folder. """
    all_data = []

    for path in glob(os.path.join(data_path, '*.json')):
        with open(path, 'r') as jsonfile:
            data = json.loads(jsonfile.read())
            for article in data.get('articles'):
                all_data.extend([scraper.clean(article['content'])])

    return Bunch(categories=scraper.CATEGORIES.keys(),
                 values=None,
                 data=all_data)
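An illustrative call of get_data, assuming the target folder (the path below is a placeholder) holds *.json files whose "articles" entries carry a "content" field, as the loop above expects:

dataset = get_data("articles/")        # hypothetical data folder
print(list(dataset.categories))        # category names from scraper.CATEGORIES
print(len(dataset.data), "cleaned articles loaded")
if dataset.data:
    print(dataset.data[0][:200])       # preview of the first cleaned article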
Example #5
def run():
    if raw_input("Are you sure? Then write 'yes'") == "yes":
        clean()
Example #6
def query():
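    # Dispatch on which form key is present: "query"/"querysan" look up a single
    # title, "querym"/"querymsan" accept newline-separated titles; the *san
    # variants use the second store and its own cache.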
    print(request.form)
    if 'query' in request.form.keys():
        bookname = request.form.get('query')
        usedCache = False
        if not bookname in cache.keys() or flag_nocache:
            print("\"" + bookname + "\" not in cache, scraping...")
            books = scrape_jam(bookname)
            err = scraper.clean(scraper.kirjat_scrape_err)
            cache[bookname] = (books, err)
        else:
            usedCache = True
            print("\"" + bookname + "\" in cache.")
            books, err = cache[bookname]
        scraper.kirjat_scrape_err = ""
        return jsonify({
            "data": booklistTodictList(books),
            "cached_result": usedCache,
            "err": err,
            "query": bookname
        })
    if 'querysan' in request.form.keys():
        bookname = request.form.get('querysan')
        usedCache = False
        if not bookname in cache_san.keys() or flag_nocache:
            print("\"" + bookname + "\" not in cache, scraping...")
            books = scrape_san(bookname)
            err = scraper.clean(scraper.kirjat_scrape_err)
            cache_san[bookname] = (books, err)
        else:
            usedCache = True
            print("\"" + bookname + "\" in cache.")
            books, err = cache_san[bookname]
        scraper.kirjat_scrape_err = ""
        return jsonify({
            "data": booklistTodictList(books),
            "cached_result": usedCache,
            "err": err,
            "query": bookname
        })
    if 'querym' in request.form.keys():
        booknames = request.form.get('querym').split("\n")
        print("Queries: " + str(booknames))
        result = []
        query = []
        for book in booknames:
            scraper.kirjat_scrape_err = ""
            bookname = book.replace("\r", "").replace("\n", "")
            query.append(bookname)
            usedCache = False
            if not bookname in cache.keys() or flag_nocache:
                print("\"" + bookname + "\" not in cache, scraping...")
                books = scrape_jam(bookname)
                err = scraper.clean(scraper.kirjat_scrape_err)
                cache[bookname] = (books, err)
            else:
                usedCache = True
                print("\"" + bookname + "\" in cache.")
                books, err = cache[bookname]
                scraper.kirjat_scrape_err = ""
            result.append({
                "data": booklistTodictList(books),
                "cached_result": usedCache,
                "err": err,
                "query": query
            })
        return jsonify(result)
    if 'querymsan' in request.form.keys():
        booknames = request.form.get('querymsan').split("\n")
        print("Queries: " + str(booknames))
        result = []
        query = []
        for book in booknames:
            scraper.kirjat_scrape_err = ""
            bookname = book.replace("\r", "").replace("\n", "")
            query.append(bookname)
            usedCache = False
            if not bookname in cache_san.keys() or flag_nocache:
                print("\"" + bookname + "\" not in cache, scraping...")
                books = scrape_san(bookname)
                err = scraper.clean(scraper.kirjat_scrape_err)
                cache_san[bookname] = (books, err)
            else:
                usedCache = True
                print("\"" + bookname + "\" in cache.")
                books, err = cache_san[bookname]
                scraper.kirjat_scrape_err = ""
            result.append({
                "data": booklistTodictList(books),
                "cached_result": usedCache,
                "err": err,
                "query": query
            })
        return jsonify(result)
    return jsonify({
        "code": 400,
        "reason":
        "400: Query form must contain the key \"query\" or \"querym\"",
        "stacktrace": ""
    }), 400
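A hedged client-side sketch for exercising this handler with the requests library; the http://localhost:5000/query URL is an assumption, since the route decorator is not shown in the snippet:

import requests

# Single-title lookup: the handler reads the "query" form field.
resp = requests.post("http://localhost:5000/query", data={"query": "Kalevala"})
print(resp.json()["cached_result"], resp.json()["err"])

# Multi-title lookup: "querym" takes newline-separated titles.
resp = requests.post("http://localhost:5000/query",
                     data={"querym": "Kalevala\nSeitsemän veljestä"})
for entry in resp.json():
    print(entry["query"], len(entry["data"]), "results")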
Example #7
    def handle(self, *args, **options):
        if raw_input("Are you sure? Then write 'yes'") == "yes":
            clean()