Example #1
    def __math_garden_area(plot_surface, build_surface):

        # Only proceed when both raw values were supplied (non-empty, non-None)
        if plot_surface and build_surface:

            # Convert string to int
            plot_surface = Cleaner.string_to_int(plot_surface.strip())
            build_surface = Cleaner.string_to_int(build_surface.strip())

            # If both values were successfully converted:
            if plot_surface and build_surface:
                print(plot_surface - build_surface)
                return plot_surface - build_surface

        return None
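This snippet leans on a Cleaner.string_to_int helper that is not shown. A minimal sketch of what such a helper could look like, assuming it keeps only the digits of the input and falls back to an optional default (Example #3 passes one):

import re

class Cleaner:

    @staticmethod
    def string_to_int(value, default=None):
        # Keep only the digit characters and convert them to an int;
        # return `default` when nothing numeric is left. This is an
        # assumed implementation, not the project's actual helper.
        digits = re.sub(r"\D", "", value or "")
        return int(digits) if digits else default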
Example #2
    def compile(self):

        # Build the search token from the raw question, then geocode it
        self.token = Cleaner(self.questn).make_final_string()
        self.location = Geocoder(self.token).get_location()

        if self.location[0] == "OK":

            try:
                # Enrich the geocoded location with a Wikipedia summary and url
                self.wikiresult = WikiSearcher(self.location).geolookup()
                self.wikiurl = WikiSearcher(self.location).get_url()

                self.finalData = {
                    "status": self.location[0],
                    "lat": self.location[1][0],
                    "long": self.location[1][1],
                    "wikiresult": self.wikiresult,
                    "wikiurl": self.wikiurl,
                    "granpyMessage": random.choice(ok_res),
                }

            except IndexError:

                self.finalData = {
                    "status": "NOK",
                    "warningMessage": random.choice(no_res),
                }

        else:

            self.finalData = {
                "status": self.location[0],
                "warningMessage": random.choice(nok_res),
            }

        return self.finalData
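A hypothetical call site for this pipeline; the owning class name (Granpy below) and its constructor argument are assumptions, only compile() and the returned keys come from the snippet above:

bot = Granpy("Granpy quelle est l'adresse de la Tour Eiffel?")  # hypothetical class name
data = bot.compile()
if data["status"] == "OK":
    print(data["lat"], data["long"], data["wikiurl"], data["granpyMessage"])
else:
    print(data["warningMessage"])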
Example #3
        def search_col_xs_7(title, room_type):
            """
            Search for a given room type in a 'col-xs-7 info-name' field under 'More Info'
            and add its count to 'rooms_number'.
            """
            nonlocal rooms_number

            result = self.__get_text(
                self.__scrap_field_value('div', 'col-xs-7 info-name', title))
            if result:
                rooms_number += Cleaner.string_to_int(result, 1)
                has_found[room_type] = True
Example #4
    def clean(self, tfile):
        """
        Load and clean the raw data.

        Parameters
        ----------
        tfile: string
            path of the tab-separated data file to be processed

        Returns
        -------
        new_data: pd.DataFrame
            the cleaned dataframe
        """

        # load data
        data = pd.read_csv(
            tfile,
            sep='\t',
            header=None,
            names=['polarity', 'aspect', 'target', 'startend', 'message'])
        # clean the data
        cleaner = Cleaner()
        new_data = cleaner.remove_punctuation_dataframe(data)
        new_data = cleaner.remove_digits_dataframe(new_data)
        new_data = cleaner.lemmatization_dataframe(new_data)
        new_data = cleaner.lower_case(new_data)
        return new_data
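The dataframe helpers called above are not shown. A rough sketch of what they might do, assuming the cleaning targets the 'message' column (the column name is a guess) and leaving out the lemmatization step, which would need an NLP library:

import string
import pandas as pd

class Cleaner:

    def remove_punctuation_dataframe(self, df, column='message'):
        # Strip punctuation characters from the text column.
        table = str.maketrans('', '', string.punctuation)
        out = df.copy()
        out[column] = out[column].astype(str).str.translate(table)
        return out

    def remove_digits_dataframe(self, df, column='message'):
        # Drop digit characters from the text column.
        out = df.copy()
        out[column] = out[column].str.replace(r'\d+', '', regex=True)
        return out

    def lower_case(self, df, column='message'):
        # Lower-case the text column.
        out = df.copy()
        out[column] = out[column].str.lower()
        return out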
Example #5
    def start(self):

        # Grab the urls to scrape
        manager = Manager()
        manager.grabber(10)

        # Print starting message
        total_urls_number = len(manager.urls)
        print(f"[+] Scrapping phase started: 0/{total_urls_number}.")

        scrapped_urls = 0

        # Group the urls into pools of url_pool_size
        grouped_total_urls = self.grouper(manager.urls, self.url_pool_size)

        with ThreadPoolExecutor(max_workers=self.scrapper_workers) as executor:
            futures = [
                executor.submit(Manager.scrapper, urls)
                for urls in grouped_total_urls
            ]

            for entry in concurrent.futures.as_completed(futures):

                # Increment the scraped-url counter and print a status message
                scrapped_urls += self.url_pool_size
                print(
                    f"[i] Urls scrapped: {scrapped_urls}/{total_urls_number}.")

                Cleaner(entry.result()).clean()

        print(
            f"[i] Urls scrapped: {total_urls_number}/{total_urls_number} - Complete !"
        )

        # Merge the pickles
        Merger().merge()
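self.grouper is not defined in this snippet; a plausible implementation, following the standard itertools 'grouper' recipe, would be (the class name below is a stand-in):

from itertools import zip_longest

class Scraper:  # stand-in for the class that owns start()

    def grouper(self, iterable, n, fillvalue=None):
        # Split `iterable` into tuples of length `n`, padding the last
        # tuple with `fillvalue` when the items do not divide evenly.
        args = [iter(iterable)] * n
        return zip_longest(*args, fillvalue=fillvalue)

Note that this recipe pads the last pool with None, which Manager.scrapper would have to tolerate.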
Example #6
def split():
    """
    This endpoint expects:
        1 - a file with raw OCR text
        2 - an email address for the patient/submitter of pathology report
    :return: the JSON response returned by the downstream endpoint
    """
    PRODUCTION_ENDPOINT, TEST_ENDPOINT = get_endpoints()
    content = request.get_json()
    text, email = content['text'], content['email'].strip()

    # Remove PHI
    de_identified_text = filter_task(text, scispacy)

    # Clean text
    cleaner = Cleaner(de_identified_text)
    cleaned_text = cleaner.text

    # Finally preprocess
    preprocessor = SpacyPreProcessor(scispacy)
    text, tokens_list = preprocessor.preprocess_sentences(cleaned_text)
    m = {'text': text, 'tokens': tokens_list, 'email': email}
    response = requests.post(url=TEST_ENDPOINT, json=m)
    return response.json()
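A hypothetical client call for this endpoint; the '/split' route and the host/port are assumptions, only the 'text' and 'email' fields come from the snippet above:

import requests

payload = {
    "text": "raw OCR text of the pathology report",
    "email": "submitter@example.com",
}
response = requests.post("http://localhost:5000/split", json=payload)  # placeholder URL
print(response.json())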
Example #7
    #mongo_cols = {'acct_type','user_type','email_domain','venue_state','venue_name'}
    client = MongoClient()
    db = client[client_name]
    tab = db[tab_name]
    cursor = tab.find(None)  #mongo_cols)
    df = pd.DataFrame(list(cursor))
    return df


if __name__ == '__main__':
    # read data
    dataframe = get_data()
    # print(dataframe)
    # clean data
    y = dataframe['acct_type'].str.contains('fraud').astype(int)
    X_train, X_test, y_train, y_test = train_test_split(
        dataframe, y, random_state=142)

    print('cleaning....')
    clean = Cleaner()
    clean.fit(X_train)
    X_train = clean.transform(X_train)
    X_test = clean.transform(X_test)

    print('Fitting....')
    # fit model
    gb = GBModel()
    gb.fit(X_train, y_train)

    print('score: {}'.format(gb.score(X_test, y_test)))
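The first snippet of this example is cut off before the def line of get_data; a hypothetical reconstruction, with placeholder default names for the database and collection:

import pandas as pd
from pymongo import MongoClient

def get_data(client_name='fraud_db', tab_name='events'):
    # Pull every document of the MongoDB collection into a DataFrame.
    # The default names here are placeholders, not the project's real ones.
    client = MongoClient()
    db = client[client_name]
    tab = db[tab_name]
    cursor = tab.find(None)
    return pd.DataFrame(list(cursor))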
Example #8
def test_get_question():
    sut = Cleaner("Granpy quelle est l'adresse de la Poste?")
    assert sut.question == "Granpy quelle est l'adresse de la Poste?"
    """ Turn question into all lowercases with lowercase method
Example #9
def test_final_string():
    sut = Cleaner("Granpy quelle est l'adresse de la Tour Eiffel?")
    assert sut.make_final_string() == "tour+eiffel"
Example #10
def test_concatenate():
    sut = Cleaner("Granpy quelle est l'adresse de la Tour Eiffel?")
    assert sut.make_final_string() == "tour+eiffel"
    """ Returns the final string """
Example #11
def test_token():
    sut = Cleaner("Granpy quelle est l'adresse de la Poste?")
    assert sut.make_final_string() == "poste"
    """ Takes words from wordlist and adds a + in between """
Example #12
def test_turn_string_to_wordlist():
    sut = Cleaner("Granpy quelle est l'adresse de la Tour Eiffel?")
    assert sut.make_final_string() == "tour+eiffel"
    """ Clean worldlist from unwanted commun words with stopword method """
Example #13
def test_turn_lowercase():
    sut = Cleaner("Granpy quelle est l'adresse de la POSTE?")
    assert sut.make_final_string() == "poste"
    """ Turn question (string) in a world list with wordlist method