Exemplos de clean_text em Python, exemplos de utility.clean_text em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: tweets_cleaned.py Projeto: aytse/Twitter-Mining

def clean_tweets(input, output):
    """Write the text and timestamp of every tweet in ``input`` to ``output``.

    Each JSON object that has a ``'text'`` key produces one output line of the
    form ``<text> (timestamp: <created_at>)``. When ``contain_unicode`` flags
    either field, both fields are passed through ``utility.clean_text`` before
    being written and the tweet is counted. The final line of the file reports
    how many tweets were counted that way.

    Args:
        input: Source of line-delimited JSON, handed unchanged to
            ``utility.to_json_array``.
        output: Path of the text file to write; it is opened in ``'w'`` mode,
            so existing content is replaced.
    """
    unicode_count = 0

    # The context manager guarantees the output file is closed even if
    # parsing or cleaning raises part-way through the input.
    with open(output, 'w') as output_file:
        # Creates JSON array out of line delimited JSON file
        json_input = utility.to_json_array(input)

        for current_obj in json_input:
            # Objects without a 'text' key are skipped entirely.
            if 'text' not in current_obj:
                continue

            text = parse.parse_json(current_obj, 'text')
            created_at = parse.parse_json(current_obj, 'created_at')

            # Only tweets flagged by the unicode check are cleaned and counted.
            if contain_unicode(text) or contain_unicode(created_at):
                unicode_count += 1
                text = utility.clean_text(text)
                created_at = utility.clean_text(created_at)

            output_file.write(text + ' (timestamp: ' + created_at + ')\n')

        # Aggregate count goes on the last line (no trailing newline).
        output_file.write(str(unicode_count) + ' tweet(s) contained unicode.')

    print("\nTweet clean completed\nOutput in: " + output)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: average_degree.py Projeto: aytse/Twitter-Mining

def average_degree(input, output):
    """Write a running average degree to ``output``, one line per tweet.

    For every JSON object in ``input`` that has a ``'text'`` key, the tweet's
    text and timestamp are cleaned, the adjacency list is updated through
    ``remove_outdated`` and ``identify_hashtags``, and the value returned by
    ``utility.calculate_average`` for the current degree list is written on
    its own line.

    Args:
        input: Source of line-delimited JSON, handed unchanged to
            ``utility.to_json_array``.
        output: Path of the text file to write; it is opened in ``'w'`` mode,
            so existing content is replaced.
    """
    adjacency_list = {}
    previous_threshold_time = None

    # The context manager guarantees the output file is closed even if a
    # helper raises while the input is being processed.
    with open(output, 'w') as output_file:
        # Creates JSON array out of line delimited JSON file
        json_input = utility.to_json_array(input)

        for current_obj in json_input:
            # Objects without a 'text' key are skipped entirely.
            if 'text' not in current_obj:
                continue

            text = parse.parse_json(current_obj, 'text')
            created_at = parse.parse_json(current_obj, 'created_at')

            # Cleans text and timestamp, then parses the timestamp
            text = utility.clean_text(text)
            created_at = utility.clean_text(created_at)
            parsed_time = parse.parse_timestamps(created_at)

            # Threshold time for the current post
            threshold_time = utility.minute_offset(parsed_time)

            # First post seen: seed the previous threshold so the comparison
            # below does not trigger a redundant pruning pass.
            if previous_threshold_time is None:
                previous_threshold_time = threshold_time

            # Prune the adjacency list only when the threshold has changed,
            # then remember the latest threshold for the next post.
            if threshold_time != previous_threshold_time:
                adjacency_list = remove_outdated(adjacency_list, threshold_time)
            previous_threshold_time = threshold_time

            # Adds this post's hashtag edges to the adjacency list
            adjacency_list = identify_hashtags(adjacency_list, text, parsed_time)

            # Named avg_degree so the local does not shadow this function.
            degree_list = create_degree_list(adjacency_list)
            avg_degree = utility.calculate_average(degree_list)

            output_file.write(str(avg_degree) + '\n')

    print("\nAverage degree completed\nOutput in: " + output)

Exemplo n.º 3

0

Exibir arquivo

Arquivo: run.py Projeto: yenniejun/covid-texting-service

def incoming_sms():
    """Build the reply to an incoming text message.

    The message body is read from one of three places, checked in order:

    * query args containing ``from``: body is the ``message`` arg;
    * query args containing ``Message``: body is the ``Message`` arg;
    * otherwise: body is the ``Body`` request value (POST path).

    The body is normalised with ``utility.clean_text`` and matched against the
    keyword commands below; anything unrecognised is passed to
    ``bot.handle_query``.

    Returns:
        For the two query-arg (GET) paths, the reply text itself. For the
        POST path, and whenever no body was found, the ``MessagingResponse``
        rendered with ``str()``.
    """
    resp = MessagingResponse()
    response_text = ''
    is_get_request = False

    # Debug-level log instead of an unconditional print to stdout.
    logger.debug("Request args: %s", request.args)

    if 'from' in request.args:
        is_get_request = True
        logger.info("Servicing a GET request")
        arg_from = request.args.get('from')
        body = request.args.get('message')
        logger.debug(
            f"Message phone number from: {arg_from}, message: {body}")

    elif 'Message' in request.args:
        is_get_request = True
        logger.info("Servicing a GET request FIRST TIME!!!")
        arg_from = request.args.get('PhoneNumber')
        body = request.args.get('Message')
        logger.debug(
            f"Message phone number from: {arg_from}, message: {body}")

    else:
        logger.info("Servicing a POST request")
        body = request.values.get('Body', None)

    if body is None:
        logger.error("There is no body. What is happening?")
        resp.message("THERE IS NO BODY!!! IS IT A ZOMBIE?")
        return str(resp)

    search_term = utility.clean_text(body)
    logger.info(f"Search term: {search_term}")

    # Split once; the first word decides whether this is a feedback message.
    words = search_term.split()

    # helpful message
    if not search_term or search_term == "1usa":
        response_text = generic_message_first_time

    elif search_term in ("hello", "info"):
        response_text = generic_message

    elif search_term == "source":
        logger.debug("SOURCE")
        response_text = source_reply

    elif search_term == "time":
        logger.debug("TIME")
        # Not named `time`, to avoid shadowing the standard module name.
        response_text = cases.get_last_refreshed_time()

    elif search_term == "feedback":
        response_text = "Please text FEEDBACK followed by whatever message you would like to leave"

    elif words and words[0] == "feedback":
        # The feedback itself is only recorded in the log.
        logger.info("FEEDBACK: {0}".format(search_term))
        response_text = "Thank you for your feedback!"

    elif search_term == "total":
        response_text = cases.get_total_cases()

    elif search_term == "cases":
        response_text = "Please specify the name of a US county/parish, US state, or global country.\n\nFor example: Cases in New York\n\nText TOTAL to get global stats"

    elif "cases in" in search_term:
        # Everything after the first "cases in". partition() cannot fail here,
        # whereas the regex "cases in(.*)$" returns no match (and the
        # following .group(1) raises) when the text has an embedded newline.
        case_search = search_term.partition("cases in")[2]

        logger.debug("CASES Searching for cases in: {0}".format(case_search))
        response_text = cases.handle_cases(utility.clean_text(case_search))

    # ask a question
    else:
        result = bot.handle_query(search_term)
        logger.debug(
            f"Returning answer to question: {' '.join(result.split())}")
        response_text = result

    if is_get_request:
        logger.debug(f"Returning response text to EXTexting: {response_text}")
        return response_text

    logger.debug(f"Returning response text to Twilio: {response_text}")
    resp.message(response_text)
    return str(resp)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: cases.py Projeto: yenniejun/covid-texting-service

def handle_cases(search_term):
    """Return the case-count reply for a country, US state, or county.

    After ``fetch_data()`` is called, the search term is normalised through a
    few aliases and then looked up, in order, as:

    1. a country in ``en.countries_worldometer`` (row taken from
       ``world_data``);
    2. a US state in ``en.ctp_state_to_abbreviation_mapping`` (row taken from
       ``state_data``);
    3. anything else, which is delegated to ``get_county``.

    Args:
        search_term: The place name to look up; expected to be already
            cleaned by the caller.

    Returns:
        The result of ``format_response_for_cases`` for a country or state,
        the result of ``get_county`` otherwise, or ``apology_message`` when
        the term is blank or the matching row cannot be found.
    """
    fetch_data()

    if not search_term.split():
        logger.debug("CASES search term is empty. Returning early")
        return apology_message

    logger.debug(f"In handle cases. Looking for {search_term}")

    # Special cases: map common aliases onto the names used by the data.
    # NOTE(review): the "korea" substring test also rewrites terms such as
    # "north korea" to "south korea" -- confirm that is intended.
    if "china" in search_term:
        search_term = "china (mainland)"
    if "korea" in search_term:
        search_term = "south korea"
    if search_term in ("united states", "unitedstates", "us", "america"):
        search_term = "usa"
    if search_term == "washington dc":
        search_term = "district of columbia"

    if search_term in en.countries_worldometer:
        my_country = next(
            (country for country in world_data['reports'][0]['table'][0]
             if utility.clean_text(country['Country']) == search_term),
            None,
        )

        if my_country is None:
            # Known country name, but no matching row in the fetched data
            # (e.g. the name used by the API changed).
            logger.warning("Could not find country for some reason...")
            return apology_message

        logger.debug(f"Found number of cases for {my_country['Country']}")

        obj = {"totalConfirmed": my_country['TotalCases'],
               "totalRecovered": my_country['TotalRecovered'],
               "totalDeaths": my_country['TotalDeaths'],
               "displayName": my_country['Country']}

        return format_response_for_cases(obj)

    # Total for each US state
    if search_term in en.ctp_state_to_abbreviation_mapping:
        abbreviation = en.ctp_state_to_abbreviation_mapping[search_term]
        # Default of None keeps my_state bound when no row matches; without
        # it the check below raised UnboundLocalError instead of apologising.
        my_state = next(
            (state for state in state_data if state['state'] == abbreviation),
            None,
        )

        if my_state is None:
            # should not get here
            logger.warning("Could not find state for some reason...")
            return apology_message

        obj = {"totalConfirmed": my_state['positive'],
               "totalRecovered": my_state['recovered'],
               "totalDeaths": my_state['death'],
               "displayName": search_term.title()}

        return format_response_for_cases(obj)

    # Neither a known country nor a known state: try the county lookup.
    return get_county(search_term)