Example #1
import itertools
import multiprocessing
import os

# logger, etl, SentimentAnalyser, SOLR_URL, GAME_COMMENTS_CORE and
# GAME_COMMENTS_FILES_PER_THREAD are assumed to come from the enclosing module.
def load_game_comments(game_comments_dir):
    '''Load GameComments data into a Solr instance.

    :param game_comments_dir: Directory containing XDATA NBA GameComments JSON
        files to load into a Solr instance.
    '''
    logger.info('Starting GameComments ingestion: ' + game_comments_dir)

    # Train the sentiment analyser that we'll use when processing
    # all the game comments.
    logger.info('Training sentiment analyser for comment ingestion')
    SentimentAnalyser.train()
    logger.info('Sentiment analyser training complete')

    # Get a list of all the files we need to load
    data_files = [
        os.path.join(game_comments_dir, f)
        for f in os.listdir(game_comments_dir)
        if os.path.isfile(os.path.join(game_comments_dir, f))
    ]

    # Determine how many workers it will take to process that many files.
    # Use integer division so total_threads stays an int under Python 3.
    total_threads = len(data_files) // GAME_COMMENTS_FILES_PER_THREAD
    # If the file count isn't evenly divisible by the files-per-worker
    # chunk size, add one more worker for the remainder.
    total_threads += 1 if len(data_files) % GAME_COMMENTS_FILES_PER_THREAD else 0

    # Split the data files into chunks to pass to each thread.
    fpt = GAME_COMMENTS_FILES_PER_THREAD
    split_data_files = [
        data_files[(fpt * index):(fpt * index) + fpt]
        for index in range(total_threads)
    ]

    # Process all the files. Note this is a pool of worker processes,
    # despite the thread_pool name.
    thread_pool = multiprocessing.Pool(total_threads)
    results = thread_pool.map(load_game_comments_files, split_data_files)
    thread_pool.close()
    thread_pool.join()

    # Flatten the per-worker result lists into a single list of records.
    results = list(itertools.chain.from_iterable(results))

    # Post the combined documents to Solr in a single request.
    solr_url = SOLR_URL + GAME_COMMENTS_CORE + 'update?commit=true'
    data = etl.prepareDocsForSolr(results,
                                  unmarshall=False,
                                  encoding='latin-1')
    etl.postJsonDocToSolr(solr_url, data)

    logger.info('GameComments ingestion complete')
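
Example #1 hands each chunk of file paths to load_game_comments_files via
pool.map, but the worker itself is not shown on this page. A minimal sketch
of what it might look like, assuming it simply aggregates the output of
parse_comment_files from Example #2:

def load_game_comments_files(comment_files):
    # Hypothetical worker body: parse each file in this chunk and return
    # the combined record list for the pool to collect.
    records = []
    for comment_file in comment_files:
        records.extend(parse_comment_files(comment_file))
    return records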
Example #2
import os

# logger and SentimentAnalyser are assumed to come from the enclosing module.
def parse_comment_files(comment_file):
    logger.debug(comment_file)
    path, file_name = os.path.split(comment_file)
    file_name_split = file_name.split('_')
    game_id = file_name_split[0]
    source = file_name_split[1]

    logger.debug('Processing comment files for game: ' + str(game_id))

    records = []

    with open(comment_file, 'r') as comment_in:
        for index, line in enumerate(comment_in):
            # Each line is 'commenter::comment'; strip the trailing newline
            # so it doesn't end up in the stored comment text.
            split_line = line.rstrip('\n').split('::')

            records.append({
                'id': str(game_id) + '_' + str(index),
                'game_id': game_id,
                'comment_order': index,
                'commenter': split_line[0],
                'comment': split_line[1],
                'source': source,
                'sentiment': SentimentAnalyser.classify(split_line[1])
            })
    return records
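
Given a file whose name encodes the game id and source, the parser emits one
Solr-ready dict per 'commenter::comment' line. A hypothetical run (the file
name format is inferred from the split('_') logic above):

# Contents of a hypothetical 0021500001_espn_comments.txt:
#   LakersFan::What a finish!
records = parse_comment_files('0021500001_espn_comments.txt')
# -> [{'id': '0021500001_0', 'game_id': '0021500001', 'comment_order': 0,
#      'commenter': 'LakersFan', 'comment': 'What a finish!',
#      'source': 'espn', 'sentiment': <classifier output>}]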
Example #5
from flask import jsonify

import SentimentAnalyser as sa

print(sa.classify("ajshjsahf"))

def predict(query):
    # The module was imported under the alias `sa`, so use that name here;
    # jsonify requires a Flask application context.
    sentiment = sa.returnSentiment(query)
    return jsonify(sentiment)
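
Because jsonify only works inside a Flask application context, predict is
presumably a Flask view. A minimal sketch of the missing wiring, assuming a
Flask app; the route path and query-parameter name are assumptions:

from flask import Flask, jsonify, request

import SentimentAnalyser as sa

app = Flask(__name__)

@app.route('/predict')  # route path is an assumption
def predict():
    # Pull the text to classify from the query string (parameter name assumed).
    query = request.args.get('query', '')
    sentiment = sa.returnSentiment(query)
    return jsonify(sentiment)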
Example #7
    try:
        mydb = mysql.connector.connect(host="localhost",
                                       user="******",
                                       passwd="",
                                       database="SentiStock")

        mycursor = mydb.cursor()
        sql = "INSERT INTO SS_News(ss_news_id,ss_source, ss_link, ss_time,ss_entry_time, ss_title, ss_image_link, ss_description, ss_sentiments, ss_symbol, ss_category,ss_full_description) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
        count = 0  # running total of inserted rows
        for i in range(0, len(out_df)):
            sent = 'NA'
            symbol = 'NA'
            description = str(out_df['description'][i]).replace("#39",
                                                                "'").strip()
            if out_df['title'][i] is not None and out_df['title'][i] != ' ':
                sent = sa.classify(out_df['title'][i])
                symbol = se.ExtractSymbol(out_df['title'][i])
            if symbol == '':
                symbol = 'NA'
            # The cleaned-up description is used for both ss_description and
            # ss_full_description.
            values = (str(out_df['titleDigest'][i]), str(out_df['source'][i]),
                      str(out_df['link'][i]), str(out_df['date'][i]),
                      getCurrentDateTime(), out_df['title'][i],
                      str(out_df['imagesrc'][i]),
                      description, sent, symbol,
                      str(out_df['category'][i]),
                      description)
            mycursor.execute(sql, values)
            mydb.commit()
            count += 1
            print(count, "record inserted.")
            # ClubbSimilar(out_df['title'][i], out_df['titleDigest'][i])
    except mysql.connector.Error as err:
        # Report connector errors so the try block has a handler.
        print("Database error:", err)
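
Committing after every execute() costs one round trip per record; the same
insert can be batched with executemany and committed once. A sketch, where
build_values is a hypothetical helper that builds the per-row tuple exactly
as the loop above does:

        rows = [build_values(i) for i in range(len(out_df))]  # build_values is hypothetical
        mycursor.executemany(sql, rows)
        mydb.commit()
        print(len(rows), "records inserted.")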
Example #8
                    # Build a UTC timestamp from the pre-split date (temp1)
                    # and time (temp2) fields, then compare its Julian date
                    # against yesterday's to flag stale items.
                    ts = pd.Timestamp(year=int(temp1[0]), month=int(temp1[1]),
                                      day=int(temp1[2]), hour=int(temp2[0]),
                                      minute=int(temp2[1]),
                                      second=int(temp2[2]), tz='utc')
                    ts = ts.to_julian_date()
                    cts = pd.Timestamp.now(tz='utc') - pd.Timedelta('1 day')
                    cts = int(cts.to_julian_date())
                    if ts < cts:
                        flag = 1
                    else:
                        flag = 0
                elif child.tag == 'image':
                    imagesrc = child.text
            if flag == 1:
                continue
            sent = sa.classify(title) if title is not None else None
            symbol = se.ExtractSymbol(title) if title is not None else None
            if symbol == '':
                symbol = 'NA'
            print(symbol)
            if flag == 0:
                row = pd.Series([title, description, source, link, date,
                                 imagesrc, sent, symbol, category],
                                index=df_cols)
                # DataFrame.append was removed in pandas 2.0; concatenate a
                # one-row frame instead.
                out_df = pd.concat([out_df, row.to_frame().T],
                                   ignore_index=True)

    mydb = mysql.connector.connect(host="192.168.2.89",
                                   user="******",
                                   passwd="uatmysql",
                                   database="PythonNews")

    mycursor = mydb.cursor()
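
The staleness test in Example #8 reduces to "is this item older than one
day?". The same check reads more directly as a plain timestamp comparison; a
sketch assuming the date and time arrive as strings in 'YYYY-MM-DD' and
'HH:MM:SS' form:

import pandas as pd

def is_stale(date_str, time_str):
    # Assumed formats: date_str like '2024-01-02', time_str like '03:04:05'.
    ts = pd.Timestamp(date_str + ' ' + time_str, tz='utc')
    cutoff = pd.Timestamp.now(tz='utc') - pd.Timedelta('1 day')
    return ts < cutoff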
Example #9
import SentimentAnalyser

query = "This feels great"
sentiment = SentimentAnalyser.returnSentiment(query)
print(sentiment)