Example #1
 def test_analyze_bounds(self):
     """
     Testing the bounds of tweet values
     """
     ana = Analyzer()
     assert ana.analyze("this is a test neutral tweet") <= 1.0
     assert ana.analyze("this is a test neutral tweet") >= 0.0
Example #2
 def test_analyze_empty(self):
     """
     Testing empty tweets
     and tweets including words not
     in the dictionary
     """
     ana = Analyzer()
     assert ana.analyze("") == 0.5
     assert ana.analyze("hzoehfsdl") == 0.5
Example #3
 def test_analyze_judgement_weight(self):
     """
     Testing the value order
     of arbitrary tweets
     """
     ana = Analyzer()
     assert ana.analyze("i am so happy, great day :D") > ana.analyze(
         "i am so happy :D")
     assert ana.analyze("so sad, feeling depressed :'(") < ana.analyze(
         "so depressed :'(")
Example #4
 def test_analyze_judgement(self):
     """
     Testing the proper judgement of the sentiment analysis:
     * positive and negative
     * best and worst tweet values
     """
     ana = Analyzer()
     assert ana.analyze(":)") > 0.5 and ana.analyze(":'(") < 0.5
     assert ana.analyze("yahoo yahoo yahoo") == 1.0
     assert ana.analyze("zzz zzz zzz zzz zzz") == 0.0
Example #5
 def test_categories_cardinality(self):
     """
     Testing the cardinality of the different
     category sums (positive, negative, neutral)
     """
     ana = Analyzer()
     ctg_count = {'positive': 0, 'negative': 0, 'neutral': 0}
     text = 'great day today lol ;) but still have to work'
     assert ana.categories_cardinality(text, ctg_count) == 15
     assert ctg_count['positive'] == 4  # great day lol ;)
     assert ctg_count['neutral'] == 1  # today
     assert ctg_count['negative'] == 2  # work still
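A hedged sketch of the counting side of this API: the word-to-category lexicon below is lifted from the test's own comments, while the value 15 returned above presumably comes from summed word weights the test does not show, so this sketch returns a plain count instead.

# Sketch only: tally how many lexicon words of each category appear.
CATEGORY = {'great': 'positive', 'day': 'positive', 'lol': 'positive',
            ';)': 'positive', 'today': 'neutral',
            'still': 'negative', 'work': 'negative'}

def categories_cardinality(text, ctg_count):
    total = 0
    for word in text.split():
        category = CATEGORY.get(word)
        if category is not None:
            ctg_count[category] += 1
            total += 1
    return total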
Example #6
 def test_categories_weight(self):
     """
     Testing the weights of the different
     category sums (positive, negative, neutral)
     """
     ana = Analyzer()
     ctg_total = {'positive': 0.0, 'negative': 0.0, 'neutral': 0.0}
     ctg_count = {'positive': 4, 'negative': 2, 'neutral': 1}
     data = [2, 3, 0, 2, 2, 0, -4, 0, 0, -2, 2]
     tot_pos, tot_neg, tot_neu = ana.weight_categories(
         data, ctg_total, ctg_count)
     assert (tot_pos, tot_neg,
             tot_neu) == (99.47646509317096, -49.392885301738836,
                          3.9750077625545726)
Example #7
def analyze(chart_ids: List[str] = [],
            src: str = CHART_PATH,
            dest: str = default_excel_path):
    """
        Analyzes charts given a list of IDs. If you want to analyze all levels
        in src, don't input any IDs.
    """
    if len(chart_ids) == 0:
        with os.scandir(src) as dir_items:
            chart_ids = [
                cid.name for cid in dir_items if is_chart_folder(cid.path)
            ]

    if len(chart_ids) == 0:
        click.echo("No charts in the folder!")
        return

    stat_list = dict()
    src = os.path.abspath(src)
    dest = os.path.abspath(dest)
    os.makedirs(os.path.dirname(dest), exist_ok=True)

    with click.progressbar(chart_ids,
                           label=f"Analyzing {len(chart_ids)} charts...",
                           item_show_func=lambda x: x) as prog_bar:
        for chart_id in prog_bar:
            analyzer = Analyzer(src, chart_id)
            analyzer.start()
            stats = analyzer.get_stats_as_json()
            stat_list[chart_id] = stats

    click.echo(f"Done analyzing, now saving to {dest}...")
    dest_folder = os.path.dirname(dest)
    os.makedirs(dest_folder, exist_ok=True)

    stat_df = pd.DataFrame.from_dict(stat_list, orient="index")
    stat_df.index.name = "chart_id"

    excel_writer = ExcelWriter(stat_df, dest)
    excel_writer.format_table()
    excel_writer.close()

    click.echo("Stats successfully saved.")
Example #8
h = TweetLoader('', path='data/backup/', filename='hillary_2016-07-13.json')
t = TweetLoader('', path='data/backup/', filename='trump_2016-07-13.json')
h.load()
t.load()

# Join them together
full_tweets = pd.concat([h.tweets, t.tweets])

# Assign label (second array) for Hillary(0)/Trump(1) tweets
label_array = np.array([0] * len(h.tweets) + [1] * len(t.tweets))

# Run through part of the model to get the PCA results and loading factors
# This is not the full model, just a part of it for illustration purposes
max_words = 50
mod = Analyzer(full_tweets['text'],
               labels=label_array,
               max_words=max_words,
               load_pca=False)

# mod.load_words()
mod.get_words()
mod.create_dtm()
mod.run_pca()

loadings = mod.loadings
loadings.index = ['PC' + str(j + 1) for j in range(len(loadings))]

# loadings = loadings.iloc[0:30, :]  # Use only a subset of the data
loadings = loadings.transpose()  # Use rotation

words = loadings.index.tolist()
pc_names = loadings.columns.tolist()
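A hedged exploration sketch (not from the original source): with rows as words and columns as principal components after the transpose above, the heaviest-loading words for a component can be ranked directly:

# Ten words with the largest absolute loading on the first component.
print(loadings['PC1'].abs().nlargest(10))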
Example #9
def main(argv=None):
    #read in params
    if argv is None:
        argv = sys.argv[1:]

    file = 'tulalens_survey_sample.csv'
    facet = 'result id'

    #standard python parsing for command line options
    opts = []
    args = []

    try:
        opts, args = getopt.getopt(argv, "hl",
                                   ["help", "list", "file=", "facet="])
    except getopt.GetoptError as msg:
        print(msg, file=sys.stderr)
        print("For help use --help", file=sys.stderr)
        return 2

    if len(args):
        print("Invalid arg(s) %s" % args, file=sys.stderr)
        usage()
        return 2

    for (opt, val) in opts:
        if opt in ("-h", "--help"):
            usage()
            return 0
        if opt in ("-l", "--list"):
            list()
            return 0
        elif opt in ("--file"):
            file = val
        elif opt in ("--facet"):
            facet = val.lower()
        else:
            usage()
            return 2

    print("facet: %s" % facet)
    #check if facet given is in the list of survey questions
    #ideally this allows for quick entries with just the
    #question number, e.g. "--facet Q30"
    long_q = ''  #keep track of the long form for later use
    valid_facet = False
    for long, short in SHORT_QUESTIONS.items():
        #print("checking question: %s" % question)
        if facet in long:
            #turn the facet into easy to use question ids
            #p = "(^q\d\d?[.]).*"
            #m = re.match(p, long)
            facet = short
            long_q = long
            print("Question selected: %s" % long_q)
            valid_facet = True
            break

    if not valid_facet:
        sys.exit("facet selected is not a survey question")

    #parse csv file
    parser = CsvParse(file)
    answers = parser.parse()

    #generate analysis based on options
    #print("number of answer rows after parse: %s" % len(answers))

    analyze = Analyzer(answers)
    #find the unique occurrence of each answer to the question
    answers_count = analyze.group_by(facet)

    mean = analyze.find_mean(facet, answers_count)

    sys.exit()
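A hedged usage sketch: main() takes argv-style options, so it can be exercised directly (the file and facet values are the defaults shown above, not new data):

# Note: main() ends in sys.exit(), so this terminates the interpreter.
main(["--file", "tulalens_survey_sample.csv", "--facet", "result id"])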
Example #10
    "pem_name",
    help=
    "Name of the PEM file that is needed to connect to the data collection servers."
)
parser.add_argument(
    "database_ip",
    help="IP of the Postgres database that the results will be put into.")
parser.add_argument("data_collector_ips",
                    nargs='+',
                    help="List of IPs of the data collection servers.")

args = parser.parse_args()

ec = External_Connector(args.pem_name, args.database_ip)

# Create list of local files, first is twitter data, rest is news data
files = [
    "%s%d.txt" % (args.type, i) for i in range(0, len(args.data_collector_ips))
]

ec.get_data_files(args.data_collector_ips, files)

a = Analyzer()

# Run three analyses for each data file and upload them to database
for f in files:
    sentiment, mood, emoticon = a.run(args.type, f)
    ec.insert_sentiment(args.run_id, args.type, sentiment)
    ec.insert_mood(args.run_id, args.type, mood)
    ec.insert_emoticon(args.run_id, args.type, emoticon)
Example #11
    # timer
    STOP = time.time()

    print(f"\t-----> Done.")
    print(f"\t-----> Execution time: {round(STOP-START, 2)} sec")


if __name__ == "__main__":
    app_settings = {
        'client_id': os.getenv('SPOTIFY_CLIENT_ID'),
        'client_secret': os.getenv('SPOTIFY_CLIENT_SECRET'),
        'redirect_uri': os.getenv('SPOTIFY_REDIRECT_URI')
    }

    # init analyzer
    az = Analyzer(**app_settings)

    # get tracks and simulate lengths
    # get all playlists
    playlists = az.user_playlists(is_author=True)

    start = time.time()
    print("-----> Gathering all tracks...", end="")
    # get all tracks
    all_tracks = []
    for playlist in playlists:
        tracks = az.playlist_tracks(playlist['id'])
        # append the playlist meta data
        # to the track objects
        for i in range(len(tracks)):
            tracks[i]['playlist'] = playlist
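The fragment ends before all_tracks is extended, but presumably each playlist's tracks are appended to it. A hedged continuation sketch, assuming Spotify-style track objects with a 'duration_ms' field and playlist objects with a 'name' field:

from collections import defaultdict

# Tally total listening length per playlist from the gathered tracks.
length_ms = defaultdict(int)
for track in all_tracks:
    length_ms[track['playlist']['name']] += track.get('duration_ms', 0)

for name, ms in sorted(length_ms.items()):
    print(f"{name}: {ms / 60000:.1f} min")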
Example #12
    def analyze(self, expr):
        name = "f"

        self.analyzer = Analyzer(expr)
        self.function_view.set_from_expression(expr, name=name + "(x)")
        self.function_view.set_font_size(40)

        box = ListBox("Dominio")
        self.box.pack_start(box, False, False, 0)

        domain_block = EqualBlock(TextBlock("D(f)"), TextBlock(interval_to_string(self.analyzer.domain)))
        box.make_row_with_child(domain_block)

        box = ListBox("Raíces")
        self.box.pack_start(box, False, False, 0)

        roots_block = TextBlock(set_to_string(self.analyzer.roots.keys()))
        box.make_row_with_child(roots_block)

        box = ListBox("Signo")
        self.box.pack_start(box, False, False, 0)

        if self.analyzer.positive.__class__ != sympy.EmptySet:
            positive_block = TextBlock("+  " + interval_to_string(self.analyzer.positive))
            box.make_row_with_child(positive_block)

        if self.analyzer.negative.__class__ != sympy.EmptySet:
            negative_block = TextBlock("-  " + interval_to_string(self.analyzer.negative))
            box.make_row_with_child(negative_block)

        box = ListBox("Continuidad")
        self.box.pack_start(box, False, False, 0)

        if self.analyzer.continuity == self.analyzer.domain:
            block = TextBlock("f es continua en todo su dominio.")

        else:
            block = TextBlock("f es continua para los x %s %s\n" % (Chars.BELONGS, interval_to_string(self.analyzer.continuity)))

        box.make_row_with_child(block)

        box = ListBox("Ramas")
        self.box.pack_start(box, False, False, 0)

        if self.analyzer.branches[sympy.oo] is not None:
            block = TextBlock("f posee %s cuando" % Branch.get_name(*self.analyzer.branches[sympy.oo]))
            row = box.make_row_with_child(block)
            trend_block = TrendBlock(TextBlock("x"), TextBlock("+" + Chars.INFINITY))
            trend_block.set_margin_left(10)
            row.add_child(trend_block)

        if self.analyzer.branches[-sympy.oo] is not None:
            block = TextBlock("f posee %s cuando" % Branch.get_name(*self.analyzer.branches[-sympy.oo]))
            row = box.make_row_with_child(block)
            trend_block = TrendBlock(TextBlock("x"), TextBlock("-" + Chars.INFINITY))
            trend_block.set_margin_left(10)
            row.add_child(trend_block)

        box = ListBox("Crecimiento")
        self.box.pack_start(box, False, False, 0)

        block = MathView.new_from_expression(self.analyzer.derived, name + "'(x)")
        box.make_row_with_child(block)

        if self.analyzer.derived_things.negative.__class__ != sympy.EmptySet:
            block = TextBlock(name + " decrece en ")
            row = box.make_row_with_child(block)
            row.add_child(make_interval_points(self.analyzer.derived_things.negative))

        if self.analyzer.derived_things.positive.__class__ != sympy.EmptySet:
            block = TextBlock(name + " crece en ")
            row = box.make_row_with_child(block)
            row.add_child(make_interval_points(self.analyzer.derived_things.positive))

        mins, maxs = self.analyzer.get_minimums_and_maximums()

        if mins:
            block = TextBlock("Mínimos: ")
            row = box.make_row_with_child(block)

            for point in mins:
                _x = MathView.new_from_expression(point[0])
                _y = MathView.new_from_expression(point[1])
                block = PointBlock(_x, _y)
                row.add_child(block)

        if maxs:
            block = TextBlock("Máximos: ")
            row = box.make_row_with_child(block)

            for point in maxs:
                _x = MathView.new_from_expression(point[0])
                _y = MathView.new_from_expression(point[1])
                block = PointBlock(_x, _y)
                row.add_child(block)

        box = ListBox("Concavidad")
        self.box.pack_start(box, False, False, 0)

        block = MathView.new_from_expression(self.analyzer.derived2, name + "''(x)")
        box.make_row_with_child(block)

        if self.analyzer.derived2_things.positive.__class__ != sympy.EmptySet:
            block = TextBlock("f tiene concavidad positiva en: ")
            row = box.make_row_with_child(block)
            row.add_child(make_interval_points(self.analyzer.derived2_things.positive))

        if self.analyzer.derived2_things.negative.__class__ != sympy.EmptySet:
            block = TextBlock("f tiene concavidad negativa en: ")
            row = box.make_row_with_child(block)
            row.add_child(make_interval_points(self.analyzer.derived2_things.negative))

        _analyzer = Analyzer(self.analyzer.derived)
        mins, maxs = _analyzer.get_minimums_and_maximums()
        inflection_points = mins + maxs

        if inflection_points:
            block = TextBlock("Puntos de inflexión: ")
            row = box.make_row_with_child(block)

            for point in inflection_points:
                _x = MathView.new_from_expression(point[0])
                _y = MathView.new_from_expression(point[1])
                block = PointBlock(_x, _y)
                block.set_margin_right(10)
                row.add_child(block)

        self.show_all()
Example #13
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

sys.path.insert(1, "../tools")

from analysis import Analyzer
from plotting import Plotter
from training import Trainer

if __name__ == "__main__":
    sns.set()
    plot_dir = "plots"
    plot_file = os.path.join(plot_dir, "rdf.png")
    if not os.path.exists(plot_dir):
        os.mkdir(plot_dir)

    anl = Analyzer()
    plter = Plotter()
    r_cut = 6.0
    r, rdf = anl.calculate_rdf("trajs/training.traj", r_max=r_cut)
    rdf[np.nonzero(rdf)] /= max(rdf)
    cutoff = plter.polynomial(r, r_cut, gamma=5.0)

    plt.plot(r, rdf, label="Radial distribution function")
    plt.plot(r, cutoff, label="Polynomial cutoff, gamma=5.0")
    plt.legend()
    plt.title("Copper radial distribution function")
    plt.xlabel("Radial distance [Angstrom]")
    plt.ylabel("Radial distribution function (normalized to 1)")
    plt.savefig(plot_file)
Example #14
    def __init__(self, market):
        super(MarketThread, self).__init__()
        self.market = market

    def run(self):
        while not self._stop.is_set():
            time.sleep(settings.HEARTBEAT)
            self.market.update()


if __name__ == "__main__":
    q = queue.Queue()

    p = Portfolio(20000)
    e = Executor(p)
    a = Analyzer(portfolio=p)
    m = Market(queue=q)

    trading_thread = TradingThread(queue=q, analyzer=a, events=e)
    market_thread = MarketThread(market=m)

    def receive_signal(signum, stack):
        print("You quit")
        trading_thread.stop()
        market_thread.stop()
        sys.exit(0)

    market_thread.start()
    trading_thread.start()
    signal.signal(signal.SIGINT, receive_signal)
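The _stop event and stop() method used by MarketThread are not shown in this fragment; a minimal sketch of that pattern, assuming a plain threading.Event:

import threading

class StoppableThread(threading.Thread):
    """Base class whose run() loops poll self._stop, as MarketThread does."""

    def __init__(self):
        super().__init__()
        self._stop = threading.Event()

    def stop(self):
        self._stop.set()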
Example #15
import matplotlib.pyplot as plt
import pandas as pd

# Load tweets
s2 = TweetLoader(filename='coolstars.json',
                 track_location=False,
                 path='coolstars19/data/')
s2.load()

df = s2.tweets.copy()
df.index = pd.DatetimeIndex(df['created_at'])

# Using the Analyzer class
max_words = 100
mod = Analyzer(df['text'],
               None,
               max_words=max_words,
               load_pca=False,
               load_svm=False,
               more_stop_words=['rt', 'cs19', 'cs19_uppsala'])

mod.get_words()
mod.create_dtm()
mod.run_pca()

# Exploration
print_dtm(mod.dtm, df['text'], 42)

# Top terms in components
top_factors(mod.load_squared, 0)

# Plots
make_biplot(mod.pcscores, None, mod.loadings, 0, 1)
Example #16
import pandas as pd
import numpy as np

# Some global defaults
max_words = 200

# Load most recent tweets from Hillary Clinton and Donald Trump
# s = TweetLoader(filename='search.json', track_location=True)
s = TweetLoader(filename='search_2016-07-13.json',
                track_location=True,
                path='data/backup/')
s.load()

# Calculate and grab model results
mod = Analyzer(s.tweets['text'],
               max_words=max_words,
               load_pca=True,
               load_svm=True)
predict = mod.load_full_model()  # Hillary=0  Trump=1
s.tweets['predict'] = predict

# Clean up missing coordinates
df = s.tweets['geo.coordinates']
bad = df.apply(lambda x: x is None)
df = df[~bad]
s.tweets = s.tweets[~bad]

lat = df.apply(lambda x: x[0])
lon = df.apply(lambda x: x[1])
# lat, lon = zip(*df)  # Alternate

# Remove Alaska and Hawaii
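The example cuts off here; a hedged sketch of one way the filtering could continue, using an approximate contiguous-US bounding box (the bounds are assumptions, not from the source):

# Keep only points inside a rough contiguous-US box.
in_conus = lat.between(24.5, 49.5) & lon.between(-125.0, -66.0)
lat, lon = lat[in_conus], lon[in_conus]
s.tweets = s.tweets[in_conus]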
Example #17
    print("Unknown model " + args.model + ".\n")
    exit()
# Send model weights to the device
model.to(args.device)
print(model)

#%%
"""
###################
Initialize model and analyzer save
###################
"""
# Apply weight initialization
model.apply(initializer)
# Create an analyzer object
analyzer = Analyzer(args)

#%%
"""
###################
Create optimizer
###################
"""
# Optimizer and Loss
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                       factor=0.5,
                                                       threshold=1e-6)
# Use an L1 reconstruction loss for autoencoder-style models
if args.model in ['ae', 'vae', 'wae', 'vae_flow']:
    criterion = nn.L1Loss()
Example #18
# Merge tweets together, pass to Analyzer
df_tweets = pd.concat([h.tweets['text'], t.tweets['text']],
                      axis=0,
                      ignore_index=True)

# Using the Analyzer class
mod = Analyzer(df_tweets,
               label_array,
               max_words=max_words,
               load_pca=False,
               load_svm=False,
               use_sentiment=True)

# mod.get_words()
# mod.create_dtm()
# mod.run_pca()
# mod.get_sentiment()
# test_predict, test_label = mod.run_svm()

# One-line alternative with defaults
test_predict, test_label = mod.create_full_model()

# Check a PCA plot
# mod.make_biplot(2, 3, max_arrow=0.2)
Example #19
    404 error handler
    used if a nonexistent route
    is requested
    """
    return render_template('404.html'), 404


@app.errorhandler(500)
def server_error(exc):
    """
    500 error handler
    used if there is a server error
    """
    return render_template('500.html'), 500


if __name__ == '__main__':
    analyzer = Analyzer()
    server = SocketIOServer(('', PORT), app, resource="socket.io")
    tw_thread = TweetWeather(server, analyzer, name="Tweet-Weather-Thread")
    tw_thread.daemon = True
    gevent.spawn(tw_thread.new_post, server)
    gevent.spawn(tw_thread.connexion_lost, server)
    print "Application Started: http://localhost:5000"
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        tw_thread.stop()
        server.stop()
        sys.exit()
Example #20
# Assign label (second array) for Hillary(0)/Trump(1) tweets
label_array = np.array([0] * len(h.tweets) + [1] * len(t.tweets))

df_tweets = pd.concat([h.tweets['text'], t.tweets['text']],
                      axis=0,
                      ignore_index=True)

# Using the Analyzer class to get sentiments
mod = Analyzer(df_tweets, label_array)
mod.get_sentiment()

# Group together tweets, labels, and sentiments
temp = pd.concat([h.tweets, t.tweets],
                 axis=0,
                 ignore_index=True)
df = pd.concat([temp, mod.sentiment,
                pd.DataFrame({'label': label_array})],
               axis=1)
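A hedged follow-up sketch (the column name is an assumption, not from the source): with tweets, labels, and sentiment scores joined in one frame, a per-candidate summary is a single groupby away.

# Assumes mod.sentiment contributed a numeric 'sentiment' column.
print(df.groupby('label')['sentiment'].mean())  # 0 = Hillary, 1 = Trump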