def get_timeline_count(tweets_file, keywords=None, timezones=None):
    """Count tweets per day from tweets_file, optionally filtered.

    Args:
        tweets_file: path handed to Loader to obtain a tweet dataframe.
        keywords: optional iterable of substrings; if given, keep only rows
            whose "text" column contains at least one of them.
        timezones: optional iterable of substrings; if given, keep only rows
            whose "user_time_zone" column contains at least one of them.

    Returns:
        Tuple of two numpy arrays: matplotlib date numbers (one per day)
        and the corresponding per-day tweet counts.
    """
    loader = Loader(tweets_file)
    data = loader.get_dataframe()

    def contains_any(text, needles):
        # Shared substring test — replaces the two duplicated
        # valid_keyword / valid_timezone loop helpers.
        return any(needle in text for needle in needles)

    # `is not None` per PEP 8 (was `not keywords is None`); the boolean
    # mask needs no `== True` comparison.
    if keywords is not None:
        data = data[data["text"].apply(lambda x: contains_any(x, keywords))]
    if timezones is not None:
        data = data[data["user_time_zone"].apply(lambda t: contains_any(t, timezones))]

    print(len(data.index))

    # Parse timestamps, then bucket rows by calendar date.
    data["created_at"] = data["created_at"].astype("datetime64")
    daily = data["created_at"].groupby(data["created_at"].dt.date).count()
    print(daily)
    daily = daily.to_frame()
    # date2num converts the date index for matplotlib plotting.
    return np.array(date2num(daily.index)), np.array(daily.values)
def test3():
    """Smoke test: load ./data/weight.json and print the resulting dataframe."""
    loader = Loader("./data/weight.json")
    frame = loader.get_dataframe()
    print(frame)
day = dates_set[i] for key in keys: if key in tweet: try: counts[day] += 1 except: counts[day] = 1 df = df.drop(idx) break return df, counts l = Loader("./data/May_16.csv") df = l.get_dataframe() df["created_at"] = pandas.to_datetime(df["created_at"]) dates = df["created_at"].dt.date.to_frame() dates = np.array(date2num(dates)).flatten() print "==== Size : {} ====".format(len(df.index)) # Remove tweets containing keywords mapped to a fixed sentiment df, counts_leave = count_for_set(df, dates, leave_keys) df, counts_other = count_for_set(df, dates, other_keys) df, counts_stay = count_for_set(df, dates, stay_keys) print "==== Size : {} ====".format(len(df.index)) print "Days : ", dates print "==== Leave ===="