Example #1
0
def get_timeline_count(tweets_file, keywords=None, timezones=None):
    """Count tweets per calendar day in *tweets_file*, optionally filtered.

    Parameters
    ----------
    tweets_file : str
        Path handed to the project ``Loader``.
    keywords : iterable of str, optional
        When given, keep only tweets whose "text" contains at least one entry.
    timezones : iterable of str, optional
        When given, keep only tweets whose "user_time_zone" contains one entry.

    Returns
    -------
    tuple of numpy arrays
        (matplotlib date numbers for each day, tweet count per day).
    """
    loader = Loader(tweets_file)
    data = loader.get_dataframe()

    def contains_any(value, needles):
        # True when any needle occurs as a substring of value.
        return any(needle in value for needle in needles)

    if keywords is not None:
        data = data[data["text"].apply(lambda x: contains_any(x, keywords))]

    if timezones is not None:
        data = data[data["user_time_zone"].apply(
            lambda t: contains_any(t, timezones))]

    print(len(data.index))

    # Parse the timestamp strings so we can group on the calendar date.
    data["created_at"] = data["created_at"].astype("datetime64")
    daily = data["created_at"].groupby(data["created_at"].dt.date).count()

    print(daily)
    daily = daily.to_frame()
    return np.array(date2num(daily.index)), np.array(daily.values)
Example #2
0
File: test.py  Project: OwenGY/twexit
def test3():
    # Smoke test: load the weight fixture and dump the resulting DataFrame.
    weight_loader = Loader("./data/weight.json")
    print(weight_loader.get_dataframe())
Example #3
0
        # NOTE(review): this fragment starts inside an enclosing function whose
        # header is outside this chunk — presumably count_for_set(df, dates_set,
        # keys) given the call sites below; confirm against the full file.
        day = dates_set[i]
        for key in keys:
            if key in tweet:
                try:
                    counts[day] += 1
                except:
                    # HACK: bare except doubles as "day not yet in counts";
                    # catching KeyError (or using a defaultdict) would avoid
                    # masking unrelated errors.
                    counts[day] = 1

                # Drop the matched tweet so later passes over df don't recount it.
                df = df.drop(idx)
                # One match is enough — stop scanning keys for this tweet.
                break

    return df, counts


# Load the May 16 tweet dump and convert timestamps into matplotlib
# date numbers, one entry per tweet, for the per-day tallies below.
l = Loader("./data/May_16.csv")
df = l.get_dataframe()
df["created_at"] = pandas.to_datetime(df["created_at"])
dates = df["created_at"].dt.date.to_frame()
dates = np.array(date2num(dates)).flatten()

print "==== Size : {} ====".format(len(df.index))

# Remove tweets containing keywords mapped to a fixed sentiment
# (each call drops the tweets it counted, so the passes are disjoint).
df, counts_leave = count_for_set(df, dates, leave_keys)
df, counts_other = count_for_set(df, dates, other_keys)
df, counts_stay = count_for_set(df, dates, stay_keys)

# Size after keyword-matched tweets have been removed.
print "==== Size : {} ====".format(len(df.index))

print "Days   : ", dates
print "==== Leave ===="