def add_twit_user():
    """Store a Twitter user and their recent tweets, then render the add page.

    On a POST request the form field ``username`` is looked up through the
    Twitter API. The user's id, screen name, display name and follower count
    are copied into a new ``Users`` row. Up to 300 original tweets (retweets
    and replies excluded, extended text mode) are then fetched, each tweet's
    full text is sent to the embedding service, and one ``Tweet`` row per
    tweet is added. Everything is committed in a single transaction.

    Returns:
        The rendered ``add_routesdd.html`` template, for POST and non-POST
        requests alike.
    """
    if request.method == "POST":
        username = request.form["username"]
        api = twitter_api()
        users = api.get_user(screen_name=username)

        db_users = Users()
        db_users.id = users.id
        db_users.username = users.screen_name
        db_users.full_name = users.name
        db_users.followers = users.followers_count
        db.session.add(db_users)
        print('Users')

        # Tweet text, untruncated thanks to tweet_mode="extended".
        raw_tweets = api.user_timeline(
            users.screen_name,
            count=300,
            include_rts=False,
            exclude_replies=True,
            tweet_mode="extended",
        )
        print('raw_tweets')

        # Only touch the embedding service when the user has at least one
        # tweet. The client is loop-invariant, so it is built once here
        # instead of once per tweet.
        if raw_tweets:
            en = EmbeddingClient(host='54.180.124.154', port=8989)
            for tweet in raw_tweets:
                one_tweet = [tweet.full_text]
                print('one_tweet')
                embedding_result = en.encode(texts=one_tweet)
                print('embedding_result')
                insert_tweet = Tweet(
                    id=tweet.id,
                    text=tweet.full_text,
                    # encode() was given a single text, so item 0 is its
                    # embedding.
                    embedding=embedding_result[0],
                    user_id=users.id,
                )
                db.session.add(insert_tweet)

        db.session.commit()

    return render_template('add_routesdd.html')
def analyze():
    """Predict which stored user most likely wrote the submitted text.

    On a POST request the form field ``text`` is embedded through the
    embedding service and classified. When a file exists at ``FILEPATH`` the
    pickled model in ``model.pkl`` is reused; otherwise a
    ``LogisticRegression`` is fitted on every stored tweet embedding
    (labelled with the owning user's id) and pickled to ``model.pkl`` once
    the prediction has succeeded.

    Returns:
        The rendered ``analytics.html`` template with ``prediction`` set to
        the ``Users`` row whose id equals the predicted label. For non-POST
        requests the label stays 0, so the row with id 0 (if any) is looked
        up.
    """
    prediction = 0

    if request.method == "POST":
        print(dict(request.form))
        result = request.form

        # Both branches below need the client. It used to be created only
        # when the model file existed, so the training branch failed with
        # UnboundLocalError on first use.
        en = EmbeddingClient(host='54.180.124.154', port=8989)

        # NOTE(review): existence is checked on FILEPATH but the model is
        # read from / written to 'model.pkl' -- confirm both name the same
        # file.
        needs_training = not os.path.isfile(FILEPATH)

        if needs_training:
            # Collect every stored embedding together with its user id.
            # This is only needed for fitting, so it is skipped when a saved
            # model is reused.
            text = []
            labels = []
            for user in Users.query.all():
                tweets = Tweet.query.with_entities(
                    Tweet.embedding).filter(Tweet.user_id == user.id).all()
                for tweet in tweets:
                    append_to_with_label(text, tweet, labels, user.id)

            model = LogisticRegression(warm_start=True)
            model.fit(text, labels)
        else:
            # NOTE: unpickling executes arbitrary code; the model file must
            # only ever be written by this application.
            with open('model.pkl', 'rb') as model_file:
                model = pickle.load(model_file)

        pred_id = model.predict(en.encode(texts=[result['text']]))
        prediction = int(pred_id[0])

        # Persist a freshly trained model only after it produced a
        # prediction, matching the original ordering.
        if needs_training:
            with open('model.pkl', 'wb') as model_file:
                pickle.dump(model, model_file)

    # Prediction result
    pred_res = Users.query.filter(Users.id == prediction).first()
    return render_template('analytics.html', prediction=pred_res)
def analyze():
    """Guess which of two selected users more likely wrote a given text.

    On a POST request the form fields ``User1`` and ``User2`` select two
    ``Users`` rows by id. A ``RandomForestClassifier`` is fitted on the
    stored embeddings of both users' tweets, labelled with the owner's
    username, and then applied to the embedding of the form field ``text``.

    Returns:
        The rendered ``analytics.html`` template. It receives all users as
        ``users``, the classifier output as ``predict`` and the submitted
        text as ``compare_text``. For non-POST requests ``predict`` and
        ``compare_text`` are empty strings.
    """
    users = Users.query.all()

    if request.method != 'POST':
        # Nothing has been submitted yet: render the page with empty results
        # rather than falling through without a response.
        return render_template("analytics.html", users=users, predict="",
                               compare_text="")

    raw_user_1 = request.form["User1"]
    raw_user_2 = request.form["User2"]
    user_1 = Users.query.filter_by(id=raw_user_1).one()
    user_2 = Users.query.filter_by(id=raw_user_2).one()

    # Training set: every tweet embedding of both users, labelled with the
    # username of its author.
    embedding = []
    labels = []
    for user in (user_1, user_2):
        for tweet in user.tweets:
            embedding.append(tweet.embedding)
            labels.append(user.username)

    classifier = RandomForestClassifier()
    classifier.fit(embedding, labels)

    compare_text = request.form['text']
    en = EmbeddingClient(host='54.180.124.154', port=8989)
    predict_embedding = en.encode(texts=[compare_text])
    prediction = classifier.predict(predict_embedding)

    print(f"Compare string {compare_text}")
    print(f"Prediction Results {prediction}")

    return render_template("analytics.html", users=users, predict=prediction,
                           compare_text=compare_text)
from flask_sqlalchemy import SQLAlchemy
from flask_migrate import Migrate
from embedding_as_service_client import EmbeddingClient

db = SQLAlchemy()
migrate = Migrate()
en = EmbeddingClient(host='54.180.124.154', port=8989)

# Twitter ids are 64-bit, so a plain Integer overflows on databases with
# 32-bit INTEGER columns. SQLite keeps INTEGER so its primary-key behaviour
# is unchanged. Existing non-SQLite databases need a migration.
_TwitterId = db.BigInteger().with_variant(db.Integer, "sqlite")


# Database table definitions
class Users(db.Model):
    """A Twitter account stored by the application."""

    id = db.Column(_TwitterId, primary_key=True)
    # Holds a screen name, which is text; this column was declared Integer.
    username = db.Column(db.String, nullable=False)
    # NOTE(review): display names are not guaranteed to be unique on
    # Twitter -- confirm the unique constraint is intended here.
    full_name = db.Column(db.String, nullable=False, unique=True)
    followers = db.Column(db.Integer, nullable=False)
    location = db.Column(db.String, nullable=False)

    def __repr__(self):
        return f"<User{self.id} {self.username}>"


class Tweets(db.Model):
    """A single tweet, its pickled embedding, and the user it belongs to."""

    id = db.Column(_TwitterId, primary_key=True)
    text = db.Column(db.String, nullable=False)
    # Stored pickled, so any picklable Python object can be saved here.
    embedding = db.Column(db.PickleType)
    user_id = db.Column(db.BigInteger, db.ForeignKey('users.id'))

    # The backref exposes the reverse side as ``Users.tweets``.
    users = db.relationship('Users', backref='tweets', lazy=True)
from embedding_as_service_client import EmbeddingClient
from typing import List
from sklearn.linear_model import LogisticRegression

en = EmbeddingClient(host="54.180.124.154", port=8989)


def get_embeddings(text_list: List[str]) -> List[List[float]]:
    """Encode *text_list* with the module-level embedding client.

    Args:
        text_list: The strings to embed, passed to the client as ``texts``.

    Returns:
        Whatever ``en.encode`` returns for the given texts, unmodified.
    """
    return en.encode(texts=text_list)