Example #1
def generate_TFIDF_table():
    """
    Generates TF-IDF (Term Frequency - Inverse Document Frequency) table of
    the messages.

    Returns
    -------
    tfidf : pd.DataFrame
        TF-IDF table.

    """
    tfidf = pd.DataFrame()
    tfidf['_id'] = []

    init_db()
    for msg in CorrectedMessage.objects():
        text = base64.urlsafe_b64decode(msg.bodyBase64Plain.encode()).decode()
        tfidf = tfidf.append(tf_vector(text, tfidf.columns, tfidf, msg.msg_id,
                                       msg.corrections),
                             ignore_index=True)

    num_msg = len(tfidf)
    for word in tfidf.drop(columns='_id').columns:
        idf = log(num_msg / (len(tfidf[tfidf[word] > 0]) + 1))
        tfidf[word] = tfidf[word].apply(lambda tf: tf * idf)

    return tfidf
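For reference, a minimal sketch of the smoothed IDF weighting applied in the second loop, on hypothetical term counts (the tf_vector helper above would normally produce these rows):

from math import log

import pandas as pd

counts = pd.DataFrame({'_id': ['a', 'b', 'c'],
                       'hello': [2, 0, 1],   # in 2 of 3 messages
                       'world': [1, 1, 1]})  # in all 3 messages
num_msg = len(counts)
for word in counts.drop(columns='_id').columns:
    # idf = log(N / (df + 1)); with this smoothing, a term present in
    # every message gets a weight at or below zero
    idf = log(num_msg / (len(counts[counts[word] > 0]) + 1))
    counts[word] = counts[word] * idf
print(counts)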
Example #2
    def init_db_and_get_data(self):
        init_db()
        account = Account.objects.get(account_name='chris_whu')
        user = User.objects.get(account=account, user_name='xrb')
        device = Device.objects.filter(user=user)[0]

        return account, user, device
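A slightly more idiomatic way to fetch the first matching device (a sketch; unlike indexing with [0], .first() returns None instead of raising IndexError when no device exists):

device = Device.objects.filter(user=user).first()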
Example #3
def connect_db():
    """Connects to the specific database."""

    if not os.path.exists(app.config['DATABASE']):
        initdb.init_db(app.config['DATABASE'], 'schema.sql')
    rv = sqlite3.connect(app.config['DATABASE'])
    rv.row_factory = sqlite3.Row
    return rv
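A hedged sketch of how connect_db is commonly wired into Flask's request lifecycle (get_db and close_db are illustrative names, not part of the snippet above):

from flask import g

def get_db():
    """Open a connection once per request and cache it on flask.g."""
    if not hasattr(g, 'sqlite_db'):
        g.sqlite_db = connect_db()
    return g.sqlite_db

@app.teardown_appcontext
def close_db(error):
    """Close the cached connection when the request context tears down."""
    if hasattr(g, 'sqlite_db'):
        g.sqlite_db.close()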
Example #4
    @classmethod
    def setUpClass(cls):
        config = init_db()
        global client_app
        wrapper = FlaskAppWrapper(providers=[AIDAIndex()],
                                  external_config=config)
        wrapper.create_app()
        client_app = wrapper.test_app()
Example #5
def main():
    init_db()
    df = pd.DataFrame([
        without_keys(m, cf.UNUSED_FIELDS)
        for m in json.loads(Metrics.objects().to_json())
    ])
    # set_index returns a new frame, so the original result was discarded;
    # drop=False keeps '_id' as a column, which the drops below rely on
    df = df.set_index('_id', drop=False)
    transform_recipients_columns(df)
    # df.to_csv('dataframe.csv', index = False)
    # df = pd.read_csv('dataframe.csv')

    describe_dataframe(df, 'general_description')
    for t in RelationshipType:
        describe_dataframe(df[df.relationship.eq(t.name)],
                           t.name + '_description')

    normalized = df.drop(columns=['_id', 'relationship'])
    cols = list(normalized.columns.values)
    for c in cols:
        normalized[c] = (df[c] - df[c].min()) / (df[c].max() - df[c].min())

    # normalized.to_csv('normalized.csv', index = False)
    # normalized = pd.read_csv('normalized.csv')
    study_kmeans_silhouette_score('norm_', normalized)
    study_dbscan_silhouette_score('norm_', normalized, df['relationship'])
    get_scatter_matrix('norm_', normalized, df['relationship'])

    study_kmeans_silhouette_score('data_',
                                  df.drop(columns=['_id', 'relationship']))
    study_dbscan_silhouette_score('data_',
                                  df.drop(columns=['_id', 'relationship']),
                                  df['relationship'])
    get_scatter_matrix('data_', df.drop(columns=['_id', 'relationship']),
                       df['relationship'])

    pca_analysis(normalized, df['relationship'], 'norm_')
    pca_analysis(df.drop(columns=['_id', 'relationship']), df['relationship'],
                 'data_')

    for c in ['gini', 'entropy']:
        classify_with_decission_tree(df.drop(columns=['_id', 'relationship']),
                                     df['relationship'], c, 'data_')
        classify_with_decission_tree(normalized, df['relationship'], c,
                                     'norm_')
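The per-column loop above performs min-max scaling; pandas broadcasting expresses the same thing in one vectorized step (a sketch, equivalent to the loop):

features = df.drop(columns=['_id', 'relationship'])
normalized = (features - features.min()) / (features.max() - features.min())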
Example #6
    def analyse(self, nextPageToken=None, sign=None):
        """
        Analyses all the messages of the given user.
        
        Parameters
        ----------
        nextPageToken: str, optional
            Token of the next page for extracting messages. The default is None.
        sign: str, optional
            Signature of the user in his emails. The default value is None.
            
        Returns
        -------
        None.
        
        """
        init_db()
        SessionTypoError.drop_collection()
        # PreprocessedMessage.drop_collection()
        # CorrectedMessage.drop_collection()
        # Metrics.drop_collection()

        self.__quota = self.__extractor.extract_sent_msg(
            self.__nres, nextPageToken)

        for ext in ExtractedMessage.objects():
            self.__preprocess_message(ext.to_json(), sign)

        for prep in PreprocessedMessage.objects():
            self.__correct_message(prep.to_json())

        for cor in CorrectedMessage.objects():
            self.__measure_style(cor.to_json())

        with open('log.txt', 'a') as f:
            f.write('\nANALYSIS FINISHED:\n')
            f.write(
                f'{ExtractedMessage.objects().count()} preprocessed messages.\n'
            )
            f.write(
                f'{PreprocessedMessage.objects().count()} typo-corrected messages.\n'
            )
            f.write(
                f'{CorrectedMessage.objects().count()} measured messages.\n')
Example #7
def update_realtime(symbols: Union[str, List[str]]):
    DatabaseUtils.save_realtime(GetStockData.get_real_time_price(symbols))


def update_daily(symbols: Union[str, List[str]]):
    DatabaseUtils.save_daily_history(GetStockData.get_daily_data(symbols))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('symbols', nargs='+')
    args = parser.parse_args()
    print(args.symbols)

    init_db(args.symbols)

    scheduler = BlockingScheduler()
    scheduler.add_job(update_realtime,
                      'interval',
                      seconds=3,
                      args=[
                          args.symbols,
                      ])
    scheduler.add_job(update_daily,
                      'interval',
                      minutes=1,
                      args=[
                          args.symbols,
                      ])
    try:
        # BlockingScheduler.start() blocks until interrupted
        scheduler.start()
    except (KeyboardInterrupt, SystemExit):
        pass
Example #8
import os

from flask import Flask, request, jsonify
from stylemeasuring.stylemeter import StyleMeter
from initdb import init_db
from confanalyser import NLP

os.chdir(initial_dir)

app = Flask(__name__)
stylemeter = StyleMeter(NLP)

@app.route('/stylemeter', methods=['POST'])
def measure_style():
    """
    Measures the writing style of the given message.

    Returns
    -------
    str: Message which confirms that it was successfully measured.

    """
    msg = request.json['message']

    # rename the Mongo-style '_id' key before measuring
    msg['id'] = msg['_id']
    del msg['_id']

    return jsonify({'id': stylemeter.measure_style(msg)})

if __name__ == '__main__':
    init_db()
    app.run(debug=True, port=6000)
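A hypothetical client call against the endpoint above (the message fields are placeholders; the handler only requires '_id' plus whatever StyleMeter.measure_style expects):

import requests

resp = requests.post('http://localhost:6000/stylemeter',
                     json={'message': {'_id': 'abc123',
                                       'bodyPlain': 'Hello world'}})
print(resp.json())  # {'id': ...}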
Example #9
    @classmethod
    def setUpClass(cls):
        cls.config = init_db()
Example #10
def main():
    init_db()
    for m in Metrics.objects():
        # walk every recipient list (To, Cc, Bcc) of every message
        for recipients in [m.to, m.cc, m.bcc]:
            for contact in recipients:
                classify_contact(contact)