class TweetProcessor:
    """Match incoming tweets against known politicians and persist sentiment.

    Each English tweet is scanned for every politician's given and family
    name; matching tweets are sentiment-classified and stored through the
    ETL layer.
    """

    def __init__(self):
        # NOTE(review): Politician/NBClassifier/Etl are project classes;
        # assumed to take no constructor arguments — confirm at their defs.
        politicians = Politician()
        self.politicianList = politicians.getList()
        self.classifier = NBClassifier()
        self.e = Etl()

    def process(self, jsonString):
        """Process one raw tweet JSON string.

        :param jsonString: raw JSON payload of a single tweet.
        :returns: list of ``[politician, sentiment]`` pairs, one per
            politician whose given AND family name both appear in the
            raw JSON text.
        :raises ValueError: if the tweet is not English, or a matching
            tweet carries no ``"text"`` field.
        """
        jsonData = json.loads(jsonString)
        if jsonData.get("lang") != "en":
            raise ValueError("Not an English tweet")
        output = []
        for politician in self.politicianList:
            givenName = politician["givenName"]
            familyName = politician["familyName"]
            # NOTE(review): the match runs on the raw JSON string, so names
            # in usernames/URLs also count — original behavior preserved.
            if familyName in jsonString and givenName in jsonString:
                if "text" not in jsonData:
                    raise ValueError("No text in jsonData")
                sentiment = self.classifier.classify(jsonData["text"])
                self.e.put(jsonData, politician, sentiment)
                output.append([politician, sentiment])
        return output
def exportGephi(self):
    """Prompt the user for a target file and export Gephi data to it.

    Shows a confirmation dialog on success. If the user cancels the save
    dialog, nothing is exported (previously an empty filename was passed
    straight to the exporter).
    """
    e = Etl()
    # Python 2 Tkinter modules; local import keeps Tk out of non-GUI paths.
    from tkFileDialog import asksaveasfilename
    import tkMessageBox
    filename = asksaveasfilename()
    if not filename:
        # Dialog cancelled — asksaveasfilename() returns '' in that case.
        return
    if e.exportGephi(filename):
        tkMessageBox.showinfo("Info", "File salvato con successo.")
def addRows():
    """Populate the 'viruses' table, but only if the database is empty."""
    # `is None` (identity) is the idiomatic test; `== None` invokes __eq__
    # and can misbehave with ORM query results.
    if Virus.query.first() is None:
        df = Etl().data
        # Replace the table wholesale; the guard above ensures this only
        # runs against a fresh/empty database.
        df.to_sql('viruses', engine, if_exists="replace", index=False)
        print("done adding rows")
def predict(self):
    """Fetch the configured data, run the ETL pipeline, train the model,
    and store today's sample plus the model's suggestion on ``self``."""
    dataset = Data(self.training_url, self.testing_url)
    dataset.fetch_data()

    pipeline = Etl(dataset)
    pipeline.do_etl()

    model = Model(pipeline)
    model.train()

    # Predict on the most recent testing sample.
    self.today_data = dataset.testing_data[-1]
    self.today_suggestion = model.predict_one(self.today_data)
def cluster_data(data):
    """Run the RFM clustering pipeline on raw data and push the results.

    Transforms ``data`` through the ETL steps (clean -> RFM features ->
    normalization), clusters the result, then POSTs the metrics and the
    cluster assignments to the reporting server. Network failures are
    logged and swallowed (best effort, original behavior).
    """
    etl = Etl()
    df = etl.process_data(data)
    df = etl.generate_rfm(df)
    df = etl.normalize_df(df)

    clustering = Clustering()
    [metrics, clusters] = clustering.generate_cluster(df)

    headers = {'Content-type': 'application/json', 'Accept': 'text/plain'}
    try:
        # raise_for_status() surfaces HTTP 4xx/5xx responses, which were
        # previously ignored silently.
        resp = requests.post(server_url + '/metrics', headers=headers, json=metrics)
        resp.raise_for_status()
        resp = requests.post(server_url + '/clusters', headers=headers, json=clusters)
        resp.raise_for_status()
    except Exception as e:
        print('Error', e)
def __init__(self, params):
    """Initialize the loader and select the SQL templates that match the
    configured feeder ('mdm' or 'dwh') and table type ('hashed' or not).

    NOTE(review): for any other feeder value (or a non-hashed 'dwh'
    table) no fill/truncate SQL is assigned here — presumably set by the
    Etl base or elsewhere; confirm.
    """
    Etl.__init__(self, params)

    feeder = params['feeder']
    if feeder == 'mdm':
        if params['table_type'] == 'hashed':
            self.fill_sql = sql_templates.FILL_HASHED_RAW_SQL
        else:
            self.fill_sql = sql_templates.FILL_RAW_SQL
        self.truncate_sql = sql_templates.TRUNCATE_SQL
    elif feeder == 'dwh' and params['table_type'] == 'hashed':
        self.truncate_sql = sql_templates.TRUNCATE_PARTITION_SQL
        self.fill_sql = sql_templates.FILL_HASHED_SQL
def main():
    """CLI entry point: build a Data source from up to two positional
    command-line arguments, run ETL, train the model, and print the
    prediction for the most recent testing sample."""
    argc = len(sys.argv)
    if argc == 2:
        data = Data(sys.argv[1])
    elif argc == 3:
        data = Data(sys.argv[1], sys.argv[2])
    else:
        # No arguments (or too many): fall back to the defaults.
        data = Data()
    data.fetch_data()

    etl = Etl(data)
    etl.do_etl()

    model = Model(etl)
    model.train()

    result = model.predict_one(data.testing_data[-1])
    print(result)
def lambda_connect(event, context):
    """AWS Lambda handler: refresh all stats through the ETL layer.

    ``event`` and ``context`` are required by the Lambda runtime but are
    not used here.
    """
    Etl().retrieve_all_stats()
    return 'pickle rick'
def __init__(self):
    """Load the politician list and set up the classifier and ETL sink."""
    self.politicianList = Politician().getList()
    self.classifier = NBClassifier()
    self.e = Etl()
def __init__(self, categories):
    """Remember the requested categories and prepare the product ETL.

    :param categories: iterable of category identifiers to fetch.
    """
    self.list_categories = categories
    self.json_products = Etl(self.list_categories)
    # Accumulator for products gathered later.
    self.all_prod = []
class Metrics():
    '''Compute and dispatch metrics for brainwriting sessions.'''

    def __init__(self):
        self.db = Database()
        self.etl = Etl()
        # Index of the most recent session; -1 when the collection is empty.
        self.last_session = self.db.get_count(coll='brainwriting_sessions') - 1

    def get_session(self, session):
        '''Returns the session document in the database

        :Parameters:
            - `session`: (Optional) - falsy value fetches the latest session
        :Returns:
            - instance of pymongo.document
        '''
        if session:
            log.info(f'get_session: ID: {int(session)}')
            return self.etl.get_session_data(session_id=int(session))
        elif self.last_session < 0:
            # Empty collection: fall back to session 0.
            log.info(f'get_session: ID: 0')
            return self.etl.get_session_data(session_id=0)
        else:
            log.info(f'get_session: ID: {self.last_session}')
            return self.etl.get_session_data(session_id=self.last_session)

    def put_wit_session(self, session='last'):
        '''Run Wit.ai processing over a session's ideas.

        :Returns: True on success, False when processing raised (the
            error is logged). BUGFIX: previously returned the
            ``Exception`` class itself — a truthy value — so failures
            looked like successes to callers.
        '''
        try:
            if session == 'last':
                session_id = self.db.get_count(coll='brainwriting_sessions') - 1
                log.info(session_id)
                self.etl.process_ideas_wit(session_id=session_id)
            else:
                log.info(session)
                self.etl.process_ideas_wit(session_id=int(session))
            return True
        except Exception as e:
            log.error(f'WitError: {e}')
            return False

    def put_gcp_session(self, session='last'):
        '''Run GCP processing over a session's ideas.

        :Returns: True on success, False when processing raised (the
            error is logged). BUGFIX: previously returned the
            ``Exception`` class itself — a truthy value — so failures
            looked like successes to callers.
        '''
        try:
            if session == 'last':
                session_id = self.db.get_count(coll='brainwriting_sessions') - 1
                log.info(session_id)
                self.etl.process_ideas_gcp(session_id=session_id)
            else:
                log.info(session)
                self.etl.process_ideas_gcp(session_id=int(session))
            return True
        except Exception as e:
            log.error(f'GcpError: {e}')
            return False
def __init__(self):
    """Wire up the database and ETL helpers and cache the index of the
    most recent brainwriting session (-1 when there are none)."""
    self.db = Database()
    self.etl = Etl()
    session_count = self.db.get_count(coll='brainwriting_sessions')
    self.last_session = session_count - 1