def initializeData():
    global dataAttributeMap
    global visGenie

    # Load the requested dataset and extract its attribute metadata.
    dataFile = 'static/data/' + request.form['dataFileName']
    dataProcessorObj = DataProcessor(dataFile)
    dataAttributeMap = dataProcessorObj.getDataAttributeMap()

    # Register attribute aliases from the knowledge base for this dataset.
    aliasFile = 'static/knowledgebase/aliases.json'
    with open(aliasFile, "r") as jsonFile:
        aliasKnowledgeMap = json.load(jsonFile)
    for dataFileLabel in aliasKnowledgeMap:
        if dataFileLabel == request.form['dataFileName']:
            for attribute in aliasKnowledgeMap[dataFileLabel]:
                for alias in aliasKnowledgeMap[dataFileLabel][attribute]:
                    dataProcessorObj.addAlias(attribute, alias)

    # Mark identifier columns as labels for the datasets that have them.
    if request.form['dataFileName'] == 'cars_2004.csv':
        dataProcessorObj.setAttributeDataType('Name', 'isLabel')
    elif request.form['dataFileName'] == 'nutrition.csv':
        dataProcessorObj.setAttributeDataType('NDB_No', 'isLabel')

    visGenie = VisGenie(dataAttributeMap, dataProcessorObj.getData())
    return jsonify({
        'status': 'data initialization was successful!',
        # list() so the keys are JSON serializable under Python 3.
        'dataAttributes': list(dataAttributeMap.keys())
    })
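# The nested loops above imply a knowledge-base file keyed first by dataset
# filename, then by attribute, with a list of aliases per attribute. A
# hypothetical static/knowledgebase/aliases.json illustrating that assumed
# shape (the attribute and alias strings here are made up):
#
# {
#     "cars_2004.csv": {
#         "Horsepower": ["hp", "power"],
#         "Retail Price": ["price", "cost"]
#     }
# }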
def test(self, username, threshold):
    # Scrape the user's images, then build a dataframe the model can consume.
    scrapper = Scrapper()
    folder_path = scrapper.dowload_data(username, threshold)
    dataProcessor = DataProcessor(folder_path)
    data = dataProcessor.create_dataframe_input()

    class_names = [
        'food and drink', 'entertainment', 'business and industry',
        'family and relationships', 'fitness and wellness',
        'hobbies and activities', 'shopping and fashion',
        'sports and outdoors', 'technology'
    ]
    model_path = "./last_cnn_model.h5"
    cnnModel = CnnModel(class_names, model_path, data)
    model = cnnModel.load_model()

    # Predict per image, then average the class probabilities across images.
    test_generator = cnnModel.create_generator()
    prediction = cnnModel.getPrediction(model, test_generator)
    result = np.sum(prediction, axis=0) / len(prediction)
    return result
def test(self, folder_path, cnnModel, model):
    # Build the dataframe for the already-downloaded images and point the
    # preloaded model at it; unlike the variant above, the model is loaded
    # once by the caller rather than on every call.
    dataProcessor = DataProcessor(folder_path)
    data = dataProcessor.create_dataframe_input()
    class_names = [
        'food and drink', 'entertainment', 'business and industry',
        'family and relationships', 'fitness and wellness',
        'hobbies and activities', 'shopping and fashion',
        'sports and outdoors', 'technology'
    ]
    cnnModel.setData(data)

    # Average the per-image class probabilities into a single profile.
    test_generator = cnnModel.create_generator()
    prediction = cnnModel.getPrediction(model, test_generator)
    result = np.sum(prediction, axis=0) / len(prediction)
    return result, class_names
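# A minimal usage sketch for the refactored test() above. "Classifier" is a
# hypothetical name for the class that defines test(), and "some_username" is
# a placeholder; the construction pattern mirrors the standalone script
# below, with the Keras model loaded once and reused across calls.
scrapper = Scrapper()
folder_path = scrapper.dowload_data("some_username", 3)

dataProcessor = DataProcessor(folder_path)
initial_data = dataProcessor.create_dataframe_input()
class_names = ['food and drink', 'entertainment', 'business and industry',
               'family and relationships', 'fitness and wellness',
               'hobbies and activities', 'shopping and fashion',
               'sports and outdoors', 'technology']
cnnModel = CnnModel(class_names, "./last_cnn_model.h5", initial_data)
model = cnnModel.load_model()

classifier = Classifier()  # hypothetical enclosing class
result, class_names = classifier.test(folder_path, cnnModel, model)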
class AllTabs:
    """Serves as a communicator between the flask part of the application
    (__init__.py), the backend (DataProcessor), and each of the bokeh tabs
    (PivotTable, Scatterplot, Regression).
    """

    def __init__(self):
        """Initialize the AllTabs object."""
        self.dp = DataProcessor()

    def update_df(self, filename):
        """Update the backend when a file is uploaded. Called in __init__.py.

        Keyword arguments:
        filename -- the filename of the csv file containing the data
        """
        self.dp.setUp(filename)

    def clear_df(self):
        """Remove data from the data processor. Called in __init__.py upon logout."""
        self.dp.clearDf()

    def runServer(self, doc):
        """Start the bokeh server.

        Keyword arguments:
        doc -- passed automatically when called in __init__.py
        """
        original = self.dp
        # Each tab is its own object, so information can pass between tabs
        # through this file.
        pt = PivotTable(original)
        sp = Scatterplot(original)
        reg = Regression(original)

        def pivotCallback():
            """Callback for the pivot table."""
            try:
                # Attempt to update the pivot table data.
                pt.update_data()
                # Only update the backend if the categorical list changed.
                if self.dp.cat_indices != pt.checkPanel.active:
                    self.dp.cat_indices = pt.checkPanel.active
                    sp.catIndices = self.dp.cat_indices
            except Exception as e:
                # Display the exception in the pivot table tab.
                pt.toReturn.children[0].children[7].text = (
                    "<p style=\"color:red\">" + str(e) + "</p>")

        # on_change and on_click handlers must be registered here, outside the
        # tab classes, or data could not pass between tabs.
        pt.updateButton.on_click(pivotCallback)

        def checkboxCallback(placeholder):
            """Callback for checkboxes in the pivot tab. Updates the backend on change."""
            # Only update the backend if the categorical list changed.
            if self.dp.cat_indices != pt.checkPanel.active:
                try:
                    self.dp.cat_indices = pt.checkPanel.active
                    sp.catIndices = self.dp.cat_indices
                    # Update the value options in the pivot table from the checkboxes.
                    pt.updateValueOptions()
                    pt.toReturn.children[0].children[0].options = pt.valueOptions
                    # Redraw the scatterplot as soon as the checkboxes change.
                    sp.toReturn.children[1] = sp.plot(sp.x_select.value,
                                                      sp.y_select.value)
                except Exception:
                    pass

        # Pivot table listener.
        pt.checkPanel.on_click(checkboxCallback)
        # Scatterplot listeners for the x and y values.
        for w in [sp.x_select, sp.y_select]:
            w.on_change('value', sp.update_data)
        # Scatterplot listener for title changes.
        sp.text.on_change('value', sp.update_title)
        # Regression listener.
        reg.updateButton.on_click(reg.updateRegression)

        # toReturn corresponds to a bokeh row attribute in each object.
        tab1 = Panel(child=pt.toReturn, title="Pivot Table")
        tab2 = Panel(child=sp.toReturn, title="Scatterplot")
        tab3 = Panel(child=reg.toReturn, title="Regression")
        allTabs = Tabs(tabs=[tab1, tab2, tab3])
        doc.add_root(allTabs)
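# A minimal sketch of how runServer() could be attached to a bokeh server,
# following bokeh's standard embedding pattern; the '/bkapp' path and the
# websocket origin below are hypothetical.
from bokeh.server.server import Server

allTabs = AllTabs()
server = Server({'/bkapp': allTabs.runServer},
                allow_websocket_origin=["localhost:8000"])
server.start()
server.io_loop.start()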
from scrapper import Scrapper
from dataProcessor import DataProcessor
from cnnModel import CnnModel
import numpy as np

# Scrape the user's images and build the model input.
scrapper = Scrapper()
username = "******"
threshold = 3
folder_path = scrapper.dowload_data(username, threshold)
dataProcessor = DataProcessor(folder_path)
data = dataProcessor.create_dataframe_input()
print(data)

class_names = [
    'food and drink', 'entertainment', 'business and industry',
    'family and relationships', 'fitness and wellness',
    'hobbies and activities', 'shopping and fashion',
    'sports and outdoors', 'technology'
]
model_path = "./last_cnn_model.h5"
cnnModel = CnnModel(class_names, model_path, data)
model = cnnModel.load_model()

# Predict per image and average the class probabilities.
test_generator = cnnModel.create_generator()
prediction = cnnModel.getPrediction(model, test_generator)
print(prediction)
result = np.sum(prediction, axis=0) / len(prediction)
print(result)
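# Not part of the original script: since result holds the averaged
# probabilities in the same order as class_names, the two can be zipped into
# a readable interest profile, sorted from strongest to weakest category.
profile = dict(zip(class_names, result))
for name, score in sorted(profile.items(), key=lambda kv: kv[1], reverse=True):
    print(f"{name}: {score:.3f}")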
# Set up per-run logging under the configured log directory.
log_dir = os.path.join(cfg.log_dir, cfg.version)
if not os.path.exists(log_dir):
    os.makedirs(log_dir)
file_handler = logging.FileHandler(os.path.join(log_dir, str(index) + '.log'))
logger.addHandler(file_handler)

batch_size = cfg.batch_size
epochs = cfg.epochs
dataset = cfg.dataset
# MNIST has 10 digit classes; the Char74k data used here has 26 letter classes.
num_classes = 10 if dataset == 'MNIST' else 26
dataProcessor = DataProcessor(batch_size=batch_size,
                              dataset=dataset,
                              data_path='data/char74k_preprocessed')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
capsNet = CapsNet(conv_in=3, num_classes=num_classes).to(device)
cseloss = torch.nn.CrossEntropyLoss()
adamOptimizer = torch.optim.Adam(params=capsNet.parameters())

best_val_acc = -1
best_val_epoch = 0
for epoch in tqdm(range(epochs)):
    epoch_loss = 0
    for batch_id, (data, target) in enumerate(dataProcessor.train_loader):
        capsNet.train()
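        # The original snippet is truncated here. A minimal sketch of the
        # rest of a standard training step using only the objects defined
        # above; it assumes capsNet(data) returns (batch, num_classes)
        # scores that CrossEntropyLoss accepts.
        data, target = data.to(device), target.to(device)
        adamOptimizer.zero_grad()
        output = capsNet(data)
        loss = cseloss(output, target)
        loss.backward()
        adamOptimizer.step()
        epoch_loss += loss.item()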