Example #1
def initializeData():
    global dataAttributeMap
    global visGenie

    dataFile = 'static/data/' + request.form['dataFileName']

    dataProcessorObj = DataProcessor(dataFile)
    dataAttributeMap = dataProcessorObj.getDataAttributeMap()

    aliasFile = 'static/knowledgebase/aliases.json'
    with open(aliasFile, "r") as jsonFile:
        aliasKnowledgeMap = json.load(jsonFile)

    # Attach any aliases defined for this dataset in the knowledge base
    for dataFileLabel in aliasKnowledgeMap:
        if dataFileLabel == request.form['dataFileName']:
            for attribute in aliasKnowledgeMap[dataFileLabel]:
                for alias in aliasKnowledgeMap[dataFileLabel][attribute]:
                    dataProcessorObj.addAlias(attribute, alias)

    # Mark the label (identifier) attribute for the known datasets
    if request.form['dataFileName'] == 'cars_2004.csv':
        dataProcessorObj.setAttributeDataType('Name', 'isLabel')
    elif request.form['dataFileName'] == 'nutrition.csv':
        dataProcessorObj.setAttributeDataType('NDB_No', 'isLabel')

    visGenie = VisGenie(dataAttributeMap, dataProcessorObj.getData())

    return jsonify({
        'status': 'data initialization was successful!',
        # dict_keys is not JSON-serializable under Python 3, so convert to a list
        'dataAttributes': list(dataAttributeMap.keys())
    })
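The function above reads request.form and returns jsonify, so it is presumably registered as a Flask view. Below is a minimal sketch of how it might be wired up and exercised; the app object, the '/initializeData' URL path, and the test-client call are illustrative assumptions, not taken from the source.

# Hypothetical wiring; the Flask app object and route path are assumptions.
from flask import Flask

app = Flask(__name__)

# Register the view shown above for POST requests.
app.add_url_rule('/initializeData', view_func=initializeData, methods=['POST'])

# A client would then POST the dataset file name, e.g. with Flask's test client:
#   with app.test_client() as client:
#       resp = client.post('/initializeData', data={'dataFileName': 'cars_2004.csv'})
#       print(resp.get_json()['dataAttributes'])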
def test(self, username, threshold):
    # Download the user's images and build a dataframe the model can consume
    scrapper = Scrapper()
    folder_path = scrapper.dowload_data(username, threshold)
    dataProcessor = DataProcessor(folder_path)
    data = dataProcessor.create_dataframe_input()

    class_names = [
        'food and drink', 'entertainment', 'business and industry',
        'family and relationships', 'fitness and wellness',
        'hobbies and activities', 'shopping and  fashion',
        'sports and outdoors', 'technology'
    ]
    model_path = "./last_cnn_model.h5"

    # Load the trained CNN and predict a class distribution for every image
    cnnModel = CnnModel(class_names, model_path, data)
    model = cnnModel.load_model()
    test_generator = cnnModel.create_generator()
    prediction = cnnModel.getPrediction(model, test_generator)

    # Average the per-image probabilities into a single interest profile
    result = np.sum(prediction, axis=0)
    result *= (1 / len(prediction))
    return result
def test(self, folder_path, cnnModel, model):
    # Build the dataframe for the already-downloaded images
    dataProcessor = DataProcessor(folder_path)
    data = dataProcessor.create_dataframe_input()

    class_names = [
        'food and drink', 'entertainment', 'business and industry',
        'family and relationships', 'fitness and wellness',
        'hobbies and activities', 'shopping and  fashion',
        'sports and outdoors', 'technology'
    ]

    # The CnnModel and the loaded Keras model are passed in, so the model is
    # loaded once by the caller and reused; only the data changes per call.
    cnnModel.setData(data)
    test_generator = cnnModel.create_generator()
    prediction = cnnModel.getPrediction(model, test_generator)

    # Average the per-image probabilities into a single interest profile
    result = np.sum(prediction, axis=0)
    result *= (1 / len(prediction))
    return result, class_names
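A possible caller for this refactored signature, loading the model once and reusing it across several folders. The analyzer instance, folder paths, and constructor arguments below are illustrative assumptions, not code from the source.

# Hypothetical caller; names and paths are assumptions.
class_names = [
    'food and drink', 'entertainment', 'business and industry',
    'family and relationships', 'fitness and wellness',
    'hobbies and activities', 'shopping and  fashion',
    'sports and outdoors', 'technology'
]
# Passing None for the data argument and setting it later via setData() is an assumption.
cnnModel = CnnModel(class_names, "./last_cnn_model.h5", None)
model = cnnModel.load_model()

# Reuse the same loaded model for several users' image folders.
for folder in ["./downloads/user_a", "./downloads/user_b"]:
    result, names = analyzer.test(folder, cnnModel, model)  # analyzer owns test()
    print(dict(zip(names, result)))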
class AllTabs:
    """Serves as a communicator between the flask part of the application (__init__.py), the backend (DataProcessor),
    and each of the bokeh tabs (PivotTable, Scatterplot, Regression).
    """
    def __init__(self):
        """Initialize allTabs object."""
        self.dp = DataProcessor()

    def update_df(self, filename):
        """Update the backend when a file is uploaded. Called in __init__.py

        Keyword arguments:
        filename -- the filename of the csv file containing the data
        """
        self.dp.setUp(filename)

    def clear_df(self):
        """Removes data from the data processor. Called in __init__.py upon logout."""
        self.dp.clearDf()

    def runServer(self, doc):
        """Starts the bokeh server.

        Keyword arguments:
        doc -- passed automatically when called in __init__.py
        """
        original = self.dp

        # Each tab is now its own object, so information can be shared between tabs from this file
        pt = PivotTable(original)
        sp = Scatterplot(original)
        reg = Regression(original)

        def pivotCallback():
            """Callback for pivot table."""
            try:
                # attempt to update pivot table data
                pt.update_data()

                # only update backend if the categorical list was changed
                if self.dp.cat_indices != pt.checkPanel.active:
                    self.dp.cat_indices = pt.checkPanel.active
                    sp.catIndices = self.dp.cat_indices

            except Exception as e:
                # display exception in pivot table tab
                pt.toReturn.children[0].children[7].text = "<p style=\"color:red\">" + \
                    str(e) + "</p>"

        # on_change and on_click handlers must be registered out here; otherwise data could not be passed between tabs
        for w in [pt.updateButton]:
            w.on_click(partial(pivotCallback))

        def checkboxCallback(placeholder):
            """Callback for checkboxes in pivot tab. Updates the backend on change."""
            # only update backend if the categorical list was changed
            if self.dp.cat_indices != pt.checkPanel.active:

                try:
                    self.dp.cat_indices = pt.checkPanel.active
                    sp.catIndices = self.dp.cat_indices

                    # Update options for values in pivot table based on checkboxes
                    pt.updateValueOptions()
                    pt.toReturn.children[0].children[
                        0].options = pt.valueOptions

                    # Redraw scatterplot as soon as the checkboxes are changed
                    sp.toReturn.children[1] = sp.plot(sp.x_select.value,
                                                      sp.y_select.value)

                except Exception:
                    # Ignore errors while updating the value options or redrawing the plot
                    pass

        # Pivot table listener
        for w in [pt.checkPanel]:
            w.on_click(partial(checkboxCallback))

        # Scatter plot listener for x and y values
        for w in [sp.x_select, sp.y_select]:
            w.on_change('value', sp.update_data)

        # Scatter plot listener for title change
        sp.text.on_change('value', sp.update_title)

        # Regression listener
        reg.updateButton.on_click(reg.updateRegression)

        # toReturn corresponds to a bokeh row attribute in each object
        tab1 = Panel(child=pt.toReturn, title="Pivot Table")
        tab2 = Panel(child=sp.toReturn, title="Scatterplot")
        tab3 = Panel(child=reg.toReturn, title="Regression")

        allTabs = Tabs(tabs=[tab1, tab2, tab3])
        doc.add_root(allTabs)
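The class docstring says AllTabs mediates between the Flask app and the Bokeh tabs, and runServer takes a Bokeh Document, so it can be served as a Bokeh application function. A minimal standalone sketch under that assumption; the port, URL path, and CSV filename are illustrative, and in the real project the wiring happens in __init__.py.

# Hypothetical standalone wiring; not the project's actual __init__.py code.
from bokeh.server.server import Server

allTabs = AllTabs()
allTabs.update_df('uploads/example.csv')   # assumed CSV path

# runServer(doc) matches the signature Bokeh expects for an application function.
server = Server({'/dashboard': allTabs.runServer}, port=5006)
server.start()
server.io_loop.start()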
Example #7
from scrapper import Scrapper
from dataProcessor import DataProcessor
from cnnModel import CnnModel
import os
import numpy as np
# Scrape the target account's images into a local folder
scrapper = Scrapper()
username = "******"
threshold = 3
folder_path = scrapper.dowload_data(username, threshold)
dataProcessor = DataProcessor(folder_path)
data = dataProcessor.create_dataframe_input()
print(data)
class_names = [
    'food and drink', 'entertainment', 'business and industry',
    'family and relationships', 'fitness and wellness',
    'hobbies and activities', 'shopping and  fashion', 'sports and outdoors',
    'technology'
]
model_path = "./last_cnn_model.h5"
cnnModel = CnnModel(class_names, model_path, data)
#cnnModel.visualise_data()
# Load the trained CNN and predict a class distribution for every downloaded image
model = cnnModel.load_model()
test_generator = cnnModel.create_generator()
prediction = cnnModel.getPrediction(model, test_generator)
print(prediction)
# Average the per-image class probabilities into a single interest profile
result = np.sum(prediction, axis=0)
result *= (1 / len(prediction))
print(result)
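The two lines before print(result) are just an average over the per-image probability vectors. A small sanity check with toy values (the numbers are made up):

# Toy check: averaging two probability vectors by hand vs. with np.mean.
import numpy as np

prediction = np.array([[0.2, 0.5, 0.3],
                       [0.4, 0.3, 0.3]])
result = np.sum(prediction, axis=0) * (1 / len(prediction))
assert np.allclose(result, np.mean(prediction, axis=0))
print(result)  # approximately [0.3 0.4 0.3]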
# Set up a per-run log file under the configured log directory
log_dir = os.path.join(cfg.log_dir, cfg.version)
if not os.path.exists(log_dir):
    os.makedirs(log_dir)
file_handler = logging.FileHandler(os.path.join(log_dir, str(index) + '.log'))
logger.addHandler(file_handler)

batch_size = cfg.batch_size
epochs = cfg.epochs
dataset = cfg.dataset
if dataset == 'MNIST':
    num_classes = 10
else:
    num_classes = 26

dataProcessor = DataProcessor(batch_size=batch_size,
                              dataset=dataset,
                              data_path='data/char74k_preprocessed')
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Capsule network, cross-entropy loss, and Adam optimizer
capsNet = CapsNet(conv_in=3, num_classes=num_classes).to(device)
cseloss = torch.nn.CrossEntropyLoss()
adamOptimizer = torch.optim.Adam(params=capsNet.parameters())
best_val_acc = -1
best_val_epoch = 0
for epoch in tqdm(range(epochs)):
    epoch_loss = 0
    for batch_id, (data, target) in enumerate(dataProcessor.train_loader):
        capsNet.train()
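        # --- Hypothetical continuation (not from the source) ---
        # The snippet ends mid-batch; a CrossEntropyLoss training step typically
        # continues as below, assuming capsNet(data) returns raw class logits
        # of shape [batch_size, num_classes].
        data, target = data.to(device), target.to(device)
        adamOptimizer.zero_grad()        # clear gradients from the previous batch
        output = capsNet(data)           # assumed: class logits
        loss = cseloss(output, target)   # cross-entropy against integer class targets
        loss.backward()
        adamOptimizer.step()
        epoch_loss += loss.item()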