def elbow(dataset, measureDistance, clusterFilter, extractor, output):
    # Measure the distortion for a range of different cluster counts on a dataset
    initialize = models.Dataset(dataset, 1, clusterFilter)
    initialize.initializeFeatures(extractor, output)
    estimate = clusters.calculateClusterCount(initialize.featuresSize)
    clusterCounts = clusters.nearRoundNumbers(estimate, measureDistance)
    distortions = {}
    for count in clusterCounts:
        model = models.Dataset(dataset, max(int(count), 1), clusterFilter)
        model.initialize(extractor, output)
        distortions[count] = model.vocabularyInertia
    return distortions
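# Hypothetical follow-up sketch (not part of the original code): one way to pick a
# cluster count from the distortions dict returned by elbow(), using the largest
# drop in distortion between consecutive counts as a simple elbow heuristic.
def pickClusterCount(distortions):
    counts = sorted(distortions)
    bestCount, bestDrop = counts[0], 0.0
    for previous, current in zip(counts, counts[1:]):
        drop = distortions[previous] - distortions[current]
        if drop > bestDrop:
            bestCount, bestDrop = current, drop
    return bestCount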
def create_dataset():
    form = fm.Create_Dataset(fl.request.form)
    if fl.request.method == 'GET':
        return fl.render_template('leadanalyst/dataset/create.html', form=form)
    else:
        # process
        if form.validate_on_submit():
            # submit to db
            user = fl.session['logged_in']
            ds = ml.Dataset(name=form.name.data,
                            search_type=int(form.search_type.data), user_created=int(user),
                            year_start=form.year_start.data, year_end=form.year_end.data,
                            owner=user)
            ds_auth_owner = ml.Dataset_Authd(access=user)
            ds_auth = ml.Dataset_Authd(access=form.access.data)
            ds.time_created = datetime.datetime.now()
            ds.access.append(ds_auth)
            ds.access.append(ds_auth_owner)
            db.db_session.add(ds)
            db.db_session.commit()
            fl.flash("Added the dataset!", "success")
            return fl.render_template('leadanalyst/dataset/create.html', form=form)
        else:
            fl.flash("Dataset not created", "error")
            return fl.render_template('leadanalyst/dataset/create.html', form=form)
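# Hypothetical sketch of the fm.Create_Dataset form the view above expects,
# assuming Flask-WTF/WTForms; the real field types, choices, and validators may differ.
from flask_wtf import FlaskForm
from wtforms import IntegerField, SelectField, StringField
from wtforms.validators import DataRequired


class Create_Dataset(FlaskForm):
    name = StringField('Name', validators=[DataRequired()])
    search_type = SelectField('Search type', choices=[('1', 'Basic'), ('2', 'Advanced')])
    year_start = IntegerField('Start year', validators=[DataRequired()])
    year_end = IntegerField('End year', validators=[DataRequired()])
    access = IntegerField('Grant access to user id', validators=[DataRequired()])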
def create_model(csvdata, dtitle):
    fieldNames = csvdata['headers']
    fieldValues = csvdata['content']
    nFields = csvdata['n']
    if nFields == 0:
        return None
    d = models.Dataset()
    d.title = dtitle
    d.data_id = generate_unique_id()
    d.save()
    for n in range(nFields):
        f = models.DataFields()
        f.fieldName = fieldNames[n]
        f.save()
        for m in range(len(fieldValues[n])):
            v = models.DataValues()
            v.order = m
            v.value = fieldValues[n][m]
            v.save()
            f.values.add(v)
        f.fieldType = f.infer_type()
        f.save()
        d.fields.add(f)
    d.save()
    return d
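# Hypothetical usage sketch, assuming csvdata mirrors a parsed CSV: one entry per
# column under 'headers'/'content' and the column count under 'n'. The sample
# values and title are illustrative only.
csvdata = {
    'headers': ['city', 'population'],
    'content': [['Springfield', 'Shelbyville'], ['30720', '65000']],
    'n': 2,
}
dataset = create_model(csvdata, 'City populations')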
def create_dataset():
    import datetime as dt
    from sqlalchemy import asc

    form = fm.Create_Dataset(fl.request.form)
    if fl.request.method == 'GET':
        return fl.render_template('leadanalyst/dataset/create.html', form=form)
    else:
        # process
        if form.validate_on_submit():
            # submit to db
            user = fl.session['logged_in']
            ds = ml.Dataset(name=form.name.data,
                            search_type=int(form.search_type.data), user_created=int(user),
                            year_start=form.year_start.data, year_end=form.year_end.data,
                            owner=user, freq=int(form.freq.data))
            ds_auth_owner = ml.Dataset_Authd(access=user)
            ds_auth = ml.Dataset_Authd(access=form.access.data)
            ds.time_created = dt.datetime.now()
            ds.access.append(ds_auth)
            ds.access.append(ds_auth_owner)
            db.db_session.add(ds)
            fl.flash("Added the dataset!", "success")
            # now break the requested date range up into the correct number of tasks
            freq_list, start, end = get_time_list(form.year_start.data,
                                                  form.year_end.data, form.freq.data)
            ds_id = ml.Dataset.query.order_by(asc(ml.Dataset.id)).first()
            if ds_id is None:
                ds_id = 1
            else:
                ds_id = ds_id.id
            for i in range(len(freq_list)):
                # create a task for every frequency object
                t_cur = ml.Tasks()
                t_cur.nickname = freq_list[i]
                t_cur.date_created = dt.datetime.now()
                t_cur.dataset_owner = int(ds_id)
                t_cur.date_start = start[i]
                t_cur.date_end = end[i]
                t_cur.who_assigned = int(user)
                t_cur.stage = 1
                t_cur.num_inv_found = 0
                t_cur.num_inv_progressing = 0
                t_cur.state = State.Working
                db.db_session.add(t_cur)
            db.db_session.commit()
            return fl.render_template('leadanalyst/dataset/create.html', form=form)
        else:
            # return str(form.freq.data)
            fl.flash("Dataset not created", "error")
            fl.flash(str(form.errors), 'error')
            return fl.render_template('leadanalyst/dataset/create.html', form=form)
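# Hypothetical sketch of the get_time_list helper referenced above (not shown in
# the original). Assumes a yearly frequency: one task nickname plus start/end date
# per year in the requested range; the real helper may split the range differently
# depending on the form's freq value.
def get_time_list(year_start, year_end, freq):
    import datetime as dt
    freq_list, start, end = [], [], []
    for year in range(int(year_start), int(year_end) + 1):
        freq_list.append(str(year))
        start.append(dt.date(year, 1, 1))
        end.append(dt.date(year, 12, 31))
    return freq_list, start, end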
def add_dataset(self, company, agency, subagency_name, dataset_name, dataset_url, rating):
    dataset = models.Dataset(datasetName=dataset_name, datasetURL=dataset_url,
                             rating=rating, usedBy=company)
    if subagency_name == '':
        agency.datasets.append(dataset)
    else:
        for s in agency.subagencies:
            if subagency_name == s.name:
                s.datasets.append(dataset)
    agency.save()
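# Hypothetical usage sketch, assuming document-style Agency objects with a
# .datasets list and embedded subagencies, and an importer instance exposing the
# add_dataset method above. The Agency lookup and all argument values are
# illustrative assumptions, not part of the original code.
agency = models.Agency.objects.get(name='Department of Example')
importer.add_dataset(
    company='Acme Analytics',
    agency=agency,
    subagency_name='',  # empty string attaches the dataset directly to the agency
    dataset_name='Open Payments',
    dataset_url='https://example.gov/open-payments',
    rating=4,
)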
xv, yv = torch.meshgrid([
    torch.arange(0.0, 20.0) * 4.0 / 20.0,
    torch.arange(0.0, 20.0) * 4.0 / 20.0
])
(v1, v2) = vector_field(xv, yv)

# generate training data
x_train = 4.0 * torch.rand(n_data, 2)
x1_train = x_train[:, 0].unsqueeze(1)
x2_train = x_train[:, 1].unsqueeze(1)
(v1_t, v2_t) = vector_field(x1_train, x2_train)
y1_train = v1_t + 0.1 * torch.randn(x1_train.size())
y2_train = v2_t + 0.1 * torch.randn(x1_train.size())

training_set = models.Dataset(x1_train, x2_train, y1_train, y2_train)

# data loader parameters
DL_params = {
    'batch_size': args.batch_size,
    'shuffle': True,
    'num_workers': args.num_workers,
    'pin_memory': pin_memory
}
training_generator = data.DataLoader(training_set, **DL_params)

# --------------- Set up and train the constrained model -------------------------------
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(),
                             lr=0.01)  # these should also be settable parameters
if args.scheduler == 1:
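# Hypothetical sketch of the custom models.Dataset used above (not shown in the
# original): a map-style torch dataset pairing the two input columns with the two
# noisy vector-field outputs so the DataLoader can batch them.
import torch
from torch.utils import data

class Dataset(data.Dataset):
    def __init__(self, x1, x2, y1, y2):
        self.x1, self.x2, self.y1, self.y2 = x1, x2, y1, y2

    def __len__(self):
        return self.x1.size(0)

    def __getitem__(self, index):
        return self.x1[index], self.x2[index], self.y1[index], self.y2[index]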
import sys

import clusters
import features
import models
import utility
import vectors

availableModels = {
    'base': models.Baseline,
    'grid': models.Grid,
    'skyline': models.Skyline
}
availableCompleteDataSets = {
    'w16': models.Dataset('W16', 6000, features.filterTwentiethImage),
    'w17': models.Dataset('W17', 3000, features.filterTenthImage),
    'w18': models.Dataset('W18', 4000, features.filterTwentiethImage)
}
availableDataSets = {
    'sample': models.Dataset('Sample', 200, lambda path: True),
    'example': models.Dataset('Sample', 200, lambda path: True),
    **availableCompleteDataSets
}
availableExtractors = {'sift': features.SIFT}
availableVectorNormalizers = {
    'sum': vectors.normalizeSum,
    'length': vectors.normalizeLength
}
availableVectorComparators = {
    'euclid': vectors.differenceEuclidianDistance,
    'cosine': vectors.differenceCosineSimilarity