Example #1
def elbow(dataset, measureDistance, clusterFilter, extractor, output):
    # Measure the distortion (inertia) for a range of different cluster counts on a dataset
    initialize = models.Dataset(dataset, 1, clusterFilter)
    initialize.initializeFeatures(extractor, output)
    estimate = clusters.calculateClusterCount(initialize.featuresSize)
    clusterCounts = clusters.nearRoundNumbers(estimate, measureDistance)
    distortions = {}
    for count in clusterCounts:
        model = models.Dataset(dataset, max(int(count), 1), clusterFilter)
        model.initialize(extractor, output)
        distortions[count] = model.vocabularyInertia
    return distortions
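A minimal invocation sketch, assuming the registries shown in Example #7; the units of `measureDistance` are not visible in the source, so the value here is a placeholder:

# Hypothetical call; plot the counts against the distortions and pick the knee.
distortions = elbow('W16', 2, features.filterTwentiethImage, features.SIFT, 'out/')
for count in sorted(distortions):
    print(count, distortions[count])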
Example #2
def create_dataset():
    form = fm.Create_Dataset(fl.request.form)
    if fl.request.method == 'GET':
        return fl.render_template('leadanalyst/dataset/create.html', form=form)
    else:
        # process
        if form.validate_on_submit():
            # submit to db
            user = fl.session['logged_in']
            ds = ml.Dataset(name=form.name.data,
                            search_type=int(form.search_type.data),
                            user_created=int(user),
                            year_start=form.year_start.data,
                            year_end=form.year_end.data,
                            owner=user)
            ds_auth_owner = ml.Dataset_Authd(access=user)
            ds_auth = ml.Dataset_Authd(access=form.access.data)
            time_created = datetime.datetime.now()
            ds.time_created = time_created
            ds.access.append(ds_auth)
            ds.access.append(ds_auth_owner)
            db.db_session.add(ds)
            db.db_session.commit()
            fl.flash("Added the dataset!", "success")
            return fl.render_template('leadanalyst/dataset/create.html',
                                      form=form)
        else:
            fl.flash("Dataset not created", "error")
            return fl.render_template('leadanalyst/dataset/create.html',
                                      form=form)
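The snippet does not show how the view is registered; a hedged sketch assuming a plain Flask `app` object (the URL rule is an assumption, not part of the source):

# Assumed registration so both the GET and POST branches above are reachable.
app.add_url_rule('/leadanalyst/dataset/create', 'create_dataset',
                 create_dataset, methods=['GET', 'POST'])

Note that the success branch re-renders the form instead of redirecting, so a browser refresh can resubmit the POST; a Post/Redirect/Get pattern would avoid that.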
Example #3
File: dataimport.py Project: jo-dy/plotr
def create_model(csvdata, dtitle):
    fieldNames = csvdata['headers']
    fieldValues = csvdata['content']
    nFields = csvdata['n']
    if nFields == 0:
        return None
    d = models.Dataset()
    d.title = dtitle
    d.data_id = generate_unique_id()
    d.save()
    for n in range(nFields):
        f = models.DataFields()
        f.fieldName = fieldNames[n]
        f.save()
        for m in range(len(fieldValues[n])):
            v = models.DataValues()
            v.order = m
            v.value = fieldValues[n][m]
            v.save()
            f.values.add(v)
        f.fieldType = f.infer_type()
        f.save()
        d.fields.add(f)
    d.save()
    return d
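A minimal call sketch, assuming only the dict layout the function reads (`headers`, `content` as one value list per field, and `n` for the field count); the model classes belong to the plotr project and are not defined here:

csvdata = {
    'headers': ['year', 'value'],
    'content': [['2016', '2017'], ['1.0', '2.0']],  # one list per field
    'n': 2,
}
dataset = create_model(csvdata, 'demo dataset')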
Example #4
def create_dataset():
    import datetime as dt
    from sqlalchemy import desc
    form = fm.Create_Dataset(fl.request.form)
    if fl.request.method == 'GET':
        return fl.render_template('leadanalyst/dataset/create.html', form=form)
    else:
        # process
        if form.validate_on_submit():
            # submit to db
            user = fl.session['logged_in']
            ds = ml.Dataset(name=form.name.data,
                            search_type=int(form.search_type.data),
                            user_created=int(user),
                            year_start=form.year_start.data,
                            year_end=form.year_end.data,
                            owner=user,
                            freq=int(form.freq.data))
            ds_auth_owner = ml.Dataset_Authd(access=user)
            ds_auth = ml.Dataset_Authd(access=form.access.data)
            ds.time_created = dt.datetime.now()
            ds.access.append(ds_auth)
            ds.access.append(ds_auth_owner)
            db.db_session.add(ds)
            fl.flash("Added the dataset!", "success")
            # now break up this into the correct amount of tasks
            freq_list, start, end = get_time_list(
                form.year_start.data, form.year_end.data, form.freq.data)
            # fetch the id of the dataset that was just added (newest id first;
            # the original ascending order returned the oldest dataset instead)
            ds_id = ml.Dataset.query.order_by(desc(ml.Dataset.id)).first()
            if ds_id is None:
                ds_id = 1
            else:
                ds_id = ds_id.id
            for i in range(len(freq_list)):
                # create a task for every frequency object

                t_cur = ml.Tasks()
                t_cur.nickname = freq_list[i]
                t_cur.date_created = dt.datetime.now()
                t_cur.dataset_owner = int(ds_id)
                t_cur.date_start = start[i]
                t_cur.date_end = end[i]
                t_cur.who_assigned = int(user)
                t_cur.stage = 1
                t_cur.num_inv_found = 0
                t_cur.num_inv_progressing = 0
                t_cur.state = State.Working
                db.db_session.add(t_cur)

            db.db_session.commit()
            return fl.render_template('leadanalyst/dataset/create.html',
                                      form=form)
        else:
            # return str(form.freq.data)
            fl.flash("Dataset not created", "error")
            fl.flash(str(form.errors), 'error')
            return fl.render_template('leadanalyst/dataset/create.html',
                                      form=form)
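Recovering the new row's id by querying for the highest id works in this single-session flow but is fragile under concurrent inserts; a safer pattern with the same SQLAlchemy session is to flush and read the key off the instance (a sketch, not the project's code):

db.db_session.add(ds)
db.db_session.flush()  # assigns ds.id without committing the transaction
ds_id = ds.id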
Example #5
def add_dataset(self, company, agency, subagency_name, dataset_name,
                dataset_url, rating):
    dataset = models.Dataset(datasetName=dataset_name,
                             datasetURL=dataset_url,
                             rating=rating,
                             usedBy=company)
    if subagency_name == '':
        agency.datasets.append(dataset)
    else:
        for s in agency.subagencies:
            if subagency_name == s.name:
                s.datasets.append(dataset)
    agency.save()
Example #6
xv, yv = torch.meshgrid([
    torch.arange(0.0, 20.0) * 4.0 / 20.0,
    torch.arange(0.0, 20.0) * 4.0 / 20.0
])
(v1, v2) = vector_field(xv, yv)

# generate training data
x_train = 4.0 * torch.rand(n_data, 2)
x1_train = x_train[:, 0].unsqueeze(1)
x2_train = x_train[:, 1].unsqueeze(1)

(v1_t, v2_t) = vector_field(x1_train, x2_train)
y1_train = v1_t + 0.1 * torch.randn(x1_train.size())
y2_train = v2_t + 0.1 * torch.randn(x1_train.size())

training_set = models.Dataset(x1_train, x2_train, y1_train, y2_train)

# data loader parameters
DL_params = {
    'batch_size': args.batch_size,
    'shuffle': True,
    'num_workers': args.num_workers,
    'pin_memory': pin_memory
}
training_generator = data.DataLoader(training_set, **DL_params)

# ---------------  Set up and train the constrained model -------------------------------
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(
    model.parameters(), lr=0.01)  # these should also be settable parameters
if args.scheduler == 1:
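    # The source snippet is truncated at this branch; a plausible continuation
    # (an assumption, not confirmed by the source) would attach an LR scheduler:
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)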
Example #7
import sys

import clusters
import features
import models
import utility
import vectors

availableModels = {
    'base': models.Baseline,
    'grid': models.Grid,
    'skyline': models.Skyline
}
availableCompleteDataSets = {
    'w16': models.Dataset('W16', 6000, features.filterTwentiethImage),
    'w17': models.Dataset('W17', 3000, features.filterTenthImage),
    'w18': models.Dataset('W18', 4000, features.filterTwentiethImage)
}
availableDataSets = {
    'sample': models.Dataset('Sample', 200, lambda path: True),
    'example': models.Dataset('Sample', 200, lambda path: True),
    **availableCompleteDataSets
}
availableExtractors = {'sift': features.SIFT}
availableVectorNormalizers = {
    'sum': vectors.normalizeSum,
    'length': vectors.normalizeLength
}
availableVectorComparators = {
    'euclid': vectors.differenceEuclidianDistance,
    'cosine': vectors.differenceCosineSimilarity