示例#1
0
def putFeatures(data):
    """Save a feature record so it can later be used for prediction and learning.

    Args:
        data: Mapping with the keys ``'user'``, ``'hub'``, ``'track'``,
            ``'problem'`` (a mapping providing ``'chrom'`` and
            ``'chromStart'``) and ``'data'``. ``data['data']`` must be a list
            holding exactly one feature record.

    Raises:
        Exception: If ``data['data']`` is not a list, or is a list whose
            length is not 1. The offending value is the exception argument.
    """
    payload = data['data']

    # The payload is invalid when EITHER check fails. Joining the two checks
    # with ``and`` let multi-element lists (and length-1 non-lists) through,
    # and crashed in len() for unsized values instead of raising cleanly.
    if not isinstance(payload, list) or len(payload) != 1:
        raise Exception(payload)

    problem = data['problem']
    features = db.Features(data['user'], data['hub'], data['track'], problem['chrom'], problem['chromStart'])
    # Only the single record in the list is stored, wrapped as a Series.
    features.put(pd.Series(payload[0]))
示例#2
0
def doPrediction(data, problem, txn=None):
    """Run the stored model on the stored features of one problem.

    Args:
        data: Mapping providing ``'user'``, ``'hub'`` and ``'track'``.
        problem: Mapping providing ``'chrom'`` and ``'chromStart'``.
        txn: Accepted for interface compatibility; not used by this function.

    Returns:
        The value produced by ``predictWithFeatures``, or ``False`` when the
        stored features are empty, the stored model is not a dict, or the
        prediction itself is ``None``.
    """
    storedFeatures = db.Features(
        data['user'], data['hub'], data['track'],
        problem['chrom'], problem['chromStart'],
    ).get()
    if storedFeatures.empty:
        return False

    trainedModel = db.Prediction('model').get()
    if not isinstance(trainedModel, dict):
        return False

    # Labels stored under 'badCols' are removed before predicting.
    badColumns = db.Prediction('badCols').get()
    usableFeatures = storedFeatures.drop(labels=badColumns)

    outcome = predictWithFeatures(usableFeatures, trainedModel)
    return False if outcome is None else outcome
示例#3
0
def getDataPoints():
    """Build the (X, Y) data set from every stored model summary.

    For each key in ``db.ModelSummaries``, the summary rows with
    ``numPeaks > 0`` and ``errors < 1`` are selected. One data point is
    produced per selected row: a copy of the features stored under the same
    key, extended with ``logPenalty = log10(penalty)``.

    Returns:
        ``None`` when ``check()`` is falsy or when no data point could be
        collected; otherwise a tuple ``(dropBadCols(X), Y)`` where ``Y`` is
        the ``logPenalty`` column and ``X`` holds the remaining columns.
    """
    if not check():
        return

    # Rows are gathered in a plain list and turned into a frame once at the
    # end. DataFrame.append copied the whole frame on every call (quadratic)
    # and no longer exists in pandas >= 2.0.
    rows = []

    for key in db.ModelSummaries.db_key_tuples():
        modelSum = db.ModelSummaries(*key).get()
        if modelSum.empty:
            continue

        if modelSum['regions'].max() < 1:
            continue

        withPeaks = modelSum[modelSum['numPeaks'] > 0]

        noError = withPeaks[withPeaks['errors'] < 1]

        logPenalties = np.log10(noError['penalty'].astype(float))

        featuresDb = db.Features(*key)
        featuresTxn = db.getTxn()
        features = featuresDb.get(txn=featuresTxn, write=True)

        # NOTE(review): features is expected to be a pandas Series (one
        # feature vector per key), so each copy becomes one row below.
        for penalty in logPenalties:
            datapoint = features.copy()
            datapoint['logPenalty'] = penalty
            rows.append(datapoint)

        featuresTxn.commit()

    # TODO: Save datapoints, update ones which have changed, not all of them every time

    if not rows:
        # Nothing to learn from; previously this crashed with a KeyError on
        # the missing 'logPenalty' column of an empty frame.
        return

    # reset_index mirrors the old ignore_index=True (rows numbered 0..n-1).
    dataPoints = pd.DataFrame(rows).reset_index(drop=True)

    Y = dataPoints['logPenalty']
    # Keyword form: the positional axis argument was removed in pandas 2.0.
    X = dataPoints.drop(columns='logPenalty')

    return dropBadCols(X), Y
示例#4
0
def getFeatures(data):
    """Return what is stored in ``db.Features`` for the given request.

    Args:
        data: Mapping providing ``'user'``, ``'hub'``, ``'track'`` and
            ``'problem'`` (a mapping with ``'chrom'`` and ``'chromStart'``).

    Returns:
        Whatever ``db.Features(...).get()`` yields for that key.
    """
    region = data['problem']
    return db.Features(
        data['user'], data['hub'], data['track'],
        region['chrom'], region['chromStart'],
    ).get()