Example #1
def kfold_cross_validate(directory, k):

    print('Beginning k-fold cross-validation...')

    subset_list = breakup_training(directory, k)
    results = [[] for _ in range(10)]  # outer list = each model, inner list = results per fold

    # loop through each subset, run training + validation
    for i in range(len(subset_list)):

        # split the training docs into training + validation
        validation_set = set(subset_list[i])
        remaining = subset_list[:i] + subset_list[i + 1:]
        train_set = set(index for subset in remaining for index in subset)

        # no resampling
        hmm_model_0 = hmm.HMM(directory, train_set, smooth_trans=True, smooth_emiss=True, resample=False)   # smooth both
        hmm_model_1 = hmm.HMM(directory, train_set, smooth_trans=False, smooth_emiss=True, resample=False)  # smooth emission only
        hmm_model_2 = hmm.HMM(directory, train_set, smooth_trans=True, smooth_emiss=False, resample=False)  # smooth transition only
        hmm_model_3 = hmm.HMM(directory, train_set, smooth_trans=False, smooth_emiss=False, resample=False) # no smoothing

        results[0].append(cross_validate_hmm(directory, hmm_model_0, validation_set))
        results[1].append(cross_validate_hmm(directory, hmm_model_1, validation_set))
        results[2].append(cross_validate_hmm(directory, hmm_model_2, validation_set))
        results[3].append(cross_validate_hmm(directory, hmm_model_3, validation_set))

        # with resampling
        hmm_model_4 = hmm.HMM(directory, train_set, smooth_trans=True, smooth_emiss=True, resample=True)   # smooth both
        hmm_model_5 = hmm.HMM(directory, train_set, smooth_trans=False, smooth_emiss=True, resample=True)  # smooth emission only
        hmm_model_6 = hmm.HMM(directory, train_set, smooth_trans=True, smooth_emiss=False, resample=True)  # smooth transition only
        hmm_model_7 = hmm.HMM(directory, train_set, smooth_trans=False, smooth_emiss=False, resample=True) # no smoothing

        results[4].append(cross_validate_hmm(directory, hmm_model_4, validation_set))
        results[5].append(cross_validate_hmm(directory, hmm_model_5, validation_set))
        results[6].append(cross_validate_hmm(directory, hmm_model_6, validation_set))
        results[7].append(cross_validate_hmm(directory, hmm_model_7, validation_set))

        # baseline with and without resampling
        baseline_1 = baseline.Baseline(directory, train_set, resample=False)
        baseline_2 = baseline.Baseline(directory, train_set, resample=True)

        results[8].append(cross_validate_baseline(directory, baseline_1, validation_set))
        results[9].append(cross_validate_baseline(directory, baseline_2, validation_set))

        # status update
        print(str((i + 1) / k * 100) + '% complete')

    # return the averaged results tuple for each model across all k folds
    return [get_avg_results(model_results, k) for model_results in results]
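The helper breakup_training is referenced above but not defined in this example. A minimal sketch, assuming the training documents are the files in directory and the folds are built from shuffled file indices (both assumptions):

import os
import random

def breakup_training(directory, k):
    # Hypothetical sketch: shuffle the document indices and deal them
    # into k roughly equal subsets for k-fold cross-validation.
    indices = list(range(len(os.listdir(directory))))
    random.shuffle(indices)
    return [indices[i::k] for i in range(k)]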
Example #2
    def __init__(self, model_type=None, model_params=""):
        if model_type is None:
            self.model = None
            self.vectorizer = None
            return

        if (model_type == "baseline"):
            self.model = baseline.Baseline()
        elif (model_type == "svm"):
            self.model = eval("SVC(" + model_params + ")")
            #self.model = SVC(kernel="linear")
        elif (model_type == "knn"):
            self.model = eval("KNeighborsClassifier(" + model_params + ")")
            #self.model = KNeighborsClassifier(n_neighbors=3)
        elif (model_type == "naive_bayes"):
            self.model = MultinomialNB()
        elif (model_type == "decision_trees"):
            self.model = DecisionTreeClassifier(random_state=0)
        elif (model_type == "log_regression"):
            self.model = eval("LogisticRegression(" + model_params + ")")
        elif (model_type == "perceptron"):
            self.model = eval("Perceptron(" + model_params + ")")
        else:
            print("Model of type " + model_type + " is not supported.", file=sys.stderr)

        self.vectorizer = DictVectorizer(sparse=True)
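Building estimators via eval executes arbitrary code from model_params. A minimal eval-free sketch (the dict-based dispatch and make_model are assumptions, not the original design; SVC and KNeighborsClassifier are standard scikit-learn classes):

from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

MODEL_CLASSES = {"svm": SVC, "knn": KNeighborsClassifier}

def make_model(model_type, **model_params):
    # Dispatch on a whitelist of constructors instead of eval().
    return MODEL_CLASSES[model_type](**model_params)

clf = make_model("svm", kernel="linear")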
Example #3
def baselineClassify(train, test):
    """
    Return the baseline system's tagging predictions as a dictionary
    mapping each entity class (PER, LOC, ORG, MISC) to its predicted spans.
    """
    model = baseline.Baseline(train)
    lines = preprocess.readFile(test)
    prediction = {'PER': [], 'LOC': [], 'ORG': [], 'MISC': []}
    lineNum = 1

    for line in lines:
        if (lineNum % 3) == 1:
            #Line with Tokens
            tokens = line.strip().split()
            tags = BLdebug(model.assignTags(tokens))
        elif (lineNum % 3) == 0:
            #Line with indexes
            indexes = line.strip().split()
            preClass = None
            firstIdx = None
            lastIdx = None
            NEcontinues = False

            for i in range(len(tags)):
                bioTag = tags[i][:1]

                if bioTag == 'B':
                    if NEcontinues:
                        #Previous tag ends
                        prediction[preClass].append(firstIdx + '-' + lastIdx)
                    preClass = tags[i][2:]
                    firstIdx = indexes[i]
                    lastIdx = indexes[i]
                    NEcontinues = True

                elif bioTag == 'I':
                    curClass = tags[i][2:]
                    assert NEcontinues and curClass == preClass
                    lastIdx = indexes[i]

                else:  # bioTag == 'O'
                    if NEcontinues:
                        prediction[preClass].append(firstIdx + '-' + lastIdx)
                    preClass = None
                    firstIdx = None
                    lastIdx = None
                    NEcontinues = False

            # Flush an entity that extends to the end of the line.
            if NEcontinues:
                prediction[preClass].append(firstIdx + '-' + lastIdx)

        lineNum += 1

    return prediction
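A possible invocation, assuming CoNLL-style input where token, tag, and index lines repeat in groups of three (the file names are placeholders):

prediction = baselineClassify('eng.train', 'eng.dev')
for entity_class, spans in prediction.items():
    print(entity_class, len(spans), 'predicted entities')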
Example #4
    def __init__(self,
                 speclist,
                 xtype='frequency',
                 xarr=None,
                 force=False,
                 **kwargs):

        if xarr is None:
            self.xarr = speclist[0].xarr
        else:
            self.xarr = xarr

        self.units = speclist[0].units
        self.header = speclist[0].header
        self.parse_header(self.header)

        for spec in speclist:
            if not isinstance(spec, Spectrum):
                raise TypeError(
                    "Must create an ObsBlock with a list of spectra.")
            if not np.array_equal(spec.xarr, self.xarr):
                if not force:
                    raise ValueError("Mismatch between X axes in ObsBlock")
            if spec.units != self.units:
                raise ValueError("Mismatched units")

        if force:
            self.speclist = [
                interpolation.interp(spec, self) for spec in speclist
            ]
        else:
            self.speclist = speclist
        self.nobs = len(self.speclist)

        # Create a 2-dimensional array of the data
        self.data = np.array([sp.data for sp in self.speclist]).swapaxes(0, 1).squeeze()
        self.error = np.array([sp.error for sp in self.speclist]).swapaxes(0, 1).squeeze()

        self.plotter = plotters.Plotter(self)
        self._register_fitters()
        self.specfit = fitters.Specfit(self, Registry=self.Registry)
        self.baseline = baseline.Baseline(self)
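A usage sketch: building an ObsBlock from repeated observations of the same source, where force=True interpolates each spectrum onto the shared X axis instead of raising on a mismatch (file names are placeholders):

spectra = [Spectrum('scan_%i.fits' % i) for i in range(3)]
block = ObsBlock(spectra, force=True)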
Example #5
    def __init__(self, speclist, xunits='GHz', **kwargs):
        print "Creating spectra"
        speclist = list(speclist)
        for ii, spec in enumerate(speclist):
            if isinstance(spec, str):
                spec = Spectrum(spec)
                speclist[ii] = spec

        self.speclist = speclist

        print "Concatenating data"
        self.xarr = units.SpectroscopicAxes(
            [sp.xarr.as_unit(xunits) for sp in speclist])
        self.xarr.units = xunits
        self.xarr.xtype = units.unit_type_dict[xunits]
        self.data = np.ma.concatenate([sp.data for sp in speclist])
        self.error = np.ma.concatenate([sp.error for sp in speclist])
        self._sort()

        self.header = pyfits.Header()
        for spec in speclist:
            for key, value in spec.header.items():
                try:
                    self.header[key] = value
                except (ValueError, KeyError):
                    warn("Could not update header KEY=%s to VALUE=%s" %
                         (key, value))

        self.plotter = plotters.Plotter(self)
        self._register_fitters()
        self.specfit = fitters.Specfit(self, Registry=self.Registry)
        self.baseline = baseline.Baseline(self)

        self.units = speclist[0].units
        for spec in speclist:
            if spec.units != self.units:
                raise ValueError("Mismatched units")

        # Special.  This needs to be modified to be more flexible; for now I need it to work for nh3
        self.plot_special = None
        self.plot_special_kwargs = {}
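Because the constructor wraps plain strings in Spectrum objects itself, a list of file paths is enough. A usage sketch (paths are placeholders):

combined = Spectra(['band1.fits', 'band2.fits'], xunits='GHz')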
Example #6
    def __init__(self,
                 filename=None,
                 filetype=None,
                 xarr=None,
                 data=None,
                 error=None,
                 header=None,
                 doplot=False,
                 maskdata=True,
                 plotkwargs={},
                 xarrkwargs={},
                 **kwargs):
        """
        Create a Spectrum object.

        Must either pass in a filename or ALL of xarr, data, and header, plus
        optionally error.

        kwargs are passed to the file reader

        Parameters
        ----------
        filename : string
            The file to read the spectrum from.  If data, xarr, and error are
            specified, leave filename blank.
        filetype : string
            Specify the file type (only needed if it cannot be automatically
            determined from the filename)
        xarr : `units.SpectroscopicAxis` or `np.ndarray`
            The X-axis of the data.  If it is an np.ndarray, you must pass
            `xarrkwargs` or a valid header if you want to use any of the unit
            functionality.
        data : `np.ndarray`
            The data array (must have same length as xarr)
        error : `np.ndarray` 
            The error array (must have same length as the data and xarr arrays)
        header : `pyfits.Header` or dict
            The header from which to read unit information.  Needs to be a
            `pyfits.Header` instance or another dictionary-like object with the
            appropriate information
        maskdata : boolean
            turn the array into a masked array with all nan and inf values masked
        doplot : boolean
            Plot the spectrum after loading it?
        plotkwargs : dict
            keyword arguments to pass to the plotter
        xarrkwargs : dict
            keyword arguments to pass to the SpectroscopicAxis initialization
            (can be used in place of a header)

        Examples
        --------

        >>> sp = pyspeckit.Spectrum(data=np.random.randn(100),
                    xarr=np.linspace(-50, 50, 100), error=np.ones(100)*0.1, 
                    xarrkwargs={'unit':'km/s', 'refX':4.829, 'refX_units':'GHz',
                        'xtype':'VLSR-RAD'}, header={})

        >>> xarr = pyspeckit.units.SpectroscopicAxis(np.linspace(-50,50,100),
                    units='km/s', refX=6562.83, refX_units='angstroms')
        >>> data = np.random.randn(100)*5 + np.random.rand(100)*100
        >>> err = np.sqrt(data/5.)*5. # Poisson noise
        >>> sp = pyspeckit.Spectrum(data=data, error=err, xarr=xarr, header={}) 
        
        >>> # if you already have a simple fits file
        >>> sp = pyspeckit.Spectrum('test.fits')
        """

        if filename:
            if filetype is None:
                suffix = filename.rsplit('.', 1)[1]
                if suffix in readers.suffix_types:
                    # use the default reader for that suffix
                    filetype = readers.suffix_types[suffix][0]
                    reader = readers.readers[filetype]
                else:
                    raise TypeError("File with suffix %s is not recognized." %
                                    suffix)
            else:
                if filetype in readers.readers:
                    reader = readers.readers[filetype]
                else:
                    raise TypeError("Filetype %s not recognized" % filetype)

            self.data, self.error, self.xarr, self.header = reader(
                filename, **kwargs)

            # these should probably be replaced with registerable functions...
            if filetype in ('fits', 'tspec', 'pyfits', 'sdss'):
                self.parse_header(self.header)
            elif filetype == 'txt':
                self.parse_text_header(self.header)
            elif filetype in ('hdf5', 'h5'):
                self.parse_hdf5_header(self.header)

            if isinstance(filename, str):
                self.fileprefix = filename.rsplit(
                    '.', 1)[0]  # Everything prior to .fits or .txt
        elif xarr is not None and data is not None:
            # technically, this is unpythonic.  But I don't want to search for all 10 attributes required.
            if issubclass(type(xarr), units.SpectroscopicAxis):
                self.xarr = xarr
            else:
                self.xarr = units.SpectroscopicAxis(xarr, **xarrkwargs)
            self.data = data
            if error is not None:
                self.error = error
            else:
                self.error = data * 0
            if hasattr(header, 'get'):
                self.header = header
            else:  # set as blank
                warn("WARNING: Blank header.")
                self.header = pyfits.Header()
            self.parse_header(self.header)

        if maskdata:
            if hasattr(self.data, 'mask'):
                self.data.mask += np.isnan(self.data) + np.isinf(self.data)
                self.error.mask += np.isnan(self.data) + np.isinf(self.data)
            else:
                self.data = np.ma.masked_where(
                    np.isnan(self.data) + np.isinf(self.data), self.data)
                self.error = np.ma.masked_where(
                    np.isnan(self.data) + np.isinf(self.data), self.error)

        self.plotter = plotters.Plotter(self)
        self._register_fitters()
        self.specfit = fitters.Specfit(self, Registry=self.Registry)
        self.baseline = baseline.Baseline(self)
        self.speclines = speclines
        self._sort()

        # Special.  This needs to be modified to be more flexible; for now I need it to work for nh3
        self.plot_special = None
        self.plot_special_kwargs = {}

        if doplot: self.plotter(**plotkwargs)
Example #7
    def slice(self,
              start=None,
              stop=None,
              units='pixel',
              copy=True,
              preserve_fits=False):
        """Slicing the spectrum

        .. WARNING:: this is the same as cropping right now, but it returns a
            copy instead of cropping inplace
        
        Parameters
        ----------
        start : numpy.float or int
            start of slice
        stop :  numpy.float or int
            stop of slice
        units : str
            allowed values are any supported physical unit, 'pixel'
        copy : bool
            Return a 'view' of the data or a copy?
        preserve_fits : bool
            Save the fitted parameters from self.fitter?
        """

        if units in ('pixel', 'pixels'):
            start_ind = start
            stop_ind = stop
        else:
            x_in_units = self.xarr.as_unit(units)
            start_ind = x_in_units.x_to_pix(start)
            stop_ind = x_in_units.x_to_pix(stop)
        if start_ind > stop_ind: start_ind, stop_ind = stop_ind, start_ind
        spectrum_slice = slice(start_ind, stop_ind)

        if copy:
            sp = self.copy()
        else:
            sp = self
        sp.data = sp.data[spectrum_slice]
        if sp.error is not None:
            sp.error = sp.error[spectrum_slice]
        sp.xarr = sp.xarr[spectrum_slice]

        if copy:
            # create new specfit / baseline instances (otherwise they'll be the wrong length)
            sp._register_fitters()
            sp.baseline = baseline.Baseline(sp)
            sp.specfit = fitters.Specfit(sp, Registry=sp.Registry)
        else:
            # inplace modification
            sp.baseline.crop(start_ind, stop_ind)
            sp.specfit.crop(start_ind, stop_ind)

        if preserve_fits:
            sp.specfit.modelpars = self.specfit.modelpars
            sp.specfit.parinfo = self.specfit.parinfo
            sp.baseline.baselinepars = self.baseline.baselinepars
            sp.baseline.order = self.baseline.order

        return sp
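A usage sketch, assuming sp is an existing Spectrum whose axis can be expressed in GHz: take an independent sub-spectrum while carrying over the previous fit parameters:

sub = sp.slice(4.80, 4.95, units='GHz', copy=True, preserve_fits=True)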
Example #8
the values on a 2.9in Waveshare e-Paper display.
"""
import machine
import esp32
import utime
import ccs811
import bme280
import screen
import battery
import baseline
import config

_i2c = machine.I2C(scl=config.scl, sda=config.sda, freq=100000)
_rtc = machine.RTC()
_bat = battery.Battery(config.battery)
_baseline = baseline.Baseline()


def run():
    """Main entry point to execute this program."""
    try:
        bme = bme280.BME280(i2c=_i2c, mode=bme280.BME280_OSAMPLE_4)
        scr = screen.Screen(config)

        if _delete_ccs811_baseline_requested():
            _baseline.delete()

        if _is_first_run():
            # 20 runs (minutes), p9 of datasheet
            _set_runs_to_condition(20)
            ccs = ccs811.CCS811(_i2c, mode=ccs811.CCS811.DRIVE_MODE_60SEC)
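The helpers referenced above (_is_first_run, _set_runs_to_condition, and the rest) fall outside the excerpt. One plausible sketch, assuming the run state lives in RTC memory, which the ESP32 preserves across deep sleep (this is a guess at the original design):

def _is_first_run():
    # RTC memory is empty after a power-on reset.
    return _rtc.memory() == b''

def _set_runs_to_condition(runs):
    _rtc.memory(str(runs).encode())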
Example #9
import baseline as b
#import capture_data as d
#import visualize as v
import torch
import numpy as np

#Declare model
model = b.Baseline('cuda:0').to('cuda:0')

#Load in data
data = torch.as_tensor(
    np.load('../data/batch_0.npz')['arr_0']  # an .npz archive needs a key lookup; 'arr_0' is numpy's default
).float().to('cuda').permute(0, 3, 1, 2)

# checkpoint = torch.load('checkpoint.pth')

# model.do_train_on_vid('training_data/vid1', 100000, batch_size=4, checkpoint=checkpoint)
#model.do_train_on_vid('training_data/vid1', 100000, batch_size=8, checkpoint=checkpoint)
model.do_train(data, data, 1000, batch_size=8)
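To make the commented-out resume path usable, the trained weights can be written out afterwards. A sketch, assuming do_train leaves the trained parameters on the model (the exact checkpoint format expected by the checkpoint= argument is not shown):

torch.save(model.state_dict(), 'checkpoint.pth')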
Example #10
        words = pickle.load(f)
    with open('all_PoS.pickle', 'rb') as f:
        pos = pickle.load(f)

    # Clean data.
    data = utils.process_data_set(data)
    words, data = utils.handle_rare_words(words, data, 2)
    words, pos = utils.process_words_pos(words, pos)

    # Initialize training and testing set.
    training_set_size = int(0.8 * len(data))
    training_set = data[:training_set_size]
    test_set = data[training_set_size:]

    # Initialize and evaluate the baseline model.
    baseline_model = baseline.Baseline(pos, words, training_set)  # avoid shadowing the baseline module
    total_predictions, correct_predictions = evaluate(baseline_model.MAP, test_set)

    # Find the model's accuracy.
    accuracy = (correct_predictions / total_predictions) * 100
    print('Baseline model accuracy: {}'.format(accuracy))

    # Initialize and evaluate the HMM model.
    hmm_model = hmm.HMM(pos, words, training_set)
    total_predictions, correct_predictions = evaluate(hmm_model.viterbi,
                                                      test_set)

    # Find the model's accuracy.
    accuracy = (correct_predictions / total_predictions) * 100
    print('HMM model accuracy: {}'.format(accuracy))
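The evaluate helper used above is not shown. A minimal sketch, assuming each test sentence is a list of (word, tag) pairs and the predictor maps a word list to a tag list:

def evaluate(predict, test_set):
    total = correct = 0
    for sentence in test_set:
        words = [word for word, _ in sentence]
        gold = [tag for _, tag in sentence]
        predicted = predict(words)
        total += len(gold)
        correct += sum(p == g for p, g in zip(predicted, gold))
    return total, correct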
Example #11
sensor_funcs = {
    'cur': get_cur,
    'light': get_light,
    'level': get_level,
    'temp': get_temp,
    'humid': get_humid,
    'smoist': get_smoist
}

now = int(time.time())
# Convert to midnight
now += clock_start - time_since_midnight(now)

try:
    baseline_schedule = baseline.Baseline(args.baseline, now)
    now = baseline_schedule.time0
except Exception:
    print('baseline file %s not found or parse error' % args.baseline)
    exit()

rospy.init_node('Simulator', anonymous=True)

generate_publishers()
generate_subscribers()

#print("Now: %f %s" %(now, clock_time(now)))
clock_pub.publish(rospy.Time.from_sec(now))
last_update = now

while not rospy.core.is_shutdown():
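    # The excerpt ends at the loop header; a minimal body (a sketch, not
    # the original code) might step the simulated clock and republish it:
    now += 1
    clock_pub.publish(rospy.Time.from_sec(now))
    rospy.sleep(0.1)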
Example #12
    newAsset = oldAsset.copy()
    newAsset.date = newAsset.date + datetime.timedelta(days=1)
    newAsset.todayIn = 0
    allAsset.append(newAsset)

if __name__ == "__main__":
    # Read the trade records
    tradeList = pd.read_excel('/Users/test/OneDrive/Project/Stock/Datum/40047552对账单/All.xlsx')
    # Build an initial asset list
    allAsset = []
    initDate = datetime.datetime(2014, 8, 6)  # start date: the day the account was opened; a transfer record exists but no actual funds yet
    asset = Asset(initDate)
    allAsset.append(asset)
    # Initialize baseline statistics: track the baseline's net value and annual gain
    baseCode = '399006'
    baseline = baseline.Baseline(baseCode, initDate.strftime("%Y%m%d"))
    # Initialize position statistics: track net value, position, and annual gain
    myStatistic = pd.DataFrame(data=np.array([1, 0, 0]).reshape(1, 3), columns=['net', 'position', 'asset'], index=[initDate.strftime("%Y%m%d")])  # rows are dates
    myYearRaise = pd.Series()  # rows are years (each year's last calendar day); values are the gain within that year
    # Start a new day
    print(tradeList.shape[0] - 1)
    for dayNum in range(tradeList.shape[0] - 1):
        newDay(allAsset)
        today = allAsset[-1].date
        todayDateNum = int(today.strftime("%Y%m%d"))  # the statement stores dates as ints, not strings, so convert before comparing
        if todayDateNum == 20191206:
            break
        todayTradeOperateRecord = tradeList[tradeList.iloc[:, 0] == todayDateNum]
        # Process intraday trades