def index_data():
    """Simple example with equity index return data.

    Segment the daily rates of return of a pair of equity indices between
    April 23rd, 1993 and July 14th, 2003. The indices are the Cotation
    Assistee en Continu (CAC) and the Deutscher Aktienindex (DAX). The rates
    of return are computed based on the daily closing price of each index.

    The data are read from ``data/equity-index-data.csv``, resolved relative
    to the current working directory. The results are drawn into a new
    matplotlib figure; nothing is returned.
    """

    # Store the absolute path to the file containing the data.
    abspath = path.realpath(path.join(os.getcwd(), 'data'))
    abspath = path.join(abspath, 'equity-index-data.csv')

    # Parsed dates are collected but not used by the plots below, which
    # index the observations by trading day instead.
    timestamps = []
    val = []

    # Read the data.
    with open(abspath, 'r') as fileobj:
        reader = csv.reader(fileobj, delimiter=',')

        # The header row names the columns; every column except 'date' is
        # treated as an index. The next() builtin is used because the
        # reader.next() method only exists on Python 2.
        header = next(reader)
        name = [field.upper() for field in header if field != 'date']

        for row in reader:
            rec = []
            for field in row:
                try:
                    rec.append(float(field))
                except ValueError:
                    # Anything that is not a number is parsed as a date.
                    timestamps.append(dp.parse(field))
            val.append(rec)

    # Format the data.
    X = np.ones([len(val), 1])
    Y = np.array(val).reshape([len(val), len(name)])

    # Select daily returns from CAC and DAX.
    selected = ['CAC', 'DAX']
    ind = [name.index(label) for label in selected]
    name = [name[i] for i in ind]
    Y = Y[:, ind]

    kwargs = {
        'ratefun': 1.0e-2,                    # 1% expected hazard rate
        'mu': np.zeros([1, len(name)]),       # 0% expected rate of return
        'sigma': 1.0e-4 * np.eye(len(name)),  # 1% expected volatility
        'maxhypot': 50,
        'minprob': 1.0e-16,
    }

    # Compute the posterior probabilities over segment length hypotheses. Then,
    # find the most likely sequence segmentation.
    bcdm_probabilities = Bcdm(alg='sumprod', **kwargs)
    bcdm_segments = Bcdm(alg='maxprod', **kwargs)

    # Update the segment length hypotheses given the data.
    for x, y in zip(X, Y):
        bcdm_probabilities.update(x, y)
        bcdm_segments.update(x, y)

    # Recover the hypothesis probabilities and back-trace to find the most
    # likely segmentation of the sequence.
    hypotheses_probability = bcdm_probabilities.infer()
    segments = bcdm_segments.infer()

    # Create subplots with shared X-axis.
    fig, (upperaxes, loweraxes) = plt.subplots(2, sharex=True)
    fig.subplots_adjust(hspace=0)

    # Plot the response data.
    t = np.arange(1, len(val) + 1)
    upperaxes.plot(t, Y)

    # Plot the posterior probabilities over segment length hypotheses.
    plot_probability(loweraxes, hypotheses_probability, cmap=plt.cm.gray)

    # Plot the changes detected by the segmentation algorithm as alternating
    # coloured spans.
    for ax in (upperaxes, loweraxes):
        plt.sca(ax)
        plot_segment_span(t, segments,
                          facecolor='y', alpha=0.2, edgecolor='none')
        ax.set_xlim([0, len(val)])

    # The canvas-level set_window_title() was removed from recent matplotlib
    # releases; the figure manager provides the method instead.
    manager = getattr(fig.canvas, 'manager', None)
    if manager is not None:
        manager.set_window_title('Equity index data')

    upperaxes.set_title('Equity index data')
    upperaxes.set_ylabel('Rate of return')
    loweraxes.set_xlabel('Trading day')
    loweraxes.set_ylabel('Hypothesis probability')

    # Label the curves with the names of the selected indices.
    upperaxes.legend(name, loc='upper left')
def non_sinusoidal():
    """Simple example with non-sinusoidal (square, triangle, sawtooth) data.

    Three periodic signals (a square wave, a triangle wave and a sawtooth
    wave) are sampled over ``[0, 6*pi]``, corrupted with Gaussian noise and
    segmented jointly using a linear basis ``[1, x]``. The signals, the
    posterior probabilities of the segment length hypotheses and the detected
    segmentation are drawn into a new matplotlib figure together with the
    true boundaries. Nothing is returned.
    """

    rate = 0.001
    omega = 1.0e-3 * np.eye(2)
    sigma = 1.0e-6 * np.eye(3)
    samples = 1000

    def basis(x):
        return np.array([[1.0, x]])

    def shifted_basis(origin):
        # Bind the origin by value. A lambda defined directly in the update
        # loop would look up the loop variable when it is called, not when it
        # is created, so every stored basis would follow the latest input.
        return lambda xt: basis(xt - origin)

    # Create the non-sinusoidal wave functions.
    def square_wave(x):
        return np.sign(np.sin(x))

    def sawtooth_wave(a, x):
        return 2 * ((x / a) - np.floor(0.5 + (x / a)))

    def triangle_wave(a, x):
        return 2 * np.abs(sawtooth_wave(a, x)) - 1

    # Create input and outputs.
    X = np.linspace(0, 3 * 2 * np.pi, samples).reshape(samples, 1)
    Y = np.hstack([
        square_wave(X),
        triangle_wave(2 * np.pi, X - np.pi / 2),
        sawtooth_wave(2 * np.pi, X + np.pi / 3),
    ])

    # Create Gaussian noise.
    Y += np.vstack([
        0.025 * np.random.randn(samples),
        0.1 * np.random.randn(samples),
        0.05 * np.random.randn(samples),
    ]).T

    # Largest input value as a plain scalar (the builtin max() applied to the
    # two-dimensional X would return a one-element array instead).
    x_max = float(X.max())

    # Determine location of true boundaries.
    true_boundaries = np.hstack((
        np.pi * np.arange(0, 7),
        np.pi * np.arange(0, 6) + np.pi / 2,
        2 * np.pi * np.arange(0, 4) + np.pi - np.pi / 3,
    ))
    true_boundaries = np.sort(true_boundaries[true_boundaries <= x_max])

    # Compute the posterior probabilities over segment length hypotheses. Then,
    # find the most likely segmentation of the sequence.
    bcdm_probabilities = Bcdm(alg='sumprod', ratefun=rate, basisfunc=basis,
                              omega=omega, sigma=sigma)
    bcdm_segments = Bcdm(alg='maxprod', ratefun=rate, basisfunc=basis,
                         omega=omega, sigma=sigma)

    # Update the segment length hypotheses given the data.
    for x, y in zip(X, Y):
        y = np.array([y])
        basis_t = shifted_basis(x)
        bcdm_probabilities.update(x, y, basisfunc=basis_t)
        bcdm_segments.update(x, y, basisfunc=basis_t)

    # Recover the hypothesis probabilities and back-trace to find the most
    # likely segmentation of the sequence.
    hypotheses_probability = bcdm_probabilities.infer()
    segments = bcdm_segments.infer()

    # Create subplots. The X-axis is not shared: the upper axes are limited
    # to the input values while the lower axes are limited to the number of
    # observations.
    fig, (upperaxes, loweraxes) = plt.subplots(2, sharex=False)

    # Plot the response data.
    for column in range(Y.shape[1]):
        upperaxes.plot(X, Y[:, column])

    # Plot the posterior probabilities over segment length hypotheses.
    plot_probability(loweraxes, hypotheses_probability, cmap=plt.cm.gray)

    # Plot the changes detected by the segmentation algorithm as alternating
    # coloured spans. Plot the true segment boundaries as vertical lines.
    plt.sca(upperaxes)
    plot_segment_span(X, segments, facecolor='y', alpha=0.2, edgecolor='none')
    plot_segment_boundaries(true_boundaries, color='k', linestyle=':')

    plt.sca(loweraxes)
    plot_segment_span(segments, facecolor='y', alpha=0.2, edgecolor='none')
    # Rescale the boundaries from input values to observation numbers.
    plot_segment_boundaries(samples * true_boundaries / x_max,
                            color='k', linestyle=':')

    upperaxes.set_xlim([0, x_max])
    loweraxes.set_xlim([0, len(X)])

    # The canvas-level set_window_title() was removed from recent matplotlib
    # releases; the figure manager provides the method instead.
    manager = getattr(fig.canvas, 'manager', None)
    if manager is not None:
        manager.set_window_title('Triangular wave data')

    upperaxes.set_title('Triangular wave data')
    upperaxes.set_ylabel('Signal values')
    loweraxes.set_xlabel('Observation')
    loweraxes.set_ylabel('Hypothesis probability')
def well_data():
    """Simple example with nuclear response data collected during well drilling.

    Segment the well log data used in Fearnhead and Clifford (2003). This data
    consist of measurements of the nuclear magnetic response of underground
    rocks, collected during the drilling of a well bore. The data are composed
    of piecewise constant segments, each segment relating to a stratum with a
    single type of rock. The jump discontinuities between segments occur at
    the boundaries between rock strata.

    The data are read from ``data/well-data.txt``, resolved relative to the
    current working directory. The results are drawn into a new matplotlib
    figure; nothing is returned.

        P. Fearnhead and P. Clifford, "Online Inference for Hidden Markov
        Models via Particle Filters," Journal of the Royal Statistical
        Society: Series B (Statistical Methodology), Vol. 65, Issue 4,
        pp. 887-899, November 2003.

    """

    loc = 1.0e5
    scale = 1.0e4
    rate = 1.0e-2
    val = []

    # Store the absolute path to the file containing the data.
    abspath = path.realpath(path.join(os.getcwd(), 'data'))
    abspath = path.join(abspath, 'well-data.txt')

    # Read the data.
    with open(abspath, 'r') as fileobj:
        for line in fileobj:
            try:
                val.append(float(line))
            except ValueError:
                # Deliberately skip lines that do not hold a single number.
                continue

    # Format the data.
    X = np.ones([len(val), 1])
    Y = np.array(val).reshape([len(val), 1])

    # The location and scale are passed on as 1x1 arrays.
    kwargs = {
        'ratefun': rate,
        'mu': np.array([(loc, )]),
        'sigma': np.array([(scale, )]),
    }

    # Compute the posterior probabilities over segment length hypotheses. Then,
    # find the most likely sequence segmentation.
    bcdm_probabilities = Bcdm(alg='sumprod', **kwargs)
    bcdm_segments = Bcdm(alg='maxprod', **kwargs)

    # Update the segment length hypotheses given the data.
    for x, y in zip(X, Y):
        bcdm_probabilities.update(x, y)
        bcdm_segments.update(x, y)

    # Recover the hypothesis probabilities and back-trace to find the most
    # likely segmentation of the sequence.
    hypotheses_probability = bcdm_probabilities.infer()
    segments = bcdm_segments.infer()

    # Create subplots with shared X-axis.
    fig, (upperaxes, loweraxes) = plt.subplots(2, sharex=True)
    fig.subplots_adjust(hspace=0)

    # Plot the response data.
    t = np.arange(1, len(val) + 1)
    upperaxes.plot(t, Y)

    # Plot the posterior probabilities over segment length hypotheses.
    plot_probability(loweraxes, hypotheses_probability, cmap=plt.cm.gray)

    # Plot the changes detected by the segmentation algorithm as alternating
    # coloured spans.
    for ax in (upperaxes, loweraxes):
        plt.sca(ax)
        plot_segment_span(t, segments,
                          facecolor='y', alpha=0.2, edgecolor='none')
        ax.set_xlim([0, len(val)])

    # The canvas-level set_window_title() was removed from recent matplotlib
    # releases; the figure manager provides the method instead.
    manager = getattr(fig.canvas, 'manager', None)
    if manager is not None:
        manager.set_window_title('Well log data')

    upperaxes.set_title('Well log data')
    upperaxes.set_ylabel('Nuclear magnetic response')
    loweraxes.set_xlabel('Measurement number')
    loweraxes.set_ylabel('Hypothesis probability')
def random_data():
    """Simple example with synthetic data.

    Generate a sequence of segments of predictor-response data with
    ``gen_random_data``, segment the sequence, and draw the responses, the
    posterior probabilities of the segment length hypotheses and the detected
    segmentation into a new matplotlib figure, together with the true segment
    boundaries. Nothing is returned.
    """

    # Set the size of the problem.
    numpred = 2
    numresp = 2
    numpoint = 200
    numseg = 5

    # Set parameters for generating the data.
    coeffparam = 0.5
    noiseparam = 5.0

    # Generate a sequence of segments and, for each segment, generate a set of
    # predictor-response data.
    segbound, X, Y = gen_random_data(numpred, numresp, numpoint, numseg,
                                     omega=coeffparam * np.eye(numpred),
                                     eta=noiseparam)

    # Derive the rate from the number of segments and the number of points.
    rate = float(numseg) / float(numpoint - numseg)

    # Compute the posterior probabilities over segment length hypotheses. Then,
    # find the most likely segmentation of the sequence.
    bcdm_probabilities = Bcdm(alg='sumprod', ratefun=rate)
    bcdm_segments = Bcdm(alg='maxprod', ratefun=rate)

    # Update the segment length hypotheses given the data.
    for x, y in zip(X, Y):
        bcdm_probabilities.update(x, y)
        bcdm_segments.update(x, y)

    # Recover the hypothesis probabilities and back-trace to find the most
    # likely segmentation of the sequence.
    hypotheses_probability = bcdm_probabilities.infer()
    segments = bcdm_segments.infer()

    # Create subplots with shared X-axis.
    fig, (upperaxes, loweraxes) = plt.subplots(2, sharex=True)
    fig.subplots_adjust(hspace=0)

    # Plot the response data, one curve per response.
    t = np.arange(1, numpoint + 1)
    for i in range(numresp):
        upperaxes.plot(t, Y[:, i])

    # Plot the posterior probabilities over segment length hypotheses.
    plot_probability(loweraxes, hypotheses_probability, cmap=plt.cm.gray)

    # Plot the changes detected by the segmentation algorithm as alternating
    # coloured spans. Plot the true segment boundaries as vertical lines.
    for ax in (upperaxes, loweraxes):
        plt.sca(ax)
        plot_segment_span(t, segments,
                          facecolor='y', alpha=0.2, edgecolor='none')
        plot_segment_boundaries(t, segbound, color='k', linestyle=':')
        ax.set_xlim([0, numpoint])

    # The canvas-level set_window_title() was removed from recent matplotlib
    # releases; the figure manager provides the method instead.
    manager = getattr(fig.canvas, 'manager', None)
    if manager is not None:
        manager.set_window_title('Randomly generated data')

    upperaxes.set_title('Randomly generated data')
    upperaxes.set_ylabel('Output values')
    loweraxes.set_xlabel('Observation')
    loweraxes.set_ylabel('Hypothesis probability')
def index_data():
    """Simple example with equity index return data.

    Segment the daily rates of return of a pair of equity indices between
    April 23rd, 1993 and July 14th, 2003. The indices are the Cotation
    Assistee en Continu (CAC) and the Deutscher Aktienindex (DAX). The rates
    of return are computed based on the daily closing price of each index.

    The data are read from ``data/equity-index-data.csv``, resolved relative
    to the current working directory. The results are drawn into a new
    matplotlib figure; nothing is returned.
    """

    # Store the absolute path to the file containing the data.
    abspath = path.realpath(path.join(os.getcwd(), 'data'))
    abspath = path.join(abspath, 'equity-index-data.csv')

    # Parsed dates are collected but not used by the plots below, which
    # index the observations by trading day instead.
    timestamps = []
    val = []

    # Read the data.
    with open(abspath, 'r') as fileobj:
        reader = csv.reader(fileobj, delimiter=',')

        # The header row names the columns; every column except 'date' is
        # treated as an index. The next() builtin is used because the
        # reader.next() method only exists on Python 2.
        header = next(reader)
        name = [field.upper() for field in header if field != 'date']

        for row in reader:
            rec = []
            for field in row:
                try:
                    rec.append(float(field))
                except ValueError:
                    # Anything that is not a number is parsed as a date.
                    timestamps.append(dp.parse(field))
            val.append(rec)

    # Format the data.
    X = np.ones([len(val), 1])
    Y = np.array(val).reshape([len(val), len(name)])

    # Select daily returns from CAC and DAX. An empty selection keeps every
    # index found in the file.
    ind = ['CAC', 'DAX']
    if ind:
        ind = [name.index(label) for label in ind]
        name = [name[i] for i in ind]
        Y = Y[:, ind]

    kwargs = {
        'ratefun': 1.0e-2,                    # 1% expected hazard rate
        'mu': np.zeros([1, len(name)]),       # 0% expected rate of return
        'sigma': 1.0e-4 * np.eye(len(name)),  # 1% expected volatility
        'maxhypot': 50,
        'minprob': 1.0e-16,
    }

    # Compute the posterior probabilities over segment length hypotheses. Then,
    # find the most likely sequence segmentation.
    bcdm_probabilities = Bcdm(alg='sumprod', **kwargs)
    bcdm_segments = Bcdm(alg='maxprod', **kwargs)

    # Update the segment length hypotheses given the data.
    for x, y in zip(X, Y):
        bcdm_probabilities.update(x, y)
        bcdm_segments.update(x, y)

    # Recover the hypothesis probabilities and back-trace to find the most
    # likely segmentation of the sequence.
    hypotheses_probability = bcdm_probabilities.infer()
    segments = bcdm_segments.infer()

    # Create subplots with shared X-axis.
    fig, (upperaxes, loweraxes) = plt.subplots(2, sharex=True)
    fig.subplots_adjust(hspace=0)

    # Plot the response data.
    t = np.arange(1, len(val) + 1)
    upperaxes.plot(t, Y)

    # Plot the posterior probabilities over segment length hypotheses.
    plot_probability(loweraxes, hypotheses_probability, cmap=plt.cm.gray)

    # Plot the changes detected by the segmentation algorithm as alternating
    # coloured spans.
    for ax in (upperaxes, loweraxes):
        plt.sca(ax)
        plot_segment_span(t, segments,
                          facecolor='y', alpha=0.2, edgecolor='none')
        ax.set_xlim([0, len(val)])

    # The canvas-level set_window_title() was removed from recent matplotlib
    # releases; the figure manager provides the method instead.
    manager = getattr(fig.canvas, 'manager', None)
    if manager is not None:
        manager.set_window_title('Equity index data')

    upperaxes.set_title('Equity index data')
    upperaxes.set_ylabel('Rate of return')
    loweraxes.set_xlabel('Trading day')
    loweraxes.set_ylabel('Hypothesis probability')

    # Label the curves with the names of the selected indices.
    upperaxes.legend(name, loc='upper left')
def non_sinusoidal():
    """Simple example with non-sinusoidal (square, triangle, sawtooth) data.

    Three periodic signals (a square wave, a triangle wave and a sawtooth
    wave) are sampled over ``[0, 6*pi]``, corrupted with Gaussian noise and
    segmented jointly using a linear basis ``[1, x]``. The signals, the
    posterior probabilities of the segment length hypotheses and the detected
    segmentation are drawn into a new matplotlib figure together with the
    true boundaries. Nothing is returned.
    """

    rate = 0.001
    omega = 1.0e-3 * np.eye(2)
    sigma = 1.0e-6 * np.eye(3)
    samples = 1000

    def basis(x):
        return np.array([[1.0, x]])

    def shifted_basis(origin):
        # Bind the origin by value. A lambda defined directly in the update
        # loop would look up the loop variable when it is called, not when it
        # is created, so every stored basis would follow the latest input.
        return lambda xt: basis(xt - origin)

    # Create the non-sinusoidal wave functions.
    def square_wave(x):
        return np.sign(np.sin(x))

    def sawtooth_wave(a, x):
        return 2 * ((x / a) - np.floor(0.5 + (x / a)))

    def triangle_wave(a, x):
        return 2 * np.abs(sawtooth_wave(a, x)) - 1

    # Create input and outputs.
    X = np.linspace(0, 3 * 2 * np.pi, samples).reshape(samples, 1)
    Y = np.hstack([
        square_wave(X),
        triangle_wave(2 * np.pi, X - np.pi / 2),
        sawtooth_wave(2 * np.pi, X + np.pi / 3),
    ])

    # Create Gaussian noise.
    Y += np.vstack([
        0.025 * np.random.randn(samples),
        0.1 * np.random.randn(samples),
        0.05 * np.random.randn(samples),
    ]).T

    # Largest input value as a plain scalar (the builtin max() applied to the
    # two-dimensional X would return a one-element array instead).
    x_max = float(X.max())

    # Determine location of true boundaries.
    true_boundaries = np.hstack((
        np.pi * np.arange(0, 7),
        np.pi * np.arange(0, 6) + np.pi / 2,
        2 * np.pi * np.arange(0, 4) + np.pi - np.pi / 3,
    ))
    true_boundaries = np.sort(true_boundaries[true_boundaries <= x_max])

    # Compute the posterior probabilities over segment length hypotheses. Then,
    # find the most likely segmentation of the sequence.
    bcdm_probabilities = Bcdm(alg='sumprod', ratefun=rate, basisfunc=basis,
                              omega=omega, sigma=sigma)
    bcdm_segments = Bcdm(alg='maxprod', ratefun=rate, basisfunc=basis,
                         omega=omega, sigma=sigma)

    # Update the segment length hypotheses given the data.
    for x, y in zip(X, Y):
        y = np.array([y])
        basis_t = shifted_basis(x)
        bcdm_probabilities.update(x, y, basisfunc=basis_t)
        bcdm_segments.update(x, y, basisfunc=basis_t)

    # Recover the hypothesis probabilities and back-trace to find the most
    # likely segmentation of the sequence.
    hypotheses_probability = bcdm_probabilities.infer()
    segments = bcdm_segments.infer()

    # Create subplots. The X-axis is not shared: the upper axes are limited
    # to the input values while the lower axes are limited to the number of
    # observations.
    fig, (upperaxes, loweraxes) = plt.subplots(2, sharex=False)

    # Plot the response data.
    for column in range(Y.shape[1]):
        upperaxes.plot(X, Y[:, column])

    # Plot the posterior probabilities over segment length hypotheses.
    plot_probability(loweraxes, hypotheses_probability, cmap=plt.cm.gray)

    # Plot the changes detected by the segmentation algorithm as alternating
    # coloured spans. Plot the true segment boundaries as vertical lines.
    plt.sca(upperaxes)
    plot_segment_span(X, segments, facecolor='y', alpha=0.2, edgecolor='none')
    plot_segment_boundaries(true_boundaries, color='k', linestyle=':')

    plt.sca(loweraxes)
    plot_segment_span(segments, facecolor='y', alpha=0.2, edgecolor='none')
    # Rescale the boundaries from input values to observation numbers.
    plot_segment_boundaries(samples * true_boundaries / x_max,
                            color='k', linestyle=':')

    upperaxes.set_xlim([0, x_max])
    loweraxes.set_xlim([0, len(X)])

    # The canvas-level set_window_title() was removed from recent matplotlib
    # releases; the figure manager provides the method instead.
    manager = getattr(fig.canvas, 'manager', None)
    if manager is not None:
        manager.set_window_title('Triangular wave data')

    upperaxes.set_title('Triangular wave data')
    upperaxes.set_ylabel('Signal values')
    loweraxes.set_xlabel('Observation')
    loweraxes.set_ylabel('Hypothesis probability')
def random_data():
    """Simple example with synthetic data.

    Generate a sequence of segments of predictor-response data with
    ``gen_random_data``, segment the sequence, and draw the responses, the
    posterior probabilities of the segment length hypotheses and the detected
    segmentation into a new matplotlib figure, together with the true segment
    boundaries. Nothing is returned.
    """

    # Set the size of the problem.
    numpred = 2
    numresp = 2
    numpoint = 200
    numseg = 5

    # Set parameters for generating the data.
    coeffparam = 0.5
    noiseparam = 5.0

    # Generate a sequence of segments and, for each segment, generate a set of
    # predictor-response data.
    segbound, X, Y = gen_random_data(numpred, numresp, numpoint, numseg,
                                     omega=coeffparam * np.eye(numpred),
                                     eta=noiseparam)

    # Derive the rate from the number of segments and the number of points.
    rate = float(numseg) / float(numpoint - numseg)

    # Compute the posterior probabilities over segment length hypotheses. Then,
    # find the most likely segmentation of the sequence.
    bcdm_probabilities = Bcdm(alg='sumprod', ratefun=rate)
    bcdm_segments = Bcdm(alg='maxprod', ratefun=rate)

    # Update the segment length hypotheses given the data.
    for x, y in zip(X, Y):
        bcdm_probabilities.update(x, y)
        bcdm_segments.update(x, y)

    # Recover the hypothesis probabilities and back-trace to find the most
    # likely segmentation of the sequence.
    hypotheses_probability = bcdm_probabilities.infer()
    segments = bcdm_segments.infer()

    # Create subplots with shared X-axis.
    fig, (upperaxes, loweraxes) = plt.subplots(2, sharex=True)
    fig.subplots_adjust(hspace=0)

    # Plot the response data, one curve per response.
    t = np.arange(1, numpoint + 1)
    for i in range(numresp):
        upperaxes.plot(t, Y[:, i])

    # Plot the posterior probabilities over segment length hypotheses.
    plot_probability(loweraxes, hypotheses_probability, cmap=plt.cm.gray)

    # Plot the changes detected by the segmentation algorithm as alternating
    # coloured spans. Plot the true segment boundaries as vertical lines.
    for ax in (upperaxes, loweraxes):
        plt.sca(ax)
        plot_segment_span(t, segments,
                          facecolor='y', alpha=0.2, edgecolor='none')
        plot_segment_boundaries(t, segbound, color='k', linestyle=':')
        ax.set_xlim([0, numpoint])

    # The canvas-level set_window_title() was removed from recent matplotlib
    # releases; the figure manager provides the method instead.
    manager = getattr(fig.canvas, 'manager', None)
    if manager is not None:
        manager.set_window_title('Randomly generated data')

    upperaxes.set_title('Randomly generated data')
    upperaxes.set_ylabel('Output values')
    loweraxes.set_xlabel('Observation')
    loweraxes.set_ylabel('Hypothesis probability')