-
Notifications
You must be signed in to change notification settings - Fork 0
/
func.py
1635 lines (1426 loc) · 63.6 KB
/
func.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
from __future__ import print_function, division
import numpy as np
import sys
import os
from scipy.optimize import curve_fit
from scipy.odr import *
import scipy.fftpack as sft
import peakutils
from sklearn.neighbors import KernelDensity
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import LeaveOneOut
def kde1d(qs, bandwidth=None, x_d=None, **kwargs):
    """Return a ``curve`` holding a Gaussian kernel density estimate of `qs`.

    When no bandwidth is given, one is selected by leave-one-out
    cross-validation over 100 logarithmically spaced candidates between
    0.1 and 10.

    :param list qs: a list of observations
    :param float bandwidth: the bandwidth of the KDE; selected by
        cross-validation when ``None``
    :param list-like x_d: the points at which the density is evaluated.
        Default: 1000 evenly spaced points spanning the observations plus
        three bandwidths on either side.
    :param kwargs: forwarded to the ``curve`` constructor (e.g. ``name``)
    :return: a curve with that KDE
    :rtype: curve
    """
    # scikit-learn expects a 2D (n_samples, n_features) array, so keep the
    # observations as a flat float array and add the feature axis on use.
    x = np.asarray(qs, dtype=float).ravel()
    if bandwidth is None:
        bandwidths = 10 ** np.linspace(-1, 1, 100)
        grid = GridSearchCV(KernelDensity(kernel='gaussian'),
                            {'bandwidth': bandwidths},
                            cv=LeaveOneOut())
        grid.fit(x[:, None])
        bandwidth = grid.best_params_['bandwidth']
    kde = KernelDensity(bandwidth=bandwidth, kernel='gaussian')
    kde.fit(x[:, None])
    if x_d is None:
        # A Gaussian kernel is negligible beyond roughly three bandwidths.
        margin = 3.0 * bandwidth
        x_d = np.linspace(x.min() - margin, x.max() + margin, 1000)
    else:
        x_d = np.asarray(x_d, dtype=float).ravel()
    # score_samples returns the log of the probability density
    logprob = kde.score_samples(x_d[:, None])
    return curve(x_d, np.exp(logprob), **kwargs)
def log10space(x1, x2, N=10):
    """Return `N` values from `x1` to `x2`, evenly spaced in ``log10``.

    This is a shortcut around ``numpy.logspace``, which wants the first and
    last *exponents* rather than the first and last values.

    :param float x1: start value
    :param float x2: end value
    :param int N: Number of values to create, default `10`
    :return: values log10 spaced between `x1` and `x2`
    :rtype: list
    """
    exponents = np.linspace(np.log10(x1), np.log10(x2), N)
    return 10. ** exponents
class curve(object):
r"""An object to expose some numerical methods and plotting tools.
A ``curve`` object takes any two dimensional dataset and its uncertainty
(both in the :math:`x` and :math:`y` direction). Each data set includes
:math:`x` and :math:`y` data and uncertainty associated with that, as well
as a name and a data shape designation (whether this is smooth data or
binned).
There exist three ways to add uncertainty to the measurements. The first
is to define an array or list of values that define the absolute
uncertainty at each ``x``. The second is to define a list of tuples that
define the lower and upper absolute uncertainty at each ``x``,
respectively. The final way is to define a two dimensional array, where the
first row is the lower absolute uncertainty at each ``x``, and the second
row is the upper absolute uncertainty at each ``x``.
:param list-like x: The abscissa (independent variable) data of the curve
:param list-like u_x: The uncertainty in the abscissa data of the curve
:param list-like y: The ordinate (dependent variable) data of the curve
:param list-like u_y: The uncertainty in the ordinate data of the curve
:param str name: The name of the data set, used for plotting, etc.
:param str data: The type of data, whether 'smooth' or 'binned'. This
parameter affects the interpolation (and in turn, many other functions)
by determining what the value is between data points. For smooth data,
linear interpolation is enacted to find values between points, for
binned data, constant interpolation is used.
:return: the ``curve`` object.
:rtype: curve
"""
###########################################################################
# Data Input - tests in tests/test_data_input.py
###########################################################################
def __init__(self, x, y, name='', u_x=None, u_y=None, data='smooth'):
self.plot_kwargs = {}
self.name = name
self.data = data
self.epsilon = 0.05
self.fit_transpose = False
self.binned_data_x = None
self.binned_data_y = None
# assert that x and y are 1d lists of same size
if isinstance(x, list):
self.x = np.array(x)
elif isinstance(x, float):
self.x = np.array(list(x))
else:
self.x = x
if isinstance(y, list):
self.y = np.array(y)
elif isinstance(y, float):
self.y = np.array(list(y))
else:
self.y = y
if isinstance(u_x, list):
self.u_x = np.array(u_x)
else:
self.u_x = u_x
if isinstance(u_y, list):
self.u_y = np.array(u_y)
else:
self.u_y = u_y
self.sort()
def set_plot_kwargs(self, **kwargs):
r"""Add or replace values in the plot kwargs dictionary."""
self.plot_kwargs.update(kwargs)
def rename(self, name):
r"""Rename the current curve."""
self.name = name
return self
def sort(self):
r"""Sort the list depending on the :math:`x` coordinate.
``sort()`` sorts all of the data input to the curve so that it is
ordered from decreasing :math:`x` to increasing :math:`x`.
:return: the ``curve`` object, but it has been sorted in-place.
:rtype: curve
"""
idx = self.x.argsort()
self.x = self.x[idx]
self.y = self.y[idx]
if self.u_x is not None:
if len(self.u_x.shape) > 1:
if self.u_x.shape[1] == len(self.x):
self.u_x = self.u_x[:, idx]
else:
self.u_x = self.u_x[idx, :]
else:
self.u_x = self.u_x[idx]
if self.u_y is not None:
if len(self.u_y.shape) > 1:
if self.u_y.shape[1] == len(self.y):
self.u_y = self.u_y[:, idx]
else:
self.u_y = self.u_y[idx, :]
else:
self.u_y = self.u_y[idx]
def add_data(self, x, y, u_x=None, u_y=None):
"""Add data to the already populated x and y.
:param list-like x: The ordinate data to add to the already populated
curve object.
:param list-like y: The abscissa data to add to the already populated
curve object.
:param list-like u_x: The uncertainty in the ordinate data to be added.
:param list-like u_y: The uncertainty in the abscissa data to be added.
:return: A curve object with the added data, fully sorted.
:rtype: curve
"""
if isinstance(x, float):
x = [x]
y = [y]
self.x = np.append([float(item) for item in self.x], list(x))
self.y = np.append([float(item) for item in self.y], list(y))
if self.u_x is not None:
self.u_x = np.append([float(item) for item in self.u_x], list(u_x))
if self.u_y is not None:
self.u_y = np.append([float(item) for item in self.u_y], list(u_y))
self.sort()
def copy(self, name=None):
r"""Perform a deep copy of the curve and passes it out to
another ``curve`` object so that it can be manipulated out-of-place.
:return: a copy of the ``curve`` object calling the function
:rtype: curve
"""
newx = self.x.copy()
newy = self.y.copy()
newuy = None
newux = None
if self.u_y is not None:
newuy = self.u_y.copy()
if self.u_x is not None:
newux = self.u_x.copy()
if name is not None:
newname = name
else:
newname = self.name
return curve(newx, newy, u_y=newuy, u_x=newux, data=self.data,
name=newname)
def crop(self, y_min=None, y_max=None, x_min=None, x_max=None,
replace=None):
r"""Crop the data within the specified rectange.
``crop(y_min, y_max, x_min, x_max, replace)`` will find any data
points that fall outside of the rectangle with corners at
``(x_min, y_min)`` to ``(x_max, y_max)`` and replace it with the value
specified as ``return``.
:param float x_min: A value for which any values with :math:`x<x_{min}`
will be replaced with the value ``replace``.
:param float x_max: A value for which any values with :math:`x>x_{max}`
will be replaced with the value ``replace``.
:param float y_min: A value for which any values with :math:`y<y_{min}`
will be replaced with the value ``replace``.
:param float y_max: A value for which any values with :math:`y>y_{max}`
will be replaced with the value ``replace``.
:param float replace: The value to replace any value outside of the
rectangle with. Default ``None``.
:return: the cropped ``curve`` object
"""
remove = [False for i in range(len(self.x))]
if y_min is not None:
for i in range(len(self.x)):
if self.y[i] < y_min:
if replace is None:
self.y[i] = y_min
elif replace is "remove":
remove[i] = True
elif isinstance(replace, float):
self.y[i] = replace
if self.u_y is not None:
if self.y[i] - self.u_y[i] < y_min:
self.u_y[i] = self.y[i] - y_min
if y_max is not None:
for i in range(len(self.x)):
if self.y[i] > y_max:
if replace is None:
self.y[i] = y_max
elif replace is "remove":
remove[i] = True
elif isinstance(replace, float):
self.y[i] = replace
if self.u_y is not None:
if self.y[i] + self.u_y[i] > y_max:
self.u_y[i] = y_max - self.y[i]
if x_min is not None:
for i in range(len(self.x)):
if self.x[i] < x_min:
if replace is None:
self.x[i] = x_min
elif replace is "remove":
remove[i] = True
elif isinstance(replace, float):
self.x[i] = replace
if x_max is not None:
for i in range(len(self.x)):
if self.x[i] > x_max:
if replace is None:
self.x[i] = x_max
elif replace is "remove":
remove[i] = True
elif isinstance(replace, float):
self.x[i] = replace
if replace == "remove":
self.x = np.delete(self.x, np.where(remove))
if self.u_x is not None:
self.u_x = np.delete(self.u_x, np.where(remove))
self.y = np.delete(self.y, np.where(remove))
if self.u_y is not None:
self.u_y = np.delete(self.u_y, np.where(remove))
return self
def find_first_above(self, y_min):
r"""Find the first point with y value above the given value y.
:param float y_min: the comparitor value
:returns: the tuple (x, y) which is the first in ``x`` space where
``y`` is above the given y_min
"""
i = 0
while i < len(self.x):
if self.y[i] > y_min:
return self.x[i], self.y[i]
i += 1
return (np.nan, np.nan)
def rebin(self, x=None):
r"""Redistribute the curve along a new set of x values.
``rebin(x)`` takes a list-like input of new points on the ordinate and
redistributes the abscissa so that the x values are only on those
points. For continuous/smooth data, this simply interpolates the
previous curve to the new points. For binned data, this integrates
between left bin points and redistributes the fraction of data between
those points.
:param list x: the new x values to redistribute the curve. If binned,
this indicates the left edge
:returns: the curve object with redistributed values
"""
if self.data == 'smooth':
newy = [self.at(_x) for _x in x]
#newuy = [self.u_y_at(_x) for _x in x]
elif self.data == 'binned':
bin_widths = [x2 - x1 for x1, x2 in zip(x[:-1], x[1:])]
#print (bin_widths[::5])
# assume the last bin has the same width
bin_widths = bin_widths + [bin_widths[-1]]
#print (bin_widths[::5])
newy = [self.integrate(x_min=_x, x_max=_x + bw)
for _x, bw in zip(x, bin_widths)]
#print (newy[::5])
self.x = np.array(x)
self.y = np.array(newy)
#self.u_y = np.array(newuy)
self.sort()
return self
def decimate(self, R=None, length=None):
r"""Remove all but every ``R`` th point in the curve.
:param int R: An integer value telling how often to save a point.
:param int length: *Alternate*, an integer telling how big you
want the final array.
:return: the decimated ``curve`` object
"""
if length is not None:
R = (len(self.x) / length) + 1
self.y = self.y[::R]
self.x = self.x[::R]
if self.u_x is not None:
self.u_x = self.u_x[::R]
if self.u_y is not None:
self.u_y = self.u_y[::R]
return self
###########################################################################
# Data Retrieving and Interpolation - tests in tests/test_data_interp.py
###########################################################################
def inrange(self, x):
"""Check if a point is within the range of data.
:param float x: The data point to check if it is in the range of the
existing curve data.
:return: Whether or not the data is in the range of the curve data.
:rtype: bool
"""
if x >= self.x.min() and x <= self.x.max():
return True
else:
return False
def at(self, x, extrapolation=True):
""" ``at(x)`` finds a value at x.
``at(x)`` uses interpolation or extrapolation to determine the value
of the curve at a given point, :math:`x`. The function first checks
if :math:`x` is in the range of the curve. If it is in the range, the
function calls :py:func:`interpolate` to determine the value. If it is
not in the range, the function calls :py:func:`extrapolate` to
determine the value.
:param float x: The coordinate of which the value is desired.
:returns: the value of the curve at point :math:`x`
:rtype: float
"""
if isinstance(x, float):
x = [x]
y = np.ones_like(x)
for index, xi in zip(range(len(x)), x):
if isinstance(xi, int):
xi = float(xi)
if xi in self.x:
y[index] = self.y[np.argwhere(self.x == xi).flatten()]#list(self.x).index(xi)]
else:
if xi > np.min(self.x) and xi < np.max(self.x.max()):
if self.data == 'binned':
_, y[index] = self.find_nearest_down(xi)
else:
# if it is in the data range, interpolate
y[index] = self.interpolate(xi)
else:
if extrapolation:
# if it is not in the data range, extrapolate
y[index] = self.extrapolate(xi)
else:
y[index] = np.nan
if len(y) == 1:
y = y[0]
return y
def __call__(self, x):
"""Returns the value of the function at point ``x``."""
return self.at(x)
def u_y_at(self, x, dx=0.0):
r""" ``u_y_at(x)`` finds a the uncertainty of a value at x.
``u_y_at(x)`` uses interpolation or extrapolation to determine the
uncertainty of the value of the curve at a given point, :math:`x`. The
function first checks if :math:`x` is in the range of the curve. If it
is in the range, the function calls :py:func:`interpolate` and
:py:func:`propogate_error` to find the uncertainty of the point. If it
is not in the range, the function calls :py:func:`extrapolate` and
:py:func:`propogate_error` to determine the value.
We use the following equation to perform the interpolation:
.. math::
y\left(x\right) = \left(x-x_{\downarrow}\right)
\frac{\left(y_{\uparrow}-y_{\downarrow}\right)}
{\left(x_{\uparrow}-x_{\downarrow}\right)}
And using the *error propagation formula* from (Knoll, 1999), which is
.. math::
\sigma_{\zeta}^{2} =
\left(\frac{\partial\zeta}{\partial x}\right)^{2}\sigma_{x}^{2}
+
\left(\frac{\partial\zeta}{\partial y}\right)^{2}\sigma_{y}^{2}
for a derived value :math:`\zeta`, we can apply this to interpolation
and get:
.. math::
\sigma_{y}^{2} =
\left(\frac{\partial y}{\partial x}\right)^{2}\sigma_{x}^{2}
+
\left(\frac{\partial y}{\partial x_{\downarrow}}\right)^{2}
\sigma_{x_{\downarrow}}^{2}
+
\left(\frac{\partial y}{\partial x_{\uparrow}}\right)^{2}
\sigma_{x_{\uparrow}}^{2}
+
\left(\frac{\partial y}{\partial y_{\downarrow}}\right)^{2}
\sigma_{y_{\downarrow}}^{2}
+
\left(\frac{\partial y}{\partial y_{\uparrow}}\right)^{2}
\sigma_{y_{\uparrow}}^{2}
and, performing the derivatives, we can get:
.. math::
\sigma_{y}^{2}=\left(\frac{\left(y_{\uparrow}-y_{\downarrow}\right)}
{\left(x_{\uparrow}-x_{\downarrow}\right)}\right)^{2}
\sigma_{x}^{2}+\left(-\left(x-x_{\uparrow}\right)
\frac{\left(y_{\uparrow}-y_{\downarrow}\right)}
{\left(x_{\uparrow}-x_{\downarrow}\right)^{2}}\right)^{2}
\sigma_{x_{\downarrow}}^{2}+\left(\left(x-x_{\downarrow}\right)
\frac{\left(y_{\uparrow}-y_{\downarrow}\right)}{
\left(x_{\uparrow}-x_{\downarrow}\right)^{2}}\right)^{2}
\sigma_{x_{\uparrow}}^{2}\\+\left(-\frac{\left(x-x_{\downarrow}
\right)}{\left(x_{\uparrow}-x_{\downarrow}\right)}\right)^{2}
\sigma_{y_{\downarrow}}^{2}+\left(\frac{
\left(x-x_{\downarrow}\right)}{\left(x_{\uparrow}-x_{\downarrow}
\right)}\right)^{2}\sigma_{y_{\uparrow}}^{2}
Finally, if we take :math:`m=\frac{\left(y_{\uparrow}-y_{\downarrow}
\right)}{\left(x_{\uparrow}-x_{\downarrow}\right)}`, and
:math:`\Delta\xi=\frac{\left(x-x_{\downarrow}\right)}{\left(x_{
\uparrow}-x_{\downarrow}\right)}`, we can get:
.. math::
\sigma_{y}^{2}=m^{2}\left[\sigma_{x}^{2}+
\sigma_{y_{\downarrow}}^{2}+\sigma_{y_{\uparrow}}^{2}+
\Delta\xi^{2}\left(\sigma_{x_{\downarrow}}^{2}+
\sigma_{x_{\uparrow}}^{2}\right)\right]
and the square root of that is the uncertainty.
.. math::
\sigma_{y}=m\sqrt{\sigma_{x}^{2}+\sigma_{y_{\downarrow}}^{2}+
\sigma_{y_{\uparrow}}^{2}+\Delta\xi^{2}\left(
\sigma_{x_{\downarrow}}^{2}+\sigma_{x_{\uparrow}}^{2}\right)}
Note that if an uncertainty in x is not supplied, that the first term
will go to zero, giving
.. math::
\require{cancel}
\sigma_{y}=m\sqrt{\cancel{\sigma_{x}^{2}}
+\sigma_{y_{\downarrow}}^{2}+
\sigma_{y_{\uparrow}}^{2}+\Delta\xi^{2}\left(
\sigma_{x_{\downarrow}}^{2}+\sigma_{x_{\uparrow}}^{2}\right)}
:param float x: The coordinate of which the value is desired.
:param float dx: *Optional* The uncertainty in the x coordinate
requested, given in the above equations as :math:`\sigma_{x}`.
:returns: :math:`\sigma_{y}`, the uncertainty of the value of the curve
at point :math:`x`
:rtype: float
"""
if isinstance(x, float):
x = [x]
u_y = np.ones_like(x)
for index, xi in zip(range(len(x)), x):
if xi in self.x:
u_y[index] = self.u_y[list(self.x).index(xi)]
else:
if xi > self.x.min() and xi < self.x.max():
# if it is in the data range, interpolate
xi1, y1, uxi1, uy1 = self.find_nearest_down(xi, error=True)
xi2, y2, uxi2, uy2 = self.find_nearest_up(xi, error=True)
if uxi1 is None:
uxi1 = 0.0
if uxi2 is None:
uxi2 = 0.0
m = (y2 - y1) / (xi2 - xi1)
dxi = (xi - xi1) / (xi2 - xi1)
u_y[index] = m * np.sqrt(dx**2. + uy1**2. + uy2**2. +
dxi**2. * (uxi1**2. + uxi2**2.))
else:
# if it is not in the data range, extrapolate
u_y[index] = self.extrapolate(xi)
# find the uncertainty extrapolated
if len(u_y) == 1:
u_y = u_y[0]
return u_y
def find_in_data(self, y):
idx = np.argwhere(self.y == y)
return self.x[idx]
def max(self):
return np.nanmax(self.y)
def min(self):
return np.nanmin(self.y)
def find_max(self):
return self.find_in_data(self.max())
def find_min(self):
return self.find_in_data(self.min())
def find(self, y):
r""" ``find(y)`` finds values of :math:`x` that have value :math:`y`
This function takes a parameter :math:`y` and finds all of the ordinate
coordinates that have that value. Basically, this is a root-finding
problem, but since we have a linear interpolation, the actual
root-finding is trivial. The function first finds all intervals in
the dataset that include the value :math:`y`, and then solves the
interpolation to find those :math:`x` values according to
.. math::
x=\left(y-y_{\downarrow}\right)\frac{\left(x_{\uparrow}
-x_{\downarrow}\right)}{\left(y_{\uparrow}-y_{\downarrow}\right)}
+x_{\downarrow}
:param float y: the value which ordinate values are desired
:return: a list of :math:`x` that have value :math:`y`
:rtype: list
"""
# take the entire list of y's and subtract the value. those intervals
# where the sign changes are where the function crosses the value
y_p = y - self.y
# find where the sign change is
(interval, ) = np.where(np.multiply(y_p[:-1], y_p[1:]) < 0.)
# using those intervals, create y_0s and y_1s
y_left = self.y[interval]
y_right = self.y[interval + 1]
x_left = self.x[interval]
x_right = self.x[interval + 1]
# generate an array by solving the point slope form equation
x_where = np.zeros_like(y_left)
for i in range(len(y_left)):
x_where[i] = ((x_right[i] - x_left[i]) / (y_right[i] - y_left[i]))\
* (y - y_left[i]) + x_left[i]
# return all of those intervals
return x_where
def interpolate(self, x):
r""" ``interpolate(x)`` finds the value of a point in the curve range.
The function uses linear interpolation to find the value of a point in
the range of the curve data. First, it uses
:py:func:`find_nearest_down` and :py:func:`find_nearest_up` to find the
two points comprising the interval which :math:`x` exists in. Then, it
casts the linear interpolation as a line in point slope form and solves
.. math::
y=\frac{\left(y_{1}-y_{0}\right)}{\left(x_{1}-x_{0}\right)}
\left(x-x_{0}\right)+y_{0}
:param float x: The coordinate of the desired value.
:return: the value of the curve at :math:`x`
:rtype: float
"""
# if not, we have to do linear interpolation
# find closest value below
x_down, y_down = self.find_nearest_down(x)
# find the closest value above
x_up, y_up = self.find_nearest_up(x)
# find the percentage of x distance between
x_dist = (x - x_down)
# find the slope
m = (y_up - y_down) / (x_up - x_down)
# find the y value
y = y_down + x_dist * m
#print ('interp', y_down, x_dist, y_up, x_down)
return y
def extrapolate(self, x):
r""" ``extrapolate(x)`` finds value of a point out of the curve range.
The function uses linear extrapolation to find the value of a point
without the range of the already existing curve. First, it determines
whether the requested point is above or below the existing data. Then,
it uses :py:func:`find_nearest_down` or :py:func:`find_nearest_up` to
find the nearest point. Then it uses :py:func:`find_nearest_down` or
:py:func:`find_nearest_up` to find the second nearest point. Finally,
it solves the following equation to determine the value
.. math::
y=\frac{\left(y_{\downarrow}-y_{\downarrow \downarrow}
\right)}{\left(x_{\downarrow}-x_{\downarrow \downarrow}\right)}
\left(x-x_{\downarrow}\right)+y_{\downarrow}
:param float x: the ordinate of the value requested
:returns: the value of the curve at point :math:`x`
:rtype: float
"""
# find whether the point is above or below
if x < np.min(self.x):
x1 = self.x[0]
x2 = self.x[1]
elif x > np.max(self.x):
x1 = self.x[-1]
x2 = self.x[-2]
# now find the slope
m = (self.at(x1) - self.at(x2)) / (x1 - x2)
# find the y change between closest point and new point
dy = m * (x - x1)
# find the new point
#print("extrap", x1, self.at(x1), dy)
return self.at(x1) + dy
def find_nearest_down(self, x, error=False):
r""" ``find_nearest_down(x)`` will find the actual data point that is
closest in negative ``x``-distance to the data point ``x`` passed to
the function.
:param float x: The data point ``x`` which to find the closest value
below.
:param bool error: If true, the u_x and u_y will be returned at that
point, even if they are ``None``.
:return: a tuple containing the ``x`` and ``y`` value of the data point
immediately below in ``x`` value to the value passed to the
function, optionally containing the ``u_x`` and ``u_y`` value.
"""
x = float(x)
dx = x - self.x
dx[dx < 0.] = np.inf
idx = np.abs(dx).argmin()
if error:
ux = None
uy = None
if self.u_x is not None:
ux = self.u_x[idx]
if self.u_y is not None:
uy = self.u_y[idx]
return (self.x[idx], self.y[idx], ux, uy)
else:
return (self.x[idx], self.y[idx])
def find_nearest_up(self, x, error=False):
r""" ``find_nearest_up(x, error=False)`` will find the actual data
point that is closest in positive ``x``-distance to the data point
``x`` passed to the function.
:param float x: The data point ``x`` which to find the closest value
above.
:param bool error: If true, the u_x and u_y will be returned at that
point, even if they are ``None``.
:return: a tuple containing the ``x`` and ``y`` value of the data point
immediately above in ``x`` value to the value passed to the
function, optionally containing the ``u_x`` and ``u_y`` value.
:rtype: tuple
"""
x = float(x)
dx = x - self.x
dx[dx > 0.] = np.inf
idx = np.abs(dx).argmin()
if error:
ux = None
uy = None
if self.u_x is not None:
ux = self.u_x[idx]
if self.u_y is not None:
uy = self.u_y[idx]
return (self.x[idx], self.y[idx], ux, uy)
else:
return (self.x[idx], self.y[idx])
def mean(self, *args, **kwargs):
return self.average(*args, **kwargs)
def average(self, xmin=None, xmax=None):
r""" ``average()`` will find the average ``y``-value across the entire
range.
:param float xmin: The lower bound of ``x``-value to include in the
average. Default: ``x.min()``
:param float xmax: The upper bound of ``x``-value to include in the
average. Default: ``x.max()``
:return: A float value equal to
.. math::
\bar{y} = \frac{\int_{x_{min}}^{x_{max}} y dx}
{\int_{x_{min}}^{x_{max}} dx}
:rtype: float
"""
if xmin is None:
xmin = self.x.min()
if xmax is None:
xmax = self.x.max()
mean = self.integrate(xmin, xmax) \
/ (xmax - xmin)
return mean
@staticmethod
def round_to_amt(num, amt):
r""" ``round_to_amt`` is a static method that round a number to an
arbitrary interval
Given a number ``num`` such as :math:`1.2` and an amount ``amt`` such
as :math:`0.25`, ``round_to_amt`` would return :math:`1.20` because
that is the closest value downward on a :math:`0.25` wide grid.
:param float num: the number to be rounded.
:param float amt: the amount to round the number to.
:returns: the number after it has been rounded.
"""
return float(np.floor(num / amt)) * amt
def rolling_avg(self, bin_width=0.1):
r""" ``rolling_avg(bin_width)`` redistributes the data on a certain bin
width, propogating the error needed.
If we have data in an array such as
.. math::
\left[\begin{array}{c}
\vec{x}\\
\vec{y}
\end{array}\right]=\left[\begin{array}{cccc}
0.1 & 0.75 & 1.75 & 1.9\\
1.0 & 2.0 & 3.0 & 4.0
\end{array}\right]
and we want to see the data only on integer bins, we will return
.. math::
\left[\begin{array}{c}
\vec{x}\\
\vec{y}
\end{array}\right]=\left[\begin{array}{cc}
0.0 & 2.0\\
1.5 & 3.5
\end{array}\right]
This function will also return the uncertainty in each bin, taking into
account both the uncertainty of each value in the bin, as well as the
uncertainty caused by standard deviation within the bin itself. This
can be expressed by
.. math::
\left[\begin{array}{c}
\vec{x}\\
\vec{y}\\
\vec{u}_{x}\\
\vec{u}_{y}
\end{array}\right]=\left[\begin{array}{c}
\frac{\sum_{x\text{ in bin}}x}{N_{x}}\\
\frac{\sum_{x\text{ in bin}}y}{N_{y}}\\
\frac{\sum_{x\text{ in bin}}\sqrt{
\left(\frac{\text{bin width}}{2}\right)^{2}
+\text{mean}\left(\sigma_{x}\right)^{2}}}{N_{x}}\\
\frac{\sum_{x\text{ in bin}}\sqrt{\sigma_{y}^{2}
+stdev_{y}^{2}}}{N_{x}}
\end{array}\right]
:param float bin_width: The width in which the redistribution will
happen.
:rtype: The redistributed curve.
"""
new = self.copy()
new_x = []
new_y = []
new_u_x = []
new_u_y = []
# find the start bin (round the minimum value to the next lowest bin)
bin_start = self.round_to_amt(np.min(self.x), bin_width)
# then, for everything in a certain bin:
for left in np.arange(bin_start, np.max(self.x) + bin_width,
bin_width):
# average to find the mean
sample = [y for x, y in zip(self.x, self.y)
if x >= left and x < left + bin_width]
if self.u_y is not None:
u_sample = [u_y for x, u_y in zip(self.x, self.u_y)
if x >= left and x < left + bin_width]
if self.u_x is not None:
u_left = [u_x for x, u_x in zip(self.x, self.u_x)
if x >= left and x < left + bin_width]
if len(sample) > 0:
# determine the standard deviation
std = np.std(sample)
# propagate the uncertainty and add the standard deviation
if self.u_y is not None:
u_y_sample = np.sqrt(np.mean(u_sample)**2 + std**2)
else:
u_y_sample = std
if self.u_x is not None:
u_x_sample = np.sqrt((bin_width / 2.)**2 +
(np.mean(u_left))**2)
else:
u_x_sample = bin_width / 2.
# add to new distribution
new_x.extend([left + bin_width / 2.])
new_y.extend([np.mean(sample)])
new_u_y.extend([u_y_sample])
new_u_x.extend([u_x_sample])
new.x = np.array(new_x)
new.y = np.array(new_y)
new.u_y = np.array(new_u_y)
new.u_x = np.array(new_u_x)
new.sort()
return new
###########################################################################
# Data Integration and Normalization - tests in tests/test_data_integ.py
###########################################################################
def integrate(self, x_min=None, x_max=None, quad='lin', numpoints=None):
r""" ``integrate`` integrates under the curve.
``integrate`` will integrate under the given curve, providing the
result to :math:`\int_{x_{min}}^{x_{max}}`. ``x_min`` and ``x_max``
can be provided to change the range of integration. ``quad`` can also
be provided to change the quadrature, but the only quadrature currently
supported is ``'lin'`` which uses trapezoidal rule to integrate the
curve.
:param float x_min: *Optional* the bottom of the range to be integrated.
:param float x_max: *Optional* the top of the range to be integrated.
:param str quad: *Optional* the "quadrature" to be used for numerical
integration.
:returns: the result of the integration.
"""
if self.data != 'binned':
if x_min is None:
x_min = np.min(self.x)
if x_max is None:
x_max = np.max(self.x)
if numpoints is None:
numpoints = 10
if quad == 'lin':
return self.trapezoidal(x_min=x_min, x_max=x_max, quad=quad, numpoints=numpoints)
elif quad == 4:
return self.boole(x_min=x_min, x_max=x_max, numpoints=numpoints)
else:
return self.bin_int(x_min, x_max)
def boole(self, x_min, x_max, numpoints):
_sum = 0.0
pts = np.linspace(x_min, x_max, numpoints)
for a, b in zip(pts[:-1], pts[1:]):
deltax = (b - a) / 4.0
_sum += (b - a) * (7.0 * self.at(a) + 32.0 * self.at(a + deltax)
+ 12.0 * self.at(a + 2.0 * deltax)
+ 32.0 * self.at(a + 3.0 * deltax)
+ self.at(b)) / 90.0
return _sum
def bin_int(self, x_min=None, x_max=None):
r""" ``bin_int`` integrates a bar chart.
``bin_int`` is a convenience function used through the class when
calling ``integrate``. It integrates for curves that have the
``.data`` property set to ``'binned'``. It does this simply by summing
the bin width and bin heights, such that
.. math::
\int_{x_{min}}^{x_{max}} \approx \sum_{i=1,\dots}^{N} \Delta x
\cdot y
Note that this function assumes that the last bin has the same bin
width as the penultimate bin width. This could be remedied in certain
ways, but I'm not sure which to choose yet.
:param float x_min: *Optional* the bottom of the range to be integrated.
:param float x_max: *Optional* the top of the range to be integrated.
:returns: the result of the integration.
"""
bin_widths = [x2 - x1 for x1, x2 in zip(self.x[:-1], self.x[1:])]
# assume the last bin has the same width
bin_widths = bin_widths + [bin_widths[-1]]
bin_heights = np.nan_to_num(self.y)
if x_min is None:
x_min = np.nanmin(self.x)
if x_max is None:
x_max = np.nanmax(self.x) + bin_widths[-1]
integral = 0.0
# for each bin, find what fraction is within the range
for _x, bw, bh in zip(self.x, bin_widths, bin_heights):
if bw > 0:
fractional_bin_width = np.nansum([np.nanmin([_x + bw, x_max]),
- np.nanmax([_x, x_min])])# / bw
else:
fractional_bin_width = 0.0
if fractional_bin_width < 0:
fractional_bin_width = 0.0
integral += fractional_bin_width * bh
return integral
def prime(self, epsilon=None):
if epsilon is None:
epsilon = (np.max(self.x) - np.min(self.x)) / 100.0
_x = self.x
_y = [self.derivative(__x, epsilon=epsilon) for __x in _x]
return curve(_x, _y, name=self.name + "_prime")
def find_zero_crossings(self):
zcs = []
for _x, y1, y2 in zip(self.x[:-11], self.y[:-1], self.y[1:]):
if np.sign(y1) != np.sign(y2):
zcs.append(_x)
return zcs
def derivative(self, x, epsilon=None):
r""" ``derivative(x)`` takes the derivative at point :math:`x`.
``derivative(x)`` takes the derivative at point provided ``x``, using a
surrounding increment of :math:`\varepsilon`, provided by ``epsilon``.
``epsilon`` has a default value of :math:`\min \frac{\Delta x}{100}`,
but you can specify this smaller if your points are closer. Because
we're currently only using linear integration, this won't change a thing
as long as its smaller than the change in your ordinate variable.
:param float x: The ordinate to take the derivative at.
:param float epsilon: The :math:`\Delta x` around the point at
:math:`x` used to calculate the derivative.
:returns: the derivative at point ``x``
"""
if epsilon is None:
xs = self.x[1:] - self.x[:-1]
epsilon = np.min(np.abs(xs)) / 100.
return (self.at(x + epsilon) - self.at(x - epsilon)) / (2. * epsilon)
def trapezoidal(self, x_min, x_max, quad='lin', numpoints=None):
r""" ``trapezoidal()`` uses the trapezoidal rule to integrate the curve.
``trapezoidal(x_min, x_max)`` integrates the curve using the
trapezoidal rule, i.e.
.. math::
\int_{x_{min}}^{x_{max}}y dx \approx
\sum_{i=1,\dots}^{N} \left(x_{\uparrow} - x_{\downarrow}\right)
\cdot \left( \frac{y_{\downarrow} + y_{uparrow}}{2}\right)
Right now, it uses :math:`10 \times N_{x}` points to integrate between
values, but that is completely arbitrary and I'll be looking into
changing this. There is also the ability to pass ``quad`` to the
function as ``'log'`` **CURRENTLY FAILING** and it will calculate the
trapezoids in logarithmic space, giving exact integrals for exponential
functions.
:param float x_min: the left bound of integration.
:param float x_max: the right bound of integration.
:param str quad: the type of quadrature to use, currently only ``'lin'``
or ``'log'``
:returns: the integral of the curve from trapezoidal rule.
"""
if numpoints is None:
numpoints = len(self.x) * 10