Example #1
def _removeBaseline(spectra, roi, method, index=-1, inplace=False, **kwargs):
    if inplace:
        spectra_c = spectra
    else:
        spectra_c = spectra.copy()

    if index == -1:  # All signals
        for i in spectra_c.index:
            new_sig, __ = rp.baseline(spectra_c.wavenumbers,
                                      spectra_c.intensity[i], roi,
                                      method, **kwargs)
            spectra_c.intensity[i] = new_sig.reshape(-1,)
    else:
        if isinstance(index, (tuple, list, np.ndarray)):  # Multiple signals
            for i in index:
                new_sig, __ = rp.baseline(spectra_c.wavenumbers,
                                          spectra_c.intensity[i], roi,
                                          method, **kwargs)
                spectra_c.intensity[i] = new_sig.reshape(-1,)
        elif isinstance(index, int):  # Only 1 signal
            new_sig, __ = rp.baseline(spectra_c.wavenumbers,
                                      spectra_c.intensity[index], roi,
                                      method, **kwargs)
            spectra_c.intensity[index] = new_sig.reshape(-1,)

    if not inplace:
        return spectra_c
Example #2
    def test_baseline(self):

        x2 = np.arange(1,100,0.5)
        base_ori = 0.001*x2
        base_exp = rampy.funexp(x2,0.1,0.05,50.)
        base_log = rampy.funlog(x2,1.,1.,1.,1.)


        y_ori = 1.0 * np.exp(-np.log(2) * ((x2-50.0)/10.0)**2) + 0.05*np.random.randn(len(x2))

        y2 = base_ori + y_ori
        y_exp = base_exp+y_ori
        y_log = base_log+y_ori


        # need to define some fitting regions for the spline
        roi2 = np.array([[1,20],[80,100]])

        # calculating the baselines
        ycalc1, base1 = rampy.baseline(x2,y2,roi2,'poly',polynomial_order=1)
        #ycalc2, base2 = rampy.baseline(x2,y2,roi2,'gcvspline',s=0.1 )
        ycalc3, base3 = rampy.baseline(x2,y2,roi2,'unispline',s=1e0)
        ycalc4, base4 = rampy.baseline(x2,y2,roi2,'als',lam=10**7,p=0.05)
        ycalc5, base5 = rampy.baseline(x2,y2,roi2,'arPLS',lam=10**7,ratio=0.1)
        ycalc6, base6 = rampy.baseline(x2,y_exp,roi2,'exp',p0_exp=[0.1,0.1,45])  # fit the exp baseline to the signal that actually contains it

        # Testing the shapes
        np.testing.assert_equal(ycalc1.shape,base1.shape)
        #np.testing.assert_equal(ycalc2.shape,base2.shape)
        np.testing.assert_equal(ycalc3.shape,base3.shape)
        np.testing.assert_equal(ycalc4.shape,base4.shape)
        np.testing.assert_equal(ycalc5.shape,base5.shape)
        np.testing.assert_equal(ycalc6.shape,base6.shape)
        #np.testing.assert_equal(ycalc7.shape,base7.shape)

        # testing the baselines
        np.testing.assert_almost_equal(base_ori,base1[:,0],0)
        #np.testing.assert_almost_equal(base_ori,base2[:,0],0)
        np.testing.assert_almost_equal(base_ori,base3[:,0],0)
        np.testing.assert_almost_equal(base_ori,base4[:,0],0)
        np.testing.assert_almost_equal(base_ori,base5[:,0],0)
        #exp-log cases
        np.testing.assert_almost_equal(base_exp,base6[:,0],0)
        #np.testing.assert_almost_equal(base_log,base7[:,0],0)

        #testing the corrected data
        np.testing.assert_almost_equal(y_ori,ycalc1[:,0],1)
        #np.testing.assert_almost_equal(y_ori,ycalc2[:,0],0)
        np.testing.assert_almost_equal(y_ori,ycalc3[:,0],0)
        np.testing.assert_almost_equal(y_ori,ycalc4[:,0],0)
        np.testing.assert_almost_equal(y_ori,ycalc5[:,0],0)
        np.testing.assert_almost_equal(y_ori,ycalc6[:,0],0)
Example #3
def Ctreat(Ramanshift1, Cho_I, n, Cho_path):
    Cho_I0 = np.mean(Cho_I, axis=0)
    # Crop the data to 350-4000 cm-1 and apply Savitzky-Golay smoothing #
    Cho_I_SG = sp.savgol_filter(Cho_I0[64:1014], 5, 2)
    # Baseline removal #
    x = Ramanshift1[64:1014]
    y2 = Cho_I_SG
    roi = np.array([[350, 4000]])
    y2_arpls, base_y2 = rampy.baseline(x, y2, roi, 'arPLS', lam=10 ** 5, ratio=0.001)
    # Normalization #
    Cho_I_Nor = pd.Normalization(y2_arpls)  # `pd` here is a custom preprocessing module (pandas is imported as `pds`)
    Cho_I_Nor_n = np.around(Cho_I_Nor, decimals=3)
    x_ = x[:, np.newaxis]
    np.savez(Cho_path + '/' + 'Ramanspectra_  (' + str(n) + ').npz', x_=x_, Ramanspectra=Cho_I_Nor_n)
    # sio.savemat(Hem_path+'/' + 'Ramanspectra_  (' + str(n) + ').mat', {'ramanshift':x_, 'ramanspectra':Hem_I_Nor})
    # with open (Hem_path + '/all.mat', 'ab') as mt:
    #     sio.savemat(mt, {'ramanspectra'+str(n):Hem_I_Nor})
    # the part below got "harmonized" (censored)
    x0 = pds.DataFrame(x, columns=['Ramanshift'])
    Cho_I_Nor = pds.DataFrame(Cho_I_Nor, columns=['Ramanspectra ' + str(n)])
    pds.merge(x0, Cho_I_Nor, how='outer', left_index=True, right_index=True). \
        to_csv(Cho_path + '/' + 'Ramanspectra_  (' + str(n) + ').csv', index=False, float_format='%.3f')
    a = pds.read_csv(Cho_path + '/all.csv')
    b = pds.read_csv(Cho_path + '/' + 'Ramanspectra_  (' + str(n) + ').csv')
    a.merge(b, how='outer', on='Ramanshift').to_csv(Cho_path + '/all.csv', index=False, float_format='%.3f')
    return Cho_I_Nor
Example #4
def subtract_background(x, y, method="arPLS", lam=10 ** 6):
    """
    Subtract baseline using defaults
    """
    bir = np.array([[np.min(x), np.max(x)]])
    yc, bg = rp.baseline(x, y, bir, method, lam=lam)

    return yc.T[0]
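
A minimal usage sketch for the wrapper above. This is hedged: the synthetic spectrum is illustrative and not from the original source; it only assumes numpy and rampy are importable.

import numpy as np
import rampy as rp

# synthetic spectrum: one Gaussian peak sitting on a sloped background (illustrative data)
x = np.arange(100.0, 1000.0, 1.0)
y = np.exp(-np.log(2) * ((x - 500.0) / 30.0) ** 2) + 0.0005 * x

y_corrected = subtract_background(x, y, method="arPLS", lam=10**6)
print(y_corrected.shape)  # 1-D array, same length as x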
Example #5
def baseline_removal(df_spectrum):
    als_total = []
    # need to define some fitting regions for the spline
    roi = np.array([[0, 100], [200, 220], [280, 290], [420, 430], [480, 500]])
    # background model: a large gaussian + linear (note: bkg is computed but never used below)
    x = np.linspace(50, 1400, 1015)
    bkg = (60.0 * np.exp(-np.log(2) *
                         ((x - 250.0) / 200.0)**2) + 0.1 * x) * 0.001
    for i in range(len(df_spectrum)):
        ycalc_als, base_als = rampy.baseline(x,
                                             df_spectrum.iloc[i],
                                             roi,
                                             'als',
                                             lam=10**7,
                                             p=0.05)
        B = np.asarray(ycalc_als)
        als_total.append(B)
    return als_total
Example #6
    def background(self, bir, method="poly", **kwargs):
        """correct a background from the initial signal I on a map using rampy.baseline

        Parameters
        ----------
        bir : ndarray
            arrays of the background interpolation regions.
        method : string
            see rampy.baseline documentation for methods available. Default is polynomial

        All kwargs arguments are forwarded to rampy.baseline() and can be used there.

        Returns
        -------
        Background and corrected spectra are available at self.I_background and self.I_corrected.
        """
        self.I_background = np.copy(self.I)
        self.I_corrected = np.copy(self.I)
        for i in range(len(self.X)):
            y_, bkg_ = rp.baseline(self.w, self.I[:, i], bir, method, **kwargs)
            self.I_corrected[:, i] = y_.ravel()
            self.I_background[:, i] = bkg_.ravel()
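
The loop above applies rampy.baseline column by column over a map. Below is a standalone sketch of the same pattern on a synthetic 2-D intensity array; the names and the synthetic data are illustrative, not part of the original class.

import numpy as np
import rampy as rp

w = np.arange(200.0, 1200.0, 1.0)                    # common wavenumber axis
I = np.random.rand(len(w), 5) + 0.001 * w[:, None]   # five noisy spectra on a sloped background (illustrative)
bir = np.array([[200., 300.], [1100., 1200.]])       # background interpolation regions

I_corrected = np.zeros_like(I)
I_background = np.zeros_like(I)
for i in range(I.shape[1]):
    y_, bkg_ = rp.baseline(w, I[:, i], bir, "poly", polynomial_order=1)
    I_corrected[:, i] = y_.ravel()
    I_background[:, i] = bkg_.ravel()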
Example #7
def data_process(data_array):
    """
    1. Remove the first 500 and last 600 values from each channel
       and build a new matrix - useful_data
    2. correct the baseline - corrected_data
    3. smooth the signal with a Savitzky-Golay filter - filtered_data
    """
    num_data, num_sensor = data_array.shape
    # print("data array is ", num_data, num_sensor)

    time = data_array[500:num_data - 600, 0]
    kiri_1 = data_array[500:num_data - 600, 1]
    kiri_2 = data_array[500:num_data - 600, 2]
    kiri_3 = data_array[500:num_data - 600, 3]
    kiri_4 = data_array[500:num_data - 600, 4]
    kiri_5 = data_array[500:num_data - 600, 5]
    kiri_6 = data_array[500:num_data - 600, 6]
    roi = np.array([[1, 2], [5, 5.5], [9.5, 10]])

    useful_data = np.column_stack(
        (kiri_1, kiri_2, kiri_3, kiri_4, kiri_5, kiri_6))
    # print("useful_data is ", useful_data.shape, type(useful_data))

    y1, base1 = rampy.baseline(time, kiri_1, roi, 'poly', polynomial_order=1)
    y2, base2 = rampy.baseline(time, kiri_2, roi, 'poly', polynomial_order=1)
    y3, base3 = rampy.baseline(time, kiri_3, roi, 'poly', polynomial_order=1)
    y4, base4 = rampy.baseline(time, kiri_4, roi, 'poly', polynomial_order=1)
    y5, base5 = rampy.baseline(time, kiri_5, roi, 'poly', polynomial_order=1)
    y6, base6 = rampy.baseline(time, kiri_6, roi, 'poly', polynomial_order=1)

    base = np.column_stack((base1, base2, base3, base4, base5, base6))

    corrected_data = np.zeros((len(time), 6))
    filtered_data = np.zeros((len(time), 6))
    for x in range(6):
        corrected_data[:, x] = useful_data[:, x] - base[:, x]
        filtered_data[:, x] = signal.savgol_filter(corrected_data[:, x], 11, 5)

    processed_data = np.column_stack((time, filtered_data))

    return processed_data
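
A hedged usage sketch for data_process: the file name and loader are hypothetical, and it assumes the imports the function relies on (numpy, rampy, scipy.signal as signal) are already in place. Any array with a time column followed by six sensor columns and more than ~1100 rows will do; the roi above also assumes the time axis spans roughly 1-10 (e.g., seconds).

import numpy as np

data_array = np.loadtxt("sensor_log.txt")  # hypothetical file: columns = time, kiri_1 .. kiri_6
processed = data_process(data_array)       # columns = time, six baseline-corrected + filtered signals
print(processed.shape)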
Example #8
        ########################## D, G, 2D peak fitting ############################

        #load data
        xg = filedatag[:,0]
        yg_org = filedatag[:,1]
        x2d = filedata2d[:,0]
        y2d_org = filedata2d[:,1]/ratio
        
        #smooth
        yg_s = rp.smooth(xg,yg_org,method="whittaker",Lambda=10)
        y2d_s = rp.smooth(x2d,y2d_org,method="whittaker",Lambda=10)        
        
        #remove background
            #g peak
        bir = np.array([(min(xg),1030),(1900,max(xg))])
        yg_cor, background = rp.baseline(xg,yg_s,bir,"arPLS",lam=10**8)
        yg_corr = yg_cor[:,0]
        
            #2d peak
        bir = np.array([(min(x2d),2550),(3100,max(x2d))])
        y2d_cor, background = rp.baseline(x2d,y2d_s,bir,"arPLS",lam=10**8)
        y2d_corr = y2d_cor[:,0]        
        
        # stitch the two corrected segments into one spectrum
        y = np.concatenate((y2d_corr,yg_corr))
        x = np.concatenate((x2d,xg))
        
                
        bir = np.array([(min(x),1050.),(1880.,2300.), (2400.,2500),(3050.,max(x))])
        yg_corrected, background = rp.baseline(x,y,bir,"arPLS",lam=10**8)
        y = yg_corrected[:,0]
Example #9
plt.xlabel("Raman shift, cm$^{-1}$", fontsize=12)
plt.ylabel("Normalized intensity, a. u.", fontsize=12)
plt.title("Fig. 1: the raw data", fontsize=12, fontweight="bold")

# We are interested in fitting the 870-1300 cm$^{-1}$ portion of this spectrum, which can be assigned to the various
# symmetric and asymmetric stretching vibrations of Si-O bonds in the SiO$_2$ tetrahedra present in the glass network
# (see the literature cited above for details).

# # Baseline Removal
#
# The first thing we notice in Fig. 1 is that we have to remove a baseline, because this spectrum is shifted from 0 by
# some "background" scattering. For that, we can use the rp.baseline() function.

bir = np.array([(1000, 1100),
                (1800, 1900)])  # The regions where the baseline will be fitted
y_corr, y_base = rp.baseline(
    x, y, bir, 'poly', polynomial_order=2)  # We fit a polynomial background.

f2 = plt.figure(2, figsize=(10, 10))
plt.plot(x, y_corr)

# Now we will do some manipulation to isolate the portion of the spectrum we are interested in, in a single variable.
# We will assume that the errors have not been drastically affected by the correction process (in some cases they can
# be, but this one is quite straightforward), so we will use the initial relative errors stored in the "ese0" variable.

# signal selection
lb = 1100  # The lower boundary of interest
hb = 1800  # The upper boundary of interest

x_fit = x[np.where((x > lb) & (x < hb))]
y_fit = y_corr[np.where((x > lb) & (x < hb))]
Example #10
    def test_baseline(self):

        x2 = np.arange(1, 100, 0.5)
        base_ori = 0.001 * x2
        base_exp = rampy.funexp(x2, 0.1, 0.05, 50.)
        base_log = rampy.funlog(x2, 1., 1., 1., 1.)

        y_ori = 1.0 * np.exp(-np.log(2) * (
            (x2 - 50.0) / 10.0)**2) + 0.05 * np.random.randn(len(x2))

        y2 = base_ori + y_ori
        y_exp = base_exp + y_ori
        y_log = base_log + y_ori

        # need to define some fitting regions for the spline
        roi2 = np.array([[1, 20], [80, 100]])

        # calculating the baselines
        ycalc1, base1 = rampy.baseline(x2,
                                       y2,
                                       roi2,
                                       'poly',
                                       polynomial_order=1)
        #ycalc2, base2 = rampy.baseline(x2,y2,roi2,'gcvspline',s=0.1 )
        ycalc3, base3 = rampy.baseline(x2, y2, roi2, 'unispline', s=1e0)
        ycalc4, base4 = rampy.baseline(x2, y2, roi2, 'als', lam=10**7, p=0.05)
        ycalc5, base5 = rampy.baseline(x2,
                                       y2,
                                       roi2,
                                       'arPLS',
                                       lam=10**7,
                                       ratio=0.1)
        ycalc6, base6 = rampy.baseline(x2, y2, roi2, 'drPLS')
        ycalc7, base7 = rampy.baseline(x2,
                                       y_exp,
                                       roi2,
                                       'exp',
                                       p0_exp=[0.1, 0.1, 45])  # fit the exp baseline to the signal that actually contains it

        # Testing the shapes
        np.testing.assert_equal(ycalc1.shape, base1.shape)
        #np.testing.assert_equal(ycalc2.shape,base2.shape)
        np.testing.assert_equal(ycalc3.shape, base3.shape)
        np.testing.assert_equal(ycalc4.shape, base4.shape)
        np.testing.assert_equal(ycalc5.shape, base5.shape)
        np.testing.assert_equal(ycalc6.shape, base6.shape)
        np.testing.assert_equal(ycalc7.shape, base7.shape)

        # testing the baselines
        np.testing.assert_almost_equal(base_ori, base1[:, 0], 0)
        #np.testing.assert_almost_equal(base_ori,base2[:,0],0)
        np.testing.assert_almost_equal(base_ori, base3[:, 0], 0)
        np.testing.assert_almost_equal(base_ori, base4[:, 0], 0)
        np.testing.assert_almost_equal(base_ori, base5[:, 0], 0)
        np.testing.assert_almost_equal(base_ori, base6[:, 0], 0)
        #exp-log cases
        np.testing.assert_almost_equal(base_exp, base7[:, 0], 0)

        #testing the corrected data
        np.testing.assert_almost_equal(y_ori, ycalc1[:, 0], 1)
        #np.testing.assert_almost_equal(y_ori,ycalc2[:,0],0)
        np.testing.assert_almost_equal(y_ori, ycalc3[:, 0], 0)
        np.testing.assert_almost_equal(y_ori, ycalc4[:, 0], 0)
        np.testing.assert_almost_equal(y_ori, ycalc5[:, 0], 0)
        np.testing.assert_almost_equal(y_ori, ycalc6[:, 0], 0)
        np.testing.assert_almost_equal(y_exp, ycalc7[:, 0], 0)
Example #11
def Htreat(Ramanshift1, Hem_I, n, Hem_path):
    Hem_I0 = np.mean(Hem_I, axis=0)
    # Crop the data to 350-4000 cm-1 and apply Savitzky-Golay smoothing #
    Hem_I_SG = sp.savgol_filter(Hem_I0[64:1014], 5, 2)
    # Baseline removal #
    x = Ramanshift1[64:1014]
    y3 = Hem_I_SG
    roi = np.array([[350, 4000]])
    y3_arpls, base_y3 = rampy.baseline(x, y3, roi, 'arPLS', lam=10 ** 5, ratio=0.001)
    # Normalization #
    Hem_I_Nor = pd.Normalization(y3_arpls)  # `pd` here is a custom preprocessing module (pandas is imported as `pds`)
    Hem_I_Nor_n = np.around(Hem_I_Nor, decimals=3)
    x_ = x[:, np.newaxis]
    np.savez(Hem_path + '/' + 'Ramanspectra_  (' + str(n) + ').npz', x_=x_, Ramanspectra=Hem_I_Nor_n)
    # sio.savemat(Hem_path+'/' + 'Ramanspectra_  (' + str(n) + ').mat', {'ramanshift':x_, 'ramanspectra':Hem_I_Nor})
    # with open (Hem_path + '/all.mat', 'ab') as mt:
    #     sio.savemat(mt, {'ramanspectra'+str(n):Hem_I_Nor})
    # the part below got "harmonized" (censored)
    x0 = pds.DataFrame(x, columns=['Ramanshift'])
    Hem_I_Nor = pds.DataFrame(Hem_I_Nor, columns=['Ramanspectra ' + str(n)])
    pds.merge(x0, Hem_I_Nor, how='outer', left_index=True, right_index=True). \
        to_csv(Hem_path + '/' + 'Ramanspectra_  (' + str(n) + ').csv', index=False, float_format='%.3f')
    a = pds.read_csv(Hem_path + '/all.csv')
    b = pds.read_csv(Hem_path + '/' + 'Ramanspectra_  (' + str(n) + ').csv')
    a.merge(b, how='outer', on='Ramanshift').to_csv(Hem_path + '/all.csv', index=False, float_format='%.3f')
    return Hem_I_Nor

    # # Plotting #
    # # figure width and height are in inches;
    # # calling figure creates a plot object and makes it the current one.
    # plt.figure(num=1, figsize=(8, 4))
    # # makes the fonts look nicer
    # # give each plotted curve a name; it is shown in the legend.
    # # wrapping a string in "$" makes matplotlib render it with its built-in LaTeX engine.
    # # color: the curve's color
    # # linewidth: the curve's width
    # plt.plot(x, Tri_I_Nor, label="$Tri$", color="blue", linewidth=1)
    # plt.plot(x, Cho_I_Nor + 1, label="$Cho$", color="red", linewidth=1)
    # plt.plot(x, Hem_I_Nor + 2, label="$Hem$", color="green", linewidth=1)
    #
    # plt.figure(num=2, figsize=(8, 4))
    # # (same styling notes as above)
    # plt.plot(Ramanshift, Tri_I0, label="$Tri$", color="blue", linewidth=1)
    # # X-axis label
    # plt.xlabel("Raman shift/cm-1")
    # # Y-axis label
    # plt.ylabel("Intensity")
    # # chart title
    # plt.title("Raman spectrum")
    # # Y-axis range
    # plt.ylim()
    # # show the legend
    # plt.legend()
    # # show all the plot objects we created.
    # plt.show()
Example #12
    def detect_image(self, raman_data):

        old_raman = copy.deepcopy(raman_data)

        raman_data = np.array(list(map(float, raman_data)),
                              dtype=np.float32).reshape(-1, 1, 1)
        raman_shape = np.array(np.shape(raman_data)[0:2])
        old_width = raman_shape[0]
        old_height = raman_shape[1]

        raman = np.array(raman_data, dtype=np.float64)

        raman = (raman - (np.min(raman))) / (np.max(raman) - np.min(raman))
        raman = np.expand_dims(raman, 0)
        # raman shape = [1,1044,1,1]
        preds = self.model_rpn.predict(raman)
        # decode the predictions
        anchors = get_anchors((66, 1), old_width, old_height)
        # preds: the RPN outputs, three tensors
        # first (1,198,1): objectness confidence for each anchor
        # second (1,198,4): anchor-box adjustment parameters
        # third (1,66,1,1024): the shared feature map
        preds[1][..., 3] = 1
        anchors[:, 1] = 0
        rpn_results = self.bbox_util.detection_out(preds,
                                                   anchors,
                                                   1,
                                                   confidence_threshold=0)
        R = rpn_results[0][:, 2:]

        R[:,
          0] = np.array(np.round(R[:, 0] * old_width / self.config.rpn_stride),
                        dtype=np.int32)
        R[:, 1] = np.array(np.round(R[:, 1] * old_height), dtype=np.int32)
        R[:,
          2] = np.array(np.round(R[:, 2] * old_width / self.config.rpn_stride),
                        dtype=np.int32)
        R[:, 3] = np.array(np.round(R[:, 3] * old_height), dtype=np.int32)

        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]
        base_layer = preds[2]

        delete_line = []
        for i, r in enumerate(R):
            if r[2] < 1 or r[3] < 1:
                delete_line.append(i)
        R = np.delete(R, delete_line, axis=0)

        bboxes = []
        probs = []
        labels = []
        for jk in range(R.shape[0] // self.config.num_rois + 1):
            ROIs = np.expand_dims(R[self.config.num_rois *
                                    jk:self.config.num_rois * (jk + 1), :],
                                  axis=0)

            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // self.config.num_rois:
                #pad R
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], self.config.num_rois,
                                curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = self.model_classifier.predict([base_layer, ROIs])

            for ii in range(P_cls.shape[1]):
                if np.max(P_cls[0, ii, :-1]) < self.confidence:
                    continue

                label = np.argmax(P_cls[0, ii, :-1])

                (x, y, w, h) = ROIs[0, ii, :]

                cls_num = np.argmax(P_cls[0, ii, :-1])

                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= self.config.classifier_regr_std[0]
                ty /= self.config.classifier_regr_std[1]
                tw /= self.config.classifier_regr_std[2]
                th /= self.config.classifier_regr_std[3]

                cx = x + w / 2.
                cy = y + h / 2.
                cx1 = tx * w + cx
                cy1 = ty * h + cy
                w1 = math.exp(tw) * w
                h1 = math.exp(th) * h

                x1 = cx1 - w1 / 2.
                y1 = cy1 - h1 / 2.

                x2 = cx1 + w1 / 2
                y2 = cy1 + h1 / 2

                x1 = int(round(x1))
                y1 = int(round(y1))
                x2 = int(round(x2))
                y2 = int(round(y2))

                bboxes.append([x1, y1, x2, y2])
                probs.append(np.max(P_cls[0, ii, :-1]))
                labels.append(label)

        if len(bboxes) == 0:
            print("None boxes")

            Raman_shift = Xexcel('./raman_data/raw_data/RamanShift.xlsx',
                                 'Sheet1')
            Normal_data = Yexcel(
                './raman_data/raw_data/yayin/no_origin_label0.xlsx',
                'no_origin_label0')  # Normal
            Normal_data = Ygetmean(Normal_data)

            Cancer_data = np.array(old_raman)
            Raman_shift = np.array(Raman_shift)
            Normal_data = np.array(Normal_data)

            # Crop the data to 350-4000 cm-1 #
            Lower_limit = np.max(np.where(Raman_shift < 350)) + 1
            Upper_limit = np.min(np.where(Raman_shift > 4000)) + 1

            Raman_shift_limit = Raman_shift[Lower_limit:Upper_limit]
            Cancer_data_limit = Cancer_data[Lower_limit:Upper_limit]
            Normal_data_limit = Normal_data[Lower_limit:Upper_limit]

            # Savitzky-Golay smoothing #
            Cancer_data_SG = sp.savgol_filter(Cancer_data_limit, 11, 2)
            Normal_data_SG = sp.savgol_filter(Normal_data_limit, 11, 2)

            # Baseline removal #
            roi = np.array([[350, 4000]])
            Cancer_data_final, Cancer_base_Intensity = rampy.baseline(
                Raman_shift_limit,
                Cancer_data_SG,
                roi,
                'arPLS',
                lam=10**6,
                ratio=0.001)
            Normal_data_final, Normal_base_Intensity = rampy.baseline(
                Raman_shift_limit,
                Normal_data_SG,
                roi,
                'arPLS',
                lam=10**6,
                ratio=0.001)

            plt.plot(Raman_shift_limit,
                     Normal_data_final,
                     ls="-",
                     lw=2,
                     c="c",
                     label="Normal")
            plt.plot(Raman_shift_limit,
                     Cancer_data_final,
                     ls="-",
                     lw=1,
                     c="b",
                     label="Cancer")

            plt.legend()
            plt.xlabel("yayin")
            # plt.savefig('./raman_data/raw_data/yayin/yayin_alter.jpg')
            plt.show()

        # keep only the boxes whose score exceeds the confidence threshold
        labels = np.array(labels)
        probs = np.array(probs)
        boxes = np.array(bboxes, dtype=np.float32)
        boxes[:, 0] = boxes[:, 0] * self.config.rpn_stride / old_width
        boxes[:, 1] = boxes[:, 1] * old_height
        boxes[:, 2] = boxes[:, 2] * self.config.rpn_stride / old_width
        boxes[:, 3] = boxes[:, 3] * old_height
        results = np.array(
            self.bbox_util.nms_for_out(np.array(labels), np.array(probs),
                                       np.array(boxes), self.num_classes - 1,
                                       0.4))

        top_label_indices = results[:, 0]
        top_conf = results[:, 1]
        boxes = results[:, 2:]
        boxes[:, 0] = boxes[:, 0] * old_width
        boxes[:, 1] = boxes[:, 1] * old_height
        boxes[:, 2] = boxes[:, 2] * old_width
        boxes[:, 3] = boxes[:, 3] * old_height

        # draw the base plot
        Raman_shift = Xexcel('./raman_data/raw_data/RamanShift.xlsx', 'Sheet1')
        Normal_data = Yexcel(
            './raman_data/raw_data/yayin/no_origin_label0.xlsx',
            'no_origin_label0')  # Normal
        Normal_data = Ygetmean(Normal_data)

        Cancer_data = np.array(old_raman)
        Raman_shift = np.array(Raman_shift)
        Normal_data = np.array(Normal_data)

        # Crop the data to 350-4000 cm-1 #
        Lower_limit = np.max(np.where(Raman_shift < 350)) + 1
        Upper_limit = np.min(np.where(Raman_shift > 4000)) + 1

        Raman_shift_limit = Raman_shift[Lower_limit:Upper_limit]
        Cancer_data_limit = Cancer_data[Lower_limit:Upper_limit]
        Normal_data_limit = Normal_data[Lower_limit:Upper_limit]

        # Savitzky-Golay smoothing #
        Cancer_data_SG = sp.savgol_filter(Cancer_data_limit, 11, 2)
        Normal_data_SG = sp.savgol_filter(Normal_data_limit, 11, 2)

        # Baseline removal #
        roi = np.array([[350, 4000]])
        Cancer_data_final, Cancer_base_Intensity = rampy.baseline(
            Raman_shift_limit,
            Cancer_data_SG,
            roi,
            'arPLS',
            lam=10**6,
            ratio=0.001)
        Normal_data_final, Normal_base_Intensity = rampy.baseline(
            Raman_shift_limit,
            Normal_data_SG,
            roi,
            'arPLS',
            lam=10**6,
            ratio=0.001)

        plt.plot(Raman_shift_limit,
                 Normal_data_final,
                 ls="-",
                 lw=2,
                 c="c",
                 label="Normal")
        plt.plot(Raman_shift_limit,
                 Cancer_data_final,
                 ls="-",
                 lw=1,
                 c="b",
                 label="Cancer")

        plt.legend()
        plt.xlabel("yayin")

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = top_conf[i]

            left, top, right, bottom = boxes[i]

            # left = max(1, np.floor(left + 0.5).astype('int32'))
            # right = min(1043, np.floor(right + 0.5).astype('int32'))

            left = max(-30, np.floor(left - 0.5).astype('int32') * 4)
            right = min(4080, np.floor(right - 0.5).astype('int32') * 4)

            label = '{} {:.2f}'.format(predicted_class, score)
            label = label.encode('utf-8')

            # print(label ,"  ", "[", left , ", " , right , "]", " ", "[",  X[left-1], ",", X[right-1], "]")
            # plt.axvspan(xmin=X[left-1], xmax=X[right-1], facecolor='y', alpha=0.3)

            print(label, "  ", "[", left, ", ", right, "]")
            plt.axvspan(xmin=left, xmax=right, facecolor='y', alpha=0.3)

        plt.show()
Example #13
def preparing_data(dataliste,**kwargs):
    """prepare the spectra before processing by the regression techniques
    
    Parameters
    ==========
    dataliste : Pandas dataframe
        A list containing the names of the spectra, located in the folder indicated by pathin
        
    Options
    =======
    pathin : string
        the path of the spectra. Default = './raw/'
    cutoff : ndarray
        frequencies delimiting the region of interest for the regression. Default = np.array([850.,1140.])
    scale : float
        scaling coefficient for the intensity. Default = 1000
        
    Returns
    =======
    x : ndarray 
        the x axis as np.arange(300,1290,1.0)
    record : ndarray
        the y signal corrected from temperature and excitation line effects (23 °C, 532 nm)
    record_bas2 : ndarray
        the baseline fitted to record
    x_cut : ndarray
        the x axis of the region of interest
    record_hf_no_smo : ndarray
        the y signal in the region of interest, scaled between 0 and 1 (no smoothing)
    record_hf : ndarray
        the y signal in the region of interest, scaled between 0 and 1 and smoothed with a whittaker algorithm.
    nb_exp : int
        number of experiments (= length of dataliste)
        
    Note
    ====
    Input spectra are assumed to have decreasing frequencies. If not, comment the line `data = rp.flipsp(data)`
    """
    
    #
    # Kwargs
    #
    
    cutoff = kwargs.get("cutoff",np.array([850.,1140.])) # roi of the linear baseline
    scale = kwargs.get("scale",1000) # scaling coefficient
    pathin = kwargs.get('pathin',"./raw/")
    
    # a new x axis for interpolation (all spectra may not have been sampled with the same x)
    x = np.arange(300,1300,1.0)
    
    # for the baseline fit, we grab two points
    roi_cutoff = np.array([[cutoff[0]-0.4,cutoff[0]+0.4],[cutoff[1]-0.4,cutoff[1]+0.4]])
    
    # number of spectra
    nb_exp = len(dataliste)
    
    # array to record the treated spectra
    record = np.ones((x.shape[0],nb_exp))
    record_bas2 = np.ones((x.shape[0],nb_exp))
    
    # loop to read the spectra
    for i in range(nb_exp):
        data = np.genfromtxt(pathin+dataliste["spectra"].iloc[i],skip_header=1)
        
        # we need an increasing x axis for the interpolators so we check this point
        data = rp.flipsp(data)
        
        # finding the minimum between 1200 and 1300 to fit a constant baseline (bas1)
        idx_roi = np.where(data[:,1] == np.min(data[(data[:,0]>1200)&(data[:,0]<1300),1]))[0][0]  
        roi_bas1 = np.array([[data[idx_roi,0] - 15.,data[idx_roi,0] + 15.]])
        y_bas1, bas1 = rp.baseline(data[:,0],data[:,1],roi_bas1,"poly",polynomial_order=0)

        # resampling
        y_norm = rp.resample(data[:,0],y_bas1[:,0],x)
        
        # correcting for temperature and excitation line effects; the tlcorrection function automatically normalizes to the area.
        trash, y_long, trash = rp.tlcorrection(x,y_norm,23.0,532.0)
    
        record[:,i] = y_long[:]*scale #with a scale factor to bring values closer to 1 for representation
        
        # now grabbing the signal above the cutting baseline (bas2) in the roi_cutoff portion of spectra
        y_corr, bas2 = rp.baseline(x,y_long[:],roi_cutoff,"poly",polynomial_order=1)  # polynomial_order must be an integer
        
        x_cut = x[(roi_cutoff[0,0]<=x)&(x<=roi_cutoff[1,1])].reshape(-1,1)
        y_cut = y_corr[(roi_cutoff[0,0]<=x)&(x<=roi_cutoff[1,1])].reshape(-1,1)
        
        # initialisation of output arrays for signal of interest
        if i == 0: 
            record_hf = np.ones((y_cut.shape[0],nb_exp))
            record_hf_no_smo = np.ones((y_cut.shape[0],nb_exp))
        
        # Getting the good signal at HF (above the cut-off baseline) + Min-Max scaling
        record_hf_no_smo[:,i]= (y_cut[:,0]-np.min(y_cut[:,0]))/(np.max(y_cut[:,0])-np.min(y_cut[:,0]))
         
        # smoothing the signal with a Whittaker smoother = improves results
        record_hf[:,i] = rp.whittaker(record_hf_no_smo[:,i],Lambda = 10.0**3)
        
        # we take care of correcting any deviation from 0 after smoothing
        y_r_2, _ = rp.baseline(x_cut,record_hf[:,i],roi_cutoff,"poly",polynomial_order=1)  # the original passed p=1.0, which the "poly" method ignores
        record_hf[:,i] = ((y_r_2-np.min(y_r_2))/(np.max(y_r_2)-np.min(y_r_2))).reshape(-1)
        
        # for the baseline
        record_bas2[:,i] = bas2[:,0]*scale
        
    return x, record, record_bas2, x_cut, record_hf_no_smo, record_hf, nb_exp
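
A hedged usage sketch for preparing_data: the file names are hypothetical, and it assumes pandas is importable and that ./raw/ contains two-column text spectra with one header line, as the function expects.

import numpy as np
import pandas as pd

dataliste = pd.DataFrame({"spectra": ["sample_01.txt", "sample_02.txt"]})  # hypothetical file names
x, record, record_bas2, x_cut, record_hf_no_smo, record_hf, nb_exp = preparing_data(
    dataliste, pathin="./raw/", cutoff=np.array([850., 1140.]), scale=1000)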
Example #14
Lower_limit = np.max(np.where(Raman_shift < 350)) + 1
Upper_limit = np.min(np.where(Raman_shift > 4000)) + 1

Raman_shift_limit = Raman_shift[Lower_limit:Upper_limit]
Cancer_data_limit = Cancer_data[Lower_limit:Upper_limit]
Normal_data_limit = Normal_data[Lower_limit:Upper_limit]

# Savitzky-Golay smoothing #
Cancer_data_SG = sp.savgol_filter(Cancer_data_limit, 11, 2)
Normal_data_SG = sp.savgol_filter(Normal_data_limit, 11, 2)

# Baseline removal #
roi = np.array([[350, 4000]])
Cancer_data_final, Cancer_base_Intensity = rampy.baseline(Raman_shift_limit,
                                                          Cancer_data_SG,
                                                          roi,
                                                          'arPLS',
                                                          lam=10**6,
                                                          ratio=0.001)
Normal_data_final, Normal_base_Intensity = rampy.baseline(Raman_shift_limit,
                                                          Normal_data_SG,
                                                          roi,
                                                          'arPLS',
                                                          lam=10**6,
                                                          ratio=0.001)

# she_label_1 = (she_label_1 - (np.min(she_label_1)))/(np.max(she_label_1) - np.min(she_label_1)) # normalization
# plt.plot(Raman_shift_limit, Normal_data_final, ls="-", lw = 2, c = "c", label = "Normal")

# used to help a senior lab-mate process his data
# Cancer_data_final = Cancer_data_final.reshape(-1,).tolist()
# print("preprocessed data", len(Cancer_data_final))
Example #15
def fit_spectra(data_liste,method="LL2012",delim='\t',path_in='./raw/',laser=514.532,spline_coeff=0.001, poly_coeff=3):
    """Calculate the ratios of water and silicate signals from Raman spectra

    Parameters
    ----------
    data_liste: Pandas DataFrame
        Contains the list of spectra, see provided file as an example
    method: string
        The used method. LL2012: Le Losq et al. (2012); DG2017: Di Genova et al. (2017). See references.
    delim: string
        File delimiter. Use '\t' for tabulated text or ',' for comma separated text.
    path_in: string
        Path for the spectra
    laser: float
        Laser line wavelength in nm
    spline_coeff: float
        Smoothing coefficient for the spline baseline. An array of size len(data_liste) can be provided. Default = 0.001.
    poly_coeff: int
        Polynomial coefficient for the polynomial baseline function. Default = 3 (DG2017 method). Set to 2 for Behrens et al. (2006) method.

    Returns
    -------
    x: ndarray
        Common x axis.
    y_all: ndarray
        All raw spectra from data_liste in an array of length len(x) and with as many columns as spectra.
    y_all_corr: ndarray
        All corrected spectra from data_liste in an array of length len(x) and with as many columns as spectra.
    y_all_base: ndarray
        All baselines for spectra from data_liste in an array of length len(x) and with as many columns as spectra.
    rws: ndarray
        The ratio of the water integrated intensity over that of silicate signals.
    rw: ndarray
        The integrated intensity of water signal.
    rs: ndarray
        The integrated intensity of silicate signals.

    Raises
    ------
    TypeError
        If method is not set to LL2012 or DG2017.

    References
    ----------
    C. Le Losq, D. R. Neuville, R. Moretti, J. Roux, Determination of water content in silicate glasses using Raman spectrometry: Implications for the study of explosive volcanism. American Mineralogist. 97, 779–790 (2012).
    D. Di Genova et al., Effect of iron and nanolites on Raman spectra of volcanic glasses: A reassessment of existing strategies to estimate the water content. Chemical Geology. 475, 76–86 (2017).
    """

    import gcvspline
    
    x_all_lf = np.arange(50,1400,1.0)
    x_all_hf = np.arange(2800,3800,1.0)
    x = np.hstack((x_all_lf,x_all_hf))
    y_all = np.zeros((len(x),len(data_liste)))
    y_all_base = np.copy(y_all)
    y_all_corr = np.copy(y_all)

    rws = np.ones(len(data_liste))
    rw = np.ones(len(data_liste))
    rs = np.ones(len(data_liste))

    record_std = np.zeros((len(data_liste),2))

    rois = data_liste.loc[:,"ROI1 lb":"ROI6 hb"]

    for i in range(len(data_liste)):

        # importing the spectra
        sp = np.genfromtxt(path_in+data_liste["Name"][i],delimiter=delim,skip_header=1)  # use the path_in argument (the original hard-coded "./raw/")

        # constructing an interpolator: this will allow an output of all data with the same X axis
        f = scipy.interpolate.interp1d(sp[:,0], sp[:,1],fill_value="extrapolate")

        # temperature and excitation line correction (see Rameau help)
        x, y_all[:,i], sdf = rp.tlcorrection(x,f(x),23.0,laser,normalisation='intensity')

        # getting the roi
        roi = np.array(rois.loc[i]).reshape(int(len(rois.loc[i])/2),2)

        # calculating baseline
        if method == "LL2012": # spline
            
            try:
                c_hf, b_hf = rp.baseline(x,y_all[:,i],roi,"gcvspline",s=spline_coeff)
            except Exception:  # gcvspline can fail to converge; stop processing the remaining spectra
                break

            y_all_corr[:,i]=c_hf[:,0]
            y_all_base[:,i]=b_hf[:,0]

        elif method == "DG2017": # polynomial 3 following DG2017 method

            # getting the portion of interest
            x_lf = x[np.where(x<2000.)].reshape(-1)
            x_hf = x[np.where(x>2000.)].reshape(-1)

            y_lf = y_all[np.where(x<2000.),i].reshape(-1)
            y_hf = y_all[np.where(x>2000.),i].reshape(-1)

            c_lf, b_lf = rp.baseline(x_lf,y_lf,np.array([[0,200],[1240,1500]]),"poly",polynomial_order = poly_coeff)
            c_hf, b_hf = rp.baseline(x_hf,y_hf,np.array([[2500,3100],[3750,3900]]),"poly",polynomial_order = poly_coeff)

            y_all_corr[:,i] = np.hstack((c_lf.reshape(-1),c_hf.reshape(-1)))
            y_all_base[:,i] = np.hstack((b_lf.reshape(-1),b_hf.reshape(-1)))

        else:
            raise TypeError('method should be set to LL2012 or DG2017')

        # Area / Integrated Intensity calculation
        S = np.trapz(y_all_corr[np.where((x>150)&(x<1250)),i],x[np.where((x>150)&(x<1250))])
        W = np.trapz(y_all_corr[np.where((x>3100)&(x<3750)),i],x[np.where((x>3100)&(x<3750))])

        # updating the output arrays
        rs[i] = S[0]
        rw[i] = W[0]
        rws[i] = W[0]/S[0]

    return x, y_all, y_all_corr, y_all_base, rws, rw, rs
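
A hedged usage sketch for fit_spectra: the file name and ROI bounds are hypothetical, and it assumes the module-level imports the function relies on (numpy, pandas, scipy, rampy as rp) plus an installed gcvspline package (imported unconditionally inside the function). The DataFrame must carry a "Name" column plus the twelve "ROI1 lb" ... "ROI6 hb" bound columns that the function slices.

import numpy as np
import pandas as pd

# hypothetical ROI windows: six [lb, hb] pairs flattened into the expected columns
bounds = [(50., 200.), (1240., 1500.), (2500., 3100.), (3750., 3900.), (100., 150.), (1300., 1400.)]
cols = {"Name": ["glass_01.txt"]}  # hypothetical spectrum file in path_in
for k, (lb, hb) in enumerate(bounds, start=1):
    cols["ROI%i lb" % k] = [lb]
    cols["ROI%i hb" % k] = [hb]
data_liste = pd.DataFrame(cols)

x, y_all, y_all_corr, y_all_base, rws, rw, rs = fit_spectra(
    data_liste, method="DG2017", path_in='./raw/')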
Example #16
selected_samples = []

# List of the samples to get the baseline for
# (this fragment assumes df_raman, regions_of_interest, lb, hb and get_all_ech are defined earlier in the script)

fits_by_id = {}

for key, df in df_raman.groupby('ID'):
    name_ech = df.name_ech.unique()[0]
    is_true = (np.isin(key, selected_samples) or get_all_ech)
    show_plot_init = is_true
    base_line = is_true
    if base_line:
        x, y = df["shift"].values, df["intensity"].values
        y_corr, y_base = rp.baseline(x_input=x,
                                     y_input=y,
                                     bir=regions_of_interest,
                                     method='poly',
                                     polynomial_order=3)
        y_corr_flat = y_corr[:, 0]
        in_boundaries = df["shift"].between(lb, hb)
        x_fit, y_fit = x[in_boundaries], y_corr_flat[in_boundaries]
        y_fit = y_fit / np.amax(y_fit) * 10
        fits_by_id[key] = pd.DataFrame({'x': x_fit, 'y': y_fit})
        # the relative errors after baseline subtraction
        ese0 = np.sqrt(abs(y_fit)) / abs(y_fit)
        # normalise spectra to maximum intensity, easier to handle
        max_intensity = np.amax(y_fit)
        sigma = abs(ese0 * y_fit)  # calculate good ese

    # create a new plot for showing the spectrum
    if show_plot_init: