Example #1
import cv2
from numpy import random   # legacy NumPy API; random_integers was removed in NumPy 1.17

def SaltAndPepper(src, percentage):
    NoiseImg = src.copy()   # copy so the caller's image is not modified in place
    NoiseNum = int(percentage * src.shape[0] * src.shape[1])
    for i in range(NoiseNum):
        # random_integers is inclusive at both ends, so this covers every pixel
        randX = random.random_integers(0, src.shape[0] - 1)
        randY = random.random_integers(0, src.shape[1] - 1)
        if random.random_integers(0, 1) == 0:
            NoiseImg[randX, randY] = 0      # pepper: black pixel
        else:
            NoiseImg[randX, randY] = 255    # salt: white pixel
    cv2.imshow('PepperandSalt', NoiseImg)
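
A minimal driver for the function above; it assumes OpenCV is installed and that a grayscale test image exists at the hypothetical path 'lena.png':

img = cv2.imread('lena.png', cv2.IMREAD_GRAYSCALE)  # 'lena.png' is a placeholder, not from the original
SaltAndPepper(img, 0.02)   # corrupt roughly 2% of the pixels
cv2.waitKey(0)             # imshow only renders once the event loop runs
cv2.destroyAllWindows()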
Example #2
def PepperandSalt(src, percentage):
    NoiseImg = src.copy()   # copy so the input image is left untouched
    NoiseNum = int(percentage * src.shape[0] * src.shape[1])
    for i in range(NoiseNum):
        randX = random.random_integers(0, src.shape[0] - 1)
        randY = random.random_integers(0, src.shape[1] - 1)
        # random_integers(0, 1) returns the integer 0 or 1, so test for 0
        # directly; the original compared <= 0.5, which only ever matched 0
        if random.random_integers(0, 1) == 0:
            NoiseImg[randX, randY] = 0
        else:
            NoiseImg[randX, randY] = 255
    return NoiseImg
Example #3
def PepperandSalt(src, pepperPer, saltPer):   # salt-and-pepper noise with separate pepper/salt ratios
    NoiseImg = src.copy()
    pepperNum = int(pepperPer * src.shape[0] * src.shape[1])
    saltNum = int(saltPer * src.shape[0] * src.shape[1])
    for i in range(pepperNum):
        randX = random.random_integers(0, src.shape[0] - 1)
        randY = random.random_integers(0, src.shape[1] - 1)
        NoiseImg[randX, randY] = 0      # pepper: black pixels
    for i in range(saltNum):
        randX = random.random_integers(0, src.shape[0] - 1)
        randY = random.random_integers(0, src.shape[1] - 1)
        NoiseImg[randX, randY] = 255    # salt: white pixels
    return NoiseImg
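
All of these snippets rely on NumPy's legacy random_integers, which was deprecated in NumPy 1.11 and removed in 1.17. A sketch of the drop-in replacement with randint, whose upper bound is exclusive where random_integers was inclusive:

import numpy as np

# random_integers(0, 255) drew from 0..255 inclusive;
# randint needs high + 1 to cover the same range
x = np.random.randint(0, 255 + 1)          # one draw from 0..255
rows = np.random.randint(0, 100, size=50)  # 50 draws from 0..99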
Example #4
def step(self, received):
    """ Called every frame to get commands """
    if self.send_name:
        self.send_name = False
        return "SimpleBot"
    else:
        # For now, always move the first ship randomly
        return "t 0 1 {}".format(random.random_integers(0, 359))
Example #5
    def set_data(self, data, subset_data=None, **args):
        if args.get("skipIfSame", 1):
            if checksum(data) == checksum(self.raw_data) and \
               checksum(subset_data) == checksum(self.raw_subset_data):
                return

        self.domain_data_stat = []
        self.attr_values = {}
        self.original_data = self.original_subset_data = None
        self.scaled_data = self.scaled_subset_data = None
        self.no_jittering_scaled_data = self.no_jittering_scaled_subset_data = None
        self.valid_data_array = self.valid_subset_data_array = None

        self.raw_data = None
        self.raw_subset_data = None
        self.have_data = False
        self.have_subset_data = False
        self.data_has_class = False
        self.data_has_continuous_class = False
        self.data_has_discrete_class = False
        self.data_class_name = None
        self.data_domain = None
        self.data_class_index = None

        if data is None:
            return
        full_data = self.merge_data_sets(data, subset_data)

        self.raw_data = data
        self.raw_subset_data = subset_data

        len_data = data and len(data) or 0

        self.attribute_names = [attr.name for attr in full_data.domain]
        self.attribute_name_index = dict([(full_data.domain[i].name, i)
                                          for i in range(len(full_data.domain))])
        self.attribute_flip_info = {}

        self.data_domain = full_data.domain
        self.data_has_class = bool(full_data.domain.class_var)
        self.data_has_continuous_class = bool(self.data_has_class and
                                              full_data.domain.class_var.var_type == VarTypes.Continuous)
        self.data_has_discrete_class = bool(self.data_has_class and
                                            full_data.domain.class_var.var_type == VarTypes.Discrete)
        self.data_class_name = self.data_has_class and full_data.domain.class_var.name
        if self.data_has_class:
            self.data_class_index = self.attribute_name_index[self.data_class_name]
        self.have_data = bool(self.raw_data and len(self.raw_data) > 0)
        self.have_subset_data = bool(self.raw_subset_data and
                                     len(self.raw_subset_data) > 0)

        self.domain_data_stat = getCached(full_data,
                                          DomainBasicStats,
                                          (full_data,))

        sort_values_for_discrete_attrs = args.get("sort_values_for_discrete_attrs",
                                                  1)

        for index in range(len(full_data.domain)):
            attr = full_data.domain[index]
            if attr.var_type == VarTypes.Discrete:
                self.attr_values[attr.name] = [0, len(attr.values)]
            elif attr.var_type == VarTypes.Continuous:
                self.attr_values[attr.name] = [self.domain_data_stat[index].min,
                                               self.domain_data_stat[index].max]

        # the original_data, no_jittering_scaled_data and validArray are arrays
        # that we can cache so that other visualization widgets don't need to
        # compute them. The scaled_data, on the other hand, has to be computed
        # for each widget separately because of different
        # jitter_continuous and jitter_size values
        if getCached(data, "visualizationData") and subset_data is None:
            self.original_data, self.no_jittering_scaled_data, self.valid_data_array = getCached(data,
                                                                                                 "visualizationData")
            self.original_subset_data = self.no_jittering_scaled_subset_data = self.valid_subset_data_array = np.array(
                []).reshape([len(self.original_data), 0])
        else:
            no_jittering_data = np.hstack((full_data.X, full_data.Y)).T
            # x != np.NaN is always True (even for NaN), so use isnan instead
            valid_data_array = ~np.isnan(no_jittering_data)
            original_data = no_jittering_data.copy()

            for index in range(len(data.domain)):
                attr = data.domain[index]
                if attr.var_type == VarTypes.Discrete:
                    # see if the values for discrete attributes have to be resorted
                    variable_value_indices = get_variable_value_indices(data.domain[index],
                                                                        sort_values_for_discrete_attrs)
                    if 0 in [i == variable_value_indices[attr.values[i]]
                             for i in range(len(attr.values))]:
                        # make the array contiguous, otherwise the putmask
                        # function does not work
                        line = no_jittering_data[index].copy()
                        indices = [np.where(line == val, 1, 0)
                                   for val in range(len(attr.values))]
                        for i in range(len(attr.values)):
                            np.putmask(line, indices[i],
                                          variable_value_indices[attr.values[i]])
                        no_jittering_data[index] = line   # save the changed array
                        original_data[index] = line     # reorder also the values in the original data
                    no_jittering_data[index] = ((no_jittering_data[index] * 2.0 + 1.0)
                                                / float(2 * len(attr.values)))

                elif attr.var_type == VarTypes.Continuous:
                    # if all values are the same, prevent division by zero
                    diff = (self.domain_data_stat[index].max -
                            self.domain_data_stat[index].min) or 1
                    no_jittering_data[index] = (no_jittering_data[index] -
                                                self.domain_data_stat[index].min) / diff

            self.original_data = original_data[:, :len_data]
            self.original_subset_data = original_data[:, len_data:]
            self.no_jittering_scaled_data = no_jittering_data[:, :len_data]
            self.no_jittering_scaled_subset_data = no_jittering_data[:, len_data:]
            self.valid_data_array = valid_data_array[:, :len_data]
            self.valid_subset_data_array = valid_data_array[:, len_data:]

        if data:
            setCached(data, "visualizationData",
                      (self.original_data, self.no_jittering_scaled_data,
                       self.valid_data_array))
        if subset_data:
            setCached(subset_data, "visualizationData",
                      (self.original_subset_data,
                       self.no_jittering_scaled_subset_data,
                       self.valid_subset_data_array))

        # compute the scaled_data arrays
        scaled_data = np.concatenate([self.no_jittering_scaled_data,
                                         self.no_jittering_scaled_subset_data],
                                        axis=1)

        # Random generators for jittering
        random = np.random.RandomState(seed=self.jitter_seed)
        rand_seeds = random.random_integers(0, sys.maxsize - 1, size=len(data.domain))
        for index, rseed in zip(list(range(len(data.domain))), rand_seeds):
            # Need to use a different seed for each feature
            random = np.random.RandomState(seed=rseed)
            attr = data.domain[index]
            if attr.var_type == VarTypes.Discrete:
                scaled_data[index] += (self.jitter_size / (50.0 * max(1, len(attr.values)))) * \
                                      (random.rand(len(full_data)) - 0.5)

            elif attr.var_type == VarTypes.Continuous and self.jitter_continuous:
                scaled_data[index] += self.jitter_size / 50.0 * (0.5 - random.rand(len(full_data)))
                scaled_data[index] = np.absolute(scaled_data[index])       # fix values below zero
                ind = np.where(scaled_data[index] > 1.0, 1, 0)     # fix values above 1
                np.putmask(scaled_data[index], ind, 2.0 - np.compress(ind, scaled_data[index]))

        if self.have_subset_data:
            # Fix all subset instances which are also in the main data
            # to have the same jittered values
            ids_to_indices = dict((inst.id, i)
                                  for i, inst in enumerate(self.raw_data))

            subset_ids_map = [[i, ids_to_indices[s.id]]
                              for i, s in enumerate(self.raw_subset_data)
                              if s.id in ids_to_indices]
            if len(subset_ids_map):
                subset_ids_map = np.array(subset_ids_map)
                subset_ids_map[:, 0] += len_data
                scaled_data[:, subset_ids_map[:, 0]] = \
                    scaled_data[:, subset_ids_map[:, 1]]

        self.scaled_data = scaled_data[:, :len_data]
        self.scaled_subset_data = scaled_data[:, len_data:]
Example #6
			for geom in link.GetGeometries():
				geom.SetDiffuseColor([0.862745,0.862745,0.862745,0.2])
				geom.SetTransparency(0.2)
	ind = ind+1
print "Number of joints:------", repr(robot.GetActiveDOF())
pdb.set_trace()

h=1
phi=2.5
xxrange=array([-0.8,-0.4])
yyrange=array([-0.6,0.6])
nb=20
noise = 0.0
Psurf,xx,yy,zz = GenerateSurface(h, phi, xxrange,yyrange,nb)
Psurfnoise,xxnoise,yynoise,zznoise = GenerateSurface(h, phi, xxrange,0.5*yyrange,nb,noise)
idx = random.random_integers(0, shape(Psurfnoise)[0]-1, 100)  # 100 row indices into Psurfnoise, sampled with replacement
numpy.savetxt('KUKASurf.txt',Psurfnoise[idx,:]) #save the data for learning

handles.append(env.plot3(points=Psurfnoise[idx,:],pointsize=0.015,colors=array(((1,0.5,0))),drawstyle=1))
for i in range(nb):
	pxmesh = np.vstack([xx[i,:],yy[i,:],zz[i,:]]).T
	handles.append(env.drawlinestrip(points=pxmesh,linewidth=2.5,colors=array(((0,1,0,0.5)))))
for i in range(nb):
	pxmesh = np.vstack([xx[:,i],yy[:,i],zz[:,i]]).T
	handles.append(env.drawlinestrip(points=pxmesh,linewidth=2.5,colors=array(((0,1,0,0.5)))))

env.UpdatePublishedBodies()
#raw_input('press enter to continue')

# Joints=numpy.zeros(7)
# Joints[0] = radians(-30)
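
In the script above, random_integers draws 100 row indices with replacement, so the saved surface sample can contain duplicate points. If distinct rows are wanted, np.random.choice can sample without replacement; a sketch under that assumption, not part of the original script:

import numpy as np

# assumes Psurfnoise is an (N, 3) array of noisy surface points, as above
idx = np.random.choice(Psurfnoise.shape[0], size=100, replace=False)
sample = Psurfnoise[idx, :]   # 100 distinct rows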
Example #7
    def set_data(self, data, **args):
        if args.get("skipIfSame", 1):
            if checksum(data) == checksum(self.raw_data):
                return

        self.domain_data_stat = []
        self.attr_values = {}
        self.original_data = None
        self.scaled_data = None
        self.no_jittering_scaled_data = None
        self.valid_data_array = None

        self.raw_data = None
        self.have_data = False
        self.data_has_class = False
        self.data_has_continuous_class = False
        self.data_has_discrete_class = False
        self.data_class_name = None
        self.data_domain = None
        self.data_class_index = None

        if data is None:
            return
        full_data = data
        self.raw_data = data

        len_data = data and len(data) or 0

        self.attribute_names = [attr.name for attr in full_data.domain]
        self.attribute_name_index = dict([(full_data.domain[i].name, i)
                                          for i in range(len(full_data.domain))])
        self.attribute_flip_info = {}

        self.data_domain = full_data.domain
        self.data_has_class = bool(full_data.domain.class_var)
        self.data_has_continuous_class = full_data.domain.has_continuous_class
        self.data_has_discrete_class = full_data.domain.has_discrete_class

        self.data_class_name = self.data_has_class and full_data.domain.class_var.name
        if self.data_has_class:
            self.data_class_index = self.attribute_name_index[self.data_class_name]
        self.have_data = bool(self.raw_data and len(self.raw_data) > 0)

        self.domain_data_stat = getCached(full_data,
                                          DomainBasicStats,
                                          (full_data,))

        sort_values_for_discrete_attrs = args.get("sort_values_for_discrete_attrs",
                                                  1)

        for index in range(len(full_data.domain)):
            attr = full_data.domain[index]
            if attr.is_discrete:
                self.attr_values[attr.name] = [0, len(attr.values)]
            elif attr.is_continuous:
                self.attr_values[attr.name] = [self.domain_data_stat[index].min,
                                               self.domain_data_stat[index].max]

        if 'no_data' in args:
            return

        # the original_data, no_jittering_scaled_data and validArray are arrays
        # that we can cache so that other visualization widgets don't need to
        # compute them. The scaled_data, on the other hand, has to be computed
        # for each widget separately because of different
        # jitter_continuous and jitter_size values
        if getCached(data, "visualizationData"):
            self.original_data, self.no_jittering_scaled_data, self.valid_data_array = getCached(data,
                                                                                                 "visualizationData")
        else:
            no_jittering_data = np.c_[full_data.X, full_data.Y].T
            valid_data_array = ~np.isnan(no_jittering_data)
            original_data = no_jittering_data.copy()

            for index in range(len(data.domain)):
                attr = data.domain[index]
                if attr.is_discrete:
                    # see if the values for discrete attributes have to be resorted
                    variable_value_indices = get_variable_value_indices(data.domain[index],
                                                                        sort_values_for_discrete_attrs)
                    if 0 in [i == variable_value_indices[attr.values[i]]
                             for i in range(len(attr.values))]:
                        # make the array contiguous, otherwise the putmask
                        # function does not work
                        line = no_jittering_data[index].copy()
                        indices = [np.where(line == val, 1, 0)
                                   for val in range(len(attr.values))]
                        for i in range(len(attr.values)):
                            np.putmask(line, indices[i],
                                          variable_value_indices[attr.values[i]])
                        no_jittering_data[index] = line   # save the changed array
                        original_data[index] = line     # reorder also the values in the original data
                    no_jittering_data[index] = ((no_jittering_data[index] * 2.0 + 1.0)
                                                / float(2 * len(attr.values)))

                elif attr.is_continuous:
                    # if all values are the same, prevent division by zero
                    diff = (self.domain_data_stat[index].max -
                            self.domain_data_stat[index].min) or 1
                    no_jittering_data[index] = (no_jittering_data[index] -
                                                self.domain_data_stat[index].min) / diff

            self.original_data = original_data
            self.no_jittering_scaled_data = no_jittering_data
            self.valid_data_array = valid_data_array

        if data:
            setCached(data, "visualizationData",
                      (self.original_data, self.no_jittering_scaled_data,
                       self.valid_data_array))

        # compute the scaled_data arrays
        scaled_data = self.no_jittering_scaled_data.copy()  # copy so the jitter below can't corrupt the cached array

        # Random generators for jittering
        random = np.random.RandomState(seed=self.jitter_seed)
        rand_seeds = random.random_integers(0, 2 ** 30 - 1,
                                            size=len(data.domain))
        for index, rseed in zip(list(range(len(data.domain))), rand_seeds):
            # Need to use a different seed for each feature
            random = np.random.RandomState(seed=rseed)
            attr = data.domain[index]
            if attr.is_discrete:
                scaled_data[index] += (self.jitter_size / (50.0 * max(1, len(attr.values)))) * \
                                      (random.rand(len(full_data)) - 0.5)

            elif attr.is_continuous and self.jitter_continuous:
                scaled_data[index] += self.jitter_size / 50.0 * (0.5 - random.rand(len(full_data)))
                scaled_data[index] = np.absolute(scaled_data[index])       # fix values below zero
                ind = np.where(scaled_data[index] > 1.0, 1, 0)     # fix values above 1
                np.putmask(scaled_data[index], ind, 2.0 - np.compress(ind, scaled_data[index]))

        self.scaled_data = scaled_data[:, :len_data]
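
The continuous-jitter branch keeps values inside [0, 1] by reflection: np.absolute folds negatives back above zero, and the putmask/compress pair folds overshoots back below one. The same idea in a self-contained form, using np.where in place of the putmask/compress pair:

import numpy as np

x = np.array([-0.05, 0.3, 0.98, 1.07])
x = np.absolute(x)                  # reflect values below 0 back above 0
x = np.where(x > 1.0, 2.0 - x, x)   # reflect values above 1 back below 1
print(x)                            # [0.05 0.3  0.98 0.93]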
Example #8
    def set_data(self, data, **args):
        if args.get("skipIfSame", 1):
            if checksum(data) == checksum(self.raw_data):
                return

        self.domain_data_stat = []
        self.attr_values = {}
        self.original_data = None
        self.scaled_data = None
        self.no_jittering_scaled_data = None
        self.valid_data_array = None

        self.raw_data = None
        self.have_data = False
        self.data_has_class = False
        self.data_has_continuous_class = False
        self.data_has_discrete_class = False
        self.data_class_name = None
        self.data_domain = None
        self.data_class_index = None

        if data is None:
            return
        full_data = data
        self.raw_data = data

        len_data = data and len(data) or 0

        self.attribute_names = [attr.name for attr in full_data.domain]
        self.attribute_name_index = dict([
            (full_data.domain[i].name, i) for i in range(len(full_data.domain))
        ])
        self.attribute_flip_info = {}

        self.data_domain = full_data.domain
        self.data_has_class = bool(full_data.domain.class_var)
        self.data_has_continuous_class = full_data.domain.has_continuous_class
        self.data_has_discrete_class = full_data.domain.has_discrete_class

        self.data_class_name = self.data_has_class and full_data.domain.class_var.name
        if self.data_has_class:
            self.data_class_index = self.attribute_name_index[
                self.data_class_name]
        self.have_data = bool(self.raw_data and len(self.raw_data) > 0)

        self.domain_data_stat = getCached(full_data, DomainBasicStats,
                                          (full_data, ))

        sort_values_for_discrete_attrs = args.get(
            "sort_values_for_discrete_attrs", 1)

        for index in range(len(full_data.domain)):
            attr = full_data.domain[index]
            if attr.is_discrete:
                self.attr_values[attr.name] = [0, len(attr.values)]
            elif attr.is_continuous:
                self.attr_values[attr.name] = [
                    self.domain_data_stat[index].min,
                    self.domain_data_stat[index].max
                ]

        if 'no_data' in args:
            return

        # the original_data, no_jittering_scaled_data and validArray are arrays
        # that we can cache so that other visualization widgets don't need to
        # compute them. The scaled_data, on the other hand, has to be computed
        # for each widget separately because of different
        # jitter_continuous and jitter_size values
        if getCached(data, "visualizationData"):
            self.original_data, self.no_jittering_scaled_data, self.valid_data_array = getCached(
                data, "visualizationData")
        else:
            no_jittering_data = np.c_[full_data.X, full_data.Y].T
            valid_data_array = ~np.isnan(no_jittering_data)
            original_data = no_jittering_data.copy()

            for index in range(len(data.domain)):
                attr = data.domain[index]
                if attr.is_discrete:
                    # see if the values for discrete attributes have to be resorted
                    variable_value_indices = get_variable_value_indices(
                        data.domain[index], sort_values_for_discrete_attrs)
                    if 0 in [
                            i == variable_value_indices[attr.values[i]]
                            for i in range(len(attr.values))
                    ]:
                        # make the array contiguous, otherwise the putmask
                        # function does not work
                        line = no_jittering_data[index].copy()
                        indices = [
                            np.where(line == val, 1, 0)
                            for val in range(len(attr.values))
                        ]
                        for i in range(len(attr.values)):
                            np.putmask(line, indices[i],
                                       variable_value_indices[attr.values[i]])
                        no_jittering_data[index] = line  # save the changed array
                        original_data[index] = line  # reorder also the values in the original data
                    no_jittering_data[index] = ((no_jittering_data[index] * 2.0 + 1.0) /
                                                float(2 * len(attr.values)))

                elif attr.is_continuous:
                    # if all values are the same, prevent division by zero
                    diff = (self.domain_data_stat[index].max -
                            self.domain_data_stat[index].min) or 1
                    no_jittering_data[index] = (
                        no_jittering_data[index] -
                        self.domain_data_stat[index].min) / diff

            self.original_data = original_data
            self.no_jittering_scaled_data = no_jittering_data
            self.valid_data_array = valid_data_array

        if data:
            setCached(data, "visualizationData",
                      (self.original_data, self.no_jittering_scaled_data,
                       self.valid_data_array))

        # compute the scaled_data arrays
        scaled_data = self.no_jittering_scaled_data.copy()  # copy so the jitter below can't corrupt the cached array

        # Random generators for jittering
        random = np.random.RandomState(seed=self.jitter_seed)
        rand_seeds = random.random_integers(0,
                                            2**30 - 1,
                                            size=len(data.domain))
        for index, rseed in zip(list(range(len(data.domain))), rand_seeds):
            # Need to use a different seed for each feature
            random = np.random.RandomState(seed=rseed)
            attr = data.domain[index]
            if attr.is_discrete:
                scaled_data[index] += (self.jitter_size / (50.0 * max(1, len(attr.values)))) * \
                                      (random.rand(len(full_data)) - 0.5)

            elif attr.is_continuous and self.jitter_continuous:
                scaled_data[index] += self.jitter_size / 50.0 * (
                    0.5 - random.rand(len(full_data)))
                scaled_data[index] = np.absolute(
                    scaled_data[index])  # fix values below zero
                ind = np.where(scaled_data[index] > 1.0, 1,
                               0)  # fix values above 1
                np.putmask(scaled_data[index], ind,
                           2.0 - np.compress(ind, scaled_data[index]))

        self.scaled_data = scaled_data[:, :len_data]
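
Both set_data variants above draw one seed per feature from a master RandomState so that every column gets its own reproducible jitter stream. A stripped-down sketch of that pattern with made-up sizes, using randint since random_integers is gone from current NumPy:

import numpy as np

master = np.random.RandomState(seed=42)       # plays the role of jitter_seed
n_features, n_rows = 5, 100
rand_seeds = master.randint(0, 2 ** 30 - 1, size=n_features)

data = np.zeros((n_features, n_rows))
jitter_size = 10.0
for index, rseed in enumerate(rand_seeds):
    rng = np.random.RandomState(seed=rseed)   # independent stream per feature
    data[index] += jitter_size / 50.0 * (0.5 - rng.rand(n_rows))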
Example #9
    def set_data(self, data, subset_data=None, **args):
        if args.get("skipIfSame", 1):
            if checksum(data) == checksum(self.raw_data) and \
               checksum(subset_data) == checksum(self.raw_subset_data):
                return

        self.domain_data_stat = []
        self.attr_values = {}
        self.original_data = self.original_subset_data = None
        self.scaled_data = self.scaled_subset_data = None
        self.no_jittering_scaled_data = self.no_jittering_scaled_subset_data = None
        self.valid_data_array = self.valid_subset_data_array = None

        self.raw_data = None
        self.raw_subset_data = None
        self.have_data = False
        self.have_subset_data = False
        self.data_has_class = False
        self.data_has_continuous_class = False
        self.data_has_discrete_class = False
        self.data_class_name = None
        self.data_domain = None
        self.data_class_index = None

        if data is None:
            return
        full_data = self.merge_data_sets(data, subset_data)

        self.raw_data = data
        self.raw_subset_data = subset_data

        len_data = data and len(data) or 0

        self.attribute_names = [attr.name for attr in full_data.domain]
        self.attribute_name_index = dict([(full_data.domain[i].name, i)
                                          for i in range(len(full_data.domain))])
        self.attribute_flip_info = {}

        self.data_domain = full_data.domain
        self.data_has_class = bool(full_data.domain.class_var)
        self.data_has_continuous_class = \
            isinstance(full_data.domain.class_var, ContinuousVariable)
        self.data_has_discrete_class = \
            isinstance(full_data.domain.class_var, DiscreteVariable)

        self.data_class_name = self.data_has_class and full_data.domain.class_var.name
        if self.data_has_class:
            self.data_class_index = self.attribute_name_index[self.data_class_name]
        self.have_data = bool(self.raw_data and len(self.raw_data) > 0)
        self.have_subset_data = bool(self.raw_subset_data and
                                     len(self.raw_subset_data) > 0)

        self.domain_data_stat = getCached(full_data,
                                          DomainBasicStats,
                                          (full_data,))

        sort_values_for_discrete_attrs = args.get("sort_values_for_discrete_attrs",
                                                  1)

        for index in range(len(full_data.domain)):
            attr = full_data.domain[index]
            if isinstance(attr, DiscreteVariable):
                self.attr_values[attr.name] = [0, len(attr.values)]
            elif isinstance(attr, ContinuousVariable):
                self.attr_values[attr.name] = [self.domain_data_stat[index].min,
                                               self.domain_data_stat[index].max]

        # the original_data, no_jittering_scaled_data and validArray are arrays
        # that we can cache so that other visualization widgets don't need to
        # compute them. The scaled_data, on the other hand, has to be computed
        # for each widget separately because of different
        # jitter_continuous and jitter_size values
        if getCached(data, "visualizationData") and subset_data is None:
            self.original_data, self.no_jittering_scaled_data, self.valid_data_array = getCached(data,
                                                                                                 "visualizationData")
            self.original_subset_data = self.no_jittering_scaled_subset_data = self.valid_subset_data_array = np.array(
                []).reshape([len(self.original_data), 0])
        else:
            no_jittering_data = np.hstack((full_data.X, full_data.Y)).T
            # x != np.NaN is always True (even for NaN), so use isnan instead
            valid_data_array = ~np.isnan(no_jittering_data)
            original_data = no_jittering_data.copy()

            for index in range(len(data.domain)):
                attr = data.domain[index]
                if isinstance(attr, DiscreteVariable):
                    # see if the values for discrete attributes have to be resorted
                    variable_value_indices = get_variable_value_indices(data.domain[index],
                                                                        sort_values_for_discrete_attrs)
                    if 0 in [i == variable_value_indices[attr.values[i]]
                             for i in range(len(attr.values))]:
                        # make the array contiguous, otherwise the putmask
                        # function does not work
                        line = no_jittering_data[index].copy()
                        indices = [np.where(line == val, 1, 0)
                                   for val in range(len(attr.values))]
                        for i in range(len(attr.values)):
                            np.putmask(line, indices[i],
                                          variable_value_indices[attr.values[i]])
                        no_jittering_data[index] = line   # save the changed array
                        original_data[index] = line     # reorder also the values in the original data
                    no_jittering_data[index] = ((no_jittering_data[index] * 2.0 + 1.0)
                                                / float(2 * len(attr.values)))

                elif isinstance(attr, ContinuousVariable):
                    # if all values are the same, prevent division by zero
                    diff = (self.domain_data_stat[index].max -
                            self.domain_data_stat[index].min) or 1
                    no_jittering_data[index] = (no_jittering_data[index] -
                                                self.domain_data_stat[index].min) / diff

            self.original_data = original_data[:, :len_data]
            self.original_subset_data = original_data[:, len_data:]
            self.no_jittering_scaled_data = no_jittering_data[:, :len_data]
            self.no_jittering_scaled_subset_data = no_jittering_data[:, len_data:]
            self.valid_data_array = valid_data_array[:, :len_data]
            self.valid_subset_data_array = valid_data_array[:, len_data:]

        if data:
            setCached(data, "visualizationData",
                      (self.original_data, self.no_jittering_scaled_data,
                       self.valid_data_array))
        if subset_data:
            setCached(subset_data, "visualizationData",
                      (self.original_subset_data,
                       self.no_jittering_scaled_subset_data,
                       self.valid_subset_data_array))

        # compute the scaled_data arrays
        scaled_data = np.concatenate([self.no_jittering_scaled_data,
                                         self.no_jittering_scaled_subset_data],
                                        axis=1)

        # Random generators for jittering
        random = np.random.RandomState(seed=self.jitter_seed)
        rand_seeds = random.random_integers(0, sys.maxsize - 1, size=len(data.domain))
        for index, rseed in zip(list(range(len(data.domain))), rand_seeds):
            # Need to use a different seed for each feature
            random = np.random.RandomState(seed=rseed)
            attr = data.domain[index]
            if isinstance(attr, DiscreteVariable):
                scaled_data[index] += (self.jitter_size / (50.0 * max(1, len(attr.values)))) * \
                                      (random.rand(len(full_data)) - 0.5)

            elif isinstance(attr, ContinuousVariable) and self.jitter_continuous:
                scaled_data[index] += self.jitter_size / 50.0 * (0.5 - random.rand(len(full_data)))
                scaled_data[index] = np.absolute(scaled_data[index])       # fix values below zero
                ind = np.where(scaled_data[index] > 1.0, 1, 0)     # fix values above 1
                np.putmask(scaled_data[index], ind, 2.0 - np.compress(ind, scaled_data[index]))

        if self.have_subset_data:
            # Fix all subset instances which are also in the main data
            # to have the same jittered values
            ids_to_indices = dict((inst.id, i)
                                  for i, inst in enumerate(self.raw_data))

            subset_ids_map = [[i, ids_to_indices[s.id]]
                              for i, s in enumerate(self.raw_subset_data)
                              if s.id in ids_to_indices]
            if len(subset_ids_map):
                subset_ids_map = np.array(subset_ids_map)
                subset_ids_map[:, 0] += len_data
                scaled_data[:, subset_ids_map[:, 0]] = \
                    scaled_data[:, subset_ids_map[:, 1]]

        self.scaled_data = scaled_data[:, :len_data]
        self.scaled_subset_data = scaled_data[:, len_data:]
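
The subset fix-up above maps instance ids to main-data column indices so that an instance appearing in both sets keeps identical jitter. A minimal sketch of that id-to-column remapping with made-up ids:

import numpy as np

main_ids = [10, 11, 12, 13]     # ids of the main-data columns
subset_ids = [12, 10]           # subset instances; both also occur in the main data
len_data = len(main_ids)

# 2 features x (4 main + 2 subset) columns of fake scaled data
scaled = np.arange(12, dtype=float).reshape(2, 6)
ids_to_indices = {inst_id: i for i, inst_id in enumerate(main_ids)}
subset_ids_map = np.array([[len_data + i, ids_to_indices[s]]
                           for i, s in enumerate(subset_ids)
                           if s in ids_to_indices])
# copy the matching main-data columns over the subset columns
scaled[:, subset_ids_map[:, 0]] = scaled[:, subset_ids_map[:, 1]]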
Example #10
def DistortAN(self, movechance=.15):
    ''' Randomly replace atom types. '''
    for i in range(self.atoms.shape[0]):
        if random.uniform(0, 1) < movechance:
            self.atoms[i] = random.random_integers(
                1, PARAMS["MAX_ATOMIC_NUMBER"])
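
The loop above can also be written without per-atom Python iteration; a vectorized sketch, where max_z stands in for PARAMS["MAX_ATOMIC_NUMBER"] and its default value is illustrative:

import numpy as np

def distort_an_vectorized(atoms, movechance=0.15, max_z=103):
    # pick each atom independently with probability movechance
    mask = np.random.rand(atoms.shape[0]) < movechance
    # randint's upper bound is exclusive, hence max_z + 1
    atoms[mask] = np.random.randint(1, max_z + 1, size=mask.sum())
    return atoms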
Example #11
						line=fin.readline()
						line=line.replace("[&R]","")	# reformat rate multiplier for STEM
						line=line.replace("'","")
						line=line.replace(":0.0;",";")	# Remove the 0.0 branch length at the end
						treeVector.append(line)
						fin.close()
						#save the k trees into a file
					
					try:
						treeDirectory = "RandomlyChosenTrees_T1_100k/"
						os.mkdir(treeDirectory)
					except OSError:
						pass #directory already exists
					treeFileOut = treeDirectory + "Ne" + str(n) + "_t" + str(t) +  "_k" + str(k) + "_rep" +  str(r) + ".tre"
					fout=open(treeFileOut, 'w')
					fout.writelines(treeVector)
					fout.close()
			
			
			#with open(filename) as f: # automatically closes input file as soon as it's done
			#	fileData=f.readlines()

		
if __name__ == '__main__':

	#K= [2,4,8,16,32,64]
	randNums = random.random_integers(0, 100, 126)		# picks 126 random integers between 0 and 100, with replacement
	getTrees(100, randNums)		#reps=100

	#for k in K:
		#getTrees(k, 100, randNums)		#reps=100
Example #12
def get_list(key, page_size, page_num, param_dict):
    re_login()
    page_data = {
        'curpage': page_num
        , 'RecordsPerPage': page_size
        , 'QueryID': random.random_integers(1, 9)
        , 'ID': ''
        , 'turnpage': page_num - 1 if page_num - 1 > 0 else page_num + 1
        , 'tpagemode': 'L'
        , 'Fields': ''
        , 'DisplayMode': 'listmode'
        , 'dbPrefix': param_dict['dbPrefix']
        , 'PageName': param_dict['pagename']
        , 'sorttype': "(FFD,'RANK') desc"
        , 'isinEn': param_dict['isinEn']
    }
    # fetch the query result list
    list_url = 'https://kns.cnki.net/kns/brief/brief.aspx?' + urllib.parse.urlencode(page_data)
    r_list_doc = session.get(list_url, headers=headers, timeout=global_timeout)
    r_list_doc.encoding = 'utf-8'
    log.info(list_url)

    soup = BeautifulSoup(r_list_doc.text, 'lxml', from_encoding='utf-8')
    headers['Referer'] = list_url
    trs = soup.select('.GridTableContent tr')
    # skip the header row
    err_cn = 0
    for tr in trs[1:]:
        tds = tr.select('td')
        # row number
        tr_order = tds[0].text

        # title
        tr_title = tds[1].select('a')[0].text
        tr_title = tr_title.replace("'", "-")

        # authors
        tr_authors = ""
        authors_a = tds[2].select('a')
        for author_a in authors_a:
            tr_authors = tr_authors + "_" + author_a.text
        # first author
        tr_author = ""
        if len(authors_a) > 0:
            tr_author = authors_a[0].text
            tr_author = tr_author.replace("'", "-")

        # journal name
        from_source = ""
        if len(tds) > 3:
            if len(tds[3].select('a')) > 0:
                from_source = tds[3].select('a')[0].text

        # publication date
        tr_time = ""
        if len(tds) > 4:
            tr_time = tds[4].text

        # citation count
        tr_db = ""
        if len(tds) > 5:
            tr_db = tds[5].text

        # download link: https://kns.cnki.net/kns/download.aspx
        tr_down_url = ""
        tr_down_title = ""
        if len(tds) > 6:
            if len(tds[6].select('a')) > 0:
                tr_down_url = tds[6].select('a')[0].attrs['href']
                tr_down_title = tds[6].select('a')[0].attrs['title']

        # reading mode
        read_type = ""
        if len(tds) > 7:
            if len(tds[7].select('a')) > 0:
                read_type = tds[7].select('a')[0].attrs['title']
        if read_type == "HTML阅读":    # site label meaning "HTML reading"; served as PDF
            tr_file_type = ".pdf"
        elif read_type == "阅读":      # site label meaning "reading"; served as CAJ
            tr_file_type = ".caj"
        else:
            tr_file_type = ""
            log.info("Unknown file type; original reading mode was {}".format(read_type))

        # log one row of the result table
        log.info(
            "{},{},{},{},{},{}".format(tr_order, tr_title, tr_author, tr_file_type, tr_time.strip(), tr_db.strip()))
        # deduplicate: skip files already downloaded
        file_will_write = os.path.join(file_dir, tr_title)

        if check_if_preserve(tr_title, tr_author):
            log.info('\tNo permission to download this article, moving to the next ... {}'.format(tr_down_title))
            time.sleep(15)
            continue

        if check_before_download(tr_title, tr_author, from_source):
            log.info('\tFile not present, starting download ... {}'.format(file_will_write))

            article_url = 'https://kns.cnki.net' + tds[1].select('a')[0].attrs['href']
            article_response = session.get(article_url, headers=headers, timeout=global_timeout)
            article_soup = BeautifulSoup(article_response.text, 'lxml', from_encoding='utf-8')
            pdf_down = article_soup.select_one("#pdfDown")
            # only proceed when a PDF download button exists
            if pdf_down:
                download_url = pdf_down.attrs['href']
                if not str(download_url).startswith("http"):
                    download_url = 'https://kns.cnki.net' + download_url
                if str(download_url).startswith("https://chkdx.cnki.net"):
                    log.info('\tNo permission for the PDF download link ... article link {}'.format(download_url))
                else:
                    log.info('\tDownload link ... {}'.format(download_url))
                    try:
                        download(tr_title, tr_author, download_url, tr_authors)
                    except Exception:
                        log.error(traceback.format_exc())
                        log.error("Download failed: {0},{1}".format(tr_title, download_url))
                        err_cn = err_cn + 1
                        if err_cn >= 10:
                            exit()
                    time.sleep(15)
            else:
                log.info('\tNo PDF download link ... article link {}'.format(article_url))

def get_list(key, page_num, param_dict):
    page_data = {
        'curpage': page_num
        , 'RecordsPerPage': '20'
        , 'QueryID': random.random_integers(1, 9)
        , 'ID': ''
        , 'turnpage': page_num - 1 if page_num - 1 > 0 else page_num + 1
        , 'tpagemode': 'L'
        , 'Fields': ''
        , 'DisplayMode': 'listmode'
        , 'dbPrefix': param_dict['dbPrefix']
        , 'PageName': param_dict['pagename']
        , 'sorttype': "(FFD,'RANK') desc"
        , 'isinEn': param_dict['isinEn']
    }
    # fetch the query result list
    list_url = 'http://kns.cnki.net/kns/brief/brief.aspx?' + urllib.parse.urlencode(page_data)
    r_list_doc = session.get(list_url, headers=headers)
    r_list_doc.encoding = 'utf-8'
    # print(r_list_doc.text)
    print(list_url)

    soup = BeautifulSoup(r_list_doc.text, 'lxml', from_encoding='utf-8')
    headers['Referer'] = list_url
    trs = soup.select('.GridTableContent tr')
    # skip the header row
    for tr in trs[1:]:
        tds = tr.select('td')
        # row number
        tr_order = tds[0].text

        # title
        tr_title = tds[1].select('a')[0].text

        # authors
        tr_authors = ""
        authors_a = tds[2].select('a')
        for author_a in authors_a:
            tr_authors = tr_authors + "_" + author_a.text
        # first author
        tr_author = ""
        if len(authors_a) > 0:
            tr_author = authors_a[0].text

        # publication date
        tr_time = tds[4].text

        # database
        tr_db = tds[5].text

        # download link: http://kns.cnki.net/kns/download.aspx
        tr_down_url = tds[7].select('a')[0].attrs['href']

        # file type
        read_type = tds[8].select('a')[0].attrs['title']
        if read_type == "HTML阅读":    # site label meaning "HTML reading"; served as PDF
            tr_file_type = ".pdf"
        elif read_type == "阅读":      # site label meaning "reading"; served as CAJ
            tr_file_type = ".caj"
        else:
            tr_file_type = ""
            log.info("Unknown file type; original reading mode was {}".format(read_type))

        # log one row of the result table
        log.info("{},{},{},{},{},{}".format(tr_order, tr_title, tr_author, tr_file_type, tr_time.strip(), tr_db.strip()))
        # deduplicate: skip files already downloaded
        file_will_write = os.path.join(file_dir, tr_title)

        if_down = True
        # skip titles that contain ignore keywords (tables of contents, indexes)
        key_ignore = ["总目次", "索引", "总目录"]
        for key_i in key_ignore:
            if key_i in tr_title:
                log.info('\tTitle contains ignore keyword {}, skipping download'.format(key_i))
                if_down = False
                break

        # same-site dedup: match on title plus author
        if tr_title+"_"+tr_author in files_m:
            log.info('\tFile already in the current site list ... {}'.format(os.path.join(file_dir, tr_title)))
            if_down = False

        # cross-site dedup: match on title
        if tr_title in other_list:
            log.info('\tFile already in another site list ... {}'.format(os.path.join(file_dir, tr_title)))
            if_down = False

        # for f in file_dir_files:
        #     if f.startswith(tr_title):
        #         print('\t{},{}'.format(f, tr_title))
        #         print('\tFile already exists ... {}'.format(os.path.join(file_dir, f)))
        #         if_down = False
        #         with open(file_m, "a") as fm:
        #             fm.write(tr_title + "," + os.path.join(file_dir, f) + "\n")
        #         continue
        if if_down:
            log.info('\tFile not present, starting download ... {}'.format(file_will_write))
            download_url = 'http://kns.cnki.net/kns' + tr_down_url[2:] + '&dflag=pdfdown'
            log.info('\tDownload link ... {}'.format(download_url))
            download(tr_title, tr_author, download_url)
            time.sleep(6)
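
A hypothetical call to the second get_list variant above; it assumes the module globals the function touches (session, headers, log, file_dir, files_m, other_list) are already initialised, and every dictionary value here is a placeholder, not taken from the original:

param_dict = {
    'dbPrefix': 'SCDB',                           # placeholder database prefix
    'pagename': 'ASP.brief-default-result-aspx',  # placeholder page name
    'isinEn': '1',                                # placeholder language flag
}
get_list('machine learning', page_num=1, param_dict=param_dict)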