def _to_channels_first(img):
    """Return *img* laid out as (3, H, W).

    A 2-D (grey-scale) image is replicated into three identical channels;
    a 3-D (H, W, C) image contributes its first three channels.  Any other
    rank is returned unchanged.
    """
    if img.ndim == 2:
        # Grey-scale: every RGB channel carries the same intensity.
        return np.array([img, img, img])
    if img.ndim == 3:
        return np.array([img[:, :, 0], img[:, :, 1], img[:, :, 2]])
    return img


def _load_split(path, split, size):
    """Read one split via ``read_files`` into pre-allocated arrays.

    Returns ``(images, labels)`` where images is uint8 of shape
    (size, 3, 64, 64) and labels is a 10-byte ``np.chararray``.
    """
    images = np.zeros((size, 3, 64, 64), dtype="uint8")
    labels = np.chararray((size,), itemsize=10)
    for idx, (label, img) in enumerate(read_files(path, split)):
        images[idx, :, :, :] = _to_channels_first(img)
        labels[idx] = label
    return images, labels


def load_data(path, train_size=100000, val_size=10000):
    """Load the tiny-imagenet training and validation splits.

    For the validation split the label information comes from
    val_annotations.txt (handled by ``read_files``).

    Args:
        path: dataset root handed to ``read_files``.
        train_size: number of slots allocated for training samples.
        val_size: number of slots allocated for validation samples.

    Returns:
        ``(le, (X_train, y_train), (X_val, y_val))`` where ``le`` is the
        fitted ``LabelEncoder`` and the ``y_*`` arrays hold the encoded
        (0, 1, 2, ...) labels.

    NOTE: label slots that ``read_files`` does not fill stay uninitialised
    (``np.chararray`` does not zero its buffer), so the sizes should match
    the number of samples actually yielded.
    """
    print("load training data...")
    X_train, y_train = _load_split(path, 'train', train_size)

    # change text label (n04366367, ...) to (0, 1, 2, ...)
    print("encoding labels for training data...")
    le = LabelEncoder()
    y_train = le.fit_transform(y_train)

    print("load validation data...")
    X_val, y_val = _load_split(path, 'val', val_size)

    print("encoding labels for validation data...")
    y_val = le.transform(y_val.tolist())

    return le, (X_train, y_train), (X_val, y_val)
def crea_liste(month, year):
    """Build the weekday labels and an empty text matrix for one month.

    Args:
        month: month number (1-12).
        year: four-digit year.

    Returns:
        A three-element list ``[weekday_names, mat, mese]``:
        ``weekday_names`` holds the Italian weekday name of every day of the
        month in order, ``mat`` is a (days, 7) ``numpy.chararray`` of 40-byte
        cells filled with blanks, and ``mese`` is the string "month/year".
    """
    day = ["Lunedi", "Martedi", "Mercoledi", "Giovedi", "Venerdi",
           "Sabato", "Domenica"]
    mese = str(month) + "/" + str(year)

    # itermonthdays() walks whole weeks starting on Monday and yields 0 for
    # the padding days that belong to the neighbouring months.
    calendario = calendar.Calendar().itermonthdays(year, month)
    nomi_giorni = [
        day[pos % len(day)]
        for pos, numero in enumerate(calendario)
        if numero != 0
    ]

    # Initialise the matrix: one row per day, seven blank 40-byte cells.
    mat = numpy.chararray((len(nomi_giorni), 7), itemsize=40)
    mat[:] = " "
    return [nomi_giorni, mat, mese]
def partition(data_set, target_set, training_ratio):
    """Split a class-grouped data set into training and test parts.

    The rows of ``data_set`` are expected to be grouped by target, with the
    same number of consecutive samples for every target.  For each target
    the first ``round(n_samples * training_ratio)`` rows go to the training
    set and the remaining rows to the test set.

    Args:
        data_set: 2-D array-like, one sample per row.
        target_set: per-row target labels (single characters).
        training_ratio: fraction of each target's samples used for training.

    Returns:
        ``(train_data, train_target, test_data, test_target)``.  The target
        arrays are one-byte ``np.chararray`` objects.
    """
    data = np.asarray(data_set)

    # Number of targets (e.g. 3 when the labels are 'a', 'b' and 'c')
    n_targets = len(np.unique(target_set))
    print("Number of target values: %d" % n_targets)
    # Number of samples taken per target
    n_samples = len(data) // n_targets
    print("Number of samples per target: %d" % n_samples)
    # Size of training set (per target)
    training_samples = int(round(n_samples * training_ratio))
    print("Size of training set: %d" % training_samples)
    # Size of test set (per target)
    test_samples = n_samples - training_samples
    print("Size of test set: %d" % test_samples)

    train_target = np.chararray(n_targets * training_samples)
    test_target = np.chararray(n_targets * test_samples)
    train_data = np.empty([len(train_target), data.shape[1]])
    test_data = np.empty([len(test_target), data.shape[1]])

    for target in range(n_targets):
        first = target * n_samples            # first row of this target
        split = first + training_samples      # first test row of this target
        train_rows = slice(target * training_samples,
                           (target + 1) * training_samples)
        test_rows = slice(target * test_samples,
                          (target + 1) * test_samples)

        train_target[train_rows] = target_set[first]
        train_data[train_rows] = data[first:split]
        test_target[test_rows] = target_set[first]
        # The test rows of each target are stored in reverse order, as the
        # original index arithmetic did.
        test_data[test_rows] = data[split:first + n_samples][::-1]

    return train_data, train_target, test_data, test_target
def most_weighted(X, CV, n=10, save=False):
    """Find the most weighted words of every row of a term-document array.

    Args
    ----
    X: ndarray
        Term-document array with books on each row and words for each column
    CV: vectorizer object
        Provides the vocabulary through its ``vocabulary_`` mapping
        (word -> column index).
    n: int
        The top n weighted words
    save: bool
        Whether to write the table to ``top_<n>_words.csv``

    Returns
    -------
    top: DataFrame
        One row per book, columns labelled n..1 (the column labelled 1 holds
        the heaviest word).  The row index is the module-level ``abbrev``.
    """
    vocab = CV.vocabulary_

    # Cells must be wide enough for the longest word; 18 was the historical
    # fixed width and silently truncated anything longer.
    width = max([18] + [len(word) for word in vocab])

    # Turn the word -> index dict into an array with each word at its index.
    vocab_array = np.chararray((len(vocab),), itemsize=width)
    for word, column in vocab.items():
        vocab_array[column] = word

    # argsort is ascending, so the last n positions of every row are the n
    # heaviest columns; fancy indexing maps them back to the words.
    order = X.argsort(axis=1)
    out = vocab_array[order[:, -n:]]

    top = pd.DataFrame(out, index=abbrev, columns=np.arange(n, 0, -1))
    if save:
        top.to_csv("top_{}_words.csv".format(n))
    return top
def visualize(rings):
    """Draw the two 20-ball rings with matplotlib.

    Args:
        rings: pair of sequences; ``rings[0]`` is the left ring and
            ``rings[1]`` the right ring.  Every entry is a colour code:
            0 = red, 1 = blue, 2 = yellow, 3 = black.

    Raises:
        ValueError: if a ring contains a colour code outside 0-3.
    """
    n_balls = 20
    # The position of each ball is found using the equation of a circle.
    theta = 2 * math.pi / n_balls
    radius = 5
    palette = {0: 'r', 1: 'b', 2: 'y', 3: 'k'}

    def ring_colors(ring):
        try:
            return [palette[ring[ix]] for ix in range(n_balls)]
        except KeyError as err:
            raise ValueError("unknown ball colour code: %r" % (err.args[0],))

    colors_left = ring_colors(rings[0])
    colors_right = ring_colors(rings[1])

    left_xy = []
    right_xy = []
    for ix in range(n_balls):
        left_angle = (ix + 5 * math.pi / 6) * theta
        right_angle = (ix + 63 * math.pi / 16) * -theta
        left_xy.append((5 + (radius * math.cos(left_angle)),
                        5 + (radius * math.sin(left_angle))))
        right_xy.append((12.08 + (radius * math.cos(right_angle)),
                         5.3 + (radius * math.sin(right_angle))))

    plt.clf()
    # Balls are drawn one by one, left and right interleaved, so the
    # stacking order where the rings overlap stays the same as before.
    for ix in range(n_balls):
        plt.scatter(left_xy[ix][0], left_xy[ix][1], 1500, colors_left[ix])
        plt.scatter(right_xy[ix][0], right_xy[ix][1], 1500, colors_right[ix])
    plt.axis('equal')
    plt.show()
    plt.draw()
def bottom_up_longest_subsequence(x, y): m = len(x) n = len(y) cache = numpy.zeros((m + 1, n + 1)) subsequences = numpy.chararray((m + 1, n + 1)) subsequences[:] = "" for i in xrange(1, m + 1): for j in xrange(1, n + 1): up = cache[i - 1][j] left = cache[i][j - 1] if x[i - 1] == y[j - 1]: cache[i][j] = cache[i - 1][j - 1] + 1 subsequences[i][j] = "\\" elif up >= left: cache[i][j] = up subsequences[i][j] = "^" else: cache[i][j] = left subsequences[i][j] = "<" return cache, subsequences
def SAX_coding_function(PAA_in, alphabet_size):
    """Encode PAA data as SAX symbols.

    Args:
        PAA_in: input data represented as PAA (indexed 0..size-1).
        alphabet_size: number of levels for the coding.

    Returns:
        ``numpy.chararray`` with one lower-case letter per PAA sample.  The
        result is also passed to ``write_SAX_result``.
    """
    # The SAX cut points come from a reference distribution.
    cut_points = read_levels_distribution(alphabet_size)

    # Level -> letter equivalence for SAX.
    string_equivalent = numpy.array(list('abcdefghijklmnopqrstuvwxyz'))

    SAX_result = numpy.chararray(PAA_in.size)
    for i in range(PAA_in.size):
        # Level = number of cut points at or below the sample.  It is kept
        # as a plain int because NumPy rejects float indices.
        level = int(numpy.count_nonzero(cut_points <= PAA_in[i]))
        # NOTE(review): a sample below every cut point gives level 0, so the
        # index -1 wraps to 'z' - confirm the cut points always include a
        # lower bound.
        SAX_result[i] = string_equivalent[level - 1]

    write_SAX_result(SAX_result)
    return SAX_result
def LSISort(s):
    """Sort the rows of a 2-D character array by key-indexed counting.

    One stable counting pass is made per column, from column 0 up to the
    last column, each pass reordering whole rows by the lower-case letter
    found in that column.

    Args:
        s: 2-D array of single lower-case characters.

    Returns:
        A new ``numpy.chararray`` of the same shape with the rows reordered.
    """
    n_rows, n_cols = s.shape[0], s.shape[1]
    base = ord('a')

    for col in range(n_cols):
        # Frequency of every letter, shifted by one slot ...
        starts = numpy.zeros(26 + 1).astype('int')
        for letter in s[:, col]:
            starts[ord(letter) - base + 1] += 1
        # ... so the running total gives each letter's first output row.
        starts = numpy.cumsum(starts)

        ordered = numpy.chararray(s.shape)
        for row in range(n_rows):
            slot = ord(s[row, col]) - base
            ordered[starts[slot], :] = s[row, :]
            starts[slot] += 1

        s = ordered

    return ordered
def cycle_decrypt(key, message, show=False):
    '''
    Perform the decryption algorithm on the given ciphertext.

    The ciphertext is cut into ``len(key)`` columns of equal length.  The
    i-th column is written to the grid position of the i-th smallest key
    character (ties keep key order) and the grid is then read row by row.

    :param string key: the cipher key
    :param string message: the ciphertext
    :param bool show: print the reconstructed character grid
    :returns: the plaintext string
    '''
    size = len(key)
    # Integer division: the column length is used as a slice width.
    column_length = len(message) // size
    words = [message[x:x + column_length]
             for x in range(0, len(message), column_length)]

    # Key positions ordered by character code; sorted() is stable, so equal
    # characters keep their left-to-right order.
    order = sorted(range(size), key=lambda x: ord(key[x]))

    grid = [[chr(0)] * size for _ in range(column_length)]
    for i in range(size):
        letters = list(words[i])
        position = order[i]
        for j in range(column_length):
            grid[j][position] = letters[j]

    plaintext = ''.join(''.join(row) for row in grid)

    if show:
        string_grid = np.chararray((column_length, size))
        for i in range(column_length):
            for j in range(size):
                string_grid[i, j] = grid[i][j]
        print(string_grid)

    return plaintext
def return_bpt_type(pdata):
    """Classify every object as 'star-forming', 'composite' or 'AGN'.

    The classification uses two demarcation curves in the
    (nii_ha, oiii_hb) plane.  An object is first classified from column 0 of
    ``pdata['oiii_hb']`` / ``pdata['nii_ha']`` and is additionally promoted
    to composite or AGN when more than 16% of its chain samples fall in that
    region.  AGN takes precedence over composite.

    Returns:
        ``np.chararray`` (12-byte cells) with one flag per object.
    """
    def demarcation(nii_ha):
        # Lower and upper demarcation curves evaluated at nii_ha.
        lower = 0.61 / (nii_ha - 0.05) + 1.3
        upper = 0.61 / (nii_ha - 0.47) + 1.19
        return lower, upper

    #### 50th percentile determinations
    median_oiii = pdata['oiii_hb'][:, 0]
    lower, upper = demarcation(pdata['nii_ha'][:, 0])
    is_composite = (median_oiii > lower) & (median_oiii < upper)
    is_agn = median_oiii > upper

    #### from the chains
    chains = zip(pdata['oiii_hb_chain'], pdata['nii_ha_chain'])
    for idx, (oiii_chain, nii_chain) in enumerate(chains):
        lower, upper = demarcation(nii_chain)

        in_band = (oiii_chain > lower) & (oiii_chain < upper)
        if in_band.sum() / float(in_band.shape[0]) > 0.16:
            is_composite[idx] = True

        above = oiii_chain > upper
        if above.sum() / float(above.shape[0]) > 0.16:
            is_agn[idx] = True

    flags = np.chararray(pdata['oiii_hb'].shape[0], itemsize=12)
    flags[:] = 'star-forming'
    flags[is_composite] = 'composite'
    flags[is_agn] = 'AGN'
    return flags
def toChararray(arr, aligned=False):
    """Convert *arr* into a 2-D array of single characters.

    The input is first coerced to a byte-string array.  A 2-D input is
    simply re-typed to one byte per cell.  Otherwise one row is built per
    entry, one character per column, and entries shorter than the widest one
    are padded with '.'.

    Args:
        arr: array-like of byte strings.
        aligned: when true, raise instead of padding shorter entries.

    Raises:
        TypeError: if the coerced object lacks the array attributes.
        ValueError: if the array is 0-dimensional, is not a byte-string
            array, or (with ``aligned``) holds entries of unequal length.
    """
    arr = array(arr, dtype='|S')
    try:
        ndim, dtype_, shape = arr.ndim, arr.dtype, arr.shape
    except AttributeError:
        raise TypeError('arr is not a Numpy array')

    if ndim < 1:
        raise ValueError('arr.ndim should be at least 1')
    if dtype_.char != 'S':
        raise ValueError('arr must be a character array')

    if ndim == 2:
        return array(arr, dtype='|S1')

    n_seq = shape[0]
    l_seq = dtype_.itemsize
    new_arr = chararray((n_seq, l_seq))
    for row, seq in enumerate(arr):
        n_chars = len(seq)
        for col in range(min(n_chars, l_seq)):
            new_arr[row, col] = chr2(seq[col])
        if n_chars < l_seq:
            if aligned:
                raise ValueError('arr does not the same lengths')
            new_arr[row, n_chars:] = '.'
    return new_arr
def main():
    """
    Updates given HDF5 with readme text provided in a text file.
    Text gets saved as attribute "readme" in the root group.

    Returns -1 when a required command-line argument is missing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--h5file", help="HDF5 File to be updated")
    parser.add_argument("--readme", help="Text file with readme content")
    args = parser.parse_args()

    if not args.h5file:
        print("No HDF5 given")
        return -1

    if not args.readme:
        print("No readme file given")
        return -1

    # Read the text first, so a missing readme does not leave a freshly
    # created (mode 'a') or still-open HDF5 file behind.
    with open(args.readme, 'r', encoding="latin-1") as readme_file:
        text = readme_file.read()

    # 0-d byte-string array sized to hold the whole text.
    char_array = np.chararray((), itemsize=len(text))
    char_array[()] = text

    # The context manager closes the file even if the write fails.
    with h5py.File(args.h5file, 'a') as f:
        f.attrs.create('readme', char_array)
    print("bye")
def create_iso(fileList, ageList, rot=True):
    """
    Given a set of isochrone files downloaded from
    http://obswww.unige.ch/Recherche/evoldb/index/Isochrone/, put in correct
    iso.fits format for parse_iso code.

    fileList: list of downloaded isochrone files (could be one)

    ageList: list of lists of ages associated with each file in fileList.
    MUST BE IN SAME ORDER AS ISOCHRONES IN FILE! Also needs to be in logAge

    rot = True: assumes that models are rotating, will add appropriate column

    Each input file is written to 'tmp<i>.fits' with two columns prepended:
    'logAge' followed by 'Rot' ('r' for rotating models, 'n' otherwise).
    The individual files are then easiest to combine by hand.  Processing
    stops with a message at the first file whose number of models does not
    match the number of ages supplied for it.
    """
    for i, fname in enumerate(fileList):
        t = Table.read(fname, format='ascii')
        ages = ageList[i]

        # Find places where new models start; mass here is assumed to be 0.8
        starts = np.where(t['M_ini'] == 0.8)[0]

        # Each identified start is assumed to be associated with the
        # corresponding age in ages
        if len(starts) != len(ages):
            print('Ages mismatched in file! Quitting...')
            return

        # Model j covers the rows from its start up to the next start (or
        # the end of the table for the last model).
        bounds = list(starts) + [len(t)]
        age_arr = np.zeros(len(t))
        for j, age in enumerate(ages):
            age_arr[bounds[j]:bounds[j + 1]] = age

        rot_val = np.chararray(len(t))
        rot_val[:] = 'r' if rot else 'n'

        # Both columns are inserted at index 0, so 'logAge' ends up first and
        # 'Rot' second.
        t.add_column(Column(rot_val, name='Rot'), index=0)
        t.add_column(Column(age_arr, name='logAge'), index=0)

        t.write('tmp' + str(i) + '.fits')

    return
def __init__(self, filename):
    """
    Args:
        filename (str): path of a targets file in FITS format.  Extension 1
            must provide the RA, DEC, OBJTYPE and TARGETID columns and the
            TILE_RA, TILE_DEC and TILE_ID header keywords.
    """
    self.filename = filename
    # The context manager releases the FITS handle once everything needed
    # has been read.
    with fits.open(filename) as hdulist:
        targets = hdulist[1]
        self.ra = targets.data['RA']
        self.dec = targets.data['DEC']
        self.type = targets.data['OBJTYPE']
        self.id = np.int_(targets.data['TARGETID'])
        self.tile_ra = targets.header['TILE_RA']
        self.tile_dec = targets.header['TILE_DEC']
        self.tile_id = targets.header['TILE_ID']
    self.n = np.size(self.ra)
    fc = desimodel.focalplane.FocalPlane(ra=self.tile_ra, dec=self.tile_dec)
    self.x, self.y = fc.radec2xy(self.ra, self.dec)

    # this is related to the fiber assignment
    # NOTE(review): multiplying by -1.0 promotes the 'i4' array to float64;
    # kept as is - confirm whether an integer array was intended.
    self.fiber = -1.0 * np.ones(self.n, dtype='i4')

    # This section is related to the number of times a galaxy has been
    # observed, the assigned redshift and the assigned type
    self.n_observed = np.zeros(self.n, dtype='i4')
    self.assigned_z = -1.0 * np.ones(self.n)
    self.assigned_type = np.chararray(self.n, itemsize=8)
    self.assigned_type[:] = 'NONE'
def test__try_fl_mv_right__try_intify_mv_left(self):
    """Check each row of a null / float-text / null grid in turn.

    Row 0 ('1.0') is expected to end up in the right-hand column, row 1
    ('12.0') as b'12' in the left-hand column, and row 2 ('0.34') to stay in
    place.
    """
    cs = ColSplitter()
    cs._token_col_lengths = [-1, -1, 3]
    null = cs._null

    charr = np.chararray((3, 3), 5)
    for row, middle in enumerate(('1.0', '12.0', '0.34')):
        charr[row, 0] = null
        charr[row, 1] = middle
        charr[row, 2] = null

    expectations = (
        (0, (null, null, b'1.0')),
        (1, (b'12', null, null)),
        (2, (null, b'0.34', null)),
    )
    for row, expected in expectations:
        res = cs._try_fl_mv_right__try_intify_mv_left(charr, row, 1)
        for col, wanted in enumerate(expected):
            self.assertEqual(wanted, res[row, col])
def test__merge_cols(self):
    """Merge an int column with a float column and check the first rows."""
    cs = ColSplitter()
    cs._token_col_types = [cs._int, cs._float]
    cs._token_col_lengths = [-1, -1]
    null = cs._null

    columns = (
        (null, '23', null, null, '42', '123', null),
        ('12.0', null, '13.0', null, null, null, null),
    )
    charr = np.chararray((7, 2), 5)
    for col, values in enumerate(columns):
        for row, value in enumerate(values):
            charr[row, col] = value

    res = cs._merge_cols(charr)

    # self.assertEqual((5, 1), res.shape)
    for row, wanted in enumerate((b'12', b'23', b'13', null, b'42')):
        self.assertEqual(wanted, res[row, 0])
def depth_count():
    """Read an expression from stdin and tag its operators and parentheses.

    Every character of the input gets one cell in a one-byte column array:
    operators are replaced by a running operator counter, parentheses by a
    running parenthesis counter, and anything else is copied unchanged.  The
    column, the operator list and the parenthesis list are printed.

    Returns:
        The number of operators found.
    """
    ops = ['+', '-', '*', '/', '^']
    parantheses = ['(', ')']

    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    text = read_line('*')

    k = 0  # operators seen so far
    l = 0  # parentheses seen so far
    processed = np.chararray((len(text), 1))
    oplist = []
    parlist = []
    # NOTE(review): assumes the cell index advances once per character.
    for j, e in enumerate(text):
        if e in ops:
            oplist.append(e)
            # Cells are one byte wide, so counters >= 10 keep only their
            # first digit.
            processed[j, 0] = k
            k = k + 1
        elif e in parantheses:
            oplist.append(0)
            parlist.append(e)
            processed[j, 0] = l
            l = l + 1
        else:
            processed[j, 0] = e

    print(processed)
    print(oplist)
    print(parlist)
    return k
def _get_dates(self, uid, lim):
    """List the distinct dates on which user *uid* was inside zone *lim*.

    Returns an empty dict when the user has no location inside the zone.
    Otherwise returns a dict with "availableOptionsForDate" (one entry per
    distinct date, with the ordinal date and a French display string) and
    "selectedOptionfordate" (the first distinct date).
    """
    mask = np.transpose(self._locations[:, USER_ID] == int(uid))
    rows = self._locations[mask, :]
    points = rows[:, (LATITUDE, LONGITUDE, ORDINAL_DATE)]
    n_points = int(np.shape(points)[0])

    hits = [
        points[k, 2]
        for k in range(n_points)
        if self._inside(lim, points[k, 0], points[k, 1])
    ]
    datedanszone = list(set(hits))
    if not datedanszone:
        return {}

    week = ['Lundi','Mardi','Mercredi','Jeudi','Vendredi','Samedi','Dimanche']
    month = ['Janvier','Fevrier','Mars','Avril','Mai','Juin','Juillet','Aout','Septembre','Octobre','Novembre','Decembre']

    # Column 0: ordinal date, column 1: display text (30-byte cells).
    datematrix = np.chararray((len(datedanszone), 2), itemsize=30)
    for row, d in enumerate(datedanszone):
        ordinal = int(d)
        datedisp = dt.datetime.fromordinal(ordinal)
        datematrix[row, 0] = ordinal
        datematrix[row, 1] = " ".join([
            week[datedisp.weekday()],
            str(datedisp.day),
            month[datedisp.month - 1],
            str(datedisp.year),
        ])

    options = [{"date": d, "datedisp": dd} for d, dd in datematrix]
    return {
        "availableOptionsForDate": options,
        "selectedOptionfordate": {"date": datedanszone[0]},
    }
def jdToFull (jd, form='H'):
    """Return a textual representation of a Julian date.

:arg double jd: a Julian date
:arg character form: the output format, described below. Defaults to "H".
:returns: the textualization of the Julian date
:raises: :exc:`MiriadError` in case of buffer overflow (should never happen)

The possible output formats are:

==========  ====================================
Character   Result
==========  ====================================
*H*         "yyMONdd:hh:mm:ss.s" ("MON" is the three-letter abbreviation
            of the month name.)
*T*         "yyyy-mm-ddThh:mm:ss.s" (The "T" is literal.)
*D*         "yyMONdd.dd"
*V*         "dd-MON-yyyy" (loses fractional day)
*F*         "dd/mm/yy" (loses fractional day)
==========  ====================================
"""
    buf = N.chararray (120)
    _miriad_f.julday (jd, form, buf)

    # The text ends at the first empty cell of the buffer.
    pos = 0
    while pos < buf.size:
        if buf[pos] == '':
            return buf[:pos].tostring ()
        pos += 1

    raise MiriadError ('Output from julday exceeded buffer size')
def main():
    """Build the decision tree, classify the test objects and report.

    Prints one summary line (split criterion, confidence level and either
    the accuracy or a pointer to the result file).  In validation mode the
    predicted labels are written one per line to validation_result.txt.
    """
    Data()  # Initialize data set
    attribute_indices = np.arange(1, 23)
    obj = np.arange(4062)
    tree = build_dt(attribute_indices, obj)

    if Data.display_tree_flag:
        tree_list_by_level = []
        list_decision_tree(tree_list_by_level, tree, 1)
        display_decision_tree(tree_list_by_level)

    test_num = 2031
    result = np.chararray(test_num)
    test_obj = np.arange(test_num)
    classify(tree, test_obj, result)

    # The summary is assembled first and printed once, which gives the same
    # single space-separated line on Python 2 and 3.
    summary = ["Entropy," if Data.mode == 'i' else "Misclassification Error,"]
    summary.append("Confidence Level: {}".format(args.confidence_level))
    if not Data.validation_flag:
        accuracy = np.sum(result == Data.test[test_obj, 0]) / float(test_num)
        summary.append("Accuracy: {}".format(accuracy))
    else:
        summary.append("Result printed in validation_result.txt.")
    print(" ".join(summary))

    if Data.validation_flag:
        with open('validation_result.txt', 'w') as f:
            for x in result:
                f.write("{}\n".format(x))
def combineTechnicalIndicators(ticker):
    """Assemble the per-day feature table for *ticker*.

    The table has the columns date, daily percent change, 30-day volatility
    and RSI.  For the ``PREDICTED`` ticker a 'label' column is appended: 1
    when the price ``lagTime`` days later is higher than the day's price,
    otherwise 0 (the last ``lagTime`` days stay 0).

    Returns:
        ``pandas.DataFrame`` of the stacked columns.  Rows 25-34 are printed
        as a quick sanity check.
    """
    dates, prices = getDateAndPrice(ticker)

    # Fixed-width byte strings, sized from the first date.
    np_dates = np.chararray(len(dates), itemsize=len(dates[0]))
    for day, date in enumerate(dates):
        np_dates[day] = date

    percentChange = calcDailyPercentChange(prices)
    vol = calc30DayVol(percentChange)
    RSI = calcRSI(prices)

    columns = [np_dates, percentChange, vol, RSI]
    headers = ['date', 'return_' + ticker, 'vol_' + ticker, 'RSI_' + ticker]

    if ticker == PREDICTED:
        np_prices = np.array(prices)
        # create label for price of SPY
        label = np.zeros_like(np_prices)
        for x in range(len(np_prices[:-lagTime])):
            label[x] = 1 if np_prices[x] < np_prices[x + lagTime] else 0
        columns.append(label)
        headers.append('label')

    features = np.column_stack(tuple(columns))
    df_features = pd.DataFrame(features, columns=headers)
    print(df_features[25:35])
    return df_features
def __init__(self, cards, parent=None):
    """Show the header cards as aligned 'KEY = value / comment' lines.

    Args:
        cards: indexable collection of cards; each card provides the key at
            [0], the value at [1] and optionally a comment at [2].
        parent: optional parent widget.

    The dialog is executed (modally) before the constructor returns.
    """
    super(header_view, self).__init__(parent)
    self.setupUi(self)
    font = QFont("Courier", 11)
    font.setFixedPitch(1)
    self.cardlist.setFont(font)

    rows = []
    for index in range(len(cards)):
        item = cards[index]
        one = str(item[0]).strip()
        two = str(item[1]).strip()
        try:
            comment = str(item[2]).strip()
        except Exception:
            # A card without a usable comment is shown with an empty one.
            comment = ''
        rows.append((one, two, comment))

    keymax = max([len(row[0]) for row in rows] + [0])

    for one, two, comment in rows:
        # Pad every key so that the '=' signs line up.
        res = one.ljust(keymax + 5) + '= ' + two + ' / ' + comment
        temp = QListWidgetItem(res)
        temp.setTextAlignment(1)
        self.cardlist.addItem(temp)

    QObject.connect(self.okbutton, SIGNAL('clicked()'), self.close)
    self.exec_()
def keyIndexSort(s):
    """Sort lower-case letters with key-indexed counting (stable).

    Args:
        s: a string, or any sequence of single lower-case characters.

    Returns:
        A sorted ``str`` when *s* is a ``str``; otherwise a one-byte
        ``numpy.chararray`` holding the sorted characters.
    """
    base = ord('a')

    # Frequency of every letter, shifted by one slot ...
    counts = numpy.zeros(26 + 1).astype('int')
    for c in s:
        counts[ord(c) - base + 1] += 1
    # ... so the running total gives each letter's first output position.
    counts = numpy.cumsum(counts)

    ordered = [None] * len(s)
    for c in s:
        slot = ord(c) - base
        ordered[counts[slot]] = c
        counts[slot] += 1

    if isinstance(s, str):
        # Joining keeps the result a str on Python 2 and 3 alike.
        return "".join(ordered)

    res = numpy.chararray(len(s))
    res[:] = ordered
    return res
def calculateAllelicCount(self):
    """Count the alleles found at every alignment position.

    Reads ``self.msa`` (mapping of sequence description -> sequence) and
    logs through ``self.appLogger``.  Sequences shorter than the longest one
    simply contribute nothing beyond their end.

    Returns:
        ``(A, C, G, T, N, GAP, TOTAL, nSequences, sequenceSize)``.  The first
        six are per-position counts of "A", "C", "G", "T", "N" and "-";
        ``TOTAL`` is the number of distinct nucleotides (A/C/G/T) present at
        each position.
    """
    self.appLogger.info("Calculating Allelic counts")
    sequences = list(self.msa.values())
    nSequences = len(sequences)
    sequenceSize = max([len(seq) for seq in sequences] + [0])

    self.appLogger.warning("Sequence size: {} and matrix cols: {}".format(sequenceSize, sequenceSize))

    A = np.zeros(sequenceSize)
    C = np.zeros(sequenceSize)
    T = np.zeros(sequenceSize)
    G = np.zeros(sequenceSize)
    N = np.zeros(sequenceSize)
    GAP = np.zeros(sequenceSize)
    TOTAL = np.zeros(sequenceSize)

    for pos in range(sequenceSize):
        # Plain characters are counted, so the "A"/"C"/... lookups match on
        # Python 2 and 3 alike.
        c = Counter(seq[pos] for seq in sequences if pos < len(seq))
        A[pos] = c["A"]
        C[pos] = c["C"]
        T[pos] = c["T"]
        G[pos] = c["G"]
        N[pos] = c["N"]
        GAP[pos] = c["-"]
        TOTAL[pos] = sum(1 for base in "ACTG" if c[base] > 0)

    return A, C, G, T, N, GAP, TOTAL, nSequences, sequenceSize
def llz2utm(lon, lat, projection_zone='None'):
    '''
    Convert lat,lon to UTM

    lon, lat: arrays of longitudes and latitudes.
    projection_zone: UTM zone handed to pyproj.  When it is None or the
        string 'None' (the default), the zone number occurring most often
        among the points is used, together with the band letter of the first
        point in that zone.

    Returns the projected (x, y) arrays.  The zone used is printed.
    '''
    from collections import Counter

    import utm
    from pyproj import Proj

    if projection_zone is None or projection_zone == 'None':
        if len(lon) == 0:
            raise ValueError('cannot determine a UTM zone without points')
        # Determine most suitable UTM zone
        zones = []
        letters = []
        for k in range(len(lon)):
            _, _, zone_number, zone_letter = utm.from_latlon(lat[k], lon[k])
            zones.append(int(zone_number))
            letters.append(zone_letter)
        tally = Counter(zones)
        # Most frequent zone; ties go to the smallest zone number.
        best_zone = max(tally, key=lambda zn: (tally[zn], -zn))
        letter = letters[zones.index(best_zone)]
        z = str(best_zone) + letter
    else:
        z = projection_zone
    print(z)
    p = Proj(proj='utm', zone=z, ellps='WGS84')
    x, y = p(lon, lat)
    return x, y
def findIndex(ids, bedids):
    """Map every entry of *ids* to its row in *bedids*.

    Rows are matched on the key "<column 0>_<column 1>".  Entries of *ids*
    without a match in *bedids* are skipped.

    Args:
        ids: 2-D array whose first two columns form the key.
        bedids: 2-D array with the same key layout.

    Returns:
        1-D unsigned-int array with the *bedids* row index of every matched
        *ids* row, in *ids* order.
    """
    import numpy as np

    bed_rows = {}
    for i in range(bedids.shape[0]):
        bed_rows[bedids[i, 0] + "_" + bedids[i, 1]] = i

    # Every id is looked up, whichever of the two arrays is longer.
    matches = []
    for i in range(ids.shape[0]):
        row = bed_rows.get(ids[i, 0] + "_" + ids[i, 1])
        if row is not None:
            matches.append(row)

    return np.asarray(matches, dtype='I')
def generateData(fname, bidders, test=False):
    """Build the feature matrix (and labels) for the bidders listed in a CSV.

    Args:
        fname: CSV file; column 0 holds the bidder id and, for training
            data, column 3 the label.
        bidders: mapping bidder id -> bid information, turned into a feature
            row by ``bidderFeatEng``.  Bidders missing from the mapping keep
            an all-zero feature row.
        test: when true no labels are read.

    Returns:
        ``(data_x, data_y, ids)`` for training data, ``(data_x, ids)`` for
        test data.  ``ids`` is a 37-byte ``np.chararray`` of bidder ids.
    """
    data = pd.read_csv(fname).values
    n_rows = len(data)
    data_x = np.zeros((n_rows, feat_size))
    data_y = np.zeros(n_rows)
    ids = np.chararray(n_rows, itemsize=37)

    # No forced garbage collection per row: a full collection on every
    # iteration dominated the run time.
    for i in tqdm(range(n_rows)):
        bidder_name = data[i][0]
        ids[i] = bidder_name
        if bidder_name in bidders:
            data_x[i, :] = bidderFeatEng(bidders[bidder_name])
        # NOTE(review): assumes the label is read for every row, not only
        # for known bidders - confirm against the original layout.
        if not test:
            data_y[i] = data[i][3]

    if not test:
        return data_x, data_y, ids
    return data_x, ids
def main():
    """Write a synthetic four-column alignment of N sequences.

    Column 1 holds residues drawn from amino-acid indices 1-10 for the first
    half of the rows and 11-20 for the second half; column 2 holds the same
    two sets in swapped order.  Columns 0 and 3 hold the mirrored residues
    (index 21 - i) of those sets.  The alignment is saved to the output path
    with one sequence per line.
    """
    args = parse_args()

    N = int(args.N)
    out = args.out

    # The two halves together always cover N rows, even when N is odd.
    first = N // 2
    second = N - first

    set_1 = [io.AMINO_ACIDS[a] for a in np.random.choice(range(1, 11), first)]
    set_2 = [io.AMINO_ACIDS[a] for a in np.random.choice(range(11, 21), second)]
    set_3 = [io.AMINO_ACIDS[21 - io.AMINO_INDICES[a]] for a in set_2]
    set_4 = [io.AMINO_ACIDS[21 - io.AMINO_INDICES[a]] for a in set_1]

    # NOTE(review): built from the residue lists directly so text residues
    # are written as plain characters - assumes io.AMINO_ACIDS yields
    # one-character strings; confirm.
    msa = np.array([
        set_4 + set_3,
        set_1 + set_2,
        set_2 + set_1,
        set_3 + set_4,
    ]).T

    np.savetxt(out, msa, delimiter='', newline='\n', fmt='%s')
def _setup_krd(*args):
    """Build a 10 x 8 Fortran-ordered character table from *args*.

    Every row is first blanked through ``_chrcopy``; the i-th argument is
    then copied into row i.
    """
    table = np.chararray((10, 8), order='F')

    for row in xrange(10):
        _chrcopy(table, row, " ")

    for row, text in enumerate(args):
        _chrcopy(table, row, text)

    return table
def levelGen(size):
    """Generate a square level as newline-separated text.

    The level is ``size`` x ``size`` cells: a border of '|' walls around a
    floor of '.', with one spawn point placed on a random interior cell and
    marked with its facing ('N', 'E', 'S' or 'W').

    Args:
        size: edge length of the level; must be at least 3 so that an
            interior cell exists.

    Returns:
        The level as a single string, one row per line.
    """
    world = [['.'] * size for _ in range(size)]
    for i in range(size):
        world[0][i] = '|'
        world[-1][i] = '|'
        world[i][0] = '|'
        world[i][-1] = '|'

    def r():
        return random.randint(1, size - 2)

    # sets spawnpoint
    # The facing is drawn before the coordinates, which matches the order in
    # which an assignment evaluates its two sides and so keeps seeded output
    # unchanged.
    facing = random.choice(['N', 'E', 'S', 'W'])
    row = r()
    col = r()
    world[row][col] = facing

    return '\n'.join(''.join(line) for line in world)
from sklearn.ensemble import GradientBoostingClassifier

warnings.filterwarnings("ignore", category=DeprecationWarning)

did = str(sys.argv[1])
genre_list = ['R&B', 'Country', 'Rap', 'Reggae', 'Religious', 'Metal']


def _load_pickle(path):
    """Load one pickle file and close it again.

    NOTE(security): unpickling executes arbitrary code; only use this on
    files produced by this project.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


sample_song_dataset = _load_pickle(
    'song_vectors_for_genre_dataset_' + did + '.pickle')
train = _load_pickle("../dataset/80_20_train_" + did + ".pickle")
test = _load_pickle("../dataset/80_20_test_" + did + ".pickle")
id_detail = _load_pickle("id_detail_" + did + ".pickle")

train_songs = sample_song_dataset['train']

# Feature matrix (one 300-wide row per training song) and genre labels.
ab = np.random.rand(len(train_songs), 300)
label = np.chararray(len(train_songs), itemsize=9)
label[:] = ''

count = 0
for songId in train_songs:
    for j, feature in enumerate(train_songs[songId]['vector']):
        ab[count][j] = feature
    # NOTE(review): the genre is looked up through id_detail[count], not
    # through songId - confirm both orderings agree.
    label[count] = train[id_detail[count]]['genre']
    count += 1

clf = SVC(random_state=10)
clf.fit(ab, label)
print("Total Count:" + str(count) + " Original Count:" + str(len(sample_song_dataset['train'])))
n1 = n[:,1].reshape(row_no, col_no) n2 = n[:,2].reshape(row_no, col_no) n3 = n[:,3].reshape(row_no, col_no) n0[n0 == 80] = 0 n1[n0 == 80] = 0 n2[n0 == 80] = 0 n3[n0 == 80] = 0 ns = (n0 + n1 + n2 + n3) / 4 nm = np.maximum.reduce([n0,n1,n2,n3]) height = n0.shape[0] width = n0.shape[1] best = np.chararray((height, width), itemsize=1, unicode=True) max_q = np.zeros((height, width)) print n0.shape print best.shape for i in xrange(height): for j in xrange(width): if n0[i,j] == 80 or n0[i,j] == -10: best[i,j] = 'B' continue up = n0[i,j] left = n1[i,j] down = n2[i,j] right = n3[i,j] highest = max(up,left,down,right)
def plot_raters(dataframe, ax=None, width=101, size=0.40):
    """Plot every rating as a coloured dot, with per-rater percentages.

    Args:
        dataframe: one column per rater, one row per sample; ratings are
            1.0 (good), 0.0, -1.0 (bad) or NaN.  NOTE: a helper column
            "notnan" is added to the frame that is passed in.
        ax: axes to draw on; defaults to the current axes.
        width: number of samples per block before the plot wraps.
        size: radius of each dot.

    Returns:
        The axes that were drawn on.
    """
    raters = sorted(dataframe.columns.tolist())
    dataframe["notnan"] = np.any(np.isnan(dataframe[raters]), axis=1).astype(int)
    dataframe = dataframe.sort_values(by=["notnan"] + raters, ascending=True)
    for rater in raters:
        dataframe[rater] = dataframe[[rater]].astype(str)

    # .values is available on every pandas version (as_matrix() is gone).
    matrix = dataframe.values
    nsamples, nraters = dataframe.shape

    matrix = fill_matrix(matrix, width)

    if matrix.shape[0] > width:
        # Wrap long tables: blocks of `width` samples are placed side by
        # side, separated by one column of "n/a".
        matrices = []
        nblocks = (matrix.shape[0] // width) + 1
        nas = np.chararray((width, 1), itemsize=3)
        nas[:] = "n/a"
        for i in range(nblocks):
            if i > 0:
                matrices.append(nas)
            matrices.append(matrix[i * width:(i + 1) * width, ...])

        matrices[-1] = fill_matrix(matrices[-1], width)
        matrix = np.hstack(tuple(matrices))

    palette = {
        "1.0": "limegreen",
        "0.0": "dimgray",
        "-1.0": "tomato",
        "n/a": "w",
    }

    ax = ax if ax is not None else plt.gca()

    # ax.patch.set_facecolor('gray')
    ax.set_aspect("equal", "box")
    ax.xaxis.set_major_locator(plt.NullLocator())
    ax.yaxis.set_major_locator(plt.NullLocator())

    nrows = ((nsamples - 1) // width) + 1
    xlims = (-14.0, width)
    ylims = (-0.07 * nraters, nrows * nraters + nraters * 0.07 + (nrows - 1))

    ax.set_xlim(xlims)
    ax.set_ylim(ylims)

    offset = 0.5 * (size / 0.40)
    for (x, y), w in np.ndenumerate(matrix):
        # Anything that is not a known rating is drawn like "n/a".
        color = palette.get(w, palette["n/a"])
        rect = plt.Circle([x + offset, y + offset], size, facecolor=color, edgecolor=color)
        ax.add_patch(rect)

    # text_x = ((nsamples - 1) % width) + 6.5
    text_x = -8.5

    for i, rname in enumerate(raters):
        rated = sum(dataframe[rname] != "n/a")
        good = 100 * sum(dataframe[rname] == "1.0") / rated
        bad = 100 * sum(dataframe[rname] == "-1.0") / rated

        text_y = 1.5 * i + (nrows - 1) * 2.0
        # Good / neutral / bad percentages, left to right.
        percentages = (
            (0.0, good, "limegreen"),
            (3.50, max((0.0, 100 - good - bad)), "dimgray"),
            (7.0, bad, "tomato"),
        )
        for shift, value, color in percentages:
            ax.text(
                text_x + shift,
                text_y,
                "%2.0f%%" % value,
                color=color,
                weight=1000,
                size=16,
                horizontalalignment="right",
                verticalalignment="center",
                transform=ax.transData,
            )

    # ax.autoscale_view()
    ax.invert_yaxis()
    plt.grid(False)

    # Remove and redefine spines
    for side in ["top", "right", "bottom"]:
        # Toggle the spine objects
        ax.spines[side].set_color("none")
        ax.spines[side].set_visible(False)

    ax.spines["left"].set_linewidth(1.5)
    ax.spines["left"].set_color("dimgray")
    # ax.spines["left"].set_position(('data', xlims[0]))

    ax.set_yticks([0.5 * (ylims[0] + ylims[1])])
    ax.tick_params(axis="y", which="major", pad=15)

    ticks_font = FontProperties(
        family="FreeSans",
        style="normal",
        size=20,
        weight="normal",
        stretch="normal",
    )
    for label in ax.get_yticklabels():
        label.set_fontproperties(ticks_font)

    return ax
def world(n, m):
    """Return an ``n`` x ``m`` grid (list of row lists) filled with '-'.

    Every row is its own list, so cells can be changed independently.
    """
    return [['-'] * m for _ in range(n)]
#converting it to a dictionary and storing it to a list
#tweets = [i.asdict() for i in t]
#print tweets[0]
pub_tweets = [tweet.text for tweet in tweets]

#pub_tweets =public_tweets.encode('ascii','ignore')
print(pub_tweets)

tweet_count = len(pub_tweets)
# One row each: sentiment word (8-byte cells) and polarity score per tweet.
senti = np.chararray((1, tweet_count), itemsize=8)
emotion = np.empty([1, tweet_count])

for i, text in enumerate(pub_tweets):
    analysis = TextBlob(text)
    emotion[0, i] = analysis.sentiment.polarity  #storing polarity for tweets

    #sentiment according to polarity thresholds of negative, 0 and positive
    #values respectively
    if emotion[0, i] < 0:
        senti[0, i] = 'negative'
    elif emotion[0, i] == 0:
        senti[0, i] = 'neutral'
    else:
        senti[0, i] = 'positive'
Counting fields starts from 1. """ while True: try: game_size = int(raw_input("Size of game field: ")) break except ValueError: print "You have to give an intiger!" minesweeper_field = np.random.randint(6, size=(game_size, game_size)) for x in np.nditer(minesweeper_field, op_flags=['readwrite']): if x > 1: x[...] = 0 game_filed = np.chararray((game_size, game_size)) game_filed[:] = "?" while True: print game_filed chosen_field = (raw_input("Chose filed> ")).split(",") chosen_field[0] = int(chosen_field[0]) - 1 chosen_field[1] = int(chosen_field[1]) - 1 if minesweeper_field[chosen_field[0], chosen_field[1]] == 1: print "GAME OVER!" print minesweeper_field break try:
def __init__(self, x, y):
    """Store the dimensions and allocate an (x, y) character image buffer.

    The image buffer is created with ``np.chararray`` and is left
    uninitialised; the colour starts empty and ``can_die`` starts False.
    """
    self._x, self._y = x, y
    image_shape = (x, y)
    self._image = np.chararray(image_shape)
    self._color = ''
    self.can_die = False
# crater name and buffer extent fieldname1 = arcpy.ValidateFieldName("CRATER_ID") fieldname2 = arcpy.ValidateFieldName("BUFFER_TXT") # add fields arcpy.AddField_management(infile, fieldname1, "TEXT", "", "", 30) # get the number of rows in infile n = int(arcpy.GetCount_management(infile)[0]) # prepare empty arrays diam = np.ones(n) x_coord = np.ones(n) y_coord = np.ones(n) crater_id = np.chararray(n, itemsize=30) buffer_txt = np.chararray(n, itemsize=30) #crater_id_list = ['flamsteed_s', 'herigonius_k'] #crater_id_list = ['flamsteed_s', 'herigonius_k', 'unnamed_0000' ,'encke_x', # 'lassell_d','unnamed_0001','samir','unnamed_0002','unnamed_0003', # 'unnamed_0004','unnamed_0005','unnamed_0006','unnamed_0007','unnamed_0008', # 'unnamed_0009','unnamed_0010','unnamed_0011','unnamed_0012','unnamed_0013'] #crater_id = np.array(crater_id_list) with arcpy.da.UpdateCursor( infile, ["Diam_km", "CRATER_ID", "x_coord", "y_coord"]) as cursor: ix = 0 for row in cursor:
csv_as_list = list(reader) # reader = unicode_csv_reader(open(source_data_path)) # csv_as_list = list(reader) return np.asarray(csv_as_list) csv_as_ndarray = open_csv_as_numpy() csv_as_df = pd.DataFrame(csv_as_ndarray[1:]).sort_values([2]) size = csv_as_df.groupby(2).count().min()[0] # sample size replace = False # with replacement fn = lambda obj: obj.loc[np.random.choice(obj.index, size, replace),:] csv_as_df = csv_as_df.groupby(2, as_index=False).apply(fn) csv_as_ndarray = np.vstack((csv_as_ndarray[0], csv_as_df)) new_shape = [batch_size, class_names.shape[0]] formatted_ndarray = np.chararray(new_shape) formatted_ndarray[:] = '0' num_of_batches = int(math.ceil(csv_as_ndarray.shape[0] / float(batch_size))) fout = open(output_path, 'ab') for i in range(num_of_batches - 1): lo = i*batch_size if i < num_of_batches - 1: hi = (i+1)*batch_size elif i == num_of_batches - 1: hi = csv_as_ndarray.shape[0] output_buffer = formatted_ndarray[:hi-lo].astype('U256') if i == 0: output_buffer[0] = np.array(class_names).astype('U256') for j in range(0, class_names.shape[0]): output_buffer[csv_as_ndarray[lo:hi,2] == class_names[j], j] = '1' # csv_as_ndarray = np.resize(csv_as_ndarray, new_shape)
def main(inimgtemplate, inbgrcsv, inshape, outimg):
    """Burn per-point blue/green/red statistics into a six-band raster.

    For every distinct pixel hit by a point feature of ``inshape``, the
    blue/green/red values of the matching rows of ``inbgrcsv`` are collected
    and their mean (bands 1-3) and standard deviation (bands 4-6) are written
    to ``outimg`` at that pixel.

    Args:
        inimgtemplate: raster opened read-only; supplies the geotransform,
            projection and size used for the output.
        inbgrcsv: comma-delimited table with columns name, blue, green, red.
        inshape: OGR-readable point layer with a ``specname`` field.
        outimg: output raster; opened for update when the file already
            exists, otherwise created as a GTiff.
    """
    inDS = gdal.Open(inimgtemplate, gdal.GA_ReadOnly)
    gt = inDS.GetGeoTransform()
    # NOTE(review): ns and nl are not used anywhere below.
    ns = inDS.RasterXSize
    nl = inDS.RasterYSize
    print(gt)
    tabdata = np.genfromtxt(inbgrcsv, dtype=[('names', '|S43'), ('blue', 'f8'), ('green', 'f8'), ('red', 'f8')], delimiter=',', skip_header=0)
    # Only the first 9 characters of each table name are kept; the same
    # 9-character prefix of "specname" is used as the match key below.
    tabnames = np.chararray(len(tabdata), itemsize=10)
    for d in range(len(tabdata)):
        tabnames[d] = tabdata['names'][d][0:9]
    # NOTE(review): newstuff is not used anywhere below.
    newstuff = np.zeros((2, len(tabdata)), dtype=np.int64)
    shp = ogr.Open(inshape)
    lyr = shp.GetLayer()
    numfeat = lyr.GetFeatureCount()
    ## if input image already exists, read it and update,
    ## otherwise, Create output image
    if (os.path.isfile(outimg)):
        outDS = gdal.Open(outimg, gdal.GA_Update)
        band1 = outDS.GetRasterBand(1)
        band2 = outDS.GetRasterBand(2)
        band3 = outDS.GetRasterBand(3)
        band4 = outDS.GetRasterBand(4)
        band5 = outDS.GetRasterBand(5)
        band6 = outDS.GetRasterBand(6)
        blue = band1.ReadAsArray()
        green = band2.ReadAsArray()
        red = band3.ReadAsArray()
        bluesd = band4.ReadAsArray()
        greensd = band5.ReadAsArray()
        redsd = band6.ReadAsArray()
    else:
        drv = gdal.GetDriverByName('GTiff')
        # NOTE(review): bands 1-6 are written unconditionally at the end of
        # this function, so RasterCount * 2 presumably assumes a 3-band
        # template -- confirm.
        outDS = drv.Create(outimg, xsize=inDS.RasterXSize, ysize=inDS.RasterYSize, \
                           bands=inDS.RasterCount * 2, eType=gdal.GDT_Float32, options=["COMPRESS=LZW"])
        outDS.SetProjection(inDS.GetProjection())
        outDS.SetGeoTransform(inDS.GetGeoTransform())
        blue = np.zeros((inDS.RasterYSize, inDS.RasterXSize), dtype=np.float32)
        green = np.zeros((inDS.RasterYSize, inDS.RasterXSize), dtype=np.float32)
        red = np.zeros((inDS.RasterYSize, inDS.RasterXSize), dtype=np.float32)
        bluesd = np.zeros((inDS.RasterYSize, inDS.RasterXSize), dtype=np.float32)
        greensd = np.zeros((inDS.RasterYSize, inDS.RasterXSize), dtype=np.float32)
        redsd = np.zeros((inDS.RasterYSize, inDS.RasterXSize), dtype=np.float32)
    ## for each point ASD feature, match the root name to the list and get
    ## Blue, Green, and Red values to insert into the pixel at its location.
    pix = np.zeros(numfeat, dtype=np.int64)
    lin = np.zeros(numfeat, dtype=np.int64)
    # "%05d %05d" is exactly 11 characters for non-negative values below
    # 100000; anything wider would be truncated by itemsize=11.
    textit = np.chararray(numfeat, itemsize=11)
    featnames = []
    for featnum in range(numfeat):
        feat = lyr.GetNextFeature()
        featnames.append((feat.GetField("specname"))[0:9])
        geom = feat.GetGeometryRef()
        xval = geom.GetX()
        yval = geom.GetY()
        # Map coordinates to pixel column / line with the template
        # geotransform (origin gt[0], gt[3]; pixel size gt[1], gt[5]).
        pix[featnum] = math.floor((xval - gt[0]) / gt[1])
        lin[featnum] = math.floor((yval - gt[3]) / gt[5])
        textit[featnum] = ("%05d %05d" % (pix[featnum], lin[featnum]))
    # One iteration per distinct "column line" key, i.e. per distinct pixel.
    # NOTE(review): uniqind, templist and the loop index t are never read.
    uniqrowcol, uniqind = np.unique(textit, return_index=True)
    templist = []
    for t, k in enumerate(uniqrowcol.tolist()):
        pixlin = [int(k.split()[0].decode()), int(k.split()[1].decode())]
        ## ind = np.logical_and(np.equal(pix, pixlin[0]), np.equal(lin, pixlin[1]))
        ## numvals = ind.sum()
        print(pixlin)
        # Boolean mask of the features that fall on this pixel.
        set1 = np.char.equal(k, textit)
        setfeatnames = np.asarray(featnames)[set1]
        pixlistblue = []
        pixlistgreen = []
        pixlistred = []
        for thename in setfeatnames.tolist():
            # Linear scan of the table; only the first matching row is used.
            for j, tabrow in enumerate(tabnames):
                if (tabrow.decode() == thename):
                    pixlistblue.append(tabdata['blue'][j])
                    pixlistgreen.append(tabdata['green'][j])
                    pixlistred.append(tabdata['red'][j])
                    break
        print(k, len(pixlistblue))
        # NOTE(review): when no table row matched, these lists are empty and
        # np.mean / np.std of an empty array yield nan -- confirm that this
        # is acceptable for the output raster.
        meanvalblue = np.mean(np.asarray(pixlistblue))
        sdvalblue = np.std(np.asarray(pixlistblue))
        meanvalgreen = np.mean(np.asarray(pixlistgreen))
        sdvalgreen = np.std(np.asarray(pixlistgreen))
        meanvalred = np.mean(np.asarray(pixlistred))
        sdvalred = np.std(np.asarray(pixlistred))
        # Arrays are indexed [line, column].
        # NOTE(review): no bounds check is made on pixlin here.
        blue[pixlin[1], pixlin[0]] = meanvalblue
        green[pixlin[1], pixlin[0]] = meanvalgreen
        red[pixlin[1], pixlin[0]] = meanvalred
        bluesd[pixlin[1], pixlin[0]] = sdvalblue
        greensd[pixlin[1], pixlin[0]] = sdvalgreen
        redsd[pixlin[1], pixlin[0]] = sdvalred
    # Drop the OGR references before writing the raster.
    shp, lyr = None, None
    print("All point features processed")
    # Write every band back with 0.0 declared as the no-data value.
    band1 = outDS.GetRasterBand(1)
    band1.SetNoDataValue(0.0)
    band1.WriteArray(blue)
    band2 = outDS.GetRasterBand(2)
    band2.SetNoDataValue(0.0)
    band2.WriteArray(green)
    band3 = outDS.GetRasterBand(3)
    band3.SetNoDataValue(0.0)
    band3.WriteArray(red)
    band4 = outDS.GetRasterBand(4)
    band4.SetNoDataValue(0.0)
    band4.WriteArray(bluesd)
    band5 = outDS.GetRasterBand(5)
    band5.SetNoDataValue(0.0)
    band5.WriteArray(greensd)
    band6 = outDS.GetRasterBand(6)
    band6.SetNoDataValue(0.0)
    band6.WriteArray(redsd)
    # Release the band and dataset references (presumably what closes the
    # files and flushes the output in the GDAL bindings -- confirm).
    band1, band2, band3, band4, band5, band6 = None, None, None, None, None, None
    inDS, outDS = None, None
#!/usr/bin/python import time import sys import numpy as np # YOUR FUNCTIONS GO HERE ------------------------------------- # 1. Populate the scoring matrix and the backtracking matrix seq1 = 'AC' seq2 = 'AG' score_matrix = np.zeros((len(seq2) + 1, len(seq1) + 1), dtype=int) backtrack_matrix = np.chararray((len(seq2) + 1, len(seq1) + 1), unicode=True) backtrack_matrix[:] = 'D' def populate(seq1, seq2): global backtrack_matrix global score_matrix for i in range(len(seq2) + 1): for j in range(len(seq1) + 1): if i == 0 and j == 0: score_matrix[i][j] = 0 backtrack_matrix[i][j] = 'E' elif i == 0 and j != 0: score_matrix[i][j] = -2 * j backtrack_matrix[i][j] = 'L'
#train=np.loadtxt("blend_train7.txt")[:,0:18] #test=np.loadtxt("blend_pred7.txt")[:,0:18] train = np.loadtxt("blend_train8_50.txt") test = np.loadtxt("blend_pred8_50.txt") target = pd.read_csv('./target.csv', index_col=0) submission = pd.read_csv('./submission.csv') est = xgb.XGBClassifier(max_depth=7, learning_rate=0.023577270815059184, n_estimators=189, gamma=0.074789906865142142, min_child_weight=3.066587035368701, subsample=0.49697592546415631, colsample_bytree=0.95166695052920525, reg_alpha=0.20645978460961734, objective='multi:softmax') est.fit(train, target['status_group']) pred = est.predict(test) importances = est.booster().get_fscore() sorted_imp = sorted(importances.items(), key=operator.itemgetter(1)) output = np.chararray(len(pred), itemsize=30) output[pred == 0] = 'functional' output[pred == 1] = 'functional needs repair' output[pred == 2] = 'non functional' submission['status_group'] = output submission.to_csv('output.csv', index=False) etime = float(time.time() - stime)
def catalogue(refresh=None, **kwargs):
    """
    Read UPSCALE catalogue for JASMIN

    >>> cat = catalogue(Experiment='present_climate', Frequency='monthly')

    refresh = True: rebuild the catalogue file from the directory tree first
    >>> cat = catalogue(refresh=True, Experiment='future_climate', Frequency='6hourly')

    Args:
        refresh: when True (or when the catalogue file does not exist yet)
            the directory tree under ``upscale_dir`` is scanned and the
            catalogue file is rewritten before it is read.
        **kwargs: filters of the form ``Field=value`` or
            ``Field=[value, ...]`` where ``Field`` is one of the catalogue
            columns (GA, Experiment, Resolution, Frequency, Stash, JobID).

    Returns:
        numpy structured array holding the rows that satisfy every filter.

    Raises:
        ValueError: if a requested value does not occur in its column.
    """

    ### Location of catalogue file
    cat_file = baspy_path + '/upscale_catalogue.npy'

    ### If cat_file does not exist, then set refresh=True
    if not os.path.isfile(cat_file):
        print(
            "Catalogue of data files does not exist, this may be the first time you've run this code"
        )
        print("Building catalogue now... this could take a few minutes")
        refresh = True

    if refresh == True:

        ### Get all paths for all model data (a real list on Python 2 and 3;
        ### filter() is a lazy iterator on Python 3)
        dirs = [
            path
            for path in glob.glob(upscale_dir + '/GA3/*/*/*/m??s??i???/*')
            if os.path.isdir(path)
        ]

        ### Keep only dirs which include standard UPSCALE job IDs
        GA3_job_ids = [
            'xhqij', 'xhqik', 'xhqil', 'xhqin', 'xhqio', 'xgxqo', 'xgxqp',
            'xgxqq', 'xgxqe', 'xgxqf', 'xgxqg', 'xgxqh', 'xgxqi', 'xhqir',
            'xhqis', 'xgyip', 'xgyid', 'xgyie', 'xgyif', 'xgxqk', 'xgxql',
            'xgxqm'
        ]
        # any() keeps each directory at most once, even if its path happens
        # to contain more than one job ID.
        dirs = [
            path for path in dirs
            if any(jobid in path for jobid in GA3_job_ids)
        ]

        ### Split every path into its catalogue columns
        dt = np.dtype([('GA', '|S14'), ('Experiment', '|S16'),
                       ('Resolution', '|S14'), ('Frequency', '|S14'),
                       ('Stash', '|S14'), ('JobID', '|S14')])
        a = np.zeros(len(dirs), dt)
        for i, path in enumerate(dirs):
            parts = path.split('/')
            # Path components 4..9 are GA, Experiment, Resolution,
            # Frequency, Stash and JobID, in that order.
            a[i] = (parts[4], parts[5], parts[6], parts[7], parts[8],
                    parts[9])

        np.save(cat_file, a)

    ### Read catalogue
    cat = np.load(cat_file)

    ### Filter data
    text_types = (bytes, type(u''))
    for name in kwargs:

        column = cat[name]
        uniq_label = np.unique(column)
        cat_bool = np.zeros(len(cat), dtype=bool)

        vals = kwargs[name]
        if isinstance(vals, text_types):
            vals = [vals]

        for val in vals:
            # The columns are byte strings; encode text filters so the
            # comparison also works on Python 3.
            key = val
            if column.dtype.kind == 'S' and not isinstance(val, bytes):
                key = val.encode()
            if key not in uniq_label:
                raise ValueError(val + ' not found. See available: ' +
                                 np.array_str(uniq_label))
            cat_bool |= (column == key)

        cat = cat[cat_bool]

    return cat
def test_unique_1d(self):
    """Check `unique` on 1-D inputs for every combination of return flags."""

    def check_all(a, b, i1, i2, c, dt):
        # a: input; b: expected unique values; i1: expected return_index;
        # i2: expected return_inverse; c: expected return_counts.
        # dt is only used to label the failure message.
        # The three positional flags passed to unique() below are, in
        # order, return_index, return_inverse and return_counts.
        base_msg = 'check {0} failed for type {1}'

        msg = base_msg.format('values', dt)
        v = unique(a)
        assert_array_equal(v, b, msg)

        msg = base_msg.format('return_index', dt)
        v, j = unique(a, 1, 0, 0)
        assert_array_equal(v, b, msg)
        assert_array_equal(j, i1, msg)

        msg = base_msg.format('return_inverse', dt)
        v, j = unique(a, 0, 1, 0)
        assert_array_equal(v, b, msg)
        assert_array_equal(j, i2, msg)

        msg = base_msg.format('return_counts', dt)
        v, j = unique(a, 0, 0, 1)
        assert_array_equal(v, b, msg)
        assert_array_equal(j, c, msg)

        msg = base_msg.format('return_index and return_inverse', dt)
        v, j1, j2 = unique(a, 1, 1, 0)
        assert_array_equal(v, b, msg)
        assert_array_equal(j1, i1, msg)
        assert_array_equal(j2, i2, msg)

        msg = base_msg.format('return_index and return_counts', dt)
        v, j1, j2 = unique(a, 1, 0, 1)
        assert_array_equal(v, b, msg)
        assert_array_equal(j1, i1, msg)
        assert_array_equal(j2, c, msg)

        msg = base_msg.format('return_inverse and return_counts', dt)
        v, j1, j2 = unique(a, 0, 1, 1)
        assert_array_equal(v, b, msg)
        assert_array_equal(j1, i2, msg)
        assert_array_equal(j2, c, msg)

        msg = base_msg.format(('return_index, return_inverse '
                               'and return_counts'), dt)
        v, j1, j2, j3 = unique(a, 1, 1, 1)
        assert_array_equal(v, b, msg)
        assert_array_equal(j1, i1, msg)
        assert_array_equal(j2, i2, msg)
        assert_array_equal(j3, c, msg)

    # Shared fixture: a 7-element pattern repeated 10 times, so every unique
    # value's count is a multiple of 10 and i1 points into the first repeat.
    a = [5, 7, 1, 2, 1, 5, 7]*10
    b = [1, 2, 5, 7]
    i1 = [2, 3, 0, 1]
    i2 = [2, 3, 0, 1, 0, 2, 3]*10
    c = np.multiply([2, 1, 2, 2], 10)

    # test for numeric arrays
    types = []
    types.extend(np.typecodes['AllInteger'])
    types.extend(np.typecodes['AllFloat'])
    types.append('datetime64[D]')
    types.append('timedelta64[D]')
    for dt in types:
        aa = np.array(a, dt)
        bb = np.array(b, dt)
        check_all(aa, bb, i1, i2, c, dt)

    # test for object arrays
    dt = 'O'
    aa = np.empty(len(a), dt)
    aa[:] = a
    bb = np.empty(len(b), dt)
    bb[:] = b
    check_all(aa, bb, i1, i2, c, dt)

    # test for structured arrays
    dt = [('', 'i'), ('', 'i')]
    aa = np.array(list(zip(a, a)), dt)
    bb = np.array(list(zip(b, b)), dt)
    check_all(aa, bb, i1, i2, c, dt)

    # test for ticket #2799
    # (complex input: the expected result lists 1-1j before 1+0j)
    aa = [1. + 0.j, 1 - 1.j, 1]
    assert_array_equal(np.unique(aa), [1. - 1.j, 1. + 0.j])

    # test for ticket #4785
    # (a list of tuples; the expected values and inverse below are those of
    # the six flattened elements)
    a = [(1, 2), (1, 2), (2, 3)]
    unq = [1, 2, 3]
    inv = [0, 1, 0, 1, 1, 2]
    a1 = unique(a)
    assert_array_equal(a1, unq)
    a2, a2_inv = unique(a, return_inverse=True)
    assert_array_equal(a2, unq)
    assert_array_equal(a2_inv, inv)

    # test for chararrays with return_inverse (gh-5099)
    a = np.chararray(5)
    a[...] = ''
    a2, a2_inv = np.unique(a, return_inverse=True)
    assert_array_equal(a2_inv, np.zeros(5))
N = int(inputsize[0]) M = int(inputsize[1]) X_train = np.zeros((N, M)) #zero array of N rows, M columns y_train = np.zeros((N)).astype(int) toBinary = {"+1": 1, "-1": 0} toOutput = {"1": "+1", "0": "-1"} for row in range(N): line = sys.stdin.readline().rstrip().split(' ') y_train[row] = toBinary[line[1]] for col in range(M): X_train[row, col] = float(line[col + 2].split(':')[1]) N_test = int(sys.stdin.readline().rstrip()) names = np.chararray((N_test), itemsize=5) X_test = np.zeros((N_test, M)) #zero array of N rows, M columns for row in range(N_test): line = sys.stdin.readline().rstrip().split(' ') names[row] = line[0] for col in range(M): X_test[row, col] = float(line[col + 1].split(':')[1]) # Splitting the dataset into the Training set and Test set # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=random.randint(1, 100)) grad_clf = GradientBoostingClassifier(max_depth=1, n_estimators=300, learning_rate=0.3) grad_clf.fit(X_train, y_train)
starty = 0. endy = 2500. dy = 10. numy = int(float(endy - starty) / dy) + 1 x = np.linspace(startx, endx, numx) y = np.linspace(starty, endy, numy) nx = len(x) ny = len(y) # Bathymetry X, Y = np.meshgrid(x,y) Z = np.zeros(X.shape) - 50. depths = Z.T[:,:, np.newaxis] sediments = np.chararray((nx,ny,1), itemsize=20) sediments[:] = "loose sand" strata = {"values": {'depth': depths, 'sediment': sediments}, "coords": [x, y, ["layer 1"]]} # Mannings G = np.zeros((nx, ny)) + 0.3 geo_raw = {"values": G, "coords": [x, y]} # Soil characteristics max_temp = 10. max_soil_res = 10. target_burial_depth = 10
plt.ylabel("Sepal width in cm") plt.show() import codecademylib3_seaborn import matplotlib.pyplot as plt import numpy as np from sklearn import datasets from sklearn.cluster import KMeans import pandas as pd iris = datasets.load_iris() samples = iris.data target = iris.target model = KMeans(n_clusters=3) model.fit(samples) labels = model.predict(samples) species = np.chararray(target.shape, itemsize=150) for i in range(len(samples)): if target[i] == 0: species[i] = 'setosa' elif target[i] == 1: species[i] = 'versicolor' elif target[i] == 2: species[i] = 'virginica' df = pd.DataFrame({'labels': labels, 'species': species}) print(df) ct = pd.crosstab(df['labels'], df['species']) print(ct) import codecademylib3_seaborn import matplotlib.pyplot as plt import numpy as np
def test_empty_indexing():
    """Regression test for ticket 1948.

    Indexing a chararray with an empty list must yield an empty chararray,
    not a chararray holding a single empty string.
    """
    source = np.chararray((4,))
    picked = source[[]]
    assert_(picked.size == 0)
def _read_dataset(self, data_path, shape=None):
    """Read the dataset into memory.

    Note, the images are not fetched into memory, only their filenames.

    The identity, attribute, landmark and partition files found under
    ``data_path`` are parsed and the results are stored in ``self._data``
    (keys written here: 'classification', 'sequence', 'num_classes',
    'is_one_hot', 'in_shape', 'out_shape', 'celeba', 'in_data', 'out_data',
    'train_inds', 'test_inds' and 'val_inds').

    Args:
        data_path: Where is the relative location of the dataset.
        shape (optional): The shape of the input images. Must have length 2;
            it is concatenated with ``[3]``, so it presumably has to be a
            list -- TODO confirm against callers.
    """
    def to_img_path(filename):
        """The image filenames from file have to be converted, if
        the png format is used.
        """
        if self.png_format_used:
            filename = os.path.splitext(filename)[0] + '.png'
        return filename

    # FIXME If we use the attributes as outputs, then this is a multi-label
    # classification task. Though, we don't capture this case in the
    # Dataset base class yet (it would destroy the current implementation
    # of one hot encodings).
    self._data['classification'] = False
    self._data['sequence'] = False
    self._data['num_classes'] = 40  # 40 different attributes.
    self._data['is_one_hot'] = False
    if shape is not None:
        assert (len(shape) == 2)
        self._data['in_shape'] = shape + [3]
    else:
        self._data['in_shape'] = [218, 178, 3]
    self._data['out_shape'] = [self._data['num_classes']]

    self._data['celeba'] = dict()

    # The annotations dict will contain the annotations of each image
    # except its attributes (i.e., the stuff we currently don't use).
    annotations = defaultdict(dict)

    ## Identity
    # Read the identities. Images with the same identity show the same
    # person.
    ident_fn = os.path.join(data_path, CelebAData._IDENTITY)
    with open(ident_fn) as f:
        ident_file = f.readlines()

    for line in ident_file:
        img_ident, ident = line.split()
        img_ident = to_img_path(img_ident)
        annotations[img_ident]['ident'] = int(ident)

    # Initialize the actual data arrays.
    # The number of images is defined by the identity file; every other
    # file is asserted to agree with it.
    num_imgs = len(annotations.keys())
    # The input "data" is one filename per sample, sized to the longest name.
    max_str_len = len(max(annotations.keys(), key=len))
    in_data = np.chararray([num_imgs, 1], itemsize=max_str_len,
                           unicode=True)
    out_data = np.empty([num_imgs, self._data['num_classes']],
                        dtype=np.float32)

    ## Attributes
    # Read the list of attributes. This will become the output of this
    # dataset.
    attr_fn = os.path.join(data_path, CelebAData._ATTRIBUTES)
    with open(attr_fn) as f:
        # First line: number of images; second line: attribute names.
        nis = int(f.readline())
        attr_names = f.readline().split()
        attribute_lines = f.readlines()

    assert (nis == num_imgs)
    assert (len(attr_names) == self._data['num_classes'])
    self._data['celeba']['attr_names'] = attr_names

    assert (len(attribute_lines) == num_imgs)
    for i, line in enumerate(attribute_lines):
        words = line.split()
        img_ident = to_img_path(words[0])
        # An attribute counts as present when its value is positive.
        attrs = [int(i) > 0 for i in words[1:]]
        assert (len(attrs) == self._data['num_classes'])

        # The actual index of the sample in the dataset.
        annotations[img_ident]['index'] = i

        ### Fill input and output data.
        in_data[i, :] = img_ident
        out_data[i, :] = attrs

    self._data['in_data'] = in_data
    self._data['out_data'] = out_data

    ## Landmarks
    # Landmarks of aligned and cropped images.
    # The following landmarks are specified for each image:
    # ['lefteye', 'righteye', 'nose', 'leftmouth', 'rightmouth']
    lm_fn = os.path.join(data_path, CelebAData._LANDMARKS)
    with open(lm_fn) as f:
        nis = int(f.readline())
        lm_names_raw = f.readline().split()
        lm_lines = f.readlines()

    assert (nis == num_imgs)
    # A landmark always consists of an x and y coordinate.
    assert (len(lm_names_raw) % 2 == 0)
    assert (len(lm_lines) == num_imgs)

    # Strip the '_x' / '_y' suffixes to get one name per landmark.
    lm_names = []
    for i in range(0, len(lm_names_raw), 2):
        assert (lm_names_raw[i].endswith('_x') and \
                lm_names_raw[i + 1].endswith('_y'))
        lm_names.append(lm_names_raw[i][:-2])
    self._data['celeba']['landmark_names'] = lm_names

    for line in lm_lines:
        words = line.split()
        img_ident = to_img_path(words[0])
        locs = [int(i) for i in words[1:]]
        assert (len(locs) == len(lm_names_raw))

        # Pair consecutive values into (x, y) tuples keyed by landmark name.
        lms = dict()
        for i in range(0, len(locs), 2):
            lms[lm_names[i // 2]] = (locs[i], locs[i + 1])

        annotations[img_ident]['landmarks'] = lms

    ## Partitioning
    # Load partitioning (what samples belong to train (0), test (2) and
    # val (1) set?).
    part_fn = os.path.join(data_path, CelebAData._PARTITIONS)
    with open(part_fn) as f:
        partitions = f.readlines()

    assert (len(partitions) == num_imgs)

    train_inds = []
    test_inds = []
    val_inds = []

    for i, line in enumerate(partitions):
        img_ident, partition = line.split()
        img_ident = to_img_path(img_ident)
        partition = int(partition)
        # The partition file must list images in the same order as the
        # attribute file, since line numbers are used as sample indices.
        assert (i == annotations[img_ident]['index'])

        if partition == 0:
            train_inds.append(i)
        elif partition == 1:
            val_inds.append(i)
        else:
            # Any value other than 0 or 1 ends up in the test set.
            test_inds.append(i)

    self._data['train_inds'] = np.asarray(train_inds)
    self._data['test_inds'] = np.asarray(test_inds)
    self._data['val_inds'] = np.asarray(val_inds)

    assert (len(train_inds) + len(test_inds) + len(val_inds) == num_imgs)

    self._data['celeba']['anno'] = annotations
def visualize_nearest_neighbor(featuremap_dict,
                               num_neighbor=5,
                               top_number=5,
                               vis=True,
                               save_csv=False,
                               csv_save_path=None,
                               save_vis=False,
                               save_img=False,
                               save_thumb_name='nearest_neighbor.png',
                               img_src_folder=None,
                               ext_filter='.jpg',
                               nn_save_folder=None,
                               debug=True):
    '''
    visualize nearest neighbor for featuremap from images

    parameter:
        featuremap_dict: a dictionary contains image path as key, and featuremap as value,
            the featuremap needs to be numpy array with any shape. No flatten needed
        num_neighbor: number of neighbor to visualize, the first nearest is itself
        top_number: number of top to visualize, since there might be tons of featuremap
            (length of dictionary), we choose the top ones with lowest distance
            with their nearest neighbor
        vis: show the figure on screen (only used when save_vis is set)
        save_csv / csv_save_path: save a .csv file which contains the distance array and
            the sorted nearest ids for all elements
        save_vis / save_thumb_name: save a thumbnail grid of the top featuremaps and
            their neighbors into nn_save_folder
        save_img: copy the nearest neighbor images of the top featuremaps into
            one subfolder of nn_save_folder per featuremap
        img_src_folder / ext_filter: folder and extension used to locate the image
            belonging to a key
        nn_save_folder: save the nearest neighbor images for top featuremap
        debug: run the sanity checks on inputs and intermediate results

    return:
        all_sorted_nearest_id: a 2d matrix, each row is a feature followed by its nearest
            neighbor in whole feature dataset, the rows are sorted by the summed
            distance of all nearest neighbors of each row
        selected_nearest_id: only top number of sorted nearest id
    '''
    print('processing feature map to nearest neightbor.......')
    if debug:
        assert isdict(featuremap_dict), 'featuremap should be dictionary'
        assert all(
            isnparray(featuremap_tmp)
            for featuremap_tmp in featuremap_dict.values()
        ), 'value of dictionary should be numpy array'
        assert isinteger(
            num_neighbor
        ) and num_neighbor > 1, 'number of neighborhodd is an integer larger than 1'
        if save_csv and csv_save_path is not None:
            assert is_path_exists_or_creatable(
                csv_save_path), 'path to save .csv file is not correct'

        if save_vis or save_img:
            if nn_save_folder is not None:  # save image directly
                assert isstring(ext_filter), 'extension filter is not correct'
                assert is_path_exists(
                    img_src_folder), 'source folder for image is not correct'
                assert all(
                    isstring(path_tmp) for path_tmp in featuremap_dict.keys()
                )  # key should be the path for the image
                assert is_path_exists_or_creatable(
                    nn_save_folder
                ), 'folder to save top visualized images is not correct'
                assert isstring(
                    save_thumb_name), 'name of thumbnail is not correct'

    # Normalise the extension whenever images are going to be located, not
    # only when the debug checks above are enabled.
    if (save_vis or save_img) and nn_save_folder is not None:
        if ext_filter.find('.') == -1:
            ext_filter = '.%s' % ext_filter

    # flatten the feature map
    nn_feature_dict = dict()
    for key, featuremap_tmp in featuremap_dict.items():
        nn_feature_dict[key] = featuremap_tmp.flatten()
    num_features = len(nn_feature_dict)

    # Materialise keys and values as lists: on Python 3 the dict views are
    # neither indexable nor convertible to a 2d array by np.array().
    id_list = list(nn_feature_dict.keys())
    featuremap = np.array(list(nn_feature_dict.values()))

    # nearest neighbor
    nearbrs = NearestNeighbors(n_neighbors=num_neighbor,
                               algorithm='ball_tree').fit(featuremap)
    distances, indices = nearbrs.kneighbors(featuremap)

    if debug:
        assert featuremap.shape[
            0] == num_features, 'shape of feature map is not correct'
        assert indices.shape == (
            num_features, num_neighbor), 'shape of indices is not correct'
        assert distances.shape == (
            num_features, num_neighbor), 'shape of distances is not correct'

    # convert the nearest indices for all featuremap to the key accordingly.
    # Indexing an array of the keys keeps their native string type (bytes on
    # Python 2, text on Python 3); the view keeps the chararray interface.
    nearest_id = np.array(id_list)[indices].view(np.chararray)

    if debug:
        assert list(nearest_id[:, 0]) == id_list, 'nearest neighbor has problem'

    # sort the feature based on distance
    print('sorting the feature based on distance')
    featuremap_distance = np.sum(distances, axis=1)
    if debug:
        assert featuremap_distance.shape == (
            num_features, ), 'distance is not correct'
    sorted_indices = np.argsort(featuremap_distance)
    all_sorted_nearest_id = nearest_id[sorted_indices, :]

    # save to the csv file
    if save_csv and csv_save_path is not None:
        print('Saving nearest neighbor result as .csv to path: %s' %
              csv_save_path)
        # the with-block closes the file, no explicit close() needed
        with open(csv_save_path, 'w+') as csv_file:
            np.savetxt(csv_file, distances, delimiter=',', fmt='%f')
            np.savetxt(csv_file, all_sorted_nearest_id, delimiter=',',
                       fmt='%s')

    # choose the best to visualize
    selected_sorted_indices = sorted_indices[0:top_number]
    if debug:
        # Equal distances are legitimately sorted, so ties must not fail.
        assert np.all(
            np.diff(featuremap_distance[sorted_indices]) >= 0
        ), 'feature map is not well sorted based on distance'
    selected_nearest_id = nearest_id[selected_sorted_indices, :]

    if save_vis:
        fig, axarray = plt.subplots(top_number, num_neighbor)
        for index in range(top_number):
            for nearest_index in range(num_neighbor):
                img_path = os.path.join(
                    img_src_folder, '%s%s' %
                    (selected_nearest_id[index, nearest_index], ext_filter))
                if debug:
                    print('loading image from %s' % img_path)
                img = imread(img_path)
                if isgrayimage_dimension(img):
                    axarray[index, nearest_index].imshow(img, cmap='gray')
                elif iscolorimage_dimension(img):
                    axarray[index, nearest_index].imshow(img)
                else:
                    assert False, 'unknown error'
                axarray[index, nearest_index].axis('off')
        save_thumb = os.path.join(nn_save_folder, save_thumb_name)
        fig.savefig(save_thumb)
        if vis:
            plt.show()
        plt.close(fig)

    # save top visualization to the folder
    if save_img and nn_save_folder is not None:
        for top_index in range(top_number):
            file_list = selected_nearest_id[top_index]
            # one subfolder per query image, named after the query itself
            save_subfolder = os.path.join(nn_save_folder, file_list[0])
            mkdir_if_missing(save_subfolder)
            for file_tmp in file_list:
                file_src = os.path.join(img_src_folder,
                                        '%s%s' % (file_tmp, ext_filter))
                save_path = os.path.join(save_subfolder,
                                         '%s%s' % (file_tmp, ext_filter))
                if debug:
                    print('saving %s to %s' % (file_src, save_path))
                shutil.copyfile(file_src, save_path)

    return all_sorted_nearest_id, selected_nearest_id
def sim_reads(self):
    """Write simulated paired FASTQ reads for every ``*_sim_ueifile.csv``.

    Random forward-UMI (``self.Nbcn``), UEI (``self.Nuei``) and reverse-UMI
    (``self.Ntrg``) sequences are drawn according to the sequence-form
    parameters of the library settings.  Each reverse UMI additionally gets
    an amplicon string that ends in the base-4 ('ACGT') encoding of its own
    index.  For every matching csv file (columns: UEI index, forward-UMI
    index, reverse-UMI index, read count) one forward and one reverse read
    is written per counted read, in random order, to
    ``<prefix>_for.fastq`` / ``<prefix>_rev.fastq``; a directory ``<prefix>``
    is then created holding a copy of ``libsettings.txt`` whose
    ``-source_for`` / ``-source_rev`` rows point at those files.
    """
    simLibObj = libOps.libObj(settingsfilename='libsettings.txt',
                              output_prefix='_')
    enforced_rev_read_len = 100
    [for_read_len, rev_read_len] = simLibObj.get_min_allowed_readlens(
        simLibObj.filter_amplicon_window)
    # the reverse read length is always overridden by the enforced value
    rev_read_len = int(enforced_rev_read_len)
    # simLibObj.seqform_for_params and simLibObj.seqform_rev_params are
    # already stored in current object's memory.
    # Form of these variables is a list of the following:
    #   Element 1: [start_pos,end_pos]
    #   Element 2: np.ndarray(seq_bool_vec, dtype=np.bool_)
    #   Element 3: np.ndarray(capital_bool_vec, dtype=np.bool_)
    #   Element 4: np.ndarray(ambig_vec, dtype=np.bool_)
    [subdirnames, filenames] = sysOps.get_directory_and_file_list()

    for_umi_seqs = list()
    rev_umi_seqs = list()
    uei_seqs = list()
    base_order = 'ACGT'

    sysOps.throw_status('Generating simulated sequences ...')
    amplicon_list = list()
    if "-amplicon" in simLibObj.mySettings:
        amplicon_list = [
            simLibObj.mySettings["-amplicon"][i].upper().split(',')
            for i in range(len(simLibObj.mySettings["-amplicon"]))
        ]

    for for_umi_i in range(self.Nbcn):
        for_param_index = np.random.randint(
            len(simLibObj.seqform_for_params))
        if len(simLibObj.seqform_for_params[for_param_index]) > 1:
            sysOps.throw_exception(
                'Error: len(simLibObj.seqform_for_params[for_param_index]) = '
                + str(len(simLibObj.seqform_for_params[for_param_index])))
            sysOps.exitProgram()
        my_for_umi_param = simLibObj.seqform_for_params[for_param_index][
            0]['U'][0]
        [start_pos, end_pos] = my_for_umi_param[0]
        seq_bool_vec = my_for_umi_param[1]
        my_for_umi = str('')
        for pos in range(end_pos - start_pos):
            # each position owns 4 booleans marking which of ACGT is allowed
            possible_bases = np.where(seq_bool_vec[(pos * 4):((pos + 1) *
                                                              4)])[0]
            my_for_umi += base_order[possible_bases[np.random.randint(
                possible_bases.shape[0])]]

        for_umi_seqs.append([int(for_param_index), str(my_for_umi)])

    for for_uei_i in range(self.Nuei):
        for_param_index = 0  # there should be no difference across UMI's
        my_for_uei_param = simLibObj.seqform_for_params[for_param_index][
            0]['U'][1]
        [start_pos, end_pos] = my_for_uei_param[0]
        seq_bool_vec = my_for_uei_param[1]
        my_for_uei = str('')
        for pos in range(end_pos - start_pos):
            possible_bases = np.where(seq_bool_vec[(pos * 4):((pos + 1) *
                                                              4)])[0]
            my_for_uei += base_order[possible_bases[np.random.randint(
                possible_bases.shape[0])]]

        uei_seqs.append(str(my_for_uei))

    for rev_umi_i in range(self.Ntrg):
        rev_param_index = np.random.randint(
            len(simLibObj.seqform_rev_params))
        my_rev_umi_param = simLibObj.seqform_rev_params[rev_param_index][
            0]['U'][0]
        [start_pos, end_pos] = my_rev_umi_param[0]
        seq_bool_vec = my_rev_umi_param[1]
        my_rev_umi = str('')
        for pos in range(end_pos - start_pos):
            possible_bases = np.where(seq_bool_vec[(pos * 4):((pos + 1) *
                                                              4)])[0]
            my_rev_umi += base_order[possible_bases[np.random.randint(
                possible_bases.shape[0])]]

        if len(amplicon_list) == 0:
            encoded_amplicon = str('')
        else:
            this_gsp_primer_amplicon_pair = list(
                amplicon_list[np.random.randint(len(amplicon_list))]
            )  # already properly oriented
            # generate single error on amplicon
            lenamp = len(this_gsp_primer_amplicon_pair[1])
            rand_loc = np.random.randint(lenamp)
            this_gsp_primer_amplicon_pair[1] = str(
                this_gsp_primer_amplicon_pair[1][:rand_loc] +
                base_order[np.random.randint(4)] +
                this_gsp_primer_amplicon_pair[1][(rand_loc + 1):])
            encoded_amplicon = ''.join(this_gsp_primer_amplicon_pair)

        # append the target index written in base 4, using ACGT as digits
        tmp_umi_index = float(rev_umi_i)
        if tmp_umi_index == 0:
            encoded_amplicon += base_order[0]
        else:
            for myexponent in range(
                    int(np.floor(np.log(tmp_umi_index) / np.log(4.0))), -1,
                    -1):
                mydigit = np.floor(tmp_umi_index /
                                   np.power(4.0, myexponent))
                encoded_amplicon += base_order[int(mydigit)]
                tmp_umi_index -= mydigit * np.power(4.0, myexponent)

        rev_umi_seqs.append(
            [int(rev_param_index),
             str(my_rev_umi),
             str(encoded_amplicon)])

    sysOps.throw_status('Writing simulated reads ...')

    for filename in filenames:
        if filename.endswith('_sim_ueifile.csv'):
            # ndmin=2 keeps a single-row file two-dimensional so the
            # [i, 3] indexing below still works
            ueifile = np.int64(
                np.loadtxt(sysOps.globaldatapath + filename,
                           delimiter=',',
                           ndmin=2))
            newdirname = filename[:filename.find('_')]
            read_list = list()
            for i in range(ueifile.shape[0]):
                for myread in range(ueifile[i, 3]):
                    read_list.append(np.array([ueifile[i, :3]]))
            read_list = np.concatenate(
                read_list, axis=0
            )  # re-write array so that there is now one row per read

            # randomly permute:
            read_list = read_list[
                np.random.permutation(read_list.shape[0]), :]

            # one-byte-per-base scratch buffers, reused for every read
            for_chararray = np.chararray((for_read_len))
            rev_chararray = np.chararray((rev_read_len))
            # the with-block closes both FASTQ files even if a read fails
            with open(newdirname + '_for.fastq', "w") as for_fastq_outfile, \
                    open(newdirname + '_rev.fastq', "w") as rev_fastq_outfile:
                for i in range(read_list.shape[0]):
                    for_param_index = for_umi_seqs[read_list[i, 1]][0]
                    for_umi_seq = for_umi_seqs[read_list[i, 1]][1]
                    rev_param_index = rev_umi_seqs[read_list[i, 2]][
                        0]  # both beacon and target indices are at this point are independently indexed from 0
                    rev_umi_seq = rev_umi_seqs[read_list[i, 2]][1]
                    rev_amp_seq = rev_umi_seqs[read_list[i, 2]][2]
                    uei_seq = uei_seqs[read_list[i, 0]]

                    # start from all-N reads and overwrite known positions
                    for_chararray[:] = 'N'
                    rev_chararray[:] = 'N'

                    my_for_umi_param = simLibObj.seqform_for_params[
                        for_param_index][0]['U'][0]
                    [start_pos, end_pos] = my_for_umi_param[0]
                    for j in range(end_pos - start_pos):
                        for_chararray[j + start_pos] = for_umi_seq[j]

                    my_for_uei_param = simLibObj.seqform_for_params[
                        for_param_index][0]['U'][1]
                    [start_pos, end_pos] = my_for_uei_param[0]
                    for j in range(end_pos - start_pos):
                        for_chararray[j + start_pos] = uei_seq[j]

                    for my_for_param in simLibObj.seqform_for_params[
                            for_param_index][0]['P']:
                        [start_pos, end_pos] = my_for_param[0]
                        for j in range(end_pos - start_pos):
                            for_chararray[j + start_pos] = base_order[
                                np.where(my_for_param[1][(4 * j):(
                                    4 * (j + 1))])[0][0]]

                    my_rev_umi_param = simLibObj.seqform_rev_params[
                        rev_param_index][0]['U'][0]
                    [start_pos, end_pos] = my_rev_umi_param[0]
                    for j in range(end_pos - start_pos):
                        rev_chararray[j + start_pos] = rev_umi_seq[j]

                    my_rev_amp_param = simLibObj.seqform_rev_params[
                        rev_param_index][0]['A'][0]
                    start_pos = my_rev_amp_param[0][0]
                    for j in range(len(rev_amp_seq)):
                        rev_chararray[j + start_pos] = rev_amp_seq[j]

                    if 'P' in simLibObj.seqform_rev_params[rev_param_index][
                            0]:
                        for my_rev_param in simLibObj.seqform_rev_params[
                                rev_param_index][0]['P']:
                            [start_pos, end_pos] = my_rev_param[0]
                            for j in range(end_pos - start_pos):
                                rev_chararray[j + start_pos] = base_order[
                                    np.where(my_rev_param[1][(4 * j):(
                                        4 * (j + 1))])[0][0]]

                    # tobytes() is the supported spelling of the deprecated
                    # tostring() and returns the same bytes
                    for_record = SeqIO.SeqRecord(
                        Seq.Seq(for_chararray.tobytes()))
                    for_record.id = '-' + str(i) + '-' + str(read_list[i, 1])
                    for_record.description = ''
                    for_record.letter_annotations['phred_quality'] = list(
                        [30 for j in range(for_read_len)])
                    rev_record = SeqIO.SeqRecord(
                        Seq.Seq(rev_chararray.tobytes()))
                    rev_record.id = '-' + str(i) + '-' + str(read_list[i, 2])
                    rev_record.description = ''
                    rev_record.letter_annotations['phred_quality'] = list(
                        [30 for j in range(rev_read_len)])
                    SeqIO.write(for_record, for_fastq_outfile, "fastq")
                    SeqIO.write(rev_record, rev_fastq_outfile, "fastq")

            os.mkdir(newdirname)
            # plain 'r': the 'U' flag was removed in Python 3.11
            with open('libsettings.txt', 'r') as oldsettingsfile:
                with open(newdirname + '//libsettings.txt',
                          'w') as newsettingsfile:
                    for oldsettings_row in oldsettingsfile:
                        if oldsettings_row.startswith('-source_for'):
                            newsettingsfile.write('-source_for ..//' +
                                                  newdirname +
                                                  '_for.fastq\n')
                        elif oldsettings_row.startswith('-source_rev'):
                            newsettingsfile.write('-source_rev ..//' +
                                                  newdirname +
                                                  '_rev.fastq\n')
                        else:
                            newsettingsfile.write(oldsettings_row)

    sysOps.throw_status('Done.')
    return
# trow, tcol are x,y for the top right trow = 0 tcol = BOARD_COL - 1 #init the states grid #this is the store the state value (of every cell) of the individual sweep states = np.zeros(BOARD_ROW * BOARD_COL).reshape(BOARD_ROW, BOARD_COL) #this is to store the cumulative value after the sweep ends statesnew = np.zeros(BOARD_ROW * BOARD_COL).reshape(BOARD_ROW, BOARD_COL) states[trow, tcol] = 1 # terminal state value states[trow + 1, tcol] = -1 # terminal state value runningpolicy = np.chararray(BOARD_ROW * BOARD_COL) latestpolicy = np.chararray(BOARD_ROW * BOARD_COL) ''' Note on actions The agent can move either up,down,left or right (4 possible actions) From some cell on the border of the grid one or two actions may not be possible as in the case of the top right and bottom left. All other edges have only 3 actions Actions have equal probability of 0.25% each (0.25 * 4 = 100%) The sum of probabilites of all actions has to be 100% Each action has a reward of -1 ''' def getreward(row, col): reward = -0.04
def gen_random_data():
    """Build an 80-element unicode chararray of randomly drawn tiles.

    Each slot is filled with one entry of the module-level ``tiles``
    sequence, chosen by an independent ``np.random.randint`` draw.

    :return: the filled array
    :rtype: numpy.chararray
    """
    slot_count = 80
    picked = np.chararray(slot_count, unicode=True)
    for slot in range(slot_count):
        # One RNG draw per slot, in slot order.
        picked[slot] = tiles[np.random.randint(len(tiles))]
    return picked
3 ) #column starting indices in which a vertical victory is possible (rows 1-3) win_rows = range( rows) #rows in which a horizontal victory is possible (all of them) win_cols = range( cols) #columns in which a vertical victory is possible (all of them) diag_cols = range( 4) #columns in which a diagonal victory is possible (columns 1-4) diag_rows_down = range( 3) #rows in which a downward diagonal victory is possible (top 3 rows) diag_rows_up = range( 3, rows )[::-1] #rows in which an upward diagonal victory is possible (bottom 3 rows) #create board as 6x7 zeros array board = np.chararray((rows, cols)) board[:] = 0 print(board) #play the game! while True: #determine who's turn it is if numTurns % 2 != 0: player = players['player2'][0] playerID = players['player2'][1] else: player = players['player1'][0] playerID = players['player1'][1] #take player's input
names=True, dtype=None) pgc = table['PGC'] ra = table['RAJ'] # deg dec = table['DEJ'] # deg d1 = table['MAJ'] # arcmin d2 = table['MIN'] # arcmin pa = table['PA'] ty = table['Type'] uu = table['u'] gg = table['g'] rr = table['r'] ii = table['i'] zz = table['z'] temp = np.chararray(len(pgc)) N = 60 db_root = '/home/ehsan/db_esn/data/' mags = np.zeros([N, 5]) no = 0 for i in range(N): db_id = ra_db(ra[i]) pgc_id = 'pgc' + str(pgc[i]) filters = ['u', 'g', 'r', 'i', 'z'] for p in range(5):
def initialize_timestep_data(bpr, weather_data):
    """
    Initialize the time step data with the weather data and the minimum set
    of variables needed for computation.

    :param bpr: a collection of building properties for the building used for
        thermal loads calculation (not read by this function)
    :type bpr: BuildingPropertiesRow
    :param weather_data: data from the .epw weather file. Each row represents
        an hour of the year. The columns read here are: ``drybulb_C``,
        ``wetbulb_C``, ``relhum_percent``, ``skytemp_C`` and ``windspd_ms``
    :type weather_data: pandas.DataFrame
    :return: returns the `tsd` variable, a dictionary of time step data
        mapping variable names to arrays for each hour of the year. All
        non-weather fields are filled with NaN, and the three
        ``sys_status_*`` logs are filled with ``'unknown'``.
    :rtype: dict
    """
    # Initialize dict with weather variables
    tsd = {
        'T_ext': weather_data.drybulb_C.values,
        'T_ext_wetbulb': weather_data.wetbulb_C.values,
        'rh_ext': weather_data.relhum_percent.values,
        'T_sky': weather_data.skytemp_C.values,
        # NOTE(review): stored as the pandas column itself (no ``.values``),
        # unlike the entries above; kept as-is for backward compatibility.
        'u_wind': weather_data.windspd_ms,
    }

    # fill data with nan values
    nan_fields_electricity = [
        'Eaux', 'Eaux_ve', 'Eaux_hs', 'Eaux_cs', 'Eaux_ww', 'Eaux_fw',
        'Ehs_lat_aux', 'GRID', 'GRID_a', 'GRID_l', 'GRID_v', 'GRID_data',
        'GRID_pro', 'GRID_aux', 'GRID_ww', 'GRID_hs',
        # A comma was missing between the next two names, which made Python
        # concatenate them into the single key 'GRID_csGRID_cdata'.
        'GRID_cs', 'GRID_cdata',
        'GRID_cre', 'PV', 'Eal', 'Edata', 'Epro', 'E_sys', 'E_ww', 'E_hs',
        'E_cs', 'E_cre', 'E_cdata',
    ]
    nan_fields = [
        'mcpww_sys', 'mcptw', 'mcpcre_sys', 'mcpcdata_sys', 'SOLAR_ww',
        'SOLAR_hs', 'NG_hs', 'COAL_hs', 'OIL_hs', 'WOOD_hs', 'NG_ww',
        'COAL_ww', 'OIL_ww', 'WOOD_ww', 'vfw_m3perh',
    ]
    for key_group in (
            TSD_KEYS_HEATING_LOADS,
            TSD_KEYS_COOLING_LOADS,
            TSD_KEYS_HEATING_TEMP,
            TSD_KEYS_COOLING_TEMP,
            TSD_KEYS_COOLING_FLOWS,
            TSD_KEYS_HEATING_FLOWS,
            TSD_KEYS_COOLING_SUPPLY_FLOWS,
            TSD_KEYS_COOLING_SUPPLY_TEMP,
            TSD_KEYS_HEATING_SUPPLY_FLOWS,
            TSD_KEYS_HEATING_SUPPLY_TEMP,
            TSD_KEYS_RC_TEMP,
            TSD_KEYS_MOISTURE,
            TSD_KEYS_ENERGY_BALANCE_DASHBOARD,
            TSD_KEYS_SOLAR,
            TSD_KEYS_VENTILATION_FLOWS,
            nan_fields_electricity,
            TSD_KEYS_PEOPLE,
    ):
        nan_fields.extend(key_group)

    # Every field gets its own NaN-filled float array of HOURS_IN_YEAR items.
    tsd.update(dict(
        (field, np.full(HOURS_IN_YEAR, np.nan, dtype=float))
        for field in nan_fields))

    # initialize system status log
    for status_key in ('sys_status_ahu', 'sys_status_aru', 'sys_status_sen'):
        status_log = np.chararray(HOURS_IN_YEAR, itemsize=20)
        status_log[:] = 'unknown'
        tsd[status_key] = status_log

    return tsd
def re_identification(clientId, nb_time_max, name0):
    """Retry the recognition of a user after the result ``name0`` was wrong.

    The function switches the client's recognition flags on, then polls the
    client's ``'nom'`` entry every ``wait_time`` seconds. Each candidate name
    that has not been rejected yet is submitted to ``validate_recognition``.
    The loop ends when a candidate is confirmed or after ``nb_time_max``
    failed attempts (rejected candidate or unknown person).

    * On confirmation: recognition is disabled, emotion results are fetched
      and ``go_to_formation`` is called with the confirmed name.
    * Otherwise: the user is asked for their name via ``ask_name``; if a
      photo ``<name>.0<suffix>`` exists under ``imgPath`` two threads are
      started to show and retake the photos, else photo taking is enabled.

    :param clientId: identifier of the client; matched as ``str(clientId)``
        against the ``"clientId"`` entry of the items in ``global_vars``
    :param nb_time_max: maximum number of failed attempts before giving up;
        with a value <= 0 the function goes straight to asking for the name
    :param name0: the first (wrong) recognition result, never proposed again
    :raises StopIteration: if no item of ``global_vars`` matches ``clientId``
    """
    simple_message(
        clientId,
        u'Veuillez rapprocher vers la camera, ou bouger votre tête...')

    global global_vars
    # builtin next() works on Python 2.6+ and 3, unlike the .next() method
    global_var = next(
        item for item in global_vars if item["clientId"] == str(clientId))
    global_var['text'] = ''
    global_var['text2'] = ''
    global_var['text3'] = ''

    # All of the old recognition results, which are wrong. A plain list keeps
    # the names at full length (a fixed-width 10-character array truncated
    # longer names, so they were proposed again). The empty string is listed
    # so that an empty result is never proposed, as before.
    rejected_names = ['', name0]

    nb_time = 0
    # Defaults so that both names are bound even if the loop never runs.
    result = 0
    name = None

    global_var['flag_enable_recog'] = 1
    global_var['flag_reidentify'] = 1
    global_var['flag_ask'] = 0

    while nb_time < nb_time_max:
        time.sleep(wait_time)  # wait until after the re-identification is done
        name1 = global_var['nom']  # New result

        if name1 not in rejected_names and global_var['flag_recog']:
            print('Essaie ' + str(nb_time + 1) + ': reconnu comme ' + str(name1))
            resp = validate_recognition(clientId, str(name1))
            print(resp)
            if resp == 1:
                result = 1
                name = name1
                break
            result = 0
            nb_time += 1
            rejected_names.append(name1)
        elif not global_var['flag_recog']:
            print('Essaie ' + str(nb_time + 1) + ': personne inconnue')
            result = 0
            nb_time += 1
        # Otherwise the current result was already rejected: keep waiting
        # for a new one without counting an attempt.

    if result == 1:
        # User confirms that the recognition is correct now
        global_var['flag_enable_recog'] = 0
        global_var['flag_wrong_recog'] = 0

        get_face_emotion_api_results(clientId)
        time.sleep(2)
        go_to_formation(clientId, xls_filename, name)
    else:
        # All tries failed: disable recognition and ask for the name instead
        global_var['flag_enable_recog'] = 0
        simple_message(
            clientId,
            u'Désolé je vous reconnaît pas, veuillez me donner votre identifiant'
        )

        name = ask_name(clientId, 0)
        # Assume that user's face-database exists if the photo 0 exists
        if os.path.exists(imgPath + str(name) + ".0" + suffix):
            simple_message(
                clientId,
                'Bonjour ' + str(name) + ', je vous conseille de changer vos photos')
            flag_show_photos = 1
            step_time = 1

            thread_show_photos3 = Thread(
                target=show_photos,
                args=(clientId, imgPath, name),
                name='thread_show_photos3_' + clientId)
            thread_show_photos3.start()

            time.sleep(0.5)
            thread_retake_validate_photos2 = Thread(
                target=retake_validate_photos,
                args=(clientId, step_time, flag_show_photos, imgPath, name),
                name='thread_retake_validate_photos2_' + clientId)
            thread_retake_validate_photos2.start()
        else:
            simple_message(
                clientId,
                "Malheureusement, les photos correspondant au nom " + str(name) +
                " n'existent pas. Je vous conseille de reprendre vos photos")
            time.sleep(1)
            global_var['flag_take_photo'] = 1  # Enable photo taking

    global_var['flag_reidentify'] = 0
soundResp = goodQualCells.behavZscore.apply( lambda x: np.max(np.abs(x[~np.isnan(x)])) >= maxZThreshold) moreRespLowFreq = soundResp & goodQualCells.behavZscore.apply( lambda x: abs(x[~np.isnan(x)][0]) > abs(x[~np.isnan(x)][-1])) moreRespHighFreq = soundResp & goodQualCells.behavZscore.apply( lambda x: abs(x[~np.isnan(x)][-1]) > abs(x[~np.isnan(x)][0])) goodSoundRespCells = goodQualCells[soundResp] goodLowFreqRespCells = goodQualCells[moreRespLowFreq] goodHighFreqRespCells = goodQualCells[moreRespHighFreq] print '{} cells were sound responsive for both areas'.format( len(goodSoundRespCells)) soundRespInds = goodSoundRespCells.index lowFreqRespInds = goodLowFreqRespCells.index highFreqRespInds = goodHighFreqRespCells.index aveSpikeCountByBlockAllCells = np.zeros((2, len(timeVec) - 1, len(celldb))) brainAreaEachCell = np.chararray(len(celldb), itemsize=9) if bestFreq: print( 'Caculating for all sound responsive cells using only trials with best freq.' ) for indC, cell in goodLowFreqRespCells.iterrows(): cellObj = ephyscore.Cell(cell) print 'Calculating ave spike count by block for cell {}'.format(indC) subject = cell.subject date = cell.date depth = cell.depth brainArea = cell.brainArea brainAreaEachCell[indC] = brainArea evlockFile = '{0}_{1}_{2}_T{3}_c{4}_{5}.npz'.format( subject, date, depth, cell.tetrode, cell.cluster, alignment)
def match_stroke(self):
    """Match every SG stroke of ``self.character`` to its closest BK stroke.

    Reads ``<SAVE_PATH>/<TYPE_1>/<character>/<character>_start_end.txt``
    (three rows per SG stroke: start, mid and end point; each row holds three
    comma-separated integers of which the last two are used as x and y) and
    one ``<character>_NN_start_end.txt`` file per BK stroke in
    ``<SAVE_PATH>/<TYPE_2>/<character>``.

    The distance between two strokes is the sum of the squared distances of
    their start, mid and end points, taking the smaller of the two stroke
    directions (start/end swapped). Pairs are then accepted greedily in
    order of increasing distance until every SG stroke has a partner; one BK
    stroke may be assigned to several SG strokes.

    For each match a side-by-side figure is saved to
    ``result/match_img/<character>_NN.jpg``, and the 1-based BK index of
    every SG stroke is written to
    ``<SAVE_PATH>/<TYPE_1>/<character>_match.txt``.

    Returns ``None``; returns early (after printing a message) when the BK
    directory of the character does not exist.
    """
    dir_b = SAVE_PATH + '/' + TYPE_2 + '/' + self.character
    if not os.path.exists(dir_b):
        print('SG: %s does not has coorespond BK.' % self.character)
        return

    def load_points(path):
        # Widen to int64: squaring int16 coordinate differences larger than
        # 181 overflows and silently corrupts the distances.
        return np.loadtxt(path, delimiter=',', dtype=np.int16).astype(np.int64)

    def squared_gap(p, q):
        # Squared Euclidean distance between two (x, y) points.
        return int((p[0] - q[0]) ** 2 + (p[1] - q[1]) ** 2)

    # load start_end.txt of SG stroke
    point_file_a = '%s_start_end.txt' % (self.character)
    start_end_a = load_points(SAVE_PATH + '/' + TYPE_1 + '/' + self.character + '/' + point_file_a)
    num_stroke_SG = int(start_end_a.shape[0] / 3)
    # presumably four files are stored per BK stroke -- verify against writer
    num_stroke_BK = int(len(os.listdir(dir_b)) / 4)

    # Load every BK stroke once instead of once per SG stroke.
    bk_points = [
        load_points(dir_b + '/%s_%02d_start_end.txt' % (self.character, i))
        for i in range(1, num_stroke_BK + 1)
    ] if num_stroke_SG else []

    dis_table = np.full((num_stroke_SG, num_stroke_BK), np.inf)
    for len_a in range(num_stroke_SG):
        start_a, mid_a, end_a = start_end_a[3 * len_a:3 * len_a + 3, 1:3]
        for col, points_b in enumerate(bk_points):
            start_b, mid_b, end_b = points_b[:3, 1:3]
            dis_mid = squared_gap(mid_a, mid_b)
            # direction 1: both strokes drawn the same way
            dis_1 = squared_gap(start_a, start_b) + dis_mid + squared_gap(end_a, end_b)
            # direction 2: BK stroke drawn the other way round
            dis_2 = squared_gap(start_a, end_b) + dis_mid + squared_gap(end_a, start_b)
            # find the actual distance in correct direction
            dis_table[len_a, col] = min(dis_1, dis_2)

    match_result = np.full(num_stroke_SG, -1.0)
    print('################################################')
    print(dis_table)

    while -1 in match_result:
        # sg_idx : stroke order of SG, bk_idx : stroke order of BK (0-based)
        sg_idx, bk_idx = np.unravel_index(np.argmin(dis_table), dis_table.shape)
        # Consume the pair. Infinity (not a large finite number) guarantees
        # a consumed cell can never be picked again before unused ones.
        dis_table[sg_idx, bk_idx] = np.inf
        if match_result[sg_idx] != -1:
            # SG stroke is already matched
            continue

        match_result[sg_idx] = bk_idx + 1

        SG_img = Image.open('result/SG/%s/SG_%s_%02d.jpg' % (self.character, self.character, sg_idx + 1))
        BK_img = Image.open('result/BK/%s/BK_%s_%02d.jpg' % (self.character, self.character, bk_idx + 1))
        fig = plt.figure(figsize=(8, 4))
        plt.subplot(1, 2, 1)
        plt.title('%s : %d (瘦金體)' % (self.character, sg_idx + 1))
        plt.axis('off')
        plt.imshow(SG_img)
        plt.subplot(1, 2, 2)
        plt.title('%s : %d (標楷體)' % (self.character, bk_idx + 1))
        plt.imshow(BK_img)
        plt.axis('off')
        plt.savefig('result/match_img/%s_%02d.jpg' % (self.character, sg_idx + 1))
        # Release the figure; otherwise one figure per stroke stays open.
        plt.close(fig)

    print(match_result)
    print('################################################')
    np.savetxt(SAVE_PATH + '/' + TYPE_1 + '/' + '%s_match.txt' % self.character,
               match_result, fmt='%d', delimiter=',')
passband=[.009, 9999])) #MyStruct(outid='00P+AROMAAgg',usearoma=False,n_init2drop=0,nonaggr=False, # noise=[],expansion=0, # spkreg=0,fdthr=99,dvrthr=99,addnoise=baseregressors+['AROMAAggrComp*','aroma_motion*'],passband=[.009,9999]), #MyStruct(outid='01P+AROMAAgg',usearoma=False,n_init2drop=0,nonaggr=False, # noise=['GlobalSignal','global_signal'],expansion=0, # spkreg=0,fdthr=99,dvrthr=99,addnoise=baseregressors+['AROMAAggrComp*','aroma_motion*'],passband=[.009,9999]), #MyStruct(outid='02P+AROMAAgg',usearoma=False,n_init2drop=0,nonaggr=False, # noise=['WhiteMatter','CSF','white_matter','csf'],expansion=0, # spkreg=0,fdthr=99,dvrthr=99,addnoise=baseregressors+['AROMAAggrComp*','aroma_motion*'],passband=[.009,9999]), #MyStruct(outid='03P+AROMAAgg',usearoma=False,n_init2drop=0,nonaggr=False, # noise=['GlobalSignal','WhiteMatter','CSF','global_signal','white_matter','csf'],expansion=0, # spkreg=0,fdthr=99,dvrthr=99,addnoise=baseregressors+['AROMAAggrComp*','aroma_motion*'],passband=[.009,9999]) ) idlist = np.chararray((len(funcdat), len(pipelines)), itemsize=len(os.path.basename(funcdat[0]).split('_')[0]), unicode=True) atlaslist = np.chararray((len(funcdat), len(pipelines)), itemsize=len(atlas), unicode=True) ses = np.chararray((len(funcdat), len(pipelines)), itemsize=2, unicode=True) task = np.chararray((len(funcdat), len(pipelines)), itemsize=5, unicode=True) run = np.chararray((len(funcdat), len(pipelines)), itemsize=5, unicode=True) fdthr = np.zeros((len(funcdat), len(pipelines))) dvthr = np.zeros((len(funcdat), len(pipelines))) ntr = np.zeros((len(funcdat), len(pipelines))) ntrabovethr = np.zeros((len(funcdat), len(pipelines))) pctdflost = np.zeros((len(funcdat), len(pipelines))) mfd = np.zeros((len(funcdat), len(pipelines))) medfd = np.zeros((len(funcdat), len(pipelines))) maxfd = np.zeros((len(funcdat), len(pipelines)))