def handle_data(self, data):
    """Route character data from the HTML parser into the field selected
    by the tag name most recently pushed onto ``self.flag``.

    A recognised tag is consumed (popped) after its data is stored; an
    unrecognised tag is left on the stack untouched.
    """
    if not self.flag:
        return
    tag = self.flag[-1]
    if tag == "span":
        # span text may arrive in several chunks — accumulate it
        self.span += data
        self.flag.pop()
    elif tag == "strong":
        self.strong = data
        self.flag.pop()
    elif tag == "houseName":
        self.houseName.append(str(strip(data)))
        self.flag.pop()
    elif tag == "villageName":
        self.villageName.append(str(strip(data)))
        self.flag.pop()
    elif tag == "houseTotlePrice_2":
        # total price = the <strong> figure captured earlier + this suffix
        self.houseTotlePrice.append(self.strong + data)
        self.strong = ""
        self.flag.pop()
    elif tag == "houseUnitPrice":
        self.houseUnitPrice.append(data)
        self.flag.pop()
def ejecutar(self, tabla, arbol):
    """Evaluate TRIM(expr).

    Strips the string form of the operand when its type is a character
    type (CHAR/VARCHAR/VARYING/CHARACTER/TEXT) and marks the result as
    TEXT; otherwise registers semantic error 42883 on the AST and
    returns the Excepcion.
    """
    super().ejecutar(tabla, arbol)
    resultado = self.valor.ejecutar(tabla, arbol)
    if isinstance(resultado, Excepcion):
        # propagate a failure from the operand unchanged
        return resultado
    tipos_texto = (Tipo_Dato.CHAR, Tipo_Dato.VARCHAR, Tipo_Dato.VARYING,
                   Tipo_Dato.CHARACTER, Tipo_Dato.TEXT)
    if self.valor.tipo.tipo in tipos_texto:
        self.tipo = Tipo(Tipo_Dato.TEXT)
        return strip(str(resultado))
    error = Excepcion('42883', "Semántico",
                      f"No existe la función TRIM({self.valor.tipo.toString()})",
                      self.linea, self.columna)
    arbol.excepciones.append(error)
    arbol.consola.append("HINT: Ninguna función coincide en el nombre y tipos de argumentos. Puede ser necesario agregar conversión explícita de tipos.")
    arbol.consola.append(error.toString())
    return error
def remove_cols(data, skip_cols): conv = [] colnr = 0 for col in data: if colnr % 200 == 0: print('processing column {0:d}...'.format(colnr)) gc.collect() if colnr not in skip_cols: col = strip(col, '"') col = replace(col, '', '0') col = replace(col, 'NA', '0') col = replace(col, 'false', '0') col = replace(col, 'true', '1') conv.append(col.astype(int16)) colnr += 1 gc.collect() return array(conv)
def ejecutar(self, tabla, arbol):
    """Evaluate TRIM(expr) for a literal operand.

    Primitivo operands of a character type are stripped and returned;
    Identificador operands are not implemented yet (a placeholder
    message is printed and the error path is taken).  Every other case
    registers semantic error 42883 and returns the Excepcion.
    """
    super().ejecutar(tabla, arbol)
    if isinstance(self.valor, Primitivo):
        tipos_texto = (Tipo_Dato.CHAR, Tipo_Dato.VARCHAR, Tipo_Dato.VARYING,
                       Tipo_Dato.CHARACTER, Tipo_Dato.TEXT)
        if self.valor.tipo.tipo in tipos_texto:
            return strip(str(self.valor.valor))
    elif isinstance(self.valor, Identificador):
        # identifier support pending — falls through to the error below
        print("FALTA PROGRAMAR PARA IDENTIFICADOR TRIM")
    error = Excepcion('42883', "Semántico",
                      f"No existe la función TRIM({self.valor.tipo.toString()})",
                      self.linea, self.columna)
    arbol.excepciones.append(error)
    arbol.consola.append("HINT: Ninguna función coincide en el nombre y tipos de argumentos. Puede ser necesario agregar conversión explícita de tipos.")
    arbol.consola.append(error.toString())
    return error
def to_ints_only(data): conv = [] failed = [] colnr = 0 for col in data: colnr += 1 if colnr % 100 == 0: print('converting column {0:d}...'.format(colnr)) col = strip(col, '"') col = replace(col, '', '0') col = replace(col, 'NA', '0') col = replace(col, 'false', '0') col = replace(col, 'true', '1') try: irow = col.astype(int16) except ValueError as err: skiprows.append(colnr - 1) failed.append(str(err).split(':', 1)[1]) except OverflowError as err: print(str(err)) skiprows.append(colnr - 1) # except OverflowError as err: # print(str(err)) # print('will look for overflow error value...') # for v in col: # try: # v.astype(int) # except: # print 'overflow:', v else: conv.append(irow) del col gc.collect() # free memory print('failed for (excluding overflows): "{0:s}"'.format( '", "'.join(failed))) print('{0:d} columns removed'.format(len(failed))) return array(conv)
def to_ints_only(data): conv = [] failed = [] colnr = 0 for col in data: colnr += 1 if colnr % 100 == 0: print('converting column {0:d}...'.format(colnr)) col = strip(col, '"') col = replace(col, '', '0') col = replace(col, 'NA', '0') col = replace(col, 'false', '0') col = replace(col, 'true', '1') try: irow = col.astype(int16) except ValueError as err: skiprows.append(colnr - 1) failed.append(str(err).split(':', 1)[1]) except OverflowError as err: print(str(err)) skiprows.append(colnr - 1) # except OverflowError as err: # print(str(err)) # print('will look for overflow error value...') # for v in col: # try: # v.astype(int) # except: # print 'overflow:', v else: conv.append(irow) del col gc.collect() # free memory print('failed for (excluding overflows): "{0:s}"'.format('", "'.join(failed))) print('{0:d} columns removed'.format(len(failed))) return array(conv)
# Build truncated author lists for a BibTeX-driven publication page.
# BUG FIX: `selected` and `additional` were both loaded from `infile`,
# although `selectedfile` and `additionalfile` were defined (and never
# used); they now read the files their names intend.
infile = 'suchyta-papers-2.bib'
selectedfile = 'suchyta-papers-selected-modified.bib'
additionalfile = 'suchyta-papers-additional-modified.bib'

with open(infile) as f:
    bib = bibtexparser.load(f)
with open(selectedfile) as f:
    selected = bibtexparser.load(f)
with open(additionalfile) as f:
    additional = bibtexparser.load(f)

spos = 0
apos = 0
for j in range(len(bib.entries)):
    # split the BibTeX author string and trim whitespace around each name
    authors = npchar.strip(bib.entries[j]['author'].split(' and'))
    select = True
    omit = False
    if len(authors) > num:  # num: module-level cap on listed authors — TODO confirm
        authors = authors[0:num]
        last = 'et~al.'
        if me not in authors:
            last = '%s (including %s)' % (last, ME)
            select = False
        # these specific entries stay selected regardless of authorship
        if bib.entries[j][u'ID'] in [
                '2012SPIE.8451E..12H', '2015AJ....150..150F',
                '2016JPhCS.759a2095K', 'choi2016stream'
        ]:
            select = True
        authors = np.append(authors, last)
def strip(iterable, *args, **kwargs): from numpy.core.defchararray import strip for item in iterable: yield strip(item, *args, **kwargs)
# NOTE(review): this chunk begins mid-definition — the leading
# `return width, height` is the tail of calculate_width_height(), whose
# body lies above this chunk, so the line is kept verbatim rather than
# reconstructed.
# modis_url(time, extent, resolution): computes the snapshot pixel size
# from the extent and resolution, then returns (width, height, url) with
# the module-level URL template filled in.
# The trailing script walks shapefile directories, derives a date from
# each directory name (chars 19:29 — presumably a YYYY-MM-DD stamp, TODO
# confirm), requests a 0.25 km/px image for the shapefile's bounding box
# and saves it as JPEG.  Assumes URL, os, shapefile, requests, BytesIO,
# Image and strip are imported elsewhere in the file — TODO confirm.
return width, height def modis_url(time, extent, resolution): """ time: utc time in iso format EG: 2020-02-19T00:00:00Z extent: [lower_latitude, left_longitude, higher_latitude, right_longitude], EG: [51.46162974683544,-22.94768591772153,53.03698575949367,-20.952234968354432] resolution: represents the pixel resolution, i.e. km/pixel. Should be a value from this list: [0.03, 0.06, 0.125, 0.25, 0.5, 1, 5, 10] """ width, height = calculate_width_height(extent, resolution) extent = ','.join(map(lambda x: str(x), extent)) return width, height, URL.format(width, height, extent, time) invalid_dates = ['2002-01-28', '2002-05-11', '2002-05-12', '2005-10-31'] directory = '/Users/yaoxiaoyi/Desktop/HLD' save_dir = '/Users/yaoxiaoyi/Desktop/HLD_Images' for filename in os.listdir(directory): shpfile = directory + '/' + filename + '/' + filename sh = shapefile.Reader(shpfile) date = filename[19:29] #if date not in invalid_dates: time = date + 'T00:00:00Z' width, height, url = modis_url( time, [sh.bbox[1], sh.bbox[0], sh.bbox[3], sh.bbox[2]], .25) response = requests.get(strip(url)) img = BytesIO(response.content) im = Image.open(img) im.save(save_dir + '/' + filename + '.jpg')
def predictSentiment():
    """Flask view: score submitted tweet text with Google NL, VADER,
    TextBlob and three pickled one-vs-rest classifiers, then render the
    results in demo.html.

    GET renders the empty form; POST runs the analysis.  Any failure is
    caught, printed, and an empty prediction page is returned.

    Fixes: pickle files are now opened with ``with`` (the original
    leaked the file handles) and the displayed label typo "Netural" is
    corrected to "Neutral".
    """
    if request.method == 'POST':
        try:
            prediction = ""
            # Reading the inputs given by the user
            text = request.form['tweetText']
            if strip(text) != "":
                # --- Google Cloud NL sentiment ---
                gScore = googleSentiment(text)
                gSentiment = {"Score": float("{:.2f}".format((gScore.score) * 100)),
                              "Magnitude": float("{:.2f}".format(gScore.magnitude))}
                if gScore.score > 0:
                    gSentimentT = "Positive"
                elif gScore.score < 0:
                    gSentimentT = "Negative"
                else:
                    gSentimentT = "Neutral"  # fixed typo: was "Netural"
                # --- VADER sentiment ---
                sVeder = vaderSentiment(text)
                print(sVeder)
                # NOTE(review): dict key "Netural" kept as-is — the template
                # may look it up by that exact name; confirm before renaming.
                vSentiment = {"Positive": float("{:.2f}".format((sVeder['pos']) * 100)),
                              "Netural": float("{:.2f}".format((sVeder['neu']) * 100)),
                              "Negative": float("{:.2f}".format((sVeder['neg']) * 100)),
                              "Compound": float("{:.2f}".format((sVeder['compound']) * 100))}
                # standard VADER compound thresholds (+/-0.05)
                if sVeder['compound'] >= 0.05:
                    vSentimentT = "Positive"
                elif sVeder['compound'] <= - 0.05:
                    vSentimentT = "Negative"
                else:
                    vSentimentT = "Neutral"  # fixed typo: was "Netural"
                # --- TextBlob sentiment ---
                tScore = textBlobSentiment(text)
                tSentiment = {"Polarity": float("{:.2f}".format((tScore.polarity) * 100)),
                              "Subjectivity": float("{:.2f}".format((tScore.subjectivity) * 100))}
                if tScore.polarity > 0:
                    tSentimentT = "Positive"
                elif tScore.polarity < 0:
                    tSentimentT = "Negative"
                else:
                    tSentimentT = "Neutral"  # fixed typo: was "Netural"
                # Load the models; `with` closes the handles (original leaked them)
                with open(model_file_name, 'rb') as mf:
                    loaded_model = pickle.load(mf)
                with open(tfidf_model_file_name, "rb") as tfm:
                    tfidf_model = pickle.load(tfm)
                # Transform input value
                pred_data = tfidf_model.transform([text])
                # Predict sentiment - 3 models for 3 sentiments
                positive = (float("{:.1f}".format(loaded_model[0].predict_proba(pred_data)[0][1])) * 100)
                negative = (float("{:.1f}".format(loaded_model[1].predict_proba(pred_data)[0][1])) * 100)
                neutral = (float("{:.1f}".format(loaded_model[2].predict_proba(pred_data)[0][1])) * 100)
                if positive > 50:
                    sentText = "Positive"
                elif negative > 50:
                    sentText = "Negative"
                else:
                    sentText = "Neutral"
                # Build a dictionary to return values
                prediction = {"Positive": positive, "Negative": negative, "Neutral": neutral}
                print('prediction is', prediction)
                # Show the prediction results in a UI
                return render_template('demo.html', prediction=prediction,
                                       gSentiment=gSentiment, vSentiment=vSentiment,
                                       tSentiment=tSentiment, gSentimentT=gSentimentT,
                                       vSentimentT=vSentimentT, tSentimentT=tSentimentT,
                                       sText=text, sentText=sentText)
            else:
                return render_template('demo.html', prediction='', sText='')
        except Exception as e:
            print('The Exception message is: ', e)
            return render_template('demo.html', prediction='')
    else:
        return render_template('demo.html')
# NOTE(review): collapsed scraping snippet kept verbatim.  `page`,
# `porta`, `theme`, the accumulator lists (ricette, descrizioni,
# difficolta_list, portata_list, thematics_list) and `path` come from an
# enclosing scope/loop not visible here, and the nesting level of the
# trailing DataFrame/CSV statements is ambiguous in this collapsed form —
# TODO confirm structure before restyling.
# What it does: fetches one recipe-listing page, parses each recipe card
# (name, description, a difficulty substring at text[25:37]), appends the
# current course/theme per recipe, then assembles a DataFrame with
# columns Ricetta/Descrizione/Difficoltà/Portata/Tipo and writes it to
# CSV (utf-16).
pagede = requests.get("https://www.@@@@@@@@@@@@@@@.it/ricette-cat/page" + str(page) + "/" + str(porta) + "/" + str(theme)) soup = BeautifulSoup(pagede.text, 'html.parser') ricette_div = soup.find_all("div", class_="gz-content-recipe-horizontal") for ricetta_soup in ricette_div: name = ricetta_soup.h2.a.text ricette.append(name) descrizione = ricetta_soup.find('div', class_='gz-description').text descrizioni.append(descrizione) difficolta = ricetta_soup.find('div', class_="gz-col-flex gz-double gz-mTop10").text[25:37] difficolta_list.append(strip(difficolta)) portata_list.append(porta) thematics_list.append(theme) #sleep(randint(2,7)) print("nuova portata...", datetime.datetime.now()) ricette = pd.DataFrame({ 'Ricetta': ricette, 'Descrizione': descrizioni, 'Difficoltà': difficolta_list, 'Portata': portata_list, 'Tipo': thematics_list }) ricette.head() ricette.to_csv(path, sep=",", encoding="utf-16")
def receiveUserInput():
    """Prompt the user with a modal text dialog and return the entered
    value, stringified and whitespace-stripped.

    The throwaway Tk root is withdrawn so only the dialog is visible.
    A cancelled dialog yields the string 'None' (askstring returns
    None, which str() converts).
    """
    root = Tk()
    root.withdraw()
    answer = simpledialog.askstring(title="Input Dialog",
                                    prompt="Insert a character:\n Empty character = #")
    return strip(str(answer))
# NOTE(review): this pipe-combinator ETL is kept byte-identical — the
# pypipes/nppipes chain is strictly order-sensitive and the expression is
# split mid-call across the three collapsed lines, so only comments are
# added here.
# work(): reads the train and test CSVs out of zip archives, replaces
# empty cells with 'nan', splits off column names / row labels / target,
# imputes the numeric columns in missing_cidx with their column mean,
# label-encodes the nominal columns (test values unseen in train are
# remapped onto the most common train value before transform), and
# finally writes train_X/train_y/test_X plus the label arrays into a new
# HDF5 file at out_h5.  Returns None.
# Assumes P (pipes library), pypipes/nppipes/h5pipes and sklearn-style
# label encoders are available at module level — TODO confirm.
def work(in_train_arch, in_test_arch, in_train_csv, in_test_csv, out_h5): from pypipes import unzip,as_key,del_key,getitem,setitem from nppipes import (genfromtxt, place,astype,as_columns,label_encoder,fit_transform, transform,stack ) from nppipes import take as np_take from numpy.core.defchararray import strip from numpy import s_,mean,in1d,putmask from collections import Counter from h5pipes import h5new @P.Pipe def replace_missing_with(iterable, ftor): from numpy import isnan for item in iterable: for i in range(item.shape[1]): mask = isnan(item[:, i]) value = ftor(item[~mask, i]) item[mask, i] = value pass yield item missing_cidx = [11, 14, 16, 28, 33, 34, 35, 36, 37, 46, 51, 60, 68] unseen_nominal_cidx = [2, 12, 38, 69, 74] seen_nominal_cidx = [0, 1, 4, 5, 6, 13, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 39, 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 52, 53, 54, 55, 56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 70, 71, 72, 73, 75, 76, 77] nominal_cidx = seen_nominal_cidx + unseen_nominal_cidx data = ( in_train_arch | unzip(in_train_csv) | genfromtxt(delimiter=',', dtype=str) | place(lambda d: d == '', 'nan') | as_key('train') | as_key('train_col_names', lambda d: strip(d['train'][0], '"')) | as_key('train_labels', lambda d: d['train'][1:, 0].astype(int)) | as_key('train_X', lambda d: d['train'][1:, 1:-1]) | as_key('train_y', lambda d: d['train'][1:, -1].astype(int)) | del_key('train') | as_key('test', lambda d: in_test_arch | unzip(in_test_csv) | genfromtxt(delimiter=',', dtype=str) | place(lambda d: d == '', 'nan') | P.first ) | as_key('test_col_names', lambda d: strip(d['test'][0], '"')) | as_key('test_labels', lambda d: d['test'][1:, 0].astype(int)) | as_key('test_X', lambda d: d['test'][1:, 1:]) | del_key('test') | as_key('train_X', lambda d: (d['train_X'],) | np_take(missing_cidx, axis=1) | astype(float) | replace_missing_with(mean) | astype(str) | setitem(d['train_X'].copy(), s_[:, missing_cidx]) | P.first ) | as_key('label_encoders', 
lambda d: len(nominal_cidx) | label_encoder | P.as_tuple ) | as_key('train_X', lambda d: (d['train_X'],) | np_take(nominal_cidx, axis=1) | as_columns | fit_transform(d['label_encoders']) | stack(axis=1) | setitem(d['train_X'].copy(), s_[:, nominal_cidx]) | P.first ) | as_key('test_X', lambda d: (d['test_X'],) | np_take(seen_nominal_cidx, axis=1) | as_columns | transform(d['label_encoders'][:-len(unseen_nominal_cidx)]) | stack(axis=1) | setitem(d['test_X'].copy(), s_[:, seen_nominal_cidx]) | P.first ) | as_key('test_X', lambda d: (d['test_X'],) | np_take(unseen_nominal_cidx, axis=1) | as_key('test_unseen_nominals_features') | as_key('test_unseen_nominals', lambda d2: zip(d2['test_unseen_nominals_features'].T, d['label_encoders'][-len(unseen_nominal_cidx):]) | P.select(lambda t: list(set(t[0]) - set(t[1].classes_))) | P.as_list ) | as_key('train_most_common_nominals', lambda d2: zip(d['train_X'][:, unseen_nominal_cidx].T.astype(int), d['label_encoders'][-len(unseen_nominal_cidx):]) | P.select(lambda t: t[1].inverse_transform(t[0])) | P.select(lambda s: Counter(s).most_common(1)[0][0]) | P.as_list ) | as_key('test_corrected_features', lambda d2: zip(d2['test_unseen_nominals_features'].copy().T, d2['test_unseen_nominals'], d2['train_most_common_nominals']) | P.select(lambda t: putmask(t[0], in1d(t[0], t[1]), t[2]) or t[0].T) | stack(axis=1) | P.first ) | getitem('test_corrected_features') | as_columns | transform(d['label_encoders'][-len(unseen_nominal_cidx):]) | stack(axis=1) | setitem(d['test_X'].copy(), s_[:, unseen_nominal_cidx]) | P.first ) | del_key('label_encoders') | as_key('test_X', lambda d: (d['test_X'],) | np_take(missing_cidx, axis=1) | astype(float) | replace_missing_with(mean) | astype(str) | setitem(d['test_X'].copy(), s_[:, missing_cidx]) | P.first ) | P.first ) #print(data.keys()) ( (out_h5,) | h5new | as_key('train_X', lambda _: data['train_X'].astype(float)) | as_key('train_y', lambda _: data['train_y'].astype(float)) | as_key('test_X', lambda _: 
data['test_X'].astype(float)) | as_key('train_labels', lambda _: data['train_labels']) | as_key('test_labels', lambda _: data['test_labels']) | P.first ) return