# dplyr-to-pandas translation notes: add rounded columns (mutate / assign).
iris >> dp.mutate(redondeado=X.PetalLength.round(),
                  redondeado2=X.SepalLength.round())
iris.assign(redondeado=lambda x: x.PetalLength.round(),
            redondeado2=lambda x: x.SepalLength.round())

# R: ifelse(y == 0, 0, 1)
np.where((y == 0), 0, 1)

# Dropping duplicate rows.
# R: data = iris %>% distinct(Species, Sepal.Length, .keep_all = T)
iris >> dp.distinct(X.SepalLength)
iris.drop_duplicates()
iris.drop_duplicates(subset='PetalLength')

# Sorting.
# R: data = iris %>% arrange(Sepal.Length, Sepal.Width)
iris >> dp.arrange(X.PetalLength)
iris.sort_values("PetalLength", ascending=False)

# Add a constant column.  R: data$ceros = 0
iris['ceros'] = 0
# Remove a column.  R: data$ceros = NULL
del iris['ceros']

# Small frames for the join examples below.
# R: data = data.frame(x1 = rep(1, 10), x2 = rep(2, 10))
data = pd.DataFrame({'x1': np.ones(10), 'x2': np.repeat(2, 10)})
# R: data2 = data.frame(x3 = rep(3, 10), x4 = rep(4, 10))
data2 = pd.DataFrame({'x3': np.repeat(3, 10), 'x4': np.repeat(4, 10)})
# R: data3 = data.frame(x1 = rep(1, 10), x5 = rep(5, 10))
data3 = pd.DataFrame({'x1': np.ones(10), 'x5': np.repeat(5, 10)})
# R: total = left_join(data, data3, by = "x1")
# R: total2 = merge(data, data2, by = "x1")
def main(argv):
    """Interactively download a YouTube video, score each detected face per
    frame with the MS cognitive-services Emotion API, and save a cropped
    image of the best face for each emotion.

    ``argv`` is accepted for interface compatibility but unused: all
    parameters are read interactively from stdin.
    """
    yURL = input("Enter the youtube url:")
    outdir = input("Enter the output directory:")
    maxFrames = int(input("Enter the maximum number of frames to check:"))

    # Several Haar cascade variants are combined to improve detection recall.
    faceDet = cv2.CascadeClassifier(
        "haarcascade/haarcascade_frontalface_default.xml")
    faceDet2 = cv2.CascadeClassifier(
        "haarcascade/haarcascade_frontalface_alt2.xml")
    faceDet3 = cv2.CascadeClassifier(
        "haarcascade/haarcascade_frontalface_alt.xml")
    faceDet4 = cv2.CascadeClassifier(
        "haarcascade/haarcascade_frontalface_alt_tree.xml")

    pdata, pframes, pfacedims = getNewInstances(yURL, faceDet, faceDet2,
                                                faceDet3, faceDet4,
                                                maxCount=maxFrames)

    headers = {
        'Ocp-Apim-Subscription-Key': ms_key1,
        'Content-Type': 'application/octet-stream',
    }

    # Collect one small DataFrame per API response and concatenate once at
    # the end: DataFrame.append was removed in pandas 2.0 and growing a
    # frame inside a loop is O(n^2).
    resultParts = []
    for frameId, image in enumerate(pframes):
        print("posting frame %d of %d" % (frameId, len(pframes)))
        # Send the frame image to MS cognitive services.
        resultMS = processRequest(image, headers)
        if isinstance(resultMS, list):
            for result in resultMS:
                if isinstance(result, dict):
                    resFrameList = []
                    # One row per (emotion, confidence) pair for this face.
                    for emotion, conf in result['scores'].items():
                        rect = result["faceRectangle"]
                        resFrameList.append(
                            (frameId, emotion, conf, rect['left'],
                             rect['top'], rect['width'], rect['height']))
                    resultParts.append(
                        pd.DataFrame(resFrameList,
                                     columns=[
                                         "frameId", "emotionLabel", "conf",
                                         "faceleft", "facetop", "faceW",
                                         "faceH"
                                     ]))
        time.sleep(2)  # stay under the API rate limit

    if not resultParts:
        # pd.concat([]) raises; bail out cleanly when nothing was detected.
        print("no faces detected; nothing to save")
        return
    resultsDf = pd.concat(resultParts, ignore_index=True)

    # Wrap in a DplyFrame for higher-level, dplyr-style analytics: select
    # the single top-ranking face frame for each of the ~8 emotions the
    # service reports, then save them accordingly.
    dfFaces = DplyFrame(resultsDf)
    print(dfFaces)
    # Per emotion keep the highest-confidence frame (earliest on ties),
    # then per frame keep only its strongest emotion.
    topFaces = (
        dfFaces >> group_by(X.emotionLabel) >> sift(X.conf == X.conf.max())
        >> sift(X.frameId == X.frameId.min()) >> ungroup() >> group_by(
            X.frameId) >> sift(X.conf == X.conf.max()) >> ungroup() >> arrange(
                X.emotionLabel))
    topFaces = topFaces.drop_duplicates()

    for i, (index, row) in enumerate(topFaces.iterrows()):
        print("saving emotion frame %d of %d" % (i, len(topFaces.index)))
        emotion = row["emotionLabel"]
        image = pframes[int(row["frameId"])]
        # Cast to int: slice indices must be integers regardless of the
        # dtype pandas stored the rectangle coordinates in.
        faceL = int(row["faceleft"])
        faceT = int(row["facetop"])
        faceW = int(row["faceW"])
        faceH = int(row["faceH"])
        # Save the cropped face region.
        imageW = image[faceT:faceT + faceH, faceL:faceL + faceW]
        cv2.imwrite(
            os.path.expanduser("%s/Cropped_%s.jpg" % (outdir, emotion)),
            imageW)
        # To draw a labelled rectangle on the full frame instead, uncomment:
        # cv2.rectangle(image, (faceL, faceT), (faceL + faceW, faceT + faceH),
        #               color=(255, 0, 0), thickness=5)
        # cv2.putText(image, emotion, (faceL, faceT - 10),
        #             cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
        # cv2.imwrite(os.path.expanduser("%s/%s.jpg" % (outdir, emotion)),
        #             image)
"""5.2 wordcloud""" """turns the word freq to dict""" d = {} for a, x in dff.values: d[a] = x wordcloud = WordCloud(width = 1000, height = 1000, background_color ='white', min_font_size =15, max_font_size=120).generate_from_frequencies(frequencies=d) plt.figure(figsize = (8, 8), facecolor = None) plt.imshow(wordcloud) plt.axis("off") plt.tight_layout(pad = 0) plt.show() """stacked bar plot""" dfx = (dfr >> arrange(-X.tot)) dfx=dfx.head(50) from plotnine import * (ggplot(dfx, aes(x='word', y='tot', fill='source')) + geom_col() + theme(axis_text_x=element_text(rotation=45, hjust=1)) ) """each newspaper""" dfr = DplyFrame(output) df_tele =(dfr >>sift(X.source=="guardian")) df_tele = (df_tele >> group_by(X.word, X.source) >> summarize(tot=X.count.sum())) df_tele = (df_tele >>select(X.word, X.tot ))
>> pull('carat')) # DataFrame transformation diamonds >> mutate(x_plus_y=X.x + X.y) >> select(columns_from('x')) >> head(3) diamonds >> mutate(x_plus_y=X.x + X.y, y_div_z=(X.y / X.z)) >> select(columns_from('x')) >> head(3) # The transmute() function is a combination of a mutate and a selection of the created variables. diamonds >> transmute(x_plus_y=X.x + X.y, y_div_z=(X.y / X.z)) >> head(3) # group_by() and ungroup() diamonds >> head(5) >> group_by(X.color) >> mutate(avg_price=X.price.mean()) (diamonds >> group_by(X.cut) >> mutate(price_lead=lead(X.price), price_lag=lag(X.price)) >> head(2) >> select(X.cut, X.price, X.price_lead, X.price_lag)) # ungroup() (diamonds >> group_by(X.cut) >> arrange(X.price) >> head(3) >> ungroup() >> mask(X.carat < 0.23)) # Reshaping diamonds >> arrange(X.table, ascending=False) >> head(5) diamonds >> rename(CUT=X.cut, COLOR='color') >> head(2) # transform a "wide" DataFrame into a "long" format # gather(key, value, *columns, add_id=True) melts the specified columns into two key:value columns diamonds >> gather('variable', 'value', ['price', 'depth','x','y','z']) >> head(5) elongated = diamonds >> gather('variable', 'value', add_id=True) # transform a "long" DataFrame into a "wide" format # In this case the _ID column comes in handy since it is necessary to not have any duplicated identifiers. widened = elongated >> spread(X.variable, X.value) widened.dtypes # all are objects
def main(argv):
    """Command-line entry point: download a YouTube video, score each
    detected face per frame with the MS Emotion API, and save both a
    cropped face and a labelled full frame for each emotion's best match.

    argv: CLI arguments (excluding the program name), e.g.
        ['-y', URL, '-o', DIR, '-m', '100']
    """
    # One canonical usage string instead of four slightly different copies.
    usage = 'shellScript.py -y <yturl> -o <odir> -m <maxframes>'
    ytURL = None
    outdir = None
    maxFrames = 500
    try:
        opts, args = getopt.getopt(argv, "hy:o:m:",
                                   ["yturl=", "odir=", "maxframes="])
    except getopt.GetoptError:
        # Python 3 print function (the original used Python 2 statements,
        # inconsistent with the rest of the file).
        print('Error: ' + usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('help: ' + usage)
            sys.exit()
        elif opt in ("-y", "--yturl"):
            print("--yturl={}".format(arg))
            ytURL = arg
        elif opt in ("-o", "--odir"):
            print("--odir={}".format(arg))
            outdir = arg
        elif opt in ("-m", "--maxframes"):
            print("--maxframes={}".format(arg))
            maxFrames = int(arg)

    # Validate the required arguments before doing any work.
    if ytURL is None:
        print('bad yt: ' + usage)
        sys.exit()
    if outdir is None:
        print('bad outdir: ' + usage)
        sys.exit()
    # `long` no longer exists in Python 3; int() covers all integer sizes.
    if not isinstance(maxFrames, int):
        print('bad maxFrames: ' + usage)
        sys.exit()

    # Several Haar cascade variants are combined to improve detection recall.
    faceDet = cv2.CascadeClassifier(
        "haarcascade/haarcascade_frontalface_default.xml")
    faceDet2 = cv2.CascadeClassifier(
        "haarcascade/haarcascade_frontalface_alt2.xml")
    faceDet3 = cv2.CascadeClassifier(
        "haarcascade/haarcascade_frontalface_alt.xml")
    faceDet4 = cv2.CascadeClassifier(
        "haarcascade/haarcascade_frontalface_alt_tree.xml")

    pdata, pframes, pfacedims = getNewInstances(ytURL, faceDet, faceDet2,
                                                faceDet3, faceDet4,
                                                maxCount=maxFrames)

    headers = {
        'Ocp-Apim-Subscription-Key': ms_key1,
        'Content-Type': 'application/octet-stream',
    }

    # Collect one small DataFrame per API response and concatenate once at
    # the end: DataFrame.append was removed in pandas 2.0 and growing a
    # frame inside a loop is O(n^2).
    resultParts = []
    for frameId, image in enumerate(pframes):
        print("posting frame %d of %d" % (frameId, len(pframes)))
        resultMS = processRequest(image, headers)
        if isinstance(resultMS, list):
            for result in resultMS:
                if isinstance(result, dict):
                    resFrameList = []
                    # One row per (emotion, confidence) pair for this face.
                    for emotion, conf in result['scores'].items():
                        rect = result["faceRectangle"]
                        resFrameList.append(
                            (frameId, emotion, conf, rect['left'],
                             rect['top'], rect['width'], rect['height']))
                    resultParts.append(
                        pd.DataFrame(resFrameList,
                                     columns=[
                                         "frameId", "emotionLabel", "conf",
                                         "faceleft", "facetop", "faceW",
                                         "faceH"
                                     ]))
        time.sleep(2)  # stay under the API rate limit

    if not resultParts:
        # pd.concat([]) raises; bail out cleanly when nothing was detected.
        print("no faces detected; nothing to save")
        return
    resultsDf = pd.concat(resultParts, ignore_index=True)

    dfFaces = DplyFrame(resultsDf)
    # Per emotion keep the highest-confidence frame (earliest on ties),
    # then per frame keep only its strongest emotion.
    topFaces = (
        dfFaces >> group_by(X.emotionLabel) >> sift(X.conf == X.conf.max())
        >> sift(X.frameId == X.frameId.min()) >> ungroup() >> group_by(
            X.frameId) >> sift(X.conf == X.conf.max()) >> ungroup() >> arrange(
                X.emotionLabel))
    topFaces = topFaces.drop_duplicates()
    #print(topFaces)

    for i, (index, row) in enumerate(topFaces.iterrows()):
        print("saving emotion frame %d of %d" % (i, len(topFaces.index)))
        emotion = row["emotionLabel"]
        image = pframes[int(row["frameId"])]
        # Cast to int: slice indices must be integers regardless of the
        # dtype pandas stored the rectangle coordinates in.
        faceL = int(row["faceleft"])
        faceT = int(row["facetop"])
        faceW = int(row["faceW"])
        faceH = int(row["faceH"])
        # Save the cropped face region.
        imageW = image[faceT:faceT + faceH, faceL:faceL + faceW]
        cv2.imwrite(
            os.path.expanduser("%s/Cropped_%s.jpg" % (outdir, emotion)),
            imageW)
        # Draw a labelled box on the full frame and save that too.
        cv2.rectangle(image, (faceL, faceT), (faceL + faceW, faceT + faceH),
                      color=(255, 0, 0), thickness=5)
        cv2.putText(image, emotion, (faceL, faceT - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
        cv2.imwrite(os.path.expanduser("%s/box%s.jpg" % (outdir, emotion)),
                    image)