# dfply equivalent of R dplyr mutate(): add rounded copies of two columns.
iris >> dp.mutate(redondeado=X.PetalLength.round(), redondeado2=X.SepalLength.round())

# Plain-pandas equivalent using assign() with lambdas.
iris.assign(redondeado = lambda x: x.PetalLength.round(), redondeado2 = lambda x: x.SepalLength.round())

# R: ifelse(y==0, 0, 1)  -- vectorized conditional.
np.where((y == 0), 0, 1)
# R: data=iris %>%
#      distinct(Species, Sepal.Length, .keep_all = T)
iris >> dp.distinct(X.SepalLength)

# Pandas de-duplication: all columns, then a single-column subset.
iris.drop_duplicates()
iris.drop_duplicates(subset='PetalLength')
# R (sorting): data=iris %>%
#   arrange(Sepal.Length, Sepal.Width)
iris >> dp.arrange(X.PetalLength)

# Pandas sort; note this one is descending, unlike the arrange() above.
iris.sort_values("PetalLength", ascending=False)
# R: data$ceros=0  -- add a constant column.
iris['ceros']=0
# R: data$ceros=NULL  -- drop the column again.
del(iris['ceros'])

# R: data=data.frame(x1=rep(1,10), x2=rep(2, 10))
data = pd.DataFrame({'x1':np.ones(10), 'x2':np.repeat(2, 10)})
# R: data2=data.frame(x3=rep(3,10), x4=rep(4, 10))
data2 = pd.DataFrame({'x3':np.repeat(3, 10), 'x4':np.repeat(4, 10)})
# R: data3=data.frame(x1=rep(1,10), x5=rep(5, 10))
data3 = pd.DataFrame({'x1':np.ones(10), 'x5':np.repeat(5, 10)})
# R joins (not translated here):
#   total=left_join(data, data3, by="x1")
#   total2=merge(data, data2, by="x1")
def main(argv):
    """Interactive driver: grab frames from a YouTube video, detect faces,
    post each frame to MS Cognitive Services for emotion scores, then save
    the cropped face and full frame for the top-confidence frame per emotion.

    argv is accepted for interface compatibility but unused; the URL, output
    directory and frame limit are read interactively from stdin.
    """
    yURL = input("Enter the youtube url:")
    outdir = input("Enter the output directory:")
    maxFrames = int(input("Enter the maximum number of frames to check:"))

    # Several Haar cascades are passed so getNewInstances() can fall back
    # when the default frontal-face model misses a face.
    faceDet = cv2.CascadeClassifier(
        "haarcascade/haarcascade_frontalface_default.xml")
    faceDet2 = cv2.CascadeClassifier(
        "haarcascade/haarcascade_frontalface_alt2.xml")
    faceDet3 = cv2.CascadeClassifier(
        "haarcascade/haarcascade_frontalface_alt.xml")
    faceDet4 = cv2.CascadeClassifier(
        "haarcascade/haarcascade_frontalface_alt_tree.xml")
    #
    pdata, pframes, pfacedims = getNewInstances(yURL,
                                                faceDet,
                                                faceDet2,
                                                faceDet3,
                                                faceDet4,
                                                maxCount=maxFrames)
    #
    headers = dict()
    headers['Ocp-Apim-Subscription-Key'] = ms_key1
    headers['Content-Type'] = 'application/octet-stream'
    #
    # Collect one tuple per (frame, emotion score) and build the DataFrame
    # once at the end. The original rebuilt and appended the *growing* row
    # list on every inner iteration, duplicating rows quadratically, and
    # DataFrame.append() was removed in pandas 2.0.
    rows = []
    for frameId, image in enumerate(pframes):
        print("posting frame %d of %d" % (frameId, len(pframes)))
        # Send the frame image to MS Cognitive Services.
        resultMS = processRequest(image, headers)
        if isinstance(resultMS, list):
            for result in resultMS:
                if isinstance(result, dict):
                    rect = result["faceRectangle"]
                    for label, conf in result['scores'].items():
                        rows.append((frameId, label, conf,
                                     rect['left'], rect['top'],
                                     rect['width'], rect['height']))
        time.sleep(2)  # stay under the API rate limit
    resultsDf = pd.DataFrame(rows,
                             columns=[
                                 "frameId", "emotionLabel",
                                 "conf", "faceleft",
                                 "facetop", "faceW", "faceH"
                             ])
    #
    # Rank faces with dfply: best-confidence frame per emotion, earliest
    # frame on ties, then best face per frame, sorted by emotion label.
    # MS returns ~8 emotions, so this yields roughly one face per emotion.
    dfFaces = DplyFrame(resultsDf)
    topFaces = (
        dfFaces >> group_by(X.emotionLabel) >> sift(X.conf == X.conf.max()) >>
        sift(X.frameId == X.frameId.min()) >> ungroup() >> group_by(
            X.frameId) >> sift(X.conf == X.conf.max()) >> ungroup() >> arrange(
                X.emotionLabel))

    topFaces = topFaces.drop_duplicates()
    for i, (index, row) in enumerate(topFaces.iterrows()):
        print("saving emotion frame %d of %d" % (i, len(topFaces.index)))
        emotion = row["emotionLabel"]
        image = pframes[int(row["frameId"])]
        # Cast to int: pandas may hand these back as floats, and numpy
        # slicing rejects float indices.
        faceL = int(row["faceleft"])
        faceT = int(row["facetop"])
        faceW = int(row["faceW"])
        faceH = int(row["faceH"])
        # Save the cropped face.
        imageW = image[faceT:faceT + faceH, faceL:faceL + faceW]
        cv2.imwrite(
            os.path.expanduser("%s/Cropped_%s.jpg" % (outdir, emotion)),
            imageW)
        # To draw a labelled rectangle on the saved frame, uncomment:
        #
        # cv2.rectangle( image,(faceL,faceT),
        #               (faceL+faceW, faceT + faceH),
        #                color = (255,0,0), thickness = 5 )
        # cv2.putText( image, emotion, (faceL,faceT-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,0), 1 )
        #
        cv2.imwrite(os.path.expanduser("%s/%s.jpg" % (outdir, emotion)), image)
# Example 3
"""5.2 wordcloud"""
"""turns the word freq to dict"""
d = {}
for a, x in dff.values:
    d[a] = x
wordcloud = WordCloud(width = 1000, height = 1000,
                background_color ='white',
                min_font_size =15, max_font_size=120).generate_from_frequencies(frequencies=d)
plt.figure(figsize = (8, 8), facecolor = None)
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad = 0)
plt.show()

"""stacked bar plot"""
dfx = (dfr >>
arrange(-X.tot))
dfx=dfx.head(50)

from plotnine import *
(ggplot(dfx, aes(x='word', y='tot', fill='source'))
 + geom_col()  +
 theme(axis_text_x=element_text(rotation=45, hjust=1))
)
"""each newspaper"""
dfr = DplyFrame(output)
df_tele =(dfr >>sift(X.source=="guardian"))
df_tele = (df_tele >> 
  group_by(X.word, X.source) >> 
  summarize(tot=X.count.sum()))

df_tele = (df_tele >>select(X.word, X.tot ))
 >> pull('carat'))

# DataFrame transformation with dfply pipes on the diamonds dataset.
# mutate() adds columns; select(columns_from('x')) keeps 'x' and everything after it.
diamonds >> mutate(x_plus_y=X.x + X.y) >> select(columns_from('x')) >> head(3)
diamonds >> mutate(x_plus_y=X.x + X.y, y_div_z=(X.y / X.z)) >> select(columns_from('x')) >> head(3)
# The transmute() function is a combination of a mutate and a selection of the created variables.
diamonds >> transmute(x_plus_y=X.x + X.y, y_div_z=(X.y / X.z)) >> head(3)

# group_by() and ungroup(): mutate after group_by computes per-group values.
diamonds >> head(5) >> group_by(X.color) >> mutate(avg_price=X.price.mean())

# lead()/lag() shift the price column within each cut group.
(diamonds >> group_by(X.cut) >>
 mutate(price_lead=lead(X.price), price_lag=lag(X.price)) >>
 head(2) >> select(X.cut, X.price, X.price_lead, X.price_lag))
# ungroup() drops the grouping so mask() filters over all rows.
(diamonds >> group_by(X.cut) >> arrange(X.price) >>
 head(3) >> ungroup() >> mask(X.carat < 0.23))

# Reshaping: arrange() sorting and rename() (new_name=old column).
diamonds >> arrange(X.table, ascending=False) >> head(5)
diamonds >> rename(CUT=X.cut, COLOR='color') >> head(2)

# transform a "wide" DataFrame into a "long" format
# gather(key, value, *columns, add_id=True) melts the specified columns into two key:value columns
diamonds >> gather('variable', 'value', ['price', 'depth','x','y','z']) >> head(5)
elongated = diamonds >> gather('variable', 'value', add_id=True) 

# transform a "long" DataFrame into a "wide" format
# In this case the _ID column comes in handy since it is necessary to not have any duplicated identifiers.
widened = elongated >> spread(X.variable, X.value)
widened.dtypes # all are objects -- spread() does not restore numeric dtypes
def main(argv):
    """Command-line driver: parse -y/-o/-m options, grab frames from a
    YouTube video, detect faces, post each frame to MS Cognitive Services
    for emotion scores, and save a cropped face plus an annotated frame
    for the top-confidence face per emotion.

    argv: the command-line argument list (typically sys.argv[1:]).
    Exits via sys.exit() on bad or missing options.
    """
    ytURL = None
    outdir = None
    maxFrames = 500
    try:
        opts, args = getopt.getopt(argv, "hy:o:m:",
                                   ["yturl=", "odir=", "maxframes="])
    except getopt.GetoptError:
        # Python 2 print statements converted to print() calls throughout:
        # the old syntax is a SyntaxError on Python 3.
        print('Error: shellScript.py -y <yturl> -o <odir> -m <maxframes>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('help: shellScript.py -y <yturl> -o <odir> -m <maxframes>')
            sys.exit()
        elif opt in ("-y", "--yturl"):
            print("--yturl={}".format(arg))
            ytURL = arg
        elif opt in ("-o", "--odir"):
            print("--odir={}".format(arg))
            outdir = arg
        elif opt in ("-m", "--maxframes"):
            print("--maxframes={}".format(arg))
            maxFrames = int(arg)
    #
    if ytURL is None:
        print('bad yt: shellScript.py -y <yturl> -o <odir> -m <maxframes>')
        sys.exit()
    #
    if outdir is None:
        print('bad outdir: shellScript.py -y <yturl> -o <odir> -m <maxframes>')
        sys.exit()
    #
    # `long` does not exist on Python 3; int covers both cases.
    if not isinstance(maxFrames, int):
        print('bad maxFrames: shellScript.py -y <yturl> -o <odir> -m <maxframes>')
        sys.exit()
    #
    # Several Haar cascades are passed so getNewInstances() can fall back
    # when the default frontal-face model misses a face.
    faceDet = cv2.CascadeClassifier(
        "haarcascade/haarcascade_frontalface_default.xml")
    faceDet2 = cv2.CascadeClassifier(
        "haarcascade/haarcascade_frontalface_alt2.xml")
    faceDet3 = cv2.CascadeClassifier(
        "haarcascade/haarcascade_frontalface_alt.xml")
    faceDet4 = cv2.CascadeClassifier(
        "haarcascade/haarcascade_frontalface_alt_tree.xml")
    #
    pdata, pframes, pfacedims = getNewInstances(ytURL,
                                                faceDet,
                                                faceDet2,
                                                faceDet3,
                                                faceDet4,
                                                maxCount=maxFrames)
    #
    headers = dict()
    headers['Ocp-Apim-Subscription-Key'] = ms_key1
    headers['Content-Type'] = 'application/octet-stream'
    #
    # Collect one tuple per (frame, emotion score) and build the DataFrame
    # once at the end. The original rebuilt and appended the *growing* row
    # list on every inner iteration, duplicating rows quadratically, and
    # DataFrame.append() was removed in pandas 2.0.
    rows = []
    for frameId, image in enumerate(pframes):
        print("posting frame %d of %d" % (frameId, len(pframes)))
        resultMS = processRequest(image, headers)
        #
        if isinstance(resultMS, list):
            for result in resultMS:
                if isinstance(result, dict):
                    rect = result["faceRectangle"]
                    for label, conf in result['scores'].items():
                        rows.append((frameId, label, conf,
                                     rect['left'], rect['top'],
                                     rect['width'], rect['height']))
        time.sleep(2)  # stay under the API rate limit
    resultsDf = pd.DataFrame(rows,
                             columns=[
                                 "frameId", "emotionLabel",
                                 "conf", "faceleft",
                                 "facetop", "faceW", "faceH"
                             ])
    #
    # Rank faces with dfply: best-confidence frame per emotion, earliest
    # frame on ties, then best face per frame, sorted by emotion label.
    dfFaces = DplyFrame(resultsDf)
    #
    topFaces = (
        dfFaces >> group_by(X.emotionLabel) >> sift(X.conf == X.conf.max()) >>
        sift(X.frameId == X.frameId.min()) >> ungroup() >> group_by(
            X.frameId) >> sift(X.conf == X.conf.max()) >> ungroup() >> arrange(
                X.emotionLabel))

    topFaces = topFaces.drop_duplicates()
    #
    for i, (index, row) in enumerate(topFaces.iterrows()):
        print("saving emotion frame %d of %d" % (i, len(topFaces.index)))
        #
        emotion = row["emotionLabel"]
        image = pframes[int(row["frameId"])]
        # Cast to int: pandas may hand these back as floats, and numpy
        # slicing rejects float indices.
        faceL = int(row["faceleft"])
        faceT = int(row["facetop"])
        faceW = int(row["faceW"])
        faceH = int(row["faceH"])
        #
        # Save the cropped face.
        imageW = image[faceT:faceT + faceH, faceL:faceL + faceW]
        cv2.imwrite(
            os.path.expanduser("%s/Cropped_%s.jpg" % (outdir, emotion)),
            imageW)
        #
        # Annotate the full frame with a box and the emotion label.
        cv2.rectangle(image, (faceL, faceT), (faceL + faceW, faceT + faceH),
                      color=(255, 0, 0),
                      thickness=5)
        cv2.putText(image, emotion, (faceL, faceT - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
        #
        cv2.imwrite(os.path.expanduser("%s/box%s.jpg" % (outdir, emotion)),
                    image)