Пример #1
0
def downloadImages(dataset):
    """Load, resize and persist the images referenced by a CSV dataset.

    The CSV at ``dataset`` is read with ``csv.DictReader``. Rows whose
    ``likeRatio`` is greater than 1 are ignored. For every other row the
    image at ``imgUrl`` is loaded through ``imageProcess.Image`` and resized
    to ``(TARGET_X, TARGET_Y)``; rows for which loading or resizing raises
    are counted as "not processed", the error is printed and the row is
    skipped.

    Side effects:
        * prints progress and the not-processed ratio;
        * saves a scatter plot of the image shapes to
          ``datasets/<stem>_distribution.png``;
        * saves ``allImgs_<stem>.npy`` and ``allResults_<stem>.npy``,
          where ``<stem>`` is the text after the first ``/`` in ``dataset``
          with the last four characters removed.

    Args:
        dataset: path of the CSV file; must provide the ``likeRatio`` and
            ``imgUrl`` columns.

    Returns:
        ``(allImgs, allResults)``: the list of resized images and the list
        of matching ``likeRatio`` values as floats.
    """
    print('Start reading features')
    allImgs = []
    shapes = []
    allResults = []
    notProcessed = 0
    totalImgs = 0
    with open(dataset) as f:
        for row in csv.DictReader(f):
            likeRatio = float(row["likeRatio"])
            if likeRatio > 1.:
                continue
            print(totalImgs)
            totalImgs += 1
            try:
                image = imageProcess.Image(row["imgUrl"], True)
                imageShape = image.getImageShape()
                # Keep only the first two dimensions so the plot below works
                # regardless of how many channels each image has.
                shapes.append((imageShape[0], imageShape[1]))
                image_rescaled = resize(image.skimageImage,
                                        (TARGET_X, TARGET_Y),
                                        anti_aliasing=False)
            except Exception as e:
                notProcessed += 1
                print(e)
                continue
            allImgs.append(image_rescaled)
            allResults.append(likeRatio)

    # Guard against an empty (or fully filtered-out) dataset.
    notProcessedRatio = notProcessed / totalImgs if totalImgs else 0.0
    print("not processed: " + str(notProcessedRatio))

    # Text between the first "/" and the 4-character extension; when there
    # is no "/", find() returns -1 and the slice starts at index 0.
    stem = dataset[dataset.find("/") + 1:-4]

    plt.figure()
    if shapes:
        # One point per image: x = shape[0], y = shape[1].
        plt.scatter([shape[0] for shape in shapes],
                    [shape[1] for shape in shapes])
    plt.title('image shape distribution')
    plt.ylabel('width')
    plt.xlabel('height')
    plt.savefig(f"datasets/{stem}_distribution.png")
    np.save(f"allImgs_{stem}.npy", allImgs)
    np.save(f"allResults_{stem}.npy", allResults)
    return allImgs, allResults
Пример #2
0
def extractFeaturesFromDataset(filename):
    """Extract sparse feature vectors from a CSV dataset and split them.

    Each CSV row becomes a ``defaultdict(float)`` of features:

    * ``timestamp`` yields boolean hour-of-day buckets (2-6, 6-10, 14-18,
      18-22 and 22-2); the 10-14 bucket is not emitted;
    * ``caption`` yields 0/1 flags for the substrings "food", "follow" and
      "ad" (case-insensitive);
    * ``imgUrl`` is loaded through ``imageProcess.Image`` and yields
      ``percentageFaces``; if loading raises, the whole row is dropped and
      counted as not processed;
    * ``likeRatio``, ``likeCount`` and ``commentCount`` are never used as
      features;
    * any other column (including ``caption`` and ``imgUrl`` themselves) is
      added under its own name when its value parses as a float.

    The label of a row is ``float(row["likeRatio"])``.

    Args:
        filename: path of the CSV file to read.

    Returns:
        ``(trainData, testData)``: two lists of ``(featureVector, label)``
        tuples, the first holding ``2 * int(n / 3)`` rows in file order and
        the second holding the rest.
    """
    net = imageProcess.runFaceDetectDNN()
    print('Start reading features')
    listFeatureVectorsWithResult = []
    notProcessed = 0
    with open(filename) as f:
        for row in csv.DictReader(f):
            featureVector = defaultdict(float)
            imageNotProcessed = False
            for key in row:  # each row is a dict
                if key == "timestamp":
                    hourOfDay = datetime.fromtimestamp(int(row[key])).hour
                    featureVector['between2and6'] = 2 <= hourOfDay < 6
                    featureVector['between6and10'] = 6 <= hourOfDay < 10
                    featureVector['between14and18'] = 14 <= hourOfDay < 18
                    featureVector['between18and22'] = 18 <= hourOfDay < 22
                    featureVector['between22and2'] = (hourOfDay >= 22
                                                      or hourOfDay < 2)
                    # The raw timestamp itself is not a feature.
                    continue

                if key == "caption":
                    caption = row[key].lower()
                    featureVector["capContainsFood"] = (
                        1 if "food" in caption else 0)
                    featureVector["capContainsFollow"] = (
                        1 if "follow" in caption else 0)
                    # Substring test: also matches words such as "bad".
                    featureVector["capContainsAd"] = (
                        1 if "ad" in caption else 0)

                if key == "imgUrl":
                    try:
                        image = imageProcess.Image(row[key], True)
                    except Exception as e:
                        print(e)
                        imageNotProcessed = True
                        break
                    faceInfo = imageProcess.extractFaceInfo(image, net)
                    featureVector["percentageFaces"] = (
                        imageProcess.extractTotalPercentAreaFaces(faceInfo))

                if key in ("likeRatio", "likeCount", "commentCount"):
                    continue

                # Keep every remaining column whose value is numeric; text
                # columns fail the conversion and are skipped.
                try:
                    featureVector[key] = float(row[key])
                except (TypeError, ValueError):
                    continue
            if imageNotProcessed:
                notProcessed += 1
                continue

            label = float(row["likeRatio"])
            listFeatureVectorsWithResult.append((featureVector, label))

    print('Finished extracting features')
    limitLen = 2 * int(len(listFeatureVectorsWithResult) / 3)
    trainData = listFeatureVectorsWithResult[:limitLen]
    testData = listFeatureVectorsWithResult[limitLen:]

    # Ratio over all rows read, guarded for an empty dataset.
    totalRows = len(listFeatureVectorsWithResult) + notProcessed
    notProcessedRatio = notProcessed / totalRows if totalRows else 0.0
    print(f"Not processed ratio: {notProcessedRatio}")
    return (trainData, testData)
Пример #3
0
# Weekday names in the order their one-hot flags are appended.
_WEEKDAYS = ("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday",
             "Friday", "Saturday")

# Substrings looked up in the accessibility caption, in feature order.
_CAPTION_KEYWORDS = (
    "people", "and", "one", "or", "more", "standing", "nature", "closeup",
    "sitting", "tree", "photo", "no", "description", "available", "cloud",
    "beard", "mountain", "child", "playing", "sports", "sunglasses", "on",
    "grass", "suit", "selfie", "crowd", "1", "person", "wedding", "baby",
)

# Columns holding the strings "True"/"False", encoded as 1/0.
_BOOLEAN_COLUMNS = ("isBusinessAcc", "isVerified", "hasChannel")

# Columns that are never used as features.
_IGNORED_COLUMNS = ("likeRatio", "likeCount", "commentCount")


def _timestampFeatures(timestamp):
    """Return six hour-of-day flags followed by seven weekday flags."""
    hourOfDay = datetime.fromtimestamp(timestamp).hour
    flags = [
        int(start <= hourOfDay < end)
        for start, end in ((2, 6), (6, 10), (10, 14), (14, 18), (18, 22))
    ]
    # The last bucket wraps around midnight.
    flags.append(int(hourOfDay >= 22 or hourOfDay < 2))
    dayOfWeek = ep_to_day(timestamp)
    flags.extend(int(dayOfWeek == day) for day in _WEEKDAYS)
    return flags


def _captionFeatures(accessibilityCaption):
    """Return one 0/1 substring flag per entry of ``_CAPTION_KEYWORDS``."""
    return [int(word in accessibilityCaption) for word in _CAPTION_KEYWORDS]


def extractFeaturesFromDataset(filename):
    """Extract images, dense feature vectors and labels from a CSV dataset.

    For every CSV row a feature list is built by walking the columns in file
    order:

    * ``timestamp``: 6 hour-of-day flags and 7 weekday flags;
    * ``accessibilityCaption``: one substring flag per keyword;
    * ``imgUrl``: the image is loaded through ``imageProcess.Image`` and
      resized to ``(TARGET_X, TARGET_Y)``; it contributes no feature;
    * ``isBusinessAcc`` / ``isVerified`` / ``hasChannel``: 1 when the value
      is the string "True", else 0;
    * ``likeRatio`` / ``likeCount`` / ``commentCount``: ignored;
    * any other column: appended as a float when it parses as one,
      otherwise skipped.

    The label is ``likeCount / userAverageLikes``. A row for which any step
    raises (including a zero or missing ``userAverageLikes``, or a missing
    ``imgUrl`` column) is counted as not processed, the error is printed
    and the row is skipped.

    NOTE(review): a column that parses as a number in some rows but not in
    others produces feature vectors of different lengths - confirm the
    dataset's columns are consistently typed.

    Side effects: prints progress; saves a shape scatter plot to
    ``datasets/<stem>_distribution.png`` and the three arrays to
    ``allImgs_<stem>.npy``, ``allResults_<stem>.npy`` and
    ``featureVectors_<stem>.npy``, where ``<stem>`` is the text after the
    first ``/`` in ``filename`` with the last four characters removed.

    Args:
        filename: path of the CSV file to read.

    Returns:
        ``(allImgs, featureVectors, allResults)`` as numpy arrays.
    """
    print("PELE MEJOR QUE MARADONA!")
    print('Start reading features')
    featureVectors = []
    allImgs = []
    allResults = []
    shapes = []
    notProcessed = 0
    totalImgs = 0
    headerPrinted = False
    with open(filename) as f:
        for row in csv.DictReader(f):
            if not headerPrinted:
                print(row.keys())
                headerPrinted = True
            print(totalImgs)
            totalImgs += 1
            featureVector = []
            # Reset per row so a row can never reuse the previous image.
            image_rescaled = None

            try:
                for key in row:  # each row is a dict
                    if key == "timestamp":
                        featureVector.extend(
                            _timestampFeatures(int(row[key])))
                    elif key == "accessibilityCaption":
                        featureVector.extend(_captionFeatures(row[key]))
                    elif key == "imgUrl":
                        image = imageProcess.Image(row[key], True)
                        imageShape = image.getImageShape()
                        shapes.append((imageShape[0], imageShape[1]))
                        image_rescaled = resize(image.skimageImage,
                                                (TARGET_X, TARGET_Y),
                                                anti_aliasing=False)
                    elif key in _IGNORED_COLUMNS:
                        continue
                    elif key in _BOOLEAN_COLUMNS:
                        featureVector.append(int(row[key] == "True"))
                    else:
                        # Keep numeric columns; text columns fail the
                        # conversion and are skipped.
                        try:
                            featureVector.append(float(row[key]))
                        except (TypeError, ValueError):
                            continue
                if image_rescaled is None:
                    raise KeyError("imgUrl")
                label = (float(row["likeCount"])
                         / float(row["userAverageLikes"]))
            except Exception as e:
                notProcessed += 1
                print(e)
                continue

            allResults.append(label)
            allImgs.append(image_rescaled)
            featureVectors.append(featureVector)

    notProcessedRatio = notProcessed / totalImgs if totalImgs else 0.0
    print("not processed: " + str(notProcessedRatio))

    # Text between the first "/" and the 4-character extension; when there
    # is no "/", find() returns -1 and the slice starts at index 0.
    stem = filename[filename.find("/") + 1:-4]
    featureVectors = np.array(featureVectors)
    allResults = np.array(allResults)
    allImgs = np.array(allImgs)

    plt.figure()
    plt.title('image shape distribution')
    plt.ylabel('width')
    plt.xlabel('height')
    if shapes:
        # zip(*shapes) is empty for an empty list, which scatter() rejects.
        plt.scatter(*zip(*shapes))
    plt.savefig(f"datasets/{stem}_distribution.png")
    np.save(f"allImgs_{stem}.npy", allImgs)
    np.save(f"allResults_{stem}.npy", allResults)
    np.save(f"featureVectors_{stem}.npy", featureVectors)
    return allImgs, featureVectors, allResults
Пример #4
0
import imageprocess as imageProcess
import csv
from skimage.transform import rescale, resize, downscale_local_mean

# Scale factor passed to skimage's rescale() for every accepted image.
RESIZE_FACTOR = 0.25

# Script: read the dataset, keep only images whose shape is exactly
# (1080, 1080, 3), rescale them and collect them with their likeRatio.
print('Start reading features')
allImgs = []
allResults = []
notProcessed = 0
totalImgs = 0
correctShape = 0
with open('datasets/neuralnet-firstdataset.csv') as f:
    for row in csv.DictReader(f):
        totalImgs += 1
        try:
            image = imageProcess.Image(row["imgUrl"], True)
            if image.getImageShape() != (1080, 1080, 3):
                # Wrong shape: skipped, but not counted as a failure.
                continue
            image_rescaled = rescale(image.skimageImage,
                                     RESIZE_FACTOR,
                                     anti_aliasing=False,
                                     multichannel=True)
            correctShape += 1
        except Exception as e:
            # Report the failure instead of dropping it silently.
            notProcessed += 1
            print(e)
            continue
        allImgs.append(image_rescaled)
        allResults.append(float(row["likeRatio"]))

# Both counters are 0 when no row was read, so report 0.0 instead of
# dividing by zero.
notProcessedRatio = notProcessed / totalImgs if totalImgs else 0.0
correctShapeRatio = correctShape / totalImgs if totalImgs else 0.0
print(f"not processed: {notProcessedRatio}")
print(f"correct shape total: {correctShape}")
print(f"correct shape ratio: {correctShapeRatio}")