def processItemFullSize(arr):
    """Create or refresh the Meme row for one listing entry.

    ``arr`` is indexed as ``arr['data']``; the keys read from that mapping
    are ``permalink``, ``url``, ``score``, ``thumbnail``, ``author`` and
    ``created``.

    Behaviour:
      * exactly one Meme already has this thread link -> only its score is
        updated;
      * more than one has it -> ``Exception`` is raised;
      * none has it and the URL does not contain ".jpeg" -> the image is
        downloaded to ``dropBoxDir + 'target.jpg'``, classified, and a new
        Meme is saved;
      * none has it and the URL contains ".jpeg" -> nothing happens.

    The result of ``classify.classify`` is used by index: ``[0]`` is the
    matched library filename (or None), ``[1]`` is stored as
    ``strong_classification``, ``[2]`` is stored as ``topDist`` and compared
    against the thresholds, ``[3]`` is stored as ``topCorr``.

    NOTE(review): this file defines ``processItemFullSize`` a second time
    further down; in Python the later definition replaces this one at
    import time.
    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source -- confirm it against the intended control flow.
    """
    data = arr['data']
    thread_link = 'http://reddit.com' + data['permalink']

    # Have we evaluated this submission yet? Might be worth considering only
    # checking memes within the last day.
    existing = Meme.objects.filter(threadLink=thread_link)
    matches = existing.count()  # counted once instead of once per branch
    if matches > 1:
        raise Exception(
            "More than one of the same permalink in db for permalink:" +
            data['permalink'])
    if matches == 1:
        # Already stored: update the score and move on.
        m = existing[0]
        m.score = data['score']
        m.save()
        return
    if ".jpeg" in data['url']:
        # URLs containing ".jpeg" are skipped entirely.
        return

    # Not evaluated yet: download the image, classify it and store it.
    filepath = dropBoxDir + 'target.jpg'
    if ".jpg" in data['url'] or ".png" in data['url']:
        fullSize = data['url']
    else:
        fullSize = fullSizePhoto(data['url'])

    # The context manager closes the file even when the request or the write
    # raises.
    with open(filepath, 'wb') as f:
        f.write(urllib3.PoolManager().request('GET', fullSize).data)

    img_corrupt = False
    c2 = False  # True once the potential_libs directory had to be consulted
    macro = None
    classification = classify.classify(filepath)
    if classification[0] is None and classification[2] is not None:
        c2 = True
        # No library match: try classifying against the potential libs.
        classification2 = classify.classify(
            filepath, directory=dropBoxDir + 'potential_libs/')
        if classification2[0] is None:
            # Unknown there as well: add the image to potential_libs.
            key = data['permalink'].replace('/', '')
            p = PotentialImageMacro(
                thumbnailLink=data['thumbnail'],
                fullSizeLink=fullSize,
                score=data['score'],
                submitter=data['author'],
                source='adviceanimals',
                created=data['created'],
                threadLink=thread_link,
                title=key + '.jpg')
            p.save()
            potentialize(key)
        elif classification2[2] < 20:
            # Only promote a potential lib when very confident.
            lib_name = classification2[0][8:]
            librarize(lib_name)
            macro = ImageMacro.objects.get(filename='library/' + lib_name)
            # Single-argument print(...) prints the same text under the
            # Python 2 print statement and the Python 3 function.
            print("Added " + lib_name +
                  " to the library while classifying: " + fullSize)
            # NOTE(review): assumed to belong to this branch -- confirm.
            classification = classification2
    elif classification[2] is None:
        # No distance value was produced, so the image is flagged corrupt.
        img_corrupt = True
    else:
        macro = ImageMacro.objects.get(filename=classification[0])

    m = Meme(
        classification=macro,
        thumbnailLink=data['thumbnail'],
        fullSizeLink=fullSize,
        score=data['score'],
        submitter=data['author'],
        topDist=classification[2],
        topCorr=classification[3],
        source='adviceanimals',
        created=data['created'],
        threadLink=thread_link,
        strong_classification=classification[1],
        img_corrupt=img_corrupt)
    m.save()

    # Test for a match first so an unmatched result never reaches the
    # numeric comparison.
    if classification[0] is not None and classification[2] < 25:
        if c2:
            merge(classification[0], macro, detract=1)
        else:
            merge(classification[0], macro)
def processItem(arr, target):
    """Create or refresh the Meme row for one submission.

    ``arr`` is read by key: ``threadLink``, ``score``, ``thumbnailLink``,
    ``fullSizeLink``, ``author``, ``source`` and ``created``. ``target`` is
    handed unchanged to ``classify.classify``, ``potentialize`` and ``merge``.

    Behaviour:
      * more than one Meme already has this thread link -> a message is
        printed and the function returns without touching the database;
      * exactly one has it -> only its score is updated;
      * none has it -> the target is classified against 'macros' (and, when
        that yields no match, against 'potentialmacros'), a new Meme is
        saved, and ``updateName`` is called when it received a
        classification.

    The result of ``classify.classify`` is used by index: ``[0]`` is the
    matched macro key (or None), ``[1]`` is compared against the thresholds
    and stored as ``topDist``, ``[2]`` is stored as ``topCorr``.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source -- confirm it against the intended control flow.
    """
    thread_link = arr['threadLink']

    # Have we evaluated this submission yet? Might be worth considering only
    # checking memes within the last day, or otherwise making the filter
    # stronger.
    existing = Meme.objects.filter(threadLink=thread_link).distinct()
    # Single-argument print(...) prints the same text under the Python 2
    # print statement and the Python 3 function.
    print("Processing thread: " + thread_link)

    matches = existing.count()  # counted once instead of once per branch
    if matches > 1:
        print("More than one of the same permalink in db for permalink:" +
              thread_link)
        return
    if matches == 1:
        # Already stored: update the score and move on.
        print("Repeat submission. Updating score and moving on...")
        m = existing[0]
        m.score = arr['score']
        m.save()
        return

    # Not evaluated yet: run the classifiers and store the result.
    img_corrupt = False
    macro = None
    classification = classify.classify(target, 'macros')
    if classification[0] is None and classification[1] is not None:
        # No library match: try classifying against the potential macros.
        classification = classify.classify(target, 'potentialmacros')
        if classification[0] is None:
            # Unknown there as well: record it as a new potential macro.
            key = thread_link.replace('/', '')
            p = PotentialImageMacro(
                thumbnailLink=arr['thumbnailLink'],
                fullSizeLink=arr['fullSizeLink'],
                score=arr['score'],
                submitter=arr['author'],
                source=arr['source'],
                created=arr['created'],
                threadLink=thread_link,
                key=key)
            p.save()
            potentialize(key, target)
            print("Added as potential macro.")
        elif classification[1] < 14:
            # Only promote a potential macro when very confident.
            librarize(classification[0])
            macro = ImageMacro.objects.get(key=classification[0])
            merge(macro, target)
            print("Moved " + classification[0] +
                  " over to the library, and classified this item as such.")
        # Unaddressed case: weak classification. Do not want to classify as
        # potential because doing so is going out on a limb without strong
        # reason to do so. Also do not want to add it as a potential macro
        # because it is likely redundant.
    elif classification[1] is None:
        # Image must be corrupt because a value was not attained for the
        # closest match's distance.
        print("Image corrupt")
        img_corrupt = True
    else:
        macro = ImageMacro.objects.get(key=classification[0])
        if classification[1] < 25:
            merge(macro, target)
        # NOTE(review): assumed to run for every library match, not only
        # when the merge above happens -- confirm.
        print("Classified as " + classification[0])

    m = Meme(
        classification=macro,
        thumbnailLink=arr['thumbnailLink'],
        fullSizeLink=arr['fullSizeLink'],
        score=arr['score'],
        submitter=arr['author'],
        topDist=classification[1],
        topCorr=classification[2],
        source=arr['source'],
        created=arr['created'],
        threadLink=thread_link,
        img_corrupt=img_corrupt,
        name=name(arr['fullSizeLink']))
    m.save()
    if m.classification is not None:
        updateName(m.classification)
def processItemFullSize(arr):
    """Create or refresh the Meme row for one listing entry.

    ``arr`` is indexed as ``arr['data']``; the keys read from that mapping
    are ``permalink``, ``url``, ``score``, ``thumbnail``, ``author`` and
    ``created``.

    Behaviour:
      * exactly one Meme already has this thread link -> only its score is
        updated;
      * more than one has it -> ``Exception`` is raised;
      * none has it and the URL does not contain ".jpeg" -> the image is
        downloaded to ``dropBoxDir + 'target.jpg'``, classified, and a new
        Meme is saved;
      * none has it and the URL contains ".jpeg" -> nothing happens.

    The result of ``classify.classify`` is used by index: ``[0]`` is the
    matched library filename (or None), ``[1]`` is stored as
    ``strong_classification``, ``[2]`` is stored as ``topDist`` and compared
    against the thresholds, ``[3]`` is stored as ``topCorr``.

    NOTE(review): an earlier definition with the same name appears above in
    this file; this later one is the binding that remains after import.
    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source -- confirm it against the intended control flow.
    """
    data = arr['data']
    thread_link = 'http://reddit.com' + data['permalink']

    # Have we evaluated this submission yet? Might be worth considering only
    # checking memes within the last day.
    existing = Meme.objects.filter(threadLink=thread_link)
    matches = existing.count()  # counted once instead of once per branch
    if matches > 1:
        raise Exception(
            "More than one of the same permalink in db for permalink:" +
            data['permalink'])
    if matches == 1:
        # Already stored: update the score and move on.
        m = existing[0]
        m.score = data['score']
        m.save()
        return
    if ".jpeg" in data['url']:
        # URLs containing ".jpeg" are skipped entirely.
        return

    # Not evaluated yet: download the image, classify it and store it.
    filepath = dropBoxDir + 'target.jpg'
    if ".jpg" in data['url'] or ".png" in data['url']:
        fullSize = data['url']
    else:
        fullSize = fullSizePhoto(data['url'])

    # The context manager closes the file even when the request or the write
    # raises.
    with open(filepath, 'wb') as f:
        f.write(urllib3.PoolManager().request('GET', fullSize).data)

    img_corrupt = False
    c2 = False  # True once the potential_libs directory had to be consulted
    macro = None
    classification = classify.classify(filepath)
    if classification[0] is None and classification[2] is not None:
        c2 = True
        # No library match: try classifying against the potential libs.
        classification2 = classify.classify(
            filepath, directory=dropBoxDir + 'potential_libs/')
        if classification2[0] is None:
            # Unknown there as well: add the image to potential_libs.
            key = data['permalink'].replace('/', '')
            p = PotentialImageMacro(
                thumbnailLink=data['thumbnail'],
                fullSizeLink=fullSize,
                score=data['score'],
                submitter=data['author'],
                source='adviceanimals',
                created=data['created'],
                threadLink=thread_link,
                title=key + '.jpg')
            p.save()
            potentialize(key)
        elif classification2[2] < 20:
            # Only promote a potential lib when very confident.
            lib_name = classification2[0][8:]
            librarize(lib_name)
            macro = ImageMacro.objects.get(filename='library/' + lib_name)
            # Single-argument print(...) prints the same text under the
            # Python 2 print statement and the Python 3 function.
            print("Added " + lib_name +
                  " to the library while classifying: " + fullSize)
            # NOTE(review): assumed to belong to this branch -- confirm.
            classification = classification2
    elif classification[2] is None:
        # No distance value was produced, so the image is flagged corrupt.
        img_corrupt = True
    else:
        macro = ImageMacro.objects.get(filename=classification[0])

    m = Meme(
        classification=macro,
        thumbnailLink=data['thumbnail'],
        fullSizeLink=fullSize,
        score=data['score'],
        submitter=data['author'],
        topDist=classification[2],
        topCorr=classification[3],
        source='adviceanimals',
        created=data['created'],
        threadLink=thread_link,
        strong_classification=classification[1],
        img_corrupt=img_corrupt)
    m.save()

    # Test for a match first so an unmatched result never reaches the
    # numeric comparison.
    if classification[0] is not None and classification[2] < 25:
        if c2:
            merge(classification[0], macro, detract=1)
        else:
            merge(classification[0], macro)