def main(): print("Welcome to Fish Classification System...") # Clear previously identified fish images result from Test_Classification, Test_Image_Detection_Results and Result folder. print(">>>>>>>>>>>>>>>>>>>") print("Cleaning previous result...") clear_directory(fp.detection_results_folder) clear_directory(fp.image_classification_source) # Find the Images path for the object detection using YOLOv3. print(">>>>>>>>>>>>>>>>>>>") print("Managing photos for detection...") test_image_YOLOv3 = find_test_image_YOLOv3() # Use YOLOv3 for object detection and return path. print(">>>>>>>>>>>>>>>>>>>") print("Running YOLOv3 for detection...") detection_results_folder = dt.detect(test_image_YOLOv3) # Function to crop the detected image from the YOLOv3 prediction. print(">>>>>>>>>>>>>>>>>>>") print("Cropping detected images from.." + detection_results_folder) crop_detected_image(detection_results_folder) # classify images using tiny VGGNet print(">>>>>>>>>>>>>>>") print("Running Smaller VGGNet for classification...") cs.classify(fp.vggnet_model_path, fp.pickle_path, fp.image_classification_source, fp.image_classification_output)
def run_tees(q, id_list):
    current_dir = os.getcwd()
    ignore_list_path = current_dir + '/text_files/id_ignore_list.txt'
    pmid_ignore_list = []
    with open(ignore_list_path, 'r') as f:
        reader = csv.reader(f, delimiter='\t')
        for pmid_list in reader:
            pmid_ignore_list = pmid_list
    file_path_list = []
    for pmid in id_list:
        if pmid in pmid_ignore_list:
            continue
        # os.getcwd() has no trailing separator, so the path needs a leading slash
        file_path = current_dir + '/output/genes/{0}/{1}'.format(q, pmid)
        addition = '-pred.xml.gz'
        file_path_check = file_path + addition
        file_path_list.append(file_path)
        if os.path.exists(file_path_check):
            print '--------------------------------SKIPPING ALREADY DOWNLOADED ABSTRACT {0}-------------------------------------------'.format(pmid)
        else:
            try:
                classify.classify(pmid, 'GE11', file_path)
            except (ValueError, UnicodeEncodeError, AssertionError, IndexError) as e:
                file_path_list.remove(file_path)
                with open(ignore_list_path, 'a') as f:
                    f.write(pmid + '\t')
    return file_path_list

def get_family(pprs):
    try:
        classify.classify([p.seq_record for p in pprs])
    except RuntimeError, e:
        print e
        raw_input("Press enter to continue")
        raise e

def main():
    args = parse_args()
    log(args, '\n' + str(args))
    if args.mode == 'train':
        comparision.train(args)
    elif args.mode == 'classify':
        classify(args)
    elif args.mode == 'dataset':
        dataset.build_dataset(args)
    elif args.mode == 'run':
        run_thread = run.MyThread(0, args)
        network_thread = run.MyThread(1, args)
        run_thread.start()
        network_thread.start()
        while network_thread.is_alive():
            continue
    elif args.mode == 'normalize':
        start_time = time.clock()
        if args.split_dir != 'none':
            splits = dataset.split_images(args, start_time)
            log(args, '{:.5f}'.format(time.clock() - start_time) + 's ' + 'Images have been split')
        if args.normalize:
            dataset.normalize(args, start_time)
            if args.split_dir != 'none':
                dataset.normalize(args, start_time, dirs=splits)
    else:
        log(args, 'Please select a mode using the tag --mode, use --help for help.', True)

def testTrees(treeSet, testSet, attributes, tieBreaker):
    binaryClassification = []
    sampleSize = []
    distribution = []
    for i in range(len(testSet)):
        row = []
        for j in range(len(treeSet)):
            row.append(c.classify(treeSet[j], testSet[i], attributes)[0][0])
        binaryClassification.append(row)
    binaryClassification = np.vstack(binaryClassification)
    for i in range(len(testSet)):
        row = []
        for j in range(len(treeSet)):
            row.append(c.classify(treeSet[j], testSet[i], attributes)[1][0])
        sampleSize.append(row)
    sampleSize = np.vstack(sampleSize)
    for i in range(len(testSet)):
        row = []
        for j in range(len(treeSet)):
            row.append(c.classify(treeSet[j], testSet[i], attributes)[2][0])
        distribution.append(row)
    distribution = np.vstack(distribution)
    output = finalPredictions(binaryClassification, sampleSize, distribution, tieBreaker)
    return output

def test_class_any_failure_throws():
    class A:
        a = str
        b = int
        c = float

    with raises(TypeError):
        classify(A, {'a': 'Hello world!', 'b': 1.1, 'c': 1.1})

def processItemFullSize(arr):
    data = arr['data']
    q = Meme.objects.filter(threadLink='http://reddit.com' + data['permalink'])
    # Have we evaluated this submission yet? Might be worth considering only checking
    # memes within the last day.
    if q.count() > 1:
        raise Exception("More than one of the same permalink in db for permalink:" + data['permalink'])
    if q.count() == 1:
        # If we have, update the score and move on.
        m = q[0]
        m.score = data['score']
        m.save()
    elif ".jpeg" not in data['url']:
        # Have not evaluated this submission yet; run tests and store.
        filepath = dropBoxDir + 'target.jpg'
        if ".jpg" in data['url'] or ".png" in data['url']:
            fullSize = data['url']
        else:
            fullSize = fullSizePhoto(data['url'])
        f = open(filepath, 'wb')
        f.write(urllib3.PoolManager().request('GET', fullSize).data)
        f.close()
        img_corrupt = False
        c2 = False
        # classify.classify() gets 2 elements: image macro/none, strong/weak
        classification = classify.classify(filepath)
        if classification[0] == None and classification[2] != None:
            c2 = True
            macro = None
            # Try classifying on potential libs.
            classification2 = classify.classify(filepath, directory=dropBoxDir + 'potential_libs/')
            if classification2[0] == None:
                # Add image to potential_libs.
                p = PotentialImageMacro(thumbnailLink=data['thumbnail'], fullSizeLink=fullSize,
                                        score=data['score'], submitter=data['author'],
                                        source='adviceanimals', created=data['created'],
                                        threadLink='http://reddit.com' + data['permalink'],
                                        title=data['permalink'].replace('/', '') + '.jpg')
                p.save()
                potentialize(data['permalink'].replace('/', ''))
            elif classification2[2] < 20:
                # Only classify as potential_lib if very confident.
                librarize(classification2[0][8:])
                macro = ImageMacro.objects.get(filename='library/' + classification2[0][8:])
                print "Added " + classification2[0][8:] + " to the library while classifying: " + fullSize
                classification = classification2
        elif classification[2] == None:
            macro = None
            img_corrupt = True
        else:
            macro = ImageMacro.objects.get(filename=classification[0])
        m = Meme(classification=macro, thumbnailLink=data['thumbnail'], fullSizeLink=fullSize,
                 score=data['score'], submitter=data['author'], topDist=classification[2],
                 topCorr=classification[3], source='adviceanimals', created=data['created'],
                 threadLink='http://reddit.com' + data['permalink'],
                 strong_classification=classification[1], img_corrupt=img_corrupt)
        m.save()
        if classification[2] < 25 and classification[0] != None:
            if c2:
                merge(classification[0], macro, detract=1)
            else:
                merge(classification[0], macro)

def test_intersection():
    class A:
        a = int

    class B:
        b = str

    i_type = intersection(A, B)
    classify(i_type, {'a': 1, 'b': 'foobar'})

def processItem(arr, target):
    q = Meme.objects.filter(threadLink=arr['threadLink']).distinct()
    # Have we evaluated this submission yet? Might be worth considering only checking
    # memes within the last day, or otherwise making the filter stronger.
    print "Processing thread: " + arr['threadLink']
    if q.count() > 1:
        print "More than one of the same permalink in db for permalink:" + arr['threadLink']
        return
    if q.count() == 1:
        # If we have, update the score and move on.
        print "Repeat submission. Updating score and moving on..."
        m = q[0]
        m.score = arr['score']
        m.save()
    else:
        # Have not evaluated this submission yet; run tests and store.
        # classify.classify() gets 2 elements: image macro/none, strong/weak
        img_corrupt = False
        classification = classify.classify(target, 'macros')
        if classification[0] == None and classification[1] != None:
            macro = None
            # Try classifying on potential libs.
            classification = classify.classify(target, 'potentialmacros')
            if classification[0] == None:
                # Add image to potential_libs.
                p = PotentialImageMacro(thumbnailLink=arr['thumbnailLink'], fullSizeLink=arr['fullSizeLink'],
                                        score=arr['score'], submitter=arr['author'], source=arr['source'],
                                        created=arr['created'], threadLink=arr['threadLink'],
                                        key=arr['threadLink'].replace('/', ''))
                p.save()
                potentialize(arr['threadLink'].replace('/', ''), target)
                print "Added as potential macro."
            elif classification[1] < 14:
                # Only classify as potential if very confident.
                librarize(classification[0])
                macro = ImageMacro.objects.get(key=classification[0])
                merge(macro, target)
                print "Moved " + classification[0] + " over to the library, and classified this item as such."
            # Unaddressed case: weak classification. Do not want to classify as potential because
            # doing so is going out on a limb without strong reason to do so. Also do not want to
            # add it as a potential macro because it is likely redundant.
        elif classification[1] == None:
            # Image must be corrupt because a value was not attained for closest with distance.
            print "Image corrupt"
            macro = None
            img_corrupt = True
        else:
            macro = ImageMacro.objects.get(key=classification[0])
            if classification[1] < 25:
                merge(macro, target)
            print "Classified as " + classification[0]
        m = Meme(classification=macro, thumbnailLink=arr['thumbnailLink'], fullSizeLink=arr['fullSizeLink'],
                 score=arr['score'], submitter=arr['author'], topDist=classification[1],
                 topCorr=classification[2], source=arr['source'], created=arr['created'],
                 threadLink=arr['threadLink'], img_corrupt=img_corrupt, name=name(arr['fullSizeLink']))
        m.save()
        if m.classification != None:
            updateName(m.classification)

def process(pkt, node, timeSeen):
    ip = pkt.getlayer(IP)
    ether = pkt.getlayer(Ether)
    pprint.pprint(pkt)
    d.default('[+] Time: {}'.format(timeSeen))
    # add to db
    id = db.addData(ether.src, ether.dst, ip.src, ip.dst, timeSeen, node, "", "", "icmp")
    # forward data to classify
    cf.classify(pkt, "icmp", node, timeSeen, id)

def test_class_any_failure_in_nesting_throws():
    class B:
        d = int

    class A:
        a = str
        b = B
        c = float

    with raises(TypeError):
        classify(A, {'a': 'Hello world!', 'b': {'d': 1.1}, 'c': 1.1})

def start_classify(trainDir, testDir, pathName, train, counter, alg):
    try:
        if not train:
            classify(testDir, path='%s_%s' % (pathName, 'test_score'), counter=counter, alg=alg)
        if train:
            classify(trainDir, path='%s_%s' % (pathName, 'train_score'), counter=counter, alg=alg)
        create_confusion_matrix(trainDir, testDir,
                                out_dir=os.path.abspath(os.path.join(trainDir)),
                                path_name=pathName, counter=counter, alg=alg)
    except Exception as e:
        print e.message

def problem(args):
    p = GenericProblem(
        args["active_constraints"],
        args["passive_constraints"],
        args["leaf_constraints"],
        args["root_constraints"],
        leaf_allow_all=(args["leaf_constraints"] == []),
        root_allow_all=(args["root_constraints"] == []),
        flags=ProblemFlags(
            is_tree=args["is_tree"],
            is_cycle=args["is_cycle"],
            is_path=args["is_path"],
        ),
    )
    classified_problem = get_classified_problem_obj(p)
    if classified_problem is not None:
        return jsonify({
            "problem": classified_problem.to_problem().dict(),
            "result": classified_problem.to_response().dict(),
        })
    else:
        res = classify(p)
        if not (res.det_lower_bound == CONST and
                res.det_upper_bound == UNSOLVABLE and
                res.rand_lower_bound == CONST and
                res.rand_upper_bound == UNSOLVABLE):
            store_problem_and_classification(p, res)
        return jsonify({"problem": p.dict(), "result": res.dict()})

def classify_post():
    input = request.get_json().get('text')
    if input and len(input) > 0:
        sent, prob = classify(input)
        return jsonify({'sentiment': sent, 'prob': prob})
    else:
        return jsonify({'error': 'There was an error with your request'})

def test_class_returns_instanceof_type():
    class A:
        a = str
        b = int
        c = float

    assert isinstance(classify(A, {'a': 'Hello world!', 'b': 1, 'c': 1.1}), A)

def main(_):
    img, cookies = getCaptcha()
    with open('test.jpg', 'wb') as f:
        f.write(img)
    crop_image()
    create_graph()
    chinese = recog_chinese()
    fetch_image_baidu(chinese)
    img = mpimg.imread('test.jpg')
    plt.imshow(img)
    plt.show()
    result = run_inference_on_image("images/", 8)
    for item in result:
        print(item)
    result_reference = run_inference_on_image("tmp/", 10, add_id=False)
    for item in result_reference:
        print(item)
    res = classify(result, result_reference)
    print('Check "test.jpg"')
    print(chinese)
    (code, msg) = checkCaptcha(res, cookies)
    if code != 4:
        print('Error #%d: %s' % (code, msg))
    else:
        print('Pass!')

def normal_detect(self):
    # print self.normfname, type(self.normfname)
    dataset = str(self.DataSetChooser.currentText())
    self.normx = png2cvs(str(self.normfname), dataset)
    self.normy = classify(self.normx, dataset)
    label = 1
    if dataset == 'roadsign':
        checkbox = {
            18: 'can not go in',
            7: 'walkman',
            22: 'Can\'t go in',
            32: 'limit 70 kph',
            25: 'large car',
            31: 'double car rouble',
            28: 'stop'
        }
        try:
            b = checkbox[int(self.normy)]
            message = 'this is a picture showing ' + b
        except KeyError:
            message = 'this is a picture showing roadsign number ' + str(self.normy) + '\n'
        # dist = calc_single_bim(self.normx, self.sess)
        # label = detect(dist)
    else:
        message = "This picture is number " + str(self.normy) + "\n"
    if label == 0:
        message = message + "this is a normal picture"
    QMessageBox.information(self, "Classify Results", QString(message))

def attack(inputImage, actualperson, minconf, baselinesuccess, maxrounds, initialperturbation, perturbationscale):
    # read image
    m = cv2.imread(inputImage, 0)
    h, w = np.shape(m)
    # initialize variable for keeping track of number of perturbations
    perturbationcount = 0
    # baseline confidence for this attack
    baselineconf = minconf
    # set initial perturbation values
    perturbation = initialperturbation
    for iter in range(maxrounds):
        filename = 'attack' + str(perturbationcount) + '.jpg'
        # apply perturbation using a checkerboard approach, bounding possible pixel values between 0 and 255
        for i in range(0, h):
            for j in range(0, w):
                if (j % 2 == 1 and i % 2 == 0) or (j % 2 == 0 and i % 2 == 1):
                    if m[i][j] + perturbation > 255:
                        m[i][j] = 255
                    else:
                        m[i][j] = m[i][j] + perturbation
                else:
                    # remaining cells (i and j with the same parity) get the negative perturbation
                    if m[i][j] - perturbation < 0:
                        m[i][j] = 0
                    else:
                        m[i][j] = m[i][j] - perturbation
        # save perturbed image
        cv2.imwrite(filename, m)
        # apply classifier to perturbed image
        labelresults = classify.classify(actualperson, filename)
        # success indicates whether image was misclassified (1=yes, 0=no)
        success = labelresults[0]
        # targetconf is the classifier's confidence level that the image is the actual person id
        targetconf = labelresults[1]
        # personclass is the id of the person who the classifier classified the image as
        personclass = labelresults[2]
        # print(actualperson + ": " + str(targetconf) + "\n")
        # update minimum confidence
        if targetconf < minconf:
            minconf = targetconf
        if success == 1:
            break
        else:
            # update perturbation amount based on perturbation scale variable
            perturbation = perturbationscale
        # increment count of perturbations
        perturbationcount += 1
    # if after max rounds, we were not able to get classifier to mis-classify the image
    if success == 0:
        print("Was not able to get classifier to mis-classify the person.")
    # make a copy of the final perturbed image
    newFileName = actualperson + "-round" + str(perturbationcount) + ".jpg"
    copyfile(filename, newFileName)
    # clean up unnecessary files
    for f in os.listdir('.'):
        if re.search("attack*", f):
            os.remove(os.path.join('.', f))
    return success, minconf, newFileName, perturbationcount, personclass

def basic_classifier(self):
    """
    Basic classifier given as an example in the Assigment_2 zip file.
    :return:
    """
    total_instances = 0  # Total number of instances tested
    total_correct = 0    # Correctly predicted instances
    for trainIndex, testIndex in self.kf.split(self.x):
        train_set = self.x[trainIndex]
        test_set = self.x[testIndex]
        train_labels = self.y[trainIndex]
        test_labels = self.y[testIndex]
        predicted_labels = classify(train_set, train_labels, test_set)
        correct = 0
        for i in range(test_set.shape[0]):
            if predicted_labels[i] == test_labels[i]:
                correct += 1
        print('Accuracy: ' + str(float(correct) / test_labels.size))
        total_correct += correct
        total_instances += test_labels.size
    print('Total Accuracy: ' + str(total_correct / float(total_instances)))

def crawl(domains):
    pool = WorkerManager(1000)
    for domain in domains:
        pool.add_job(fetch, domain)
    pool.wait_for_complete()
    domain_results = {}
    keywords_list = []
    need_classify_domains = []
    while True:
        try:
            domain, keywords, category = pool.get_result(block=False)
            if category is None:
                need_classify_domains.append(domain)
                keywords_list.append(keywords)
            else:
                domain_results[domain] = category
        except Queue.Empty:
            break
    pred = classify(keywords_list)
    classify_results = dict(zip(need_classify_domains, pred))
    domain_results.update(classify_results)
    set_domain_category(domain_results)
    return domain_results

def getDatasetPosition():
    # try:
    if True:
        url = "http://localhost:4040/environment/"
        openfile = urllib.urlopen(url)
        html = openfile.read().decode("utf-8")
        soup = BeautifulSoup(html, "html.parser")
        # con = str(html)
        tables = soup.findAll('table')
        content = str(tables[2])
        command = "echo \"" + content + "\" | grep -o \"<td>sun.java.command</td><td>[^<]*</td>\""
        line = os.popen(command).read().strip()
        javacommand = line.replace("<td>sun.java.command</td>", "").replace("<td>", "").replace("</td>", "")
        # print javacommand
        program = csf.classify(javacommand)
        command = "echo \"" + javacommand + "\" | grep -o \"file:[^ ]*\" | sed -n 1p"
        line = os.popen(command).read().strip()
        print "+++++++++++++++"
        # print line
        res = ""
        res += program
        res += " " + line
        return res

def review_pipe(review: str, aspect_sent: dict, terms_dict: dict) -> tuple:
    """
    Fixes co-referencing, splits the review into sentences, removes special
    characters from sentences, lemmatizes, and classifies each sentence using a
    pre-trained model. Finds sentiments in each sentence and assigns them to aspects.

    Returns:
        aspect_sent: defaultdict
            Dictionary of aspects with total positive and negative sentiments.
            Example: {'ambience': Counter({'pos': 568.75, 'neg': 251.0})}
        terms_dict: defaultdict
            Dictionary of aspects with respective terms and their values.
            Example: {'ambience': Counter({'atmosphere': 59.25, 'location': 33.75})}
    """
    review = replace_pronouns(review)
    sentences = split_sentence(review)
    sentiment_dict = Counter()
    for sentence in sentences:
        sentence = remove_special_chars(str(sentence))
        sentence = lemmatize_sentence(sentence)
        sentence = fix_output(sentence)
        predicted_aspect = classify(sentence.lower())
        sentiment_dict = find_sentiments(sentence.lower())
        aspect_sent, terms_dict = assign_term_to_aspect(
            aspect_sent, terms_dict, sentiment_dict, predicted_aspect[0])
    return aspect_sent, terms_dict

def main():
    try:
        threshold = float(request.form.get('threshold'))
        img_bytes = BytesIO(request.files.get('img').read())
    except Exception:
        return 'bad request', 400
    try:
        img = Image.open(img_bytes).resize((224, 224))
        img_numpy = np.asarray(img.getdata()).astype('uint8')
        if img_numpy.size == 224 * 224 * 1:
            img_numpy = np.array([img_numpy] * 3)
        elif img_numpy.size == 224 * 224 * 4:
            img_numpy = img_numpy.reshape((224, 224, 4))[:, :, :3]
        img_numpy = img_numpy[:, :, :3]
    except IOError:
        return 'bad image', 400
    results = classify(img_numpy, threshold)
    resp = '\n'.join(['<tag>{}</tag>'.format(tag) for tag in results])
    return resp

def homepage():
    if request.method == "POST":
        s3 = boto3.resource('s3')
        bucket = s3.Bucket('imagen50')
        bucket.upload_fileobj(request.files['input-b1'],
                              request.files['input-b1'].filename,
                              ExtraArgs={'ContentType': 'image/jpeg'})
        link = 'https://s3.us-east-2.amazonaws.com/imagen50/%s' % urllib.quote_plus(
            request.files['input-b1'].filename)
        classifications = watson.classify(link)
        classes = {}
        classif = []
        for classification in classifications['images'][0]['classifiers'][0]['classes']:
            classes[classification['class']] = classification['score']
            classif.append(classification['class'])
        # classif = sorted(classes, key=lambda x: -classes[x])
        content = '<table class="table table-hover table-bordered text-center thead-light"><thead><tr><th>Rank</th><th>Guess</th></tr></thead><tbody>'
        return render_template(
            'homepage.html',
            WordCount=content + "<tr><td>1st Guess</td><td>" + classif[0] +
            "</td></tr><tr><td>2nd Guess</td><td>" + classif[1] +
            "</td></tr><tr><td>3rd Guess</td><td>" + classif[2] +
            "</td></tr></tbody></table>")
    else:
        return render_template('homepage.html', WordCount="")

def main():
    # load the dataset
    train, test = mmv.format()
    # get the mean, median, and variance patterns for each number 0-9
    meanmedianvar = mmv.mmv(train)
    # get the final weight maps for each number 0-9
    weightmatrix = gen_weights.gen_weights(meanmedianvar)
    # get the bias
    outputs = [
        applyWeights.applyWeights(weightmatrix, np.array(train[i]).T)
        for i in range(10)
    ]
    b = bias.gen_bias(weightmatrix)
    # initialize variables
    misclassified = [[], [], [], [], [], [], [], [], [], []]
    # run on test data
    for i in range(len(test)):
        for j in test[i]:
            wx = applyWeights.applyWeights(weightmatrix, j)
            wxb = classify.classify(wx, b)
            if i != wxb:
                misclassified[i] += [j]
    # output images and the weight patterns after classification
    output_results(meanmedianvar, weightmatrix, misclassified, test)

def duenote(index):
    target = list_of_readable_files[int(index)]
    text = documents.get_text_from_file(target)
    textSnippets = search.search(text)
    data = []
    for text in textSnippets:
        labels = classify(text)
        data.append(labels)
    scheduling, todo = parseJSON(data)
    google_calendar = GoogleCalendar()
    google_task = GoogleTask()
    for t in todo:
        print("Sending task\n")
        google_task.add_task(t)
        t.due_date = datetime.strftime(parser.parse(t.due_date), '%m/%d/%y')
    for s in scheduling:
        print("Sending cal\n")
        google_calendar.send_to_google_calendar(s)
        s.start_time = datetime.strftime(parser.parse(s.start_time), '%m/%d/%y')
    return render_template('duenote.html', scheduling=scheduling, todo=todo)

def handwriting():
    hwlabel = []
    filelist = os.listdir('trainingDigits')
    m = len(filelist)
    trainMat = np.zeros((m, 1024))
    for i in range(m):
        name = filelist[i].split('.')[0]
        num = int(name.split('_')[0])
        hwlabel.append(num)
        trainMat[i, :] = image_parser.image2vector('trainingDigits/' + filelist[i])
    filelist = os.listdir('testDigits')
    m = len(filelist)
    error = 0
    testMat = np.zeros((1, 1024))
    for i in range(m):
        label = int(filelist[i].split('.')[0].split('_')[0])
        testMat = image_parser.image2vector('testDigits/' + filelist[i])
        res = classify.classify(testMat, trainMat, hwlabel, 3)
        # print "Return value : %d Actual value : %d" % (res, label)
        if res != label:
            error += 1
    print "Files : %d" % (m)
    print "Errors : %d" % (error)
    print "Error percentage : %f" % (error * 100 / float(m))

def plot(dataset_filename):
    filtered_tweets = filter.filter(dataset_filename)
    relevant_tweets = classify.classify(filtered_tweets)
    counts_per_time_unit = dict()
    for tweet in relevant_tweets:
        # get python date from the tweet time
        tweet_date = dateparser.parse(tweet.time)
        # get key to uniquely identify the date and hour
        key = tweet_date.strftime("%Y-%m-%d: %H")
        if key in counts_per_time_unit:
            counts_per_time_unit[key] += 1
        else:
            counts_per_time_unit[key] = 1
    dates = sorted(counts_per_time_unit.keys())
    if not os.path.exists('distributions'):
        os.makedirs('distributions')
    # get distribution filename from the data source's filename
    distribution_filename = dataset_filename.split('/')[len(dataset_filename.split('/')) - 1]
    # remove the previous file extension
    distribution_filename = distribution_filename.split('.')[0]
    f = open('distributions/' + distribution_filename + '.csv', 'w')
    for key in dates:
        f.write(str(key) + "," + str(counts_per_time_unit[key]) + "\n")
    f.close()
    print "Successfully generated file", 'distributions/' + distribution_filename + '.csv'

def post(self):
    json_data = request.get_json()
    c_class, f_class = cq.classify(json_data["question"], model_path, en_nlp)
    output_message = {"c_class": str(c_class), "f_class": str(f_class)}
    return output_message, 201

def predict():
    filestr = request.files['image'].read()
    npimg = np.fromstring(filestr, np.uint8)
    image = cv2.imdecode(npimg, cv2.IMREAD_COLOR)
    pred = classify(image, face_model, svm_model)
    return pred

def __init__(self, name, *args, **kw):
    super().__init__(name, *args, **kw)
    self.dir_name = os.path.expanduser("~/" + "Downloads")
    filenames, Y, bag = classify.read_dir(self.dir_name)
    cls, bag = classify.prepare_data(filenames, Y, bag, from_files=False)
    prediction = classify.classify(self.dir_name, cls, bag)
    self.prediction = dict([(x, prediction[x]) for x in sorted(prediction.keys())])

def do(image):
    result = classify.classify(image)
    if result[0] in ('enter',):
        sendmail.go(image, result)
    if result[0] in ('exit',):
        sendmail.go(image, result)

def make_generic_qns(article):
    category = classify.classify(article)
    # such efficiency, opening it twice...
    with open(article) as f:
        first_line = f.readline()
    match = re.match(r'(.*?)\s*\(', first_line)
    if match is None:
        name = first_line.rstrip('\n')
    else:
        name = match.group(1)
    generic_qns = generic.lookup[category]
    for qn in generic_qns:
        print qn.format(name)

def main(q1, q2, articles):
    id_list = pmids.main(q1, q2, articles)
    # pmid_xml = pmids_xml.main(q1, q2, articles)
    # tees_wrapper.main(pmid_xml)
    print id_list
    classify.classify('9668063', 'GE11', 'home/ubuntu/output/one_at_a_time/oneatatime')
    # for pmid in id_list:
    #     file_path = '/home/ubuntu/output/pmids/%s' % pmid
    #     if os.path.isdir(file_path):
    #         continue
    #     else:
    #         classify.classify(pmid, 'GE11', file_path)
    # f = tempfile.NamedTemporaryFile()
    # try:
    #     print 'temp:', temp
    #     print 'temp.name:', temp.name
    #     f.write(pmid_xml)
    print 'a'

def do_classify_weighted(cf, remote_cf, model, lastimage):
    if remote_cf:
        t = classify.classify_remote(model, lastimage, 0, remote_cf)
    else:
        t = classify.classify(model, lastimage, 0)
    targets = dict(sorted(t.items(), key=lambda x: -x[1])[:cf['ntween']])
    if cf['nsample'] < cf['ntween']:
        ts = random.sample(targets.keys(), cf['nsample'])
        ots = targets
        targets = {t: ots[t] for t in ts}
    print targets
    return targets

def do_classify(cf, remote_cf, model, lastimage):
    if cf['weighted']:
        return do_classify_weighted(cf, remote_cf, model, lastimage)
    targets = []
    if remote_cf:
        targets = classify.classify_remote(model, lastimage, cf['ntween'], remote_cf)
    else:
        targets = classify.classify(model, lastimage, cf['ntween'])
    if cf['nsample'] < cf['ntween']:
        targets = random.sample(targets, cf['nsample'])
    print targets
    return ','.join([str(x) for x in targets])

def main(filename, ratio, k):
    trainset = pd.DataFrame()
    testset = pd.DataFrame()
    dataset = pd.DataFrame()
    trainset, testset, dataset = loaddata.loadDataset(filename, ratio, trainset, testset)
    # datasetplot.datasetplot(dataset)
    # datasetplot.datasetplot(trainset)
    # datasetplot.datasetplot(testset)
    result = classify.classify(trainset, testset, k)
    # print 'The result is:\n', result
    # print 'The result accuracy rate is:', testresult.testresult(testset, result), '%'
    return testresult.testresult(testset, result)

def main(parser):
    """ Drives the program. """
    args = parser.parse_args()
    if args.confidence not in (0, 95, 99):
        print("chisq argument invalid; must be either 0, 95, 99")
        sys.exit()
    print(args.confidence)
    train_data = []
    # read the file
    read_file(train_data, args.train)
    decision_tree = id3.build_tree(train_data, args.confidence)
    draw_tree(decision_tree, str(args.confidence))
    validation_data = []
    read_file(validation_data, args.validation)
    classify.classify(decision_tree, validation_data, False, str(args.confidence), args.ipython)

def classify_cmd(args):
    """Uses verbs.txt in the output dir to set sentiment values for triplets.

    Allows for manual sentiment coding if desired.
    """
    from classify import classify

    # Set parameters in config.py.
    if args.SRC is None:
        name, project_path = get_opened()
        config.project_name = name
        config.source_dir = project_path
        config.output_dir = project_path
    elif utils.valid_dir(args.SRC):
        config.source_dir = args.SRC
        config.output_dir = args.SRC
    else:
        print "Project dir error. Exiting."
        sys.exit(1)
    init()
    verbs = os.path.join(config.source_dir, 'verbs.txt')
    classify(verbs)

def run(self):
    while 1:
        try:
            data = record()
            parsed_data = parse(data)
            ax = parsed_data[0]
            ay = parsed_data[1]
            az = parsed_data[2]
            gx = parsed_data[3]
            gy = parsed_data[4]
            gz = parsed_data[5]
            clazz = classify(ax, ay, az, gx, gy, gz)
            self.perform_action(clazz)
            print 'Detected: ' + clazz
        except Exception as e:
            print "Something went wrong in gestureThread: ", e

def pipeline(audio_file):
    timestamp = create_timestamp()
    spectrogram_dir = "%s_%s_spectrograms" % (timestamp, audio_file)
    if not os.path.exists(spectrogram_dir):
        os.mkdir(spectrogram_dir)
    if not os.path.isdir(spectrogram_dir):
        sys.exit("%s is not a directory." % spectrogram_dir)
    generate_spectrograms.create_and_write_spectrograms(os.path.dirname(audio_file),
                                                        os.path.basename(audio_file),
                                                        spectrogram_dir)
    spectrograms = os.listdir(spectrogram_dir)
    spectrograms = [os.path.join(spectrogram_dir, spectrogram) for spectrogram in spectrograms]
    results = classify.classify(spectrograms)
    print results
    print np.average(results, axis=0).argmax()

def parse(self, response):
    my_item = eventsEvent()
    # my_item['title'] = response.xpath('//title/text()').extract()
    description = response.xpath('//div[@class="event-description"]/text()').extract()
    # join(desc, opt)
    #   desc: event description
    #   opt = -1: no classifier
    #   opt = 0: whitelist
    #   opt = 1: naive bayes
    #   opt = 2: svm
    #   opt = 3: knn
    #   opt = 4: decision tree
    if classify(" ".join(description), 3):
        date_extract = response.xpath('//time/text()').extract()
        month = re.findall(regex_date, date_extract[0], flags=0)
        my_item['month'] = date_dict[month[0][:3]]
        my_item['day'] = int(re.findall(regex_num, date_extract[0])[0])
        my_item['url'] = response.url
        yield my_item

def run():
    data.load()
    server.connect()
    for (uid, message) in server.fetch()[:50]:
        try:
            # if message['from'] in data.contacted:
            #     logger.log("contacted", message)
            # else:
            scams = classify(message)
            if scams:
                scam = random.choice(scams)
                response = random.choice(data.responses[scam])
            else:
                response = random.choice(data.default)
            server.reply(message, response)
            data.contacted.add(message['from'])
            logger.log("classified", str(scams), message.as_string(), response)
            server.seen(uid)
        except Exception, e:
            print e

def update_playlist():
    while True:
        time.sleep(5)
        json_dump = {}
        try:
            json_dump = load_json_dump('classified_songs.json')
        except:
            pass
        songs = glob.glob('static/downloaded_songs/*.mp3')
        for index, song in enumerate(songs):
            if song not in json_dump:
                video_title = song.split('/')[-1]
                video_title = video_title[:len(video_title) - 4]
                print video_title, song.split('/')[-1]
                genre, data = classify(song)
                json_dump[song] = {'filename': song, 'genre': genre, 'mp3': song,
                                   'title': jobs[video_title][1], 'artist': jobs[video_title][0],
                                   'duration': '2:30', 'rating': 4}
                # json_dump[song] = {'filename': song, 'genre': 'rap', 'mp3': song, 'title': title,
                #                    'artist': artist, 'duration': '2:23', 'rating': 4}
        with open('classified_songs.json', 'w') as outfile:
            json.dump(json_dump, outfile, indent=4, sort_keys=True)

def main():
    x = time.clock()
    # trainPath = '/Users/robertabbott/Desktop/CS/projects/kaggle/pizza/pizza_request_dataset.json'
    trainPath = '/Users/robertabbott/Desktop/CS/projects/kaggle/pizza/train.json'
    testPath = '/Users/robertabbott/Desktop/CS/projects/kaggle/pizza/pizza_request_dataset.json'
    # testPath = '/Users/robertabbott/Desktop/CS/projects/kaggle/pizza/test.json'
    testData = train(testPath)
    trainData = train(trainPath)
    trainData.mapData()
    # trainData.addDataSet(trainPath)
    c = classify(trainData)
    # y = getProbability(testData, c)
    y = c.getProbability(testData)
    print y[0:2]
    print 'positives: ' + str(y[2]), 'negatives: ' + str(y[3])
    print 'false positives: ' + str(y[4]), 'false negatives: ' + str(y[5])
    print 'recall = ' + str(float(y[2]) / float(y[2] + y[4]))
    print 'precision = ' + str(float(y[2]) / float(y[2] + y[5]))
    print 'accuracy = ' + str(float(y[0]) / (float(y[0]) + float(y[1])))

def processItem(arr):
    data = arr['data']
    q = Meme.objects.filter(threadLink='http://reddit.com' + data['permalink'])
    # Have we evaluated this submission yet? Might be worth considering only checking
    # memes within the last day.
    if q.count() == 1:
        # If we have, update the score and move on.
        m = q[0]
        m.score = data['score']
        m.save()
    elif data['thumbnail'] != 'default':
        # Have not evaluated this submission yet; run tests and store.
        thumbnailPage = urllib3.PoolManager().request('GET', data['thumbnail'])
        filepath = dropBoxDir + 'target.jpg'
        f = open(filepath, 'wb')
        f.write(urllib3.PoolManager().request('GET', data['thumbnail']).data)
        f.close()
        # classify.classify() returns:
        #   1: image macro filepath that it belongs to (0 if no match)
        #   2: topTwoDist tuple - (best match, 2nd best match)
        #   3: topTwoCorr tuple - (best match, 2nd best match)
        classification = classify.classify(filepath)
        if ".jpg" in data['url']:
            fullSize = data['url']
        else:
            fullSize = fullSizePhoto(data['url'])
        if classification[0] == None:
            macro = None
        else:
            macro = ImageMacro.objects.get(filename=classification[0])
        m = Meme(classification=macro, thumbnailLink=data['thumbnail'], fullSizeLink=fullSize,
                 score=data['score'], submitter=data['author'], corrDict=repr(classification[2]),
                 distDict=repr(classification[1]), source='adviceanimals', created=data['created'],
                 threadLink='http://reddit.com' + data['permalink'])
        m.save()

def run(data, test_size, repeat, leave=False):
    classified = [
        classify(data=data, repeat=repeat, test_size=test_size,
                 clf=svm.SVC(C=1.0, kernel='linear'), leave=leave),
        classify(data=data, repeat=repeat, test_size=test_size,
                 clf=svm.SVC(C=1.0, kernel='rbf'), leave=leave),
        classify(data=data, repeat=repeat, test_size=test_size,
                 clf=svm.SVC(C=1.0, kernel='sigmoid'), leave=leave),
        classify(data=data, repeat=repeat, test_size=test_size,
                 clf=QuadraticDiscriminantAnalysis(), leave=leave),
        classify(data=data, repeat=repeat, test_size=test_size,
                 clf=LinearDiscriminantAnalysis(solver='lsqr'), leave=leave),
        classify(data=data, repeat=repeat, test_size=test_size,
                 clf=DecisionTreeClassifier(), leave=leave),
        classify(data=data, repeat=repeat, test_size=test_size,
                 clf=KNeighborsClassifier(n_neighbors=5, weights='uniform'), leave=leave),
    ]
    for cl in classified:
        print("")
        for x, y in cl.items():
            print(x, y)

        Y[index] = int(data[i, 0])  # Store the type of the trip of the current visit
    else:
        # If visit number has not changed, it's still the same visit.
        num_products += 1  # Increase the number of products of the current visit
        X[index, departmentIndex[data[i, 5]]] += 1
        X[index, length - 1] += float(data[i, 4])
        if data[i, 6] == "":
            X[index, length - 2] = 0
        else:
            X[index, length - 2] += float(data[i, 6])
        cnt += 1
        # X[index, filenumberIndex[data[i, 6]]] += 1

kf = KFold(X.shape[0], n_folds=10)  # Initialize cross validation
iterations = 0    # Total number of folds evaluated
totalLogloss = 0  # Accumulated log loss across folds
for trainIndex, testIndex in kf:
    trainSet = X[trainIndex]
    testSet = X[testIndex]
    trainLabels = Y[trainIndex]
    testLabels = Y[testIndex]
    predictions, trips = classify(trainSet, trainLabels, testSet)
    logloss = log_loss(testLabels, predictions, trips)
    print "Log Loss: ", logloss
    totalLogloss += logloss
    iterations += 1
print "Average Log Loss: ", totalLogloss / iterations

ruta1 = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\val\\images'
ruta2 = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\train\\images'
savepath1 = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\val'
savepath2 = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\train'

build_database(ruta1, savepath1)
build_database(ruta2, savepath2)

get_features(ruta1, savepath1, savepath1)
get_features(ruta2, savepath2, savepath2)

savepath_principal = os.path.dirname(os.path.abspath(__file__))
features_val = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\val'
features_train = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\train'
rank(features_val, features_train, savepath_principal)

feat = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\val\\Features.txt'
path_out = os.path.dirname(os.path.abspath(__file__))
labels = os.path.dirname(os.path.abspath(__file__)) + '\\labels.txt'
classify(feat, path_out, labels)

path = os.path.dirname(os.path.abspath(__file__))
Gt_val_test = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\val\\annotation.txt'
evaluate_ranking(path, Gt_val_test)

automatic_annot = os.path.dirname(os.path.abspath(__file__)) + '\\classify.txt'
annotation = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\val\\annotation.txt'
evaluate_classification(automatic_annot, annotation)

import Train
import classify


def train():
    Train.initialise()


choice = raw_input("\nRun training scripts(1/0)-")
if choice == '1':
    train()
classify.classify()

def classify(ques, mother_hash):
    mother_hash['ques_class'] = ques_classify.classify(ques)
    return mother_hash

trainSet1 = data
trainLabels1 = y

csv_file_object2 = csv.reader(open('test.csv', 'rb'))  # Load in the csv file
header = csv_file_object2.next()
data1 = []
for row in csv_file_object2:  # Skip through each row in the csv file,
    data1.append(row[0:])     # adding each row to the data variable
G = array(data1)

passId = G[:, 0]
testSet1 = preprocess(G, y)
predictedLabels1 = classify(trainSet1, trainLabels1, testSet1)

target = open('myResult.csv', 'w')
target.write('PassengerId,Survived')
target.write("\n")
for i in range(len(passId)):
    target.write(passId[i])
    target.write(',')
    if predictedLabels1[i] == 1:
        target.write(str(1))
    else:
        target.write(str(0))
    target.write("\n")
target.close()

from classify import classify
from eval_classification import eval_classification
from eval_classification import plot_confusion_matrix
import warnings
warnings.filterwarnings("ignore")

# Extract the parameters
params = get_params()

# Create the database
params['split'] = 'train'
build_database(params)
params['split'] = 'val'
build_database(params)

# Extract the features
get_features(params)

# Train a classification model
train_classifier(params)

# Classification
classify(params)

# Evaluate the classification
f1, precision, recall, accuracy, cm, labels = eval_classification(params)

print "Measures:\n"
print f1
print "-F1:", np.mean(f1)
print "-Precision:", np.mean(precision)
print "-Recall:", np.mean(recall)
print "-Accuracy:", accuracy
print "-Confusion matrix:\n", cm
plot_confusion_matrix(cm, labels, normalize=True)

from classify import classify f = open("teamnames.txt", 'r') successRates = 0 for teamname in f: successRates += classify(str.strip(teamname)) #number = classify("NYJ") #print number overallSuccess = successRates/32 print "Overall Accuracy is: " + str(overallSuccess)
# extract biggest contour and topmost point of that
cnt = contours[max_index]
# area: (x, y), (x+w, y+h)
x, y, w, h = cv2.boundingRect(cnt)
drawing = np.zeros(crop_img.shape, np.uint8)
cv2.drawContours(drawing, contours, max_index, (0, 255, 0), 2)
cv2.rectangle(drawing, (x, y), (x + w, y + h), (0, 255, 0))
cv2.imshow("tracking", drawing[y:y + h, x:x + w])
hand_fig = img[y:y + h, x:x + w]
# cv2.imshow('hand', hand_fig)
prediction = classify(drawing[y:y + h, x:x + w])
feature_params = dict(maxCorners=100,
                      qualityLevel=0.3,
                      minDistance=7,
                      blockSize=7)
frame = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
if prediction[0] == 'one':
    cv2.putText(img, "MODEL ONE", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (51, 102, 204))
    # direction = directionCalculate(first_frame, frame, np.array(cnt, dtype=np.float32))
    # print 'one %s' % (direction)
elif prediction[0] == 'two':
    cv2.putText(img, "MODEL TWO", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (51, 102, 204))
    # direction = directionCalculate(first_frame, frame, np.array(cnt, dtype=np.float32))

def simple_extract(target, localization=None, verbose=False):
    """Extract all the PPRs found in target"""
    if not isinstance(target, SeqRecord):
        raise TypeError("simple_extract requires a Bio.SeqRecord, not {}".format(type(target)))

    if verbose:
        print "Searching..."

    # find all easy-to-locate PPR motifs
    search = HMMER.hmmsearch(hmm=models[3], targets=target)
    # get features for each motif
    motifs = search.getFeatures(target)

    if verbose:
        print "Got {} motifs, grouping...".format(len(motifs))

    # group features by frame and location
    groups = group_motifs(motifs, max_gap=1500)

    if verbose:
        print "Got {} groups, extracting envelopes...".format(len(groups))

    pprs = []
    dbg_env = []
    while groups:
        if verbose:
            print "Got {} groups, extracting envelopes...".format(len(groups))
        # extract the sequence envelope around each group
        envelopes = [get_envelope(group, target, margin=1000) for group in groups]
        dbg_env += envelopes
        if verbose:
            print "Got {} envelopes, locating PPRs...".format(len(envelopes))
        # locate the PPR within each envelope
        for envelope in envelopes:
            ppr = locate_ppr(envelope)
            if ppr:
                pprs.append(ppr)
        # look for overlapping pprs
        groups = remove_overlaps(pprs)
        ol = len(groups)
        if verbose:
            print "{} conflicts".format(ol)
        groups += remove_overgrown(pprs, 500)
        if verbose:
            print "{} overgrown PPRs".format(len(groups) - ol)

    pprs = [add_source(p, target) for p in pprs]

    if verbose:
        print "Got {} PPRs, cleaning...".format(len(pprs))

    # clean the gaps between features
    pprs = [clean_ends(fill_gaps(ppr)) for ppr in pprs]

    # annotate the tail region and classify each PPR
    classify.classify(pprs)

    # predict each PPR's target
    targetp.targetp(pprs, annotation='localization')

    # filter the desired location
    if localization:
        pprs = [p for p in pprs if p.annotations['localization'] == localization]

    # return a list of nicely presented PPRs
    return pprs
