def analyszie_folder(wiki_folder,
                     xlsx_folder,
                     isGraphseg,
                     use_xlsx_sub_folders=False):
    """Score annotated segmentations of a folder against the gold segments.

    Every file returned by ``get_files(wiki_folder)`` is matched by base name
    against each annotation folder. Each annotation file that exists is
    parsed and fed, together with the gold segmentation of the input file,
    into a single shared ``accuracy.Accuracy`` accumulator.

    Args:
        wiki_folder: Folder handed to ``get_files`` to list the input files.
        xlsx_folder: Folder holding the annotation files, or, when
            ``use_xlsx_sub_folders`` is true, the parent of such folders.
        isGraphseg: When true the annotation file has the same name as the
            input file and is read with ``get_graphseg_segments``; otherwise
            ``".xlsx"`` is appended to the name and ``get_xlsx_segments`` is
            used.
        use_xlsx_sub_folders: Treat every sub-directory of ``xlsx_folder`` as
            a separate annotation folder.

    Returns:
        The ``(pk, windiff)`` pair produced by ``acc.calc_accuracy()``, or the
        sentinel ``(1000, 1000)`` as soon as an annotation contains a
        different number of segments than its gold file.
    """
    acc = accuracy.Accuracy()

    input_files = get_files(wiki_folder)
    if use_xlsx_sub_folders:
        # os.path.join (rather than plain string concatenation) keeps the
        # path valid when xlsx_folder has no trailing separator.
        sub_paths = [
            os.path.join(xlsx_folder, entry)
            for entry in os.listdir(xlsx_folder)
        ]
        annotated_files_folders = [p for p in sub_paths if os.path.isdir(p)]
    else:
        annotated_files_folders = [xlsx_folder]

    for input_file in input_files:
        base_name = os.path.basename(input_file)
        file_name = base_name if isGraphseg else base_name + ".xlsx"
        xlsx_file_paths = [
            os.path.join(folder, file_name)
            for folder in annotated_files_folders
        ]
        print(str(xlsx_file_paths))
        print(str(input_file))

        # The gold segmentation depends only on the input file, so it is
        # parsed at most once, and only if some annotation actually exists.
        gold_segments = None

        for xlsx_file_path in xlsx_file_paths:
            if not os.path.isfile(xlsx_file_path):
                continue

            if isGraphseg:
                tested_segments = get_graphseg_segments(xlsx_file_path)
            else:
                tested_segments = get_xlsx_segments(xlsx_file_path)
            if tested_segments is None:
                continue

            if gold_segments is None:
                gold_segments = get_gold_segments(input_file)

            if len(tested_segments) != len(gold_segments):
                print("(len(tested_segments) != len(gold_segments))")
                print("stop run")
                # Sentinel kept for existing callers.
                return 1000, 1000

            acc.update(tested_segments, gold_segments)

    # Print results:
    calculated_pk, calculated_windiff = acc.calc_accuracy()
    print('Finished testing.')
    print('Pk: {:.4}.'.format(calculated_pk))
    print('')

    return calculated_pk, calculated_windiff
示例#2
0
def test(model, args, epoch, dataset, logger, threshold):
    """Run the model over ``dataset`` and return the Pk score.

    For each batch the argmax predictions are added to the ``preds_stats``
    object taken from the enclosing scope. Then, document by document, the
    class-1 softmax probability is cut at ``threshold`` and compared with the
    document's target through an ``accuracy.Accuracy`` accumulator.

    Args:
        model: Callable model; ``model.eval()`` is invoked before iterating.
        args: Namespace providing ``stop_after`` (batch index at which to
            stop) and ``cuda`` (forwarded to ``maybe_cuda``).
        epoch: Zero-based epoch number; logged as ``epoch + 1``.
        dataset: Iterable with a length, yielding ``(data, target, paths)``
            where ``target`` is a sequence with one entry per document.
        logger: Logger that receives the summary line at debug level.
        threshold: Cut-off applied to the class-1 softmax probability.

    Returns:
        The Pk value (first element of ``acc.calc_accuracy()``).
    """
    model.eval()
    with tqdm(desc='Testing', total=len(dataset)) as pbar:
        acc = accuracy.Accuracy()
        for i, (data, target, paths) in enumerate(dataset):
            if i == args.stop_after:
                break
            pbar.update()
            output = model(data)
            output_softmax = F.softmax(output, 1)
            targets_var = Variable(maybe_cuda(torch.cat(target, 0),
                                              args.cuda),
                                   requires_grad=False)
            output_seg = output.data.cpu().numpy().argmax(axis=1)
            target_seg = targets_var.data.cpu().numpy()
            preds_stats.add(output_seg, target_seg)

            # Convert the softmax output once per batch instead of once per
            # document; only the class-1 column is needed below.
            boundary_probs = output_softmax.data.cpu().numpy()[:, 1]

            current_idx = 0
            for t in target:
                to_idx = int(current_idx + len(t))

                predicted = boundary_probs[current_idx:to_idx] > threshold
                # A trailing 1 is appended to both the hypothesis and the
                # reference before they are scored.
                h = np.append(predicted, [1])
                tt = np.append(t, [1])

                acc.update(h, tt)

                current_idx = to_idx

        epoch_pk, epoch_windiff = acc.calc_accuracy()

        logger.debug(
            'Testing Epoch: {}, accuracy: {:.4}, Pk: {:.4}, Windiff: {:.4}, F1: {:.4} . '
            .format(epoch + 1, preds_stats.get_accuracy(), epoch_pk,
                    epoch_windiff, preds_stats.get_f1()))
        preds_stats.reset()

        return epoch_pk
def main(args):
    """Run 5-fold cross validation of a saved model on the flat Choi data.

    The dataset folder ``args.flat_choi`` is expected to contain the
    sub-folders ``0`` .. ``4``. For each fold ``j`` the other four folders
    are passed to ``validate`` to obtain a threshold, and folder ``j`` is
    then scored with ``test`` using that threshold. All folds share one
    ``accuracy.Accuracy`` accumulator, whose final Pk is printed and logged.

    Args:
        args: Namespace providing ``config``, ``expname``, ``test``,
            ``flat_choi``, ``load_from``, ``test_bs`` and ``num_workers``.
    """
    sys.path.append(str(Path(__file__).parent))

    logger = utils.setup_logger(__name__, 'cross_validate_choi.log')

    utils.read_config_file(args.config)
    utils.config.update(args.__dict__)
    logger.debug('Running with config %s', utils.config)

    configure(os.path.join('runs', args.expname))

    if not args.test:
        word2vec = gensim.models.KeyedVectors.load_word2vec_format(
            utils.config['word2vecfile'], binary=True)
    else:
        word2vec = None

    dataset_path = Path(args.flat_choi)

    # NOTE: torch.load unpickles the file, so only load checkpoints that come
    # from a trusted source.
    with open(args.load_from, 'rb') as f:
        model = torch.load(f)
    model.eval()
    model = maybe_cuda(model)

    test_accuracy = accuracy.Accuracy()

    fold_count = 5
    for j in range(fold_count):
        # Build the list directly: range() has no remove() on Python 3, where
        # it is an immutable sequence rather than a list.
        validate_folder_numbers = [
            num for num in range(fold_count) if num != j
        ]
        validate_folder_names = [
            dataset_path.joinpath(str(num)) for num in validate_folder_numbers
        ]
        dev_dataset = ChoiDataset(dataset_path, word2vec, folder=True,
                                  folders_paths=validate_folder_names)
        test_dataset = ChoiDataset(dataset_path, word2vec, folder=True,
                                   folders_paths=[dataset_path.joinpath(str(j))])

        dev_dl = DataLoader(dev_dataset, batch_size=args.test_bs,
                            collate_fn=collate_fn, shuffle=False,
                            num_workers=args.num_workers)
        test_dl = DataLoader(test_dataset, batch_size=args.test_bs,
                             collate_fn=collate_fn, shuffle=False,
                             num_workers=args.num_workers)

        _, threshold = validate(model, args, j, dev_dl, logger)
        test_pk = test(model, args, j, test_dl, logger, threshold,
                       test_accuracy)
        logger.debug(colored(
            'Cross validation section {} with p_k {} and threshold {}'.format(
                j, test_pk, threshold), 'green'))

    cross_validation_pk, _ = test_accuracy.calc_accuracy()
    print('Final cross validaiton Pk is: ' + str(cross_validation_pk))
    logger.debug(
        colored('Final cross validaiton Pk is: {}'.format(cross_validation_pk),
                'green'))
示例#4
0
def main(args):
    """Score delimiter-annotated text files and print Pk and WinDiff.

    Each file from ``get_files(args.folder)`` is read as UTF-8 and split into
    non-empty lines. A line equal to ``truth`` marks a reference boundary
    after the preceding sentence; a line equal to ``graphseg_delimeter``
    marks a hypothesis boundary after the preceding sentence. Every other
    line counts as one sentence. The resulting 0/1 vectors are accumulated in
    an ``accuracy.Accuracy`` instance.

    Args:
        args: Namespace providing ``config`` (configuration file path) and
            ``folder`` (folder handed to ``get_files``).
    """
    utils.read_config_file(args.config)
    utils.config.update(args.__dict__)

    algo_delimeter = graphseg_delimeter

    files = get_files(args.folder)
    acc = accuracy.Accuracy()

    for file_path in files:
        # Read raw bytes and decode explicitly so the same code runs on both
        # Python 2 and 3; the context manager closes the handle on error too.
        with open(str(file_path), "rb") as handle:
            raw_content = handle.read()

        sentences = [
            s for s in raw_content.decode('utf-8').strip().split("\n") if s
        ]

        h = []  # hypothesis: 1 where the algorithm ends a segment
        t = []  # reference: 1 where the gold annotation ends a segment
        for sentence in sentences:
            if sentence == truth:
                # A delimiter before the first sentence closes nothing.
                if t:
                    t[-1] = 1
                continue
            if sentence == algo_delimeter:
                if h:
                    h[-1] = 1
                continue
            t.append(0)
            h.append(0)

        if not t:
            # No sentences: t[-1] would raise IndexError and there is nothing
            # to score.
            print('Skipping file without sentences: ' + str(file_path))
            continue

        t[-1] = 1  # end of last segment
        h[-1] = 1  # they already segment it correctly.

        acc.update(h, t)

    calculated_pk, calculated_windiff = acc.calc_accuracy()
    print('Pk: {:.4}.'.format(calculated_pk))
    print('Win_diff: {:.4}.'.format(calculated_windiff))
def main(args):
    """Evaluate a saved segmentation model on Wikipedia or Choi data.

    Loads the configuration, optionally the word2vec vectors and the model
    from ``args.model`` (replaced by ``naive.create()`` when ``args.naive`` is
    set). For every selected dataset folder it runs the model batch by batch,
    thresholds the class-1 probability at ``args.seg_threshold`` and logs the
    average accuracy, Pk and F1.

    Args:
        args: Namespace providing ``config``, ``seg_threshold``, ``test``,
            ``wiki``, ``wiki_folder``, ``bySegLength``, ``model``, ``naive``,
            ``bs``, ``stop_after``, ``cuda`` and ``calc_word``.
    """
    start = timer()

    sys.path.append(str(Path(__file__).parent))

    utils.read_config_file(args.config)
    utils.config.update(args.__dict__)

    logger.debug('Running with config %s', utils.config)
    print('Running with threshold: ' + str(args.seg_threshold))
    # NOTE(review): preds_stats is created once and never reset, so the F1
    # logged for a later dataset folder also covers the earlier folders.
    preds_stats = utils.predictions_analysis()

    if not args.test:
        word2vec = gensim.models.KeyedVectors.load_word2vec_format(
            utils.config['word2vecfile'], binary=True)
    else:
        word2vec = None

    word2vec_done = timer()
    print('Loading word2vec ellapsed: ' + str(word2vec_done - start) +
          ' seconds')
    dirname = 'test'

    if args.wiki:
        if args.wiki_folder:
            dataset_folders = [args.wiki_folder]
        else:
            dataset_folders = [Path(utils.config['wikidataset']) / dirname]
        print('running on wikipedia')
    elif args.bySegLength:
        dataset_folders = getSegmentsFolders(utils.config['choidataset'])
        print('run on choi by segments length')
    else:
        dataset_folders = [utils.config['choidataset']]
        print('running on Choi')

    # NOTE: torch.load unpickles the file, so only load checkpoints that come
    # from a trusted source.
    with open(args.model, 'rb') as f:
        model = torch.load(f)

    model = maybe_cuda(model)
    model.eval()

    if args.naive:
        model = naive.create()

    for dataset_path in dataset_folders:

        if args.bySegLength:
            # Joined with single spaces to reproduce the output of the former
            # multi-argument print statement on both Python 2 and 3.
            print(' '.join(
                ['Segment is ', os.path.basename(dataset_path), " :"]))

        if args.wiki:
            if args.wiki_folder:
                dataset = WikipediaDataSet(dataset_path,
                                           word2vec,
                                           folder=True,
                                           high_granularity=False)
            else:
                dataset = WikipediaDataSet(dataset_path,
                                           word2vec,
                                           high_granularity=False)
        else:
            dataset = ChoiDataset(dataset_path, word2vec)

        dl = DataLoader(dataset,
                        batch_size=args.bs,
                        collate_fn=collate_fn,
                        shuffle=False)

        with tqdm(desc='Testing', total=len(dl)) as pbar:
            total_accurate = 0
            total_count = 0
            total_loss = 0
            acc = accuracy.Accuracy()

            for i, (data, targets, paths) in enumerate(dl):
                if i == args.stop_after:
                    break

                pbar.update()
                output = model(data)
                targets_var = Variable(maybe_cuda(torch.cat(targets, 0),
                                                  args.cuda),
                                       requires_grad=False)
                # No loss is computed in this script; the value is kept at
                # zero so the log format stays unchanged.
                batch_loss = 0
                output_prob = softmax(output.data.cpu().numpy())
                output_seg = output_prob[:, 1] > args.seg_threshold
                target_seg = targets_var.data.cpu().numpy()
                batch_accurate = (output_seg == target_seg).sum()
                total_accurate += batch_accurate
                total_count += len(target_seg)
                total_loss += batch_loss
                preds_stats.add(output_seg, target_seg)

                current_target_idx = 0
                for k, t in enumerate(targets):
                    document_sentence_count = len(t)
                    if args.calc_word:
                        sentences_length = [s.size()[0] for s in data[k]]
                    else:
                        sentences_length = None
                    to_idx = int(current_target_idx + document_sentence_count)
                    h = output_seg[current_target_idx:to_idx]

                    # hypothesis and targets are missing classification of
                    # last sentence, and therefore we will add 1 for both
                    h = np.append(h, [1])
                    t = np.append(t.cpu().numpy(), [1])

                    acc.update(h, t, sentences_length=sentences_length)

                    current_target_idx = to_idx

                # float() forces true division; with integer operands the
                # ratio would truncate to 0 or 1 under Python 2 semantics.
                batch_accuracy = float(batch_accurate) / len(target_seg)
                logger.debug('Batch %s - error %7.4f, Accuracy: %7.4f', i,
                             batch_loss, batch_accuracy)
                pbar.set_description(
                    'Testing, Accuracy={:.4}'.format(batch_accuracy))

        # Guard both averages against an empty loader.
        batch_total = len(dl)
        average_loss = total_loss / batch_total if batch_total else 0
        if total_count:
            average_accuracy = float(total_accurate) / total_count
        else:
            average_accuracy = 0.0
        calculated_pk, _ = acc.calc_accuracy()

        logger.info('Finished testing.')
        logger.info('Average loss: %s', average_loss)
        logger.info('Average accuracy: %s', average_accuracy)
        logger.info('Pk: {:.4}.'.format(calculated_pk))
        logger.info('F1: {:.4}.'.format(preds_stats.get_f1()))

        end = timer()
        print('Seconds to execute to whole flow: ' + str(end - start))
示例#6
0
 def __init__(self):
     """Create the threshold grid and one accuracy accumulator per value."""
     # Evenly spaced candidates starting at 0 with step 0.05; the stop
     # value 1 is excluded by np.arange.
     candidate_thresholds = np.arange(0, 1, 0.05)
     self.thresholds = candidate_thresholds
     # Keys are the elements of the threshold array themselves.
     self.accuracies = dict(
         (value, accuracy.Accuracy()) for value in candidate_thresholds)
示例#7
0
class DatasetGui(QtWidgets.QWidget):
    # Main window used to record pointing-gesture dataset items: it shows the
    # depth image from the sensor, overlays the tracked skeleton and offers
    # several ways to trigger a recording (immediately, after a countdown,
    # when a chosen distance is reached, or continuously for a heat map).

    # Helpers shared by all instances (created once, at class definition).
    utils = Utils()
    featureExtractor = FeatureExtractor()
    bpn = BPNHandler(True)
    # The right-hand side resolves to the module-level `accuracy` name; the
    # class attribute of the same name is only bound afterwards.
    accuracy = accuracy.Accuracy()

    # Constructor of the DatasetGui class
    #
    # @param    None
    # @return   None
    def __init__(self):
        super(DatasetGui, self).__init__()
        self.setWindowTitle("Pointing Gesture Recognition - Dataset recording")

        # Retrieve all settings
        self.settings = Settings()

        # Load sounds
        self.countdownSound = QtMultimedia.QSound(
            self.settings.getResourceFolder() + "countdown.wav")
        self.countdownEndedSound = QtMultimedia.QSound(
            self.settings.getResourceFolder() + "countdown-ended.wav")

        # Get the context and initialise it
        self.context = Context()
        self.context.init()

        # Create the depth generator to get the depth map of the scene
        self.depth = DepthGenerator()
        self.depth.create(self.context)
        self.depth.set_resolution_preset(RES_VGA)
        self.depth.fps = 30

        # Create the image generator to get an RGB image of the scene
        self.image = ImageGenerator()
        self.image.create(self.context)
        self.image.set_resolution_preset(RES_VGA)
        self.image.fps = 30

        # Create the user generator to detect skeletons
        self.user = UserGenerator()
        self.user.create(self.context)

        # Initialise the skeleton tracking
        skeleton.init(self.user)

        # Start generating
        self.context.start_generating_all()
        print("Starting to detect users..")

        # Create a new dataset item
        self.data = Dataset()

        # Points collected for a heat map. Initialised here so that the
        # heat-map methods never hit a missing attribute; toggleType resets
        # it whenever the heat-map type is selected.
        self.heatmap = []

        # Create a timer for an eventual countdown before recording the data
        self.countdownTimer = QtCore.QTimer()
        self.countdownRemaining = 10
        self.countdownTimer.setInterval(1000)
        self.countdownTimer.setSingleShot(True)
        self.countdownTimer.timeout.connect(self.recordCountdown)

        # Create a timer to eventually record data for a heat map
        self.heatmapRunning = False
        self.heatmapTimer = QtCore.QTimer()
        self.heatmapTimer.setInterval(10)
        self.heatmapTimer.setSingleShot(True)
        self.heatmapTimer.timeout.connect(self.recordHeatmap)

        # Create the global layout
        self.layout = QtWidgets.QVBoxLayout(self)

        # Create custom widgets to hold sensor's images
        self.depthImage = SensorWidget()
        self.depthImage.setGeometry(10, 10, 640, 480)

        # Add these custom widgets to the global layout
        self.layout.addWidget(self.depthImage)

        # Hold the label indicating the number of dataset taken
        self.numberLabel = QtWidgets.QLabel()
        self.updateDatasetNumberLabel()

        # Create the acquisition form elements
        self.createAcquisitionForm()

        # Register a dialog window to prompt the target position
        self.dialogWindow = DatasetDialog(self)

        # Allow to save the data when the right distance is reached
        self.recordIfReady = False

        # Create and launch a timer to update the images. It is single-shot:
        # updateImage restarts it at the end of every frame.
        self.timerScreen = QtCore.QTimer()
        self.timerScreen.setInterval(30)
        self.timerScreen.setSingleShot(True)
        self.timerScreen.timeout.connect(self.updateImage)
        self.timerScreen.start()

    # Update the depth image displayed within the main window
    #
    # @param    None
    # @return   None
    def updateImage(self):
        # Update to next frame
        self.context.wait_and_update_all()

        # Extract information about each tracked user
        self.data = skeleton.track(self.user, self.depth, self.data)

        # Get the whole depth map
        self.data.depth_map = np.asarray(
            self.depth.get_tuple_depth_map()).reshape(480, 640)

        # Create the frame from the raw depth map string and convert it to RGB
        # NOTE(review): np.fromstring on binary input is deprecated in newer
        # NumPy releases; np.frombuffer(...).copy() is the documented
        # replacement - confirm against the NumPy version in use.
        frame = np.fromstring(self.depth.get_raw_depth_map_8(),
                              np.uint8).reshape(480, 640)
        frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)

        # Get the RGB image of the scene
        self.data.image = np.fromstring(self.image.get_raw_image_map_bgr(),
                                        dtype=np.uint8).reshape(480, 640, 3)

        # Will be used to specify the depth of the current hand wished
        currentDepth, showCurrentDepth = 0, ""

        if len(self.user.users) > 0 and len(self.data.skeleton["head"]) > 0:
            # Highlight the head
            ui.drawPoint(frame, self.data.skeleton["head"][0],
                         self.data.skeleton["head"][1], 5)

            # Display lines from elbows to the respective hands
            ui.drawElbowLine(frame, self.data.skeleton["elbow"]["left"],
                             self.data.skeleton["hand"]["left"])
            ui.drawElbowLine(frame, self.data.skeleton["elbow"]["right"],
                             self.data.skeleton["hand"]["right"])

            # Get the pixel's depth from the coordinates of the hands
            leftPixel = self.utils.getDepthFromMap(
                self.data.depth_map, self.data.skeleton["hand"]["left"])
            rightPixel = self.utils.getDepthFromMap(
                self.data.depth_map, self.data.skeleton["hand"]["right"])

            if self.data.hand == self.settings.LEFT_HAND:
                currentDepth = leftPixel
            elif self.data.hand == self.settings.RIGHT_HAND:
                currentDepth = rightPixel

            # Get the shift of the boundaries around both hands
            leftShift = self.utils.getHandBoundShift(leftPixel)
            rightShift = self.utils.getHandBoundShift(rightPixel)

            # Display a rectangle around both hands
            ui.drawHandBoundaries(frame, self.data.skeleton["hand"]["left"],
                                  leftShift, (50, 100, 255))
            ui.drawHandBoundaries(frame, self.data.skeleton["hand"]["right"],
                                  rightShift, (200, 70, 30))

        # Record the current data if the user is ready
        if self.recordIfReady:
            cv2.putText(frame, str(self.data.getWishedDistance()), (470, 60),
                        cv2.FONT_HERSHEY_SIMPLEX, 2, (252, 63, 253), 5)

            # Record once the wished distance is within +/- 10 of the
            # selected hand's current depth.
            if (self.data.getWishedDistance() >= int(currentDepth) - 10 and
                    self.data.getWishedDistance() <= int(currentDepth) + 10):
                self.record([])
                self.recordIfReady = False
            else:
                # "+" when the current depth is below the wished distance,
                # "-" otherwise.
                if int(currentDepth) < self.data.getWishedDistance():
                    showCurrentDepth = str(currentDepth) + " +"
                else:
                    showCurrentDepth = str(currentDepth) + " -"
        else:
            showCurrentDepth = str(currentDepth)

        cv2.putText(frame, showCurrentDepth, (5, 60), cv2.FONT_HERSHEY_SIMPLEX,
                    2, (50, 100, 255), 5)

        # Update the frame
        self.depthImage.setPixmap(ui.convertOpenCVFrameToQPixmap(frame))

        # Re-arm the single-shot timer for the next frame
        self.timerScreen.start()

    # Update the label indicating the number of dataset elements saved so far
    # for the current type
    #
    # @param    None
    # @return   None
    def updateDatasetNumberLabel(self):
        if self.data.type == Dataset.TYPE_POSITIVE:
            self.numberLabel.setText("Dataset #%d" %
                                     (self.utils.getFileNumberInFolder(
                                         self.settings.getPositiveFolder())))
        elif self.data.type == Dataset.TYPE_NEGATIVE:
            self.numberLabel.setText("Dataset #%d" %
                                     (self.utils.getFileNumberInFolder(
                                         self.settings.getNegativeFolder())))
        elif self.data.type == Dataset.TYPE_ACCURACY:
            self.numberLabel.setText("Dataset #%d" %
                                     (self.utils.getFileNumberInFolder(
                                         self.settings.getAccuracyFolder())))
        else:
            self.numberLabel.setText("Dataset #%d" %
                                     (self.utils.getFileNumberInFolder(
                                         self.settings.getDatasetFolder())))

    # Record the current information
    #
    # @param    obj                 Initiator of the event (unused)
    # @return   None
    def record(self, obj):
        # If the user collects data to check accuracy, prompt for additional
        # information
        if self.data.type == Dataset.TYPE_ACCURACY:
            self.saveForTarget()
        # If the user collects data for a heat map, let's do it
        elif self.data.type == Dataset.TYPE_HEATMAP:
            # The same button will be used to stop recording
            if not self.heatmapRunning:
                self.startRecordHeatmap()
            else:
                self.stopRecordHeatmap()
        else:
            # Directly save the dataset and update the label number
            self.data.save()
            self.countdownEndedSound.play()
            self.updateDatasetNumberLabel()

    # Handle a countdown as a means to record the information with a delay
    #
    # @param    None
    # @return   None
    def recordCountdown(self):
        # Decrease the countdown and check if it needs to continue
        self.countdownRemaining -= 1

        if self.countdownRemaining <= 0:
            # Re-initialise the timer and record the data
            self.countdownTimer.stop()
            self.countdownButton.setText("Saving..")
            self.countdownRemaining = 10
            self.record([])
        else:
            self.countdownTimer.start()
            self.countdownSound.play()

        # Display the actual remaining time
        self.countdownButton.setText("Save in %ds" % (self.countdownRemaining))

    # Record a heatmap representation of the information by successive
    # captures
    #
    # @param    None
    # @return   False when no hand is selected (the timer is then not
    #           restarted), None otherwise
    def recordHeatmap(self):
        if self.data.hand == self.settings.NO_HAND:
            print("Unable to record as no hand is selected")
            return False

        if len(self.user.users) > 0 and len(self.data.skeleton["head"]) > 0:
            # Input the data into the feature extractor
            result = self.bpn.check(
                self.featureExtractor.getFeatures(self.data))

            # Add the depth of the finger tip
            # NOTE(review): `point` is the list stored in
            # featureExtractor.fingerTip, so the appends below modify it in
            # place - confirm this is intended.
            point = self.featureExtractor.fingerTip[result[1]]
            point.append(self.utils.getDepthFromMap(self.data.depth_map,
                                                    point))

            # Verify that the information is correct
            if point[0] != 0 and point[1] != 0 and point[2] != 0:
                # Add the result of the neural network
                point.append(result[0])

                self.heatmap.append(point)
                self.countdownSound.play()

        # Loop timer
        self.heatmapTimer.start()

    # Start the recording of the heatmap
    #
    # @param    None
    # @return   None
    def startRecordHeatmap(self):
        self.saveButton.setText("Stop recording")
        self.heatmapRunning = True
        self.heatmapTimer.start()

    # Stop the recording of the heatmap
    #
    # @param    None
    # @return   None
    def stopRecordHeatmap(self):
        self.heatmapTimer.stop()
        self.heatmapRunning = False
        self.countdownEndedSound.play()

        self.saveButton.setText("Record")

        self.accuracy.showHeatmap(self.heatmap, "front")
        self.heatmap = []

    # Raise a flag to record the information when the chosen distance is met
    #
    # @param    None
    # @return   None
    def startRecordWhenReady(self):
        self.recordIfReady = True

    # Hold the current information to indicate the position of the target
    # thanks to the dialog window
    #
    # @param    None
    # @return   None
    def saveForTarget(self):
        # Freeze the data
        self.timerScreen.stop()
        self.countdownEndedSound.play()

        # Translate the depth values to a frame and set it in the dialog window
        frame = np.fromstring(self.depth.get_raw_depth_map_8(),
                              np.uint8).reshape(480, 640)
        frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
        self.dialogWindow.setFrame(frame)

        # Prompt the position of the target
        self.dialogWindow.exec_()

    # Toggle the type of dataset chosen
    #
    # @param    value               Identifier of the new type of dataset
    # @return   None
    def toggleType(self, value):
        self.data.toggleType(value)

        if value == self.data.TYPE_HEATMAP:
            self.saveButton.setText("Record")
            self.countdownButton.setText("Record in %ds" %
                                         (self.countdownRemaining))
            self.readyButton.setEnabled(False)

            # Create an array to hold all points
            self.heatmap = []
        else:
            self.updateDatasetNumberLabel()
            # The buttons may not exist yet when this slot is invoked while
            # the acquisition form is still being built.
            if hasattr(self, 'saveButton'):
                self.saveButton.setText("Save")
                self.countdownButton.setText("Save in %ds" %
                                             (self.countdownRemaining))
                self.readyButton.setEnabled(True)

    # Create the acquisition form of the main window
    #
    # @param    None
    # @return   None
    def createAcquisitionForm(self):
        globalLayout = QtWidgets.QHBoxLayout()
        vlayout = QtWidgets.QVBoxLayout()

        # Drop down menu of the distance to record the information when the
        # pointing hand meets the corresponding value
        hlayout = QtWidgets.QHBoxLayout()
        label = QtWidgets.QLabel("Distance")
        label.setFixedWidth(100)
        comboBox = QtWidgets.QComboBox()
        comboBox.currentIndexChanged.connect(self.data.toggleDistance)
        comboBox.setFixedWidth(200)
        comboBox.addItem("550")
        comboBox.addItem("750")
        comboBox.addItem("1000")
        comboBox.addItem("1250")
        comboBox.addItem("1500")
        comboBox.addItem("1750")
        comboBox.addItem("2000")
        comboBox.setCurrentIndex(0)
        hlayout.addWidget(label)
        hlayout.addWidget(comboBox)
        vlayout.addLayout(hlayout)

        # Drop down menu to select the type of hand of the dataset
        hlayout = QtWidgets.QHBoxLayout()
        label = QtWidgets.QLabel("Pointing hand")
        label.setFixedWidth(100)
        comboBox = QtWidgets.QComboBox()
        comboBox.currentIndexChanged.connect(self.data.toggleHand)
        comboBox.setFixedWidth(200)
        comboBox.addItem("Left")
        comboBox.addItem("Right")
        comboBox.addItem("None")
        comboBox.setCurrentIndex(0)
        hlayout.addWidget(label)
        hlayout.addWidget(comboBox)
        vlayout.addLayout(hlayout)

        # Drop down menu of the dataset type
        hlayout = QtWidgets.QHBoxLayout()
        label = QtWidgets.QLabel("Type")
        label.setFixedWidth(100)
        comboBox = QtWidgets.QComboBox()
        comboBox.currentIndexChanged.connect(self.toggleType)
        comboBox.setFixedWidth(200)
        comboBox.addItem("Positive")
        comboBox.addItem("Negative")
        comboBox.addItem("Accuracy")
        comboBox.addItem("Heat map")
        comboBox.setCurrentIndex(0)
        hlayout.addWidget(label)
        hlayout.addWidget(comboBox)
        vlayout.addLayout(hlayout)

        globalLayout.addLayout(vlayout)
        vlayout = QtWidgets.QVBoxLayout()

        self.numberLabel.setAlignment(QtCore.Qt.AlignCenter)
        vlayout.addWidget(self.numberLabel)

        # Action buttons to record the way that suits the most
        hLayout = QtWidgets.QHBoxLayout()
        self.readyButton = QtWidgets.QPushButton(
            'Save when ready', clicked=self.startRecordWhenReady)
        hLayout.addWidget(self.readyButton)
        vlayout.addLayout(hLayout)

        # The save button is created only once, next to the countdown button
        # it shares a row with.
        item_layout = QtWidgets.QHBoxLayout()
        self.countdownButton = QtWidgets.QPushButton(
            "Save in %ds" % (self.countdownRemaining),
            clicked=self.countdownTimer.start)
        self.saveButton = QtWidgets.QPushButton('Save', clicked=self.record)
        item_layout.addWidget(self.countdownButton)
        item_layout.addWidget(self.saveButton)
        vlayout.addLayout(item_layout)

        globalLayout.addLayout(vlayout)
        self.layout.addLayout(globalLayout)
示例#8
0
def test(model, args, epoch, dataset, logger, threshold):
    """Run the model over ``dataset`` and score it at five fixed thresholds.

    For each batch the argmax predictions are added to the ``preds_stats``
    object taken from the enclosing scope. Then, document by document, the
    class-1 softmax probability is cut at 0.1, 0.2, 0.3, 0.4 and 0.5, and
    each cut is scored with its own ``accuracy.Accuracy`` accumulator. One
    debug line per threshold is logged, in ascending threshold order.

    Args:
        model: Callable returning a pair whose first element is the output
            scores; ``model.eval()`` is invoked before iterating.
        args: Namespace providing ``stop_after`` and ``cuda``.
        epoch: Zero-based epoch number; logged as ``epoch + 1``.
        dataset: Iterable with a length, yielding
            ``(data, target, paths, sent_bert_vec, target_idx)``.
        logger: Logger that receives the summary lines at debug level.
        threshold: Accepted for interface compatibility but not used; the
            thresholds are the fixed values listed above.

    Returns:
        The Pk value obtained with the 0.1 threshold.
    """
    model.eval()
    cutoffs = (0.1, 0.2, 0.3, 0.4, 0.5)
    with tqdm(desc='Testing', total=len(dataset)) as pbar:
        accumulators = [accuracy.Accuracy() for _ in cutoffs]
        for i, (data, target, paths, sent_bert_vec,
                target_idx) in enumerate(dataset):
            if i == args.stop_after:
                break
            pbar.update()
            output, _ = model(data, sent_bert_vec, target_idx)
            output_softmax = F.softmax(output, 1)
            targets_var = Variable(maybe_cuda(torch.cat(target, 0),
                                              args.cuda),
                                   requires_grad=False)
            output_seg = output.data.cpu().numpy().argmax(axis=1)
            target_seg = targets_var.data.cpu().numpy()
            preds_stats.add(output_seg, target_seg)

            # Convert the softmax output once per batch; only the class-1
            # column is needed below.
            boundary_probs = output_softmax.data.cpu().numpy()[:, 1]

            current_idx = 0
            for t in target:
                to_idx = int(current_idx + len(t))
                doc_probs = boundary_probs[current_idx:to_idx]

                # A trailing 1 is appended to both the hypothesis and the
                # reference before they are scored.
                tt = np.append(t, [1])
                for cutoff, acc in zip(cutoffs, accumulators):
                    acc.update(np.append(doc_probs > cutoff, [1]), tt)

                current_idx = to_idx

        # Compute every score first, then log, preserving the original order
        # of calls.
        scores = [acc.calc_accuracy() for acc in accumulators]

        for epoch_pk, epoch_windiff in scores:
            logger.debug(
                'Testing Epoch: {}, accuracy: {:.4}, Pk: {:.4}, Windiff: {:.4}, F1: {:.4} . '
                .format(epoch + 1, preds_stats.get_accuracy(), epoch_pk,
                        epoch_windiff, preds_stats.get_f1()))
        preds_stats.reset()

        return scores[0][0]
def _build_arg_parser():
    """Return the argparse parser holding every option of this script.

    Options whose name ends in ``2`` (``--batchsize2``, ``--epoch2``,
    ``--resume2``, ``--optimizer2``) are read by the classification stage;
    their unsuffixed counterparts are read by the clustering stage.
    """
    parser = argparse.ArgumentParser(
        description='Hierarchical Clustering and Classification')

    # Mini-batch sizes.
    parser.add_argument(
        '--batchsize', '-b', type=int, default=256,
        help='Number of images in each mini-batch for clustering')
    parser.add_argument(
        '--batchsize2', '-b2', type=int, default=64,
        help='Number of images in each mini-batch for classification')

    # Dataset / model selection.
    parser.add_argument('--data_type', '-d', type=str, default='toy',
                        help='dataset name')
    parser.add_argument('--model_type', '-m', type=str, default='linear',
                        help='model to use')
    parser.add_argument('--model_path', '-mp', type=str, default='',
                        help='pre-trained model if necessary')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='gpu number to use')
    parser.add_argument('--cluster', '-c', type=int, default=2,
                        help='the size of cluster')
    parser.add_argument('--weight_decay', '-w', type=float, default=0.0000,
                        help='weight decay for classification')
    parser.add_argument('--unit', '-u', type=int, default=300,
                        help='unit size for DocModel')

    # Training schedule.
    parser.add_argument('--alpha', '-a', type=float, default=0.001,
                        help='learning rate for clustering')
    parser.add_argument('--epoch', '-e', type=int, default=10,
                        help='the number of epochs for clustering')
    parser.add_argument('--epoch2', '-e2', type=int, default=100,
                        help='the number of epochs for classification')
    parser.add_argument('--mu', '-mu', type=float, default=150.0,
                        help='the hyper-parameter for clustering')

    # Input / output locations.
    parser.add_argument('--out', '-o', type=str, default='results',
                        help='output directory for result file')
    parser.add_argument('--train_file', '-train_f', type=str, default='',
                        help='training dataset file')
    parser.add_argument('--test_file', '-test_f', type=str, default='',
                        help='test dataset file')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--resume', '-r', default='',
                        help='resume the training from snapshot')
    parser.add_argument('--resume2', '-r2', default='',
                        help='resume the training from snapshot')

    # Optimizers and learning-rate decay.
    parser.add_argument('--optimizer', '-op', type=str, default='Adam',
                        help='optimizer for clustering')
    parser.add_argument('--optimizer2', '-op2', type=str, default='Adam',
                        help='optimizer for classification')
    parser.add_argument('--initial_lr', type=float, default=0.001,
                        help='initial learning rate for classification')
    parser.add_argument(
        '--lr_decay_rate', type=float, default=0.5,
        help='decay rate for classification if MomentumSGD is used')
    parser.add_argument(
        '--lr_decay_epoch', type=float, default=25,
        help='decay epoch for classification if MomentumSGD is used')

    parser.add_argument('--random', action='store_true', default=False,
                        help='Use random assignment or not')
    # The help text used to be a copy of the --random one. The flag is still
    # accepted for command-line compatibility, but main() never reads it.
    parser.add_argument(
        '--valid', '--v', action='store_true',
        help='Validation flag (parsed but not used by this script)')
    return parser


def _build_base_model(data_type, model_type, num_clusters, unit, model_path):
    """Create the network matching ``data_type`` / ``model_type``.

    Any ``data_type`` other than 'toy', 'mnist', 'cifar100', 'LSHTC1' and
    'Dmoz' falls through to the last branch.

    Returns:
        A tuple ``(model, ndim, n_in, train_transform, test_transform,
        sparse)``: the network plus the settings that depend on the dataset
        and are consumed later by data loading and the dataset wrappers.

    Raises:
        ValueError: if ``model_type`` is not offered for ``data_type``.
    """
    ndim = 1
    n_in = None
    train_transform = None
    test_transform = None
    sparse = False
    unsupported = 'unsupported model_type {!r} for data_type {!r}'.format(
        model_type, data_type)

    if data_type == 'toy':
        # The toy problem ignores model_type entirely.
        model = network.LinearModel(2, 2)
    elif data_type == 'mnist':
        if model_type == 'linear':
            model = network.LinearModel(784, num_clusters)
        elif model_type == 'DNN':
            model = network.MLP(1000, num_clusters)
        elif model_type == 'CNN':
            ndim = 3
            model = network.CNN(num_clusters)
        else:
            raise ValueError(unsupported)
    elif data_type == 'cifar100':
        train_transform = partial(dataset.transform,
                                  mean=0.0,
                                  std=1.0,
                                  train=True)
        test_transform = partial(dataset.transform,
                                 mean=0.0,
                                 std=1.0,
                                 train=False)
        # Both cifar100 models always load weights from model_path, skipping
        # the layer named in not_load_list.
        if model_type == 'Resnet50':
            model = network.ResNet50(num_clusters)
            n_in = 2048
            load_npz(model_path, model, not_load_list=['fc7'])
        elif model_type == 'VGG':
            model = network.VGG(num_clusters)
            n_in = 1024
            load_npz(model_path, model, not_load_list=['fc6'])
        else:
            raise ValueError(unsupported)
    elif data_type == 'LSHTC1':
        sparse = True
        # NOTE(review): DocModel uses n_in=1024 here while the linear model
        # uses 92586 (and Dmoz has the large value on DocModel instead) --
        # confirm these input sizes are intended.
        if model_type == 'DocModel':
            model = network.DocModel(n_in=1024, n_mid=unit, n_out=num_clusters)
        elif model_type == 'DocModel2':
            model = network.DocModel2(n_in=1024,
                                      n_mid=unit,
                                      n_out=num_clusters)
        elif model_type == 'linear':
            model = network.LinearModel(n_in=92586, n_out=num_clusters)
        else:
            raise ValueError(unsupported)
    elif data_type == 'Dmoz':
        sparse = True
        if model_type == 'DocModel':
            model = network.DocModel(n_in=561127,
                                     n_mid=unit,
                                     n_out=num_clusters)
        elif model_type == 'linear':
            model = network.LinearModel(n_in=1024, n_out=num_clusters)
        else:
            raise ValueError(unsupported)
    else:
        if model_type == 'Resnet50':
            model = network.ResNet50(num_clusters)
        elif model_type == 'Resnet101':
            model = network.ResNet101(num_clusters)
        elif model_type == 'VGG':
            model = network.VGG(num_clusters)
        elif model_type == 'CNN':
            model = network.CNN(num_clusters)
        else:
            raise ValueError(unsupported)

    return model, ndim, n_in, train_transform, test_transform, sparse


def main():
    """Run the two-stage pipeline: class-to-cluster assignment, then training.

    Stage 1 obtains ``assignment`` and ``count_classes`` either from
    ``random_assignment`` (``--random``) or by training the base model with
    the clustering updater and feeding the result of
    ``separate.det_cluster`` to ``separate.assign``.
    Stage 2 wraps the base model in ``h_net.HierarchicalNetwork`` and trains
    it with the classification updater, evaluating on the test split through
    a chainer ``Evaluator`` extension.

    Raises:
        ValueError: if the model type is not offered for the data type.
    """
    args = _build_arg_parser().parse_args()

    random.seed(args.seed)
    np.random.seed(args.seed)

    gpu = args.gpu
    num_clusters = args.cluster
    initial_lr = args.initial_lr
    opt2 = args.optimizer2

    (model, ndim, n_in, train_transform, test_transform,
     sparse) = _build_base_model(args.data_type, args.model_type,
                                 num_clusters, args.unit, args.model_path)

    if gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    (train_instances, train_labels), (test_instances, test_labels), \
        num_classes = load_data(args.data_type, ndim, args.train_file,
                                args.test_file)

    if args.random:
        assignment, count_classes = random_assignment(num_clusters,
                                                      num_classes)
    else:
        # ---- stage 1: clustering ----
        if args.optimizer == 'Adam':
            optimizer = chainer.optimizers.Adam(alpha=args.alpha)
        else:
            optimizer = chainer.optimizers.SGD(lr=args.alpha)
        optimizer.setup(model)

        train = clustering.dataset.Dataset(train_instances, train_labels,
                                           sparse)
        # Not used by the clustering trainer below; kept because the effect
        # of constructing it cannot be ruled out from this function alone.
        test = clustering.dataset.Dataset(test_instances, test_labels,
                                          sparse)

        train_iter = chainer.iterators.SerialIterator(
            train, batch_size=args.batchsize)

        train_updater = clustering.updater.Updater(model,
                                                   train,
                                                   train_iter,
                                                   optimizer,
                                                   num_clusters=num_clusters,
                                                   device=gpu,
                                                   mu=args.mu)

        trainer = training.Trainer(train_updater, (args.epoch, 'epoch'),
                                   out=args.out)

        trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
        trainer.extend(
            extensions.PrintReport([
                'epoch', 'iteration', 'main/loss', 'main/loss_cc',
                'main/loss_mut_info', 'main/H_Y', 'main/H_YX', 'elapsed_time'
            ]))
        trainer.extend(extensions.snapshot(), trigger=(5, 'epoch'))

        if args.resume:
            chainer.serializers.load_npz(args.resume, trainer)

        trainer.run()

        # ---- end of clustering: turn cluster labels into an assignment ----
        cluster_label = separate.det_cluster(model,
                                             train,
                                             num_classes,
                                             batchsize=128,
                                             device=gpu,
                                             sparse=sparse)

        assignment, count_classes = separate.assign(cluster_label, num_classes,
                                                    num_clusters)

        # Drop the stage-1 objects before the classification stage rebinds
        # the same names.
        del optimizer
        del train_iter
        del train_updater
        del trainer
        del train
        del test

        print(assignment)

    # ---- stage 2: classification ----
    model = h_net.HierarchicalNetwork(model,
                                      num_clusters,
                                      count_classes,
                                      n_in=n_in)
    if opt2 == 'Adam':
        optimizer2 = chainer.optimizers.Adam(alpha=initial_lr)
    elif opt2 == 'SGD':
        optimizer2 = chainer.optimizers.SGD(lr=initial_lr)
    else:
        # Any other value selects MomentumSGD.
        optimizer2 = chainer.optimizers.MomentumSGD(lr=initial_lr)
    optimizer2.setup(model)
    if args.weight_decay > 0:
        optimizer2.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))

    if gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    train = classification.dataset.Dataset(train_instances,
                                           train_labels,
                                           assignment,
                                           _transform=train_transform,
                                           sparse=sparse)
    test = classification.dataset.Dataset(test_instances,
                                          test_labels,
                                          assignment,
                                          _transform=test_transform,
                                          sparse=sparse)

    train_iter = chainer.iterators.SerialIterator(train,
                                                  batch_size=args.batchsize2)
    # The test split is evaluated one sample at a time, in a single pass.
    test_iter = chainer.iterators.SerialIterator(test,
                                                 batch_size=1,
                                                 repeat=False)

    train_updater = classification.updater.Updater(model,
                                                   train,
                                                   train_iter,
                                                   optimizer2,
                                                   num_clusters,
                                                   device=gpu)

    trainer = training.Trainer(train_updater, (args.epoch2, 'epoch'), args.out)

    acc = accuracy.Accuracy(model, assignment, num_clusters)
    trainer.extend(extensions.Evaluator(test_iter, acc, device=gpu))

    trainer.extend(
        extensions.snapshot(filename='snapshot_iter_{.updater.iteration}.npz'),
        trigger=(20, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'main/loss_cluster', 'main/loss_class',
            'validation/main/accuracy', 'validation/main/cluster_accuracy',
            'validation/main/loss', 'elapsed_time'
        ]))

    # Learning-rate decay is only scheduled for the non-Adam optimizers.
    if opt2 != 'Adam':
        trainer.extend(extensions.ExponentialShift('lr', args.lr_decay_rate),
                       trigger=(args.lr_decay_epoch, 'epoch'))

    if args.resume2:
        chainer.serializers.load_npz(args.resume2, trainer)

    trainer.run()
示例#10
0
 def evalModel(self):
     """Set up the evaluation state and run the embedding-quality check.

     Stores a one-element float tensor holding 1.0, moved to
     ``self.device``, in ``self.one``; attaches a new
     ``accuracy.Accuracy`` object as ``self.accObj``; then calls
     ``self.computeEmbeddingQuality()``.
     """
     unit_value = Variable(torch.FloatTensor([1.0]))
     self.one = unit_value.to(self.device)
     self.accObj = accuracy.Accuracy()
     self.computeEmbeddingQuality()
示例#11
0
def _export_boundary_probabilities(worksheet, batch_probs, batch_targets,
                                   article_stats):
    """Write per-document probabilities and targets to ``worksheet``.

    Document number ``k`` (counted across all batches) occupies two adjacent
    columns: column ``2 * k`` receives, for each sentence, the entry at
    index 1 of its probability row, and column ``2 * k + 1`` receives the
    matching target value. Rows are sentence positions within the document.

    Args:
        worksheet: object exposing ``write(row, col, value)``.
        batch_probs: one entry per batch; each is a list of probability rows.
        batch_targets: one entry per batch; each is a flat list of targets.
        article_stats: one entry per batch; each lists the sentence count of
            every document in that batch.

    Returns:
        A list with one list of index-1 probabilities per document.
    """
    export = []
    for batch, probs in enumerate(batch_probs):
        boundary = 0  # running sentence offset inside the current batch
        for sentences in article_stats[batch]:
            article = len(export)
            document_probs = []
            for sentence in range(sentences):
                probability = probs[boundary][1]
                document_probs.append(probability)
                worksheet.write(sentence, 2 * article, probability)
                worksheet.write(sentence, 2 * article + 1,
                                batch_targets[batch][boundary])
                boundary += 1
            export.append(document_probs)
    return export


def main(args):
    """Evaluate a trained segmentation model and export its probabilities.

    For every dataset folder the model is run over the data, accuracy / Pk /
    F1 are logged, and the per-sentence probabilities are written to
    ``output.xlsx`` and ``LSTM_probs.pkl``, which are then uploaded to the S3
    bucket.

    All export state (probabilities, sentence counts, workbook) is created
    per dataset folder. It used to be shared across folders while being
    consumed and closed inside the loop, so a run over several folders
    (``--bySegLength``) mixed batches of different folders together.

    Args:
        args: parsed command-line namespace. The fields read here are
            ``config``, ``seg_threshold``, ``test``, ``wiki``,
            ``wiki_folder``, ``bySegLength``, ``model``, ``naive``, ``bs``,
            ``stop_after``, ``cuda`` and ``calc_word``.
    """
    start = timer()

    sys.path.append(str(Path(__file__).parent))

    utils.read_config_file(args.config)
    utils.config.update(args.__dict__)

    logger.debug('Running with config %s', utils.config)
    print('Running with threshold: ' + str(args.seg_threshold))
    # NOTE(review): preds_stats is shared by all dataset folders, so the F1
    # logged for a later folder also covers the earlier ones -- confirm that
    # this is intended.
    preds_stats = utils.predictions_analysis()

    # Let's use Amazon S3
    s3 = boto3.resource('s3')
    mybucket = s3.Bucket('data.data-science.signal')
    myfolder = 'summaries-segmentation'
    # NOTE: the sample download (pullBucketSamples(mybucket,
    # myfolder + '/samples')) is currently disabled; the message below is
    # printed regardless.
    print('Samples pulled successfully into container')

    if not args.test:
        word2vec = gensim.models.KeyedVectors.load_word2vec_format(
            utils.config['word2vecfile'], binary=True)
    else:
        word2vec = None

    word2vec_done = timer()
    print('Loading word2vec ellapsed: ' + str(word2vec_done - start) +
          ' seconds')
    dirname = 'test'

    if args.wiki:
        if args.wiki_folder:
            dataset_folders = [args.wiki_folder]
        else:
            dataset_folders = [Path(utils.config['wikidataset']) / dirname]
        print('running on wikipedia')
    elif args.bySegLength:
        dataset_folders = getSegmentsFolders(utils.config['choidataset'])
        print('run on choi by segments length')
    else:
        dataset_folders = [utils.config['choidataset']]
        print('running on Choi')

    # The S3 key is the plain concatenation of folder and args.model, so
    # args.model has to supply its own separator.
    key = myfolder + args.model
    mybucket.Object(key).download_file('trained_model')

    # NOTE: torch.load unpickles the file; only use it on trusted models.
    with open('trained_model', 'rb') as f:
        model = torch.load(f)

    model = maybe_cuda(model)
    model.eval()

    if args.naive:
        model = naive.create()

    for dataset_path in dataset_folders:

        if args.bySegLength:
            # Same text the former multi-argument print statement produced.
            print('Segment is  {}  :'.format(os.path.basename(dataset_path)))

        if args.wiki:
            if args.wiki_folder:
                dataset = WikipediaDataSet(dataset_path,
                                           word2vec,
                                           folder=True,
                                           high_granularity=False)
            else:
                dataset = WikipediaDataSet(dataset_path,
                                           word2vec,
                                           high_granularity=False)
        else:
            dataset = ChoiDataset(dataset_path, word2vec)

        dl = DataLoader(dataset,
                        batch_size=args.bs,
                        collate_fn=collate_fn,
                        shuffle=False)

        # Export state of this dataset folder only.
        batch_probs = []
        batch_targets = []
        article_stats = []

        with tqdm(desc='Testing', total=len(dl)) as pbar:
            total_accurate = 0
            total_count = 0
            total_loss = 0
            acc = accuracy.Accuracy()

            for i, (data, targets, paths) in enumerate(dl):
                if i == args.stop_after:
                    break

                pbar.update()
                output = model(data)
                targets_var = Variable(maybe_cuda(torch.cat(targets, 0),
                                                  args.cuda),
                                       requires_grad=False)
                batch_loss = 0  # no loss is computed during testing
                output_prob = softmax(output.data.cpu().numpy())
                batch_probs.append(output_prob.tolist())
                output_seg = output_prob[:, 1] > args.seg_threshold
                target_seg = targets_var.data.cpu().numpy()
                batch_targets.append(target_seg.tolist())
                batch_accurate = (output_seg == target_seg).sum()
                total_accurate += batch_accurate
                total_count += len(target_seg)
                total_loss += batch_loss
                preds_stats.add(output_seg, target_seg)

                current_target_idx = 0
                document_lengths = []
                article_stats.append(document_lengths)
                for k, t in enumerate(targets):
                    document_sentence_count = len(t)
                    document_lengths.append(document_sentence_count)
                    sentences_length = [s.size()[0] for s in data[k]
                                        ] if args.calc_word else None
                    to_idx = int(current_target_idx + document_sentence_count)
                    h = output_seg[current_target_idx:to_idx]

                    # hypothesis and targets are missing classification of
                    # last sentence, and therefore we will add 1 for both
                    h = np.append(h, [1])
                    t = np.append(t.cpu().numpy(), [1])

                    acc.update(h, t, sentences_length=sentences_length)

                    current_target_idx = to_idx

                # NOTE(review): under Python 2 without
                # `from __future__ import division` this and the averages
                # below are integer divisions -- confirm the file enables
                # true division.
                batch_accuracy = batch_accurate / len(target_seg)
                logger.debug('Batch %s - error %7.4f, Accuracy: %7.4f', i,
                             batch_loss, batch_accuracy)
                pbar.set_description(
                    'Testing, Accuracy={:.4}'.format(batch_accuracy))

        average_loss = total_loss / len(dl)
        average_accuracy = total_accurate / total_count
        calculated_pk, _ = acc.calc_accuracy()

        # NOTE: earlier revisions wrote these files under /output/ when
        # running from a container.
        workbook = excel.Workbook('output.xlsx')
        worksheet = workbook.add_worksheet()
        export = _export_boundary_probabilities(worksheet, batch_probs,
                                                batch_targets, article_stats)

        # Save dataset as pickle
        with open('LSTM_probs.pkl', 'wb') as f:
            pkl.dump({'probs': export}, f, pkl.HIGHEST_PROTOCOL)
        workbook.close()

        # NOTE(review): the pickle is uploaded under a '.jsonl' key; the key
        # is kept as-is because consumers may depend on it.
        key = myfolder + '/testing/softmax_probs.jsonl'
        mybucket.Object(key).upload_file('LSTM_probs.pkl')
        key = myfolder + '/testing/output.xlsx'
        mybucket.Object(key).upload_file('output.xlsx')

        logger.info('Finished testing.')
        logger.info('Average loss: %s', average_loss)
        logger.info('Average accuracy: %s', average_accuracy)
        logger.info('Pk: {:.4}.'.format(calculated_pk))
        logger.info('F1: {:.4}.'.format(preds_stats.get_f1()))

        end = timer()
        print('Seconds to execute to whole flow: ' + str(end - start))
示例#12
0
import matplotlib.pyplot as plt
#import mpl_toolkits.axisartist as axisartist

# Basic figure typography settings.
font_size = 8
fig_width = 3.0

# Only referenced by the disabled matplotlib.rc call further down.
font = dict(family='serif', serif=['Times'], size=font_size)

# Render text through LaTeX with a serif face and load siunitx in the
# LaTeX preamble.
plt.rc('text', usetex=True)
plt.rc('font', family='serif')
plt.rcParams['text.latex.preamble'] = r'\usepackage{siunitx}'

#matplotlib.rc('font', **font)

# Data sources for the speed and accuracy measurements.
speed_data = speed.Speed()
accuracy_data = accuracy.Accuracy()

# Result containers, empty at this point.
speed_averages, speed_per_mat_element = {}, {}
accuracy_averages, accuracy_per_mat_element = {}, {}

# Lookup from series value to row count (presumably the matrix row count of
# each measured series -- confirm against the data files).
series_to_rowcount = {
    1000.0: 3000.0,
    2000.0: 5400.0,
    3000.0: 8526.0,
    4000.0: 12288.0,
    6000.0: 18468.0,
    8000.0: 24000.0,
    12000.0: 33396.0,
    16000.0: 50700.0,
}
示例#13
0
# Score every file in openDir with accuracy.Accuracy and write one
# "<name>: <score>" line per file to evalFile.
fileList = os.listdir(openDir)
# Collected report lines. This list used to be called `eval`, which shadowed
# the builtin of the same name for the rest of the module.
evalLines = []
count = 0  # stays 0 when the directory is empty
totalLen = len(fileList)

for count, item in enumerate(fileList, 1):
    inFiles = os.path.join(openDir, item)
    outName = "output{}{}-{}".format(shouldNormalize, similarityType, item)
    outFiles = os.path.join(outDir, outName)

    ac1 = accuracy.Accuracy(iF=inFiles,
                            vF=vectorFile,
                            sN=shouldNormalize,
                            sT=similarityType,
                            oF=outFiles)
    vectorDict = ac1.vectorIn()
    inputRead = ac1.inputIn()
    finalVector = ac1.processVector(vectorDict=vectorDict, inputRead=inputRead)
    ans = ac1.finalAns(vectorDict=vectorDict,
                       inputRead=inputRead,
                       finalVector=finalVector)
    # The report label is everything before the first dot of the file name.
    fileName = str(item).split(".")[0]
    # NOTE(review): this raises ZeroDivisionError when inputRead is empty,
    # and floors the ratio if `ans` is an int under Python 2 -- confirm both
    # are acceptable.
    evalLines.append("{}: {}\n".format(fileName, ans / len(inputRead)))
    print("{} {}/{}".format(fileName, count, totalLen))

finalEval = "".join(evalLines)
with open(evalFile, "w") as eva:
    eva.write(finalEval)