Example #1
def test(test_file_path, model_path):
    preprocessor = PreProcessor()
    test_data = preprocessor.get_test(test_file_path, GRAMMAR_FILE,
                                      PRIMITIVE_TYPES)
    print('load model from [%s]' % model_path)

    params = torch.load(model_path, map_location=lambda storage, loc: storage)
    transition_system = params['transition_system']
    vocab = params['vocab']
    parser = TranxParser(vocab, transition_system)
    parser.load_state_dict(params['state_dict'])
    parser.cuda()
    parser.eval()

    evaluator = ConalaEvaluator(transition_system)

    eval_results, decode_results = evaluation.evaluate(
        test_data,
        parser,
        evaluator,
        BEAM_SIZE,
        verbose=True,
        return_decode_result=True)

    print(eval_results)
    pickle.dump(decode_results, open(SAVE_DECODE_TO, 'wb'))
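Note: the map_location lambda in test() remaps every saved CUDA storage to CPU at load time. A minimal sketch of the same idea using the equivalent string form (toy checkpoint path, not from the original project):

import torch

# Round-trip a toy checkpoint; map_location='cpu' is equivalent to the
# lambda storage, loc: storage used above.
torch.save({'state_dict': {'w': torch.zeros(2)}}, '/tmp/toy_ckpt.bin')
params = torch.load('/tmp/toy_ckpt.bin', map_location='cpu')
print(params['state_dict']['w'].device)  # cpu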
Example #2
    def __init__(self,
                 L_layer_count,
                 k_hash_functions_per_layer,
                 w_parameter=0.20):
        self.preprocessor = PreProcessor()  # initialize the preprocessor instance
        # Dictionary mapping image id -> list of features; the values get indexed.
        self.data_dict = dict()
        self.L_layer_count = L_layer_count  # the number of layers
        self.k_hash_functions_per_layer = k_hash_functions_per_layer  # hash functions per layer
        self.feature_count = 0  # set in init_data(); used to generate the random projections
        # Image IDs from data_dict, kept separately. Ordering is not a problem:
        # since Python 3.7, dictionaries preserve insertion order.
        self.image_ids = list()
        self.data_matrix = []  # the data matrix of features alone
        # Initializes the image IDs, data matrix and feature count (depends on input data).
        self.init_data()
        self.w_parameter = w_parameter  # w in (1)
        self.hash_tables = list()  # the list of hash tables, i.e. layers

        print("Initializing the hash tables...")
        for value in range(
                self.L_layer_count
        ):  # create L hash tables with k hash functions per layer
            self.hash_tables.append(
                HashTable(self.k_hash_functions_per_layer, self.feature_count,
                          self.w_parameter))

        self.fill_all_hashtables(
        )  # Index all the data points in all the layers
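Note: each HashTable layer above would typically hold k random-projection hash functions. A self-contained sketch of one Euclidean-LSH hash function, with w playing the role of w_parameter (illustrative names, not the original HashTable API):

import numpy as np

# h(x) = floor((a . x + b) / w), with a Gaussian-distributed projection
# vector and b drawn uniformly from [0, w).
rng = np.random.default_rng(0)

def make_hash_function(feature_count, w):
    a = rng.standard_normal(feature_count)  # random projection direction
    b = rng.uniform(0, w)                   # random offset
    return lambda x: int(np.floor((a @ x + b) / w))

h = make_hash_function(feature_count=4, w=0.20)
print(h(np.array([0.1, 0.5, 0.3, 0.9])))  # bucket index for one feature vector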
Example #3
 def __init__(self, *args, **kwargs):
     super(AbstractTestContract, self).__init__(*args, **kwargs)
     self.pp = PreProcessor()
     self.s = t.state()
     self.s.block.number = HOMESTEAD_BLOCK
     # t.gas_limit = 4712388
     t.gas_limit = 2000000
Example #4
def ncd_loop(doInit, dlThreadNum):
	ndutil.setTimezone()

#read config
	cnfManager = CnfManager()
	cnfManager.load('./ndc.cnf')
	cnfData = cnfManager.getCnfData()

#check dirs
	ndutil.enableDir(cnfData['dirWorking'])
	ndutil.enableDir(cnfData['dirStore'])

#ndlcom
	logger = Logger('nDroid-Crawler', cnfData['ndlComHost'], cnfData['ndlComPort'])
	logger.logger('Initiating')

	dbManager = DbManager(cnfData['dbHost'], cnfData['dbUser'], cnfData['dbPass'], cnfData['dbName'])
	if doInit:
		dbManager.create_table()
		os.system('rm -f %s/*' % cnfData['dirWorking'])
		os.system('rm -f %s/*' % cnfData['dirStore'])

	#logger.logger('Customizing Spiders')
	#spiderGenerator = SpiderGenerator('template', 'spider/spiders')
	#for spider in cnfData['spiders']:
	#	spiderGenerator.gen_spider(spider, cnfData[spider]['startPage'], cnfData[spider]['stopPage'])

	rpQueue = Queue()
	pdQueue = Queue()
	dpQueue = Queue()
	pdLock = threading.Lock()

	rpcMonitor = RpcMonitor(logger, cnfData['rpcPort'], cnfData['rpcAuth'], 'RpcMonitor')
	rpcMonitor.setGlobalInfo(ndutil.getCurrentTimeStr(), 'Standalone', dlThreadNum)
	rpcMonitor.setDownloadTotal(pdQueue.qsize())
	rpcMonitor.setPdQueueSize(pdQueue.qsize())
	
	botScheduler = BotScheduler(logger, rpcMonitor, cnfData['spiders'], cnfData['spiderCnfs'], 'BotScheduler')
	receiver = Receiver(logger, rpcMonitor, rpQueue, cnfData['receiverPort'], 'Receiver')
	preProcessor = PreProcessor(logger, rpcMonitor, rpQueue, pdQueue, pdLock, dbManager, 'PreProcessor')
	downloader = Downloader([logger, rpcMonitor, pdQueue, dpQueue, pdLock, dlThreadNum, cnfData['dirWorking']], 'Downloader')
	processor = Processor(logger, rpcMonitor, dpQueue, pdLock, pdQueue, dbManager, cnfData['dirWorking'], cnfData['dirStore'], 'Processor')

	logger.logger('Starting Threads')
	rpcMonitor.start()
	botScheduler.start()
	receiver.start()
	preProcessor.start()
	downloader.start()
	processor.start()
	
	processor.join()
	downloader.join()
	preProcessor.join()
	receiver.join()
	botScheduler.join()
	rpcMonitor.join()
Example #5
def check_review_preprocess():
    data = pd.read_csv('test_data/test_revs.csv')

    preprocess = PreProcessor()
    preprocessed_revs = preprocess.preprocess_review_list(data['text'])

    if len(preprocessed_revs) > 0:
        print('Testing passed - preprocessing succeeded!')
    else:
        print('Preprocessing has failed')
Example #6
 def predict_comment(self):
     with open('selected_words.txt', 'r', encoding='UTF-8') as fd:
         selected_words = json.loads(fd.read())
     model = models.load_model("toxic_comment_model.h5")
     pprocess = PreProcessor()
     token = pprocess.tokenize(self.input_comment)
     tf = pprocess.term_frequency(token, selected_words)
     data = np.expand_dims(np.asarray(tf).astype('float32'), axis=0)
     score = float(model.predict(data))  # the cleaner the comment, the lower the score
     return score
Example #7
 def tokenize_reviews(self, all_reviews):
     all_review_tokens = []
     preprocessing = PreProcessor()
     for review_text in all_reviews:
         # remove symbols
         review_text = preprocessing.remove_symbols(review_text)
         # clean review
         tokens = preprocessing.clean_review(review_text)
         # collect all the tokens
         all_review_tokens.append(tokens)
     return all_review_tokens
Example #8
 def __init__(self, db, ldamodel, topic_classifier, severity_classifier,
              sentiment_classifier, config):
     super().__init__()
     self.__db = db
     self.__preprocessor = PreProcessor(db)
     self.__analyzer = Analyzer(db, ldamodel, topic_classifier,
                                severity_classifier, sentiment_classifier,
                                config)
     self.__dataviewer = Dataviewer(db, config)
     self.__ldamodel = ldamodel
     self.__config = config
Example #9
 def __init__(self, *args, **kwargs):
     super(TestContract, self).__init__(*args, **kwargs)
     self.s = t.state()
     self.s.block.number = self.HOMESTEAD_BLOCK
     t.gas_limit = 4712388
     self.pp = PreProcessor()
     self.multisig_abi = self.s.abi_contract(
         self.pp.process('MultiSigWalletWithDailyLimit.sol', contract_dir='solidity/', add_dev_code=True),
         language='solidity',
         constructor_parameters=([accounts[0]], 1, 0),
         contract_name="MultiSigWalletWithDailyLimit"
     ).translator
Example #10
    def __init__(self, input_file):
        """
        Initializes Transformer class and instantiates all the pipeline
        step classes

        :param input_file: input XML file, downloaded from NAIS
        :type input_file: str
        """

        self.reader = EDRReader(input_file)
        self.preprocessor = PreProcessor(tokenize_words)
        self.beneficiary_categorizer = HasBeneficiaryOwnershipRecord()
        self.parser = HeuristicBasedParser()
Example #11
    def aspect_common_details(self, threshold):
        pp = PreProcessor()

        for aspect, aspect_detail in self.reviews_per_aspect.items():
            reviews = aspect_detail.review_list

            reviews = [pp.preprocess_review(review, False) for review in reviews]

            generate_mentions = MentionsGenerator()
            common_mentions = generate_mentions.get_common_mentions(reviews, threshold)
            aspect_detail.common_mentions = common_mentions

            generate_common_terms = CommonTermsGenerator()
            common_term_details = generate_common_terms.get_common_term_details(reviews, threshold)
            aspect_detail.common_term_details = common_term_details
Example #12
    def __init__(self):

        conf_path_nn = '/src/nn/conf/nn.yaml'
        with open(conf_path_nn, 'r') as fd:
            self.conf = yaml.safe_load(fd)

        self.batch_size = self.conf['testing']['batch_size']
        self.checkpoint_path = self.conf['misc']['checkpoint_path']

        self.logger = Logger
        self.preProcessor = PreProcessor(Logger)
        self.nn = Nn(Logger, conf_path_nn, testing=True)
        self.sess = tf.compat.v1.Session()

        self.start()
Example #13
def compare_test_to_db(test_query):
    preprocessor = PreProcessor()
    facts = []
    with open('facts.txt', 'r') as fact_file:
        fact_data = fact_file.read().split("\n")
    for data in fact_data:
        fact = Fact(data, preprocessor.preprocess(data), FactType.TRUTH)
        facts.append(fact)

    text_comp = TextComparator()
    # text_comp.train_model(facts)
    file_name = "test"
    # text_comp.save_model(file_name)
    text_comp.load_model(file_name)
    test_fact = Fact(test_query, preprocessor.preprocess(test_query),
                     FactType.TRUTH)
    return text_comp.match_fact(test_fact, facts, topn=3)
Example #14
    def __init__(self):

        conf_path_cnn = '/src/cnn/conf/cnn.yaml'
        with open(conf_path_cnn, 'r') as fd:
            self.conf = yaml.safe_load(fd)

        # load configuration
        self.num_epochs = self.conf['training']['epochs']
        self.batch_size = self.conf['training']['batch_size']
        self.checkpoint_path = self.conf['misc']['checkpoint_path']

        self.logger = Logger
        self.preProcessor = PreProcessor(Logger)
        self.cnn = Cnn(Logger, conf_path_cnn)
        self.sess = tf.compat.v1.Session()

        self.start()
Example #15
class TestContract(TestCase):
    """
    run test with python -m unittest tests.test_daily_limit_overflow
    """

    HOMESTEAD_BLOCK = 1150000

    def __init__(self, *args, **kwargs):
        super(TestContract, self).__init__(*args, **kwargs)
        self.s = t.state()
        self.s.block.number = self.HOMESTEAD_BLOCK
        t.gas_limit = 4712388
        self.pp = PreProcessor()

    def test(self):
        # Create wallet
        required_accounts = 2
        daily_limit = 2000
        wa_1 = 1
        wa_2 = 2
        constructor_parameters = ([accounts[wa_1], accounts[wa_2]],
                                  required_accounts, daily_limit)
        self.multisig_wallet = self.s.abi_contract(
            self.pp.process('MultiSigWalletWithDailyLimit.sol',
                            contract_dir='solidity/',
                            add_dev_code=True),
            language='solidity',
            constructor_parameters=constructor_parameters,
            contract_name="MultiSigWalletWithDailyLimit")
        # Send money to wallet contract
        deposit = 10000
        self.s.send(keys[wa_1], self.multisig_wallet.address, deposit)
        self.assertEqual(
            self.s.block.get_balance(self.multisig_wallet.address), deposit)
        self.assertEqual(self.multisig_wallet.dailyLimit(), daily_limit)
        # Withdraw daily limit
        value = 2000
        tx_1 = self.multisig_wallet.submitTransaction(accounts[wa_1],
                                                      value,
                                                      "",
                                                      sender=keys[wa_2])
        # Transaction succeeds
        self.assertTrue(self.multisig_wallet.transactions(tx_1)[3])
        # Try to overflow spentToday to reset it to 0.
        tx_2 = self.multisig_wallet.submitTransaction(accounts[wa_1],
                                                      2**256 - value,
                                                      "",
                                                      sender=keys[wa_2])
        # Transaction cannot complete and spentToday is still == 2000.
        self.assertFalse(self.multisig_wallet.transactions(tx_2)[3])
        self.assertEqual(self.multisig_wallet.spentToday(), daily_limit)
        # User tries to withdraw the daily limit again on the same day. It fails because the daily limit was already withdrawn.
        tx_3 = self.multisig_wallet.submitTransaction(accounts[wa_1],
                                                      value,
                                                      "",
                                                      sender=keys[wa_2])
        self.assertFalse(self.multisig_wallet.transactions(tx_3)[3])
Example #16
class TestContract(TestCase):
    """
    run test with python -m unittest tests.test_owner_replacement
    """

    HOMESTEAD_BLOCK = 1150000

    def __init__(self, *args, **kwargs):
        super(TestContract, self).__init__(*args, **kwargs)
        self.s = t.state()
        self.s.block.number = self.HOMESTEAD_BLOCK
        t.gas_limit = 4712388
        self.pp = PreProcessor()

    def test(self):
        # Create wallet
        required_accounts = 2
        wa_1 = 1
        wa_2 = 2
        constructor_parameters = ([accounts[wa_1],
                                   accounts[wa_2]], required_accounts)
        self.multisig_wallet = self.s.abi_contract(
            self.pp.process('MultiSigWallet.sol',
                            contract_dir='solidity/',
                            add_dev_code=True),
            language='solidity',
            constructor_parameters=constructor_parameters)
        self.assertTrue(self.multisig_wallet.isOwner(accounts[wa_1]))
        self.assertTrue(self.multisig_wallet.isOwner(accounts[wa_2]))
        # Create ABIs
        multisig_abi = self.multisig_wallet.translator
        # Exchange owner wa_2 with wa_3
        wa_3 = 3
        exchange_owner_data = multisig_abi.encode(
            'replaceOwner', [accounts[wa_2], accounts[wa_3]])
        # Only a wallet owner (in this case wa_1) can do this. Owner confirms transaction at the same time.
        transaction_id = self.multisig_wallet.submitTransaction(
            self.multisig_wallet.address,
            0,
            exchange_owner_data,
            sender=keys[wa_1])
        # Other owner wa_2 confirms and executes transaction at the same time as min sig are available
        self.assertFalse(self.multisig_wallet.transactions(transaction_id)[3])
        self.assertEqual(
            self.multisig_wallet.getOwners(),
            [accounts[wa_1].encode('hex'), accounts[wa_2].encode('hex')])
        self.multisig_wallet.confirmTransaction(transaction_id,
                                                sender=keys[wa_2])
        # Transaction was executed
        self.assertTrue(self.multisig_wallet.transactions(transaction_id)[3])
        # Owner was switched
        self.assertFalse(self.multisig_wallet.isOwner(accounts[wa_2]))
        self.assertTrue(self.multisig_wallet.isOwner(accounts[wa_3]))
        self.assertEqual(
            self.multisig_wallet.getOwners(),
            [accounts[wa_1].encode('hex'), accounts[wa_3].encode('hex')])
Example #17
def classify(text):
    text = PreProcessor.clean_text(text)
    data = np.array([text])
    data = tokenizer.texts_to_sequences(data)
    data = tf.keras.preprocessing.sequence.pad_sequences(data, maxlen=MAX_DOCUMENT_LENGTH)

    y_predicted = np.argmax(classifier.predict(data), axis=1)

    topic = news_classes.class_map[str(y_predicted[0]+1)]
    return topic
Example #18
 def __init__(self, protocol, host, port, add_dev_code, verify_code,
              contract_dir, gas, gas_price, private_key):
     self.pp = PreProcessor()
     self.s = t.state()
     self.s.block.number = 1150000  # Homestead
     t.gas_limit = int(gas)
     self.json_rpc = EthJsonRpc(protocol=protocol, host=host, port=port)
     if private_key:
         self.user_address = '0x' + privtoaddr(
             private_key.decode('hex')).encode('hex')
     else:
         self.user_address = self.json_rpc.eth_coinbase()["result"]
     self.add_dev_code = add_dev_code == 'true'
     self.verify_code = verify_code == 'true'
     self.contract_dir = contract_dir
     self.gas = int(gas)
     self.gas_price = int(gas_price)
     self.private_key = private_key
     self.contract_addresses = {}
     self.contract_abis = {}
Example #19
    def is_review_in_aspect(self, aspect, review):
        pp = PreProcessor()
        review = pp.preprocess_review(review, False)
        tokens = word_tokenize(review)

        # use pos tag to find all NN and NNS
        tagged_words = pos_tag(tokens)

        # tags starting with 'NN' cover NN, NNS, NNP and NNPS
        is_noun = lambda pos: pos[:2] == 'NN'
        nouns = [word for (word, pos) in tagged_words if is_noun(pos)]

        asb = AspectSynonymBank()
        aspect_syn = asb.get_synonyms_for_aspect(aspect)

        match = [word for word in nouns if word in aspect_syn]

        return len(match) > 0
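Note: POS tags beginning with 'NN' already include the plural and proper-noun variants, so a single prefix check suffices. A self-contained version of the noun filter, assuming the standard NLTK data packages are available:

import nltk
from nltk import pos_tag, word_tokenize

nltk.download('punkt', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)

tokens = word_tokenize("The battery life of this phone is great")
nouns = [w for w, pos in pos_tag(tokens) if pos.startswith('NN')]  # NN, NNS, NNP, NNPS
print(nouns)  # e.g. ['battery', 'life', 'phone']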
Example #20
 def __init__(self, read_count=5000, seg_length=.05):
     '''
     INPUTS: read_count = total number of songs to be read;
     seg_length = length of song segments that song will be split into (in seconds)
     '''
     '''
     band_type controls how frequencies are binned, can have 
     logarithmic (nonlinear) or linear spacing
     '''
     self.band_type = "log"
     '''limit the number of files that are read'''
     self.files_read_count = read_count
     ''' total number of frequency bins'''
     self.freq_bin_total = 15
     '''
     the song is split into chunks of length segment_length (.05 seconds by default)
     and a Fourier transform is performed on each chunk
     '''
     self.segment_length = seg_length
     '''read in song class labels'''
     prepro = PreProcessor()
     self.labels = prepro.getSongLabels()
Example #21
def main(train_file, test_file):
    # FILE_NAME = argv[0]
    pprocess = PreProcessor()
    train_data = pprocess.load_data(train_file)
    test_data = pprocess.load_data(test_file)

    labeled_train_df = pprocess.determine_pos_neg_label(train_data)
    labeled_test_df = pprocess.determine_pos_neg_label(test_data)

    selected_words = pprocess.preprocess_data(labeled_train_df)
    train_model(pprocess, labeled_train_df, labeled_test_df, selected_words)
Example #22
    def __init__(self, de_context, preprocessor=PreProcessor()):
        """
        Parameters
        ----------
        de_context: DeContext
        preprocessor: PreProcessor
        
        Attributes
        ----------
        _train_data_location: str
            training data location including schema
        _valid_data_location: str
            training data location including schema
        _preprocessor: PreProcessor
        _dp_queue: Queue
            data pointer queue
        _data_queue: Queue
            data queue, which holds mini-batch data
        _m_queue: Queue
            message queue
        _n_epoch: int
        _batch_size: int
            batch size
        read_workers: List of Process
            read_workers read data from _dp_queue
        N_READ_WORKERS: int
        N_SHUFFLE: int
        """

        # Attributes
        self._train_data_location = de_context.data_config.train_data_location
        self._valid_data_location = de_context.data_config.valid_data_location

        self._preprocessor = preprocessor
        self._n_epoch = de_context.global_config.n_epoch
        self._batch_size = de_context.global_config.batch_size

        self._dp_queue = Queue(0)
        self._data_queue = Queue(0)
        self._m_queue = Queue()
        self.read_workers = []

        # Put data pointers
        self._put_all_data_pointers()

        # Create and run workers
        self._create_and_run_read_workers()

        # Set signal handler
        signal.signal(signal.SIGINT, self._signal_handler)
        signal.signal(signal.SIGTERM, self._signal_handler)
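Note: registering the same handler for SIGINT and SIGTERM, as done at the end of __init__ above, lets both Ctrl-C and kill trigger a clean worker shutdown. A standalone sketch (illustrative handler, not the original _signal_handler):

import signal
import sys

def _signal_handler(signum, frame):
    print('received signal %d, shutting down workers...' % signum)
    sys.exit(0)

signal.signal(signal.SIGINT, _signal_handler)
signal.signal(signal.SIGTERM, _signal_handler)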
Example #23
class DNDFileEventHandler(PatternMatchingEventHandler):
    patterns = ["*.json"]

    def __init__(self, db, ldamodel, topic_classifier, severity_classifier,
                 sentiment_classifier, config):
        super().__init__()
        self.__db = db
        self.__preprocessor = PreProcessor(db)
        self.__analyzer = Analyzer(db, ldamodel, topic_classifier,
                                   severity_classifier, sentiment_classifier,
                                   config)
        self.__dataviewer = Dataviewer(db, config)
        self.__ldamodel = ldamodel
        self.__config = config

    def on_modified(self, event):
        self.process(event.src_path)

    def on_created(self, event):
        self.process(event.src_path)

    def process(self, filepath):
        # print(f'event type: {event.event_type}  path : {event.src_path}')

        file_name = os.path.basename(filepath)

        with open(filepath, 'r') as myfile:
            data = myfile.read()

            obj = json.loads(data)
            upproc_docid = self.__db.insert_unprocessed(file_name, obj)

            #self.__ldamodel.run_unseendata(str(obj['title']))

            #PRE-PROCESSING
            #print('Started Pre-Processing .......')
            preproc_docid = self.__preprocessor.preprocess(upproc_docid)
            #print('Completed Pre-Processing .......')

            # ANALYZE
            print('Started Analyzing .......')
            is_analyzed = self.__analyzer.analyze(preproc_docid, upproc_docid)
            print('Completed Analyzing .......')

            if is_analyzed and self.__config['Settings']['batch-runner'] == 0:
                topic_to_watch = self.__config['Settings']['topic-to-watch']
                # UPDATE VIEWER
                print('Started Graph .......')
                self.__dataviewer.plot(topic_to_watch)
                print('Completed Graph .......')
Example #24
class Train:

    def __init__(self):

        conf_path_nn = '/src/nn/conf/nn.yaml'
        with open(conf_path_nn, 'r') as fd:
            self.conf = yaml.safe_load(fd)

        # load configuration
        self.num_epochs = self.conf['training']['epochs']
        self.batch_size = self.conf['training']['batch_size']
        self.checkpoint_path = self.conf['misc']['checkpoint_path']

        self.logger = Logger
        self.preProcessor = PreProcessor(Logger)
        self.nn = Nn(Logger, conf_path_nn)
        self.sess = tf.compat.v1.Session()

        self.start()

    def start(self):
        train_x, val_x, test_x, train_y, val_y, test_y = self.preProcessor.get_data()

        self.sess.run(tf.compat.v1.global_variables_initializer())
        self.sess.run(tf.compat.v1.local_variables_initializer())

        saver = tf.compat.v1.train.Saver()
        num_iterations = int(len(train_x) / self.batch_size)

        self.logger.info('training...')
        for epoch in range(self.num_epochs):

            for iter_ in range(num_iterations):
                batch_x = train_x[iter_ * self.batch_size:(iter_ + 1) * self.batch_size, :]
                batch_y = train_y[iter_ * self.batch_size:(iter_ + 1) * self.batch_size, :]

                self.sess.run(self.nn.training, feed_dict={self.nn.layer_input: batch_x, self.nn.ground_truth: batch_y})

            # train loss
            _, loss_train, acc_train = self.nn.analyze_epoch(self.sess, train_x, train_y)

            # val loss
            _, loss_val, acc_val = self.nn.analyze_epoch(self.sess, val_x, val_y)

            self.logger.info('epoch: {0}/{1}, loss_train: {2}, acc_train: {3}, loss_val: {4}, acc_val: {5}'
                             .format(epoch + 1, self.num_epochs, loss_train, acc_train, loss_val, acc_val))

        # save model
        saved_path = saver.save(self.sess, '{0}model.ckpt'.format(self.checkpoint_path))
        self.logger.info('model saved in {0}'.format(saved_path))
        self.sess.close()
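Note: the batching in start() runs int(len(train_x) / batch_size) full batches, so any remainder rows are silently skipped each epoch. The slicing arithmetic, shown standalone with toy data:

import numpy as np

train_x = np.arange(25).reshape(25, 1)
batch_size = 8
num_iterations = len(train_x) // batch_size  # 3 full batches; row 24 is never used
for i in range(num_iterations):
    batch = train_x[i * batch_size:(i + 1) * batch_size]
    print(i, batch.ravel())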
Example #26
    def generate_summary(self, review):
        pp = PreProcessor()

        formatted_text = pp.preprocess_review(review, True)
        sentences = sent_tokenize(review)

        word_frequencies = {}
        stop_words = set(stopwords.words('english'))  # compute once, outside the loop
        for word in word_tokenize(formatted_text):
            if word not in stop_words:
                if word not in word_frequencies.keys():
                    word_frequencies[word] = 1
                else:
                    word_frequencies[word] += 1

        maximum_frequency = max(word_frequencies.values())

        for word in word_frequencies.keys():
            word_frequencies[word] = (word_frequencies[word] /
                                      maximum_frequency)

        sentence_scores = {}
        for sent in sentences:
            for word in word_tokenize(sent.lower()):
                if word in word_frequencies.keys():
                    if len(sent.split(' ')) < 80:
                        if sent not in sentence_scores.keys():
                            sentence_scores[sent] = word_frequencies[word]
                        else:
                            sentence_scores[sent] += word_frequencies[word]

        sent_summary = heapq.nlargest(2,
                                      sentence_scores,
                                      key=sentence_scores.get)
        summarized = ' '.join(sent_summary)

        return summarized
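Note: heapq.nlargest with a key pulls the top-scoring sentences straight out of the score dict without sorting everything. Standalone, with toy scores:

import heapq

scores = {'good battery': 1.8, 'ok screen': 0.9, 'fast cpu': 1.2}
print(heapq.nlargest(2, scores, key=scores.get))  # ['good battery', 'fast cpu']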
Example #27
    def __init__(self, train_test_dir, vocab, test_ground_truth_path):
        self.model = PreProcessor(train_test_dir, vocab)

        self.train_test_dir = train_test_dir
        self.test_ground_truth_path = test_ground_truth_path

        # { Class(int) : Prior Probability of class (double) }
        self.prior = {}

        # { (Class(int) , Feature (string)) : Conditional Probability (double) }
        self.condProb = {}

        # list of (Feature(string),CCE(double))
        self.featureCCE = []

        """
Example #28
def main():
    r0 = CXXParser("ctpapi/a.cpp").parse()
    r1 = PreProcessor(r0).process()
    r1.dict_classes.clear()

    constants = r0.variables
    constants.update(r1.const_macros)
    constants = {k: v for k, v in constants.items() if not k.startswith('_')}

    functions = r0.functions
    classes = r1.classes

    # make all api "final" to improve performance
    for c in classes.values():
        type = c.name[-3:]
        if type == "Api":
            for ms in c.functions.values():
                for m in ms:
                    if m.is_virtual:
                        m.is_pure_virtual = False
                        m.is_final = True
        elif type == 'Spi':
            for ms in c.functions.values():
                for m in ms:
                    m.is_virtual = True
                    # m.is_pure_virtual = True
                    m.is_final = False

    options = GeneratorOptions(
        typedefs=r0.typedefs,
        constants=constants,
        functions=functions,
        classes=classes,
        dict_classes=r1.dict_classes,
        enums=r0.enums,
    )

    saved_files = Generator(options=options).generate()
    output_dir = "./generated_files"
    # clear output dir
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    clear_dir(output_dir)

    for name, data in saved_files.items():
        with open(f"{output_dir}/{name}", "wt") as f:
            f.write(data)
Example #29
def main():
	fpath = argv[1]
	with open(fpath, 'r') as f:
		data = f.read()
	
	auto_process_libs = [
		'math',
		'os.path',
	]

	auto_import_libs = [
		*auto_process_libs,
		'os',
	]

	options = Options(
		debug=True,
		imports=auto_import_libs,
		eval_mod=auto_process_libs,
	)

	data = PreProcessor(options=options).preprocess(data)
	data = Transpiler(options=options).transpile(data)
	tree = parse(data)
	tree = Generator(options=options).generate(tree)
	tree = Optimizer(options=options).optimize(tree)
	tree = Inliner(options=options).inline(tree)
	tree = Importer(options=options).clean_imports(tree)
	tree = UnusedRemover(options=options).remove_unused(tree)
	code = Unparser.unparse(tree)
	code = Minifier(options=options).minify(code)

	log(dump(tree))
	log(code.replace('\n', '\\n\n'))

	path_parts = fpath.split('.')
	out_path = join(getcwd(), 'dist',
					''.join(path_parts[:-1])[1:] + '.py')
	out_dir = dirname(out_path)

	if not exists(out_dir):
		mkdir(out_dir)

	with open(out_path, 'w') as f:
		f.write(code)
Example #30
class TestContract(TestCase):
    """
    run test with python -m unittest tests.test_daily_limit_dos
    """

    HOMESTEAD_BLOCK = 1150000

    def __init__(self, *args, **kwargs):
        super(TestContract, self).__init__(*args, **kwargs)
        self.s = t.state()
        self.s.block.number = self.HOMESTEAD_BLOCK
        t.gas_limit = 4712388
        self.pp = PreProcessor()

    def test(self):
        # Create wallet
        required_accounts = 2
        daily_limit = 2000
        wa_1 = 1
        wa_2 = 2
        constructor_parameters = (
            [accounts[wa_1], accounts[wa_2]],
            required_accounts,
            daily_limit
        )
        self.multisig_wallet = self.s.abi_contract(
            self.pp.process('MultiSigWalletWithDailyLimit.sol', contract_dir='solidity/', add_dev_code=True),
            language='solidity',
            constructor_parameters=constructor_parameters,
            contract_name="MultiSigWalletWithDailyLimit"
        )
        self.fail_account = self.s.abi_contract('contract FailAccount { function () {} }', language='solidity')
        # Send money to wallet contract
        deposit = 10000
        self.s.send(keys[wa_1], self.multisig_wallet.address, deposit)
        self.assertEqual(self.s.block.get_balance(self.multisig_wallet.address), deposit)
        self.assertEqual(self.multisig_wallet.dailyLimit(), daily_limit)
        # Withdraw daily limit
        value = 2000
        tx_1 = self.multisig_wallet.submitTransaction(self.fail_account.address, value, "", sender=keys[wa_2])
        # Transaction fails and spentToday remains 0
        self.assertFalse(self.multisig_wallet.transactions(tx_1)[3])
        self.assertEqual(self.multisig_wallet.spentToday(), 0)
Example #31
class Test:
    def __init__(self):

        conf_path_nn = '/src/nn/conf/nn.yaml'
        with open(conf_path_nn, 'r') as fd:
            self.conf = yaml.safe_load(fd)

        self.batch_size = self.conf['testing']['batch_size']
        self.checkpoint_path = self.conf['misc']['checkpoint_path']

        self.logger = Logger
        self.preProcessor = PreProcessor(Logger)
        self.nn = Nn(Logger, conf_path_nn, testing=True)
        self.sess = tf.compat.v1.Session()

        self.start()

    def start(self):
        train_x, val_x, test_x, train_y, val_y, test_y = self.preProcessor.get_data()

        self.logger.info('testing...')

        # restore model
        self.sess = tf.compat.v1.Session()
        saver = tf.compat.v1.train.Saver()
        model_path = self.conf['testing']['model']
        saver.restore(self.sess, model_path)
        self.logger.info('{0} restored'.format(model_path))

        # test
        self.sess.run(tf.compat.v1.local_variables_initializer())
        pred_y, loss, acc = self.nn.analyze_epoch(self.sess, test_x, test_y)
        self.logger.info('test_loss: {0}, test_acc: {1}'.format(loss, acc))

        self.sess.close()
        return pred_y
Example #32
if __name__ == "__main__":

    parser = argparse.ArgumentParser(
        description='Demo script for "inlier" detection benchmark')
    parser.add_argument("-t", "--training-data", dest="training_data",
                        help="csv file of the training set")
    parser.add_argument('-s', "--test-data", dest="test_data",
                        help='Csv file of the test data')

    args = parser.parse_args()


    pp = PreProcessor(args.training_data, args.test_data)

    # idx = pp.index(["roisize","n2_avg"])

    nu = 0.01
    x, y = estimate_gamma(pp.traindata, nu)

    idx = y[y <= 0.25].size - 1
    print ' best gamma:', x[idx], 'with SV frac', y[idx]

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(x, y, 'ro', label='estimate')

    # ax.semilogx()
    ax.grid()
Example #33
    dy = np.diff(Y[:, 0])[0]
    return Z.sum()*dx*dy


if __name__ == "__main__":

    parser = argparse.ArgumentParser(
        description='Demo script for "inlier" detection benchmark')
    parser.add_argument("-t", "--training-data", dest="training_data",
                        help="csv file of the training set")
    parser.add_argument('-s', "--test-data", dest="test_data",
                        help='Csv file of the test data')

    args = parser.parse_args()

    pp = PreProcessor(args.training_data, args.test_data)
    idx = pp.index(["roisize","n2_avg"])

    traindata = pp.traindata[:, idx]
    testdata = pp.testdata[:, idx]

    cov0 = np.cov(pp.traindata.T)
    corr0 = np.corrcoef(pp.traindata.T)
    det0 = la.det(corr0)
    fig = plt.figure()
    axes = fig.add_subplot(1,1,1)
    axes.matshow(cov0, cmap=cm.Greens)
    axes.set_title("covariance of training data")

    pca = PCA(pp.traindata, minfrac=0.01)
    cov0pca = np.cov(pca.project(pp.traindata).T)
Example #34
class TestContract(TestCase):
    """
    run test with python -m unittest tests.test_multisig_wallet_with_daily_limit
    """

    HOMESTEAD_BLOCK = 1150000
    TWENTY_FOUR_HOURS = 86400  # 24h

    def __init__(self, *args, **kwargs):
        super(TestContract, self).__init__(*args, **kwargs)
        self.s = t.state()
        self.s.block.number = self.HOMESTEAD_BLOCK
        t.gas_limit = 4712388
        self.pp = PreProcessor()

    def test(self):
        # Create wallet
        required_accounts = 2
        daily_limit = 3000
        wa_1 = 1
        wa_2 = 2
        wa_3 = 3
        constructor_parameters = (
            [accounts[wa_1], accounts[wa_2], accounts[wa_3]],
            required_accounts,
            daily_limit
        )
        gas = self.s.block.gas_used
        self.multisig_wallet = self.s.abi_contract(
            self.pp.process('MultiSigWalletWithDailyLimit.sol', contract_dir='solidity/', add_dev_code=True),
            language='solidity',
            constructor_parameters=constructor_parameters,
            contract_name="MultiSigWalletWithDailyLimit"
        )
        self.assertLess(self.s.block.gas_used - gas, 2000000)
        print "Deployment costs: {}".format(self.s.block.gas_used - gas)
        # Create ABIs
        multisig_abi = self.multisig_wallet.translator
        # Send money to wallet contract
        deposit = 10000
        self.s.send(keys[wa_1], self.multisig_wallet.address, deposit)
        self.assertEqual(self.s.block.get_balance(self.multisig_wallet.address), deposit)
        self.assertEqual(self.multisig_wallet.dailyLimit(), daily_limit)
        self.assertEqual(self.multisig_wallet.calcMaxWithdraw(), daily_limit)
        # Withdraw daily limit
        value_1 = 2000
        wa_1_balance = self.s.block.get_balance(accounts[wa_1])
        self.multisig_wallet.submitTransaction(accounts[wa_1], value_1, "", sender=keys[wa_2])
        self.assertEqual(self.s.block.get_balance(self.multisig_wallet.address), deposit - value_1)
        self.assertEqual(self.s.block.get_balance(accounts[wa_1]), wa_1_balance + value_1)
        # Update daily limit
        daily_limit_updated = 2000
        update_daily_limit = multisig_abi.encode("changeDailyLimit", [daily_limit_updated])
        transaction_id = self.multisig_wallet.submitTransaction(self.multisig_wallet.address, 0,
                                                                update_daily_limit, sender=keys[wa_1])
        self.multisig_wallet.confirmTransaction(transaction_id, sender=keys[wa_2])
        self.assertEqual(self.multisig_wallet.dailyLimit(), daily_limit_updated)
        self.assertEqual(self.multisig_wallet.calcMaxWithdraw(), 0)
        self.s.block.timestamp += self.TWENTY_FOUR_HOURS + 1
        self.assertEqual(self.multisig_wallet.calcMaxWithdraw(), daily_limit_updated)
        # Withdraw daily limit
        value_2 = 1000
        wa_1_balance = self.s.block.get_balance(accounts[wa_1])
        self.multisig_wallet.submitTransaction(accounts[wa_1], value_2, "", sender=keys[wa_2])
        self.assertEqual(self.s.block.get_balance(self.multisig_wallet.address), deposit - value_1 - value_2)
        self.assertEqual(self.s.block.get_balance(accounts[wa_1]), wa_1_balance + value_2)
        self.assertEqual(self.multisig_wallet.calcMaxWithdraw(), daily_limit_updated - value_2)
        self.multisig_wallet.submitTransaction(accounts[wa_1], value_2, "", sender=keys[wa_2])
        self.assertEqual(self.s.block.get_balance(self.multisig_wallet.address), deposit - value_1 - value_2*2)
        self.assertEqual(self.s.block.get_balance(accounts[wa_1]), wa_1_balance + value_2*2)
        self.assertEqual(self.multisig_wallet.calcMaxWithdraw(), daily_limit_updated - value_2*2)
        # Third time fails, because daily limit was reached
        transaction_id = self.multisig_wallet.submitTransaction(accounts[wa_1], value_2, "", sender=keys[wa_2])
        self.assertFalse(self.multisig_wallet.transactions(transaction_id)[3])
        self.assertEqual(self.s.block.get_balance(self.multisig_wallet.address), deposit - value_1 - value_2*2)
        self.assertEqual(self.s.block.get_balance(accounts[wa_1]), wa_1_balance + value_2*2)
        self.assertEqual(self.multisig_wallet.calcMaxWithdraw(), 0)
        # Let one day pass
        self.s.block.timestamp += self.TWENTY_FOUR_HOURS + 1
        self.assertEqual(self.multisig_wallet.calcMaxWithdraw(), daily_limit_updated)
        # Executing the transaction should work now, but it fails because it is triggered from a non-owner address
        self.assertRaises(TransactionFailed, self.multisig_wallet.executeTransaction, transaction_id, sender=keys[9])
        # Execute transaction also fails if the sender is a wallet owner but didn't confirm the transaction first
        self.assertRaises(TransactionFailed, self.multisig_wallet.executeTransaction, transaction_id, sender=keys[wa_1])
        # But it works with the right sender
        self.multisig_wallet.executeTransaction(transaction_id, sender=keys[wa_2])
        self.assertTrue(self.multisig_wallet.transactions(transaction_id)[3])
        # Let one day pass
        self.s.block.timestamp += self.TWENTY_FOUR_HOURS + 1
        self.assertEqual(self.multisig_wallet.calcMaxWithdraw(), daily_limit_updated)
        # User wants to withdraw more than the daily limit. Withdraw is unsuccessful.
        value_3 = 3000
        wa_1_balance = self.s.block.get_balance(accounts[wa_1])
        self.multisig_wallet.submitTransaction(accounts[wa_1], value_3, "", sender=keys[wa_2])
        # Wallet and user balance remain the same.
        self.assertEqual(self.s.block.get_balance(self.multisig_wallet.address), deposit - value_1 - value_2*3)
        self.assertEqual(self.s.block.get_balance(accounts[wa_1]), wa_1_balance)
        self.assertEqual(self.multisig_wallet.calcMaxWithdraw(), daily_limit_updated)
        # Daily withdraw is possible again
        self.multisig_wallet.submitTransaction(accounts[wa_1], value_2, "", sender=keys[wa_2])
        # Wallet balance decreases and user balance increases.
        self.assertEqual(self.s.block.get_balance(self.multisig_wallet.address), deposit - value_1 - value_2*4)
        self.assertEqual(self.s.block.get_balance(accounts[wa_1]), wa_1_balance + value_2)
        self.assertEqual(self.multisig_wallet.calcMaxWithdraw(), daily_limit_updated - value_2)
        # Trying to execute a transaction that does not exist fails
        transaction_id = 999
        self.assertRaises(TransactionFailed, self.multisig_wallet.executeTransaction, transaction_id, sender=keys[wa_1])
Example #35
File: gen_text.py  Project: amsqr/hd
import pandas as pd
import os, sys
import config
from preprocessor import PreProcessor


csvname = 'df_data.csv'
df_data = pd.read_csv(os.path.join('tmp2', csvname), index_col=0)

print('df_data')
       
csvname = 'attributes.csv'
df_attr = pd.read_csv(os.path.join('../data', csvname))

preproc = PreProcessor(config)      
df_attr = preproc.clean_text(df_attr)

print(df_attr)

with open('data.text','wt') as f:
    for col in ['q','t','d']:
        f.write(' '.join(df_data[col].tolist()))
        f.write(' ')
    f.write(' '.join(df_attr['value'].tolist()))
Example #36
class TestContract(TestCase):
    """
    run test with python -m unittest tests.test_execution_after_requirements_changed
    """

    HOMESTEAD_BLOCK = 1150000

    def __init__(self, *args, **kwargs):
        super(TestContract, self).__init__(*args, **kwargs)
        self.s = t.state()
        self.s.block.number = self.HOMESTEAD_BLOCK
        t.gas_limit = 4712388
        self.pp = PreProcessor()

    def test(self):
        # Create wallet
        required_accounts = 2
        wa_1 = 1
        wa_2 = 2
        wa_3 = 3
        constructor_parameters = (
            [accounts[wa_1], accounts[wa_2], accounts[wa_3]],
            required_accounts
        )
        self.multisig_wallet = self.s.abi_contract(
            self.pp.process('MultiSigWallet.sol', contract_dir='solidity/', add_dev_code=True),
            language='solidity',
            constructor_parameters=constructor_parameters
        )
        # Create ABIs
        multisig_abi = self.multisig_wallet.translator
        # Send money to wallet contract
        deposit = 1000
        self.s.send(keys[wa_1], self.multisig_wallet.address, deposit)
        self.assertEqual(self.s.block.get_balance(self.multisig_wallet.address), 1000)
        # Add owner wa_4
        wa_4 = 4
        add_owner_data = multisig_abi.encode('addOwner', [accounts[wa_4]])
        # Only a wallet owner (in this case wa_1) can do this. Owner confirms transaction at the same time.
        transaction_id = self.multisig_wallet.submitTransaction(self.multisig_wallet.address, 0, add_owner_data,
                                                                  sender=keys[wa_1])
        # There is one pending transaction
        exclude_pending = False
        include_pending = True
        exclude_executed = False
        include_executed = True
        self.assertEqual(
            self.multisig_wallet.getTransactionIds(0, 1, include_pending, exclude_executed), [transaction_id])
        # Update required to 1
        new_required = 1
        update_requirement_data = multisig_abi.encode('changeRequirement', [new_required])
        # Submit successfully
        transaction_id_2 = self.multisig_wallet.submitTransaction(self.multisig_wallet.address, 0,
                                                                  update_requirement_data, sender=keys[wa_1])
        self.assertEqual(
            self.multisig_wallet.getTransactionIds(0, 2, include_pending, exclude_executed),
            [transaction_id, transaction_id_2])
        # Confirm change requirement transaction
        self.multisig_wallet.confirmTransaction(transaction_id_2, sender=keys[wa_2])
        self.assertEqual(self.multisig_wallet.required(), new_required)
        self.assertEqual(
            self.multisig_wallet.getTransactionIds(0, 1, exclude_pending, include_executed),
            [transaction_id_2])
        # Execution fails, because sender is not wallet owner
        self.assertRaises(TransactionFailed, self.multisig_wallet.executeTransaction, transaction_id, sender=keys[9])
        # Because the # required confirmations changed to 1, the addOwner transaction can be executed now
        self.multisig_wallet.executeTransaction(transaction_id, sender=keys[wa_1])
        self.assertEqual(
            self.multisig_wallet.getTransactionIds(0, 2, exclude_pending, include_executed),
            [transaction_id, transaction_id_2])
Example #37
class TestContract(TestCase):
    """
    run test with python -m unittest tests.test_multisig_wallet_with_daily_limit_factory
    """

    HOMESTEAD_BLOCK = 1150000

    def __init__(self, *args, **kwargs):
        super(TestContract, self).__init__(*args, **kwargs)
        self.s = t.state()
        self.s.block.number = self.HOMESTEAD_BLOCK
        t.gas_limit = 4712388
        self.pp = PreProcessor()
        self.multisig_abi = self.s.abi_contract(
            self.pp.process('MultiSigWalletWithDailyLimit.sol', contract_dir='solidity/', add_dev_code=True),
            language='solidity',
            constructor_parameters=([accounts[0]], 1, 0),
            contract_name="MultiSigWalletWithDailyLimit"
        ).translator

    def multisig_transaction(self, contract_address, func_name, params=(), sender=0):
        result = self.multisig_abi.decode(
            func_name,
            self.s.send(
                keys[sender], contract_address, 0, self.multisig_abi.encode(func_name, params)
            )
        )
        return result[0] if len(result) == 1 else result

    def test(self):
        # Create factory
        gas = self.s.block.gas_used
        self.multisig_wallet_factory = self.s.abi_contract(
            self.pp.process('MultiSigWalletWithDailyLimitFactory.sol', contract_dir='solidity/', add_dev_code=True),
            language='solidity',
            contract_name="MultiSigWalletWithDailyLimitFactory"
        )
        self.assertLess(self.s.block.gas_used - gas, 2500000)
        print "Deployment costs: {}".format(self.s.block.gas_used - gas)
        # Create wallet
        required_accounts = 2
        daily_limit = 1000
        wa_1 = 1
        wa_2 = 2
        wa_3 = 3
        multisig_wallet_address = self.multisig_wallet_factory.create([accounts[wa_1], accounts[wa_2], accounts[wa_3]],
                                                                      required_accounts,
                                                                      daily_limit)
        wallet_count = self.multisig_wallet_factory.getInstantiationCount(accounts[0])
        multisig_wallet_address_confirmation = self.multisig_wallet_factory.instantiations(accounts[0], wallet_count-1)
        self.assertEqual(multisig_wallet_address, multisig_wallet_address_confirmation)
        self.assertTrue(self.multisig_wallet_factory.isInstantiation(multisig_wallet_address))
        # Send money to wallet contract
        deposit = 10000
        self.s.send(keys[wa_1], multisig_wallet_address, deposit)
        self.assertEqual(self.s.block.get_balance(multisig_wallet_address), deposit)
        self.assertEqual(self.multisig_transaction(multisig_wallet_address, "dailyLimit"), daily_limit)
        self.assertEqual(self.multisig_transaction(multisig_wallet_address, "calcMaxWithdraw"), daily_limit)
        # Update daily limit
        daily_limit_updated = 2000
        update_daily_limit = self.multisig_abi.encode("changeDailyLimit", [daily_limit_updated])
        transaction_id = self.multisig_transaction(multisig_wallet_address,
                                                   "submitTransaction",
                                                   (multisig_wallet_address, 0,  update_daily_limit),
                                                   wa_1)
        self.multisig_transaction(multisig_wallet_address,
                                  "confirmTransaction",
                                  (transaction_id, ),
                                  wa_2)
        self.assertEqual(self.multisig_transaction(multisig_wallet_address, "dailyLimit"), daily_limit_updated)
        self.assertEqual(self.multisig_transaction(multisig_wallet_address, "calcMaxWithdraw"), daily_limit_updated)
Example #38
class BernoulliNaiveBayes:

    MAX_CLASS = 40

    def __init__(self, train_test_dir, vocab, test_ground_truth_path):
        self.model = PreProcessor(train_test_dir, vocab)

        self.train_test_dir = train_test_dir
        self.test_ground_truth_path = test_ground_truth_path

        # { Class(int) : Prior Probability of class (double) }
        self.prior = {}

        # { (Class(int) , Feature (string)) : Conditional Probability (double) }
        self.condProb = {}

        # list of (Feature(string),CCE(double))
        self.featureCCE = []

        """
        ######## the variable following for extra part

        # { Class(int) : Prior Probability of class (double) }
        self.priorBi = {}

        # list of bigram tuple (feature1(string),feature2(string))
        self.biFeat = []

        # { (Class(int), Feature (tuple)) : multinomial Prob(double) }
        self.MulProb = {}
        """



    # Define Train function
    def train(self):
        for c in self.model.classes:
            self.prior[c] = len(self.model.trainFileNames[c]) * 1.0 / self.model.N

            counter = {}
            for feature in self.model.feature:
                counter[feature] = 0

            for filename in self.model.trainFileNames[c]:
                words = set()
                with open(filename, 'r') as file:
                    for line in file:
                        for word in self.model.word_tokenize(line):
                            words.add(word)

                for feature in self.model.feature:
                    if feature in words:
                        counter[feature] += 1

            for feature in self.model.feature:
                self.condProb[(c, feature)] = (counter[feature] * 1.0 + 1) / (len(self.model.trainFileNames[c]) + 2)


    # For Debugging
    def printCondProb(self):
        for c in self.model.classes:
            print "class " + str(c) + " : " + str(self.prior[c])
            for feature in self.model.feature:
                print feature + " : " + str(self.condProb[(c, feature)])



    # Define Test function
    # Return a list of test result as a int list
    def test(self, testfilename):

        maxProb = float("-Inf")
        c_star = 0

        words = set()
        with open(testfilename, 'r') as file:
            for line in file:
                for word in self.model.word_tokenize(line):
                    words.add(word)

        for c in self.model.classes:
            probability = log(self.prior[c], 2)  # class log-prior, base 2
            for feature in self.model.feature:
                if feature in words:
                    probability += log(self.condProb[(c, feature)], 2)
                else:
                    probability += log(1-self.condProb[(c, feature)], 2)
            if probability > maxProb:
                c_star = c
                maxProb = probability

        return c_star

    def testAll(self):
        testFileNames = glob(self.train_test_dir + "/*sample*")
        testFileNames.sort()

        testResult = []
        actualResult = []
        for testFileName in testFileNames:
            testResult.append(self.test(testFileName))

        problemNum = self.getProblemNumber()
        with open(self.test_ground_truth_path, 'r') as actualFile:
            for line in actualFile.readlines():
                if len(line) > 10 and line.startswith("problem" + problemNum):
                    actualResult.append(int(line[-3 : -1]))

        self.printAccuracy(actualResult,testResult)
        self.model.printConfMat(actualResult, testResult)
        self.rankFeature()
        print "Top 20 features:"
        self.printTop20()


    def printAccuracy(self,actualResult,testResult):
        accuracyNum = 0
        for i in range(len(testResult)):
            if actualResult[i] == testResult[i]:
                accuracyNum += 1
        accuracy = accuracyNum * 1.0 / len(actualResult)
        print accuracy


    def getProblemNumber(self):
        files = glob(self.train_test_dir + "/*")
        paths = files[0].split("/")
        return paths[-1][0]

    def rankFeature(self):
        for feature in self.model.feature:
            CCE = 0
            for c in self.model.classes:
                CCE -= self.prior[c] * self.condProb[(c,feature)] * log(self.condProb[(c,feature)],2)
            self.featureCCE.append((feature,CCE))


    def printTop20(self):
        sortedCCE = sorted(self.featureCCE, key=lambda item:item[1])[::-1]
        for i in range(20):
            feature = sortedCCE[i][0]
            CCE = sortedCCE[i][1]
            print feature +"  " + str(CCE)

    def featureFreq(self):
        words = {}
        featureTimes = []
        for c in self.model.classes:
            for filename in self.model.trainFileNames[c]:
                with open(filename, 'r') as file:
                    for line in file:
                        for word in self.model.word_tokenize(line):
                            words[word] = words.get(word, 0) + 1
        for feature in self.model.feature:
            words[feature] = words.get(feature,0)
            featureTimes.append((feature,words[feature]))
        self.featFreq = sorted(featureTimes, key=lambda item:item[1])[::-1]


    def resetFeatures(self,Num):
        features = []
        if Num > len(self.featFreq):
            Num = len(self.featFreq)
        for i in range(Num):
            features.append(self.featFreq[i][0])
        return features

    def featureCurve(self):
        self.featureFreq()
        # print self.featFreq
        Num = 0
        while Num <= 420:
            Num += 10
            self.model.feature = self.resetFeatures(Num)
            print "select " + str(len(self.model.feature)) + " features"
            self.train()

            testFileNames = glob(self.train_test_dir + "/*sample*")
            testFileNames.sort()

            testResult = []
            actualResult = []
            for testFileName in testFileNames:
                testResult.append(self.test(testFileName))

            problemNum = self.getProblemNumber()
            with open(self.test_ground_truth_path, 'r') as actualFile:
                for line in actualFile.readlines():
                    if len(line) > 10 and line.startswith("problem" + problemNum):
                        actualResult.append(int(line[-3 : -1]))

            self.printAccuracy(actualResult,testResult)
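Note: train() above uses add-one (Laplace) smoothing for Bernoulli features, P(f|c) = (count(f, c) + 1) / (N_c + 2), which keeps every conditional probability strictly between 0 and 1 so the logs in test() are always defined. A quick check with hypothetical counts:

# 3 of 10 training documents in the class contain the feature
count, n_docs = 3, 10
p = (count + 1.0) / (n_docs + 2)
print(p)  # 0.333..., never exactly 0 or 1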
Example #39
class TestContract(TestCase):
    """
    run test with python -m unittest tests.test_execution_after_requirements_changed
    """

    HOMESTEAD_BLOCK = 1150000

    def __init__(self, *args, **kwargs):
        super(TestContract, self).__init__(*args, **kwargs)
        self.s = t.state()
        self.s.block.number = self.HOMESTEAD_BLOCK
        t.gas_limit = 4712388
        self.pp = PreProcessor()

    def test(self):
        # Create wallet
        required_accounts = 2
        wa_1 = 1
        wa_2 = 2
        wa_3 = 3
        constructor_parameters = ([
            accounts[wa_1], accounts[wa_2], accounts[wa_3]
        ], required_accounts)
        self.multisig_wallet = self.s.abi_contract(
            self.pp.process('MultiSigWallet.sol',
                            contract_dir='solidity/',
                            add_dev_code=True),
            language='solidity',
            constructor_parameters=constructor_parameters)
        # Create ABIs
        multisig_abi = self.multisig_wallet.translator
        # Send money to wallet contract
        deposit = 1000
        self.s.send(keys[wa_1], self.multisig_wallet.address, deposit)
        self.assertEqual(
            self.s.block.get_balance(self.multisig_wallet.address), 1000)
        # Add owner wa_4
        wa_4 = 4
        add_owner_data = multisig_abi.encode('addOwner', [accounts[wa_4]])
        # Only a wallet owner (in this case wa_1) can do this. Owner confirms transaction at the same time.
        transaction_id = self.multisig_wallet.submitTransaction(
            self.multisig_wallet.address, 0, add_owner_data, sender=keys[wa_1])
        # There is one pending transaction
        exclude_pending = False
        include_pending = True
        exclude_executed = False
        include_executed = True
        self.assertEqual(
            self.multisig_wallet.getTransactionIds(0, 1, include_pending,
                                                   exclude_executed),
            [transaction_id])
        # Update required to 1
        new_required = 1
        update_requirement_data = multisig_abi.encode('changeRequirement',
                                                      [new_required])
        # Submit successfully
        transaction_id_2 = self.multisig_wallet.submitTransaction(
            self.multisig_wallet.address,
            0,
            update_requirement_data,
            sender=keys[wa_1])
        self.assertEqual(
            self.multisig_wallet.getTransactionIds(0, 2, include_pending,
                                                   exclude_executed),
            [transaction_id, transaction_id_2])
        # Confirm change requirement transaction
        self.multisig_wallet.confirmTransaction(transaction_id_2,
                                                sender=keys[wa_2])
        self.assertEqual(self.multisig_wallet.required(), new_required)
        self.assertEqual(
            self.multisig_wallet.getTransactionIds(0, 1, exclude_pending,
                                                   include_executed),
            [transaction_id_2])
        # Execution fails, because sender is not wallet owner
        self.assertRaises(TransactionFailed,
                          self.multisig_wallet.executeTransaction,
                          transaction_id,
                          sender=keys[9])
        # Because the # required confirmations changed to 1, the addOwner transaction can be executed now
        self.multisig_wallet.executeTransaction(transaction_id,
                                                sender=keys[wa_1])
        self.assertEqual(
            self.multisig_wallet.getTransactionIds(0, 2, exclude_pending,
                                                   include_executed),
            [transaction_id, transaction_id_2])
Example #40
 def __init__(self, *args, **kwargs):
     super(TestContract, self).__init__(*args, **kwargs)
     self.s = t.state()
     self.s.block.number = self.HOMESTEAD_BLOCK
     t.gas_limit = 4712388
     self.pp = PreProcessor()