Example #1
def main():
    c: Configuration = Configuration()

    # set configuration params
    c.simulator_write_debug_level = DebugLevel.HIGH

    main_runner = MainRunner(c)
    analyzer = Analyzer(main_runner.main_services)
    analyzer.analyze_algorithms()
Example #2
def main():
    opt = OptionParser()
    opt.parser.add_argument('target', help='target file path', type=str)
    opt_args = opt.get_commandline_arguments()
    LOG.reset_logger(opt_args)
    target = opt_args.target
    person_names = []
    is_debug = opt_args.debug
    analyzer = Analyzer()
    analyzer.execute(target, person_names, is_debug)
    print('>> FINISHED!!')
    return 0
Example #3
def analyze_movie(file):
    analyzer = Analyzer(file)
    video = Video.objects.create(file=file)

    for t, frame in enumerate(analyzer.get_histogram()):
        r, g, b = frame
        print("Frame:" + str(t))
        for num_bin in range(0, len(r)):
            Histogram.objects.create(video=video, t=t, num_bin=num_bin, bin_value=r[num_bin], color=Histogram.RED)
            Histogram.objects.create(video=video, t=t, num_bin=num_bin, bin_value=g[num_bin], color=Histogram.GREEN)
            Histogram.objects.create(video=video, t=t, num_bin=num_bin, bin_value=b[num_bin], color=Histogram.BLUE)
    print("Finished")
Example #4
    def run(self):
        if self.main_services.settings.generator:
            Generator.main(self)
        elif self.main_services.settings.trainer:
            Trainer.main(self)
        elif self.main_services.settings.analyzer:
            Analyzer.main(self)
        elif self.main_services.settings.load_simulator:
            simulator: Simulator = Simulator(self.main_services)
            simulator.start()

        if self.main_services.settings.clear_cache:
            self.main_services.resources.cache_dir.clear()
Example #5
    def __init__(self, gdrive_certificat_path):
        self.server = Flask(__name__,
                            static_url_path='',
                            static_folder='frontend/static')

        self.socketio = SocketIO(self.server)
        self.session_maker = run_orm()
        self.analyzer = Analyzer()
        self.drive = Drive(gdrive_certificat_path)

        self.server.route('/', methods=['GET'])(self.index)
        self.server.route('/upload', methods=['POST'])(self.upload_task)
        self.server.route('/end_check', methods=['POST'])(self.end_check)
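
The three route registrations above call Flask's route() decorator directly on bound methods, which is equivalent to the usual @app.route form. A self-contained sketch of the same pattern (hypothetical handler, standard Flask API):

from flask import Flask

app = Flask(__name__)

def index():
    # Placeholder view used only to illustrate the registration style.
    return "ok"

# Calling the decorator explicitly registers the view, exactly like @app.route('/')
app.route('/', methods=['GET'])(index)

if __name__ == '__main__':
    app.run()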
Example #6
def main(argv):
    print("#" * 64)
    print("##" + " " * 23 + "KICKASS DISASS" + " " * 23 + "##")
    print("##" + " " * 60 + "##")
    print("##" + "Powered by Capstone {:d}.{:d} ##".format(
        CS_API_MAJOR, CS_API_MINOR).rjust(62))
    print("#" * 64 + "\n")

    if len(argv) <= 1:
        raise Exception("Usage: python3 main.py binary")

    loaders = load_modules()

    # open file and check for loaders that are able to disass the file
    with open(argv[1], 'rb') as f:
        valid_loaders = [l for l in loaders if l.accept_file(f)]

        if not valid_loaders:
            print("+ No loaders recognize the file")
            sys.exit(-1)

        if len(valid_loaders) >= 2:
            loader = prompt_user_for_loader(valid_loaders)
        else:
            loader = valid_loaders[0]

        # launch the actual analysis
        anal = Analyzer(loader, f)
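
load_modules() and the loader classes are defined elsewhere in the project; a minimal, hypothetical loader that satisfies the accept_file(f) contract used above might look like this (a sketch, not the project's actual class):

class ELFLoader:
    """Hypothetical loader: recognizes ELF binaries by their magic bytes."""

    @staticmethod
    def accept_file(f):
        f.seek(0)
        magic = f.read(4)
        f.seek(0)  # restore the position so other loaders can inspect the file too
        return magic == b"\x7fELF"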
Example #7
    def _analyze_and_output(self, src: Story, person_names: list,
                            is_debug: bool) -> ResultData:
        # serialize and compile as text
        mode = CompileMode.NOVEL_TEXT
        fmode = FormatMode.DEFAULT
        LOG.info('Serialize for Analyzer')
        result = assertion.is_instance(Serializer().execute(src, mode),
                                       ResultData)
        if not result.is_succeeded:
            return result
        tmp = assertion.is_instance(result.data, CodeList)
        LOG.info('Validate for Analyzer')
        result = assertion.is_instance(Validater().execute(tmp), ResultData)
        if not result.is_succeeded:
            return result
        tmp = assertion.is_instance(result.data, CodeList)
        LOG.info('Compile for Analyzer')
        result = assertion.is_instance(
            Compiler().execute(tmp, mode, {}, False, False), ResultData)
        if not result.is_succeeded:
            return result
        tmp = assertion.is_instance(result.data, RawData)
        LOG.info('Format for Analyzer')
        result = assertion.is_instance(Formatter().execute(tmp, fmode),
                                       ResultData)
        if not result.is_succeeded:
            return result
        tmp = assertion.is_instance(result.data, TextList)

        LOG.info('RUN: call Analyzer')
        result = Analyzer().execute(tmp, person_names, is_debug)
        return ResultData([], True, None)
Example #8
def _test(_dir):
    sample_names = get_sample_names(_dir)
    monitor_data = []

    first_http_response = None
    for sample_name in sample_names:
        http_response = HttpResponse(open("%s/%s" % (_dir, sample_name)).read())

        if first_http_response is None:
            first_http_response = http_response
        parsed_data = parser.parse_ms_monitor_result(http_response.body)
        parsed_data["sample_time"] = datetime.fromtimestamp(int(sample_name.split(".")[0]))

        monitor_data.append(parsed_data)

    az = Analyzer()
    result = az.do_analyze(monitor_data[0]["sample_time"], first_http_response, monitor_data[1:])

    return result
Example #9
def test_00():
    url = "http://10.33.0.57/tag_live_monitor/tvie/zhso/xfw"
    stream_id = 0

    _map = {}
    analyzer = Analyzer()
    http_client = MediaServerAdminQueryer(analyzer, stream_id, url, _map)

    asyncore.loop(map=_map)
    print(http_client.response.body)
Example #10
def _test(_dir):
    sample_names = get_sample_names(_dir)
    monitor_data = []

    first_http_response = None
    for sample_name in sample_names:
        http_response = HttpResponse(
            open("%s/%s" % (_dir, sample_name)).read())

        if first_http_response is None:
            first_http_response = http_response
        parsed_data = parser.parse_ms_monitor_result(http_response.body)
        parsed_data['sample_time'] = datetime.fromtimestamp(
            int(sample_name.split('.')[0]))

        monitor_data.append(parsed_data)

    az = Analyzer()
    result = az.do_analyze(monitor_data[0]['sample_time'], first_http_response,
                           monitor_data[1:])

    return result
Example #11
    def __init__(self):
        # Initialising fields
        self.parameters = {}
        # Loading config
        self.config = self.loadConfig()
        self.analyzer = Analyzer(self.config)
        global analyzer_global
        analyzer_global = self.analyzer
        # Selecting mode
        if len(sys.argv) > 1:
            self.parse_args()
            self.analyzer.error_occurred.connect(self.print_error)
            filename = self.parameters["input"]
            if not os.path.exists(filename):
                self.print_error("File {} does not exist".format(filename))
                sys.exit()
            try:
                text = self.analyzer.load_file(self.parameters["input"])
                if not self.analyzer.isTextValid(text):
                    self.print_error(
                        "File {} does not contain valid text".format(filename))
                    sys.exit()
            except Exception as e:
                self.print_error("Error loading file {}:\n{}".format(
                    filename, e))
                sys.exit()
            result = self.analyzer.analyze(text, self.parameters)
            if result is None:
                self.print_error("Unknown error occurred")
                sys.exit()
            result.save_to_file(self.parameters["output"],
                                self.parameters["threshold"],
                                n_digits=5)
            sys.exit(0)
        else:
            show_splashscreen()
            self.ui = GUI(analyzer=self.analyzer, config=self.config)
Example #12
def main(argv):

    if len(argv) < 2:
        raise Exception("Usage: python3 main.py filename (mode)")

    loaders = load_modules()

    # open file and check for loaders that are able to disass the file
    with open(argv[1], 'rb') as f:
        valid_loaders = [l for l in loaders if l.accept_file(f)]

        # TODO check whether elf or not
        exe = ELFExecutable(argv[1])
        exe._extract_symbol_table()

        if not valid_loaders:
            print("+ No loaders recognize the file")
            sys.exit(-1)

        if len(valid_loaders) >= 2:
            loader = prompt_user_for_loader(valid_loaders)
        else:
            loader = valid_loaders[0]

        fname = os.path.basename(f.name)
        if len(argv) >= 3:
            os.system("python3 init_main.py " + fname + " disasm_all_mode")
        else:
            os.system("python3 init_main.py " + fname)

        if not os.path.isdir("input/" + fname):
            print("Input file(input/" + fname + ") does not exist")
            sys.exit()
        """
        print("#" * 64)
        print("##" + " " * 23 + "KICKASS DISASS" + " " * 23 + "##")
        print("##" + " " * 60 + "##")
        print(
            "##" +
            "Powered by Capstone {:d}.{:d} ##".format(
                CS_API_MAJOR,
                CS_API_MINOR).rjust(62))
        print("#" * 64 + "\n")
        """

        if len(argv) >= 3:
            anal = Analyzer(loader, exe, f, argv[2])
        else:
            anal = Analyzer(loader, exe, f)
        anal.start()
Example #13
class MainApp(Tk):
    # analyzer is completely isolated from UI, check the '/analyzer' module
    analyzer = Analyzer()

    def __init__(self):
        super(MainApp, self).__init__()
        self.setup()
        self.top_frame = TopFrame(self)
        self.body_frame = BodyFrame(self)

    def setup(self):
        self.title('GitHub User Performance Analyzer')
        self.configure(bg='white')
        self.minsize(0, 500)
        self.resizable(0, 0)
        self.grid_columnconfigure(0, weight=1)
        self.grid_rowconfigure(1, weight=1)

    @staticmethod
    def confirm(question, icon='question'):
        result = messagebox.askquestion("Confirmation", question, icon=icon)
        return result == 'yes'
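
The example does not show how MainApp is started; a likely entry point (an assumption, not part of the original listing) is simply:

if __name__ == '__main__':
    app = MainApp()
    app.mainloop()  # standard tkinter event loop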
Example #14
    def setUp(self):
        self.analyzer = Analyzer()
        self.supervisor = Supervisor()
        self.test_data = read_test_data()
Example #15
class PlagiarismTest(unittest.TestCase):
    def setUp(self):
        self.analyzer = Analyzer()
        self.supervisor = Supervisor()
        self.test_data = read_test_data()

    def tearDown(self):
        pass

    def test_find_similarity_groups_by_rows(self):
        similarity_matrix = np.asarray([[0, 6, 12, 8, 1], [5, 0, 4, 14, 9],
                                        [11, 5, 0, 7, 3], [7, 12, 8, 0, 14],
                                        [2, 10, 4, 12, 0]])

        result = self.analyzer.find_similarity_groups_by_rows(
            similarity_matrix)
        self.assertEqual(result[1], 1)
        self.assertEqual(result[2], 2)
        self.assertEqual(result[3], 1)
        self.assertEqual(result[4], 3)

    def test_find_similarity_groups_by_mean_group_similarity(self):
        similarity_matrix = np.asarray([[0, 6, 12, 8, 1], [5, 0, 4, 14, 9],
                                        [11, 5, 0, 7, 3], [7, 12, 8, 0, 14],
                                        [2, 10, 4, 12, 0]])
        result = self.analyzer.find_similarity_groups_by_mean_group_similarity(
            similarity_matrix)
        self.assertEqual(result[1], 1)
        self.assertEqual(result[2], 2)
        self.assertEqual(result[3], 1)
        self.assertEqual(result[4], 1)

    def test_analyze_test_via_supervisor(self):
        text = self.test_data["observer_test_text"]
        result = self.supervisor.markup(text)
        self.assertEqual(result.text, text)
        self.assertEqual(result.num_letters, 377)
        self.assertEqual(result.num_sentences, 10)
        self.assertEqual(result.num_words, 62)
        self.assertEqual(len(result.tokens), 62)
        self.assertEqual(len(result.morph_tokens), 62)

    def test_create_plagiarism_matrix(self):
        essays = self.test_data["plagiarism_test_essays"]
        essays = [self.supervisor.markup(essay) for essay in essays]
        matrix, coincidences = plagiarism.create_plagiarism_matrix(essays)

        self.assertEqual((matrix.shape == np.array([len(essays),
                                                    len(essays)])).all(), True)
        # for polar essay number 1
        self.assertEqual(matrix[0, 1] == 100,
                         True)  # the essay matches on 100% of sentences
        self.assertEqual(matrix[0, 2] > 33 and matrix[0, 2] < 65,
                         True)  # the essay matches on about 50% of sentences
        self.assertEqual(matrix[0, 3] == 0, True)

        # for polar essay number 2
        self.assertEqual(matrix[3, 0] == 0, True)
        self.assertEqual(matrix[3, 1] == 0,
                         True)  # the essay matches on 0% of sentences
        self.assertEqual(matrix[3, 2] > 35 and matrix[3, 2] < 65,
                         True)  # the essay matches on about 50% of sentences

    def test_similarity_matrix(self):
        essays = self.test_data["similarity_test_essays"]
        essays = [self.supervisor.markup(essay) for essay in essays]
        result = similarity.create_similarity_matrix(essays)

        self.assertEqual((result.shape == np.array([len(essays),
                                                    len(essays)])).all(), True)
        # for essay number 1
        self.assertEqual(result[0, 1] > 13, True)  # essay on the same topic
        self.assertEqual(result[0, 2] > 4 and result[0, 2] < 10,
                         True)  # essay on an overlapping topic
        self.assertEqual(result[0, 3] < 4, True)  # essay on a separate topic

    def test_read_from_file(self):
        expected_text = self.test_data["lecture_reader_expected_text"].strip()
        presentation_text = lecture_reader.read_from_file(
            TEST_PRESENTATION_PATH).strip()
        self.assertEqual(presentation_text, expected_text)

    def test_analyzer(self):
        lecture = Presentation(TEST_PRESENTATION_PATH)
        lecture_text = read_from_presentation(lecture)
        essays = self.test_data["analyzer_test_essays"]
        pd_essays = pd.DataFrame(data=essays, columns=["text"])
        report = self.analyzer.analyze(lecture_text, pd_essays)
        self.assert_lecture(self.test_data["lecture_reader_expected_text"],
                            report.lecture, 176)
        self.assert_essay(essays[0], report.essays[0], GradeType.FAIL, 1,
                          [LabelType.FAIL, LabelType.LECTURE_PLAGIARISM], 302)
        self.assert_essay(essays[1], report.essays[1], GradeType.SUCCESS, 1,
                          [LabelType.SUCCESS], 338)
        self.assert_essay(essays[2], report.essays[2], GradeType.FAIL, 2,
                          [LabelType.FAIL], 246)

    def assert_lecture(self, text, lecture, num_words):
        self.assertEqual(lecture.text, text)
        self.assertEqual(lecture.statistic.num_letters, len(text))
        self.assertEqual(lecture.statistic.num_sentences,
                         len(sent_tokenize(text)))
        self.assertEqual(lecture.statistic.num_words, num_words)

    def assert_essay(self, text, essay, grade, group, labels, num_words):
        self.assertEqual(essay.text, text)
        self.assertEqual(essay.statistic.num_letters, len(text))
        self.assertEqual(essay.statistic.num_sentences,
                         len(sent_tokenize(text)))
        self.assertEqual(essay.statistic.num_words, num_words)
        self.assertEqual(essay.grade, grade)
        self.assertEqual(essay.group, group)
        self.assertEqual(len(labels), len(essay.labels))
        for label in essay.labels:
            if label.type in labels:
                labels.remove(label.type)
            else:
                self.assertEqual(True, False)

        self.assertEqual(len(labels), 0)

    def test_analyzer_incorrect_essay_list(self):
        lecture = Presentation(TEST_PRESENTATION_PATH)
        essays = self.test_data["analyzer_test_essays"]
        pd_essays = pd.DataFrame(data=essays, columns=["roles"])
        self.assertRaises(NotFoundEssayColumn, self.analyzer.analyze, lecture,
                          pd_essays)
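
Assuming this test case lives at tests/test_plagiarism.py (a hypothetical path), the suite can be driven with the standard unittest machinery, for example:

import unittest

# The dotted name below is an assumption about where PlagiarismTest actually lives.
suite = unittest.defaultTestLoader.loadTestsFromName(
    'tests.test_plagiarism.PlagiarismTest')
unittest.TextTestRunner(verbosity=2).run(suite)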
Example #16
    def __init__(self, strategy_data):
        self.strategy_data = strategy_data
        self.analyzer = Analyzer(self)
        self.email_service = EmailService()
Example #17
class Strategy:
    def __init__(self, strategy_data):
        self.strategy_data = strategy_data
        self.analyzer = Analyzer(self)
        self.email_service = EmailService()

    def initialize(self, context):
        context.algo_id = self.strategy_data.get('algo_id')
        context.live_trading = self.strategy_data.get('live_trading')
        self.strategy_data.get('initialize')(context)
        if self.strategy_data.get('live_trading', False) is False:
            self.analyzer.initialize()
        else:
            self.email_service.initialize()

    def SendMessage(self, subject, message):
        if self.strategy_data.get('live_trading', False) is True:
            self.email_service.SendMessage(subject, message)

    def handle_data(self, context, data):
        self.strategy_data.get('handle_data')(context, data)
        if self.strategy_data.get('live_trading', False) is False:
            self.analyzer.handle_data(context)

    def analyze(self, context, data):
        print("Analyse method got called")
        self.strategy_data.get('analyze')(context, data)
        if self.strategy_data.get('live_trading', False) is False:
            self.analyzer.finalize()
        else:
            algo_id = self.strategy_data.get('algo_id')
            db_engine = create_engine('sqlite:///{}'.format(
                os.path.join(str(Path.home()), 'algodb.db')))
            prev_date_sql = "select date from prev_run_date where algo_id={}".format(
                algo_id)
            prev_run_date = pd.read_sql(prev_date_sql, db_engine)['date'][0]
            run_date = str(context.datetime.date())

            prev_pos_sql = "select holding_name, quantity, buy_price, last_price from daily_holdings " \
                           "where algo_id={} and date='{}'".format(algo_id, prev_run_date)
            prev_pos = pd.read_sql(prev_pos_sql, db_engine)
            if prev_pos.empty:
                prev_pos_list = []
            else:
                prev_pos_list = list(prev_pos['holding_name'])
            prev_pos.set_index('holding_name', inplace=True)
            curr_positions = context.portfolio.positions.values()
            stock_email_columns = [
                'Holding', 'Shares', 'Buy Price', 'Yest Price',
                'Current Price', 'Dollar Gain Today', 'Pct Gain Today',
                'Dollar Gain Net', 'Pct Gain Net', 'Market Value'
            ]
            stock_email = pd.DataFrame(columns=stock_email_columns)
            sold_list = []
            for position in list(curr_positions):
                if position.sid.symbol in prev_pos_list:
                    if position.amount == 0:
                        sold_list.append(position)
                        continue
                    prev_stock_pos = prev_pos.loc[position.sid.symbol]
                    gain_today = position.last_sale_price - prev_stock_pos[
                        'last_price']
                    pct_gain_today = str(
                        round(
                            (gain_today / prev_stock_pos['last_price']) * 100,
                            4)) + ' %'
                    gain_total = position.last_sale_price - position.cost_basis
                    pct_gain_total = str(
                        round((gain_total / position.cost_basis) * 100,
                              4)) + ' %'
                    stock_email.loc[position.asset] = [
                        position.asset.symbol, position.amount,
                        round(position.cost_basis, 4),
                        prev_stock_pos['last_price'], position.last_sale_price,
                        gain_today, pct_gain_today, gain_total, pct_gain_total,
                        position.amount * position.last_sale_price
                    ]
                else:
                    stock_email.loc[position.asset] = [
                        position.asset.symbol, position.amount,
                        round(position.cost_basis,
                              4), '-', position.last_sale_price, '-', '-', '-',
                        '-', position.amount * position.last_sale_price
                    ]

            portfolio = context.portfolio
            # stock_email = stock_email.join(pd.DataFrame(portfolio.current_portfolio_weights, columns=['Weightage']))
            # stock_email['Weightage'] = round(stock_email['Weightage'] * 100, 4).astype(str) + ' %'
            port_email = pd.Series([
                round(portfolio.portfolio_value, 4),
                round(portfolio.pnl, 4),
                str(
                    round(
                        portfolio.pnl /
                        (portfolio.portfolio_value - portfolio.pnl), 4)) +
                ' %',
                round(portfolio.cash, 4),
                round(portfolio.positions_value, 4)
            ],
                                   index=[
                                       'Portfolio Value', 'Net Gain',
                                       'Percent Net Gain', 'Cash Value',
                                       'Position Value'
                                   ])

            message = "<p><h3>Holdings Summary</h3></p>" + stock_email.to_html(index=False) \
                      + "<p><h3>Portfolio Summary</h3></p>" + pd.DataFrame(port_email).T.to_html(index=False)
            subject = '{} : Daily Summary - {}'.format(
                self.strategy_data.get('algo_name'), run_date)
            self.email_service.SendNotifications(subject, message)

            prev_run_update_sql = "update prev_run_date set date='{}' where algo_id={}".format(
                run_date, algo_id)
            with db_engine.connect() as connection:
                try:
                    for position in list(curr_positions):
                        if position in sold_list:
                            continue
                        insert_holding_sql = "Insert into daily_holdings (date, algo_id, holding_name, quantity, " \
                                             "buy_price, last_price) values ('{}',{},'{}',{},{},{})"\
                            .format(run_date, algo_id, position.sid.symbol, position.amount,
                                    round(position.cost_basis, 4), position.last_sale_price)
                        connection.execute(insert_holding_sql)
                    connection.execute(prev_run_update_sql)
                except Exception as e:
                    print(e)
            self.strategy_data.get('after_trading_end')(context, data)

    def before_trading_start(self, context, data):
        self.strategy_data.get('before_trading_start')(context, data)
        if self.strategy_data.get('live_trading', False) is False:
            self.analyzer.before_trading_start()

    def run_algorithm(self):
        live_trading = self.strategy_data.get('live_trading', False)
        # If live_trading true, trade with Virtual broker using database prices, else run normal backtest
        # database prices are updated from master_algo

        kwargs = {
            'start': self.strategy_data.get('start'),
            'end': self.strategy_data.get('end'),
            'initialize': self.initialize,
            'handle_data': self.handle_data,
            'analyze': self.analyze,
            'before_trading_start': self.before_trading_start,
            'bundle': 'quandl',
            'capital_base': self.strategy_data.get('capital_base'),
            'tws_uri': self.strategy_data.get('tws_uri'),
            'live_trading': live_trading
        }

        run_algo_thread = threading.Thread(target=run_algorithm, kwargs=kwargs)
        run_algo_thread.start()

        if self.strategy_data.get('live_trading', False) is False:
            self.analyzer.show_plot()
            sys.exit(self.analyzer.app.exec_())

        run_algo_thread.join()
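
The daily-holdings SQL above is assembled with plain string formatting; a safer, equivalent sketch for the prev_run_date update (using SQLAlchemy's text() with bound parameters and the same table and column names as the example) would be:

from sqlalchemy import text

with db_engine.connect() as connection:
    connection.execute(
        text("update prev_run_date set date=:run_date where algo_id=:algo_id"),
        {"run_date": run_date, "algo_id": algo_id})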
Example #18
            threshold = self.main_window.opt_bar.threshold.value()
            result.save_to_file(filename=filename, threshold=threshold, n_digits=5)
        except Exception as e:
            self.error_occurred.emit("Не удалось экспортировать результат")

    def on_analysis_completed(self, result: Predict):
        params = self.main_window.opt_bar.options_to_dict()
        self.main_window.result_widget.show_output(result, params)


def show_splashscreen():
    splash = qw.QSplashScreen(QPixmap(":/Splash_email_v2.png"), qc.Qt.WindowStaysOnTopHint)
    time = qc.QTime()
    splash.show()
    time.start()
    while time.elapsed() <= 3000:
        pass
    splash.finish(None)


if __name__ == '__main__':
    from PyQt5.QtWidgets import QApplication
    import sys
    from configparser import ConfigParser

    a = QApplication(sys.argv)
    c = ConfigParser()
    c.read("../config.ini")
    g = GUI(config=c, analyzer=Analyzer(c))
    a.exec()
Example #19
def run(argv):
    if argv == 'downloader':
        Downloader.run()
    elif argv == 'analyzer':
        Analyzer().run()
Example #20
import datetime

from eye.eye import Eye
from analyzer.analyzer import Analyzer
from sender.sender import Sender
from config.config import Config

configuration = Config()

eye = Eye()
analyzer = Analyzer()
sender = Sender()

frame = eye.capture()
while True:
    data = analyzer.analyze(frame)

    sender.send(data)

    frame = eye.capture()
Example #21
def get_analyzer(holiday_plan_id):
    if HolidayPlan.query.filter_by(id=holiday_plan_id).scalar() is None:
        return Analyzer()        
    analyzer_as_json = \
        HolidayPlan.query.filter_by(id=holiday_plan_id).first().analyzer_as_json
    return Analyzer().from_json(analyzer_as_json)
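
get_analyzer queries HolidayPlan twice (once via scalar(), once via first()); a single-query variant with the same behaviour, sketched against the same Flask-SQLAlchemy model, could be:

def get_analyzer(holiday_plan_id):
    plan = HolidayPlan.query.filter_by(id=holiday_plan_id).first()
    if plan is None:
        return Analyzer()
    return Analyzer().from_json(plan.analyzer_as_json)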
Example #22
    def run_multiple(self):
        #Two options, generator
        if self.main_services.settings.generator and self.main_services.settings.trainer:
            Generator.main(self)
            Trainer.main(self)
        elif self.main_services.settings.generator and self.main_services.settings.analyzer:
            Generator.main(self)
            Analyzer.main(self)
        elif self.main_services.settings.generator and self.main_services.settings.load_simulator:
            Generator.main(self)
            simulator: Simulator = Simulator(self.main_services)
            simulator.start()
        #Three options, generator
        elif self.main_services.settings.generator and self.main_services.settings.trainer and self.main_services.settings.analyzer:
            Generator.main(self)
            Trainer.main(self)
            Analyzer.main(self)
        elif self.main_services.settings.generator and self.main_services.settings.trainer and self.main_services.settings.load_simulator:
            simulator: Simulator = Simulator(self.main_services)
            simulator.start()
            Generator.main(self)
            Trainer.main(self)
        #Four options
        elif self.main_services.settings.generator and self.main_services.settings.trainer and self.main_services.settings.analyzer and self.main_services.settings.load_simulator:
            simulator: Simulator = Simulator(self.main_services)
            simulator.start()
            Generator.main(self)
            Trainer.main(self)
            Analyzer.main(self)
        #Trainer
        elif self.main_services.settings.trainer and self.main_services.settings.analyzer:
            Trainer.main(self)
            Analyzer.main(self)
        elif self.main_services.settings.trainer and self.main_services.settings.load_simulator:
            simulator: Simulator = Simulator(self.main_services)
            simulator.start()
            Trainer.main(self)
        elif self.main_services.settings.trainer and self.main_services.settings.analyzer and self.main_services.settings.load_simulator:
            simulator: Simulator = Simulator(self.main_services)
            simulator.start()
            Trainer.main(self)
            Analyzer.main(self)
        #Analyzer
        elif self.main_services.settings.analyzer and self.main_services.settings.load_simulator:
            simulator: Simulator = Simulator(self.main_services)
            simulator.start()
            Analyzer.main(self)
        #Singles
        elif self.main_services.settings.generator:
            Generator.main(self)
        elif self.main_services.settings.trainer:
            Trainer.main(self)
        elif self.main_services.settings.analyzer:
            Analyzer.main(self)
        elif self.main_services.settings.load_simulator:
            simulator: Simulator = Simulator(self.main_services)
            simulator.start()

        if self.main_services.settings.clear_cache:
            self.main_services.resources.cache_dir.clear()
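
The flag combinations above are enumerated by hand, and the branch order matters: the two-flag generator/trainer branch, for instance, also captures the three-flag combinations listed after it. A data-driven sketch of the same dispatch (assuming the same settings attributes; note it always runs the simulator first, whereas the original varies the order per combination):

    def run_multiple(self):
        settings = self.main_services.settings
        if settings.load_simulator:
            simulator: Simulator = Simulator(self.main_services)
            simulator.start()
        # Run whichever batch stages are enabled, in a fixed order.
        stages = [(settings.generator, Generator.main),
                  (settings.trainer, Trainer.main),
                  (settings.analyzer, Analyzer.main)]
        for enabled, stage in stages:
            if enabled:
                stage(self)
        if settings.clear_cache:
            self.main_services.resources.cache_dir.clear()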
Example #23
args = vars(parser.parse_args())

print("\n\n")
print(datetime.datetime.now().strftime('%d %b %G %I:%M%p'))
print("\n")

params = dict()
params['index_by'] = 'titles'
params['enable_filtering'] = False
params['resources'] = "./res/"
params['similarity_threshold'] = .4
params['server_mode'] = False

# parse argument
if args['server']:
    params['server_mode'] = True


def print_configuration(params):
    print("Configuration:")
    print("\tIndex by: %s" % params['index_by'])
    print("\tFiltering enabled: %s" % repr(params['enable_filtering']))
    print("\tResource folder: %s" % params['resources'])
    print("\tSimilarity threshold: %.2f" % params['similarity_threshold'])
    print("\tServer mode enabled: %s" % repr(params['server_mode']))


print_configuration(params)

a = Analyzer(params)
a.begin()
Example #24
import sys

from parser.parser import Parser
from compiler.compiler import Compiler
from analyzer.analyzer import Analyzer

BUILTINS_FILE = "src/runtime/builtins_signatures"

if __name__ == '__main__':
    if len(sys.argv) != 3:
        print(
            "Error! Provide source and target files as arguments to this script"
        )
        exit(1)

    code_file = sys.argv[1]
    target_file = sys.argv[2]

    builtins = BUILTINS_FILE

    ast = Parser(code_file, [builtins]).parse()
    if Analyzer().validate(ast):
        Compiler(target_file).compile(ast)
Example #25
def scan_file(path):
    path = os.path.normpath(path)
    tree = build_ast(path)
    analyzer = Analyzer(path)
    analyzer.visit(tree)
    analyzer.report()
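
build_ast is not shown here; assuming it wraps the standard library's ast module, a minimal hypothetical implementation compatible with the visit() call above would be:

import ast

def build_ast(path):
    # Parse the source file into an ast.Module that an ast.NodeVisitor-style Analyzer can walk.
    with open(path, encoding='utf-8') as source:
        return ast.parse(source.read(), filename=path)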
Example #26
class EstimatorServer():
    def __init__(self, gdrive_certificat_path):
        self.server = Flask(__name__,
                            static_url_path='',
                            static_folder='frontend/static')

        self.socketio = SocketIO(self.server)
        self.session_maker = run_orm()
        self.analyzer = Analyzer()
        self.drive = Drive(gdrive_certificat_path)

        self.server.route('/', methods=['GET'])(self.index)
        self.server.route('/upload', methods=['POST'])(self.upload_task)
        self.server.route('/end_check', methods=['POST'])(self.end_check)

    def item_callback(self, current_item_id, count_items):
        self.socketio.emit(
            'changed-report-status',
            json.dumps({
                'status':
                'handling',
                'description':
                f'Обработано ответов из архива {current_item_id} из {count_items}'
            }))

    def load_lecture_file(self, file):
        extension = re.findall(r'\.\w+$', file.filename)[0]

        if extension == '.pptx':
            lecture = Presentation(file)
            return read_from_presentation(lecture)
        if extension == '.txt':
            wrapper = io.TextIOWrapper(file, encoding='utf-8')
            return wrapper.read()
        if extension == '.docx':
            lecture = docx.Document(file)
            fullText = []
            for para in lecture.paragraphs:
                fullText.append(para.text)
            return ' '.join(fullText)

        raise NotSupportLectureExtensionType(extension)

    def load_essays_file(self, file):
        extension = re.findall(r'\.\w+$', file.filename)[0]

        if extension == '.csv':
            return pd.read_csv(file)
        if extension == '.xlsx':
            return pd.read_excel(file)
        if extension == '.zip':
            archive = zipfile.ZipFile(file, 'r')
            return download_archive(self.drive, archive, self.item_callback)

        raise NotSupportEssayExtensionType(extension)

    def index(self):
        return self.server.send_static_file("index.html")

    def upload_task(self):
        try:
            if 'lecture' not in request.files or 'essays' not in request.files:
                return redirect(request.url)

            lecture = request.files['lecture']
            essays = request.files['essays']

            if lecture.filename == '' or essays.filename == '':
                return redirect(request.url)

            self.socketio.emit(
                'changed-report-status',
                json.dumps({
                    'status': 'handling',
                    "description": "Обработка файлов"
                }))

            lecture = self.load_lecture_file(lecture)
            essays = self.load_essays_file(essays)
            essays = essays.dropna(axis=0)

            self.socketio.emit(
                'changed-report-status',
                json.dumps({
                    'status': 'handling',
                    "description": "Оценка эссе"
                }))

            report = self.analyzer.analyze(lecture, essays)
            report_schema = ReportSchema()
            session = self.session_maker()
            session.add(report)
            session.commit()

            self.socketio.emit('changed-report-status',
                               json.dumps({'status': 'handled'}))
            return report_schema.dump(report)

        except Exception as e:
            print(e)
            print(traceback.print_exc())

            if type(e) == NotFoundEssayColumn or type(e) == NotSupportEssayExtensionType\
                    or type(e) == NotSupportLectureExtensionType:
                return json.dumps({"status": "error", "text": str(e)}), 500
            else:
                return json.dumps({
                    "status": "error",
                    "text": "Ошибка оценки загруженных эссе"
                }), 500

    def end_check(self):
        try:
            session = self.session_maker()
            report_schema = ReportSchema()
            json_data = json.loads(request.data)
            report = report_schema.load(data=json_data, session=session)

            if report.lecture is None:
                raise Exception("Received incorrect data. Report not found")

            session.commit()
            return json.dumps({
                "status":
                "success",
                "text":
                "Результаты проверки эссе успешно сохранены"
            })

        except Exception as e:
            print(e)
            print(traceback.print_exc())
            session.close()
            return json.dumps({
                "status": "error",
                "text": "Ошибка сохранения резултатов проверки"
            }), 500

    def start(self):
        self.socketio.run(self.server, host='localhost', port=5000)
Example #27
def main():
    parser = argparse.ArgumentParser(description="Gaussian Input Output HMM")

    parser.add_argument('--data',
                        type=str,
                        default='./dataset/ptb/',
                        help='location of the data corpus')
    parser.add_argument('--batch', type=int, default=256)
    parser.add_argument('--optim', choices=['sgd', 'adam'], default='adam')
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--lr_decay',
                        type=float,
                        default=0.999995,
                        help='Decay rate of learning rate')
    parser.add_argument('--amsgrad', action='store_true', help='AMD Grad')
    parser.add_argument('--weight_decay',
                        type=float,
                        default=0.001,
                        help='weight for l2 norm decay')
    parser.add_argument('--warmup_steps',
                        type=int,
                        default=0,
                        metavar='N',
                        help='number of steps to warm up (default: 0)')
    parser.add_argument('--var_scale', type=float, default=1.0)
    parser.add_argument('--log_dir',
                        type=str,
                        default='./output/' +
                        datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S") +
                        "/")
    parser.add_argument('--dim', type=int, default=10)
    parser.add_argument('--gpu', action='store_true')
    parser.add_argument('--random_seed', type=int, default=10)
    parser.add_argument('--in_mu_drop', type=float, default=0.0)
    parser.add_argument('--in_cho_drop', type=float, default=0.0)
    parser.add_argument('--t_mu_drop', type=float, default=0.0)
    parser.add_argument('--t_cho_drop', type=float, default=0.0)
    parser.add_argument('--out_mu_drop', type=float, default=0.0)
    parser.add_argument('--out_cho_drop', type=float, default=0.0)
    parser.add_argument('--trans_cho_method',
                        type=str,
                        choices=['random', 'wishart'],
                        default='random')
    parser.add_argument(
        '--input_cho_init',
        type=float,
        default=0.0,
        help=
        'init method of input cholesky matrix. 0 means random. The other score means constant'
    )
    parser.add_argument(
        '--trans_cho_init',
        type=float,
        default=1.0,
        help='init added scale of random version init_cho_init')
    parser.add_argument(
        '--output_cho_init',
        type=float,
        default=0.0,
        help=
        'init method of output cholesky matrix. 0 means random. The other score means constant'
    )
    # i_comp_num = 1, t_comp_num = 1, o_comp_num = 1, max_comp = 1,
    parser.add_argument('--input_comp_num',
                        type=int,
                        default=1,
                        help='input mixture gaussian component number')
    parser.add_argument('--tran_comp_num',
                        type=int,
                        default=1,
                        help='transition mixture gaussian component number')
    parser.add_argument('--output_comp_num',
                        type=int,
                        default=1,
                        help='output mixture gaussian component number')
    parser.add_argument(
        '--threshold',
        type=float,
        default=1.0,
        help=
        'pruning hyper-parameter, greater than 1 is max component, less than 1 is max value'
    )
    parser.add_argument('--unk_replace',
                        type=float,
                        default=0.0,
                        help='The rate to replace a singleton word with UNK')
    parser.add_argument('--tran_weight', type=float, default=0.0001)
    parser.add_argument('--input_weight', type=float, default=0.0)
    parser.add_argument('--output_weight', type=float, default=0.0)
    parser.add_argument('--emission_cho_grad', type=bool, default=False)
    parser.add_argument('--transition_cho_grad', type=bool, default=True)
    parser.add_argument('--decode_cho_grad', type=bool, default=False)
    parser.add_argument('--gaussian_decode', action='store_true')
    parser.add_argument('--analysis', action='store_true')
    parser.add_argument('--sep_normalize', type=float, default=0.01)

    args = parser.parse_args()

    np.random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    random.seed(args.random_seed)

    log_dir = args.log_dir

    # setting optimizer
    optim = args.optim
    batch_size = args.batch
    # optim = 'sgd'
    lr = args.lr
    lr_decay = args.lr_decay
    warmup_steps = args.warmup_steps
    amsgrad = args.amsgrad
    weight_decay = args.weight_decay

    root = args.data
    in_mu_drop = args.in_mu_drop
    in_cho_drop = args.in_cho_drop
    t_mu_drop = args.t_mu_drop
    t_cho_drop = args.t_cho_drop
    out_mu_drop = args.out_mu_drop
    out_cho_drop = args.out_cho_drop
    tran_cho_method = args.trans_cho_method
    input_cho_init = args.input_cho_init
    trans_cho_init = args.trans_cho_init
    output_cho_init = args.output_cho_init
    input_num_comp = args.input_comp_num
    tran_num_comp = args.tran_comp_num
    output_num_comp = args.output_comp_num
    threshold = args.threshold
    unk_replace = args.unk_replace
    normalize_weight = [
        args.tran_weight, args.input_weight, args.output_weight
    ]
    gaussian_decode = args.gaussian_decode
    sep_normalize = args.sep_normalize

    analysis = args.analysis

    EMISSION_CHO_GRAD = args.emission_cho_grad
    TRANSITION_CHO_GRAD = args.transition_cho_grad
    DECODE_CHO_GRAD = args.decode_cho_grad

    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    # save parameter
    save_parameter_to_json(log_dir, vars(args))

    logger = get_logger('Sequence-Labeling')
    change_handler(logger, log_dir)
    # logger = LOGGER
    logger.info(args)

    device = torch.device('cuda') if args.gpu else torch.device('cpu')

    # Loading data
    logger.info('Load PTB data....')
    alphabet_path = os.path.join(root, 'alphabets')
    train_path = os.path.join(root, 'train.conllu')
    dev_path = os.path.join(root, 'dev.conllu')
    test_path = os.path.join(root, 'test.conllu')
    word_alphabet, char_alphabet, pos_alphabet, type_alphabet = conllx_data.create_alphabets(
        alphabet_path,
        train_path,
        data_paths=[dev_path, test_path],
        embedd_dict=None,
        max_vocabulary_size=1e5,
        min_occurrence=1)

    train_dataset = conllx_data.read_bucketed_data(train_path, word_alphabet,
                                                   char_alphabet, pos_alphabet,
                                                   type_alphabet)
    num_data = sum(train_dataset[1])
    dev_dataset = conllx_data.read_data(dev_path, word_alphabet, char_alphabet,
                                        pos_alphabet, type_alphabet)
    test_dataset = conllx_data.read_data(test_path, word_alphabet,
                                         char_alphabet, pos_alphabet,
                                         type_alphabet)

    logger.info("Word Alphabet Size: %d" % word_alphabet.size())
    logger.info("Character Alphabet Size: %d" % char_alphabet.size())
    logger.info("POS Alphabet Size: %d" % pos_alphabet.size())
    ntokens = word_alphabet.size()
    nlabels = pos_alphabet.size()

    # init analyzer
    if analysis:
        analyzer = Analyzer(word_alphabet=word_alphabet,
                            pos_alphabet=pos_alphabet)
    else:
        analyzer = None

    # build model
    if threshold >= 1.0:
        model = MixtureGaussianSequenceLabeling(
            dim=args.dim,
            ntokens=ntokens,
            nlabels=nlabels,
            t_cho_method=tran_cho_method,
            t_cho_init=trans_cho_init,
            in_cho_init=input_cho_init,
            out_cho_init=output_cho_init,
            in_mu_drop=in_mu_drop,
            in_cho_drop=in_cho_drop,
            t_mu_drop=t_mu_drop,
            t_cho_drop=t_cho_drop,
            out_mu_drop=out_mu_drop,
            out_cho_drop=out_cho_drop,
            i_comp_num=input_num_comp,
            t_comp_num=tran_num_comp,
            o_comp_num=output_num_comp,
            max_comp=int(threshold),
            gaussian_decode=gaussian_decode)
    else:
        model = ThresholdPruningMGSL(dim=args.dim,
                                     ntokens=ntokens,
                                     nlabels=nlabels,
                                     t_cho_method=tran_cho_method,
                                     t_cho_init=trans_cho_init,
                                     in_cho_init=input_cho_init,
                                     out_cho_init=output_cho_init,
                                     in_mu_drop=in_mu_drop,
                                     in_cho_drop=in_cho_drop,
                                     t_mu_drop=t_mu_drop,
                                     t_cho_drop=t_cho_drop,
                                     out_mu_drop=out_mu_drop,
                                     out_cho_drop=out_cho_drop,
                                     i_comp_num=input_num_comp,
                                     t_comp_num=tran_num_comp,
                                     o_comp_num=output_num_comp,
                                     threshold=threshold,
                                     gaussian_decode=gaussian_decode)

    # model = RNNSequenceLabeling("LSTM", ntokens=ntokens, nlabels=nlabels, ninp=args.dim, nhid=args.dim, dropout=in_mu_drop)
    # model = WeightIOHMM(vocab_size=ntokens, nlabel=nlabels, num_state=100)
    model.to(device)
    logger.info('Building model ' + model.__class__.__name__ + '...')
    # optimizer = optim.Adam(model.parameters(), lr=lr)
    parameters_need_update = filter(lambda p: p.requires_grad,
                                    model.parameters())
    optimizer, scheduler = get_optimizer(parameters_need_update,
                                         optim,
                                         lr,
                                         amsgrad,
                                         weight_decay,
                                         lr_decay=lr_decay,
                                         warmup_steps=warmup_steps)
    # depend on dev ppl
    best_epoch = (-1, 0.0, 0.0)
    num_batches = num_data // batch_size + 1

    def train(best_epoch, thread=6):
        epoch = 0
        patient = 0  # ensure the patience counter exists before it is first incremented below
        while epoch - best_epoch[0] <= thread:
            epoch_loss = 0
            num_back = 0
            num_words = 0
            num_insts = 0
            model.train()
            for step, data in enumerate(
                    iterate_data(train_dataset,
                                 batch_size,
                                 bucketed=True,
                                 unk_replace=unk_replace,
                                 shuffle=True)):
                # for j in tqdm(range(math.ceil(len(train_dataset) / batch_size))):
                optimizer.zero_grad()
                # samples = train_dataset[j * batch_size: (j + 1) * batch_size]
                words, labels, masks = data['WORD'].to(device), data['POS'].to(
                    device), data['MASK'].to(device)
                loss = 0.0
                if threshold >= 1.0:
                    # sentences, labels, masks, revert_order = standardize_batch(samples)
                    loss = model.get_loss(words,
                                          labels,
                                          masks,
                                          normalize_weight=normalize_weight,
                                          sep_normalize=sep_normalize)
                else:
                    for i in range(batch_size):
                        loss += model.get_loss(
                            words[i],
                            labels[i],
                            masks[i],
                            normalize_weight=normalize_weight,
                            sep_normalize=sep_normalize)
                # loss = model.get_loss(words, labels, masks)
                loss.backward()
                optimizer.step()
                scheduler.step()
                epoch_loss += (loss.item()) * words.size(0)
                num_words += torch.sum(masks).item()
                num_insts += words.size()[0]
                if step % 10 == 0:
                    torch.cuda.empty_cache()
                    sys.stdout.write("\b" * num_back)
                    sys.stdout.write(" " * num_back)
                    sys.stdout.write("\b" * num_back)
                    curr_lr = scheduler.get_lr()[0]
                    log_info = '[%d/%d (%.0f%%) lr=%.6f] loss: %.4f (%.4f)' % (
                        step, num_batches, 100. * step / num_batches, curr_lr,
                        epoch_loss / num_insts, epoch_loss / num_words)
                    sys.stdout.write(log_info)
                    sys.stdout.flush()
                    num_back = len(log_info)
            logger.info('Epoch ' + str(epoch) + ' Loss: ' +
                        str(round(epoch_loss / num_insts, 4)))
            if threshold >= 1.0:
                acc, _ = evaluate(dev_dataset, batch_size, model, device)
            else:
                acc, _ = evaluate(dev_dataset, 1, model, device)
            logger.info('\t Dev Acc: ' + str(round(acc * 100, 3)))
            if analysis:
                analyse(model,
                        dev_dataset,
                        batch_size,
                        device,
                        analyzer,
                        log_dir + '/dev_' + str(epoch),
                        buckted=False)
                analyse(model,
                        test_dataset,
                        batch_size,
                        device,
                        analyzer,
                        log_dir + '/test_' + str(epoch),
                        buckted=False)

            if best_epoch[1] < acc:
                test_acc, _ = evaluate(test_dataset, batch_size, model, device)
                logger.info('\t Test Acc: ' + str(round(test_acc * 100, 3)))
                best_epoch = (epoch, acc, test_acc)
                patient = 0
            else:
                patient += 1
            epoch += 1
            if patient > 4:
                print('reset optimizer momentums')
                scheduler.reset_state()
                patient = 0

        logger.info("Best Epoch: " + str(best_epoch[0]) + " Dev ACC: " +
                    str(round(best_epoch[1] * 100, 3)) + "Test ACC: " +
                    str(round(best_epoch[2] * 100, 3)))
        return best_epoch

    best_epoch = train(best_epoch, thread=10)
    # logger.info("After tunning mu. Here we tunning variance")
    # # flip gradient
    #
    # for parameter in model.parameters():
    #     # flip
    #     parameter.requires_grad = not parameter.requires_grad

    # best_epoch = train(best_epoch)
    with open(log_dir + '/' + 'result.json', 'w') as f:
        final_result = {
            "Epoch": best_epoch[0],
            "Dev": best_epoch[1] * 100,
            "Test": best_epoch[2] * 100
        }
        json.dump(final_result, f)
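
get_optimizer() and change_handler() come from the project's utilities and are not shown; as a rough, hypothetical equivalent, the warmup-plus-decay schedule requested above could be built from plain PyTorch like this:

import torch

# Hypothetical stand-in for get_optimizer(): Adam plus per-step warmup and exponential decay.
optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                             amsgrad=amsgrad, weight_decay=weight_decay)

def lr_lambda(step):
    if step < warmup_steps:
        return float(step + 1) / max(1, warmup_steps)
    return lr_decay ** (step - warmup_steps)

scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)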
Example #28
class ATC:
    section = "AvailableOptions"

    def __init__(self):
        # Initialising fields
        self.parameters = {}
        # Loading config
        self.config = self.loadConfig()
        self.analyzer = Analyzer(self.config)
        global analyzer_global
        analyzer_global = self.analyzer
        # Selecting mode
        if len(sys.argv) > 1:
            self.parse_args()
            self.analyzer.error_occurred.connect(self.print_error)
            filename = self.parameters["input"]
            if not os.path.exists(filename):
                self.print_error("File {} does not exist".format(filename))
                sys.exit()
            try:
                text = self.analyzer.load_file(self.parameters["input"])
                if not self.analyzer.isTextValid(text):
                    self.print_error(
                        "File {} does not contain valid text".format(filename))
                    sys.exit()
            except Exception as e:
                self.print_error("Error loading file {}:\n{}".format(
                    filename, e))
                sys.exit()
            result = self.analyzer.analyze(text, self.parameters)
            if result is None:
                self.print_error("Unknown error occurred")
                sys.exit()
            result.save_to_file(self.parameters["output"],
                                self.parameters["threshold"],
                                n_digits=5)
            sys.exit(0)
        else:
            show_splashscreen()
            self.ui = GUI(analyzer=self.analyzer, config=self.config)

    def parse_args(self):
        description = "Automated Text Classifier for VINITI. Чтобы запустить графический сеанс, " \
                      "запустите программу без аргументов"
        argparser = ArgumentParser(prog="ATC", description=description)
        formats = self.config.get(self.section, "formats").split(", ")
        languages = self.config.get(self.section, "languages").split(", ")
        norm_options = self.config.get(self.section,
                                       "norm_predict").split(", ")
        argparser.add_argument("-i",
                               "--input",
                               help="полный путь к файлу с текстом",
                               required=True)
        # type=unescaped_str
        argparser.add_argument(
            "-o",
            "--output",
            help="полный путь к файлу, в который будет записан результат",
            required=True)
        argparser.add_argument("-id",
                               "--rubricator-id",
                               help="идентификатор рубрикатора",
                               required=True)
        argparser.add_argument("-f",
                               "--format",
                               help="формат входного файла",
                               choices=formats,
                               required=False)
        argparser.add_argument("-l",
                               "--language",
                               help="язык входного текста",
                               choices=languages,
                               required=True)
        argparser.add_argument("-t",
                               "--threshold",
                               help="пороговое значение вероятности. " +
                               "Ответы классификатора с вероятностью ниже " +
                               "заданной выведены не будут",
                               default=0.0,
                               type=float,
                               required=False)
        argparser.add_argument(
            "-n",
            "--normalize",
            help="нормировать ли предсказание классификатора",
            choices=norm_options,
            required=False,
            default="not")
        subparsers = argparser.add_subparsers(help="Commands")
        # Creating server command
        server_parser = subparsers.add_parser("server",
                                              help="запустить режим сервера")
        server_parser.add_argument(
            "port",
            help="номер порта, на котором запустить сервер",
            action=LaunchServer,
            type=int)

        self.parameters = vars(argparser.parse_args())

    @staticmethod
    def print_error(error_msg: str):
        print(error_msg, file=sys.stderr)

    @staticmethod
    def loadConfig():
        parser = ConfigParser()
        parser.read([os.path.join(os.path.dirname(__file__), "config.ini")],
                    encoding="utf-8")
        return parser
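
LaunchServer, attached to the server sub-command's port argument via action=, is not shown; a custom argparse action of that shape (a hypothetical sketch, not the project's implementation) could look like:

from argparse import Action

class LaunchServer(Action):
    """Hypothetical action: start server mode as soon as the port argument is parsed."""

    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, values)
        print("Starting server on port {} (sketch only)".format(values))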