Example #1
    def __separate_data(self, data_filename):
        data_analyzer = DataAnalyzer(data_filename)
        self.mean_list = data_analyzer.get_mean_list()
        self.stdev_list = data_analyzer.get_stdev_list()

        # feature scale, and add column of 1s to X
        all_X = data_analyzer.X
        all_X = self.__apply_feature_scaling(all_X)
        all_X = np.c_[np.ones(all_X.shape[0]), all_X]
        all_Y = data_analyzer.Y
        all_data = np.c_[all_X, all_Y]

        np.random.shuffle(all_data)
        split_row_index = int(all_data.shape[0] *
                              0.8)  # top 80% of rows will be for training

        training_data = all_data[:split_row_index, :]
        validation_data = all_data[split_row_index:, :]

        self.training_X = training_data[:, :-1]
        self.training_Y = training_data[:, -1]
        self.training_Y = self.training_Y.reshape(self.training_Y.shape[0], 1)

        self.validation_X = validation_data[:, :-1]
        self.validation_Y = validation_data[:, -1]
        self.validation_Y = self.validation_Y.reshape(
            self.validation_Y.shape[0], 1)
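
The __apply_feature_scaling helper called above is not shown in this snippet. Below is a minimal sketch of what it plausibly does, assuming the same per-column z-score scaling that Example #8 applies with the stored mean_list and stdev_list (the body is an assumption, not the original implementation):

    def __apply_feature_scaling(self, X):
        # Hypothetical sketch: standardize each feature column with the
        # per-column mean and standard deviation provided by DataAnalyzer.
        X = X.astype(float).copy()
        for i in range(X.shape[1]):
            X[:, i] = (X[:, i] - self.mean_list[i]) / self.stdev_list[i]
        return X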
Example #2
 def __init__(self,
              path,
              start_date,
              interested_symbols,
              interested_start_date,
              interested_end_date,
              num_std=3,
              frequency=FrequencyMap.Minute,
              forward=True,
              model_type='CloseToClose',
              clean=True):
     """
     :param path: str
     :param start_date: datetime.date
     :param interested_symbols: list[str]
     :param interested_start_date: datetime.date
     :param interested_end_date: datetime.date
     :param num_std: int
     :param frequency: FrequencyMap
     :param forward: boolean
     :param model_type: str
     :param clean: boolean
     """
     self.interested_symbols = interested_symbols
     self.interested_start_date = interested_start_date
     self.interested_end_date = interested_end_date
     self.frequency = frequency
     self.analysis_window = 30 if frequency is not FrequencyMap.Month else 10
     self.loader = DataLoader(path, start_date)
     self.pre_process = DataPreProcessor(num_std, frequency, forward)
     self.estimator = VolatilityEstimator(model_type, clean, frequency)
     self.data_analyzer = DataAnalyzer()
     self.model = Garch11('Constant', 'Garch')
     self.error_estimator = ErrorEstimator(self.model, self.estimator,
                                           self.frequency)
Example #3
 def on_pre_enter(self):
     try:
         self.Dm = DataAnalyzer('data/tweets.json')
         self.Dm.create_dataframe()
         self.Dm.sentiment_analysis()
         self.Dm.create_csv()
         self.Dm.create_graphs()
     except Exception:
         # Swallow analysis failures so the screen can still open without graphs.
         pass
Example #4
 def __init_training_data(self, training_filename):
     data_analyzer = DataAnalyzer(training_filename)
     self.mean_list = data_analyzer.get_mean_list()
     self.stdev_list = data_analyzer.get_stdev_list()
     # feature scale, and add column of 1s to X
     self.training_X = data_analyzer.X
     self.training_X = self.__apply_feature_scaling(self.training_X)
     self.training_X = np.c_[np.ones(self.training_X.shape[0]),
                             self.training_X]
     self.training_Y = data_analyzer.Y
Example #5
def main():
	# check argv
	if len(sys.argv) != 2:
		print('usage: ' + Fore.RED + 'python3' + Fore.BLUE + ' histogram.py ' + Fore.RESET + 'data_file.csv')
		sys.exit(-1)
	data_file = sys.argv[1]
	try:
		data_analyzer = DataAnalyzer(data_file)
		data_analyzer.show_histograms()
	except IOError as e:
		print(Style.BRIGHT + Fore.RED + 'I/O Error: ' + Style.RESET_ALL + Fore.RESET + str(e))
	except ParserException as e:
		print(Style.BRIGHT + Fore.RED + 'ParserException: ' + Style.RESET_ALL + Fore.RESET + str(e))
Example #6
class Task:
    """
    Core class of this volatility library
    It schedules each component.
    Read raw data.
    Pre process data.
    Analyze data.
    Make prediction.
    Output results.
    """
    def __init__(self, path, start_date, interested_symbols, interested_start_date, interested_end_date,
                 num_std=3, frequency=FrequencyMap.Minute, forward=True, model_type='CloseToClose', clean=True):
        """
        :param path: str
        :param start_date: datetime.date
        :param interested_symbols: list[str]
        :param interested_start_date: datetime.date
        :param interested_end_date: datetime.date
        :param num_std: int
        :param frequency: FrequencyMap
        :param forward: boolean
        :param model_type: str
        :param clean: boolean
        """
        self.interested_symbols = interested_symbols
        self.interested_start_date = interested_start_date
        self.interested_end_date = interested_end_date
        self.frequency = frequency
        self.analysis_window = 30 if frequency is not FrequencyMap.Month else 10
        self.loader = DataLoader(path, start_date)
        self.pre_process = DataPreProcessor(num_std, frequency, forward)
        self.estimator = VolatilityEstimator(model_type, clean, frequency)
        self.data_analyzer = DataAnalyzer()
        self.model = Garch11('Constant', 'Garch')
        self.error_estimator = ErrorEstimator(self.model, self.estimator, self.frequency)

    def execute(self):
        self.loader.load()
        output = pd.DataFrame()
        for symbol in self.interested_symbols:
            df = self.loader.fetch(symbol, self.interested_start_date, self.interested_end_date)
            df = self.pre_process.pre_process(df)
            self.data_analyzer.analyze_data(df.copy())
            self.estimator.analyze_realized_vol(df, self.interested_start_date, self.interested_end_date, self.analysis_window)
            sample_size, error = self.error_estimator.get_best_sample_size(df)
            predictions = self.model.get_predictions(df, sample_size, self.frequency)
            output[symbol] = predictions[TimeSeriesDataFrameMap.Volatility]
            index = predictions.index
        output = output.set_index(index)  # set_index returns a new DataFrame
        file_name = r'D:\programming\VOL\{frequency}_predictions.csv'.format(frequency=self.frequency)
        output.to_csv(file_name)
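
A minimal usage sketch for this Task class; the input path, dates, and symbol below are placeholder assumptions, not values taken from the source:

import datetime

# Hypothetical invocation of the pipeline described in the docstring above.
task = Task(path=r'D:\programming\VOL\raw',                # assumed input directory
            start_date=datetime.date(2016, 1, 1),          # assumed load start
            interested_symbols=['AAPL'],                    # assumed symbol
            interested_start_date=datetime.date(2016, 1, 4),
            interested_end_date=datetime.date(2016, 3, 31),
            frequency=FrequencyMap.Minute)
task.execute()  # writes <frequency>_predictions.csv to the path hard-coded in execute()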
Example #7
class DataDisplay(Screen):
    pos_wc = ObjectProperty(None)
    neg_wc = ObjectProperty(None)
    pie_chart = ObjectProperty(None)
    line_chart = ObjectProperty(None)

    def on_pre_enter(self):
        try:
            self.Dm = DataAnalyzer('data/tweets.json')
            self.Dm.create_dataframe()
            self.Dm.sentiment_analysis()
            self.Dm.create_csv()
            self.Dm.create_graphs()
        except Exception:
            # Swallow analysis failures so the screen can still open without graphs.
            pass

    def on_enter(self):
        if os.path.exists('graphs/pos.png'):
            self.pos_wc.source = 'graphs/pos.png'
            self.pos_wc.reload()
        if os.path.exists('graphs/neg.png'):
            self.neg_wc.source = 'graphs/neg.png'
            self.neg_wc.reload()
        if os.path.exists('graphs/pie.png'):
            self.pie_chart.source = 'graphs/pie.png'
            self.pie_chart.reload()
        if os.path.exists('graphs/line.png'):
            self.line_chart.source = 'graphs/line.png'
            self.line_chart.reload()
Example #8
	def train_data(self, filename):
		print(Style.BRIGHT + 'Training model for data in ' + Fore.MAGENTA + filename + Fore.RESET + Style.RESET_ALL)
		data_analyzer = DataAnalyzer(filename)
		self.__sample_count = data_analyzer.sample_count
		Y_labeled = data_analyzer.Y_labeled
		self.__X = data_analyzer.X[:, 9:]
		self.__mean_list = [data_analyzer.all_descriptions[i].mean for i in range(9, len(data_analyzer.all_descriptions))]
		self.__stdev_list = [data_analyzer.all_descriptions[i].standard_deviation for i in range(9, len(data_analyzer.all_descriptions))]

		# Apply feature scaling
		for i in range(self.__X.shape[1]):
			self.__X[:, i] = (self.__X[:, i] - self.__mean_list[i]) / self.__stdev_list[i]
		self.__X = np.c_[np.ones(self.__sample_count), self.__X]

		# Find theta for a model that determines GRYFFINDOR or NOT
		print(Style.BRIGHT + Fore.BLUE + 'Running gradient descent to determine GRYFFINDOR or NOT...' + Style.RESET_ALL + Fore.RESET)
		theta_gryffindor = self.__run_gradient_descent(np.where(Y_labeled == 'Gryffindor', 1, 0))
		# Find theta for a model that determines HUFFLEPUFF or NOT
		print(Style.BRIGHT + Fore.BLUE + 'Running gradient descent to determine HUFFLEPUFF or NOT...' + Style.RESET_ALL + Fore.RESET)
		theta_hufflepuff = self.__run_gradient_descent(np.where(Y_labeled == 'Hufflepuff', 1, 0))
		# Find theta for a model that determines RAVENCLAW or NOT
		print(Style.BRIGHT + Fore.BLUE + 'Running gradient descent to determine RAVENCLAW or NOT...' + Style.RESET_ALL + Fore.RESET)
		theta_ravenclaw = self.__run_gradient_descent(np.where(Y_labeled == 'Ravenclaw', 1, 0))
		# Find theta for a model that determines SLYTHERIN or NOT
		print(Style.BRIGHT + Fore.BLUE + 'Running gradient descent to determine SLYTHERIN or NOT...' + Style.RESET_ALL + Fore.RESET)
		theta_slytherin = self.__run_gradient_descent(np.where(Y_labeled == 'Slytherin', 1, 0))
		# Merge all theta in one matrix
		self.__theta = np.c_[theta_gryffindor, theta_hufflepuff, theta_ravenclaw, theta_slytherin]
		self.__save_weights_file()
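
The prediction step is not part of this snippet. Below is a minimal sketch of how the stacked theta matrix is typically applied in a one-vs-all classifier, assuming a sigmoid hypothesis and numpy imported as np (the method name and house order are assumptions matching the merge order above):

	def predict(self, X):
		# Hypothetical sketch: X must already be feature-scaled and carry a
		# leading column of 1s, exactly like the training matrix built above.
		houses = ['Gryffindor', 'Hufflepuff', 'Ravenclaw', 'Slytherin']  # column order of self.__theta
		scores = 1.0 / (1.0 + np.exp(-X.dot(self.__theta)))  # sigmoid probability per house
		return [houses[i] for i in np.argmax(scores, axis=1)]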
Example #9
 def __init__(self, path, start_date, interested_symbols, interested_start_date, interested_end_date,
              num_std=3, frequency=FrequencyMap.Minute, forward=True, model_type='CloseToClose', clean=True):
     """
     :param path: str
     :param start_date: datetime.date
     :param interested_symbols: list[str]
     :param interested_start_date: datetime.date
     :param interested_end_date: datetime.date
     :param num_std: int
     :param frequency: FrequencyMap
     :param forward: boolean
     :param model_type: str
     :param clean: boolean
     """
     self.interested_symbols = interested_symbols
     self.interested_start_date = interested_start_date
     self.interested_end_date = interested_end_date
     self.frequency = frequency
     self.analysis_window = 30 if frequency is not FrequencyMap.Month else 10
     self.loader = DataLoader(path, start_date)
     self.pre_process = DataPreProcessor(num_std, frequency, forward)
     self.estimator = VolatilityEstimator(model_type, clean, frequency)
     self.data_analyzer = DataAnalyzer()
     self.model = Garch11('Constant', 'Garch')
     self.error_estimator = ErrorEstimator(self.model, self.estimator, self.frequency)
Example #10
    def sentiment_dost_analysis(self, massive_comments):

        if massive_comments is False:
            return 'There are no comments on the video, and we cannot analyze it at the moment'
        else:
            analysis_dictionary = DataAnalyzer.get_dost_analysis(
                massive_comments)
            self.analysis_string += f"Analysis of the third (positive) - {analysis_dictionary['positive']}, analysis of the third (negative) - {analysis_dictionary['negative']}"
Example #11
def make_la_figs(stock_data):
    da = DataAnalyzer(stock_data)
    data = da.normalize_dataframe()

    # making the figures
    # normalized data
    da.plot_data(data,
                 'months', 
                 'stock data', 
                 'Stock Data',
                 '../data/figures/no_all.png')
    # mean of normalized data
    da.plot_data(data.mean(axis=1),
                 'months', 
                 'stock data', 
                 'Stock Data',
                 '../data/figures/no_mean.png')
Example #12
def make_graphs(stock_data):
    da = DataAnalyzer(stock_data.data)
    data = da.normalize_dataframe()

    # making the figures
    # norms
    da.plot_data(data,
                 'months', 
                 'stock data',
                 'Test',
                 '../data/figures/all.png')
    # mean of the norms             
    da.plot_data(data.mean(axis=1),
                 'months', 
                 'stock data mean', 
                 'Price Means', 
                 '../data/figures/mean.png') 
Example #13
import click
import sys
from tkinter import *
from data_analyzer import DataAnalyzer

data_analyzer = DataAnalyzer()


@click.group()
def analyzer():
    """This is the cli for analyzing the data of the rated movies by the users"""


@analyzer.command()
def get_ratings_movies():
    """Get a joined table of ratings and movies"""
    click.echo(data_analyzer.ratings_movies)


@analyzer.command()
def get_ratings_users():
    """Get a joined table of ratings and users"""
    click.echo(data_analyzer.ratings_users)


@analyzer.command()
def get_ratings_movies_users():
    """Get a joined table of ratings, movies and users"""
    click.echo(data_analyzer.ratings_movies_users)

Example #14
from .form import Form
from tkinter import *
from data_analyzer import DataAnalyzer

data_analyzer = DataAnalyzer.get_instance()


class CompareTwoMoviesByTitlesForm(Form):
    __instance = None

    @staticmethod
    def get_instance():
        if CompareTwoMoviesByTitlesForm.__instance is None:
            CompareTwoMoviesByTitlesForm()
        return CompareTwoMoviesByTitlesForm.__instance

    def __init__(self):
        self._labels = {
            'Title One': StringVar(),
            'Title Two': StringVar(),
            'Graphic Results': BooleanVar()
        }

        self._dropdowns = {'Title One', 'Title Two'}

        self._data = None

        if CompareTwoMoviesByTitlesForm.__instance is not None:
            raise Exception("This class is a singleton!")
        else:
            CompareTwoMoviesByTitlesForm.__instance = self
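
A brief usage sketch for the singleton above; the form is obtained through get_instance() rather than constructed directly:

form = CompareTwoMoviesByTitlesForm.get_instance()
same_form = CompareTwoMoviesByTitlesForm.get_instance()
assert form is same_form  # constructing the class a second time would raise "This class is a singleton!"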
Example #15
 def make_graph(self, data):
     analyzer = DataAnalyzer(data)
     data_norm = analyzer.normalize_dataframe()
     analyzer.plot_data(data_norm, 'month', 'stuff', 'title stuff',
                        '../data/figures/local_eco.png')
Example #16
class ArmAnalyzer(Analyzer):
    """Analyzer 30: in-line assembling of low-level code and data into the gdb's arm simulator"""

    # definition of subanalyzers
    address_analyzer = AddressAnalyzer()
    data_analyzer = DataAnalyzer()
    instdat_analyzer = InstdatAnalyzer()
    instmul_analyzer = InstmulAnalyzer()
    instjmp_analyzer = InstjmpAnalyzer()
    instmem_analyzer = InstmemAnalyzer()
    instmsc_analyzer = InstmscAnalyzer()

    # definition of internal helper functions (methods)
    def update_address(self, match):
        override = 0
        if match:  # new address = specified address + size of elements * number of elements
            result = self.result  # helper variable to avoid using 'self.' so many times
            rid = 0 if len(result) == 2 else 2  # reference index (2 in case of relative PC addressing mode)
            if result[rid] % result[rid + 1][0] != 0:  # if current address is misaligned
                previous_address = result[rid]  # compute the remainder positions up to the next aligned address
                remainder_shift = result[rid + 1][0] - (previous_address % self.result[rid + 1][0])
                alignment_type = 'halfword' if (result[rid + 1][0] == 2) else 'word'
                if self.state == 2:  # in case of implicit address setting
                    result[rid] = previous_address + remainder_shift  # update current address
                    if result[rid] >= 2 ** 32:
                        override = -4006  # overriding error: too big address error after alignment
                    if rid == 2:  # if there is a second address + data entry
                        result[0] = result[0] + remainder_shift  # also move forward the second address
                        if result[0] >= 2 ** 32:
                            override = -4006  # second overriding error: too big address error after alignment
                    print "WARNING: implicit %s address automatically aligned skipping %d position%s,\n\tfrom 0x%X to 0x%X" \
                          % (alignment_type, remainder_shift, 's' if remainder_shift > 1 else '',
                             previous_address, result[rid])
                else:  # in case of explicit address (self.state == 1)
                    print "WARNING: explicit %s address misaligned by %d position%s" \
                          % (alignment_type, result[rid + 1][0] - remainder_shift,
                             's' if (result[rid + 1][0] - remainder_shift) > 1 else '')
                    # update the implicit address for next instructions
            self.next_address = result[rid] + result[rid + 1][0] * (len(result[rid + 1]) - 1)
        return override

    # definition of internal transition actions (methods)
    def implicit_address(self, match, sub_result, sub_state, super_result):
        if match:  # use the current address as starting address
            self.result.append(self.next_address)
        return 0

    def stack_info(self, match, sub_result, sub_state, super_result):
        override = 0
        if match:
            self.result.append(list(sub_result))
            sub_result *= 0
            override = self.update_address(match)
        else:
            override = self.error_spring(match, sub_result, sub_state, super_result)
        return override

    def __init__(self):
        Analyzer.__init__(self)
        self.next_address = 0x8000
        # definition of error spring list
        self.error_list = [-1002, -1003, -1004, -1005, -1006,
                           -1102, -1103, -1104, -1105, -1202, -1203, -1204,
                           -1301, -1302, -1303, -1304, -1403, -1502, -1503, -1504,
                           -1603, -1604, -1605, -1606, -1607, -1702, -1703, -1704, -1705, -1706,
                           -2002, -2003, -2004,
                           -2102, -2103, -2104, -2105, -2106, -2107,
                           -2204, -2205, -2207, -2302, -2303, -2304, -2306, -2308, -2310, -2311,
                           -2402, -2403, -2404, -2405, -2406, -2407, -2408, -2409, -2410, -2411, -2412,
                           -2502, -2503, -2504, -2505, -2506, -2510, -2511, -2512, -2513,
                           -3102, -3104, -3105,
                           -3202, -3204, -3205, -3207, -3208,
                           -3302, -3304, -3305, -3307, -3308,
                           -3403, -3404, -3405, -3406, -3407, -3408, -3409, -3410,
                           -3502, -3504, -3505
                           ]
        # definition of the (instance) parsing graph
        self.graph = {0:  # initial state
                          ([(None, None, -4001, None),  # T40.0.0 EOSeq -> missing hex address
                            (' ', None, 0, None),  # T40.0.1 skip initial spaces
                            ('>', ' ', -4003, None,  # T40.0.2a found '>' at end of sequence
                             2, self.implicit_address,  # T40.0.2b found '> ', stack address and go to 2
                             -4003, None),  # T40.0.2c found '>' followed by strange char
                            (self.address_analyzer, None, 1, self.error_spring)],  # T40.0.3 get the address
                           -4002),  # T40.0.4 wrong initial hex address
                      1:  # decoder state after hex address
                          ([(None, None, -4004, None),  # T40.1.0 EOSeq -> missing info
                            (self.data_analyzer, None, 1000, self.stack_info),  # T40.1.1 get the data
                            (self.instdat_analyzer, None, 1000, self.stack_info),  # T40.1.2 get data instr.
                            (self.instmul_analyzer, None, 1000, self.stack_info),  # T40.1.3 get multiply instr.
                            (self.instjmp_analyzer, None, 1000, self.stack_info),  # T40.1.4 get branch instr.
                            (self.instmem_analyzer, None, 1000, self.stack_info),  # T40.1.5 get mem transfer instr.
                            (self.instmsc_analyzer, None, 1000, self.stack_info)],  # T40.1.6 get miscellanea instr.
                           -4005),  # T40.1.7 unrecognized instruction or directive
                      2:  # decoder state after '>' symbol
                          ([(None, None, -4004, None),  # T40.2.0 EOSeq -> missing info
                            (self.data_analyzer, None, 1000, self.stack_info),  # T40.2.1 get the data
                            (self.instdat_analyzer, None, 1000, self.stack_info),  # T40.2.2 get data instr.
                            (self.instmul_analyzer, None, 1000, self.stack_info),  # T40.2.3 get multiply instr.
                            (self.instjmp_analyzer, None, 1000, self.stack_info),  # T40.2.4 get branch instr.
                            (self.instmem_analyzer, None, 1000, self.stack_info),  # T40.2.5 get mem transfer instr.
                            (self.instmsc_analyzer, None, 1000, self.stack_info)],  # T40.2.6 get miscellanea instr.
                           -4005)  # T40.2.7 unrecognized instruction or directive
                      }
Example #17
def plot_data():
    da = DataAnalyzer()
    # da.show()
    da.plot_data()
    da.plot_ratio()
    da.plot_order()
Example #18
evolution = GeneticAlgorithm(population_size=10000, chromosome_size=65)
evolution.createPopulation()
total_generations = evolution.evolve(file_training,
                                     crossing_probability=1,
                                     mutation_rate=0.1,
                                     plague_max_percent=0.4,
                                     plague_probability=0.1,
                                     selection_exp_const=1,
                                     min_num_vars=3,
                                     max_generations=200,
                                     const_num_digits=3,
                                     satisfactory_mse=0.01)

print("\n\n\nBest subject: ", evolution.best_expr)
print("sqrt(MSE): ", np.sqrt(min(evolution.mse_list)))
print("In ", total_generations, "generations")

analysis = DataAnalyzer('testing.csv')
strength = analysis.strength(evolution.best_expr)

with open('sub0.csv', mode='w') as f:
    sample_writer = csv.writer(f,
                               delimiter=',',
                               quotechar='"',
                               quoting=csv.QUOTE_MINIMAL)
    sample_writer.writerow(['ID', 'strength'])
    i = 722
    for value in strength:
        sample_writer.writerow([i, value])
        i += 1
Example #19
def main():
    analyzer = DataAnalyzer.create()

    with open('../stats.json') as stats_input:
        stats = map(json.loads, stats_input.readlines())
    print 'Read stats.json: %s entries found' % len(stats)

    # by_champion is an object with 126 keys, one for each champion id.
    # The value assigned to each key is a list of their game data generated
    # from stats.json
    by_champion = {}
    for stat in stats:
        champion = stat['champion']
        if by_champion.get(champion, None):
            by_champion[champion].append(stat)
        else:
            by_champion[champion] = [stat]

    # build_stats is an object with 126 keys, one for each champion id.
    # The value assigned to each key is a dict of the format:
    # { itemId: effectivenessScore, itemId: effectivenessScore ...... }
    # effectivenessScore is a score of the item's effectiveness based on
    # win and KDA, and increases linearly with the number of times built.
    build_stats = {}
    for champion in by_champion:
        championName = analyzer.get_champion_name_by_id(champion)
        build_stats[championName] = {}
        for game in by_champion[champion]:
            kda = 0
            if game['kills']:
                kda += game['kills']
            if game['assists']:
                kda += game['assists'] / 2
            if game['deaths']:
                kda /= float(game['deaths'])
            effectivenessScore = kda
            if game.get('win', False):
                effectivenessScore += 2
            for i in range(1, 7):
                item = game.get('item%s' % i, None)
                if item:
                    if build_stats[championName].get(item, None):
                        build_stats[championName][item] += effectivenessScore
                    else:
                        build_stats[championName][item] = effectivenessScore

    # The build stats will be written into the /stats-by-champion directory
    # with one JSON file per champion. We split this build data into viable
    # end game builds and intermediate builds by sorting the build data by
    # effectiveness score and then parsing it.
    for champion in build_stats:
        # trinkets, boots, endgame, and consumables all store items that people
        # have built for this champion, sorted in order of effectiveness.
        # build_output is a dict that, when dumped to a JSON object, becomes a valid
        # build file that someone can put in the League of Legends directory and
        # use.
        build_output = {}
        for category in CATEGORIES:
            build_output[category] = []

        effectiveness_sorted_items = sorted(
            build_stats[champion], key=build_stats[champion].get)[::-1]
        for item in effectiveness_sorted_items:
            if analyzer.is_irrelevant(
                    item) or not analyzer.get_item_name_by_id(item):
                continue
            elif analyzer.is_starter(item):
                build_output['Starting Items'].append(item)
            elif analyzer.is_boot(item) and analyzer.get_item_depth(item) >= 2:
                build_output['Boots'].append(item)
            elif analyzer.is_jungle(
                    item) and analyzer.get_item_depth(item) >= 2:
                build_output['Jungle Items'].append(item)
            elif analyzer.is_elixir(item):
                build_output['Elixirs'].append(item)
            elif not analyzer.get_items_built_from(item) and (
                    not analyzer.is_trinket(item)):
                build_output['Endgame Items'].append(item)

        # For each category of item, we will only show a certain number of items and
        # will generate the item set for each category. We will also sort
        # the items by build-tree depth, where lower-tier items come first.
        generator = ItemSetGenerator.create(champion)
        for category in CATEGORIES:
            build_output[category] = build_output[
                category][:ITEM_LIMIT[category]]
            if SORT_TIER[category]:
                build_output[category] = sorted(
                    build_output[category],
                    key=lambda item: analyzer.get_item_data_by_id(item).get(
                        'depth', 0))

            items = ItemSetBlockItems()
            for item in build_output[category]:
                items.add_item(item, 1)
            generator.add_block('Recommended %s' % category, False,
                                items.get_items(), category == 'Jungle Items')
            build_output[category] = map(analyzer.get_item_data_by_id,
                                         build_output[category])
        item_set = generator.get_item_set()

        # We will write the build JSON file to one file and the parsed data to
        # another.
        data_file = Util.normalize_champion_name(champion)
        with open('../builds/%s.json' % data_file, 'w') as champion_output:
            champion_output.write(Util.json_dump(build_output))
            print 'Wrote %s.json' % data_file

        # As of now, only the build files are used, but we will store the champion
        # data as well for future extendability.
        build_file = '%s_build' % Util.normalize_champion_name(champion)
        with open('../builds/%s.json' % build_file, 'w') as build_file_output:
            build_file_output.write(Util.json_dump(item_set))
            print 'Wrote %s.json' % build_file

    print 'Successfully wrote champion data.'
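
As a worked illustration of the scoring above (the numbers are made up): a winning game with 5 kills, 4 assists, and 2 deaths gives kda = (5 + 4/2) / 2 = 3.5, and the +2 win bonus brings the effectivenessScore to 5.5 for every item built in that game.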
Example #20
class Task:
    """
    Core class of this volatility library
    It schedules each component.
    Read raw data.
    Pre process data.
    Analyze data.
    Make prediction.
    Output results.
    """
    def __init__(self,
                 path,
                 start_date,
                 interested_symbols,
                 interested_start_date,
                 interested_end_date,
                 num_std=3,
                 frequency=FrequencyMap.Minute,
                 forward=True,
                 model_type='CloseToClose',
                 clean=True):
        """
        :param path: str
        :param start_date: datetime.date
        :param interested_symbols: list[str]
        :param interested_start_date: datetime.date
        :param interested_end_date: datetime.date
        :param num_std: int
        :param frequency: FrequencyMap
        :param forward: boolean
        :param model_type: str
        :param clean: boolean
        """
        self.interested_symbols = interested_symbols
        self.interested_start_date = interested_start_date
        self.interested_end_date = interested_end_date
        self.frequency = frequency
        self.analysis_window = 30 if frequency is not FrequencyMap.Month else 10
        self.loader = DataLoader(path, start_date)
        self.pre_process = DataPreProcessor(num_std, frequency, forward)
        self.estimator = VolatilityEstimator(model_type, clean, frequency)
        self.data_analyzer = DataAnalyzer()
        self.model = Garch11('Constant', 'Garch')
        self.error_estimator = ErrorEstimator(self.model, self.estimator,
                                              self.frequency)

    def execute(self):
        self.loader.load()
        output = pd.DataFrame()
        for symbol in self.interested_symbols:
            df = self.loader.fetch(symbol, self.interested_start_date,
                                   self.interested_end_date)
            df = self.pre_process.pre_process(df)
            self.data_analyzer.analyze_data(df.copy())
            self.estimator.analyze_realized_vol(df, self.interested_start_date,
                                                self.interested_end_date,
                                                self.analysis_window)
            sample_size, error = self.error_estimator.get_best_sample_size(df)
            predictions = self.model.get_predictions(df, sample_size,
                                                     self.frequency)
            output[symbol] = predictions[TimeSeriesDataFrameMap.Volatility]
            index = predictions.index
        output = output.set_index(index)  # set_index returns a new DataFrame
        file_name = r'D:\programming\VOL\{frequency}_predictions.csv'.format(
            frequency=self.frequency)
        output.to_csv(file_name)
Example #21
def main():
    analyzer = DataAnalyzer.create()

    with open("../stats.json") as stats_input:
        stats = map(json.loads, stats_input.readlines())
    print "Read stats.json: %s entries found" % len(stats)

    # by_champion is an object with 126 keys, one for each champion id.
    # The value assigned to each key is a list of their game data generated
    # from stats.json
    by_champion = {}
    for stat in stats:
        champion = stat["champion"]
        if by_champion.get(champion, None):
            by_champion[champion].append(stat)
        else:
            by_champion[champion] = [stat]

    # build_stats is an object with 126 keys, one for each champion id.
    # The value assigned to each key is a dict of the format:
    # { itemId: effectivenessScore, itemId: effectivenessScore ...... }
    # effectivenessScore is a score of the item's effectiveness based on
    # win and KDA, and increases linearly with the number of times built.
    build_stats = {}
    for champion in by_champion:
        championName = analyzer.get_champion_name_by_id(champion)
        build_stats[championName] = {}
        for game in by_champion[champion]:
            kda = 0
            if game["kills"]:
                kda += game["kills"]
            if game["assists"]:
                kda += game["assists"] / 2
            if game["deaths"]:
                kda /= float(game["deaths"])
            effectivenessScore = kda
            if game.get("win", False):
                effectivenessScore += 2
            for i in range(1, 7):
                item = game.get("item%s" % i, None)
                if item:
                    if build_stats[championName].get(item, None):
                        build_stats[championName][item] += effectivenessScore
                    else:
                        build_stats[championName][item] = effectivenessScore

    # The build stats will be written into the /stats-by-champion directory
    # with one JSON file per champion. We split this build data into viable
    # end game builds and intermediate builds by sorting the build data by
    # effectiveness score and then parsing it.
    for champion in build_stats:
        # trinkets, boots, endgame, and consumables all store items that people
        # have built for this champion, sorted in order of effectiveness.
        # build_output is a dict that, when dumped to a JSON object, becomes a valid
        # build file that someone can put in the League of Legends directory and
        # use.
        build_output = {}
        for category in CATEGORIES:
            build_output[category] = []

        effectiveness_sorted_items = sorted(build_stats[champion], key=build_stats[champion].get)[::-1]
        for item in effectiveness_sorted_items:
            if analyzer.is_irrelevant(item) or not analyzer.get_item_name_by_id(item):
                continue
            elif analyzer.is_starter(item):
                build_output["Starting Items"].append(item)
            elif analyzer.is_boot(item) and analyzer.get_item_depth(item) >= 2:
                build_output["Boots"].append(item)
            elif analyzer.is_jungle(item) and analyzer.get_item_depth(item) >= 2:
                build_output["Jungle Items"].append(item)
            elif analyzer.is_elixir(item):
                build_output["Elixirs"].append(item)
            elif not analyzer.get_items_built_from(item) and (not analyzer.is_trinket(item)):
                build_output["Endgame Items"].append(item)

        # For each category of item, we will only show a certain number of items and
        # will generate the item set for each category. We will also sort
        # the items by build-tree depth, where lower-tier items come first.
        generator = ItemSetGenerator.create(champion)
        for category in CATEGORIES:
            build_output[category] = build_output[category][: ITEM_LIMIT[category]]
            if SORT_TIER[category]:
                build_output[category] = sorted(
                    build_output[category], key=lambda item: analyzer.get_item_data_by_id(item).get("depth", 0)
                )

            items = ItemSetBlockItems()
            for item in build_output[category]:
                items.add_item(item, 1)
            generator.add_block("Recommended %s" % category, False, items.get_items(), category == "Jungle Items")
            build_output[category] = map(analyzer.get_item_data_by_id, build_output[category])
        item_set = generator.get_item_set()

        # We will write the build JSON file to one file and the parsed data to
        # another.
        data_file = Util.normalize_champion_name(champion)
        with open("../builds/%s.json" % data_file, "w") as champion_output:
            champion_output.write(Util.json_dump(build_output))
            print "Wrote %s.json" % data_file

        # As of now, only the build files are used, but we will store the champion
        # data as well for future extendability.
        build_file = "%s_build" % Util.normalize_champion_name(champion)
        with open("../builds/%s.json" % build_file, "w") as build_file_output:
            build_file_output.write(Util.json_dump(item_set))
            print "Wrote %s.json" % build_file

    print "Successfully wrote champion data."
Example #22
        elif prefix is None or prefix == '':
            print('Prefix must be specified!')
        else:
            break
    method = 'metric'
    model_type = 'resnet'  # should be in ['resnet', 'inception3']
    train_root = '/home/ubuntu/Program/Tableware/DataArgumentation/dataset/o_train/'
    test_root = '/home/ubuntu/Program/Tableware/DataArgumentation/dataset/n_test/'
    sample_file_dir = '/home/ubuntu/Program/Tableware/DataArgumentation/dataset/n_base_sample_5'

    load_model_path = None
    # load_model_path = './model/pretrained/inception_v3_google-1a9a5a14.pth'
    trainer = Trainer(model_type=model_type, load_model_path=load_model_path)
    trainer.set_super_training_parameters(train_root=train_root,
                                          test_root=test_root,
                                          sample_file_dir=sample_file_dir,
                                          prefix=prefix,
                                          batch_size=128)
    save_dir, maxacc = trainer.metric_training(balance_testset=False)

    best_model_path = './model/keep/resnet_%s_%s_conv0.05_%.2f.tar' % (
        prefix, method, maxacc * 100)
    shutil.copy(os.path.join(save_dir, '%s_%s.pth.tar' % (prefix, method)),
                best_model_path)

    analyzer = DataAnalyzer(sample_file_dir=sample_file_dir,
                            test_dir=test_root,
                            num_of_classes=42,
                            prefix=prefix)
    analyzer.analysis_for_inter_exter_acc(model_path=best_model_path)