示例#1
0
    def __init__(self, db_name, forum_id, interval, config, log_root_path):
        """
        :type db_name: str
        :param db_name: name of an existing DB

        :type forum_id: int
        :param forum_id: id of an existing forum in the DB

        :type interval: list int
        :param interval: list of topic ids to import

        :type config: dict
        :param config: the DB configuration file

        :type log_root_path: str
        :param log_root_path: the log path
        """
        # Import target and the slice of topics handled by this worker.
        self._db_name = db_name
        self._forum_id = forum_id
        self._interval = interval
        self._config = config
        self._log_root_path = log_root_path

        # Shared helpers used by the import steps.
        self._logging_util = LoggingUtil()
        self._date_util = DateUtil()

        # These are initialised lazily when the worker runs.
        self._logger = None
        self._fileHandler = None
        self._querier = None
        self._dao = None
示例#2
0
    def __init__(self, config, db_name, log_root_path):
        """
        :type config: dict
        :param config: the DB configuration file

        :type db_name: str
        :param db_name: name of an existing DB

        :type log_root_path: str
        :param log_root_path: the log path
        """
        self._dsl_util = DslUtil()
        self._date_util = DateUtil()
        self._db_util = DbUtil()

        # Set up a dedicated log file for this report export.
        self._logging_util = LoggingUtil()
        self._log_path = log_root_path + "export-report-" + db_name + ".log"
        self._logger = self._logging_util.get_logger(self._log_path)
        self._fileHandler = self._logging_util.get_file_handler(self._logger, self._log_path, "info")

        self._db_name = db_name
        self._config = config
        # Open the DB connection and select the target database.
        self._cnx = self._db_util.get_connection(self._config)
        self._db_util.set_database(self._cnx, self._db_name)
        self._db_util.set_settings(self._cnx)

        self._chart_generator = ChartGenerator(self._cnx, self._logger)
        self._html_generator = HtmlGenerator(self._logger)
    def __init__(self, cnx, logger):
        """
        :type cnx: Object
        :param cnx: DB connection

        :type logger: Object
        :param logger: logger
        """
        # Keep references to the shared connection and logger.
        self._logger = logger
        self._cnx = cnx
        self._date_util = DateUtil()
示例#4
0
    def __init__(self, db_name, project_name,
                 type, forum_name, url, before_date, num_processes,
                 config, log_root_path):
        """
        :type db_name: str
        :param db_name: name of an existing DB

        :type project_name: str
        :param project_name: name of an existing project in the DB

        :type type: str
        :param type: type of the forum (Stackoverflow, Eclipse forum)

        :type forum_name: str
        :param forum_name: name of the forum to import

        :type url: str
        :param url: the URL of the forum

        :type before_date: str
        :param before_date: import data before date (YYYY-mm-dd)

        :type num_processes: int
        :param num_processes: number of processes to import the data (default 2)

        :type config: dict
        :param config: the DB configuration file

        :type log_root_path: str
        :param log_root_path: the log path
        """
        # Identify the import target.
        self._db_name = db_name
        self._project_name = project_name
        self._type = type
        self._forum_name = forum_name
        self._url = url
        self._before_date = before_date
        self._log_path = log_root_path + "import-eclipse-forum-" + db_name + "-" + project_name + "-" + forum_name

        # Point the configuration at the target database.
        config.update({'database': db_name})
        self._config = config

        # Fall back to the class default when no process count is given.
        self._num_processes = num_processes if num_processes else EclipseForum2DbMain.NUM_PROCESSES

        self._logging_util = LoggingUtil()
        self._date_util = DateUtil()

        # Initialised lazily by the import entry point.
        self._logger = None
        self._fileHandler = None
        self._querier = None
        self._dao = None
示例#5
0
    def __init__(self, config, logger):
        """
        :type config: dict
        :param config: the DB configuration file

        :type logger: Object
        :param logger: logger
        """
        self._logger = logger
        self._config = config
        # DAO for Git data plus the date helper used by the routines below.
        self._git_dao = GitDao(config, logger)
        self._date_util = DateUtil()
示例#6
0
    def __init__(self, url, product, logger):
        """
        :type url: str
        :param url: the URL of the Bugzilla issue tracker

        :type product: str
        :param product: name of the product to import from the Bugzilla issue tracker

        :type logger: Object
        :param logger: logger
        """
        # The logger is stored first; the Bugzilla API handle is created
        # right after (NOTE(review): assumes _init_bzapi may use self._logger
        # — confirm against its implementation).
        self._logger = logger
        self._bzapi = self._init_bzapi(url)
        self._date_util = DateUtil()
        self._product = product
示例#7
0
    def __init__(self, config, db_name, log_root_path):
        """
        :type config: dict
        :param config: the DB configuration file

        :type db_name: str
        :param db_name: name of an existing DB

        :type log_root_path: str
        :param log_root_path: the log path
        """
        self._date_util = DateUtil()
        self._db_util = DbUtil()

        # One log file per exported DB.
        self._logging_util = LoggingUtil()
        self._log_path = log_root_path + "export-file-json-" + db_name
        self._logger = self._logging_util.get_logger(self._log_path)
        self._fileHandler = self._logging_util.get_file_handler(
            self._logger, self._log_path, "info")

        self._db_name = db_name
        # Point the configuration at the target database.
        config.update({'database': db_name})
        self._config = config

        # Open the connection and select the target schema.
        self._cnx = self._db_util.get_connection(self._config)
        self._db_util.set_database(self._cnx, self._db_name)
        self._db_util.set_settings(self._cnx)
        self._file_util = FileUtil(self._config, self._logger)
示例#8
0
    def __init__(self, git_repo_path, logger):
        """
        :type git_repo_path: str
        :param git_repo_path: local path of the Git repository

        :type logger: Object
        :param logger: logger
        """
        # Assign the logger outside the try block so it is always available
        # in the except handler.
        self._logger = logger
        try:
            self._repo = Repo(git_repo_path, odbt=GitCmdObjectDB)
            self._gitt = self._repo.git
            self._date_util = DateUtil()
        except Exception:
            # Narrowed from a bare "except:" so SystemExit/KeyboardInterrupt
            # are not intercepted; the traceback is logged before re-raising.
            self._logger.error("GitQuerier init failed", exc_info=True)
            raise
示例#9
0
    def __init__(self, token, logger):
        """
        :type token: str
        :param token: the token to access the Slack API

        :type logger: Object
        :param logger: logger
        """
        # Assigned outside the try block so they are available in the handler.
        self._token = token
        self._logger = logger
        try:
            self._date_util = DateUtil()
            self._slack = Slacker(self._token)
        except Exception:
            # Narrowed from a bare "except:"; log with traceback, then
            # propagate to the caller.
            self._logger.error("SlackQuerier init failed", exc_info=True)
            raise
示例#10
0
    def __init__(self, db_name, project_name, forum_name, eclipse_forum_url,
                 num_processes, config, log_root_path):
        """
        :type db_name: str
        :param db_name: name of an existing DB

        :type project_name: str
        :param project_name: name of an existing project in the DB

        :type forum_name: str
        :param forum_name: name of an existing forum in the DB to update

        :type eclipse_forum_url: str
        :param eclipse_forum_url: the URL of the forum

        :type num_processes: int
        :param num_processes: number of processes to import the data (default 2)

        :type config: dict
        :param config: the DB configuration file

        :type log_root_path: str
        :param log_root_path: the log path
        """
        # Identify the update target.
        self._db_name = db_name
        self._project_name = project_name
        self._forum_name = forum_name
        self._url = eclipse_forum_url
        self._log_path = log_root_path + "update-eclipse-forum-" + db_name + "-" + project_name + "-" + forum_name

        # Point the configuration at the target database.
        config.update({'database': db_name})
        self._config = config

        # Fall back to the class default when no process count is given.
        self._num_processes = num_processes if num_processes else EclipseForum2DbUpdate.NUM_PROCESSES

        self._logging_util = LoggingUtil()
        self._date_util = DateUtil()

        # Initialised lazily by the update entry point.
        self._logger = None
        self._fileHandler = None
        self._querier = None
        self._dao = None
示例#11
0
 def get_order_time_lst(self):
     """Return one OrderTimeItem per distinct close time, shifted back by the
     configured lead minutes (ConfigUtil ahead_min)."""
     # Hoisted out of the loop: the configured lead time does not change.
     ahead_min = int(ConfigUtil.instance().ahead_min)
     order_time = []
     # set() de-duplicates close times; no need to materialise a list first.
     for time_str in set(self.close_time_list):
         parts = time_str.split(":")
         hour, minute = DateUtil.date_diff_min(int(parts[0]), int(parts[1]), -ahead_min)
         order_time.append(OrderTimeItem(hour, minute))
     return order_time
示例#12
0
    def __call__(self):
        """Worker entry point: set up logging, then import the topic interval."""
        self._logging_util = LoggingUtil()
        self._date_util = DateUtil()
        # One log file per topic-id interval handled by this worker.
        log_path = "%s-topic2db-%s-%s" % (self._log_root_path,
                                          self._interval[0],
                                          self._interval[-1])
        self._logger = self._logging_util.get_logger(log_path)
        self._fileHandler = self._logging_util.get_file_handler(self._logger,
                                                                log_path,
                                                                "info")

        try:
            self._querier = EclipseForumQuerier(None, self._logger)
            self._dao = EclipseForumDao(self._config, self._logger)
            self.extract()
        except Exception:
            self._logger.error("EclipseTopic2Db failed", exc_info=True)
        finally:
            # Always release the DB connection, even on failure.
            if self._dao:
                self._dao.close_connection()
    def __init__(self, db_name, repo_id, issue_tracker_id, url, product,
                 interval, config, log_root_path):
        """
        :type db_name: str
        :param db_name: name of an existing DB

        :type repo_id: int
        :param repo_id: id of an existing repository in the DB

        :type issue_tracker_id: int
        :param issue_tracker_id: id of an existing issue tracker in the DB

        :type url: str
        :param url: the URL of the bugzilla issue tracker

        :type product: str
        :param product: name of the product in the bugzilla issue tracker

        :type interval: list int
        :param interval: list of issue ids to import

        :type config: dict
        :param config: the DB configuration file

        :type log_root_path: str
        :param log_root_path: the log path
        """
        # Target DB / tracker identifiers.
        self._db_name = db_name
        self._repo_id = repo_id
        self._issue_tracker_id = issue_tracker_id
        # Remote source and the slice of issues handled by this worker.
        self._url = url
        self._product = product
        self._interval = interval
        self._config = config
        self._log_root_path = log_root_path

        self._logging_util = LoggingUtil()
        self._date_util = DateUtil()

        # Initialised lazily when the worker runs.
        self._logger = None
        self._fileHandler = None
        self._querier = None
        self._dao = None
示例#14
0
    def __init__(self, token, logger):
        """
        :type token: str
        :param token: the token to access the Stackoverflow API

        :type logger: Object
        :param logger: logger
        """
        # Assigned outside the try block so they are available in the handler.
        self._token = token
        self._logger = logger
        try:
            self._token_util = TokenUtil(self._logger, "stackoverflow")
            self._date_util = DateUtil()
            self._so = stackexchange.Site(stackexchange.StackOverflow, app_key=self._token)
            # Throttle instead of raising when the API rate limit is reached.
            self._so.impose_throttling = True
            self._so.throttle_stop = False
        except Exception:
            # Narrowed from a bare "except:"; log with traceback, then
            # propagate to the caller.
            self._logger.error("StackOverflowQuerier init failed", exc_info=True)
            raise
示例#15
0
    def __init__(self, url, token, logger):
        """
        :type url: str
        :param url: full name of the GitHub repository

        :type token: str
        :param token: a GitHub token

        :type logger: Object
        :param logger: logger
        """
        # Assigned outside the try block so they are available in the handler.
        self._logger = logger
        self._url = url
        self._token = token
        try:
            self._github = Github(token)
            self._repo = self._load_repo(self._url)
            self._token_util = TokenUtil(self._logger, "github")
            self._date_util = DateUtil()
        except Exception:
            # Narrowed from a bare "except:"; log with traceback, then
            # propagate to the caller.
            self._logger.error("GitHubQuerier init failed", exc_info=True)
            raise
示例#16
0
文件: main.py 项目: douyou/meican
def order(meican):
    """Place a MeiCan order, but only when the current weekday is listed in
    the configured order_week; otherwise log a critical message and return."""
    try:
        order_week = ConfigUtil.instance().order_week
        cur_week = str(DateUtil.curr_week())
        if cur_week not in order_week:
            critical(
                "meican | order week not in config!!!!!!!!!!!!, cur_week:<%s>"
                % cur_week)
            return
        info("meican |begin order")
        meican.order()
    except Exception:
        # The bound exception variable was unused; the full traceback is
        # logged instead.
        info(traceback.format_exc())
def two_rate_model_test(local_test_dir) -> None:
    """
    Approval test for TermRateModel with a single calibration shared by all
    currencies.

    The test passes when the generated output files match git state.
    """

    n_countries: int = 2
    seed = 0
    rng = np.random.RandomState(seed)
    lag_months = 6
    lag_label = DateUtil.get_lag_label(lag_months=lag_months)

    # Configure and run a one-year simulation for the synthetic countries
    model = ShortRateModel()
    model.year_count = 1
    model.seed = seed
    model.countries = ["C" + str(i + 1).zfill(4) for i in range(n_countries)]
    model.vol = [0.01] * n_countries
    model.rev = [0.2] * n_countries
    model.cap_rev = [0.2] * n_countries
    model.floor_rev = [0.5] * n_countries
    model.soft_cap = [0.10] * n_countries
    model.soft_floor = [0.02] * n_countries
    model.target = [0.05] * n_countries
    model.short_rate_0 = [rng.uniform(-0.1, 0.30) for _ in range(n_countries)]
    model.simulate(caller_file=__file__)

    # Plot the simulated short-rate history
    history_plot = LinePlot()
    history_plot.input_files = ["history.short_rate"]
    history_plot.title = "history.short_rate"
    history_plot.save_plot(caller_file=__file__)

    # Build the lagged sample from the history
    sample = LagSample()
    sample.features = ["short_rate"]
    sample.lag_months = lag_months
    sample.create_sample(caller_file=__file__)

    # Scatter the sample against its lagged counterpart
    lag_plot = ScatterPlot()
    lag_plot.input_file = "lag_sample"
    lag_plot.columns = ["short_rate(t)", f"short_rate(t{lag_label})"]
    lag_plot.title = "lag_sample.short_rate"
    lag_plot.save_plot(caller_file=__file__)
    def get_lag_label_test(self):
        """Test for get_lag_label method."""

        # Expected label for each signed month lag.
        expected = {
            6: "+6m",
            -6: "-6m",
            12: "+1y",
            -12: "-1y",
            24: "+2y",
            -24: "-2y",
        }
        for months, label in expected.items():
            assert DateUtil.get_lag_label(lag_months=months) == label
示例#19
0
    def __call__(self):
        """Worker entry point: set up logging, then import the issue interval."""
        self._logging_util = LoggingUtil()
        self._date_util = DateUtil()
        # One log file per issue-id interval handled by this worker.
        log_path = "%s-issue2db-%s-%s" % (self._log_root_path,
                                          self._interval[0],
                                          self._interval[-1])
        self._logger = self._logging_util.get_logger(log_path)
        self._fileHandler = self._logging_util.get_file_handler(self._logger,
                                                                log_path,
                                                                "info")

        try:
            self._querier = GitHubQuerier(self._url, self._token, self._logger)
            self._dao = GitHubDao(self._config, self._logger)
            self.extract()
        except Exception:
            self._logger.error("GitHubIssue2Db failed", exc_info=True)
        finally:
            # Always release the DB connection, even on failure.
            if self._dao:
                self._dao.close_connection()
示例#20
0
    def __call__(self):
        """Worker entry point: set up logging, then import the PR interval."""
        self._logging_util = LoggingUtil()
        self._date_util = DateUtil()
        log_path = self._log_root_path + "-pr2db-" + str(
            self._interval[0]) + "-" + str(self._interval[-1])
        self._logger = self._logging_util.get_logger(log_path)
        self._fileHandler = self._logging_util.get_file_handler(
            self._logger, log_path, "info")

        # Ensure the attribute exists for the finally block even if the
        # try body fails before the DAO is created.
        self._git_dao = None
        try:
            self._querier = GitHubQuerier(self._url, self._token, self._logger)
            self._dao = GitHubDao(self._config, self._logger)
            self._git_dao = GitDao(self._config, self._logger)
            self.extract()
        except Exception:
            # "except Exception, e:" is Python-2-only syntax and the bound
            # variable was unused.
            self._logger.error("GitHubPullRequest2Db failed", exc_info=True)
        finally:
            # Previously the DAO connections were never closed (leak);
            # siblings close them in a finally block.
            if self._dao:
                self._dao.close_connection()
            if self._git_dao:
                self._git_dao.close_connection()
示例#21
0
    def __call__(self):
        """Worker entry point: set up logging, then import the issue interval."""
        self._logging_util = LoggingUtil()
        self._date_util = DateUtil()
        log_path = self._log_root_path + "-issue2db-" + str(
            self._interval[0]) + "-" + str(self._interval[-1])
        self._logger = self._logging_util.get_logger(log_path)
        self._fileHandler = self._logging_util.get_file_handler(
            self._logger, log_path, "info")

        try:
            self._querier = BugzillaQuerier(self._url, self._product,
                                            self._logger)
            self._dao = BugzillaDao(self._config, self._logger)
            self.extract()
        except Exception:
            # "except Exception, e:" is Python-2-only syntax and the bound
            # variable was unused.
            self._logger.error("BugzillaIssue2Db failed", exc_info=True)
        finally:
            # Previously the DAO connection was never closed (leak);
            # siblings close it in a finally block.
            if self._dao:
                self._dao.close_connection()
示例#22
0
    def save_plot(self, *, caller_file: str) -> None:
        """
        Create plot from sample.

        Pass __file__ variable of the caller script as caller_file
        parameter. It will be used as both input and output file prefix.
        """

        # Prefix for all data files
        caller_name = FileUtil.get_caller_name(caller_file=caller_file)

        fig = go.Figure()
        plot_title = self.title
        x_axis_label = "Month"
        y_axis_label = "Value"
        for input_file in self.input_files:
            # Read the series and keep monthly observations only
            df = pd.read_csv(f'{caller_name}.{input_file}.csv')
            df = df.loc[df['FREQUENCY'] == 'M']
            # Optional country filter
            if self.countries is not None:
                df = df.loc[df['LOCATION'].isin(self.countries)]

            countries = df['LOCATION'].unique().tolist()
            for country in countries:
                # Convert YYYY-MM labels to sequential month numbers for x
                times = [DateUtil.get_sequential_month(year_month=t) for t in
                         df.loc[df['LOCATION'] == country]['TIME']]
                values = df.loc[df['LOCATION'] == country]['Value']
                fig.add_trace(
                    go.Scatter(
                        x=times,
                        y=values,
                        mode='lines', line=dict(width=3.0), name=input_file + "." + country))

        # The layout is invariant across traces; previously update_layout was
        # re-applied on every iteration of the inner country loop.
        fig.update_layout(margin=dict(l=80, r=20, t=80, b=40),
                          title={
                              'text': plot_title,
                              'font': {'family': "Roboto", 'size': 18},
                              'x': 0.5
                          },
                          xaxis=dict(showgrid=True, tickangle=0,
                                     title={'text': x_axis_label, 'font': {'family': "Roboto", 'size': 13}}),
                          yaxis=dict(showgrid=True, tickformat='.2f', nticks=20,
                                     title={'text': y_axis_label, 'font': {'family': "Roboto", 'size': 13}})
                          )

        # Save plot file
        file_name = f"{caller_name}.{self.title.lower()}.png"
        fig.write_image(file_name)
    def __init__(self, db_name,
                 repo_id, issue_tracker_id, url, interval, token,
                 config, log_root_path):
        """
        :type db_name: str
        :param db_name: name of an existing DB

        :type repo_id: int
        :param repo_id: id of an existing repository in the DB

        :type issue_tracker_id: int
        :param issue_tracker_id: id of an existing issue tracker in the DB

        :type url: str
        :param url: full name of the GitHub repository

        :type interval: list int
        :param interval: list of issue ids to import

        :type token: str
        :param token: a GitHub token

        :type config: dict
        :param config: the DB configuration file

        :type log_root_path: str
        :param log_root_path: the log path
        """
        # Target DB / tracker identifiers.
        self._db_name = db_name
        self._repo_id = repo_id
        self._issue_tracker_id = issue_tracker_id
        # Remote source, API token, and the slice of issues for this worker.
        self._url = url
        self._token = token
        self._interval = interval
        self._config = config
        self._log_root_path = log_root_path

        self._logging_util = LoggingUtil()
        self._date_util = DateUtil()

        # Initialised lazily when the worker runs.
        self._logger = None
        self._fileHandler = None
        self._querier = None
        self._dao = None
import numpy as np

from model.short_rate_model import ShortRateModel
from util.date_util import DateUtil
from util.lag_sample import LagSample
from util.line_plot import LinePlot
from util.scatter_plot import ScatterPlot

if __name__ == "__main__":

    # Run two rate model with single calibration across all currencies.

    # Number of simulated currencies and the RNG seed for reproducibility.
    country_count: int = 50
    seed = 0
    rand = np.random.RandomState(seed)
    # Lag of 60 months between a rate and its lagged sample counterpart.
    lag_months = 60
    lag_label = DateUtil.get_lag_label(lag_months=lag_months)

    # Perform simulation
    model = ShortRateModel()
    model.year_count = 30
    model.seed = seed
    # Country codes "C0001", "C0002", ... zero-padded to four digits.
    model.countries = [
        "C" + str(country_index + 1).zfill(4)
        for country_index in range(country_count)
    ]
    # Per-country model parameters (identical value for every country).
    model.vol = [0.01] * country_count
    model.rev = [0.05] * country_count
    model.cap_rev = [0.2] * country_count
    model.floor_rev = [0.5] * country_count
    model.soft_cap = [0.10] * country_count
    model.soft_floor = [0.02] * country_count
class ChartGenerator():
    """
    This class handles the generation of charts
    """
    def __init__(self, cnx, logger):
        """
        :type cnx: Object
        :param cnx: DB connection

        :type logger: Object
        :param logger: logger
        """
        self._cnx = cnx
        self._logger = logger
        self._date_util = DateUtil()

    def _get_db_data(self, query):
        # Runs the query and collects parallel x/y lists; each row is expected
        # to hold (counter, span) in its first two columns.
        cursor = self._cnx.cursor()
        cursor.execute(query)

        results_y = []
        results_x = []
        row = cursor.fetchone()
        while row:
            counter = int(row[0])
            span = int(row[1])
            results_y.append(counter)
            results_x.append(span)
            row = cursor.fetchone()

        cursor.close()

        return results_x, results_y

    def create(self, query, x_label, y_label, time_dimension):
        """
        creates the charts

        :type query: str
        :param query: SQL query

        :type x_label: str
        :param x_label: name of the x label

        :type y_label: str
        :param y_label: name of the y label

        :type time_dimension: str
        :param time_dimension: time dimension (week, month, year)

        :raise ValueError: if time_dimension matches none of week/month/year
        """
        intervals, counters = self._get_db_data(query)

        if "year" in time_dimension:
            # NOTE(review): maps interval ints to month names — presumably the
            # yearly query buckets by month; confirm against the callers.
            span = [self._date_util.get_month_from_int(i) for i in intervals]
        elif "month" in time_dimension:
            span = intervals
        elif "week" in time_dimension:
            # weekday ints appear 1-based here, 0-based in DateUtil
            span = [
                self._date_util.get_weekday_from_int(i - 1) for i in intervals
                if i <= 7
            ]
        else:
            # previously an unknown dimension fell through and crashed later
            # with a NameError on "span"
            raise ValueError("unknown time dimension: " + str(time_dimension))

        if '_' in y_label:
            y_label = y_label.replace('_', ' ')

        line_chart = pygal.Bar(style=LightColorizedStyle)
        line_chart.title = y_label + " * " + x_label
        line_chart.x_labels = span
        line_chart.add(y_label, counters)
        chart = line_chart.render()

        return chart
示例#26
0
    def create_sample(self, *, caller_file: str) -> None:
        """
        Create sample from history record.

        Pass __file__ variable of the caller script as caller_file
        parameter. It will be used as both input and output file prefix.
        """

        # Prefix for all data files
        caller_name = FileUtil.get_caller_name(caller_file=caller_file)

        # Create DF where the results will be merged
        sample_df = None
        shifted_sample_df = None
        for feature in self.features:

            # Read and transform time series for each feature
            time_series_df = pd.read_csv(
                f"{caller_name}.history.{feature}.csv")

            # Filter by monthly frequency
            time_series_df = time_series_df[time_series_df["FREQUENCY"] == "M"]

            # Filter by country if country list is specified
            if self.countries is not None:
                time_series_df = time_series_df[
                    time_series_df["LOCATION"].isin(self.countries)]

            # Create sequential month list
            unshifted_months = [
                DateUtil.get_sequential_month(year_month=ym)
                for ym in time_series_df["TIME"]
            ]

            # Create DF with unshifted data
            values = time_series_df["Value"]
            location = time_series_df["LOCATION"]
            unshifted_df = pd.DataFrame({
                "LOCATION": location,
                "Month": unshifted_months,
                f"{feature}(t)": values.values
            })

            # Merge unshifted time series for the feature
            if sample_df is None:
                sample_df = unshifted_df
            else:
                sample_df = sample_df.merge(unshifted_df)

            # Add features with the specified time shift if not None
            if self.lag_months is not None:

                # Create sequential month list shifted backwards(!) by the specified time shift
                shifted_months = [
                    m - self.lag_months for m in unshifted_months
                ]
                shift_label = DateUtil.get_lag_label(
                    lag_months=self.lag_months)

                # Merge shifted data
                shifted_df = pd.DataFrame({
                    "LOCATION":
                    location,
                    "Month":
                    shifted_months,
                    f"{feature}(t{shift_label})":
                    values.values
                })
                if shifted_sample_df is None:
                    shifted_sample_df = shifted_df
                else:
                    shifted_sample_df = shifted_sample_df.merge(shifted_df)

        # Merge the lagged columns only when a lag was requested; previously
        # this crashed with merge(None) when self.lag_months was None.
        if shifted_sample_df is not None:
            sample_df = sample_df.merge(shifted_sample_df)

        # Drop the month column used as the merge key
        sample_df.drop(["Month"], axis=1, inplace=True)

        # Save sample to file
        sample_df.to_csv(f"{caller_name}.lag_sample.csv",
                         index=False,
                         float_format="%.6f")
示例#27
0
class FileUtil():
    """
    This class provides utilities for the files stored in the Gitana DB
    """
    def __init__(self, config, logger):
        """
        :type config: dict
        :param config: the DB configuration file

        :type logger: Object
        :param logger: logger
        """
        self._config = config
        self._logger = logger
        self._git_dao = GitDao(self._config, self._logger)
        self._date_util = DateUtil()

    def _get_directory_path(self, path_elements):
        # Rebuilds a '/'-terminated directory path from elements listed
        # deepest-first. NOTE: reverses path_elements in place.
        directory_path = ''
        path_elements.reverse()
        for p in path_elements:
            directory_path = directory_path + p + '/'

        return directory_path

    def get_directories(self, file_path):
        """
        extracts the directories where the file is located

        :type file_path: str
        :param file_path: path of the file
        """
        directories = []
        # drop the file name and keep the directory elements, deepest-first
        # (renamed from "dir", which shadowed the builtin)
        path_parts = file_path.split('/')[:-1]
        path_parts.reverse()

        for d in range(0, len(path_parts)):
            # slicing copies the list, so the in-place reverse inside the
            # helper cannot corrupt path_parts across iterations
            dir_path = self._get_directory_path(path_parts[d:])
            directories.append(dir_path)

        # a file in the repository root belongs to "/"
        if not directories:
            directories.append("/")

        return directories

    def _process_date(self, d):
        # Validates an optional "%Y-%m-%d" date string; returns None (meaning
        # "retrieve all changes") when the format is wrong.
        if d:
            if not self._date_util.check_format_timestamp(d, "%Y-%m-%d"):
                # log before clearing the value: previously d was set to None
                # first, so the warning always printed "None" instead of the
                # offending date
                self._logger.warning(
                    "the date " + str(d) +
                    " does not follow the pattern %Y-%m-%d, all changes be retrieved"
                )
                d = None

        return d

    def get_file_history_by_id(self,
                               file_id,
                               ref_id,
                               reversed=False,
                               before_date=None):
        """
        get file history for a given file id within a reference and before a given date

        :type file_id: int
        :param file_id: the id of the target file

        :type ref_id: str
        :param ref_id: the id of the reference

        :type reversed: bool
        :param reversed: if True, it returns the changes from the most recent to the earliest

        :type before_date: str (YYYY-mm-dd)
        :param before_date: if not null, it returns the last version of the file before the given date
        """
        before_date = self._process_date(before_date)

        previous_renamings = [file_id]

        changes = self._git_dao.select_file_changes(file_id,
                                                    ref_id,
                                                    before_date,
                                                    patch=False,
                                                    code=True)
        renamings_to_process = self._git_dao.select_file_renamings(
            file_id, ref_id)

        # Follow the renaming chain transitively: each renamed file can have
        # earlier renamings of its own; ids already visited are subtracted so
        # the loop terminates.
        if renamings_to_process:
            while renamings_to_process != []:
                current_renamings = renamings_to_process
                for previous_file_id in current_renamings:
                    changes = changes + self._git_dao.select_file_changes(
                        previous_file_id, ref_id, before_date)
                    previous_renamings.append(previous_file_id)
                    renamings_to_process = renamings_to_process + self._git_dao.select_file_renamings(
                        previous_file_id, ref_id)
                renamings_to_process = list(
                    set(renamings_to_process) - set(previous_renamings))

        return sorted(changes,
                      key=lambda k: k['authored_date'],
                      reverse=reversed)

    def get_file_history_by_name(self,
                                 repo_name,
                                 file_name,
                                 reference_name,
                                 reversed=False,
                                 before_date=None):
        """
        get file history for a given file name within a reference and before a given date

        :type repo_name: str
        :param repo_name: the name of the repository to import. It cannot be null

        :type file_name: dict
        :param file_name: the name of the target file

        :type reference_name: str
        :param reference_name: the name of the reference

        :type reversed: bool
        :param reversed: if True, it returns the changes from the most recent to the earliest

        :type before_date: str (YYYY-mm-dd)
        :param before_date: if not null, it returns the last version of the file before the given date
        """
        history = []
        try:
            repo_id = self._git_dao.select_repo_id(repo_name)
            file_id = self._git_dao.select_file_id(repo_id, file_name)
            reference_id = self._git_dao.select_reference_id(
                repo_id, reference_name)
            history = self.get_file_history_by_id(file_id, reference_id,
                                                  reversed, before_date)
        except Exception:
            # narrowed from a bare "except:"; best-effort API that logs and
            # returns an empty history on failure
            self._logger.error("FileUtil failed", exc_info=True)
        finally:
            if self._git_dao:
                self._git_dao.close_connection()

            return history

    def get_file_version_by_id(self, file_id, ref_id, before_date=None):
        """
        get file version for a given file id within a reference and before a given date

        :type file_id: int
        :param file_id: the id of the target file

        :type ref_id: str
        :param ref_id: the id of the reference

        :type before_date: str (YYYY-mm-dd)
        :param before_date: if not null, it returns the last version of the file before the given date
        """
        before_date = self._process_date(before_date)
        changes = self._git_dao.select_file_changes(file_id,
                                                    ref_id,
                                                    before_date,
                                                    patch=True)
        # BUGFIX: the sorted(...) result was previously discarded, so the
        # patches could be applied out of chronological order
        changes = sorted(changes,
                         key=lambda k: k['committed_date'],
                         reverse=False)

        # the digestion is needed because the library diff-match-patch requires that the preamble of the diff information (@@ -.. +.. @@)
        # appears alone in one line. Sometimes GitPython returns such a preamble mixed with other data
        diff_util = diff_match_patch()
        diff_util.Diff_Timeout = 0
        diff_util.Match_Distance = 5000
        diff_util.Match_Threshold = 0.8
        diff_util.Patch_DeleteThreshold = 0.8
        content = ""
        res_merge = []
        for change in changes:
            digested_patches = []
            p = change.get('patch')
            for line in p.split('\n'):
                # raw string: "\d" is not a valid string escape outside regex
                m = re.match(r"^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@", line)
                if m:
                    rest = line.split(m.group())[1]
                    digested_patches.append(m.group())
                    if rest:
                        digested_patches.append(rest.rstrip())
                else:
                    digested_patches.append(line)

            # apply the digested patch on top of the content built so far
            ps = diff_util.patch_fromText("\n".join(digested_patches))
            res = diff_util.patch_apply(ps, content)
            content = res[0]

            res_merge = res_merge + res[1]

        self._logger.info(
            str(len([r for r in res_merge if r])) + " out of " +
            str(len(res_merge)) +
            " patches were successfully used to rebuild the file")

        return content

    def get_file_version_by_name(self,
                                 repo_name,
                                 file_name,
                                 reference_name,
                                 before_date=None):
        """
        get file version for a given file name within a reference and before a given date

        :type repo_name: str
        :param repo_name: the name of the repository to import. It cannot be null

        :type file_name: dict
        :param file_name: the name of the target file

        :type reference_name: str
        :param reference_name: the name of the reference

        :type before_date: str (YYYY-mm-dd)
        :param before_date: if not null, it returns the last version of the file before the given date
        """
        content = ""
        try:
            repo_id = self._git_dao.select_repo_id(repo_name)
            file_id = self._git_dao.select_file_id(repo_id, file_name)
            reference_id = self._git_dao.select_reference_id(
                repo_id, reference_name)

            content = self.get_file_version_by_id(file_id, reference_id,
                                                  before_date)
        except Exception:
            # narrowed from a bare "except:"; best-effort API that logs and
            # returns empty content on failure
            self._logger.error("FileUtil failed", exc_info=True)
        finally:
            if self._git_dao:
                self._git_dao.close_connection()

            return content
示例#28
0
class EclipseTopic2Db(object):
    """
    This class handles the import of Eclipse forum topics
    """

    TOPIC_URL = 'https://www.eclipse.org/forums/index.php/t/'

    def __init__(self, db_name, forum_id, interval, config, log_root_path):
        """
        :type db_name: str
        :param db_name: the name of an existing DB

        :type forum_id: int
        :param forum_id: the id of an existing forum in the DB

        :type interval: list int
        :param interval: a list of topic ids to import

        :type config: dict
        :param config: the DB configuration file

        :type log_root_path: str
        :param log_root_path: the log path
        """
        self._log_root_path = log_root_path
        self._interval = interval
        self._db_name = db_name
        self._forum_id = forum_id
        self._config = config
        # logger/querier/dao are created lazily in __call__, presumably so
        # instances stay picklable before being dispatched to worker
        # processes -- TODO confirm against the caller
        self._fileHandler = None
        self._logger = None
        self._querier = None
        self._dao = None

    def __call__(self):
        self._logging_util = LoggingUtil()
        self._date_util = DateUtil()
        # one log file per topic-id interval
        log_path = self._log_root_path + "-topic2db-" + str(
            self._interval[0]) + "-" + str(self._interval[-1])
        self._logger = self._logging_util.get_logger(log_path)
        self._fileHandler = self._logging_util.get_file_handler(
            self._logger, log_path, "info")

        try:
            self._querier = EclipseForumQuerier(None, self._logger)
            self._dao = EclipseForumDao(self._config, self._logger)
            self.extract()
        except Exception:
            self._logger.error("EclipseTopic2Db failed", exc_info=True)
        finally:
            if self._dao:
                self._dao.close_connection()

    def _get_message_attachments_info(self, message_id, message):
        #gets attachment information of a message and stores it in the DB
        attachments = self._querier.message_get_attachments(message)

        for a in attachments:
            url = self._querier.get_attachment_url(a)
            own_id = self._querier.get_attachment_own_id(a)
            name = self._querier.get_attachment_name(a)
            # fix: strip() (was strip(''), a no-op) removes surrounding
            # whitespace from the file extension
            extension = name.split('.')[-1].strip().lower()
            size = self._querier.get_attachment_size(a)

            self._dao.insert_message_attachment(url, own_id, name, extension,
                                                size, message_id)

    def _get_message_info(self, topic_id, message, pos):
        #gets the information of a topic message and stores it in the DB
        own_id = self._querier.get_message_own_id(message)
        created_at = self._date_util.get_timestamp(
            self._querier.get_created_at(message), "%a, %d %B %Y %H:%M")
        body = self._querier.get_message_body(message)
        author_name = self._querier.get_message_author_name(message)
        message_id = self._dao.insert_message(
            own_id, pos, self._dao.get_message_type_id("reply"), topic_id,
            body, None, self._dao.get_user_id(author_name), created_at)

        if self._querier.message_has_attachments(message):
            self._get_message_attachments_info(message_id, message)

        # the first message of a topic defines the topic creation date
        if pos == 1:
            self._dao.update_topic_created_at(topic_id, created_at,
                                              self._forum_id)

    def extract(self):
        """
        extracts Eclipse forum topic data and stores it in the DB
        """
        self._logger.info("EclipseTopic2Db started")
        start_time = datetime.now()

        for topic_id in self._interval:
            topic_own_id = self._dao.get_topic_own_id(self._forum_id, topic_id)

            self._querier.set_url(EclipseTopic2Db.TOPIC_URL +
                                  str(topic_own_id) + "/")
            self._querier.start_browser()
            # give the page time to load before scraping
            time.sleep(3)

            # the forum redirects unknown topic ids to an error page
            if 'index.php/e/' in self._querier._url:
                self._logger.warning("No URL exists for the topic id " +
                                     str(topic_id) + " - " +
                                     str(self._forum_id))

            next_page = True
            pos = 1

            # walk all pages of the topic; "pos" is the message position
            # across the whole topic, not per page
            while next_page:
                messages_on_page = self._querier.get_messages()

                for message in messages_on_page:
                    self._get_message_info(topic_id, message, pos)
                    pos += 1

                next_page = self._querier.go_next_page()

        self._querier.close_browser()
        end_time = datetime.now()
        minutes_and_seconds = self._logging_util.calculate_execution_time(
            end_time, start_time)
        self._logger.info("EclipseTopic2Db finished after " +
                          str(minutes_and_seconds[0]) + " minutes and " +
                          str(round(minutes_and_seconds[1], 1)) + " secs")
        self._logging_util.remove_file_handler_logger(self._logger,
                                                      self._fileHandler)
class StackOverflowQuerier():
    """
    This class collects the data available on Stackoverflow via its API
    """
    def __init__(self, token, logger):
        """
        :type token: str
        :param token: the token to access the Stackoverflow API

        :type logger: Object
        :param logger: logger
        """
        try:
            self._token = token
            self._logger = logger
            self._token_util = TokenUtil(self._logger, "stackoverflow")
            self._date_util = DateUtil()
            self._so = stackexchange.Site(stackexchange.StackOverflow,
                                          app_key=self._token)
            # let the wrapper pause instead of failing on API rate limits
            self._so.impose_throttling = True
            self._so.throttle_stop = False
        except Exception:
            self._logger.error("StackOverflowQuerier init failed")
            raise

    def get_topic_ids(self, search_query, before_date, pagesize=10):
        """
        gets the data source topic ids

        :type search_query: str
        :param search_query: a label used to mark questions in Stackoverflow

        :type before_date: str
        :param before_date: selects questions with creation date before a given date (YYYY-mm-dd)

        :type pagesize: int
        :param pagesize: number of questions fetched per API page (default 10)
        """
        questions = []
        # wait before the first request and between pages to respect throttling
        self._token_util.wait_is_usable(self._so)
        for question in self._so.questions(tagged=[search_query],
                                           pagesize=pagesize).fetch():
            questions.append(question)
            self._token_util.wait_is_usable(self._so)

        if before_date:
            questions = [
                q for q in questions
                if q.creation_date <= self._date_util.get_timestamp(
                    before_date, "%Y-%m-%d")
            ]

        return [question.id for question in questions]

    def get_topic(self, question_id):
        """
        gets the topic body

        :type question_id: int
        :param question_id: the data source question id
        """
        try:
            self._token_util.wait_is_usable(self._so)
            question = self._so.question(question_id, body="True")
        except Exception:
            # best-effort: a missing/unreadable question yields None
            question = None
        return question

    def get_topic_name(self, question):
        """
        gets the topic title

        :type question: Object
        :param question: the Object representing the question
        """
        return question.title

    def get_container_own_id(self, container):
        """
        gets the data source container id

        :type container: Object
        :param container: the Object representing the container
        """
        return container.id

    def get_container_votes(self, container):
        """
        gets the data source container votes

        :type container: Object
        :param container: the Object representing the container
        """
        return container.score

    def get_topic_labels(self, question):
        """
        gets the topic labels

        :type question: Object
        :param question: the Object representing the question
        """
        try:
            labels = question.tags
        except Exception:
            # questions without tags expose no "tags" attribute
            labels = []
        return labels

    def get_topic_views(self, question):
        """
        gets the topic view count

        :type question: Object
        :param question: the Object representing the question
        """
        return question.view_count

    def is_accepted_answer(self, answer):
        """
        checks if the answer is the accepted one

        :type answer: Object
        :param answer: the Object representing the answer
        """
        try:
            found = answer.accepted
        except Exception:
            # the attribute is absent when the answer was never accepted
            found = False

        return found

    def get_container_created_at(self, container):
        """
        gets the container creation date

        :type container: Object
        :param container: the Object representing the container
        """
        return container.creation_date

    def get_topic_last_change_at(self, question):
        """
        gets the topic last change date

        :type question: Object
        :param question: the Object representing the question
        """
        return question.last_activity_date

    def get_container_body(self, container):
        """
        gets the container body

        :type container: Object
        :param container: the Object representing the container
        """
        return container.body

    def remove_html_tags(self, html_text):
        """
        removes HTML tags from html text

        :type html_text: str
        :param html_text: the html text of a question/answer/comment
        """
        # NOTE(review): no explicit parser is passed, so BeautifulSoup picks
        # the "best available" one, which may vary between environments
        return BeautifulSoup(html_text).text

    def get_container_author(self, container):
        """
        gets the container author

        :type container: Object
        :param container: the Object representing the container
        """
        self._token_util.wait_is_usable(self._so)
        user = self._so.user(container.owner_id).display_name
        return user

    def get_comments(self, container):
        """
        gets the container comments

        :type container: Object
        :param container: the Object representing the container
        """
        comments = []
        try:
            self._token_util.wait_is_usable(self._so)
            for comment in container.comments.fetch():
                comments.append(comment)
                self._token_util.wait_is_usable(self._so)
        except Exception:
            self._logger.error("Stackexchange error when retrieving comments")

        return comments

    def get_answers(self, question):
        """
        gets the answer of a question

        :type question: Object
        :param question: the Object representing the question
        """
        answers = []
        self._token_util.wait_is_usable(self._so)
        for answer in question.answers:
            answers.append(answer)
            self._token_util.wait_is_usable(self._so)

        return answers

    def get_attachments(self, body):
        """
        extracts the attachments from a text

        :type body: str
        :param body: text of a question/comment/answer
        """
        # findall already returns the list of matched anchor tags
        p = re.compile("<a href=[^ ]*a>")
        return p.findall(body)

    def get_attachment_name(self, html_tag):
        """
        extracts the attachment name

        :type html_tag: str
        :param html_tag: text
        """
        # fix: a capturing group replaces the former strip('</a>'), which
        # also stripped trailing 'a'/'<'/'>'/'/' characters that were part
        # of the name itself (e.g. "data" became "dat")
        p = re.compile(">(.*)</a>")
        matches = p.findall(html_tag)

        found = None
        if matches:
            found = matches[0]
        else:
            self._logger.info("url name not extracted for: " + html_tag)

        return found

    def get_attachment_url(self, html_tag):
        """
        extracts the attachment url

        :type html_tag: str
        :param html_tag: text
        """
        p = re.compile("\".*\"")
        matches = p.findall(html_tag)

        found = None
        if matches:
            found = matches[0].strip('"')
        else:
            self._logger.info("url not extracted for: " + html_tag)

        return found

    def generate_attachment_id(self, message_id, pos):
        """
        creates id for attachment using the message id and position

        :type message_id: int
        :param message_id: id of the message where the attachment was found

        :type pos: int
        :param pos: position of the message where the attachment was found
        """
        return str(message_id) + str(pos)
# --- scraped snippet separator (originally "示例#30" / "0") ---
class ActivityReportExporter():
    """
    This class handles the generation of reports
    """

    LOG_FOLDER_PATH = "logs"
    INPUT_PATH = os.path.join(os.path.dirname(resources.__file__), 'queries.json')

    def __init__(self, config, db_name, log_root_path):
        """
        :type config: dict
        :param config: the DB configuration file

        :type db_name: str
        :param db_name: name of an existing DB

        :type log_root_path: str
        :param log_root_path: the log path
        """
        self._dsl_util = DslUtil()
        self._date_util = DateUtil()
        self._db_util = DbUtil()

        self._logging_util = LoggingUtil()
        self._log_path = log_root_path + "export-report-" + db_name + ".log"
        self._logger = self._logging_util.get_logger(self._log_path)
        self._fileHandler = self._logging_util.get_file_handler(self._logger, self._log_path, "info")

        self._db_name = db_name
        self._config = config
        self._cnx = self._db_util.get_connection(self._config)
        self._db_util.set_database(self._cnx, self._db_name)
        self._db_util.set_settings(self._cnx)

        self._chart_generator = ChartGenerator(self._cnx, self._logger)
        self._html_generator = HtmlGenerator(self._logger)

    def _create_log_folder(self, name):
        #creates the log folder if it does not exist
        if not os.path.exists(name):
            os.makedirs(name)

    def _create_output_file(self, filename):
        #creates the output folder for the report file
        if not os.path.exists(os.path.dirname(filename)):
            try:
                os.makedirs(os.path.dirname(filename))
            except OSError as exc: # Guard against race condition
                if exc.errno != errno.EEXIST:
                    raise

    def _load_report_exporter_json(self, json_path):
        #loads the JSON that drives the report export process
        with open(json_path) as json_data:
            data = json.load(json_data)

        return data.get('report')

    def _find_entity_id(self, type, name):
        #finds the id of the tools stored in the DB
        #NOTE: "type" shadows the builtin, but is kept for interface compatibility
        found = None

        if type == "project":
            found = self._db_util.select_project_id(self._cnx, name, self._logger)
        elif type == "repo":
            found = self._db_util.select_repo_id(self._cnx, name, self._logger)
        elif type == "issuetracker":
            found = self._db_util.select_issue_tracker_id(self._cnx, name, self._logger)
        elif type == "forum":
            found = self._db_util.select_forum_id(self._cnx, name, self._logger)
        elif type == "instantmessaging":
            found = self._db_util.select_instant_messaging_id(self._cnx, name, self._logger)

        if not found:
            #fix: error message typo was "ReporExporter"
            self._logger.error("ReportExporter: entity " + str(type) + " with name " + str(name) + " not found!")

        return found

    def _get_parameter(self, key, parameters):
        #resolves a query placeholder (e.g. REPOID, AFTERDATE) against the parameters dict
        found = None
        if key in ["AFTERDATE", "INTERVAL"]:
            found = parameters.get(key.lower())
        else:
            #placeholders ending in "ID" map to the entity name without the suffix
            if key.endswith("ID"):
                found = parameters.get(key[:-2].lower())
        if not found:
            self._logger.error("ReportExporter: parameter " + str(key) + " not found!")

        return found

    def _load_query_json(self, metric_name, parameters):
        #loads the query for a given metric from the JSON configuration
        #file and substitutes its placeholders with concrete values
        with open(ActivityReportExporter.INPUT_PATH) as json_data:
            data = json.load(json_data)

        metrics = data.get('queries')

        try:
            found = [m for m in metrics if m.get('name') == metric_name][0]
            query = found.get('query')

            for k in found.keys():
                if k not in ['name', 'query']:

                    k_value = str(self._get_parameter(k, parameters))

                    query = query.replace(k, k_value)

            return query
        except Exception:
            #was a bare "except:"; metric lookup failures return None
            self._logger.error("ReportExporter: metric " + str(metric_name) + " not found!")

    def _get_activity_name(self, activity):
        #gets the display name of the activity ("code_activity" -> "code activity")
        return activity.replace("_", " ")

    def _get_activity_type(self, activity):
        #gets the entity type of the activity ("issue_tracker_activity" -> "issuetracker")
        return activity.replace("_activity", "").replace("_", "")

    def _generate_charts(self, activity, activity_data, project_id, time_span):
        #generates one chart per (entity, measure) pair for the given activity
        entity2charts = {}
        after_date, interval = self._calculate_time_information(time_span)
        activity_type = self._get_activity_type(activity)
        names = activity_data.get('names')
        measures = activity_data.get('measures')

        for entity_name in names:
            entity_id = self._dsl_util.find_entity_id(self._cnx, activity_type, entity_name, self._logger)
            charts = []
            for measure in measures:
                query = self._load_query_json(measure, {activity_type: entity_id, 'project': project_id, 'afterdate': after_date, 'interval': interval})
                charts.append(self._chart_generator.create(query, interval.lower(), measure, time_span))

            entity2charts.update({entity_name: charts})

        return entity2charts

    def _calculate_time_information(self, time_span):
        #maps a time span keyword to (start date, SQL grouping interval)
        start = None
        interval = None
        current_time = datetime.now() #test datetime.strptime("2015-10-10", "%Y-%m-%d")
        if time_span == "this_week":
            start = self._date_util.get_start_time_span(current_time, "week", "%Y-%m-%d")
            interval = "DAY"
        elif time_span == "this_month":
            start = self._date_util.get_start_time_span(current_time, "month", "%Y-%m-%d")
            interval = "DAY"
        elif time_span == "this_year":
            start = self._date_util.get_start_time_span(current_time, "year", "%Y-%m-%d")
            interval = "MONTH"
        else:
            self._logger.error("ReportExporter: time span " + str(time_span) + " not recognized! Options are: this_week, this_month, this_year")

        return start, interval

    def export(self, file_path, json_path):
        """
        exports the Gitana data to a report

        :type file_path: str
        :param file_path: the path where to export the report

        :type json_path: str
        :param json_path: the path of the JSON that drives the export process
        """
        try:
            self._logger.info("ReportExporter started")
            start_time = datetime.now()

            exporter_data = self._load_report_exporter_json(json_path)

            project_name = exporter_data.get('project')
            project_id = self._dsl_util.find_entity_id(self._cnx, "project", project_name, self._logger)

            time_span = exporter_data.get('time_span')

            #collect the charts of every "*activity" section declared in the JSON
            activity2charts = {}
            for activity in [attr for attr in exporter_data.keys() if attr.endswith('activity')]:
                activity_name = self._get_activity_name(activity)
                charts = self._generate_charts(activity, exporter_data.get(activity), project_id, time_span)
                activity2charts.update({activity_name: charts})

            html_page = self._html_generator.create(project_name, activity2charts)

            with codecs.open(file_path, 'w', encoding='utf8') as f:
                f.write(html_page)

            self._db_util.close_connection(self._cnx)

            end_time = datetime.now()
            minutes_and_seconds = self._logging_util.calculate_execution_time(end_time, start_time)
            self._logger.info("ReportExporter: process finished after " + str(minutes_and_seconds[0])
                             + " minutes and " + str(round(minutes_and_seconds[1], 1)) + " secs")
            self._logging_util.remove_file_handler_logger(self._logger, self._fileHandler)
        except Exception:
            #was a bare "except:"
            self._logger.error("ReportExporter failed", exc_info=True)