def stable_instance_statistics():
    """
    Load the statistics of stable (non-refactored) unique classes, plot the
    unique-class-files fraction against the commit threshold for level 1,
    and return the full data grouped by level.

    Returns:
        pandas DataFrameGroupBy: the statistics grouped by the "level" column.
    """
    query = "SELECT * FROM stable_unique_classes_all_level;"
    data = execute_query(query)
    # BUG FIX: `data["level" == 1]` evaluated the constant expression
    # `"level" == 1` (always False) and indexed the column literally named
    # False. Select the level-1 rows with a boolean mask instead, and take
    # both plot series from the same (level-1) subset so they align.
    level_one = data[data["level"] == 1]
    plot_x_y(level_one["commitThreshold"],
             level_one["unique_class_files_fraction"])
    return data.groupby("level")
# ----- Example #2 -----
def query_raw(table_name: str, metrics, descriptor: str):
    """
    Fetch the raw values of the given metric columns from a table, caching
    the result as a csv file under results/Metrics.

    Parameter:
        table_name (str): the table (or view) to select from.
        metrics (Iterable[str]): the column names to select.
        descriptor (str): a label distinguishing this extraction in the
            cache file name.

    Returns:
        pandas.DataFrame: the (possibly cached) query result.
    """
    file_path = f"results/Metrics/{table_name}_{descriptor}_raw.csv"
    Path(path.dirname(file_path)).mkdir(parents=True, exist_ok=True)
    if not path.exists(file_path):
        # f"{metric}" was a no-op str() wrapper around each name; join the
        # stringified names directly.
        metrics_query = ', '.join(map(str, metrics))
        query = f"SELECT {metrics_query} FROM {table_name}"
        dataframe = execute_query(query)
        # Persist so the next call with the same arguments hits the cache.
        dataframe.to_csv(file_path, index=False)
        log(f"Got the raw data from {table_name} for these metrics: {metrics}.")
    else:
        dataframe = pd.read_csv(file_path)
    return dataframe
    def get_non_refactored_instances(self, datasets: Iterable[str]):
        """
        Fetch every stable (non-refactored) instance at this refactoring's
        level, e.g. Level 2 for the refactoring "Extract Method".

        Parameter:
            datasets (Iterable[str]) (optional): restrict the stable
            instances to these datasets; when empty, no filter is applied.
        """
        stable_query = get_level_stable(int(self._level),
                                        self._commit_threshold,
                                        datasets)
        return execute_query(stable_query)
    def get_refactored_instances(self,
                                 datasets: Iterable[str] = None,
                                 projects=None):
        """
        Get all refactoring instances for this refactoring,
         e.g. for refactoring "Extract Method".

        Parameter:
            datasets (Iterable[str]) (optional): filter the refactoring
            instances for these datasets. If empty, no filter is applied.
            projects (optional): accepted for interface compatibility;
            not used by this query.
        """
        # BUG FIX: the defaults were mutable lists ([]), which Python shares
        # across all calls. Use None sentinels and substitute fresh empty
        # lists so callers that pass nothing see the exact old behavior.
        if datasets is None:
            datasets = []
        if projects is None:
            projects = []
        return execute_query(
            get_level_refactorings(int(self._level), self._name, datasets))
# ----- Example #5 -----
def query_avg(table_name: str, function: str, metrics, descriptor: str, group: bool):
    """
    Fetch metric values aggregated with the given SQL function from a table,
    caching the result as a csv file under results/Metrics.

    Parameter:
        table_name (str): the table (or view) to select from.
        function (str): the SQL aggregate function to apply, e.g. AVG.
        metrics (Iterable[str]): the column names to aggregate.
        descriptor (str): a label distinguishing this extraction in the
            cache file name.
        group (bool): whether to group the aggregation by level.

    Returns:
        pandas.DataFrame: the (possibly cached) query result.
    """
    file_path = f"results/Metrics/{table_name}_{function}_{descriptor}.csv"
    Path(path.dirname(file_path)).mkdir(parents=True, exist_ok=True)
    if path.exists(file_path):
        return pd.read_csv(file_path)
    # Each metric is aggregated but keeps its original column name.
    selected = ', '.join(f"{function}({metric}) AS \"{metric}\"" for metric in metrics)
    suffix = " group by level" if group else ""
    query = f"SELECT {selected} FROM {table_name}" + suffix
    dataframe = execute_query(query)
    dataframe.to_csv(file_path, index=False)
    log(f"Got the data from {table_name} for these metrics: {metrics} for the aggregate function: {function}.")
    return dataframe
def retrieve_columns(sql_query, columns, samples=-1):
    """
    Return the requested columns of a query result, preferring a local csv
    cache keyed by the SHA-1 of the query text.

    Parameter:
        sql_query (str): the SQL query whose result is wanted.
        columns (list): the column names to load from the cached csv.
        samples (int) (optional): if >= 0 and the data has at least that
            many rows, return a random sample of this size; otherwise
            return all rows.
    """
    # Hash the query text to get a stable cache key.
    query_hash = hashlib.sha1(sql_query.encode()).hexdigest()

    # Create the filepath
    cache_dir = path.join(CACHE_DIR_PATH, "_cache")
    file_path = path.join(cache_dir, f"{query_hash}.csv")

    if path.exists(file_path):
        data = pd.read_csv(file_path, usecols=columns)
        # Sample only when a valid size was requested and enough rows exist.
        if samples < 0 or len(data) < samples:
            return data
        else:
            return data.sample(samples)
    else:
        # BUG FIX: the result was previously returned without ever being
        # written to the cache, so every miss re-ran the query. Persist it
        # so the next call for the same query hits the cache. The full,
        # unfiltered result is still returned on a miss, mirroring the
        # original behavior.
        from pathlib import Path
        data = execute_query(sql_query)
        Path(cache_dir).mkdir(parents=True, exist_ok=True)
        data.to_csv(file_path, index=False)
        return data
from configs import DATASETS, Level, VALIDATION_DATASETS
from db.QueryBuilder import get_all_level_stable, get_level_refactorings_count, get_level_refactorings
from db.DBConnector import execute_query
from utils.log import log_init, log_close, log
import time

# Cache warm-up: run every query the experiments will need so later runs
# are served from the query cache. Results are logged; the dataframes
# themselves are discarded.
log_init()
log('Begin cache warm-up')
start_time = time.time()

# Warm the cache for both the main and the validation datasets.
for dataset in (DATASETS + VALIDATION_DATASETS):
    log("\n**** dataset: " + dataset)
    for level in Level:
        # 1) All stable (non-refactored) instances at this level.
        log("-- non refactored instances for " + str(level))
        non_refactored = execute_query(
            get_all_level_stable(int(level), dataset))
        log(
            str(len(non_refactored)) +
            " non-refactored instances were found for level: " + str(level))

        # 2) The refactoring types present at this level, with their counts.
        log("-- " + str(level) + " refactoring types with count")
        refactorings = execute_query(
            get_level_refactorings_count(int(level), dataset))
        log(refactorings.to_string())
        # 3) The instances of each refactoring type. The result is unused:
        #    the call exists only to populate the query cache.
        for refactoring_name in refactorings['refactoring']:
            refactoring_instances = execute_query(
                get_level_refactorings(int(level), refactoring_name, dataset))

log('Cache warm-up took %s seconds.' % (time.time() - start_time))
log_close()
# ----- Example #8 -----
 def get_non_refactored_instances(self, dataset):
     """Return all stable (non-refactored) instances at this refactoring's level, optionally filtered by dataset."""
     stable_query = get_all_level_stable(int(self._level), dataset)
     return execute_query(stable_query)
# ----- Example #9 -----
 def get_refactored_instances(self, dataset):
     """Return all instances of this refactoring type at this level, optionally filtered by dataset."""
     refactorings_query = get_level_refactorings(int(self._level), self._name, dataset)
     return execute_query(refactorings_query)