def _extract(self):
    """Load the per-method bugginess labels for the current version.

    Runs the project's DataExtractor, reads the bugged-methods CSV it
    produces, groups rows by method id, and stores the resulting
    {method_id: {"is_method_buggy": value}} mapping as raw data.
    """
    data_extractor = DataExtractor(self.project)
    data_extractor.extract(True)
    labels_path = data_extractor.get_bugged_methods_path(self.version, True)
    frame = pd.read_csv(labels_path, sep=';')
    # zip pairs the single label name with the group's values, so only the
    # first row of each method group contributes its is_method_buggy value.
    to_label = lambda group: dict(zip(["is_method_buggy"], group.is_method_buggy))
    labels = frame.groupby('method_id').apply(to_label).to_dict()
    self.data.set_raw_data(labels)
class DataExtractorTest(unittest.TestCase):
    """Tests for DataExtractor's parsing of a raw race record."""

    def setUp(self):
        # Parse the shared raw_data fixture once per test.
        self.extractor = DataExtractor(raw_data)
        self.extractor.extract()
        self.race = self.extractor.get_race()

    def test_extracts_heat(self):
        # assertEqual: assertEquals is deprecated and removed in Python 3.12.
        self.assertEqual(60, self.race.heat)

    def test_extracts_race_date_and_time(self):
        self.assertEqual(datetime.date(2011, 12, 23), self.race.date)
        self.assertEqual(datetime.time(20, 36), self.race.time)

    def test_extract_driver_list(self):
        drivers = [u'CiglaR', u'CASPER', u'Brzi', u'bR1ck', u'gogoGT',
                   u'Shorty', u'dastrong', u'skrla', u'slavisha', u'VINKO']
        self.assertEqual(drivers, self.race.driver_list)
def extract_data(project_ref):
    """Extract data for one project.

    Args:
        project_ref: an (index, project) pair; `index` is only used for
            progress logging, `project` must expose github().

    Returns:
        None on success, or the raised exception on failure so a pool
        caller can collect per-project errors without aborting the batch.
    """
    index, project = project_ref
    general_log = logging.getLogger(__name__)
    success_log = logging.getLogger("success")
    failure_log = logging.getLogger("failure")
    failure_verbose_log = logging.getLogger("failure_verbose")
    # Lazy %-style args: the message is only formatted if the level is enabled.
    general_log.info("%s: %s", index, project.github())
    try:
        extractor = DataExtractor(project)
        extractor.extract()
    except Exception as e:
        failure_log.error("Failed to extract {0}.".format(project.github()))
        # logger.exception records the full traceback for post-mortem triage.
        failure_verbose_log.exception("Failed to extract {0}.".format(
            project.github()))
        return e
    else:
        success_log.info("Succeeded to extract {0}.".format(project.github()))
        return None
def test_extract(self):
    """Smoke test: a full extraction over CommonsLang completes without raising."""
    target = ProjectName.CommonsLang.value
    DataExtractor(target).extract()
import sys

from data_extractor import DataExtractor
from preprocess import write_to_file, preprocess_data
# from model import build_model

# The dataset folder is the single required command-line argument.
dataset_folder = sys.argv[1]
dataset_file = "dataset.json"
normalised_dataset_file = "normalised_data.json"

# Stage 1: extract data from review.json and business.json and dump it.
data_extractor = DataExtractor(dataset_folder)
data_extractor.extract()
data_extractor.write_to_file()

# Stage 2: preprocess and write the final dataset to normalised_data.json.
preprocess_data(dataset_file)

# Stage 3 (currently disabled): build the model.
# build_model(normalised_dataset_file)