Exemplo n.º 1
0
 def __new__(cls, end_date):
     """Create the instance from a validated end-date string.

     Args:
         end_date: Date string parsed with ``settings.FILE_DATE_FORMAT``.

     Returns:
         A new instance built from the parsed year, month and day.

     Raises:
         exceptions.ArgumentTypeError: If ``end_date`` cannot be parsed, or
             if it falls outside the inclusive range that starts at
             ``settings.FOUNDATION_DATE`` and ends today.
     """
     min_date = pendulum.from_format(settings.FOUNDATION_DATE,
                                     settings.FILE_DATE_FORMAT).date()
     max_date = pendulum.today().date()
     try:
         end_date = pendulum.from_format(end_date,
                                         settings.FILE_DATE_FORMAT).date()
     except (ValueError, TypeError) as error:
         # Chain explicitly so the parsing failure stays visible as the cause.
         raise exceptions.ArgumentTypeError(
             settings.END_DATE_ERROR) from error
     if not min_date <= end_date <= max_date:
         raise exceptions.ArgumentTypeError(settings.END_DATE_ERROR)
     return super().__new__(cls, end_date.year, end_date.month,
                            end_date.day)
Exemplo n.º 2
0
 def __new__(cls, cpu_count):
     """Create the instance from a validated worker count.

     Args:
         cpu_count: Requested number of CPUs. Any falsy value (``None``,
             ``0``) selects the count reported by ``mp.cpu_count()``.

     Returns:
         A new instance holding the chosen CPU count.

     Raises:
         exceptions.ArgumentTypeError: If ``cpu_count`` is truthy but not
             between 1 and ``mp.cpu_count()`` inclusive.
     """
     available = mp.cpu_count()
     if not cpu_count:
         return super().__new__(cls, available)
     # Test membership against the range itself; no throwaway list needed.
     if cpu_count in range(1, available + 1):
         return super().__new__(cls, cpu_count)
     raise exceptions.ArgumentTypeError(settings.CPU_COUNT_ERROR)
Exemplo n.º 3
0
    def __new__(cls, directory_path):
        """Create the instance from a directory path.

        Args:
            directory_path: Value accepted by ``pathlib.Path``. Any falsy
                value (``None``, ``''``) selects the current working
                directory.

        Returns:
            A new instance built from the resulting ``pathlib.Path``.

        Raises:
            exceptions.ArgumentTypeError: If ``pathlib.Path`` rejects
                ``directory_path`` with a ``TypeError``.
        """
        if not directory_path:
            return super().__new__(cls, pathlib.Path.cwd())
        try:
            directory_path = pathlib.Path(directory_path)
        except TypeError as error:
            # Chain explicitly so the original TypeError stays as the cause.
            raise exceptions.ArgumentTypeError(
                settings.DIRECTORY_PATH_ERROR) from error
        return super().__new__(cls, directory_path)
Exemplo n.º 4
0
 def __init__(self, name, region_names):
     """Initialise with the sorted, de-duplicated region names.

     Args:
         name: Passed to ``sc.download_regions`` to obtain the known regions.
         region_names: Iterable of region names to validate.

     Raises:
         exceptions.ArgumentTypeError: If any entry of ``region_names`` is
             not a key of the downloaded regions.
     """
     regions = sc.download_regions(name)
     # Build the set once: a one-shot iterable would otherwise be exhausted
     # by the validation and leave nothing for the initialisation below.
     unique_names = set(region_names)
     if not unique_names.issubset(regions.keys()):
         raise exceptions.ArgumentTypeError(settings.REGION_NAME_ERROR)
     super().__init__(sorted(unique_names))
Exemplo n.º 5
0
 def __new__(cls, periodicity):
     """Create the instance from a known periodicity.

     Args:
         periodicity: Value that must be contained in
             ``settings.PERIODICITY_CODES``.

     Raises:
         exceptions.ArgumentTypeError: If ``periodicity`` is not contained in
             ``settings.PERIODICITY_CODES``.
     """
     if periodicity not in settings.PERIODICITY_CODES:
         raise exceptions.ArgumentTypeError(settings.PERIODICITY_ERROR)
     return super().__new__(cls, periodicity)
Exemplo n.º 6
0
 def __new__(cls, name):
     """Create the instance from a known chart name.

     Args:
         name: Value that must be contained in ``settings.NAME_CODES``.

     Raises:
         exceptions.ArgumentTypeError: If ``name`` is not contained in
             ``settings.NAME_CODES``.
     """
     if name not in settings.NAME_CODES:
         raise exceptions.ArgumentTypeError(settings.NAME_ERROR)
     return super().__new__(cls, name)
Exemplo n.º 7
0
 def __init__(self,
              name,
              periodicity,
              region_names,
              begin_date,
              end_date,
              cpu_count=None,
              directory_path=None):
     """Validate the arguments, then download and store charts per region.

     For every requested region, the dates inside the requested range that
     are not yet present in that region's file are downloaded in a process
     pool, merged with the stored rows, sorted and written back.

     Args:
         name: Chart name, wrapped in ``classes.Name``.
         periodicity: Chart periodicity, wrapped in ``classes.Periodicity``.
         region_names: Region names, wrapped in ``classes.RegionNames``.
         begin_date: First date of the range, wrapped in
             ``classes.BeginDate``.
         end_date: Last date of the range, wrapped in ``classes.EndDate``.
         cpu_count: Pool size, wrapped in ``classes.CpuCount``.
         directory_path: Output root, wrapped in ``classes.DirectoryPath``.

     Raises:
         exceptions.ArgumentTypeError: If ``begin_date`` is later than
             ``end_date``.
     """
     # NOTE(review): self.name, self.periodicity, self.region_names,
     # self.begin_date, self.end_date, self.cpu_count and self.directory_path
     # are read below but not defined here; presumably properties exposing
     # the underscore attributes - confirm against the class.
     self._name = classes.Name(name)
     self._periodicity = classes.Periodicity(periodicity)
     self._region_names = classes.RegionNames(name, region_names)
     self._begin_date = classes.BeginDate(begin_date)
     self._end_date = classes.EndDate(end_date)
     if self._begin_date > self._end_date:
         raise exceptions.ArgumentTypeError(settings.DATE_RANGE_ERROR)
     self._cpu_count = classes.CpuCount(cpu_count)
     self._directory_path = classes.DirectoryPath(directory_path)

     name_code = settings.NAME_CODES[self.name]
     periodicity_code = settings.PERIODICITY_CODES[self.periodicity]

     # Keep only the requested regions, ordered by region name.
     all_regions = download_regions(self.name)
     regions = {
         region_name: all_regions[region_name]
         for region_name in all_regions
         if region_name in self.region_names
     }
     regions_items = sorted(regions.items(), key=lambda region: region[0])
     regions_items = auto.tqdm(regions_items)

     if self.name == 'top200':
         column_names = copy.deepcopy(settings.TOP200_CHART_COLUMN_NAMES)
     else:
         column_names = copy.deepcopy(settings.VIRAL50_CHART_COLUMN_NAMES)
     column_names.extend(['region_name', 'date'])

     begin_date = self.begin_date.format(settings.FILE_DATE_FORMAT)
     end_date = self.end_date.format(settings.FILE_DATE_FORMAT)
     extension = settings.FILE_EXTENSION
     # The file name does not depend on the region, so build it once.
     file_name = f'{self.name}_{self.periodicity}_charts_from_{begin_date}_to_{end_date}.{extension}'

     try:
         for region_name, region_code in regions_items:
             directory_path = pathlib.Path(self.directory_path).joinpath(
                 self.name, self.periodicity, region_name)
             directory_path.mkdir(parents=True, exist_ok=True)
             file_path = directory_path.joinpath(file_name)
             if file_path.exists():
                 # Parse the stored dates so they share a dtype with the
                 # freshly downloaded ones; mixing strings and timestamps
                 # would make the sort below fail.
                 region_charts = pd.read_csv(
                     file_path,
                     sep=settings.FILE_DELIMITER,
                     encoding=settings.FILE_ENCODING,
                     parse_dates=['date'])
             else:
                 region_charts = pd.DataFrame(columns=column_names)

             # Dates already stored for this region; these are skipped.
             file_dates = [
                 pendulum.instance(
                     pd.Timestamp(file_date).to_pydatetime()).date()
                 for file_date in region_charts['date'].unique()
             ]

             current_time = pendulum.now().format(
                 settings.PROGRESS_BAR_TIME_FORMAT)
             description = f'{current_time} | {region_name}'
             regions_items.set_description(description)

             all_dates = download_dates(self.name, self.periodicity,
                                        region_name)
             urls = []
             dates = []
             for date, date_code in all_dates.items():
                 if self.begin_date <= date <= self.end_date and date not in file_dates:
                     url = f'{settings.SPOTIFY_CHARTS_URL}/{name_code}/{region_code}/{periodicity_code}/{date_code}'
                     urls.append(url)
                     dates.append(date)
             logger.info('%s:%d', region_name, len(urls))

             with mp.Pool(self.cpu_count) as pool:
                 downloaded_charts = pool.map(classes.Chart, urls)

             # Tag each non-empty chart with its region and date.
             for chart, date in zip(downloaded_charts, dates):
                 if not chart.empty:
                     chart['region_name'] = region_name
                     chart['date'] = date
                     chart['date'] = pd.to_datetime(chart['date'])

             if downloaded_charts:
                 # DataFrame.append was removed in pandas 2.0; pd.concat is
                 # the equivalent call.
                 data = pd.concat([region_charts, *downloaded_charts],
                                  sort=True)
                 data = data.reset_index(drop=True)
                 data = data.sort_values(by=['date', 'track_position'],
                                         ascending=[False, True])
                 data = data[column_names]
                 data.to_csv(file_path,
                             sep=settings.FILE_DELIMITER,
                             encoding=settings.FILE_ENCODING,
                             index=False)
     finally:
         # Close the progress bar even when a region fails part-way.
         regions_items.close()