def parse(self, taggers=()):
        """
        Open the csv file and dump it in a tablib.Dataset object.

        Each data row is wrapped in a GoogleContactRow, cleaned up, and
        tagged by calling each method named in ``taggers`` on it. Rows
        whose "Name" column yields an empty index are ignored. Duplicate
        indexes are either dropped (``self.drop``) or merged into the
        previously seen row (``self.merge``).

        :param taggers: iterable of GoogleContactRow method names; each
            method is called with no arguments and must return a list of
            tags. Defaults to an empty tuple (immutable default — the old
            ``list()`` default was a mutable-default-argument smell).
        """
        # Lazy %-style args: formatting only happens if the record is emitted
        # (consistent with the logger call at the bottom of the loop).
        self.logger.info("Will parse input %s csv file", self.csv_path)

        data = Dataset()

        with open(self.csv_path, "rb") as csv_file:

            google_contact = UnicodeReader(csv_file)

            name_col = None
            for row_num, row in enumerate(google_contact):
                if row_num == 0:
                    data.headers = row
                    # Hoist the "Name" column lookup out of the row loop
                    name_col = data.headers.index("Name")
                    continue
                gRow = GoogleContactRow(headers=data.headers, row=row)
                gRow.standard_cleanup()
                gRow.format_names()
                tags = []
                for tagger in taggers:
                    tags += getattr(gRow, tagger)()
                # De-duplicate tags (order is not significant)
                tags = list(set(tags))

                # Get the row index
                index = format_index(gRow[name_col])

                # Empty index: drop this row
                if not index:
                    self.logger.info("Ignored row without index (%d)", row_num)
                    continue

                # Duplicate?
                if self.is_duplicate(index):
                    self.logger.info(
                        "Found duplicate row for %s (num: %d)", index, row_num
                    )
                    # Drop this row
                    if self.drop:
                        self.logger.debug("Dropped duplicate row %d", row_num)
                        continue

                    # Merge this row into the first row seen with this index
                    if self.merge:
                        row_dst = self.hash.index(index)
                        data[row_dst] = merge_lists(gRow, data[row_dst])
                        self.logger.debug(
                            "Merged duplicate row %d with %d", row_num, row_dst
                        )
                        continue

                self.hash += (index,)

                data.append(gRow, tags=tags)
                self.logger.debug("row %d tags %s", row_num, tags)

        self.data = data
        self.logger.debug("File columns are:\n%s", "\n".join(self.data.headers))
Exemplo n.º 2
0
	def transpose(self):
		"""Return a new :class:`Dataset` with rows and columns swapped.

		The first header is the hinge: it stays in the header row while the
		values of its column become the remaining headers. Every other
		column turns into a row led by its former header name. Returns
		``None`` for an empty dataset.
		"""

		# Nothing to rotate
		if not self:
			return

		hinge = self.headers[0]
		transposed = Dataset()
		# Hinge name + the hinge column's values form the new header row.
		transposed.headers = [hinge] + self[hinge]

		for name in self.headers:
			# The hinge already lives in the headers — don't emit it as a row.
			if name == hinge:
				continue
			# Former column header leads the row, followed by the column data.
			transposed.append(row=Row([name] + self[name]))

		return transposed
def as_tablib_dataset(report, parent_context):
    """Render *report* into a single tablib.Dataset.

    Each element of the rendered context contributes a separator row
    (its title) followed by all of its table's value rows.

    :param report: report object understood by ``_report``
    :param parent_context: context passed through to ``_report``
    :return: a ``tablib.Dataset`` with one separator per element
    """
    render_context = _report(report, parent_context)

    dataset = Dataset()
    for element in render_context["elements"].values():
        dataset.append_separator(element["title"])
        # The enumerate index was never used — iterate directly.
        for row in element["table"].as_values():
            dataset.append(row)

    return dataset
Exemplo n.º 4
0
 def _prepare_table(string):
     """Parse a whitespace-separated text table into a tablib Dataset.

     The first line of *string* is discarded; separator lines (containing
     '====') and blank lines are skipped. The line at index 1 of the
     remaining text supplies the headers; every other kept line becomes a
     row of numberfy()-converted cells.
     """
     table = Dataset()
     for idx, text_line in enumerate(string.split('\n')[1:]):
         if not text_line or '====' in text_line:
             continue
         cells = text_line.split()
         if idx == 1:
             table.headers = cells
         else:
             table.append([numberfy(cell) for cell in cells])
     return table
Exemplo n.º 5
0
 def _prepare_cluters(string):
     """Parse the clusters section of the output into a tablib Dataset.

     Multi-word tokens ('PULSE HEIGHT' and the ' +/-' suffix) are first
     glued with '-'/'_' so a plain split() keeps them together; the '_'
     glue is undone cell by cell when rows are appended. The first three
     lines are discarded; '*****' separators and blank lines are skipped.
     The first kept line supplies the headers.
     """
     table = Dataset()
     glued = string.replace('PULSE HEIGHT', 'PULSE-HEIGHT')
     glued = glued.replace(' +/-', '_+/-')
     for idx, raw in enumerate(glued.split('\n')[3:]):
         if not raw or '*****' in raw:
             continue
         tokens = raw.split()
         if idx == 0:
             table.headers = tokens
         else:
             table.append(
                 [numberfy(tok.replace('_', ' ')) for tok in tokens])
     return table
Exemplo n.º 6
0
    def _prepare_table(string):
        """Parse the text table into a tablib Dataset.

        The first line is discarded; '*****' separators and blank lines
        are skipped. On the header line (index 1) the 5th and 6th tokens
        form a single two-word column name, so they are re-joined and only
        five headers are kept. All other kept lines become rows of
        numberfy()-converted cells.
        """
        table = Dataset()
        for idx, text_line in enumerate(string.split('\n')[1:]):
            if not text_line or '*****' in text_line:
                continue
            cells = text_line.split()
            if idx == 1:
                # tokens 4 and 5 belong to one column header
                cells[4] = cells[4] + ' ' + cells[5]
                table.headers = cells[:5]
            else:
                table.append([numberfy(cell) for cell in cells])

        return table
Exemplo n.º 7
0
	def sort(self, col, reverse=False, include_tags=True):
		"""Sort a :class:`Dataset` by a specific column, given string (for
		header) or integer (for column index). The order can be reversed by
		setting ``reverse`` to ``True``.

		When ``include_tags`` is ``True`` the per-row tags are carried over
		into the sorted dataset; this requires exactly one tag entry per row.

		Returns a new :class:`Dataset` instance where columns have been
		sorted.

		:raises HeadersNeeded: if ``col`` is a header name but the dataset
			has no headers.
		:raises InvalidDimensions: if ``include_tags`` is set and the tag
			list length does not match the number of rows.
		"""

		if isinstance(col, str) or isinstance(col, unicode):
			# Sorting by header name only makes sense when headers exist.
			if not self.headers:
				raise HeadersNeeded
		else:
			# BUGFIX: this branch was previously nested inside the string
			# case above, so a header-name `col` crashed with a TypeError
			# (list indexed by str) and an integer `col` was never
			# translated. As in upstream tablib, an integer index is
			# converted to its header name so itemgetter() works on the
			# dict form of each row.
			if self.headers:
				col = self.headers[col]

		old_headers = self.headers[:]
		if include_tags:
			if len(self._tags_list) != self.height:
				raise InvalidDimensions
			else:
				_dset = copy(self)
				# Expose the tags as a regular column so each row stays
				# paired with its tags while being sorted.
				_dset.rpush_col(self._tags_list, header='tags')
				# NOTE(review): assumes each row item exposes the appended
				# tags column at positional key ``self.width + 1`` — confirm
				# against this Dataset variant's ``dict`` implementation.
				tag_getter = itemgetter(self.width + 1)
				_dset._tags_list = []

				_sorted = sorted(_dset.dict, key=itemgetter(col), reverse=reverse)
				_dataset = Dataset(headers=self.headers)
				for item in _sorted:
					if self.headers:
						# tags are stored space-joined; split back to a list
						tags = tag_getter(item).split(' ')
						changed_row = Row([item[key] for key in old_headers], tags=tags)

					else:
						changed_row = item
					_dataset.append(row=changed_row._row, tags=changed_row.tags, include_tags=True)

		else:
			_dset = copy(self)
			_sorted = sorted(_dset.dict, key=itemgetter(col), reverse=reverse)
			_dataset = Dataset(headers=self.headers)
			for item in _sorted:
				if self.headers:
					# without headers, dict rows are plain sequences
					row = [item[key] for key in self.headers]
				else:
					row = item
				_dataset.append(row=row)

		return _dataset
Exemplo n.º 8
0
 def _prepare_table(string):
     """Parse a '*'-delimited text table into a tablib Dataset.

     Blank lines and '*****' separator lines are skipped. Each remaining
     line is stripped of its outer '*' border, split on '*', and each cell
     trimmed. The first parsed line supplies the headers; the rest become
     rows of numberfy()-converted cells.
     """
     parsed = []
     for raw in string.split('\n'):
         if not raw or '*****' in raw:
             continue
         cells = raw.lstrip().strip('*').split('*')
         parsed.append([cell.strip() for cell in cells])
     table = Dataset()
     table.headers = parsed[0]
     for cells in parsed[1:]:
         table.append([numberfy(cell) for cell in cells])
     return table
Exemplo n.º 9
0
 def _prepare_clusters_str(self, data):
     """Parse the clusters section of *data* into a tablib Dataset.

     Multi-word tokens are first glued with '-'/'_' so a plain split()
     keeps them together; the '_' glue is undone per cell on append. Only
     the header line ('NO.  PULSE-HEIGHT') and measurement lines (those
     containing '+/-') are kept; '*****' separators, blank lines and
     GEANT chatter are skipped. The first three lines are discarded.
     """
     table = Dataset()
     glued = data.replace('PULSE HEIGHT', 'PULSE-HEIGHT')
     glued = glued.replace(' +/-', '_+/-')
     for idx, raw in enumerate(glued.split('\n')[3:]):
         if not raw or '*****' in raw or 'GEANT' in raw:
             continue
         if 'NO.  PULSE-HEIGHT' not in raw and '+/-' not in raw:
             continue
         tokens = raw.split()
         if idx == 0:
             table.headers = tokens
         elif '+/-' in raw:
             table.append(
                 [numberfy(tok.replace('_', ' ')) for tok in tokens])
     return table
Exemplo n.º 10
0
    def _prepare_table(string):
        """Parse whitespace-separated text into a tablib Dataset.

        The first line is discarded; '====' separators and blank lines are
        skipped. Line index 1 supplies the headers. A row whose cell count
        does not match the headers is printed (together with the last
        well-formed line, as a debugging aid) instead of being appended.
        """
        table = Dataset()
        last_good = None
        for idx, text_line in enumerate(string.split('\n')[1:]):
            if not text_line or '====' in text_line:
                continue
            cells = text_line.split()
            if idx == 1:
                table.headers = cells
            elif len(cells) == len(table.headers):
                last_good = text_line
                table.append([numberfy(num) for num in cells])
            else:
                # malformed row: show context instead of appending
                print(last_good)
                print(cells)

        return table
Exemplo n.º 11
0
            row_raw.append(number_of_decays + 1)
            row_raw.append(momentum)

            y1 = event.calorimeter.clusters.clusters[0].y.value
            z1 = event.calorimeter.clusters.clusters[0].z.value
            y2 = event.calorimeter.clusters.clusters[1].y.value
            z2 = event.calorimeter.clusters.clusters[1].z.value
            ph1 = event.calorimeter.clusters.clusters[0].pulse_height
            ph2 = event.calorimeter.clusters.clusters[1].pulse_height

            row_raw.append(y1)
            row_raw.append(z1)
            row_raw.append(ph1)
            row_raw.append(y2)
            row_raw.append(z2)
            row_raw.append(ph2)

            raw.append(row_raw)
            number_of_decays += 1

        with open(RAW_OUTPUT, 'wb') as f:
            f.write(raw.export('xlsx'))

    if (progress < 98):
        print('Something went wrong. Please run the program again.')
    else:
        print('Execution completed.')
        print(event_id, 'event(s) were simulated.')
        print('There were', number_of_decays, 'interesting events out of',
              event_id, 'events.')
Exemplo n.º 12
0
        dataset.headers = ('P', 'pulseheight', 'x', 'dx', 'y', 'dy', 'z', 'dz',
                           'ywidth', 'zwidth')
        for event in events:
            row = []
            if len(event.clusters.clusters.clusters) is 1:
                row.append(event.energy)
                row.append(event.clusters.clusters.clusters[0].pulse_height)
                row.append(event.clusters.clusters.clusters[0].x.value)
                row.append(event.clusters.clusters.clusters[0].x.error)
                row.append(event.clusters.clusters.clusters[0].y.value)
                row.append(event.clusters.clusters.clusters[0].y.error)
                row.append(event.clusters.clusters.clusters[0].z.value)
                row.append(event.clusters.clusters.clusters[0].z.error)
                row.append(event.clusters.clusters.clusters[0].ywidth)
                row.append(event.clusters.clusters.clusters[0].zwidth)
                dataset.append(row)
        with open(EXEL_OUTPUT, 'wb') as f:
            f.write(dataset.export('xls'))

    elif particle is 'electron' or particle is 'muon':
        events = parse(text)
        dataset = Dataset()
        dataset.headers = ('P', 'tandip', 'Kappa', 'd Kappa',
                           'Calorimeter Pulse Heights')
        for event in events:
            row = []
            row.append(event.energy)
            if len(event.tracks.tracks) is not 0:
                if event.tracks.tracks[0].parameters.tandip is not None:
                    row.append(event.tracks.tracks[0].parameters.tandip)
                else: