def test_discrete_values_sort(self):
    """Values of a categorical column must come out naturally sorted."""
    data_path = os.path.join(os.path.dirname(__file__), "data-csv-types.tab")
    # Only the second column is forced to categorical; the rest are guessed.
    column_types = [
        (range(0, 1), ColumnType.Auto),
        (range(1, 2), ColumnType.Categorical),
        (range(2, 5), ColumnType.Auto),
    ]
    import_options = owcsvimport.Options(
        encoding="ascii", dialect=csv.excel_tab(), columntypes=column_types
    )
    settings = {
        "_session_items": [(data_path, import_options.as_dict())],
        "__version__": 2,  # guessing works for versions >= 2
    }
    widget = self.create_widget(
        owcsvimport.OWCSVFileImport, stored_settings=settings
    )
    widget.commit()
    self.wait_until_finished(widget)
    table = self.get_output("Data", widget)
    # '12' sorts after '5', i.e. numerically rather than lexicographically.
    self.assertTupleEqual(
        ('1', '3', '4', '5', '12'), table.domain.attributes[1].values
    )
示例#2
0
 def test_tsv_with_headers_use_dialect_object(self):
     """Read the TSV fixture through ``CsvDataset`` using a dialect instance."""
     with open(self.tsv_data_with_headers) as tsv_file:
         dataset = CsvDataset(
             tsv_file, has_headers=True, dialect=csv_import.excel_tab()
         )
         self.assertSequenceEqual(self.expected_file_headers,
                                  dataset.headers())
         # Ask for up to 50 rows; the fixture is expected to hold only 9.
         first_rows = list(itertools.islice(dataset.reader(), 50))
         self.assertEqual(9, len(first_rows))
         self.assertEqual("What?", first_rows[8][3])  # just a spot check
示例#3
0
def get_test_result(filename, full_graph=False, chain_graph=False):
    """Run ``NUMBER_OF_TESTS`` test rounds and write one TSV row per round.

    Round ``i`` works on graphs with ``NODE_COUNT + STEP * i`` nodes. Four
    graphs are generated per round (plain, weighted, negative-weighted and
    planar) and handed to ``Tester.get_test_line``; the returned line is
    written to ``filename`` with a tab-separated ``csv.DictWriter``.

    :param filename: path of the report file; it is opened for writing
    :param full_graph: forwarded to the plain, weighted and
        negative-weighted ``GraphGenerator`` instances
    :param chain_graph: forwarded to every ``GraphGenerator``; when true the
        route goes from node 0 to the last node instead of between two
        random nodes
    """
    columns = ['Count of nodes']
    for algorithm in ('BFS', 'DFS', 'Dijkstra', 'Ford-Bellman', 'A_Star'):
        for measure in ('average time', 'delta time', 'memory'):
            columns.append('%s %s' % (algorithm, measure))

    with open(filename, 'w', newline='') as report:
        writer = csv.DictWriter(report,
                                fieldnames=columns,
                                dialect=csv.excel_tab())
        writer.writeheader()
        tester = Tester()
        for round_index in range(NUMBER_OF_TESTS):
            node_count = NODE_COUNT + STEP * round_index
            last_node = node_count - 1
            if chain_graph:
                start_node, finish_node = 0, last_node
            else:
                start_node = random.randint(0, last_node)
                finish_node = random.randint(0, last_node)

            # Generators are created and run in the same order as before so
            # that any randomness they consume stays in sequence.
            graph = GraphGenerator(node_count, False, False, full_graph,
                                   chain_graph).generate_graph()
            weighted_graph = GraphGenerator(node_count, True, True,
                                            full_graph,
                                            chain_graph).generate_graph()
            negative_weighted_graph = GraphGenerator(
                node_count, True, True, full_graph, chain_graph,
                negative_weighted=True).generate_graph()
            planar_graph = GraphGenerator(node_count, True, True,
                                          chain_graph=chain_graph,
                                          planar=True).generate_graph()

            writer.writerow(
                tester.get_test_line(node_count, start_node, finish_node,
                                     graph, weighted_graph, planar_graph,
                                     negative_weighted_graph))
示例#4
0
    def test_import_widget(self):
        """Drive ``CSVImportWidget`` through several samples and check its preview model."""
        widget = textimport.CSVImportWidget()
        widget.setDialect(csv.excel())

        # --- comma dialect: DATA1, DATA2 and DATA4 ---
        widget.setSampleContents(io.BytesIO(DATA1))
        preview = widget.dataview
        table_model = preview.model()
        self.assertEqual(table_model.columnCount(), 4)
        self.assertEqual(table_model.rowCount(), 1)
        self.assertEqual(table_model.canFetchMore(), False)

        widget.setSampleContents(io.BytesIO(DATA2))
        table_model = preview.model()
        self.assertEqual(table_model.columnCount(), 0)
        self.assertEqual(table_model.rowCount(), 0)
        self.assertEqual(table_model.canFetchMore(), False)

        widget.setSampleContents(io.BytesIO(DATA4))
        table_model = preview.model()
        self.assertEqual(table_model.columnCount(), 4)
        self.assertEqual(table_model.rowCount(), 3)

        # --- column types round-trip through the widget ---
        column_types = {
            0: ColumnTypes.Categorical,
            1: ColumnTypes.Numeric,
            2: ColumnTypes.Text,
            3: ColumnTypes.Time,
        }
        widget.setColumnTypes(column_types)
        self.assertEqual(widget.columnTypes(), column_types)

        # --- row states: start empty, then set via both setters ---
        self.assertEqual(widget.rowStates(), {})
        header_state = textimport.TablePreview.Header
        widget.setStateForRow(0, header_state)
        widget.setRowStates({0: header_state})
        self.assertEqual(widget.rowStates(), {0: header_state})
        widget.setStateForRow(1, textimport.TablePreview.Skipped)
        preview.grab()

        # --- DATA5: one column with commas, two with tabs, loaded lazily ---
        widget.setSampleContents(io.BytesIO(DATA5))
        table_model = preview.model()
        self.assertEqual(table_model.columnCount(), 1)
        widget.setDialect(csv.excel_tab())
        widget.setSampleContents(io.BytesIO(DATA5))
        table_model = preview.model()
        self.assertEqual(table_model.columnCount(), 2)
        self.assertTrue(table_model.canFetchMore())
        rows_before = table_model.rowCount()
        inserted_spy = QSignalSpy(table_model.rowsInserted)
        table_model.fetchMore()
        self.assertGreater(table_model.rowCount(), rows_before)
        self.assertEqual(len(inserted_spy), 1)
    def test_import_widget(self):
        """Drive ``CSVImportWidget`` through several samples and check its preview model."""
        widget = textimport.CSVImportWidget()
        widget.setDialect(csv.excel())

        # --- comma dialect: DATA1, DATA2 and DATA4 ---
        widget.setSampleContents(io.BytesIO(DATA1))
        preview = widget.dataview
        table_model = preview.model()
        self.assertEqual(table_model.columnCount(), 4)
        self.assertEqual(table_model.rowCount(), 1)
        self.assertEqual(table_model.canFetchMore(), False)

        widget.setSampleContents(io.BytesIO(DATA2))
        table_model = preview.model()
        self.assertEqual(table_model.columnCount(), 0)
        self.assertEqual(table_model.rowCount(), 0)
        self.assertEqual(table_model.canFetchMore(), False)

        widget.setSampleContents(io.BytesIO(DATA4))
        table_model = preview.model()
        self.assertEqual(table_model.columnCount(), 4)
        self.assertEqual(table_model.rowCount(), 3)

        # --- column types round-trip through the widget ---
        column_types = {
            0: ColumnTypes.Categorical,
            1: ColumnTypes.Numeric,
            2: ColumnTypes.Text,
            3: ColumnTypes.Time,
        }
        widget.setColumnTypes(column_types)
        self.assertEqual(widget.columnTypes(), column_types)

        # --- row states: start empty, then set via both setters ---
        self.assertEqual(widget.rowStates(), {})
        header_state = textimport.TablePreview.Header
        widget.setStateForRow(0, header_state)
        widget.setRowStates({0: header_state})
        self.assertEqual(widget.rowStates(), {0: header_state})
        widget.setStateForRow(1, textimport.TablePreview.Skipped)
        preview.grab()

        # --- DATA5: one column with commas, two with tabs, loaded lazily ---
        widget.setSampleContents(io.BytesIO(DATA5))
        table_model = preview.model()
        self.assertEqual(table_model.columnCount(), 1)
        widget.setDialect(csv.excel_tab())
        widget.setSampleContents(io.BytesIO(DATA5))
        table_model = preview.model()
        self.assertEqual(table_model.columnCount(), 2)
        self.assertTrue(table_model.canFetchMore())
        rows_before = table_model.rowCount()
        inserted_spy = QSignalSpy(table_model.rowsInserted)
        table_model.fetchMore()
        self.assertGreater(table_model.rowCount(), rows_before)
        self.assertEqual(len(inserted_spy), 1)
示例#6
0
def csvReader(path):
    """Read a delimited text file and split every record into fields.

    The file is parsed with the ``excel_tab`` dialect. For each record the
    first field has every ``', '`` (comma followed by a space) replaced by a
    single space and is then split on ``,``, ``;`` or a tab.

    Blank lines are skipped; previously they raised ``IndexError``.

    NOTE(review): only the first tab-separated field of each record is
    kept, so for tab-delimited input the remaining columns are dropped --
    confirm this is intended.

    :param path: path of the file to read
    :return: list of records, each a list of field strings
    """
    data_split = []
    with open(path) as csvfile:
        for value in csv.reader(csvfile, dialect=csv.excel_tab()):
            if not value:
                # csv.reader yields an empty list for a blank line.
                continue
            # remove "comma + space" so that it is not taken for a delimiter
            value_re = value[0].replace(', ', ' ')
            # field delimiters are , or ; or tab (a space is not supported)
            data_split.append(re.split(',|;|\t', value_re))

    return data_split
示例#7
0
    def _read(self, last_pass=False):
        """
        Generator method that returns next portion of data

        Calls ``self.stats_reader.read_file()``, fetches new lines from
        ``self.file`` and parses them as tab-separated records. The
        ``elapsed``, ``Latency`` and ``Connect`` columns are divided by
        1 000 000 (microseconds to seconds).

        :type last_pass: bool
        :param last_pass: forwarded to ``self.file.get_lines``
        :return: yields ``(timestamp, label, concurrency, response time,
            connect time, latency, response code, error, '', bytes received)``
        :raise ToolError: when a timing column is missing or not an integer
        """

        def mcs2sec(val):
            return int(val) / 1000000.0

        self.stats_reader.read_file()

        lines = self.file.get_lines(size=1024 * 1024, last_pass=last_pass)

        fields = ("timeStamp", "label", "elapsed",
                  "Connect", "Send", "Latency", "Receive",
                  "internal",
                  "bsent", "brecv",
                  "opretcode", "responseCode")
        dialect = csv.excel_tab()

        rows = csv.DictReader(lines, fields, dialect=dialect)

        for row in rows:
            label = row["label"]

            try:
                rtm = mcs2sec(row["elapsed"])
                ltc = mcs2sec(row["Latency"])
                cnn = mcs2sec(row["Connect"])
                # NOTE: actually we have precise send and receive time here...
            except (TypeError, ValueError):
                # int() raises TypeError for a missing column (None) and
                # ValueError for non-numeric text. Anything broader would
                # also turn KeyboardInterrupt/SystemExit into a ToolError.
                raise ToolError("PBench reader: failed record: %s" % row)

            if row["opretcode"] != "0":
                # a non-zero opretcode is translated to its strerror() text,
                # which is reported both as the error and the response code
                error = strerror(int(row["opretcode"]))
                rcd = error
            else:
                error = None
                rcd = row["responseCode"]

            # timeStamp marks the request start; add the response time
            tstmp = int(float(row["timeStamp"]) + rtm)
            byte_count = int(row["brecv"])
            concur = 0
            yield tstmp, label, concur, rtm, cnn, ltc, rcd, error, '', byte_count
示例#8
0
    def _read(self, last_pass=False):
        """
        Generator method that returns next portion of data

        Calls ``self.stats_reader.read_file()``, fetches new lines from
        ``self.file`` and parses them as tab-separated records. The
        ``elapsed``, ``Latency`` and ``Connect`` columns are divided by
        1 000 000 (microseconds to seconds).

        :type last_pass: bool
        :param last_pass: forwarded to ``self.file.get_lines``
        :return: yields ``(timestamp, label, concurrency, response time,
            connect time, latency, response code, error, '', bytes received)``
        :raise ToolError: when a timing column is missing or not an integer
        """

        def mcs2sec(val):
            return int(val) / 1000000.0

        self.stats_reader.read_file()

        lines = self.file.get_lines(size=1024 * 1024, last_pass=last_pass)

        fields = ("timeStamp", "label", "elapsed",
                  "Connect", "Send", "Latency", "Receive",
                  "internal",
                  "bsent", "brecv",
                  "opretcode", "responseCode")
        dialect = csv.excel_tab()

        rows = csv.DictReader(lines, fields, dialect=dialect)

        for row in rows:
            label = row["label"]

            try:
                rtm = mcs2sec(row["elapsed"])
                ltc = mcs2sec(row["Latency"])
                cnn = mcs2sec(row["Connect"])
                # NOTE: actually we have precise send and receive time here...
            except (TypeError, ValueError):
                # int() raises TypeError for a missing column (None) and
                # ValueError for non-numeric text. Anything broader would
                # also turn KeyboardInterrupt/SystemExit into a ToolError.
                raise ToolError("PBench reader: failed record: %s" % row)

            if row["opretcode"] != "0":
                # a non-zero opretcode is translated to its strerror() text,
                # which is reported both as the error and the response code
                error = strerror(int(row["opretcode"]))
                rcd = error
            else:
                error = None
                rcd = row["responseCode"]

            # timeStamp marks the request start; add the response time
            tstmp = int(float(row["timeStamp"]) + rtm)
            byte_count = int(row["brecv"])
            concur = 0
            yield tstmp, label, concur, rtm, cnn, ltc, rcd, error, '', byte_count
示例#9
0
def orthologs(groups, csv_source):
    """
    creates gene:(geneIDs) for each group in gene:(groups)

    ``csv_source`` is a tab-separated file whose two columns are read as
    ``OG`` and ``geneID``. The file is scanned once per gene, and every
    ``geneID`` whose ``OG`` is contained in ``groups[gene]`` is collected.

    :param groups: mapping of gene -> container of OG identifiers
    :param csv_source: path of the tab-separated OG/geneID file
    :return: dict of gene -> list of unique geneIDs (in no particular
        order); genes without any match are left out
    """
    found = {}
    total = len(groups)
    for position, gene in enumerate(groups):
        # `with` closes the file even if parsing a row raises.
        with open(csv_source) as csvfile:
            print("Searching gene " + str(position) + "/" + str(total))
            csv_reader = csv.DictReader(csvfile, dialect=csv.excel_tab(),
                                        fieldnames=['OG', 'geneID'])
            wanted = groups[gene]
            # A set removes duplicates once, not after every match.
            gene_ids = {row["geneID"] for row in csv_reader
                        if row["OG"] in wanted}
        if gene_ids:
            found[gene] = list(gene_ids)
    return found
示例#10
0
def ogdetails(gene_ids, csv_source):
    """
    creates gene:(ncbi_geneIDs) for each geneID in gene:(geneIDs)

    ``csv_source`` is a tab-separated file whose columns are read as
    ``ortho_id``, ``value`` and ``type``; only rows whose type is
    ``NCBIgid`` are used for the translation.

    :param gene_ids: mapping of gene -> iterable of ortholog ids
    :param csv_source: path of the tab-separated cross-reference file
    :return: dict of gene -> list of translated ids; ids without an
        ``NCBIgid`` row are dropped
    """
    with open(csv_source) as csvfile:
        xref_rows = csv.DictReader(csvfile, dialect=csv.excel_tab(),
                                   fieldnames=['ortho_id', 'value', 'type'])
        # later rows win when the same ortho_id appears more than once
        ncbi_by_ortho = {
            row["ortho_id"]: row["value"]
            for row in xref_rows
            if row['type'] == "NCBIgid"
        }

    ncbi_gene_ids = {}
    for gene in gene_ids:
        translated = ncbi_gene_ids.setdefault(gene, [])
        for ortholog in gene_ids[gene]:
            if ortholog not in ncbi_by_ortho:
                continue
            translated.append(ncbi_by_ortho[ortholog])
            print("Parsed gene ID " + ortholog)
    return ncbi_gene_ids
示例#11
0
    def __open_fds(self):
        """
        Open the results file and attach a tab-separated dict reader to it.

        :return: False while ``self.filename`` is missing or still empty;
            True once ``self.fds`` and ``self.csvreader`` have been set
        :rtype: bool
        """
        path = self.filename
        if not os.path.isfile(path):
            self.log.debug("File not appeared yet: %s", path)
            return False

        if os.path.getsize(path) == 0:
            self.log.debug("File is empty: %s", path)
            return False

        self.log.debug("Opening file: %s", path)
        # the handle is kept on the instance and stays open for the reader
        self.fds = open(path)
        column_names = (
            "timeStamp", "label", "elapsed", "Connect", "Send", "Latency",
            "Receive", "internal", "bsent", "brecv", "opretcode",
            "responseCode",
        )
        self.csvreader = csv.DictReader(self.fds, column_names,
                                        dialect=csv.excel_tab())
        return True
示例#12
0
文件: pbench.py 项目: dutchb/taurus
    def __open_fds(self):
        """
        Open the results file and attach a tab-separated dict reader to it.

        :return: False while ``self.filename`` is missing or still empty;
            True once ``self.fds`` and ``self.csvreader`` have been set
        :rtype: bool
        """
        path = self.filename
        if not os.path.isfile(path):
            self.log.debug("File not appeared yet: %s", path)
            return False

        if os.path.getsize(path) == 0:
            self.log.debug("File is empty: %s", path)
            return False

        self.log.debug("Opening file: %s", path)
        # the handle is kept on the instance and stays open for the reader
        self.fds = open(path)
        column_names = (
            "timeStamp", "label", "elapsed", "Connect", "Send", "Latency",
            "Receive", "internal", "bsent", "brecv", "opretcode",
            "responseCode",
        )
        self.csvreader = csv.DictReader(self.fds, column_names,
                                        dialect=csv.excel_tab())
        return True
 def _get_writer(self, configuration):
     """Return a tab-separated ``csv.writer`` bound to ``configuration.output``.

     Rows are terminated with ``configuration.line_separator``.
     """
     # Custom dialect needed as a workaround for
     # http://ironpython.codeplex.com/workitem/33627
     # (the line terminator is set on a dialect instance, not passed to
     # csv.writer directly)
     dialect = csv.excel_tab()
     dialect.lineterminator = configuration.line_separator
     return csv.writer(configuration.output, dialect=dialect)
 - check log of check_id_map.py for relevant errors:
 >> grep -v "Removed bad chars" checkmap/NewMapping.log
 
 - take subsets of .fna and .qual as necessary to render them isomorphic
 - run split_libs:
 >> macqiime split_libraries.py -e 0 -m checkmap/NewMapping_corrected.txt -f MySeqs.fna -q MyQual.qual -o splib-out -j run_prefix -b 8
 
""" % sys.argv[0]

if __name__ == "__main__":
    # Command-line arguments are read as "-flag value" pairs:
    #   -m  path of the existing mapping file
    #   -o  path of the mapping file to write
    sargs = dict(zip(sys.argv[1::2], sys.argv[2::2]))

    dialect = csv.excel_tab()

    # Context managers close both files (and flush the output) even when a
    # row fails to parse. `next()` and `with` run on Python 2.6+ and 3,
    # unlike the former `.next()` / `map(file.close, ...)`.
    with open(sargs["-m"]) as mapping:
        with open(sargs["-o"], "w") as newmapping:
            mapping_csv = csv.reader(mapping, dialect)
            newmapping_csv = csv.writer(newmapping, dialect)  # todo check delims, linebrs

            # Insert a "run_prefix" column just before the last column.
            heads = next(mapping_csv)
            newmapping_csv.writerow(heads[:-1] + ["run_prefix"] + heads[-1:])

            # Each row gets its own first field as the run_prefix value.
            for row in mapping_csv:
                newmapping_csv.writerow(row[:-1] + [row[0]] + row[-1:])
    
 
示例#15
0
    def browse(self):
        """
        Open a file dialog and select a user specified file.

        After a file is accepted:

        * the directory and name filter are remembered in
          ``self.dialog_state``;
        * the user is asked to confirm when the file's MIME type does not
          inherit from ``text/plain``;
        * an initial dialect is chosen from the selected name filter, or
          sniffed from the file when "all files" was selected;
        * options stored for the same path in the settings history take
          precedence over the freshly guessed ones;
        * the import options dialog is shown, and on acceptance the path
          and options are passed to ``set_selected_file``.
        """
        formats = [
            "Text - comma separated (*.csv, *)",
            "Text - tab separated (*.tsv, *)",
            "Text - all files (*)"
        ]

        dlg = QFileDialog(
            self, windowTitle="Open Data File",
            acceptMode=QFileDialog.AcceptOpen,
            fileMode=QFileDialog.ExistingFile
        )
        dlg.setNameFilters(formats)
        state = self.dialog_state
        lastdir = state.get("directory", "")
        lastfilter = state.get("filter", "")

        if lastdir and os.path.isdir(lastdir):
            dlg.setDirectory(lastdir)
        if lastfilter:
            dlg.selectNameFilter(lastfilter)

        status = dlg.exec_()
        dlg.deleteLater()
        if status == QFileDialog.Accepted:
            self.dialog_state["directory"] = dlg.directory().absolutePath()
            self.dialog_state["filter"] = dlg.selectedNameFilter()

            selected_filter = dlg.selectedNameFilter()
            path = dlg.selectedFiles()[0]
            # pre-flight check; try to determine the nature of the file
            mtype = _mime_type_for_path(path)
            if not mtype.inherits("text/plain"):
                mb = QMessageBox(
                    parent=self,
                    windowTitle="",
                    icon=QMessageBox.Question,
                    text="The '{basename}' may be a binary file.\n"
                         "Are you sure you want to continue?".format(
                             basename=os.path.basename(path)),
                    standardButtons=QMessageBox.Cancel | QMessageBox.Yes
                )
                mb.setWindowModality(Qt.WindowModal)
                if mb.exec() == QMessageBox.Cancel:
                    return

            # initialize dialect based on selected extension
            if selected_filter in formats[:-1]:
                # formats[:-1] holds exactly two entries: comma (index 0)
                # and tab (index 1), so a two-way choice covers every case.
                if formats.index(selected_filter) == 0:
                    dialect = csv.excel()
                else:
                    dialect = csv.excel_tab()
                header = True
            else:
                # "all files": guess the dialect and header from the content
                # and fall back to comma separated with a header row
                try:
                    dialect, header = sniff_csv_with_path(path)
                except Exception:  # pylint: disable=broad-except
                    dialect, header = csv.excel(), True

            options = None
            # Search for path in history.
            # If found use the stored params to initialize the import dialog
            items = self.itemsFromSettings()
            idx = index_where(items, lambda t: samepath(t[0], path))
            if idx is not None:
                _, options_ = items[idx]
                if options_ is not None:
                    options = options_

            if options is None:
                if not header:
                    rowspec = []
                else:
                    rowspec = [(range(0, 1), RowSpec.Header)]
                options = Options(
                    encoding="utf-8", dialect=dialect, rowspec=rowspec)

            dlg = CSVImportDialog(
                self, windowTitle="Import Options", sizeGripEnabled=True)
            dlg.setWindowModality(Qt.WindowModal)
            dlg.setPath(path)
            dlg.setOptions(options)
            status = dlg.exec_()
            dlg.deleteLater()
            if status == QDialog.Accepted:
                self.set_selected_file(path, dlg.options())
    def browse(self):
        """
        Open a file dialog and select a user specified file.

        After a file is accepted:

        * the directory and name filter are remembered in
          ``self.dialog_state``;
        * the user is asked to confirm when the file's MIME type does not
          inherit from ``text/plain``;
        * an initial dialect is chosen from the selected name filter, or
          sniffed from the file when "all files" was selected;
        * options stored for the same path in the settings history take
          precedence over the freshly guessed ones;
        * the import options dialog is shown, and on acceptance the path
          and options are passed to ``set_selected_file``.
        """
        formats = [
            "Text - comma separated (*.csv, *)",
            "Text - tab separated (*.tsv, *)",
            "Text - all files (*)"
        ]

        dlg = QFileDialog(
            self, windowTitle="Open Data File",
            acceptMode=QFileDialog.AcceptOpen,
            fileMode=QFileDialog.ExistingFile
        )
        dlg.setNameFilters(formats)
        state = self.dialog_state
        lastdir = state.get("directory", "")
        lastfilter = state.get("filter", "")

        if lastdir and os.path.isdir(lastdir):
            dlg.setDirectory(lastdir)
        if lastfilter:
            dlg.selectNameFilter(lastfilter)

        status = dlg.exec_()
        dlg.deleteLater()
        if status == QFileDialog.Accepted:
            self.dialog_state["directory"] = dlg.directory().absolutePath()
            self.dialog_state["filter"] = dlg.selectedNameFilter()

            selected_filter = dlg.selectedNameFilter()
            path = dlg.selectedFiles()[0]
            # pre-flight check; try to determine the nature of the file
            mtype = _mime_type_for_path(path)
            if not mtype.inherits("text/plain"):
                mb = QMessageBox(
                    parent=self,
                    windowTitle="",
                    icon=QMessageBox.Question,
                    text="The '{basename}' may be a binary file.\n"
                         "Are you sure you want to continue?".format(
                            basename=os.path.basename(path)),
                    standardButtons=QMessageBox.Cancel | QMessageBox.Yes
                )
                mb.setWindowModality(Qt.WindowModal)
                if mb.exec() == QMessageBox.Cancel:
                    return

            # initialize dialect based on selected extension
            if selected_filter in formats[:-1]:
                # formats[:-1] holds exactly two entries: comma (index 0)
                # and tab (index 1), so a two-way choice covers every case.
                if formats.index(selected_filter) == 0:
                    dialect = csv.excel()
                else:
                    dialect = csv.excel_tab()
                header = True
            else:
                # "all files": guess the dialect and header from the content
                # and fall back to comma separated with a header row
                try:
                    dialect, header = sniff_csv_with_path(path)
                except Exception:
                    dialect, header = csv.excel(), True

            options = None
            # Search for path in history.
            # If found use the stored params to initialize the import dialog
            items = self.itemsFromSettings()
            idx = index_where(items, lambda t: samepath(t[0], path))
            if idx is not None:
                _, options_ = items[idx]
                if options_ is not None:
                    options = options_

            if options is None:
                if not header:
                    rowspec = []
                else:
                    rowspec = [(range(0, 1), RowSpec.Header)]
                options = Options(
                    encoding="utf-8", dialect=dialect, rowspec=rowspec)

            dlg = CSVImportDialog(
                self, windowTitle="Import Options", sizeGripEnabled=True)
            dlg.setWindowModality(Qt.WindowModal)
            dlg.setPath(path)
            dlg.setOptions(options)
            status = dlg.exec_()
            dlg.deleteLater()
            if status == QDialog.Accepted:
                self.set_selected_file(path, dlg.options())
示例#17
0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import csv
import sqlite3
import sys
import time
from datetime import datetime

# Message direction codes.
# NOTE(review): presumably these match the values of the Direction column
# selected in dumpChatID -- confirm against the database schema.
DIRECTION_OUT = 0
DIRECTION_IN = 1

# Tab-separated CSV dialect instance, created once at module level.
dialect = csv.excel_tab()


def dumpChatID(db, chat_id, writer):
    """Iterate over the messages of one chat, ordered by ``SortOrder``.

    Selects timestamp, direction, contact name, number and message body for
    every event whose ``ChatID`` equals ``chat_id``.

    NOTE(review): the visible part of this function never uses ``writer``
    or ``msgdate``; the body looks truncated -- confirm against the full
    source.

    :param db: object with an ``execute(sql, params)`` method
    :param chat_id: value bound to the ``ChatID`` placeholder of the query
    :param writer: not used in the visible code; presumably a csv writer
    """
    results = db.execute(
        '''SELECT e.Timestamp, Direction, Name, Number, Body 
							FROM Events e
							INNER JOIN Messages m ON m.EventID = e.EventID
							INNER JOIN Contact c ON c.ContactID = e.ContactID
							WHERE e.ChatID = ?
							ORDER BY SortOrder''', [chat_id])
    for row in results:
        (timestamp, direction, name, number, body) = row
        # presumably the stored timestamp is in milliseconds -- TODO confirm
        timestamp /= 1000
        dt = datetime.fromtimestamp(timestamp)
        msgdate = dt.strftime("%m/%d/%Y")
示例#18
0
class TestOWCSVFileImport(WidgetTest):
    """Tests for restoring settings in, and running, ``OWCSVFileImport``."""

    def setUp(self):
        """Create ``self.widget`` with ``_local_settings`` patched out."""
        self._stack = ExitStack().__enter__()
        # patch `_local_settings` to avoid side effects, across tests
        fname = self._stack.enter_context(named_file(""))
        s = QSettings(fname, QSettings.IniFormat)
        self._stack.enter_context(
            mock.patch.object(owcsvimport.OWCSVFileImport, "_local_settings",
                              lambda *a: s))
        self.widget = self.create_widget(owcsvimport.OWCSVFileImport)

    def tearDown(self):
        """Dispose of ``self.widget`` and undo everything on the exit stack."""
        self.widgets.remove(self.widget)
        self.widget.onDeleteWidget()
        self.widget = None
        self._stack.close()

    def test_basic(self):
        """Activating a recent item and cancelling must not raise."""
        w = self.widget
        w.activate_recent(0)
        w.cancel()

    # Import options shared by the data-regions.tab tests: tab separated,
    # first row is the header, the following two rows are skipped.
    data_regions_options = owcsvimport.Options(
        encoding="ascii",
        dialect=csv.excel_tab(),
        columntypes=[
            (range(0, 1), ColumnType.Categorical),
            (range(1, 2), ColumnType.Text),
            (range(2, 3), ColumnType.Categorical),
        ],
        rowspec=[
            (range(0, 1), RowSpec.Header),
            (range(1, 3), RowSpec.Skipped),
        ],
    )

    def _check_data_regions(self, table):
        """Assert that ``table`` holds the expected data-regions.tab content."""
        # (the length assertion used to be stated twice)
        self.assertEqual(len(table), 3)
        self.assertTrue(table.domain["id"].is_discrete)
        self.assertTrue(table.domain["continent"].is_discrete)
        self.assertTrue(table.domain["state"].is_string)
        assert_array_equal(table.X, [[0, 1], [1, 1], [2, 0]])
        assert_array_equal(table.metas,
                           np.array([["UK"], ["Russia"], ["Mexico"]], object))

    def test_restore(self):
        """A widget created from stored ``_session_items`` restores and loads the file."""
        dirname = os.path.dirname(__file__)
        path = os.path.join(dirname, "data-regions.tab")

        w = self.create_widget(owcsvimport.OWCSVFileImport,
                               stored_settings={
                                   "_session_items":
                                   [(path, self.data_regions_options.as_dict())
                                    ]
                               })
        item = w.current_item()
        self.assertEqual(item.path(), path)
        self.assertEqual(item.options(), self.data_regions_options)
        out = self.get_output("Data", w)
        self._check_data_regions(out)
        self.assertEqual(out.name, "data-regions")

    def test_restore_from_local(self):
        """An item stored under "recent" in the local settings is restored too."""
        dirname = os.path.dirname(__file__)
        path = os.path.join(dirname, "data-regions.tab")
        s = owcsvimport.OWCSVFileImport._local_settings()
        s.clear()
        QSettings_writeArray(
            s, "recent",
            [{
                "path": path,
                "options": json.dumps(self.data_regions_options.as_dict())
            }])
        w = self.create_widget(owcsvimport.OWCSVFileImport, )
        item = w.current_item()
        self.assertEqual(item.path(), path)
        self.assertEqual(item.options(), self.data_regions_options)
        self.assertEqual(
            w._session_items,
            [(path, self.data_regions_options.as_dict())],
            "local settings item must be recorded in _session_items when "
            "activated in __init__",
        )
        self._check_data_regions(self.get_output("Data", w))

    def test_summary(self):
        """Check if status bar is updated when data is received"""
        dirname = os.path.dirname(__file__)
        path = os.path.join(dirname, "data-regions.tab")
        widget = self.create_widget(owcsvimport.OWCSVFileImport,
                                    stored_settings={
                                        "_session_items":
                                        [(path,
                                          self.data_regions_options.as_dict())]
                                    })
        # replace the summary setter with a mock to capture its arguments
        output_sum = widget.info.set_output_summary = mock.Mock()
        widget.commit()
        self.wait_until_finished(widget)
        output = self.get_output("Data", widget)
        output_sum.assert_called_with(len(output),
                                      format_summary_details(output))
示例#19
0
class TestOWCSVFileImport(WidgetTest):
    """Widget tests for ``owcsvimport.OWCSVFileImport``.

    The tests cover restoring items from stored and local settings, column
    type guessing (and the pre-version-2 behaviour), and the various
    "browse" code paths, including variable-prefixed (relative) paths.
    """

    def create_widget(self,
                      cls: Type[W],
                      stored_settings: Optional[dict] = None,
                      reset_default_settings=True,
                      **kwargs) -> W:
        """Create a widget and register its teardown on the exit stack.

        Args:
            cls: Widget class to instantiate.
            stored_settings: Settings passed to ``cls.__new__``.
            reset_default_settings: When true, ``reset_default_settings(cls)``
                is called before the widget is created.
            **kwargs: Extra keyword arguments forwarded to ``cls.__new__``
                (the tests below use ``env=...``).

        Returns:
            The initialized widget; it is deleted when ``self._stack`` is
            closed in ``tearDown``.
        """
        if reset_default_settings:
            self.reset_default_settings(cls)
        widget = cls.__new__(cls,
                             signal_manager=self.signal_manager,
                             stored_settings=stored_settings,
                             **kwargs)
        widget.__init__()

        def delete():
            widget.onDeleteWidget()
            widget.close()
            widget.deleteLater()

        self._stack.callback(delete)
        return widget

    def setUp(self):
        """Patch the widget's local settings and create a default widget."""
        super().setUp()
        self._stack = ExitStack().__enter__()
        # patch `_local_settings` to avoid side effects, across tests
        fname = self._stack.enter_context(named_file(""))
        s = QSettings(fname, QSettings.IniFormat)
        self._stack.enter_context(
            mock.patch.object(owcsvimport.OWCSVFileImport, "_local_settings",
                              lambda *a: s))
        self.widget = self.create_widget(owcsvimport.OWCSVFileImport)

    def tearDown(self):
        """Drop the default widget and unwind everything on the exit stack."""
        del self.widget
        self._stack.close()
        super().tearDown()

    def test_basic(self):
        """Smoke test: activating a recent item and cancelling must not fail."""
        w = self.widget
        w.activate_recent(0)
        w.cancel()

    # Import options and path for the "data-regions.tab" fixture, shared by
    # several tests below.
    data_regions_options = owcsvimport.Options(
        encoding="ascii",
        dialect=csv.excel_tab(),
        columntypes=[
            (range(0, 1), ColumnType.Categorical),
            (range(1, 2), ColumnType.Text),
            (range(2, 3), ColumnType.Categorical),
        ],
        rowspec=[
            (range(0, 1), RowSpec.Header),
            (range(1, 3), RowSpec.Skipped),
        ],
    )
    data_regions_path = os.path.join(os.path.dirname(__file__),
                                     "data-regions.tab")

    def _check_data_regions(self, table):
        """Assert that `table` matches the expected "data-regions" contents."""
        self.assertEqual(len(table), 3)
        self.assertTrue(table.domain["id"].is_discrete)
        self.assertTrue(table.domain["continent"].is_discrete)
        self.assertTrue(table.domain["state"].is_string)
        assert_array_equal(table.X, [[0, 1], [1, 1], [2, 0]])
        assert_array_equal(table.metas,
                           np.array([["UK"], ["Russia"], ["Mexico"]], object))

    def test_restore(self):
        """A widget created with `_session_items` restores item and output."""
        path = self.data_regions_path

        w = self.create_widget(owcsvimport.OWCSVFileImport,
                               stored_settings={
                                   "_session_items":
                                   [(path, self.data_regions_options.as_dict())
                                    ]
                               })
        item = w.current_item()
        self.assertTrue(samepath(item.path(), path))
        self.assertEqual(item.options(), self.data_regions_options)
        out = self.get_output("Data", w)
        self._check_data_regions(out)
        self.assertEqual(out.name, "data-regions")

    def test_restore_from_local(self):
        """A recent item from local settings is used only once activated.

        The new widget starts with no current item; after the first entry of
        the recent combo box is activated, the item must be current and must
        be packed into `_session_items_v2`.
        """
        path = self.data_regions_path
        s = owcsvimport.OWCSVFileImport._local_settings()
        s.clear()
        QSettings_writeArray(
            s, "recent",
            [{
                "path": path,
                "options": json.dumps(self.data_regions_options.as_dict())
            }])
        w = self.create_widget(owcsvimport.OWCSVFileImport, )
        item = w.current_item()
        self.assertIsNone(item)
        simulate.combobox_activate_index(w.recent_combo, 0)
        item = w.current_item()
        self.assertTrue(samepath(item.path(), path))
        self.assertEqual(item.options(), self.data_regions_options)
        data = w.settingsHandler.pack_data(w)
        self.assertEqual(
            data['_session_items_v2'],
            [(PathItem.AbsPath(path).as_dict(),
              self.data_regions_options.as_dict())],
            "local settings item must be recorded in _session_items_v2 when "
            "activated",
        )
        self._check_data_regions(self.get_output("Data", w))

    # Import options for "data-csv-types.tab" with every column set to Auto.
    data_csv_types_options = owcsvimport.Options(encoding="ascii",
                                                 dialect=csv.excel_tab(),
                                                 columntypes=[
                                                     (range(0, 5),
                                                      ColumnType.Auto),
                                                 ])

    def test_type_guessing(self):
        """ Check if correct column type is guessed when column type auto """
        dirname = os.path.dirname(__file__)
        path = os.path.join(dirname, "data-csv-types.tab")
        widget = self.create_widget(
            owcsvimport.OWCSVFileImport,
            stored_settings={
                "_session_items":
                [(path, self.data_csv_types_options.as_dict())],
                "__version__": 2  # guessing works for versions >= 2
            })
        widget.commit()
        self.wait_until_finished(widget)
        output = self.get_output("Data", widget)
        domain = output.domain

        self.assertIsInstance(domain["time"], TimeVariable)
        self.assertIsInstance(domain["discrete1"], DiscreteVariable)
        self.assertIsInstance(domain["discrete2"], DiscreteVariable)
        self.assertIsInstance(domain["numeric1"], ContinuousVariable)
        self.assertIsInstance(domain["numeric2"], ContinuousVariable)
        self.assertIsInstance(domain["string"], StringVariable)

    def test_discrete_values_sort(self):
        """ Values in the discrete variable should be naturally sorted """
        dirname = os.path.dirname(__file__)
        path = os.path.join(dirname, "data-csv-types.tab")
        options = owcsvimport.Options(encoding="ascii",
                                      dialect=csv.excel_tab(),
                                      columntypes=[
                                          (range(0, 1), ColumnType.Auto),
                                          (range(1,
                                                 2), ColumnType.Categorical),
                                          (range(2, 5), ColumnType.Auto)
                                      ])
        widget = self.create_widget(
            owcsvimport.OWCSVFileImport,
            stored_settings={
                "_session_items": [(path, options.as_dict())],
                "__version__": 2  # guessing works for versions >= 2
            })
        widget.commit()
        self.wait_until_finished(widget)
        output = self.get_output("Data", widget)
        self.assertTupleEqual(('1', '3', '4', '5', '12'),
                              output.domain.attributes[1].values)

    def test_backward_compatibility(self):
        """
        Check that widget have old behaviour on workflows with version < 2
        """
        dirname = os.path.dirname(__file__)
        path = os.path.join(dirname, "data-csv-types.tab")
        widget = self.create_widget(
            owcsvimport.OWCSVFileImport,
            stored_settings={
                "_session_items":
                [(path, self.data_csv_types_options.as_dict())],
                "__version__": 1  # guessing works for versions >= 2
            })
        widget.commit()
        self.wait_until_finished(widget)
        output = self.get_output("Data", widget)
        domain = output.domain

        self.assertIsInstance(domain["time"], StringVariable)
        self.assertIsInstance(domain["discrete1"], ContinuousVariable)
        self.assertIsInstance(domain["discrete2"], StringVariable)
        self.assertIsInstance(domain["numeric1"], ContinuousVariable)
        self.assertIsInstance(domain["numeric2"], ContinuousVariable)
        self.assertIsInstance(domain["string"], StringVariable)

    @staticmethod
    @contextmanager
    def _browse_setup(widget: OWCSVFileImport, path: str):
        """Make the widget's browse and import dialogs accept without a user.

        While the context is active, ``widget._browse_dialog`` returns a
        non-native file dialog with `path` selected whose ``exec`` reports
        ``Accepted``, and ``CSVImportDialog.exec`` reports ``Accepted`` too.
        """
        browse_dialog = widget._browse_dialog
        with mock.patch.object(widget, "_browse_dialog") as r:
            dlg = browse_dialog()
            dlg.setOption(QFileDialog.DontUseNativeDialog)
            dlg.selectFile(path)
            dlg.exec = lambda: QFileDialog.Accepted
            r.return_value = dlg
            with mock.patch.object(owcsvimport.CSVImportDialog, "exec",
                                   lambda _: QFileDialog.Accepted):
                yield

    def test_browse(self):
        """Browsing for a file makes it the current item."""
        widget = self.widget
        path = self.data_regions_path
        with self._browse_setup(widget, path):
            widget.browse()
        cur = widget.current_item()
        self.assertIsNotNone(cur)
        self.assertTrue(samepath(cur.path(), path))

    def test_browse_prefix(self):
        """Browsing relative to "basedir" stores the item as a VarPath."""
        widget = self.widget
        path = self.data_regions_path
        with self._browse_setup(widget, path):
            basedir = os.path.dirname(__file__)
            widget.workflowEnv = lambda: {"basedir": basedir}
            widget.workflowEnvChanged("basedir", basedir, "")
            widget.browse_relative(prefixname="basedir")

        cur = widget.current_item()
        self.assertIsNotNone(cur)
        self.assertTrue(samepath(cur.path(), path))
        self.assertIsInstance(cur.varPath(), PathItem.VarPath)

    def test_browse_prefix_parent(self):
        """A file outside "basedir" is rejected by the relative browse.

        "basedir" points at a "bs" subdirectory while the selected file lives
        in its parent; the "must be relative" message box is expected and no
        item becomes current.
        """
        widget = self.widget
        path = self.data_regions_path

        with self._browse_setup(widget, path):
            basedir = os.path.join(os.path.dirname(__file__), "bs")
            widget.workflowEnv = lambda: {"basedir": basedir}
            widget.workflowEnvChanged("basedir", basedir, "")
            mb = widget._path_must_be_relative_mb = mock.Mock()
            widget.browse_relative(prefixname="basedir")
            mb.assert_called()
        self.assertIsNone(widget.current_item())

    def test_browse_for_missing(self):
        """Activating a missing file opens a dialog to locate a replacement.

        After an existing file is accepted in the dialog, it becomes the
        current item and keeps the options stored for the missing one.
        """
        missing = os.path.join(os.path.dirname(__file__),
                               "this file does not exist.csv")
        widget = self.create_widget(owcsvimport.OWCSVFileImport,
                                    stored_settings={
                                        "_session_items":
                                        [(missing,
                                          self.data_regions_options.as_dict())]
                                    })
        widget.activate_recent(0)
        dlg = widget.findChild(QFileDialog)
        self.assertIsNotNone(dlg)
        # calling selectFile when using native (macOS) dialog does not have
        # an effect - at least not immediately;
        dlg.setOption(QFileDialog.DontUseNativeDialog)
        dlg.selectFile(self.data_regions_path)
        dlg.accept()
        cur = widget.current_item()
        self.assertTrue(samepath(self.data_regions_path, cur.path()))
        self.assertEqual(self.data_regions_options.as_dict(),
                         cur.options().as_dict())

    def test_browse_for_missing_prefixed(self):
        """A missing "basedir"-prefixed file is replaced by a prefixed one.

        The replacement is selected inside "basedir", so the resulting item
        must keep a ``VarPath`` relative to the same prefix.
        """
        path = self.data_regions_path
        basedir = os.path.dirname(path)
        widget = self.create_widget(
            owcsvimport.OWCSVFileImport,
            stored_settings={
                "__version__":
                3,
                "_session_items_v2":
                [(PathItem.VarPath("basedir",
                                   "this file does not exist.csv").as_dict(),
                  self.data_regions_options.as_dict())]
            },
            env={"basedir": basedir})
        widget.activate_recent(0)
        dlg = widget.findChild(QFileDialog)
        self.assertIsNotNone(dlg)
        # calling selectFile when using native (macOS) dialog does not have
        # an effect - at least not immediately;
        dlg.setOption(QFileDialog.DontUseNativeDialog)
        dlg.selectFile(path)
        dlg.accept()
        cur = widget.current_item()
        self.assertTrue(samepath(path, cur.path()))
        self.assertEqual(cur.varPath(),
                         PathItem.VarPath("basedir", "data-regions.tab"))
        self.assertEqual(self.data_regions_options.as_dict(),
                         cur.options().as_dict())

    def test_browse_for_missing_prefixed_parent(self):
        """A replacement outside "basedir" is refused for a prefixed item.

        "basedir" points at an "origin1" subdirectory while the selected file
        lives in its parent; the "must be relative" message box is expected
        and the current item stays the original (missing) one.
        """
        path = self.data_regions_path
        basedir = os.path.join(os.path.dirname(path), "origin1")
        item = (PathItem.VarPath("basedir", "this file does not exist.csv"),
                self.data_regions_options)
        widget = self.create_widget(owcsvimport.OWCSVFileImport,
                                    stored_settings={
                                        "__version__":
                                        3,
                                        "_session_items_v2":
                                        [(item[0].as_dict(), item[1].as_dict())
                                         ]
                                    },
                                    env={"basedir": basedir})
        mb = widget._path_must_be_relative_mb = mock.Mock()
        widget.activate_recent(0)
        dlg = widget.findChild(QFileDialog)
        self.assertIsNotNone(dlg)
        # calling selectFile when using native (macOS) dialog does not have
        # an effect - at least not immediately;
        dlg.setOption(QFileDialog.DontUseNativeDialog)
        dlg.selectFile(path)
        dlg.accept()
        mb.assert_called()
        cur = widget.current_item()
        self.assertEqual(item[0], cur.varPath())
        self.assertEqual(item[1].as_dict(), cur.options().as_dict())
示例#20
0
	def _read(self):
		"""Parse every matching file in ``self._path`` into a roster.

		Files in ``self._path`` whose name ends with ``self._ext`` are read as
		tab-delimited tables. Each row must contain the ``self._id`` column;
		its value keys a ``Marine.Marine`` entry in the roster, created on
		first sight. Training columns are passed to ``marine.train`` and every
		other non-empty column is stored through ``marine.add_field``.

		A file with a row lacking the ID column is abandoned with a logged
		warning; rows processed before that point stay in the roster.

		Returns:
			dict mapping each ID value to its ``Marine.Marine`` object.

		Raises:
			OSError: if ``self._path`` cannot be listed.
		"""
		try:
			entries = os.listdir(self._path)
		except OSError as e:
			print('Invalid path: {0}'.format(e))
			self._log.warn('Bad file path')
			raise
		# keep only the files whose name ends with the configured extension
		file_names = [name for name in entries if name.endswith(self._ext)]

		roster = {}
		# parse each matching file in _path
		for fname in file_names:
			fpath = os.path.join(self._path, fname)
			self.strip_file(fpath)	# remove all null chars from file
			print('Opening: {0}'.format(fpath))
			try:
				# open as rbU (read, binary, Universal-newline)
				with open(fpath, 'rbU') as fhandle:
					reader = csv.DictReader(fhandle, dialect=csv.excel_tab())

					# process each line of data
					for line in reader:
						# every line needs an ID; without it the whole file
						# is rejected (handled by `except ValueError` below)
						if self._id not in line:
							raise ValueError('Invalid Source File: {0} (skipping)'.format(fpath))

						marine_id = line[self._id]
						try:
							marine = roster[marine_id]
						except KeyError:	# not in the roster yet, create it
							marine = roster[marine_id] = Marine.Marine(marine_id)

						# deal with training codes differently than other fields
						if 'Training Event Code' in line:
							code = line[self._tr['TC']]
							date = self.format_date(line[self._tr['TD']])
							desc = line[self._tr['TE']]
							marine.train(code, date, desc)

						# add any other fields to the marine
						for field, value in line.items():
							if 'Training Event' in field:
								continue	# exclude training fields
							if field == self._id:
								continue	# the ID lives in a separate attribute
							if field in self._dt_colms and value != '':
								try:
									value = self.format_date(value)
								except ValueError as e:
									# keep the raw value when it is not a date
									print('Error converting date: {0}'.format(e))
							if value == '':	# skip any empty values
								continue

							# convert field (colm) to uppercase and strip any spaces
							field = field.replace(' ', '').upper()
							marine.add_field(field, value)

			except TypeError:
				self._log.exception('Type Error')
			except ValueError as e:
				self._log.warn(e)
			except Exception:
				# best effort: log and go on with the next file, but let
				# KeyboardInterrupt/SystemExit propagate
				self._log.exception('Unhandled Exception')

		# return roster after processing all files in directory matching criteria
		return roster
示例#21
0
 def _get_writer(self, configuration):
     """Return a tab-delimited ``csv.writer`` bound to ``configuration.output``.

     The line terminator is ``configuration.line_separator`` when ``PY2`` is
     true and ``'\\n'`` otherwise.
     """
     # A dialect instance (rather than the class) is customized as a
     # workaround for http://ironpython.codeplex.com/workitem/33627
     tab_dialect = csv.excel_tab()
     if PY2:
         tab_dialect.lineterminator = configuration.line_separator
     else:
         tab_dialect.lineterminator = '\n'
     return csv.writer(configuration.output, dialect=tab_dialect)
示例#22
0
文件: csv.py 项目: djrobstep/results
def from_tsv(f, *args, dialect=None, **kwargs):
    """Load tab-separated data from `f` by delegating to ``from_csv``.

    A falsy `dialect` is replaced by a ``csv.excel_tab`` instance; all other
    positional and keyword arguments are forwarded to ``from_csv`` unchanged.
    """
    if not dialect:
        dialect = csv.excel_tab()
    return from_csv(f, *args, dialect=dialect, **kwargs)