package_input.py
# coding: utf-8
import json
import os
import re
import tempfile
from re import sub
import requests
import babel.numbers
import dateutil.parser
import shortuuid
import csv
from enum import Enum
from sqlalchemy.sql import text
from psycopg2 import sql
from kids.cache import cache
import package
from app import db, logger
from app import get_db_cursor
from app import reset_cache
from consortium import Consortium
from app import s3_client
from excel import convert_spreadsheet_to_csv
from package_file_error_rows import PackageFileErrorRow
from raw_file_upload_object import RawFileUploadObject
from util import safe_commit
class PackageInput:
    @staticmethod
    def normalize_date(date_str, warn_if_blank=False, default=None):
        if date_str:
            try:
                parsed_date = dateutil.parser.parse(date_str, default=default)
                if str(parsed_date.year) not in date_str:
                    return ParseWarning.ambiguous_date
                return parsed_date.isoformat()
            except Exception:
                return ParseWarning.bad_date
        else:
            return ParseWarning.bad_date if warn_if_blank else None

    @staticmethod
    def normalize_year(year, warn_if_blank=False):
        if year:
            try:
                return dateutil.parser.parse(year).year
            except Exception:
                return ParseWarning.bad_year
        else:
            return ParseWarning.bad_year if warn_if_blank else None

    @staticmethod
    def normalize_int(value, warn_if_blank=False):
        if value:
            try:
                return int(value)
            except Exception:
                return ParseWarning.bad_int
        else:
            return ParseWarning.no_int if warn_if_blank else None

    @staticmethod
    def normalize_price(price, warn_if_blank=False):
        if price:
            try:
                if "." in price and "," in price and price.find(".") < price.find(","):
                    locale = "de"
                elif price.count(",") == 1 and (re.search(r"\d{4,},", price) or not re.search(r",\d{3}[$.]", price)):
                    locale = "de"
                else:
                    locale = "en"
                price = sub(r"[^\d.,]", "", price)
                parsed_price = babel.numbers.parse_decimal(price, locale=locale)
                return int(round(parsed_price))
            except babel.numbers.NumberFormatError:
                return ParseWarning.bad_usd_price
        else:
            return ParseWarning.no_usd_price if warn_if_blank else None
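
    # Illustrative examples of the locale heuristic above (added for clarity;
    # these comments are not part of the original source):
    #   normalize_price("$1,234.56")  -> 1235  (parsed with the "en" locale)
    #   normalize_price("1.234,56 €") -> 1235  (parsed with the "de" locale)
    #   normalize_price("1234,56")    -> 1235  (comma treated as a decimal separator, "de" locale)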

    @staticmethod
    def normalize_issn(issn, warn_if_blank=False):
        from openalex import oa_issns
        if issn:
            issn = issn.replace("issn:", "")
            issn = sub(r"\s", "", issn).upper()
            if re.match(r"^\d{4}-?\d{3}(?:X|\d)$", issn):
                issn = issn.replace("-", "")
                issn = issn[0:4] + "-" + issn[4:8]
                if issn not in oa_issns:
                    print(f"Missing journal in normalize_issn {issn} from OpenAlex: https://api.openalex.org/venues/issn:{issn}")
                    return ParseWarning.unknown_issn
                return issn
            elif re.match(r"^[A-Z0-9]{4}-\d{3}(?:X|\d)$", issn):
                return ParseWarning.bundle_issn
            else:
                return ParseWarning.bad_issn
        else:
            return ParseWarning.no_issn if warn_if_blank else None

    @staticmethod
    def strip_text(txt, warn_if_blank=False):
        if txt is not None:
            return txt.strip()
        else:
            return ParseWarning.blank_text if warn_if_blank else None

    def csv_columns(self):
        raise NotImplementedError()

    def import_view_name(self):
        raise NotImplementedError()

    def destination_table(self):
        raise NotImplementedError()

    def file_type_label(self):
        raise NotImplementedError()

    def translate_row(self, row):
        return row

    def ignore_row(self, row):
        return False

    def apply_header(self, normalized_rows, header_rows):
        return normalized_rows

    def update_subscriptions(self):
        pass
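
    # A minimal sketch (added for illustration, not in the original) of the kind of spec
    # dict a subclass's csv_columns() is expected to return, based on the keys read by
    # normalize_column_name() and normalize_cell() below. The column names here are
    # hypothetical:
    #
    #   def csv_columns(self):
    #       return {
    #           "issn": {
    #               "normalize": self.normalize_issn,      # called by normalize_cell()
    #               "name_snippets": ["issn"],             # matched against header text
    #               "excluded_name_snippets": ["eissn"],   # headers containing these are skipped
    #               "exact_name": False,                   # True -> snippet must equal the header
    #               "warn_if_blank": True,                 # passed to the normalize function
    #               "required": True,                      # default True; missing columns raise
    #           },
    #           "usd_price": {
    #               "normalize": self.normalize_price,
    #               "name_snippets": ["price", "usd"],
    #           },
    #       }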

    @cache
    def normalize_column_name(self, raw_column_name):
        for canonical_name, spec in list(self.csv_columns().items()):
            name_snippets = spec["name_snippets"]
            excluded_name_snippets = spec.get("excluded_name_snippets", [])
            for snippet in name_snippets:
                snippet = snippet.lower()
                column_name = raw_column_name.strip().lower()
                exact_name = spec.get("exact_name", False)
                contains_excluded_snippet = False
                for ens in excluded_name_snippets:
                    if ens in column_name:
                        contains_excluded_snippet = True
                        break
                if not contains_excluded_snippet:
                    if (exact_name and snippet == column_name) or (not exact_name and snippet in column_name):
                        return canonical_name
        return None

    @cache
    def normalize_cell(self, normalized_column_name, raw_column_value):
        spec = self.csv_columns()[normalized_column_name]
        return spec["normalize"](raw_column_value, spec.get("warn_if_blank", False))

    def _copy_staging_csv_to_s3(self, filename, package_id):
        bucket_name = "jump-redshift-staging"
        object_name = "{}_{}_{}".format(package_id, self.__class__.__name__, shortuuid.uuid())
        s3_client.upload_file(filename, bucket_name, object_name)
        return "s3://{}/{}".format(bucket_name, object_name)

    def _raw_s3_bucket(self):
        upload_bucket = "unsub-file-uploads-testing" if os.getenv("TESTING_DB") else "unsub-file-uploads"
        return upload_bucket

    def _copy_raw_to_s3(self, filename, package_id, num_rows=None, error=None, error_details=None):
        if "." in filename:
            suffix = ".{}".format(filename.split(".")[-1])
        else:
            suffix = ""
        object_name = "{}_{}{}".format(package_id, self.file_type_label(), suffix)
        bucket_name = self._raw_s3_bucket()
        s3_client.upload_file(filename, bucket_name, object_name)

        with get_db_cursor() as cursor:
            command = "delete from jump_raw_file_upload_object where package_id=%s and file=%s"
            cursor.execute(command, (package_id, self.file_type_label(),))

        if error and not error_details:
            error_details_dict = {
                "no_useable_rows": "No usable rows found.",
                "error_reading_file": "Error reading this file. Try opening this file, save in .xlsx format, and upload that."}
            # look up the message for this error code, falling back to a generic message
            error_details = error_details_dict.get(error, "Error processing file. Please email this file to support@unsub.org so the Unsub team can look into the problem.")

        new_object = RawFileUploadObject(
            package_id=package_id,
            file=self.file_type_label(),
            bucket_name=bucket_name,
            object_name=object_name,
            num_rows=num_rows,
            error=error,
            error_details=error_details
        )
        db.session.add(new_object)
        safe_commit(db)

        return "s3://{}/{}".format(bucket_name, object_name)

    # def get_raw_upload_object(self, package_id):
    #     object_details = RawFileUploadObject.query.filter(
    #         RawFileUploadObject.package_id == package_id, RawFileUploadObject.file == self.file_type_label()
    #     ).scalar()
    #
    #     if not object_details:
    #         return None
    #
    #     try:
    #         raw_object = s3_client.get_object(Bucket=object_details.bucket_name, Key=object_details.object_name)
    #
    #         headers = {
    #             "Content-Length": raw_object["ContentLength"],
    #             "Content-Disposition": "attachment; filename='{}'".format(object_details.object_name)
    #         }
    #
    #         return {
    #             "body": raw_object["Body"],
    #             "content_type": raw_object["ContentType"],
    #             "headers": headers
    #         }
    #     except s3_client.exceptions.NoSuchKey:
    #         return None

    def set_to_delete(self, package_id, report_name=None):
        with get_db_cursor() as cursor:
            command = "update jump_raw_file_upload_object set to_delete_date=sysdate where package_id=%s and file=%s"
            cursor.execute(command, (package_id, self.file_type_label(),))
        return "Queued to delete"

    # report_name is used by CounterInput override
    def delete(self, package_id, report_name=None):
        sql_delete = "delete from {} where package_id=%s"
        with get_db_cursor() as cursor:
            sql1 = sql.SQL(sql_delete).format(sql.Identifier(self.__tablename__))
            cursor.execute(sql1, (package_id,))
            sql2 = sql.SQL(sql_delete).format(sql.Identifier(self.destination_table()))
            cursor.execute(sql2, (package_id,))
            cursor.execute("delete from jump_file_import_error_rows where package_id=%s and file=%s",
                           (package_id, self.file_type_label(),))
            cursor.execute("delete from jump_raw_file_upload_object where package_id=%s and file=%s",
                           (package_id, self.file_type_label(),))

        my_package = db.session.query(package.Package).filter(package.Package.package_id == package_id).scalar()
        if my_package:
            self.clear_caches(my_package)

        message = "Deleted {} rows for package {}.".format(self.__class__.__name__, package_id)
        print(message)
        return message

    def clear_caches(self, my_package):
        # print "clearing cache"
        if my_package.is_owned_by_consortium:
            print("clearing consortium cache for my_package.is_owned_by_consortium: {}".format(my_package))
            for consortium_scenario_id in my_package.consortia_scenario_ids_who_own_this_package:
                # this will actually recalculate all member institutions instead of just this one
                # which will take longer, but don't worry about it for now
                my_consortium = Consortium(consortium_scenario_id)
                email = "scott+{}@ourresearch.org".format(my_package.package_id)
                my_consortium.queue_for_recompute(email)
                reset_cache("consortium", "consortium_get_computed_data", consortium_scenario_id)
        # my_package.clear_package_counter_breakdown_cache()  # not used anymore

    def update_dest_table(self, package_id):
        with get_db_cursor() as cursor:
            qry1 = sql.SQL("delete from {} where package_id=%s").format(sql.Identifier(self.destination_table()))
            cursor.execute(qry1, (package_id,))
            qry2 = sql.SQL("insert into {} (select * from {} where package_id=%s)").format(
                sql.Identifier(self.destination_table()), sql.Identifier(self.import_view_name()))
            cursor.execute(qry2, (package_id,))

    def make_package_file_warning(self, parse_warning, additional_msg=None):
        return {
            "label": parse_warning.value["label"],
            "message": "{}{}".format(
                parse_warning.value["text"],
                " {}".format(additional_msg) if additional_msg else ""
            )
        }

    def save_errors(self, package_id, errors):
        if errors is None:
            return
        error_str = json.dumps(errors)
        chunk_size = 64 * 1024 - 1
        error_chunks = [error_str[i:i + chunk_size] for i in range(0, len(error_str), chunk_size)]
        rows = []
        for i, chunk in enumerate(error_chunks):
            rows.append(
                PackageFileErrorRow(package_id=package_id, file=self.file_type_label(), sequence=i, errors=chunk)
            )
        for row in rows:
            db.session.add(row)

    def load_errors(self, package_id):
        rows = PackageFileErrorRow.query.filter(
            PackageFileErrorRow.package_id == package_id,
            PackageFileErrorRow.file == self.file_type_label()
        ).order_by(PackageFileErrorRow.sequence).all()

        if not rows:
            return None
        else:
            try:
                errors = json.loads("".join([row.errors for row in rows]))
            except ValueError:
                print("ValueError in load_errors with ", package_id)
                return None
            if errors["rows"]:
                return errors
            else:
                return None

    def issn_columns(self):
        return []

    def validate_publisher(self):
        return False

    def is_single_column_file(self, csv_file):
        import re
        line = csv_file.readline().rstrip()
        return len(re.split(',|;|\\s', line)) == 1

    def normalize_rows(self, file_name, file_package=None):
        # convert to csv if needed
        if file_name.endswith(".xls") or file_name.endswith(".xlsx"):
            sheet_csv_file_names = convert_spreadsheet_to_csv(file_name, parsed=False)
            if not sheet_csv_file_names:
                raise RuntimeError("Error: Could not be opened as a spreadsheet.")
            if len(sheet_csv_file_names) > 1:
                raise RuntimeError("Error: Workbook contains multiple sheets.")
            file_name = sheet_csv_file_names[0]

        # convert to utf-8
        # very slow, try not doing this for now
        # file_name = convert_to_utf_8(file_name)
        # logger.info("converted file: {}".format(file_name))

        with open(file_name, "r", encoding="utf-8-sig") as csv_file:
            reader_params = {}
            if self.is_single_column_file(csv_file):
                dialect = None
                if file_name.endswith(".tsv"):
                    reader_params["delimiter"] = "\t"
            else:
                # determine the csv format
                dialect_sample = ""
                for i in range(0, 20):
                    next_line = csv_file.readline()
                    # ignore header row when determining dialect
                    # data rows should be more consistent with each other
                    if i > 0:
                        dialect_sample = str(dialect_sample) + str(next_line)
                    if not next_line:
                        break
                try:
                    dialect = csv.Sniffer().sniff(dialect_sample)
                    # logger.info(u"sniffed csv dialect:\n{}".format(json.dumps(vars(dialect), indent=2)))
                except csv.Error:
                    dialect = None
                if file_name.endswith(".tsv"):
                    reader_params["delimiter"] = "\t"

            csv_file.seek(0)

            # turn rows into arrays - remember the first row that looks like a header
            max_columns = 0
            header_index = None
            parsed_rows = []
            line_no = 0  # the index in parsed_rows where this row will land
            absolute_line_no = 0  # the actual file row we're parsing
            parsed_to_absolute_line_no = {}

            for line in csv.reader(csv_file, dialect=dialect, **reader_params):
                absolute_line_no += 1
                if not any([cell.strip() for cell in line]):
                    continue
                parsed_rows.append(line)
                parsed_to_absolute_line_no[line_no] = absolute_line_no
                populated_columns = len([cell for cell in line if cell.strip()])
                if populated_columns > max_columns:
                    max_columns = populated_columns
                    header_index = line_no
                    logger.info("candidate header row: {}".format(", ".join(line)))
                line_no += 1

            if header_index is None:
                # give up. can't turn rows into dicts if we don't have a header
                raise RuntimeError("Error: Couldn't identify a header row in the file.")

        error_rows = {
            "rows": [],
            "headers": [{"id": "row_id", "name": "Row Number"}]
        }
        normalized_rows = []

        # make sure we have all the required columns
        self.raw_column_names = parsed_rows[header_index]
        normalized_column_names = [self.normalize_column_name(cn) for cn in self.raw_column_names]
        raw_to_normalized_map = dict(list(zip(self.raw_column_names, normalized_column_names)))
        normalized_to_raw_map = {}
        for k, v in list(raw_to_normalized_map.items()):
            normalized_to_raw_map[v] = k
        required_keys = [k for k, v in list(self.csv_columns().items()) if v.get("required", True)]

        # combine the header and data rows into dicts
        row_dicts = [dict(list(zip(parsed_rows[header_index], x))) for x in parsed_rows[header_index + 1:]]

        if set(required_keys).difference(set(normalized_column_names)):
            raise RuntimeError("Error: missing required columns. Required: {}, Found: {}.".format(required_keys, self.raw_column_names))

        for row_no, row in enumerate(row_dicts):
            absolute_row_no = parsed_to_absolute_line_no[row_no] + header_index + 1
            normalized_row = {}
            cell_errors = {}

            for raw_column_name in list(row.keys()):
                raw_value = row[raw_column_name]
                normalized_name = self.normalize_column_name(raw_column_name)
                if normalized_name:
                    try:
                        normalized_value = self.normalize_cell(normalized_name, raw_value)
                        if normalized_value.__class__.__name__ == "ParseWarning":
                            parse_warning = normalized_value
                            # logger.info("parse warning: {} for data {}, {}".format(parse_warning, raw_column_name, row))
                            cell_errors[normalized_name] = self.make_package_file_warning(parse_warning)
                            normalized_row.setdefault(normalized_name, None)
                        else:
                            normalized_row.setdefault(normalized_name, normalized_value)
                    except Exception as e:
                        cell_errors[normalized_name] = self.make_package_file_warning(
                            ParseWarning.unknown, additional_msg="message: {}".format(str(e))
                        )

            if self.ignore_row(normalized_row):
                continue

            normalized_row = self.translate_row(normalized_row)

            # keep the first issn in this row
            for issn_col in self.issn_columns():
                if normalized_row.get(issn_col, None):
                    row_issn = normalized_row[issn_col]
                    [cell_errors.pop(c, None) for c in self.issn_columns()]  # delete errors for all issn columns
                    [normalized_row.pop(c, None) for c in self.issn_columns()]  # delete issn columns
                    normalized_row["issn"] = row_issn
                    break

            if not cell_errors:
                normalized_rows.append(normalized_row)
            else:
                error_row = {
                    "row_id": {
                        "value": absolute_row_no,
                        "error": None
                    }
                }
                for normalized_name in list(normalized_to_raw_map.keys()):
                    if normalized_name:
                        raw_name = normalized_to_raw_map[normalized_name]
                        error_row[normalized_name] = {
                            "value": row[raw_name],
                            "error": cell_errors.get(normalized_name, None)
                        }
                error_rows["rows"].append(error_row)

        for normalized, raw in list(normalized_to_raw_map.items()):
            if normalized:
                error_rows["headers"].append({"id": normalized, "name": raw})

        if not normalized_rows and not error_rows['rows']:
            raise RuntimeError("Error: No rows found")

        if normalized_rows:
            self.apply_header(normalized_rows, parsed_rows[0:header_index + 1])

        if not error_rows["rows"]:
            error_rows = None

        return normalized_rows, error_rows
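
    # Note (added for clarity, not in the original): normalize_rows() returns a tuple of
    # (normalized_rows, error_rows). normalized_rows is a list of dicts keyed by canonical
    # column names; error_rows is either None or a dict with "headers" and "rows" describing
    # the cells that failed normalization.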

    def load(self, package_id, file_name, file_type, commit=False):
        my_package = db.session.query(package.Package).filter(package.Package.package_id == package_id).scalar()

        if "counter" in file_type:
            self.stored_file_type_label = file_type

        try:
            normalized_rows, error_rows = self.normalize_rows(file_name, file_package=my_package)
        except (UnicodeError, UnicodeDecodeError, csv.Error) as e:
            print("normalize_rows error {}".format(e))
            err_mssg = str(e)
            if "decode" in err_mssg:
                err_mssg += " (try fixing encoding issues, possibly saving file in a different format, and/or changing file encoding)"
            self._copy_raw_to_s3(file_name, package_id, num_rows=None, error="error_reading_file", error_details=err_mssg)
            return {"success": False, "message": "error_reading_file", "warnings": []}
        except RuntimeError as e:
            print("Runtime Error processing file: {}".format(str(e)))
            self._copy_raw_to_s3(file_name, package_id, num_rows=None, error="parsing_error", error_details=str(e))
            return {"success": False, "message": str(e), "warnings": []}

        # save normalized rows
        aws_creds = "aws_access_key_id={aws_key};aws_secret_access_key={aws_secret}".format(
            aws_key=os.getenv("AWS_ACCESS_KEY_ID"),
            aws_secret=os.getenv("AWS_SECRET_ACCESS_KEY")
        )

        if normalized_rows:
            for row in normalized_rows:
                row.update({"package_id": package_id})
                # logger.info(u"normalized row: {}".format(json.dumps(row)))

            # delete what we've got
            from counter import CounterInput
            if isinstance(self, CounterInput):
                report_name = normalized_rows[1]["report_name"]
                report_version = normalized_rows[1]["report_version"]
                # make sure to delete counter 4 if loading counter 5, or vice versa
                if report_version == "4":
                    self.delete(package_id, "trj2")
                    self.delete(package_id, "trj3")
                    self.delete(package_id, "trj4")
                elif report_version == "5":
                    self.delete(package_id, "jr1")
                # and now delete the thing you are currently loading
                self.delete(package_id, report_name)
                # then set this for use further in the function
                self.set_file_type_label(report_name)
            else:
                self.delete(package_id)

            sorted_fields = sorted(normalized_rows[0].keys())
            normalized_csv_filename = tempfile.mkstemp()[1]
            num_rows = 0
            with open(normalized_csv_filename, "w", encoding="utf-8") as normalized_csv_file:
                writer = csv.DictWriter(normalized_csv_file, delimiter=",", fieldnames=sorted_fields)
                for row in normalized_rows:
                    num_rows += 1
                    writer.writerow(row)

            s3_object = self._copy_staging_csv_to_s3(normalized_csv_filename, package_id)

            copy_cmd = text("""
                copy {table} ({fields}) from '{s3_object}'
                credentials :creds format as csv
                timeformat 'auto';
            """.format(
                table=self.__tablename__,
                fields=", ".join(sorted_fields),
                s3_object=s3_object,
            ))

            print(copy_cmd.bindparams(creds=aws_creds))
            safe_commit(db)
            db.session.execute(copy_cmd.bindparams(creds=aws_creds))
            safe_commit(db)

            self.update_dest_table(package_id)
            self._copy_raw_to_s3(file_name, package_id, num_rows, error=None)

            from filter_titles import FilterTitlesInput
            if isinstance(self, FilterTitlesInput):
                self.update_subscriptions(package_id)
        else:
            from collections import OrderedDict
            try:
                z = list(filter(lambda x: [v.get('error') for k, v in x.items()], error_rows['rows']))[0]
                errors_dct = OrderedDict()
                for k, v in z.items():
                    if v.get('error'):
                        errors_dct[k] = v.get('error')
                errors_tup = next(iter(errors_dct.items()))
                error_str = f"First error. Error reading column '{errors_tup[0]}': {errors_tup[1].get('message')}"
            except Exception:
                error_str = None
            self._copy_raw_to_s3(file_name, package_id, num_rows=0, error="no_useable_rows", error_details=error_str)

        # delete the current errors, save new errors
        # self.save_errors(package_id, error_rows)
        # db.session.flush()

        if commit:
            db.session.flush()  # see if this fixes Serializable isolation violation
            db.session.commit()

        if my_package:
            self.clear_caches(my_package)

        if normalized_rows:
            return {
                "success": True,
                "message": "Inserted {} {} rows for package {}.".format(len(normalized_rows), self.__class__.__name__, package_id),
                "warnings": error_rows
            }
        else:
            return {
                "success": False,
                "message": "No usable rows found.",
                "warnings": error_rows
            }

class ParseWarning(Enum):
    bad_issn = {
        "label": "bad_issn",
        "text": "This doesn't look like an ISSN."
    }
    unknown_issn = {
        "label": "unknown_issn",
        "text": "This looks like an ISSN, but it isn't one we recognize."
    }
    bundle_issn = {
        "label": "bundle_issn",
        "text": "ISSN represents a bundle of journals, not a single journal."
    }
    no_issn = {
        "label": "no_issn",
        "text": "No ISSN here."
    }
    bad_date = {
        "label": "bad_date",
        "text": "Unrecognized date format."
    }
    ambiguous_date = {
        "label": "ambiguous_date",
        "text": "Date must contain a 4-digit year."
    }
    bad_year = {
        "label": "bad_year",
        "text": "Unrecognized date or year."
    }
    bad_int = {
        "label": "bad_int",
        "text": "Unrecognized integer format."
    }
    no_int = {
        "label": "no_int",
        "text": "Expected an integer here."
    }
    bad_usd_price = {
        "label": "bad_usd_price",
        "text": "Unrecognized USD format."
    }
    no_usd_price = {
        "label": "no_usd_price",
        "text": "A price in USD is required here."
    }
    blank_text = {
        "label": "blank_text",
        "text": "Expected text here."
    }
    unknown = {
        "label": "unknown_error",
        "text": "There was an unexpected error parsing this cell. Try to correct the cell value or contact support."
    }
    row_error = {
        "label": "row_error",
        "text": "Error for this row. See cell warnings for details."
    }
    no_rows = {
        "label": "no_rows",
        "text": "No usable rows could be extracted."
    }