port=settings.PORT, user=settings.USER, password=settings.PASSWORD) table = sys.argv[1] if not conn.table_exists(table): print("Table '%s' does not exist." % table) sys.exit(1) search_terms = [term.lower() for term in sys.argv[2:] if len(term) > 3] if len(search_terms) < 2: print("More than one term of length > 3 is required for this example") sys.exit(1) uuids = [] for e in conn.batch_scan( table, scanranges=[Range(srow="s", erow="t")], iterators=[IntersectingIterator(priority=21, terms=search_terms)]): uuids.append(e.cq) if len(uuids) > 0: for doc in conn.batch_scan( table, scanranges=[Range(srow=uuid, erow=uuid) for uuid in uuids]): print(doc.val) else: print("No results found") conn.close()
if entry is None: return None else: return entry.cq conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD) table = settings.TABLE if sys.argv[1] == "-c": print "create" wr = conn.create_batch_writer(table) i=0 q="%s:%s"%(Q,sys.argv[2]) mut = Mutation(q) for entry in conn.batch_scan(table,cols=[["Genome","md5"]],numthreads=10): genome=entry.row if i%1000 == 0: print entry.row mut.put(cf=QUEUED,cq=genome) i=i+1 wr.add_mutation(mut) wr.close() exit() if sys.argv[1] == "-r": print "recover" q="%s:%s"%(Q,sys.argv[2]) genome=randtask(q,INPROGRESS,10) while genome: print genome
# Simple end-to-end example: recreate a scratch table, ingest 99 rows with two
# columns each, then read some of them back with a scan and a batch scan.
table = "pythontest"

conn = Accumulo(host=settings.HOST, port=settings.PORT,
                user=settings.USER, password=settings.PASSWORD)
try:
    # Always start from an empty table so repeated runs behave the same.
    if conn.table_exists(table):
        conn.delete_table(table)
    conn.create_table(table)

    wr = conn.create_batch_writer(table)

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("Ingesting some data ...")
    for num in range(1, 100):
        label = '%03d' % num
        mut = Mutation('r_%s' % label)
        mut.put(cf='cf_%s' % label, cq='cq1', val='value_%s' % label)
        mut.put(cf='cf_%s' % label, cq='cq2', val='value_%s' % label)
        wr.add_mutation(mut)
    wr.close()

    print("Rows 001 through 003 ...")
    for entry in conn.scan(table,
                           scanrange=Range(srow='r_001', erow='r_003'),
                           cols=[]):
        print(entry)

    print("Rows 001 and 011 ...")
    for entry in conn.batch_scan(table,
                                 scanranges=[Range(srow='r_001', erow='r_001'),
                                             Range(srow='r_011', erow='r_011')]):
        print(entry)
finally:
    # Release the connection even when one of the steps above fails.
    conn.close()
# Ingest the license text: one row per line, keyed by its 1-based line number,
# with the stripped line stored as the value under column family "e".
linenum = 0
with open(license_file) as infile:
    for linenum, text in enumerate(infile, 1):
        row_mutation = Mutation(str(linenum))
        row_mutation.put(cf="e", cq="", val=text.strip())
        wr.add_mutation(row_mutation)
wr.close()

# Three value filters, applied in priority order (21, 22, 23).
regex1 = RegExFilter(
    name="RegExFilter1",
    priority=21,
    val_regex=".*stated.*",
    match_substring=True,
)
regex2 = RegExFilter(
    name="RegExFilter2",
    priority=22,
    val_regex='.*patent',
    match_substring=True,
)
regex3 = RegExFilter(
    name="RegExFilter3",
    priority=23,
    val_regex='have made',
    match_substring=True,
)

# Scan column family "e" with all three filters attached and print each hit.
value_filters = [regex1, regex2, regex3]
matches = conn.batch_scan(table, cols=[["e"]], iterators=value_filters)
for e in matches:
    print(e)

conn.close()
# Runs 20 simulated polygon queries against the Plenario table and prints every
# entry whose geohash row decodes to a point inside the polygon.
table = "Plenario_data"

# Sample polygon kept for reference:
# poly = Polygon([(37.795542, -122.423058),
#                 (37.800019, -122.398853),
#                 (37.789302, -122.38821),
#                 (37.7737, -122.39542),
#                 (37.770036, -122.417736)])

# NOTE(review): these are not read in this script section; they are kept
# because a helper defined elsewhere (e.g. simulate_try) may use them - confirm.
now_latlong_1 = "37.795542, -122.423058"
now_latlong_2 = "37.800019, -122.398853"
now_latlong_3 = "37.789302, -122.38821"
now_latlong_4 = "37.78121, -122.39212"
now_latlong_5 = "37.770036, -122.417736"

# NOTE(review): loop nesting below was reconstructed from the statement order;
# the per-query summary (bounds, area, "Done") is assumed to run once per
# polygon, after its scan has finished - confirm.
for i in range(0, 20):
    poly = Polygon(simulate_try())

    # Scan only the row range returned by geohash_min_max(poly), 10 threads.
    count = 0
    min_gh, max_gh = geohash_min_max(poly)
    for entry in conn.batch_scan(table, numthreads=10,
                                 scanranges=[Range(srow=min_gh, erow=max_gh)]):
        count += 1
        # Progress indicator every 10000 scanned entries.
        if count % 10000 == 0:
            print(count)
        # The row range is only a coarse pre-filter; test the decoded point.
        if poly.contains(Point(*geohash.decode(entry.row))):
            # %s formatting gives the same space-separated output on
            # Python 2 and Python 3.
            print("%s %s %s %s" % (entry.row, entry.cf, entry.ts, entry.val))

    print(poly.bounds)
    print(poly.area)
    print("Done With One Query %d!!!" % (i))
class DBConnection:
    """
    The interface to Accumulo.

    Holds the connection parameters, opens the connection on connect(), and
    answers aggregate queries over the "dat_master" table via query().
    """

    __slots__ = ['_host', '_port', '_user', '_password', '_conn']

    def __init__(self, host, port, user, password):
        """Store the connection parameters; no connection is opened yet."""
        self._host = host
        self._port = port
        self._user = user
        self._password = password
        # Initialised explicitly so query() can detect a missing connect()
        # instead of failing with an AttributeError on an unset slot.
        self._conn = None

    def connect(self):
        """Open the Accumulo connection using the stored parameters."""
        self._conn = Accumulo(host=self._host,
                              port=self._port,
                              user=self._user,
                              password=self._password)

    def query(self, plenario_session_state):
        """
        Count observations per (dataset name, truncated date).

        plenario_session_state must provide get_dataset(), get_ngon(),
        get_start_date(), get_end_date() (both "%m/%d/%Y" strings) and
        get_date_aggr() (passed through to truncate()).

        Returns a dict mapping (dataset_name, iso_date_string) to the number
        of entries whose 'obs_date' lies within [start_date, end_date].

        Raises Exception when the dataset is not "dat_master", when no 5-gon
        is selected, or when connect() has not been called. Raises ValueError
        for unparsable dates and KeyError for entries missing a needed column.
        """
        dataset = plenario_session_state.get_dataset()
        if dataset != "dat_master":
            raise Exception("Querying currently only supported on dat_master!")

        ngon = plenario_session_state.get_ngon()
        if ngon is None:
            raise Exception("You must have an N-gon selected!")
        if len(ngon) != 5:
            raise Exception("Querying currently only supported for a 5-gon!")

        start_date = plenario_session_state.get_start_date()
        end_date = plenario_session_state.get_end_date()
        date_aggr = plenario_session_state.get_date_aggr()

        # Parse the user-supplied dates before scanning so that a malformed
        # date fails fast instead of after a full table scan.
        start_date = datetime.strptime(start_date, "%m/%d/%Y")
        end_date = datetime.strptime(end_date, "%m/%d/%Y")

        if self._conn is None:
            raise Exception("Not connected to Accumulo; call connect() first!")

        # NOTE(review): the scan range and the polygon below are hard-coded to
        # cover the whole globe, so the selected N-gon is validated but not
        # applied as a spatial filter. This matches the previous behaviour and
        # looks like a debugging leftover - confirm before switching to
        # Polygon(ngon).
        min_gh = geohash.encode(-89.9, -179.9)
        max_gh = geohash.encode(89.9, 179.9)
        poly = Polygon([(-90, -180), (90, -180), (90, 180), (-90, 180)])

        # Group key-value pairs that belong to the same entry: the column
        # qualifier identifies the entry, the row holds its geohash, and each
        # column family contributes one field.
        rows = {}
        for cell in self._conn.batch_scan("dat_master", numthreads=10,
                                          scanranges=[Range(srow=min_gh,
                                                            erow=max_gh)]):
            fields = rows.get(cell.cq)
            if fields is None:
                fields = rows[cell.cq] = {'ghash': cell.row}
            fields[cell.cf] = cell.val

        # Apply the temporal and spatial filters, then count group sizes by
        # (dataset name, date truncated as specified by date_aggr).
        counts = {}
        for fields in rows.values():
            obs_date = datetime.strptime(fields['obs_date'],
                                         "%Y-%m-%d %H:%M:%S")
            if not (start_date <= obs_date <= end_date):
                continue
            if not poly.contains(Point(geohash.decode(fields['ghash']))):
                continue
            group = (fields['dataset_name'],
                     truncate(obs_date, date_aggr).isoformat())
            counts[group] = counts.get(group, 0) + 1
        return counts
# See the License for the specific language governing permissions and
# limitations under the License.

from pyaccumulo import Accumulo
from pyaccumulo.iterators import *
import settings
import sys

# Usage: <script> TABLE TERM TERM [TERM ...]
# Prints the value of every entry returned by an IndexedDocIterator scan of
# TABLE for the given search terms.
conn = Accumulo(host=settings.HOST, port=settings.PORT,
                user=settings.USER, password=settings.PASSWORD)
try:
    table = sys.argv[1]
    if not conn.table_exists(table):
        print("Table '%s' does not exist." % table)
        sys.exit(1)

    # Terms are lower-cased; terms of three characters or fewer are dropped.
    search_terms = [term.lower() for term in sys.argv[2:] if len(term) > 3]
    if len(search_terms) < 2:
        print("More than one term of length > 3 is required for this example")
        sys.exit(1)

    for e in conn.batch_scan(
            table,
            iterators=[IndexedDocIterator(priority=21, terms=search_terms)]):
        print(e.val)
finally:
    # Also runs on the sys.exit(1) paths above, so the connection never leaks.
    conn.close()
import settings

# Example: load the LICENSE file into a fresh "regexes" table (one row per
# line) and print the entries that pass three stacked RegExFilter iterators.
conn = Accumulo(host=settings.HOST, port=settings.PORT,
                user=settings.USER, password=settings.PASSWORD)
try:
    table = "regexes"
    # Always start from an empty table so repeated runs behave the same.
    if conn.table_exists(table):
        conn.delete_table(table)
    conn.create_table(table)

    wr = conn.create_batch_writer(table)

    license_file = "LICENSE"
    linenum = 0
    # open() instead of file(): the file() builtin only exists on Python 2.
    with open(license_file) as infile:
        for line in infile:
            linenum += 1
            # Row id is the 1-based line number; value is the stripped line.
            m = Mutation(str(linenum))
            m.put(cf="e", cq="", val=line.strip())
            wr.add_mutation(m)
    wr.close()

    regex1 = RegExFilter(priority=21, val_regex=".*stated.*",
                         match_substring=True, name="RegExFilter1")
    regex2 = RegExFilter(priority=22, val_regex='.*patent',
                         match_substring=True, name="RegExFilter2")
    regex3 = RegExFilter(priority=23, val_regex='have made',
                         match_substring=True, name="RegExFilter3")

    for e in conn.batch_scan(table, cols=[["e"]],
                             iterators=[regex1, regex2, regex3]):
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print(e)
finally:
    # Release the connection even when ingest or the scan fails.
    conn.close()
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pyaccumulo import Accumulo, Mutation, Range
from pyaccumulo.iterators import *

from pyaccumulo.proxy.ttypes import IteratorSetting, IteratorScope
from examples.util import hashcode
import hashlib, re

import settings
import sys

# Usage: <script> TABLE TERM TERM [TERM ...]
# Prints the value of every entry returned by an IndexedDocIterator scan of
# TABLE for the given search terms.
conn = Accumulo(host=settings.HOST, port=settings.PORT,
                user=settings.USER, password=settings.PASSWORD)
try:
    table = sys.argv[1]
    if not conn.table_exists(table):
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print("Table '%s' does not exist." % table)
        sys.exit(1)

    # Terms are lower-cased; terms of three characters or fewer are dropped.
    search_terms = [term.lower() for term in sys.argv[2:] if len(term) > 3]
    if len(search_terms) < 2:
        print("More than one term of length > 3 is required for this example")
        sys.exit(1)

    for e in conn.batch_scan(
            table,
            iterators=[IndexedDocIterator(priority=21, terms=search_terms)]):
        print(e.val)
finally:
    # Also runs on the sys.exit(1) paths above, so the connection never leaks.
    conn.close()
from pyaccumulo.proxy.ttypes import IteratorSetting, IteratorScope
from examples.util import hashcode
import hashlib, re

import settings
import sys

# Usage: <script> TABLE TERM TERM [TERM ...]
# Two-phase search: an IntersectingIterator scan over rows "s".."t" collects
# the column qualifiers of matching entries, then each of those is fetched as
# a row and its value printed.
conn = Accumulo(host=settings.HOST, port=settings.PORT,
                user=settings.USER, password=settings.PASSWORD)
try:
    table = sys.argv[1]
    if not conn.table_exists(table):
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print("Table '%s' does not exist." % table)
        sys.exit(1)

    # Terms are lower-cased; terms of three characters or fewer are dropped.
    search_terms = [term.lower() for term in sys.argv[2:] if len(term) > 3]
    if len(search_terms) < 2:
        print("More than one term of length > 3 is required for this example")
        sys.exit(1)

    # Phase 1: collect the column qualifier of every intersecting entry.
    uuids = []
    for e in conn.batch_scan(
            table,
            scanranges=[Range(srow="s", erow="t")],
            iterators=[IntersectingIterator(priority=21, terms=search_terms)]):
        uuids.append(e.cq)

    # Phase 2: look each collected qualifier up as an exact single-row range.
    if uuids:
        for doc in conn.batch_scan(
                table,
                scanranges=[Range(srow=uuid, erow=uuid) for uuid in uuids]):
            print(doc.val)
    else:
        print("No results found")
finally:
    # Also runs on the sys.exit(1) paths above, so the connection never leaks.
    conn.close()