def import_rows(): row = Row() pos = row.diff.pos neg = row.diff.neg pos.observed.sparsity = ProductValue.Observed.SPARSE neg.observed.sparsity = ProductValue.Observed.SPARSE with open_compressed(RAW) as infile: doc_count = int(infile.next()) word_count = int(infile.next()) observed_count = int(infile.next()) print 'Importing {} observations of {} words in {} documents'.format( observed_count, word_count, doc_count) with open_compressed(DIFFS, 'wb') as outfile: current_doc = None for line in infile: doc, feature, count = line.split() if doc != current_doc: if current_doc is not None: pos.observed.sparse.sort() neg.observed.sparse.sort() protobuf_stream_write(row.SerializeToString(), outfile) print_dot(every=1000) current_doc = doc row.id = int(doc) del pos.booleans[:] del pos.observed.sparse[:] del neg.booleans[:] del neg.observed.sparse[:] feature = int(feature) - 1 pos.observed.sparse.append(feature) pos.booleans.append(True) neg.observed.sparse.append(feature) neg.booleans.append(False) protobuf_stream_write(row.SerializeToString(), outfile)
def send(self, request):
    '''
    Serialize a Query.Request and push it onto the subprocess's stdin.

    The stream is flushed immediately so the child process sees the
    request without waiting for buffer pressure.
    '''
    assert isinstance(request, Query.Request), request
    payload = request.SerializeToString()
    stdin = self.proc.stdin
    protobuf_stream_write(payload, stdin)
    stdin.flush()
def call_string(self, request_string):
    '''
    Write an already-serialized request to the subprocess, fire-and-forget.

    No flush and no response read happen here; callers that need a reply
    use the request/response variant.
    '''
    stdin = self.proc.stdin
    protobuf_stream_write(request_string, stdin)
def call_string(self, request_string):
    '''
    Send a serialized request to the subprocess and return the serialized
    response read back from its stdout.
    '''
    proc = self.proc
    protobuf_stream_write(request_string, proc.stdin)
    response = protobuf_stream_read(proc.stdout)
    return response