def test_read_block16(self,
                      dq_idelay=None,
                      dqs_idelay=None,
                      wait_complete=1):
    """
    Test read block in PS PIO mode, convert data to match DDR3 16-bit output words
    <dq_idelay>     set DQ input delays if provided (None or [] - skip, single number - both lanes,
                    2 element list - per lane)
    <dqs_idelay>    set DQS input delays if provided (None or [] - skip, single number - both lanes,
                    2 element list - per lane)
    <wait_complete> wait read block operation to complete (0 - may initiate multiple PS PIO operations)
    returns list of the read data (the output of convert_w32_to_mem16)
    """
    # A delay of 0 is a legitimate setting, so only None and [] mean "leave untouched"
    if dq_idelay is not None and dq_idelay != []:
        self.x393_mcntrl_timing.axi_set_dq_idelay(dq_idelay)
    if dqs_idelay is not None and dqs_idelay != []:
        self.x393_mcntrl_timing.axi_set_dqs_idelay(dqs_idelay)

    # Positional arguments, per the original inline comments: num, show_rslt, wait for completion
    raw_words = self.x393_pio_sequences.read_block(256, 0, wait_complete)
    read16 = convert_w32_to_mem16(raw_words)  # 512x16 bit, same as DDR3 DQ over time

    # Header carries the plain sum of all words, then the words follow 32 per row
    print("read16 (0x%x):" % sum(read16))
    for pos, word in enumerate(read16):
        if not pos & 0x1F:
            print("\n%03x:" % pos, end=" ")
        print("%04x" % word, end=" ")
    print("\n")
    return read16
def read_block_buf(self,
                   start_word_address,
                   num_read,
                   show_rslt=True):
    """
    Read consecutive 32-bit words through self.x393_mem.axi_read_addr_w() and optionally print them
    @param start_word_address full register address in AXI space (in 32-bit words, not bytes)
    @param num_read number of 32-bit words to read (exactly this many single-word reads are issued)
    @param show_rslt print buffer data read: 1 (or True, the default) - one word per line,
                     16 - as 16-bit (memory words), 32 - as 32-bit (data words)
    @return list of the words read, in address order
    """
    # NOTE(review): another definition of read_block_buf appears in this file; if both
    # live in the same scope the later one wins - confirm which copy is meant to stay.
    def _dump(title, words, per_row_mask, fmt):
        # Header carries the plain sum of the words; a new row starts when (index & mask) == 0
        print(title % (sum(words)), end="")
        for pos, word in enumerate(words):
            if (pos & per_row_mask) == 0:
                print("\n%03x:" % pos, end=" ")
            print(fmt % word, end=" ")
        print("\n")

    column_mode = (show_rslt == 1)
    if (self.verbose > 1) or column_mode:
        print("**** read_block_buf, start_word_address=0x%x, num_read=0x%x " %
              (start_word_address, num_read))

    result = []
    for offset in range(num_read):
        address = start_word_address + offset
        word = self.x393_mem.axi_read_addr_w(address)
        if (self.verbose > 2) or column_mode:
            print(" read_block_buf 0x%x:0x%x" % (address, word))
        result.append(word)

    if show_rslt == 16:
        _dump("read16 (0x%x):", convert_w32_to_mem16(result), 0x1f, "%04x")
    elif show_rslt == 32:
        _dump("read buffer: (0x%x):", result, 0xf, "%08x")
    return result
def read_block_buf(self,
                   start_word_address,
                   num_read,
                   show_rslt=True):
    """
    Read a run of 32-bit words, one self.x393_mem.axi_read_addr_w() call per word, and
    optionally print what was read
    @param start_word_address full register address in AXI space (in 32-bit words, not bytes)
    @param num_read number of 32-bit words to read (the loop runs exactly num_read times)
    @param show_rslt print buffer data read: 1 (or True, the default) - one word per line,
                     16 - as 16-bit (memory words), 32 - as 32-bit (data words)
    @return list of the words read, in address order
    """
    # NOTE(review): read_block_buf is defined more than once in this file; if both
    # copies share a scope the later one wins - confirm which copy is meant to stay.
    per_word_listing = (show_rslt == 1)
    if (self.verbose > 1) or per_word_listing:
        print("**** read_block_buf, start_word_address=0x%x, num_read=0x%x " %
              (start_word_address, num_read))

    result = []
    for address in range(start_word_address, start_word_address + num_read):
        value = self.x393_mem.axi_read_addr_w(address)
        if (self.verbose > 2) or per_word_listing:
            print(" read_block_buf 0x%x:0x%x" % (address, value))
        result.append(value)

    if show_rslt == 16:
        # Same data viewed as 16-bit memory words, 32 per row, prefixed by their sum
        words16 = convert_w32_to_mem16(result)
        print("read16 (0x%x):" % (sum(words16)), end="")
        for pos, word in enumerate(words16):
            if not pos & 0x1f:
                print("\n%03x:" % pos, end=" ")
            print("%04x" % word, end=" ")
        print("\n")
    elif show_rslt == 32:
        # Raw 32-bit words, 16 per row, prefixed by their sum
        print("read buffer: (0x%x):" % (sum(result)), end="")
        for pos, word in enumerate(result):
            if not pos & 0xf:
                print("\n%03x:" % pos, end=" ")
            print("%08x" % word, end=" ")
        print("\n")
    return result
def scan_or_adjust_delay_random(self,
                                low_delay,
                                high_delay,
                                use_dq,
                                use_odelay,
                                ends_dist,
                                min_diff,
                                adjust,
                                verbose):
    """
    Scan or adjust delays using random data write+read
    <low_delay>   low delay value to try
    <high_delay>  high delay value to try
    <use_dq>      0 - scan dqs, 1 - scan dq (common value, post-adjustment)
    <use_odelay>  0 - use input delays, 1 - use output delays
    <ends_dist>   do not process if one of the primary interval ends is within this from 0.0 or 1.0
                  (handed to calibrate_finedelay/corr_delays divided by 256.0)
    <min_diff>    minimal difference between primary delay steps to process
                  (handed to calibrate_finedelay divided by 256.0)
    <adjust>      0 - scan (print spreadsheet tables), 1 - adjust (compute and apply DQ delays)
    <verbose>     verbose mode (more prints)
    Returns the result of self.corr_delays() when adjusting (described by the original
    author as a list of calculated delay values), None when only scanning
    """
    # Must stay the first statement: locals() should hold only the call arguments here
    checkIntArgs(('low_delay', 'high_delay'), locals())

    # Bulk data dumps are printed only for a verbose scan, never while adjusting
    show = verbose and not adjust

    def dump(title, words, per_row, fmt):
        # Print a titled table of words, starting a new row every per_row entries
        print(title)
        for pos, word in enumerate(words):
            if pos % per_row == 0:
                print("\n%03x:" % pos, end=" ")
            print(fmt % word, end=" ")
        print("\n")

    brc = (5,        # 3'h5,     # bank
           0x1234,   # 15'h1234, # row address
           0x100)    # 10'h100   # column address
    low = split_delay(low_delay)
    high = split_delay(high_delay)

    # 512 random 16-bit words make up the test pattern
    rand16 = [random.randint(0, 65535) for _ in range(512)]
    wdata = convert_mem16_to_w32(rand16)
    if show:
        dump("rand16:", rand16, 32, "%04x")
        dump("wdata:", wdata, 16, "%08x")

    # For every word except the first and last, and for every DQ bit, encode the
    # (previous, current, next) bit values as a 3-bit "type" 0..7
    bit_type = [
        [(((rand16[i - 1] >> j) & 1) << 2) |
         (((rand16[i] >> j) & 1) << 1) |
         ((rand16[i + 1] >> j) & 1)
         for j in range(16)]
        for i in range(1, 511)
    ]

    # Number of times each type occurred in the block for each DQ bit.
    # Built row by row so the 16 rows are independent lists.
    total_types = [[0] * 8 for _ in range(16)]
    for typ in bit_type:
        for j in range(16):
            total_types[j][typ[j]] += 1
    if show:
        print("\ntotal_types:")
        print(total_types)

    # Fraction of all (bit, word) samples that fall into each of the 8 types
    avg_types = [0.0] * 8
    N = 0
    for per_bit in total_types:
        for j, n in enumerate(per_bit):
            avg_types[j] += n
            N += n
    for i in range(len(avg_types)):
        avg_types[i] /= N
    if show:
        # Fix: the original literal was "\avg_types:", where "\a" is the BEL character
        print("\navg_types:")
        print(avg_types)

    # Write block buffer with 256x32bit data
    self.x393_mcntrl_buffers.write_block_buf_chn(0, 0, wdata)  # fill block memory (channel, page, number)
    self.x393_pio_sequences.set_write_block(*brc)  # 64 8-bursts, 1 extra DQ/DQS/ active cycle
    self.x393_pio_sequences.set_read_block(*brc)
    if use_odelay == 0:
        # Input-delay scan: memory is written once, only the read side is varied
        self.x393_pio_sequences.write_block(0, 1)  # Wait for operation to complete
        if verbose:
            print("++++++++ block written once")

    # Now scanning - first DQS, then try with DQ (post-adjustment - best fit)
    results = []  # results[k] belongs to delay low + k; [] marks a bad measurement
    if verbose:
        print("******** use_odelay=%d use_dq=%d" % (use_odelay, use_dq))
    alreadyWarned = False
    for dly in range(low, high + 1):
        enc_dly = combine_delay(dly)
        if use_odelay != 0:
            if use_dq != 0:
                if verbose:
                    print("******** axi_set_dq_odelay(0x%x)" % enc_dly)
                self.x393_mcntrl_timing.axi_set_dq_odelay(enc_dly)  # set the same odelay for all DQ bits
            else:
                if verbose:
                    print("******** axi_set_dqs_odelay(0x%x)" % enc_dly)
                self.x393_mcntrl_timing.axi_set_dqs_odelay(enc_dly)
            # Output delays affect the write, so the block is rewritten for every step
            self.x393_pio_sequences.write_block(0, 1)  # Wait for operation to complete
            if verbose:
                print("-------- block written AGAIN")
        else:
            if use_dq != 0:
                if verbose:
                    print("******** axi_set_dq_idelay(0x%x)" % enc_dly)
                self.x393_mcntrl_timing.axi_set_dq_idelay(enc_dly)  # set the same idelay for all DQ bits
            else:
                if verbose:
                    print("******** axi_set_dqs_idelay(0x%x)" % enc_dly)
                self.x393_mcntrl_timing.axi_set_dqs_idelay(enc_dly)

        buf32 = self.x393_pio_sequences.read_block(256,  # num,
                                                   0,    # show_rslt,
                                                   1)    # Wait for operation to complete
        if self.bad_data(buf32):
            results.append([])
            continue

        # Warn about possible missing DQS pulses during writes
        alreadyWarned |= self.missing_dqs(buf32, alreadyWarned)
        read16 = convert_w32_to_mem16(buf32)  # 512x16 bit, same as DDR3 DQ over time
        if show and dly == low:
            dump("buf32:", buf32, 16, "%08x")
            dump("read16:", read16, 32, "%04x")

        # Per DQ bit and per type: how many times the bit was read back as 1
        data = [[0] * 8 for _ in range(16)]
        for i in range(1, 511):
            w = read16[i]
            typ = bit_type[i - 1]  # first and last words are not used, no type was calculated
            for j in range(16):
                if (w & (1 << j)) != 0:
                    data[j][typ[j]] += 1
        # Normalize counts to fractions; types that never occurred stay 0
        for i in range(16):
            for t in range(8):
                if total_types[i][t] > 0:
                    data[i][t] *= 1.0 / total_types[i][t]
        results.append(data)
        if show:
            print("%3d (0x%02x): " % (dly, enc_dly), end="")
            for i in range(16):
                print("[", end="")
                for j in range(8):
                    print("%3d" % (round(100.0 * data[i][j])), end=" ")
                print("]", end=" ")
            print()

    titles = ["'000", "'001", "'010", "'011", "'100", "'101", "'110", "'111"]
    # Calculate weighted averages
    # TODO: for DQ scan shift individual bits for the best match
    if use_dq and show:
        print("TODO: shift individual bits for the best match before averaging")

    # Average the 16 DQ bits for each type, weighted by how often the type occurred
    res_avg = []
    for data in results:
        if len(data) > 0:
            avg = [0.0] * 8
            for t in range(8):
                weight = 0
                d = 0.0
                for i in range(16):
                    weight += total_types[i][t]
                    d += total_types[i][t] * data[i][t]
                if weight > 0:
                    d /= weight
                avg[t] = d
            res_avg.append(avg)
        else:
            res_avg.append([])

    corr_fine = self.calibrate_finedelay(
        low,                # absolute delay value of start scan
        avg_types,          # weights of each of the 8 bit sequences
        res_avg,            # averaged eye data table, each line has 8 elements, or [] for bad measurements
        ends_dist / 256.0,  # do not process if one of the primary interval ends is within this from 0.0 or 1.0
        min_diff / 256.0)   # minimal difference between primary delay steps to process
    period = len(corr_fine)

    # NOTE(review): nesting was reconstructed from a flattened source; both tables are
    # assumed to belong to scan mode only - confirm against the original layout.
    if not adjust:
        print("\n\n\n========== Copy below to the spreadsheet, use columns from corr_delay ==========")
        print("========== First are individual results for each bit, then averaged eye pattern ==========")
        print("delay corr_delay", end=" ")
        for t in range(8):
            for i in range(16):
                print("%02d:%s" % (i, titles[t]), end=" ")
        print()
        for index, data in enumerate(results):
            if len(data) > 0:
                dly = index + low
                corr_dly = dly + corr_fine[dly % period]
                print("%d %.2f" % (dly, corr_dly), end=" ")
                # Fix: rows are stored by list position, not by absolute delay. The
                # original read results[dly] and hid the resulting IndexError (or the
                # wrong row) behind a bare except whenever low != 0.
                for t in range(8):
                    for i in range(16):
                        print("%.4f" % (data[i][t]), end=" ")
                print()

        print("\n\n\n========== Copy below to the spreadsheet, use columns from corr_delay ==========")
        print("========== data above can be used for the individual bits eye patterns ==========")
        print("delay corr_delay", end=" ")
        for t in range(8):
            print(titles[t], end=" ")
        print()
        for index, avg in enumerate(res_avg):
            if len(avg) > 0:
                dly = index + low
                corr_dly = dly + corr_fine[dly % period]
                print("%d %.2f" % (dly, corr_dly), end=" ")
                # Same indexing fix as above: use the row at this list position
                for t in range(8):
                    print("%.4f" % (avg[t]), end=" ")
                print()

    dly_corr = None
    if adjust:
        dly_corr = self.corr_delays(
            low,                # absolute delay value of start scan
            avg_types,          # weights of each of the 8 bit sequences
            results,            # individual per-bit results
            res_avg,            # averaged eye data table, each line has 8 elements, or [] for bad measurements
            corr_fine,          # fine delay correction
            ends_dist / 256.0,  # find where all bits are above/below that distance from 0.0/1.0 margin
            verbose)
        # NOTE(review): these two prints are assumed to sit under `if adjust` - confirm.
        print("dly_corr=", dly_corr)
        print("use_dq=", use_dq)
        if dly_corr and use_dq:  # only adjusting DQ delays, not DQS
            dly_comb = combine_delay(dly_corr)
            # Entries 0..7 and 8..15 are passed as two separate groups (presumably one per byte lane)
            per_lane = (dly_comb[0:8], dly_comb[8:16])
            if use_odelay:
                self.x393_mcntrl_timing.axi_set_dq_odelay(per_lane)
            else:
                self.x393_mcntrl_timing.axi_set_dq_idelay(per_lane)
    return dly_corr
def scan_or_adjust_delay_random(self,
                                low_delay,
                                high_delay,
                                use_dq,
                                use_odelay,
                                ends_dist,
                                min_diff,
                                adjust,
                                verbose):
    """
    Scan or adjust delays using random data write+read
    <low_delay>   low delay value to try
    <high_delay>  high delay value to try
    <use_dq>      0 - scan dqs, 1 - scan dq (common value, post-adjustment)
    <use_odelay>  0 - use input delays, 1 - use output delays
    <ends_dist>   do not process if one of the primary interval ends is within this from 0.0 or 1.0
                  (handed to calibrate_finedelay/corr_delays divided by 256.0)
    <min_diff>    minimal difference between primary delay steps to process
                  (handed to calibrate_finedelay divided by 256.0)
    <adjust>      0 - scan (print spreadsheet tables), 1 - adjust (compute and apply DQ delays)
    <verbose>     verbose mode (more prints)
    Returns the result of self.corr_delays() when adjusting (described by the original
    author as a list of calculated delay values), None when only scanning
    """
    # Must stay the first statement: locals() should hold only the call arguments here
    checkIntArgs(('low_delay', 'high_delay'), locals())

    # Bulk data dumps are printed only for a verbose scan, never while adjusting
    show = verbose and not adjust

    def dump(title, words, per_row, fmt):
        # Print a titled table of words, starting a new row every per_row entries
        print(title)
        for pos, word in enumerate(words):
            if pos % per_row == 0:
                print("\n%03x:" % pos, end=" ")
            print(fmt % word, end=" ")
        print("\n")

    brc = (5,        # 3'h5,     # bank
           0x1234,   # 15'h1234, # row address
           0x100)    # 10'h100   # column address
    low = split_delay(low_delay)
    high = split_delay(high_delay)

    # 512 random 16-bit words make up the test pattern
    rand16 = [random.randint(0, 65535) for _ in range(512)]
    wdata = convert_mem16_to_w32(rand16)
    if show:
        dump("rand16:", rand16, 32, "%04x")
        dump("wdata:", wdata, 16, "%08x")

    # For every word except the first and last, and for every DQ bit, encode the
    # (previous, current, next) bit values as a 3-bit "type" 0..7
    bit_type = [
        [(((rand16[i - 1] >> j) & 1) << 2) |
         (((rand16[i] >> j) & 1) << 1) |
         ((rand16[i + 1] >> j) & 1)
         for j in range(16)]
        for i in range(1, 511)
    ]

    # Number of times each type occurred in the block for each DQ bit.
    # Built row by row so the 16 rows are independent lists.
    total_types = [[0] * 8 for _ in range(16)]
    for typ in bit_type:
        for j in range(16):
            total_types[j][typ[j]] += 1
    if show:
        print("\ntotal_types:")
        print(total_types)

    # Fraction of all (bit, word) samples that fall into each of the 8 types
    avg_types = [0.0] * 8
    N = 0
    for per_bit in total_types:
        for j, n in enumerate(per_bit):
            avg_types[j] += n
            N += n
    for i in range(len(avg_types)):
        avg_types[i] /= N
    if show:
        # Fix: the original literal was "\avg_types:", where "\a" is the BEL character
        print("\navg_types:")
        print(avg_types)

    # Write block buffer with 256x32bit data
    self.x393_mcntrl_buffers.write_block_buf_chn(0, 0, wdata)  # fill block memory (channel, page, number)
    self.x393_pio_sequences.set_write_block(*brc)  # 64 8-bursts, 1 extra DQ/DQS/ active cycle
    self.x393_pio_sequences.set_read_block(*brc)
    if use_odelay == 0:
        # Input-delay scan: memory is written once, only the read side is varied
        self.x393_pio_sequences.write_block(0, 1)  # Wait for operation to complete
        if verbose:
            print("++++++++ block written once")

    # Now scanning - first DQS, then try with DQ (post-adjustment - best fit)
    results = []  # results[k] belongs to delay low + k; [] marks a bad measurement
    if verbose:
        print("******** use_odelay=%d use_dq=%d" % (use_odelay, use_dq))
    alreadyWarned = False
    for dly in range(low, high + 1):
        enc_dly = combine_delay(dly)
        if use_odelay != 0:
            if use_dq != 0:
                if verbose:
                    print("******** axi_set_dq_odelay(0x%x)" % enc_dly)
                self.x393_mcntrl_timing.axi_set_dq_odelay(enc_dly)  # set the same odelay for all DQ bits
            else:
                if verbose:
                    print("******** axi_set_dqs_odelay(0x%x)" % enc_dly)
                self.x393_mcntrl_timing.axi_set_dqs_odelay(enc_dly)
            # Output delays affect the write, so the block is rewritten for every step
            self.x393_pio_sequences.write_block(0, 1)  # Wait for operation to complete
            if verbose:
                print("-------- block written AGAIN")
        else:
            if use_dq != 0:
                if verbose:
                    print("******** axi_set_dq_idelay(0x%x)" % enc_dly)
                self.x393_mcntrl_timing.axi_set_dq_idelay(enc_dly)  # set the same idelay for all DQ bits
            else:
                if verbose:
                    print("******** axi_set_dqs_idelay(0x%x)" % enc_dly)
                self.x393_mcntrl_timing.axi_set_dqs_idelay(enc_dly)

        buf32 = self.x393_pio_sequences.read_block(256,  # num,
                                                   0,    # show_rslt,
                                                   1)    # Wait for operation to complete
        if self.bad_data(buf32):
            results.append([])
            continue

        # Warn about possible missing DQS pulses during writes
        alreadyWarned |= self.missing_dqs(buf32, alreadyWarned)
        read16 = convert_w32_to_mem16(buf32)  # 512x16 bit, same as DDR3 DQ over time
        if show and dly == low:
            dump("buf32:", buf32, 16, "%08x")
            dump("read16:", read16, 32, "%04x")

        # Per DQ bit and per type: how many times the bit was read back as 1
        data = [[0] * 8 for _ in range(16)]
        for i in range(1, 511):
            w = read16[i]
            typ = bit_type[i - 1]  # first and last words are not used, no type was calculated
            for j in range(16):
                if (w & (1 << j)) != 0:
                    data[j][typ[j]] += 1
        # Normalize counts to fractions; types that never occurred stay 0
        for i in range(16):
            for t in range(8):
                if total_types[i][t] > 0:
                    data[i][t] *= 1.0 / total_types[i][t]
        results.append(data)
        if show:
            print("%3d (0x%02x): " % (dly, enc_dly), end="")
            for i in range(16):
                print("[", end="")
                for j in range(8):
                    print("%3d" % (round(100.0 * data[i][j])), end=" ")
                print("]", end=" ")
            print()

    titles = ["'000", "'001", "'010", "'011", "'100", "'101", "'110", "'111"]
    # Calculate weighted averages
    # TODO: for DQ scan shift individual bits for the best match
    if use_dq and show:
        print("TODO: shift individual bits for the best match before averaging")

    # Average the 16 DQ bits for each type, weighted by how often the type occurred
    res_avg = []
    for data in results:
        if len(data) > 0:
            avg = [0.0] * 8
            for t in range(8):
                weight = 0
                d = 0.0
                for i in range(16):
                    weight += total_types[i][t]
                    d += total_types[i][t] * data[i][t]
                if weight > 0:
                    d /= weight
                avg[t] = d
            res_avg.append(avg)
        else:
            res_avg.append([])

    corr_fine = self.calibrate_finedelay(
        low,                # absolute delay value of start scan
        avg_types,          # weights of each of the 8 bit sequences
        res_avg,            # averaged eye data table, each line has 8 elements, or [] for bad measurements
        ends_dist / 256.0,  # do not process if one of the primary interval ends is within this from 0.0 or 1.0
        min_diff / 256.0)   # minimal difference between primary delay steps to process
    period = len(corr_fine)

    # NOTE(review): nesting was reconstructed from a flattened source; both tables are
    # assumed to belong to scan mode only - confirm against the original layout.
    if not adjust:
        print("\n\n\n========== Copy below to the spreadsheet, use columns from corr_delay ==========")
        print("========== First are individual results for each bit, then averaged eye pattern ==========")
        print("delay corr_delay", end=" ")
        for t in range(8):
            for i in range(16):
                print("%02d:%s" % (i, titles[t]), end=" ")
        print()
        for index, data in enumerate(results):
            if len(data) > 0:
                dly = index + low
                corr_dly = dly + corr_fine[dly % period]
                print("%d %.2f" % (dly, corr_dly), end=" ")
                # Fix: rows are stored by list position, not by absolute delay. The
                # original read results[dly] and hid the resulting IndexError (or the
                # wrong row) behind a bare except whenever low != 0.
                for t in range(8):
                    for i in range(16):
                        print("%.4f" % (data[i][t]), end=" ")
                print()

        print("\n\n\n========== Copy below to the spreadsheet, use columns from corr_delay ==========")
        print("========== data above can be used for the individual bits eye patterns ==========")
        print("delay corr_delay", end=" ")
        for t in range(8):
            print(titles[t], end=" ")
        print()
        for index, avg in enumerate(res_avg):
            if len(avg) > 0:
                dly = index + low
                corr_dly = dly + corr_fine[dly % period]
                print("%d %.2f" % (dly, corr_dly), end=" ")
                # Same indexing fix as above: use the row at this list position
                for t in range(8):
                    print("%.4f" % (avg[t]), end=" ")
                print()

    dly_corr = None
    if adjust:
        dly_corr = self.corr_delays(
            low,                # absolute delay value of start scan
            avg_types,          # weights of each of the 8 bit sequences
            results,            # individual per-bit results
            res_avg,            # averaged eye data table, each line has 8 elements, or [] for bad measurements
            corr_fine,          # fine delay correction
            ends_dist / 256.0,  # find where all bits are above/below that distance from 0.0/1.0 margin
            verbose)
        # NOTE(review): these two prints are assumed to sit under `if adjust` - confirm.
        print("dly_corr=", dly_corr)
        print("use_dq=", use_dq)
        if dly_corr and use_dq:  # only adjusting DQ delays, not DQS
            dly_comb = combine_delay(dly_corr)
            # Entries 0..7 and 8..15 are passed as two separate groups (presumably one per byte lane)
            per_lane = (dly_comb[0:8], dly_comb[8:16])
            if use_odelay:
                self.x393_mcntrl_timing.axi_set_dq_odelay(per_lane)
            else:
                self.x393_mcntrl_timing.axi_set_dq_idelay(per_lane)
    return dly_corr