diff --git a/finn-rtllib/memstream/component.xml b/finn-rtllib/memstream/component.xml index 6b728c0555a4889b8e76d5759233d1109a3002bd..7c51b026d0c6f38f087c6cc1efb6d36514e00072 100644 --- a/finn-rtllib/memstream/component.xml +++ b/finn-rtllib/memstream/component.xml @@ -260,7 +260,7 @@ <spirit:parameters> <spirit:parameter> <spirit:name>viewChecksum</spirit:name> - <spirit:value>ba6d3300</spirit:value> + <spirit:value>17734179</spirit:value> </spirit:parameter> </spirit:parameters> </spirit:view> @@ -276,7 +276,7 @@ <spirit:parameters> <spirit:parameter> <spirit:name>viewChecksum</spirit:name> - <spirit:value>54f61a0e</spirit:value> + <spirit:value>7f67dadd</spirit:value> </spirit:parameter> </spirit:parameters> </spirit:view> @@ -290,7 +290,7 @@ <spirit:parameters> <spirit:parameter> <spirit:name>viewChecksum</spirit:name> - <spirit:value>92c3ebfc</spirit:value> + <spirit:value>198c09a6</spirit:value> </spirit:parameter> </spirit:parameters> </spirit:view> @@ -891,9 +891,20 @@ <spirit:fileType>verilogSource</spirit:fileType> </spirit:file> <spirit:file> - <spirit:name>hdl/ramb18.v</spirit:name> + <spirit:name>hdl/memstream_singleblock.v</spirit:name> <spirit:fileType>verilogSource</spirit:fileType> - <spirit:userFileType>CHECKSUM_13578c44</spirit:userFileType> + <spirit:logicalName>xil_defaultlib</spirit:logicalName> + </spirit:file> + <spirit:file> + <spirit:name>hdl/memstream_multiblock.v</spirit:name> + <spirit:fileType>verilogSource</spirit:fileType> + <spirit:logicalName>xil_defaultlib</spirit:logicalName> + </spirit:file> + <spirit:file> + <spirit:name>hdl/ramb18_wf_dualport.v</spirit:name> + <spirit:fileType>verilogSource</spirit:fileType> + <spirit:userFileType>CHECKSUM_669aa918</spirit:userFileType> + <spirit:logicalName>xil_defaultlib</spirit:logicalName> </spirit:file> </spirit:fileSet> <spirit:fileSet> @@ -1039,26 +1050,18 @@ <xilinx:coreExtensions> <xilinx:supportedFamilies> <xilinx:family xilinx:lifeCycle="Production">zynq</xilinx:family> - <xilinx:family xilinx:lifeCycle="Beta">artix7</xilinx:family> - <xilinx:family xilinx:lifeCycle="Beta">artix7l</xilinx:family> - <xilinx:family xilinx:lifeCycle="Beta">kintex7</xilinx:family> - <xilinx:family xilinx:lifeCycle="Beta">kintex7l</xilinx:family> - <xilinx:family xilinx:lifeCycle="Beta">kintexu</xilinx:family> - <xilinx:family xilinx:lifeCycle="Beta">kintexuplus</xilinx:family> - <xilinx:family xilinx:lifeCycle="Beta">spartan7</xilinx:family> - <xilinx:family xilinx:lifeCycle="Beta">aartix7</xilinx:family> - <xilinx:family xilinx:lifeCycle="Beta">aspartan7</xilinx:family> - <xilinx:family xilinx:lifeCycle="Beta">azynq</xilinx:family> <xilinx:family xilinx:lifeCycle="Beta">zynquplus</xilinx:family> <xilinx:family xilinx:lifeCycle="Production">virtexuplus</xilinx:family> + <xilinx:family xilinx:lifeCycle="Beta">virtexuplusHBM</xilinx:family> </xilinx:supportedFamilies> <xilinx:taxonomies> <xilinx:taxonomy>/UserIP</xilinx:taxonomy> </xilinx:taxonomies> <xilinx:displayName>memstream_v1_0</xilinx:displayName> + <xilinx:autoFamilySupportLevel>level_0</xilinx:autoFamilySupportLevel> <xilinx:definitionSource>package_project</xilinx:definitionSource> - <xilinx:coreRevision>2</xilinx:coreRevision> - <xilinx:coreCreationDateTime>2019-11-04T19:37:20Z</xilinx:coreCreationDateTime> + <xilinx:coreRevision>3</xilinx:coreRevision> + <xilinx:coreCreationDateTime>2020-08-01T13:46:24Z</xilinx:coreCreationDateTime> <xilinx:tags> <xilinx:tag xilinx:name="nopcore"/> <xilinx:tag xilinx:name="ui.data.coregen.dd@7a3d79be_ARCHIVE_LOCATION">c:/Users/lucianp/Documents/git/finn-rtllib/memstream</xilinx:tag> @@ -1068,12 +1071,42 @@ <xilinx:tag xilinx:name="ui.data.coregen.dd@79ecbc44_ARCHIVE_LOCATION">c:/Users/lucianp/Documents/git/finn-rtllib/memstream</xilinx:tag> <xilinx:tag xilinx:name="ui.data.coregen.dd@22fd683_ARCHIVE_LOCATION">c:/Users/lucianp/Documents/git/finn-rtllib/memstream</xilinx:tag> <xilinx:tag xilinx:name="ui.data.coregen.dd@2c00346d_ARCHIVE_LOCATION">c:/Users/lucianp/Documents/git/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@47e8b2cc_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@6db142b_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@2b7f1d2b_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@17f050c2_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@52c349b8_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@1207da29_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@28e42a6_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@4a83e7b0_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@3e22c142_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@78a83389_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@5acca2e8_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@372bc97_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@5b4e26f_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@657b69b0_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@57d441f7_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@5598c726_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@3fe6b57b_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@11fe4478_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@4bc38b29_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@30f579fd_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@25dd216_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@2fb0f320_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@3cc35eb5_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@6812be47_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@3284ec0e_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@ad6f0f0_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@27fa599c_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@51f55698_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@db10861_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> + <xilinx:tag xilinx:name="ui.data.coregen.dd@314dfa91_ARCHIVE_LOCATION">/home/lpetrica/Documents/git/finn_upstream/finn-rtllib/memstream</xilinx:tag> </xilinx:tags> </xilinx:coreExtensions> <xilinx:packagingInfo> - <xilinx:xilinxVersion>2019.1.3</xilinx:xilinxVersion> + <xilinx:xilinxVersion>2020.1</xilinx:xilinxVersion> <xilinx:checksum xilinx:scope="busInterfaces" xilinx:value="6d8b2551"/> - <xilinx:checksum xilinx:scope="fileGroups" xilinx:value="5e0c4694"/> + <xilinx:checksum xilinx:scope="fileGroups" xilinx:value="635cbdae"/> <xilinx:checksum xilinx:scope="ports" xilinx:value="cabd7433"/> <xilinx:checksum xilinx:scope="hdlParameters" xilinx:value="f63127c8"/> <xilinx:checksum xilinx:scope="parameters" xilinx:value="5365a08b"/> diff --git a/finn-rtllib/memstream/hdl/memstream.v b/finn-rtllib/memstream/hdl/memstream.v index 28acb301a583f7437c580744bae7bdc4aef76337..961103e4ca1261ab0109ad9db291a1a66f9c0915 100644 --- a/finn-rtllib/memstream/hdl/memstream.v +++ b/finn-rtllib/memstream/hdl/memstream.v @@ -109,359 +109,141 @@ module memstream ); -//calculate number of RAMB18 blocks we need depth-wise -localparam NMEMBLOCKS = (MEM_DEPTH+1023) / 1024; //ceil(MEM_DEPTH/1024) - -//calculate width of address for each block -localparam BLOCKADRWIDTH = NMEMBLOCKS > 1 ? 10 : $clog2(MEM_DEPTH); - -//determine whether a stream needs to multiplex between memory blocks -localparam STRM0_MUX = ((STRM0_OFFSET/1024) != ((STRM0_OFFSET+STRM0_DEPTH)/1024)); -localparam STRM1_MUX = ((STRM1_OFFSET/1024) != ((STRM1_OFFSET+STRM1_DEPTH)/1024)); -localparam STRM2_MUX = ((STRM2_OFFSET/1024) != ((STRM2_OFFSET+STRM2_DEPTH)/1024)); -localparam STRM3_MUX = ((STRM3_OFFSET/1024) != ((STRM3_OFFSET+STRM3_DEPTH)/1024)); -localparam STRM4_MUX = ((STRM4_OFFSET/1024) != ((STRM4_OFFSET+STRM4_DEPTH)/1024)); -localparam STRM5_MUX = ((STRM5_OFFSET/1024) != ((STRM5_OFFSET+STRM5_DEPTH)/1024)); - -//determine what the base block of each stream is -localparam STRM0_BLOCK = (STRM0_OFFSET/1024); -localparam STRM1_BLOCK = (STRM1_OFFSET/1024); -localparam STRM2_BLOCK = (STRM2_OFFSET/1024); -localparam STRM3_BLOCK = (STRM3_OFFSET/1024); -localparam STRM4_BLOCK = (STRM4_OFFSET/1024); -localparam STRM5_BLOCK = (STRM5_OFFSET/1024); - -//determine what the end block of each stream is -localparam STRM0_END_BLOCK = ((STRM0_OFFSET+STRM0_DEPTH-1)/1024); -localparam STRM1_END_BLOCK = ((STRM1_OFFSET+STRM1_DEPTH-1)/1024); -localparam STRM2_END_BLOCK = ((STRM2_OFFSET+STRM2_DEPTH-1)/1024); -localparam STRM3_END_BLOCK = ((STRM3_OFFSET+STRM3_DEPTH-1)/1024); -localparam STRM4_END_BLOCK = ((STRM4_OFFSET+STRM4_DEPTH-1)/1024); -localparam STRM5_END_BLOCK = ((STRM5_OFFSET+STRM5_DEPTH-1)/1024); - -//determine the number of blocks spanned by each stream -localparam STRM0_NBLOCKS = STRM0_END_BLOCK - STRM0_BLOCK + 1; -localparam STRM1_NBLOCKS = STRM1_END_BLOCK - STRM1_BLOCK + 1; -localparam STRM2_NBLOCKS = STRM2_END_BLOCK - STRM2_BLOCK + 1; -localparam STRM3_NBLOCKS = STRM3_END_BLOCK - STRM3_BLOCK + 1; -localparam STRM4_NBLOCKS = STRM4_END_BLOCK - STRM4_BLOCK + 1; -localparam STRM5_NBLOCKS = STRM5_END_BLOCK - STRM5_BLOCK + 1; - -//TODO: check that memory width is equal to the widest stream -//TODO: check that the stream depths and offsets make sense, and that the memory depth is sufficient (or calculate depth here?) -initial begin - if((NSTREAMS < 1) | (NSTREAMS > 6)) begin - $display("Invalid setting for NSTREAMS, please set in range [1,6]"); - $finish(); - end -end - -//invert reset -wire rst; -assign rst = ~aresetn; - -//WARNING: pipeline depth is larger than the number of streams per port so we have in-flight writes that may see not-ready when they get executed -//solution: use prog-full to make sure we have an equal number of free slots in the stream to the read pipeline depth - -reg [$clog2(MEM_DEPTH)-1:0] strm0_addr = STRM0_OFFSET; -reg [$clog2(MEM_DEPTH)-1:0] strm1_addr = STRM1_OFFSET; -reg [$clog2(MEM_DEPTH)-1:0] strm2_addr = STRM2_OFFSET; -reg [$clog2(MEM_DEPTH)-1:0] strm3_addr = STRM3_OFFSET; -reg [$clog2(MEM_DEPTH)-1:0] strm4_addr = STRM4_OFFSET; -reg [$clog2(MEM_DEPTH)-1:0] strm5_addr = STRM5_OFFSET; - -reg strm0_incr_en; -reg strm1_incr_en; -reg strm2_incr_en; -reg strm3_incr_en; -reg strm4_incr_en; -reg strm5_incr_en; - -wire strm0_rst; -wire strm1_rst; -wire strm2_rst; -wire strm3_rst; -wire strm4_rst; -wire strm5_rst; - -reg strm0_ready; -reg strm1_ready; -reg strm2_ready; -reg strm3_ready; -reg strm4_ready; -reg strm5_ready; - -//arbiter: work on one stream at a time -//multiplex each port between (up to) half of the streams -reg [1:0] current_stream_porta = 0; -reg [1:0] current_stream_portb = 0; - -always @(posedge aclk) begin - if(rst) - current_stream_porta <= 0; - else case(current_stream_porta) - 0: current_stream_porta <= strm2_ready ? 1 : strm4_ready ? 2 : 0; - 1: current_stream_porta <= strm4_ready ? 2 : strm0_ready ? 0 : 1; - 2: current_stream_porta <= strm0_ready ? 0 : strm2_ready ? 1 : 2; - endcase - if(rst) - current_stream_portb <= 0; - else case(current_stream_portb) - 0: current_stream_portb <= strm3_ready ? 1 : strm5_ready ? 2 : 0; - 1: current_stream_portb <= strm5_ready ? 2 : strm1_ready ? 0 : 1; - 2: current_stream_portb <= strm1_ready ? 0 : strm3_ready ? 1 : 2; - endcase -end +generate +if(NSTREAMS <= 2) begin: singleblock -always @(posedge aclk) begin - if(rst) begin - strm0_incr_en <= 0; - strm1_incr_en <= 0; - strm2_incr_en <= 0; - strm3_incr_en <= 0; - strm4_incr_en <= 0; - strm5_incr_en <= 0; - end else begin - strm0_incr_en <= (current_stream_porta == 0) & strm0_ready; - strm1_incr_en <= (current_stream_portb == 0) & strm1_ready; - strm2_incr_en <= (current_stream_porta == 1) & strm2_ready; - strm3_incr_en <= (current_stream_portb == 1) & strm3_ready; - strm4_incr_en <= (current_stream_porta == 2) & strm4_ready; - strm5_incr_en <= (current_stream_portb == 2) & strm5_ready; - end -end - -assign strm0_rst = strm0_incr_en & (strm0_addr == (STRM0_OFFSET + STRM0_DEPTH-1)); -assign strm1_rst = strm1_incr_en & (strm1_addr == (STRM1_OFFSET + STRM1_DEPTH-1)); -assign strm2_rst = strm2_incr_en & (strm2_addr == (STRM2_OFFSET + STRM2_DEPTH-1)); -assign strm3_rst = strm3_incr_en & (strm3_addr == (STRM3_OFFSET + STRM3_DEPTH-1)); -assign strm4_rst = strm4_incr_en & (strm4_addr == (STRM4_OFFSET + STRM4_DEPTH-1)); -assign strm5_rst = strm5_incr_en & (strm5_addr == (STRM5_OFFSET + STRM5_DEPTH-1)); - -always @(posedge aclk) begin - strm0_ready <= ~m_axis_0_afull; - strm1_ready <= ~m_axis_1_afull & (NSTREAMS >= 2); - strm2_ready <= ~m_axis_2_afull & (NSTREAMS >= 3); - strm3_ready <= ~m_axis_3_afull & (NSTREAMS >= 4); - strm4_ready <= ~m_axis_4_afull & (NSTREAMS >= 5); - strm5_ready <= ~m_axis_5_afull & (NSTREAMS >= 6); -end -//one address counter per stream; more LUTs but keeps routing short and local -always @(posedge aclk) begin - if(strm0_rst | rst) - strm0_addr <= STRM0_OFFSET; - else if(strm0_incr_en) - strm0_addr <= strm0_addr + 1; - if(strm1_rst | rst) - strm1_addr <= STRM1_OFFSET; - else if(strm1_incr_en) - strm1_addr <= strm1_addr + 1; - if(strm2_rst | rst) - strm2_addr <= STRM2_OFFSET; - else if(strm2_incr_en) - strm2_addr <= strm2_addr + 1; - if(strm3_rst | rst) - strm3_addr <= STRM3_OFFSET; - else if(strm3_incr_en) - strm3_addr <= strm3_addr + 1; - if(strm4_rst | rst) - strm4_addr <= STRM4_OFFSET; - else if(strm4_incr_en) - strm4_addr <= strm4_addr + 1; - if(strm5_rst | rst) - strm5_addr <= STRM5_OFFSET; - else if(strm5_incr_en) - strm5_addr <= strm5_addr + 1; -end - -reg [$clog2(MEM_DEPTH)-1:0] addra; -wire [MEM_WIDTH*NMEMBLOCKS-1:0] rdqa; - -reg [$clog2(MEM_DEPTH)-1:0] addrb; -wire [MEM_WIDTH*NMEMBLOCKS-1:0] rdqb; - -wire [NMEMBLOCKS-1:0] we; - -reg [1:0] addr_select_porta; -reg [1:0] addr_select_portb; - -//multiplex addresses of various streams into address ports of memory -always @(posedge aclk) begin - addr_select_porta <= current_stream_porta; - case(addr_select_porta) - 0: addra <= strm0_addr; - 1: addra <= strm2_addr; - 2: addra <= strm4_addr; - endcase - addr_select_portb <= current_stream_portb; - case(addr_select_portb) - 0: addrb <= strm1_addr; - 1: addrb <= strm3_addr; - 2: addrb <= strm5_addr; - endcase -end +memstream_singleblock +#( + .CONFIG_EN(CONFIG_EN), + .NSTREAMS(NSTREAMS), + .MEM_DEPTH(MEM_DEPTH), + .MEM_WIDTH(MEM_WIDTH), + .MEM_INIT(MEM_INIT), + .RAM_STYLE(RAM_STYLE), -genvar g; -generate for(g=0; g<NMEMBLOCKS; g=g+1) begin: blockports + //widths per stream + .STRM0_WIDTH(STRM0_WIDTH), + .STRM1_WIDTH(STRM1_WIDTH), -assign we[g] = (CONFIG_EN == 1) & config_ce & config_we & (config_address[31:BLOCKADRWIDTH] == g); + //depths per stream + .STRM0_DEPTH(STRM0_DEPTH), + .STRM1_DEPTH(STRM1_DEPTH), -ramb18_wf_dualport -#( - .ID(g), - .DWIDTH(MEM_WIDTH), - .AWIDTH(BLOCKADRWIDTH), - .MEM_INIT(MEM_INIT), - .RAM_STYLE(RAM_STYLE) + //offsets for each stream + .STRM0_OFFSET(STRM0_OFFSET), + .STRM1_OFFSET(STRM1_OFFSET) ) -ram +mem ( - .clk(aclk), - - .wea(we[g]), - .addra(we[g] ? config_address[BLOCKADRWIDTH-1:0] : addra[BLOCKADRWIDTH-1:0]), - .wdataa(config_d0), - .rdqa(rdqa[(g+1)*MEM_WIDTH-1:g*MEM_WIDTH]), - - .web(1'b0), - .addrb(addrb[BLOCKADRWIDTH-1:0]), - .wdatab('d0), - .rdqb(rdqb[(g+1)*MEM_WIDTH-1:g*MEM_WIDTH]) + .aclk(aclk), + .aresetn(aresetn), + + .config_address(config_address), + .config_ce(config_ce), + .config_we(config_we), + .config_d0(config_d0), + .config_q0(config_q0), + + .m_axis_0_tready(m_axis_0_tready), + .m_axis_0_tvalid(m_axis_0_tvalid), + .m_axis_0_tdata(m_axis_0_tdata), + + .m_axis_1_tready(m_axis_1_tready), + .m_axis_1_tvalid(m_axis_1_tvalid), + .m_axis_1_tdata(m_axis_1_tdata) ); -end -endgenerate - -integer i; - -generate if(NMEMBLOCKS > 1) begin: multiblock - -wire [MEM_WIDTH-1:0] rdqmux[5:0]; - -reg [$clog2(MEM_DEPTH)-BLOCKADRWIDTH-1:0] rdblocka[2:0]; -reg [$clog2(MEM_DEPTH)-BLOCKADRWIDTH-1:0] rdblockb[2:0]; - -always @(posedge aclk) begin - rdblocka[0] <= addra[$clog2(MEM_DEPTH)-1:BLOCKADRWIDTH]; - rdblockb[0] <= addrb[$clog2(MEM_DEPTH)-1:BLOCKADRWIDTH]; - for(i=0; i<2; i=i+1) begin - rdblocka[i+1] <= rdblocka[i]; - rdblockb[i+1] <= rdblockb[i]; - end -end - -if(NSTREAMS >= 1) begin: en_strm0 - if(STRM0_MUX == 1) begin: mux0 - mux #(STRM0_NBLOCKS, MEM_WIDTH) m(rdqa[(STRM0_BLOCK+STRM0_NBLOCKS)*MEM_WIDTH-1:STRM0_BLOCK*MEM_WIDTH],rdqmux[0],rdblocka[1] - STRM0_BLOCK); - end else begin: nomux0 - assign rdqmux[0] = rdqa[(STRM0_BLOCK+1)*MEM_WIDTH-1:STRM0_BLOCK*MEM_WIDTH]; - end - assign m_axis_0_tdata = rdqmux[0][STRM0_WIDTH-1:0]; -end - -if(NSTREAMS >= 2) begin: en_strm1 - if(STRM1_MUX == 1) begin: mux1 - mux #(STRM1_NBLOCKS, MEM_WIDTH) m(rdqb[(STRM1_BLOCK+STRM1_NBLOCKS)*MEM_WIDTH-1:STRM1_BLOCK*MEM_WIDTH],rdqmux[1],rdblockb[1] - STRM1_BLOCK); - end else begin: nomux1 - assign rdqmux[1] = rdqb[(STRM1_BLOCK+1)*MEM_WIDTH-1:STRM1_BLOCK*MEM_WIDTH]; - end - assign m_axis_1_tdata = rdqmux[1][STRM1_WIDTH-1:0]; -end +assign m_axis_2_tvalid = 0; +assign m_axis_2_tdata = 0; +assign m_axis_3_tvalid = 0; +assign m_axis_3_tdata = 0; +assign m_axis_4_tvalid = 0; +assign m_axis_4_tdata = 0; +assign m_axis_5_tvalid = 0; +assign m_axis_5_tdata = 0; -if(NSTREAMS >= 3) begin: en_strm2 - if(STRM2_MUX == 1) begin: mux2 - mux #(STRM2_NBLOCKS, MEM_WIDTH) m(rdqa[(STRM2_BLOCK+STRM2_NBLOCKS)*MEM_WIDTH-1:STRM2_BLOCK*MEM_WIDTH],rdqmux[2],rdblocka[1] - STRM2_BLOCK); - end else begin: nomux2 - assign rdqmux[2] = rdqa[(STRM2_BLOCK+1)*MEM_WIDTH-1:STRM2_BLOCK*MEM_WIDTH]; - end - assign m_axis_2_tdata = rdqmux[2][STRM2_WIDTH-1:0]; -end +end else begin: multiblock -if(NSTREAMS >= 4) begin: en_strm3 - if(STRM3_MUX == 1) begin: mux3 - mux #(STRM3_NBLOCKS, MEM_WIDTH) m(rdqb[(STRM3_BLOCK+STRM3_NBLOCKS)*MEM_WIDTH-1:STRM3_BLOCK*MEM_WIDTH],rdqmux[3],rdblockb[1] - STRM3_BLOCK); - end else begin: nomux3 - assign rdqmux[3] = rdqb[(STRM3_BLOCK+1)*MEM_WIDTH-1:STRM3_BLOCK*MEM_WIDTH]; - end - assign m_axis_3_tdata = rdqmux[3][STRM3_WIDTH-1:0]; -end -if(NSTREAMS >= 5) begin: en_strm4 - if(STRM4_MUX == 1) begin: mux4 - mux #(STRM4_NBLOCKS, MEM_WIDTH) m(rdqa[(STRM4_BLOCK+STRM4_NBLOCKS)*MEM_WIDTH-1:STRM4_BLOCK*MEM_WIDTH],rdqmux[4],rdblocka[1] - STRM4_BLOCK); - end else begin: nomux4 - assign rdqmux[4] = rdqa[(STRM4_BLOCK+1)*MEM_WIDTH-1:STRM4_BLOCK*MEM_WIDTH]; - end - assign m_axis_4_tdata = rdqmux[4][STRM4_WIDTH-1:0]; -end +memstream_multiblock +#( + .CONFIG_EN(CONFIG_EN), + .NSTREAMS(NSTREAMS), + .MEM_DEPTH(MEM_DEPTH), + .MEM_WIDTH(MEM_WIDTH), + .MEM_INIT(MEM_INIT), + .RAM_STYLE(RAM_STYLE), -if(NSTREAMS >= 6) begin: en_strm5 - if(STRM5_MUX == 1) begin: mux5 - mux #(STRM5_NBLOCKS, MEM_WIDTH) m(rdqb[(STRM5_BLOCK+STRM5_NBLOCKS)*MEM_WIDTH-1:STRM5_BLOCK*MEM_WIDTH],rdqmux[5],rdblockb[1] - STRM5_BLOCK); - end else begin: nomux5 - assign rdqmux[5] = rdqb[(STRM5_BLOCK+1)*MEM_WIDTH-1:STRM5_BLOCK*MEM_WIDTH]; - end - assign m_axis_5_tdata = rdqmux[5][STRM5_WIDTH-1:0]; -end + //widths per stream + .STRM0_WIDTH(STRM0_WIDTH), + .STRM1_WIDTH(STRM1_WIDTH), + .STRM2_WIDTH(STRM2_WIDTH), + .STRM3_WIDTH(STRM3_WIDTH), + .STRM4_WIDTH(STRM4_WIDTH), + .STRM5_WIDTH(STRM5_WIDTH), + + //depths per stream + .STRM0_DEPTH(STRM0_DEPTH), + .STRM1_DEPTH(STRM1_DEPTH), + .STRM2_DEPTH(STRM2_DEPTH), + .STRM3_DEPTH(STRM3_DEPTH), + .STRM4_DEPTH(STRM4_DEPTH), + .STRM5_DEPTH(STRM5_DEPTH), + + //offsets for each stream + .STRM0_OFFSET(STRM0_OFFSET), + .STRM1_OFFSET(STRM1_OFFSET), + .STRM2_OFFSET(STRM2_OFFSET), + .STRM3_OFFSET(STRM3_OFFSET), + .STRM4_OFFSET(STRM4_OFFSET), + .STRM5_OFFSET(STRM5_OFFSET) +) +mem +( + .aclk(aclk), + .aresetn(aresetn), + + .config_address(config_address), + .config_ce(config_ce), + .config_we(config_we), + .config_d0(config_d0), + .config_q0(config_q0), + + .m_axis_0_afull(m_axis_0_afull), + .m_axis_0_tready(m_axis_0_tready), + .m_axis_0_tvalid(m_axis_0_tvalid), + .m_axis_0_tdata(m_axis_0_tdata), + + .m_axis_1_afull(m_axis_1_afull), + .m_axis_1_tready(m_axis_1_tready), + .m_axis_1_tvalid(m_axis_1_tvalid), + .m_axis_1_tdata(m_axis_1_tdata), + + .m_axis_2_afull(m_axis_2_afull), + .m_axis_2_tready(m_axis_2_tready), + .m_axis_2_tvalid(m_axis_2_tvalid), + .m_axis_2_tdata(m_axis_2_tdata), + + .m_axis_3_afull(m_axis_3_afull), + .m_axis_3_tready(m_axis_3_tready), + .m_axis_3_tvalid(m_axis_3_tvalid), + .m_axis_3_tdata(m_axis_3_tdata), + + .m_axis_4_afull(m_axis_4_afull), + .m_axis_4_tready(m_axis_4_tready), + .m_axis_4_tvalid(m_axis_4_tvalid), + .m_axis_4_tdata(m_axis_4_tdata), + + .m_axis_5_afull(m_axis_5_afull), + .m_axis_5_tready(m_axis_5_tready), + .m_axis_5_tvalid(m_axis_5_tvalid), + .m_axis_5_tdata(m_axis_5_tdata) -end else begin: singleblock +); -if(NSTREAMS >= 1) begin: en_strm0_direct - assign m_axis_0_tdata = rdqa[STRM0_WIDTH-1:0]; -end -if(NSTREAMS >= 2) begin: en_strm1_direct - assign m_axis_1_tdata = rdqb[STRM1_WIDTH-1:0]; -end -if(NSTREAMS >= 3) begin: en_strm2_direct - assign m_axis_2_tdata = rdqa[STRM2_WIDTH-1:0]; -end -if(NSTREAMS >= 4) begin: en_strm3_direct - assign m_axis_3_tdata = rdqb[STRM3_WIDTH-1:0]; -end -if(NSTREAMS >= 5) begin: en_strm4_direct - assign m_axis_4_tdata = rdqa[STRM4_WIDTH-1:0]; -end -if(NSTREAMS >= 6) begin: en_strm5_direct - assign m_axis_5_tdata = rdqb[STRM5_WIDTH-1:0]; -end end endgenerate -//output to AXI Streams -reg tvalid_pipe0[2:0]; -reg tvalid_pipe1[2:0]; -reg tvalid_pipe2[2:0]; -reg tvalid_pipe3[2:0]; -reg tvalid_pipe4[2:0]; -reg tvalid_pipe5[2:0]; - -assign m_axis_0_tvalid = tvalid_pipe0[2]; -assign m_axis_1_tvalid = tvalid_pipe1[2]; -assign m_axis_2_tvalid = tvalid_pipe2[2]; -assign m_axis_3_tvalid = tvalid_pipe3[2]; -assign m_axis_4_tvalid = tvalid_pipe4[2]; -assign m_axis_5_tvalid = tvalid_pipe5[2]; - - -always @(posedge aclk) begin - tvalid_pipe0[0] <= strm0_incr_en; - tvalid_pipe1[0] <= strm1_incr_en; - tvalid_pipe2[0] <= strm2_incr_en; - tvalid_pipe3[0] <= strm3_incr_en; - tvalid_pipe4[0] <= strm4_incr_en; - tvalid_pipe5[0] <= strm5_incr_en; - for(i=0; i<2; i=i+1) begin: srl - tvalid_pipe0[i+1] <= tvalid_pipe0[i]; - tvalid_pipe1[i+1] <= tvalid_pipe1[i]; - tvalid_pipe2[i+1] <= tvalid_pipe2[i]; - tvalid_pipe3[i+1] <= tvalid_pipe3[i]; - tvalid_pipe4[i+1] <= tvalid_pipe4[i]; - tvalid_pipe5[i+1] <= tvalid_pipe5[i]; - end -end - -assign config_q0 = 0; - endmodule diff --git a/finn-rtllib/memstream/hdl/memstream_multiblock.v b/finn-rtllib/memstream/hdl/memstream_multiblock.v new file mode 100644 index 0000000000000000000000000000000000000000..017088b8c1572bb3baa2a5a46336509187a762ab --- /dev/null +++ b/finn-rtllib/memstream/hdl/memstream_multiblock.v @@ -0,0 +1,471 @@ +/* + Copyright (c) 2020, Xilinx + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name of FINN nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +module memstream_multiblock +#( +//parameters to enable/disable axi-mm, set number of streams, set readmemh for memory, set per-stream offsets in memory, set per-stream widths + parameter CONFIG_EN = 1, + parameter NSTREAMS = 6,//1 up to 6 + + parameter MEM_DEPTH = 13824, + parameter MEM_WIDTH = 32, + parameter MEM_INIT = "./", + parameter RAM_STYLE = "auto", + + //widths per stream + parameter STRM0_WIDTH = 32, + parameter STRM1_WIDTH = 32, + parameter STRM2_WIDTH = 32, + parameter STRM3_WIDTH = 32, + parameter STRM4_WIDTH = 32, + parameter STRM5_WIDTH = 32, + + //depths per stream + parameter STRM0_DEPTH = 2304, + parameter STRM1_DEPTH = 2304, + parameter STRM2_DEPTH = 2304, + parameter STRM3_DEPTH = 2304, + parameter STRM4_DEPTH = 2304, + parameter STRM5_DEPTH = 2304, + + //offsets for each stream + parameter STRM0_OFFSET = 0, + parameter STRM1_OFFSET = 2304, + parameter STRM2_OFFSET = 4608, + parameter STRM3_OFFSET = 6912, + parameter STRM4_OFFSET = 9216, + parameter STRM5_OFFSET = 11520 +) + +( + input aclk, + input aresetn, + + //optional configuration interface compatible with ap_memory + input [31:0] config_address, + input config_ce, + input config_we, + input [31:0] config_d0, + output [31:0] config_q0, + + //multiple output AXI Streams, TDATA width rounded to multiple of 8 bits + input m_axis_0_afull, + input m_axis_0_tready, + output m_axis_0_tvalid, + output [((STRM0_WIDTH+7)/8)*8-1:0] m_axis_0_tdata, + + input m_axis_1_afull, + input m_axis_1_tready, + output m_axis_1_tvalid, + output [((STRM1_WIDTH+7)/8)*8-1:0] m_axis_1_tdata, + + input m_axis_2_afull, + input m_axis_2_tready, + output m_axis_2_tvalid, + output [((STRM2_WIDTH+7)/8)*8-1:0] m_axis_2_tdata, + + input m_axis_3_afull, + input m_axis_3_tready, + output m_axis_3_tvalid, + output [((STRM3_WIDTH+7)/8)*8-1:0] m_axis_3_tdata, + + input m_axis_4_afull, + input m_axis_4_tready, + output m_axis_4_tvalid, + output [((STRM4_WIDTH+7)/8)*8-1:0] m_axis_4_tdata, + + input m_axis_5_afull, + input m_axis_5_tready, + output m_axis_5_tvalid, + output [((STRM5_WIDTH+7)/8)*8-1:0] m_axis_5_tdata + + +); + +//calculate number of RAMB18 blocks we need depth-wise +localparam NMEMBLOCKS = (MEM_DEPTH+1023) / 1024; //ceil(MEM_DEPTH/1024) + +//calculate width of address for each block +localparam BLOCKADRWIDTH = NMEMBLOCKS > 1 ? 10 : $clog2(MEM_DEPTH); + +//determine whether a stream needs to multiplex between memory blocks +localparam STRM0_MUX = ((STRM0_OFFSET/1024) != ((STRM0_OFFSET+STRM0_DEPTH)/1024)); +localparam STRM1_MUX = ((STRM1_OFFSET/1024) != ((STRM1_OFFSET+STRM1_DEPTH)/1024)); +localparam STRM2_MUX = ((STRM2_OFFSET/1024) != ((STRM2_OFFSET+STRM2_DEPTH)/1024)); +localparam STRM3_MUX = ((STRM3_OFFSET/1024) != ((STRM3_OFFSET+STRM3_DEPTH)/1024)); +localparam STRM4_MUX = ((STRM4_OFFSET/1024) != ((STRM4_OFFSET+STRM4_DEPTH)/1024)); +localparam STRM5_MUX = ((STRM5_OFFSET/1024) != ((STRM5_OFFSET+STRM5_DEPTH)/1024)); + +//determine what the base block of each stream is +localparam STRM0_BLOCK = (STRM0_OFFSET/1024); +localparam STRM1_BLOCK = (STRM1_OFFSET/1024); +localparam STRM2_BLOCK = (STRM2_OFFSET/1024); +localparam STRM3_BLOCK = (STRM3_OFFSET/1024); +localparam STRM4_BLOCK = (STRM4_OFFSET/1024); +localparam STRM5_BLOCK = (STRM5_OFFSET/1024); + +//determine what the end block of each stream is +localparam STRM0_END_BLOCK = ((STRM0_OFFSET+STRM0_DEPTH-1)/1024); +localparam STRM1_END_BLOCK = ((STRM1_OFFSET+STRM1_DEPTH-1)/1024); +localparam STRM2_END_BLOCK = ((STRM2_OFFSET+STRM2_DEPTH-1)/1024); +localparam STRM3_END_BLOCK = ((STRM3_OFFSET+STRM3_DEPTH-1)/1024); +localparam STRM4_END_BLOCK = ((STRM4_OFFSET+STRM4_DEPTH-1)/1024); +localparam STRM5_END_BLOCK = ((STRM5_OFFSET+STRM5_DEPTH-1)/1024); + +//determine the number of blocks spanned by each stream +localparam STRM0_NBLOCKS = STRM0_END_BLOCK - STRM0_BLOCK + 1; +localparam STRM1_NBLOCKS = STRM1_END_BLOCK - STRM1_BLOCK + 1; +localparam STRM2_NBLOCKS = STRM2_END_BLOCK - STRM2_BLOCK + 1; +localparam STRM3_NBLOCKS = STRM3_END_BLOCK - STRM3_BLOCK + 1; +localparam STRM4_NBLOCKS = STRM4_END_BLOCK - STRM4_BLOCK + 1; +localparam STRM5_NBLOCKS = STRM5_END_BLOCK - STRM5_BLOCK + 1; + +//TODO: check that memory width is equal to the widest stream +//TODO: check that the stream depths and offsets make sense, and that the memory depth is sufficient (or calculate depth here?) +initial begin + if((NSTREAMS < 1) | (NSTREAMS > 6)) begin + $display("Invalid setting for NSTREAMS, please set in range [1,6]"); + $finish(); + end +end + +//invert reset +wire rst; +assign rst = ~aresetn; + +//WARNING: pipeline depth is larger than the number of streams per port so we have in-flight writes that may see not-ready when they get executed +//solution: use prog-full to make sure we have an equal number of free slots in the stream to the read pipeline depth + +reg [$clog2(MEM_DEPTH)-1:0] strm0_addr = STRM0_OFFSET; +reg [$clog2(MEM_DEPTH)-1:0] strm1_addr = STRM1_OFFSET; +reg [$clog2(MEM_DEPTH)-1:0] strm2_addr = STRM2_OFFSET; +reg [$clog2(MEM_DEPTH)-1:0] strm3_addr = STRM3_OFFSET; +reg [$clog2(MEM_DEPTH)-1:0] strm4_addr = STRM4_OFFSET; +reg [$clog2(MEM_DEPTH)-1:0] strm5_addr = STRM5_OFFSET; + +reg strm0_incr_en; +reg strm1_incr_en; +reg strm2_incr_en; +reg strm3_incr_en; +reg strm4_incr_en; +reg strm5_incr_en; + +wire strm0_rst; +wire strm1_rst; +wire strm2_rst; +wire strm3_rst; +wire strm4_rst; +wire strm5_rst; + +reg strm0_ready; +reg strm1_ready; +reg strm2_ready; +reg strm3_ready; +reg strm4_ready; +reg strm5_ready; + +//arbiter: work on one stream at a time +//multiplex each port between (up to) half of the streams +reg [1:0] current_stream_porta = 0; +reg [1:0] current_stream_portb = 0; + +always @(posedge aclk) begin + if(rst) + current_stream_porta <= 0; + else case(current_stream_porta) + 0: current_stream_porta <= strm2_ready ? 1 : strm4_ready ? 2 : 0; + 1: current_stream_porta <= strm4_ready ? 2 : strm0_ready ? 0 : 1; + 2: current_stream_porta <= strm0_ready ? 0 : strm2_ready ? 1 : 2; + endcase + if(rst) + current_stream_portb <= 0; + else case(current_stream_portb) + 0: current_stream_portb <= strm3_ready ? 1 : strm5_ready ? 2 : 0; + 1: current_stream_portb <= strm5_ready ? 2 : strm1_ready ? 0 : 1; + 2: current_stream_portb <= strm1_ready ? 0 : strm3_ready ? 1 : 2; + endcase +end + +always @(posedge aclk) begin + if(rst) begin + strm0_incr_en <= 0; + strm1_incr_en <= 0; + strm2_incr_en <= 0; + strm3_incr_en <= 0; + strm4_incr_en <= 0; + strm5_incr_en <= 0; + end else begin + strm0_incr_en <= (current_stream_porta == 0) & strm0_ready; + strm1_incr_en <= (current_stream_portb == 0) & strm1_ready; + strm2_incr_en <= (current_stream_porta == 1) & strm2_ready; + strm3_incr_en <= (current_stream_portb == 1) & strm3_ready; + strm4_incr_en <= (current_stream_porta == 2) & strm4_ready; + strm5_incr_en <= (current_stream_portb == 2) & strm5_ready; + end +end + +assign strm0_rst = strm0_incr_en & (strm0_addr == (STRM0_OFFSET + STRM0_DEPTH-1)); +assign strm1_rst = strm1_incr_en & (strm1_addr == (STRM1_OFFSET + STRM1_DEPTH-1)); +assign strm2_rst = strm2_incr_en & (strm2_addr == (STRM2_OFFSET + STRM2_DEPTH-1)); +assign strm3_rst = strm3_incr_en & (strm3_addr == (STRM3_OFFSET + STRM3_DEPTH-1)); +assign strm4_rst = strm4_incr_en & (strm4_addr == (STRM4_OFFSET + STRM4_DEPTH-1)); +assign strm5_rst = strm5_incr_en & (strm5_addr == (STRM5_OFFSET + STRM5_DEPTH-1)); + +always @(posedge aclk) begin + strm0_ready <= ~m_axis_0_afull; + strm1_ready <= ~m_axis_1_afull & (NSTREAMS >= 2); + strm2_ready <= ~m_axis_2_afull & (NSTREAMS >= 3); + strm3_ready <= ~m_axis_3_afull & (NSTREAMS >= 4); + strm4_ready <= ~m_axis_4_afull & (NSTREAMS >= 5); + strm5_ready <= ~m_axis_5_afull & (NSTREAMS >= 6); +end + +//one address counter per stream; more LUTs but keeps routing short and local +always @(posedge aclk) begin + if(strm0_rst | rst) + strm0_addr <= STRM0_OFFSET; + else if(strm0_incr_en) + strm0_addr <= strm0_addr + 1; + if(strm1_rst | rst) + strm1_addr <= STRM1_OFFSET; + else if(strm1_incr_en) + strm1_addr <= strm1_addr + 1; + if(strm2_rst | rst) + strm2_addr <= STRM2_OFFSET; + else if(strm2_incr_en) + strm2_addr <= strm2_addr + 1; + if(strm3_rst | rst) + strm3_addr <= STRM3_OFFSET; + else if(strm3_incr_en) + strm3_addr <= strm3_addr + 1; + if(strm4_rst | rst) + strm4_addr <= STRM4_OFFSET; + else if(strm4_incr_en) + strm4_addr <= strm4_addr + 1; + if(strm5_rst | rst) + strm5_addr <= STRM5_OFFSET; + else if(strm5_incr_en) + strm5_addr <= strm5_addr + 1; +end + +reg [$clog2(MEM_DEPTH)-1:0] addra; +wire [MEM_WIDTH*NMEMBLOCKS-1:0] rdqa; + +reg [$clog2(MEM_DEPTH)-1:0] addrb; +wire [MEM_WIDTH*NMEMBLOCKS-1:0] rdqb; + +wire [NMEMBLOCKS-1:0] we; + +reg [1:0] addr_select_porta; +reg [1:0] addr_select_portb; + +//multiplex addresses of various streams into address ports of memory +always @(posedge aclk) begin + addr_select_porta <= current_stream_porta; + case(addr_select_porta) + 0: addra <= strm0_addr; + 1: addra <= strm2_addr; + 2: addra <= strm4_addr; + endcase + addr_select_portb <= current_stream_portb; + case(addr_select_portb) + 0: addrb <= strm1_addr; + 1: addrb <= strm3_addr; + 2: addrb <= strm5_addr; + endcase +end + +genvar g; +generate for(g=0; g<NMEMBLOCKS; g=g+1) begin: blockports + +assign we[g] = (CONFIG_EN == 1) & config_ce & config_we & (config_address[31:BLOCKADRWIDTH] == g); + +ramb18_wf_dualport +#( + .ID(g), + .DWIDTH(MEM_WIDTH), + .AWIDTH(BLOCKADRWIDTH), + .MEM_INIT(MEM_INIT), + .RAM_STYLE(RAM_STYLE) +) +ram +( + .clk(aclk), + + .wea(we[g]), + .ena(1'b1), + .enqa(1'b1), + .addra(we[g] ? config_address[BLOCKADRWIDTH-1:0] : addra[BLOCKADRWIDTH-1:0]), + .wdataa(config_d0), + .rdqa(rdqa[(g+1)*MEM_WIDTH-1:g*MEM_WIDTH]), + + .web(1'b0), + .enb(1'b1), + .enqb(1'b1), + .addrb(addrb[BLOCKADRWIDTH-1:0]), + .wdatab('d0), + .rdqb(rdqb[(g+1)*MEM_WIDTH-1:g*MEM_WIDTH]) +); + +end +endgenerate + +integer i; + +generate if(NMEMBLOCKS > 1) begin: multiblock + +wire [MEM_WIDTH-1:0] rdqmux[5:0]; + +reg [$clog2(MEM_DEPTH)-BLOCKADRWIDTH-1:0] rdblocka[2:0]; +reg [$clog2(MEM_DEPTH)-BLOCKADRWIDTH-1:0] rdblockb[2:0]; + +always @(posedge aclk) begin + rdblocka[0] <= addra[$clog2(MEM_DEPTH)-1:BLOCKADRWIDTH]; + rdblockb[0] <= addrb[$clog2(MEM_DEPTH)-1:BLOCKADRWIDTH]; + for(i=0; i<2; i=i+1) begin + rdblocka[i+1] <= rdblocka[i]; + rdblockb[i+1] <= rdblockb[i]; + end +end + +if(NSTREAMS >= 1) begin: en_strm0 + if(STRM0_MUX == 1) begin: mux0 + mux #(STRM0_NBLOCKS, MEM_WIDTH) m(rdqa[(STRM0_BLOCK+STRM0_NBLOCKS)*MEM_WIDTH-1:STRM0_BLOCK*MEM_WIDTH],rdqmux[0],rdblocka[1] - STRM0_BLOCK); + end else begin: nomux0 + assign rdqmux[0] = rdqa[(STRM0_BLOCK+1)*MEM_WIDTH-1:STRM0_BLOCK*MEM_WIDTH]; + end + assign m_axis_0_tdata = rdqmux[0][STRM0_WIDTH-1:0]; +end + +if(NSTREAMS >= 2) begin: en_strm1 + if(STRM1_MUX == 1) begin: mux1 + mux #(STRM1_NBLOCKS, MEM_WIDTH) m(rdqb[(STRM1_BLOCK+STRM1_NBLOCKS)*MEM_WIDTH-1:STRM1_BLOCK*MEM_WIDTH],rdqmux[1],rdblockb[1] - STRM1_BLOCK); + end else begin: nomux1 + assign rdqmux[1] = rdqb[(STRM1_BLOCK+1)*MEM_WIDTH-1:STRM1_BLOCK*MEM_WIDTH]; + end + assign m_axis_1_tdata = rdqmux[1][STRM1_WIDTH-1:0]; +end + +if(NSTREAMS >= 3) begin: en_strm2 + if(STRM2_MUX == 1) begin: mux2 + mux #(STRM2_NBLOCKS, MEM_WIDTH) m(rdqa[(STRM2_BLOCK+STRM2_NBLOCKS)*MEM_WIDTH-1:STRM2_BLOCK*MEM_WIDTH],rdqmux[2],rdblocka[1] - STRM2_BLOCK); + end else begin: nomux2 + assign rdqmux[2] = rdqa[(STRM2_BLOCK+1)*MEM_WIDTH-1:STRM2_BLOCK*MEM_WIDTH]; + end + assign m_axis_2_tdata = rdqmux[2][STRM2_WIDTH-1:0]; +end + +if(NSTREAMS >= 4) begin: en_strm3 + if(STRM3_MUX == 1) begin: mux3 + mux #(STRM3_NBLOCKS, MEM_WIDTH) m(rdqb[(STRM3_BLOCK+STRM3_NBLOCKS)*MEM_WIDTH-1:STRM3_BLOCK*MEM_WIDTH],rdqmux[3],rdblockb[1] - STRM3_BLOCK); + end else begin: nomux3 + assign rdqmux[3] = rdqb[(STRM3_BLOCK+1)*MEM_WIDTH-1:STRM3_BLOCK*MEM_WIDTH]; + end + assign m_axis_3_tdata = rdqmux[3][STRM3_WIDTH-1:0]; +end + +if(NSTREAMS >= 5) begin: en_strm4 + if(STRM4_MUX == 1) begin: mux4 + mux #(STRM4_NBLOCKS, MEM_WIDTH) m(rdqa[(STRM4_BLOCK+STRM4_NBLOCKS)*MEM_WIDTH-1:STRM4_BLOCK*MEM_WIDTH],rdqmux[4],rdblocka[1] - STRM4_BLOCK); + end else begin: nomux4 + assign rdqmux[4] = rdqa[(STRM4_BLOCK+1)*MEM_WIDTH-1:STRM4_BLOCK*MEM_WIDTH]; + end + assign m_axis_4_tdata = rdqmux[4][STRM4_WIDTH-1:0]; +end + +if(NSTREAMS >= 6) begin: en_strm5 + if(STRM5_MUX == 1) begin: mux5 + mux #(STRM5_NBLOCKS, MEM_WIDTH) m(rdqb[(STRM5_BLOCK+STRM5_NBLOCKS)*MEM_WIDTH-1:STRM5_BLOCK*MEM_WIDTH],rdqmux[5],rdblockb[1] - STRM5_BLOCK); + end else begin: nomux5 + assign rdqmux[5] = rdqb[(STRM5_BLOCK+1)*MEM_WIDTH-1:STRM5_BLOCK*MEM_WIDTH]; + end + assign m_axis_5_tdata = rdqmux[5][STRM5_WIDTH-1:0]; +end + +end else begin: singleblock + +if(NSTREAMS >= 1) begin: en_strm0_direct + assign m_axis_0_tdata = rdqa[STRM0_WIDTH-1:0]; +end +if(NSTREAMS >= 2) begin: en_strm1_direct + assign m_axis_1_tdata = rdqb[STRM1_WIDTH-1:0]; +end +if(NSTREAMS >= 3) begin: en_strm2_direct + assign m_axis_2_tdata = rdqa[STRM2_WIDTH-1:0]; +end +if(NSTREAMS >= 4) begin: en_strm3_direct + assign m_axis_3_tdata = rdqb[STRM3_WIDTH-1:0]; +end +if(NSTREAMS >= 5) begin: en_strm4_direct + assign m_axis_4_tdata = rdqa[STRM4_WIDTH-1:0]; +end +if(NSTREAMS >= 6) begin: en_strm5_direct + assign m_axis_5_tdata = rdqb[STRM5_WIDTH-1:0]; +end + +end +endgenerate + +//output to AXI Streams +reg tvalid_pipe0[2:0]; +reg tvalid_pipe1[2:0]; +reg tvalid_pipe2[2:0]; +reg tvalid_pipe3[2:0]; +reg tvalid_pipe4[2:0]; +reg tvalid_pipe5[2:0]; + +assign m_axis_0_tvalid = tvalid_pipe0[2]; +assign m_axis_1_tvalid = tvalid_pipe1[2]; +assign m_axis_2_tvalid = tvalid_pipe2[2]; +assign m_axis_3_tvalid = tvalid_pipe3[2]; +assign m_axis_4_tvalid = tvalid_pipe4[2]; +assign m_axis_5_tvalid = tvalid_pipe5[2]; + + +always @(posedge aclk) begin + tvalid_pipe0[0] <= strm0_incr_en; + tvalid_pipe1[0] <= strm1_incr_en; + tvalid_pipe2[0] <= strm2_incr_en; + tvalid_pipe3[0] <= strm3_incr_en; + tvalid_pipe4[0] <= strm4_incr_en; + tvalid_pipe5[0] <= strm5_incr_en; + for(i=0; i<2; i=i+1) begin: srl + tvalid_pipe0[i+1] <= tvalid_pipe0[i]; + tvalid_pipe1[i+1] <= tvalid_pipe1[i]; + tvalid_pipe2[i+1] <= tvalid_pipe2[i]; + tvalid_pipe3[i+1] <= tvalid_pipe3[i]; + tvalid_pipe4[i+1] <= tvalid_pipe4[i]; + tvalid_pipe5[i+1] <= tvalid_pipe5[i]; + end +end + +assign config_q0 = 0; + +endmodule diff --git a/finn-rtllib/memstream/hdl/memstream_singleblock.v b/finn-rtllib/memstream/hdl/memstream_singleblock.v new file mode 100644 index 0000000000000000000000000000000000000000..7270517e26ff307d20ca219c1e3bc709f3900a20 --- /dev/null +++ b/finn-rtllib/memstream/hdl/memstream_singleblock.v @@ -0,0 +1,179 @@ +/* + Copyright (c) 2020, Xilinx + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name of FINN nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* + Implements a lightweight streamer for up to 2 streams in a single block of memory +*/ + +module memstream_singleblock +#( + parameter CONFIG_EN = 1, + parameter NSTREAMS = 2,//1 up to 6 + + parameter MEM_DEPTH = 512, + parameter MEM_WIDTH = 32, + parameter MEM_INIT = "./", + parameter RAM_STYLE = "auto", + + //widths per stream + parameter STRM0_WIDTH = 32, + parameter STRM1_WIDTH = 32, + + //depths per stream + parameter STRM0_DEPTH = 256, + parameter STRM1_DEPTH = 256, + + //offsets for each stream + parameter STRM0_OFFSET = 0, + parameter STRM1_OFFSET = 256 +) + +( + input aclk, + input aresetn, + + //optional configuration interface compatible with ap_memory + input [31:0] config_address, + input config_ce, + input config_we, + input [31:0] config_d0, + output [31:0] config_q0, + + //multiple output AXI Streams, TDATA width rounded to multiple of 8 bits + input m_axis_0_tready, + output m_axis_0_tvalid, + output [((STRM0_WIDTH+7)/8)*8-1:0] m_axis_0_tdata, + + input m_axis_1_tready, + output m_axis_1_tvalid, + output [((STRM1_WIDTH+7)/8)*8-1:0] m_axis_1_tdata + +); + +//calculate width of memory address +localparam BLOCKADRWIDTH = $clog2(MEM_DEPTH); + +//TODO: check that memory width is equal to the widest stream +//TODO: check that the stream depths and offsets make sense, and that the memory depth is sufficient (or calculate depth here?) +initial begin + if((NSTREAMS < 1) | (NSTREAMS > 2)) begin + $display("Invalid setting for NSTREAMS, please set in range [1,2]"); + $finish(); + end +end + +//invert reset +wire rst; +assign rst = ~aresetn; + +reg [BLOCKADRWIDTH-1:0] strm0_addr = STRM0_OFFSET; +reg [BLOCKADRWIDTH-1:0] strm1_addr = STRM1_OFFSET; + +wire strm0_incr_en; +wire strm1_incr_en; + +wire strm0_rst; +wire strm1_rst; + +assign strm0_incr_en = m_axis_0_tready | ~m_axis_0_tvalid; +assign strm1_incr_en = m_axis_1_tready | ~m_axis_1_tvalid; + +assign strm0_rst = strm0_incr_en & (strm0_addr == (STRM0_OFFSET + STRM0_DEPTH-1)); +assign strm1_rst = strm1_incr_en & (strm1_addr == (STRM1_OFFSET + STRM1_DEPTH-1)); + +//one address counter per stream; more LUTs but keeps routing short and local +always @(posedge aclk) begin + if(strm0_rst | rst) + strm0_addr <= STRM0_OFFSET; + else if(strm0_incr_en) + strm0_addr <= strm0_addr + 1; + if(strm1_rst | rst) + strm1_addr <= STRM1_OFFSET; + else if(strm1_incr_en) + strm1_addr <= strm1_addr + 1; +end + +ramb18_wf_dualport +#( + .ID(0), + .DWIDTH(MEM_WIDTH), + .AWIDTH(BLOCKADRWIDTH), + .DEPTH(MEM_DEPTH), + .MEM_INIT(MEM_INIT), + .RAM_STYLE(RAM_STYLE) +) +ram +( + .clk(aclk), + + .wea(config_we), + .ena(strm0_incr_en | config_ce), + .enqa(strm0_incr_en | config_ce), + .addra(config_we ? config_address[BLOCKADRWIDTH-1:0] : strm0_addr), + .wdataa(config_d0), + .rdqa(m_axis_0_tdata), + + .web(1'b0), + .enb(strm1_incr_en), + .enqb(strm1_incr_en), + .addrb(strm1_addr), + .wdatab('d0), + .rdqb(m_axis_1_tdata) +); + +//signal valid after 2 tready cycles after initialization +//then stay valid +reg [1:0] tvalid_pipe0 = 2'd0; +reg [1:0] tvalid_pipe1 = 2'd0; + +assign m_axis_0_tvalid = tvalid_pipe0[1]; +assign m_axis_1_tvalid = tvalid_pipe1[1]; + +always @(posedge aclk) begin + if(rst) begin + tvalid_pipe0 <= 0; + end else if(strm0_incr_en) begin + tvalid_pipe0[0] <= 1; + tvalid_pipe0[1] <= tvalid_pipe0[0]; + end +end + +always @(posedge aclk) begin + if(rst) begin + tvalid_pipe1 <= 0; + end else if(strm1_incr_en) begin + tvalid_pipe1[0] <= 1; + tvalid_pipe1[1] <= tvalid_pipe1[0]; + end +end + +assign config_q0 = m_axis_0_tdata; + +endmodule diff --git a/finn-rtllib/memstream/hdl/ramb18_wf_dualport.v b/finn-rtllib/memstream/hdl/ramb18_wf_dualport.v index 4219d0f1c74bddff690b0d0cb21ce6a448c01c97..fe5cbd79bdec3d85dff91f43d4cd0be19920031f 100644 --- a/finn-rtllib/memstream/hdl/ramb18_wf_dualport.v +++ b/finn-rtllib/memstream/hdl/ramb18_wf_dualport.v @@ -31,26 +31,31 @@ module ramb18_wf_dualport #( parameter ID = 0, - parameter DWIDTH = 18, - parameter AWIDTH = 10, - parameter MEM_INIT = "", - parameter RAM_STYLE = "auto" + parameter DWIDTH = 18, + parameter AWIDTH = 10, + parameter DEPTH = 2**AWIDTH, + parameter MEM_INIT = "", + parameter RAM_STYLE = "auto" ) ( input clk, input wea, + input ena, + input enqa, input [AWIDTH-1:0] addra, input [DWIDTH-1:0] wdataa, output reg [DWIDTH-1:0] rdqa, input web, + input enb, + input enqb, input [AWIDTH-1:0] addrb, input [DWIDTH-1:0] wdatab, output reg [DWIDTH-1:0] rdqb ); -(* ram_style = RAM_STYLE *) reg [DWIDTH-1:0] mem[0:2**AWIDTH-1]; +(* ram_style = RAM_STYLE *) reg [DWIDTH-1:0] mem[0:DEPTH-1]; reg [DWIDTH-1:0] rdataa; reg [DWIDTH-1:0] rdatab; @@ -85,16 +90,22 @@ end //memory ports, with output pipeline register always @(posedge clk) begin - if(wea) - mem[addra] <= wdataa; - rdataa <= mem[addra]; - rdqa <= rdataa; + if(ena) begin + if(wea) + mem[addra] <= wdataa; + rdataa <= mem[addra]; + end + if(enqa) + rdqa <= rdataa; end always @(posedge clk) begin - if(web) - mem[addrb] <= wdatab; - rdatab <= mem[addrb]; - rdqb <= rdatab; + if(enb) begin + if(web) + mem[addrb] <= wdatab; + rdatab <= mem[addrb]; + end + if(enqb) + rdqb <= rdatab; end endmodule diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py index 1da60a5124fa86b4336bae8fd1a587672f2f2e6f..74d55ad8c5e61a7fd106fb9282203c759ab5dd5d 100644 --- a/src/finn/custom_op/fpgadataflow/templates.py +++ b/src/finn/custom_op/fpgadataflow/templates.py @@ -146,39 +146,6 @@ wire m_axis_0_tready; wire m_axis_0_tvalid; wire $WEIGHT_RANGE$ m_axis_0_tdata; -wire m_axis_0_tready_q; -wire m_axis_0_tvalid_q; -wire $WEIGHT_RANGE$ m_axis_0_tdata_q; - -wire m_axis_0_tready_q2; -wire m_axis_0_tvalid_q2; -wire $WEIGHT_RANGE$ m_axis_0_tdata_q2; - -reg m_axis_1_afull = 0; -reg m_axis_1_tready = 1; -wire m_axis_1_tvalid; -wire $WEIGHT_RANGE$ m_axis_1_tdata; - -reg m_axis_2_afull = 0; -reg m_axis_2_tready = 1; -wire m_axis_2_tvalid; -wire $WEIGHT_RANGE$ m_axis_2_tdata; - -reg m_axis_3_afull = 0; -reg m_axis_3_tready = 1; -wire m_axis_3_tvalid; -wire $WEIGHT_RANGE$ m_axis_3_tdata; - -reg m_axis_4_afull = 0; -reg m_axis_4_tready = 1; -wire m_axis_4_tvalid; -wire $WEIGHT_RANGE$ m_axis_4_tdata; - -reg m_axis_5_afull = 0; -reg m_axis_5_tready = 1; -wire m_axis_5_tvalid; -wire $WEIGHT_RANGE$ m_axis_5_tdata; - //memstream component memstream @@ -194,27 +161,12 @@ memstream //widths per stream .STRM0_WIDTH($WEIGHT_WIDTH$), -.STRM1_WIDTH($WEIGHT_WIDTH$), -.STRM2_WIDTH($WEIGHT_WIDTH$), -.STRM3_WIDTH($WEIGHT_WIDTH$), -.STRM4_WIDTH($WEIGHT_WIDTH$), -.STRM5_WIDTH($WEIGHT_WIDTH$), //depths per stream .STRM0_DEPTH($WSTREAM_DEPTH$), -.STRM1_DEPTH(1), -.STRM2_DEPTH(1), -.STRM3_DEPTH(1), -.STRM4_DEPTH(1), -.STRM5_DEPTH(1), //offsets for each stream .STRM0_OFFSET(0), -.STRM1_OFFSET(0), -.STRM2_OFFSET(0), -.STRM3_OFFSET(0), -.STRM4_OFFSET(0), -.STRM5_OFFSET(0) ) mem ( @@ -232,55 +184,12 @@ mem .m_axis_0_afull(m_axis_0_afull), .m_axis_0_tready(m_axis_0_tready), .m_axis_0_tvalid(m_axis_0_tvalid), -.m_axis_0_tdata(m_axis_0_tdata), - -.m_axis_1_afull(m_axis_1_afull), -.m_axis_1_tready(m_axis_1_tready), -.m_axis_1_tvalid(m_axis_1_tvalid), -.m_axis_1_tdata(m_axis_1_tdata), - -.m_axis_2_afull(m_axis_2_afull), -.m_axis_2_tready(m_axis_2_tready), -.m_axis_2_tvalid(m_axis_2_tvalid), -.m_axis_2_tdata(m_axis_2_tdata), - -.m_axis_3_afull(m_axis_3_afull), -.m_axis_3_tready(m_axis_3_tready), -.m_axis_3_tvalid(m_axis_3_tvalid), -.m_axis_3_tdata(m_axis_3_tdata), - -.m_axis_4_afull(m_axis_4_afull), -.m_axis_4_tready(m_axis_4_tready), -.m_axis_4_tvalid(m_axis_4_tvalid), -.m_axis_4_tdata(m_axis_4_tdata), - -.m_axis_5_afull(m_axis_5_afull), -.m_axis_5_tready(m_axis_5_tready), -.m_axis_5_tvalid(m_axis_5_tvalid), -.m_axis_5_tdata(m_axis_5_tdata) +.m_axis_0_tdata(m_axis_0_tdata) ); -Q_srl #( -.depth(32), -.width($WEIGHT_WIDTH$) -) -$LAYER_NAME$_w_fifo_1 -( - .clock(ap_clk), - .reset(!ap_rst_n), - .i_d(m_axis_0_tdata), - .i_v(m_axis_0_tvalid), - .i_r(m_axis_0_tready), - .o_d(m_axis_0_tdata_q), - .o_v(m_axis_0_tvalid_q), - .o_r(m_axis_0_tready_q), - .count(fifo_0_count) -); - - //MVA_Stream_Unit $LAYER_NAME$ @@ -291,17 +200,14 @@ MVA_Stream_U .in0_V_V_TDATA(in0_V_V_TDATA), //$IN_RANGE$ input .in0_V_V_TVALID(in0_V_V_TVALID), //input .in0_V_V_TREADY(in0_V_V_TREADY), //output -.weights_V_V_TDATA(m_axis_0_tdata_q), //$WEIGHT_RANGE$ input -.weights_V_V_TVALID(m_axis_0_tvalid_q), //input -.weights_V_V_TREADY(m_axis_0_tready_q), //output +.weights_V_V_TDATA(m_axis_0_tdata), //$WEIGHT_RANGE$ input +.weights_V_V_TVALID(m_axis_0_tvalid), //input +.weights_V_V_TREADY(m_axis_0_tready), //output .out_V_V_TDATA(out_V_V_TDATA), //$OUT_RANGE$ output .out_V_V_TVALID(out_V_V_TVALID), //output .out_V_V_TREADY(out_V_V_TREADY) //input ); -// programmable full threshold at 16 elements -assign m_axis_0_afull = (fifo_0_count > 16); - endmodule """