diff --git a/fetch-repos.sh b/fetch-repos.sh index 16960c71e31671b042dcfb4c31208aaaf8e29906..7078b284a9bbfdebc6bfe5bd8f7d577bdfcacabc 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -27,7 +27,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -QONNX_COMMIT="f14d7dc92a6baeffa2bef811e902abb121a6f696" +QONNX_COMMIT="ce321742d98f23909a890ed680a9c99640d7aaab" FINN_EXP_COMMIT="9cbd2787b5160e2b44e0e8164a0df1457dbd5366" BREVITAS_COMMIT="a5b71d6de1389d3e7db898fef72e014842670f03" PYVERILATOR_COMMIT="766e457465f5c0dd315490d7b9cc5d74f9a76f4f" diff --git a/finn-rtllib/fmpadding/hdl/axi2we.sv b/finn-rtllib/fmpadding/hdl/axi2we.sv new file mode 100644 index 0000000000000000000000000000000000000000..842ba3632c4224d58f87c66e1affc4c028b60ef3 --- /dev/null +++ b/finn-rtllib/fmpadding/hdl/axi2we.sv @@ -0,0 +1,122 @@ +/****************************************************************************** + * Copyright (C) 2022, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @brief AXI-Light adapter for trivial write enable interface. + * @author Thomas B. 
Preußer <tpreusse@amd.com> + *****************************************************************************/ + +module axi2we #( + int unsigned ADDR_BITS +)( + //- Global Control ------------------ + input logic ap_clk, + input logic ap_rst_n, + + //- AXI Lite ------------------------ + // Writing + input s_axilite_AWVALID, + output s_axilite_AWREADY, + input [ADDR_BITS-1:0] s_axilite_AWADDR, + + input s_axilite_WVALID, + output s_axilite_WREADY, + input [31:0] s_axilite_WDATA, + input [ 3:0] s_axilite_WSTRB, + + output s_axilite_BVALID, + input s_axilite_BREADY, + output [1:0] s_axilite_BRESP, + + // Reading tied to all-ones + input s_axilite_ARVALID, + output s_axilite_ARREADY, + input [ADDR_BITS-1:0] s_axilite_ARADDR, + + output s_axilite_RVALID, + input s_axilite_RREADY, + output [31:0] s_axilite_RDATA, + output [ 1:0] s_axilite_RRESP, + + // Write Enable Interface + output logic we, + output logic [ADDR_BITS-1:0] wa, + output logic [ 31:0] wd +); + + uwire clk = ap_clk; + uwire rst = !ap_rst_n; + + + logic WABusy = 0; + logic WDBusy = 0; + logic [ADDR_BITS-1:0] Addr = 'x; + logic [ 31:0] Data = 'x; + + assign we = WABusy && WDBusy && s_axilite_BREADY; + assign wa = Addr; + assign wd = Data; + + uwire clr_wr = rst || we; + always_ff @(posedge clk) begin + if(clr_wr) begin + WABusy <= 0; + Addr <= 'x; + WDBusy <= 0; + Data <= 'x; + end + else begin + if(!WABusy) begin + WABusy <= s_axilite_AWVALID; + Addr <= s_axilite_AWADDR; + end + if(!WDBusy) begin + WDBusy <= s_axilite_WVALID; + Data <= s_axilite_WDATA; + end + end + end + assign s_axilite_AWREADY = !WABusy; + assign s_axilite_WREADY = !WDBusy; + assign s_axilite_BVALID = WABusy && WDBusy; + assign s_axilite_BRESP = '0; // OK + + // Answer all reads with '1 + logic RValid = 0; + uwire clr_rd = rst || (RValid && s_axilite_RREADY); + always_ff @(posedge clk) begin + if(clr_rd) RValid <= 0; + else if(!RValid) RValid <= s_axilite_ARVALID; + end + assign s_axilite_ARREADY = !RValid; + assign s_axilite_RVALID = RValid; + assign s_axilite_RDATA = '1; + assign s_axilite_RRESP = '0; // OK + +endmodule : axi2we diff --git a/finn-rtllib/fmpadding/hdl/fmpadding.sv b/finn-rtllib/fmpadding/hdl/fmpadding.sv new file mode 100644 index 0000000000000000000000000000000000000000..904c7c381f7b2499fc354ebf798e86edab262866 --- /dev/null +++ b/finn-rtllib/fmpadding/hdl/fmpadding.sv @@ -0,0 +1,224 @@ +/****************************************************************************** + * Copyright (C) 2022, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @brief Feature map padding. + * @author Thomas B. Preußer <tpreusse@amd.com> + *****************************************************************************/ + +module fmpadding #( + int unsigned XCOUNTER_BITS, + int unsigned YCOUNTER_BITS, + int unsigned NUM_CHANNELS, + int unsigned SIMD, + int unsigned ELEM_BITS, + int unsigned INIT_XON, + int unsigned INIT_XOFF, + int unsigned INIT_XEND, + int unsigned INIT_YON, + int unsigned INIT_YOFF, + int unsigned INIT_YEND, + + localparam int unsigned STREAM_BITS = 8*(1 + (SIMD*ELEM_BITS-1)/8) +)( + //- Global Control ------------------ + input logic ap_clk, + input logic ap_rst_n, + + // Parameter Configuration ---------- + input logic we, + input logic [ 4:0] wa, + input logic [31:0] wd, + + //- AXI Stream - Input -------------- + output logic s_axis_tready, + input logic s_axis_tvalid, + input logic [STREAM_BITS-1:0] s_axis_tdata, + + //- AXI Stream - Output ------------- + input logic m_axis_tready, + output logic m_axis_tvalid, + output logic [STREAM_BITS-1:0] m_axis_tdata +); + + uwire clk = ap_clk; + uwire rst = !ap_rst_n; + + //----------------------------------------------------------------------- + // Parameter Sanity Checking + initial begin + automatic bit fail = 0; + + if(XCOUNTER_BITS < $clog2(1+INIT_XEND)) begin + $error("XCounter size too small to accommodate end count."); + fail = 1; + end + if(XCOUNTER_BITS < $clog2(1+INIT_XON)) begin + $error("XCounter size too small to accommodate ON count."); + fail = 1; + end + if(XCOUNTER_BITS < $clog2(1+INIT_XOFF)) begin + $error("XCounter size too small to accommodate OFF count."); + fail = 1; + end + if(YCOUNTER_BITS < $clog2(1+INIT_YEND)) begin + $error("YCounter size too small to accommodate end count."); + fail = 1; + end + if(YCOUNTER_BITS < $clog2(1+INIT_YON)) begin + $error("YCounter size too small to accommodate ON count."); + fail = 1; + end + if(YCOUNTER_BITS < $clog2(1+INIT_YOFF)) begin + $error("YCounter size too small to accommodate OFF count."); + fail = 1; + end + + if((INIT_XEND < INIT_XON) || (INIT_XOFF <= INIT_XON)) begin + $warning("Initial empty X output range."); + end + if((INIT_YEND < INIT_YON) || (INIT_YOFF <= INIT_YON)) begin + $warning("Initial empty Y output range."); + end + + if(fail) $finish(); + end + + //----------------------------------------------------------------------- + // Dynamically configurable state + typedef logic [XCOUNTER_BITS-1:0] xcount_t; + xcount_t XEnd = INIT_XEND; + xcount_t XOn = INIT_XON; + xcount_t XOff = INIT_XOFF; + + typedef logic [YCOUNTER_BITS-1:0] ycount_t; + ycount_t YEnd = INIT_YEND; + ycount_t YOn = INIT_YON; + ycount_t YOff = INIT_YOFF; + + always_ff @(posedge clk) begin + if(we) begin + unique case(wa) + 0*4: XOn <= wd; + 1*4: XOff <= wd; + 2*4: XEnd <= wd; + 3*4: YOn <= wd; + 4*4: YOff <= wd; + 5*4: YEnd <= wd; + + default: assert(0) else begin + $error("Illegal write address."); + $stop; + end + endcase + end + end + + 
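+	// For reference, the write decode above yields this byte-addressed AXI-Lite
+	// register map (reached through the axi2we adapter in fmpadding_axi.sv):
+	//   0x00: XOn   0x04: XOff   0x08: XEnd
+	//   0x0C: YOn   0x10: YOff   0x14: YEnd
+	// Illustrative host programming for a padded output of width W and height H
+	// with PAD_LEFT/PAD_RIGHT and PAD_TOP/PAD_BOTTOM padding (cf. fmpadding_axi_tb.sv):
+	//   XOn = PAD_LEFT;  XOff = W - PAD_RIGHT;   XEnd = W - 1;
+	//   YOn = PAD_TOP;   YOff = H - PAD_BOTTOM;  YEnd = H - 1;
+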
//----------------------------------------------------------------------- + // Cascaded enables for the nested counters: SCount, XCount, YCount + uwire sen; + uwire xen; + uwire yen; + + //- S-Counter: SIMD fold ------------ + initial begin + if((NUM_CHANNELS < 1) || (NUM_CHANNELS % SIMD != 0)) begin + $error("Channel count must be SIMD multiple."); + $finish; + end + end + // Count SF-2, SF-3, ..., 1, 0, -1 + localparam int unsigned SF = NUM_CHANNELS/SIMD; + typedef logic [$clog2(SF-1):0] scount_t; + scount_t SCount = SF-2; + + assign xen = sen && SCount[$left(SCount)]; + uwire sclr = rst || xen; + always_ff @(posedge clk) begin + if(sclr) SCount <= SF-2; + else if(sen) SCount <= SCount - 1; + end + + //- X-Counter: image width ---------- + xcount_t XCount = 0; + + assign yen = xen && (XCount == XEnd); + uwire xclr = rst || yen; + always_ff @(posedge clk) begin + if(xclr) XCount <= 0; + else if(xen) XCount <= XCount + 1; + end + uwire xfwd = (XOn <= XCount) && (XCount < XOff); + + //- Y-Counter: image height --------- + ycount_t YCount = 0; + + uwire yclr = rst || (yen && (YCount == YEnd)); + always_ff @(posedge clk) begin + if(yclr) YCount <= 0; + else if(yen) YCount <= YCount + 1; + end + uwire yfwd = (YOn <= YCount) && (YCount < YOff); + + //----------------------------------------------------------------------- + // Input forwarding and edge padding + typedef struct { + logic vld; + logic [STREAM_BITS-1:0] dat; + } buf_t; + buf_t A = '{ vld: 0, dat: 'x }; + buf_t B = '{ vld: 0, dat: 'x }; + + uwire fwd = xfwd && yfwd; + assign sen = (m_axis_tready || !B.vld) && (s_axis_tvalid || A.vld || !fwd); + assign s_axis_tready = !A.vld; + assign m_axis_tvalid = B.vld; + assign m_axis_tdata = B.dat; + + always_ff @(posedge clk) begin + if(rst) begin + B <= '{ vld: 0, dat: 'x }; + end + else if(m_axis_tready || !B.vld) begin + B.vld <= s_axis_tvalid || A.vld || !fwd; + B.dat <= !fwd? '0 : A.vld? A.dat : s_axis_tdata; + end + end + + always_ff @(posedge clk) begin + if(rst) begin + A <= '{ vld: 0, dat: 'x }; + end + else begin + A.vld <= (A.vld || s_axis_tvalid) && ((B.vld && !m_axis_tready) || !fwd); + if(!A.vld) A.dat <= s_axis_tdata; + end + end + +endmodule : fmpadding diff --git a/finn-rtllib/fmpadding/hdl/fmpadding_axi.sv b/finn-rtllib/fmpadding/hdl/fmpadding_axi.sv new file mode 100644 index 0000000000000000000000000000000000000000..5948341d000a1dd82ff363b36557f897d3a064c7 --- /dev/null +++ b/finn-rtllib/fmpadding/hdl/fmpadding_axi.sv @@ -0,0 +1,123 @@ +/****************************************************************************** + * Copyright (C) 2022, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @brief Feature map padding. + * @author Thomas B. Preußer <tpreusse@amd.com> + *****************************************************************************/ + +module fmpadding_axi #( + int unsigned XCOUNTER_BITS, + int unsigned YCOUNTER_BITS, + int unsigned NUM_CHANNELS, + int unsigned SIMD, + int unsigned ELEM_BITS, + int unsigned INIT_XON, + int unsigned INIT_XOFF, + int unsigned INIT_XEND, + int unsigned INIT_YON, + int unsigned INIT_YOFF, + int unsigned INIT_YEND, + + localparam int unsigned STREAM_BITS = 8*(1 + (SIMD*ELEM_BITS-1)/8) +)( + //- Global Control ------------------ + input logic ap_clk, + input logic ap_rst_n, + + //- AXI Lite ------------------------ + // Writing + input s_axilite_AWVALID, + output s_axilite_AWREADY, + input [4:0] s_axilite_AWADDR, + + input s_axilite_WVALID, + output s_axilite_WREADY, + input [31:0] s_axilite_WDATA, + input [ 3:0] s_axilite_WSTRB, + + output s_axilite_BVALID, + input s_axilite_BREADY, + output [1:0] s_axilite_BRESP, + + // Reading + input s_axilite_ARVALID, + output s_axilite_ARREADY, + input [4:0] s_axilite_ARADDR, + + output s_axilite_RVALID, + input s_axilite_RREADY, + output [31:0] s_axilite_RDATA, + output [ 1:0] s_axilite_RRESP, + + //- AXI Stream - Input -------------- + output logic s_axis_tready, + input logic s_axis_tvalid, + input logic [STREAM_BITS-1:0] s_axis_tdata, + + //- AXI Stream - Output ------------- + input logic m_axis_tready, + output logic m_axis_tvalid, + output logic [STREAM_BITS-1:0] m_axis_tdata +); + + // AXI-Lite Adapter + uwire we; + uwire [ 4:0] wa; + uwire [31:0] wd; + axi2we #(.ADDR_BITS(5)) axilight_adapter ( + .ap_clk, .ap_rst_n, + + .s_axilite_AWVALID, .s_axilite_AWREADY, .s_axilite_AWADDR, + .s_axilite_WVALID, .s_axilite_WREADY, .s_axilite_WDATA, .s_axilite_WSTRB, + .s_axilite_BVALID, .s_axilite_BREADY, .s_axilite_BRESP, + + .s_axilite_ARVALID, .s_axilite_ARREADY, .s_axilite_ARADDR, + .s_axilite_RVALID, .s_axilite_RREADY, .s_axilite_RDATA, .s_axilite_RRESP, + + .we, .wa, .wd + ); + + // Actual Padding + fmpadding #( + .XCOUNTER_BITS(XCOUNTER_BITS), .YCOUNTER_BITS(YCOUNTER_BITS), + .NUM_CHANNELS(NUM_CHANNELS), .SIMD(SIMD), + .INIT_XON(INIT_XON), .INIT_XOFF(INIT_XOFF), .INIT_XEND(INIT_XEND), + .INIT_YON(INIT_YON), .INIT_YOFF(INIT_YOFF), .INIT_YEND(INIT_YEND), + .ELEM_BITS(ELEM_BITS) + ) padding ( + .ap_clk, .ap_rst_n, + + .we, .wa, .wd, + + .s_axis_tready, .s_axis_tvalid, .s_axis_tdata, + .m_axis_tready, .m_axis_tvalid, .m_axis_tdata + ); + +endmodule : fmpadding_axi diff --git a/finn-rtllib/fmpadding/hdl/fmpadding_axi_tb.sv b/finn-rtllib/fmpadding/hdl/fmpadding_axi_tb.sv new file mode 100644 index 0000000000000000000000000000000000000000..741689b3a7af7ad4d07f2af569f71135c1d35c7b --- /dev/null +++ 
b/finn-rtllib/fmpadding/hdl/fmpadding_axi_tb.sv
@@ -0,0 +1,154 @@
+
+module fmpadding_axi_tb #(
+	int unsigned  XCOUNTER_BITS = 8,
+	int unsigned  YCOUNTER_BITS = 8,
+	int unsigned  NUM_CHANNELS  = 4,
+	int unsigned  SIMD          = 2,
+	int unsigned  ELEM_BITS     = 4
+)();
+	localparam int unsigned  STREAM_BITS = 8*(1 + (SIMD*ELEM_BITS-1)/8);
+
+	//- Global Control ------------------
+	logic  clk = 0;
+	always #5ns clk = !clk;
+	logic  rst;
+
+	// AXI-Light for Parameter Configuration
+	logic         s_axilite_AWVALID;
+	uwire         s_axilite_AWREADY;
+	logic [4:0]   s_axilite_AWADDR;
+
+	logic         s_axilite_WVALID;
+	uwire         s_axilite_WREADY;
+	logic [31:0]  s_axilite_WDATA;
+
+	//- AXI Stream - Input --------------
+	uwire  s_axis_tready;
+	logic  s_axis_tvalid;
+	logic [STREAM_BITS-1:0]  s_axis_tdata;
+
+	//- AXI Stream - Output -------------
+	logic  m_axis_tready;
+	uwire  m_axis_tvalid;
+	uwire [STREAM_BITS-1:0]  m_axis_tdata;
+
+
+	// DUT
+	fmpadding_axi #(
+		.XCOUNTER_BITS(XCOUNTER_BITS),
+		.YCOUNTER_BITS(YCOUNTER_BITS),
+		.NUM_CHANNELS(NUM_CHANNELS),
+		.SIMD(SIMD),
+		.INIT_XON(0), .INIT_XOFF(0), .INIT_XEND(0),
+		.INIT_YON(0), .INIT_YOFF(0), .INIT_YEND(0),
+		.ELEM_BITS(ELEM_BITS)
+	) dut (
+		.ap_clk(clk), .ap_rst_n(!rst),
+
+		.s_axilite_AWVALID, .s_axilite_AWREADY, .s_axilite_AWADDR,
+		.s_axilite_WVALID, .s_axilite_WREADY, .s_axilite_WDATA, .s_axilite_WSTRB('1),
+		.s_axilite_BVALID(), .s_axilite_BREADY('1), .s_axilite_BRESP(),
+		.s_axilite_ARVALID('0), .s_axilite_ARREADY(), .s_axilite_ARADDR('x),
+		.s_axilite_RVALID(), .s_axilite_RREADY('0), .s_axilite_RDATA(), .s_axilite_RRESP(),
+
+		.s_axis_tready, .s_axis_tvalid, .s_axis_tdata,
+		.m_axis_tready, .m_axis_tvalid, .m_axis_tdata
+	);
+
+	// Stimuli
+	localparam int unsigned  IMAGES = 2;
+	localparam int unsigned  XSIZE = 10;
+	localparam int unsigned  YSIZE =  7;
+	localparam int unsigned  PAD_LEFT   = 2;
+	localparam int unsigned  PAD_RIGHT  = 3;
+	localparam int unsigned  PAD_TOP    = 1;
+	localparam int unsigned  PAD_BOTTOM = 2;
+
+	task axi_write(input logic [4:0]  wa, input logic [31:0]  wd);
+		s_axilite_AWVALID <= 1;
+		s_axilite_AWADDR  <= wa;
+		@(posedge clk iff s_axilite_AWREADY);
+		s_axilite_AWVALID <= 0;
+		s_axilite_AWADDR  <= 'x;
+
+		s_axilite_WVALID <= 1;
+		s_axilite_WDATA  <= wd;
+		@(posedge clk iff s_axilite_WREADY);
+		s_axilite_WVALID <= 0;
+		s_axilite_WDATA  <= 'x;
+	endtask : axi_write
+
+
+	initial begin
+		s_axilite_AWVALID = 0;
+		s_axilite_AWADDR  = 'x;
+		s_axilite_WVALID  = 0;
+		s_axilite_WDATA   = 'x;
+
+		s_axis_tvalid = 0;
+		s_axis_tdata  = 'x;
+
+		// Configure Parameters (byte-addressed registers, cf. the write decode in fmpadding.sv)
+		rst = 0;
+		@(posedge clk);
+		/* XOn  */	axi_write( 0, PAD_LEFT);
+		/* XOff */	axi_write( 4, XSIZE - PAD_RIGHT);
+		/* XEnd */	axi_write( 8, XSIZE - 1);
+		/* YOn  */	axi_write(12, PAD_TOP);
+		/* YOff */	axi_write(16, YSIZE - PAD_BOTTOM);
+		/* YEnd */	axi_write(20, YSIZE - 1);
+		@(posedge clk);
+		rst <= 1;
+		@(posedge clk);
+		rst <= 0;
+		@(posedge clk);
+
+		// Feed data input
+		s_axis_tvalid <= 1;
+		for(int unsigned  i = 0; i < IMAGES * (XSIZE-PAD_LEFT-PAD_RIGHT) * (YSIZE-PAD_TOP-PAD_BOTTOM) * (NUM_CHANNELS/SIMD); i++) begin
+			s_axis_tdata <= i;
+			@(posedge clk iff s_axis_tready);
+			if($urandom()%5 == 0) begin
+				s_axis_tvalid <= 0;
+				s_axis_tdata  <= 'x;
+				@(posedge clk);
+				s_axis_tvalid <= 1;
+			end
+		end
+		s_axis_tvalid <= 0;
+		s_axis_tdata  <= 'x;
+	end
+
+	// Output Throttler
+	initial begin
+		m_axis_tready = 0;
+		@(posedge clk iff !rst);
+		m_axis_tready <= 1;
+		forever @(posedge clk iff m_axis_tvalid) begin
+			m_axis_tready <= 0;
+			repeat(4-$clog2(1+$urandom()%15)) @(posedge clk);
+			m_axis_tready <= 1;
+		end
+	end
+
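+	// For reference (derived from the parameters above): each image below prints
+	// as a YSIZE x XSIZE grid of pixels, each pixel shown as NUM_CHANNELS/SIMD
+	// colon-joined words. The first PAD_TOP and last PAD_BOTTOM rows, and the
+	// leftmost PAD_LEFT and rightmost PAD_RIGHT pixel columns, should read as
+	// all-zero padding; the interior carries the sequential input words.
+
+	// 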
Output logger + initial begin + @(negedge rst); + repeat(IMAGES) begin + for(int unsigned y = 0; y < YSIZE; y++) begin + for(int unsigned x = 0; x < XSIZE; x++) begin + automatic string delim = " "; + for(int unsigned s = 0; s < NUM_CHANNELS/SIMD; s++) begin + @(posedge clk iff m_axis_tvalid && m_axis_tready); + $write("%s%02X", delim, m_axis_tdata); + delim = ":"; + end + end + $display(); + end + $display("----"); + end + $finish; + end + +endmodule : fmpadding_axi_tb diff --git a/finn-rtllib/fmpadding/hdl/fmpadding_template.v b/finn-rtllib/fmpadding/hdl/fmpadding_template.v new file mode 100644 index 0000000000000000000000000000000000000000..0b0f40f86a44ac1d905c89bed5328d6d1ea48876 --- /dev/null +++ b/finn-rtllib/fmpadding/hdl/fmpadding_template.v @@ -0,0 +1,118 @@ +/****************************************************************************** + * Copyright (C) 2022, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *****************************************************************************/ + +module $TOP_MODULE_NAME$( +//- Global Control ------------------ +(* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V:s_axilite" *) +input ap_clk, +(* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V:s_axilite" *) +input ap_rst_n, + +//- AXI Lite ------------------------ +// Writing +input s_axilite_AWVALID, +output s_axilite_AWREADY, +input [4:0] s_axilite_AWADDR, + +input s_axilite_WVALID, +output s_axilite_WREADY, +input [31:0] s_axilite_WDATA, +input [ 3:0] s_axilite_WSTRB, + +output s_axilite_BVALID, +input s_axilite_BREADY, +output [1:0] s_axilite_BRESP, + +// Reading +input s_axilite_ARVALID, +output s_axilite_ARREADY, +input [4:0] s_axilite_ARADDR, + +output s_axilite_RVALID, +input s_axilite_RREADY, +output [31:0] s_axilite_RDATA, +output [ 1:0] s_axilite_RRESP, + +//- AXI Stream - Input -------------- +output in0_V_TREADY, +input in0_V_TVALID, +input [$STREAM_BITS$-1:0] in0_V_TDATA, + +//- AXI Stream - Output ------------- +input out_V_TREADY, +output out_V_TVALID, +output [$STREAM_BITS$-1:0] out_V_TDATA +); + + +fmpadding_axi #( +.XCOUNTER_BITS($XCOUNTER_BITS$), +.YCOUNTER_BITS($YCOUNTER_BITS$), +.NUM_CHANNELS($NUM_CHANNELS$), +.SIMD($SIMD$), +.ELEM_BITS($ELEM_BITS$), +.INIT_XON($INIT_XON$), +.INIT_XOFF($INIT_XOFF$), +.INIT_XEND($INIT_XEND$), +.INIT_YON($INIT_YON$), +.INIT_YOFF($INIT_YOFF$), +.INIT_YEND($INIT_YEND$) +) +$TOP_MODULE_NAME$_impl +( + .ap_clk(ap_clk), + .ap_rst_n(ap_rst_n), + .s_axilite_AWVALID(s_axilite_AWVALID), + .s_axilite_AWREADY(s_axilite_AWREADY), + .s_axilite_AWADDR(s_axilite_AWADDR), + .s_axilite_WVALID(s_axilite_WVALID), + .s_axilite_WREADY(s_axilite_WREADY), + .s_axilite_WDATA(s_axilite_WDATA), + .s_axilite_WSTRB(s_axilite_WSTRB), + .s_axilite_BVALID(s_axilite_BVALID), + .s_axilite_BREADY(s_axilite_BREADY), + .s_axilite_BRESP(s_axilite_BRESP), + .s_axilite_ARVALID(s_axilite_ARVALID), + .s_axilite_ARREADY(s_axilite_ARREADY), + .s_axilite_ARADDR(s_axilite_ARADDR), + .s_axilite_RVALID(s_axilite_RVALID), + .s_axilite_RREADY(s_axilite_RREADY), + .s_axilite_RDATA(s_axilite_RDATA), + .s_axilite_RRESP(s_axilite_RRESP), + .s_axis_tready(in0_V_TREADY), + .s_axis_tvalid(in0_V_TVALID), + .s_axis_tdata(in0_V_TDATA), + .m_axis_tready(out_V_TREADY), + .m_axis_tvalid(out_V_TVALID), + .m_axis_tdata(out_V_TDATA) +); + +endmodule diff --git a/finn-rtllib/swg/swg_template_axilite.v b/finn-rtllib/swg/swg_template_axilite.v new file mode 100644 index 0000000000000000000000000000000000000000..9479c7f80d7d82b27141dbe5abcce442049237bd --- /dev/null +++ b/finn-rtllib/swg/swg_template_axilite.v @@ -0,0 +1,567 @@ + +`timescale 1 ns / 1 ps + +module $TOP_MODULE_NAME$_axilite # +( + // Users to add parameters here + + // User parameters ends + // Do not modify the parameters beyond this line + + // Width of S_AXI data bus + parameter integer C_S_AXI_DATA_WIDTH = 32, + // Width of S_AXI address bus + parameter integer C_S_AXI_ADDR_WIDTH = 6 +) +( + // Users to add ports here + output wire [C_S_AXI_DATA_WIDTH-1:0] cfg_reg0, + output wire [C_S_AXI_DATA_WIDTH-1:0] cfg_reg1, + output wire [C_S_AXI_DATA_WIDTH-1:0] cfg_reg2, + output wire [C_S_AXI_DATA_WIDTH-1:0] cfg_reg3, + output wire [C_S_AXI_DATA_WIDTH-1:0] cfg_reg4, + output wire [C_S_AXI_DATA_WIDTH-1:0] cfg_reg5, + output wire [C_S_AXI_DATA_WIDTH-1:0] cfg_reg6, + output wire [C_S_AXI_DATA_WIDTH-1:0] cfg_reg7, + output wire [C_S_AXI_DATA_WIDTH-1:0] cfg_reg8, + output wire [C_S_AXI_DATA_WIDTH-1:0] cfg_reg9, + output wire [C_S_AXI_DATA_WIDTH-1:0] 
cfg_reg10,
+	output wire [C_S_AXI_DATA_WIDTH-1:0]	cfg_reg11,
+	output wire [C_S_AXI_DATA_WIDTH-1:0]	cfg_reg12,
+	output wire [C_S_AXI_DATA_WIDTH-1:0]	cfg_reg13,
+	output wire [C_S_AXI_DATA_WIDTH-1:0]	cfg_reg14,
+	output wire [C_S_AXI_DATA_WIDTH-1:0]	cfg_reg15,
+
+	// User ports ends
+	// Do not modify the ports beyond this line
+
+	// Global Clock Signal
+	input wire  S_AXI_ACLK,
+	// Global Reset Signal. This Signal is Active LOW
+	input wire  S_AXI_ARESETN,
+	// Write address (issued by master, accepted by Slave)
+	input wire [C_S_AXI_ADDR_WIDTH-1 : 0] S_AXI_AWADDR,
+	// Write channel Protection type. This signal indicates the
+	// privilege and security level of the transaction, and whether
+	// the transaction is a data access or an instruction access.
+	input wire [2 : 0] S_AXI_AWPROT,
+	// Write address valid. This signal indicates that the master is signaling
+	// valid write address and control information.
+	input wire  S_AXI_AWVALID,
+	// Write address ready. This signal indicates that the slave is ready
+	// to accept an address and associated control signals.
+	output wire  S_AXI_AWREADY,
+	// Write data (issued by master, accepted by Slave)
+	input wire [C_S_AXI_DATA_WIDTH-1 : 0] S_AXI_WDATA,
+	// Write strobes. This signal indicates which byte lanes hold
+	// valid data. There is one write strobe bit for each eight
+	// bits of the write data bus.
+	input wire [(C_S_AXI_DATA_WIDTH/8)-1 : 0] S_AXI_WSTRB,
+	// Write valid. This signal indicates that valid write
+	// data and strobes are available.
+	input wire  S_AXI_WVALID,
+	// Write ready. This signal indicates that the slave
+	// can accept the write data.
+	output wire  S_AXI_WREADY,
+	// Write response. This signal indicates the status
+	// of the write transaction.
+	output wire [1 : 0] S_AXI_BRESP,
+	// Write response valid. This signal indicates that the channel
+	// is signaling a valid write response.
+	output wire  S_AXI_BVALID,
+	// Response ready. This signal indicates that the master
+	// can accept a write response.
+	input wire  S_AXI_BREADY,
+	// Read address (issued by master, accepted by Slave)
+	input wire [C_S_AXI_ADDR_WIDTH-1 : 0] S_AXI_ARADDR,
+	// Protection type. This signal indicates the privilege
+	// and security level of the transaction, and whether the
+	// transaction is a data access or an instruction access.
+	input wire [2 : 0] S_AXI_ARPROT,
+	// Read address valid. This signal indicates that the channel
+	// is signaling valid read address and control information.
+	input wire  S_AXI_ARVALID,
+	// Read address ready. This signal indicates that the slave is
+	// ready to accept an address and associated control signals.
+	output wire  S_AXI_ARREADY,
+	// Read data (issued by slave)
+	output wire [C_S_AXI_DATA_WIDTH-1 : 0] S_AXI_RDATA,
+	// Read response. This signal indicates the status of the
+	// read transfer.
+	output wire [1 : 0] S_AXI_RRESP,
+	// Read valid. This signal indicates that the channel is
+	// signaling the required read data.
+	output wire  S_AXI_RVALID,
+	// Read ready. This signal indicates that the master can
+	// accept the read data and response information.
+ input wire S_AXI_RREADY +); + +// AXI4LITE signals +reg [C_S_AXI_ADDR_WIDTH-1 : 0] axi_awaddr; +reg axi_awready; +reg axi_wready; +reg [1 : 0] axi_bresp; +reg axi_bvalid; +reg [C_S_AXI_ADDR_WIDTH-1 : 0] axi_araddr; +reg axi_arready; +reg [C_S_AXI_DATA_WIDTH-1 : 0] axi_rdata; +reg [1 : 0] axi_rresp; +reg axi_rvalid; + +// Example-specific design signals +// local parameter for addressing 32 bit / 64 bit C_S_AXI_DATA_WIDTH +// ADDR_LSB is used for addressing 32/64 bit registers/memories +// ADDR_LSB = 2 for 32 bits (n downto 2) +// ADDR_LSB = 3 for 64 bits (n downto 3) +localparam integer ADDR_LSB = (C_S_AXI_DATA_WIDTH/32) + 1; +localparam integer OPT_MEM_ADDR_BITS = 3; +//---------------------------------------------- +//-- Signals for user logic register space example +//------------------------------------------------ +//-- Number of Slave Registers 16 +reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg0; +reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg1; +reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg2; +reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg3; +reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg4; +reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg5; +reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg6; +reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg7; +reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg8; +reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg9; +reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg10; +reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg11; +reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg12; +reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg13; +reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg14; +reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg15; +wire slv_reg_rden; +wire slv_reg_wren; +reg [C_S_AXI_DATA_WIDTH-1:0] reg_data_out; +integer byte_index; +reg aw_en; + +// I/O Connections assignments + +assign S_AXI_AWREADY = axi_awready; +assign S_AXI_WREADY = axi_wready; +assign S_AXI_BRESP = axi_bresp; +assign S_AXI_BVALID = axi_bvalid; +assign S_AXI_ARREADY = axi_arready; +assign S_AXI_RDATA = axi_rdata; +assign S_AXI_RRESP = axi_rresp; +assign S_AXI_RVALID = axi_rvalid; +// Implement axi_awready generation +// axi_awready is asserted for one S_AXI_ACLK clock cycle when both +// S_AXI_AWVALID and S_AXI_WVALID are asserted. axi_awready is +// de-asserted when reset is low. + +always @( posedge S_AXI_ACLK ) +begin + if ( S_AXI_ARESETN == 1'b0 ) + begin + axi_awready <= 1'b0; + aw_en <= 1'b1; + end + else + begin + if (~axi_awready && S_AXI_AWVALID && S_AXI_WVALID && aw_en) + begin + // slave is ready to accept write address when + // there is a valid write address and write data + // on the write address and data bus. This design + // expects no outstanding transactions. + axi_awready <= 1'b1; + aw_en <= 1'b0; + end + else if (S_AXI_BREADY && axi_bvalid) + begin + aw_en <= 1'b1; + axi_awready <= 1'b0; + end + else + begin + axi_awready <= 1'b0; + end + end +end + +// Implement axi_awaddr latching +// This process is used to latch the address when both +// S_AXI_AWVALID and S_AXI_WVALID are valid. + +always @( posedge S_AXI_ACLK ) +begin + if ( S_AXI_ARESETN == 1'b0 ) + begin + axi_awaddr <= 0; + end + else + begin + if (~axi_awready && S_AXI_AWVALID && S_AXI_WVALID && aw_en) + begin + // Write Address latching + axi_awaddr <= S_AXI_AWADDR; + end + end +end + +// Implement axi_wready generation +// axi_wready is asserted for one S_AXI_ACLK clock cycle when both +// S_AXI_AWVALID and S_AXI_WVALID are asserted. axi_wready is +// de-asserted when reset is low. 
+ +always @( posedge S_AXI_ACLK ) +begin + if ( S_AXI_ARESETN == 1'b0 ) + begin + axi_wready <= 1'b0; + end + else + begin + if (~axi_wready && S_AXI_WVALID && S_AXI_AWVALID && aw_en ) + begin + // slave is ready to accept write data when + // there is a valid write address and write data + // on the write address and data bus. This design + // expects no outstanding transactions. + axi_wready <= 1'b1; + end + else + begin + axi_wready <= 1'b0; + end + end +end + +// Implement memory mapped register select and write logic generation +// The write data is accepted and written to memory mapped registers when +// axi_awready, S_AXI_WVALID, axi_wready and S_AXI_WVALID are asserted. Write strobes are used to +// select byte enables of slave registers while writing. +// These registers are cleared when reset (active low) is applied. +// Slave register write enable is asserted when valid address and data are available +// and the slave is ready to accept the write address and write data. +assign slv_reg_wren = axi_wready && S_AXI_WVALID && axi_awready && S_AXI_AWVALID; + +always @( posedge S_AXI_ACLK ) +begin + if ( S_AXI_ARESETN == 1'b0 ) + begin + slv_reg0 <= 0; + slv_reg1 <= 0; + slv_reg2 <= 0; + slv_reg3 <= 0; + slv_reg4 <= 0; + slv_reg5 <= 0; + slv_reg6 <= 0; + slv_reg7 <= 0; + slv_reg8 <= 0; + slv_reg9 <= 0; + slv_reg10 <= 0; + slv_reg11 <= 0; + slv_reg12 <= 0; + slv_reg13 <= 0; + slv_reg14 <= 0; + slv_reg15 <= 0; + end + else begin + if (slv_reg_wren) + begin + case ( axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] ) + 4'h0: + for ( byte_index = 0; byte_index <= (C_S_AXI_DATA_WIDTH/8)-1; byte_index = byte_index+1 ) + if ( S_AXI_WSTRB[byte_index] == 1 ) begin + // Respective byte enables are asserted as per write strobes + // Slave register 0 + slv_reg0[(byte_index*8) +: 8] <= S_AXI_WDATA[(byte_index*8) +: 8]; + end + 4'h1: + for ( byte_index = 0; byte_index <= (C_S_AXI_DATA_WIDTH/8)-1; byte_index = byte_index+1 ) + if ( S_AXI_WSTRB[byte_index] == 1 ) begin + // Respective byte enables are asserted as per write strobes + // Slave register 1 + slv_reg1[(byte_index*8) +: 8] <= S_AXI_WDATA[(byte_index*8) +: 8]; + end + 4'h2: + for ( byte_index = 0; byte_index <= (C_S_AXI_DATA_WIDTH/8)-1; byte_index = byte_index+1 ) + if ( S_AXI_WSTRB[byte_index] == 1 ) begin + // Respective byte enables are asserted as per write strobes + // Slave register 2 + slv_reg2[(byte_index*8) +: 8] <= S_AXI_WDATA[(byte_index*8) +: 8]; + end + 4'h3: + for ( byte_index = 0; byte_index <= (C_S_AXI_DATA_WIDTH/8)-1; byte_index = byte_index+1 ) + if ( S_AXI_WSTRB[byte_index] == 1 ) begin + // Respective byte enables are asserted as per write strobes + // Slave register 3 + slv_reg3[(byte_index*8) +: 8] <= S_AXI_WDATA[(byte_index*8) +: 8]; + end + 4'h4: + for ( byte_index = 0; byte_index <= (C_S_AXI_DATA_WIDTH/8)-1; byte_index = byte_index+1 ) + if ( S_AXI_WSTRB[byte_index] == 1 ) begin + // Respective byte enables are asserted as per write strobes + // Slave register 4 + slv_reg4[(byte_index*8) +: 8] <= S_AXI_WDATA[(byte_index*8) +: 8]; + end + 4'h5: + for ( byte_index = 0; byte_index <= (C_S_AXI_DATA_WIDTH/8)-1; byte_index = byte_index+1 ) + if ( S_AXI_WSTRB[byte_index] == 1 ) begin + // Respective byte enables are asserted as per write strobes + // Slave register 5 + slv_reg5[(byte_index*8) +: 8] <= S_AXI_WDATA[(byte_index*8) +: 8]; + end + 4'h6: + for ( byte_index = 0; byte_index <= (C_S_AXI_DATA_WIDTH/8)-1; byte_index = byte_index+1 ) + if ( S_AXI_WSTRB[byte_index] == 1 ) begin + // Respective byte enables are asserted 
as per write strobes + // Slave register 6 + slv_reg6[(byte_index*8) +: 8] <= S_AXI_WDATA[(byte_index*8) +: 8]; + end + 4'h7: + for ( byte_index = 0; byte_index <= (C_S_AXI_DATA_WIDTH/8)-1; byte_index = byte_index+1 ) + if ( S_AXI_WSTRB[byte_index] == 1 ) begin + // Respective byte enables are asserted as per write strobes + // Slave register 7 + slv_reg7[(byte_index*8) +: 8] <= S_AXI_WDATA[(byte_index*8) +: 8]; + end + 4'h8: + for ( byte_index = 0; byte_index <= (C_S_AXI_DATA_WIDTH/8)-1; byte_index = byte_index+1 ) + if ( S_AXI_WSTRB[byte_index] == 1 ) begin + // Respective byte enables are asserted as per write strobes + // Slave register 8 + slv_reg8[(byte_index*8) +: 8] <= S_AXI_WDATA[(byte_index*8) +: 8]; + end + 4'h9: + for ( byte_index = 0; byte_index <= (C_S_AXI_DATA_WIDTH/8)-1; byte_index = byte_index+1 ) + if ( S_AXI_WSTRB[byte_index] == 1 ) begin + // Respective byte enables are asserted as per write strobes + // Slave register 9 + slv_reg9[(byte_index*8) +: 8] <= S_AXI_WDATA[(byte_index*8) +: 8]; + end + 4'hA: + for ( byte_index = 0; byte_index <= (C_S_AXI_DATA_WIDTH/8)-1; byte_index = byte_index+1 ) + if ( S_AXI_WSTRB[byte_index] == 1 ) begin + // Respective byte enables are asserted as per write strobes + // Slave register 10 + slv_reg10[(byte_index*8) +: 8] <= S_AXI_WDATA[(byte_index*8) +: 8]; + end + 4'hB: + for ( byte_index = 0; byte_index <= (C_S_AXI_DATA_WIDTH/8)-1; byte_index = byte_index+1 ) + if ( S_AXI_WSTRB[byte_index] == 1 ) begin + // Respective byte enables are asserted as per write strobes + // Slave register 11 + slv_reg11[(byte_index*8) +: 8] <= S_AXI_WDATA[(byte_index*8) +: 8]; + end + 4'hC: + for ( byte_index = 0; byte_index <= (C_S_AXI_DATA_WIDTH/8)-1; byte_index = byte_index+1 ) + if ( S_AXI_WSTRB[byte_index] == 1 ) begin + // Respective byte enables are asserted as per write strobes + // Slave register 12 + slv_reg12[(byte_index*8) +: 8] <= S_AXI_WDATA[(byte_index*8) +: 8]; + end + 4'hD: + for ( byte_index = 0; byte_index <= (C_S_AXI_DATA_WIDTH/8)-1; byte_index = byte_index+1 ) + if ( S_AXI_WSTRB[byte_index] == 1 ) begin + // Respective byte enables are asserted as per write strobes + // Slave register 13 + slv_reg13[(byte_index*8) +: 8] <= S_AXI_WDATA[(byte_index*8) +: 8]; + end + 4'hE: + for ( byte_index = 0; byte_index <= (C_S_AXI_DATA_WIDTH/8)-1; byte_index = byte_index+1 ) + if ( S_AXI_WSTRB[byte_index] == 1 ) begin + // Respective byte enables are asserted as per write strobes + // Slave register 14 + slv_reg14[(byte_index*8) +: 8] <= S_AXI_WDATA[(byte_index*8) +: 8]; + end + 4'hF: + for ( byte_index = 0; byte_index <= (C_S_AXI_DATA_WIDTH/8)-1; byte_index = byte_index+1 ) + if ( S_AXI_WSTRB[byte_index] == 1 ) begin + // Respective byte enables are asserted as per write strobes + // Slave register 15 + slv_reg15[(byte_index*8) +: 8] <= S_AXI_WDATA[(byte_index*8) +: 8]; + end + default : begin + slv_reg0 <= slv_reg0; + slv_reg1 <= slv_reg1; + slv_reg2 <= slv_reg2; + slv_reg3 <= slv_reg3; + slv_reg4 <= slv_reg4; + slv_reg5 <= slv_reg5; + slv_reg6 <= slv_reg6; + slv_reg7 <= slv_reg7; + slv_reg8 <= slv_reg8; + slv_reg9 <= slv_reg9; + slv_reg10 <= slv_reg10; + slv_reg11 <= slv_reg11; + slv_reg12 <= slv_reg12; + slv_reg13 <= slv_reg13; + slv_reg14 <= slv_reg14; + slv_reg15 <= slv_reg15; + end + endcase + end + end +end + +// Implement write response logic generation +// The write response and response valid signals are asserted by the slave +// when axi_wready, S_AXI_WVALID, axi_wready and S_AXI_WVALID are asserted. 
+// This marks the acceptance of the address and indicates the status of
+// the write transaction.
+
+always @( posedge S_AXI_ACLK )
+begin
+	if ( S_AXI_ARESETN == 1'b0 )
+	begin
+		axi_bvalid <= 0;
+		axi_bresp  <= 2'b0;
+	end
+	else
+	begin
+		if (axi_awready && S_AXI_AWVALID && ~axi_bvalid && axi_wready && S_AXI_WVALID)
+		begin
+			// indicates a valid write response is available
+			axi_bvalid <= 1'b1;
+			axi_bresp  <= 2'b0; // 'OKAY' response
+		end // work error responses in future
+		else
+		begin
+			if (S_AXI_BREADY && axi_bvalid)
+			//check if bready is asserted while bvalid is high)
+			//(there is a possibility that bready is always asserted high)
+			begin
+				axi_bvalid <= 1'b0;
+			end
+		end
+	end
+end
+
+// Implement axi_arready generation
+// axi_arready is asserted for one S_AXI_ACLK clock cycle when
+// S_AXI_ARVALID is asserted. axi_arready is
+// de-asserted when reset (active low) is asserted.
+// The read address is also latched when S_AXI_ARVALID is
+// asserted. axi_araddr is reset to zero on reset assertion.
+
+always @( posedge S_AXI_ACLK )
+begin
+	if ( S_AXI_ARESETN == 1'b0 )
+	begin
+		axi_arready <= 1'b0;
+		axi_araddr  <= 32'b0;
+	end
+	else
+	begin
+		if (~axi_arready && S_AXI_ARVALID)
+		begin
+			// indicates that the slave has accepted the valid read address
+			axi_arready <= 1'b1;
+			// Read address latching
+			axi_araddr  <= S_AXI_ARADDR;
+		end
+		else
+		begin
+			axi_arready <= 1'b0;
+		end
+	end
+end
+
+// Implement axi_rvalid generation
+// axi_rvalid is asserted for one S_AXI_ACLK clock cycle when both
+// S_AXI_ARVALID and axi_arready are asserted. The slave registers
+// data are available on the axi_rdata bus at this instance. The
+// assertion of axi_rvalid marks the validity of read data on the
+// bus and axi_rresp indicates the status of read transaction. axi_rvalid
+// is deasserted on reset (active low). axi_rresp and axi_rdata are
+// cleared to zero on reset (active low).
+always @( posedge S_AXI_ACLK )
+begin
+	if ( S_AXI_ARESETN == 1'b0 )
+	begin
+		axi_rvalid <= 0;
+		axi_rresp  <= 0;
+	end
+	else
+	begin
+		if (axi_arready && S_AXI_ARVALID && ~axi_rvalid)
+		begin
+			// Valid read data is available at the read data bus
+			axi_rvalid <= 1'b1;
+			axi_rresp  <= 2'b0; // 'OKAY' response
+		end
+		else if (axi_rvalid && S_AXI_RREADY)
+		begin
+			// Read data is accepted by the master
+			axi_rvalid <= 1'b0;
+		end
+	end
+end
+
+// Implement memory mapped register select and read logic generation
+// Slave register read enable is asserted when valid address is available
+// and the slave is ready to accept the read address.
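+// Note: with C_S_AXI_DATA_WIDTH = 32 (ADDR_LSB = 2) and OPT_MEM_ADDR_BITS = 3, the
+// read decode below selects one of the 16 slave registers via axi_araddr[5:2],
+// i.e. byte offsets 0x00-0x3C; cfg_reg0..cfg_reg15 mirror slv_reg0..slv_reg15 so
+// the surrounding sliding-window generator can consume the written configuration.
+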
+assign slv_reg_rden = axi_arready & S_AXI_ARVALID & ~axi_rvalid;
+always @(*)
+begin
+	// Address decoding for reading registers
+	case ( axi_araddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] )
+		4'h0   : reg_data_out <= slv_reg0;
+		4'h1   : reg_data_out <= slv_reg1;
+		4'h2   : reg_data_out <= slv_reg2;
+		4'h3   : reg_data_out <= slv_reg3;
+		4'h4   : reg_data_out <= slv_reg4;
+		4'h5   : reg_data_out <= slv_reg5;
+		4'h6   : reg_data_out <= slv_reg6;
+		4'h7   : reg_data_out <= slv_reg7;
+		4'h8   : reg_data_out <= slv_reg8;
+		4'h9   : reg_data_out <= slv_reg9;
+		4'hA   : reg_data_out <= slv_reg10;
+		4'hB   : reg_data_out <= slv_reg11;
+		4'hC   : reg_data_out <= slv_reg12;
+		4'hD   : reg_data_out <= slv_reg13;
+		4'hE   : reg_data_out <= slv_reg14;
+		4'hF   : reg_data_out <= slv_reg15;
+		default : reg_data_out <= 0;
+	endcase
+end
+
+// Output register or memory read data
+always @( posedge S_AXI_ACLK )
+begin
+	if ( S_AXI_ARESETN == 1'b0 )
+	begin
+		axi_rdata <= 0;
+	end
+	else
+	begin
+		// When there is a valid read address (S_AXI_ARVALID) with
+		// acceptance of read address by the slave (axi_arready),
+		// output the read data
+		if (slv_reg_rden)
+		begin
+			axi_rdata <= reg_data_out; // register read data
+		end
+	end
+end
+
+// Add user logic here
+assign cfg_reg0  = slv_reg0;
+assign cfg_reg1  = slv_reg1;
+assign cfg_reg2  = slv_reg2;
+assign cfg_reg3  = slv_reg3;
+assign cfg_reg4  = slv_reg4;
+assign cfg_reg5  = slv_reg5;
+assign cfg_reg6  = slv_reg6;
+assign cfg_reg7  = slv_reg7;
+assign cfg_reg8  = slv_reg8;
+assign cfg_reg9  = slv_reg9;
+assign cfg_reg10 = slv_reg10;
+assign cfg_reg11 = slv_reg11;
+assign cfg_reg12 = slv_reg12;
+assign cfg_reg13 = slv_reg13;
+assign cfg_reg14 = slv_reg14;
+assign cfg_reg15 = slv_reg15;
+// User logic ends
+
+endmodule
diff --git a/finn-rtllib/swg/swg_template_default.sv b/finn-rtllib/swg/swg_template_default.sv
index 97517438a0c261e4488b74a677a352f9dc51743b..06e65e911100dd7d3d8879b014a6d59713eb9bbd 100644
--- a/finn-rtllib/swg/swg_template_default.sv
+++ b/finn-rtllib/swg/swg_template_default.sv
@@ -36,7 +36,6 @@ module $TOP_MODULE_NAME$_controller #(
 	int unsigned  LOOP_SIMD_ITERATIONS = $LOOP_SIMD_ITERATIONS$,
 	int unsigned  INCR_BITWIDTH = $INCR_BITWIDTH$,
-	bit [INCR_BITWIDTH-1:0]  ADDR_INCREMENT_MAP[6] = $ADDR_INCREMENT_MAP$,
 	bit IS_DEPTHWISE = $IS_DEPTHWISE$
 )(
@@ -60,26 +59,31 @@
 	state_e  State = $INNERMOST_STATE$;
 	state_e  state_next;
 
-	logic signed [$clog2(LOOP_H_ITERATIONS   +2)+1-1:0]  Counter_loop_h    = LOOP_H_ITERATIONS-1;
-	logic signed [$clog2(LOOP_W_ITERATIONS   +2)+1-1:0]  Counter_loop_w    = LOOP_W_ITERATIONS-1;
-	logic signed [$clog2(LOOP_KH_ITERATIONS  +2)+1-1:0]  Counter_loop_kh   = LOOP_KH_ITERATIONS-1;
-	logic signed [$clog2(LOOP_KW_ITERATIONS  +2)+1-1:0]  Counter_loop_kw   = LOOP_KW_ITERATIONS-1;
-	logic signed [$clog2(LOOP_SIMD_ITERATIONS+2)+1-1:0]  Counter_loop_simd = LOOP_SIMD_ITERATIONS-1;
-
-	assign addr_incr = ADDR_INCREMENT_MAP[State];
+	logic signed [$clog2(LOOP_H_ITERATIONS   +2)+1-1:0]  Counter_loop_h    = LOOP_H_ITERATIONS;
+	logic signed [$clog2(LOOP_W_ITERATIONS   +2)+1-1:0]  Counter_loop_w    = LOOP_W_ITERATIONS;
+	logic signed [$clog2(LOOP_KH_ITERATIONS  +2)+1-1:0]  Counter_loop_kh   = LOOP_KH_ITERATIONS;
+	logic signed [$clog2(LOOP_KW_ITERATIONS  +2)+1-1:0]  Counter_loop_kw   = LOOP_KW_ITERATIONS;
+	logic signed [$clog2(LOOP_SIMD_ITERATIONS+2)+1-1:0]  Counter_loop_simd = LOOP_SIMD_ITERATIONS;
+
+	// combinational logic for addr_incr generation
+	always_comb begin : blkHead
+		unique case (State)
+			0 : addr_incr = 0;
+			1 : addr_incr = $HEAD_INCR_SIMD$;
+			2 : addr_incr = 
$HEAD_INCR_KW$; + 3 : addr_incr = $HEAD_INCR_KH$; + 4 : addr_incr = $HEAD_INCR_W$; + 5 : addr_incr = $HEAD_INCR_H$; + endcase + end // combinational logic for tail_incr generation uwire tail_incr_inner_condition = IS_DEPTHWISE? (Counter_loop_kh >= 0) : 0; - always_comb begin : blkTail - if (tail_incr_inner_condition) - tail_incr = 1; - else if (Counter_loop_w >= 0) - tail_incr = $TAIL_INCR_W$; - else if (Counter_loop_h >= 0) - tail_incr = $TAIL_INCR_H$; - else - tail_incr = $TAIL_INCR_LAST$; - end + assign tail_incr = + tail_incr_inner_condition? 1 : + Counter_loop_w >= 0? $TAIL_INCR_W$ : + Counter_loop_h >= 0? $TAIL_INCR_H$ : + /* else */ $TAIL_INCR_LAST$; // combinational next state logic always_comb begin : blkState @@ -101,29 +105,29 @@ module $TOP_MODULE_NAME$_controller #( always_ff @ (posedge clk) begin if(!rst_n) begin State <= $INNERMOST_STATE$; - Counter_loop_h <= LOOP_H_ITERATIONS-1; - Counter_loop_w <= LOOP_W_ITERATIONS-1; - Counter_loop_kh <= LOOP_KH_ITERATIONS-1; - Counter_loop_kw <= LOOP_KW_ITERATIONS-1; - Counter_loop_simd <= LOOP_SIMD_ITERATIONS-1; + Counter_loop_h <= LOOP_H_ITERATIONS; + Counter_loop_w <= LOOP_W_ITERATIONS; + Counter_loop_kh <= LOOP_KH_ITERATIONS; + Counter_loop_kw <= LOOP_KW_ITERATIONS; + Counter_loop_simd <= LOOP_SIMD_ITERATIONS; end else if(advance) begin State <= state_next; if (State == $INNERMOST_STATE$) begin if(Counter_loop_simd >= 0) Counter_loop_simd <= Counter_loop_simd-1; else begin - Counter_loop_simd <= LOOP_SIMD_ITERATIONS-1; + Counter_loop_simd <= LOOP_SIMD_ITERATIONS; if(Counter_loop_kw >= 0) Counter_loop_kw <= Counter_loop_kw-1; else begin - Counter_loop_kw <= LOOP_KW_ITERATIONS-1; + Counter_loop_kw <= LOOP_KW_ITERATIONS; if(Counter_loop_kh >= 0) Counter_loop_kh <= Counter_loop_kh-1; else begin - Counter_loop_kh <= LOOP_KH_ITERATIONS-1; + Counter_loop_kh <= LOOP_KH_ITERATIONS; if(Counter_loop_w >= 0) Counter_loop_w <= Counter_loop_w-1; else begin - Counter_loop_w <= LOOP_W_ITERATIONS-1; + Counter_loop_w <= LOOP_W_ITERATIONS; if(Counter_loop_h >= 0) Counter_loop_h <= Counter_loop_h-1; - else Counter_loop_h <= LOOP_H_ITERATIONS-1; + else Counter_loop_h <= LOOP_H_ITERATIONS; end end end @@ -139,7 +143,6 @@ module $TOP_MODULE_NAME$_cyclic_buffer_addressable #( int unsigned DEPTH )( input logic clk, - input logic rst_n, input logic write_enable, input logic [$clog2(DEPTH)-1:0] write_addr, @@ -182,7 +185,7 @@ module $TOP_MODULE_NAME$_impl #( input logic out_V_V_TREADY, output logic [BIT_WIDTH * SIMD * MMV_OUT-1:0] out_V_V_TDATA ); - // derived Constants + // derived constants localparam int unsigned BUF_IN_WIDTH = BIT_WIDTH * SIMD * MMV_IN; localparam int unsigned BUF_OUT_ELEM_WIDTH = BIT_WIDTH * SIMD; localparam int unsigned BUF_OUT_WIDTH = BIT_WIDTH * SIMD * MMV_OUT; @@ -199,7 +202,6 @@ module $TOP_MODULE_NAME$_impl #( .DEPTH(BUF_ELEM_TOTAL) ) window_buffer_inst ( .clk(ap_clk), - .rst_n(ap_rst_n), .write_enable(window_buffer_write_enable), .write_addr(window_buffer_write_addr), @@ -234,6 +236,15 @@ module $TOP_MODULE_NAME$_impl #( logic [$clog2(BUF_ELEM_TOTAL)-1:0] Window_buffer_write_addr_reg = 0; // Control signals/registers + logic Write_cmd = 0; + logic Writing_done = 0; + uwire write_ok = Write_cmd && out_V_V_TREADY; + uwire write_blocked = Write_cmd && !out_V_V_TREADY; + + logic Fetching_done = 0; + uwire fetch_cmd = !($signed(Current_elem) > Newest_buffered_elem) && !write_blocked && !Fetching_done; + + uwire reading_done = Newest_buffered_elem == LAST_READ_ELEM; uwire read_cmd = !reading_done && ( // if there is still an input 
element left to read Fetching_done || ( // if fetching is done (e.g. for skipped rows at FM end due to stride) @@ -242,15 +253,6 @@ module $TOP_MODULE_NAME$_impl #( ) // (over-)write to buffer if oldest buffered element will no longer be needed ); uwire read_ok = read_cmd && in0_V_V_TVALID; - uwire reading_done = Newest_buffered_elem == LAST_READ_ELEM; - - uwire fetch_cmd = !($signed(Current_elem) > Newest_buffered_elem) && !write_blocked && !Fetching_done; - logic Fetching_done = 0; - - logic Write_cmd = 0; - logic Writing_done = 0; - uwire write_ok = Write_cmd && out_V_V_TREADY; - uwire write_blocked = Write_cmd && !out_V_V_TREADY;; //assign buffer control assign window_buffer_write_addr = Window_buffer_write_addr_reg; diff --git a/finn-rtllib/swg/swg_template_default_dynamic.sv b/finn-rtllib/swg/swg_template_default_dynamic.sv new file mode 100644 index 0000000000000000000000000000000000000000..eb53978b580a4753bbea6c8478f35912deb812b4 --- /dev/null +++ b/finn-rtllib/swg/swg_template_default_dynamic.sv @@ -0,0 +1,416 @@ +module $TOP_MODULE_NAME$_controller #( + int unsigned CNTR_BITWIDTH, + int unsigned INCR_BITWIDTH, + + bit IS_DEPTHWISE = $IS_DEPTHWISE$ +)( + input logic clk, + input logic rst_n, + + input logic advance, + output logic [INCR_BITWIDTH-1:0] addr_incr, + output logic [INCR_BITWIDTH-1:0] tail_incr, + + input logic cfg_valid, + input logic [CNTR_BITWIDTH-1:0] cfg_cntr_simd, + input logic [CNTR_BITWIDTH-1:0] cfg_cntr_kw, + input logic [CNTR_BITWIDTH-1:0] cfg_cntr_kh, + input logic [CNTR_BITWIDTH-1:0] cfg_cntr_w, + input logic [CNTR_BITWIDTH-1:0] cfg_cntr_h, + input logic [INCR_BITWIDTH-1:0] cfg_incr_head_simd, + input logic [INCR_BITWIDTH-1:0] cfg_incr_head_kw, + input logic [INCR_BITWIDTH-1:0] cfg_incr_head_kh, + input logic [INCR_BITWIDTH-1:0] cfg_incr_head_w, + input logic [INCR_BITWIDTH-1:0] cfg_incr_head_h, + input logic [INCR_BITWIDTH-1:0] cfg_incr_tail_w, + input logic [INCR_BITWIDTH-1:0] cfg_incr_tail_h, + input logic [INCR_BITWIDTH-1:0] cfg_incr_tail_last +); + + // (dynamic) configuration registers + logic [CNTR_BITWIDTH-1:0] Cfg_cntr_simd = $LOOP_SIMD_ITERATIONS$; + logic [CNTR_BITWIDTH-1:0] Cfg_cntr_kw = $LOOP_KW_ITERATIONS$; + logic [CNTR_BITWIDTH-1:0] Cfg_cntr_kh = $LOOP_KH_ITERATIONS$; + logic [CNTR_BITWIDTH-1:0] Cfg_cntr_w = $LOOP_W_ITERATIONS$; + logic [CNTR_BITWIDTH-1:0] Cfg_cntr_h = $LOOP_H_ITERATIONS$; + logic [INCR_BITWIDTH-1:0] Cfg_incr_head_simd = $HEAD_INCR_SIMD$; + logic [INCR_BITWIDTH-1:0] Cfg_incr_head_kw = $HEAD_INCR_KW$; + logic [INCR_BITWIDTH-1:0] Cfg_incr_head_kh = $HEAD_INCR_KH$; + logic [INCR_BITWIDTH-1:0] Cfg_incr_head_w = $HEAD_INCR_W$; + logic [INCR_BITWIDTH-1:0] Cfg_incr_head_h = $HEAD_INCR_H$; + logic [INCR_BITWIDTH-1:0] Cfg_incr_tail_w = $TAIL_INCR_W$; + logic [INCR_BITWIDTH-1:0] Cfg_incr_tail_h = $TAIL_INCR_H$; + logic [INCR_BITWIDTH-1:0] Cfg_incr_tail_last = $TAIL_INCR_LAST$; + + // configuration reset/set logic + always_ff @ (posedge clk) begin + if(cfg_valid) begin + Cfg_cntr_simd <= cfg_cntr_simd; + Cfg_cntr_kw <= cfg_cntr_kw; + Cfg_cntr_kh <= cfg_cntr_kh; + Cfg_cntr_w <= cfg_cntr_w; + Cfg_cntr_h <= cfg_cntr_h; + Cfg_incr_head_simd <= cfg_incr_head_simd; + Cfg_incr_head_kw <= cfg_incr_head_kw; + Cfg_incr_head_kh <= cfg_incr_head_kh; + Cfg_incr_head_w <= cfg_incr_head_w; + Cfg_incr_head_h <= cfg_incr_head_h; + Cfg_incr_tail_w <= cfg_incr_tail_w; + Cfg_incr_tail_h <= cfg_incr_tail_h; + Cfg_incr_tail_last <= cfg_incr_tail_last; + end + end + + // state and counters + typedef enum logic [2:0] { + STATE_START, + STATE_LOOP_SIMD, + 
STATE_LOOP_KW, + STATE_LOOP_KH, + STATE_LOOP_W, + STATE_LOOP_H + } state_e; + state_e State = $INNERMOST_STATE$; + state_e state_next; + + logic signed [$clog2($LOOP_H_ITERATIONS$ +2)+1-1:0] Counter_loop_h = $LOOP_H_ITERATIONS$; + logic signed [$clog2($LOOP_W_ITERATIONS$ +2)+1-1:0] Counter_loop_w = $LOOP_W_ITERATIONS$; + logic signed [$clog2($LOOP_KH_ITERATIONS$ +2)+1-1:0] Counter_loop_kh = $LOOP_KH_ITERATIONS$; + logic signed [$clog2($LOOP_KW_ITERATIONS$ +2)+1-1:0] Counter_loop_kw = $LOOP_KW_ITERATIONS$; + logic signed [$clog2($LOOP_SIMD_ITERATIONS$+2)+1-1:0] Counter_loop_simd = $LOOP_SIMD_ITERATIONS$; + + // combinational logic for addr_incr generation + always_comb begin : blkHead + unique case (State) + 0 : addr_incr = 0; + 1 : addr_incr = Cfg_incr_head_simd; + 2 : addr_incr = Cfg_incr_head_kw; + 3 : addr_incr = Cfg_incr_head_kh; + 4 : addr_incr = Cfg_incr_head_w; + 5 : addr_incr = Cfg_incr_head_h; + endcase + end + + // combinational logic for tail_incr generation + uwire tail_incr_inner_condition = IS_DEPTHWISE? (Counter_loop_kh >= 0) : 0; + assign tail_incr = + tail_incr_inner_condition? 1 : + Counter_loop_w >= 0? Cfg_incr_tail_w : + Counter_loop_h >= 0? Cfg_incr_tail_h : + /* else */ Cfg_incr_tail_last; + + // combinational next state logic + always_comb begin : blkState + state_next = State; + if(State != $INNERMOST_STATE$) state_next = $INNERMOST_STATE$; + else begin + if(Counter_loop_simd < 0) begin + state_next = + (Counter_loop_kw >= 0)? STATE_LOOP_KW : + (Counter_loop_kh >= 0)? STATE_LOOP_KH : + (Counter_loop_w >= 0)? STATE_LOOP_W : + (Counter_loop_h >= 0)? STATE_LOOP_H : + /* else */ STATE_START; + end + end + end : blkState + + // sequential logic + always_ff @ (posedge clk) begin + if(!rst_n) begin + State <= $INNERMOST_STATE$; + Counter_loop_h <= Cfg_cntr_h; + Counter_loop_w <= Cfg_cntr_w; + Counter_loop_kh <= Cfg_cntr_kh; + Counter_loop_kw <= Cfg_cntr_kw; + Counter_loop_simd <= Cfg_cntr_simd; + end + else if(advance) begin + State <= state_next; + if (State == $INNERMOST_STATE$) begin + if(Counter_loop_simd >= 0) Counter_loop_simd <= Counter_loop_simd-1; + else begin + Counter_loop_simd <= Cfg_cntr_simd; + if(Counter_loop_kw >= 0) Counter_loop_kw <= Counter_loop_kw-1; + else begin + Counter_loop_kw <= Cfg_cntr_kw; + if(Counter_loop_kh >= 0) Counter_loop_kh <= Counter_loop_kh-1; + else begin + Counter_loop_kh <= Cfg_cntr_kh; + if(Counter_loop_w >= 0) Counter_loop_w <= Counter_loop_w-1; + else begin + Counter_loop_w <= Cfg_cntr_w; + if(Counter_loop_h >= 0) Counter_loop_h <= Counter_loop_h-1; + else Counter_loop_h <= Cfg_cntr_h; + end + end + end + end + end + end + end + +endmodule : $TOP_MODULE_NAME$_controller + +module $TOP_MODULE_NAME$_cyclic_buffer_addressable #( + int unsigned WIDTH, + int unsigned DEPTH +)( + input logic clk, + + input logic write_enable, + input logic [$clog2(DEPTH)-1:0] write_addr, + input logic [WIDTH-1:0] data_in, + + input logic read_enable, + input logic [$clog2(DEPTH)-1:0] read_addr, // absolute (!) 
read address of cyclic buffer + output logic [WIDTH-1:0] data_out +); + + $RAM_STYLE$ logic [WIDTH-1:0] Ram[DEPTH]; + logic [WIDTH-1:0] Out = 'x; + always_ff @(posedge clk) begin + if (read_enable) Out <= Ram[read_addr]; + if (write_enable) Ram[write_addr] <= data_in; + end + assign data_out = Out; + +endmodule : $TOP_MODULE_NAME$_cyclic_buffer_addressable + +module $TOP_MODULE_NAME$_impl #( + int BIT_WIDTH, + int SIMD, + int MMV_IN, + int MMV_OUT, + int unsigned CNTR_BITWIDTH, + int unsigned INCR_BITWIDTH, + + int LAST_READ_ELEM = $LAST_READ_ELEM$, + int LAST_WRITE_ELEM = $LAST_WRITE_ELEM$, + int BUF_ELEM_TOTAL = $BUF_ELEM_TOTAL$, + int ELEM_PER_WINDOW = $ELEM_PER_WINDOW$ +)( + input logic ap_clk, + input logic ap_rst_n, + + input logic in0_V_V_TVALID, + output logic in0_V_V_TREADY, + input logic [BIT_WIDTH * SIMD * MMV_IN-1:0] in0_V_V_TDATA, + + output logic out_V_V_TVALID, + input logic out_V_V_TREADY, + output logic [BIT_WIDTH * SIMD * MMV_OUT-1:0] out_V_V_TDATA, + + input logic cfg_valid, + input logic [CNTR_BITWIDTH-1:0] cfg_cntr_simd, + input logic [CNTR_BITWIDTH-1:0] cfg_cntr_kw, + input logic [CNTR_BITWIDTH-1:0] cfg_cntr_kh, + input logic [CNTR_BITWIDTH-1:0] cfg_cntr_w, + input logic [CNTR_BITWIDTH-1:0] cfg_cntr_h, + input logic [INCR_BITWIDTH-1:0] cfg_incr_head_simd, + input logic [INCR_BITWIDTH-1:0] cfg_incr_head_kw, + input logic [INCR_BITWIDTH-1:0] cfg_incr_head_kh, + input logic [INCR_BITWIDTH-1:0] cfg_incr_head_w, + input logic [INCR_BITWIDTH-1:0] cfg_incr_head_h, + input logic [INCR_BITWIDTH-1:0] cfg_incr_tail_w, + input logic [INCR_BITWIDTH-1:0] cfg_incr_tail_h, + input logic [INCR_BITWIDTH-1:0] cfg_incr_tail_last, + input logic [31:0] cfg_last_read, + input logic [31:0] cfg_last_write +); + // derived constants + localparam int unsigned BUF_IN_WIDTH = BIT_WIDTH * SIMD * MMV_IN; + localparam int unsigned BUF_OUT_ELEM_WIDTH = BIT_WIDTH * SIMD; + localparam int unsigned BUF_OUT_WIDTH = BIT_WIDTH * SIMD * MMV_OUT; + + // (dynamic) configuration registers + logic [31:0] Cfg_last_read = LAST_READ_ELEM; + logic [31:0] Cfg_last_write = LAST_WRITE_ELEM; + + // configuration reset/set logic + always_ff @ (posedge ap_clk) begin + if(cfg_valid) begin + Cfg_last_read <= cfg_last_read; + Cfg_last_write <= cfg_last_write; + end + end + + // main buffer instantiation + uwire [BUF_IN_WIDTH -1:0] window_buffer_in; + uwire [BUF_OUT_WIDTH-1:0] window_buffer_out; + uwire window_buffer_write_enable; + uwire window_buffer_read_enable; + uwire [$clog2(BUF_ELEM_TOTAL)-1:0] window_buffer_write_addr; + uwire [$clog2(BUF_ELEM_TOTAL)-1:0] window_buffer_read_addr; + $TOP_MODULE_NAME$_cyclic_buffer_addressable #( + .WIDTH(BUF_IN_WIDTH), + .DEPTH(BUF_ELEM_TOTAL) + ) window_buffer_inst ( + .clk(ap_clk), + + .write_enable(window_buffer_write_enable), + .write_addr(window_buffer_write_addr), + .data_in(window_buffer_in), + + .read_enable(window_buffer_read_enable), + .read_addr(window_buffer_read_addr), + .data_out(window_buffer_out) + ); + + //controller instantiation + uwire advance_controller; + uwire signed [INCR_BITWIDTH-1:0] addr_incr; + uwire [INCR_BITWIDTH-1:0] tail_incr; + $TOP_MODULE_NAME$_controller #( + .CNTR_BITWIDTH(CNTR_BITWIDTH), + .INCR_BITWIDTH(INCR_BITWIDTH) + ) controller_inst ( + .clk(ap_clk), + .rst_n(ap_rst_n), + .advance(advance_controller), + .addr_incr(addr_incr), + .tail_incr(tail_incr), + + .cfg_valid(cfg_valid), + .cfg_cntr_simd(cfg_cntr_simd), + .cfg_cntr_kw(cfg_cntr_kw), + .cfg_cntr_kh(cfg_cntr_kh), + .cfg_cntr_w(cfg_cntr_w), + .cfg_cntr_h(cfg_cntr_h), + 
+    .cfg_incr_head_simd(cfg_incr_head_simd),
+    .cfg_incr_head_kw(cfg_incr_head_kw),
+    .cfg_incr_head_kh(cfg_incr_head_kh),
+    .cfg_incr_head_w(cfg_incr_head_w),
+    .cfg_incr_head_h(cfg_incr_head_h),
+    .cfg_incr_tail_w(cfg_incr_tail_w),
+    .cfg_incr_tail_h(cfg_incr_tail_h),
+    .cfg_incr_tail_last(cfg_incr_tail_last)
+);
+
+// Counters/address registers
+// Add a sign bit even to (most) unsigned counters and Window_buffer_read_addr_reg,
+// so we can use automatic sign extension and simplify calculations w/ signed increment.
+// Alternatively, we could manually sign-extend and shave off a bit here or there.
+logic signed [$clog2(LAST_READ_ELEM+1)+1-1:0] Newest_buffered_elem = -1;
+logic [$clog2(LAST_READ_ELEM+1)+1-1:0] Current_elem = 0;
+logic [$clog2(LAST_READ_ELEM+1)+1-1:0] First_elem_next_window = 0;
+logic [$clog2(ELEM_PER_WINDOW) -1:0] Position_in_window = 0;
+logic [$clog2(BUF_ELEM_TOTAL)+1 -1:0] Window_buffer_read_addr_reg = 0;
+logic [$clog2(BUF_ELEM_TOTAL)-1:0] Window_buffer_write_addr_reg = 0;
+
+// Control signals/registers
+logic Write_cmd = 0;
+logic Writing_done = 0;
+uwire write_ok = Write_cmd && out_V_V_TREADY;
+uwire write_blocked = Write_cmd && !out_V_V_TREADY;
+
+logic Fetching_done = 0;
+uwire fetch_cmd = !($signed(Current_elem) > Newest_buffered_elem) && !write_blocked && !Fetching_done;
+
+uwire reading_done = Newest_buffered_elem == Cfg_last_read;
+uwire read_cmd =
+    !reading_done && ( // if there is still an input element left to read
+        Fetching_done || ( // if fetching is done (e.g. for skipped rows at FM end due to stride)
+            $signed(((Newest_buffered_elem - (BUF_ELEM_TOTAL - 1)))) < $signed(First_elem_next_window) &&
+            $signed(((Newest_buffered_elem - (BUF_ELEM_TOTAL - 1)))) < $signed(Current_elem)
+        ) // (over-)write to buffer if oldest buffered element will no longer be needed
+    );
+uwire read_ok = read_cmd && in0_V_V_TVALID;
+
+// assign buffer control
+assign window_buffer_write_addr = Window_buffer_write_addr_reg;
+assign window_buffer_read_addr = Window_buffer_read_addr_reg;
+assign window_buffer_write_enable = read_ok;
+assign window_buffer_read_enable = fetch_cmd;
+assign advance_controller = fetch_cmd;
+
+// assign I/O ports
+assign window_buffer_in = in0_V_V_TDATA;
+assign out_V_V_TDATA = window_buffer_out;
+assign in0_V_V_TREADY = ap_rst_n && read_ok; // only asserted if data is available and we can store it (allowed)
+assign out_V_V_TVALID = ap_rst_n && Write_cmd; // only asserted if we have data available and it has not been read yet (don't wait for READY from sink)
+
+// main process for advancing counters
+always_ff @(posedge ap_clk) begin
+    if(!ap_rst_n) begin
+        Newest_buffered_elem <= -1;
+        Current_elem <= 0;
+        First_elem_next_window <= 0;
+        Position_in_window <= 0;
+        Window_buffer_read_addr_reg <= 0;
+        Window_buffer_write_addr_reg <= 0;
+        Fetching_done <= 0;
+        Write_cmd <= 0;
+        Writing_done <= 0;
+    end
+    else begin
+        if (read_ok) begin
+            Window_buffer_write_addr_reg <= (Window_buffer_write_addr_reg == BUF_ELEM_TOTAL-1)?
+                0 : Window_buffer_write_addr_reg + 1;
+            Newest_buffered_elem <= Newest_buffered_elem+1;
+
+            if (Newest_buffered_elem == Cfg_last_read-1) begin
+                Window_buffer_write_addr_reg <= 0;
+            end
+            // check if this is the last read cycle (reading_done will be true afterwards)
+            if ((Newest_buffered_elem == Cfg_last_read-1) && Writing_done) begin
+                // start processing of next FM if writing is done already (possible due to unused input elements at the tail end)
+                // todo: allow for read overlapping between feature maps (i.e., reading first elements from next FM while still writing last window of current FM)
+                Newest_buffered_elem <= -1;
+                Current_elem <= 0;
+                Window_buffer_read_addr_reg <= 0;
+                First_elem_next_window <= 0;
+                Writing_done <= 0;
+                Fetching_done <= 0;
+            end
+        end
+
+        if (fetch_cmd) begin
+            // count up to track which element index is about to be read from the buffer, and where it is located within the buffer
+            // use increment value calculated by controller
+
+            // absolute buffer address wrap-around
+            automatic logic signed [$clog2(BUF_ELEM_TOTAL)+1:0] ra = $signed(Window_buffer_read_addr_reg) + $signed(addr_incr);
+            automatic logic signed [$clog2(BUF_ELEM_TOTAL+1):0] ra_correct =
+                (ra >= BUF_ELEM_TOTAL)? -BUF_ELEM_TOTAL :
+                (ra < 0)? BUF_ELEM_TOTAL : 0;
+            Window_buffer_read_addr_reg <= ra + ra_correct;
+
+            // keep track where we are within a window
+            Position_in_window <= (Position_in_window != ELEM_PER_WINDOW - 1)? Position_in_window+1 : 0;
+
+            // update first element of next window to allow buffer overwrite up until that point
+            if (Position_in_window == 0)
+                First_elem_next_window <= First_elem_next_window + tail_incr;
+
+            // check if this is the last fetch cycle (Fetching_done will be true afterwards)
+            if (Current_elem == Cfg_last_write)
+                Fetching_done <= 1;
+            else
+                Current_elem <= $signed(Current_elem) + addr_incr;
+
+            // determine if prefetched data will be outstanding in the next cycle
+            // if we fetch in this cycle -> yes
+            // if we do not fetch nor write -> do not change
+            // if we do not fetch but write successfully -> clear outstanding data
+            Write_cmd <= fetch_cmd;
+        end
+
+        if (write_ok)
+            Write_cmd <= fetch_cmd;
+
+        if (write_ok && Fetching_done) begin
+            // check if this is the last write cycle (Writing_done will be true afterwards)
+            if (reading_done || (read_ok && (Newest_buffered_elem == Cfg_last_read - 1))) begin
+                // start processing of next FM if reading is done already, or completes in the same cycle
+                Newest_buffered_elem <= -1;
+                Current_elem <= 0;
+                Window_buffer_read_addr_reg <= 0;
+                First_elem_next_window <= 0;
+                Fetching_done <= 0;
+            end else
+                Writing_done <= 1;
+        end
+    end
+end
+
+endmodule : $TOP_MODULE_NAME$_impl
diff --git a/finn-rtllib/swg/swg_template_wrapper_dynamic.v b/finn-rtllib/swg/swg_template_wrapper_dynamic.v
new file mode 100644
index 0000000000000000000000000000000000000000..ca870ace11edcf097645bc12b0486ffbb83b0ea4
--- /dev/null
+++ b/finn-rtllib/swg/swg_template_wrapper_dynamic.v
@@ -0,0 +1,154 @@
+`timescale 1 ns / 1 ps
+
+module $TOP_MODULE_NAME$ #(
+    // top-level parameters (set via code-generation)
+    parameter BIT_WIDTH = $BIT_WIDTH$,
+    parameter SIMD = $SIMD$,
+    parameter MMV_IN = $MMV_IN$,
+    parameter MMV_OUT = $MMV_OUT$,
+
+    parameter CNTR_BITWIDTH = $CNTR_BITWIDTH$,
+    parameter INCR_BITWIDTH = $INCR_BITWIDTH$,
+
+    // derived constants
+    parameter BUF_IN_WIDTH = BIT_WIDTH * SIMD * MMV_IN,
+    parameter BUF_OUT_WIDTH = BIT_WIDTH * SIMD * MMV_OUT,
+
+    parameter integer C_s_axilite_DATA_WIDTH = 32,
+    parameter integer C_s_axilite_ADDR_WIDTH = 6
+)
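+// Note: ADDR_WIDTH = 6 spans a 64-byte AXI-Lite address space, exactly the
+// sixteen 32-bit configuration registers (cfg_reg0..cfg_reg15) that the
+// register file below maps at byte offsets 0x00..0x3C.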
+(
+    (* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V:s_axilite" *)
+    input ap_clk,
+    (* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V:s_axilite" *)
+    input ap_rst_n,
+    input [BUF_IN_WIDTH-1:0] in0_V_TDATA,
+    input in0_V_TVALID,
+    output in0_V_TREADY,
+    output [BUF_OUT_WIDTH-1:0] out_V_TDATA,
+    output out_V_TVALID,
+    input out_V_TREADY,
+
+    // Ports of Axi Slave Bus Interface s_axilite
+    input [C_s_axilite_ADDR_WIDTH-1 : 0] s_axilite_awaddr,
+    input [2 : 0] s_axilite_awprot,
+    input s_axilite_awvalid,
+    output s_axilite_awready,
+    input [C_s_axilite_DATA_WIDTH-1 : 0] s_axilite_wdata,
+    input [(C_s_axilite_DATA_WIDTH/8)-1 : 0] s_axilite_wstrb,
+    input s_axilite_wvalid,
+    output s_axilite_wready,
+    output [1 : 0] s_axilite_bresp,
+    output s_axilite_bvalid,
+    input s_axilite_bready,
+    input [C_s_axilite_ADDR_WIDTH-1 : 0] s_axilite_araddr,
+    input [2 : 0] s_axilite_arprot,
+    input s_axilite_arvalid,
+    output s_axilite_arready,
+    output [C_s_axilite_DATA_WIDTH-1 : 0] s_axilite_rdata,
+    output [1 : 0] s_axilite_rresp,
+    output s_axilite_rvalid,
+    input s_axilite_rready
+);
+
+wire cfg_valid;
+wire [CNTR_BITWIDTH-1:0] cfg_cntr_simd;
+wire [CNTR_BITWIDTH-1:0] cfg_cntr_kw;
+wire [CNTR_BITWIDTH-1:0] cfg_cntr_kh;
+wire [CNTR_BITWIDTH-1:0] cfg_cntr_w;
+wire [CNTR_BITWIDTH-1:0] cfg_cntr_h;
+wire [INCR_BITWIDTH-1:0] cfg_incr_head_simd;
+wire [INCR_BITWIDTH-1:0] cfg_incr_head_kw;
+wire [INCR_BITWIDTH-1:0] cfg_incr_head_kh;
+wire [INCR_BITWIDTH-1:0] cfg_incr_head_w;
+wire [INCR_BITWIDTH-1:0] cfg_incr_head_h;
+wire [INCR_BITWIDTH-1:0] cfg_incr_tail_w;
+wire [INCR_BITWIDTH-1:0] cfg_incr_tail_h;
+wire [INCR_BITWIDTH-1:0] cfg_incr_tail_last;
+wire [31:0] cfg_last_read;
+wire [31:0] cfg_last_write;
+
+// Instantiation of Axi Bus Interface s_axilite
+$TOP_MODULE_NAME$_axilite # (
+    .C_S_AXI_DATA_WIDTH(C_s_axilite_DATA_WIDTH),
+    .C_S_AXI_ADDR_WIDTH(C_s_axilite_ADDR_WIDTH)
+) axilite_cfg_inst (
+    .S_AXI_ACLK(ap_clk),
+    .S_AXI_ARESETN(ap_rst_n),
+    .S_AXI_AWADDR(s_axilite_awaddr),
+    .S_AXI_AWPROT(s_axilite_awprot),
+    .S_AXI_AWVALID(s_axilite_awvalid),
+    .S_AXI_AWREADY(s_axilite_awready),
+    .S_AXI_WDATA(s_axilite_wdata),
+    .S_AXI_WSTRB(s_axilite_wstrb),
+    .S_AXI_WVALID(s_axilite_wvalid),
+    .S_AXI_WREADY(s_axilite_wready),
+    .S_AXI_BRESP(s_axilite_bresp),
+    .S_AXI_BVALID(s_axilite_bvalid),
+    .S_AXI_BREADY(s_axilite_bready),
+    .S_AXI_ARADDR(s_axilite_araddr),
+    .S_AXI_ARPROT(s_axilite_arprot),
+    .S_AXI_ARVALID(s_axilite_arvalid),
+    .S_AXI_ARREADY(s_axilite_arready),
+    .S_AXI_RDATA(s_axilite_rdata),
+    .S_AXI_RRESP(s_axilite_rresp),
+    .S_AXI_RVALID(s_axilite_rvalid),
+    .S_AXI_RREADY(s_axilite_rready),
+
+    .cfg_reg0(cfg_valid),
+    .cfg_reg1(cfg_cntr_simd),
+    .cfg_reg2(cfg_cntr_kw),
+    .cfg_reg3(cfg_cntr_kh),
+    .cfg_reg4(cfg_cntr_w),
+    .cfg_reg5(cfg_cntr_h),
+    .cfg_reg6(cfg_incr_head_simd),
+    .cfg_reg7(cfg_incr_head_kw),
+    .cfg_reg8(cfg_incr_head_kh),
+    .cfg_reg9(cfg_incr_head_w),
+    .cfg_reg10(cfg_incr_head_h),
+    .cfg_reg11(cfg_incr_tail_w),
+    .cfg_reg12(cfg_incr_tail_h),
+    .cfg_reg13(cfg_incr_tail_last),
+    .cfg_reg14(cfg_last_read),
+    .cfg_reg15(cfg_last_write)
+);
+
+$TOP_MODULE_NAME$_impl
+#(
+    .BIT_WIDTH(BIT_WIDTH),
+    .SIMD(SIMD),
+    .MMV_IN(MMV_IN),
+    .MMV_OUT(MMV_OUT),
+    .CNTR_BITWIDTH(CNTR_BITWIDTH),
+    .INCR_BITWIDTH(INCR_BITWIDTH)
+)
+impl
+(
+    .ap_clk(ap_clk),
+    .ap_rst_n(ap_rst_n),
+    .in0_V_V_TDATA(in0_V_TDATA),
+    .in0_V_V_TVALID(in0_V_TVALID),
+    .in0_V_V_TREADY(in0_V_TREADY),
+    .out_V_V_TDATA(out_V_TDATA),
+    .out_V_V_TVALID(out_V_TVALID),
+    .out_V_V_TREADY(out_V_TREADY),
+
+    .cfg_valid(cfg_valid),
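+    // cfg_regN of the register file above corresponds to byte offset 4*N on
+    // s_axilite; get_dynamic_config() in convolutioninputgenerator_rtl.py
+    // emits its (address, value) pairs against exactly this layout.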
+    .cfg_cntr_simd(cfg_cntr_simd),
+    .cfg_cntr_kw(cfg_cntr_kw),
+    .cfg_cntr_kh(cfg_cntr_kh),
+    .cfg_cntr_w(cfg_cntr_w),
+    .cfg_cntr_h(cfg_cntr_h),
+    .cfg_incr_head_simd(cfg_incr_head_simd),
+    .cfg_incr_head_kw(cfg_incr_head_kw),
+    .cfg_incr_head_kh(cfg_incr_head_kh),
+    .cfg_incr_head_w(cfg_incr_head_w),
+    .cfg_incr_head_h(cfg_incr_head_h),
+    .cfg_incr_tail_w(cfg_incr_tail_w),
+    .cfg_incr_tail_h(cfg_incr_tail_h),
+    .cfg_incr_tail_last(cfg_incr_tail_last),
+    .cfg_last_read(cfg_last_read),
+    .cfg_last_write(cfg_last_write)
+);
+
+endmodule //TOP_MODULE_NAME
diff --git a/notebooks/advanced/2_custom_op.ipynb b/notebooks/advanced/2_custom_op.ipynb
index c27f8bdca788e6404fbc01e226b06e8cfaaba066..051a406708ee7b4bcbd548b39acac000b473c7cf 100644
--- a/notebooks/advanced/2_custom_op.ipynb
+++ b/notebooks/advanced/2_custom_op.ipynb
@@ -178,6 +178,7 @@
    "source": [
     "from qonnx.core.modelwrapper import ModelWrapper\n",
     "from onnx import TensorProto\n",
+    "from qonnx.util.basic import qonnx_make_model\n",
     "\n",
     "def make_graph(ishape, exp, op_type = \"MyPythonPowerOp\"):\n",
     "    inp = helper.make_tensor_value_info(\n",
@@ -204,7 +205,7 @@
     "    graph = helper.make_graph(\n",
     "        nodes=[custom_node], name=\"custom_graph\", inputs=[inp], outputs=[outp]\n",
     "    )\n",
-    "    model = helper.make_model(graph, producer_name=\"custom-model\")\n",
+    "    model = qonnx_make_model(graph, producer_name=\"custom-model\")\n",
     "    return ModelWrapper(model)"
    ]
   },
diff --git a/notebooks/basics/0_how_to_work_with_onnx.ipynb b/notebooks/basics/0_how_to_work_with_onnx.ipynb
index 514efd1693d667af896e89902a264ea7e6e01da7..b6a5a0481574928ef490d8bb55bbbe2bb882b951 100644
--- a/notebooks/basics/0_how_to_work_with_onnx.ipynb
+++ b/notebooks/basics/0_how_to_work_with_onnx.ipynb
@@ -36,6 +36,7 @@
    "outputs": [],
    "source": [
     "import onnx\n",
+    "from qonnx.util.basic import qonnx_make_model\n",
     "\n",
     "Add1_node = onnx.helper.make_node(\n",
     "    'Add',\n",
@@ -158,7 +159,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "onnx_model = onnx.helper.make_model(graph, producer_name=\"simple-model\")\n",
+    "onnx_model = qonnx_make_model(graph, producer_name=\"simple-model\")\n",
     "onnx.save(onnx_model, '/tmp/simple_model.onnx')"
    ]
   },
@@ -550,7 +551,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "onnx_model1 = onnx.helper.make_model(graph, producer_name=\"simple-model1\")\n",
+    "onnx_model1 = qonnx_make_model(graph, producer_name=\"simple-model1\")\n",
     "onnx.save(onnx_model1, '/tmp/simple_model1.onnx')"
    ]
   },
diff --git a/requirements.txt b/requirements.txt
index 348b1afab9deca1547d40cb8d8c54a396befa65d..83aad07d729e30cbbbaf565b4332fb1f7ae6f014 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,11 +4,11 @@ dataclasses-json==0.5.7
 docrep==0.2.7
 gspread==3.6.0
 numpy==1.22.0
-onnx==1.11.0
+onnx==1.13.0
 onnxoptimizer
 onnxruntime==1.11.1
 pre-commit==2.9.2
-protobuf==3.20.2
+protobuf==3.20.3
 psutil==5.9.4
 pyscaffold==3.2.1
 scipy==1.5.2
diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py
index fb4d60c1ebf6bcf39d5c71388572f27f065279cc..6e07a541e3d462b159792482dae4777999921a2c 100644
--- a/src/finn/builder/build_dataflow_steps.py
+++ b/src/finn/builder/build_dataflow_steps.py
@@ -666,8 +666,8 @@ def step_measure_rtlsim_performance(model: ModelWrapper, cfg: DataflowBuildConfi
             + "in FINN C++ verilator driver, falling back to Python"
         )
     rtlsim_bs = int(cfg.rtlsim_batch_size)
+    orig_rtlsim_trace_depth = get_rtlsim_trace_depth()
     if force_python_rtlsim:
-        orig_rtlsim_trace_depth = get_rtlsim_trace_depth()
        assert rtlsim_bs > 0, 
"rtlsim batch size must be >0" if cfg.verify_save_rtlsim_waveforms: # set depth to 3 for layer-by-layer visibility diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py index e5eb483a00f6890f5eeb16c5cec533a4533c9f15..56d4230a3af3057daaa5c47140fcde1590dee686 100644 --- a/src/finn/custom_op/fpgadataflow/__init__.py +++ b/src/finn/custom_op/fpgadataflow/__init__.py @@ -43,6 +43,7 @@ from finn.custom_op.fpgadataflow.downsampler import DownSampler from finn.custom_op.fpgadataflow.duplicatestreams_batch import DuplicateStreams_Batch from finn.custom_op.fpgadataflow.eltwise import StreamingEltwise from finn.custom_op.fpgadataflow.fmpadding_batch import FMPadding_Batch +from finn.custom_op.fpgadataflow.fmpadding_rtl import FMPadding_rtl from finn.custom_op.fpgadataflow.globalaccpool_batch import GlobalAccPool_Batch from finn.custom_op.fpgadataflow.iodma import IODMA from finn.custom_op.fpgadataflow.labelselect_batch import LabelSelect_Batch @@ -91,3 +92,4 @@ custom_op["Lookup"] = Lookup custom_op["StreamingConcat"] = StreamingConcat custom_op["CheckSum"] = CheckSum custom_op["StreamingEltwise"] = StreamingEltwise +custom_op["FMPadding_rtl"] = FMPadding_rtl diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator_rtl.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator_rtl.py index 5424050a8ed0a353894721d5bba28c1d45e62771..1afd23d3a1709a8929a03c21a6eba0a5a8cd6ba6 100755 --- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator_rtl.py +++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator_rtl.py @@ -29,7 +29,6 @@ import math import numpy as np import os -from math import copysign from qonnx.core.datatype import DataType from qonnx.custom_op.general import im2col from qonnx.custom_op.general.im2col import compute_conv_output_dim @@ -81,6 +80,9 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp): "inputDataType": ("s", True, ""), "outputDataType": ("s", True, ""), "depthwise": ("i", False, 0, {0, 1}), + # Enable reprogrammable implementation to change FM dimensions, + # stride, or dilation during runtime + "dynamic_mode": ("i", False, 0, {0, 1}), # FPGA resource type for ConvolutionInputGenerator input buffer # auto -- let Vivado decide # block -- use BRAM @@ -457,9 +459,11 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp): def prepare_codegen_default(self): # Default implementation style for MMV_out = 1: addressable cyclic buffer # Computing incremental addressing scheme directly.. - template_path = ( - os.environ["FINN_ROOT"] + "/finn-rtllib/swg/swg_template_default.sv" - ) + if self.get_nodeattr("dynamic_mode"): + template_select = "/finn-rtllib/swg/swg_template_default_dynamic.sv" + else: + template_select = "/finn-rtllib/swg/swg_template_default.sv" + template_path = os.environ["FINN_ROOT"] + template_select code_gen_dict = {} ifm_ch = self.get_nodeattr("IFMChannels") @@ -569,10 +573,6 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp): tail_incr_last_window = buffer_min_size - 1 code_gen_dict["$IS_DEPTHWISE$"] = ["0"] - code_gen_dict["$TAIL_INCR_W$"] = [str(tail_incr_w)] - code_gen_dict["$TAIL_INCR_H$"] = [str(tail_incr_h)] - code_gen_dict["$TAIL_INCR_LAST$"] = [str(tail_incr_last_window)] - # support SIMD = IFMChannels and k_w = 1 cases # for k = [k_h, k_w] = [1, k_w], no adjustment is needed # for k = [k_h, k_w] = [1, 1], do not use this impl. 
style (mmv_out=K=1) @@ -590,11 +590,23 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp): code_gen_dict["$INNERMOST_STATE$"] = ["STATE_LOOP_SIMD"] loop_simd_iterations -= 1 # -1 because state is initial state - code_gen_dict["$LOOP_H_ITERATIONS$"] = [str(loop_h_iterations - 1)] - code_gen_dict["$LOOP_W_ITERATIONS$"] = [str(loop_w_iterations - 1)] - code_gen_dict["$LOOP_KH_ITERATIONS$"] = [str(loop_kh_iterations - 1)] - code_gen_dict["$LOOP_KW_ITERATIONS$"] = [str(loop_kw_iterations - 1)] - code_gen_dict["$LOOP_SIMD_ITERATIONS$"] = [str(loop_simd_iterations - 1)] + cntr_bitwidth = math.ceil( + math.log2( + max( + loop_h_iterations - 2 + 1, + loop_w_iterations - 2 + 1, + loop_kh_iterations - 2 + 1, + loop_kw_iterations - 2 + 1, + loop_simd_iterations - 2 + 1, + ) + ) + ) + code_gen_dict["$CNTR_BITWIDTH$"] = [str(cntr_bitwidth)] + code_gen_dict["$LOOP_H_ITERATIONS$"] = [str(loop_h_iterations - 2)] + code_gen_dict["$LOOP_W_ITERATIONS$"] = [str(loop_w_iterations - 2)] + code_gen_dict["$LOOP_KH_ITERATIONS$"] = [str(loop_kh_iterations - 2)] + code_gen_dict["$LOOP_KW_ITERATIONS$"] = [str(loop_kw_iterations - 2)] + code_gen_dict["$LOOP_SIMD_ITERATIONS$"] = [str(loop_simd_iterations - 2)] incr_bitwidth = 1 + math.ceil( math.log2( @@ -611,21 +623,14 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp): ) ) code_gen_dict["$INCR_BITWIDTH$"] = [str(incr_bitwidth)] - code_gen_dict["$ADDR_INCREMENT_MAP$"] = [ - "'{{ {}'d0, {}'d{}, {}'d{}, {}'d{}, {}'d{}, {}'d{}}}".format( - incr_bitwidth, - int(copysign(incr_bitwidth, addr_incr_end_simd)), - abs(addr_incr_end_simd), - int(copysign(incr_bitwidth, addr_incr_end_window_elem)), - abs(addr_incr_end_window_elem), - int(copysign(incr_bitwidth, addr_incr_end_window_row)), - abs(addr_incr_end_window_row), - int(copysign(incr_bitwidth, addr_incr_end_window)), - abs(addr_incr_end_window), - int(copysign(incr_bitwidth, addr_incr_end_row)), - abs(addr_incr_end_row), - ) - ] + code_gen_dict["$HEAD_INCR_SIMD$"] = [str(addr_incr_end_simd)] + code_gen_dict["$HEAD_INCR_KW$"] = [str(addr_incr_end_window_elem)] + code_gen_dict["$HEAD_INCR_KH$"] = [str(addr_incr_end_window_row)] + code_gen_dict["$HEAD_INCR_W$"] = [str(addr_incr_end_window)] + code_gen_dict["$HEAD_INCR_H$"] = [str(addr_incr_end_row)] + code_gen_dict["$TAIL_INCR_W$"] = [str(tail_incr_w)] + code_gen_dict["$TAIL_INCR_H$"] = [str(tail_incr_h)] + code_gen_dict["$TAIL_INCR_LAST$"] = [str(tail_incr_last_window)] code_gen_dict["$ELEM_PER_WINDOW$"] = [str(elem_per_window)] code_gen_dict["$SIMD$"] = [str(simd)] @@ -710,15 +715,22 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp): code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") with open(template_path, "r") as f: template = f.read() + if self.get_nodeattr("dynamic_mode"): + template_select = "/finn-rtllib/swg/swg_template_wrapper_dynamic.v" + else: + template_select = "/finn-rtllib/swg/swg_template_wrapper.v" + with open(os.environ["FINN_ROOT"] + template_select, "r") as f: + template_wrapper = f.read() with open( - os.environ["FINN_ROOT"] + "/finn-rtllib/swg/swg_template_wrapper.v", "r" + os.environ["FINN_ROOT"] + "/finn-rtllib/swg/swg_template_axilite.v", "r" ) as f: - template_wrapper = f.read() + template_axilite = f.read() for key in code_gen_dict: # transform list into long string separated by '\n' code_gen_line = "\n".join(code_gen_dict[key]) template = template.replace(key, code_gen_line) template_wrapper = template_wrapper.replace(key, code_gen_line) + template_axilite = template_axilite.replace(key, code_gen_line) with open( os.path.join( 
code_gen_dir, self.get_nodeattr("gen_top_module") + "_impl.sv" @@ -734,6 +746,16 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp): ) as f: f.write(template_wrapper) + # AXI-Lite reg. file component is only needed for dynamic mode + if self.get_nodeattr("dynamic_mode"): + with open( + os.path.join( + code_gen_dir, self.get_nodeattr("gen_top_module") + "_axilite.v" + ), + "w", + ) as f: + f.write(template_axilite) + # set ipgen_path and ip_path so that HLS-Synth transformation # and stich_ip transformation do not complain self.set_nodeattr("ipgen_path", code_gen_dir) @@ -754,6 +776,8 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp): self.get_nodeattr("gen_top_module") + "_wrapper.v", self.get_nodeattr("gen_top_module") + "_impl.sv", ] + if self.get_nodeattr("dynamic_mode"): + verilog_files.append(self.get_nodeattr("gen_top_module") + "_axilite.v") # build the Verilator emu library sim = PyVerilator.build( @@ -771,25 +795,97 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp): """Constructs and returns the TCL for node instantiation in Vivado IPI.""" code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") - cmd = [ - "add_files -norecurse %s" - % ( - os.path.join( - code_gen_dir, self.get_nodeattr("gen_top_module") + "_wrapper.v" - ) - ), - "add_files -norecurse %s" - % ( - os.path.join( - code_gen_dir, self.get_nodeattr("gen_top_module") + "_impl.sv" - ) - ), - "create_bd_cell -type module -reference %s %s" - % (self.get_nodeattr("gen_top_module"), self.onnx_node.name), + sourcefiles = [ + self.get_nodeattr("gen_top_module") + "_wrapper.v", + self.get_nodeattr("gen_top_module") + "_impl.sv", ] + if self.get_nodeattr("dynamic_mode"): + sourcefiles += [self.get_nodeattr("gen_top_module") + "_axilite.v"] + + sourcefiles = [os.path.join(code_gen_dir, f) for f in sourcefiles] + + cmd = [] + for f in sourcefiles: + cmd += ["add_files -norecurse %s" % (f)] + cmd += [ + "create_bd_cell -type module -reference %s %s" + % (self.get_nodeattr("gen_top_module"), self.onnx_node.name) + ] return cmd + def get_verilog_top_module_intf_names(self): + # Overload default HLSCustomOp implementation to add axilite control IF + """Return a dict of names of input and output interfaces. + The keys reflect the protocols each interface implements: + 'clk', 'rst', 'm_axis', 's_axis', 'aximm', 'axilite'. + Values are lists of tuples (axis, aximm) or names (axilite): + 'axis' tuples correspond to the list of node inputs in order, + each tuple is (interface_name, interface_width_bits). + axilite always assumed to be 32 bits and is not tuple (name only). + Each block must have at most one aximm and one axilite.""" + intf_names = super().get_verilog_top_module_intf_names() + if self.get_nodeattr("dynamic_mode"): + intf_names["axilite"] = ["s_axilite"] + return intf_names + + def get_dynamic_config(self, ifm_dim=None, stride=None, dilation=None): + """Returns a configuration dict to re-configure FM dimension during + runtime. Stride and dilation can also be changed. Certain restrictions + apply (e.g. 
component must be synthesized for largest buffer size).""" + # NOTE: For better driver integration, this functionality could be packaged + # as a standalone function in the future + + if ifm_dim is None: + ifm_dim = self.get_nodeattr("IFMDim") + k = self.get_nodeattr("ConvKernelDim") + if stride is None: + stride = self.get_nodeattr("Stride") + if dilation is None: + dilation = self.get_nodeattr("Dilation") + + k_h, k_w = k + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + ifm_dim_h, ifm_dim_w = ifm_dim + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + # update attributes and perform sanity check + original_buffer_depth = self.get_buffer_depth() + self.set_nodeattr("IFMDim", ifm_dim) + self.set_nodeattr("OFMDim", ofm_dim) + self.set_nodeattr("Stride", stride) + self.set_nodeattr("Dilation", dilation) + assert ( + self.get_buffer_depth() <= original_buffer_depth + ), """Error: requested + dynamic configuration does not fit in generated buffer implementation.""" + + # (re-)call codegen and extract new values + # each setting is mapped to an axi-lite register address + template_path, code_gen_dict = self.prepare_codegen_default() + config = { + "cfg_wren": (0 * 4, 1), + "cfg_cntr_simd": (1 * 4, int(code_gen_dict["$LOOP_SIMD_ITERATIONS$"][0])), + "cfg_cntr_kw": (2 * 4, int(code_gen_dict["$LOOP_KW_ITERATIONS$"][0])), + "cfg_cntr_kh": (3 * 4, int(code_gen_dict["$LOOP_KH_ITERATIONS$"][0])), + "cfg_cntr_w": (4 * 4, int(code_gen_dict["$LOOP_W_ITERATIONS$"][0])), + "cfg_cntr_h": (5 * 4, int(code_gen_dict["$LOOP_H_ITERATIONS$"][0])), + "cfg_incr_head_simd": (6 * 4, int(code_gen_dict["$HEAD_INCR_SIMD$"][0])), + "cfg_incr_head_kw": (7 * 4, int(code_gen_dict["$HEAD_INCR_KW$"][0])), + "cfg_incr_head_kh": (8 * 4, int(code_gen_dict["$HEAD_INCR_KH$"][0])), + "cfg_incr_head_w": (9 * 4, int(code_gen_dict["$HEAD_INCR_W$"][0])), + "cfg_incr_head_h": (10 * 4, int(code_gen_dict["$HEAD_INCR_H$"][0])), + "cfg_incr_tail_w": (11 * 4, int(code_gen_dict["$TAIL_INCR_W$"][0])), + "cfg_incr_tail_h": (12 * 4, int(code_gen_dict["$TAIL_INCR_H$"][0])), + "cfg_incr_tail_last": (13 * 4, int(code_gen_dict["$TAIL_INCR_LAST$"][0])), + "cfg_last_read": (14 * 4, int(code_gen_dict["$LAST_READ_ELEM$"][0])), + "cfg_last_write": (15 * 4, int(code_gen_dict["$LAST_WRITE_ELEM$"][0])), + } + return config + def code_generation_ipgen(self, model, fpgapart, clk): """Normally: Generates C++ code and tcl script for IP generation. Here: Generates (System-)Verilog code for IP generation.""" diff --git a/src/finn/custom_op/fpgadataflow/fmpadding_rtl.py b/src/finn/custom_op/fpgadataflow/fmpadding_rtl.py new file mode 100644 index 0000000000000000000000000000000000000000..5650d218857a7c7ff86c15ac057c4ebbc18df5ca --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/fmpadding_rtl.py @@ -0,0 +1,420 @@ +# Copyright (C) 2022, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import math
+import numpy as np
+import os
+import shutil
+import warnings
+from qonnx.core.datatype import DataType
+from qonnx.util.basic import roundup_to_integer_multiple
+
+from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
+from finn.util.basic import get_rtlsim_trace_depth, make_build_dir
+from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
+
+try:
+    from pyverilator import PyVerilator
+except ModuleNotFoundError:
+    PyVerilator = None
+
+
+class FMPadding_rtl(HLSCustomOp):
+    """CustomOp wrapper for the finn-rtllib fmpadding_axi component.
+    Supports adjusting the padding amount and spatial feature sizes at
+    runtime."""
+
+    def __init__(self, onnx_node):
+        super().__init__(onnx_node)
+
+    def get_nodeattr_types(self):
+        my_attrs = {
+            # spatial size of input images
+            "ImgDim": ("ints", True, []),  # [H, W] = [Y, X]
+            # total padding (per dimension) to apply
+            "Padding": (
+                "ints",
+                True,
+                [1, 1, 1, 1],
+            ),  # [H_begin, W_begin, H_end, W_end] = [Y_begin, X_begin, Y_end, X_end]
+            # number of channels in input image
+            "NumChannels": ("i", True, 0),
+            # SIMD Input parallelism
+            "SIMD": ("i", False, 1),
+            # FINN input datatype
+            "inputDataType": ("s", True, ""),
+            # shape describing input vecs per execution
+            "numInputVectors": ("i", False, 1),
+            # Enable reprogrammable implementation to change FM dimensions
+            # and padding amounts during runtime
+            "dynamic_mode": ("i", False, 0, {0, 1}),
+            # attribute to save top module name - not user configurable
+            "gen_top_module": ("s", False, ""),
+        }
+        my_attrs.update(super().get_nodeattr_types())
+        return my_attrs
+
+    def get_padded_odim(self):
+        "Return the padded spatial size of the output."
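+        # A worked example (values are illustrative): ImgDim=[28, 28] with
+        # Padding=[1, 1, 1, 1] yields [30, 30], since each spatial axis grows
+        # by its begin+end padding.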
+        idim_h, idim_w = self.get_nodeattr("ImgDim")
+        pad = self.get_nodeattr("Padding")
+        pad_h = pad[0] + pad[2]
+        pad_w = pad[1] + pad[3]
+        odim_h = idim_h + pad_h
+        odim_w = idim_w + pad_w
+        return [odim_h, odim_w]
+
+    def get_exp_cycles(self):
+        odim_h, odim_w = self.get_padded_odim()
+        channels = self.get_nodeattr("NumChannels")
+        simd = self.get_nodeattr("SIMD")
+        batch_size = self.get_nodeattr("numInputVectors")
+        exp_cycles = (channels / simd) * batch_size * odim_h * odim_w
+        return int(exp_cycles)
+
+    def get_normal_input_shape(self, ind=0):
+        idim_h, idim_w = self.get_nodeattr("ImgDim")
+        num_ch = self.get_nodeattr("NumChannels")
+        ishape = (1, idim_h, idim_w, num_ch)
+        return ishape
+
+    def get_normal_output_shape(self, ind=0):
+        odim_h, odim_w = self.get_padded_odim()
+        num_ch = self.get_nodeattr("NumChannels")
+
+        oshape = (1, odim_h, odim_w, num_ch)
+        return oshape
+
+    def get_folded_input_shape(self, ind=0):
+        normal_ishape = list(self.get_normal_input_shape())
+        ifm_ch = self.get_nodeattr("NumChannels")
+        simd = self.get_nodeattr("SIMD")
+        assert ifm_ch % simd == 0, "SIMD must divide input channels"
+        fold = int(normal_ishape[-1] / simd)
+        folded_ishape = normal_ishape[:-1] + [fold, simd]
+        return tuple(folded_ishape)
+
+    def get_folded_output_shape(self, ind=0):
+        normal_oshape = list(self.get_normal_output_shape())
+        ifm_ch = self.get_nodeattr("NumChannels")
+        simd = self.get_nodeattr("SIMD")
+        assert ifm_ch % simd == 0, "SIMD must divide input channels"
+        fold = int(normal_oshape[-1] / simd)
+        folded_oshape = normal_oshape[:-1] + [fold, simd]
+        return tuple(folded_oshape)
+
+    def make_shape_compatible_op(self, model):
+        exp_ishape = self.get_normal_input_shape()
+        oshape = self.get_normal_output_shape()
+        ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
+        assert ishape == exp_ishape, "Unexpected input shape for FMPadding_rtl."
+        return super().make_const_shape_op(oshape)
+
+    def infer_node_datatype(self, model):
+        node = self.onnx_node
+        idt = model.get_tensor_datatype(node.input[0])
+        if idt != self.get_input_datatype():
+            warn_str = "inputDataType changing for %s: %s -> %s " % (
+                node.name,
+                str(self.get_input_datatype()),
+                str(idt),
+            )
+            warnings.warn(warn_str)
+        self.set_nodeattr("inputDataType", idt.name)
+        model.set_tensor_datatype(node.output[0], idt)
+
+    def verify_node(self):
+        pass
+
+    def get_input_datatype(self, ind=0):
+        """Returns FINN DataType of input."""
+        ret = DataType[self.get_nodeattr("inputDataType")]
+        # the RTL component always pads with zeros, so ensure that the
+        # DataType is able to represent zeros
+        assert ret.allowed(0), "FMPadding_rtl DataType must support zero"
+        return ret
+
+    def get_output_datatype(self, ind=0):
+        """Returns FINN DataType of output.
+        (Same as input datatype)"""
+        return self.get_input_datatype()
+
+    def get_instream_width(self, ind=0):
+        ibits = self.get_input_datatype().bitwidth()
+        simd = self.get_nodeattr("SIMD")
+        return ibits * simd
+
+    def get_outstream_width(self, ind=0):
+        obits = self.get_output_datatype().bitwidth()
+        simd = self.get_nodeattr("SIMD")
+        return obits * simd
+
+    def get_number_output_values(self):
+        folded_oshape = self.get_folded_output_shape()
+        return np.prod(folded_oshape[:-1])
+
+    def get_verilog_top_module_intf_names(self):
+        # Overload default HLSCustomOp implementation to add axilite control IF
+        intf_names = super().get_verilog_top_module_intf_names()
+        if self.get_nodeattr("dynamic_mode"):
+            intf_names["axilite"] = ["s_axilite"]
+        return intf_names
+
+    def execute_node(self, context, graph):
+        mode = self.get_nodeattr("exec_mode")
+        node = self.onnx_node
+        exp_ishape = self.get_normal_input_shape()
+        exp_oshape = self.get_normal_output_shape()
+        folded_ishape = self.get_folded_input_shape()
+
+        if mode == "cppsim":
+            raise Exception(
+                "cppsim not possible for FMPadding_rtl, please set exec_mode to rtlsim"
+            )
+        elif mode == "rtlsim":
+            code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+        else:
+            raise Exception(
+                """Invalid value for attribute exec_mode! Is currently set to: {}
+            has to be set to one of the following values ("cppsim", "rtlsim")""".format(
+                    mode
+                )
+            )
+
+        inp = context[node.input[0]]
+        assert str(inp.dtype) == "float32", "Input datatype is not float32"
+        assert (
+            inp.shape == exp_ishape
+        ), """Input shape doesn't
+        match expected shape (1, ImgDim_h, ImgDim_w, NumChannels)."""
+        export_idt = self.get_input_datatype()
+
+        reshaped_input = inp.reshape(folded_ishape)
+        np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input)
+
+        sim = self.get_rtlsim()
+        nbits = self.get_instream_width()
+        rtlsim_inp = npy_to_rtlsim_input(
+            "{}/input_0.npy".format(code_gen_dir), export_idt, nbits
+        )
+        super().reset_rtlsim(sim)
+        super().toggle_clk(sim)
+        rtlsim_output = self.rtlsim(sim, rtlsim_inp)
+        odt = export_idt
+        target_bits = odt.bitwidth()
+        packed_bits = self.get_outstream_width()
+        out_npy_path = "{}/output.npy".format(code_gen_dir)
+        out_shape = self.get_folded_output_shape()
+        rtlsim_output_to_npy(
+            rtlsim_output, out_npy_path, odt, out_shape, packed_bits, target_bits
+        )
+        # load and reshape output
+        output = np.load(out_npy_path)
+        output = np.asarray([output], dtype=np.float32).reshape(*exp_oshape)
+        context[node.output[0]] = output
+
+        assert (
+            context[node.output[0]].shape == exp_oshape
+        ), """Output shape doesn't match expected shape
+        (1, OutputDim_H, OutputDim_W, NumChannels)."""
+
+    def get_template_values(self, ifm_dims, pads, chans, simd, idt):
+        dimY, dimX = ifm_dims
+        padT, padL, padB, padR = pads
+        y_counter_bits = int(math.ceil(math.log2(padT + dimY + padB + 1)))
+        x_counter_bits = int(math.ceil(math.log2(padL + dimX + padR + 1)))
+        topname = self.get_verilog_top_module_name()
+        stream_bits = idt.bitwidth() * simd
+        stream_bits = int(roundup_to_integer_multiple(stream_bits, 8))
+        code_gen_dict = {
+            "XCOUNTER_BITS": int(x_counter_bits),
+            "YCOUNTER_BITS": int(y_counter_bits),
+            "NUM_CHANNELS": int(chans),
+            "SIMD": int(simd),
+            "ELEM_BITS": idt.bitwidth(),
+            "TOP_MODULE_NAME": topname,
+            "INIT_XON": int(padL),
+            "INIT_XOFF": int(padL + dimX),
+            "INIT_XEND": int(padL + dimX + padR - 1),
+            "INIT_YON": int(padT),
+            "INIT_YOFF": int(padT + dimY),
+            "INIT_YEND": int(padT + dimY + padB - 1),
+            "STREAM_BITS": int(stream_bits),
+        }
+        return code_gen_dict
+
+    def get_dynamic_config(self, ifm_dims=None, pads=None):
+        """Returns a configuration dict to re-configure FM dimension and
+        padding amounts during runtime."""
+
+        if ifm_dims is None:
+            ifm_dims = self.get_nodeattr("ImgDim")
+        if pads is None:
+            pads = self.get_nodeattr("Padding")
+        chans = self.get_nodeattr("NumChannels")
+        simd = self.get_nodeattr("SIMD")
+        idt = self.get_input_datatype()
+        code_gen_dict = self.get_template_values(ifm_dims, pads, chans, simd, idt)
+        config = {
+            "XON": (0 * 4, (code_gen_dict["INIT_XON"])),
+            "XOFF": (1 * 4, (code_gen_dict["INIT_XOFF"])),
+            "XEND": (2 * 4, (code_gen_dict["INIT_XEND"])),
+            "YON": (3 * 4, (code_gen_dict["INIT_YON"])),
+            "YOFF": (4 * 4, (code_gen_dict["INIT_YOFF"])),
+            "YEND": (5 * 4, (code_gen_dict["INIT_YEND"])),
+        }
+        return config
+
+    def generate_hdl(self):
+        rtlsrc = os.environ["FINN_ROOT"] + "/finn-rtllib/fmpadding/hdl"
+        template_path = rtlsrc + "/fmpadding_template.v"
+        dims = self.get_nodeattr("ImgDim")
+        pads = self.get_nodeattr("Padding")
+        chans = self.get_nodeattr("NumChannels")
+        simd = self.get_nodeattr("SIMD")
+        idt = self.get_input_datatype()
+        code_gen_dict = self.get_template_values(dims, pads, chans, simd, idt)
+        # save top module name so we can refer to it after this node has been renamed
+        # (e.g. by GiveUniqueNodeNames(prefix) during MakeZynqProject)
+        self.set_nodeattr("gen_top_module", self.get_verilog_top_module_name())
+
+        # apply code generation to templates
+        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+        with open(template_path, "r") as f:
+            template = f.read()
+        for key_name in code_gen_dict:
+            key = "$%s$" % key_name
+            template = template.replace(key, str(code_gen_dict[key_name]))
+
+        with open(
+            os.path.join(code_gen_dir, self.get_verilog_top_module_name() + ".v"),
+            "w",
+        ) as f:
+            f.write(template)
+
+        sv_files = ["fmpadding_axi.sv", "fmpadding.sv", "axi2we.sv"]
+        for sv_file in sv_files:
+            shutil.copy(rtlsrc + "/" + sv_file, code_gen_dir)
+        # set ipgen_path and ip_path so that HLS-Synth transformation
+        # and stitch_ip transformation do not complain
+        self.set_nodeattr("ipgen_path", code_gen_dir)
+        self.set_nodeattr("ip_path", code_gen_dir)
+
+    def prepare_rtlsim(self):
+        """Creates a Verilator emulation library for the RTL code generated
+        for this node, sets the rtlsim_so attribute to its path and returns
+        a PyVerilator wrapper around it."""
+        # Modified to use generated (System-)Verilog instead of HLS output products
+
+        if PyVerilator is None:
+            raise ImportError("Installation of PyVerilator is required.")
+
+        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+        verilog_paths = [code_gen_dir]
+        verilog_files = [
+            "fmpadding_axi.sv",
+            "fmpadding.sv",
+            "axi2we.sv",
+            self.get_nodeattr("gen_top_module") + ".v",
+        ]
+
+        # build the Verilator emu library
+        sim = PyVerilator.build(
+            verilog_files,
+            build_dir=make_build_dir("pyverilator_" + self.onnx_node.name + "_"),
+            verilog_path=verilog_paths,
+            trace_depth=get_rtlsim_trace_depth(),
+            top_module_name=self.get_verilog_top_module_name(),
+        )
+        # save generated lib filename in attribute
+        self.set_nodeattr("rtlsim_so", sim.lib._name)
+        return sim
+
+    def code_generation_ipi(self):
+        """Constructs and returns the TCL for node instantiation in Vivado IPI."""
+        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+
+        sourcefiles = [
+            "fmpadding_axi.sv",
+            "fmpadding.sv",
+            "axi2we.sv",
+            self.get_nodeattr("gen_top_module") + ".v",
+        ]
+
+        sourcefiles = [os.path.join(code_gen_dir, f) for f in sourcefiles]
+
+        cmd = []
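+        # This uses Vivado's module-reference flow: add_files registers the
+        # generated sources and create_bd_cell -type module instantiates the
+        # top module directly in the block design, without packaging an IP.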
+ for f in sourcefiles: + cmd += ["add_files -norecurse %s" % (f)] + cmd += [ + "create_bd_cell -type module -reference %s %s" + % (self.get_nodeattr("gen_top_module"), self.onnx_node.name) + ] + return cmd + + def code_generation_ipgen(self, model, fpgapart, clk): + """Normally: Generates C++ code and tcl script for IP generation. + Here: Generates (System-)Verilog code for IP generation.""" + self.generate_hdl() + + def ipgen_singlenode_code(self): + """Normally: Builds the bash script for IP generation.""" + pass + + def code_generation_cppsim(self, model): + """Normally: Generates C++ code for simulation (cppsim).""" + pass + + def compile_singlenode_code(self): + pass + + def global_includes(self): + pass + + def defines(self, var): + pass + + def read_npy_data(self): + pass + + def strm_decl(self): + pass + + def docompute(self): + pass + + def dataoutstrm(self): + pass + + def save_as_npy(self): + pass + + def blackboxfunction(self): + pass + + def pragmas(self): + pass diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py index 525af7ea920e1c8809ce9cd53e628dd756cfdad4..7b8a1bf6b83175cfda041cfc49a22273fd696d8e 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py @@ -117,8 +117,12 @@ class InferConvInpGen(Transformation): ConvInpGen_idim_h = odim_padding_h ConvInpGen_idim_w = odim_padding_w + padding_optype = ( + "FMPadding_rtl" if self.use_rtl_variant else "FMPadding_Batch" + ) + padding_node = helper.make_node( - "FMPadding_Batch", + padding_optype, [i2c_input], [padding_out], domain="finn.custom_op.fpgadataflow", diff --git a/src/finn/transformation/streamline/absorb.py b/src/finn/transformation/streamline/absorb.py index 3af34eba8eb709099474426b665f295f21e0ce40..73df52f890d227137ea076804d161206e66653dc 100644 --- a/src/finn/transformation/streamline/absorb.py +++ b/src/finn/transformation/streamline/absorb.py @@ -492,6 +492,8 @@ class AbsorbConsecutiveTransposes(Transformation): if node.op_type == "Transpose": next_nodes = model.find_consumers(node.output[0]) perms1 = list(get_by_name(node.attribute, "perm").ints) + if len(next_nodes) == 0: + continue # check if all nodes after fork are opposite transposes all_opposite_transposes = True for next_node in next_nodes: diff --git a/src/finn/util/create.py b/src/finn/util/create.py index a8c2e67b385b797905cd4c5a196091069898b583..ed3e1a843eca47d2e20e9ca1c9df0d2d6f5a8a13 100644 --- a/src/finn/util/create.py +++ b/src/finn/util/create.py @@ -30,7 +30,11 @@ import numpy as np from onnx import TensorProto, helper from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper -from qonnx.util.basic import calculate_signed_dot_prod_range, gen_finn_dt_tensor +from qonnx.util.basic import ( + calculate_signed_dot_prod_range, + gen_finn_dt_tensor, + qonnx_make_model, +) def hls_random_mlp_maker(layer_spec): @@ -84,7 +88,7 @@ def hls_mlp_maker(layer_spec): graph = helper.make_graph(nodes=[], name="mlp", inputs=[], outputs=[]) - model = helper.make_model(graph, producer_name="finn") + model = qonnx_make_model(graph, producer_name="finn") model = ModelWrapper(model) for lyr in layer_spec: diff --git a/tests/fpgadataflow/test_code_gen_trafo.py b/tests/fpgadataflow/test_code_gen_trafo.py index 49ee32c71ee941ff7435d4c12ccadae3f8e55c5e..f5edabbd4ba029899239cc2f40dd6a94d178eafd 100644 --- a/tests/fpgadataflow/test_code_gen_trafo.py +++ 
b/tests/fpgadataflow/test_code_gen_trafo.py @@ -32,7 +32,7 @@ import os from onnx import TensorProto, helper from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper -from qonnx.util.basic import gen_finn_dt_tensor, get_by_name +from qonnx.util.basic import gen_finn_dt_tensor, get_by_name, qonnx_make_model from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim @@ -70,7 +70,7 @@ def test_code_gen_trafo(): nodes=[FCLayer_node], name="fclayer_graph", inputs=[inp], outputs=[outp] ) - model = helper.make_model(graph, producer_name="fclayer-model") + model = qonnx_make_model(graph, producer_name="fclayer-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) diff --git a/tests/fpgadataflow/test_compilation_trafo.py b/tests/fpgadataflow/test_compilation_trafo.py index 9bafb101cedabc99d97356069c883cab4ed8a87f..d04b68a56ba7fc5f01e1eef57075636954f86843 100644 --- a/tests/fpgadataflow/test_compilation_trafo.py +++ b/tests/fpgadataflow/test_compilation_trafo.py @@ -32,7 +32,7 @@ import os from onnx import TensorProto, helper from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper -from qonnx.util.basic import gen_finn_dt_tensor, get_by_name +from qonnx.util.basic import gen_finn_dt_tensor, get_by_name, qonnx_make_model from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim @@ -71,7 +71,7 @@ def test_compilation_trafo(): nodes=[FCLayer_node], name="fclayer_graph", inputs=[inp], outputs=[outp] ) - model = helper.make_model(graph, producer_name="fclayer-model") + model = qonnx_make_model(graph, producer_name="fclayer-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) diff --git a/tests/fpgadataflow/test_convert_to_hls_1d_conv_layer.py b/tests/fpgadataflow/test_convert_to_hls_1d_conv_layer.py index 7b3e20616410f54e4718290baec9a510a0d49c0d..98a7c76ee4de0332586772ba7c1007ee55979a51 100644 --- a/tests/fpgadataflow/test_convert_to_hls_1d_conv_layer.py +++ b/tests/fpgadataflow/test_convert_to_hls_1d_conv_layer.py @@ -38,7 +38,7 @@ from qonnx.transformation.general import GiveUniqueNodeNames from qonnx.transformation.infer_datatypes import InferDataTypes from qonnx.transformation.infer_shapes import InferShapes from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul -from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.core.onnx_exec as oxe import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls @@ -121,7 +121,7 @@ def test_convert_to_hls_1d_conv_layer(conv_config, depthwise, use_rtl_swg, exec_ helper.make_tensor_value_info("p1", TensorProto.FLOAT, conv_param_shape) ] - modelproto = helper.make_model( + modelproto = qonnx_make_model( helper.make_graph( name="conv_test", inputs=[top_in], diff --git a/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py b/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py index 0f19b6d79ab0ed77981022f286fabd430094d69f..089d1ae420f4fab744fcda5950d88b13216b4c93 100644 --- a/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py +++ b/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py @@ -35,7 +35,7 @@ from qonnx.core.modelwrapper import ModelWrapper from qonnx.transformation.general import GiveUniqueNodeNames from qonnx.transformation.infer_data_layouts import InferDataLayouts from qonnx.transformation.infer_shapes import 
InferShapes -from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.core.onnx_exec as oxe import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls @@ -57,7 +57,7 @@ def make_single_maxpool_modelwrapper(onnx_op_name, ishape, idt, pdt, pshape): outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, ishape) p0 = helper.make_tensor_value_info("p0", TensorProto.FLOAT, pshape) - model = helper.make_model( + model = qonnx_make_model( helper.make_graph( name="test", inputs=[inp], diff --git a/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py b/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py index 0760ff9b37487f4a1ac06853055d2e47b7269f9e..3512c39cb3fab04e4e4225728c9495b546b7c655 100755 --- a/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py +++ b/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py @@ -39,7 +39,7 @@ from qonnx.transformation.infer_data_layouts import InferDataLayouts from qonnx.transformation.infer_datatypes import InferDataTypes from qonnx.transformation.infer_shapes import InferShapes from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul -from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.core.onnx_exec as oxe import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls @@ -149,7 +149,7 @@ def test_convert_to_hls_conv_fc_transition(conv_config, depthwise, use_reshape): "Flatten", ["thres1_out"], ["flatten_out"], axis=1 ) - modelproto = helper.make_model( + modelproto = qonnx_make_model( helper.make_graph( name="test", inputs=[global_in], diff --git a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py index 8c9f110c315089ec03354863bf2213963197217a..de31ef0f125cb96ea82f953eadb9d5ccf7aab16c 100644 --- a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py +++ b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py @@ -38,7 +38,7 @@ from qonnx.transformation.general import GiveUniqueNodeNames from qonnx.transformation.infer_datatypes import InferDataTypes from qonnx.transformation.infer_shapes import InferShapes from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul -from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.core.onnx_exec as oxe import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls @@ -107,7 +107,7 @@ def test_convert_to_hls_conv_layer(conv_config, depthwise, use_rtl_swg, exec_mod helper.make_tensor_value_info("p1", TensorProto.FLOAT, conv_param_shape) ] - modelproto = helper.make_model( + modelproto = qonnx_make_model( helper.make_graph( name="conv_test", inputs=[top_in], @@ -175,8 +175,11 @@ def test_convert_to_hls_conv_layer(conv_config, depthwise, use_rtl_swg, exec_mod assert np.isclose(exp_cycles, cycles_rtlsim, atol=11) assert exp_cycles != 0 - if pad == 1: - padding_node = new_model.get_nodes_by_op_type("FMPadding_Batch")[0] + if pad: + if use_rtl_swg: + padding_node = new_model.get_nodes_by_op_type("FMPadding_rtl")[0] + else: + padding_node = new_model.get_nodes_by_op_type("FMPadding_Batch")[0] padding_inst = getCustomOp(padding_node) assert padding_inst.get_nodeattr("SIMD") == in_chn diff --git a/tests/fpgadataflow/test_convert_to_hls_layers_synthetic.py b/tests/fpgadataflow/test_convert_to_hls_layers_synthetic.py index 
79a48793e0c4f062654e43aadcaf09ebf6d7da5b..c837a46a7ca7dcab6628cbf16373161b7b9ab9c2 100644 --- a/tests/fpgadataflow/test_convert_to_hls_layers_synthetic.py +++ b/tests/fpgadataflow/test_convert_to_hls_layers_synthetic.py @@ -43,7 +43,7 @@ from qonnx.transformation.infer_data_layouts import InferDataLayouts from qonnx.transformation.infer_datatypes import InferDataTypes from qonnx.transformation.infer_shapes import InferShapes from qonnx.transformation.insert_topk import InsertTopK -from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.core.onnx_exec as oxe import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls @@ -123,7 +123,7 @@ def make_model(ch, ifmdim): outputs=[outp], ) - model = helper.make_model(graph, producer_name="add-model") + model = qonnx_make_model(graph, producer_name="add-model") model = ModelWrapper(model) # set initializers for scalar add/mul nodes diff --git a/tests/fpgadataflow/test_convert_to_hls_pool_batch.py b/tests/fpgadataflow/test_convert_to_hls_pool_batch.py index ef9bd7a13dcecf7aa61ecb982ac6393d7813a4d5..6d628c9e53828fef88028bdc115bd64b0292dfed 100644 --- a/tests/fpgadataflow/test_convert_to_hls_pool_batch.py +++ b/tests/fpgadataflow/test_convert_to_hls_pool_batch.py @@ -35,7 +35,7 @@ from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.general import GiveUniqueNodeNames from qonnx.transformation.infer_shapes import InferShapes -from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.core.onnx_exec as oxe import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls @@ -78,7 +78,7 @@ def make_single_maxpool_modelwrapper( nodes=[mp_node], name="mp_graph", inputs=[inp], outputs=[outp] ) - model = helper.make_model(graph, producer_name="mp-model") + model = qonnx_make_model(graph, producer_name="mp-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) @@ -112,7 +112,7 @@ def make_single_quantavpool_modelwrapper(k, stride, ifm_ch, ifm_dim, ofm_dim, id nodes=[mp_node], name="mp_graph", inputs=[inp], outputs=[outp] ) - model = helper.make_model(graph, producer_name="mp-model") + model = qonnx_make_model(graph, producer_name="mp-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) diff --git a/tests/fpgadataflow/test_depthwise_convolution.py b/tests/fpgadataflow/test_depthwise_convolution.py index 5228ade3d0f4db3bd99f5fcccb7aee41f57ed73b..8ab22bcfdcb0312bd49677f0e00d8e97cdcad3c1 100644 --- a/tests/fpgadataflow/test_depthwise_convolution.py +++ b/tests/fpgadataflow/test_depthwise_convolution.py @@ -37,7 +37,11 @@ from qonnx.custom_op.general.im2col import compute_conv_output_dim from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.general import GiveUniqueNodeNames from qonnx.transformation.infer_shapes import InferShapes -from qonnx.util.basic import calculate_signed_dot_prod_range, gen_finn_dt_tensor +from qonnx.util.basic import ( + calculate_signed_dot_prod_range, + gen_finn_dt_tensor, + qonnx_make_model, +) import finn.core.onnx_exec as oxe from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim @@ -123,7 +127,7 @@ def set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding): outputs=[global_out], value_info=value_info, ) - model = oh.make_model(graph, producer_name="lowered_dw_cnv-model") + model = qonnx_make_model(graph, 
producer_name="lowered_dw_cnv-model") model = ModelWrapper(model) # initialize model diff --git a/tests/fpgadataflow/test_fpgadataflow_addstreams.py b/tests/fpgadataflow/test_fpgadataflow_addstreams.py index 6d881f45b60384d9a78b5d9f9705581a10b48e6c..1ad2c26610c99c46bde4c05ed156a81b122aba53 100644 --- a/tests/fpgadataflow/test_fpgadataflow_addstreams.py +++ b/tests/fpgadataflow/test_fpgadataflow_addstreams.py @@ -34,7 +34,7 @@ from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.general import GiveUniqueNodeNames -from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.core.onnx_exec as oxe from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer @@ -68,7 +68,7 @@ def make_addstreams_modelwrapper(ch, pe, idt): outputs=[outp], ) - model = helper.make_model(graph, producer_name="addstreams-model") + model = qonnx_make_model(graph, producer_name="addstreams-model") model = ModelWrapper(model) model.set_tensor_datatype("inp1", idt) diff --git a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py index ceafda90e54004c7aea8786d003b6adf1defab35..13fab9a47f15999c184680b9db04494787889881 100644 --- a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py +++ b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py @@ -34,7 +34,7 @@ from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.general import GiveUniqueNodeNames -from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.core.onnx_exec as oxe from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer @@ -73,7 +73,7 @@ def make_modelwrapper(C, pe, idt, odt, pdt, func, vecs): ) graph = helper.make_graph(nodes=[node], name="graph", inputs=[inp], outputs=[outp]) - model = helper.make_model(graph, producer_name="model") + model = qonnx_make_model(graph, producer_name="model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) diff --git a/tests/fpgadataflow/test_fpgadataflow_checksum.py b/tests/fpgadataflow/test_fpgadataflow_checksum.py index 495fcd10b6a977c6b0917ac37b58ec5595185c25..cd404f5a6332d77f17ec69c47b53c8c893f28607 100644 --- a/tests/fpgadataflow/test_fpgadataflow_checksum.py +++ b/tests/fpgadataflow/test_fpgadataflow_checksum.py @@ -36,7 +36,7 @@ from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames from qonnx.transformation.infer_shapes import InferShapes -from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.core.onnx_exec as oxe from finn.core.rtlsim_exec import rtlsim_exec @@ -115,7 +115,7 @@ def create_two_fc_model(): value_info=[mid], ) - model = helper.make_model(graph, producer_name="fclayer-model") + model = qonnx_make_model(graph, producer_name="fclayer-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py index a196ecbb61b74843ddc8efa4ac3c5ab8197e64fe..3cfff9ac34ae47bdc072bca9f6ca0fffeea756c5 100644 
--- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py +++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py @@ -34,7 +34,7 @@ from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.general import GiveUniqueNodeNames -from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.core.onnx_exec as oxe from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer @@ -73,7 +73,7 @@ def make_single_im2col_modelwrapper( nodes=[im2col_node], name="im2col_graph", inputs=[inp], outputs=[outp] ) - model = helper.make_model(graph, producer_name="im2col-model") + model = qonnx_make_model(graph, producer_name="im2col-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) @@ -117,7 +117,7 @@ def make_single_slidingwindow_modelwrapper( outputs=[outp], ) - model = helper.make_model(graph, producer_name="slidingwindow-model") + model = qonnx_make_model(graph, producer_name="slidingwindow-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py index 0fc3ca82cfa919079a324160e4876377ac4dc3b4..f467f37618bbee6359bb7b7dfa963e3d8785d0c9 100644 --- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py +++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py @@ -35,7 +35,7 @@ from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.general.im2col import compute_conv_output_dim from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.general import GiveUniqueNodeNames -from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.core.onnx_exec as oxe from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer @@ -82,7 +82,7 @@ def make_single_im2col_modelwrapper( nodes=[im2col_node], name="im2col_graph", inputs=[inp], outputs=[outp] ) - model = helper.make_model(graph, producer_name="im2col-model") + model = qonnx_make_model(graph, producer_name="im2col-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) @@ -133,7 +133,7 @@ def make_single_slidingwindow_modelwrapper( outputs=[outp], ) - model = helper.make_model(graph, producer_name="slidingwindow-model") + model = qonnx_make_model(graph, producer_name="slidingwindow-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl.py index 007360a5fd0b74ee49d54c84f332061dd5f3a114..58fc5ec04cc471b0e8f201e235ac9bd033e3f5c4 100755 --- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl.py +++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl.py @@ -33,7 +33,7 @@ from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.general.im2col import compute_conv_output_dim from qonnx.transformation.general import GiveUniqueNodeNames -from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.core.onnx_exec as oxe from finn.transformation.fpgadataflow.prepare_ip import PrepareIP @@ -72,7 +72,7 @@ def make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, 
ofm_dim, stride, dilatio nodes=[im2col_node], name="im2col_graph", inputs=[inp], outputs=[outp] ) - model = helper.make_model(graph, producer_name="im2col-model") + model = qonnx_make_model(graph, producer_name="im2col-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) @@ -124,7 +124,7 @@ def make_single_slidingwindow_modelwrapper( outputs=[outp], ) - model = helper.make_model(graph, producer_name="slidingwindow-model") + model = qonnx_make_model(graph, producer_name="slidingwindow-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py new file mode 100644 index 0000000000000000000000000000000000000000..7f7bf649a9284e7716aec5adfb91957fdabb55d5 --- /dev/null +++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py @@ -0,0 +1,617 @@ +# Copyright (c) 2022, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
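[Editor's sketch] The new test file below exercises runtime reconfiguration of the RTL sliding-window generator (SWG) and FMPadding components: the stitched IP is generated once for the largest feature map in a series, then re-programmed over AXI-Lite for each smaller one before every rtlsim run. A condensed sketch of that pattern, using only helpers imported in this file (`swg_inst`, `new_ifm_dim`, `io_ctx` and the interface basename are placeholder names for illustration, not part of the patch):

    # (addr, value) register writes for the new feature-map dimensions,
    # as produced by the generator's get_dynamic_config() method
    config = swg_inst.get_dynamic_config(new_ifm_dim)
    configs = [("s_axilite_0_", config)]

    def pre_hook(sim):
        reset_rtlsim(sim)  # reset before programming the config registers
        for axi_name, cfg in configs:
            for addr, value in cfg.values():
                axilite_write(sim, addr, value, basename=axi_name)
        reset_rtlsim(sim)  # reset again, mirroring write_swg_config() below

    rtlsim_exec(model, io_ctx, pre_hook=pre_hook)

config_hook() further down wraps exactly this flow; in addition, the tests have to patch ONNX tensor shapes and FIFO folded shapes by hand, since shape inference cannot run on FINN custom nodes.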
+ +import pytest + +import copy +import numpy as np +import onnx.parser as oprs +import os +from onnx import TensorProto, helper +from pyverilator.util.axi_utils import axilite_write, reset_rtlsim +from qonnx.core.datatype import DataType +from qonnx.core.modelwrapper import ModelWrapper +from qonnx.custom_op.general.im2col import compute_conv_output_dim +from qonnx.custom_op.registry import getCustomOp +from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames +from qonnx.transformation.infer_datatypes import InferDataTypes +from qonnx.transformation.infer_shapes import InferShapes +from qonnx.transformation.lower_convs_to_matmul import ( + LowerConvsToMatMul, + _auto_pad_to_explicit_padding, +) +from qonnx.util.basic import gen_finn_dt_tensor, get_by_name, qonnx_make_model + +import finn.core.onnx_exec as oxe +import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls +import finn.transformation.streamline.absorb as absorb +from finn.core.onnx_exec import execute_onnx +from finn.core.rtlsim_exec import rtlsim_exec +from finn.transformation.fpgadataflow.create_dataflow_partition import ( + CreateDataflowPartition, +) +from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP +from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP +from finn.transformation.fpgadataflow.insert_dwc import InsertDWC +from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO +from finn.transformation.fpgadataflow.prepare_ip import PrepareIP +from finn.util.basic import pyverilate_get_liveness_threshold_cycles + + +def create_conv_model( + idim_h, idim_w, ifm, k, stride, ofm, idt, wdt, pad_mode, depthwise +): + np.random.seed(0) + group = ifm if depthwise else 1 + group_str = str(group) + ishp = (1, ifm, idim_h, idim_w) + pad_0 = _auto_pad_to_explicit_padding( + pad_mode, idim_h, idim_w, k, k, stride, stride, 2 + ) + int_dim_h = compute_conv_output_dim( + idim_h, k, stride, total_pad=pad_0[0] + pad_0[2] + ) + int_dim_w = compute_conv_output_dim( + idim_w, k, stride, total_pad=pad_0[1] + pad_0[3] + ) + + pad_1 = _auto_pad_to_explicit_padding( + pad_mode, int_dim_h, int_dim_w, k, k, stride, stride, 2 + ) + odim_h = compute_conv_output_dim( + int_dim_h, k, stride, total_pad=pad_1[0] + pad_1[2] + ) + odim_w = compute_conv_output_dim( + int_dim_w, k, stride, total_pad=pad_1[1] + pad_1[3] + ) + oshp = (1, ifm, odim_h, odim_w) if depthwise else (1, ofm, odim_h, odim_w) + wshp = (ifm, 1, k, k) if depthwise else (ofm, ifm, k, k) + wshp_1 = (ifm, 1, k, k) if depthwise else (ofm, ofm, k, k) + ishp_str = str(list(ishp)) + oshp_str = str(list(oshp)) + wshp_str = str(list(wshp)) + wshp_1_str = str(list(wshp_1)) + kshp_str = str([k, k]) + pad_0_str = str(list(pad_0)) + pad_1_str = str(list(pad_1)) + stride_str = str([stride, stride]) + dil_str = str([1, 1]) + + input = f""" + < + ir_version: 7, + opset_import: ["" : 9] + > + agraph (float{ishp_str} in0) => (float{oshp_str} out0) + < + float{wshp_str} param_c0_weight, + float{wshp_1_str} param_c1_weight + > + {{ + conv0 = Conv< + dilations={dil_str},group={group_str},kernel_shape={kshp_str},pads={pad_0_str}, + strides={stride_str} + >(in0, param_c0_weight) + out0 = Conv< + dilations={dil_str},group={group_str},kernel_shape={kshp_str},pads={pad_1_str}, + strides={stride_str} + >(conv0, param_c1_weight) + }} + """ + model = oprs.parse_model(input) + model = ModelWrapper(model) + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + 
model.set_tensor_datatype("in0", idt) + model.set_tensor_datatype("param_c0_weight", wdt) + model.set_tensor_datatype("param_c1_weight", wdt) + model.set_initializer("param_c0_weight", gen_finn_dt_tensor(wdt, wshp)) + model.set_initializer("param_c1_weight", gen_finn_dt_tensor(wdt, wshp_1)) + return model + + +def update_conv_model_dims(model, idim_new_h, idim_new_w): + cnode = model.get_nodes_by_op_type("Conv")[0] + k, _ = get_by_name(cnode.attribute, "kernel_shape").ints + stride, _ = get_by_name(cnode.attribute, "strides").ints + ishp = model.get_tensor_shape("in0") + n, ci, _, _ = ishp + n, co, _, _ = model.get_tensor_shape("out0") + int_dim_h = compute_conv_output_dim(idim_new_h, k, stride) + int_dim_w = compute_conv_output_dim(idim_new_w, k, stride) + odim_h = compute_conv_output_dim(int_dim_h, k, stride) + odim_w = compute_conv_output_dim(int_dim_w, k, stride) + model.set_tensor_shape("in0", (n, ci, idim_new_h, idim_new_w)) + model.set_tensor_shape("out0", (n, co, odim_h, odim_w)) + # remove all existing shapes + del model.graph.value_info[:] + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + return model + + +# Helper function to update tensor dimensions manually because shape inference +# does not work on FINN nodes (they assume well-defined tensor shapes). +def update_tensor_dim(model, tensor_name, new_hw): + shape = model.get_tensor_shape(tensor_name) + shape[1] = new_hw[0] + shape[2] = new_hw[1] + model.set_tensor_shape(tensor_name, shape) + + +# Helper function that delivers the hook to program the SWG via AXI-Lite +def config_hook(configs): + if configs is None: + return None + + def write_swg_config(sim): + reset_rtlsim(sim) + for axi_name, config in configs: + # Write config registers to the SWG/FMPadding dict + # defines (addr, value) tuples + for config_entry in config.values(): + axilite_write(sim, config_entry[0], config_entry[1], basename=axi_name) + reset_rtlsim(sim) + + return write_swg_config + + +cfg0 = { + "idims": [(32, 32), (8, 8)], + "ifm": 64, + "k": 3, + "stride": 1, + "ofm": 64, + "depthwise": True, + "pad_mode": "SAME_UPPER", +} +cfg1 = { + "idims": [(32, 16), (16, 8)], + "ifm": 4, + "k": 4, + "stride": 1, + "ofm": 8, + "depthwise": False, + "pad_mode": "SAME_UPPER", +} +cfg2 = { + "idims": [(64, 128), (2, 4)], + "ifm": 64, + "k": 3, + "stride": 1, + "ofm": 64, + "depthwise": True, + "pad_mode": "SAME_UPPER", +} + + +@pytest.mark.parametrize("cfg", [cfg0, cfg1, cfg2]) +@pytest.mark.slow +@pytest.mark.vivado +@pytest.mark.fpgadataflow +def test_fpgadataflow_conv_dynamic(cfg): + pad_mode = cfg["pad_mode"] + depthwise = cfg["depthwise"] + idims = cfg["idims"] + ifm = cfg["ifm"] + k = cfg["k"] + stride = cfg["stride"] + ofm = cfg["ofm"] + idt = DataType["UINT4"] + wdt = DataType["INT2"] + exp_cfgs = [] + largest_model = None + for idim in idims: + idim_h, idim_w = idim + ishp = (1, ifm, idim_h, idim_w) + np.random.seed(0) + inp = gen_finn_dt_tensor(idt, ishp) + model = create_conv_model( + idim_h, idim_w, ifm, k, stride, ofm, idt, wdt, pad_mode, depthwise + ) + _, _, int_dim_h, int_dim_w = model.get_tensor_shape("conv0") + _, _, odim_h, odim_w = model.get_tensor_shape("out0") + pad0 = get_by_name(model.graph.node[0].attribute, "pads").ints + pad1 = get_by_name(model.graph.node[1].attribute, "pads").ints + if idim == max(idims): + # use largest model for hardware conversion + largest_model = copy.deepcopy(model) + golden = execute_onnx(model, {"in0": inp})["out0"] + exp_cfg = ( + (idim_h, idim_w), + (int_dim_h, int_dim_w), + 
(odim_h, odim_w), + pad0, + pad1, + inp, + golden, + ) + exp_cfgs.append(exp_cfg) + + # convert to hardware and prepare simulation + model = largest_model.transform(LowerConvsToMatMul()) + model = model.transform(to_hls.InferConvInpGen(use_rtl_variant=True)) + model = model.transform( + to_hls.InferQuantizedMatrixVectorActivation(mem_mode="decoupled") + ) + model = model.transform(to_hls.InferVectorVectorActivation()) + model = model.transform(absorb.AbsorbConsecutiveTransposes()) + parent_model = model.transform(CreateDataflowPartition()) + sdp_inst = getCustomOp( + parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0] + ) + model = ModelWrapper(sdp_inst.get_nodeattr("model")) + assert len(model.get_nodes_by_op_type("ConvolutionInputGenerator_rtl")) == 2 + if pad_mode == "VALID": + assert len(model.get_nodes_by_op_type("FMPadding_rtl")) == 0 + else: + assert len(model.get_nodes_by_op_type("FMPadding_rtl")) == 2 + dyn_nodes = model.get_nodes_by_op_type("ConvolutionInputGenerator_rtl") + dyn_nodes += model.get_nodes_by_op_type("FMPadding_rtl") + for swg_node in dyn_nodes: + getCustomOp(swg_node).set_nodeattr("SIMD", 4) + getCustomOp(swg_node).set_nodeattr("dynamic_mode", 1) + getCustomOp(swg_node).set_nodeattr("inFIFODepths", [16]) + getCustomOp(swg_node).set_nodeattr("outFIFODepths", [16]) + comp_nodes = model.get_nodes_by_op_type("MatrixVectorActivation") + comp_nodes += model.get_nodes_by_op_type("VectorVectorActivation") + for comp_node in comp_nodes: + if depthwise: + getCustomOp(comp_node).set_nodeattr("PE", 4) + else: + getCustomOp(comp_node).set_nodeattr("SIMD", 4) + getCustomOp(comp_node).set_nodeattr("PE", 4) + model = model.transform(InsertDWC()) + model = model.transform(InsertFIFO(create_shallow_fifos=True)) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(GiveReadableTensorNames()) + model = model.transform(PrepareIP("xc7z020clg400-1", 5)) + model = model.transform(HLSSynthIP()) + model = model.transform(CreateStitchedIP("xc7z020clg400-1", 5)) + model.set_metadata_prop("exec_mode", "rtlsim") + + # loop through experiment configurations + for exp_cfg in exp_cfgs: + ( + (idim_h, idim_w), + (int_dim_h, int_dim_w), + (odim_h, odim_w), + pad0, + pad1, + inp, + golden, + ) = exp_cfg + conv0_idim_h = idim_h + pad0[0] + pad0[2] + conv0_idim_w = idim_w + pad0[1] + pad0[3] + conv1_idim_h = int_dim_h + pad1[0] + pad1[2] + conv1_idim_w = int_dim_w + pad1[1] + pad1[3] + # get config for the new dimensions + swg_nodes = model.get_nodes_by_op_type("ConvolutionInputGenerator_rtl") + swg0 = getCustomOp(swg_nodes[0]) + update_tensor_dim(model, swg0.onnx_node.input[0], (conv0_idim_h, conv0_idim_w)) + update_tensor_dim(model, swg0.onnx_node.output[0], (int_dim_h, int_dim_w)) + swg_config0 = swg0.get_dynamic_config((conv0_idim_h, conv0_idim_w)) + swg1 = getCustomOp(swg_nodes[1]) + update_tensor_dim(model, swg1.onnx_node.input[0], (conv1_idim_h, conv1_idim_w)) + update_tensor_dim(model, swg1.onnx_node.output[0], (odim_h, odim_w)) + swg_config1 = swg1.get_dynamic_config((conv1_idim_h, conv1_idim_w)) + if pad_mode != "VALID": + pad_nodes = model.get_nodes_by_op_type("FMPadding_rtl") + padder0 = getCustomOp(pad_nodes[0]) + update_tensor_dim(model, padder0.onnx_node.input[0], (idim_h, idim_w)) + update_tensor_dim( + model, padder0.onnx_node.output[0], (conv0_idim_h, conv0_idim_w) + ) + pad_config0 = padder0.get_dynamic_config((idim_h, idim_w), pad0) + padder1 = getCustomOp(pad_nodes[1]) + update_tensor_dim(model, padder1.onnx_node.input[0], (int_dim_h, int_dim_w)) 
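+            # the AXI-Lite config only re-programs the RTL registers; the ONNX
+            # tensor shapes must still be patched by hand (update_tensor_dim),
+            # for the padder output below just as for its input above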
+ update_tensor_dim( + model, padder1.onnx_node.output[0], (conv1_idim_h, conv1_idim_w) + ) + pad_config1 = padder1.get_dynamic_config((int_dim_h, int_dim_w), pad1) + configs = [ + ("s_axilite_0_", pad_config0), + ("s_axilite_1_", swg_config0), + ("s_axilite_2_", pad_config1), + ("s_axilite_3_", swg_config1), + ] + else: + configs = [("s_axilite_0_", swg_config0), ("s_axilite_1_", swg_config1)] + # adjust folded shapes for I/O FIFOs + # (since rtlsim_exec uses folded shape info to fold global i/o tensors) + first_node = getCustomOp(model.graph.node[0]) + first_node_shp = list(first_node.get_folded_input_shape()) + first_node_shp[1] = idim_h + first_node_shp[2] = idim_w + first_node.set_nodeattr("folded_shape", first_node_shp) + update_tensor_dim(model, first_node.onnx_node.input[0], (idim_h, idim_w)) + last_node = getCustomOp(model.graph.node[-1]) + last_node_shp = list(last_node.get_folded_output_shape()) + last_node_shp[1] = odim_h + last_node_shp[2] = odim_w + update_tensor_dim(model, last_node.onnx_node.output[0], (odim_h, odim_w)) + last_node.set_nodeattr("folded_shape", last_node_shp) + ctx = {"global_in": inp.transpose(0, 2, 3, 1)} + liveness_prev = pyverilate_get_liveness_threshold_cycles() + os.environ["LIVENESS_THRESHOLD"] = "100000" + rtlsim_exec(model, ctx, pre_hook=config_hook(configs)) + os.environ["LIVENESS_THRESHOLD"] = str(liveness_prev) + ret = ctx["global_out"].transpose(0, 3, 1, 2) + assert np.isclose(golden, ret).all() + + +def make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt): + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + ofm_dim_h, ofm_dim_w = ofm_dim + + odt = idt + inp = helper.make_tensor_value_info( + "inp", TensorProto.FLOAT, [1, ifm_dim_h, ifm_dim_w, ifm_ch] + ) + outp = helper.make_tensor_value_info( + "outp", TensorProto.FLOAT, [1, ofm_dim_h, ofm_dim_w, k_h * k_w * ifm_ch] + ) + + im2col_node = helper.make_node( + "Im2Col", + ["inp"], + ["outp"], + domain="finn.custom_op.general", + stride=[stride_h, stride_w], + kernel_size=[k_h, k_w], + input_shape=str((1, ifm_dim_h, ifm_dim_w, ifm_ch)), + dilations=[dilation_h, dilation_w], + pad_amount=[0, 0, 0, 0], + pad_value=0, + ) + graph = helper.make_graph( + nodes=[im2col_node], name="im2col_graph", inputs=[inp], outputs=[outp] + ) + + model = qonnx_make_model(graph, producer_name="im2col-model") + model = ModelWrapper(model) + + model.set_tensor_datatype("inp", idt) + model.set_tensor_datatype("outp", odt) + + return model + + +def make_single_slidingwindow_modelwrapper( + k, ifm_ch, ifm_dim, ofm_dim, simd, m, parallel_window, stride, dilation, idt, dw=0 +): + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + ofm_dim_h, ofm_dim_w = ofm_dim + + odt = idt + inp = helper.make_tensor_value_info( + "inp", TensorProto.FLOAT, [1, ifm_dim_h, ifm_dim_w, ifm_ch] + ) + outp = helper.make_tensor_value_info( + "outp", TensorProto.FLOAT, [1, ofm_dim_h, ofm_dim_w, k_h * k_w * ifm_ch] + ) + + SlidingWindow_node = helper.make_node( + "ConvolutionInputGenerator_rtl", + ["inp"], + ["outp"], + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + ConvKernelDim=[k_h, k_w], + IFMChannels=ifm_ch, + IFMDim=[ifm_dim_h, ifm_dim_w], + OFMDim=[ofm_dim_h, ofm_dim_w], + SIMD=simd, + M=m, + parallel_window=parallel_window, + Stride=[stride_h, stride_w], + Dilation=[dilation_h, dilation_w], + inputDataType=idt.name, + outputDataType=odt.name, + depthwise=dw, + 
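        # dynamic_mode=1 selects the runtime-reconfigurable variant of the SWG,
+        # which exposes the AXI-Lite interface that config_hook() programs in
+        # the test below
+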
dynamic_mode=1, + ) + graph = helper.make_graph( + nodes=[SlidingWindow_node], + name="slidingwindow_graph", + inputs=[inp], + outputs=[outp], + ) + + model = qonnx_make_model(graph, producer_name="slidingwindow-model") + model = ModelWrapper(model) + + model.set_tensor_datatype("inp", idt) + model.set_tensor_datatype("outp", odt) + + return model + + +def prepare_inputs(input_tensor): + return {"inp": input_tensor} + + +# input datatype +@pytest.mark.parametrize("idt", [DataType["UINT4"]]) +# kernel size +@pytest.mark.parametrize("k", [[3, 3]]) +# input dimension +@pytest.mark.parametrize("ifm_dim_series", [[[32, 32], [16, 16], [8, 8]]]) +# input channels +@pytest.mark.parametrize("ifm_ch", [6]) +# Stride +@pytest.mark.parametrize("stride", [[1, 1]]) +# Dilation +@pytest.mark.parametrize("dilation", [[1, 1]]) +# depthwise +@pytest.mark.parametrize("dw", [0, 1]) +# input channel parallelism ("SIMD") +@pytest.mark.parametrize("simd", [2, 6]) +# parallel_window enable (MMV_out = M*K) +@pytest.mark.parametrize("parallel_window", [0]) +# in/out MMV ("M") +@pytest.mark.parametrize("m", [1]) +@pytest.mark.slow +@pytest.mark.vivado +@pytest.mark.fpgadataflow +def test_fpgadataflow_slidingwindow_rtl_dynamic( + idt, k, ifm_dim_series, ifm_ch, stride, dilation, dw, simd, m, parallel_window +): + # Begin test by generating RTL SWG normally for the first FM of the series. + # The following FM dimensions must be equal or smaller than the initial + # dimensions (in terms of required buffer depth). + ifm_dim = ifm_dim_series[0] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. 
dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip( + "Illegal convolution configuration: kernel or stride > FM dimension" + ) + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip( + "Illegal convolution configuration: kernel or stride > FM dimension" + ) + if (k_h == 1 and (stride_h != 1 or dilation_h != 1)) or ( + k_w == 1 and (stride_w != 1 or dilation_w != 1) + ): + pytest.skip( + """Illegal convolution configuration: + stride or dilation defined for unitary kernel dim""" + ) + if k_h == 1 and k_w == 1 and simd != ifm_ch: + pytest.skip("1x1 Kernel only supported in parallel mode (SIMD=C)") + if parallel_window and simd != ifm_ch: + pytest.skip("Parallel window requires SIMD=C") + + model = make_single_slidingwindow_modelwrapper( + k=k, + ifm_ch=ifm_ch, + ifm_dim=ifm_dim, + ofm_dim=ofm_dim, + simd=simd, + m=m, + parallel_window=parallel_window, + stride=stride, + dilation=dilation, + idt=idt, + dw=dw, + ) + + # Simulate using stitched-ip-rtlsim so we can use existing infrastructure + # that supports hook functions to re-program configuration before rtlsim + model = model.transform(InsertFIFO(True)) # required for proper simulation + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareIP("xc7z020clg400-1", 5)) + model = model.transform(HLSSynthIP()) + model = model.transform(CreateStitchedIP("xc7z020clg400-1", 5)) + model.set_metadata_prop("exec_mode", "rtlsim") + + # Simulate 1 FM for each dimension in the series + for i, ifm_dim in enumerate(ifm_dim_series): + ifm_dim_h, ifm_dim_w = ifm_dim + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + configs = None + if i > 0: # skip re-programming for initial FM dimension + # Necessary update of node and tensor attributes to make rtlsim work: + swg_node = model.get_nodes_by_op_type("ConvolutionInputGenerator_rtl")[0] + swg_inst = getCustomOp(swg_node) + update_tensor_dim(model, swg_node.input[0], ifm_dim) + update_tensor_dim(model, swg_node.output[0], ofm_dim) + + # Generate config, also overwrites IFMDim/OFMDim attributes: + config = swg_inst.get_dynamic_config(ifm_dim) + configs = [("s_axilite_0_", config)] + + # Also update FIFO nodes and corresponding tensors + fifo_node = model.get_nodes_by_op_type("StreamingFIFO")[0] + fifo_inst = getCustomOp(fifo_node) + shape = fifo_inst.get_nodeattr("folded_shape") + shape[1] = ifm_dim_h + shape[2] = ifm_dim_w + fifo_inst.set_nodeattr("folded_shape", shape) + update_tensor_dim(model, fifo_node.input[0], ifm_dim) + + fifo_node = model.get_nodes_by_op_type("StreamingFIFO")[1] + fifo_inst = getCustomOp(fifo_node) + shape = fifo_inst.get_nodeattr("folded_shape") + shape[1] = ofm_dim_h + shape[2] = ofm_dim_w + fifo_inst.set_nodeattr("folded_shape", shape) + update_tensor_dim(model, fifo_node.output[0], ofm_dim) + + # Run rtlsim on stitched-ip + x = gen_finn_dt_tensor(idt, (1, ifm_dim_h, ifm_dim_w, ifm_ch)) + context = prepare_inputs(x) + rtlsim_exec(model, context, pre_hook=config_hook(configs)) + y_produced = context["outp"] + + # Generate golden result + golden = make_single_im2col_modelwrapper( + k=k, + ifm_ch=ifm_ch, + ifm_dim=ifm_dim, + ofm_dim=ofm_dim, + stride=stride, + dilation=dilation, + idt=idt, + ) + 
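    )
+    # golden reference: execute the plain Im2Col graph on the same input and
+    # compare it against the stitched-IP rtlsim output checked below
+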
input_dict = prepare_inputs(x) + y_expected = oxe.execute_onnx(golden, input_dict)["outp"] + + # Check result + if dw == 0: + assert (y_produced == y_expected).all() + else: + y_expected = y_expected.reshape( + 1, ofm_dim_h, ofm_dim_w, k_h * k_w, ifm_ch // simd, simd + ) + y_expected = y_expected.transpose(0, 1, 2, 4, 3, 5) + y_expected = y_expected.reshape(1, ofm_dim_h, ofm_dim_w, ifm_ch * k_h * k_w) + assert (y_produced == y_expected).all() diff --git a/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py b/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py index 7ec254405d23f0a972de7f9d02d2ab021ed3d959..441bbce50a8a218185f93a7968767abe2541ed15 100644 --- a/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py +++ b/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py @@ -36,7 +36,7 @@ from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.general import GiveUniqueNodeNames from qonnx.transformation.infer_datatypes import InferDataTypes from qonnx.transformation.infer_shapes import InferShapes -from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.core.onnx_exec as oxe from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer @@ -76,7 +76,7 @@ def make_dupstreams_modelwrapper(ch, pe, idim, idt, n_dupl): nodes=[dupstrm_node], name="graph", inputs=[inp], outputs=out_vi ) - model = helper.make_model(graph, producer_name="addstreams-model") + model = qonnx_make_model(graph, producer_name="addstreams-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) diff --git a/tests/fpgadataflow/test_fpgadataflow_dwc.py b/tests/fpgadataflow/test_fpgadataflow_dwc.py index 695a5f902ce7bf3c22bfd46dc264dda4bfceb15f..2bde148a1499e4c7065ab1e151e3c4198e1e96da 100644 --- a/tests/fpgadataflow/test_fpgadataflow_dwc.py +++ b/tests/fpgadataflow/test_fpgadataflow_dwc.py @@ -32,7 +32,7 @@ from onnx import TensorProto, helper from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper from qonnx.transformation.general import GiveUniqueNodeNames -from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.core.onnx_exec as oxe from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP @@ -63,7 +63,7 @@ def make_single_dwc_modelwrapper(shape, inWidth, outWidth, finn_dtype, impl_styl nodes=[DWC_node], name="dwc_graph", inputs=[inp], outputs=[outp] ) - model = helper.make_model(graph, producer_name="dwc-model") + model = qonnx_make_model(graph, producer_name="dwc-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", finn_dtype) diff --git a/tests/fpgadataflow/test_fpgadataflow_fifo.py b/tests/fpgadataflow/test_fpgadataflow_fifo.py index b9c74185d9f104e15355a5dd6021d7e74dac641e..efdb3bf6aaab23fec67055ae28b2e285f1a32b6a 100644 --- a/tests/fpgadataflow/test_fpgadataflow_fifo.py +++ b/tests/fpgadataflow/test_fpgadataflow_fifo.py @@ -33,7 +33,7 @@ from onnx import TensorProto, helper from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper from qonnx.transformation.general import GiveUniqueNodeNames -from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.core.onnx_exec as oxe from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP @@ -66,7 +66,7 @@ def make_single_fifo_modelwrapper(Shape, Depth, fld_shape, finn_dtype): 
nodes=[FIFO_node], name="fifo_graph", inputs=[inp], outputs=[outp] ) - model = helper.make_model(graph, producer_name="fifo-model") + model = qonnx_make_model(graph, producer_name="fifo-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", finn_dtype) diff --git a/tests/fpgadataflow/test_fpgadataflow_fmpadding.py b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py index 34928ce45be0fd96d27b153ae28e2128bf306bb5..b95409fda87718f30a74bad88697c3dbad0bf98f 100644 --- a/tests/fpgadataflow/test_fpgadataflow_fmpadding.py +++ b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py @@ -36,7 +36,7 @@ from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.general import GiveUniqueNodeNames from qonnx.transformation.infer_shapes import InferShapes -from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.core.onnx_exec as oxe from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer @@ -53,7 +53,7 @@ test_fpga_part = pynq_part_map[test_pynq_board] target_clk_ns = 10 -def make_single_fmpadding_modelwrapper(idim, padding, num_ch, simd, idt): +def make_single_fmpadding_modelwrapper(optype, idim, padding, num_ch, simd, idt): pad_h = padding[0] + padding[2] pad_w = padding[1] + padding[3] idim_h, idim_w = idim @@ -70,7 +70,7 @@ def make_single_fmpadding_modelwrapper(idim, padding, num_ch, simd, idt): ) FMPadding = helper.make_node( - "FMPadding_Batch", + optype, ["inp"], ["outp"], domain="finn.custom_op.fpgadataflow", @@ -87,7 +87,7 @@ def make_single_fmpadding_modelwrapper(idim, padding, num_ch, simd, idt): nodes=[FMPadding], name="fmpadding_graph", inputs=[inp], outputs=[outp] ) - model = helper.make_model(graph, producer_name="fmpadding-model") + model = qonnx_make_model(graph, producer_name="fmpadding-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) @@ -110,10 +110,14 @@ def make_single_fmpadding_modelwrapper(idim, padding, num_ch, simd, idt): @pytest.mark.parametrize("idt", [DataType["INT2"], DataType["INT4"]]) # execution mode @pytest.mark.parametrize("mode", ["cppsim", "rtlsim"]) +# implementation style +@pytest.mark.parametrize("impl_style", ["rtl", "hls"]) @pytest.mark.fpgadataflow @pytest.mark.slow @pytest.mark.vivado -def test_fpgadataflow_fmpadding(idim, pad, num_ch, simd, idt, mode): +def test_fpgadataflow_fmpadding(idim, pad, num_ch, simd, idt, mode, impl_style): + if impl_style == "rtl" and mode == "cppsim": + pytest.skip("rtl implstyle has no cppsim, skipping") if num_ch % simd != 0: pytest.skip(" num_ch % simd != 0, skipping") @@ -127,7 +131,9 @@ def test_fpgadataflow_fmpadding(idim, pad, num_ch, simd, idt, mode): odim_h = idim_h + pad_h odim_w = idim_w + pad_w - model = make_single_fmpadding_modelwrapper(idim, pad, num_ch, simd, idt) + optype = {"hls": "FMPadding_Batch", "rtl": "FMPadding_rtl"}[impl_style] + + model = make_single_fmpadding_modelwrapper(optype, idim, pad, num_ch, simd, idt) model = model.transform(InferShapes()) model = model.transform(SetExecMode(mode)) model = model.transform(GiveUniqueNodeNames()) @@ -138,6 +144,7 @@ def test_fpgadataflow_fmpadding(idim, pad, num_ch, simd, idt, mode): model = model.transform(PrepareIP(test_fpga_part, target_clk_ns)) model = model.transform(HLSSynthIP()) model = model.transform(PrepareRTLSim()) + y_produced = oxe.execute_onnx(model, input_dict)["outp"] expected_oshape = (1, odim_h, odim_w, num_ch) assert y_produced.shape == expected_oshape @@ 
-149,7 +156,7 @@ def test_fpgadataflow_fmpadding(idim, pad, num_ch, simd, idt, mode): assert (y_produced == y_expected).all() if mode == "rtlsim": - node = model.get_nodes_by_op_type("FMPadding_Batch")[0] + node = model.get_nodes_by_op_type(optype)[0] inst = getCustomOp(node) cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") exp_cycles_dict = model.analysis(exp_cycles_per_layer) diff --git a/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py b/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py index a37e6e3271a9f7e033e6beaa6dbed01271365101..a2c3d09a55f81dc5e9d5ae1819cd8ea6b7df1e27 100644 --- a/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py +++ b/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py @@ -34,7 +34,7 @@ from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.general import GiveUniqueNodeNames -from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.core.onnx_exec as oxe from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer @@ -65,7 +65,7 @@ def make_accpool_modelwrapper(ch, pe, idim, idt): nodes=[accpool_node], name="graph", inputs=[inp], outputs=[outp] ) - model = helper.make_model(graph, producer_name="thresholding-model") + model = qonnx_make_model(graph, producer_name="thresholding-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) diff --git a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py index 325470a6d6c6032249ca1dd64317fb288d3e94c9..b220338e6919e8eeaeef0f6e5343fed9b1dfca10 100644 --- a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py +++ b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py @@ -36,7 +36,7 @@ from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.general import GiveUniqueNodeNames from qonnx.transformation.infer_data_layouts import InferDataLayouts -from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model from finn.core.onnx_exec import execute_onnx from finn.transformation.fpgadataflow.create_dataflow_partition import ( @@ -100,7 +100,7 @@ def create_one_fc_model(mem_mode="const"): nodes=[fc0], name="fclayer_graph", inputs=[inp], outputs=[outp] ) - model = helper.make_model(graph, producer_name="fclayer-model") + model = qonnx_make_model(graph, producer_name="fclayer-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) @@ -177,7 +177,7 @@ def create_two_fc_model(mem_mode="decoupled"): value_info=[mid], ) - model = helper.make_model(graph, producer_name="fclayer-model") + model = qonnx_make_model(graph, producer_name="fclayer-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) diff --git a/tests/fpgadataflow/test_fpgadataflow_labelselect.py b/tests/fpgadataflow/test_fpgadataflow_labelselect.py index a9b98ecaf80b4c86fc1e9ccec23e6d97c5982f55..553f263ba2e004233011db90feabea057d88026a 100644 --- a/tests/fpgadataflow/test_fpgadataflow_labelselect.py +++ b/tests/fpgadataflow/test_fpgadataflow_labelselect.py @@ -33,7 +33,7 @@ from onnx import TensorProto, helper from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper from qonnx.transformation.general import GiveUniqueNodeNames -from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.basic import 
gen_finn_dt_tensor, qonnx_make_model import finn.core.onnx_exec as oxe from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim @@ -67,7 +67,7 @@ def make_labelselect_modelwrapper(labels, pe, k, idt): outputs=[outp], ) - model = helper.make_model(graph, producer_name="thresholding-model") + model = qonnx_make_model(graph, producer_name="thresholding-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) diff --git a/tests/fpgadataflow/test_fpgadataflow_mvau.py b/tests/fpgadataflow/test_fpgadataflow_mvau.py index a7e7eba7ee8de81ec5eebe3e270e8e1d28564a00..b80ef76a19e487a93b23ae7db17350e85fb66822 100644 --- a/tests/fpgadataflow/test_fpgadataflow_mvau.py +++ b/tests/fpgadataflow/test_fpgadataflow_mvau.py @@ -36,7 +36,11 @@ from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.general.multithreshold import multithreshold from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.general import GiveUniqueNodeNames -from qonnx.util.basic import calculate_signed_dot_prod_range, gen_finn_dt_tensor +from qonnx.util.basic import ( + calculate_signed_dot_prod_range, + gen_finn_dt_tensor, + qonnx_make_model, +) import finn.core.onnx_exec as oxe from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer @@ -106,7 +110,7 @@ def make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=Non nodes=[FCLayer_node], name="fclayer_graph", inputs=[inp], outputs=[outp] ) - model = helper.make_model(graph, producer_name="fclayer-model") + model = qonnx_make_model(graph, producer_name="fclayer-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) diff --git a/tests/fpgadataflow/test_fpgadataflow_res_estimate.py b/tests/fpgadataflow/test_fpgadataflow_res_estimate.py index e3c79fa44fb57718d359b58d1a8716746f6668fb..b3cf7b4229c39f27c7f3689ef51fb7d22c7aa0f2 100644 --- a/tests/fpgadataflow/test_fpgadataflow_res_estimate.py +++ b/tests/fpgadataflow/test_fpgadataflow_res_estimate.py @@ -32,6 +32,7 @@ from onnx import TensorProto, helper from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper from qonnx.transformation.general import GiveUniqueNodeNames +from qonnx.util.basic import qonnx_make_model from finn.analysis.fpgadataflow.res_estimation import ( res_estimation, @@ -87,7 +88,7 @@ def test_res_estimate(): nodes=[FCLayer_node], name="fclayer_graph", inputs=[inp], outputs=[outp] ) - model = helper.make_model(graph, producer_name="fclayer-model") + model = qonnx_make_model(graph, producer_name="fclayer-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) diff --git a/tests/fpgadataflow/test_fpgadataflow_streamingmaxpool.py b/tests/fpgadataflow/test_fpgadataflow_streamingmaxpool.py index a3968cf79704092ffb5ec53c887842372b625f4d..628721b429abadf198126a2f5801178f2f710033 100644 --- a/tests/fpgadataflow/test_fpgadataflow_streamingmaxpool.py +++ b/tests/fpgadataflow/test_fpgadataflow_streamingmaxpool.py @@ -35,7 +35,7 @@ from qonnx.custom_op.general.maxpoolnhwc import compute_pool_output_dim from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.general import GiveUniqueNodeNames from qonnx.transformation.infer_shapes import InferShapes -from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.core.onnx_exec as oxe from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer @@ -74,7 +74,7 @@ def make_single_maxpoolnhwc_modelwrapper(k, 
ifm_ch, ifm_dim, ofm_dim, idt, ceil_ nodes=[mp_node], name="mp_graph", inputs=[inp], outputs=[outp] ) - model = helper.make_model(graph, producer_name="mp-model") + model = qonnx_make_model(graph, producer_name="mp-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding.py b/tests/fpgadataflow/test_fpgadataflow_thresholding.py index 706679b6809844d0b2924411440088ea892ba7a9..96cd69c3453793c1634f132cb159f0cc8a94a28c 100644 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding.py @@ -37,7 +37,7 @@ from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.general.multithreshold import multithreshold from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.general import GiveUniqueNodeNames -from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.core.onnx_exec as oxe from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer @@ -93,7 +93,7 @@ def make_single_thresholding_modelwrapper( outputs=[outp], ) - model = helper.make_model(graph, producer_name="thresholding-model") + model = qonnx_make_model(graph, producer_name="thresholding-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) diff --git a/tests/fpgadataflow/test_fpgadataflow_vvau.py b/tests/fpgadataflow/test_fpgadataflow_vvau.py index 03ddb1286320b8178276ea53082095106a43d7a1..abf8ba0b9efde67c77711abc8451475887430cae 100644 --- a/tests/fpgadataflow/test_fpgadataflow_vvau.py +++ b/tests/fpgadataflow/test_fpgadataflow_vvau.py @@ -35,7 +35,7 @@ from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.general.multithreshold import multithreshold from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.general import GiveUniqueNodeNames -from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.core.onnx_exec as oxe from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer @@ -132,7 +132,7 @@ def _make_single_vvau_modelwrapper( nodes=[VVAU_node], name="vvau_graph", inputs=[inp], outputs=[outp] ) - model = helper.make_model(graph, producer_name="vvau-model") + model = qonnx_make_model(graph, producer_name="vvau-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", idt) diff --git a/tests/fpgadataflow/test_set_folding.py b/tests/fpgadataflow/test_set_folding.py index 8ea0e18f2cace10b6fefae50ce1e28845ab24050..5355dd7044343d9dbb077225b5b8786eb7fdfe32 100644 --- a/tests/fpgadataflow/test_set_folding.py +++ b/tests/fpgadataflow/test_set_folding.py @@ -34,6 +34,7 @@ from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.general import GiveUniqueNodeNames +from qonnx.util.basic import qonnx_make_model from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer from finn.transformation.fpgadataflow.create_dataflow_partition import ( @@ -91,7 +92,7 @@ def make_multi_fclayer_model(ch, wdt, adt, tdt, nnodes): outputs=[tensors[-1]], ) - model = helper.make_model(graph, producer_name="fclayer-model") + model = qonnx_make_model(graph, producer_name="fclayer-model") model = ModelWrapper(model) model.set_tensor_datatype("inp", adt) diff --git a/tests/transformation/streamline/test_absorb_mul_into_topk.py 
b/tests/transformation/streamline/test_absorb_mul_into_topk.py index a6dff788dc58dba45536a280c7fe5f5c53edc4e1..89ef74e0b3f83fc092268ad2582c533e47eab618 100644 --- a/tests/transformation/streamline/test_absorb_mul_into_topk.py +++ b/tests/transformation/streamline/test_absorb_mul_into_topk.py @@ -34,6 +34,7 @@ from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNode from qonnx.transformation.infer_datatypes import InferDataTypes from qonnx.transformation.infer_shapes import InferShapes from qonnx.transformation.insert_topk import InsertTopK +from qonnx.util.basic import qonnx_make_model import finn.core.onnx_exec as oxe from finn.transformation.streamline.absorb import AbsorbScalarMulAddIntoTopK @@ -65,7 +66,7 @@ def test_absorb_mul_into_topk(mul_positive, scalar): value_info=[a0, b0, c0], ) - model = helper.make_model(mul_graph, producer_name="mul_model") + model = qonnx_make_model(mul_graph, producer_name="mul_model") model = ModelWrapper(model) # initialize values # for mul diff --git a/tests/transformation/streamline/test_absorb_transp_into_flatten.py b/tests/transformation/streamline/test_absorb_transp_into_flatten.py index 1358d468c04c3edf08b11e7e9b858dda58965368..44b0c1d7e04447f13043cb326047a7b8d69469dd 100644 --- a/tests/transformation/streamline/test_absorb_transp_into_flatten.py +++ b/tests/transformation/streamline/test_absorb_transp_into_flatten.py @@ -8,6 +8,7 @@ from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNode from qonnx.transformation.infer_data_layouts import InferDataLayouts from qonnx.transformation.infer_datatypes import InferDataTypes from qonnx.transformation.infer_shapes import InferShapes +from qonnx.util.basic import qonnx_make_model import finn.core.onnx_exec as oxe from finn.transformation.streamline.absorb import AbsorbTransposeIntoFlatten @@ -45,7 +46,7 @@ def test_absorb_transp_into_flatten(perm, shape, ishape, data_layout): outputs=[outp], ) - model = helper.make_model(graph, producer_name="absorb_transpose_model") + model = qonnx_make_model(graph, producer_name="absorb_transpose_model") model = ModelWrapper(model) if shape is not None: model.graph.value_info.append(shape0) diff --git a/tests/transformation/streamline/test_collapse_repeated_op.py b/tests/transformation/streamline/test_collapse_repeated_op.py index 268e0ffc5c5cb342634ff51ac8fe02157ae8c7c6..c1d3ee00883b84ec2a8c18d093b1756a4d6aea36 100644 --- a/tests/transformation/streamline/test_collapse_repeated_op.py +++ b/tests/transformation/streamline/test_collapse_repeated_op.py @@ -33,6 +33,7 @@ import onnx.helper as oh from onnx import TensorProto from qonnx.core.modelwrapper import ModelWrapper from qonnx.transformation.infer_shapes import InferShapes +from qonnx.util.basic import qonnx_make_model import finn.core.onnx_exec as ox from finn.transformation.streamline import CollapseRepeatedAdd, CollapseRepeatedMul @@ -46,7 +47,7 @@ def test_collapse_repeated_op(): add_param_1 = oh.make_tensor_value_info("add_param_1", TensorProto.FLOAT, [2]) mul_param_1 = oh.make_tensor_value_info("mul_param_1", TensorProto.FLOAT, [2]) top_out = oh.make_tensor_value_info("top_out", TensorProto.FLOAT, [2]) - modelproto = oh.make_model( + modelproto = qonnx_make_model( oh.make_graph( name="test", inputs=[top_in], @@ -96,7 +97,7 @@ def test_collapse_repeated_only_if_linear(test_args): value_info += [oh.make_tensor_value_info("p4", TensorProto.FLOAT, [1])] value_info += [oh.make_tensor_value_info("p5", TensorProto.FLOAT, [1])] - modelproto = oh.make_model( + modelproto = 
qonnx_make_model( oh.make_graph( name="test", inputs=[top_in], diff --git a/tests/transformation/streamline/test_factor_out_mul_sign_magnitude.py b/tests/transformation/streamline/test_factor_out_mul_sign_magnitude.py index 04ab9bf0b9c092bdf2c2a6c6268974fd78020eee..89596a1c0f4af4b95e19f3b6aba19e7f459aa7df 100644 --- a/tests/transformation/streamline/test_factor_out_mul_sign_magnitude.py +++ b/tests/transformation/streamline/test_factor_out_mul_sign_magnitude.py @@ -33,6 +33,7 @@ import onnx.helper as oh from onnx import TensorProto from qonnx.core.modelwrapper import ModelWrapper from qonnx.transformation.infer_shapes import InferShapes +from qonnx.util.basic import qonnx_make_model import finn.core.onnx_exec as ox from finn.transformation.streamline import FactorOutMulSignMagnitude @@ -43,7 +44,7 @@ def test_factor_out_mul_sign_magnitude(): top_in = oh.make_tensor_value_info("top_in", TensorProto.FLOAT, [1, 2]) mul_param = oh.make_tensor_value_info("mul_param", TensorProto.FLOAT, [1, 2]) top_out = oh.make_tensor_value_info("top_out", TensorProto.FLOAT, [1, 2]) - modelproto = oh.make_model( + modelproto = qonnx_make_model( oh.make_graph( name="test", inputs=[top_in], diff --git a/tests/transformation/streamline/test_linear_past_eltwise.py b/tests/transformation/streamline/test_linear_past_eltwise.py index 12633d750bb405757efca0c028dece92b289b472..4e5dcd63862b61f5575d8adf2cbb69912ee726d7 100644 --- a/tests/transformation/streamline/test_linear_past_eltwise.py +++ b/tests/transformation/streamline/test_linear_past_eltwise.py @@ -35,6 +35,7 @@ from qonnx.core.modelwrapper import ModelWrapper from qonnx.transformation.fold_constants import FoldConstants from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames from qonnx.transformation.infer_shapes import InferShapes +from qonnx.util.basic import qonnx_make_model import finn.core.onnx_exec as oxe from finn.transformation.streamline.reorder import MoveLinearPastEltwiseAdd @@ -78,7 +79,7 @@ def make_model(shape): outputs=[outp], ) - model = helper.make_model(graph, producer_name="add-model") + model = qonnx_make_model(graph, producer_name="add-model") model = ModelWrapper(model) # set initializers for scalar add/mul nodes @@ -156,7 +157,7 @@ def test_linear_past_eltwise_add_multiple_forks(ch, ifmdim): helper.make_tensor_value_info("p" + str(i), TensorProto.FLOAT, input_shape) ] - modelproto = helper.make_model( + modelproto = qonnx_make_model( helper.make_graph( name="test", inputs=[top_in], diff --git a/tests/transformation/streamline/test_maxpool_nhwc.py b/tests/transformation/streamline/test_maxpool_nhwc.py index aa77b5cf1a6e77d67ff8351ca5f544a63eb47f29..d61eedaaf5d1f10e64712d5282190b67f56acb49 100644 --- a/tests/transformation/streamline/test_maxpool_nhwc.py +++ b/tests/transformation/streamline/test_maxpool_nhwc.py @@ -7,7 +7,7 @@ from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.general.maxpoolnhwc import compute_pool_output_dim from qonnx.transformation.infer_shapes import InferShapes -from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.core.onnx_exec as oxe from finn.transformation.streamline.reorder import MakeMaxPoolNHWC @@ -56,7 +56,7 @@ def create_maxpool(ifm_dim, ifm_ch, kernel_shape, pads, strides, ceil_mode, idt) value_info=[outp_mp], ) - model = oh.make_model(graph, producer_name="maxpool_model") + model = qonnx_make_model(graph, producer_name="maxpool_model") model = 
ModelWrapper(model)
     model.set_tensor_datatype("inp", idt)
     model.set_tensor_datatype("outp", idt)
diff --git a/tests/transformation/streamline/test_move_add_past_mul.py b/tests/transformation/streamline/test_move_add_past_mul.py
index 0fb4dd9f7a116d0d52578d7222421f251ac17ec1..ea9c2a954d2bd7b4a4be421c1869d4a8dd8f0cf1 100644
--- a/tests/transformation/streamline/test_move_add_past_mul.py
+++ b/tests/transformation/streamline/test_move_add_past_mul.py
@@ -33,6 +33,7 @@ import onnx.helper as oh
 from onnx import TensorProto
 from qonnx.core.modelwrapper import ModelWrapper
 from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import qonnx_make_model
 
 import finn.core.onnx_exec as ox
 from finn.transformation.streamline import MoveAddPastMul
@@ -44,7 +45,7 @@ def test_move_add_past_mul_single():
     add_param = oh.make_tensor_value_info("add_param", TensorProto.FLOAT, [2])
     mul_param = oh.make_tensor_value_info("mul_param", TensorProto.FLOAT, [2])
     top_out = oh.make_tensor_value_info("top_out", TensorProto.FLOAT, [2])
-    modelproto = oh.make_model(
+    modelproto = qonnx_make_model(
         oh.make_graph(
             name="test",
             inputs=[top_in],
@@ -76,7 +77,7 @@ def test_move_add_past_mul_multi():
     add_param_1 = oh.make_tensor_value_info("add_param_1", TensorProto.FLOAT, [2])
     mul_param_1 = oh.make_tensor_value_info("mul_param_1", TensorProto.FLOAT, [2])
     top_out = oh.make_tensor_value_info("top_out", TensorProto.FLOAT, [2])
-    modelproto = oh.make_model(
+    modelproto = qonnx_make_model(
         oh.make_graph(
             name="test",
             inputs=[top_in],
@@ -116,7 +117,7 @@ def test_move_add_past_mul_only_if_linear():
     value_info += [oh.make_tensor_value_info("mul1_param", TensorProto.FLOAT, [1])]
     value_info += [oh.make_tensor_value_info("mul2_param", TensorProto.FLOAT, [1])]
     value_info += [oh.make_tensor_value_info("mul3_param", TensorProto.FLOAT, [1])]
-    modelproto = oh.make_model(
+    modelproto = qonnx_make_model(
         oh.make_graph(
             name="test",
             inputs=[top_in],
diff --git a/tests/transformation/streamline/test_move_chw_add_past_conv.py b/tests/transformation/streamline/test_move_chw_add_past_conv.py
index 7eb7f9f1af67efa1a6934157b9c2b3f8a6a814c2..e1b324a798a23b5f4a6878f5e2b27434a61fe8f8 100644
--- a/tests/transformation/streamline/test_move_chw_add_past_conv.py
+++ b/tests/transformation/streamline/test_move_chw_add_past_conv.py
@@ -33,6 +33,7 @@ from onnx import TensorProto, helper
 from qonnx.core.modelwrapper import ModelWrapper
 from qonnx.custom_op.general.im2col import compute_conv_output_dim
 from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import qonnx_make_model
 
 import finn.core.onnx_exec as oxe
 from finn.transformation.streamline.reorder import MoveAddPastConv
@@ -72,7 +73,7 @@ def test_move_chw_add_past_conv(idim, k, s, ich, och):
 
     add_node = helper.make_node("Add", ["inp", "a0"], ["add_out"])
     conv_node = helper.make_node("Conv", ["add_out", "a1"], ["outp"], **conv_config)
-    model = helper.make_model(
+    model = qonnx_make_model(
         helper.make_graph(
             nodes=[add_node, conv_node],
             name="move-add-graph",
diff --git a/tests/transformation/streamline/test_move_flatten_past_affine.py b/tests/transformation/streamline/test_move_flatten_past_affine.py
index 8c3f71d1f35de1b03fb33e53e41599fae7e02304..22c5e19fac700e147a36f74f10dad10614d47992 100644
--- a/tests/transformation/streamline/test_move_flatten_past_affine.py
+++ b/tests/transformation/streamline/test_move_flatten_past_affine.py
@@ -36,7 +36,7 @@ from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNode
 from qonnx.transformation.infer_data_layouts import InferDataLayouts
 from qonnx.transformation.infer_datatypes import InferDataTypes
 from qonnx.transformation.infer_shapes import InferShapes
-from qonnx.util.basic import gen_finn_dt_tensor
+from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model
 
 import finn.core.onnx_exec as oxe
 from finn.transformation.streamline.reorder import MoveFlattenPastAffine
@@ -74,7 +74,7 @@ def test_move_flatten_past_affine(data_layout, batch_size):
         value_info=[a0, a1, a2],
     )
 
-    model = helper.make_model(graph, producer_name="move_reshape_model")
+    model = qonnx_make_model(graph, producer_name="move_reshape_model")
     model = ModelWrapper(model)
 
     # initialize values
diff --git a/tests/transformation/streamline/test_move_flatten_past_topk.py b/tests/transformation/streamline/test_move_flatten_past_topk.py
index d1478088e2e8caaeb33fbec2880e74ea65905073..82336cd3e69d865e4c36536e7e0b16f092a7033d 100644
--- a/tests/transformation/streamline/test_move_flatten_past_topk.py
+++ b/tests/transformation/streamline/test_move_flatten_past_topk.py
@@ -36,7 +36,7 @@ from qonnx.transformation.infer_data_layouts import InferDataLayouts
 from qonnx.transformation.infer_datatypes import InferDataTypes
 from qonnx.transformation.infer_shapes import InferShapes
 from qonnx.transformation.insert_topk import InsertTopK
-from qonnx.util.basic import gen_finn_dt_tensor
+from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model
 
 import finn.core.onnx_exec as oxe
 from finn.transformation.streamline.reorder import MoveFlattenPastTopK
@@ -67,7 +67,7 @@ def test_move_flatten_past_topk(data_layout, batch_size):
         outputs=[outp],
     )
 
-    model = helper.make_model(graph, producer_name="move_flatten_model")
+    model = qonnx_make_model(graph, producer_name="move_flatten_model")
     model = ModelWrapper(model)
 
     model.set_tensor_datatype("inp", DataType["INT2"])
diff --git a/tests/transformation/streamline/test_move_identical_op_past_join_op.py b/tests/transformation/streamline/test_move_identical_op_past_join_op.py
index 4986363ff4dba0b0126babdbd1f393faa2df5de3..7be97631625354297c322267792520628454c4f9 100644
--- a/tests/transformation/streamline/test_move_identical_op_past_join_op.py
+++ b/tests/transformation/streamline/test_move_identical_op_past_join_op.py
@@ -30,7 +30,7 @@ import pytest
 from onnx import TensorProto
 from onnx import helper as oh
 from qonnx.core.modelwrapper import ModelWrapper
-from qonnx.util.basic import gen_finn_dt_tensor
+from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model
 
 import finn.core.onnx_exec as oxe
 from finn.transformation.streamline.reorder import MoveTransposePastJoinAdd
@@ -81,7 +81,7 @@ def create_model(perm):
         ],
     )
 
-    onnx_model = oh.make_model(graph, producer_name="test_model")
+    onnx_model = qonnx_make_model(graph, producer_name="test_model")
     model = ModelWrapper(onnx_model)
 
     return model
diff --git a/tests/transformation/streamline/test_move_maxpool_past_multithreshold.py b/tests/transformation/streamline/test_move_maxpool_past_multithreshold.py
index bf25eee9e685d2536faf5bd25bc7b1aa36700463..6126acd9e388869c34cd0c73bb64f4b6c56b4c06 100644
--- a/tests/transformation/streamline/test_move_maxpool_past_multithreshold.py
+++ b/tests/transformation/streamline/test_move_maxpool_past_multithreshold.py
@@ -32,6 +32,7 @@ from onnx import TensorProto, helper
 from qonnx.core.modelwrapper import ModelWrapper
 from qonnx.transformation.infer_datatypes import InferDataTypes
 from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import qonnx_make_model
 
 import finn.core.onnx_exec as oxe
 from finn.transformation.streamline.reorder import MoveMaxPoolPastMultiThreshold
@@ -99,7 +100,7 @@ def test_move_maxpool_past_multithreshold():
         )
     ]
 
-    modelproto = helper.make_model(
+    modelproto = qonnx_make_model(
         helper.make_graph(
             name="test",
             inputs=[top_in],
diff --git a/tests/transformation/streamline/test_move_mul_past_dw_conv.py b/tests/transformation/streamline/test_move_mul_past_dw_conv.py
index 401631a728412e7676fa804626601cfc58b5a5e3..72a6650ec4e6b853b79c93941af84dd15a7e5c47 100644
--- a/tests/transformation/streamline/test_move_mul_past_dw_conv.py
+++ b/tests/transformation/streamline/test_move_mul_past_dw_conv.py
@@ -33,7 +33,7 @@ from qonnx.core.modelwrapper import ModelWrapper
 from qonnx.custom_op.general.im2col import compute_conv_output_dim
 from qonnx.transformation.infer_datatypes import InferDataTypes
 from qonnx.transformation.infer_shapes import InferShapes
-from qonnx.util.basic import gen_finn_dt_tensor
+from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model
 
 import finn.core.onnx_exec as oxe
 from finn.transformation.streamline.reorder import MoveMulPastDWConv
@@ -94,7 +94,7 @@ def test_move_mul_past_dw_conv(ifm_dim, ifm_ch, k, stride, pad_amt, dw):
         value_info=[mul, W],
     )
 
-    model = helper.make_model(graph, producer_name="mulpastconv-model")
+    model = qonnx_make_model(graph, producer_name="mulpastconv-model")
     model = ModelWrapper(model)
     inp_values = gen_finn_dt_tensor(DataType["INT2"], [1, ifm_ch, ifm_dim, ifm_dim])
     mul_values = gen_finn_dt_tensor(DataType["INT2"], [1, ifm_ch, 1, 1])
diff --git a/tests/transformation/streamline/test_move_mul_past_maxpool.py b/tests/transformation/streamline/test_move_mul_past_maxpool.py
index fcc1b6513230c548bdcc04a40aad793b64c6faf2..3bae2905a064b8372b520a7a8083905284343429 100755
--- a/tests/transformation/streamline/test_move_mul_past_maxpool.py
+++ b/tests/transformation/streamline/test_move_mul_past_maxpool.py
@@ -34,7 +34,7 @@ from qonnx.core.modelwrapper import ModelWrapper
 from qonnx.custom_op.general.maxpoolnhwc import compute_pool_output_dim
 from qonnx.transformation.infer_datatypes import InferDataTypes
 from qonnx.transformation.infer_shapes import InferShapes
-from qonnx.util.basic import gen_finn_dt_tensor
+from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model
 
 import finn.core.onnx_exec as oxe
 from finn.transformation.streamline.reorder import MoveMulPastMaxPool
@@ -92,7 +92,7 @@ def test_move_mul_past_maxpool(ifm_dim, ifm_ch, k, stride, pad, cw, negative):
         value_info=[mul],
     )
 
-    model = helper.make_model(graph, producer_name="mulpastmaxpool-model")
+    model = qonnx_make_model(graph, producer_name="mulpastmaxpool-model")
     model = ModelWrapper(model)
     inp_values = gen_finn_dt_tensor(DataType["INT2"], [1, ifm_ch, ifm_dim, ifm_dim])
     mul_values = np.random.random_sample(mul_shape).astype(np.float32)
diff --git a/tests/transformation/streamline/test_move_scalar_past_conv.py b/tests/transformation/streamline/test_move_scalar_past_conv.py
index 59b8b8f8b2fee99bbb77c6d354620406a108cb54..bb99fd1d8f7d48ab9ad7038d78f5352f26f2ad06 100644
--- a/tests/transformation/streamline/test_move_scalar_past_conv.py
+++ b/tests/transformation/streamline/test_move_scalar_past_conv.py
@@ -32,6 +32,7 @@ import onnx.helper as oh
 from onnx import TensorProto
 from qonnx.core.modelwrapper import ModelWrapper
 from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import qonnx_make_model
 
 import finn.core.onnx_exec as ox
 from finn.transformation.streamline import MoveAddPastConv, MoveScalarMulPastConv
@@ -79,7 +80,7 @@ def test_move_scalar_past_conv(test_args, padding):
     value_info += [oh.make_tensor_value_info("p2", TensorProto.FLOAT, conv_param_shape)]
     value_info += [oh.make_tensor_value_info("p3", TensorProto.FLOAT, conv_param_shape)]
 
-    modelproto = oh.make_model(
+    modelproto = qonnx_make_model(
         oh.make_graph(
             name="test",
             inputs=[top_in],
@@ -158,7 +159,7 @@ def test_move_scalar_past_conv_only_if_linear(test_args):
     value_info += [oh.make_tensor_value_info("p4", TensorProto.FLOAT, conv_param_shape)]
     value_info += [oh.make_tensor_value_info("p5", TensorProto.FLOAT, conv_param_shape)]
 
-    modelproto = oh.make_model(
+    modelproto = qonnx_make_model(
         oh.make_graph(
             name="test",
             inputs=[top_in],
diff --git a/tests/transformation/streamline/test_move_scalar_past_matmul.py b/tests/transformation/streamline/test_move_scalar_past_matmul.py
index 6fdaaadfaea5862b566fd3a8d060ac28acadf1cd..6c788294bc739332c0b9bd0e98081bcb83330b53 100644
--- a/tests/transformation/streamline/test_move_scalar_past_matmul.py
+++ b/tests/transformation/streamline/test_move_scalar_past_matmul.py
@@ -33,6 +33,7 @@ import onnx.helper as oh
 from onnx import TensorProto
 from qonnx.core.modelwrapper import ModelWrapper
 from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import qonnx_make_model
 
 import finn.core.onnx_exec as ox
 from finn.transformation.streamline import (
@@ -47,7 +48,7 @@ def test_move_scalar_mul_past_matmul():
     mul_param = oh.make_tensor_value_info("mul_param", TensorProto.FLOAT, [1, 1])
     matmul_param = oh.make_tensor_value_info("matmul_param", TensorProto.FLOAT, [2, 2])
     top_out = oh.make_tensor_value_info("top_out", TensorProto.FLOAT, [1, 2])
-    modelproto = oh.make_model(
+    modelproto = qonnx_make_model(
         oh.make_graph(
             name="test",
             inputs=[top_in],
@@ -79,7 +80,7 @@ def test_move_scalar_add_past_matmul():
     add_param = oh.make_tensor_value_info("add_param", TensorProto.FLOAT, [1, 1])
     matmul_param = oh.make_tensor_value_info("matmul_param", TensorProto.FLOAT, [2, 2])
     top_out = oh.make_tensor_value_info("top_out", TensorProto.FLOAT, [1, 2])
-    modelproto = oh.make_model(
+    modelproto = qonnx_make_model(
         oh.make_graph(
             name="test",
             inputs=[top_in],
@@ -122,7 +123,7 @@ def test_move_scalar_past_matmul_only_if_linear(test_args):
     p2 = oh.make_tensor_value_info("p2", TensorProto.FLOAT, matmul_shape)
     p3 = oh.make_tensor_value_info("p3", TensorProto.FLOAT, matmul_shape)
     p4 = oh.make_tensor_value_info("p4", TensorProto.FLOAT, matmul_shape)
-    modelproto = oh.make_model(
+    modelproto = qonnx_make_model(
         oh.make_graph(
             name="test",
             inputs=[top_in],
diff --git a/tests/transformation/streamline/test_move_transpose_past_scalar_mul.py b/tests/transformation/streamline/test_move_transpose_past_scalar_mul.py
index 9662ba8a908e9bb793e0c0c2b078cf26adb5cef3..6bf72961ac06331c8ce972c8ca78dea99fb3c0a0 100644
--- a/tests/transformation/streamline/test_move_transpose_past_scalar_mul.py
+++ b/tests/transformation/streamline/test_move_transpose_past_scalar_mul.py
@@ -36,6 +36,7 @@ from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNode
 from qonnx.transformation.infer_data_layouts import InferDataLayouts
 from qonnx.transformation.infer_datatypes import InferDataTypes
 from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import qonnx_make_model
 
 import finn.core.onnx_exec as oxe
 from finn.transformation.streamline.reorder import MoveTransposePastScalarMul
@@ -71,7 +72,7 @@ def test_move_transpose_past_scalar_mul(perm, scalar, data_layout):
         value_info=[a0],
     )
 
-    model = helper.make_model(graph, producer_name="mv_transpose_model")
+    model = qonnx_make_model(graph, producer_name="mv_transpose_model")
     model = ModelWrapper(model)
 
     # initialize values
diff --git a/tests/transformation/streamline/test_round_thresholds.py b/tests/transformation/streamline/test_round_thresholds.py
index 1ec5f02e878a540a89cc37179b2e6dd76ede882c..85c60b37d5193de7ed2f38b9da6eb2e9b35b0150 100644
--- a/tests/transformation/streamline/test_round_thresholds.py
+++ b/tests/transformation/streamline/test_round_thresholds.py
@@ -32,6 +32,7 @@ import numpy as np
 from onnx import TensorProto, helper
 from qonnx.core.datatype import DataType
 from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.util.basic import qonnx_make_model
 
 import finn.core.onnx_exec as oxe
 from finn.transformation.streamline import RoundAndClipThresholds
@@ -46,7 +47,7 @@ def test_round_thresholds():
         "MultiThreshold", ["v", "thresholds"], ["out"], domain="qonnx.custom_op.general"
     )
     graph_def = helper.make_graph([node_def], "test_model", [v, thresholds], [out])
-    model_def = helper.make_model(graph_def)
+    model_def = qonnx_make_model(graph_def)
     model = ModelWrapper(model_def)
     threshold_val = np.asarray([[-1.1], [0.7], [2.3], [5.1]], dtype=np.float32)
     model.set_initializer("thresholds", threshold_val)
diff --git a/tests/transformation/streamline/test_scale_resize_nhwc.py b/tests/transformation/streamline/test_scale_resize_nhwc.py
index f10930f4e7d5aeb98a60630e7e4f48adfc371d59..5e107448f8d8cc78d572f846496ed541591dfe05 100644
--- a/tests/transformation/streamline/test_scale_resize_nhwc.py
+++ b/tests/transformation/streamline/test_scale_resize_nhwc.py
@@ -9,7 +9,7 @@ from qonnx.core.datatype import DataType
 from qonnx.core.modelwrapper import ModelWrapper
 from qonnx.transformation.infer_data_layouts import InferDataLayouts
 from qonnx.transformation.infer_shapes import InferShapes
-from qonnx.util.basic import gen_finn_dt_tensor
+from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model
 
 import finn.core.onnx_exec as oxe
 from finn.transformation.streamline.reorder import MakeScaleResizeNHWC
@@ -58,7 +58,7 @@ def create_resize_transpose(ifm_dim, ifm_ch, scales, mode, idt):
         value_info=[outp_up, param, roi],
     )
 
-    model = oh.make_model(graph, producer_name="resize_model1")
+    model = qonnx_make_model(graph, producer_name="resize_model1")
     model = ModelWrapper(model)
     model.set_tensor_datatype("inp", idt)
     model.set_tensor_datatype("outp", idt)
@@ -113,7 +113,7 @@ def create_transpose_resize(ifm_dim, ifm_ch, scales, mode, idt):
         value_info=[outp_tr, param, roi],
     )
 
-    model = oh.make_model(graph, producer_name="resize_model2")
+    model = qonnx_make_model(graph, producer_name="resize_model2")
     model = ModelWrapper(model)
     model.set_tensor_datatype("inp", idt)
     model.set_tensor_datatype("outp", idt)
@@ -180,7 +180,7 @@ def create_transpose_resize_transpose(ifm_dim, ifm_ch, scales, mode, idt):
         value_info=[outp_up, outp_tr, param, roi],
     )
 
-    model = oh.make_model(graph, producer_name="resize_model3")
+    model = qonnx_make_model(graph, producer_name="resize_model3")
     model = ModelWrapper(model)
     model.set_tensor_datatype("inp", idt)
     model.set_tensor_datatype("outp", idt)
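
Every hunk in the test files above makes the same mechanical substitution: test models are now built with qonnx.util.basic.qonnx_make_model instead of onnx.helper.make_model. The sketch below illustrates the assumed motivation: onnx.helper.make_model stamps the ModelProto with the default opset of whichever onnx version happens to be installed, while the qonnx wrapper is understood to pin a preferred opset so test behavior does not drift across onnx releases. This is a minimal illustration, not code from the patch; the trivial Identity graph and the "sketch-model" name are hypothetical.

import onnx.helper as oh
from onnx import TensorProto
from qonnx.util.basic import qonnx_make_model

# A trivial one-node graph, built the same way the streamline tests
# build theirs, just smaller.
inp = oh.make_tensor_value_info("inp", TensorProto.FLOAT, [2])
outp = oh.make_tensor_value_info("outp", TensorProto.FLOAT, [2])
node = oh.make_node("Identity", ["inp"], ["outp"])
graph = oh.make_graph([node], "sketch", [inp], [outp])

# oh.make_model(graph) would pick up the installed onnx package's default
# opset; qonnx_make_model is assumed to pin the opset qonnx prefers.
model = qonnx_make_model(graph, producer_name="sketch-model")
print(model.opset_import)  # shows which opset the model was stamped with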