diff --git a/src/finn/transformation/fpgadataflow/templates.py b/src/finn/transformation/fpgadataflow/templates.py
index a613d0622ee95e7f1ca848142e2930cf6d3c91bd..ba45e01bf3ecee457e9788e5dbea4cd1c3ee0007 100644
--- a/src/finn/transformation/fpgadataflow/templates.py
+++ b/src/finn/transformation/fpgadataflow/templates.py
@@ -103,7 +103,9 @@ dma.sendchannel.wait()
 dma.recvchannel.wait()
 
 # unpack the packed output buffer from accelerator
-obuf_folded = packed_bytearray_to_finnpy(obuf_packed, odt, oshape_folded)
+obuf_folded = packed_bytearray_to_finnpy(
+    obuf_packed, odt, oshape_folded, reverse_endian=True
+)
 # convert to normal reshape and save
 obuf_normal = obuf_folded.reshape(oshape_normal)
 np.save("output.npy", obuf_normal)