openSUSE Build Service

Overview Repositories Revisions Requests Users Attributes Meta

File riscv.patch of Package valgrind

diff --git a/Makefile.all.am b/Makefile.all.am
index dcea2690e..109e3abbc 100755
--- a/Makefile.all.am
+++ b/Makefile.all.am
@@ -291,6 +291,11 @@ AM_CFLAGS_PSO_MIPS64_LINUX = @FLAG_M64@ $(AM_CFLAGS_BASE) \
 				$(AM_CFLAGS_PSO_BASE)
 AM_CCASFLAGS_MIPS64_LINUX  = @FLAG_M64@ -g
 
+AM_FLAG_M3264_RISCV64_LINUX = @FLAG_M64@
+AM_CFLAGS_RISCV64_LINUX     = @FLAG_M64@ $(AM_CFLAGS_BASE)
+AM_CFLAGS_PSO_RISCV64_LINUX = @FLAG_M64@ $(AM_CFLAGS_BASE) $(AM_CFLAGS_PSO_BASE)
+AM_CCASFLAGS_RISCV64_LINUX  = @FLAG_M64@ -g
+
 AM_FLAG_M3264_X86_SOLARIS   = @FLAG_M32@
 AM_CFLAGS_X86_SOLARIS       = @FLAG_M32@ @PREFERRED_STACK_BOUNDARY_2@ \
 				$(AM_CFLAGS_BASE) -fomit-frame-pointer @SOLARIS_UNDEF_LARGESOURCE@
@@ -352,6 +357,7 @@ PRELOAD_LDFLAGS_S390X_LINUX    = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M64@
 PRELOAD_LDFLAGS_MIPS32_LINUX   = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M32@
 PRELOAD_LDFLAGS_NANOMIPS_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M32@
 PRELOAD_LDFLAGS_MIPS64_LINUX   = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M64@
+PRELOAD_LDFLAGS_RISCV64_LINUX  = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M64@
 PRELOAD_LDFLAGS_X86_SOLARIS    = $(PRELOAD_LDFLAGS_COMMON_SOLARIS) @FLAG_M32@
 PRELOAD_LDFLAGS_AMD64_SOLARIS  = $(PRELOAD_LDFLAGS_COMMON_SOLARIS) @FLAG_M64@
 
diff --git a/Makefile.am b/Makefile.am
index 7749442e9..3c9a2d1f4 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -116,6 +116,7 @@ EXTRA_DIST = \
 	README.android_emulator \
 	README.mips \
 	README.aarch64 \
+	README.riscv64 \
 	README.solaris \
 	README.freebsd \
 	NEWS.old \
diff --git a/Makefile.tool.am b/Makefile.tool.am
index c779596e7..7f18d2547 100644
--- a/Makefile.tool.am
+++ b/Makefile.tool.am
@@ -110,6 +110,9 @@ TOOL_LDFLAGS_MIPS64_LINUX = \
 	-static -nodefaultlibs -nostartfiles -u __start @FLAG_NO_BUILD_ID@ \
 	@FLAG_M64@
 
+TOOL_LDFLAGS_RISCV64_LINUX = \
+	$(TOOL_LDFLAGS_COMMON_LINUX) @FLAG_M64@
+
 TOOL_LDFLAGS_X86_SOLARIS = \
 	$(TOOL_LDFLAGS_COMMON_SOLARIS) @FLAG_M32@
 
@@ -181,6 +184,9 @@ LIBREPLACEMALLOC_MIPS32_LINUX = \
 LIBREPLACEMALLOC_MIPS64_LINUX = \
 	$(top_builddir)/coregrind/libreplacemalloc_toolpreload-mips64-linux.a
 
+LIBREPLACEMALLOC_RISCV64_LINUX = \
+	$(top_builddir)/coregrind/libreplacemalloc_toolpreload-riscv64-linux.a
+
 LIBREPLACEMALLOC_X86_SOLARIS = \
 	$(top_builddir)/coregrind/libreplacemalloc_toolpreload-x86-solaris.a
 
@@ -258,6 +264,11 @@ LIBREPLACEMALLOC_LDFLAGS_MIPS64_LINUX = \
 	$(LIBREPLACEMALLOC_MIPS64_LINUX) \
 	-Wl,--no-whole-archive
 
+LIBREPLACEMALLOC_LDFLAGS_RISCV64_LINUX = \
+	-Wl,--whole-archive \
+	$(LIBREPLACEMALLOC_RISCV64_LINUX) \
+	-Wl,--no-whole-archive
+
 LIBREPLACEMALLOC_LDFLAGS_X86_SOLARIS = \
 	-Wl,--whole-archive \
 	$(LIBREPLACEMALLOC_X86_SOLARIS) \
diff --git a/Makefile.vex.am b/Makefile.vex.am
index c1244a69d..f75e9b4c6 100644
--- a/Makefile.vex.am
+++ b/Makefile.vex.am
@@ -26,6 +26,7 @@ pkginclude_HEADERS = \
 	pub/libvex_guest_s390x.h \
 	pub/libvex_guest_mips32.h \
 	pub/libvex_guest_mips64.h \
+	pub/libvex_guest_riscv64.h \
 	pub/libvex_s390x_common.h \
 	pub/libvex_ir.h \
 	pub/libvex_trc_values.h \
@@ -49,6 +50,7 @@ noinst_HEADERS = \
 	priv/guest_mips_defs.h \
 	priv/mips_defs.h \
 	priv/guest_nanomips_defs.h \
+	priv/guest_riscv64_defs.h \
 	priv/host_generic_regs.h \
 	priv/host_generic_simd64.h \
 	priv/host_generic_simd128.h \
@@ -65,7 +67,8 @@ noinst_HEADERS = \
 	priv/s390_defs.h \
 	priv/host_mips_defs.h \
 	priv/host_nanomips_defs.h \
-	priv/common_nanomips_defs.h
+	priv/common_nanomips_defs.h \
+	priv/host_riscv64_defs.h
 
 BUILT_SOURCES = pub/libvex_guest_offsets.h
 CLEANFILES    = pub/libvex_guest_offsets.h
@@ -94,7 +97,8 @@ pub/libvex_guest_offsets.h: auxprogs/genoffsets.c \
 			    pub/libvex_guest_arm64.h \
 			    pub/libvex_guest_s390x.h \
 			    pub/libvex_guest_mips32.h \
-			    pub/libvex_guest_mips64.h
+			    pub/libvex_guest_mips64.h \
+			    pub/libvex_guest_riscv64.h
 	rm -f auxprogs/genoffsets.s
 	$(mkdir_p) auxprogs pub
 	$(CC) $(CFLAGS_FOR_GENOFFSETS) \
@@ -152,6 +156,8 @@ LIBVEX_SOURCES_COMMON = \
 	priv/guest_mips_toIR.c \
 	priv/guest_nanomips_helpers.c \
 	priv/guest_nanomips_toIR.c \
+	priv/guest_riscv64_helpers.c \
+	priv/guest_riscv64_toIR.c \
 	priv/host_generic_regs.c \
 	priv/host_generic_simd64.c \
 	priv/host_generic_simd128.c \
@@ -176,7 +182,9 @@ LIBVEX_SOURCES_COMMON = \
 	priv/host_mips_defs.c \
 	priv/host_nanomips_defs.c \
 	priv/host_mips_isel.c \
-	priv/host_nanomips_isel.c
+	priv/host_nanomips_isel.c \
+	priv/host_riscv64_defs.c \
+	priv/host_riscv64_isel.c
 
 LIBVEXMULTIARCH_SOURCES = priv/multiarch_main_main.c
 
diff --git a/README.riscv64 b/README.riscv64
new file mode 100644
index 000000000..6ce2a082f
--- /dev/null
+++ b/README.riscv64
@@ -0,0 +1,45 @@
+
+Status
+~~~~~~
+
+The RISC-V port targets the 64-bit RISC-V architecture and the Linux operating
+system. The port has been tested to work on real hardware and under QEMU.
+
+The following ISA base and extensions are currently supported:
+
+| Name         | Description                       | #Instrs | Notes    |
+| ------------ | --------------------------------- | ------- | -------- |
+| RV64I        | Base instruction set              |   52/52 |          |
+| RV64M        | Integer multiplication & division |   12/13 | (1)      |
+| RV64A        | Atomic                            |   22/22 | (2)      |
+| RV64F        | Single-precision floating-point   |   30/30 | (3)      |
+| RV64D        | Double-precision floating-point   |   32/32 |          |
+| RV64Zicsr    | Control & status register         |     3/6 | (4), (5) |
+| RV64Zifencei | Instruction-fetch fence           |     0/1 | (6)      |
+| RV64C        | Compressed                        |   37/37 |          |
+
+Notes:
+(1) MULHSU is not recognized.
+(2) LR and SC use the VEX "fallback" method which suffers from the ABA problem.
+(3) Operations do not check if the input operands are correctly NaN-boxed.
+(4) CSRRWI, CSRRSI and CSRRCI are not recognized.
+(5) Only registers fflags, frm and fcsr are accepted.
+(6) FENCE.I is not recognized.
+
+
+Implementation tidying-up/TODO notes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+* Implement a proper "non-fallback" method for LR and SC instructions.
+* Add a check for correct NaN-boxing of 32-bit floating-point operands.
+* Optimize instruction selection, in particular make more use of <instr>i
+  variants.
+* Optimize handling of floating-point exceptions. Avoid helpers and calculate
+  exception flags using the same instruction which produced an actual result.
+* Review register usage by the codegen.
+* Avoid re-use of Intel constants CFIC_IA_SPREL and CFIC_IA_BPREL. Generalize
+  them for all architectures or introduce equivalent CFIC_RISCV64_ variants.
+* Get rid of the typedef of vki_modify_ldt_t in include/vki/vki-riscv64-linux.h.
+* Review if setup_client_stack() should expose AT_SYSINFO_EHDR to clients.
+* Make sure that the final exit sequence in run_a_thread_NORETURN() is not racy
+  with regard to accessing the thread state.
diff --git a/VEX/auxprogs/genoffsets.c b/VEX/auxprogs/genoffsets.c
index 6b70cd087..48c9723dc 100644
--- a/VEX/auxprogs/genoffsets.c
+++ b/VEX/auxprogs/genoffsets.c
@@ -53,6 +53,7 @@
 #include "../pub/libvex_guest_s390x.h"
 #include "../pub/libvex_guest_mips32.h"
 #include "../pub/libvex_guest_mips64.h"
+#include "../pub/libvex_guest_riscv64.h"
 
 #define VG_STRINGIFZ(__str)  #__str
 #define VG_STRINGIFY(__str)  VG_STRINGIFZ(__str)
@@ -265,6 +266,74 @@ void foo ( void )
    GENOFFSET(MIPS64,mips64,PC);
    GENOFFSET(MIPS64,mips64,HI);
    GENOFFSET(MIPS64,mips64,LO);
+
+   // riscv64
+   GENOFFSET(RISCV64,riscv64,x0);
+   GENOFFSET(RISCV64,riscv64,x1);
+   GENOFFSET(RISCV64,riscv64,x2);
+   GENOFFSET(RISCV64,riscv64,x3);
+   GENOFFSET(RISCV64,riscv64,x4);
+   GENOFFSET(RISCV64,riscv64,x5);
+   GENOFFSET(RISCV64,riscv64,x6);
+   GENOFFSET(RISCV64,riscv64,x7);
+   GENOFFSET(RISCV64,riscv64,x8);
+   GENOFFSET(RISCV64,riscv64,x9);
+   GENOFFSET(RISCV64,riscv64,x10);
+   GENOFFSET(RISCV64,riscv64,x11);
+   GENOFFSET(RISCV64,riscv64,x12);
+   GENOFFSET(RISCV64,riscv64,x13);
+   GENOFFSET(RISCV64,riscv64,x14);
+   GENOFFSET(RISCV64,riscv64,x15);
+   GENOFFSET(RISCV64,riscv64,x16);
+   GENOFFSET(RISCV64,riscv64,x17);
+   GENOFFSET(RISCV64,riscv64,x18);
+   GENOFFSET(RISCV64,riscv64,x19);
+   GENOFFSET(RISCV64,riscv64,x20);
+   GENOFFSET(RISCV64,riscv64,x21);
+   GENOFFSET(RISCV64,riscv64,x22);
+   GENOFFSET(RISCV64,riscv64,x23);
+   GENOFFSET(RISCV64,riscv64,x24);
+   GENOFFSET(RISCV64,riscv64,x25);
+   GENOFFSET(RISCV64,riscv64,x26);
+   GENOFFSET(RISCV64,riscv64,x27);
+   GENOFFSET(RISCV64,riscv64,x28);
+   GENOFFSET(RISCV64,riscv64,x29);
+   GENOFFSET(RISCV64,riscv64,x30);
+   GENOFFSET(RISCV64,riscv64,x31);
+   GENOFFSET(RISCV64,riscv64,pc);
+   GENOFFSET(RISCV64,riscv64,f0);
+   GENOFFSET(RISCV64,riscv64,f1);
+   GENOFFSET(RISCV64,riscv64,f2);
+   GENOFFSET(RISCV64,riscv64,f3);
+   GENOFFSET(RISCV64,riscv64,f4);
+   GENOFFSET(RISCV64,riscv64,f5);
+   GENOFFSET(RISCV64,riscv64,f6);
+   GENOFFSET(RISCV64,riscv64,f7);
+   GENOFFSET(RISCV64,riscv64,f8);
+   GENOFFSET(RISCV64,riscv64,f9);
+   GENOFFSET(RISCV64,riscv64,f10);
+   GENOFFSET(RISCV64,riscv64,f11);
+   GENOFFSET(RISCV64,riscv64,f12);
+   GENOFFSET(RISCV64,riscv64,f13);
+   GENOFFSET(RISCV64,riscv64,f14);
+   GENOFFSET(RISCV64,riscv64,f15);
+   GENOFFSET(RISCV64,riscv64,f16);
+   GENOFFSET(RISCV64,riscv64,f17);
+   GENOFFSET(RISCV64,riscv64,f18);
+   GENOFFSET(RISCV64,riscv64,f19);
+   GENOFFSET(RISCV64,riscv64,f20);
+   GENOFFSET(RISCV64,riscv64,f21);
+   GENOFFSET(RISCV64,riscv64,f22);
+   GENOFFSET(RISCV64,riscv64,f23);
+   GENOFFSET(RISCV64,riscv64,f24);
+   GENOFFSET(RISCV64,riscv64,f25);
+   GENOFFSET(RISCV64,riscv64,f26);
+   GENOFFSET(RISCV64,riscv64,f27);
+   GENOFFSET(RISCV64,riscv64,f28);
+   GENOFFSET(RISCV64,riscv64,f29);
+   GENOFFSET(RISCV64,riscv64,f30);
+   GENOFFSET(RISCV64,riscv64,f31);
+   GENOFFSET(RISCV64,riscv64,fcsr);
 }
 
 /*--------------------------------------------------------------------*/
diff --git a/VEX/priv/guest_riscv64_defs.h b/VEX/priv/guest_riscv64_defs.h
new file mode 100644
index 000000000..ee5435e14
--- /dev/null
+++ b/VEX/priv/guest_riscv64_defs.h
@@ -0,0 +1,136 @@
+
+/*--------------------------------------------------------------------*/
+/*--- begin                                   guest_riscv64_defs.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright (C) 2020-2023 Petr Pavlu
+      petr.pavlu@dagobah.cz
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+   The GNU General Public License is contained in the file COPYING.
+
+   Neither the names of the U.S. Department of Energy nor the
+   University of California nor the names of its contributors may be
+   used to endorse or promote products derived from this software
+   without prior written permission.
+*/
+
+/* Only to be used within the guest_riscv64_* files. */
+
+#ifndef __VEX_GUEST_RISCV64_DEFS_H
+#define __VEX_GUEST_RISCV64_DEFS_H
+
+#include "libvex_basictypes.h"
+
+#include "guest_generic_bb_to_IR.h"
+
+/*------------------------------------------------------------*/
+/*--- riscv64 to IR conversion                             ---*/
+/*------------------------------------------------------------*/
+
+/* Convert one riscv64 insn to IR. See the type DisOneInstrFn in
+   guest_generic_bb_to_IR.h. */
+DisResult disInstr_RISCV64(IRSB*              irbb,
+                           const UChar*       guest_code,
+                           Long               delta,
+                           Addr               guest_IP,
+                           VexArch            guest_arch,
+                           const VexArchInfo* archinfo,
+                           const VexAbiInfo*  abiinfo,
+                           VexEndness         host_endness,
+                           Bool               sigill_diag);
+
+/* Used by the optimiser to specialise calls to helpers. */
+IRExpr* guest_riscv64_spechelper(const HChar* function_name,
+                                 IRExpr**     args,
+                                 IRStmt**     precedingStmts,
+                                 Int          n_precedingStmts);
+
+/* Describes to the optimiser which parts of the guest state require precise
+   memory exceptions. This is logically part of the guest state description. */
+Bool guest_riscv64_state_requires_precise_mem_exns(
+   Int minoff, Int maxoff, VexRegisterUpdates pxControl);
+
+extern VexGuestLayout riscv64guest_layout;
+
+/*------------------------------------------------------------*/
+/*--- riscv64 guest helpers                                ---*/
+/*------------------------------------------------------------*/
+
+/* --- CLEAN HELPERS --- */
+
+/* Calculate resulting flags of a specified floating-point operation. Returns
+   a 32-bit value where bits 4:0 contain the fflags in the RISC-V native
+   format (NV DZ OF UF NX) and remaining upper bits are zero. */
+UInt riscv64g_calculate_fflags_fsqrt_s(Float a1, UInt rm_RISCV);
+UInt riscv64g_calculate_fflags_fcvt_w_s(Float a1, UInt rm_RISCV);
+UInt riscv64g_calculate_fflags_fcvt_wu_s(Float a1, UInt rm_RISCV);
+UInt riscv64g_calculate_fflags_fcvt_s_w(UInt a1, UInt rm_RISCV);
+UInt riscv64g_calculate_fflags_fcvt_s_wu(UInt a1, UInt rm_RISCV);
+UInt riscv64g_calculate_fflags_fcvt_l_s(Float a1, UInt rm_RISCV);
+UInt riscv64g_calculate_fflags_fcvt_lu_s(Float a1, UInt rm_RISCV);
+UInt riscv64g_calculate_fflags_fcvt_s_l(ULong a1, UInt rm_RISCV);
+UInt riscv64g_calculate_fflags_fcvt_s_lu(ULong a1, UInt rm_RISCV);
+UInt riscv64g_calculate_fflags_fsqrt_d(Double a1, UInt rm_RISCV);
+UInt riscv64g_calculate_fflags_fcvt_s_d(Double a1, UInt rm_RISCV);
+UInt riscv64g_calculate_fflags_fcvt_w_d(Double a1, UInt rm_RISCV);
+UInt riscv64g_calculate_fflags_fcvt_wu_d(Double a1, UInt rm_RISCV);
+UInt riscv64g_calculate_fflags_fcvt_l_d(Double a1, UInt rm_RISCV);
+UInt riscv64g_calculate_fflags_fcvt_lu_d(Double a1, UInt rm_RISCV);
+UInt riscv64g_calculate_fflags_fcvt_d_l(ULong a1, UInt rm_RISCV);
+UInt riscv64g_calculate_fflags_fcvt_d_lu(ULong a1, UInt rm_RISCV);
+
+UInt riscv64g_calculate_fflags_fadd_s(Float a1, Float a2, UInt rm_RISCV);
+UInt riscv64g_calculate_fflags_fmul_s(Float a1, Float a2, UInt rm_RISCV);
+UInt riscv64g_calculate_fflags_fdiv_s(Float a1, Float a2, UInt rm_RISCV);
+UInt riscv64g_calculate_fflags_fadd_d(Double a1, Double a2, UInt rm_RISCV);
+UInt riscv64g_calculate_fflags_fmul_d(Double a1, Double a2, UInt rm_RISCV);
+UInt riscv64g_calculate_fflags_fdiv_d(Double a1, Double a2, UInt rm_RISCV);
+
+UInt riscv64g_calculate_fflags_fmin_s(Float a1, Float a2);
+UInt riscv64g_calculate_fflags_fmax_s(Float a1, Float a2);
+UInt riscv64g_calculate_fflags_feq_s(Float a1, Float a2);
+UInt riscv64g_calculate_fflags_flt_s(Float a1, Float a2);
+UInt riscv64g_calculate_fflags_fle_s(Float a1, Float a2);
+UInt riscv64g_calculate_fflags_fmin_d(Double a1, Double a2);
+UInt riscv64g_calculate_fflags_fmax_d(Double a1, Double a2);
+UInt riscv64g_calculate_fflags_feq_d(Double a1, Double a2);
+UInt riscv64g_calculate_fflags_flt_d(Double a1, Double a2);
+UInt riscv64g_calculate_fflags_fle_d(Double a1, Double a2);
+
+UInt riscv64g_calculate_fflags_fmadd_s(Float a1,
+                                       Float a2,
+                                       Float a3,
+                                       UInt  rm_RISCV);
+UInt riscv64g_calculate_fflags_fmadd_d(Double a1,
+                                       Double a2,
+                                       Double a3,
+                                       UInt   rm_RISCV);
+
+/* Calculate floating-point class. Returns a 64-bit value where bits 9:0
+   contain the properties in the RISC-V FCLASS-instruction format and remaining
+   upper bits are zero. */
+ULong riscv64g_calculate_fclass_s(Float a1);
+ULong riscv64g_calculate_fclass_d(Double a1);
+
+#endif /* ndef __VEX_GUEST_RISCV64_DEFS_H */
+
+/*--------------------------------------------------------------------*/
+/*--- end                                     guest_riscv64_defs.h ---*/
+/*--------------------------------------------------------------------*/
diff --git a/VEX/priv/guest_riscv64_helpers.c b/VEX/priv/guest_riscv64_helpers.c
new file mode 100644
index 000000000..e7c4ed805
--- /dev/null
+++ b/VEX/priv/guest_riscv64_helpers.c
@@ -0,0 +1,481 @@
+
+/*--------------------------------------------------------------------*/
+/*--- begin                                guest_riscv64_helpers.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright (C) 2020-2023 Petr Pavlu
+      petr.pavlu@dagobah.cz
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "libvex_guest_riscv64.h"
+
+#include "guest_riscv64_defs.h"
+#include "main_util.h"
+
+/* This file contains helper functions for riscv64 guest code. Calls to these
+   functions are generated by the back end. These calls are of course in the
+   host machine code and this file will be compiled to host machine code, so
+   that all makes sense.
+
+   Only change the signatures of these helper functions very carefully. If you
+   change the signature here, you'll have to change the parameters passed to it
+   in the IR calls constructed by guest_riscv64_toIR.c.
+
+   The convention used is that all functions called from generated code are
+   named riscv64g_<something>, and any function whose name lacks that prefix is
+   not called from generated code. Note that some LibVEX_* functions can however
+   be called by VEX's client, but that is not the same as calling them from
+   VEX-generated code.
+*/
+
+#if defined(__riscv) && (__riscv_xlen == 64)
+/* clang-format off */
+#define CALCULATE_FFLAGS_UNARY64_F(inst)                                       \
+   do {                                                                        \
+      UInt res;                                                                \
+      __asm__ __volatile__(                                                    \
+         "csrr t0, fcsr\n\t"                                                   \
+         "csrw frm, %[rm]\n\t"                                                 \
+         "csrw fflags, zero\n\t"                                               \
+         inst " ft0, %[a1]\n\t"                                                \
+         "csrr %[res], fflags\n\t"                                             \
+         "csrw fcsr, t0\n\t"                                                   \
+         : [res] "=r"(res)                                                     \
+         : [a1] "f"(a1), [rm] "r"(rm_RISCV)                                    \
+         : "t0", "ft0");                                                       \
+      return res;                                                              \
+   } while (0)
+#define CALCULATE_FFLAGS_UNARY64_IF(inst)                                      \
+   do {                                                                        \
+      UInt res;                                                                \
+      __asm__ __volatile__(                                                    \
+         "csrr t0, fcsr\n\t"                                                   \
+         "csrw frm, %[rm]\n\t"                                                 \
+         "csrw fflags, zero\n\t"                                               \
+         inst " t1, %[a1]\n\t"                                                 \
+         "csrr %[res], fflags\n\t"                                             \
+         "csrw fcsr, t0\n\t"                                                   \
+         : [res] "=r"(res)                                                     \
+         : [a1] "f"(a1), [rm] "r"(rm_RISCV)                                    \
+         : "t0", "t1");                                                        \
+      return res;                                                              \
+   } while (0)
+#define CALCULATE_FFLAGS_UNARY64_FI(inst)                                      \
+   do {                                                                        \
+      UInt res;                                                                \
+      __asm__ __volatile__(                                                    \
+         "csrr t0, fcsr\n\t"                                                   \
+         "csrw frm, %[rm]\n\t"                                                 \
+         "csrw fflags, zero\n\t"                                               \
+         inst " ft0, %[a1]\n\t"                                                \
+         "csrr %[res], fflags\n\t"                                             \
+         "csrw fcsr, t0\n\t"                                                   \
+         : [res] "=r"(res)                                                     \
+         : [a1] "r"(a1), [rm] "r"(rm_RISCV)                                    \
+         : "t0", "ft0");                                                       \
+      return res;                                                              \
+   } while (0)
+/* clang-format on */
+#else
+/* No simulated version is currently implemented. */
+#define CALCULATE_FFLAGS_UNARY64_F(inst)                                       \
+   do {                                                                        \
+      (void)rm_RISCV;                                                          \
+      return 0;                                                                \
+   } while (0)
+#define CALCULATE_FFLAGS_UNARY64_IF(inst)                                      \
+   do {                                                                        \
+      (void)rm_RISCV;                                                          \
+      return 0;                                                                \
+   } while (0)
+#define CALCULATE_FFLAGS_UNARY64_FI(inst)                                      \
+   do {                                                                        \
+      (void)rm_RISCV;                                                          \
+      return 0;                                                                \
+   } while (0)
+#endif
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPERS */
+UInt riscv64g_calculate_fflags_fsqrt_s(Float a1, UInt rm_RISCV)
+{
+   CALCULATE_FFLAGS_UNARY64_F("fsqrt.s");
+}
+UInt riscv64g_calculate_fflags_fcvt_w_s(Float a1, UInt rm_RISCV)
+{
+   CALCULATE_FFLAGS_UNARY64_IF("fcvt.w.s");
+}
+UInt riscv64g_calculate_fflags_fcvt_wu_s(Float a1, UInt rm_RISCV)
+{
+   CALCULATE_FFLAGS_UNARY64_IF("fcvt.wu.s");
+}
+UInt riscv64g_calculate_fflags_fcvt_s_w(UInt a1, UInt rm_RISCV)
+{
+   CALCULATE_FFLAGS_UNARY64_FI("fcvt.s.w");
+}
+UInt riscv64g_calculate_fflags_fcvt_s_wu(UInt a1, UInt rm_RISCV)
+{
+   CALCULATE_FFLAGS_UNARY64_FI("fcvt.s.wu");
+}
+UInt riscv64g_calculate_fflags_fcvt_l_s(Float a1, UInt rm_RISCV)
+{
+   CALCULATE_FFLAGS_UNARY64_IF("fcvt.l.s");
+}
+UInt riscv64g_calculate_fflags_fcvt_lu_s(Float a1, UInt rm_RISCV)
+{
+   CALCULATE_FFLAGS_UNARY64_IF("fcvt.lu.s");
+}
+UInt riscv64g_calculate_fflags_fcvt_s_l(ULong a1, UInt rm_RISCV)
+{
+   CALCULATE_FFLAGS_UNARY64_FI("fcvt.s.l");
+}
+UInt riscv64g_calculate_fflags_fcvt_s_lu(ULong a1, UInt rm_RISCV)
+{
+   CALCULATE_FFLAGS_UNARY64_FI("fcvt.s.lu");
+}
+UInt riscv64g_calculate_fflags_fsqrt_d(Double a1, UInt rm_RISCV)
+{
+   CALCULATE_FFLAGS_UNARY64_F("fsqrt.d");
+}
+UInt riscv64g_calculate_fflags_fcvt_s_d(Double a1, UInt rm_RISCV)
+{
+   CALCULATE_FFLAGS_UNARY64_F("fcvt.s.d");
+}
+UInt riscv64g_calculate_fflags_fcvt_w_d(Double a1, UInt rm_RISCV)
+{
+   CALCULATE_FFLAGS_UNARY64_IF("fcvt.w.d");
+}
+UInt riscv64g_calculate_fflags_fcvt_wu_d(Double a1, UInt rm_RISCV)
+{
+   CALCULATE_FFLAGS_UNARY64_IF("fcvt.wu.d");
+}
+UInt riscv64g_calculate_fflags_fcvt_l_d(Double a1, UInt rm_RISCV)
+{
+   CALCULATE_FFLAGS_UNARY64_IF("fcvt.l.d");
+}
+UInt riscv64g_calculate_fflags_fcvt_lu_d(Double a1, UInt rm_RISCV)
+{
+   CALCULATE_FFLAGS_UNARY64_IF("fcvt.lu.d");
+}
+UInt riscv64g_calculate_fflags_fcvt_d_l(ULong a1, UInt rm_RISCV)
+{
+   CALCULATE_FFLAGS_UNARY64_FI("fcvt.d.l");
+}
+UInt riscv64g_calculate_fflags_fcvt_d_lu(ULong a1, UInt rm_RISCV)
+{
+   CALCULATE_FFLAGS_UNARY64_FI("fcvt.d.lu");
+}
+
+#if defined(__riscv) && (__riscv_xlen == 64)
+/* clang-format off */
+#define CALCULATE_FFLAGS_BINARY64(inst)                                        \
+   do {                                                                        \
+      UInt res;                                                                \
+      __asm__ __volatile__(                                                    \
+         "csrr t0, fcsr\n\t"                                                   \
+         "csrw frm, %[rm]\n\t"                                                 \
+         "csrw fflags, zero\n\t"                                               \
+         inst " ft0, %[a1], %[a2]\n\t" /* result discarded into ft0 */         \
+         "csrr %[res], fflags\n\t"                                             \
+         "csrw fcsr, t0\n\t"                                                   \
+         : [res] "=r"(res)                                                     \
+         : [a1] "f"(a1), [a2] "f"(a2), [rm] "r"(rm_RISCV)                      \
+         : "t0", "ft0"); /* t0 saves fcsr, ft0 takes the result */             \
+      return res;                                                              \
+   } while (0)
+#define CALCULATE_FFLAGS_BINARY64_IFF(inst)                                    \
+   do {                                                                        \
+      UInt res;                                                                \
+      __asm__ __volatile__(                                                    \
+         "csrr t0, fcsr\n\t"                                                   \
+         "csrw frm, %[rm]\n\t"                                                 \
+         "csrw fflags, zero\n\t"                                               \
+         inst " t1, %[a1], %[a2]\n\t"                                          \
+         "csrr %[res], fflags\n\t"                                             \
+         "csrw fcsr, t0\n\t"                                                   \
+         : [res] "=r"(res)                                                     \
+         : [a1] "f"(a1), [a2] "f"(a2), [rm] "r"(rm_RISCV)                      \
+         : "t0", "t1");                                                        \
+      return res;                                                              \
+   } while (0)
+/* clang-format on */
+#else
+/* No simulated version is currently implemented. */
+#define CALCULATE_FFLAGS_BINARY64(inst)                                        \
+   do {                                                                        \
+      (void)rm_RISCV;                                                          \
+      return 0;                                                                \
+   } while (0)
+#define CALCULATE_FFLAGS_BINARY64_IFF(inst)                                    \
+   do {                                                                        \
+      (void)rm_RISCV;                                                          \
+      return 0;                                                                \
+   } while (0)
+#endif
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPERS */
+UInt riscv64g_calculate_fflags_fadd_s(Float a1, Float a2, UInt rm_RISCV)
+{
+   CALCULATE_FFLAGS_BINARY64("fadd.s");
+}
+UInt riscv64g_calculate_fflags_fmul_s(Float a1, Float a2, UInt rm_RISCV)
+{
+   CALCULATE_FFLAGS_BINARY64("fmul.s");
+}
+UInt riscv64g_calculate_fflags_fdiv_s(Float a1, Float a2, UInt rm_RISCV)
+{
+   CALCULATE_FFLAGS_BINARY64("fdiv.s");
+}
+UInt riscv64g_calculate_fflags_fadd_d(Double a1, Double a2, UInt rm_RISCV)
+{
+   CALCULATE_FFLAGS_BINARY64("fadd.d");
+}
+UInt riscv64g_calculate_fflags_fmul_d(Double a1, Double a2, UInt rm_RISCV)
+{
+   CALCULATE_FFLAGS_BINARY64("fmul.d");
+}
+UInt riscv64g_calculate_fflags_fdiv_d(Double a1, Double a2, UInt rm_RISCV)
+{
+   CALCULATE_FFLAGS_BINARY64("fdiv.d");
+}
+UInt riscv64g_calculate_fflags_fmin_s(Float a1, Float a2)
+{
+   UInt rm_RISCV = 0; /* unused */
+   CALCULATE_FFLAGS_BINARY64("fmin.s");
+}
+UInt riscv64g_calculate_fflags_fmax_s(Float a1, Float a2)
+{
+   UInt rm_RISCV = 0; /* unused */
+   CALCULATE_FFLAGS_BINARY64("fmax.s");
+}
+UInt riscv64g_calculate_fflags_feq_s(Float a1, Float a2)
+{
+   UInt rm_RISCV = 0; /* unused */
+   CALCULATE_FFLAGS_BINARY64_IFF("feq.s");
+}
+UInt riscv64g_calculate_fflags_flt_s(Float a1, Float a2)
+{
+   UInt rm_RISCV = 0; /* unused */
+   CALCULATE_FFLAGS_BINARY64_IFF("flt.s");
+}
+UInt riscv64g_calculate_fflags_fle_s(Float a1, Float a2)
+{
+   UInt rm_RISCV = 0; /* unused */
+   CALCULATE_FFLAGS_BINARY64_IFF("fle.s");
+}
+UInt riscv64g_calculate_fflags_fmin_d(Double a1, Double a2)
+{
+   UInt rm_RISCV = 0; /* unused */
+   CALCULATE_FFLAGS_BINARY64("fmin.d");
+}
+UInt riscv64g_calculate_fflags_fmax_d(Double a1, Double a2)
+{
+   UInt rm_RISCV = 0; /* unused */
+   CALCULATE_FFLAGS_BINARY64("fmax.d");
+}
+UInt riscv64g_calculate_fflags_feq_d(Double a1, Double a2)
+{
+   UInt rm_RISCV = 0; /* unused */
+   CALCULATE_FFLAGS_BINARY64_IFF("feq.d");
+}
+UInt riscv64g_calculate_fflags_flt_d(Double a1, Double a2)
+{
+   UInt rm_RISCV = 0; /* unused */
+   CALCULATE_FFLAGS_BINARY64_IFF("flt.d");
+}
+UInt riscv64g_calculate_fflags_fle_d(Double a1, Double a2)
+{
+   UInt rm_RISCV = 0; /* unused */
+   CALCULATE_FFLAGS_BINARY64_IFF("fle.d");
+}
+
+#if defined(__riscv) && (__riscv_xlen == 64)
+/* clang-format off */
+#define CALCULATE_FFLAGS_TERNARY64(inst)                                       \
+   do {                                                                        \
+      UInt res;                                                                \
+      __asm__ __volatile__(                                                    \
+         "csrr t0, fcsr\n\t"                                                   \
+         "csrw frm, %[rm]\n\t"                                                 \
+         "csrw fflags, zero\n\t"                                               \
+         inst " ft0, %[a1], %[a2], %[a3]\n\t" /* result discarded into ft0 */  \
+         "csrr %[res], fflags\n\t"                                             \
+         "csrw fcsr, t0\n\t"                                                   \
+         : [res] "=r"(res)                                                     \
+         : [a1] "f"(a1), [a2] "f"(a2), [a3] "f"(a3), [rm] "r"(rm_RISCV)        \
+         : "t0", "ft0"); /* t0 saves fcsr, ft0 takes the result */             \
+      return res;                                                              \
+   } while (0)
+/* clang-format on */
+#else
+/* No simulated version is currently implemented. */
+#define CALCULATE_FFLAGS_TERNARY64(inst)                                       \
+   do {                                                                        \
+      (void)rm_RISCV;                                                          \
+      return 0;                                                                \
+   } while (0)
+#endif
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPERS */
+UInt riscv64g_calculate_fflags_fmadd_s(Float a1, /* 1st source operand */
+                                       Float a2, /* 2nd source operand */
+                                       Float a3, /* 3rd source operand */
+                                       UInt  rm_RISCV) /* rounding mode */
+{
+   CALCULATE_FFLAGS_TERNARY64("fmadd.s"); /* macro contains the return */
+}
+UInt riscv64g_calculate_fflags_fmadd_d(Double a1, /* 1st source operand */
+                                       Double a2, /* 2nd source operand */
+                                       Double a3, /* 3rd source operand */
+                                       UInt   rm_RISCV) /* rounding mode */
+{
+   CALCULATE_FFLAGS_TERNARY64("fmadd.d"); /* macro contains the return */
+}
+
+#if defined(__riscv) && (__riscv_xlen == 64)
+/* clang-format off */
+#define CALCULATE_FCLASS(inst)                                                 \
+   do { /* run inst on FP operand a1 and return its integer result */          \
+      ULong res;                                                               \
+      __asm__ __volatile__(                                                    \
+         inst " %[res], %[a1]\n\t"                                             \
+         : [res] "=r"(res)                                                     \
+         : [a1] "f"(a1));                                                      \
+      return res;                                                              \
+   } while (0)
+/* clang-format on */
+#else
+/* No simulated version is currently implemented; the result is always 0. */
+#define CALCULATE_FCLASS(inst)                                                 \
+   do {                                                                        \
+      return 0;                                                                \
+   } while (0)
+#endif
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPERS (0 unless built for riscv64) */
+ULong riscv64g_calculate_fclass_s(Float a1) { CALCULATE_FCLASS("fclass.s"); }
+ULong riscv64g_calculate_fclass_d(Double a1) { CALCULATE_FCLASS("fclass.d"); }
+
+/*------------------------------------------------------------*/
+/*--- Flag-helpers translation-time function specialisers. ---*/
+/*--- These help iropt specialise calls to the above       ---*/
+/*--- run-time flags functions.                            ---*/
+/*------------------------------------------------------------*/
+
+IRExpr* guest_riscv64_spechelper(const HChar* function_name,
+                                 IRExpr**     args,
+                                 IRStmt**     precedingStmts,
+                                 Int          n_precedingStmts)
+{
+   return NULL; /* no specialisation is offered; every argument is ignored */
+}
+
+/*------------------------------------------------------------*/
+/*--- Helpers for dealing with, and describing, guest      ---*/
+/*--- state as a whole.                                    ---*/
+/*------------------------------------------------------------*/
+
+/* Initialise the entire riscv64 guest state by clearing it with vex_bzero(). */
+/* VISIBLE TO LIBVEX CLIENT */
+void LibVEX_GuestRISCV64_initialise(/*OUT*/ VexGuestRISCV64State* vex_state)
+{
+   vex_bzero(vex_state, sizeof(*vex_state));
+}
+
+/* Decide whether the guest-state byte range minoff .. maxoff touches anything
+   that must be kept up to date at every memory exception. Answering True is
+   always safe, but makes the generated code significantly slower.
+
+   The registers tracked precisely are x2 (sp), x8 (fp) and pc, which is the
+   minimum needed to extract correct stack backtraces from riscv64 code.
+
+   When pxControl is VexRegUpdSpAtMemAccess, x2 (sp) alone is considered and
+   overlaps with fp or pc are ignored.
+*/
+Bool guest_riscv64_state_requires_precise_mem_exns(Int                minoff,
+                                                   Int                maxoff,
+                                                   VexRegisterUpdates pxControl)
+{
+   /* Each tracked register occupies 8 bytes of guest state. */
+   const Int sp_lo = offsetof(VexGuestRISCV64State, guest_x2);
+   const Int sp_hi = sp_lo + 8 - 1;
+   const Int fp_lo = offsetof(VexGuestRISCV64State, guest_x8);
+   const Int fp_hi = fp_lo + 8 - 1;
+   const Int pc_lo = offsetof(VexGuestRISCV64State, guest_pc);
+   const Int pc_hi = pc_lo + 8 - 1;
+
+   /* Two closed ranges intersect unless one ends before the other begins. */
+   const Bool touches_sp = !(maxoff < sp_lo || minoff > sp_hi);
+   const Bool touches_fp = !(maxoff < fp_lo || minoff > fp_hi);
+   const Bool touches_pc = !(maxoff < pc_lo || minoff > pc_hi);
+
+   if (touches_sp)
+      return True;
+   /* In this mode nothing but the stack pointer matters. */
+   if (pxControl == VexRegUpdSpAtMemAccess)
+      return False;
+
+   if (touches_fp)
+      return True;
+   if (touches_pc)
+      return True;
+
+   return False;
+}
+
+#define ALWAYSDEFD(field)                                                      \
+   { /* initialiser pair: offset of field, then its size in bytes */           \
+      offsetof(VexGuestRISCV64State, field),                                   \
+         (sizeof((VexGuestRISCV64State*)0)->field) /* never evaluated */       \
+   }
+
+VexGuestLayout riscv64guest_layout = {
+   /* Total size of the guest state, in bytes. */
+   .total_sizeB = sizeof(VexGuestRISCV64State),
+
+   /* Describe the stack pointer: guest x2, 8 bytes. */
+   .offset_SP = offsetof(VexGuestRISCV64State, guest_x2),
+   .sizeof_SP = 8,
+
+   /* Describe the frame pointer: guest x8, 8 bytes. */
+   .offset_FP = offsetof(VexGuestRISCV64State, guest_x8),
+   .sizeof_FP = 8,
+
+   /* Describe the instruction pointer: guest pc, 8 bytes. */
+   .offset_IP = offsetof(VexGuestRISCV64State, guest_pc),
+   .sizeof_IP = 8,
+
+   /* Describe any sections to be regarded by Memcheck as 'always-defined'. */
+   .n_alwaysDefd = 6, /* must equal the number of entries listed below */
+
+   .alwaysDefd = {
+      /* 0 */ ALWAYSDEFD(guest_x0),
+      /* 1 */ ALWAYSDEFD(guest_pc),
+      /* 2 */ ALWAYSDEFD(guest_EMNOTE),
+      /* 3 */ ALWAYSDEFD(guest_CMSTART),
+      /* 4 */ ALWAYSDEFD(guest_CMLEN),
+      /* 5 */ ALWAYSDEFD(guest_NRADDR),
+   },
+};
+
+/*--------------------------------------------------------------------*/
+/*--- end                                  guest_riscv64_helpers.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/VEX/priv/guest_riscv64_toIR.c b/VEX/priv/guest_riscv64_toIR.c
new file mode 100644
index 000000000..4552eb05d
--- /dev/null
+++ b/VEX/priv/guest_riscv64_toIR.c
@@ -0,0 +1,3536 @@
+
+/*--------------------------------------------------------------------*/
+/*--- begin                                   guest_riscv64_toIR.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright (C) 2020-2023 Petr Pavlu
+      petr.pavlu@dagobah.cz
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+/* Translates riscv64 code to IR. */
+
+/* "Special" instructions.
+
+   This instruction decoder can decode four special instructions which mean
+   nothing natively (are no-ops as far as regs/mem are concerned) but have
+   meaning for supporting Valgrind. A special instruction is flagged by
+   a 16-byte preamble:
+
+      00305013 00d05013 03305013 03d05013
+      (srli zero, zero, 3;   srli zero, zero, 13
+       srli zero, zero, 51;  srli zero, zero, 61)
+
+   The preamble must be followed by exactly one of these 4 instructions
+   (standard interpretation in parentheses):
+
+      00a56533 (or a0, a0, a0)   a3 = client_request ( a4 )
+      00b5e5b3 (or a1, a1, a1)   a3 = guest_NRADDR
+      00c66633 (or a2, a2, a2)   branch-and-link-to-noredir t0
+      00d6e6b3 (or a3, a3, a3)   IR injection
+
+   Any other bytes following the 16-byte preamble are illegal and constitute
+   a failure in instruction decoding. This all assumes that the preamble will
+   never occur except in specific code fragments designed for Valgrind to catch.
+*/
+
+#include "libvex_guest_riscv64.h"
+
+#include "guest_riscv64_defs.h"
+#include "main_globals.h"
+#include "main_util.h"
+
+/*------------------------------------------------------------*/
+/*--- Debugging output                                     ---*/
+/*------------------------------------------------------------*/
+
+#define DIP(format, args...)                                                   \
+   do { /* print only while VEX_TRACE_FE is set in vex_traceflags */           \
+      if (vex_traceflags & VEX_TRACE_FE)                                       \
+         vex_printf(format, ##args);                                           \
+   } while (0)
+
+#define DIS(buf, format, args...)                                              \
+   do { /* write buf only while VEX_TRACE_FE is set in vex_traceflags */       \
+      if (vex_traceflags & VEX_TRACE_FE)                                       \
+         vex_sprintf(buf, format, ##args);                                     \
+   } while (0)
+
+/*------------------------------------------------------------*/
+/*--- Helper bits and pieces for deconstructing the        ---*/
+/*--- riscv64 insn stream.                                 ---*/
+/*------------------------------------------------------------*/
+
+/* Read a 32-bit word stored least-significant byte first. The bytes are
+   combined arithmetically, so the endianness of the host does not matter. */
+static inline UInt getUIntLittleEndianly(const UChar* p)
+{
+   UInt b0 = p[0];
+   UInt b1 = p[1];
+   UInt b2 = p[2];
+   UInt b3 = p[3];
+   UInt w  = (b3 << 24) | (b2 << 16) | (b1 << 8) | b0;
+   return w;
+}
+
+/* Read one instruction, which is either 16 bits (compressed) or 32 bits in
+   size, and return it in the low bits of the result. */
+static inline UInt getInsn(const UChar* p)
+{
+   UInt lo16 = ((UInt)p[1] << 8) | p[0];
+
+   /* Anything whose two lowest bits are not 0b11 is a 16-bit encoding. */
+   if ((p[0] & 0x3) != 0x3) {
+      /* Compressed: bytes 2 and 3 are not part of it and are never read. */
+      return lo16;
+   }
+   UInt hi16 = ((UInt)p[3] << 8) | p[2];
+   return (hi16 << 16) | lo16;
+}
+
+/* Produce _uint[_bMax:_bMin] right-aligned (both bounds inclusive). */
+#define SLICE_UInt(_uint, _bMax, _bMin)                                        \
+   ((((UInt)(_uint)) >> (_bMin)) &                                             \
+    (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
+
+/*------------------------------------------------------------*/
+/*--- Helpers for constructing IR.                         ---*/
+/*------------------------------------------------------------*/
+
+/* Build a constant expression holding the 64-bit value i. */
+static IRExpr* mkU64(ULong i) { return IRExpr_Const(IRConst_U64(i)); }
+
+/* Build a constant expression holding the 32-bit value i. */
+static IRExpr* mkU32(UInt i) { return IRExpr_Const(IRConst_U32(i)); }
+
+/* Build an 8-bit constant expression; i must be below 256 (asserted). */
+static IRExpr* mkU8(UInt i)
+{
+   vassert(i < 256);
+   return IRExpr_Const(IRConst_U8((UChar)i));
+}
+
+/* Build an expression that reads the temporary tmp. */
+static IRExpr* mkexpr(IRTemp tmp) { return IRExpr_RdTmp(tmp); }
+
+/* Build a unary-operation expression applying op to a. */
+static IRExpr* unop(IROp op, IRExpr* a) { return IRExpr_Unop(op, a); }
+
+/* Build a binary-operation expression applying op to a1 and a2. */
+static IRExpr* binop(IROp op, IRExpr* a1, IRExpr* a2)
+{
+   return IRExpr_Binop(op, a1, a2);
+}
+
+/* Build a ternary-operation expression applying op to a1, a2 and a3. */
+static IRExpr* triop(IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3)
+{
+   return IRExpr_Triop(op, a1, a2, a3);
+}
+
+/* Build a quaternary-operation expression applying op to a1 .. a4. */
+static IRExpr* qop(IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3, IRExpr* a4)
+{
+   return IRExpr_Qop(op, a1, a2, a3, a4);
+}
+
+/* Build an expression that loads a value of type ty from the address addr,
+   using the little-endian byte order. */
+static IRExpr* loadLE(IRType ty, IRExpr* addr)
+{
+   return IRExpr_Load(Iend_LE, ty, addr);
+}
+
+/* Append the statement st to the list held by irsb. */
+static void stmt(/*MOD*/ IRSB* irsb, IRStmt* st) { addStmtToIRSB(irsb, st); }
+
+/* Append a statement that writes the value of e to the temporary dst. */
+static void assign(/*MOD*/ IRSB* irsb, IRTemp dst, IRExpr* e)
+{
+   stmt(irsb, IRStmt_WrTmp(dst, e));
+}
+
+/* Append a statement that stores data to memory at the address addr, using
+   the little-endian byte order. */
+static void storeLE(/*MOD*/ IRSB* irsb, IRExpr* addr, IRExpr* data)
+{
+   stmt(irsb, IRStmt_Store(Iend_LE, addr, data));
+}
+
+/* Allocate a new temporary of type ty in the type environment of irsb. */
+static IRTemp newTemp(/*MOD*/ IRSB* irsb, IRType ty)
+{
+   vassert(isPlausibleIRType(ty));
+   return newIRTemp(irsb->tyenv, ty);
+}
+
+/* Sign-extend an integer expression of type I32 or I64 to 64 bits. */
+static IRExpr* widenSto64(IRType srcTy, IRExpr* e)
+{
+   if (srcTy == Ity_I64) {
+      /* Nothing to widen. */
+      return e;
+   }
+   if (srcTy == Ity_I32) {
+      return unop(Iop_32Sto64, e);
+   }
+   vpanic("widenSto64(riscv64)");
+}
+
+/* Narrow a 64-bit integer expression to the type dstTy, either I32 or I64. */
+static IRExpr* narrowFrom64(IRType dstTy, IRExpr* e)
+{
+   if (dstTy == Ity_I64) {
+      /* Nothing to narrow. */
+      return e;
+   }
+   if (dstTy == Ity_I32) {
+      return unop(Iop_64to32, e);
+   }
+   vpanic("narrowFrom64(riscv64)");
+}
+
+/*------------------------------------------------------------*/
+/*--- Offsets of various parts of the riscv64 guest state  ---*/
+/*------------------------------------------------------------*/
+
+#define OFFB_X0  offsetof(VexGuestRISCV64State, guest_x0) /* zero */
+#define OFFB_X1  offsetof(VexGuestRISCV64State, guest_x1) /* ra */
+#define OFFB_X2  offsetof(VexGuestRISCV64State, guest_x2) /* sp */
+#define OFFB_X3  offsetof(VexGuestRISCV64State, guest_x3) /* gp */
+#define OFFB_X4  offsetof(VexGuestRISCV64State, guest_x4) /* tp */
+#define OFFB_X5  offsetof(VexGuestRISCV64State, guest_x5) /* t0 */
+#define OFFB_X6  offsetof(VexGuestRISCV64State, guest_x6) /* t1 */
+#define OFFB_X7  offsetof(VexGuestRISCV64State, guest_x7) /* t2 */
+#define OFFB_X8  offsetof(VexGuestRISCV64State, guest_x8) /* s0 */
+#define OFFB_X9  offsetof(VexGuestRISCV64State, guest_x9) /* s1 */
+#define OFFB_X10 offsetof(VexGuestRISCV64State, guest_x10) /* a0 */
+#define OFFB_X11 offsetof(VexGuestRISCV64State, guest_x11) /* a1 */
+#define OFFB_X12 offsetof(VexGuestRISCV64State, guest_x12) /* a2 */
+#define OFFB_X13 offsetof(VexGuestRISCV64State, guest_x13) /* a3 */
+#define OFFB_X14 offsetof(VexGuestRISCV64State, guest_x14) /* a4 */
+#define OFFB_X15 offsetof(VexGuestRISCV64State, guest_x15) /* a5 */
+#define OFFB_X16 offsetof(VexGuestRISCV64State, guest_x16) /* a6 */
+#define OFFB_X17 offsetof(VexGuestRISCV64State, guest_x17) /* a7 */
+#define OFFB_X18 offsetof(VexGuestRISCV64State, guest_x18) /* s2 */
+#define OFFB_X19 offsetof(VexGuestRISCV64State, guest_x19) /* s3 */
+#define OFFB_X20 offsetof(VexGuestRISCV64State, guest_x20) /* s4 */
+#define OFFB_X21 offsetof(VexGuestRISCV64State, guest_x21) /* s5 */
+#define OFFB_X22 offsetof(VexGuestRISCV64State, guest_x22) /* s6 */
+#define OFFB_X23 offsetof(VexGuestRISCV64State, guest_x23) /* s7 */
+#define OFFB_X24 offsetof(VexGuestRISCV64State, guest_x24) /* s8 */
+#define OFFB_X25 offsetof(VexGuestRISCV64State, guest_x25) /* s9 */
+#define OFFB_X26 offsetof(VexGuestRISCV64State, guest_x26) /* s10 */
+#define OFFB_X27 offsetof(VexGuestRISCV64State, guest_x27) /* s11 */
+#define OFFB_X28 offsetof(VexGuestRISCV64State, guest_x28) /* t3 */
+#define OFFB_X29 offsetof(VexGuestRISCV64State, guest_x29) /* t4 */
+#define OFFB_X30 offsetof(VexGuestRISCV64State, guest_x30) /* t5 */
+#define OFFB_X31 offsetof(VexGuestRISCV64State, guest_x31) /* t6 */
+#define OFFB_PC  offsetof(VexGuestRISCV64State, guest_pc) /* guest pc */
+
+#define OFFB_F0   offsetof(VexGuestRISCV64State, guest_f0) /* ft0 */
+#define OFFB_F1   offsetof(VexGuestRISCV64State, guest_f1) /* ft1 */
+#define OFFB_F2   offsetof(VexGuestRISCV64State, guest_f2) /* ft2 */
+#define OFFB_F3   offsetof(VexGuestRISCV64State, guest_f3) /* ft3 */
+#define OFFB_F4   offsetof(VexGuestRISCV64State, guest_f4) /* ft4 */
+#define OFFB_F5   offsetof(VexGuestRISCV64State, guest_f5) /* ft5 */
+#define OFFB_F6   offsetof(VexGuestRISCV64State, guest_f6) /* ft6 */
+#define OFFB_F7   offsetof(VexGuestRISCV64State, guest_f7) /* ft7 */
+#define OFFB_F8   offsetof(VexGuestRISCV64State, guest_f8) /* fs0 */
+#define OFFB_F9   offsetof(VexGuestRISCV64State, guest_f9) /* fs1 */
+#define OFFB_F10  offsetof(VexGuestRISCV64State, guest_f10) /* fa0 */
+#define OFFB_F11  offsetof(VexGuestRISCV64State, guest_f11) /* fa1 */
+#define OFFB_F12  offsetof(VexGuestRISCV64State, guest_f12) /* fa2 */
+#define OFFB_F13  offsetof(VexGuestRISCV64State, guest_f13) /* fa3 */
+#define OFFB_F14  offsetof(VexGuestRISCV64State, guest_f14) /* fa4 */
+#define OFFB_F15  offsetof(VexGuestRISCV64State, guest_f15) /* fa5 */
+#define OFFB_F16  offsetof(VexGuestRISCV64State, guest_f16) /* fa6 */
+#define OFFB_F17  offsetof(VexGuestRISCV64State, guest_f17) /* fa7 */
+#define OFFB_F18  offsetof(VexGuestRISCV64State, guest_f18) /* fs2 */
+#define OFFB_F19  offsetof(VexGuestRISCV64State, guest_f19) /* fs3 */
+#define OFFB_F20  offsetof(VexGuestRISCV64State, guest_f20) /* fs4 */
+#define OFFB_F21  offsetof(VexGuestRISCV64State, guest_f21) /* fs5 */
+#define OFFB_F22  offsetof(VexGuestRISCV64State, guest_f22) /* fs6 */
+#define OFFB_F23  offsetof(VexGuestRISCV64State, guest_f23) /* fs7 */
+#define OFFB_F24  offsetof(VexGuestRISCV64State, guest_f24) /* fs8 */
+#define OFFB_F25  offsetof(VexGuestRISCV64State, guest_f25) /* fs9 */
+#define OFFB_F26  offsetof(VexGuestRISCV64State, guest_f26) /* fs10 */
+#define OFFB_F27  offsetof(VexGuestRISCV64State, guest_f27) /* fs11 */
+#define OFFB_F28  offsetof(VexGuestRISCV64State, guest_f28) /* ft8 */
+#define OFFB_F29  offsetof(VexGuestRISCV64State, guest_f29) /* ft9 */
+#define OFFB_F30  offsetof(VexGuestRISCV64State, guest_f30) /* ft10 */
+#define OFFB_F31  offsetof(VexGuestRISCV64State, guest_f31) /* ft11 */
+#define OFFB_FCSR offsetof(VexGuestRISCV64State, guest_fcsr) /* fcsr */
+/* Fields that riscv64guest_layout declares always-defined for Memcheck. */
+#define OFFB_EMNOTE  offsetof(VexGuestRISCV64State, guest_EMNOTE)
+#define OFFB_CMSTART offsetof(VexGuestRISCV64State, guest_CMSTART)
+#define OFFB_CMLEN   offsetof(VexGuestRISCV64State, guest_CMLEN)
+#define OFFB_NRADDR  offsetof(VexGuestRISCV64State, guest_NRADDR)
+/* NOTE(review): presumably state for emulating LR/SC pairs - confirm. */
+#define OFFB_LLSC_SIZE offsetof(VexGuestRISCV64State, guest_LLSC_SIZE)
+#define OFFB_LLSC_ADDR offsetof(VexGuestRISCV64State, guest_LLSC_ADDR)
+#define OFFB_LLSC_DATA offsetof(VexGuestRISCV64State, guest_LLSC_DATA)
+
+/*------------------------------------------------------------*/
+/*--- Integer registers                                    ---*/
+/*------------------------------------------------------------*/
+
+static Int offsetIReg64(UInt iregNo)
+{
+   switch (iregNo) { /* register number -> byte offset in the guest state */
+   case 0: /* zero */
+      return OFFB_X0;
+   case 1: /* ra */
+      return OFFB_X1;
+   case 2: /* sp */
+      return OFFB_X2;
+   case 3: /* gp */
+      return OFFB_X3;
+   case 4: /* tp */
+      return OFFB_X4;
+   case 5: /* t0 */
+      return OFFB_X5;
+   case 6: /* t1 */
+      return OFFB_X6;
+   case 7: /* t2 */
+      return OFFB_X7;
+   case 8: /* s0 */
+      return OFFB_X8;
+   case 9: /* s1 */
+      return OFFB_X9;
+   case 10: /* a0 */
+      return OFFB_X10;
+   case 11: /* a1 */
+      return OFFB_X11;
+   case 12: /* a2 */
+      return OFFB_X12;
+   case 13: /* a3 */
+      return OFFB_X13;
+   case 14: /* a4 */
+      return OFFB_X14;
+   case 15: /* a5 */
+      return OFFB_X15;
+   case 16: /* a6 */
+      return OFFB_X16;
+   case 17: /* a7 */
+      return OFFB_X17;
+   case 18: /* s2 */
+      return OFFB_X18;
+   case 19: /* s3 */
+      return OFFB_X19;
+   case 20: /* s4 */
+      return OFFB_X20;
+   case 21: /* s5 */
+      return OFFB_X21;
+   case 22: /* s6 */
+      return OFFB_X22;
+   case 23: /* s7 */
+      return OFFB_X23;
+   case 24: /* s8 */
+      return OFFB_X24;
+   case 25: /* s9 */
+      return OFFB_X25;
+   case 26: /* s10 */
+      return OFFB_X26;
+   case 27: /* s11 */
+      return OFFB_X27;
+   case 28: /* t3 */
+      return OFFB_X28;
+   case 29: /* t4 */
+      return OFFB_X29;
+   case 30: /* t5 */
+      return OFFB_X30;
+   case 31: /* t6 */
+      return OFFB_X31;
+   default: /* iregNo is not in 0..31 */
+      vassert(0);
+   }
+}
+
+/* Obtain the ABI name of integer register iregNo, which must be below 32. */
+static const HChar* nameIReg(UInt iregNo)
+{
+   vassert(iregNo < 32);
+   static const HChar* names[32] = {
+      "zero", "ra", "sp", "gp", "tp",  "t0",  "t1", "t2", "s0", "s1", "a0",
+      "a1",   "a2", "a3", "a4", "a5",  "a6",  "a7", "s2", "s3", "s4", "s5",
+      "s6",   "s7", "s8", "s9", "s10", "s11", "t3", "t4", "t5", "t6"};
+   return names[iregNo];
+}
+
+/* Build an expression reading all 64 bits of guest integer register iregNo. */
+static IRExpr* getIReg64(UInt iregNo)
+{
+   vassert(iregNo < 32);
+   return IRExpr_Get(offsetIReg64(iregNo), Ity_I64);
+}
+
+/* Write a 64-bit value into guest integer register x1..x31 (never x0). */
+static void putIReg64(/*OUT*/ IRSB* irsb, UInt iregNo, /*IN*/ IRExpr* e)
+{
+   vassert(iregNo > 0 && iregNo < 32);
+   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
+   stmt(irsb, IRStmt_Put(offsetIReg64(iregNo), e));
+}
+
+/* Read a guest integer register and narrow the 64-bit value to 32 bits. */
+static IRExpr* getIReg32(UInt iregNo)
+{
+   vassert(iregNo < 32);
+   return unop(Iop_64to32, IRExpr_Get(offsetIReg64(iregNo), Ity_I64));
+}
+
+/* Write a 32-bit value, sign-extended to 64 bits, into a register x1..x31. */
+static void putIReg32(/*OUT*/ IRSB* irsb, UInt iregNo, /*IN*/ IRExpr* e)
+{
+   vassert(iregNo > 0 && iregNo < 32);
+   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
+   stmt(irsb, IRStmt_Put(offsetIReg64(iregNo), unop(Iop_32Sto64, e)));
+}
+
+/* Write an address, given as a 64-bit expression, into the guest pc. */
+static void putPC(/*OUT*/ IRSB* irsb, /*IN*/ IRExpr* e)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
+   stmt(irsb, IRStmt_Put(OFFB_PC, e));
+}
+
+/*------------------------------------------------------------*/
+/*--- Floating-point registers                             ---*/
+/*------------------------------------------------------------*/
+
+static Int offsetFReg(UInt fregNo)
+{
+   switch (fregNo) { /* register number -> byte offset in the guest state */
+   case 0: /* ft0 */
+      return OFFB_F0;
+   case 1: /* ft1 */
+      return OFFB_F1;
+   case 2: /* ft2 */
+      return OFFB_F2;
+   case 3: /* ft3 */
+      return OFFB_F3;
+   case 4: /* ft4 */
+      return OFFB_F4;
+   case 5: /* ft5 */
+      return OFFB_F5;
+   case 6: /* ft6 */
+      return OFFB_F6;
+   case 7: /* ft7 */
+      return OFFB_F7;
+   case 8: /* fs0 */
+      return OFFB_F8;
+   case 9: /* fs1 */
+      return OFFB_F9;
+   case 10: /* fa0 */
+      return OFFB_F10;
+   case 11: /* fa1 */
+      return OFFB_F11;
+   case 12: /* fa2 */
+      return OFFB_F12;
+   case 13: /* fa3 */
+      return OFFB_F13;
+   case 14: /* fa4 */
+      return OFFB_F14;
+   case 15: /* fa5 */
+      return OFFB_F15;
+   case 16: /* fa6 */
+      return OFFB_F16;
+   case 17: /* fa7 */
+      return OFFB_F17;
+   case 18: /* fs2 */
+      return OFFB_F18;
+   case 19: /* fs3 */
+      return OFFB_F19;
+   case 20: /* fs4 */
+      return OFFB_F20;
+   case 21: /* fs5 */
+      return OFFB_F21;
+   case 22: /* fs6 */
+      return OFFB_F22;
+   case 23: /* fs7 */
+      return OFFB_F23;
+   case 24: /* fs8 */
+      return OFFB_F24;
+   case 25: /* fs9 */
+      return OFFB_F25;
+   case 26: /* fs10 */
+      return OFFB_F26;
+   case 27: /* fs11 */
+      return OFFB_F27;
+   case 28: /* ft8 */
+      return OFFB_F28;
+   case 29: /* ft9 */
+      return OFFB_F29;
+   case 30: /* ft10 */
+      return OFFB_F30;
+   case 31: /* ft11 */
+      return OFFB_F31;
+   default: /* fregNo is not in 0..31 */
+      vassert(0);
+   }
+}
+
+/* Obtain the ABI name of FP register fregNo, which must be below 32. */
+static const HChar* nameFReg(UInt fregNo)
+{
+   vassert(fregNo < 32);
+   static const HChar* names[32] = {
+      "ft0", "ft1", "ft2",  "ft3",  "ft4", "ft5", "ft6",  "ft7",
+      "fs0", "fs1", "fa0",  "fa1",  "fa2", "fa3", "fa4",  "fa5",
+      "fa6", "fa7", "fs2",  "fs3",  "fs4", "fs5", "fs6",  "fs7",
+      "fs8", "fs9", "fs10", "fs11", "ft8", "ft9", "ft10", "ft11"};
+   return names[fregNo];
+}
+
+/* Build an expression reading guest FP register fregNo as an F64 value. */
+static IRExpr* getFReg64(UInt fregNo)
+{
+   vassert(fregNo < 32);
+   return IRExpr_Get(offsetFReg(fregNo), Ity_F64);
+}
+
+/* Write an F64 value into guest floating-point register fregNo. */
+static void putFReg64(/*OUT*/ IRSB* irsb, UInt fregNo, /*IN*/ IRExpr* e)
+{
+   vassert(fregNo < 32);
+   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64);
+   stmt(irsb, IRStmt_Put(offsetFReg(fregNo), e));
+}
+
+/* Read the F32 value stored at the start of guest FP register fregNo. */
+static IRExpr* getFReg32(UInt fregNo)
+{
+   vassert(fregNo < 32);
+   /* Note that the following access depends on the host being little-endian
+      which is checked in disInstr_RISCV64(). */
+   /* TODO Check that the value is correctly NaN-boxed. If not then return
+      the 32-bit canonical qNaN, as mandated by the RISC-V ISA. */
+   return IRExpr_Get(offsetFReg(fregNo), Ity_F32);
+}
+
+/* Write an F32 value, NaN-boxed to 64 bits, into guest FP register fregNo. */
+static void putFReg32(/*OUT*/ IRSB* irsb, UInt fregNo, /*IN*/ IRExpr* e)
+{
+   vassert(fregNo < 32);
+   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
+   /* Note that the following access depends on the host being little-endian
+      which is checked in disInstr_RISCV64(). */
+   Int offset = offsetFReg(fregNo);
+   stmt(irsb, IRStmt_Put(offset, e));
+   /* Write 1's in the upper bits of the target 64-bit register to create
+      a NaN-boxed value, as mandated by the RISC-V ISA. */
+   stmt(irsb, IRStmt_Put(offset + 4, mkU32(0xffffffff)));
+   /* TODO Check that this works with Memcheck. */
+}
+
+/* Build an expression reading the whole guest fcsr as a 32-bit value. */
+static IRExpr* getFCSR(void) { return IRExpr_Get(OFFB_FCSR, Ity_I32); }
+
+/* Replace the whole guest fcsr with the 32-bit value of e. */
+static void putFCSR(/*OUT*/ IRSB* irsb, /*IN*/ IRExpr* e)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
+   stmt(irsb, IRStmt_Put(OFFB_FCSR, e));
+}
+
+/* OR the low five bits of e into fcsr; no bit of fcsr is ever cleared. */
+static void accumulateFFLAGS(/*OUT*/ IRSB* irsb, /*IN*/ IRExpr* e)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
+   putFCSR(irsb, binop(Iop_Or32, getFCSR(), binop(Iop_And32, e, mkU32(0x1f))));
+}
+
+/* Generate IR to get hold of the rounding mode in both RISC-V and IR
+   formats. A floating-point operation can use either a static rounding mode
+   encoded in the instruction, or a dynamic rounding mode held in fcsr. Bind the
+   final result to the passed temporaries (which are allocated by the function).
+ */
+static void mk_get_rounding_mode(/*MOD*/ IRSB*   irsb,
+                                 /*OUT*/ IRTemp* rm_RISCV,
+                                 /*OUT*/ IRTemp* rm_IR,
+                                 UInt            inst_rm_RISCV)
+{
+   /*
+      rounding mode                | RISC-V |  IR
+      --------------------------------------------
+      to nearest, ties to even     |   000  | 0000
+      to zero                      |   001  | 0011
+      to +infinity                 |   010  | 0010
+      to -infinity                 |   011  | 0001
+      to nearest, ties away from 0 |   100  | 0100
+      invalid                      |   101  | 1000
+      invalid                      |   110  | 1000
+      dynamic                      |   111  | 1000
+      NOTE(review): nameRMOperand() has 010=rdn, 011=rup; check +/-inf rows.
+
+      The 'dynamic' value selects the mode from fcsr. Its value is valid when
+      encoded in the instruction but naturally invalid when found in fcsr.
+
+      Static mode is known at the decode time and can be directly expressed by
+      a respective rounding mode IR constant.
+
+      Dynamic mode needs a runtime mapping from the RISC-V to the IR mode:
+         t0 = fcsr_rm_RISCV - 20
+         t1 = t0 >> 2
+         t2 = fcsr_rm_RISCV + 3
+         t3 = t2 ^ 3
+         rm_IR = t1 & t3
+   */
+   *rm_RISCV = newTemp(irsb, Ity_I32);
+   *rm_IR    = newTemp(irsb, Ity_I32);
+   switch (inst_rm_RISCV) {
+   case 0b000:
+      assign(irsb, *rm_RISCV, mkU32(0));
+      assign(irsb, *rm_IR, mkU32(Irrm_NEAREST));
+      break;
+   case 0b001:
+      assign(irsb, *rm_RISCV, mkU32(1));
+      assign(irsb, *rm_IR, mkU32(Irrm_ZERO));
+      break;
+   case 0b010:
+      assign(irsb, *rm_RISCV, mkU32(2));
+      assign(irsb, *rm_IR, mkU32(Irrm_PosINF));
+      break;
+   case 0b011:
+      assign(irsb, *rm_RISCV, mkU32(3));
+      assign(irsb, *rm_IR, mkU32(Irrm_NegINF));
+      break;
+   case 0b100:
+      assign(irsb, *rm_RISCV, mkU32(4));
+      assign(irsb, *rm_IR, mkU32(Irrm_NEAREST_TIE_AWAY_0));
+      break;
+   case 0b101:
+      assign(irsb, *rm_RISCV, mkU32(5));
+      assign(irsb, *rm_IR, mkU32(Irrm_INVALID));
+      break;
+   case 0b110:
+      assign(irsb, *rm_RISCV, mkU32(6));
+      assign(irsb, *rm_IR, mkU32(Irrm_INVALID));
+      break;
+   case 0b111: {
+      assign(irsb, *rm_RISCV,
+             binop(Iop_And32, binop(Iop_Shr32, getFCSR(), mkU8(5)), mkU32(7)));
+      IRTemp t0 = newTemp(irsb, Ity_I32);
+      assign(irsb, t0, binop(Iop_Sub32, mkexpr(*rm_RISCV), mkU32(20)));
+      IRTemp t1 = newTemp(irsb, Ity_I32);
+      assign(irsb, t1, binop(Iop_Shr32, mkexpr(t0), mkU8(2)));
+      IRTemp t2 = newTemp(irsb, Ity_I32);
+      assign(irsb, t2, binop(Iop_Add32, mkexpr(*rm_RISCV), mkU32(3)));
+      IRTemp t3 = newTemp(irsb, Ity_I32);
+      assign(irsb, t3, binop(Iop_Xor32, mkexpr(t2), mkU32(3)));
+      assign(irsb, *rm_IR, binop(Iop_And32, mkexpr(t1), mkexpr(t3)));
+      break;
+   }
+   default:
+      vassert(0);
+   }
+}
+
+/*------------------------------------------------------------*/
+/*--- Name helpers                                         ---*/
+/*------------------------------------------------------------*/
+
+/* Obtain the mnemonic suffix for the acquire/release bits of an atomic insn. */
+static const HChar* nameAqRlSuffix(UInt aqrl)
+{
+   /* Indexed by the two-bit value; the upper bit contributes "aq" and the
+      lower bit "rl". */
+   static const HChar* const suffixes[] = {
+      /* 0b00 */ "",
+      /* 0b01 */ ".rl",
+      /* 0b10 */ ".aq",
+      /* 0b11 */ ".aqrl",
+   };
+   if (aqrl >= sizeof(suffixes) / sizeof(suffixes[0])) {
+      vpanic("nameAqRlSuffix(riscv64)");
+   }
+   return suffixes[aqrl];
+}
+
+/* Obtain the name of the control/status register numbered csr. */
+static const HChar* nameCSR(UInt csr)
+{
+   /* Only the three floating-point CSR numbers have a name here. */
+   if (csr == 0x001)
+      return "fflags";
+   if (csr == 0x002)
+      return "frm";
+   if (csr == 0x003)
+      return "fcsr";
+
+   /* Any other number is treated as a fatal error. */
+   vpanic("nameCSR(riscv64)");
+}
+
+/* Obtain the operand string for a floating-point rounding mode. */
+static const HChar* nameRMOperand(UInt rm)
+{
+   /* One entry per three-bit rounding-mode value. Every non-empty entry
+      starts with ", " and the entry for 0b111 (dyn) is the empty string, so
+      a dynamic rounding mode adds no operand text at all. */
+   static const HChar* const operands[] = {
+      /* 0b000 */ ", rne",
+      /* 0b001 */ ", rtz",
+      /* 0b010 */ ", rdn",
+      /* 0b011 */ ", rup",
+      /* 0b100 */ ", rmm",
+      /* 0b101 */ ", <invalid>",
+      /* 0b110 */ ", <invalid>",
+      /* 0b111 */ "", /* dyn */
+   };
+
+   if (rm >= sizeof(operands) / sizeof(operands[0])) {
+      /* Not a three-bit value. */
+      vpanic("nameRMOperand(riscv64)");
+   }
+
+   return operands[rm];
+}
+
+/*------------------------------------------------------------*/
+/*--- Disassemble a single instruction                     ---*/
+/*------------------------------------------------------------*/
+
+/* Extract bits _bMax.._bMin of 'insn', which every disassembly function is
+   expected to have in scope as a local variable or parameter. */
+#define INSN(_bMax, _bMin) SLICE_UInt(insn, (_bMax), (_bMin))
+
+/* Decode the 16-bit compressed instruction 'insn' located at guest address
+   'guest_pc_curr_instr' into 'irsb'; control transfers also fill in 'dres'.
+   Returns True on a match, else False (with a note when 'sigill_diag'). */
+static Bool dis_RV64C(/*MB_OUT*/ DisResult* dres,
+                      /*OUT*/ IRSB*         irsb,
+                      UInt                  insn,
+                      Addr                  guest_pc_curr_instr,
+                      Bool                  sigill_diag)
+{
+   vassert(INSN(1, 0) == 0b00 || INSN(1, 0) == 0b01 || INSN(1, 0) == 0b10);
+
+   /* ---- RV64C compressed instruction set, quadrant 0 ----- */
+
+   /* ------------- c.addi4spn rd, nzuimm[9:2] -------------- */
+   if (INSN(1, 0) == 0b00 && INSN(15, 13) == 0b000) {
+      UInt rd = INSN(4, 2) + 8;
+      UInt nzuimm9_2 =
+         INSN(10, 7) << 4 | INSN(12, 11) << 2 | INSN(5, 5) << 1 | INSN(6, 6);
+      if (nzuimm9_2 == 0) {
+         /* Invalid C.ADDI4SPN, fall through. */
+      } else {
+         ULong uimm = nzuimm9_2 << 2;
+         putIReg64(irsb, rd,
+                   binop(Iop_Add64, getIReg64(2 /*x2/sp*/), mkU64(uimm)));
+         DIP("c.addi4spn %s, %llu\n", nameIReg(rd), uimm);
+         return True;
+      }
+   }
+
+   /* -------------- c.fld rd, uimm[7:3](rs1) --------------- */
+   if (INSN(1, 0) == 0b00 && INSN(15, 13) == 0b001) {
+      UInt  rd      = INSN(4, 2) + 8;
+      UInt  rs1     = INSN(9, 7) + 8;
+      UInt  uimm7_3 = INSN(6, 5) << 3 | INSN(12, 10);
+      ULong uimm    = uimm7_3 << 3;
+      putFReg64(irsb, rd,
+                loadLE(Ity_F64, binop(Iop_Add64, getIReg64(rs1), mkU64(uimm))));
+      DIP("c.fld %s, %llu(%s)\n", nameFReg(rd), uimm, nameIReg(rs1));
+      return True;
+   }
+
+   /* --------------- c.lw rd, uimm[6:2](rs1) --------------- */
+   if (INSN(1, 0) == 0b00 && INSN(15, 13) == 0b010) {
+      UInt  rd      = INSN(4, 2) + 8;
+      UInt  rs1     = INSN(9, 7) + 8;
+      UInt  uimm6_2 = INSN(5, 5) << 4 | INSN(12, 10) << 1 | INSN(6, 6);
+      ULong uimm    = uimm6_2 << 2;
+      putIReg64(
+         irsb, rd,
+         unop(Iop_32Sto64,
+              loadLE(Ity_I32, binop(Iop_Add64, getIReg64(rs1), mkU64(uimm)))));
+      DIP("c.lw %s, %llu(%s)\n", nameIReg(rd), uimm, nameIReg(rs1));
+      return True;
+   }
+
+   /* --------------- c.ld rd, uimm[7:3](rs1) --------------- */
+   if (INSN(1, 0) == 0b00 && INSN(15, 13) == 0b011) {
+      UInt  rd      = INSN(4, 2) + 8;
+      UInt  rs1     = INSN(9, 7) + 8;
+      UInt  uimm7_3 = INSN(6, 5) << 3 | INSN(12, 10);
+      ULong uimm    = uimm7_3 << 3;
+      putIReg64(irsb, rd,
+                loadLE(Ity_I64, binop(Iop_Add64, getIReg64(rs1), mkU64(uimm))));
+      DIP("c.ld %s, %llu(%s)\n", nameIReg(rd), uimm, nameIReg(rs1));
+      return True;
+   }
+
+   /* -------------- c.fsd rs2, uimm[7:3](rs1) -------------- */
+   if (INSN(1, 0) == 0b00 && INSN(15, 13) == 0b101) {
+      UInt  rs1     = INSN(9, 7) + 8;
+      UInt  rs2     = INSN(4, 2) + 8;
+      UInt  uimm7_3 = INSN(6, 5) << 3 | INSN(12, 10);
+      ULong uimm    = uimm7_3 << 3;
+      storeLE(irsb, binop(Iop_Add64, getIReg64(rs1), mkU64(uimm)),
+              getFReg64(rs2));
+      DIP("c.fsd %s, %llu(%s)\n", nameFReg(rs2), uimm, nameIReg(rs1));
+      return True;
+   }
+
+   /* -------------- c.sw rs2, uimm[6:2](rs1) --------------- */
+   if (INSN(1, 0) == 0b00 && INSN(15, 13) == 0b110) {
+      UInt  rs1     = INSN(9, 7) + 8;
+      UInt  rs2     = INSN(4, 2) + 8;
+      UInt  uimm6_2 = INSN(5, 5) << 4 | INSN(12, 10) << 1 | INSN(6, 6);
+      ULong uimm    = uimm6_2 << 2;
+      storeLE(irsb, binop(Iop_Add64, getIReg64(rs1), mkU64(uimm)),
+              unop(Iop_64to32, getIReg64(rs2)));
+      DIP("c.sw %s, %llu(%s)\n", nameIReg(rs2), uimm, nameIReg(rs1));
+      return True;
+   }
+
+   /* -------------- c.sd rs2, uimm[7:3](rs1) --------------- */
+   if (INSN(1, 0) == 0b00 && INSN(15, 13) == 0b111) {
+      UInt  rs1     = INSN(9, 7) + 8;
+      UInt  rs2     = INSN(4, 2) + 8;
+      UInt  uimm7_3 = INSN(6, 5) << 3 | INSN(12, 10);
+      ULong uimm    = uimm7_3 << 3;
+      storeLE(irsb, binop(Iop_Add64, getIReg64(rs1), mkU64(uimm)),
+              getIReg64(rs2));
+      DIP("c.sd %s, %llu(%s)\n", nameIReg(rs2), uimm, nameIReg(rs1));
+      return True;
+   }
+
+   /* ---- RV64C compressed instruction set, quadrant 1 ----- */
+
+   /* ------------------------ c.nop ------------------------ */
+   if (INSN(15, 0) == 0b0000000000000001) {
+      DIP("c.nop\n");
+      return True;
+   }
+
+   /* -------------- c.addi rd_rs1, nzimm[5:0] -------------- */
+   if (INSN(1, 0) == 0b01 && INSN(15, 13) == 0b000) {
+      UInt rd_rs1   = INSN(11, 7);
+      UInt nzimm5_0 = INSN(12, 12) << 5 | INSN(6, 2);
+      if (rd_rs1 == 0 || nzimm5_0 == 0) {
+         /* Invalid C.ADDI, fall through. */
+      } else {
+         ULong simm = vex_sx_to_64(nzimm5_0, 6);
+         putIReg64(irsb, rd_rs1,
+                   binop(Iop_Add64, getIReg64(rd_rs1), mkU64(simm)));
+         DIP("c.addi %s, %lld\n", nameIReg(rd_rs1), (Long)simm);
+         return True;
+      }
+   }
+
+   /* -------------- c.addiw rd_rs1, imm[5:0] --------------- */
+   if (INSN(1, 0) == 0b01 && INSN(15, 13) == 0b001) {
+      UInt rd_rs1 = INSN(11, 7);
+      UInt imm5_0 = INSN(12, 12) << 5 | INSN(6, 2);
+      if (rd_rs1 == 0) {
+         /* Invalid C.ADDIW, fall through. */
+      } else {
+         UInt simm = (UInt)vex_sx_to_64(imm5_0, 6);
+         putIReg32(irsb, rd_rs1,
+                   binop(Iop_Add32, getIReg32(rd_rs1), mkU32(simm)));
+         DIP("c.addiw %s, %d\n", nameIReg(rd_rs1), (Int)simm);
+         return True;
+      }
+   }
+
+   /* ------------------ c.li rd, imm[5:0] ------------------ */
+   if (INSN(1, 0) == 0b01 && INSN(15, 13) == 0b010) {
+      UInt rd     = INSN(11, 7);
+      UInt imm5_0 = INSN(12, 12) << 5 | INSN(6, 2);
+      if (rd == 0) {
+         /* Invalid C.LI, fall through. */
+      } else {
+         ULong simm = vex_sx_to_64(imm5_0, 6);
+         putIReg64(irsb, rd, mkU64(simm));
+         DIP("c.li %s, %lld\n", nameIReg(rd), (Long)simm);
+         return True;
+      }
+   }
+
+   /* ---------------- c.addi16sp nzimm[9:4] ---------------- */
+   if (INSN(1, 0) == 0b01 && INSN(15, 13) == 0b011) {
+      UInt rd_rs1   = INSN(11, 7);
+      UInt nzimm9_4 = INSN(12, 12) << 5 | INSN(4, 3) << 3 | INSN(5, 5) << 2 |
+                      INSN(2, 2) << 1 | INSN(6, 6);
+      if (rd_rs1 != 2 || nzimm9_4 == 0) {
+         /* Invalid C.ADDI16SP, fall through. */
+      } else {
+         ULong simm = vex_sx_to_64(nzimm9_4 << 4, 10);
+         putIReg64(irsb, rd_rs1,
+                   binop(Iop_Add64, getIReg64(rd_rs1), mkU64(simm)));
+         DIP("c.addi16sp %lld\n", (Long)simm);
+         return True;
+      }
+   }
+
+   /* --------------- c.lui rd, nzimm[17:12] ---------------- */
+   if (INSN(1, 0) == 0b01 && INSN(15, 13) == 0b011) {
+      UInt rd         = INSN(11, 7);
+      UInt nzimm17_12 = INSN(12, 12) << 5 | INSN(6, 2);
+      if (rd == 0 || rd == 2 || nzimm17_12 == 0) {
+         /* Invalid C.LUI, fall through. */
+      } else {
+         putIReg64(irsb, rd, mkU64(vex_sx_to_64(nzimm17_12 << 12, 18)));
+         DIP("c.lui %s, 0x%x\n", nameIReg(rd), nzimm17_12);
+         return True;
+      }
+   }
+
+   /* ---------- c.{srli,srai} rd_rs1, nzuimm[5:0] ---------- */
+   if (INSN(1, 0) == 0b01 && INSN(11, 11) == 0b0 && INSN(15, 13) == 0b100) {
+      Bool is_log    = INSN(10, 10) == 0b0;
+      UInt rd_rs1    = INSN(9, 7) + 8;
+      UInt nzuimm5_0 = INSN(12, 12) << 5 | INSN(6, 2);
+      if (nzuimm5_0 == 0) {
+         /* Invalid C.{SRLI,SRAI}, fall through. */
+      } else {
+         putIReg64(irsb, rd_rs1,
+                   binop(is_log ? Iop_Shr64 : Iop_Sar64, getIReg64(rd_rs1),
+                         mkU8(nzuimm5_0)));
+         DIP("c.%s %s, %u\n", is_log ? "srli" : "srai", nameIReg(rd_rs1),
+             nzuimm5_0);
+         return True;
+      }
+   }
+
+   /* --------------- c.andi rd_rs1, imm[5:0] --------------- */
+   if (INSN(1, 0) == 0b01 && INSN(11, 10) == 0b10 && INSN(15, 13) == 0b100) {
+      /* rd_rs1 is x8..x15 here (3-bit field + 8), so it can never be x0. */
+      UInt  rd_rs1 = INSN(9, 7) + 8;
+      UInt  imm5_0 = INSN(12, 12) << 5 | INSN(6, 2);
+      ULong simm   = vex_sx_to_64(imm5_0, 6);
+      putIReg64(irsb, rd_rs1,
+                binop(Iop_And64, getIReg64(rd_rs1), mkU64(simm)));
+      DIP("c.andi %s, 0x%llx\n", nameIReg(rd_rs1), simm);
+      return True;
+   }
+
+   /* ----------- c.{sub,xor,or,and} rd_rs1, rs2 ----------- */
+   if (INSN(1, 0) == 0b01 && INSN(15, 10) == 0b100011) {
+      UInt         funct2 = INSN(6, 5);
+      UInt         rd_rs1 = INSN(9, 7) + 8;
+      UInt         rs2    = INSN(4, 2) + 8;
+      const HChar* name;
+      IROp         op;
+      switch (funct2) {
+      case 0b00:
+         name = "sub";
+         op   = Iop_Sub64;
+         break;
+      case 0b01:
+         name = "xor";
+         op   = Iop_Xor64;
+         break;
+      case 0b10:
+         name = "or";
+         op   = Iop_Or64;
+         break;
+      case 0b11:
+         name = "and";
+         op   = Iop_And64;
+         break;
+      default:
+         vassert(0);
+      }
+      putIReg64(irsb, rd_rs1, binop(op, getIReg64(rd_rs1), getIReg64(rs2)));
+      DIP("c.%s %s, %s\n", name, nameIReg(rd_rs1), nameIReg(rs2));
+      return True;
+   }
+
+   /* -------------- c.{subw,addw} rd_rs1, rs2 -------------- */
+   if (INSN(1, 0) == 0b01 && INSN(6, 6) == 0b0 && INSN(15, 10) == 0b100111) {
+      Bool is_sub = INSN(5, 5) == 0b0;
+      UInt rd_rs1 = INSN(9, 7) + 8;
+      UInt rs2    = INSN(4, 2) + 8;
+      putIReg32(irsb, rd_rs1,
+                binop(is_sub ? Iop_Sub32 : Iop_Add32, getIReg32(rd_rs1),
+                      getIReg32(rs2)));
+      DIP("c.%s %s, %s\n", is_sub ? "subw" : "addw", nameIReg(rd_rs1),
+          nameIReg(rs2));
+      return True;
+   }
+
+   /* -------------------- c.j imm[11:1] -------------------- */
+   if (INSN(1, 0) == 0b01 && INSN(15, 13) == 0b101) {
+      UInt imm11_1 = INSN(12, 12) << 10 | INSN(8, 8) << 9 | INSN(10, 9) << 7 |
+                     INSN(6, 6) << 6 | INSN(7, 7) << 5 | INSN(2, 2) << 4 |
+                     INSN(11, 11) << 3 | INSN(5, 3);
+      ULong simm   = vex_sx_to_64(imm11_1 << 1, 12);
+      ULong dst_pc = guest_pc_curr_instr + simm;
+      putPC(irsb, mkU64(dst_pc));
+      dres->whatNext    = Dis_StopHere;
+      dres->jk_StopHere = Ijk_Boring;
+      DIP("c.j 0x%llx\n", dst_pc);
+      return True;
+   }
+
+   /* ------------- c.{beqz,bnez} rs1, imm[8:1] ------------- */
+   if (INSN(1, 0) == 0b01 && INSN(15, 14) == 0b11) {
+      Bool is_eq  = INSN(13, 13) == 0b0;
+      UInt rs1    = INSN(9, 7) + 8;
+      UInt imm8_1 = INSN(12, 12) << 7 | INSN(6, 5) << 5 | INSN(2, 2) << 4 |
+                    INSN(11, 10) << 2 | INSN(4, 3);
+      ULong simm   = vex_sx_to_64(imm8_1 << 1, 9);
+      ULong dst_pc = guest_pc_curr_instr + simm;
+      stmt(irsb, IRStmt_Exit(binop(is_eq ? Iop_CmpEQ64 : Iop_CmpNE64,
+                                   getIReg64(rs1), mkU64(0)),
+                             Ijk_Boring, IRConst_U64(dst_pc), OFFB_PC));
+      putPC(irsb, mkU64(guest_pc_curr_instr + 2));
+      dres->whatNext    = Dis_StopHere;
+      dres->jk_StopHere = Ijk_Boring;
+      DIP("c.%s %s, 0x%llx\n", is_eq ? "beqz" : "bnez", nameIReg(rs1), dst_pc);
+      return True;
+   }
+
+   /* ---- RV64C compressed instruction set, quadrant 2 ----- */
+
+   /* ------------- c.slli rd_rs1, nzuimm[5:0] -------------- */
+   if (INSN(1, 0) == 0b10 && INSN(15, 13) == 0b000) {
+      UInt rd_rs1    = INSN(11, 7);
+      UInt nzuimm5_0 = INSN(12, 12) << 5 | INSN(6, 2);
+      if (rd_rs1 == 0 || nzuimm5_0 == 0) {
+         /* Invalid C.SLLI, fall through. */
+      } else {
+         putIReg64(irsb, rd_rs1,
+                   binop(Iop_Shl64, getIReg64(rd_rs1), mkU8(nzuimm5_0)));
+         DIP("c.slli %s, %u\n", nameIReg(rd_rs1), nzuimm5_0);
+         return True;
+      }
+   }
+
+   /* -------------- c.fldsp rd, uimm[8:3](x2) -------------- */
+   if (INSN(1, 0) == 0b10 && INSN(15, 13) == 0b001) {
+      UInt  rd      = INSN(11, 7);
+      UInt  rs1     = 2; /* base=x2/sp */
+      UInt  uimm8_3 = INSN(4, 2) << 3 | INSN(12, 12) << 2 | INSN(6, 5);
+      ULong uimm    = uimm8_3 << 3;
+      putFReg64(irsb, rd,
+                loadLE(Ity_F64, binop(Iop_Add64, getIReg64(rs1), mkU64(uimm))));
+      DIP("c.fldsp %s, %llu(%s)\n", nameFReg(rd), uimm, nameIReg(rs1));
+      return True;
+   }
+
+   /* -------------- c.lwsp rd, uimm[7:2](x2) --------------- */
+   if (INSN(1, 0) == 0b10 && INSN(15, 13) == 0b010) {
+      UInt rd      = INSN(11, 7);
+      UInt rs1     = 2; /* base=x2/sp */
+      UInt uimm7_2 = INSN(3, 2) << 4 | INSN(12, 12) << 3 | INSN(6, 4);
+      if (rd == 0) {
+         /* Invalid C.LWSP, fall through. */
+      } else {
+         ULong uimm = uimm7_2 << 2;
+         putIReg64(irsb, rd,
+                   unop(Iop_32Sto64,
+                        loadLE(Ity_I32,
+                               binop(Iop_Add64, getIReg64(rs1), mkU64(uimm)))));
+         DIP("c.lwsp %s, %llu(%s)\n", nameIReg(rd), uimm, nameIReg(rs1));
+         return True;
+      }
+   }
+
+   /* -------------- c.ldsp rd, uimm[8:3](x2) --------------- */
+   if (INSN(1, 0) == 0b10 && INSN(15, 13) == 0b011) {
+      UInt rd      = INSN(11, 7);
+      UInt rs1     = 2; /* base=x2/sp */
+      UInt uimm8_3 = INSN(4, 2) << 3 | INSN(12, 12) << 2 | INSN(6, 5);
+      if (rd == 0) {
+         /* Invalid C.LDSP, fall through. */
+      } else {
+         ULong uimm = uimm8_3 << 3;
+         putIReg64(
+            irsb, rd,
+            loadLE(Ity_I64, binop(Iop_Add64, getIReg64(rs1), mkU64(uimm))));
+         DIP("c.ldsp %s, %llu(%s)\n", nameIReg(rd), uimm, nameIReg(rs1));
+         return True;
+      }
+   }
+
+   /* ---------------------- c.jr rs1 ----------------------- */
+   if (INSN(1, 0) == 0b10 && INSN(15, 12) == 0b1000) {
+      UInt rs1 = INSN(11, 7);
+      UInt rs2 = INSN(6, 2);
+      if (rs1 == 0 || rs2 != 0) {
+         /* Invalid C.JR, fall through. */
+      } else {
+         putPC(irsb, getIReg64(rs1));
+         dres->whatNext = Dis_StopHere;
+         if (rs1 == 1 /*x1/ra*/) {
+            dres->jk_StopHere = Ijk_Ret;
+            DIP("c.ret\n");
+         } else {
+            dres->jk_StopHere = Ijk_Boring;
+            DIP("c.jr %s\n", nameIReg(rs1));
+         }
+         return True;
+      }
+   }
+
+   /* -------------------- c.mv rd, rs2 --------------------- */
+   if (INSN(1, 0) == 0b10 && INSN(15, 12) == 0b1000) {
+      UInt rd  = INSN(11, 7);
+      UInt rs2 = INSN(6, 2);
+      if (rd == 0 || rs2 == 0) {
+         /* Invalid C.MV, fall through. */
+      } else {
+         putIReg64(irsb, rd, getIReg64(rs2));
+         DIP("c.mv %s, %s\n", nameIReg(rd), nameIReg(rs2));
+         return True;
+      }
+   }
+
+   /* --------------------- c.ebreak ------------------------ */
+   if (INSN(15, 0) == 0b1001000000000010) {
+      putPC(irsb, mkU64(guest_pc_curr_instr + 2));
+      dres->whatNext    = Dis_StopHere;
+      dres->jk_StopHere = Ijk_SigTRAP;
+      DIP("c.ebreak\n");
+      return True;
+   }
+
+   /* --------------------- c.jalr rs1 ---------------------- */
+   if (INSN(1, 0) == 0b10 && INSN(15, 12) == 0b1001) {
+      UInt rs1 = INSN(11, 7);
+      UInt rs2 = INSN(6, 2);
+      if (rs1 == 0 || rs2 != 0) {
+         /* Invalid C.JALR, fall through. */
+      } else {
+         putIReg64(irsb, 1 /*x1/ra*/, mkU64(guest_pc_curr_instr + 2));
+         putPC(irsb, getIReg64(rs1));
+         dres->whatNext    = Dis_StopHere;
+         dres->jk_StopHere = Ijk_Call;
+         DIP("c.jalr %s\n", nameIReg(rs1));
+         return True;
+      }
+   }
+
+   /* ------------------ c.add rd_rs1, rs2 ------------------ */
+   if (INSN(1, 0) == 0b10 && INSN(15, 12) == 0b1001) {
+      UInt rd_rs1 = INSN(11, 7);
+      UInt rs2    = INSN(6, 2);
+      if (rd_rs1 == 0 || rs2 == 0) {
+         /* Invalid C.ADD, fall through. */
+      } else {
+         putIReg64(irsb, rd_rs1,
+                   binop(Iop_Add64, getIReg64(rd_rs1), getIReg64(rs2)));
+         DIP("c.add %s, %s\n", nameIReg(rd_rs1), nameIReg(rs2));
+         return True;
+      }
+   }
+
+   /* ------------- c.fsdsp rs2, uimm[8:3](x2) -------------- */
+   if (INSN(1, 0) == 0b10 && INSN(15, 13) == 0b101) {
+      UInt  rs1     = 2; /* base=x2/sp */
+      UInt  rs2     = INSN(6, 2);
+      UInt  uimm8_3 = INSN(9, 7) << 3 | INSN(12, 10);
+      ULong uimm    = uimm8_3 << 3;
+      storeLE(irsb, binop(Iop_Add64, getIReg64(rs1), mkU64(uimm)),
+              getFReg64(rs2));
+      DIP("c.fsdsp %s, %llu(%s)\n", nameFReg(rs2), uimm, nameIReg(rs1));
+      return True;
+   }
+
+   /* -------------- c.swsp rs2, uimm[7:2](x2) -------------- */
+   if (INSN(1, 0) == 0b10 && INSN(15, 13) == 0b110) {
+      UInt  rs1     = 2; /* base=x2/sp */
+      UInt  rs2     = INSN(6, 2);
+      UInt  uimm7_2 = INSN(8, 7) << 4 | INSN(12, 9);
+      ULong uimm    = uimm7_2 << 2;
+      storeLE(irsb, binop(Iop_Add64, getIReg64(rs1), mkU64(uimm)),
+              unop(Iop_64to32, getIReg64(rs2)));
+      DIP("c.swsp %s, %llu(%s)\n", nameIReg(rs2), uimm, nameIReg(rs1));
+      return True;
+   }
+
+   /* -------------- c.sdsp rs2, uimm[8:3](x2) -------------- */
+   if (INSN(1, 0) == 0b10 && INSN(15, 13) == 0b111) {
+      UInt  rs1     = 2; /* base=x2/sp */
+      UInt  rs2     = INSN(6, 2);
+      UInt  uimm8_3 = INSN(9, 7) << 3 | INSN(12, 10);
+      ULong uimm    = uimm8_3 << 3;
+      storeLE(irsb, binop(Iop_Add64, getIReg64(rs1), mkU64(uimm)),
+              getIReg64(rs2));
+      DIP("c.sdsp %s, %llu(%s)\n", nameIReg(rs2), uimm, nameIReg(rs1));
+      return True;
+   }
+
+   if (sigill_diag)
+      vex_printf("RISCV64 front end: compressed\n");
+   return False;
+}
+
+static Bool dis_RV64I(/*MB_OUT*/ DisResult* dres,
+                      /*OUT*/ IRSB*         irsb,
+                      UInt                  insn,
+                      Addr                  guest_pc_curr_instr)
+{
+   /* ------------- RV64I base instruction set -------------- */
+
+   /* ----------------- lui rd, imm[31:12] ------------------ */
+   if (INSN(6, 0) == 0b0110111) {
+      UInt rd       = INSN(11, 7);
+      UInt imm31_12 = INSN(31, 12);
+      if (rd != 0)
+         putIReg64(irsb, rd, mkU64(vex_sx_to_64(imm31_12 << 12, 32)));
+      DIP("lui %s, 0x%x\n", nameIReg(rd), imm31_12);
+      return True;
+   }
+
+   /* ---------------- auipc rd, imm[31:12] ----------------- */
+   if (INSN(6, 0) == 0b0010111) {
+      UInt rd       = INSN(11, 7);
+      UInt imm31_12 = INSN(31, 12);
+      if (rd != 0)
+         putIReg64(
+            irsb, rd,
+            mkU64(guest_pc_curr_instr + vex_sx_to_64(imm31_12 << 12, 32)));
+      DIP("auipc %s, 0x%x\n", nameIReg(rd), imm31_12);
+      return True;
+   }
+
+   /* ------------------ jal rd, imm[20:1] ------------------ */
+   if (INSN(6, 0) == 0b1101111) {
+      UInt rd      = INSN(11, 7);
+      UInt imm20_1 = INSN(31, 31) << 19 | INSN(19, 12) << 11 |
+                     INSN(20, 20) << 10 | INSN(30, 21);
+      ULong simm   = vex_sx_to_64(imm20_1 << 1, 21);
+      ULong dst_pc = guest_pc_curr_instr + simm;
+      if (rd != 0)
+         putIReg64(irsb, rd, mkU64(guest_pc_curr_instr + 4));
+      putPC(irsb, mkU64(dst_pc));
+      dres->whatNext = Dis_StopHere;
+      if (rd != 0) {
+         dres->jk_StopHere = Ijk_Call;
+         DIP("jal %s, 0x%llx\n", nameIReg(rd), dst_pc);
+      } else {
+         dres->jk_StopHere = Ijk_Boring;
+         DIP("j 0x%llx\n", dst_pc);
+      }
+      return True;
+   }
+
+   /* --------------- jalr rd, imm[11:0](rs1) --------------- */
+   if (INSN(6, 0) == 0b1100111 && INSN(14, 12) == 0b000) {
+      UInt   rd      = INSN(11, 7);
+      UInt   rs1     = INSN(19, 15);
+      UInt   imm11_0 = INSN(31, 20);
+      ULong  simm    = vex_sx_to_64(imm11_0, 12);
+      IRTemp dst_pc  = newTemp(irsb, Ity_I64);
+      assign(irsb, dst_pc, binop(Iop_Add64, getIReg64(rs1), mkU64(simm)));
+      if (rd != 0)
+         putIReg64(irsb, rd, mkU64(guest_pc_curr_instr + 4));
+      putPC(irsb, mkexpr(dst_pc));
+      dres->whatNext = Dis_StopHere;
+      if (rd == 0) {
+         if (rs1 == 1 /*x1/ra*/ && simm == 0) {
+            dres->jk_StopHere = Ijk_Ret;
+            DIP("ret\n");
+         } else {
+            dres->jk_StopHere = Ijk_Boring;
+            DIP("jr %lld(%s)\n", (Long)simm, nameIReg(rs1));
+         }
+      } else {
+         dres->jk_StopHere = Ijk_Call;
+         DIP("jalr %s, %lld(%s)\n", nameIReg(rd), (Long)simm, nameIReg(rs1));
+      }
+      return True;
+   }
+
+   /* ------------ {beq,bne} rs1, rs2, imm[12:1] ------------ */
+   /* ------------ {blt,bge} rs1, rs2, imm[12:1] ------------ */
+   /* ----------- {bltu,bgeu} rs1, rs2, imm[12:1] ----------- */
+   if (INSN(6, 0) == 0b1100011) {
+      UInt funct3  = INSN(14, 12);
+      UInt rs1     = INSN(19, 15);
+      UInt rs2     = INSN(24, 20);
+      UInt imm12_1 = INSN(31, 31) << 11 | INSN(7, 7) << 10 | INSN(30, 25) << 4 |
+                     INSN(11, 8);
+      if (funct3 == 0b010 || funct3 == 0b011) {
+         /* Invalid B<x>, fall through. */
+      } else {
+         ULong        simm   = vex_sx_to_64(imm12_1 << 1, 13);
+         ULong        dst_pc = guest_pc_curr_instr + simm;
+         const HChar* name;
+         IRExpr*      cond;
+         switch (funct3) {
+         case 0b000:
+            name = "beq";
+            cond = binop(Iop_CmpEQ64, getIReg64(rs1), getIReg64(rs2));
+            break;
+         case 0b001:
+            name = "bne";
+            cond = binop(Iop_CmpNE64, getIReg64(rs1), getIReg64(rs2));
+            break;
+         case 0b100:
+            name = "blt";
+            cond = binop(Iop_CmpLT64S, getIReg64(rs1), getIReg64(rs2));
+            break;
+         case 0b101:
+            name = "bge";
+            cond = binop(Iop_CmpLE64S, getIReg64(rs2), getIReg64(rs1));
+            break;
+         case 0b110:
+            name = "bltu";
+            cond = binop(Iop_CmpLT64U, getIReg64(rs1), getIReg64(rs2));
+            break;
+         case 0b111:
+            name = "bgeu";
+            cond = binop(Iop_CmpLE64U, getIReg64(rs2), getIReg64(rs1));
+            break;
+         default:
+            vassert(0);
+         }
+         stmt(irsb,
+              IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(dst_pc), OFFB_PC));
+         putPC(irsb, mkU64(guest_pc_curr_instr + 4));
+         dres->whatNext    = Dis_StopHere;
+         dres->jk_StopHere = Ijk_Boring;
+         DIP("%s %s, %s, 0x%llx\n", name, nameIReg(rs1), nameIReg(rs2), dst_pc);
+         return True;
+      }
+   }
+
+   /* ---------- {lb,lh,lw,ld} rd, imm[11:0](rs1) ----------- */
+   /* ---------- {lbu,lhu,lwu} rd, imm[11:0](rs1) ----------- */
+   if (INSN(6, 0) == 0b0000011) {
+      UInt funct3  = INSN(14, 12);
+      UInt rd      = INSN(11, 7);
+      UInt rs1     = INSN(19, 15);
+      UInt imm11_0 = INSN(31, 20);
+      if (funct3 == 0b111) {
+         /* Invalid L<x>, fall through. */
+      } else {
+         ULong simm = vex_sx_to_64(imm11_0, 12);
+         if (rd != 0) {
+            IRExpr* ea = binop(Iop_Add64, getIReg64(rs1), mkU64(simm));
+            IRExpr* expr;
+            switch (funct3) {
+            case 0b000:
+               expr = unop(Iop_8Sto64, loadLE(Ity_I8, ea));
+               break;
+            case 0b001:
+               expr = unop(Iop_16Sto64, loadLE(Ity_I16, ea));
+               break;
+            case 0b010:
+               expr = unop(Iop_32Sto64, loadLE(Ity_I32, ea));
+               break;
+            case 0b011:
+               expr = loadLE(Ity_I64, ea);
+               break;
+            case 0b100:
+               expr = unop(Iop_8Uto64, loadLE(Ity_I8, ea));
+               break;
+            case 0b101:
+               expr = unop(Iop_16Uto64, loadLE(Ity_I16, ea));
+               break;
+            case 0b110:
+               expr = unop(Iop_32Uto64, loadLE(Ity_I32, ea));
+               break;
+            default:
+               vassert(0);
+            }
+            putIReg64(irsb, rd, expr);
+         }
+         const HChar* name;
+         switch (funct3) {
+         case 0b000:
+            name = "lb";
+            break;
+         case 0b001:
+            name = "lh";
+            break;
+         case 0b010:
+            name = "lw";
+            break;
+         case 0b011:
+            name = "ld";
+            break;
+         case 0b100:
+            name = "lbu";
+            break;
+         case 0b101:
+            name = "lhu";
+            break;
+         case 0b110:
+            name = "lwu";
+            break;
+         default:
+            vassert(0);
+         }
+         DIP("%s %s, %lld(%s)\n", name, nameIReg(rd), (Long)simm,
+             nameIReg(rs1));
+         return True;
+      }
+   }
+
+   /* ---------- {sb,sh,sw,sd} rs2, imm[11:0](rs1) ---------- */
+   if (INSN(6, 0) == 0b0100011) {
+      UInt funct3  = INSN(14, 12);
+      UInt rs1     = INSN(19, 15);
+      UInt rs2     = INSN(24, 20);
+      UInt imm11_0 = INSN(31, 25) << 5 | INSN(11, 7);
+      if (funct3 == 0b100 || funct3 == 0b101 || funct3 == 0b110 ||
+          funct3 == 0b111) {
+         /* Invalid S<x>, fall through. */
+      } else {
+         ULong        simm = vex_sx_to_64(imm11_0, 12);
+         IRExpr*      ea   = binop(Iop_Add64, getIReg64(rs1), mkU64(simm));
+         const HChar* name;
+         IRExpr*      expr;
+         switch (funct3) {
+         case 0b000:
+            name = "sb";
+            expr = unop(Iop_64to8, getIReg64(rs2));
+            break;
+         case 0b001:
+            name = "sh";
+            expr = unop(Iop_64to16, getIReg64(rs2));
+            break;
+         case 0b010:
+            name = "sw";
+            expr = unop(Iop_64to32, getIReg64(rs2));
+            break;
+         case 0b011:
+            name = "sd";
+            expr = getIReg64(rs2);
+            break;
+         default:
+            vassert(0);
+         }
+         storeLE(irsb, ea, expr);
+         DIP("%s %s, %lld(%s)\n", name, nameIReg(rs2), (Long)simm,
+             nameIReg(rs1));
+         return True;
+      }
+   }
+
+   /* -------- {addi,slti,sltiu} rd, rs1, imm[11:0] --------- */
+   /* --------- {xori,ori,andi} rd, rs1, imm[11:0] ---------- */
+   if (INSN(6, 0) == 0b0010011) {
+      UInt funct3  = INSN(14, 12);
+      UInt rd      = INSN(11, 7);
+      UInt rs1     = INSN(19, 15);
+      UInt imm11_0 = INSN(31, 20);
+      if (funct3 == 0b001 || funct3 == 0b101) {
+         /* Invalid <x>I, fall through. */
+      } else {
+         ULong simm = vex_sx_to_64(imm11_0, 12);
+         if (rd != 0) {
+            IRExpr* expr;
+            switch (funct3) {
+            case 0b000:
+               expr = binop(Iop_Add64, getIReg64(rs1), mkU64(simm));
+               break;
+            case 0b010:
+               expr = unop(Iop_1Uto64,
+                           binop(Iop_CmpLT64S, getIReg64(rs1), mkU64(simm)));
+               break;
+            case 0b011:
+               /* Note that the comparison itself is unsigned but the immediate
+                  is sign-extended. */
+               expr = unop(Iop_1Uto64,
+                           binop(Iop_CmpLT64U, getIReg64(rs1), mkU64(simm)));
+               break;
+            case 0b100:
+               expr = binop(Iop_Xor64, getIReg64(rs1), mkU64(simm));
+               break;
+            case 0b110:
+               expr = binop(Iop_Or64, getIReg64(rs1), mkU64(simm));
+               break;
+            case 0b111:
+               expr = binop(Iop_And64, getIReg64(rs1), mkU64(simm));
+               break;
+            default:
+               vassert(0);
+            }
+            putIReg64(irsb, rd, expr);
+         }
+         const HChar* name;
+         switch (funct3) {
+         case 0b000:
+            name = "addi";
+            break;
+         case 0b010:
+            name = "slti";
+            break;
+         case 0b011:
+            name = "sltiu";
+            break;
+         case 0b100:
+            name = "xori";
+            break;
+         case 0b110:
+            name = "ori";
+            break;
+         case 0b111:
+            name = "andi";
+            break;
+         default:
+            vassert(0);
+         }
+         DIP("%s %s, %s, %lld\n", name, nameIReg(rd), nameIReg(rs1),
+             (Long)simm);
+         return True;
+      }
+   }
+
+   /* --------------- slli rd, rs1, uimm[5:0] --------------- */
+   if (INSN(6, 0) == 0b0010011 && INSN(14, 12) == 0b001 &&
+       INSN(31, 26) == 0b000000) {
+      UInt rd      = INSN(11, 7);
+      UInt rs1     = INSN(19, 15);
+      UInt uimm5_0 = INSN(25, 20);
+      if (rd != 0)
+         putIReg64(irsb, rd, binop(Iop_Shl64, getIReg64(rs1), mkU8(uimm5_0)));
+      DIP("slli %s, %s, %u\n", nameIReg(rd), nameIReg(rs1), uimm5_0);
+      return True;
+   }
+
+   /* ----------- {srli,srai} rd, rs1, uimm[5:0] ----------- */
+   if (INSN(6, 0) == 0b0010011 && INSN(14, 12) == 0b101 &&
+       INSN(29, 26) == 0b0000 && INSN(31, 31) == 0b0) {
+      Bool is_log  = INSN(30, 30) == 0b0;
+      UInt rd      = INSN(11, 7);
+      UInt rs1     = INSN(19, 15);
+      UInt uimm5_0 = INSN(25, 20);
+      if (rd != 0)
+         putIReg64(irsb, rd,
+                   binop(is_log ? Iop_Shr64 : Iop_Sar64, getIReg64(rs1),
+                         mkU8(uimm5_0)));
+      DIP("%s %s, %s, %u\n", is_log ? "srli" : "srai", nameIReg(rd),
+          nameIReg(rs1), uimm5_0);
+      return True;
+   }
+
+   /* --------------- {add,sub} rd, rs1, rs2 ---------------- */
+   /* ------------- {sll,srl,sra} rd, rs1, rs2 -------------- */
+   /* --------------- {slt,sltu} rd, rs1, rs2 --------------- */
+   /* -------------- {xor,or,and} rd, rs1, rs2 -------------- */
+   if (INSN(6, 0) == 0b0110011 && INSN(29, 25) == 0b00000 &&
+       INSN(31, 31) == 0b0) {
+      UInt funct3  = INSN(14, 12);
+      Bool is_base = INSN(30, 30) == 0b0;
+      UInt rd      = INSN(11, 7);
+      UInt rs1     = INSN(19, 15);
+      UInt rs2     = INSN(24, 20);
+      if (!is_base && funct3 != 0b000 && funct3 != 0b101) {
+         /* Invalid <x>, fall through. */
+      } else {
+         if (rd != 0) {
+            IRExpr* expr;
+            switch (funct3) {
+            case 0b000: /* add, sub */
+               expr = binop(is_base ? Iop_Add64 : Iop_Sub64, getIReg64(rs1),
+                            getIReg64(rs2));
+               break;
+            case 0b001:
+               expr = binop(Iop_Shl64, getIReg64(rs1),
+                            unop(Iop_64to8, getIReg64(rs2)));
+               break;
+            case 0b010:
+               expr = unop(Iop_1Uto64,
+                           binop(Iop_CmpLT64S, getIReg64(rs1), getIReg64(rs2)));
+               break;
+            case 0b011:
+               expr = unop(Iop_1Uto64,
+                           binop(Iop_CmpLT64U, getIReg64(rs1), getIReg64(rs2)));
+               break;
+            case 0b100:
+               expr = binop(Iop_Xor64, getIReg64(rs1), getIReg64(rs2));
+               break;
+            case 0b101:
+               expr = binop(is_base ? Iop_Shr64 : Iop_Sar64, getIReg64(rs1),
+                            unop(Iop_64to8, getIReg64(rs2)));
+               break;
+            case 0b110:
+               expr = binop(Iop_Or64, getIReg64(rs1), getIReg64(rs2));
+               break;
+            case 0b111:
+               expr = binop(Iop_And64, getIReg64(rs1), getIReg64(rs2));
+               break;
+            default:
+               vassert(0);
+            }
+            putIReg64(irsb, rd, expr);
+         }
+         const HChar* name;
+         switch (funct3) {
+         case 0b000:
+            name = is_base ? "add" : "sub";
+            break;
+         case 0b001:
+            name = "sll";
+            break;
+         case 0b010:
+            name = "slt";
+            break;
+         case 0b011:
+            name = "sltu";
+            break;
+         case 0b100:
+            name = "xor";
+            break;
+         case 0b101:
+            name = is_base ? "srl" : "sra";
+            break;
+         case 0b110:
+            name = "or";
+            break;
+         case 0b111:
+            name = "and";
+            break;
+         default:
+            vassert(0);
+         }
+         DIP("%s %s, %s, %s\n", name, nameIReg(rd), nameIReg(rs1),
+             nameIReg(rs2));
+         return True;
+      }
+   }
+
+   /* ------------------------ fence ------------------------ */
+   if (INSN(19, 0) == 0b00000000000000001111) {
+      UInt succ = INSN(23, 20);
+      UInt pred = INSN(27, 24);
+      UInt fm   = INSN(31, 28);
+      if (fm != 0b0000 && (fm != 0b1000 || succ != 0b0011 || pred != 0b0011)) {
+         /* Invalid FENCE, fall through. */
+      } else {
+         stmt(irsb, IRStmt_MBE(Imbe_Fence));
+         if (fm == 0b1000)
+            DIP("fence.tso\n");
+         else if (pred == 0b1111 && succ == 0b1111)
+            DIP("fence\n");
+         else
+            DIP("fence %s%s%s%s,%s%s%s%s\n", (pred & 0x8) ? "i" : "",
+                (pred & 0x4) ? "o" : "", (pred & 0x2) ? "r" : "",
+                (pred & 0x1) ? "w" : "", (succ & 0x8) ? "i" : "",
+                (succ & 0x4) ? "o" : "", (succ & 0x2) ? "r" : "",
+                (succ & 0x1) ? "w" : "");
+         return True;
+      }
+   }
+
+   /* ------------------------ ecall ------------------------ */
+   if (INSN(31, 0) == 0b00000000000000000000000001110011) {
+      putPC(irsb, mkU64(guest_pc_curr_instr + 4));
+      dres->whatNext    = Dis_StopHere;
+      dres->jk_StopHere = Ijk_Sys_syscall;
+      DIP("ecall\n");
+      return True;
+   }
+
+   /* ------------------------ ebreak ------------------------ */
+   if (INSN(31, 0) == 0b00000000000100000000000001110011) {
+      putPC(irsb, mkU64(guest_pc_curr_instr + 4));
+      dres->whatNext    = Dis_StopHere;
+      dres->jk_StopHere = Ijk_SigTRAP;
+      DIP("ebreak\n");
+      return True;
+   }
+
+   /* -------------- addiw rd, rs1, imm[11:0] --------------- */
+   if (INSN(6, 0) == 0b0011011 && INSN(14, 12) == 0b000) {
+      UInt rd      = INSN(11, 7);
+      UInt rs1     = INSN(19, 15);
+      UInt imm11_0 = INSN(31, 20);
+      UInt simm    = (UInt)vex_sx_to_64(imm11_0, 12);
+      if (rd != 0)
+         putIReg32(irsb, rd, binop(Iop_Add32, getIReg32(rs1), mkU32(simm)));
+      DIP("addiw %s, %s, %d\n", nameIReg(rd), nameIReg(rs1), (Int)simm);
+      return True;
+   }
+
+   /* -------------- slliw rd, rs1, uimm[4:0] --------------- */
+   if (INSN(6, 0) == 0b0011011 && INSN(14, 12) == 0b001 &&
+       INSN(31, 25) == 0b0000000) {
+      UInt rd      = INSN(11, 7);
+      UInt rs1     = INSN(19, 15);
+      UInt uimm4_0 = INSN(24, 20);
+      if (rd != 0)
+         putIReg32(irsb, rd, binop(Iop_Shl32, getIReg32(rs1), mkU8(uimm4_0)));
+      DIP("slliw %s, %s, %u\n", nameIReg(rd), nameIReg(rs1), uimm4_0);
+      return True;
+   }
+
+   /* ---------- {srliw,sraiw} rd, rs1, uimm[4:0] ----------- */
+   if (INSN(6, 0) == 0b0011011 && INSN(14, 12) == 0b101 &&
+       INSN(29, 25) == 0b00000 && INSN(31, 31) == 0b0) {
+      Bool is_log  = INSN(30, 30) == 0b0;
+      UInt rd      = INSN(11, 7);
+      UInt rs1     = INSN(19, 15);
+      UInt uimm4_0 = INSN(24, 20);
+      if (rd != 0)
+         putIReg32(irsb, rd,
+                   binop(is_log ? Iop_Shr32 : Iop_Sar32, getIReg32(rs1),
+                         mkU8(uimm4_0)));
+      DIP("%s %s, %s, %u\n", is_log ? "srliw" : "sraiw", nameIReg(rd),
+          nameIReg(rs1), uimm4_0);
+      return True;
+   }
+
+   /* -------------- {addw,subw} rd, rs1, rs2 --------------- */
+   if (INSN(6, 0) == 0b0111011 && INSN(14, 12) == 0b000 &&
+       INSN(29, 25) == 0b00000 && INSN(31, 31) == 0b0) {
+      Bool is_add = INSN(30, 30) == 0b0;
+      UInt rd     = INSN(11, 7);
+      UInt rs1    = INSN(19, 15);
+      UInt rs2    = INSN(24, 20);
+      if (rd != 0)
+         putIReg32(irsb, rd,
+                   binop(is_add ? Iop_Add32 : Iop_Sub32, getIReg32(rs1),
+                         getIReg32(rs2)));
+      DIP("%s %s, %s, %s\n", is_add ? "addw" : "subw", nameIReg(rd),
+          nameIReg(rs1), nameIReg(rs2));
+      return True;
+   }
+
+   /* ------------------ sllw rd, rs1, rs2 ------------------ */
+   if (INSN(6, 0) == 0b0111011 && INSN(14, 12) == 0b001 &&
+       INSN(31, 25) == 0b0000000) {
+      UInt rd  = INSN(11, 7);
+      UInt rs1 = INSN(19, 15);
+      UInt rs2 = INSN(24, 20);
+      if (rd != 0)
+         putIReg32(
+            irsb, rd,
+            binop(Iop_Shl32, getIReg32(rs1), unop(Iop_64to8, getIReg64(rs2))));
+      DIP("sllw %s, %s, %s\n", nameIReg(rd), nameIReg(rs1), nameIReg(rs2));
+      return True;
+   }
+
+   /* -------------- {srlw,sraw} rd, rs1, rs2 --------------- */
+   if (INSN(6, 0) == 0b0111011 && INSN(14, 12) == 0b101 &&
+       INSN(29, 25) == 0b00000 && INSN(31, 31) == 0b0) {
+      Bool is_log = INSN(30, 30) == 0b0;
+      UInt rd     = INSN(11, 7);
+      UInt rs1    = INSN(19, 15);
+      UInt rs2    = INSN(24, 20);
+      if (rd != 0)
+         putIReg32(irsb, rd,
+                   binop(is_log ? Iop_Shr32 : Iop_Sar32, getIReg32(rs1),
+                         unop(Iop_64to8, getIReg64(rs2))));
+      DIP("%s %s, %s, %s\n", is_log ? "srlw" : "sraw", nameIReg(rd),
+          nameIReg(rs1), nameIReg(rs2));
+      return True;
+   }
+
+   return False;
+}
+
+/* Translate one RV64M (integer multiply/divide) instruction into irsb.
+   Returns True if insn was decoded here, False if it is not handled. */
+static Bool dis_RV64M(/*MB_OUT*/ DisResult* dres,
+                      /*OUT*/ IRSB*         irsb,
+                      UInt                  insn)
+{
+   /* -------------- RV64M standard extension --------------- */
+
+   /* -------- {mul,mulh,mulhsu,mulhu} rd, rs1, rs2 --------- */
+   /* --------------- {div,divu} rd, rs1, rs2 --------------- */
+   /* --------------- {rem,remu} rd, rs1, rs2 --------------- */
+   if (INSN(6, 0) == 0b0110011 && INSN(31, 25) == 0b0000001) {
+      UInt rd     = INSN(11, 7);
+      UInt funct3 = INSN(14, 12);
+      UInt rs1    = INSN(19, 15);
+      UInt rs2    = INSN(24, 20);
+      if (funct3 == 0b010) {
+         /* MULHSU, not currently handled, fall through. */
+      } else {
+         if (rd != 0) { /* No register write is emitted when rd is 0. */
+            IRExpr* expr;
+            switch (funct3) {
+            case 0b000:
+               expr = binop(Iop_Mul64, getIReg64(rs1), getIReg64(rs2));
+               break;
+            case 0b001: /* mulh: high 64 bits of the MullS64 result */
+               expr = unop(Iop_128HIto64,
+                           binop(Iop_MullS64, getIReg64(rs1), getIReg64(rs2)));
+               break;
+            case 0b011: /* mulhu: high 64 bits of the MullU64 result */
+               expr = unop(Iop_128HIto64,
+                           binop(Iop_MullU64, getIReg64(rs1), getIReg64(rs2)));
+               break;
+            case 0b100:
+               expr = binop(Iop_DivS64, getIReg64(rs1), getIReg64(rs2));
+               break;
+            case 0b101:
+               expr = binop(Iop_DivU64, getIReg64(rs1), getIReg64(rs2));
+               break;
+            case 0b110: /* rem: high 64 bits of the DivModS64to64 result */
+               expr =
+                  unop(Iop_128HIto64, binop(Iop_DivModS64to64, getIReg64(rs1),
+                                            getIReg64(rs2)));
+               break;
+            case 0b111: /* remu: high 64 bits of the DivModU64to64 result */
+               expr =
+                  unop(Iop_128HIto64, binop(Iop_DivModU64to64, getIReg64(rs1),
+                                            getIReg64(rs2)));
+               break;
+            default:
+               vassert(0);
+            }
+            putIReg64(irsb, rd, expr);
+         }
+         const HChar* name;
+         switch (funct3) {
+         case 0b000:
+            name = "mul";
+            break;
+         case 0b001:
+            name = "mulh";
+            break;
+         case 0b011:
+            name = "mulhu";
+            break;
+         case 0b100:
+            name = "div";
+            break;
+         case 0b101:
+            name = "divu";
+            break;
+         case 0b110:
+            name = "rem";
+            break;
+         case 0b111:
+            name = "remu";
+            break;
+         default:
+            vassert(0);
+         }
+         DIP("%s %s, %s, %s\n", name, nameIReg(rd), nameIReg(rs1),
+             nameIReg(rs2));
+         return True;
+      }
+   }
+
+   /* ------------------ mulw rd, rs1, rs2 ------------------ */
+   /* -------------- {divw,divuw} rd, rs1, rs2 -------------- */
+   /* -------------- {remw,remuw} rd, rs1, rs2 -------------- */
+   if (INSN(6, 0) == 0b0111011 && INSN(31, 25) == 0b0000001) {
+      UInt rd     = INSN(11, 7);
+      UInt funct3 = INSN(14, 12);
+      UInt rs1    = INSN(19, 15);
+      UInt rs2    = INSN(24, 20);
+      if (funct3 == 0b001 || funct3 == 0b010 || funct3 == 0b011) {
+         /* Invalid {MUL,DIV,REM}<x>W, fall through. */
+      } else {
+         if (rd != 0) { /* No register write is emitted when rd is 0. */
+            IRExpr* expr;
+            switch (funct3) {
+            case 0b000:
+               expr = binop(Iop_Mul32, getIReg32(rs1), getIReg32(rs2));
+               break;
+            case 0b100:
+               expr = binop(Iop_DivS32, getIReg32(rs1), getIReg32(rs2));
+               break;
+            case 0b101:
+               expr = binop(Iop_DivU32, getIReg32(rs1), getIReg32(rs2));
+               break;
+            case 0b110: /* remw: high 32 bits of the DivModS32to32 result */
+               expr = unop(Iop_64HIto32, binop(Iop_DivModS32to32,
+                                               getIReg32(rs1), getIReg32(rs2)));
+               break;
+            case 0b111: /* remuw: high 32 bits of the DivModU32to32 result */
+               expr = unop(Iop_64HIto32, binop(Iop_DivModU32to32,
+                                               getIReg32(rs1), getIReg32(rs2)));
+               break;
+            default:
+               vassert(0);
+            }
+            putIReg32(irsb, rd, expr);
+         }
+         const HChar* name;
+         switch (funct3) {
+         case 0b000:
+            name = "mulw";
+            break;
+         case 0b100:
+            name = "divw";
+            break;
+         case 0b101:
+            name = "divuw";
+            break;
+         case 0b110:
+            name = "remw";
+            break;
+         case 0b111:
+            name = "remuw";
+            break;
+         default:
+            vassert(0);
+         }
+         DIP("%s %s, %s, %s\n", name, nameIReg(rd), nameIReg(rs1),
+             nameIReg(rs2));
+         return True;
+      }
+   }
+
+   return False;
+}
+
+/* Translate one RV64A (atomic) instruction -- lr, sc or amo<x> -- into irsb.
+   guest_pc_curr_instr is used to build the side-exit targets of sc and amo<x>.
+   Returns True if insn was decoded here, False if it is not handled. */
+static Bool dis_RV64A(/*MB_OUT*/ DisResult* dres,
+                      /*OUT*/ IRSB*         irsb,
+                      UInt                  insn,
+                      Addr                  guest_pc_curr_instr,
+                      const VexAbiInfo*     abiinfo)
+{
+   /* -------------- RV64A standard extension --------------- */
+
+   /* ----------------- lr.{w,d} rd, (rs1) ------------------ */
+   if (INSN(6, 0) == 0b0101111 && INSN(14, 13) == 0b01 &&
+       INSN(24, 20) == 0b00000 && INSN(31, 27) == 0b00010) {
+      UInt rd    = INSN(11, 7);
+      Bool is_32 = INSN(12, 12) == 0b0;
+      UInt rs1   = INSN(19, 15);
+      UInt aqrl  = INSN(26, 25);
+
+      if (aqrl & 0x1) /* insn bit 25 (rl): fence before the access */
+         stmt(irsb, IRStmt_MBE(Imbe_Fence));
+
+      IRType ty = is_32 ? Ity_I32 : Ity_I64;
+      if (abiinfo->guest__use_fallback_LLSC) {
+         /* Get address of the load. */
+         IRTemp ea = newTemp(irsb, Ity_I64);
+         assign(irsb, ea, getIReg64(rs1));
+
+         /* Load the value. */
+         IRTemp res = newTemp(irsb, Ity_I64);
+         assign(irsb, res, widenSto64(ty, loadLE(ty, mkexpr(ea))));
+
+         /* Set up the LLSC fallback data (SIZE = access width in bytes). */
+         stmt(irsb, IRStmt_Put(OFFB_LLSC_DATA, mkexpr(res)));
+         stmt(irsb, IRStmt_Put(OFFB_LLSC_ADDR, mkexpr(ea)));
+         stmt(irsb, IRStmt_Put(OFFB_LLSC_SIZE, mkU64(is_32 ? 4 : 8)));
+
+         /* Write the result to the destination register. */
+         if (rd != 0)
+            putIReg64(irsb, rd, mkexpr(res));
+      } else {
+         /* TODO Rework the non-fallback mode by recognizing common LR+SC
+            sequences and simulating them as one. */
+         IRTemp res = newTemp(irsb, ty);
+         stmt(irsb, IRStmt_LLSC(Iend_LE, res, getIReg64(rs1), NULL /*LL*/));
+         if (rd != 0)
+            putIReg64(irsb, rd, widenSto64(ty, mkexpr(res)));
+      }
+
+      if (aqrl & 0x2) /* insn bit 26 (aq): fence after the access */
+         stmt(irsb, IRStmt_MBE(Imbe_Fence));
+
+      DIP("lr.%s%s %s, (%s)%s\n", is_32 ? "w" : "d", nameAqRlSuffix(aqrl),
+          nameIReg(rd), nameIReg(rs1),
+          abiinfo->guest__use_fallback_LLSC ? " (fallback implementation)"
+                                            : "");
+      return True;
+   }
+
+   /* --------------- sc.{w,d} rd, rs2, (rs1) --------------- */
+   if (INSN(6, 0) == 0b0101111 && INSN(14, 13) == 0b01 &&
+       INSN(31, 27) == 0b00011) {
+      UInt rd    = INSN(11, 7);
+      Bool is_32 = INSN(12, 12) == 0b0;
+      UInt rs1   = INSN(19, 15);
+      UInt rs2   = INSN(24, 20);
+      UInt aqrl  = INSN(26, 25);
+
+      if (aqrl & 0x1) /* insn bit 25 (rl): fence before the access */
+         stmt(irsb, IRStmt_MBE(Imbe_Fence));
+
+      IRType ty = is_32 ? Ity_I32 : Ity_I64;
+      if (abiinfo->guest__use_fallback_LLSC) {
+         /* Get the store address and data up front: rd may be the same
+            register as rs1 or rs2 and is overwritten below. */
+         IRTemp ea = newTemp(irsb, Ity_I64);
+         assign(irsb, ea, getIReg64(rs1));
+         IRTemp src = newTemp(irsb, ty);
+         assign(irsb, src, narrowFrom64(ty, getIReg64(rs2)));
+         IRConst* nia = IRConst_U64(guest_pc_curr_instr + 4); /* continuation */
+
+         /* Mark the SC initially as failed. */
+         if (rd != 0)
+            putIReg64(irsb, rd, mkU64(1));
+
+         /* Set that no transaction is in progress (size 0). */
+         IRTemp size = newTemp(irsb, Ity_I64);
+         assign(irsb, size, IRExpr_Get(OFFB_LLSC_SIZE, Ity_I64));
+         stmt(irsb, IRStmt_Put(OFFB_LLSC_SIZE, mkU64(0)));
+
+         /* Fail if no or wrong-size transaction. */
+         stmt(irsb, IRStmt_Exit(binop(Iop_CmpNE64, mkexpr(size),
+                                      mkU64(is_32 ? 4 : 8)),
+                                Ijk_Boring, nia, OFFB_PC));
+
+         /* Fail if the address doesn't match the LL address. */
+         stmt(irsb, IRStmt_Exit(binop(Iop_CmpNE64, mkexpr(ea),
+                                      IRExpr_Get(OFFB_LLSC_ADDR, Ity_I64)),
+                                Ijk_Boring, nia, OFFB_PC));
+
+         /* Fail if the data doesn't match the LL data. */
+         IRTemp data = newTemp(irsb, Ity_I64);
+         assign(irsb, data, IRExpr_Get(OFFB_LLSC_DATA, Ity_I64));
+         stmt(irsb, IRStmt_Exit(binop(Iop_CmpNE64,
+                                      widenSto64(ty, loadLE(ty, mkexpr(ea))),
+                                      mkexpr(data)),
+                                Ijk_Boring, nia, OFFB_PC));
+
+         /* Try to CAS the new value in. */
+         IRTemp old  = newTemp(irsb, ty);
+         IRTemp expd = newTemp(irsb, ty);
+         assign(irsb, expd, narrowFrom64(ty, mkexpr(data)));
+         stmt(irsb, IRStmt_CAS(mkIRCAS(
+                       /*oldHi*/ IRTemp_INVALID, old, Iend_LE, mkexpr(ea),
+                       /*expdHi*/ NULL, mkexpr(expd),
+                       /*dataHi*/ NULL, mkexpr(src))));
+
+         /* Fail if the CAS failed (old != expd). */
+         stmt(irsb, IRStmt_Exit(binop(is_32 ? Iop_CmpNE32 : Iop_CmpNE64,
+                                      mkexpr(old), mkexpr(expd)),
+                                Ijk_Boring, nia, OFFB_PC));
+
+         /* Otherwise mark the operation as successful. */
+         if (rd != 0)
+            putIReg64(irsb, rd, mkU64(0));
+      } else {
+         IRTemp res = newTemp(irsb, Ity_I1);
+         stmt(irsb, IRStmt_LLSC(Iend_LE, res, getIReg64(rs1),
+                                narrowFrom64(ty, getIReg64(rs2))));
+         /* res is 1 if the store succeeds, 0 if not; rd gets the inverse. */
+         if (rd != 0)
+            putIReg64(irsb, rd, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
+                                      mkU64(1)));
+      }
+
+      if (aqrl & 0x2) /* insn bit 26 (aq): fence after the access */
+         stmt(irsb, IRStmt_MBE(Imbe_Fence));
+
+      DIP("sc.%s%s %s, %s, (%s)%s\n", is_32 ? "w" : "d", nameAqRlSuffix(aqrl),
+          nameIReg(rd), nameIReg(rs2), nameIReg(rs1),
+          abiinfo->guest__use_fallback_LLSC ? " (fallback implementation)"
+                                            : "");
+      return True;
+   }
+
+   /* --------- amo{swap,add}.{w,d} rd, rs2, (rs1) ---------- */
+   /* -------- amo{xor,and,or}.{w,d} rd, rs2, (rs1) --------- */
+   /* ---------- amo{min,max}.{w,d} rd, rs2, (rs1) ---------- */
+   /* --------- amo{minu,maxu}.{w,d} rd, rs2, (rs1) --------- */
+   if (INSN(6, 0) == 0b0101111 && INSN(14, 13) == 0b01) {
+      UInt rd     = INSN(11, 7);
+      Bool is_32  = INSN(12, 12) == 0b0;
+      UInt rs1    = INSN(19, 15);
+      UInt rs2    = INSN(24, 20);
+      UInt aqrl   = INSN(26, 25);
+      UInt funct5 = INSN(31, 27);
+      if ((funct5 & 0b00010) || funct5 == 0b00101 || funct5 == 0b01001 ||
+          funct5 == 0b01101 || funct5 == 0b10001 || funct5 == 0b10101 ||
+          funct5 == 0b11001 || funct5 == 0b11101) {
+         /* Invalid AMO<x>, fall through. */
+      } else {
+         if (aqrl & 0x1) /* insn bit 25 (rl): fence before the access */
+            stmt(irsb, IRStmt_MBE(Imbe_Fence));
+
+         IRTemp addr = newTemp(irsb, Ity_I64);
+         assign(irsb, addr, getIReg64(rs1));
+
+         IRType ty   = is_32 ? Ity_I32 : Ity_I64;
+         IRTemp orig = newTemp(irsb, ty);
+         assign(irsb, orig, loadLE(ty, mkexpr(addr)));
+         IRExpr* lhs = mkexpr(orig);
+         IRExpr* rhs = narrowFrom64(ty, getIReg64(rs2));
+
+         /* Perform the operation. */
+         const HChar* name;
+         IRExpr*      res;
+         switch (funct5) {
+         case 0b00001:
+            name = "amoswap";
+            res  = rhs;
+            break;
+         case 0b00000:
+            name = "amoadd";
+            res  = binop(is_32 ? Iop_Add32 : Iop_Add64, lhs, rhs);
+            break;
+         case 0b00100:
+            name = "amoxor";
+            res  = binop(is_32 ? Iop_Xor32 : Iop_Xor64, lhs, rhs);
+            break;
+         case 0b01100:
+            name = "amoand";
+            res  = binop(is_32 ? Iop_And32 : Iop_And64, lhs, rhs);
+            break;
+         case 0b01000:
+            name = "amoor";
+            res  = binop(is_32 ? Iop_Or32 : Iop_Or64, lhs, rhs);
+            break;
+         case 0b10000:
+            name = "amomin";
+            res  = IRExpr_ITE(
+                binop(is_32 ? Iop_CmpLT32S : Iop_CmpLT64S, lhs, rhs), lhs, rhs);
+            break;
+         case 0b10100:
+            name = "amomax";
+            res  = IRExpr_ITE(
+                binop(is_32 ? Iop_CmpLT32S : Iop_CmpLT64S, lhs, rhs), rhs, lhs);
+            break;
+         case 0b11000:
+            name = "amominu";
+            res  = IRExpr_ITE(
+                binop(is_32 ? Iop_CmpLT32U : Iop_CmpLT64U, lhs, rhs), lhs, rhs);
+            break;
+         case 0b11100:
+            name = "amomaxu";
+            res  = IRExpr_ITE(
+                binop(is_32 ? Iop_CmpLT32U : Iop_CmpLT64U, lhs, rhs), rhs, lhs);
+            break;
+         default:
+            vassert(0);
+         }
+
+         /* Store the result back if memory still holds the original value. */
+         IRTemp old = newTemp(irsb, ty);
+         stmt(irsb, IRStmt_CAS(mkIRCAS(
+                       /*oldHi*/ IRTemp_INVALID, old, Iend_LE, mkexpr(addr),
+                       /*expdHi*/ NULL, mkexpr(orig), /*dataHi*/ NULL, res)));
+
+         if (aqrl & 0x2) /* insn bit 26 (aq): fence after the access */
+            stmt(irsb, IRStmt_MBE(Imbe_Fence));
+
+         /* Retry if the CAS failed (i.e. when old != orig). */
+         stmt(irsb, IRStmt_Exit(binop(is_32 ? Iop_CasCmpNE32 : Iop_CasCmpNE64,
+                                      mkexpr(old), mkexpr(orig)),
+                                Ijk_Boring, IRConst_U64(guest_pc_curr_instr),
+                                OFFB_PC));
+         /* Otherwise we succeeded. */
+         if (rd != 0)
+            putIReg64(irsb, rd, widenSto64(ty, mkexpr(old)));
+
+         DIP("%s.%s%s %s, %s, (%s)\n", name, is_32 ? "w" : "d",
+             nameAqRlSuffix(aqrl), nameIReg(rd), nameIReg(rs2), nameIReg(rs1));
+         return True;
+      }
+   }
+
+   return False;
+}
+
+static Bool dis_RV64F(/*MB_OUT*/ DisResult* dres,
+                      /*OUT*/ IRSB*         irsb,
+                      UInt                  insn)
+{
+   /* -------------- RV64F standard extension --------------- */
+
+   /* --------------- flw rd, imm[11:0](rs1) ---------------- */
+   if (INSN(6, 0) == 0b0000111 && INSN(14, 12) == 0b010) {
+      UInt  rd      = INSN(11, 7);
+      UInt  rs1     = INSN(19, 15);
+      UInt  imm11_0 = INSN(31, 20);
+      ULong simm    = vex_sx_to_64(imm11_0, 12);
+      putFReg32(irsb, rd,
+                loadLE(Ity_F32, binop(Iop_Add64, getIReg64(rs1), mkU64(simm))));
+      DIP("flw %s, %lld(%s)\n", nameFReg(rd), (Long)simm, nameIReg(rs1));
+      return True;
+   }
+
+   /* --------------- fsw rs2, imm[11:0](rs1) --------------- */
+   if (INSN(6, 0) == 0b0100111 && INSN(14, 12) == 0b010) {
+      UInt  rs1     = INSN(19, 15);
+      UInt  rs2     = INSN(24, 20);
+      UInt  imm11_0 = INSN(31, 25) << 5 | INSN(11, 7);
+      ULong simm    = vex_sx_to_64(imm11_0, 12);
+      storeLE(irsb, binop(Iop_Add64, getIReg64(rs1), mkU64(simm)),
+              getFReg32(rs2));
+      DIP("fsw %s, %lld(%s)\n", nameFReg(rs2), (Long)simm, nameIReg(rs1));
+      return True;
+   }
+
+   /* -------- f{madd,msub}.s rd, rs1, rs2, rs3, rm --------- */
+   /* ------- f{nmsub,nmadd}.s rd, rs1, rs2, rs3, rm -------- */
+   if (INSN(1, 0) == 0b11 && INSN(6, 4) == 0b100 && INSN(26, 25) == 0b00) {
+      UInt   opcode = INSN(6, 0);
+      UInt   rd     = INSN(11, 7);
+      UInt   rm     = INSN(14, 12);
+      UInt   rs1    = INSN(19, 15);
+      UInt   rs2    = INSN(24, 20);
+      UInt   rs3    = INSN(31, 27);
+      IRTemp rm_RISCV, rm_IR;
+      mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm);
+      const HChar* name;
+      IRTemp       a1 = newTemp(irsb, Ity_F32);
+      IRTemp       a2 = newTemp(irsb, Ity_F32);
+      IRTemp       a3 = newTemp(irsb, Ity_F32);
+      switch (opcode) {
+      case 0b1000011:
+         name = "fmadd";
+         assign(irsb, a1, getFReg32(rs1));
+         assign(irsb, a2, getFReg32(rs2));
+         assign(irsb, a3, getFReg32(rs3));
+         break;
+      case 0b1000111:
+         name = "fmsub";
+         assign(irsb, a1, getFReg32(rs1));
+         assign(irsb, a2, getFReg32(rs2));
+         assign(irsb, a3, unop(Iop_NegF32, getFReg32(rs3)));
+         break;
+      case 0b1001011:
+         name = "fnmsub";
+         assign(irsb, a1, unop(Iop_NegF32, getFReg32(rs1)));
+         assign(irsb, a2, getFReg32(rs2));
+         assign(irsb, a3, getFReg32(rs3));
+         break;
+      case 0b1001111:
+         name = "fnmadd";
+         assign(irsb, a1, unop(Iop_NegF32, getFReg32(rs1)));
+         assign(irsb, a2, getFReg32(rs2));
+         assign(irsb, a3, unop(Iop_NegF32, getFReg32(rs3)));
+         break;
+      default:
+         vassert(0);
+      }
+      putFReg32(
+         irsb, rd,
+         qop(Iop_MAddF32, mkexpr(rm_IR), mkexpr(a1), mkexpr(a2), mkexpr(a3)));
+      accumulateFFLAGS(
+         irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/,
+                             "riscv64g_calculate_fflags_fmadd_s",
+                             riscv64g_calculate_fflags_fmadd_s,
+                             mkIRExprVec_4(mkexpr(a1), mkexpr(a2), mkexpr(a3),
+                                           mkexpr(rm_RISCV))));
+      DIP("%s.s %s, %s, %s, %s%s\n", name, nameFReg(rd), nameFReg(rs1),
+          nameFReg(rs2), nameFReg(rs3), nameRMOperand(rm));
+      return True;
+   }
+
+   /* ------------ f{add,sub}.s rd, rs1, rs2, rm ------------ */
+   /* ------------ f{mul,div}.s rd, rs1, rs2, rm ------------ */
+   if (INSN(6, 0) == 0b1010011 && INSN(26, 25) == 0b00 &&
+       INSN(31, 29) == 0b000) {
+      UInt   rd     = INSN(11, 7);
+      UInt   rm     = INSN(14, 12);
+      UInt   rs1    = INSN(19, 15);
+      UInt   rs2    = INSN(24, 20);
+      UInt   funct7 = INSN(31, 25);
+      IRTemp rm_RISCV, rm_IR;
+      mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm);
+      const HChar* name;
+      IROp         op;
+      IRTemp       a1 = newTemp(irsb, Ity_F32);
+      IRTemp       a2 = newTemp(irsb, Ity_F32);
+      const HChar* helper_name;
+      void*        helper_addr;
+      switch (funct7) {
+      case 0b0000000:
+         name = "fadd";
+         op   = Iop_AddF32;
+         assign(irsb, a1, getFReg32(rs1));
+         assign(irsb, a2, getFReg32(rs2));
+         helper_name = "riscv64g_calculate_fflags_fadd_s";
+         helper_addr = riscv64g_calculate_fflags_fadd_s;
+         break;
+      case 0b0000100:
+         name = "fsub";
+         op   = Iop_AddF32;
+         assign(irsb, a1, getFReg32(rs1));
+         assign(irsb, a2, unop(Iop_NegF32, getFReg32(rs2)));
+         helper_name = "riscv64g_calculate_fflags_fadd_s";
+         helper_addr = riscv64g_calculate_fflags_fadd_s;
+         break;
+      case 0b0001000:
+         name = "fmul";
+         op   = Iop_MulF32;
+         assign(irsb, a1, getFReg32(rs1));
+         assign(irsb, a2, getFReg32(rs2));
+         helper_name = "riscv64g_calculate_fflags_fmul_s";
+         helper_addr = riscv64g_calculate_fflags_fmul_s;
+         break;
+      case 0b0001100:
+         name = "fdiv";
+         op   = Iop_DivF32;
+         assign(irsb, a1, getFReg32(rs1));
+         assign(irsb, a2, getFReg32(rs2));
+         helper_name = "riscv64g_calculate_fflags_fdiv_s";
+         helper_addr = riscv64g_calculate_fflags_fdiv_s;
+         break;
+      default:
+         vassert(0);
+      }
+      putFReg32(irsb, rd, triop(op, mkexpr(rm_IR), mkexpr(a1), mkexpr(a2)));
+      accumulateFFLAGS(irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/, helper_name,
+                                           helper_addr,
+                                           mkIRExprVec_3(mkexpr(a1), mkexpr(a2),
+                                                         mkexpr(rm_RISCV))));
+      DIP("%s.s %s, %s, %s%s\n", name, nameFReg(rd), nameFReg(rs1),
+          nameFReg(rs2), nameRMOperand(rm));
+      return True;
+   }
+
+   /* ----------------- fsqrt.s rd, rs1, rm ----------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(24, 20) == 0b00000 &&
+       INSN(31, 25) == 0b0101100) {
+      UInt   rd  = INSN(11, 7);
+      UInt   rm  = INSN(14, 12);
+      UInt   rs1 = INSN(19, 15);
+      IRTemp rm_RISCV, rm_IR;
+      mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm);
+      IRTemp a1 = newTemp(irsb, Ity_F32);
+      assign(irsb, a1, getFReg32(rs1));
+      putFReg32(irsb, rd, binop(Iop_SqrtF32, mkexpr(rm_IR), mkexpr(a1)));
+      accumulateFFLAGS(
+         irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/,
+                             "riscv64g_calculate_fflags_fsqrt_s",
+                             riscv64g_calculate_fflags_fsqrt_s,
+                             mkIRExprVec_2(mkexpr(a1), mkexpr(rm_RISCV))));
+      DIP("fsqrt.s %s, %s%s\n", nameFReg(rd), nameFReg(rs1), nameRMOperand(rm));
+      return True;
+   }
+
+   /* ---------------- fsgnj.s rd, rs1, rs2 ----------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(14, 12) == 0b000 &&
+       INSN(31, 25) == 0b0010000) {
+      UInt rd  = INSN(11, 7);
+      UInt rs1 = INSN(19, 15);
+      UInt rs2 = INSN(24, 20);
+      if (rs1 == rs2) {
+         putFReg32(irsb, rd, getFReg32(rs1));
+         DIP("fmv.s %s, %s\n", nameFReg(rd), nameIReg(rs1));
+      } else {
+         putFReg32(
+            irsb, rd,
+            unop(Iop_ReinterpI32asF32,
+                 binop(
+                    Iop_Or32,
+                    binop(Iop_And32, unop(Iop_ReinterpF32asI32, getFReg32(rs1)),
+                          mkU32(0x7fffffff)),
+                    binop(Iop_And32, unop(Iop_ReinterpF32asI32, getFReg32(rs2)),
+                          mkU32(0x80000000)))));
+         DIP("fsgnj.s %s, %s, %s\n", nameFReg(rd), nameIReg(rs1),
+             nameIReg(rs2));
+      }
+      return True;
+   }
+
+   /* ---------------- fsgnjn.s rd, rs1, rs2 ---------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(14, 12) == 0b001 &&
+       INSN(31, 25) == 0b0010000) {
+      UInt rd  = INSN(11, 7);
+      UInt rs1 = INSN(19, 15);
+      UInt rs2 = INSN(24, 20);
+      if (rs1 == rs2) {
+         putFReg32(irsb, rd, unop(Iop_NegF32, getFReg32(rs1)));
+         DIP("fneg.s %s, %s\n", nameFReg(rd), nameIReg(rs1));
+      } else {
+         putFReg32(irsb, rd,
+                   unop(Iop_ReinterpI32asF32,
+                        binop(Iop_Or32,
+                              binop(Iop_And32,
+                                    unop(Iop_ReinterpF32asI32, getFReg32(rs1)),
+                                    mkU32(0x7fffffff)),
+                              binop(Iop_And32,
+                                    unop(Iop_ReinterpF32asI32,
+                                         unop(Iop_NegF32, getFReg32(rs2))),
+                                    mkU32(0x80000000)))));
+         DIP("fsgnjn.s %s, %s, %s\n", nameFReg(rd), nameIReg(rs1),
+             nameIReg(rs2));
+      }
+      return True;
+   }
+
+   /* ---------------- fsgnjx.s rd, rs1, rs2 ---------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(14, 12) == 0b010 &&
+       INSN(31, 25) == 0b0010000) {
+      UInt rd  = INSN(11, 7);
+      UInt rs1 = INSN(19, 15);
+      UInt rs2 = INSN(24, 20);
+      if (rs1 == rs2) {
+         putFReg32(irsb, rd, unop(Iop_AbsF32, getFReg32(rs1)));
+         DIP("fabs.s %s, %s\n", nameFReg(rd), nameIReg(rs1));
+      } else {
+         putFReg32(
+            irsb, rd,
+            unop(Iop_ReinterpI32asF32,
+                 binop(Iop_Xor32, unop(Iop_ReinterpF32asI32, getFReg32(rs1)),
+                       binop(Iop_And32,
+                             unop(Iop_ReinterpF32asI32, getFReg32(rs2)),
+                             mkU32(0x80000000)))));
+         DIP("fsgnjx.s %s, %s, %s\n", nameFReg(rd), nameIReg(rs1),
+             nameIReg(rs2));
+      }
+      return True;
+   }
+
+   /* -------------- f{min,max}.s rd, rs1, rs2 -------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(31, 25) == 0b0010100) {
+      UInt rd  = INSN(11, 7);
+      UInt rm  = INSN(14, 12);
+      UInt rs1 = INSN(19, 15);
+      UInt rs2 = INSN(24, 20);
+      if (rm != 0b000 && rm != 0b001) {
+         /* Invalid F{MIN,MAX}.S, fall through. */
+      } else {
+         const HChar* name;
+         IROp         op;
+         const HChar* helper_name;
+         void*        helper_addr;
+         switch (rm) {
+         case 0b000:
+            name        = "fmin";
+            op          = Iop_MinNumF32;
+            helper_name = "riscv64g_calculate_fflags_fmin_s";
+            helper_addr = riscv64g_calculate_fflags_fmin_s;
+            break;
+         case 0b001:
+            name        = "fmax";
+            op          = Iop_MaxNumF32;
+            helper_name = "riscv64g_calculate_fflags_fmax_s";
+            helper_addr = riscv64g_calculate_fflags_fmax_s;
+            break;
+         default:
+            vassert(0);
+         }
+         IRTemp a1 = newTemp(irsb, Ity_F32);
+         IRTemp a2 = newTemp(irsb, Ity_F32);
+         assign(irsb, a1, getFReg32(rs1));
+         assign(irsb, a2, getFReg32(rs2));
+         putFReg32(irsb, rd, binop(op, mkexpr(a1), mkexpr(a2)));
+         accumulateFFLAGS(irsb,
+                          mkIRExprCCall(Ity_I32, 0 /*regparms*/, helper_name,
+                                        helper_addr,
+                                        mkIRExprVec_2(mkexpr(a1), mkexpr(a2))));
+         DIP("%s.s %s, %s, %s\n", name, nameFReg(rd), nameFReg(rs1),
+             nameFReg(rs2));
+         return True;
+      }
+   }
+
+   /* -------------- fcvt.{w,wu}.s rd, rs1, rm -------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(24, 21) == 0b0000 &&
+       INSN(31, 25) == 0b1100000) {
+      UInt   rd        = INSN(11, 7);
+      UInt   rm        = INSN(14, 12);
+      UInt   rs1       = INSN(19, 15);
+      Bool   is_signed = INSN(20, 20) == 0b0;
+      IRTemp rm_RISCV, rm_IR;
+      mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm);
+      IRTemp a1 = newTemp(irsb, Ity_F32);
+      assign(irsb, a1, getFReg32(rs1));
+      if (rd != 0)
+         putIReg32(irsb, rd,
+                   binop(is_signed ? Iop_F32toI32S : Iop_F32toI32U,
+                         mkexpr(rm_IR), mkexpr(a1)));
+      accumulateFFLAGS(
+         irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/,
+                             is_signed ? "riscv64g_calculate_fflags_fcvt_w_s"
+                                       : "riscv64g_calculate_fflags_fcvt_wu_s",
+                             is_signed ? riscv64g_calculate_fflags_fcvt_w_s
+                                       : riscv64g_calculate_fflags_fcvt_wu_s,
+                             mkIRExprVec_2(mkexpr(a1), mkexpr(rm_RISCV))));
+      DIP("fcvt.w%s.s %s, %s%s\n", is_signed ? "" : "u", nameIReg(rd),
+          nameFReg(rs1), nameRMOperand(rm));
+      return True;
+   }
+
+   /* ------------------- fmv.x.w rd, rs1 ------------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(14, 12) == 0b000 &&
+       INSN(24, 20) == 0b00000 && INSN(31, 25) == 0b1110000) {
+      UInt rd  = INSN(11, 7);
+      UInt rs1 = INSN(19, 15);
+      if (rd != 0)
+         putIReg32(irsb, rd, unop(Iop_ReinterpF32asI32, getFReg32(rs1)));
+      DIP("fmv.x.w %s, %s\n", nameIReg(rd), nameFReg(rs1));
+      return True;
+   }
+
+   /* ------------- f{eq,lt,le}.s rd, rs1, rs2 -------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(31, 25) == 0b1010000) {
+      UInt rd  = INSN(11, 7);
+      UInt rm  = INSN(14, 12);
+      UInt rs1 = INSN(19, 15);
+      UInt rs2 = INSN(24, 20);
+      if (rm != 0b010 && rm != 0b001 && rm != 0b000) {
+         /* Invalid F{EQ,LT,LE}.S, fall through. */
+      } else {
+         IRTemp a1 = newTemp(irsb, Ity_F32);
+         IRTemp a2 = newTemp(irsb, Ity_F32);
+         assign(irsb, a1, getFReg32(rs1));
+         assign(irsb, a2, getFReg32(rs2));
+         if (rd != 0) {
+            IRTemp cmp = newTemp(irsb, Ity_I32);
+            assign(irsb, cmp, binop(Iop_CmpF32, mkexpr(a1), mkexpr(a2)));
+            IRTemp res = newTemp(irsb, Ity_I1);
+            switch (rm) {
+            case 0b010:
+               assign(irsb, res,
+                      binop(Iop_CmpEQ32, mkexpr(cmp), mkU32(Ircr_EQ)));
+               break;
+            case 0b001:
+               assign(irsb, res,
+                      binop(Iop_CmpEQ32, mkexpr(cmp), mkU32(Ircr_LT)));
+               break;
+            case 0b000:
+               assign(irsb, res,
+                      binop(Iop_Or1,
+                            binop(Iop_CmpEQ32, mkexpr(cmp), mkU32(Ircr_LT)),
+                            binop(Iop_CmpEQ32, mkexpr(cmp), mkU32(Ircr_EQ))));
+               break;
+            default:
+               vassert(0);
+            }
+            putIReg64(irsb, rd, unop(Iop_1Uto64, mkexpr(res)));
+         }
+         const HChar* name;
+         const HChar* helper_name;
+         void*        helper_addr;
+         switch (rm) {
+         case 0b010:
+            name        = "feq";
+            helper_name = "riscv64g_calculate_fflags_feq_s";
+            helper_addr = riscv64g_calculate_fflags_feq_s;
+            break;
+         case 0b001:
+            name        = "flt";
+            helper_name = "riscv64g_calculate_fflags_flt_s";
+            helper_addr = riscv64g_calculate_fflags_flt_s;
+            break;
+         case 0b000:
+            name        = "fle";
+            helper_name = "riscv64g_calculate_fflags_fle_s";
+            helper_addr = riscv64g_calculate_fflags_fle_s;
+            break;
+         default:
+            vassert(0);
+         }
+         accumulateFFLAGS(irsb,
+                          mkIRExprCCall(Ity_I32, 0 /*regparms*/, helper_name,
+                                        helper_addr,
+                                        mkIRExprVec_2(mkexpr(a1), mkexpr(a2))));
+         DIP("%s.s %s, %s, %s\n", name, nameIReg(rd), nameFReg(rs1),
+             nameFReg(rs2));
+         return True;
+      }
+   }
+
+   /* ------------------ fclass.s rd, rs1 ------------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(14, 12) == 0b001 &&
+       INSN(24, 20) == 0b00000 && INSN(31, 25) == 0b1110000) {
+      UInt rd  = INSN(11, 7);
+      UInt rs1 = INSN(19, 15);
+      if (rd != 0)
+         putIReg64(irsb, rd,
+                   mkIRExprCCall(Ity_I64, 0 /*regparms*/,
+                                 "riscv64g_calculate_fclass_s",
+                                 riscv64g_calculate_fclass_s,
+                                 mkIRExprVec_1(getFReg32(rs1))));
+      DIP("fclass.s %s, %s\n", nameIReg(rd), nameFReg(rs1));
+      return True;
+   }
+
+   /* ------------------- fmv.w.x rd, rs1 ------------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(14, 12) == 0b000 &&
+       INSN(24, 20) == 0b00000 && INSN(31, 25) == 0b1111000) {
+      UInt rd  = INSN(11, 7);
+      UInt rs1 = INSN(19, 15);
+      putFReg32(irsb, rd, unop(Iop_ReinterpI32asF32, getIReg32(rs1)));
+      DIP("fmv.w.x %s, %s\n", nameFReg(rd), nameIReg(rs1));
+      return True;
+   }
+
+   /* -------------- fcvt.s.{w,wu} rd, rs1, rm -------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(24, 21) == 0b0000 &&
+       INSN(31, 25) == 0b1101000) {
+      UInt   rd        = INSN(11, 7);
+      UInt   rm        = INSN(14, 12);
+      UInt   rs1       = INSN(19, 15);
+      Bool   is_signed = INSN(20, 20) == 0b0;
+      IRTemp rm_RISCV, rm_IR;
+      mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm);
+      IRTemp a1 = newTemp(irsb, Ity_I32);
+      assign(irsb, a1, getIReg32(rs1));
+      putFReg32(irsb, rd,
+                binop(is_signed ? Iop_I32StoF32 : Iop_I32UtoF32, mkexpr(rm_IR),
+                      mkexpr(a1)));
+      accumulateFFLAGS(
+         irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/,
+                             is_signed ? "riscv64g_calculate_fflags_fcvt_s_w"
+                                       : "riscv64g_calculate_fflags_fcvt_s_wu",
+                             is_signed ? riscv64g_calculate_fflags_fcvt_s_w
+                                       : riscv64g_calculate_fflags_fcvt_s_wu,
+                             mkIRExprVec_2(mkexpr(a1), mkexpr(rm_RISCV))));
+      DIP("fcvt.s.w%s %s, %s%s\n", is_signed ? "" : "u", nameFReg(rd),
+          nameIReg(rs1), nameRMOperand(rm));
+      return True;
+   }
+
+   /* -------------- fcvt.{l,lu}.s rd, rs1, rm -------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(24, 21) == 0b0001 &&
+       INSN(31, 25) == 0b1100000) {
+      UInt   rd        = INSN(11, 7);
+      UInt   rm        = INSN(14, 12);
+      UInt   rs1       = INSN(19, 15);
+      Bool   is_signed = INSN(20, 20) == 0b0;
+      IRTemp rm_RISCV, rm_IR;
+      mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm);
+      IRTemp a1 = newTemp(irsb, Ity_F32);
+      assign(irsb, a1, getFReg32(rs1));
+      if (rd != 0)
+         putIReg64(irsb, rd,
+                   binop(is_signed ? Iop_F32toI64S : Iop_F32toI64U,
+                         mkexpr(rm_IR), mkexpr(a1)));
+      accumulateFFLAGS(
+         irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/,
+                             is_signed ? "riscv64g_calculate_fflags_fcvt_l_s"
+                                       : "riscv64g_calculate_fflags_fcvt_lu_s",
+                             is_signed ? riscv64g_calculate_fflags_fcvt_l_s
+                                       : riscv64g_calculate_fflags_fcvt_lu_s,
+                             mkIRExprVec_2(mkexpr(a1), mkexpr(rm_RISCV))));
+      DIP("fcvt.l%s.s %s, %s%s\n", is_signed ? "" : "u", nameIReg(rd),
+          nameFReg(rs1), nameRMOperand(rm));
+      return True;
+   }
+
+   /* -------------- fcvt.s.{l,lu} rd, rs1, rm -------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(24, 21) == 0b0001 &&
+       INSN(31, 25) == 0b1101000) {
+      UInt   rd        = INSN(11, 7);
+      UInt   rm        = INSN(14, 12);
+      UInt   rs1       = INSN(19, 15);
+      Bool   is_signed = INSN(20, 20) == 0b0;
+      IRTemp rm_RISCV, rm_IR;
+      mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm);
+      IRTemp a1 = newTemp(irsb, Ity_I64);
+      assign(irsb, a1, getIReg64(rs1));
+      putFReg32(irsb, rd,
+                binop(is_signed ? Iop_I64StoF32 : Iop_I64UtoF32, mkexpr(rm_IR),
+                      mkexpr(a1)));
+      accumulateFFLAGS(
+         irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/,
+                             is_signed ? "riscv64g_calculate_fflags_fcvt_s_l"
+                                       : "riscv64g_calculate_fflags_fcvt_s_lu",
+                             is_signed ? riscv64g_calculate_fflags_fcvt_s_l
+                                       : riscv64g_calculate_fflags_fcvt_s_lu,
+                             mkIRExprVec_2(mkexpr(a1), mkexpr(rm_RISCV))));
+      DIP("fcvt.s.l%s %s, %s%s\n", is_signed ? "" : "u", nameFReg(rd),
+          nameIReg(rs1), nameRMOperand(rm));
+      return True;
+   }
+
+   return False;
+}
+
+static Bool dis_RV64D(/*MB_OUT*/ DisResult* dres,
+                      /*OUT*/ IRSB*         irsb,
+                      UInt                  insn)
+{
+   /* -------------- RV64D standard extension --------------- */
+
+   /* --------------- fld rd, imm[11:0](rs1) ---------------- */
+   if (INSN(6, 0) == 0b0000111 && INSN(14, 12) == 0b011) {
+      UInt  rd      = INSN(11, 7);
+      UInt  rs1     = INSN(19, 15);
+      UInt  imm11_0 = INSN(31, 20);
+      ULong simm    = vex_sx_to_64(imm11_0, 12);
+      putFReg64(irsb, rd,
+                loadLE(Ity_F64, binop(Iop_Add64, getIReg64(rs1), mkU64(simm))));
+      DIP("fld %s, %lld(%s)\n", nameFReg(rd), (Long)simm, nameIReg(rs1));
+      return True;
+   }
+
+   /* --------------- fsd rs2, imm[11:0](rs1) --------------- */
+   if (INSN(6, 0) == 0b0100111 && INSN(14, 12) == 0b011) {
+      UInt  rs1     = INSN(19, 15);
+      UInt  rs2     = INSN(24, 20);
+      UInt  imm11_0 = INSN(31, 25) << 5 | INSN(11, 7);
+      ULong simm    = vex_sx_to_64(imm11_0, 12);
+      storeLE(irsb, binop(Iop_Add64, getIReg64(rs1), mkU64(simm)),
+              getFReg64(rs2));
+      DIP("fsd %s, %lld(%s)\n", nameFReg(rs2), (Long)simm, nameIReg(rs1));
+      return True;
+   }
+
+   /* -------- f{madd,msub}.d rd, rs1, rs2, rs3, rm --------- */
+   /* ------- f{nmsub,nmadd}.d rd, rs1, rs2, rs3, rm -------- */
+   if (INSN(1, 0) == 0b11 && INSN(6, 4) == 0b100 && INSN(26, 25) == 0b01) {
+      UInt   opcode = INSN(6, 0);
+      UInt   rd     = INSN(11, 7);
+      UInt   rm     = INSN(14, 12);
+      UInt   rs1    = INSN(19, 15);
+      UInt   rs2    = INSN(24, 20);
+      UInt   rs3    = INSN(31, 27);
+      IRTemp rm_RISCV, rm_IR;
+      mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm);
+      const HChar* name;
+      IRTemp       a1 = newTemp(irsb, Ity_F64);
+      IRTemp       a2 = newTemp(irsb, Ity_F64);
+      IRTemp       a3 = newTemp(irsb, Ity_F64);
+      switch (opcode) {
+      case 0b1000011:
+         name = "fmadd";
+         assign(irsb, a1, getFReg64(rs1));
+         assign(irsb, a2, getFReg64(rs2));
+         assign(irsb, a3, getFReg64(rs3));
+         break;
+      case 0b1000111:
+         name = "fmsub";
+         assign(irsb, a1, getFReg64(rs1));
+         assign(irsb, a2, getFReg64(rs2));
+         assign(irsb, a3, unop(Iop_NegF64, getFReg64(rs3)));
+         break;
+      case 0b1001011:
+         name = "fnmsub";
+         assign(irsb, a1, unop(Iop_NegF64, getFReg64(rs1)));
+         assign(irsb, a2, getFReg64(rs2));
+         assign(irsb, a3, getFReg64(rs3));
+         break;
+      case 0b1001111:
+         name = "fnmadd";
+         assign(irsb, a1, unop(Iop_NegF64, getFReg64(rs1)));
+         assign(irsb, a2, getFReg64(rs2));
+         assign(irsb, a3, unop(Iop_NegF64, getFReg64(rs3)));
+         break;
+      default:
+         vassert(0);
+      }
+      putFReg64(
+         irsb, rd,
+         qop(Iop_MAddF64, mkexpr(rm_IR), mkexpr(a1), mkexpr(a2), mkexpr(a3)));
+      accumulateFFLAGS(
+         irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/,
+                             "riscv64g_calculate_fflags_fmadd_d",
+                             riscv64g_calculate_fflags_fmadd_d,
+                             mkIRExprVec_4(mkexpr(a1), mkexpr(a2), mkexpr(a3),
+                                           mkexpr(rm_RISCV))));
+      DIP("%s.d %s, %s, %s, %s%s\n", name, nameFReg(rd), nameFReg(rs1),
+          nameFReg(rs2), nameFReg(rs3), nameRMOperand(rm));
+      return True;
+   }
+
+   /* ------------ f{add,sub}.d rd, rs1, rs2, rm ------------ */
+   /* ------------ f{mul,div}.d rd, rs1, rs2, rm ------------ */
+   if (INSN(6, 0) == 0b1010011 && INSN(26, 25) == 0b01 &&
+       INSN(31, 29) == 0b000) {
+      UInt   rd     = INSN(11, 7);
+      UInt   rm     = INSN(14, 12);
+      UInt   rs1    = INSN(19, 15);
+      UInt   rs2    = INSN(24, 20);
+      UInt   funct7 = INSN(31, 25);
+      IRTemp rm_RISCV, rm_IR;
+      mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm);
+      const HChar* name;
+      IROp         op;
+      IRTemp       a1 = newTemp(irsb, Ity_F64);
+      IRTemp       a2 = newTemp(irsb, Ity_F64);
+      const HChar* helper_name;
+      void*        helper_addr;
+      switch (funct7) {
+      case 0b0000001:
+         name = "fadd";
+         op   = Iop_AddF64;
+         assign(irsb, a1, getFReg64(rs1));
+         assign(irsb, a2, getFReg64(rs2));
+         helper_name = "riscv64g_calculate_fflags_fadd_d";
+         helper_addr = riscv64g_calculate_fflags_fadd_d;
+         break;
+      case 0b0000101:
+         name = "fsub";
+         op   = Iop_AddF64; /* Subtraction is emitted as rs1 + (-rs2). */
+         assign(irsb, a1, getFReg64(rs1));
+         assign(irsb, a2, unop(Iop_NegF64, getFReg64(rs2)));
+         helper_name = "riscv64g_calculate_fflags_fadd_d";
+         helper_addr = riscv64g_calculate_fflags_fadd_d;
+         break;
+      case 0b0001001:
+         name = "fmul";
+         op   = Iop_MulF64;
+         assign(irsb, a1, getFReg64(rs1));
+         assign(irsb, a2, getFReg64(rs2));
+         helper_name = "riscv64g_calculate_fflags_fmul_d";
+         helper_addr = riscv64g_calculate_fflags_fmul_d;
+         break;
+      case 0b0001101:
+         name = "fdiv";
+         op   = Iop_DivF64;
+         assign(irsb, a1, getFReg64(rs1));
+         assign(irsb, a2, getFReg64(rs2));
+         helper_name = "riscv64g_calculate_fflags_fdiv_d";
+         helper_addr = riscv64g_calculate_fflags_fdiv_d;
+         break;
+      default:
+         vassert(0);
+      }
+      putFReg64(irsb, rd, triop(op, mkexpr(rm_IR), mkexpr(a1), mkexpr(a2)));
+      accumulateFFLAGS(irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/, helper_name,
+                                           helper_addr,
+                                           mkIRExprVec_3(mkexpr(a1), mkexpr(a2),
+                                                         mkexpr(rm_RISCV))));
+      DIP("%s.d %s, %s, %s%s\n", name, nameFReg(rd), nameFReg(rs1),
+          nameFReg(rs2), nameRMOperand(rm));
+      return True;
+   }
+
+   /* ----------------- fsqrt.d rd, rs1, rm ----------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(24, 20) == 0b00000 &&
+       INSN(31, 25) == 0b0101101) {
+      UInt   rd  = INSN(11, 7);
+      UInt   rm  = INSN(14, 12);
+      UInt   rs1 = INSN(19, 15);
+      IRTemp rm_RISCV, rm_IR;
+      mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm);
+      IRTemp a1 = newTemp(irsb, Ity_F64);
+      assign(irsb, a1, getFReg64(rs1));
+      putFReg64(irsb, rd, binop(Iop_SqrtF64, mkexpr(rm_IR), mkexpr(a1)));
+      accumulateFFLAGS(
+         irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/,
+                             "riscv64g_calculate_fflags_fsqrt_d",
+                             riscv64g_calculate_fflags_fsqrt_d,
+                             mkIRExprVec_2(mkexpr(a1), mkexpr(rm_RISCV))));
+      DIP("fsqrt.d %s, %s%s\n", nameFReg(rd), nameFReg(rs1), nameRMOperand(rm));
+      return True;
+   }
+
+   /* ---------------- fsgnj.d rd, rs1, rs2 ----------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(14, 12) == 0b000 &&
+       INSN(31, 25) == 0b0010001) {
+      UInt rd  = INSN(11, 7);
+      UInt rs1 = INSN(19, 15);
+      UInt rs2 = INSN(24, 20);
+      if (rs1 == rs2) {
+         putFReg64(irsb, rd, getFReg64(rs1));
+         DIP("fmv.d %s, %s\n", nameFReg(rd), nameFReg(rs1));
+      } else {
+         putFReg64(
+            irsb, rd,
+            unop(Iop_ReinterpI64asF64,
+                 binop(
+                    Iop_Or64,
+                    binop(Iop_And64, unop(Iop_ReinterpF64asI64, getFReg64(rs1)),
+                          mkU64(0x7fffffffffffffff)),
+                    binop(Iop_And64, unop(Iop_ReinterpF64asI64, getFReg64(rs2)),
+                          mkU64(0x8000000000000000)))));
+         DIP("fsgnj.d %s, %s, %s\n", nameFReg(rd), nameFReg(rs1),
+             nameFReg(rs2));
+      }
+      return True;
+   }
+
+   /* ---------------- fsgnjn.d rd, rs1, rs2 ---------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(14, 12) == 0b001 &&
+       INSN(31, 25) == 0b0010001) {
+      UInt rd  = INSN(11, 7);
+      UInt rs1 = INSN(19, 15);
+      UInt rs2 = INSN(24, 20);
+      if (rs1 == rs2) {
+         putFReg64(irsb, rd, unop(Iop_NegF64, getFReg64(rs1)));
+         DIP("fneg.d %s, %s\n", nameFReg(rd), nameFReg(rs1));
+      } else {
+         putFReg64(irsb, rd,
+                   unop(Iop_ReinterpI64asF64,
+                        binop(Iop_Or64,
+                              binop(Iop_And64,
+                                    unop(Iop_ReinterpF64asI64, getFReg64(rs1)),
+                                    mkU64(0x7fffffffffffffff)),
+                              binop(Iop_And64,
+                                    unop(Iop_ReinterpF64asI64,
+                                         unop(Iop_NegF64, getFReg64(rs2))),
+                                    mkU64(0x8000000000000000)))));
+         DIP("fsgnjn.d %s, %s, %s\n", nameFReg(rd), nameFReg(rs1),
+             nameFReg(rs2));
+      }
+      return True;
+   }
+
+   /* ---------------- fsgnjx.d rd, rs1, rs2 ---------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(14, 12) == 0b010 &&
+       INSN(31, 25) == 0b0010001) {
+      UInt rd  = INSN(11, 7);
+      UInt rs1 = INSN(19, 15);
+      UInt rs2 = INSN(24, 20);
+      if (rs1 == rs2) {
+         putFReg64(irsb, rd, unop(Iop_AbsF64, getFReg64(rs1)));
+         DIP("fabs.d %s, %s\n", nameFReg(rd), nameFReg(rs1));
+      } else {
+         putFReg64(
+            irsb, rd,
+            unop(Iop_ReinterpI64asF64,
+                 binop(Iop_Xor64, unop(Iop_ReinterpF64asI64, getFReg64(rs1)),
+                       binop(Iop_And64,
+                             unop(Iop_ReinterpF64asI64, getFReg64(rs2)),
+                             mkU64(0x8000000000000000)))));
+         DIP("fsgnjx.d %s, %s, %s\n", nameFReg(rd), nameFReg(rs1),
+             nameFReg(rs2));
+      }
+      return True;
+   }
+
+   /* -------------- f{min,max}.d rd, rs1, rs2 -------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(31, 25) == 0b0010101) {
+      UInt rd  = INSN(11, 7);
+      UInt rm  = INSN(14, 12);
+      UInt rs1 = INSN(19, 15);
+      UInt rs2 = INSN(24, 20);
+      if (rm != 0b000 && rm != 0b001) {
+         /* Invalid F{MIN,MAX}.D, fall through. */
+      } else {
+         const HChar* name;
+         IROp         op;
+         const HChar* helper_name;
+         void*        helper_addr;
+         switch (rm) {
+         case 0b000:
+            name        = "fmin";
+            op          = Iop_MinNumF64;
+            helper_name = "riscv64g_calculate_fflags_fmin_d";
+            helper_addr = riscv64g_calculate_fflags_fmin_d;
+            break;
+         case 0b001:
+            name        = "fmax";
+            op          = Iop_MaxNumF64;
+            helper_name = "riscv64g_calculate_fflags_fmax_d";
+            helper_addr = riscv64g_calculate_fflags_fmax_d;
+            break;
+         default:
+            vassert(0);
+         }
+         IRTemp a1 = newTemp(irsb, Ity_F64);
+         IRTemp a2 = newTemp(irsb, Ity_F64);
+         assign(irsb, a1, getFReg64(rs1));
+         assign(irsb, a2, getFReg64(rs2));
+         putFReg64(irsb, rd, binop(op, mkexpr(a1), mkexpr(a2)));
+         accumulateFFLAGS(irsb,
+                          mkIRExprCCall(Ity_I32, 0 /*regparms*/, helper_name,
+                                        helper_addr,
+                                        mkIRExprVec_2(mkexpr(a1), mkexpr(a2))));
+         DIP("%s.d %s, %s, %s\n", name, nameFReg(rd), nameFReg(rs1),
+             nameFReg(rs2));
+         return True;
+      }
+   }
+
+   /* ---------------- fcvt.s.d rd, rs1, rm ----------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(24, 20) == 0b00001 &&
+       INSN(31, 25) == 0b0100000) {
+      UInt   rd  = INSN(11, 7);
+      UInt   rm  = INSN(14, 12);
+      UInt   rs1 = INSN(19, 15);
+      IRTemp rm_RISCV, rm_IR;
+      mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm);
+      IRTemp a1 = newTemp(irsb, Ity_F64);
+      assign(irsb, a1, getFReg64(rs1));
+      putFReg32(irsb, rd, binop(Iop_F64toF32, mkexpr(rm_IR), mkexpr(a1)));
+      accumulateFFLAGS(
+         irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/,
+                             "riscv64g_calculate_fflags_fcvt_s_d",
+                             riscv64g_calculate_fflags_fcvt_s_d,
+                             mkIRExprVec_2(mkexpr(a1), mkexpr(rm_RISCV))));
+      DIP("fcvt.s.d %s, %s%s\n", nameFReg(rd), nameFReg(rs1),
+          nameRMOperand(rm));
+      return True;
+   }
+
+   /* ---------------- fcvt.d.s rd, rs1, rm ----------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(24, 20) == 0b00000 &&
+       INSN(31, 25) == 0b0100001) {
+      UInt rd  = INSN(11, 7);
+      UInt rm  = INSN(14, 12); /* Ignored as the result is always exact. */
+      UInt rs1 = INSN(19, 15);
+      putFReg64(irsb, rd, unop(Iop_F32toF64, getFReg32(rs1)));
+      DIP("fcvt.d.s %s, %s%s\n", nameFReg(rd), nameFReg(rs1),
+          nameRMOperand(rm));
+      return True;
+   }
+
+   /* ------------- f{eq,lt,le}.d rd, rs1, rs2 -------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(31, 25) == 0b1010001) {
+      UInt rd  = INSN(11, 7);
+      UInt rm  = INSN(14, 12);
+      UInt rs1 = INSN(19, 15);
+      UInt rs2 = INSN(24, 20);
+      if (rm != 0b010 && rm != 0b001 && rm != 0b000) {
+         /* Invalid F{EQ,LT,LE}.D, fall through. */
+      } else {
+         IRTemp a1 = newTemp(irsb, Ity_F64);
+         IRTemp a2 = newTemp(irsb, Ity_F64);
+         assign(irsb, a1, getFReg64(rs1));
+         assign(irsb, a2, getFReg64(rs2));
+         if (rd != 0) {
+            IRTemp cmp = newTemp(irsb, Ity_I32);
+            assign(irsb, cmp, binop(Iop_CmpF64, mkexpr(a1), mkexpr(a2)));
+            IRTemp res = newTemp(irsb, Ity_I1);
+            switch (rm) {
+            case 0b010:
+               assign(irsb, res,
+                      binop(Iop_CmpEQ32, mkexpr(cmp), mkU32(Ircr_EQ)));
+               break;
+            case 0b001:
+               assign(irsb, res,
+                      binop(Iop_CmpEQ32, mkexpr(cmp), mkU32(Ircr_LT)));
+               break;
+            case 0b000:
+               assign(irsb, res,
+                      binop(Iop_Or1,
+                            binop(Iop_CmpEQ32, mkexpr(cmp), mkU32(Ircr_LT)),
+                            binop(Iop_CmpEQ32, mkexpr(cmp), mkU32(Ircr_EQ))));
+               break;
+            default:
+               vassert(0);
+            }
+            putIReg64(irsb, rd, unop(Iop_1Uto64, mkexpr(res)));
+         }
+         const HChar* name;
+         const HChar* helper_name;
+         void*        helper_addr;
+         switch (rm) {
+         case 0b010:
+            name        = "feq";
+            helper_name = "riscv64g_calculate_fflags_feq_d";
+            helper_addr = riscv64g_calculate_fflags_feq_d;
+            break;
+         case 0b001:
+            name        = "flt";
+            helper_name = "riscv64g_calculate_fflags_flt_d";
+            helper_addr = riscv64g_calculate_fflags_flt_d;
+            break;
+         case 0b000:
+            name        = "fle";
+            helper_name = "riscv64g_calculate_fflags_fle_d";
+            helper_addr = riscv64g_calculate_fflags_fle_d;
+            break;
+         default:
+            vassert(0);
+         }
+         accumulateFFLAGS(irsb,
+                          mkIRExprCCall(Ity_I32, 0 /*regparms*/, helper_name,
+                                        helper_addr,
+                                        mkIRExprVec_2(mkexpr(a1), mkexpr(a2))));
+         DIP("%s.d %s, %s, %s\n", name, nameIReg(rd), nameFReg(rs1),
+             nameFReg(rs2));
+         return True;
+      }
+   }
+
+   /* ------------------ fclass.d rd, rs1 ------------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(14, 12) == 0b001 &&
+       INSN(24, 20) == 0b00000 && INSN(31, 25) == 0b1110001) {
+      UInt rd  = INSN(11, 7);
+      UInt rs1 = INSN(19, 15);
+      if (rd != 0)
+         putIReg64(irsb, rd,
+                   mkIRExprCCall(Ity_I64, 0 /*regparms*/,
+                                 "riscv64g_calculate_fclass_d",
+                                 riscv64g_calculate_fclass_d,
+                                 mkIRExprVec_1(getFReg64(rs1))));
+      DIP("fclass.d %s, %s\n", nameIReg(rd), nameFReg(rs1));
+      return True;
+   }
+
+   /* -------------- fcvt.{w,wu}.d rd, rs1, rm -------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(24, 21) == 0b0000 &&
+       INSN(31, 25) == 0b1100001) {
+      UInt   rd        = INSN(11, 7);
+      UInt   rm        = INSN(14, 12);
+      UInt   rs1       = INSN(19, 15);
+      Bool   is_signed = INSN(20, 20) == 0b0;
+      IRTemp rm_RISCV, rm_IR;
+      mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm);
+      IRTemp a1 = newTemp(irsb, Ity_F64);
+      assign(irsb, a1, getFReg64(rs1));
+      if (rd != 0)
+         putIReg32(irsb, rd,
+                   binop(is_signed ? Iop_F64toI32S : Iop_F64toI32U,
+                         mkexpr(rm_IR), mkexpr(a1)));
+      accumulateFFLAGS(
+         irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/,
+                             is_signed ? "riscv64g_calculate_fflags_fcvt_w_d"
+                                       : "riscv64g_calculate_fflags_fcvt_wu_d",
+                             is_signed ? riscv64g_calculate_fflags_fcvt_w_d
+                                       : riscv64g_calculate_fflags_fcvt_wu_d,
+                             mkIRExprVec_2(mkexpr(a1), mkexpr(rm_RISCV))));
+      DIP("fcvt.w%s.d %s, %s%s\n", is_signed ? "" : "u", nameIReg(rd),
+          nameFReg(rs1), nameRMOperand(rm));
+      return True;
+   }
+
+   /* -------------- fcvt.d.{w,wu} rd, rs1, rm -------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(24, 21) == 0b0000 &&
+       INSN(31, 25) == 0b1101001) {
+      UInt rd  = INSN(11, 7);
+      UInt rm  = INSN(14, 12); /* Ignored as the result is always exact. */
+      UInt rs1 = INSN(19, 15);
+      Bool is_signed = INSN(20, 20) == 0b0;
+      putFReg64(
+         irsb, rd,
+         unop(is_signed ? Iop_I32StoF64 : Iop_I32UtoF64, getIReg32(rs1)));
+      DIP("fcvt.d.w%s %s, %s%s\n", is_signed ? "" : "u", nameFReg(rd),
+          nameIReg(rs1), nameRMOperand(rm));
+      return True;
+   }
+
+   /* -------------- fcvt.{l,lu}.d rd, rs1, rm -------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(24, 21) == 0b0001 &&
+       INSN(31, 25) == 0b1100001) {
+      UInt   rd        = INSN(11, 7);
+      UInt   rm        = INSN(14, 12);
+      UInt   rs1       = INSN(19, 15);
+      Bool   is_signed = INSN(20, 20) == 0b0;
+      IRTemp rm_RISCV, rm_IR;
+      mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm);
+      IRTemp a1 = newTemp(irsb, Ity_F64);
+      assign(irsb, a1, getFReg64(rs1));
+      if (rd != 0)
+         putIReg64(irsb, rd,
+                   binop(is_signed ? Iop_F64toI64S : Iop_F64toI64U,
+                         mkexpr(rm_IR), mkexpr(a1)));
+      accumulateFFLAGS(
+         irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/,
+                             is_signed ? "riscv64g_calculate_fflags_fcvt_l_d"
+                                       : "riscv64g_calculate_fflags_fcvt_lu_d",
+                             is_signed ? riscv64g_calculate_fflags_fcvt_l_d
+                                       : riscv64g_calculate_fflags_fcvt_lu_d,
+                             mkIRExprVec_2(mkexpr(a1), mkexpr(rm_RISCV))));
+      DIP("fcvt.l%s.d %s, %s%s\n", is_signed ? "" : "u", nameIReg(rd),
+          nameFReg(rs1), nameRMOperand(rm));
+      return True;
+   }
+
+   /* ------------------- fmv.x.d rd, rs1 ------------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(14, 12) == 0b000 &&
+       INSN(24, 20) == 0b00000 && INSN(31, 25) == 0b1110001) {
+      UInt rd  = INSN(11, 7);
+      UInt rs1 = INSN(19, 15);
+      if (rd != 0)
+         putIReg64(irsb, rd, unop(Iop_ReinterpF64asI64, getFReg64(rs1)));
+      DIP("fmv.x.d %s, %s\n", nameIReg(rd), nameFReg(rs1));
+      return True;
+   }
+
+   /* -------------- fcvt.d.{l,lu} rd, rs1, rm -------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(24, 21) == 0b0001 &&
+       INSN(31, 25) == 0b1101001) {
+      UInt   rd        = INSN(11, 7);
+      UInt   rm        = INSN(14, 12);
+      UInt   rs1       = INSN(19, 15);
+      Bool   is_signed = INSN(20, 20) == 0b0;
+      IRTemp rm_RISCV, rm_IR;
+      mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm);
+      IRTemp a1 = newTemp(irsb, Ity_I64);
+      assign(irsb, a1, getIReg64(rs1));
+      putFReg64(irsb, rd,
+                binop(is_signed ? Iop_I64StoF64 : Iop_I64UtoF64, mkexpr(rm_IR),
+                      mkexpr(a1)));
+      accumulateFFLAGS(
+         irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/,
+                             is_signed ? "riscv64g_calculate_fflags_fcvt_d_l"
+                                       : "riscv64g_calculate_fflags_fcvt_d_lu",
+                             is_signed ? riscv64g_calculate_fflags_fcvt_d_l
+                                       : riscv64g_calculate_fflags_fcvt_d_lu,
+                             mkIRExprVec_2(mkexpr(a1), mkexpr(rm_RISCV))));
+      DIP("fcvt.d.l%s %s, %s%s\n", is_signed ? "" : "u", nameFReg(rd),
+          nameIReg(rs1), nameRMOperand(rm));
+      return True;
+   }
+
+   /* ------------------- fmv.d.x rd, rs1 ------------------- */
+   if (INSN(6, 0) == 0b1010011 && INSN(14, 12) == 0b000 &&
+       INSN(24, 20) == 0b00000 && INSN(31, 25) == 0b1111001) {
+      UInt rd  = INSN(11, 7);
+      UInt rs1 = INSN(19, 15);
+      putFReg64(irsb, rd, unop(Iop_ReinterpI64asF64, getIReg64(rs1)));
+      DIP("fmv.d.x %s, %s\n", nameFReg(rd), nameIReg(rs1));
+      return True;
+   }
+
+   return False;
+}
+
+static Bool dis_RV64Zicsr(/*MB_OUT*/ DisResult* dres,
+                          /*OUT*/ IRSB*         irsb,
+                          UInt                  insn)
+{
+   /* ------------ RV64Zicsr standard extension ------------- */
+
+   /* -------------- csrr{w,s,c} rd, csr, rs1 --------------- */
+   if (INSN(6, 0) == 0b1110011) {
+      UInt rd     = INSN(11, 7);
+      UInt funct3 = INSN(14, 12);
+      UInt rs1    = INSN(19, 15);
+      UInt csr    = INSN(31, 20);
+      if ((funct3 != 0b001 && funct3 != 0b010 && funct3 != 0b011) ||
+          (csr != 0x001 && csr != 0x002 && csr != 0x003)) {
+         /* Invalid CSRR{W,S,C}, fall through. */
+      } else {
+         switch (csr) {
+         case 0x001: {
+            /* fflags */
+            IRTemp fcsr = newTemp(irsb, Ity_I32);
+            assign(irsb, fcsr, getFCSR());
+            if (rd != 0)
+               putIReg64(irsb, rd,
+                         unop(Iop_32Uto64,
+                              binop(Iop_And32, mkexpr(fcsr), mkU32(0x1f))));
+
+            IRExpr* expr;
+            switch (funct3) {
+            case 0b001:
+               expr = binop(Iop_Or32,
+                            binop(Iop_And32, mkexpr(fcsr), mkU32(0xffffffe0)),
+                            binop(Iop_And32, getIReg32(rs1), mkU32(0x1f)));
+               break;
+            case 0b010:
+               expr = binop(Iop_Or32, mkexpr(fcsr),
+                            binop(Iop_And32, getIReg32(rs1), mkU32(0x1f)));
+               break;
+            case 0b011:
+               expr = binop(Iop_And32, mkexpr(fcsr),
+                            unop(Iop_Not32, binop(Iop_And32, getIReg32(rs1),
+                                                  mkU32(0x1f))));
+               break;
+            default:
+               vassert(0);
+            }
+            putFCSR(irsb, expr);
+            break;
+         }
+         case 0x002: {
+            /* frm */
+            IRTemp fcsr = newTemp(irsb, Ity_I32);
+            assign(irsb, fcsr, getFCSR());
+            if (rd != 0)
+               putIReg64(
+                  irsb, rd,
+                  unop(Iop_32Uto64,
+                       binop(Iop_And32, binop(Iop_Shr32, mkexpr(fcsr), mkU8(5)),
+                             mkU32(0x7))));
+
+            IRExpr* expr;
+            switch (funct3) {
+            case 0b001:
+               expr = binop(
+                  Iop_Or32, binop(Iop_And32, mkexpr(fcsr), mkU32(0xffffff1f)),
+                  binop(Iop_Shl32, binop(Iop_And32, getIReg32(rs1), mkU32(0x7)),
+                        mkU8(5)));
+               break;
+            case 0b010:
+               expr = binop(Iop_Or32, mkexpr(fcsr),
+                            binop(Iop_Shl32,
+                                  binop(Iop_And32, getIReg32(rs1), mkU32(0x7)),
+                                  mkU8(5)));
+               break;
+            case 0b011:
+               expr =
+                  binop(Iop_And32, mkexpr(fcsr),
+                        unop(Iop_Not32,
+                             binop(Iop_Shl32,
+                                   binop(Iop_And32, getIReg32(rs1), mkU32(0x7)),
+                                   mkU8(5))));
+               break;
+            default:
+               vassert(0);
+            }
+            putFCSR(irsb, expr);
+            break;
+         }
+         case 0x003: {
+            /* fcsr */
+            IRTemp fcsr = newTemp(irsb, Ity_I32);
+            assign(irsb, fcsr, getFCSR());
+            if (rd != 0)
+               putIReg64(irsb, rd, unop(Iop_32Uto64, mkexpr(fcsr)));
+
+            IRExpr* expr;
+            switch (funct3) {
+            case 0b001:
+               expr = binop(Iop_And32, getIReg32(rs1), mkU32(0xff));
+               break;
+            case 0b010:
+               expr = binop(Iop_Or32, mkexpr(fcsr),
+                            binop(Iop_And32, getIReg32(rs1), mkU32(0xff)));
+               break;
+            case 0b011:
+               expr = binop(Iop_And32, mkexpr(fcsr),
+                            unop(Iop_Not32, binop(Iop_And32, getIReg32(rs1),
+                                                  mkU32(0xff))));
+               break;
+            default:
+               vassert(0);
+            }
+            putFCSR(irsb, expr);
+            break;
+         }
+         default:
+            vassert(0);
+         }
+
+         const HChar* name;
+         switch (funct3) {
+         case 0b001:
+            name = "csrrw";
+            break;
+         case 0b010:
+            name = "csrrs";
+            break;
+         case 0b011:
+            name = "csrrc";
+            break;
+         default:
+            vassert(0);
+         }
+         DIP("%s %s, %s, %s\n", name, nameIReg(rd), nameCSR(csr),
+             nameIReg(rs1));
+         return True;
+      }
+   }
+
+   return False;
+}
+
+/* Decode one instruction with the standard (non-compressed) encoding by
+   offering it to each extension decoder in a fixed order. Returns True iff
+   one of them accepted the instruction. */
+static Bool dis_RISCV64_standard(/*MB_OUT*/ DisResult* dres,
+                                 /*OUT*/ IRSB*         irsb,
+                                 UInt                  insn,
+                                 Addr                  guest_pc_curr_instr,
+                                 const VexAbiInfo*     abiinfo,
+                                 Bool                  sigill_diag)
+{
+   /* Only encodings whose two lowest bits are 0b11 are handled here. */
+   vassert(INSN(1, 0) == 0b11);
+
+   /* The || operator short-circuits, so decoding stops at the first decoder
+      that returns True and later decoders are never invoked. */
+   if (dis_RV64I(dres, irsb, insn, guest_pc_curr_instr) ||
+       dis_RV64M(dres, irsb, insn) ||
+       dis_RV64A(dres, irsb, insn, guest_pc_curr_instr, abiinfo) ||
+       dis_RV64F(dres, irsb, insn) ||
+       dis_RV64D(dres, irsb, insn) ||
+       dis_RV64Zicsr(dres, irsb, insn))
+      return True;
+
+   /* Nobody recognized the instruction. When diagnostics are requested,
+      report which front end gave up. */
+   if (sigill_diag)
+      vex_printf("RISCV64 front end: standard\n");
+   return False;
+}
+
+/* Disassemble a single riscv64 instruction into IR. Returns True iff the
+   instruction was decoded, in which case *dres will be set accordingly, or
+   False, in which case *dres should be ignored by the caller. */
+static Bool disInstr_RISCV64_WRK(/*MB_OUT*/ DisResult* dres,
+                                 /*OUT*/ IRSB*         irsb,
+                                 const UChar*          guest_instr,
+                                 Addr                  guest_pc_curr_instr,
+                                 const VexArchInfo*    archinfo, /* unused */
+                                 const VexAbiInfo*     abiinfo,
+                                 Bool                  sigill_diag)
+{
+   /* Set result defaults. */
+   dres->whatNext    = Dis_Continue;
+   dres->len         = 0;
+   dres->jk_StopHere = Ijk_INVALID;
+   dres->hint        = Dis_HintNone;
+
+   /* Read the instruction word. */
+   UInt insn = getInsn(guest_instr);
+
+   if (0) /* Debugging aid: change to 1 to print each raw instruction word. */
+      vex_printf("insn: 0x%x\n", insn);
+
+   DIP("\t(riscv64) 0x%llx:  ", (ULong)guest_pc_curr_instr);
+
+   vassert((guest_pc_curr_instr & 1) == 0); /* pc must be 2-byte aligned */
+
+   /* Spot "Special" instructions (see comment at top of file). */
+   {
+      const UChar* code = guest_instr;
+      /* Spot the 16-byte preamble:
+            00305013   srli zero, zero, 3
+            00d05013   srli zero, zero, 13
+            03305013   srli zero, zero, 51
+            03d05013   srli zero, zero, 61
+      */
+      UInt word1 = 0x00305013;
+      UInt word2 = 0x00d05013;
+      UInt word3 = 0x03305013;
+      UInt word4 = 0x03d05013;
+      if (getUIntLittleEndianly(code + 0) == word1 &&
+          getUIntLittleEndianly(code + 4) == word2 &&
+          getUIntLittleEndianly(code + 8) == word3 &&
+          getUIntLittleEndianly(code + 12) == word4) {
+         /* Got a "Special" instruction preamble. Which one is it? */
+         dres->len  = 20; /* 16-byte preamble + 4-byte selector word */
+         UInt which = getUIntLittleEndianly(code + 16);
+         if (which == 0x00a56533 /* or a0, a0, a0 */) {
+            /* a3 = client_request ( a4 ) */
+            DIP("a3 = client_request ( a4 )\n");
+            putPC(irsb, mkU64(guest_pc_curr_instr + 20));
+            dres->jk_StopHere = Ijk_ClientReq;
+            dres->whatNext    = Dis_StopHere;
+            return True;
+         } else if (which == 0x00b5e5b3 /* or a1, a1, a1 */) {
+            /* a3 = guest_NRADDR */
+            DIP("a3 = guest_NRADDR\n");
+            putIReg64(irsb, 13 /*x13/a3*/, IRExpr_Get(OFFB_NRADDR, Ity_I64));
+            return True; /* whatNext stays Dis_Continue, len is 20 */
+         } else if (which == 0x00c66633 /* or a2, a2, a2 */) {
+            /* branch-and-link-to-noredir t0 */
+            DIP("branch-and-link-to-noredir t0\n");
+            putIReg64(irsb, 1 /*x1/ra*/, mkU64(guest_pc_curr_instr + 20));
+            putPC(irsb, getIReg64(5 /*x5/t0*/));
+            dres->jk_StopHere = Ijk_NoRedir;
+            dres->whatNext    = Dis_StopHere;
+            return True;
+         } else if (which == 0x00d6e6b3 /* or a3, a3, a3 */) {
+            /* IR injection */
+            DIP("IR injection\n");
+            vex_inject_ir(irsb, Iend_LE);
+            /* Invalidate the current insn. The reason is that the IRop we're
+               injecting here can change. In which case the translation has to
+               be redone. For ease of handling, we simply invalidate all the
+               time. */
+            stmt(irsb, IRStmt_Put(OFFB_CMSTART, mkU64(guest_pc_curr_instr)));
+            stmt(irsb, IRStmt_Put(OFFB_CMLEN, mkU64(20)));
+            putPC(irsb, mkU64(guest_pc_curr_instr + 20));
+            dres->whatNext    = Dis_StopHere;
+            dres->jk_StopHere = Ijk_InvalICache;
+            return True;
+         }
+         /* Preamble matched but the selector word is unknown: decode failure. */
+         return False;
+      }
+   }
+
+   /* Main riscv64 instruction decoder starts here. */
+   Bool ok = False;
+   UInt inst_size; /* Byte length of this encoding: 2 or 4. */
+
+   /* Parse insn[1:0] to determine whether the instruction is 16-bit
+      (compressed) or 32-bit. */
+   switch (INSN(1, 0)) {
+   case 0b00:
+   case 0b01:
+   case 0b10:
+      dres->len = inst_size = 2;
+      ok = dis_RV64C(dres, irsb, insn, guest_pc_curr_instr, sigill_diag);
+      break;
+
+   case 0b11:
+      dres->len = inst_size = 4;
+      ok = dis_RISCV64_standard(dres, irsb, insn, guest_pc_curr_instr, abiinfo,
+                                sigill_diag);
+      break;
+
+   default:
+      vassert(0); /* Can't happen. */
+   }
+
+   /* If the next-level down decoders failed, make sure dres didn't get
+      changed. */
+   if (!ok) {
+      vassert(dres->whatNext == Dis_Continue);
+      vassert(dres->len == inst_size);
+      vassert(dres->jk_StopHere == Ijk_INVALID);
+   }
+
+   return ok;
+}
+
+#undef INSN
+
+/*------------------------------------------------------------*/
+/*--- Top-level fn                                         ---*/
+/*------------------------------------------------------------*/
+
+/* Disassemble the instruction at &guest_code[delta] (host memory) into irsb.
+   On decode failure the returned result stops the block with Ijk_NoDecode. */
+DisResult disInstr_RISCV64(IRSB*              irsb,
+                           const UChar*       guest_code,
+                           Long               delta,
+                           Addr               guest_IP,
+                           VexArch            guest_arch,
+                           const VexArchInfo* archinfo,
+                           const VexAbiInfo*  abiinfo,
+                           VexEndness         host_endness,
+                           Bool               sigill_diag)
+{
+   DisResult dres;
+   vex_bzero(&dres, sizeof(dres));
+
+   vassert(guest_arch == VexArchRISCV64);
+   /* Check that the host is little-endian as getFReg32() and putFReg32() depend
+      on this fact. */
+   vassert(host_endness == VexEndnessLE);
+
+   /* Try to decode. */
+   Bool ok = disInstr_RISCV64_WRK(&dres, irsb, &guest_code[delta], guest_IP,
+                                  archinfo, abiinfo, sigill_diag);
+   if (ok) {
+      /* All decode successes end up here. A "Special" sequence has len 20. */
+      vassert(dres.len == 2 || dres.len == 4 || dres.len == 20);
+      switch (dres.whatNext) {
+      case Dis_Continue:
+         putPC(irsb, mkU64(guest_IP + dres.len));
+         break;
+      case Dis_StopHere:
+         break;
+      default:
+         vassert(0);
+      }
+      DIP("\n");
+   } else {
+      /* All decode failures end up here. Optionally dump the word in binary. */
+      if (sigill_diag) {
+         Int   i, j;
+         UChar buf[64];
+         UInt  insn = getInsn(&guest_code[delta]);
+         vex_bzero(buf, sizeof(buf));
+         for (i = j = 0; i < 32; i++) {
+            if (i > 0) {
+               if ((i & 7) == 0)
+                  buf[j++] = ' ';
+               else if ((i & 3) == 0)
+                  buf[j++] = '\'';
+            }
+            /* 1U, not 1: shifting a signed 1 into bit 31 is undefined. */
+            buf[j++] = (insn & (1U << (31 - i))) ? '1' : '0';
+         }
+         vex_printf("disInstr(riscv64): unhandled instruction 0x%08x\n", insn);
+         vex_printf("disInstr(riscv64): %s\n", buf);
+      }
+
+      /* The insn cannot be decoded, so it has not been executed and is still
+         the next one to execute. The pc should already be up to date, but be
+         paranoid and set it again before telling the dispatcher. */
+      putPC(irsb, mkU64(guest_IP));
+      dres.len         = 0;
+      dres.whatNext    = Dis_StopHere;
+      dres.jk_StopHere = Ijk_NoDecode;
+   }
+   return dres;
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                     guest_riscv64_toIR.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/VEX/priv/host_generic_regs.h b/VEX/priv/host_generic_regs.h
index 2387f49c6..2b369f2eb 100644
--- a/VEX/priv/host_generic_regs.h
+++ b/VEX/priv/host_generic_regs.h
@@ -36,6 +36,7 @@
 
 #include "libvex_basictypes.h"
 
+#include "main_util.h"
 
 /*---------------------------------------------------------*/
 /*--- Representing HOST REGISTERS                       ---*/
diff --git a/VEX/priv/host_riscv64_defs.c b/VEX/priv/host_riscv64_defs.c
new file mode 100644
index 000000000..24d029b25
--- /dev/null
+++ b/VEX/priv/host_riscv64_defs.c
@@ -0,0 +1,2751 @@
+
+/*--------------------------------------------------------------------*/
+/*--- begin                                    host_riscv64_defs.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright (C) 2020-2023 Petr Pavlu
+      petr.pavlu@dagobah.cz
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "libvex_trc_values.h"
+
+#include "host_riscv64_defs.h"
+#include "main_util.h"
+
+/*------------------------------------------------------------*/
+/*--- Registers                                            ---*/
+/*------------------------------------------------------------*/
+
+UInt ppHRegRISCV64(HReg reg)
+{
+   static const HChar* inames[32] = {
+      "zero", "ra", "sp",  "gp",  "tp", "t0", "t1", "t2",
+      "s0",   "s1", "a0",  "a1",  "a2", "a3", "a4", "a5",
+      "a6",   "a7", "s2",  "s3",  "s4", "s5", "s6", "s7",
+      "s8",   "s9", "s10", "s11", "t3", "t4", "t5", "t6"};
+
+   static const HChar* fnames[32] = {
+      "ft0", "ft1", "ft2",  "ft3",  "ft4", "ft5", "ft6",  "ft7",
+      "fs0", "fs1", "fa0",  "fa1",  "fa2", "fa3", "fa4",  "fa5",
+      "fa6", "fa7", "fs2",  "fs3",  "fs4", "fs5", "fs6",  "fs7",
+      "fs8", "fs9", "fs10", "fs11", "ft8", "ft9", "ft10", "ft11"};
+
+   /* Virtual registers are printed by the generic helper. */
+   if (hregIsVirtual(reg))
+      return ppHReg(reg);
+   /* Real registers: pick the name table that matches the register class. */
+   const HChar* const* names;
+   switch (hregClass(reg)) {
+   case HRcInt64:
+      names = inames;
+      break;
+   case HRcFlt64:
+      names = fnames;
+      break;
+   default:
+      vpanic("ppHRegRISCV64");
+   }
+   UInt enc = hregEncoding(reg);
+   vassert(enc < 32);
+   return vex_printf("%s", names[enc]);
+}
+
+static inline UInt iregEnc(HReg r)
+{
+   /* Accept only a real (non-virtual) register of class HRcInt64. */
+   vassert(hregClass(r) == HRcInt64);
+   vassert(!hregIsVirtual(r));
+   const UInt enc = hregEncoding(r);
+   vassert(enc < 32);
+   return enc;
+}
+
+static inline UInt fregEnc(HReg r)
+{
+   /* Accept only a real (non-virtual) register of class HRcFlt64. */
+   vassert(hregClass(r) == HRcFlt64);
+   vassert(!hregIsVirtual(r));
+   const UInt enc = hregEncoding(r);
+   vassert(enc < 32);
+   return enc;
+}
+
+/*------------------------------------------------------------*/
+/*--- Instructions                                         ---*/
+/*------------------------------------------------------------*/
+
+static const HChar* showRISCV64CSR(UInt csr)
+{
+   /* Map a CSR number to its printable name. */
+   if (csr == 0x001)
+      return "fflags";
+   if (csr == 0x002)
+      return "frm";
+   if (csr == 0x003)
+      return "fcsr";
+   /* Reached only for a CSR number not listed above. */
+   vpanic("showRISCV64CSR");
+}
+
+static const HChar* showRISCV64ALUOp(RISCV64ALUOp op)
+{
+   /* Map a register-register ALU operation to its printable mnemonic. */
+   if (op == RISCV64op_ADD)
+      return "add";
+   if (op == RISCV64op_SUB)
+      return "sub";
+   if (op == RISCV64op_ADDW)
+      return "addw";
+   if (op == RISCV64op_SUBW)
+      return "subw";
+   if (op == RISCV64op_XOR)
+      return "xor";
+   if (op == RISCV64op_OR)
+      return "or";
+   if (op == RISCV64op_AND)
+      return "and";
+   if (op == RISCV64op_SLL)
+      return "sll";
+   if (op == RISCV64op_SRL)
+      return "srl";
+   if (op == RISCV64op_SRA)
+      return "sra";
+   if (op == RISCV64op_SLLW)
+      return "sllw";
+   if (op == RISCV64op_SRLW)
+      return "srlw";
+   if (op == RISCV64op_SRAW)
+      return "sraw";
+   if (op == RISCV64op_SLT)
+      return "slt";
+   if (op == RISCV64op_SLTU)
+      return "sltu";
+   if (op == RISCV64op_MUL)
+      return "mul";
+   if (op == RISCV64op_MULH)
+      return "mulh";
+   if (op == RISCV64op_MULHU)
+      return "mulhu";
+   if (op == RISCV64op_DIV)
+      return "div";
+   if (op == RISCV64op_DIVU)
+      return "divu";
+   if (op == RISCV64op_REM)
+      return "rem";
+   if (op == RISCV64op_REMU)
+      return "remu";
+   if (op == RISCV64op_MULW)
+      return "mulw";
+   if (op == RISCV64op_DIVW)
+      return "divw";
+   if (op == RISCV64op_DIVUW)
+      return "divuw";
+   if (op == RISCV64op_REMW)
+      return "remw";
+   if (op == RISCV64op_REMUW)
+      return "remuw";
+   /* Reached only for a value not listed above. */
+   vpanic("showRISCV64ALUOp");
+}
+
+static const HChar* showRISCV64ALUImmOp(RISCV64ALUImmOp op)
+{
+   /* Map a register-immediate ALU operation to its printable mnemonic. */
+   if (op == RISCV64op_ADDI)
+      return "addi";
+   if (op == RISCV64op_ADDIW)
+      return "addiw";
+   if (op == RISCV64op_XORI)
+      return "xori";
+   if (op == RISCV64op_ANDI)
+      return "andi";
+   if (op == RISCV64op_SLLI)
+      return "slli";
+   if (op == RISCV64op_SRLI)
+      return "srli";
+   if (op == RISCV64op_SRAI)
+      return "srai";
+   if (op == RISCV64op_SLTIU)
+      return "sltiu";
+   /* Reached only for a value not listed above. */
+   vpanic("showRISCV64ALUImmOp");
+}
+
+static const HChar* showRISCV64LoadOp(RISCV64LoadOp op)
+{
+   /* Map an integer load operation to its printable mnemonic. */
+   if (op == RISCV64op_LD)
+      return "ld";
+   if (op == RISCV64op_LW)
+      return "lw";
+   if (op == RISCV64op_LH)
+      return "lh";
+   if (op == RISCV64op_LB)
+      return "lb";
+   /* Reached only for a value not listed above. */
+   vpanic("showRISCV64LoadOp");
+}
+
+static const HChar* showRISCV64StoreOp(RISCV64StoreOp op)
+{
+   /* Map an integer store operation to its printable mnemonic. */
+   if (op == RISCV64op_SD)
+      return "sd";
+   if (op == RISCV64op_SW)
+      return "sw";
+   if (op == RISCV64op_SH)
+      return "sh";
+   if (op == RISCV64op_SB)
+      return "sb";
+   /* Reached only for a value not listed above. */
+   vpanic("showRISCV64StoreOp");
+}
+
+static const HChar* showRISCV64LoadROp(RISCV64LoadROp op)
+{
+   /* Map a load-reserved operation to its printable mnemonic. */
+   if (op == RISCV64op_LR_W)
+      return "lr.w";
+   /* Reached only for a value not listed above. */
+   vpanic("showRISCV64LoadROp");
+}
+
+static const HChar* showRISCV64StoreCOp(RISCV64StoreCOp op)
+{
+   /* Map a store-conditional operation to its printable mnemonic. */
+   if (op == RISCV64op_SC_W)
+      return "sc.w";
+   /* Reached only for a value not listed above. */
+   vpanic("showRISCV64StoreCOp");
+}
+
+static const HChar* showRISCV64FpUnaryOp(RISCV64FpUnaryOp op)
+{
+   /* Map a unary floating-point operation to its printable mnemonic. */
+   if (op == RISCV64op_FSQRT_S)
+      return "fsqrt.s";
+   if (op == RISCV64op_FSQRT_D)
+      return "fsqrt.d";
+   /* Reached only for a value not listed above. */
+   vpanic("showRISCV64FpUnaryOp");
+}
+
+static const HChar* showRISCV64FpBinaryOp(RISCV64FpBinaryOp op)
+{
+   /* Map a binary floating-point operation to its printable mnemonic. */
+   if (op == RISCV64op_FADD_S)
+      return "fadd.s";
+   if (op == RISCV64op_FMUL_S)
+      return "fmul.s";
+   if (op == RISCV64op_FDIV_S)
+      return "fdiv.s";
+   if (op == RISCV64op_FSGNJN_S)
+      return "fsgnjn.s";
+   if (op == RISCV64op_FSGNJX_S)
+      return "fsgnjx.s";
+   if (op == RISCV64op_FMIN_S)
+      return "fmin.s";
+   if (op == RISCV64op_FMAX_S)
+      return "fmax.s";
+   if (op == RISCV64op_FADD_D)
+      return "fadd.d";
+   if (op == RISCV64op_FSUB_D)
+      return "fsub.d";
+   if (op == RISCV64op_FMUL_D)
+      return "fmul.d";
+   if (op == RISCV64op_FDIV_D)
+      return "fdiv.d";
+   if (op == RISCV64op_FSGNJN_D)
+      return "fsgnjn.d";
+   if (op == RISCV64op_FSGNJX_D)
+      return "fsgnjx.d";
+   if (op == RISCV64op_FMIN_D)
+      return "fmin.d";
+   if (op == RISCV64op_FMAX_D)
+      return "fmax.d";
+   /* Reached only for a value not listed above. */
+   vpanic("showRISCV64FpBinaryOp");
+}
+
+static const HChar* showRISCV64FpTernaryOp(RISCV64FpTernaryOp op)
+{
+   /* Map a ternary floating-point operation to its printable mnemonic. */
+   if (op == RISCV64op_FMADD_S)
+      return "fmadd.s";
+   if (op == RISCV64op_FMADD_D)
+      return "fmadd.d";
+   /* Reached only for a value not listed above. */
+   vpanic("showRISCV64FpTernaryOp");
+}
+
+static const HChar* showRISCV64FpMoveOp(RISCV64FpMoveOp op)
+{
+   /* Map a floating-point move operation to its printable mnemonic. */
+   if (op == RISCV64op_FMV_X_W)
+      return "fmv.x.w";
+   if (op == RISCV64op_FMV_W_X)
+      return "fmv.w.x";
+   if (op == RISCV64op_FMV_D)
+      return "fmv.d";
+   if (op == RISCV64op_FMV_X_D)
+      return "fmv.x.d";
+   if (op == RISCV64op_FMV_D_X)
+      return "fmv.d.x";
+   /* Reached only for a value not listed above. */
+   vpanic("showRISCV64FpMoveOp");
+}
+
+static const HChar* showRISCV64FpConvertOp(RISCV64FpConvertOp op)
+{
+   /* Map a floating-point conversion operation to its printable mnemonic. */
+   if (op == RISCV64op_FCVT_W_S)
+      return "fcvt.w.s";
+   if (op == RISCV64op_FCVT_WU_S)
+      return "fcvt.wu.s";
+   if (op == RISCV64op_FCVT_S_W)
+      return "fcvt.s.w";
+   if (op == RISCV64op_FCVT_S_WU)
+      return "fcvt.s.wu";
+   if (op == RISCV64op_FCVT_L_S)
+      return "fcvt.l.s";
+   if (op == RISCV64op_FCVT_LU_S)
+      return "fcvt.lu.s";
+   if (op == RISCV64op_FCVT_S_L)
+      return "fcvt.s.l";
+   if (op == RISCV64op_FCVT_S_LU)
+      return "fcvt.s.lu";
+   if (op == RISCV64op_FCVT_S_D)
+      return "fcvt.s.d";
+   if (op == RISCV64op_FCVT_D_S)
+      return "fcvt.d.s";
+   if (op == RISCV64op_FCVT_W_D)
+      return "fcvt.w.d";
+   if (op == RISCV64op_FCVT_WU_D)
+      return "fcvt.wu.d";
+   if (op == RISCV64op_FCVT_D_W)
+      return "fcvt.d.w";
+   if (op == RISCV64op_FCVT_D_WU)
+      return "fcvt.d.wu";
+   if (op == RISCV64op_FCVT_L_D)
+      return "fcvt.l.d";
+   if (op == RISCV64op_FCVT_LU_D)
+      return "fcvt.lu.d";
+   if (op == RISCV64op_FCVT_D_L)
+      return "fcvt.d.l";
+   if (op == RISCV64op_FCVT_D_LU)
+      return "fcvt.d.lu";
+   /* Reached only for a value not listed above. */
+   vpanic("showRISCV64FpConvertOp");
+}
+
+static const HChar* showRISCV64FpCompareOp(RISCV64FpCompareOp op)
+{
+   /* Map a floating-point comparison operation to its printable mnemonic. */
+   if (op == RISCV64op_FEQ_S)
+      return "feq.s";
+   if (op == RISCV64op_FLT_S)
+      return "flt.s";
+   if (op == RISCV64op_FEQ_D)
+      return "feq.d";
+   if (op == RISCV64op_FLT_D)
+      return "flt.d";
+   /* Reached only for a value not listed above. */
+   vpanic("showRISCV64FpCompareOp");
+}
+
+static const HChar* showRISCV64FpLdStOp(RISCV64FpLdStOp op)
+{
+   /* Map a floating-point load/store operation to its printable mnemonic. */
+   if (op == RISCV64op_FLW)
+      return "flw";
+   if (op == RISCV64op_FLD)
+      return "fld";
+   if (op == RISCV64op_FSW)
+      return "fsw";
+   if (op == RISCV64op_FSD)
+      return "fsd";
+   /* Reached only for a value not listed above. */
+   vpanic("showRISCV64FpLdStOp");
+}
+
+RISCV64Instr* RISCV64Instr_LI(HReg dst, ULong imm64)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_LI; /* li dst, imm64 */
+   instr->RISCV64in.LI.dst = dst;
+   instr->RISCV64in.LI.imm64 = imm64;
+   return instr;
+}
+
+RISCV64Instr* RISCV64Instr_MV(HReg dst, HReg src)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_MV; /* mv dst, src */
+   instr->RISCV64in.MV.dst = dst;
+   instr->RISCV64in.MV.src = src;
+   return instr;
+}
+
+RISCV64Instr* RISCV64Instr_ALU(RISCV64ALUOp op, HReg dst, HReg src1, HReg src2)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_ALU; /* <op> dst, src1, src2 */
+   instr->RISCV64in.ALU.op = op;
+   instr->RISCV64in.ALU.dst = dst;
+   instr->RISCV64in.ALU.src1 = src1;
+   instr->RISCV64in.ALU.src2 = src2;
+   return instr;
+}
+
+RISCV64Instr*
+RISCV64Instr_ALUImm(RISCV64ALUImmOp op, HReg dst, HReg src, Int imm12)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_ALUImm; /* <op> dst, src, imm12 */
+   instr->RISCV64in.ALUImm.op = op;
+   instr->RISCV64in.ALUImm.dst = dst;
+   instr->RISCV64in.ALUImm.src = src;
+   instr->RISCV64in.ALUImm.imm12 = imm12;
+   return instr;
+}
+
+RISCV64Instr*
+RISCV64Instr_Load(RISCV64LoadOp op, HReg dst, HReg base, Int soff12)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_Load; /* <op> dst, soff12(base) */
+   instr->RISCV64in.Load.op = op;
+   instr->RISCV64in.Load.dst = dst;
+   instr->RISCV64in.Load.base = base;
+   instr->RISCV64in.Load.soff12 = soff12;
+   return instr;
+}
+
+RISCV64Instr*
+RISCV64Instr_Store(RISCV64StoreOp op, HReg src, HReg base, Int soff12)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_Store; /* <op> src, soff12(base) */
+   instr->RISCV64in.Store.op = op;
+   instr->RISCV64in.Store.src = src;
+   instr->RISCV64in.Store.base = base;
+   instr->RISCV64in.Store.soff12 = soff12;
+   return instr;
+}
+
+RISCV64Instr* RISCV64Instr_LoadR(RISCV64LoadROp op, HReg dst, HReg addr)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_LoadR; /* <op> dst, (addr) */
+   instr->RISCV64in.LoadR.op = op;
+   instr->RISCV64in.LoadR.dst = dst;
+   instr->RISCV64in.LoadR.addr = addr;
+   return instr;
+}
+
+RISCV64Instr*
+RISCV64Instr_StoreC(RISCV64StoreCOp op, HReg res, HReg src, HReg addr)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_StoreC; /* <op> res, src, (addr) */
+   instr->RISCV64in.StoreC.op = op;
+   instr->RISCV64in.StoreC.res = res;
+   instr->RISCV64in.StoreC.src = src;
+   instr->RISCV64in.StoreC.addr = addr;
+   return instr;
+}
+
+RISCV64Instr* RISCV64Instr_CSRRW(HReg dst, HReg src, UInt csr)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_CSRRW; /* csrrw dst, csr, src */
+   instr->RISCV64in.CSRRW.dst = dst;
+   instr->RISCV64in.CSRRW.src = src;
+   instr->RISCV64in.CSRRW.csr = csr;
+   return instr;
+}
+
+RISCV64Instr* RISCV64Instr_FpUnary(RISCV64FpUnaryOp op, HReg dst, HReg src)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_FpUnary; /* <op> dst, src */
+   instr->RISCV64in.FpUnary.op = op;
+   instr->RISCV64in.FpUnary.dst = dst;
+   instr->RISCV64in.FpUnary.src = src;
+   return instr;
+}
+
+RISCV64Instr*
+RISCV64Instr_FpBinary(RISCV64FpBinaryOp op, HReg dst, HReg src1, HReg src2)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_FpBinary;
+   instr->RISCV64in.FpBinary.op = op;
+   instr->RISCV64in.FpBinary.dst = dst;
+   instr->RISCV64in.FpBinary.src1 = src1;
+   instr->RISCV64in.FpBinary.src2 = src2;
+   return instr;
+}
+
+RISCV64Instr* RISCV64Instr_FpTernary(
+   RISCV64FpTernaryOp op, HReg dst, HReg src1, HReg src2, HReg src3)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_FpTernary;
+   instr->RISCV64in.FpTernary.op = op;
+   instr->RISCV64in.FpTernary.dst = dst;
+   instr->RISCV64in.FpTernary.src1 = src1;
+   instr->RISCV64in.FpTernary.src2 = src2;
+   instr->RISCV64in.FpTernary.src3 = src3;
+   return instr;
+}
+
+RISCV64Instr* RISCV64Instr_FpMove(RISCV64FpMoveOp op, HReg dst, HReg src)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_FpMove;
+   instr->RISCV64in.FpMove.op = op;
+   instr->RISCV64in.FpMove.dst = dst;
+   instr->RISCV64in.FpMove.src = src;
+   return instr;
+}
+
+RISCV64Instr* RISCV64Instr_FpConvert(RISCV64FpConvertOp op, HReg dst, HReg src)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_FpConvert;
+   instr->RISCV64in.FpConvert.op = op;
+   instr->RISCV64in.FpConvert.dst = dst;
+   instr->RISCV64in.FpConvert.src = src;
+   return instr;
+}
+
+RISCV64Instr*
+RISCV64Instr_FpCompare(RISCV64FpCompareOp op, HReg dst, HReg src1, HReg src2)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_FpCompare;
+   instr->RISCV64in.FpCompare.op = op;
+   instr->RISCV64in.FpCompare.dst = dst;
+   instr->RISCV64in.FpCompare.src1 = src1;
+   instr->RISCV64in.FpCompare.src2 = src2;
+   return instr;
+}
+
+RISCV64Instr*
+RISCV64Instr_FpLdSt(RISCV64FpLdStOp op, HReg reg, HReg base, Int soff12)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_FpLdSt;
+   instr->RISCV64in.FpLdSt.op = op;
+   instr->RISCV64in.FpLdSt.reg = reg;
+   instr->RISCV64in.FpLdSt.base = base;
+   instr->RISCV64in.FpLdSt.soff12 = soff12;
+   return instr;
+}
+
+RISCV64Instr*
+RISCV64Instr_FpCSEL(HReg dst, HReg iftrue, HReg iffalse, HReg cond)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_FpCSEL;
+   instr->RISCV64in.FpCSEL.dst = dst;
+   instr->RISCV64in.FpCSEL.iftrue = iftrue;
+   instr->RISCV64in.FpCSEL.iffalse = iffalse;
+   instr->RISCV64in.FpCSEL.cond = cond;
+   return instr;
+}
+
+RISCV64Instr*
+RISCV64Instr_CAS(RISCV64CASOp op, HReg old, HReg addr, HReg expd, HReg data)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_CAS;
+   instr->RISCV64in.CAS.op = op;
+   instr->RISCV64in.CAS.old = old;
+   instr->RISCV64in.CAS.addr = addr;
+   instr->RISCV64in.CAS.expd = expd;
+   instr->RISCV64in.CAS.data = data;
+   return instr;
+}
+
+RISCV64Instr* RISCV64Instr_FENCE(void)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_FENCE; /* The tag is the only field set. */
+   return instr;
+}
+
+RISCV64Instr* RISCV64Instr_CSEL(HReg dst, HReg iftrue, HReg iffalse, HReg cond)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_CSEL;
+   instr->RISCV64in.CSEL.dst = dst;
+   instr->RISCV64in.CSEL.iftrue = iftrue;
+   instr->RISCV64in.CSEL.iffalse = iffalse;
+   instr->RISCV64in.CSEL.cond = cond;
+   return instr;
+}
+
+RISCV64Instr* RISCV64Instr_Call(
+   RetLoc rloc, Addr64 target, HReg cond, UChar nArgRegs, UChar nFArgRegs)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_Call;
+   instr->RISCV64in.Call.rloc = rloc;
+   instr->RISCV64in.Call.target = target;
+   instr->RISCV64in.Call.cond = cond;
+   instr->RISCV64in.Call.nArgRegs = nArgRegs;
+   instr->RISCV64in.Call.nFArgRegs = nFArgRegs;
+   return instr;
+}
+
+RISCV64Instr* RISCV64Instr_XDirect(
+   Addr64 dstGA, HReg base, Int soff12, HReg cond, Bool toFastEP)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_XDirect;
+   instr->RISCV64in.XDirect.dstGA = dstGA;
+   instr->RISCV64in.XDirect.base = base;
+   instr->RISCV64in.XDirect.soff12 = soff12;
+   instr->RISCV64in.XDirect.cond = cond;
+   instr->RISCV64in.XDirect.toFastEP = toFastEP;
+   return instr;
+}
+
+RISCV64Instr* RISCV64Instr_XIndir(HReg dstGA, HReg base, Int soff12, HReg cond)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_XIndir;
+   instr->RISCV64in.XIndir.dstGA = dstGA;
+   instr->RISCV64in.XIndir.base = base;
+   instr->RISCV64in.XIndir.soff12 = soff12;
+   instr->RISCV64in.XIndir.cond = cond;
+   return instr;
+}
+
+RISCV64Instr* RISCV64Instr_XAssisted(
+   HReg dstGA, HReg base, Int soff12, HReg cond, IRJumpKind jk)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_XAssisted;
+   instr->RISCV64in.XAssisted.dstGA = dstGA;
+   instr->RISCV64in.XAssisted.base = base;
+   instr->RISCV64in.XAssisted.soff12 = soff12;
+   instr->RISCV64in.XAssisted.cond = cond;
+   instr->RISCV64in.XAssisted.jk = jk;
+   return instr;
+}
+
+RISCV64Instr* RISCV64Instr_EvCheck(HReg base_amCounter,
+                                   Int  soff12_amCounter,
+                                   HReg base_amFailAddr,
+                                   Int  soff12_amFailAddr)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_EvCheck;
+   instr->RISCV64in.EvCheck.base_amCounter = base_amCounter;
+   instr->RISCV64in.EvCheck.soff12_amCounter = soff12_amCounter;
+   instr->RISCV64in.EvCheck.base_amFailAddr = base_amFailAddr;
+   instr->RISCV64in.EvCheck.soff12_amFailAddr = soff12_amFailAddr;
+   return instr;
+}
+
+RISCV64Instr* RISCV64Instr_ProfInc(void)
+{
+   RISCV64Instr* instr = LibVEX_Alloc_inline(sizeof(*instr));
+   instr->tag = RISCV64in_ProfInc; /* The tag is the only field set. */
+   return instr;
+}
+
+void ppRISCV64Instr(const RISCV64Instr* i, Bool mode64)
+{
+   vassert(mode64 == True);
+   /* Print *i as one line of pseudo-assembly; the layout is chosen by tag. */
+   switch (i->tag) {
+   case RISCV64in_LI:
+      vex_printf("li      ");
+      ppHRegRISCV64(i->RISCV64in.LI.dst);
+      vex_printf(", 0x%llx", i->RISCV64in.LI.imm64);
+      return;
+   case RISCV64in_MV:
+      vex_printf("mv      ");
+      ppHRegRISCV64(i->RISCV64in.MV.dst);
+      vex_printf(", ");
+      ppHRegRISCV64(i->RISCV64in.MV.src);
+      return;
+   case RISCV64in_ALU:
+      vex_printf("%-7s ", showRISCV64ALUOp(i->RISCV64in.ALU.op));
+      ppHRegRISCV64(i->RISCV64in.ALU.dst);
+      vex_printf(", ");
+      ppHRegRISCV64(i->RISCV64in.ALU.src1);
+      vex_printf(", ");
+      ppHRegRISCV64(i->RISCV64in.ALU.src2);
+      return;
+   case RISCV64in_ALUImm:
+      vex_printf("%-7s ", showRISCV64ALUImmOp(i->RISCV64in.ALUImm.op));
+      ppHRegRISCV64(i->RISCV64in.ALUImm.dst);
+      vex_printf(", ");
+      ppHRegRISCV64(i->RISCV64in.ALUImm.src);
+      vex_printf(", %d", i->RISCV64in.ALUImm.imm12);
+      return;
+   case RISCV64in_Load: /* printed as "op dst, soff12(base)" */
+      vex_printf("%-7s ", showRISCV64LoadOp(i->RISCV64in.Load.op));
+      ppHRegRISCV64(i->RISCV64in.Load.dst);
+      vex_printf(", %d(", i->RISCV64in.Load.soff12);
+      ppHRegRISCV64(i->RISCV64in.Load.base);
+      vex_printf(")");
+      return;
+   case RISCV64in_Store: /* printed as "op src, soff12(base)" */
+      vex_printf("%-7s ", showRISCV64StoreOp(i->RISCV64in.Store.op));
+      ppHRegRISCV64(i->RISCV64in.Store.src);
+      vex_printf(", %d(", i->RISCV64in.Store.soff12);
+      ppHRegRISCV64(i->RISCV64in.Store.base);
+      vex_printf(")");
+      return;
+   case RISCV64in_LoadR:
+      vex_printf("%-7s ", showRISCV64LoadROp(i->RISCV64in.LoadR.op));
+      ppHRegRISCV64(i->RISCV64in.LoadR.dst);
+      vex_printf(", (");
+      ppHRegRISCV64(i->RISCV64in.LoadR.addr);
+      vex_printf(")");
+      return;
+   case RISCV64in_StoreC:
+      vex_printf("%-7s ", showRISCV64StoreCOp(i->RISCV64in.StoreC.op));
+      ppHRegRISCV64(i->RISCV64in.StoreC.res);
+      vex_printf(", ");
+      ppHRegRISCV64(i->RISCV64in.StoreC.src);
+      vex_printf(", (");
+      ppHRegRISCV64(i->RISCV64in.StoreC.addr);
+      vex_printf(")");
+      return;
+   case RISCV64in_CSRRW:
+      vex_printf("csrrw   ");
+      ppHRegRISCV64(i->RISCV64in.CSRRW.dst);
+      vex_printf(", %s, ", showRISCV64CSR(i->RISCV64in.CSRRW.csr));
+      ppHRegRISCV64(i->RISCV64in.CSRRW.src);
+      return;
+   case RISCV64in_FpUnary:
+      vex_printf("%-7s ", showRISCV64FpUnaryOp(i->RISCV64in.FpUnary.op));
+      ppHRegRISCV64(i->RISCV64in.FpUnary.dst);
+      vex_printf(", ");
+      ppHRegRISCV64(i->RISCV64in.FpUnary.src);
+      return;
+   case RISCV64in_FpBinary:
+      vex_printf("%-7s ", showRISCV64FpBinaryOp(i->RISCV64in.FpBinary.op));
+      ppHRegRISCV64(i->RISCV64in.FpBinary.dst);
+      vex_printf(", ");
+      ppHRegRISCV64(i->RISCV64in.FpBinary.src1);
+      vex_printf(", ");
+      ppHRegRISCV64(i->RISCV64in.FpBinary.src2);
+      return;
+   case RISCV64in_FpTernary:
+      vex_printf("%-7s ", showRISCV64FpTernaryOp(i->RISCV64in.FpTernary.op));
+      ppHRegRISCV64(i->RISCV64in.FpTernary.dst);
+      vex_printf(", ");
+      ppHRegRISCV64(i->RISCV64in.FpTernary.src1);
+      vex_printf(", ");
+      ppHRegRISCV64(i->RISCV64in.FpTernary.src2);
+      vex_printf(", ");
+      ppHRegRISCV64(i->RISCV64in.FpTernary.src3);
+      return;
+   case RISCV64in_FpMove:
+      vex_printf("%-7s ", showRISCV64FpMoveOp(i->RISCV64in.FpMove.op));
+      ppHRegRISCV64(i->RISCV64in.FpMove.dst);
+      vex_printf(", ");
+      ppHRegRISCV64(i->RISCV64in.FpMove.src);
+      return;
+   case RISCV64in_FpConvert:
+      vex_printf("%-7s ", showRISCV64FpConvertOp(i->RISCV64in.FpConvert.op));
+      ppHRegRISCV64(i->RISCV64in.FpConvert.dst);
+      vex_printf(", ");
+      ppHRegRISCV64(i->RISCV64in.FpConvert.src);
+      return;
+   case RISCV64in_FpCompare:
+      vex_printf("%-7s ", showRISCV64FpCompareOp(i->RISCV64in.FpCompare.op));
+      ppHRegRISCV64(i->RISCV64in.FpCompare.dst);
+      vex_printf(", ");
+      ppHRegRISCV64(i->RISCV64in.FpCompare.src1);
+      vex_printf(", ");
+      ppHRegRISCV64(i->RISCV64in.FpCompare.src2);
+      return;
+   case RISCV64in_FpLdSt: /* printed as "op reg, soff12(base)" */
+      vex_printf("%-7s ", showRISCV64FpLdStOp(i->RISCV64in.FpLdSt.op));
+      ppHRegRISCV64(i->RISCV64in.FpLdSt.reg);
+      vex_printf(", %d(", i->RISCV64in.FpLdSt.soff12);
+      ppHRegRISCV64(i->RISCV64in.FpLdSt.base);
+      vex_printf(")");
+      return;
+   case RISCV64in_FpCSEL: { /* printed as a branch-over-move sequence */
+      vex_printf("(FpCSEL) beq ");
+      ppHRegRISCV64(i->RISCV64in.FpCSEL.cond);
+      vex_printf(", zero, 1f; fmv.d ");
+      ppHRegRISCV64(i->RISCV64in.FpCSEL.dst);
+      vex_printf(", ");
+      ppHRegRISCV64(i->RISCV64in.FpCSEL.iftrue);
+      vex_printf("; c.j 2f; 1: fmv.d ");
+      ppHRegRISCV64(i->RISCV64in.FpCSEL.dst);
+      vex_printf(", ");
+      ppHRegRISCV64(i->RISCV64in.FpCSEL.iffalse);
+      vex_printf("; 2:");
+      return;
+   }
+   case RISCV64in_CAS: { /* printed as an lr/sc retry loop that names t0 */
+      vassert(i->RISCV64in.CAS.op == RISCV64op_CAS_D ||
+              i->RISCV64in.CAS.op == RISCV64op_CAS_W);
+      Bool is_d = i->RISCV64in.CAS.op == RISCV64op_CAS_D; /* else CAS_W */
+      vex_printf("(%s) 1: %s ", is_d ? "CAS_D" : "CAS_W",
+                 is_d ? "lr.d" : "lr.w");
+      ppHRegRISCV64(i->RISCV64in.CAS.old);
+      vex_printf(", (");
+      ppHRegRISCV64(i->RISCV64in.CAS.addr);
+      vex_printf("); bne ");
+      ppHRegRISCV64(i->RISCV64in.CAS.old);
+      vex_printf(", ");
+      ppHRegRISCV64(i->RISCV64in.CAS.expd);
+      vex_printf(", 2f; %s t0, ", is_d ? "sc.d" : "sc.w");
+      ppHRegRISCV64(i->RISCV64in.CAS.data);
+      vex_printf(", (");
+      ppHRegRISCV64(i->RISCV64in.CAS.addr);
+      vex_printf("); bne t0, zero, 1b; 2:");
+      return;
+   }
+   case RISCV64in_FENCE:
+      vex_printf("fence");
+      return;
+   case RISCV64in_CSEL: /* printed as a branch-over-move sequence */
+      vex_printf("(CSEL) beq ");
+      ppHRegRISCV64(i->RISCV64in.CSEL.cond);
+      vex_printf(", zero, 1f; c.mv ");
+      ppHRegRISCV64(i->RISCV64in.CSEL.dst);
+      vex_printf(", ");
+      ppHRegRISCV64(i->RISCV64in.CSEL.iftrue);
+      vex_printf("; c.j 2f; 1: c.mv ");
+      ppHRegRISCV64(i->RISCV64in.CSEL.dst);
+      vex_printf(", ");
+      ppHRegRISCV64(i->RISCV64in.CSEL.iffalse);
+      vex_printf("; 2:");
+      return;
+   case RISCV64in_Call:
+      vex_printf("(Call) ");
+      if (!hregIsInvalid(i->RISCV64in.Call.cond)) { /* conditional form */
+         vex_printf("beq ");
+         ppHRegRISCV64(i->RISCV64in.Call.cond);
+         vex_printf(", zero, 1f; ");
+      }
+      vex_printf("li t0, 0x%llx; c.jalr 0(t0) [nArgRegs=%u, nFArgRegs=%u, ",
+                 i->RISCV64in.Call.target, i->RISCV64in.Call.nArgRegs,
+                 i->RISCV64in.Call.nFArgRegs);
+      ppRetLoc(i->RISCV64in.Call.rloc);
+      vex_printf("]; 1:");
+      return;
+   case RISCV64in_XDirect: /* dstGA is printed as an immediate here */
+      vex_printf("(xDirect) ");
+      if (!hregIsInvalid(i->RISCV64in.XDirect.cond)) { /* conditional form */
+         vex_printf("beq ");
+         ppHRegRISCV64(i->RISCV64in.XDirect.cond);
+         vex_printf(", zero, 1f; ");
+      }
+      vex_printf("li t0, 0x%llx; sd t0, %d(", i->RISCV64in.XDirect.dstGA,
+                 i->RISCV64in.XDirect.soff12);
+      ppHRegRISCV64(i->RISCV64in.XDirect.base);
+      vex_printf("); li t0, <%s>; c.jalr 0(t0); 1:",
+                 i->RISCV64in.XDirect.toFastEP ? "disp_cp_chain_me_to_fastEP"
+                                               : "disp_cp_chain_me_to_slowEP");
+      return;
+   case RISCV64in_XIndir: /* dstGA is printed as a register here */
+      vex_printf("(xIndir) ");
+      if (!hregIsInvalid(i->RISCV64in.XIndir.cond)) { /* conditional form */
+         vex_printf("beq ");
+         ppHRegRISCV64(i->RISCV64in.XIndir.cond);
+         vex_printf(", zero, 1f; ");
+      }
+      vex_printf("sd ");
+      ppHRegRISCV64(i->RISCV64in.XIndir.dstGA);
+      vex_printf(", %d(", i->RISCV64in.XIndir.soff12);
+      ppHRegRISCV64(i->RISCV64in.XIndir.base);
+      vex_printf("); li t0, <disp_cp_xindir>; c.jr 0(t0); 1:");
+      return;
+   case RISCV64in_XAssisted: /* jk is printed as its raw integer value */
+      vex_printf("(xAssisted) ");
+      if (!hregIsInvalid(i->RISCV64in.XAssisted.cond)) { /* conditional form */
+         vex_printf("beq ");
+         ppHRegRISCV64(i->RISCV64in.XAssisted.cond);
+         vex_printf(", zero, 1f; ");
+      }
+      vex_printf("sd ");
+      ppHRegRISCV64(i->RISCV64in.XAssisted.dstGA);
+      vex_printf(", %d(", i->RISCV64in.XAssisted.soff12);
+      ppHRegRISCV64(i->RISCV64in.XAssisted.base);
+      vex_printf("); mv s0, $IRJumpKind_to_TRCVAL(%d)",
+                 (Int)i->RISCV64in.XAssisted.jk);
+      vex_printf("; li t0, <disp_cp_xassisted>; c.jr 0(t0); 1:");
+      return;
+   case RISCV64in_EvCheck: /* counter amode is printed twice: load and store */
+      vex_printf("(evCheck) lw t0, %d(", i->RISCV64in.EvCheck.soff12_amCounter);
+      ppHRegRISCV64(i->RISCV64in.EvCheck.base_amCounter);
+      vex_printf("); c.addiw t0, -1; sw t0, %d(",
+                 i->RISCV64in.EvCheck.soff12_amCounter);
+      ppHRegRISCV64(i->RISCV64in.EvCheck.base_amCounter);
+      vex_printf("); bge t0, zero, 1f; ld t0, %d(",
+                 i->RISCV64in.EvCheck.soff12_amFailAddr);
+      ppHRegRISCV64(i->RISCV64in.EvCheck.base_amFailAddr);
+      vex_printf("); c.jr 0(t0); 1:");
+      return;
+   case RISCV64in_ProfInc: /* fixed text; the node has no operands to print */
+      vex_printf("(profInc) li t1, $NotKnownYet; "
+                 "ld t0, 0(t1); c.addi t0, t0, 1; sd t0, 0(t1)");
+      return;
+   default: /* a tag this printer does not know */
+      vpanic("ppRISCV64Instr");
+   }
+}
+
+/*------------------------------------------------------------*/
+/*--- Helpers for register allocation                      ---*/
+/*------------------------------------------------------------*/
+
+/* Return the "register universe", i.e. the list of hardware registers made
+   known to the register allocator. Built on the first call, then cached. */
+const RRegUniverse* getRRegUniverse_RISCV64(void)
+{
+   static RRegUniverse all_regs;
+   static Bool         initialised = False;
+   RRegUniverse*       ru          = &all_regs;
+
+   if (LIKELY(initialised)) /* table already filled in by an earlier call */
+      return ru;
+
+   RRegUniverse__init(ru);
+
+   /* Add the registers that are available to the register allocator. */
+   ru->allocable_start[HRcInt64] = ru->size;
+   ru->regs[ru->size++]          = hregRISCV64_x18(); /* s2 */
+   ru->regs[ru->size++]          = hregRISCV64_x19(); /* s3 */
+   ru->regs[ru->size++]          = hregRISCV64_x20(); /* s4 */
+   ru->regs[ru->size++]          = hregRISCV64_x21(); /* s5 */
+   ru->regs[ru->size++]          = hregRISCV64_x22(); /* s6 */
+   ru->regs[ru->size++]          = hregRISCV64_x23(); /* s7 */
+   ru->regs[ru->size++]          = hregRISCV64_x24(); /* s8 */
+   ru->regs[ru->size++]          = hregRISCV64_x25(); /* s9 */
+   ru->regs[ru->size++]          = hregRISCV64_x26(); /* s10 */
+   ru->regs[ru->size++]          = hregRISCV64_x27(); /* s11 */
+   ru->regs[ru->size++]          = hregRISCV64_x10(); /* a0 */
+   ru->regs[ru->size++]          = hregRISCV64_x11(); /* a1 */
+   ru->regs[ru->size++]          = hregRISCV64_x12(); /* a2 */
+   ru->regs[ru->size++]          = hregRISCV64_x13(); /* a3 */
+   ru->regs[ru->size++]          = hregRISCV64_x14(); /* a4 */
+   ru->regs[ru->size++]          = hregRISCV64_x15(); /* a5 */
+   ru->regs[ru->size++]          = hregRISCV64_x16(); /* a6 */
+   ru->regs[ru->size++]          = hregRISCV64_x17(); /* a7 */
+   ru->allocable_end[HRcInt64]   = ru->size - 1; /* inclusive last index */
+
+   /* Floating-point registers, all of which are caller-saved. */
+   ru->allocable_start[HRcFlt64] = ru->size;
+   ru->regs[ru->size++]          = hregRISCV64_f0();  /* ft0 */
+   ru->regs[ru->size++]          = hregRISCV64_f1();  /* ft1 */
+   ru->regs[ru->size++]          = hregRISCV64_f2();  /* ft2 */
+   ru->regs[ru->size++]          = hregRISCV64_f3();  /* ft3 */
+   ru->regs[ru->size++]          = hregRISCV64_f4();  /* ft4 */
+   ru->regs[ru->size++]          = hregRISCV64_f5();  /* ft5 */
+   ru->regs[ru->size++]          = hregRISCV64_f6();  /* ft6 */
+   ru->regs[ru->size++]          = hregRISCV64_f7();  /* ft7 */
+   ru->regs[ru->size++]          = hregRISCV64_f10(); /* fa0 */
+   ru->regs[ru->size++]          = hregRISCV64_f11(); /* fa1 */
+   ru->regs[ru->size++]          = hregRISCV64_f12(); /* fa2 */
+   ru->regs[ru->size++]          = hregRISCV64_f13(); /* fa3 */
+   ru->regs[ru->size++]          = hregRISCV64_f14(); /* fa4 */
+   ru->regs[ru->size++]          = hregRISCV64_f15(); /* fa5 */
+   ru->regs[ru->size++]          = hregRISCV64_f16(); /* fa6 */
+   ru->regs[ru->size++]          = hregRISCV64_f17(); /* fa7 */
+   ru->regs[ru->size++]          = hregRISCV64_f28(); /* ft8 */
+   ru->regs[ru->size++]          = hregRISCV64_f29(); /* ft9 */
+   ru->regs[ru->size++]          = hregRISCV64_f30(); /* ft10 */
+   ru->regs[ru->size++]          = hregRISCV64_f31(); /* ft11 */
+   ru->allocable_end[HRcFlt64]   = ru->size - 1; /* inclusive last index */
+   ru->allocable                 = ru->size; /* entries added so far */
+
+   /* Add the registers that are not available for allocation. */
+   ru->regs[ru->size++] = hregRISCV64_x0(); /* zero */
+   ru->regs[ru->size++] = hregRISCV64_x2(); /* sp */
+   ru->regs[ru->size++] = hregRISCV64_x8(); /* s0 */
+
+   initialised = True;
+
+   RRegUniverse__check_is_sane(ru);
+   return ru;
+}
+
+/* Tell the register allocator how the given instruction uses the registers it
+   refers to. */
+void getRegUsage_RISCV64Instr(HRegUsage* u, const RISCV64Instr* i, Bool mode64)
+{
+   vassert(mode64 == True);
+   /* Initialise u, then add each register operand as a read or a write. */
+   initHRegUsage(u);
+   switch (i->tag) {
+   case RISCV64in_LI:
+      addHRegUse(u, HRmWrite, i->RISCV64in.LI.dst);
+      return;
+   case RISCV64in_MV:
+      addHRegUse(u, HRmWrite, i->RISCV64in.MV.dst);
+      addHRegUse(u, HRmRead, i->RISCV64in.MV.src);
+      return;
+   case RISCV64in_ALU:
+      addHRegUse(u, HRmWrite, i->RISCV64in.ALU.dst);
+      addHRegUse(u, HRmRead, i->RISCV64in.ALU.src1);
+      addHRegUse(u, HRmRead, i->RISCV64in.ALU.src2);
+      return;
+   case RISCV64in_ALUImm:
+      addHRegUse(u, HRmWrite, i->RISCV64in.ALUImm.dst);
+      addHRegUse(u, HRmRead, i->RISCV64in.ALUImm.src);
+      return;
+   case RISCV64in_Load:
+      addHRegUse(u, HRmWrite, i->RISCV64in.Load.dst);
+      addHRegUse(u, HRmRead, i->RISCV64in.Load.base);
+      return;
+   case RISCV64in_Store: /* both operands are only read */
+      addHRegUse(u, HRmRead, i->RISCV64in.Store.src);
+      addHRegUse(u, HRmRead, i->RISCV64in.Store.base);
+      return;
+   case RISCV64in_LoadR:
+      addHRegUse(u, HRmWrite, i->RISCV64in.LoadR.dst);
+      addHRegUse(u, HRmRead, i->RISCV64in.LoadR.addr);
+      return;
+   case RISCV64in_StoreC: /* res is the only register written */
+      addHRegUse(u, HRmWrite, i->RISCV64in.StoreC.res);
+      addHRegUse(u, HRmRead, i->RISCV64in.StoreC.src);
+      addHRegUse(u, HRmRead, i->RISCV64in.StoreC.addr);
+      return;
+   case RISCV64in_CSRRW:
+      addHRegUse(u, HRmWrite, i->RISCV64in.CSRRW.dst);
+      addHRegUse(u, HRmRead, i->RISCV64in.CSRRW.src);
+      return;
+   case RISCV64in_FpUnary:
+      addHRegUse(u, HRmWrite, i->RISCV64in.FpUnary.dst);
+      addHRegUse(u, HRmRead, i->RISCV64in.FpUnary.src);
+      return;
+   case RISCV64in_FpBinary:
+      addHRegUse(u, HRmWrite, i->RISCV64in.FpBinary.dst);
+      addHRegUse(u, HRmRead, i->RISCV64in.FpBinary.src1);
+      addHRegUse(u, HRmRead, i->RISCV64in.FpBinary.src2);
+      return;
+   case RISCV64in_FpTernary:
+      addHRegUse(u, HRmWrite, i->RISCV64in.FpTernary.dst);
+      addHRegUse(u, HRmRead, i->RISCV64in.FpTernary.src1);
+      addHRegUse(u, HRmRead, i->RISCV64in.FpTernary.src2);
+      addHRegUse(u, HRmRead, i->RISCV64in.FpTernary.src3);
+      return;
+   case RISCV64in_FpMove:
+      addHRegUse(u, HRmWrite, i->RISCV64in.FpMove.dst);
+      addHRegUse(u, HRmRead, i->RISCV64in.FpMove.src);
+      return;
+   case RISCV64in_FpConvert:
+      addHRegUse(u, HRmWrite, i->RISCV64in.FpConvert.dst);
+      addHRegUse(u, HRmRead, i->RISCV64in.FpConvert.src);
+      return;
+   case RISCV64in_FpCompare:
+      addHRegUse(u, HRmWrite, i->RISCV64in.FpCompare.dst);
+      addHRegUse(u, HRmRead, i->RISCV64in.FpCompare.src1);
+      addHRegUse(u, HRmRead, i->RISCV64in.FpCompare.src2);
+      return;
+   case RISCV64in_FpLdSt: /* reg is written by loads and read by stores */
+      switch (i->RISCV64in.FpLdSt.op) {
+      case RISCV64op_FLW:
+      case RISCV64op_FLD:
+         addHRegUse(u, HRmWrite, i->RISCV64in.FpLdSt.reg);
+         break;
+      case RISCV64op_FSW:
+      case RISCV64op_FSD:
+         addHRegUse(u, HRmRead, i->RISCV64in.FpLdSt.reg);
+         break;
+      }
+      addHRegUse(u, HRmRead, i->RISCV64in.FpLdSt.base); /* in both directions */
+      return;
+   case RISCV64in_FpCSEL:
+      addHRegUse(u, HRmWrite, i->RISCV64in.FpCSEL.dst);
+      addHRegUse(u, HRmRead, i->RISCV64in.FpCSEL.iftrue);
+      addHRegUse(u, HRmRead, i->RISCV64in.FpCSEL.iffalse);
+      addHRegUse(u, HRmRead, i->RISCV64in.FpCSEL.cond);
+      return;
+   case RISCV64in_CAS: /* old is the only register written */
+      addHRegUse(u, HRmWrite, i->RISCV64in.CAS.old);
+      addHRegUse(u, HRmRead, i->RISCV64in.CAS.addr);
+      addHRegUse(u, HRmRead, i->RISCV64in.CAS.expd);
+      addHRegUse(u, HRmRead, i->RISCV64in.CAS.data);
+      return;
+   case RISCV64in_FENCE: /* nothing is recorded for a fence */
+      return;
+   case RISCV64in_CSEL:
+      addHRegUse(u, HRmWrite, i->RISCV64in.CSEL.dst);
+      addHRegUse(u, HRmRead, i->RISCV64in.CSEL.iftrue);
+      addHRegUse(u, HRmRead, i->RISCV64in.CSEL.iffalse);
+      addHRegUse(u, HRmRead, i->RISCV64in.CSEL.cond);
+      return;
+   case RISCV64in_Call:
+      /* Logic and comments copied/modified from the arm64 backend. */
+      /* First off, claim it trashes all the caller-saved registers which fall
+         within the register allocator's jurisdiction. */
+      addHRegUse(u, HRmWrite, hregRISCV64_x10());
+      addHRegUse(u, HRmWrite, hregRISCV64_x11());
+      addHRegUse(u, HRmWrite, hregRISCV64_x12());
+      addHRegUse(u, HRmWrite, hregRISCV64_x13());
+      addHRegUse(u, HRmWrite, hregRISCV64_x14());
+      addHRegUse(u, HRmWrite, hregRISCV64_x15());
+      addHRegUse(u, HRmWrite, hregRISCV64_x16());
+      addHRegUse(u, HRmWrite, hregRISCV64_x17());
+      addHRegUse(u, HRmWrite, hregRISCV64_f0());
+      addHRegUse(u, HRmWrite, hregRISCV64_f1());
+      addHRegUse(u, HRmWrite, hregRISCV64_f2());
+      addHRegUse(u, HRmWrite, hregRISCV64_f3());
+      addHRegUse(u, HRmWrite, hregRISCV64_f4());
+      addHRegUse(u, HRmWrite, hregRISCV64_f5());
+      addHRegUse(u, HRmWrite, hregRISCV64_f6());
+      addHRegUse(u, HRmWrite, hregRISCV64_f7());
+      addHRegUse(u, HRmWrite, hregRISCV64_f10());
+      addHRegUse(u, HRmWrite, hregRISCV64_f11());
+      addHRegUse(u, HRmWrite, hregRISCV64_f12());
+      addHRegUse(u, HRmWrite, hregRISCV64_f13());
+      addHRegUse(u, HRmWrite, hregRISCV64_f14());
+      addHRegUse(u, HRmWrite, hregRISCV64_f15());
+      addHRegUse(u, HRmWrite, hregRISCV64_f16());
+      addHRegUse(u, HRmWrite, hregRISCV64_f17());
+      addHRegUse(u, HRmWrite, hregRISCV64_f28());
+      addHRegUse(u, HRmWrite, hregRISCV64_f29());
+      addHRegUse(u, HRmWrite, hregRISCV64_f30());
+      addHRegUse(u, HRmWrite, hregRISCV64_f31());
+      /* Now we have to state any parameter-carrying registers which might be
+         read. This depends on nArgRegs and nFArgRegs. */
+      switch (i->RISCV64in.Call.nArgRegs) { /* n args read x10 .. x(9+n) */
+      case 8:
+         addHRegUse(u, HRmRead, hregRISCV64_x17()); /*fallthru*/
+      case 7:
+         addHRegUse(u, HRmRead, hregRISCV64_x16()); /*fallthru*/
+      case 6:
+         addHRegUse(u, HRmRead, hregRISCV64_x15()); /*fallthru*/
+      case 5:
+         addHRegUse(u, HRmRead, hregRISCV64_x14()); /*fallthru*/
+      case 4:
+         addHRegUse(u, HRmRead, hregRISCV64_x13()); /*fallthru*/
+      case 3:
+         addHRegUse(u, HRmRead, hregRISCV64_x12()); /*fallthru*/
+      case 2:
+         addHRegUse(u, HRmRead, hregRISCV64_x11()); /*fallthru*/
+      case 1:
+         addHRegUse(u, HRmRead, hregRISCV64_x10());
+         break;
+      case 0:
+         break;
+      default: /* more than 8 integer argument registers */
+         vpanic("getRegUsage_RISCV64Instr:Call:regparms");
+      }
+      switch (i->RISCV64in.Call.nFArgRegs) { /* n args read f10 .. f(9+n) */
+      case 8:
+         addHRegUse(u, HRmRead, hregRISCV64_f17()); /*fallthru*/
+      case 7:
+         addHRegUse(u, HRmRead, hregRISCV64_f16()); /*fallthru*/
+      case 6:
+         addHRegUse(u, HRmRead, hregRISCV64_f15()); /*fallthru*/
+      case 5:
+         addHRegUse(u, HRmRead, hregRISCV64_f14()); /*fallthru*/
+      case 4:
+         addHRegUse(u, HRmRead, hregRISCV64_f13()); /*fallthru*/
+      case 3:
+         addHRegUse(u, HRmRead, hregRISCV64_f12()); /*fallthru*/
+      case 2:
+         addHRegUse(u, HRmRead, hregRISCV64_f11()); /*fallthru*/
+      case 1:
+         addHRegUse(u, HRmRead, hregRISCV64_f10());
+         break;
+      case 0:
+         break;
+      default: /* more than 8 floating-point argument registers */
+         vpanic("getRegUsage_RISCV64Instr:Call:fregparms");
+      }
+      /* Finally, add the condition register. */
+      if (!hregIsInvalid(i->RISCV64in.Call.cond))
+         addHRegUse(u, HRmRead, i->RISCV64in.Call.cond);
+      return;
+   /* XDirect/XIndir/XAssisted are also a bit subtle. They conditionally exit
+      the block. Hence we only need to list (1) the registers that they read,
+      and (2) the registers that they write in the case where the block is not
+      exited. (2) is empty, hence only (1) is relevant here. */
+   case RISCV64in_XDirect: /* dstGA is not added as a register for XDirect */
+      addHRegUse(u, HRmRead, i->RISCV64in.XDirect.base);
+      if (!hregIsInvalid(i->RISCV64in.XDirect.cond))
+         addHRegUse(u, HRmRead, i->RISCV64in.XDirect.cond);
+      return;
+   case RISCV64in_XIndir:
+      addHRegUse(u, HRmRead, i->RISCV64in.XIndir.dstGA);
+      addHRegUse(u, HRmRead, i->RISCV64in.XIndir.base);
+      if (!hregIsInvalid(i->RISCV64in.XIndir.cond))
+         addHRegUse(u, HRmRead, i->RISCV64in.XIndir.cond);
+      return;
+   case RISCV64in_XAssisted:
+      addHRegUse(u, HRmRead, i->RISCV64in.XAssisted.dstGA);
+      addHRegUse(u, HRmRead, i->RISCV64in.XAssisted.base);
+      if (!hregIsInvalid(i->RISCV64in.XAssisted.cond))
+         addHRegUse(u, HRmRead, i->RISCV64in.XAssisted.cond);
+      return;
+   case RISCV64in_EvCheck:
+      /* We expect both amodes only to mention x8/s0, so this is in fact
+         pointless, since the register isn't allocatable, but anyway.. */
+      addHRegUse(u, HRmRead, i->RISCV64in.EvCheck.base_amCounter);
+      addHRegUse(u, HRmRead, i->RISCV64in.EvCheck.base_amFailAddr);
+      return;
+   case RISCV64in_ProfInc:
+      /* Does not use any registers known to RA. */
+      return;
+   default: /* unknown tag: print the instruction, then abort */
+      ppRISCV64Instr(i, mode64);
+      vpanic("getRegUsage_RISCV64Instr");
+   }
+}
+
+/* Local helper: replace the register at *r with lookupHRegRemap(m, *r). */
+static void mapReg(HRegRemap* m, HReg* r) { *r = lookupHRegRemap(m, *r); }
+
+/* Map the registers of the given instruction. */
+void mapRegs_RISCV64Instr(HRegRemap* m, RISCV64Instr* i, Bool mode64)
+{
+   vassert(mode64 == True);
+   /* Rewrite in place every register field the instruction carries. */
+   switch (i->tag) {
+   case RISCV64in_LI:
+      mapReg(m, &i->RISCV64in.LI.dst);
+      return;
+   case RISCV64in_MV:
+      mapReg(m, &i->RISCV64in.MV.dst);
+      mapReg(m, &i->RISCV64in.MV.src);
+      return;
+   case RISCV64in_ALU:
+      mapReg(m, &i->RISCV64in.ALU.dst);
+      mapReg(m, &i->RISCV64in.ALU.src1);
+      mapReg(m, &i->RISCV64in.ALU.src2);
+      return;
+   case RISCV64in_ALUImm:
+      mapReg(m, &i->RISCV64in.ALUImm.dst);
+      mapReg(m, &i->RISCV64in.ALUImm.src);
+      return;
+   case RISCV64in_Load:
+      mapReg(m, &i->RISCV64in.Load.dst);
+      mapReg(m, &i->RISCV64in.Load.base);
+      return;
+   case RISCV64in_Store:
+      mapReg(m, &i->RISCV64in.Store.src);
+      mapReg(m, &i->RISCV64in.Store.base);
+      return;
+   case RISCV64in_LoadR:
+      mapReg(m, &i->RISCV64in.LoadR.dst);
+      mapReg(m, &i->RISCV64in.LoadR.addr);
+      return;
+   case RISCV64in_StoreC:
+      mapReg(m, &i->RISCV64in.StoreC.res);
+      mapReg(m, &i->RISCV64in.StoreC.src);
+      mapReg(m, &i->RISCV64in.StoreC.addr);
+      return;
+   case RISCV64in_CSRRW:
+      mapReg(m, &i->RISCV64in.CSRRW.dst);
+      mapReg(m, &i->RISCV64in.CSRRW.src);
+      return;
+   case RISCV64in_FpUnary:
+      mapReg(m, &i->RISCV64in.FpUnary.dst);
+      mapReg(m, &i->RISCV64in.FpUnary.src);
+      return;
+   case RISCV64in_FpBinary:
+      mapReg(m, &i->RISCV64in.FpBinary.dst);
+      mapReg(m, &i->RISCV64in.FpBinary.src1);
+      mapReg(m, &i->RISCV64in.FpBinary.src2);
+      return;
+   case RISCV64in_FpTernary:
+      mapReg(m, &i->RISCV64in.FpTernary.dst);
+      mapReg(m, &i->RISCV64in.FpTernary.src1);
+      mapReg(m, &i->RISCV64in.FpTernary.src2);
+      mapReg(m, &i->RISCV64in.FpTernary.src3);
+      return;
+   case RISCV64in_FpMove:
+      mapReg(m, &i->RISCV64in.FpMove.dst);
+      mapReg(m, &i->RISCV64in.FpMove.src);
+      return;
+   case RISCV64in_FpConvert:
+      mapReg(m, &i->RISCV64in.FpConvert.dst);
+      mapReg(m, &i->RISCV64in.FpConvert.src);
+      return;
+   case RISCV64in_FpCompare:
+      mapReg(m, &i->RISCV64in.FpCompare.dst);
+      mapReg(m, &i->RISCV64in.FpCompare.src1);
+      mapReg(m, &i->RISCV64in.FpCompare.src2);
+      return;
+   case RISCV64in_FpLdSt:
+      mapReg(m, &i->RISCV64in.FpLdSt.reg);
+      mapReg(m, &i->RISCV64in.FpLdSt.base);
+      return;
+   case RISCV64in_FpCSEL:
+      mapReg(m, &i->RISCV64in.FpCSEL.dst);
+      mapReg(m, &i->RISCV64in.FpCSEL.iftrue);
+      mapReg(m, &i->RISCV64in.FpCSEL.iffalse);
+      mapReg(m, &i->RISCV64in.FpCSEL.cond);
+      return;
+   case RISCV64in_CAS:
+      mapReg(m, &i->RISCV64in.CAS.old);
+      mapReg(m, &i->RISCV64in.CAS.addr);
+      mapReg(m, &i->RISCV64in.CAS.expd);
+      mapReg(m, &i->RISCV64in.CAS.data);
+      return;
+   case RISCV64in_FENCE: /* no register fields to rewrite */
+      return;
+   case RISCV64in_CSEL:
+      mapReg(m, &i->RISCV64in.CSEL.dst);
+      mapReg(m, &i->RISCV64in.CSEL.iftrue);
+      mapReg(m, &i->RISCV64in.CSEL.iffalse);
+      mapReg(m, &i->RISCV64in.CSEL.cond);
+      return;
+   case RISCV64in_Call: /* only the optional condition register is remapped */
+      if (!hregIsInvalid(i->RISCV64in.Call.cond))
+         mapReg(m, &i->RISCV64in.Call.cond);
+      return;
+   case RISCV64in_XDirect: /* dstGA is not remapped for XDirect */
+      mapReg(m, &i->RISCV64in.XDirect.base);
+      if (!hregIsInvalid(i->RISCV64in.XDirect.cond))
+         mapReg(m, &i->RISCV64in.XDirect.cond);
+      return;
+   case RISCV64in_XIndir:
+      mapReg(m, &i->RISCV64in.XIndir.dstGA);
+      mapReg(m, &i->RISCV64in.XIndir.base);
+      if (!hregIsInvalid(i->RISCV64in.XIndir.cond))
+         mapReg(m, &i->RISCV64in.XIndir.cond);
+      return;
+   case RISCV64in_XAssisted:
+      mapReg(m, &i->RISCV64in.XAssisted.dstGA);
+      mapReg(m, &i->RISCV64in.XAssisted.base);
+      if (!hregIsInvalid(i->RISCV64in.XAssisted.cond))
+         mapReg(m, &i->RISCV64in.XAssisted.cond);
+      return;
+   case RISCV64in_EvCheck:
+      /* We expect both amodes only to mention x8/s0, so this is in fact
+         pointless, since the register isn't allocatable, but anyway.. */
+      mapReg(m, &i->RISCV64in.EvCheck.base_amCounter);
+      mapReg(m, &i->RISCV64in.EvCheck.base_amFailAddr);
+      return;
+   case RISCV64in_ProfInc:
+      /* Hardwires x5/t0 and x6/t1 -- nothing to modify. */
+      return;
+   default: /* unknown tag: print the instruction, then abort */
+      ppRISCV64Instr(i, mode64);
+      vpanic("mapRegs_RISCV64Instr");
+   }
+}
+
+/* Generate riscv64 spill/reload instructions under the direction of the
+   register allocator. Note it's critical these don't write the condition
+   codes. A spill is one store, returned in *i1; *i2 is always set to NULL. */
+void genSpill_RISCV64(/*OUT*/ HInstr** i1,
+                      /*OUT*/ HInstr** i2,
+                      HReg             rreg,
+                      Int              offsetB,
+                      Bool             mode64)
+{
+   vassert(offsetB >= 0);
+   vassert(!hregIsVirtual(rreg));
+   vassert(mode64 == True);
+   *i1 = *i2 = NULL; /* Define both OUT slots; do not rely on the caller. */
+   HReg base   = get_baseblock_register();
+   Int  soff12 = offsetB - BASEBLOCK_OFFSET_ADJUSTMENT;
+   vassert(soff12 >= -2048 && soff12 < 2048); /* signed 12-bit range */
+
+   HRegClass rclass = hregClass(rreg); /* selects SD or FSD below */
+   switch (rclass) {
+   case HRcInt64:
+      *i1 = RISCV64Instr_Store(RISCV64op_SD, rreg, base, soff12);
+      return;
+   case HRcFlt64:
+      *i1 = RISCV64Instr_FpLdSt(RISCV64op_FSD, rreg, base, soff12);
+      return;
+   default: /* any other register class is not handled: report and abort */
+      ppHRegClass(rclass);
+      vpanic("genSpill_RISCV64: unimplemented regclass");
+   }
+}
+
+void genReload_RISCV64(/*OUT*/ HInstr** i1,
+                       /*OUT*/ HInstr** i2,
+                       HReg             rreg,
+                       Int              offsetB,
+                       Bool             mode64)
+{ /* Reload rreg from base + soff12: one load in *i1, *i2 is set to NULL. */
+   vassert(offsetB >= 0);
+   vassert(!hregIsVirtual(rreg));
+   vassert(mode64 == True);
+   *i1 = *i2 = NULL; /* Define both OUT slots; do not rely on the caller. */
+   HReg base   = get_baseblock_register();
+   Int  soff12 = offsetB - BASEBLOCK_OFFSET_ADJUSTMENT;
+   vassert(soff12 >= -2048 && soff12 < 2048); /* signed 12-bit range */
+
+   HRegClass rclass = hregClass(rreg); /* selects LD or FLD below */
+   switch (rclass) {
+   case HRcInt64:
+      *i1 = RISCV64Instr_Load(RISCV64op_LD, rreg, base, soff12);
+      return;
+   case HRcFlt64:
+      *i1 = RISCV64Instr_FpLdSt(RISCV64op_FLD, rreg, base, soff12);
+      return;
+   default: /* any other register class is not handled: report and abort */
+      ppHRegClass(rclass);
+      vpanic("genReload_RISCV64: unimplemented regclass");
+   }
+}
+
+RISCV64Instr* genMove_RISCV64(HReg from, HReg to, Bool mode64)
+{
+   vassert(mode64 == True);
+   /* The class of the source register selects the move instruction. */
+   const HRegClass cls = hregClass(from);
+   if (cls == HRcInt64) {
+      return RISCV64Instr_MV(to, from);
+   }
+   if (cls == HRcFlt64) {
+      return RISCV64Instr_FpMove(RISCV64op_FMV_D, to, from);
+   }
+   /* Any other register class is not handled: report it and abort. */
+   ppHRegClass(cls);
+   vpanic("genMove_RISCV64: unimplemented regclass");
+}
+
+/*------------------------------------------------------------*/
+/*--- Functions to emit a sequence of bytes                ---*/
+/*------------------------------------------------------------*/
+
+static inline UChar* emit16(UChar* p, UShort val)
+{ /* Store val at p, low byte first; return the address just past it. */
+   p[0] = (UChar)((val >> 0) & 0xff);
+   p[1] = (UChar)((val >> 8) & 0xff);
+   return p + 2;
+}
+
+static inline UChar* emit32(UChar* p, UInt val)
+{
+   /* Store val at p as four bytes, least significant byte first. */
+   for (UInt byte = 0; byte < 4; byte++)
+      p[byte] = (UChar)((val >> (8 * byte)) & 0xff);
+   /* Hand back the address just past the bytes written. */
+   return p + 4;
+}
+
+/*------------------------------------------------------------*/
+/*--- Functions to emit various instruction formats        ---*/
+/*------------------------------------------------------------*/
+
+/* Emit an R-type instruction at p; returns the address just past it. */
+static UChar* emit_R(
+   UChar* p, UInt opcode, UInt rd, UInt funct3, UInt rs1, UInt rs2, UInt funct7)
+{
+   vassert(opcode >> 7 == 0);
+   vassert(rd >> 5 == 0);
+   vassert(funct3 >> 3 == 0);
+   vassert(rs1 >> 5 == 0);
+   vassert(rs2 >> 5 == 0);
+   vassert(funct7 >> 7 == 0);
+
+   /* R-type layout, high to low: funct7 | rs2 | rs1 | funct3 | rd | opcode.
+      OR each field into its slot of the 32-bit instruction word. */
+   const UInt word = (funct7 << 25)
+                     | (rs2 << 20)
+                     | (rs1 << 15)
+                     | (funct3 << 12)
+                     | (rd << 7)
+                     | (opcode << 0);
+
+   return emit32(p, word);
+}
+
+/* Emit an I-type instruction at p; returns the address just past it. */
+static UChar*
+emit_I(UChar* p, UInt opcode, UInt rd, UInt funct3, UInt rs1, UInt imm11_0)
+{
+   vassert(opcode >> 7 == 0);
+   vassert(rd >> 5 == 0);
+   vassert(funct3 >> 3 == 0);
+   vassert(rs1 >> 5 == 0);
+   vassert(imm11_0 >> 12 == 0);
+
+   /* I-type layout, high to low: imm[11:0] | rs1 | funct3 | rd | opcode.
+      OR each field into its slot of the 32-bit instruction word. */
+   const UInt word = (imm11_0 << 20)
+                     | (rs1 << 15)
+                     | (funct3 << 12)
+                     | (rd << 7)
+                     | (opcode << 0);
+
+   return emit32(p, word);
+}
+
+/* Emit an S-type instruction at p; returns the address just past it. */
+static UChar*
+emit_S(UChar* p, UInt opcode, UInt imm11_0, UInt funct3, UInt rs1, UInt rs2)
+{
+   vassert(opcode >> 7 == 0);
+   vassert(imm11_0 >> 12 == 0);
+   vassert(funct3 >> 3 == 0);
+   vassert(rs1 >> 5 == 0);
+   vassert(rs2 >> 5 == 0);
+
+   /* The 12-bit immediate is split into a low 5-bit and a high 7-bit part. */
+   const UInt imm_lo5 = (imm11_0 >> 0) & 0x1f;
+   const UInt imm_hi7 = (imm11_0 >> 5) & 0x7f;
+
+   /* S-type layout: imm[11:5] | rs2 | rs1 | funct3 | imm[4:0] | opcode. */
+   const UInt word = (imm_hi7 << 25)
+                     | (rs2 << 20)
+                     | (rs1 << 15)
+                     | (funct3 << 12)
+                     | (imm_lo5 << 7)
+                     | (opcode << 0);
+
+   return emit32(p, word);
+}
+
+/* Emit a B-type instruction at p; returns the address just past it. */
+static UChar*
+emit_B(UChar* p, UInt opcode, UInt imm12_1, UInt funct3, UInt rs1, UInt rs2)
+{
+   vassert(opcode >> 7 == 0);
+   vassert(imm12_1 >> 12 == 0);
+   vassert(funct3 >> 3 == 0);
+   vassert(rs1 >> 5 == 0);
+   vassert(rs2 >> 5 == 0);
+
+   /* Slice imm[12:1] into the four pieces the encoding scatters. */
+   const UInt off_4_1  = (imm12_1 >> 0) & 0xf;
+   const UInt off_10_5 = (imm12_1 >> 4) & 0x3f;
+   const UInt off_11   = (imm12_1 >> 10) & 0x1;
+   const UInt off_12   = (imm12_1 >> 11) & 0x1;
+
+   /* B-type layout: imm[12] imm[10:5] rs2 rs1 funct3 imm[4:1] imm[11] opcode. */
+   const UInt word = (off_12 << 31)
+                     | (off_10_5 << 25)
+                     | (rs2 << 20)
+                     | (rs1 << 15)
+                     | (funct3 << 12)
+                     | (off_4_1 << 8)
+                     | (off_11 << 7)
+                     | (opcode << 0);
+
+   return emit32(p, word);
+}
+
+/* Emit a U-type instruction at p; returns the address just past it. */
+static UChar* emit_U(UChar* p, UInt opcode, UInt rd, UInt imm31_12)
+{
+   vassert(opcode >> 7 == 0);
+   vassert(rd >> 5 == 0);
+   vassert(imm31_12 >> 20 == 0);
+
+   /* U-type layout, high to low: imm[31:12] | rd | opcode.
+      OR each field into its slot of the 32-bit instruction word. */
+   const UInt word = (imm31_12 << 12)
+                     | (rd << 7)
+                     | (opcode << 0);
+
+   return emit32(p, word);
+}
+
+/* Emit a CR-type instruction. The encoding is assembled as a UShort and
+   goes out through emit16(). */
+static UChar* emit_CR(UChar* p, UInt opcode, UInt rs2, UInt rd, UInt funct4)
+{
+   vassert(opcode >> 2 == 0);
+   vassert(rs2 >> 5 == 0);
+   vassert(rd >> 5 == 0);
+   vassert(funct4 >> 4 == 0);
+
+   /* Fields from the least significant bit upwards. */
+   const UShort halfword = (UShort)((opcode << 0)      /* [1:0]   */
+                                    | (rs2 << 2)       /* [6:2]   */
+                                    | (rd << 7)        /* [11:7]  */
+                                    | (funct4 << 12)); /* [15:12] */
+
+   return emit16(p, halfword);
+}
+
+/* Emit a CI-type instruction. The 6-bit immediate is split: bits [4:0] sit
+   next to the opcode and bit 5 sits just below funct3. */
+static UChar* emit_CI(UChar* p, UInt opcode, UInt imm5_0, UInt rd, UInt funct3)
+{
+   vassert(opcode >> 2 == 0);
+   vassert(imm5_0 >> 6 == 0);
+   vassert(rd >> 5 == 0);
+   vassert(funct3 >> 3 == 0);
+
+   /* Split the immediate. */
+   const UInt lo5 = imm5_0 & 0x1f;
+   const UInt hi1 = (imm5_0 >> 5) & 0x1;
+
+   const UShort halfword = (UShort)((opcode << 0)      /* [1:0]   */
+                                    | (lo5 << 2)       /* [6:2]   imm[4:0] */
+                                    | (rd << 7)        /* [11:7]  */
+                                    | (hi1 << 12)      /* [12]    imm[5]   */
+                                    | (funct3 << 13)); /* [15:13] */
+
+   return emit16(p, halfword);
+}
+
+/* Emit a CJ-type instruction. imm11_1 holds offset bits [11:1], which the
+   encoding stores in a scrambled order. */
+static UChar* emit_CJ(UChar* p, UInt opcode, UInt imm11_1, UInt funct3)
+{
+   vassert(opcode >> 2 == 0);
+   vassert(imm11_1 >> 11 == 0);
+   vassert(funct3 >> 3 == 0);
+
+   /* Index k of imm11_1 is offset bit k+1. */
+   const UInt off3_1 = imm11_1 & 0x7;
+   const UInt off4   = (imm11_1 >> 3) & 0x1;
+   const UInt off5   = (imm11_1 >> 4) & 0x1;
+   const UInt off6   = (imm11_1 >> 5) & 0x1;
+   const UInt off7   = (imm11_1 >> 6) & 0x1;
+   const UInt off9_8 = (imm11_1 >> 7) & 0x3;
+   const UInt off10  = (imm11_1 >> 9) & 0x1;
+   const UInt off11  = (imm11_1 >> 10) & 0x1;
+
+   const UShort halfword = (UShort)((opcode << 0)      /* [1:0]   */
+                                    | (off5 << 2)      /* [2]     imm[5]   */
+                                    | (off3_1 << 3)    /* [5:3]   imm[3:1] */
+                                    | (off7 << 6)      /* [6]     imm[7]   */
+                                    | (off6 << 7)      /* [7]     imm[6]   */
+                                    | (off10 << 8)     /* [8]     imm[10]  */
+                                    | (off9_8 << 9)    /* [10:9]  imm[9:8] */
+                                    | (off4 << 11)     /* [11]    imm[4]   */
+                                    | (off11 << 12)    /* [12]    imm[11]  */
+                                    | (funct3 << 13)); /* [15:13] */
+
+   return emit16(p, halfword);
+}
+
+/*------------------------------------------------------------*/
+/*--- Code generation                                      ---*/
+/*------------------------------------------------------------*/
+
+/* Get an immediate into a register, using only that register. The number of
+   emitted bytes depends on the value; returns the advanced output pointer. */
+static UChar* imm64_to_ireg(UChar* p, UInt dst, ULong imm64)
+{
+   vassert(dst > 0 && dst <= 31);
+   Long simm64 = imm64;
+
+   if (simm64 >= -32 && simm64 <= 31) {
+      /* c.li dst, simm64[5:0] */
+      return emit_CI(p, 0b01, imm64 & 0x3f, dst, 0b010);
+   }
+
+   /* TODO Add implementation with addi only and c.lui+addi. */
+
+   if (simm64 >= -2147483648 && simm64 <= 2147483647) {
+      /* lui dst, simm64[31:12]+simm64[11] */
+      p = emit_U(p, 0b0110111, dst, ((imm64 + 0x800) >> 12) & 0xfffff);
+      if ((imm64 & 0xfff) == 0)
+         return p;
+      /* addiw dst, dst, simm64[11:0] */
+      return emit_I(p, 0b0011011, dst, 0b000, dst, imm64 & 0xfff);
+   }
+
+   /* Out of the 32-bit signed range: strip the low 12 bits first. */
+   ULong imm11_0 = imm64 & 0xfff;
+
+   /* Get the remaining adjusted upper bits. The rounding add is done on the
+      unsigned value because simm64 + 0x800 overflows near the top of Long. */
+   ULong rem   = (ULong)((Long)(imm64 + 0x800) >> 12);
+   UInt  sham6 = 12 + __builtin_ctzll(rem);
+   vassert(sham6 < 64);
+   rem = vex_sx_to_64(rem >> (sham6 - 12), 64 - sham6);
+
+   /* Generate instructions to load the upper bits. */
+   p = imm64_to_ireg(p, dst, rem);
+   /* c.slli dst, sham6 */
+   p = emit_CI(p, 0b10, sham6, dst, 0b000);
+
+   /* Add the low bits in. */
+   if (imm11_0 == 0)
+      return p;
+   UInt imm5_0 = imm11_0 & 0x3f;
+   if (vex_sx_to_64(imm5_0, 6) == vex_sx_to_64(imm11_0, 12)) {
+      /* c.addi dst, imm5_0 */
+      p = emit_CI(p, 0b01, imm5_0, dst, 0b000);
+   } else {
+      /* addi dst, dst, imm11_0 */
+      p = emit_I(p, 0b0010011, dst, 0b000, dst, imm11_0);
+   }
+
+   return p;
+}
+
+/* Materialize a 48-bit address in a register, using only that register. The
+   emitted sequence is always 18 bytes long whatever the address is, which is
+   what code that gets patched later needs in order to have a fixed size.
+
+   The address must be in the Sv48 canonical form, that is bits [63:48] must
+   all equal bit 47; this also covers Sv39 targets. Needing only 48 bits saves
+   2 instructions compared to materializing a full 64-bit address.
+
+   The value is built from four chunks, [47:28], [27:16], [15:4] and [3:0].
+   Each chunk is added as a sign-extended immediate, so the part above it is
+   rounded up beforehand to compensate. The sequence is lui, addiw, c.slli,
+   addi, c.slli and finally c.addi (or c.nop if the low chunk is zero):
+   4 + 4 + 2 + 4 + 2 + 2 = 18 bytes.
+   */
+static UChar* addr48_to_ireg_EXACTLY_18B(UChar* p, UInt dst, ULong imm48)
+{
+   vassert(imm48 >> 47 == 0 || imm48 >> 47 == 0x1ffff);
+
+   /* Peel the chunks off from the bottom, rounding the rest each time. */
+   const ULong chunk3_0   = imm48 & 0xf;
+   const ULong above4     = (imm48 + 0x8) >> 4;
+   const ULong chunk15_4  = above4 & 0xfff;
+   const ULong above16    = (above4 + 0x800) >> 12;
+   const ULong chunk27_16 = above16 & 0xfff;
+   const ULong above28    = (above16 + 0x800) >> 12;
+   const ULong chunk47_28 = above28 & 0xfffff;
+
+   /* 4 bytes: lui dst, chunk47_28 */
+   p = emit_U(p, 0b0110111, dst, chunk47_28);
+   /* 4 bytes: addiw dst, dst, chunk27_16 */
+   p = emit_I(p, 0b0011011, dst, 0b000, dst, chunk27_16);
+   /* 2 bytes: c.slli dst, 12 */
+   p = emit_CI(p, 0b10, 12, dst, 0b000);
+   /* 4 bytes: addi dst, dst, chunk15_4 */
+   p = emit_I(p, 0b0010011, dst, 0b000, dst, chunk15_4);
+   /* 2 bytes: c.slli dst, 4 */
+   p = emit_CI(p, 0b10, 4, dst, 0b000);
+
+   /* 2 bytes: c.nop when there is nothing to add, so the size stays fixed. */
+   if (chunk3_0 == 0) {
+      return emit_CI(p, 0b01, 0, 0, 0b000);
+   }
+
+   /* 2 bytes: c.addi dst, chunk3_0 (4-bit chunk sign-extended to 6 bits) */
+   return emit_CI(p, 0b01, vex_sx_to_64(chunk3_0, 4) & 0x3f, dst, 0b000);
+}
+
+/* Return True iff the 18 bytes at p are exactly what
+   addr48_to_ireg_EXACTLY_18B() would emit for the given dst and imm48. */
+static Bool is_addr48_to_ireg_EXACTLY_18B(UChar* p, UInt dst, ULong imm48)
+{
+   UChar  expected[18];
+   UChar* end;
+
+   /* Generate the reference sequence into a scratch buffer. */
+   end = addr48_to_ireg_EXACTLY_18B(&expected[0], dst, imm48);
+   if (end - &expected[0] != 18)
+      return False;
+
+   /* Compare byte by byte, stopping at the first difference. */
+   for (UInt k = 0; k < 18; k++) {
+      if (p[k] != expected[k])
+         return False;
+   }
+
+   return True;
+}
+
+/* Emit an instruction into buf and return the number of bytes used. Note that
+   buf is not the insn's final place, and therefore it is imperative to emit
+   position-independent code. If the emitted instruction was a profiler inc, set
+   *is_profInc to True, else leave it unchanged. */
+Int emit_RISCV64Instr(/*MB_MOD*/ Bool*    is_profInc,
+                      UChar*              buf,
+                      Int                 nbuf,
+                      const RISCV64Instr* i,
+                      Bool                mode64,
+                      VexEndness          endness_host,
+                      const void*         disp_cp_chain_me_to_slowEP,
+                      const void*         disp_cp_chain_me_to_fastEP,
+                      const void*         disp_cp_xindir,
+                      const void*         disp_cp_xassisted)
+{
+   vassert(nbuf >= 32);
+   vassert(mode64 == True);
+   vassert(((HWord)buf & 1) == 0);
+
+   UChar* p = &buf[0];
+
+   switch (i->tag) {
+   case RISCV64in_LI:
+      p = imm64_to_ireg(p, iregEnc(i->RISCV64in.LI.dst), i->RISCV64in.LI.imm64);
+      goto done;
+   case RISCV64in_MV: {
+      /* c.mv dst, src */
+      UInt dst = iregEnc(i->RISCV64in.MV.dst);
+      UInt src = iregEnc(i->RISCV64in.MV.src);
+
+      p = emit_CR(p, 0b10, src, dst, 0b1000);
+      goto done;
+   }
+   case RISCV64in_ALU: {
+      /* <op> dst, src1, src2 */
+      UInt dst  = iregEnc(i->RISCV64in.ALU.dst);
+      UInt src1 = iregEnc(i->RISCV64in.ALU.src1);
+      UInt src2 = iregEnc(i->RISCV64in.ALU.src2);
+      switch (i->RISCV64in.ALU.op) {
+      case RISCV64op_ADD:
+         p = emit_R(p, 0b0110011, dst, 0b000, src1, src2, 0b0000000);
+         goto done;
+      case RISCV64op_SUB:
+         p = emit_R(p, 0b0110011, dst, 0b000, src1, src2, 0b0100000);
+         goto done;
+      case RISCV64op_ADDW:
+         p = emit_R(p, 0b0111011, dst, 0b000, src1, src2, 0b0000000);
+         goto done;
+      case RISCV64op_SUBW:
+         p = emit_R(p, 0b0111011, dst, 0b000, src1, src2, 0b0100000);
+         goto done;
+      case RISCV64op_XOR:
+         p = emit_R(p, 0b0110011, dst, 0b100, src1, src2, 0b0000000);
+         goto done;
+      case RISCV64op_OR:
+         p = emit_R(p, 0b0110011, dst, 0b110, src1, src2, 0b0000000);
+         goto done;
+      case RISCV64op_AND:
+         p = emit_R(p, 0b0110011, dst, 0b111, src1, src2, 0b0000000);
+         goto done;
+      case RISCV64op_SLL:
+         p = emit_R(p, 0b0110011, dst, 0b001, src1, src2, 0b0000000);
+         goto done;
+      case RISCV64op_SRL:
+         p = emit_R(p, 0b0110011, dst, 0b101, src1, src2, 0b0000000);
+         goto done;
+      case RISCV64op_SRA:
+         p = emit_R(p, 0b0110011, dst, 0b101, src1, src2, 0b0100000);
+         goto done;
+      case RISCV64op_SLLW:
+         p = emit_R(p, 0b0111011, dst, 0b001, src1, src2, 0b0000000);
+         goto done;
+      case RISCV64op_SRLW:
+         p = emit_R(p, 0b0111011, dst, 0b101, src1, src2, 0b0000000);
+         goto done;
+      case RISCV64op_SRAW:
+         p = emit_R(p, 0b0111011, dst, 0b101, src1, src2, 0b0100000);
+         goto done;
+      case RISCV64op_SLT:
+         p = emit_R(p, 0b0110011, dst, 0b010, src1, src2, 0b0000000);
+         goto done;
+      case RISCV64op_SLTU:
+         p = emit_R(p, 0b0110011, dst, 0b011, src1, src2, 0b0000000);
+         goto done;
+      case RISCV64op_MUL:
+         p = emit_R(p, 0b0110011, dst, 0b000, src1, src2, 0b0000001);
+         goto done;
+      case RISCV64op_MULH:
+         p = emit_R(p, 0b0110011, dst, 0b001, src1, src2, 0b0000001);
+         goto done;
+      case RISCV64op_MULHU:
+         p = emit_R(p, 0b0110011, dst, 0b011, src1, src2, 0b0000001);
+         goto done;
+      case RISCV64op_DIV:
+         p = emit_R(p, 0b0110011, dst, 0b100, src1, src2, 0b0000001);
+         goto done;
+      case RISCV64op_DIVU:
+         p = emit_R(p, 0b0110011, dst, 0b101, src1, src2, 0b0000001);
+         goto done;
+      case RISCV64op_REM:
+         p = emit_R(p, 0b0110011, dst, 0b110, src1, src2, 0b0000001);
+         goto done;
+      case RISCV64op_REMU:
+         p = emit_R(p, 0b0110011, dst, 0b111, src1, src2, 0b0000001);
+         goto done;
+      case RISCV64op_MULW:
+         p = emit_R(p, 0b0111011, dst, 0b000, src1, src2, 0b0000001);
+         goto done;
+      case RISCV64op_DIVW:
+         p = emit_R(p, 0b0111011, dst, 0b100, src1, src2, 0b0000001);
+         goto done;
+      case RISCV64op_DIVUW:
+         p = emit_R(p, 0b0111011, dst, 0b101, src1, src2, 0b0000001);
+         goto done;
+      case RISCV64op_REMW:
+         p = emit_R(p, 0b0111011, dst, 0b110, src1, src2, 0b0000001);
+         goto done;
+      case RISCV64op_REMUW:
+         p = emit_R(p, 0b0111011, dst, 0b111, src1, src2, 0b0000001);
+         goto done;
+      }
+      break;
+   }
+   case RISCV64in_ALUImm: {
+      /* <op> dst, src, imm12 */
+      UInt dst   = iregEnc(i->RISCV64in.ALUImm.dst);
+      UInt src   = iregEnc(i->RISCV64in.ALUImm.src);
+      Int  imm12 = i->RISCV64in.ALUImm.imm12;
+      switch (i->RISCV64in.ALUImm.op) {
+      case RISCV64op_ADDI:
+         vassert(imm12 >= -2048 && imm12 < 2048);
+         p = emit_I(p, 0b0010011, dst, 0b000, src, imm12 & 0xfff);
+         goto done;
+      case RISCV64op_ADDIW:
+         vassert(imm12 >= -2048 && imm12 < 2048);
+         p = emit_I(p, 0b0011011, dst, 0b000, src, imm12 & 0xfff);
+         goto done;
+      case RISCV64op_XORI:
+         vassert(imm12 >= -2048 && imm12 < 2048);
+         p = emit_I(p, 0b0010011, dst, 0b100, src, imm12 & 0xfff);
+         goto done;
+      case RISCV64op_ANDI:
+         vassert(imm12 >= -2048 && imm12 < 2048);
+         p = emit_I(p, 0b0010011, dst, 0b111, src, imm12 & 0xfff);
+         goto done;
+      case RISCV64op_SLLI:
+         vassert(imm12 >= 0 && imm12 < 64);
+         p = emit_I(p, 0b0010011, dst, 0b001, src, (0b000000 << 6) | imm12);
+         goto done;
+      case RISCV64op_SRLI:
+         vassert(imm12 >= 0 && imm12 < 64);
+         p = emit_I(p, 0b0010011, dst, 0b101, src, (0b000000 << 6) | imm12);
+         goto done;
+      case RISCV64op_SRAI:
+         vassert(imm12 >= 0 && imm12 < 64);
+         p = emit_I(p, 0b0010011, dst, 0b101, src, (0b010000 << 6) | imm12);
+         goto done;
+      case RISCV64op_SLTIU:
+         vassert(imm12 >= -2048 && imm12 < 2048);
+         p = emit_I(p, 0b0010011, dst, 0b011, src, imm12 & 0xfff);
+         goto done;
+      }
+      break;
+   }
+   case RISCV64in_Load: {
+      /* l<size> dst, soff12(base) */
+      UInt dst    = iregEnc(i->RISCV64in.Load.dst);
+      UInt base   = iregEnc(i->RISCV64in.Load.base);
+      Int  soff12 = i->RISCV64in.Load.soff12;
+      vassert(soff12 >= -2048 && soff12 < 2048);
+      UInt imm11_0 = soff12 & 0xfff;
+      switch (i->RISCV64in.Load.op) {
+      case RISCV64op_LD:
+         p = emit_I(p, 0b0000011, dst, 0b011, base, imm11_0);
+         goto done;
+      case RISCV64op_LW:
+         p = emit_I(p, 0b0000011, dst, 0b010, base, imm11_0);
+         goto done;
+      case RISCV64op_LH:
+         p = emit_I(p, 0b0000011, dst, 0b001, base, imm11_0);
+         goto done;
+      case RISCV64op_LB:
+         p = emit_I(p, 0b0000011, dst, 0b000, base, imm11_0);
+         goto done;
+      }
+      break;
+   }
+   case RISCV64in_Store: {
+      /* s<size> src, soff12(base) */
+      UInt src    = iregEnc(i->RISCV64in.Store.src);
+      UInt base   = iregEnc(i->RISCV64in.Store.base);
+      Int  soff12 = i->RISCV64in.Store.soff12;
+      vassert(soff12 >= -2048 && soff12 < 2048);
+      UInt imm11_0 = soff12 & 0xfff;
+      switch (i->RISCV64in.Store.op) {
+      case RISCV64op_SD:
+         p = emit_S(p, 0b0100011, imm11_0, 0b011, base, src);
+         goto done;
+      case RISCV64op_SW:
+         p = emit_S(p, 0b0100011, imm11_0, 0b010, base, src);
+         goto done;
+      case RISCV64op_SH:
+         p = emit_S(p, 0b0100011, imm11_0, 0b001, base, src);
+         goto done;
+      case RISCV64op_SB:
+         p = emit_S(p, 0b0100011, imm11_0, 0b000, base, src);
+         goto done;
+      }
+      goto done;
+   }
+   case RISCV64in_LoadR: {
+      /* lr.<size> dst, (addr) */
+      UInt dst  = iregEnc(i->RISCV64in.LoadR.dst);
+      UInt addr = iregEnc(i->RISCV64in.LoadR.addr);
+      switch (i->RISCV64in.LoadR.op) {
+      case RISCV64op_LR_W:
+         p = emit_R(p, 0b0101111, dst, 0b010, addr, 0b00000, 0b0001000);
+         goto done;
+      }
+      break;
+   }
+   case RISCV64in_StoreC: {
+      /* sc.<size> res, dst, (addr) */
+      UInt res  = iregEnc(i->RISCV64in.StoreC.res);
+      UInt src  = iregEnc(i->RISCV64in.StoreC.src);
+      UInt addr = iregEnc(i->RISCV64in.StoreC.addr);
+      switch (i->RISCV64in.StoreC.op) {
+      case RISCV64op_SC_W:
+         p = emit_R(p, 0b0101111, res, 0b010, addr, src, 0b0001100);
+         goto done;
+      }
+      break;
+   }
+   case RISCV64in_CSRRW: {
+      /* csrrw dst, csr, src */
+      UInt dst = iregEnc(i->RISCV64in.CSRRW.dst);
+      UInt src = iregEnc(i->RISCV64in.CSRRW.src);
+      UInt csr = i->RISCV64in.CSRRW.csr;
+      vassert(csr < 4096);
+
+      p = emit_I(p, 0b1110011, dst, 0b001, src, csr);
+      goto done;
+   }
+   case RISCV64in_FpUnary: {
+      /* f<op> dst, src */
+      UInt dst = fregEnc(i->RISCV64in.FpUnary.dst);
+      UInt src = fregEnc(i->RISCV64in.FpUnary.src);
+      switch (i->RISCV64in.FpUnary.op) {
+      case RISCV64op_FSQRT_S:
+         p = emit_R(p, 0b1010011, dst, 0b111, src, 0b00000, 0b0101100);
+         goto done;
+      case RISCV64op_FSQRT_D:
+         p = emit_R(p, 0b1010011, dst, 0b111, src, 0b00000, 0b0101101);
+         goto done;
+      }
+      break;
+   }
+   case RISCV64in_FpBinary: {
+      /* f<op> dst, src1, src2 */
+      UInt dst  = fregEnc(i->RISCV64in.FpBinary.dst);
+      UInt src1 = fregEnc(i->RISCV64in.FpBinary.src1);
+      UInt src2 = fregEnc(i->RISCV64in.FpBinary.src2);
+      switch (i->RISCV64in.FpBinary.op) {
+      case RISCV64op_FADD_S:
+         p = emit_R(p, 0b1010011, dst, 0b111, src1, src2, 0b0000000);
+         goto done;
+      case RISCV64op_FMUL_S:
+         p = emit_R(p, 0b1010011, dst, 0b111, src1, src2, 0b0001000);
+         goto done;
+      case RISCV64op_FDIV_S:
+         p = emit_R(p, 0b1010011, dst, 0b111, src1, src2, 0b0001100);
+         goto done;
+      case RISCV64op_FSGNJN_S:
+         p = emit_R(p, 0b1010011, dst, 0b001, src1, src2, 0b0010000);
+         goto done;
+      case RISCV64op_FSGNJX_S:
+         p = emit_R(p, 0b1010011, dst, 0b010, src1, src2, 0b0010000);
+         goto done;
+      case RISCV64op_FMIN_S:
+         p = emit_R(p, 0b1010011, dst, 0b000, src1, src2, 0b0010100);
+         goto done;
+      case RISCV64op_FMAX_S:
+         p = emit_R(p, 0b1010011, dst, 0b001, src1, src2, 0b0010100);
+         goto done;
+      case RISCV64op_FADD_D:
+         p = emit_R(p, 0b1010011, dst, 0b111, src1, src2, 0b0000001);
+         goto done;
+      case RISCV64op_FSUB_D:
+         p = emit_R(p, 0b1010011, dst, 0b111, src1, src2, 0b0000101);
+         goto done;
+      case RISCV64op_FMUL_D:
+         p = emit_R(p, 0b1010011, dst, 0b111, src1, src2, 0b0001001);
+         goto done;
+      case RISCV64op_FDIV_D:
+         p = emit_R(p, 0b1010011, dst, 0b111, src1, src2, 0b0001101);
+         goto done;
+      case RISCV64op_FSGNJN_D:
+         p = emit_R(p, 0b1010011, dst, 0b001, src1, src2, 0b0010001);
+         goto done;
+      case RISCV64op_FSGNJX_D:
+         p = emit_R(p, 0b1010011, dst, 0b010, src1, src2, 0b0010001);
+         goto done;
+      case RISCV64op_FMIN_D:
+         p = emit_R(p, 0b1010011, dst, 0b000, src1, src2, 0b0010101);
+         goto done;
+      case RISCV64op_FMAX_D:
+         p = emit_R(p, 0b1010011, dst, 0b001, src1, src2, 0b0010101);
+         goto done;
+      }
+      break;
+   }
+   case RISCV64in_FpTernary: {
+      /* f<op> dst, src1, src2, src3 */
+      UInt dst  = fregEnc(i->RISCV64in.FpTernary.dst);
+      UInt src1 = fregEnc(i->RISCV64in.FpTernary.src1);
+      UInt src2 = fregEnc(i->RISCV64in.FpTernary.src2);
+      UInt src3 = fregEnc(i->RISCV64in.FpTernary.src3);
+      switch (i->RISCV64in.FpTernary.op) {
+      case RISCV64op_FMADD_S:
+         p = emit_R(p, 0b1000011, dst, 0b111, src1, src2, src3 << 2 | 0b00);
+         goto done;
+      case RISCV64op_FMADD_D:
+         p = emit_R(p, 0b1000011, dst, 0b111, src1, src2, src3 << 2 | 0b01);
+         goto done;
+      }
+      break;
+   }
+   case RISCV64in_FpMove: {
+      /* f<op> dst, src */
+      UInt dst, src;
+      switch (i->RISCV64in.FpMove.op) {
+      case RISCV64op_FMV_X_W:
+         dst = iregEnc(i->RISCV64in.FpMove.dst);
+         src = fregEnc(i->RISCV64in.FpMove.src);
+         p   = emit_R(p, 0b1010011, dst, 0b000, src, 0b00000, 0b1110000);
+         goto done;
+      case RISCV64op_FMV_W_X:
+         dst = fregEnc(i->RISCV64in.FpMove.dst);
+         src = iregEnc(i->RISCV64in.FpMove.src);
+         p   = emit_R(p, 0b1010011, dst, 0b000, src, 0b00000, 0b1111000);
+         goto done;
+      case RISCV64op_FMV_D:
+         dst = fregEnc(i->RISCV64in.FpMove.dst);
+         src = fregEnc(i->RISCV64in.FpMove.src);
+         p   = emit_R(p, 0b1010011, dst, 0b000, src, src, 0b0010001);
+         goto done;
+      case RISCV64op_FMV_X_D:
+         dst = iregEnc(i->RISCV64in.FpMove.dst);
+         src = fregEnc(i->RISCV64in.FpMove.src);
+         p   = emit_R(p, 0b1010011, dst, 0b000, src, 0b00000, 0b1110001);
+         goto done;
+      case RISCV64op_FMV_D_X:
+         dst = fregEnc(i->RISCV64in.FpMove.dst);
+         src = iregEnc(i->RISCV64in.FpMove.src);
+         p   = emit_R(p, 0b1010011, dst, 0b000, src, 0b00000, 0b1111001);
+         goto done;
+      }
+      break;
+   }
+   case RISCV64in_FpConvert: {
+      /* f<op> dst, src */
+      UInt dst, src;
+      switch (i->RISCV64in.FpConvert.op) {
+      case RISCV64op_FCVT_W_S:
+         dst = iregEnc(i->RISCV64in.FpConvert.dst);
+         src = fregEnc(i->RISCV64in.FpConvert.src);
+         p   = emit_R(p, 0b1010011, dst, 0b111, src, 0b00000, 0b1100000);
+         goto done;
+      case RISCV64op_FCVT_WU_S:
+         dst = iregEnc(i->RISCV64in.FpConvert.dst);
+         src = fregEnc(i->RISCV64in.FpConvert.src);
+         p   = emit_R(p, 0b1010011, dst, 0b111, src, 0b00001, 0b1100000);
+         goto done;
+      case RISCV64op_FCVT_S_W:
+         dst = fregEnc(i->RISCV64in.FpConvert.dst);
+         src = iregEnc(i->RISCV64in.FpConvert.src);
+         p   = emit_R(p, 0b1010011, dst, 0b111, src, 0b00000, 0b1101000);
+         goto done;
+      case RISCV64op_FCVT_S_WU:
+         dst = fregEnc(i->RISCV64in.FpConvert.dst);
+         src = iregEnc(i->RISCV64in.FpConvert.src);
+         p   = emit_R(p, 0b1010011, dst, 0b111, src, 0b00001, 0b1101000);
+         goto done;
+      case RISCV64op_FCVT_L_S:
+         dst = iregEnc(i->RISCV64in.FpConvert.dst);
+         src = fregEnc(i->RISCV64in.FpConvert.src);
+         p   = emit_R(p, 0b1010011, dst, 0b111, src, 0b00010, 0b1100000);
+         goto done;
+      case RISCV64op_FCVT_LU_S:
+         dst = iregEnc(i->RISCV64in.FpConvert.dst);
+         src = fregEnc(i->RISCV64in.FpConvert.src);
+         p   = emit_R(p, 0b1010011, dst, 0b111, src, 0b00011, 0b1100000);
+         goto done;
+      case RISCV64op_FCVT_S_L:
+         dst = fregEnc(i->RISCV64in.FpConvert.dst);
+         src = iregEnc(i->RISCV64in.FpConvert.src);
+         p   = emit_R(p, 0b1010011, dst, 0b111, src, 0b00010, 0b1101000);
+         goto done;
+      case RISCV64op_FCVT_S_LU:
+         dst = fregEnc(i->RISCV64in.FpConvert.dst);
+         src = iregEnc(i->RISCV64in.FpConvert.src);
+         p   = emit_R(p, 0b1010011, dst, 0b111, src, 0b00011, 0b1101000);
+         goto done;
+      case RISCV64op_FCVT_S_D:
+         dst = fregEnc(i->RISCV64in.FpConvert.dst);
+         src = fregEnc(i->RISCV64in.FpConvert.src);
+         p   = emit_R(p, 0b1010011, dst, 0b111, src, 0b00001, 0b0100000);
+         goto done;
+      case RISCV64op_FCVT_D_S:
+         dst = fregEnc(i->RISCV64in.FpConvert.dst);
+         src = fregEnc(i->RISCV64in.FpConvert.src);
+         p   = emit_R(p, 0b1010011, dst, 0b111, src, 0b00000, 0b0100001);
+         goto done;
+      case RISCV64op_FCVT_W_D:
+         dst = iregEnc(i->RISCV64in.FpConvert.dst);
+         src = fregEnc(i->RISCV64in.FpConvert.src);
+         p   = emit_R(p, 0b1010011, dst, 0b111, src, 0b00000, 0b1100001);
+         goto done;
+      case RISCV64op_FCVT_WU_D:
+         dst = iregEnc(i->RISCV64in.FpConvert.dst);
+         src = fregEnc(i->RISCV64in.FpConvert.src);
+         p   = emit_R(p, 0b1010011, dst, 0b111, src, 0b00001, 0b1100001);
+         goto done;
+      case RISCV64op_FCVT_D_W:
+         dst = fregEnc(i->RISCV64in.FpConvert.dst);
+         src = iregEnc(i->RISCV64in.FpConvert.src);
+         p   = emit_R(p, 0b1010011, dst, 0b111, src, 0b00000, 0b1101001);
+         goto done;
+      case RISCV64op_FCVT_D_WU:
+         dst = fregEnc(i->RISCV64in.FpConvert.dst);
+         src = iregEnc(i->RISCV64in.FpConvert.src);
+         p   = emit_R(p, 0b1010011, dst, 0b111, src, 0b00001, 0b1101001);
+         goto done;
+      case RISCV64op_FCVT_L_D:
+         dst = iregEnc(i->RISCV64in.FpConvert.dst);
+         src = fregEnc(i->RISCV64in.FpConvert.src);
+         p   = emit_R(p, 0b1010011, dst, 0b111, src, 0b00010, 0b1100001);
+         goto done;
+      case RISCV64op_FCVT_LU_D:
+         dst = iregEnc(i->RISCV64in.FpConvert.dst);
+         src = fregEnc(i->RISCV64in.FpConvert.src);
+         p   = emit_R(p, 0b1010011, dst, 0b111, src, 0b00011, 0b1100001);
+         goto done;
+      case RISCV64op_FCVT_D_L:
+         dst = fregEnc(i->RISCV64in.FpConvert.dst);
+         src = iregEnc(i->RISCV64in.FpConvert.src);
+         p   = emit_R(p, 0b1010011, dst, 0b111, src, 0b00010, 0b1101001);
+         goto done;
+      case RISCV64op_FCVT_D_LU:
+         dst = fregEnc(i->RISCV64in.FpConvert.dst);
+         src = iregEnc(i->RISCV64in.FpConvert.src);
+         p   = emit_R(p, 0b1010011, dst, 0b111, src, 0b00011, 0b1101001);
+         goto done;
+      }
+      break;
+   }
+   case RISCV64in_FpCompare: {
+      /* f<op> dst, src1, src2 */
+      UInt dst  = iregEnc(i->RISCV64in.FpCompare.dst);
+      UInt src1 = fregEnc(i->RISCV64in.FpCompare.src1);
+      UInt src2 = fregEnc(i->RISCV64in.FpCompare.src2);
+      switch (i->RISCV64in.FpCompare.op) {
+      case RISCV64op_FEQ_S:
+         p = emit_R(p, 0b1010011, dst, 0b010, src1, src2, 0b1010000);
+         goto done;
+      case RISCV64op_FLT_S:
+         p = emit_R(p, 0b1010011, dst, 0b001, src1, src2, 0b1010000);
+         goto done;
+      case RISCV64op_FEQ_D:
+         p = emit_R(p, 0b1010011, dst, 0b010, src1, src2, 0b1010001);
+         goto done;
+      case RISCV64op_FLT_D:
+         p = emit_R(p, 0b1010011, dst, 0b001, src1, src2, 0b1010001);
+         goto done;
+      }
+      break;
+   }
+   case RISCV64in_FpLdSt: {
+      /* f<op> reg, soff12(base) */
+      UInt reg    = fregEnc(i->RISCV64in.FpLdSt.reg);
+      UInt base   = iregEnc(i->RISCV64in.FpLdSt.base);
+      Int  soff12 = i->RISCV64in.FpLdSt.soff12;
+      vassert(soff12 >= -2048 && soff12 < 2048);
+      UInt imm11_0 = soff12 & 0xfff;
+      switch (i->RISCV64in.FpLdSt.op) {
+      case RISCV64op_FLW:
+         p = emit_I(p, 0b0000111, reg /*dst*/, 0b010, base, imm11_0);
+         goto done;
+      case RISCV64op_FLD:
+         p = emit_I(p, 0b0000111, reg /*dst*/, 0b011, base, imm11_0);
+         goto done;
+      case RISCV64op_FSW:
+         p = emit_S(p, 0b0100111, imm11_0, 0b010, base, reg /*src*/);
+         goto done;
+      case RISCV64op_FSD:
+         p = emit_S(p, 0b0100111, imm11_0, 0b011, base, reg /*src*/);
+         goto done;
+      }
+      break;
+   }
+   case RISCV64in_FpCSEL: {
+      /*    beq cond, zero, 1f
+            fmv.d dst, iftrue
+            c.j 2f
+         1: fmv.d dst, iffalse
+         2:
+       */
+      UInt dst     = fregEnc(i->RISCV64in.FpCSEL.dst);
+      UInt iftrue  = fregEnc(i->RISCV64in.FpCSEL.iftrue);
+      UInt iffalse = fregEnc(i->RISCV64in.FpCSEL.iffalse);
+      UInt cond    = iregEnc(i->RISCV64in.FpCSEL.cond);
+      p = emit_B(p, 0b1100011, (10 >> 1) & 0xfff, 0b000, cond, 0 /*x0/zero*/);
+      p = emit_R(p, 0b1010011, dst, 0b000, iftrue, iftrue, 0b0010001);
+      p = emit_CJ(p, 0b01, (6 >> 1) & 0x7ff, 0b101);
+      p = emit_R(p, 0b1010011, dst, 0b000, iffalse, iffalse, 0b0010001);
+      goto done;
+   }
+   case RISCV64in_CAS: {
+      /* 1: lr.<size> old, (addr)
+            bne old, expd, 2f
+            sc.<size> t0, data, (addr)
+            bne t0, zero, 1b
+         2:
+       */
+      UInt old  = iregEnc(i->RISCV64in.CAS.old);
+      UInt addr = iregEnc(i->RISCV64in.CAS.addr);
+      UInt expd = iregEnc(i->RISCV64in.CAS.expd);
+      UInt data = iregEnc(i->RISCV64in.CAS.data);
+      switch (i->RISCV64in.CAS.op) {
+      case RISCV64op_CAS_D:
+         p = emit_R(p, 0b0101111, old, 0b011, addr, 0b00000, 0b0001000);
+         p = emit_B(p, 0b1100011, (12 >> 1) & 0xfff, 0b001, old, expd);
+         p = emit_R(p, 0b0101111, 5 /*x5/t0*/, 0b011, addr, data, 0b0001100);
+         p = emit_B(p, 0b1100011, (-12 >> 1) & 0xfff, 0b001, 5 /*x5/t0*/,
+                    0 /*x0/zero*/);
+         goto done;
+      case RISCV64op_CAS_W:
+         p = emit_R(p, 0b0101111, old, 0b010, addr, 0b00000, 0b0001000);
+         p = emit_B(p, 0b1100011, (12 >> 1) & 0xfff, 0b001, old, expd);
+         p = emit_R(p, 0b0101111, 5 /*x5/t0*/, 0b010, addr, data, 0b0001100);
+         p = emit_B(p, 0b1100011, (-12 >> 1) & 0xfff, 0b001, 5 /*x5/t0*/,
+                    0 /*x0/zero*/);
+         goto done;
+      }
+      break;
+   }
+   case RISCV64in_FENCE: {
+      /* fence */
+      p = emit_I(p, 0b0001111, 0b00000, 0b000, 0b00000, 0b000011111111);
+      goto done;
+   }
+   case RISCV64in_CSEL: {
+      /*    beq cond, zero, 1f
+            c.mv dst, iftrue
+            c.j 2f
+         1: c.mv dst, iffalse
+         2:
+       */
+      UInt dst     = iregEnc(i->RISCV64in.CSEL.dst);
+      UInt iftrue  = iregEnc(i->RISCV64in.CSEL.iftrue);
+      UInt iffalse = iregEnc(i->RISCV64in.CSEL.iffalse);
+      UInt cond    = iregEnc(i->RISCV64in.CSEL.cond);
+
+      p = emit_B(p, 0b1100011, (8 >> 1) & 0xfff, 0b000, cond, 0 /*x0/zero*/);
+      p = emit_CR(p, 0b10, iftrue, dst, 0b1000);
+      p = emit_CJ(p, 0b01, (4 >> 1) & 0x7ff, 0b101);
+      p = emit_CR(p, 0b10, iffalse, dst, 0b1000);
+      goto done;
+   }
+   case RISCV64in_Call: {
+      /*    beq cond, zero, 1f
+            li t0, target
+            c.jalr 0(t0)
+         1:
+       */
+      UChar* ptmp = NULL;
+      if (!hregIsInvalid(i->RISCV64in.Call.cond)) {
+         ptmp = p;
+         p += 4;
+      }
+
+      /* li t0, target */
+      p = imm64_to_ireg(p, 5 /*x5/t0*/, i->RISCV64in.Call.target);
+
+      /* c.jalr 0(t0) */
+      p = emit_CR(p, 0b10, 0 /*x0/zero*/, 5 /*x5/t0*/, 0b1001);
+
+      /* Fix up the conditional jump, if there was one. */
+      if (!hregIsInvalid(i->RISCV64in.Call.cond)) {
+         /* beq cond, zero, delta */
+         UInt cond  = iregEnc(i->RISCV64in.Call.cond);
+         UInt delta = p - ptmp;
+         /* delta_min = 4 (beq) + 2 (c.li) + 2 (c.jalr) = 8 */
+         vassert(delta >= 8 && delta < 4096 && (delta & 1) == 0);
+         UInt imm12_1 = (delta >> 1) & 0xfff;
+
+         emit_B(ptmp, 0b1100011, imm12_1, 0b000, cond, 0 /*x0/zero*/);
+      }
+
+      goto done;
+   }
+
+   case RISCV64in_XDirect: {
+      /* NB: what goes on here has to be very closely coordinated with the
+         chainXDirect_RISCV64() and unchainXDirect_RISCV64() below. */
+      /* We're generating chain-me requests here, so we need to be sure this is
+         actually allowed -- no-redir translations can't use chain-me's.
+         Hence: */
+      vassert(disp_cp_chain_me_to_slowEP != NULL);
+      vassert(disp_cp_chain_me_to_fastEP != NULL);
+
+      /* First off, if this is conditional, create a conditional jump over the
+         rest of it. Or at least, leave a space for it that we will shortly fill
+         in. */
+      UChar* ptmp = NULL;
+      if (!hregIsInvalid(i->RISCV64in.XDirect.cond)) {
+         ptmp = p;
+         p += 4;
+      }
+
+      /* Update the guest pc. */
+      {
+         /* li t0, dstGA */
+         p = imm64_to_ireg(p, 5 /*x5/t0*/, i->RISCV64in.XDirect.dstGA);
+
+         /* sd t0, soff12(base) */
+         UInt base   = iregEnc(i->RISCV64in.XDirect.base);
+         Int  soff12 = i->RISCV64in.XDirect.soff12;
+         vassert(soff12 >= -2048 && soff12 < 2048);
+         UInt imm11_0 = soff12 & 0xfff;
+
+         p = emit_S(p, 0b0100011, imm11_0, 0b011, base, 5 /*x5/t0*/);
+      }
+
+      /* --- FIRST PATCHABLE BYTE follows --- */
+      /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling to) backs
+         up the return address, so as to find the address of the first patchable
+         byte. So: don't change the number of instructions (3) below. */
+      /* li t0, VG_(disp_cp_chain_me_to_{slowEP,fastEP}) */
+      const void* disp_cp_chain_me = i->RISCV64in.XDirect.toFastEP
+                                        ? disp_cp_chain_me_to_fastEP
+                                        : disp_cp_chain_me_to_slowEP;
+
+      p = addr48_to_ireg_EXACTLY_18B(p, 5 /*x5/t0*/, (ULong)disp_cp_chain_me);
+
+      /* c.jalr 0(t0) */
+      p = emit_CR(p, 0b10, 0 /*x0/zero*/, 5 /*x5/t0*/, 0b1001);
+      /* --- END of PATCHABLE BYTES --- */
+
+      /* Fix up the conditional jump, if there was one. */
+      if (!hregIsInvalid(i->RISCV64in.XDirect.cond)) {
+         /* beq cond, zero, delta */
+         UInt cond  = iregEnc(i->RISCV64in.XDirect.cond);
+         UInt delta = p - ptmp;
+         /* delta_min = 4 (beq) + 2 (c.li) + 4 (sd) + 18 (addr48) + 2 (c.jalr)
+                      = 30 */
+         vassert(delta >= 30 && delta < 4096 && (delta & 1) == 0);
+         UInt imm12_1 = (delta >> 1) & 0xfff;
+
+         emit_B(ptmp, 0b1100011, imm12_1, 0b000, cond, 0 /*x0/zero*/);
+      }
+
+      goto done;
+   }
+
+   case RISCV64in_XIndir: {
+      /* We're generating transfers that could lead indirectly to a chain-me, so
+         we need to be sure this is actually allowed -- no-redir translations
+         are not allowed to reach normal translations without going through the
+         scheduler. That means no XDirects or XIndirs out from no-redir
+         translations. Hence: */
+      vassert(disp_cp_xindir != NULL);
+
+      /* First off, if this is conditional, create a conditional jump over the
+         rest of it. Or at least, leave a space for it that we will shortly fill
+         in. */
+      UChar* ptmp = NULL;
+      if (!hregIsInvalid(i->RISCV64in.XIndir.cond)) {
+         ptmp = p;
+         p += 4;
+      }
+
+      /* Update the guest pc. */
+      {
+         /* sd r-dstGA, soff12(base) */
+         UInt src    = iregEnc(i->RISCV64in.XIndir.dstGA);
+         UInt base   = iregEnc(i->RISCV64in.XIndir.base);
+         Int  soff12 = i->RISCV64in.XIndir.soff12;
+         vassert(soff12 >= -2048 && soff12 < 2048);
+         UInt imm11_0 = soff12 & 0xfff;
+
+         p = emit_S(p, 0b0100011, imm11_0, 0b011, base, src);
+      }
+
+      /* li t0, VG_(disp_cp_xindir) */
+      p = imm64_to_ireg(p, 5 /*x5/t0*/, (ULong)disp_cp_xindir);
+
+      /* c.jr 0(t0) */
+      p = emit_CR(p, 0b10, 0 /*x0/zero*/, 5 /*x5/t0*/, 0b1000);
+
+      /* Fix up the conditional jump, if there was one. */
+      if (!hregIsInvalid(i->RISCV64in.XIndir.cond)) {
+         /* beq cond, zero, delta */
+         UInt cond  = iregEnc(i->RISCV64in.XIndir.cond);
+         UInt delta = p - ptmp;
+         /* delta_min = 4 (beq) + 4 (sd) + 2 (c.li) + 2 (c.jr) = 12 */
+         vassert(delta >= 12 && delta < 4096 && (delta & 1) == 0);
+         UInt imm12_1 = (delta >> 1) & 0xfff;
+
+         emit_B(ptmp, 0b1100011, imm12_1, 0b000, cond, 0 /*x0/zero*/);
+      }
+
+      goto done;
+   }
+
+   case RISCV64in_XAssisted: {
+      /* First off, if this is conditional, create a conditional jump over the
+         rest of it. Or at least, leave a space for it that we will shortly fill
+         in. */
+      UChar* ptmp = NULL;
+      if (!hregIsInvalid(i->RISCV64in.XAssisted.cond)) {
+         ptmp = p;
+         p += 4;
+      }
+
+      /* Update the guest pc. */
+      {
+         /* sd r-dstGA, soff12(base) */
+         UInt src    = iregEnc(i->RISCV64in.XAssisted.dstGA);
+         UInt base   = iregEnc(i->RISCV64in.XAssisted.base);
+         Int  soff12 = i->RISCV64in.XAssisted.soff12;
+         vassert(soff12 >= -2048 && soff12 < 2048);
+         UInt imm11_0 = soff12 & 0xfff;
+
+         p = emit_S(p, 0b0100011, imm11_0, 0b011, base, src);
+      }
+
+      /* li s0, $magic_number */
+      UInt trcval = 0;
+      switch (i->RISCV64in.XAssisted.jk) {
+      case Ijk_ClientReq:
+         trcval = VEX_TRC_JMP_CLIENTREQ;
+         break;
+      case Ijk_Sys_syscall:
+         trcval = VEX_TRC_JMP_SYS_SYSCALL;
+         break;
+      case Ijk_NoDecode:
+         trcval = VEX_TRC_JMP_NODECODE;
+         break;
+      case Ijk_InvalICache:
+         trcval = VEX_TRC_JMP_INVALICACHE;
+         break;
+      case Ijk_NoRedir:
+         trcval = VEX_TRC_JMP_NOREDIR;
+         break;
+      case Ijk_SigTRAP:
+         trcval = VEX_TRC_JMP_SIGTRAP;
+         break;
+      case Ijk_Boring:
+         trcval = VEX_TRC_JMP_BORING;
+         break;
+      default:
+         ppIRJumpKind(i->RISCV64in.XAssisted.jk);
+         vpanic("emit_RISCV64Instr.RISCV64in_XAssisted: unexpected jump kind");
+      }
+      vassert(trcval != 0);
+      p = imm64_to_ireg(p, 8 /*x8/s0*/, trcval);
+
+      /* li t0, VG_(disp_cp_xassisted) */
+      p = imm64_to_ireg(p, 5 /*x5/t0*/, (ULong)disp_cp_xassisted);
+
+      /* c.jr 0(t0) */
+      p = emit_CR(p, 0b10, 0 /*x0/zero*/, 5 /*x5/t0*/, 0b1000);
+
+      /* Fix up the conditional jump, if there was one. */
+      if (!hregIsInvalid(i->RISCV64in.XAssisted.cond)) {
+         /* beq cond, zero, delta */
+         UInt cond  = iregEnc(i->RISCV64in.XAssisted.cond);
+         UInt delta = p - ptmp;
+         /* delta_min = 4 (beq) + 4 (sd) + 2 (c.li) + 2 (c.li) + 2 (c.jr)
+                      = 14 */
+         vassert(delta >= 14 && delta < 4096 && (delta & 1) == 0);
+         UInt imm12_1 = (delta >> 1) & 0xfff;
+
+         emit_B(ptmp, 0b1100011, imm12_1, 0b000, cond, 0 /*x0/zero*/);
+      }
+
+      goto done;
+   }
+
+   case RISCV64in_EvCheck: {
+      /*    lw t0, soff12_amCounter(base_amCounter)
+            c.addiw t0, -1
+            sw t0, soff12_amCounter(base_amCounter)
+            bge t0, zero, 1f
+            ld t0, soff12_amFailAddr(base_amFailAddr)
+            c.jr 0(t0)
+         1:
+      */
+      UInt base_amCounter   = iregEnc(i->RISCV64in.EvCheck.base_amCounter);
+      Int  soff12_amCounter = i->RISCV64in.EvCheck.soff12_amCounter;
+      vassert(soff12_amCounter >= -2048 && soff12_amCounter < 2048);
+      UInt imm11_0_amCounter = soff12_amCounter & 0xfff;
+
+      UInt base_amFailAddr   = iregEnc(i->RISCV64in.EvCheck.base_amFailAddr);
+      Int  soff12_amFailAddr = i->RISCV64in.EvCheck.soff12_amFailAddr;
+      vassert(soff12_amFailAddr >= -2048 && soff12_amFailAddr < 2048);
+      UInt imm11_0_amFailAddr = soff12_amFailAddr & 0xfff;
+
+      p = emit_I(p, 0b0000011, 5 /*x5/t0*/, 0b010, base_amCounter,
+                 imm11_0_amCounter);
+      p = emit_CI(p, 0b01, -1 & 0x3f, 5 /*x5/t0*/, 0b001);
+      p = emit_S(p, 0b0100011, imm11_0_amCounter, 0b010, base_amCounter,
+                 5 /*x5/t0*/);
+      p = emit_B(p, 0b1100011, (10 >> 1) & 0xfff, 0b101, 5 /*x5/t0*/,
+                 0 /*x0/zero*/);
+      p = emit_I(p, 0b0000011, 5 /*x5/t0*/, 0b011, base_amFailAddr,
+                 imm11_0_amFailAddr);
+      p = emit_CR(p, 0b10, 0 /*x0/zero*/, 5 /*x5/t0*/, 0b1000);
+
+      /* Crosscheck. */
+      vassert(evCheckSzB_RISCV64() == p - buf);
+      goto done;
+   }
+
+   case RISCV64in_ProfInc: {
+      /* Generate a code template to increment a memory location whose address
+         will be known later as an immediate value. This code template will be
+         patched by LibVEX_PatchProfInc() once the memory location is known. For
+         now do this with address == 0x0000'6555'7555'8566.
+
+         li t1, 0x655575558566
+         ld t0, 0(t1)
+         c.addi t0, t0, 1
+         sd t0, 0(t1)
+       */
+      p = addr48_to_ireg_EXACTLY_18B(p, 6 /*x6/t1*/, 0x655575558566ULL);
+      p = emit_I(p, 0b0000011, 5 /*x5/t0*/, 0b011, 6 /*x6/t1*/, 0);
+      p = emit_CI(p, 0b01, 1, 5 /*x5/t0*/, 0b000);
+      p = emit_S(p, 0b0100011, 0, 0b011, 6 /*x6/t1*/, 5 /*x5/t0*/);
+      /* Tell the caller .. */
+      vassert(!*is_profInc);
+      *is_profInc = True;
+      goto done;
+   }
+
+   default:
+      goto bad;
+   }
+
+bad:
+   ppRISCV64Instr(i, mode64);
+   vpanic("emit_RISCV64Instr");
+   /*NOTREACHED*/
+
+done:
+   vassert(p - &buf[0] <= 44);
+   return p - &buf[0];
+}
+
+/* Size in bytes of the RISCV64in_EvCheck sequence that emit_RISCV64Instr()
+   generates: lw (4) + c.addiw (2) + sw (4) + bge (4) + ld (4) + c.jr (2). */
+Int evCheckSzB_RISCV64(void) { return 4 + 2 + 4 + 4 + 4 + 2; }
+
+/* Chain an XDirect transfer to place_to_jump_to. NB: what goes on here has to
+   be kept in lock-step with the XDirect case of emit_RISCV64Instr(), above. */
+VexInvalRange chainXDirect_RISCV64(VexEndness  endness_host,
+                                   void*       place_to_chain,
+                                   const void* disp_cp_chain_me_EXPECTED,
+                                   const void* place_to_jump_to)
+{
+   UChar* p = place_to_chain;
+
+   vassert(endness_host == VexEndnessLE);
+   vassert(((HWord)p & 1) == 0);
+
+   /* The site must still hold the unchained form, a call to the chain-me
+      entry point:
+        lui t0, disp_cp_chain_me_to_EXPECTED[47:28]'
+        addiw t0, t0, disp_cp_chain_me_to_EXPECTED[27:16]'
+        c.slli t0, 12
+        addi t0, t0, disp_cp_chain_me_to_EXPECTED[15:4]'
+        c.slli t0, 4
+        c.addi t0, disp_cp_chain_me_to_EXPECTED[3:0]'
+        c.jalr 0(t0)
+      that is, the 18 bytes generated by addr48_to_ireg_EXACTLY_18B followed
+      by the bytes 82 92. */
+   vassert(is_addr48_to_ireg_EXACTLY_18B(p, 5 /*x5/t0*/,
+                                         (ULong)disp_cp_chain_me_EXPECTED));
+   vassert(p[18] == 0x82 && p[19] == 0x92);
+
+   /* Overwrite it with a load of the real target and a plain jump:
+        lui t0, place_to_jump[47:28]'
+        addiw t0, t0, place_to_jump[27:16]'
+        c.slli t0, 12
+        addi t0, t0, place_to_jump[15:4]'
+        c.slli t0, 4
+        c.addi t0, place_to_jump[3:0]'
+        c.jr 0(t0)
+      that is, the 18 bytes generated by addr48_to_ireg_EXACTLY_18B followed
+      by the bytes 82 82. Old and new sequences have the same length, so
+      nothing around the site moves. */
+   (void)addr48_to_ireg_EXACTLY_18B(p, 5 /*x5/t0*/, (ULong)place_to_jump_to);
+   /* Trailing 16-bit jump instruction. */
+   UChar* jump = &p[18];
+   jump[0]     = 0x82;
+   jump[1]     = 0x82;
+
+   /* Hand back the 20-byte range that was rewritten. */
+   VexInvalRange vir = {(HWord)p, 20};
+   return vir;
+}
+
+/* Undo a chaining: make the XDirect site call disp_cp_chain_me again. NB: must
+   be very closely coordinated with the emitInstr case for XDirect, above. */
+VexInvalRange unchainXDirect_RISCV64(VexEndness  endness_host,
+                                     void*       place_to_unchain,
+                                     const void* place_to_jump_to_EXPECTED,
+                                     const void* disp_cp_chain_me)
+{
+   vassert(endness_host == VexEndnessLE);
+
+   /* What we're expecting to see is:
+        lui t0, place_to_jump_to_EXPECTED[47:28]'
+        addiw t0, t0, place_to_jump_to_EXPECTED[27:16]'
+        c.slli t0, 12
+        addi t0, t0, place_to_jump_to_EXPECTED[15:4]'
+        c.slli t0, 4
+        c.addi t0, place_to_jump_to_EXPECTED[3:0]'
+        c.jr 0(t0)
+      viz
+        <18 bytes generated by addr48_to_ireg_EXACTLY_18B>
+        82 82
+   */
+   UChar* p = place_to_unchain;
+   vassert(((HWord)p & 1) == 0);
+   vassert(is_addr48_to_ireg_EXACTLY_18B(p, 5 /*x5/t0*/,
+                                         (ULong)place_to_jump_to_EXPECTED));
+   vassert(p[18] == 0x82 && p[19] == 0x82);
+
+   /* And what we want to change it to is:
+        lui t0, disp_cp_chain_me[47:28]'
+        addiw t0, t0, disp_cp_chain_me[27:16]'
+        c.slli t0, 12
+        addi t0, t0, disp_cp_chain_me[15:4]'
+        c.slli t0, 4
+        c.addi t0, disp_cp_chain_me[3:0]'
+        c.jalr 0(t0)
+      viz
+        <18 bytes generated by addr48_to_ireg_EXACTLY_18B>
+        82 92
+
+      The replacement has the same length as the original.
+   */
+   (void)addr48_to_ireg_EXACTLY_18B(p, 5 /*x5/t0*/, (ULong)disp_cp_chain_me);
+   p[18] = 0x82;
+   p[19] = 0x92; /* c.jalr 0(t0) is 0x9282; chainXDirect expects 82 92 here */
+
+   VexInvalRange vir = {(HWord)p, 20};
+   return vir;
+}
+
+/* Fill in the real counter address at a profile-increment point laid down by
+   the RISCV64in_ProfInc case of emit_RISCV64Instr(). */
+VexInvalRange patchProfInc_RISCV64(VexEndness   endness_host,
+                                   void*        place_to_patch,
+                                   const ULong* location_of_counter)
+{
+   UChar* p = place_to_patch;
+   vassert(sizeof(ULong*) == 8);
+   vassert(endness_host == VexEndnessLE);
+   vassert(((HWord)p & 3) == 0);
+   /* Expect: li t1, <placeholder>; ld t0, 0(t1); c.addi t0, 1; sd t0, 0(t1). */
+   vassert(is_addr48_to_ireg_EXACTLY_18B(p, 6 /*x6/t1*/, 0x655575558566ULL));
+   vassert(p[18] == 0x83 && p[19] == 0x32 && p[20] == 0x03 && p[21] == 0x00);
+   vassert(p[22] == 0x85 && p[23] == 0x02);
+   vassert(p[24] == 0x23 && p[25] == 0x30 && p[26] == 0x53 && p[27] == 0x00);
+   (void)addr48_to_ireg_EXACTLY_18B(p, 6 /*x6/t1*/, (ULong)location_of_counter);
+   return (VexInvalRange){(HWord)p, 28};
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                      host_riscv64_defs.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/VEX/priv/host_riscv64_defs.h b/VEX/priv/host_riscv64_defs.h
new file mode 100644
index 000000000..16c524cab
--- /dev/null
+++ b/VEX/priv/host_riscv64_defs.h
@@ -0,0 +1,654 @@
+
+/*--------------------------------------------------------------------*/
+/*--- begin                                    host_riscv64_defs.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright (C) 2020-2023 Petr Pavlu
+      petr.pavlu@dagobah.cz
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __VEX_HOST_RISCV64_DEFS_H
+#define __VEX_HOST_RISCV64_DEFS_H
+
+#include "libvex.h"
+#include "libvex_basictypes.h"
+
+#include "host_generic_regs.h"
+
+/*------------------------------------------------------------*/
+/*--- Registers                                            ---*/
+/*------------------------------------------------------------*/
+
+#define ST_IN static inline /* last mkHReg arg below runs 0..40 in order */
+ST_IN HReg hregRISCV64_x18(void) { return mkHReg(False, HRcInt64, 18, 0); }
+ST_IN HReg hregRISCV64_x19(void) { return mkHReg(False, HRcInt64, 19, 1); }
+ST_IN HReg hregRISCV64_x20(void) { return mkHReg(False, HRcInt64, 20, 2); }
+ST_IN HReg hregRISCV64_x21(void) { return mkHReg(False, HRcInt64, 21, 3); }
+ST_IN HReg hregRISCV64_x22(void) { return mkHReg(False, HRcInt64, 22, 4); }
+ST_IN HReg hregRISCV64_x23(void) { return mkHReg(False, HRcInt64, 23, 5); }
+ST_IN HReg hregRISCV64_x24(void) { return mkHReg(False, HRcInt64, 24, 6); }
+ST_IN HReg hregRISCV64_x25(void) { return mkHReg(False, HRcInt64, 25, 7); }
+ST_IN HReg hregRISCV64_x26(void) { return mkHReg(False, HRcInt64, 26, 8); }
+ST_IN HReg hregRISCV64_x27(void) { return mkHReg(False, HRcInt64, 27, 9); }
+/* x10..x17 are a0..a7, the integer argument registers. */
+ST_IN HReg hregRISCV64_x10(void) { return mkHReg(False, HRcInt64, 10, 10); }
+ST_IN HReg hregRISCV64_x11(void) { return mkHReg(False, HRcInt64, 11, 11); }
+ST_IN HReg hregRISCV64_x12(void) { return mkHReg(False, HRcInt64, 12, 12); }
+ST_IN HReg hregRISCV64_x13(void) { return mkHReg(False, HRcInt64, 13, 13); }
+ST_IN HReg hregRISCV64_x14(void) { return mkHReg(False, HRcInt64, 14, 14); }
+ST_IN HReg hregRISCV64_x15(void) { return mkHReg(False, HRcInt64, 15, 15); }
+ST_IN HReg hregRISCV64_x16(void) { return mkHReg(False, HRcInt64, 16, 16); }
+ST_IN HReg hregRISCV64_x17(void) { return mkHReg(False, HRcInt64, 17, 17); }
+/* f0..f7 (ABI names ft0..ft7). */
+ST_IN HReg hregRISCV64_f0(void) { return mkHReg(False, HRcFlt64, 0, 18); }
+ST_IN HReg hregRISCV64_f1(void) { return mkHReg(False, HRcFlt64, 1, 19); }
+ST_IN HReg hregRISCV64_f2(void) { return mkHReg(False, HRcFlt64, 2, 20); }
+ST_IN HReg hregRISCV64_f3(void) { return mkHReg(False, HRcFlt64, 3, 21); }
+ST_IN HReg hregRISCV64_f4(void) { return mkHReg(False, HRcFlt64, 4, 22); }
+ST_IN HReg hregRISCV64_f5(void) { return mkHReg(False, HRcFlt64, 5, 23); }
+ST_IN HReg hregRISCV64_f6(void) { return mkHReg(False, HRcFlt64, 6, 24); }
+ST_IN HReg hregRISCV64_f7(void) { return mkHReg(False, HRcFlt64, 7, 25); }
+/* f10..f17 are fa0..fa7, the floating-point argument registers. */
+ST_IN HReg hregRISCV64_f10(void) { return mkHReg(False, HRcFlt64, 10, 26); }
+ST_IN HReg hregRISCV64_f11(void) { return mkHReg(False, HRcFlt64, 11, 27); }
+ST_IN HReg hregRISCV64_f12(void) { return mkHReg(False, HRcFlt64, 12, 28); }
+ST_IN HReg hregRISCV64_f13(void) { return mkHReg(False, HRcFlt64, 13, 29); }
+ST_IN HReg hregRISCV64_f14(void) { return mkHReg(False, HRcFlt64, 14, 30); }
+ST_IN HReg hregRISCV64_f15(void) { return mkHReg(False, HRcFlt64, 15, 31); }
+ST_IN HReg hregRISCV64_f16(void) { return mkHReg(False, HRcFlt64, 16, 32); }
+ST_IN HReg hregRISCV64_f17(void) { return mkHReg(False, HRcFlt64, 17, 33); }
+/* f28..f31 (ABI names ft8..ft11). */
+ST_IN HReg hregRISCV64_f28(void) { return mkHReg(False, HRcFlt64, 28, 34); }
+ST_IN HReg hregRISCV64_f29(void) { return mkHReg(False, HRcFlt64, 29, 35); }
+ST_IN HReg hregRISCV64_f30(void) { return mkHReg(False, HRcFlt64, 30, 36); }
+ST_IN HReg hregRISCV64_f31(void) { return mkHReg(False, HRcFlt64, 31, 37); }
+/* x0/zero, x2 (ABI name sp) and x8/s0 come last in the numbering. */
+ST_IN HReg hregRISCV64_x0(void) { return mkHReg(False, HRcInt64, 0, 38); }
+ST_IN HReg hregRISCV64_x2(void) { return mkHReg(False, HRcInt64, 2, 39); }
+ST_IN HReg hregRISCV64_x8(void) { return mkHReg(False, HRcInt64, 8, 40); }
+#undef ST_IN
+
+/* Number of integer / floating-point registers used to pass call arguments. */
+#define RISCV64_N_ARGREGS  8 /* x10/a0 .. x17/a7 */
+#define RISCV64_N_FARGREGS 8 /* f10/fa0 .. f17/fa7 */
+
+/*------------------------------------------------------------*/
+/*--- Instructions                                         ---*/
+/*------------------------------------------------------------*/
+
+/* RISCV64in_ALU sub-types (register-register operations). */
+typedef enum {
+   RISCV64op_ADD = 0x100, /* Addition of two registers. */
+   RISCV64op_SUB,         /* Subtraction of one register from another. */
+   RISCV64op_ADDW,        /* 32-bit addition of two registers. */
+   RISCV64op_SUBW,        /* 32-bit subtraction of one register from another. */
+   RISCV64op_XOR,         /* Bitwise XOR of two registers. */
+   RISCV64op_OR,          /* Bitwise OR of two registers. */
+   RISCV64op_AND,         /* Bitwise AND of two registers. */
+   RISCV64op_SLL,         /* Logical left shift on a register. */
+   RISCV64op_SRL,         /* Logical right shift on a register. */
+   RISCV64op_SRA,         /* Arithmetic right shift on a register. */
+   RISCV64op_SLLW,        /* 32-bit logical left shift on a register. */
+   RISCV64op_SRLW,        /* 32-bit logical right shift on a register. */
+   RISCV64op_SRAW,        /* 32-bit arithmetic right shift on a register. */
+   RISCV64op_SLT,         /* Signed less-than comparison of two registers. */
+   RISCV64op_SLTU,        /* Unsigned less-than comparison of two registers. */
+   RISCV64op_MUL,         /* Multiplication of two registers, producing the
+                             lower 64 bits. */
+   RISCV64op_MULH,        /* Signed multiplication of two registers, producing
+                             the upper 64 bits. */
+   RISCV64op_MULHU,       /* Unsigned multiplication of two registers, producing
+                             the upper 64 bits. */
+   RISCV64op_DIV,         /* Signed division of one register by another. */
+   RISCV64op_DIVU,        /* Unsigned division of one register by another. */
+   RISCV64op_REM,         /* Remainder from signed division of one register by
+                             another. */
+   RISCV64op_REMU,        /* Remainder from unsigned division of one register by
+                             another. */
+   RISCV64op_MULW,        /* 32-bit multiplication of two registers, producing
+                             the lower 32 bits. */
+   RISCV64op_DIVW,        /* 32-bit signed division of one register by
+                             another. */
+   RISCV64op_DIVUW,       /* 32-bit unsigned division of one register by
+                             another. */
+   RISCV64op_REMW,        /* Remainder from 32-bit signed division of one
+                             register by another. */
+   RISCV64op_REMUW,       /* Remainder from 32-bit unsigned division of one
+                             register by another. */
+} RISCV64ALUOp;
+
+/* RISCV64in_ALUImm sub-types (register-immediate operations). */
+typedef enum {
+   RISCV64op_ADDI = 0x200, /* Addition of a register and a sign-extended
+                              12-bit immediate. */
+   RISCV64op_ADDIW,        /* 32-bit addition of a register and a sign-extended
+                              12-bit immediate. */
+   RISCV64op_XORI,         /* Bitwise XOR of a register and a sign-extended
+                              12-bit immediate. */
+   RISCV64op_ANDI,         /* Bitwise AND of a register and a sign-extended
+                              12-bit immediate. */
+   RISCV64op_SLLI,         /* Logical left shift on a register by a 6-bit
+                              immediate. */
+   RISCV64op_SRLI,         /* Logical right shift on a register by a 6-bit
+                              immediate. */
+   RISCV64op_SRAI,         /* Arithmetic right shift on a register by a 6-bit
+                              immediate. */
+   RISCV64op_SLTIU,        /* Unsigned less-than comparison of a register and a
+                              sign-extended 12-bit immediate. */
+} RISCV64ALUImmOp;
+
+/* RISCV64in_Load sub-types; narrow loads sign-extend to 64 bits. */
+typedef enum {
+   RISCV64op_LD = 0x300, /* 64-bit load. */
+   RISCV64op_LW,         /* Sign-extending 32-to-64-bit load. */
+   RISCV64op_LH,         /* Sign-extending 16-to-64-bit load. */
+   RISCV64op_LB,         /* Sign-extending 8-to-64-bit load. */
+} RISCV64LoadOp;
+
+/* RISCV64in_Store sub-types, one per access width. */
+typedef enum {
+   RISCV64op_SD = 0x400, /* 64-bit store. */
+   RISCV64op_SW,         /* 32-bit store. */
+   RISCV64op_SH,         /* 16-bit store. */
+   RISCV64op_SB,         /* 8-bit store. */
+} RISCV64StoreOp;
+
+/* RISCV64in_LoadR sub-types (only the 32-bit form is defined). */
+typedef enum {
+   RISCV64op_LR_W = 0x500, /* Sign-extending 32-to-64-bit load-reserved. */
+} RISCV64LoadROp;
+
+/* RISCV64in_StoreC sub-types (only the 32-bit form is defined). */
+typedef enum {
+   RISCV64op_SC_W = 0x600, /* 32-bit store-conditional. */
+} RISCV64StoreCOp;
+
+/* RISCV64in_FpUnary sub-types (_S: 32-bit, _D: 64-bit operand). */
+typedef enum {
+   RISCV64op_FSQRT_S = 0x700, /* Square root of a 32-bit floating-point
+                                 register. */
+   RISCV64op_FSQRT_D,         /* Square root of a 64-bit floating-point
+                                 register. */
+} RISCV64FpUnaryOp;
+
+/* RISCV64in_FpBinary sub-types (_S: 32-bit, _D: 64-bit operands). */
+typedef enum {
+   RISCV64op_FADD_S = 0x800, /* Addition of two 32-bit floating-point
+                                registers. */
+   RISCV64op_FMUL_S,         /* Multiplication of two 32-bit floating-point
+                                registers. */
+   RISCV64op_FDIV_S,         /* Division of a 32-bit floating-point register by
+                                another. */
+   RISCV64op_FSGNJN_S,       /* Copy of a 32-bit floating-point register to
+                                another with the sign bit taken from the second
+                                input and negated. */
+   RISCV64op_FSGNJX_S,       /* Copy of a 32-bit floating-point register to
+                                another with the sign bit XOR'ed from the second
+                                input. */
+   RISCV64op_FMIN_S,         /* Select minimum-number of two 32-bit
+                                floating-point registers. */
+   RISCV64op_FMAX_S,         /* Select maximum-number of two 32-bit
+                                floating-point registers. */
+   RISCV64op_FADD_D,         /* Addition of two 64-bit floating-point
+                                registers. */
+   RISCV64op_FSUB_D,         /* Subtraction of one 64-bit floating-point
+                                register from another. */
+   RISCV64op_FMUL_D,         /* Multiplication of two 64-bit floating-point
+                                registers. */
+   RISCV64op_FDIV_D,         /* Division of a 64-bit floating-point register by
+                                another. */
+   RISCV64op_FSGNJN_D,       /* Copy of a 64-bit floating-point register to
+                                another with the sign bit taken from the second
+                                input and negated. */
+   RISCV64op_FSGNJX_D,       /* Copy of a 64-bit floating-point register to
+                                another with the sign bit XOR'ed from the second
+                                input. */
+   RISCV64op_FMIN_D,         /* Select minimum-number of two 64-bit
+                                floating-point registers. */
+   RISCV64op_FMAX_D,         /* Select maximum-number of two 64-bit
+                                floating-point registers. */
+} RISCV64FpBinaryOp;
+
+/* RISCV64in_FpTernary sub-types (_S: 32-bit, _D: 64-bit operands). */
+typedef enum {
+   RISCV64op_FMADD_S = 0x900, /* Fused multiply-add of 32-bit floating-point
+                                 registers. */
+   RISCV64op_FMADD_D,         /* Fused multiply-add of 64-bit floating-point
+                                 registers. */
+} RISCV64FpTernaryOp;
+
+/* RISCV64in_FpMove sub-types (bit-for-bit moves, no conversion). */
+typedef enum {
+   RISCV64op_FMV_X_W = 0xa00, /* Move as-is a 32-bit value from a floating-point
+                                 register to an integer register. */
+   RISCV64op_FMV_W_X,         /* Move as-is a 32-bit value from an integer
+                                 register to a floating-point register. */
+   RISCV64op_FMV_D,           /* Copy one 64-bit floating-point register to
+                                 another. */
+   RISCV64op_FMV_X_D,         /* Move as-is a 64-bit value from a floating-point
+                                 register to an integer register. */
+   RISCV64op_FMV_D_X,         /* Move as-is a 64-bit value from an integer
+                                 register to a floating-point register. */
+} RISCV64FpMoveOp;
+
+/* RISCV64in_FpConvert sub-types, named FCVT_<destination>_<source>. */
+typedef enum {
+   RISCV64op_FCVT_W_S = 0xb00, /* Convert a 32-bit floating-point number to
+                                  a 32-bit signed integer. */
+   RISCV64op_FCVT_WU_S,        /* Convert a 32-bit floating-point number to
+                                  a 32-bit unsigned integer. */
+   RISCV64op_FCVT_S_W,         /* Convert a 32-bit signed integer to a 32-bit
+                                  floating-point number. */
+   RISCV64op_FCVT_S_WU,        /* Convert a 32-bit unsigned integer to a 32-bit
+                                  floating-point number. */
+   RISCV64op_FCVT_L_S,         /* Convert a 32-bit floating-point number to
+                                  a 64-bit signed integer. */
+   RISCV64op_FCVT_LU_S,        /* Convert a 32-bit floating-point number to
+                                  a 64-bit unsigned integer. */
+   RISCV64op_FCVT_S_L,         /* Convert a 64-bit signed integer to a 32-bit
+                                  floating-point number. */
+   RISCV64op_FCVT_S_LU,        /* Convert a 64-bit unsigned integer to a 32-bit
+                                  floating-point number. */
+   RISCV64op_FCVT_S_D,         /* Convert a 64-bit floating-point number to
+                                  a 32-bit floating-point number. */
+   RISCV64op_FCVT_D_S,         /* Convert a 32-bit floating-point number to
+                                  a 64-bit floating-point number. */
+   RISCV64op_FCVT_W_D,         /* Convert a 64-bit floating-point number to
+                                  a 32-bit signed integer. */
+   RISCV64op_FCVT_WU_D,        /* Convert a 64-bit floating-point number to
+                                  a 32-bit unsigned integer. */
+   RISCV64op_FCVT_D_W,         /* Convert a 32-bit signed integer to a 64-bit
+                                  floating-point number. */
+   RISCV64op_FCVT_D_WU,        /* Convert a 32-bit unsigned integer to a 64-bit
+                                  floating-point number. */
+   RISCV64op_FCVT_L_D,         /* Convert a 64-bit floating-point number to
+                                  a 64-bit signed integer. */
+   RISCV64op_FCVT_LU_D,        /* Convert a 64-bit floating-point number to
+                                  a 64-bit unsigned integer. */
+   RISCV64op_FCVT_D_L,         /* Convert a 64-bit signed integer to a 64-bit
+                                  floating-point number. */
+   RISCV64op_FCVT_D_LU,        /* Convert a 64-bit unsigned integer to a 64-bit
+                                  floating-point number. */
+} RISCV64FpConvertOp;
+
+/* RISCV64in_FpCompare sub-types (_S: 32-bit, _D: 64-bit operands). */
+typedef enum {
+   RISCV64op_FEQ_S = 0xc00, /* Equality comparison of two 32-bit floating-point
+                               registers. */
+   RISCV64op_FLT_S,         /* Less-than comparison of two 32-bit floating-point
+                               registers. */
+   RISCV64op_FEQ_D,         /* Equality comparison of two 64-bit floating-point
+                               registers. */
+   RISCV64op_FLT_D,         /* Less-than comparison of two 64-bit floating-point
+                               registers. */
+} RISCV64FpCompareOp;
+
+/* RISCV64in_FpLdSt sub-types; loads and stores share this one enum. */
+typedef enum {
+   RISCV64op_FLW = 0xd00, /* 32-bit floating-point load. */
+   RISCV64op_FLD,         /* 64-bit floating-point load. */
+   RISCV64op_FSW,         /* 32-bit floating-point store. */
+   RISCV64op_FSD,         /* 64-bit floating-point store. */
+} RISCV64FpLdStOp;
+
+/* RISCV64in_CAS sub-types, one per operand width. */
+typedef enum {
+   RISCV64op_CAS_D = 0xe00, /* 64-bit compare-and-swap pseudoinstruction. */
+   RISCV64op_CAS_W,         /* 32-bit compare-and-swap pseudoinstruction. */
+} RISCV64CASOp;
+
+/* The kind of instruction; selects the active member of the union below. */
+typedef enum {
+   RISCV64in_LI = 0x52640000, /* Load immediate pseudoinstruction. */
+   RISCV64in_MV,              /* Copy one register to another. */
+   RISCV64in_ALU,             /* Computational binary instruction. */
+   RISCV64in_ALUImm,          /* Computational binary instruction, with
+                                 an immediate as the second input. */
+   RISCV64in_Load,            /* Load from memory (sign-extended). */
+   RISCV64in_Store,           /* Store to memory. */
+   RISCV64in_LoadR,           /* Load-reserved from memory (sign-extended). */
+   RISCV64in_StoreC,          /* Store-conditional to memory. */
+   RISCV64in_CSRRW,           /* Atomic swap of values in a CSR and an integer
+                                 register. */
+   RISCV64in_FpUnary,         /* Floating-point unary instruction. */
+   RISCV64in_FpBinary,        /* Floating-point binary instruction. */
+   RISCV64in_FpTernary,       /* Floating-point ternary instruction. */
+   RISCV64in_FpMove,          /* Floating-point move instruction. */
+   RISCV64in_FpConvert,       /* Floating-point convert instruction. */
+   RISCV64in_FpCompare,       /* Floating-point compare instruction. */
+   RISCV64in_FpLdSt,          /* Floating-point load/store instruction. */
+   RISCV64in_FpCSEL,          /* FP conditional-select pseudoinstruction. */
+   RISCV64in_CAS,             /* Compare-and-swap pseudoinstruction. */
+   RISCV64in_FENCE,           /* Device I/O and memory fence. */
+   RISCV64in_CSEL,            /* Conditional-select pseudoinstruction. */
+   RISCV64in_Call,            /* Call pseudoinstruction. */
+   RISCV64in_XDirect,         /* Direct transfer to guest address. */
+   RISCV64in_XIndir,          /* Indirect transfer to guest address. */
+   RISCV64in_XAssisted,       /* Assisted transfer to guest address. */
+   RISCV64in_EvCheck,         /* Event check. */
+   RISCV64in_ProfInc          /* 64-bit profile counter increment. */
+} RISCV64InstrTag;
+
+typedef struct {
+   RISCV64InstrTag tag; /* Tells which member of RISCV64in is in use. */
+   union {
+      /* Load immediate pseudoinstruction. */
+      struct {
+         HReg  dst;
+         ULong imm64;
+      } LI;
+      /* Copy one register to another. */
+      struct {
+         HReg dst;
+         HReg src;
+      } MV;
+      /* Computational binary instruction. */
+      struct {
+         RISCV64ALUOp op;
+         HReg         dst;
+         HReg         src1;
+         HReg         src2;
+      } ALU;
+      /* Computational binary instruction, with an immediate as the second
+         input. */
+      struct {
+         RISCV64ALUImmOp op;
+         HReg            dst;
+         HReg            src;
+         Int             imm12; /* simm12 or uimm6 */
+      } ALUImm;
+      /* Load from memory (sign-extended). */
+      struct {
+         RISCV64LoadOp op;
+         HReg          dst;
+         HReg          base;
+         Int           soff12; /* -2048 .. +2047 */
+      } Load;
+      /* Store to memory. */
+      struct {
+         RISCV64StoreOp op;
+         HReg           src;
+         HReg           base;
+         Int            soff12; /* -2048 .. +2047 */
+      } Store;
+      /* Load-reserved from memory (sign-extended). */
+      struct {
+         RISCV64LoadROp op;
+         HReg           dst;
+         HReg           addr;
+      } LoadR;
+      /* Store-conditional to memory. */
+      struct {
+         RISCV64StoreCOp op;
+         HReg            res;
+         HReg            src;
+         HReg            addr;
+      } StoreC;
+      /* Atomic swap of values in a CSR and an integer register. */
+      struct {
+         HReg dst;
+         HReg src;
+         UInt csr;
+      } CSRRW;
+      /* Floating-point unary instruction. */
+      struct {
+         RISCV64FpUnaryOp op;
+         HReg             dst;
+         HReg             src;
+      } FpUnary;
+      /* Floating-point binary instruction. */
+      struct {
+         RISCV64FpBinaryOp op;
+         HReg              dst;
+         HReg              src1;
+         HReg              src2;
+      } FpBinary;
+      /* Floating-point ternary instruction. */
+      struct {
+         RISCV64FpTernaryOp op;
+         HReg               dst;
+         HReg               src1;
+         HReg               src2;
+         HReg               src3;
+      } FpTernary;
+      /* Floating-point move instruction. */
+      struct {
+         RISCV64FpMoveOp op;
+         HReg            dst;
+         HReg            src;
+      } FpMove;
+      /* Floating-point convert instruction. */
+      struct {
+         RISCV64FpConvertOp op;
+         HReg               dst;
+         HReg               src;
+      } FpConvert;
+      /* Floating-point compare instruction. */
+      struct {
+         RISCV64FpCompareOp op;
+         HReg               dst;
+         HReg               src1;
+         HReg               src2;
+      } FpCompare;
+      /* Floating-point load/store instruction. */
+      struct {
+         RISCV64FpLdStOp op;
+         HReg            reg; /* dst for load, src for store */
+         HReg            base;
+         Int             soff12; /* -2048 .. +2047 */
+      } FpLdSt;
+      /* Floating-point conditional-select pseudoinstruction. */
+      struct {
+         HReg dst;
+         HReg iftrue;
+         HReg iffalse;
+         HReg cond;
+      } FpCSEL;
+      /* Compare-and-swap pseudoinstruction. */
+      struct {
+         RISCV64CASOp op;
+         HReg         old;
+         HReg         addr;
+         HReg         expd;
+         HReg         data;
+      } CAS;
+      /* Device I/O and memory fence. Carries no operands. */
+      struct {
+      } FENCE;
+      /* Conditional-select pseudoinstruction. */
+      struct {
+         HReg dst;
+         HReg iftrue;
+         HReg iffalse;
+         HReg cond;
+      } CSEL;
+      /* Call pseudoinstruction. Call a target (an absolute address), on a given
+         condition register. */
+      struct {
+         RetLoc rloc;      /* Where the return value will be. */
+         Addr64 target;    /* Target address of the call. */
+         HReg   cond;      /* Condition, can be INVALID_HREG for "always". */
+         UChar  nArgRegs;  /* # regs carrying integer args: 0 .. 8 */
+         UChar  nFArgRegs; /* # regs carrying floating-point args: 0 .. 8 */
+      } Call;
+      /* Update the guest pc value, then exit requesting to chain to it. May be
+         conditional. */
+      struct {
+         Addr64 dstGA;    /* Next guest address. */
+         HReg   base;     /* Base to access the guest state. */
+         Int    soff12;   /* Offset from the base register to access pc. */
+         HReg   cond;     /* Condition, can be INVALID_HREG for "always". */
+         Bool   toFastEP; /* Chain to the slow or fast point? */
+      } XDirect;
+      /* Boring transfer to a guest address not known at JIT time. Not
+         chainable. May be conditional. */
+      struct {
+         HReg dstGA;  /* Next guest address. */
+         HReg base;   /* Base to access the guest state. */
+         Int  soff12; /* Offset from the base register to access pc. */
+         HReg cond;   /* Condition, can be INVALID_HREG for "always". */
+      } XIndir;
+      /* Assisted transfer to a guest address, most general case. Not chainable.
+         May be conditional. */
+      struct {
+         HReg       dstGA;  /* Next guest address. */
+         HReg       base;   /* Base to access the guest state. */
+         Int        soff12; /* Offset from the base register to access pc. */
+         HReg       cond;   /* Condition, can be INVALID_HREG for "always". */
+         IRJumpKind jk;
+      } XAssisted;
+      /* Event check. */
+      struct {
+         HReg base_amCounter;   /* Base to access the guest state for
+                                   host_EvC_COUNTER. */
+         Int soff12_amCounter;  /* Offset from the base register to access
+                                   host_EvC_COUNTER. */
+         HReg base_amFailAddr;  /* Base to access the guest state for
+                                   host_EvC_FAILADDR. */
+         Int soff12_amFailAddr; /* Offset from the base register to access
+                                   host_EvC_FAILADDR. */
+      } EvCheck;
+      /* 64-bit profile counter increment. */
+      struct {
+         /* No fields. The address of the counter to inc is installed later,
+            post-translation, by patching it in, as it is not known at
+            translation time. */
+      } ProfInc;
+   } RISCV64in;
+} RISCV64Instr;
+
+RISCV64Instr* RISCV64Instr_LI(HReg dst, ULong imm64);
+RISCV64Instr* RISCV64Instr_MV(HReg dst, HReg src);
+RISCV64Instr* RISCV64Instr_ALU(RISCV64ALUOp op, HReg dst, HReg src1, HReg src2);
+RISCV64Instr*
+RISCV64Instr_ALUImm(RISCV64ALUImmOp op, HReg dst, HReg src, Int imm12);
+RISCV64Instr*
+RISCV64Instr_Load(RISCV64LoadOp op, HReg dst, HReg base, Int soff12);
+RISCV64Instr*
+RISCV64Instr_Store(RISCV64StoreOp op, HReg src, HReg base, Int soff12);
+RISCV64Instr* RISCV64Instr_LoadR(RISCV64LoadROp op, HReg dst, HReg addr);
+RISCV64Instr*
+RISCV64Instr_StoreC(RISCV64StoreCOp op, HReg res, HReg src, HReg addr);
+RISCV64Instr* RISCV64Instr_CSRRW(HReg dst, HReg src, UInt csr);
+RISCV64Instr* RISCV64Instr_FpUnary(RISCV64FpUnaryOp op, HReg dst, HReg src);
+RISCV64Instr*
+RISCV64Instr_FpBinary(RISCV64FpBinaryOp op, HReg dst, HReg src1, HReg src2);
+RISCV64Instr* RISCV64Instr_FpTernary(
+   RISCV64FpTernaryOp op, HReg dst, HReg src1, HReg src2, HReg src3);
+RISCV64Instr* RISCV64Instr_FpMove(RISCV64FpMoveOp op, HReg dst, HReg src);
+RISCV64Instr* RISCV64Instr_FpConvert(RISCV64FpConvertOp op, HReg dst, HReg src);
+RISCV64Instr*
+RISCV64Instr_FpCompare(RISCV64FpCompareOp op, HReg dst, HReg src1, HReg src2);
+RISCV64Instr*
+RISCV64Instr_FpLdSt(RISCV64FpLdStOp op, HReg reg, HReg base, Int soff12);
+RISCV64Instr*
+RISCV64Instr_FpCSEL(HReg dst, HReg iftrue, HReg iffalse, HReg cond);
+RISCV64Instr*
+RISCV64Instr_CAS(RISCV64CASOp op, HReg old, HReg addr, HReg expd, HReg data);
+RISCV64Instr* RISCV64Instr_FENCE(void);
+RISCV64Instr* RISCV64Instr_CSEL(HReg dst, HReg iftrue, HReg iffalse, HReg cond);
+RISCV64Instr* RISCV64Instr_Call(
+   RetLoc rloc, Addr64 target, HReg cond, UChar nArgRegs, UChar nFArgRegs);
+RISCV64Instr* RISCV64Instr_XDirect(
+   Addr64 dstGA, HReg base, Int soff12, HReg cond, Bool toFastEP);
+RISCV64Instr* RISCV64Instr_XIndir(HReg dstGA, HReg base, Int soff12, HReg cond);
+RISCV64Instr* RISCV64Instr_XAssisted(
+   HReg dstGA, HReg base, Int soff12, HReg cond, IRJumpKind jk);
+RISCV64Instr* RISCV64Instr_EvCheck(HReg base_amCounter,
+                                   Int  soff12_amCounter,
+                                   HReg base_amFailAddr,
+                                   Int  soff12_amFailAddr);
+RISCV64Instr* RISCV64Instr_ProfInc(void);
+
+/*------------------------------------------------------------*/
+/*--- Misc helpers                                         ---*/
+/*------------------------------------------------------------*/
+
+static inline HReg get_baseblock_register(void) { return hregRISCV64_x8(); }
+#define BASEBLOCK_OFFSET_ADJUSTMENT 2048
+
+/*------------------------------------------------------------*/
+/* --- Interface exposed to VEX                           --- */
+/*------------------------------------------------------------*/
+
+UInt ppHRegRISCV64(HReg reg);
+
+void ppRISCV64Instr(const RISCV64Instr* i, Bool mode64);
+
+const RRegUniverse* getRRegUniverse_RISCV64(void);
+
+/* Some functions that insulate the register allocator from details of the
+   underlying instruction set. */
+void getRegUsage_RISCV64Instr(HRegUsage* u, const RISCV64Instr* i, Bool mode64);
+void mapRegs_RISCV64Instr(HRegRemap* m, RISCV64Instr* i, Bool mode64);
+
+void genSpill_RISCV64(
+   /*OUT*/ HInstr** i1, /*OUT*/ HInstr** i2, HReg rreg, Int offset, Bool);
+void genReload_RISCV64(
+   /*OUT*/ HInstr** i1, /*OUT*/ HInstr** i2, HReg rreg, Int offset, Bool);
+RISCV64Instr* genMove_RISCV64(HReg from, HReg to, Bool);
+
+Int emit_RISCV64Instr(/*MB_MOD*/ Bool*    is_profInc,
+                      UChar*              buf,
+                      Int                 nbuf,
+                      const RISCV64Instr* i,
+                      Bool                mode64,
+                      VexEndness          endness_host,
+                      const void*         disp_cp_chain_me_to_slowEP,
+                      const void*         disp_cp_chain_me_to_fastEP,
+                      const void*         disp_cp_xindir,
+                      const void*         disp_cp_xassisted);
+
+/* Return the number of bytes of code needed for an event check. */
+Int evCheckSzB_RISCV64(void);
+
+/* Chain an XDirect jump to its target, respectively undo such a chaining. */
+VexInvalRange chainXDirect_RISCV64(VexEndness  endness_host,
+                                   void*       place_to_chain,
+                                   const void* disp_cp_chain_me_EXPECTED,
+                                   const void* place_to_jump_to);
+
+VexInvalRange unchainXDirect_RISCV64(VexEndness  endness_host,
+                                     void*       place_to_unchain,
+                                     const void* place_to_jump_to_EXPECTED,
+                                     const void* disp_cp_chain_me);
+
+/* Patch the counter location into an existing ProfInc point. */
+VexInvalRange patchProfInc_RISCV64(VexEndness   endness_host,
+                                   void*        place_to_patch,
+                                   const ULong* location_of_counter);
+
+HInstrArray* iselSB_RISCV64(const IRSB*        bb,
+                            VexArch            arch_host,
+                            const VexArchInfo* archinfo_host,
+                            const VexAbiInfo*  vbi,
+                            Int                offs_Host_EvC_Counter,
+                            Int                offs_Host_EvC_FailAddr,
+                            Bool               chainingAllowed,
+                            Bool               addProfInc,
+                            Addr               max_ga);
+
+#endif /* ndef __VEX_HOST_RISCV64_DEFS_H */
+
+/*--------------------------------------------------------------------*/
+/*--- end                                      host_riscv64_defs.h ---*/
+/*--------------------------------------------------------------------*/
diff --git a/VEX/priv/host_riscv64_isel.c b/VEX/priv/host_riscv64_isel.c
new file mode 100644
index 000000000..7c26e7073
--- /dev/null
+++ b/VEX/priv/host_riscv64_isel.c
@@ -0,0 +1,2107 @@
+
+/*--------------------------------------------------------------------*/
+/*--- begin                                    host_riscv64_isel.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright (C) 2020-2023 Petr Pavlu
+      petr.pavlu@dagobah.cz
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "host_riscv64_defs.h"
+#include "main_globals.h"
+#include "main_util.h"
+
+/*------------------------------------------------------------*/
+/*--- ISelEnv                                              ---*/
+/*------------------------------------------------------------*/
+
+/* This carries around:
+
+   - A mapping from IRTemp to IRType, giving the type of any IRTemp we might
+     encounter. This is computed before insn selection starts, and does not
+     change.
+
+   - A mapping from IRTemp to HReg. This tells the insn selector which virtual
+     register is associated with each IRTemp temporary. This is computed before
+     insn selection starts, and does not change. We expect this mapping to map
+     precisely the same set of IRTemps as the type mapping does.
+
+     - vregmap   holds the primary register for the IRTemp.
+     - vregmapHI is only used for 128-bit integer-typed IRTemps. It holds the
+                 identity of a second 64-bit virtual HReg, which holds the high
+                 half of the value.
+
+   - The code array, that is, the insns selected so far.
+
+   - A counter, for generating new virtual registers.
+
+   - The host hardware capabilities word. This is set at the start and does not
+     change.
+
+   - A Bool for indicating whether we may generate chain-me instructions for
+     control flow transfers, or whether we must use XAssisted.
+
+   - The maximum guest address of any guest insn in this block. Actually, the
+     address of the highest-addressed byte from any insn in this block. Is set
+     at the start and does not change. This is used for detecting jumps which
+     are definitely forward-edges from this block, and therefore can be made
+     (chained) to the fast entry point of the destination, thereby avoiding the
+     destination's event check.
+
+   - An IRExpr*, which may be NULL, holding the IR expression (an
+     IRRoundingMode-encoded value) to which the FPU's rounding mode was most
+     recently set. Setting to NULL is always safe. Used to avoid redundant
+     settings of the FPU's rounding mode, as described in
+     set_fcsr_rounding_mode() below.
+
+   Note, this is all (well, mostly) host-independent.
+*/
+
+typedef struct {
+   /* Constant -- are set at the start and do not change. */
+   IRTypeEnv* type_env; /* Type of every IRTemp that may be encountered. */
+
+   HReg* vregmap;   /* Primary virtual register of each IRTemp. */
+   HReg* vregmapHI; /* High-half register, 128-bit integer IRTemps only. */
+   Int   n_vregmap; /* Bound on IRTemps checked by lookupIRTemp(). */
+
+   UInt hwcaps; /* Host hardware capabilities word. */
+
+   Bool   chainingAllowed; /* May chain-me instructions be generated? */
+   Addr64 max_ga;          /* Highest guest address of any insn byte. */
+
+   /* These are modified as we go along. */
+   HInstrArray* code;     /* Instructions selected so far. */
+   Int          vreg_ctr; /* Counter for generating new virtual registers. */
+
+   IRExpr* previous_rm; /* Most recently set rounding mode, or NULL. */
+} ISelEnv;
+
+static HReg lookupIRTemp(ISelEnv* env, IRTemp tmp)
+{
+   vassert(tmp < env->n_vregmap); /* Reject IRTemps outside of the mapping. */
+   return env->vregmap[tmp];      /* Primary virtual register of |tmp|. */
+}
+
+static void addInstr(ISelEnv* env, RISCV64Instr* instr)
+{
+   addHInstr(env->code, instr);
+   if ((vex_traceflags & VEX_TRACE_VCODE) == 0)
+      return; /* Tracing of the selected code is not enabled. */
+   ppRISCV64Instr(instr, True /*mode64*/);
+   vex_printf("\n");
+}
+
+static HReg newVRegI(ISelEnv* env)
+{
+   /* Hand out the next unused virtual register of the 64-bit integer class. */
+   Int index = env->vreg_ctr++;
+   return mkHReg(True /*virtual*/, HRcInt64, 0, index);
+}
+
+static HReg newVRegF(ISelEnv* env)
+{
+   /* Hand out the next unused virtual register of the 64-bit float class. */
+   Int index = env->vreg_ctr++;
+   return mkHReg(True /*virtual*/, HRcFlt64, 0, index);
+}
+
+/*------------------------------------------------------------*/
+/*--- ISEL: Forward declarations                           ---*/
+/*------------------------------------------------------------*/
+
+/* These are organised as iselXXX and iselXXX_wrk pairs. The iselXXX_wrk
+   functions do the real work, but must never be called directly. For each
+   XXX, iselXXX calls its iselXXX_wrk counterpart, then checks that all
+   returned registers are virtual. */
+
+static HReg iselIntExpr_R(ISelEnv* env, IRExpr* e);
+static void iselInt128Expr(HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e);
+static HReg iselFltExpr(ISelEnv* env, IRExpr* e);
+
+/*------------------------------------------------------------*/
+/*--- ISEL: FP rounding mode helpers                       ---*/
+/*------------------------------------------------------------*/
+
+/* Make the host FP rounding mode follow |mode|, an I32-typed expression that
+   denotes an IRRoundingMode value. The converted mode is written to the frm
+   CSR.
+
+   Every change of the rounding mode must go through this function, otherwise
+   the caching in env->previous_rm breaks. The comment on
+   set_FPCR_rounding_mode() in the AArch64 backend explains how the mechanism
+   relies on the SSA property of IR and CSE. */
+static void set_fcsr_rounding_mode(ISelEnv* env, IRExpr* mode)
+{
+   vassert(typeOfIRExpr(env->type_env, mode) == Ity_I32);
+
+   /* Nothing to do when |mode| reads the same IRTemp as the previous request,
+      because the rounding mode then already has the wanted value. */
+   IRExpr* prev = env->previous_rm;
+   if (prev && prev->tag == Iex_RdTmp && mode->tag == Iex_RdTmp &&
+       prev->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
+      vassert(typeOfIRExpr(env->type_env, prev) == Ity_I32);
+      return;
+   }
+
+   /* Otherwise remember the new mode and emit code that installs it. */
+   env->previous_rm = mode;
+
+   /* Translation of the rounding mode encodings:
+
+      rounding mode                 |  IR  | RISC-V
+      ---------------------------------------------
+      to nearest, ties to even      | 0000 |   000
+      to -infinity                  | 0001 |   011
+      to +infinity                  | 0010 |   010
+      to zero                       | 0011 |   001
+      to nearest, ties away from 0  | 0100 |   100
+      prepare for shorter precision | 0101 |   111
+      to away from 0                | 0110 |   111
+      to nearest, ties towards 0    | 0111 |   111
+      invalid                       | 1000 |   111
+
+      Modes which RISC-V does not support all map to 111, the dynamic mode,
+      which is always invalid in fcsr and raises an illegal instruction
+      exception.
+
+      The conversion is computed without branches as:
+         shifted  = 30 >> irMode
+         amount   = shifted & 19
+         biased   = shifted + 7
+         packed   = amount + biased
+         hostMode = packed >> amount */
+   HReg irMode   = iselIntExpr_R(env, mode);
+   HReg thirty   = newVRegI(env);
+   HReg shifted  = newVRegI(env);
+   HReg amount   = newVRegI(env);
+   HReg biased   = newVRegI(env);
+   HReg packed   = newVRegI(env);
+   HReg hostMode = newVRegI(env);
+   addInstr(env, RISCV64Instr_LI(thirty, 30));
+   addInstr(env, RISCV64Instr_ALU(RISCV64op_SRL, shifted, thirty, irMode));
+   addInstr(env, RISCV64Instr_ALUImm(RISCV64op_ANDI, amount, shifted, 19));
+   addInstr(env, RISCV64Instr_ALUImm(RISCV64op_ADDI, biased, shifted, 7));
+   addInstr(env, RISCV64Instr_ALU(RISCV64op_ADD, packed, amount, biased));
+   addInstr(env, RISCV64Instr_ALU(RISCV64op_SRL, hostMode, packed, amount));
+   addInstr(env,
+            RISCV64Instr_CSRRW(hregRISCV64_x0(), hostMode, 0x002 /*frm*/));
+}
+
+/*------------------------------------------------------------*/
+/*--- ISEL: Function call helpers                          ---*/
+/*------------------------------------------------------------*/
+
+/* Helper for doHelperCall() only; the big comment in doHelperCall() explains
+   how register-parameter arguments are handled. Tells whether evaluating the
+   expression |e| might need a fixed register. When unsure, the answer is True,
+   which is safe but suboptimal.
+*/
+static Bool mightRequireFixedRegs(IRExpr* e)
+{
+   /* The special VECRET and GSPTR markers are always "safe" -- they are either
+      a copy of x2/sp in some arbitrary vreg, or a copy of x8/s0,
+      respectively. */
+   if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e)))
+      return False;
+
+   /* Otherwise it's a "normal" expression. Only IRExpr_RdTmp, IRExpr_Const
+      and IRExpr_Get are regarded as evaluable without fixed registers;
+      anything else conservatively reports True. */
+   if (e->tag == Iex_RdTmp)
+      return False;
+   if (e->tag == Iex_Const || e->tag == Iex_Get)
+      return False;
+   return True;
+}
+
+/* Do a complete function call. |guard| is an Ity_I1 expression indicating
+   whether or not the call happens. If guard==NULL, the call is unconditional.
+   |retloc| is set to indicate where the return value is after the call. The
+   caller (of this fn) must generate code to add |stackAdjustAfterCall| to the
+   stack pointer after the call is done. Returns True iff it managed to handle
+   this combination of arg/return types, else returns False. */
+static Bool doHelperCall(/*OUT*/ UInt*   stackAdjustAfterCall,
+                         /*OUT*/ RetLoc* retloc,
+                         ISelEnv*        env,
+                         IRExpr*         guard,
+                         IRCallee*       cee,
+                         IRType          retTy,
+                         IRExpr**        args)
+{
+   /* Set default returns. We'll update them later if needed. */
+   *stackAdjustAfterCall = 0;
+   *retloc               = mk_RetLoc_INVALID();
+
+   /* Marshal args for a call and do the call.
+
+      This function only deals with a limited set of possibilities, which cover
+      all helpers in practice. The restrictions are that only the following
+      arguments are supported:
+      * RISCV64_N_ARGREGS x Ity_I32/Ity_I64 values, passed in x10/a0 .. x17/a7,
+      * RISCV64_N_FARGREGS x Ity_F32/Ity_F64 values, passed in f10/fa0 ..
+        f17/fa7.
+
+      Note that the cee->regparms field is meaningless on riscv64 hosts (since
+      we only implement one calling convention) and so we always ignore it.
+
+      The return type can be I{8,16,32,64}, V128 or V256. In the vector cases,
+      it is expected that |args| will contain the special node IRExpr_VECRET(),
+      in which case this routine generates code to allocate space on the stack
+      for the vector return value. Since we are not passing any scalars on the
+      stack, it is enough to preallocate the return space before marshalling any
+      arguments, in this case.
+
+      |args| may also contain IRExpr_GSPTR(), in which case the value in the
+      guest state pointer register minus BASEBLOCK_OFFSET_ADJUSTMENT is passed.
+      NOTE(review): the code below copies x8/s0 unadjusted -- please confirm.
+
+      Generating code which is both efficient and correct when parameters are to
+      be passed in registers is difficult, for the reasons elaborated in detail
+      in comments attached to doHelperCall() in VEX/priv/host_x86_isel.c. Here,
+      we use a variant of the method described in those comments.
+
+      The problem is split into two cases: the fast scheme and the slow scheme.
+      In the fast scheme, arguments are computed directly into the target (real)
+      registers. This is only safe when we can be sure that computation of each
+      argument will not trash any real registers set by computation of any other
+      argument.
+
+      In the slow scheme, all args are first computed into vregs, and once they
+      are all done, they are moved to the relevant real regs. This always gives
+      correct code, but it also gives a bunch of vreg-to-rreg moves which are
+      usually redundant but are hard for the register allocator to get rid of.
+
+      To decide which scheme to use, all argument expressions are first
+      examined. If they are all so simple that it is clear they will be
+      evaluated without use of any fixed registers, use the fast scheme, else
+      use the slow scheme. Note also that only unconditional calls may use the
+      fast scheme, since having to compute a condition expression could itself
+      trash real registers.
+
+      Note this requires being able to examine an expression and determine
+      whether or not evaluation of it might use a fixed register. That requires
+      knowledge of how the rest of this insn selector works. Currently just the
+      following 3 are regarded as safe -- hopefully they cover the majority of
+      arguments in practice: IRExpr_RdTmp, IRExpr_Const, IRExpr_Get.
+   */
+
+   /* These are used for cross-checking that IR-level constraints on the use of
+      IRExpr_VECRET() and IRExpr_GSPTR() are observed. */
+   UInt nVECRETs = 0;
+   UInt nGSPTRs  = 0;
+
+   UInt n_args = 0;
+   for (UInt i = 0; args[i] != NULL; i++) {
+      IRExpr* arg = args[i];
+      if (UNLIKELY(arg->tag == Iex_VECRET))
+         nVECRETs++;
+      else if (UNLIKELY(arg->tag == Iex_GSPTR))
+         nGSPTRs++;
+      n_args++;
+   }
+
+   /* If this fails, the IR is ill-formed. */
+   vassert(nGSPTRs == 0 || nGSPTRs == 1);
+
+   /* If we have a VECRET, allocate space on the stack for the return value, and
+      record the stack pointer after that. */
+   HReg r_vecRetAddr = INVALID_HREG;
+   if (nVECRETs == 1) {
+      vassert(retTy == Ity_V128 || retTy == Ity_V256);
+      r_vecRetAddr = newVRegI(env);
+      addInstr(env, RISCV64Instr_ALUImm(RISCV64op_ADDI, hregRISCV64_x2(),
+                                        hregRISCV64_x2(),
+                                        retTy == Ity_V128 ? -16 : -32));
+      addInstr(env, RISCV64Instr_MV(r_vecRetAddr, hregRISCV64_x2()));
+   } else {
+      /* If either of these fail, the IR is ill-formed. */
+      vassert(retTy != Ity_V128 && retTy != Ity_V256);
+      vassert(nVECRETs == 0);
+   }
+
+   /* First decide which scheme (slow or fast) is to be used. First assume the
+      fast scheme, and select slow if any contraindications (wow) appear. */
+   Bool go_fast = True;
+
+   /* We'll need space on the stack for the return value. Avoid possible
+      complications with nested calls by using the slow scheme. */
+   if (retTy == Ity_V128 || retTy == Ity_V256)
+      go_fast = False;
+
+   if (go_fast && guard != NULL) {
+      if (guard->tag == Iex_Const && guard->Iex.Const.con->tag == Ico_U1 &&
+          guard->Iex.Const.con->Ico.U1 == True) {
+         /* Unconditional. */
+      } else {
+         /* Not manifestly unconditional -- be conservative. */
+         go_fast = False;
+      }
+   }
+
+   if (go_fast)
+      for (UInt i = 0; i < n_args; i++) {
+         if (mightRequireFixedRegs(args[i])) {
+            go_fast = False;
+            break;
+         }
+      }
+
+   /* The scheme to use has now been established. Generate code to get the arg
+      values into the argument regs. If we run out of arg regs, give up. */
+
+   HReg argregs[RISCV64_N_ARGREGS];
+   HReg fargregs[RISCV64_N_FARGREGS];
+
+   vassert(RISCV64_N_ARGREGS == 8);
+   vassert(RISCV64_N_FARGREGS == 8);
+
+   argregs[0] = hregRISCV64_x10();
+   argregs[1] = hregRISCV64_x11();
+   argregs[2] = hregRISCV64_x12();
+   argregs[3] = hregRISCV64_x13();
+   argregs[4] = hregRISCV64_x14();
+   argregs[5] = hregRISCV64_x15();
+   argregs[6] = hregRISCV64_x16();
+   argregs[7] = hregRISCV64_x17();
+
+   fargregs[0] = hregRISCV64_f10();
+   fargregs[1] = hregRISCV64_f11();
+   fargregs[2] = hregRISCV64_f12();
+   fargregs[3] = hregRISCV64_f13();
+   fargregs[4] = hregRISCV64_f14();
+   fargregs[5] = hregRISCV64_f15();
+   fargregs[6] = hregRISCV64_f16();
+   fargregs[7] = hregRISCV64_f17();
+
+   HReg tmpregs[RISCV64_N_ARGREGS];
+   HReg ftmpregs[RISCV64_N_FARGREGS];
+   Int  nextArgReg = 0, nextFArgReg = 0;
+   HReg cond;
+
+   if (go_fast) {
+      /* FAST SCHEME */
+      for (UInt i = 0; i < n_args; i++) {
+         IRExpr* arg = args[i];
+
+         IRType aTy = Ity_INVALID;
+         if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
+            aTy = typeOfIRExpr(env->type_env, args[i]);
+
+         if (aTy == Ity_I32 || aTy == Ity_I64) {
+            if (nextArgReg >= RISCV64_N_ARGREGS)
+               return False; /* Out of argregs. */
+            addInstr(env, RISCV64Instr_MV(argregs[nextArgReg],
+                                          iselIntExpr_R(env, args[i])));
+            nextArgReg++;
+         } else if (aTy == Ity_F32 || aTy == Ity_F64) {
+            if (nextFArgReg >= RISCV64_N_FARGREGS)
+               return False; /* Out of fargregs. */
+            addInstr(env,
+                     RISCV64Instr_FpMove(RISCV64op_FMV_D, fargregs[nextFArgReg],
+                                         iselFltExpr(env, args[i])));
+            nextFArgReg++;
+         } else if (arg->tag == Iex_GSPTR) {
+            if (nextArgReg >= RISCV64_N_ARGREGS)
+               return False; /* Out of argregs. */
+            addInstr(env,
+                     RISCV64Instr_MV(argregs[nextArgReg], hregRISCV64_x8()));
+            nextArgReg++;
+         } else if (arg->tag == Iex_VECRET) {
+            /* Because of the go_fast logic above, we can't get here, since
+               vector return values make us use the slow path instead. */
+            vassert(0);
+         } else
+            return False; /* Unhandled arg type. */
+      }
+
+      /* Fast scheme only applies for unconditional calls. Hence: */
+      cond = INVALID_HREG;
+
+   } else {
+      /* SLOW SCHEME; move via temporaries. */
+      for (UInt i = 0; i < n_args; i++) {
+         IRExpr* arg = args[i];
+
+         IRType aTy = Ity_INVALID;
+         if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
+            aTy = typeOfIRExpr(env->type_env, args[i]);
+
+         if (aTy == Ity_I32 || aTy == Ity_I64) {
+            if (nextArgReg >= RISCV64_N_ARGREGS)
+               return False; /* Out of argregs. */
+            tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
+            nextArgReg++;
+         } else if (aTy == Ity_F32 || aTy == Ity_F64) {
+            if (nextFArgReg >= RISCV64_N_FARGREGS)
+               return False; /* Out of fargregs. */
+            ftmpregs[nextFArgReg] = iselFltExpr(env, args[i]);
+            nextFArgReg++;
+         } else if (arg->tag == Iex_GSPTR) {
+            if (nextArgReg >= RISCV64_N_ARGREGS)
+               return False; /* Out of argregs. */
+            tmpregs[nextArgReg] = hregRISCV64_x8();
+            nextArgReg++;
+         } else if (arg->tag == Iex_VECRET) {
+            if (nextArgReg >= RISCV64_N_ARGREGS)
+               return False; /* Out of argregs. */
+            vassert(!hregIsInvalid(r_vecRetAddr));
+            tmpregs[nextArgReg] = r_vecRetAddr;
+            nextArgReg++;
+         } else
+            return False; /* Unhandled arg type. */
+      }
+
+      /* Compute the condition. Be a bit clever to handle the common case where
+         the guard is 1:I1. */
+      cond = INVALID_HREG;
+      if (guard) {
+         if (guard->tag == Iex_Const && guard->Iex.Const.con->tag == Ico_U1 &&
+             guard->Iex.Const.con->Ico.U1 == True) {
+            /* Unconditional -- do nothing. */
+         } else {
+            cond = iselIntExpr_R(env, guard);
+         }
+      }
+
+      /* Move the args to their final destinations. */
+      for (UInt i = 0; i < nextArgReg; i++) {
+         vassert(!(hregIsInvalid(tmpregs[i])));
+         addInstr(env, RISCV64Instr_MV(argregs[i], tmpregs[i]));
+      }
+      for (UInt i = 0; i < nextFArgReg; i++) {
+         vassert(!(hregIsInvalid(ftmpregs[i])));
+         addInstr(env, RISCV64Instr_FpMove(RISCV64op_FMV_D, fargregs[i],
+                                           ftmpregs[i]));
+      }
+   }
+
+   /* Should be assured by checks above. */
+   vassert(nextArgReg <= RISCV64_N_ARGREGS);
+   vassert(nextFArgReg <= RISCV64_N_FARGREGS);
+
+   /* Do final checks, set the return values, and generate the call instruction
+      proper. */
+   vassert(nGSPTRs == 0 || nGSPTRs == 1);
+   vassert(nVECRETs == ((retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0));
+   vassert(*stackAdjustAfterCall == 0);
+   vassert(is_RetLoc_INVALID(*retloc));
+   switch (retTy) {
+   case Ity_INVALID:
+      /* Function doesn't return a value. */
+      *retloc = mk_RetLoc_simple(RLPri_None);
+      break;
+   case Ity_I8:
+   case Ity_I16:
+   case Ity_I32:
+   case Ity_I64:
+      *retloc = mk_RetLoc_simple(RLPri_Int);
+      break;
+   case Ity_V128:
+      *retloc               = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
+      *stackAdjustAfterCall = 16;
+      break;
+   case Ity_V256:
+      *retloc               = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
+      *stackAdjustAfterCall = 32;
+      break;
+   default:
+      /* IR can denote other possible return types, but we don't handle those
+         here. */
+      return False;
+   }
+
+   /* Finally, generate the call itself. This needs the *retloc value set in the
+      switch above, which is why it's at the end. */
+
+   /* nextArgReg doles out argument registers. Since these are assigned in the
+      order x10/a0 .. x17/a7, its numeric value at this point, which must be
+      between 0 and 8 inclusive, is going to be equal to the number of arg regs
+      in use for the call. Hence bake that number into the call (we'll need to
+      know it when doing register allocation, to know what regs the call reads.)
+
+      The same applies to nextFArgReg which records the number of used
+      floating-point registers f10/fa0 .. f17/fa7. */
+   addInstr(env, RISCV64Instr_Call(*retloc, (Addr64)cee->addr, cond, nextArgReg,
+                                   nextFArgReg));
+
+   return True;
+}
+
+/*------------------------------------------------------------*/
+/*--- ISEL: Integer expressions (64/32/16/8/1 bit)         ---*/
+/*------------------------------------------------------------*/
+
+/* Select insns for an integer-typed expression, and add them to the code list.
+   Return a reg holding the result. This reg will be a virtual register. THE
+   RETURNED REG MUST NOT BE MODIFIED. If you want to modify it, ask for a new
+   vreg, copy it in there, and modify the copy. The register allocator will do
+   its best to map both vregs to the same real register, so the copies will
+   often disappear later in the game.
+
+   This should handle expressions of 64, 32, 16, 8 and 1-bit type. All results
+   are returned in a 64-bit register. For an N-bit expression, the upper 64-N
+   bits are arbitrary, so you should mask or sign-extend partial values if
+   necessary.
+
+   The riscv64 backend however internally always extends the values as follows:
+   * a 32/16/8-bit integer result is sign-extended to 64 bits,
+   * a 1-bit logical result is zero-extended to 64 bits.
+
+   This scheme follows the approach taken by the RV64 ISA, which by default
+   sign-extends any 32/16/8-bit operation result to 64 bits. Matching the isel
+   with the ISA generally results in fewer instructions being required. For
+   instance, any Ico_U32 immediate can always be materialized using at most
+   two instructions (LUI+ADDIW).
+
+   An important consequence of this design is that any Iop_<N>Sto64 extension is
+   a no-op. On the other hand, any Iop_64to<N> operation must additionally
+   perform an N-bit sign-extension. This is the opposite of the situation in
+   most other VEX backends.
+*/
+
+/* -------------------------- Reg --------------------------- */
+
+/* Worker for iselIntExpr_R. Selects instructions for an expression of type
+   I64/I32/I16/I8/I1, dispatching on e->tag, appends them to the code list of
+   env and returns the register that holds the result. If no pattern matches,
+   the expression is printed and the function panics.
+
+   DO NOT CALL THIS DIRECTLY ! Use iselIntExpr_R, which additionally
+   sanity-checks the returned register. */
+static HReg iselIntExpr_R_wrk(ISelEnv* env, IRExpr* e)
+{
+   IRType ty = typeOfIRExpr(env->type_env, e);
+   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8 ||
+           ty == Ity_I1);
+
+   switch (e->tag) {
+   /* ------------------------ TEMP ------------------------- */
+   case Iex_RdTmp: {
+      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+   }
+
+   /* ------------------------ LOAD ------------------------- */
+   case Iex_Load: {
+      /* Only little-endian loads are handled. */
+      if (e->Iex.Load.end != Iend_LE)
+         goto irreducible;
+
+      HReg dst = newVRegI(env);
+      /* TODO Optimize the cases with small imm Add64/Sub64. */
+      HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
+
+      /* Pick the load width from the result type; the address register is
+         used with a zero offset. */
+      if (ty == Ity_I64)
+         addInstr(env, RISCV64Instr_Load(RISCV64op_LD, dst, addr, 0));
+      else if (ty == Ity_I32)
+         addInstr(env, RISCV64Instr_Load(RISCV64op_LW, dst, addr, 0));
+      else if (ty == Ity_I16)
+         addInstr(env, RISCV64Instr_Load(RISCV64op_LH, dst, addr, 0));
+      else if (ty == Ity_I8)
+         addInstr(env, RISCV64Instr_Load(RISCV64op_LB, dst, addr, 0));
+      else
+         goto irreducible;
+      return dst;
+   }
+
+   /* ---------------------- BINARY OP ---------------------- */
+   case Iex_Binop: {
+      /* TODO Optimize for small imms by generating <instr>i. */
+      switch (e->Iex.Binop.op) {
+      /* Operations that map onto a single register-register ALU
+         instruction. */
+      case Iop_Add64:
+      case Iop_Add32:
+      case Iop_Sub64:
+      case Iop_Sub32:
+      case Iop_Xor64:
+      case Iop_Xor32:
+      case Iop_Or64:
+      case Iop_Or32:
+      case Iop_Or1:
+      case Iop_And64:
+      case Iop_And32:
+      case Iop_And1:
+      case Iop_Shl64:
+      case Iop_Shl32:
+      case Iop_Shr64:
+      case Iop_Shr32:
+      case Iop_Sar64:
+      case Iop_Sar32:
+      case Iop_Mul64:
+      case Iop_Mul32:
+      case Iop_DivU64:
+      case Iop_DivU32:
+      case Iop_DivS64:
+      case Iop_DivS32: {
+         RISCV64ALUOp op;
+         switch (e->Iex.Binop.op) {
+         case Iop_Add64:
+            op = RISCV64op_ADD;
+            break;
+         case Iop_Add32:
+            op = RISCV64op_ADDW;
+            break;
+         case Iop_Sub64:
+            op = RISCV64op_SUB;
+            break;
+         case Iop_Sub32:
+            op = RISCV64op_SUBW;
+            break;
+         case Iop_Xor64:
+         case Iop_Xor32:
+            op = RISCV64op_XOR;
+            break;
+         case Iop_Or64:
+         case Iop_Or32:
+         case Iop_Or1:
+            op = RISCV64op_OR;
+            break;
+         case Iop_And64:
+         case Iop_And32:
+         case Iop_And1:
+            op = RISCV64op_AND;
+            break;
+         case Iop_Shl64:
+            op = RISCV64op_SLL;
+            break;
+         case Iop_Shl32:
+            op = RISCV64op_SLLW;
+            break;
+         case Iop_Shr64:
+            op = RISCV64op_SRL;
+            break;
+         case Iop_Shr32:
+            op = RISCV64op_SRLW;
+            break;
+         case Iop_Sar64:
+            op = RISCV64op_SRA;
+            break;
+         case Iop_Sar32:
+            op = RISCV64op_SRAW;
+            break;
+         case Iop_Mul64:
+            op = RISCV64op_MUL;
+            break;
+         case Iop_Mul32:
+            op = RISCV64op_MULW;
+            break;
+         case Iop_DivU64:
+            op = RISCV64op_DIVU;
+            break;
+         case Iop_DivU32:
+            op = RISCV64op_DIVUW;
+            break;
+         case Iop_DivS64:
+            op = RISCV64op_DIV;
+            break;
+         case Iop_DivS32:
+            op = RISCV64op_DIVW;
+            break;
+         default:
+            vassert(0);
+         }
+         HReg dst  = newVRegI(env);
+         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+         addInstr(env, RISCV64Instr_ALU(op, dst, argL, argR));
+         return dst;
+      }
+      case Iop_CmpEQ64:
+      case Iop_CmpEQ32:
+      case Iop_CasCmpEQ64:
+      case Iop_CasCmpEQ32: {
+         /* dst = ((argL - argR) <u 1), i.e. 1 iff the difference is zero. */
+         HReg tmp  = newVRegI(env);
+         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+         addInstr(env, RISCV64Instr_ALU(RISCV64op_SUB, tmp, argL, argR));
+         HReg dst = newVRegI(env);
+         addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SLTIU, dst, tmp, 1));
+         return dst;
+      }
+      case Iop_CmpNE64:
+      case Iop_CmpNE32:
+      case Iop_CasCmpNE64:
+      case Iop_CasCmpNE32: {
+         /* dst = (0 <u (argL - argR)), i.e. 1 iff the difference is non-zero. */
+         HReg tmp  = newVRegI(env);
+         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+         addInstr(env, RISCV64Instr_ALU(RISCV64op_SUB, tmp, argL, argR));
+         HReg dst = newVRegI(env);
+         addInstr(env,
+                  RISCV64Instr_ALU(RISCV64op_SLTU, dst, hregRISCV64_x0(), tmp));
+         return dst;
+      }
+      case Iop_CmpLT64S:
+      case Iop_CmpLT32S: {
+         HReg dst  = newVRegI(env);
+         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+         addInstr(env, RISCV64Instr_ALU(RISCV64op_SLT, dst, argL, argR));
+         return dst;
+      }
+      case Iop_CmpLE64S:
+      case Iop_CmpLE32S: {
+         /* argL <= argR is computed as !(argR < argL). */
+         HReg tmp  = newVRegI(env);
+         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+         addInstr(env, RISCV64Instr_ALU(RISCV64op_SLT, tmp, argR, argL));
+         HReg dst = newVRegI(env);
+         addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SLTIU, dst, tmp, 1));
+         return dst;
+      }
+      case Iop_CmpLT64U:
+      case Iop_CmpLT32U: {
+         HReg dst  = newVRegI(env);
+         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+         addInstr(env, RISCV64Instr_ALU(RISCV64op_SLTU, dst, argL, argR));
+         return dst;
+      }
+      case Iop_CmpLE64U:
+      case Iop_CmpLE32U: {
+         /* argL <=u argR is computed as !(argR <u argL). */
+         HReg tmp  = newVRegI(env);
+         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+         addInstr(env, RISCV64Instr_ALU(RISCV64op_SLTU, tmp, argR, argL));
+         HReg dst = newVRegI(env);
+         addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SLTIU, dst, tmp, 1));
+         return dst;
+      }
+      case Iop_Max32U: {
+         /* cond = (argL <u argR); the result is then selected between argR
+            and argL based on cond. */
+         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+         HReg cond = newVRegI(env);
+         addInstr(env, RISCV64Instr_ALU(RISCV64op_SLTU, cond, argL, argR));
+         HReg dst = newVRegI(env);
+         addInstr(env, RISCV64Instr_CSEL(dst, argR, argL, cond));
+         return dst;
+      }
+      case Iop_32HLto64: {
+         HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
+         HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
+
+         /* Clear the upper 32 bits of the low half (shift left then logical
+            shift right) so that they do not disturb the high half when the
+            two are OR-ed together. */
+         HReg lo32_tmp = newVRegI(env);
+         addInstr(env,
+                  RISCV64Instr_ALUImm(RISCV64op_SLLI, lo32_tmp, lo32s, 32));
+         HReg lo32 = newVRegI(env);
+         addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SRLI, lo32, lo32_tmp, 32));
+
+         HReg hi32 = newVRegI(env);
+         addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SLLI, hi32, hi32s, 32));
+
+         HReg dst = newVRegI(env);
+         addInstr(env, RISCV64Instr_ALU(RISCV64op_OR, dst, hi32, lo32));
+         return dst;
+      }
+      case Iop_DivModS32to32: {
+         /* The result packs the remainder into bits 63:32 and the quotient
+            into bits 31:0. */
+         /* TODO Improve in conjunction with Iop_64HIto32. */
+         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+
+         HReg remw = newVRegI(env);
+         addInstr(env, RISCV64Instr_ALU(RISCV64op_REMW, remw, argL, argR));
+         HReg remw_hi = newVRegI(env);
+         addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SLLI, remw_hi, remw, 32));
+
+         HReg divw = newVRegI(env);
+         addInstr(env, RISCV64Instr_ALU(RISCV64op_DIVW, divw, argL, argR));
+         HReg divw_hi = newVRegI(env);
+         addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SLLI, divw_hi, divw, 32));
+         HReg divw_lo = newVRegI(env);
+         addInstr(env,
+                  RISCV64Instr_ALUImm(RISCV64op_SRLI, divw_lo, divw_hi, 32));
+
+         HReg dst = newVRegI(env);
+         addInstr(env, RISCV64Instr_ALU(RISCV64op_OR, dst, remw_hi, divw_lo));
+         return dst;
+      }
+      case Iop_DivModU32to32: {
+         /* Same layout as Iop_DivModS32to32, using the unsigned
+            instructions. */
+         /* TODO Improve in conjunction with Iop_64HIto32. */
+         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+
+         HReg remuw = newVRegI(env);
+         addInstr(env, RISCV64Instr_ALU(RISCV64op_REMUW, remuw, argL, argR));
+         HReg remuw_hi = newVRegI(env);
+         addInstr(env,
+                  RISCV64Instr_ALUImm(RISCV64op_SLLI, remuw_hi, remuw, 32));
+
+         HReg divuw = newVRegI(env);
+         addInstr(env, RISCV64Instr_ALU(RISCV64op_DIVUW, divuw, argL, argR));
+         HReg divuw_hi = newVRegI(env);
+         addInstr(env,
+                  RISCV64Instr_ALUImm(RISCV64op_SLLI, divuw_hi, divuw, 32));
+         HReg divuw_lo = newVRegI(env);
+         addInstr(env,
+                  RISCV64Instr_ALUImm(RISCV64op_SRLI, divuw_lo, divuw_hi, 32));
+
+         HReg dst = newVRegI(env);
+         addInstr(env, RISCV64Instr_ALU(RISCV64op_OR, dst, remuw_hi, divuw_lo));
+         return dst;
+      }
+      case Iop_F32toI32S:
+      case Iop_F32toI32U:
+      case Iop_F32toI64S:
+      case Iop_F32toI64U: {
+         RISCV64FpConvertOp op;
+         switch (e->Iex.Binop.op) {
+         case Iop_F32toI32S:
+            op = RISCV64op_FCVT_W_S;
+            break;
+         case Iop_F32toI32U:
+            op = RISCV64op_FCVT_WU_S;
+            break;
+         case Iop_F32toI64S:
+            op = RISCV64op_FCVT_L_S;
+            break;
+         case Iop_F32toI64U:
+            op = RISCV64op_FCVT_LU_S;
+            break;
+         default:
+            vassert(0);
+         }
+         /* arg1 is the rounding mode, arg2 the value to convert. */
+         HReg dst = newVRegI(env);
+         HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
+         set_fcsr_rounding_mode(env, e->Iex.Binop.arg1);
+         addInstr(env, RISCV64Instr_FpConvert(op, dst, src));
+         return dst;
+      }
+      case Iop_CmpF32:
+      case Iop_CmpF64: {
+         HReg argL = iselFltExpr(env, e->Iex.Binop.arg1);
+         HReg argR = iselFltExpr(env, e->Iex.Binop.arg2);
+
+         /* Compute the three individual relations first. */
+         HReg lt = newVRegI(env);
+         HReg gt = newVRegI(env);
+         HReg eq = newVRegI(env);
+         if (e->Iex.Binop.op == Iop_CmpF32) {
+            addInstr(env,
+                     RISCV64Instr_FpCompare(RISCV64op_FLT_S, lt, argL, argR));
+            addInstr(env,
+                     RISCV64Instr_FpCompare(RISCV64op_FLT_S, gt, argR, argL));
+            addInstr(env,
+                     RISCV64Instr_FpCompare(RISCV64op_FEQ_S, eq, argL, argR));
+         } else {
+            addInstr(env,
+                     RISCV64Instr_FpCompare(RISCV64op_FLT_D, lt, argL, argR));
+            addInstr(env,
+                     RISCV64Instr_FpCompare(RISCV64op_FLT_D, gt, argR, argL));
+            addInstr(env,
+                     RISCV64Instr_FpCompare(RISCV64op_FEQ_D, eq, argL, argR));
+         }
+
+         /* Start from "unordered" and override it with whichever relation
+            holds:
+
+            t0 = Ircr_UN
+            t1 = Ircr_LT
+            t2 = csel t1, t0, lt
+            t3 = Ircr_GT
+            t4 = csel t3, t2, gt
+            t5 = Ircr_EQ
+            dst = csel t5, t4, eq
+         */
+         HReg t0 = newVRegI(env);
+         addInstr(env, RISCV64Instr_LI(t0, Ircr_UN));
+         HReg t1 = newVRegI(env);
+         addInstr(env, RISCV64Instr_LI(t1, Ircr_LT));
+         HReg t2 = newVRegI(env);
+         addInstr(env, RISCV64Instr_CSEL(t2, t1, t0, lt));
+         HReg t3 = newVRegI(env);
+         addInstr(env, RISCV64Instr_LI(t3, Ircr_GT));
+         HReg t4 = newVRegI(env);
+         addInstr(env, RISCV64Instr_CSEL(t4, t3, t2, gt));
+         HReg t5 = newVRegI(env);
+         addInstr(env, RISCV64Instr_LI(t5, Ircr_EQ));
+         HReg dst = newVRegI(env);
+         addInstr(env, RISCV64Instr_CSEL(dst, t5, t4, eq));
+         return dst;
+      }
+      case Iop_F64toI32S:
+      case Iop_F64toI32U:
+      case Iop_F64toI64S:
+      case Iop_F64toI64U: {
+         RISCV64FpConvertOp op;
+         switch (e->Iex.Binop.op) {
+         case Iop_F64toI32S:
+            op = RISCV64op_FCVT_W_D;
+            break;
+         case Iop_F64toI32U:
+            op = RISCV64op_FCVT_WU_D;
+            break;
+         case Iop_F64toI64S:
+            op = RISCV64op_FCVT_L_D;
+            break;
+         case Iop_F64toI64U:
+            op = RISCV64op_FCVT_LU_D;
+            break;
+         default:
+            vassert(0);
+         }
+         /* arg1 is the rounding mode, arg2 the value to convert. */
+         HReg dst = newVRegI(env);
+         HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
+         set_fcsr_rounding_mode(env, e->Iex.Binop.arg1);
+         addInstr(env, RISCV64Instr_FpConvert(op, dst, src));
+         return dst;
+      }
+      default:
+         break;
+      }
+
+      break;
+   }
+
+   /* ---------------------- UNARY OP ----------------------- */
+   case Iex_Unop: {
+      switch (e->Iex.Unop.op) {
+      case Iop_Not64:
+      case Iop_Not32: {
+         HReg dst = newVRegI(env);
+         HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+         addInstr(env, RISCV64Instr_ALUImm(RISCV64op_XORI, dst, src, -1));
+         return dst;
+      }
+      case Iop_Not1: {
+         /* With src being 0 or 1, (src <u 1) yields its logical negation. */
+         HReg dst = newVRegI(env);
+         HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+         addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SLTIU, dst, src, 1));
+         return dst;
+      }
+      case Iop_8Uto32:
+      case Iop_8Uto64:
+      case Iop_16Uto64:
+      case Iop_32Uto64: {
+         /* Zero-extend from the width of the argument: shift the value to the
+            top of the register and logically shift it back. */
+         UInt shift =
+            64 - 8 * sizeofIRType(typeOfIRExpr(env->type_env, e->Iex.Unop.arg));
+         HReg tmp = newVRegI(env);
+         HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+         addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SLLI, tmp, src, shift));
+         HReg dst = newVRegI(env);
+         addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SRLI, dst, tmp, shift));
+         return dst;
+      }
+      case Iop_1Sto32:
+      case Iop_1Sto64: {
+         /* Replicate bit 0 into all 64 bits. */
+         HReg tmp = newVRegI(env);
+         HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+         addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SLLI, tmp, src, 63));
+         HReg dst = newVRegI(env);
+         addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SRAI, dst, tmp, 63));
+         return dst;
+      }
+      case Iop_1Uto64:
+      case Iop_8Sto64:
+      case Iop_16Sto64:
+      case Iop_32Sto64:
+         /* These are no-ops. */
+         return iselIntExpr_R(env, e->Iex.Unop.arg);
+      case Iop_32to8:
+      case Iop_32to16:
+      case Iop_64to8:
+      case Iop_64to16:
+      case Iop_64to32: {
+         /* Narrow and sign-extend from the width of the result type: shift the
+            value to the top of the register and arithmetically shift it
+            back. */
+         UInt shift = 64 - 8 * sizeofIRType(ty);
+         HReg tmp   = newVRegI(env);
+         HReg src   = iselIntExpr_R(env, e->Iex.Unop.arg);
+         addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SLLI, tmp, src, shift));
+         HReg dst = newVRegI(env);
+         addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SRAI, dst, tmp, shift));
+         return dst;
+      }
+      case Iop_128HIto64: {
+         HReg rHi, rLo;
+         iselInt128Expr(&rHi, &rLo, env, e->Iex.Unop.arg);
+         return rHi; /* and abandon rLo */
+      }
+      case Iop_64HIto32: {
+         HReg dst = newVRegI(env);
+         HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+         addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SRAI, dst, src, 32));
+         return dst;
+      }
+      case Iop_ReinterpF32asI32: {
+         HReg dst = newVRegI(env);
+         HReg src = iselFltExpr(env, e->Iex.Unop.arg);
+         addInstr(env, RISCV64Instr_FpMove(RISCV64op_FMV_X_W, dst, src));
+         return dst;
+      }
+      case Iop_ReinterpF64asI64: {
+         HReg dst = newVRegI(env);
+         HReg src = iselFltExpr(env, e->Iex.Unop.arg);
+         addInstr(env, RISCV64Instr_FpMove(RISCV64op_FMV_X_D, dst, src));
+         return dst;
+      }
+      case Iop_CmpNEZ8:
+      case Iop_CmpNEZ32:
+      case Iop_CmpNEZ64: {
+         /* dst = (0 <u src), i.e. 1 iff src is non-zero. */
+         HReg dst = newVRegI(env);
+         HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+         addInstr(env,
+                  RISCV64Instr_ALU(RISCV64op_SLTU, dst, hregRISCV64_x0(), src));
+         return dst;
+      }
+      case Iop_CmpwNEZ32:
+      case Iop_CmpwNEZ64: {
+         /* Use the fact that x | -x == 0 iff x == 0. Otherwise, either X or -X
+            will have a 1 in the MSB. */
+         HReg neg = newVRegI(env);
+         HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+         addInstr(env,
+                  RISCV64Instr_ALU(RISCV64op_SUB, neg, hregRISCV64_x0(), src));
+         HReg or = newVRegI(env);
+         addInstr(env, RISCV64Instr_ALU(RISCV64op_OR, or, src, neg));
+         HReg dst = newVRegI(env);
+         addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SRAI, dst, or, 63));
+         return dst;
+      }
+      case Iop_Left32:
+      case Iop_Left64: {
+         /* Left32/64(src) = src | -src. */
+         HReg neg = newVRegI(env);
+         HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+         addInstr(env,
+                  RISCV64Instr_ALU(RISCV64op_SUB, neg, hregRISCV64_x0(), src));
+         HReg dst = newVRegI(env);
+         addInstr(env, RISCV64Instr_ALU(RISCV64op_OR, dst, src, neg));
+         return dst;
+      }
+      default:
+         break;
+      }
+
+      break;
+   }
+
+   /* ------------------------- GET ------------------------- */
+   case Iex_Get: {
+      HReg dst  = newVRegI(env);
+      HReg base = get_baseblock_register();
+      /* The adjusted offset is used directly as the load's immediate and must
+         lie within the range asserted below. */
+      Int  off  = e->Iex.Get.offset - BASEBLOCK_OFFSET_ADJUSTMENT;
+      vassert(off >= -2048 && off < 2048);
+
+      if (ty == Ity_I64)
+         addInstr(env, RISCV64Instr_Load(RISCV64op_LD, dst, base, off));
+      else if (ty == Ity_I32)
+         addInstr(env, RISCV64Instr_Load(RISCV64op_LW, dst, base, off));
+      else if (ty == Ity_I16)
+         addInstr(env, RISCV64Instr_Load(RISCV64op_LH, dst, base, off));
+      else if (ty == Ity_I8)
+         addInstr(env, RISCV64Instr_Load(RISCV64op_LB, dst, base, off));
+      else
+         goto irreducible;
+      return dst;
+   }
+
+   /* ------------------------ CCALL ------------------------ */
+   case Iex_CCall: {
+      vassert(ty == e->Iex.CCall.retty);
+
+      /* Be very restrictive for now. Only 32 and 64-bit ints are allowed for
+         the return type. */
+      if (e->Iex.CCall.retty != Ity_I32 && e->Iex.CCall.retty != Ity_I64)
+         goto irreducible;
+
+      /* Marshal args and do the call. */
+      UInt   addToSp = 0;
+      RetLoc rloc    = mk_RetLoc_INVALID();
+      Bool   ok =
+         doHelperCall(&addToSp, &rloc, env, NULL /*guard*/, e->Iex.CCall.cee,
+                      e->Iex.CCall.retty, e->Iex.CCall.args);
+      if (!ok)
+         goto irreducible;
+      vassert(is_sane_RetLoc(rloc));
+      vassert(rloc.pri == RLPri_Int);
+      vassert(addToSp == 0);
+
+      /* Copy the result out of x10 into a fresh virtual register. */
+      HReg dst = newVRegI(env);
+      switch (e->Iex.CCall.retty) {
+      case Ity_I32:
+         /* Sign-extend the value returned from the helper as is expected by the
+            rest of the backend. */
+         addInstr(env, RISCV64Instr_ALUImm(RISCV64op_ADDIW, dst,
+                                           hregRISCV64_x10(), 0));
+         break;
+      case Ity_I64:
+         addInstr(env, RISCV64Instr_MV(dst, hregRISCV64_x10()));
+         break;
+      default:
+         vassert(0);
+      }
+      return dst;
+   }
+
+   /* ----------------------- LITERAL ----------------------- */
+   /* 64/32/16/8/1-bit literals. */
+   case Iex_Const: {
+      ULong u;
+      HReg  dst = newVRegI(env);
+      switch (e->Iex.Const.con->tag) {
+      case Ico_U64:
+         u = e->Iex.Const.con->Ico.U64;
+         break;
+      case Ico_U32:
+         vassert(ty == Ity_I32);
+         u = vex_sx_to_64(e->Iex.Const.con->Ico.U32, 32);
+         break;
+      case Ico_U16:
+         vassert(ty == Ity_I16);
+         u = vex_sx_to_64(e->Iex.Const.con->Ico.U16, 16);
+         break;
+      case Ico_U8:
+         vassert(ty == Ity_I8);
+         u = vex_sx_to_64(e->Iex.Const.con->Ico.U8, 8);
+         break;
+      case Ico_U1:
+         vassert(ty == Ity_I1);
+         /* 1-bit values are kept zero-extended (True is 1, not all-ones), which
+            is what the no-op handling of Iop_1Uto64 relies on, so the value
+            must not be sign-extended here. */
+         u = e->Iex.Const.con->Ico.U1 ? 1 : 0;
+         break;
+      default:
+         goto irreducible;
+      }
+      addInstr(env, RISCV64Instr_LI(dst, u));
+      return dst;
+   }
+
+   /* ---------------------- MULTIPLEX ---------------------- */
+   case Iex_ITE: {
+      /* ITE(ccexpr, iftrue, iffalse) */
+      if (ty == Ity_I64 || ty == Ity_I32) {
+         HReg dst     = newVRegI(env);
+         HReg iftrue  = iselIntExpr_R(env, e->Iex.ITE.iftrue);
+         HReg iffalse = iselIntExpr_R(env, e->Iex.ITE.iffalse);
+         HReg cond    = iselIntExpr_R(env, e->Iex.ITE.cond);
+         addInstr(env, RISCV64Instr_CSEL(dst, iftrue, iffalse, cond));
+         return dst;
+      }
+      break;
+   }
+
+   default:
+      break;
+   }
+
+   /* We get here if no pattern matched. */
+irreducible:
+   ppIRExpr(e);
+   vpanic("iselIntExpr_R(riscv64)");
+}
+
+/* Select instructions for an integer-typed expression by delegating to
+   iselIntExpr_R_wrk, then check that the register it returned is a virtual
+   register of class HRcInt64 before handing it back to the caller. */
+static HReg iselIntExpr_R(ISelEnv* env, IRExpr* e)
+{
+   HReg r = iselIntExpr_R_wrk(env, e);
+
+   /* Sanity checks ... */
+   vassert(hregClass(r) == HRcInt64);
+   vassert(hregIsVirtual(r));
+
+   return r;
+}
+
+/*------------------------------------------------------------*/
+/*--- ISEL: Integer expressions (128 bit)                  ---*/
+/*------------------------------------------------------------*/
+
+/* Worker for iselInt128Expr. Selects instructions for an Ity_I128 expression
+   and hands the two 64-bit halves back through *rHi and *rLo. Only the binary
+   operations listed below are recognised; any other expression is printed and
+   causes a panic.
+
+   DO NOT CALL THIS DIRECTLY ! */
+static void iselInt128Expr_wrk(HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e)
+{
+   vassert(typeOfIRExpr(env->type_env, e) == Ity_I128);
+
+   /* Nothing but binary operations is handled here. */
+   if (e->tag != Iex_Binop)
+      goto irreducible;
+
+   /* ---------------------- BINARY OP ---------------------- */
+   switch (e->Iex.Binop.op) {
+   /* Widening multiply: 64 x 64 -> 128. */
+   case Iop_MullS64:
+   case Iop_MullU64: {
+      HReg lhs = iselIntExpr_R(env, e->Iex.Binop.arg1);
+      HReg rhs = iselIntExpr_R(env, e->Iex.Binop.arg2);
+      *rHi     = newVRegI(env);
+      *rLo     = newVRegI(env);
+      /* Only the instruction for the upper half depends on signedness; the
+         lower half uses MUL in both cases. */
+      addInstr(env, RISCV64Instr_ALU(e->Iex.Binop.op == Iop_MullS64
+                                        ? RISCV64op_MULH
+                                        : RISCV64op_MULHU,
+                                     *rHi, lhs, rhs));
+      addInstr(env, RISCV64Instr_ALU(RISCV64op_MUL, *rLo, lhs, rhs));
+      return;
+   }
+
+   /* Combined division: 64 x 64 -> (remainder in hi, quotient in lo). */
+   case Iop_DivModS64to64:
+   case Iop_DivModU64to64: {
+      HReg dividend = iselIntExpr_R(env, e->Iex.Binop.arg1);
+      HReg divisor  = iselIntExpr_R(env, e->Iex.Binop.arg2);
+      *rHi          = newVRegI(env);
+      *rLo          = newVRegI(env);
+      if (e->Iex.Binop.op == Iop_DivModU64to64) {
+         addInstr(env,
+                  RISCV64Instr_ALU(RISCV64op_REMU, *rHi, dividend, divisor));
+         addInstr(env,
+                  RISCV64Instr_ALU(RISCV64op_DIVU, *rLo, dividend, divisor));
+      } else {
+         addInstr(env,
+                  RISCV64Instr_ALU(RISCV64op_REM, *rHi, dividend, divisor));
+         addInstr(env,
+                  RISCV64Instr_ALU(RISCV64op_DIV, *rLo, dividend, divisor));
+      }
+      return;
+   }
+
+   /* 64HLto128(e1,e2): the halves are simply the two operands. */
+   case Iop_64HLto128:
+      *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
+      *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
+      return;
+
+   default:
+      break;
+   }
+
+   /* We get here if no pattern matched. */
+irreducible:
+   ppIRExpr(e);
+   vpanic("iselInt128Expr(riscv64)");
+}
+
+/* Compute a 128-bit value into a register pair, which is returned as the first
+   two parameters (*rHi receives the upper 64 bits, *rLo the lower 64 bits). As
+   with iselIntExpr_R, these will be virtual registers and they must not be
+   changed by subsequent code emitted by the caller.
+
+   The actual selection is done by iselInt128Expr_wrk; this wrapper only
+   verifies that both returned registers are virtual and of class HRcInt64. */
+static void iselInt128Expr(HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e)
+{
+   iselInt128Expr_wrk(rHi, rLo, env, e);
+
+   /* Sanity checks ... */
+   vassert(hregClass(*rHi) == HRcInt64);
+   vassert(hregIsVirtual(*rHi));
+   vassert(hregClass(*rLo) == HRcInt64);
+   vassert(hregIsVirtual(*rLo));
+}
+
+/*------------------------------------------------------------*/
+/*--- ISEL: Floating point expressions                     ---*/
+/*------------------------------------------------------------*/
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static HReg iselFltExpr_wrk(ISelEnv* env, IRExpr* e)
+{
+   IRType ty = typeOfIRExpr(env->type_env, e);
+   vassert(ty == Ity_F32 || ty == Ity_F64);
+
+   switch (e->tag) {
+   /* ------------------------ TEMP ------------------------- */
+   case Iex_RdTmp: {
+      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+   }
+
+   /* ------------------------ LOAD ------------------------- */
+   case Iex_Load: {
+      if (e->Iex.Load.end != Iend_LE)
+         goto irreducible;
+
+      HReg dst = newVRegF(env);
+      /* TODO Optimize the cases with small imm Add64/Sub64. */
+      HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
+
+      if (ty == Ity_F32)
+         addInstr(env, RISCV64Instr_FpLdSt(RISCV64op_FLW, dst, addr, 0));
+      else if (ty == Ity_F64)
+         addInstr(env, RISCV64Instr_FpLdSt(RISCV64op_FLD, dst, addr, 0));
+      else
+         vassert(0);
+      return dst;
+   }
+
+   /* -------------------- QUATERNARY OP -------------------- */
+   case Iex_Qop: {
+      switch (e->Iex.Qop.details->op) {
+      case Iop_MAddF32: {
+         HReg dst  = newVRegF(env);
+         HReg argN = iselFltExpr(env, e->Iex.Qop.details->arg2);
+         HReg argM = iselFltExpr(env, e->Iex.Qop.details->arg3);
+         HReg argA = iselFltExpr(env, e->Iex.Qop.details->arg4);
+         set_fcsr_rounding_mode(env, e->Iex.Qop.details->arg1);
+         addInstr(env, RISCV64Instr_FpTernary(RISCV64op_FMADD_S, dst, argN,
+                                              argM, argA));
+         return dst;
+      }
+      case Iop_MAddF64: {
+         HReg dst  = newVRegF(env);
+         HReg argN = iselFltExpr(env, e->Iex.Qop.details->arg2);
+         HReg argM = iselFltExpr(env, e->Iex.Qop.details->arg3);
+         HReg argA = iselFltExpr(env, e->Iex.Qop.details->arg4);
+         set_fcsr_rounding_mode(env, e->Iex.Qop.details->arg1);
+         addInstr(env, RISCV64Instr_FpTernary(RISCV64op_FMADD_D, dst, argN,
+                                              argM, argA));
+         return dst;
+      }
+      default:
+         break;
+      }
+
+      break;
+   }
+
+   /* --------------------- TERNARY OP ---------------------- */
+   case Iex_Triop: {
+      RISCV64FpBinaryOp op;
+      switch (e->Iex.Triop.details->op) {
+      case Iop_AddF32:
+         op = RISCV64op_FADD_S;
+         break;
+      case Iop_MulF32:
+         op = RISCV64op_FMUL_S;
+         break;
+      case Iop_DivF32:
+         op = RISCV64op_FDIV_S;
+         break;
+      case Iop_AddF64:
+         op = RISCV64op_FADD_D;
+         break;
+      case Iop_SubF64:
+         op = RISCV64op_FSUB_D;
+         break;
+      case Iop_MulF64:
+         op = RISCV64op_FMUL_D;
+         break;
+      case Iop_DivF64:
+         op = RISCV64op_FDIV_D;
+         break;
+      default:
+         goto irreducible;
+      }
+      HReg dst  = newVRegF(env);
+      HReg src1 = iselFltExpr(env, e->Iex.Triop.details->arg2);
+      HReg src2 = iselFltExpr(env, e->Iex.Triop.details->arg3);
+      set_fcsr_rounding_mode(env, e->Iex.Triop.details->arg1);
+      addInstr(env, RISCV64Instr_FpBinary(op, dst, src1, src2));
+      return dst;
+   }
+
+   /* ---------------------- BINARY OP ---------------------- */
+   case Iex_Binop: {
+      switch (e->Iex.Binop.op) {
+      case Iop_SqrtF32: {
+         HReg dst = newVRegF(env);
+         HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
+         set_fcsr_rounding_mode(env, e->Iex.Binop.arg1);
+         addInstr(env, RISCV64Instr_FpUnary(RISCV64op_FSQRT_S, dst, src));
+         return dst;
+      }
+      case Iop_SqrtF64: {
+         HReg dst = newVRegF(env);
+         HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
+         set_fcsr_rounding_mode(env, e->Iex.Binop.arg1);
+         addInstr(env, RISCV64Instr_FpUnary(RISCV64op_FSQRT_D, dst, src));
+         return dst;
+      }
+      case Iop_I32StoF32:
+      case Iop_I32UtoF32:
+      case Iop_I64StoF32:
+      case Iop_I64UtoF32:
+      case Iop_I64StoF64:
+      case Iop_I64UtoF64: {
+         RISCV64FpConvertOp op;
+         switch (e->Iex.Binop.op) {
+         case Iop_I32StoF32:
+            op = RISCV64op_FCVT_S_W;
+            break;
+         case Iop_I32UtoF32:
+            op = RISCV64op_FCVT_S_WU;
+            break;
+         case Iop_I64StoF32:
+            op = RISCV64op_FCVT_S_L;
+            break;
+         case Iop_I64UtoF32:
+            op = RISCV64op_FCVT_S_LU;
+            break;
+         case Iop_I64StoF64:
+            op = RISCV64op_FCVT_D_L;
+            break;
+         case Iop_I64UtoF64:
+            op = RISCV64op_FCVT_D_LU;
+            break;
+         default:
+            vassert(0);
+         }
+         HReg dst = newVRegF(env);
+         HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2);
+         set_fcsr_rounding_mode(env, e->Iex.Binop.arg1);
+         addInstr(env, RISCV64Instr_FpConvert(op, dst, src));
+         return dst;
+      }
+      case Iop_F64toF32: {
+         HReg dst = newVRegF(env);
+         HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
+         set_fcsr_rounding_mode(env, e->Iex.Binop.arg1);
+         addInstr(env, RISCV64Instr_FpConvert(RISCV64op_FCVT_S_D, dst, src));
+         return dst;
+      }
+      case Iop_MinNumF32:
+      case Iop_MaxNumF32:
+      case Iop_MinNumF64:
+      case Iop_MaxNumF64: {
+         RISCV64FpBinaryOp op;
+         switch (e->Iex.Binop.op) {
+         case Iop_MinNumF32:
+            op = RISCV64op_FMIN_S;
+            break;
+         case Iop_MaxNumF32:
+            op = RISCV64op_FMAX_S;
+            break;
+         case Iop_MinNumF64:
+            op = RISCV64op_FMIN_D;
+            break;
+         case Iop_MaxNumF64:
+            op = RISCV64op_FMAX_D;
+            break;
+         default:
+            vassert(0);
+         }
+         HReg dst  = newVRegF(env);
+         HReg src1 = iselFltExpr(env, e->Iex.Binop.arg1);
+         HReg src2 = iselFltExpr(env, e->Iex.Binop.arg2);
+         addInstr(env, RISCV64Instr_FpBinary(op, dst, src1, src2));
+         return dst;
+      }
+      default:
+         break;
+      }
+
+      break;
+   }
+
+   /* ---------------------- UNARY OP ----------------------- */
+   case Iex_Unop: {
+      switch (e->Iex.Unop.op) {
+      case Iop_NegF32:
+      case Iop_AbsF32:
+      case Iop_NegF64:
+      case Iop_AbsF64: {
+         RISCV64FpBinaryOp op;
+         switch (e->Iex.Unop.op) {
+         case Iop_NegF32:
+            op = RISCV64op_FSGNJN_S;
+            break;
+         case Iop_AbsF32:
+            op = RISCV64op_FSGNJX_S;
+            break;
+         case Iop_NegF64:
+            op = RISCV64op_FSGNJN_D;
+            break;
+         case Iop_AbsF64:
+            op = RISCV64op_FSGNJX_D;
+            break;
+         default:
+            vassert(0);
+         }
+         HReg dst = newVRegF(env);
+         HReg src = iselFltExpr(env, e->Iex.Unop.arg);
+         addInstr(env, RISCV64Instr_FpBinary(op, dst, src, src));
+         return dst;
+      }
+      case Iop_I32StoF64: {
+         HReg dst = newVRegF(env);
+         HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+         addInstr(env, RISCV64Instr_FpConvert(RISCV64op_FCVT_D_W, dst, src));
+         return dst;
+      }
+      case Iop_I32UtoF64: {
+         HReg dst = newVRegF(env);
+         HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+         addInstr(env, RISCV64Instr_FpConvert(RISCV64op_FCVT_D_WU, dst, src));
+         return dst;
+      }
+      case Iop_F32toF64: {
+         HReg dst = newVRegF(env);
+         HReg src = iselFltExpr(env, e->Iex.Unop.arg);
+         addInstr(env, RISCV64Instr_FpConvert(RISCV64op_FCVT_D_S, dst, src));
+         return dst;
+      }
+      case Iop_ReinterpI32asF32: {
+         HReg dst = newVRegF(env);
+         HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+         addInstr(env, RISCV64Instr_FpMove(RISCV64op_FMV_W_X, dst, src));
+         return dst;
+      }
+      case Iop_ReinterpI64asF64: {
+         HReg dst = newVRegF(env);
+         HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+         addInstr(env, RISCV64Instr_FpMove(RISCV64op_FMV_D_X, dst, src));
+         return dst;
+      }
+      default:
+         break;
+      }
+
+      break;
+   }
+
+   /* ------------------------- GET ------------------------- */
+   case Iex_Get: {
+      HReg dst  = newVRegF(env);
+      HReg base = get_baseblock_register();
+      Int  off  = e->Iex.Get.offset - BASEBLOCK_OFFSET_ADJUSTMENT;
+      vassert(off >= -2048 && off < 2048);
+
+      if (ty == Ity_F32)
+         addInstr(env, RISCV64Instr_FpLdSt(RISCV64op_FLW, dst, base, off));
+      else if (ty == Ity_F64)
+         addInstr(env, RISCV64Instr_FpLdSt(RISCV64op_FLD, dst, base, off));
+      else
+         vassert(0);
+      return dst;
+   }
+
+   /* ---------------------- MULTIPLEX ---------------------- */
+   case Iex_ITE: {
+      /* ITE(ccexpr, iftrue, iffalse) */
+      HReg cond    = iselIntExpr_R(env, e->Iex.ITE.cond);
+      HReg iftrue  = iselFltExpr(env, e->Iex.ITE.iftrue);
+      HReg iffalse = iselFltExpr(env, e->Iex.ITE.iffalse);
+      HReg dst     = newVRegF(env);
+      addInstr(env, RISCV64Instr_FpCSEL(dst, iftrue, iffalse, cond));
+      return dst;
+   }
+
+   default:
+      break;
+   }
+
+irreducible:
+   ppIRExpr(e);
+   vpanic("iselFltExpr(riscv64)");
+}
+
+/* Evaluate the floating-point expression 'e' and return the register that
+   holds its value. Like iselIntExpr_R, the result is a virtual register which
+   code emitted later by the caller must leave unmodified. */
+static HReg iselFltExpr(ISelEnv* env, IRExpr* e)
+{
+   HReg fpReg = iselFltExpr_wrk(env, e);
+
+   /* The worker must hand back a virtual register of the FP class. */
+   vassert(hregClass(fpReg) == HRcFlt64);
+   vassert(hregIsVirtual(fpReg));
+
+   return fpReg;
+}
+
+/*------------------------------------------------------------*/
+/*--- ISEL: Statements                                     ---*/
+/*------------------------------------------------------------*/
+/* Select riscv64 instructions for one IR statement; vpanic if unsupported. */
+static void iselStmt(ISelEnv* env, IRStmt* stmt)
+{
+   if (vex_traceflags & VEX_TRACE_VCODE) {
+      vex_printf("\n-- ");
+      ppIRStmt(stmt);
+      vex_printf("\n");
+   }
+
+   switch (stmt->tag) {
+   /* ------------------------ STORE ------------------------ */
+   /* Little-endian write to memory. */
+   case Ist_Store: {
+      IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
+      if (tyd == Ity_I64 || tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
+         HReg src = iselIntExpr_R(env, stmt->Ist.Store.data);
+         /* TODO Optimize the cases with small imm Add64/Sub64. */
+         HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
+
+         if (tyd == Ity_I64)
+            addInstr(env, RISCV64Instr_Store(RISCV64op_SD, src, addr, 0));
+         else if (tyd == Ity_I32)
+            addInstr(env, RISCV64Instr_Store(RISCV64op_SW, src, addr, 0));
+         else if (tyd == Ity_I16)
+            addInstr(env, RISCV64Instr_Store(RISCV64op_SH, src, addr, 0));
+         else if (tyd == Ity_I8)
+            addInstr(env, RISCV64Instr_Store(RISCV64op_SB, src, addr, 0));
+         else
+            vassert(0);
+         return;
+      }
+      if (tyd == Ity_F32 || tyd == Ity_F64) {
+         HReg src  = iselFltExpr(env, stmt->Ist.Store.data);
+         HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
+
+         if (tyd == Ity_F32)
+            addInstr(env, RISCV64Instr_FpLdSt(RISCV64op_FSW, src, addr, 0));
+         else if (tyd == Ity_F64)
+            addInstr(env, RISCV64Instr_FpLdSt(RISCV64op_FSD, src, addr, 0));
+         else
+            vassert(0);
+         return;
+      }
+      break; /* any other data type ends up at stmt_fail */
+   }
+
+   /* ------------------------- PUT ------------------------- */
+   /* Write guest state at a fixed offset from the baseblock register. */
+   case Ist_Put: {
+      IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
+      if (tyd == Ity_I64 || tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
+         HReg src  = iselIntExpr_R(env, stmt->Ist.Put.data);
+         HReg base = get_baseblock_register();
+         Int  off  = stmt->Ist.Put.offset - BASEBLOCK_OFFSET_ADJUSTMENT;
+         vassert(off >= -2048 && off < 2048); /* signed 12-bit range */
+
+         if (tyd == Ity_I64)
+            addInstr(env, RISCV64Instr_Store(RISCV64op_SD, src, base, off));
+         else if (tyd == Ity_I32)
+            addInstr(env, RISCV64Instr_Store(RISCV64op_SW, src, base, off));
+         else if (tyd == Ity_I16)
+            addInstr(env, RISCV64Instr_Store(RISCV64op_SH, src, base, off));
+         else if (tyd == Ity_I8)
+            addInstr(env, RISCV64Instr_Store(RISCV64op_SB, src, base, off));
+         else
+            vassert(0);
+         return;
+      }
+      if (tyd == Ity_F32 || tyd == Ity_F64) {
+         HReg src  = iselFltExpr(env, stmt->Ist.Put.data);
+         HReg base = get_baseblock_register();
+         Int  off  = stmt->Ist.Put.offset - BASEBLOCK_OFFSET_ADJUSTMENT;
+         vassert(off >= -2048 && off < 2048); /* signed 12-bit range */
+
+         if (tyd == Ity_F32)
+            addInstr(env, RISCV64Instr_FpLdSt(RISCV64op_FSW, src, base, off));
+         else if (tyd == Ity_F64)
+            addInstr(env, RISCV64Instr_FpLdSt(RISCV64op_FSD, src, base, off));
+         else
+            vassert(0);
+         return;
+      }
+      break; /* any other data type ends up at stmt_fail */
+   }
+
+   /* ------------------------- TMP ------------------------- */
+   /* Assign value to temporary: compute it, then move it into the temp's reg. */
+   case Ist_WrTmp: {
+      IRType ty = typeOfIRTemp(env->type_env, stmt->Ist.WrTmp.tmp);
+      if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8 ||
+          ty == Ity_I1) {
+         HReg dst = lookupIRTemp(env, stmt->Ist.WrTmp.tmp);
+         HReg src = iselIntExpr_R(env, stmt->Ist.WrTmp.data);
+         addInstr(env, RISCV64Instr_MV(dst, src));
+         return;
+      }
+      if (ty == Ity_F32 || ty == Ity_F64) { /* both are moved with FMV_D */
+         HReg dst = lookupIRTemp(env, stmt->Ist.WrTmp.tmp);
+         HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
+         addInstr(env, RISCV64Instr_FpMove(RISCV64op_FMV_D, dst, src));
+         return;
+      }
+      break; /* any other temp type ends up at stmt_fail */
+   }
+
+   /* ---------------- Call to DIRTY helper ----------------- */
+   /* Call complex ("dirty") helper function. */
+   case Ist_Dirty: {
+      IRDirty* d = stmt->Ist.Dirty.details;
+
+      /* Figure out the return type, if any. */
+      IRType retty = Ity_INVALID;
+      if (d->tmp != IRTemp_INVALID)
+         retty = typeOfIRTemp(env->type_env, d->tmp);
+
+      if (retty != Ity_INVALID && retty != Ity_I8 && retty != Ity_I16 &&
+          retty != Ity_I32 && retty != Ity_I64)
+         goto stmt_fail; /* only integer results are handled */
+
+      /* Marshal args and do the call. */
+      UInt   addToSp = 0;
+      RetLoc rloc    = mk_RetLoc_INVALID();
+      Bool   ok =
+         doHelperCall(&addToSp, &rloc, env, d->guard, d->cee, retty, d->args);
+      if (!ok)
+         goto stmt_fail;
+      vassert(is_sane_RetLoc(rloc));
+      vassert(addToSp == 0);
+
+      /* Now figure out what to do with the returned value, if any. */
+      switch (retty) {
+      case Ity_INVALID: {
+         /* No return value. Nothing to do. */
+         vassert(d->tmp == IRTemp_INVALID);
+         vassert(rloc.pri == RLPri_None);
+         return;
+      }
+      /* The returned value is for Ity_I<x> in x10/a0. Park it in the register
+         associated with tmp. */
+      case Ity_I8:
+      case Ity_I16: {
+         vassert(rloc.pri == RLPri_Int);
+         /* Sign-extend the value returned from the helper as is expected by the
+            rest of the backend. */
+         HReg dst   = lookupIRTemp(env, d->tmp);
+         UInt shift = 64 - 8 * sizeofIRType(retty);
+         HReg tmp   = newVRegI(env);
+         addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SLLI, tmp,
+                                           hregRISCV64_x10(), shift));
+         addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SRAI, dst, tmp, shift));
+         return;
+      }
+      case Ity_I32: {
+         vassert(rloc.pri == RLPri_Int);
+         HReg dst = lookupIRTemp(env, d->tmp);
+         addInstr(env, RISCV64Instr_ALUImm(RISCV64op_ADDIW, dst,
+                                           hregRISCV64_x10(), 0));
+         return;
+      }
+      case Ity_I64: {
+         vassert(rloc.pri == RLPri_Int);
+         HReg dst = lookupIRTemp(env, d->tmp);
+         addInstr(env, RISCV64Instr_MV(dst, hregRISCV64_x10()));
+         return;
+      }
+      default:
+         vassert(0);
+      }
+      break;
+   }
+
+   /* ---------- Load Linked and Store Conditional ---------- */
+   case Ist_LLSC: {
+      if (stmt->Ist.LLSC.storedata == NULL) {
+         /* LL */
+         IRTemp res = stmt->Ist.LLSC.result;
+         IRType ty  = typeOfIRTemp(env->type_env, res);
+         if (ty == Ity_I32) { /* only 32-bit LL is handled */
+            HReg r_dst  = lookupIRTemp(env, res);
+            HReg r_addr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
+            addInstr(env, RISCV64Instr_LoadR(RISCV64op_LR_W, r_dst, r_addr));
+            return;
+         }
+      } else {
+         /* SC */
+         IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
+         if (tyd == Ity_I32) { /* only 32-bit SC is handled */
+            HReg r_tmp  = newVRegI(env);
+            HReg r_src  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
+            HReg r_addr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
+            addInstr(env,
+                     RISCV64Instr_StoreC(RISCV64op_SC_W, r_tmp, r_src, r_addr));
+
+            /* Now r_tmp is non-zero if failed, 0 if success. Change to IR
+               conventions (0 is fail, 1 is success). */
+            IRTemp res   = stmt->Ist.LLSC.result;
+            HReg   r_res = lookupIRTemp(env, res);
+            IRType ty    = typeOfIRTemp(env->type_env, res);
+            vassert(ty == Ity_I1);
+            addInstr(env,
+                     RISCV64Instr_ALUImm(RISCV64op_SLTIU, r_res, r_tmp, 1));
+            return;
+         }
+      }
+      break; /* other widths end up at stmt_fail */
+   }
+
+   /* ------------------------ ACAS ------------------------- */
+   case Ist_CAS: {
+      if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
+         /* "Normal" singleton CAS. */
+         IRCAS* cas = stmt->Ist.CAS.details;
+         IRType tyd = typeOfIRTemp(env->type_env, cas->oldLo);
+         if (tyd == Ity_I64 || tyd == Ity_I32) {
+            HReg old  = lookupIRTemp(env, cas->oldLo);
+            HReg addr = iselIntExpr_R(env, cas->addr);
+            HReg expd = iselIntExpr_R(env, cas->expdLo);
+            HReg data = iselIntExpr_R(env, cas->dataLo);
+            if (tyd == Ity_I64)
+               addInstr(env, RISCV64Instr_CAS(RISCV64op_CAS_D, old, addr, expd,
+                                              data));
+            else
+               addInstr(env, RISCV64Instr_CAS(RISCV64op_CAS_W, old, addr, expd,
+                                              data));
+            return;
+         }
+      }
+      break; /* anything else (e.g. oldHi in use) is unsupported */
+   }
+
+   /* ---------------------- MEM FENCE ---------------------- */
+   case Ist_MBE:
+      switch (stmt->Ist.MBE.event) {
+      case Imbe_Fence:
+         addInstr(env, RISCV64Instr_FENCE());
+         return;
+      default:
+         break; /* other events end up at stmt_fail */
+      }
+      break;
+
+   /* --------------------- INSTR MARK ---------------------- */
+   /* Doesn't generate any executable code ... */
+   case Ist_IMark:
+      return;
+
+   /* ---------------------- ABI HINT ----------------------- */
+   /* These have no meaning (denotation in the IR) and so we ignore them ... if
+      any actually made it this far. */
+   case Ist_AbiHint:
+       return;
+
+   /* ------------------------ NO-OP ------------------------ */
+   case Ist_NoOp:
+      return;
+
+   /* ------------------------ EXIT ------------------------- */
+   case Ist_Exit: {
+      if (stmt->Ist.Exit.dst->tag != Ico_U64)
+         vpanic("iselStmt(riscv64): Ist_Exit: dst is not a 64-bit value");
+
+      HReg cond   = iselIntExpr_R(env, stmt->Ist.Exit.guard);
+      HReg base   = get_baseblock_register();
+      Int  soff12 = stmt->Ist.Exit.offsIP - BASEBLOCK_OFFSET_ADJUSTMENT;
+      vassert(soff12 >= -2048 && soff12 < 2048); /* signed 12-bit range */
+
+      /* Case: boring transfer to known address. */
+      if (stmt->Ist.Exit.jk == Ijk_Boring) {
+         if (env->chainingAllowed) {
+            /* .. almost always true .. */
+            /* Skip the event check at the dst if this is a forwards edge. */
+            Bool toFastEP = (Addr64)stmt->Ist.Exit.dst->Ico.U64 > env->max_ga;
+            if (0) /* disabled debug trace */
+               vex_printf("%s", toFastEP ? "Y" : ",");
+            addInstr(env, RISCV64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
+                                               base, soff12, cond, toFastEP));
+         } else {
+            /* .. very occasionally .. */
+            /* We can't use chaining, so ask for an assisted transfer, as
+               that's the only alternative that is allowable. */
+            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
+            addInstr(env,
+                     RISCV64Instr_XAssisted(r, base, soff12, cond, Ijk_Boring));
+         }
+         return;
+      }
+
+      /* Case: assisted transfer to arbitrary address. */
+      switch (stmt->Ist.Exit.jk) {
+      /* Keep this list in sync with that for iselNext below. */
+      case Ijk_ClientReq:
+      case Ijk_NoDecode:
+      case Ijk_NoRedir:
+      case Ijk_Sys_syscall:
+      case Ijk_InvalICache:
+      case Ijk_SigTRAP: {
+         HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
+         addInstr(env, RISCV64Instr_XAssisted(r, base, soff12, cond,
+                                              stmt->Ist.Exit.jk));
+         return;
+      }
+      default:
+         break;
+      }
+
+      /* Do we ever expect to see any other kind? */
+      goto stmt_fail;
+   }
+
+   default:
+      break;
+   }
+
+stmt_fail:
+   ppIRStmt(stmt); /* show the statement that could not be handled */
+   vpanic("iselStmt");
+}
+
+/*------------------------------------------------------------*/
+/*--- ISEL: Basic block terminators (Nexts)                ---*/
+/*------------------------------------------------------------*/
+/* Emit the code that ends the block: a jump to 'next' of kind 'jk'. */
+static void iselNext(ISelEnv* env, IRExpr* next, IRJumpKind jk, Int offsIP)
+{
+   if (vex_traceflags & VEX_TRACE_VCODE) {
+      vex_printf("\n-- PUT(%d) = ", offsIP);
+      ppIRExpr(next);
+      vex_printf("; exit-");
+      ppIRJumpKind(jk);
+      vex_printf("\n");
+   }
+
+   HReg base   = get_baseblock_register();
+   Int  soff12 = offsIP - BASEBLOCK_OFFSET_ADJUSTMENT;
+   vassert(soff12 >= -2048 && soff12 < 2048); /* signed 12-bit range */
+
+   /* Case: boring or call transfer to known (constant) address. */
+   if (next->tag == Iex_Const) {
+      IRConst* cdst = next->Iex.Const.con;
+      vassert(cdst->tag == Ico_U64);
+      if (jk == Ijk_Boring || jk == Ijk_Call) {
+         /* Boring transfer to known address. */
+         if (env->chainingAllowed) {
+            /* .. almost always true .. */
+            /* Skip the event check at the dst if this is a forwards edge. */
+            Bool toFastEP = (Addr64)cdst->Ico.U64 > env->max_ga;
+            if (0) /* disabled debug trace */
+               vex_printf("%s", toFastEP ? "X" : ".");
+            addInstr(env, RISCV64Instr_XDirect(cdst->Ico.U64, base, soff12,
+                                               INVALID_HREG, toFastEP));
+         } else {
+            /* .. very occasionally .. */
+            /* We can't use chaining, so ask for an assisted transfer, as that's
+               the only alternative that is allowable. */
+            HReg r = iselIntExpr_R(env, next);
+            addInstr(env, RISCV64Instr_XAssisted(r, base, soff12, INVALID_HREG,
+                                                 Ijk_Boring));
+         }
+         return;
+      }
+   }
+
+   /* Case: call/return (==boring) transfer to any address. */
+   switch (jk) {
+   case Ijk_Boring:
+   case Ijk_Ret:
+   case Ijk_Call: {
+      HReg r = iselIntExpr_R(env, next);
+      if (env->chainingAllowed)
+         addInstr(env, RISCV64Instr_XIndir(r, base, soff12, INVALID_HREG));
+      else
+         addInstr(env, RISCV64Instr_XAssisted(r, base, soff12, INVALID_HREG,
+                                              Ijk_Boring));
+      return;
+   }
+   default:
+      break;
+   }
+
+   /* Case: assisted transfer to arbitrary address. */
+   switch (jk) {
+   /* Keep this list in sync with that for Ist_Exit above. */
+   case Ijk_ClientReq:
+   case Ijk_NoDecode:
+   case Ijk_NoRedir:
+   case Ijk_Sys_syscall:
+   case Ijk_InvalICache:
+   case Ijk_SigTRAP: {
+      HReg r = iselIntExpr_R(env, next);
+      addInstr(env, RISCV64Instr_XAssisted(r, base, soff12, INVALID_HREG, jk));
+      return;
+   }
+   default:
+      break;
+   }
+
+   vex_printf("\n-- PUT(%d) = ", offsIP);
+   ppIRExpr(next);
+   vex_printf("; exit-");
+   ppIRJumpKind(jk);
+   vex_printf("\n");
+   vassert(0); /* Are we expecting any other kind? */
+}
+
+/*------------------------------------------------------------*/
+/*--- Insn selector top-level                              ---*/
+/*------------------------------------------------------------*/
+
+/* Translate an entire SB to riscv64 code. The returned instruction array
+   still uses virtual registers; their count is stored in n_vregs. */
+HInstrArray* iselSB_RISCV64(const IRSB*        bb,
+                            VexArch            arch_host,
+                            const VexArchInfo* archinfo_host,
+                            const VexAbiInfo*  vbi /*UNUSED*/,
+                            Int                offs_Host_EvC_Counter,
+                            Int                offs_Host_EvC_FailAddr,
+                            Bool               chainingAllowed,
+                            Bool               addProfInc,
+                            Addr               max_ga)
+{
+   Int      i, j;
+   HReg     hreg, hregHI;
+   ISelEnv* env;
+
+   /* Do some sanity checks. */
+   vassert(arch_host == VexArchRISCV64);
+
+   /* Check that the host's endianness is as expected. */
+   vassert(archinfo_host->endness == VexEndnessLE);
+
+   /* Guard against unexpected space regressions. */
+   vassert(sizeof(RISCV64Instr) <= 32);
+
+   /* Make up an initial environment to use. */
+   env           = LibVEX_Alloc_inline(sizeof(ISelEnv));
+   env->vreg_ctr = 0;
+
+   /* Set up output code array. */
+   env->code = newHInstrArray();
+
+   /* Copy BB's type env. */
+   env->type_env = bb->tyenv;
+
+   /* Make up an IRTemp -> virtual HReg mapping. This doesn't change as we go
+      along. */
+   env->n_vregmap = bb->tyenv->types_used;
+   env->vregmap   = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
+   env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
+
+   /* and finally ... */
+   env->chainingAllowed = chainingAllowed;
+   env->hwcaps          = archinfo_host->hwcaps;
+   env->previous_rm     = NULL;
+   env->max_ga          = max_ga;
+
+   /* For each IR temporary, allocate a suitably-kinded virtual register. */
+   j = 0; /* next unused virtual register number */
+   for (i = 0; i < env->n_vregmap; i++) {
+      hregHI = hreg = INVALID_HREG;
+      switch (bb->tyenv->types[i]) {
+      case Ity_I1:
+      case Ity_I8:
+      case Ity_I16:
+      case Ity_I32:
+      case Ity_I64:
+         hreg = mkHReg(True, HRcInt64, 0, j++);
+         break;
+      case Ity_I128: /* takes two integer vregs: vregmap + vregmapHI */
+         hreg   = mkHReg(True, HRcInt64, 0, j++);
+         hregHI = mkHReg(True, HRcInt64, 0, j++);
+         break;
+      case Ity_F32:
+      case Ity_F64: /* F32 shares the HRcFlt64 register class */
+         hreg = mkHReg(True, HRcFlt64, 0, j++);
+         break;
+      default:
+         ppIRType(bb->tyenv->types[i]);
+         vpanic("iselBB(riscv64): IRTemp type");
+      }
+      env->vregmap[i]   = hreg;
+      env->vregmapHI[i] = hregHI;
+   }
+   env->vreg_ctr = j;
+
+   /* The very first instruction must be an event check. */
+   HReg base             = get_baseblock_register();
+   Int  soff12_amCounter = offs_Host_EvC_Counter - BASEBLOCK_OFFSET_ADJUSTMENT;
+   vassert(soff12_amCounter >= -2048 && soff12_amCounter < 2048);
+   Int soff12_amFailAddr = offs_Host_EvC_FailAddr - BASEBLOCK_OFFSET_ADJUSTMENT;
+   vassert(soff12_amFailAddr >= -2048 && soff12_amFailAddr < 2048);
+   addInstr(env, RISCV64Instr_EvCheck(base, soff12_amCounter, base,
+                                      soff12_amFailAddr));
+
+   /* Possibly a block counter increment (for profiling). At this point we don't
+      know the address of the counter, so just pretend it is zero. It will have
+      to be patched later, but before this translation is used, by a call to
+      LibVEX_PatchProfInc(). */
+   if (addProfInc)
+      addInstr(env, RISCV64Instr_ProfInc());
+
+   /* Ok, finally we can iterate over the statements. */
+   for (i = 0; i < bb->stmts_used; i++)
+      iselStmt(env, bb->stmts[i]);
+
+   iselNext(env, bb->next, bb->jumpkind, bb->offsIP); /* block terminator */
+
+   /* Record the number of vregs we used. */
+   env->code->n_vregs = env->vreg_ctr;
+   return env->code;
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                      host_riscv64_isel.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c
index eda2fe6ee..ad15dad63 100644
--- a/VEX/priv/main_main.c
+++ b/VEX/priv/main_main.c
@@ -43,6 +43,7 @@
 #include "libvex_guest_s390x.h"
 #include "libvex_guest_mips32.h"
 #include "libvex_guest_mips64.h"
+#include "libvex_guest_riscv64.h"
 
 #include "main_globals.h"
 #include "main_util.h"
@@ -57,6 +58,7 @@
 #include "host_s390_defs.h"
 #include "host_mips_defs.h"
 #include "host_nanomips_defs.h"
+#include "host_riscv64_defs.h"
 
 #include "guest_generic_bb_to_IR.h"
 #include "guest_x86_defs.h"
@@ -67,6 +69,7 @@
 #include "guest_s390_defs.h"
 #include "guest_mips_defs.h"
 #include "guest_nanomips_defs.h"
+#include "guest_riscv64_defs.h"
 
 #include "host_generic_simd128.h"
 
@@ -163,6 +166,14 @@
 #define NANOMIPSST(f) vassert(0)
 #endif
 
+#if defined(VGA_riscv64) || defined(VEXMULTIARCH)
+#define RISCV64FN(f) f
+#define RISCV64ST(f) f
+#else /* riscv64 support not compiled in */
+#define RISCV64FN(f) NULL
+#define RISCV64ST(f) vassert(0)
+#endif
+
 /* This file contains the top level interface to the library. */
 
 /* --------- fwds ... --------- */
@@ -541,6 +552,23 @@ IRSB* LibVEX_FrontEnd ( /*MOD*/ VexTranslateArgs* vta,
          vassert(sizeof( ((VexGuestMIPS32State*)0)->guest_NRADDR ) == 4);
          break;
 
+      case VexArchRISCV64:
+         preciseMemExnsFn
+            = RISCV64FN(guest_riscv64_state_requires_precise_mem_exns);
+         disInstrFn              = RISCV64FN(disInstr_RISCV64);
+         specHelper              = RISCV64FN(guest_riscv64_spechelper);
+         guest_layout            = RISCV64FN(&riscv64guest_layout);
+         offB_CMSTART            = offsetof(VexGuestRISCV64State,guest_CMSTART);
+         offB_CMLEN              = offsetof(VexGuestRISCV64State,guest_CMLEN);
+         offB_GUEST_IP           = offsetof(VexGuestRISCV64State,guest_pc);
+         szB_GUEST_IP            = sizeof( ((VexGuestRISCV64State*)0)->guest_pc );
+         vassert(vta->archinfo_guest.endness == VexEndnessLE);
+         vassert(0 == sizeof(VexGuestRISCV64State) % LibVEX_GUEST_STATE_ALIGN);
+         vassert(sizeof( ((VexGuestRISCV64State*)0)->guest_CMSTART ) == 8);
+         vassert(sizeof( ((VexGuestRISCV64State*)0)->guest_CMLEN   ) == 8);
+         vassert(sizeof( ((VexGuestRISCV64State*)0)->guest_NRADDR  ) == 8);
+         break;
+
       default:
          vpanic("LibVEX_Translate: unsupported guest insn set");
    }
@@ -878,6 +906,14 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
          offB_HOST_EvC_FAILADDR = offsetof(VexGuestMIPS32State,host_EvC_FAILADDR);
          break;
 
+      case VexArchRISCV64:
+         preciseMemExnsFn
+            = RISCV64FN(guest_riscv64_state_requires_precise_mem_exns);
+         guest_sizeB            = sizeof(VexGuestRISCV64State);
+         offB_HOST_EvC_COUNTER  = offsetof(VexGuestRISCV64State,host_EvC_COUNTER);
+         offB_HOST_EvC_FAILADDR = offsetof(VexGuestRISCV64State,host_EvC_FAILADDR);
+         break;
+
       default:
          vpanic("LibVEX_Codegen: unsupported guest insn set");
    }
@@ -1052,6 +1088,22 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
                  || vta->archinfo_host.endness == VexEndnessBE);
          break;
 
+      case VexArchRISCV64:
+         mode64       = True;
+         rRegUniv     = RISCV64FN(getRRegUniverse_RISCV64());
+         getRegUsage
+            = CAST_TO_TYPEOF(getRegUsage) RISCV64FN(getRegUsage_RISCV64Instr);
+         mapRegs      = CAST_TO_TYPEOF(mapRegs) RISCV64FN(mapRegs_RISCV64Instr);
+         genSpill     = CAST_TO_TYPEOF(genSpill) RISCV64FN(genSpill_RISCV64);
+         genReload    = CAST_TO_TYPEOF(genReload) RISCV64FN(genReload_RISCV64);
+         genMove      = CAST_TO_TYPEOF(genMove) RISCV64FN(genMove_RISCV64);
+         ppInstr      = CAST_TO_TYPEOF(ppInstr) RISCV64FN(ppRISCV64Instr);
+         ppReg        = CAST_TO_TYPEOF(ppReg) RISCV64FN(ppHRegRISCV64);
+         iselSB       = RISCV64FN(iselSB_RISCV64);
+         emit         = CAST_TO_TYPEOF(emit) RISCV64FN(emit_RISCV64Instr);
+         vassert(vta->archinfo_host.endness == VexEndnessLE);
+         break;
+
       default:
          vpanic("LibVEX_Translate: unsupported host insn set");
    }
@@ -1297,6 +1349,11 @@ VexInvalRange LibVEX_Chain ( VexArch     arch_host,
                                                  place_to_chain,
                                                  disp_cp_chain_me_EXPECTED,
                                                  place_to_jump_to));
+      case VexArchRISCV64:
+         RISCV64ST(return chainXDirect_RISCV64(endness_host,
+                                               place_to_chain,
+                                               disp_cp_chain_me_EXPECTED,
+                                               place_to_jump_to));
       default:
          vassert(0);
    }
@@ -1359,6 +1416,11 @@ VexInvalRange LibVEX_UnChain ( VexArch     arch_host,
                                                  place_to_unchain,
                                                  place_to_jump_to_EXPECTED,
                                                  disp_cp_chain_me));
+      case VexArchRISCV64:
+         RISCV64ST(return unchainXDirect_RISCV64(endness_host,
+                                                 place_to_unchain,
+                                                 place_to_jump_to_EXPECTED,
+                                                 disp_cp_chain_me));
       default:
          vassert(0);
    }
@@ -1387,8 +1449,10 @@ Int LibVEX_evCheckSzB ( VexArch    arch_host )
             MIPS32ST(cached = evCheckSzB_MIPS()); break;
          case VexArchMIPS64:
             MIPS64ST(cached = evCheckSzB_MIPS()); break;
-        case VexArchNANOMIPS:
+         case VexArchNANOMIPS:
             NANOMIPSST(cached = evCheckSzB_NANOMIPS()); break;
+         case VexArchRISCV64:
+            RISCV64ST(cached = evCheckSzB_RISCV64()); break;
          default:
             vassert(0);
       }
@@ -1432,6 +1496,9 @@ VexInvalRange LibVEX_PatchProfInc ( VexArch    arch_host,
       case VexArchNANOMIPS:
          NANOMIPSST(return patchProfInc_NANOMIPS(endness_host, place_to_patch,
                                                  location_of_counter));
+      case VexArchRISCV64:
+         RISCV64ST(return patchProfInc_RISCV64(endness_host, place_to_patch,
+                                               location_of_counter));
       default:
          vassert(0);
    }
@@ -1515,6 +1582,7 @@ const HChar* LibVEX_ppVexArch ( VexArch arch )
       case VexArchMIPS32:   return "MIPS32";
       case VexArchMIPS64:   return "MIPS64";
       case VexArchNANOMIPS: return "NANOMIPS";
+      case VexArchRISCV64:  return "RISCV64";
       default:              return "VexArch???";
    }
 }
@@ -1585,6 +1653,7 @@ static IRType arch_word_size (VexArch arch) {
       case VexArchMIPS64:
       case VexArchPPC64:
       case VexArchS390X:
+      case VexArchRISCV64:
          return Ity_I64;
 
       default:
@@ -1927,6 +1996,11 @@ static const HChar* show_hwcaps_mips64 ( UInt hwcaps )
    return "Unsupported baseline";
 }
 
+static const HChar* show_hwcaps_riscv64 ( UInt hwcaps )
+{
+   return "riscv64"; /* one fixed name; hwcaps is not inspected */
+}
+
 #undef NUM_HWCAPS
 
 /* Thie function must not return NULL. */
@@ -1934,15 +2008,16 @@ static const HChar* show_hwcaps_mips64 ( UInt hwcaps )
 static const HChar* show_hwcaps ( VexArch arch, UInt hwcaps )
 {
    switch (arch) {
-      case VexArchX86:    return show_hwcaps_x86(hwcaps);
-      case VexArchAMD64:  return show_hwcaps_amd64(hwcaps);
-      case VexArchPPC32:  return show_hwcaps_ppc32(hwcaps);
-      case VexArchPPC64:  return show_hwcaps_ppc64(hwcaps);
-      case VexArchARM:    return show_hwcaps_arm(hwcaps);
-      case VexArchARM64:  return show_hwcaps_arm64(hwcaps);
-      case VexArchS390X:  return show_hwcaps_s390x(hwcaps);
-      case VexArchMIPS32: return show_hwcaps_mips32(hwcaps);
-      case VexArchMIPS64: return show_hwcaps_mips64(hwcaps);
+      case VexArchX86:     return show_hwcaps_x86(hwcaps);
+      case VexArchAMD64:   return show_hwcaps_amd64(hwcaps);
+      case VexArchPPC32:   return show_hwcaps_ppc32(hwcaps);
+      case VexArchPPC64:   return show_hwcaps_ppc64(hwcaps);
+      case VexArchARM:     return show_hwcaps_arm(hwcaps);
+      case VexArchARM64:   return show_hwcaps_arm64(hwcaps);
+      case VexArchS390X:   return show_hwcaps_s390x(hwcaps);
+      case VexArchMIPS32:  return show_hwcaps_mips32(hwcaps);
+      case VexArchMIPS64:  return show_hwcaps_mips64(hwcaps);
+      case VexArchRISCV64: return show_hwcaps_riscv64(hwcaps);
       default: return NULL;
    }
 }
@@ -2205,6 +2280,11 @@ static void check_hwcaps ( VexArch arch, UInt hwcaps )
             return;
          invalid_hwcaps(arch, hwcaps, "Unsupported baseline\n");
 
+      case VexArchRISCV64:
+         if (hwcaps == 0)
+            return;
+         invalid_hwcaps(arch, hwcaps, "Cannot handle capabilities\n");
+
       default:
          vpanic("unknown architecture");
    }
diff --git a/VEX/priv/main_util.h b/VEX/priv/main_util.h
index 2fa26b062..7fd304dd1 100644
--- a/VEX/priv/main_util.h
+++ b/VEX/priv/main_util.h
@@ -100,6 +100,17 @@ extern SizeT vex_strlen ( const HChar* str );
 extern void vex_bzero ( void* s, SizeT n );
 
 
+/* Math ops */
+
+/* Sign extend the low N bits of X to 64 bits, by copying bit N-1 into all
+   higher positions. Bits of X above N-1 are discarded. Asserts 1 <= N <= 63. */
+static inline ULong vex_sx_to_64( ULong x, UInt n )
+{
+   vassert(n >= 1 && n < 64);
+   return (ULong)((Long)(x << (64 - n)) >> (64 - n));
+}
+
+
 /* Storage management: clear the area, and allocate from it. */
 
 /* By default allocation occurs in the temporary area.  However, it is
diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h
index 15e2d39de..6e32ccd5b 100644
--- a/VEX/pub/libvex.h
+++ b/VEX/pub/libvex.h
@@ -60,6 +60,7 @@ typedef
       VexArchMIPS32,
       VexArchMIPS64,
       VexArchNANOMIPS,
+      VexArchRISCV64,
    }
    VexArch;
 
@@ -1030,6 +1031,16 @@ extern void LibVEX_InitIRI ( const IRICB * );
    ~~~~~
    r21 is GSP.
 
+   riscv64
+   ~~~~~~~
+   On entry, x8/s0 should point to the guest state + 2048. RISC-V
+   load/store instructions take an immediate offset from the base
+   register in the range -2048 to 2047. The adjustment of 2048 allows
+   LibVEX to effectively use the full range. When translating
+   riscv64->riscv64, only a single instruction is then needed to
+   read/write values in the guest state (primary + 2x shadow state
+   areas) and most of the spill area.
+
    ALL GUEST ARCHITECTURES
    ~~~~~~~~~~~~~~~~~~~~~~~
    The guest state must contain two pseudo-registers, guest_CMSTART
diff --git a/VEX/pub/libvex_basictypes.h b/VEX/pub/libvex_basictypes.h
index e3f1485d5..6c48b227c 100644
--- a/VEX/pub/libvex_basictypes.h
+++ b/VEX/pub/libvex_basictypes.h
@@ -153,7 +153,6 @@ typedef  unsigned long HWord;
 #undef VEX_HOST_WORDSIZE
 #undef VEX_REGPARM
 
-/* The following 4 work OK for Linux. */
 #if defined(__x86_64__)
 #   define VEX_HOST_WORDSIZE 8
 #   define VEX_REGPARM(_n) /* */
@@ -198,6 +197,10 @@ typedef  unsigned long HWord;
 #   define VEX_HOST_WORDSIZE 4
 #   define VEX_REGPARM(_n) /* */
 
+#elif defined(__riscv) && (__riscv_xlen == 64)
+#   define VEX_HOST_WORDSIZE 8
+#   define VEX_REGPARM(_n) /* */
+
 #else
 #   error "Vex: Fatal: Can't establish the host architecture"
 #endif
diff --git a/VEX/pub/libvex_guest_riscv64.h b/VEX/pub/libvex_guest_riscv64.h
new file mode 100644
index 000000000..31264b124
--- /dev/null
+++ b/VEX/pub/libvex_guest_riscv64.h
@@ -0,0 +1,148 @@
+
+/*--------------------------------------------------------------------*/
+/*--- begin                                 libvex_guest_riscv64.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright (C) 2020-2023 Petr Pavlu
+      petr.pavlu@dagobah.cz
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __LIBVEX_PUB_GUEST_RISCV64_H
+#define __LIBVEX_PUB_GUEST_RISCV64_H
+
+#include "libvex_basictypes.h"
+
+/*------------------------------------------------------------*/
+/*--- Vex's representation of the riscv64 CPU state.       ---*/
+/*------------------------------------------------------------*/
+
+typedef struct { /* The number in front of each member is its byte offset. */
+   /*   0 */ ULong host_EvC_FAILADDR;
+   /*   8 */ UInt  host_EvC_COUNTER;
+   /*  12 */ UInt  pad0; /* Keeps guest_x0 at an 8-byte aligned offset. */
+   /*  16 */ ULong guest_x0;
+   /*  24 */ ULong guest_x1;
+   /*  32 */ ULong guest_x2;
+   /*  40 */ ULong guest_x3;
+   /*  48 */ ULong guest_x4;
+   /*  56 */ ULong guest_x5;
+   /*  64 */ ULong guest_x6;
+   /*  72 */ ULong guest_x7;
+   /*  80 */ ULong guest_x8;
+   /*  88 */ ULong guest_x9;
+   /*  96 */ ULong guest_x10;
+   /* 104 */ ULong guest_x11;
+   /* 112 */ ULong guest_x12;
+   /* 120 */ ULong guest_x13;
+   /* 128 */ ULong guest_x14;
+   /* 136 */ ULong guest_x15;
+   /* 144 */ ULong guest_x16;
+   /* 152 */ ULong guest_x17;
+   /* 160 */ ULong guest_x18;
+   /* 168 */ ULong guest_x19;
+   /* 176 */ ULong guest_x20;
+   /* 184 */ ULong guest_x21;
+   /* 192 */ ULong guest_x22;
+   /* 200 */ ULong guest_x23;
+   /* 208 */ ULong guest_x24;
+   /* 216 */ ULong guest_x25;
+   /* 224 */ ULong guest_x26;
+   /* 232 */ ULong guest_x27;
+   /* 240 */ ULong guest_x28;
+   /* 248 */ ULong guest_x29;
+   /* 256 */ ULong guest_x30;
+   /* 264 */ ULong guest_x31;
+   /* 272 */ ULong guest_pc;
+
+   /* Floating-point state: f0..f31 in 64-bit slots, then the 32-bit fcsr. */
+   /* 280 */ ULong guest_f0;
+   /* 288 */ ULong guest_f1;
+   /* 296 */ ULong guest_f2;
+   /* 304 */ ULong guest_f3;
+   /* 312 */ ULong guest_f4;
+   /* 320 */ ULong guest_f5;
+   /* 328 */ ULong guest_f6;
+   /* 336 */ ULong guest_f7;
+   /* 344 */ ULong guest_f8;
+   /* 352 */ ULong guest_f9;
+   /* 360 */ ULong guest_f10;
+   /* 368 */ ULong guest_f11;
+   /* 376 */ ULong guest_f12;
+   /* 384 */ ULong guest_f13;
+   /* 392 */ ULong guest_f14;
+   /* 400 */ ULong guest_f15;
+   /* 408 */ ULong guest_f16;
+   /* 416 */ ULong guest_f17;
+   /* 424 */ ULong guest_f18;
+   /* 432 */ ULong guest_f19;
+   /* 440 */ ULong guest_f20;
+   /* 448 */ ULong guest_f21;
+   /* 456 */ ULong guest_f22;
+   /* 464 */ ULong guest_f23;
+   /* 472 */ ULong guest_f24;
+   /* 480 */ ULong guest_f25;
+   /* 488 */ ULong guest_f26;
+   /* 496 */ ULong guest_f27;
+   /* 504 */ ULong guest_f28;
+   /* 512 */ ULong guest_f29;
+   /* 520 */ ULong guest_f30;
+   /* 528 */ ULong guest_f31;
+   /* 536 */ UInt  guest_fcsr;
+
+   /* Various pseudo-regs mandated by Vex or Valgrind. */
+   /* Emulation notes. */
+   /* 540 */ UInt guest_EMNOTE;
+
+   /* For clflush/clinval: record start and length of area. */
+   /* 544 */ ULong guest_CMSTART;
+   /* 552 */ ULong guest_CMLEN;
+
+   /* Used to record the unredirected guest address at the start of a
+      translation whose start has been redirected. By reading this
+      pseudo-register shortly afterwards, the translation can find out what the
+      corresponding no-redirection address was. Note, this is only set for
+      wrap-style redirects, not for replace-style ones. */
+   /* 560 */ ULong guest_NRADDR;
+
+   /* Fallback LL/SC support. */
+   /* 568 */ ULong guest_LLSC_SIZE; /* 0==no transaction, else 4 or 8. */
+   /* 576 */ ULong guest_LLSC_ADDR; /* Address of the transaction. */
+   /* 584 */ ULong guest_LLSC_DATA; /* Original value at ADDR, sign-extended. */
+
+   /* Padding to 16 bytes: none needed, 592 is already a multiple of 16. */
+   /* 592 */
+} VexGuestRISCV64State;
+
+/*------------------------------------------------------------*/
+/*--- Utility functions for riscv64 guest stuff.           ---*/
+/*------------------------------------------------------------*/
+
+/* ALL THE FOLLOWING ARE VISIBLE TO LIBRARY CLIENT */
+
+/* Initialise all guest riscv64 state. */
+void LibVEX_GuestRISCV64_initialise(/*OUT*/ VexGuestRISCV64State* vex_state);
+
+#endif /* ndef __LIBVEX_PUB_GUEST_RISCV64_H */
+
+/*--------------------------------------------------------------------*/
+/*---                                       libvex_guest_riscv64.h ---*/
+/*--------------------------------------------------------------------*/
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h
index f6f347a05..1c899c80e 100644
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -2089,7 +2089,8 @@ typedef
       Irrm_PREPARE_SHORTER      = 5,  // Round to prepare for shorter 
                                       // precision
       Irrm_AWAY_FROM_ZERO       = 6,  // Round to away from 0
-      Irrm_NEAREST_TIE_TOWARD_0 = 7   // Round to nearest, ties towards 0
+      Irrm_NEAREST_TIE_TOWARD_0 = 7,  // Round to nearest, ties towards 0
+      Irrm_INVALID              = 8   // Invalid mode
    }
    IRRoundingMode;
 
diff --git a/cachegrind/cg_arch.c b/cachegrind/cg_arch.c
index 68314c9db..be2973405 100644
--- a/cachegrind/cg_arch.c
+++ b/cachegrind/cg_arch.c
@@ -484,6 +484,13 @@ configure_caches(cache_t *I1c, cache_t *D1c, cache_t *LLc,
    *D1c = (cache_t) {  65536, 2, 64 };
    *LLc = (cache_t) { 262144, 8, 64 };
 
+#elif defined(VGA_riscv64)
+
+   // Default cache configuration is SiFive FU740-C000 (HiFive Unmatched)
+   *I1c = (cache_t) {   32768,  4, 64 };
+   *D1c = (cache_t) {   32768,  8, 64 };
+   *LLc = (cache_t) { 2097152, 16, 64 };
+
 #else
 
 #error "Unknown arch"
diff --git a/cachegrind/cg_branchpred.c b/cachegrind/cg_branchpred.c
index 927b7bf21..f7a261c6f 100644
--- a/cachegrind/cg_branchpred.c
+++ b/cachegrind/cg_branchpred.c
@@ -48,7 +48,7 @@
 #  define N_IADDR_LO_ZERO_BITS 2
 #elif defined(VGA_x86) || defined(VGA_amd64)
 #  define N_IADDR_LO_ZERO_BITS 0
-#elif defined(VGA_s390x) || defined(VGA_arm)
+#elif defined(VGA_s390x) || defined(VGA_arm) || defined(VGA_riscv64)
 #  define N_IADDR_LO_ZERO_BITS 1
 #else
 #  error "Unsupported architecture"
diff --git a/configure.ac b/configure.ac
index ec4170cbf..666521de4 100755
--- a/configure.ac
+++ b/configure.ac
@@ -325,6 +325,11 @@ case "${host_cpu}" in
         ARCH_MAX="nanomips"
         ;;
 
+     riscv64)
+       AC_MSG_RESULT([ok (${host_cpu})])
+       ARCH_MAX="riscv64"
+       ;;
+
      *) 
 	AC_MSG_RESULT([no (${host_cpu})])
 	AC_MSG_ERROR([Unsupported host architecture. Sorry])
@@ -989,6 +994,17 @@ case "$ARCH_MAX-$VGCONF_OS" in
         valt_load_address_sec_inner="0xUNSET"
         AC_MSG_RESULT([ok (${ARCH_MAX}-${VGCONF_OS})])
         ;;
+     riscv64-linux)
+        VGCONF_ARCH_PRI="riscv64"
+        VGCONF_ARCH_SEC=""
+        VGCONF_PLATFORM_PRI_CAPS="RISCV64_LINUX"
+        VGCONF_PLATFORM_SEC_CAPS=""
+        valt_load_address_pri_norml="0x58000000"
+        valt_load_address_pri_inner="0x38000000"
+        valt_load_address_sec_norml="0xUNSET"
+        valt_load_address_sec_inner="0xUNSET"
+        AC_MSG_RESULT([ok (${ARCH_MAX}-${VGCONF_OS})])
+        ;;
      x86-solaris)
         VGCONF_ARCH_PRI="x86"
         VGCONF_ARCH_SEC=""
@@ -1082,6 +1098,8 @@ AM_CONDITIONAL(VGCONF_ARCHS_INCLUDE_MIPS64,
                test x$VGCONF_PLATFORM_PRI_CAPS = xMIPS64_LINUX ) 
 AM_CONDITIONAL(VGCONF_ARCHS_INCLUDE_NANOMIPS,
                test x$VGCONF_PLATFORM_PRI_CAPS = xNANOMIPS_LINUX )
+AM_CONDITIONAL(VGCONF_ARCHS_INCLUDE_RISCV64,
+               test x$VGCONF_PLATFORM_PRI_CAPS = xRISCV64_LINUX )
 
 # Set up VGCONF_PLATFORMS_INCLUDE_<platform>.  Either one or two of these
 # become defined.
@@ -1112,6 +1130,8 @@ AM_CONDITIONAL(VGCONF_PLATFORMS_INCLUDE_MIPS64_LINUX,
                test x$VGCONF_PLATFORM_PRI_CAPS = xMIPS64_LINUX)
 AM_CONDITIONAL(VGCONF_PLATFORMS_INCLUDE_NANOMIPS_LINUX,
                test x$VGCONF_PLATFORM_PRI_CAPS = xNANOMIPS_LINUX)
+AM_CONDITIONAL(VGCONF_PLATFORMS_INCLUDE_RISCV64_LINUX,
+               test x$VGCONF_PLATFORM_PRI_CAPS = xRISCV64_LINUX)
 AM_CONDITIONAL(VGCONF_PLATFORMS_INCLUDE_X86_FREEBSD,
                test x$VGCONF_PLATFORM_PRI_CAPS = xX86_FREEBSD \
                  -o x$VGCONF_PLATFORM_SEC_CAPS = xX86_FREEBSD)
@@ -1145,7 +1165,8 @@ AM_CONDITIONAL(VGCONF_OS_IS_LINUX,
                  -o x$VGCONF_PLATFORM_PRI_CAPS = xS390X_LINUX \
                  -o x$VGCONF_PLATFORM_PRI_CAPS = xMIPS32_LINUX \
                  -o x$VGCONF_PLATFORM_PRI_CAPS = xMIPS64_LINUX \
-                 -o x$VGCONF_PLATFORM_PRI_CAPS = xNANOMIPS_LINUX)
+                 -o x$VGCONF_PLATFORM_PRI_CAPS = xNANOMIPS_LINUX \
+                 -o x$VGCONF_PLATFORM_PRI_CAPS = xRISCV64_LINUX)
 AM_CONDITIONAL(VGCONF_OS_IS_FREEBSD,
                test x$VGCONF_PLATFORM_PRI_CAPS = xX86_FREEBSD \
                  -o x$VGCONF_PLATFORM_PRI_CAPS = xAMD64_FREEBSD \
@@ -5074,7 +5095,8 @@ elif test x$VGCONF_PLATFORM_PRI_CAPS = xAMD64_LINUX \
        -o x$VGCONF_PLATFORM_PRI_CAPS = xARM64_LINUX \
        -o x$VGCONF_PLATFORM_PRI_CAPS = xARM64_FREEBSD \
        -o x$VGCONF_PLATFORM_PRI_CAPS = xMIPS64_LINUX \
-       -o x$VGCONF_PLATFORM_PRI_CAPS = xS390X_LINUX ; then
+       -o x$VGCONF_PLATFORM_PRI_CAPS = xS390X_LINUX \
+       -o x$VGCONF_PLATFORM_PRI_CAPS = xRISCV64_LINUX ; then
   mflag_primary=$FLAG_M64
 elif test x$VGCONF_PLATFORM_PRI_CAPS = xX86_DARWIN ; then
   mflag_primary="$FLAG_M32 -arch i386"
@@ -5644,6 +5666,7 @@ AC_CONFIG_FILES([
    memcheck/tests/amd64-linux/Makefile
    memcheck/tests/arm64-linux/Makefile
    memcheck/tests/x86-linux/Makefile
+   memcheck/tests/riscv64-linux/Makefile
    memcheck/tests/amd64-solaris/Makefile
    memcheck/tests/x86-solaris/Makefile
    memcheck/tests/amd64-freebsd/Makefile
@@ -5689,6 +5712,7 @@ AC_CONFIG_FILES([
    none/tests/mips32/Makefile
    none/tests/mips64/Makefile
    none/tests/nanomips/Makefile
+   none/tests/riscv64/Makefile
    none/tests/linux/Makefile
    none/tests/darwin/Makefile
    none/tests/solaris/Makefile
diff --git a/coregrind/Makefile.am b/coregrind/Makefile.am
index e3e31a73b..f2c59ed4b 100644
--- a/coregrind/Makefile.am
+++ b/coregrind/Makefile.am
@@ -390,6 +390,7 @@ COREGRIND_SOURCES_COMMON = \
 	m_dispatch/dispatch-mips32-linux.S \
 	m_dispatch/dispatch-mips64-linux.S \
 	m_dispatch/dispatch-nanomips-linux.S \
+	m_dispatch/dispatch-riscv64-linux.S \
 	m_dispatch/dispatch-x86-freebsd.S \
 	m_dispatch/dispatch-amd64-freebsd.S \
 	m_dispatch/dispatch-arm64-freebsd.S \
@@ -417,6 +418,7 @@ COREGRIND_SOURCES_COMMON = \
 	m_gdbserver/valgrind-low-mips32.c \
 	m_gdbserver/valgrind-low-mips64.c \
 	m_gdbserver/valgrind-low-nanomips.c \
+	m_gdbserver/valgrind-low-riscv64.c \
 	m_gdbserver/version.c \
 	m_initimg/initimg-linux.c \
 	m_initimg/initimg-freebsd.c \
@@ -445,6 +447,7 @@ COREGRIND_SOURCES_COMMON = \
 	m_sigframe/sigframe-mips32-linux.c \
 	m_sigframe/sigframe-mips64-linux.c \
 	m_sigframe/sigframe-nanomips-linux.c \
+	m_sigframe/sigframe-riscv64-linux.c \
 	m_sigframe/sigframe-x86-darwin.c \
 	m_sigframe/sigframe-amd64-darwin.c \
 	m_sigframe/sigframe-solaris.c \
@@ -459,6 +462,7 @@ COREGRIND_SOURCES_COMMON = \
 	m_syswrap/syscall-mips32-linux.S \
 	m_syswrap/syscall-mips64-linux.S \
 	m_syswrap/syscall-nanomips-linux.S \
+	m_syswrap/syscall-riscv64-linux.S \
 	m_syswrap/syscall-x86-freebsd.S \
 	m_syswrap/syscall-amd64-freebsd.S \
 	m_syswrap/syscall-arm64-freebsd.S \
@@ -486,6 +490,7 @@ COREGRIND_SOURCES_COMMON = \
 	m_syswrap/syswrap-mips32-linux.c \
 	m_syswrap/syswrap-mips64-linux.c \
 	m_syswrap/syswrap-nanomips-linux.c \
+	m_syswrap/syswrap-riscv64-linux.c \
 	m_syswrap/syswrap-x86-darwin.c \
 	m_syswrap/syswrap-amd64-darwin.c \
 	m_syswrap/syswrap-xen.c \
@@ -778,7 +783,15 @@ GDBSERVER_XML_FILES = \
 	m_gdbserver/mips64-linux-valgrind.xml \
 	m_gdbserver/mips64-fpu-valgrind-s1.xml \
 	m_gdbserver/mips64-fpu-valgrind-s2.xml \
-	m_gdbserver/mips64-fpu.xml
+	m_gdbserver/mips64-fpu.xml \
+	m_gdbserver/riscv64-cpu-valgrind-s1.xml \
+	m_gdbserver/riscv64-cpu-valgrind-s2.xml \
+	m_gdbserver/riscv64-cpu.xml \
+	m_gdbserver/riscv64-linux.xml \
+	m_gdbserver/riscv64-linux-valgrind.xml \
+	m_gdbserver/riscv64-fpu-valgrind-s1.xml \
+	m_gdbserver/riscv64-fpu-valgrind-s2.xml \
+	m_gdbserver/riscv64-fpu.xml
 
 # so as to make sure these get copied into the install tree
 vglibdir = $(pkglibexecdir)
diff --git a/coregrind/launcher-linux.c b/coregrind/launcher-linux.c
index 715fdab81..20e624003 100644
--- a/coregrind/launcher-linux.c
+++ b/coregrind/launcher-linux.c
@@ -51,16 +51,18 @@
 #include <string.h>
 #include <unistd.h>
 
+/* Provide our own definitions for elf.h constants that might not yet be
+   available on some older systems. */
 #ifndef EM_X86_64
-#define EM_X86_64 62    // elf.h doesn't define this on some older systems
+#define EM_X86_64 62
 #endif
 
 #ifndef EM_AARCH64
-#define EM_AARCH64 183  // ditto
+#define EM_AARCH64 183
 #endif
 
 #ifndef EM_PPC64
-#define EM_PPC64 21  // ditto
+#define EM_PPC64 21
 #endif
 
 #ifndef EM_NANOMIPS
@@ -75,6 +77,10 @@
 #define E_MIPS_ABI2    0x00000020
 #endif
 
+#ifndef EM_RISCV
+#define EM_RISCV 243
+#endif
+
 /* Report fatal errors */
 __attribute__((noreturn))
 static void barf ( const char *format, ... )
@@ -316,6 +322,10 @@ static const char *select_platform(const char *clientname)
                 (header.ehdr64.e_ident[EI_OSABI] == ELFOSABI_SYSV ||
                  header.ehdr64.e_ident[EI_OSABI] == ELFOSABI_LINUX)) {
                platform = "ppc64le-linux";
+            } else if (header.ehdr64.e_machine == EM_RISCV &&
+                (header.ehdr64.e_ident[EI_OSABI] == ELFOSABI_SYSV ||
+                 header.ehdr64.e_ident[EI_OSABI] == ELFOSABI_LINUX)) {
+               platform = "riscv64-linux";
             }
          } else if (header.c[EI_DATA] == ELFDATA2MSB) {
 #           if !defined(VGPV_arm_linux_android) \
@@ -404,8 +414,8 @@ int main(int argc, char** argv, char** envp)
       the executable (eg because it's a shell script).  VG_PLATFORM is the
       default_platform. Its value is defined in coregrind/Makefile.am and
       typically it is the primary build target. Unless the primary build
-      target is not built is not built in which case VG_PLATFORM is the
-      secondary build target. */
+      target is not built in which case VG_PLATFORM is the secondary build
+      target. */
 #  if defined(VGO_linux)
    if ((0==strcmp(VG_PLATFORM,"x86-linux"))    ||
        (0==strcmp(VG_PLATFORM,"amd64-linux"))  ||
@@ -417,7 +427,8 @@ int main(int argc, char** argv, char** envp)
        (0==strcmp(VG_PLATFORM,"s390x-linux"))  ||
        (0==strcmp(VG_PLATFORM,"mips32-linux")) ||
        (0==strcmp(VG_PLATFORM,"mips64-linux")) ||
-       (0==strcmp(VG_PLATFORM,"nanomips-linux")))
+       (0==strcmp(VG_PLATFORM,"nanomips-linux")) ||
+       (0==strcmp(VG_PLATFORM,"riscv64-linux")))
       default_platform = VG_PLATFORM;
 #  elif defined(VGO_solaris)
    if ((0==strcmp(VG_PLATFORM,"x86-solaris")) ||
diff --git a/coregrind/m_aspacemgr/aspacemgr-common.c b/coregrind/m_aspacemgr/aspacemgr-common.c
index a0c18efac..cf30e5791 100644
--- a/coregrind/m_aspacemgr/aspacemgr-common.c
+++ b/coregrind/m_aspacemgr/aspacemgr-common.c
@@ -157,7 +157,8 @@ SysRes VG_(am_do_mmap_NO_NOTIFY)( Addr start, SizeT length, UInt prot,
 #  elif defined(VGP_amd64_linux) \
         || defined(VGP_ppc64be_linux)  || defined(VGP_ppc64le_linux) \
         || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \
-        || defined(VGP_mips64_linux) || defined(VGP_arm64_linux)
+        || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \
+        || defined(VGP_riscv64_linux)
    res = VG_(do_syscall6)(__NR_mmap, (UWord)start, length, 
                          prot, flags, fd, offset);
 #  elif defined(VGP_x86_darwin)
@@ -262,8 +263,9 @@ SysRes ML_(am_do_relocate_nooverlap_mapping_NO_NOTIFY)(
 
 SysRes ML_(am_open) ( const HChar* pathname, Int flags, Int mode )
 {
-#  if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux)
-   /* ARM64 wants to use __NR_openat rather than __NR_open. */
+#  if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
+      || defined(VGP_riscv64_linux)
+   /* More recent Linux platforms have only __NR_openat and no __NR_open. */
    SysRes res = VG_(do_syscall4)(__NR_openat,
                                  VKI_AT_FDCWD, (UWord)pathname, flags, mode);
 #  elif defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_freebsd)
@@ -291,7 +293,8 @@ void ML_(am_close) ( Int fd )
 Int ML_(am_readlink)(const HChar* path, HChar* buf, UInt bufsiz)
 {
    SysRes res;
-#  if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux)
+#  if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
+      || defined(VGP_riscv64_linux)
    res = VG_(do_syscall4)(__NR_readlinkat, VKI_AT_FDCWD,
                                            (UWord)path, (UWord)buf, bufsiz);
 #  elif defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_freebsd)
diff --git a/coregrind/m_cache.c b/coregrind/m_cache.c
index 428a4df43..9a6e7e1ae 100644
--- a/coregrind/m_cache.c
+++ b/coregrind/m_cache.c
@@ -539,7 +539,8 @@ get_cache_info(VexArchInfo *vai)
 #elif defined(VGA_arm) || defined(VGA_ppc32)    || \
    defined(VGA_ppc64be) || defined(VGA_ppc64le) || \
    defined(VGA_mips32) || defined(VGA_mips64) || \
-   defined(VGA_arm64) || defined(VGA_nanomips)
+   defined(VGA_arm64) || defined(VGA_nanomips) || \
+   defined(VGA_riscv64)
 static Bool
 get_cache_info(VexArchInfo *vai)
 {
diff --git a/coregrind/m_coredump/coredump-elf.c b/coregrind/m_coredump/coredump-elf.c
index a4632d9e2..b57d26275 100644
--- a/coregrind/m_coredump/coredump-elf.c
+++ b/coregrind/m_coredump/coredump-elf.c
@@ -277,7 +277,7 @@ static void fill_prstatus(const ThreadState *tst,
    prs->pr_sid = VG_(getpgrp)();
 #endif
    
-#if defined(VGP_s390x_linux)
+#if defined(VGP_s390x_linux) || defined(VGP_riscv64_linux)
    /* prs->pr_reg has struct type. Need to take address. */
    regs = (struct vki_user_regs_struct *)&(prs->pr_reg);
 #elif defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \
@@ -489,6 +489,39 @@ static void fill_prstatus(const ThreadState *tst,
    regs[VKI_MIPS32_EF_CP0_STATUS] = arch->vex.guest_CP0_status;
    regs[VKI_MIPS32_EF_CP0_EPC]    = arch->vex.guest_PC;
 #  undef DO
+#elif defined(VGP_riscv64_linux)
+   regs->pc = arch->vex.guest_pc;
+   regs->ra = arch->vex.guest_x1;
+   regs->sp = arch->vex.guest_x2;
+   regs->gp = arch->vex.guest_x3;
+   regs->tp = arch->vex.guest_x4;
+   regs->t0 = arch->vex.guest_x5;
+   regs->t1 = arch->vex.guest_x6;
+   regs->t2 = arch->vex.guest_x7;
+   regs->s0 = arch->vex.guest_x8;
+   regs->s1 = arch->vex.guest_x9;
+   regs->a0 = arch->vex.guest_x10;
+   regs->a1 = arch->vex.guest_x11;
+   regs->a2 = arch->vex.guest_x12;
+   regs->a3 = arch->vex.guest_x13;
+   regs->a4 = arch->vex.guest_x14;
+   regs->a5 = arch->vex.guest_x15;
+   regs->a6 = arch->vex.guest_x16;
+   regs->a7 = arch->vex.guest_x17;
+   regs->s2 = arch->vex.guest_x18;
+   regs->s3 = arch->vex.guest_x19;
+   regs->s4 = arch->vex.guest_x20;
+   regs->s5 = arch->vex.guest_x21;
+   regs->s6 = arch->vex.guest_x22;
+   regs->s7 = arch->vex.guest_x23;
+   regs->s8 = arch->vex.guest_x24;
+   regs->s9 = arch->vex.guest_x25;
+   regs->s10 = arch->vex.guest_x26;
+   regs->s11 = arch->vex.guest_x27;
+   regs->t3 = arch->vex.guest_x28;
+   regs->t4 = arch->vex.guest_x29;
+   regs->t5 = arch->vex.guest_x30;
+   regs->t6 = arch->vex.guest_x31;
 #elif defined(VGP_amd64_freebsd)
    regs->rflags = LibVEX_GuestAMD64_get_rflags( &arch->vex );
    regs->rsp    = arch->vex.guest_RSP;
@@ -691,6 +724,41 @@ static void fill_fpu(const ThreadState *tst, vki_elf_fpregset_t *fpu)
 #  undef DO
 #elif defined(VGP_nanomips_linux)
 
+#elif defined(VGP_riscv64_linux)
+   fpu->d.f[0] = arch->vex.guest_f0;
+   fpu->d.f[1] = arch->vex.guest_f1;
+   fpu->d.f[2] = arch->vex.guest_f2;
+   fpu->d.f[3] = arch->vex.guest_f3;
+   fpu->d.f[4] = arch->vex.guest_f4;
+   fpu->d.f[5] = arch->vex.guest_f5;
+   fpu->d.f[6] = arch->vex.guest_f6;
+   fpu->d.f[7] = arch->vex.guest_f7;
+   fpu->d.f[8] = arch->vex.guest_f8;
+   fpu->d.f[9] = arch->vex.guest_f9;
+   fpu->d.f[10] = arch->vex.guest_f10;
+   fpu->d.f[11] = arch->vex.guest_f11;
+   fpu->d.f[12] = arch->vex.guest_f12;
+   fpu->d.f[13] = arch->vex.guest_f13;
+   fpu->d.f[14] = arch->vex.guest_f14;
+   fpu->d.f[15] = arch->vex.guest_f15;
+   fpu->d.f[16] = arch->vex.guest_f16;
+   fpu->d.f[17] = arch->vex.guest_f17;
+   fpu->d.f[18] = arch->vex.guest_f18;
+   fpu->d.f[19] = arch->vex.guest_f19;
+   fpu->d.f[20] = arch->vex.guest_f20;
+   fpu->d.f[21] = arch->vex.guest_f21;
+   fpu->d.f[22] = arch->vex.guest_f22;
+   fpu->d.f[23] = arch->vex.guest_f23;
+   fpu->d.f[24] = arch->vex.guest_f24;
+   fpu->d.f[25] = arch->vex.guest_f25;
+   fpu->d.f[26] = arch->vex.guest_f26;
+   fpu->d.f[27] = arch->vex.guest_f27;
+   fpu->d.f[28] = arch->vex.guest_f28;
+   fpu->d.f[29] = arch->vex.guest_f29;
+   fpu->d.f[30] = arch->vex.guest_f30;
+   fpu->d.f[31] = arch->vex.guest_f31;
+   fpu->d.fcsr = arch->vex.guest_fcsr;
+
 #elif defined(VGP_x86_freebsd)
 
 #elif defined(VGP_amd64_freebsd)
diff --git a/coregrind/m_debuginfo/d3basics.c b/coregrind/m_debuginfo/d3basics.c
index 97ecbdaf4..278766cb3 100644
--- a/coregrind/m_debuginfo/d3basics.c
+++ b/coregrind/m_debuginfo/d3basics.c
@@ -555,6 +555,9 @@ static Bool get_Dwarf_Reg( /*OUT*/Addr* a, Word regno, const RegSummary* regs )
 #  elif defined(VGP_arm64_linux)  || defined(VGP_arm64_freebsd)
    if (regno == 31) { *a = regs->sp; return True; }
    if (regno == 29) { *a = regs->fp; return True; }
+#  elif defined(VGP_riscv64_linux)
+   if (regno == 2) { *a = regs->sp; return True; }
+   if (regno == 8) { *a = regs->fp; return True; }
 #  else
 #    error "Unknown platform"
 #  endif
diff --git a/coregrind/m_debuginfo/debuginfo.c b/coregrind/m_debuginfo/debuginfo.c
index cc79429bd..5ca0e40fc 100644
--- a/coregrind/m_debuginfo/debuginfo.c
+++ b/coregrind/m_debuginfo/debuginfo.c
@@ -1273,7 +1273,7 @@ ULong VG_(di_notify_mmap)( Addr a, Bool allow_SkFileV, Int use_fd )
    is_rx_map = seg->hasR && seg->hasX;
    is_rw_map = seg->hasR && seg->hasW;
 #  elif defined(VGA_amd64) || defined(VGA_ppc64be) || defined(VGA_ppc64le)  \
-        || defined(VGA_arm) || defined(VGA_arm64)
+        || defined(VGA_arm) || defined(VGA_arm64) || defined(VGA_riscv64)
    is_rx_map = seg->hasR && seg->hasX && !seg->hasW;
    is_rw_map = seg->hasR && seg->hasW && !seg->hasX;
 #  elif defined(VGP_s390x_linux)
@@ -3087,12 +3087,12 @@ UWord evalCfiExpr ( const XArray* exprs, Int ix,
             case Creg_IA_SP: return eec->uregs->sp;
             case Creg_IA_BP: return eec->uregs->fp;
             case Creg_MIPS_RA: return eec->uregs->ra;
-#           elif defined(VGA_ppc32) || defined(VGA_ppc64be) \
-               || defined(VGA_ppc64le)
 #           elif defined(VGP_arm64_linux) || defined(VGP_arm64_freebsd)
             case Creg_ARM64_SP: return eec->uregs->sp;
             case Creg_ARM64_X30: return eec->uregs->x30;
             case Creg_ARM64_X29: return eec->uregs->x29;
+#           elif defined(VGA_ppc32) || defined(VGA_ppc64be) \
+               || defined(VGA_ppc64le) || defined(VGP_riscv64_linux)
 #           else
 #             error "Unsupported arch"
 #           endif
@@ -3374,6 +3374,13 @@ static Addr compute_cfa ( const D3UnwindRegs* uregs,
       cfa = cfsi_m->cfa_off + uregs->x29;
       break;
 
+#     elif defined(VGP_riscv64_linux)
+      case CFIC_IA_SPREL:
+         cfa = cfsi_m->cfa_off + uregs->sp;
+         break;
+      case CFIC_IA_BPREL:
+         cfa = cfsi_m->cfa_off + uregs->fp;
+         break;
 #     else
 #       error "Unsupported arch"
 #     endif
@@ -3445,6 +3452,15 @@ Addr ML_(get_CFA) ( Addr ip, Addr sp, Addr fp,
      return compute_cfa(&uregs,
                         min_accessible,  max_accessible, ce->di, ce->cfsi_m);
    }
+#elif defined(VGA_riscv64)
+   { D3UnwindRegs uregs;
+     uregs.pc = ip;
+     uregs.sp = sp;
+     uregs.fp = fp;
+     uregs.ra = 0;
+     return compute_cfa(&uregs,
+                        min_accessible,  max_accessible, ce->di, ce->cfsi_m);
+   }
 
 #  else
    return 0; /* indicates failure */
@@ -3496,6 +3512,8 @@ void VG_(ppUnwindInfo) (Addr from, Addr to)
    For arm64, the unwound registers are: X29(FP) X30(LR) SP PC.
 
    For s390, the unwound registers are: R11(FP) R14(LR) R15(SP) F0..F7 PC.
+
+   For riscv64, the unwound registers are: X2(SP) X8(FP) PC.
 */
 Bool VG_(use_CF_info) ( /*MOD*/D3UnwindRegs* uregsHere,
                         Addr min_accessible,
@@ -3521,6 +3539,8 @@ Bool VG_(use_CF_info) ( /*MOD*/D3UnwindRegs* uregsHere,
    ipHere = uregsHere->pc;
 #  elif defined(VGP_arm64_freebsd)
    ipHere = uregsHere->pc;
+#  elif defined(VGP_riscv64_linux)
+   ipHere = uregsHere->pc;
 #  else
 #    error "Unknown arch"
 #  endif
@@ -3666,6 +3686,15 @@ Bool VG_(use_CF_info) ( /*MOD*/D3UnwindRegs* uregsHere,
    COMPUTE(uregsPrev.sp,  uregsHere->sp,  cfsi_m->sp_how,  cfsi_m->sp_off);
    COMPUTE(uregsPrev.x30, uregsHere->x30, cfsi_m->x30_how, cfsi_m->x30_off);
    COMPUTE(uregsPrev.x29, uregsHere->x29, cfsi_m->x29_how, cfsi_m->x29_off);
+#  elif defined(VGP_riscv64_linux)
+   /* Compute register values in the caller's frame. Notice that the previous
+      pc is equal to the previous ra and is calculated as such. The previous ra
+      is however set to 0 here as this helps to promptly fail cases where an
+      inner frame uses the CFIR_SAME rule for ra which is bogus. */
+   COMPUTE(uregsPrev.pc, uregsHere->ra, cfsi_m->ra_how, cfsi_m->ra_off);
+   COMPUTE(uregsPrev.sp, uregsHere->sp, cfsi_m->sp_how, cfsi_m->sp_off);
+   COMPUTE(uregsPrev.fp, uregsHere->fp, cfsi_m->fp_how, cfsi_m->fp_off);
+   uregsPrev.ra = 0;
 #  else
 #    error "Unknown arch"
 #  endif
diff --git a/coregrind/m_debuginfo/priv_storage.h b/coregrind/m_debuginfo/priv_storage.h
index 441b379d2..d8cba81c4 100644
--- a/coregrind/m_debuginfo/priv_storage.h
+++ b/coregrind/m_debuginfo/priv_storage.h
@@ -355,6 +355,19 @@ typedef
    }
    DiCfSI_m;
 #elif defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips)
+typedef
+   struct {
+      UChar cfa_how; /* a CFIC_ value */
+      UChar ra_how;  /* a CFIR_ value */
+      UChar sp_how;  /* a CFIR_ value */
+      UChar fp_how;  /* a CFIR_ value */
+      Int   cfa_off;
+      Int   ra_off;
+      Int   sp_off;
+      Int   fp_off;
+   }
+   DiCfSI_m;
+#elif defined(VGA_riscv64)
 typedef
    struct {
       UChar cfa_how; /* a CFIC_ value */
diff --git a/coregrind/m_debuginfo/readdwarf.c b/coregrind/m_debuginfo/readdwarf.c
index 48df2e73e..9d512f8fd 100644
--- a/coregrind/m_debuginfo/readdwarf.c
+++ b/coregrind/m_debuginfo/readdwarf.c
@@ -2066,6 +2066,10 @@ void ML_(read_debuginfo_dwarf1) (
 #  define FP_REG         30
 #  define SP_REG         29
 #  define RA_REG_DEFAULT 31
+#elif defined(VGP_riscv64_linux)
+#  define FP_REG         8
+#  define SP_REG         2
+#  define RA_REG_DEFAULT 1
 #else
 #  error "Unknown platform"
 #endif
@@ -2084,6 +2088,8 @@ void ML_(read_debuginfo_dwarf1) (
 # define N_CFI_REGS 128
 #elif defined(VGP_s390x_linux)
 # define N_CFI_REGS 66
+#elif defined(VGP_riscv64_linux)
+# define N_CFI_REGS 128
 #else
 # define N_CFI_REGS 20
 #endif
@@ -2310,6 +2316,10 @@ static void initUnwindContext ( /*OUT*/UnwindContext* ctx )
          start out as RR_Same. */
       ctx->state[j].reg[29/*FP*/].tag = RR_Same;
       ctx->state[j].reg[30/*LR*/].tag = RR_Same;
+#     elif defined(VGA_riscv64)
+      /* Registers fp and ra start out implicitly as RR_Same. */
+      ctx->state[j].reg[FP_REG].tag = RR_Same;
+      ctx->state[j].reg[RA_REG_DEFAULT].tag = RR_Same;
 #     endif
    }
 }
@@ -2392,7 +2402,8 @@ static Bool summarise_context(/*OUT*/Addr* base,
    if (ctxs->cfa_is_regoff && ctxs->cfa_reg == SP_REG) {
       si_m->cfa_off = ctxs->cfa_off;
 #     if defined(VGA_x86) || defined(VGA_amd64) || defined(VGA_s390x) \
-         || defined(VGA_mips32) || defined(VGA_nanomips) || defined(VGA_mips64)
+         || defined(VGA_mips32) || defined(VGA_nanomips) \
+         || defined(VGA_mips64) || defined(VGA_riscv64)
       si_m->cfa_how = CFIC_IA_SPREL;
 #     elif defined(VGA_arm)
       si_m->cfa_how = CFIC_ARM_R13REL;
@@ -2406,7 +2417,8 @@ static Bool summarise_context(/*OUT*/Addr* base,
    if (ctxs->cfa_is_regoff && ctxs->cfa_reg == FP_REG) {
       si_m->cfa_off = ctxs->cfa_off;
 #     if defined(VGA_x86) || defined(VGA_amd64) || defined(VGA_s390x) \
-         || defined(VGA_mips32) || defined(VGA_nanomips) || defined(VGA_mips64)
+         || defined(VGA_mips32) || defined(VGA_nanomips) \
+         || defined(VGA_mips64) || defined(VGA_riscv64)
       si_m->cfa_how = CFIC_IA_BPREL;
 #     elif defined(VGA_arm)
       si_m->cfa_how = CFIC_ARM_R12REL;
@@ -2786,6 +2798,30 @@ static Bool summarise_context(/*OUT*/Addr* base,
 #  elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
    /* These don't use CFI based unwinding (is that really true?) */
 
+#  elif defined(VGA_riscv64)
+
+   /* --- entire tail of this fn specialised for riscv64 --- */
+
+   SUMMARISE_HOW(si_m->ra_how, si_m->ra_off, ctxs->reg[ctx->ra_reg]);
+   SUMMARISE_HOW(si_m->fp_how, si_m->fp_off, ctxs->reg[FP_REG]);
+
+   /* on riscv64, it seems the old sp value before the call is always
+      the same as the CFA.  Therefore ... */
+   si_m->sp_how = CFIR_CFAREL;
+   si_m->sp_off = 0;
+
+   /* bogus looking range?  Note, we require that the difference is
+      representable in 32 bits. */
+   if (loc_start >= ctx->loc)
+      { why = 4; goto failed; }
+   if (ctx->loc - loc_start > 10000000 /* let's say */)
+      { why = 5; goto failed; }
+
+   *base = loc_start + ctx->initloc;
+   *len  = (UInt)(ctx->loc - loc_start);
+
+   return True;
+
 #  else
 #    error "Unknown arch"
 #  endif
@@ -2884,7 +2920,7 @@ static Int copy_convert_CfiExpr_tree ( XArray*        dstxa,
          if (dwreg == srcuc->ra_reg)
             return ML_(CfiExpr_CfiReg)( dstxa, Creg_ARM64_X30 );
 #        elif defined(VGA_ppc32) || defined(VGA_ppc64be) \
-            || defined(VGA_ppc64le)
+            || defined(VGA_ppc64le) || defined(VGA_riscv64)
 #        else
 #           error "Unknown arch"
 #        endif
diff --git a/coregrind/m_debuginfo/readelf.c b/coregrind/m_debuginfo/readelf.c
index 735f83044..2a696df32 100644
--- a/coregrind/m_debuginfo/readelf.c
+++ b/coregrind/m_debuginfo/readelf.c
@@ -1780,7 +1780,8 @@ static HChar* readlink_path (const HChar *path)
 
    while (tries > 0) {
       SysRes res;
-#if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux)
+#if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
+    || defined(VGP_riscv64_linux)
       res = VG_(do_syscall4)(__NR_readlinkat, VKI_AT_FDCWD,
                                               (UWord)path, (UWord)buf, bufsiz);
 #elif defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_freebsd)
@@ -2705,6 +2706,7 @@ Bool ML_(read_elf_object) ( struct _DebugInfo* di )
          || defined(VGP_arm_linux) || defined (VGP_s390x_linux) \
          || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \
          || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
+         || defined(VGP_riscv64_linux) \
          || defined(VGP_x86_solaris) || defined(VGP_amd64_solaris) \
          || defined(VGP_x86_freebsd) || defined(VGP_amd64_freebsd) \
          || defined(VGP_arm64_freebsd)
diff --git a/coregrind/m_debuginfo/storage.c b/coregrind/m_debuginfo/storage.c
index 148de6f17..637cd9879 100644
--- a/coregrind/m_debuginfo/storage.c
+++ b/coregrind/m_debuginfo/storage.c
@@ -260,6 +260,11 @@ void ML_(ppDiCfSI) ( const XArray* /* of CfiExpr */ exprs,
    SHOW_HOW(si_m->x30_how, si_m->x30_off);
    VG_(printf)(" X29=");
    SHOW_HOW(si_m->x29_how, si_m->x29_off);
+#  elif defined(VGA_riscv64)
+   VG_(printf)(" SP=");
+   SHOW_HOW(si_m->sp_how, si_m->sp_off);
+   VG_(printf)(" FP=");
+   SHOW_HOW(si_m->fp_how, si_m->fp_off);
 #  else
 #    error "Unknown arch"
 #  endif
diff --git a/coregrind/m_debuglog.c b/coregrind/m_debuglog.c
index cad95bcbe..225e5a085 100644
--- a/coregrind/m_debuglog.c
+++ b/coregrind/m_debuglog.c
@@ -637,6 +637,34 @@ static UInt local_sys_getpid ( void )
    return a0;
 }
 
+#elif defined(VGP_riscv64_linux)
+
+/* Write n bytes of buf to stderr (fd 2) with a raw write syscall. Returns
+   the syscall result, or (UInt)-1 if that result is negative (an error). */
+static UInt local_sys_write_stderr ( const HChar* buf, Int n )
+{
+   register RegWord a0 asm("a0") = 2; /* stderr */
+   register RegWord a1 asm("a1") = (RegWord)buf;
+   register RegWord a2 asm("a2") = n;
+   register RegWord a7 asm("a7") = __NR_write;
+   /* "memory" clobber: the kernel reads buf behind the compiler's back. */
+   __asm__ volatile ( "ecall\n" : "+r" (a0)
+                      : "r" (a1), "r" (a2), "r" (a7) : "memory" );
+   return (long)a0 >= 0 ? (UInt)a0 : -1; /* signed test: RegWord is unsigned */
+}
+
+/* Return the result of a raw getpid syscall, truncated to UInt. */
+static UInt local_sys_getpid ( void )
+{
+   /* a7 carries the syscall number in; a0 carries the result out. */
+   register RegWord a7 asm("a7") = __NR_getpid;
+   register RegWord a0 asm("a0");
+   __asm__ volatile (
+      "ecall\n" : "=r" (a0) : "r" (a7)
+   );
+   return (UInt)a0;
+}
+
 #elif defined(VGP_x86_solaris)
 static UInt local_sys_write_stderr ( const HChar* buf, Int n )
 {
diff --git a/coregrind/m_dispatch/dispatch-riscv64-linux.S b/coregrind/m_dispatch/dispatch-riscv64-linux.S
new file mode 100644
index 000000000..c4941e457
--- /dev/null
+++ b/coregrind/m_dispatch/dispatch-riscv64-linux.S
@@ -0,0 +1,298 @@
+
+/*--------------------------------------------------------------------*/
+/*--- The core dispatch loop, for jumping to a code address.       ---*/
+/*---                                     dispatch-riscv64-linux.S ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+  This file is part of Valgrind, a dynamic binary instrumentation
+  framework.
+
+  Copyright (C) 2020-2023 Petr Pavlu
+      petr.pavlu@dagobah.cz
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of the
+  License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+  The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "pub_core_basics_asm.h"
+
+#if defined(VGP_riscv64_linux)
+
+#include "pub_core_dispatch_asm.h"
+#include "pub_core_transtab_asm.h"
+#include "libvex_guest_offsets.h"
+
+
+/*------------------------------------------------------------*/
+/*---                                                      ---*/
+/*--- The dispatch loop.  VG_(disp_run_translations) is    ---*/
+/*--- used to run all translations,                        ---*/
+/*--- including no-redir ones.                             ---*/
+/*---                                                      ---*/
+/*------------------------------------------------------------*/
+
+/*----------------------------------------------------*/
+/*--- Entry and preamble (set everything up)       ---*/
+/*----------------------------------------------------*/
+
+/* signature:
+void VG_(disp_run_translations)( UWord* two_words,
+                                 void*  guest_state,
+                                 Addr   host_addr );
+*/
+.text
+.global VG_(disp_run_translations)
+VG_(disp_run_translations):
+	/* a0 holds two_words
+	   a1 holds guest_state
+	   a2 holds host_addr
+	*/
+	/* Push the callee-saved registers (ra, s0-s11). Note this sequence
+	   maintains 16-alignment of sp. Also save a0 since it will be needed
+	   in the postamble. */
+	addi sp, sp, -112                    /* 14 slots * 8 bytes */
+	sd ra, 104(sp)
+	sd s0, 96(sp)
+	sd s1, 88(sp)
+	sd s2, 80(sp)
+	sd s3, 72(sp)
+	sd s4, 64(sp)
+	sd s5, 56(sp)
+	sd s6, 48(sp)
+	sd s7, 40(sp)
+	sd s8, 32(sp)
+	sd s9, 24(sp)
+	sd s10, 16(sp)
+	sd s11, 8(sp)
+	sd a0, 0(sp)                         /* two_words, reloaded by postamble */
+
+	/* Set up the guest state pointer, biased by 2048: s0 = a1 + 2048. */
+	li t0, 2048
+	add s0, a1, t0
+
+	/* and jump into the code cache. Chained translations in the code cache
+	   run, until for whatever reason, they can't continue. When that
+	   happens, the translation in question will jump (or call) to one of
+	   the continuation points VG_(cp_...) below. */
+	jr a2
+	/* NOTREACHED */
+
+/*----------------------------------------------------*/
+/*--- Postamble and exit.                          ---*/
+/*----------------------------------------------------*/
+
+postamble:
+	/* At this point, t0 and t1 contain two words to be returned to the
+	   caller. t0 holds a TRC value, and t1 optionally may hold another
+	   word (for CHAIN_ME exits, the address of the place to patch.) */
+
+	/* Restore int regs, including importantly a0 (two_words). */
+	ld ra, 104(sp)
+	ld s0, 96(sp)
+	ld s1, 88(sp)
+	ld s2, 80(sp)
+	ld s3, 72(sp)
+	ld s4, 64(sp)
+	ld s5, 56(sp)
+	ld s6, 48(sp)
+	ld s7, 40(sp)
+	ld s8, 32(sp)
+	ld s9, 24(sp)
+	ld s10, 16(sp)
+	ld s11, 8(sp)
+	ld a0, 0(sp)
+	addi sp, sp, 112                     /* drop the 112-byte frame */
+
+	/* Stash return values: two_words[0] = t0, two_words[1] = t1. */
+	sd t0, 0(a0)
+	sd t1, 8(a0)
+	ret
+
+/*----------------------------------------------------*/
+/*--- Continuation points                          ---*/
+/*----------------------------------------------------*/
+
+/* ------ Chain me to slow entry point ------ */
+.global VG_(disp_cp_chain_me_to_slowEP)
+VG_(disp_cp_chain_me_to_slowEP):
+	/* We got called. The return address indicates where the patching needs
+	   to happen. Collect the return address and exit back to C land,
+	   handing the caller the pair (Chain_me_S, RA). */
+	li t0, VG_TRC_CHAIN_ME_TO_SLOW_EP
+	mv t1, ra
+	/* 4 = lui t0, disp_cp_chain_me_to_slowEP[47:28]'
+	   4 = addiw t0, t0, disp_cp_chain_me_to_slowEP[27:16]'
+	   2 = c.slli t0, 12
+	   4 = addi t0, t0, disp_cp_chain_me_to_slowEP[15:4]'
+	   2 = c.slli t0, 4
+	   2 = c.addi t0, disp_cp_chain_me_to_slowEP[3:0]'
+	   2 = c.jalr 0(t0)
+	*/
+	addi t1, t1, -(4+4+2+4+2+2+2)        /* t1 = ra - 20 bytes */
+	j postamble
+
+/* ------ Chain me to fast entry point ------ */
+.global VG_(disp_cp_chain_me_to_fastEP)
+VG_(disp_cp_chain_me_to_fastEP):
+	/* We got called. The return address indicates where the patching needs
+	   to happen. Collect the return address and exit back to C land,
+	   handing the caller the pair (Chain_me_F, RA). */
+	li t0, VG_TRC_CHAIN_ME_TO_FAST_EP
+	mv t1, ra
+	/* 4 = lui t0, disp_cp_chain_me_to_fastEP[47:28]'
+	   4 = addiw t0, t0, disp_cp_chain_me_to_fastEP[27:16]'
+	   2 = c.slli t0, 12
+	   4 = addi t0, t0, disp_cp_chain_me_to_fastEP[15:4]'
+	   2 = c.slli t0, 4
+	   2 = c.addi t0, disp_cp_chain_me_to_fastEP[3:0]'
+	   2 = c.jalr 0(t0)
+	*/
+	addi t1, t1, -(4+4+2+4+2+2+2)        /* t1 = ra - 20 bytes */
+	j postamble
+
+/* ------ Indirect but boring jump ------ */
+.global VG_(disp_cp_xindir)
+VG_(disp_cp_xindir):
+	/* Where are we going? Load the guest pc (s0 is biased by 2048). */
+	ld t0, OFFSET_riscv64_pc-2048(s0)
+
+	/* Stats only: bump the 32-bit counter; t2 is address scratch. */
+	lw t1, VG_(stats__n_xIndirs_32)
+	addi t1, t1, 1
+	sw t1, VG_(stats__n_xIndirs_32), t2
+
+	/* LIVE: s0 (guest state ptr), t0 (guest address to go to).
+	   We use 6 temporaries:
+	     t6 (to point at the relevant FastCacheSet),
+	     t1, t2, t3 (scratch, for swapping entries within a set)
+	     t4, t5 (other scratch) */
+
+	/* Try a fast lookup in the translation cache. This is pretty much
+	   a handcoded version of VG_(lookupInFastCache). */
+
+	/* Compute t6 = VG_TT_FAST_HASH(guest). */
+	srli t6, t0, 1                       /* g2 = guest >> 1 */
+	srli t4, t6, VG_TT_FAST_BITS         /* g2 >> VG_TT_FAST_BITS */
+	xor t6, t4, t6                       /* (g2 >> VG_TT_FAST_BITS) ^ g2 */
+	li t4, VG_TT_FAST_MASK               /* VG_TT_FAST_MASK */
+	and t6, t6, t4                       /* setNo */
+
+	/* Compute t6 = &VG_(tt_fast)[t6]. */
+	la t4, VG_(tt_fast)                  /* &VG_(tt_fast)[0] */
+	slli t6, t6, VG_FAST_CACHE_SET_BITS  /* setNo << VG_FAST_CACHE_SET_BITS */
+	add t6, t4, t6                       /* &VG_(tt_fast)[setNo] */
+
+	/* LIVE: s0 (guest state ptr), t0 (guest addr), t6 (cache set). */
+	/* Try way 0. */
+	ld t4, FCS_g0(t6)                    /* t4 = .guest0 */
+	bne t4, t0, 1f                       /* cmp against .guest0 */
+	/* Hit at way 0. */
+	/* Go to .host0. */
+	ld t5, FCS_h0(t6)                    /* t5 = .host0 */
+	jr t5
+	/*NOTREACHED*/
+
+1:	/* Try way 1. */
+	ld t4, FCS_g1(t6)
+	bne t4, t0, 2f                       /* cmp against .guest1 */
+	/* Hit at way 1; swap upwards. */
+	ld t1, FCS_g0(t6)                    /* t1 = old .guest0 */
+	ld t2, FCS_h0(t6)                    /* t2 = old .host0 */
+	ld t3, FCS_h1(t6)                    /* t3 = old .host1 */
+	sd t0, FCS_g0(t6)                    /* new .guest0 = guest */
+	sd t3, FCS_h0(t6)                    /* new .host0 = old .host1 */
+	sd t1, FCS_g1(t6)                    /* new .guest1 = old .guest0 */
+	sd t2, FCS_h1(t6)                    /* new .host1 = old .host0 */
+	/* Stats only. */
+	lw t4, VG_(stats__n_xIndir_hits1_32)
+	addi t4, t4, 1
+	sw t4, VG_(stats__n_xIndir_hits1_32), t5
+	/* Go to old .host1 a.k.a. new .host0. */
+	jr t3
+	/*NOTREACHED*/
+
+2:	/* Try way 2. */
+	ld t4, FCS_g2(t6)
+	bne t4, t0, 3f                       /* cmp against .guest2 */
+	/* Hit at way 2; swap upwards. */
+	ld t1, FCS_g1(t6)                    /* t1 = old .guest1 */
+	ld t2, FCS_h1(t6)                    /* t2 = old .host1 */
+	ld t3, FCS_h2(t6)                    /* t3 = old .host2 */
+	sd t0, FCS_g1(t6)                    /* new .guest1 = guest */
+	sd t3, FCS_h1(t6)                    /* new .host1 = old .host2 */
+	sd t1, FCS_g2(t6)                    /* new .guest2 = old .guest1 */
+	sd t2, FCS_h2(t6)                    /* new .host2 = old .host1 */
+	/* Stats only. */
+	lw t4, VG_(stats__n_xIndir_hits2_32)
+	addi t4, t4, 1
+	sw t4, VG_(stats__n_xIndir_hits2_32), t5
+	/* Go to old .host2 a.k.a. new .host1. */
+	jr t3
+	/*NOTREACHED*/
+
+3:	/* Try way 3. */
+	ld t4, FCS_g3(t6)
+	bne t4, t0, 4f                       /* cmp against .guest3 */
+	/* Hit at way 3; swap upwards. */
+	ld t1, FCS_g2(t6)                    /* t1 = old .guest2 */
+	ld t2, FCS_h2(t6)                    /* t2 = old .host2 */
+	ld t3, FCS_h3(t6)                    /* t3 = old .host3 */
+	sd t0, FCS_g2(t6)                    /* new .guest2 = guest */
+	sd t3, FCS_h2(t6)                    /* new .host2 = old .host3 */
+	sd t1, FCS_g3(t6)                    /* new .guest3 = old .guest2 */
+	sd t2, FCS_h3(t6)                    /* new .host3 = old .host2 */
+	/* Stats only. */
+	lw t4, VG_(stats__n_xIndir_hits3_32)
+	addi t4, t4, 1
+	sw t4, VG_(stats__n_xIndir_hits3_32), t5
+	/* Go to old .host3 a.k.a. new .host2. */
+	jr t3
+	/*NOTREACHED*/
+
+4:	/* Fast lookup failed. */
+	lw t4, VG_(stats__n_xIndir_misses_32)
+	addi t4, t4, 1
+	sw t4, VG_(stats__n_xIndir_misses_32), t5
+
+	li t0, VG_TRC_INNER_FASTMISS         /* first return word */
+	li t1, 0                             /* second return word */
+	j postamble
+
+/* ------ Assisted jump ------ */
+.global VG_(disp_cp_xassisted)
+VG_(disp_cp_xassisted):
+	/* s0 contains the TRC. Hand back the pair (s0, 0) via postamble. */
+	mv t0, s0
+	li t1, 0
+	j postamble
+
+/* ------ Event check failed ------ */
+.global VG_(disp_cp_evcheck_fail)
+VG_(disp_cp_evcheck_fail):
+	li t0, VG_TRC_INNER_COUNTERZERO      /* first return word */
+	li t1, 0                             /* second return word */
+	j postamble
+
+.size VG_(disp_run_translations), .-VG_(disp_run_translations)
+
+#endif // defined(VGP_riscv64_linux)
+
+/* Let the linker know we don't need an executable stack */
+MARK_STACK_NO_EXEC
+
+/*--------------------------------------------------------------------*/
+/*--- end                                 dispatch-riscv64-linux.S ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/m_gdbserver/riscv64-cpu-valgrind-s1.xml b/coregrind/m_gdbserver/riscv64-cpu-valgrind-s1.xml
new file mode 100644
index 000000000..22c7dd038
--- /dev/null
+++ b/coregrind/m_gdbserver/riscv64-cpu-valgrind-s1.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0"?>
+
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.riscv.cpu.valgrind.s1">
+  <reg name="zeros1" bitsize="64" type="int" regnum="69"/>
+  <reg name="ras1" bitsize="64" type="int"/>
+  <reg name="sps1" bitsize="64" type="int"/>
+  <reg name="gps1" bitsize="64" type="int"/>
+  <reg name="tps1" bitsize="64" type="int"/>
+  <reg name="t0s1" bitsize="64" type="int"/>
+  <reg name="t1s1" bitsize="64" type="int"/>
+  <reg name="t2s1" bitsize="64" type="int"/>
+  <reg name="fps1" bitsize="64" type="int"/>
+  <reg name="s1s1" bitsize="64" type="int"/>
+  <reg name="a0s1" bitsize="64" type="int"/>
+  <reg name="a1s1" bitsize="64" type="int"/>
+  <reg name="a2s1" bitsize="64" type="int"/>
+  <reg name="a3s1" bitsize="64" type="int"/>
+  <reg name="a4s1" bitsize="64" type="int"/>
+  <reg name="a5s1" bitsize="64" type="int"/>
+  <reg name="a6s1" bitsize="64" type="int"/>
+  <reg name="a7s1" bitsize="64" type="int"/>
+  <reg name="s2s1" bitsize="64" type="int"/>
+  <reg name="s3s1" bitsize="64" type="int"/>
+  <reg name="s4s1" bitsize="64" type="int"/>
+  <reg name="s5s1" bitsize="64" type="int"/>
+  <reg name="s6s1" bitsize="64" type="int"/>
+  <reg name="s7s1" bitsize="64" type="int"/>
+  <reg name="s8s1" bitsize="64" type="int"/>
+  <reg name="s9s1" bitsize="64" type="int"/>
+  <reg name="s10s1" bitsize="64" type="int"/>
+  <reg name="s11s1" bitsize="64" type="int"/>
+  <reg name="t3s1" bitsize="64" type="int"/>
+  <reg name="t4s1" bitsize="64" type="int"/>
+  <reg name="t5s1" bitsize="64" type="int"/>
+  <reg name="t6s1" bitsize="64" type="int"/>
+  <reg name="pcs1" bitsize="64" type="int"/>
+</feature>
diff --git a/coregrind/m_gdbserver/riscv64-cpu-valgrind-s2.xml b/coregrind/m_gdbserver/riscv64-cpu-valgrind-s2.xml
new file mode 100644
index 000000000..095a992fc
--- /dev/null
+++ b/coregrind/m_gdbserver/riscv64-cpu-valgrind-s2.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0"?>
+
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.riscv.cpu.valgrind.s2">
+  <reg name="zeros2" bitsize="64" type="int" regnum="138"/>
+  <reg name="ras2" bitsize="64" type="int"/>
+  <reg name="sps2" bitsize="64" type="int"/>
+  <reg name="gps2" bitsize="64" type="int"/>
+  <reg name="tps2" bitsize="64" type="int"/>
+  <reg name="t0s2" bitsize="64" type="int"/>
+  <reg name="t1s2" bitsize="64" type="int"/>
+  <reg name="t2s2" bitsize="64" type="int"/>
+  <reg name="fps2" bitsize="64" type="int"/>
+  <reg name="s1s2" bitsize="64" type="int"/>
+  <reg name="a0s2" bitsize="64" type="int"/>
+  <reg name="a1s2" bitsize="64" type="int"/>
+  <reg name="a2s2" bitsize="64" type="int"/>
+  <reg name="a3s2" bitsize="64" type="int"/>
+  <reg name="a4s2" bitsize="64" type="int"/>
+  <reg name="a5s2" bitsize="64" type="int"/>
+  <reg name="a6s2" bitsize="64" type="int"/>
+  <reg name="a7s2" bitsize="64" type="int"/>
+  <reg name="s2s2" bitsize="64" type="int"/>
+  <reg name="s3s2" bitsize="64" type="int"/>
+  <reg name="s4s2" bitsize="64" type="int"/>
+  <reg name="s5s2" bitsize="64" type="int"/>
+  <reg name="s6s2" bitsize="64" type="int"/>
+  <reg name="s7s2" bitsize="64" type="int"/>
+  <reg name="s8s2" bitsize="64" type="int"/>
+  <reg name="s9s2" bitsize="64" type="int"/>
+  <reg name="s10s2" bitsize="64" type="int"/>
+  <reg name="s11s2" bitsize="64" type="int"/>
+  <reg name="t3s2" bitsize="64" type="int"/>
+  <reg name="t4s2" bitsize="64" type="int"/>
+  <reg name="t5s2" bitsize="64" type="int"/>
+  <reg name="t6s2" bitsize="64" type="int"/>
+  <reg name="pcs2" bitsize="64" type="int"/>
+</feature>
diff --git a/coregrind/m_gdbserver/riscv64-cpu.xml b/coregrind/m_gdbserver/riscv64-cpu.xml
new file mode 100644
index 000000000..2d31a6e19
--- /dev/null
+++ b/coregrind/m_gdbserver/riscv64-cpu.xml
@@ -0,0 +1,47 @@
+<?xml version="1.0"?>
+
+
+
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.riscv.cpu">
+  <reg name="zero" bitsize="64" type="int" regnum="0"/>
+  <reg name="ra" bitsize="64" type="code_ptr"/>
+  <reg name="sp" bitsize="64" type="data_ptr"/>
+  <reg name="gp" bitsize="64" type="data_ptr"/>
+  <reg name="tp" bitsize="64" type="data_ptr"/>
+  <reg name="t0" bitsize="64" type="int"/>
+  <reg name="t1" bitsize="64" type="int"/>
+  <reg name="t2" bitsize="64" type="int"/>
+  <reg name="fp" bitsize="64" type="data_ptr"/>
+  <reg name="s1" bitsize="64" type="int"/>
+  <reg name="a0" bitsize="64" type="int"/>
+  <reg name="a1" bitsize="64" type="int"/>
+  <reg name="a2" bitsize="64" type="int"/>
+  <reg name="a3" bitsize="64" type="int"/>
+  <reg name="a4" bitsize="64" type="int"/>
+  <reg name="a5" bitsize="64" type="int"/>
+  <reg name="a6" bitsize="64" type="int"/>
+  <reg name="a7" bitsize="64" type="int"/>
+  <reg name="s2" bitsize="64" type="int"/>
+  <reg name="s3" bitsize="64" type="int"/>
+  <reg name="s4" bitsize="64" type="int"/>
+  <reg name="s5" bitsize="64" type="int"/>
+  <reg name="s6" bitsize="64" type="int"/>
+  <reg name="s7" bitsize="64" type="int"/>
+  <reg name="s8" bitsize="64" type="int"/>
+  <reg name="s9" bitsize="64" type="int"/>
+  <reg name="s10" bitsize="64" type="int"/>
+  <reg name="s11" bitsize="64" type="int"/>
+  <reg name="t3" bitsize="64" type="int"/>
+  <reg name="t4" bitsize="64" type="int"/>
+  <reg name="t5" bitsize="64" type="int"/>
+  <reg name="t6" bitsize="64" type="int"/>
+  <reg name="pc" bitsize="64" type="code_ptr"/>
+</feature>
diff --git a/coregrind/m_gdbserver/riscv64-fpu-valgrind-s1.xml b/coregrind/m_gdbserver/riscv64-fpu-valgrind-s1.xml
new file mode 100644
index 000000000..263a3fffb
--- /dev/null
+++ b/coregrind/m_gdbserver/riscv64-fpu-valgrind-s1.xml
@@ -0,0 +1,47 @@
+<?xml version="1.0"?>
+
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.riscv.fpu.valgrind.s1">
+
+  <reg name="ft0s1" bitsize="64" type="int" regnum="102"/>
+  <reg name="ft1s1" bitsize="64" type="int"/>
+  <reg name="ft2s1" bitsize="64" type="int"/>
+  <reg name="ft3s1" bitsize="64" type="int"/>
+  <reg name="ft4s1" bitsize="64" type="int"/>
+  <reg name="ft5s1" bitsize="64" type="int"/>
+  <reg name="ft6s1" bitsize="64" type="int"/>
+  <reg name="ft7s1" bitsize="64" type="int"/>
+  <reg name="fs0s1" bitsize="64" type="int"/>
+  <reg name="fs1s1" bitsize="64" type="int"/>
+  <reg name="fa0s1" bitsize="64" type="int"/>
+  <reg name="fa1s1" bitsize="64" type="int"/>
+  <reg name="fa2s1" bitsize="64" type="int"/>
+  <reg name="fa3s1" bitsize="64" type="int"/>
+  <reg name="fa4s1" bitsize="64" type="int"/>
+  <reg name="fa5s1" bitsize="64" type="int"/>
+  <reg name="fa6s1" bitsize="64" type="int"/>
+  <reg name="fa7s1" bitsize="64" type="int"/>
+  <reg name="fs2s1" bitsize="64" type="int"/>
+  <reg name="fs3s1" bitsize="64" type="int"/>
+  <reg name="fs4s1" bitsize="64" type="int"/>
+  <reg name="fs5s1" bitsize="64" type="int"/>
+  <reg name="fs6s1" bitsize="64" type="int"/>
+  <reg name="fs7s1" bitsize="64" type="int"/>
+  <reg name="fs8s1" bitsize="64" type="int"/>
+  <reg name="fs9s1" bitsize="64" type="int"/>
+  <reg name="fs10s1" bitsize="64" type="int"/>
+  <reg name="fs11s1" bitsize="64" type="int"/>
+  <reg name="ft8s1" bitsize="64" type="int"/>
+  <reg name="ft9s1" bitsize="64" type="int"/>
+  <reg name="ft10s1" bitsize="64" type="int"/>
+  <reg name="ft11s1" bitsize="64" type="int"/>
+
+  <reg name="fflagss1" bitsize="32" type="int" regnum="135"/>
+  <reg name="frms1" bitsize="32" type="int"/>
+  <reg name="fcsrs1" bitsize="32" type="int"/>
+</feature>
diff --git a/coregrind/m_gdbserver/riscv64-fpu-valgrind-s2.xml b/coregrind/m_gdbserver/riscv64-fpu-valgrind-s2.xml
new file mode 100644
index 000000000..1992c03f2
--- /dev/null
+++ b/coregrind/m_gdbserver/riscv64-fpu-valgrind-s2.xml
@@ -0,0 +1,47 @@
+<?xml version="1.0"?>
+
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.riscv.fpu.valgrind.s2">
+
+  <reg name="ft0s2" bitsize="64" type="int" regnum="171"/>
+  <reg name="ft1s2" bitsize="64" type="int"/>
+  <reg name="ft2s2" bitsize="64" type="int"/>
+  <reg name="ft3s2" bitsize="64" type="int"/>
+  <reg name="ft4s2" bitsize="64" type="int"/>
+  <reg name="ft5s2" bitsize="64" type="int"/>
+  <reg name="ft6s2" bitsize="64" type="int"/>
+  <reg name="ft7s2" bitsize="64" type="int"/>
+  <reg name="fs0s2" bitsize="64" type="int"/>
+  <reg name="fs1s2" bitsize="64" type="int"/>
+  <reg name="fa0s2" bitsize="64" type="int"/>
+  <reg name="fa1s2" bitsize="64" type="int"/>
+  <reg name="fa2s2" bitsize="64" type="int"/>
+  <reg name="fa3s2" bitsize="64" type="int"/>
+  <reg name="fa4s2" bitsize="64" type="int"/>
+  <reg name="fa5s2" bitsize="64" type="int"/>
+  <reg name="fa6s2" bitsize="64" type="int"/>
+  <reg name="fa7s2" bitsize="64" type="int"/>
+  <reg name="fs2s2" bitsize="64" type="int"/>
+  <reg name="fs3s2" bitsize="64" type="int"/>
+  <reg name="fs4s2" bitsize="64" type="int"/>
+  <reg name="fs5s2" bitsize="64" type="int"/>
+  <reg name="fs6s2" bitsize="64" type="int"/>
+  <reg name="fs7s2" bitsize="64" type="int"/>
+  <reg name="fs8s2" bitsize="64" type="int"/>
+  <reg name="fs9s2" bitsize="64" type="int"/>
+  <reg name="fs10s2" bitsize="64" type="int"/>
+  <reg name="fs11s2" bitsize="64" type="int"/>
+  <reg name="ft8s2" bitsize="64" type="int"/>
+  <reg name="ft9s2" bitsize="64" type="int"/>
+  <reg name="ft10s2" bitsize="64" type="int"/>
+  <reg name="ft11s2" bitsize="64" type="int"/>
+
+  <reg name="fflagss2" bitsize="32" type="int" regnum="204"/>
+  <reg name="frms2" bitsize="32" type="int"/>
+  <reg name="fcsrs2" bitsize="32" type="int"/>
+</feature>
diff --git a/coregrind/m_gdbserver/riscv64-fpu.xml b/coregrind/m_gdbserver/riscv64-fpu.xml
new file mode 100644
index 000000000..ff42b4a21
--- /dev/null
+++ b/coregrind/m_gdbserver/riscv64-fpu.xml
@@ -0,0 +1,56 @@
+<?xml version="1.0"?>
+
+
+
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.riscv.fpu">
+
+  <union id="riscv_double">
+    <field name="float" type="ieee_single"/>
+    <field name="double" type="ieee_double"/>
+  </union>
+
+  <reg name="ft0" bitsize="64" type="riscv_double" regnum="33"/>
+  <reg name="ft1" bitsize="64" type="riscv_double"/>
+  <reg name="ft2" bitsize="64" type="riscv_double"/>
+  <reg name="ft3" bitsize="64" type="riscv_double"/>
+  <reg name="ft4" bitsize="64" type="riscv_double"/>
+  <reg name="ft5" bitsize="64" type="riscv_double"/>
+  <reg name="ft6" bitsize="64" type="riscv_double"/>
+  <reg name="ft7" bitsize="64" type="riscv_double"/>
+  <reg name="fs0" bitsize="64" type="riscv_double"/>
+  <reg name="fs1" bitsize="64" type="riscv_double"/>
+  <reg name="fa0" bitsize="64" type="riscv_double"/>
+  <reg name="fa1" bitsize="64" type="riscv_double"/>
+  <reg name="fa2" bitsize="64" type="riscv_double"/>
+  <reg name="fa3" bitsize="64" type="riscv_double"/>
+  <reg name="fa4" bitsize="64" type="riscv_double"/>
+  <reg name="fa5" bitsize="64" type="riscv_double"/>
+  <reg name="fa6" bitsize="64" type="riscv_double"/>
+  <reg name="fa7" bitsize="64" type="riscv_double"/>
+  <reg name="fs2" bitsize="64" type="riscv_double"/>
+  <reg name="fs3" bitsize="64" type="riscv_double"/>
+  <reg name="fs4" bitsize="64" type="riscv_double"/>
+  <reg name="fs5" bitsize="64" type="riscv_double"/>
+  <reg name="fs6" bitsize="64" type="riscv_double"/>
+  <reg name="fs7" bitsize="64" type="riscv_double"/>
+  <reg name="fs8" bitsize="64" type="riscv_double"/>
+  <reg name="fs9" bitsize="64" type="riscv_double"/>
+  <reg name="fs10" bitsize="64" type="riscv_double"/>
+  <reg name="fs11" bitsize="64" type="riscv_double"/>
+  <reg name="ft8" bitsize="64" type="riscv_double"/>
+  <reg name="ft9" bitsize="64" type="riscv_double"/>
+  <reg name="ft10" bitsize="64" type="riscv_double"/>
+  <reg name="ft11" bitsize="64" type="riscv_double"/>
+
+  <reg name="fflags" bitsize="32" type="int" regnum="66"/>
+  <reg name="frm" bitsize="32" type="int" regnum="67"/>
+  <reg name="fcsr" bitsize="32" type="int" regnum="68"/>
+</feature>
diff --git a/coregrind/m_gdbserver/riscv64-linux-valgrind.xml b/coregrind/m_gdbserver/riscv64-linux-valgrind.xml
new file mode 100644
index 000000000..0227f2ee7
--- /dev/null
+++ b/coregrind/m_gdbserver/riscv64-linux-valgrind.xml
@@ -0,0 +1,21 @@
+<?xml version="1.0"?>
+
+
+<!DOCTYPE target SYSTEM "gdb-target.dtd">
+<target>
+  <architecture>riscv</architecture>
+  <xi:include href="riscv64-cpu.xml"/>
+  <xi:include href="riscv64-fpu.xml"/>
+  <xi:include href="riscv64-cpu-valgrind-s1.xml"/>
+  <xi:include href="riscv64-fpu-valgrind-s1.xml"/>
+  <xi:include href="riscv64-cpu-valgrind-s2.xml"/>
+  <xi:include href="riscv64-fpu-valgrind-s2.xml"/>
+
+  <feature name="org.gnu.gdb.riscv.linux">
+    <reg name="restart" bitsize="64" group="system"/>
+  </feature>
+</target>
\ No newline at end of file
diff --git a/coregrind/m_gdbserver/riscv64-linux.xml b/coregrind/m_gdbserver/riscv64-linux.xml
new file mode 100644
index 000000000..7c8631220
--- /dev/null
+++ b/coregrind/m_gdbserver/riscv64-linux.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0"?>
+
+
+<!DOCTYPE target SYSTEM "gdb-target.dtd">
+<target>
+  <architecture>riscv</architecture>
+  <xi:include href="riscv64-cpu.xml"/>
+  <xi:include href="riscv64-fpu.xml"/>
+
+  <feature name="org.gnu.gdb.riscv.linux">
+    <reg name="restart" bitsize="64" group="system"/>
+  </feature>
+</target>
\ No newline at end of file
diff --git a/coregrind/m_gdbserver/target.c b/coregrind/m_gdbserver/target.c
index f9f32f4aa..4238c608c 100644
--- a/coregrind/m_gdbserver/target.c
+++ b/coregrind/m_gdbserver/target.c
@@ -867,6 +867,8 @@ void valgrind_initialize_target(void)
    mips64_init_architecture(&the_low_target);
 #elif defined(VGA_nanomips)
    nanomips_init_architecture(&the_low_target);
+#elif defined(VGA_riscv64)
+   riscv64_init_architecture(&the_low_target);
 #else
    #error "architecture missing in target.c valgrind_initialize_target"
 #endif
diff --git a/coregrind/m_gdbserver/valgrind-low-riscv64.c b/coregrind/m_gdbserver/valgrind-low-riscv64.c
new file mode 100644
index 000000000..75088fc1e
--- /dev/null
+++ b/coregrind/m_gdbserver/valgrind-low-riscv64.c
@@ -0,0 +1,287 @@
+/* Low level interface to valgrind, for the remote server for GDB integrated
+   in valgrind.
+   Copyright (C) 2022
+   Free Software Foundation, Inc.
+
+   This file is part of VALGRIND.
+   It has been inspired from a file from gdbserver in gdb 6.6.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+#include "server.h"
+#include "target.h"
+#include "regdef.h"
+#include "regcache.h"
+
+#include "pub_core_machine.h"
+#include "pub_core_threadstate.h"
+#include "pub_core_transtab.h"
+#include "pub_core_gdbserver.h"
+#include "pub_core_debuginfo.h"
+
+#include "valgrind_low.h"
+
+#include "libvex_guest_riscv64.h"
+
+/* {name, bit offset, bit size}; from GDB gdb/features/riscv/64bit-{cpu,fpu}.c */
+static struct reg regs[] = {
+  { "zero", 0, 64 },
+  { "ra", 64, 64 },
+  { "sp", 128, 64 },
+  { "gp", 192, 64 },
+  { "tp", 256, 64 },
+  { "t0", 320, 64 },
+  { "t1", 384, 64 },
+  { "t2", 448, 64 },
+  { "fp", 512, 64 },
+  { "s1", 576, 64 },
+  { "a0", 640, 64 },
+  { "a1", 704, 64 },
+  { "a2", 768, 64 },
+  { "a3", 832, 64 },
+  { "a4", 896, 64 },
+  { "a5", 960, 64 },
+  { "a6", 1024, 64 },
+  { "a7", 1088, 64 },
+  { "s2", 1152, 64 },
+  { "s3", 1216, 64 },
+  { "s4", 1280, 64 },
+  { "s5", 1344, 64 },
+  { "s6", 1408, 64 },
+  { "s7", 1472, 64 },
+  { "s8", 1536, 64 },
+  { "s9", 1600, 64 },
+  { "s10", 1664, 64 },
+  { "s11", 1728, 64 },
+  { "t3", 1792, 64 },
+  { "t4", 1856, 64 },
+  { "t5", 1920, 64 },
+  { "t6", 1984, 64 },
+  { "pc", 2048, 64 },  /* index 32: last entry of the integer group */
+
+  { "ft0", 2112, 64 },  /* index 33: first floating-point register */
+  { "ft1", 2176, 64 },
+  { "ft2", 2240, 64 },
+  { "ft3", 2304, 64 },
+  { "ft4", 2368, 64 },
+  { "ft5", 2432, 64 },
+  { "ft6", 2496, 64 },
+  { "ft7", 2560, 64 },
+  { "fs0", 2624, 64 },
+  { "fs1", 2688, 64 },
+  { "fa0", 2752, 64 },
+  { "fa1", 2816, 64 },
+  { "fa2", 2880, 64 },
+  { "fa3", 2944, 64 },
+  { "fa4", 3008, 64 },
+  { "fa5", 3072, 64 },
+  { "fa6", 3136, 64 },
+  { "fa7", 3200, 64 },
+  { "fs2", 3264, 64 },
+  { "fs3", 3328, 64 },
+  { "fs4", 3392, 64 },
+  { "fs5", 3456, 64 },
+  { "fs6", 3520, 64 },
+  { "fs7", 3584, 64 },
+  { "fs8", 3648, 64 },
+  { "fs9", 3712, 64 },
+  { "fs10", 3776, 64 },
+  { "fs11", 3840, 64 },
+  { "ft8", 3904, 64 },
+  { "ft9", 3968, 64 },
+  { "ft10", 4032, 64 },
+  { "ft11", 4096, 64 },
+  { "", 4160, 0 },  /* regnums have a hole here: unnamed, zero-size entry at index 65 */
+  { "fflags", 4160, 32 },  /* indices 66..68 are the 32-bit entries */
+  { "frm", 4192, 32 },
+  { "fcsr", 4224, 32 },
+};
+
+/* from GDB gdbserver/linux-riscv-low.cc */
+static const char *expedite_regs[] = { "sp", "pc", 0 };  /* 0-terminated list of names */
+
+#define num_regs (sizeof (regs) / sizeof (regs[0]))  /* entries in regs[], hole included */
+
+/* Collect the register named "pc", log it as the stop pc at debug
+   level 1 and return it. */
+static CORE_ADDR get_pc (void)
+{
+   unsigned long stop_pc;
+
+   collect_register_by_name ("pc", &stop_pc);
+   dlog(1, "stop pc is %p\n", (void *) stop_pc);
+   return stop_pc;
+}
+
+/* Pass NEWPC to supply_register_by_name for the register named "pc". */
+static void set_pc (CORE_ADDR newpc)
+{
+   supply_register_by_name ("pc", &newpc);
+}
+
+/* store registers in the guest state (gdbserver_to_valgrind)
+   or fetch register from the guest state (valgrind_to_gdbserver).
+   abs_regno encodes both the register set and the index into regs[]. */
+static void transfer_register (ThreadId tid, int abs_regno, void * buf,
+                               transfer_direction dir, int size, Bool *mod)
+{
+   ThreadState* tst = VG_(get_ThreadState)(tid);
+   int set = abs_regno / num_regs;
+   int regno = abs_regno % num_regs;
+   *mod = False;
+   UInt v, *p;
+
+   VexGuestRISCV64State* riscv = (VexGuestRISCV64State*) get_arch (set, tst);
+
+   switch (regno) {
+   // numbers here have to match the order of regs above
+   // Attention: gdb order does not match valgrind order.
+   case 0:  VG_(transfer) (&riscv->guest_x0,   buf, dir, size, mod); break;
+   case 1:  VG_(transfer) (&riscv->guest_x1,   buf, dir, size, mod); break;
+   case 2:  VG_(transfer) (&riscv->guest_x2,   buf, dir, size, mod); break;
+   case 3:  VG_(transfer) (&riscv->guest_x3,   buf, dir, size, mod); break;
+   case 4:  VG_(transfer) (&riscv->guest_x4,   buf, dir, size, mod); break;
+   case 5:  VG_(transfer) (&riscv->guest_x5,   buf, dir, size, mod); break;
+   case 6:  VG_(transfer) (&riscv->guest_x6,   buf, dir, size, mod); break;
+   case 7:  VG_(transfer) (&riscv->guest_x7,   buf, dir, size, mod); break;
+   case 8:  VG_(transfer) (&riscv->guest_x8,   buf, dir, size, mod); break;
+   case 9:  VG_(transfer) (&riscv->guest_x9,   buf, dir, size, mod); break;
+   case 10: VG_(transfer) (&riscv->guest_x10,  buf, dir, size, mod); break;
+   case 11: VG_(transfer) (&riscv->guest_x11,  buf, dir, size, mod); break;
+   case 12: VG_(transfer) (&riscv->guest_x12,  buf, dir, size, mod); break;
+   case 13: VG_(transfer) (&riscv->guest_x13,  buf, dir, size, mod); break;
+   case 14: VG_(transfer) (&riscv->guest_x14,  buf, dir, size, mod); break;
+   case 15: VG_(transfer) (&riscv->guest_x15,  buf, dir, size, mod); break;
+   case 16: VG_(transfer) (&riscv->guest_x16,  buf, dir, size, mod); break;
+   case 17: VG_(transfer) (&riscv->guest_x17,  buf, dir, size, mod); break;
+   case 18: VG_(transfer) (&riscv->guest_x18,  buf, dir, size, mod); break;
+   case 19: VG_(transfer) (&riscv->guest_x19,  buf, dir, size, mod); break;
+   case 20: VG_(transfer) (&riscv->guest_x20,  buf, dir, size, mod); break;
+   case 21: VG_(transfer) (&riscv->guest_x21,  buf, dir, size, mod); break;
+   case 22: VG_(transfer) (&riscv->guest_x22,  buf, dir, size, mod); break;
+   case 23: VG_(transfer) (&riscv->guest_x23,  buf, dir, size, mod); break;
+   case 24: VG_(transfer) (&riscv->guest_x24,  buf, dir, size, mod); break;
+   case 25: VG_(transfer) (&riscv->guest_x25,  buf, dir, size, mod); break;
+   case 26: VG_(transfer) (&riscv->guest_x26,  buf, dir, size, mod); break;
+   case 27: VG_(transfer) (&riscv->guest_x27,  buf, dir, size, mod); break;
+   case 28: VG_(transfer) (&riscv->guest_x28,  buf, dir, size, mod); break;
+   case 29: VG_(transfer) (&riscv->guest_x29,  buf, dir, size, mod); break;
+   case 30: VG_(transfer) (&riscv->guest_x30,  buf, dir, size, mod); break;
+   case 31: VG_(transfer) (&riscv->guest_x31,  buf, dir, size, mod); break;
+   case 32: VG_(transfer) (&riscv->guest_pc,   buf, dir, size, mod); break;
+
+   case 33: VG_(transfer) (&riscv->guest_f0,   buf, dir, size, mod); break;
+   case 34: VG_(transfer) (&riscv->guest_f1,   buf, dir, size, mod); break;
+   case 35: VG_(transfer) (&riscv->guest_f2,   buf, dir, size, mod); break;
+   case 36: VG_(transfer) (&riscv->guest_f3,   buf, dir, size, mod); break;
+   case 37: VG_(transfer) (&riscv->guest_f4,   buf, dir, size, mod); break;
+   case 38: VG_(transfer) (&riscv->guest_f5,   buf, dir, size, mod); break;
+   case 39: VG_(transfer) (&riscv->guest_f6,   buf, dir, size, mod); break;
+   case 40: VG_(transfer) (&riscv->guest_f7,   buf, dir, size, mod); break;
+   case 41: VG_(transfer) (&riscv->guest_f8,   buf, dir, size, mod); break;
+   case 42: VG_(transfer) (&riscv->guest_f9,   buf, dir, size, mod); break;
+   case 43: VG_(transfer) (&riscv->guest_f10,  buf, dir, size, mod); break;
+   case 44: VG_(transfer) (&riscv->guest_f11,  buf, dir, size, mod); break;
+   case 45: VG_(transfer) (&riscv->guest_f12,  buf, dir, size, mod); break;
+   case 46: VG_(transfer) (&riscv->guest_f13,  buf, dir, size, mod); break;
+   case 47: VG_(transfer) (&riscv->guest_f14,  buf, dir, size, mod); break;
+   case 48: VG_(transfer) (&riscv->guest_f15,  buf, dir, size, mod); break;
+   case 49: VG_(transfer) (&riscv->guest_f16,  buf, dir, size, mod); break;
+   case 50: VG_(transfer) (&riscv->guest_f17,  buf, dir, size, mod); break;
+   case 51: VG_(transfer) (&riscv->guest_f18,  buf, dir, size, mod); break;
+   case 52: VG_(transfer) (&riscv->guest_f19,  buf, dir, size, mod); break;
+   case 53: VG_(transfer) (&riscv->guest_f20,  buf, dir, size, mod); break;
+   case 54: VG_(transfer) (&riscv->guest_f21,  buf, dir, size, mod); break;
+   case 55: VG_(transfer) (&riscv->guest_f22,  buf, dir, size, mod); break;
+   case 56: VG_(transfer) (&riscv->guest_f23,  buf, dir, size, mod); break;
+   case 57: VG_(transfer) (&riscv->guest_f24,  buf, dir, size, mod); break;
+   case 58: VG_(transfer) (&riscv->guest_f25,  buf, dir, size, mod); break;
+   case 59: VG_(transfer) (&riscv->guest_f26,  buf, dir, size, mod); break;
+   case 60: VG_(transfer) (&riscv->guest_f27,  buf, dir, size, mod); break;
+   case 61: VG_(transfer) (&riscv->guest_f28,  buf, dir, size, mod); break;
+   case 62: VG_(transfer) (&riscv->guest_f29,  buf, dir, size, mod); break;
+   case 63: VG_(transfer) (&riscv->guest_f30,  buf, dir, size, mod); break;
+   case 64: VG_(transfer) (&riscv->guest_f31,  buf, dir, size, mod); break;
+
+   case 65: break; /* hole in regs[]: nothing to transfer */
+
+   case 66: /* fflags = fcsr & 0x1F */
+        p = &riscv->guest_fcsr;
+        /* Seed v in both directions so VG_(transfer) never gets an unset value. */
+        v = *p & 0x1F;
+        VG_(transfer) (&v, buf, dir, size, mod);
+        if (dir == gdbserver_to_valgrind)
+            *p = (*p & ~0x1F) | (v & 0x1F); /* never spill outside fflags */
+        break;
+
+   case 67: /* frm = (fcsr & 0xE0) >> 5 */
+        p = &riscv->guest_fcsr;
+        /* Seed v in both directions so VG_(transfer) never gets an unset value. */
+        v = (*p & 0xE0) >> 5;
+        VG_(transfer) (&v, buf, dir, size, mod);
+        if (dir == gdbserver_to_valgrind)
+            *p = (*p & ~0xE0) | ((v << 5) & 0xE0); /* never spill outside frm */
+        break;
+
+   case 68: VG_(transfer) (&riscv->guest_fcsr, buf, dir, size, mod); break;
+   default: vg_assert(0);
+   }
+}
+
+/* Pick the target description file name: the "-valgrind" variant
+   when SHADOW_MODE is set, the plain riscv64-linux one otherwise.
+   The returned string is a literal. */
+static const char* target_xml (Bool shadow_mode)
+{
+   const char *xml_name = shadow_mode ? "riscv64-linux-valgrind.xml"
+                                      : "riscv64-linux.xml";
+   return xml_name;
+}
+
+/* Compute where the dtv pointer of thread TST lives, from its tp (x4).
+
+   RISC-V uses Variant I as described by the ELF TLS specification,
+   with tp containing the address one past the end of the TCB.
+   From GLIBC sysdeps/riscv/nptl/tls.h, tp is just after tcbhead_t:
+     typedef struct {
+         dtv_t *dtv;
+         void *private;
+     } tcbhead_t;
+   so the dtv member sits two pointers below tp. */
+static CORE_ADDR** target_get_dtv (ThreadState *tst)
+{
+   VexGuestRISCV64State* guest = (VexGuestRISCV64State*)&tst->arch.vex;
+   return (CORE_ADDR**)(void *)(guest->guest_x4 - 2 * sizeof(void *));
+}
+
+static struct valgrind_target_ops low_target = {
+   num_regs,          /* number of entries in regs[] */
+   2, /* SP: index of the "sp" entry in regs[] */
+   regs,
+   transfer_register,
+   get_pc,
+   set_pc,
+   "riscv64",
+   target_xml,
+   target_get_dtv
+};
+
+void riscv64_init_architecture (struct valgrind_target_ops *target)
+{
+   *target = low_target;   /* copy the riscv64 ops table into the caller's struct */
+   set_register_cache (regs, num_regs);
+   gdbserver_expedite_regs = expedite_regs;   /* the "sp", "pc" list defined above */
+}
diff --git a/coregrind/m_gdbserver/valgrind_low.h b/coregrind/m_gdbserver/valgrind_low.h
index d8ae3c908..ef4e19089 100644
--- a/coregrind/m_gdbserver/valgrind_low.h
+++ b/coregrind/m_gdbserver/valgrind_low.h
@@ -109,5 +109,6 @@ extern void s390x_init_architecture (struct valgrind_target_ops *target);
 extern void mips32_init_architecture (struct valgrind_target_ops *target);
 extern void mips64_init_architecture (struct valgrind_target_ops *target);
 extern void nanomips_init_architecture (struct valgrind_target_ops *target);
+extern void riscv64_init_architecture (struct valgrind_target_ops *target);
 
 #endif
diff --git a/coregrind/m_initimg/initimg-linux.c b/coregrind/m_initimg/initimg-linux.c
index c92c3cfb2..f3fe3ce21 100644
--- a/coregrind/m_initimg/initimg-linux.c
+++ b/coregrind/m_initimg/initimg-linux.c
@@ -915,9 +915,13 @@ Addr setup_client_stack( void*  init_sp,
             && !defined(VGP_ppc64le_linux) \
             && !defined(VGP_mips32_linux) && !defined(VGP_mips64_linux) \
             && !defined(VGP_nanomips_linux) \
-            && !defined(VGP_s390x_linux)
+            && !defined(VGP_s390x_linux) \
+            && !defined(VGP_riscv64_linux)
          case AT_SYSINFO_EHDR: {
             /* Trash this, because we don't reproduce it */
+            /* riscv64-linux is excluded from this code: the VDSO mapping is
+               kept there because it contains __vdso_rt_sigreturn(), which the
+               kernel points the ra register at on signal delivery. */
             const NSegment* ehdrseg = VG_(am_find_nsegment)((Addr)auxv->u.a_ptr);
             vg_assert(ehdrseg);
             VG_(am_munmap_valgrind)(ehdrseg->start, ehdrseg->end - ehdrseg->start);
@@ -1346,6 +1350,35 @@ void VG_(ii_finalise_image)( IIFinaliseImageInfo iifii )
    arch->vex.guest_PC = iifii.initial_client_IP;
    arch->vex.guest_r31 = iifii.initial_client_SP;
 
+#  elif defined(VGP_riscv64_linux)
+   vg_assert(0 == sizeof(VexGuestRISCV64State) % LibVEX_GUEST_STATE_ALIGN);
+
+   /* Zero out the initial state. */
+   LibVEX_GuestRISCV64_initialise(&arch->vex);
+
+   /* Mark all registers as undefined ... */
+   VG_(memset)(&arch->vex_shadow1, 0xFF, sizeof(VexGuestRISCV64State));
+   VG_(memset)(&arch->vex_shadow2, 0x00, sizeof(VexGuestRISCV64State));
+   /* ... except x2 (sp), pc and fcsr. */
+   arch->vex_shadow1.guest_x2 = 0;
+   arch->vex_shadow1.guest_pc = 0;
+   arch->vex_shadow1.guest_fcsr = 0;
+
+   /* Put essential stuff into the new state. */
+   arch->vex.guest_x2 = iifii.initial_client_SP;
+   arch->vex.guest_pc = iifii.initial_client_IP;
+   /* Initialize fcsr in the same way as done by the Linux kernel:
+      accrued exception flags cleared; round to nearest, ties to even. */
+   arch->vex.guest_fcsr = 0;
+
+   /* Tell the tool about the registers we just wrote. */
+   VG_TRACK(post_reg_write, Vg_CoreStartup, /*tid*/1, VG_O_STACK_PTR, 8);
+   VG_TRACK(post_reg_write, Vg_CoreStartup, /*tid*/1, VG_O_INSTR_PTR, 8);
+   VG_TRACK(post_reg_write, Vg_CoreStartup, /*tid*/1,
+            offsetof(VexGuestRISCV64State, guest_fcsr), 4);
+
+#define PRECISE_GUEST_REG_DEFINEDNESS_AT_STARTUP 1
+
 #  else
 #    error Unknown platform
 #  endif
diff --git a/coregrind/m_libcassert.c b/coregrind/m_libcassert.c
index db9d43d87..8d90e5dff 100644
--- a/coregrind/m_libcassert.c
+++ b/coregrind/m_libcassert.c
@@ -264,6 +264,25 @@
         (srP)->misc.MIPS32.r31 = (UInt)ra;                \
         (srP)->misc.MIPS32.r28 = (UInt)gp;                \
       }
+#elif defined(VGP_riscv64_linux)
+#  define GET_STARTREGS(srP)                              \
+      { ULong pc, sp, fp, ra;                             \
+        __asm__ __volatile__(                             \
+           "jal %0, 0f;"       /* %0 <- address of 0: */  \
+           "0:\n"                                         \
+           "mv %1, sp;"        /* stack pointer */        \
+           "mv %2, fp;"        /* frame pointer */        \
+           "mv %3, ra;"        /* return address */       \
+           : "=r" (pc),                                   \
+             "=r" (sp),                                   \
+             "=r" (fp),                                   \
+             "=r" (ra)                                    \
+        );                                                \
+        (srP)->r_pc = pc;      /* hand results to srP */  \
+        (srP)->r_sp = sp;                                 \
+        (srP)->misc.RISCV64.r_fp = fp;                    \
+        (srP)->misc.RISCV64.r_ra = ra;                    \
+      }
 #else
 #  error Unknown platform
 #endif
diff --git a/coregrind/m_libcfile.c b/coregrind/m_libcfile.c
index 6098bc581..c1215d5de 100644
--- a/coregrind/m_libcfile.c
+++ b/coregrind/m_libcfile.c
@@ -264,8 +264,9 @@ Bool VG_(resolve_filemode) ( Int fd, Int * result )
 
 SysRes VG_(mknod) ( const HChar* pathname, Int mode, UWord dev )
 {
-#  if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux)
-   /* ARM64 wants to use __NR_mknodat rather than __NR_mknod. */
+#  if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
+      || defined(VGP_riscv64_linux)
+   /* More recent Linux platforms have only __NR_mknodat and no __NR_mknod. */
    SysRes res = VG_(do_syscall4)(__NR_mknodat,
                                  VKI_AT_FDCWD, (UWord)pathname, mode, dev);
 #  elif defined(VGO_linux) || defined(VGO_darwin)
@@ -290,8 +291,9 @@ SysRes VG_(mknod) ( const HChar* pathname, Int mode, UWord dev )
 
 SysRes VG_(open) ( const HChar* pathname, Int flags, Int mode )
 {
-#  if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux)
-   /* ARM64 wants to use __NR_openat rather than __NR_open. */
+#  if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
+      || defined(VGP_riscv64_linux)
+   /* More recent Linux platforms have only __NR_openat and no __NR_open. */
    SysRes res = VG_(do_syscall4)(__NR_openat,
                                  VKI_AT_FDCWD, (UWord)pathname, flags, mode);
 #  elif defined(VGO_linux) || defined(VGO_freebsd)
@@ -384,7 +386,8 @@ Int VG_(pipe) ( Int fd[2] )
    } else {
       return -1;
    }
-#  elif defined(VGP_arm64_linux) || defined(VGP_nanomips_linux)
+#  elif defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
+      || defined(VGP_riscv64_linux)
    SysRes res = VG_(do_syscall2)(__NR_pipe2, (UWord)fd, 0);
    return sr_isError(res) ? -1 : 0;
 #  elif defined(VGO_linux)
@@ -554,6 +557,10 @@ SysRes VG_(stat) ( const HChar* file_name, struct vg_stat* vgbuf )
      return res;
    }
 #  endif
+#  if defined(VGP_riscv64_linux)
+   /* No fallback defined, statx syscall always exists. */
+   return res;
+#  endif
 #  elif defined(VGO_solaris)
    {
 #     if defined(VGP_x86_solaris)
@@ -731,7 +738,8 @@ SysRes VG_(dup) ( Int oldfd )
 
 SysRes VG_(dup2) ( Int oldfd, Int newfd )
 {
-#  if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux)
+#  if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
+      || defined(VGP_riscv64_linux)
    /* We only have dup3, that means we have to mimic dup2.
       The only real difference is when oldfd == newfd.
       dup3 always returns an error, but dup2 returns only an
@@ -777,7 +785,7 @@ Int VG_(rename) ( const HChar* old_name, const HChar* new_name )
 #  if defined(VGO_solaris) || defined(VGP_arm64_linux)
    SysRes res = VG_(do_syscall4)(__NR_renameat, VKI_AT_FDCWD, (UWord)old_name,
                                  VKI_AT_FDCWD, (UWord)new_name);
-#  elif defined(VGP_nanomips_linux)
+#  elif defined(VGP_nanomips_linux) || defined(VGP_riscv64_linux)
    SysRes res = VG_(do_syscall5)(__NR_renameat2, VKI_AT_FDCWD, (UWord)old_name,
                                  VKI_AT_FDCWD, (UWord)new_name, 0);
 
@@ -791,7 +799,8 @@ Int VG_(rename) ( const HChar* old_name, const HChar* new_name )
 
 Int VG_(unlink) ( const HChar* file_name )
 {
-#  if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux)
+#  if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
+      || defined(VGP_riscv64_linux)
    SysRes res = VG_(do_syscall2)(__NR_unlinkat, VKI_AT_FDCWD,
                                                 (UWord)file_name);
 #  elif defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_freebsd)
@@ -870,8 +879,9 @@ const HChar *VG_(get_startup_wd) ( void )
 SysRes VG_(poll) (struct vki_pollfd *fds, Int nfds, Int timeout)
 {
    SysRes res;
-#  if defined(VGP_arm64_linux)  || defined(VGP_nanomips_linux)
-   /* ARM64 wants to use __NR_ppoll rather than __NR_poll. */
+#  if defined(VGP_arm64_linux)  || defined(VGP_nanomips_linux) \
+      || defined(VGP_riscv64_linux)
+   /* More recent Linux platforms have only __NR_ppoll and no __NR_poll. */
    struct vki_timespec timeout_ts;
    if (timeout >= 0) {
       timeout_ts.tv_sec = timeout / 1000;
@@ -915,7 +925,8 @@ SSizeT VG_(readlink) (const HChar* path, HChar* buf, SizeT bufsiz)
 {
    SysRes res;
    /* res = readlink( path, buf, bufsiz ); */
-#  if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux)
+#  if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
+      || defined(VGP_riscv64_linux)
    res = VG_(do_syscall4)(__NR_readlinkat, VKI_AT_FDCWD,
                                            (UWord)path, (UWord)buf, bufsiz);
 #  elif defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_freebsd)
@@ -994,7 +1005,8 @@ Int VG_(access) ( const HChar* path, Bool irusr, Bool iwusr, Bool ixusr )
    UWord w = (irusr ? VKI_R_OK : 0)
              | (iwusr ? VKI_W_OK : 0)
              | (ixusr ? VKI_X_OK : 0);
-#  if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux)
+#  if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
+      || defined(VGP_riscv64_linux)
    SysRes res = VG_(do_syscall3)(__NR_faccessat, VKI_AT_FDCWD, (UWord)path, w);
 #  elif defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_freebsd)
    SysRes res = VG_(do_syscall2)(__NR_access, (UWord)path, w);
@@ -1140,7 +1152,8 @@ SysRes VG_(pread) ( Int fd, void* buf, Int count, OffT offset )
    return res;
 #  elif defined(VGP_amd64_linux) || defined(VGP_s390x_linux) \
       || defined(VGP_ppc64be_linux)  || defined(VGP_ppc64le_linux) \
-      || defined(VGP_mips64_linux) || defined(VGP_arm64_linux)
+      || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \
+      || defined(VGP_riscv64_linux)
    res = VG_(do_syscall4)(__NR_pread64, fd, (UWord)buf, count, offset);
    return res;
 #  elif defined(VGP_amd64_freebsd) || defined(VGP_arm64_freebsd)
@@ -1404,7 +1417,8 @@ Int VG_(socket) ( Int domain, Int type, Int protocol )
 
 #  elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \
         || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \
-        || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) || defined(VGO_freebsd)
+        || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
+        || defined(VGP_riscv64_linux) || defined(VGO_freebsd)
    SysRes res;
    res = VG_(do_syscall3)(__NR_socket, domain, type, protocol );
    return sr_isError(res) ? -1 : sr_Res(res);
@@ -1459,7 +1473,8 @@ Int my_connect ( Int sockfd, struct vki_sockaddr_in* serv_addr, Int addrlen )
 
 #  elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \
         || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \
-        || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) || defined(VGO_freebsd)
+        || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
+        || defined(VGP_riscv64_linux) || defined(VGO_freebsd)
    SysRes res;
    res = VG_(do_syscall3)(__NR_connect, sockfd, (UWord)serv_addr, addrlen);
    return sr_isError(res) ? -1 : sr_Res(res);
@@ -1506,7 +1521,8 @@ Int VG_(write_socket)( Int sd, const void *msg, Int count )
 
 #  elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \
         || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \
-        || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) || defined(VGO_freebsd)
+        || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
+        || defined(VGP_riscv64_linux) || defined(VGO_freebsd)
    SysRes res;
    res = VG_(do_syscall6)(__NR_sendto, sd, (UWord)msg, 
                                        count, VKI_MSG_NOSIGNAL, 0,0);
@@ -1543,8 +1559,8 @@ Int VG_(getsockname) ( Int sd, struct vki_sockaddr *name, Int *namelen)
 
 #  elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \
         || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \
-        || defined(VGP_nanomips_linux) || defined(VGO_freebsd) \
-        || defined(VGP_mips64_linux) || defined(VGP_arm64_linux)
+        || defined(VGP_nanomips_linux) || defined(VGP_riscv64_linux) \
+        || defined(VGO_freebsd)
    SysRes res;
    res = VG_(do_syscall3)( __NR_getsockname,
                            (UWord)sd, (UWord)name, (UWord)namelen );
@@ -1583,7 +1599,8 @@ Int VG_(getpeername) ( Int sd, struct vki_sockaddr *name, Int *namelen)
 
 #  elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \
         || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \
-        || defined(VGP_nanomips_linux) || defined(VGO_freebsd)
+        || defined(VGP_nanomips_linux) || defined(VGP_riscv64_linux) \
+        || defined(VGO_freebsd)
    SysRes res;
    res = VG_(do_syscall3)( __NR_getpeername,
                            (UWord)sd, (UWord)name, (UWord)namelen );
@@ -1625,7 +1642,7 @@ Int VG_(getsockopt) ( Int sd, Int level, Int optname, void *optval,
 #  elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \
         || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \
         || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
-        || defined(VGO_freebsd)
+        || defined(VGP_riscv64_linux) || defined(VGO_freebsd)
    SysRes res;
    res = VG_(do_syscall5)( __NR_getsockopt,
                            (UWord)sd, (UWord)level, (UWord)optname, 
@@ -1669,7 +1686,8 @@ Int VG_(setsockopt) ( Int sd, Int level, Int optname, void *optval,
 
 #  elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \
         || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \
-        || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux)
+        || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
+        || defined(VGP_riscv64_linux)
    SysRes res;
    res = VG_(do_syscall5)( __NR_setsockopt,
                            (UWord)sd, (UWord)level, (UWord)optname, 
diff --git a/coregrind/m_libcproc.c b/coregrind/m_libcproc.c
index 11dabe768..6ee6873dc 100644
--- a/coregrind/m_libcproc.c
+++ b/coregrind/m_libcproc.c
@@ -698,7 +698,8 @@ Int VG_(gettid)(void)
        * the /proc/self link is pointing...
        */
 
-#     if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux)
+#     if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
+         || defined(VGP_riscv64_linux)
       res = VG_(do_syscall4)(__NR_readlinkat, VKI_AT_FDCWD,
                              (UWord)"/proc/self",
                              (UWord)pid, sizeof(pid));
@@ -753,7 +754,8 @@ Int VG_(getpid) ( void )
 Int VG_(getpgrp) ( void )
 {
    /* ASSUMES SYSCALL ALWAYS SUCCEEDS */
-#  if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux)
+#  if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
+      || defined(VGP_riscv64_linux)
    return sr_Res( VG_(do_syscall1)(__NR_getpgid, 0) );
 #  elif defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_freebsd)
    return sr_Res( VG_(do_syscall0)(__NR_getpgrp) );
@@ -850,7 +852,7 @@ Int VG_(getgroups)( Int size, UInt* list )
         || defined(VGO_darwin) || defined(VGP_s390x_linux)    \
         || defined(VGP_mips32_linux) || defined(VGP_arm64_linux) \
         || defined(VGO_solaris) || defined(VGP_nanomips_linux) \
-        || defined(VGO_freebsd)
+        || defined(VGP_riscv64_linux) || defined(VGO_freebsd)
    SysRes sres;
    sres = VG_(do_syscall2)(__NR_getgroups, size, (Addr)list);
    if (sr_isError(sres))
@@ -944,7 +946,8 @@ Int VG_(fork) ( void )
       fds[0] = fds[1] = -1;
    }
 
-#  if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux)
+#  if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
+      || defined(VGP_riscv64_linux)
    SysRes res;
    res = VG_(do_syscall5)(__NR_clone, VKI_SIGCHLD,
                           (UWord)NULL, (UWord)NULL, (UWord)NULL, (UWord)NULL);
@@ -1419,10 +1422,22 @@ void VG_(invalidate_icache) ( void *ptr, SizeT nbytes )
                                  (UWord) nbytes, (UWord) 3);
    vg_assert( !sr_isError(sres) );
 
-# elif defined(VGA_nanomips)
-
+#  elif defined(VGA_nanomips)
    __builtin___clear_cache(ptr, (char*)ptr + nbytes);
 
+#  elif defined(VGP_riscv64_linux)
+   /* Make data stores to the area visible to all RISC-V harts. */
+   __asm__ __volatile__("fence w,r");
+
+   /* Ask the kernel to execute fence.i on all harts to guarantee that an
+      instruction fetch on each hart will see any previous data stores visible
+      to the same hart. */
+   Addr   startaddr = (Addr)ptr;
+   Addr   endaddr   = startaddr + nbytes;
+   SysRes sres = VG_(do_syscall3)(__NR_riscv_flush_icache, startaddr, endaddr,
+                                  0 /*flags*/);
+   vg_assert(!sr_isError(sres));
+
 #  endif
 }
 
diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c
index 36db3ab9c..5c96d2e61 100644
--- a/coregrind/m_machine.c
+++ b/coregrind/m_machine.c
@@ -152,6 +152,11 @@ void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
       = VG_(threads)[tid].arch.vex.guest_r31;
    regs->misc.MIPS64.r28
       = VG_(threads)[tid].arch.vex.guest_r28;
+#  elif defined(VGA_riscv64)
+   regs->r_pc = VG_(threads)[tid].arch.vex.guest_pc;
+   regs->r_sp = VG_(threads)[tid].arch.vex.guest_x2;
+   regs->misc.RISCV64.r_fp = VG_(threads)[tid].arch.vex.guest_x8;
+   regs->misc.RISCV64.r_ra = VG_(threads)[tid].arch.vex.guest_x1;
 #  else
 #    error "Unknown arch"
 #  endif
@@ -369,6 +374,39 @@ static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
    (*f)(tid, "x28", vex->guest_X28);
    (*f)(tid, "x29", vex->guest_X29);
    (*f)(tid, "x30", vex->guest_X30);
+#elif defined(VGA_riscv64)
+   (*f)(tid, "x0" , vex->guest_x0 );
+   (*f)(tid, "x1" , vex->guest_x1 );
+   (*f)(tid, "x2" , vex->guest_x2 );
+   (*f)(tid, "x3" , vex->guest_x3 );
+   (*f)(tid, "x4" , vex->guest_x4 );
+   (*f)(tid, "x5" , vex->guest_x5 );
+   (*f)(tid, "x6" , vex->guest_x6 );
+   (*f)(tid, "x7" , vex->guest_x7 );
+   (*f)(tid, "x8" , vex->guest_x8 );
+   (*f)(tid, "x9" , vex->guest_x9 );
+   (*f)(tid, "x10", vex->guest_x10);
+   (*f)(tid, "x11", vex->guest_x11);
+   (*f)(tid, "x12", vex->guest_x12);
+   (*f)(tid, "x13", vex->guest_x13);
+   (*f)(tid, "x14", vex->guest_x14);
+   (*f)(tid, "x15", vex->guest_x15);
+   (*f)(tid, "x16", vex->guest_x16);
+   (*f)(tid, "x17", vex->guest_x17);
+   (*f)(tid, "x18", vex->guest_x18);
+   (*f)(tid, "x19", vex->guest_x19);
+   (*f)(tid, "x20", vex->guest_x20);
+   (*f)(tid, "x21", vex->guest_x21);
+   (*f)(tid, "x22", vex->guest_x22);
+   (*f)(tid, "x23", vex->guest_x23);
+   (*f)(tid, "x24", vex->guest_x24);
+   (*f)(tid, "x25", vex->guest_x25);
+   (*f)(tid, "x26", vex->guest_x26);
+   (*f)(tid, "x27", vex->guest_x27);
+   (*f)(tid, "x28", vex->guest_x28);
+   (*f)(tid, "x29", vex->guest_x29);
+   (*f)(tid, "x30", vex->guest_x30);
+   (*f)(tid, "x31", vex->guest_x31);
 #else
 #  error Unknown arch
 #endif
@@ -2240,6 +2278,22 @@ Bool VG_(machine_get_hwcaps)( void )
 
      return True;
    }
+
+#elif defined(VGA_riscv64)
+   {
+     va = VexArchRISCV64;
+     vai.endness = VexEndnessLE;
+
+     /* Hardware baseline is RV64GC. */
+     vai.hwcaps = 0;
+
+     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
+
+     VG_(machine_get_cache_info)(&vai);
+
+     return True;
+   }
+
 #else
 #  error "Unknown arch"
 #endif
@@ -2380,6 +2434,10 @@ Int VG_(machine_get_size_of_largest_guest_register) ( void )
 #  elif defined(VGA_mips64)
    return 8;
 
+#  elif defined(VGA_riscv64)
+   /* 64-bit integer and floating-point registers, no vector set. */
+   return 8;
+
 #  else
 #    error "Unknown arch"
 #  endif
@@ -2396,7 +2454,7 @@ void* VG_(fnptr_to_fnentry)( void* f )
       || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \
       || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \
       || defined(VGP_x86_solaris) || defined(VGP_amd64_solaris) \
-      || defined(VGP_nanomips_linux)
+      || defined(VGP_nanomips_linux) || defined(VGP_riscv64_linux)
    return f;
 #  elif defined(VGP_ppc64be_linux)
    /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
diff --git a/coregrind/m_main.c b/coregrind/m_main.c
index 22b4fea0f..b4f8f26df 100644
--- a/coregrind/m_main.c
+++ b/coregrind/m_main.c
@@ -2553,6 +2553,11 @@ static void final_tidyup(ThreadId tid)
    VG_TRACK(post_reg_write, Vg_CoreClientReq, tid,
             offsetof(VexGuestPPC64State, guest_GPR3),
             sizeof(VG_(threads)[tid].arch.vex.guest_GPR3));
+#  elif defined(VGA_riscv64)
+   VG_(threads)[tid].arch.vex.guest_x10 = to_run;
+   VG_TRACK(post_reg_write, Vg_CoreClientReq, tid,
+            offsetof(VexGuestRISCV64State, guest_x10),
+            sizeof(VG_(threads)[tid].arch.vex.guest_x10));
 #  elif defined(VGA_s390x)
    VG_(threads)[tid].arch.vex.guest_r2 = to_run;
    VG_TRACK(post_reg_write, Vg_CoreClientReq, tid,
@@ -3086,6 +3091,33 @@ asm(
     ".set pop                                           \n\t"
 ".previous                                              \n\t"
 );
+#elif defined(VGP_riscv64_linux)
+asm("\n"
+    "\t.text\n"
+    "\t.type _start,@function\n"
+    "\t.global _start\n"
+    "_start:\n"
+    /* establish the global pointer in gp */
+    ".option push\n"
+    ".option norelax\n"
+    "\tla gp, __global_pointer$\n"
+    ".option pop\n"
+    /* t0 = top of the active interim stack, rounded down to 256 bytes */
+    "\tla t0, vgPlain_interim_stack\n"
+    "\tli t1, "VG_STRINGIFY(VG_STACK_GUARD_SZB)"\n"
+    "\tadd t0, t0, t1\n"
+    "\tli t1, "VG_STRINGIFY(VG_DEFAULT_STACK_ACTIVE_SZB)"\n"
+    "\tadd t0, t0, t1\n"
+    "\tli t1, -256\n" /* sign-extended mask: address bits 63..32 survive */
+    "\tand t0, t0, t1\n"
+    /* install it, and collect the original one */
+    "\tmv a0, sp\n"
+    "\tmv sp, t0\n"
+    /* call _start_in_C_linux, passing it the startup sp */
+    "\tj _start_in_C_linux\n"
+    "\tunimp\n"
+    ".previous\n"
+);
 #else
 #  error "Unknown platform"
 #endif
diff --git a/coregrind/m_options.c b/coregrind/m_options.c
index ecbe9fc3a..16452f252 100644
--- a/coregrind/m_options.c
+++ b/coregrind/m_options.c
@@ -203,7 +203,8 @@ UInt   VG_(clo_unw_stack_scan_frames) = 5;
 VgSmc VG_(clo_smc_check) = Vg_SmcAllNonFile;
 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
       || defined(VGA_arm) || defined(VGA_arm64) \
-      || defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips)
+      || defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips) \
+      || defined(VGA_riscv64)
 VgSmc VG_(clo_smc_check) = Vg_SmcStack;
 #else
 #  error "Unknown arch"
diff --git a/coregrind/m_redir.c b/coregrind/m_redir.c
index f50624c2b..14e007b76 100644
--- a/coregrind/m_redir.c
+++ b/coregrind/m_redir.c
@@ -1240,6 +1240,7 @@ Bool VG_(is_soname_ld_so) (const HChar *soname)
    if (VG_STREQ(soname, VG_U_LD_LINUX_AARCH64_SO_1)) return True;
    if (VG_STREQ(soname, VG_U_LD_LINUX_ARMHF_SO_3))   return True;
    if (VG_STREQ(soname, VG_U_LD_LINUX_MIPSN8_S0_1))  return True;
+   if (VG_STREQ(soname, VG_U_LD_LINUX_RISCV64_SO_1)) return True;
 #  elif defined(VGO_freebsd)
    if (VG_STREQ(soname, VG_U_LD_ELF_SO_1))   return True;
    if (VG_STREQ(soname, VG_U_LD_ELF32_SO_1))   return True;
@@ -1688,6 +1689,20 @@ void VG_(redir_initialise) ( void )
       );
    }
 
+#  elif defined(VGP_riscv64_linux)
+   if (0==VG_(strcmp)("Memcheck", VG_(details).name)) {
+      add_hardwired_spec(
+         "ld-linux-riscv64-lp64d.so.1", "strlen",
+         (Addr)&VG_(riscv64_linux_REDIR_FOR_strlen),
+         complain_about_stripped_glibc_ldso
+      );
+      add_hardwired_spec(
+         "ld-linux-riscv64-lp64d.so.1", "index",
+         (Addr)&VG_(riscv64_linux_REDIR_FOR_index),
+         complain_about_stripped_glibc_ldso
+      );
+   }
+
 #  elif defined(VGP_x86_solaris)
    /* If we're using memcheck, use these intercepts right from
       the start, otherwise ld.so makes a lot of noise. */
diff --git a/coregrind/m_scheduler/scheduler.c b/coregrind/m_scheduler/scheduler.c
index fc8cf7c9c..383ae554a 100644
--- a/coregrind/m_scheduler/scheduler.c
+++ b/coregrind/m_scheduler/scheduler.c
@@ -893,6 +893,10 @@ static void do_pre_run_checks ( volatile ThreadState* tst )
 #  if defined(VGA_mips32) || defined(VGA_mips64)
    /* no special requirements */
 #  endif
+
+#  if defined(VGA_riscv64)
+   /* no special requirements */
+#  endif
 }
 
 // NO_VGDB_POLL value ensures vgdb is not polled, while
@@ -1007,6 +1011,8 @@ void run_thread_for_a_while ( /*OUT*/HWord* two_words,
    tst->arch.vex.guest_LLaddr = (RegWord)(-1);
 #  elif defined(VGP_arm64_linux) || defined(VGP_arm64_freebsd)
    tst->arch.vex.guest_LLSC_SIZE = 0;
+#  elif defined(VGP_riscv64_linux)
+   tst->arch.vex.guest_LLSC_SIZE = 0;
 #  endif
 
    if (0) {
@@ -1234,8 +1240,7 @@ static void handle_extension(ThreadId tid)
    vg_assert(VG_(is_running_thread)(tid));
 
    if (err != ExtErr_OK) {
-      ThreadState* tst = VG_(get_ThreadState)(tid);
-      Addr addr = tst->arch.vex.guest_IP_AT_SYSCALL;
+      Addr addr = VG_(get_IP)(tid);
       switch (err) {
       case ExtErr_Illop:
          VG_(synth_sigill)(tid, addr);
@@ -1854,6 +1859,9 @@ void VG_(nuke_all_threads_except) ( ThreadId me, VgSchedReturnCode src )
 #elif defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips)
 #  define VG_CLREQ_ARGS       guest_r12
 #  define VG_CLREQ_RET        guest_r11
+#elif defined(VGA_riscv64)
+#  define VG_CLREQ_ARGS       guest_x14
+#  define VG_CLREQ_RET        guest_x13
 #else
 #  error Unknown arch
 #endif
diff --git a/coregrind/m_sigframe/sigframe-riscv64-linux.c b/coregrind/m_sigframe/sigframe-riscv64-linux.c
new file mode 100644
index 000000000..40c8781ca
--- /dev/null
+++ b/coregrind/m_sigframe/sigframe-riscv64-linux.c
@@ -0,0 +1,423 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Create/destroy signal delivery frames.                       ---*/
+/*---                                     sigframe-riscv64-linux.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright (C) 2020-2023 Petr Pavlu
+      petr.pavlu@dagobah.cz
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#if defined(VGP_riscv64_linux)
+
+#include "libvex_guest_offsets.h"
+#include "priv_sigframe.h"
+#include "pub_core_aspacemgr.h"
+#include "pub_core_basics.h"
+#include "pub_core_libcassert.h"
+#include "pub_core_libcbase.h"
+#include "pub_core_libcprint.h"
+#include "pub_core_machine.h"
+#include "pub_core_options.h"
+#include "pub_core_sigframe.h"
+#include "pub_core_signals.h"
+#include "pub_core_threadstate.h"
+#include "pub_core_tooliface.h"
+#include "pub_core_trampoline.h"
+#include "pub_core_vki.h"
+
+/*------------------------------------------------------------*/
+/*--- Signal frame layout                                  ---*/
+/*------------------------------------------------------------*/
+
+/* Valgrind-specific parts of the signal frame. */
+struct vg_sigframe {
+   /* Sanity check word; build_vg_sigframe() stores 0x31415927 here. */
+   UInt magicPI;
+
+   /* Safely-saved version of sigNo. */
+   Int sigNo_private;
+
+   /* Sanity check word; build_vg_sigframe() stores 0x27182818 here. */
+   UInt magicE;
+};
+
+/* Complete signal frame: siginfo, ucontext, then Valgrind's private part. */
+struct rt_sigframe {
+   struct vki_siginfo  info;
+   struct vki_ucontext uc;
+   struct vg_sigframe  vg;
+};
+
+/*------------------------------------------------------------*/
+/*--- Creating a signal frame                              ---*/
+/*------------------------------------------------------------*/
+
+static void synth_ucontext(ThreadState*         tst,
+                           const vki_siginfo_t* si,
+                           const vki_sigset_t*  set,
+                           struct vki_ucontext* uc)
+{
+   VG_(memset)(uc, 0, sizeof(*uc)); /* start from an all-zero context */
+
+   /* Prepare common data; each store is reported to the tool. si is unused. */
+   uc->uc_flags = 0;
+   VG_TRACK(post_mem_write, Vg_CoreSignal, tst->tid, (Addr)&uc->uc_flags,
+            sizeof(uc->uc_flags));
+   uc->uc_link = 0;
+   VG_TRACK(post_mem_write, Vg_CoreSignal, tst->tid, (Addr)&uc->uc_link,
+            sizeof(uc->uc_link));
+   uc->uc_sigmask = *set;
+   VG_TRACK(post_mem_write, Vg_CoreSignal, tst->tid, (Addr)&uc->uc_sigmask,
+            sizeof(uc->uc_sigmask));
+   uc->uc_stack = tst->altstack;
+   VG_TRACK(post_mem_write, Vg_CoreSignal, tst->tid, (Addr)&uc->uc_stack,
+            sizeof(uc->uc_stack));
+
+   struct vki_sigcontext* sc = &uc->uc_mcontext;
+
+   /* Save pc and x1..x31 into sc_regs, telling the tool about each copy. */
+#define IREG_TO_CTX(ureg, vreg)                                                \
+   sc->sc_regs.ureg = tst->arch.vex.guest_##vreg;                              \
+   VG_TRACK(copy_reg_to_mem, Vg_CoreSignal, tst->tid, OFFSET_riscv64_##vreg,   \
+            (Addr)&sc->sc_regs.ureg, sizeof(UWord));
+   IREG_TO_CTX(pc, pc);
+   IREG_TO_CTX(ra, x1);
+   IREG_TO_CTX(sp, x2);
+   IREG_TO_CTX(gp, x3);
+   IREG_TO_CTX(tp, x4);
+   IREG_TO_CTX(t0, x5);
+   IREG_TO_CTX(t1, x6);
+   IREG_TO_CTX(t2, x7);
+   IREG_TO_CTX(s0, x8);
+   IREG_TO_CTX(s1, x9);
+   IREG_TO_CTX(a0, x10);
+   IREG_TO_CTX(a1, x11);
+   IREG_TO_CTX(a2, x12);
+   IREG_TO_CTX(a3, x13);
+   IREG_TO_CTX(a4, x14);
+   IREG_TO_CTX(a5, x15);
+   IREG_TO_CTX(a6, x16);
+   IREG_TO_CTX(a7, x17);
+   IREG_TO_CTX(s2, x18);
+   IREG_TO_CTX(s3, x19);
+   IREG_TO_CTX(s4, x20);
+   IREG_TO_CTX(s5, x21);
+   IREG_TO_CTX(s6, x22);
+   IREG_TO_CTX(s7, x23);
+   IREG_TO_CTX(s8, x24);
+   IREG_TO_CTX(s9, x25);
+   IREG_TO_CTX(s10, x26);
+   IREG_TO_CTX(s11, x27);
+   IREG_TO_CTX(t3, x28);
+   IREG_TO_CTX(t4, x29);
+   IREG_TO_CTX(t5, x30);
+   IREG_TO_CTX(t6, x31);
+#undef IREG_TO_CTX
+
+   /* Save f0..f31 (UWord-sized) and fcsr (UInt-sized) into sc_fpregs.d. */
+#define FREG_TO_CTX(ureg, vreg, type)                                          \
+   sc->sc_fpregs.d.ureg = tst->arch.vex.guest_##vreg;                          \
+   VG_TRACK(copy_reg_to_mem, Vg_CoreSignal, tst->tid, OFFSET_riscv64_##vreg,   \
+            (Addr)&sc->sc_fpregs.d.ureg, sizeof(type));
+   FREG_TO_CTX(f[0], f0, UWord);
+   FREG_TO_CTX(f[1], f1, UWord);
+   FREG_TO_CTX(f[2], f2, UWord);
+   FREG_TO_CTX(f[3], f3, UWord);
+   FREG_TO_CTX(f[4], f4, UWord);
+   FREG_TO_CTX(f[5], f5, UWord);
+   FREG_TO_CTX(f[6], f6, UWord);
+   FREG_TO_CTX(f[7], f7, UWord);
+   FREG_TO_CTX(f[8], f8, UWord);
+   FREG_TO_CTX(f[9], f9, UWord);
+   FREG_TO_CTX(f[10], f10, UWord);
+   FREG_TO_CTX(f[11], f11, UWord);
+   FREG_TO_CTX(f[12], f12, UWord);
+   FREG_TO_CTX(f[13], f13, UWord);
+   FREG_TO_CTX(f[14], f14, UWord);
+   FREG_TO_CTX(f[15], f15, UWord);
+   FREG_TO_CTX(f[16], f16, UWord);
+   FREG_TO_CTX(f[17], f17, UWord);
+   FREG_TO_CTX(f[18], f18, UWord);
+   FREG_TO_CTX(f[19], f19, UWord);
+   FREG_TO_CTX(f[20], f20, UWord);
+   FREG_TO_CTX(f[21], f21, UWord);
+   FREG_TO_CTX(f[22], f22, UWord);
+   FREG_TO_CTX(f[23], f23, UWord);
+   FREG_TO_CTX(f[24], f24, UWord);
+   FREG_TO_CTX(f[25], f25, UWord);
+   FREG_TO_CTX(f[26], f26, UWord);
+   FREG_TO_CTX(f[27], f27, UWord);
+   FREG_TO_CTX(f[28], f28, UWord);
+   FREG_TO_CTX(f[29], f29, UWord);
+   FREG_TO_CTX(f[30], f30, UWord);
+   FREG_TO_CTX(f[31], f31, UWord);
+   FREG_TO_CTX(fcsr, fcsr, UInt);
+#undef FREG_TO_CTX
+}
+
+/* Fill the Valgrind-private trailer: the signal number and both guard words. */
+static void build_vg_sigframe(struct vg_sigframe* frame, Int sigNo)
+{
+   frame->sigNo_private = sigNo;
+   frame->magicE        = 0x27182818;
+   frame->magicPI       = 0x31415927;
+}
+
+static Addr build_rt_sigframe(ThreadState*         tst,
+                              Addr                 sp_top_of_frame,
+                              const vki_siginfo_t* siginfo,
+                              UInt                 flags,
+                              const vki_sigset_t*  mask)
+{
+   /* Place a 16-byte aligned frame below the given stack top. */
+   const SizeT frame_szB  = sizeof(struct rt_sigframe);
+   const Addr  frame_addr = VG_ROUNDDN(sp_top_of_frame - frame_szB, 16);
+
+   /* If the stack cannot take the frame, return the top unchanged. */
+   if (!ML_(sf_maybe_extend_stack)(tst, frame_addr, frame_szB, flags))
+      return sp_top_of_frame;
+
+   /* Tell the tools that the whole frame is about to be written. */
+   VG_TRACK(pre_mem_write, Vg_CoreSignal, tst->tid, "signal handler frame",
+            frame_addr, frame_szB);
+
+   struct rt_sigframe* sf = (struct rt_sigframe*)frame_addr;
+
+   /* Copy the siginfo. For SIGILL with si_code > 0 the address field is
+      overwritten with the current guest pc (the faulting address). */
+   sf->info = *siginfo;
+   const Int signo = siginfo->si_signo;
+   if (signo == VKI_SIGILL && siginfo->si_code > 0)
+      sf->info._sifields._sigfault._addr = (void*)VG_(get_IP)(tst->tid);
+   VG_TRACK(post_mem_write, Vg_CoreSignal, tst->tid, (Addr)&sf->info,
+            sizeof(sf->info));
+
+   /* Next the ucontext ... */
+   synth_ucontext(tst, siginfo, mask, &sf->uc);
+
+   /* ... and last the Valgrind-specific part. */
+   build_vg_sigframe(&sf->vg, signo);
+
+   return frame_addr;
+}
+
+void VG_(sigframe_create)(ThreadId                   tid,
+                          Bool                       on_altstack,
+                          Addr                       rsp_top_of_frame,
+                          const vki_siginfo_t*       siginfo,
+                          const struct vki_ucontext* siguc,
+                          void*                      handler,
+                          UInt                       flags,
+                          const vki_sigset_t*        mask,
+                          void*                      restorer)
+{
+   /* The restorer functionality (SA_RESTORER) is not used on riscv64-linux. */
+   vg_assert(restorer == NULL);
+
+   ThreadState* tst = VG_(get_ThreadState)(tid);
+
+   /* Build the signal frame on the stack; sp is the old top if that failed. */
+   Addr sp = build_rt_sigframe(tst, rsp_top_of_frame, siginfo, flags, mask);
+   struct rt_sigframe* frame = (struct rt_sigframe*)sp;
+
+   /* Configure guest registers for the signal delivery. */
+   VG_(set_SP)(tid, sp);
+   VG_TRACK(post_reg_write, Vg_CoreSignal, tid, VG_O_STACK_PTR, sizeof(UWord));
+   /* Handler args: x10 = signal number, x11 = &siginfo, x12 = &ucontext. */
+   tst->arch.vex.guest_x10 = siginfo->si_signo;
+   VG_TRACK(post_reg_write, Vg_CoreSignal, tst->tid, OFFSET_riscv64_x10,
+            sizeof(UWord));
+   tst->arch.vex.guest_x11 = (Addr)&frame->info;
+   VG_TRACK(post_reg_write, Vg_CoreSignal, tst->tid, OFFSET_riscv64_x11,
+            sizeof(UWord));
+   tst->arch.vex.guest_x12 = (Addr)&frame->uc;
+   VG_TRACK(post_reg_write, Vg_CoreSignal, tst->tid, OFFSET_riscv64_x12,
+            sizeof(UWord));
+   /* x1 is the handler's return address: the rt_sigreturn substitute. */
+   tst->arch.vex.guest_x1 = (Addr)&VG_(riscv64_linux_SUBST_FOR_rt_sigreturn);
+   VG_TRACK(post_reg_write, Vg_CoreSignal, tst->tid, OFFSET_riscv64_x1,
+            sizeof(UWord));
+
+   /* Set up the program counter. Note that it is not necessary to inform the
+      tools about this write because pc is always defined. */
+   VG_(set_IP)(tid, (Addr)handler);
+
+   if (VG_(clo_trace_signals))
+      VG_(message)(
+         Vg_DebugMsg,
+         "VG_(sigframe_create) (thread %u): next pc=%#lx, next sp=%#lx\n", tid,
+         (Addr)handler, sp);
+}
+
+/*------------------------------------------------------------*/
+/*--- Destroying a signal frame                            ---*/
+/*------------------------------------------------------------*/
+
+/* Check the Valgrind-specific part of a signal frame and fetch the signal
+   number from it. Returns True when both magic words are intact; otherwise
+   *sigNo is VKI_SIGSEGV and the client is set to take a segfault. */
+static Bool
+restore_vg_sigframe(ThreadState* tst, struct vg_sigframe* frame, Int* sigNo)
+{
+   Bool intact = frame->magicPI == 0x31415927 && frame->magicE == 0x27182818;
+   if (intact) {
+      *sigNo = frame->sigNo_private;
+      return True;
+   }
+   VG_(message)(Vg_UserMsg,
+                "Thread %u return signal frame corrupted. Killing process.\n",
+                tst->tid);
+   VG_(set_default_handler)(VKI_SIGSEGV);
+   VG_(synth_fault)(tst->tid);
+   *sigNo = VKI_SIGSEGV;
+   return False;
+}
+
+static void restore_ucontext(ThreadState* tst, struct vki_ucontext* uc)
+{
+   /* Restore the signal mask; tmp_sig_mask is reset to the same value. */
+   VG_TRACK(pre_mem_read, Vg_CoreSignal, tst->tid, "signal frame mask",
+            (Addr)&uc->uc_sigmask, sizeof(uc->uc_sigmask));
+   tst->sig_mask     = uc->uc_sigmask;
+   tst->tmp_sig_mask = tst->sig_mask;
+
+   struct vki_sigcontext* sc = &uc->uc_mcontext;
+
+   /* Reload pc and x1..x31 from sc_regs, telling the tool about each copy. */
+#define IREG_FROM_CTX(ureg, vreg)                                              \
+   tst->arch.vex.guest_##vreg = sc->sc_regs.ureg;                              \
+   VG_TRACK(copy_mem_to_reg, Vg_CoreSignal, tst->tid, (Addr)&sc->sc_regs.ureg, \
+            OFFSET_riscv64_##vreg, sizeof(UWord));
+   IREG_FROM_CTX(pc, pc);
+   IREG_FROM_CTX(ra, x1);
+   IREG_FROM_CTX(sp, x2);
+   IREG_FROM_CTX(gp, x3);
+   IREG_FROM_CTX(tp, x4);
+   IREG_FROM_CTX(t0, x5);
+   IREG_FROM_CTX(t1, x6);
+   IREG_FROM_CTX(t2, x7);
+   IREG_FROM_CTX(s0, x8);
+   IREG_FROM_CTX(s1, x9);
+   IREG_FROM_CTX(a0, x10);
+   IREG_FROM_CTX(a1, x11);
+   IREG_FROM_CTX(a2, x12);
+   IREG_FROM_CTX(a3, x13);
+   IREG_FROM_CTX(a4, x14);
+   IREG_FROM_CTX(a5, x15);
+   IREG_FROM_CTX(a6, x16);
+   IREG_FROM_CTX(a7, x17);
+   IREG_FROM_CTX(s2, x18);
+   IREG_FROM_CTX(s3, x19);
+   IREG_FROM_CTX(s4, x20);
+   IREG_FROM_CTX(s5, x21);
+   IREG_FROM_CTX(s6, x22);
+   IREG_FROM_CTX(s7, x23);
+   IREG_FROM_CTX(s8, x24);
+   IREG_FROM_CTX(s9, x25);
+   IREG_FROM_CTX(s10, x26);
+   IREG_FROM_CTX(s11, x27);
+   IREG_FROM_CTX(t3, x28);
+   IREG_FROM_CTX(t4, x29);
+   IREG_FROM_CTX(t5, x30);
+   IREG_FROM_CTX(t6, x31);
+#undef IREG_FROM_CTX
+
+   /* Reload f0..f31 (UWord-sized) and fcsr (UInt-sized) from sc_fpregs.d. */
+#define FREG_FROM_CTX(ureg, vreg, type)                                        \
+   tst->arch.vex.guest_##vreg = sc->sc_fpregs.d.ureg;                          \
+   VG_TRACK(copy_mem_to_reg, Vg_CoreSignal, tst->tid,                          \
+            (Addr)&sc->sc_fpregs.d.ureg, OFFSET_riscv64_##vreg, sizeof(type));
+   FREG_FROM_CTX(f[0], f0, UWord);
+   FREG_FROM_CTX(f[1], f1, UWord);
+   FREG_FROM_CTX(f[2], f2, UWord);
+   FREG_FROM_CTX(f[3], f3, UWord);
+   FREG_FROM_CTX(f[4], f4, UWord);
+   FREG_FROM_CTX(f[5], f5, UWord);
+   FREG_FROM_CTX(f[6], f6, UWord);
+   FREG_FROM_CTX(f[7], f7, UWord);
+   FREG_FROM_CTX(f[8], f8, UWord);
+   FREG_FROM_CTX(f[9], f9, UWord);
+   FREG_FROM_CTX(f[10], f10, UWord);
+   FREG_FROM_CTX(f[11], f11, UWord);
+   FREG_FROM_CTX(f[12], f12, UWord);
+   FREG_FROM_CTX(f[13], f13, UWord);
+   FREG_FROM_CTX(f[14], f14, UWord);
+   FREG_FROM_CTX(f[15], f15, UWord);
+   FREG_FROM_CTX(f[16], f16, UWord);
+   FREG_FROM_CTX(f[17], f17, UWord);
+   FREG_FROM_CTX(f[18], f18, UWord);
+   FREG_FROM_CTX(f[19], f19, UWord);
+   FREG_FROM_CTX(f[20], f20, UWord);
+   FREG_FROM_CTX(f[21], f21, UWord);
+   FREG_FROM_CTX(f[22], f22, UWord);
+   FREG_FROM_CTX(f[23], f23, UWord);
+   FREG_FROM_CTX(f[24], f24, UWord);
+   FREG_FROM_CTX(f[25], f25, UWord);
+   FREG_FROM_CTX(f[26], f26, UWord);
+   FREG_FROM_CTX(f[27], f27, UWord);
+   FREG_FROM_CTX(f[28], f28, UWord);
+   FREG_FROM_CTX(f[29], f29, UWord);
+   FREG_FROM_CTX(f[30], f30, UWord);
+   FREG_FROM_CTX(f[31], f31, UWord);
+   FREG_FROM_CTX(fcsr, fcsr, UInt);
+#undef FREG_FROM_CTX
+}
+
+static void
+restore_rt_sigframe(ThreadState* tst, struct rt_sigframe* frame, Int* sigNo)
+{
+   if (!restore_vg_sigframe(tst, &frame->vg, sigNo)) return; /* bad frame */
+   restore_ucontext(tst, &frame->uc);
+}
+
+void VG_(sigframe_destroy)(ThreadId tid, Bool isRT)
+{
+   /* riscv64-linux has no non-rt sigreturn, so only rt frames can occur. */
+   vg_assert(isRT);
+
+   ThreadState* tst = VG_(get_ThreadState)(tid);
+   Int          sigNo;
+
+   /* The frame starts at the current sp; sample it before the registers
+      are reloaded from the saved context. */
+   Addr                frame_addr = VG_(get_SP)(tid);
+   struct rt_sigframe* frame      = (struct rt_sigframe*)frame_addr;
+   restore_rt_sigframe(tst, frame, &sigNo);
+
+   /* Report the frame and the redzone below it as dead stack memory. */
+   VG_TRACK(die_mem_stack_signal, frame_addr - VG_STACK_REDZONE_SZB,
+            sizeof(struct rt_sigframe) + VG_STACK_REDZONE_SZB);
+
+   if (VG_(clo_trace_signals))
+      VG_(message)(Vg_DebugMsg, "VG_(sigframe_destroy) (thread %u): pc=%#lx\n",
+                   tid, VG_(get_IP)(tid));
+
+   /* Tell the tools that delivery of this signal is over. */
+   VG_TRACK(post_deliver_signal, tid, sigNo);
+}
+
+#endif // defined(VGP_riscv64_linux)
+
+/*--------------------------------------------------------------------*/
+/*--- end                                 sigframe-riscv64-linux.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/m_signals.c b/coregrind/m_signals.c
index 09acb7cb7..6a6030807 100644
--- a/coregrind/m_signals.c
+++ b/coregrind/m_signals.c
@@ -643,6 +643,19 @@ VgHashTable *ht_sigchld_ignore = NULL;
         (srP)->misc.MIPS32.r28 = (uc)->uc_mcontext.sc_regs[28]; \
       }
 
+#elif defined(VGP_riscv64_linux)
+#  define VG_UCONTEXT_INSTR_PTR(uc)       ((uc)->uc_mcontext.sc_regs.pc)
+#  define VG_UCONTEXT_STACK_PTR(uc)       ((uc)->uc_mcontext.sc_regs.sp)
+#  define VG_UCONTEXT_SYSCALL_SYSRES(uc)                               \
+      /* Convert the value in uc_mcontext.sc_regs.a0 into a SysRes. */ \
+      VG_(mk_SysRes_riscv64_linux)( (uc)->uc_mcontext.sc_regs.a0 )
+#  define VG_UCONTEXT_TO_UnwindStartRegs(srP, uc)                \
+      { (srP)->r_pc = (uc)->uc_mcontext.sc_regs.pc;              \
+        (srP)->r_sp = (uc)->uc_mcontext.sc_regs.sp;              \
+        (srP)->misc.RISCV64.r_fp = (uc)->uc_mcontext.sc_regs.s0; \
+        (srP)->misc.RISCV64.r_ra = (uc)->uc_mcontext.sc_regs.ra; \
+      }
+
 #elif defined(VGP_x86_solaris)
 #  define VG_UCONTEXT_INSTR_PTR(uc)       ((Addr)(uc)->uc_mcontext.gregs[VKI_EIP])
 #  define VG_UCONTEXT_STACK_PTR(uc)       ((Addr)(uc)->uc_mcontext.gregs[VKI_UESP])
@@ -670,6 +683,7 @@ VgHashTable *ht_sigchld_ignore = NULL;
         (srP)->r_sp = (uc)->uc_mcontext.gregs[VKI_REG_RSP];                  \
         (srP)->misc.AMD64.r_rbp = (uc)->uc_mcontext.gregs[VKI_REG_RBP];      \
       }
+
 #else
 #  error Unknown platform
 #endif
@@ -914,8 +928,10 @@ void calculate_SKSS_from_SCSS ( SKSS* dst )
       if (skss_handler != VKI_SIG_IGN && skss_handler != VKI_SIG_DFL)
          skss_flags |= VKI_SA_SIGINFO;
 
+#     if !defined(VGP_riscv64_linux)
       /* use our own restorer */
       skss_flags |= VKI_SA_RESTORER;
+#     endif
 
       /* Create SKSS entry for this signal. */
       if (sig != VKI_SIGKILL && sig != VKI_SIGSTOP)
@@ -1067,6 +1083,16 @@ extern void my_sigreturn(void);
    "   li $t4, " #name "\n" \
    "   syscall[32]\n" \
    ".previous\n"
+
+#elif defined(VGP_riscv64_linux)
+/* Not used on riscv64. */
+#  define _MY_SIGRETURN(name) \
+   ".text\n" \
+   ".globl my_sigreturn\n" \
+   "my_sigreturn:\n" \
+   "   unimp\n" \
+   ".previous\n"
+
 #elif defined(VGP_x86_solaris) || defined(VGP_amd64_solaris)
 /* Not used on Solaris. */
 #  define _MY_SIGRETURN(name) \
@@ -1132,9 +1158,10 @@ static void handle_SCSS_change ( Bool force_update )
 
       ksa.ksa_handler = skss.skss_per_sig[sig].skss_handler;
       ksa.sa_flags    = skss.skss_per_sig[sig].skss_flags;
-#     if !defined(VGP_ppc32_linux) && \
+#     if !defined(VGP_ppc32_linux) && !defined(VGP_mips32_linux) && \
+         !defined(VGP_riscv64_linux) && \
          !defined(VGP_x86_darwin) && !defined(VGP_amd64_darwin) && \
-         !defined(VGP_mips32_linux) && !defined(VGO_solaris) && !defined(VGO_freebsd)
+         !defined(VGO_solaris) && !defined(VGO_freebsd)
       ksa.sa_restorer = my_sigreturn;
 #     endif
       /* Re above ifdef (also the assertion below), PaulM says:
@@ -1178,11 +1205,11 @@ static void handle_SCSS_change ( Bool force_update )
 #        endif
          vg_assert(ksa_old.sa_flags 
                    == skss_old.skss_per_sig[sig].skss_flags);
-#        if !defined(VGP_ppc32_linux) && \
+#        if !defined(VGP_ppc32_linux) && !defined(VGP_mips32_linux) && \
+            !defined(VGP_mips64_linux) && !defined(VGP_nanomips_linux) && \
+            !defined(VGP_riscv64_linux) && \
             !defined(VGP_x86_darwin) && !defined(VGP_amd64_darwin) && \
-            !defined(VGP_mips32_linux) && !defined(VGP_mips64_linux) && \
-            !defined(VGP_nanomips_linux) && !defined(VGO_solaris) && \
-            !defined(VGO_freebsd)
+            !defined(VGO_solaris) && !defined(VGO_freebsd)
          vg_assert(ksa_old.sa_restorer == my_sigreturn);
 #        endif
          VG_(sigaddset)( &ksa_old.sa_mask, VKI_SIGKILL );
@@ -1302,8 +1329,8 @@ SysRes VG_(do_sys_sigaction) ( Int signo,
       old_act->ksa_handler = scss.scss_per_sig[signo].scss_handler;
       old_act->sa_flags    = scss.scss_per_sig[signo].scss_flags;
       old_act->sa_mask     = scss.scss_per_sig[signo].scss_mask;
-#     if !defined(VGO_darwin) && !defined(VGO_freebsd) && \
-         !defined(VGO_solaris)
+#     if !defined(VGP_riscv64_linux) && !defined(VGO_darwin) && \
+         !defined(VGO_freebsd) && !defined(VGO_solaris)
       old_act->sa_restorer = scss.scss_per_sig[signo].scss_restorer;
 #     endif
    }
@@ -1315,8 +1342,8 @@ SysRes VG_(do_sys_sigaction) ( Int signo,
       scss.scss_per_sig[signo].scss_mask     = new_act->sa_mask;
 
       scss.scss_per_sig[signo].scss_restorer = NULL;
-#     if !defined(VGO_darwin) && !defined(VGO_freebsd) && \
-         !defined(VGO_solaris)
+#     if !defined(VGP_riscv64_linux) && !defined(VGO_darwin) && \
+         !defined(VGO_freebsd) && !defined(VGO_solaris)
       scss.scss_per_sig[signo].scss_restorer = new_act->sa_restorer;
 #     endif
 
@@ -1675,8 +1702,8 @@ void VG_(kill_self)(Int sigNo)
 
    sa.ksa_handler = VKI_SIG_DFL;
    sa.sa_flags = 0;
-#  if !defined(VGO_darwin) && !defined(VGO_freebsd) && \
-      !defined(VGO_solaris)
+#  if !defined(VGP_riscv64_linux) && !defined(VGO_darwin) && \
+      !defined(VGO_freebsd) && !defined(VGO_solaris)
    sa.sa_restorer = 0;
 #  endif
    VG_(sigemptyset)(&sa.sa_mask);
@@ -3067,8 +3094,8 @@ void pp_ksigaction ( vki_sigaction_toK_t* sa )
    VG_(printf)("pp_ksigaction: handler %p, flags 0x%x, restorer %p\n", 
                sa->ksa_handler, 
                (UInt)sa->sa_flags, 
-#              if !defined(VGO_darwin) && !defined(VGO_freebsd) && \
-                  !defined(VGO_solaris)
+#              if !defined(VGP_riscv64_linux) && !defined(VGO_darwin) && \
+                  !defined(VGO_freebsd) && !defined(VGO_solaris)
                   sa->sa_restorer
 #              else
                   (void*)0
@@ -3090,8 +3117,8 @@ void VG_(set_default_handler)(Int signo)
 
    sa.ksa_handler = VKI_SIG_DFL;
    sa.sa_flags = 0;
-#  if !defined(VGO_darwin) && !defined(VGO_freebsd) && \
-      !defined(VGO_solaris)
+#  if !defined(VGP_riscv64_linux) && !defined(VGO_darwin) && \
+      !defined(VGO_freebsd) && !defined(VGO_solaris)
    sa.sa_restorer = 0;
 #  endif
    VG_(sigemptyset)(&sa.sa_mask);
@@ -3212,8 +3239,8 @@ void VG_(sigstartup_actions) ( void )
 
 	 tsa.ksa_handler = (void *)sync_signalhandler;
 	 tsa.sa_flags = VKI_SA_SIGINFO;
-#        if !defined(VGO_darwin) && !defined(VGO_freebsd) && \
-            !defined(VGO_solaris)
+#        if !defined(VGP_riscv64_linux) && !defined(VGO_darwin) && \
+            !defined(VGO_freebsd) && !defined(VGO_solaris)
 	 tsa.sa_restorer = 0;
 #        endif
 	 VG_(sigfillset)(&tsa.sa_mask);
@@ -3240,8 +3267,8 @@ void VG_(sigstartup_actions) ( void )
       scss.scss_per_sig[i].scss_mask     = sa.sa_mask;
 
       scss.scss_per_sig[i].scss_restorer = NULL;
-#     if !defined(VGO_darwin) && !defined(VGO_freebsd) && \
-         !defined(VGO_solaris)
+#     if !defined(VGP_riscv64_linux) && !defined(VGO_darwin) && \
+         !defined(VGO_freebsd) && !defined(VGO_solaris)
       scss.scss_per_sig[i].scss_restorer = sa.sa_restorer;
 #     endif
 
diff --git a/coregrind/m_stacktrace.c b/coregrind/m_stacktrace.c
index 0889a6517..ce153e97a 100644
--- a/coregrind/m_stacktrace.c
+++ b/coregrind/m_stacktrace.c
@@ -1511,6 +1511,101 @@ UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
 
 #endif
 
+/* ------------------------ riscv64 ------------------------- */
+
+#if defined(VGP_riscv64_linux)
+
+UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
+                               /*OUT*/Addr* ips, UInt max_n_ips,
+                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
+                               const UnwindStartRegs* startRegs,
+                               Addr fp_max_orig )
+{
+   Bool  debug = False; /* set to True to trace the unwinder via VG_(printf) */
+   Int   i;
+   Addr  fp_max;
+   UInt  n_found = 0;
+   const Int cmrf = VG_(clo_merge_recursive_frames);
+
+   vg_assert(sizeof(Addr) == sizeof(UWord));
+   vg_assert(sizeof(Addr) == sizeof(void*));
+
+   D3UnwindRegs uregs;
+   uregs.pc = startRegs->r_pc;
+   uregs.sp = startRegs->r_sp;
+   uregs.fp = startRegs->misc.RISCV64.r_fp;
+   uregs.ra = startRegs->misc.RISCV64.r_ra;
+   Addr fp_min = uregs.sp - VG_STACK_REDZONE_SZB;
+
+   /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
+      stopping when the trail goes cold, which we guess to be
+      when FP is not a reasonable stack location. */
+
+   fp_max = fp_max_orig;
+   if (fp_max >= sizeof(Addr))
+      fp_max -= sizeof(Addr);
+
+   if (debug)
+      VG_(printf)("\nmax_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
+                  "fp_max=0x%lx pc=0x%lx sp=0x%lx fp=0x%lx ra=0x%lx\n",
+                  max_n_ips, fp_min, fp_max_orig, fp_max,
+                  uregs.pc, uregs.sp, uregs.fp, uregs.ra);
+
+   if (sps) sps[0] = uregs.sp;
+   if (fps) fps[0] = uregs.fp;
+   ips[0] = uregs.pc;
+   i = 1;
+
+   /* Loop unwinding the stack, using CFI; one frame is added per iteration. */
+   while (True) {
+      if (debug)
+         VG_(printf)("i: %d, pc: 0x%lx, sp: 0x%lx, fp: 0x%lx, ra: 0x%lx\n",
+                     i, uregs.pc, uregs.sp, uregs.fp, uregs.ra);
+      if (i >= max_n_ips)
+         break;
+
+      if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
+         if (sps) sps[i] = uregs.sp;
+         if (fps) fps[i] = uregs.fp;
+         ips[i++] = uregs.pc - 1;
+         if (debug)
+            VG_(printf)(
+               "USING CFI: pc: 0x%lx, sp: 0x%lx, fp: 0x%lx, ra: 0x%lx\n",
+               uregs.pc, uregs.sp, uregs.fp, uregs.ra);
+         uregs.pc = uregs.pc - 1;
+         RECURSIVE_MERGE(cmrf,ips,i);
+         continue;
+      }
+
+      /* A problem on the first frame? Let's assume it was a bad jump.
+         We will use the link register and the current stack and frame
+         pointers and see if we can use the CFI in the next round. */
+      if (i == 1) {
+         uregs.pc = uregs.ra;
+         uregs.ra = 0;
+
+         if (sps) sps[i] = uregs.sp;
+         if (fps) fps[i] = uregs.fp;
+         ips[i++] = uregs.pc - 1;
+         if (debug)
+            VG_(printf)(
+               "USING bad-jump: pc: 0x%lx, sp: 0x%lx, fp: 0x%lx, ra: 0x%lx\n",
+               uregs.pc, uregs.sp, uregs.fp, uregs.ra);
+         uregs.pc = uregs.pc - 1;
+         RECURSIVE_MERGE(cmrf,ips,i);
+         continue;
+      }
+
+      /* No luck.  We have to give up. */
+      break;
+   }
+
+   n_found = i;
+   return n_found;
+}
+
+#endif
+
 /*------------------------------------------------------------*/
 /*---                                                      ---*/
 /*--- END platform-dependent unwinder worker functions     ---*/
diff --git a/coregrind/m_syscall.c b/coregrind/m_syscall.c
index 6ab20694d..8ff9c90bd 100644
--- a/coregrind/m_syscall.c
+++ b/coregrind/m_syscall.c
@@ -204,6 +204,17 @@ SysRes VG_(mk_SysRes_arm64_linux) ( Long val ) {
    return res;
 }
 
+SysRes VG_(mk_SysRes_riscv64_linux) ( Long val ) {
+   /* Values in -4095 .. -1 (both inclusive) denote an error. */
+   const Bool failed = val >= -4095 && val <= -1;
+   SysRes     sr;
+   sr._isError = failed;
+   /* An error is stored as its positive magnitude, a success unchanged. */
+   sr._val = failed ? (ULong)(-val)
+                    : (ULong)val;
+   return sr;
+}
+
 /* Generic constructors. */
 SysRes VG_(mk_SysRes_Success) ( UWord res ) {
    SysRes r;
@@ -1076,6 +1087,30 @@ asm (
    ".previous                              \n\t"
 );
 
+#elif defined(VGP_riscv64_linux)
+/* Calling convention is: args in a0-a5, sysno in a7, return value in a0.
+   Return value follows the usual convention that -4095 .. -1 (both inclusive)
+   is an error value. All other values are success values.
+
+   The C parameters a1..a6 are expected in registers a0 to a5 on entry and are
+   left untouched; syscall_no arrives in a6 and has to be moved to a7.
+*/
+extern UWord do_syscall_WRK (
+          UWord a1, UWord a2, UWord a3,
+          UWord a4, UWord a5, UWord a6,
+          UWord syscall_no
+       );
+asm(
+".text\n"
+".globl do_syscall_WRK\n"
+"do_syscall_WRK:\n"
+"        mv a7, a6\n"
+"        li a6, 0\n"   /* a6 carries no syscall argument; zero it */
+"        ecall\n"
+"        ret\n"
+".previous\n"
+);
+
 #elif defined(VGP_x86_solaris)
 
 extern ULong
@@ -1324,6 +1359,10 @@ SysRes VG_(do_syscall) ( UWord sysno, RegWord a1, RegWord a2, RegWord a3,
    do_syscall_WRK(a1, a2, a3, a4, a5, a6, sysno, &reg_a0);
    return VG_(mk_SysRes_nanomips_linux)(reg_a0);
 
+#  elif defined(VGP_riscv64_linux)
+   UWord val = do_syscall_WRK(a1, a2, a3, a4, a5, a6, sysno);
+   return VG_(mk_SysRes_riscv64_linux)(val);
+
 #  elif defined(VGP_x86_solaris)
    UInt val, val2, err = False;
    Bool restart;
diff --git a/coregrind/m_syswrap/priv_syswrap-linux.h b/coregrind/m_syswrap/priv_syswrap-linux.h
index d50cdcc98..1e166f704 100644
--- a/coregrind/m_syswrap/priv_syswrap-linux.h
+++ b/coregrind/m_syswrap/priv_syswrap-linux.h
@@ -515,6 +515,13 @@ extern UInt do_syscall_clone_nanomips_linux ( Word (*fn) (void *),  /* a0 - 4 */
                                               Int*  child_tid,      /* a4 - 8 */
                                               Int*  parent_tid,     /* a5 - 9 */
                                               void* tls_ptr);       /* a6 - 10 */
+extern Long do_syscall_clone_riscv64_linux ( Word (*fn) (void *), /* a0 */
+                                             void* stack,         /* a1 */
+                                             Int   flags,         /* a2 */
+                                             void* arg,           /* a3 */
+                                             Int*  child_tid,     /* a4 */
+                                             Int*  parent_tid,    /* a5 */
+                                             void* tls_ptr);      /* a6 */ /* returns the full 64-bit a0: tid or -errno */
 #endif   // __PRIV_SYSWRAP_LINUX_H
 
 /*--------------------------------------------------------------------*/
diff --git a/coregrind/m_syswrap/priv_types_n_macros.h b/coregrind/m_syswrap/priv_types_n_macros.h
index 6be22f8e7..cc00592ef 100644
--- a/coregrind/m_syswrap/priv_types_n_macros.h
+++ b/coregrind/m_syswrap/priv_types_n_macros.h
@@ -94,7 +94,7 @@ typedef
          || defined(VGP_ppc32_linux) \
          || defined(VGP_arm_linux) || defined(VGP_s390x_linux) \
          || defined(VGP_arm64_linux) \
-         || defined(VGP_nanomips_linux)
+         || defined(VGP_nanomips_linux) || defined(VGP_riscv64_linux)
       Int o_arg1;
       Int o_arg2;
       Int o_arg3;
diff --git a/coregrind/m_syswrap/syscall-riscv64-linux.S b/coregrind/m_syswrap/syscall-riscv64-linux.S
new file mode 100644
index 000000000..cf976cee1
--- /dev/null
+++ b/coregrind/m_syswrap/syscall-riscv64-linux.S
@@ -0,0 +1,198 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Support for doing system calls.      syscall-riscv64-linux.S ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+  This file is part of Valgrind, a dynamic binary instrumentation
+  framework.
+
+  Copyright (C) 2020-2023 Petr Pavlu
+      petr.pavlu@dagobah.cz
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of the
+  License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+  The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "pub_core_basics_asm.h"
+
+#if defined(VGP_riscv64_linux)
+
+#include "pub_core_vkiscnums_asm.h"
+#include "libvex_guest_offsets.h"
+
+
+/*----------------------------------------------------------------*/
+/*
+        Perform a syscall for the client.  This will run a syscall
+        with the client's specific per-thread signal mask.
+
+        The structure of this function is such that, if the syscall is
+        interrupted by a signal, we can determine exactly what
+        execution state we were in with respect to the execution of
+        the syscall by examining the value of pc in the signal
+        handler.  This means that we can always do the appropriate
+        thing to precisely emulate the kernel's signal/syscall
+        interactions.
+
+        The syscall number is taken from the argument, even though it
+        should also be in guest_state->guest_x17.  The syscall result
+        is written back to guest_state->guest_x10 on completion.
+
+        Returns 0 if the syscall was successfully called (even if the
+        syscall itself failed), or a nonzero error code in the lowest
+        8 bits if one of the sigprocmasks failed (there's no way to
+        determine which one failed).  And there's no obvious way to
+        recover from that either, but nevertheless we want to know.
+
+        VG_(fixup_guest_state_after_syscall_interrupted) does the
+        thread state fixup in the case where we were interrupted by a
+        signal.
+
+        Prototype:
+
+   UWord ML_(do_syscall_for_client_WRK)(
+              Int syscallno,                 // a0
+              void* guest_state,             // a1
+              const vki_sigset_t *sysmask,   // a2
+              const vki_sigset_t *postmask,  // a3
+              Int nsigwords)                 // a4
+*/
+/* from vki-riscv64-linux.h */
+#define VKI_SIG_SETMASK 2
+
+.globl ML_(do_syscall_for_client_WRK)
+ML_(do_syscall_for_client_WRK):
+
+   /* Stash callee-saves and our args on the stack */
+   addi sp, sp, -144 /* 18 slots of 8 bytes: ra, s0-s11, a0-a4 */
+   sd ra, 136(sp)
+   sd s0, 128(sp)
+   sd s1, 120(sp)
+   sd s2, 112(sp)
+   sd s3, 104(sp)
+   sd s4, 96(sp)
+   sd s5, 88(sp)
+   sd s6, 80(sp)
+   sd s7, 72(sp)
+   sd s8, 64(sp)
+   sd s9, 56(sp)
+   sd s10, 48(sp)
+   sd s11, 40(sp)
+   sd a0, 32(sp) /* syscallno */
+   sd a1, 24(sp) /* guest_state */
+   sd a2, 16(sp) /* sysmask */
+   sd a3, 8(sp)  /* postmask */
+   sd a4, 0(sp)  /* nsigwords */
+
+1: /* Install the syscall-time mask, saving the previous one in postmask. */
+
+   li a7, __NR_rt_sigprocmask
+   li a0, VKI_SIG_SETMASK
+   mv a1, a2 /* sysmask */
+   mv a2, a3 /* postmask */
+   mv a3, a4 /* nsigwords */
+   ecall
+   /* NOTE(review): the result of this first sigprocmask is not checked. */
+
+   ld a5, 24(sp) /* saved a1 == guest_state */
+
+   ld a7, 32(sp) /* saved a0 == syscall# */
+   ld a0, OFFSET_riscv64_x10(a5)
+   ld a1, OFFSET_riscv64_x11(a5)
+   ld a2, OFFSET_riscv64_x12(a5)
+   ld a3, OFFSET_riscv64_x13(a5)
+   ld a4, OFFSET_riscv64_x14(a5)
+   ld a5, OFFSET_riscv64_x15(a5) /* loaded last: a5 was the base pointer */
+
+2: ecall
+3:
+   ld a5, 24(sp) /* saved a1 == guest_state */
+   sd a0, OFFSET_riscv64_x10(a5) /* write the syscall result back to guest x10 */
+
+4:
+   li a7, __NR_rt_sigprocmask
+   li a0, VKI_SIG_SETMASK
+   ld a1, 8(sp) /* saved a3 == postmask */
+   li a2, 0
+   ld a3, 0(sp) /* saved a4 == nsigwords */
+   ecall
+   /* a0 now holds the result of the sigprocmask above: negative on failure. */
+   bltz a0, 7f
+
+5: /* Success: return zero */
+   li a0, 0
+   ld ra, 136(sp)
+   ld s0, 128(sp)
+   ld s1, 120(sp)
+   ld s2, 112(sp)
+   ld s3, 104(sp)
+   ld s4, 96(sp)
+   ld s5, 88(sp)
+   ld s6, 80(sp)
+   ld s7, 72(sp)
+   ld s8, 64(sp)
+   ld s9, 56(sp)
+   ld s10, 48(sp)
+   ld s11, 40(sp)
+   addi sp, sp, 144
+   ret
+
+7: /* Failure: return 0x8000 | error code */
+   li a1, 0x8000
+   or a0, a0, a1
+   ld ra, 136(sp)
+   ld s0, 128(sp)
+   ld s1, 120(sp)
+   ld s2, 112(sp)
+   ld s3, 104(sp)
+   ld s4, 96(sp)
+   ld s5, 88(sp)
+   ld s6, 80(sp)
+   ld s7, 72(sp)
+   ld s8, 64(sp)
+   ld s9, 56(sp)
+   ld s10, 48(sp)
+   ld s11, 40(sp)
+   addi sp, sp, 144
+   ret
+
+
+
+.section .rodata
+/* Export the addresses of labels 1-5 above so that
+   VG_(fixup_guest_state_after_syscall_interrupted) can tell from an
+   interrupted pc which phase of the syscall sequence was executing. */
+
+.align 3 /* 2^3 = 8-byte alignment for the .quad entries below */
+.globl ML_(blksys_setup)
+.globl ML_(blksys_restart)
+.globl ML_(blksys_complete)
+.globl ML_(blksys_committed)
+.globl ML_(blksys_finished)
+ML_(blksys_setup):      .quad 1b /* start of the first sigprocmask sequence */
+ML_(blksys_restart):    .quad 2b /* the client's ecall instruction itself */
+ML_(blksys_complete):   .quad 3b /* first instruction after the client's ecall */
+ML_(blksys_committed):  .quad 4b /* start of the mask-restoring sigprocmask */
+ML_(blksys_finished):   .quad 5b /* start of the success return path */
+
+#endif // defined(VGP_riscv64_linux)
+
+/* Let the linker know we don't need an executable stack */
+MARK_STACK_NO_EXEC
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/m_syswrap/syswrap-generic.c b/coregrind/m_syswrap/syswrap-generic.c
index 0c0f67602..f7985cf1b 100644
--- a/coregrind/m_syswrap/syswrap-generic.c
+++ b/coregrind/m_syswrap/syswrap-generic.c
@@ -3642,7 +3642,7 @@ POST(sys_newfstat)
 #endif
 
 #if !defined(VGO_solaris) && !defined(VGP_arm64_linux) && \
-    !defined(VGP_nanomips_linux)
+    !defined(VGP_nanomips_linux) && !defined(VGP_riscv64_linux)
 static vki_sigset_t fork_saved_mask;
 
 // In Linux, the sys_fork() function varies across architectures, but we
@@ -3693,7 +3693,7 @@ PRE(sys_fork)
       VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
    }
 }
-#endif // !defined(VGO_solaris) && !defined(VGP_arm64_linux)
+#endif
 
 PRE(sys_ftruncate)
 {
diff --git a/coregrind/m_syswrap/syswrap-linux.c b/coregrind/m_syswrap/syswrap-linux.c
index 45413fdd9..773452b69 100644
--- a/coregrind/m_syswrap/syswrap-linux.c
+++ b/coregrind/m_syswrap/syswrap-linux.c
@@ -310,6 +310,16 @@ static void run_a_thread_NORETURN ( Word tidW )
          : "r" (VgTs_Empty), "n" (__NR_exit), "m" (tst->os_state.exitcode)
          : "memory" , "$t4", "$a0"
       );
+#elif defined(VGP_riscv64_linux)
+      asm volatile (
+         "sw   %1, %0\n"      /* set tst->status = VgTs_Empty */
+         "li   a7, %2\n"      /* set a7 = __NR_exit */
+         "ld   a0, %3\n"      /* set a0 = tst->os_state.exitcode */
+         "ecall\n"            /* exit(tst->os_state.exitcode) */
+         : "=m" (tst->status)
+         : "r" (VgTs_Empty), "n" (__NR_exit), "m" (tst->os_state.exitcode)
+         : "a7", "a0"
+      );
 #else
 # error Unknown platform
 #endif
@@ -535,6 +545,13 @@ static SysRes clone_new_thread ( Word (*fn)(void *),
       (ML_(start_thread_NORETURN), stack, flags, ctst,
        child_tidptr, parent_tidptr, NULL);
    res = VG_ (mk_SysRes_nanomips_linux) (ret);
+#elif defined(VGP_riscv64_linux)
+   ULong a0;
+   ctst->arch.vex.guest_x10 = 0;
+   a0 = do_syscall_clone_riscv64_linux
+      (ML_(start_thread_NORETURN), stack, flags, ctst,
+       child_tidptr, parent_tidptr, NULL);
+   res = VG_(mk_SysRes_riscv64_linux)( a0 );
 #else
 # error Unknown platform
 #endif
@@ -597,6 +614,8 @@ static SysRes setup_child_tls (ThreadId ctid, Addr tlsaddr)
 #elif defined(VGP_mips32_linux) || defined(VGP_nanomips_linux)
    ctst->arch.vex.guest_ULR = tlsaddr;
    ctst->arch.vex.guest_r27 = tlsaddr;
+#elif defined(VGP_riscv64_linux)
+   ctst->arch.vex.guest_x4 = tlsaddr;
 #else
 # error Unknown platform
 #endif
@@ -755,7 +774,7 @@ static SysRes ML_(do_fork_clone) ( ThreadId tid, UInt flags,
     || defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)	\
     || defined(VGP_arm_linux) || defined(VGP_mips32_linux) \
     || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \
-    || defined(VGP_nanomips_linux)
+    || defined(VGP_nanomips_linux) || defined(VGP_riscv64_linux)
    res = VG_(do_syscall5)( __NR_clone, flags, 
                            (UWord)NULL, (UWord)parent_tidptr, 
                            (UWord)NULL, (UWord)child_tidptr );
@@ -823,7 +842,7 @@ PRE(sys_clone)
     || defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)	\
     || defined(VGP_arm_linux) || defined(VGP_mips32_linux) \
     || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \
-    || defined(VGP_nanomips_linux)
+    || defined(VGP_nanomips_linux) || defined(VGP_riscv64_linux)
 #define ARG_CHILD_TIDPTR ARG5
 #define PRA_CHILD_TIDPTR PRA5
 #define ARG_TLS          ARG4
@@ -4360,9 +4379,13 @@ PRE(sys_sigaction)
       PRE_MEM_READ( "sigaction(act->sa_handler)", (Addr)&sa->ksa_handler, sizeof(sa->ksa_handler));
       PRE_MEM_READ( "sigaction(act->sa_mask)", (Addr)&sa->sa_mask, sizeof(sa->sa_mask));
       PRE_MEM_READ( "sigaction(act->sa_flags)", (Addr)&sa->sa_flags, sizeof(sa->sa_flags));
+#     if !defined(VGP_riscv64_linux)
+      /* Check the sa_restorer field. More recent Linux platforms completely
+         drop this member. */
       if (ML_(safe_to_deref)(sa,sizeof(struct vki_old_sigaction))
           && (sa->sa_flags & VKI_SA_RESTORER))
          PRE_MEM_READ( "sigaction(act->sa_restorer)", (Addr)&sa->sa_restorer, sizeof(sa->sa_restorer));
+#     endif
    }
 
    if (ARG3 != 0) {
@@ -4478,9 +4501,11 @@ PRE(sys_rt_sigaction)
       PRE_MEM_READ( "rt_sigaction(act->sa_handler)", (Addr)&sa->ksa_handler, sizeof(sa->ksa_handler));
       PRE_MEM_READ( "rt_sigaction(act->sa_mask)", (Addr)&sa->sa_mask, sizeof(sa->sa_mask));
       PRE_MEM_READ( "rt_sigaction(act->sa_flags)", (Addr)&sa->sa_flags, sizeof(sa->sa_flags));
+#     if !defined(VGP_riscv64_linux)
       if (ML_(safe_to_deref)(sa,sizeof(vki_sigaction_toK_t))
           && (sa->sa_flags & VKI_SA_RESTORER))
          PRE_MEM_READ( "rt_sigaction(act->sa_restorer)", (Addr)&sa->sa_restorer, sizeof(sa->sa_restorer));
+#     endif
    }
    if (ARG3 != 0)
       PRE_MEM_WRITE( "rt_sigaction(oldact)", ARG3, sizeof(vki_sigaction_fromK_t));
@@ -6844,7 +6869,8 @@ POST(sys_lookup_dcookie)
 #endif
 
 #if defined(VGP_amd64_linux) || defined(VGP_s390x_linux)        \
-      || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux)
+      || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
+      || defined(VGP_riscv64_linux)
 PRE(sys_lookup_dcookie)
 {
    *flags |= SfMayBlock;
diff --git a/coregrind/m_syswrap/syswrap-main.c b/coregrind/m_syswrap/syswrap-main.c
index be4ac0f08..1a7f038d4 100644
--- a/coregrind/m_syswrap/syswrap-main.c
+++ b/coregrind/m_syswrap/syswrap-main.c
@@ -868,6 +868,18 @@ void getSyscallArgsFromGuestState ( /*OUT*/SyscallArgs*       canonical,
    canonical->arg7  = 0;
    canonical->arg8  = 0;
 
+#elif defined(VGP_riscv64_linux)
+   VexGuestRISCV64State* gst = (VexGuestRISCV64State*)gst_vanilla;
+   canonical->sysno = gst->guest_x17; /* a7 */
+   canonical->arg1  = gst->guest_x10; /* a0 */
+   canonical->arg2  = gst->guest_x11; /* a1 */
+   canonical->arg3  = gst->guest_x12; /* a2 */
+   canonical->arg4  = gst->guest_x13; /* a3 */
+   canonical->arg5  = gst->guest_x14; /* a4 */
+   canonical->arg6  = gst->guest_x15; /* a5 */
+   canonical->arg7  = 0;
+   canonical->arg8  = 0;
+
 #elif defined(VGP_x86_solaris)
    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
    UWord *stack = (UWord *)gst->guest_ESP;
@@ -1192,6 +1204,16 @@ void putSyscallArgsIntoGuestState ( /*IN*/ SyscallArgs*       canonical,
    gst->guest_r10 = canonical->arg7;
    gst->guest_r11 = canonical->arg8;
 
+#elif defined(VGP_riscv64_linux)
+   VexGuestRISCV64State* gst = (VexGuestRISCV64State*)gst_vanilla;
+   gst->guest_x17 = canonical->sysno; /* a7 */
+   gst->guest_x10 = canonical->arg1;  /* a0 */
+   gst->guest_x11 = canonical->arg2;  /* a1 */
+   gst->guest_x12 = canonical->arg3;  /* a2 */
+   gst->guest_x13 = canonical->arg4;  /* a3 */
+   gst->guest_x14 = canonical->arg5;  /* a4 */
+   gst->guest_x15 = canonical->arg6;  /* a5 */
+
 #elif defined(VGP_x86_solaris)
    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
    UWord *stack = (UWord *)gst->guest_ESP;
@@ -1391,6 +1413,11 @@ void getSyscallStatusFromGuestState ( /*OUT*/SyscallStatus*     canonical,
    canonical->sres = VG_(mk_SysRes_s390x_linux)( gst->guest_r2 );
    canonical->what = SsComplete;
 
+#  elif defined(VGP_riscv64_linux)
+   VexGuestRISCV64State* gst = (VexGuestRISCV64State*)gst_vanilla;
+   canonical->sres = VG_(mk_SysRes_riscv64_linux)( gst->guest_x10 );
+   canonical->what = SsComplete;
+
 #  elif defined(VGP_x86_solaris)
    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
    UInt carry = 1 & LibVEX_GuestX86_get_eflags(gst);
@@ -1702,6 +1729,20 @@ void putSyscallStatusIntoGuestState ( /*IN*/ ThreadId tid,
    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
              OFFSET_mips32_r4, sizeof(UWord) );
 
+#  elif defined(VGP_riscv64_linux)
+   VexGuestRISCV64State* gst = (VexGuestRISCV64State*)gst_vanilla;
+   vg_assert(canonical->what == SsComplete);
+   if (sr_isError(canonical->sres)) {
+      /* This isn't exactly right, in that really a Failure with res
+         not in the range 1 .. 4095 is unrepresentable in the
+         Linux-riscv64 scheme.  Oh well. */
+      gst->guest_x10 = - (Long)sr_Err(canonical->sres);
+   } else {
+      gst->guest_x10 = sr_Res(canonical->sres);
+   }
+   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
+             OFFSET_riscv64_x10, sizeof(UWord) );
+
 #  elif defined(VGP_x86_solaris)
    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
    SysRes sres = canonical->sres;
@@ -1939,6 +1980,17 @@ void getSyscallArgLayout ( /*OUT*/SyscallArgLayout* layout )
    layout->uu_arg7  = -1; /* impossible value */
    layout->uu_arg8  = -1; /* impossible value */
 
+#elif defined(VGP_riscv64_linux)
+   layout->o_sysno  = OFFSET_riscv64_x17; /* a7 */
+   layout->o_arg1   = OFFSET_riscv64_x10; /* a0 */
+   layout->o_arg2   = OFFSET_riscv64_x11; /* a1 */
+   layout->o_arg3   = OFFSET_riscv64_x12; /* a2 */
+   layout->o_arg4   = OFFSET_riscv64_x13; /* a3 */
+   layout->o_arg5   = OFFSET_riscv64_x14; /* a4 */
+   layout->o_arg6   = OFFSET_riscv64_x15; /* a5 */
+   layout->uu_arg7  = -1; /* impossible value */
+   layout->uu_arg8  = -1; /* impossible value */
+
 #elif defined(VGP_x86_solaris)
    layout->o_sysno  = OFFSET_x86_EAX;
    /* Syscall parameters are on the stack. */
@@ -3038,6 +3090,28 @@ void ML_(fixup_guest_state_to_restart_syscall) ( ThreadArchState* arch )
          arch->vex.guest_PC -= 2;
       }
    }
+
+#elif defined(VGP_riscv64_linux)
+   arch->vex.guest_pc -= 4;             // sizeof(ecall)
+
+   /* Make sure our caller is actually sane, and we're really backing
+      back over a syscall.
+
+      ecall == 73 00 00 00
+   */
+   {
+      UChar *p = (UChar *)arch->vex.guest_pc;
+
+      if (p[0] != 0x73 || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0x00)
+         VG_(message)(
+            Vg_DebugMsg,
+            "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
+            arch->vex.guest_pc, p[0], p[1], p[2], p[3]
+          );
+
+      vg_assert(p[0] == 0x73 && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0x00);
+   }
+
 #elif defined(VGP_x86_solaris)
    arch->vex.guest_EIP -= 2;   // sizeof(int $0x91) or sizeof(syscall)
 
diff --git a/coregrind/m_syswrap/syswrap-riscv64-linux.c b/coregrind/m_syswrap/syswrap-riscv64-linux.c
new file mode 100644
index 000000000..b59ef9020
--- /dev/null
+++ b/coregrind/m_syswrap/syswrap-riscv64-linux.c
@@ -0,0 +1,613 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Platform-specific syscalls stuff.  syswrap-riscv64-linux.c -----*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright (C) 2020-2023 Petr Pavlu
+      petr.pavlu@dagobah.cz
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#if defined(VGP_riscv64_linux)
+
+#include "pub_core_basics.h"
+#include "pub_core_libcassert.h"
+#include "pub_core_libcprint.h"
+#include "pub_core_options.h"
+#include "pub_core_sigframe.h"
+#include "pub_core_stacktrace.h"
+#include "pub_core_syscall.h"
+#include "pub_core_syswrap.h"
+#include "pub_core_threadstate.h"
+#include "pub_core_tooliface.h"
+#include "pub_core_transtab.h"
+#include "pub_core_vki.h"
+#include "pub_core_vkiscnums.h"
+
+#include "priv_syswrap-generic.h"
+#include "priv_syswrap-linux.h"
+#include "priv_types_n_macros.h"
+
+/* ---------------------------------------------------------------------
+   clone() handling
+   ------------------------------------------------------------------ */
+
+/* Call f(arg1), but first switch stacks, using 'stack' as the new stack, and
+   use 'retaddr' as f's return-to address. Also, clear all the integer registers
+   before entering f.*/
+__attribute__((noreturn)) void ML_(call_on_new_stack_0_1)(Addr stack,
+                                                          Addr retaddr,
+                                                          void (*f)(Word),
+                                                          Word arg1);
+/* a0 = stack
+   a1 = retaddr
+   a2 = f
+   a3 = arg1 */
+asm(".text\n"
+    ".globl vgModuleLocal_call_on_new_stack_0_1\n" /* presumably the expansion of ML_(call_on_new_stack_0_1) -- confirm */
+    "vgModuleLocal_call_on_new_stack_0_1:\n"
+    "mv sp, a0\n" /* Set the stack pointer. */
+    "mv ra, a1\n" /* Set the return address. */
+    "mv a0, a3\n" /* Set the first argument. */
+    "li t0, 0\n"  /* Clear our GPRs. */
+    "li t1, 0\n"
+    "li t2, 0\n"
+    "li s0, 0\n"
+    "li s1, 0\n"
+    /* Don't zero out a0, already set to the first argument. */
+    "li a1, 0\n"
+    /* Don't zero out a2, holds the target function f(). */
+    "li a3, 0\n"
+    "li a4, 0\n"
+    "li a5, 0\n"
+    "li a6, 0\n"
+    "li a7, 0\n"
+    "li s2, 0\n"
+    "li s3, 0\n"
+    "li s4, 0\n"
+    "li s5, 0\n"
+    "li s6, 0\n"
+    "li s7, 0\n"
+    "li s8, 0\n"
+    "li s9, 0\n"
+    "li s10, 0\n"
+    "li s11, 0\n"
+    "li t3, 0\n"
+    "li t4, 0\n"
+    "li t5, 0\n"
+    "li t6, 0\n"
+    "jr a2\n"     /* Jump to f without linking; ra already holds retaddr. */
+    ".previous\n");
+
+/* Perform a clone system call. Clone is strange because it has fork()-like
+   return-twice semantics, so it needs special handling here.
+
+   Upon entry, we have:
+
+      Word (*fn)(void*)   in a0
+      void*  child_stack  in a1
+      int    flags        in a2
+      void*  arg          in a3
+      pid_t* child_tid    in a4
+      pid_t* parent_tid   in a5
+      void*  tls_ptr      in a6
+
+   System call requires:
+
+      int    $__NR_clone  in a7
+      int    flags        in a0
+      void*  child_stack  in a1
+      pid_t* parent_tid   in a2
+      void*  tls_ptr      in a3
+      pid_t* child_tid    in a4
+
+   Returns a Long encoded in the linux-riscv64 way, not a SysRes.
+*/
+#define __NR_CLONE VG_STRINGIFY(__NR_clone)
+#define __NR_EXIT  VG_STRINGIFY(__NR_exit)
+
+/* See priv_syswrap-linux.h for arg profile. */
+asm(".text\n"
+    ".globl do_syscall_clone_riscv64_linux\n"
+    "do_syscall_clone_riscv64_linux:\n"
+    /* Set up the child stack, temporarily preserving fn and arg. */
+    "addi a1, a1, -16\n" /* Make space on the stack. */
+    "sd a3, 8(a1)\n"     /* Save arg. */
+    "sd a0, 0(a1)\n"     /* Save fn. */
+
+    /* Setup the syscall. */
+    "li a7, " __NR_CLONE "\n" /* Load the syscall number. */
+    "mv a0, a2\n"             /* Load syscall arg1: flags. */
+    "mv a1, a1\n"             /* Load syscall arg2: child_stack (already in place). */
+    "mv a2, a5\n"             /* Load syscall arg3: parent_tid. */
+    "mv a3, a6\n"             /* Load syscall arg4: tls_ptr. */
+    "mv a4, a4\n"             /* Load syscall arg5: child_tid (already in place). */
+
+    "ecall\n" /* clone() */
+
+    "bnez a0, 1f\n" /* Nonzero: parent or error, go return. Zero: we are the child. */
+
+    /* CHILD - call the thread function. */
+    "ld a1, 0(sp)\n" /* Pop fn. */
+    "ld a0, 8(sp)\n" /* Pop fn arg1: arg. */
+    "addi sp, sp, 16\n" /* Release the two slots reserved before the syscall. */
+    "jalr a1\n" /* Call fn. */
+
+    /* Exit with result. */
+    "mv a0, a0\n" /* arg1: return value from fn. */
+    "li a7, " __NR_EXIT "\n" /* Load the exit syscall number. */
+
+    "ecall\n" /* exit(result of fn) */
+
+    /* Exit returned?! */
+    "unimp\n"
+
+    "1:\n" /* PARENT or ERROR. a0 holds return value from the clone syscall. */
+    "ret\n" /* Hand the raw a0 back to the C caller. */
+    ".previous\n");
+
+#undef __NR_CLONE
+#undef __NR_EXIT
+
+/* ---------------------------------------------------------------------
+   More thread stuff
+   ------------------------------------------------------------------ */
+
+/* riscv64 doesn't have any architecture specific thread stuff that needs to be
+   cleaned up. */
+void VG_(cleanup_thread)(ThreadArchState* arch) {} /* deliberately empty; arch is not used */
+
+/* ---------------------------------------------------------------------
+   PRE/POST wrappers for riscv64/Linux-specific syscalls
+   ------------------------------------------------------------------ */
+
+#define PRE(name)  DEFN_PRE_TEMPLATE(riscv64_linux, name)
+#define POST(name) DEFN_POST_TEMPLATE(riscv64_linux, name)
+
+/* ARG3 is only used for pointers into the traced process's address space and
+   for offsets into the traced process's struct user_regs_struct. It is never
+   a pointer into this process's memory space, and we should therefore not check
+   anything it points to. */
+static PRE(sys_ptrace)
+{ /* Pre-syscall checks for ptrace, selected by the request code in ARG1. */
+   PRINT("sys_ptrace ( %ld, %ld, %#lx, %#lx )", SARG1, SARG2, ARG3, ARG4);
+   PRE_REG_READ4(int, "ptrace", long, request, long, pid, long, addr, long,
+                 data);
+   switch (ARG1) {
+   case VKI_PTRACE_TRACEME:
+      break; /* nothing to check before the call */
+   case VKI_PTRACE_PEEKTEXT:
+   case VKI_PTRACE_PEEKDATA:
+   case VKI_PTRACE_PEEKUSR:
+      PRE_MEM_WRITE("ptrace(peek)", ARG4, sizeof(long)); /* one long will be written at ARG4 */
+      break;
+   case VKI_PTRACE_GETEVENTMSG:
+      PRE_MEM_WRITE("ptrace(geteventmsg)", ARG4, sizeof(unsigned long)); /* output buffer at ARG4 */
+      break;
+   case VKI_PTRACE_GETSIGINFO:
+      PRE_MEM_WRITE("ptrace(getsiginfo)", ARG4, sizeof(vki_siginfo_t)); /* output buffer at ARG4 */
+      break;
+   case VKI_PTRACE_SETSIGINFO:
+      PRE_MEM_READ("ptrace(setsiginfo)", ARG4, sizeof(vki_siginfo_t)); /* input buffer at ARG4 */
+      break;
+   case VKI_PTRACE_GETREGSET:
+      ML_(linux_PRE_getregset)(tid, ARG3, ARG4); /* shared Linux helper does the checks */
+      break;
+   case VKI_PTRACE_SETREGSET:
+      ML_(linux_PRE_setregset)(tid, ARG3, ARG4); /* shared Linux helper does the checks */
+      break;
+   default: /* unknown request: warn, and show a backtrace when verbosity > 1 */
+      VG_(umsg)("WARNING: unhandled ptrace request %ld.\n", SARG1);
+      if (VG_(clo_verbosity) > 1)
+         VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
+      break;
+   }
+}
+
+static POST(sys_ptrace)
+{ /* Post-syscall bookkeeping for ptrace; mirrors the write cases of the PRE handler. */
+   switch (ARG1) {
+   case VKI_PTRACE_TRACEME:
+      ML_(linux_POST_traceme)(tid); /* shared Linux helper */
+      break;
+   case VKI_PTRACE_PEEKTEXT:
+   case VKI_PTRACE_PEEKDATA:
+   case VKI_PTRACE_PEEKUSR:
+      POST_MEM_WRITE(ARG4, sizeof(long)); /* the long at ARG4 has been written */
+      break;
+   case VKI_PTRACE_GETEVENTMSG:
+      POST_MEM_WRITE(ARG4, sizeof(unsigned long)); /* the buffer at ARG4 has been written */
+      break;
+   case VKI_PTRACE_GETSIGINFO:
+      /* XXX: This is a simplification. Different parts of the siginfo_t are
+         valid depending on the type of signal. */
+      POST_MEM_WRITE(ARG4, sizeof(vki_siginfo_t));
+      break;
+   case VKI_PTRACE_GETREGSET:
+      ML_(linux_POST_getregset)(tid, ARG3, ARG4); /* shared Linux helper */
+      break;
+   default: /* remaining requests need no post-processing here */
+      break;
+   }
+}
+
+static PRE(sys_rt_sigreturn)
+{
+   /* See comments on PRE(sys_rt_sigreturn) in syswrap-amd64-linux.c for
+      an explanation of what follows. */
+
+   PRINT("rt_sigreturn ( )");
+
+   vg_assert(VG_(is_valid_tid)(tid));          /* sanity-check the calling thread ... */
+   vg_assert(tid >= 1 && tid < VG_N_THREADS);  /* ... its id range ... */
+   vg_assert(VG_(is_running_thread)(tid));     /* ... and that it is the running one */
+
+   /* Restore register state from frame and remove it. */
+   VG_(sigframe_destroy)(tid, True); /* True presumably selects the rt frame variant -- confirm */
+
+   /* Tell the driver not to update the guest state with the "result", and set
+      a bogus result to keep it happy. */
+   *flags |= SfNoWriteResult;
+   SET_STATUS_Success(0);
+
+   /* Check to see if any signals arose as a result of this. */
+   *flags |= SfPollAfter;
+}
+
+static PRE(sys_mmap)
+{ /* mmap wrapper: log and register the six arguments, then defer to the generic handler. */
+   PRINT("sys_mmap ( %#lx, %lu, %lu, %#lx, %lu, %lu )", ARG1, ARG2, ARG3, ARG4,
+         ARG5, ARG6);
+   PRE_REG_READ6(long, "mmap", unsigned long, start, unsigned long, length,
+                 unsigned long, prot, unsigned long, flags, unsigned long, fd,
+                 unsigned long, offset);
+   /* The generic handler's SysRes becomes this syscall's status. */
+   SysRes r =
+      ML_(generic_PRE_sys_mmap)(tid, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6);
+   SET_STATUS_from_SysRes(r);
+}
+
+static PRE(sys_riscv_flush_icache)
+{ /* riscv_flush_icache wrapper: ARG1 = start, ARG2 = end, ARG3 = flags. */
+   PRINT("sys_riscv_flush_icache ( %#lx, %lx, %#lx )", ARG1, ARG2, ARG3);
+   PRE_REG_READ3(long, "riscv_flush_icache", unsigned long, start,
+                 unsigned long, end, unsigned long, flags);
+   /* Discard translations for the range; its length is computed as end - start. */
+   VG_(discard_translations)((Addr)ARG1, (ULong)ARG2 - (ULong)ARG1,
+                             "PRE(sys_riscv_flush_icache)");
+   SET_STATUS_Success(0); /* the wrapper itself reports success with result 0 */
+}
+
+#undef PRE
+#undef POST
+
+/* ---------------------------------------------------------------------
+   The riscv64/Linux syscall table
+   ------------------------------------------------------------------ */
+
+/* Add a riscv64-linux specific wrapper to a syscall table. */
+#define PLAX_(sysno, name) WRAPPER_ENTRY_X_(riscv64_linux, sysno, name)
+#define PLAXY(sysno, name) WRAPPER_ENTRY_XY(riscv64_linux, sysno, name)
+
+/* This table maps from __NR_xxx syscall numbers to the appropriate PRE/POST
+   sys_foo() wrappers on riscv64. */
+static SyscallTableEntry syscall_main_table[] = {
+   LINXY(__NR_io_setup, sys_io_setup),                             /* 0 */
+   LINX_(__NR_io_destroy, sys_io_destroy),                         /* 1 */
+   LINX_(__NR_io_submit, sys_io_submit),                           /* 2 */
+   LINXY(__NR_io_cancel, sys_io_cancel),                           /* 3 */
+   LINXY(__NR_io_getevents, sys_io_getevents),                     /* 4 */
+   LINX_(__NR_setxattr, sys_setxattr),                             /* 5 */
+   LINX_(__NR_lsetxattr, sys_lsetxattr),                           /* 6 */
+   LINX_(__NR_fsetxattr, sys_fsetxattr),                           /* 7 */
+   LINXY(__NR_getxattr, sys_getxattr),                             /* 8 */
+   LINXY(__NR_lgetxattr, sys_lgetxattr),                           /* 9 */
+   LINXY(__NR_fgetxattr, sys_fgetxattr),                           /* 10 */
+   LINXY(__NR_listxattr, sys_listxattr),                           /* 11 */
+   LINXY(__NR_llistxattr, sys_llistxattr),                         /* 12 */
+   LINXY(__NR_flistxattr, sys_flistxattr),                         /* 13 */
+   LINX_(__NR_removexattr, sys_removexattr),                       /* 14 */
+   LINX_(__NR_lremovexattr, sys_lremovexattr),                     /* 15 */
+   LINX_(__NR_fremovexattr, sys_fremovexattr),                     /* 16 */
+   GENXY(__NR_getcwd, sys_getcwd),                                 /* 17 */
+   LINXY(__NR_lookup_dcookie, sys_lookup_dcookie),                 /* 18 */
+   LINXY(__NR_eventfd2, sys_eventfd2),                             /* 19 */
+   LINXY(__NR_epoll_create1, sys_epoll_create1),                   /* 20 */
+   LINX_(__NR_epoll_ctl, sys_epoll_ctl),                           /* 21 */
+   LINXY(__NR_epoll_pwait, sys_epoll_pwait),                       /* 22 */
+   GENXY(__NR_dup, sys_dup),                                       /* 23 */
+   LINXY(__NR_dup3, sys_dup3),                                     /* 24 */
+   LINXY(__NR_fcntl, sys_fcntl),                                   /* 25 */
+   LINXY(__NR_inotify_init1, sys_inotify_init1),                   /* 26 */
+   LINX_(__NR_inotify_add_watch, sys_inotify_add_watch),           /* 27 */
+   LINX_(__NR_inotify_rm_watch, sys_inotify_rm_watch),             /* 28 */
+   LINXY(__NR_ioctl, sys_ioctl),                                   /* 29 */
+   LINX_(__NR_ioprio_set, sys_ioprio_set),                         /* 30 */
+   LINX_(__NR_ioprio_get, sys_ioprio_get),                         /* 31 */
+   GENX_(__NR_flock, sys_flock),                                   /* 32 */
+   LINX_(__NR_mknodat, sys_mknodat),                               /* 33 */
+   LINX_(__NR_mkdirat, sys_mkdirat),                               /* 34 */
+   LINX_(__NR_unlinkat, sys_unlinkat),                             /* 35 */
+   LINX_(__NR_symlinkat, sys_symlinkat),                           /* 36 */
+   LINX_(__NR_linkat, sys_linkat),                                 /* 37 */
+   LINX_(__NR_umount2, sys_umount),                                /* 39 */
+   LINX_(__NR_mount, sys_mount),                                   /* 40 */
+   LINX_(__NR_pivot_root, sys_pivot_root),                         /* 41 */
+   GENXY(__NR_statfs, sys_statfs),                                 /* 43 */
+   GENXY(__NR_fstatfs, sys_fstatfs),                               /* 44 */
+   GENX_(__NR_truncate, sys_truncate),                             /* 45 */
+   GENX_(__NR_ftruncate, sys_ftruncate),                           /* 46 */
+   LINX_(__NR_fallocate, sys_fallocate),                           /* 47 */
+   LINX_(__NR_faccessat, sys_faccessat),                           /* 48 */
+   GENX_(__NR_chdir, sys_chdir),                                   /* 49 */
+   GENX_(__NR_fchdir, sys_fchdir),                                 /* 50 */
+   GENX_(__NR_chroot, sys_chroot),                                 /* 51 */
+   GENX_(__NR_fchmod, sys_fchmod),                                 /* 52 */
+   LINX_(__NR_fchmodat, sys_fchmodat),                             /* 53 */
+   LINX_(__NR_fchownat, sys_fchownat),                             /* 54 */
+   GENX_(__NR_fchown, sys_fchown),                                 /* 55 */
+   LINXY(__NR_openat, sys_openat),                                 /* 56 */
+   GENX_(__NR_close, sys_close),                                   /* 57 */
+   LINX_(__NR_vhangup, sys_vhangup),                               /* 58 */
+   LINXY(__NR_pipe2, sys_pipe2),                                   /* 59 */
+   LINX_(__NR_quotactl, sys_quotactl),                             /* 60 */
+   GENXY(__NR_getdents64, sys_getdents64),                         /* 61 */
+   LINX_(__NR_lseek, sys_lseek),                                   /* 62 */
+   GENXY(__NR_read, sys_read),                                     /* 63 */
+   GENX_(__NR_write, sys_write),                                   /* 64 */
+   GENXY(__NR_readv, sys_readv),                                   /* 65 */
+   GENX_(__NR_writev, sys_writev),                                 /* 66 */
+   GENXY(__NR_pread64, sys_pread64),                               /* 67 */
+   GENX_(__NR_pwrite64, sys_pwrite64),                             /* 68 */
+   LINXY(__NR_preadv, sys_preadv),                                 /* 69 */
+   LINX_(__NR_pwritev, sys_pwritev),                               /* 70 */
+   LINXY(__NR_sendfile, sys_sendfile),                             /* 71 */
+   LINXY(__NR_pselect6, sys_pselect6),                             /* 72 */
+   LINXY(__NR_ppoll, sys_ppoll),                                   /* 73 */
+   LINXY(__NR_signalfd4, sys_signalfd4),                           /* 74 */
+   LINX_(__NR_vmsplice, sys_vmsplice),                             /* 75 */
+   LINX_(__NR_splice, sys_splice),                                 /* 76 */
+   LINX_(__NR_tee, sys_tee),                                       /* 77 */
+   LINX_(__NR_readlinkat, sys_readlinkat),                         /* 78 */
+   LINXY(__NR_newfstatat, sys_newfstatat),                         /* 79 */
+   GENXY(__NR_fstat, sys_newfstat),                                /* 80 */
+   GENX_(__NR_sync, sys_sync),                                     /* 81 */
+   GENX_(__NR_fsync, sys_fsync),                                   /* 82 */
+   GENX_(__NR_fdatasync, sys_fdatasync),                           /* 83 */
+   LINX_(__NR_sync_file_range, sys_sync_file_range),               /* 84 */
+   LINXY(__NR_timerfd_create, sys_timerfd_create),                 /* 85 */
+   LINXY(__NR_timerfd_settime, sys_timerfd_settime),               /* 86 */
+   LINXY(__NR_timerfd_gettime, sys_timerfd_gettime),               /* 87 */
+   LINX_(__NR_utimensat, sys_utimensat),                           /* 88 */
+   GENX_(__NR_acct, sys_acct),                                     /* 89 */
+   LINXY(__NR_capget, sys_capget),                                 /* 90 */
+   LINX_(__NR_capset, sys_capset),                                 /* 91 */
+   LINX_(__NR_personality, sys_personality),                       /* 92 */
+   GENX_(__NR_exit, sys_exit),                                     /* 93 */
+   LINX_(__NR_exit_group, sys_exit_group),                         /* 94 */
+   LINXY(__NR_waitid, sys_waitid),                                 /* 95 */
+   LINX_(__NR_set_tid_address, sys_set_tid_address),               /* 96 */
+   LINX_(__NR_unshare, sys_unshare),                               /* 97 */
+   LINXY(__NR_futex, sys_futex),                                   /* 98 */
+   LINX_(__NR_set_robust_list, sys_set_robust_list),               /* 99 */
+   LINXY(__NR_get_robust_list, sys_get_robust_list),               /* 100 */
+   GENXY(__NR_nanosleep, sys_nanosleep),                           /* 101 */
+   GENXY(__NR_getitimer, sys_getitimer),                           /* 102 */
+   GENXY(__NR_setitimer, sys_setitimer),                           /* 103 */
+   GENX_(__NR_kexec_load, sys_ni_syscall),                         /* 104 */
+   LINX_(__NR_init_module, sys_init_module),                       /* 105 */
+   LINX_(__NR_delete_module, sys_delete_module),                   /* 106 */
+   LINXY(__NR_timer_create, sys_timer_create),                     /* 107 */
+   LINXY(__NR_timer_gettime, sys_timer_gettime),                   /* 108 */
+   LINX_(__NR_timer_getoverrun, sys_timer_getoverrun),             /* 109 */
+   LINXY(__NR_timer_settime, sys_timer_settime),                   /* 110 */
+   LINX_(__NR_timer_delete, sys_timer_delete),                     /* 111 */
+   LINX_(__NR_clock_settime, sys_clock_settime),                   /* 112 */
+   LINXY(__NR_clock_gettime, sys_clock_gettime),                   /* 113 */
+   LINXY(__NR_clock_getres, sys_clock_getres),                     /* 114 */
+   LINXY(__NR_clock_nanosleep, sys_clock_nanosleep),               /* 115 */
+   LINXY(__NR_syslog, sys_syslog),                                 /* 116 */
+   PLAXY(__NR_ptrace, sys_ptrace),                                 /* 117 */
+   LINXY(__NR_sched_setparam, sys_sched_setparam),                 /* 118 */
+   LINX_(__NR_sched_setscheduler, sys_sched_setscheduler),         /* 119 */
+   LINX_(__NR_sched_getscheduler, sys_sched_getscheduler),         /* 120 */
+   LINXY(__NR_sched_getparam, sys_sched_getparam),                 /* 121 */
+   LINX_(__NR_sched_setaffinity, sys_sched_setaffinity),           /* 122 */
+   LINXY(__NR_sched_getaffinity, sys_sched_getaffinity),           /* 123 */
+   LINX_(__NR_sched_yield, sys_sched_yield),                       /* 124 */
+   LINX_(__NR_sched_get_priority_max, sys_sched_get_priority_max), /* 125 */
+   LINX_(__NR_sched_get_priority_min, sys_sched_get_priority_min), /* 126 */
+   LINXY(__NR_sched_rr_get_interval, sys_sched_rr_get_interval),   /* 127 */
+   GENX_(__NR_kill, sys_kill),                                     /* 129 */
+   LINXY(__NR_tkill, sys_tkill),                                   /* 130 */
+   LINX_(__NR_tgkill, sys_tgkill),                                 /* 131 */
+   GENXY(__NR_sigaltstack, sys_sigaltstack),                       /* 132 */
+   LINX_(__NR_rt_sigsuspend, sys_rt_sigsuspend),                   /* 133 */
+   LINXY(__NR_rt_sigaction, sys_rt_sigaction),                     /* 134 */
+   LINXY(__NR_rt_sigprocmask, sys_rt_sigprocmask),                 /* 135 */
+   LINXY(__NR_rt_sigpending, sys_rt_sigpending),                   /* 136 */
+   LINXY(__NR_rt_sigtimedwait, sys_rt_sigtimedwait),               /* 137 */
+   LINXY(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo),               /* 138 */
+   PLAX_(__NR_rt_sigreturn, sys_rt_sigreturn),                     /* 139 */
+   GENX_(__NR_setpriority, sys_setpriority),                       /* 140 */
+   GENX_(__NR_getpriority, sys_getpriority),                       /* 141 */
+   GENX_(__NR_setregid, sys_setregid),                             /* 143 */
+   GENX_(__NR_setgid, sys_setgid),                                 /* 144 */
+   GENX_(__NR_setreuid, sys_setreuid),                             /* 145 */
+   GENX_(__NR_setuid, sys_setuid),                                 /* 146 */
+   LINX_(__NR_setresuid, sys_setresuid),                           /* 147 */
+   LINXY(__NR_getresuid, sys_getresuid),                           /* 148 */
+   LINX_(__NR_setresgid, sys_setresgid),                           /* 149 */
+   LINXY(__NR_getresgid, sys_getresgid),                           /* 150 */
+   LINX_(__NR_setfsuid, sys_setfsuid),                             /* 151 */
+   LINX_(__NR_setfsgid, sys_setfsgid),                             /* 152 */
+   GENXY(__NR_times, sys_times),                                   /* 153 */
+   GENX_(__NR_setpgid, sys_setpgid),                               /* 154 */
+   GENX_(__NR_getpgid, sys_getpgid),                               /* 155 */
+   GENX_(__NR_getsid, sys_getsid),                                 /* 156 */
+   GENX_(__NR_setsid, sys_setsid),                                 /* 157 */
+   GENXY(__NR_getgroups, sys_getgroups),                           /* 158 */
+   GENX_(__NR_setgroups, sys_setgroups),                           /* 159 */
+   GENXY(__NR_uname, sys_newuname),                                /* 160 */
+   GENX_(__NR_sethostname, sys_sethostname),                       /* 161 */
+   GENXY(__NR_getrlimit, sys_getrlimit),                           /* 163 */
+   GENX_(__NR_setrlimit, sys_setrlimit),                           /* 164 */
+   GENXY(__NR_getrusage, sys_getrusage),                           /* 165 */
+   GENX_(__NR_umask, sys_umask),                                   /* 166 */
+   LINXY(__NR_prctl, sys_prctl),                                   /* 167 */
+   LINXY(__NR_getcpu, sys_getcpu),                                 /* 168 */
+   GENXY(__NR_gettimeofday, sys_gettimeofday),                     /* 169 */
+   GENX_(__NR_settimeofday, sys_settimeofday),                     /* 170 */
+   LINXY(__NR_adjtimex, sys_adjtimex),                             /* 171 */
+   GENX_(__NR_getpid, sys_getpid),                                 /* 172 */
+   GENX_(__NR_getppid, sys_getppid),                               /* 173 */
+   GENX_(__NR_getuid, sys_getuid),                                 /* 174 */
+   GENX_(__NR_geteuid, sys_geteuid),                               /* 175 */
+   GENX_(__NR_getgid, sys_getgid),                                 /* 176 */
+   GENX_(__NR_getegid, sys_getegid),                               /* 177 */
+   LINX_(__NR_gettid, sys_gettid),                                 /* 178 */
+   LINXY(__NR_sysinfo, sys_sysinfo),                               /* 179 */
+   LINXY(__NR_mq_open, sys_mq_open),                               /* 180 */
+   LINX_(__NR_mq_unlink, sys_mq_unlink),                           /* 181 */
+   LINX_(__NR_mq_timedsend, sys_mq_timedsend),                     /* 182 */
+   LINXY(__NR_mq_timedreceive, sys_mq_timedreceive),               /* 183 */
+   LINX_(__NR_mq_notify, sys_mq_notify),                           /* 184 */
+   LINXY(__NR_mq_getsetattr, sys_mq_getsetattr),                   /* 185 */
+   LINX_(__NR_msgget, sys_msgget),                                 /* 186 */
+   LINXY(__NR_msgctl, sys_msgctl),                                 /* 187 */
+   LINXY(__NR_msgrcv, sys_msgrcv),                                 /* 188 */
+   LINX_(__NR_msgsnd, sys_msgsnd),                                 /* 189 */
+   LINX_(__NR_semget, sys_semget),                                 /* 190 */
+   LINXY(__NR_semctl, sys_semctl),                                 /* 191 */
+   LINX_(__NR_semtimedop, sys_semtimedop),                         /* 192 */
+   LINX_(__NR_semop, sys_semop),                                   /* 193 */
+   LINX_(__NR_shmget, sys_shmget),                                 /* 194 */
+   LINXY(__NR_shmctl, sys_shmctl),                                 /* 195 */
+   LINXY(__NR_shmat, sys_shmat),                                   /* 196 */
+   LINXY(__NR_shmdt, sys_shmdt),                                   /* 197 */
+   LINXY(__NR_socket, sys_socket),                                 /* 198 */
+   LINXY(__NR_socketpair, sys_socketpair),                         /* 199 */
+   LINX_(__NR_bind, sys_bind),                                     /* 200 */
+   LINX_(__NR_listen, sys_listen),                                 /* 201 */
+   LINXY(__NR_accept, sys_accept),                                 /* 202 */
+   LINX_(__NR_connect, sys_connect),                               /* 203 */
+   LINXY(__NR_getsockname, sys_getsockname),                       /* 204 */
+   LINXY(__NR_getpeername, sys_getpeername),                       /* 205 */
+   LINX_(__NR_sendto, sys_sendto),                                 /* 206 */
+   LINXY(__NR_recvfrom, sys_recvfrom),                             /* 207 */
+   LINX_(__NR_setsockopt, sys_setsockopt),                         /* 208 */
+   LINXY(__NR_getsockopt, sys_getsockopt),                         /* 209 */
+   LINX_(__NR_shutdown, sys_shutdown),                             /* 210 */
+   LINX_(__NR_sendmsg, sys_sendmsg),                               /* 211 */
+   LINXY(__NR_recvmsg, sys_recvmsg),                               /* 212 */
+   LINX_(__NR_readahead, sys_readahead),                           /* 213 */
+   GENX_(__NR_brk, sys_brk),                                       /* 214 */
+   GENXY(__NR_munmap, sys_munmap),                                 /* 215 */
+   GENX_(__NR_mremap, sys_mremap),                                 /* 216 */
+   LINX_(__NR_add_key, sys_add_key),                               /* 217 */
+   LINX_(__NR_request_key, sys_request_key),                       /* 218 */
+   LINXY(__NR_keyctl, sys_keyctl),                                 /* 219 */
+   LINX_(__NR_clone, sys_clone),                                   /* 220 */
+   GENX_(__NR_execve, sys_execve),                                 /* 221 */
+   PLAX_(__NR_mmap, sys_mmap),                                     /* 222 */
+   GENX_(__NR_fadvise64, sys_ni_syscall),                          /* 223 */
+   GENXY(__NR_mprotect, sys_mprotect),                             /* 226 */
+   GENX_(__NR_msync, sys_msync),                                   /* 227 */
+   GENX_(__NR_mlock, sys_mlock),                                   /* 228 */
+   GENX_(__NR_munlock, sys_munlock),                               /* 229 */
+   GENX_(__NR_mlockall, sys_mlockall),                             /* 230 */
+   LINX_(__NR_munlockall, sys_munlockall),                         /* 231 */
+   GENXY(__NR_mincore, sys_mincore),                               /* 232 */
+   GENX_(__NR_madvise, sys_madvise),                               /* 233 */
+   LINX_(__NR_mbind, sys_mbind),                                   /* 235 */
+   LINXY(__NR_get_mempolicy, sys_get_mempolicy),                   /* 236 */
+   LINX_(__NR_set_mempolicy, sys_set_mempolicy),                   /* 237 */
+   LINXY(__NR_move_pages, sys_move_pages),                         /* 239 */
+   LINXY(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo),           /* 240 */
+   LINXY(__NR_perf_event_open, sys_perf_event_open),               /* 241 */
+   LINXY(__NR_accept4, sys_accept4),                               /* 242 */
+   LINXY(__NR_recvmmsg, sys_recvmmsg),                             /* 243 */
+   PLAX_(__NR_riscv_flush_icache, sys_riscv_flush_icache),         /* 259 */
+   GENXY(__NR_wait4, sys_wait4),                                   /* 260 */
+   LINXY(__NR_prlimit64, sys_prlimit64),                           /* 261 */
+   LINXY(__NR_fanotify_init, sys_fanotify_init),                   /* 262 */
+   LINX_(__NR_fanotify_mark, sys_fanotify_mark),                   /* 263 */
+   LINXY(__NR_name_to_handle_at, sys_name_to_handle_at),           /* 264 */
+   LINXY(__NR_open_by_handle_at, sys_open_by_handle_at),           /* 265 */
+   LINXY(__NR_clock_adjtime, sys_clock_adjtime),                   /* 266 */
+   LINX_(__NR_syncfs, sys_syncfs),                                 /* 267 */
+   LINX_(__NR_setns, sys_setns),                                   /* 268 */
+   LINXY(__NR_sendmmsg, sys_sendmmsg),                             /* 269 */
+   LINXY(__NR_process_vm_readv, sys_process_vm_readv),             /* 270 */
+   LINX_(__NR_process_vm_writev, sys_process_vm_writev),           /* 271 */
+   LINX_(__NR_kcmp, sys_kcmp),                                     /* 272 */
+   LINX_(__NR_finit_module, sys_finit_module),                     /* 273 */
+   LINX_(__NR_sched_setattr, sys_sched_setattr),                   /* 274 */
+   LINXY(__NR_sched_getattr, sys_sched_getattr),                   /* 275 */
+   LINX_(__NR_renameat2, sys_renameat2),                           /* 276 */
+   LINXY(__NR_getrandom, sys_getrandom),                           /* 278 */
+   LINXY(__NR_memfd_create, sys_memfd_create),                     /* 279 */
+   LINXY(__NR_bpf, sys_bpf),                                       /* 280 */
+   LINX_(__NR_execveat, sys_execveat),                             /* 281 */
+   LINX_(__NR_membarrier, sys_membarrier),                         /* 283 */
+   GENX_(__NR_mlock2, sys_mlock2),                                 /* 284 */
+   LINX_(__NR_copy_file_range, sys_copy_file_range),               /* 285 */
+   LINXY(__NR_preadv2, sys_preadv2),                               /* 286 */
+   LINX_(__NR_pwritev2, sys_pwritev2),                             /* 287 */
+   LINXY(__NR_statx, sys_statx),                                   /* 291 */
+   GENX_(__NR_rseq, sys_ni_syscall),                               /* 293 */
+   LINXY(__NR_io_uring_setup, sys_io_uring_setup),                 /* 425 */
+   LINXY(__NR_io_uring_enter, sys_io_uring_enter),                 /* 426 */
+   LINXY(__NR_io_uring_register, sys_io_uring_register),           /* 427 */
+   LINXY(__NR_pidfd_open, sys_pidfd_open),                         /* 434 */
+   GENX_(__NR_clone3, sys_ni_syscall),                             /* 435 */
+   LINXY(__NR_close_range, sys_close_range),                       /* 436 */
+   LINXY(__NR_openat2, sys_openat2),                               /* 437 */
+   LINXY(__NR_pidfd_getfd, sys_pidfd_getfd),                       /* 438 */
+   LINX_(__NR_faccessat2, sys_faccessat2),                         /* 439 */
+   LINXY(__NR_epoll_pwait2, sys_epoll_pwait2),                     /* 441 */
+   LINXY(__NR_memfd_secret, sys_memfd_secret),                     /* 447 */
+   LINX_(__NR_fchmodat2, sys_fchmodat2),                           /* 452 */
+};
+
+SyscallTableEntry* ML_(get_linux_syscall_entry)(UInt sysno)
+{
+   /* Number of slots in the table, which is indexed by syscall number. */
+   const UInt n_slots =
+      sizeof(syscall_main_table) / sizeof(syscall_main_table[0]);
+   SyscallTableEntry* entry;
+
+   /* Numbers beyond the end of the table have no wrapper. */
+   if (sysno >= n_slots)
+      return NULL;
+
+   /* A slot whose pre-handler is unset is a gap: no wrapper either. */
+   entry = &syscall_main_table[sysno];
+   if (entry->before == NULL)
+      return NULL;
+   return entry;
+}
+
+#endif // defined(VGP_riscv64_linux)
+
+/*--------------------------------------------------------------------*/
+/*--- end                                  syswrap-riscv64-linux.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/m_trampoline.S b/coregrind/m_trampoline.S
index e897963be..c506070d6 100644
--- a/coregrind/m_trampoline.S
+++ b/coregrind/m_trampoline.S
@@ -1585,6 +1585,81 @@ VG_(trampoline_stuff_end):
 #	undef UD2_1024
 #	undef UD2_PAGE
 
+/*---------------- riscv64-linux ----------------*/
+#else
+#if defined(VGP_riscv64_linux)
+
+#	define UD2_4      .word 0
+#	define UD2_16     UD2_4    ; UD2_4    ; UD2_4    ; UD2_4
+#	define UD2_64     UD2_16   ; UD2_16   ; UD2_16   ; UD2_16
+#	define UD2_256    UD2_64   ; UD2_64   ; UD2_64   ; UD2_64
+#	define UD2_1024   UD2_256  ; UD2_256  ; UD2_256  ; UD2_256
+#	define UD2_PAGE   UD2_1024 ; UD2_1024 ; UD2_1024 ; UD2_1024
+
+	/* a leading page of unexecutable code */
+	UD2_PAGE
+
+.global VG_(trampoline_stuff_start)
+VG_(trampoline_stuff_start):
+
+.global VG_(riscv64_linux_SUBST_FOR_rt_sigreturn)
+.type   VG_(riscv64_linux_SUBST_FOR_rt_sigreturn), @function
+VG_(riscv64_linux_SUBST_FOR_rt_sigreturn):
+	.cfi_startproc
+	.cfi_signal_frame               /* tell unwinders this is a signal frame */
+	li a7, __NR_rt_sigreturn        /* a7 = rt_sigreturn syscall number */
+	ecall                           /* issue the system call */
+	.cfi_endproc
+.size VG_(riscv64_linux_SUBST_FOR_rt_sigreturn), \
+	.-VG_(riscv64_linux_SUBST_FOR_rt_sigreturn)
+
+.global VG_(riscv64_linux_REDIR_FOR_strlen)
+.type   VG_(riscv64_linux_REDIR_FOR_strlen), @function
+VG_(riscv64_linux_REDIR_FOR_strlen):
+	mv a1, a0               /* copy the input string pointer to a1 */
+	li a0, 0                /* set the output length to 0 */
+	lbu a2, 0(a1)           /* load the first character */
+	beq a2, zero, 2f        /* empty string: return with length 0 */
+1:
+	addi a0, a0, 1          /* increment the output length by 1 */
+	add a2, a1, a0          /* calculate address of the next character */
+	lbu a2, 0(a2)           /* load the next character */
+	bne a2, zero, 1b        /* loop until the terminating NUL is loaded */
+	ret                     /* a0 = number of bytes before the NUL */
+2:
+	ret                     /* a0 is still 0 here */
+.size VG_(riscv64_linux_REDIR_FOR_strlen), .-VG_(riscv64_linux_REDIR_FOR_strlen)
+
+.global VG_(riscv64_linux_REDIR_FOR_index)
+.type   VG_(riscv64_linux_REDIR_FOR_index), @function
+VG_(riscv64_linux_REDIR_FOR_index):
+	andi a1, a1, 0xff      /* mask the input character value */
+	j 2f                   /* jump into the test loop */
+1:
+	beq a2, zero, 3f       /* no match and end of string: not found */
+	addi a0, a0, 1         /* advance to the next character */
+2:
+	lbu a2, 0(a0)          /* load the next character */
+	bne a2, a1, 1b         /* check if it matches the looked up character */
+	ret                    /* match: a0 points at the matching character */
+3:
+	li a0, 0               /* set the result to "not found" */
+	ret
+.size VG_(riscv64_linux_REDIR_FOR_index), .-VG_(riscv64_linux_REDIR_FOR_index)
+
+.global VG_(trampoline_stuff_end)
+VG_(trampoline_stuff_end):
+
+	/* and a trailing page of unexecutable code */
+	UD2_PAGE
+
+#	undef UD2_4
+#	undef UD2_16
+#	undef UD2_64
+#	undef UD2_256
+#	undef UD2_1024
+#	undef UD2_PAGE
+
 /*---------------- x86-solaris ----------------*/
 #else
 #if defined(VGP_x86_solaris)
@@ -1785,6 +1860,7 @@ VG_(trampoline_stuff_end):
 #endif
 #endif
 #endif
+#endif
 
 /* Let the linker know we don't need an executable stack */
 MARK_STACK_NO_EXEC
diff --git a/coregrind/m_translate.c b/coregrind/m_translate.c
index 5696dc4dc..5da88cac7 100644
--- a/coregrind/m_translate.c
+++ b/coregrind/m_translate.c
@@ -1750,6 +1750,10 @@ Bool VG_(translate) ( ThreadId tid,
            vex_archinfo.arm64_requires_fallback_LLSC;
 #  endif
 
+#  if defined(VGP_riscv64_linux)
+   vex_abiinfo.guest__use_fallback_LLSC = True;
+#  endif
+
    /* Set up closure args. */
    closure.tid    = tid;
    closure.nraddr = nraddr;
diff --git a/coregrind/pub_core_basics.h b/coregrind/pub_core_basics.h
index abc5a066a..353e69ce8 100644
--- a/coregrind/pub_core_basics.h
+++ b/coregrind/pub_core_basics.h
@@ -55,8 +55,10 @@
 
 typedef
    struct {
-      ULong r_pc; /* x86:EIP, amd64:RIP, ppc:CIA, arm:R15, mips:pc */
-      ULong r_sp; /* x86:ESP, amd64:RSP, ppc:R1,  arm:R13, mips:sp */
+      ULong r_pc; /* x86:EIP, amd64:RIP, ppc:CIA, arm:R15, mips:pc,
+                     riscv64: pc */
+      ULong r_sp; /* x86:ESP, amd64:RSP, ppc:R1,  arm:R13, mips:sp,
+                     riscv64: x2 */
       union {
          struct {
             UInt r_ebp;
@@ -102,6 +104,10 @@ typedef
             ULong r31;  /* Return address of the last subroutine call */
             ULong r28;
          } MIPS64;
+         struct {
+            ULong r_fp; /* x8 */
+            ULong r_ra; /* x1 */
+         } RISCV64;
       } misc;
    }
    UnwindStartRegs;
diff --git a/coregrind/pub_core_debuginfo.h b/coregrind/pub_core_debuginfo.h
index 4d6ebda81..658f9313a 100644
--- a/coregrind/pub_core_debuginfo.h
+++ b/coregrind/pub_core_debuginfo.h
@@ -134,6 +134,10 @@ typedef
             Addr f4; Addr f5; Addr f6; Addr f7; }
    D3UnwindRegs;
 #elif defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips)
+typedef
+   struct { Addr pc; Addr sp; Addr fp; Addr ra; }
+   D3UnwindRegs;
+#elif defined(VGA_riscv64)
 typedef
    struct { Addr pc; Addr sp; Addr fp; Addr ra; }
    D3UnwindRegs;
diff --git a/coregrind/pub_core_machine.h b/coregrind/pub_core_machine.h
index a1afbe61c..555506a03 100644
--- a/coregrind/pub_core_machine.h
+++ b/coregrind/pub_core_machine.h
@@ -126,6 +126,11 @@
 #  define VG_ELF_MACHINE      EM_NANOMIPS
 #  define VG_ELF_CLASS        ELFCLASS32
 #  undef  VG_PLAT_USES_PPCTOC
+#elif defined(VGP_riscv64_linux)
+#  define VG_ELF_DATA2XXX     ELFDATA2LSB
+#  define VG_ELF_MACHINE      EM_RISCV
+#  define VG_ELF_CLASS        ELFCLASS64
+#  undef  VG_PLAT_USES_PPCTOC
 #else
 #  error Unknown platform
 #endif
@@ -163,6 +168,10 @@
 #  define VG_INSTR_PTR        guest_PC
 #  define VG_STACK_PTR        guest_r29
 #  define VG_FRAME_PTR        guest_r30
+#elif defined(VGA_riscv64)
+#  define VG_INSTR_PTR        guest_pc
+#  define VG_STACK_PTR        guest_x2
+#  define VG_FRAME_PTR        guest_x8
 #else
 #  error Unknown arch
 #endif
diff --git a/coregrind/pub_core_mallocfree.h b/coregrind/pub_core_mallocfree.h
index bbde837e2..da86b9cce 100644
--- a/coregrind/pub_core_mallocfree.h
+++ b/coregrind/pub_core_mallocfree.h
@@ -84,6 +84,7 @@ typedef Int ArenaId;
       defined(VGP_x86_darwin)     || \
       defined(VGP_amd64_darwin)   || \
       defined(VGP_arm64_linux)    || \
+      defined(VGP_riscv64_linux)  || \
       defined(VGP_amd64_solaris)
 #  define VG_MIN_MALLOC_SZB       16
 #else
diff --git a/coregrind/pub_core_syscall.h b/coregrind/pub_core_syscall.h
index 5c42821c8..e1e066fcc 100644
--- a/coregrind/pub_core_syscall.h
+++ b/coregrind/pub_core_syscall.h
@@ -106,6 +106,7 @@ extern SysRes VG_(mk_SysRes_mips32_linux)( UWord v0, UWord v1,
 extern SysRes VG_(mk_SysRes_mips64_linux)( ULong v0, ULong v1,
                                            ULong a3 );
 extern SysRes VG_(mk_SysRes_nanomips_linux)( UWord a0);
+extern SysRes VG_(mk_SysRes_riscv64_linux) ( Long a0 );
 extern SysRes VG_(mk_SysRes_x86_solaris) ( Bool isErr, UInt val, UInt val2 );
 extern SysRes VG_(mk_SysRes_amd64_solaris) ( Bool isErr, ULong val, ULong val2 );
 extern SysRes VG_(mk_SysRes_Error)       ( UWord val );
diff --git a/coregrind/pub_core_trampoline.h b/coregrind/pub_core_trampoline.h
index d0bd6b859..7c7ec4287 100644
--- a/coregrind/pub_core_trampoline.h
+++ b/coregrind/pub_core_trampoline.h
@@ -176,6 +176,12 @@ extern Char* VG_(nanomips_linux_REDIR_FOR_index)( const Char*, Int );
 extern UInt  VG_(nanomips_linux_REDIR_FOR_strlen)( void* );
 #endif
 
+#if defined(VGP_riscv64_linux)
+extern Addr   VG_(riscv64_linux_SUBST_FOR_rt_sigreturn);
+extern HChar* VG_(riscv64_linux_REDIR_FOR_index)( const HChar*, Int );
+extern SizeT  VG_(riscv64_linux_REDIR_FOR_strlen)( const HChar* );
+#endif
+
 #if defined(VGP_x86_solaris)
 extern SizeT VG_(x86_solaris_REDIR_FOR_strcmp)(const HChar *, const HChar *);
 extern SizeT VG_(x86_solaris_REDIR_FOR_strlen)(const HChar *);
diff --git a/coregrind/pub_core_transtab.h b/coregrind/pub_core_transtab.h
index 6cc11f658..cc70a2944 100644
--- a/coregrind/pub_core_transtab.h
+++ b/coregrind/pub_core_transtab.h
@@ -72,7 +72,8 @@ static inline UWord VG_TT_FAST_HASH ( Addr guest ) {
    return merged & VG_TT_FAST_MASK;
 }
 
-#elif defined(VGA_s390x) || defined(VGA_arm) || defined(VGA_nanomips)
+#elif defined(VGA_s390x) || defined(VGA_arm) || defined(VGA_nanomips) \
+      || defined(VGA_riscv64)
 static inline UWord VG_TT_FAST_HASH ( Addr guest ) {
    // Instructions are 2-byte aligned.
    UWord merged = ((UWord)guest) >> 1;
diff --git a/coregrind/pub_core_transtab_asm.h b/coregrind/pub_core_transtab_asm.h
index 8b585f17d..c14e24ebf 100644
--- a/coregrind/pub_core_transtab_asm.h
+++ b/coregrind/pub_core_transtab_asm.h
@@ -67,12 +67,9 @@
    sets to ever be used.  So instead the function is
    (address ^ (address >>u VG_TT_FAST_BITS))[VG_TT_FAST_BITS-1+2 : 0+2]'.
 
-   On arm32, the minimum instruction size is 2, so we discard only the least
-   significant bit of the address, hence:
-   (address ^ (address >>u VG_TT_FAST_BITS))[VG_TT_FAST_BITS-1+1 : 0+1]'.
-
-   On s390x the rightmost bit of an instruction address is zero, so the arm32
-   scheme is used. */
+   On arm32/s390x/riscv64, the minimum instruction size is 2, so we discard only
+   the least significant bit of the address, hence:
+   (address ^ (address >>u VG_TT_FAST_BITS))[VG_TT_FAST_BITS-1+1 : 0+1]'. */
 
 #define VG_TT_FAST_BITS 13
 #define VG_TT_FAST_SETS (1 << VG_TT_FAST_BITS)
@@ -83,7 +80,7 @@
 #if defined(VGA_amd64) || defined(VGA_arm64) \
     || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
     || (defined(VGA_mips64) && defined(VGABI_64)) \
-    || defined(VGA_s390x)
+    || defined(VGA_s390x) || defined(VGA_riscv64)
   // And all other 64-bit hosts
 # define VG_FAST_CACHE_SET_BITS 6
   // These FCS_{g,h}{0,1,2,3} are the values of
diff --git a/coregrind/vgdb-invoker-ptrace.c b/coregrind/vgdb-invoker-ptrace.c
index 78a6a168c..3a6a455d0 100644
--- a/coregrind/vgdb-invoker-ptrace.c
+++ b/coregrind/vgdb-invoker-ptrace.c
@@ -40,6 +40,17 @@
 #include <sys/user.h>
 #include <sys/wait.h>
 
+#if defined(VGA_riscv64)
+/* Glibc on riscv64 does not provide a definition of user or user_regs_struct
+   in sys/user.h. Instead the definition of user_regs_struct is provided by the
+   kernel in asm/ptrace.h. Pull that header in and then define the user
+   structure that the code below expects. */
+#include <asm/ptrace.h>
+struct user {
+   struct user_regs_struct regs;
+};
+#endif
+
 #ifdef PTRACE_GETREGSET
 // TBD: better have a configure test instead ?
 #define HAVE_PTRACE_GETREGSET
@@ -49,10 +60,10 @@
 // So, better do not use PTRACE_GET/SETREGSET
 // Rather we use PTRACE_GETREGS or PTRACE_PEEKUSER.
 
-// The only platform on which we must use PTRACE_GETREGSET is arm64.
+// The only platforms on which we must use PTRACE_GETREGSET are listed below.
 // The resulting vgdb cannot work in a bi-arch setup.
 // -1 means we will check that PTRACE_GETREGSET works.
-#  if defined(VGA_arm64)
+#  if defined(VGA_arm64) || defined(VGA_riscv64)
 #define USE_PTRACE_GETREGSET
 #  endif
 #endif
@@ -874,6 +885,8 @@ Bool invoker_invoke_gdbserver (pid_t pid)
    sp = p[29];
 #elif defined(VGA_mips64)
    sp = user_mod.regs[29];
+#elif defined(VGA_riscv64)
+   sp = user_mod.regs.sp;
 #else
    I_die_here : (sp) architecture missing in vgdb-invoker-ptrace.c
 #endif
@@ -961,6 +974,10 @@ Bool invoker_invoke_gdbserver (pid_t pid)
 
 #elif defined(VGA_mips64)
       assert(0); // cannot vgdb a 32 bits executable with a 64 bits exe
+
+#elif defined(VGA_riscv64)
+      assert(0);
+
 #else
       I_die_here : architecture missing in vgdb-invoker-ptrace.c
 #endif
@@ -1068,6 +1085,10 @@ Bool invoker_invoke_gdbserver (pid_t pid)
       user_mod.regs[31] = bad_return;
       user_mod.regs[34] = shared64->invoke_gdbserver;
       user_mod.regs[25] = shared64->invoke_gdbserver;
+#elif defined(VGA_riscv64)
+      user_mod.regs.a0 = check;
+      user_mod.regs.ra = bad_return;
+      user_mod.regs.pc = shared64->invoke_gdbserver;
 #else
       I_die_here: architecture missing in vgdb-invoker-ptrace.c
 #endif
diff --git a/docs/Makefile.am b/docs/Makefile.am
index f9ba4e395..9fb6cb699 100644
--- a/docs/Makefile.am
+++ b/docs/Makefile.am
@@ -63,6 +63,7 @@ EXTRA_DIST = \
 	internals/porting-to-ARM.txt \
 	internals/qemu-aarch64-linux-HOWTO.txt \
 	internals/qemu-mips64-linux-HOWTO.txt \
+	internals/qemu-riscv64-linux-HOWTO.txt \
 	internals/register-uses.txt \
 	internals/release-HOWTO.txt \
 	internals/segments-seginfos.txt \
diff --git a/docs/internals/qemu-riscv64-linux-HOWTO.txt b/docs/internals/qemu-riscv64-linux-HOWTO.txt
new file mode 100644
index 000000000..eb0ce4dc0
--- /dev/null
+++ b/docs/internals/qemu-riscv64-linux-HOWTO.txt
@@ -0,0 +1,46 @@
+
+How to install and configure a QEMU riscv64-linux installation.
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Last updated 25 March 2023
+
+This file is a variant of qemu-aarch64-linux-HOWTO.txt but for RISCV64. It
+differs in that it uses openSUSE Tumbleweed and an already prepared image.
+
+
+Use a distribution package manager to install QEMU with support for running
+a 64-bit RISC-V system. It should provide the qemu-system-riscv64 executable.
+Make sure the package comes with an OpenSBI binary which is normally stored at
+/usr/share/qemu/opensbi-riscv64-generic-fw_dynamic.bin.
+
+Get a Tumbleweed image and convert it to qcow2:
+
+mkdir tumbleweed && cd tumbleweed
+wget https://download.opensuse.org/ports/riscv/tumbleweed/images/openSUSE-Tumbleweed-RISC-V-JeOS-efi.riscv64.raw.xz
+unxz openSUSE-Tumbleweed-RISC-V-JeOS-efi.riscv64.raw.xz
+qemu-img convert -f raw -O qcow2 openSUSE-Tumbleweed-RISC-V-JeOS-efi.riscv64.raw image.qcow2
+qemu-img resize image.qcow2 20G
+
+Obtain a U-Boot loader:
+
+UBOOT=$(wget https://download.opensuse.org/ports/riscv/tumbleweed/repo/oss/INDEX.gz -O - | gunzip | grep 'u-boot-qemu-riscv64smode-[0123456789\.-]*\.riscv64\.rpm')
+wget --directory-prefix=u-boot https://download.opensuse.org/ports/riscv/tumbleweed/repo/oss/$UBOOT
+rpm2cpio u-boot/u-boot-qemu-riscv64smode-*.riscv64.rpm | cpio -D u-boot -idm
+
+Boot the system:
+
+qemu-system-riscv64 -nographic -machine virt -smp 4 -m 8G \
+  -kernel u-boot/boot/u-boot.bin \
+  -device virtio-blk-device,drive=hd0 -drive file=image.qcow2,format=qcow2,id=hd0 \
+  -device virtio-net-device,netdev=usernet -netdev user,id=usernet,hostfwd=tcp::5555-:22
+
+Now you can ssh into the VM and install stuff as usual:
+
+ssh -p 5555 root@localhost
+[The preset password is linux.]
+
+  (on the guest)
+  useradd username && passwd username
+  zypper install autoconf automake make gcc gcc-c++ git-core
+
+Hack on, etc.
diff --git a/docs/xml/dist-docs.xml b/docs/xml/dist-docs.xml
index 439cdcc5f..31b170885 100644
--- a/docs/xml/dist-docs.xml
+++ b/docs/xml/dist-docs.xml
@@ -122,6 +122,16 @@
     </literallayout>
     </chapter>
 
+  <chapter id="dist.readme-riscv64" 
+             xreflabel="Readme RISCV64">
+    <title>README.riscv64</title>
+    <literallayout>
+      <xi:include href="../../README.riscv64" 
+          parse="text" 
+          xmlns:xi="http://www.w3.org/2001/XInclude" />
+    </literallayout>
+    </chapter>
+
   <chapter id="dist.readme-solaris"
              xreflabel="Readme Solaris">
     <title>README.solaris</title>
diff --git a/drd/drd_bitmap.h b/drd/drd_bitmap.h
index 3b71d749a..fa1506ed4 100644
--- a/drd/drd_bitmap.h
+++ b/drd/drd_bitmap.h
@@ -140,7 +140,7 @@ Addr make_address(const UWord a1, const UWord a0)
 #define BITS_PER_BITS_PER_UWORD 5
 #elif defined(VGA_amd64) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
       || defined(VGA_s390x) || (defined(VGA_mips64) && !defined(VGABI_N32)) \
-      || defined(VGA_arm64)
+      || defined(VGA_arm64) || defined(VGA_riscv64)
 #define BITS_PER_BITS_PER_UWORD 6
 #else
 #error Unknown platform.
diff --git a/drd/drd_load_store.c b/drd/drd_load_store.c
index 80d326a0e..a6fb874bf 100644
--- a/drd/drd_load_store.c
+++ b/drd/drd_load_store.c
@@ -53,6 +53,8 @@
 #define STACK_POINTER_OFFSET OFFSET_mips32_r29
 #elif defined(VGA_mips64)
 #define STACK_POINTER_OFFSET OFFSET_mips64_r29
+#elif defined(VGA_riscv64)
+#define STACK_POINTER_OFFSET OFFSET_riscv64_x2
 #else
 #error Unknown architecture.
 #endif
diff --git a/helgrind/tests/annotate_hbefore.c b/helgrind/tests/annotate_hbefore.c
index 259d3b64c..52dce3e76 100644
--- a/helgrind/tests/annotate_hbefore.c
+++ b/helgrind/tests/annotate_hbefore.c
@@ -314,6 +314,36 @@ UWord do_acasW ( UWord* addr, UWord expected, UWord nyu )
    return success;
 }
 
+#elif defined(VGA_riscv64)
+
+// riscv64: compare-and-swap of one UWord, built from an lr.d/sc.d pair.
+/* return 1 if success, 0 if failure */
+UWord do_acasW ( UWord* addr, UWord expected, UWord nyu )
+{
+  UWord success;
+  UWord block[3] = { (UWord)addr, nyu, expected}; // [0]=addr [1]=nyu [2]=expected
+
+   __asm__ __volatile__(
+      "ld     t0, 0(%1)"         "\n\t" // t0 = block[0] = addr
+      "ld     t2, 16(%1)"        "\n\t" // t2 = block[2] = expected
+      "ld     t3, 8(%1)"         "\n\t" // t3 = block[1] = nyu
+      "lr.d   t1, 0(t0)"         "\n\t" // t1 = *addr (load-reserved)
+      "bne    t1, t2, 1f"        "\n\t" // *addr != expected: fail, no store
+      "sc.d   t1, t3, 0(t0)"     "\n\t" // try *addr = nyu; t1 = 0 if stored
+      "xori   %0, t1, 1"         "\n\t" // success = t1 ^ 1
+      "j 2f"                     "\n\t"
+      "1:"                       "\n\t"
+      "mv     %0, zero"          "\n\t" // success = 0
+      "2:"                       "\n\t"
+      : /*out*/ "=r"(success)
+      : /*in*/ "r"(&block[0])
+      : /*trash*/ "t0", "t1", "t2", "t3", "memory"
+   );
+
+   assert(success == 0 || success == 1); // NOTE(review): assumes a failed sc.d yields exactly 1
+   return success;
+}
+
 #endif
 
 void atomic_incW ( UWord* w )
diff --git a/helgrind/tests/tc07_hbl1.c b/helgrind/tests/tc07_hbl1.c
index a4250c62c..ee0564d20 100644
--- a/helgrind/tests/tc07_hbl1.c
+++ b/helgrind/tests/tc07_hbl1.c
@@ -19,6 +19,7 @@
 #undef PLAT_arm64_linux
 #undef PLAT_s390x_linux
 #undef PLAT_mips32_linux
+#undef PLAT_riscv64_linux
 #undef PLAT_x86_solaris
 #undef PLAT_amd64_solaris
 
@@ -50,6 +51,8 @@
 #  define PLAT_mips32_linux 1
 #elif defined(__linux__) && defined(__nanomips__)
 #  define PLAT_nanomips_linux 1
+#elif defined(__linux__) && defined(__riscv) && (__riscv_xlen == 64)
+#  define PLAT_riscv64_linux 1
 #elif defined(__sun__) && defined(__i386__)
 #  define PLAT_x86_solaris 1
 #elif defined(__sun__) && defined(__x86_64__)
@@ -134,6 +137,13 @@
       : /*out*/ : /*in*/ "r"(&(_lval))              \
       : /*trash*/ "$t0", "$t1", "memory"            \
    )
+#elif defined(PLAT_riscv64_linux)
+#  define INC(_lval,_lqual) /* atomic 32-bit +1 */  \
+     __asm__ __volatile__ ( /* _lqual unused */     \
+      "        amoadd.w zero, %1, (%0)\n"           \
+      : /*out*/ : /*in*/ "r"(&(_lval)), "r"(1)      \
+      : /*trash*/ "memory"                          \
+   )
 #else
 #  error "Fix Me for this platform"
 #endif
diff --git a/helgrind/tests/tc08_hbl2.c b/helgrind/tests/tc08_hbl2.c
index 6a8543fa1..be2b78b01 100644
--- a/helgrind/tests/tc08_hbl2.c
+++ b/helgrind/tests/tc08_hbl2.c
@@ -36,6 +36,7 @@
 #undef PLAT_s390x_linux
 #undef PLAT_mips32_linux
 #undef PLAT_mips64_linux
+#undef PLAT_riscv64_linux
 #undef PLAT_x86_solaris
 #undef PLAT_amd64_solaris
 
@@ -71,6 +72,8 @@
 #endif
 #elif defined(__linux__) && defined(__nanomips__)
 #  define PLAT_nanomips_linux 1
+#elif defined(__linux__) && defined(__riscv) && (__riscv_xlen == 64)
+#  define PLAT_riscv64_linux 1
 #elif defined(__sun__) && defined(__i386__)
 #  define PLAT_x86_solaris 1
 #elif defined(__sun__) && defined(__x86_64__)
@@ -154,6 +157,13 @@
       : /*out*/ : /*in*/ "r"(&(_lval))              \
       : /*trash*/ "$t0", "$t1", "memory"            \
    )
+#elif defined(PLAT_riscv64_linux)
+#  define INC(_lval,_lqual) /* atomic 32-bit +1 */  \
+     __asm__ __volatile__ ( /* _lqual unused */     \
+      "        amoadd.w zero, %1, (%0)\n"           \
+      : /*out*/ : /*in*/ "r"(&(_lval)), "r"(1)      \
+      : /*trash*/ "memory"                          \
+   )
 #else
 #  error "Fix Me for this platform"
 #endif
diff --git a/helgrind/tests/tc11_XCHG.c b/helgrind/tests/tc11_XCHG.c
index cc00ba38f..e92b671b7 100644
--- a/helgrind/tests/tc11_XCHG.c
+++ b/helgrind/tests/tc11_XCHG.c
@@ -21,6 +21,7 @@
 #undef PLAT_arm_linux
 #undef PLAT_s390x_linux
 #undef PLAT_mips32_linux
+#undef PLAT_riscv64_linux
 #undef PLAT_x86_solaris
 #undef PLAT_amd64_solaris
 
@@ -52,6 +53,8 @@
 #  define PLAT_mips32_linux 1
 #elif defined(__linux__) && defined(__nanomips__)
 #  define PLAT_nanomips_linux 1
+#elif defined(__linux__) && defined(__riscv) && (__riscv_xlen == 64)
+#  define PLAT_riscv64_linux 1
 #elif defined(__sun__) && defined(__i386__)
 #  define PLAT_x86_solaris 1
 #elif defined(__sun__) && defined(__x86_64__)
@@ -128,7 +131,8 @@
 
 #elif defined(PLAT_ppc32_linux) || defined(PLAT_ppc64_linux) \
       || defined(PLAT_arm_linux) || defined(PLAT_arm64_linux) \
-      || defined(PLAT_arm64_freebsd)
+      || defined(PLAT_arm64_freebsd) \
+      || defined(PLAT_riscv64_linux)
 #  if defined(HAVE_BUILTIN_ATOMIC)
 #    define XCHG_M_R(_addr,_lval)                                           \
         do {                                                                \
diff --git a/include/Makefile.am b/include/Makefile.am
index 8012d7374..10d6b6016 100644
--- a/include/Makefile.am
+++ b/include/Makefile.am
@@ -64,6 +64,7 @@ nobase_pkginclude_HEADERS = \
 	vki/vki-posixtypes-mips32-linux.h \
 	vki/vki-posixtypes-mips64-linux.h \
 	vki/vki-posixtypes-nanomips-linux.h \
+	vki/vki-posixtypes-riscv64-linux.h \
 	vki/vki-amd64-linux.h		\
 	vki/vki-arm64-linux.h		\
 	vki/vki-ppc32-linux.h		\
@@ -77,6 +78,7 @@ nobase_pkginclude_HEADERS = \
 	vki/vki-mips32-linux.h		\
 	vki/vki-mips64-linux.h		\
 	vki/vki-nanomips-linux.h	\
+	vki/vki-riscv64-linux.h		\
 	vki/vki-scnums-amd64-linux.h	\
 	vki/vki-scnums-arm64-linux.h	\
 	vki/vki-scnums-ppc32-linux.h	\
@@ -88,6 +90,7 @@ nobase_pkginclude_HEADERS = \
 	vki/vki-scnums-mips32-linux.h	\
 	vki/vki-scnums-mips64-linux.h	\
 	vki/vki-scnums-nanomips-linux.h	\
+	vki/vki-scnums-riscv64-linux.h	\
 	vki/vki-scnums-darwin.h         \
 	vki/vki-scnums-solaris.h	\
 	vki/vki-scnums-shared-linux.h	\
diff --git a/include/pub_tool_basics.h b/include/pub_tool_basics.h
index bf5a5ba7f..155454bff 100644
--- a/include/pub_tool_basics.h
+++ b/include/pub_tool_basics.h
@@ -437,7 +437,8 @@ static inline Bool sr_EQ ( UInt sysno, SysRes sr1, SysRes sr2 ) {
 
 #if defined(VGA_x86) || defined(VGA_amd64) || defined (VGA_arm) \
     || ((defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips)) \
-    && defined (_MIPSEL)) || defined(VGA_arm64)  || defined(VGA_ppc64le)
+    && defined (_MIPSEL)) || defined(VGA_arm64) || defined(VGA_ppc64le) \
+    || defined(VGA_riscv64)
 #  define VG_LITTLEENDIAN 1
 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_s390x) \
       || ((defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips)) \
@@ -485,7 +486,8 @@ static inline Bool sr_EQ ( UInt sysno, SysRes sr1, SysRes sr2 ) {
       || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
       || defined(VGA_arm) || defined(VGA_s390x) \
       || defined(VGA_mips32) || defined(VGA_mips64) \
-      || defined(VGA_arm64) || defined(VGA_nanomips)
+      || defined(VGA_arm64) || defined(VGA_nanomips) \
+      || defined(VGA_riscv64)
 #  define VG_REGPARM(n)            /* */
 #else
 #  error Unknown arch
diff --git a/include/pub_tool_guest.h b/include/pub_tool_guest.h
index 08a72efac..9e5c0c24a 100644
--- a/include/pub_tool_guest.h
+++ b/include/pub_tool_guest.h
@@ -62,6 +62,9 @@
 #elif defined(VGA_mips64)
 #  include "libvex_guest_mips64.h"
    typedef VexGuestMIPS64State VexGuestArchState;
+#elif defined(VGA_riscv64)
+#  include "libvex_guest_riscv64.h"
+   typedef VexGuestRISCV64State VexGuestArchState;
 #else
 #  error Unknown arch
 #endif
diff --git a/include/pub_tool_machine.h b/include/pub_tool_machine.h
index f46207257..933dc8e12 100644
--- a/include/pub_tool_machine.h
+++ b/include/pub_tool_machine.h
@@ -108,6 +108,12 @@
 #  define VG_CLREQ_SZB             20
 #  define VG_STACK_REDZONE_SZB      0
 
+#elif defined(VGP_riscv64_linux)
+#  define VG_MIN_INSTR_SZB          2
+#  define VG_MAX_INSTR_SZB          4
+#  define VG_CLREQ_SZB             20
+#  define VG_STACK_REDZONE_SZB      0
+
 #else
 #  error Unknown platform
 #endif
diff --git a/include/pub_tool_redir.h b/include/pub_tool_redir.h
index f88d3b571..788db2129 100644
--- a/include/pub_tool_redir.h
+++ b/include/pub_tool_redir.h
@@ -321,6 +321,8 @@
 
 #define  VG_U_LD_LINUX_MIPSN8_S0_1  "ld-linux-mipsn8.so.1"
 
+#define  VG_U_LD_LINUX_RISCV64_SO_1 "ld-linux-riscv64-lp64d.so.1"
+
 #endif
 
 /* --- Sonames for FreeBSD ELF linkers, plus unencoded versions. --- */
diff --git a/include/pub_tool_vkiscnums_asm.h b/include/pub_tool_vkiscnums_asm.h
index 8a26f834a..9303284fe 100644
--- a/include/pub_tool_vkiscnums_asm.h
+++ b/include/pub_tool_vkiscnums_asm.h
@@ -75,6 +75,10 @@
 #  include "vki/vki-scnums-shared-linux.h"
 #  include "vki/vki-scnums-mips64-linux.h"
 
+#elif defined(VGP_riscv64_linux)
+#  include "vki/vki-scnums-shared-linux.h"
+#  include "vki/vki-scnums-riscv64-linux.h"
+
 #elif defined(VGP_x86_freebsd) || defined(VGP_amd64_freebsd) || defined(VGP_arm64_freebsd)
 #  include "vki/vki-scnums-freebsd.h"
 
diff --git a/include/valgrind.h.in b/include/valgrind.h.in
index bc18f40f7..f1710924a 100644
--- a/include/valgrind.h.in
+++ b/include/valgrind.h.in
@@ -126,6 +126,7 @@
 #undef PLAT_mips32_linux
 #undef PLAT_mips64_linux
 #undef PLAT_nanomips_linux
+#undef PLAT_riscv64_linux
 #undef PLAT_x86_solaris
 #undef PLAT_amd64_solaris
 
@@ -172,6 +173,8 @@
 #  define PLAT_mips32_linux 1
 #elif defined(__linux__) && defined(__nanomips__)
 #  define PLAT_nanomips_linux 1
+#elif defined(__linux__) && defined(__riscv) && (__riscv_xlen == 64)
+#  define PLAT_riscv64_linux 1
 #elif defined(__sun) && defined(__i386__)
 #  define PLAT_x86_solaris 1
 #elif defined(__sun) && defined(__x86_64__)
@@ -1129,6 +1132,87 @@ typedef
  } while (0)
 
 #endif
+
+/* ----------------------- riscv64-linux ------------------------ */
+
+#if defined(PLAT_riscv64_linux)
+
+typedef
+   struct {
+      unsigned long int nraddr; /* where's the code? */
+   }
+   OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE                            \
+            ".option push\n\t"                                    \
+            ".option norvc\n\t"                                   \
+            "srli zero, zero, 3\n\t"                              \
+            "srli zero, zero, 13\n\t"                             \
+            "srli zero, zero, 51\n\t"                             \
+            "srli zero, zero, 61\n\t"
+
+#define __SPECIAL_INSTRUCTION_POSTAMBLE                           \
+            ".option pop\n\t"  /* undo the preamble's .option push/norvc */
+
+#define VALGRIND_DO_CLIENT_REQUEST_EXPR(                          \
+        _zzq_default, _zzq_request,                               \
+        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
+                                                                  \
+  __extension__                                                   \
+  ({volatile unsigned long int  _zzq_args[6];                     \
+    volatile unsigned long int  _zzq_result;                      \
+    _zzq_args[0] = (unsigned long int)(_zzq_request);             \
+    _zzq_args[1] = (unsigned long int)(_zzq_arg1);                \
+    _zzq_args[2] = (unsigned long int)(_zzq_arg2);                \
+    _zzq_args[3] = (unsigned long int)(_zzq_arg3);                \
+    _zzq_args[4] = (unsigned long int)(_zzq_arg4);                \
+    _zzq_args[5] = (unsigned long int)(_zzq_arg5);                \
+    __asm__ volatile("mv a3, %1\n\t" /*default*/                  \
+                     "mv a4, %2\n\t" /*ptr*/                      \
+                     __SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* a3 = client_request ( a4 ) */             \
+                     "or a0, a0, a0\n\t"                          \
+                     __SPECIAL_INSTRUCTION_POSTAMBLE              \
+                     "mv %0, a3"     /*result*/                   \
+                     : "=r" (_zzq_result)                         \
+                     : "r" ((unsigned long int)(_zzq_default)),   \
+                       "r" (&_zzq_args[0])                        \
+                     : "memory", "a3", "a4");                     \
+    _zzq_result;                                                  \
+  })
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
+  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
+    unsigned long int __addr;                                     \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* a3 = guest_NRADDR */                      \
+                     "or a1, a1, a1\n\t"                          \
+                     __SPECIAL_INSTRUCTION_POSTAMBLE              \
+                     "mv %0, a3"                                  \
+                     : "=r" (__addr)                              \
+                     :                                            \
+                     : "memory", "a3"                             \
+                    );                                            \
+    _zzq_orig->nraddr = __addr;                                   \
+  }
+
+#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_T0                    \
+                     __SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* branch-and-link-to-noredir t0 */          \
+                     "or a2, a2, a2\n\t"                          \
+                     __SPECIAL_INSTRUCTION_POSTAMBLE
+
+#define VALGRIND_VEX_INJECT_IR()                                 \
+ do {                                                            \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE              \
+                     "or a3, a3, a3\n\t"                         \
+                     __SPECIAL_INSTRUCTION_POSTAMBLE             \
+                     : : : "memory"                              \
+                    );                                           \
+ } while (0)
+
+#endif /* PLAT_riscv64_linux */
+
 /* Insert assembly code for other platforms here... */
 
 #endif /* NVALGRIND */
@@ -6606,6 +6690,456 @@ typedef
 
 #endif /* PLAT_mips64_linux */
 
+/* ----------------------- riscv64-linux ----------------------- */
+
+#if defined(PLAT_riscv64_linux)
+
+/* These regs are trashed by the hidden call. */
+#define __CALLER_SAVED_REGS \
+     "ra",                                                        \
+     "t0", "t1", "t2", "t3", "t4", "t5", "t6",                    \
+     "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",              \
+     "ft0", "ft1", "ft2", "ft3", "ft4", "ft5", "ft6", "ft7",      \
+     "ft8", "ft9", "ft10", "ft11",                                \
+     "fa0", "fa1", "fa2", "fa3", "fa4", "fa5", "fa6", "fa7"
+
+/* s11 is callee-saved, so we can use it to save and restore sp around
+   the hidden call. */
+#define VALGRIND_ALIGN_STACK               \
+      "mv s11, sp\n\t"                     \
+      "andi sp, sp, 0xfffffffffffffff0\n\t"
+#define VALGRIND_RESTORE_STACK             \
+      "mv sp, s11\n\t"
+
+/* These CALL_FN_ macros assume that on riscv64-linux,
+   sizeof(unsigned long) == 8. */
+
+#define CALL_FN_W_v(lval, orig)                                   \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[1];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "ld t0, 0(%1) \n\t"  /* target->t0 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_T0                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mv %0, a0\n"        /* a0->result */                    \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])  /* %1 aliases %0 */       \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS, "s11"         \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_W(lval, orig, arg1)                             \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[2];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "ld a0, 8(%1) \n\t"  /* arg1->a0 */                      \
+         "ld t0, 0(%1) \n\t"  /* target->t0 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_T0                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mv %0, a0\n"        /* a0->result */                    \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])  /* %1 aliases %0 */       \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS, "s11"         \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "ld a0, 8(%1) \n\t"  /* arg1->a0 */                      \
+         "ld a1, 16(%1) \n\t" /* arg2->a1 */                      \
+         "ld t0, 0(%1) \n\t"  /* target->t0 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_T0                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mv %0, a0\n"        /* a0->result */                    \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])  /* %1 aliases %0 */       \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS, "s11"         \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[4];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "ld a0, 8(%1) \n\t"  /* arg1->a0 */                      \
+         "ld a1, 16(%1) \n\t" /* arg2->a1 */                      \
+         "ld a2, 24(%1) \n\t" /* arg3->a2 */                      \
+         "ld t0, 0(%1) \n\t"  /* target->t0 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_T0                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mv %0, a0\n"        /* a0->result */                    \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])  /* %1 aliases %0 */       \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS, "s11"         \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[5];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "ld a0, 8(%1) \n\t"  /* arg1->a0 */                      \
+         "ld a1, 16(%1) \n\t" /* arg2->a1 */                      \
+         "ld a2, 24(%1) \n\t" /* arg3->a2 */                      \
+         "ld a3, 32(%1) \n\t" /* arg4->a3 */                      \
+         "ld t0, 0(%1) \n\t"  /* target->t0 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_T0                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mv %0, a0"          /* a0->result */                    \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])  /* %1 aliases %0 */       \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS, "s11"         \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[6];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "ld a0, 8(%1) \n\t"  /* arg1->a0 */                      \
+         "ld a1, 16(%1) \n\t" /* arg2->a1 */                      \
+         "ld a2, 24(%1) \n\t" /* arg3->a2 */                      \
+         "ld a3, 32(%1) \n\t" /* arg4->a3 */                      \
+         "ld a4, 40(%1) \n\t" /* arg5->a4 */                      \
+         "ld t0, 0(%1) \n\t"  /* target->t0 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_T0                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mv %0, a0"          /* a0->result */                    \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])  /* %1 aliases %0 */       \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS, "s11"         \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[7];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "ld a0, 8(%1) \n\t"  /* arg1->a0 */                      \
+         "ld a1, 16(%1) \n\t" /* arg2->a1 */                      \
+         "ld a2, 24(%1) \n\t" /* arg3->a2 */                      \
+         "ld a3, 32(%1) \n\t" /* arg4->a3 */                      \
+         "ld a4, 40(%1) \n\t" /* arg5->a4 */                      \
+         "ld a5, 48(%1) \n\t" /* arg6->a5 */                      \
+         "ld t0, 0(%1) \n\t"  /* target->t0 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_T0                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mv %0, a0"          /* a0->result */                    \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])  /* %1 aliases %0 */       \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS, "s11"         \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7)                            \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[8];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "ld a0, 8(%1) \n\t"  /* arg1->a0 */                      \
+         "ld a1, 16(%1) \n\t" /* arg2->a1 */                      \
+         "ld a2, 24(%1) \n\t" /* arg3->a2 */                      \
+         "ld a3, 32(%1) \n\t" /* arg4->a3 */                      \
+         "ld a4, 40(%1) \n\t" /* arg5->a4 */                      \
+         "ld a5, 48(%1) \n\t" /* arg6->a5 */                      \
+         "ld a6, 56(%1) \n\t" /* arg7->a6 */                      \
+         "ld t0, 0(%1) \n\t"  /* target->t0 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_T0                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mv %0, a0"          /* a0->result */                    \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])  /* %1 aliases %0 */       \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS, "s11"         \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7,arg8)                       \
+   do { /* call orig with 8 word args; result -> lval */          \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[9];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr; /* call target */ \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "ld a0, 8(%1) \n\t"  /* arg1->a0 */                      \
+         "ld a1, 16(%1) \n\t"                                     \
+         "ld a2, 24(%1) \n\t"                                     \
+         "ld a3, 32(%1) \n\t"                                     \
+         "ld a4, 40(%1) \n\t"                                     \
+         "ld a5, 48(%1) \n\t"                                     \
+         "ld a6, 56(%1) \n\t"                                     \
+         "ld a7, 64(%1) \n\t" /* arg8->a7 */                      \
+         "ld t0, 0(%1) \n\t"  /* target->t0 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_T0                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mv %0, a0"          /* a0->_res */                      \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS, "s11"         \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7,arg8,arg9)                  \
+   do { /* call orig with 9 word args; result -> lval */          \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[10];                         \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr; /* call target */ \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      _argvec[9] = (unsigned long)(arg9);                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "addi sp, sp, -16 \n\t" /* room for arg9 */              \
+         "ld a0, 8(%1) \n\t"  /* arg1->a0 */                      \
+         "ld a1, 16(%1) \n\t"                                     \
+         "ld a2, 24(%1) \n\t"                                     \
+         "ld a3, 32(%1) \n\t"                                     \
+         "ld a4, 40(%1) \n\t"                                     \
+         "ld a5, 48(%1) \n\t"                                     \
+         "ld a6, 56(%1) \n\t"                                     \
+         "ld a7, 64(%1) \n\t" /* arg8->a7 */                      \
+         "ld t0, 72(%1) \n\t"                                     \
+         "sd t0, 0(sp)  \n\t" /* arg9->stack */                   \
+         "ld t0, 0(%1) \n\t"  /* target->t0 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_T0                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mv %0, a0"          /* a0->_res */                      \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS, "s11"         \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                  arg7,arg8,arg9,arg10)           \
+   do { /* call orig with 10 word args; result -> lval */         \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[11];                         \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr; /* call target */ \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      _argvec[9] = (unsigned long)(arg9);                         \
+      _argvec[10] = (unsigned long)(arg10);                       \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "addi sp, sp, -16 \n\t" /* room for arg9-10 */           \
+         "ld a0, 8(%1) \n\t"  /* arg1->a0 */                      \
+         "ld a1, 16(%1) \n\t"                                     \
+         "ld a2, 24(%1) \n\t"                                     \
+         "ld a3, 32(%1) \n\t"                                     \
+         "ld a4, 40(%1) \n\t"                                     \
+         "ld a5, 48(%1) \n\t"                                     \
+         "ld a6, 56(%1) \n\t"                                     \
+         "ld a7, 64(%1) \n\t" /* arg8->a7 */                      \
+         "ld t0, 72(%1) \n\t"                                     \
+         "sd t0, 0(sp)  \n\t" /* arg9->stack */                   \
+         "ld t0, 80(%1) \n\t"                                     \
+         "sd t0, 8(sp)  \n\t" /* arg10->stack */                  \
+         "ld t0, 0(%1) \n\t"  /* target->t0 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_T0                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mv %0, a0"          /* a0->_res */                      \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS, "s11"         \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                  arg7,arg8,arg9,arg10,arg11)     \
+   do { /* call orig with 11 word args; result -> lval */         \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[12];                         \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr; /* call target */ \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      _argvec[9] = (unsigned long)(arg9);                         \
+      _argvec[10] = (unsigned long)(arg10);                       \
+      _argvec[11] = (unsigned long)(arg11);                       \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "addi sp, sp, -32 \n\t" /* room for arg9-11 */           \
+         "ld a0, 8(%1) \n\t"  /* arg1->a0 */                      \
+         "ld a1, 16(%1) \n\t"                                     \
+         "ld a2, 24(%1) \n\t"                                     \
+         "ld a3, 32(%1) \n\t"                                     \
+         "ld a4, 40(%1) \n\t"                                     \
+         "ld a5, 48(%1) \n\t"                                     \
+         "ld a6, 56(%1) \n\t"                                     \
+         "ld a7, 64(%1) \n\t" /* arg8->a7 */                      \
+         "ld t0, 72(%1) \n\t"                                     \
+         "sd t0, 0(sp)  \n\t" /* arg9->stack */                   \
+         "ld t0, 80(%1) \n\t"                                     \
+         "sd t0, 8(sp)  \n\t" /* arg10->stack */                  \
+         "ld t0, 88(%1) \n\t"                                     \
+         "sd t0, 16(sp)  \n\t" /* arg11->stack */                 \
+         "ld t0, 0(%1) \n\t"  /* target->t0 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_T0                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mv %0, a0"          /* a0->_res */                      \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS, "s11"         \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                  arg7,arg8,arg9,arg10,arg11,     \
+                                  arg12)                          \
+   do { /* call orig with 12 word args; result -> lval */         \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[13];                         \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr; /* call target */ \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      _argvec[9] = (unsigned long)(arg9);                         \
+      _argvec[10] = (unsigned long)(arg10);                       \
+      _argvec[11] = (unsigned long)(arg11);                       \
+      _argvec[12] = (unsigned long)(arg12);                       \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "addi sp, sp, -32 \n\t" /* room for arg9-12 */           \
+         "ld a0, 8(%1) \n\t"  /* arg1->a0 */                      \
+         "ld a1, 16(%1) \n\t"                                     \
+         "ld a2, 24(%1) \n\t"                                     \
+         "ld a3, 32(%1) \n\t"                                     \
+         "ld a4, 40(%1) \n\t"                                     \
+         "ld a5, 48(%1) \n\t"                                     \
+         "ld a6, 56(%1) \n\t"                                     \
+         "ld a7, 64(%1) \n\t" /* arg8->a7 */                      \
+         "ld t0, 72(%1) \n\t"                                     \
+         "sd t0, 0(sp)  \n\t" /* arg9->stack */                   \
+         "ld t0, 80(%1) \n\t"                                     \
+         "sd t0, 8(sp)  \n\t" /* arg10->stack */                  \
+         "ld t0, 88(%1) \n\t"                                     \
+         "sd t0, 16(sp)  \n\t" /* arg11->stack */                 \
+         "ld t0, 96(%1) \n\t"                                     \
+         "sd t0, 24(sp)  \n\t" /* arg12->stack */                 \
+         "ld t0, 0(%1) \n\t"  /* target->t0 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_T0                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mv %0, a0"          /* a0->_res */                      \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS, "s11"         \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#endif /* PLAT_riscv64_linux */
+
 /* ------------------------------------------------------------------ */
 /* ARCHITECTURE INDEPENDENT MACROS for CLIENT REQUESTS.               */
 /*                                                                    */
@@ -7162,6 +7696,7 @@ VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
 #undef PLAT_mips32_linux
 #undef PLAT_mips64_linux
 #undef PLAT_nanomips_linux
+#undef PLAT_riscv64_linux
 #undef PLAT_x86_solaris
 #undef PLAT_amd64_solaris
 
diff --git a/include/vki/vki-linux.h b/include/vki/vki-linux.h
index be3d76690..bb626dbff 100644
--- a/include/vki/vki-linux.h
+++ b/include/vki/vki-linux.h
@@ -97,6 +97,8 @@
 #  include "vki-posixtypes-mips64-linux.h"
 #elif defined(VGA_nanomips)
 #  include "vki-posixtypes-nanomips-linux.h"
+#elif defined(VGA_riscv64)
+#  include "vki-posixtypes-riscv64-linux.h"
 #else
 #  error Unknown platform
 #endif
@@ -225,6 +227,8 @@ typedef unsigned int	        vki_uint;
 #  include "vki-mips64-linux.h"
 #elif defined(VGA_nanomips)
 #  include "vki-nanomips-linux.h"
+#elif defined(VGA_riscv64)
+#  include "vki-riscv64-linux.h"
 #else
 #  error Unknown platform
 #endif
diff --git a/include/vki/vki-posixtypes-riscv64-linux.h b/include/vki/vki-posixtypes-riscv64-linux.h
new file mode 100644
index 000000000..b8dfc9ce7
--- /dev/null
+++ b/include/vki/vki-posixtypes-riscv64-linux.h
@@ -0,0 +1,66 @@
+
+/*--------------------------------------------------------------------*/
+/*--- riscv64/Linux-specific kernel interface: posix types.        ---*/
+/*---                               vki-posixtypes-riscv64-linux.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright (C) 2020-2023 Petr Pavlu
+      petr.pavlu@dagobah.cz
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __VKI_POSIXTYPES_RISCV64_LINUX_H
+#define __VKI_POSIXTYPES_RISCV64_LINUX_H
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/uapi/asm-generic/posix_types.h
+//----------------------------------------------------------------------
+
+typedef unsigned int	__vki_kernel_mode_t;
+typedef long		__vki_kernel_off_t;
+typedef int		__vki_kernel_pid_t;
+typedef int		__vki_kernel_ipc_pid_t;
+typedef unsigned int	__vki_kernel_uid_t;
+typedef unsigned int	__vki_kernel_gid_t;
+typedef unsigned long	__vki_kernel_size_t;
+typedef long		__vki_kernel_time_t;
+typedef long		__vki_kernel_suseconds_t;
+typedef long		__vki_kernel_clock_t;
+typedef int		__vki_kernel_timer_t;
+typedef int		__vki_kernel_clockid_t;
+typedef char *		__vki_kernel_caddr_t;
+typedef unsigned int	__vki_kernel_uid32_t;
+typedef unsigned int	__vki_kernel_gid32_t;
+
+typedef unsigned int	__vki_kernel_old_uid_t;
+typedef unsigned int	__vki_kernel_old_gid_t;
+
+typedef long long	__vki_kernel_loff_t;
+
+typedef struct {
+	int	val[2];
+} __vki_kernel_fsid_t;
+
+#endif // __VKI_POSIXTYPES_RISCV64_LINUX_H
+
+/*--------------------------------------------------------------------*/
+/*--- end                           vki-posixtypes-riscv64-linux.h ---*/
+/*--------------------------------------------------------------------*/
diff --git a/include/vki/vki-riscv64-linux.h b/include/vki/vki-riscv64-linux.h
new file mode 100644
index 000000000..3b206a2d8
--- /dev/null
+++ b/include/vki/vki-riscv64-linux.h
@@ -0,0 +1,635 @@
+
+/*--------------------------------------------------------------------*/
+/*--- riscv64/Linux-specific kernel interface. vki-riscv64-linux.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright (C) 2020-2023 Petr Pavlu
+      petr.pavlu@dagobah.cz
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __VKI_RISCV64_LINUX_H
+#define __VKI_RISCV64_LINUX_H
+
+// riscv64 is little-endian.
+#define VKI_LITTLE_ENDIAN  1
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/uapi/asm-generic/int-ll64.h
+//----------------------------------------------------------------------
+
+typedef unsigned char __vki_u8;
+
+typedef __signed__ short __vki_s16;
+typedef unsigned short __vki_u16;
+
+typedef __signed__ int __vki_s32;
+typedef unsigned int __vki_u32;
+
+typedef __signed__ long long __vki_s64;
+typedef unsigned long long __vki_u64;
+
+typedef unsigned short vki_u16;
+
+typedef unsigned int vki_u32;
+
+//----------------------------------------------------------------------
+// From linux-6.0/arch/riscv/include/asm/page.h
+//----------------------------------------------------------------------
+
+#define VKI_PAGE_SHIFT	(12)
+#define VKI_PAGE_SIZE	(1UL << VKI_PAGE_SHIFT)
+#define VKI_MAX_PAGE_SHIFT	VKI_PAGE_SHIFT
+#define VKI_MAX_PAGE_SIZE	VKI_PAGE_SIZE
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/asm-generic/shmparam.h
+//----------------------------------------------------------------------
+
+#define VKI_SHMLBA VKI_PAGE_SIZE	/* attach addr a multiple of this */
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/uapi/asm-generic/signal-defs.h
+//----------------------------------------------------------------------
+
+#define VKI_SIG_BLOCK          0	/* for blocking signals */
+#define VKI_SIG_UNBLOCK        1	/* for unblocking signals */
+#define VKI_SIG_SETMASK        2	/* for setting the signal mask */
+
+typedef void __vki_signalfn_t(int);
+typedef __vki_signalfn_t __user *__vki_sighandler_t;
+
+#define VKI_SIG_DFL	((__vki_sighandler_t)0)	/* default signal handling */
+#define VKI_SIG_IGN	((__vki_sighandler_t)1)	/* ignore signal */
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/uapi/asm-generic/signal.h
+//----------------------------------------------------------------------
+
+#define _VKI_NSIG	64
+#define _VKI_NSIG_BPW	64
+#define _VKI_NSIG_WORDS	(_VKI_NSIG / _VKI_NSIG_BPW)
+
+typedef unsigned long vki_old_sigset_t;
+
+typedef struct {
+	unsigned long sig[_VKI_NSIG_WORDS];	/* _VKI_NSIG / _VKI_NSIG_BPW = 64 / 64 = 1 element */
+} vki_sigset_t;
+
+#define VKI_SIGHUP		 1
+#define VKI_SIGINT		 2
+#define VKI_SIGQUIT		 3
+#define VKI_SIGILL		 4
+#define VKI_SIGTRAP		 5
+#define VKI_SIGABRT		 6
+#define VKI_SIGBUS		 7
+#define VKI_SIGFPE		 8
+#define VKI_SIGKILL		 9
+#define VKI_SIGUSR1		10
+#define VKI_SIGSEGV		11
+#define VKI_SIGUSR2		12
+#define VKI_SIGPIPE		13
+#define VKI_SIGALRM		14
+#define VKI_SIGTERM		15
+#define VKI_SIGSTKFLT		16
+#define VKI_SIGCHLD		17
+#define VKI_SIGCONT		18
+#define VKI_SIGSTOP		19
+#define VKI_SIGTSTP		20
+#define VKI_SIGTTIN		21
+#define VKI_SIGTTOU		22
+#define VKI_SIGURG		23
+#define VKI_SIGXCPU		24
+#define VKI_SIGXFSZ		25
+#define VKI_SIGVTALRM		26
+#define VKI_SIGPROF		27
+#define VKI_SIGWINCH		28
+#define VKI_SIGIO		29
+#define VKI_SIGPWR		30
+#define VKI_SIGSYS		31
+#define	VKI_SIGUNUSED		31
+
+#define VKI_SIGRTMIN		32
+#define VKI_SIGRTMAX		_VKI_NSIG
+
+#define VKI_SA_NOCLDSTOP	0x00000001
+#define VKI_SA_NOCLDWAIT	0x00000002
+#define VKI_SA_SIGINFO		0x00000004
+#define VKI_SA_ONSTACK		0x08000000
+#define VKI_SA_RESTART		0x10000000
+#define VKI_SA_NODEFER		0x40000000
+#define VKI_SA_RESETHAND	0x80000000
+
+#define VKI_SA_NOMASK	VKI_SA_NODEFER
+#define VKI_SA_ONESHOT	VKI_SA_RESETHAND
+
+#define VKI_MINSIGSTKSZ	2048
+
+struct vki_sigaction_base {
+	__vki_sighandler_t ksa_handler;
+	unsigned long sa_flags;
+	vki_sigset_t sa_mask;
+};
+
+/* On Linux we use the same type for passing sigactions to
+   and from the kernel.  Hence: */
+typedef  struct vki_sigaction_base  vki_sigaction_toK_t;
+typedef  struct vki_sigaction_base  vki_sigaction_fromK_t;
+
+typedef struct vki_sigaltstack {
+	void __user *ss_sp;
+	int ss_flags;
+	vki_size_t ss_size;
+} vki_stack_t;
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/uapi/linux/signal.h
+//----------------------------------------------------------------------
+
+#define VKI_SS_ONSTACK	1
+#define VKI_SS_DISABLE	2
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/uapi/asm-generic/mman-common.h
+//----------------------------------------------------------------------
+
+#define VKI_PROT_READ	0x1		/* page can be read */
+#define VKI_PROT_WRITE	0x2		/* page can be written */
+#define VKI_PROT_EXEC	0x4		/* page can be executed */
+#define VKI_PROT_NONE	0x0		/* page can not be accessed */
+#define VKI_PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
+#define VKI_PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
+
+#define VKI_MAP_FIXED	0x10		/* Interpret addr exactly */
+#define VKI_MAP_ANONYMOUS	0x20	/* don't use a file */
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/uapi/asm-generic/mman.h
+//----------------------------------------------------------------------
+
+#define VKI_MAP_NORESERVE       0x4000  /* don't check for reservations */
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/uapi/linux/mman.h
+//----------------------------------------------------------------------
+
+#define VKI_MAP_SHARED	0x01		/* Share changes */
+#define VKI_MAP_PRIVATE	0x02		/* Changes are private */
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/uapi/asm-generic/fcntl.h
+//----------------------------------------------------------------------
+
+#define VKI_O_ACCMODE	00000003
+#define VKI_O_RDONLY	00000000
+#define VKI_O_WRONLY	00000001
+#define VKI_O_RDWR	00000002
+#define VKI_O_CREAT	00000100	/* not fcntl */
+#define VKI_O_EXCL	00000200	/* not fcntl */
+#define VKI_O_TRUNC	00001000	/* not fcntl */
+#define VKI_O_APPEND	00002000
+#define VKI_O_NONBLOCK	00004000
+#define VKI_O_DIRECT	00040000	/* direct disk access hint */
+#define VKI_O_LARGEFILE	00100000
+
+#define VKI_F_DUPFD		0	/* dup */
+#define VKI_F_GETFD		1	/* get close_on_exec */
+#define VKI_F_SETFD		2	/* set/clear close_on_exec */
+#define VKI_F_GETFL		3	/* get file->f_flags */
+#define VKI_F_SETFL		4	/* set file->f_flags */
+#define VKI_F_GETLK		5
+#define VKI_F_SETLK		6
+#define VKI_F_SETLKW		7
+
+#define VKI_F_SETOWN		8	/*  for sockets. */
+#define VKI_F_GETOWN		9	/*  for sockets. */
+#define VKI_F_SETSIG		10	/*  for sockets. */
+#define VKI_F_GETSIG		11	/*  for sockets. */
+
+#define VKI_F_SETOWN_EX		15
+#define VKI_F_GETOWN_EX		16
+
+#define VKI_F_OFD_GETLK		36
+#define VKI_F_OFD_SETLK		37
+#define VKI_F_OFD_SETLKW	38
+
+struct vki_f_owner_ex {
+	int	type;
+	__vki_kernel_pid_t	pid;
+};
+
+#define VKI_FD_CLOEXEC	1	/* actually anything with low bit set goes */
+
+#define VKI_F_LINUX_SPECIFIC_BASE	1024
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/uapi/linux/fcntl.h
+//----------------------------------------------------------------------
+
+#define VKI_AT_FDCWD		-100
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/uapi/asm-generic/resource.h
+//----------------------------------------------------------------------
+
+#define VKI_RLIMIT_DATA		2	/* max data size */
+#define VKI_RLIMIT_STACK	3	/* max stack size */
+#define VKI_RLIMIT_CORE		4	/* max core file size */
+#define VKI_RLIMIT_NOFILE	7	/* max number of open files */
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/uapi/asm-generic/socket.h
+//----------------------------------------------------------------------
+
+#define VKI_SOL_SOCKET	1
+
+#define VKI_SO_TYPE	3
+
+#define VKI_SO_ATTACH_FILTER	26
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/uapi/asm-generic/sockios.h
+//----------------------------------------------------------------------
+
+#define VKI_SIOCSPGRP		0x8902
+#define VKI_SIOCGPGRP		0x8904
+#define VKI_SIOCATMARK		0x8905
+#define VKI_SIOCGSTAMP		0x8906		/* Get stamp (timeval) */
+#define VKI_SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/uapi/asm-generic/stat.h
+//----------------------------------------------------------------------
+
+struct vki_stat {
+	unsigned long	st_dev;		/* Device.  */
+	unsigned long	st_ino;		/* File serial number.  */
+	unsigned int	st_mode;	/* File mode.  */
+	unsigned int	st_nlink;	/* Link count.  */
+	unsigned int	st_uid;		/* User ID of the file's owner.  */
+	unsigned int	st_gid;		/* Group ID of the file's group. */
+	unsigned long	st_rdev;	/* Device number, if device.  */
+	unsigned long	__pad1;
+	long		st_size;	/* Size of file, in bytes.  */
+	int		st_blksize;	/* Optimal block size for I/O.  */
+	int		__pad2;
+	long		st_blocks;	/* Number 512-byte blocks allocated. */
+	long		st_atime;	/* Time of last access.  */
+	unsigned long	st_atime_nsec;
+	long		st_mtime;	/* Time of last modification.  */
+	unsigned long	st_mtime_nsec;
+	long		st_ctime;	/* Time of last status change.  */
+	unsigned long	st_ctime_nsec;
+	unsigned int	__unused4;
+	unsigned int	__unused5;
+};
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/uapi/asm-generic/statfs.h
+//----------------------------------------------------------------------
+
+struct vki_statfs {
+	long f_type;
+	long f_bsize;
+	long f_blocks;
+	long f_bfree;
+	long f_bavail;
+	long f_files;
+	long f_ffree;
+	__vki_kernel_fsid_t f_fsid;
+	long f_namelen;
+	long f_frsize;
+	long f_flags;
+	long f_spare[4];
+};
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/uapi/asm-generic/termios.h
+//----------------------------------------------------------------------
+
+struct vki_winsize {
+	unsigned short ws_row;
+	unsigned short ws_col;
+	unsigned short ws_xpixel;
+	unsigned short ws_ypixel;
+};
+
+#define VKI_NCC 8
+struct vki_termio {
+	unsigned short c_iflag;		/* input mode flags */
+	unsigned short c_oflag;		/* output mode flags */
+	unsigned short c_cflag;		/* control mode flags */
+	unsigned short c_lflag;		/* local mode flags */
+	unsigned char c_line;		/* line discipline */
+	unsigned char c_cc[VKI_NCC];	/* control characters */
+};
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/uapi/asm-generic/termbits.h
+//----------------------------------------------------------------------
+
+typedef unsigned char	vki_cc_t;
+typedef unsigned int	vki_tcflag_t;
+
+#define VKI_NCCS 19
+struct vki_termios {
+	vki_tcflag_t c_iflag;		/* input mode flags */
+	vki_tcflag_t c_oflag;		/* output mode flags */
+	vki_tcflag_t c_cflag;		/* control mode flags */
+	vki_tcflag_t c_lflag;		/* local mode flags */
+	vki_cc_t c_line;		/* line discipline */
+	vki_cc_t c_cc[VKI_NCCS];	/* control characters */
+};
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/uapi/asm-generic/ioctl.h
+//----------------------------------------------------------------------
+
+#define _VKI_IOC_NRBITS		8
+#define _VKI_IOC_TYPEBITS	8
+#define _VKI_IOC_SIZEBITS	14
+#define _VKI_IOC_DIRBITS	2
+
+#define _VKI_IOC_SIZEMASK	((1 << _VKI_IOC_SIZEBITS)-1)
+#define _VKI_IOC_DIRMASK	((1 << _VKI_IOC_DIRBITS)-1)
+
+#define _VKI_IOC_NRSHIFT	0
+#define _VKI_IOC_TYPESHIFT	(_VKI_IOC_NRSHIFT+_VKI_IOC_NRBITS)
+#define _VKI_IOC_SIZESHIFT	(_VKI_IOC_TYPESHIFT+_VKI_IOC_TYPEBITS)
+#define _VKI_IOC_DIRSHIFT	(_VKI_IOC_SIZESHIFT+_VKI_IOC_SIZEBITS)
+
+#define _VKI_IOC_NONE	0U
+#define _VKI_IOC_WRITE	1U
+#define _VKI_IOC_READ	2U
+
+#define _VKI_IOC(dir,type,nr,size) /* OR the four fields into one value */ \
+	(((dir)  << _VKI_IOC_DIRSHIFT) |   /* shift 30, _VKI_IOC_DIRBITS  = 2  */ \
+	 ((type) << _VKI_IOC_TYPESHIFT) |  /* shift 8,  _VKI_IOC_TYPEBITS = 8  */ \
+	 ((nr)   << _VKI_IOC_NRSHIFT) |    /* shift 0,  _VKI_IOC_NRBITS   = 8  */ \
+	 ((size) << _VKI_IOC_SIZESHIFT))   /* shift 16, _VKI_IOC_SIZEBITS = 14 */
+
+#define _VKI_IO(type,nr)	_VKI_IOC(_VKI_IOC_NONE,(type),(nr),0)
+#define _VKI_IOR(type,nr,size)	_VKI_IOC(_VKI_IOC_READ,(type),(nr),sizeof(size))
+#define _VKI_IOW(type,nr,size)	_VKI_IOC(_VKI_IOC_WRITE,(type),(nr),sizeof(size))
+#define _VKI_IOWR(type,nr,size)	_VKI_IOC(_VKI_IOC_READ|_VKI_IOC_WRITE,(type),(nr),sizeof(size))
+
+#define _VKI_IOC_DIR(nr)		(((nr) >> _VKI_IOC_DIRSHIFT) & _VKI_IOC_DIRMASK)
+#define _VKI_IOC_SIZE(nr)		(((nr) >> _VKI_IOC_SIZESHIFT) & _VKI_IOC_SIZEMASK)
+
+//----------------------------------------------------------------------
+// From linux-3.10.5/include/uapi/asm-generic/ioctls.h
+//----------------------------------------------------------------------
+
+#define VKI_TCGETS	0x5401
+#define VKI_TCSETS	0x5402
+#define VKI_TCSETSW	0x5403
+#define VKI_TCSETSF	0x5404
+#define VKI_TCGETA	0x5405
+#define VKI_TCSETA	0x5406
+#define VKI_TCSETAW	0x5407
+#define VKI_TCSETAF	0x5408
+#define VKI_TCSBRK	0x5409
+#define VKI_TCXONC	0x540A
+#define VKI_TCFLSH	0x540B
+#define VKI_TIOCSCTTY	0x540E
+#define VKI_TIOCGPGRP	0x540F
+#define VKI_TIOCSPGRP	0x5410
+#define VKI_TIOCOUTQ	0x5411
+#define VKI_TIOCGWINSZ	0x5413
+#define VKI_TIOCSWINSZ	0x5414
+#define VKI_TIOCMGET	0x5415
+#define VKI_TIOCMBIS	0x5416
+#define VKI_TIOCMBIC	0x5417
+#define VKI_TIOCMSET	0x5418
+#define VKI_FIONREAD	0x541B
+#define VKI_TIOCLINUX	0x541C
+#define VKI_TIOCGSERIAL	0x541E
+#define VKI_TIOCSSERIAL	0x541F
+#define VKI_FIONBIO	0x5421
+#define VKI_TIOCNOTTY	0x5422
+#define VKI_TCSBRKP	0x5425	/* Needed for POSIX tcsendbreak() */
+#define VKI_TIOCGPTN	_VKI_IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
+#define VKI_TIOCSPTLCK	_VKI_IOW('T',0x31, int) /* Lock/unlock Pty */
+
+#define VKI_FIONCLEX    0x5450
+#define VKI_FIOCLEX     0x5451
+#define VKI_FIOASYNC	0x5452
+#define VKI_TIOCSERGETLSR   0x5459 /* Get line status register */
+
+#define VKI_TIOCGICOUNT	0x545D	/* read serial port inline interrupt counts */
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/uapi/asm-generic/poll.h
+//----------------------------------------------------------------------
+
+#define VKI_POLLIN		0x0001
+
+struct vki_pollfd {
+	int fd;
+	short events;
+	short revents;
+};
+
+//----------------------------------------------------------------------
+// From linux-6.0/arch/riscv/include/uapi/asm/ptrace.h
+//----------------------------------------------------------------------
+
+struct vki_user_regs_struct {
+	unsigned long pc;
+	unsigned long ra;
+	unsigned long sp;
+	unsigned long gp;
+	unsigned long tp;
+	unsigned long t0;
+	unsigned long t1;
+	unsigned long t2;
+	unsigned long s0;
+	unsigned long s1;
+	unsigned long a0;
+	unsigned long a1;
+	unsigned long a2;
+	unsigned long a3;
+	unsigned long a4;
+	unsigned long a5;
+	unsigned long a6;
+	unsigned long a7;
+	unsigned long s2;
+	unsigned long s3;
+	unsigned long s4;
+	unsigned long s5;
+	unsigned long s6;
+	unsigned long s7;
+	unsigned long s8;
+	unsigned long s9;
+	unsigned long s10;
+	unsigned long s11;
+	unsigned long t3;
+	unsigned long t4;
+	unsigned long t5;
+	unsigned long t6;
+};
+
+struct __vki_riscv_f_ext_state {
+	__vki_u32 f[32];
+	__vki_u32 fcsr;
+};
+
+struct __vki_riscv_d_ext_state {
+	__vki_u64 f[32];
+	__vki_u32 fcsr;
+};
+
+struct __vki_riscv_q_ext_state {
+	__vki_u64 f[64] __attribute__((aligned(16)));
+	__vki_u32 fcsr;
+	__vki_u32 reserved[3];
+};
+
+union __vki_riscv_fp_state {
+	struct __vki_riscv_f_ext_state f;
+	struct __vki_riscv_d_ext_state d;
+	struct __vki_riscv_q_ext_state q;
+};
+
+//----------------------------------------------------------------------
+// From linux-6.0/arch/riscv/include/uapi/asm/sigcontext.h
+//----------------------------------------------------------------------
+
+struct vki_sigcontext {
+	struct vki_user_regs_struct sc_regs;
+	union __vki_riscv_fp_state sc_fpregs;
+};
+
+//----------------------------------------------------------------------
+// From linux-6.0/arch/riscv/include/uapi/asm/elf.h
+//----------------------------------------------------------------------
+
+typedef unsigned long vki_elf_greg_t;	/* one general-purpose register slot */
+typedef struct vki_user_regs_struct vki_elf_gregset_t;
+#define VKI_ELF_NGREG (sizeof (vki_elf_gregset_t) / sizeof(vki_elf_greg_t)) /* typedef name, so no 'struct' keyword */
+
+typedef union __vki_riscv_fp_state vki_elf_fpregset_t;
+
+//----------------------------------------------------------------------
+// From linux-6.0/arch/riscv/include/uapi/asm/ucontext.h
+//----------------------------------------------------------------------
+
+struct vki_ucontext {
+	unsigned long		uc_flags;
+	struct vki_ucontext	*uc_link;
+	vki_stack_t		uc_stack;
+	vki_sigset_t		uc_sigmask;
+	__vki_u8		__unused[1024 / 8 - sizeof(vki_sigset_t)];
+	struct vki_sigcontext	uc_mcontext;
+};
+
+typedef char vki_modify_ldt_t;
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/uapi/asm-generic/ipcbuf.h
+//----------------------------------------------------------------------
+
+struct vki_ipc64_perm {
+	__vki_kernel_key_t	key;
+	__vki_kernel_uid32_t	uid;
+	__vki_kernel_gid32_t	gid;
+	__vki_kernel_uid32_t	cuid;
+	__vki_kernel_gid32_t	cgid;
+	__vki_kernel_mode_t	mode;
+        unsigned char           __pad1[4 - sizeof(__vki_kernel_mode_t)];
+	unsigned short		seq;
+	unsigned short		__pad2;
+	unsigned long		__unused1;
+	unsigned long		__unused2;
+};
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/uapi/asm-generic/sembuf.h
+//----------------------------------------------------------------------
+
+struct vki_semid64_ds {
+	struct vki_ipc64_perm sem_perm;		/* permissions .. see ipc.h */
+	__vki_kernel_time_t	sem_otime;		/* last semop time */
+	__vki_kernel_time_t	sem_ctime;		/* last change time */
+	unsigned long	sem_nsems;		/* no. of semaphores in array */
+	unsigned long	__unused3;
+	unsigned long	__unused4;
+};
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/uapi/asm-generic/msgbuf.h
+//----------------------------------------------------------------------
+
+struct vki_msqid64_ds {
+	struct vki_ipc64_perm msg_perm;
+	__vki_kernel_time_t msg_stime;	/* last msgsnd time */
+	__vki_kernel_time_t msg_rtime;	/* last msgrcv time */
+	__vki_kernel_time_t msg_ctime;	/* last change time */
+	unsigned long  msg_cbytes;	/* current number of bytes on queue */
+	unsigned long  msg_qnum;	/* number of messages in queue */
+	unsigned long  msg_qbytes;	/* max number of bytes on queue */
+	__vki_kernel_pid_t msg_lspid;	/* pid of last msgsnd */
+	__vki_kernel_pid_t msg_lrpid;	/* last receive pid */
+	unsigned long  __unused4;
+	unsigned long  __unused5;
+};
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/uapi/asm-generic/shmbuf.h
+//----------------------------------------------------------------------
+
+struct vki_shmid64_ds {
+	struct vki_ipc64_perm	shm_perm;	/* operation perms */
+	vki_size_t		shm_segsz;	/* size of segment (bytes) */
+	__vki_kernel_time_t	shm_atime;	/* last attach time */
+	__vki_kernel_time_t	shm_dtime;	/* last detach time */
+	__vki_kernel_time_t	shm_ctime;	/* last change time */
+	__vki_kernel_pid_t	shm_cpid;	/* pid of creator */
+	__vki_kernel_pid_t	shm_lpid;	/* pid of last operator */
+	unsigned long		shm_nattch;	/* no. of current attaches */
+	unsigned long		__unused4;
+	unsigned long		__unused5;
+};
+
+struct vki_shminfo64 {
+	unsigned long	shmmax;
+	unsigned long	shmmin;
+	unsigned long	shmmni;
+	unsigned long	shmseg;
+	unsigned long	shmall;
+	unsigned long	__unused1;
+	unsigned long	__unused2;
+	unsigned long	__unused3;
+	unsigned long	__unused4;
+};
+
+//----------------------------------------------------------------------
+// From linux-6.0/include/uapi/asm-generic/errno.h
+//----------------------------------------------------------------------
+
+#define	VKI_ENOSYS		38	/* Invalid system call number */
+#define	VKI_EOVERFLOW		75	/* Value too large for defined data type */
+
+#endif // __VKI_RISCV64_LINUX_H
+
+/*--------------------------------------------------------------------*/
+/*--- end                                      vki-riscv64-linux.h ---*/
+/*--------------------------------------------------------------------*/
diff --git a/include/vki/vki-scnums-riscv64-linux.h b/include/vki/vki-scnums-riscv64-linux.h
new file mode 100644
index 000000000..15ba9308d
--- /dev/null
+++ b/include/vki/vki-scnums-riscv64-linux.h
@@ -0,0 +1,329 @@
+
+/*--------------------------------------------------------------------*/
+/*--- System call numbers for riscv64-linux.                       ---*/
+/*---                                   vki-scnums-riscv64-linux.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright (C) 2020-2023 Petr Pavlu
+      petr.pavlu@dagobah.cz
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __VKI_SCNUMS_RISCV64_LINUX_H
+#define __VKI_SCNUMS_RISCV64_LINUX_H
+
+// From linux-6.0/arch/riscv/include/uapi/asm/unistd.h
+// is a #include of
+//      linux-6.0/include/uapi/asm-generic/unistd.h
+
+#define __NR_io_setup 0
+#define __NR_io_destroy 1
+#define __NR_io_submit 2
+#define __NR_io_cancel 3
+#define __NR_io_getevents 4
+#define __NR_setxattr 5
+#define __NR_lsetxattr 6
+#define __NR_fsetxattr 7
+#define __NR_getxattr 8
+#define __NR_lgetxattr 9
+#define __NR_fgetxattr 10
+#define __NR_listxattr 11
+#define __NR_llistxattr 12
+#define __NR_flistxattr 13
+#define __NR_removexattr 14
+#define __NR_lremovexattr 15
+#define __NR_fremovexattr 16
+#define __NR_getcwd 17
+#define __NR_lookup_dcookie 18
+#define __NR_eventfd2 19
+#define __NR_epoll_create1 20
+#define __NR_epoll_ctl 21
+#define __NR_epoll_pwait 22
+#define __NR_dup 23
+#define __NR_dup3 24
+#define __NR3264_fcntl 25
+#define __NR_inotify_init1 26
+#define __NR_inotify_add_watch 27
+#define __NR_inotify_rm_watch 28
+#define __NR_ioctl 29
+#define __NR_ioprio_set 30
+#define __NR_ioprio_get 31
+#define __NR_flock 32
+#define __NR_mknodat 33
+#define __NR_mkdirat 34
+#define __NR_unlinkat 35
+#define __NR_symlinkat 36
+#define __NR_linkat 37
+#define __NR_umount2 39
+#define __NR_mount 40
+#define __NR_pivot_root 41
+#define __NR3264_statfs 43
+#define __NR3264_fstatfs 44
+#define __NR3264_truncate 45
+#define __NR3264_ftruncate 46
+#define __NR_fallocate 47
+#define __NR_faccessat 48
+#define __NR_chdir 49
+#define __NR_fchdir 50
+#define __NR_chroot 51
+#define __NR_fchmod 52
+#define __NR_fchmodat 53
+#define __NR_fchownat 54
+#define __NR_fchown 55
+#define __NR_openat 56
+#define __NR_close 57
+#define __NR_vhangup 58
+#define __NR_pipe2 59
+#define __NR_quotactl 60
+#define __NR_getdents64 61
+#define __NR3264_lseek 62
+#define __NR_read 63
+#define __NR_write 64
+#define __NR_readv 65
+#define __NR_writev 66
+#define __NR_pread64 67
+#define __NR_pwrite64 68
+#define __NR_preadv 69
+#define __NR_pwritev 70
+#define __NR3264_sendfile 71
+#define __NR_pselect6 72
+#define __NR_ppoll 73
+#define __NR_signalfd4 74
+#define __NR_vmsplice 75
+#define __NR_splice 76
+#define __NR_tee 77
+#define __NR_readlinkat 78
+#define __NR3264_fstatat 79
+#define __NR3264_fstat 80
+#define __NR_sync 81
+#define __NR_fsync 82
+#define __NR_fdatasync 83
+#define __NR_sync_file_range 84
+#define __NR_timerfd_create 85
+#define __NR_timerfd_settime 86
+#define __NR_timerfd_gettime 87
+#define __NR_utimensat 88
+#define __NR_acct 89
+#define __NR_capget 90
+#define __NR_capset 91
+#define __NR_personality 92
+#define __NR_exit 93
+#define __NR_exit_group 94
+#define __NR_waitid 95
+#define __NR_set_tid_address 96
+#define __NR_unshare 97
+#define __NR_futex 98
+#define __NR_set_robust_list 99
+#define __NR_get_robust_list 100
+#define __NR_nanosleep 101
+#define __NR_getitimer 102
+#define __NR_setitimer 103
+#define __NR_kexec_load 104
+#define __NR_init_module 105
+#define __NR_delete_module 106
+#define __NR_timer_create 107
+#define __NR_timer_gettime 108
+#define __NR_timer_getoverrun 109
+#define __NR_timer_settime 110
+#define __NR_timer_delete 111
+#define __NR_clock_settime 112
+#define __NR_clock_gettime 113
+#define __NR_clock_getres 114
+#define __NR_clock_nanosleep 115
+#define __NR_syslog 116
+#define __NR_ptrace 117
+#define __NR_sched_setparam 118
+#define __NR_sched_setscheduler 119
+#define __NR_sched_getscheduler 120
+#define __NR_sched_getparam 121
+#define __NR_sched_setaffinity 122
+#define __NR_sched_getaffinity 123
+#define __NR_sched_yield 124
+#define __NR_sched_get_priority_max 125
+#define __NR_sched_get_priority_min 126
+#define __NR_sched_rr_get_interval 127
+#define __NR_kill 129
+#define __NR_tkill 130
+#define __NR_tgkill 131
+#define __NR_sigaltstack 132
+#define __NR_rt_sigsuspend 133
+#define __NR_rt_sigaction 134
+#define __NR_rt_sigprocmask 135
+#define __NR_rt_sigpending 136
+#define __NR_rt_sigtimedwait 137
+#define __NR_rt_sigqueueinfo 138
+#define __NR_rt_sigreturn 139
+#define __NR_setpriority 140
+#define __NR_getpriority 141
+#define __NR_setregid 143
+#define __NR_setgid 144
+#define __NR_setreuid 145
+#define __NR_setuid 146
+#define __NR_setresuid 147
+#define __NR_getresuid 148
+#define __NR_setresgid 149
+#define __NR_getresgid 150
+#define __NR_setfsuid 151
+#define __NR_setfsgid 152
+#define __NR_times 153
+#define __NR_setpgid 154
+#define __NR_getpgid 155
+#define __NR_getsid 156
+#define __NR_setsid 157
+#define __NR_getgroups 158
+#define __NR_setgroups 159
+#define __NR_uname 160
+#define __NR_sethostname 161
+#define __NR_getrlimit 163
+#define __NR_setrlimit 164
+#define __NR_getrusage 165
+#define __NR_umask 166
+#define __NR_prctl 167
+#define __NR_getcpu 168
+#define __NR_gettimeofday 169
+#define __NR_settimeofday 170
+#define __NR_adjtimex 171
+#define __NR_getpid 172
+#define __NR_getppid 173
+#define __NR_getuid 174
+#define __NR_geteuid 175
+#define __NR_getgid 176
+#define __NR_getegid 177
+#define __NR_gettid 178
+#define __NR_sysinfo 179
+#define __NR_mq_open 180
+#define __NR_mq_unlink 181
+#define __NR_mq_timedsend 182
+#define __NR_mq_timedreceive 183
+#define __NR_mq_notify 184
+#define __NR_mq_getsetattr 185
+#define __NR_msgget 186
+#define __NR_msgctl 187
+#define __NR_msgrcv 188
+#define __NR_msgsnd 189
+#define __NR_semget 190
+#define __NR_semctl 191
+#define __NR_semtimedop 192
+#define __NR_semop 193
+#define __NR_shmget 194
+#define __NR_shmctl 195
+#define __NR_shmat 196
+#define __NR_shmdt 197
+#define __NR_socket 198
+#define __NR_socketpair 199
+#define __NR_bind 200
+#define __NR_listen 201
+#define __NR_accept 202
+#define __NR_connect 203
+#define __NR_getsockname 204
+#define __NR_getpeername 205
+#define __NR_sendto 206
+#define __NR_recvfrom 207
+#define __NR_setsockopt 208
+#define __NR_getsockopt 209
+#define __NR_shutdown 210
+#define __NR_sendmsg 211
+#define __NR_recvmsg 212
+#define __NR_readahead 213
+#define __NR_brk 214
+#define __NR_munmap 215
+#define __NR_mremap 216
+#define __NR_add_key 217
+#define __NR_request_key 218
+#define __NR_keyctl 219
+#define __NR_clone 220
+#define __NR_execve 221
+#define __NR3264_mmap 222
+#define __NR3264_fadvise64 223
+#define __NR_mprotect 226
+#define __NR_msync 227
+#define __NR_mlock 228
+#define __NR_munlock 229
+#define __NR_mlockall 230
+#define __NR_munlockall 231
+#define __NR_mincore 232
+#define __NR_madvise 233
+#define __NR_mbind 235
+#define __NR_get_mempolicy 236
+#define __NR_set_mempolicy 237
+#define __NR_move_pages 239
+#define __NR_rt_tgsigqueueinfo 240
+#define __NR_perf_event_open 241
+#define __NR_accept4 242
+#define __NR_recvmmsg 243
+#define __NR_arch_specific_syscall 244
+#define __NR_wait4 260
+#define __NR_prlimit64 261
+#define __NR_fanotify_init 262
+#define __NR_fanotify_mark 263
+#define __NR_name_to_handle_at 264
+#define __NR_open_by_handle_at 265
+#define __NR_clock_adjtime 266
+#define __NR_syncfs 267
+#define __NR_setns 268
+#define __NR_sendmmsg 269
+#define __NR_process_vm_readv 270
+#define __NR_process_vm_writev 271
+#define __NR_kcmp 272
+#define __NR_finit_module 273
+#define __NR_sched_setattr 274
+#define __NR_sched_getattr 275
+#define __NR_renameat2 276
+#define __NR_getrandom 278
+#define __NR_memfd_create 279
+#define __NR_bpf 280
+#define __NR_execveat 281
+#define __NR_userfaultfd 282
+#define __NR_membarrier 283
+#define __NR_mlock2 284
+#define __NR_copy_file_range 285
+#define __NR_preadv2 286
+#define __NR_pwritev2 287
+#define __NR_pkey_free 290
+#define __NR_statx 291
+#define __NR_rseq 293
+#define __NR_io_uring_setup 425
+#define __NR_io_uring_enter 426
+#define __NR_io_uring_register 427
+#define __NR_clone3 435
+#define __NR_close_range 436
+#define __NR_faccessat2 439
+#define __NR_memfd_secret 447
+
+#define __NR_fcntl __NR3264_fcntl
+#define __NR_statfs __NR3264_statfs
+#define __NR_fstatfs __NR3264_fstatfs
+#define __NR_truncate __NR3264_truncate
+#define __NR_ftruncate __NR3264_ftruncate
+#define __NR_lseek __NR3264_lseek
+#define __NR_sendfile __NR3264_sendfile
+#define __NR_newfstatat __NR3264_fstatat
+#define __NR_fstat __NR3264_fstat
+#define __NR_mmap __NR3264_mmap
+#define __NR_fadvise64 __NR3264_fadvise64
+
+#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
+
+#endif /* __VKI_SCNUMS_RISCV64_LINUX_H */
+
+/*--------------------------------------------------------------------*/
+/*--- end                               vki-scnums-riscv64-linux.h ---*/
+/*--------------------------------------------------------------------*/
diff --git a/memcheck/mc_machine.c b/memcheck/mc_machine.c
index 176c8e5cb..34df0011a 100644
--- a/memcheck/mc_machine.c
+++ b/memcheck/mc_machine.c
@@ -1396,6 +1396,104 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
 #  undef GOF
 #  undef SZB
 
+   /* ------------------- riscv64 ------------------- */
+
+#  elif defined(VGA_riscv64)
+
+#  define GOF(_fieldname) \
+      (offsetof(VexGuestRISCV64State,guest_##_fieldname))
+#  define SZB(_fieldname) \
+      (sizeof(((VexGuestRISCV64State*)0)->guest_##_fieldname))
+
+   Int  o    = offset;
+   Int  sz   = szB;
+   Bool is48 = sz == 8 || sz == 4;
+
+   tl_assert(sz > 0);
+   tl_assert(host_is_little_endian());
+
+   if (o == GOF(x0)  && is48) return -1;
+   if (o == GOF(x1)  && is48) return o;
+   if (o == GOF(x2)  && is48) return o;
+   if (o == GOF(x3)  && is48) return o;
+   if (o == GOF(x4)  && is48) return o;
+   if (o == GOF(x5)  && is48) return o;
+   if (o == GOF(x6)  && is48) return o;
+   if (o == GOF(x7)  && is48) return o;
+   if (o == GOF(x8)  && is48) return o;
+   if (o == GOF(x9)  && is48) return o;
+   if (o == GOF(x10) && is48) return o;
+   if (o == GOF(x11) && is48) return o;
+   if (o == GOF(x12) && is48) return o;
+   if (o == GOF(x13) && is48) return o;
+   if (o == GOF(x14) && is48) return o;
+   if (o == GOF(x15) && is48) return o;
+   if (o == GOF(x16) && is48) return o;
+   if (o == GOF(x17) && is48) return o;
+   if (o == GOF(x18) && is48) return o;
+   if (o == GOF(x19) && is48) return o;
+   if (o == GOF(x20) && is48) return o;
+   if (o == GOF(x21) && is48) return o;
+   if (o == GOF(x22) && is48) return o;
+   if (o == GOF(x23) && is48) return o;
+   if (o == GOF(x24) && is48) return o;
+   if (o == GOF(x25) && is48) return o;
+   if (o == GOF(x26) && is48) return o;
+   if (o == GOF(x27) && is48) return o;
+   if (o == GOF(x28) && is48) return o;
+   if (o == GOF(x29) && is48) return o;
+   if (o == GOF(x30) && is48) return o;
+   if (o == GOF(x31) && is48) return o;
+   if (o == GOF(pc)  && sz == 8) return -1;
+
+   if (o >= GOF(f0)   && o+sz <= GOF(f0) +SZB(f0))  return GOF(f0);
+   if (o >= GOF(f1)   && o+sz <= GOF(f1) +SZB(f1))  return GOF(f1);
+   if (o >= GOF(f2)   && o+sz <= GOF(f2) +SZB(f2))  return GOF(f2);
+   if (o >= GOF(f3)   && o+sz <= GOF(f3) +SZB(f3))  return GOF(f3);
+   if (o >= GOF(f4)   && o+sz <= GOF(f4) +SZB(f4))  return GOF(f4);
+   if (o >= GOF(f5)   && o+sz <= GOF(f5) +SZB(f5))  return GOF(f5);
+   if (o >= GOF(f6)   && o+sz <= GOF(f6) +SZB(f6))  return GOF(f6);
+   if (o >= GOF(f7)   && o+sz <= GOF(f7) +SZB(f7))  return GOF(f7);
+   if (o >= GOF(f8)   && o+sz <= GOF(f8) +SZB(f8))  return GOF(f8);
+   if (o >= GOF(f9)   && o+sz <= GOF(f9) +SZB(f9))  return GOF(f9);
+   if (o >= GOF(f10)  && o+sz <= GOF(f10)+SZB(f10)) return GOF(f10);
+   if (o >= GOF(f11)  && o+sz <= GOF(f11)+SZB(f11)) return GOF(f11);
+   if (o >= GOF(f12)  && o+sz <= GOF(f12)+SZB(f12)) return GOF(f12);
+   if (o >= GOF(f13)  && o+sz <= GOF(f13)+SZB(f13)) return GOF(f13);
+   if (o >= GOF(f14)  && o+sz <= GOF(f14)+SZB(f14)) return GOF(f14);
+   if (o >= GOF(f15)  && o+sz <= GOF(f15)+SZB(f15)) return GOF(f15);
+   if (o >= GOF(f16)  && o+sz <= GOF(f16)+SZB(f16)) return GOF(f16);
+   if (o >= GOF(f17)  && o+sz <= GOF(f17)+SZB(f17)) return GOF(f17);
+   if (o >= GOF(f18)  && o+sz <= GOF(f18)+SZB(f18)) return GOF(f18);
+   if (o >= GOF(f19)  && o+sz <= GOF(f19)+SZB(f19)) return GOF(f19);
+   if (o >= GOF(f20)  && o+sz <= GOF(f20)+SZB(f20)) return GOF(f20);
+   if (o >= GOF(f21)  && o+sz <= GOF(f21)+SZB(f21)) return GOF(f21);
+   if (o >= GOF(f22)  && o+sz <= GOF(f22)+SZB(f22)) return GOF(f22);
+   if (o >= GOF(f23)  && o+sz <= GOF(f23)+SZB(f23)) return GOF(f23);
+   if (o >= GOF(f24)  && o+sz <= GOF(f24)+SZB(f24)) return GOF(f24);
+   if (o >= GOF(f25)  && o+sz <= GOF(f25)+SZB(f25)) return GOF(f25);
+   if (o >= GOF(f26)  && o+sz <= GOF(f26)+SZB(f26)) return GOF(f26);
+   if (o >= GOF(f27)  && o+sz <= GOF(f27)+SZB(f27)) return GOF(f27);
+   if (o >= GOF(f28)  && o+sz <= GOF(f28)+SZB(f28)) return GOF(f28);
+   if (o >= GOF(f29)  && o+sz <= GOF(f29)+SZB(f29)) return GOF(f29);
+   if (o >= GOF(f30)  && o+sz <= GOF(f30)+SZB(f30)) return GOF(f30);
+   if (o >= GOF(f31)  && o+sz <= GOF(f31)+SZB(f31)) return GOF(f31);
+   if (o == GOF(fcsr) && sz == 4) return o;
+
+   if (o == GOF(EMNOTE)  && sz == 4) return -1;
+   if (o == GOF(CMSTART) && sz == 8) return -1;
+   if (o == GOF(CMLEN)   && sz == 8) return -1;
+   if (o == GOF(NRADDR)  && sz == 8) return -1; /* 64-bit guest address; untracked like CMSTART/CMLEN */
+
+   if (o == GOF(LLSC_SIZE) && sz == 8) return -1;
+   if (o == GOF(LLSC_ADDR) && sz == 8) return o;
+   if (o == GOF(LLSC_DATA) && sz == 8) return o;
+
+   VG_(printf)("MC_(get_otrack_shadow_offset)(riscv64)(off=%d,sz=%d)\n",
+               offset,szB);
+   tl_assert(0);
+#  undef GOF
+
 #  else
 #    error "FIXME: not implemented for this architecture"
 #  endif
@@ -1517,6 +1615,13 @@ IRType MC_(get_otrack_reg_array_equiv_int_type) ( IRRegArray* arr )
    VG_(printf)("\n");
    tl_assert(0);
 
+   /* ------------------- riscv64 ------------------- */
+#  elif defined(VGA_riscv64)
+   VG_(printf)("get_reg_array_equiv_int_type(riscv64): unhandled: ");
+   ppIRRegArray(arr);
+   VG_(printf)("\n");
+   tl_assert(0);
+
 #  else
 #    error "FIXME: not implemented for this architecture"
 #  endif
diff --git a/memcheck/tests/Makefile.am b/memcheck/tests/Makefile.am
index 0c3bb6055..ad4eba0fc 100644
--- a/memcheck/tests/Makefile.am
+++ b/memcheck/tests/Makefile.am
@@ -53,6 +53,9 @@ endif
 if VGCONF_PLATFORMS_INCLUDE_ARM64_LINUX
 SUBDIRS += arm64-linux
 endif
+if VGCONF_PLATFORMS_INCLUDE_RISCV64_LINUX
+SUBDIRS += riscv64-linux
+endif
 if VGCONF_PLATFORMS_INCLUDE_X86_SOLARIS
 SUBDIRS += x86-solaris
 endif
@@ -67,7 +70,7 @@ SUBDIRS += amd64-freebsd
 endif
 
 DIST_SUBDIRS = x86 amd64 arm64 ppc32 ppc64 s390x linux \
-		darwin solaris x86-linux amd64-linux arm64-linux \
+		darwin solaris x86-linux amd64-linux arm64-linux riscv64-linux \
 		x86-solaris amd64-solaris mips32 mips64 \
 		freebsd amd64-freebsd x86-freebsd \
 		common .
diff --git a/memcheck/tests/atomic_incs.c b/memcheck/tests/atomic_incs.c
index 1c738c530..89b6e1f75 100644
--- a/memcheck/tests/atomic_incs.c
+++ b/memcheck/tests/atomic_incs.c
@@ -245,6 +245,26 @@ __attribute__((noinline)) void atomic_add_8bit ( char* p, int n )
       );
    } while (block[2] != 1);
 #endif
+#elif defined(VGA_riscv64)
+   unsigned long long int block[3]
+      = { (unsigned long long int)p, (unsigned long long int)n,
+          0xFFFFFFFFFFFFFFFFULL};
+   do {
+      __asm__ __volatile__(
+         "mv     t0, %0"         "\n\t"
+         "ld     t1, (t0)"       "\n\t" // p
+         "ld     t2, 8(t0)"      "\n\t" // n
+         "lr.w   t3, (t1)"       "\n\t"
+         "slli   t3, t3, 56"     "\n\t" // sign-extend
+         "srai   t3, t3, 56"     "\n\t"
+         "add    t3, t3, t2"     "\n\t"
+         "sc.w   t4, t3, (t1)"   "\n\t"
+         "sd     t4, 16(t0)"     "\n\t"
+         : /*out*/
+         : /*in*/ "r"(&block[0])
+         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
+      );
+   } while (block[2] != 0);
 #else
 # error "Unsupported arch"
 #endif
@@ -461,6 +481,26 @@ __attribute__((noinline)) void atomic_add_16bit ( short* p, int n )
       );
    } while (block[2] != 1);
 #endif
+#elif defined(VGA_riscv64)
+   unsigned long long int block[3]
+   = { (unsigned long long int)p, (unsigned long long int)n,
+       0xFFFFFFFFFFFFFFFFULL};
+   do {
+      __asm__ __volatile__(
+         "mv     t0, %0"         "\n\t"
+         "ld     t1, (t0)"       "\n\t" // p
+         "ld     t2, 8(t0)"      "\n\t" // n
+         "lr.w   t3, (t1)"       "\n\t"
+         "slli   t3, t3, 48"     "\n\t" // sign-extend
+         "srai   t3, t3, 48"     "\n\t"
+         "add    t3, t3, t2"     "\n\t"
+         "sc.w   t4, t3, (t1)"   "\n\t"
+         "sd     t4, 16(t0)"     "\n\t"
+         : /*out*/
+         : /*in*/ "r"(&block[0])
+         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
+      );
+   } while (block[2] != 0);
 #else
 # error "Unsupported arch"
 #endif
@@ -616,6 +656,24 @@ __attribute__((noinline)) void atomic_add_32bit ( int* p, int n )
          : /*trash*/ "memory", "t0", "t1", "t2", "t3"
       );
    } while (block[2] != 1);
+#elif defined(VGA_riscv64)
+   unsigned long long int block[3]
+   = { (unsigned long long int)p, (unsigned long long int)n,
+       0xFFFFFFFFFFFFFFFFULL};
+   do {
+      __asm__ __volatile__(
+         "mv     t0, %0"         "\n\t"
+         "ld     t1, (t0)"       "\n\t" // p
+         "ld     t2, 8(t0)"      "\n\t" // n
+         "lr.w   t3, (t1)"       "\n\t"
+         "add    t3, t3, t2"     "\n\t"
+         "sc.w   t4, t3, (t1)"   "\n\t"
+         "sd     t4, 16(t0)"     "\n\t"
+         : /*out*/
+         : /*in*/ "r"(&block[0])
+         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
+      );
+   } while (block[2] != 0);
 #else
 # error "Unsupported arch"
 #endif
@@ -718,6 +776,24 @@ __attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n )
          : /*trash*/ "memory", "t0", "t1", "t2", "t3"
       );
    } while (block[2] != 1);
+#elif defined(VGA_riscv64)
+   unsigned long long int block[3]
+   = { (unsigned long long int)p, (unsigned long long int)n,
+       0xFFFFFFFFFFFFFFFFULL};
+   do {
+      __asm__ __volatile__(
+         "mv     t0, %0"         "\n\t"
+         "ld     t1, (t0)"       "\n\t" // p
+         "ld     t2, 8(t0)"      "\n\t" // n
+         "lr.d   t3, (t1)"       "\n\t"
+         "add    t3, t3, t2"     "\n\t"
+         "sc.d   t4, t3, (t1)"   "\n\t"
+         "sd     t4, 16(t0)"     "\n\t"
+         : /*out*/
+         : /*in*/ "r"(&block[0])
+         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
+      );
+   } while (block[2] != 0);
 #else
 # error "Unsupported arch"
 #endif
@@ -731,7 +807,7 @@ __attribute__((noinline)) void atomic_add_128bit ( MyU128* p,
     || defined(VGA_amd64) \
     || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
     || defined(VGA_arm) \
-    || defined(VGA_s390x)
+    || defined(VGA_s390x) || defined(VGA_riscv64)
    /* do nothing; is not supported */
 #elif defined(VGA_arm64)
    unsigned long long int block[3]
diff --git a/memcheck/tests/leak-segv-jmp.c b/memcheck/tests/leak-segv-jmp.c
index 30fe2a1a9..15cc9f8fb 100644
--- a/memcheck/tests/leak-segv-jmp.c
+++ b/memcheck/tests/leak-segv-jmp.c
@@ -183,6 +183,23 @@ extern UWord do_syscall_WRK (
    return out;
 }
 
+#elif defined(VGP_riscv64_linux)
+extern UWord do_syscall_WRK (      /* raw syscall stub, implemented in the asm block below */
+          UWord a1, UWord a2, UWord a3,
+          UWord a4, UWord a5, UWord a6,
+          UWord syscall_no         /* 7th argument; presumably arrives in register a6 (RISC-V C ABI) */
+       );
+asm(
+".text\n"
+".globl do_syscall_WRK\n"
+"do_syscall_WRK:\n"
+"        mv a7, a6\n"              /* copy syscall_no into a7, where Linux/riscv expects the syscall number */
+"        li a6, 0\n"               /* zero a6 now that its value has been moved */
+"        ecall\n"                  /* trap into the kernel; first six parameters are left untouched */
+"        ret\n"                    /* return to the caller without modifying a0 after the ecall */
+".previous\n"
+);
+
 #elif defined(VGP_x86_solaris)
 extern ULong
 do_syscall_WRK(UWord a1, UWord a2, UWord a3,
@@ -369,7 +386,7 @@ static void non_simd_mprotect (long tid, void* addr, long len)
                                     &err);
    if (err)
       mprotect_result = -1;
-#elif defined(VGP_arm64_linux)
+#elif defined(VGP_arm64_linux) || defined(VGP_riscv64_linux)
    mprotect_result = do_syscall_WRK((UWord) addr, len, PROT_NONE,
                                     0, 0, 0,
                                     __NR_mprotect);
diff --git a/memcheck/tests/leak-segv-jmp.stderr.exp b/memcheck/tests/leak-segv-jmp.stderr.exp
index 147bdf8cd..e18418d44 100644
--- a/memcheck/tests/leak-segv-jmp.stderr.exp
+++ b/memcheck/tests/leak-segv-jmp.stderr.exp
@@ -14,8 +14,8 @@ To see them, rerun with: --leak-check=full --show-leak-kinds=all
 expecting a leak
 1,000 bytes in 1 blocks are definitely lost in loss record ... of ...
    at 0x........: malloc (vg_replace_malloc.c:...)
-   by 0x........: f (leak-segv-jmp.c:420)
-   by 0x........: main (leak-segv-jmp.c:495)
+   by 0x........: f (leak-segv-jmp.c:437)
+   by 0x........: main (leak-segv-jmp.c:512)
 
 LEAK SUMMARY:
    definitely lost: 1,000 bytes in 1 blocks
@@ -30,8 +30,8 @@ mprotect result 0
 expecting a leak again
 1,000 bytes in 1 blocks are definitely lost in loss record ... of ...
    at 0x........: malloc (vg_replace_malloc.c:...)
-   by 0x........: f (leak-segv-jmp.c:420)
-   by 0x........: main (leak-segv-jmp.c:495)
+   by 0x........: f (leak-segv-jmp.c:437)
+   by 0x........: main (leak-segv-jmp.c:512)
 
 LEAK SUMMARY:
    definitely lost: 1,000 bytes in 1 blocks
@@ -46,8 +46,8 @@ full mprotect result 0
 expecting a leak again after full mprotect
 1,000 bytes in 1 blocks are definitely lost in loss record ... of ...
    at 0x........: malloc (vg_replace_malloc.c:...)
-   by 0x........: f (leak-segv-jmp.c:420)
-   by 0x........: main (leak-segv-jmp.c:495)
+   by 0x........: f (leak-segv-jmp.c:437)
+   by 0x........: main (leak-segv-jmp.c:512)
 
 LEAK SUMMARY:
    definitely lost: 1,000 bytes in 1 blocks
@@ -62,13 +62,13 @@ mprotect result 0
 expecting heuristic not to crash after full mprotect
 1,000 bytes in 1 blocks are definitely lost in loss record ... of ...
    at 0x........: malloc (vg_replace_malloc.c:...)
-   by 0x........: f (leak-segv-jmp.c:420)
-   by 0x........: main (leak-segv-jmp.c:495)
+   by 0x........: f (leak-segv-jmp.c:437)
+   by 0x........: main (leak-segv-jmp.c:512)
 
 200,000 bytes in 1 blocks are possibly lost in loss record ... of ...
    at 0x........: calloc (vg_replace_malloc.c:...)
-   by 0x........: f (leak-segv-jmp.c:467)
-   by 0x........: main (leak-segv-jmp.c:495)
+   by 0x........: f (leak-segv-jmp.c:484)
+   by 0x........: main (leak-segv-jmp.c:512)
 
 LEAK SUMMARY:
    definitely lost: 1,000 bytes in 1 blocks
diff --git a/memcheck/tests/leak.h b/memcheck/tests/leak.h
index bf78d5866..f9a2db290 100644
--- a/memcheck/tests/leak.h
+++ b/memcheck/tests/leak.h
@@ -181,6 +181,11 @@
       __asm__ __volatile__ ("mov x17, 0\n\t");                              \
       __asm__ __volatile__ ("mov x18, 0\n\t");                              \
    } while (0)
+#elif defined(__riscv)
+#define CLEAR_CALLER_SAVED_REGS \
+  do { /* riscv: zeroes a0 only; no other register is cleared here */ \
+    __asm__ __volatile__( "li a0, 0" : : :/*trash*/"a0" ); \
+  } while (0)
 #else
 #define CLEAR_CALLER_SAVED_REGS  /*nothing*/
 #endif
diff --git a/memcheck/tests/riscv64-linux/Makefile.am b/memcheck/tests/riscv64-linux/Makefile.am
new file mode 100644
index 000000000..b2757b7ca
--- /dev/null
+++ b/memcheck/tests/riscv64-linux/Makefile.am
@@ -0,0 +1,18 @@
+# riscv64-linux memcheck tests: context_float, context_integer, scalar.
+include $(top_srcdir)/Makefile.tool-tests.am
+
+dist_noinst_SCRIPTS = filter_stderr
+
+EXTRA_DIST = \
+	context_float.stdout.exp context_float.stderr.exp context_float.vgtest \
+	context_integer.stdout.exp context_integer.stderr.exp context_integer.vgtest \
+	scalar.stderr.exp scalar.vgtest
+
+check_PROGRAMS = \
+	context_float \
+	context_integer \
+	scalar
+
+AM_CFLAGS    += @FLAG_M64@
+AM_CXXFLAGS  += @FLAG_M64@
+AM_CCASFLAGS += @FLAG_M64@
diff --git a/memcheck/tests/riscv64-linux/context_float.c b/memcheck/tests/riscv64-linux/context_float.c
new file mode 100644
index 000000000..462385a5c
--- /dev/null
+++ b/memcheck/tests/riscv64-linux/context_float.c
@@ -0,0 +1,548 @@
+/* Test if values in floating-point registers are correctly propagated into and
+   out of a signal handler and also check that the same applies for
+   uninitialised values and their origins.
+
+   Register usage in the test:
+              before signal -> in signal handler    -> after return
+   f0      -- 0,def         -> unchanged            -> 0,def
+   f1      -- 0,undef       -> unchanged            -> 0,undef
+   f2      -- 0,def         -> set to 0,undef       -> 0,undef
+   f3      -- 0,undef       -> set to 0,def         -> 0,def
+   f4      -- 1,def         -> increment by 1,def   -> 2,def
+   f5      -- 1,undef       -> increment by 1,def   -> 2,undef
+   f6      -- 1,def         -> increment by 1,undef -> 2,undef
+   f7      -- 1,undef       -> increment by 1,undef -> 2,undef
+   f8      -- DBL_MAX,def   -> unchanged            -> DBL_MAX,def
+   f9      -- DBL_MAX,undef -> unchanged            -> DBL_MAX,undef
+   f10     -- DBL_MAX,def   -> set to 0,undef       -> 0,undef
+   f11     -- DBL_MAX,undef -> set to 0,def         -> 0,def
+   f12     -- 0,def         -> set to DBL_MAX,def   -> DBL_MAX,def
+   f13     -- 0,undef       -> set to DBL_MAX,undef -> DBL_MAX,undef
+   f14     -- 0,def         -> decrement by 0,def   -> 0,def
+   f15     -- 0,undef       -> decrement by 0,def   -> 0,undef
+   f16     -- 0,def         -> decrement by 0,undef -> 0,undef
+   f17     -- 0,undef       -> decrement by 0,undef -> 0,undef
+   f18     -- 0,def         -> decrement by 1,def   -> -1,def
+   f19     -- 0,undef       -> decrement by 1,def   -> -1,undef
+   f20     -- 0,def         -> decrement by 1,undef -> -1,undef
+   f21     -- 0,undef       -> decrement by 1,undef -> -1,undef
+   f22-f30 -- 0,def         -> set to 1,undef       -> 1,undef
+   f31     -- 1,undef       -> set to 0,def         -> 0,def
+   fcsr:
+    fflags -- 0b10101,def   -> set to 0b01010,undef -> 0b01010,undef
+    frm    -- 0b001,undef   -> set to 0b100,def     -> 0b100,def
+ */
+
+#include <assert.h>
+#include <float.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <sys/ucontext.h>
+#include <unistd.h>
+#include <valgrind.h>
+
+typedef union {
+   unsigned long u64; /* raw 64-bit pattern */
+   double        f64; /* the same bits viewed as a double */
+} uf64;
+
+static ucontext_t    uc;     /* copy of the context seen by sighandler() */
+static unsigned long x0, x1; /* read from uninitialised heap words in main() */
+static unsigned long dbl_max, dbl_p1, dbl_m1, dbl_p2; /* raw double bits */
+
+static void sighandler(int sig, siginfo_t* sip, void* arg)
+{
+   ucontext_t* ucp = (ucontext_t*)arg;
+
+   uc = *ucp; /* keep a copy of the incoming context for main() to check */
+
+   /* Zero fcsr first so that an undefined fcsr cannot leak into the
+      floating-point arithmetic performed below. */
+   __asm__ __volatile__("fscsr zero");
+
+#define FPREG_MOD(fpreg, op, mod)                                              \
+   do {                                                                        \
+      uf64 t1, t2;                                                             \
+      t1.u64 = fpreg;                                                          \
+      t2.u64 = mod;                                                            \
+      t1.f64 op t2.f64;                                                        \
+      fpreg = t1.u64;                                                          \
+   } while (0)
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[2], =, x0); /* = 0,undef */
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[3], =, 0); /* = 0,def */
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[4], +=, dbl_p1);
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[5], +=, dbl_p1);
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[6], +=, dbl_p1 + x1);
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[7], +=, dbl_p1 + x1);
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[10], =, x0);
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[11], =, 0);
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[12], =, dbl_max);
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[13], =, dbl_max + x0);
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[14], -=, 0);
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[15], -=, 0);
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[16], -=, x0);
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[17], -=, x0);
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[18], -=, dbl_p1);
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[19], -=, dbl_p1);
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[20], -=, dbl_p1 + x1);
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[21], -=, dbl_p1 + x1);
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[22], =, dbl_p1 + x1); /* f22-f30 */
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[23], =, dbl_p1 + x1);
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[24], =, dbl_p1 + x1);
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[25], =, dbl_p1 + x1);
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[26], =, dbl_p1 + x1);
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[27], =, dbl_p1 + x1);
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[28], =, dbl_p1 + x1);
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[29], =, dbl_p1 + x1);
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[30], =, dbl_p1 + x1);
+   FPREG_MOD(ucp->uc_mcontext.__fpregs.__d.__f[31], =, 0);
+#undef FPREG_MOD
+
+   ucp->uc_mcontext.__fpregs.__d.__fcsr =
+      0b100 << 5 | ((0b01010 | x0) & 0b11111); /* frm=0b100, fflags=0b01010 */
+}
+
+int main(void)
+{
+   /* Uninitialised, but we know px0[0] is 0x0. */
+   unsigned long* px0 = malloc(sizeof(*px0));
+   x0                 = px0[0];
+
+   /* Uninitialised, but we know px1[0] is 0x0. */
+   unsigned long* px1 = malloc(sizeof(*px1));
+   x1                 = px1[0];
+
+   uf64 tmp;
+   tmp.f64 = DBL_MAX;
+   dbl_max = tmp.u64;
+   tmp.f64 = 1.0;
+   dbl_p1  = tmp.u64;
+   tmp.f64 = -1.0;
+   dbl_m1  = tmp.u64;
+   tmp.f64 = 2.0;
+   dbl_p2  = tmp.u64;
+
+   struct sigaction sa = {.sa_flags = SA_SIGINFO}; /* 3-argument handler */
+   sa.sa_sigaction     = sighandler;
+   if (sigaction(SIGUSR1, &sa, NULL)) {
+      perror("sigaction");
+      return 1;
+   }
+
+   unsigned long regs_in[33] = { /* f0..f31 followed by fcsr */
+      0,
+      x0,
+      0,
+      x0,
+      dbl_p1,
+      dbl_p1 + x1,
+      dbl_p1,
+      dbl_p1 + x1,
+      dbl_max,
+      dbl_max + x0,
+      dbl_max,
+      dbl_max + x0,
+      0,
+      x0,
+      0,
+      x0,
+      0,
+      x0,
+      0,
+      x0,
+      0,
+      x0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      0,
+      dbl_p1 + x1,
+      ((0b001 | x0) & 0b111) << 5 | 0b10101,
+   };
+   unsigned long regs_out[33] = {}; /* same layout, stored after the signal */
+
+   pid_t                   pid          = getpid();
+   register unsigned long* t0 asm("t0") = regs_in;
+   register unsigned long* t1 asm("t1") = regs_out;
+   register unsigned long  a7 asm("a7") = SYS_kill;
+   register unsigned long  a0 asm("a0") = pid;
+   register unsigned long  a1 asm("a1") = SIGUSR1;
+   __asm__ __volatile__(
+      /* Spill all test registers, keep the 16-byte sp alignment. */
+      "add sp, sp, -272\n\t"
+      "fsd f0, 0(sp)\n\t"
+      "fsd f1, 8(sp)\n\t"
+      "fsd f2, 16(sp)\n\t"
+      "fsd f3, 24(sp)\n\t"
+      "fsd f4, 32(sp)\n\t"
+      "fsd f5, 40(sp)\n\t"
+      "fsd f6, 48(sp)\n\t"
+      "fsd f7, 56(sp)\n\t"
+      "fsd f8, 64(sp)\n\t"
+      "fsd f9, 72(sp)\n\t"
+      "fsd f10, 80(sp)\n\t"
+      "fsd f11, 88(sp)\n\t"
+      "fsd f12, 96(sp)\n\t"
+      "fsd f13, 104(sp)\n\t"
+      "fsd f14, 112(sp)\n\t"
+      "fsd f15, 120(sp)\n\t"
+      "fsd f16, 128(sp)\n\t"
+      "fsd f17, 136(sp)\n\t"
+      "fsd f18, 144(sp)\n\t"
+      "fsd f19, 152(sp)\n\t"
+      "fsd f20, 160(sp)\n\t"
+      "fsd f21, 168(sp)\n\t"
+      "fsd f22, 176(sp)\n\t"
+      "fsd f23, 184(sp)\n\t"
+      "fsd f24, 192(sp)\n\t"
+      "fsd f25, 200(sp)\n\t"
+      "fsd f26, 208(sp)\n\t"
+      "fsd f27, 216(sp)\n\t"
+      "fsd f28, 224(sp)\n\t"
+      "fsd f29, 232(sp)\n\t"
+      "fsd f30, 240(sp)\n\t"
+      "fsd f31, 248(sp)\n\t"
+      "frcsr t2\n\t"
+      "sd t2, 256(sp)\n\t"
+
+      /* Set values in the test registers. */
+      "fld f0, 0(%[in])\n\t"
+      "fld f1, 8(%[in])\n\t"
+      "fld f2, 16(%[in])\n\t"
+      "fld f3, 24(%[in])\n\t"
+      "fld f4, 32(%[in])\n\t"
+      "fld f5, 40(%[in])\n\t"
+      "fld f6, 48(%[in])\n\t"
+      "fld f7, 56(%[in])\n\t"
+      "fld f8, 64(%[in])\n\t"
+      "fld f9, 72(%[in])\n\t"
+      "fld f10, 80(%[in])\n\t"
+      "fld f11, 88(%[in])\n\t"
+      "fld f12, 96(%[in])\n\t"
+      "fld f13, 104(%[in])\n\t"
+      "fld f14, 112(%[in])\n\t"
+      "fld f15, 120(%[in])\n\t"
+      "fld f16, 128(%[in])\n\t"
+      "fld f17, 136(%[in])\n\t"
+      "fld f18, 144(%[in])\n\t"
+      "fld f19, 152(%[in])\n\t"
+      "fld f20, 160(%[in])\n\t"
+      "fld f21, 168(%[in])\n\t"
+      "fld f22, 176(%[in])\n\t"
+      "fld f23, 184(%[in])\n\t"
+      "fld f24, 192(%[in])\n\t"
+      "fld f25, 200(%[in])\n\t"
+      "fld f26, 208(%[in])\n\t"
+      "fld f27, 216(%[in])\n\t"
+      "fld f28, 224(%[in])\n\t"
+      "fld f29, 232(%[in])\n\t"
+      "fld f30, 240(%[in])\n\t"
+      "fld f31, 248(%[in])\n\t"
+      "ld t2, 256(%[in])\n\t"
+      "fscsr t2\n\t"
+
+      /* Trigger the signal handler. */
+      "ecall\n\t"
+
+      /* Store updated values in the test registers. */
+      "fsd f0, 0(%[out])\n\t"
+      "fsd f1, 8(%[out])\n\t"
+      "fsd f2, 16(%[out])\n\t"
+      "fsd f3, 24(%[out])\n\t"
+      "fsd f4, 32(%[out])\n\t"
+      "fsd f5, 40(%[out])\n\t"
+      "fsd f6, 48(%[out])\n\t"
+      "fsd f7, 56(%[out])\n\t"
+      "fsd f8, 64(%[out])\n\t"
+      "fsd f9, 72(%[out])\n\t"
+      "fsd f10, 80(%[out])\n\t"
+      "fsd f11, 88(%[out])\n\t"
+      "fsd f12, 96(%[out])\n\t"
+      "fsd f13, 104(%[out])\n\t"
+      "fsd f14, 112(%[out])\n\t"
+      "fsd f15, 120(%[out])\n\t"
+      "fsd f16, 128(%[out])\n\t"
+      "fsd f17, 136(%[out])\n\t"
+      "fsd f18, 144(%[out])\n\t"
+      "fsd f19, 152(%[out])\n\t"
+      "fsd f20, 160(%[out])\n\t"
+      "fsd f21, 168(%[out])\n\t"
+      "fsd f22, 176(%[out])\n\t"
+      "fsd f23, 184(%[out])\n\t"
+      "fsd f24, 192(%[out])\n\t"
+      "fsd f25, 200(%[out])\n\t"
+      "fsd f26, 208(%[out])\n\t"
+      "fsd f27, 216(%[out])\n\t"
+      "fsd f28, 224(%[out])\n\t"
+      "fsd f29, 232(%[out])\n\t"
+      "fsd f30, 240(%[out])\n\t"
+      "fsd f31, 248(%[out])\n\t"
+      "frcsr t2\n\t"
+      "sd t2, 256(%[out])\n\t"
+
+      /* Restore their original values. */
+      "fld f0, 0(sp)\n\t"
+      "fld f1, 8(sp)\n\t"
+      "fld f2, 16(sp)\n\t"
+      "fld f3, 24(sp)\n\t"
+      "fld f4, 32(sp)\n\t"
+      "fld f5, 40(sp)\n\t"
+      "fld f6, 48(sp)\n\t"
+      "fld f7, 56(sp)\n\t"
+      "fld f8, 64(sp)\n\t"
+      "fld f9, 72(sp)\n\t"
+      "fld f10, 80(sp)\n\t"
+      "fld f11, 88(sp)\n\t"
+      "fld f12, 96(sp)\n\t"
+      "fld f13, 104(sp)\n\t"
+      "fld f14, 112(sp)\n\t"
+      "fld f15, 120(sp)\n\t"
+      "fld f16, 128(sp)\n\t"
+      "fld f17, 136(sp)\n\t"
+      "fld f18, 144(sp)\n\t"
+      "fld f19, 152(sp)\n\t"
+      "fld f20, 160(sp)\n\t"
+      "fld f21, 168(sp)\n\t"
+      "fld f22, 176(sp)\n\t"
+      "fld f23, 184(sp)\n\t"
+      "fld f24, 192(sp)\n\t"
+      "fld f25, 200(sp)\n\t"
+      "fld f26, 208(sp)\n\t"
+      "fld f27, 216(sp)\n\t"
+      "fld f28, 224(sp)\n\t"
+      "fld f29, 232(sp)\n\t"
+      "fld f30, 240(sp)\n\t"
+      "fld f31, 248(sp)\n\t"
+      "ld t2, 256(sp)\n\t"
+      "fscsr t2\n\t"
+      "add sp, sp, 272\n\t"
+      : "+r"(a0) /* ecall overwrites a0 with the syscall return value */
+      : [in] "r"(t0), [out] "r"(t1), "r"(a7), "r"(a1)
+      : "t2", "memory");
+
+   printf("Values before the signal:\n");
+   VALGRIND_DISABLE_ERROR_REPORTING;
+   printf("   f0=%#lx\n", regs_in[0]);
+   printf("   f1=%#lx\n", regs_in[1]);
+   printf("   f2=%#lx\n", regs_in[2]);
+   printf("   f3=%#lx\n", regs_in[3]);
+   printf("   f4=%#lx\n", regs_in[4]);
+   printf("   f5=%#lx\n", regs_in[5]);
+   printf("   f6=%#lx\n", regs_in[6]);
+   printf("   f7=%#lx\n", regs_in[7]);
+   printf("   f8=%#lx\n", regs_in[8]);
+   printf("   f9=%#lx\n", regs_in[9]);
+   printf("  f10=%#lx\n", regs_in[10]);
+   printf("  f11=%#lx\n", regs_in[11]);
+   printf("  f12=%#lx\n", regs_in[12]);
+   printf("  f13=%#lx\n", regs_in[13]);
+   printf("  f14=%#lx\n", regs_in[14]);
+   printf("  f15=%#lx\n", regs_in[15]);
+   printf("  f16=%#lx\n", regs_in[16]);
+   printf("  f17=%#lx\n", regs_in[17]);
+   printf("  f18=%#lx\n", regs_in[18]);
+   printf("  f19=%#lx\n", regs_in[19]);
+   printf("  f20=%#lx\n", regs_in[20]);
+   printf("  f21=%#lx\n", regs_in[21]);
+   printf("  f22=%#lx\n", regs_in[22]);
+   printf("  f23=%#lx\n", regs_in[23]);
+   printf("  f24=%#lx\n", regs_in[24]);
+   printf("  f25=%#lx\n", regs_in[25]);
+   printf("  f26=%#lx\n", regs_in[26]);
+   printf("  f27=%#lx\n", regs_in[27]);
+   printf("  f28=%#lx\n", regs_in[28]);
+   printf("  f29=%#lx\n", regs_in[29]);
+   printf("  f30=%#lx\n", regs_in[30]);
+   printf("  f31=%#lx\n", regs_in[31]);
+   printf(" fcsr=%#lx\n", regs_in[32]);
+   VALGRIND_ENABLE_ERROR_REPORTING;
+   /* Check which registers contain uninitialized values. */
+   assert(regs_in[0] == 0);
+   assert(regs_in[1] == 0);
+   assert(regs_in[2] == 0);
+   assert(regs_in[3] == 0);
+   assert(regs_in[4] == dbl_p1);
+   assert(regs_in[5] == dbl_p1);
+   assert(regs_in[6] == dbl_p1);
+   assert(regs_in[7] == dbl_p1);
+   assert(regs_in[8] == dbl_max);
+   assert(regs_in[9] == dbl_max);
+   assert(regs_in[10] == dbl_max);
+   assert(regs_in[11] == dbl_max);
+   assert(regs_in[12] == 0);
+   assert(regs_in[13] == 0);
+   assert(regs_in[14] == 0);
+   assert(regs_in[15] == 0);
+   assert(regs_in[16] == 0);
+   assert(regs_in[17] == 0);
+   assert(regs_in[18] == 0);
+   assert(regs_in[19] == 0);
+   assert(regs_in[20] == 0);
+   assert(regs_in[21] == 0);
+   assert(regs_in[22] == 0);
+   assert(regs_in[23] == 0);
+   assert(regs_in[24] == 0);
+   assert(regs_in[25] == 0);
+   assert(regs_in[26] == 0);
+   assert(regs_in[27] == 0);
+   assert(regs_in[28] == 0);
+   assert(regs_in[29] == 0);
+   assert(regs_in[30] == 0);
+   assert(regs_in[31] == dbl_p1);
+   assert(((regs_in[32] >> 0) & 0b11111) == 0b10101);
+   assert(((regs_in[32] >> 5) & 0b111) == 0b001);
+
+   printf("Values in the signal handler:\n");
+   VALGRIND_DISABLE_ERROR_REPORTING;
+   printf("   f0=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[0]);
+   printf("   f1=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[1]);
+   printf("   f2=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[2]);
+   printf("   f3=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[3]);
+   printf("   f4=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[4]);
+   printf("   f5=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[5]);
+   printf("   f6=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[6]);
+   printf("   f7=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[7]);
+   printf("   f8=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[8]);
+   printf("   f9=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[9]);
+   printf("  f10=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[10]);
+   printf("  f11=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[11]);
+   printf("  f12=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[12]);
+   printf("  f13=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[13]);
+   printf("  f14=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[14]);
+   printf("  f15=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[15]);
+   printf("  f16=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[16]);
+   printf("  f17=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[17]);
+   printf("  f18=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[18]);
+   printf("  f19=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[19]);
+   printf("  f20=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[20]);
+   printf("  f21=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[21]);
+   printf("  f22=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[22]);
+   printf("  f23=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[23]);
+   printf("  f24=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[24]);
+   printf("  f25=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[25]);
+   printf("  f26=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[26]);
+   printf("  f27=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[27]);
+   printf("  f28=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[28]);
+   printf("  f29=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[29]);
+   printf("  f30=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[30]);
+   printf("  f31=%#llx\n", uc.uc_mcontext.__fpregs.__d.__f[31]);
+   printf(" fcsr=%#x\n", uc.uc_mcontext.__fpregs.__d.__fcsr);
+   VALGRIND_ENABLE_ERROR_REPORTING;
+   assert(uc.uc_mcontext.__fpregs.__d.__f[0] == 0);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[1] == 0);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[2] == 0);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[3] == 0);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[4] == dbl_p1);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[5] == dbl_p1);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[6] == dbl_p1);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[7] == dbl_p1);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[8] == dbl_max);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[9] == dbl_max);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[10] == dbl_max);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[11] == dbl_max);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[12] == 0);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[13] == 0);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[14] == 0);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[15] == 0);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[16] == 0);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[17] == 0);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[18] == 0);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[19] == 0);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[20] == 0);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[21] == 0);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[22] == 0);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[23] == 0);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[24] == 0);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[25] == 0);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[26] == 0);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[27] == 0);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[28] == 0);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[29] == 0);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[30] == 0);
+   assert(uc.uc_mcontext.__fpregs.__d.__f[31] == dbl_p1);
+   assert(((uc.uc_mcontext.__fpregs.__d.__fcsr >> 0) & 0b11111) == 0b10101);
+   assert(((uc.uc_mcontext.__fpregs.__d.__fcsr >> 5) & 0b111) == 0b001);
+
+   printf("Values after return from the signal handler:\n");
+   VALGRIND_DISABLE_ERROR_REPORTING;
+   printf("   f0=%#lx\n", regs_out[0]);
+   printf("   f1=%#lx\n", regs_out[1]);
+   printf("   f2=%#lx\n", regs_out[2]);
+   printf("   f3=%#lx\n", regs_out[3]);
+   printf("   f4=%#lx\n", regs_out[4]);
+   printf("   f5=%#lx\n", regs_out[5]);
+   printf("   f6=%#lx\n", regs_out[6]);
+   printf("   f7=%#lx\n", regs_out[7]);
+   printf("   f8=%#lx\n", regs_out[8]);
+   printf("   f9=%#lx\n", regs_out[9]);
+   printf("  f10=%#lx\n", regs_out[10]);
+   printf("  f11=%#lx\n", regs_out[11]);
+   printf("  f12=%#lx\n", regs_out[12]);
+   printf("  f13=%#lx\n", regs_out[13]);
+   printf("  f14=%#lx\n", regs_out[14]);
+   printf("  f15=%#lx\n", regs_out[15]);
+   printf("  f16=%#lx\n", regs_out[16]);
+   printf("  f17=%#lx\n", regs_out[17]);
+   printf("  f18=%#lx\n", regs_out[18]);
+   printf("  f19=%#lx\n", regs_out[19]);
+   printf("  f20=%#lx\n", regs_out[20]);
+   printf("  f21=%#lx\n", regs_out[21]);
+   printf("  f22=%#lx\n", regs_out[22]);
+   printf("  f23=%#lx\n", regs_out[23]);
+   printf("  f24=%#lx\n", regs_out[24]);
+   printf("  f25=%#lx\n", regs_out[25]);
+   printf("  f26=%#lx\n", regs_out[26]);
+   printf("  f27=%#lx\n", regs_out[27]);
+   printf("  f28=%#lx\n", regs_out[28]);
+   printf("  f29=%#lx\n", regs_out[29]);
+   printf("  f30=%#lx\n", regs_out[30]);
+   printf("  f31=%#lx\n", regs_out[31]);
+   printf(" fcsr=%#lx\n", regs_out[32]);
+   VALGRIND_ENABLE_ERROR_REPORTING;
+   assert(regs_out[0] == 0);
+   assert(regs_out[1] == 0);
+   assert(regs_out[2] == 0);
+   assert(regs_out[3] == 0);
+   assert(regs_out[4] == dbl_p2);
+   assert(regs_out[5] == dbl_p2);
+   assert(regs_out[6] == dbl_p2);
+   assert(regs_out[7] == dbl_p2);
+   assert(regs_out[8] == dbl_max);
+   assert(regs_out[9] == dbl_max);
+   assert(regs_out[10] == 0);
+   assert(regs_out[11] == 0);
+   assert(regs_out[12] == dbl_max);
+   assert(regs_out[13] == dbl_max);
+   assert(regs_out[14] == 0);
+   assert(regs_out[15] == 0);
+   assert(regs_out[16] == 0);
+   assert(regs_out[17] == 0);
+   assert(regs_out[18] == dbl_m1);
+   assert(regs_out[19] == dbl_m1);
+   assert(regs_out[20] == dbl_m1);
+   assert(regs_out[21] == dbl_m1);
+   assert(regs_out[22] == dbl_p1);
+   assert(regs_out[23] == dbl_p1);
+   assert(regs_out[24] == dbl_p1);
+   assert(regs_out[25] == dbl_p1);
+   assert(regs_out[26] == dbl_p1);
+   assert(regs_out[27] == dbl_p1);
+   assert(regs_out[28] == dbl_p1);
+   assert(regs_out[29] == dbl_p1);
+   assert(regs_out[30] == dbl_p1);
+   assert(regs_out[31] == 0);
+   assert(((regs_out[32] >> 0) & 0b11111) == 0b01010);
+   assert(((regs_out[32] >> 5) & 0b111) == 0b100);
+
+   free(px0);
+   free(px1);
+
+   return 0;
+}
diff --git a/memcheck/tests/riscv64-linux/context_float.stderr.exp b/memcheck/tests/riscv64-linux/context_float.stderr.exp
new file mode 100644
index 000000000..34f59181d
--- /dev/null
+++ b/memcheck/tests/riscv64-linux/context_float.stderr.exp
@@ -0,0 +1,299 @@
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:368)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:370)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:372)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:114)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:374)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:114)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:376)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:378)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:380)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:382)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:384)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:386)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:388)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:398)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:114)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:400)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:439)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:441)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:443)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:114)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:445)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:114)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:447)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:449)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:451)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:453)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:455)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:457)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:459)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:469)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:114)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:471)
+ Uninitialised value was created
+   at 0x........: main (context_float.c:177)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:510)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:511)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:514)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:114)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:515)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:114)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:516)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:114)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:518)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:519)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:522)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:524)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:114)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:525)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:114)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:526)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:114)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:528)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:114)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:529)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:114)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:530)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:114)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:531)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:114)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:532)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:114)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:533)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:114)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:534)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:114)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:535)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:114)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:536)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:114)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:537)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:114)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:538)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:114)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:539)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:114)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_float.c:541)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_float.c:110)
+
diff --git a/memcheck/tests/riscv64-linux/context_float.stdout.exp b/memcheck/tests/riscv64-linux/context_float.stdout.exp
new file mode 100644
index 000000000..588063bf9
--- /dev/null
+++ b/memcheck/tests/riscv64-linux/context_float.stdout.exp
@@ -0,0 +1,102 @@
+Values before the signal:
+   f0=0
+   f1=0
+   f2=0
+   f3=0
+   f4=0x3ff0000000000000
+   f5=0x3ff0000000000000
+   f6=0x3ff0000000000000
+   f7=0x3ff0000000000000
+   f8=0x7fefffffffffffff
+   f9=0x7fefffffffffffff
+  f10=0x7fefffffffffffff
+  f11=0x7fefffffffffffff
+  f12=0
+  f13=0
+  f14=0
+  f15=0
+  f16=0
+  f17=0
+  f18=0
+  f19=0
+  f20=0
+  f21=0
+  f22=0
+  f23=0
+  f24=0
+  f25=0
+  f26=0
+  f27=0
+  f28=0
+  f29=0
+  f30=0
+  f31=0x3ff0000000000000
+ fcsr=0x35
+Values in the signal handler:
+   f0=0
+   f1=0
+   f2=0
+   f3=0
+   f4=0x3ff0000000000000
+   f5=0x3ff0000000000000
+   f6=0x3ff0000000000000
+   f7=0x3ff0000000000000
+   f8=0x7fefffffffffffff
+   f9=0x7fefffffffffffff
+  f10=0x7fefffffffffffff
+  f11=0x7fefffffffffffff
+  f12=0
+  f13=0
+  f14=0
+  f15=0
+  f16=0
+  f17=0
+  f18=0
+  f19=0
+  f20=0
+  f21=0
+  f22=0
+  f23=0
+  f24=0
+  f25=0
+  f26=0
+  f27=0
+  f28=0
+  f29=0
+  f30=0
+  f31=0x3ff0000000000000
+ fcsr=0x35
+Values after return from the signal handler:
+   f0=0
+   f1=0
+   f2=0
+   f3=0
+   f4=0x4000000000000000
+   f5=0x4000000000000000
+   f6=0x4000000000000000
+   f7=0x4000000000000000
+   f8=0x7fefffffffffffff
+   f9=0x7fefffffffffffff
+  f10=0
+  f11=0
+  f12=0x7fefffffffffffff
+  f13=0x7fefffffffffffff
+  f14=0
+  f15=0
+  f16=0
+  f17=0
+  f18=0xbff0000000000000
+  f19=0xbff0000000000000
+  f20=0xbff0000000000000
+  f21=0xbff0000000000000
+  f22=0x3ff0000000000000
+  f23=0x3ff0000000000000
+  f24=0x3ff0000000000000
+  f25=0x3ff0000000000000
+  f26=0x3ff0000000000000
+  f27=0x3ff0000000000000
+  f28=0x3ff0000000000000
+  f29=0x3ff0000000000000
+  f30=0x3ff0000000000000
+  f31=0
+ fcsr=0x8a
diff --git a/memcheck/tests/riscv64-linux/context_float.vgtest b/memcheck/tests/riscv64-linux/context_float.vgtest
new file mode 100644
index 000000000..0f2a88cd2
--- /dev/null
+++ b/memcheck/tests/riscv64-linux/context_float.vgtest
@@ -0,0 +1,2 @@
+prog: context_float
+vgopts: -q --track-origins=yes
diff --git a/memcheck/tests/riscv64-linux/context_integer.c b/memcheck/tests/riscv64-linux/context_integer.c
new file mode 100644
index 000000000..070a9f759
--- /dev/null
+++ b/memcheck/tests/riscv64-linux/context_integer.c
@@ -0,0 +1,374 @@
+/* Test if values in integer registers are correctly propagated into and out of
+   a signal handler and also check that the same applies for uninitialised
+   values and their origins.
+
+   Register usage in the test:
+   zero (x0) -- unused
+   ra   (x1) -- unused
+   sp   (x2) -- unused
+   gp   (x3) -- unused
+   tp   (x4) -- unused
+
+   t0   (x5) -- holds address of regs_in
+   t1   (x6) -- holds address of regs_out
+   a0  (x10) -- current pid
+   a1  (x11) -- SIGUSR1
+   a7  (x17) -- SYS_kill
+
+                before signal   -> in signal handler      -> after return
+   t2   (x7) -- 0,def           -> unchanged              -> 0,def
+   s0   (x8) -- 0,undef         -> unchanged              -> 0,undef
+   s1   (x9) -- 0,def           -> set to 0,undef         -> 0,undef
+   a2  (x12) -- 0,undef         -> set to 0,def           -> 0,def
+   a3  (x13) -- 1,def           -> increment by 1,def     -> 2,def
+   a4  (x14) -- 1,undef         -> increment by 1,def     -> 2,undef
+   a5  (x15) -- 1,def           -> increment by 1,undef   -> 2,undef
+   a6  (x16) -- 1,undef         -> increment by 1,undef   -> 2,undef
+   s2  (x18) -- ULONG_MAX,def   -> unchanged              -> ULONG_MAX,def
+   s3  (x19) -- ULONG_MAX,undef -> unchanged              -> ULONG_MAX,undef
+   s4  (x20) -- ULONG_MAX,def   -> set to 0,undef         -> 0,undef
+   s5  (x21) -- ULONG_MAX,undef -> set to 0,def           -> 0,def
+   s6  (x22) -- 0,def           -> set to ULONG_MAX,def   -> ULONG_MAX,def
+   s7  (x23) -- 0,undef         -> set to ULONG_MAX,undef -> ULONG_MAX,undef
+   s8  (x24) -- 0,def           -> decrement by 0,def     -> 0,def
+   s9  (x25) -- 0,undef         -> decrement by 0,def     -> 0,undef
+   s10 (x26) -- 0,def           -> decrement by 0,undef   -> 0,undef
+   s11 (x27) -- 0,undef         -> decrement by 0,undef   -> 0,undef
+   t3  (x28) -- 0,def           -> decrement by 1,def     -> ULONG_MAX,def
+   t4  (x29) -- 0,undef         -> decrement by 1,def     -> ULONG_MAX,undef
+   t5  (x30) -- 0,def           -> decrement by 1,undef   -> ULONG_MAX,undef
+   t6  (x31) -- 0,undef         -> decrement by 1,undef   -> ULONG_MAX,undef
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <sys/ucontext.h>
+#include <unistd.h>
+#include <valgrind.h>
+
+static ucontext_t    uc;
+static unsigned long x0, x1;
+
+static void sighandler(int sig, siginfo_t* sip, void* arg)
+{
+   ucontext_t*    ctx = (ucontext_t*)arg;
+   unsigned long* gr  = ctx->uc_mcontext.__gregs; /* gr[N] is register xN */
+   uc                 = *ctx; /* keep the incoming context for main() */
+
+   gr[9]  = x0;             /* s1:  set to 0,undef */
+   gr[12] = 0;              /* a2:  set to 0,def */
+   gr[13] += 1;             /* a3:  increment by 1,def */
+   gr[14] += 1;             /* a4:  increment by 1,def */
+   gr[15] += x1;            /* a5:  increment by 1,undef */
+   gr[16] += x1;            /* a6:  increment by 1,undef */
+   gr[20] = x0;             /* s4:  set to 0,undef */
+   gr[21] = 0;              /* s5:  set to 0,def */
+   gr[22] = ULONG_MAX;      /* s6:  set to ULONG_MAX,def */
+   gr[23] = ULONG_MAX + x0; /* s7:  set to ULONG_MAX,undef */
+   gr[24] -= 0;             /* s8:  decrement by 0,def */
+   gr[25] -= 0;             /* s9:  decrement by 0,def */
+   gr[26] -= x0;            /* s10: decrement by 0,undef */
+   gr[27] -= x0;            /* s11: decrement by 0,undef */
+   gr[28] -= 1;             /* t3:  decrement by 1,def */
+   gr[29] -= 1;             /* t4:  decrement by 1,def */
+   gr[30] -= x1;            /* t5:  decrement by 1,undef */
+   gr[31] -= x1;            /* t6:  decrement by 1,undef */
+}
+
+int main(void)
+{
+   /* Uninitialised, but we know px0[0] is 0x0. */
+   unsigned long* px0 = malloc(sizeof(*px0));
+   x0                 = px0[0]; /* 0,undef */
+
+   /* Uninitialised, but we know px1[0] is 0x0. */
+   unsigned long* px1 = malloc(sizeof(*px1));
+   x1                 = px1[0] + 1; /* 1,undef */
+
+   struct sigaction sa = {.sa_flags = SA_SIGINFO}; /* select sa_sigaction */
+   sa.sa_sigaction     = sighandler;
+   if (sigaction(SIGUSR1, &sa, NULL)) {
+      perror("sigaction");
+      return 1;
+   }
+
+   unsigned long regs_in[22] = {
+      0,         x0,             /* t2,  s0  */
+      0,         x0,             /* s1,  a2  */
+      1,         x1,             /* a3,  a4  */
+      1,         x1,             /* a5,  a6  */
+      ULONG_MAX, ULONG_MAX + x0, /* s2,  s3  */
+      ULONG_MAX, ULONG_MAX + x0, /* s4,  s5  */
+      0,         x0,             /* s6,  s7  */
+      0,         x0,             /* s8,  s9  */
+      0,         x0,             /* s10, s11 */
+      0,         x0,             /* t3,  t4  */
+      0,         x0,             /* t5,  t6  */
+   };
+   unsigned long regs_out[22] = {};
+
+   pid_t                   pid          = getpid();
+   register unsigned long* t0 asm("t0") = regs_in;
+   register unsigned long* t1 asm("t1") = regs_out;
+   register unsigned long  a7 asm("a7") = SYS_kill;
+   register unsigned long  a0 asm("a0") = pid;
+   register unsigned long  a1 asm("a1") = SIGUSR1;
+   __asm__ __volatile__(
+      /* Spill all test registers, keep the 16-byte sp alignment. */
+      "add sp, sp, -176\n\t"
+      "sd t2, 0(sp)\n\t"
+      "sd s0, 8(sp)\n\t"
+      "sd s1, 16(sp)\n\t"
+      "sd a2, 24(sp)\n\t"
+      "sd a3, 32(sp)\n\t"
+      "sd a4, 40(sp)\n\t"
+      "sd a5, 48(sp)\n\t"
+      "sd a6, 56(sp)\n\t"
+      "sd s2, 64(sp)\n\t"
+      "sd s3, 72(sp)\n\t"
+      "sd s4, 80(sp)\n\t"
+      "sd s5, 88(sp)\n\t"
+      "sd s6, 96(sp)\n\t"
+      "sd s7, 104(sp)\n\t"
+      "sd s8, 112(sp)\n\t"
+      "sd s9, 120(sp)\n\t"
+      "sd s10, 128(sp)\n\t"
+      "sd s11, 136(sp)\n\t"
+      "sd t3, 144(sp)\n\t"
+      "sd t4, 152(sp)\n\t"
+      "sd t5, 160(sp)\n\t"
+      "sd t6, 168(sp)\n\t"
+
+      /* Set values in the test registers. */
+      "ld t2, 0(%[in])\n\t"
+      "ld s0, 8(%[in])\n\t"
+      "ld s1, 16(%[in])\n\t"
+      "ld a2, 24(%[in])\n\t"
+      "ld a3, 32(%[in])\n\t"
+      "ld a4, 40(%[in])\n\t"
+      "ld a5, 48(%[in])\n\t"
+      "ld a6, 56(%[in])\n\t"
+      "ld s2, 64(%[in])\n\t"
+      "ld s3, 72(%[in])\n\t"
+      "ld s4, 80(%[in])\n\t"
+      "ld s5, 88(%[in])\n\t"
+      "ld s6, 96(%[in])\n\t"
+      "ld s7, 104(%[in])\n\t"
+      "ld s8, 112(%[in])\n\t"
+      "ld s9, 120(%[in])\n\t"
+      "ld s10, 128(%[in])\n\t"
+      "ld s11, 136(%[in])\n\t"
+      "ld t3, 144(%[in])\n\t"
+      "ld t4, 152(%[in])\n\t"
+      "ld t5, 160(%[in])\n\t"
+      "ld t6, 168(%[in])\n\t"
+
+      /* Trigger the signal handler: kill(pid, SIGUSR1) via a7, a0, a1. */
+      "ecall\n\t"
+
+      /* Store updated values in the test registers. */
+      "sd t2, 0(%[out])\n\t"
+      "sd s0, 8(%[out])\n\t"
+      "sd s1, 16(%[out])\n\t"
+      "sd a2, 24(%[out])\n\t"
+      "sd a3, 32(%[out])\n\t"
+      "sd a4, 40(%[out])\n\t"
+      "sd a5, 48(%[out])\n\t"
+      "sd a6, 56(%[out])\n\t"
+      "sd s2, 64(%[out])\n\t"
+      "sd s3, 72(%[out])\n\t"
+      "sd s4, 80(%[out])\n\t"
+      "sd s5, 88(%[out])\n\t"
+      "sd s6, 96(%[out])\n\t"
+      "sd s7, 104(%[out])\n\t"
+      "sd s8, 112(%[out])\n\t"
+      "sd s9, 120(%[out])\n\t"
+      "sd s10, 128(%[out])\n\t"
+      "sd s11, 136(%[out])\n\t"
+      "sd t3, 144(%[out])\n\t"
+      "sd t4, 152(%[out])\n\t"
+      "sd t5, 160(%[out])\n\t"
+      "sd t6, 168(%[out])\n\t"
+
+      /* Restore their original values. */
+      "ld t2, 0(sp)\n\t"
+      "ld s0, 8(sp)\n\t"
+      "ld s1, 16(sp)\n\t"
+      "ld a2, 24(sp)\n\t"
+      "ld a3, 32(sp)\n\t"
+      "ld a4, 40(sp)\n\t"
+      "ld a5, 48(sp)\n\t"
+      "ld a6, 56(sp)\n\t"
+      "ld s2, 64(sp)\n\t"
+      "ld s3, 72(sp)\n\t"
+      "ld s4, 80(sp)\n\t"
+      "ld s5, 88(sp)\n\t"
+      "ld s6, 96(sp)\n\t"
+      "ld s7, 104(sp)\n\t"
+      "ld s8, 112(sp)\n\t"
+      "ld s9, 120(sp)\n\t"
+      "ld s10, 128(sp)\n\t"
+      "ld s11, 136(sp)\n\t"
+      "ld t3, 144(sp)\n\t"
+      "ld t4, 152(sp)\n\t"
+      "ld t5, 160(sp)\n\t"
+      "ld t6, 168(sp)\n\t"
+      "add sp, sp, 176\n\t"
+      :
+      : [in] "r"(t0), [out] "r"(t1), "r"(a7), "r"(a0), "r"(a1)
+      : "memory");
+
+   printf("Values before the signal:\n");
+   VALGRIND_DISABLE_ERROR_REPORTING;
+   printf("  t2=%#lx\n", regs_in[0]);
+   printf("  s0=%#lx\n", regs_in[1]);
+   printf("  s1=%#lx\n", regs_in[2]);
+   printf("  a2=%#lx\n", regs_in[3]);
+   printf("  a3=%#lx\n", regs_in[4]);
+   printf("  a4=%#lx\n", regs_in[5]);
+   printf("  a5=%#lx\n", regs_in[6]);
+   printf("  a6=%#lx\n", regs_in[7]);
+   printf("  s2=%#lx\n", regs_in[8]);
+   printf("  s3=%#lx\n", regs_in[9]);
+   printf("  s4=%#lx\n", regs_in[10]);
+   printf("  s5=%#lx\n", regs_in[11]);
+   printf("  s6=%#lx\n", regs_in[12]);
+   printf("  s7=%#lx\n", regs_in[13]);
+   printf("  s8=%#lx\n", regs_in[14]);
+   printf("  s9=%#lx\n", regs_in[15]);
+   printf("  s10=%#lx\n", regs_in[16]);
+   printf("  s11=%#lx\n", regs_in[17]);
+   printf("  t3=%#lx\n", regs_in[18]);
+   printf("  t4=%#lx\n", regs_in[19]);
+   printf("  t5=%#lx\n", regs_in[20]);
+   printf("  t6=%#lx\n", regs_in[21]);
+   VALGRIND_ENABLE_ERROR_REPORTING;
+   /* Check which registers contain uninitialized values. */
+   assert(regs_in[0] == 0);
+   assert(regs_in[1] == 0);
+   assert(regs_in[2] == 0);
+   assert(regs_in[3] == 0);
+   assert(regs_in[4] == 1);
+   assert(regs_in[5] == 1);
+   assert(regs_in[6] == 1);
+   assert(regs_in[7] == 1);
+   assert(regs_in[8] == ULONG_MAX);
+   assert(regs_in[9] == ULONG_MAX);
+   assert(regs_in[10] == ULONG_MAX);
+   assert(regs_in[11] == ULONG_MAX);
+   assert(regs_in[12] == 0);
+   assert(regs_in[13] == 0);
+   assert(regs_in[14] == 0);
+   assert(regs_in[15] == 0);
+   assert(regs_in[16] == 0);
+   assert(regs_in[17] == 0);
+   assert(regs_in[18] == 0);
+   assert(regs_in[19] == 0);
+   assert(regs_in[20] == 0);
+   assert(regs_in[21] == 0);
+
+   printf("Values in the signal handler:\n");
+   VALGRIND_DISABLE_ERROR_REPORTING;
+   printf("  t2=%#lx\n", uc.uc_mcontext.__gregs[7]);
+   printf("  s0=%#lx\n", uc.uc_mcontext.__gregs[8]);
+   printf("  s1=%#lx\n", uc.uc_mcontext.__gregs[9]);
+   printf("  a2=%#lx\n", uc.uc_mcontext.__gregs[12]);
+   printf("  a3=%#lx\n", uc.uc_mcontext.__gregs[13]);
+   printf("  a4=%#lx\n", uc.uc_mcontext.__gregs[14]);
+   printf("  a5=%#lx\n", uc.uc_mcontext.__gregs[15]);
+   printf("  a6=%#lx\n", uc.uc_mcontext.__gregs[16]);
+   printf("  s2=%#lx\n", uc.uc_mcontext.__gregs[18]);
+   printf("  s3=%#lx\n", uc.uc_mcontext.__gregs[19]);
+   printf("  s4=%#lx\n", uc.uc_mcontext.__gregs[20]);
+   printf("  s5=%#lx\n", uc.uc_mcontext.__gregs[21]);
+   printf("  s6=%#lx\n", uc.uc_mcontext.__gregs[22]);
+   printf("  s7=%#lx\n", uc.uc_mcontext.__gregs[23]);
+   printf("  s8=%#lx\n", uc.uc_mcontext.__gregs[24]);
+   printf("  s9=%#lx\n", uc.uc_mcontext.__gregs[25]);
+   printf("  s10=%#lx\n", uc.uc_mcontext.__gregs[26]);
+   printf("  s11=%#lx\n", uc.uc_mcontext.__gregs[27]);
+   printf("  t3=%#lx\n", uc.uc_mcontext.__gregs[28]);
+   printf("  t4=%#lx\n", uc.uc_mcontext.__gregs[29]);
+   printf("  t5=%#lx\n", uc.uc_mcontext.__gregs[30]);
+   printf("  t6=%#lx\n", uc.uc_mcontext.__gregs[31]);
+   VALGRIND_ENABLE_ERROR_REPORTING;
+   assert(uc.uc_mcontext.__gregs[7] == 0);
+   assert(uc.uc_mcontext.__gregs[8] == 0);
+   assert(uc.uc_mcontext.__gregs[9] == 0);
+   assert(uc.uc_mcontext.__gregs[12] == 0);
+   assert(uc.uc_mcontext.__gregs[13] == 1);
+   assert(uc.uc_mcontext.__gregs[14] == 1);
+   assert(uc.uc_mcontext.__gregs[15] == 1);
+   assert(uc.uc_mcontext.__gregs[16] == 1);
+   assert(uc.uc_mcontext.__gregs[18] == ULONG_MAX);
+   assert(uc.uc_mcontext.__gregs[19] == ULONG_MAX);
+   assert(uc.uc_mcontext.__gregs[20] == ULONG_MAX);
+   assert(uc.uc_mcontext.__gregs[21] == ULONG_MAX);
+   assert(uc.uc_mcontext.__gregs[22] == 0);
+   assert(uc.uc_mcontext.__gregs[23] == 0);
+   assert(uc.uc_mcontext.__gregs[24] == 0);
+   assert(uc.uc_mcontext.__gregs[25] == 0);
+   assert(uc.uc_mcontext.__gregs[26] == 0);
+   assert(uc.uc_mcontext.__gregs[27] == 0);
+   assert(uc.uc_mcontext.__gregs[28] == 0);
+   assert(uc.uc_mcontext.__gregs[29] == 0);
+   assert(uc.uc_mcontext.__gregs[30] == 0);
+   assert(uc.uc_mcontext.__gregs[31] == 0);
+
+   printf("Values after return from the signal handler:\n");
+   VALGRIND_DISABLE_ERROR_REPORTING;
+   printf("  t2=%#lx\n", regs_out[0]);
+   printf("  s0=%#lx\n", regs_out[1]);
+   printf("  s1=%#lx\n", regs_out[2]);
+   printf("  a2=%#lx\n", regs_out[3]);
+   printf("  a3=%#lx\n", regs_out[4]);
+   printf("  a4=%#lx\n", regs_out[5]);
+   printf("  a5=%#lx\n", regs_out[6]);
+   printf("  a6=%#lx\n", regs_out[7]);
+   printf("  s2=%#lx\n", regs_out[8]);
+   printf("  s3=%#lx\n", regs_out[9]);
+   printf("  s4=%#lx\n", regs_out[10]);
+   printf("  s5=%#lx\n", regs_out[11]);
+   printf("  s6=%#lx\n", regs_out[12]);
+   printf("  s7=%#lx\n", regs_out[13]);
+   printf("  s8=%#lx\n", regs_out[14]);
+   printf("  s9=%#lx\n", regs_out[15]);
+   printf("  s10=%#lx\n", regs_out[16]);
+   printf("  s11=%#lx\n", regs_out[17]);
+   printf("  t3=%#lx\n", regs_out[18]);
+   printf("  t4=%#lx\n", regs_out[19]);
+   printf("  t5=%#lx\n", regs_out[20]);
+   printf("  t6=%#lx\n", regs_out[21]);
+   VALGRIND_ENABLE_ERROR_REPORTING;
+   assert(regs_out[0] == 0);
+   assert(regs_out[1] == 0);
+   assert(regs_out[2] == 0);
+   assert(regs_out[3] == 0);
+   assert(regs_out[4] == 2);
+   assert(regs_out[5] == 2);
+   assert(regs_out[6] == 2);
+   assert(regs_out[7] == 2);
+   assert(regs_out[8] == ULONG_MAX);
+   assert(regs_out[9] == ULONG_MAX);
+   assert(regs_out[10] == 0);
+   assert(regs_out[11] == 0);
+   assert(regs_out[12] == ULONG_MAX);
+   assert(regs_out[13] == ULONG_MAX);
+   assert(regs_out[14] == 0);
+   assert(regs_out[15] == 0);
+   assert(regs_out[16] == 0);
+   assert(regs_out[17] == 0);
+   assert(regs_out[18] == ULONG_MAX);
+   assert(regs_out[19] == ULONG_MAX);
+   assert(regs_out[20] == ULONG_MAX);
+   assert(regs_out[21] == ULONG_MAX);
+
+   free(px0);
+   free(px1);
+
+   return 0;
+}
diff --git a/memcheck/tests/riscv64-linux/context_integer.stderr.exp b/memcheck/tests/riscv64-linux/context_integer.stderr.exp
new file mode 100644
index 000000000..20cf89090
--- /dev/null
+++ b/memcheck/tests/riscv64-linux/context_integer.stderr.exp
@@ -0,0 +1,216 @@
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:252)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:254)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:256)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:89)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:258)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:89)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:260)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:262)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:264)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:266)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:268)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:270)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:272)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:300)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:302)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:304)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:89)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:306)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:89)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:308)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:310)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:312)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:314)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:316)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:318)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:320)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:348)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:349)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:352)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:89)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:353)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:89)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:354)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:89)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:356)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:357)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:360)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:362)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:363)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:364)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:366)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:85)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:367)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:89)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (context_integer.c:368)
+ Uninitialised value was created by a heap allocation
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (context_integer.c:89)
+
diff --git a/memcheck/tests/riscv64-linux/context_integer.stdout.exp b/memcheck/tests/riscv64-linux/context_integer.stdout.exp
new file mode 100644
index 000000000..a092638fc
--- /dev/null
+++ b/memcheck/tests/riscv64-linux/context_integer.stdout.exp
@@ -0,0 +1,69 @@
+Values before the signal:
+  t2=0
+  s0=0
+  s1=0
+  a2=0
+  a3=0x1
+  a4=0x1
+  a5=0x1
+  a6=0x1
+  s2=0xffffffffffffffff
+  s3=0xffffffffffffffff
+  s4=0xffffffffffffffff
+  s5=0xffffffffffffffff
+  s6=0
+  s7=0
+  s8=0
+  s9=0
+  s10=0
+  s11=0
+  t3=0
+  t4=0
+  t5=0
+  t6=0
+Values in the signal handler:
+  t2=0
+  s0=0
+  s1=0
+  a2=0
+  a3=0x1
+  a4=0x1
+  a5=0x1
+  a6=0x1
+  s2=0xffffffffffffffff
+  s3=0xffffffffffffffff
+  s4=0xffffffffffffffff
+  s5=0xffffffffffffffff
+  s6=0
+  s7=0
+  s8=0
+  s9=0
+  s10=0
+  s11=0
+  t3=0
+  t4=0
+  t5=0
+  t6=0
+Values after return from the signal handler:
+  t2=0
+  s0=0
+  s1=0
+  a2=0
+  a3=0x2
+  a4=0x2
+  a5=0x2
+  a6=0x2
+  s2=0xffffffffffffffff
+  s3=0xffffffffffffffff
+  s4=0
+  s5=0
+  s6=0xffffffffffffffff
+  s7=0xffffffffffffffff
+  s8=0
+  s9=0
+  s10=0
+  s11=0
+  t3=0xffffffffffffffff
+  t4=0xffffffffffffffff
+  t5=0xffffffffffffffff
+  t6=0xffffffffffffffff
diff --git a/memcheck/tests/riscv64-linux/context_integer.vgtest b/memcheck/tests/riscv64-linux/context_integer.vgtest
new file mode 100644
index 000000000..556abc9c3
--- /dev/null
+++ b/memcheck/tests/riscv64-linux/context_integer.vgtest
@@ -0,0 +1,2 @@
+prog: context_integer
+vgopts: -q --track-origins=yes
diff --git a/memcheck/tests/riscv64-linux/filter_stderr b/memcheck/tests/riscv64-linux/filter_stderr
new file mode 100755
index 000000000..a778e971f
--- /dev/null
+++ b/memcheck/tests/riscv64-linux/filter_stderr
@@ -0,0 +1,3 @@
+#! /bin/sh
+# Run the filter_stderr script one directory up, passing all arguments on.
+../filter_stderr "$@"
diff --git a/memcheck/tests/riscv64-linux/scalar.c b/memcheck/tests/riscv64-linux/scalar.c
new file mode 100644
index 000000000..a4d48cf87
--- /dev/null
+++ b/memcheck/tests/riscv64-linux/scalar.c
@@ -0,0 +1,127 @@
+/* This is the riscv64 variant of memcheck/tests/x86-linux/scalar.c. It checks
+   a selected subset of all syscalls, ones that are in some way interesting from
+   the platform perspective. */
+
+#define _GNU_SOURCE
+#include "../../../include/vki/vki-scnums-riscv64-linux.h"
+#include "../../memcheck.h"
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ptrace.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#define GO(__NR_xxx, s)                                                        \
+   fprintf(stderr,                                                             \
+           "-----------------------------------------------------\n"           \
+           "%4d:%24s %s\n"                                                     \
+           "-----------------------------------------------------\n",          \
+           __NR_xxx, #__NR_xxx, s) /* banner: number, name, note s */
+
+#define SY   res = syscall     /* SY(nr, ...) stores the result in res */
+#define FAIL assert(-1 == res) /* the last SY must have returned -1 */
+#define SUCC assert(-1 != res) /* the last SY must not have returned -1 */
+
+#define FAILx(E)                                                               \
+   do {                                                                        \
+      int myerrno = errno;                                                     \
+      if (-1 == res) {                                                         \
+         if ((E) == myerrno) {                                                 \
+            /* as expected */                                                  \
+         } else {                                                              \
+            fprintf(stderr, "Expected error %s (%d), got %d\n", #E, E,         \
+                    myerrno);                                                  \
+            exit(1);                                                           \
+         }                                                                     \
+      } else {                                                                 \
+         fprintf(stderr, "Expected error %s (%d), got success\n", #E, E);      \
+         exit(1);                                                              \
+      }                                                                        \
+   } while (0) /* no ';': the ';' at the call site ends the statement */
+
+int main(void)
+{
+   /* Never written, so undefined to memcheck; we know it reads as 0x0. */
+   long* cell   = malloc(sizeof *cell);
+   long  undef0 = *cell;
+   long  res; /* result of the latest SY(); read by FAIL/SUCC/FAILx */
+
+   /* Start with syscall number 0 plus two trivial generic syscalls. */
+
+   /* __NR_io_setup 0 */
+   GO(__NR_io_setup, "2s 1m");
+   SY(__NR_io_setup, undef0, undef0);
+   FAIL;
+
+   /* __NR_read 63 */
+   /* Nb: the syscall number itself is undefined here and is reported too. */
+   GO(__NR_read, "1+3s 1m");
+   SY(__NR_read + undef0, undef0, undef0, undef0 + 1);
+   FAIL;
+
+   /* __NR_write 64 */
+   GO(__NR_write, "3s 1m");
+   SY(__NR_write, undef0, undef0, undef0 + 1);
+   FAIL;
+
+   /* Syscalls which the kernel does not implement on riscv64 and which
+      Valgrind rejects explicitly. */
+
+   /* __NR_kexec_load 104 */
+   GO(__NR_kexec_load, "n/i");
+   SY(__NR_kexec_load);
+   FAILx(ENOSYS);
+
+   /* __NR_fadvise64 223 */
+   GO(__NR_fadvise64, "n/i");
+   SY(__NR_fadvise64);
+   FAILx(ENOSYS);
+
+   /* __NR_rseq 293 */
+   GO(__NR_rseq, "n/i");
+   SY(__NR_rseq);
+   FAILx(ENOSYS);
+
+   /* __NR_clone3 435 */
+   GO(__NR_clone3, "n/i");
+   SY(__NR_clone3);
+   FAILx(ENOSYS);
+
+   /* Wrappers that are specific to this platform. */
+
+   /* __NR_ptrace 117 */
+   GO(__NR_ptrace, "4s 1m");
+   SY(__NR_ptrace, undef0 + PTRACE_PEEKTEXT, undef0, undef0, undef0);
+   FAIL;
+
+   /* __NR_rt_sigreturn 139 */
+   /* Not exercised: calling this syscall is not valid in this context. */
+   GO(__NR_rt_sigreturn, "n/a");
+   /*SY(__NR_rt_sigreturn); FAIL;*/
+
+   /* __NR_mmap 222 */
+   GO(__NR_mmap, "6s 0m");
+   SY(__NR_mmap, undef0, undef0, undef0, undef0, undef0 - 1, undef0);
+   FAIL;
+
+   /* __NR_riscv_flush_icache 259 */
+   GO(__NR_riscv_flush_icache, "3s 0m");
+   SY(__NR_riscv_flush_icache, undef0, undef0, undef0);
+   SUCC;
+
+   /* Last come an invalid syscall number and __NR_exit. */
+
+   /* A syscall number that does not exist. */
+   GO(9999, "1e");
+   SY(9999);
+   FAILx(ENOSYS);
+
+   /* __NR_exit 1 */
+   GO(__NR_exit, "1s 0m");
+   SY(__NR_exit, undef0);
+   FAIL;
+
+   assert(0); /* not expected to be reached after __NR_exit */
+}
diff --git a/memcheck/tests/riscv64-linux/scalar.stderr.exp b/memcheck/tests/riscv64-linux/scalar.stderr.exp
new file mode 100644
index 000000000..6a9533dec
--- /dev/null
+++ b/memcheck/tests/riscv64-linux/scalar.stderr.exp
@@ -0,0 +1,156 @@
+-----------------------------------------------------
+   0:           __NR_io_setup 2s 1m
+-----------------------------------------------------
+Syscall param io_setup(nr_events) contains uninitialised byte(s)
+   ...
+   by 0x........: main (scalar.c:55)
+
+Syscall param io_setup(ctxp) contains uninitialised byte(s)
+   ...
+   by 0x........: main (scalar.c:55)
+
+Syscall param io_setup(ctxp) points to unaddressable byte(s)
+   ...
+   by 0x........: main (scalar.c:55)
+ Address 0x........ is not stack'd, malloc'd or (recently) free'd
+
+-----------------------------------------------------
+  63:               __NR_read 1+3s 1m
+-----------------------------------------------------
+Syscall param (syscallno) contains uninitialised byte(s)
+   ...
+   by 0x........: main (scalar.c:61)
+
+Syscall param read(fd) contains uninitialised byte(s)
+   ...
+   by 0x........: main (scalar.c:61)
+
+Syscall param read(buf) contains uninitialised byte(s)
+   ...
+   by 0x........: main (scalar.c:61)
+
+Syscall param read(count) contains uninitialised byte(s)
+   ...
+   by 0x........: main (scalar.c:61)
+
+Syscall param read(buf) points to unaddressable byte(s)
+   ...
+   by 0x........: main (scalar.c:61)
+ Address 0x........ is not stack'd, malloc'd or (recently) free'd
+
+-----------------------------------------------------
+  64:              __NR_write 3s 1m
+-----------------------------------------------------
+Syscall param write(fd) contains uninitialised byte(s)
+   ...
+   by 0x........: main (scalar.c:66)
+
+Syscall param write(buf) contains uninitialised byte(s)
+   ...
+   by 0x........: main (scalar.c:66)
+
+Syscall param write(count) contains uninitialised byte(s)
+   ...
+   by 0x........: main (scalar.c:66)
+
+Syscall param write(buf) points to unaddressable byte(s)
+   ...
+   by 0x........: main (scalar.c:66)
+ Address 0x........ is not stack'd, malloc'd or (recently) free'd
+
+-----------------------------------------------------
+ 104:         __NR_kexec_load n/i
+-----------------------------------------------------
+-----------------------------------------------------
+ 223:          __NR_fadvise64 n/i
+-----------------------------------------------------
+-----------------------------------------------------
+ 293:               __NR_rseq n/i
+-----------------------------------------------------
+-----------------------------------------------------
+ 435:             __NR_clone3 n/i
+-----------------------------------------------------
+-----------------------------------------------------
+ 117:             __NR_ptrace 4s 1m
+-----------------------------------------------------
+Syscall param ptrace(request) contains uninitialised byte(s)
+   ...
+   by 0x........: main (scalar.c:96)
+
+Syscall param ptrace(pid) contains uninitialised byte(s)
+   ...
+   by 0x........: main (scalar.c:96)
+
+Syscall param ptrace(addr) contains uninitialised byte(s)
+   ...
+   by 0x........: main (scalar.c:96)
+
+Syscall param ptrace(data) contains uninitialised byte(s)
+   ...
+   by 0x........: main (scalar.c:96)
+
+Syscall param ptrace(peek) points to unaddressable byte(s)
+   ...
+   by 0x........: main (scalar.c:96)
+ Address 0x........ is not stack'd, malloc'd or (recently) free'd
+
+-----------------------------------------------------
+ 139:       __NR_rt_sigreturn n/a
+-----------------------------------------------------
+-----------------------------------------------------
+ 222:               __NR_mmap 6s 0m
+-----------------------------------------------------
+Syscall param mmap(start) contains uninitialised byte(s)
+   ...
+   by 0x........: main (scalar.c:106)
+
+Syscall param mmap(length) contains uninitialised byte(s)
+   ...
+   by 0x........: main (scalar.c:106)
+
+Syscall param mmap(prot) contains uninitialised byte(s)
+   ...
+   by 0x........: main (scalar.c:106)
+
+Syscall param mmap(flags) contains uninitialised byte(s)
+   ...
+   by 0x........: main (scalar.c:106)
+
+Syscall param mmap(fd) contains uninitialised byte(s)
+   ...
+   by 0x........: main (scalar.c:106)
+
+Syscall param mmap(offset) contains uninitialised byte(s)
+   ...
+   by 0x........: main (scalar.c:106)
+
+-----------------------------------------------------
+ 259: __NR_riscv_flush_icache 3s 0m
+-----------------------------------------------------
+Syscall param riscv_flush_icache(start) contains uninitialised byte(s)
+   ...
+   by 0x........: main (scalar.c:111)
+
+Syscall param riscv_flush_icache(end) contains uninitialised byte(s)
+   ...
+   by 0x........: main (scalar.c:111)
+
+Syscall param riscv_flush_icache(flags) contains uninitialised byte(s)
+   ...
+   by 0x........: main (scalar.c:111)
+
+-----------------------------------------------------
+9999:                    9999 1e
+-----------------------------------------------------
+WARNING: unhandled riscv64-linux syscall: 9999
+You may be able to write your own handler.
+Read the file README_MISSING_SYSCALL_OR_IOCTL.
+Nevertheless we consider this a bug.  Please report
+it at http://valgrind.org/support/bug_reports.html.
+-----------------------------------------------------
+  93:               __NR_exit 1s 0m
+-----------------------------------------------------
+Syscall param exit(status) contains uninitialised byte(s)
+   ...
+   by 0x........: main (scalar.c:123)
+
diff --git a/memcheck/tests/riscv64-linux/scalar.vgtest b/memcheck/tests/riscv64-linux/scalar.vgtest
new file mode 100644
index 000000000..897d9e73c
--- /dev/null
+++ b/memcheck/tests/riscv64-linux/scalar.vgtest
@@ -0,0 +1,3 @@
+prog: scalar
+vgopts: -q --error-limit=no
+args: < scalar.c
diff --git a/memcheck/tests/sh-mem-random.c b/memcheck/tests/sh-mem-random.c
index ae82248ae..89785bd8f 100644
--- a/memcheck/tests/sh-mem-random.c
+++ b/memcheck/tests/sh-mem-random.c
@@ -247,9 +247,10 @@ int main(void)
       int nbytes_p;
       // (U1*)(UWord)constULL funny casting to keep gcc quiet on
       // 32-bit platforms
-      U1* huge_addr = (U1*)(UWord)0x6600000000ULL;  // 408GB
+      U1* huge_addr = (U1*)(UWord)0x3300000000ULL;  // 204GB
       // Note, kernel 2.6.? on Athlon64 refuses fixed mmap requests
-      // at above 512GB.
+      // at above 512GB. Further, Linux on RISC-V with Sv39 has user mappings
+      // limited to 256GB.
 
       printf("-------- testing auxmap range --------\n");
 
diff --git a/none/tests/Makefile.am b/none/tests/Makefile.am
index 185993f20..5b36d3718 100644
--- a/none/tests/Makefile.am
+++ b/none/tests/Makefile.am
@@ -35,6 +35,9 @@ endif
 if VGCONF_ARCHS_INCLUDE_NANOMIPS
 SUBDIRS += nanomips
 endif
+if VGCONF_ARCHS_INCLUDE_RISCV64
+SUBDIRS += riscv64
+endif
 
 
 # OS-specific tests
@@ -75,8 +78,9 @@ SUBDIRS += x86-freebsd
 endif
 
 DIST_SUBDIRS = x86 amd64 ppc32 ppc64 arm arm64 s390x mips32 mips64 nanomips \
-               linux darwin solaris freebsd amd64-linux x86-linux amd64-darwin \
-               x86-darwin amd64-solaris x86-solaris x86-freebsd scripts .
+               riscv64 linux darwin solaris freebsd amd64-linux x86-linux \
+               amd64-darwin x86-darwin amd64-solaris x86-solaris x86-freebsd \
+               scripts .
 
 dist_noinst_SCRIPTS = \
 	filter_cmdline0 \
@@ -346,6 +350,11 @@ libvex_test_CFLAGS      = $(AM_CFLAGS) @FLAG_FSANITIZE@
 libvex_test_LDADD       = ../../VEX/libvex-@VGCONF_ARCH_PRI@-@VGCONF_OS@.a \
 			   @LIB_UBSAN@
 libvexmultiarch_test_CFLAGS= $(AM_CFLAGS) @FLAG_FSANITIZE@
+if VGCONF_ARCHS_INCLUDE_RISCV64
+# Disable RISC-V linker relaxation: GNU ld 2.39 needs tens of minutes to work
+# through it on this large test.
+libvexmultiarch_test_LDFLAGS = -Wl,--no-relax
+endif
 libvexmultiarch_test_LDADD = \
 	../../VEX/libvexmultiarch-@VGCONF_ARCH_PRI@-@VGCONF_OS@.a \
 	../../VEX/libvex-@VGCONF_ARCH_PRI@-@VGCONF_OS@.a @LIB_UBSAN@
diff --git a/none/tests/allexec_prepare_prereq b/none/tests/allexec_prepare_prereq
index a541f4299..fa4d31706 100755
--- a/none/tests/allexec_prepare_prereq
+++ b/none/tests/allexec_prepare_prereq
@@ -34,5 +34,6 @@ pair s390x_unexisting_in_32bits s390x
 pair arm                        arm64
 pair mips32                     mips64
 pair nanomips                   nanoMIPS_unexisting_in_64bits
+pair riscv_unexisting_in_32bits riscv64
 
 exit 0
diff --git a/none/tests/faultstatus.c b/none/tests/faultstatus.c
index cdb1a80ba..d2f208ad5 100644
--- a/none/tests/faultstatus.c
+++ b/none/tests/faultstatus.c
@@ -11,7 +11,7 @@
 #include "../../config.h"
 
 /* Division by zero triggers a SIGFPE on x86 and x86_64,
-   but not on the PowerPC architecture.
+   but not on the PowerPC, AArch64 and RISC-V architectures.
 
    On ARM-Linux, we do get a SIGFPE, but not from the faulting of a
    division instruction (there isn't any such thing) but rather
@@ -19,7 +19,7 @@
    Hence we get a SIGFPE but the SI_CODE is different from that on
    x86/amd64-linux.
  */
-#if defined(__powerpc__) || defined(__aarch64__)
+#if defined(__powerpc__) || defined(__aarch64__) || defined(__riscv)
 #  define DIVISION_BY_ZERO_TRIGGERS_FPE 0
 #if defined(VGO_freebsd)
 #  define DIVISION_BY_ZERO_SI_CODE      SI_LWP
diff --git a/none/tests/libvex_test.c b/none/tests/libvex_test.c
index 5b57a4c2e..6a8086ace 100644
--- a/none/tests/libvex_test.c
+++ b/none/tests/libvex_test.c
@@ -76,6 +76,8 @@ __attribute__((noinline)) static void get_guest_arch(VexArch    *ga)
    *ga = VexArchMIPS64;
 #elif defined(VGA_nanomips)
    *ga = VexArchNANOMIPS;
+#elif defined(VGA_riscv64)
+   *ga = VexArchRISCV64;
 #else
    missing arch;
 #endif
@@ -113,6 +115,7 @@ static VexEndness arch_endness (VexArch va) {
          else
             return VexEndnessBE;
       }
+   case VexArchRISCV64: return VexEndnessLE;
    default: failure_exit();
    }
 }
@@ -139,6 +142,7 @@ static UInt arch_hwcaps (VexArch va) {
    case VexArchMIPS64: return VEX_PRID_COMP_MIPS | VEX_MIPS_HOST_FR;
 #endif
    case VexArchNANOMIPS: return 0;
+   case VexArchRISCV64: return 0;
    default: failure_exit();
    }
 }
@@ -156,6 +160,7 @@ static Bool mode64 (VexArch va) {
    case VexArchMIPS32: return False;
    case VexArchMIPS64: return True;
    case VexArchNANOMIPS: return False;
+   case VexArchRISCV64: return True;
    default: failure_exit();
    }
 }
@@ -275,7 +280,7 @@ int main(int argc, char **argv)
    // explicitly via command line arguments.
    if (multiarch) {
       VexArch va;
-      for (va = VexArchX86; va <= VexArchNANOMIPS; va++) {
+      for (va = VexArchX86; va <= VexArchRISCV64; va++) {
          vta.arch_host = va;
          vta.archinfo_host.endness = arch_endness (vta.arch_host);
          vta.archinfo_host.hwcaps = arch_hwcaps (vta.arch_host);
diff --git a/none/tests/riscv64/Makefile.am b/none/tests/riscv64/Makefile.am
new file mode 100644
index 000000000..b2de2bd39
--- /dev/null
+++ b/none/tests/riscv64/Makefile.am
@@ -0,0 +1,31 @@
+
+include $(top_srcdir)/Makefile.tool-tests.am
+
+dist_noinst_SCRIPTS = filter_stderr
+
+noinst_HEADERS = testinst.h
+
+EXTRA_DIST = \
+	atomic.stdout.exp atomic.stderr.exp atomic.vgtest \
+	compressed.stdout.exp compressed.stderr.exp compressed.vgtest \
+	csr.stdout.exp csr.stderr.exp csr.vgtest \
+	float32.stdout.exp float32.stderr.exp float32.vgtest \
+	float64.stdout.exp float64.stderr.exp float64.vgtest \
+	integer.stdout.exp integer.stderr.exp integer.vgtest \
+	muldiv.stdout.exp muldiv.stderr.exp muldiv.vgtest
+
+check_PROGRAMS = \
+	allexec \
+	atomic \
+	compressed \
+	csr \
+	float32 \
+	float64 \
+	integer \
+	muldiv
+
+AM_CFLAGS    += @FLAG_M64@
+AM_CXXFLAGS  += @FLAG_M64@
+AM_CCASFLAGS += @FLAG_M64@
+
+allexec_CFLAGS = $(AM_CFLAGS) @FLAG_W_NO_NONNULL@
diff --git a/none/tests/riscv64/allexec.c b/none/tests/riscv64/allexec.c
new file mode 120000
index 000000000..6d6a9cf28
--- /dev/null
+++ b/none/tests/riscv64/allexec.c
@@ -0,0 +1 @@
+../allexec.c
\ No newline at end of file
diff --git a/none/tests/riscv64/atomic.c b/none/tests/riscv64/atomic.c
new file mode 100644
index 000000000..793d1d6fc
--- /dev/null
+++ b/none/tests/riscv64/atomic.c
@@ -0,0 +1,280 @@
+/* Tests for the RV64A standard atomic instruction-set extension. */
+
+#include "testinst.h"
+
+static void test_atomic_shared(void)
+{
+   printf("RV64A atomic instruction set, shared operations\n");
+
+   /* ------------------- lr.w rd, (rs1) -------------------- */
+   /* ----------------- sc.w rd, rs2, (rs1) ----------------- */
+   TESTINST_2_1_LRSC(4, "lr.w a0, (a2)", "sc.w a1, a0, (a2)", a0, a1, a2);
+
+   TESTINST_2_1_LRSC(4, "lr.w t4, (t6)", "sc.w t5, t4, (t6)", t4, t5, t6);
+   TESTINST_2_1_LRSC(4, "lr.w zero, (a1)", "sc.w a0, zero, (a1)", zero, a0, a1);
+   TESTINST_2_1_LRSC(4, "lr.w a0, (a1)", "sc.w zero, a0, (a1)", a0, zero, a1);
+
+   /* -------------- amoswap.w rd, rs2, (rs1) --------------- */
+   TESTINST_1_2_AMOX(4, "amoswap.w a0, a1, (a2)", 0xabcdef0123456789, a0, a1,
+                     a2);
+
+   TESTINST_1_2_AMOX(4, "amoswap.w t4, t5, (t6)", 0xabcdef0123456789, t4, t5,
+                     t6);
+   TESTINST_1_2_AMOX(4, "amoswap.w zero, a0, (a1)", 0xabcdef0123456789, zero,
+                     a0, a1);
+   TESTINST_1_2_AMOX(4, "amoswap.w a0, zero, (a1)", 0xabcdef0123456789, a0,
+                     zero, a1);
+
+   /* --------------- amoadd.w rd, rs2, (rs1) --------------- */
+   TESTINST_1_2_AMOX(4, "amoadd.w a0, a1, (a2)", 0xabcdef0123456789, a0, a1,
+                     a2);
+
+   TESTINST_1_2_AMOX(4, "amoadd.w t4, t5, (t6)", 0xabcdef0123456789, t4, t5,
+                     t6);
+   TESTINST_1_2_AMOX(4, "amoadd.w zero, a0, (a1)", 0xabcdef0123456789, zero, a0,
+                     a1);
+   TESTINST_1_2_AMOX(4, "amoadd.w a0, zero, (a1)", 0xabcdef0123456789, a0, zero,
+                     a1);
+
+   /* --------------- amoxor.w rd, rs2, (rs1) --------------- */
+   TESTINST_1_2_AMOX(4, "amoxor.w a0, a1, (a2)", 0xabcdef0123456789, a0, a1,
+                     a2);
+
+   TESTINST_1_2_AMOX(4, "amoxor.w t4, t5, (t6)", 0xabcdef0123456789, t4, t5,
+                     t6);
+   TESTINST_1_2_AMOX(4, "amoxor.w zero, a0, (a1)", 0xabcdef0123456789, zero, a0,
+                     a1);
+   TESTINST_1_2_AMOX(4, "amoxor.w a0, zero, (a1)", 0xabcdef0123456789, a0, zero,
+                     a1);
+
+   /* --------------- amoand.w rd, rs2, (rs1) --------------- */
+   TESTINST_1_2_AMOX(4, "amoand.w a0, a1, (a2)", 0xabcdef0123456789, a0, a1,
+                     a2);
+
+   TESTINST_1_2_AMOX(4, "amoand.w t4, t5, (t6)", 0xabcdef0123456789, t4, t5,
+                     t6);
+   TESTINST_1_2_AMOX(4, "amoand.w zero, a0, (a1)", 0xabcdef0123456789, zero, a0,
+                     a1);
+   TESTINST_1_2_AMOX(4, "amoand.w a0, zero, (a1)", 0xabcdef0123456789, a0, zero,
+                     a1);
+
+   /* --------------- amoor.w rd, rs2, (rs1) ---------------- */
+   TESTINST_1_2_AMOX(4, "amoor.w a0, a1, (a2)", 0xabcdef0123456789, a0, a1, a2);
+
+   TESTINST_1_2_AMOX(4, "amoor.w t4, t5, (t6)", 0xabcdef0123456789, t4, t5, t6);
+   TESTINST_1_2_AMOX(4, "amoor.w zero, a0, (a1)", 0xabcdef0123456789, zero, a0,
+                     a1);
+   TESTINST_1_2_AMOX(4, "amoor.w a0, zero, (a1)", 0xabcdef0123456789, a0, zero,
+                     a1);
+
+   /* --------------- amomin.w rd, rs2, (rs1) --------------- */
+   TESTINST_1_2_AMOX(4, "amomin.w a0, a1, (a2)", 0x0000000000000000, a0, a1,
+                     a2);
+   TESTINST_1_2_AMOX(4, "amomin.w a0, a1, (a2)", 0x000000007fffffff, a0, a1,
+                     a2);
+   TESTINST_1_2_AMOX(4, "amomin.w a0, a1, (a2)", 0x0000000080000000, a0, a1,
+                     a2);
+   TESTINST_1_2_AMOX(4, "amomin.w a0, a1, (a2)", 0x00000000ffffffff, a0, a1,
+                     a2);
+
+   TESTINST_1_2_AMOX(4, "amomin.w t4, t5, (t6)", 0xabcdef0123456789, t4, t5,
+                     t6);
+   TESTINST_1_2_AMOX(4, "amomin.w zero, a0, (a1)", 0xabcdef0123456789, zero, a0,
+                     a1);
+   TESTINST_1_2_AMOX(4, "amomin.w a0, zero, (a1)", 0xabcdef0123456789, a0, zero,
+                     a1);
+
+   /* --------------- amomax.w rd, rs2, (rs1) --------------- */
+   TESTINST_1_2_AMOX(4, "amomax.w a0, a1, (a2)", 0x0000000000000000, a0, a1,
+                     a2);
+   TESTINST_1_2_AMOX(4, "amomax.w a0, a1, (a2)", 0x000000007fffffff, a0, a1,
+                     a2);
+   TESTINST_1_2_AMOX(4, "amomax.w a0, a1, (a2)", 0x0000000080000000, a0, a1,
+                     a2);
+   TESTINST_1_2_AMOX(4, "amomax.w a0, a1, (a2)", 0x00000000ffffffff, a0, a1,
+                     a2);
+
+   TESTINST_1_2_AMOX(4, "amomax.w t4, t5, (t6)", 0xabcdef0123456789, t4, t5,
+                     t6);
+   TESTINST_1_2_AMOX(4, "amomax.w zero, a0, (a1)", 0xabcdef0123456789, zero, a0,
+                     a1);
+   TESTINST_1_2_AMOX(4, "amomax.w a0, zero, (a1)", 0xabcdef0123456789, a0, zero,
+                     a1);
+
+   /* -------------- amominu.w rd, rs2, (rs1) --------------- */
+   TESTINST_1_2_AMOX(4, "amominu.w a0, a1, (a2)", 0x0000000000000000, a0, a1,
+                     a2);
+   TESTINST_1_2_AMOX(4, "amominu.w a0, a1, (a2)", 0x000000007fffffff, a0, a1,
+                     a2);
+   TESTINST_1_2_AMOX(4, "amominu.w a0, a1, (a2)", 0x0000000080000000, a0, a1,
+                     a2);
+   TESTINST_1_2_AMOX(4, "amominu.w a0, a1, (a2)", 0x00000000ffffffff, a0, a1,
+                     a2);
+
+   TESTINST_1_2_AMOX(4, "amominu.w t4, t5, (t6)", 0xabcdef0123456789, t4, t5,
+                     t6);
+   TESTINST_1_2_AMOX(4, "amominu.w zero, a0, (a1)", 0xabcdef0123456789, zero,
+                     a0, a1);
+   TESTINST_1_2_AMOX(4, "amominu.w a0, zero, (a1)", 0xabcdef0123456789, a0,
+                     zero, a1);
+
+   /* -------------- amomaxu.w rd, rs2, (rs1) --------------- */
+   TESTINST_1_2_AMOX(4, "amomaxu.w a0, a1, (a2)", 0x0000000000000000, a0, a1,
+                     a2);
+   TESTINST_1_2_AMOX(4, "amomaxu.w a0, a1, (a2)", 0x000000007fffffff, a0, a1,
+                     a2);
+   TESTINST_1_2_AMOX(4, "amomaxu.w a0, a1, (a2)", 0x0000000080000000, a0, a1,
+                     a2);
+   TESTINST_1_2_AMOX(4, "amomaxu.w a0, a1, (a2)", 0x00000000ffffffff, a0, a1,
+                     a2);
+
+   TESTINST_1_2_AMOX(4, "amomaxu.w t4, t5, (t6)", 0xabcdef0123456789, t4, t5,
+                     t6);
+   TESTINST_1_2_AMOX(4, "amomaxu.w zero, a0, (a1)", 0xabcdef0123456789, zero,
+                     a0, a1);
+   TESTINST_1_2_AMOX(4, "amomaxu.w a0, zero, (a1)", 0xabcdef0123456789, a0,
+                     zero, a1);
+
+   printf("\n");
+}
+
+static void test_atomic_additions(void)
+{
+   printf("RV64A atomic instruction set, additions\n");
+
+   /* ------------------- lr.d rd, (rs1) -------------------- */
+   /* ----------------- sc.d rd, rs2, (rs1) ----------------- */
+   TESTINST_2_1_LRSC(4, "lr.d a0, (a2)", "sc.d a1, a0, (a2)", a0, a1, a2);
+
+   TESTINST_2_1_LRSC(4, "lr.d t4, (t6)", "sc.d t5, t4, (t6)", t4, t5, t6);
+   TESTINST_2_1_LRSC(4, "lr.d zero, (a1)", "sc.d a0, zero, (a1)", zero, a0, a1);
+   TESTINST_2_1_LRSC(4, "lr.d a0, (a1)", "sc.d zero, a0, (a1)", a0, zero, a1);
+
+   /* -------------- amoswap.d rd, rs2, (rs1) --------------- */
+   TESTINST_1_2_AMOX(4, "amoswap.d a0, a1, (a2)", 0xabcdef0123456789, a0, a1,
+                     a2);
+
+   TESTINST_1_2_AMOX(4, "amoswap.d t4, t5, (t6)", 0xabcdef0123456789, t4, t5,
+                     t6);
+   TESTINST_1_2_AMOX(4, "amoswap.d zero, a0, (a1)", 0xabcdef0123456789, zero,
+                     a0, a1);
+   TESTINST_1_2_AMOX(4, "amoswap.d a0, zero, (a1)", 0xabcdef0123456789, a0,
+                     zero, a1);
+
+   /* --------------- amoadd.d rd, rs2, (rs1) --------------- */
+   TESTINST_1_2_AMOX(4, "amoadd.d a0, a1, (a2)", 0xabcdef0123456789, a0, a1,
+                     a2);
+
+   TESTINST_1_2_AMOX(4, "amoadd.d t4, t5, (t6)", 0xabcdef0123456789, t4, t5,
+                     t6);
+   TESTINST_1_2_AMOX(4, "amoadd.d zero, a0, (a1)", 0xabcdef0123456789, zero, a0,
+                     a1);
+   TESTINST_1_2_AMOX(4, "amoadd.d a0, zero, (a1)", 0xabcdef0123456789, a0, zero,
+                     a1);
+
+   /* --------------- amoxor.d rd, rs2, (rs1) --------------- */
+   TESTINST_1_2_AMOX(4, "amoxor.d a0, a1, (a2)", 0xabcdef0123456789, a0, a1,
+                     a2);
+
+   TESTINST_1_2_AMOX(4, "amoxor.d t4, t5, (t6)", 0xabcdef0123456789, t4, t5,
+                     t6);
+   TESTINST_1_2_AMOX(4, "amoxor.d zero, a0, (a1)", 0xabcdef0123456789, zero, a0,
+                     a1);
+   TESTINST_1_2_AMOX(4, "amoxor.d a0, zero, (a1)", 0xabcdef0123456789, a0, zero,
+                     a1);
+
+   /* --------------- amoand.d rd, rs2, (rs1) --------------- */
+   TESTINST_1_2_AMOX(4, "amoand.d a0, a1, (a2)", 0xabcdef0123456789, a0, a1,
+                     a2);
+
+   TESTINST_1_2_AMOX(4, "amoand.d t4, t5, (t6)", 0xabcdef0123456789, t4, t5,
+                     t6);
+   TESTINST_1_2_AMOX(4, "amoand.d zero, a0, (a1)", 0xabcdef0123456789, zero, a0,
+                     a1);
+   TESTINST_1_2_AMOX(4, "amoand.d a0, zero, (a1)", 0xabcdef0123456789, a0, zero,
+                     a1);
+
+   /* --------------- amoor.d rd, rs2, (rs1) ---------------- */
+   TESTINST_1_2_AMOX(4, "amoor.d a0, a1, (a2)", 0xabcdef0123456789, a0, a1, a2);
+
+   TESTINST_1_2_AMOX(4, "amoor.d t4, t5, (t6)", 0xabcdef0123456789, t4, t5, t6);
+   TESTINST_1_2_AMOX(4, "amoor.d zero, a0, (a1)", 0xabcdef0123456789, zero, a0,
+                     a1);
+   TESTINST_1_2_AMOX(4, "amoor.d a0, zero, (a1)", 0xabcdef0123456789, a0, zero,
+                     a1);
+
+   /* --------------- amomin.d rd, rs2, (rs1) --------------- */
+   TESTINST_1_2_AMOX(4, "amomin.d a0, a1, (a2)", 0x0000000000000000, a0, a1,
+                     a2);
+   TESTINST_1_2_AMOX(4, "amomin.d a0, a1, (a2)", 0x7fffffffffffffff, a0, a1,
+                     a2);
+   TESTINST_1_2_AMOX(4, "amomin.d a0, a1, (a2)", 0x8000000000000000, a0, a1,
+                     a2);
+   TESTINST_1_2_AMOX(4, "amomin.d a0, a1, (a2)", 0xffffffffffffffff, a0, a1,
+                     a2);
+
+   TESTINST_1_2_AMOX(4, "amomin.d t4, t5, (t6)", 0xabcdef0123456789, t4, t5,
+                     t6);
+   TESTINST_1_2_AMOX(4, "amomin.d zero, a0, (a1)", 0xabcdef0123456789, zero, a0,
+                     a1);
+   TESTINST_1_2_AMOX(4, "amomin.d a0, zero, (a1)", 0xabcdef0123456789, a0, zero,
+                     a1);
+
+   /* --------------- amomax.d rd, rs2, (rs1) --------------- */
+   TESTINST_1_2_AMOX(4, "amomax.d a0, a1, (a2)", 0x0000000000000000, a0, a1,
+                     a2);
+   TESTINST_1_2_AMOX(4, "amomax.d a0, a1, (a2)", 0x7fffffffffffffff, a0, a1,
+                     a2);
+   TESTINST_1_2_AMOX(4, "amomax.d a0, a1, (a2)", 0x8000000000000000, a0, a1,
+                     a2);
+   TESTINST_1_2_AMOX(4, "amomax.d a0, a1, (a2)", 0xffffffffffffffff, a0, a1,
+                     a2);
+
+   TESTINST_1_2_AMOX(4, "amomax.d t4, t5, (t6)", 0xabcdef0123456789, t4, t5,
+                     t6);
+   TESTINST_1_2_AMOX(4, "amomax.d zero, a0, (a1)", 0xabcdef0123456789, zero, a0,
+                     a1);
+   TESTINST_1_2_AMOX(4, "amomax.d a0, zero, (a1)", 0xabcdef0123456789, a0, zero,
+                     a1);
+
+   /* -------------- amominu.d rd, rs2, (rs1) --------------- */
+   TESTINST_1_2_AMOX(4, "amominu.d a0, a1, (a2)", 0x0000000000000000, a0, a1,
+                     a2);
+   TESTINST_1_2_AMOX(4, "amominu.d a0, a1, (a2)", 0x7fffffffffffffff, a0, a1,
+                     a2);
+   TESTINST_1_2_AMOX(4, "amominu.d a0, a1, (a2)", 0x8000000000000000, a0, a1,
+                     a2);
+   TESTINST_1_2_AMOX(4, "amominu.d a0, a1, (a2)", 0xffffffffffffffff, a0, a1,
+                     a2);
+
+   TESTINST_1_2_AMOX(4, "amominu.d t4, t5, (t6)", 0xabcdef0123456789, t4, t5,
+                     t6);
+   TESTINST_1_2_AMOX(4, "amominu.d zero, a0, (a1)", 0xabcdef0123456789, zero,
+                     a0, a1);
+   TESTINST_1_2_AMOX(4, "amominu.d a0, zero, (a1)", 0xabcdef0123456789, a0,
+                     zero, a1);
+
+   /* -------------- amomaxu.d rd, rs2, (rs1) --------------- */
+   TESTINST_1_2_AMOX(4, "amomaxu.d a0, a1, (a2)", 0x0000000000000000, a0, a1,
+                     a2);
+   TESTINST_1_2_AMOX(4, "amomaxu.d a0, a1, (a2)", 0x7fffffffffffffff, a0, a1,
+                     a2);
+   TESTINST_1_2_AMOX(4, "amomaxu.d a0, a1, (a2)", 0x8000000000000000, a0, a1,
+                     a2);
+   TESTINST_1_2_AMOX(4, "amomaxu.d a0, a1, (a2)", 0xffffffffffffffff, a0, a1,
+                     a2);
+
+   TESTINST_1_2_AMOX(4, "amomaxu.d t4, t5, (t6)", 0xabcdef0123456789, t4, t5,
+                     t6);
+   TESTINST_1_2_AMOX(4, "amomaxu.d zero, a0, (a1)", 0xabcdef0123456789, zero,
+                     a0, a1);
+   TESTINST_1_2_AMOX(4, "amomaxu.d a0, zero, (a1)", 0xabcdef0123456789, a0,
+                     zero, a1);
+}
+
+int main(void)
+{
+   test_atomic_shared();
+   test_atomic_additions();
+   return 0;
+}
diff --git a/none/tests/riscv64/atomic.stderr.exp b/none/tests/riscv64/atomic.stderr.exp
new file mode 100644
index 000000000..e69de29bb
diff --git a/none/tests/riscv64/atomic.stdout.exp b/none/tests/riscv64/atomic.stdout.exp
new file mode 100644
index 000000000..225d76107
--- /dev/null
+++ b/none/tests/riscv64/atomic.stdout.exp
@@ -0,0 +1,467 @@
+RV64A atomic instruction set, shared operations
+lr.w a0, (a2) ::
+  inputs: a2=&area_mid
+  output: a0=0xffffffffaf27d13b
+sc.w a1, a0, (a2) ::
+  inputs: a2=&area_mid, a0=0x0000000050d82ec4
+  output: a1=0x0000000000000000
+  [+000]  c4 2e d8 50 .. .. .. .. .. .. .. .. .. .. .. ..
+sc.w a1, a0, (a2) ::
+  inputs: a2=&area_mid, a0=0xffffffffaf27d13b
+  output: a1=0x0000000000000001
+lr.w t4, (t6) ::
+  inputs: t6=&area_mid
+  output: t4=0x0000000056a044b2
+sc.w t5, t4, (t6) ::
+  inputs: t6=&area_mid, t4=0xffffffffa95fbb4d
+  output: t5=0x0000000000000000
+  [+000]  4d bb 5f a9 .. .. .. .. .. .. .. .. .. .. .. ..
+sc.w t5, t4, (t6) ::
+  inputs: t6=&area_mid, t4=0x0000000056a044b2
+  output: t5=0x0000000000000001
+lr.w zero, (a1) ::
+  inputs: a1=&area_mid
+  output: zero=0x0000000000000000
+sc.w a0, zero, (a1) ::
+  inputs: a1=&area_mid, zero=0x0000000000000000
+  output: a0=0x0000000000000000
+  [+000]  00 00 00 00 .. .. .. .. .. .. .. .. .. .. .. ..
+sc.w a0, zero, (a1) ::
+  inputs: a1=&area_mid, zero=0x0000000000000000
+  output: a0=0x0000000000000001
+lr.w a0, (a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xffffffffb7839b97
+sc.w zero, a0, (a1) ::
+  inputs: a1=&area_mid, a0=0x00000000487c6468
+  output: zero=0x0000000000000000
+  [+000]  68 64 7c 48 .. .. .. .. .. .. .. .. .. .. .. ..
+sc.w zero, a0, (a1) ::
+  inputs: a1=&area_mid, a0=0xffffffffb7839b97
+  output: zero=0x0000000000000000
+amoswap.w a0, a1, (a2) ::
+  inputs: a1=0xabcdef0123456789, a2=&area_mid
+  output: a0=0xffffffffa705f65d
+  [+000]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+amoswap.w t4, t5, (t6) ::
+  inputs: t5=0xabcdef0123456789, t6=&area_mid
+  output: t4=0x000000003df76c96
+  [+000]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+amoswap.w zero, a0, (a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  output: zero=0x0000000000000000
+  [+000]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+amoswap.w a0, zero, (a1) ::
+  inputs: zero=0xabcdef0123456789, a1=&area_mid
+  output: a0=0xffffffffc95c9810
+  [+000]  00 00 00 00 .. .. .. .. .. .. .. .. .. .. .. ..
+amoadd.w a0, a1, (a2) ::
+  inputs: a1=0xabcdef0123456789, a2=&area_mid
+  output: a0=0xfffffffff7e8c6a9
+  [+000]  32 2e 2e 1b .. .. .. .. .. .. .. .. .. .. .. ..
+amoadd.w t4, t5, (t6) ::
+  inputs: t5=0xabcdef0123456789, t6=&area_mid
+  output: t4=0x000000003b13ff64
+  [+000]  ed 66 59 5e .. .. .. .. .. .. .. .. .. .. .. ..
+amoadd.w zero, a0, (a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  output: zero=0x0000000000000000
+  [+000]  f7 65 31 d4 .. .. .. .. .. .. .. .. .. .. .. ..
+amoadd.w a0, zero, (a1) ::
+  inputs: zero=0xabcdef0123456789, a1=&area_mid
+  output: a0=0x00000000727c80f3
+  no memory changes
+amoxor.w a0, a1, (a2) ::
+  inputs: a1=0xabcdef0123456789, a2=&area_mid
+  output: a0=0xffffffff9ed0411e
+  [+000]  97 26 95 bd .. .. .. .. .. .. .. .. .. .. .. ..
+amoxor.w t4, t5, (t6) ::
+  inputs: t5=0xabcdef0123456789, t6=&area_mid
+  output: t4=0x0000000050f6fd1c
+  [+000]  95 9a b3 73 .. .. .. .. .. .. .. .. .. .. .. ..
+amoxor.w zero, a0, (a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  output: zero=0x0000000000000000
+  [+000]  91 17 b2 80 .. .. .. .. .. .. .. .. .. .. .. ..
+amoxor.w a0, zero, (a1) ::
+  inputs: zero=0xabcdef0123456789, a1=&area_mid
+  output: a0=0xffffffffb3e1553f
+  no memory changes
+amoand.w a0, a1, (a2) ::
+  inputs: a1=0xabcdef0123456789, a2=&area_mid
+  output: a0=0xffffffff9dbf68bc
+  [+000]  88 60 05 01 .. .. .. .. .. .. .. .. .. .. .. ..
+amoand.w t4, t5, (t6) ::
+  inputs: t5=0xabcdef0123456789, t6=&area_mid
+  output: t4=0x000000007d9d67bc
+  [+000]  88 .. 05 21 .. .. .. .. .. .. .. .. .. .. .. ..
+amoand.w zero, a0, (a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  output: zero=0x0000000000000000
+  [+000]  09 04 00 22 .. .. .. .. .. .. .. .. .. .. .. ..
+amoand.w a0, zero, (a1) ::
+  inputs: zero=0xabcdef0123456789, a1=&area_mid
+  output: a0=0xffffffff8c8b14f4
+  [+000]  00 00 00 00 .. .. .. .. .. .. .. .. .. .. .. ..
+amoor.w a0, a1, (a2) ::
+  inputs: a1=0xabcdef0123456789, a2=&area_mid
+  output: a0=0xfffffffff4b23b84
+  [+000]  8d 7f f7 f7 .. .. .. .. .. .. .. .. .. .. .. ..
+amoor.w t4, t5, (t6) ::
+  inputs: t5=0xabcdef0123456789, t6=&area_mid
+  output: t4=0xffffffffc10a3c47
+  [+000]  cf 7f 4f e3 .. .. .. .. .. .. .. .. .. .. .. ..
+amoor.w zero, a0, (a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  output: zero=0x0000000000000000
+  [+000]  e9 f7 df 33 .. .. .. .. .. .. .. .. .. .. .. ..
+amoor.w a0, zero, (a1) ::
+  inputs: zero=0xabcdef0123456789, a1=&area_mid
+  output: a0=0xfffffffffc7bc013
+  no memory changes
+amomin.w a0, a1, (a2) ::
+  inputs: a1=0x0000000000000000, a2=&area_mid
+  output: a0=0xffffffffa2acb976
+  no memory changes
+amomin.w a0, a1, (a2) ::
+  inputs: a1=0x000000007fffffff, a2=&area_mid
+  output: a0=0x000000001d3d7dbb
+  no memory changes
+amomin.w a0, a1, (a2) ::
+  inputs: a1=0x0000000080000000, a2=&area_mid
+  output: a0=0xffffffff8a3ac80e
+  [+000]  00 00 00 80 .. .. .. .. .. .. .. .. .. .. .. ..
+amomin.w a0, a1, (a2) ::
+  inputs: a1=0x00000000ffffffff, a2=&area_mid
+  output: a0=0x0000000004b0569c
+  [+000]  ff ff ff ff .. .. .. .. .. .. .. .. .. .. .. ..
+amomin.w t4, t5, (t6) ::
+  inputs: t5=0xabcdef0123456789, t6=&area_mid
+  output: t4=0xffffffffa7aae391
+  no memory changes
+amomin.w zero, a0, (a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  output: zero=0x0000000000000000
+  no memory changes
+amomin.w a0, zero, (a1) ::
+  inputs: zero=0xabcdef0123456789, a1=&area_mid
+  output: a0=0xffffffffdb5ce85e
+  no memory changes
+amomax.w a0, a1, (a2) ::
+  inputs: a1=0x0000000000000000, a2=&area_mid
+  output: a0=0xffffffffa32bd88e
+  [+000]  00 00 00 00 .. .. .. .. .. .. .. .. .. .. .. ..
+amomax.w a0, a1, (a2) ::
+  inputs: a1=0x000000007fffffff, a2=&area_mid
+  output: a0=0x0000000004afb8d5
+  [+000]  ff ff ff 7f .. .. .. .. .. .. .. .. .. .. .. ..
+amomax.w a0, a1, (a2) ::
+  inputs: a1=0x0000000080000000, a2=&area_mid
+  output: a0=0x000000001bf2425f
+  no memory changes
+amomax.w a0, a1, (a2) ::
+  inputs: a1=0x00000000ffffffff, a2=&area_mid
+  output: a0=0x0000000004033357
+  no memory changes
+amomax.w t4, t5, (t6) ::
+  inputs: t5=0xabcdef0123456789, t6=&area_mid
+  output: t4=0xffffffffd9eb46ea
+  [+000]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+amomax.w zero, a0, (a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  output: zero=0x0000000000000000
+  [+000]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+amomax.w a0, zero, (a1) ::
+  inputs: zero=0xabcdef0123456789, a1=&area_mid
+  output: a0=0xffffffffbd76c58f
+  [+000]  00 00 00 00 .. .. .. .. .. .. .. .. .. .. .. ..
+amominu.w a0, a1, (a2) ::
+  inputs: a1=0x0000000000000000, a2=&area_mid
+  output: a0=0x00000000042fa9fa
+  [+000]  00 00 00 00 .. .. .. .. .. .. .. .. .. .. .. ..
+amominu.w a0, a1, (a2) ::
+  inputs: a1=0x000000007fffffff, a2=&area_mid
+  output: a0=0xffffffffa7f19faf
+  [+000]  ff ff ff 7f .. .. .. .. .. .. .. .. .. .. .. ..
+amominu.w a0, a1, (a2) ::
+  inputs: a1=0x0000000080000000, a2=&area_mid
+  output: a0=0xffffffffc5c764db
+  [+000]  00 00 00 80 .. .. .. .. .. .. .. .. .. .. .. ..
+amominu.w a0, a1, (a2) ::
+  inputs: a1=0x00000000ffffffff, a2=&area_mid
+  output: a0=0x0000000077beb4a9
+  no memory changes
+amominu.w t4, t5, (t6) ::
+  inputs: t5=0xabcdef0123456789, t6=&area_mid
+  output: t4=0xffffffffdbe14b46
+  [+000]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+amominu.w zero, a0, (a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  output: zero=0x0000000000000000
+  no memory changes
+amominu.w a0, zero, (a1) ::
+  inputs: zero=0xabcdef0123456789, a1=&area_mid
+  output: a0=0x0000000028dc3c9c
+  [+000]  00 00 00 00 .. .. .. .. .. .. .. .. .. .. .. ..
+amomaxu.w a0, a1, (a2) ::
+  inputs: a1=0x0000000000000000, a2=&area_mid
+  output: a0=0x0000000049cc0fac
+  no memory changes
+amomaxu.w a0, a1, (a2) ::
+  inputs: a1=0x000000007fffffff, a2=&area_mid
+  output: a0=0xffffffff8b19183c
+  no memory changes
+amomaxu.w a0, a1, (a2) ::
+  inputs: a1=0x0000000080000000, a2=&area_mid
+  output: a0=0x000000000acd1475
+  [+000]  00 00 00 80 .. .. .. .. .. .. .. .. .. .. .. ..
+amomaxu.w a0, a1, (a2) ::
+  inputs: a1=0x00000000ffffffff, a2=&area_mid
+  output: a0=0xffffffffe3f6bf86
+  [+000]  ff ff ff ff .. .. .. .. .. .. .. .. .. .. .. ..
+amomaxu.w t4, t5, (t6) ::
+  inputs: t5=0xabcdef0123456789, t6=&area_mid
+  output: t4=0x0000000031a0d599
+  no memory changes
+amomaxu.w zero, a0, (a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  output: zero=0x0000000000000000
+  [+000]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+amomaxu.w a0, zero, (a1) ::
+  inputs: zero=0xabcdef0123456789, a1=&area_mid
+  output: a0=0xffffffff9fa43077
+  no memory changes
+
+RV64A atomic instruction set, additions
+lr.d a0, (a2) ::
+  inputs: a2=&area_mid
+  output: a0=0x05d75ec6f616ee9a
+sc.d a1, a0, (a2) ::
+  inputs: a2=&area_mid, a0=0xfa28a13909e91165
+  output: a1=0x0000000000000000
+  [+000]  65 11 e9 09 39 a1 28 fa .. .. .. .. .. .. .. ..
+sc.d a1, a0, (a2) ::
+  inputs: a2=&area_mid, a0=0x05d75ec6f616ee9a
+  output: a1=0x0000000000000001
+lr.d t4, (t6) ::
+  inputs: t6=&area_mid
+  output: t4=0x141625713239066f
+sc.d t5, t4, (t6) ::
+  inputs: t6=&area_mid, t4=0xebe9da8ecdc6f990
+  output: t5=0x0000000000000000
+  [+000]  90 f9 c6 cd 8e da e9 eb .. .. .. .. .. .. .. ..
+sc.d t5, t4, (t6) ::
+  inputs: t6=&area_mid, t4=0x141625713239066f
+  output: t5=0x0000000000000001
+lr.d zero, (a1) ::
+  inputs: a1=&area_mid
+  output: zero=0x0000000000000000
+sc.d a0, zero, (a1) ::
+  inputs: a1=&area_mid, zero=0x0000000000000000
+  output: a0=0x0000000000000000
+  [+000]  00 00 00 00 00 00 00 00 .. .. .. .. .. .. .. ..
+sc.d a0, zero, (a1) ::
+  inputs: a1=&area_mid, zero=0x0000000000000000
+  output: a0=0x0000000000000001
+lr.d a0, (a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xf2db8f44cbbf37e2
+sc.d zero, a0, (a1) ::
+  inputs: a1=&area_mid, a0=0x0d2470bb3440c81d
+  output: zero=0x0000000000000000
+  [+000]  1d c8 40 34 bb 70 24 0d .. .. .. .. .. .. .. ..
+sc.d zero, a0, (a1) ::
+  inputs: a1=&area_mid, a0=0xf2db8f44cbbf37e2
+  output: zero=0x0000000000000000
+amoswap.d a0, a1, (a2) ::
+  inputs: a1=0xabcdef0123456789, a2=&area_mid
+  output: a0=0x78fb29445f3bc8d7
+  [+000]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+amoswap.d t4, t5, (t6) ::
+  inputs: t5=0xabcdef0123456789, t6=&area_mid
+  output: t4=0x34a901384a97a32f
+  [+000]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+amoswap.d zero, a0, (a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  output: zero=0x0000000000000000
+  [+000]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+amoswap.d a0, zero, (a1) ::
+  inputs: zero=0xabcdef0123456789, a1=&area_mid
+  output: a0=0xb9dd5dab8e212ab7
+  [+000]  00 00 00 00 00 00 00 00 .. .. .. .. .. .. .. ..
+amoadd.d a0, a1, (a2) ::
+  inputs: a1=0xabcdef0123456789, a2=&area_mid
+  output: a0=0x3bfcd80321664d3e
+  [+000]  c7 b4 ab 44 04 c7 ca e7 .. .. .. .. .. .. .. ..
+amoadd.d t4, t5, (t6) ::
+  inputs: t5=0xabcdef0123456789, t6=&area_mid
+  output: t4=0x63d9810079bbabd9
+  [+000]  62 13 01 9d 01 70 a7 0f .. .. .. .. .. .. .. ..
+amoadd.d zero, a0, (a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  output: zero=0x0000000000000000
+  [+000]  3a 68 72 d5 8d c4 0e b7 .. .. .. .. .. .. .. ..
+amoadd.d a0, zero, (a1) ::
+  inputs: zero=0xabcdef0123456789, a1=&area_mid
+  output: a0=0x10fd4e94e9c808f5
+  no memory changes
+amoxor.d a0, a1, (a2) ::
+  inputs: a1=0xabcdef0123456789, a2=&area_mid
+  output: a0=0x4edb6a053a967ecf
+  [+000]  46 19 d3 19 04 85 16 e5 .. .. .. .. .. .. .. ..
+amoxor.d t4, t5, (t6) ::
+  inputs: t5=0xabcdef0123456789, t6=&area_mid
+  output: t4=0xa1a7a4c9c0a51f6b
+  [+000]  e2 78 e0 e3 c8 4b 6a 0a .. .. .. .. .. .. .. ..
+amoxor.d zero, a0, (a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  output: zero=0x0000000000000000
+  [+000]  7e c0 45 b4 cf 96 e3 4e .. .. .. .. .. .. .. ..
+amoxor.d a0, zero, (a1) ::
+  inputs: zero=0xabcdef0123456789, a1=&area_mid
+  output: a0=0xf63a63fedcb4d29c
+  no memory changes
+amoand.d a0, a1, (a2) ::
+  inputs: a1=0xabcdef0123456789, a2=&area_mid
+  output: a0=0xb097e047aacc5b89
+  [+000]  .. 43 44 22 01 .. 85 a0 .. .. .. .. .. .. .. ..
+amoand.d t4, t5, (t6) ::
+  inputs: t5=0xabcdef0123456789, t6=&area_mid
+  output: t4=0xef136b941e54ffe8
+  [+000]  88 67 44 02 00 .. 01 ab .. .. .. .. .. .. .. ..
+amoand.d zero, a0, (a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  output: zero=0x0000000000000000
+  [+000]  81 61 41 03 01 .. 49 8b .. .. .. .. .. .. .. ..
+amoand.d a0, zero, (a1) ::
+  inputs: zero=0xabcdef0123456789, a1=&area_mid
+  output: a0=0x6c949cea66e687ae
+  [+000]  00 00 00 00 00 00 00 00 .. .. .. .. .. .. .. ..
+amoor.d a0, a1, (a2) ::
+  inputs: a1=0xabcdef0123456789, a2=&area_mid
+  output: a0=0x623139cb7207e36c
+  [+000]  ed e7 47 73 .. ff fd eb .. .. .. .. .. .. .. ..
+amoor.d t4, t5, (t6) ::
+  inputs: t5=0xabcdef0123456789, t6=&area_mid
+  output: t4=0x4c1cd56194c94a4e
+  [+000]  cf 6f cd b7 .. ff dd ef .. .. .. .. .. .. .. ..
+amoor.d zero, a0, (a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  output: zero=0x0000000000000000
+  [+000]  ff .. 77 .. 97 ef ed ab .. .. .. .. .. .. .. ..
+amoor.d a0, zero, (a1) ::
+  inputs: zero=0xabcdef0123456789, a1=&area_mid
+  output: a0=0x710cf757885d2728
+  no memory changes
+amomin.d a0, a1, (a2) ::
+  inputs: a1=0x0000000000000000, a2=&area_mid
+  output: a0=0x63a8769192481679
+  [+000]  00 00 00 00 00 00 00 00 .. .. .. .. .. .. .. ..
+amomin.d a0, a1, (a2) ::
+  inputs: a1=0x7fffffffffffffff, a2=&area_mid
+  output: a0=0xb9c3e32f2103009d
+  no memory changes
+amomin.d a0, a1, (a2) ::
+  inputs: a1=0x8000000000000000, a2=&area_mid
+  output: a0=0x5127ba1c529aa0bf
+  [+000]  00 00 00 00 00 00 00 80 .. .. .. .. .. .. .. ..
+amomin.d a0, a1, (a2) ::
+  inputs: a1=0xffffffffffffffff, a2=&area_mid
+  output: a0=0x06a17746411ab40c
+  [+000]  ff ff ff ff ff ff ff ff .. .. .. .. .. .. .. ..
+amomin.d t4, t5, (t6) ::
+  inputs: t5=0xabcdef0123456789, t6=&area_mid
+  output: t4=0xb3fd9698098ef5b0
+  [+000]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+amomin.d zero, a0, (a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  output: zero=0x0000000000000000
+  [+000]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+amomin.d a0, zero, (a1) ::
+  inputs: zero=0xabcdef0123456789, a1=&area_mid
+  output: a0=0x698bec649583f5aa
+  [+000]  00 00 00 00 00 00 00 00 .. .. .. .. .. .. .. ..
+amomax.d a0, a1, (a2) ::
+  inputs: a1=0x0000000000000000, a2=&area_mid
+  output: a0=0x2a541ab7911c2b5a
+  no memory changes
+amomax.d a0, a1, (a2) ::
+  inputs: a1=0x7fffffffffffffff, a2=&area_mid
+  output: a0=0x532f9ae1d7da8010
+  [+000]  ff ff ff ff ff ff ff 7f .. .. .. .. .. .. .. ..
+amomax.d a0, a1, (a2) ::
+  inputs: a1=0x8000000000000000, a2=&area_mid
+  output: a0=0xc2e9e9cf82c7aff8
+  no memory changes
+amomax.d a0, a1, (a2) ::
+  inputs: a1=0xffffffffffffffff, a2=&area_mid
+  output: a0=0x514c816eaff2763f
+  no memory changes
+amomax.d t4, t5, (t6) ::
+  inputs: t5=0xabcdef0123456789, t6=&area_mid
+  output: t4=0xde24e0a879648e11
+  no memory changes
+amomax.d zero, a0, (a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  output: zero=0x0000000000000000
+  no memory changes
+amomax.d a0, zero, (a1) ::
+  inputs: zero=0xabcdef0123456789, a1=&area_mid
+  output: a0=0x5d68e1a25652a804
+  no memory changes
+amominu.d a0, a1, (a2) ::
+  inputs: a1=0x0000000000000000, a2=&area_mid
+  output: a0=0x086a7a39a1e6217d
+  [+000]  00 00 00 00 00 00 00 00 .. .. .. .. .. .. .. ..
+amominu.d a0, a1, (a2) ::
+  inputs: a1=0x7fffffffffffffff, a2=&area_mid
+  output: a0=0x2112ca1cf9f1dd31
+  no memory changes
+amominu.d a0, a1, (a2) ::
+  inputs: a1=0x8000000000000000, a2=&area_mid
+  output: a0=0x822c4c377b82984c
+  [+000]  00 00 00 00 00 00 00 80 .. .. .. .. .. .. .. ..
+amominu.d a0, a1, (a2) ::
+  inputs: a1=0xffffffffffffffff, a2=&area_mid
+  output: a0=0x08847c7642a20df9
+  no memory changes
+amominu.d t4, t5, (t6) ::
+  inputs: t5=0xabcdef0123456789, t6=&area_mid
+  output: t4=0x8fe6d7c56a5ff965
+  no memory changes
+amominu.d zero, a0, (a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  output: zero=0x0000000000000000
+  [+000]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+amominu.d a0, zero, (a1) ::
+  inputs: zero=0xabcdef0123456789, a1=&area_mid
+  output: a0=0x11f7fa4450de2529
+  [+000]  00 00 00 00 00 00 00 00 .. .. .. .. .. .. .. ..
+amomaxu.d a0, a1, (a2) ::
+  inputs: a1=0x0000000000000000, a2=&area_mid
+  output: a0=0xc33ebc4b44b8ddd8
+  no memory changes
+amomaxu.d a0, a1, (a2) ::
+  inputs: a1=0x7fffffffffffffff, a2=&area_mid
+  output: a0=0xe6c097130b5efcf6
+  no memory changes
+amomaxu.d a0, a1, (a2) ::
+  inputs: a1=0x8000000000000000, a2=&area_mid
+  output: a0=0x56470887bfdd3daf
+  [+000]  00 00 00 00 00 00 00 80 .. .. .. .. .. .. .. ..
+amomaxu.d a0, a1, (a2) ::
+  inputs: a1=0xffffffffffffffff, a2=&area_mid
+  output: a0=0xef9f8c927c405d2f
+  [+000]  ff ff ff ff ff ff ff ff .. .. .. .. .. .. .. ..
+amomaxu.d t4, t5, (t6) ::
+  inputs: t5=0xabcdef0123456789, t6=&area_mid
+  output: t4=0x8d969e225f9318a0
+  [+000]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+amomaxu.d zero, a0, (a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  output: zero=0x0000000000000000
+  [+000]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+amomaxu.d a0, zero, (a1) ::
+  inputs: zero=0xabcdef0123456789, a1=&area_mid
+  output: a0=0x478d5d7e053a4e0c
+  no memory changes
diff --git a/none/tests/riscv64/atomic.vgtest b/none/tests/riscv64/atomic.vgtest
new file mode 100644
index 000000000..51295e709
--- /dev/null
+++ b/none/tests/riscv64/atomic.vgtest
@@ -0,0 +1,2 @@
+prog: atomic
+vgopts: -q
diff --git a/none/tests/riscv64/compressed.c b/none/tests/riscv64/compressed.c
new file mode 100644
index 000000000..29e119ce6
--- /dev/null
+++ b/none/tests/riscv64/compressed.c
@@ -0,0 +1,456 @@
+/* Tests for the RV64C standard compressed instruction-set extension. */
+
+#include "testinst.h"
+
+static void test_compressed_00(void)
+{
+   /* Quadrant 0: c.addi4spn plus the register-relative loads and stores. */
+   printf("RV64C compressed instruction set, quadrant 0\n");
+   /* ------------- c.addi4spn rd, nzuimm[9:2] -------------- */
+   TESTINST_1_1(2, "c.addi4spn a0, sp, 4", 0x0000000000001000, a0, sp);
+   TESTINST_1_1(2, "c.addi4spn a0, sp, 8", 0x0000000000001000, a0, sp);
+   TESTINST_1_1(2, "c.addi4spn a0, sp, 16", 0x0000000000001000, a0, sp);
+   TESTINST_1_1(2, "c.addi4spn a0, sp, 32", 0x0000000000001000, a0, sp);
+   TESTINST_1_1(2, "c.addi4spn a0, sp, 64", 0x0000000000001000, a0, sp);
+   TESTINST_1_1(2, "c.addi4spn a0, sp, 128", 0x0000000000001000, a0, sp);
+   TESTINST_1_1(2, "c.addi4spn a0, sp, 256", 0x0000000000001000, a0, sp);
+   TESTINST_1_1(2, "c.addi4spn a0, sp, 512", 0x0000000000001000, a0, sp);
+   TESTINST_1_1(2, "c.addi4spn a0, sp, 1020", 0x0000000000001000, a0, sp);
+   /* sp near 2^31/2^32, then rd=a5 (the rd argument must match the insn). */
+   TESTINST_1_1(2, "c.addi4spn a0, sp, 4", 0x000000007ffffffc, a0, sp);
+   TESTINST_1_1(2, "c.addi4spn a0, sp, 4", 0x00000000fffffffb, a0, sp);
+   TESTINST_1_1(2, "c.addi4spn a0, sp, 4", 0x00000000fffffffc, a0, sp);
+   TESTINST_1_1(2, "c.addi4spn a5, sp, 4", 0x0000000000001000, a5, sp);
+
+   /* -------------- c.fld rd, uimm[7:3](rs1) --------------- */
+   TESTINST_1_1_FLOAD(2, "c.fld fa0, 0(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(2, "c.fld fa0, 8(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(2, "c.fld fa0, 16(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(2, "c.fld fa0, 32(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(2, "c.fld fa0, 64(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(2, "c.fld fa0, 128(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(2, "c.fld fa0, 248(a1)", fa0, a1);
+
+   TESTINST_1_1_FLOAD(2, "c.fld fa4, 0(a5)", fa4, a5);
+
+   /* --------------- c.lw rd, uimm[6:2](rs1) --------------- */
+   TESTINST_1_1_LOAD(2, "c.lw a0, 0(a1)", a0, a1);
+   TESTINST_1_1_LOAD(2, "c.lw a0, 4(a1)", a0, a1);
+   TESTINST_1_1_LOAD(2, "c.lw a0, 8(a1)", a0, a1);
+   TESTINST_1_1_LOAD(2, "c.lw a0, 16(a1)", a0, a1);
+   TESTINST_1_1_LOAD(2, "c.lw a0, 32(a1)", a0, a1);
+   TESTINST_1_1_LOAD(2, "c.lw a0, 64(a1)", a0, a1);
+   TESTINST_1_1_LOAD(2, "c.lw a0, 124(a1)", a0, a1);
+
+   TESTINST_1_1_LOAD(2, "c.lw a4, 0(a5)", a4, a5);
+
+   /* --------------- c.ld rd, uimm[7:3](rs1) --------------- */
+   TESTINST_1_1_LOAD(2, "c.ld a0, 0(a1)", a0, a1);
+   TESTINST_1_1_LOAD(2, "c.ld a0, 8(a1)", a0, a1);
+   TESTINST_1_1_LOAD(2, "c.ld a0, 16(a1)", a0, a1);
+   TESTINST_1_1_LOAD(2, "c.ld a0, 32(a1)", a0, a1);
+   TESTINST_1_1_LOAD(2, "c.ld a0, 64(a1)", a0, a1);
+   TESTINST_1_1_LOAD(2, "c.ld a0, 128(a1)", a0, a1);
+   TESTINST_1_1_LOAD(2, "c.ld a0, 248(a1)", a0, a1);
+
+   TESTINST_1_1_LOAD(2, "c.ld a4, 0(a5)", a4, a5);
+
+   /* -------------- c.fsd rs2, uimm[7:3](rs1) -------------- */
+   TESTINST_0_2_FSTORE(2, "c.fsd fa0, 0(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(2, "c.fsd fa0, 8(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(2, "c.fsd fa0, 16(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(2, "c.fsd fa0, 32(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(2, "c.fsd fa0, 64(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(2, "c.fsd fa0, 128(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(2, "c.fsd fa0, 248(a1)", 0xabcdef0123456789, fa0, a1);
+
+   TESTINST_0_2_FSTORE(2, "c.fsd fa4, 0(a5)", 0xabcdef0123456789, fa4, a5);
+
+   /* -------------- c.sw rs2, uimm[6:2](rs1) --------------- */
+   TESTINST_0_2_STORE(2, "c.sw a0, 0(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(2, "c.sw a0, 4(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(2, "c.sw a0, 8(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(2, "c.sw a0, 16(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(2, "c.sw a0, 32(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(2, "c.sw a0, 64(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(2, "c.sw a0, 124(a1)", 0xabcdef0123456789, a0, a1);
+
+   TESTINST_0_2_STORE(2, "c.sw a4, 0(a5)", 0xabcdef0123456789, a4, a5);
+
+   /* -------------- c.sd rs2, uimm[7:3](rs1) --------------- */
+   TESTINST_0_2_STORE(2, "c.sd a0, 0(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(2, "c.sd a0, 8(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(2, "c.sd a0, 16(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(2, "c.sd a0, 32(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(2, "c.sd a0, 64(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(2, "c.sd a0, 128(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(2, "c.sd a0, 248(a1)", 0xabcdef0123456789, a0, a1);
+
+   TESTINST_0_2_STORE(2, "c.sd a4, 0(a5)", 0xabcdef0123456789, a4, a5);
+   /* Blank line separating this quadrant's report from the next one. */
+   printf("\n");
+}
+
+static void test_compressed_01(void)
+{
+   /* Quadrant 1: immediate arithmetic, register ALU ops, c.j and branches. */
+   printf("RV64C compressed instruction set, quadrant 1\n");
+   /* ------------------------ c.nop ------------------------ */
+   TESTINST_0_0(2, "c.nop"); /* 2 = presumably insn bytes; TODO confirm */
+
+   /* -------------- c.addi rd_rs1, nzimm[5:0] -------------- */
+   TESTINST_1_1(2, "c.addi a0, 1", 0x0000000000001000, a0, a0);
+   TESTINST_1_1(2, "c.addi a0, 2", 0x0000000000001000, a0, a0);
+   TESTINST_1_1(2, "c.addi a0, 4", 0x0000000000001000, a0, a0);
+   TESTINST_1_1(2, "c.addi a0, 8", 0x0000000000001000, a0, a0);
+   TESTINST_1_1(2, "c.addi a0, 16", 0x0000000000001000, a0, a0);
+   TESTINST_1_1(2, "c.addi a0, 31", 0x0000000000001000, a0, a0);
+   TESTINST_1_1(2, "c.addi a0, -1", 0x0000000000001000, a0, a0);
+   TESTINST_1_1(2, "c.addi a0, -32", 0x0000000000001000, a0, a0);
+
+   TESTINST_1_1(2, "c.addi a0, 1", 0x000000007fffffff, a0, a0);
+   TESTINST_1_1(2, "c.addi a0, 1", 0x00000000fffffffe, a0, a0);
+   TESTINST_1_1(2, "c.addi a0, 1", 0x00000000ffffffff, a0, a0);
+   TESTINST_1_1(2, "c.addi t6, 1", 0x0000000000001000, t6, t6);
+
+   /* -------------- c.addiw rd_rs1, imm[5:0] --------------- */
+   TESTINST_1_1(2, "c.addiw a0, 0", 0x0000000000001000, a0, a0);
+   TESTINST_1_1(2, "c.addiw a0, 1", 0x0000000000001000, a0, a0);
+   TESTINST_1_1(2, "c.addiw a0, 2", 0x0000000000001000, a0, a0);
+   TESTINST_1_1(2, "c.addiw a0, 4", 0x0000000000001000, a0, a0);
+   TESTINST_1_1(2, "c.addiw a0, 8", 0x0000000000001000, a0, a0);
+   TESTINST_1_1(2, "c.addiw a0, 16", 0x0000000000001000, a0, a0);
+   TESTINST_1_1(2, "c.addiw a0, 31", 0x0000000000001000, a0, a0);
+   TESTINST_1_1(2, "c.addiw a0, -1", 0x0000000000001000, a0, a0);
+   TESTINST_1_1(2, "c.addiw a0, -32", 0x0000000000001000, a0, a0);
+
+   TESTINST_1_1(2, "c.addiw a0, 1", 0x000000007fffffff, a0, a0);
+   TESTINST_1_1(2, "c.addiw a0, 1", 0x00000000fffffffe, a0, a0);
+   TESTINST_1_1(2, "c.addiw a0, 1", 0x00000000ffffffff, a0, a0);
+   TESTINST_1_1(2, "c.addiw t6, 0", 0x0000000000001000, t6, t6);
+
+   /* ------------------ c.li rd, imm[5:0] ------------------ */
+   TESTINST_1_0(2, "c.li a0, 0", a0);
+   TESTINST_1_0(2, "c.li a0, 1", a0);
+   TESTINST_1_0(2, "c.li a0, 2", a0);
+   TESTINST_1_0(2, "c.li a0, 4", a0);
+   TESTINST_1_0(2, "c.li a0, 8", a0);
+   TESTINST_1_0(2, "c.li a0, 15", a0);
+   TESTINST_1_0(2, "c.li a0, -1", a0);
+   TESTINST_1_0(2, "c.li a0, -16", a0);
+
+   TESTINST_1_0(2, "c.li t6, 1", t6);
+
+   /* ---------------- c.addi16sp nzimm[9:4] ---------------- */
+   TESTINST_1_1(2, "c.addi16sp sp, 16", 0x0000000000001000, sp, sp);
+   TESTINST_1_1(2, "c.addi16sp sp, 32", 0x0000000000001000, sp, sp);
+   TESTINST_1_1(2, "c.addi16sp sp, 64", 0x0000000000001000, sp, sp);
+   TESTINST_1_1(2, "c.addi16sp sp, 128", 0x0000000000001000, sp, sp);
+   TESTINST_1_1(2, "c.addi16sp sp, 256", 0x0000000000001000, sp, sp);
+   TESTINST_1_1(2, "c.addi16sp sp, 496", 0x0000000000001000, sp, sp);
+   TESTINST_1_1(2, "c.addi16sp sp, -16", 0x0000000000001000, sp, sp);
+   TESTINST_1_1(2, "c.addi16sp sp, -512", 0x0000000000001000, sp, sp);
+
+   TESTINST_1_1(2, "c.addi16sp sp, 16", 0x000000007ffffff0, sp, sp);
+   TESTINST_1_1(2, "c.addi16sp sp, 16", 0x00000000ffffffef, sp, sp);
+   TESTINST_1_1(2, "c.addi16sp sp, 16", 0x00000000fffffff0, sp, sp);
+
+   /* --------------- c.lui rd, nzimm[17:12] ---------------- */
+   TESTINST_1_0(2, "c.lui a0, 1", a0);
+   TESTINST_1_0(2, "c.lui a0, 2", a0);
+   TESTINST_1_0(2, "c.lui a0, 4", a0);
+   TESTINST_1_0(2, "c.lui a0, 8", a0);
+   TESTINST_1_0(2, "c.lui a0, 16", a0);
+   TESTINST_1_0(2, "c.lui a0, 31", a0);
+   TESTINST_1_0(2, "c.lui a0, 0xfffff" /* -1 */, a0);
+   TESTINST_1_0(2, "c.lui a0, 0xfffe0" /* -32 */, a0);
+
+   TESTINST_1_0(2, "c.lui t6, 1", t6);
+
+   /* ------------- c.srli rd_rs1, nzuimm[5:0] -------------- */
+   TESTINST_1_1(2, "c.srli a0, 1", 0xabcdef0123456789, a0, a0);
+   TESTINST_1_1(2, "c.srli a0, 2", 0xabcdef0123456789, a0, a0);
+   TESTINST_1_1(2, "c.srli a0, 4", 0xabcdef0123456789, a0, a0);
+   TESTINST_1_1(2, "c.srli a0, 8", 0xabcdef0123456789, a0, a0);
+   TESTINST_1_1(2, "c.srli a0, 16", 0xabcdef0123456789, a0, a0);
+   TESTINST_1_1(2, "c.srli a0, 32", 0xabcdef0123456789, a0, a0);
+   TESTINST_1_1(2, "c.srli a0, 63", 0xabcdef0123456789, a0, a0);
+
+   TESTINST_1_1(2, "c.srli a5, 1", 0xabcdef0123456789, a5, a5);
+
+   /* ------------- c.srai rd_rs1, nzuimm[5:0] -------------- */
+   TESTINST_1_1(2, "c.srai a0, 1", 0xabcdef0123456789, a0, a0);
+   TESTINST_1_1(2, "c.srai a0, 2", 0xabcdef0123456789, a0, a0);
+   TESTINST_1_1(2, "c.srai a0, 4", 0xabcdef0123456789, a0, a0);
+   TESTINST_1_1(2, "c.srai a0, 8", 0xabcdef0123456789, a0, a0);
+   TESTINST_1_1(2, "c.srai a0, 16", 0xabcdef0123456789, a0, a0);
+   TESTINST_1_1(2, "c.srai a0, 32", 0xabcdef0123456789, a0, a0);
+   TESTINST_1_1(2, "c.srai a0, 63", 0xabcdef0123456789, a0, a0);
+
+   TESTINST_1_1(2, "c.srai a5, 1", 0xabcdef0123456789, a5, a5);
+
+   /* --------------- c.andi rd_rs1, imm[5:0] --------------- */
+   TESTINST_1_1(2, "c.andi a0, 0", 0xffffffffffffffff, a0, a0);
+   TESTINST_1_1(2, "c.andi a0, 1", 0xffffffffffffffff, a0, a0);
+   TESTINST_1_1(2, "c.andi a0, 2", 0xffffffffffffffff, a0, a0);
+   TESTINST_1_1(2, "c.andi a0, 4", 0xffffffffffffffff, a0, a0);
+   TESTINST_1_1(2, "c.andi a0, 8", 0xffffffffffffffff, a0, a0);
+   TESTINST_1_1(2, "c.andi a0, 16", 0xffffffffffffffff, a0, a0);
+   TESTINST_1_1(2, "c.andi a0, 31", 0xffffffffffffffff, a0, a0);
+
+   TESTINST_1_1(2, "c.andi a5, 0", 0xffffffffffffffff, a5, a5);
+
+   /* ------------------ c.sub rd_rs1, rs2 ------------------ */
+   TESTINST_1_2(2, "c.sub a0, a1", 0x0000000000001000, 0x0000000000000fff, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.sub a0, a1", 0x0000000000001000, 0x0000000000001000, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.sub a0, a1", 0x0000000000001000, 0x0000000000001001, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.sub a0, a1", 0xffffffffffffffff, 0x0000000000000000, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.sub a0, a1", 0x0000000100000000, 0x0000000000000001, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.sub a4, a5", 0x0000000000001000, 0x0000000000000fff, a4,
+                a4, a5);
+
+   /* ------------------ c.xor rd_rs1, rs2 ------------------ */
+   TESTINST_1_2(2, "c.xor a0, a1", 0x0000ffff0000ffff, 0x00000000ffffffff, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.xor a4, a5", 0x0000ffff0000ffff, 0x00000000ffffffff, a4,
+                a4, a5);
+
+   /* ------------------ c.or rd_rs1, rs2 ------------------- */
+   TESTINST_1_2(2, "c.or a0, a1", 0x0000ffff0000ffff, 0x00000000ffffffff, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.or a4, a5", 0x0000ffff0000ffff, 0x00000000ffffffff, a4,
+                a4, a5);
+
+   /* ------------------ c.and rd_rs1, rs2 ------------------ */
+   TESTINST_1_2(2, "c.and a0, a1", 0x0000ffff0000ffff, 0x00000000ffffffff, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.and a4, a5", 0x0000ffff0000ffff, 0x00000000ffffffff, a4,
+                a4, a5);
+
+   /* ----------------- c.subw rd_rs1, rs2 ------------------ */
+   TESTINST_1_2(2, "c.subw a0, a1", 0x0000000000001000, 0x0000000000000fff, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.subw a0, a1", 0x0000000000001000, 0x0000000000001000, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.subw a0, a1", 0x0000000000001000, 0x0000000000001001, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.subw a0, a1", 0xffffffffffffffff, 0x0000000000000000, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.subw a0, a1", 0x0000000100000000, 0x0000000000000001, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.subw a4, a5", 0x0000000000001000, 0x0000000000000fff, a4,
+                a4, a5);
+
+   /* ----------------- c.addw rd_rs1, rs2 ------------------ */
+   TESTINST_1_2(2, "c.addw a0, a1", 0x0000000000001000, 0x0000000000002000, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.addw a0, a1", 0x000000007fffffff, 0x0000000000000001, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.addw a0, a1", 0x00000000fffffffe, 0x0000000000000001, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.addw a0, a1", 0x00000000ffffffff, 0x0000000000000001, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.addw a0, a1", 0xfffffffffffffffe, 0x0000000000000001, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.addw a0, a1", 0xffffffffffffffff, 0x0000000000000001, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.addw a4, a5", 0x0000000000001000, 0x0000000000002000, a4,
+                a4, a5);
+
+   /* -------------------- c.j imm[11:1] -------------------- */
+   TESTINST_0_0_J_RANGE(2, "c.j .+4", 4);
+   TESTINST_0_0_J_RANGE(2, "c.j .+6", 6);
+   TESTINST_0_0_J_RANGE(2, "c.j .+8", 8);
+   TESTINST_0_0_J_RANGE(2, "c.j .+16", 16);
+   TESTINST_0_0_J_RANGE(2, "c.j .+32", 32);
+   TESTINST_0_0_J_RANGE(2, "c.j .+64", 64);
+   TESTINST_0_0_J_RANGE(2, "c.j .+128", 128);
+   TESTINST_0_0_J_RANGE(2, "c.j .+256", 256);
+   TESTINST_0_0_J_RANGE(2, "c.j .+512", 512);
+   TESTINST_0_0_J_RANGE(2, "c.j .+1024", 1024);
+   TESTINST_0_0_J_RANGE(2, "c.j .+2044", 2044);
+   TESTINST_0_0_J_RANGE(2, "c.j .-4", -4);
+   TESTINST_0_0_J_RANGE(2, "c.j .-6", -6);
+   TESTINST_0_0_J_RANGE(2, "c.j .-2048", -2048);
+
+   /* ---------------- c.beqz rs1, imm[8:1] ----------------- */
+   TESTINST_0_1_BxxZ_RANGE(2, "c.beqz a0, .+4", 0, 4, a0);
+   TESTINST_0_1_BxxZ_RANGE(2, "c.beqz a0, .+6", 0, 6, a0);
+   TESTINST_0_1_BxxZ_RANGE(2, "c.beqz a0, .+8", 0, 8, a0);
+   TESTINST_0_1_BxxZ_RANGE(2, "c.beqz a0, .+16", 0, 16, a0);
+   TESTINST_0_1_BxxZ_RANGE(2, "c.beqz a0, .+32", 0, 32, a0);
+   TESTINST_0_1_BxxZ_RANGE(2, "c.beqz a0, .+64", 0, 64, a0);
+   TESTINST_0_1_BxxZ_RANGE(2, "c.beqz a0, .+128", 0, 128, a0);
+   TESTINST_0_1_BxxZ_RANGE(2, "c.beqz a0, .+252", 0, 252, a0);
+   TESTINST_0_1_BxxZ_RANGE(2, "c.beqz a0, .-4", 0, -4, a0);
+   TESTINST_0_1_BxxZ_RANGE(2, "c.beqz a0, .-6", 0, -6, a0);
+   TESTINST_0_1_BxxZ_RANGE(2, "c.beqz a0, .-256", 0, -256, a0);
+
+   TESTINST_0_1_BxxZ_RANGE(2, "c.beqz a5, .+4", 0, 4, a5);
+   TESTINST_0_1_BxxZ_COND(2, "c.beqz a0, 1f", 0, a0);
+   TESTINST_0_1_BxxZ_COND(2, "c.beqz a0, 1f", 1, a0);
+
+   /* ---------------- c.bnez rs1, imm[8:1] ----------------- */
+   TESTINST_0_1_BxxZ_RANGE(2, "c.bnez a0, .+4", 1, 4, a0);
+   TESTINST_0_1_BxxZ_RANGE(2, "c.bnez a0, .+6", 1, 6, a0);
+   TESTINST_0_1_BxxZ_RANGE(2, "c.bnez a0, .+8", 1, 8, a0);
+   TESTINST_0_1_BxxZ_RANGE(2, "c.bnez a0, .+16", 1, 16, a0);
+   TESTINST_0_1_BxxZ_RANGE(2, "c.bnez a0, .+32", 1, 32, a0);
+   TESTINST_0_1_BxxZ_RANGE(2, "c.bnez a0, .+64", 1, 64, a0);
+   TESTINST_0_1_BxxZ_RANGE(2, "c.bnez a0, .+128", 1, 128, a0);
+   TESTINST_0_1_BxxZ_RANGE(2, "c.bnez a0, .+252", 1, 252, a0);
+   TESTINST_0_1_BxxZ_RANGE(2, "c.bnez a0, .-4", 1, -4, a0);
+   TESTINST_0_1_BxxZ_RANGE(2, "c.bnez a0, .-6", 1, -6, a0);
+   TESTINST_0_1_BxxZ_RANGE(2, "c.bnez a0, .-256", 1, -256, a0);
+
+   TESTINST_0_1_BxxZ_RANGE(2, "c.bnez a5, .+4", 1, 4, a5);
+   TESTINST_0_1_BxxZ_COND(2, "c.bnez a0, 1f", 0, a0);
+   TESTINST_0_1_BxxZ_COND(2, "c.bnez a0, 1f", 1, a0);
+   /* Blank line separating this quadrant's report from the next one. */
+   printf("\n");
+}
+
+static void test_compressed_10(void)
+{
+   /* Quadrant 2: c.slli, sp-based loads/stores, c.jr/c.jalr, c.mv, c.add. */
+   printf("RV64C compressed instruction set, quadrant 2\n");
+   /* ------------- c.slli rd_rs1, nzuimm[5:0] -------------- */
+   TESTINST_1_1(2, "c.slli a0, 1", 0xabcdef0123456789, a0, a0);
+   TESTINST_1_1(2, "c.slli a0, 2", 0xabcdef0123456789, a0, a0);
+   TESTINST_1_1(2, "c.slli a0, 4", 0xabcdef0123456789, a0, a0);
+   TESTINST_1_1(2, "c.slli a0, 8", 0xabcdef0123456789, a0, a0);
+   TESTINST_1_1(2, "c.slli a0, 16", 0xabcdef0123456789, a0, a0);
+   TESTINST_1_1(2, "c.slli a0, 32", 0xabcdef0123456789, a0, a0);
+   TESTINST_1_1(2, "c.slli a0, 63", 0xabcdef0123456789, a0, a0);
+
+   TESTINST_1_1(2, "c.slli a5, 1", 0xabcdef0123456789, a5, a5);
+
+   /* -------------- c.fldsp rd, uimm[8:3](x2) -------------- */
+   TESTINST_1_1_FLOAD(2, "c.fldsp fa0, 0(sp)", fa0, sp);
+   TESTINST_1_1_FLOAD(2, "c.fldsp fa0, 8(sp)", fa0, sp);
+   TESTINST_1_1_FLOAD(2, "c.fldsp fa0, 16(sp)", fa0, sp);
+   TESTINST_1_1_FLOAD(2, "c.fldsp fa0, 32(sp)", fa0, sp);
+   TESTINST_1_1_FLOAD(2, "c.fldsp fa0, 64(sp)", fa0, sp);
+   TESTINST_1_1_FLOAD(2, "c.fldsp fa0, 128(sp)", fa0, sp);
+   TESTINST_1_1_FLOAD(2, "c.fldsp fa0, 256(sp)", fa0, sp);
+   TESTINST_1_1_FLOAD(2, "c.fldsp fa0, 504(sp)", fa0, sp);
+
+   TESTINST_1_1_FLOAD(2, "c.fldsp fa5, 0(sp)", fa5, sp);
+
+   /* -------------- c.lwsp rd, uimm[7:2](x2) --------------- */
+   TESTINST_1_1_LOAD(2, "c.lwsp a0, 0(sp)", a0, sp);
+   TESTINST_1_1_LOAD(2, "c.lwsp a0, 4(sp)", a0, sp);
+   TESTINST_1_1_LOAD(2, "c.lwsp a0, 8(sp)", a0, sp);
+   TESTINST_1_1_LOAD(2, "c.lwsp a0, 16(sp)", a0, sp);
+   TESTINST_1_1_LOAD(2, "c.lwsp a0, 32(sp)", a0, sp);
+   TESTINST_1_1_LOAD(2, "c.lwsp a0, 64(sp)", a0, sp);
+   TESTINST_1_1_LOAD(2, "c.lwsp a0, 128(sp)", a0, sp);
+   TESTINST_1_1_LOAD(2, "c.lwsp a0, 252(sp)", a0, sp);
+
+   TESTINST_1_1_LOAD(2, "c.lwsp a5, 0(sp)", a5, sp);
+
+   /* -------------- c.ldsp rd, uimm[8:3](x2) --------------- */
+   TESTINST_1_1_LOAD(2, "c.ldsp a0, 0(sp)", a0, sp);
+   TESTINST_1_1_LOAD(2, "c.ldsp a0, 8(sp)", a0, sp);
+   TESTINST_1_1_LOAD(2, "c.ldsp a0, 16(sp)", a0, sp);
+   TESTINST_1_1_LOAD(2, "c.ldsp a0, 32(sp)", a0, sp);
+   TESTINST_1_1_LOAD(2, "c.ldsp a0, 64(sp)", a0, sp);
+   TESTINST_1_1_LOAD(2, "c.ldsp a0, 128(sp)", a0, sp);
+   TESTINST_1_1_LOAD(2, "c.ldsp a0, 256(sp)", a0, sp);
+   TESTINST_1_1_LOAD(2, "c.ldsp a0, 504(sp)", a0, sp);
+
+   TESTINST_1_1_LOAD(2, "c.ldsp a5, 0(sp)", a5, sp);
+
+   /* ---------------------- c.jr rs1 ----------------------- */
+   TESTINST_0_1_JR_RANGE(2, "c.jr t0", "1f+4", 4, t0);
+   TESTINST_0_1_JR_RANGE(2, "c.jr t0", "1f+6", 6, t0);
+   TESTINST_0_1_JR_RANGE(2, "c.jr t0", "1f+8", 8, t0);
+   TESTINST_0_1_JR_RANGE(2, "c.jr t0", "1f-4", -4, t0);
+   TESTINST_0_1_JR_RANGE(2, "c.jr t0", "1f-6", -6, t0);
+   TESTINST_0_1_JR_RANGE(2, "c.jr t0", "1f-8", -8, t0);
+
+   TESTINST_0_1_JR_RANGE(2, "c.jr t6", "1f+4", 4, t6);
+
+   /* -------------------- c.mv rd, rs2 --------------------- */
+   TESTINST_1_1(2, "c.mv t0, t6", 0xabcdef0123456789, t0, t6);
+   TESTINST_1_1(2, "c.mv t6, t0", 0xabcdef0123456789, t6, t0);
+   TESTINST_1_1(2, "c.mv s0, s11", 0xabcdef0123456789, s0, s11);
+   TESTINST_1_1(2, "c.mv s11, s0", 0xabcdef0123456789, s11, s0);
+   TESTINST_1_1(2, "c.mv a0, a7", 0xabcdef0123456789, a0, a7);
+   TESTINST_1_1(2, "c.mv a7, a0", 0xabcdef0123456789, a7, a0);
+
+   /* --------------------- c.jalr rs1 ---------------------- */
+   TESTINST_1_1_JALR_RANGE(2, "c.jalr t0", "1f+4", 4, ra, t0);
+   TESTINST_1_1_JALR_RANGE(2, "c.jalr t0", "1f+6", 6, ra, t0);
+   TESTINST_1_1_JALR_RANGE(2, "c.jalr t0", "1f+8", 8, ra, t0);
+   TESTINST_1_1_JALR_RANGE(2, "c.jalr t0", "1f-4", -4, ra, t0);
+   TESTINST_1_1_JALR_RANGE(2, "c.jalr t0", "1f-6", -6, ra, t0);
+   TESTINST_1_1_JALR_RANGE(2, "c.jalr t0", "1f-8", -8, ra, t0);
+
+   TESTINST_1_1_JALR_RANGE(2, "c.jalr t6", "1f+4", 4, ra, t6);
+
+   /* ------------------ c.add rd_rs1, rs2 ------------------ */
+   TESTINST_1_2(2, "c.add a0, a1", 0x0000000000001000, 0x0000000000002000, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.add a0, a1", 0x000000007fffffff, 0x0000000000000001, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.add a0, a1", 0x00000000fffffffe, 0x0000000000000001, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.add a0, a1", 0x00000000ffffffff, 0x0000000000000001, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.add a0, a1", 0xfffffffffffffffe, 0x0000000000000001, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.add a0, a1", 0xffffffffffffffff, 0x0000000000000001, a0,
+                a0, a1);
+   TESTINST_1_2(2, "c.add a4, a5", 0x0000000000001000, 0x0000000000002000, a4,
+                a4, a5);
+
+   /* ------------- c.fsdsp rs2, uimm[8:3](x2) -------------- */
+   TESTINST_0_2_FSTORE(2, "c.fsdsp fa0, 0(sp)", 0xabcdef0123456789, fa0, sp);
+   TESTINST_0_2_FSTORE(2, "c.fsdsp fa0, 8(sp)", 0xabcdef0123456789, fa0, sp);
+   TESTINST_0_2_FSTORE(2, "c.fsdsp fa0, 16(sp)", 0xabcdef0123456789, fa0, sp);
+   TESTINST_0_2_FSTORE(2, "c.fsdsp fa0, 32(sp)", 0xabcdef0123456789, fa0, sp);
+   TESTINST_0_2_FSTORE(2, "c.fsdsp fa0, 64(sp)", 0xabcdef0123456789, fa0, sp);
+   TESTINST_0_2_FSTORE(2, "c.fsdsp fa0, 128(sp)", 0xabcdef0123456789, fa0, sp);
+   TESTINST_0_2_FSTORE(2, "c.fsdsp fa0, 256(sp)", 0xabcdef0123456789, fa0, sp);
+   TESTINST_0_2_FSTORE(2, "c.fsdsp fa0, 504(sp)", 0xabcdef0123456789, fa0, sp);
+
+   TESTINST_0_2_FSTORE(2, "c.fsdsp fa5, 0(sp)", 0xabcdef0123456789, fa5, sp);
+
+   /* -------------- c.swsp rs2, uimm[7:2](x2) -------------- */
+   TESTINST_0_2_STORE(2, "c.swsp a0, 0(sp)", 0xabcdef0123456789, a0, sp);
+   TESTINST_0_2_STORE(2, "c.swsp a0, 4(sp)", 0xabcdef0123456789, a0, sp);
+   TESTINST_0_2_STORE(2, "c.swsp a0, 8(sp)", 0xabcdef0123456789, a0, sp);
+   TESTINST_0_2_STORE(2, "c.swsp a0, 16(sp)", 0xabcdef0123456789, a0, sp);
+   TESTINST_0_2_STORE(2, "c.swsp a0, 32(sp)", 0xabcdef0123456789, a0, sp);
+   TESTINST_0_2_STORE(2, "c.swsp a0, 64(sp)", 0xabcdef0123456789, a0, sp);
+   TESTINST_0_2_STORE(2, "c.swsp a0, 128(sp)", 0xabcdef0123456789, a0, sp);
+   TESTINST_0_2_STORE(2, "c.swsp a0, 252(sp)", 0xabcdef0123456789, a0, sp);
+
+   TESTINST_0_2_STORE(2, "c.swsp a5, 0(sp)", 0xabcdef0123456789, a5, sp);
+
+   /* -------------- c.sdsp rs2, uimm[8:3](x2) -------------- */
+   TESTINST_0_2_STORE(2, "c.sdsp a0, 0(sp)", 0xabcdef0123456789, a0, sp);
+   TESTINST_0_2_STORE(2, "c.sdsp a0, 8(sp)", 0xabcdef0123456789, a0, sp);
+   TESTINST_0_2_STORE(2, "c.sdsp a0, 16(sp)", 0xabcdef0123456789, a0, sp);
+   TESTINST_0_2_STORE(2, "c.sdsp a0, 32(sp)", 0xabcdef0123456789, a0, sp);
+   TESTINST_0_2_STORE(2, "c.sdsp a0, 64(sp)", 0xabcdef0123456789, a0, sp);
+   TESTINST_0_2_STORE(2, "c.sdsp a0, 128(sp)", 0xabcdef0123456789, a0, sp);
+   TESTINST_0_2_STORE(2, "c.sdsp a0, 256(sp)", 0xabcdef0123456789, a0, sp);
+   TESTINST_0_2_STORE(2, "c.sdsp a0, 504(sp)", 0xabcdef0123456789, a0, sp);
+
+   TESTINST_0_2_STORE(2, "c.sdsp a5, 0(sp)", 0xabcdef0123456789, a5, sp);
+}
+
+int main(void)
+{
+   /* Run the quadrant 0, 1 and 2 suites back to back. The comma operator
+      evaluates its operands left to right, so the report order is fixed.
+      The exit status is always 0. */
+   return test_compressed_00(), test_compressed_01(), test_compressed_10(), 0;
+}
diff --git a/none/tests/riscv64/compressed.stderr.exp b/none/tests/riscv64/compressed.stderr.exp
new file mode 100644
index 000000000..e69de29bb
diff --git a/none/tests/riscv64/compressed.stdout.exp b/none/tests/riscv64/compressed.stdout.exp
new file mode 100644
index 000000000..221cece29
--- /dev/null
+++ b/none/tests/riscv64/compressed.stdout.exp
@@ -0,0 +1,917 @@
+RV64C compressed instruction set, quadrant 0
+c.addi4spn a0, sp, 4 ::
+  inputs: sp=0x0000000000001000
+  output: a0=0x0000000000001004
+c.addi4spn a0, sp, 8 ::
+  inputs: sp=0x0000000000001000
+  output: a0=0x0000000000001008
+c.addi4spn a0, sp, 16 ::
+  inputs: sp=0x0000000000001000
+  output: a0=0x0000000000001010
+c.addi4spn a0, sp, 32 ::
+  inputs: sp=0x0000000000001000
+  output: a0=0x0000000000001020
+c.addi4spn a0, sp, 64 ::
+  inputs: sp=0x0000000000001000
+  output: a0=0x0000000000001040
+c.addi4spn a0, sp, 128 ::
+  inputs: sp=0x0000000000001000
+  output: a0=0x0000000000001080
+c.addi4spn a0, sp, 256 ::
+  inputs: sp=0x0000000000001000
+  output: a0=0x0000000000001100
+c.addi4spn a0, sp, 512 ::
+  inputs: sp=0x0000000000001000
+  output: a0=0x0000000000001200
+c.addi4spn a0, sp, 1020 ::
+  inputs: sp=0x0000000000001000
+  output: a0=0x00000000000013fc
+c.addi4spn a0, sp, 4 ::
+  inputs: sp=0x000000007ffffffc
+  output: a0=0x0000000080000000
+c.addi4spn a0, sp, 4 ::
+  inputs: sp=0x00000000fffffffb
+  output: a0=0x00000000ffffffff
+c.addi4spn a0, sp, 4 ::
+  inputs: sp=0x00000000fffffffc
+  output: a0=0x0000000100000000
+c.addi4spn a5, sp, 4 ::
+  inputs: sp=0x0000000000001000
+  output: a0=0x0000000000000020
+c.fld fa0, 0(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0xbeafe48541dc8da0
+  no memory changes
+c.fld fa0, 8(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0xba6d23fbddcfb6e4
+  no memory changes
+c.fld fa0, 16(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0xe23b6d7d6753321d
+  no memory changes
+c.fld fa0, 32(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0x849d6e092767dabd
+  no memory changes
+c.fld fa0, 64(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0xec83e542163f4e88
+  no memory changes
+c.fld fa0, 128(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0x21989d257082ce6f
+  no memory changes
+c.fld fa0, 248(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0x63082c1746f49884
+  no memory changes
+c.fld fa4, 0(a5) ::
+  inputs: a5=&area_mid
+  output: fa4=0x5486cc410e1801e6
+  no memory changes
+c.lw a0, 0(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000000720eda7
+  no memory changes
+c.lw a0, 4(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xffffffffec7a332e
+  no memory changes
+c.lw a0, 8(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xfffffffff26fc107
+  no memory changes
+c.lw a0, 16(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xffffffffb4a7dcfa
+  no memory changes
+c.lw a0, 32(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xffffffff81406130
+  no memory changes
+c.lw a0, 64(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000000da412a7
+  no memory changes
+c.lw a0, 124(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000006a330ec8
+  no memory changes
+c.lw a4, 0(a5) ::
+  inputs: a5=&area_mid
+  output: a4=0xffffffffd45c61ed
+  no memory changes
+c.ld a0, 0(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x82501ceacc654dae
+  no memory changes
+c.ld a0, 8(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x6dbeca915808e621
+  no memory changes
+c.ld a0, 16(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x863d8543d33dd28a
+  no memory changes
+c.ld a0, 32(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x07ff662e72b0598a
+  no memory changes
+c.ld a0, 64(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x30a49d2822488e15
+  no memory changes
+c.ld a0, 128(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xe539d48afb0b8e7d
+  no memory changes
+c.ld a0, 248(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x36f9f34ce2cde861
+  no memory changes
+c.ld a4, 0(a5) ::
+  inputs: a5=&area_mid
+  output: a4=0x172704a799a1c1f4
+  no memory changes
+c.fsd fa0, 0(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+000]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+c.fsd fa0, 8(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+000]  .. .. .. .. .. .. .. .. 89 67 45 23 01 ef cd ab
+c.fsd fa0, 16(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+016]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+c.fsd fa0, 32(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+032]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+c.fsd fa0, 64(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+064]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+c.fsd fa0, 128(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+128]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+c.fsd fa0, 248(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+240]  .. .. .. .. .. .. .. .. 89 67 45 23 01 ef cd ab
+c.fsd fa4, 0(a5) ::
+  inputs: fa4=0xabcdef0123456789, a5=&area_mid
+  [+000]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+c.sw a0, 0(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+000]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+c.sw a0, 4(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+000]  .. .. .. .. 89 67 45 23 .. .. .. .. .. .. .. ..
+c.sw a0, 8(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+000]  .. .. .. .. .. .. .. .. 89 67 45 23 .. .. .. ..
+c.sw a0, 16(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+016]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+c.sw a0, 32(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+032]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+c.sw a0, 64(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+064]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+c.sw a0, 124(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+112]  .. .. .. .. .. .. .. .. .. .. .. .. 89 67 45 23
+c.sw a4, 0(a5) ::
+  inputs: a4=0xabcdef0123456789, a5=&area_mid
+  [+000]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+c.sd a0, 0(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+000]  89 67 45 23 01 .. cd ab .. .. .. .. .. .. .. ..
+c.sd a0, 8(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+000]  .. .. .. .. .. .. .. .. 89 67 45 23 01 ef cd ab
+c.sd a0, 16(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+016]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+c.sd a0, 32(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+032]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+c.sd a0, 64(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+064]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+c.sd a0, 128(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+128]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+c.sd a0, 248(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+240]  .. .. .. .. .. .. .. .. 89 67 45 23 01 ef cd ab
+c.sd a4, 0(a5) ::
+  inputs: a4=0xabcdef0123456789, a5=&area_mid
+  [+000]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+
+RV64C compressed instruction set, quadrant 1
+c.nop ::
+c.addi a0, 1 ::
+  inputs: a0=0x0000000000001000
+  output: a0=0x0000000000001001
+c.addi a0, 2 ::
+  inputs: a0=0x0000000000001000
+  output: a0=0x0000000000001002
+c.addi a0, 4 ::
+  inputs: a0=0x0000000000001000
+  output: a0=0x0000000000001004
+c.addi a0, 8 ::
+  inputs: a0=0x0000000000001000
+  output: a0=0x0000000000001008
+c.addi a0, 16 ::
+  inputs: a0=0x0000000000001000
+  output: a0=0x0000000000001010
+c.addi a0, 31 ::
+  inputs: a0=0x0000000000001000
+  output: a0=0x000000000000101f
+c.addi a0, -1 ::
+  inputs: a0=0x0000000000001000
+  output: a0=0x0000000000000fff
+c.addi a0, -32 ::
+  inputs: a0=0x0000000000001000
+  output: a0=0x0000000000000fe0
+c.addi a0, 1 ::
+  inputs: a0=0x000000007fffffff
+  output: a0=0x0000000080000000
+c.addi a0, 1 ::
+  inputs: a0=0x00000000fffffffe
+  output: a0=0x00000000ffffffff
+c.addi a0, 1 ::
+  inputs: a0=0x00000000ffffffff
+  output: a0=0x0000000100000000
+c.addi t6, 1 ::
+  inputs: t6=0x0000000000001000
+  output: t6=0x0000000000001001
+c.addiw a0, 0 ::
+  inputs: a0=0x0000000000001000
+  output: a0=0x0000000000001000
+c.addiw a0, 1 ::
+  inputs: a0=0x0000000000001000
+  output: a0=0x0000000000001001
+c.addiw a0, 2 ::
+  inputs: a0=0x0000000000001000
+  output: a0=0x0000000000001002
+c.addiw a0, 4 ::
+  inputs: a0=0x0000000000001000
+  output: a0=0x0000000000001004
+c.addiw a0, 8 ::
+  inputs: a0=0x0000000000001000
+  output: a0=0x0000000000001008
+c.addiw a0, 16 ::
+  inputs: a0=0x0000000000001000
+  output: a0=0x0000000000001010
+c.addiw a0, 31 ::
+  inputs: a0=0x0000000000001000
+  output: a0=0x000000000000101f
+c.addiw a0, -1 ::
+  inputs: a0=0x0000000000001000
+  output: a0=0x0000000000000fff
+c.addiw a0, -32 ::
+  inputs: a0=0x0000000000001000
+  output: a0=0x0000000000000fe0
+c.addiw a0, 1 ::
+  inputs: a0=0x000000007fffffff
+  output: a0=0xffffffff80000000
+c.addiw a0, 1 ::
+  inputs: a0=0x00000000fffffffe
+  output: a0=0xffffffffffffffff
+c.addiw a0, 1 ::
+  inputs: a0=0x00000000ffffffff
+  output: a0=0x0000000000000000
+c.addiw t6, 0 ::
+  inputs: t6=0x0000000000001000
+  output: t6=0x0000000000001000
+c.li a0, 0 ::
+  output: a0=0x0000000000000000
+c.li a0, 1 ::
+  output: a0=0x0000000000000001
+c.li a0, 2 ::
+  output: a0=0x0000000000000002
+c.li a0, 4 ::
+  output: a0=0x0000000000000004
+c.li a0, 8 ::
+  output: a0=0x0000000000000008
+c.li a0, 15 ::
+  output: a0=0x000000000000000f
+c.li a0, -1 ::
+  output: a0=0xffffffffffffffff
+c.li a0, -16 ::
+  output: a0=0xfffffffffffffff0
+c.li t6, 1 ::
+  output: t6=0x0000000000000001
+c.addi16sp sp, 16 ::
+  inputs: sp=0x0000000000001000
+  output: sp=0x0000000000001010
+c.addi16sp sp, 32 ::
+  inputs: sp=0x0000000000001000
+  output: sp=0x0000000000001020
+c.addi16sp sp, 64 ::
+  inputs: sp=0x0000000000001000
+  output: sp=0x0000000000001040
+c.addi16sp sp, 128 ::
+  inputs: sp=0x0000000000001000
+  output: sp=0x0000000000001080
+c.addi16sp sp, 256 ::
+  inputs: sp=0x0000000000001000
+  output: sp=0x0000000000001100
+c.addi16sp sp, 496 ::
+  inputs: sp=0x0000000000001000
+  output: sp=0x00000000000011f0
+c.addi16sp sp, -16 ::
+  inputs: sp=0x0000000000001000
+  output: sp=0x0000000000000ff0
+c.addi16sp sp, -512 ::
+  inputs: sp=0x0000000000001000
+  output: sp=0x0000000000000e00
+c.addi16sp sp, 16 ::
+  inputs: sp=0x000000007ffffff0
+  output: sp=0x0000000080000000
+c.addi16sp sp, 16 ::
+  inputs: sp=0x00000000ffffffef
+  output: sp=0x00000000ffffffff
+c.addi16sp sp, 16 ::
+  inputs: sp=0x00000000fffffff0
+  output: sp=0x0000000100000000
+c.lui a0, 1 ::
+  output: a0=0x0000000000001000
+c.lui a0, 2 ::
+  output: a0=0x0000000000002000
+c.lui a0, 4 ::
+  output: a0=0x0000000000004000
+c.lui a0, 8 ::
+  output: a0=0x0000000000008000
+c.lui a0, 16 ::
+  output: a0=0x0000000000010000
+c.lui a0, 31 ::
+  output: a0=0x000000000001f000
+c.lui a0, 0xfffff ::
+  output: a0=0xfffffffffffff000
+c.lui a0, 0xfffe0 ::
+  output: a0=0xfffffffffffe0000
+c.lui t6, 1 ::
+  output: t6=0x0000000000001000
+c.srli a0, 1 ::
+  inputs: a0=0xabcdef0123456789
+  output: a0=0x55e6f78091a2b3c4
+c.srli a0, 2 ::
+  inputs: a0=0xabcdef0123456789
+  output: a0=0x2af37bc048d159e2
+c.srli a0, 4 ::
+  inputs: a0=0xabcdef0123456789
+  output: a0=0x0abcdef012345678
+c.srli a0, 8 ::
+  inputs: a0=0xabcdef0123456789
+  output: a0=0x00abcdef01234567
+c.srli a0, 16 ::
+  inputs: a0=0xabcdef0123456789
+  output: a0=0x0000abcdef012345
+c.srli a0, 32 ::
+  inputs: a0=0xabcdef0123456789
+  output: a0=0x00000000abcdef01
+c.srli a0, 63 ::
+  inputs: a0=0xabcdef0123456789
+  output: a0=0x0000000000000001
+c.srli a5, 1 ::
+  inputs: a5=0xabcdef0123456789
+  output: a5=0x55e6f78091a2b3c4
+c.srai a0, 1 ::
+  inputs: a0=0xabcdef0123456789
+  output: a0=0xd5e6f78091a2b3c4
+c.srai a0, 2 ::
+  inputs: a0=0xabcdef0123456789
+  output: a0=0xeaf37bc048d159e2
+c.srai a0, 4 ::
+  inputs: a0=0xabcdef0123456789
+  output: a0=0xfabcdef012345678
+c.srai a0, 8 ::
+  inputs: a0=0xabcdef0123456789
+  output: a0=0xffabcdef01234567
+c.srai a0, 16 ::
+  inputs: a0=0xabcdef0123456789
+  output: a0=0xffffabcdef012345
+c.srai a0, 32 ::
+  inputs: a0=0xabcdef0123456789
+  output: a0=0xffffffffabcdef01
+c.srai a0, 63 ::
+  inputs: a0=0xabcdef0123456789
+  output: a0=0xffffffffffffffff
+c.srai a5, 1 ::
+  inputs: a5=0xabcdef0123456789
+  output: a5=0xd5e6f78091a2b3c4
+c.andi a0, 0 ::
+  inputs: a0=0xffffffffffffffff
+  output: a0=0x0000000000000000
+c.andi a0, 1 ::
+  inputs: a0=0xffffffffffffffff
+  output: a0=0x0000000000000001
+c.andi a0, 2 ::
+  inputs: a0=0xffffffffffffffff
+  output: a0=0x0000000000000002
+c.andi a0, 4 ::
+  inputs: a0=0xffffffffffffffff
+  output: a0=0x0000000000000004
+c.andi a0, 8 ::
+  inputs: a0=0xffffffffffffffff
+  output: a0=0x0000000000000008
+c.andi a0, 16 ::
+  inputs: a0=0xffffffffffffffff
+  output: a0=0x0000000000000010
+c.andi a0, 31 ::
+  inputs: a0=0xffffffffffffffff
+  output: a0=0x000000000000001f
+c.andi a5, 0 ::
+  inputs: a5=0xffffffffffffffff
+  output: a5=0x0000000000000000
+c.sub a0, a1 ::
+  inputs: a0=0x0000000000001000, a1=0x0000000000000fff
+  output: a0=0x0000000000000001
+c.sub a0, a1 ::
+  inputs: a0=0x0000000000001000, a1=0x0000000000001000
+  output: a0=0x0000000000000000
+c.sub a0, a1 ::
+  inputs: a0=0x0000000000001000, a1=0x0000000000001001
+  output: a0=0xffffffffffffffff
+c.sub a0, a1 ::
+  inputs: a0=0xffffffffffffffff, a1=0x0000000000000000
+  output: a0=0xffffffffffffffff
+c.sub a0, a1 ::
+  inputs: a0=0x0000000100000000, a1=0x0000000000000001
+  output: a0=0x00000000ffffffff
+c.sub a4, a5 ::
+  inputs: a4=0x0000000000001000, a5=0x0000000000000fff
+  output: a4=0x0000000000000001
+c.xor a0, a1 ::
+  inputs: a0=0x0000ffff0000ffff, a1=0x00000000ffffffff
+  output: a0=0x0000ffffffff0000
+c.xor a4, a5 ::
+  inputs: a4=0x0000ffff0000ffff, a5=0x00000000ffffffff
+  output: a4=0x0000ffffffff0000
+c.or a0, a1 ::
+  inputs: a0=0x0000ffff0000ffff, a1=0x00000000ffffffff
+  output: a0=0x0000ffffffffffff
+c.or a4, a5 ::
+  inputs: a4=0x0000ffff0000ffff, a5=0x00000000ffffffff
+  output: a4=0x0000ffffffffffff
+c.and a0, a1 ::
+  inputs: a0=0x0000ffff0000ffff, a1=0x00000000ffffffff
+  output: a0=0x000000000000ffff
+c.and a4, a5 ::
+  inputs: a4=0x0000ffff0000ffff, a5=0x00000000ffffffff
+  output: a4=0x000000000000ffff
+c.subw a0, a1 ::
+  inputs: a0=0x0000000000001000, a1=0x0000000000000fff
+  output: a0=0x0000000000000001
+c.subw a0, a1 ::
+  inputs: a0=0x0000000000001000, a1=0x0000000000001000
+  output: a0=0x0000000000000000
+c.subw a0, a1 ::
+  inputs: a0=0x0000000000001000, a1=0x0000000000001001
+  output: a0=0xffffffffffffffff
+c.subw a0, a1 ::
+  inputs: a0=0xffffffffffffffff, a1=0x0000000000000000
+  output: a0=0xffffffffffffffff
+c.subw a0, a1 ::
+  inputs: a0=0x0000000100000000, a1=0x0000000000000001
+  output: a0=0xffffffffffffffff
+c.subw a4, a5 ::
+  inputs: a4=0x0000000000001000, a5=0x0000000000000fff
+  output: a4=0x0000000000000001
+c.addw a0, a1 ::
+  inputs: a0=0x0000000000001000, a1=0x0000000000002000
+  output: a0=0x0000000000003000
+c.addw a0, a1 ::
+  inputs: a0=0x000000007fffffff, a1=0x0000000000000001
+  output: a0=0xffffffff80000000
+c.addw a0, a1 ::
+  inputs: a0=0x00000000fffffffe, a1=0x0000000000000001
+  output: a0=0xffffffffffffffff
+c.addw a0, a1 ::
+  inputs: a0=0x00000000ffffffff, a1=0x0000000000000001
+  output: a0=0x0000000000000000
+c.addw a0, a1 ::
+  inputs: a0=0xfffffffffffffffe, a1=0x0000000000000001
+  output: a0=0xffffffffffffffff
+c.addw a0, a1 ::
+  inputs: a0=0xffffffffffffffff, a1=0x0000000000000001
+  output: a0=0x0000000000000000
+c.addw a4, a5 ::
+  inputs: a4=0x0000000000001000, a5=0x0000000000002000
+  output: a4=0x0000000000003000
+c.j .+4 ::
+  target: reached
+c.j .+6 ::
+  target: reached
+c.j .+8 ::
+  target: reached
+c.j .+16 ::
+  target: reached
+c.j .+32 ::
+  target: reached
+c.j .+64 ::
+  target: reached
+c.j .+128 ::
+  target: reached
+c.j .+256 ::
+  target: reached
+c.j .+512 ::
+  target: reached
+c.j .+1024 ::
+  target: reached
+c.j .+2044 ::
+  target: reached
+c.j .-4 ::
+  target: reached
+c.j .-6 ::
+  target: reached
+c.j .-2048 ::
+  target: reached
+c.beqz a0, .+4 ::
+  inputs: a0=0
+  target: reached
+c.beqz a0, .+6 ::
+  inputs: a0=0
+  target: reached
+c.beqz a0, .+8 ::
+  inputs: a0=0
+  target: reached
+c.beqz a0, .+16 ::
+  inputs: a0=0
+  target: reached
+c.beqz a0, .+32 ::
+  inputs: a0=0
+  target: reached
+c.beqz a0, .+64 ::
+  inputs: a0=0
+  target: reached
+c.beqz a0, .+128 ::
+  inputs: a0=0
+  target: reached
+c.beqz a0, .+252 ::
+  inputs: a0=0
+  target: reached
+c.beqz a0, .-4 ::
+  inputs: a0=0
+  target: reached
+c.beqz a0, .-6 ::
+  inputs: a0=0
+  target: reached
+c.beqz a0, .-256 ::
+  inputs: a0=0
+  target: reached
+c.beqz a5, .+4 ::
+  inputs: a5=0
+  target: reached
+c.beqz a0, 1f ::
+  inputs: a0=0
+  branch: taken
+c.beqz a0, 1f ::
+  inputs: a0=1
+  branch: not taken
+c.bnez a0, .+4 ::
+  inputs: a0=1
+  target: reached
+c.bnez a0, .+6 ::
+  inputs: a0=1
+  target: reached
+c.bnez a0, .+8 ::
+  inputs: a0=1
+  target: reached
+c.bnez a0, .+16 ::
+  inputs: a0=1
+  target: reached
+c.bnez a0, .+32 ::
+  inputs: a0=1
+  target: reached
+c.bnez a0, .+64 ::
+  inputs: a0=1
+  target: reached
+c.bnez a0, .+128 ::
+  inputs: a0=1
+  target: reached
+c.bnez a0, .+252 ::
+  inputs: a0=1
+  target: reached
+c.bnez a0, .-4 ::
+  inputs: a0=1
+  target: reached
+c.bnez a0, .-6 ::
+  inputs: a0=1
+  target: reached
+c.bnez a0, .-256 ::
+  inputs: a0=1
+  target: reached
+c.bnez a5, .+4 ::
+  inputs: a5=1
+  target: reached
+c.bnez a0, 1f ::
+  inputs: a0=0
+  branch: not taken
+c.bnez a0, 1f ::
+  inputs: a0=1
+  branch: taken
+
+RV64C compressed instruction set, quadrant 2
+c.slli a0, 1 ::
+  inputs: a0=0xabcdef0123456789
+  output: a0=0x579bde02468acf12
+c.slli a0, 2 ::
+  inputs: a0=0xabcdef0123456789
+  output: a0=0xaf37bc048d159e24
+c.slli a0, 4 ::
+  inputs: a0=0xabcdef0123456789
+  output: a0=0xbcdef01234567890
+c.slli a0, 8 ::
+  inputs: a0=0xabcdef0123456789
+  output: a0=0xcdef012345678900
+c.slli a0, 16 ::
+  inputs: a0=0xabcdef0123456789
+  output: a0=0xef01234567890000
+c.slli a0, 32 ::
+  inputs: a0=0xabcdef0123456789
+  output: a0=0x2345678900000000
+c.slli a0, 63 ::
+  inputs: a0=0xabcdef0123456789
+  output: a0=0x8000000000000000
+c.slli a5, 1 ::
+  inputs: a5=0xabcdef0123456789
+  output: a5=0x579bde02468acf12
+c.fldsp fa0, 0(sp) ::
+  inputs: sp=&area_mid
+  output: fa0=0x09938bb5e378ccc9
+  no memory changes
+c.fldsp fa0, 8(sp) ::
+  inputs: sp=&area_mid
+  output: fa0=0xd46119bc4f7b459c
+  no memory changes
+c.fldsp fa0, 16(sp) ::
+  inputs: sp=&area_mid
+  output: fa0=0xcd40b4ceaa101165
+  no memory changes
+c.fldsp fa0, 32(sp) ::
+  inputs: sp=&area_mid
+  output: fa0=0x0ec2557909435825
+  no memory changes
+c.fldsp fa0, 64(sp) ::
+  inputs: sp=&area_mid
+  output: fa0=0xb7e70cf3395b0d30
+  no memory changes
+c.fldsp fa0, 128(sp) ::
+  inputs: sp=&area_mid
+  output: fa0=0x6c7c4355121e0d98
+  no memory changes
+c.fldsp fa0, 256(sp) ::
+  inputs: sp=&area_mid
+  output: fa0=0xee5b125eb6a74a8d
+  no memory changes
+c.fldsp fa0, 504(sp) ::
+  inputs: sp=&area_mid
+  output: fa0=0xbd4f1ec2a7cdc5d1
+  no memory changes
+c.fldsp fa5, 0(sp) ::
+  inputs: sp=&area_mid
+  output: fa5=0x6a642668a9bd2cd0
+  no memory changes
+c.lwsp a0, 0(sp) ::
+  inputs: sp=&area_mid
+  output: a0=0xffffffffa2c51890
+  no memory changes
+c.lwsp a0, 4(sp) ::
+  inputs: sp=&area_mid
+  output: a0=0x0000000003588d55
+  no memory changes
+c.lwsp a0, 8(sp) ::
+  inputs: sp=&area_mid
+  output: a0=0xffffffffbd9fc2c3
+  no memory changes
+c.lwsp a0, 16(sp) ::
+  inputs: sp=&area_mid
+  output: a0=0xffffffff8dc294e9
+  no memory changes
+c.lwsp a0, 32(sp) ::
+  inputs: sp=&area_mid
+  output: a0=0x00000000185128a6
+  no memory changes
+c.lwsp a0, 64(sp) ::
+  inputs: sp=&area_mid
+  output: a0=0xffffffffa02175a9
+  no memory changes
+c.lwsp a0, 128(sp) ::
+  inputs: sp=&area_mid
+  output: a0=0x000000002a54a500
+  no memory changes
+c.lwsp a0, 252(sp) ::
+  inputs: sp=&area_mid
+  output: a0=0x0000000042735f59
+  no memory changes
+c.lwsp a5, 0(sp) ::
+  inputs: sp=&area_mid
+  output: a5=0x00000000670a7797
+  no memory changes
+c.ldsp a0, 0(sp) ::
+  inputs: sp=&area_mid
+  output: a0=0x6429290760136358
+  no memory changes
+c.ldsp a0, 8(sp) ::
+  inputs: sp=&area_mid
+  output: a0=0xbefd16247abc5ae2
+  no memory changes
+c.ldsp a0, 16(sp) ::
+  inputs: sp=&area_mid
+  output: a0=0x05a2ce0c43b76420
+  no memory changes
+c.ldsp a0, 32(sp) ::
+  inputs: sp=&area_mid
+  output: a0=0x22efeba3bef6670c
+  no memory changes
+c.ldsp a0, 64(sp) ::
+  inputs: sp=&area_mid
+  output: a0=0x82ad9af526269470
+  no memory changes
+c.ldsp a0, 128(sp) ::
+  inputs: sp=&area_mid
+  output: a0=0xa872c2086f198487
+  no memory changes
+c.ldsp a0, 256(sp) ::
+  inputs: sp=&area_mid
+  output: a0=0x0ab07070f302a1dc
+  no memory changes
+c.ldsp a0, 504(sp) ::
+  inputs: sp=&area_mid
+  output: a0=0xcb1f9e3eb6021eea
+  no memory changes
+c.ldsp a5, 0(sp) ::
+  inputs: sp=&area_mid
+  output: a5=0xc6fac5ba2658c35f
+  no memory changes
+c.jr t0 ::
+  inputs: t0=1f+4
+  target: reached
+c.jr t0 ::
+  inputs: t0=1f+6
+  target: reached
+c.jr t0 ::
+  inputs: t0=1f+8
+  target: reached
+c.jr t0 ::
+  inputs: t0=1f-4
+  target: reached
+c.jr t0 ::
+  inputs: t0=1f-6
+  target: reached
+c.jr t0 ::
+  inputs: t0=1f-8
+  target: reached
+c.jr t6 ::
+  inputs: t6=1f+4
+  target: reached
+c.mv t0, t6 ::
+  inputs: t6=0xabcdef0123456789
+  output: t0=0xabcdef0123456789
+c.mv t6, t0 ::
+  inputs: t0=0xabcdef0123456789
+  output: t6=0xabcdef0123456789
+c.mv s0, s11 ::
+  inputs: s11=0xabcdef0123456789
+  output: s0=0xabcdef0123456789
+c.mv s11, s0 ::
+  inputs: s0=0xabcdef0123456789
+  output: s11=0xabcdef0123456789
+c.mv a0, a7 ::
+  inputs: a7=0xabcdef0123456789
+  output: a0=0xabcdef0123456789
+c.mv a7, a0 ::
+  inputs: a0=0xabcdef0123456789
+  output: a7=0xabcdef0123456789
+c.jalr t0 ::
+  inputs: t0=1f+4
+  output: ra=1f+2
+  target: reached
+c.jalr t0 ::
+  inputs: t0=1f+6
+  output: ra=1f+2
+  target: reached
+c.jalr t0 ::
+  inputs: t0=1f+8
+  output: ra=1f+2
+  target: reached
+c.jalr t0 ::
+  inputs: t0=1f-4
+  output: ra=1f+2
+  target: reached
+c.jalr t0 ::
+  inputs: t0=1f-6
+  output: ra=1f+2
+  target: reached
+c.jalr t0 ::
+  inputs: t0=1f-8
+  output: ra=1f+2
+  target: reached
+c.jalr t6 ::
+  inputs: t6=1f+4
+  output: ra=1f+2
+  target: reached
+c.add a0, a1 ::
+  inputs: a0=0x0000000000001000, a1=0x0000000000002000
+  output: a0=0x0000000000003000
+c.add a0, a1 ::
+  inputs: a0=0x000000007fffffff, a1=0x0000000000000001
+  output: a0=0x0000000080000000
+c.add a0, a1 ::
+  inputs: a0=0x00000000fffffffe, a1=0x0000000000000001
+  output: a0=0x00000000ffffffff
+c.add a0, a1 ::
+  inputs: a0=0x00000000ffffffff, a1=0x0000000000000001
+  output: a0=0x0000000100000000
+c.add a0, a1 ::
+  inputs: a0=0xfffffffffffffffe, a1=0x0000000000000001
+  output: a0=0xffffffffffffffff
+c.add a0, a1 ::
+  inputs: a0=0xffffffffffffffff, a1=0x0000000000000001
+  output: a0=0x0000000000000000
+c.add a4, a5 ::
+  inputs: a4=0x0000000000001000, a5=0x0000000000002000
+  output: a4=0x0000000000003000
+c.fsdsp fa0, 0(sp) ::
+  inputs: fa0=0xabcdef0123456789, sp=&area_mid
+  [+000]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+c.fsdsp fa0, 8(sp) ::
+  inputs: fa0=0xabcdef0123456789, sp=&area_mid
+  [+000]  .. .. .. .. .. .. .. .. 89 67 45 23 01 ef cd ab
+c.fsdsp fa0, 16(sp) ::
+  inputs: fa0=0xabcdef0123456789, sp=&area_mid
+  [+016]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+c.fsdsp fa0, 32(sp) ::
+  inputs: fa0=0xabcdef0123456789, sp=&area_mid
+  [+032]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+c.fsdsp fa0, 64(sp) ::
+  inputs: fa0=0xabcdef0123456789, sp=&area_mid
+  [+064]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+c.fsdsp fa0, 128(sp) ::
+  inputs: fa0=0xabcdef0123456789, sp=&area_mid
+  [+128]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+c.fsdsp fa0, 256(sp) ::
+  inputs: fa0=0xabcdef0123456789, sp=&area_mid
+  [+256]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+c.fsdsp fa0, 504(sp) ::
+  inputs: fa0=0xabcdef0123456789, sp=&area_mid
+  [+496]  .. .. .. .. .. .. .. .. 89 67 45 23 01 ef cd ab
+c.fsdsp fa5, 0(sp) ::
+  inputs: fa5=0xabcdef0123456789, sp=&area_mid
+  [+000]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+c.swsp a0, 0(sp) ::
+  inputs: a0=0xabcdef0123456789, sp=&area_mid
+  [+000]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+c.swsp a0, 4(sp) ::
+  inputs: a0=0xabcdef0123456789, sp=&area_mid
+  [+000]  .. .. .. .. 89 67 45 23 .. .. .. .. .. .. .. ..
+c.swsp a0, 8(sp) ::
+  inputs: a0=0xabcdef0123456789, sp=&area_mid
+  [+000]  .. .. .. .. .. .. .. .. 89 67 45 23 .. .. .. ..
+c.swsp a0, 16(sp) ::
+  inputs: a0=0xabcdef0123456789, sp=&area_mid
+  [+016]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+c.swsp a0, 32(sp) ::
+  inputs: a0=0xabcdef0123456789, sp=&area_mid
+  [+032]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+c.swsp a0, 64(sp) ::
+  inputs: a0=0xabcdef0123456789, sp=&area_mid
+  [+064]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+c.swsp a0, 128(sp) ::
+  inputs: a0=0xabcdef0123456789, sp=&area_mid
+  [+128]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+c.swsp a0, 252(sp) ::
+  inputs: a0=0xabcdef0123456789, sp=&area_mid
+  [+240]  .. .. .. .. .. .. .. .. .. .. .. .. 89 67 45 23
+c.swsp a5, 0(sp) ::
+  inputs: a5=0xabcdef0123456789, sp=&area_mid
+  [+000]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+c.sdsp a0, 0(sp) ::
+  inputs: a0=0xabcdef0123456789, sp=&area_mid
+  [+000]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+c.sdsp a0, 8(sp) ::
+  inputs: a0=0xabcdef0123456789, sp=&area_mid
+  [+000]  .. .. .. .. .. .. .. .. 89 67 45 23 01 ef cd ab
+c.sdsp a0, 16(sp) ::
+  inputs: a0=0xabcdef0123456789, sp=&area_mid
+  [+016]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+c.sdsp a0, 32(sp) ::
+  inputs: a0=0xabcdef0123456789, sp=&area_mid
+  [+032]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+c.sdsp a0, 64(sp) ::
+  inputs: a0=0xabcdef0123456789, sp=&area_mid
+  [+064]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+c.sdsp a0, 128(sp) ::
+  inputs: a0=0xabcdef0123456789, sp=&area_mid
+  [+128]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+c.sdsp a0, 256(sp) ::
+  inputs: a0=0xabcdef0123456789, sp=&area_mid
+  [+256]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+c.sdsp a0, 504(sp) ::
+  inputs: a0=0xabcdef0123456789, sp=&area_mid
+  [+496]  .. .. .. .. .. .. .. .. 89 67 45 23 01 ef cd ab
+c.sdsp a5, 0(sp) ::
+  inputs: a5=0xabcdef0123456789, sp=&area_mid
+  [+000]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
diff --git a/none/tests/riscv64/compressed.vgtest b/none/tests/riscv64/compressed.vgtest
new file mode 100644
index 000000000..5c3d44864
--- /dev/null
+++ b/none/tests/riscv64/compressed.vgtest
@@ -0,0 +1,2 @@
+prog: compressed
+vgopts: -q
diff --git a/none/tests/riscv64/csr.c b/none/tests/riscv64/csr.c
new file mode 100644
index 000000000..41360be6d
--- /dev/null
+++ b/none/tests/riscv64/csr.c
@@ -0,0 +1,127 @@
+/* Tests for the RV64Zicsr standard control-and-status register instruction-set
+   extension. */
+
+#include "testinst.h"
+
+static void test_csr64_shared(void)
+{
+   printf("RV64Zicsr control-and-status register instruction set, shared "
+          "operations\n");
+   /* Per test: preset fcsr (arg 3) and rs1 (arg 4), run, print rd and fcsr. */
+   /* ----------------- csrrw rd, csr, rs1 ------------------ */
+   /* fflags: aliases fcsr[4:0] */
+   TESTINST_1_1_CSR(4, "csrrw a0, fflags, a1", 0x00, 0x01, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrw a0, fflags, a1", 0x00, 0x1f, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrw a0, fflags, a1", 0xff, 0x1e, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrw a0, fflags, a1", 0xff, 0x00, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrw a0, fflags, a1", 0x00, 0xff, a0, fcsr, a1);
+
+   TESTINST_1_1_CSR(4, "csrrw t5, fflags, t6", 0x00, 0x01, t5, fcsr, t6);
+   TESTINST_1_1_CSR(4, "csrrw zero, fflags, a1", 0xff, 0x01, zero, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrw a0, fflags, zero", 0xff, 0x00, a0, fcsr, zero);
+
+   /* frm: aliases fcsr[7:5] */
+   TESTINST_1_1_CSR(4, "csrrw a0, frm, a1", 0x00, 0x1, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrw a0, frm, a1", 0x00, 0x7, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrw a0, frm, a1", 0xff, 0x6, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrw a0, frm, a1", 0xff, 0x0, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrw a0, frm, a1", 0x00, 0xff, a0, fcsr, a1);
+
+   TESTINST_1_1_CSR(4, "csrrw t5, frm, t6", 0x00, 0x1, t5, fcsr, t6);
+   TESTINST_1_1_CSR(4, "csrrw zero, frm, a1", 0xff, 0x1, zero, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrw a0, frm, zero", 0xff, 0x0, a0, fcsr, zero);
+
+   /* fcsr: NOTE(review) 2nd and 5th cases below use identical operands */
+   TESTINST_1_1_CSR(4, "csrrw a0, fcsr, a1", 0x00, 0x01, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrw a0, fcsr, a1", 0x00, 0xff, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrw a0, fcsr, a1", 0xff, 0xfe, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrw a0, fcsr, a1", 0xff, 0x00, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrw a0, fcsr, a1", 0x00, 0xff, a0, fcsr, a1);
+
+   TESTINST_1_1_CSR(4, "csrrw t5, fcsr, t6", 0x00, 0x01, t5, fcsr, t6);
+   TESTINST_1_1_CSR(4, "csrrw zero, fcsr, a1", 0xff, 0x01, zero, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrw a0, fcsr, zero", 0xff, 0x00, a0, fcsr, zero);
+
+   /* ----------------- csrrs rd, csr, rs1 ------------------ */
+   /* fflags: aliases fcsr[4:0] */
+   TESTINST_1_1_CSR(4, "csrrs a0, fflags, a1", 0x00, 0x01, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrs a0, fflags, a1", 0x00, 0x1f, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrs a0, fflags, a1", 0xff, 0x1e, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrs a0, fflags, a1", 0xff, 0x00, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrs a0, fflags, a1", 0x00, 0xff, a0, fcsr, a1);
+
+   TESTINST_1_1_CSR(4, "csrrs t5, fflags, t6", 0x00, 0x01, t5, fcsr, t6);
+   TESTINST_1_1_CSR(4, "csrrs zero, fflags, a1", 0xff, 0x01, zero, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrs a0, fflags, zero", 0xff, 0x00, a0, fcsr, zero);
+
+   /* frm: aliases fcsr[7:5] */
+   TESTINST_1_1_CSR(4, "csrrs a0, frm, a1", 0x00, 0x1, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrs a0, frm, a1", 0x00, 0x7, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrs a0, frm, a1", 0xff, 0x6, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrs a0, frm, a1", 0xff, 0x0, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrs a0, frm, a1", 0x00, 0xff, a0, fcsr, a1);
+
+   TESTINST_1_1_CSR(4, "csrrs t5, frm, t6", 0x00, 0x1, t5, fcsr, t6);
+   TESTINST_1_1_CSR(4, "csrrs zero, frm, a1", 0xff, 0x1, zero, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrs a0, frm, zero", 0xff, 0x0, a0, fcsr, zero);
+
+   /* fcsr: NOTE(review) 2nd and 5th cases below use identical operands */
+   TESTINST_1_1_CSR(4, "csrrs a0, fcsr, a1", 0x00, 0x01, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrs a0, fcsr, a1", 0x00, 0xff, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrs a0, fcsr, a1", 0xff, 0xfe, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrs a0, fcsr, a1", 0xff, 0x00, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrs a0, fcsr, a1", 0x00, 0xff, a0, fcsr, a1);
+
+   TESTINST_1_1_CSR(4, "csrrs t5, fcsr, t6", 0x00, 0x01, t5, fcsr, t6);
+   TESTINST_1_1_CSR(4, "csrrs zero, fcsr, a1", 0xff, 0x01, zero, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrs a0, fcsr, zero", 0xff, 0x00, a0, fcsr, zero);
+
+   /* ----------------- csrrc rd, csr, rs1 ------------------ */
+   /* fflags: aliases fcsr[4:0] */
+   TESTINST_1_1_CSR(4, "csrrc a0, fflags, a1", 0x00, 0x01, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrc a0, fflags, a1", 0x00, 0x1f, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrc a0, fflags, a1", 0xff, 0x1e, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrc a0, fflags, a1", 0xff, 0x00, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrc a0, fflags, a1", 0x00, 0xff, a0, fcsr, a1);
+
+   TESTINST_1_1_CSR(4, "csrrc t5, fflags, t6", 0x00, 0x01, t5, fcsr, t6);
+   TESTINST_1_1_CSR(4, "csrrc zero, fflags, a1", 0xff, 0x01, zero, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrc a0, fflags, zero", 0xff, 0x00, a0, fcsr, zero);
+
+   /* frm: aliases fcsr[7:5] */
+   TESTINST_1_1_CSR(4, "csrrc a0, frm, a1", 0x00, 0x1, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrc a0, frm, a1", 0x00, 0x7, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrc a0, frm, a1", 0xff, 0x6, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrc a0, frm, a1", 0xff, 0x0, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrc a0, frm, a1", 0x00, 0xff, a0, fcsr, a1);
+
+   TESTINST_1_1_CSR(4, "csrrc t5, frm, t6", 0x00, 0x1, t5, fcsr, t6);
+   TESTINST_1_1_CSR(4, "csrrc zero, frm, a1", 0xff, 0x1, zero, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrc a0, frm, zero", 0xff, 0x0, a0, fcsr, zero);
+
+   /* fcsr: NOTE(review) 2nd and 5th cases below use identical operands */
+   TESTINST_1_1_CSR(4, "csrrc a0, fcsr, a1", 0x00, 0x01, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrc a0, fcsr, a1", 0x00, 0xff, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrc a0, fcsr, a1", 0xff, 0xfe, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrc a0, fcsr, a1", 0xff, 0x00, a0, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrc a0, fcsr, a1", 0x00, 0xff, a0, fcsr, a1);
+
+   TESTINST_1_1_CSR(4, "csrrc t5, fcsr, t6", 0x00, 0x01, t5, fcsr, t6);
+   TESTINST_1_1_CSR(4, "csrrc zero, fcsr, a1", 0xff, 0x01, zero, fcsr, a1);
+   TESTINST_1_1_CSR(4, "csrrc a0, fcsr, zero", 0xff, 0x00, a0, fcsr, zero);
+
+   /* -------------- csrrwi rd, csr, uimm[4:0] -------------- */
+   /* Not currently handled. */
+
+   /* -------------- csrrsi rd, csr, uimm[4:0] -------------- */
+   /* Not currently handled. */
+
+   /* -------------- csrrci rd, csr, uimm[4:0] -------------- */
+   /* Not currently handled. */
+}
+
+int main(void)
+{
+   test_csr64_shared(); /* the only test group defined in this file */
+   return 0;
+}
diff --git a/none/tests/riscv64/csr.stderr.exp b/none/tests/riscv64/csr.stderr.exp
new file mode 100644
index 000000000..e69de29bb
diff --git a/none/tests/riscv64/csr.stdout.exp b/none/tests/riscv64/csr.stdout.exp
new file mode 100644
index 000000000..e773e205c
--- /dev/null
+++ b/none/tests/riscv64/csr.stdout.exp
@@ -0,0 +1,217 @@
+RV64Zicsr control-and-status register instruction set, shared operations
+csrrw a0, fflags, a1 ::
+  inputs: a1=0x0000000000000001, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x0000000000000001
+csrrw a0, fflags, a1 ::
+  inputs: a1=0x000000000000001f, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x000000000000001f
+csrrw a0, fflags, a1 ::
+  inputs: a1=0x000000000000001e, fcsr=0x00000000000000ff
+  output: a0=0x000000000000001f, fcsr=0x00000000000000fe
+csrrw a0, fflags, a1 ::
+  inputs: a1=0x0000000000000000, fcsr=0x00000000000000ff
+  output: a0=0x000000000000001f, fcsr=0x00000000000000e0
+csrrw a0, fflags, a1 ::
+  inputs: a1=0x00000000000000ff, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x000000000000001f
+csrrw t5, fflags, t6 ::
+  inputs: t6=0x0000000000000001, fcsr=0x0000000000000000
+  output: t5=0x0000000000000000, fcsr=0x0000000000000001
+csrrw zero, fflags, a1 ::
+  inputs: a1=0x0000000000000001, fcsr=0x00000000000000ff
+  output: zero=0x0000000000000000, fcsr=0x00000000000000e1
+csrrw a0, fflags, zero ::
+  inputs: zero=0x0000000000000000, fcsr=0x00000000000000ff
+  output: a0=0x000000000000001f, fcsr=0x00000000000000e0
+csrrw a0, frm, a1 ::
+  inputs: a1=0x0000000000000001, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x0000000000000020
+csrrw a0, frm, a1 ::
+  inputs: a1=0x0000000000000007, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x00000000000000e0
+csrrw a0, frm, a1 ::
+  inputs: a1=0x0000000000000006, fcsr=0x00000000000000ff
+  output: a0=0x0000000000000007, fcsr=0x00000000000000df
+csrrw a0, frm, a1 ::
+  inputs: a1=0x0000000000000000, fcsr=0x00000000000000ff
+  output: a0=0x0000000000000007, fcsr=0x000000000000001f
+csrrw a0, frm, a1 ::
+  inputs: a1=0x00000000000000ff, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x00000000000000e0
+csrrw t5, frm, t6 ::
+  inputs: t6=0x0000000000000001, fcsr=0x0000000000000000
+  output: t5=0x0000000000000000, fcsr=0x0000000000000020
+csrrw zero, frm, a1 ::
+  inputs: a1=0x0000000000000001, fcsr=0x00000000000000ff
+  output: zero=0x0000000000000000, fcsr=0x000000000000003f
+csrrw a0, frm, zero ::
+  inputs: zero=0x0000000000000000, fcsr=0x00000000000000ff
+  output: a0=0x0000000000000007, fcsr=0x000000000000001f
+csrrw a0, fcsr, a1 ::
+  inputs: a1=0x0000000000000001, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x0000000000000001
+csrrw a0, fcsr, a1 ::
+  inputs: a1=0x00000000000000ff, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x00000000000000ff
+csrrw a0, fcsr, a1 ::
+  inputs: a1=0x00000000000000fe, fcsr=0x00000000000000ff
+  output: a0=0x00000000000000ff, fcsr=0x00000000000000fe
+csrrw a0, fcsr, a1 ::
+  inputs: a1=0x0000000000000000, fcsr=0x00000000000000ff
+  output: a0=0x00000000000000ff, fcsr=0x0000000000000000
+csrrw a0, fcsr, a1 ::
+  inputs: a1=0x00000000000000ff, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x00000000000000ff
+csrrw t5, fcsr, t6 ::
+  inputs: t6=0x0000000000000001, fcsr=0x0000000000000000
+  output: t5=0x0000000000000000, fcsr=0x0000000000000001
+csrrw zero, fcsr, a1 ::
+  inputs: a1=0x0000000000000001, fcsr=0x00000000000000ff
+  output: zero=0x0000000000000000, fcsr=0x0000000000000001
+csrrw a0, fcsr, zero ::
+  inputs: zero=0x0000000000000000, fcsr=0x00000000000000ff
+  output: a0=0x00000000000000ff, fcsr=0x0000000000000000
+csrrs a0, fflags, a1 ::
+  inputs: a1=0x0000000000000001, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x0000000000000001
+csrrs a0, fflags, a1 ::
+  inputs: a1=0x000000000000001f, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x000000000000001f
+csrrs a0, fflags, a1 ::
+  inputs: a1=0x000000000000001e, fcsr=0x00000000000000ff
+  output: a0=0x000000000000001f, fcsr=0x00000000000000ff
+csrrs a0, fflags, a1 ::
+  inputs: a1=0x0000000000000000, fcsr=0x00000000000000ff
+  output: a0=0x000000000000001f, fcsr=0x00000000000000ff
+csrrs a0, fflags, a1 ::
+  inputs: a1=0x00000000000000ff, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x000000000000001f
+csrrs t5, fflags, t6 ::
+  inputs: t6=0x0000000000000001, fcsr=0x0000000000000000
+  output: t5=0x0000000000000000, fcsr=0x0000000000000001
+csrrs zero, fflags, a1 ::
+  inputs: a1=0x0000000000000001, fcsr=0x00000000000000ff
+  output: zero=0x0000000000000000, fcsr=0x00000000000000ff
+csrrs a0, fflags, zero ::
+  inputs: zero=0x0000000000000000, fcsr=0x00000000000000ff
+  output: a0=0x000000000000001f, fcsr=0x00000000000000ff
+csrrs a0, frm, a1 ::
+  inputs: a1=0x0000000000000001, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x0000000000000020
+csrrs a0, frm, a1 ::
+  inputs: a1=0x0000000000000007, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x00000000000000e0
+csrrs a0, frm, a1 ::
+  inputs: a1=0x0000000000000006, fcsr=0x00000000000000ff
+  output: a0=0x0000000000000007, fcsr=0x00000000000000ff
+csrrs a0, frm, a1 ::
+  inputs: a1=0x0000000000000000, fcsr=0x00000000000000ff
+  output: a0=0x0000000000000007, fcsr=0x00000000000000ff
+csrrs a0, frm, a1 ::
+  inputs: a1=0x00000000000000ff, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x00000000000000e0
+csrrs t5, frm, t6 ::
+  inputs: t6=0x0000000000000001, fcsr=0x0000000000000000
+  output: t5=0x0000000000000000, fcsr=0x0000000000000020
+csrrs zero, frm, a1 ::
+  inputs: a1=0x0000000000000001, fcsr=0x00000000000000ff
+  output: zero=0x0000000000000000, fcsr=0x00000000000000ff
+csrrs a0, frm, zero ::
+  inputs: zero=0x0000000000000000, fcsr=0x00000000000000ff
+  output: a0=0x0000000000000007, fcsr=0x00000000000000ff
+csrrs a0, fcsr, a1 ::
+  inputs: a1=0x0000000000000001, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x0000000000000001
+csrrs a0, fcsr, a1 ::
+  inputs: a1=0x00000000000000ff, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x00000000000000ff
+csrrs a0, fcsr, a1 ::
+  inputs: a1=0x00000000000000fe, fcsr=0x00000000000000ff
+  output: a0=0x00000000000000ff, fcsr=0x00000000000000ff
+csrrs a0, fcsr, a1 ::
+  inputs: a1=0x0000000000000000, fcsr=0x00000000000000ff
+  output: a0=0x00000000000000ff, fcsr=0x00000000000000ff
+csrrs a0, fcsr, a1 ::
+  inputs: a1=0x00000000000000ff, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x00000000000000ff
+csrrs t5, fcsr, t6 ::
+  inputs: t6=0x0000000000000001, fcsr=0x0000000000000000
+  output: t5=0x0000000000000000, fcsr=0x0000000000000001
+csrrs zero, fcsr, a1 ::
+  inputs: a1=0x0000000000000001, fcsr=0x00000000000000ff
+  output: zero=0x0000000000000000, fcsr=0x00000000000000ff
+csrrs a0, fcsr, zero ::
+  inputs: zero=0x0000000000000000, fcsr=0x00000000000000ff
+  output: a0=0x00000000000000ff, fcsr=0x00000000000000ff
+csrrc a0, fflags, a1 ::
+  inputs: a1=0x0000000000000001, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x0000000000000000
+csrrc a0, fflags, a1 ::
+  inputs: a1=0x000000000000001f, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x0000000000000000
+csrrc a0, fflags, a1 ::
+  inputs: a1=0x000000000000001e, fcsr=0x00000000000000ff
+  output: a0=0x000000000000001f, fcsr=0x00000000000000e1
+csrrc a0, fflags, a1 ::
+  inputs: a1=0x0000000000000000, fcsr=0x00000000000000ff
+  output: a0=0x000000000000001f, fcsr=0x00000000000000ff
+csrrc a0, fflags, a1 ::
+  inputs: a1=0x00000000000000ff, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x0000000000000000
+csrrc t5, fflags, t6 ::
+  inputs: t6=0x0000000000000001, fcsr=0x0000000000000000
+  output: t5=0x0000000000000000, fcsr=0x0000000000000000
+csrrc zero, fflags, a1 ::
+  inputs: a1=0x0000000000000001, fcsr=0x00000000000000ff
+  output: zero=0x0000000000000000, fcsr=0x00000000000000fe
+csrrc a0, fflags, zero ::
+  inputs: zero=0x0000000000000000, fcsr=0x00000000000000ff
+  output: a0=0x000000000000001f, fcsr=0x00000000000000ff
+csrrc a0, frm, a1 ::
+  inputs: a1=0x0000000000000001, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x0000000000000000
+csrrc a0, frm, a1 ::
+  inputs: a1=0x0000000000000007, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x0000000000000000
+csrrc a0, frm, a1 ::
+  inputs: a1=0x0000000000000006, fcsr=0x00000000000000ff
+  output: a0=0x0000000000000007, fcsr=0x000000000000003f
+csrrc a0, frm, a1 ::
+  inputs: a1=0x0000000000000000, fcsr=0x00000000000000ff
+  output: a0=0x0000000000000007, fcsr=0x00000000000000ff
+csrrc a0, frm, a1 ::
+  inputs: a1=0x00000000000000ff, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x0000000000000000
+csrrc t5, frm, t6 ::
+  inputs: t6=0x0000000000000001, fcsr=0x0000000000000000
+  output: t5=0x0000000000000000, fcsr=0x0000000000000000
+csrrc zero, frm, a1 ::
+  inputs: a1=0x0000000000000001, fcsr=0x00000000000000ff
+  output: zero=0x0000000000000000, fcsr=0x00000000000000df
+csrrc a0, frm, zero ::
+  inputs: zero=0x0000000000000000, fcsr=0x00000000000000ff
+  output: a0=0x0000000000000007, fcsr=0x00000000000000ff
+csrrc a0, fcsr, a1 ::
+  inputs: a1=0x0000000000000001, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x0000000000000000
+csrrc a0, fcsr, a1 ::
+  inputs: a1=0x00000000000000ff, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x0000000000000000
+csrrc a0, fcsr, a1 ::
+  inputs: a1=0x00000000000000fe, fcsr=0x00000000000000ff
+  output: a0=0x00000000000000ff, fcsr=0x0000000000000001
+csrrc a0, fcsr, a1 ::
+  inputs: a1=0x0000000000000000, fcsr=0x00000000000000ff
+  output: a0=0x00000000000000ff, fcsr=0x00000000000000ff
+csrrc a0, fcsr, a1 ::
+  inputs: a1=0x00000000000000ff, fcsr=0x0000000000000000
+  output: a0=0x0000000000000000, fcsr=0x0000000000000000
+csrrc t5, fcsr, t6 ::
+  inputs: t6=0x0000000000000001, fcsr=0x0000000000000000
+  output: t5=0x0000000000000000, fcsr=0x0000000000000000
+csrrc zero, fcsr, a1 ::
+  inputs: a1=0x0000000000000001, fcsr=0x00000000000000ff
+  output: zero=0x0000000000000000, fcsr=0x00000000000000fe
+csrrc a0, fcsr, zero ::
+  inputs: zero=0x0000000000000000, fcsr=0x00000000000000ff
+  output: a0=0x00000000000000ff, fcsr=0x00000000000000ff
diff --git a/none/tests/riscv64/csr.vgtest b/none/tests/riscv64/csr.vgtest
new file mode 100644
index 000000000..99742a4db
--- /dev/null
+++ b/none/tests/riscv64/csr.vgtest
@@ -0,0 +1,2 @@
+prog: csr
+vgopts: -q
diff --git a/none/tests/riscv64/filter_stderr b/none/tests/riscv64/filter_stderr
new file mode 100755
index 000000000..0ae9313a9
--- /dev/null
+++ b/none/tests/riscv64/filter_stderr
@@ -0,0 +1,3 @@
+#! /bin/sh
+# Run the filter_stderr script located one directory up.
+../filter_stderr
diff --git a/none/tests/riscv64/float32.c b/none/tests/riscv64/float32.c
new file mode 100644
index 000000000..b63305a64
--- /dev/null
+++ b/none/tests/riscv64/float32.c
@@ -0,0 +1,1588 @@
+/* Tests for the RV64F standard single-precision floating-point instruction-set
+   extension. */
+
+#include "testinst.h"
+
+static void test_float32_shared(void)
+{
+   printf("RV64F single-precision FP instruction set, shared operations\n");
+
+   /* --------------- flw rd, imm[11:0](rs1) ---------------- */
+   TESTINST_1_1_FLOAD(4, "flw fa0, 0(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(4, "flw fa0, 4(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(4, "flw fa0, 8(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(4, "flw fa0, 16(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(4, "flw fa0, 32(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(4, "flw fa0, 64(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(4, "flw fa0, 128(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(4, "flw fa0, 256(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(4, "flw fa0, 512(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(4, "flw fa0, 1024(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(4, "flw fa0, 2040(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(4, "flw fa0, -4(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(4, "flw fa0, -2048(a1)", fa0, a1);
+
+   TESTINST_1_1_FLOAD(4, "flw fa4, 0(a5)", fa4, a5);
+
+   /* --------------- fsw rs2, imm[11:0](rs1) --------------- */
+   TESTINST_0_2_FSTORE(4, "fsw fa0, 0(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(4, "fsw fa0, 4(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(4, "fsw fa0, 8(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(4, "fsw fa0, 16(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(4, "fsw fa0, 32(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(4, "fsw fa0, 64(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(4, "fsw fa0, 128(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(4, "fsw fa0, 256(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(4, "fsw fa0, 512(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(4, "fsw fa0, 1024(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(4, "fsw fa0, 2040(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(4, "fsw fa0, -4(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(4, "fsw fa0, -2048(a1)", 0xabcdef0123456789, fa0, a1);
+
+   TESTINST_0_2_FSTORE(4, "fsw fa4, 0(a5)", 0xabcdef0123456789, fa4, a5);
+
+   /* ------------ fmadd.s rd, rs1, rs2, rs3, rm ------------ */
+   /* 3.0 * 2.0 + 1.0 -> 7.0 */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3", 0xffffffff40400000,
+                  0xffffffff40000000, 0xffffffff3f800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * 1.0 + -1.0 -> 0.0 */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffffbf800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 2.0 * FLT_TRUE_MIN + -FLT_TRUE_MIN -> FLT_TRUE_MIN (no UF because exact)
+    */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3", 0xffffffff40000000,
+                  0xffffffff00000001, 0xffffffff80000001, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 2.0 * FLT_MAX + -FLT_MAX -> FLT_MAX */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3", 0xffffffff40000000,
+                  0xffffffff7f7fffff, 0xffffffffff7fffff, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 2.0 * FLT_MAX + 0.0 -> INFINITY (OF, NX) */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3", 0xffffffff40000000,
+                  0xffffffff7f7fffff, 0xffffffff00000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 2.0 * INFINITY + -INFINITY -> qNAN (NV) */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3", 0xffffffff40000000,
+                  0xffffffff7f800000, 0xffffffffff800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+
+   /* 1.0 * 1.0 + FLT_EPSILON/2 (RNE) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3, rne", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * nextafterf(1.0) + FLT_EPSILON/2 (RNE) -> 2nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3, rne", 0xffffffff3f800000,
+                  0xffffffff3f800001, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * 1.0 + FLT_EPSILON/2 (RTZ) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3, rtz", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 + -FLT_EPSILON/2 (RTZ) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3, rtz", 0xffffffff3f800000,
+                  0xffffffffbf800000, 0xffffffffb3800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * 1.0 + FLT_EPSILON/2 (RDN) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3, rdn", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 + -FLT_EPSILON/2 (RDN) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3, rdn", 0xffffffff3f800000,
+                  0xffffffffbf800000, 0xffffffffb3800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * 1.0 + FLT_EPSILON/2 (RUP) -> nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3, rup", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 + -FLT_EPSILON/2 (RUP) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3, rup", 0xffffffff3f800000,
+                  0xffffffffbf800000, 0xffffffffb3800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * 1.0 + FLT_EPSILON/2 (RMM) -> nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3, rmm", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 + -FLT_EPSILON/2 (RMM) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3, rmm", 0xffffffff3f800000,
+                  0xffffffffbf800000, 0xffffffffb3800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+
+   /* 1.0 * 1.0 + FLT_EPSILON/2 (DYN-RNE) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * nextafterf(1.0) + FLT_EPSILON/2 (DYN-RNE) -> 2nextafterf(1.0) (NX)
+    */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffff3f800001, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * 1.0 + FLT_EPSILON/2 (DYN-RTZ) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x20, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 + -FLT_EPSILON/2 (DYN-RTZ) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffffbf800000, 0xffffffffb3800000, 0x20, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * 1.0 + FLT_EPSILON/2 (DYN-RDN) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x40, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 + -FLT_EPSILON/2 (DYN-RDN) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffffbf800000, 0xffffffffb3800000, 0x40, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * 1.0 + FLT_EPSILON/2 (DYN-RUP) -> nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x60, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 + -FLT_EPSILON/2 (DYN-RUP) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffffbf800000, 0xffffffffb3800000, 0x60, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * 1.0 + FLT_EPSILON/2 (DYN-RMM) -> nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x80, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 + -FLT_EPSILON/2 (DYN-RMM) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmadd.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffffbf800000, 0xffffffffb3800000, 0x80, fa0, fa1, fa2,
+                  fa3);
+
+   /* ------------ fmsub.s rd, rs1, rs2, rs3, rm ------------ */
+   /* 3.0 * 2.0 - 1.0 -> 5.0 */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3", 0xffffffff40400000,
+                  0xffffffff40000000, 0xffffffff3f800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * 1.0 - 1.0 -> 0.0 */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffff3f800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 2.0 * FLT_TRUE_MIN - FLT_TRUE_MIN -> FLT_TRUE_MIN (no UF because exact)
+    */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3", 0xffffffff40000000,
+                  0xffffffff00000001, 0xffffffff00000001, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 2.0 * FLT_MAX - FLT_MAX -> FLT_MAX */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3", 0xffffffff40000000,
+                  0xffffffff7f7fffff, 0xffffffff7f7fffff, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 2.0 * FLT_MAX - 0.0 -> INFINITY (OF, NX) */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3", 0xffffffff40000000,
+                  0xffffffff7f7fffff, 0xffffffff00000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 2.0 * INFINITY - INFINITY -> qNAN (NV) */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3", 0xffffffff40000000,
+                  0xffffffff7f800000, 0xffffffff7f800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+
+   /* 1.0 * nextafterf(1.0) - FLT_EPSILON/2 (RNE) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3, rne", 0xffffffff3f800000,
+                  0xffffffff3f800001, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * 2nextafterf(1.0) - FLT_EPSILON/2 (RNE) -> 2nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3, rne", 0xffffffff3f800000,
+                  0xffffffff3f800002, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * nextafterf(1.0) - FLT_EPSILON/2 (RTZ) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3, rtz", 0xffffffff3f800000,
+                  0xffffffff3f800001, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 - FLT_EPSILON/2 (RTZ) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3, rtz", 0xffffffff3f800000,
+                  0xffffffffbf800000, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * nextafterf(1.0) - FLT_EPSILON/2 (RDN) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3, rdn", 0xffffffff3f800000,
+                  0xffffffff3f800001, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 - FLT_EPSILON/2 (RDN) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3, rdn", 0xffffffff3f800000,
+                  0xffffffffbf800000, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * nextafterf(1.0) - FLT_EPSILON/2 (RUP) -> nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3, rup", 0xffffffff3f800000,
+                  0xffffffff3f800001, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 - FLT_EPSILON/2 (RUP) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3, rup", 0xffffffff3f800000,
+                  0xffffffffbf800000, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * nextafterf(1.0) - FLT_EPSILON/2 (RMM) -> nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3, rmm", 0xffffffff3f800000,
+                  0xffffffff3f800001, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 - FLT_EPSILON/2 (RMM) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3, rmm", 0xffffffff3f800000,
+                  0xffffffffbf800000, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+
+   /* 1.0 * nextafterf(1.0) - FLT_EPSILON/2 (DYN-RNE) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffff3f800001, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * 2nextafterf(1.0) - FLT_EPSILON/2 (DYN-RNE) -> 2nextafterf(1.0) (NX)
+    */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffff3f800002, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * nextafterf(1.0) - FLT_EPSILON/2 (DYN-RTZ) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffff3f800001, 0xffffffff33800000, 0x20, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 - FLT_EPSILON/2 (DYN-RTZ) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffffbf800000, 0xffffffff33800000, 0x20, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * nextafterf(1.0) - FLT_EPSILON/2 (DYN-RDN) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffff3f800001, 0xffffffff33800000, 0x40, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 - FLT_EPSILON/2 (DYN-RDN) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffffbf800000, 0xffffffff33800000, 0x40, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * nextafterf(1.0) - FLT_EPSILON/2 (DYN-RUP) -> nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffff3f800001, 0xffffffff33800000, 0x60, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 - FLT_EPSILON/2 (DYN-RUP) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffffbf800000, 0xffffffff33800000, 0x60, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * nextafterf(1.0) - FLT_EPSILON/2 (DYN-RMM) -> nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffff3f800001, 0xffffffff33800000, 0x80, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 - FLT_EPSILON/2 (DYN-RMM) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmsub.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffffbf800000, 0xffffffff33800000, 0x80, fa0, fa1, fa2,
+                  fa3);
+
+   /* ----------- fnmsub.s rd, rs1, rs2, rs3, rm ------------ */
+   /* -(3.0 * 2.0) + 1.0 -> -5.0 */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3", 0xffffffff40400000,
+                  0xffffffff40000000, 0xffffffff3f800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) + 1.0 -> 0.0 */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffff3f800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(2.0 * FLT_TRUE_MIN) + FLT_TRUE_MIN -> -FLT_TRUE_MIN (no UF because
+      exact) */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3", 0xffffffff40000000,
+                  0xffffffff00000001, 0xffffffff00000001, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(2.0 * FLT_MAX) + FLT_MAX -> -FLT_MAX */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3", 0xffffffff40000000,
+                  0xffffffff7f7fffff, 0xffffffff7f7fffff, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(2.0 * FLT_MAX) + 0.0 -> -INFINITY (OF, NX) */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3", 0xffffffff40000000,
+                  0xffffffff7f7fffff, 0xffffffff00000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(2.0 * INFINITY) + INFINITY -> qNAN (NV) */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3", 0xffffffff40000000,
+                  0xffffffff7f800000, 0xffffffff7f800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+
+   /* -(-1.0 * 1.0) + FLT_EPSILON/2 (RNE) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3, rne", 0xffffffffbf800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * nextafterf(1.0)) + FLT_EPSILON/2 (RNE) -> 2nextafterf(1.0) (NX)
+    */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3, rne", 0xffffffffbf800000,
+                  0xffffffff3f800001, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * 1.0) + FLT_EPSILON/2 (RTZ) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3, rtz", 0xffffffffbf800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) + -FLT_EPSILON/2 (RTZ) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3, rtz", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffffb3800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * 1.0) + FLT_EPSILON/2 (RDN) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3, rdn", 0xffffffffbf800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) + -FLT_EPSILON/2 (RDN) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3, rdn", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffffb3800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * 1.0) + FLT_EPSILON/2 (RUP) -> nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3, rup", 0xffffffffbf800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) + -FLT_EPSILON/2 (RUP) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3, rup", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffffb3800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * 1.0) + FLT_EPSILON/2 (RMM) -> nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3, rmm", 0xffffffffbf800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) + -FLT_EPSILON/2 (RMM) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3, rmm", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffffb3800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+
+   /* -(-1.0 * 1.0) + FLT_EPSILON/2 (DYN-RNE) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3", 0xffffffffbf800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * nextafterf(1.0)) + FLT_EPSILON/2 (DYN-RNE) ->
+      2nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3", 0xffffffffbf800000,
+                  0xffffffff3f800001, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * 1.0) + FLT_EPSILON/2 (DYN-RTZ) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3", 0xffffffffbf800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x20, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) + -FLT_EPSILON/2 (DYN-RTZ) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffffb3800000, 0x20, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * 1.0) + FLT_EPSILON/2 (DYN-RDN) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3", 0xffffffffbf800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x40, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) + -FLT_EPSILON/2 (DYN-RDN) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffffb3800000, 0x40, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * 1.0) + FLT_EPSILON/2 (DYN-RUP) -> nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3", 0xffffffffbf800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x60, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) + -FLT_EPSILON/2 (DYN-RUP) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffffb3800000, 0x60, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * 1.0) + FLT_EPSILON/2 (DYN-RMM) -> nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3", 0xffffffffbf800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x80, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) + -FLT_EPSILON/2 (DYN-RMM) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmsub.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffffb3800000, 0x80, fa0, fa1, fa2,
+                  fa3);
+
+   /* ----------- fnmadd.s rd, rs1, rs2, rs3, rm ------------ */
+   /* -(3.0 * 2.0) - 1.0 -> -7.0 */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3", 0xffffffff40400000,
+                  0xffffffff40000000, 0xffffffff3f800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) - -1.0 -> 0.0 */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffffbf800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(2.0 * FLT_TRUE_MIN) - -FLT_TRUE_MIN -> -FLT_TRUE_MIN (no UF because
+      exact) */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3", 0xffffffff40000000,
+                  0xffffffff00000001, 0xffffffff80000001, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(2.0 * FLT_MAX) - -FLT_MAX -> -FLT_MAX */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3", 0xffffffff40000000,
+                  0xffffffff7f7fffff, 0xffffffffff7fffff, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(2.0 * FLT_MAX) - 0.0 -> -INFINITY (OF, NX) */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3", 0xffffffff40000000,
+                  0xffffffff7f7fffff, 0xffffffff00000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(2.0 * INFINITY) - -INFINITY -> qNAN (NV) */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3", 0xffffffff40000000,
+                  0xffffffff7f800000, 0xffffffffff800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+
+   /* -(-1.0 * nextafterf(1.0)) - FLT_EPSILON/2 (RNE) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3, rne", 0xffffffffbf800000,
+                  0xffffffff3f800001, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * 2nextafterf(1.0)) - FLT_EPSILON/2 (RNE) -> 2nextafterf(1.0) (NX)
+    */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3, rne", 0xffffffffbf800000,
+                  0xffffffff3f800002, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * nextafterf(1.0)) - FLT_EPSILON/2 (RTZ) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3, rtz", 0xffffffffbf800000,
+                  0xffffffff3f800001, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) - FLT_EPSILON/2 (RTZ) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3, rtz", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * nextafterf(1.0)) - FLT_EPSILON/2 (RDN) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3, rdn", 0xffffffffbf800000,
+                  0xffffffff3f800001, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) - FLT_EPSILON/2 (RDN) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3, rdn", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * nextafterf(1.0)) - FLT_EPSILON/2 (RUP) -> nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3, rup", 0xffffffffbf800000,
+                  0xffffffff3f800001, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) - FLT_EPSILON/2 (RUP) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3, rup", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * nextafterf(1.0)) - FLT_EPSILON/2 (RMM) -> nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3, rmm", 0xffffffffbf800000,
+                  0xffffffff3f800001, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) - FLT_EPSILON/2 (RMM) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3, rmm", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+
+   /* -(-1.0 * nextafterf(1.0)) - FLT_EPSILON/2 (DYN-RNE) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3", 0xffffffffbf800000,
+                  0xffffffff3f800001, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * 2nextafterf(1.0)) - FLT_EPSILON/2 (DYN-RNE) -> 2nextafterf(1.0)
+      (NX) */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3", 0xffffffffbf800000,
+                  0xffffffff3f800002, 0xffffffff33800000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * nextafterf(1.0)) - FLT_EPSILON/2 (DYN-RTZ) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3", 0xffffffffbf800000,
+                  0xffffffff3f800001, 0xffffffff33800000, 0x20, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) - FLT_EPSILON/2 (DYN-RTZ) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x20, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * nextafterf(1.0)) - FLT_EPSILON/2 (DYN-RDN) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3", 0xffffffffbf800000,
+                  0xffffffff3f800001, 0xffffffff33800000, 0x40, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) - FLT_EPSILON/2 (DYN-RDN) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x40, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * nextafterf(1.0)) - FLT_EPSILON/2 (DYN-RUP) ->
+      nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3", 0xffffffffbf800000,
+                  0xffffffff3f800001, 0xffffffff33800000, 0x60, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) - FLT_EPSILON/2 (DYN-RUP) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x60, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * nextafterf(1.0)) - FLT_EPSILON/2 (DYN-RMM) ->
+      nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3", 0xffffffffbf800000,
+                  0xffffffff3f800001, 0xffffffff33800000, 0x80, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) - FLT_EPSILON/2 (DYN-RMM) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmadd.s fa0, fa1, fa2, fa3", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0xffffffff33800000, 0x80, fa0, fa1, fa2,
+                  fa3);
+
+   /* --------------- fadd.s rd, rs1, rs2, rm --------------- */
+   /* 2.0 + 1.0 -> 3.0 */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2", 0xffffffff40000000,
+                  0xffffffff3f800000, 0x00, fa0, fa1, fa2);
+   /* 1.0 + -1.0 -> 0.0 */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2", 0xffffffff3f800000,
+                  0xffffffffbf800000, 0x00, fa0, fa1, fa2);
+   /* FLT_TRUE_MIN + FLT_TRUE_MIN -> 2*FLT_TRUE_MIN (no UF because exact) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2", 0xffffffff00000001,
+                  0xffffffff00000001, 0x00, fa0, fa1, fa2);
+   /* FLT_MAX + FLT_MAX -> INFINITY (OF, NX) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2", 0xffffffff7f7fffff,
+                  0xffffffff7f7fffff, 0x00, fa0, fa1, fa2);
+   /* -FLT_MAX + -FLT_MAX -> -INFINITY (OF, NX) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2", 0xffffffffff7fffff,
+                  0xffffffffff7fffff, 0x00, fa0, fa1, fa2);
+   /* nextafterf(FLT_MIN) + -FLT_MIN -> FLT_TRUE_MIN (no UF because exact) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2", 0xffffffff00800001,
+                  0xffffffff80800000, 0x00, fa0, fa1, fa2);
+   /* INFINITY + -INFINITY -> qNAN (NV) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2", 0xffffffff7f800000,
+                  0xffffffffff800000, 0x00, fa0, fa1, fa2);
+
+   /* 1.0 + FLT_EPSILON/2 (RNE) -> 1.0 (NX) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2, rne", 0xffffffff3f800000,
+                  0xffffffff33800000, 0x00, fa0, fa1, fa2);
+   /* nextafterf(1.0) + FLT_EPSILON/2 (RNE) -> 2nextafterf(1.0) (NX) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2, rne", 0xffffffff3f800001,
+                  0xffffffff33800000, 0x00, fa0, fa1, fa2);
+   /* 1.0 + FLT_EPSILON/2 (RTZ) -> 1.0 (NX) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2, rtz", 0xffffffff3f800000,
+                  0xffffffff33800000, 0x00, fa0, fa1, fa2);
+   /* -1.0 + -FLT_EPSILON/2 (RTZ) -> -1.0 (NX) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2, rtz", 0xffffffffbf800000,
+                  0xffffffffb3800000, 0x00, fa0, fa1, fa2);
+   /* 1.0 + FLT_EPSILON/2 (RDN) -> 1.0 (NX) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2, rdn", 0xffffffff3f800000,
+                  0xffffffff33800000, 0x00, fa0, fa1, fa2);
+   /* -1.0 + -FLT_EPSILON/2 (RDN) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2, rdn", 0xffffffffbf800000,
+                  0xffffffffb3800000, 0x00, fa0, fa1, fa2);
+   /* 1.0 + FLT_EPSILON/2 (RUP) -> nextafterf(1.0) (NX) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2, rup", 0xffffffff3f800000,
+                  0xffffffff33800000, 0x00, fa0, fa1, fa2);
+   /* -1.0 + -FLT_EPSILON/2 (RUP) -> -1.0 (NX) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2, rup", 0xffffffffbf800000,
+                  0xffffffffb3800000, 0x00, fa0, fa1, fa2);
+   /* 1.0 + FLT_EPSILON/2 (RMM) -> nextafterf(1.0) (NX) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2, rmm", 0xffffffff3f800000,
+                  0xffffffff33800000, 0x00, fa0, fa1, fa2);
+   /* -1.0 + -FLT_EPSILON/2 (RMM) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2, rmm", 0xffffffffbf800000,
+                  0xffffffffb3800000, 0x00, fa0, fa1, fa2);
+
+   /* 1.0 + FLT_EPSILON/2 (DYN-RNE) -> 1.0 (NX) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2", 0xffffffff3f800000,
+                  0xffffffff33800000, 0x00, fa0, fa1, fa2);
+   /* nextafterf(1.0) + FLT_EPSILON/2 (DYN-RNE) -> 2nextafterf(1.0) (NX) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2", 0xffffffff3f800001,
+                  0xffffffff33800000, 0x00, fa0, fa1, fa2);
+   /* 1.0 + FLT_EPSILON/2 (DYN-RTZ) -> 1.0 (NX) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2", 0xffffffff3f800000,
+                  0xffffffff33800000, 0x20, fa0, fa1, fa2);
+   /* -1.0 + -FLT_EPSILON/2 (DYN-RTZ) -> -1.0 (NX) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2", 0xffffffffbf800000,
+                  0xffffffffb3800000, 0x20, fa0, fa1, fa2);
+   /* 1.0 + FLT_EPSILON/2 (DYN-RDN) -> 1.0 (NX) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2", 0xffffffff3f800000,
+                  0xffffffff33800000, 0x40, fa0, fa1, fa2);
+   /* -1.0 + -FLT_EPSILON/2 (DYN-RDN) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2", 0xffffffffbf800000,
+                  0xffffffffb3800000, 0x40, fa0, fa1, fa2);
+   /* 1.0 + FLT_EPSILON/2 (DYN-RUP) -> nextafterf(1.0) (NX) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2", 0xffffffff3f800000,
+                  0xffffffff33800000, 0x60, fa0, fa1, fa2);
+   /* -1.0 + -FLT_EPSILON/2 (DYN-RUP) -> -1.0 (NX) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2", 0xffffffffbf800000,
+                  0xffffffffb3800000, 0x60, fa0, fa1, fa2);
+   /* 1.0 + FLT_EPSILON/2 (DYN-RMM) -> nextafterf(1.0) (NX) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2", 0xffffffff3f800000,
+                  0xffffffff33800000, 0x80, fa0, fa1, fa2);
+   /* -1.0 + -FLT_EPSILON/2 (DYN-RMM) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_2_F(4, "fadd.s fa0, fa1, fa2", 0xffffffffbf800000,
+                  0xffffffffb3800000, 0x80, fa0, fa1, fa2);
+
+   /* --------------- fsub.s rd, rs1, rs2, rm --------------- */
+   /* 2.0 - 1.0 -> 1.0 */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2", 0xffffffff40000000,
+                  0xffffffff3f800000, 0x00, fa0, fa1, fa2);
+   /* 1.0 - 1.0 -> 0.0 */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0x00, fa0, fa1, fa2);
+   /* FLT_TRUE_MIN - -FLT_TRUE_MIN -> 2*FLT_TRUE_MIN (no UF because exact) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2", 0xffffffff00000001,
+                  0xffffffff80000001, 0x00, fa0, fa1, fa2);
+   /* FLT_MAX - -FLT_MAX -> INFINITY (OF, NX) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2", 0xffffffff7f7fffff,
+                  0xffffffffff7fffff, 0x00, fa0, fa1, fa2);
+   /* -FLT_MAX - FLT_MAX -> -INFINITY (OF, NX) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2", 0xffffffffff7fffff,
+                  0xffffffff7f7fffff, 0x00, fa0, fa1, fa2);
+   /* nextafterf(FLT_MIN) - FLT_MIN -> FLT_TRUE_MIN (no UF because exact) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2", 0xffffffff00800001,
+                  0xffffffff00800000, 0x00, fa0, fa1, fa2);
+   /* INFINITY - INFINITY -> qNAN (NV) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2", 0xffffffff7f800000,
+                  0xffffffff7f800000, 0x00, fa0, fa1, fa2);
+
+   /* nextafterf(1.0) - FLT_EPSILON/2 (RNE) -> 1.0 (NX) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2, rne", 0xffffffff3f800001,
+                  0xffffffff33800000, 0x00, fa0, fa1, fa2);
+   /* 2nextafterf(1.0) - FLT_EPSILON/2 (RNE) -> 2nextafterf(1.0) (NX) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2, rne", 0xffffffff3f800002,
+                  0xffffffff33800000, 0x00, fa0, fa1, fa2);
+   /* nextafterf(1.0) - FLT_EPSILON/2 (RTZ) -> 1.0 (NX) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2, rtz", 0xffffffff3f800001,
+                  0xffffffff33800000, 0x00, fa0, fa1, fa2);
+   /* -1.0 - FLT_EPSILON/2 (RTZ) -> -1.0 (NX) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2, rtz", 0xffffffffbf800000,
+                  0xffffffff33800000, 0x00, fa0, fa1, fa2);
+   /* nextafterf(1.0) - FLT_EPSILON/2 (RDN) -> 1.0 (NX) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2, rdn", 0xffffffff3f800001,
+                  0xffffffff33800000, 0x00, fa0, fa1, fa2);
+   /* -1.0 - FLT_EPSILON/2 (RDN) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2, rdn", 0xffffffffbf800000,
+                  0xffffffff33800000, 0x00, fa0, fa1, fa2);
+   /* nextafterf(1.0) - FLT_EPSILON/2 (RUP) -> nextafterf(1.0) (NX) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2, rup", 0xffffffff3f800001,
+                  0xffffffff33800000, 0x00, fa0, fa1, fa2);
+   /* -1.0 - FLT_EPSILON/2 (RUP) -> -1.0 (NX) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2, rup", 0xffffffffbf800000,
+                  0xffffffff33800000, 0x00, fa0, fa1, fa2);
+   /* nextafterf(1.0) - FLT_EPSILON/2 (RMM) -> nextafterf(1.0) (NX) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2, rmm", 0xffffffff3f800001,
+                  0xffffffff33800000, 0x00, fa0, fa1, fa2);
+   /* -1.0 - FLT_EPSILON/2 (RMM) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2, rmm", 0xffffffffbf800000,
+                  0xffffffff33800000, 0x00, fa0, fa1, fa2);
+
+   /* nextafterf(1.0) - FLT_EPSILON/2 (DYN-RNE) -> 1.0 (NX) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2", 0xffffffff3f800001,
+                  0xffffffff33800000, 0x00, fa0, fa1, fa2);
+   /* 2nextafterf(1.0) - FLT_EPSILON/2 (DYN-RNE) -> 2nextafterf(1.0) (NX) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2", 0xffffffff3f800002,
+                  0xffffffff33800000, 0x00, fa0, fa1, fa2);
+   /* nextafterf(1.0) - FLT_EPSILON/2 (DYN-RTZ) -> 1.0 (NX) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2", 0xffffffff3f800001,
+                  0xffffffff33800000, 0x20, fa0, fa1, fa2);
+   /* -1.0 - FLT_EPSILON/2 (DYN-RTZ) -> -1.0 (NX) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2", 0xffffffffbf800000,
+                  0xffffffff33800000, 0x20, fa0, fa1, fa2);
+   /* nextafterf(1.0) - FLT_EPSILON/2 (DYN-RDN) -> 1.0 (NX) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2", 0xffffffff3f800001,
+                  0xffffffff33800000, 0x40, fa0, fa1, fa2);
+   /* -1.0 - FLT_EPSILON/2 (DYN-RDN) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2", 0xffffffffbf800000,
+                  0xffffffff33800000, 0x40, fa0, fa1, fa2);
+   /* nextafterf(1.0) - FLT_EPSILON/2 (DYN-RUP) -> nextafterf(1.0) (NX) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2", 0xffffffff3f800001,
+                  0xffffffff33800000, 0x60, fa0, fa1, fa2);
+   /* -1.0 - FLT_EPSILON/2 (DYN-RUP) -> -1.0 (NX) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2", 0xffffffffbf800000,
+                  0xffffffff33800000, 0x60, fa0, fa1, fa2);
+   /* nextafterf(1.0) - FLT_EPSILON/2 (DYN-RMM) -> nextafterf(1.0) (NX) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2", 0xffffffff3f800001,
+                  0xffffffff33800000, 0x80, fa0, fa1, fa2);
+   /* -1.0 - FLT_EPSILON/2 (DYN-RMM) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_2_F(4, "fsub.s fa0, fa1, fa2", 0xffffffffbf800000,
+                  0xffffffff33800000, 0x80, fa0, fa1, fa2);
+
+   /* --------------- fmul.s rd, rs1, rs2, rm --------------- */
+   /* 2.0 * 1.0 -> 2.0 */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2", 0xffffffff40000000,
+                  0xffffffff3f800000, 0x00, fa0, fa1, fa2);
+   /* 1.0 * 0.0 -> 0.0 */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2", 0xffffffff3f800000,
+                  0xffffffff00000000, 0x00, fa0, fa1, fa2);
+   /* 2**-74 * 2**-75 -> 2**-149 aka FLT_TRUE_MIN (no UF because exact) */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2", 0xffffffff1a800000,
+                  0xffffffff1a000000, 0x00, fa0, fa1, fa2);
+   /* FLT_MAX * FLT_MAX -> INFINITY (OF, NX) */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2", 0xffffffff7f7fffff,
+                  0xffffffff7f7fffff, 0x00, fa0, fa1, fa2);
+   /* FLT_MAX * -FLT_MAX -> -INFINITY (OF, NX) */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2", 0xffffffff7f7fffff,
+                  0xffffffffff7fffff, 0x00, fa0, fa1, fa2);
+   /* 1.0 * INFINITY -> INFINITY */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2", 0xffffffff3f800000,
+                  0xffffffff7f800000, 0x00, fa0, fa1, fa2);
+   /* 0.0 * INFINITY -> qNAN (NV) */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2", 0xffffffff00000000,
+                  0xffffffff7f800000, 0x00, fa0, fa1, fa2);
+
+   /* FLT_TRUE_MIN * 0.5 (RNE) -> 0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2, rne", 0xffffffff00000001,
+                  0xffffffff3f000000, 0x00, fa0, fa1, fa2);
+   /* 3*FLT_TRUE_MIN * 0.5 (RNE) -> 2*FLT_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2, rne", 0xffffffff00000003,
+                  0xffffffff3f000000, 0x00, fa0, fa1, fa2);
+   /* FLT_TRUE_MIN * 0.5 (RTZ) -> 0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2, rtz", 0xffffffff00000001,
+                  0xffffffff3f000000, 0x00, fa0, fa1, fa2);
+   /* -FLT_TRUE_MIN * 0.5 (RTZ) -> -0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2, rtz", 0xffffffff80000001,
+                  0xffffffff3f000000, 0x00, fa0, fa1, fa2);
+   /* FLT_TRUE_MIN * 0.5 (RDN) -> 0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2, rdn", 0xffffffff00000001,
+                  0xffffffff3f000000, 0x00, fa0, fa1, fa2);
+   /* -FLT_TRUE_MIN * 0.5 (RDN) -> -FLT_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2, rdn", 0xffffffff80000001,
+                  0xffffffff3f000000, 0x00, fa0, fa1, fa2);
+   /* FLT_TRUE_MIN * 0.5 (RUP) -> FLT_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2, rup", 0xffffffff00000001,
+                  0xffffffff3f000000, 0x00, fa0, fa1, fa2);
+   /* -FLT_TRUE_MIN * 0.5 (RUP) -> -0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2, rup", 0xffffffff80000001,
+                  0xffffffff3f000000, 0x00, fa0, fa1, fa2);
+   /* FLT_TRUE_MIN * 0.5 (RMM) -> FLT_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2, rmm", 0xffffffff00000001,
+                  0xffffffff3f000000, 0x00, fa0, fa1, fa2);
+   /* -FLT_TRUE_MIN * 0.5 (RMM) -> -FLT_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2, rmm", 0xffffffff80000001,
+                  0xffffffff3f000000, 0x00, fa0, fa1, fa2);
+
+   /* FLT_TRUE_MIN * 0.5 (DYN-RNE) -> 0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2", 0xffffffff00000001,
+                  0xffffffff3f000000, 0x00, fa0, fa1, fa2);
+   /* 3*FLT_TRUE_MIN * 0.5 (DYN-RNE) -> 2*FLT_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2", 0xffffffff00000003,
+                  0xffffffff3f000000, 0x00, fa0, fa1, fa2);
+   /* FLT_TRUE_MIN * 0.5 (DYN-RTZ) -> 0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2", 0xffffffff00000001,
+                  0xffffffff3f000000, 0x20, fa0, fa1, fa2);
+   /* -FLT_TRUE_MIN * 0.5 (DYN-RTZ) -> -0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2", 0xffffffff80000001,
+                  0xffffffff3f000000, 0x20, fa0, fa1, fa2);
+   /* FLT_TRUE_MIN * 0.5 (DYN-RDN) -> 0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2", 0xffffffff00000001,
+                  0xffffffff3f000000, 0x40, fa0, fa1, fa2);
+   /* -FLT_TRUE_MIN * 0.5 (DYN-RDN) -> -FLT_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2", 0xffffffff80000001,
+                  0xffffffff3f000000, 0x40, fa0, fa1, fa2);
+   /* FLT_TRUE_MIN * 0.5 (DYN-RUP) -> FLT_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2", 0xffffffff00000001,
+                  0xffffffff3f000000, 0x60, fa0, fa1, fa2);
+   /* -FLT_TRUE_MIN * 0.5 (DYN-RUP) -> -0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2", 0xffffffff80000001,
+                  0xffffffff3f000000, 0x60, fa0, fa1, fa2);
+   /* FLT_TRUE_MIN * 0.5 (DYN-RMM) -> FLT_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2", 0xffffffff00000001,
+                  0xffffffff3f000000, 0x80, fa0, fa1, fa2);
+   /* -FLT_TRUE_MIN * 0.5 (DYN-RMM) -> -FLT_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.s fa0, fa1, fa2", 0xffffffff80000001,
+                  0xffffffff3f000000, 0x80, fa0, fa1, fa2);
+
+   /* --------------- fdiv.s rd, rs1, rs2, rm --------------- */
+   /* 2.0 / 1.0 -> 2.0 */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2", 0xffffffff40000000,
+                  0xffffffff3f800000, 0x00, fa0, fa1, fa2);
+   /* 0.0 / 1.0 -> 0.0 */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2", 0xffffffff00000000,
+                  0xffffffff3f800000, 0x00, fa0, fa1, fa2);
+   /* 1.0 / 2**127 -> 2**-127 (no UF because exact) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2", 0xffffffff3f800000,
+                  0xffffffff7f000000, 0x00, fa0, fa1, fa2);
+   /* FLT_MAX / 0.5 -> INFINITY (OF, NX) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2", 0xffffffff7f7fffff,
+                  0xffffffff3f000000, 0x00, fa0, fa1, fa2);
+   /* FLT_MAX / -0.5 -> -INFINITY (OF, NX) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2", 0xffffffff7f7fffff,
+                  0xffffffffbf000000, 0x00, fa0, fa1, fa2);
+   /* 1.0 / INFINITY -> 0.0 */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2", 0xffffffff3f800000,
+                  0xffffffff7f800000, 0x00, fa0, fa1, fa2);
+   /* 1.0 / 0.0 -> INFINITY (DZ) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2", 0xffffffff3f800000,
+                  0xffffffff00000000, 0x00, fa0, fa1, fa2);
+   /* 0.0 / 0.0 -> qNAN (NV) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2", 0xffffffff00000000,
+                  0xffffffff00000000, 0x00, fa0, fa1, fa2);
+
+   /* FLT_TRUE_MIN / 2.0 (RNE) -> 0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2, rne", 0xffffffff00000001,
+                  0xffffffff40000000, 0x00, fa0, fa1, fa2);
+   /* 3*FLT_TRUE_MIN / 2.0 (RNE) -> 2*FLT_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2, rne", 0xffffffff00000003,
+                  0xffffffff40000000, 0x00, fa0, fa1, fa2);
+   /* FLT_TRUE_MIN / 2.0 (RTZ) -> 0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2, rtz", 0xffffffff00000001,
+                  0xffffffff40000000, 0x00, fa0, fa1, fa2);
+   /* -FLT_TRUE_MIN / 2.0 (RTZ) -> -0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2, rtz", 0xffffffff80000001,
+                  0xffffffff40000000, 0x00, fa0, fa1, fa2);
+   /* FLT_TRUE_MIN / 2.0 (RDN) -> 0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2, rdn", 0xffffffff00000001,
+                  0xffffffff40000000, 0x00, fa0, fa1, fa2);
+   /* -FLT_TRUE_MIN / 2.0 (RDN) -> -FLT_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2, rdn", 0xffffffff80000001,
+                  0xffffffff40000000, 0x00, fa0, fa1, fa2);
+   /* FLT_TRUE_MIN / 2.0 (RUP) -> FLT_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2, rup", 0xffffffff00000001,
+                  0xffffffff40000000, 0x00, fa0, fa1, fa2);
+   /* -FLT_TRUE_MIN / 2.0 (RUP) -> -0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2, rup", 0xffffffff80000001,
+                  0xffffffff40000000, 0x00, fa0, fa1, fa2);
+   /* FLT_TRUE_MIN / 2.0 (RMM) -> FLT_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2, rmm", 0xffffffff00000001,
+                  0xffffffff40000000, 0x00, fa0, fa1, fa2);
+   /* -FLT_TRUE_MIN / 2.0 (RMM) -> -FLT_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2, rmm", 0xffffffff80000001,
+                  0xffffffff40000000, 0x00, fa0, fa1, fa2);
+
+   /* FLT_TRUE_MIN / 2.0 (DYN-RNE) -> 0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2", 0xffffffff00000001,
+                  0xffffffff40000000, 0x00, fa0, fa1, fa2);
+   /* 3*FLT_TRUE_MIN / 2.0 (DYN-RNE) -> 2*FLT_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2", 0xffffffff00000003,
+                  0xffffffff40000000, 0x00, fa0, fa1, fa2);
+   /* FLT_TRUE_MIN / 2.0 (DYN-RTZ) -> 0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2", 0xffffffff00000001,
+                  0xffffffff40000000, 0x20, fa0, fa1, fa2);
+   /* -FLT_TRUE_MIN / 2.0 (DYN-RTZ) -> -0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2", 0xffffffff80000001,
+                  0xffffffff40000000, 0x20, fa0, fa1, fa2);
+   /* FLT_TRUE_MIN / 2.0 (DYN-RDN) -> 0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2", 0xffffffff00000001,
+                  0xffffffff40000000, 0x40, fa0, fa1, fa2);
+   /* -FLT_TRUE_MIN / 2.0 (DYN-RDN) -> -FLT_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2", 0xffffffff80000001,
+                  0xffffffff40000000, 0x40, fa0, fa1, fa2);
+   /* FLT_TRUE_MIN / 2.0 (DYN-RUP) -> FLT_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2", 0xffffffff00000001,
+                  0xffffffff40000000, 0x60, fa0, fa1, fa2);
+   /* -FLT_TRUE_MIN / 2.0 (DYN-RUP) -> -0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2", 0xffffffff80000001,
+                  0xffffffff40000000, 0x60, fa0, fa1, fa2);
+   /* FLT_TRUE_MIN / 2.0 (DYN-RMM) -> FLT_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2", 0xffffffff00000001,
+                  0xffffffff40000000, 0x80, fa0, fa1, fa2);
+   /* -FLT_TRUE_MIN / 2.0 (DYN-RMM) -> -FLT_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.s fa0, fa1, fa2", 0xffffffff80000001,
+                  0xffffffff40000000, 0x80, fa0, fa1, fa2);
+
+   /* ----------------- fsqrt.s rd, rs1, rm ----------------- */
+   /* sqrt(0.0) -> 0.0 */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1", 0xffffffff00000000, 0x00, fa0, fa1);
+   /* sqrt(INFINITY) -> INFINITY */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1", 0xffffffff7f800000, 0x00, fa0, fa1);
+   /* sqrt(2*FLT_TRUE_MIN) -> 2**-74 */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1", 0xffffffff00000002, 0x00, fa0, fa1);
+   /* sqrt(qNAN) -> qNAN */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1", 0xffffffff7fc00000, 0x00, fa0, fa1);
+   /* sqrt(-1.0) -> qNAN (NV) */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1", 0xffffffffbf800000, 0x00, fa0, fa1);
+
+   /* sqrt(nextafterf(1.0)) (RNE) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1, rne", 0xffffffff3f800001, 0x00, fa0,
+                  fa1);
+   /* sqrt(2nextafterf(1.0)) (RNE) -> nextafterf(1.0) (NX) */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1, rne", 0xffffffff3f800002, 0x00, fa0,
+                  fa1);
+   /* sqrt(nextafterf(1.0)) (RTZ) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1, rtz", 0xffffffff3f800001, 0x00, fa0,
+                  fa1);
+   /* sqrt(2nextafterf(1.0)) (RTZ) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1, rtz", 0xffffffff3f800002, 0x00, fa0,
+                  fa1);
+   /* sqrt(nextafterf(1.0)) (RDN) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1, rdn", 0xffffffff3f800001, 0x00, fa0,
+                  fa1);
+   /* sqrt(2nextafterf(1.0)) (RDN) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1, rdn", 0xffffffff3f800002, 0x00, fa0,
+                  fa1);
+   /* sqrt(nextafterf(1.0)) (RUP) -> nextafterf(1.0) (NX) */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1, rup", 0xffffffff3f800001, 0x00, fa0,
+                  fa1);
+   /* sqrt(2nextafterf(1.0)) (RUP) -> nextafterf(1.0) (NX) */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1, rup", 0xffffffff3f800002, 0x00, fa0,
+                  fa1);
+   /* sqrt(nextafterf(1.0)) (RMM) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1, rmm", 0xffffffff3f800001, 0x00, fa0,
+                  fa1);
+   /* sqrt(2nextafterf(1.0)) (RMM) -> nextafterf(1.0) (NX) */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1, rmm", 0xffffffff3f800002, 0x00, fa0,
+                  fa1);
+
+   /* sqrt(nextafterf(1.0)) (DYN-RNE) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1", 0xffffffff3f800001, 0x00, fa0, fa1);
+   /* sqrt(2nextafterf(1.0)) (DYN-RNE) -> nextafterf(1.0) (NX) */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1", 0xffffffff3f800002, 0x00, fa0, fa1);
+   /* sqrt(nextafterf(1.0)) (DYN-RTZ) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1", 0xffffffff3f800001, 0x20, fa0, fa1);
+   /* sqrt(2nextafterf(1.0)) (DYN-RTZ) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1", 0xffffffff3f800002, 0x20, fa0, fa1);
+   /* sqrt(nextafterf(1.0)) (DYN-RDN) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1", 0xffffffff3f800001, 0x40, fa0, fa1);
+   /* sqrt(2nextafterf(1.0)) (DYN-RDN) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1", 0xffffffff3f800002, 0x40, fa0, fa1);
+   /* sqrt(nextafterf(1.0)) (DYN-RUP) -> nextafterf(1.0) (NX) */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1", 0xffffffff3f800001, 0x60, fa0, fa1);
+   /* sqrt(2nextafterf(1.0)) (DYN-RUP) -> nextafterf(1.0) (NX) */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1", 0xffffffff3f800002, 0x60, fa0, fa1);
+   /* sqrt(nextafterf(1.0)) (DYN-RMM) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1", 0xffffffff3f800001, 0x80, fa0, fa1);
+   /* sqrt(2nextafterf(1.0)) (DYN-RMM) -> nextafterf(1.0) (NX) */
+   TESTINST_1_1_F(4, "fsqrt.s fa0, fa1", 0xffffffff3f800002, 0x80, fa0, fa1);
+
+   /* ---------------- fsgnj.s rd, rs1, rs2 ----------------- */
+   /* fmv.s rd, rs1 */
+   TESTINST_1_2_F(4, "fsgnj.s fa0, fa1, fa1", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0x00, fa0, fa1, fa1);
+   TESTINST_1_2_F(4, "fsgnj.s fa0, fa1, fa1", 0xffffffffbf800000,
+                  0xffffffffbf800000, 0x00, fa0, fa1, fa1);
+
+   /* fsgnj(1.0, +) -> 1.0 */
+   TESTINST_1_2_F(4, "fsgnj.s fa0, fa1, fa2", 0xffffffff3f800000,
+                  0xffffffff7fffffff, 0x00, fa0, fa1, fa2);
+   /* fsgnj(1.0, -) -> -1.0 */
+   TESTINST_1_2_F(4, "fsgnj.s fa0, fa1, fa2", 0xffffffff3f800000,
+                  0xffffffff80000000, 0x00, fa0, fa1, fa2);
+   /* fsgnj(-1.0, +) -> 1.0 */
+   TESTINST_1_2_F(4, "fsgnj.s fa0, fa1, fa2", 0xffffffffbf800000,
+                  0xffffffff7fffffff, 0x00, fa0, fa1, fa2);
+   /* fsgnj(-1.0, -) -> -1.0 */
+   TESTINST_1_2_F(4, "fsgnj.s fa0, fa1, fa2", 0xffffffffbf800000,
+                  0xffffffff80000000, 0x00, fa0, fa1, fa2);
+
+   /* ---------------- fsgnjn.s rd, rs1, rs2 ---------------- */
+   /* fneg.s rd, rs1 */
+   TESTINST_1_2_F(4, "fsgnjn.s fa0, fa1, fa1", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0x00, fa0, fa1, fa1);
+   TESTINST_1_2_F(4, "fsgnjn.s fa0, fa1, fa1", 0xffffffffbf800000,
+                  0xffffffffbf800000, 0x00, fa0, fa1, fa1);
+
+   /* fsgnjn(1.0, +) -> -1.0 */
+   TESTINST_1_2_F(4, "fsgnjn.s fa0, fa1, fa2", 0xffffffff3f800000,
+                  0xffffffff7fffffff, 0x00, fa0, fa1, fa2);
+   /* fsgnjn(1.0, -) -> 1.0 */
+   TESTINST_1_2_F(4, "fsgnjn.s fa0, fa1, fa2", 0xffffffff3f800000,
+                  0xffffffff80000000, 0x00, fa0, fa1, fa2);
+   /* fsgnjn(-1.0, +) -> -1.0 */
+   TESTINST_1_2_F(4, "fsgnjn.s fa0, fa1, fa2", 0xffffffffbf800000,
+                  0xffffffff7fffffff, 0x00, fa0, fa1, fa2);
+   /* fsgnjn(-1.0, -) -> 1.0 */
+   TESTINST_1_2_F(4, "fsgnjn.s fa0, fa1, fa2", 0xffffffffbf800000,
+                  0xffffffff80000000, 0x00, fa0, fa1, fa2);
+
+   /* ---------------- fsgnjx.s rd, rs1, rs2 ---------------- */
+   /* fabs.s rd, rs1 */
+   TESTINST_1_2_F(4, "fsgnjx.s fa0, fa1, fa1", 0xffffffff3f800000,
+                  0xffffffff3f800000, 0x00, fa0, fa1, fa1);
+   TESTINST_1_2_F(4, "fsgnjx.s fa0, fa1, fa1", 0xffffffffbf800000,
+                  0xffffffffbf800000, 0x00, fa0, fa1, fa1);
+
+   /* fsgnjx(1.0, +) -> 1.0 */
+   TESTINST_1_2_F(4, "fsgnjx.s fa0, fa1, fa2", 0xffffffff3f800000,
+                  0xffffffff7fffffff, 0x00, fa0, fa1, fa2);
+   /* fsgnjx(1.0, -) -> -1.0 */
+   TESTINST_1_2_F(4, "fsgnjx.s fa0, fa1, fa2", 0xffffffff3f800000,
+                  0xffffffff80000000, 0x00, fa0, fa1, fa2);
+   /* fsgnjx(-1.0, +) -> -1.0 */
+   TESTINST_1_2_F(4, "fsgnjx.s fa0, fa1, fa2", 0xffffffffbf800000,
+                  0xffffffff7fffffff, 0x00, fa0, fa1, fa2);
+   /* fsgnjx(-1.0, -) -> 1.0 */
+   TESTINST_1_2_F(4, "fsgnjx.s fa0, fa1, fa2", 0xffffffffbf800000,
+                  0xffffffff80000000, 0x00, fa0, fa1, fa2);
+
+   /* ----------------- fmin.s rd, rs1, rs2 ----------------- */
+   /* min(0.0, 1.0) -> 0.0 */
+   TESTINST_1_2_F(4, "fmin.s fa0, fa1, fa2", 0xffffffff00000000,
+                  0xffffffff3f800000, 0x00, fa0, fa1, fa2);
+   /* min(0.0, -0.0) -> -0.0 */
+   TESTINST_1_2_F(4, "fmin.s fa0, fa1, fa2", 0xffffffff00000000,
+                  0xffffffff80000000, 0x00, fa0, fa1, fa2);
+   /* min(-0.0, 0.0) -> -0.0 */
+   TESTINST_1_2_F(4, "fmin.s fa0, fa1, fa2", 0xffffffff80000000,
+                  0xffffffff00000000, 0x00, fa0, fa1, fa2);
+   /* min(INFINITY, INFINITY) -> INFINITY */
+   TESTINST_1_2_F(4, "fmin.s fa0, fa1, fa2", 0xffffffff7f800000,
+                  0xffffffff7f800000, 0x00, fa0, fa1, fa2);
+   /* min(0.0, qNAN) -> 0.0 */
+   TESTINST_1_2_F(4, "fmin.s fa0, fa1, fa2", 0xffffffff00000000,
+                  0xffffffff7fc00000, 0x00, fa0, fa1, fa2);
+   /* min(0.0, sNAN) -> 0.0 (NV) */
+   TESTINST_1_2_F(4, "fmin.s fa0, fa1, fa2", 0xffffffff00000000,
+                  0xffffffff7fa00000, 0x00, fa0, fa1, fa2);
+
+   /* ----------------- fmax.s rd, rs1, rs2 ----------------- */
+   /* max(0.0, 1.0) -> 1.0 */
+   TESTINST_1_2_F(4, "fmax.s fa0, fa1, fa2", 0xffffffff00000000,
+                  0xffffffff3f800000, 0x00, fa0, fa1, fa2);
+   /* max(0.0, -0.0) -> 0.0 */
+   TESTINST_1_2_F(4, "fmax.s fa0, fa1, fa2", 0xffffffff00000000,
+                  0xffffffff80000000, 0x00, fa0, fa1, fa2);
+   /* max(-0.0, 0.0) -> 0.0 */
+   TESTINST_1_2_F(4, "fmax.s fa0, fa1, fa2", 0xffffffff80000000,
+                  0xffffffff00000000, 0x00, fa0, fa1, fa2);
+   /* max(INFINITY, INFINITY) -> INFINITY */
+   TESTINST_1_2_F(4, "fmax.s fa0, fa1, fa2", 0xffffffff7f800000,
+                  0xffffffff7f800000, 0x00, fa0, fa1, fa2);
+   /* max(0.0, qNAN) -> 0.0 */
+   TESTINST_1_2_F(4, "fmax.s fa0, fa1, fa2", 0xffffffff00000000,
+                  0xffffffff7fc00000, 0x00, fa0, fa1, fa2);
+   /* max(0.0, sNAN) -> 0.0 (NV) */
+   TESTINST_1_2_F(4, "fmax.s fa0, fa1, fa2", 0xffffffff00000000,
+                  0xffffffff7fa00000, 0x00, fa0, fa1, fa2);
+
+   /* ---------------- fcvt.w.s rd, rs1, rm ----------------- */
+   /* 0.0 -> 0 */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0", 0xffffffff00000000, 0x00, a0, fa0);
+   /* FLT_TRUE_MIN -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0", 0xffffffff00000001, 0x00, a0, fa0);
+   /* INFINITY -> 2**31-1 aka INT_MAX (NV)  */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0", 0xffffffff7f800000, 0x00, a0, fa0);
+   /* qNAN -> 2**31-1 aka INT_MAX (NV) */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0", 0xffffffff7fc00000, 0x00, a0, fa0);
+   /* nextafterf(2**31, 0.0) -> 2**31-128 */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0", 0xffffffff4effffff, 0x00, a0, fa0);
+   /* -2**31 -> -2**31 aka INT_MIN */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0", 0xffffffffcf000000, 0x00, a0, fa0);
+   /* 2**31 -> 2**31-1 aka INT_MAX (NV) */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0", 0xffffffff4f000000, 0x00, a0, fa0);
+   /* -nextafterf(2**31) -> -2**31 aka INT_MIN (NV) */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0", 0xffffffffcf000001, 0x00, a0, fa0);
+
+   /* 1.0 (rd=zero) -> 0 */
+   TESTINST_1_1_IF(4, "fcvt.w.s zero, fa0", 0xffffffff3f800000, 0x00, zero,
+                   fa0);
+
+   /* 0.5 (RNE) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0, rne", 0xffffffff3f000000, 0x00, a0,
+                   fa0);
+   /* 1.5 (RNE) -> 2 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0, rne", 0xffffffff3fc00000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RTZ) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0, rtz", 0xffffffff3f000000, 0x00, a0,
+                   fa0);
+   /* -0.5 (RTZ) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0, rtz", 0xffffffffbf000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RDN) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0, rdn", 0xffffffff3f000000, 0x00, a0,
+                   fa0);
+   /* -0.5 (RDN) -> -1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0, rdn", 0xffffffffbf000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RUP) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0, rup", 0xffffffff3f000000, 0x00, a0,
+                   fa0);
+   /* -0.5 (RUP) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0, rup", 0xffffffffbf000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RMM) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0, rmm", 0xffffffff3f000000, 0x00, a0,
+                   fa0);
+   /* -0.5 (RMM) -> -1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0, rmm", 0xffffffffbf000000, 0x00, a0,
+                   fa0);
+
+   /* 0.5 (DYN-RNE) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0", 0xffffffff3f000000, 0x00, a0, fa0);
+   /* 1.5 (DYN-RNE) -> 2 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0", 0xffffffff3fc00000, 0x00, a0, fa0);
+   /* 0.5 (DYN-RTZ) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0", 0xffffffff3f000000, 0x20, a0, fa0);
+   /* -0.5 (DYN-RTZ) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0", 0xffffffffbf000000, 0x20, a0, fa0);
+   /* 0.5 (DYN-RDN) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0", 0xffffffff3f000000, 0x40, a0, fa0);
+   /* -0.5 (DYN-RDN) -> -1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0", 0xffffffffbf000000, 0x40, a0, fa0);
+   /* 0.5 (DYN-RUP) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0", 0xffffffff3f000000, 0x60, a0, fa0);
+   /* -0.5 (DYN-RUP) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0", 0xffffffffbf000000, 0x60, a0, fa0);
+   /* 0.5 (DYN-RMM) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0", 0xffffffff3f000000, 0x80, a0, fa0);
+   /* -0.5 (DYN-RMM) -> -1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.s a0, fa0", 0xffffffffbf000000, 0x80, a0, fa0);
+
+   /* ---------------- fcvt.wu.s rd, rs1, rm ---------------- */
+   /* 0.0 -> 0 */
+   TESTINST_1_1_IF(4, "fcvt.wu.s a0, fa0", 0xffffffff00000000, 0x00, a0, fa0);
+   /* FLT_TRUE_MIN -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.s a0, fa0", 0xffffffff00000001, 0x00, a0, fa0);
+   /* INFINITY -> 2**32-1 aka UINT_MAX (NV) */
+   TESTINST_1_1_IF(4, "fcvt.wu.s a0, fa0", 0xffffffff7f800000, 0x00, a0, fa0);
+   /* qNAN -> 2**32-1 aka UINT_MAX (NV) */
+   TESTINST_1_1_IF(4, "fcvt.wu.s a0, fa0", 0xffffffff7fc00000, 0x00, a0, fa0);
+   /* nextafterf(2**32, 0.0) -> 2**32-256 */
+   TESTINST_1_1_IF(4, "fcvt.wu.s a0, fa0", 0xffffffff4f7fffff, 0x00, a0, fa0);
+   /* 2**32 -> 2**32-1 aka UINT_MAX (NV) */
+   TESTINST_1_1_IF(4, "fcvt.wu.s a0, fa0", 0xffffffff4f800000, 0x00, a0, fa0);
+   /* -1.0 -> 0 (NV) */
+   TESTINST_1_1_IF(4, "fcvt.wu.s a0, fa0", 0xffffffffbf800000, 0x00, a0, fa0);
+
+   /* 1.0 (rd=zero) -> 0 */
+   TESTINST_1_1_IF(4, "fcvt.wu.s zero, fa0", 0xffffffff3f800000, 0x00, zero,
+                   fa0);
+
+   /* 0.5 (RNE) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.s a0, fa0, rne", 0xffffffff3f000000, 0x00, a0,
+                   fa0);
+   /* 1.5 (RNE) -> 2 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.s a0, fa0, rne", 0xffffffff3fc00000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RTZ) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.s a0, fa0, rtz", 0xffffffff3f000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RDN) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.s a0, fa0, rdn", 0xffffffff3f000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RUP) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.s a0, fa0, rup", 0xffffffff3f000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RMM) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.s a0, fa0, rmm", 0xffffffff3f000000, 0x00, a0,
+                   fa0);
+
+   /* 0.5 (DYN-RNE) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.s a0, fa0", 0xffffffff3f000000, 0x00, a0, fa0);
+   /* 1.5 (DYN-RNE) -> 2 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.s a0, fa0", 0xffffffff3fc00000, 0x00, a0, fa0);
+   /* 0.5 (DYN-RTZ) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.s a0, fa0", 0xffffffff3f000000, 0x20, a0, fa0);
+   /* 0.5 (DYN-RDN) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.s a0, fa0", 0xffffffff3f000000, 0x40, a0, fa0);
+   /* 0.5 (DYN-RUP) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.s a0, fa0", 0xffffffff3f000000, 0x60, a0, fa0);
+   /* 0.5 (DYN-RMM) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.s a0, fa0", 0xffffffff3f000000, 0x80, a0, fa0);
+
+   /* ------------------- fmv.x.w rd, rs1 ------------------- */
+   TESTINST_1_1_IF(4, "fmv.x.w a0, fa0", 0xabcdef0123456789, 0x00, a0, fa0);
+
+   /* "0xffffffff7fffffff" -> "0x000000007fffffff" */
+   TESTINST_1_1_IF(4, "fmv.x.w a0, fa0", 0xffffffff7fffffff, 0x00, a0, fa0);
+   /* "0x0000000080000000" -> "0xffffffff80000000" */
+   TESTINST_1_1_IF(4, "fmv.x.w a0, fa0", 0x0000000080000000, 0x00, a0, fa0);
+
+   /* 1.0 (rd=zero) -> 0 */
+   TESTINST_1_1_IF(4, "fmv.x.w zero, fa0", 0xffffffff3f800000, 0x00, zero, fa0);
+
+   /* ----------------- feq.s rd, rs1, rs2 ------------------ */
+   /* 0.0 == 1.0 -> 0 */
+   TESTINST_1_2_FCMP(4, "feq.s a0, fa0, fa1", 0xffffffff00000000,
+                     0xffffffff3f800000, 0x00, a0, fa0, fa1);
+   /* 0.0 == 0.0 -> 1 */
+   TESTINST_1_2_FCMP(4, "feq.s a0, fa0, fa1", 0xffffffff00000000,
+                     0xffffffff00000000, 0x00, a0, fa0, fa1);
+   /* 0.0 == -0.0 -> 1 */
+   TESTINST_1_2_FCMP(4, "feq.s a0, fa0, fa1", 0xffffffff00000000,
+                     0xffffffff80000000, 0x00, a0, fa0, fa1);
+   /* -0.0 == 0.0 -> 1 */
+   TESTINST_1_2_FCMP(4, "feq.s a0, fa0, fa1", 0xffffffff80000000,
+                     0xffffffff00000000, 0x00, a0, fa0, fa1);
+   /* INFINITY == INFINITY -> 1 */
+   TESTINST_1_2_FCMP(4, "feq.s a0, fa0, fa1", 0xffffffff7f800000,
+                     0xffffffff7f800000, 0x00, a0, fa0, fa1);
+   /* 0.0 == qNAN -> 0 */
+   TESTINST_1_2_FCMP(4, "feq.s a0, fa0, fa1", 0xffffffff00000000,
+                     0xffffffff7fc00000, 0x00, a0, fa0, fa1);
+   /* 0.0 == sNAN -> 0 (NV) */
+   TESTINST_1_2_FCMP(4, "feq.s a0, fa0, fa1", 0xffffffff00000000,
+                     0xffffffff7fa00000, 0x00, a0, fa0, fa1);
+
+   /* sNAN == sNAN (rd=zero) -> 0 (NV) */
+   TESTINST_1_2_FCMP(4, "feq.s zero, fa0, fa1", 0xffffffff7fa00000,
+                     0xffffffff7fa00000, 0x00, zero, fa0, fa1);
+
+   /* ----------------- flt.s rd, rs1, rs2 ------------------ */
+   /* 0.0 < 0.0 -> 0 */
+   TESTINST_1_2_FCMP(4, "flt.s a0, fa0, fa1", 0xffffffff00000000,
+                     0xffffffff00000000, 0x00, a0, fa0, fa1);
+   /* 0.0 < 1.0 -> 1 */
+   TESTINST_1_2_FCMP(4, "flt.s a0, fa0, fa1", 0xffffffff00000000,
+                     0xffffffff3f800000, 0x00, a0, fa0, fa1);
+   /* 0.0 < -0.0 -> 0 */
+   TESTINST_1_2_FCMP(4, "flt.s a0, fa0, fa1", 0xffffffff00000000,
+                     0xffffffff80000000, 0x00, a0, fa0, fa1);
+   /* -0.0 < 0.0 -> 0 */
+   TESTINST_1_2_FCMP(4, "flt.s a0, fa0, fa1", 0xffffffff80000000,
+                     0xffffffff00000000, 0x00, a0, fa0, fa1);
+   /* INFINITY < INFINITY -> 0 */
+   TESTINST_1_2_FCMP(4, "flt.s a0, fa0, fa1", 0xffffffff7f800000,
+                     0xffffffff7f800000, 0x00, a0, fa0, fa1);
+   /* 0.0 < qNAN -> 0 (NV) */
+   TESTINST_1_2_FCMP(4, "flt.s a0, fa0, fa1", 0xffffffff00000000,
+                     0xffffffff7fc00000, 0x00, a0, fa0, fa1);
+   /* 0.0 < sNAN -> 0 (NV) */
+   TESTINST_1_2_FCMP(4, "flt.s a0, fa0, fa1", 0xffffffff00000000,
+                     0xffffffff7fa00000, 0x00, a0, fa0, fa1);
+
+   /* sNAN < sNAN (rd=zero) -> 0 (NV) */
+   TESTINST_1_2_FCMP(4, "flt.s zero, fa0, fa1", 0xffffffff7fa00000,
+                     0xffffffff7fa00000, 0x00, zero, fa0, fa1);
+
+   /* ----------------- fle.s rd, rs1, rs2 ------------------ */
+   /* 1.0 <= 0.0 -> 0 */
+   TESTINST_1_2_FCMP(4, "fle.s a0, fa0, fa1", 0xffffffff3f800000,
+                     0xffffffff00000000, 0x00, a0, fa0, fa1);
+   /* 0.0 <= 0.0 -> 1 */
+   TESTINST_1_2_FCMP(4, "fle.s a0, fa0, fa1", 0xffffffff00000000,
+                     0xffffffff00000000, 0x00, a0, fa0, fa1);
+   /* 0.0 <= 1.0 -> 1 */
+   TESTINST_1_2_FCMP(4, "fle.s a0, fa0, fa1", 0xffffffff00000000,
+                     0xffffffff3f800000, 0x00, a0, fa0, fa1);
+   /* 0.0 <= -0.0 -> 1 */
+   TESTINST_1_2_FCMP(4, "fle.s a0, fa0, fa1", 0xffffffff00000000,
+                     0xffffffff80000000, 0x00, a0, fa0, fa1);
+   /* -0.0 <= 0.0 -> 1 */
+   TESTINST_1_2_FCMP(4, "fle.s a0, fa0, fa1", 0xffffffff80000000,
+                     0xffffffff00000000, 0x00, a0, fa0, fa1);
+   /* INFINITY <= INFINITY -> 1 */
+   TESTINST_1_2_FCMP(4, "fle.s a0, fa0, fa1", 0xffffffff7f800000,
+                     0xffffffff7f800000, 0x00, a0, fa0, fa1);
+   /* 0.0 <= qNAN -> 0 (NV) */
+   TESTINST_1_2_FCMP(4, "fle.s a0, fa0, fa1", 0xffffffff00000000,
+                     0xffffffff7fc00000, 0x00, a0, fa0, fa1);
+   /* 0.0 <= sNAN -> 0 (NV) */
+   TESTINST_1_2_FCMP(4, "fle.s a0, fa0, fa1", 0xffffffff7fa00000,
+                     0x7ff4000000000000, 0x00, a0, fa0, fa1);
+
+   /* sNAN <= sNAN (rd=zero) -> 0 (NV) */
+   TESTINST_1_2_FCMP(4, "fle.s zero, fa0, fa1", 0xffffffff7fa00000,
+                     0xffffffff7fa00000, 0x00, zero, fa0, fa1);
+
+   /* ------------------ fclass.s rd, rs1 ------------------- */
+   /* fclass(-INFINITY) -> 0x001 */
+   TESTINST_1_1_IF(4, "fclass.s a0, fa0", 0xffffffffff800000, 0x00, a0, fa0);
+   /* fclass(-1.0) -> 0x002 */
+   TESTINST_1_1_IF(4, "fclass.s a0, fa0", 0xffffffffbf800000, 0x00, a0, fa0);
+   /* fclass(-FLT_TRUE_MIN) -> 0x004 */
+   TESTINST_1_1_IF(4, "fclass.s a0, fa0", 0xffffffff80000001, 0x00, a0, fa0);
+   /* fclass(-0.0) -> 0x008 */
+   TESTINST_1_1_IF(4, "fclass.s a0, fa0", 0xffffffff80000000, 0x00, a0, fa0);
+   /* fclass(0.0) -> 0x010 */
+   TESTINST_1_1_IF(4, "fclass.s a0, fa0", 0xffffffff00000000, 0x00, a0, fa0);
+   /* fclass(FLT_TRUE_MIN) -> 0x020 */
+   TESTINST_1_1_IF(4, "fclass.s a0, fa0", 0xffffffff00000001, 0x00, a0, fa0);
+   /* fclass(1.0) -> 0x040 */
+   TESTINST_1_1_IF(4, "fclass.s a0, fa0", 0xffffffff3f800000, 0x00, a0, fa0);
+   /* fclass(INFINITY) -> 0x080 */
+   TESTINST_1_1_IF(4, "fclass.s a0, fa0", 0xffffffff7f800000, 0x00, a0, fa0);
+   /* fclass(sNAN) -> 0x100 */
+   TESTINST_1_1_IF(4, "fclass.s a0, fa0", 0xffffffff7fa00000, 0x00, a0, fa0);
+   /* fclass(qNAN) -> 0x200 */
+   TESTINST_1_1_IF(4, "fclass.s a0, fa0", 0xffffffff7fc00000, 0x00, a0, fa0);
+
+   /* fclass(-INFINITY) (rd=zero) -> 0x000 */
+   TESTINST_1_1_IF(4, "fclass.s zero, fa0", 0xffffffffff800000, 0x00, zero,
+                   fa0);
+
+   /* ---------------- fcvt.s.w rd, rs1, rm ----------------- */
+   /* 0 -> 0.0 */
+   TESTINST_1_1_FI(4, "fcvt.s.w fa0, a0", 0x0000000000000000, 0x00, fa0, a0);
+   /* 2**31-128 -> nextafterf(2**31, 0.0) */
+   TESTINST_1_1_FI(4, "fcvt.s.w fa0, a0", 0x000000007fffff80, 0x00, fa0, a0);
+   /* 2**31-1 aka INT_MAX -> 2**31 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.w fa0, a0", 0x000000007fffffff, 0x00, fa0, a0);
+   /* -2**31 aka INT_MIN -> -2**31 */
+   TESTINST_1_1_FI(4, "fcvt.s.w fa0, a0", 0x0000000080000000, 0x00, fa0, a0);
+
+   /* 2**24+1 (RNE) -> 2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.w fa0, a0, rne", 0x0000000001000001, 0x00, fa0,
+                   a0);
+   /* 2**24+3 (RNE) -> 2**24+4 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.w fa0, a0, rne", 0x0000000001000003, 0x00, fa0,
+                   a0);
+   /* 2**24+1 (RTZ) -> 2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.w fa0, a0, rtz", 0x0000000001000001, 0x00, fa0,
+                   a0);
+   /* -2**24-1 (RTZ) -> -2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.w fa0, a0, rtz", 0xfffffffffeffffff, 0x00, fa0,
+                   a0);
+   /* 2**24+1 (RDN) -> 2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.w fa0, a0, rdn", 0x0000000001000001, 0x00, fa0,
+                   a0);
+   /* -2**24-1 (RDN) -> -2**24-2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.w fa0, a0, rdn", 0xfffffffffeffffff, 0x00, fa0,
+                   a0);
+   /* 2**24+1 (RUP) -> 2**24+2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.w fa0, a0, rup", 0x0000000001000001, 0x00, fa0,
+                   a0);
+   /* -2**24-1 (RUP) -> -2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.w fa0, a0, rup", 0xfffffffffeffffff, 0x00, fa0,
+                   a0);
+   /* 2**24+1 (RMM) -> 2**24+2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.w fa0, a0, rmm", 0x0000000001000001, 0x00, fa0,
+                   a0);
+   /* -2**24-1 (RMM) -> -2**24-2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.w fa0, a0, rmm", 0xfffffffffeffffff, 0x00, fa0,
+                   a0);
+
+   /* 2**24+1 (DYN-RNE) -> 2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.w fa0, a0", 0x0000000001000001, 0x00, fa0, a0);
+   /* 2**24+3 (DYN-RNE) -> 2**24+4 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.w fa0, a0", 0x0000000001000003, 0x00, fa0, a0);
+   /* 2**24+1 (DYN-RTZ) -> 2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.w fa0, a0", 0x0000000001000001, 0x20, fa0, a0);
+   /* -2**24-1 (DYN-RTZ) -> -2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.w fa0, a0", 0xfffffffffeffffff, 0x20, fa0, a0);
+   /* 2**24+1 (DYN-RDN) -> 2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.w fa0, a0", 0x0000000001000001, 0x40, fa0, a0);
+   /* -2**24-1 (DYN-RDN) -> -2**24-2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.w fa0, a0", 0xfffffffffeffffff, 0x40, fa0, a0);
+   /* 2**24+1 (DYN-RUP) -> 2**24+2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.w fa0, a0", 0x0000000001000001, 0x60, fa0, a0);
+   /* -2**24-1 (DYN-RUP) -> -2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.w fa0, a0", 0xfffffffffeffffff, 0x60, fa0, a0);
+   /* 2**24+1 (DYN-RMM) -> 2**24+2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.w fa0, a0", 0x0000000001000001, 0x80, fa0, a0);
+   /* -2**24-1 (DYN-RMM) -> -2**24-2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.w fa0, a0", 0xfffffffffeffffff, 0x80, fa0, a0);
+
+   /* ---------------- fcvt.s.wu rd, rs1, rm ---------------- */
+   /* 0 -> 0.0 */
+   TESTINST_1_1_FI(4, "fcvt.s.wu fa0, a0", 0x0000000000000000, 0x00, fa0, a0);
+   /* 2**32-256 -> nextafterf(2**32, 0.0) */
+   TESTINST_1_1_FI(4, "fcvt.s.wu fa0, a0", 0x00000000ffffff00, 0x00, fa0, a0);
+   /* 2**32-1 aka UINT_MAX -> 2**32 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.wu fa0, a0", 0x00000000ffffffff, 0x00, fa0, a0);
+
+   /* 2**24+1 (RNE) -> 2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.wu fa0, a0, rne", 0x0000000001000001, 0x00, fa0,
+                   a0);
+   /* 2**24+3 (RNE) -> 2**24+4 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.wu fa0, a0, rne", 0x0000000001000003, 0x00, fa0,
+                   a0);
+   /* 2**24+1 (RTZ) -> 2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.wu fa0, a0, rtz", 0x0000000001000001, 0x00, fa0,
+                   a0);
+   /* 2**24+1 (RDN) -> 2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.wu fa0, a0, rdn", 0x0000000001000001, 0x00, fa0,
+                   a0);
+   /* 2**24+1 (RUP) -> 2**24+2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.wu fa0, a0, rup", 0x0000000001000001, 0x00, fa0,
+                   a0);
+   /* 2**24+1 (RMM) -> 2**24+2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.wu fa0, a0, rmm", 0x0000000001000001, 0x00, fa0,
+                   a0);
+
+   /* 2**24+1 (DYN-RNE) -> 2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.wu fa0, a0", 0x0000000001000001, 0x00, fa0, a0);
+   /* 2**24+3 (DYN-RNE) -> 2**24+4 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.wu fa0, a0", 0x0000000001000003, 0x00, fa0, a0);
+   /* 2**24+1 (DYN-RTZ) -> 2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.wu fa0, a0", 0x0000000001000001, 0x20, fa0, a0);
+   /* 2**24+1 (DYN-RDN) -> 2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.wu fa0, a0", 0x0000000001000001, 0x40, fa0, a0);
+   /* 2**24+1 (DYN-RUP) -> 2**24+2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.wu fa0, a0", 0x0000000001000001, 0x60, fa0, a0);
+   /* 2**24+1 (DYN-RMM) -> 2**24+2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.wu fa0, a0", 0x0000000001000001, 0x80, fa0, a0);
+
+   /* ------------------- fmv.w.x rd, rs1 ------------------- */
+   TESTINST_1_1_FI(4, "fmv.w.x fa0, a0", 0xabcdef0123456789, 0x00, fa0, a0);
+
+   printf("\n");
+}
+
+static void test_float32_additions(void)
+{
+   printf("RV64F single-precision FP instruction set, additions\n");
+   /* Covers fcvt.{l,lu}.s and fcvt.s.{l,lu}; each macro call is one case. */
+   /* ---------------- fcvt.l.s rd, rs1, rm ----------------- */
+   /* 0.0 -> 0 */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0", 0xffffffff00000000, 0x00, a0, fa0);
+   /* FLT_TRUE_MIN -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0", 0xffffffff00000001, 0x00, a0, fa0);
+   /* INFINITY -> 2**63-1 aka LONG_MAX (NV) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0", 0xffffffff7f800000, 0x00, a0, fa0);
+   /* qNAN -> 2**63-1 aka LONG_MAX (NV) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0", 0xffffffff7fc00000, 0x00, a0, fa0);
+   /* nextafterf(2**63, 0.0) -> 2**63-2**39 */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0", 0xffffffff5effffff, 0x00, a0, fa0);
+   /* -2**63 -> -2**63 aka LONG_MIN */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0", 0xffffffffdf000000, 0x00, a0, fa0);
+   /* 2**63 -> 2**63-1 aka LONG_MAX (NV) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0", 0xffffffff5f000000, 0x00, a0, fa0);
+   /* -nextafterf(2**63) -> -2**63 aka LONG_MIN (NV) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0", 0xffffffffdf000001, 0x00, a0, fa0);
+
+   /* 1.0 (rd=zero) -> 0 */
+   TESTINST_1_1_IF(4, "fcvt.l.s zero, fa0", 0xffffffff3f800000, 0x00, zero,
+                   fa0);
+   /* Static rounding: the mode is the third operand of the instruction. */
+   /* 0.5 (RNE) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0, rne", 0xffffffff3f000000, 0x00, a0,
+                   fa0);
+   /* 1.5 (RNE) -> 2 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0, rne", 0xffffffff3fc00000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RTZ) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0, rtz", 0xffffffff3f000000, 0x00, a0,
+                   fa0);
+   /* -0.5 (RTZ) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0, rtz", 0xffffffffbf000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RDN) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0, rdn", 0xffffffff3f000000, 0x00, a0,
+                   fa0);
+   /* -0.5 (RDN) -> -1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0, rdn", 0xffffffffbf000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RUP) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0, rup", 0xffffffff3f000000, 0x00, a0,
+                   fa0);
+   /* -0.5 (RUP) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0, rup", 0xffffffffbf000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RMM) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0, rmm", 0xffffffff3f000000, 0x00, a0,
+                   fa0);
+   /* -0.5 (RMM) -> -1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0, rmm", 0xffffffffbf000000, 0x00, a0,
+                   fa0);
+   /* DYN: no rm operand; 4th macro arg varies by mode (presumably fcsr). */
+   /* 0.5 (DYN-RNE) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0", 0xffffffff3f000000, 0x00, a0, fa0);
+   /* 1.5 (DYN-RNE) -> 2 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0", 0xffffffff3fc00000, 0x00, a0, fa0);
+   /* 0.5 (DYN-RTZ) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0", 0xffffffff3f000000, 0x20, a0, fa0);
+   /* -0.5 (DYN-RTZ) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0", 0xffffffffbf000000, 0x20, a0, fa0);
+   /* 0.5 (DYN-RDN) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0", 0xffffffff3f000000, 0x40, a0, fa0);
+   /* -0.5 (DYN-RDN) -> -1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0", 0xffffffffbf000000, 0x40, a0, fa0);
+   /* 0.5 (DYN-RUP) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0", 0xffffffff3f000000, 0x60, a0, fa0);
+   /* -0.5 (DYN-RUP) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0", 0xffffffffbf000000, 0x60, a0, fa0);
+   /* 0.5 (DYN-RMM) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0", 0xffffffff3f000000, 0x80, a0, fa0);
+   /* -0.5 (DYN-RMM) -> -1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.s a0, fa0", 0xffffffffbf000000, 0x80, a0, fa0);
+
+   /* ---------------- fcvt.lu.s rd, rs1, rm ---------------- */
+   /* 0.0 -> 0 */
+   TESTINST_1_1_IF(4, "fcvt.lu.s a0, fa0", 0xffffffff00000000, 0x00, a0, fa0);
+   /* FLT_TRUE_MIN -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.s a0, fa0", 0xffffffff00000001, 0x00, a0, fa0);
+   /* INFINITY -> 2**64-1 aka ULONG_MAX (NV) */
+   TESTINST_1_1_IF(4, "fcvt.lu.s a0, fa0", 0xffffffff7f800000, 0x00, a0, fa0);
+   /* qNAN -> 2**64-1 aka ULONG_MAX (NV) */
+   TESTINST_1_1_IF(4, "fcvt.lu.s a0, fa0", 0xffffffff7fc00000, 0x00, a0, fa0);
+   /* nextafterf(2**64, 0.0) -> 2**64-2**40 */
+   TESTINST_1_1_IF(4, "fcvt.lu.s a0, fa0", 0xffffffff5f7fffff, 0x00, a0, fa0);
+   /* 2**64 -> 2**64-1 aka ULONG_MAX (NV) */
+   TESTINST_1_1_IF(4, "fcvt.lu.s a0, fa0", 0xffffffff5f800000, 0x00, a0, fa0);
+   /* -1.0 -> 0 (NV) */
+   TESTINST_1_1_IF(4, "fcvt.lu.s a0, fa0", 0xffffffffbf800000, 0x00, a0, fa0);
+
+   /* 1.0 (rd=zero) -> 0 */
+   TESTINST_1_1_IF(4, "fcvt.lu.s zero, fa0", 0xffffffff3f800000, 0x00, zero,
+                   fa0);
+
+   /* 0.5 (RNE) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.s a0, fa0, rne", 0xffffffff3f000000, 0x00, a0,
+                   fa0);
+   /* 1.5 (RNE) -> 2 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.s a0, fa0, rne", 0xffffffff3fc00000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RTZ) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.s a0, fa0, rtz", 0xffffffff3f000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RDN) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.s a0, fa0, rdn", 0xffffffff3f000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RUP) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.s a0, fa0, rup", 0xffffffff3f000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RMM) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.s a0, fa0, rmm", 0xffffffff3f000000, 0x00, a0,
+                   fa0);
+
+   /* 0.5 (DYN-RNE) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.s a0, fa0", 0xffffffff3f000000, 0x00, a0, fa0);
+   /* 1.5 (DYN-RNE) -> 2 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.s a0, fa0", 0xffffffff3fc00000, 0x00, a0, fa0);
+   /* 0.5 (DYN-RTZ) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.s a0, fa0", 0xffffffff3f000000, 0x20, a0, fa0);
+   /* 0.5 (DYN-RDN) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.s a0, fa0", 0xffffffff3f000000, 0x40, a0, fa0);
+   /* 0.5 (DYN-RUP) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.s a0, fa0", 0xffffffff3f000000, 0x60, a0, fa0);
+   /* 0.5 (DYN-RMM) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.s a0, fa0", 0xffffffff3f000000, 0x80, a0, fa0);
+
+   /* ---------------- fcvt.s.l rd, rs1, rm ----------------- */
+   /* 0 -> 0.0 */
+   TESTINST_1_1_FI(4, "fcvt.s.l fa0, a0", 0x0000000000000000, 0x00, fa0, a0);
+   /* 2**63-2**39 -> nextafterf(2**63, 0.0) */
+   TESTINST_1_1_FI(4, "fcvt.s.l fa0, a0", 0x7fffff8000000000, 0x00, fa0, a0);
+   /* 2**63-1 aka LONG_MAX -> 2**63 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.l fa0, a0", 0x7fffffffffffffff, 0x00, fa0, a0);
+   /* -2**63 aka LONG_MIN -> -2**63 */
+   TESTINST_1_1_FI(4, "fcvt.s.l fa0, a0", 0x8000000000000000, 0x00, fa0, a0);
+
+   /* 2**24+1 (RNE) -> 2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.l fa0, a0, rne", 0x0000000001000001, 0x00, fa0,
+                   a0);
+   /* 2**24+3 (RNE) -> 2**24+4 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.l fa0, a0, rne", 0x0000000001000003, 0x00, fa0,
+                   a0);
+   /* 2**24+1 (RTZ) -> 2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.l fa0, a0, rtz", 0x0000000001000001, 0x00, fa0,
+                   a0);
+   /* -2**24-1 (RTZ) -> -2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.l fa0, a0, rtz", 0xfffffffffeffffff, 0x00, fa0,
+                   a0);
+   /* 2**24+1 (RDN) -> 2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.l fa0, a0, rdn", 0x0000000001000001, 0x00, fa0,
+                   a0);
+   /* -2**24-1 (RDN) -> -2**24-2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.l fa0, a0, rdn", 0xfffffffffeffffff, 0x00, fa0,
+                   a0);
+   /* 2**24+1 (RUP) -> 2**24+2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.l fa0, a0, rup", 0x0000000001000001, 0x00, fa0,
+                   a0);
+   /* -2**24-1 (RUP) -> -2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.l fa0, a0, rup", 0xfffffffffeffffff, 0x00, fa0,
+                   a0);
+   /* 2**24+1 (RMM) -> 2**24+2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.l fa0, a0, rmm", 0x0000000001000001, 0x00, fa0,
+                   a0);
+   /* -2**24-1 (RMM) -> -2**24-2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.l fa0, a0, rmm", 0xfffffffffeffffff, 0x00, fa0,
+                   a0);
+
+   /* 2**24+1 (DYN-RNE) -> 2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.l fa0, a0", 0x0000000001000001, 0x00, fa0, a0);
+   /* 2**24+3 (DYN-RNE) -> 2**24+4 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.l fa0, a0", 0x0000000001000003, 0x00, fa0, a0);
+   /* 2**24+1 (DYN-RTZ) -> 2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.l fa0, a0", 0x0000000001000001, 0x20, fa0, a0);
+   /* -2**24-1 (DYN-RTZ) -> -2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.l fa0, a0", 0xfffffffffeffffff, 0x20, fa0, a0);
+   /* 2**24+1 (DYN-RDN) -> 2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.l fa0, a0", 0x0000000001000001, 0x40, fa0, a0);
+   /* -2**24-1 (DYN-RDN) -> -2**24-2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.l fa0, a0", 0xfffffffffeffffff, 0x40, fa0, a0);
+   /* 2**24+1 (DYN-RUP) -> 2**24+2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.l fa0, a0", 0x0000000001000001, 0x60, fa0, a0);
+   /* -2**24-1 (DYN-RUP) -> -2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.l fa0, a0", 0xfffffffffeffffff, 0x60, fa0, a0);
+   /* 2**24+1 (DYN-RMM) -> 2**24+2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.l fa0, a0", 0x0000000001000001, 0x80, fa0, a0);
+   /* -2**24-1 (DYN-RMM) -> -2**24-2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.l fa0, a0", 0xfffffffffeffffff, 0x80, fa0, a0);
+
+   /* ---------------- fcvt.s.lu rd, rs1, rm ---------------- */
+   /* 0 -> 0.0 */
+   TESTINST_1_1_FI(4, "fcvt.s.lu fa0, a0", 0x0000000000000000, 0x00, fa0, a0);
+   /* 2**64-2**40 -> nextafterf(2**64, 0.0) */
+   TESTINST_1_1_FI(4, "fcvt.s.lu fa0, a0", 0xffffff0000000000, 0x00, fa0, a0);
+   /* 2**64-1 aka ULONG_MAX -> 2**64 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.lu fa0, a0", 0xffffffffffffffff, 0x00, fa0, a0);
+
+   /* 2**24+1 (RNE) -> 2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.lu fa0, a0, rne", 0x0000000001000001, 0x00, fa0,
+                   a0);
+   /* 2**24+3 (RNE) -> 2**24+4 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.lu fa0, a0, rne", 0x0000000001000003, 0x00, fa0,
+                   a0);
+   /* 2**24+1 (RTZ) -> 2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.lu fa0, a0, rtz", 0x0000000001000001, 0x00, fa0,
+                   a0);
+   /* 2**24+1 (RDN) -> 2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.lu fa0, a0, rdn", 0x0000000001000001, 0x00, fa0,
+                   a0);
+   /* 2**24+1 (RUP) -> 2**24+2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.lu fa0, a0, rup", 0x0000000001000001, 0x00, fa0,
+                   a0);
+   /* 2**24+1 (RMM) -> 2**24+2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.lu fa0, a0, rmm", 0x0000000001000001, 0x00, fa0,
+                   a0);
+
+   /* 2**24+1 (DYN-RNE) -> 2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.lu fa0, a0", 0x0000000001000001, 0x00, fa0, a0);
+   /* 2**24+3 (DYN-RNE) -> 2**24+4 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.lu fa0, a0", 0x0000000001000003, 0x00, fa0, a0);
+   /* 2**24+1 (DYN-RTZ) -> 2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.lu fa0, a0", 0x0000000001000001, 0x20, fa0, a0);
+   /* 2**24+1 (DYN-RDN) -> 2**24 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.lu fa0, a0", 0x0000000001000001, 0x40, fa0, a0);
+   /* 2**24+1 (DYN-RUP) -> 2**24+2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.lu fa0, a0", 0x0000000001000001, 0x60, fa0, a0);
+   /* 2**24+1 (DYN-RMM) -> 2**24+2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.s.lu fa0, a0", 0x0000000001000001, 0x80, fa0, a0);
+}
+
+int main(void)
+{
+   test_float32_shared();    /* emits the "shared operations" section */
+   test_float32_additions(); /* emits the "additions" section */
+   return 0;
+}
diff --git a/none/tests/riscv64/float32.stderr.exp b/none/tests/riscv64/float32.stderr.exp
new file mode 100644
index 000000000..e69de29bb
diff --git a/none/tests/riscv64/float32.stdout.exp b/none/tests/riscv64/float32.stdout.exp
new file mode 100644
index 000000000..013c7eda2
--- /dev/null
+++ b/none/tests/riscv64/float32.stdout.exp
@@ -0,0 +1,1556 @@
+RV64F single-precision FP instruction set, shared operations
+flw fa0, 0(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0xffffffff41dc8da0
+  no memory changes
+flw fa0, 4(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0xffffffff8aa9987b
+  no memory changes
+flw fa0, 8(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0xffffffff355229e8
+  no memory changes
+flw fa0, 16(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0xfffffffffeb20cc3
+  no memory changes
+flw fa0, 32(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0xffffffffdb9b22c9
+  no memory changes
+flw fa0, 64(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0xffffffff879ff2e1
+  no memory changes
+flw fa0, 128(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0xffffffff593a2a60
+  no memory changes
+flw fa0, 256(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0xffffffffec73d785
+  no memory changes
+flw fa0, 512(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0xffffffff622d87cb
+  no memory changes
+flw fa0, 1024(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0xffffffffad1b44c8
+  no memory changes
+flw fa0, 2040(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0xfffffffff39743f9
+  no memory changes
+flw fa0, -4(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0xffffffff70a54ae6
+  no memory changes
+flw fa0, -2048(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0xffffffff2d7e670a
+  no memory changes
+flw fa4, 0(a5) ::
+  inputs: a5=&area_mid
+  output: fa4=0xffffffffe24b896b
+  no memory changes
+fsw fa0, 0(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+000]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+fsw fa0, 4(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+000]  .. .. .. .. 89 67 45 23 .. .. .. .. .. .. .. ..
+fsw fa0, 8(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+000]  .. .. .. .. .. .. .. .. 89 67 45 23 .. .. .. ..
+fsw fa0, 16(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+016]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+fsw fa0, 32(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+032]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+fsw fa0, 64(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+064]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+fsw fa0, 128(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+128]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+fsw fa0, 256(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+256]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+fsw fa0, 512(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+512]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+fsw fa0, 1024(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+1024]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+fsw fa0, 2040(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+2032]  .. .. .. .. .. .. .. .. 89 67 45 23 .. .. .. ..
+fsw fa0, -4(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [-016]  .. .. .. .. .. .. .. .. .. .. .. .. 89 67 45 23
+fsw fa0, -2048(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [-2048]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+fsw fa4, 0(a5) ::
+  inputs: fa4=0xabcdef0123456789, a5=&area_mid
+  [+000]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+fmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff40400000, fa2=0xffffffff40000000, fa3=0xffffffff3f800000, fcsr=0x00000000
+  output: fa0=0xffffffff40e00000, fcsr=0x00000000
+fmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffffbf800000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000000
+fmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff40000000, fa2=0xffffffff00000001, fa3=0xffffffff80000001, fcsr=0x00000000
+  output: fa0=0xffffffff00000001, fcsr=0x00000000
+fmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff40000000, fa2=0xffffffff7f7fffff, fa3=0xffffffffff7fffff, fcsr=0x00000000
+  output: fa0=0xffffffff7f7fffff, fcsr=0x00000000
+fmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff40000000, fa2=0xffffffff7f7fffff, fa3=0xffffffff00000000, fcsr=0x00000000
+  output: fa0=0xffffffff7f800000, fcsr=0x00000005
+fmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff40000000, fa2=0xffffffff7f800000, fa3=0xffffffffff800000, fcsr=0x00000000
+  output: fa0=0xffffffff7fc00000, fcsr=0x00000010
+fmadd.s fa0, fa1, fa2, fa3, rne ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fmadd.s fa0, fa1, fa2, fa3, rne ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800001, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800002, fcsr=0x00000001
+fmadd.s fa0, fa1, fa2, fa3, rtz ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fmadd.s fa0, fa1, fa2, fa3, rtz ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffffbf800000, fa3=0xffffffffb3800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800000, fcsr=0x00000001
+fmadd.s fa0, fa1, fa2, fa3, rdn ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fmadd.s fa0, fa1, fa2, fa3, rdn ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffffbf800000, fa3=0xffffffffb3800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800001, fcsr=0x00000001
+fmadd.s fa0, fa1, fa2, fa3, rup ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800001, fcsr=0x00000001
+fmadd.s fa0, fa1, fa2, fa3, rup ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffffbf800000, fa3=0xffffffffb3800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800000, fcsr=0x00000001
+fmadd.s fa0, fa1, fa2, fa3, rmm ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800001, fcsr=0x00000001
+fmadd.s fa0, fa1, fa2, fa3, rmm ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffffbf800000, fa3=0xffffffffb3800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800001, fcsr=0x00000001
+fmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800001, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800002, fcsr=0x00000001
+fmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000020
+  output: fa0=0xffffffff3f800000, fcsr=0x00000021
+fmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffffbf800000, fa3=0xffffffffb3800000, fcsr=0x00000020
+  output: fa0=0xffffffffbf800000, fcsr=0x00000021
+fmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000040
+  output: fa0=0xffffffff3f800000, fcsr=0x00000041
+fmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffffbf800000, fa3=0xffffffffb3800000, fcsr=0x00000040
+  output: fa0=0xffffffffbf800001, fcsr=0x00000041
+fmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000060
+  output: fa0=0xffffffff3f800001, fcsr=0x00000061
+fmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffffbf800000, fa3=0xffffffffb3800000, fcsr=0x00000060
+  output: fa0=0xffffffffbf800000, fcsr=0x00000061
+fmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000080
+  output: fa0=0xffffffff3f800001, fcsr=0x00000081
+fmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffffbf800000, fa3=0xffffffffb3800000, fcsr=0x00000080
+  output: fa0=0xffffffffbf800001, fcsr=0x00000081
+fmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff40400000, fa2=0xffffffff40000000, fa3=0xffffffff3f800000, fcsr=0x00000000
+  output: fa0=0xffffffff40a00000, fcsr=0x00000000
+fmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffff3f800000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000000
+fmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff40000000, fa2=0xffffffff00000001, fa3=0xffffffff00000001, fcsr=0x00000000
+  output: fa0=0xffffffff00000001, fcsr=0x00000000
+fmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff40000000, fa2=0xffffffff7f7fffff, fa3=0xffffffff7f7fffff, fcsr=0x00000000
+  output: fa0=0xffffffff7f7fffff, fcsr=0x00000000
+fmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff40000000, fa2=0xffffffff7f7fffff, fa3=0xffffffff00000000, fcsr=0x00000000
+  output: fa0=0xffffffff7f800000, fcsr=0x00000005
+fmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff40000000, fa2=0xffffffff7f800000, fa3=0xffffffff7f800000, fcsr=0x00000000
+  output: fa0=0xffffffff7fc00000, fcsr=0x00000010
+fmsub.s fa0, fa1, fa2, fa3, rne ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800001, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fmsub.s fa0, fa1, fa2, fa3, rne ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800002, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800002, fcsr=0x00000001
+fmsub.s fa0, fa1, fa2, fa3, rtz ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800001, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fmsub.s fa0, fa1, fa2, fa3, rtz ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffffbf800000, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800000, fcsr=0x00000001
+fmsub.s fa0, fa1, fa2, fa3, rdn ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800001, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fmsub.s fa0, fa1, fa2, fa3, rdn ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffffbf800000, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800001, fcsr=0x00000001
+fmsub.s fa0, fa1, fa2, fa3, rup ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800001, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800001, fcsr=0x00000001
+fmsub.s fa0, fa1, fa2, fa3, rup ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffffbf800000, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800000, fcsr=0x00000001
+fmsub.s fa0, fa1, fa2, fa3, rmm ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800001, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800001, fcsr=0x00000001
+fmsub.s fa0, fa1, fa2, fa3, rmm ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffffbf800000, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800001, fcsr=0x00000001
+fmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800001, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800002, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800002, fcsr=0x00000001
+fmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800001, fa3=0xffffffff33800000, fcsr=0x00000020
+  output: fa0=0xffffffff3f800000, fcsr=0x00000021
+fmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffffbf800000, fa3=0xffffffff33800000, fcsr=0x00000020
+  output: fa0=0xffffffffbf800000, fcsr=0x00000021
+fmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800001, fa3=0xffffffff33800000, fcsr=0x00000040
+  output: fa0=0xffffffff3f800000, fcsr=0x00000041
+fmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffffbf800000, fa3=0xffffffff33800000, fcsr=0x00000040
+  output: fa0=0xffffffffbf800001, fcsr=0x00000041
+fmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800001, fa3=0xffffffff33800000, fcsr=0x00000060
+  output: fa0=0xffffffff3f800001, fcsr=0x00000061
+fmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffffbf800000, fa3=0xffffffff33800000, fcsr=0x00000060
+  output: fa0=0xffffffffbf800000, fcsr=0x00000061
+fmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800001, fa3=0xffffffff33800000, fcsr=0x00000080
+  output: fa0=0xffffffff3f800001, fcsr=0x00000081
+fmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffffbf800000, fa3=0xffffffff33800000, fcsr=0x00000080
+  output: fa0=0xffffffffbf800001, fcsr=0x00000081
+fnmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff40400000, fa2=0xffffffff40000000, fa3=0xffffffff3f800000, fcsr=0x00000000
+  output: fa0=0xffffffffc0a00000, fcsr=0x00000000
+fnmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffff3f800000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000000
+fnmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff40000000, fa2=0xffffffff00000001, fa3=0xffffffff00000001, fcsr=0x00000000
+  output: fa0=0xffffffff80000001, fcsr=0x00000000
+fnmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff40000000, fa2=0xffffffff7f7fffff, fa3=0xffffffff7f7fffff, fcsr=0x00000000
+  output: fa0=0xffffffffff7fffff, fcsr=0x00000000
+fnmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff40000000, fa2=0xffffffff7f7fffff, fa3=0xffffffff00000000, fcsr=0x00000000
+  output: fa0=0xffffffffff800000, fcsr=0x00000005
+fnmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff40000000, fa2=0xffffffff7f800000, fa3=0xffffffff7f800000, fcsr=0x00000000
+  output: fa0=0xffffffff7fc00000, fcsr=0x00000010
+fnmsub.s fa0, fa1, fa2, fa3, rne ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fnmsub.s fa0, fa1, fa2, fa3, rne ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff3f800001, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800002, fcsr=0x00000001
+fnmsub.s fa0, fa1, fa2, fa3, rtz ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fnmsub.s fa0, fa1, fa2, fa3, rtz ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffffb3800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800000, fcsr=0x00000001
+fnmsub.s fa0, fa1, fa2, fa3, rdn ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fnmsub.s fa0, fa1, fa2, fa3, rdn ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffffb3800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800001, fcsr=0x00000001
+fnmsub.s fa0, fa1, fa2, fa3, rup ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800001, fcsr=0x00000001
+fnmsub.s fa0, fa1, fa2, fa3, rup ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffffb3800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800000, fcsr=0x00000001
+fnmsub.s fa0, fa1, fa2, fa3, rmm ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800001, fcsr=0x00000001
+fnmsub.s fa0, fa1, fa2, fa3, rmm ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffffb3800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800001, fcsr=0x00000001
+fnmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fnmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff3f800001, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800002, fcsr=0x00000001
+fnmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000020
+  output: fa0=0xffffffff3f800000, fcsr=0x00000021
+fnmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffffb3800000, fcsr=0x00000020
+  output: fa0=0xffffffffbf800000, fcsr=0x00000021
+fnmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000040
+  output: fa0=0xffffffff3f800000, fcsr=0x00000041
+fnmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffffb3800000, fcsr=0x00000040
+  output: fa0=0xffffffffbf800001, fcsr=0x00000041
+fnmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000060
+  output: fa0=0xffffffff3f800001, fcsr=0x00000061
+fnmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffffb3800000, fcsr=0x00000060
+  output: fa0=0xffffffffbf800000, fcsr=0x00000061
+fnmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000080
+  output: fa0=0xffffffff3f800001, fcsr=0x00000081
+fnmsub.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffffb3800000, fcsr=0x00000080
+  output: fa0=0xffffffffbf800001, fcsr=0x00000081
+fnmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff40400000, fa2=0xffffffff40000000, fa3=0xffffffff3f800000, fcsr=0x00000000
+  output: fa0=0xffffffffc0e00000, fcsr=0x00000000
+fnmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffffbf800000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000000
+fnmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff40000000, fa2=0xffffffff00000001, fa3=0xffffffff80000001, fcsr=0x00000000
+  output: fa0=0xffffffff80000001, fcsr=0x00000000
+fnmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff40000000, fa2=0xffffffff7f7fffff, fa3=0xffffffffff7fffff, fcsr=0x00000000
+  output: fa0=0xffffffffff7fffff, fcsr=0x00000000
+fnmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff40000000, fa2=0xffffffff7f7fffff, fa3=0xffffffff00000000, fcsr=0x00000000
+  output: fa0=0xffffffffff800000, fcsr=0x00000005
+fnmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff40000000, fa2=0xffffffff7f800000, fa3=0xffffffffff800000, fcsr=0x00000000
+  output: fa0=0xffffffff7fc00000, fcsr=0x00000010
+fnmadd.s fa0, fa1, fa2, fa3, rne ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff3f800001, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fnmadd.s fa0, fa1, fa2, fa3, rne ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff3f800002, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800002, fcsr=0x00000001
+fnmadd.s fa0, fa1, fa2, fa3, rtz ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff3f800001, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fnmadd.s fa0, fa1, fa2, fa3, rtz ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800000, fcsr=0x00000001
+fnmadd.s fa0, fa1, fa2, fa3, rdn ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff3f800001, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fnmadd.s fa0, fa1, fa2, fa3, rdn ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800001, fcsr=0x00000001
+fnmadd.s fa0, fa1, fa2, fa3, rup ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff3f800001, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800001, fcsr=0x00000001
+fnmadd.s fa0, fa1, fa2, fa3, rup ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800000, fcsr=0x00000001
+fnmadd.s fa0, fa1, fa2, fa3, rmm ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff3f800001, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800001, fcsr=0x00000001
+fnmadd.s fa0, fa1, fa2, fa3, rmm ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800001, fcsr=0x00000001
+fnmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff3f800001, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fnmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff3f800002, fa3=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800002, fcsr=0x00000001
+fnmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff3f800001, fa3=0xffffffff33800000, fcsr=0x00000020
+  output: fa0=0xffffffff3f800000, fcsr=0x00000021
+fnmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000020
+  output: fa0=0xffffffffbf800000, fcsr=0x00000021
+fnmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff3f800001, fa3=0xffffffff33800000, fcsr=0x00000040
+  output: fa0=0xffffffff3f800000, fcsr=0x00000041
+fnmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000040
+  output: fa0=0xffffffffbf800001, fcsr=0x00000041
+fnmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff3f800001, fa3=0xffffffff33800000, fcsr=0x00000060
+  output: fa0=0xffffffff3f800001, fcsr=0x00000061
+fnmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000060
+  output: fa0=0xffffffffbf800000, fcsr=0x00000061
+fnmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff3f800001, fa3=0xffffffff33800000, fcsr=0x00000080
+  output: fa0=0xffffffff3f800001, fcsr=0x00000081
+fnmadd.s fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fa3=0xffffffff33800000, fcsr=0x00000080
+  output: fa0=0xffffffffbf800001, fcsr=0x00000081
+fadd.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff40000000, fa2=0xffffffff3f800000, fcsr=0x00000000
+  output: fa0=0xffffffff40400000, fcsr=0x00000000
+fadd.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffffbf800000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000000
+fadd.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000001, fa2=0xffffffff00000001, fcsr=0x00000000
+  output: fa0=0xffffffff00000002, fcsr=0x00000000
+fadd.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff7f7fffff, fa2=0xffffffff7f7fffff, fcsr=0x00000000
+  output: fa0=0xffffffff7f800000, fcsr=0x00000005
+fadd.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffffff7fffff, fa2=0xffffffffff7fffff, fcsr=0x00000000
+  output: fa0=0xffffffffff800000, fcsr=0x00000005
+fadd.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00800001, fa2=0xffffffff80800000, fcsr=0x00000000
+  output: fa0=0xffffffff00000001, fcsr=0x00000000
+fadd.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff7f800000, fa2=0xffffffffff800000, fcsr=0x00000000
+  output: fa0=0xffffffff7fc00000, fcsr=0x00000010
+fadd.s fa0, fa1, fa2, rne ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fadd.s fa0, fa1, fa2, rne ::
+  inputs: fa1=0xffffffff3f800001, fa2=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800002, fcsr=0x00000001
+fadd.s fa0, fa1, fa2, rtz ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fadd.s fa0, fa1, fa2, rtz ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffffb3800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800000, fcsr=0x00000001
+fadd.s fa0, fa1, fa2, rdn ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fadd.s fa0, fa1, fa2, rdn ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffffb3800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800001, fcsr=0x00000001
+fadd.s fa0, fa1, fa2, rup ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800001, fcsr=0x00000001
+fadd.s fa0, fa1, fa2, rup ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffffb3800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800000, fcsr=0x00000001
+fadd.s fa0, fa1, fa2, rmm ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800001, fcsr=0x00000001
+fadd.s fa0, fa1, fa2, rmm ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffffb3800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800001, fcsr=0x00000001
+fadd.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fadd.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800001, fa2=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800002, fcsr=0x00000001
+fadd.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff33800000, fcsr=0x00000020
+  output: fa0=0xffffffff3f800000, fcsr=0x00000021
+fadd.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffffb3800000, fcsr=0x00000020
+  output: fa0=0xffffffffbf800000, fcsr=0x00000021
+fadd.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff33800000, fcsr=0x00000040
+  output: fa0=0xffffffff3f800000, fcsr=0x00000041
+fadd.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffffb3800000, fcsr=0x00000040
+  output: fa0=0xffffffffbf800001, fcsr=0x00000041
+fadd.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff33800000, fcsr=0x00000060
+  output: fa0=0xffffffff3f800001, fcsr=0x00000061
+fadd.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffffb3800000, fcsr=0x00000060
+  output: fa0=0xffffffffbf800000, fcsr=0x00000061
+fadd.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff33800000, fcsr=0x00000080
+  output: fa0=0xffffffff3f800001, fcsr=0x00000081
+fadd.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffffb3800000, fcsr=0x00000080
+  output: fa0=0xffffffffbf800001, fcsr=0x00000081
+fsub.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff40000000, fa2=0xffffffff3f800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000000
+fsub.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff3f800000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000000
+fsub.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000001, fa2=0xffffffff80000001, fcsr=0x00000000
+  output: fa0=0xffffffff00000002, fcsr=0x00000000
+fsub.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff7f7fffff, fa2=0xffffffffff7fffff, fcsr=0x00000000
+  output: fa0=0xffffffff7f800000, fcsr=0x00000005
+fsub.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffffff7fffff, fa2=0xffffffff7f7fffff, fcsr=0x00000000
+  output: fa0=0xffffffffff800000, fcsr=0x00000005
+fsub.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00800001, fa2=0xffffffff00800000, fcsr=0x00000000
+  output: fa0=0xffffffff00000001, fcsr=0x00000000
+fsub.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff7f800000, fa2=0xffffffff7f800000, fcsr=0x00000000
+  output: fa0=0xffffffff7fc00000, fcsr=0x00000010
+fsub.s fa0, fa1, fa2, rne ::
+  inputs: fa1=0xffffffff3f800001, fa2=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fsub.s fa0, fa1, fa2, rne ::
+  inputs: fa1=0xffffffff3f800002, fa2=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800002, fcsr=0x00000001
+fsub.s fa0, fa1, fa2, rtz ::
+  inputs: fa1=0xffffffff3f800001, fa2=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fsub.s fa0, fa1, fa2, rtz ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800000, fcsr=0x00000001
+fsub.s fa0, fa1, fa2, rdn ::
+  inputs: fa1=0xffffffff3f800001, fa2=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fsub.s fa0, fa1, fa2, rdn ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800001, fcsr=0x00000001
+fsub.s fa0, fa1, fa2, rup ::
+  inputs: fa1=0xffffffff3f800001, fa2=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800001, fcsr=0x00000001
+fsub.s fa0, fa1, fa2, rup ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800000, fcsr=0x00000001
+fsub.s fa0, fa1, fa2, rmm ::
+  inputs: fa1=0xffffffff3f800001, fa2=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800001, fcsr=0x00000001
+fsub.s fa0, fa1, fa2, rmm ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800001, fcsr=0x00000001
+fsub.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800001, fa2=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fsub.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800002, fa2=0xffffffff33800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800002, fcsr=0x00000001
+fsub.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800001, fa2=0xffffffff33800000, fcsr=0x00000020
+  output: fa0=0xffffffff3f800000, fcsr=0x00000021
+fsub.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff33800000, fcsr=0x00000020
+  output: fa0=0xffffffffbf800000, fcsr=0x00000021
+fsub.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800001, fa2=0xffffffff33800000, fcsr=0x00000040
+  output: fa0=0xffffffff3f800000, fcsr=0x00000041
+fsub.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff33800000, fcsr=0x00000040
+  output: fa0=0xffffffffbf800001, fcsr=0x00000041
+fsub.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800001, fa2=0xffffffff33800000, fcsr=0x00000060
+  output: fa0=0xffffffff3f800001, fcsr=0x00000061
+fsub.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff33800000, fcsr=0x00000060
+  output: fa0=0xffffffffbf800000, fcsr=0x00000061
+fsub.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800001, fa2=0xffffffff33800000, fcsr=0x00000080
+  output: fa0=0xffffffff3f800001, fcsr=0x00000081
+fsub.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff33800000, fcsr=0x00000080
+  output: fa0=0xffffffffbf800001, fcsr=0x00000081
+fmul.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff40000000, fa2=0xffffffff3f800000, fcsr=0x00000000
+  output: fa0=0xffffffff40000000, fcsr=0x00000000
+fmul.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff00000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000000
+fmul.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff1a800000, fa2=0xffffffff1a000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000001, fcsr=0x00000000
+fmul.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff7f7fffff, fa2=0xffffffff7f7fffff, fcsr=0x00000000
+  output: fa0=0xffffffff7f800000, fcsr=0x00000005
+fmul.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff7f7fffff, fa2=0xffffffffff7fffff, fcsr=0x00000000
+  output: fa0=0xffffffffff800000, fcsr=0x00000005
+fmul.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff7f800000, fcsr=0x00000000
+  output: fa0=0xffffffff7f800000, fcsr=0x00000000
+fmul.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000000, fa2=0xffffffff7f800000, fcsr=0x00000000
+  output: fa0=0xffffffff7fc00000, fcsr=0x00000010
+fmul.s fa0, fa1, fa2, rne ::
+  inputs: fa1=0xffffffff00000001, fa2=0xffffffff3f000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000003
+fmul.s fa0, fa1, fa2, rne ::
+  inputs: fa1=0xffffffff00000003, fa2=0xffffffff3f000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000002, fcsr=0x00000003
+fmul.s fa0, fa1, fa2, rtz ::
+  inputs: fa1=0xffffffff00000001, fa2=0xffffffff3f000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000003
+fmul.s fa0, fa1, fa2, rtz ::
+  inputs: fa1=0xffffffff80000001, fa2=0xffffffff3f000000, fcsr=0x00000000
+  output: fa0=0xffffffff80000000, fcsr=0x00000003
+fmul.s fa0, fa1, fa2, rdn ::
+  inputs: fa1=0xffffffff00000001, fa2=0xffffffff3f000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000003
+fmul.s fa0, fa1, fa2, rdn ::
+  inputs: fa1=0xffffffff80000001, fa2=0xffffffff3f000000, fcsr=0x00000000
+  output: fa0=0xffffffff80000001, fcsr=0x00000003
+fmul.s fa0, fa1, fa2, rup ::
+  inputs: fa1=0xffffffff00000001, fa2=0xffffffff3f000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000001, fcsr=0x00000003
+fmul.s fa0, fa1, fa2, rup ::
+  inputs: fa1=0xffffffff80000001, fa2=0xffffffff3f000000, fcsr=0x00000000
+  output: fa0=0xffffffff80000000, fcsr=0x00000003
+fmul.s fa0, fa1, fa2, rmm ::
+  inputs: fa1=0xffffffff00000001, fa2=0xffffffff3f000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000001, fcsr=0x00000003
+fmul.s fa0, fa1, fa2, rmm ::
+  inputs: fa1=0xffffffff80000001, fa2=0xffffffff3f000000, fcsr=0x00000000
+  output: fa0=0xffffffff80000001, fcsr=0x00000003
+fmul.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000001, fa2=0xffffffff3f000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000003
+fmul.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000003, fa2=0xffffffff3f000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000002, fcsr=0x00000003
+fmul.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000001, fa2=0xffffffff3f000000, fcsr=0x00000020
+  output: fa0=0xffffffff00000000, fcsr=0x00000023
+fmul.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff80000001, fa2=0xffffffff3f000000, fcsr=0x00000020
+  output: fa0=0xffffffff80000000, fcsr=0x00000023
+fmul.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000001, fa2=0xffffffff3f000000, fcsr=0x00000040
+  output: fa0=0xffffffff00000000, fcsr=0x00000043
+fmul.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff80000001, fa2=0xffffffff3f000000, fcsr=0x00000040
+  output: fa0=0xffffffff80000001, fcsr=0x00000043
+fmul.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000001, fa2=0xffffffff3f000000, fcsr=0x00000060
+  output: fa0=0xffffffff00000001, fcsr=0x00000063
+fmul.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff80000001, fa2=0xffffffff3f000000, fcsr=0x00000060
+  output: fa0=0xffffffff80000000, fcsr=0x00000063
+fmul.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000001, fa2=0xffffffff3f000000, fcsr=0x00000080
+  output: fa0=0xffffffff00000001, fcsr=0x00000083
+fmul.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff80000001, fa2=0xffffffff3f000000, fcsr=0x00000080
+  output: fa0=0xffffffff80000001, fcsr=0x00000083
+fdiv.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff40000000, fa2=0xffffffff3f800000, fcsr=0x00000000
+  output: fa0=0xffffffff40000000, fcsr=0x00000000
+fdiv.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000000, fa2=0xffffffff3f800000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000000
+fdiv.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff7f000000, fcsr=0x00000000
+  output: fa0=0xffffffff00400000, fcsr=0x00000000
+fdiv.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff7f7fffff, fa2=0xffffffff3f000000, fcsr=0x00000000
+  output: fa0=0xffffffff7f800000, fcsr=0x00000005
+fdiv.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff7f7fffff, fa2=0xffffffffbf000000, fcsr=0x00000000
+  output: fa0=0xffffffffff800000, fcsr=0x00000005
+fdiv.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff7f800000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000000
+fdiv.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff00000000, fcsr=0x00000000
+  output: fa0=0xffffffff7f800000, fcsr=0x00000008
+fdiv.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000000, fa2=0xffffffff00000000, fcsr=0x00000000
+  output: fa0=0xffffffff7fc00000, fcsr=0x00000010
+fdiv.s fa0, fa1, fa2, rne ::
+  inputs: fa1=0xffffffff00000001, fa2=0xffffffff40000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000003
+fdiv.s fa0, fa1, fa2, rne ::
+  inputs: fa1=0xffffffff00000003, fa2=0xffffffff40000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000002, fcsr=0x00000003
+fdiv.s fa0, fa1, fa2, rtz ::
+  inputs: fa1=0xffffffff00000001, fa2=0xffffffff40000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000003
+fdiv.s fa0, fa1, fa2, rtz ::
+  inputs: fa1=0xffffffff80000001, fa2=0xffffffff40000000, fcsr=0x00000000
+  output: fa0=0xffffffff80000000, fcsr=0x00000003
+fdiv.s fa0, fa1, fa2, rdn ::
+  inputs: fa1=0xffffffff00000001, fa2=0xffffffff40000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000003
+fdiv.s fa0, fa1, fa2, rdn ::
+  inputs: fa1=0xffffffff80000001, fa2=0xffffffff40000000, fcsr=0x00000000
+  output: fa0=0xffffffff80000001, fcsr=0x00000003
+fdiv.s fa0, fa1, fa2, rup ::
+  inputs: fa1=0xffffffff00000001, fa2=0xffffffff40000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000001, fcsr=0x00000003
+fdiv.s fa0, fa1, fa2, rup ::
+  inputs: fa1=0xffffffff80000001, fa2=0xffffffff40000000, fcsr=0x00000000
+  output: fa0=0xffffffff80000000, fcsr=0x00000003
+fdiv.s fa0, fa1, fa2, rmm ::
+  inputs: fa1=0xffffffff00000001, fa2=0xffffffff40000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000001, fcsr=0x00000003
+fdiv.s fa0, fa1, fa2, rmm ::
+  inputs: fa1=0xffffffff80000001, fa2=0xffffffff40000000, fcsr=0x00000000
+  output: fa0=0xffffffff80000001, fcsr=0x00000003
+fdiv.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000001, fa2=0xffffffff40000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000003
+fdiv.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000003, fa2=0xffffffff40000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000002, fcsr=0x00000003
+fdiv.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000001, fa2=0xffffffff40000000, fcsr=0x00000020
+  output: fa0=0xffffffff00000000, fcsr=0x00000023
+fdiv.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff80000001, fa2=0xffffffff40000000, fcsr=0x00000020
+  output: fa0=0xffffffff80000000, fcsr=0x00000023
+fdiv.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000001, fa2=0xffffffff40000000, fcsr=0x00000040
+  output: fa0=0xffffffff00000000, fcsr=0x00000043
+fdiv.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff80000001, fa2=0xffffffff40000000, fcsr=0x00000040
+  output: fa0=0xffffffff80000001, fcsr=0x00000043
+fdiv.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000001, fa2=0xffffffff40000000, fcsr=0x00000060
+  output: fa0=0xffffffff00000001, fcsr=0x00000063
+fdiv.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff80000001, fa2=0xffffffff40000000, fcsr=0x00000060
+  output: fa0=0xffffffff80000000, fcsr=0x00000063
+fdiv.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000001, fa2=0xffffffff40000000, fcsr=0x00000080
+  output: fa0=0xffffffff00000001, fcsr=0x00000083
+fdiv.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff80000001, fa2=0xffffffff40000000, fcsr=0x00000080
+  output: fa0=0xffffffff80000001, fcsr=0x00000083
+fsqrt.s fa0, fa1 ::
+  inputs: fa1=0xffffffff00000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000000
+fsqrt.s fa0, fa1 ::
+  inputs: fa1=0xffffffff7f800000, fcsr=0x00000000
+  output: fa0=0xffffffff7f800000, fcsr=0x00000000
+fsqrt.s fa0, fa1 ::
+  inputs: fa1=0xffffffff00000002, fcsr=0x00000000
+  output: fa0=0xffffffff1a800000, fcsr=0x00000000
+fsqrt.s fa0, fa1 ::
+  inputs: fa1=0xffffffff7fc00000, fcsr=0x00000000
+  output: fa0=0xffffffff7fc00000, fcsr=0x00000000
+fsqrt.s fa0, fa1 ::
+  inputs: fa1=0xffffffffbf800000, fcsr=0x00000000
+  output: fa0=0xffffffff7fc00000, fcsr=0x00000010
+fsqrt.s fa0, fa1, rne ::
+  inputs: fa1=0xffffffff3f800001, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fsqrt.s fa0, fa1, rne ::
+  inputs: fa1=0xffffffff3f800002, fcsr=0x00000000
+  output: fa0=0xffffffff3f800001, fcsr=0x00000001
+fsqrt.s fa0, fa1, rtz ::
+  inputs: fa1=0xffffffff3f800001, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fsqrt.s fa0, fa1, rtz ::
+  inputs: fa1=0xffffffff3f800002, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fsqrt.s fa0, fa1, rdn ::
+  inputs: fa1=0xffffffff3f800001, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fsqrt.s fa0, fa1, rdn ::
+  inputs: fa1=0xffffffff3f800002, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fsqrt.s fa0, fa1, rup ::
+  inputs: fa1=0xffffffff3f800001, fcsr=0x00000000
+  output: fa0=0xffffffff3f800001, fcsr=0x00000001
+fsqrt.s fa0, fa1, rup ::
+  inputs: fa1=0xffffffff3f800002, fcsr=0x00000000
+  output: fa0=0xffffffff3f800001, fcsr=0x00000001
+fsqrt.s fa0, fa1, rmm ::
+  inputs: fa1=0xffffffff3f800001, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fsqrt.s fa0, fa1, rmm ::
+  inputs: fa1=0xffffffff3f800002, fcsr=0x00000000
+  output: fa0=0xffffffff3f800001, fcsr=0x00000001
+fsqrt.s fa0, fa1 ::
+  inputs: fa1=0xffffffff3f800001, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fsqrt.s fa0, fa1 ::
+  inputs: fa1=0xffffffff3f800002, fcsr=0x00000000
+  output: fa0=0xffffffff3f800001, fcsr=0x00000001
+fsqrt.s fa0, fa1 ::
+  inputs: fa1=0xffffffff3f800001, fcsr=0x00000020
+  output: fa0=0xffffffff3f800000, fcsr=0x00000021
+fsqrt.s fa0, fa1 ::
+  inputs: fa1=0xffffffff3f800002, fcsr=0x00000020
+  output: fa0=0xffffffff3f800000, fcsr=0x00000021
+fsqrt.s fa0, fa1 ::
+  inputs: fa1=0xffffffff3f800001, fcsr=0x00000040
+  output: fa0=0xffffffff3f800000, fcsr=0x00000041
+fsqrt.s fa0, fa1 ::
+  inputs: fa1=0xffffffff3f800002, fcsr=0x00000040
+  output: fa0=0xffffffff3f800000, fcsr=0x00000041
+fsqrt.s fa0, fa1 ::
+  inputs: fa1=0xffffffff3f800001, fcsr=0x00000060
+  output: fa0=0xffffffff3f800001, fcsr=0x00000061
+fsqrt.s fa0, fa1 ::
+  inputs: fa1=0xffffffff3f800002, fcsr=0x00000060
+  output: fa0=0xffffffff3f800001, fcsr=0x00000061
+fsqrt.s fa0, fa1 ::
+  inputs: fa1=0xffffffff3f800001, fcsr=0x00000080
+  output: fa0=0xffffffff3f800000, fcsr=0x00000081
+fsqrt.s fa0, fa1 ::
+  inputs: fa1=0xffffffff3f800002, fcsr=0x00000080
+  output: fa0=0xffffffff3f800001, fcsr=0x00000081
+fsgnj.s fa0, fa1, fa1 ::
+  inputs: fa1=0xffffffff3f800000, fa1=0xffffffff3f800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000000
+fsgnj.s fa0, fa1, fa1 ::
+  inputs: fa1=0xffffffffbf800000, fa1=0xffffffffbf800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800000, fcsr=0x00000000
+fsgnj.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff7fffffff, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000000
+fsgnj.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff80000000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800000, fcsr=0x00000000
+fsgnj.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff7fffffff, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000000
+fsgnj.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff80000000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800000, fcsr=0x00000000
+fsgnjn.s fa0, fa1, fa1 ::
+  inputs: fa1=0xffffffff3f800000, fa1=0xffffffff3f800000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800000, fcsr=0x00000000
+fsgnjn.s fa0, fa1, fa1 ::
+  inputs: fa1=0xffffffffbf800000, fa1=0xffffffffbf800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000000
+fsgnjn.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff7fffffff, fcsr=0x00000000
+  output: fa0=0xffffffffbf800000, fcsr=0x00000000
+fsgnjn.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff80000000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000000
+fsgnjn.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff7fffffff, fcsr=0x00000000
+  output: fa0=0xffffffffbf800000, fcsr=0x00000000
+fsgnjn.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff80000000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000000
+fsgnjx.s fa0, fa1, fa1 ::
+  inputs: fa1=0xffffffff3f800000, fa1=0xffffffff3f800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000000
+fsgnjx.s fa0, fa1, fa1 ::
+  inputs: fa1=0xffffffffbf800000, fa1=0xffffffffbf800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000000
+fsgnjx.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff7fffffff, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000000
+fsgnjx.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff3f800000, fa2=0xffffffff80000000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800000, fcsr=0x00000000
+fsgnjx.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff7fffffff, fcsr=0x00000000
+  output: fa0=0xffffffffbf800000, fcsr=0x00000000
+fsgnjx.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffffbf800000, fa2=0xffffffff80000000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000000
+fmin.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000000, fa2=0xffffffff3f800000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000000
+fmin.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000000, fa2=0xffffffff80000000, fcsr=0x00000000
+  output: fa0=0xffffffff80000000, fcsr=0x00000000
+fmin.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff80000000, fa2=0xffffffff00000000, fcsr=0x00000000
+  output: fa0=0xffffffff80000000, fcsr=0x00000000
+fmin.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff7f800000, fa2=0xffffffff7f800000, fcsr=0x00000000
+  output: fa0=0xffffffff7f800000, fcsr=0x00000000
+fmin.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000000, fa2=0xffffffff7fc00000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000000
+fmin.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000000, fa2=0xffffffff7fa00000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000010
+fmax.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000000, fa2=0xffffffff3f800000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000000
+fmax.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000000, fa2=0xffffffff80000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000000
+fmax.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff80000000, fa2=0xffffffff00000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000000
+fmax.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff7f800000, fa2=0xffffffff7f800000, fcsr=0x00000000
+  output: fa0=0xffffffff7f800000, fcsr=0x00000000
+fmax.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000000, fa2=0xffffffff7fc00000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000000
+fmax.s fa0, fa1, fa2 ::
+  inputs: fa1=0xffffffff00000000, fa2=0xffffffff7fa00000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000010
+fcvt.w.s a0, fa0 ::
+  inputs: fa0=0xffffffff00000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000000
+fcvt.w.s a0, fa0 ::
+  inputs: fa0=0xffffffff00000001, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.w.s a0, fa0 ::
+  inputs: fa0=0xffffffff7f800000, fcsr=0x00000000
+  output: a0=0x000000007fffffff, fcsr=0x00000010
+fcvt.w.s a0, fa0 ::
+  inputs: fa0=0xffffffff7fc00000, fcsr=0x00000000
+  output: a0=0x000000007fffffff, fcsr=0x00000010
+fcvt.w.s a0, fa0 ::
+  inputs: fa0=0xffffffff4effffff, fcsr=0x00000000
+  output: a0=0x000000007fffff80, fcsr=0x00000000
+fcvt.w.s a0, fa0 ::
+  inputs: fa0=0xffffffffcf000000, fcsr=0x00000000
+  output: a0=0xffffffff80000000, fcsr=0x00000000
+fcvt.w.s a0, fa0 ::
+  inputs: fa0=0xffffffff4f000000, fcsr=0x00000000
+  output: a0=0x000000007fffffff, fcsr=0x00000010
+fcvt.w.s a0, fa0 ::
+  inputs: fa0=0xffffffffcf000001, fcsr=0x00000000
+  output: a0=0xffffffff80000000, fcsr=0x00000010
+fcvt.w.s zero, fa0 ::
+  inputs: fa0=0xffffffff3f800000, fcsr=0x00000000
+  output: zero=0x0000000000000000, fcsr=0x00000000
+fcvt.w.s a0, fa0, rne ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.w.s a0, fa0, rne ::
+  inputs: fa0=0xffffffff3fc00000, fcsr=0x00000000
+  output: a0=0x0000000000000002, fcsr=0x00000001
+fcvt.w.s a0, fa0, rtz ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.w.s a0, fa0, rtz ::
+  inputs: fa0=0xffffffffbf000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.w.s a0, fa0, rdn ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.w.s a0, fa0, rdn ::
+  inputs: fa0=0xffffffffbf000000, fcsr=0x00000000
+  output: a0=0xffffffffffffffff, fcsr=0x00000001
+fcvt.w.s a0, fa0, rup ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000001
+fcvt.w.s a0, fa0, rup ::
+  inputs: fa0=0xffffffffbf000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.w.s a0, fa0, rmm ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000001
+fcvt.w.s a0, fa0, rmm ::
+  inputs: fa0=0xffffffffbf000000, fcsr=0x00000000
+  output: a0=0xffffffffffffffff, fcsr=0x00000001
+fcvt.w.s a0, fa0 ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.w.s a0, fa0 ::
+  inputs: fa0=0xffffffff3fc00000, fcsr=0x00000000
+  output: a0=0x0000000000000002, fcsr=0x00000001
+fcvt.w.s a0, fa0 ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000020
+  output: a0=0x0000000000000000, fcsr=0x00000021
+fcvt.w.s a0, fa0 ::
+  inputs: fa0=0xffffffffbf000000, fcsr=0x00000020
+  output: a0=0x0000000000000000, fcsr=0x00000021
+fcvt.w.s a0, fa0 ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000040
+  output: a0=0x0000000000000000, fcsr=0x00000041
+fcvt.w.s a0, fa0 ::
+  inputs: fa0=0xffffffffbf000000, fcsr=0x00000040
+  output: a0=0xffffffffffffffff, fcsr=0x00000041
+fcvt.w.s a0, fa0 ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000060
+  output: a0=0x0000000000000001, fcsr=0x00000061
+fcvt.w.s a0, fa0 ::
+  inputs: fa0=0xffffffffbf000000, fcsr=0x00000060
+  output: a0=0x0000000000000000, fcsr=0x00000061
+fcvt.w.s a0, fa0 ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000080
+  output: a0=0x0000000000000001, fcsr=0x00000081
+fcvt.w.s a0, fa0 ::
+  inputs: fa0=0xffffffffbf000000, fcsr=0x00000080
+  output: a0=0xffffffffffffffff, fcsr=0x00000081
+fcvt.wu.s a0, fa0 ::
+  inputs: fa0=0xffffffff00000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000000
+fcvt.wu.s a0, fa0 ::
+  inputs: fa0=0xffffffff00000001, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.wu.s a0, fa0 ::
+  inputs: fa0=0xffffffff7f800000, fcsr=0x00000000
+  output: a0=0xffffffffffffffff, fcsr=0x00000010
+fcvt.wu.s a0, fa0 ::
+  inputs: fa0=0xffffffff7fc00000, fcsr=0x00000000
+  output: a0=0xffffffffffffffff, fcsr=0x00000010
+fcvt.wu.s a0, fa0 ::
+  inputs: fa0=0xffffffff4f7fffff, fcsr=0x00000000
+  output: a0=0xffffffffffffff00, fcsr=0x00000000
+fcvt.wu.s a0, fa0 ::
+  inputs: fa0=0xffffffff4f800000, fcsr=0x00000000
+  output: a0=0xffffffffffffffff, fcsr=0x00000010
+fcvt.wu.s a0, fa0 ::
+  inputs: fa0=0xffffffffbf800000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000010
+fcvt.wu.s zero, fa0 ::
+  inputs: fa0=0xffffffff3f800000, fcsr=0x00000000
+  output: zero=0x0000000000000000, fcsr=0x00000000
+fcvt.wu.s a0, fa0, rne ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.wu.s a0, fa0, rne ::
+  inputs: fa0=0xffffffff3fc00000, fcsr=0x00000000
+  output: a0=0x0000000000000002, fcsr=0x00000001
+fcvt.wu.s a0, fa0, rtz ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.wu.s a0, fa0, rdn ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.wu.s a0, fa0, rup ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000001
+fcvt.wu.s a0, fa0, rmm ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000001
+fcvt.wu.s a0, fa0 ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.wu.s a0, fa0 ::
+  inputs: fa0=0xffffffff3fc00000, fcsr=0x00000000
+  output: a0=0x0000000000000002, fcsr=0x00000001
+fcvt.wu.s a0, fa0 ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000020
+  output: a0=0x0000000000000000, fcsr=0x00000021
+fcvt.wu.s a0, fa0 ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000040
+  output: a0=0x0000000000000000, fcsr=0x00000041
+fcvt.wu.s a0, fa0 ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000060
+  output: a0=0x0000000000000001, fcsr=0x00000061
+fcvt.wu.s a0, fa0 ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000080
+  output: a0=0x0000000000000001, fcsr=0x00000081
+fmv.x.w a0, fa0 ::
+  inputs: fa0=0xabcdef0123456789, fcsr=0x00000000
+  output: a0=0x0000000023456789, fcsr=0x00000000
+fmv.x.w a0, fa0 ::
+  inputs: fa0=0xffffffff7fffffff, fcsr=0x00000000
+  output: a0=0x000000007fffffff, fcsr=0x00000000
+fmv.x.w a0, fa0 ::
+  inputs: fa0=0x0000000080000000, fcsr=0x00000000
+  output: a0=0xffffffff80000000, fcsr=0x00000000
+fmv.x.w zero, fa0 ::
+  inputs: fa0=0xffffffff3f800000, fcsr=0x00000000
+  output: zero=0x0000000000000000, fcsr=0x00000000
+feq.s a0, fa0, fa1 ::
+  inputs: fa0=0xffffffff00000000, fa1=0xffffffff3f800000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000000
+feq.s a0, fa0, fa1 ::
+  inputs: fa0=0xffffffff00000000, fa1=0xffffffff00000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000000
+feq.s a0, fa0, fa1 ::
+  inputs: fa0=0xffffffff00000000, fa1=0xffffffff80000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000000
+feq.s a0, fa0, fa1 ::
+  inputs: fa0=0xffffffff80000000, fa1=0xffffffff00000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000000
+feq.s a0, fa0, fa1 ::
+  inputs: fa0=0xffffffff7f800000, fa1=0xffffffff7f800000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000000
+feq.s a0, fa0, fa1 ::
+  inputs: fa0=0xffffffff00000000, fa1=0xffffffff7fc00000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000000
+feq.s a0, fa0, fa1 ::
+  inputs: fa0=0xffffffff00000000, fa1=0xffffffff7fa00000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000010
+feq.s zero, fa0, fa1 ::
+  inputs: fa0=0xffffffff7fa00000, fa1=0xffffffff7fa00000, fcsr=0x00000000
+  output: zero=0x0000000000000000, fcsr=0x00000010
+flt.s a0, fa0, fa1 ::
+  inputs: fa0=0xffffffff00000000, fa1=0xffffffff00000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000000
+flt.s a0, fa0, fa1 ::
+  inputs: fa0=0xffffffff00000000, fa1=0xffffffff3f800000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000000
+flt.s a0, fa0, fa1 ::
+  inputs: fa0=0xffffffff00000000, fa1=0xffffffff80000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000000
+flt.s a0, fa0, fa1 ::
+  inputs: fa0=0xffffffff80000000, fa1=0xffffffff00000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000000
+flt.s a0, fa0, fa1 ::
+  inputs: fa0=0xffffffff7f800000, fa1=0xffffffff7f800000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000000
+flt.s a0, fa0, fa1 ::
+  inputs: fa0=0xffffffff00000000, fa1=0xffffffff7fc00000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000010
+flt.s a0, fa0, fa1 ::
+  inputs: fa0=0xffffffff00000000, fa1=0xffffffff7fa00000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000010
+flt.s zero, fa0, fa1 ::
+  inputs: fa0=0xffffffff7fa00000, fa1=0xffffffff7fa00000, fcsr=0x00000000
+  output: zero=0x0000000000000000, fcsr=0x00000010
+fle.s a0, fa0, fa1 ::
+  inputs: fa0=0xffffffff3f800000, fa1=0xffffffff00000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000000
+fle.s a0, fa0, fa1 ::
+  inputs: fa0=0xffffffff00000000, fa1=0xffffffff00000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000000
+fle.s a0, fa0, fa1 ::
+  inputs: fa0=0xffffffff00000000, fa1=0xffffffff3f800000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000000
+fle.s a0, fa0, fa1 ::
+  inputs: fa0=0xffffffff00000000, fa1=0xffffffff80000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000000
+fle.s a0, fa0, fa1 ::
+  inputs: fa0=0xffffffff80000000, fa1=0xffffffff00000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000000
+fle.s a0, fa0, fa1 ::
+  inputs: fa0=0xffffffff7f800000, fa1=0xffffffff7f800000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000000
+fle.s a0, fa0, fa1 ::
+  inputs: fa0=0xffffffff00000000, fa1=0xffffffff7fc00000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000010
+fle.s a0, fa0, fa1 ::
+  inputs: fa0=0xffffffff7fa00000, fa1=0x7ff4000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000010
+fle.s zero, fa0, fa1 ::
+  inputs: fa0=0xffffffff7fa00000, fa1=0xffffffff7fa00000, fcsr=0x00000000
+  output: zero=0x0000000000000000, fcsr=0x00000010
+fclass.s a0, fa0 ::
+  inputs: fa0=0xffffffffff800000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000000
+fclass.s a0, fa0 ::
+  inputs: fa0=0xffffffffbf800000, fcsr=0x00000000
+  output: a0=0x0000000000000002, fcsr=0x00000000
+fclass.s a0, fa0 ::
+  inputs: fa0=0xffffffff80000001, fcsr=0x00000000
+  output: a0=0x0000000000000004, fcsr=0x00000000
+fclass.s a0, fa0 ::
+  inputs: fa0=0xffffffff80000000, fcsr=0x00000000
+  output: a0=0x0000000000000008, fcsr=0x00000000
+fclass.s a0, fa0 ::
+  inputs: fa0=0xffffffff00000000, fcsr=0x00000000
+  output: a0=0x0000000000000010, fcsr=0x00000000
+fclass.s a0, fa0 ::
+  inputs: fa0=0xffffffff00000001, fcsr=0x00000000
+  output: a0=0x0000000000000020, fcsr=0x00000000
+fclass.s a0, fa0 ::
+  inputs: fa0=0xffffffff3f800000, fcsr=0x00000000
+  output: a0=0x0000000000000040, fcsr=0x00000000
+fclass.s a0, fa0 ::
+  inputs: fa0=0xffffffff7f800000, fcsr=0x00000000
+  output: a0=0x0000000000000080, fcsr=0x00000000
+fclass.s a0, fa0 ::
+  inputs: fa0=0xffffffff7fa00000, fcsr=0x00000000
+  output: a0=0x0000000000000100, fcsr=0x00000000
+fclass.s a0, fa0 ::
+  inputs: fa0=0xffffffff7fc00000, fcsr=0x00000000
+  output: a0=0x0000000000000200, fcsr=0x00000000
+fclass.s zero, fa0 ::
+  inputs: fa0=0xffffffffff800000, fcsr=0x00000000
+  output: zero=0x0000000000000000, fcsr=0x00000000
+fcvt.s.w fa0, a0 ::
+  inputs: a0=0x0000000000000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000000
+fcvt.s.w fa0, a0 ::
+  inputs: a0=0x000000007fffff80, fcsr=0x00000000
+  output: fa0=0xffffffff4effffff, fcsr=0x00000000
+fcvt.s.w fa0, a0 ::
+  inputs: a0=0x000000007fffffff, fcsr=0x00000000
+  output: fa0=0xffffffff4f000000, fcsr=0x00000001
+fcvt.s.w fa0, a0 ::
+  inputs: a0=0x0000000080000000, fcsr=0x00000000
+  output: fa0=0xffffffffcf000000, fcsr=0x00000000
+fcvt.s.w fa0, a0, rne ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000000
+  output: fa0=0xffffffff4b800000, fcsr=0x00000001
+fcvt.s.w fa0, a0, rne ::
+  inputs: a0=0x0000000001000003, fcsr=0x00000000
+  output: fa0=0xffffffff4b800002, fcsr=0x00000001
+fcvt.s.w fa0, a0, rtz ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000000
+  output: fa0=0xffffffff4b800000, fcsr=0x00000001
+fcvt.s.w fa0, a0, rtz ::
+  inputs: a0=0xfffffffffeffffff, fcsr=0x00000000
+  output: fa0=0xffffffffcb800000, fcsr=0x00000001
+fcvt.s.w fa0, a0, rdn ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000000
+  output: fa0=0xffffffff4b800000, fcsr=0x00000001
+fcvt.s.w fa0, a0, rdn ::
+  inputs: a0=0xfffffffffeffffff, fcsr=0x00000000
+  output: fa0=0xffffffffcb800001, fcsr=0x00000001
+fcvt.s.w fa0, a0, rup ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000000
+  output: fa0=0xffffffff4b800001, fcsr=0x00000001
+fcvt.s.w fa0, a0, rup ::
+  inputs: a0=0xfffffffffeffffff, fcsr=0x00000000
+  output: fa0=0xffffffffcb800000, fcsr=0x00000001
+fcvt.s.w fa0, a0, rmm ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000000
+  output: fa0=0xffffffff4b800001, fcsr=0x00000001
+fcvt.s.w fa0, a0, rmm ::
+  inputs: a0=0xfffffffffeffffff, fcsr=0x00000000
+  output: fa0=0xffffffffcb800001, fcsr=0x00000001
+fcvt.s.w fa0, a0 ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000000
+  output: fa0=0xffffffff4b800000, fcsr=0x00000001
+fcvt.s.w fa0, a0 ::
+  inputs: a0=0x0000000001000003, fcsr=0x00000000
+  output: fa0=0xffffffff4b800002, fcsr=0x00000001
+fcvt.s.w fa0, a0 ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000020
+  output: fa0=0xffffffff4b800000, fcsr=0x00000021
+fcvt.s.w fa0, a0 ::
+  inputs: a0=0xfffffffffeffffff, fcsr=0x00000020
+  output: fa0=0xffffffffcb800000, fcsr=0x00000021
+fcvt.s.w fa0, a0 ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000040
+  output: fa0=0xffffffff4b800000, fcsr=0x00000041
+fcvt.s.w fa0, a0 ::
+  inputs: a0=0xfffffffffeffffff, fcsr=0x00000040
+  output: fa0=0xffffffffcb800001, fcsr=0x00000041
+fcvt.s.w fa0, a0 ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000060
+  output: fa0=0xffffffff4b800001, fcsr=0x00000061
+fcvt.s.w fa0, a0 ::
+  inputs: a0=0xfffffffffeffffff, fcsr=0x00000060
+  output: fa0=0xffffffffcb800000, fcsr=0x00000061
+fcvt.s.w fa0, a0 ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000080
+  output: fa0=0xffffffff4b800001, fcsr=0x00000081
+fcvt.s.w fa0, a0 ::
+  inputs: a0=0xfffffffffeffffff, fcsr=0x00000080
+  output: fa0=0xffffffffcb800001, fcsr=0x00000081
+fcvt.s.wu fa0, a0 ::
+  inputs: a0=0x0000000000000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000000
+fcvt.s.wu fa0, a0 ::
+  inputs: a0=0x00000000ffffff00, fcsr=0x00000000
+  output: fa0=0xffffffff4f7fffff, fcsr=0x00000000
+fcvt.s.wu fa0, a0 ::
+  inputs: a0=0x00000000ffffffff, fcsr=0x00000000
+  output: fa0=0xffffffff4f800000, fcsr=0x00000001
+fcvt.s.wu fa0, a0, rne ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000000
+  output: fa0=0xffffffff4b800000, fcsr=0x00000001
+fcvt.s.wu fa0, a0, rne ::
+  inputs: a0=0x0000000001000003, fcsr=0x00000000
+  output: fa0=0xffffffff4b800002, fcsr=0x00000001
+fcvt.s.wu fa0, a0, rtz ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000000
+  output: fa0=0xffffffff4b800000, fcsr=0x00000001
+fcvt.s.wu fa0, a0, rdn ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000000
+  output: fa0=0xffffffff4b800000, fcsr=0x00000001
+fcvt.s.wu fa0, a0, rup ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000000
+  output: fa0=0xffffffff4b800001, fcsr=0x00000001
+fcvt.s.wu fa0, a0, rmm ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000000
+  output: fa0=0xffffffff4b800001, fcsr=0x00000001
+fcvt.s.wu fa0, a0 ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000000
+  output: fa0=0xffffffff4b800000, fcsr=0x00000001
+fcvt.s.wu fa0, a0 ::
+  inputs: a0=0x0000000001000003, fcsr=0x00000000
+  output: fa0=0xffffffff4b800002, fcsr=0x00000001
+fcvt.s.wu fa0, a0 ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000020
+  output: fa0=0xffffffff4b800000, fcsr=0x00000021
+fcvt.s.wu fa0, a0 ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000040
+  output: fa0=0xffffffff4b800000, fcsr=0x00000041
+fcvt.s.wu fa0, a0 ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000060
+  output: fa0=0xffffffff4b800001, fcsr=0x00000061
+fcvt.s.wu fa0, a0 ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000080
+  output: fa0=0xffffffff4b800001, fcsr=0x00000081
+fmv.w.x fa0, a0 ::
+  inputs: a0=0xabcdef0123456789, fcsr=0x00000000
+  output: fa0=0xffffffff23456789, fcsr=0x00000000
+
+RV64F single-precision FP instruction set, additions
+fcvt.l.s a0, fa0 ::
+  inputs: fa0=0xffffffff00000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000000
+fcvt.l.s a0, fa0 ::
+  inputs: fa0=0xffffffff00000001, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.l.s a0, fa0 ::
+  inputs: fa0=0xffffffff7f800000, fcsr=0x00000000
+  output: a0=0x7fffffffffffffff, fcsr=0x00000010
+fcvt.l.s a0, fa0 ::
+  inputs: fa0=0xffffffff7fc00000, fcsr=0x00000000
+  output: a0=0x7fffffffffffffff, fcsr=0x00000010
+fcvt.l.s a0, fa0 ::
+  inputs: fa0=0xffffffff5effffff, fcsr=0x00000000
+  output: a0=0x7fffff8000000000, fcsr=0x00000000
+fcvt.l.s a0, fa0 ::
+  inputs: fa0=0xffffffffdf000000, fcsr=0x00000000
+  output: a0=0x8000000000000000, fcsr=0x00000000
+fcvt.l.s a0, fa0 ::
+  inputs: fa0=0xffffffff5f000000, fcsr=0x00000000
+  output: a0=0x7fffffffffffffff, fcsr=0x00000010
+fcvt.l.s a0, fa0 ::
+  inputs: fa0=0xffffffffdf000001, fcsr=0x00000000
+  output: a0=0x8000000000000000, fcsr=0x00000010
+fcvt.l.s zero, fa0 ::
+  inputs: fa0=0xffffffff3f800000, fcsr=0x00000000
+  output: zero=0x0000000000000000, fcsr=0x00000000
+fcvt.l.s a0, fa0, rne ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.l.s a0, fa0, rne ::
+  inputs: fa0=0xffffffff3fc00000, fcsr=0x00000000
+  output: a0=0x0000000000000002, fcsr=0x00000001
+fcvt.l.s a0, fa0, rtz ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.l.s a0, fa0, rtz ::
+  inputs: fa0=0xffffffffbf000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.l.s a0, fa0, rdn ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.l.s a0, fa0, rdn ::
+  inputs: fa0=0xffffffffbf000000, fcsr=0x00000000
+  output: a0=0xffffffffffffffff, fcsr=0x00000001
+fcvt.l.s a0, fa0, rup ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000001
+fcvt.l.s a0, fa0, rup ::
+  inputs: fa0=0xffffffffbf000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.l.s a0, fa0, rmm ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000001
+fcvt.l.s a0, fa0, rmm ::
+  inputs: fa0=0xffffffffbf000000, fcsr=0x00000000
+  output: a0=0xffffffffffffffff, fcsr=0x00000001
+fcvt.l.s a0, fa0 ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.l.s a0, fa0 ::
+  inputs: fa0=0xffffffff3fc00000, fcsr=0x00000000
+  output: a0=0x0000000000000002, fcsr=0x00000001
+fcvt.l.s a0, fa0 ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000020
+  output: a0=0x0000000000000000, fcsr=0x00000021
+fcvt.l.s a0, fa0 ::
+  inputs: fa0=0xffffffffbf000000, fcsr=0x00000020
+  output: a0=0x0000000000000000, fcsr=0x00000021
+fcvt.l.s a0, fa0 ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000040
+  output: a0=0x0000000000000000, fcsr=0x00000041
+fcvt.l.s a0, fa0 ::
+  inputs: fa0=0xffffffffbf000000, fcsr=0x00000040
+  output: a0=0xffffffffffffffff, fcsr=0x00000041
+fcvt.l.s a0, fa0 ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000060
+  output: a0=0x0000000000000001, fcsr=0x00000061
+fcvt.l.s a0, fa0 ::
+  inputs: fa0=0xffffffffbf000000, fcsr=0x00000060
+  output: a0=0x0000000000000000, fcsr=0x00000061
+fcvt.l.s a0, fa0 ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000080
+  output: a0=0x0000000000000001, fcsr=0x00000081
+fcvt.l.s a0, fa0 ::
+  inputs: fa0=0xffffffffbf000000, fcsr=0x00000080
+  output: a0=0xffffffffffffffff, fcsr=0x00000081
+fcvt.lu.s a0, fa0 ::
+  inputs: fa0=0xffffffff00000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000000
+fcvt.lu.s a0, fa0 ::
+  inputs: fa0=0xffffffff00000001, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.lu.s a0, fa0 ::
+  inputs: fa0=0xffffffff7f800000, fcsr=0x00000000
+  output: a0=0xffffffffffffffff, fcsr=0x00000010
+fcvt.lu.s a0, fa0 ::
+  inputs: fa0=0xffffffff7fc00000, fcsr=0x00000000
+  output: a0=0xffffffffffffffff, fcsr=0x00000010
+fcvt.lu.s a0, fa0 ::
+  inputs: fa0=0xffffffff5f7fffff, fcsr=0x00000000
+  output: a0=0xffffff0000000000, fcsr=0x00000000
+fcvt.lu.s a0, fa0 ::
+  inputs: fa0=0xffffffff5f800000, fcsr=0x00000000
+  output: a0=0xffffffffffffffff, fcsr=0x00000010
+fcvt.lu.s a0, fa0 ::
+  inputs: fa0=0xffffffffbf800000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000010
+fcvt.lu.s zero, fa0 ::
+  inputs: fa0=0xffffffff3f800000, fcsr=0x00000000
+  output: zero=0x0000000000000000, fcsr=0x00000000
+fcvt.lu.s a0, fa0, rne ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.lu.s a0, fa0, rne ::
+  inputs: fa0=0xffffffff3fc00000, fcsr=0x00000000
+  output: a0=0x0000000000000002, fcsr=0x00000001
+fcvt.lu.s a0, fa0, rtz ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.lu.s a0, fa0, rdn ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.lu.s a0, fa0, rup ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000001
+fcvt.lu.s a0, fa0, rmm ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000001
+fcvt.lu.s a0, fa0 ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.lu.s a0, fa0 ::
+  inputs: fa0=0xffffffff3fc00000, fcsr=0x00000000
+  output: a0=0x0000000000000002, fcsr=0x00000001
+fcvt.lu.s a0, fa0 ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000020
+  output: a0=0x0000000000000000, fcsr=0x00000021
+fcvt.lu.s a0, fa0 ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000040
+  output: a0=0x0000000000000000, fcsr=0x00000041
+fcvt.lu.s a0, fa0 ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000060
+  output: a0=0x0000000000000001, fcsr=0x00000061
+fcvt.lu.s a0, fa0 ::
+  inputs: fa0=0xffffffff3f000000, fcsr=0x00000080
+  output: a0=0x0000000000000001, fcsr=0x00000081
+fcvt.s.l fa0, a0 ::
+  inputs: a0=0x0000000000000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000000
+fcvt.s.l fa0, a0 ::
+  inputs: a0=0x7fffff8000000000, fcsr=0x00000000
+  output: fa0=0xffffffff5effffff, fcsr=0x00000000
+fcvt.s.l fa0, a0 ::
+  inputs: a0=0x7fffffffffffffff, fcsr=0x00000000
+  output: fa0=0xffffffff5f000000, fcsr=0x00000001
+fcvt.s.l fa0, a0 ::
+  inputs: a0=0x8000000000000000, fcsr=0x00000000
+  output: fa0=0xffffffffdf000000, fcsr=0x00000000
+fcvt.s.l fa0, a0, rne ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000000
+  output: fa0=0xffffffff4b800000, fcsr=0x00000001
+fcvt.s.l fa0, a0, rne ::
+  inputs: a0=0x0000000001000003, fcsr=0x00000000
+  output: fa0=0xffffffff4b800002, fcsr=0x00000001
+fcvt.s.l fa0, a0, rtz ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000000
+  output: fa0=0xffffffff4b800000, fcsr=0x00000001
+fcvt.s.l fa0, a0, rtz ::
+  inputs: a0=0xfffffffffeffffff, fcsr=0x00000000
+  output: fa0=0xffffffffcb800000, fcsr=0x00000001
+fcvt.s.l fa0, a0, rdn ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000000
+  output: fa0=0xffffffff4b800000, fcsr=0x00000001
+fcvt.s.l fa0, a0, rdn ::
+  inputs: a0=0xfffffffffeffffff, fcsr=0x00000000
+  output: fa0=0xffffffffcb800001, fcsr=0x00000001
+fcvt.s.l fa0, a0, rup ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000000
+  output: fa0=0xffffffff4b800001, fcsr=0x00000001
+fcvt.s.l fa0, a0, rup ::
+  inputs: a0=0xfffffffffeffffff, fcsr=0x00000000
+  output: fa0=0xffffffffcb800000, fcsr=0x00000001
+fcvt.s.l fa0, a0, rmm ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000000
+  output: fa0=0xffffffff4b800001, fcsr=0x00000001
+fcvt.s.l fa0, a0, rmm ::
+  inputs: a0=0xfffffffffeffffff, fcsr=0x00000000
+  output: fa0=0xffffffffcb800001, fcsr=0x00000001
+fcvt.s.l fa0, a0 ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000000
+  output: fa0=0xffffffff4b800000, fcsr=0x00000001
+fcvt.s.l fa0, a0 ::
+  inputs: a0=0x0000000001000003, fcsr=0x00000000
+  output: fa0=0xffffffff4b800002, fcsr=0x00000001
+fcvt.s.l fa0, a0 ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000020
+  output: fa0=0xffffffff4b800000, fcsr=0x00000021
+fcvt.s.l fa0, a0 ::
+  inputs: a0=0xfffffffffeffffff, fcsr=0x00000020
+  output: fa0=0xffffffffcb800000, fcsr=0x00000021
+fcvt.s.l fa0, a0 ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000040
+  output: fa0=0xffffffff4b800000, fcsr=0x00000041
+fcvt.s.l fa0, a0 ::
+  inputs: a0=0xfffffffffeffffff, fcsr=0x00000040
+  output: fa0=0xffffffffcb800001, fcsr=0x00000041
+fcvt.s.l fa0, a0 ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000060
+  output: fa0=0xffffffff4b800001, fcsr=0x00000061
+fcvt.s.l fa0, a0 ::
+  inputs: a0=0xfffffffffeffffff, fcsr=0x00000060
+  output: fa0=0xffffffffcb800000, fcsr=0x00000061
+fcvt.s.l fa0, a0 ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000080
+  output: fa0=0xffffffff4b800001, fcsr=0x00000081
+fcvt.s.l fa0, a0 ::
+  inputs: a0=0xfffffffffeffffff, fcsr=0x00000080
+  output: fa0=0xffffffffcb800001, fcsr=0x00000081
+fcvt.s.lu fa0, a0 ::
+  inputs: a0=0x0000000000000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000000
+fcvt.s.lu fa0, a0 ::
+  inputs: a0=0xffffff0000000000, fcsr=0x00000000
+  output: fa0=0xffffffff5f7fffff, fcsr=0x00000000
+fcvt.s.lu fa0, a0 ::
+  inputs: a0=0xffffffffffffffff, fcsr=0x00000000
+  output: fa0=0xffffffff5f800000, fcsr=0x00000001
+fcvt.s.lu fa0, a0, rne ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000000
+  output: fa0=0xffffffff4b800000, fcsr=0x00000001
+fcvt.s.lu fa0, a0, rne ::
+  inputs: a0=0x0000000001000003, fcsr=0x00000000
+  output: fa0=0xffffffff4b800002, fcsr=0x00000001
+fcvt.s.lu fa0, a0, rtz ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000000
+  output: fa0=0xffffffff4b800000, fcsr=0x00000001
+fcvt.s.lu fa0, a0, rdn ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000000
+  output: fa0=0xffffffff4b800000, fcsr=0x00000001
+fcvt.s.lu fa0, a0, rup ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000000
+  output: fa0=0xffffffff4b800001, fcsr=0x00000001
+fcvt.s.lu fa0, a0, rmm ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000000
+  output: fa0=0xffffffff4b800001, fcsr=0x00000001
+fcvt.s.lu fa0, a0 ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000000
+  output: fa0=0xffffffff4b800000, fcsr=0x00000001
+fcvt.s.lu fa0, a0 ::
+  inputs: a0=0x0000000001000003, fcsr=0x00000000
+  output: fa0=0xffffffff4b800002, fcsr=0x00000001
+fcvt.s.lu fa0, a0 ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000020
+  output: fa0=0xffffffff4b800000, fcsr=0x00000021
+fcvt.s.lu fa0, a0 ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000040
+  output: fa0=0xffffffff4b800000, fcsr=0x00000041
+fcvt.s.lu fa0, a0 ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000060
+  output: fa0=0xffffffff4b800001, fcsr=0x00000061
+fcvt.s.lu fa0, a0 ::
+  inputs: a0=0x0000000001000001, fcsr=0x00000080
+  output: fa0=0xffffffff4b800001, fcsr=0x00000081
diff --git a/none/tests/riscv64/float32.vgtest b/none/tests/riscv64/float32.vgtest
new file mode 100644
index 000000000..88f405d37
--- /dev/null
+++ b/none/tests/riscv64/float32.vgtest
@@ -0,0 +1,2 @@
+prog: float32
+vgopts: -q
diff --git a/none/tests/riscv64/float64.c b/none/tests/riscv64/float64.c
new file mode 100644
index 000000000..4203bcf45
--- /dev/null
+++ b/none/tests/riscv64/float64.c
@@ -0,0 +1,1580 @@
+/* Tests for the RV64D standard double-precision floating-point instruction-set
+   extension. */
+
+#include "testinst.h"
+
+static void test_float64_shared(void)
+{
+   printf("RV64D double-precision FP instruction set, shared operations\n");
+
+   /* --------------- fld rd, imm[11:0](rs1) ---------------- */
+   TESTINST_1_1_FLOAD(4, "fld fa0, 0(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(4, "fld fa0, 4(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(4, "fld fa0, 8(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(4, "fld fa0, 16(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(4, "fld fa0, 32(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(4, "fld fa0, 64(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(4, "fld fa0, 128(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(4, "fld fa0, 256(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(4, "fld fa0, 512(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(4, "fld fa0, 1024(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(4, "fld fa0, 2040(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(4, "fld fa0, -4(a1)", fa0, a1);
+   TESTINST_1_1_FLOAD(4, "fld fa0, -2048(a1)", fa0, a1);
+
+   TESTINST_1_1_FLOAD(4, "fld fa4, 0(a5)", fa4, a5);
+
+   /* --------------- fsd rs2, imm[11:0](rs1) --------------- */
+   TESTINST_0_2_FSTORE(4, "fsd fa0, 0(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(4, "fsd fa0, 4(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(4, "fsd fa0, 8(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(4, "fsd fa0, 16(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(4, "fsd fa0, 32(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(4, "fsd fa0, 64(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(4, "fsd fa0, 128(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(4, "fsd fa0, 256(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(4, "fsd fa0, 512(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(4, "fsd fa0, 1024(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(4, "fsd fa0, 2040(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(4, "fsd fa0, -4(a1)", 0xabcdef0123456789, fa0, a1);
+   TESTINST_0_2_FSTORE(4, "fsd fa0, -2048(a1)", 0xabcdef0123456789, fa0, a1);
+
+   TESTINST_0_2_FSTORE(4, "fsd fa4, 0(a5)", 0xabcdef0123456789, fa4, a5);
+
+   /* ------------ fmadd.d rd, rs1, rs2, rs3, rm ------------ */
+   /* 3.0 * 2.0 + 1.0 -> 7.0 */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3", 0x4008000000000000,
+                  0x4000000000000000, 0x3ff0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * 1.0 + -1.0 -> 0.0 */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0xbff0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 2.0 * DBL_TRUE_MIN + -DBL_TRUE_MIN -> DBL_TRUE_MIN (no UF because exact)
+    */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3", 0x4000000000000000,
+                  0x0000000000000001, 0x8000000000000001, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 2.0 * DBL_MAX + -DBL_MAX -> DBL_MAX */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3", 0x4000000000000000,
+                  0x7fefffffffffffff, 0xffefffffffffffff, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 2.0 * DBL_MAX + 0.0 -> INFINITY (OF, NX) */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3", 0x4000000000000000,
+                  0x7fefffffffffffff, 0x0000000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 2.0 * INFINITY + -INFINITY -> qNAN (NV) */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3", 0x4000000000000000,
+                  0x7ff0000000000000, 0xfff0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+
+   /* 1.0 * 1.0 + DBL_EPSILON/2 (RNE) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3, rne", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * nextafter(1.0) + DBL_EPSILON/2 (RNE) -> 2nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3, rne", 0x3ff0000000000000,
+                  0x3ff0000000000001, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * 1.0 + DBL_EPSILON/2 (RTZ) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3, rtz", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 + -DBL_EPSILON/2 (RTZ) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3, rtz", 0x3ff0000000000000,
+                  0xbff0000000000000, 0xbca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * 1.0 + DBL_EPSILON/2 (RDN) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3, rdn", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 + -DBL_EPSILON/2 (RDN) -> -nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3, rdn", 0x3ff0000000000000,
+                  0xbff0000000000000, 0xbca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * 1.0 + DBL_EPSILON/2 (RUP) -> nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3, rup", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 + -DBL_EPSILON/2 (RUP) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3, rup", 0x3ff0000000000000,
+                  0xbff0000000000000, 0xbca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * 1.0 + DBL_EPSILON/2 (RMM) -> nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3, rmm", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 + -DBL_EPSILON/2 (RMM) -> -nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3, rmm", 0x3ff0000000000000,
+                  0xbff0000000000000, 0xbca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+
+   /* 1.0 * 1.0 + DBL_EPSILON/2 (DYN-RNE) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * nextafter(1.0) + DBL_EPSILON/2 (DYN-RNE) -> 2nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0x3ff0000000000001, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * 1.0 + DBL_EPSILON/2 (DYN-RTZ) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x20, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 + -DBL_EPSILON/2 (DYN-RTZ) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0xbff0000000000000, 0xbca0000000000000, 0x20, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * 1.0 + DBL_EPSILON/2 (DYN-RDN) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x40, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 + -DBL_EPSILON/2 (DYN-RDN) -> -nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0xbff0000000000000, 0xbca0000000000000, 0x40, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * 1.0 + DBL_EPSILON/2 (DYN-RUP) -> nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x60, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 + -DBL_EPSILON/2 (DYN-RUP) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0xbff0000000000000, 0xbca0000000000000, 0x60, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * 1.0 + DBL_EPSILON/2 (DYN-RMM) -> nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x80, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 + -DBL_EPSILON/2 (DYN-RMM) -> -nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmadd.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0xbff0000000000000, 0xbca0000000000000, 0x80, fa0, fa1, fa2,
+                  fa3);
+
+   /* ------------ fmsub.d rd, rs1, rs2, rs3, rm ------------ */
+   /* 3.0 * 2.0 - 1.0 -> 5.0 */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3", 0x4008000000000000,
+                  0x4000000000000000, 0x3ff0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * 1.0 - 1.0 -> 0.0 */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0x3ff0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 2.0 * DBL_TRUE_MIN - DBL_TRUE_MIN -> DBL_TRUE_MIN (no UF because exact)
+    */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3", 0x4000000000000000,
+                  0x0000000000000001, 0x0000000000000001, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 2.0 * DBL_MAX - DBL_MAX -> DBL_MAX */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3", 0x4000000000000000,
+                  0x7fefffffffffffff, 0x7fefffffffffffff, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 2.0 * DBL_MAX - 0.0 -> INFINITY (OF, NX) */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3", 0x4000000000000000,
+                  0x7fefffffffffffff, 0x0000000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 2.0 * INFINITY - INFINITY -> qNAN (NV) */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3", 0x4000000000000000,
+                  0x7ff0000000000000, 0x7ff0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+
+   /* 1.0 * nextafter(1.0) - DBL_EPSILON/2 (RNE) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3, rne", 0x3ff0000000000000,
+                  0x3ff0000000000001, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * 2nextafter(1.0) - DBL_EPSILON/2 (RNE) -> 2nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3, rne", 0x3ff0000000000000,
+                  0x3ff0000000000002, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * nextafter(1.0) - DBL_EPSILON/2 (RTZ) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3, rtz", 0x3ff0000000000000,
+                  0x3ff0000000000001, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 - DBL_EPSILON/2 (RTZ) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3, rtz", 0x3ff0000000000000,
+                  0xbff0000000000000, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * nextafter(1.0) - DBL_EPSILON/2 (RDN) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3, rdn", 0x3ff0000000000000,
+                  0x3ff0000000000001, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 - DBL_EPSILON/2 (RDN) -> -nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3, rdn", 0x3ff0000000000000,
+                  0xbff0000000000000, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * nextafter(1.0) - DBL_EPSILON/2 (RUP) -> nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3, rup", 0x3ff0000000000000,
+                  0x3ff0000000000001, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 - DBL_EPSILON/2 (RUP) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3, rup", 0x3ff0000000000000,
+                  0xbff0000000000000, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * nextafter(1.0) - DBL_EPSILON/2 (RMM) -> nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3, rmm", 0x3ff0000000000000,
+                  0x3ff0000000000001, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 - DBL_EPSILON/2 (RMM) -> -nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3, rmm", 0x3ff0000000000000,
+                  0xbff0000000000000, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+
+   /* 1.0 * nextafter(1.0) - DBL_EPSILON/2 (DYN-RNE) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0x3ff0000000000001, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * 2nextafter(1.0) - DBL_EPSILON/2 (DYN-RNE) -> 2nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0x3ff0000000000002, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * nextafter(1.0) - DBL_EPSILON/2 (DYN-RTZ) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0x3ff0000000000001, 0x3ca0000000000000, 0x20, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 - DBL_EPSILON/2 (DYN-RTZ) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0xbff0000000000000, 0x3ca0000000000000, 0x20, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * nextafter(1.0) - DBL_EPSILON/2 (DYN-RDN) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0x3ff0000000000001, 0x3ca0000000000000, 0x40, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 - DBL_EPSILON/2 (DYN-RDN) -> -nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0xbff0000000000000, 0x3ca0000000000000, 0x40, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * nextafter(1.0) - DBL_EPSILON/2 (DYN-RUP) -> nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0x3ff0000000000001, 0x3ca0000000000000, 0x60, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 - DBL_EPSILON/2 (DYN-RUP) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0xbff0000000000000, 0x3ca0000000000000, 0x60, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * nextafter(1.0) - DBL_EPSILON/2 (DYN-RMM) -> nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0x3ff0000000000001, 0x3ca0000000000000, 0x80, fa0, fa1, fa2,
+                  fa3);
+   /* 1.0 * -1.0 - DBL_EPSILON/2 (DYN-RMM) -> -nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fmsub.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0xbff0000000000000, 0x3ca0000000000000, 0x80, fa0, fa1, fa2,
+                  fa3);
+
+   /* ----------- fnmsub.d rd, rs1, rs2, rs3, rm ------------ */
+   /* -(3.0 * 2.0) + 1.0 -> -5.0 */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3", 0x4008000000000000,
+                  0x4000000000000000, 0x3ff0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) + 1.0 -> 0.0 */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0x3ff0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(2.0 * DBL_TRUE_MIN) + DBL_TRUE_MIN -> -DBL_TRUE_MIN (no UF because
+      exact) */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3", 0x4000000000000000,
+                  0x0000000000000001, 0x0000000000000001, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(2.0 * DBL_MAX) + DBL_MAX -> -DBL_MAX */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3", 0x4000000000000000,
+                  0x7fefffffffffffff, 0x7fefffffffffffff, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(2.0 * DBL_MAX) + 0.0 -> -INFINITY (OF, NX) */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3", 0x4000000000000000,
+                  0x7fefffffffffffff, 0x0000000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(2.0 * INFINITY) + INFINITY -> qNAN (NV) */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3", 0x4000000000000000,
+                  0x7ff0000000000000, 0x7ff0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+
+   /* -(-1.0 * 1.0) + DBL_EPSILON/2 (RNE) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3, rne", 0xbff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * nextafter(1.0)) + DBL_EPSILON/2 (RNE) -> 2nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3, rne", 0xbff0000000000000,
+                  0x3ff0000000000001, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * 1.0) + DBL_EPSILON/2 (RTZ) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3, rtz", 0xbff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) + -DBL_EPSILON/2 (RTZ) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3, rtz", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0xbca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * 1.0) + DBL_EPSILON/2 (RDN) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3, rdn", 0xbff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) + -DBL_EPSILON/2 (RDN) -> -nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3, rdn", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0xbca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * 1.0) + DBL_EPSILON/2 (RUP) -> nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3, rup", 0xbff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) + -DBL_EPSILON/2 (RUP) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3, rup", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0xbca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * 1.0) + DBL_EPSILON/2 (RMM) -> nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3, rmm", 0xbff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) + -DBL_EPSILON/2 (RMM) -> -nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3, rmm", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0xbca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+
+   /* -(-1.0 * 1.0) + DBL_EPSILON/2 (DYN-RNE) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3", 0xbff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * nextafter(1.0)) + DBL_EPSILON/2 (DYN-RNE) -> 2nextafter(1.0) (NX)
+    */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3", 0xbff0000000000000,
+                  0x3ff0000000000001, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * 1.0) + DBL_EPSILON/2 (DYN-RTZ) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3", 0xbff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x20, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) + -DBL_EPSILON/2 (DYN-RTZ) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0xbca0000000000000, 0x20, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * 1.0) + DBL_EPSILON/2 (DYN-RDN) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3", 0xbff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x40, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) + -DBL_EPSILON/2 (DYN-RDN) -> -nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0xbca0000000000000, 0x40, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * 1.0) + DBL_EPSILON/2 (DYN-RUP) -> nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3", 0xbff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x60, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) + -DBL_EPSILON/2 (DYN-RUP) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0xbca0000000000000, 0x60, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * 1.0) + DBL_EPSILON/2 (DYN-RMM) -> nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3", 0xbff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x80, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) + -DBL_EPSILON/2 (DYN-RMM) -> -nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmsub.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0xbca0000000000000, 0x80, fa0, fa1, fa2,
+                  fa3);
+
+   /* ----------- fnmadd.d rd, rs1, rs2, rs3, rm ------------ */
+   /* -(3.0 * 2.0) - 1.0 -> -7.0 */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3", 0x4008000000000000,
+                  0x4000000000000000, 0x3ff0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) - -1.0 -> 0.0 */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0xbff0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(2.0 * DBL_TRUE_MIN) - -DBL_TRUE_MIN -> -DBL_TRUE_MIN (no UF because
+      exact) */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3", 0x4000000000000000,
+                  0x0000000000000001, 0x8000000000000001, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(2.0 * DBL_MAX) - -DBL_MAX -> -DBL_MAX */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3", 0x4000000000000000,
+                  0x7fefffffffffffff, 0xffefffffffffffff, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(2.0 * DBL_MAX) - 0.0 -> -INFINITY (OF, NX) */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3", 0x4000000000000000,
+                  0x7fefffffffffffff, 0x0000000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(2.0 * INFINITY) - -INFINITY -> qNAN (NV) */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3", 0x4000000000000000,
+                  0x7ff0000000000000, 0xfff0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+
+   /* -(-1.0 * nextafter(1.0)) - DBL_EPSILON/2 (RNE) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3, rne", 0xbff0000000000000,
+                  0x3ff0000000000001, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * 2nextafter(1.0)) - DBL_EPSILON/2 (RNE) -> 2nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3, rne", 0xbff0000000000000,
+                  0x3ff0000000000002, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * nextafter(1.0)) - DBL_EPSILON/2 (RTZ) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3, rtz", 0xbff0000000000000,
+                  0x3ff0000000000001, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) - DBL_EPSILON/2 (RTZ) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3, rtz", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * nextafter(1.0)) - DBL_EPSILON/2 (RDN) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3, rdn", 0xbff0000000000000,
+                  0x3ff0000000000001, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) - DBL_EPSILON/2 (RDN) -> -nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3, rdn", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * nextafter(1.0)) - DBL_EPSILON/2 (RUP) -> nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3, rup", 0xbff0000000000000,
+                  0x3ff0000000000001, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) - DBL_EPSILON/2 (RUP) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3, rup", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * nextafter(1.0)) - DBL_EPSILON/2 (RMM) -> nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3, rmm", 0xbff0000000000000,
+                  0x3ff0000000000001, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) - DBL_EPSILON/2 (RMM) -> -nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3, rmm", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+
+   /* -(-1.0 * nextafter(1.0)) - DBL_EPSILON/2 (DYN-RNE) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3", 0xbff0000000000000,
+                  0x3ff0000000000001, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * 2nextafter(1.0)) - DBL_EPSILON/2 (DYN-RNE) -> 2nextafter(1.0)
+      (NX) */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3", 0xbff0000000000000,
+                  0x3ff0000000000002, 0x3ca0000000000000, 0x00, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * nextafter(1.0)) - DBL_EPSILON/2 (DYN-RTZ) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3", 0xbff0000000000000,
+                  0x3ff0000000000001, 0x3ca0000000000000, 0x20, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) - DBL_EPSILON/2 (DYN-RTZ) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x20, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * nextafter(1.0)) - DBL_EPSILON/2 (DYN-RDN) -> 1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3", 0xbff0000000000000,
+                  0x3ff0000000000001, 0x3ca0000000000000, 0x40, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) - DBL_EPSILON/2 (DYN-RDN) -> -nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x40, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * nextafter(1.0)) - DBL_EPSILON/2 (DYN-RUP) -> nextafter(1.0) (NX)
+    */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3", 0xbff0000000000000,
+                  0x3ff0000000000001, 0x3ca0000000000000, 0x60, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) - DBL_EPSILON/2 (DYN-RUP) -> -1.0 (NX) */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x60, fa0, fa1, fa2,
+                  fa3);
+   /* -(-1.0 * nextafter(1.0)) - DBL_EPSILON/2 (DYN-RMM) -> nextafter(1.0) (NX)
+    */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3", 0xbff0000000000000,
+                  0x3ff0000000000001, 0x3ca0000000000000, 0x80, fa0, fa1, fa2,
+                  fa3);
+   /* -(1.0 * 1.0) - DBL_EPSILON/2 (DYN-RMM) -> -nextafter(1.0) (NX) */
+   TESTINST_1_3_F(4, "fnmadd.d fa0, fa1, fa2, fa3", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0x3ca0000000000000, 0x80, fa0, fa1, fa2,
+                  fa3);
+
+   /* --------------- fadd.d rd, rs1, rs2, rm --------------- */
+   /* 2.0 + 1.0 -> 3.0 */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2", 0x4000000000000000,
+                  0x3ff0000000000000, 0x00, fa0, fa1, fa2);
+   /* 1.0 + -1.0 -> 0.0 */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2", 0x3ff0000000000000,
+                  0xbff0000000000000, 0x00, fa0, fa1, fa2);
+   /* DBL_TRUE_MIN + DBL_TRUE_MIN -> 2*DBL_TRUE_MIN (no UF because exact) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2", 0x0000000000000001,
+                  0x0000000000000001, 0x00, fa0, fa1, fa2);
+   /* DBL_MAX + DBL_MAX -> INFINITY (OF, NX) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2", 0x7fefffffffffffff,
+                  0x7fefffffffffffff, 0x00, fa0, fa1, fa2);
+   /* -DBL_MAX + -DBL_MAX -> -INFINITY (OF, NX) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2", 0xffefffffffffffff,
+                  0xffefffffffffffff, 0x00, fa0, fa1, fa2);
+   /* nextafter(DBL_MIN) + -DBL_MIN -> DBL_TRUE_MIN (no UF because exact) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2", 0x0010000000000001,
+                  0x8010000000000000, 0x00, fa0, fa1, fa2);
+   /* INFINITY + -INFINITY -> qNAN (NV) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2", 0x7ff0000000000000,
+                  0xfff0000000000000, 0x00, fa0, fa1, fa2);
+
+   /* 1.0 + DBL_EPSILON/2 (RNE) -> 1.0 (NX) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2, rne", 0x3ff0000000000000,
+                  0x3ca0000000000000, 0x00, fa0, fa1, fa2);
+   /* nextafter(1.0) + DBL_EPSILON/2 (RNE) -> 2nextafter(1.0) (NX) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2, rne", 0x3ff0000000000001,
+                  0x3ca0000000000000, 0x00, fa0, fa1, fa2);
+   /* 1.0 + DBL_EPSILON/2 (RTZ) -> 1.0 (NX) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2, rtz", 0x3ff0000000000000,
+                  0x3ca0000000000000, 0x00, fa0, fa1, fa2);
+   /* -1.0 + -DBL_EPSILON/2 (RTZ) -> -1.0 (NX) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2, rtz", 0xbff0000000000000,
+                  0xbca0000000000000, 0x00, fa0, fa1, fa2);
+   /* 1.0 + DBL_EPSILON/2 (RDN) -> 1.0 (NX) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2, rdn", 0x3ff0000000000000,
+                  0x3ca0000000000000, 0x00, fa0, fa1, fa2);
+   /* -1.0 + -DBL_EPSILON/2 (RDN) -> -nextafter(1.0) (NX) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2, rdn", 0xbff0000000000000,
+                  0xbca0000000000000, 0x00, fa0, fa1, fa2);
+   /* 1.0 + DBL_EPSILON/2 (RUP) -> nextafter(1.0) (NX) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2, rup", 0x3ff0000000000000,
+                  0x3ca0000000000000, 0x00, fa0, fa1, fa2);
+   /* -1.0 + -DBL_EPSILON/2 (RUP) -> -1.0 (NX) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2, rup", 0xbff0000000000000,
+                  0xbca0000000000000, 0x00, fa0, fa1, fa2);
+   /* 1.0 + DBL_EPSILON/2 (RMM) -> nextafter(1.0) (NX) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2, rmm", 0x3ff0000000000000,
+                  0x3ca0000000000000, 0x00, fa0, fa1, fa2);
+   /* -1.0 + -DBL_EPSILON/2 (RMM) -> -nextafter(1.0) (NX) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2, rmm", 0xbff0000000000000,
+                  0xbca0000000000000, 0x00, fa0, fa1, fa2);
+
+   /* 1.0 + DBL_EPSILON/2 (DYN-RNE) -> 1.0 (NX) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2", 0x3ff0000000000000,
+                  0x3ca0000000000000, 0x00, fa0, fa1, fa2);
+   /* nextafter(1.0) + DBL_EPSILON/2 (DYN-RNE) -> 2nextafter(1.0) (NX) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2", 0x3ff0000000000001,
+                  0x3ca0000000000000, 0x00, fa0, fa1, fa2);
+   /* 1.0 + DBL_EPSILON/2 (DYN-RTZ) -> 1.0 (NX) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2", 0x3ff0000000000000,
+                  0x3ca0000000000000, 0x20, fa0, fa1, fa2);
+   /* -1.0 + -DBL_EPSILON/2 (DYN-RTZ) -> -1.0 (NX) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2", 0xbff0000000000000,
+                  0xbca0000000000000, 0x20, fa0, fa1, fa2);
+   /* 1.0 + DBL_EPSILON/2 (DYN-RDN) -> 1.0 (NX) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2", 0x3ff0000000000000,
+                  0x3ca0000000000000, 0x40, fa0, fa1, fa2);
+   /* -1.0 + -DBL_EPSILON/2 (DYN-RDN) -> -nextafter(1.0) (NX) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2", 0xbff0000000000000,
+                  0xbca0000000000000, 0x40, fa0, fa1, fa2);
+   /* 1.0 + DBL_EPSILON/2 (DYN-RUP) -> nextafter(1.0) (NX) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2", 0x3ff0000000000000,
+                  0x3ca0000000000000, 0x60, fa0, fa1, fa2);
+   /* -1.0 + -DBL_EPSILON/2 (DYN-RUP) -> -1.0 (NX) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2", 0xbff0000000000000,
+                  0xbca0000000000000, 0x60, fa0, fa1, fa2);
+   /* 1.0 + DBL_EPSILON/2 (DYN-RMM) -> nextafter(1.0) (NX) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2", 0x3ff0000000000000,
+                  0x3ca0000000000000, 0x80, fa0, fa1, fa2);
+   /* -1.0 + -DBL_EPSILON/2 (DYN-RMM) -> -nextafter(1.0) (NX) */
+   TESTINST_1_2_F(4, "fadd.d fa0, fa1, fa2", 0xbff0000000000000,
+                  0xbca0000000000000, 0x80, fa0, fa1, fa2);
+
+   /* --------------- fsub.d rd, rs1, rs2, rm --------------- */
+   /* 2.0 - 1.0 -> 1.0 */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2", 0x4000000000000000,
+                  0x3ff0000000000000, 0x00, fa0, fa1, fa2);
+   /* 1.0 - 1.0 -> 0.0 */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0x00, fa0, fa1, fa2);
+   /* DBL_TRUE_MIN - -DBL_TRUE_MIN -> 2*DBL_TRUE_MIN (no UF because exact) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2", 0x0000000000000001,
+                  0x8000000000000001, 0x00, fa0, fa1, fa2);
+   /* DBL_MAX - -DBL_MAX -> INFINITY (OF, NX) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2", 0x7fefffffffffffff,
+                  0xffefffffffffffff, 0x00, fa0, fa1, fa2);
+   /* -DBL_MAX - DBL_MAX -> -INFINITY (OF, NX) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2", 0xffefffffffffffff,
+                  0x7fefffffffffffff, 0x00, fa0, fa1, fa2);
+   /* nextafter(DBL_MIN) - DBL_MIN -> DBL_TRUE_MIN (no UF because exact) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2", 0x0010000000000001,
+                  0x0010000000000000, 0x00, fa0, fa1, fa2);
+   /* INFINITY - INFINITY -> qNAN (NV) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2", 0x7ff0000000000000,
+                  0x7ff0000000000000, 0x00, fa0, fa1, fa2);
+
+   /* nextafter(1.0) - DBL_EPSILON/2 (RNE) -> 1.0 (NX) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2, rne", 0x3ff0000000000001,
+                  0x3ca0000000000000, 0x00, fa0, fa1, fa2);
+   /* 2nextafter(1.0) - DBL_EPSILON/2 (RNE) -> 2nextafter(1.0) (NX) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2, rne", 0x3ff0000000000002,
+                  0x3ca0000000000000, 0x00, fa0, fa1, fa2);
+   /* nextafter(1.0) - DBL_EPSILON/2 (RTZ) -> 1.0 (NX) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2, rtz", 0x3ff0000000000001,
+                  0x3ca0000000000000, 0x00, fa0, fa1, fa2);
+   /* -1.0 - DBL_EPSILON/2 (RTZ) -> -1.0 (NX) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2, rtz", 0xbff0000000000000,
+                  0x3ca0000000000000, 0x00, fa0, fa1, fa2);
+   /* nextafter(1.0) - DBL_EPSILON/2 (RDN) -> 1.0 (NX) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2, rdn", 0x3ff0000000000001,
+                  0x3ca0000000000000, 0x00, fa0, fa1, fa2);
+   /* -1.0 - DBL_EPSILON/2 (RDN) -> -nextafter(1.0) (NX) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2, rdn", 0xbff0000000000000,
+                  0x3ca0000000000000, 0x00, fa0, fa1, fa2);
+   /* nextafter(1.0) - DBL_EPSILON/2 (RUP) -> nextafter(1.0) (NX) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2, rup", 0x3ff0000000000001,
+                  0x3ca0000000000000, 0x00, fa0, fa1, fa2);
+   /* -1.0 - DBL_EPSILON/2 (RUP) -> -1.0 (NX) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2, rup", 0xbff0000000000000,
+                  0x3ca0000000000000, 0x00, fa0, fa1, fa2);
+   /* nextafter(1.0) - DBL_EPSILON/2 (RMM) -> nextafter(1.0) (NX) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2, rmm", 0x3ff0000000000001,
+                  0x3ca0000000000000, 0x00, fa0, fa1, fa2);
+   /* -1.0 - DBL_EPSILON/2 (RMM) -> -nextafter(1.0) (NX) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2, rmm", 0xbff0000000000000,
+                  0x3ca0000000000000, 0x00, fa0, fa1, fa2);
+
+   /* nextafter(1.0) - DBL_EPSILON/2 (DYN-RNE) -> 1.0 (NX) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2", 0x3ff0000000000001,
+                  0x3ca0000000000000, 0x00, fa0, fa1, fa2);
+   /* 2nextafter(1.0) - DBL_EPSILON/2 (DYN-RNE) -> 2nextafter(1.0) (NX) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2", 0x3ff0000000000002,
+                  0x3ca0000000000000, 0x00, fa0, fa1, fa2);
+   /* nextafter(1.0) - DBL_EPSILON/2 (DYN-RTZ) -> 1.0 (NX) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2", 0x3ff0000000000001,
+                  0x3ca0000000000000, 0x20, fa0, fa1, fa2);
+   /* -1.0 - DBL_EPSILON/2 (DYN-RTZ) -> -1.0 (NX) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2", 0xbff0000000000000,
+                  0x3ca0000000000000, 0x20, fa0, fa1, fa2);
+   /* nextafter(1.0) - DBL_EPSILON/2 (DYN-RDN) -> 1.0 (NX) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2", 0x3ff0000000000001,
+                  0x3ca0000000000000, 0x40, fa0, fa1, fa2);
+   /* -1.0 - DBL_EPSILON/2 (DYN-RDN) -> -nextafter(1.0) (NX) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2", 0xbff0000000000000,
+                  0x3ca0000000000000, 0x40, fa0, fa1, fa2);
+   /* nextafter(1.0) - DBL_EPSILON/2 (DYN-RUP) -> nextafter(1.0) (NX) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2", 0x3ff0000000000001,
+                  0x3ca0000000000000, 0x60, fa0, fa1, fa2);
+   /* -1.0 - DBL_EPSILON/2 (DYN-RUP) -> -1.0 (NX) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2", 0xbff0000000000000,
+                  0x3ca0000000000000, 0x60, fa0, fa1, fa2);
+   /* nextafter(1.0) - DBL_EPSILON/2 (DYN-RMM) -> nextafter(1.0) (NX) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2", 0x3ff0000000000001,
+                  0x3ca0000000000000, 0x80, fa0, fa1, fa2);
+   /* -1.0 - DBL_EPSILON/2 (DYN-RMM) -> -nextafter(1.0) (NX) */
+   TESTINST_1_2_F(4, "fsub.d fa0, fa1, fa2", 0xbff0000000000000,
+                  0x3ca0000000000000, 0x80, fa0, fa1, fa2);
+
+   /* --------------- fmul.d rd, rs1, rs2, rm --------------- */
+   /* 2.0 * 1.0 -> 2.0 */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2", 0x4000000000000000,
+                  0x3ff0000000000000, 0x00, fa0, fa1, fa2);
+   /* 1.0 * 0.0 -> 0.0 */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2", 0x3ff0000000000000,
+                  0x0000000000000000, 0x00, fa0, fa1, fa2);
+   /* 2**-537 * 2**-537 -> 2**-1074 aka DBL_TRUE_MIN (no UF because exact) */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2", 0x1e60000000000000,
+                  0x1e60000000000000, 0x00, fa0, fa1, fa2);
+   /* DBL_MAX * DBL_MAX -> INFINITY (OF, NX) */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2", 0x7fefffffffffffff,
+                  0x7fefffffffffffff, 0x00, fa0, fa1, fa2);
+   /* DBL_MAX * -DBL_MAX -> -INFINITY (OF, NX) */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2", 0x7fefffffffffffff,
+                  0xffefffffffffffff, 0x00, fa0, fa1, fa2);
+   /* 1.0 * INFINITY -> INFINITY */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2", 0x3ff0000000000000,
+                  0x7ff0000000000000, 0x00, fa0, fa1, fa2);
+   /* 0.0 * INFINITY -> qNAN (NV) */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2", 0x0000000000000000,
+                  0x7ff0000000000000, 0x00, fa0, fa1, fa2);
+
+   /* DBL_TRUE_MIN * 0.5 (RNE) -> 0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2, rne", 0x0000000000000001,
+                  0x3fe0000000000000, 0x00, fa0, fa1, fa2);
+   /* 3*DBL_TRUE_MIN * 0.5 (RNE) -> 2*DBL_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2, rne", 0x0000000000000003,
+                  0x3fe0000000000000, 0x00, fa0, fa1, fa2);
+   /* DBL_TRUE_MIN * 0.5 (RTZ) -> 0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2, rtz", 0x0000000000000001,
+                  0x3fe0000000000000, 0x00, fa0, fa1, fa2);
+   /* -DBL_TRUE_MIN * 0.5 (RTZ) -> -0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2, rtz", 0x8000000000000001,
+                  0x3fe0000000000000, 0x00, fa0, fa1, fa2);
+   /* DBL_TRUE_MIN * 0.5 (RDN) -> 0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2, rdn", 0x0000000000000001,
+                  0x3fe0000000000000, 0x00, fa0, fa1, fa2);
+   /* -DBL_TRUE_MIN * 0.5 (RDN) -> -DBL_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2, rdn", 0x8000000000000001,
+                  0x3fe0000000000000, 0x00, fa0, fa1, fa2);
+   /* DBL_TRUE_MIN * 0.5 (RUP) -> DBL_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2, rup", 0x0000000000000001,
+                  0x3fe0000000000000, 0x00, fa0, fa1, fa2);
+   /* -DBL_TRUE_MIN * 0.5 (RUP) -> -0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2, rup", 0x8000000000000001,
+                  0x3fe0000000000000, 0x00, fa0, fa1, fa2);
+   /* DBL_TRUE_MIN * 0.5 (RMM) -> DBL_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2, rmm", 0x0000000000000001,
+                  0x3fe0000000000000, 0x00, fa0, fa1, fa2);
+   /* -DBL_TRUE_MIN * 0.5 (RMM) -> -DBL_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2, rmm", 0x8000000000000001,
+                  0x3fe0000000000000, 0x00, fa0, fa1, fa2);
+
+   /* DBL_TRUE_MIN * 0.5 (DYN-RNE) -> 0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2", 0x0000000000000001,
+                  0x3fe0000000000000, 0x00, fa0, fa1, fa2);
+   /* 3*DBL_TRUE_MIN * 0.5 (DYN-RNE) -> 2*DBL_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2", 0x0000000000000003,
+                  0x3fe0000000000000, 0x00, fa0, fa1, fa2);
+   /* DBL_TRUE_MIN * 0.5 (DYN-RTZ) -> 0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2", 0x0000000000000001,
+                  0x3fe0000000000000, 0x20, fa0, fa1, fa2);
+   /* -DBL_TRUE_MIN * 0.5 (DYN-RTZ) -> -0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2", 0x8000000000000001,
+                  0x3fe0000000000000, 0x20, fa0, fa1, fa2);
+   /* DBL_TRUE_MIN * 0.5 (DYN-RDN) -> 0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2", 0x0000000000000001,
+                  0x3fe0000000000000, 0x40, fa0, fa1, fa2);
+   /* -DBL_TRUE_MIN * 0.5 (DYN-RDN) -> -DBL_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2", 0x8000000000000001,
+                  0x3fe0000000000000, 0x40, fa0, fa1, fa2);
+   /* DBL_TRUE_MIN * 0.5 (DYN-RUP) -> DBL_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2", 0x0000000000000001,
+                  0x3fe0000000000000, 0x60, fa0, fa1, fa2);
+   /* -DBL_TRUE_MIN * 0.5 (DYN-RUP) -> -0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2", 0x8000000000000001,
+                  0x3fe0000000000000, 0x60, fa0, fa1, fa2);
+   /* DBL_TRUE_MIN * 0.5 (DYN-RMM) -> DBL_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2", 0x0000000000000001,
+                  0x3fe0000000000000, 0x80, fa0, fa1, fa2);
+   /* -DBL_TRUE_MIN * 0.5 (DYN-RMM) -> -DBL_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fmul.d fa0, fa1, fa2", 0x8000000000000001,
+                  0x3fe0000000000000, 0x80, fa0, fa1, fa2);
+
+   /* --------------- fdiv.d rd, rs1, rs2, rm --------------- */
+   /* 2.0 / 1.0 -> 2.0 */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2", 0x4000000000000000,
+                  0x3ff0000000000000, 0x00, fa0, fa1, fa2);
+   /* 0.0 / 1.0 -> 0.0 */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2", 0x0000000000000000,
+                  0x3ff0000000000000, 0x00, fa0, fa1, fa2);
+   /* 1.0 / 2**1023 -> 2**-1023 (no UF because exact) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2", 0x3ff0000000000000,
+                  0x7fe0000000000000, 0x00, fa0, fa1, fa2);
+   /* DBL_MAX / 0.5 -> INFINITY (OF, NX) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2", 0x7fefffffffffffff,
+                  0x3fe0000000000000, 0x00, fa0, fa1, fa2);
+   /* DBL_MAX / -0.5 -> -INFINITY (OF, NX) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2", 0x7fefffffffffffff,
+                  0xbfe0000000000000, 0x00, fa0, fa1, fa2);
+   /* 1.0 / INFINITY -> 0.0 */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2", 0x3ff0000000000000,
+                  0x7ff0000000000000, 0x00, fa0, fa1, fa2);
+   /* 1.0 / 0.0 -> INFINITY (DZ) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2", 0x3ff0000000000000,
+                  0x0000000000000000, 0x00, fa0, fa1, fa2);
+   /* 0.0 / 0.0 -> qNAN (NV) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2", 0x0000000000000000,
+                  0x0000000000000000, 0x00, fa0, fa1, fa2);
+
+   /* DBL_TRUE_MIN / 2.0 (RNE) -> 0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2, rne", 0x0000000000000001,
+                  0x4000000000000000, 0x00, fa0, fa1, fa2);
+   /* 3*DBL_TRUE_MIN / 2.0 (RNE) -> 2*DBL_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2, rne", 0x0000000000000003,
+                  0x4000000000000000, 0x00, fa0, fa1, fa2);
+   /* DBL_TRUE_MIN / 2.0 (RTZ) -> 0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2, rtz", 0x0000000000000001,
+                  0x4000000000000000, 0x00, fa0, fa1, fa2);
+   /* -DBL_TRUE_MIN / 2.0 (RTZ) -> -0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2, rtz", 0x8000000000000001,
+                  0x4000000000000000, 0x00, fa0, fa1, fa2);
+   /* DBL_TRUE_MIN / 2.0 (RDN) -> 0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2, rdn", 0x0000000000000001,
+                  0x4000000000000000, 0x00, fa0, fa1, fa2);
+   /* -DBL_TRUE_MIN / 2.0 (RDN) -> -DBL_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2, rdn", 0x8000000000000001,
+                  0x4000000000000000, 0x00, fa0, fa1, fa2);
+   /* DBL_TRUE_MIN / 2.0 (RUP) -> DBL_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2, rup", 0x0000000000000001,
+                  0x4000000000000000, 0x00, fa0, fa1, fa2);
+   /* -DBL_TRUE_MIN / 2.0 (RUP) -> -0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2, rup", 0x8000000000000001,
+                  0x4000000000000000, 0x00, fa0, fa1, fa2);
+   /* DBL_TRUE_MIN / 2.0 (RMM) -> DBL_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2, rmm", 0x0000000000000001,
+                  0x4000000000000000, 0x00, fa0, fa1, fa2);
+   /* -DBL_TRUE_MIN / 2.0 (RMM) -> -DBL_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2, rmm", 0x8000000000000001,
+                  0x4000000000000000, 0x00, fa0, fa1, fa2);
+
+   /* DBL_TRUE_MIN / 2.0 (DYN-RNE) -> 0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2", 0x0000000000000001,
+                  0x4000000000000000, 0x00, fa0, fa1, fa2);
+   /* 3*DBL_TRUE_MIN / 2.0 (DYN-RNE) -> 2*DBL_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2", 0x0000000000000003,
+                  0x4000000000000000, 0x00, fa0, fa1, fa2);
+   /* DBL_TRUE_MIN / 2.0 (DYN-RTZ) -> 0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2", 0x0000000000000001,
+                  0x4000000000000000, 0x20, fa0, fa1, fa2);
+   /* -DBL_TRUE_MIN / 2.0 (DYN-RTZ) -> -0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2", 0x8000000000000001,
+                  0x4000000000000000, 0x20, fa0, fa1, fa2);
+   /* DBL_TRUE_MIN / 2.0 (DYN-RDN) -> 0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2", 0x0000000000000001,
+                  0x4000000000000000, 0x40, fa0, fa1, fa2);
+   /* -DBL_TRUE_MIN / 2.0 (DYN-RDN) -> -DBL_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2", 0x8000000000000001,
+                  0x4000000000000000, 0x40, fa0, fa1, fa2);
+   /* DBL_TRUE_MIN / 2.0 (DYN-RUP) -> DBL_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2", 0x0000000000000001,
+                  0x4000000000000000, 0x60, fa0, fa1, fa2);
+   /* -DBL_TRUE_MIN / 2.0 (DYN-RUP) -> -0.0 (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2", 0x8000000000000001,
+                  0x4000000000000000, 0x60, fa0, fa1, fa2);
+   /* DBL_TRUE_MIN / 2.0 (DYN-RMM) -> DBL_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2", 0x0000000000000001,
+                  0x4000000000000000, 0x80, fa0, fa1, fa2);
+   /* -DBL_TRUE_MIN / 2.0 (DYN-RMM) -> -DBL_TRUE_MIN (UF, NX) */
+   TESTINST_1_2_F(4, "fdiv.d fa0, fa1, fa2", 0x8000000000000001,
+                  0x4000000000000000, 0x80, fa0, fa1, fa2);
+
+   /* ----------------- fsqrt.d rd, rs1, rm ----------------- */
+   /* sqrt(0.0) -> 0.0 */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1", 0x0000000000000000, 0x00, fa0, fa1);
+   /* sqrt(INFINITY) -> INFINITY */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1", 0x7ff0000000000000, 0x00, fa0, fa1);
+   /* sqrt(DBL_TRUE_MIN) -> 2**-537 */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1", 0x0000000000000001, 0x00, fa0, fa1);
+   /* sqrt(qNAN) -> qNAN */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1", 0x7ff8000000000000, 0x00, fa0, fa1);
+   /* sqrt(-1.0) -> qNAN (NV) */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1", 0xbff0000000000000, 0x00, fa0, fa1);
+
+   /* sqrt(nextafter(1.0)) (RNE) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1, rne", 0x3ff0000000000001, 0x00, fa0,
+                  fa1);
+   /* sqrt(2nextafter(1.0)) (RNE) -> nextafter(1.0) (NX) */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1, rne", 0x3ff0000000000002, 0x00, fa0,
+                  fa1);
+   /* sqrt(nextafter(1.0)) (RTZ) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1, rtz", 0x3ff0000000000001, 0x00, fa0,
+                  fa1);
+   /* sqrt(2nextafter(1.0)) (RTZ) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1, rtz", 0x3ff0000000000002, 0x00, fa0,
+                  fa1);
+   /* sqrt(nextafter(1.0)) (RDN) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1, rdn", 0x3ff0000000000001, 0x00, fa0,
+                  fa1);
+   /* sqrt(2nextafter(1.0)) (RDN) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1, rdn", 0x3ff0000000000002, 0x00, fa0,
+                  fa1);
+   /* sqrt(nextafter(1.0)) (RUP) -> nextafter(1.0) (NX) */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1, rup", 0x3ff0000000000001, 0x00, fa0,
+                  fa1);
+   /* sqrt(2nextafter(1.0)) (RUP) -> nextafter(1.0) (NX) */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1, rup", 0x3ff0000000000002, 0x00, fa0,
+                  fa1);
+   /* sqrt(nextafter(1.0)) (RMM) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1, rmm", 0x3ff0000000000001, 0x00, fa0,
+                  fa1);
+   /* sqrt(2nextafter(1.0)) (RMM) -> nextafter(1.0) (NX) */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1, rmm", 0x3ff0000000000002, 0x00, fa0,
+                  fa1);
+
+   /* sqrt(nextafter(1.0)) (DYN-RNE) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1", 0x3ff0000000000001, 0x00, fa0, fa1);
+   /* sqrt(2nextafter(1.0)) (DYN-RNE) -> nextafter(1.0) (NX) */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1", 0x3ff0000000000002, 0x00, fa0, fa1);
+   /* sqrt(nextafter(1.0)) (DYN-RTZ) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1", 0x3ff0000000000001, 0x20, fa0, fa1);
+   /* sqrt(2nextafter(1.0)) (DYN-RTZ) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1", 0x3ff0000000000002, 0x20, fa0, fa1);
+   /* sqrt(nextafter(1.0)) (DYN-RDN) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1", 0x3ff0000000000001, 0x40, fa0, fa1);
+   /* sqrt(2nextafter(1.0)) (DYN-RDN) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1", 0x3ff0000000000002, 0x40, fa0, fa1);
+   /* sqrt(nextafter(1.0)) (DYN-RUP) -> nextafter(1.0) (NX) */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1", 0x3ff0000000000001, 0x60, fa0, fa1);
+   /* sqrt(2nextafter(1.0)) (DYN-RUP) -> nextafter(1.0) (NX) */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1", 0x3ff0000000000002, 0x60, fa0, fa1);
+   /* sqrt(nextafter(1.0)) (DYN-RMM) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1", 0x3ff0000000000001, 0x80, fa0, fa1);
+   /* sqrt(2nextafter(1.0)) (DYN-RMM) -> nextafter(1.0) (NX) */
+   TESTINST_1_1_F(4, "fsqrt.d fa0, fa1", 0x3ff0000000000002, 0x80, fa0, fa1);
+
+   /* ---------------- fsgnj.d rd, rs1, rs2 ----------------- */
+   /* fmv.d rd, rs1 */
+   TESTINST_1_2_F(4, "fsgnj.d fa0, fa1, fa1", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0x00, fa0, fa1, fa1);
+   TESTINST_1_2_F(4, "fsgnj.d fa0, fa1, fa1", 0xbff0000000000000,
+                  0xbff0000000000000, 0x00, fa0, fa1, fa1);
+
+   /* fsgnj(1.0, +) -> 1.0 */
+   TESTINST_1_2_F(4, "fsgnj.d fa0, fa1, fa2", 0x3ff0000000000000,
+                  0x7fffffffffffffff, 0x00, fa0, fa1, fa2);
+   /* fsgnj(1.0, -) -> -1.0 */
+   TESTINST_1_2_F(4, "fsgnj.d fa0, fa1, fa2", 0x3ff0000000000000,
+                  0x8000000000000000, 0x00, fa0, fa1, fa2);
+   /* fsgnj(-1.0, +) -> 1.0 */
+   TESTINST_1_2_F(4, "fsgnj.d fa0, fa1, fa2", 0xbff0000000000000,
+                  0x7fffffffffffffff, 0x00, fa0, fa1, fa2);
+   /* fsgnj(-1.0, -) -> -1.0 */
+   TESTINST_1_2_F(4, "fsgnj.d fa0, fa1, fa2", 0xbff0000000000000,
+                  0x8000000000000000, 0x00, fa0, fa1, fa2);
+
+   /* ---------------- fsgnjn.d rd, rs1, rs2 ---------------- */
+   /* fneg.d rd, rs1 */
+   TESTINST_1_2_F(4, "fsgnjn.d fa0, fa1, fa1", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0x00, fa0, fa1, fa1);
+   TESTINST_1_2_F(4, "fsgnjn.d fa0, fa1, fa1", 0xbff0000000000000,
+                  0xbff0000000000000, 0x00, fa0, fa1, fa1);
+
+   /* fsgnjn(1.0, +) -> -1.0 */
+   TESTINST_1_2_F(4, "fsgnjn.d fa0, fa1, fa2", 0x3ff0000000000000,
+                  0x7fffffffffffffff, 0x00, fa0, fa1, fa2);
+   /* fsgnjn(1.0, -) -> 1.0 */
+   TESTINST_1_2_F(4, "fsgnjn.d fa0, fa1, fa2", 0x3ff0000000000000,
+                  0x8000000000000000, 0x00, fa0, fa1, fa2);
+   /* fsgnjn(-1.0, +) -> -1.0 */
+   TESTINST_1_2_F(4, "fsgnjn.d fa0, fa1, fa2", 0xbff0000000000000,
+                  0x7fffffffffffffff, 0x00, fa0, fa1, fa2);
+   /* fsgnjn(-1.0, -) -> 1.0 */
+   TESTINST_1_2_F(4, "fsgnjn.d fa0, fa1, fa2", 0xbff0000000000000,
+                  0x8000000000000000, 0x00, fa0, fa1, fa2);
+
+   /* ---------------- fsgnjx.d rd, rs1, rs2 ---------------- */
+   /* fabs.d rd, rs1 */
+   TESTINST_1_2_F(4, "fsgnjx.d fa0, fa1, fa1", 0x3ff0000000000000,
+                  0x3ff0000000000000, 0x00, fa0, fa1, fa1);
+   TESTINST_1_2_F(4, "fsgnjx.d fa0, fa1, fa1", 0xbff0000000000000,
+                  0xbff0000000000000, 0x00, fa0, fa1, fa1);
+
+   /* fsgnjx(1.0, +) -> 1.0 */
+   TESTINST_1_2_F(4, "fsgnjx.d fa0, fa1, fa2", 0x3ff0000000000000,
+                  0x7fffffffffffffff, 0x00, fa0, fa1, fa2);
+   /* fsgnjx(1.0, -) -> -1.0 */
+   TESTINST_1_2_F(4, "fsgnjx.d fa0, fa1, fa2", 0x3ff0000000000000,
+                  0x8000000000000000, 0x00, fa0, fa1, fa2);
+   /* fsgnjx(-1.0, +) -> -1.0 */
+   TESTINST_1_2_F(4, "fsgnjx.d fa0, fa1, fa2", 0xbff0000000000000,
+                  0x7fffffffffffffff, 0x00, fa0, fa1, fa2);
+   /* fsgnjx(-1.0, -) -> 1.0 */
+   TESTINST_1_2_F(4, "fsgnjx.d fa0, fa1, fa2", 0xbff0000000000000,
+                  0x8000000000000000, 0x00, fa0, fa1, fa2);
+
+   /* ----------------- fmin.d rd, rs1, rs2 ----------------- */
+   /* min(0.0, 1.0) -> 0.0 */
+   TESTINST_1_2_F(4, "fmin.d fa0, fa1, fa2", 0x0000000000000000,
+                  0x3ff0000000000000, 0x00, fa0, fa1, fa2);
+   /* min(0.0, -0.0) -> -0.0 */
+   TESTINST_1_2_F(4, "fmin.d fa0, fa1, fa2", 0x0000000000000000,
+                  0x8000000000000000, 0x00, fa0, fa1, fa2);
+   /* min(-0.0, 0.0) -> -0.0 */
+   TESTINST_1_2_F(4, "fmin.d fa0, fa1, fa2", 0x8000000000000000,
+                  0x0000000000000000, 0x00, fa0, fa1, fa2);
+   /* min(INFINITY, INFINITY) -> INFINITY */
+   TESTINST_1_2_F(4, "fmin.d fa0, fa1, fa2", 0x7ff0000000000000,
+                  0x7ff0000000000000, 0x00, fa0, fa1, fa2);
+   /* min(0.0, qNAN) -> 0.0 */
+   TESTINST_1_2_F(4, "fmin.d fa0, fa1, fa2", 0x0000000000000000,
+                  0x7ff8000000000000, 0x00, fa0, fa1, fa2);
+   /* min(0.0, sNAN) -> 0.0 (NV) */
+   TESTINST_1_2_F(4, "fmin.d fa0, fa1, fa2", 0x0000000000000000,
+                  0x7ff4000000000000, 0x00, fa0, fa1, fa2);
+
+   /* ----------------- fmax.d rd, rs1, rs2 ----------------- */
+   /* max(0.0, 1.0) -> 1.0 */
+   TESTINST_1_2_F(4, "fmax.d fa0, fa1, fa2", 0x0000000000000000,
+                  0x3ff0000000000000, 0x00, fa0, fa1, fa2);
+   /* max(0.0, -0.0) -> 0.0 */
+   TESTINST_1_2_F(4, "fmax.d fa0, fa1, fa2", 0x0000000000000000,
+                  0x8000000000000000, 0x00, fa0, fa1, fa2);
+   /* max(-0.0, 0.0) -> 0.0 */
+   TESTINST_1_2_F(4, "fmax.d fa0, fa1, fa2", 0x8000000000000000,
+                  0x0000000000000000, 0x00, fa0, fa1, fa2);
+   /* max(INFINITY, INFINITY) -> INFINITY */
+   TESTINST_1_2_F(4, "fmax.d fa0, fa1, fa2", 0x7ff0000000000000,
+                  0x7ff0000000000000, 0x00, fa0, fa1, fa2);
+   /* max(0.0, qNAN) -> 0.0 */
+   TESTINST_1_2_F(4, "fmax.d fa0, fa1, fa2", 0x0000000000000000,
+                  0x7ff8000000000000, 0x00, fa0, fa1, fa2);
+   /* max(0.0, sNAN) -> 0.0 (NV) */
+   TESTINST_1_2_F(4, "fmax.d fa0, fa1, fa2", 0x0000000000000000,
+                  0x7ff4000000000000, 0x00, fa0, fa1, fa2);
+
+   /* ---------------- fcvt.s.d rd, rs1, rm ----------------- */
+   /* 0.0 -> 0.0 */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1", 0x0000000000000000, 0x00, fa0, fa1);
+   /* DBL_TRUE_MIN -> 0.0 (UF, NX) */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1", 0x0000000000000001, 0x00, fa0, fa1);
+   /* INFINITY -> INFINITY */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1", 0x7ff0000000000000, 0x00, fa0, fa1);
+   /* qNAN -> qNAN */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1", 0x7ff8000000000000, 0x00, fa0, fa1);
+   /* FLT_MAX -> FLT_MAX */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1", 0x47efffffe0000000, 0x00, fa0, fa1);
+   /* -FLT_MAX -> -FLT_MAX */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1", 0xc7efffffe0000000, 0x00, fa0, fa1);
+   /* nextafter(FLT_MAX) -> FLT_MAX (NX) */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1", 0x47efffffe0000001, 0x00, fa0, fa1);
+   /* -nextafter(FLT_MAX) -> -FLT_MAX (NX) */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1", 0xc7efffffe0000001, 0x00, fa0, fa1);
+   /* DBL_MAX -> INFINITY (OF, NX) */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1", 0x7fefffffffffffff, 0x00, fa0, fa1);
+
+   /* 1.0 + FLT_EPSILON/2 (RNE) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1, rne", 0x3ff0000010000000, 0x00, fa0,
+                  fa1);
+   /* nextafterf(1.0) + FLT_EPSILON/2 (RNE) -> 2nextafterf(1.0) (NX) */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1, rne", 0x3ff0000030000000, 0x00, fa0,
+                  fa1);
+   /* 1.0 + FLT_EPSILON/2 (RTZ) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1, rtz", 0x3ff0000010000000, 0x00, fa0,
+                  fa1);
+   /* -1.0 + -FLT_EPSILON/2 (RTZ) -> -1.0 (NX) */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1, rtz", 0xbff0000010000000, 0x00, fa0,
+                  fa1);
+   /* 1.0 + FLT_EPSILON/2 (RDN) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1, rdn", 0x3ff0000010000000, 0x00, fa0,
+                  fa1);
+   /* -1.0 + -FLT_EPSILON/2 (RDN) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1, rdn", 0xbff0000010000000, 0x00, fa0,
+                  fa1);
+   /* 1.0 + FLT_EPSILON/2 (RUP) -> nextafterf(1.0) (NX) */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1, rup", 0x3ff0000010000000, 0x00, fa0,
+                  fa1);
+   /* -1.0 + -FLT_EPSILON/2 (RUP) -> -1.0 (NX) */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1, rup", 0xbff0000010000000, 0x00, fa0,
+                  fa1);
+   /* 1.0 + FLT_EPSILON/2 (RMM) -> nextafterf(1.0) (NX) */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1, rmm", 0x3ff0000010000000, 0x00, fa0,
+                  fa1);
+   /* -1.0 + -FLT_EPSILON/2 (RMM) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1, rmm", 0xbff0000010000000, 0x00, fa0,
+                  fa1);
+
+   /* 1.0 + FLT_EPSILON/2 (DYN-RNE) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1", 0x3ff0000010000000, 0x00, fa0, fa1);
+   /* nextafterf(1.0) + FLT_EPSILON/2 (DYN-RNE) -> 2nextafterf(1.0) (NX) */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1", 0x3ff0000030000000, 0x00, fa0, fa1);
+   /* 1.0 + FLT_EPSILON/2 (DYN-RTZ) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1", 0x3ff0000010000000, 0x20, fa0, fa1);
+   /* -1.0 + -FLT_EPSILON/2 (DYN-RTZ) -> -1.0 (NX) */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1", 0xbff0000010000000, 0x20, fa0, fa1);
+   /* 1.0 + FLT_EPSILON/2 (DYN-RDN) -> 1.0 (NX) */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1", 0x3ff0000010000000, 0x40, fa0, fa1);
+   /* -1.0 + -FLT_EPSILON/2 (DYN-RDN) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1", 0xbff0000010000000, 0x40, fa0, fa1);
+   /* 1.0 + FLT_EPSILON/2 (DYN-RUP) -> nextafterf(1.0) (NX) */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1", 0x3ff0000010000000, 0x60, fa0, fa1);
+   /* -1.0 + -FLT_EPSILON/2 (DYN-RUP) -> -1.0 (NX) */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1", 0xbff0000010000000, 0x60, fa0, fa1);
+   /* 1.0 + FLT_EPSILON/2 (DYN-RMM) -> nextafterf(1.0) (NX) */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1", 0x3ff0000010000000, 0x80, fa0, fa1);
+   /* -1.0 + -FLT_EPSILON/2 (DYN-RMM) -> -nextafterf(1.0) (NX) */
+   TESTINST_1_1_F(4, "fcvt.s.d fa0, fa1", 0xbff0000010000000, 0x80, fa0, fa1);
+
+   /* ---------------- fcvt.d.s rd, rs1, rm ----------------- */
+   /* 0.0 -> 0.0 */
+   TESTINST_1_1_F(4, "fcvt.d.s fa0, fa1", 0xffffffff00000000, 0x00, fa0, fa1);
+   /* FLT_TRUE_MIN -> FLT_TRUE_MIN */
+   TESTINST_1_1_F(4, "fcvt.d.s fa0, fa1", 0xffffffff00000001, 0x00, fa0, fa1);
+   /* INFINITY -> INFINITY */
+   TESTINST_1_1_F(4, "fcvt.d.s fa0, fa1", 0xffffffff7f800000, 0x00, fa0, fa1);
+   /* qNAN -> qNAN */
+   TESTINST_1_1_F(4, "fcvt.d.s fa0, fa1", 0xffffffff7fc00000, 0x00, fa0, fa1);
+   /* FLT_MAX -> FLT_MAX */
+   TESTINST_1_1_F(4, "fcvt.d.s fa0, fa1", 0xffffffff7f7fffff, 0x00, fa0, fa1);
+   /* -FLT_MAX -> -FLT_MAX */
+   TESTINST_1_1_F(4, "fcvt.d.s fa0, fa1", 0xffffffffff7fffff, 0x00, fa0, fa1);
+#if 0
+   /* TODO Implement correctly. */
+   /* non-NaN-boxed 0.0 -> qNaN */
+   TESTINST_1_1_F(4, "fcvt.d.s fa0, fa1", 0x0000000000000000, 0x00, fa0, fa1);
+#endif
+
+   /* ----------------- feq.d rd, rs1, rs2 ------------------ */
+   /* 0.0 == 1.0 -> 0 */
+   TESTINST_1_2_FCMP(4, "feq.d a0, fa0, fa1", 0x0000000000000000,
+                     0x3ff0000000000000, 0x00, a0, fa0, fa1);
+   /* 0.0 == 0.0 -> 1 */
+   TESTINST_1_2_FCMP(4, "feq.d a0, fa0, fa1", 0x0000000000000000,
+                     0x0000000000000000, 0x00, a0, fa0, fa1);
+   /* 0.0 == -0.0 -> 1 */
+   TESTINST_1_2_FCMP(4, "feq.d a0, fa0, fa1", 0x0000000000000000,
+                     0x8000000000000000, 0x00, a0, fa0, fa1);
+   /* -0.0 == 0.0 -> 1 */
+   TESTINST_1_2_FCMP(4, "feq.d a0, fa0, fa1", 0x8000000000000000,
+                     0x0000000000000000, 0x00, a0, fa0, fa1);
+   /* INFINITY == INFINITY -> 1 */
+   TESTINST_1_2_FCMP(4, "feq.d a0, fa0, fa1", 0x7ff0000000000000,
+                     0x7ff0000000000000, 0x00, a0, fa0, fa1);
+   /* 0.0 == qNAN -> 0 */
+   TESTINST_1_2_FCMP(4, "feq.d a0, fa0, fa1", 0x0000000000000000,
+                     0x7ff8000000000000, 0x00, a0, fa0, fa1);
+   /* 0.0 == sNAN -> 0 (NV) */
+   TESTINST_1_2_FCMP(4, "feq.d a0, fa0, fa1", 0x0000000000000000,
+                     0x7ff4000000000000, 0x00, a0, fa0, fa1);
+
+   /* sNAN == sNAN (rd=zero) -> 0 (NV) */
+   TESTINST_1_2_FCMP(4, "feq.d zero, fa0, fa1", 0x7ff4000000000000,
+                     0x7ff4000000000000, 0x00, zero, fa0, fa1);
+
+   /* ----------------- flt.d rd, rs1, rs2 ------------------ */
+   /* 0.0 < 0.0 -> 0 */
+   TESTINST_1_2_FCMP(4, "flt.d a0, fa0, fa1", 0x0000000000000000,
+                     0x0000000000000000, 0x00, a0, fa0, fa1);
+   /* 0.0 < 1.0 -> 1 */
+   TESTINST_1_2_FCMP(4, "flt.d a0, fa0, fa1", 0x0000000000000000,
+                     0x3ff0000000000000, 0x00, a0, fa0, fa1);
+   /* 0.0 < -0.0 -> 0 */
+   TESTINST_1_2_FCMP(4, "flt.d a0, fa0, fa1", 0x0000000000000000,
+                     0x8000000000000000, 0x00, a0, fa0, fa1);
+   /* -0.0 < 0.0 -> 0 */
+   TESTINST_1_2_FCMP(4, "flt.d a0, fa0, fa1", 0x8000000000000000,
+                     0x0000000000000000, 0x00, a0, fa0, fa1);
+   /* INFINITY < INFINITY -> 0 */
+   TESTINST_1_2_FCMP(4, "flt.d a0, fa0, fa1", 0x7ff0000000000000,
+                     0x7ff0000000000000, 0x00, a0, fa0, fa1);
+   /* 0.0 < qNAN -> 0 (NV) */
+   TESTINST_1_2_FCMP(4, "flt.d a0, fa0, fa1", 0x0000000000000000,
+                     0x7ff8000000000000, 0x00, a0, fa0, fa1);
+   /* 0.0 < sNAN -> 0 (NV) */
+   TESTINST_1_2_FCMP(4, "flt.d a0, fa0, fa1", 0x0000000000000000,
+                     0x7ff4000000000000, 0x00, a0, fa0, fa1);
+
+   /* sNAN < sNAN (rd=zero) -> 0 (NV) */
+   TESTINST_1_2_FCMP(4, "flt.d zero, fa0, fa1", 0x7ff4000000000000,
+                     0x7ff4000000000000, 0x00, zero, fa0, fa1);
+
+   /* ----------------- fle.d rd, rs1, rs2 ------------------ */
+   /* 1.0 <= 0.0 -> 0 */
+   TESTINST_1_2_FCMP(4, "fle.d a0, fa0, fa1", 0x3ff0000000000000,
+                     0x0000000000000000, 0x00, a0, fa0, fa1);
+   /* 0.0 <= 0.0 -> 1 */
+   TESTINST_1_2_FCMP(4, "fle.d a0, fa0, fa1", 0x0000000000000000,
+                     0x0000000000000000, 0x00, a0, fa0, fa1);
+   /* 0.0 <= 1.0 -> 1 */
+   TESTINST_1_2_FCMP(4, "fle.d a0, fa0, fa1", 0x0000000000000000,
+                     0x3ff0000000000000, 0x00, a0, fa0, fa1);
+   /* 0.0 <= -0.0 -> 1 */
+   TESTINST_1_2_FCMP(4, "fle.d a0, fa0, fa1", 0x0000000000000000,
+                     0x8000000000000000, 0x00, a0, fa0, fa1);
+   /* -0.0 <= 0.0 -> 1 */
+   TESTINST_1_2_FCMP(4, "fle.d a0, fa0, fa1", 0x8000000000000000,
+                     0x0000000000000000, 0x00, a0, fa0, fa1);
+   /* INFINITY <= INFINITY -> 1 */
+   TESTINST_1_2_FCMP(4, "fle.d a0, fa0, fa1", 0x7ff0000000000000,
+                     0x7ff0000000000000, 0x00, a0, fa0, fa1);
+   /* 0.0 <= qNAN -> 0 (NV) */
+   TESTINST_1_2_FCMP(4, "fle.d a0, fa0, fa1", 0x0000000000000000,
+                     0x7ff8000000000000, 0x00, a0, fa0, fa1);
+   /* 0.0 <= sNAN -> 0 (NV) */
+   TESTINST_1_2_FCMP(4, "fle.d a0, fa0, fa1", 0x0000000000000000,
+                     0x7ff4000000000000, 0x00, a0, fa0, fa1);
+
+   /* sNAN <= sNAN (rd=zero) -> 0 (NV) */
+   TESTINST_1_2_FCMP(4, "fle.d zero, fa0, fa1", 0x7ff4000000000000,
+                     0x7ff4000000000000, 0x00, zero, fa0, fa1);
+
+   /* ------------------ fclass.d rd, rs1 ------------------- */
+   /* fclass(-INFINITY) -> 0x001 */
+   TESTINST_1_1_IF(4, "fclass.d a0, fa0", 0xfff0000000000000, 0x00, a0, fa0);
+   /* fclass(-1.0) -> 0x002 */
+   TESTINST_1_1_IF(4, "fclass.d a0, fa0", 0xbff0000000000000, 0x00, a0, fa0);
+   /* fclass(-DBL_TRUE_MIN) -> 0x004 */
+   TESTINST_1_1_IF(4, "fclass.d a0, fa0", 0x8000000000000001, 0x00, a0, fa0);
+   /* fclass(-0.0) -> 0x008 */
+   TESTINST_1_1_IF(4, "fclass.d a0, fa0", 0x8000000000000000, 0x00, a0, fa0);
+   /* fclass(0.0) -> 0x010 */
+   TESTINST_1_1_IF(4, "fclass.d a0, fa0", 0x0000000000000000, 0x00, a0, fa0);
+   /* fclass(DBL_TRUE_MIN) -> 0x020 */
+   TESTINST_1_1_IF(4, "fclass.d a0, fa0", 0x0000000000000001, 0x00, a0, fa0);
+   /* fclass(1.0) -> 0x040 */
+   TESTINST_1_1_IF(4, "fclass.d a0, fa0", 0x3ff0000000000000, 0x00, a0, fa0);
+   /* fclass(INFINITY) -> 0x080 */
+   TESTINST_1_1_IF(4, "fclass.d a0, fa0", 0x7ff0000000000000, 0x00, a0, fa0);
+   /* fclass(sNAN) -> 0x100 */
+   TESTINST_1_1_IF(4, "fclass.d a0, fa0", 0x7ff4000000000000, 0x00, a0, fa0);
+   /* fclass(qNAN) -> 0x200 */
+   TESTINST_1_1_IF(4, "fclass.d a0, fa0", 0x7ff8000000000000, 0x00, a0, fa0);
+
+   /* fclass(-INFINITY) (rd=zero) -> 0x000 */
+   TESTINST_1_1_IF(4, "fclass.d zero, fa0", 0xfff0000000000000, 0x00, zero,
+                   fa0);
+
+   /* ---------------- fcvt.w.d rd, rs1, rm ----------------- */
+   /* 0.0 -> 0 */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0", 0x0000000000000000, 0x00, a0, fa0);
+   /* DBL_TRUE_MIN -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0", 0x0000000000000001, 0x00, a0, fa0);
+   /* INFINITY -> 2**31-1 aka INT_MAX (NV)  */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0", 0x7ff0000000000000, 0x00, a0, fa0);
+   /* qNAN -> 2**31-1 aka INT_MAX (NV) */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0", 0x7ff8000000000000, 0x00, a0, fa0);
+   /* 2**31-1 -> 2**31-1 aka INT_MAX */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0", 0x41dfffffffc00000, 0x00, a0, fa0);
+   /* -2**31 -> -2**31 aka INT_MIN */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0", 0xc1e0000000000000, 0x00, a0, fa0);
+   /* 2**31 -> 2**31-1 aka INT_MAX (NV) */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0", 0x41e0000000000000, 0x00, a0, fa0);
+   /* -2**31-1 -> -2**31 aka INT_MIN (NV) */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0", 0xc1e0000000200000, 0x00, a0, fa0);
+
+   /* 1.0 (rd=zero) -> 0 */
+   TESTINST_1_1_IF(4, "fcvt.w.d zero, fa0", 0x3ff0000000000000, 0x00, zero,
+                   fa0);
+
+   /* 0.5 (RNE) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0, rne", 0x3fe0000000000000, 0x00, a0,
+                   fa0);
+   /* 1.5 (RNE) -> 2 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0, rne", 0x3ff8000000000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RTZ) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0, rtz", 0x3fe0000000000000, 0x00, a0,
+                   fa0);
+   /* -0.5 (RTZ) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0, rtz", 0xbfe0000000000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RDN) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0, rdn", 0x3fe0000000000000, 0x00, a0,
+                   fa0);
+   /* -0.5 (RDN) -> -1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0, rdn", 0xbfe0000000000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RUP) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0, rup", 0x3fe0000000000000, 0x00, a0,
+                   fa0);
+   /* -0.5 (RUP) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0, rup", 0xbfe0000000000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RMM) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0, rmm", 0x3fe0000000000000, 0x00, a0,
+                   fa0);
+   /* -0.5 (RMM) -> -1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0, rmm", 0xbfe0000000000000, 0x00, a0,
+                   fa0);
+
+   /* 0.5 (DYN-RNE) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0", 0x3fe0000000000000, 0x00, a0, fa0);
+   /* 1.5 (DYN-RNE) -> 2 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0", 0x3ff8000000000000, 0x00, a0, fa0);
+   /* 0.5 (DYN-RTZ) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0", 0x3fe0000000000000, 0x20, a0, fa0);
+   /* -0.5 (DYN-RTZ) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0", 0xbfe0000000000000, 0x20, a0, fa0);
+   /* 0.5 (DYN-RDN) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0", 0x3fe0000000000000, 0x40, a0, fa0);
+   /* -0.5 (DYN-RDN) -> -1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0", 0xbfe0000000000000, 0x40, a0, fa0);
+   /* 0.5 (DYN-RUP) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0", 0x3fe0000000000000, 0x60, a0, fa0);
+   /* -0.5 (DYN-RUP) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0", 0xbfe0000000000000, 0x60, a0, fa0);
+   /* 0.5 (DYN-RMM) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0", 0x3fe0000000000000, 0x80, a0, fa0);
+   /* -0.5 (DYN-RMM) -> -1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.w.d a0, fa0", 0xbfe0000000000000, 0x80, a0, fa0);
+
+   /* ---------------- fcvt.wu.d rd, rs1, rm ---------------- */
+   /* 0.0 -> 0 */
+   TESTINST_1_1_IF(4, "fcvt.wu.d a0, fa0", 0x0000000000000000, 0x00, a0, fa0);
+   /* DBL_TRUE_MIN -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.d a0, fa0", 0x0000000000000001, 0x00, a0, fa0);
+   /* INFINITY -> 2**32-1 aka UINT_MAX (NV) */
+   TESTINST_1_1_IF(4, "fcvt.wu.d a0, fa0", 0x7ff0000000000000, 0x00, a0, fa0);
+   /* qNAN -> 2**32-1 aka UINT_MAX (NV) */
+   TESTINST_1_1_IF(4, "fcvt.wu.d a0, fa0", 0x7ff8000000000000, 0x00, a0, fa0);
+   /* 2**32-1 -> 2**32-1 aka UINT_MAX */
+   TESTINST_1_1_IF(4, "fcvt.wu.d a0, fa0", 0x41efffffffe00000, 0x00, a0, fa0);
+   /* -1.0 -> 0 (NV) */
+   TESTINST_1_1_IF(4, "fcvt.wu.d a0, fa0", 0xbff0000000000000, 0x00, a0, fa0);
+
+   /* 1.0 (rd=zero) -> 0 */
+   TESTINST_1_1_IF(4, "fcvt.wu.d zero, fa0", 0x3ff0000000000000, 0x00, zero,
+                   fa0);
+
+   /* 0.5 (RNE) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.d a0, fa0, rne", 0x3fe0000000000000, 0x00, a0,
+                   fa0);
+   /* 1.5 (RNE) -> 2 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.d a0, fa0, rne", 0x3ff8000000000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RTZ) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.d a0, fa0, rtz", 0x3fe0000000000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RDN) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.d a0, fa0, rdn", 0x3fe0000000000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RUP) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.d a0, fa0, rup", 0x3fe0000000000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RMM) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.d a0, fa0, rmm", 0x3fe0000000000000, 0x00, a0,
+                   fa0);
+
+   /* 0.5 (DYN-RNE) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.d a0, fa0", 0x3fe0000000000000, 0x00, a0, fa0);
+   /* 1.5 (DYN-RNE) -> 2 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.d a0, fa0", 0x3ff8000000000000, 0x00, a0, fa0);
+   /* 0.5 (DYN-RTZ) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.d a0, fa0", 0x3fe0000000000000, 0x20, a0, fa0);
+   /* 0.5 (DYN-RDN) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.d a0, fa0", 0x3fe0000000000000, 0x40, a0, fa0);
+   /* 0.5 (DYN-RUP) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.d a0, fa0", 0x3fe0000000000000, 0x60, a0, fa0);
+   /* 0.5 (DYN-RMM) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.wu.d a0, fa0", 0x3fe0000000000000, 0x80, a0, fa0);
+
+   /* ---------------- fcvt.d.w rd, rs1, rm ----------------- */
+   /* 0 -> 0.0 */
+   TESTINST_1_1_FI(4, "fcvt.d.w fa0, a0", 0x0000000000000000, 0x00, fa0, a0);
+   /* 2**31-1 aka INT_MAX -> 2**31-1 */
+   TESTINST_1_1_FI(4, "fcvt.d.w fa0, a0", 0x000000007fffffff, 0x00, fa0, a0);
+   /* -2**31 aka INT_MIN -> -2**31 */
+   TESTINST_1_1_FI(4, "fcvt.d.w fa0, a0", 0xffffffff80000000, 0x00, fa0, a0);
+
+   /* ---------------- fcvt.d.wu rd, rs1, rm ---------------- */
+   /* 0 -> 0.0 */
+   TESTINST_1_1_FI(4, "fcvt.d.wu fa0, a0", 0x0000000000000000, 0x00, fa0, a0);
+   /* 2**32-1 aka UINT_MAX -> 2**32-1 */
+   TESTINST_1_1_FI(4, "fcvt.d.wu fa0, a0", 0x00000000ffffffff, 0x00, fa0, a0);
+
+   printf("\n");
+}
+
+static void test_float64_additions(void)
+{
+   printf("RV64D double-precision FP instruction set, additions\n");
+
+   /* ---------------- fcvt.l.d rd, rs1, rm ----------------- */
+   /* 0.0 -> 0 */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0", 0x0000000000000000, 0x00, a0, fa0);
+   /* DBL_TRUE_MIN -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0", 0x0000000000000001, 0x00, a0, fa0);
+   /* INFINITY -> 2**63-1 aka LONG_MAX (NV) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0", 0x7ff0000000000000, 0x00, a0, fa0);
+   /* qNAN -> 2**63-1 aka LONG_MAX (NV) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0", 0x7ff8000000000000, 0x00, a0, fa0);
+   /* nextafter(2**63, 0.0) -> 2**63-1024 */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0", 0x43dfffffffffffff, 0x00, a0, fa0);
+   /* -2**63 -> -2**63 aka LONG_MIN */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0", 0xc3e0000000000000, 0x00, a0, fa0);
+   /* 2**63 -> 2**63-1 aka LONG_MAX (NV) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0", 0x43e0000000000000, 0x00, a0, fa0);
+   /* -nextafter(2**63) -> -2**63 aka LONG_MIN (NV) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0", 0xc3e0000000000001, 0x00, a0, fa0);
+
+   /* 1.0 (rd=zero) -> 0 */
+   TESTINST_1_1_IF(4, "fcvt.l.d zero, fa0", 0x3ff0000000000000, 0x00, zero,
+                   fa0);
+
+   /* 0.5 (RNE) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0, rne", 0x3fe0000000000000, 0x00, a0,
+                   fa0);
+   /* 1.5 (RNE) -> 2 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0, rne", 0x3ff8000000000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RTZ) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0, rtz", 0x3fe0000000000000, 0x00, a0,
+                   fa0);
+   /* -0.5 (RTZ) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0, rtz", 0xbfe0000000000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RDN) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0, rdn", 0x3fe0000000000000, 0x00, a0,
+                   fa0);
+   /* -0.5 (RDN) -> -1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0, rdn", 0xbfe0000000000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RUP) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0, rup", 0x3fe0000000000000, 0x00, a0,
+                   fa0);
+   /* -0.5 (RUP) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0, rup", 0xbfe0000000000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RMM) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0, rmm", 0x3fe0000000000000, 0x00, a0,
+                   fa0);
+   /* -0.5 (RMM) -> -1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0, rmm", 0xbfe0000000000000, 0x00, a0,
+                   fa0);
+
+   /* 0.5 (DYN-RNE) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0", 0x3fe0000000000000, 0x00, a0, fa0);
+   /* 1.5 (DYN-RNE) -> 2 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0", 0x3ff8000000000000, 0x00, a0, fa0);
+   /* 0.5 (DYN-RTZ) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0", 0x3fe0000000000000, 0x20, a0, fa0);
+   /* -0.5 (DYN-RTZ) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0", 0xbfe0000000000000, 0x20, a0, fa0);
+   /* 0.5 (DYN-RDN) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0", 0x3fe0000000000000, 0x40, a0, fa0);
+   /* -0.5 (DYN-RDN) -> -1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0", 0xbfe0000000000000, 0x40, a0, fa0);
+   /* 0.5 (DYN-RUP) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0", 0x3fe0000000000000, 0x60, a0, fa0);
+   /* -0.5 (DYN-RUP) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0", 0xbfe0000000000000, 0x60, a0, fa0);
+   /* 0.5 (DYN-RMM) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0", 0x3fe0000000000000, 0x80, a0, fa0);
+   /* -0.5 (DYN-RMM) -> -1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.l.d a0, fa0", 0xbfe0000000000000, 0x80, a0, fa0);
+
+   /* ---------------- fcvt.lu.d rd, rs1, rm ---------------- */
+   /* 0.0 -> 0 */
+   TESTINST_1_1_IF(4, "fcvt.lu.d a0, fa0", 0x0000000000000000, 0x00, a0, fa0);
+   /* DBL_TRUE_MIN -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.d a0, fa0", 0x0000000000000001, 0x00, a0, fa0);
+   /* INFINITY -> 2**64-1 aka ULONG_MAX (NV) */
+   TESTINST_1_1_IF(4, "fcvt.lu.d a0, fa0", 0x7ff0000000000000, 0x00, a0, fa0);
+   /* qNAN -> 2**64-1 aka ULONG_MAX (NV) */
+   TESTINST_1_1_IF(4, "fcvt.lu.d a0, fa0", 0x7ff8000000000000, 0x00, a0, fa0);
+   /* nextafter(2**64, 0.0) -> 2**64-2048 */
+   TESTINST_1_1_IF(4, "fcvt.lu.d a0, fa0", 0x43efffffffffffff, 0x00, a0, fa0);
+   /* 2**64 -> 2**64-1 aka ULONG_MAX (NV) */
+   TESTINST_1_1_IF(4, "fcvt.lu.d a0, fa0", 0x43f0000000000000, 0x00, a0, fa0);
+   /* -1.0 -> 0 (NV) */
+   TESTINST_1_1_IF(4, "fcvt.lu.d a0, fa0", 0xbff0000000000000, 0x00, a0, fa0);
+
+   /* 1.0 (rd=zero) -> 0 */
+   TESTINST_1_1_IF(4, "fcvt.lu.d zero, fa0", 0x3ff0000000000000, 0x00, zero,
+                   fa0);
+
+   /* 0.5 (RNE) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.d a0, fa0, rne", 0x3fe0000000000000, 0x00, a0,
+                   fa0);
+   /* 1.5 (RNE) -> 2 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.d a0, fa0, rne", 0x3ff8000000000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RTZ) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.d a0, fa0, rtz", 0x3fe0000000000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RDN) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.d a0, fa0, rdn", 0x3fe0000000000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RUP) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.d a0, fa0, rup", 0x3fe0000000000000, 0x00, a0,
+                   fa0);
+   /* 0.5 (RMM) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.d a0, fa0, rmm", 0x3fe0000000000000, 0x00, a0,
+                   fa0);
+
+   /* 0.5 (DYN-RNE) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.d a0, fa0", 0x3fe0000000000000, 0x00, a0, fa0);
+   /* 1.5 (DYN-RNE) -> 2 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.d a0, fa0", 0x3ff8000000000000, 0x00, a0, fa0);
+   /* 0.5 (DYN-RTZ) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.d a0, fa0", 0x3fe0000000000000, 0x20, a0, fa0);
+   /* 0.5 (DYN-RDN) -> 0 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.d a0, fa0", 0x3fe0000000000000, 0x40, a0, fa0);
+   /* 0.5 (DYN-RUP) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.d a0, fa0", 0x3fe0000000000000, 0x60, a0, fa0);
+   /* 0.5 (DYN-RMM) -> 1 (NX) */
+   TESTINST_1_1_IF(4, "fcvt.lu.d a0, fa0", 0x3fe0000000000000, 0x80, a0, fa0);
+
+   /* ------------------- fmv.x.d rd, rs1 ------------------- */
+   TESTINST_1_1_IF(4, "fmv.x.d a0, fa0", 0xabcdef0123456789, 0x00, a0, fa0);
+
+   /* 1.0 (rd=zero) -> 0 */
+   TESTINST_1_1_IF(4, "fmv.x.d zero, fa0", 0x3ff0000000000000, 0x00, zero, fa0);
+
+   /* ---------------- fcvt.d.l rd, rs1, rm ----------------- */
+   /* 0 -> 0.0 */
+   TESTINST_1_1_FI(4, "fcvt.d.l fa0, a0", 0x0000000000000000, 0x00, fa0, a0);
+   /* 2**63-1024 -> nextafter(2**63, 0.0) */
+   TESTINST_1_1_FI(4, "fcvt.d.l fa0, a0", 0x7ffffffffffffc00, 0x00, fa0, a0);
+   /* 2**63-1 aka LONG_MAX -> 2**63 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.l fa0, a0", 0x7fffffffffffffff, 0x00, fa0, a0);
+   /* -2**63 aka LONG_MIN -> -2**63 */
+   TESTINST_1_1_FI(4, "fcvt.d.l fa0, a0", 0x8000000000000000, 0x00, fa0, a0);
+
+   /* 2**53+1 (RNE) -> 2**53 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.l fa0, a0, rne", 0x0020000000000001, 0x00, fa0,
+                   a0);
+   /* 2**53+3 (RNE) -> 2**53+4 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.l fa0, a0, rne", 0x0020000000000003, 0x00, fa0,
+                   a0);
+   /* 2**53+1 (RTZ) -> 2**53 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.l fa0, a0, rtz", 0x0020000000000001, 0x00, fa0,
+                   a0);
+   /* -2**53-1 (RTZ) -> -2**53 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.l fa0, a0, rtz", 0xffdfffffffffffff, 0x00, fa0,
+                   a0);
+   /* 2**53+1 (RDN) -> 2**53 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.l fa0, a0, rdn", 0x0020000000000001, 0x00, fa0,
+                   a0);
+   /* -2**53-1 (RDN) -> -2**53-2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.l fa0, a0, rdn", 0xffdfffffffffffff, 0x00, fa0,
+                   a0);
+   /* 2**53+1 (RUP) -> 2**53+2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.l fa0, a0, rup", 0x0020000000000001, 0x00, fa0,
+                   a0);
+   /* -2**53-1 (RUP) -> -2**53 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.l fa0, a0, rup", 0xffdfffffffffffff, 0x00, fa0,
+                   a0);
+   /* 2**53+1 (RMM) -> 2**53+2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.l fa0, a0, rmm", 0x0020000000000001, 0x00, fa0,
+                   a0);
+   /* -2**53-1 (RMM) -> -2**53-2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.l fa0, a0, rmm", 0xffdfffffffffffff, 0x00, fa0,
+                   a0);
+
+   /* 2**53+1 (DYN-RNE) -> 2**53 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.l fa0, a0", 0x0020000000000001, 0x00, fa0, a0);
+   /* 2**53+3 (DYN-RNE) -> 2**53+4 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.l fa0, a0", 0x0020000000000003, 0x00, fa0, a0);
+   /* 2**53+1 (DYN-RTZ) -> 2**53 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.l fa0, a0", 0x0020000000000001, 0x20, fa0, a0);
+   /* -2**53-1 (DYN-RTZ) -> -2**53 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.l fa0, a0", 0xffdfffffffffffff, 0x20, fa0, a0);
+   /* 2**53+1 (DYN-RDN) -> 2**53 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.l fa0, a0", 0x0020000000000001, 0x40, fa0, a0);
+   /* -2**53-1 (DYN-RDN) -> -2**53-2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.l fa0, a0", 0xffdfffffffffffff, 0x40, fa0, a0);
+   /* 2**53+1 (DYN-RUP) -> 2**53+2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.l fa0, a0", 0x0020000000000001, 0x60, fa0, a0);
+   /* -2**53-1 (DYN-RUP) -> -2**53 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.l fa0, a0", 0xffdfffffffffffff, 0x60, fa0, a0);
+   /* 2**53+1 (DYN-RMM) -> 2**53+2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.l fa0, a0", 0x0020000000000001, 0x80, fa0, a0);
+   /* -2**53-1 (DYN-RMM) -> -2**53-2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.l fa0, a0", 0xffdfffffffffffff, 0x80, fa0, a0);
+
+   /* ---------------- fcvt.d.lu rd, rs1, rm ---------------- */
+   /* 0 -> 0.0 */
+   TESTINST_1_1_FI(4, "fcvt.d.lu fa0, a0", 0x0000000000000000, 0x00, fa0, a0);
+   /* 2**64-2048 -> nextafter(2**64, 0.0) */
+   TESTINST_1_1_FI(4, "fcvt.d.lu fa0, a0", 0xfffffffffffff800, 0x00, fa0, a0);
+   /* 2**64-1 aka ULONG_MAX -> 2**64 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.lu fa0, a0", 0xffffffffffffffff, 0x00, fa0, a0);
+
+   /* 2**53+1 (RNE) -> 2**53 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.lu fa0, a0, rne", 0x0020000000000001, 0x00, fa0,
+                   a0);
+   /* 2**53+3 (RNE) -> 2**53+4 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.lu fa0, a0, rne", 0x0020000000000003, 0x00, fa0,
+                   a0);
+   /* 2**53+1 (RTZ) -> 2**53 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.lu fa0, a0, rtz", 0x0020000000000001, 0x00, fa0,
+                   a0);
+   /* 2**53+1 (RDN) -> 2**53 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.lu fa0, a0, rdn", 0x0020000000000001, 0x00, fa0,
+                   a0);
+   /* 2**53+1 (RUP) -> 2**53+2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.lu fa0, a0, rup", 0x0020000000000001, 0x00, fa0,
+                   a0);
+   /* 2**53+1 (RMM) -> 2**53+2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.lu fa0, a0, rmm", 0x0020000000000001, 0x00, fa0,
+                   a0);
+
+   /* 2**53+1 (DYN-RNE) -> 2**53 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.lu fa0, a0", 0x0020000000000001, 0x00, fa0, a0);
+   /* 2**53+3 (DYN-RNE) -> 2**53+4 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.lu fa0, a0", 0x0020000000000003, 0x00, fa0, a0);
+   /* 2**53+1 (DYN-RTZ) -> 2**53 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.lu fa0, a0", 0x0020000000000001, 0x20, fa0, a0);
+   /* 2**53+1 (DYN-RDN) -> 2**53 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.lu fa0, a0", 0x0020000000000001, 0x40, fa0, a0);
+   /* 2**53+1 (DYN-RUP) -> 2**53+2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.lu fa0, a0", 0x0020000000000001, 0x60, fa0, a0);
+   /* 2**53+1 (DYN-RMM) -> 2**53+2 (NX) */
+   TESTINST_1_1_FI(4, "fcvt.d.lu fa0, a0", 0x0020000000000001, 0x80, fa0, a0);
+
+   /* ------------------- fmv.d.x rd, rs1 ------------------- */
+   TESTINST_1_1_FI(4, "fmv.d.x fa0, a0", 0xabcdef0123456789, 0x00, fa0, a0);
+}
+
+int main(void)
+{
+   test_float64_shared();    /* prints the "shared operations" group first */
+   test_float64_additions(); /* then the "additions" group */
+   return 0;                 /* output is compared with float64.stdout.exp */
+}
diff --git a/none/tests/riscv64/float64.stderr.exp b/none/tests/riscv64/float64.stderr.exp
new file mode 100644
index 000000000..e69de29bb
diff --git a/none/tests/riscv64/float64.stdout.exp b/none/tests/riscv64/float64.stdout.exp
new file mode 100644
index 000000000..894ccb539
--- /dev/null
+++ b/none/tests/riscv64/float64.stdout.exp
@@ -0,0 +1,1551 @@
+RV64D double-precision FP instruction set, shared operations
+fld fa0, 0(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0xbeafe48541dc8da0
+  no memory changes
+fld fa0, 4(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0xddcfb6e48aa9987b
+  no memory changes
+fld fa0, 8(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0xd592cd65355229e8
+  no memory changes
+fld fa0, 16(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0x2debefbafeb20cc3
+  no memory changes
+fld fa0, 32(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0xcc833e0bdb9b22c9
+  no memory changes
+fld fa0, 64(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0xb055d150879ff2e1
+  no memory changes
+fld fa0, 128(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0xde42c04b593a2a60
+  no memory changes
+fld fa0, 256(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0x4fd0ff83ec73d785
+  no memory changes
+fld fa0, 512(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0x95cbb202622d87cb
+  no memory changes
+fld fa0, 1024(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0x4f68505bad1b44c8
+  no memory changes
+fld fa0, 2040(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0x02cebbb5f39743f9
+  no memory changes
+fld fa0, -4(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0xf13ab1ea70a54ae6
+  no memory changes
+fld fa0, -2048(a1) ::
+  inputs: a1=&area_mid
+  output: fa0=0x2babb4d62d7e670a
+  no memory changes
+fld fa4, 0(a5) ::
+  inputs: a5=&area_mid
+  output: fa4=0x1d620107e24b896b
+  no memory changes
+fsd fa0, 0(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+000]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+fsd fa0, 4(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+000]  .. .. .. .. 89 67 45 23 01 ef cd ab .. .. .. ..
+fsd fa0, 8(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+000]  .. .. .. .. .. .. .. .. 89 67 45 23 01 ef cd ab
+fsd fa0, 16(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+016]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+fsd fa0, 32(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+032]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+fsd fa0, 64(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+064]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+fsd fa0, 128(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+128]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+fsd fa0, 256(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+256]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+fsd fa0, 512(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+512]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+fsd fa0, 1024(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+1024]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+fsd fa0, 2040(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [+2032]  .. .. .. .. .. .. .. .. 89 67 45 23 01 ef cd ab
+fsd fa0, -4(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [-016]  .. .. .. .. .. .. .. .. .. .. .. .. 89 67 45 23
+  [+000]  01 ef cd ab .. .. .. .. .. .. .. .. .. .. .. ..
+fsd fa0, -2048(a1) ::
+  inputs: fa0=0xabcdef0123456789, a1=&area_mid
+  [-2048]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+fsd fa4, 0(a5) ::
+  inputs: fa4=0xabcdef0123456789, a5=&area_mid
+  [+000]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+fmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x4008000000000000, fa2=0x4000000000000000, fa3=0x3ff0000000000000, fcsr=0x00000000
+  output: fa0=0x401c000000000000, fcsr=0x00000000
+fmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0xbff0000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000000
+fmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x4000000000000000, fa2=0x0000000000000001, fa3=0x8000000000000001, fcsr=0x00000000
+  output: fa0=0x0000000000000001, fcsr=0x00000000
+fmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x4000000000000000, fa2=0x7fefffffffffffff, fa3=0xffefffffffffffff, fcsr=0x00000000
+  output: fa0=0x7fefffffffffffff, fcsr=0x00000000
+fmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x4000000000000000, fa2=0x7fefffffffffffff, fa3=0x0000000000000000, fcsr=0x00000000
+  output: fa0=0x7ff0000000000000, fcsr=0x00000005
+fmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x4000000000000000, fa2=0x7ff0000000000000, fa3=0xfff0000000000000, fcsr=0x00000000
+  output: fa0=0x7ff8000000000000, fcsr=0x00000010
+fmadd.d fa0, fa1, fa2, fa3, rne ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fmadd.d fa0, fa1, fa2, fa3, rne ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000001, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000002, fcsr=0x00000001
+fmadd.d fa0, fa1, fa2, fa3, rtz ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fmadd.d fa0, fa1, fa2, fa3, rtz ::
+  inputs: fa1=0x3ff0000000000000, fa2=0xbff0000000000000, fa3=0xbca0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000000, fcsr=0x00000001
+fmadd.d fa0, fa1, fa2, fa3, rdn ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fmadd.d fa0, fa1, fa2, fa3, rdn ::
+  inputs: fa1=0x3ff0000000000000, fa2=0xbff0000000000000, fa3=0xbca0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000001, fcsr=0x00000001
+fmadd.d fa0, fa1, fa2, fa3, rup ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000001, fcsr=0x00000001
+fmadd.d fa0, fa1, fa2, fa3, rup ::
+  inputs: fa1=0x3ff0000000000000, fa2=0xbff0000000000000, fa3=0xbca0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000000, fcsr=0x00000001
+fmadd.d fa0, fa1, fa2, fa3, rmm ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000001, fcsr=0x00000001
+fmadd.d fa0, fa1, fa2, fa3, rmm ::
+  inputs: fa1=0x3ff0000000000000, fa2=0xbff0000000000000, fa3=0xbca0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000001, fcsr=0x00000001
+fmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000001, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000002, fcsr=0x00000001
+fmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000020
+  output: fa0=0x3ff0000000000000, fcsr=0x00000021
+fmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0xbff0000000000000, fa3=0xbca0000000000000, fcsr=0x00000020
+  output: fa0=0xbff0000000000000, fcsr=0x00000021
+fmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000040
+  output: fa0=0x3ff0000000000000, fcsr=0x00000041
+fmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0xbff0000000000000, fa3=0xbca0000000000000, fcsr=0x00000040
+  output: fa0=0xbff0000000000001, fcsr=0x00000041
+fmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000060
+  output: fa0=0x3ff0000000000001, fcsr=0x00000061
+fmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0xbff0000000000000, fa3=0xbca0000000000000, fcsr=0x00000060
+  output: fa0=0xbff0000000000000, fcsr=0x00000061
+fmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000080
+  output: fa0=0x3ff0000000000001, fcsr=0x00000081
+fmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0xbff0000000000000, fa3=0xbca0000000000000, fcsr=0x00000080
+  output: fa0=0xbff0000000000001, fcsr=0x00000081
+fmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x4008000000000000, fa2=0x4000000000000000, fa3=0x3ff0000000000000, fcsr=0x00000000
+  output: fa0=0x4014000000000000, fcsr=0x00000000
+fmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ff0000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000000
+fmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x4000000000000000, fa2=0x0000000000000001, fa3=0x0000000000000001, fcsr=0x00000000
+  output: fa0=0x0000000000000001, fcsr=0x00000000
+fmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x4000000000000000, fa2=0x7fefffffffffffff, fa3=0x7fefffffffffffff, fcsr=0x00000000
+  output: fa0=0x7fefffffffffffff, fcsr=0x00000000
+fmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x4000000000000000, fa2=0x7fefffffffffffff, fa3=0x0000000000000000, fcsr=0x00000000
+  output: fa0=0x7ff0000000000000, fcsr=0x00000005
+fmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x4000000000000000, fa2=0x7ff0000000000000, fa3=0x7ff0000000000000, fcsr=0x00000000
+  output: fa0=0x7ff8000000000000, fcsr=0x00000010
+fmsub.d fa0, fa1, fa2, fa3, rne ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000001, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fmsub.d fa0, fa1, fa2, fa3, rne ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000002, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000002, fcsr=0x00000001
+fmsub.d fa0, fa1, fa2, fa3, rtz ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000001, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fmsub.d fa0, fa1, fa2, fa3, rtz ::
+  inputs: fa1=0x3ff0000000000000, fa2=0xbff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000000, fcsr=0x00000001
+fmsub.d fa0, fa1, fa2, fa3, rdn ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000001, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fmsub.d fa0, fa1, fa2, fa3, rdn ::
+  inputs: fa1=0x3ff0000000000000, fa2=0xbff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000001, fcsr=0x00000001
+fmsub.d fa0, fa1, fa2, fa3, rup ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000001, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000001, fcsr=0x00000001
+fmsub.d fa0, fa1, fa2, fa3, rup ::
+  inputs: fa1=0x3ff0000000000000, fa2=0xbff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000000, fcsr=0x00000001
+fmsub.d fa0, fa1, fa2, fa3, rmm ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000001, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000001, fcsr=0x00000001
+fmsub.d fa0, fa1, fa2, fa3, rmm ::
+  inputs: fa1=0x3ff0000000000000, fa2=0xbff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000001, fcsr=0x00000001
+fmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000001, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000002, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000002, fcsr=0x00000001
+fmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000001, fa3=0x3ca0000000000000, fcsr=0x00000020
+  output: fa0=0x3ff0000000000000, fcsr=0x00000021
+fmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0xbff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000020
+  output: fa0=0xbff0000000000000, fcsr=0x00000021
+fmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000001, fa3=0x3ca0000000000000, fcsr=0x00000040
+  output: fa0=0x3ff0000000000000, fcsr=0x00000041
+fmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0xbff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000040
+  output: fa0=0xbff0000000000001, fcsr=0x00000041
+fmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000001, fa3=0x3ca0000000000000, fcsr=0x00000060
+  output: fa0=0x3ff0000000000001, fcsr=0x00000061
+fmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0xbff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000060
+  output: fa0=0xbff0000000000000, fcsr=0x00000061
+fmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000001, fa3=0x3ca0000000000000, fcsr=0x00000080
+  output: fa0=0x3ff0000000000001, fcsr=0x00000081
+fmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0xbff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000080
+  output: fa0=0xbff0000000000001, fcsr=0x00000081
+fnmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x4008000000000000, fa2=0x4000000000000000, fa3=0x3ff0000000000000, fcsr=0x00000000
+  output: fa0=0xc014000000000000, fcsr=0x00000000
+fnmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ff0000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000000
+fnmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x4000000000000000, fa2=0x0000000000000001, fa3=0x0000000000000001, fcsr=0x00000000
+  output: fa0=0x8000000000000001, fcsr=0x00000000
+fnmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x4000000000000000, fa2=0x7fefffffffffffff, fa3=0x7fefffffffffffff, fcsr=0x00000000
+  output: fa0=0xffefffffffffffff, fcsr=0x00000000
+fnmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x4000000000000000, fa2=0x7fefffffffffffff, fa3=0x0000000000000000, fcsr=0x00000000
+  output: fa0=0xfff0000000000000, fcsr=0x00000005
+fnmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x4000000000000000, fa2=0x7ff0000000000000, fa3=0x7ff0000000000000, fcsr=0x00000000
+  output: fa0=0x7ff8000000000000, fcsr=0x00000010
+fnmsub.d fa0, fa1, fa2, fa3, rne ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fnmsub.d fa0, fa1, fa2, fa3, rne ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ff0000000000001, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000002, fcsr=0x00000001
+fnmsub.d fa0, fa1, fa2, fa3, rtz ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fnmsub.d fa0, fa1, fa2, fa3, rtz ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0xbca0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000000, fcsr=0x00000001
+fnmsub.d fa0, fa1, fa2, fa3, rdn ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fnmsub.d fa0, fa1, fa2, fa3, rdn ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0xbca0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000001, fcsr=0x00000001
+fnmsub.d fa0, fa1, fa2, fa3, rup ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000001, fcsr=0x00000001
+fnmsub.d fa0, fa1, fa2, fa3, rup ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0xbca0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000000, fcsr=0x00000001
+fnmsub.d fa0, fa1, fa2, fa3, rmm ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000001, fcsr=0x00000001
+fnmsub.d fa0, fa1, fa2, fa3, rmm ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0xbca0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000001, fcsr=0x00000001
+fnmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fnmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ff0000000000001, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000002, fcsr=0x00000001
+fnmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000020
+  output: fa0=0x3ff0000000000000, fcsr=0x00000021
+fnmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0xbca0000000000000, fcsr=0x00000020
+  output: fa0=0xbff0000000000000, fcsr=0x00000021
+fnmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000040
+  output: fa0=0x3ff0000000000000, fcsr=0x00000041
+fnmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0xbca0000000000000, fcsr=0x00000040
+  output: fa0=0xbff0000000000001, fcsr=0x00000041
+fnmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000060
+  output: fa0=0x3ff0000000000001, fcsr=0x00000061
+fnmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0xbca0000000000000, fcsr=0x00000060
+  output: fa0=0xbff0000000000000, fcsr=0x00000061
+fnmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000080
+  output: fa0=0x3ff0000000000001, fcsr=0x00000081
+fnmsub.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0xbca0000000000000, fcsr=0x00000080
+  output: fa0=0xbff0000000000001, fcsr=0x00000081
+fnmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x4008000000000000, fa2=0x4000000000000000, fa3=0x3ff0000000000000, fcsr=0x00000000
+  output: fa0=0xc01c000000000000, fcsr=0x00000000
+fnmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0xbff0000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000000
+fnmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x4000000000000000, fa2=0x0000000000000001, fa3=0x8000000000000001, fcsr=0x00000000
+  output: fa0=0x8000000000000001, fcsr=0x00000000
+fnmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x4000000000000000, fa2=0x7fefffffffffffff, fa3=0xffefffffffffffff, fcsr=0x00000000
+  output: fa0=0xffefffffffffffff, fcsr=0x00000000
+fnmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x4000000000000000, fa2=0x7fefffffffffffff, fa3=0x0000000000000000, fcsr=0x00000000
+  output: fa0=0xfff0000000000000, fcsr=0x00000005
+fnmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x4000000000000000, fa2=0x7ff0000000000000, fa3=0xfff0000000000000, fcsr=0x00000000
+  output: fa0=0x7ff8000000000000, fcsr=0x00000010
+fnmadd.d fa0, fa1, fa2, fa3, rne ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ff0000000000001, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fnmadd.d fa0, fa1, fa2, fa3, rne ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ff0000000000002, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000002, fcsr=0x00000001
+fnmadd.d fa0, fa1, fa2, fa3, rtz ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ff0000000000001, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fnmadd.d fa0, fa1, fa2, fa3, rtz ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000000, fcsr=0x00000001
+fnmadd.d fa0, fa1, fa2, fa3, rdn ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ff0000000000001, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fnmadd.d fa0, fa1, fa2, fa3, rdn ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000001, fcsr=0x00000001
+fnmadd.d fa0, fa1, fa2, fa3, rup ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ff0000000000001, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000001, fcsr=0x00000001
+fnmadd.d fa0, fa1, fa2, fa3, rup ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000000, fcsr=0x00000001
+fnmadd.d fa0, fa1, fa2, fa3, rmm ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ff0000000000001, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000001, fcsr=0x00000001
+fnmadd.d fa0, fa1, fa2, fa3, rmm ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000001, fcsr=0x00000001
+fnmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ff0000000000001, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fnmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ff0000000000002, fa3=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000002, fcsr=0x00000001
+fnmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ff0000000000001, fa3=0x3ca0000000000000, fcsr=0x00000020
+  output: fa0=0x3ff0000000000000, fcsr=0x00000021
+fnmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000020
+  output: fa0=0xbff0000000000000, fcsr=0x00000021
+fnmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ff0000000000001, fa3=0x3ca0000000000000, fcsr=0x00000040
+  output: fa0=0x3ff0000000000000, fcsr=0x00000041
+fnmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000040
+  output: fa0=0xbff0000000000001, fcsr=0x00000041
+fnmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ff0000000000001, fa3=0x3ca0000000000000, fcsr=0x00000060
+  output: fa0=0x3ff0000000000001, fcsr=0x00000061
+fnmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000060
+  output: fa0=0xbff0000000000000, fcsr=0x00000061
+fnmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ff0000000000001, fa3=0x3ca0000000000000, fcsr=0x00000080
+  output: fa0=0x3ff0000000000001, fcsr=0x00000081
+fnmadd.d fa0, fa1, fa2, fa3 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fa3=0x3ca0000000000000, fcsr=0x00000080
+  output: fa0=0xbff0000000000001, fcsr=0x00000081
+fadd.d fa0, fa1, fa2 ::
+  inputs: fa1=0x4000000000000000, fa2=0x3ff0000000000000, fcsr=0x00000000
+  output: fa0=0x4008000000000000, fcsr=0x00000000
+fadd.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0xbff0000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000000
+fadd.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000001, fa2=0x0000000000000001, fcsr=0x00000000
+  output: fa0=0x0000000000000002, fcsr=0x00000000
+fadd.d fa0, fa1, fa2 ::
+  inputs: fa1=0x7fefffffffffffff, fa2=0x7fefffffffffffff, fcsr=0x00000000
+  output: fa0=0x7ff0000000000000, fcsr=0x00000005
+fadd.d fa0, fa1, fa2 ::
+  inputs: fa1=0xffefffffffffffff, fa2=0xffefffffffffffff, fcsr=0x00000000
+  output: fa0=0xfff0000000000000, fcsr=0x00000005
+fadd.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0010000000000001, fa2=0x8010000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000001, fcsr=0x00000000
+fadd.d fa0, fa1, fa2 ::
+  inputs: fa1=0x7ff0000000000000, fa2=0xfff0000000000000, fcsr=0x00000000
+  output: fa0=0x7ff8000000000000, fcsr=0x00000010
+fadd.d fa0, fa1, fa2, rne ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fadd.d fa0, fa1, fa2, rne ::
+  inputs: fa1=0x3ff0000000000001, fa2=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000002, fcsr=0x00000001
+fadd.d fa0, fa1, fa2, rtz ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fadd.d fa0, fa1, fa2, rtz ::
+  inputs: fa1=0xbff0000000000000, fa2=0xbca0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000000, fcsr=0x00000001
+fadd.d fa0, fa1, fa2, rdn ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fadd.d fa0, fa1, fa2, rdn ::
+  inputs: fa1=0xbff0000000000000, fa2=0xbca0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000001, fcsr=0x00000001
+fadd.d fa0, fa1, fa2, rup ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000001, fcsr=0x00000001
+fadd.d fa0, fa1, fa2, rup ::
+  inputs: fa1=0xbff0000000000000, fa2=0xbca0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000000, fcsr=0x00000001
+fadd.d fa0, fa1, fa2, rmm ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000001, fcsr=0x00000001
+fadd.d fa0, fa1, fa2, rmm ::
+  inputs: fa1=0xbff0000000000000, fa2=0xbca0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000001, fcsr=0x00000001
+fadd.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fadd.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000001, fa2=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000002, fcsr=0x00000001
+fadd.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ca0000000000000, fcsr=0x00000020
+  output: fa0=0x3ff0000000000000, fcsr=0x00000021
+fadd.d fa0, fa1, fa2 ::
+  inputs: fa1=0xbff0000000000000, fa2=0xbca0000000000000, fcsr=0x00000020
+  output: fa0=0xbff0000000000000, fcsr=0x00000021
+fadd.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ca0000000000000, fcsr=0x00000040
+  output: fa0=0x3ff0000000000000, fcsr=0x00000041
+fadd.d fa0, fa1, fa2 ::
+  inputs: fa1=0xbff0000000000000, fa2=0xbca0000000000000, fcsr=0x00000040
+  output: fa0=0xbff0000000000001, fcsr=0x00000041
+fadd.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ca0000000000000, fcsr=0x00000060
+  output: fa0=0x3ff0000000000001, fcsr=0x00000061
+fadd.d fa0, fa1, fa2 ::
+  inputs: fa1=0xbff0000000000000, fa2=0xbca0000000000000, fcsr=0x00000060
+  output: fa0=0xbff0000000000000, fcsr=0x00000061
+fadd.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ca0000000000000, fcsr=0x00000080
+  output: fa0=0x3ff0000000000001, fcsr=0x00000081
+fadd.d fa0, fa1, fa2 ::
+  inputs: fa1=0xbff0000000000000, fa2=0xbca0000000000000, fcsr=0x00000080
+  output: fa0=0xbff0000000000001, fcsr=0x00000081
+fsub.d fa0, fa1, fa2 ::
+  inputs: fa1=0x4000000000000000, fa2=0x3ff0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000000
+fsub.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x3ff0000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000000
+fsub.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000001, fa2=0x8000000000000001, fcsr=0x00000000
+  output: fa0=0x0000000000000002, fcsr=0x00000000
+fsub.d fa0, fa1, fa2 ::
+  inputs: fa1=0x7fefffffffffffff, fa2=0xffefffffffffffff, fcsr=0x00000000
+  output: fa0=0x7ff0000000000000, fcsr=0x00000005
+fsub.d fa0, fa1, fa2 ::
+  inputs: fa1=0xffefffffffffffff, fa2=0x7fefffffffffffff, fcsr=0x00000000
+  output: fa0=0xfff0000000000000, fcsr=0x00000005
+fsub.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0010000000000001, fa2=0x0010000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000001, fcsr=0x00000000
+fsub.d fa0, fa1, fa2 ::
+  inputs: fa1=0x7ff0000000000000, fa2=0x7ff0000000000000, fcsr=0x00000000
+  output: fa0=0x7ff8000000000000, fcsr=0x00000010
+fsub.d fa0, fa1, fa2, rne ::
+  inputs: fa1=0x3ff0000000000001, fa2=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fsub.d fa0, fa1, fa2, rne ::
+  inputs: fa1=0x3ff0000000000002, fa2=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000002, fcsr=0x00000001
+fsub.d fa0, fa1, fa2, rtz ::
+  inputs: fa1=0x3ff0000000000001, fa2=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fsub.d fa0, fa1, fa2, rtz ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000000, fcsr=0x00000001
+fsub.d fa0, fa1, fa2, rdn ::
+  inputs: fa1=0x3ff0000000000001, fa2=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fsub.d fa0, fa1, fa2, rdn ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000001, fcsr=0x00000001
+fsub.d fa0, fa1, fa2, rup ::
+  inputs: fa1=0x3ff0000000000001, fa2=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000001, fcsr=0x00000001
+fsub.d fa0, fa1, fa2, rup ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000000, fcsr=0x00000001
+fsub.d fa0, fa1, fa2, rmm ::
+  inputs: fa1=0x3ff0000000000001, fa2=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000001, fcsr=0x00000001
+fsub.d fa0, fa1, fa2, rmm ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000001, fcsr=0x00000001
+fsub.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000001, fa2=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fsub.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000002, fa2=0x3ca0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000002, fcsr=0x00000001
+fsub.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000001, fa2=0x3ca0000000000000, fcsr=0x00000020
+  output: fa0=0x3ff0000000000000, fcsr=0x00000021
+fsub.d fa0, fa1, fa2 ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ca0000000000000, fcsr=0x00000020
+  output: fa0=0xbff0000000000000, fcsr=0x00000021
+fsub.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000001, fa2=0x3ca0000000000000, fcsr=0x00000040
+  output: fa0=0x3ff0000000000000, fcsr=0x00000041
+fsub.d fa0, fa1, fa2 ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ca0000000000000, fcsr=0x00000040
+  output: fa0=0xbff0000000000001, fcsr=0x00000041
+fsub.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000001, fa2=0x3ca0000000000000, fcsr=0x00000060
+  output: fa0=0x3ff0000000000001, fcsr=0x00000061
+fsub.d fa0, fa1, fa2 ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ca0000000000000, fcsr=0x00000060
+  output: fa0=0xbff0000000000000, fcsr=0x00000061
+fsub.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000001, fa2=0x3ca0000000000000, fcsr=0x00000080
+  output: fa0=0x3ff0000000000001, fcsr=0x00000081
+fsub.d fa0, fa1, fa2 ::
+  inputs: fa1=0xbff0000000000000, fa2=0x3ca0000000000000, fcsr=0x00000080
+  output: fa0=0xbff0000000000001, fcsr=0x00000081
+fmul.d fa0, fa1, fa2 ::
+  inputs: fa1=0x4000000000000000, fa2=0x3ff0000000000000, fcsr=0x00000000
+  output: fa0=0x4000000000000000, fcsr=0x00000000
+fmul.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x0000000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000000
+fmul.d fa0, fa1, fa2 ::
+  inputs: fa1=0x1e60000000000000, fa2=0x1e60000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000001, fcsr=0x00000000
+fmul.d fa0, fa1, fa2 ::
+  inputs: fa1=0x7fefffffffffffff, fa2=0x7fefffffffffffff, fcsr=0x00000000
+  output: fa0=0x7ff0000000000000, fcsr=0x00000005
+fmul.d fa0, fa1, fa2 ::
+  inputs: fa1=0x7fefffffffffffff, fa2=0xffefffffffffffff, fcsr=0x00000000
+  output: fa0=0xfff0000000000000, fcsr=0x00000005
+fmul.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x7ff0000000000000, fcsr=0x00000000
+  output: fa0=0x7ff0000000000000, fcsr=0x00000000
+fmul.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000000, fa2=0x7ff0000000000000, fcsr=0x00000000
+  output: fa0=0x7ff8000000000000, fcsr=0x00000010
+fmul.d fa0, fa1, fa2, rne ::
+  inputs: fa1=0x0000000000000001, fa2=0x3fe0000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000003
+fmul.d fa0, fa1, fa2, rne ::
+  inputs: fa1=0x0000000000000003, fa2=0x3fe0000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000002, fcsr=0x00000003
+fmul.d fa0, fa1, fa2, rtz ::
+  inputs: fa1=0x0000000000000001, fa2=0x3fe0000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000003
+fmul.d fa0, fa1, fa2, rtz ::
+  inputs: fa1=0x8000000000000001, fa2=0x3fe0000000000000, fcsr=0x00000000
+  output: fa0=0x8000000000000000, fcsr=0x00000003
+fmul.d fa0, fa1, fa2, rdn ::
+  inputs: fa1=0x0000000000000001, fa2=0x3fe0000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000003
+fmul.d fa0, fa1, fa2, rdn ::
+  inputs: fa1=0x8000000000000001, fa2=0x3fe0000000000000, fcsr=0x00000000
+  output: fa0=0x8000000000000001, fcsr=0x00000003
+fmul.d fa0, fa1, fa2, rup ::
+  inputs: fa1=0x0000000000000001, fa2=0x3fe0000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000001, fcsr=0x00000003
+fmul.d fa0, fa1, fa2, rup ::
+  inputs: fa1=0x8000000000000001, fa2=0x3fe0000000000000, fcsr=0x00000000
+  output: fa0=0x8000000000000000, fcsr=0x00000003
+fmul.d fa0, fa1, fa2, rmm ::
+  inputs: fa1=0x0000000000000001, fa2=0x3fe0000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000001, fcsr=0x00000003
+fmul.d fa0, fa1, fa2, rmm ::
+  inputs: fa1=0x8000000000000001, fa2=0x3fe0000000000000, fcsr=0x00000000
+  output: fa0=0x8000000000000001, fcsr=0x00000003
+fmul.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000001, fa2=0x3fe0000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000003
+fmul.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000003, fa2=0x3fe0000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000002, fcsr=0x00000003
+fmul.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000001, fa2=0x3fe0000000000000, fcsr=0x00000020
+  output: fa0=0x0000000000000000, fcsr=0x00000023
+fmul.d fa0, fa1, fa2 ::
+  inputs: fa1=0x8000000000000001, fa2=0x3fe0000000000000, fcsr=0x00000020
+  output: fa0=0x8000000000000000, fcsr=0x00000023
+fmul.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000001, fa2=0x3fe0000000000000, fcsr=0x00000040
+  output: fa0=0x0000000000000000, fcsr=0x00000043
+fmul.d fa0, fa1, fa2 ::
+  inputs: fa1=0x8000000000000001, fa2=0x3fe0000000000000, fcsr=0x00000040
+  output: fa0=0x8000000000000001, fcsr=0x00000043
+fmul.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000001, fa2=0x3fe0000000000000, fcsr=0x00000060
+  output: fa0=0x0000000000000001, fcsr=0x00000063
+fmul.d fa0, fa1, fa2 ::
+  inputs: fa1=0x8000000000000001, fa2=0x3fe0000000000000, fcsr=0x00000060
+  output: fa0=0x8000000000000000, fcsr=0x00000063
+fmul.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000001, fa2=0x3fe0000000000000, fcsr=0x00000080
+  output: fa0=0x0000000000000001, fcsr=0x00000083
+fmul.d fa0, fa1, fa2 ::
+  inputs: fa1=0x8000000000000001, fa2=0x3fe0000000000000, fcsr=0x00000080
+  output: fa0=0x8000000000000001, fcsr=0x00000083
+fdiv.d fa0, fa1, fa2 ::
+  inputs: fa1=0x4000000000000000, fa2=0x3ff0000000000000, fcsr=0x00000000
+  output: fa0=0x4000000000000000, fcsr=0x00000000
+fdiv.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000000, fa2=0x3ff0000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000000
+fdiv.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x7fe0000000000000, fcsr=0x00000000
+  output: fa0=0x0008000000000000, fcsr=0x00000000
+fdiv.d fa0, fa1, fa2 ::
+  inputs: fa1=0x7fefffffffffffff, fa2=0x3fe0000000000000, fcsr=0x00000000
+  output: fa0=0x7ff0000000000000, fcsr=0x00000005
+fdiv.d fa0, fa1, fa2 ::
+  inputs: fa1=0x7fefffffffffffff, fa2=0xbfe0000000000000, fcsr=0x00000000
+  output: fa0=0xfff0000000000000, fcsr=0x00000005
+fdiv.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x7ff0000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000000
+fdiv.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x0000000000000000, fcsr=0x00000000
+  output: fa0=0x7ff0000000000000, fcsr=0x00000008
+fdiv.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000000, fa2=0x0000000000000000, fcsr=0x00000000
+  output: fa0=0x7ff8000000000000, fcsr=0x00000010
+fdiv.d fa0, fa1, fa2, rne ::
+  inputs: fa1=0x0000000000000001, fa2=0x4000000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000003
+fdiv.d fa0, fa1, fa2, rne ::
+  inputs: fa1=0x0000000000000003, fa2=0x4000000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000002, fcsr=0x00000003
+fdiv.d fa0, fa1, fa2, rtz ::
+  inputs: fa1=0x0000000000000001, fa2=0x4000000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000003
+fdiv.d fa0, fa1, fa2, rtz ::
+  inputs: fa1=0x8000000000000001, fa2=0x4000000000000000, fcsr=0x00000000
+  output: fa0=0x8000000000000000, fcsr=0x00000003
+fdiv.d fa0, fa1, fa2, rdn ::
+  inputs: fa1=0x0000000000000001, fa2=0x4000000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000003
+fdiv.d fa0, fa1, fa2, rdn ::
+  inputs: fa1=0x8000000000000001, fa2=0x4000000000000000, fcsr=0x00000000
+  output: fa0=0x8000000000000001, fcsr=0x00000003
+fdiv.d fa0, fa1, fa2, rup ::
+  inputs: fa1=0x0000000000000001, fa2=0x4000000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000001, fcsr=0x00000003
+fdiv.d fa0, fa1, fa2, rup ::
+  inputs: fa1=0x8000000000000001, fa2=0x4000000000000000, fcsr=0x00000000
+  output: fa0=0x8000000000000000, fcsr=0x00000003
+fdiv.d fa0, fa1, fa2, rmm ::
+  inputs: fa1=0x0000000000000001, fa2=0x4000000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000001, fcsr=0x00000003
+fdiv.d fa0, fa1, fa2, rmm ::
+  inputs: fa1=0x8000000000000001, fa2=0x4000000000000000, fcsr=0x00000000
+  output: fa0=0x8000000000000001, fcsr=0x00000003
+fdiv.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000001, fa2=0x4000000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000003
+fdiv.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000003, fa2=0x4000000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000002, fcsr=0x00000003
+fdiv.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000001, fa2=0x4000000000000000, fcsr=0x00000020
+  output: fa0=0x0000000000000000, fcsr=0x00000023
+fdiv.d fa0, fa1, fa2 ::
+  inputs: fa1=0x8000000000000001, fa2=0x4000000000000000, fcsr=0x00000020
+  output: fa0=0x8000000000000000, fcsr=0x00000023
+fdiv.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000001, fa2=0x4000000000000000, fcsr=0x00000040
+  output: fa0=0x0000000000000000, fcsr=0x00000043
+fdiv.d fa0, fa1, fa2 ::
+  inputs: fa1=0x8000000000000001, fa2=0x4000000000000000, fcsr=0x00000040
+  output: fa0=0x8000000000000001, fcsr=0x00000043
+fdiv.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000001, fa2=0x4000000000000000, fcsr=0x00000060
+  output: fa0=0x0000000000000001, fcsr=0x00000063
+fdiv.d fa0, fa1, fa2 ::
+  inputs: fa1=0x8000000000000001, fa2=0x4000000000000000, fcsr=0x00000060
+  output: fa0=0x8000000000000000, fcsr=0x00000063
+fdiv.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000001, fa2=0x4000000000000000, fcsr=0x00000080
+  output: fa0=0x0000000000000001, fcsr=0x00000083
+fdiv.d fa0, fa1, fa2 ::
+  inputs: fa1=0x8000000000000001, fa2=0x4000000000000000, fcsr=0x00000080
+  output: fa0=0x8000000000000001, fcsr=0x00000083
+fsqrt.d fa0, fa1 ::
+  inputs: fa1=0x0000000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000000
+fsqrt.d fa0, fa1 ::
+  inputs: fa1=0x7ff0000000000000, fcsr=0x00000000
+  output: fa0=0x7ff0000000000000, fcsr=0x00000000
+fsqrt.d fa0, fa1 ::
+  inputs: fa1=0x0000000000000001, fcsr=0x00000000
+  output: fa0=0x1e60000000000000, fcsr=0x00000000
+fsqrt.d fa0, fa1 ::
+  inputs: fa1=0x7ff8000000000000, fcsr=0x00000000
+  output: fa0=0x7ff8000000000000, fcsr=0x00000000
+fsqrt.d fa0, fa1 ::
+  inputs: fa1=0xbff0000000000000, fcsr=0x00000000
+  output: fa0=0x7ff8000000000000, fcsr=0x00000010
+fsqrt.d fa0, fa1, rne ::
+  inputs: fa1=0x3ff0000000000001, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fsqrt.d fa0, fa1, rne ::
+  inputs: fa1=0x3ff0000000000002, fcsr=0x00000000
+  output: fa0=0x3ff0000000000001, fcsr=0x00000001
+fsqrt.d fa0, fa1, rtz ::
+  inputs: fa1=0x3ff0000000000001, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fsqrt.d fa0, fa1, rtz ::
+  inputs: fa1=0x3ff0000000000002, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fsqrt.d fa0, fa1, rdn ::
+  inputs: fa1=0x3ff0000000000001, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fsqrt.d fa0, fa1, rdn ::
+  inputs: fa1=0x3ff0000000000002, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fsqrt.d fa0, fa1, rup ::
+  inputs: fa1=0x3ff0000000000001, fcsr=0x00000000
+  output: fa0=0x3ff0000000000001, fcsr=0x00000001
+fsqrt.d fa0, fa1, rup ::
+  inputs: fa1=0x3ff0000000000002, fcsr=0x00000000
+  output: fa0=0x3ff0000000000001, fcsr=0x00000001
+fsqrt.d fa0, fa1, rmm ::
+  inputs: fa1=0x3ff0000000000001, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fsqrt.d fa0, fa1, rmm ::
+  inputs: fa1=0x3ff0000000000002, fcsr=0x00000000
+  output: fa0=0x3ff0000000000001, fcsr=0x00000001
+fsqrt.d fa0, fa1 ::
+  inputs: fa1=0x3ff0000000000001, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000001
+fsqrt.d fa0, fa1 ::
+  inputs: fa1=0x3ff0000000000002, fcsr=0x00000000
+  output: fa0=0x3ff0000000000001, fcsr=0x00000001
+fsqrt.d fa0, fa1 ::
+  inputs: fa1=0x3ff0000000000001, fcsr=0x00000020
+  output: fa0=0x3ff0000000000000, fcsr=0x00000021
+fsqrt.d fa0, fa1 ::
+  inputs: fa1=0x3ff0000000000002, fcsr=0x00000020
+  output: fa0=0x3ff0000000000000, fcsr=0x00000021
+fsqrt.d fa0, fa1 ::
+  inputs: fa1=0x3ff0000000000001, fcsr=0x00000040
+  output: fa0=0x3ff0000000000000, fcsr=0x00000041
+fsqrt.d fa0, fa1 ::
+  inputs: fa1=0x3ff0000000000002, fcsr=0x00000040
+  output: fa0=0x3ff0000000000000, fcsr=0x00000041
+fsqrt.d fa0, fa1 ::
+  inputs: fa1=0x3ff0000000000001, fcsr=0x00000060
+  output: fa0=0x3ff0000000000001, fcsr=0x00000061
+fsqrt.d fa0, fa1 ::
+  inputs: fa1=0x3ff0000000000002, fcsr=0x00000060
+  output: fa0=0x3ff0000000000001, fcsr=0x00000061
+fsqrt.d fa0, fa1 ::
+  inputs: fa1=0x3ff0000000000001, fcsr=0x00000080
+  output: fa0=0x3ff0000000000000, fcsr=0x00000081
+fsqrt.d fa0, fa1 ::
+  inputs: fa1=0x3ff0000000000002, fcsr=0x00000080
+  output: fa0=0x3ff0000000000001, fcsr=0x00000081
+fsgnj.d fa0, fa1, fa1 ::
+  inputs: fa1=0x3ff0000000000000, fa1=0x3ff0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000000
+fsgnj.d fa0, fa1, fa1 ::
+  inputs: fa1=0xbff0000000000000, fa1=0xbff0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000000, fcsr=0x00000000
+fsgnj.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x7fffffffffffffff, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000000
+fsgnj.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x8000000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000000, fcsr=0x00000000
+fsgnj.d fa0, fa1, fa2 ::
+  inputs: fa1=0xbff0000000000000, fa2=0x7fffffffffffffff, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000000
+fsgnj.d fa0, fa1, fa2 ::
+  inputs: fa1=0xbff0000000000000, fa2=0x8000000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000000, fcsr=0x00000000
+fsgnjn.d fa0, fa1, fa1 ::
+  inputs: fa1=0x3ff0000000000000, fa1=0x3ff0000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000000, fcsr=0x00000000
+fsgnjn.d fa0, fa1, fa1 ::
+  inputs: fa1=0xbff0000000000000, fa1=0xbff0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000000
+fsgnjn.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x7fffffffffffffff, fcsr=0x00000000
+  output: fa0=0xbff0000000000000, fcsr=0x00000000
+fsgnjn.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x8000000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000000
+fsgnjn.d fa0, fa1, fa2 ::
+  inputs: fa1=0xbff0000000000000, fa2=0x7fffffffffffffff, fcsr=0x00000000
+  output: fa0=0xbff0000000000000, fcsr=0x00000000
+fsgnjn.d fa0, fa1, fa2 ::
+  inputs: fa1=0xbff0000000000000, fa2=0x8000000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000000
+fsgnjx.d fa0, fa1, fa1 ::
+  inputs: fa1=0x3ff0000000000000, fa1=0x3ff0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000000
+fsgnjx.d fa0, fa1, fa1 ::
+  inputs: fa1=0xbff0000000000000, fa1=0xbff0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000000
+fsgnjx.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x7fffffffffffffff, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000000
+fsgnjx.d fa0, fa1, fa2 ::
+  inputs: fa1=0x3ff0000000000000, fa2=0x8000000000000000, fcsr=0x00000000
+  output: fa0=0xbff0000000000000, fcsr=0x00000000
+fsgnjx.d fa0, fa1, fa2 ::
+  inputs: fa1=0xbff0000000000000, fa2=0x7fffffffffffffff, fcsr=0x00000000
+  output: fa0=0xbff0000000000000, fcsr=0x00000000
+fsgnjx.d fa0, fa1, fa2 ::
+  inputs: fa1=0xbff0000000000000, fa2=0x8000000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000000
+fmin.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000000, fa2=0x3ff0000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000000
+fmin.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000000, fa2=0x8000000000000000, fcsr=0x00000000
+  output: fa0=0x8000000000000000, fcsr=0x00000000
+fmin.d fa0, fa1, fa2 ::
+  inputs: fa1=0x8000000000000000, fa2=0x0000000000000000, fcsr=0x00000000
+  output: fa0=0x8000000000000000, fcsr=0x00000000
+fmin.d fa0, fa1, fa2 ::
+  inputs: fa1=0x7ff0000000000000, fa2=0x7ff0000000000000, fcsr=0x00000000
+  output: fa0=0x7ff0000000000000, fcsr=0x00000000
+fmin.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000000, fa2=0x7ff8000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000000
+fmin.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000000, fa2=0x7ff4000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000010
+fmax.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000000, fa2=0x3ff0000000000000, fcsr=0x00000000
+  output: fa0=0x3ff0000000000000, fcsr=0x00000000
+fmax.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000000, fa2=0x8000000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000000
+fmax.d fa0, fa1, fa2 ::
+  inputs: fa1=0x8000000000000000, fa2=0x0000000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000000
+fmax.d fa0, fa1, fa2 ::
+  inputs: fa1=0x7ff0000000000000, fa2=0x7ff0000000000000, fcsr=0x00000000
+  output: fa0=0x7ff0000000000000, fcsr=0x00000000
+fmax.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000000, fa2=0x7ff8000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000000
+fmax.d fa0, fa1, fa2 ::
+  inputs: fa1=0x0000000000000000, fa2=0x7ff4000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000010
+fcvt.s.d fa0, fa1 ::
+  inputs: fa1=0x0000000000000000, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000000
+fcvt.s.d fa0, fa1 ::
+  inputs: fa1=0x0000000000000001, fcsr=0x00000000
+  output: fa0=0xffffffff00000000, fcsr=0x00000003
+fcvt.s.d fa0, fa1 ::
+  inputs: fa1=0x7ff0000000000000, fcsr=0x00000000
+  output: fa0=0xffffffff7f800000, fcsr=0x00000000
+fcvt.s.d fa0, fa1 ::
+  inputs: fa1=0x7ff8000000000000, fcsr=0x00000000
+  output: fa0=0xffffffff7fc00000, fcsr=0x00000000
+fcvt.s.d fa0, fa1 ::
+  inputs: fa1=0x47efffffe0000000, fcsr=0x00000000
+  output: fa0=0xffffffff7f7fffff, fcsr=0x00000000
+fcvt.s.d fa0, fa1 ::
+  inputs: fa1=0xc7efffffe0000000, fcsr=0x00000000
+  output: fa0=0xffffffffff7fffff, fcsr=0x00000000
+fcvt.s.d fa0, fa1 ::
+  inputs: fa1=0x47efffffe0000001, fcsr=0x00000000
+  output: fa0=0xffffffff7f7fffff, fcsr=0x00000001
+fcvt.s.d fa0, fa1 ::
+  inputs: fa1=0xc7efffffe0000001, fcsr=0x00000000
+  output: fa0=0xffffffffff7fffff, fcsr=0x00000001
+fcvt.s.d fa0, fa1 ::
+  inputs: fa1=0x7fefffffffffffff, fcsr=0x00000000
+  output: fa0=0xffffffff7f800000, fcsr=0x00000005
+fcvt.s.d fa0, fa1, rne ::
+  inputs: fa1=0x3ff0000010000000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fcvt.s.d fa0, fa1, rne ::
+  inputs: fa1=0x3ff0000030000000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800002, fcsr=0x00000001
+fcvt.s.d fa0, fa1, rtz ::
+  inputs: fa1=0x3ff0000010000000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fcvt.s.d fa0, fa1, rtz ::
+  inputs: fa1=0xbff0000010000000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800000, fcsr=0x00000001
+fcvt.s.d fa0, fa1, rdn ::
+  inputs: fa1=0x3ff0000010000000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fcvt.s.d fa0, fa1, rdn ::
+  inputs: fa1=0xbff0000010000000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800001, fcsr=0x00000001
+fcvt.s.d fa0, fa1, rup ::
+  inputs: fa1=0x3ff0000010000000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800001, fcsr=0x00000001
+fcvt.s.d fa0, fa1, rup ::
+  inputs: fa1=0xbff0000010000000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800000, fcsr=0x00000001
+fcvt.s.d fa0, fa1, rmm ::
+  inputs: fa1=0x3ff0000010000000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800001, fcsr=0x00000001
+fcvt.s.d fa0, fa1, rmm ::
+  inputs: fa1=0xbff0000010000000, fcsr=0x00000000
+  output: fa0=0xffffffffbf800001, fcsr=0x00000001
+fcvt.s.d fa0, fa1 ::
+  inputs: fa1=0x3ff0000010000000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800000, fcsr=0x00000001
+fcvt.s.d fa0, fa1 ::
+  inputs: fa1=0x3ff0000030000000, fcsr=0x00000000
+  output: fa0=0xffffffff3f800002, fcsr=0x00000001
+fcvt.s.d fa0, fa1 ::
+  inputs: fa1=0x3ff0000010000000, fcsr=0x00000020
+  output: fa0=0xffffffff3f800000, fcsr=0x00000021
+fcvt.s.d fa0, fa1 ::
+  inputs: fa1=0xbff0000010000000, fcsr=0x00000020
+  output: fa0=0xffffffffbf800000, fcsr=0x00000021
+fcvt.s.d fa0, fa1 ::
+  inputs: fa1=0x3ff0000010000000, fcsr=0x00000040
+  output: fa0=0xffffffff3f800000, fcsr=0x00000041
+fcvt.s.d fa0, fa1 ::
+  inputs: fa1=0xbff0000010000000, fcsr=0x00000040
+  output: fa0=0xffffffffbf800001, fcsr=0x00000041
+fcvt.s.d fa0, fa1 ::
+  inputs: fa1=0x3ff0000010000000, fcsr=0x00000060
+  output: fa0=0xffffffff3f800001, fcsr=0x00000061
+fcvt.s.d fa0, fa1 ::
+  inputs: fa1=0xbff0000010000000, fcsr=0x00000060
+  output: fa0=0xffffffffbf800000, fcsr=0x00000061
+fcvt.s.d fa0, fa1 ::
+  inputs: fa1=0x3ff0000010000000, fcsr=0x00000080
+  output: fa0=0xffffffff3f800001, fcsr=0x00000081
+fcvt.s.d fa0, fa1 ::
+  inputs: fa1=0xbff0000010000000, fcsr=0x00000080
+  output: fa0=0xffffffffbf800001, fcsr=0x00000081
+fcvt.d.s fa0, fa1 ::
+  inputs: fa1=0xffffffff00000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000000
+fcvt.d.s fa0, fa1 ::
+  inputs: fa1=0xffffffff00000001, fcsr=0x00000000
+  output: fa0=0x36a0000000000000, fcsr=0x00000000
+fcvt.d.s fa0, fa1 ::
+  inputs: fa1=0xffffffff7f800000, fcsr=0x00000000
+  output: fa0=0x7ff0000000000000, fcsr=0x00000000
+fcvt.d.s fa0, fa1 ::
+  inputs: fa1=0xffffffff7fc00000, fcsr=0x00000000
+  output: fa0=0x7ff8000000000000, fcsr=0x00000000
+fcvt.d.s fa0, fa1 ::
+  inputs: fa1=0xffffffff7f7fffff, fcsr=0x00000000
+  output: fa0=0x47efffffe0000000, fcsr=0x00000000
+fcvt.d.s fa0, fa1 ::
+  inputs: fa1=0xffffffffff7fffff, fcsr=0x00000000
+  output: fa0=0xc7efffffe0000000, fcsr=0x00000000
+feq.d a0, fa0, fa1 ::
+  inputs: fa0=0x0000000000000000, fa1=0x3ff0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000000
+feq.d a0, fa0, fa1 ::
+  inputs: fa0=0x0000000000000000, fa1=0x0000000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000000
+feq.d a0, fa0, fa1 ::
+  inputs: fa0=0x0000000000000000, fa1=0x8000000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000000
+feq.d a0, fa0, fa1 ::
+  inputs: fa0=0x8000000000000000, fa1=0x0000000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000000
+feq.d a0, fa0, fa1 ::
+  inputs: fa0=0x7ff0000000000000, fa1=0x7ff0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000000
+feq.d a0, fa0, fa1 ::
+  inputs: fa0=0x0000000000000000, fa1=0x7ff8000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000000
+feq.d a0, fa0, fa1 ::
+  inputs: fa0=0x0000000000000000, fa1=0x7ff4000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000010
+feq.d zero, fa0, fa1 ::
+  inputs: fa0=0x7ff4000000000000, fa1=0x7ff4000000000000, fcsr=0x00000000
+  output: zero=0x0000000000000000, fcsr=0x00000010
+flt.d a0, fa0, fa1 ::
+  inputs: fa0=0x0000000000000000, fa1=0x0000000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000000
+flt.d a0, fa0, fa1 ::
+  inputs: fa0=0x0000000000000000, fa1=0x3ff0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000000
+flt.d a0, fa0, fa1 ::
+  inputs: fa0=0x0000000000000000, fa1=0x8000000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000000
+flt.d a0, fa0, fa1 ::
+  inputs: fa0=0x8000000000000000, fa1=0x0000000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000000
+flt.d a0, fa0, fa1 ::
+  inputs: fa0=0x7ff0000000000000, fa1=0x7ff0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000000
+flt.d a0, fa0, fa1 ::
+  inputs: fa0=0x0000000000000000, fa1=0x7ff8000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000010
+flt.d a0, fa0, fa1 ::
+  inputs: fa0=0x0000000000000000, fa1=0x7ff4000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000010
+flt.d zero, fa0, fa1 ::
+  inputs: fa0=0x7ff4000000000000, fa1=0x7ff4000000000000, fcsr=0x00000000
+  output: zero=0x0000000000000000, fcsr=0x00000010
+fle.d a0, fa0, fa1 ::
+  inputs: fa0=0x3ff0000000000000, fa1=0x0000000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000000
+fle.d a0, fa0, fa1 ::
+  inputs: fa0=0x0000000000000000, fa1=0x0000000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000000
+fle.d a0, fa0, fa1 ::
+  inputs: fa0=0x0000000000000000, fa1=0x3ff0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000000
+fle.d a0, fa0, fa1 ::
+  inputs: fa0=0x0000000000000000, fa1=0x8000000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000000
+fle.d a0, fa0, fa1 ::
+  inputs: fa0=0x8000000000000000, fa1=0x0000000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000000
+fle.d a0, fa0, fa1 ::
+  inputs: fa0=0x7ff0000000000000, fa1=0x7ff0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000000
+fle.d a0, fa0, fa1 ::
+  inputs: fa0=0x0000000000000000, fa1=0x7ff8000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000010
+fle.d a0, fa0, fa1 ::
+  inputs: fa0=0x0000000000000000, fa1=0x7ff4000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000010
+fle.d zero, fa0, fa1 ::
+  inputs: fa0=0x7ff4000000000000, fa1=0x7ff4000000000000, fcsr=0x00000000
+  output: zero=0x0000000000000000, fcsr=0x00000010
+fclass.d a0, fa0 ::
+  inputs: fa0=0xfff0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000000
+fclass.d a0, fa0 ::
+  inputs: fa0=0xbff0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000002, fcsr=0x00000000
+fclass.d a0, fa0 ::
+  inputs: fa0=0x8000000000000001, fcsr=0x00000000
+  output: a0=0x0000000000000004, fcsr=0x00000000
+fclass.d a0, fa0 ::
+  inputs: fa0=0x8000000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000008, fcsr=0x00000000
+fclass.d a0, fa0 ::
+  inputs: fa0=0x0000000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000010, fcsr=0x00000000
+fclass.d a0, fa0 ::
+  inputs: fa0=0x0000000000000001, fcsr=0x00000000
+  output: a0=0x0000000000000020, fcsr=0x00000000
+fclass.d a0, fa0 ::
+  inputs: fa0=0x3ff0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000040, fcsr=0x00000000
+fclass.d a0, fa0 ::
+  inputs: fa0=0x7ff0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000080, fcsr=0x00000000
+fclass.d a0, fa0 ::
+  inputs: fa0=0x7ff4000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000100, fcsr=0x00000000
+fclass.d a0, fa0 ::
+  inputs: fa0=0x7ff8000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000200, fcsr=0x00000000
+fclass.d zero, fa0 ::
+  inputs: fa0=0xfff0000000000000, fcsr=0x00000000
+  output: zero=0x0000000000000000, fcsr=0x00000000
+fcvt.w.d a0, fa0 ::
+  inputs: fa0=0x0000000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000000
+fcvt.w.d a0, fa0 ::
+  inputs: fa0=0x0000000000000001, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.w.d a0, fa0 ::
+  inputs: fa0=0x7ff0000000000000, fcsr=0x00000000
+  output: a0=0x000000007fffffff, fcsr=0x00000010
+fcvt.w.d a0, fa0 ::
+  inputs: fa0=0x7ff8000000000000, fcsr=0x00000000
+  output: a0=0x000000007fffffff, fcsr=0x00000010
+fcvt.w.d a0, fa0 ::
+  inputs: fa0=0x41dfffffffc00000, fcsr=0x00000000
+  output: a0=0x000000007fffffff, fcsr=0x00000000
+fcvt.w.d a0, fa0 ::
+  inputs: fa0=0xc1e0000000000000, fcsr=0x00000000
+  output: a0=0xffffffff80000000, fcsr=0x00000000
+fcvt.w.d a0, fa0 ::
+  inputs: fa0=0x41e0000000000000, fcsr=0x00000000
+  output: a0=0x000000007fffffff, fcsr=0x00000010
+fcvt.w.d a0, fa0 ::
+  inputs: fa0=0xc1e0000000200000, fcsr=0x00000000
+  output: a0=0xffffffff80000000, fcsr=0x00000010
+fcvt.w.d zero, fa0 ::
+  inputs: fa0=0x3ff0000000000000, fcsr=0x00000000
+  output: zero=0x0000000000000000, fcsr=0x00000000
+fcvt.w.d a0, fa0, rne ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.w.d a0, fa0, rne ::
+  inputs: fa0=0x3ff8000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000002, fcsr=0x00000001
+fcvt.w.d a0, fa0, rtz ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.w.d a0, fa0, rtz ::
+  inputs: fa0=0xbfe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.w.d a0, fa0, rdn ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.w.d a0, fa0, rdn ::
+  inputs: fa0=0xbfe0000000000000, fcsr=0x00000000
+  output: a0=0xffffffffffffffff, fcsr=0x00000001
+fcvt.w.d a0, fa0, rup ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000001
+fcvt.w.d a0, fa0, rup ::
+  inputs: fa0=0xbfe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.w.d a0, fa0, rmm ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000001
+fcvt.w.d a0, fa0, rmm ::
+  inputs: fa0=0xbfe0000000000000, fcsr=0x00000000
+  output: a0=0xffffffffffffffff, fcsr=0x00000001
+fcvt.w.d a0, fa0 ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.w.d a0, fa0 ::
+  inputs: fa0=0x3ff8000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000002, fcsr=0x00000001
+fcvt.w.d a0, fa0 ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000020
+  output: a0=0x0000000000000000, fcsr=0x00000021
+fcvt.w.d a0, fa0 ::
+  inputs: fa0=0xbfe0000000000000, fcsr=0x00000020
+  output: a0=0x0000000000000000, fcsr=0x00000021
+fcvt.w.d a0, fa0 ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000040
+  output: a0=0x0000000000000000, fcsr=0x00000041
+fcvt.w.d a0, fa0 ::
+  inputs: fa0=0xbfe0000000000000, fcsr=0x00000040
+  output: a0=0xffffffffffffffff, fcsr=0x00000041
+fcvt.w.d a0, fa0 ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000060
+  output: a0=0x0000000000000001, fcsr=0x00000061
+fcvt.w.d a0, fa0 ::
+  inputs: fa0=0xbfe0000000000000, fcsr=0x00000060
+  output: a0=0x0000000000000000, fcsr=0x00000061
+fcvt.w.d a0, fa0 ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000080
+  output: a0=0x0000000000000001, fcsr=0x00000081
+fcvt.w.d a0, fa0 ::
+  inputs: fa0=0xbfe0000000000000, fcsr=0x00000080
+  output: a0=0xffffffffffffffff, fcsr=0x00000081
+fcvt.wu.d a0, fa0 ::
+  inputs: fa0=0x0000000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000000
+fcvt.wu.d a0, fa0 ::
+  inputs: fa0=0x0000000000000001, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.wu.d a0, fa0 ::
+  inputs: fa0=0x7ff0000000000000, fcsr=0x00000000
+  output: a0=0xffffffffffffffff, fcsr=0x00000010
+fcvt.wu.d a0, fa0 ::
+  inputs: fa0=0x7ff8000000000000, fcsr=0x00000000
+  output: a0=0xffffffffffffffff, fcsr=0x00000010
+fcvt.wu.d a0, fa0 ::
+  inputs: fa0=0x41efffffffe00000, fcsr=0x00000000
+  output: a0=0xffffffffffffffff, fcsr=0x00000000
+fcvt.wu.d a0, fa0 ::
+  inputs: fa0=0xbff0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000010
+fcvt.wu.d zero, fa0 ::
+  inputs: fa0=0x3ff0000000000000, fcsr=0x00000000
+  output: zero=0x0000000000000000, fcsr=0x00000000
+fcvt.wu.d a0, fa0, rne ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.wu.d a0, fa0, rne ::
+  inputs: fa0=0x3ff8000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000002, fcsr=0x00000001
+fcvt.wu.d a0, fa0, rtz ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.wu.d a0, fa0, rdn ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.wu.d a0, fa0, rup ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000001
+fcvt.wu.d a0, fa0, rmm ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000001
+fcvt.wu.d a0, fa0 ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.wu.d a0, fa0 ::
+  inputs: fa0=0x3ff8000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000002, fcsr=0x00000001
+fcvt.wu.d a0, fa0 ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000020
+  output: a0=0x0000000000000000, fcsr=0x00000021
+fcvt.wu.d a0, fa0 ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000040
+  output: a0=0x0000000000000000, fcsr=0x00000041
+fcvt.wu.d a0, fa0 ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000060
+  output: a0=0x0000000000000001, fcsr=0x00000061
+fcvt.wu.d a0, fa0 ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000080
+  output: a0=0x0000000000000001, fcsr=0x00000081
+fcvt.d.w fa0, a0 ::
+  inputs: a0=0x0000000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000000
+fcvt.d.w fa0, a0 ::
+  inputs: a0=0x000000007fffffff, fcsr=0x00000000
+  output: fa0=0x41dfffffffc00000, fcsr=0x00000000
+fcvt.d.w fa0, a0 ::
+  inputs: a0=0xffffffff80000000, fcsr=0x00000000
+  output: fa0=0xc1e0000000000000, fcsr=0x00000000
+fcvt.d.wu fa0, a0 ::
+  inputs: a0=0x0000000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000000
+fcvt.d.wu fa0, a0 ::
+  inputs: a0=0x00000000ffffffff, fcsr=0x00000000
+  output: fa0=0x41efffffffe00000, fcsr=0x00000000
+
+RV64D double-precision FP instruction set, additions
+fcvt.l.d a0, fa0 ::
+  inputs: fa0=0x0000000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000000
+fcvt.l.d a0, fa0 ::
+  inputs: fa0=0x0000000000000001, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.l.d a0, fa0 ::
+  inputs: fa0=0x7ff0000000000000, fcsr=0x00000000
+  output: a0=0x7fffffffffffffff, fcsr=0x00000010
+fcvt.l.d a0, fa0 ::
+  inputs: fa0=0x7ff8000000000000, fcsr=0x00000000
+  output: a0=0x7fffffffffffffff, fcsr=0x00000010
+fcvt.l.d a0, fa0 ::
+  inputs: fa0=0x43dfffffffffffff, fcsr=0x00000000
+  output: a0=0x7ffffffffffffc00, fcsr=0x00000000
+fcvt.l.d a0, fa0 ::
+  inputs: fa0=0xc3e0000000000000, fcsr=0x00000000
+  output: a0=0x8000000000000000, fcsr=0x00000000
+fcvt.l.d a0, fa0 ::
+  inputs: fa0=0x43e0000000000000, fcsr=0x00000000
+  output: a0=0x7fffffffffffffff, fcsr=0x00000010
+fcvt.l.d a0, fa0 ::
+  inputs: fa0=0xc3e0000000000001, fcsr=0x00000000
+  output: a0=0x8000000000000000, fcsr=0x00000010
+fcvt.l.d zero, fa0 ::
+  inputs: fa0=0x3ff0000000000000, fcsr=0x00000000
+  output: zero=0x0000000000000000, fcsr=0x00000000
+fcvt.l.d a0, fa0, rne ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.l.d a0, fa0, rne ::
+  inputs: fa0=0x3ff8000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000002, fcsr=0x00000001
+fcvt.l.d a0, fa0, rtz ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.l.d a0, fa0, rtz ::
+  inputs: fa0=0xbfe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.l.d a0, fa0, rdn ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.l.d a0, fa0, rdn ::
+  inputs: fa0=0xbfe0000000000000, fcsr=0x00000000
+  output: a0=0xffffffffffffffff, fcsr=0x00000001
+fcvt.l.d a0, fa0, rup ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000001
+fcvt.l.d a0, fa0, rup ::
+  inputs: fa0=0xbfe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.l.d a0, fa0, rmm ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000001
+fcvt.l.d a0, fa0, rmm ::
+  inputs: fa0=0xbfe0000000000000, fcsr=0x00000000
+  output: a0=0xffffffffffffffff, fcsr=0x00000001
+fcvt.l.d a0, fa0 ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.l.d a0, fa0 ::
+  inputs: fa0=0x3ff8000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000002, fcsr=0x00000001
+fcvt.l.d a0, fa0 ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000020
+  output: a0=0x0000000000000000, fcsr=0x00000021
+fcvt.l.d a0, fa0 ::
+  inputs: fa0=0xbfe0000000000000, fcsr=0x00000020
+  output: a0=0x0000000000000000, fcsr=0x00000021
+fcvt.l.d a0, fa0 ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000040
+  output: a0=0x0000000000000000, fcsr=0x00000041
+fcvt.l.d a0, fa0 ::
+  inputs: fa0=0xbfe0000000000000, fcsr=0x00000040
+  output: a0=0xffffffffffffffff, fcsr=0x00000041
+fcvt.l.d a0, fa0 ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000060
+  output: a0=0x0000000000000001, fcsr=0x00000061
+fcvt.l.d a0, fa0 ::
+  inputs: fa0=0xbfe0000000000000, fcsr=0x00000060
+  output: a0=0x0000000000000000, fcsr=0x00000061
+fcvt.l.d a0, fa0 ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000080
+  output: a0=0x0000000000000001, fcsr=0x00000081
+fcvt.l.d a0, fa0 ::
+  inputs: fa0=0xbfe0000000000000, fcsr=0x00000080
+  output: a0=0xffffffffffffffff, fcsr=0x00000081
+fcvt.lu.d a0, fa0 ::
+  inputs: fa0=0x0000000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000000
+fcvt.lu.d a0, fa0 ::
+  inputs: fa0=0x0000000000000001, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.lu.d a0, fa0 ::
+  inputs: fa0=0x7ff0000000000000, fcsr=0x00000000
+  output: a0=0xffffffffffffffff, fcsr=0x00000010
+fcvt.lu.d a0, fa0 ::
+  inputs: fa0=0x7ff8000000000000, fcsr=0x00000000
+  output: a0=0xffffffffffffffff, fcsr=0x00000010
+fcvt.lu.d a0, fa0 ::
+  inputs: fa0=0x43efffffffffffff, fcsr=0x00000000
+  output: a0=0xfffffffffffff800, fcsr=0x00000000
+fcvt.lu.d a0, fa0 ::
+  inputs: fa0=0x43f0000000000000, fcsr=0x00000000
+  output: a0=0xffffffffffffffff, fcsr=0x00000010
+fcvt.lu.d a0, fa0 ::
+  inputs: fa0=0xbff0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000010
+fcvt.lu.d zero, fa0 ::
+  inputs: fa0=0x3ff0000000000000, fcsr=0x00000000
+  output: zero=0x0000000000000000, fcsr=0x00000000
+fcvt.lu.d a0, fa0, rne ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.lu.d a0, fa0, rne ::
+  inputs: fa0=0x3ff8000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000002, fcsr=0x00000001
+fcvt.lu.d a0, fa0, rtz ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.lu.d a0, fa0, rdn ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.lu.d a0, fa0, rup ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000001
+fcvt.lu.d a0, fa0, rmm ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000001, fcsr=0x00000001
+fcvt.lu.d a0, fa0 ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000000, fcsr=0x00000001
+fcvt.lu.d a0, fa0 ::
+  inputs: fa0=0x3ff8000000000000, fcsr=0x00000000
+  output: a0=0x0000000000000002, fcsr=0x00000001
+fcvt.lu.d a0, fa0 ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000020
+  output: a0=0x0000000000000000, fcsr=0x00000021
+fcvt.lu.d a0, fa0 ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000040
+  output: a0=0x0000000000000000, fcsr=0x00000041
+fcvt.lu.d a0, fa0 ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000060
+  output: a0=0x0000000000000001, fcsr=0x00000061
+fcvt.lu.d a0, fa0 ::
+  inputs: fa0=0x3fe0000000000000, fcsr=0x00000080
+  output: a0=0x0000000000000001, fcsr=0x00000081
+fmv.x.d a0, fa0 ::
+  inputs: fa0=0xabcdef0123456789, fcsr=0x00000000
+  output: a0=0xabcdef0123456789, fcsr=0x00000000
+fmv.x.d zero, fa0 ::
+  inputs: fa0=0x3ff0000000000000, fcsr=0x00000000
+  output: zero=0x0000000000000000, fcsr=0x00000000
+fcvt.d.l fa0, a0 ::
+  inputs: a0=0x0000000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000000
+fcvt.d.l fa0, a0 ::
+  inputs: a0=0x7ffffffffffffc00, fcsr=0x00000000
+  output: fa0=0x43dfffffffffffff, fcsr=0x00000000
+fcvt.d.l fa0, a0 ::
+  inputs: a0=0x7fffffffffffffff, fcsr=0x00000000
+  output: fa0=0x43e0000000000000, fcsr=0x00000001
+fcvt.d.l fa0, a0 ::
+  inputs: a0=0x8000000000000000, fcsr=0x00000000
+  output: fa0=0xc3e0000000000000, fcsr=0x00000000
+fcvt.d.l fa0, a0, rne ::
+  inputs: a0=0x0020000000000001, fcsr=0x00000000
+  output: fa0=0x4340000000000000, fcsr=0x00000001
+fcvt.d.l fa0, a0, rne ::
+  inputs: a0=0x0020000000000003, fcsr=0x00000000
+  output: fa0=0x4340000000000002, fcsr=0x00000001
+fcvt.d.l fa0, a0, rtz ::
+  inputs: a0=0x0020000000000001, fcsr=0x00000000
+  output: fa0=0x4340000000000000, fcsr=0x00000001
+fcvt.d.l fa0, a0, rtz ::
+  inputs: a0=0xffdfffffffffffff, fcsr=0x00000000
+  output: fa0=0xc340000000000000, fcsr=0x00000001
+fcvt.d.l fa0, a0, rdn ::
+  inputs: a0=0x0020000000000001, fcsr=0x00000000
+  output: fa0=0x4340000000000000, fcsr=0x00000001
+fcvt.d.l fa0, a0, rdn ::
+  inputs: a0=0xffdfffffffffffff, fcsr=0x00000000
+  output: fa0=0xc340000000000001, fcsr=0x00000001
+fcvt.d.l fa0, a0, rup ::
+  inputs: a0=0x0020000000000001, fcsr=0x00000000
+  output: fa0=0x4340000000000001, fcsr=0x00000001
+fcvt.d.l fa0, a0, rup ::
+  inputs: a0=0xffdfffffffffffff, fcsr=0x00000000
+  output: fa0=0xc340000000000000, fcsr=0x00000001
+fcvt.d.l fa0, a0, rmm ::
+  inputs: a0=0x0020000000000001, fcsr=0x00000000
+  output: fa0=0x4340000000000001, fcsr=0x00000001
+fcvt.d.l fa0, a0, rmm ::
+  inputs: a0=0xffdfffffffffffff, fcsr=0x00000000
+  output: fa0=0xc340000000000001, fcsr=0x00000001
+fcvt.d.l fa0, a0 ::
+  inputs: a0=0x0020000000000001, fcsr=0x00000000
+  output: fa0=0x4340000000000000, fcsr=0x00000001
+fcvt.d.l fa0, a0 ::
+  inputs: a0=0x0020000000000003, fcsr=0x00000000
+  output: fa0=0x4340000000000002, fcsr=0x00000001
+fcvt.d.l fa0, a0 ::
+  inputs: a0=0x0020000000000001, fcsr=0x00000020
+  output: fa0=0x4340000000000000, fcsr=0x00000021
+fcvt.d.l fa0, a0 ::
+  inputs: a0=0xffdfffffffffffff, fcsr=0x00000020
+  output: fa0=0xc340000000000000, fcsr=0x00000021
+fcvt.d.l fa0, a0 ::
+  inputs: a0=0x0020000000000001, fcsr=0x00000040
+  output: fa0=0x4340000000000000, fcsr=0x00000041
+fcvt.d.l fa0, a0 ::
+  inputs: a0=0xffdfffffffffffff, fcsr=0x00000040
+  output: fa0=0xc340000000000001, fcsr=0x00000041
+fcvt.d.l fa0, a0 ::
+  inputs: a0=0x0020000000000001, fcsr=0x00000060
+  output: fa0=0x4340000000000001, fcsr=0x00000061
+fcvt.d.l fa0, a0 ::
+  inputs: a0=0xffdfffffffffffff, fcsr=0x00000060
+  output: fa0=0xc340000000000000, fcsr=0x00000061
+fcvt.d.l fa0, a0 ::
+  inputs: a0=0x0020000000000001, fcsr=0x00000080
+  output: fa0=0x4340000000000001, fcsr=0x00000081
+fcvt.d.l fa0, a0 ::
+  inputs: a0=0xffdfffffffffffff, fcsr=0x00000080
+  output: fa0=0xc340000000000001, fcsr=0x00000081
+fcvt.d.lu fa0, a0 ::
+  inputs: a0=0x0000000000000000, fcsr=0x00000000
+  output: fa0=0x0000000000000000, fcsr=0x00000000
+fcvt.d.lu fa0, a0 ::
+  inputs: a0=0xfffffffffffff800, fcsr=0x00000000
+  output: fa0=0x43efffffffffffff, fcsr=0x00000000
+fcvt.d.lu fa0, a0 ::
+  inputs: a0=0xffffffffffffffff, fcsr=0x00000000
+  output: fa0=0x43f0000000000000, fcsr=0x00000001
+fcvt.d.lu fa0, a0, rne ::
+  inputs: a0=0x0020000000000001, fcsr=0x00000000
+  output: fa0=0x4340000000000000, fcsr=0x00000001
+fcvt.d.lu fa0, a0, rne ::
+  inputs: a0=0x0020000000000003, fcsr=0x00000000
+  output: fa0=0x4340000000000002, fcsr=0x00000001
+fcvt.d.lu fa0, a0, rtz ::
+  inputs: a0=0x0020000000000001, fcsr=0x00000000
+  output: fa0=0x4340000000000000, fcsr=0x00000001
+fcvt.d.lu fa0, a0, rdn ::
+  inputs: a0=0x0020000000000001, fcsr=0x00000000
+  output: fa0=0x4340000000000000, fcsr=0x00000001
+fcvt.d.lu fa0, a0, rup ::
+  inputs: a0=0x0020000000000001, fcsr=0x00000000
+  output: fa0=0x4340000000000001, fcsr=0x00000001
+fcvt.d.lu fa0, a0, rmm ::
+  inputs: a0=0x0020000000000001, fcsr=0x00000000
+  output: fa0=0x4340000000000001, fcsr=0x00000001
+fcvt.d.lu fa0, a0 ::
+  inputs: a0=0x0020000000000001, fcsr=0x00000000
+  output: fa0=0x4340000000000000, fcsr=0x00000001
+fcvt.d.lu fa0, a0 ::
+  inputs: a0=0x0020000000000003, fcsr=0x00000000
+  output: fa0=0x4340000000000002, fcsr=0x00000001
+fcvt.d.lu fa0, a0 ::
+  inputs: a0=0x0020000000000001, fcsr=0x00000020
+  output: fa0=0x4340000000000000, fcsr=0x00000021
+fcvt.d.lu fa0, a0 ::
+  inputs: a0=0x0020000000000001, fcsr=0x00000040
+  output: fa0=0x4340000000000000, fcsr=0x00000041
+fcvt.d.lu fa0, a0 ::
+  inputs: a0=0x0020000000000001, fcsr=0x00000060
+  output: fa0=0x4340000000000001, fcsr=0x00000061
+fcvt.d.lu fa0, a0 ::
+  inputs: a0=0x0020000000000001, fcsr=0x00000080
+  output: fa0=0x4340000000000001, fcsr=0x00000081
+fmv.d.x fa0, a0 ::
+  inputs: a0=0xabcdef0123456789, fcsr=0x00000000
+  output: fa0=0xabcdef0123456789, fcsr=0x00000000
diff --git a/none/tests/riscv64/float64.vgtest b/none/tests/riscv64/float64.vgtest
new file mode 100644
index 000000000..df9fcecd3
--- /dev/null
+++ b/none/tests/riscv64/float64.vgtest
@@ -0,0 +1,2 @@
+prog: float64
+vgopts: -q
diff --git a/none/tests/riscv64/integer.c b/none/tests/riscv64/integer.c
new file mode 100644
index 000000000..689273e8f
--- /dev/null
+++ b/none/tests/riscv64/integer.c
@@ -0,0 +1,824 @@
+/* Tests for the RV64I base integer instruction set. */
+
+#include "testinst.h"
+
+static void test_integer_shared(void)
+{
+   printf("RV64I base instruction set, shared operations\n");
+
+   /* ----------------- lui rd, imm[31:12] ------------------ */
+   TESTINST_1_0(4, "lui a0, 0", a0);
+   TESTINST_1_0(4, "lui a0, 1", a0);
+   TESTINST_1_0(4, "lui a0, 2", a0);
+   TESTINST_1_0(4, "lui a0, 4", a0);
+   TESTINST_1_0(4, "lui a0, 8", a0);
+   TESTINST_1_0(4, "lui a0, 16", a0);
+   TESTINST_1_0(4, "lui a0, 32", a0);
+   TESTINST_1_0(4, "lui a0, 64", a0);
+   TESTINST_1_0(4, "lui a0, 128", a0);
+   TESTINST_1_0(4, "lui a0, 256", a0);
+   TESTINST_1_0(4, "lui a0, 512", a0);
+   TESTINST_1_0(4, "lui a0, 1024", a0);
+   TESTINST_1_0(4, "lui a0, 2048", a0);
+   TESTINST_1_0(4, "lui a0, 4096", a0);
+   TESTINST_1_0(4, "lui a0, 8192", a0);
+   TESTINST_1_0(4, "lui a0, 16384", a0);
+   TESTINST_1_0(4, "lui a0, 32768", a0);
+   TESTINST_1_0(4, "lui a0, 65536", a0);
+   TESTINST_1_0(4, "lui a0, 131072", a0);
+   TESTINST_1_0(4, "lui a0, 262144", a0);
+   TESTINST_1_0(4, "lui a0, 524288", a0);
+   TESTINST_1_0(4, "lui a0, 1048575", a0);
+
+   TESTINST_1_0(4, "lui t6, 1", t6);
+   TESTINST_1_0(4, "lui zero, 1", zero);
+
+   /* ---------------- auipc rd, imm[31:12] ----------------- */
+   TESTINST_1_0_AUIPC(4, "auipc a0, 0", a0);
+   TESTINST_1_0_AUIPC(4, "auipc a0, 1", a0);
+   TESTINST_1_0_AUIPC(4, "auipc a0, 2", a0);
+   TESTINST_1_0_AUIPC(4, "auipc a0, 4", a0);
+   TESTINST_1_0_AUIPC(4, "auipc a0, 8", a0);
+   TESTINST_1_0_AUIPC(4, "auipc a0, 16", a0);
+   TESTINST_1_0_AUIPC(4, "auipc a0, 32", a0);
+   TESTINST_1_0_AUIPC(4, "auipc a0, 64", a0);
+   TESTINST_1_0_AUIPC(4, "auipc a0, 128", a0);
+   TESTINST_1_0_AUIPC(4, "auipc a0, 256", a0);
+   TESTINST_1_0_AUIPC(4, "auipc a0, 512", a0);
+   TESTINST_1_0_AUIPC(4, "auipc a0, 1024", a0);
+   TESTINST_1_0_AUIPC(4, "auipc a0, 2048", a0);
+   TESTINST_1_0_AUIPC(4, "auipc a0, 4096", a0);
+   TESTINST_1_0_AUIPC(4, "auipc a0, 8192", a0);
+   TESTINST_1_0_AUIPC(4, "auipc a0, 16384", a0);
+   TESTINST_1_0_AUIPC(4, "auipc a0, 32768", a0);
+   TESTINST_1_0_AUIPC(4, "auipc a0, 65536", a0);
+   TESTINST_1_0_AUIPC(4, "auipc a0, 131072", a0);
+   TESTINST_1_0_AUIPC(4, "auipc a0, 262144", a0);
+   TESTINST_1_0_AUIPC(4, "auipc a0, 524288", a0);
+   TESTINST_1_0_AUIPC(4, "auipc a0, 1048575", a0);
+
+   TESTINST_1_0_AUIPC(4, "auipc t6, 1", t6);
+   TESTINST_1_0_AUIPC(4, "auipc zero, 1", zero);
+
+   /* ------------------ jal rd, imm[20:1] ------------------ */
+   /* Note: Only the imm[11:1] range is tested. */
+   TESTINST_1_0_JAL_RANGE(4, "jal t0, .+4", 4, t0);
+   TESTINST_1_0_JAL_RANGE(4, "jal t0, .+6", 6, t0);
+   TESTINST_1_0_JAL_RANGE(4, "jal t0, .+8", 8, t0);
+   TESTINST_1_0_JAL_RANGE(4, "jal t0, .+16", 16, t0);
+   TESTINST_1_0_JAL_RANGE(4, "jal t0, .+32", 32, t0);
+   TESTINST_1_0_JAL_RANGE(4, "jal t0, .+64", 64, t0);
+   TESTINST_1_0_JAL_RANGE(4, "jal t0, .+128", 128, t0);
+   TESTINST_1_0_JAL_RANGE(4, "jal t0, .+256", 256, t0);
+   TESTINST_1_0_JAL_RANGE(4, "jal t0, .+512", 512, t0);
+   TESTINST_1_0_JAL_RANGE(4, "jal t0, .+1024", 1024, t0);
+   TESTINST_1_0_JAL_RANGE(4, "jal t0, .+2048", 2048, t0);
+   TESTINST_1_0_JAL_RANGE(4, "jal t0, .-4", -4, t0);
+   TESTINST_1_0_JAL_RANGE(4, "jal t0, .-6", -6, t0);
+   TESTINST_1_0_JAL_RANGE(4, "jal t0, .-2048", -2048, t0);
+
+   TESTINST_1_0_JAL_RANGE(4, "jal t6, .+4", 4, t6);
+   TESTINST_1_0_JAL_RANGE(4, "jal zero, .+4", 4, zero);
+
+   /* --------------- jalr rd, imm[11:0](rs1) --------------- */
+   TESTINST_1_1_JALR_RANGE(4, "jalr ra, 0(t0)", "1f+4", 4, ra, t0);
+   TESTINST_1_1_JALR_RANGE(4, "jalr ra, 0(t0)", "1f+6", 6, ra, t0);
+   TESTINST_1_1_JALR_RANGE(4, "jalr ra, 0(t0)", "1f+8", 8, ra, t0);
+   TESTINST_1_1_JALR_RANGE(4, "jalr ra, 0(t0)", "1f-4", -4, ra, t0);
+   TESTINST_1_1_JALR_RANGE(4, "jalr ra, 0(t0)", "1f-6", -6, ra, t0);
+   TESTINST_1_1_JALR_RANGE(4, "jalr ra, 0(t0)", "1f-8", -8, ra, t0);
+
+   TESTINST_1_1_JALR_RANGE(4, "jalr ra, 0(t0)", "1f-8", -8, ra, t0);
+   TESTINST_1_1_JALR_RANGE(4, "jalr ra, 1(t0)", "1f-9", -8, ra, t0);
+   TESTINST_1_1_JALR_RANGE(4, "jalr ra, 2(t0)", "1f-10", -8, ra, t0);
+   TESTINST_1_1_JALR_RANGE(4, "jalr ra, 4(t0)", "1f-12", -8, ra, t0);
+   TESTINST_1_1_JALR_RANGE(4, "jalr ra, 8(t0)", "1f-16", -8, ra, t0);
+   TESTINST_1_1_JALR_RANGE(4, "jalr ra, 16(t0)", "1f-24", -8, ra, t0);
+   TESTINST_1_1_JALR_RANGE(4, "jalr ra, 32(t0)", "1f-40", -8, ra, t0);
+   TESTINST_1_1_JALR_RANGE(4, "jalr ra, 64(t0)", "1f-72", -8, ra, t0);
+   TESTINST_1_1_JALR_RANGE(4, "jalr ra, 128(t0)", "1f-136", -8, ra, t0);
+   TESTINST_1_1_JALR_RANGE(4, "jalr ra, 256(t0)", "1f-264", -8, ra, t0);
+   TESTINST_1_1_JALR_RANGE(4, "jalr ra, 512(t0)", "1f-520", -8, ra, t0);
+   TESTINST_1_1_JALR_RANGE(4, "jalr ra, 1024(t0)", "1f-1032", -8, ra, t0);
+   TESTINST_1_1_JALR_RANGE(4, "jalr ra, 2047(t0)", "1f-2055", -8, ra, t0);
+   TESTINST_1_1_JALR_RANGE(4, "jalr ra, -1(t0)", "1f-7", -8, ra, t0);
+   TESTINST_1_1_JALR_RANGE(4, "jalr ra, -2048(t0)", "1f+2040", -8, ra, t0);
+
+   TESTINST_1_1_JALR_RANGE(4, "jalr ra, 0(t6)", "1f+4", 4, ra, t6);
+   TESTINST_1_1_JALR_RANGE(4, "jalr zero, 0(a0)", "1f+4", 4, zero, a0);
+
+   /* --------------- beq rs1, rs2, imm[12:1] --------------- */
+   /* Note: Only the imm[11:1] range is tested. */
+   TESTINST_0_2_Bxx_RANGE(4, "beq a0, a1, .+4", 0, 0, 4, a0, a1);
+   TESTINST_0_2_Bxx_RANGE(4, "beq a0, a1, .+6", 0, 0, 6, a0, a1);
+   TESTINST_0_2_Bxx_RANGE(4, "beq a0, a1, .+8", 0, 0, 8, a0, a1);
+   TESTINST_0_2_Bxx_RANGE(4, "beq a0, a1, .+16", 0, 0, 16, a0, a1);
+   TESTINST_0_2_Bxx_RANGE(4, "beq a0, a1, .+32", 0, 0, 32, a0, a1);
+   TESTINST_0_2_Bxx_RANGE(4, "beq a0, a1, .+64", 0, 0, 64, a0, a1);
+   TESTINST_0_2_Bxx_RANGE(4, "beq a0, a1, .+128", 0, 0, 128, a0, a1);
+   TESTINST_0_2_Bxx_RANGE(4, "beq a0, a1, .+256", 0, 0, 256, a0, a1);
+   TESTINST_0_2_Bxx_RANGE(4, "beq a0, a1, .+512", 0, 0, 512, a0, a1);
+   TESTINST_0_2_Bxx_RANGE(4, "beq a0, a1, .+1024", 0, 0, 1024, a0, a1);
+   TESTINST_0_2_Bxx_RANGE(4, "beq a0, a1, .+2048", 0, 0, 2048, a0, a1);
+   TESTINST_0_2_Bxx_RANGE(4, "beq a0, a1, .-4", 0, 0, -4, a0, a1);
+   TESTINST_0_2_Bxx_RANGE(4, "beq a0, a1, .-6", 0, 0, -6, a0, a1);
+   TESTINST_0_2_Bxx_RANGE(4, "beq a0, a1, .-2048", 0, 0, -2048, a0, a1);
+
+   TESTINST_0_2_Bxx_RANGE(4, "beq t5, t6, .+4", 0, 0, 4, t5, t6);
+   TESTINST_0_2_Bxx_COND(4, "beq a0, a1, 1f", 0, 0, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "beq a0, a1, 1f", 0, 1, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "beq a0, a1, 1f", 1, 0, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "beq a0, a1, 1f", 1, 1, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "beq a0, zero, 1f", 0, 0, a0, zero);
+   TESTINST_0_2_Bxx_COND(4, "beq a0, zero, 1f", 1, 0, a0, zero);
+   TESTINST_0_2_Bxx_COND(4, "beq zero, a0, 1f", 0, 0, zero, a0);
+   TESTINST_0_2_Bxx_COND(4, "beq zero, a0, 1f", 0, 1, zero, a0);
+   TESTINST_0_2_Bxx_COND(4, "beq a0, a1, 1f", 0, -1, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "beq a0, a1, 1f", -1, 0, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "beq a0, a1, 1f", -1, -1, a0, a1);
+
+   /* --------------- bne rs1, rs2, imm[12:1] --------------- */
+   TESTINST_0_2_Bxx_COND(4, "bne a0, a1, 1f", 0, 0, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "bne a0, a1, 1f", 0, 1, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "bne a0, a1, 1f", 1, 0, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "bne a0, a1, 1f", 1, 1, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "bne a0, zero, 1f", 0, 0, a0, zero);
+   TESTINST_0_2_Bxx_COND(4, "bne a0, zero, 1f", 1, 0, a0, zero);
+   TESTINST_0_2_Bxx_COND(4, "bne zero, a0, 1f", 0, 0, zero, a0);
+   TESTINST_0_2_Bxx_COND(4, "bne zero, a0, 1f", 0, 1, zero, a0);
+   TESTINST_0_2_Bxx_COND(4, "bne a0, a1, 1f", 0, -1, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "bne a0, a1, 1f", -1, 0, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "bne a0, a1, 1f", -1, -1, a0, a1);
+
+   /* --------------- blt rs1, rs2, imm[12:1] --------------- */
+   TESTINST_0_2_Bxx_COND(4, "blt a0, a1, 1f", 0, 0, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "blt a0, a1, 1f", 0, 1, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "blt a0, a1, 1f", 1, 0, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "blt a0, a1, 1f", 1, 1, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "blt a0, zero, 1f", 0, 0, a0, zero);
+   TESTINST_0_2_Bxx_COND(4, "blt a0, zero, 1f", 1, 0, a0, zero);
+   TESTINST_0_2_Bxx_COND(4, "blt zero, a0, 1f", 0, 0, zero, a0);
+   TESTINST_0_2_Bxx_COND(4, "blt zero, a0, 1f", 0, 1, zero, a0);
+   TESTINST_0_2_Bxx_COND(4, "blt a0, a1, 1f", 0, -1, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "blt a0, a1, 1f", -1, 0, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "blt a0, a1, 1f", -1, -1, a0, a1);
+
+   /* --------------- bge rs1, rs2, imm[12:1] --------------- */
+   TESTINST_0_2_Bxx_COND(4, "bge a0, a1, 1f", 0, 0, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "bge a0, a1, 1f", 0, 1, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "bge a0, a1, 1f", 1, 0, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "bge a0, a1, 1f", 1, 1, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "bge a0, zero, 1f", 0, 0, a0, zero);
+   TESTINST_0_2_Bxx_COND(4, "bge a0, zero, 1f", 1, 0, a0, zero);
+   TESTINST_0_2_Bxx_COND(4, "bge zero, a0, 1f", 0, 0, zero, a0);
+   TESTINST_0_2_Bxx_COND(4, "bge zero, a0, 1f", 0, 1, zero, a0);
+   TESTINST_0_2_Bxx_COND(4, "bge a0, a1, 1f", 0, -1, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "bge a0, a1, 1f", -1, 0, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "bge a0, a1, 1f", -1, -1, a0, a1);
+
+   /* -------------- bltu rs1, rs2, imm[12:1] --------------- */
+   TESTINST_0_2_Bxx_COND(4, "bltu a0, a1, 1f", 0, 0, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "bltu a0, a1, 1f", 0, 1, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "bltu a0, a1, 1f", 1, 0, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "bltu a0, a1, 1f", 1, 1, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "bltu a0, zero, 1f", 0, 0, a0, zero);
+   TESTINST_0_2_Bxx_COND(4, "bltu a0, zero, 1f", 1, 0, a0, zero);
+   TESTINST_0_2_Bxx_COND(4, "bltu zero, a0, 1f", 0, 0, zero, a0);
+   TESTINST_0_2_Bxx_COND(4, "bltu zero, a0, 1f", 0, 1, zero, a0);
+   TESTINST_0_2_Bxx_COND(4, "bltu a0, a1, 1f", 0, -1, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "bltu a0, a1, 1f", -1, 0, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "bltu a0, a1, 1f", -1, -1, a0, a1);
+
+   /* -------------- bgeu rs1, rs2, imm[12:1] --------------- */
+   TESTINST_0_2_Bxx_COND(4, "bgeu a0, a1, 1f", 0, 0, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "bgeu a0, a1, 1f", 0, 1, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "bgeu a0, a1, 1f", 1, 0, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "bgeu a0, a1, 1f", 1, 1, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "bgeu a0, zero, 1f", 0, 0, a0, zero);
+   TESTINST_0_2_Bxx_COND(4, "bgeu a0, zero, 1f", 1, 0, a0, zero);
+   TESTINST_0_2_Bxx_COND(4, "bgeu zero, a0, 1f", 0, 0, zero, a0);
+   TESTINST_0_2_Bxx_COND(4, "bgeu zero, a0, 1f", 0, 1, zero, a0);
+   TESTINST_0_2_Bxx_COND(4, "bgeu a0, a1, 1f", 0, -1, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "bgeu a0, a1, 1f", -1, 0, a0, a1);
+   TESTINST_0_2_Bxx_COND(4, "bgeu a0, a1, 1f", -1, -1, a0, a1);
+
+   /* ---------------- lb rd, imm[11:0](rs1) ---------------- */
+   TESTINST_1_1_LOAD(4, "lb a0, 0(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lb a0, 1(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lb a0, 2(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lb a0, 4(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lb a0, 8(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lb a0, 16(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lb a0, 32(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lb a0, 64(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lb a0, 128(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lb a0, 256(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lb a0, 512(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lb a0, 1024(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lb a0, 2047(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lb a0, -1(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lb a0, -2048(a1)", a0, a1);
+
+   TESTINST_1_1_LOAD(4, "lb a4, 0(a5)", a4, a5);
+   TESTINST_1_1_LOAD(4, "lb zero, 0(a0)", zero, a0);
+
+   /* ---------------- lh rd, imm[11:0](rs1) ---------------- */
+   TESTINST_1_1_LOAD(4, "lh a0, 0(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lh a0, 2(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lh a0, 4(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lh a0, 8(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lh a0, 16(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lh a0, 32(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lh a0, 64(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lh a0, 128(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lh a0, 256(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lh a0, 512(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lh a0, 1024(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lh a0, 2046(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lh a0, -2(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lh a0, -2048(a1)", a0, a1);
+
+   TESTINST_1_1_LOAD(4, "lh a4, 0(a5)", a4, a5);
+   TESTINST_1_1_LOAD(4, "lh zero, 0(a0)", zero, a0);
+
+   /* ---------------- lw rd, imm[11:0](rs1) ---------------- */
+   TESTINST_1_1_LOAD(4, "lw a0, 0(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lw a0, 4(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lw a0, 8(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lw a0, 16(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lw a0, 32(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lw a0, 64(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lw a0, 128(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lw a0, 256(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lw a0, 512(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lw a0, 1024(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lw a0, 2044(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lw a0, -4(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lw a0, -2048(a1)", a0, a1);
+
+   TESTINST_1_1_LOAD(4, "lw a4, 0(a5)", a4, a5);
+   TESTINST_1_1_LOAD(4, "lw zero, 0(a0)", zero, a0);
+
+   /* --------------- lbu rd, imm[11:0](rs1) ---------------- */
+   TESTINST_1_1_LOAD(4, "lbu a0, 0(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lbu a0, 1(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lbu a0, 2(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lbu a0, 4(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lbu a0, 8(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lbu a0, 16(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lbu a0, 32(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lbu a0, 64(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lbu a0, 128(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lbu a0, 256(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lbu a0, 512(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lbu a0, 1024(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lbu a0, 2047(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lbu a0, -1(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lbu a0, -2048(a1)", a0, a1);
+
+   TESTINST_1_1_LOAD(4, "lbu a4, 0(a5)", a4, a5);
+   TESTINST_1_1_LOAD(4, "lbu zero, 0(a0)", zero, a0);
+
+   /* --------------- lhu rd, imm[11:0](rs1) ---------------- */
+   TESTINST_1_1_LOAD(4, "lhu a0, 0(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lhu a0, 2(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lhu a0, 4(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lhu a0, 8(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lhu a0, 16(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lhu a0, 32(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lhu a0, 64(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lhu a0, 128(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lhu a0, 256(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lhu a0, 512(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lhu a0, 1024(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lhu a0, 2046(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lhu a0, -2(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lhu a0, -2048(a1)", a0, a1);
+
+   TESTINST_1_1_LOAD(4, "lhu a4, 0(a5)", a4, a5);
+   TESTINST_1_1_LOAD(4, "lhu zero, 0(a0)", zero, a0);
+
+   /* --------------- sb rs2, imm[11:0](rs1) ---------------- */
+   TESTINST_0_2_STORE(4, "sb a0, 0(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sb a0, 1(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sb a0, 2(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sb a0, 4(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sb a0, 8(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sb a0, 16(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sb a0, 32(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sb a0, 64(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sb a0, 128(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sb a0, 256(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sb a0, 512(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sb a0, 1024(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sb a0, 2047(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sb a0, -1(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sb a0, -2048(a1)", 0xabcdef0123456789, a0, a1);
+
+   TESTINST_0_2_STORE(4, "sb a4, 0(a5)", 0xabcdef0123456789, a4, a5);
+
+   /* --------------- sh rs2, imm[11:0](rs1) ---------------- */
+   TESTINST_0_2_STORE(4, "sh a0, 0(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sh a0, 2(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sh a0, 4(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sh a0, 8(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sh a0, 16(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sh a0, 32(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sh a0, 64(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sh a0, 128(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sh a0, 256(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sh a0, 512(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sh a0, 1024(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sh a0, 2046(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sh a0, -2(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sh a0, -2048(a1)", 0xabcdef0123456789, a0, a1);
+
+   TESTINST_0_2_STORE(4, "sh a4, 0(a5)", 0xabcdef0123456789, a4, a5);
+
+   /* --------------- sw rs2, imm[11:0](rs1) ---------------- */
+   TESTINST_0_2_STORE(4, "sw a0, 0(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sw a0, 4(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sw a0, 8(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sw a0, 16(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sw a0, 32(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sw a0, 64(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sw a0, 128(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sw a0, 256(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sw a0, 512(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sw a0, 1024(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sw a0, 2044(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sw a0, -4(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sw a0, -2048(a1)", 0xabcdef0123456789, a0, a1);
+
+   TESTINST_0_2_STORE(4, "sw a4, 0(a5)", 0xabcdef0123456789, a4, a5);
+
+   /* --------------- addi rd, rs1, imm[11:0] --------------- */
+   TESTINST_1_1(4, "addi a0, a1, 1", 0x0000000000001000, a0, a1);
+   TESTINST_1_1(4, "addi a0, a1, 2", 0x0000000000001000, a0, a1);
+   TESTINST_1_1(4, "addi a0, a1, 4", 0x0000000000001000, a0, a1);
+   TESTINST_1_1(4, "addi a0, a1, 8", 0x0000000000001000, a0, a1);
+   TESTINST_1_1(4, "addi a0, a1, 16", 0x0000000000001000, a0, a1);
+   TESTINST_1_1(4, "addi a0, a1, 32", 0x0000000000001000, a0, a1);
+   TESTINST_1_1(4, "addi a0, a1, 64", 0x0000000000001000, a0, a1);
+   TESTINST_1_1(4, "addi a0, a1, 128", 0x0000000000001000, a0, a1);
+   TESTINST_1_1(4, "addi a0, a1, 256", 0x0000000000001000, a0, a1);
+   TESTINST_1_1(4, "addi a0, a1, 1024", 0x0000000000001000, a0, a1);
+   TESTINST_1_1(4, "addi a0, a1, 2047", 0x0000000000001000, a0, a1);
+   TESTINST_1_1(4, "addi a0, a1, -1", 0x0000000000001000, a0, a1);
+   TESTINST_1_1(4, "addi a0, a1, -2048", 0x0000000000001000, a0, a1);
+
+   TESTINST_1_1(4, "addi a0, a1, 1", 0x000000007fffffff, a0, a1);
+   TESTINST_1_1(4, "addi a0, a1, 1", 0x00000000fffffffe, a0, a1);
+   TESTINST_1_1(4, "addi a0, a1, 1", 0x00000000ffffffff, a0, a1);
+   TESTINST_1_1(4, "addi t5, t6, 1", 0x0000000000001000, t5, t6);
+   TESTINST_1_1(4, "addi zero, a0, 1", 0x0000000000001000, zero, a0);
+
+   /* --------------- slti rd, rs1, imm[11:0] --------------- */
+   TESTINST_1_1(4, "slti a0, a1, 0", 0x0000000000000000, a0, a1);
+   TESTINST_1_1(4, "slti a0, a1, 0", 0x0000000000000001, a0, a1);
+   TESTINST_1_1(4, "slti a0, a1, 0", 0xffffffffffffffff, a0, a1);
+   TESTINST_1_1(4, "slti a0, a1, 0x7ff", 0x00000000000007ff, a0, a1);
+   TESTINST_1_1(4, "slti a0, a1, 0x7ff", 0x0000000000000800, a0, a1);
+   TESTINST_1_1(4, "slti a0, a1, 0xffffffffffffffff", 0xffffffffffffffff, a0,
+                a1);
+   TESTINST_1_1(4, "slti a0, a1, 0xffffffffffffffff", 0x0000000000000000, a0,
+                a1);
+
+   TESTINST_1_1(4, "slti t5, t6, 0", 0x0000000000000000, t5, t6);
+   TESTINST_1_1(4, "slti t5, t6, 0", 0x0000000000000001, t5, t6);
+   TESTINST_1_1(4, "slti zero, a0, 1", 0x0000000000000000, zero, a0);
+
+   /* -------------- sltiu rd, rs1, imm[11:0] --------------- */
+   TESTINST_1_1(4, "sltiu a0, a1, 0", 0x0000000000000000, a0, a1);
+   TESTINST_1_1(4, "sltiu a0, a1, 0", 0x0000000000000001, a0, a1);
+   TESTINST_1_1(4, "sltiu a0, a1, 0", 0xffffffffffffffff, a0, a1);
+   TESTINST_1_1(4, "sltiu a0, a1, 0x7ff", 0x00000000000007ff, a0, a1);
+   TESTINST_1_1(4, "sltiu a0, a1, 0x7ff", 0x0000000000000800, a0, a1);
+   TESTINST_1_1(4, "sltiu a0, a1, 0xffffffffffffffff", 0xffffffffffffffff, a0,
+                a1);
+   TESTINST_1_1(4, "sltiu a0, a1, 0xffffffffffffffff", 0x0000000000000000, a0,
+                a1);
+
+   TESTINST_1_1(4, "sltiu t5, t6, 0", 0x0000000000000000, t5, t6);
+   TESTINST_1_1(4, "sltiu t5, t6, 0", 0x0000000000000001, t5, t6);
+   TESTINST_1_1(4, "sltiu zero, a0, 1", 0x0000000000000000, zero, a0);
+
+   /* --------------- xori rd, rs1, imm[11:0] --------------- */
+   TESTINST_1_1(4, "xori a0, a1, 0", 0x0000ffff0000ffff, a0, a1);
+   TESTINST_1_1(4, "xori a0, a1, 0", 0xffff0000ffff0000, a0, a1);
+   TESTINST_1_1(4, "xori a0, a1, 0x7ff", 0x0000ffff0000ffff, a0, a1);
+   TESTINST_1_1(4, "xori a0, a1, 0x7ff", 0xffff0000ffff0000, a0, a1);
+   TESTINST_1_1(4, "xori a0, a1, 0xffffffffffffffff", 0x0000ffff0000ffff, a0,
+                a1);
+   TESTINST_1_1(4, "xori a0, a1, 0xffffffffffffffff", 0xffff0000ffff0000, a0,
+                a1);
+
+   TESTINST_1_1(4, "xori t5, t6, 0", 0x0000ffff0000ffff, t5, t6);
+   TESTINST_1_1(4, "xori zero, a0, 0x7ff", 0x0000ffff0000ffff, zero, a0);
+
+   /* --------------- ori rd, rs1, imm[11:0] ---------------- */
+   TESTINST_1_1(4, "ori a0, a1, 0", 0x0000ffff0000ffff, a0, a1);
+   TESTINST_1_1(4, "ori a0, a1, 0", 0xffff0000ffff0000, a0, a1);
+   TESTINST_1_1(4, "ori a0, a1, 0x7ff", 0x0000ffff0000ffff, a0, a1);
+   TESTINST_1_1(4, "ori a0, a1, 0x7ff", 0xffff0000ffff0000, a0, a1);
+   TESTINST_1_1(4, "ori a0, a1, 0xffffffffffffffff", 0x0000ffff0000ffff, a0,
+                a1);
+   TESTINST_1_1(4, "ori a0, a1, 0xffffffffffffffff", 0xffff0000ffff0000, a0,
+                a1);
+
+   TESTINST_1_1(4, "ori t5, t6, 0", 0x0000ffff0000ffff, t5, t6);
+   TESTINST_1_1(4, "ori zero, a0, 0x7ff", 0x0000ffff0000ffff, zero, a0);
+
+   /* --------------- andi rd, rs1, imm[11:0] --------------- */
+   TESTINST_1_1(4, "andi a0, a1, 0", 0x0000ffff0000ffff, a0, a1);
+   TESTINST_1_1(4, "andi a0, a1, 0", 0xffff0000ffff0000, a0, a1);
+   TESTINST_1_1(4, "andi a0, a1, 0x7ff", 0x0000ffff0000ffff, a0, a1);
+   TESTINST_1_1(4, "andi a0, a1, 0x7ff", 0xffff0000ffff0000, a0, a1);
+   TESTINST_1_1(4, "andi a0, a1, 0xffffffffffffffff", 0x0000ffff0000ffff, a0,
+                a1);
+   TESTINST_1_1(4, "andi a0, a1, 0xffffffffffffffff", 0xffff0000ffff0000, a0,
+                a1);
+
+   TESTINST_1_1(4, "andi t5, t6, 0", 0x0000ffff0000ffff, t5, t6);
+   TESTINST_1_1(4, "andi zero, a0, 0x7ff", 0x0000ffff0000ffff, zero, a0);
+
+   /* --------------- slli rd, rs1, uimm[5:0] --------------- */
+   TESTINST_1_1(4, "slli a0, a1, 0", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "slli a0, a1, 1", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "slli a0, a1, 2", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "slli a0, a1, 4", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "slli a0, a1, 8", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "slli a0, a1, 16", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "slli a0, a1, 32", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "slli a0, a1, 63", 0xabcdef0123456789, a0, a1);
+
+   TESTINST_1_1(4, "slli t5, t6, 1", 0xabcdef0123456789, t5, t6);
+   TESTINST_1_1(4, "slli zero, a0, 1", 0xabcdef0123456789, zero, a0);
+
+   /* --------------- srli rd, rs1, uimm[5:0] --------------- */
+   TESTINST_1_1(4, "srli a0, a1, 0", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "srli a0, a1, 1", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "srli a0, a1, 2", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "srli a0, a1, 4", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "srli a0, a1, 8", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "srli a0, a1, 16", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "srli a0, a1, 32", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "srli a0, a1, 63", 0xabcdef0123456789, a0, a1);
+
+   TESTINST_1_1(4, "srli t5, t6, 1", 0xabcdef0123456789, t5, t6);
+   TESTINST_1_1(4, "srli zero, a0, 1", 0xabcdef0123456789, zero, a0);
+
+   /* --------------- srai rd, rs1, uimm[5:0] --------------- */
+   TESTINST_1_1(4, "srai a0, a1, 0", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "srai a0, a1, 1", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "srai a0, a1, 2", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "srai a0, a1, 4", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "srai a0, a1, 8", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "srai a0, a1, 16", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "srai a0, a1, 32", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "srai a0, a1, 63", 0xabcdef0123456789, a0, a1);
+
+   TESTINST_1_1(4, "srai t5, t6, 1", 0xabcdef0123456789, t5, t6);
+   TESTINST_1_1(4, "srai zero, a0, 1", 0xabcdef0123456789, zero, a0);
+
+   /* ------------------ add rd, rs1, rs2 ------------------- */
+   TESTINST_1_2(4, "add a0, a1, a2", 0x0000000000001000, 0x0000000000002000, a0,
+                a1, a2);
+   TESTINST_1_2(4, "add a0, a1, a2", 0x000000007fffffff, 0x0000000000000001, a0,
+                a1, a2);
+   TESTINST_1_2(4, "add a0, a1, a2", 0x00000000fffffffe, 0x0000000000000001, a0,
+                a1, a2);
+   TESTINST_1_2(4, "add a0, a1, a2", 0x00000000ffffffff, 0x0000000000000001, a0,
+                a1, a2);
+   TESTINST_1_2(4, "add a0, a1, a2", 0xfffffffffffffffe, 0x0000000000000001, a0,
+                a1, a2);
+   TESTINST_1_2(4, "add a0, a1, a2", 0xffffffffffffffff, 0x0000000000000001, a0,
+                a1, a2);
+
+   TESTINST_1_2(4, "add t4, t5, t6", 0x0000000000001000, 0x0000000000002000, t4,
+                t5, t6);
+   TESTINST_1_2(4, "add zero, a0, a1", 0x0000000000001000, 0x0000000000002000,
+                zero, a0, a1);
+
+   /* ------------------ sub rd, rs1, rs2 ------------------- */
+   TESTINST_1_2(4, "sub a0, a1, a2", 0x0000000000001000, 0x0000000000000fff, a0,
+                a1, a2);
+   TESTINST_1_2(4, "sub a0, a1, a2", 0x0000000000001000, 0x0000000000001000, a0,
+                a1, a2);
+   TESTINST_1_2(4, "sub a0, a1, a2", 0x0000000000001000, 0x0000000000001001, a0,
+                a1, a2);
+   TESTINST_1_2(4, "sub a0, a1, a2", 0xffffffffffffffff, 0x0000000000000000, a0,
+                a1, a2);
+   TESTINST_1_2(4, "sub a0, a1, a2", 0x0000000100000000, 0x0000000000000001, a0,
+                a1, a2);
+
+   TESTINST_1_2(4, "sub t4, t5, t6", 0x0000000000001000, 0x0000000000000fff, t4,
+                t5, t6);
+   TESTINST_1_2(4, "sub zero, a0, a1", 0x0000000000001000, 0x0000000000000fff,
+                zero, a0, a1);
+
+   /* ------------------ sll rd, rs1, rs2 ------------------- */
+   TESTINST_1_2(4, "sll a0, a1, a2", 0xabcdef0123456789, 0, a0, a1, a2);
+   TESTINST_1_2(4, "sll a0, a1, a2", 0xabcdef0123456789, 1, a0, a1, a2);
+   TESTINST_1_2(4, "sll a0, a1, a2", 0xabcdef0123456789, 2, a0, a1, a2);
+   TESTINST_1_2(4, "sll a0, a1, a2", 0xabcdef0123456789, 4, a0, a1, a2);
+   TESTINST_1_2(4, "sll a0, a1, a2", 0xabcdef0123456789, 8, a0, a1, a2);
+   TESTINST_1_2(4, "sll a0, a1, a2", 0xabcdef0123456789, 16, a0, a1, a2);
+   TESTINST_1_2(4, "sll a0, a1, a2", 0xabcdef0123456789, 32, a0, a1, a2);
+   TESTINST_1_2(4, "sll a0, a1, a2", 0xabcdef0123456789, 63, a0, a1, a2);
+   TESTINST_1_2(4, "sll a0, a1, a2", 0xabcdef0123456789, 64, a0, a1, a2);
+
+   TESTINST_1_2(4, "sll t4, t5, t6", 0xabcdef0123456789, 1, t4, t5, t6);
+   TESTINST_1_2(4, "sll zero, a0, a1", 0xabcdef0123456789, 1, zero, a0, a1);
+
+   /* ------------------ slt rd, rs1, rs2 ------------------- */
+   TESTINST_1_2(4, "slt a0, a1, a2", 0x0000000000000000, 0x0000000000000000, a0,
+                a1, a2);
+   TESTINST_1_2(4, "slt a0, a1, a2", 0x0000000000000000, 0x0000000000000001, a0,
+                a1, a2);
+   TESTINST_1_2(4, "slt a0, a1, a2", 0x0000000000000000, 0xffffffffffffffff, a0,
+                a1, a2);
+
+   TESTINST_1_2(4, "slt t4, t5, t6", 0x0000000000000000, 0x0000000000000000, t4,
+                t5, t6);
+   TESTINST_1_2(4, "slt t4, t5, t6", 0x0000000000000000, 0x0000000000000001, t4,
+                t5, t6);
+   TESTINST_1_2(4, "slt zero, a0, a1", 0x0000000000000000, 0x0000000000000001,
+                zero, a0, a1);
+
+   /* ------------------ sltu rd, rs1, rs2 ------------------ */
+   TESTINST_1_2(4, "sltu a0, a1, a2", 0x0000000000000000, 0x0000000000000000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "sltu a0, a1, a2", 0x0000000000000000, 0x0000000000000001,
+                a0, a1, a2);
+   TESTINST_1_2(4, "sltu a0, a1, a2", 0x0000000000000000, 0xffffffffffffffff,
+                a0, a1, a2);
+
+   TESTINST_1_2(4, "sltu t4, t5, t6", 0x0000000000000000, 0x0000000000000000,
+                t4, t5, t6);
+   TESTINST_1_2(4, "sltu t4, t5, t6", 0x0000000000000000, 0x0000000000000001,
+                t4, t5, t6);
+   TESTINST_1_2(4, "sltu zero, a0, a1", 0x0000000000000000, 0x0000000000000001,
+                zero, a0, a1);
+
+   /* ------------------ xor rd, rs1, rs2 ------------------- */
+   TESTINST_1_2(4, "xor a0, a1, a2", 0x0000ffff0000ffff, 0x00000000ffffffff, a0,
+                a1, a2);
+   TESTINST_1_2(4, "xor t4, t5, t6", 0x0000ffff0000ffff, 0x00000000ffffffff, t4,
+                t5, t6);
+   TESTINST_1_2(4, "xor zero, a0, a1", 0x0000ffff0000ffff, 0x00000000ffffffff,
+                zero, a0, a1);
+
+   /* ------------------ srl rd, rs1, rs2 ------------------- */
+   TESTINST_1_2(4, "srl a0, a1, a2", 0xabcdef0123456789, 0, a0, a1, a2);
+   TESTINST_1_2(4, "srl a0, a1, a2", 0xabcdef0123456789, 1, a0, a1, a2);
+   TESTINST_1_2(4, "srl a0, a1, a2", 0xabcdef0123456789, 2, a0, a1, a2);
+   TESTINST_1_2(4, "srl a0, a1, a2", 0xabcdef0123456789, 4, a0, a1, a2);
+   TESTINST_1_2(4, "srl a0, a1, a2", 0xabcdef0123456789, 8, a0, a1, a2);
+   TESTINST_1_2(4, "srl a0, a1, a2", 0xabcdef0123456789, 16, a0, a1, a2);
+   TESTINST_1_2(4, "srl a0, a1, a2", 0xabcdef0123456789, 32, a0, a1, a2);
+   TESTINST_1_2(4, "srl a0, a1, a2", 0xabcdef0123456789, 63, a0, a1, a2);
+   TESTINST_1_2(4, "srl a0, a1, a2", 0xabcdef0123456789, 64, a0, a1, a2);
+
+   TESTINST_1_2(4, "srl t4, t5, t6", 0xabcdef0123456789, 1, t4, t5, t6);
+   TESTINST_1_2(4, "srl zero, a0, a1", 0xabcdef0123456789, 1, zero, a0, a1);
+
+   /* ------------------ sra rd, rs1, rs2 ------------------- */
+   TESTINST_1_2(4, "sra a0, a1, a2", 0xabcdef0123456789, 0, a0, a1, a2);
+   TESTINST_1_2(4, "sra a0, a1, a2", 0xabcdef0123456789, 1, a0, a1, a2);
+   TESTINST_1_2(4, "sra a0, a1, a2", 0xabcdef0123456789, 2, a0, a1, a2);
+   TESTINST_1_2(4, "sra a0, a1, a2", 0xabcdef0123456789, 4, a0, a1, a2);
+   TESTINST_1_2(4, "sra a0, a1, a2", 0xabcdef0123456789, 8, a0, a1, a2);
+   TESTINST_1_2(4, "sra a0, a1, a2", 0xabcdef0123456789, 16, a0, a1, a2);
+   TESTINST_1_2(4, "sra a0, a1, a2", 0xabcdef0123456789, 32, a0, a1, a2);
+   TESTINST_1_2(4, "sra a0, a1, a2", 0xabcdef0123456789, 63, a0, a1, a2);
+   TESTINST_1_2(4, "sra a0, a1, a2", 0xabcdef0123456789, 64, a0, a1, a2);
+
+   TESTINST_1_2(4, "sra t4, t5, t6", 0xabcdef0123456789, 1, t4, t5, t6);
+   TESTINST_1_2(4, "sra zero, a0, a1", 0xabcdef0123456789, 1, zero, a0, a1);
+
+   /* ------------------- or rd, rs1, rs2 ------------------- */
+   TESTINST_1_2(4, "or a0, a1, a2", 0x0000ffff0000ffff, 0x00000000ffffffff, a0,
+                a1, a2);
+   TESTINST_1_2(4, "or t4, t5, t6", 0x0000ffff0000ffff, 0x00000000ffffffff, t4,
+                t5, t6);
+   TESTINST_1_2(4, "or zero, a0, a1", 0x0000ffff0000ffff, 0x00000000ffffffff,
+                zero, a0, a1);
+
+   /* ------------------ and rd, rs1, rs2 ------------------- */
+   TESTINST_1_2(4, "and a0, a1, a2", 0x0000ffff0000ffff, 0x00000000ffffffff, a0,
+                a1, a2);
+   TESTINST_1_2(4, "and t4, t5, t6", 0x0000ffff0000ffff, 0x00000000ffffffff, t4,
+                t5, t6);
+   TESTINST_1_2(4, "and zero, a0, a1", 0x0000ffff0000ffff, 0x00000000ffffffff,
+                zero, a0, a1);
+
+   /* ------------------------ fence ------------------------ */
+   TESTINST_0_0(4, "fence");
+   TESTINST_0_0(4, "fence.tso");
+
+   /* ------------------------ ecall ------------------------ */
+   /* Not tested here. */
+
+   /* ----------------------- ebreak ------------------------ */
+   /* Not tested here. */
+
+   printf("\n");
+}
+/* Runs the RV64I "additions" tests: lwu, ld, sd and the *w ops. */
+static void test_integer_additions(void)
+{
+   printf("RV64I base instruction set, additions\n");
+
+   /* --------------- lwu rd, imm[11:0](rs1) ---------------- */
+   TESTINST_1_1_LOAD(4, "lwu a0, 0(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lwu a0, 4(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lwu a0, 8(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lwu a0, 16(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lwu a0, 32(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lwu a0, 64(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lwu a0, 128(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lwu a0, 256(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lwu a0, 512(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lwu a0, 1024(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lwu a0, 2044(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lwu a0, -4(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "lwu a0, -2048(a1)", a0, a1);
+
+   TESTINST_1_1_LOAD(4, "lwu a4, 0(a5)", a4, a5);
+   TESTINST_1_1_LOAD(4, "lwu zero, 0(a0)", zero, a0);
+
+   /* ---------------- ld rd, imm[11:0](rs1) ---------------- */
+   TESTINST_1_1_LOAD(4, "ld a0, 0(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "ld a0, 4(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "ld a0, 8(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "ld a0, 16(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "ld a0, 32(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "ld a0, 64(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "ld a0, 128(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "ld a0, 256(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "ld a0, 512(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "ld a0, 1024(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "ld a0, 2040(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "ld a0, -4(a1)", a0, a1);
+   TESTINST_1_1_LOAD(4, "ld a0, -2048(a1)", a0, a1);
+
+   TESTINST_1_1_LOAD(4, "ld a4, 0(a5)", a4, a5);
+   TESTINST_1_1_LOAD(4, "ld zero, 0(a0)", zero, a0);
+
+   /* --------------- sd rs2, imm[11:0](rs1) ---------------- */
+   TESTINST_0_2_STORE(4, "sd a0, 0(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sd a0, 4(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sd a0, 8(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sd a0, 16(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sd a0, 32(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sd a0, 64(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sd a0, 128(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sd a0, 256(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sd a0, 512(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sd a0, 1024(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sd a0, 2040(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sd a0, -4(a1)", 0xabcdef0123456789, a0, a1);
+   TESTINST_0_2_STORE(4, "sd a0, -2048(a1)", 0xabcdef0123456789, a0, a1);
+
+   TESTINST_0_2_STORE(4, "sd a4, 0(a5)", 0xabcdef0123456789, a4, a5);
+
+   /* -------------- addiw rd, rs1, imm[11:0] --------------- */
+   TESTINST_1_1(4, "addiw a0, a1, 1", 0x0000000000001000, a0, a1);
+   TESTINST_1_1(4, "addiw a0, a1, 2", 0x0000000000001000, a0, a1);
+   TESTINST_1_1(4, "addiw a0, a1, 4", 0x0000000000001000, a0, a1);
+   TESTINST_1_1(4, "addiw a0, a1, 8", 0x0000000000001000, a0, a1);
+   TESTINST_1_1(4, "addiw a0, a1, 16", 0x0000000000001000, a0, a1);
+   TESTINST_1_1(4, "addiw a0, a1, 32", 0x0000000000001000, a0, a1);
+   TESTINST_1_1(4, "addiw a0, a1, 64", 0x0000000000001000, a0, a1);
+   TESTINST_1_1(4, "addiw a0, a1, 128", 0x0000000000001000, a0, a1);
+   TESTINST_1_1(4, "addiw a0, a1, 256", 0x0000000000001000, a0, a1);
+   TESTINST_1_1(4, "addiw a0, a1, 1024", 0x0000000000001000, a0, a1);
+   TESTINST_1_1(4, "addiw a0, a1, 2047", 0x0000000000001000, a0, a1);
+   TESTINST_1_1(4, "addiw a0, a1, -1", 0x0000000000001000, a0, a1);
+   TESTINST_1_1(4, "addiw a0, a1, -2048", 0x0000000000001000, a0, a1);
+
+   TESTINST_1_1(4, "addiw a0, a1, 1", 0x000000007fffffff, a0, a1);
+   TESTINST_1_1(4, "addiw a0, a1, 1", 0x00000000fffffffe, a0, a1);
+   TESTINST_1_1(4, "addiw a0, a1, 1", 0x00000000ffffffff, a0, a1);
+   TESTINST_1_1(4, "addiw t5, t6, 1", 0x0000000000001000, t5, t6);
+   TESTINST_1_1(4, "addiw zero, a0, 1", 0x0000000000001000, zero, a0);
+
+   /* -------------- slliw rd, rs1, uimm[4:0] --------------- */
+   TESTINST_1_1(4, "slliw a0, a1, 0", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "slliw a0, a1, 1", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "slliw a0, a1, 2", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "slliw a0, a1, 4", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "slliw a0, a1, 8", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "slliw a0, a1, 16", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "slliw a0, a1, 31", 0xabcdef0123456789, a0, a1);
+
+   TESTINST_1_1(4, "slliw t5, t6, 1", 0xabcdef0123456789, t5, t6);
+   TESTINST_1_1(4, "slliw zero, a0, 1", 0xabcdef0123456789, zero, a0);
+
+   /* -------------- srliw rd, rs1, uimm[4:0] --------------- */
+   TESTINST_1_1(4, "srliw a0, a1, 0", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "srliw a0, a1, 1", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "srliw a0, a1, 2", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "srliw a0, a1, 4", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "srliw a0, a1, 8", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "srliw a0, a1, 16", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "srliw a0, a1, 31", 0xabcdef0123456789, a0, a1);
+
+   TESTINST_1_1(4, "srliw t5, t6, 1", 0xabcdef0123456789, t5, t6);
+   TESTINST_1_1(4, "srliw zero, a0, 1", 0xabcdef0123456789, zero, a0);
+
+   /* -------------- sraiw rd, rs1, uimm[4:0] --------------- */
+   TESTINST_1_1(4, "sraiw a0, a1, 0", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "sraiw a0, a1, 1", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "sraiw a0, a1, 2", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "sraiw a0, a1, 4", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "sraiw a0, a1, 8", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "sraiw a0, a1, 16", 0xabcdef0123456789, a0, a1);
+   TESTINST_1_1(4, "sraiw a0, a1, 31", 0xabcdef0123456789, a0, a1);
+   /* NOTE(review): next two use srai, not sraiw -- likely typo; confirm. */
+   TESTINST_1_1(4, "srai t5, t6, 1", 0xabcdef0123456789, t5, t6);
+   TESTINST_1_1(4, "srai zero, a0, 1", 0xabcdef0123456789, zero, a0);
+
+   /* ------------------ addw rd, rs1, rs2 ------------------ */
+   TESTINST_1_2(4, "addw a0, a1, a2", 0x0000000000001000, 0x0000000000002000, a0,
+                a1, a2);
+   TESTINST_1_2(4, "addw a0, a1, a2", 0x000000007fffffff, 0x0000000000000001, a0,
+                a1, a2);
+   TESTINST_1_2(4, "addw a0, a1, a2", 0x00000000fffffffe, 0x0000000000000001, a0,
+                a1, a2);
+   TESTINST_1_2(4, "addw a0, a1, a2", 0x00000000ffffffff, 0x0000000000000001, a0,
+                a1, a2);
+   TESTINST_1_2(4, "addw a0, a1, a2", 0xfffffffffffffffe, 0x0000000000000001, a0,
+                a1, a2);
+   TESTINST_1_2(4, "addw a0, a1, a2", 0xffffffffffffffff, 0x0000000000000001, a0,
+                a1, a2);
+
+   TESTINST_1_2(4, "addw t4, t5, t6", 0x0000000000001000, 0x0000000000002000, t4,
+                t5, t6);
+   TESTINST_1_2(4, "addw zero, a0, a1", 0x0000000000001000, 0x0000000000002000,
+                zero, a0, a1);
+
+   /* ------------------ subw rd, rs1, rs2 ------------------ */
+   TESTINST_1_2(4, "subw a0, a1, a2", 0x0000000000001000, 0x0000000000000fff, a0,
+                a1, a2);
+   TESTINST_1_2(4, "subw a0, a1, a2", 0x0000000000001000, 0x0000000000001000, a0,
+                a1, a2);
+   TESTINST_1_2(4, "subw a0, a1, a2", 0x0000000000001000, 0x0000000000001001, a0,
+                a1, a2);
+   TESTINST_1_2(4, "subw a0, a1, a2", 0xffffffffffffffff, 0x0000000000000000, a0,
+                a1, a2);
+   TESTINST_1_2(4, "subw a0, a1, a2", 0x0000000100000000, 0x0000000000000001, a0,
+                a1, a2);
+
+   TESTINST_1_2(4, "subw t4, t5, t6", 0x0000000000001000, 0x0000000000000fff, t4,
+                t5, t6);
+   TESTINST_1_2(4, "subw zero, a0, a1", 0x0000000000001000, 0x0000000000000fff,
+                zero, a0, a1);
+
+   /* ------------------ sllw rd, rs1, rs2 ------------------ */
+   TESTINST_1_2(4, "sllw a0, a1, a2", 0xabcdef0123456789, 0, a0, a1, a2);
+   TESTINST_1_2(4, "sllw a0, a1, a2", 0xabcdef0123456789, 1, a0, a1, a2);
+   TESTINST_1_2(4, "sllw a0, a1, a2", 0xabcdef0123456789, 2, a0, a1, a2);
+   TESTINST_1_2(4, "sllw a0, a1, a2", 0xabcdef0123456789, 4, a0, a1, a2);
+   TESTINST_1_2(4, "sllw a0, a1, a2", 0xabcdef0123456789, 8, a0, a1, a2);
+   TESTINST_1_2(4, "sllw a0, a1, a2", 0xabcdef0123456789, 16, a0, a1, a2);
+   TESTINST_1_2(4, "sllw a0, a1, a2", 0xabcdef0123456789, 31, a0, a1, a2);
+   TESTINST_1_2(4, "sllw a0, a1, a2", 0xabcdef0123456789, 32, a0, a1, a2);
+
+   TESTINST_1_2(4, "sllw t4, t5, t6", 0xabcdef0123456789, 1, t4, t5, t6);
+   TESTINST_1_2(4, "sllw zero, a0, a1", 0xabcdef0123456789, 1, zero, a0, a1);
+
+   /* ------------------ srlw rd, rs1, rs2 ------------------ */
+   TESTINST_1_2(4, "srlw a0, a1, a2", 0xabcdef0123456789, 0, a0, a1, a2);
+   TESTINST_1_2(4, "srlw a0, a1, a2", 0xabcdef0123456789, 1, a0, a1, a2);
+   TESTINST_1_2(4, "srlw a0, a1, a2", 0xabcdef0123456789, 2, a0, a1, a2);
+   TESTINST_1_2(4, "srlw a0, a1, a2", 0xabcdef0123456789, 4, a0, a1, a2);
+   TESTINST_1_2(4, "srlw a0, a1, a2", 0xabcdef0123456789, 8, a0, a1, a2);
+   TESTINST_1_2(4, "srlw a0, a1, a2", 0xabcdef0123456789, 16, a0, a1, a2);
+   TESTINST_1_2(4, "srlw a0, a1, a2", 0xabcdef0123456789, 31, a0, a1, a2);
+   TESTINST_1_2(4, "srlw a0, a1, a2", 0xabcdef0123456789, 32, a0, a1, a2);
+
+   TESTINST_1_2(4, "srlw t4, t5, t6", 0xabcdef0123456789, 1, t4, t5, t6);
+   TESTINST_1_2(4, "srlw zero, a0, a1", 0xabcdef0123456789, 1, zero, a0, a1);
+
+   /* ------------------ sraw rd, rs1, rs2 ------------------ */
+   TESTINST_1_2(4, "sraw a0, a1, a2", 0xabcdef0123456789, 0, a0, a1, a2);
+   TESTINST_1_2(4, "sraw a0, a1, a2", 0xabcdef0123456789, 1, a0, a1, a2);
+   TESTINST_1_2(4, "sraw a0, a1, a2", 0xabcdef0123456789, 2, a0, a1, a2);
+   TESTINST_1_2(4, "sraw a0, a1, a2", 0xabcdef0123456789, 4, a0, a1, a2);
+   TESTINST_1_2(4, "sraw a0, a1, a2", 0xabcdef0123456789, 8, a0, a1, a2);
+   TESTINST_1_2(4, "sraw a0, a1, a2", 0xabcdef0123456789, 16, a0, a1, a2);
+   TESTINST_1_2(4, "sraw a0, a1, a2", 0xabcdef0123456789, 31, a0, a1, a2);
+   TESTINST_1_2(4, "sraw a0, a1, a2", 0xabcdef0123456789, 32, a0, a1, a2);
+
+   TESTINST_1_2(4, "sraw t4, t5, t6", 0xabcdef0123456789, 1, t4, t5, t6);
+   TESTINST_1_2(4, "sraw zero, a0, a1", 0xabcdef0123456789, 1, zero, a0, a1);
+}
+/* Entry point: runs the shared tests, then the RV64I additions; returns 0. */
+int main(void)
+{
+   test_integer_shared();
+   test_integer_additions();
+   return 0;
+}
diff --git a/none/tests/riscv64/integer.stderr.exp b/none/tests/riscv64/integer.stderr.exp
new file mode 100644
index 000000000..e69de29bb
diff --git a/none/tests/riscv64/integer.stdout.exp b/none/tests/riscv64/integer.stdout.exp
new file mode 100644
index 000000000..87c859202
--- /dev/null
+++ b/none/tests/riscv64/integer.stdout.exp
@@ -0,0 +1,1859 @@
+RV64I base instruction set, shared operations
+lui a0, 0 ::
+  output: a0=0x0000000000000000
+lui a0, 1 ::
+  output: a0=0x0000000000001000
+lui a0, 2 ::
+  output: a0=0x0000000000002000
+lui a0, 4 ::
+  output: a0=0x0000000000004000
+lui a0, 8 ::
+  output: a0=0x0000000000008000
+lui a0, 16 ::
+  output: a0=0x0000000000010000
+lui a0, 32 ::
+  output: a0=0x0000000000020000
+lui a0, 64 ::
+  output: a0=0x0000000000040000
+lui a0, 128 ::
+  output: a0=0x0000000000080000
+lui a0, 256 ::
+  output: a0=0x0000000000100000
+lui a0, 512 ::
+  output: a0=0x0000000000200000
+lui a0, 1024 ::
+  output: a0=0x0000000000400000
+lui a0, 2048 ::
+  output: a0=0x0000000000800000
+lui a0, 4096 ::
+  output: a0=0x0000000001000000
+lui a0, 8192 ::
+  output: a0=0x0000000002000000
+lui a0, 16384 ::
+  output: a0=0x0000000004000000
+lui a0, 32768 ::
+  output: a0=0x0000000008000000
+lui a0, 65536 ::
+  output: a0=0x0000000010000000
+lui a0, 131072 ::
+  output: a0=0x0000000020000000
+lui a0, 262144 ::
+  output: a0=0x0000000040000000
+lui a0, 524288 ::
+  output: a0=0xffffffff80000000
+lui a0, 1048575 ::
+  output: a0=0xfffffffffffff000
+lui t6, 1 ::
+  output: t6=0x0000000000001000
+lui zero, 1 ::
+  output: zero=0x0000000000000000
+auipc a0, 0 ::
+  output: a0=1f+0
+auipc a0, 1 ::
+  output: a0=1f+4096
+auipc a0, 2 ::
+  output: a0=1f+8192
+auipc a0, 4 ::
+  output: a0=1f+16384
+auipc a0, 8 ::
+  output: a0=1f+32768
+auipc a0, 16 ::
+  output: a0=1f+65536
+auipc a0, 32 ::
+  output: a0=1f+131072
+auipc a0, 64 ::
+  output: a0=1f+262144
+auipc a0, 128 ::
+  output: a0=1f+524288
+auipc a0, 256 ::
+  output: a0=1f+1048576
+auipc a0, 512 ::
+  output: a0=1f+2097152
+auipc a0, 1024 ::
+  output: a0=1f+4194304
+auipc a0, 2048 ::
+  output: a0=1f+8388608
+auipc a0, 4096 ::
+  output: a0=1f+16777216
+auipc a0, 8192 ::
+  output: a0=1f+33554432
+auipc a0, 16384 ::
+  output: a0=1f+67108864
+auipc a0, 32768 ::
+  output: a0=1f+134217728
+auipc a0, 65536 ::
+  output: a0=1f+268435456
+auipc a0, 131072 ::
+  output: a0=1f+536870912
+auipc a0, 262144 ::
+  output: a0=1f+1073741824
+auipc a0, 524288 ::
+  output: a0=1f-2147483648
+auipc a0, 1048575 ::
+  output: a0=1f-4096
+auipc t6, 1 ::
+  output: t6=1f+4096
+auipc zero, 1 ::
+  output: zero=0x0000000000000000
+jal t0, .+4 ::
+  output: t0=1f+4
+  target: reached
+jal t0, .+6 ::
+  output: t0=1f+4
+  target: reached
+jal t0, .+8 ::
+  output: t0=1f+4
+  target: reached
+jal t0, .+16 ::
+  output: t0=1f+4
+  target: reached
+jal t0, .+32 ::
+  output: t0=1f+4
+  target: reached
+jal t0, .+64 ::
+  output: t0=1f+4
+  target: reached
+jal t0, .+128 ::
+  output: t0=1f+4
+  target: reached
+jal t0, .+256 ::
+  output: t0=1f+4
+  target: reached
+jal t0, .+512 ::
+  output: t0=1f+4
+  target: reached
+jal t0, .+1024 ::
+  output: t0=1f+4
+  target: reached
+jal t0, .+2048 ::
+  output: t0=1f+4
+  target: reached
+jal t0, .-4 ::
+  output: t0=1f+4
+  target: reached
+jal t0, .-6 ::
+  output: t0=1f+4
+  target: reached
+jal t0, .-2048 ::
+  output: t0=1f+4
+  target: reached
+jal t6, .+4 ::
+  output: t6=1f+4
+  target: reached
+jal zero, .+4 ::
+  output: zero=0x0000000000000000
+  target: reached
+jalr ra, 0(t0) ::
+  inputs: t0=1f+4
+  output: ra=1f+4
+  target: reached
+jalr ra, 0(t0) ::
+  inputs: t0=1f+6
+  output: ra=1f+4
+  target: reached
+jalr ra, 0(t0) ::
+  inputs: t0=1f+8
+  output: ra=1f+4
+  target: reached
+jalr ra, 0(t0) ::
+  inputs: t0=1f-4
+  output: ra=1f+4
+  target: reached
+jalr ra, 0(t0) ::
+  inputs: t0=1f-6
+  output: ra=1f+4
+  target: reached
+jalr ra, 0(t0) ::
+  inputs: t0=1f-8
+  output: ra=1f+4
+  target: reached
+jalr ra, 0(t0) ::
+  inputs: t0=1f-8
+  output: ra=1f+4
+  target: reached
+jalr ra, 1(t0) ::
+  inputs: t0=1f-9
+  output: ra=1f+4
+  target: reached
+jalr ra, 2(t0) ::
+  inputs: t0=1f-10
+  output: ra=1f+4
+  target: reached
+jalr ra, 4(t0) ::
+  inputs: t0=1f-12
+  output: ra=1f+4
+  target: reached
+jalr ra, 8(t0) ::
+  inputs: t0=1f-16
+  output: ra=1f+4
+  target: reached
+jalr ra, 16(t0) ::
+  inputs: t0=1f-24
+  output: ra=1f+4
+  target: reached
+jalr ra, 32(t0) ::
+  inputs: t0=1f-40
+  output: ra=1f+4
+  target: reached
+jalr ra, 64(t0) ::
+  inputs: t0=1f-72
+  output: ra=1f+4
+  target: reached
+jalr ra, 128(t0) ::
+  inputs: t0=1f-136
+  output: ra=1f+4
+  target: reached
+jalr ra, 256(t0) ::
+  inputs: t0=1f-264
+  output: ra=1f+4
+  target: reached
+jalr ra, 512(t0) ::
+  inputs: t0=1f-520
+  output: ra=1f+4
+  target: reached
+jalr ra, 1024(t0) ::
+  inputs: t0=1f-1032
+  output: ra=1f+4
+  target: reached
+jalr ra, 2047(t0) ::
+  inputs: t0=1f-2055
+  output: ra=1f+4
+  target: reached
+jalr ra, -1(t0) ::
+  inputs: t0=1f-7
+  output: ra=1f+4
+  target: reached
+jalr ra, -2048(t0) ::
+  inputs: t0=1f+2040
+  output: ra=1f+4
+  target: reached
+jalr ra, 0(t6) ::
+  inputs: t6=1f+4
+  output: ra=1f+4
+  target: reached
+jalr zero, 0(a0) ::
+  inputs: a0=1f+4
+  output: zero=0x0000000000000000
+  target: reached
+beq a0, a1, .+4 ::
+  inputs: a0=0, a1=0
+  target: reached
+beq a0, a1, .+6 ::
+  inputs: a0=0, a1=0
+  target: reached
+beq a0, a1, .+8 ::
+  inputs: a0=0, a1=0
+  target: reached
+beq a0, a1, .+16 ::
+  inputs: a0=0, a1=0
+  target: reached
+beq a0, a1, .+32 ::
+  inputs: a0=0, a1=0
+  target: reached
+beq a0, a1, .+64 ::
+  inputs: a0=0, a1=0
+  target: reached
+beq a0, a1, .+128 ::
+  inputs: a0=0, a1=0
+  target: reached
+beq a0, a1, .+256 ::
+  inputs: a0=0, a1=0
+  target: reached
+beq a0, a1, .+512 ::
+  inputs: a0=0, a1=0
+  target: reached
+beq a0, a1, .+1024 ::
+  inputs: a0=0, a1=0
+  target: reached
+beq a0, a1, .+2048 ::
+  inputs: a0=0, a1=0
+  target: reached
+beq a0, a1, .-4 ::
+  inputs: a0=0, a1=0
+  target: reached
+beq a0, a1, .-6 ::
+  inputs: a0=0, a1=0
+  target: reached
+beq a0, a1, .-2048 ::
+  inputs: a0=0, a1=0
+  target: reached
+beq t5, t6, .+4 ::
+  inputs: t5=0, t6=0
+  target: reached
+beq a0, a1, 1f ::
+  inputs: a0=0, a1=0
+  branch: taken
+beq a0, a1, 1f ::
+  inputs: a0=0, a1=1
+  branch: not taken
+beq a0, a1, 1f ::
+  inputs: a0=1, a1=0
+  branch: not taken
+beq a0, a1, 1f ::
+  inputs: a0=1, a1=1
+  branch: taken
+beq a0, zero, 1f ::
+  inputs: a0=0, zero=0
+  branch: taken
+beq a0, zero, 1f ::
+  inputs: a0=1, zero=0
+  branch: not taken
+beq zero, a0, 1f ::
+  inputs: zero=0, a0=0
+  branch: taken
+beq zero, a0, 1f ::
+  inputs: zero=0, a0=1
+  branch: not taken
+beq a0, a1, 1f ::
+  inputs: a0=0, a1=-1
+  branch: not taken
+beq a0, a1, 1f ::
+  inputs: a0=-1, a1=0
+  branch: not taken
+beq a0, a1, 1f ::
+  inputs: a0=-1, a1=-1
+  branch: taken
+bne a0, a1, 1f ::
+  inputs: a0=0, a1=0
+  branch: not taken
+bne a0, a1, 1f ::
+  inputs: a0=0, a1=1
+  branch: taken
+bne a0, a1, 1f ::
+  inputs: a0=1, a1=0
+  branch: taken
+bne a0, a1, 1f ::
+  inputs: a0=1, a1=1
+  branch: not taken
+bne a0, zero, 1f ::
+  inputs: a0=0, zero=0
+  branch: not taken
+bne a0, zero, 1f ::
+  inputs: a0=1, zero=0
+  branch: taken
+bne zero, a0, 1f ::
+  inputs: zero=0, a0=0
+  branch: not taken
+bne zero, a0, 1f ::
+  inputs: zero=0, a0=1
+  branch: taken
+bne a0, a1, 1f ::
+  inputs: a0=0, a1=-1
+  branch: taken
+bne a0, a1, 1f ::
+  inputs: a0=-1, a1=0
+  branch: taken
+bne a0, a1, 1f ::
+  inputs: a0=-1, a1=-1
+  branch: not taken
+blt a0, a1, 1f ::
+  inputs: a0=0, a1=0
+  branch: not taken
+blt a0, a1, 1f ::
+  inputs: a0=0, a1=1
+  branch: taken
+blt a0, a1, 1f ::
+  inputs: a0=1, a1=0
+  branch: not taken
+blt a0, a1, 1f ::
+  inputs: a0=1, a1=1
+  branch: not taken
+blt a0, zero, 1f ::
+  inputs: a0=0, zero=0
+  branch: not taken
+blt a0, zero, 1f ::
+  inputs: a0=1, zero=0
+  branch: not taken
+blt zero, a0, 1f ::
+  inputs: zero=0, a0=0
+  branch: not taken
+blt zero, a0, 1f ::
+  inputs: zero=0, a0=1
+  branch: taken
+blt a0, a1, 1f ::
+  inputs: a0=0, a1=-1
+  branch: not taken
+blt a0, a1, 1f ::
+  inputs: a0=-1, a1=0
+  branch: taken
+blt a0, a1, 1f ::
+  inputs: a0=-1, a1=-1
+  branch: not taken
+bge a0, a1, 1f ::
+  inputs: a0=0, a1=0
+  branch: taken
+bge a0, a1, 1f ::
+  inputs: a0=0, a1=1
+  branch: not taken
+bge a0, a1, 1f ::
+  inputs: a0=1, a1=0
+  branch: taken
+bge a0, a1, 1f ::
+  inputs: a0=1, a1=1
+  branch: taken
+bge a0, zero, 1f ::
+  inputs: a0=0, zero=0
+  branch: taken
+bge a0, zero, 1f ::
+  inputs: a0=1, zero=0
+  branch: taken
+bge zero, a0, 1f ::
+  inputs: zero=0, a0=0
+  branch: taken
+bge zero, a0, 1f ::
+  inputs: zero=0, a0=1
+  branch: not taken
+bge a0, a1, 1f ::
+  inputs: a0=0, a1=-1
+  branch: taken
+bge a0, a1, 1f ::
+  inputs: a0=-1, a1=0
+  branch: not taken
+bge a0, a1, 1f ::
+  inputs: a0=-1, a1=-1
+  branch: taken
+bltu a0, a1, 1f ::
+  inputs: a0=0, a1=0
+  branch: not taken
+bltu a0, a1, 1f ::
+  inputs: a0=0, a1=1
+  branch: taken
+bltu a0, a1, 1f ::
+  inputs: a0=1, a1=0
+  branch: not taken
+bltu a0, a1, 1f ::
+  inputs: a0=1, a1=1
+  branch: not taken
+bltu a0, zero, 1f ::
+  inputs: a0=0, zero=0
+  branch: not taken
+bltu a0, zero, 1f ::
+  inputs: a0=1, zero=0
+  branch: not taken
+bltu zero, a0, 1f ::
+  inputs: zero=0, a0=0
+  branch: not taken
+bltu zero, a0, 1f ::
+  inputs: zero=0, a0=1
+  branch: taken
+bltu a0, a1, 1f ::
+  inputs: a0=0, a1=-1
+  branch: taken
+bltu a0, a1, 1f ::
+  inputs: a0=-1, a1=0
+  branch: not taken
+bltu a0, a1, 1f ::
+  inputs: a0=-1, a1=-1
+  branch: not taken
+bgeu a0, a1, 1f ::
+  inputs: a0=0, a1=0
+  branch: taken
+bgeu a0, a1, 1f ::
+  inputs: a0=0, a1=1
+  branch: not taken
+bgeu a0, a1, 1f ::
+  inputs: a0=1, a1=0
+  branch: taken
+bgeu a0, a1, 1f ::
+  inputs: a0=1, a1=1
+  branch: taken
+bgeu a0, zero, 1f ::
+  inputs: a0=0, zero=0
+  branch: taken
+bgeu a0, zero, 1f ::
+  inputs: a0=1, zero=0
+  branch: taken
+bgeu zero, a0, 1f ::
+  inputs: zero=0, a0=0
+  branch: taken
+bgeu zero, a0, 1f ::
+  inputs: zero=0, a0=1
+  branch: not taken
+bgeu a0, a1, 1f ::
+  inputs: a0=0, a1=-1
+  branch: not taken
+bgeu a0, a1, 1f ::
+  inputs: a0=-1, a1=0
+  branch: taken
+bgeu a0, a1, 1f ::
+  inputs: a0=-1, a1=-1
+  branch: taken
+lb a0, 0(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xffffffffffffffa0
+  no memory changes
+lb a0, 1(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000000000079
+  no memory changes
+lb a0, 2(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xffffffffffffffed
+  no memory changes
+lb a0, 4(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000000000068
+  no memory changes
+lb a0, 8(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xffffffffffffffef
+  no memory changes
+lb a0, 16(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000000000011
+  no memory changes
+lb a0, 32(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xffffffffffffffe3
+  no memory changes
+lb a0, 64(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xffffffffffffff92
+  no memory changes
+lb a0, 128(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000000000042
+  no memory changes
+lb a0, 256(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xffffffffffffffc7
+  no memory changes
+lb a0, 512(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xffffffffffffffcd
+  no memory changes
+lb a0, 1024(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000000000004a
+  no memory changes
+lb a0, 2047(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xffffffffffffffbc
+  no memory changes
+lb a0, -1(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000000000002b
+  no memory changes
+lb a0, -2048(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xffffffffffffff8c
+  no memory changes
+lb a4, 0(a5) ::
+  inputs: a5=&area_mid
+  output: a4=0xffffffffffffffed
+  no memory changes
+lb zero, 0(a0) ::
+  inputs: a0=&area_mid
+  output: zero=0x0000000000000000
+  no memory changes
+lh a0, 0(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000000000396f
+  no memory changes
+lh a0, 2(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xffffffffffffbe76
+  no memory changes
+lh a0, 4(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x00000000000036cd
+  no memory changes
+lh a0, 8(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000000003f2d
+  no memory changes
+lh a0, 16(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000000000607f
+  no memory changes
+lh a0, 32(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x00000000000031b1
+  no memory changes
+lh a0, 64(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000000007a20
+  no memory changes
+lh a0, 128(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xffffffffffffa24f
+  no memory changes
+lh a0, 256(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000000002ed4
+  no memory changes
+lh a0, 512(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xffffffffffff9eda
+  no memory changes
+lh a0, 1024(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xffffffffffffdc57
+  no memory changes
+lh a0, 2046(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xffffffffffff8f1e
+  no memory changes
+lh a0, -2(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xfffffffffffffef5
+  no memory changes
+lh a0, -2048(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xffffffffffffff99
+  no memory changes
+lh a4, 0(a5) ::
+  inputs: a5=&area_mid
+  output: a4=0x00000000000020fa
+  no memory changes
+lh zero, 0(a0) ::
+  inputs: a0=&area_mid
+  output: zero=0x0000000000000000
+  no memory changes
+lw a0, 0(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000050f7f87c
+  no memory changes
+lw a0, 4(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xffffffffdde6ba3c
+  no memory changes
+lw a0, 8(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xffffffff8349fb67
+  no memory changes
+lw a0, 16(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000006ce42545
+  no memory changes
+lw a0, 32(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000027636871
+  no memory changes
+lw a0, 64(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000000f131555
+  no memory changes
+lw a0, 128(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000005806056c
+  no memory changes
+lw a0, 256(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xffffffffdcef22c1
+  no memory changes
+lw a0, 512(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000003109b267
+  no memory changes
+lw a0, 1024(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000003db72f24
+  no memory changes
+lw a0, 2044(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000006e0e703
+  no memory changes
+lw a0, -4(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000074b87535
+  no memory changes
+lw a0, -2048(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000003d9ad2e6
+  no memory changes
+lw a4, 0(a5) ::
+  inputs: a5=&area_mid
+  output: a4=0xfffffffff267f447
+  no memory changes
+lw zero, 0(a0) ::
+  inputs: a0=&area_mid
+  output: zero=0x0000000000000000
+  no memory changes
+lbu a0, 0(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x00000000000000c9
+  no memory changes
+lbu a0, 1(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x00000000000000b8
+  no memory changes
+lbu a0, 2(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000000000089
+  no memory changes
+lbu a0, 4(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000000000098
+  no memory changes
+lbu a0, 8(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x00000000000000a8
+  no memory changes
+lbu a0, 16(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000000000005a
+  no memory changes
+lbu a0, 32(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000000000004c
+  no memory changes
+lbu a0, 64(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000000000003b
+  no memory changes
+lbu a0, 128(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000000000006a
+  no memory changes
+lbu a0, 256(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x00000000000000ef
+  no memory changes
+lbu a0, 512(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x00000000000000f5
+  no memory changes
+lbu a0, 1024(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000000000072
+  no memory changes
+lbu a0, 2047(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000000000036
+  no memory changes
+lbu a0, -1(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x00000000000000a5
+  no memory changes
+lbu a0, -2048(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x00000000000000b4
+  no memory changes
+lbu a4, 0(a5) ::
+  inputs: a5=&area_mid
+  output: a4=0x0000000000000015
+  no memory changes
+lbu zero, 0(a0) ::
+  inputs: a0=&area_mid
+  output: zero=0x0000000000000000
+  no memory changes
+lhu a0, 0(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000000007797
+  no memory changes
+lhu a0, 2(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000000006013
+  no memory changes
+lhu a0, 4(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000000000ddfe
+  no memory changes
+lhu a0, 8(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000000000cde5
+  no memory changes
+lhu a0, 16(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000000003ec7
+  no memory changes
+lhu a0, 32(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000000000af19
+  no memory changes
+lhu a0, 64(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x00000000000038c8
+  no memory changes
+lhu a0, 128(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000000000e078
+  no memory changes
+lhu a0, 256(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000000006dfd
+  no memory changes
+lhu a0, 512(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000000000dd03
+  no memory changes
+lhu a0, 1024(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000000001a80
+  no memory changes
+lhu a0, 2046(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000000000af2
+  no memory changes
+lhu a0, -2(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x00000000000079ca
+  no memory changes
+lhu a0, -2048(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000000003dc2
+  no memory changes
+lhu a4, 0(a5) ::
+  inputs: a5=&area_mid
+  output: a4=0x0000000000005f23
+  no memory changes
+lhu zero, 0(a0) ::
+  inputs: a0=&area_mid
+  output: zero=0x0000000000000000
+  no memory changes
+sb a0, 0(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+000]  89 .. .. .. .. .. .. .. .. .. .. .. .. .. .. ..
+sb a0, 1(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+000]  .. 89 .. .. .. .. .. .. .. .. .. .. .. .. .. ..
+sb a0, 2(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+000]  .. .. 89 .. .. .. .. .. .. .. .. .. .. .. .. ..
+sb a0, 4(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+000]  .. .. .. .. 89 .. .. .. .. .. .. .. .. .. .. ..
+sb a0, 8(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+000]  .. .. .. .. .. .. .. .. 89 .. .. .. .. .. .. ..
+sb a0, 16(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+016]  89 .. .. .. .. .. .. .. .. .. .. .. .. .. .. ..
+sb a0, 32(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+032]  89 .. .. .. .. .. .. .. .. .. .. .. .. .. .. ..
+sb a0, 64(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+064]  89 .. .. .. .. .. .. .. .. .. .. .. .. .. .. ..
+sb a0, 128(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+128]  89 .. .. .. .. .. .. .. .. .. .. .. .. .. .. ..
+sb a0, 256(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+256]  89 .. .. .. .. .. .. .. .. .. .. .. .. .. .. ..
+sb a0, 512(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+512]  89 .. .. .. .. .. .. .. .. .. .. .. .. .. .. ..
+sb a0, 1024(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+1024]  89 .. .. .. .. .. .. .. .. .. .. .. .. .. .. ..
+sb a0, 2047(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+2032]  .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. 89
+sb a0, -1(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [-016]  .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. 89
+sb a0, -2048(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [-2048]  89 .. .. .. .. .. .. .. .. .. .. .. .. .. .. ..
+sb a4, 0(a5) ::
+  inputs: a4=0xabcdef0123456789, a5=&area_mid
+  [+000]  89 .. .. .. .. .. .. .. .. .. .. .. .. .. .. ..
+sh a0, 0(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+000]  89 67 .. .. .. .. .. .. .. .. .. .. .. .. .. ..
+sh a0, 2(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+000]  .. .. 89 67 .. .. .. .. .. .. .. .. .. .. .. ..
+sh a0, 4(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+000]  .. .. .. .. 89 67 .. .. .. .. .. .. .. .. .. ..
+sh a0, 8(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+000]  .. .. .. .. .. .. .. .. 89 67 .. .. .. .. .. ..
+sh a0, 16(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+016]  89 67 .. .. .. .. .. .. .. .. .. .. .. .. .. ..
+sh a0, 32(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+032]  89 67 .. .. .. .. .. .. .. .. .. .. .. .. .. ..
+sh a0, 64(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+064]  89 67 .. .. .. .. .. .. .. .. .. .. .. .. .. ..
+sh a0, 128(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+128]  89 67 .. .. .. .. .. .. .. .. .. .. .. .. .. ..
+sh a0, 256(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+256]  89 67 .. .. .. .. .. .. .. .. .. .. .. .. .. ..
+sh a0, 512(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+512]  89 67 .. .. .. .. .. .. .. .. .. .. .. .. .. ..
+sh a0, 1024(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+1024]  89 67 .. .. .. .. .. .. .. .. .. .. .. .. .. ..
+sh a0, 2046(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+2032]  .. .. .. .. .. .. .. .. .. .. .. .. .. .. 89 67
+sh a0, -2(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [-016]  .. .. .. .. .. .. .. .. .. .. .. .. .. .. 89 67
+sh a0, -2048(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [-2048]  89 67 .. .. .. .. .. .. .. .. .. .. .. .. .. ..
+sh a4, 0(a5) ::
+  inputs: a4=0xabcdef0123456789, a5=&area_mid
+  [+000]  89 67 .. .. .. .. .. .. .. .. .. .. .. .. .. ..
+sw a0, 0(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+000]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+sw a0, 4(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+000]  .. .. .. .. 89 67 45 23 .. .. .. .. .. .. .. ..
+sw a0, 8(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+000]  .. .. .. .. .. .. .. .. 89 67 45 23 .. .. .. ..
+sw a0, 16(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+016]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+sw a0, 32(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+032]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+sw a0, 64(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+064]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+sw a0, 128(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+128]  89 .. 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+sw a0, 256(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+256]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+sw a0, 512(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+512]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+sw a0, 1024(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+1024]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+sw a0, 2044(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+2032]  .. .. .. .. .. .. .. .. .. .. .. .. 89 67 45 23
+sw a0, -4(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [-016]  .. .. .. .. .. .. .. .. .. .. .. .. 89 67 45 23
+sw a0, -2048(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [-2048]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+sw a4, 0(a5) ::
+  inputs: a4=0xabcdef0123456789, a5=&area_mid
+  [+000]  89 67 45 23 .. .. .. .. .. .. .. .. .. .. .. ..
+addi a0, a1, 1 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x0000000000001001
+addi a0, a1, 2 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x0000000000001002
+addi a0, a1, 4 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x0000000000001004
+addi a0, a1, 8 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x0000000000001008
+addi a0, a1, 16 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x0000000000001010
+addi a0, a1, 32 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x0000000000001020
+addi a0, a1, 64 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x0000000000001040
+addi a0, a1, 128 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x0000000000001080
+addi a0, a1, 256 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x0000000000001100
+addi a0, a1, 1024 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x0000000000001400
+addi a0, a1, 2047 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x00000000000017ff
+addi a0, a1, -1 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x0000000000000fff
+addi a0, a1, -2048 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x0000000000000800
+addi a0, a1, 1 ::
+  inputs: a1=0x000000007fffffff
+  output: a0=0x0000000080000000
+addi a0, a1, 1 ::
+  inputs: a1=0x00000000fffffffe
+  output: a0=0x00000000ffffffff
+addi a0, a1, 1 ::
+  inputs: a1=0x00000000ffffffff
+  output: a0=0x0000000100000000
+addi t5, t6, 1 ::
+  inputs: t6=0x0000000000001000
+  output: t5=0x0000000000001001
+addi zero, a0, 1 ::
+  inputs: a0=0x0000000000001000
+  output: zero=0x0000000000000000
+slti a0, a1, 0 ::
+  inputs: a1=0x0000000000000000
+  output: a0=0x0000000000000000
+slti a0, a1, 0 ::
+  inputs: a1=0x0000000000000001
+  output: a0=0x0000000000000000
+slti a0, a1, 0 ::
+  inputs: a1=0xffffffffffffffff
+  output: a0=0x0000000000000001
+slti a0, a1, 0x7ff ::
+  inputs: a1=0x00000000000007ff
+  output: a0=0x0000000000000000
+slti a0, a1, 0x7ff ::
+  inputs: a1=0x0000000000000800
+  output: a0=0x0000000000000000
+slti a0, a1, 0xffffffffffffffff ::
+  inputs: a1=0xffffffffffffffff
+  output: a0=0x0000000000000000
+slti a0, a1, 0xffffffffffffffff ::
+  inputs: a1=0x0000000000000000
+  output: a0=0x0000000000000000
+slti t5, t6, 0 ::
+  inputs: t6=0x0000000000000000
+  output: t5=0x0000000000000000
+slti t5, t6, 0 ::
+  inputs: t6=0x0000000000000001
+  output: t5=0x0000000000000000
+slti zero, a0, 1 ::
+  inputs: a0=0x0000000000000000
+  output: zero=0x0000000000000000
+sltiu a0, a1, 0 ::
+  inputs: a1=0x0000000000000000
+  output: a0=0x0000000000000000
+sltiu a0, a1, 0 ::
+  inputs: a1=0x0000000000000001
+  output: a0=0x0000000000000000
+sltiu a0, a1, 0 ::
+  inputs: a1=0xffffffffffffffff
+  output: a0=0x0000000000000000
+sltiu a0, a1, 0x7ff ::
+  inputs: a1=0x00000000000007ff
+  output: a0=0x0000000000000000
+sltiu a0, a1, 0x7ff ::
+  inputs: a1=0x0000000000000800
+  output: a0=0x0000000000000000
+sltiu a0, a1, 0xffffffffffffffff ::
+  inputs: a1=0xffffffffffffffff
+  output: a0=0x0000000000000000
+sltiu a0, a1, 0xffffffffffffffff ::
+  inputs: a1=0x0000000000000000
+  output: a0=0x0000000000000001
+sltiu t5, t6, 0 ::
+  inputs: t6=0x0000000000000000
+  output: t5=0x0000000000000000
+sltiu t5, t6, 0 ::
+  inputs: t6=0x0000000000000001
+  output: t5=0x0000000000000000
+sltiu zero, a0, 1 ::
+  inputs: a0=0x0000000000000000
+  output: zero=0x0000000000000000
+xori a0, a1, 0 ::
+  inputs: a1=0x0000ffff0000ffff
+  output: a0=0x0000ffff0000ffff
+xori a0, a1, 0 ::
+  inputs: a1=0xffff0000ffff0000
+  output: a0=0xffff0000ffff0000
+xori a0, a1, 0x7ff ::
+  inputs: a1=0x0000ffff0000ffff
+  output: a0=0x0000ffff0000f800
+xori a0, a1, 0x7ff ::
+  inputs: a1=0xffff0000ffff0000
+  output: a0=0xffff0000ffff07ff
+xori a0, a1, 0xffffffffffffffff ::
+  inputs: a1=0x0000ffff0000ffff
+  output: a0=0xffff0000ffff0000
+xori a0, a1, 0xffffffffffffffff ::
+  inputs: a1=0xffff0000ffff0000
+  output: a0=0x0000ffff0000ffff
+xori t5, t6, 0 ::
+  inputs: t6=0x0000ffff0000ffff
+  output: t5=0x0000ffff0000ffff
+xori zero, a0, 0x7ff ::
+  inputs: a0=0x0000ffff0000ffff
+  output: zero=0x0000000000000000
+ori a0, a1, 0 ::
+  inputs: a1=0x0000ffff0000ffff
+  output: a0=0x0000ffff0000ffff
+ori a0, a1, 0 ::
+  inputs: a1=0xffff0000ffff0000
+  output: a0=0xffff0000ffff0000
+ori a0, a1, 0x7ff ::
+  inputs: a1=0x0000ffff0000ffff
+  output: a0=0x0000ffff0000ffff
+ori a0, a1, 0x7ff ::
+  inputs: a1=0xffff0000ffff0000
+  output: a0=0xffff0000ffff07ff
+ori a0, a1, 0xffffffffffffffff ::
+  inputs: a1=0x0000ffff0000ffff
+  output: a0=0xffffffffffffffff
+ori a0, a1, 0xffffffffffffffff ::
+  inputs: a1=0xffff0000ffff0000
+  output: a0=0xffffffffffffffff
+ori t5, t6, 0 ::
+  inputs: t6=0x0000ffff0000ffff
+  output: t5=0x0000ffff0000ffff
+ori zero, a0, 0x7ff ::
+  inputs: a0=0x0000ffff0000ffff
+  output: zero=0x0000000000000000
+andi a0, a1, 0 ::
+  inputs: a1=0x0000ffff0000ffff
+  output: a0=0x0000000000000000
+andi a0, a1, 0 ::
+  inputs: a1=0xffff0000ffff0000
+  output: a0=0x0000000000000000
+andi a0, a1, 0x7ff ::
+  inputs: a1=0x0000ffff0000ffff
+  output: a0=0x00000000000007ff
+andi a0, a1, 0x7ff ::
+  inputs: a1=0xffff0000ffff0000
+  output: a0=0x0000000000000000
+andi a0, a1, 0xffffffffffffffff ::
+  inputs: a1=0x0000ffff0000ffff
+  output: a0=0x0000ffff0000ffff
+andi a0, a1, 0xffffffffffffffff ::
+  inputs: a1=0xffff0000ffff0000
+  output: a0=0xffff0000ffff0000
+andi t5, t6, 0 ::
+  inputs: t6=0x0000ffff0000ffff
+  output: t5=0x0000000000000000
+andi zero, a0, 0x7ff ::
+  inputs: a0=0x0000ffff0000ffff
+  output: zero=0x0000000000000000
+slli a0, a1, 0 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0xabcdef0123456789
+slli a0, a1, 1 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x579bde02468acf12
+slli a0, a1, 2 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0xaf37bc048d159e24
+slli a0, a1, 4 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0xbcdef01234567890
+slli a0, a1, 8 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0xcdef012345678900
+slli a0, a1, 16 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0xef01234567890000
+slli a0, a1, 32 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x2345678900000000
+slli a0, a1, 63 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x8000000000000000
+slli t5, t6, 1 ::
+  inputs: t6=0xabcdef0123456789
+  output: t5=0x579bde02468acf12
+slli zero, a0, 1 ::
+  inputs: a0=0xabcdef0123456789
+  output: zero=0x0000000000000000
+srli a0, a1, 0 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0xabcdef0123456789
+srli a0, a1, 1 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x55e6f78091a2b3c4
+srli a0, a1, 2 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x2af37bc048d159e2
+srli a0, a1, 4 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x0abcdef012345678
+srli a0, a1, 8 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x00abcdef01234567
+srli a0, a1, 16 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x0000abcdef012345
+srli a0, a1, 32 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x00000000abcdef01
+srli a0, a1, 63 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x0000000000000001
+srli t5, t6, 1 ::
+  inputs: t6=0xabcdef0123456789
+  output: t5=0x55e6f78091a2b3c4
+srli zero, a0, 1 ::
+  inputs: a0=0xabcdef0123456789
+  output: zero=0x0000000000000000
+srai a0, a1, 0 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0xabcdef0123456789
+srai a0, a1, 1 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0xd5e6f78091a2b3c4
+srai a0, a1, 2 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0xeaf37bc048d159e2
+srai a0, a1, 4 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0xfabcdef012345678
+srai a0, a1, 8 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0xffabcdef01234567
+srai a0, a1, 16 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0xffffabcdef012345
+srai a0, a1, 32 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0xffffffffabcdef01
+srai a0, a1, 63 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0xffffffffffffffff
+srai t5, t6, 1 ::
+  inputs: t6=0xabcdef0123456789
+  output: t5=0xd5e6f78091a2b3c4
+srai zero, a0, 1 ::
+  inputs: a0=0xabcdef0123456789
+  output: zero=0x0000000000000000
+add a0, a1, a2 ::
+  inputs: a1=0x0000000000001000, a2=0x0000000000002000
+  output: a0=0x0000000000003000
+add a0, a1, a2 ::
+  inputs: a1=0x000000007fffffff, a2=0x0000000000000001
+  output: a0=0x0000000080000000
+add a0, a1, a2 ::
+  inputs: a1=0x00000000fffffffe, a2=0x0000000000000001
+  output: a0=0x00000000ffffffff
+add a0, a1, a2 ::
+  inputs: a1=0x00000000ffffffff, a2=0x0000000000000001
+  output: a0=0x0000000100000000
+add a0, a1, a2 ::
+  inputs: a1=0xfffffffffffffffe, a2=0x0000000000000001
+  output: a0=0xffffffffffffffff
+add a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0x0000000000000001
+  output: a0=0x0000000000000000
+add t4, t5, t6 ::
+  inputs: t5=0x0000000000001000, t6=0x0000000000002000
+  output: t4=0x0000000000003000
+add zero, a0, a1 ::
+  inputs: a0=0x0000000000001000, a1=0x0000000000002000
+  output: zero=0x0000000000000000
+sub a0, a1, a2 ::
+  inputs: a1=0x0000000000001000, a2=0x0000000000000fff
+  output: a0=0x0000000000000001
+sub a0, a1, a2 ::
+  inputs: a1=0x0000000000001000, a2=0x0000000000001000
+  output: a0=0x0000000000000000
+sub a0, a1, a2 ::
+  inputs: a1=0x0000000000001000, a2=0x0000000000001001
+  output: a0=0xffffffffffffffff
+sub a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0x0000000000000000
+  output: a0=0xffffffffffffffff
+sub a0, a1, a2 ::
+  inputs: a1=0x0000000100000000, a2=0x0000000000000001
+  output: a0=0x00000000ffffffff
+sub t4, t5, t6 ::
+  inputs: t5=0x0000000000001000, t6=0x0000000000000fff
+  output: t4=0x0000000000000001
+sub zero, a0, a1 ::
+  inputs: a0=0x0000000000001000, a1=0x0000000000000fff
+  output: zero=0x0000000000000000
+sll a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000000
+  output: a0=0xabcdef0123456789
+sll a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000001
+  output: a0=0x579bde02468acf12
+sll a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000002
+  output: a0=0xaf37bc048d159e24
+sll a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000004
+  output: a0=0xbcdef01234567890
+sll a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000008
+  output: a0=0xcdef012345678900
+sll a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000010
+  output: a0=0xef01234567890000
+sll a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000020
+  output: a0=0x2345678900000000
+sll a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x000000000000003f
+  output: a0=0x8000000000000000
+sll a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000040
+  output: a0=0xabcdef0123456789
+sll t4, t5, t6 ::
+  inputs: t5=0xabcdef0123456789, t6=0x0000000000000001
+  output: t4=0x579bde02468acf12
+sll zero, a0, a1 ::
+  inputs: a0=0xabcdef0123456789, a1=0x0000000000000001
+  output: zero=0x0000000000000000
+slt a0, a1, a2 ::
+  inputs: a1=0x0000000000000000, a2=0x0000000000000000
+  output: a0=0x0000000000000000
+slt a0, a1, a2 ::
+  inputs: a1=0x0000000000000000, a2=0x0000000000000001
+  output: a0=0x0000000000000001
+slt a0, a1, a2 ::
+  inputs: a1=0x0000000000000000, a2=0xffffffffffffffff
+  output: a0=0x0000000000000000
+slt t4, t5, t6 ::
+  inputs: t5=0x0000000000000000, t6=0x0000000000000000
+  output: t4=0x0000000000000000
+slt t4, t5, t6 ::
+  inputs: t5=0x0000000000000000, t6=0x0000000000000001
+  output: t4=0x0000000000000001
+slt zero, a0, a1 ::
+  inputs: a0=0x0000000000000000, a1=0x0000000000000001
+  output: zero=0x0000000000000000
+sltu a0, a1, a2 ::
+  inputs: a1=0x0000000000000000, a2=0x0000000000000000
+  output: a0=0x0000000000000000
+sltu a0, a1, a2 ::
+  inputs: a1=0x0000000000000000, a2=0x0000000000000001
+  output: a0=0x0000000000000001
+sltu a0, a1, a2 ::
+  inputs: a1=0x0000000000000000, a2=0xffffffffffffffff
+  output: a0=0x0000000000000001
+sltu t4, t5, t6 ::
+  inputs: t5=0x0000000000000000, t6=0x0000000000000000
+  output: t4=0x0000000000000000
+sltu t4, t5, t6 ::
+  inputs: t5=0x0000000000000000, t6=0x0000000000000001
+  output: t4=0x0000000000000001
+sltu zero, a0, a1 ::
+  inputs: a0=0x0000000000000000, a1=0x0000000000000001
+  output: zero=0x0000000000000000
+xor a0, a1, a2 ::
+  inputs: a1=0x0000ffff0000ffff, a2=0x00000000ffffffff
+  output: a0=0x0000ffffffff0000
+xor t4, t5, t6 ::
+  inputs: t5=0x0000ffff0000ffff, t6=0x00000000ffffffff
+  output: t4=0x0000ffffffff0000
+xor zero, a0, a1 ::
+  inputs: a0=0x0000ffff0000ffff, a1=0x00000000ffffffff
+  output: zero=0x0000000000000000
+srl a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000000
+  output: a0=0xabcdef0123456789
+srl a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000001
+  output: a0=0x55e6f78091a2b3c4
+srl a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000002
+  output: a0=0x2af37bc048d159e2
+srl a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000004
+  output: a0=0x0abcdef012345678
+srl a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000008
+  output: a0=0x00abcdef01234567
+srl a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000010
+  output: a0=0x0000abcdef012345
+srl a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000020
+  output: a0=0x00000000abcdef01
+srl a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x000000000000003f
+  output: a0=0x0000000000000001
+srl a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000040
+  output: a0=0xabcdef0123456789
+srl t4, t5, t6 ::
+  inputs: t5=0xabcdef0123456789, t6=0x0000000000000001
+  output: t4=0x55e6f78091a2b3c4
+srl zero, a0, a1 ::
+  inputs: a0=0xabcdef0123456789, a1=0x0000000000000001
+  output: zero=0x0000000000000000
+sra a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000000
+  output: a0=0xabcdef0123456789
+sra a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000001
+  output: a0=0xd5e6f78091a2b3c4
+sra a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000002
+  output: a0=0xeaf37bc048d159e2
+sra a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000004
+  output: a0=0xfabcdef012345678
+sra a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000008
+  output: a0=0xffabcdef01234567
+sra a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000010
+  output: a0=0xffffabcdef012345
+sra a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000020
+  output: a0=0xffffffffabcdef01
+sra a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x000000000000003f
+  output: a0=0xffffffffffffffff
+sra a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000040
+  output: a0=0xabcdef0123456789
+sra t4, t5, t6 ::
+  inputs: t5=0xabcdef0123456789, t6=0x0000000000000001
+  output: t4=0xd5e6f78091a2b3c4
+sra zero, a0, a1 ::
+  inputs: a0=0xabcdef0123456789, a1=0x0000000000000001
+  output: zero=0x0000000000000000
+or a0, a1, a2 ::
+  inputs: a1=0x0000ffff0000ffff, a2=0x00000000ffffffff
+  output: a0=0x0000ffffffffffff
+or t4, t5, t6 ::
+  inputs: t5=0x0000ffff0000ffff, t6=0x00000000ffffffff
+  output: t4=0x0000ffffffffffff
+or zero, a0, a1 ::
+  inputs: a0=0x0000ffff0000ffff, a1=0x00000000ffffffff
+  output: zero=0x0000000000000000
+and a0, a1, a2 ::
+  inputs: a1=0x0000ffff0000ffff, a2=0x00000000ffffffff
+  output: a0=0x000000000000ffff
+and t4, t5, t6 ::
+  inputs: t5=0x0000ffff0000ffff, t6=0x00000000ffffffff
+  output: t4=0x000000000000ffff
+and zero, a0, a1 ::
+  inputs: a0=0x0000ffff0000ffff, a1=0x00000000ffffffff
+  output: zero=0x0000000000000000
+fence ::
+fence.tso ::
+
+RV64I base instruction set, additions
+lwu a0, 0(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x00000000ab16b28b
+  no memory changes
+lwu a0, 4(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000000ec1edba
+  no memory changes
+lwu a0, 8(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000006117bfcc
+  no memory changes
+lwu a0, 16(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000002d4154e2
+  no memory changes
+lwu a0, 32(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x00000000ce7e8e1c
+  no memory changes
+lwu a0, 64(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x00000000022aa69b
+  no memory changes
+lwu a0, 128(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x00000000e3156eea
+  no memory changes
+lwu a0, 256(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000097ee3bb0
+  no memory changes
+lwu a0, 512(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x000000004ce82b35
+  no memory changes
+lwu a0, 1024(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x00000000185668b2
+  no memory changes
+lwu a0, 2044(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x00000000e3fcbe9a
+  no memory changes
+lwu a0, -4(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000052d34dcc
+  no memory changes
+lwu a0, -2048(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0000000098b98bf4
+  no memory changes
+lwu a4, 0(a5) ::
+  inputs: a5=&area_mid
+  output: a4=0x000000004d86ad56
+  no memory changes
+lwu zero, 0(a0) ::
+  inputs: a0=&area_mid
+  output: zero=0x0000000000000000
+  no memory changes
+ld a0, 0(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x396ebd333e9785d7
+  no memory changes
+ld a0, 4(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x2d4a090205687129
+  no memory changes
+ld a0, 8(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xd300b16a85cd7c06
+  no memory changes
+ld a0, 16(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x0de93ef601cc1aa8
+  no memory changes
+ld a0, 32(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x933e83556593c5dc
+  no memory changes
+ld a0, 64(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xc30c82379dd342d0
+  no memory changes
+ld a0, 128(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x88f1496a86e6d207
+  no memory changes
+ld a0, 256(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x2a6f38124a0f2f9c
+  no memory changes
+ld a0, 512(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xd04a4b711fa93ec2
+  no memory changes
+ld a0, 1024(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x4aa8a98a2b57bc7f
+  no memory changes
+ld a0, 2040(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0x59bee9ecfe744029
+  no memory changes
+ld a0, -4(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xeef6a921c896781e
+  no memory changes
+ld a0, -2048(a1) ::
+  inputs: a1=&area_mid
+  output: a0=0xa56a8d842a3a5f41
+  no memory changes
+ld a4, 0(a5) ::
+  inputs: a5=&area_mid
+  output: a4=0x9821dab6df0781a2
+  no memory changes
+ld zero, 0(a0) ::
+  inputs: a0=&area_mid
+  output: zero=0x0000000000000000
+  no memory changes
+sd a0, 0(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+000]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+sd a0, 4(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+000]  .. .. .. .. 89 67 45 23 01 ef cd ab .. .. .. ..
+sd a0, 8(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+000]  .. .. .. .. .. .. .. .. 89 67 45 23 01 ef cd ab
+sd a0, 16(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+016]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+sd a0, 32(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+032]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+sd a0, 64(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+064]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+sd a0, 128(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+128]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+sd a0, 256(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+256]  .. 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+sd a0, 512(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+512]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+sd a0, 1024(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+1024]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+sd a0, 2040(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [+2032]  .. .. .. .. .. .. .. .. 89 67 45 23 01 ef cd ab
+sd a0, -4(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [-016]  .. .. .. .. .. .. .. .. .. .. .. .. 89 67 45 23
+  [+000]  01 ef cd ab .. .. .. .. .. .. .. .. .. .. .. ..
+sd a0, -2048(a1) ::
+  inputs: a0=0xabcdef0123456789, a1=&area_mid
+  [-2048]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+sd a4, 0(a5) ::
+  inputs: a4=0xabcdef0123456789, a5=&area_mid
+  [+000]  89 67 45 23 01 ef cd ab .. .. .. .. .. .. .. ..
+addiw a0, a1, 1 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x0000000000001001
+addiw a0, a1, 2 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x0000000000001002
+addiw a0, a1, 4 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x0000000000001004
+addiw a0, a1, 8 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x0000000000001008
+addiw a0, a1, 16 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x0000000000001010
+addiw a0, a1, 32 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x0000000000001020
+addiw a0, a1, 64 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x0000000000001040
+addiw a0, a1, 128 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x0000000000001080
+addiw a0, a1, 256 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x0000000000001100
+addiw a0, a1, 1024 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x0000000000001400
+addiw a0, a1, 2047 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x00000000000017ff
+addiw a0, a1, -1 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x0000000000000fff
+addiw a0, a1, -2048 ::
+  inputs: a1=0x0000000000001000
+  output: a0=0x0000000000000800
+addiw a0, a1, 1 ::
+  inputs: a1=0x000000007fffffff
+  output: a0=0xffffffff80000000
+addiw a0, a1, 1 ::
+  inputs: a1=0x00000000fffffffe
+  output: a0=0xffffffffffffffff
+addiw a0, a1, 1 ::
+  inputs: a1=0x00000000ffffffff
+  output: a0=0x0000000000000000
+addiw t5, t6, 1 ::
+  inputs: t6=0x0000000000001000
+  output: t5=0x0000000000001001
+addiw zero, a0, 1 ::
+  inputs: a0=0x0000000000001000
+  output: zero=0x0000000000000000
+slliw a0, a1, 0 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x0000000023456789
+slliw a0, a1, 1 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x00000000468acf12
+slliw a0, a1, 2 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0xffffffff8d159e24
+slliw a0, a1, 4 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x0000000034567890
+slliw a0, a1, 8 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x0000000045678900
+slliw a0, a1, 16 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x0000000067890000
+slliw a0, a1, 31 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0xffffffff80000000
+slliw t5, t6, 1 ::
+  inputs: t6=0xabcdef0123456789
+  output: t5=0x00000000468acf12
+slliw zero, a0, 1 ::
+  inputs: a0=0xabcdef0123456789
+  output: zero=0x0000000000000000
+srliw a0, a1, 0 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x0000000023456789
+srliw a0, a1, 1 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x0000000011a2b3c4
+srliw a0, a1, 2 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x0000000008d159e2
+srliw a0, a1, 4 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x0000000002345678
+srliw a0, a1, 8 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x0000000000234567
+srliw a0, a1, 16 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x0000000000002345
+srliw a0, a1, 31 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x0000000000000000
+srliw t5, t6, 1 ::
+  inputs: t6=0xabcdef0123456789
+  output: t5=0x0000000011a2b3c4
+srliw zero, a0, 1 ::
+  inputs: a0=0xabcdef0123456789
+  output: zero=0x0000000000000000
+sraiw a0, a1, 0 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x0000000023456789
+sraiw a0, a1, 1 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x0000000011a2b3c4
+sraiw a0, a1, 2 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x0000000008d159e2
+sraiw a0, a1, 4 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x0000000002345678
+sraiw a0, a1, 8 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x0000000000234567
+sraiw a0, a1, 16 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x0000000000002345
+sraiw a0, a1, 31 ::
+  inputs: a1=0xabcdef0123456789
+  output: a0=0x0000000000000000
+srai t5, t6, 1 ::
+  inputs: t6=0xabcdef0123456789
+  output: t5=0xd5e6f78091a2b3c4
+srai zero, a0, 1 ::
+  inputs: a0=0xabcdef0123456789
+  output: zero=0x0000000000000000
+addw a0, a1, a2 ::
+  inputs: a1=0x0000000000001000, a2=0x0000000000002000
+  output: a0=0x0000000000003000
+addw a0, a1, a2 ::
+  inputs: a1=0x000000007fffffff, a2=0x0000000000000001
+  output: a0=0xffffffff80000000
+addw a0, a1, a2 ::
+  inputs: a1=0x00000000fffffffe, a2=0x0000000000000001
+  output: a0=0xffffffffffffffff
+addw a0, a1, a2 ::
+  inputs: a1=0x00000000ffffffff, a2=0x0000000000000001
+  output: a0=0x0000000000000000
+addw a0, a1, a2 ::
+  inputs: a1=0xfffffffffffffffe, a2=0x0000000000000001
+  output: a0=0xffffffffffffffff
+addw a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0x0000000000000001
+  output: a0=0x0000000000000000
+addw t4, t5, t6 ::
+  inputs: t5=0x0000000000001000, t6=0x0000000000002000
+  output: t4=0x0000000000003000
+addw zero, a0, a1 ::
+  inputs: a0=0x0000000000001000, a1=0x0000000000002000
+  output: zero=0x0000000000000000
+subw a0, a1, a2 ::
+  inputs: a1=0x0000000000001000, a2=0x0000000000000fff
+  output: a0=0x0000000000000001
+subw a0, a1, a2 ::
+  inputs: a1=0x0000000000001000, a2=0x0000000000001000
+  output: a0=0x0000000000000000
+subw a0, a1, a2 ::
+  inputs: a1=0x0000000000001000, a2=0x0000000000001001
+  output: a0=0xffffffffffffffff
+subw a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0x0000000000000000
+  output: a0=0xffffffffffffffff
+subw a0, a1, a2 ::
+  inputs: a1=0x0000000100000000, a2=0x0000000000000001
+  output: a0=0xffffffffffffffff
+subw t4, t5, t6 ::
+  inputs: t5=0x0000000000001000, t6=0x0000000000000fff
+  output: t4=0x0000000000000001
+subw zero, a0, a1 ::
+  inputs: a0=0x0000000000001000, a1=0x0000000000000fff
+  output: zero=0x0000000000000000
+sllw a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000000
+  output: a0=0x0000000023456789
+sllw a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000001
+  output: a0=0x00000000468acf12
+sllw a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000002
+  output: a0=0xffffffff8d159e24
+sllw a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000004
+  output: a0=0x0000000034567890
+sllw a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000008
+  output: a0=0x0000000045678900
+sllw a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000010
+  output: a0=0x0000000067890000
+sllw a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x000000000000001f
+  output: a0=0xffffffff80000000
+sllw a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000020
+  output: a0=0x0000000023456789
+sllw t4, t5, t6 ::
+  inputs: t5=0xabcdef0123456789, t6=0x0000000000000001
+  output: t4=0x00000000468acf12
+sllw zero, a0, a1 ::
+  inputs: a0=0xabcdef0123456789, a1=0x0000000000000001
+  output: zero=0x0000000000000000
+srlw a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000000
+  output: a0=0x0000000023456789
+srlw a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000001
+  output: a0=0x0000000011a2b3c4
+srlw a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000002
+  output: a0=0x0000000008d159e2
+srlw a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000004
+  output: a0=0x0000000002345678
+srlw a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000008
+  output: a0=0x0000000000234567
+srlw a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000010
+  output: a0=0x0000000000002345
+srlw a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x000000000000001f
+  output: a0=0x0000000000000000
+srlw a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000020
+  output: a0=0x0000000023456789
+srlw t4, t5, t6 ::
+  inputs: t5=0xabcdef0123456789, t6=0x0000000000000001
+  output: t4=0x0000000011a2b3c4
+srlw zero, a0, a1 ::
+  inputs: a0=0xabcdef0123456789, a1=0x0000000000000001
+  output: zero=0x0000000000000000
+sraw a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000000
+  output: a0=0x0000000023456789
+sraw a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000001
+  output: a0=0x0000000011a2b3c4
+sraw a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000002
+  output: a0=0x0000000008d159e2
+sraw a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000004
+  output: a0=0x0000000002345678
+sraw a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000008
+  output: a0=0x0000000000234567
+sraw a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000010
+  output: a0=0x0000000000002345
+sraw a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x000000000000001f
+  output: a0=0x0000000000000000
+sraw a0, a1, a2 ::
+  inputs: a1=0xabcdef0123456789, a2=0x0000000000000020
+  output: a0=0x0000000023456789
+sraw t4, t5, t6 ::
+  inputs: t5=0xabcdef0123456789, t6=0x0000000000000001
+  output: t4=0x0000000011a2b3c4
+sraw zero, a0, a1 ::
+  inputs: a0=0xabcdef0123456789, a1=0x0000000000000001
+  output: zero=0x0000000000000000
diff --git a/none/tests/riscv64/integer.vgtest b/none/tests/riscv64/integer.vgtest
new file mode 100644
index 000000000..daa059178
--- /dev/null
+++ b/none/tests/riscv64/integer.vgtest
@@ -0,0 +1,2 @@
+prog: integer
+vgopts: -q
diff --git a/none/tests/riscv64/muldiv.c b/none/tests/riscv64/muldiv.c
new file mode 100644
index 000000000..f6546c1c9
--- /dev/null
+++ b/none/tests/riscv64/muldiv.c
@@ -0,0 +1,351 @@
+/* Tests for the RV64M standard multiplication and division instruction-set
+   extension. */
+
+#include "testinst.h"
+
+static void test_muldiv_shared(void)
+{
+   printf(
+      "RV64M multiplication and division instruction set, shared operations\n");
+   /* Per group: ten a0/a1/a2 cases, then one t4/t5/t6 and one rd=zero case. */
+   /* ------------------ mul rd, rs1, rs2 ------------------- */
+   TESTINST_1_2(4, "mul a0, a1, a2", 0x0000000000005000, 0x0000000000002000, a0,
+                a1, a2);
+   TESTINST_1_2(4, "mul a0, a1, a2", 0x7fffffffffffffff, 0x0000000000000002, a0,
+                a1, a2);
+   TESTINST_1_2(4, "mul a0, a1, a2", 0x7fffffffffffffff, 0x7fffffffffffffff, a0,
+                a1, a2);
+   TESTINST_1_2(4, "mul a0, a1, a2", 0x7fffffffffffffff, 0xffffffffffffffff, a0,
+                a1, a2);
+   TESTINST_1_2(4, "mul a0, a1, a2", 0xffffffffffffffff, 0xffffffffffffffff, a0,
+                a1, a2);
+   TESTINST_1_2(4, "mul a0, a1, a2", 0x8000000000000000, 0x0000000000000002, a0,
+                a1, a2);
+   TESTINST_1_2(4, "mul a0, a1, a2", 0x8000000000000000, 0xffffffffffffffff, a0,
+                a1, a2);
+   TESTINST_1_2(4, "mul a0, a1, a2", 0x8000000000000000, 0x8000000000000000, a0,
+                a1, a2);
+   TESTINST_1_2(4, "mul a0, a1, a2", 0x0000000000000001, 0x0000000000000000, a0,
+                a1, a2);
+   TESTINST_1_2(4, "mul a0, a1, a2", 0xffffffffffffffff, 0x0000000000000000, a0,
+                a1, a2);
+
+   TESTINST_1_2(4, "mul t4, t5, t6", 0x0000000000001000, 0x0000000000002000, t4,
+                t5, t6);
+   TESTINST_1_2(4, "mul zero, a0, a1", 0x0000000000001000, 0x0000000000002000,
+                zero, a0, a1);
+
+   /* ------------------ mulh rd, rs1, rs2 ------------------ */
+   TESTINST_1_2(4, "mulh a0, a1, a2", 0x0000000000005000, 0x0000000000002000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulh a0, a1, a2", 0x7fffffffffffffff, 0x0000000000000002,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulh a0, a1, a2", 0x7fffffffffffffff, 0x7fffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulh a0, a1, a2", 0x7fffffffffffffff, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulh a0, a1, a2", 0xffffffffffffffff, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulh a0, a1, a2", 0x8000000000000000, 0x0000000000000002,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulh a0, a1, a2", 0x8000000000000000, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulh a0, a1, a2", 0x8000000000000000, 0x8000000000000000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulh a0, a1, a2", 0x0000000000000001, 0x0000000000000000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulh a0, a1, a2", 0xffffffffffffffff, 0x0000000000000000,
+                a0, a1, a2);
+
+   TESTINST_1_2(4, "mulh t4, t5, t6", 0x0000000000001000, 0x0000000000002000,
+                t4, t5, t6);
+   TESTINST_1_2(4, "mulh zero, a0, a1", 0x0000000000001000, 0x0000000000002000,
+                zero, a0, a1);
+
+   /* ----------------- mulhsu rd, rs1, rs2 ----------------- */
+   /* Not currently handled, so no mulhsu test cases are emitted here. */
+
+   /* ----------------- mulhu rd, rs1, rs2 ------------------ */
+   TESTINST_1_2(4, "mulhu a0, a1, a2", 0x0000000000005000, 0x0000000000002000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulhu a0, a1, a2", 0x7fffffffffffffff, 0x0000000000000002,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulhu a0, a1, a2", 0x7fffffffffffffff, 0x7fffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulhu a0, a1, a2", 0x7fffffffffffffff, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulhu a0, a1, a2", 0xffffffffffffffff, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulhu a0, a1, a2", 0x8000000000000000, 0x0000000000000002,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulhu a0, a1, a2", 0x8000000000000000, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulhu a0, a1, a2", 0x8000000000000000, 0x8000000000000000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulhu a0, a1, a2", 0x0000000000000001, 0x0000000000000000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulhu a0, a1, a2", 0xffffffffffffffff, 0x0000000000000000,
+                a0, a1, a2);
+
+   TESTINST_1_2(4, "mulhu t4, t5, t6", 0x0000000000001000, 0x0000000000002000,
+                t4, t5, t6);
+   TESTINST_1_2(4, "mulhu zero, a0, a1", 0x0000000000001000, 0x0000000000002000,
+                zero, a0, a1);
+
+   /* ------------------ div rd, rs1, rs2 ------------------- */
+   TESTINST_1_2(4, "div a0, a1, a2", 0x0000000000005000, 0x0000000000002000, a0,
+                a1, a2);
+   TESTINST_1_2(4, "div a0, a1, a2", 0x7fffffffffffffff, 0x0000000000000002, a0,
+                a1, a2);
+   TESTINST_1_2(4, "div a0, a1, a2", 0x7fffffffffffffff, 0x7fffffffffffffff, a0,
+                a1, a2);
+   TESTINST_1_2(4, "div a0, a1, a2", 0x7fffffffffffffff, 0xffffffffffffffff, a0,
+                a1, a2);
+   TESTINST_1_2(4, "div a0, a1, a2", 0xffffffffffffffff, 0xffffffffffffffff, a0,
+                a1, a2);
+   TESTINST_1_2(4, "div a0, a1, a2", 0x8000000000000000, 0x0000000000000002, a0,
+                a1, a2);
+   TESTINST_1_2(4, "div a0, a1, a2", 0x8000000000000000, 0xffffffffffffffff, a0,
+                a1, a2);
+   TESTINST_1_2(4, "div a0, a1, a2", 0x8000000000000000, 0x8000000000000000, a0,
+                a1, a2);
+   TESTINST_1_2(4, "div a0, a1, a2", 0x0000000000000001, 0x0000000000000000, a0,
+                a1, a2);
+   TESTINST_1_2(4, "div a0, a1, a2", 0xffffffffffffffff, 0x0000000000000000, a0,
+                a1, a2);
+
+   TESTINST_1_2(4, "div t4, t5, t6", 0x0000000000005000, 0x0000000000002000, t4,
+                t5, t6);
+   TESTINST_1_2(4, "div zero, a0, a1", 0x0000000000005000, 0x0000000000002000,
+                zero, a0, a1);
+
+   /* ------------------ divu rd, rs1, rs2 ------------------ */
+   TESTINST_1_2(4, "divu a0, a1, a2", 0x0000000000005000, 0x0000000000002000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divu a0, a1, a2", 0x7fffffffffffffff, 0x0000000000000002,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divu a0, a1, a2", 0x7fffffffffffffff, 0x7fffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divu a0, a1, a2", 0x7fffffffffffffff, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divu a0, a1, a2", 0xffffffffffffffff, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divu a0, a1, a2", 0x8000000000000000, 0x0000000000000002,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divu a0, a1, a2", 0x8000000000000000, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divu a0, a1, a2", 0x8000000000000000, 0x8000000000000000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divu a0, a1, a2", 0x0000000000000001, 0x0000000000000000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divu a0, a1, a2", 0xffffffffffffffff, 0x0000000000000000,
+                a0, a1, a2);
+
+   TESTINST_1_2(4, "divu t4, t5, t6", 0x0000000000005000, 0x0000000000002000,
+                t4, t5, t6);
+   TESTINST_1_2(4, "divu zero, a0, a1", 0x0000000000005000, 0x0000000000002000,
+                zero, a0, a1);
+
+   /* ------------------ rem rd, rs1, rs2 ------------------- */
+   TESTINST_1_2(4, "rem a0, a1, a2", 0x0000000000005000, 0x0000000000002000, a0,
+                a1, a2);
+   TESTINST_1_2(4, "rem a0, a1, a2", 0x7fffffffffffffff, 0x0000000000000002, a0,
+                a1, a2);
+   TESTINST_1_2(4, "rem a0, a1, a2", 0x7fffffffffffffff, 0x7fffffffffffffff, a0,
+                a1, a2);
+   TESTINST_1_2(4, "rem a0, a1, a2", 0x7fffffffffffffff, 0xffffffffffffffff, a0,
+                a1, a2);
+   TESTINST_1_2(4, "rem a0, a1, a2", 0xffffffffffffffff, 0xffffffffffffffff, a0,
+                a1, a2);
+   TESTINST_1_2(4, "rem a0, a1, a2", 0x8000000000000000, 0x0000000000000002, a0,
+                a1, a2);
+   TESTINST_1_2(4, "rem a0, a1, a2", 0x8000000000000000, 0xffffffffffffffff, a0,
+                a1, a2);
+   TESTINST_1_2(4, "rem a0, a1, a2", 0x8000000000000000, 0x8000000000000000, a0,
+                a1, a2);
+   TESTINST_1_2(4, "rem a0, a1, a2", 0x0000000000000001, 0x0000000000000000, a0,
+                a1, a2);
+   TESTINST_1_2(4, "rem a0, a1, a2", 0xffffffffffffffff, 0x0000000000000000, a0,
+                a1, a2);
+
+   TESTINST_1_2(4, "rem t4, t5, t6", 0x0000000000005000, 0x0000000000002000, t4,
+                t5, t6);
+   TESTINST_1_2(4, "rem zero, a0, a1", 0x0000000000005000, 0x0000000000002000,
+                zero, a0, a1);
+
+   /* ------------------ remu rd, rs1, rs2 ------------------ */
+   TESTINST_1_2(4, "remu a0, a1, a2", 0x0000000000005000, 0x0000000000002000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remu a0, a1, a2", 0x7fffffffffffffff, 0x0000000000000002,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remu a0, a1, a2", 0x7fffffffffffffff, 0x7fffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remu a0, a1, a2", 0x7fffffffffffffff, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remu a0, a1, a2", 0xffffffffffffffff, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remu a0, a1, a2", 0x8000000000000000, 0x0000000000000002,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remu a0, a1, a2", 0x8000000000000000, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remu a0, a1, a2", 0x8000000000000000, 0x8000000000000000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remu a0, a1, a2", 0x0000000000000001, 0x0000000000000000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remu a0, a1, a2", 0xffffffffffffffff, 0x0000000000000000,
+                a0, a1, a2);
+
+   TESTINST_1_2(4, "remu t4, t5, t6", 0x0000000000005000, 0x0000000000002000,
+                t4, t5, t6);
+   TESTINST_1_2(4, "remu zero, a0, a1", 0x0000000000005000, 0x0000000000002000,
+                zero, a0, a1);
+
+   printf("\n"); /* blank line after this section's output */
+}
+
+static void test_muldiv_additions(void)
+{
+   printf("RV64M multiplication and division instruction set, additions\n");
+   /* Unlike the shared tests, operands use 0x7fffffff/0x80000000 as limits. */
+   /* ------------------ mulw rd, rs1, rs2 ------------------ */
+   TESTINST_1_2(4, "mulw a0, a1, a2", 0x0000000000005000, 0x0000000000002000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulw a0, a1, a2", 0x000000007fffffff, 0x0000000000000002,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulw a0, a1, a2", 0x000000007fffffff, 0x000000007fffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulw a0, a1, a2", 0x000000007fffffff, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulw a0, a1, a2", 0xffffffffffffffff, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulw a0, a1, a2", 0x0000000080000000, 0x0000000000000002,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulw a0, a1, a2", 0x0000000080000000, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulw a0, a1, a2", 0x0000000080000000, 0x0000000080000000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulw a0, a1, a2", 0x0000000000000001, 0x0000000000000000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "mulw a0, a1, a2", 0xffffffffffffffff, 0x0000000000000000,
+                a0, a1, a2);
+
+   TESTINST_1_2(4, "mulw t4, t5, t6", 0x0000000000001000, 0x0000000000002000,
+                t4, t5, t6);
+   TESTINST_1_2(4, "mulw zero, a0, a1", 0x0000000000001000, 0x0000000000002000,
+                zero, a0, a1);
+
+   /* ------------------ divw rd, rs1, rs2 ------------------ */
+   TESTINST_1_2(4, "divw a0, a1, a2", 0x0000000000005000, 0x0000000000002000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divw a0, a1, a2", 0x000000007fffffff, 0x0000000000000002,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divw a0, a1, a2", 0x000000007fffffff, 0x000000007fffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divw a0, a1, a2", 0x000000007fffffff, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divw a0, a1, a2", 0xffffffffffffffff, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divw a0, a1, a2", 0x0000000080000000, 0x0000000000000002,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divw a0, a1, a2", 0x0000000080000000, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divw a0, a1, a2", 0x0000000080000000, 0x0000000080000000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divw a0, a1, a2", 0x0000000000000001, 0x0000000000000000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divw a0, a1, a2", 0xffffffffffffffff, 0x0000000000000000,
+                a0, a1, a2);
+
+   TESTINST_1_2(4, "divw t4, t5, t6", 0x0000000000001000, 0x0000000000002000,
+                t4, t5, t6);
+   TESTINST_1_2(4, "divw zero, a0, a1", 0x0000000000001000, 0x0000000000002000,
+                zero, a0, a1);
+
+   /* ----------------- divuw rd, rs1, rs2 ------------------ */
+   TESTINST_1_2(4, "divuw a0, a1, a2", 0x0000000000005000, 0x0000000000002000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divuw a0, a1, a2", 0x000000007fffffff, 0x0000000000000002,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divuw a0, a1, a2", 0x000000007fffffff, 0x000000007fffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divuw a0, a1, a2", 0x000000007fffffff, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divuw a0, a1, a2", 0xffffffffffffffff, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divuw a0, a1, a2", 0x0000000080000000, 0x0000000000000002,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divuw a0, a1, a2", 0x0000000080000000, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divuw a0, a1, a2", 0x0000000080000000, 0x0000000080000000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divuw a0, a1, a2", 0x0000000000000001, 0x0000000000000000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "divuw a0, a1, a2", 0xffffffffffffffff, 0x0000000000000000,
+                a0, a1, a2);
+
+   TESTINST_1_2(4, "divuw t4, t5, t6", 0x0000000000001000, 0x0000000000002000,
+                t4, t5, t6);
+   TESTINST_1_2(4, "divuw zero, a0, a1", 0x0000000000001000, 0x0000000000002000,
+                zero, a0, a1);
+
+   /* ------------------ remw rd, rs1, rs2 ------------------ */
+   TESTINST_1_2(4, "remw a0, a1, a2", 0x0000000000005000, 0x0000000000002000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remw a0, a1, a2", 0x000000007fffffff, 0x0000000000000002,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remw a0, a1, a2", 0x000000007fffffff, 0x000000007fffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remw a0, a1, a2", 0x000000007fffffff, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remw a0, a1, a2", 0xffffffffffffffff, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remw a0, a1, a2", 0x0000000080000000, 0x0000000000000002,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remw a0, a1, a2", 0x0000000080000000, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remw a0, a1, a2", 0x0000000080000000, 0x0000000080000000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remw a0, a1, a2", 0x0000000000000001, 0x0000000000000000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remw a0, a1, a2", 0xffffffffffffffff, 0x0000000000000000,
+                a0, a1, a2);
+
+   TESTINST_1_2(4, "remw t4, t5, t6", 0x0000000000001000, 0x0000000000002000,
+                t4, t5, t6);
+   TESTINST_1_2(4, "remw zero, a0, a1", 0x0000000000001000, 0x0000000000002000,
+                zero, a0, a1);
+
+   /* ----------------- remuw rd, rs1, rs2 ------------------ */
+   TESTINST_1_2(4, "remuw a0, a1, a2", 0x0000000000005000, 0x0000000000002000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remuw a0, a1, a2", 0x000000007fffffff, 0x0000000000000002,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remuw a0, a1, a2", 0x000000007fffffff, 0x000000007fffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remuw a0, a1, a2", 0x000000007fffffff, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remuw a0, a1, a2", 0xffffffffffffffff, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remuw a0, a1, a2", 0x0000000080000000, 0x0000000000000002,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remuw a0, a1, a2", 0x0000000080000000, 0xffffffffffffffff,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remuw a0, a1, a2", 0x0000000080000000, 0x0000000080000000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remuw a0, a1, a2", 0x0000000000000001, 0x0000000000000000,
+                a0, a1, a2);
+   TESTINST_1_2(4, "remuw a0, a1, a2", 0xffffffffffffffff, 0x0000000000000000,
+                a0, a1, a2);
+
+   TESTINST_1_2(4, "remuw t4, t5, t6", 0x0000000000001000, 0x0000000000002000,
+                t4, t5, t6);
+   TESTINST_1_2(4, "remuw zero, a0, a1", 0x0000000000001000, 0x0000000000002000,
+                zero, a0, a1);
+}
+
+int main(void)
+{
+   test_muldiv_shared();    /* prints the "shared operations" section first */
+   test_muldiv_additions(); /* then prints the "additions" section */
+   return 0;
+}
diff --git a/none/tests/riscv64/muldiv.stderr.exp b/none/tests/riscv64/muldiv.stderr.exp
new file mode 100644
index 000000000..e69de29bb
diff --git a/none/tests/riscv64/muldiv.stdout.exp b/none/tests/riscv64/muldiv.stdout.exp
new file mode 100644
index 000000000..a8760465b
--- /dev/null
+++ b/none/tests/riscv64/muldiv.stdout.exp
@@ -0,0 +1,435 @@
+RV64M multiplication and division instruction set, shared operations
+mul a0, a1, a2 ::
+  inputs: a1=0x0000000000005000, a2=0x0000000000002000
+  output: a0=0x000000000a000000
+mul a0, a1, a2 ::
+  inputs: a1=0x7fffffffffffffff, a2=0x0000000000000002
+  output: a0=0xfffffffffffffffe
+mul a0, a1, a2 ::
+  inputs: a1=0x7fffffffffffffff, a2=0x7fffffffffffffff
+  output: a0=0x0000000000000001
+mul a0, a1, a2 ::
+  inputs: a1=0x7fffffffffffffff, a2=0xffffffffffffffff
+  output: a0=0x8000000000000001
+mul a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0xffffffffffffffff
+  output: a0=0x0000000000000001
+mul a0, a1, a2 ::
+  inputs: a1=0x8000000000000000, a2=0x0000000000000002
+  output: a0=0x0000000000000000
+mul a0, a1, a2 ::
+  inputs: a1=0x8000000000000000, a2=0xffffffffffffffff
+  output: a0=0x8000000000000000
+mul a0, a1, a2 ::
+  inputs: a1=0x8000000000000000, a2=0x8000000000000000
+  output: a0=0x0000000000000000
+mul a0, a1, a2 ::
+  inputs: a1=0x0000000000000001, a2=0x0000000000000000
+  output: a0=0x0000000000000000
+mul a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0x0000000000000000
+  output: a0=0x0000000000000000
+mul t4, t5, t6 ::
+  inputs: t5=0x0000000000001000, t6=0x0000000000002000
+  output: t4=0x0000000002000000
+mul zero, a0, a1 ::
+  inputs: a0=0x0000000000001000, a1=0x0000000000002000
+  output: zero=0x0000000000000000
+mulh a0, a1, a2 ::
+  inputs: a1=0x0000000000005000, a2=0x0000000000002000
+  output: a0=0x0000000000000000
+mulh a0, a1, a2 ::
+  inputs: a1=0x7fffffffffffffff, a2=0x0000000000000002
+  output: a0=0x0000000000000000
+mulh a0, a1, a2 ::
+  inputs: a1=0x7fffffffffffffff, a2=0x7fffffffffffffff
+  output: a0=0x3fffffffffffffff
+mulh a0, a1, a2 ::
+  inputs: a1=0x7fffffffffffffff, a2=0xffffffffffffffff
+  output: a0=0xffffffffffffffff
+mulh a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0xffffffffffffffff
+  output: a0=0x0000000000000000
+mulh a0, a1, a2 ::
+  inputs: a1=0x8000000000000000, a2=0x0000000000000002
+  output: a0=0xffffffffffffffff
+mulh a0, a1, a2 ::
+  inputs: a1=0x8000000000000000, a2=0xffffffffffffffff
+  output: a0=0x0000000000000000
+mulh a0, a1, a2 ::
+  inputs: a1=0x8000000000000000, a2=0x8000000000000000
+  output: a0=0x4000000000000000
+mulh a0, a1, a2 ::
+  inputs: a1=0x0000000000000001, a2=0x0000000000000000
+  output: a0=0x0000000000000000
+mulh a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0x0000000000000000
+  output: a0=0x0000000000000000
+mulh t4, t5, t6 ::
+  inputs: t5=0x0000000000001000, t6=0x0000000000002000
+  output: t4=0x0000000000000000
+mulh zero, a0, a1 ::
+  inputs: a0=0x0000000000001000, a1=0x0000000000002000
+  output: zero=0x0000000000000000
+mulhu a0, a1, a2 ::
+  inputs: a1=0x0000000000005000, a2=0x0000000000002000
+  output: a0=0x0000000000000000
+mulhu a0, a1, a2 ::
+  inputs: a1=0x7fffffffffffffff, a2=0x0000000000000002
+  output: a0=0x0000000000000000
+mulhu a0, a1, a2 ::
+  inputs: a1=0x7fffffffffffffff, a2=0x7fffffffffffffff
+  output: a0=0x3fffffffffffffff
+mulhu a0, a1, a2 ::
+  inputs: a1=0x7fffffffffffffff, a2=0xffffffffffffffff
+  output: a0=0x7ffffffffffffffe
+mulhu a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0xffffffffffffffff
+  output: a0=0xfffffffffffffffe
+mulhu a0, a1, a2 ::
+  inputs: a1=0x8000000000000000, a2=0x0000000000000002
+  output: a0=0x0000000000000001
+mulhu a0, a1, a2 ::
+  inputs: a1=0x8000000000000000, a2=0xffffffffffffffff
+  output: a0=0x7fffffffffffffff
+mulhu a0, a1, a2 ::
+  inputs: a1=0x8000000000000000, a2=0x8000000000000000
+  output: a0=0x4000000000000000
+mulhu a0, a1, a2 ::
+  inputs: a1=0x0000000000000001, a2=0x0000000000000000
+  output: a0=0x0000000000000000
+mulhu a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0x0000000000000000
+  output: a0=0x0000000000000000
+mulhu t4, t5, t6 ::
+  inputs: t5=0x0000000000001000, t6=0x0000000000002000
+  output: t4=0x0000000000000000
+mulhu zero, a0, a1 ::
+  inputs: a0=0x0000000000001000, a1=0x0000000000002000
+  output: zero=0x0000000000000000
+div a0, a1, a2 ::
+  inputs: a1=0x0000000000005000, a2=0x0000000000002000
+  output: a0=0x0000000000000002
+div a0, a1, a2 ::
+  inputs: a1=0x7fffffffffffffff, a2=0x0000000000000002
+  output: a0=0x3fffffffffffffff
+div a0, a1, a2 ::
+  inputs: a1=0x7fffffffffffffff, a2=0x7fffffffffffffff
+  output: a0=0x0000000000000001
+div a0, a1, a2 ::
+  inputs: a1=0x7fffffffffffffff, a2=0xffffffffffffffff
+  output: a0=0x8000000000000001
+div a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0xffffffffffffffff
+  output: a0=0x0000000000000001
+div a0, a1, a2 ::
+  inputs: a1=0x8000000000000000, a2=0x0000000000000002
+  output: a0=0xc000000000000000
+div a0, a1, a2 ::
+  inputs: a1=0x8000000000000000, a2=0xffffffffffffffff
+  output: a0=0x8000000000000000
+div a0, a1, a2 ::
+  inputs: a1=0x8000000000000000, a2=0x8000000000000000
+  output: a0=0x0000000000000001
+div a0, a1, a2 ::
+  inputs: a1=0x0000000000000001, a2=0x0000000000000000
+  output: a0=0xffffffffffffffff
+div a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0x0000000000000000
+  output: a0=0xffffffffffffffff
+div t4, t5, t6 ::
+  inputs: t5=0x0000000000005000, t6=0x0000000000002000
+  output: t4=0x0000000000000002
+div zero, a0, a1 ::
+  inputs: a0=0x0000000000005000, a1=0x0000000000002000
+  output: zero=0x0000000000000000
+divu a0, a1, a2 ::
+  inputs: a1=0x0000000000005000, a2=0x0000000000002000
+  output: a0=0x0000000000000002
+divu a0, a1, a2 ::
+  inputs: a1=0x7fffffffffffffff, a2=0x0000000000000002
+  output: a0=0x3fffffffffffffff
+divu a0, a1, a2 ::
+  inputs: a1=0x7fffffffffffffff, a2=0x7fffffffffffffff
+  output: a0=0x0000000000000001
+divu a0, a1, a2 ::
+  inputs: a1=0x7fffffffffffffff, a2=0xffffffffffffffff
+  output: a0=0x0000000000000000
+divu a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0xffffffffffffffff
+  output: a0=0x0000000000000001
+divu a0, a1, a2 ::
+  inputs: a1=0x8000000000000000, a2=0x0000000000000002
+  output: a0=0x4000000000000000
+divu a0, a1, a2 ::
+  inputs: a1=0x8000000000000000, a2=0xffffffffffffffff
+  output: a0=0x0000000000000000
+divu a0, a1, a2 ::
+  inputs: a1=0x8000000000000000, a2=0x8000000000000000
+  output: a0=0x0000000000000001
+divu a0, a1, a2 ::
+  inputs: a1=0x0000000000000001, a2=0x0000000000000000
+  output: a0=0xffffffffffffffff
+divu a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0x0000000000000000
+  output: a0=0xffffffffffffffff
+divu t4, t5, t6 ::
+  inputs: t5=0x0000000000005000, t6=0x0000000000002000
+  output: t4=0x0000000000000002
+divu zero, a0, a1 ::
+  inputs: a0=0x0000000000005000, a1=0x0000000000002000
+  output: zero=0x0000000000000000
+rem a0, a1, a2 ::
+  inputs: a1=0x0000000000005000, a2=0x0000000000002000
+  output: a0=0x0000000000001000
+rem a0, a1, a2 ::
+  inputs: a1=0x7fffffffffffffff, a2=0x0000000000000002
+  output: a0=0x0000000000000001
+rem a0, a1, a2 ::
+  inputs: a1=0x7fffffffffffffff, a2=0x7fffffffffffffff
+  output: a0=0x0000000000000000
+rem a0, a1, a2 ::
+  inputs: a1=0x7fffffffffffffff, a2=0xffffffffffffffff
+  output: a0=0x0000000000000000
+rem a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0xffffffffffffffff
+  output: a0=0x0000000000000000
+rem a0, a1, a2 ::
+  inputs: a1=0x8000000000000000, a2=0x0000000000000002
+  output: a0=0x0000000000000000
+rem a0, a1, a2 ::
+  inputs: a1=0x8000000000000000, a2=0xffffffffffffffff
+  output: a0=0x0000000000000000
+rem a0, a1, a2 ::
+  inputs: a1=0x8000000000000000, a2=0x8000000000000000
+  output: a0=0x0000000000000000
+rem a0, a1, a2 ::
+  inputs: a1=0x0000000000000001, a2=0x0000000000000000
+  output: a0=0x0000000000000001
+rem a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0x0000000000000000
+  output: a0=0xffffffffffffffff
+rem t4, t5, t6 ::
+  inputs: t5=0x0000000000005000, t6=0x0000000000002000
+  output: t4=0x0000000000001000
+rem zero, a0, a1 ::
+  inputs: a0=0x0000000000005000, a1=0x0000000000002000
+  output: zero=0x0000000000000000
+remu a0, a1, a2 ::
+  inputs: a1=0x0000000000005000, a2=0x0000000000002000
+  output: a0=0x0000000000001000
+remu a0, a1, a2 ::
+  inputs: a1=0x7fffffffffffffff, a2=0x0000000000000002
+  output: a0=0x0000000000000001
+remu a0, a1, a2 ::
+  inputs: a1=0x7fffffffffffffff, a2=0x7fffffffffffffff
+  output: a0=0x0000000000000000
+remu a0, a1, a2 ::
+  inputs: a1=0x7fffffffffffffff, a2=0xffffffffffffffff
+  output: a0=0x7fffffffffffffff
+remu a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0xffffffffffffffff
+  output: a0=0x0000000000000000
+remu a0, a1, a2 ::
+  inputs: a1=0x8000000000000000, a2=0x0000000000000002
+  output: a0=0x0000000000000000
+remu a0, a1, a2 ::
+  inputs: a1=0x8000000000000000, a2=0xffffffffffffffff
+  output: a0=0x8000000000000000
+remu a0, a1, a2 ::
+  inputs: a1=0x8000000000000000, a2=0x8000000000000000
+  output: a0=0x0000000000000000
+remu a0, a1, a2 ::
+  inputs: a1=0x0000000000000001, a2=0x0000000000000000
+  output: a0=0x0000000000000001
+remu a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0x0000000000000000
+  output: a0=0xffffffffffffffff
+remu t4, t5, t6 ::
+  inputs: t5=0x0000000000005000, t6=0x0000000000002000
+  output: t4=0x0000000000001000
+remu zero, a0, a1 ::
+  inputs: a0=0x0000000000005000, a1=0x0000000000002000
+  output: zero=0x0000000000000000
+
+RV64M multiplication and division instruction set, additions
+mulw a0, a1, a2 ::
+  inputs: a1=0x0000000000005000, a2=0x0000000000002000
+  output: a0=0x000000000a000000
+mulw a0, a1, a2 ::
+  inputs: a1=0x000000007fffffff, a2=0x0000000000000002
+  output: a0=0xfffffffffffffffe
+mulw a0, a1, a2 ::
+  inputs: a1=0x000000007fffffff, a2=0x000000007fffffff
+  output: a0=0x0000000000000001
+mulw a0, a1, a2 ::
+  inputs: a1=0x000000007fffffff, a2=0xffffffffffffffff
+  output: a0=0xffffffff80000001
+mulw a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0xffffffffffffffff
+  output: a0=0x0000000000000001
+mulw a0, a1, a2 ::
+  inputs: a1=0x0000000080000000, a2=0x0000000000000002
+  output: a0=0x0000000000000000
+mulw a0, a1, a2 ::
+  inputs: a1=0x0000000080000000, a2=0xffffffffffffffff
+  output: a0=0xffffffff80000000
+mulw a0, a1, a2 ::
+  inputs: a1=0x0000000080000000, a2=0x0000000080000000
+  output: a0=0x0000000000000000
+mulw a0, a1, a2 ::
+  inputs: a1=0x0000000000000001, a2=0x0000000000000000
+  output: a0=0x0000000000000000
+mulw a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0x0000000000000000
+  output: a0=0x0000000000000000
+mulw t4, t5, t6 ::
+  inputs: t5=0x0000000000001000, t6=0x0000000000002000
+  output: t4=0x0000000002000000
+mulw zero, a0, a1 ::
+  inputs: a0=0x0000000000001000, a1=0x0000000000002000
+  output: zero=0x0000000000000000
+divw a0, a1, a2 ::
+  inputs: a1=0x0000000000005000, a2=0x0000000000002000
+  output: a0=0x0000000000000002
+divw a0, a1, a2 ::
+  inputs: a1=0x000000007fffffff, a2=0x0000000000000002
+  output: a0=0x000000003fffffff
+divw a0, a1, a2 ::
+  inputs: a1=0x000000007fffffff, a2=0x000000007fffffff
+  output: a0=0x0000000000000001
+divw a0, a1, a2 ::
+  inputs: a1=0x000000007fffffff, a2=0xffffffffffffffff
+  output: a0=0xffffffff80000001
+divw a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0xffffffffffffffff
+  output: a0=0x0000000000000001
+divw a0, a1, a2 ::
+  inputs: a1=0x0000000080000000, a2=0x0000000000000002
+  output: a0=0xffffffffc0000000
+divw a0, a1, a2 ::
+  inputs: a1=0x0000000080000000, a2=0xffffffffffffffff
+  output: a0=0xffffffff80000000
+divw a0, a1, a2 ::
+  inputs: a1=0x0000000080000000, a2=0x0000000080000000
+  output: a0=0x0000000000000001
+divw a0, a1, a2 ::
+  inputs: a1=0x0000000000000001, a2=0x0000000000000000
+  output: a0=0xffffffffffffffff
+divw a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0x0000000000000000
+  output: a0=0xffffffffffffffff
+divw t4, t5, t6 ::
+  inputs: t5=0x0000000000001000, t6=0x0000000000002000
+  output: t4=0x0000000000000000
+divw zero, a0, a1 ::
+  inputs: a0=0x0000000000001000, a1=0x0000000000002000
+  output: zero=0x0000000000000000
+divuw a0, a1, a2 ::
+  inputs: a1=0x0000000000005000, a2=0x0000000000002000
+  output: a0=0x0000000000000002
+divuw a0, a1, a2 ::
+  inputs: a1=0x000000007fffffff, a2=0x0000000000000002
+  output: a0=0x000000003fffffff
+divuw a0, a1, a2 ::
+  inputs: a1=0x000000007fffffff, a2=0x000000007fffffff
+  output: a0=0x0000000000000001
+divuw a0, a1, a2 ::
+  inputs: a1=0x000000007fffffff, a2=0xffffffffffffffff
+  output: a0=0x0000000000000000
+divuw a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0xffffffffffffffff
+  output: a0=0x0000000000000001
+divuw a0, a1, a2 ::
+  inputs: a1=0x0000000080000000, a2=0x0000000000000002
+  output: a0=0x0000000040000000
+divuw a0, a1, a2 ::
+  inputs: a1=0x0000000080000000, a2=0xffffffffffffffff
+  output: a0=0x0000000000000000
+divuw a0, a1, a2 ::
+  inputs: a1=0x0000000080000000, a2=0x0000000080000000
+  output: a0=0x0000000000000001
+divuw a0, a1, a2 ::
+  inputs: a1=0x0000000000000001, a2=0x0000000000000000
+  output: a0=0xffffffffffffffff
+divuw a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0x0000000000000000
+  output: a0=0xffffffffffffffff
+divuw t4, t5, t6 ::
+  inputs: t5=0x0000000000001000, t6=0x0000000000002000
+  output: t4=0x0000000000000000
+divuw zero, a0, a1 ::
+  inputs: a0=0x0000000000001000, a1=0x0000000000002000
+  output: zero=0x0000000000000000
+remw a0, a1, a2 ::
+  inputs: a1=0x0000000000005000, a2=0x0000000000002000
+  output: a0=0x0000000000001000
+remw a0, a1, a2 ::
+  inputs: a1=0x000000007fffffff, a2=0x0000000000000002
+  output: a0=0x0000000000000001
+remw a0, a1, a2 ::
+  inputs: a1=0x000000007fffffff, a2=0x000000007fffffff
+  output: a0=0x0000000000000000
+remw a0, a1, a2 ::
+  inputs: a1=0x000000007fffffff, a2=0xffffffffffffffff
+  output: a0=0x0000000000000000
+remw a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0xffffffffffffffff
+  output: a0=0x0000000000000000
+remw a0, a1, a2 ::
+  inputs: a1=0x0000000080000000, a2=0x0000000000000002
+  output: a0=0x0000000000000000
+remw a0, a1, a2 ::
+  inputs: a1=0x0000000080000000, a2=0xffffffffffffffff
+  output: a0=0x0000000000000000
+remw a0, a1, a2 ::
+  inputs: a1=0x0000000080000000, a2=0x0000000080000000
+  output: a0=0x0000000000000000
+remw a0, a1, a2 ::
+  inputs: a1=0x0000000000000001, a2=0x0000000000000000
+  output: a0=0x0000000000000001
+remw a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0x0000000000000000
+  output: a0=0xffffffffffffffff
+remw t4, t5, t6 ::
+  inputs: t5=0x0000000000001000, t6=0x0000000000002000
+  output: t4=0x0000000000001000
+remw zero, a0, a1 ::
+  inputs: a0=0x0000000000001000, a1=0x0000000000002000
+  output: zero=0x0000000000000000
+remuw a0, a1, a2 ::
+  inputs: a1=0x0000000000005000, a2=0x0000000000002000
+  output: a0=0x0000000000001000
+remuw a0, a1, a2 ::
+  inputs: a1=0x000000007fffffff, a2=0x0000000000000002
+  output: a0=0x0000000000000001
+remuw a0, a1, a2 ::
+  inputs: a1=0x000000007fffffff, a2=0x000000007fffffff
+  output: a0=0x0000000000000000
+remuw a0, a1, a2 ::
+  inputs: a1=0x000000007fffffff, a2=0xffffffffffffffff
+  output: a0=0x000000007fffffff
+remuw a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0xffffffffffffffff
+  output: a0=0x0000000000000000
+remuw a0, a1, a2 ::
+  inputs: a1=0x0000000080000000, a2=0x0000000000000002
+  output: a0=0x0000000000000000
+remuw a0, a1, a2 ::
+  inputs: a1=0x0000000080000000, a2=0xffffffffffffffff
+  output: a0=0xffffffff80000000
+remuw a0, a1, a2 ::
+  inputs: a1=0x0000000080000000, a2=0x0000000080000000
+  output: a0=0x0000000000000000
+remuw a0, a1, a2 ::
+  inputs: a1=0x0000000000000001, a2=0x0000000000000000
+  output: a0=0x0000000000000001
+remuw a0, a1, a2 ::
+  inputs: a1=0xffffffffffffffff, a2=0x0000000000000000
+  output: a0=0xffffffffffffffff
+remuw t4, t5, t6 ::
+  inputs: t5=0x0000000000001000, t6=0x0000000000002000
+  output: t4=0x0000000000001000
+remuw zero, a0, a1 ::
+  inputs: a0=0x0000000000001000, a1=0x0000000000002000
+  output: zero=0x0000000000000000
diff --git a/none/tests/riscv64/muldiv.vgtest b/none/tests/riscv64/muldiv.vgtest
new file mode 100644
index 000000000..a62a02b23
--- /dev/null
+++ b/none/tests/riscv64/muldiv.vgtest
@@ -0,0 +1,2 @@
+prog: muldiv
+vgopts: -q
diff --git a/none/tests/riscv64/testinst.h b/none/tests/riscv64/testinst.h
new file mode 100644
index 000000000..aa4a250bc
--- /dev/null
+++ b/none/tests/riscv64/testinst.h
@@ -0,0 +1,758 @@
+#include "tests/malloc.h"
+#include <stdbool.h>
+#include <stdio.h>
+
+/* Helper functions. */
+
+static inline unsigned char rand_uchar(void)
+{
+   static unsigned int seed = 80021; /* Fixed seed => repeatable sequence. */
+   /* Pseudo-random byte: one LCG step, then bits 17..24 of the new state. */
+   seed = 1103515245 * seed + 12345;
+   return (seed >> 17) & 0xFF;
+}
+
+static void show_block_diff(unsigned char* block1, /* reference contents */
+                            unsigned char* block2, /* contents to print */
+                            size_t         n,      /* bytes per block */
+                            size_t         offset) /* index labelled +000 */
+{ /* Print, row by row, the bytes of block2 that differ from block1. */
+   bool block_changed = false; /* Becomes true once any row is printed. */
+   for (size_t i = 0; i < n; i += 16) { /* Walk both blocks in 16-byte rows. */
+      bool line_changed = false;
+      for (size_t j = i; j < n && j < i + 16; j++) {
+         if (block1[j] != block2[j]) {
+            line_changed = true;
+            break;
+         }
+      }
+      if (!line_changed)
+         continue; /* Rows identical in both blocks print nothing. */
+      /* Label the row with its signed distance from offset, then dump it. */
+      if (i < offset)
+         printf("  [-%03zu] ", offset - i);
+      else
+         printf("  [+%03zu] ", i - offset);
+      for (size_t j = i; j < n && j < i + 16; j++) {
+         unsigned char diff = block1[j] - block2[j];
+         if (diff == 0)
+            printf(" .."); /* Byte unchanged. */
+         else
+            printf(" %02x", block2[j]); /* Changed: show block2's value. */
+      }
+      printf("\n");
+
+      block_changed = true;
+   }
+   if (!block_changed)
+      printf("  no memory changes\n");
+}
+
+/* Macros for testing individual instructions
+
+   Naming is in form TESTINST_<#outputs>_<#inputs>_<suffix-id>.
+
+   The environment to test each instruction is set up by carefully crafted
+   inline assembly. The code implements its own handling of input and output
+   operands which, most importantly, also allows the sp register to be used as
+   an instruction operand. Register t1 is reserved for this purpose and must be
+   avoided in instruction tests.
+ */
+
+/* Disable clang-format for the test macros because it would mess up the inline
+   assembly. */
+/* clang-format off */
+
+#define ASMINST_2(instruction)                                                 \
+   ".option push;"  /* Save the current assembler options. */                  \
+   ".option rvc;"   /* Permit compressed encodings. */                         \
+   instruction ";"                                                             \
+   ".option pop"    /* Restore the saved options. */
+
+#define ASMINST_4(instruction)                                                 \
+   ".option push;"  /* Save the current assembler options. */                  \
+   ".option norvc;" /* Forbid compressed encodings. */                         \
+   instruction ";"                                                             \
+   ".option pop"    /* Restore the saved options. */
+
+#define TESTINST_0_0(length, instruction)                                      \
+   { /* No operands: execute the instruction and print only its text. */       \
+      __asm__ __volatile__(ASMINST_##length(instruction));                     \
+      printf("%s ::\n", instruction);                                          \
+   }
+
+#define TESTINST_1_0(length, instruction, rd)                                  \
+   { /* Run an instruction with no inputs and report what it wrote to rd. */   \
+      unsigned long w[1 /*out*/ + 1 /*spill*/] = {0, 0};                       \
+      /* w[0] = output rd value                                                \
+         w[1] = spill slot for rd                                              \
+       */                                                                      \
+      register unsigned long* t1 asm("t1") = w; /* Pin the w pointer to t1. */ \
+      __asm__ __volatile__(                                                    \
+         "sd " #rd ", 8(%[w]);"        /* Spill rd. */                         \
+         ASMINST_##length(instruction) ";"                                     \
+         "sd " #rd ", 0(%[w]);"        /* Save result of the operation. */     \
+         "ld " #rd ", 8(%[w]);"        /* Reload rd. */                        \
+         :                                                                     \
+         : [w] "r"(t1)                                                         \
+         : "memory");                                                          \
+      printf("%s ::\n", instruction);                                          \
+      printf("  output: %s=0x%016lx\n", #rd, w[0]);                            \
+   }
+
+#define TESTINST_1_1(length, instruction, rs1_val, rd, rs1)                    \
+   { /* Run an instruction with rs1 preset to rs1_val and report rd. */        \
+      unsigned long w[1 /*out*/ + 1 /*in*/ + 2 /*spill*/] = {                  \
+         0, (unsigned long)rs1_val, 0, 0};                                     \
+      /* w[0] = output rd value                                                \
+         w[1] = input rs1 value                                                \
+         w[2] = spill slot for rd                                              \
+         w[3] = spill slot for rs1                                             \
+       */                                                                      \
+      register unsigned long* t1 asm("t1") = w; /* Pin the w pointer to t1. */ \
+      __asm__ __volatile__(                                                    \
+         "sd " #rd ", 16(%[w]);"       /* Spill rd. */                         \
+         "sd " #rs1 ", 24(%[w]);"      /* Spill rs1. */                        \
+         "ld " #rs1 ", 8(%[w]);"       /* Load the first input. */             \
+         ASMINST_##length(instruction) ";"                                     \
+         "sd " #rd ", 0(%[w]);"        /* Save result of the operation. */     \
+         "ld " #rd ", 16(%[w]);"       /* Reload rd. */                        \
+         "ld " #rs1 ", 24(%[w]);"      /* Reload rs1. */                       \
+         :                                                                     \
+         : [w] "r"(t1)                                                         \
+         : "memory");                                                          \
+      printf("%s ::\n", instruction);                                          \
+      printf("  inputs: %s=0x%016lx\n", #rs1, (unsigned long)rs1_val);         \
+      printf("  output: %s=0x%016lx\n", #rd, w[0]);                            \
+   }
+
+#define TESTINST_1_2(length, instruction, rs1_val, rs2_val, rd, rs1, rs2)      \
+   { /* Run an instruction with rs1/rs2 preset and report the rd result. */    \
+      unsigned long w[1 /*out*/ + 2 /*in*/ + 3 /*spill*/] = {                  \
+         0, (unsigned long)rs1_val, (unsigned long)rs2_val, 0, 0, 0};          \
+      /* w[0] = output rd value                                                \
+         w[1] = input rs1 value                                                \
+         w[2] = input rs2 value                                                \
+         w[3] = spill slot for rd                                              \
+         w[4] = spill slot for rs1                                             \
+         w[5] = spill slot for rs2                                             \
+       */                                                                      \
+      register unsigned long* t1 asm("t1") = w; /* Pin the w pointer to t1. */ \
+      __asm__ __volatile__(                                                    \
+         "sd " #rd ", 24(%[w]);"       /* Spill rd. */                         \
+         "sd " #rs1 ", 32(%[w]);"      /* Spill rs1. */                        \
+         "sd " #rs2 ", 40(%[w]);"      /* Spill rs2. */                        \
+         "ld " #rs1 ", 8(%[w]);"       /* Load the first input. */             \
+         "ld " #rs2 ", 16(%[w]);"      /* Load the second input. */            \
+         ASMINST_##length(instruction) ";"                                     \
+         "sd " #rd ", 0(%[w]);"        /* Save result of the operation. */     \
+         "ld " #rd ", 24(%[w]);"       /* Reload rd. */                        \
+         "ld " #rs1 ", 32(%[w]);"      /* Reload rs1. */                       \
+         "ld " #rs2 ", 40(%[w]);"      /* Reload rs2. */                       \
+         :                                                                     \
+         : [w] "r"(t1)                                                         \
+         : "memory");                                                          \
+      printf("%s ::\n", instruction);                                          \
+      printf("  inputs: %s=0x%016lx, %s=0x%016lx\n", #rs1,                     \
+             (unsigned long)rs1_val, #rs2, (unsigned long)rs2_val);            \
+      printf("  output: %s=0x%016lx\n", #rd, w[0]);                            \
+   }
+
+#define TYPED_LOAD(length, instruction, rd, rs1, ipre)                         \
+   { /* Run a load with rs1 pointing at the middle of a scratch buffer. */     \
+      const size_t   N     = 4096;                                             \
+      unsigned char* area  = memalign16(N); /* reference copy for the diff */  \
+      unsigned char* area2 = memalign16(N); /* buffer the load addresses */    \
+      for (size_t i = 0; i < N; i++)                                           \
+         area[i] = area2[i] = rand_uchar();                                    \
+      unsigned long w[1 /*out*/ + 1 /*in*/ + 2 /*spill*/] = {                  \
+         0, (unsigned long)(area2 + N / 2), 0, 0};                             \
+      /* w[0] = output rd value                                                \
+         w[1] = input rs1 value                                                \
+         w[2] = spill slot for rd                                              \
+         w[3] = spill slot for rs1                                             \
+       */                                                                      \
+      register unsigned long* t1 asm("t1") = w; /* Pin the w pointer to t1. */ \
+      __asm__ __volatile__(                                                    \
+         ipre "sd " #rd ", 16(%[w]);"  /* Spill rd. */                         \
+         "sd " #rs1 ", 24(%[w]);"      /* Spill rs1. */                        \
+         "ld " #rs1 ", 8(%[w]);"       /* Load the first input. */             \
+         ASMINST_##length(instruction) ";"                                     \
+         ipre "sd " #rd ", 0(%[w]);"   /* Save result of the operation. */     \
+         ipre "ld " #rd ", 16(%[w]);"  /* Reload rd. */                        \
+         "ld " #rs1 ", 24(%[w]);"      /* Reload rs1. */                       \
+         :                                                                     \
+         : [w] "r"(t1)                                                         \
+         : "memory");                                                          \
+      printf("%s ::\n", instruction);                                          \
+      printf("  inputs: %s=&area_mid\n", #rs1);                                \
+      printf("  output: %s=0x%016lx\n", #rd, w[0]);                            \
+      show_block_diff(area, area2, N, N / 2);                                  \
+      free(area);                                                              \
+      free(area2);                                                             \
+   }
+
+#define TESTINST_1_1_LOAD(length, instruction, rd, rs1)                        \
+   TYPED_LOAD(length, instruction, rd, rs1, "") /* "" => spill rd with sd/ld */
+
+#define TESTINST_1_1_FLOAD(length, instruction, rd, rs1)                       \
+   TYPED_LOAD(length, instruction, rd, rs1, "f") /* "f" => rd uses fsd/fld */
+
+#define TYPED_STORE(length, instruction, rs2_val, rs2, rs1, ipre)              \
+   { /* Run a store of rs2_val through rs1 into the middle of a buffer. */     \
+      const size_t   N     = 4096;                                             \
+      unsigned char* area  = memalign16(N); /* reference copy for the diff */  \
+      unsigned char* area2 = memalign16(N); /* buffer the store targets */     \
+      for (size_t i = 0; i < N; i++)                                           \
+         area[i] = area2[i] = rand_uchar();                                    \
+      unsigned long w[2 /*in*/ + 2 /*spill*/] = {                              \
+         (unsigned long)rs2_val, (unsigned long)(area2 + N / 2), 0, 0};        \
+      /* w[0] = input rs2 value                                                \
+         w[1] = input rs1 value                                                \
+         w[2] = spill slot for rs2                                             \
+         w[3] = spill slot for rs1                                             \
+       */                                                                      \
+      register unsigned long* t1 asm("t1") = w; /* Pin the w pointer to t1. */ \
+      __asm__ __volatile__(                                                    \
+         ipre "sd " #rs2 ", 16(%[w]);" /* Spill rs2. */                        \
+         "sd " #rs1 ", 24(%[w]);"      /* Spill rs1. */                        \
+         ipre "ld " #rs2 ", 0(%[w]);"  /* Load the first input. */             \
+         "ld " #rs1 ", 8(%[w]);"       /* Load the second input. */            \
+         ASMINST_##length(instruction) ";"                                     \
+         ipre "ld " #rs2 ", 16(%[w]);" /* Reload rs2. */                       \
+         "ld " #rs1 ", 24(%[w]);"      /* Reload rs1. */                       \
+         :                                                                     \
+         : [w] "r"(t1)                                                         \
+         : "memory");                                                          \
+      printf("%s ::\n", instruction);                                          \
+      printf("  inputs: %s=0x%016lx, %s=&area_mid\n", #rs2,                    \
+             (unsigned long)rs2_val, #rs1);                                    \
+      show_block_diff(area, area2, N, N / 2);                                  \
+      free(area);                                                              \
+      free(area2);                                                             \
+   }
+
+#define TESTINST_0_2_STORE(length, instruction, rs2_val, rs2, rs1)             \
+   TYPED_STORE(length, instruction, rs2_val, rs2, rs1, "") /* rs2: sd/ld */
+
+#define TESTINST_0_2_FSTORE(length, instruction, rs2_val, rs2, rs1)            \
+   TYPED_STORE(length, instruction, rs2_val, rs2, rs1, "f") /* rs2: fsd/fld */
+/* LR/SC test: lr, an sc that should succeed, then an sc that should fail. */
+#define TESTINST_2_1_LRSC(length, lr_instruction, sc_instruction, lr_rd,       \
+                          sc_rd, rs1)                                          \
+   {                                                                           \
+      const size_t   N     = 32;                                               \
+      unsigned char* area  = memalign16(N);                                    \
+      unsigned char* area2 = memalign16(N);                                    \
+      for (size_t i = 0; i < N; i++)                                           \
+         area2[i] = rand_uchar();                                              \
+      unsigned long w[4 /*out*/ + 1 /*in*/ + 3 /*spill*/];                     \
+      /* w[0] = output lr_rd value                                             \
+         w[1] = inverted lr_rd value                                           \
+         w[2] = output sc_rd value, first instruction                          \
+         w[3] = output sc_rd value, second instruction                         \
+         w[4] = address of the area midpoint                                   \
+         w[5] = spill slot for lr_rd                                           \
+         w[6] = spill slot for sc_rd                                           \
+         w[7] = spill slot for rs1                                             \
+       */                                                                      \
+      register unsigned long* t1 asm("t1") = w;                                \
+      do {                                                                     \
+         w[0] = w[1] = w[2] = w[3] = w[5] = w[6] = w[7] = 0;                   \
+         w[4] = (unsigned long)(area2 + N / 2);                                \
+         for (size_t i = 0; i < N; i++)                                        \
+            area[i] = area2[i];                                                \
+         __asm__ __volatile__(                                                 \
+            "sd " #lr_rd ", 40(%[w]);" /* Spill lr_rd. */                      \
+            "sd " #sc_rd ", 48(%[w]);" /* Spill sc_rd. */                      \
+            "sd " #rs1 ", 56(%[w]);"   /* Spill rs1. */                        \
+            "ld " #rs1 ", 32(%[w]);"   /* Load the first input. */             \
+            /* Perform a load and create a reservation. */                     \
+            ASMINST_##length(lr_instruction) ";"                               \
+            "mv t2, " #lr_rd ";"       /* Record the loaded value. */          \
+            /* Store an inverted value which should succeed. */                \
+            "not " #lr_rd ", " #lr_rd ";" /* Modify the loaded value. */       \
+            ASMINST_##length(sc_instruction) ";"                               \
+            "sd t2, 0(%[w]);"          /* Save result of the lr operation. */  \
+            "sd " #lr_rd" , 8(%[w]);"  /* Save result of the not operation. */ \
+            "sd " #sc_rd ", 16(%[w]);" /* Save result of the sc operation. */  \
+            /* Store back the original value which should now fail. */         \
+            "mv " #lr_rd ", t2;"       /* Get the original value. */           \
+            ASMINST_##length(sc_instruction) ";"                               \
+            "sd " #sc_rd ", 24(%[w]);" /* Save result of the sc operation. */  \
+            "ld " #lr_rd ", 40(%[w]);" /* Reload lr_rd. */                     \
+            "ld " #sc_rd ", 48(%[w]);" /* Reload sc_rd. */                     \
+            "ld " #rs1 ", 56(%[w]);"   /* Reload rs1. */                       \
+            :                                                                  \
+            : [w] "r"(t1)                                                      \
+            : "t2", "memory");                                                 \
+         /* Re-run the test in case it happens that the first sc instruction   \
+            unexpectedly fails. */                                             \
+      } while (w[2] != 0);                                                     \
+      printf("%s ::\n", lr_instruction);                                       \
+      printf("  inputs: %s=&area_mid\n", #rs1);                                \
+      printf("  output: %s=0x%016lx\n", #lr_rd, w[0]);                         \
+      printf("%s ::\n", sc_instruction);                                       \
+      printf("  inputs: %s=&area_mid, %s=0x%016lx\n", #rs1, #lr_rd, w[1]);     \
+      printf("  output: %s=0x%016lx\n", #sc_rd, w[2]);                         \
+      show_block_diff(area, area2, N, N / 2);                                  \
+      printf("%s ::\n", sc_instruction);                                       \
+      printf("  inputs: %s=&area_mid, %s=0x%016lx\n", #rs1, #lr_rd, w[0]);     \
+      printf("  output: %s=0x%016lx\n", #sc_rd, w[3]);                         \
+      free(area);                                                              \
+      free(area2);                                                             \
+   }
+/* Test with rs2 = rs2_val, rs1 = area2 midpoint; prints rd and memory diff. */
+#define TESTINST_1_2_AMOX(length, instruction, rs2_val, rd, rs2, rs1)          \
+   {                                                                           \
+      const size_t   N     = 32;                                               \
+      unsigned char* area  = memalign16(N);                                    \
+      unsigned char* area2 = memalign16(N);                                    \
+      for (size_t i = 0; i < N; i++)                                           \
+         area[i] = area2[i] = rand_uchar();                                    \
+      unsigned long w[1 /*out*/ + 2 /*in*/ + 3 /*spill*/] = {                  \
+         0, (unsigned long)rs2_val, (unsigned long)(area2 + N / 2), 0, 0, 0};  \
+      /* w[0] = output rd value                                                \
+         w[1] = input rs2 value                                                \
+         w[2] = address of the area midpoint                                   \
+         w[3] = spill slot for rd                                              \
+         w[4] = spill slot for rs2                                             \
+         w[5] = spill slot for rs1                                             \
+       */                                                                      \
+      register unsigned long* t1 asm("t1") = w;                                \
+      __asm__ __volatile__(                                                    \
+         "sd " #rd ", 24(%[w]);"       /* Spill rd. */                         \
+         "sd " #rs2 ", 32(%[w]);"      /* Spill rs2. */                        \
+         "sd " #rs1 ", 40(%[w]);"      /* Spill rs1. */                        \
+         "ld " #rs2 ", 8(%[w]);"       /* Load the first input. */             \
+         "ld " #rs1 ", 16(%[w]);"      /* Load the second input. */            \
+         ASMINST_##length(instruction) ";"                                     \
+         "sd " #rd ", 0(%[w]);"        /* Save result of the operation. */     \
+         "ld " #rd ", 24(%[w]);"       /* Reload rd. */                        \
+         "ld " #rs2 ", 32(%[w]);"      /* Reload rs2. */                       \
+         "ld " #rs1 ", 40(%[w]);"      /* Reload rs1. */                       \
+         :                                                                     \
+         : [w] "r"(t1)                                                         \
+         : "memory");                                                          \
+      printf("%s ::\n", instruction);                                          \
+      printf("  inputs: %s=0x%016lx, %s=&area_mid\n", #rs2,                    \
+             (unsigned long)rs2_val, #rs1);                                    \
+      printf("  output: %s=0x%016lx\n", #rd, w[0]);                            \
+      show_block_diff(area, area2, N, N / 2);                                  \
+      free(area);                                                              \
+      free(area2);                                                             \
+   }
+/* DEST_FMT_<reg>/DEST_DIFF_<reg>: how to print a destination register. */
+#define DEST_FMT_zero              "0x%016lx"
+#define DEST_DIFF_zero(dest, base) (dest)
+/* Named registers print as a signed offset of dest from the base address. */
+#define DEST_FMT_norm              "1f%+ld"
+#define DEST_DIFF_norm(dest, base) ((long)((dest) - (base)))
+#define DEST_FMT_ra                DEST_FMT_norm
+#define DEST_DIFF_ra(dest, base)   DEST_DIFF_norm(dest, base)
+#define DEST_FMT_a0                DEST_FMT_norm
+#define DEST_DIFF_a0(dest, base)   DEST_DIFF_norm(dest, base)
+#define DEST_FMT_t0                DEST_FMT_norm
+#define DEST_DIFF_t0(dest, base)   DEST_DIFF_norm(dest, base)
+#define DEST_FMT_t6                DEST_FMT_norm
+#define DEST_DIFF_t6(dest, base)   DEST_DIFF_norm(dest, base)
+/* The unused placeholder prints a fixed string instead of a value. */
+#define DEST_FMT_unused              "%s"
+#define DEST_DIFF_unused(dest, base) "unused"
+/* Runs the instruction at label 1 and prints rd via DEST_FMT/DEST_DIFF. */
+#define TESTINST_1_0_AUIPC(length, instruction, rd)                            \
+   {                                                                           \
+      unsigned long w[2 /*out*/ + 1 /*spill*/] = {0, 0, 0};                    \
+      /* w[0] = output rd value                                                \
+         w[1] = address of the test instruction                                \
+         w[2] = spill slot for rd                                              \
+       */                                                                      \
+      register unsigned long* t1 asm("t1") = w;                                \
+      __asm__ __volatile__(                                                    \
+         "sd " #rd ", 16(%[w]);"       /* Spill rd. */                         \
+         "1:;"                                                                 \
+         ASMINST_##length(instruction) ";"                                     \
+         "sd " #rd ", 0(%[w]);"        /* Save result of the operation. */     \
+         "la t2, 1b;"                                                          \
+         "sd t2, 8(%[w]);"             /* Store address of the test instr. */  \
+         "ld " #rd ", 16(%[w]);"       /* Reload rd. */                        \
+         :                                                                     \
+         : [w] "r"(t1)                                                         \
+         : "t2", "memory");                                                    \
+      printf("%s ::\n", instruction);                                          \
+      printf("  output: %s=" DEST_FMT_##rd "\n", #rd,                          \
+             DEST_DIFF_##rd(w[0], w[1]));                                      \
+   }
+/* Jump test: emits padding so the target sits `offset` bytes from label 1. */
+#define JMP_RANGE(length, instruction, rs1_val, rs2_val, offset, rd, rs1, rs2) \
+   {                                                                           \
+      unsigned long w[5 /*out*/ + 3 /*spill*/] = {0, 0, 0, 0, 0, 0, 0, 0};     \
+      /* w[0] = output rd value                                                \
+         w[1] = address of the test instruction                                \
+         w[2] = flag that rd is valid                                          \
+         w[3] = flag that rs1 is valid                                         \
+         w[4] = flag that rs2 is valid                                         \
+         w[5] = spill slot for rd                                              \
+         w[6] = spill slot for rs1                                             \
+         w[7] = spill slot for rs2                                             \
+       */                                                                      \
+      register unsigned long* t1 asm("t1") = w;                                \
+      __asm__ __volatile__(                                                    \
+         ".if \"" #rd "\" != \"unused\";"                                      \
+         "sd " #rd ", 40(%[w]);"       /* Spill rd. */                         \
+         ".endif;"                                                             \
+         ".if \"" #rs1 "\" != \"unused\";"                                     \
+         "sd " #rs1 ", 48(%[w]);"      /* Spill rs1. */                        \
+         "la " #rs1 ", " rs1_val ";"   /* Load the first input. */             \
+         ".endif;"                                                             \
+         ".if \"" #rs2 "\" != \"unused\";"                                     \
+         "sd " #rs2 ", 56(%[w]);"      /* Spill rs2. */                        \
+         "la " #rs2 ", " rs2_val ";"   /* Load the second input. */            \
+         ".endif;"                                                             \
+         "j 1f;"                                                               \
+         ".option push;"                                                       \
+         ".option norvc;"                                                      \
+         /* Generate a target area for negative offset. */                     \
+         ".if " #offset " < 0;"                                                \
+         ".if 4096 + " #offset " > 0; .space 4096 + " #offset "; .endif;"      \
+         "j 2f;"                                                               \
+         ".if -" #offset " - 4 > 0; .space -" #offset " - 4; .endif;"          \
+         ".else;"                                                              \
+         ".space 4096;"                                                        \
+         ".endif;"                                                             \
+         "1:;"                                                                 \
+         ASMINST_##length(instruction) ";"                                     \
+         /* Generate a target area for positive offset. */                     \
+         ".if " #length " == 2; .space 2; .endif;" /* Pad to 4 bytes. */       \
+         ".if " #offset " > 0;"                                                \
+         ".if " #offset " - 4 > 0; .space " #offset " - 4; .endif;"            \
+         "j 2f;"                                                               \
+         ".if 4094 - " #offset " > 0; .space 4094 - " #offset "; .endif;"      \
+         ".else;"                                                              \
+         ".space 4094;"                                                        \
+         ".endif;"                                                             \
+         "2:;"                                                                 \
+         ".option pop;"                                                        \
+         ".if \"" #rd "\" != \"unused\";"                                      \
+         "sd " #rd ", 0(%[w]);"        /* Store the output return address. */  \
+         "la t2, 1b;"                                                          \
+         "sd t2, 8(%[w]);"             /* Store address of the test instr. */  \
+         "li t2, 1;"                                                           \
+         "sd t2, 16(%[w]);"            /* Flag that rd is valid. */            \
+         ".endif;"                                                             \
+         ".if \"" #rs1 "\" != \"unused\";"                                     \
+         "li t2, 1;"                                                           \
+         "sd t2, 24(%[w]);"            /* Flag that rs1 is valid. */           \
+         ".endif;"                                                             \
+         ".if \"" #rs2 "\" != \"unused\";"                                     \
+         "li t2, 1;"                                                           \
+         "sd t2, 32(%[w]);"            /* Flag that rs2 is valid. */           \
+         ".endif;"                                                             \
+         ".if \"" #rd "\" != \"unused\";"                                      \
+         "ld " #rd ", 40(%[w]);"       /* Reload rd. */                        \
+         ".endif;"                                                             \
+         ".if \"" #rs1 "\" != \"unused\";"                                     \
+         "ld " #rs1 ", 48(%[w]);"      /* Reload rs1. */                       \
+         ".endif;"                                                             \
+         ".if \"" #rs2 "\" != \"unused\";"                                     \
+         "ld " #rs2 ", 56(%[w]);"      /* Reload rs2. */                       \
+         ".endif;"                                                             \
+         :                                                                     \
+         : [w] "r"(t1)                                                         \
+         : "t2", "memory");                                                    \
+      printf("%s ::\n", instruction);                                          \
+      if (w[3] != 0) { /* If rs1 is valid. */                                  \
+         printf("  inputs: %s=%s", #rs1, rs1_val);                             \
+         if (w[4] != 0) /* If rs2 is valid. */                                 \
+            printf(", %s=%s", #rs2, rs2_val);                                  \
+         printf("\n");                                                         \
+      }                                                                        \
+      if (w[2] != 0) /* If rd is valid. */                                     \
+         printf("  output: %s=" DEST_FMT_##rd "\n", #rd,                       \
+                DEST_DIFF_##rd(w[0], w[1]));                                   \
+      printf("  target: reached\n");                                           \
+   }
+/* JMP_RANGE front ends; operands that do not apply are passed as unused. */
+#define TESTINST_0_0_J_RANGE(length, instruction, offset)                      \
+   JMP_RANGE(length, instruction, "0", "0", offset, unused, unused, unused)
+
+#define TESTINST_0_1_JR_RANGE(length, instruction, rs1_val, offset, rs1)       \
+   JMP_RANGE(length, instruction, rs1_val, "0", offset, unused, rs1, unused)
+
+#define TESTINST_1_0_JAL_RANGE(length, instruction, offset, rd)                \
+   JMP_RANGE(length, instruction, "0", "0", offset, rd, unused, unused)
+
+#define TESTINST_1_1_JALR_RANGE(length, instruction, rs1_val, offset, rd, rs1) \
+   JMP_RANGE(length, instruction, rs1_val, "0", offset, rd, rs1, unused)
+/* The branch forms stringify their value arguments before forwarding them. */
+#define TESTINST_0_1_BxxZ_RANGE(length, instruction, rs1_val, offset, rs1)     \
+   JMP_RANGE(length, instruction, #rs1_val, "0", offset, unused, rs1, unused)
+
+#define TESTINST_0_2_Bxx_RANGE(length, instruction, rs1_val, rs2_val, offset,  \
+                               rs1, rs2)                                       \
+   JMP_RANGE(length, instruction, #rs1_val, #rs2_val, offset, unused, rs1, rs2)
+/* Branch test: w[0] records whether the instruction jumped to label 1. */
+#define JMP_COND(length, instruction, rs1_val, rs2_val, rs1, rs2)              \
+   {                                                                           \
+      unsigned long w[3 /*out*/ + 2 /*spill*/] = {0, 0, 0, 0, 0};              \
+      /* w[0] = flag that the branch was taken                                 \
+         w[1] = flag that rs1 is valid                                         \
+         w[2] = flag that rs2 is valid                                         \
+         w[3] = spill slot for rs1                                             \
+         w[4] = spill slot for rs2                                             \
+       */                                                                      \
+      register unsigned long* t1 asm("t1") = w;                                \
+      __asm__ __volatile__(                                                    \
+         "li t2, 1;"                                                           \
+         "sd t2, 0(%[w]);"             /* Set result to "taken". */            \
+         ".if \"" #rs1 "\" != \"unused\";"                                     \
+         "sd " #rs1 ", 24(%[w]);"      /* Spill rs1. */                        \
+         "la " #rs1 ", " rs1_val ";"   /* Load the first input. */             \
+         ".endif;"                                                             \
+         ".if \"" #rs2 "\" != \"unused\";"                                     \
+         "sd " #rs2 ", 32(%[w]);"      /* Spill rs2. */                        \
+         "la " #rs2 ", " rs2_val ";"   /* Load the second input. */            \
+         ".endif;"                                                             \
+         ASMINST_##length(instruction) ";"                                     \
+         "li t2, 0;"                                                           \
+         "sd t2, 0(%[w]);"             /* Set result to "not taken". */        \
+         "1:;"                                                                 \
+         ".if \"" #rs1 "\" != \"unused\";"                                     \
+         "li t2, 1;"                                                           \
+         "sd t2, 8(%[w]);"             /* Flag that rs1 is valid. */           \
+         ".endif;"                                                             \
+         ".if \"" #rs2 "\" != \"unused\";"                                     \
+         "li t2, 1;"                                                           \
+         "sd t2, 16(%[w]);"            /* Flag that rs2 is valid. */           \
+         ".endif;"                                                             \
+         ".if \"" #rs1 "\" != \"unused\";"                                     \
+         "ld " #rs1 ", 24(%[w]);"      /* Reload rs1. */                       \
+         ".endif;"                                                             \
+         ".if \"" #rs2 "\" != \"unused\";"                                     \
+         "ld " #rs2 ", 32(%[w]);"      /* Reload rs2. */                       \
+         ".endif;"                                                             \
+         :                                                                     \
+         : [w] "r"(t1)                                                         \
+         : "t2", "memory");                                                    \
+      printf("%s ::\n", instruction);                                          \
+      if (w[1] != 0) { /* If rs1 is valid. */                                  \
+         printf("  inputs: %s=%s", #rs1, rs1_val);                             \
+         if (w[2] != 0) /* If rs2 is valid. */                                 \
+            printf(", %s=%s", #rs2, rs2_val);                                  \
+         printf("\n");                                                         \
+      }                                                                        \
+      printf("  branch: %s\n", w[0] ? "taken" : "not taken");                  \
+   }
+/* JMP_COND front ends; both stringify their input values. */
+#define TESTINST_0_1_BxxZ_COND(length, instruction, rs1_val, rs1)              \
+   JMP_COND(length, instruction, #rs1_val, "0", rs1, unused)
+
+#define TESTINST_0_2_Bxx_COND(length, instruction, rs1_val, rs2_val, rs1, rs2) \
+   JMP_COND(length, instruction, #rs1_val, #rs2_val, rs1, rs2)
+/* One-input test with fcsr; dpre/spre are prefixed to sd/ld for rd/rs1. */
+#define TYPED_X_X(length, instruction, rs1_val, fcsr_val, rd, rs1, dpre, spre) \
+   {                                                                           \
+      unsigned long w[2 /*out*/ + 2 /*in*/ + 3 /*spill*/] = {                  \
+         0, 0, (unsigned long)rs1_val, (unsigned long)fcsr_val, 0, 0, 0};      \
+      /* w[0] = output rd value                                                \
+         w[1] = output fcsr value                                              \
+         w[2] = input rs1 value                                                \
+         w[3] = input fcsr value                                               \
+         w[4] = spill slot for rd                                              \
+         w[5] = spill slot for fcsr                                            \
+         w[6] = spill slot for rs1                                             \
+       */                                                                      \
+      register unsigned long* t1 asm("t1") = w;                                \
+      __asm__ __volatile__(                                                    \
+         dpre "sd " #rd ", 32(%[w]);"  /* Spill rd. */                         \
+         "frcsr t2;"                                                           \
+         "sd t2, 40(%[w]);"            /* Spill fcsr. */                       \
+         spre "sd " #rs1 ", 48(%[w]);" /* Spill rs1. */                        \
+         "ld t2, 24(%[w]);"                                                    \
+         "fscsr t2;"                   /* Load fcsr. */                        \
+         spre "ld " #rs1 ", 16(%[w]);" /* Load the first input. */             \
+         ASMINST_##length(instruction) ";"                                     \
+         dpre "sd " #rd ", 0(%[w]);"   /* Save result of the operation. */     \
+         "frcsr t2;"                                                           \
+         "sd t2, 8(%[w]);"             /* Save fcsr. */                        \
+         "ld t2, 40(%[w]);"                                                    \
+         "fscsr t2;"                   /* Reload fcsr. */                      \
+         dpre "ld " #rd ", 32(%[w]);"  /* Reload rd. */                        \
+         spre "ld " #rs1 ", 48(%[w]);" /* Reload rs1. */                       \
+         :                                                                     \
+         : [w] "r"(t1)                                                         \
+         : "t2", "memory");                                                    \
+      printf("%s ::\n", instruction);                                          \
+      printf("  inputs: %s=0x%016lx, fcsr=0x%08lx\n", #rs1,                    \
+             (unsigned long)rs1_val, (unsigned long)fcsr_val);                 \
+      printf("  output: %s=0x%016lx, fcsr=0x%08lx\n", #rd, w[0], w[1]);        \
+   }
+/* TYPED_X_X front ends; an f prefix turns sd/ld into fsd/fld for rd or rs1. */
+#define TESTINST_1_1_F(length, instruction, rs1_val, fcsr_val, rd, rs1)        \
+    TYPED_X_X(length, instruction, rs1_val, fcsr_val, rd, rs1, "f", "f")
+
+#define TESTINST_1_1_IF(length, instruction, rs1_val, fcsr_val, rd, rs1)       \
+    TYPED_X_X(length, instruction, rs1_val, fcsr_val, rd, rs1, "", "f")
+
+#define TESTINST_1_1_FI(length, instruction, rs1_val, fcsr_val, rd, rs1)       \
+    TYPED_X_X(length, instruction, rs1_val, fcsr_val, rd, rs1, "f", "")
+/* Two-FP-input test with fcsr; dpre is prefixed to sd/ld for rd. */
+#define TYPED_X_FF(length, instruction, rs1_val, rs2_val, fcsr_val, rd, rs1,   \
+                   rs2, dpre)                                                  \
+   {                                                                           \
+      unsigned long w[2 /*out*/ + 3 /*in*/ + 4 /*spill*/] = {                  \
+         0, 0, (unsigned long)rs1_val, (unsigned long)rs2_val,                 \
+         (unsigned long)fcsr_val, 0, 0, 0, 0};                                 \
+      /* w[0] = output rd value                                                \
+         w[1] = output fcsr value                                              \
+         w[2] = input rs1 value                                                \
+         w[3] = input rs2 value                                                \
+         w[4] = input fcsr value                                               \
+         w[5] = spill slot for rd                                              \
+         w[6] = spill slot for fcsr                                            \
+         w[7] = spill slot for rs1                                             \
+         w[8] = spill slot for rs2                                             \
+       */                                                                      \
+      register unsigned long* t1 asm("t1") = w;                                \
+      __asm__ __volatile__(                                                    \
+         dpre "sd " #rd ", 40(%[w]);"  /* Spill rd. */                         \
+         "frcsr t2;"                                                           \
+         "sd t2, 48(%[w]);"            /* Spill fcsr. */                       \
+         "fsd " #rs1 ", 56(%[w]);"     /* Spill rs1. */                        \
+         "fsd " #rs2 ", 64(%[w]);"     /* Spill rs2. */                        \
+         "ld t2, 32(%[w]);"                                                    \
+         "fscsr t2;"                   /* Load fcsr. */                        \
+         "fld " #rs1 ", 16(%[w]);"     /* Load the first input. */             \
+         "fld " #rs2 ", 24(%[w]);"     /* Load the second input. */            \
+         ASMINST_##length(instruction) ";"                                     \
+         dpre "sd " #rd ", 0(%[w]);"   /* Save result of the operation. */     \
+         "frcsr t2;"                                                           \
+         "sd t2, 8(%[w]);"             /* Save fcsr. */                        \
+         "ld t2, 48(%[w]);"                                                    \
+         "fscsr t2;"                   /* Reload fcsr. */                      \
+         dpre "ld " #rd ", 40(%[w]);"  /* Reload rd. */                        \
+         "fld " #rs1 ", 56(%[w]);"     /* Reload rs1. */                       \
+         "fld " #rs2 ", 64(%[w]);"     /* Reload rs2. */                       \
+         :                                                                     \
+         : [w] "r"(t1)                                                         \
+         : "t2", "memory");                                                    \
+      printf("%s ::\n", instruction);                                          \
+      printf("  inputs: %s=0x%016lx, %s=0x%016lx, fcsr=0x%08lx\n", #rs1,       \
+             (unsigned long)rs1_val, #rs2, (unsigned long)rs2_val,             \
+             (unsigned long)fcsr_val);                                         \
+      printf("  output: %s=0x%016lx, fcsr=0x%08lx\n", #rd, w[0], w[1]);        \
+   }
+/* TYPED_X_FF front ends: rd is accessed with fsd/fld (F) or sd/ld (FCMP). */
+#define TESTINST_1_2_F(length, instruction, rs1_val, rs2_val, fcsr_val, rd,    \
+                       rs1, rs2)                                               \
+    TYPED_X_FF(length, instruction, rs1_val, rs2_val, fcsr_val, rd, rs1, rs2,  \
+               "f")
+
+#define TESTINST_1_2_FCMP(length, instruction, rs1_val, rs2_val, fcsr_val, rd, \
+                          rs1, rs2)                                            \
+    TYPED_X_FF(length, instruction, rs1_val, rs2_val, fcsr_val, rd, rs1, rs2,  \
+               "")
+
+#define TESTINST_1_3_F(length, instruction, rs1_val, rs2_val, rs3_val,         \
+                       fcsr_val, rd, rs1, rs2, rs3)                            \
+   {                                                                           \
+      unsigned long w[2 /*out*/ + 4 /*in*/ + 5 /*spill*/] = {                  \
+         0, 0, (unsigned long)rs1_val, (unsigned long)rs2_val,                 \
+         (unsigned long)rs3_val, (unsigned long)fcsr_val, 0, 0, 0, 0, 0};      \
+      /* w[0] = output rd value                                                \
+         w[1] = output fcsr value                                              \
+         w[2] = input rs1 value                                                \
+         w[3] = input rs2 value                                                \
+         w[4] = input rs3 value                                                \
+         w[5] = input fcsr value                                               \
+         w[6] = spill slot for rd                                              \
+         w[7] = spill slot for fcsr                                            \
+         w[8] = spill slot for rs1                                             \
+         w[9] = spill slot for rs2                                             \
+         w[10] = spill slot for rs3                                            \
+       */                                                                      \
+      register unsigned long* t1 asm("t1") = w;                                \
+      __asm__ __volatile__(                                                    \
+         "fsd " #rd ", 48(%[w]);"      /* Spill rd. */                         \
+         "frcsr t2;"                                                           \
+         "sd t2, 56(%[w]);"            /* Spill fcsr. */                       \
+         "fsd " #rs1 ", 64(%[w]);"     /* Spill rs1. */                        \
+         "fsd " #rs2 ", 72(%[w]);"     /* Spill rs2. */                        \
+         "fsd " #rs3 ", 80(%[w]);"     /* Spill rs3. */                        \
+         "ld t2, 40(%[w]);"                                                    \
+         "fscsr t2;"                   /* Load fcsr. */                        \
+         "fld " #rs1 ", 16(%[w]);"     /* Load the first input. */             \
+         "fld " #rs2 ", 24(%[w]);"     /* Load the second input. */            \
+         "fld " #rs3 ", 32(%[w]);"     /* Load the third input. */             \
+         ASMINST_##length(instruction) ";"                                     \
+         "fsd " #rd ", 0(%[w]);"       /* Save result of the operation. */     \
+         "frcsr t2;"                                                           \
+         "sd t2, 8(%[w]);"             /* Save fcsr. */                        \
+         "ld t2, 56(%[w]);"                                                    \
+         "fscsr t2;"                   /* Reload fcsr. */                      \
+         "fld " #rd ", 48(%[w]);"      /* Reload rd. */                        \
+         "fld " #rs1 ", 64(%[w]);"     /* Reload rs1. */                       \
+         "fld " #rs2 ", 72(%[w]);"     /* Reload rs2. */                       \
+         "fld " #rs3 ", 80(%[w]);"     /* Reload rs3. */                       \
+         :                                                                     \
+         : [w] "r"(t1)                                                         \
+         : "t2", "memory");                                                    \
+      printf("%s ::\n", instruction);                                          \
+      printf("  inputs: %s=0x%016lx, %s=0x%016lx, %s=0x%016lx, "               \
+             "fcsr=0x%08lx\n", #rs1, (unsigned long)rs1_val, #rs2,             \
+             (unsigned long)rs2_val, #rs3, (unsigned long)rs3_val,             \
+             (unsigned long)fcsr_val);                                         \
+      printf("  output: %s=0x%016lx, fcsr=0x%08lx\n", #rd, w[0], w[1]);        \
+   }
+
+#define TESTINST_1_1_CSR(length, instruction, csr_val, rs1_val, rd, csr, rs1)  \
+   { /* Execute one CSR instruction on given inputs and print the results. */  \
+      unsigned long w[2 /*out*/ + 2 /*in*/ + 3 /*spill*/] = {                  \
+         0, 0, (unsigned long)csr_val, (unsigned long)rs1_val, 0, 0, 0};       \
+      /* w[0] = output rd value     (offset  0 in the asm below)               \
+         w[1] = output csr value    (offset  8)                                \
+         w[2] = input csr value     (offset 16)                                \
+         w[3] = input rs1 value     (offset 24)                                \
+         w[4] = spill slot for rd   (offset 32)                                \
+         w[5] = spill slot for csr  (offset 40)                                \
+         w[6] = spill slot for rs1  (offset 48)                                \
+         rd/rs1 code is emitted only when the operand is not named unused. */  \
+      register unsigned long* t1 asm("t1") = w; /* Base pointer in t1. */      \
+      __asm__ __volatile__(                                                    \
+         ".if \"" #rd "\" != \"unused\";"                                      \
+         "sd " #rd ", 32(%[w]);"       /* Spill rd. */                         \
+         ".endif;"                                                             \
+         "csrr t2, " #csr ";"                                                  \
+         "sd t2, 40(%[w]);"            /* Spill csr. */                        \
+         ".if \"" #rs1 "\" != \"unused\";"                                     \
+         "sd " #rs1 ", 48(%[w]);"      /* Spill rs1. */                        \
+         ".endif;"                                                             \
+         "ld t2, 16(%[w]);"                                                    \
+         "csrw " #csr ", t2;"          /* Load csr. */                         \
+         ".if \"" #rs1 "\" != \"unused\";"                                     \
+         "ld " #rs1 ", 24(%[w]);"      /* Load the first input. */             \
+         ".endif;"                                                             \
+         ASMINST_##length(instruction) ";" /* Instruction under test. */       \
+         ".if \"" #rd "\" != \"unused\";"                                      \
+         "sd " #rd ", 0(%[w]);"        /* Save result of the operation. */     \
+         ".endif;"                                                             \
+         "csrr t2, " #csr ";"                                                  \
+         "sd t2, 8(%[w]);"             /* Save csr. */                         \
+         "ld t2, 40(%[w]);"                                                    \
+         "csrw " #csr ", t2;"          /* Reload csr. */                       \
+         ".if \"" #rd "\" != \"unused\";"                                      \
+         "ld " #rd ", 32(%[w]);"       /* Reload rd. */                        \
+         ".endif;"                                                             \
+         ".if \"" #rs1 "\" != \"unused\";"                                     \
+         "ld " #rs1 ", 48(%[w]);"      /* Reload rs1. */                       \
+         ".endif;"                                                             \
+         :                                                                     \
+         : [w] "r"(t1)                                                         \
+         : "t2", "memory");            /* t2 = scratch register. */            \
+      printf("%s ::\n", instruction);                                          \
+      printf("  inputs: %s=0x%016lx, %s=0x%016lx\n", #rs1,                     \
+             (unsigned long)rs1_val, #csr, (unsigned long)csr_val);            \
+      printf("  output: %s=0x%016lx, %s=0x%016lx\n", #rd, w[0], #csr, w[1]);   \
+   }
+
+/* clang-format on */
diff --git a/tests/arch_test.c b/tests/arch_test.c
index 4dbb8ca10..84b1f1307 100644
--- a/tests/arch_test.c
+++ b/tests/arch_test.c
@@ -34,6 +34,7 @@ char* all_archs[] = {
    "mips32",
    "mips64",
    "nanomips",
+   "riscv64",
    NULL
 };
 
@@ -79,6 +80,10 @@ static Bool go(char* arch)
 
 #elif defined(VGP_nanomips_linux)
    if ( 0 == strcmp( arch, "nanomips" ) ) return True;
+
+#elif defined(VGP_riscv64_linux)
+   if ( 0 == strcmp( arch, "riscv64" ) ) return True;
+
 #else
 #  error Unknown platform
 #endif   // VGP_*
diff --git a/tests/check_headers_and_includes b/tests/check_headers_and_includes
index 70e134896..8e92001b6 100755
--- a/tests/check_headers_and_includes
+++ b/tests/check_headers_and_includes
@@ -69,6 +69,7 @@ my %dirs_to_ignore = (
     "tests" => 1,
     "gdbserver_tests" => 1,
     "mpi" => 1,
+    "riscv64" => 1,
     "solaris" => 1
     );
 
diff --git a/tests/platform_test b/tests/platform_test
index c23a4f645..9762d0c09 100644
--- a/tests/platform_test
+++ b/tests/platform_test
@@ -14,6 +14,7 @@ all_platforms=
 all_platforms="$all_platforms x86-linux amd64-linux ppc32-linux ppc64-linux"
 all_platforms="$all_platforms arm-linux arm64-linux"
 all_platforms="$all_platforms s390x-linux mips32-linux mips64-linux"
+all_platforms="$all_platforms riscv64-linux"
 all_platforms="$all_platforms x86-darwin amd64-darwin"
 all_platforms="$all_platforms x86-solaris amd64-solaris"
 all_platforms="$all_platforms x86-freebsd amd64-freebsd"

Places

File riscv.patch of Package valgrind

Places