Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
devel:languages:lua
luajit
luajit-s390x.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File luajit-s390x.patch of Package luajit
From dee73f516f0da49e930dcfa1dd61720dcb69b7dd Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich <iii@linux.ibm.com> Date: Thu, 31 Aug 2023 11:18:32 +0200 Subject: [PATCH 1/2] Add s390x architecture support to DynASM s390x (IBM Z) is an architecture of server computers produced by IBM. It is supported by a number of open source code generators, such as GCC, LLVM, OpenJDK, eBPF, QEMU, Valgrind and Cranelift. One of the missing pieces in the ecosystem support is LuaJIT. The s390x support for LuaJIT was initially developed by @ketank-new, @mundaym and @niravthakkar. It found its way into moonjit and luajit2 forks, as well as Fedora distro (as a patch). There were also smaller contributions by @preetikhorjuvenkar, @Bisht13, @velemas, @AlekseiNikiforovIBM, and @iii-i. This is a cumulative patch of the DynASM changes from this work. It contains all the contributions squashed together, plus minor stylistic cleanups. Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com> --- dynasm/dasm_s390x.h | 546 ++++++ dynasm/dasm_s390x.lua | 1634 ++++++++++++++++++ src/Makefile | 4 src/host/buildvm.c | 2 src/host/buildvm_asm.c | 53 src/jit/bcsave.lua | 1 src/jit/dis_s390x.lua | 1 src/lib_jit.c | 2 src/lj_arch.h | 19 src/lj_asm.c | 2 src/lj_ccall.c | 50 src/lj_ccall.h | 15 src/lj_ccallback.c | 9 src/lj_err.c | 3 src/lj_frame.h | 14 src/lj_target.h | 2 src/lj_target_s390x.h | 80 src/vm_s390x.dasc | 4269 +++++++++++++++++++++++++++++++++++++++++++++++++ 18 files changed, 6706 insertions(+) create mode 100644 dynasm/dasm_s390x.h create mode 100644 dynasm/dasm_s390x.lua --- /dev/null +++ b/dynasm/dasm_s390x.h @@ -0,0 +1,546 @@ +/* +** DynASM s390x encoding engine. +** Copyright (C) 2005-2016 Mike Pall. All rights reserved. +** Released under the MIT license. See dynasm.lua for full copyright notice. 
+*/ + +#include <stddef.h> +#include <stdarg.h> +#include <string.h> +#include <stdlib.h> + +#define DASM_ARCH "s390x" + +#ifndef DASM_EXTERN +#define DASM_EXTERN(a,b,c,d) 0 +#endif + +/* Action definitions. */ +enum { + DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, + /* The following actions need a buffer position. */ + DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, + /* The following actions also have an argument. */ + DASM_REL_PC, DASM_LABEL_PC, + DASM_DISP12, DASM_DISP20, + DASM_IMM8, DASM_IMM16, DASM_IMM32, + DASM_LEN8R,DASM_LEN4HR,DASM_LEN4LR, + DASM__MAX +}; + +/* Maximum number of section buffer positions for a single dasm_put() call. */ +#define DASM_MAXSECPOS 25 + +/* DynASM encoder status codes. Action list offset or number are or'ed in. */ +#define DASM_S_OK 0x00000000 +#define DASM_S_NOMEM 0x01000000 +#define DASM_S_PHASE 0x02000000 +#define DASM_S_MATCH_SEC 0x03000000 +#define DASM_S_RANGE_I 0x11000000 +#define DASM_S_RANGE_SEC 0x12000000 +#define DASM_S_RANGE_LG 0x13000000 +#define DASM_S_RANGE_PC 0x14000000 +#define DASM_S_RANGE_REL 0x15000000 +#define DASM_S_UNDEF_LG 0x21000000 +#define DASM_S_UNDEF_PC 0x22000000 + +/* Macros to convert positions (8 bit section + 24 bit index). */ +#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) +#define DASM_POS2BIAS(pos) ((pos)&0xff000000) +#define DASM_SEC2POS(sec) ((sec)<<24) +#define DASM_POS2SEC(pos) ((pos)>>24) +#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) + +/* Action list type. */ +typedef const unsigned short *dasm_ActList; + +/* Per-section structure. */ +typedef struct dasm_Section { + int *rbuf; /* Biased buffer pointer (negative section bias). */ + int *buf; /* True buffer pointer. */ + size_t bsize; /* Buffer size in bytes. */ + int pos; /* Biased buffer position. */ + int epos; /* End of biased buffer position - max single put. */ + int ofs; /* Byte offset into section. */ +} dasm_Section; + +/* Core structure holding the DynASM encoding state. 
*/ +struct dasm_State { + size_t psize; /* Allocated size of this structure. */ + dasm_ActList actionlist; /* Current actionlist pointer. */ + int *lglabels; /* Local/global chain/pos ptrs. */ + size_t lgsize; + int *pclabels; /* PC label chains/pos ptrs. */ + size_t pcsize; + void **globals; /* Array of globals (bias -10). */ + dasm_Section *section; /* Pointer to active section. */ + size_t codesize; /* Total size of all code sections. */ + int maxsection; /* 0 <= sectionidx < maxsection. */ + int status; /* Status code. */ + dasm_Section sections[1]; /* All sections. Alloc-extended. */ +}; + +/* The size of the core structure depends on the max. number of sections. */ +#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) + + +/* Initialize DynASM state. */ +void dasm_init(Dst_DECL, int maxsection) +{ + dasm_State *D; + size_t psz = 0; + int i; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; + D->psize = psz; + D->lglabels = NULL; + D->lgsize = 0; + D->pclabels = NULL; + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; + for (i = 0; i < maxsection; i++) { + D->sections[i].buf = NULL; /* Need this for pass3. */ + D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); + D->sections[i].bsize = 0; + D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ + } +} + +/* Free DynASM state. */ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) + DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) + DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). 
*/ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl - 10; /* Negative bias to compensate for locals. */ + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10 + maxgl) * sizeof(int)); +} + +/* Grow PC label array. Can be called after dasm_setup(), too. */ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc * sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels) + osz), 0, D->pcsize - osz); +} + +/* Setup encoder. */ +void dasm_setup(Dst_DECL, const void *actionlist) +{ + dasm_State *D = Dst_REF; + int i; + D->actionlist = (dasm_ActList) actionlist; + D->status = DASM_S_OK; + D->section = &D->sections[0]; + memset((void *)D->lglabels, 0, D->lgsize); + if (D->pclabels) + memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].ofs = 0; + } +} + + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) { \ + D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) +#define CKPL(kind, st) \ + do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ + D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) +#else +#define CK(x, st) ((void)0) +#define CKPL(kind, st) ((void)0) +#endif + +/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ +void dasm_put(Dst_DECL, int start, ...) 
+{ + va_list ap; + dasm_State *D = Dst_REF; + dasm_ActList p = D->actionlist + start; + dasm_Section *sec = D->section; + int pos = sec->pos, ofs = sec->ofs; + int *b; + + if (pos >= sec->epos) { + DASM_M_GROW(Dst, int, sec->buf, sec->bsize, + sec->bsize + 2 * DASM_MAXSECPOS * sizeof(int)); + sec->rbuf = sec->buf - DASM_POS2BIAS(pos); + sec->epos = + (int)sec->bsize / sizeof(int) - DASM_MAXSECPOS + DASM_POS2BIAS(pos); + } + + b = sec->rbuf; + b[pos++] = start; + + va_start(ap, start); + while (1) { + unsigned short ins = *p++; + unsigned short action = ins; + if (action >= DASM__MAX) { + ofs += 2; + continue; + } + + int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; + switch (action) { + case DASM_STOP: + goto stop; + case DASM_SECTION: + n = *p++ & 255; + CK(n < D->maxsection, RANGE_SEC); + D->section = &D->sections[n]; + goto stop; + case DASM_ESC: + p++; + ofs += 2; + break; + case DASM_REL_EXT: + p++; + ofs += 4; + break; + case DASM_ALIGN: + ofs += *p++; + b[pos++] = ofs; + break; + case DASM_REL_LG: + if (p[-2] >> 12 == 0xc) { /* RIL instruction needs 32-bit immediate. */ + ofs += 2; + } + n = *p++ - 10; + pl = D->lglabels + n; + /* Bkwd rel or global. */ + if (n >= 0) { + CK(n >= 10 || *pl < 0, RANGE_LG); + CKPL(lg, LG); + goto putrel; + } + pl += 10; + n = *pl; + if (n < 0) + n = 0; /* Start new chain for fwd rel if label exists. */ + goto linkrel; + case DASM_REL_PC: + if (p[-2] >> 12 == 0xc) { /* RIL instruction needs 32-bit immediate. */ + ofs += 2; + } + pl = D->pclabels + n; + CKPL(pc, PC); + putrel: + n = *pl; + if (n < 0) { /* Label exists. Get label pos and store it. */ + b[pos] = -n; + } else { + linkrel: + b[pos] = n; /* Else link to rel chain, anchored at label. */ + *pl = pos; + } + ofs += 2; + pos++; + break; + case DASM_LABEL_LG: + pl = D->lglabels + *p++ - 10; + CKPL(lg, LG); + goto putlabel; + case DASM_LABEL_PC: + pl = D->pclabels + n; + CKPL(pc, PC); + putlabel: + n = *pl; /* n > 0: Collapse rel chain and replace with label pos. 
*/ + while (n > 0) { + int *pb = DASM_POS2PTR(D, n); + n = *pb; + *pb = pos; + } + *pl = -pos; /* Label exists now. */ + b[pos++] = ofs; /* Store pass1 offset estimate. */ + break; + case DASM_IMM8: + b[pos++] = n; + break; + case DASM_IMM16: + CK(((short)n) == n || ((unsigned short)n) == n, RANGE_I); /* TODO: is this the right way to handle unsigned immediates? */ + ofs += 2; + b[pos++] = n; + break; + case DASM_IMM32: + ofs += 4; + b[pos++] = n; + break; + case DASM_DISP20: + CK(-(1 << 19) <= n && n < (1 << 19), RANGE_I); + b[pos++] = n; + break; + case DASM_DISP12: + CK((n >> 12) == 0, RANGE_I); + b[pos++] = n; + break; + case DASM_LEN8R: + CK(n >= 1 && n <= 256, RANGE_I); + b[pos++] = n; + break; + case DASM_LEN4HR: + case DASM_LEN4LR: + CK(n >= 1 && n <= 128, RANGE_I); + b[pos++] = n; + break; + } + } +stop: + va_end(ap); + sec->pos = pos; + sec->ofs = ofs; +} + +#undef CK + +/* Pass 2: Link sections, shrink aligns, fix label offsets. */ +int dasm_link(Dst_DECL, size_t * szp) +{ + dasm_State *D = Dst_REF; + int secnum; + int ofs = 0; + +#ifdef DASM_CHECKS + *szp = 0; + if (D->status != DASM_S_OK) + return D->status; + { + int pc; + for (pc = 0; pc * sizeof(int) < D->pcsize; pc++) + if (D->pclabels[pc] > 0) + return DASM_S_UNDEF_PC | pc; + } +#endif + + { /* Handle globals not defined in this translation unit. */ + int idx; + for (idx = 20; idx * sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { + int *pb = DASM_POS2PTR(D, n); + n = *pb; + *pb = -idx; + } + } + } + + /* Combine all code sections. No support for data sections (yet). 
*/ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->rbuf; + int pos = DASM_SEC2POS(secnum); + int lastpos = sec->pos; + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; + while (1) { + unsigned short ins = *p++; + unsigned short action = ins; + switch (action) { + case DASM_STOP: + case DASM_SECTION: + goto stop; + case DASM_ESC: + p++; + break; + case DASM_REL_EXT: + p++; + break; + case DASM_ALIGN: + ofs -= (b[pos++] + ofs) & *p++; + break; + case DASM_REL_LG: + case DASM_REL_PC: + p++; + pos++; + break; + case DASM_LABEL_LG: + case DASM_LABEL_PC: + p++; + b[pos++] += ofs; + break; + case DASM_IMM8: + case DASM_IMM16: + case DASM_IMM32: + case DASM_DISP20: + case DASM_DISP12: + case DASM_LEN8R: + case DASM_LEN4HR: + case DASM_LEN4LR: + pos++; + break; + } + } + stop:(void)0; + } + ofs += sec->ofs; /* Next section starts right after current section. */ + } + + D->codesize = ofs; /* Total size of all code sections */ + *szp = ofs; + return DASM_S_OK; +} + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) +#else +#define CK(x, st) ((void)0) +#endif + +/* Pass 3: Encode sections. */ +int dasm_encode(Dst_DECL, void *buffer) +{ + dasm_State *D = Dst_REF; + char *base = (char *)buffer; + unsigned short *cp = (unsigned short *)buffer; + int secnum; + + /* Encode all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->buf; + int *endb = sec->rbuf + sec->pos; + + while (b != endb) { + dasm_ActList p = D->actionlist + *b++; + while (1) { + unsigned short ins = *p++; + unsigned short action = ins; + int n = (action >= DASM_ALIGN && action < DASM__MAX) ? 
*b++ : 0; + switch (action) { + case DASM_STOP: + case DASM_SECTION: + goto stop; + case DASM_ESC: + *cp++ = *p++; + break; + case DASM_REL_EXT: + n = DASM_EXTERN(Dst, (unsigned char *)cp, *p++, 1) - 4; + goto patchrel; + case DASM_ALIGN: + ins = *p++; + /* TODO: emit 4-byte noprs instead of 2-byte nops where possible. */ + while ((((char *)cp - base) & ins)) + *cp++ = 0x0700; /* nop */ + break; + case DASM_REL_LG: + CK(n >= 0, UNDEF_LG); + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base); + p++; /* skip argument */ + patchrel: + /* Offsets are halfword aligned (so need to be halved). */ + n += 2; /* Offset is relative to start of instruction. */ + if (cp[-1] >> 12 == 0xc) { + *cp++ = n >> 17; + } else { + CK(-(1 << 16) <= n && n < (1 << 16) && (n & 1) == 0, RANGE_LG); + } + *cp++ = n >> 1; + break; + case DASM_LABEL_LG: + ins = *p++; + if (ins >= 20) + D->globals[ins - 10] = (void *)(base + n); + break; + case DASM_LABEL_PC: + break; + case DASM_IMM8: + cp[-1] |= n & 0xff; + break; + case DASM_IMM16: + *cp++ = n; + break; + case DASM_IMM32: + *cp++ = n >> 16; + *cp++ = n; + break; + case DASM_DISP20: + cp[-2] |= n & 0xfff; + cp[-1] |= (n >> 4) & 0xff00; + break; + case DASM_DISP12: + cp[-1] |= n & 0xfff; + break; + case DASM_LEN8R: + cp[-1] |= (n - 1) & 0xff; + break; + case DASM_LEN4HR: + cp[-1] |= ((n - 1) << 4) & 0xf0; + break; + case DASM_LEN4LR: + cp[-1] |= (n - 1) & 0x0f; + break; + default: + *cp++ = ins; + break; + } + } + stop:(void)0; + } + } + + if (base + D->codesize != (char *)cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} + +#undef CK + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc * sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) + return *DASM_POS2PTR(D, -pos); + if (pos > 0) + return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. 
*/ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. */ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { + D->status = DASM_S_UNDEF_LG | i; + break; + } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC | (D->section - D->sections); + return D->status; +} +#endif --- /dev/null +++ b/dynasm/dasm_s390x.lua @@ -0,0 +1,1634 @@ +------------------------------------------------------------------------------ +-- DynASM s390x module. +-- +-- Copyright (C) 2005-2016 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ + +-- Module information: +local _info = { + arch = "s390x", + description = "DynASM s390x module", + version = "1.4.0", + vernum = 10400, + release = "2015-10-18", + author = "Mike Pall", + license = "MIT", +} + +-- Exported glue functions for the arch-specific module. +local _M = { _info = _info } + +-- Cache library functions. +local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs +local assert, setmetatable, rawget = assert, setmetatable, rawget +local _s = string +local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char +local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub +local concat, sort, insert = table.concat, table.sort, table.insert +local bit = bit or require("bit") +local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift +local ror, tohex = bit.ror, bit.tohex + +-- Inherited tables and callbacks. +local g_opt, g_arch +local wline, werror, wfatal, wwarn + +-- Action name list. +-- CHECK: Keep this in sync with the C code! 
local action_names = {
  -- Actions that need no buffer position.
  "STOP", "SECTION", "ESC", "REL_EXT",
  -- Actions that need a buffer position.
  "ALIGN", "REL_LG", "LABEL_LG",
  -- Actions that additionally take an argument.
  "REL_PC", "LABEL_PC",
  "DISP12", "DISP20",
  "IMM8", "IMM16", "IMM32",
  "LEN8R", "LEN4HR", "LEN4LR",
}

-- Upper bound on section buffer positions consumed by one dasm_put() call.
-- CHECK: Keep this in sync with the C code!
local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.

-- Action name -> action number (zero-based position in action_names).
local map_action = {}
local max_action = 0
for idx = 1, #action_names do
  map_action[action_names[idx]] = idx - 1
end
max_action = #action_names -- Number of actions; first non-action halfword.

-- Halfwords collected for the action list.
local actlist = {}

-- Arguments for the pending dasm_put(); slot 1 is the action list offset.
local actargs = { 0 }

-- Section buffer positions used so far by the pending dasm_put().
local secpos = 1

------------------------------------------------------------------------------

-- Print every action name with its number in hex and decimal to `out`.
local function dumpactions(out)
  out:write("DynASM encoding engine action codes:\n")
  for _, aname in ipairs(action_names) do
    local code = map_action[aname]
    local row = format(" %-10s %02X %d\n", aname, code, code)
    out:write(row)
  end
  out:write("\n")
end

-- True for actions whose following halfword in the action list is an inline
-- argument rather than an instruction halfword or another action.
local function havearg(a)
  if a == "ESC" or a == "SECTION" then return true end
  if a == "REL_LG" or a == "LABEL_LG" then return true end
  return a == "REL_EXT"
end

-- Write action list buffer as a huge static C array.
-- Emits the collected action list to `out` as a C array of unsigned shorts
-- called `name`. Each halfword is written as four hex digits; halfwords that
-- decode as an action are annotated with the action name, unless they are the
-- inline argument of the preceding action.
local function writeactions(out, name)
  local nn = #actlist
  -- An empty list still has to produce a valid, non-empty C array. The
  -- placeholder must live at index 1 because the loop below starts there
  -- (index 0 would leave actlist[1] nil and make tohex() fail).
  if nn == 0 then nn = 1; actlist[1] = map_action.STOP end
  out:write("static const unsigned short ", name, "[", nn, "] = {")
  local esc = false -- also need to escape for action arguments
  for i = 1, nn do
    assert(out:write("\n 0x", sub(tohex(actlist[i]), 5, 8)))
    if i ~= nn then assert(out:write(",")) end
    local aname = action_names[actlist[i]+1]
    if not esc and aname then
      assert(out:write(" /* ", aname, " */"))
      esc = havearg(aname) -- The next halfword is this action's argument.
    else
      esc = false
    end
  end
  assert(out:write("\n};\n\n"))
end

------------------------------------------------------------------------------

-- Add halfword to action list. Asserts that n fits in 16 unsigned bits.
local function wputxhw(n)
  assert(n >= 0 and n <= 0xffff, "halfword out of range")
  actlist[#actlist+1] = n
end

-- Add action to list with optional arg. Advance buffer pos, too.
--   action: action name (must be a key of map_action).
--   val:    optional inline halfword stored right after the action.
--   a:      optional C expression appended to the dasm_put() argument list.
--   num:    optional number of buffer positions consumed (defaults to 1).
local function waction(action, val, a, num)
  local w = assert(map_action[action], "bad action name `"..action.."'")
  wputxhw(w)
  if val then wputxhw(val) end -- Not sure about this, do we always have one arg?
  if a then actargs[#actargs+1] = a end
  if val or a or num then secpos = secpos + (num or 1) end
end

-- Flush action list (intervening C code or buffer pos overflow).
-- Emits one dasm_put() call for everything queued since the last flush.
-- Pass a true `term` when the list is already terminated.
local function wflush(term)
  if #actlist == actargs[1] then return end -- Nothing to flush.
  if not term then waction("STOP") end -- Terminate action list.
  wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true)
  actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
  secpos = 1 -- The actionlist offset occupies a buffer position, too.
end

-- Put escaped halfword. Values that could be mistaken for an action number
-- are prefixed with ESC so the encoder copies them verbatim.
local function wputhw(n)
  if n <= max_action then waction("ESC") end
  wputxhw(n)
end

-- Reserve position for halfword.
local function wpos()
  -- Append an empty-string placeholder and report the slot it occupies.
  local slot = #actlist + 1
  actlist[slot] = ""
  return slot
end

------------------------------------------------------------------------------

-- Global label name -> global label number. A number is assigned the first
-- time a name is looked up; numbering starts at 20 and stops at 2047.
local next_global = 20
local map_global = setmetatable({}, {
  __index = function(tab, label)
    if not match(label, "^[%a_][%w_]*$") then
      werror("bad global label")
    end
    local num = next_global
    if num > 2047 then
      werror("too many global labels")
    end
    next_global = num + 1
    tab[label] = num
    return num
  end,
})

-- Dump global labels, one per line, in order of assignment.
local function dumpglobals(out, lvl)
  local names = {}
  for label, num in pairs(map_global) do
    names[num] = label
  end
  out:write("Global labels:\n")
  local last = next_global - 1
  for num = 20, last do
    local row = format(" %s\n", names[num])
    out:write(row)
  end
  out:write("\n")
end

-- Write global label enum: one `prefix`-qualified enumerator per label,
-- followed by a closing `prefix`_MAX entry.
local function writeglobals(out, prefix)
  local names = {}
  for label, num in pairs(map_global) do
    names[num] = label
  end
  out:write("enum {\n")
  local last = next_global - 1
  for num = 20, last do
    out:write(" ", prefix, names[num], ",\n")
  end
  out:write(" ", prefix, "_MAX\n};\n")
end

-- Write global label names as a NULL-terminated C string array called `name`.
local function writeglobalnames(out, name)
  local names = {}
  for label, num in pairs(map_global) do
    names[num] = label
  end
  out:write("static const char *const ", name, "[] = {\n")
  local last = next_global - 1
  for num = 20, last do
    out:write(" \"", names[num], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end

------------------------------------------------------------------------------

-- Extern label name -> extern label number. A number is assigned the first
-- time a name is looked up, starting at 0; map_extern_ holds the reverse map.
local next_extern = 0
local map_extern_ = {}
local map_extern = setmetatable({}, {
  __index = function(tab, label)
    -- No restrictions on the name for now.
    local num = next_extern
    if num > 2047 then
      werror("too many extern labels")
    end
    next_extern = num + 1
    tab[label] = num
    map_extern_[num] = label
    return num
  end,
})

-- Dump extern labels.
local function dumpexterns(out, lvl)
  -- List every extern label name in order of assignment.
  out:write("Extern labels:\n")
  local last = next_extern - 1
  for num = 0, last do
    local row = format(" %s\n", map_extern_[num])
    out:write(row)
  end
  out:write("\n")
end

-- Write extern label names as a NULL-terminated C string array called `name`.
local function writeexternnames(out, name)
  out:write("static const char *const ", name, "[] = {\n")
  local last = next_extern - 1
  for num = 0, last do
    out:write(" \"", map_extern_[num], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end

------------------------------------------------------------------------------

-- Arch-specific maps.
-- Ext. register name -> int. name.
local map_archdef = { sp = "r15" }

-- Int. register name -> ext. name.
local map_reg_rev = { r15 = "sp" }

local map_type = {} -- Type name -> { ctype, reg }
local ctypenum = 0 -- Type number (for Dt... macros).

-- Reverse defines for registers: returns the external alias of an internal
-- register name when one exists, otherwise the name itself.
function _M.revdef(s)
  local ext = map_reg_rev[s]
  if ext then return ext end
  return s
end

-- Condition mnemonic suffix -> numeric mask value; the empty suffix maps
-- to 15.
local map_cond = {
  o = 1,
  h = 2,
  nle = 3,
  l = 4,
  nhe = 5,
  lh = 6,
  ne = 7,
  e = 8,
  nlh = 9,
  he = 10,
  nl = 11,
  le = 12,
  nh = 13,
  no = 14,
  [""] = 15,
}

------------------------------------------------------------------------------

-- Parse a register operand: `rN`/`fN` with N in 0..15, a type name bound to a
-- register, or `type:rN` to override the bound register.
-- Returns the register number and the matching map_type entry (nil if none).
local function parse_reg(expr)
  if not expr then werror("expected register name") end
  local tname, ovreg = match(expr, "^([%w_]+):(r1?%d)$")
  local key = tname or expr
  local tp = map_type[key]
  if tp then
    local reg = ovreg or tp.reg
    if not reg then
      werror("type `"..key.."' needs a register override")
    end
    expr = reg
  end
  local digits = match(expr, "^[rf](1?%d)$")
  local regno = digits and tonumber(digits)
  if regno and regno <= 15 then
    return regno, tp
  end
  werror("bad register name `"..expr.."'")
end

-- Environment in which operand expressions are evaluated.
local parse_ctx = {}

-- Compile a chunk of Lua source with parse_ctx as its environment. Uses
-- loadstring/setfenv when setfenv exists, and load() with an explicit
-- environment otherwise. Returns nil if the source does not compile.
local loadenv
if setfenv then
  loadenv = function(s)
    local chunk = loadstring(s, "")
    if chunk then setfenv(chunk, parse_ctx) end
    return chunk
  end
else
  loadenv = function(s)
    return load(s, "", nil, parse_ctx)
  end
end

-- Try to parse simple arithmetic, too, since some basic ops are aliases.
-- Convert `n` to a number. Plain numerals go through tonumber(); anything
-- else is compiled as the expression `return <n>` via loadenv() and run under
-- pcall(). Returns nil when neither approach yields a value.
local function parse_number(n)
  local x = tonumber(n)
  if x then return x end
  local code = loadenv("return "..n)
  if code then
    local ok, y = pcall(code)
    if ok then return y end
  end
  return nil
end

-- Range predicates for immediate and displacement fields.

-- 12-bit unsigned: 0 .. 4095.
local function is_uint12(num)
  return 0 <= num and num < 4096
end

-- 20-bit signed: -2^19 .. 2^19-1.
local function is_int20(num)
  return -shl(1, 19) <= num and num < shl(1, 19)
end

-- 32-bit signed: -2^31 .. 2^31-1.
local function is_int32(num)
  return -2147483648 <= num and num < 2147483648
end

-- 16-bit unsigned: 0 .. 65535. The upper bound is inclusive; 0xffff itself is
-- a valid unsigned halfword.
local function is_uint16(num)
  return 0 <= num and num <= 0xffff
end

-- 16-bit signed: -32768 .. 32767.
local function is_int16(num)
  return -32768 <= num and num < 32768
end

-- 8-bit signed: -128 .. 127.
local function is_int8(num)
  return -128 <= num and num < 128
end

-- 8-bit unsigned: 0 .. 255.
local function is_uint8(num)
  return 0 <= num and num < 256
end

-- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
-- If x is not specified then it is 0.
-- d is returned as written (a string, or the number 0 when it was omitted);
-- x and b are register numbers obtained from parse_reg().
local function split_memop(arg)
  local reg = "[%w_:]+"
  -- d(x,b)
  local d, x, b = match(arg, "^(.*)%(%s*("..reg..")%s*,%s*("..reg..")%s*%)$")
  if d then
    return d, parse_reg(x), parse_reg(b)
  end
  -- d(b)
  local d, b = match(arg, "^(.*)%(%s*("..reg..")%s*%)$")
  if d then
    return d, 0, parse_reg(b)
  end
  -- Assume the two registers are passed as "(r1,r2)", and displacement(d) is not specified. TODO: not sure if we want to do this, GAS doesn't.
  local x, b = match(arg,"%(%s*("..reg..")%s*,%s*("..reg..")%s*%)$")
  if b then
    return 0, parse_reg(x), parse_reg(b)
  end
  -- Accept a lone integer as a displacement. TODO: allow expressions/variables here? Interacts badly with the other rules currently.
  local d = match(arg,"^(-?[%d]+)$")
  if d then
    return d, 0, 0
  end
  -- Typed operand: a type name (optionally with a register override) followed
  -- by a tail that is formatted through the type's ctypefmt.
  local reg, tailr = match(arg, "^([%w_:]+)%s*(.*)$")
  if reg then
    local r, tp = parse_reg(reg)
    if tp then
      return format(tp.ctypefmt, tailr), 0, r
    end
  end
  werror("bad memory operand: "..arg)
  return nil
end

-- Parse memory operand of the form d(x, b) where 0 <= d < 4096 and b and x
-- are GPRs.
+-- If the fourth return value is not-nil then it needs to be called to +-- insert an action. +-- Encoded as: xbddd +local function parse_mem_bx(arg) + local d, x, b = split_memop(arg) + local dval = tonumber(d) + if dval then + if not is_uint12(dval) then + werror("displacement out of range: ", dval) + end + return dval, x, b, nil + end + if match(d, "^[rf]1?[0-9]?") then + werror("expected immediate operand, got register") + end + return 0, x, b, function() waction("DISP12", nil, d) end +end + +-- Parse memory operand of the form d(b) where 0 <= d < 4096 and b is a GPR. +-- Encoded as: bddd +local function parse_mem_b(arg) + local d, x, b, a = parse_mem_bx(arg) + if x ~= 0 then + werror("unexpected index register") + end + return d, b, a +end + +-- Parse memory operand of the form d(x, b) where -(2^20)/2 <= d < (2^20)/2 +-- and b and x are GPRs. +-- Encoded as: xblllhh (ls are the low-bits of d, and hs are the high bits). +local function parse_mem_bxy(arg) + local d, x, b = split_memop(arg) + local dval = tonumber(d) + if dval then + if not is_int20(dval) then + werror("displacement out of range: ", dval) + end + return dval, x, b, nil + end + if match(d, "^[rf]1?[0-9]?") then + werror("expected immediate operand, got register") + end + return 0, x, b, function() waction("DISP20", nil, d) end +end + +-- Parse memory operand of the form d(b) where -(2^20)/2 <= d < (2^20)/2 and +-- b is a GPR. +-- Encoded as: blllhh (ls are the low-bits of d, and hs are the high bits). +local function parse_mem_by(arg) + local d, x, b, a = parse_mem_bxy(arg) + if x ~= 0 then + werror("unexpected index register") + end + return d, b, a +end + +-- Parse memory operand of the form d(l, b) where 0 <= d < 4096, 1 <= l <= 256, +-- and b is a GPR. +local function parse_mem_lb(arg) + local reg = "r1?[0-9]" + local d, l, b = match(arg, "^(.*)%s*%(%s*(.*)%s*,%s*("..reg..")%s*%)$") + if not d then + -- TODO: handle values without registers? + -- TODO: handle registers without a displacement? 
+ werror("bad memory operand: "..arg) + return nil + end + local dval = tonumber(d) + local dact = nil + if dval then + if not is_uint12(dval) then + werror("displacement out of range: ", dval) + end + else + dval = 0 + dact = function() waction("DISP12", nil, d) end + end + local lval = tonumber(l) + local lact = nil + if lval then + if lval < 1 or lval > 256 then + werror("length out of range: ", dval) + end + lval = lval - 1 + else + lval = 0 + lact = function() waction("LEN8R", nil, l) end + end + return dval, lval, parse_reg(b), dact, lact +end + +local function parse_mem_l2b(arg, high_l) + local reg = "r1?[0-9]" + local d, l, b = match(arg, "^(.*)%s*%(%s*(.*)%s*,%s*("..reg..")%s*%)$") + if not d then + -- TODO: handle values without registers? + -- TODO: handle registers without a displacement? + werror("bad memory operand: "..arg) + return nil + end + local dval = tonumber(d) + local dact = nil + if dval then + if not is_uint12(dval) then + werror("displacement out of range: ", dval) + end + else + dval = 0 + dact = function() waction("DISP12", nil, d) end + end + local lval = tonumber(l) + local lact = nil + if lval then + if lval < 1 or lval > 128 then + werror("length out of range: ", dval) + end + lval = lval - 1 + else + lval = 0 + if high_l then + lact = function() waction("LEN4HR", nil, l) end + else + lact = function() waction("LEN4LR", nil, l) end + end + end + return dval, lval, parse_reg(b), dact, lact +end + +local function parse_imm32(imm) + local imm_val = tonumber(imm) + if imm_val then + if not is_int32(imm_val) then + werror("immediate value out of range: ", imm_val) + end + wputhw(band(shr(imm_val, 16), 0xffff)) + wputhw(band(imm_val, 0xffff)) + elseif match(imm, "^[rfv]([1-3]?[0-9])$") or + match(imm, "^([%w_]+):(r1?[0-9])$") then + werror("expected immediate operand, got register") + else + waction("IMM32", nil, imm) -- if we get label + end +end + +local function parse_imm16(imm) + local imm_val = tonumber(imm) + if imm_val then + if not 
is_int16(imm_val) and not is_uint16(imm_val) then + werror("immediate value out of range: ", imm_val) + end + wputhw(band(imm_val, 0xffff)) + elseif match(imm, "^[rfv]([1-3]?[0-9])$") or + match(imm, "^([%w_]+):(r1?[0-9])$") then + werror("expected immediate operand, got register") + else + waction("IMM16", nil, imm) + end +end + +local function parse_imm8(imm) + local imm_val = tonumber(imm) + if imm_val then + if not is_int8(imm_val) and not is_uint8(imm_val) then + werror("Immediate value out of range: ", imm_val) + end + return imm_val, nil + end + return 0, function() waction("IMM8", nil, imm) end +end + +local function parse_mask(mask) + local m3 = parse_number(mask) + if m3 then + if ((m3 == 1) or (m3 == 0) or ( m3 >=3 and m3 <=7)) then + return m3 + else + werror("Mask value should be 0,1 or 3-7: ", m3) + end + end +end + +local function parse_mask2(mask) + local m4 = parse_number(mask) + if ( m4 >=0 and m4 <=1) then + return m4 + else + werror("Mask value should be 0 or 1: ", m4) + end +end + +local function parse_label(label, def) + local prefix = sub(label, 1, 2) + -- =>label (pc label reference) + if prefix == "=>" then + return "PC", 0, sub(label, 3) + end + -- ->name (global label reference) + if prefix == "->" then + return "LG", map_global[sub(label, 3)] + end + if def then + -- [1-9] (local label definition) + if match(label, "^[1-9]$") then + return "LG", 10+tonumber(label) + end + else + -- [<>][1-9] (local label reference) + local dir, lnum = match(label, "^([<>])([1-9])$") + if dir then -- Fwd: 1-9, Bkwd: 11-19. 
+ return "LG", lnum + (dir == ">" and 0 or 10) + end + -- extern label (extern label reference) + local extname = match(label, "^extern%s+(%S+)$") + if extname then + return "EXT", map_extern[extname] + end + end + werror("bad label `"..label.."'") +end + +------------------------------------------------------------------------------ + +local map_op, op_template + +local function op_alias(opname, f) + return function(params, nparams) + if not params then return "-> "..opname:sub(1, -3) end + f(params, nparams) + op_template(params, map_op[opname], nparams) + end +end + +-- Template strings for s390x instructions. +map_op = { + a_2 = "00005a000000RX-a", + ad_2 = "00006a000000RX-a", + adb_2 = "ed000000001aRXE", + adbr_2 = "0000b31a0000RRE", + adr_2 = "000000002a00RR", + ae_2 = "00007a000000RX-a", + aeb_2 = "ed000000000aRXE", + aebr_2 = "0000b30a0000RRE", + aer_2 = "000000003a00RR", + afi_2 = "c20900000000RIL-a", + ag_2 = "e30000000008RXY-a", + agf_2 = "e30000000018RXY-a", + agfi_2 = "c20800000000RIL-a", + agfr_2 = "0000b9180000RRE", + aghi_2 = "0000a70b0000RI-a", + agr_2 = "0000b9080000RRE", + ah_2 = "00004a000000RX-a", + ahi_2 = "0000a70a0000RI-a", + ahy_2 = "e3000000007aRXY-a", + aih_2 = "cc0800000000RIL-a", + al_2 = "00005e000000RX-a", + alc_2 = "e30000000098RXY-a", + alcg_2 = "e30000000088RXY-a", + alcgr_2 = "0000b9880000RRE", + alcr_2 = "0000b9980000RRE", + alfi_2 = "c20b00000000RIL-a", + alg_2 = "e3000000000aRXY-a", + algf_2 = "e3000000001aRXY-a", + algfi_2 = "c20a00000000RIL-a", + algfr_2 = "0000b91a0000RRE", + algr_2 = "0000b90a0000RRE", + alr_2 = "000000001e00RR", + alsih_2 = "cc0a00000000RIL-a", + alsihn_2 = "cc0b00000000RIL-a", + aly_2 = "e3000000005eRXY-a", + ap_2 = "fa0000000000SS-b", + ar_2 = "000000001a00RR", + au_2 = "00007e000000RX-a", + aur_2 = "000000003e00RR", + aw_2 = "00006e000000RX-a", + awr_2 = "000000002e00RR", + axbr_2 = "0000b34a0000RRE", + axr_2 = "000000003600RR", + ay_2 = "e3000000005aRXY-a", + bakr_2 = "0000b2400000RRE", + bal_2 = 
"000045000000RX-a", + balr_2 = "000000000500RR", + bas_2 = "00004d000000RX-a", + basr_2 = "000000000d00RR", + bassm_2 = "000000000c00RR", + bc_2 = "000047000000RX-b", + bc_2 = "000047000000RX-b", + bcr_2 = "000000000700RR", + bct_2 = "000046000000RX-a", + bctg_2 = "e30000000046RXY-a", + bctgr_2 = "0000b9460000RRE", + bctr_2 = "000000000600RR", + bras_2 = "0000a7050000RI-b", + brasl_2 = "c00500000000RIL-b", + brc_2 = "0000a7040000RI-c", + brcl_2 = "c00400000000RIL-c", + brcl_2 = "c00400000000RIL-c", + brct_2 = "0000a7060000RI-b", + brctg_2 = "0000a7070000RI-b", + brcth_2 = "cc0600000000RIL-b", + brxh_3 = "000084000000RSI", + brxhg_3 = "ec0000000044RIE-e", + bsa_2 = "0000b25a0000RRE", + bsg_2 = "0000b2580000RRE", + bsm_2 = "000000000b00RR", + bxh_3 = "000086000000RS-a", + bxhg_3 = "eb0000000044RSY-a", + bxle_3 = "000087000000RS-a", + bxleg_3 = "eb0000000045RSY-a", + c_2 = "000059000000RX-a", + cd_2 = "000069000000RX-a", + cdb_2 = "ed0000000019RXE", + cdbr_2 = "0000b3190000RRE", + cdfbr_2 = "0000b3950000RRE", + cdfbra_4 = "0000b3950000RRF-e", + cdfr_2 = "0000b3b50000RRE", + cdftr_2 = "0000b9510000RRE", + cdgbr_2 = "0000b3a50000RRE", + cdgbra_4 = "0000b3a50000RRF-e", + cdgr_2 = "0000b3c50000RRE", + cdgtr_2 = "0000b3f10000RRE", + cdr_2 = "000000002900RR", + cds_3 = "0000bb000000RS-a", + cdsg_3 = "eb000000003eRSY-a", + cdstr_2 = "0000b3f30000RRE", + cdsy_3 = "eb0000000031RSY-a", + cdtr_2 = "0000b3e40000RRE", + cdutr_2 = "0000b3f20000RRE", + ce_2 = "000079000000RX-a", + ceb_2 = "ed0000000009RXE", + cebr_2 = "0000b3090000RRE", + cedtr_2 = "0000b3f40000RRE", + cefbr_2 = "0000b3940000RRE", + cefbra_4 = "0000b3940000RRF-e", + cefr_2 = "0000b3b40000RRE", + cegbr_2 = "0000b3a40000RRE", + cegbra_4 = "0000b3a40000RRF-e", + cegr_2 = "0000b3c40000RRE", + cer_2 = "000000003900RR", + cextr_2 = "0000b3fc0000RRE", + cfdbr_3 = "0000b3990000RRF-e", + cfdbra_4 = "0000b3990000RRF-e", + cfebr_3 = "0000b3980000RRF-e", + cfebra_4 = "0000b3980000RRF-e", + cfi_2 = "c20d00000000RIL-a", + cfxbr_3 
= "0000b39a0000RRF-e", + cfxbra_4 = "0000b39a0000RRF-e", + cg_2 = "e30000000020RXY-a", + cgdbr_3 = "0000b3a90000RRF-e", + cgdbra_4 = "0000b3a90000RRF-e", + cgebr_3 = "0000b3a80000RRF-e", + cgebra_4 = "0000b3a80000RRF-e", + cgf_2 = "e30000000030RXY-a", + cgfi_2 = "c20c00000000RIL-a", + cgfr_2 = "0000b9300000RRE", + cgfrl_2 = "c60c00000000RIL-b", + cgh_2 = "e30000000034RXY-a", + cghi_2 = "0000a70f0000RI-a", + cghrl_2 = "c60400000000RIL-b", + cgr_2 = "0000b9200000RRE", + cgrl_2 = "c60800000000RIL-b", + cgxbr_3 = "0000b3aa0000RRF-e", + cgxbra_4 = "0000b3aa0000RRF-e", + ch_2 = "000049000000RX-a", + chf_2 = "e300000000cdRXY-a", + chhr_2 = "0000b9cd0000RRE", + chi_2 = "0000a70e0000RI-a", + chlr_2 = "0000b9dd0000RRE", + chrl_2 = "c60500000000RIL-b", + chy_2 = "e30000000079RXY-a", + cih_2 = "cc0d00000000RIL-a", + cksm_2 = "0000b2410000RRE", + cl_2 = "000055000000RX-a", + clc_2 = "d50000000000SS-a", + clcl_2 = "000000000f00RR", + clcle_3 = "0000a9000000RS-a", + clclu_3 = "eb000000008fRSY-a", + clfi_2 = "c20f00000000RIL-a", + clg_2 = "e30000000021RXY-a", + clgf_2 = "e30000000031RXY-a", + clgfi_2 = "c20e00000000RIL-a", + clgfr_2 = "0000b9310000RRE", + clgfrl_2 = "c60e00000000RIL-b", + clghrl_2 = "c60600000000RIL-b", + clgr_2 = "0000b9210000RRE", + clgrl_2 = "c60a00000000RIL-b", + clhf_2 = "e300000000cfRXY-a", + clhhr_2 = "0000b9cf0000RRE", + clhlr_2 = "0000b9df0000RRE", + clhrl_2 = "c60700000000RIL-b", + cli_2 = "000095000000SI", + clih_2 = "cc0f00000000RIL-a", + clm_3 = "0000bd000000RS-b", + clmh_3 = "eb0000000020RSY-b", + clmy_3 = "eb0000000021RSY-b", + clr_2 = "000000001500RR", + clrl_2 = "c60f00000000RIL-b", + clst_2 = "0000b25d0000RRE", + cly_2 = "e30000000055RXY-a", + cmpsc_2 = "0000b2630000RRE", + cpya_2 = "0000b24d0000RRE", + cr_2 = "000000001900RR", + crl_2 = "c60d00000000RIL-b", + cs_3 = "0000ba000000RS-a", + csg_3 = "eb0000000030RSY-a", + csp_2 = "0000b2500000RRE", + cspg_2 = "0000b98a0000RRE", + csy_3 = "eb0000000014RSY-a", + cu41_2 = "0000b9b20000RRE", + cu42_2 = 
"0000b9b30000RRE", + cudtr_2 = "0000b3e20000RRE", + cuse_2 = "0000b2570000RRE", + cuxtr_2 = "0000b3ea0000RRE", + cvb_2 = "00004f000000RX-a", + cvbg_2 = "e3000000000eRXY-a", + cvby_2 = "e30000000006RXY-a", + cvd_2 = "00004e000000RX-a", + cvdg_2 = "e3000000002eRXY-a", + cvdy_2 = "e30000000026RXY-a", + cxbr_2 = "0000b3490000RRE", + cxfbr_2 = "0000b3960000RRE", + cxfbra_4 = "0000b3960000RRF-e", + cxfr_2 = "0000b3b60000RRE", + cxftr_2 = "0000b9590000RRE", + cxgbr_2 = "0000b3a60000RRE", + cxgbra_4 = "0000b3a60000RRF-e", + cxgr_2 = "0000b3c60000RRE", + cxgtr_2 = "0000b3f90000RRE", + cxr_2 = "0000b3690000RRE", + cxstr_2 = "0000b3fb0000RRE", + cxtr_2 = "0000b3ec0000RRE", + cxutr_2 = "0000b3fa0000RRE", + cy_2 = "e30000000059RXY-a", + d_2 = "00005d000000RX-a", + dd_2 = "00006d000000RX-a", + ddb_2 = "ed000000001dRXE", + ddbr_2 = "0000b31d0000RRE", + ddr_2 = "000000002d00RR", + de_2 = "00007d000000RX-a", + deb_2 = "ed000000000dRXE", + debr_2 = "0000b30d0000RRE", + der_2 = "000000003d00RR", + didbr_4 = "0000b35b0000RRF-b", + dl_2 = "e30000000097RXY-a", + dlg_2 = "e30000000087RXY-a", + dlgr_2 = "0000b9870000RRE", + dlr_2 = "0000b9970000RRE", + dr_2 = "000000001d00RR", + dsg_2 = "e3000000000dRXY-a", + dsgf_2 = "e3000000001dRXY-a", + dsgfr_2 = "0000b91d0000RRE", + dsgr_2 = "0000b90d0000RRE", + dxbr_2 = "0000b34d0000RRE", + dxr_2 = "0000b22d0000RRE", + ear_2 = "0000b24f0000RRE", + ecag_3 = "eb000000004cRSY-a", + ed_2 = "de0000000000SS-a", + edmk_2 = "df0000000000SS-a", + eedtr_2 = "0000b3e50000RRE", + eextr_2 = "0000b3ed0000RRE", + efpc_2 = "0000b38c0000RRE", + epair_2 = "0000b99a0000RRE", + epar_2 = "0000b2260000RRE", + epsw_2 = "0000b98d0000RRE", + ereg_2 = "0000b2490000RRE", + eregg_2 = "0000b90e0000RRE", + esair_2 = "0000b99b0000RRE", + esar_2 = "0000b2270000RRE", + esdtr_2 = "0000b3e70000RRE", + esea_2 = "0000b99d0000RRE", + esta_2 = "0000b24a0000RRE", + esxtr_2 = "0000b3ef0000RRE", + ex_2 = "000044000000RX-a", + exrl_2 = "c60000000000RIL-b", + fidbra_4 = "0000b35f0000RRF-e", + 
fidr_2 = "0000b37f0000RRE", + fier_2 = "0000b3770000RRE", + fixr_2 = "0000b3670000RRE", + flogr_2 = "0000b9830000RRE", + hdr_2 = "000000002400RR", + her_2 = "000000003400RR", + iac_2 = "0000b2240000RRE", + ic_2 = "000043000000RX-a", + icm_3 = "0000bf000000RS-b", + icmh_3 = "eb0000000080RSY-b", + icmy_3 = "eb0000000081RSY-b", + icy_2 = "e30000000073RXY-a", + iihf_2 = "c00800000000RIL-a", + iihh_2 = "0000a5000000RI-a", + iihl_2 = "0000a5010000RI-a", + iilf_2 = "c00900000000RIL-a", + iilh_2 = "0000a5020000RI-a", + iill_2 = "0000a5030000RI-a", + ipm_2 = "0000b2220000RRE", + iske_2 = "0000b2290000RRE", + ivsk_2 = "0000b2230000RRE", + kdbr_2 = "0000b3180000RRE", + kdtr_2 = "0000b3e00000RRE", + kebr_2 = "0000b3080000RRE", + kimd_2 = "0000b93e0000RRE", + klmd_2 = "0000b93f0000RRE", + km_2 = "0000b92e0000RRE", + kmac_2 = "0000b91e0000RRE", + kmc_2 = "0000b92f0000RRE", + kmf_2 = "0000b92a0000RRE", + kmo_2 = "0000b92b0000RRE", + kxbr_2 = "0000b3480000RRE", + kxtr_2 = "0000b3e80000RRE", + l_2 = "000058000000RX-a", + la_2 = "000041000000RX-a", + laa_3 = "eb00000000f8RSY-a", + laag_3 = "eb00000000e8RSY-a", + laal_3 = "eb00000000faRSY-a", + laalg_3 = "eb00000000eaRSY-a", + lae_2 = "000051000000RX-a", + laey_2 = "e30000000075RXY-a", + lam_3 = "00009a000000RS-a", + lamy_3 = "eb000000009aRSY-a", + lan_3 = "eb00000000f4RSY-a", + lang_3 = "eb00000000e4RSY-a", + lao_3 = "eb00000000f6RSY-a", + laog_3 = "eb00000000e6RSY-a", + larl_2 = "c00000000000RIL-b", + lax_3 = "eb00000000f7RSY-a", + laxg_3 = "eb00000000e7RSY-a", + lay_2 = "e30000000071RXY-a", + lb_2 = "e30000000076RXY-a", + lbh_2 = "e300000000c0RXY-a", + lbr_2 = "0000b9260000RRE", + lcdbr_2 = "0000b3130000RRE", + lcdfr_2 = "0000b3730000RRE", + lcdr_2 = "000000002300RR", + lcebr_2 = "0000b3030000RRE", + lcer_2 = "000000003300RR", + lcgfr_2 = "0000b9130000RRE", + lcgr_2 = "0000b9030000RRE", + lcr_2 = "000000001300RR", + lctl_3 = "0000b7000000RS-a", + lctlg_3 = "eb000000002fRSY-a", + lcxbr_2 = "0000b3430000RRE", + lcxr_2 = 
"0000b3630000RRE", + ld_2 = "000068000000RX-a", + ldebr_2 = "0000b3040000RRE", + lder_2 = "0000b3240000RRE", + ldgr_2 = "0000b3c10000RRE", + ldr_2 = "000000002800RR", + ldxbr_2 = "0000b3450000RRE", + ldxr_2 = "000000002500RR", + ldy_2 = "ed0000000065RXY-a", + le_2 = "000078000000RX-a", + ledbr_2 = "0000b3440000RRE", + ledr_2 = "000000003500RR", + ler_2 = "000000003800RR", + lexbr_2 = "0000b3460000RRE", + lexr_2 = "0000b3660000RRE", + ley_2 = "ed0000000064RXY-a", + lfh_2 = "e300000000caRXY-a", + lg_2 = "e30000000004RXY-a", + lgb_2 = "e30000000077RXY-a", + lgbr_2 = "0000b9060000RRE", + lgdr_2 = "0000b3cd0000RRE", + lgf_2 = "e30000000014RXY-a", + lgfi_2 = "c00100000000RIL-a", + lgfr_2 = "0000b9140000RRE", + lgfrl_2 = "c40c00000000RIL-b", + lgh_2 = "e30000000015RXY-a", + lghi_2 = "0000a7090000RI-a", + lghr_2 = "0000b9070000RRE", + lghrl_2 = "c40400000000RIL-b", + lgr_2 = "0000b9040000RRE", + lgrl_2 = "c40800000000RIL-b", + lh_2 = "000048000000RX-a", + lhh_2 = "e300000000c4RXY-a", + lhi_2 = "0000a7080000RI-a", + lhr_2 = "0000b9270000RRE", + lhrl_2 = "c40500000000RIL-b", + lhy_2 = "e30000000078RXY-a", + llc_2 = "e30000000094RXY-a", + llch_2 = "e300000000c2RXY-a", + llcr_2 = "0000b9940000RRE", + llgc_2 = "e30000000090RXY-a", + llgcr_2 = "0000b9840000RRE", + llgf_2 = "e30000000016RXY-a", + llgfr_2 = "0000b9160000RRE", + llgfrl_2 = "c40e00000000RIL-b", + llgh_2 = "e30000000091RXY-a", + llghr_2 = "0000b9850000RRE", + llghrl_2 = "c40600000000RIL-b", + llgt_2 = "e30000000017RXY-a", + llgtr_2 = "0000b9170000RRE", + llh_2 = "e30000000095RXY-a", + llhh_2 = "e300000000c6RXY-a", + llhr_2 = "0000b9950000RRE", + llhrl_2 = "c40200000000RIL-b", + llihf_2 = "c00e00000000RIL-a", + llihh_2 = "0000a50c0000RI-a", + llihl_2 = "0000a50d0000RI-a", + llilf_2 = "c00f00000000RIL-a", + llilh_2 = "0000a50e0000RI-a", + llill_2 = "0000a50f0000RI-a", + lm_3 = "000098000000RS-a", + lmg_3 = "eb0000000004RSY-a", + lmh_3 = "eb0000000096RSY-a", + lmy_3 = "eb0000000098RSY-a", + lndbr_2 = "0000b3110000RRE", 
+ lndfr_2 = "0000b3710000RRE", + lndr_2 = "000000002100RR", + lnebr_2 = "0000b3010000RRE", + lner_2 = "000000003100RR", + lngfr_2 = "0000b9110000RRE", + lngr_2 = "0000b9010000RRE", + lnr_2 = "000000001100RR", + lnxbr_2 = "0000b3410000RRE", + lnxr_2 = "0000b3610000RRE", + loc_3 = "eb00000000f2RSY-b", + locg_3 = "eb00000000e2RSY-b", + lpdbr_2 = "0000b3100000RRE", + lpdfr_2 = "0000b3700000RRE", + lpdr_2 = "000000002000RR", + lpebr_2 = "0000b3000000RRE", + lper_2 = "000000003000RR", + lpgfr_2 = "0000b9100000RRE", + lpgr_2 = "0000b9000000RRE", + lpq_2 = "e3000000008fRXY-a", + lpr_2 = "000000001000RR", + lpxbr_2 = "0000b3400000RRE", + lpxr_2 = "0000b3600000RRE", + lr_2 = "000000001800RR", + lra_2 = "0000b1000000RX-a", + lrag_2 = "e30000000003RXY-a", + lray_2 = "e30000000013RXY-a", + lrdr_2 = "000000002500RR", + lrer_2 = "000000003500RR", + lrl_2 = "c40d00000000RIL-b", + lrv_2 = "e3000000001eRXY-a", + lrvg_2 = "e3000000000fRXY-a", + lrvgr_2 = "0000b90f0000RRE", + lrvh_2 = "e3000000001fRXY-a", + lrvr_2 = "0000b91f0000RRE", + lt_2 = "e30000000012RXY-a", + ltdbr_2 = "0000b3120000RRE", + ltdr_2 = "000000002200RR", + ltdtr_2 = "0000b3d60000RRE", + ltebr_2 = "0000b3020000RRE", + lter_2 = "000000003200RR", + ltg_2 = "e30000000002RXY-a", + ltgf_2 = "e30000000032RXY-a", + ltgfr_2 = "0000b9120000RRE", + ltgr_2 = "0000b9020000RRE", + ltr_2 = "000000001200RR", + ltxbr_2 = "0000b3420000RRE", + ltxr_2 = "0000b3620000RRE", + ltxtr_2 = "0000b3de0000RRE", + lura_2 = "0000b24b0000RRE", + lurag_2 = "0000b9050000RRE", + lxdbr_2 = "0000b3050000RRE", + lxdr_2 = "0000b3250000RRE", + lxebr_2 = "0000b3060000RRE", + lxer_2 = "0000b3260000RRE", + lxr_2 = "0000b3650000RRE", + ly_2 = "e30000000058RXY-a", + lzdr_2 = "0000b3750000RRE", + lzer_2 = "0000b3740000RRE", + lzxr_2 = "0000b3760000RRE", + m_2 = "00005c000000RX-a", + madb_3 = "ed000000001eRXF", + maeb_3 = "ed000000000eRXF", + maebr_3 = "0000b30e0000RRD", + maer_3 = "0000b32e0000RRD", + md_2 = "00006c000000RX-a", + mdb_2 = "ed000000001cRXE", + 
mdbr_2 = "0000b31c0000RRE", + mde_2 = "00007c000000RX-a", + mdeb_2 = "ed000000000cRXE", + mdebr_2 = "0000b30c0000RRE", + mder_2 = "000000003c00RR", + mdr_2 = "000000002c00RR", + me_2 = "00007c000000RX-a", + meeb_2 = "ed0000000017RXE", + meebr_2 = "0000b3170000RRE", + meer_2 = "0000b3370000RRE", + mer_2 = "000000003c00RR", + mfy_2 = "e3000000005cRXY-a", + mghi_2 = "0000a70d0000RI-a", + mh_2 = "00004c000000RX-a", + mhi_2 = "0000a70c0000RI-a", + mhy_2 = "e3000000007cRXY-a", + ml_2 = "e30000000096RXY-a", + mlg_2 = "e30000000086RXY-a", + mlgr_2 = "0000b9860000RRE", + mlr_2 = "0000b9960000RRE", + mr_2 = "000000001c00RR", + ms_2 = "000071000000RX-a", + msfi_2 = "c20100000000RIL-a", + msg_2 = "e3000000000cRXY-a", + msgf_2 = "e3000000001cRXY-a", + msgfi_2 = "c20000000000RIL-a", + msgfr_2 = "0000b91c0000RRE", + msgr_2 = "0000b90c0000RRE", + msr_2 = "0000b2520000RRE", + msta_2 = "0000b2470000RRE", + msy_2 = "e30000000051RXY-a", + mvc_2 = "d20000000000SS-a", + mvcin_2 = "e80000000000SS-a", + mvcl_2 = "000000000e00RR", + mvcle_3 = "0000a8000000RS-a", + mvclu_3 = "eb000000008eRSY-a", + mvghi_2 = "e54800000000SIL", + mvhhi_2 = "e54400000000SIL", + mvhi_2 = "e54c00000000SIL", + mvi_2 = "000092000000SI", + mvn_2 = "d10000000000SS-a", + mvpg_2 = "0000b2540000RRE", + mvst_2 = "0000b2550000RRE", + mvz_2 = "d30000000000SS-a", + mxbr_2 = "0000b34c0000RRE", + mxd_2 = "000067000000RX-a", + mxdb_2 = "ed0000000007RXE", + mxdbr_2 = "0000b3070000RRE", + mxdr_2 = "000000002700RR", + mxr_2 = "000000002600RR", + n_2 = "000054000000RX-a", + nc_2 = "d40000000000SS-a", + ng_2 = "e30000000080RXY-a", + ngr_2 = "0000b9800000RRE", + ni_2 = "000094000000SI", + nihf_2 = "c00a00000000RIL-a", + nihh_2 = "0000a5040000RI-a", + nihl_2 = "0000a5050000RI-a", + nilf_2 = "c00b00000000RIL-a", + nilh_2 = "0000a5060000RI-a", + nill_2 = "0000a5070000RI-a", + nr_2 = "000000001400RR", + ny_2 = "e30000000054RXY-a", + o_2 = "000056000000RX-a", + oc_2 = "d60000000000SS-a", + og_2 = "e30000000081RXY-a", + ogr_2 = 
"0000b9810000RRE", + oi_2 = "000096000000SI", + oihf_2 = "c00c00000000RIL-a", + oihh_2 = "0000a5080000RI-a", + oihl_2 = "0000a5090000RI-a", + oilf_2 = "c00d00000000RIL-a", + oilh_2 = "0000a50a0000RI-a", + oill_2 = "0000a50b0000RI-a", + or_2 = "000000001600RR", + oy_2 = "e30000000056RXY-a", + palb_2 = "0000b2480000RRE", + pcc_2 = "0000b92c0000RRE", + pckmo_2 = "0000b9280000RRE", + pfd_2 = "e30000000036m", + pfdrl_2 = "c60200000000RIL-c", + pfmf_2 = "0000b9af0000RRE", + pgin_2 = "0000b22e0000RRE", + pgout_2 = "0000b22f0000RRE", + popcnt_2 = "0000b9e10000RRE", + pt_2 = "0000b2280000RRE", + ptf_2 = "0000b9a20000RRE", + pti_2 = "0000b99e0000RRE", + rll_3 = "eb000000001dRSY-a", + rllg_3 = "eb000000001cRSY-a", + rrbe_2 = "0000b22a0000RRE", + rrbm_2 = "0000b9ae0000RRE", + s_2 = "00005b000000RX-a", + sar_2 = "0000b24e0000RRE", + sd_2 = "00006b000000RX-a", + sdb_2 = "ed000000001bRXE", + sdbr_2 = "0000b31b0000RRE", + sdr_2 = "000000002b00RR", + se_2 = "00007b000000RX-a", + seb_2 = "ed000000000bRXE", + sebr_2 = "0000b30b0000RRE", + ser_2 = "000000003b00RR", + sfasr_2 = "0000b3850000RRE", + sfpc_2 = "0000b3840000RRE", + sg_2 = "e30000000009RXY-a", + sgf_2 = "e30000000019RXY-a", + sgfr_2 = "0000b9190000RRE", + sgr_2 = "0000b9090000RRE", + sh_2 = "00004b000000RX-a", + shy_2 = "e3000000007bRXY-a", + sl_2 = "00005f000000RX-a", + sla_2 = "00008b000000RS-a", + slag_3 = "eb000000000bRSY-a", + slak_3 = "eb00000000ddRSY-a", + slb_2 = "e30000000099RXY-a", + slbg_2 = "e30000000089RXY-a", + slbgr_2 = "0000b9890000RRE", + slbr_2 = "0000b9990000RRE", + slda_2 = "00008f000000RS-a", + sldl_2 = "00008d000000RS-a", + slfi_2 = "c20500000000RIL-a", + slg_2 = "e3000000000bRXY-a", + slgf_2 = "e3000000001bRXY-a", + slgfi_2 = "c20400000000RIL-a", + slgfr_2 = "0000b91b0000RRE", + slgr_2 = "0000b90b0000RRE", + sll_2 = "000089000000RS-a", + sllg_3 = "eb000000000dRSY-a", + sllk_3 = "eb00000000dfRSY-a", + slr_2 = "000000001f00RR", + sly_2 = "e3000000005fRXY-a", + spm_2 = "000000000400RR", + sqdb_2 = 
"ed0000000015RXE", + sqdbr_2 = "0000b3150000RRE", + sqdr_2 = "0000b2440000RRE", + sqeb_2 = "ed0000000014RXE", + sqebr_2 = "0000b3140000RRE", + sqer_2 = "0000b2450000RRE", + sqxbr_2 = "0000b3160000RRE", + sqxr_2 = "0000b3360000RRE", + sr_2 = "000000001b00RR", + sra_2 = "00008a000000RS-a", + srag_3 = "eb000000000aRSY-a", + srak_3 = "eb00000000dcRSY-a", + srda_2 = "00008e000000RS-a", + srdl_2 = "00008c000000RS-a", + srl_2 = "000088000000RS-a", + srlg_3 = "eb000000000cRSY-a", + srlk_3 = "eb00000000deRSY-a", + srst_2 = "0000b25e0000RRE", + srstu_2 = "0000b9be0000RRE", + ssair_2 = "0000b99f0000RRE", + ssar_2 = "0000b2250000RRE", + st_2 = "000050000000RX-a", + stam_3 = "00009b000000RS-a", + stamy_3 = "eb000000009bRSY-a", + stc_2 = "000042000000RX-a", + stch_2 = "e300000000c3RXY-a", + stcm_3 = "0000be000000RS-b", + stcmh_3 = "eb000000002cRSY-b", + stcmy_3 = "eb000000002dRSY-b", + stctg_3 = "eb0000000025RSY-a", + stctl_3 = "0000b6000000RS-a", + stcy_2 = "e30000000072RXY-a", + std_2 = "000060000000RX-a", + stdy_2 = "ed0000000067RXY-a", + ste_2 = "000070000000RX-a", + stey_2 = "ed0000000066RXY-a", + stfh_2 = "e300000000cbRXY-a", + stfl_1 = "0000b2b10000S", + stg_2 = "e30000000024RXY-a", + stgrl_2 = "c40b00000000RIL-b", + sth_2 = "000040000000RX-a", + sthh_2 = "e300000000c7RXY-a", + sthrl_2 = "c40700000000RIL-b", + sthy_2 = "e30000000070RXY-a", + stm_3 = "000090000000RS-a", + stmg_3 = "eb0000000024RSY-a", + stmh_3 = "eb0000000026RSY-a", + stmy_3 = "eb0000000090RSY-a", + stoc_3 = "eb00000000f3RSY-b", + stocg_3 = "eb00000000e3RSY-b", + stpq_2 = "e3000000008eRXY-a", + strl_2 = "c40f00000000RIL-b", + strv_2 = "e3000000003eRXY-a", + strvg_2 = "e3000000002fRXY-a", + strvh_2 = "e3000000003fRXY-a", + stura_2 = "0000b2460000RRE", + sturg_2 = "0000b9250000RRE", + sty_2 = "e30000000050RXY-a", + su_2 = "00007f000000RX-a", + sur_2 = "000000003f00RR", + svc_1 = "000000000a00I", + sw_2 = "00006f000000RX-a", + swr_2 = "000000002f00RR", + sxbr_2 = "0000b34b0000RRE", + sxr_2 = "000000003700RR", 
+ sy_2 = "e3000000005bRXY-a", + tar_2 = "0000b24c0000RRE", + tb_2 = "0000b22c0000RRE", + thder_2 = "0000b3580000RRE", + thdr_2 = "0000b3590000RRE", + tm_2 = "000091000000SI", + tmhh_2 = "0000a7020000RI-a", + tmhl_2 = "0000a7030000RI-a", + tmlh_2 = "0000a7000000RI-a", + tmll_2 = "0000a7010000RI-a", + tmy_2 = "eb0000000051SIY", + tr_2 = "dc0000000000SS-a", + trace_3 = "000099000000RS-a", + tracg_3 = "eb000000000fRSY-a", + tre_2 = "0000b2a50000RRE", + trt_2 = "dd0000000000SS-a", + trtr_2 = "d00000000000SS-a", + unpka_2 = "ea0000000000SS-a", + unpku_2 = "e20000000000SS-a", + x_2 = "000057000000RX-a", + xc_2 = "d70000000000SS-a", + xg_2 = "e30000000082RXY-a", + xgr_2 = "0000b9820000RRE", + xi_2 = "000097000000SI", + xihf_2 = "c00600000000RIL-a", + xilf_2 = "c00700000000RIL-a", + xr_2 = "000000001700RR", + xy_2 = "e30000000057RXY-a", +} +for cond, c in pairs(map_cond) do + -- Extended mnemonics for branches. + -- TODO: replace 'B' with correct encoding. + -- brc + map_op["j"..cond.."_1"] = "0000"..tohex(0xa7040000+shl(c, 20)).."RI-c" + -- brcl + map_op["jg"..cond.."_1"] = tohex(0xc0040000+shl(c, 20)).."0000".."RIL-c" + -- bc + map_op["b"..cond.."_1"] = "0000"..tohex(0x47000000+shl(c, 20)).."RX-b" + -- bcr + map_op["b"..cond.."r_1"] = "0000"..tohex(0x0700+shl(c, 4)).."RR" +end +------------------------------------------------------------------------------ +-- Handle opcodes defined with template strings. +local function parse_template(params, template, nparams, pos) + -- Read the template in 16-bit chunks. + -- Leading halfword zeroes should not be written out. + local op0 = tonumber(sub(template, 1, 4), 16) + local op1 = tonumber(sub(template, 5, 8), 16) + local op2 = tonumber(sub(template, 9, 12), 16) + + -- Process each character. 
+ local p = sub(template, 13) + if p == "I" then + local imm_val, a = parse_imm8(params[1]) + op2 = op2 + imm_val + wputhw(op2) + if a then a() end + elseif p == "RI-a" then + op1 = op1 + shl(parse_reg(params[1]), 4) + wputhw(op1) + parse_imm16(params[2]) + elseif p == "RI-b" then + op1 = op1 + shl(parse_reg(params[1]), 4) + wputhw(op1) + local mode, n, s = parse_label(params[2]) + waction("REL_"..mode, n, s) + elseif p == "RI-c" then + if #params > 1 then + op1 = op1 + shl(parse_num(params[1]), 4) + end + wputhw(op1) + local mode, n, s = parse_label(params[#params]) + waction("REL_"..mode, n, s) + elseif p == "RIE-e" then + op0 = op0 + shl(parse_reg(params[1]), 4) + parse_reg(params[2]) + wputhw1(op0) + local mode, n, s = parse_label(params[3]) + waction("REL_"..mode, n, s) + wputhw(op2) + elseif p == "RIL-a" then + op0 = op0 + shl(parse_reg(params[1]), 4) + wputhw(op0); + parse_imm32(params[2]) + elseif p == "RIL-b" then + op0 = op0 + shl(parse_reg(params[1]), 4) + wputhw(op0) + local mode, n, s = parse_label(params[2]) + waction("REL_"..mode, n, s) + elseif p == "RIL-c" then + if #params > 1 then + op0 = op0 + shl(parse_num(params[1]), 4) + end + wputhw(op0) + local mode, n, s = parse_label(params[#params]) + waction("REL_"..mode, n, s) + elseif p == "RR" then + if #params > 1 then + op2 = op2 + shl(parse_reg(params[1]), 4) + end + op2 = op2 + parse_reg(params[#params]) + wputhw(op2) + elseif p == "RRD" then + wputhw(op1) + op2 = op2 + shl(parse_reg(params[1]), 12) + shl(parse_reg(params[2]), 4) + parse_reg(params[3]) + wputhw(op2) + elseif p == "RRE" then + op2 = op2 + shl(parse_reg(params[1]), 4) + parse_reg(params[2]) + wputhw(op1); wputhw(op2) + elseif p == "RRF-b" then + wputhw(op1) + op2 = op2 + shl(parse_reg(params[1]), 4) + shl(parse_reg(params[2]), 12) + parse_reg(params[3]) + shl(parse_mask(params[4]), 8) + wputhw(op2) + elseif p == "RRF-e" then + wputhw(op1) + op2 = op2 + shl(parse_reg(params[1]), 4) + shl(parse_mask(params[2]), 12) + 
parse_reg(params[3])
+    if params[4] then
+      op2 = op2 + shl(parse_mask2(params[4]), 8)
+    end
+    wputhw(op2)
+  elseif p == "RS-a" then
+    -- Two operand forms: (r1, r3, mem) and (r1, mem).
+    -- d/b/a are declared before the branch so the deferred action callback
+    -- `a` is still in scope for the check below. As block-local variables
+    -- they went out of scope at `end`, the check resolved to an outer or
+    -- global `a` (normally nil), and the action was never emitted.
+    local d, b, a
+    if (params[3]) then
+      d, b, a = parse_mem_b(params[3])
+      op1 = op1 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+    else
+      d, b, a = parse_mem_b(params[2])
+      op1 = op1 + shl(parse_reg(params[1]), 4)
+    end
+    op2 = op2 + shl(b, 12) + d
+    wputhw(op1); wputhw(op2)
+    if a then a() end
+  elseif p == "RS-b" then
+    -- Operands: (r1, mask, mem).
+    -- NOTE(review): parse_mask only accepts 0, 1 and 3-7; confirm that this
+    -- range is intended for the mask field of this format.
+    local m = parse_mask(params[2])
+    local d, b, a = parse_mem_b(params[3])
+    op1 = op1 + shl(parse_reg(params[1]), 4) + m
+    op2 = op2 + shl(b, 12) + d
+    wputhw(op1); wputhw(op2)
+    if a then a() end
+  elseif p == "RSI" then
+    -- Operands: (r1, r3, label). The label becomes a REL_<mode> action.
+    op1 = op1 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+    wputhw(op1)
+    local mode, n, s = parse_label(params[3])
+    waction("REL_"..mode, n, s)
+  elseif p == "RSY-a" then
+    -- Operands: (r1, r3, mem). The displacement is split: its low 12 bits
+    -- go into the second halfword, bits 12-19 into the top byte of the
+    -- third halfword.
+    local d, b, a = parse_mem_by(params[3])
+    op0 = op0 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+    op1 = op1 + shl(b, 12) + band(d, 0xfff)
+    op2 = op2 + band(shr(d, 4), 0xff00)
+    wputhw(op0); wputhw(op1); wputhw(op2)
+    if a then a() end -- a() emits action.
+ elseif p == "RX-a" then + local d, x, b, a = parse_mem_bx(params[2]) + op1 = op1 + shl(parse_reg(params[1]), 4) + x + op2 = op2 + shl(b, 12) + d + wputhw(op1); wputhw(op2) + if a then a() end + elseif p == "RX-b" then + local d, x, b, a = parse_mem_bx(params[#params]) + if #params > 1 then + op1 = op1 + shl(parse_num(params[1]), 4) + end + op1 = op1 + x + op2 = op2 + shl(b, 12) + d + wputhw(op1); wputhw(op2) + if a then a() end + elseif p == "RXE" then + local d, x, b, a = parse_mem_bx(params[2]) + op0 = op0 + shl(parse_reg(params[1]), 4) + x + op1 = op1 + shl(b, 12) + d + wputhw(op0); wputhw(op1) + if a then a() end + wputhw(op2); + elseif p == "RXF" then + local d, x, b, a = parse_mem_bx(params[3]) + op0 = op0 + shl(parse_reg(params[2]), 4) + x + op1 = op1 + shl(b, 12) + d + wputhw(op0); wputhw(op1) + if a then a() end + op2 = op2 + shl(parse_reg(params[1]), 12) + wputhw(op2) + elseif p == "RXY-a" then + local d, x, b, a = parse_mem_bxy(params[2]) + op0 = op0 + shl(parse_reg(params[1]), 4) + x + op1 = op1 + shl(b, 12) + band(d, 0xfff) + op2 = op2 + band(shr(d, 4), 0xff00) + wputhw(op0); wputhw(op1); wputhw(op2) + if a then a() end + elseif p == "S" then + wputhw(op1); + local d, b, a = parse_mem_b(params[1]) + op2 = op2 + shl(b, 12) + d + wputhw(op2) + if a then a() end + elseif p == "SI" then + local imm_val, a = parse_imm8(params[2]) + op1 = op1 + imm_val + wputhw(op1) + if a then a() end + local d, b, a = parse_mem_b(params[1]) + op2 = op2 + shl(b, 12) + d + wputhw(op2) + if a then a() end + elseif p == "SIL" then + wputhw(op0) + local d, b, a = parse_mem_b(params[1]) + op1 = op1 + shl(b, 12) + d + wputhw(op1) + if a then a() end + parse_imm16(params[2]) + elseif p == "SIY" then + local imm8, iact = parse_imm8(params[2]) + op0 = op0 + shl(imm8, 8) + wputhw(op0) + if iact then iact() end + local d, b, a = parse_mem_by(params[1]) + op1 = op1 + shl(b, 12) + band(d, 0xfff) + op2 = op2 + band(shr(d, 4), 0xff00) + wputhw(op1); wputhw(op2) + if a then a() end + 
elseif p == "SS-a" then + local d1, l1, b1, d1a, l1a = parse_mem_lb(params[1]) + local d2, b2, d2a = parse_mem_b(params[2]) + op0 = op0 + l1 + op1 = op1 + shl(b1, 12) + d1 + op2 = op2 + shl(b2, 12) + d2 + wputhw(op0) + if l1a then l1a() end + wputhw(op1) + if d1a then d1a() end + wputhw(op2) + if d2a then d2a() end + elseif p == "SS-b" then + local high_l = true + local d1, l1, b1, d1a, l1a = parse_mem_l2b(params[1], high_l) + high_l = false + local d2, l2, b2, d2a, l2a = parse_mem_l2b(params[2], high_l) + op0 = op0 + shl(l1, 4) + l2 + op1 = op1 + shl(b1, 12) + d1 + op2 = op2 + shl(b2, 12) + d2 + wputhw(op0) + if l1a then l1a() end + if l2a then l2a() end + wputhw(op1) + if d1a then d1a() end + wputhw(op2) + if d2a then d2a() end + else + werror("unrecognized encoding") + end +end + +function op_template(params, template, nparams) + if not params then return template:gsub("%x%x%x%x%x%x%x%x%x%x%x%x", "") end + -- Limit number of section buffer positions used by a single dasm_put(). + -- A single opcode needs a maximum of 5 positions. + if secpos+5 > maxsecpos then wflush() end + local lpos, apos, spos = #actlist, #actargs, secpos + local ok, err + for t in gmatch(template, "[^|]+") do + ok, err = pcall(parse_template, params, t, nparams) + if ok then return end + secpos = spos + actlist[lpos+1] = nil + actlist[lpos+2] = nil + actlist[lpos+3] = nil + actargs[apos+1] = nil + actargs[apos+2] = nil + actargs[apos+3] = nil + end + error(err, 0) +end +map_op[".template__"] = op_template +------------------------------------------------------------------------------ +-- Pseudo-opcode to mark the position where the action list is to be emitted. +map_op[".actionlist_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeactions(out, name) end) +end +-- Pseudo-opcode to mark the position where the global enum is to be emitted. 
+-- Called without params it returns the operand help string; otherwise it
+-- queues a deferred output line that calls writeglobals(out, prefix).
+map_op[".globals_1"] = function(params)
+  if not params then return "prefix" end
+  local prefix = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeglobals(out, prefix) end)
+end
+-- Pseudo-opcode to mark the position where the global names are to be emitted.
+-- Queues a deferred output line that calls writeglobalnames(out, name).
+map_op[".globalnames_1"] = function(params)
+  if not params then return "cvar" end
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeglobalnames(out, name) end)
+end
+-- Pseudo-opcode to mark the position where the extern names are to be emitted.
+-- Queues a deferred output line that calls writeexternnames(out, name).
+map_op[".externnames_1"] = function(params)
+  if not params then return "cvar" end
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeexternnames(out, name) end)
+end
+------------------------------------------------------------------------------
+-- Label pseudo-opcode (converted from trailing colon form).
+-- Parses the operand as a label definition (def = true) and emits a
+-- LABEL_<mode> action; a label that parses as "EXT" is rejected.
+map_op[".label_1"] = function(params)
+  if not params then return "[1-9] | ->global | =>pcexpr" end
+  -- Flush first if one more section buffer position would exceed the limit.
+  if secpos+1 > maxsecpos then wflush() end
+  local mode, n, s = parse_label(params[1], true)
+  if mode == "EXT" then werror("bad label definition") end
+  waction("LABEL_"..mode, n, s, 1)
+end
+------------------------------------------------------------------------------
+-- Pseudo-opcodes for data storage.
+-- Each operand must be a literal number; it is written with wputw().
+map_op[".long_*"] = function(params)
+  if not params then return "imm..." end
+  for _, p in ipairs(params) do
+    local n = tonumber(p)
+    if not n then werror("bad immediate `"..p.."'") end
+    -- Map negative values to their unsigned 32-bit equivalent.
+    if n < 0 then n = n + 2^32 end
+    wputw(n)
+    if secpos+2 > maxsecpos then wflush() end
+  end
+end
+-- Alignment pseudo-opcode.
+-- Emits an ALIGN action whose argument is align-1; any operand that is not
+-- an accepted power of two ends in werror("bad alignment").
+map_op[".align_1"] = function(params)
+  if not params then return "numpow2" end
+  if secpos+1 > maxsecpos then wflush() end
+  local align = tonumber(params[1])
+  if align then
+    local x = align
+    -- Must be a power of 2 in the range (2 ... 256).
+    -- Halve up to 8 times; reaching exactly 1 proves align is 2^i.
+    for i=1, 8 do
+      x = x / 2
+      if x == 1 then
+        waction("ALIGN", align-1, nil, 1) -- Action halfword is 2**n-1.
+        return
+      end
+    end
+  end
+  werror("bad alignment")
+end
+------------------------------------------------------------------------------
+-- Pseudo-opcode for (primitive) type definitions (map to C types).
+-- Operands: name, ctype and an optional reg. Registers the type in map_type,
+-- adds a "#name" define and emits a DtN() shortcut macro for the C type.
+map_op[".type_3"] = function(params, nparams)
+  if not params then
+    return nparams == 2 and "name, ctype" or "name, ctype, reg"
+  end
+  local name, ctype, reg = params[1], params[2], params[3]
+  if not match(name, "^[%a_][%w_]*$") then
+    werror("bad type name `"..name.."'")
+  end
+  local tp = map_type[name]
+  if tp then
+    werror("duplicate type `"..name.."'")
+  end
+  -- Add #type to defines. A bit unclean to put it in map_archdef.
+  map_archdef["#"..name] = "sizeof("..ctype..")"
+  -- Add new type and emit shortcut define.
+  local num = ctypenum + 1
+  map_type[name] = {
+    ctype = ctype,
+    ctypefmt = format("Dt%X(%%s)", num),
+    reg = reg,
+  }
+  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
+  -- Commit the new type number only after the define has been queued.
+  ctypenum = num
+end
+-- The two-operand form (no register) shares the same handler.
+map_op[".type_2"] = map_op[".type_3"]
+-- Dump type definitions.
+-- Writes one line per registered type, sorted by name; lvl is not used here.
+local function dumptypes(out, lvl)
+  local t = {}
+  for name in pairs(map_type) do t[#t+1] = name end
+  sort(t)
+  out:write("Type definitions:\n")
+  for _, name in ipairs(t) do
+    local tp = map_type[name]
+    local reg = tp.reg or ""
+    out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg))
+  end
+  out:write("\n")
+end
+------------------------------------------------------------------------------
+-- Set the current section.
+-- Emits a SECTION action for num and then forces a flush.
+function _M.section(num)
+  waction("SECTION", num)
+  wflush(true) -- SECTION is a terminal action.
+end
+------------------------------------------------------------------------------
+-- Dump architecture description.
+-- Writes a version banner built from _info, then the action list.
+function _M.dumparch(out)
+  out:write(format("DynASM %s version %s, released %s\n\n",
+    _info.arch, _info.version, _info.release))
+  dumpactions(out)
+end
+-- Dump all user defined elements.
+function _M.dumpdef(out, lvl) + dumptypes(out, lvl) + dumpglobals(out, lvl) + dumpexterns(out, lvl) +end +------------------------------------------------------------------------------ +-- Pass callbacks from/to the DynASM core. +function _M.passcb(wl, we, wf, ww) + wline, werror, wfatal, wwarn = wl, we, wf, ww + return wflush +end +-- Setup the arch-specific module. +function _M.setup(arch, opt) + g_arch, g_opt = arch, opt +end +-- Merge the core maps and the arch-specific maps. +function _M.mergemaps(map_coreop, map_def) + setmetatable(map_op, { __index = map_coreop }) + setmetatable(map_def, { __index = map_archdef }) + return map_op, map_def +end +return _M +------------------------------------------------------------------------------ --- a/src/Makefile +++ b/src/Makefile @@ -244,6 +244,9 @@ else ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) TARGET_LJARCH= arm else +ifneq (,$(findstring LJ_TARGET_S390X ,$(TARGET_TESTARCH))) + TARGET_LJARCH= s390x +else ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH))) TARGET_ARCH= -D__AARCH64EB__=1 @@ -272,6 +275,7 @@ else endif endif endif +endif endif endif endif --- a/src/host/buildvm.c +++ b/src/host/buildvm.c @@ -67,6 +67,8 @@ static int collect_reloc(BuildCtx *ctx, #include "../dynasm/dasm_ppc.h" #elif LJ_TARGET_MIPS #include "../dynasm/dasm_mips.h" +#elif LJ_TARGET_S390X +#include "../dynasm/dasm_s390x.h" #else #error "No support for this architecture (yet)" #endif --- a/src/host/buildvm_asm.c +++ b/src/host/buildvm_asm.c @@ -87,6 +87,54 @@ err: } fprintf(ctx->fp, "\t%s %s\n", opname, sym); } +#elif LJ_TARGET_S390X +/* Emit halfwords piecewise as assembler text. 
*/ +static void emit_asm_halfwords(BuildCtx *ctx, uint8_t *p, int n) +{ + uint16_t *cp = (uint16_t*)p; + n /= 2; /* Byte count -> halfword count; an odd trailing byte is not emitted. */ + int i; + for (i = 0; i < n; i++) { + if ((i & 7) == 0) /* Start a new .hword directive every 8 halfwords. */ + fprintf(ctx->fp, "\t.hword 0x%hx", cp[i]); + else + fprintf(ctx->fp, ",0x%hx", cp[i]); + if ((i & 7) == 7) putc('\n', ctx->fp); + } + if ((n & 7) != 0) putc('\n', ctx->fp); /* Terminate a partially filled last line. */ +} + +/* Emit s390x text relocations. The n bytes at cp must end with the opcode halfword of a bras/brasl/brc/brcl; the bytes before it are emitted raw and the branch is re-emitted symbolically against sym. */ +static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n, + const char *sym) +{ + if (n & 1 || n < 2) { + fprintf(stderr, "Error: instruction stream length invalid: %d.\n", n); + exit(1); + } + n -= 2; /* cp[n] is now the opcode halfword of the branch to relocate. */ + const char *opname = NULL; + const char *argt = ""; /* Inserted before argument. */ + int opcode = *(uint16_t*)(&cp[n]); + int arg = (opcode>>4) & 0xf; /* Operand nibble, printed as %rN for bras/brasl and as a plain number for brc/brcl. */ + switch (opcode & 0xff0f) { /* Match on the opcode bits only, ignoring the operand nibble. */ + case 0xa705: opname = "bras"; argt = "%r"; break; + case 0xc005: opname = "brasl"; argt = "%r"; break; + case 0xa704: opname = "brc"; break; + case 0xc004: opname = "brcl"; break; + default: + fprintf(stderr, "Error: unsupported opcode for %s symbol relocation.\n", + sym); + exit(1); + } + emit_asm_halfwords(ctx, cp, n); /* Everything before the branch opcode is emitted verbatim. */ + if (strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) { + /* Various fixups for external symbols outside of our binary. */ + fprintf(ctx->fp, "\t%s %s%d, %s@PLT\n", opname, argt, arg, sym); + return; + } + fprintf(ctx->fp, "\t%s %s%d, %s\n", opname, argt, arg, sym); +} #else /* Emit words piecewise as assembler text.
*/ static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n) @@ -302,6 +350,9 @@ void emit_asm(BuildCtx *ctx) emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]); } ofs += n+4; +#elif LJ_TARGET_S390X + emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]); + ofs += n+4; #else emit_asm_wordreloc(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]); ofs += n; @@ -310,6 +361,8 @@ void emit_asm(BuildCtx *ctx) } #if LJ_TARGET_X86ORX64 emit_asm_bytes(ctx, ctx->code+ofs, next-ofs); +#elif LJ_TARGET_S390X + emit_asm_halfwords(ctx, ctx->code+ofs, next-ofs); #else emit_asm_words(ctx, ctx->code+ofs, next-ofs); #endif --- a/src/jit/bcsave.lua +++ b/src/jit/bcsave.lua @@ -101,6 +101,7 @@ local map_arch = { mips64el = { e = "le", b = 64, m = 8, f = 0x80000007, }, mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, }, mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, }, + s390x = { e = "be", b = 64, m = 22, }, } local map_os = { --- /dev/null +++ b/src/jit/dis_s390x.lua @@ -0,0 +1 @@ +-- Not yet implemented. --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -702,6 +702,8 @@ static uint32_t jit_cpudetect(void) } #endif +#elif LJ_TARGET_S390X + /* No optional CPU features to detect (for now). */ #else #error "Missing CPU detection for this architecture" #endif --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -31,6 +31,8 @@ #define LUAJIT_ARCH_mips32 6 #define LUAJIT_ARCH_MIPS64 7 #define LUAJIT_ARCH_mips64 7 +#define LUAJIT_ARCH_S390X 8 +#define LUAJIT_ARCH_s390x 8 /* Target OS. 
*/ #define LUAJIT_OS_OTHER 0 @@ -59,6 +61,8 @@ #define LUAJIT_TARGET LUAJIT_ARCH_ARM #elif defined(__aarch64__) || defined(_M_ARM64) #define LUAJIT_TARGET LUAJIT_ARCH_ARM64 +#elif defined(__s390x__) || defined(__s390x) +#define LUAJIT_TARGET LUAJIT_ARCH_S390X #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) #define LUAJIT_TARGET LUAJIT_ARCH_PPC #elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64) @@ -439,6 +443,21 @@ #define LJ_ARCH_VERSION 10 #endif +#elif LUAJIT_TARGET == LUAJIT_ARCH_S390X + +#define LJ_ARCH_NAME "s390x" +#define LJ_ARCH_BITS 64 +#define LJ_ARCH_ENDIAN LUAJIT_BE +#define LJ_TARGET_S390X 1 +#define LJ_TARGET_EHRETREG 0xe +#define LJ_TARGET_JUMPRANGE 32 /* +-2^32 = +-4GB (32-bit, halfword aligned) */ +#define LJ_TARGET_MASKSHIFT 1 +#define LJ_TARGET_MASKROT 1 +#define LJ_TARGET_UNALIGNED 1 +#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL +#define LJ_TARGET_GC64 1 +#define LJ_ARCH_NOJIT 1 /* NYI */ + #else #error "No target architecture defined" #endif --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -1708,6 +1708,8 @@ static void asm_loop(ASMState *as) #include "lj_asm_ppc.h" #elif LJ_TARGET_MIPS #include "lj_asm_mips.h" +#elif LJ_TARGET_S390X +#include "lj_asm_s390x.h" #else #error "Missing assembler for target CPU" #endif --- a/src/lj_ccall.c +++ b/src/lj_ccall.c @@ -575,6 +575,40 @@ goto done; \ } +#elif LJ_TARGET_S390X +/* -- POSIX/s390x calling conventions --------------------------------------- */ + +#define CCALL_HANDLE_STRUCTRET \ + cc->retref = 1; /* Return all structs by reference. */ \ + cc->gpr[ngpr++] = (GPRArg)dp; + +#define CCALL_HANDLE_COMPLEXRET \ + cc->retref = 1; /* Return all complex values by reference. */ \ + cc->gpr[ngpr++] = (GPRArg)dp; + +#define CCALL_HANDLE_COMPLEXRET2 \ + UNUSED(dp); /* Nothing to do. 
*/ + +#define CCALL_HANDLE_STRUCTARG \ + /* Pass structs of size 1, 2, 4 or 8 in a GPR by value. */ \ + if (!(sz == 1 || sz == 2 || sz == 4 || sz == 8)) { \ + rp = cdataptr(lj_cdata_new(cts, did, sz)); \ + sz = CTSIZE_PTR; /* Pass all other structs by reference. */ \ + } + +#define CCALL_HANDLE_COMPLEXARG \ + /* Pass complex numbers by reference. */ \ + /* TODO: not sure why this is different to structs. */ \ + rp = cdataptr(lj_cdata_new(cts, did, sz)); \ + sz = CTSIZE_PTR; \ + +#define CCALL_HANDLE_REGARG \ + if (isfp) { \ + if (nfpr < CCALL_NARG_FPR) { dp = &cc->fpr[nfpr++]; goto done; } \ + } else { \ + if (ngpr < maxgpr) { dp = &cc->gpr[ngpr++]; goto done; } \ + } + #else #error "Missing calling convention definitions for this architecture" #endif @@ -999,6 +1033,9 @@ static int ccall_set_args(lua_State *L, CType *d; CTSize sz; MSize n, isfp = 0, isva = 0; +#if LJ_TARGET_S390X + MSize onstack = 0; +#endif void *dp, *rp = NULL; if (fid) { /* Get argument type from field. */ @@ -1037,6 +1074,9 @@ static int ccall_set_args(lua_State *L, CCALL_HANDLE_REGARG /* Handle register arguments. */ /* Otherwise pass argument on stack. */ +#if LJ_TARGET_S390X + onstack = 1; +#endif if (CCALL_ALIGN_STACKARG) { /* Align argument on stack. */ MSize align = (1u << ctype_align(d->info)) - 1; if (rp || (CCALL_PACK_STACKARG && isva && align < CTSIZE_PTR-1)) @@ -1086,6 +1126,16 @@ static int ccall_set_args(lua_State *L, *(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */ } #endif +#if LJ_TARGET_S390X + /* Arguments need to be sign-/zero-extended to 64-bits. */ + if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) || + (isfp && onstack)) && d->size <= 4) { + if (d->info & CTF_UNSIGNED || isfp) + *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp; + else + *(int64_t *)dp = (int64_t)*(int32_t *)dp; + } +#endif #if LJ_TARGET_X64 && LJ_ABI_WIN if (isva) { /* Windows/x64 mirrors varargs in both register sets. 
*/ if (nfpr == ngpr) --- a/src/lj_ccall.h +++ b/src/lj_ccall.h @@ -129,6 +129,21 @@ typedef union FPRArg { struct { LJ_ENDIAN_LOHI(float f; , float g;) }; } FPRArg; +#elif LJ_TARGET_S390X + +#define CCALL_NARG_GPR 5 /* GPR 2,3,4,5,6 */ +#define CCALL_NARG_FPR 4 /* FPR 0,2,4,8 */ +#define CCALL_NRET_GPR 1 /* GPR 2 */ +#define CCALL_NRET_FPR 1 /* FPR 0 */ +#define CCALL_SPS_EXTRA 20 /* 160-byte callee save area (not sure if this is the right place) */ +#define CCALL_SPS_FREE 0 + +typedef intptr_t GPRArg; +typedef union FPRArg { + double d; + float f; +} FPRArg; + #else #error "Missing calling convention definitions for this architecture" #endif --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -516,6 +516,15 @@ void lj_ccallback_mcode_free(CTState *ct if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ((float *)dp)[1] = *(float *)dp; +#elif LJ_TARGET_S390X + +#define CALLBACK_HANDLE_REGARG \ + if (isfp) { \ + if (nfpr < CCALL_NARG_FPR) { sp = &cts->cb.fpr[nfpr++]; goto done; } \ + } else { \ + if (ngpr < maxgpr) { sp = &cts->cb.gpr[ngpr++]; goto done; } \ + } + #else #error "Missing calling convention definitions for this architecture" #endif --- a/src/lj_err.c +++ b/src/lj_err.c @@ -442,6 +442,9 @@ LJ_FUNCA int lj_err_unwind_dwarf(int ver if (version != 1) return _URC_FATAL_PHASE1_ERROR; cf = (void *)_Unwind_GetCFA(ctx); +#ifdef LJ_TARGET_S390X + cf -= 160; /* CFA points 160 bytes above r15. */ +#endif L = cframe_L(cf); if ((actions & _UA_SEARCH_PHASE)) { #if LJ_UNWIND_EXT --- a/src/lj_frame.h +++ b/src/lj_frame.h @@ -264,6 +264,20 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CAL #endif #define CFRAME_OFS_MULTRES 0 #define CFRAME_SHIFT_MULTRES 3 +#elif LJ_TARGET_S390X +#define CFRAME_OFS_ERRF 280 +#define CFRAME_OFS_NRES 272 +#define CFRAME_OFS_PREV 264 +#define CFRAME_OFS_L 256 +#define CFRAME_OFS_PC 168 +#define CFRAME_OFS_MULTRES 160 +#define CFRAME_SIZE 240 +/* +** TODO: it would be good if we always decoded param*8 like +** the RISC architectures do. 
If so then SHIFT_MULTRES will +** need to change to 3. +*/ +#define CFRAME_SHIFT_MULTRES 0 #else #error "Missing CFRAME_* definitions for this architecture" #endif --- a/src/lj_target.h +++ b/src/lj_target.h @@ -143,6 +143,8 @@ typedef uint32_t RegCost; #include "lj_target_ppc.h" #elif LJ_TARGET_MIPS #include "lj_target_mips.h" +#elif LJ_TARGET_S390X +#include "lj_target_s390x.h" #else #error "Missing include for target CPU" #endif --- /dev/null +++ b/src/lj_target_s390x.h @@ -0,0 +1,80 @@ +/* +** Definitions for IBM z/Architecture (s390x) CPUs. +** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_TARGET_S390X_H +#define _LJ_TARGET_S390X_H + +/* -- Registers IDs ------------------------------------------------------- */ + +#define GPRDEF(_) \ + _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \ + _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) +#define FPRDEF(_) \ + _(F0) _(F1) _(F2) _(F3) \ + _(F4) _(F5) _(F6) _(F7) \ + _(F8) _(F9) _(F10) _(F11) \ + _(F12) _(F13) _(F14) _(F15) +// TODO: VREG? + +#define RIDENUM(name) RID_##name, + +enum { + GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ + FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ + RID_MAX, + + /* Calling conventions. */ + RID_SP = RID_R15, + RID_RET = RID_R2, + RID_FPRET = RID_F0, + + /* These definitions must match with the *.dasc file(s): */ + RID_BASE = RID_R7, /* Interpreter BASE. */ + RID_LPC = RID_R9, /* Interpreter PC. */ + RID_DISPATCH = RID_R10, /* Interpreter DISPATCH table. */ + + /* Register ranges [min, max) and number of registers. */ + RID_MIN_GPR = RID_R0, + RID_MIN_FPR = RID_F0, + RID_MAX_GPR = RID_MIN_FPR, + RID_MAX_FPR = RID_MAX, + RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, + RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR, +}; + +/* -- Register sets ------------------------------------------------------- */ + +/* -- Spill slots --------------------------------------------------------- */ + +/* Spill slots are 32 bit wide. 
An even/odd pair is used for FPRs. +** +** SPS_FIXED: Available fixed spill slots in interpreter frame. +** This definition must match with the *.dasc file(s). +** +** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots. +*/ +#define SPS_FIXED 2 +#define SPS_FIRST 2 + +#define SPOFS_TMP 0 + +#define sps_scale(slot) (4 * (int32_t)(slot)) +#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1) + +/* -- Exit state ---------------------------------------------------------- */ + +/* This definition must match with the *.dasc file(s). */ +typedef struct { + lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ + int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ + int32_t spill[256]; /* Spill slots. */ +} ExitState; + +#define EXITSTUB_SPACING 4 +#define EXITSTUBS_PER_GROUP 32 + +/* -- Instructions -------------------------------------------------------- */ + +#endif --- /dev/null +++ b/src/vm_s390x.dasc @@ -0,0 +1,4269 @@ +|// Low-level VM code for IBM z/Architecture (s390x) CPUs in LJ_GC64 mode. +|// Bytecode interpreter, fast functions and helper functions. +|// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +| +|// This assembly targets the instruction set available on z10 (and newer) +|// machines. +| +|// ELF ABI registers: +|// r0,r1 | | volatile | +|// r2 | parameter and return value | volatile | +|// r3-r5 | parameter | volatile | +|// r6 | parameter | saved | +|// r7-r11 | | saved | +|// r12 | GOT pointer (needed?) 
| saved | +|// r13 | literal pool (not needed) | saved | +|// r14 | return address | volatile | +|// r15 | stack pointer | saved | +|// f0,f2,f4,f6 | parameter and return value | volatile | +|// f1,f3,f5,f7 | | volatile | +|// f8-f15 | | saved | +|// ar0,ar1 | TLS | volatile | +|// ar2-ar15 | | volatile | +| +|.arch s390x +|.section code_op, code_sub +| +|.actionlist build_actionlist +|.globals GLOB_ +|.globalnames globnames +|.externnames extnames +| +|//----------------------------------------------------------------------- +| +|// Fixed register assignments for the interpreter, callee-saved. +|.define KBASE, r8 // Constants of current Lua function. +|.define PC, r9 // Next PC. +|.define DISPATCH, r10 // Opcode dispatch table. +|.define ITYPE, r11 // Temporary used for type information. +|.define BASE, r13 // Base of current Lua stack frame. +| +|// The following temporaries are not saved across C calls, except for RB. +|.define RA, r4 // Overlaps CARG3. +|.define RB, r7 // Must be callee-save. +|.define RC, r5 // Overlaps CARG4. +|.define RD, r6 // Overlaps CARG5. +| +|// Calling conventions. Also used as temporaries. +|.define CARG1, r2 +|.define CARG2, r3 +|.define CARG3, r4 +|.define CARG4, r5 +|.define CARG5, r6 +| +|.define FARG1, f0 +|.define FARG2, f2 +|.define FARG3, f4 +|.define FARG4, f6 +| +|.define CRET1, r2 +| +|.define TMPR0, r0 +|.define TMPR1, r1 +|.define OP, r2 +| +|// Stack layout while in interpreter. Must match with lj_frame.h. +|.define CFRAME_SPACE, 240 // Delta for sp, 8 byte aligned. +| +|// Register save area. +|.define SAVE_GPRS, 288(sp) // Save area for r6-r15 (10*8 bytes). +|.define SAVE_GPRS_P, 48(sp) // Save area for r6-r15 (10*8 bytes) in prologue (before stack frame is allocated). +| +|// Argument save area. +|.define SAVE_ERRF, 280(sp) // Argument 4, in r5. +|.define SAVE_NRES, 272(sp) // Argument 3, in r4. Size is 4-bytes. +|.define SAVE_CFRAME, 264(sp) // Argument 2, in r3. +|.define SAVE_L, 256(sp) // Argument 1, in r2. 
+|.define RESERVED, 248(sp) // Reserved for compiler use. +|.define BACKCHAIN, 240(sp) // <- sp entering interpreter. +| +|// Interpreter stack frame. +|.define SAVE_FPR15, 232(sp) +|.define SAVE_FPR14, 224(sp) +|.define SAVE_FPR13, 216(sp) +|.define SAVE_FPR12, 208(sp) +|.define SAVE_FPR11, 200(sp) +|.define SAVE_FPR10, 192(sp) +|.define SAVE_FPR9, 184(sp) +|.define SAVE_FPR8, 176(sp) +|.define SAVE_PC, 168(sp) +|.define SAVE_MULTRES, 160(sp) +|.define SAVE_TMP, 160(sp) // Overlaps SAVE_MULTRES +|.define SAVE_TMP_HI, 164(sp) // High 32-bits (to avoid SAVE_MULTRES). +| +|// Callee save area (allocated by interpreter). +|.define CALLEESAVE, 000(sp) // <- sp in interpreter. +| +|.macro saveregs +| stmg r6, r15, SAVE_GPRS_P +| lay sp, -CFRAME_SPACE(sp) // Allocate stack frame. +| std f8, SAVE_FPR8 // f8-f15 are callee-saved. +| std f9, SAVE_FPR9 +| std f10, SAVE_FPR10 +| std f11, SAVE_FPR11 +| std f12, SAVE_FPR12 +| std f13, SAVE_FPR13 +| std f14, SAVE_FPR14 +| std f15, SAVE_FPR15 +|.endmacro +| +|.macro restoreregs +| ld f8, SAVE_FPR8 // f8-f15 are callee-saved. +| ld f9, SAVE_FPR9 +| ld f10, SAVE_FPR10 +| ld f11, SAVE_FPR11 +| ld f12, SAVE_FPR12 +| ld f13, SAVE_FPR13 +| ld f14, SAVE_FPR14 +| ld f15, SAVE_FPR15 +| lmg r6, r15, SAVE_GPRS // Restores the stack pointer. +|.endmacro +| +|// Type definitions. Some of these are only used for documentation. +|.type L, lua_State +|.type GL, global_State +|.type TVALUE, TValue +|.type GCOBJ, GCobj +|.type STR, GCstr +|.type TAB, GCtab +|.type LFUNC, GCfuncL +|.type CFUNC, GCfuncC +|.type PROTO, GCproto +|.type UPVAL, GCupval +|.type NODE, Node +|.type NARGS, int +|.type TRACE, GCtrace +|.type SBUF, SBuf +| +|//----------------------------------------------------------------------- +| +|// Instruction headers. 
+|.macro ins_A; .endmacro +|.macro ins_AD; .endmacro +|.macro ins_AJ; .endmacro +|.macro ins_ABC; srlg RB, RD, 8; llgcr RC, RD; .endmacro +|.macro ins_AB_; srlg RB, RD, 8; .endmacro +|.macro ins_A_C; llgcr RC, RD; .endmacro +|.macro ins_AND; lghi TMPR1, -1; xgr RD, TMPR1; .endmacro // RD = ~RD +| +|// Instruction decode+dispatch. +|.macro ins_NEXT +| llgc OP, 3(PC) +| llgh RD, 0(PC) +| llgc RA, 2(PC) +| sllg TMPR1, OP, 3 +| lg TMPR1, 0(TMPR1, DISPATCH) +| la PC, 4(PC) +| br TMPR1 +|.endmacro +| +|// Instruction footer. +|.if 1 +| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. +| .define ins_next, ins_NEXT +| .define ins_next_, ins_NEXT +|.else +| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. +| .macro ins_next +| j ->ins_next +| .endmacro +| .macro ins_next_ +| ->ins_next: +| ins_NEXT +| .endmacro +|.endif +| +|// Call decode and dispatch. +|.macro ins_callt +| // BASE = new base, RB = LFUNC, RD = nargs+1, -8(BASE) = PC +| lg PC, LFUNC:RB->pc +| llgc OP, 3(PC) +| llgc RA, 2(PC) +| sllg TMPR1, OP, 3 +| la PC, 4(PC) +| lg TMPR1, 0(TMPR1, DISPATCH) +| br TMPR1 +|.endmacro +| +|.macro ins_call +| // BASE = new base, RB = LFUNC, RD = nargs+1 +| stg PC, -8(BASE) +| ins_callt +|.endmacro +| +|// Assumes DISPATCH is relative to GL. +#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) +#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) +| +#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) +| +|//----------------------------------------------------------------------- +| +|// Macros to clear or set tags. 
+|.macro cleartp, reg +| nihf reg, 0x7fff +|.endmacro +|.macro settp, reg, tp +| oihf reg, tp<<15 +|.endmacro +|.macro settp, dst, reg, tp +| llihf dst, tp<<15 +| ogr dst, reg +|.endmacro +|.macro setint, reg +| settp reg, LJ_TISNUM +|.endmacro +|.macro setint, dst, reg +| settp dst, reg, LJ_TISNUM +|.endmacro +| +|// Macros to test operand types. +|.macro checktp_nc, reg, tp, target +| srag ITYPE, reg, 47 +| clfi ITYPE, tp +| jne target +|.endmacro +|.macro checktp, reg, tp, target +| srag ITYPE, reg, 47 +| cleartp reg +| clfi ITYPE, tp +| jne target +|.endmacro +|.macro checktptp, src, tp, target +| srag ITYPE, src, 47 +| clfi ITYPE, tp +| jne target +|.endmacro +|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro +|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro +|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro +| +|.macro checknumx, reg, target, jump +| srag ITYPE, reg, 47 +| clfi ITYPE, LJ_TISNUM +| jump target +|.endmacro +|.macro checkint, reg, target; checknumx reg, target, jne; .endmacro +|.macro checkinttp, src, target; checknumx src, target, jne; .endmacro +|.macro checknum, reg, target; checknumx reg, target, jhe; .endmacro +|.macro checknumtp, src, target; checknumx src, target, jhe; .endmacro +|.macro checknumber, src, target; checknumx src, target, jh; .endmacro +| +|.macro load_false, reg; lghi reg, -1; iihl reg, 0x7fff; .endmacro // assumes LJ_TFALSE == ~(1<<47) +|.macro load_true, reg; lghi reg, -1; iihh reg, 0xfffe; .endmacro // assumes LJ_TTRUE == ~(2<<47) +| +|.define PC_OP, -1(PC) +|.define PC_RA, -2(PC) +|.define PC_RB, -4(PC) +|.define PC_RC, -3(PC) +|.define PC_RD, -4(PC) +| +|.macro branchPC, reg +| // Must not clobber condition code. +| sllg TMPR1, reg, 2 +| lay PC, (-BCBIAS_J*4)(TMPR1, PC) +|.endmacro +| +|// Set current VM state. 
+|.macro set_vmstate, st +| lghi TMPR1, ~LJ_VMST_..st +| stg TMPR1, DISPATCH_GL(vmstate)(DISPATCH) +|.endmacro +| +|// Synthesize binary floating-point constants. +|.macro bfpconst_tobit, reg, tmp // Synthesize 2^52 + 2^51. +| llihh tmp, 0x4338 +| ldgr reg, tmp +|.endmacro +| +|// Move table write barrier back. Overwrites reg. +|.macro barrierback, tab, reg +| ni tab->marked, ~LJ_GC_BLACK // black2gray(tab) +| lg reg, (DISPATCH_GL(gc.grayagain))(DISPATCH) +| stg tab, (DISPATCH_GL(gc.grayagain))(DISPATCH) +| stg reg, tab->gclist +|.endmacro + +#if !LJ_DUALNUM +#error "Only dual-number mode supported for s390x target" +#endif + +/* Generate subroutines used by opcodes and other parts of the VM. */ +/* The .code_sub section should be last to help static branch prediction. */ +static void build_subroutines(BuildCtx *ctx) +{ + |.code_sub + | + |//----------------------------------------------------------------------- + |//-- Return handling ---------------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_returnp: + | tmll PC, FRAME_P + | je ->cont_dispatch + | + | // Return from pcall or xpcall fast func. + | nill PC, -8 + | sgr BASE, PC // Restore caller base. + | lay RA, -8(RA, PC) // Rebase RA and prepend one result. + | lg PC, -8(BASE) // Fetch PC of previous frame. + | // Prepending may overwrite the pcall frame, so do it at the end. + | load_true ITYPE + | stg ITYPE, 0(RA, BASE) // Prepend true to results. + | + |->vm_returnc: + | aghi RD, 1 // RD = nresults+1 + | je ->vm_unwind_yield + | st RD, SAVE_MULTRES + | tmll PC, FRAME_TYPE + | je ->BC_RET_Z // Handle regular return to Lua. + | + |->vm_return: + | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return + | lghi TMPR1, FRAME_C + | xgr PC, TMPR1 + | tmll PC, FRAME_TYPE + | jne ->vm_returnp + | + | // Return to C. + | set_vmstate C + | nill PC, -8 + | sgr PC, BASE + | lcgr PC, PC // Previous base = BASE - delta. 
+ | + | aghi RD, -1 + | je >2 + |1: // Move results down. + | lg RB, 0(BASE, RA) + | stg RB, -16(BASE) + | la BASE, 8(BASE) + | aghi RD, -1 + | jne <1 + |2: + | lg L:RB, SAVE_L + | stg PC, L:RB->base + |3: + | llgf RD, SAVE_MULTRES + | lgf RA, SAVE_NRES // RA = wanted nresults+1 + |4: + | cgr RA, RD + | jne >6 // More/less results wanted? + |5: + | lay BASE, -16(BASE) + | stg BASE, L:RB->top + | + |->vm_leave_cp: + | lg RA, SAVE_CFRAME // Restore previous C frame. + | stg RA, L:RB->cframe + | lghi CRET1, 0 // Ok return status for vm_pcall. + | + |->vm_leave_unw: + | restoreregs + | br r14 + | + |6: + | jl >7 // Less results wanted? + | // More results wanted. Check stack size and fill up results with nil. + | cg BASE, L:RB->maxstack + | jh >8 + | lghi TMPR1, LJ_TNIL + | stg TMPR1, -16(BASE) + | la BASE, 8(BASE) + | aghi RD, 1 + | j <4 + | + |7: // Fewer results wanted. + | cghi RA, 0 + | je <5 // But check for LUA_MULTRET+1. + | sgr RA, RD // Negative result! + | sllg TMPR1, RA, 3 + | la BASE, 0(TMPR1, BASE) // Correct top. + | j <5 + | + |8: // Corner case: need to grow stack for filling up results. + | // This can happen if: + | // - A C function grows the stack (a lot). + | // - The GC shrinks the stack in between. + | // - A return back from a lua_call() with (high) nresults adjustment. + | stg BASE, L:RB->top // Save current top held in BASE (yes). + | st RD, SAVE_MULTRES // Need to fill only remainder with nil. + | lgr CARG2, RA + | lgr CARG1, L:RB + | brasl r14, extern lj_state_growstack // (lua_State *L, int n) + | lg BASE, L:RB->top // Need the (realloced) L->top in BASE. + | j <3 + | + |->vm_unwind_c: // Unwind C stack, return from vm_pcall. + | // (void *cframe, int errcode) + | lgr sp, CARG1 + | lgfr CRET1, CARG2 // Error return status for vm_pcall. CRET1 aliases CARG1, so sp must be loaded first. + |->vm_unwind_c_eh: // Landing pad for external unwinder.
+ | lg L:RB, SAVE_L + | lg GL:RB, L:RB->glref + | lghi TMPR1, ~LJ_VMST_C + | stg TMPR1, GL:RB->vmstate + | j ->vm_leave_unw + | + |->vm_unwind_ff: // Unwind C stack, return from ff pcall. + | // (void *cframe) + | nill CARG1, CFRAME_RAWMASK // Assumes high 48-bits set in CFRAME_RAWMASK. + | lgr sp, CARG1 + |->vm_unwind_ff_eh: // Landing pad for external unwinder. + | lg L:RB, SAVE_L + | lghi RD, 1+1 // Really 1+2 results, incr. later. + | lg BASE, L:RB->base + | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table. + | la DISPATCH, GG_G2DISP(DISPATCH) + | lg PC, -8(BASE) // Fetch PC of previous frame. + | load_false RA + | lg RB, 0(BASE) + | stg RA, -16(BASE) // Prepend false to error message. + | stg RB, -8(BASE) + | lghi RA, -16 // Results start at BASE+RA = BASE-16. + | set_vmstate INTERP + | j ->vm_returnc // Increments RD/MULTRES and returns. + | + |//----------------------------------------------------------------------- + |//-- Grow stack for calls ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_growstack_c: // Grow stack for C function. + | lghi CARG2, LUA_MINSTACK + | j >2 + | + |->vm_growstack_v: // Grow stack for vararg Lua function. + | aghi RD, -16 // LJ_FR2 + | j >1 + | + |->vm_growstack_f: // Grow stack for fixarg Lua function. + | // BASE = new base, RD = nargs+1, RB = L, PC = first PC + | sllg RD, NARGS:RD, 3 + | lay RD, -8(RD, BASE) + |1: + | llgc RA, (PC2PROTO(framesize)-4)(PC) + | la PC, 4(PC) // Must point after first instruction. 
+ | stg BASE, L:RB->base + | stg RD, L:RB->top + | stg PC, SAVE_PC + | lgr CARG2, RA + |2: + | // RB = L, L->base = new base, L->top = top + | lgr CARG1, L:RB + | brasl r14, extern lj_state_growstack // (lua_State *L, int n) + | lg BASE, L:RB->base + | lg RD, L:RB->top + | lg LFUNC:RB, -16(BASE) + | cleartp LFUNC:RB + | sgr RD, BASE + | srlg RD, RD, 3 + | aghi NARGS:RD, 1 + | // BASE = new base, RB = LFUNC, RD = nargs+1 + | ins_callt // Just retry the call. + | + |//----------------------------------------------------------------------- + |//-- Entry points into the assembler VM --------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_resume: // Setup C frame and resume thread. + | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) + | saveregs + | lgr L:RB, CARG1 + | stg CARG1, SAVE_L + | lgr RA, CARG2 + | lghi PC, FRAME_CP + | lghi RD, 0 + | la KBASE, CFRAME_RESUME(sp) + | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table. + | aghi DISPATCH, GG_G2DISP + | stg RD, SAVE_PC // Any value outside of bytecode is ok. + | stg RD, SAVE_CFRAME + | st RD, SAVE_NRES + | stg RD, SAVE_ERRF + | stg KBASE, L:RB->cframe + | clm RD, 1, L:RB->status + | je >2 // Initial resume (like a call). + | + | // Resume after yield (like a return). + | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH) + | set_vmstate INTERP + | stc RD, L:RB->status + | lg BASE, L:RB->base + | lg RD, L:RB->top + | sgr RD, RA + | srlg RD, RD, 3 + | aghi RD, 1 // RD = nresults+1 + | sgr RA, BASE // RA = resultofs + | lg PC, -8(BASE) + | st RD, SAVE_MULTRES + | tmll PC, FRAME_TYPE + | je ->BC_RET_Z + | j ->vm_return + | + |->vm_pcall: // Setup protected C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) + | saveregs + | lghi PC, FRAME_CP + | llgfr CARG4, CARG4 + | stg CARG4, SAVE_ERRF + | j >1 + | + |->vm_call: // Setup C frame and enter VM. 
+ | // (lua_State *L, TValue *base, int nres1) + | saveregs + | lghi PC, FRAME_C + | + |1: // Entry point for vm_pcall above (PC = ftype). + | st CARG3, SAVE_NRES + | lgr L:RB, CARG1 + | stg CARG1, SAVE_L + | lgr RA, CARG2 // Caveat: RA = CARG3. + | + | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table. + | lg KBASE, L:RB->cframe // Add our C frame to cframe chain. + | stg KBASE, SAVE_CFRAME + | stg L:RB, SAVE_PC // Any value outside of bytecode is ok. + | aghi DISPATCH, GG_G2DISP + | stg sp, L:RB->cframe + | + |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype). + | stg L:RB, DISPATCH_GL(cur_L)(DISPATCH) + | set_vmstate INTERP + | lg BASE, L:RB->base // BASE = old base (used in vmeta_call). + | agr PC, RA + | sgr PC, BASE // PC = frame delta + frame type + | + | lg RD, L:RB->top + | sgr RD, RA + | srlg NARGS:RD, NARGS:RD, 3 + | aghi NARGS:RD, 1 // RD = nargs+1 + | + |->vm_call_dispatch: + | lg LFUNC:RB, -16(RA) + | checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE. + | + |->vm_call_dispatch_f: + | lgr BASE, RA + | ins_call + | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC + | + |->vm_cpcall: // Setup protected C frame, call C. + | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) + | saveregs + | lgr L:RB, CARG1 + | stg L:RB, SAVE_L + | stg L:RB, SAVE_PC // Any value outside of bytecode is ok. + | + | lg KBASE, L:RB->stack // Compute -savestack(L, L->top). + | sg KBASE, L:RB->top + | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table. + | lghi TMPR0, 0 + | stg TMPR0, SAVE_ERRF // No error function. + | st KBASE, SAVE_NRES // Neg. delta means cframe w/o frame. + | aghi DISPATCH, GG_G2DISP + | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). + | + | lg KBASE, L:RB->cframe // Add our C frame to cframe chain. 
+ | stg KBASE, SAVE_CFRAME + | stg sp, L:RB->cframe + | stg L:RB, DISPATCH_GL(cur_L)(DISPATCH) + | + | basr r14, CARG4 // (lua_State *L, lua_CFunction func, void *ud) + | // TValue * (new base) or NULL returned in r2 (CRET1/). + | cghi CRET1, 0 + | je ->vm_leave_cp // No base? Just remove C frame. + | lgr RA, CRET1 + | lghi PC, FRAME_CP + | j <2 // Else continue with the call. + | + |//----------------------------------------------------------------------- + |//-- Metamethod handling ------------------------------------------------ + |//----------------------------------------------------------------------- + | + |//-- Continuation dispatch ---------------------------------------------- + | + |->cont_dispatch: + | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES) + | agr RA, BASE + | nill PC, -8 + | lgr RB, BASE + | sgr BASE, PC // Restore caller BASE. + | sllg TMPR1, RD, 3 + | lghi TMPR0, LJ_TNIL + | stg TMPR0, -8(RA, TMPR1) // Ensure one valid arg. + | lgr RC, RA // ... in [RC] + | lg PC, -24(RB) // Restore PC from [cont|PC]. + | lg RA, -32(RB) + |.if FFI + | clfi RA, 1 + | jle >1 + |.endif + | lg LFUNC:KBASE, -16(BASE) + | cleartp LFUNC:KBASE + | lg KBASE, LFUNC:KBASE->pc + | lg KBASE, (PC2PROTO(k))(KBASE) + | // BASE = base, RC = result, RB = meta base + | br RA // Jump to continuation. + | + |.if FFI + |1: + | je ->cont_ffi_callback // cont = 1: return from FFI callback. + | // cont = 0: Tail call from C function. + | sgr RB, BASE + | srl RB, 3 + | ahi RB, -3 + | llgfr RD, RB + | j ->vm_call_tail + |.endif + | + |->cont_cat: // BASE = base, RC = result, RB = mbase + | llgc RA, PC_RB + | sllg RA, RA, 3 + | aghi RB, -32 + | la RA, 0(RA, BASE) + | sgr RA, RB + | je ->cont_ra + | lcgr RA, RA + | srlg RA, RA, 3 + | lg L:CARG1, SAVE_L + | stg BASE, L:CARG1->base + | lgfr CARG3, RA // Caveat: RA == CARG3. 
+ | lg TMPR0, 0(RC) + | stg TMPR0, 0(RB) + | lgr CARG2, RB + | j ->BC_CAT_Z + | + |//-- Table indexing metamethods ----------------------------------------- + | + |->vmeta_tgets: + | settp STR:RC, LJ_TSTR // STR:RC = GCstr * + | stg STR:RC, SAVE_TMP + | la RC, SAVE_TMP + | llgc TMPR1, PC_OP + | cghi TMPR1, BC_GGET + | jne >1 + | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab * + | lay RB, (DISPATCH_GL(tmptv))(DISPATCH) // Store fn->l.env in g->tmptv. + | stg TAB:RA, 0(RB) + | j >2 + | + |->vmeta_tgetb: + | llgc RC, PC_RC + | setint RC + | stg RC, SAVE_TMP + | la RC, SAVE_TMP + | j >1 + | + |->vmeta_tgetv: + | llgc RC, PC_RC // Reload TValue *k from RC. + | sllg RC, RC, 3 + | la RC, 0(RC, BASE) + |1: + | llgc RB, PC_RB // Reload TValue *t from RB. + | sllg RB, RB, 3 + | la RB, 0(RB, BASE) + |2: + | lg L:CARG1, SAVE_L + | stg BASE, L:CARG1->base + | lgr CARG2, RB + | lgr CARG3, RC + | lgr L:RB, L:CARG1 + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) + | // TValue * (finished) or NULL (metamethod) returned in r2 (CRET1). + | lg BASE, L:RB->base + | ltgr RC, CRET1 + | je >3 + |->cont_ra: // BASE = base, RC = result + | llgc RA, PC_RA + | sllg RA, RA, 3 + | lg RB, 0(RC) + | stg RB, 0(RA, BASE) + | ins_next + | + |3: // Call __index metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k + | lg RA, L:RB->top + | stg PC, -24(RA) // [cont|PC] + | la PC, FRAME_CONT(RA) + | sgr PC, BASE + | lg LFUNC:RB, -16(RA) // Guaranteed to be a function here. + | lghi NARGS:RD, 2+1 // 2 args for func(t, k). + | cleartp LFUNC:RB + | j ->vm_call_dispatch_f + | + |->vmeta_tgetr: + | lgr CARG1, TAB:RB + | lgfr CARG2, RC + | brasl r14, extern lj_tab_getinth // (GCtab *t, int32_t key) + | // cTValue * or NULL returned in r2 (CRET1). 
+ | llgc RA, PC_RA + | ltgr RC, CRET1 + | jne ->BC_TGETR_Z + | lghi ITYPE, LJ_TNIL + | j ->BC_TGETR2_Z + | + |//----------------------------------------------------------------------- + | + |->vmeta_tsets: + | settp STR:RC, LJ_TSTR // STR:RC = GCstr * + | stg STR:RC, SAVE_TMP + | la RC, SAVE_TMP + | llgc TMPR0, PC_OP + | cghi TMPR0, BC_GSET + | jne >1 + | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab * + | lay RB, (DISPATCH_GL(tmptv))(DISPATCH) // Store fn->l.env in g->tmptv. + | stg TAB:RA, 0(RB) + | j >2 + | + |->vmeta_tsetb: + | llgc RC, PC_RC + | setint RC + | stg RC, SAVE_TMP + | la RC, SAVE_TMP + | j >1 + | + |->vmeta_tsetv: + | llgc RC, PC_RC // Reload TValue *k from RC. + | sllg RC, RC, 3 + | la RC, 0(RC, BASE) + |1: + | llgc RB, PC_RB // Reload TValue *t from RB. + | sllg RB, RB, 3 + | la RB, 0(RB, BASE) + |2: + | lg L:CARG1, SAVE_L + | stg BASE, L:CARG1->base + | lgr CARG2, RB + | lgr CARG3, RC + | lgr L:RB, L:CARG1 + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) + | // TValue * (finished) or NULL (metamethod) returned in r2 (CRET1). + | lg BASE, L:RB->base + | ltgr RC, CRET1 + | je >3 + | // NOBARRIER: lj_meta_tset ensures the table is not black. + | llgc RA, PC_RA + | sllg RA, RA, 3 + | lg RB, 0(RA, BASE) + | stg RB, 0(RC) + |->cont_nop: // BASE = base, (RC = result) + | ins_next + | + |3: // Call __newindex metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) + | lg RA, L:RB->top + | stg PC, -24(RA) // [cont|PC] + | llgc RC, PC_RA + | // Copy value to third argument. + | sllg RB, RC, 3 + | lg RB, 0(RB, BASE) + | stg RB, 16(RA) + | la PC, FRAME_CONT(RA) + | sgr PC, BASE + | lg LFUNC:RB, -16(RA) // Guaranteed to be a function here. + | lghi NARGS:RD, 3+1 // 3 args for func(t, k, v). 
+ | cleartp LFUNC:RB + | j ->vm_call_dispatch_f + | + |->vmeta_tsetr: + | lg L:CARG1, SAVE_L + | lgr CARG2, TAB:RB + | stg BASE, L:CARG1->base + | lgfr CARG3, RC + | stg PC, SAVE_PC + | brasl r14, extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) + | // TValue * returned in r2 (CRET1). + | lgr RC, CRET1 + | llgc RA, PC_RA + | j ->BC_TSETR_Z + | + |//-- Comparison metamethods --------------------------------------------- + | + |->vmeta_comp: + | llgh RD, PC_RD + | sllg RD, RD, 3 + | llgc RA, PC_RA + | sllg RA, RA, 3 + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | la CARG2, 0(RA, BASE) + | la CARG3, 0(RD, BASE) // Caveat: RA == CARG3 + | lgr CARG1, L:RB + | llgc CARG4, PC_OP + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) + | // 0/1 or TValue * (metamethod) returned in r2 (CRET1). + |3: + | lgr RC, CRET1 + | lg BASE, L:RB->base + | clgfi RC, 1 + | jh ->vmeta_binop + |4: + | la PC, 4(PC) + | jl >6 + |5: + | llgh RD, PC_RD + | branchPC RD + |6: + | ins_next + | + |->cont_condt: // BASE = base, RC = result + | la PC, 4(PC) + | lg ITYPE, 0(RC) + | srag ITYPE, ITYPE, 47 + | lghi TMPR0, LJ_TISTRUECOND + | clr ITYPE, TMPR0 // Branch if result is true. + | jl <5 + | j <6 + | + |->cont_condf: // BASE = base, RC = result + | lg ITYPE, 0(RC) + | srag ITYPE, ITYPE, 47 + | lghi TMPR0, LJ_TISTRUECOND + | clr ITYPE, TMPR0 // Branch if result is false. + | j <4 + | + |->vmeta_equal: + | cleartp TAB:RD + | lay PC, -4(PC) + | lgr CARG2, RA + | lgfr CARG4, RB + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | lgr CARG3, RD + | lgr CARG1, L:RB + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) + | // 0/1 or TValue * (metamethod) returned in r2 (CRET1). 
+ | j <3 + | + |->vmeta_equal_cd: + |.if FFI + | lay PC, -4(PC) + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | lgr CARG1, L:RB + | llgf CARG2, -4(PC) + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_equal_cd // (lua_State *L, BCIns ins) + | // 0/1 or TValue * (metamethod) returned in r2 (CRET1). + | j <3 + |.endif + | + |->vmeta_istype: + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | llgfr CARG2, RA + | llgfr CARG3, RD // Caveat: CARG3 == RA. + | lgr L:CARG1, L:RB + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) + | lg BASE, L:RB->base + | j <6 + | + |//-- Arithmetic metamethods --------------------------------------------- + | + |->vmeta_arith_vno: + | llgc RB, PC_RB + | llgc RC, PC_RC + |->vmeta_arith_vn: + | sllg RB, RB, 3 + | sllg RC, RC, 3 + | la RB, 0(RB, BASE) + | la RC, 0(RC, KBASE) + | j >1 + | + |->vmeta_arith_nvo: + | llgc RC, PC_RC + | llgc RB, PC_RB + |->vmeta_arith_nv: + | sllg RC, RC, 3 + | sllg RB, RB, 3 + | la TMPR1, 0(RC, KBASE) + | la RC, 0(RB, BASE) + | lgr RB, TMPR1 + | j >1 + | + |->vmeta_unm: + | llgh RD, PC_RD + | sllg RD, RD, 3 + | la RC, 0(RD, BASE) + | lgr RB, RC + | j >1 + | + |->vmeta_arith_vvo: + | llgc RB, PC_RB + | llgc RC, PC_RC + |->vmeta_arith_vv: + | sllg RC, RC, 3 + | sllg RB, RB, 3 + | la RB, 0(RB, BASE) + | la RC, 0(RC, BASE) + |1: + | llgc RA, PC_RA + | sllg RA, RA, 3 + | la RA, 0(RA, BASE) + | llgc CARG5, PC_OP // Caveat: CARG5 == RD. + | lgr CARG2, RA + | lgr CARG3, RB // Caveat: CARG3 == RA. + | // lgr CARG4, RC // Caveat: CARG4 == RC (nop, so commented out). + | lg L:CARG1, SAVE_L + | stg BASE, L:CARG1->base + | lgr L:RB, L:CARG1 + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) + | // NULL (finished) or TValue * (metamethod) returned in r2 (CRET1). + | lg BASE, L:RB->base + | cghi CRET1, 0 + | lgr RC, CRET1 + | je ->cont_nop + | + | // Call metamethod for binary op. 
+ |->vmeta_binop: + | // BASE = base, RC = new base, stack = cont/func/o1/o2 + | lgr RA, RC + | sgr RC, BASE + | stg PC, -24(RA) // [cont|PC] + | la PC, FRAME_CONT(RC) + | lghi NARGS:RD, 2+1 // 2 args for func(o1, o2). + | j ->vm_call_dispatch + | + |->vmeta_len: + | llgh RD, PC_RD + | sllg RD, RD, 3 + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | la CARG2, 0(RD, BASE) + | lgr L:CARG1, L:RB + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_len // (lua_State *L, TValue *o) + | // NULL (retry) or TValue * (metamethod) returned in r2 (CRET1). + | lgr RC, CRET1 + | lg BASE, L:RB->base +#if LJ_52 + | cghi RC, 0 + | jne ->vmeta_binop // Binop call for compatibility. + | llgh RD, PC_RD + | sllg RD, RD, 3 + | lg TAB:CARG1, 0(RD, BASE) + | cleartp TAB:CARG1 + | j ->BC_LEN_Z +#else + | j ->vmeta_binop // Binop call for compatibility. +#endif + | + |//-- Call metamethod ---------------------------------------------------- + | + |->vmeta_call_ra: + | la RA, 16(RA, BASE) // RA previously set to RA*8. + |->vmeta_call: // Resolve and call __call metamethod. + | // BASE = old base, RA = new base, RD = nargs+1, PC = return + | stg NARGS:RD, SAVE_TMP // Save nargs+1: RD is scaled below and reloaded after the C call. + | lgr RB, RA // Keep RA in RB across the call (restored below). + | lg L:CARG1, SAVE_L + | stg BASE, L:CARG1->base + | lay CARG2, -16(RA) + | sllg RD, RD, 3 + | lay CARG3, -8(RA, RD) // Caveat: CARG3 == RA. + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) + | lgr RA, RB + | lg L:RB, SAVE_L + | lg BASE, L:RB->base + | lg NARGS:RD, SAVE_TMP + | lg LFUNC:RB, -16(RA) + | aghi NARGS:RD, 1 // One more argument; presumably the callable inserted by lj_meta_call -- confirm. + | // This is fragile. L->base must not move, KBASE must always be defined. + | cgr KBASE, BASE // Continue with CALLT if flag set. + | je ->BC_CALLT_Z + | cleartp LFUNC:RB + | lgr BASE, RA + | ins_call // Otherwise call resolved metamethod.
+ | + |//-- Argument coercion for 'for' statement ------------------------------ + | + |->vmeta_for: + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | lgr CARG2, RA + | lgr CARG1, RB + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_for // (lua_State *L, TValue *base) + | lg BASE, L:RB->base + | llgc OP, PC_OP + | llgc RA, PC_RA + | llgh RD, PC_RD + | sllg TMPR1, OP, 3 + | lg TMPR1, GG_DISP2STATIC(TMPR1, DISPATCH) // Retry FORI or JFORI. + | br TMPR1 + | + |//----------------------------------------------------------------------- + |//-- Fast functions ----------------------------------------------------- + |//----------------------------------------------------------------------- + | + |.macro .ffunc, name + |->ff_ .. name: + |.endmacro + | + |.macro .ffunc_1, name + |->ff_ .. name: + | clfi NARGS:RD, 1+1; jl ->fff_fallback + |.endmacro + | + |.macro .ffunc_2, name + |->ff_ .. name: + | clfi NARGS:RD, 2+1; jl ->fff_fallback + |.endmacro + | + |.macro .ffunc_n, name, op + | .ffunc_1 name + | lg TMPR0, 0(BASE) + | checknumtp TMPR0, ->fff_fallback + | op f0, 0(BASE) + |.endmacro + | + |.macro .ffunc_n, name + | .ffunc_n name, ld + |.endmacro + | + |.macro .ffunc_nn, name + | .ffunc_2 name + | lg TMPR1, 0(BASE) + | lg TMPR0, 8(BASE) + | ld FARG1, 0(BASE) + | ld FARG2, 8(BASE) + | checknumtp TMPR1, ->fff_fallback + | checknumtp TMPR0, ->fff_fallback + |.endmacro + | + |// Inlined GC threshold check. Caveat: uses label 1. 
+ |.macro ffgccheck + | lg RB, (DISPATCH_GL(gc.total))(DISPATCH) + | clg RB, (DISPATCH_GL(gc.threshold))(DISPATCH) + | jl >1 + | brasl r14, ->fff_gcstep + |1: + |.endmacro + | + |//-- Base library: checks ----------------------------------------------- + | + |.ffunc_1 assert + | lg RB, 0(BASE) + | srag ITYPE, RB, 47 + | clfi ITYPE, LJ_TISTRUECOND; jhe ->fff_fallback + | lg PC, -8(BASE) + | st RD, SAVE_MULTRES + | lg RB, 0(BASE) + | stg RB, -16(BASE) + | ahi RD, -2 + | je >2 + | lgr RA, BASE + |1: + | la RA, 8(RA) + | lg RB, 0(RA) + | stg RB, -16(RA) + | brct RD, <1 + |2: + | llgf RD, SAVE_MULTRES + | j ->fff_res_ + | + |.ffunc_1 type + | lg RC, 0(BASE) + | srag RC, RC, 47 + | lghi RB, LJ_TISNUM + | clgr RC, RB + | jnl >1 + | lgr RC, RB + |1: + | lghi TMPR0, -1 + | xgr RC, TMPR0 + |2: + | lg CFUNC:RB, -16(BASE) + | cleartp CFUNC:RB + | sllg RC, RC, 3 + | lg STR:RC, ((char *)(&((GCfuncC *)0)->upvalue))(RC, CFUNC:RB) + | lg PC, -8(BASE) + | settp STR:RC, LJ_TSTR + | stg STR:RC, -16(BASE) + | j ->fff_res1 + | + |//-- Base library: getters and setters --------------------------------- + | + |.ffunc_1 getmetatable + | lg TAB:RB, 0(BASE) + | lg PC, -8(BASE) + | checktab TAB:RB, >6 + |1: // Field metatable must be at same offset for GCtab and GCudata! + | lg TAB:RB, TAB:RB->metatable + |2: + | lghi TMPR0, LJ_TNIL + | stg TMPR0, -16(BASE) + | cghi TAB:RB, 0 + | je ->fff_res1 + | settp TAB:RC, TAB:RB, LJ_TTAB + | stg TAB:RC, -16(BASE) // Store metatable as default result. + | lg STR:RC, (DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable))(DISPATCH) + | llgf RA, TAB:RB->hmask + | n RA, STR:RC->sid + | settp STR:RC, LJ_TSTR + | mghi RA, #NODE + | ag NODE:RA, TAB:RB->node + |3: // Rearranged logic, because we expect _not_ to find the key. + | cg STR:RC, NODE:RA->key + | je >5 + |4: + | ltg NODE:RA, NODE:RA->next + | jne <3 + | j ->fff_res1 // Not found, keep default result. + |5: + | lg RB, NODE:RA->val + | cghi RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value. 
+ | stg RB, -16(BASE) // Return value of mt.__metatable. + | j ->fff_res1 + | + |6: + | clfi ITYPE, LJ_TUDATA; je <1 + | clfi ITYPE, LJ_TISNUM; jh >7 + | lhi ITYPE, LJ_TISNUM + |7: + | lhi TMPR0, -1 + | xr ITYPE, TMPR0 // not ITYPE + | llgfr ITYPE, ITYPE + | sllg ITYPE, ITYPE, 3 + | lg TAB:RB, (DISPATCH_GL(gcroot[GCROOT_BASEMT]))(ITYPE, DISPATCH) + | j <2 + | + |.ffunc_2 setmetatable + | lg TAB:RB, 0(BASE) + | lgr TAB:TMPR1, TAB:RB + | checktab TAB:RB, ->fff_fallback + | // Fast path: no mt for table yet and not clearing the mt. + | lghi TMPR0, 0 + | cg TMPR0, TAB:RB->metatable; jne ->fff_fallback + | lg TAB:RA, 8(BASE) + | checktab TAB:RA, ->fff_fallback + | stg TAB:RA, TAB:RB->metatable + | lg PC, -8(BASE) + | stg TAB:TMPR1, -16(BASE) // Return original table. + | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | je >1 + | // Possible write barrier. Table is black, but skip iswhite(mt) check. + | barrierback TAB:RB, RC + |1: + | j ->fff_res1 + | + |.ffunc_2 rawget + | lg TAB:CARG2, 0(BASE) + | checktab TAB:CARG2, ->fff_fallback + | la CARG3, 8(BASE) + | lg CARG1, SAVE_L + | brasl r14, extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) + | // cTValue * returned in r2 (CRET1). + | // Copy table slot. + | lg RB, 0(CRET1) + | lg PC, -8(BASE) + | stg RB, -16(BASE) + | j ->fff_res1 + | + |//-- Base library: conversions ------------------------------------------ + | + |.ffunc tonumber + | // Only handles the number case inline (without a base argument). + | clfi NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. + | lg RB, 0(BASE) + | checknumber RB, ->fff_fallback + | lg PC, -8(BASE) + | stg RB, -16(BASE) + | j ->fff_res1 + | + |.ffunc_1 tostring + | // Only handles the string or number case inline. + | lg PC, -8(BASE) + | lg STR:RB, 0(BASE) + | checktp_nc STR:RB, LJ_TSTR, >3 + | // A __tostring method in the string base metatable is ignored. 
+ |2: + | stg STR:RB, -16(BASE) + | j ->fff_res1 + |3: // Handle numbers inline, unless a number base metatable is present. + | clfi ITYPE, LJ_TISNUM; jh ->fff_fallback_1 + | lghi TMPR0, 0 + | cg TMPR0, (DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM]))(DISPATCH) + | jne ->fff_fallback + | ffgccheck // Caveat: uses label 1. + | lg L:RB, SAVE_L + | stg BASE, L:RB->base // Add frame since C call can throw. + | stg PC, SAVE_PC // Redundant (but a defined value). + | lgr CARG2, BASE // Otherwise: CARG2 == BASE + | lgr L:CARG1, L:RB + | brasl r14, extern lj_strfmt_number // (lua_State *L, cTValue *o) + | // GCstr returned in r2 (CRET1). + | lg BASE, L:RB->base + | settp STR:RB, CRET1, LJ_TSTR + | j <2 + | + |//-- Base library: iterators ------------------------------------------- + | + |.ffunc_1 next + | je >2 // Missing 2nd arg? + |1: + | lg CARG1, 0(BASE) + | lg PC, -8(BASE) + | checktab CARG1, ->fff_fallback + | lgr RB, BASE // Save BASE. + | la CARG2, 8(BASE) + | lay CARG3, -16(BASE) + | brasl r14, extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + | // 1=found, 0=end, -1=error returned in r2 (CRET1). + | lgr BASE, RB // Restore BASE. + | ltr RD, CRET1; jh ->fff_res2 // Found key/value. + | jl ->fff_fallback_2 // Invalid key. + | // End of traversal: return nil. + | lghi TMPR0, LJ_TNIL + | stg TMPR0, -16(BASE) + | j ->fff_res1 + |2: // Set missing 2nd arg to nil. 
+ | lghi TMPR0, LJ_TNIL + | stg TMPR0, 8(BASE) + | j <1 + | + |.ffunc_1 pairs + | lg TAB:RB, 0(BASE) + | lgr TMPR1, TAB:RB + | checktab TAB:RB, ->fff_fallback +#if LJ_52 + | ltg TMPR0, TAB:RB->metatable; jne ->fff_fallback +#endif + | lg CFUNC:RD, -16(BASE) + | cleartp CFUNC:RD + | lg CFUNC:RD, CFUNC:RD->upvalue[0] + | settp CFUNC:RD, LJ_TFUNC + | lg PC, -8(BASE) + | stg CFUNC:RD, -16(BASE) + | stg TMPR1, -8(BASE) + | lghi TMPR0, LJ_TNIL + | stg TMPR0, 0(BASE) + | lghi RD, 1+3 + | j ->fff_res + | + |.ffunc_2 ipairs_aux + | lg TAB:RB, 0(BASE) + | checktab TAB:RB, ->fff_fallback + | lg RA, 8(BASE) + | checkint RA, ->fff_fallback + | lg PC, -8(BASE) + | aghi RA, 1 + | setint ITYPE, RA + | stg ITYPE, -16(BASE) + | cl RA, TAB:RB->asize; jhe >2 // Not in array part? + | lg RD, TAB:RB->array + | lgfr TMPR1, RA + | sllg TMPR1, TMPR1, 3 + | la RD, 0(TMPR1, RD) + |1: + | lg TMPR0, 0(RD) + | cghi TMPR0, LJ_TNIL; je ->fff_res0 + | // Copy array slot. + | stg TMPR0, -8(BASE) + |->fff_res2: + | lghi RD, 1+2 + | j ->fff_res + |2: // Check for empty hash part first. Otherwise call C function. + | lt TMPR0, TAB:RB->hmask; je ->fff_res0 + | lgr CARG1, TAB:RB + | lgfr CARG2, RA + | brasl r14, extern lj_tab_getinth // (GCtab *t, int32_t key) + | // cTValue * or NULL returned in r2 (CRET1). 
+ | ltgr RD, CRET1 + | jne <1 + |->fff_res0: + | lghi RD, 1+0 + | j ->fff_res + | + |.ffunc_1 ipairs + | lg TAB:RB, 0(BASE) + | lgr TMPR1, TAB:RB + | checktab TAB:RB, ->fff_fallback +#if LJ_52 + | lghi TMPR0, 0 + | cg TMPR0, TAB:RB->metatable; jne ->fff_fallback +#endif + | lg CFUNC:RD, -16(BASE) + | cleartp CFUNC:RD + | lg CFUNC:RD, CFUNC:RD->upvalue[0] + | settp CFUNC:RD, LJ_TFUNC + | lg PC, -8(BASE) + | stg CFUNC:RD, -16(BASE) + | stg TMPR1, -8(BASE) + | llihf RD, LJ_TISNUM<<15 + | stg RD, 0(BASE) + | lghi RD, 1+3 + | j ->fff_res + | + |//-- Base library: catch errors ---------------------------------------- + | + |.ffunc_1 pcall + | lg L:RB, SAVE_L + | sllg RA, NARGS:RD, 3 + | la RA, 0(RA, BASE) + | clg RA, L:RB->maxstack; jh ->fff_fallback + | la RA, 16(BASE) + | aghi NARGS:RD, -1 + | lghi PC, 16+FRAME_PCALL + |1: + | llgc RB, (DISPATCH_GL(hookmask))(DISPATCH) + | srlg RB, RB, HOOK_ACTIVE_SHIFT(r0) + | nill RB, 1 // High bits already zero (from load). + | agr PC, RB // Remember active hook before pcall. + | // Note: this does a (harmless) copy of the function to the PC slot, too. + | lgr KBASE, RD + |2: + | sllg TMPR1, KBASE, 3 + | lg RB, -24(TMPR1, RA) + | stg RB, -16(TMPR1, RA) + | aghi KBASE, -1 + | jh <2 + | j ->vm_call_dispatch + | + |.ffunc_2 xpcall + | lg L:RB, SAVE_L + | sllg RA, NARGS:RD, 3 + | la RA, 0(RA, BASE) + | clg RA, L:RB->maxstack; jh ->fff_fallback + | lg LFUNC:RA, 8(BASE) + | checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback + | lg LFUNC:RB, 0(BASE) // Swap function and traceback. + | stg LFUNC:RA, 0(BASE) + | stg LFUNC:RB, 8(BASE) + | la RA, 24(BASE) + | aghi NARGS:RD, -2 + | lghi PC, 24+FRAME_PCALL + | j <1 + | + |//-- Coroutine library -------------------------------------------------- + | + |.macro coroutine_resume_wrap, resume + |.if resume + |.ffunc_1 coroutine_resume + | lg L:RB, 0(BASE) + | lgr L:TMPR0, L:RB // Save type for checktptp. 
+ | cleartp L:RB + |.else + |.ffunc coroutine_wrap_aux + | lg CFUNC:RB, -16(BASE) + | cleartp CFUNC:RB + | lg L:RB, CFUNC:RB->upvalue[0].gcr + | cleartp L:RB + |.endif + | lg PC, -8(BASE) + | stg PC, SAVE_PC + | stg L:RB, SAVE_TMP + |.if resume + | checktptp L:TMPR0, LJ_TTHREAD, ->fff_fallback + |.endif + | ltg TMPR0, L:RB->cframe; jne ->fff_fallback + | cli L:RB->status, LUA_YIELD; jh ->fff_fallback + | lg RA, L:RB->top + | je >1 // Status != LUA_YIELD (i.e. 0)? + | cg RA, L:RB->base // Check for presence of initial func. + | je ->fff_fallback + | lg PC, -8(RA) // Move initial function up. + | stg PC, 0(RA) + | la RA, 8(RA) + |1: + | sllg TMPR1, NARGS:RD, 3 + |.if resume + | lay PC, -16(TMPR1, RA) // Check stack space (-1-thread). + |.else + | lay PC, -8(TMPR1, RA) // Check stack space (-1). + |.endif + | clg PC, L:RB->maxstack; jh ->fff_fallback + | stg PC, L:RB->top + | + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + |.if resume + | la BASE, 8(BASE) // Keep resumed thread in stack for GC. + |.endif + | stg BASE, L:RB->top + |.if resume + | lay RB, -24(TMPR1, BASE) // RB = end of source for stack move. + |.else + | lay RB, -16(TMPR1, BASE) // RB = end of source for stack move. + |.endif + | sgr RB, PC // Relative to PC. + | + | cgr PC, RA + | je >3 + |2: // Move args to coroutine. + | lg RC, 0(RB, PC) + | stg RC, -8(PC) + | lay PC, -8(PC) + | cgr PC, RA + | jne <2 + |3: + | lgr CARG2, RA + | lg L:CARG1, SAVE_TMP + | lghi CARG3, 0 + | lghi CARG4, 0 + | brasl r14, ->vm_resume // (lua_State *L, TValue *base, 0, 0) + | + | lg L:RB, SAVE_L + | lg L:PC, SAVE_TMP + | lg BASE, L:RB->base + | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH) + | set_vmstate INTERP + | + | clfi CRET1, LUA_YIELD + | jh >8 + |4: + | lg RA, L:PC->base + | lg KBASE, L:PC->top + | stg RA, L:PC->top // Clear coroutine stack. + | lgr PC, KBASE + | sgr PC, RA + | je >6 // No results? + | la RD, 0(PC, BASE) + | llgfr PC, PC + | srlg PC, PC, 3 + | clg RD, L:RB->maxstack + | jh >9 // Need to grow stack? 
+ | + | lgr RB, BASE + | sgr RB, RA + |5: // Move results from coroutine. + | lg RD, 0(RA) + | stg RD, 0(RA, RB) + | la RA, 8(RA) + | cgr RA, KBASE + | jne <5 + |6: + |.if resume + | la RD, 2(PC) // nresults+1 = 1 + true + results. + | load_true ITYPE // Prepend true to results. + | stg ITYPE, -8(BASE) + |.else + | la RD, 1(PC) // nresults+1 = 1 + results. + |.endif + |7: + | lg PC, SAVE_PC + | st RD, SAVE_MULTRES + |.if resume + | lghi RA, -8 + |.else + | lghi RA, 0 + |.endif + | tmll PC, FRAME_TYPE + | je ->BC_RET_Z + | j ->vm_return + | + |8: // Coroutine returned with error (at co->top-1). + |.if resume + | load_false ITYPE // Prepend false to results. + | stg ITYPE, -8(BASE) + | lg RA, L:PC->top + | aghi RA, -8 + | stg RA, L:PC->top // Clear error from coroutine stack. + | // Copy error message. + | lg RD, 0(RA) + | stg RD, 0(BASE) + | lghi RD, 1+2 // nresults+1 = 1 + false + error. + | j <7 + |.else + | lgr CARG2, L:PC + | lgr CARG1, L:RB + | brasl r14, extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) + | // Error function does not return. + |.endif + | + |9: // Handle stack expansion on return from yield. + | lg L:RA, SAVE_TMP + | stg KBASE, L:RA->top // Undo coroutine stack clearing. + | lgr CARG2, PC + | lgr CARG1, L:RB + | brasl r14, extern lj_state_growstack // (lua_State *L, int n) + | lg L:PC, SAVE_TMP + | lg BASE, L:RB->base + | j <4 // Retry the stack move. 
+ |.endmacro + | + | coroutine_resume_wrap 1 // coroutine.resume + | coroutine_resume_wrap 0 // coroutine.wrap + | + |.ffunc coroutine_yield + | lg L:RB, SAVE_L + | lg TMPR0, L:RB->cframe + | tmll TMPR0, CFRAME_RESUME + | je ->fff_fallback + | stg BASE, L:RB->base + | sllg RD, NARGS:RD, 3 + | lay RD, -8(RD, BASE) + | stg RD, L:RB->top + | lghi RD, 0 + | stg RD, L:RB->cframe + | lghi CRET1, LUA_YIELD + | stc CRET1, L:RB->status + | j ->vm_leave_unw + | + |//-- Math library ------------------------------------------------------- + | + |.ffunc_1 math_abs + | lg RB, 0(BASE) + | checkint RB, >3 + | lpr RB, RB; jo >2 + |->fff_resbit: + |->fff_resi: + | setint RB + |->fff_resRB: + | lg PC, -8(BASE) + | stg RB, -16(BASE) + | j ->fff_res1 + |2: + | llihh RB, 0x41e0 // 2^31 + | j ->fff_resRB + |3: + | jh ->fff_fallback + | nihh RB, 0x7fff // Clear sign bit. + | lg PC, -8(BASE) + | stg RB, -16(BASE) + | j ->fff_res1 + | + |.ffunc_n math_sqrt, sqdb + |->fff_resf0: + | lg PC, -8(BASE) + | stdy f0, -16(BASE) + | // fallthrough + | + |->fff_res1: + | lghi RD, 1+1 + |->fff_res: + | st RD, SAVE_MULTRES + |->fff_res_: + | tmll PC, FRAME_TYPE + | jne >7 + |5: + | llgc TMPR1, PC_RB + | clgr TMPR1, RD // More results expected? + | jh >6 + | // Adjust BASE. KBASE is assumed to be set for the calling frame. + | llgc RA, PC_RA + | lcgr RA, RA + | sllg RA, RA, 3 + | lay BASE, -16(RA, BASE) // base = base - (RA+2)*8 + | ins_next + | + |6: // Fill up results with nil. + | sllg TMPR1, RD, 3 + | lghi TMPR0, LJ_TNIL + | stg TMPR0, -24(TMPR1, BASE) + | la RD, 1(RD) + | j <5 + | + |7: // Non-standard return case. + | lghi RA, -16 // Results start at BASE+RA = BASE-16. + | j ->vm_return + | + |.macro math_round, func + | .ffunc math_ .. func + | lg RB, 0(BASE) + | ld f0, 0(BASE) + | checknumx RB, ->fff_resRB, je + | jh ->fff_fallback + | brasl r14, ->vm_ .. 
func + | cfdbr RB, 0, f0 + | jo ->fff_resf0 + | llgfr RB, RB + | j ->fff_resi + |.endmacro + | + | math_round floor + | math_round ceil + | + |.ffunc math_log + | chi NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. + | lg TMPR0, 0(BASE) + | ld FARG1, 0(BASE) + | checknumtp TMPR0, ->fff_fallback + | brasl r14, extern log + | j ->fff_resf0 + | + |.macro math_extern, func + | .ffunc_n math_ .. func + | brasl r14, extern func + | j ->fff_resf0 + |.endmacro + | + |.macro math_extern2, func + | .ffunc_nn math_ .. func + | brasl r14, extern func + | j ->fff_resf0 + |.endmacro + | + | math_extern log10 + | math_extern exp + | math_extern sin + | math_extern cos + | math_extern tan + | math_extern asin + | math_extern acos + | math_extern atan + | math_extern sinh + | math_extern cosh + | math_extern tanh + | math_extern2 pow + | math_extern2 atan2 + | math_extern2 fmod + | + |.ffunc_2 math_ldexp + | lg TMPR0, 0(BASE) + | ld FARG1, 0(BASE) + | lg CARG1, 8(BASE) + | checknumtp TMPR0, ->fff_fallback + | checkinttp CARG1, ->fff_fallback + | lgfr CARG1, CARG1 + | brasl r14, extern ldexp // (double, int) + | j ->fff_resf0 + | + |.ffunc_n math_frexp + | la CARG1, SAVE_TMP + | brasl r14, extern frexp + | llgf RB, SAVE_TMP + | lg PC, -8(BASE) + | stdy f0, -16(BASE) + | setint RB + | stg RB, -8(BASE) + | lghi RD, 1+2 + | j ->fff_res + | + |.ffunc_n math_modf + | lay CARG1, -16(BASE) + | brasl r14, extern modf // (double, double*) + | lg PC, -8(BASE) + | stdy f0, -8(BASE) + | lghi RD, 1+2 + | j ->fff_res + | + |.macro math_minmax, name, cjmp + | .ffunc name + | lghi RA, 2*8 + | sllg TMPR1, RD, 3 + | lg RB, 0(BASE) + | ld f0, 0(BASE) + | checkint RB, >4 + |1: // Handle integers. + | clgr RA, TMPR1; jhe ->fff_resRB + | lg TMPR0, -8(RA, BASE) + | checkint TMPR0, >3 + | cr RB, TMPR0 + | cjmp >2 + | lgr RB, TMPR0 + |2: + | aghi RA, 8 + | j <1 + |3: + | jh ->fff_fallback + | // Convert intermediate result to number and continue below. 
+ | cdfbr f0, RB + | ldgr f1, TMPR0 + | j >6 + |4: + | jh ->fff_fallback + |5: // Handle numbers or integers. + | clgr RA, TMPR1; jhe ->fff_resf0 + | lg RB, -8(RA, BASE) + | ldy f1, -8(RA, BASE) + | checknumx RB, >6, jl + | jh ->fff_fallback + | cdfbr f1, RB + |6: + | cdbr f0, f1 + | cjmp >7 + | ldr f0, f1 + |7: + | aghi RA, 8 + | j <5 + |.endmacro + | + | math_minmax math_min, jnh + | math_minmax math_max, jnl + | + |//-- String library ----------------------------------------------------- + | + |.ffunc string_byte // Only handle the 1-arg case here. + | chi NARGS:RD, 1+1; jne ->fff_fallback + | lg STR:RB, 0(BASE) + | checkstr STR:RB, ->fff_fallback + | lg PC, -8(BASE) + | lt TMPR0, STR:RB->len // 32-bit test: len is read as 32 bits elsewhere (llgf in string_sub); a 64-bit ltg would also test the 4 bytes after it. + | je ->fff_res0 // Return no results for empty string. + | llgc RB, STR:RB[1] + | j ->fff_resi + | + |.ffunc string_char // Only handle the 1-arg case here. + | ffgccheck + | chi NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg. + | lg RB, 0(BASE) + | checkint RB, ->fff_fallback + | clfi RB, 255; jh ->fff_fallback + | strvh RB, SAVE_TMP // Store [c,0]. + | lghi TMPR1, 1 + | la RD, SAVE_TMP // Points to stack; strvh stored the halfword byte-reversed, so c is the first byte. + |->fff_newstr: + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | llgfr CARG3, TMPR1 // Zero-extended to size_t. + | lgr CARG2, RD + | lgr CARG1, L:RB + | stg PC, SAVE_PC + | brasl r14, extern lj_str_new // (lua_State *L, char *str, size_t l) + |->fff_resstr: + | // GCstr * returned in r2 (CRET1). + | lgr STR:RD, CRET1 + | lg BASE, L:RB->base + | lg PC, -8(BASE) + | settp STR:RD, LJ_TSTR + | stg STR:RD, -16(BASE) + | j ->fff_res1 + | + |.ffunc string_sub + | ffgccheck + | lghi TMPR1, -1 + | clfi NARGS:RD, 1+2; jl ->fff_fallback + | jnh >1 + | lg TMPR1, 16(BASE) + | checkint TMPR1, ->fff_fallback + |1: + | lg STR:RB, 0(BASE) + | checkstr STR:RB, ->fff_fallback + | lg ITYPE, 8(BASE) + | lgfr RA, ITYPE + | srag ITYPE, ITYPE, 47 + | cghi ITYPE, LJ_TISNUM + | jne ->fff_fallback + | llgf RC, STR:RB->len + | clr RC, TMPR1 // len < end?
(unsigned compare) + | jl >5 + |2: + | cghi RA, 0 // start <= 0? + | jle >7 + |3: + | sr TMPR1, RA // start > end? + | jnhe ->fff_emptystr + | la RD, (#STR-1)(RA, STR:RB) + | ahi TMPR1, 1 + |4: + | j ->fff_newstr + | + |5: // Negative end or overflow. + | chi TMPR1, 0 + | jnl >6 + | ahi TMPR1, 1 + | ar TMPR1, RC // end = end+(len+1) + | j <2 + |6: // Overflow. + | lr TMPR1, RC // end = len + | j <2 + | + |7: // Negative start or underflow. + | je >8 + | agr RA, RC // start = start+(len+1) + | aghi RA, 1 + | jh <3 // start > 0? + |8: // Underflow. + | lghi RA, 1 // start = 1 + | j <3 + | + |->fff_emptystr: // Range underflow. + | lghi TMPR1, 0 + | j <4 + | + |.macro ffstring_op, name + | .ffunc_1 string_ .. name + | ffgccheck + | lg STR:CARG2, 0(BASE) + | checkstr STR:CARG2, ->fff_fallback + | lg L:RB, SAVE_L + | lay SBUF:CARG1, (DISPATCH_GL(tmpbuf))(DISPATCH) + | stg BASE, L:RB->base + | lg RC, SBUF:CARG1->b + | stg L:RB, SBUF:CARG1->L + | stg RC, SBUF:CARG1->w + | stg PC, SAVE_PC + | brasl r14, extern lj_buf_putstr_ .. name + | // lgr CARG1, CRET1 (nop, CARG1==CRET1) + | brasl r14, extern lj_buf_tostr + | j ->fff_resstr + |.endmacro + | + |ffstring_op reverse + |ffstring_op lower + |ffstring_op upper + | + |//-- Bit library -------------------------------------------------------- + | + |.macro .ffunc_bit, name, kind, fdef + | fdef name + |.if kind == 2 + | bfpconst_tobit f1, RB + |.endif + | lg RB, 0(BASE) + | ld f0, 0(BASE) + | checkint RB, >1 + |.if kind > 0 + | j >2 + |.else + | j ->fff_resbit + |.endif + |1: + | jh ->fff_fallback + |.if kind < 2 + | bfpconst_tobit f1, RB + |.endif + | adbr f0, f1 + | lgdr RB, f0 + | llgfr RB, RB + |2: + |.endmacro + | + |.macro .ffunc_bit, name, kind + | .ffunc_bit name, kind, .ffunc_1 + |.endmacro + | + |.ffunc_bit bit_tobit, 0 + | j ->fff_resbit + | + |.macro .ffunc_bit_op, name, ins + | .ffunc_bit name, 2 + | lgr TMPR1, NARGS:RD // Save for fallback. 
+ | sllg RD, NARGS:RD, 3 + | lay RD, -16(RD, BASE) + |1: + | clgr RD, BASE + | jle ->fff_resbit + | lg RA, 0(RD) + | checkint RA, >2 + | ins RB, RA + | aghi RD, -8 + | j <1 + |2: + | jh ->fff_fallback_bit_op + | ldgr f0, RA + | adbr f0, f1 + | lgdr RA, f0 + | ins RB, RA + | aghi RD, -8 + | j <1 + |.endmacro + | + |.ffunc_bit_op bit_band, nr + |.ffunc_bit_op bit_bor, or + |.ffunc_bit_op bit_bxor, xr + | + |.ffunc_bit bit_bswap, 1 + | lrvr RB, RB + | j ->fff_resbit + | + |.ffunc_bit bit_bnot, 1 + | xilf RB, -1 + | j ->fff_resbit + | + |->fff_fallback_bit_op: + | lgr NARGS:RD, TMPR1 // Restore for fallback + | j ->fff_fallback + | + |.macro .ffunc_bit_sh, name, ins + | .ffunc_bit name, 1, .ffunc_2 + | // Note: no inline conversion from number for 2nd argument! + | lg RA, 8(BASE) + | checkint RA, ->fff_fallback + | nill RA, 0x1f // Limit shift to 5-bits. + | ins RB, 0(RA) + | j ->fff_resbit + |.endmacro + | + |.ffunc_bit_sh bit_lshift, sll + |.ffunc_bit_sh bit_rshift, srl + |.ffunc_bit_sh bit_arshift, sra + | + |.ffunc_bit bit_rol, 1, .ffunc_2 + | // Note: no inline conversion from number for 2nd argument! + | lg RA, 8(BASE) + | checkint RA, ->fff_fallback + | rll RB, RB, 0(RA) + | j ->fff_resbit + | + |.ffunc_bit bit_ror, 1, .ffunc_2 + | // Note: no inline conversion from number for 2nd argument! + | lg RA, 8(BASE) + | checkint RA, ->fff_fallback + | lcr RA, RA // Right rotate equivalent to negative left rotate. + | rll RB, RB, 0(RA) + | j ->fff_resbit + | + |//----------------------------------------------------------------------- + | + |->fff_fallback_2: + | lghi NARGS:RD, 1+2 // Other args are ignored, anyway. + | j ->fff_fallback + |->fff_fallback_1: + | lghi NARGS:RD, 1+1 // Other args are ignored, anyway. + |->fff_fallback: // Call fast function fallback handler. + | // BASE = new base, RD = nargs+1 + | lg L:RB, SAVE_L + | lg PC, -8(BASE) // Fallback may overwrite PC. + | stg PC, SAVE_PC // Redundant (but a defined value). 
+ | stg BASE, L:RB->base + | sllg RD, NARGS:RD, 3 + | lay RD, -8(RD, BASE) + | la RA, (8*LUA_MINSTACK)(RD) // Ensure enough space for handler. + | stg RD, L:RB->top + | lg CFUNC:RD, -16(BASE) + | cleartp CFUNC:RD + | clg RA, L:RB->maxstack + | jh >5 // Need to grow stack. + | lgr CARG1, L:RB + | lg TMPR1, CFUNC:RD->f + | basr r14, TMPR1 // (lua_State *L) + | lg BASE, L:RB->base + | // Either throws an error, or recovers and returns -1, 0 or nresults+1. + | lgr RD, CRET1 + | cghi RD, 0; jh ->fff_res // Returned nresults+1? + |1: + | lg RA, L:RB->top + | sgr RA, BASE + | srlg RA, RA, 3 + | cghi RD, 0 + | la NARGS:RD, 1(RA) + | lg LFUNC:RB, -16(BASE) + | jne ->vm_call_tail // Returned -1? + | cleartp LFUNC:RB + | ins_callt // Returned 0: retry fast path. + | + |// Reconstruct previous base for vmeta_call during tailcall. + |->vm_call_tail: + | lgr RA, BASE + | tmll PC, FRAME_TYPE + | jne >3 + | llgc RB, PC_RA + | lcgr RB, RB + | sllg RB, RB, 3 + | lay BASE, -16(RB, BASE) // base = base - (RB+2)*8 + | j ->vm_call_dispatch // Resolve again for tailcall. + |3: + | lgr RB, PC + | nill RB, -8 + | sgr BASE, RB + | j ->vm_call_dispatch // Resolve again for tailcall. + | + |5: // Grow stack for fallback handler. + | lghi CARG2, LUA_MINSTACK + | lgr CARG1, L:RB + | brasl r14, extern lj_state_growstack // (lua_State *L, int n) + | lg BASE, L:RB->base + | lghi RD, 0 // Simulate a return 0. + | j <1 // Dumb retry (goes through ff first). + | + |->fff_gcstep: // Call GC step function. + | // BASE = new base, RD = nargs+1 + | stg r14, SAVE_TMP // Save return address + | lg L:RB, SAVE_L + | stg PC, SAVE_PC // Redundant (but a defined value). + | stg BASE, L:RB->base + | sllg RD, NARGS:RD, 3 + | lay RD, -8(RD, BASE) + | lgr CARG1, L:RB + | stg RD, L:RB->top + | brasl r14, extern lj_gc_step // (lua_State *L) + | lg BASE, L:RB->base + | lg RD, L:RB->top + | sgr RD, BASE + | srlg RD, RD, 3 + | aghi NARGS:RD, 1 + | lg r14, SAVE_TMP // Restore return address. 
+ | br r14 + | + |//----------------------------------------------------------------------- + |//-- Special dispatch targets ------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_record: // Dispatch target for recording phase. Placeholder: same stub body as the NYI vm_mod. + | stg r0, 0 + | stg r0, 0 + | + |->vm_rethook: // Dispatch target for return hooks. + | llgc RD, (DISPATCH_GL(hookmask))(DISPATCH) + | tmll RD, HOOK_ACTIVE // Hook already active? + | jne >5 // Yes: re-dispatch without calling the hook (label 5 below). + | j >1 // No: call lj_dispatch_ins unconditionally (label 1 below). + | + |->vm_inshook: // Dispatch target for instr/line hooks. + | llgc RD, (DISPATCH_GL(hookmask))(DISPATCH) + | tmll RD, HOOK_ACTIVE // Hook already active? + | jne >5 + | + | tmll RD, LUA_MASKLINE|LUA_MASKCOUNT // Neither line nor count hook enabled? + | je >5 + | ly TMPR0, (DISPATCH_GL(hookcount))(DISPATCH) + | ahi TMPR0, -1 // Decrement hookcount; sets the condition code tested by je below. + | sty TMPR0, (DISPATCH_GL(hookcount))(DISPATCH) + | je >1 // hookcount reached zero: call the dispatcher. + | tmll RD, LUA_MASKLINE + | je >5 // No line hook: nothing to do. + |1: + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | lgr CARG2, PC + | lgr CARG1, L:RB + | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. + | brasl r14, extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) + |3: + | lg BASE, L:RB->base + |4: + | llgc RA, PC_RA + |5: + | llgc OP, PC_OP + | sllg TMPR1, OP, 3 + | llgh RD, PC_RD + | lg TMPR1, GG_DISP2STATIC(TMPR1, DISPATCH) + | br TMPR1 // Re-dispatch through the GG_DISP2STATIC entry for OP. + | + |->cont_hook: // Continue from hook yield. Placeholder: same stub body as the NYI vm_mod. + | stg r0, 0 + | stg r0, 0 + | + |->vm_hotloop: // Hot loop counter underflow. Placeholder: same stub body as the NYI vm_mod. + | stg r0, 0 + | stg r0, 0 + | + |->vm_callhook: // Dispatch target for call hooks. + | stg PC, SAVE_PC + |.if JIT + | j >1 + |.endif + | + |->vm_hotcall: // Hot call counter underflow. + |.if JIT + | stg PC, SAVE_PC + | oill PC, 1 // Marker for hot call. + |1: + |.endif + | sllg RD, NARGS:RD, 3 + | lay RD, -8(RD, BASE) + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | stg RD, L:RB->top + | lgr CARG2, PC + | lgr CARG1, L:RB + | brasl r14, extern lj_dispatch_call // (lua_State *L, const BCIns *pc) + | // ASMFunction returned in r2 (CRET1). 
+ | lghi TMPR0, 0 + | stg TMPR0, SAVE_PC // Invalidate for subsequent line hook. + |.if JIT + | nill PC, -2 // Clear bit 0 of PC (the hot-call marker set with oill in vm_hotcall). + |.endif + | lg BASE, L:RB->base + | lg RD, L:RB->top + | sgr RD, BASE + | lgr RB, CRET1 + | llgc RA, PC_RA + | srl RD, 3 + | ahi NARGS:RD, 1 + | llgfr RD, RD + | br RB // Jump to the ASMFunction returned by lj_dispatch_call. + | + |->cont_stitch: // Trace stitching. Placeholder: same stub body as the NYI vm_mod. + | stg r0, 0 + | stg r0, 0 + | + |->vm_profhook: // Dispatch target for profiler hook. Placeholder: same stub body as the NYI vm_mod. + | stg r0, 0 + | stg r0, 0 + | + |//----------------------------------------------------------------------- + |//-- Trace exit handler ------------------------------------------------- + |//----------------------------------------------------------------------- + | + |// Called from an exit stub. Both entry points below are placeholders (same stub body as the NYI vm_mod). + |// NOTE(review): the "exit number stored with two (sign-extended) push imm8" description reads like the x86 port; confirm before implementing. + |->vm_exit_handler: + | stg r0, 0 + | stg r0, 0 + |->vm_exit_interp: + | stg r0, 0 + | stg r0, 0 + | + |//----------------------------------------------------------------------- + |//-- Math helper functions ---------------------------------------------- + |//----------------------------------------------------------------------- + | + |// FP value rounding. Called by math.floor/math.ceil fast functions. + |// Value to round is in f0. May clobber f0-f7 and r0. Return address is r14. + |.macro vm_round, name, mask + |->name: + | ldr f4, f0 // Keep a copy of the input; didbr overwrites f0. + | lghi r0, 1 + | cdfbr f1, r0 // f1 = 1.0 (divisor). + | didbr f0, f2, f1, mask // f0=remainder, f2=quotient. + | fidbra f4, mask, f4, 0 // f4 = input rounded to an integer value using mask. + | ldr f0, f4 // Result is returned in f0. + | jnle >1 // Condition code is still the one set by didbr. + | br r14 + |1: // partial remainder (sanity check): placeholder store to address 0. + | stg r0, 0 + |.endmacro + | + | vm_round vm_floor, 7 // Round towards -inf. + | vm_round vm_ceil, 6 // Round towards +inf. + | vm_round vm_trunc, 5 // Round towards 0. + | + |// FP modulo x%y. Called by BC_MOD* and vm_arith. + |->vm_mod: // NYI. 
+ | stg r0, 0 + | stg r0, 0 + | + |//----------------------------------------------------------------------- + |//-- Assertions --------------------------------------------------------- + |//----------------------------------------------------------------------- + | + |->assert_bad_for_arg_type: + | stg r0, 0 + | stg r0, 0 +#ifdef LUA_USE_ASSERT +#endif + | + |->vm_next: + |.if JIT + | NYI // On big-endian. + |.endif + | + |//----------------------------------------------------------------------- + |//-- FFI helper functions ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |// Handler for callback functions. NOTE(review): the original "slot number in ah/al" names x86 registers; the body below is a placeholder stub. + |->vm_ffi_callback: + | stg r0, 0 + | stg r0, 0 + | + |->cont_ffi_callback: // Return from FFI callback. Placeholder stub. + | stg r0, 0 + | stg r0, 0 + | + |->vm_ffi_call: // Call C function via FFI. + | // Caveat: needs special frame unwinding, see below. + |.if FFI + | .type CCSTATE, CCallState, r8 + | stmg r6, r15, 48(sp) // Save r6-r15; reloaded by lmg before returning. + | lgr r13, sp // Use r13 as frame pointer. + | lgr CCSTATE, CARG1 + | lg r7, CCSTATE->func // r7 = function to call (target of basr below). + | + | // Readjust stack. + | sgf sp, CCSTATE->spadj + | + | // Copy stack slots. + | llgc r1, CCSTATE->nsp + | chi r1, 0 + | jh >2 // nsp > 0: copy the stack slots first, then come back to 1. + |1: + | lmg CARG1, CARG5, CCSTATE->gpr[0] + | // TODO: conditionally load FPRs? + | ld FARG1, CCSTATE->fpr[0] + | ld FARG2, CCSTATE->fpr[1] + | ld FARG3, CCSTATE->fpr[2] + | ld FARG4, CCSTATE->fpr[3] + | basr r14, r7 // Call CCSTATE->func. + | + | stg CRET1, CCSTATE->gpr[0] // Store the GPR result back into the call state. + | std f0, CCSTATE->fpr[0] // Store the FPR result back into the call state. + | + | lgr sp, r13 // Restore sp from the frame pointer. + | lmg r6, r15, 48(sp) + | br r14 + | + |2: + | sll r1, 3 // Slot count -> byte count. + | la r10, (offsetof(CCallState, stack))(CCSTATE) // Source. + | la r11, (CCALL_SPS_EXTRA*8)(sp) // Destination. 
+ |3: + | chi r1, 256 + | jl >4 + | mvc 0(256, r11), 0(r10) + | la r10, 256(r10) + | la r11, 256(r11) + | ahi r1, -256 + | j <3 + | + |4: + | ahi r1, -1 + | jl <1 + | larl r9, >5 + | ex r1, 0(r9) + | j <1 + | + |5: + | // Target of the 'ex' above (the code uses larl + ex, not exrl); the mvc length is supplied from r1. + | mvc 0(1, r11), 0(r10) + |.endif + |// Note: vm_ffi_call must be the last function in this object file! + | + |//----------------------------------------------------------------------- +} + +/* Generate the code for a single bytecode instruction `op`, emitted at dynamic label `defop`. */ +static void build_ins(BuildCtx *ctx, BCOp op, int defop) +{ + int vk = 0; + (void)vk; + |// Note: aligning all instructions does not pay off. + |=>defop: + + switch (op) { + + /* -- Comparison ops ---------------------------------------------------- */ + + /* Remember: all ops branch for a true comparison, fall through otherwise. */ + + |.macro jmp_comp, lt, ge, le, gt, target + ||switch (op) { + ||case BC_ISLT: + | lt target + ||break; + ||case BC_ISGE: + | ge target + ||break; + ||case BC_ISLE: + | le target + ||break; + ||case BC_ISGT: + | gt target + ||break; + ||default: break; /* Shut up GCC. */ + ||} + |.endmacro + + case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: + | // RA = src1, RD = src2, JMP with RD = target + | ins_AD + | sllg RA, RA, 3 + | sllg RD, RD, 3 + | ld f0, 0(RA, BASE) + | ld f1, 0(RD, BASE) + | lg RA, 0(RA, BASE) + | lg RD, 0(RD, BASE) + | srag ITYPE, RA, 47 + | srag RB, RD, 47 + | + | clfi ITYPE, LJ_TISNUM; jne >7 + | clfi RB, LJ_TISNUM; jne >8 + | // Both are integers. + | la PC, 4(PC) + | cr RA, RD + | jmp_comp jhe, jl, jh, jle, >9 // Inverted condition: skip the jump at 6 when the comparison is false. + |6: + | llgh RD, PC_RD + | branchPC RD + |9: + | ins_next + | + |7: // RA is not an integer. + | jh ->vmeta_comp + | // RA is a number. + | clfi RB, LJ_TISNUM; jl >1; jne ->vmeta_comp + | // RA is a number, RD is an integer. + | cdfbr f1, RD + | j >1 + | + |8: // RA is an integer, RD is not an integer. + | jh ->vmeta_comp + | // RA is an integer, RD is a number. 
+ | cdfbr f0, RA + |1: + | la PC, 4(PC) + | cdbr f0, f1 + | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. + | jmp_comp jnl, jl, jnle, jle, <9 + | j <6 + break; + + case BC_ISEQV: case BC_ISNEV: + vk = op == BC_ISEQV; + | ins_AD // RA = src1, RD = src2, JMP with RD = target + | sllg RD, RD, 3 + | ld f1, 0(RD, BASE) + | lg RD, 0(RD, BASE) + | sllg RA, RA, 3 + | ld f0, 0(RA, BASE) + | lg RA, 0(RA, BASE) + | la PC, 4(PC) + | srag RB, RD, 47 + | srag ITYPE, RA, 47 + | clfi RB, LJ_TISNUM; jne >7 + | clfi ITYPE, LJ_TISNUM; jne >8 + | cr RD, RA + if (vk) { + | jne >9 + } else { + | je >9 + } + | llgh RD, PC_RD + | branchPC RD + |9: + | ins_next + | + |7: // RD is not an integer. + | jh >5 + | // RD is a number. + | clfi ITYPE, LJ_TISNUM; jl >1; jne >5 + | // RD is a number, RA is an integer. + | cdfbr f0, RA + | j >1 + | + |8: // RD is an integer, RA is not an integer. + | jh >5 + | // RD is an integer, RA is a number. + | cdfbr f1, RD + | j >1 + | + |1: + | cdbr f0, f1 + |4: + iseqne_fp: + if (vk) { + | jne >2 // Unordered means not equal. + } else { + | je >1 // Unordered means not equal. + } + iseqne_end: + if (vk) { + |1: // EQ: Branch to the target. + | llgh RD, PC_RD + | branchPC RD + |2: // NE: Fallthrough to next instruction. + |.if not FFI + |3: + |.endif + } else { + |.if not FFI + |3: + |.endif + |2: // NE: Branch to the target. + | llgh RD, PC_RD + | branchPC RD + |1: // EQ: Fallthrough to next instruction. + } + if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV || + op == BC_ISEQN || op == BC_ISNEN)) { + | j <9 + } else { + | ins_next + } + | + if (op == BC_ISEQV || op == BC_ISNEV) { + |5: // Either or both types are not numbers. + |.if FFI + | clfi RB, LJ_TCDATA; je ->vmeta_equal_cd + | clfi ITYPE, LJ_TCDATA; je ->vmeta_equal_cd + |.endif + | cgr RA, RD + | je <1 // Same GCobjs or pvalues? + | cr RB, ITYPE + | jne <2 // Not the same type? + | clfi RB, LJ_TISTABUD + | jh <2 // Different objects and not table/ud? 
+ | + | // Different tables or userdatas. Need to check __eq metamethod. + | // Field metatable must be at same offset for GCtab and GCudata! + | cleartp TAB:RA + | lg TAB:RB, TAB:RA->metatable + | cghi TAB:RB, 0 + | je <2 // No metatable? + | tm TAB:RB->nomm, 1<<MM_eq + | jne <2 // Or 'no __eq' flag set? + if (vk) { + | lghi RB, 0 // ne = 0 + } else { + | lghi RB, 1 // ne = 1 + } + | j ->vmeta_equal // Handle __eq metamethod. + } else { + |.if FFI + |3: + | clfi ITYPE, LJ_TCDATA + if (LJ_DUALNUM && vk) { + | jne <9 + } else { + | jne <2 + } + | j ->vmeta_equal_cd + |.endif + } + break; + case BC_ISEQS: case BC_ISNES: + vk = op == BC_ISEQS; + | ins_AND // RA = src, RD = str const, JMP with RD = target + | sllg RA, RA, 3 + | sllg RD, RD, 3 + | lg RB, 0(RA, BASE) + | la PC, 4(PC) + | checkstr RB, >3 + | cg RB, 0(RD, KBASE) + iseqne_test: + if (vk) { + | jne >2 + } else { + | je >1 + } + goto iseqne_end; + case BC_ISEQN: case BC_ISNEN: + vk = op == BC_ISEQN; + | ins_AD // RA = src, RD = num const, JMP with RD = target + | sllg RA, RA, 3 + | sllg RD, RD, 3 + | ld f0, 0(RA, BASE) + | lg RB, 0(RA, BASE) + | ld f1, 0(RD, KBASE) + | lg RD, 0(RD, KBASE) + | la PC, 4(PC) + | checkint RB, >7 + | checkint RD, >8 + | cr RB, RD + if (vk) { + | jne >9 + } else { + | je >9 + } + | llgh RD, PC_RD + | branchPC RD + |9: + | ins_next + | + |7: // RA is not an integer. + | jh >3 + | // RA is a number. + | checkint RD, >1 + | // RA is a number, RD is an integer. + | cdfbr f1, RD + | j >1 + | + |8: // RA is an integer, RD is a number. 
+ | cdfbr f0, RB + | cdbr f0, f1 + | j >4 + |1: + | cdbr f0, f1 + |4: + goto iseqne_fp; + case BC_ISEQP: case BC_ISNEP: + vk = op == BC_ISEQP; + | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target + | sllg RA, RA, 3 + | lg RB, 0(RA, BASE) + | srag RB, RB, 47 + | la PC, 4(PC) + | cr RB, RD + if (!LJ_HASFFI) goto iseqne_test; + if (vk) { + | jne >3 + | llgh RD, PC_RD + | branchPC RD + |2: + | ins_next + |3: + | cghi RB, LJ_TCDATA; jne <2 + | j ->vmeta_equal_cd + } else { + | je >2 + | cghi RB, LJ_TCDATA; je ->vmeta_equal_cd + | llgh RD, PC_RD + | branchPC RD + |2: + | ins_next + } + break; + + /* -- Unary test and copy ops ------------------------------------------- */ + + case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: + | ins_AD // RA = dst or unused, RD = src, JMP with RD = target + | sllg RD, RD, 3 + | sllg RA, RA, 3 + | lg ITYPE, 0(RD, BASE) + | la PC, 4(PC) + if (op == BC_ISTC || op == BC_ISFC) { + | lgr RB, ITYPE + } + | srag ITYPE, ITYPE, 47 + | clfi ITYPE, LJ_TISTRUECOND + if (op == BC_IST || op == BC_ISTC) { + | jhe >1 + } else { + | jl >1 + } + if (op == BC_ISTC || op == BC_ISFC) { + | stg RB, 0(RA, BASE) + } + | llgh RD, PC_RD + | branchPC RD + |1: // Fallthrough to the next instruction. 
+ | ins_next + break; + + case BC_ISTYPE: + | ins_AD // RA = src, RD = -type + | lghr RD, RD + | sllg RA, RA, 3 + | lg RB, 0(RA, BASE) + | srag RB, RB, 47 + | agr RB, RD + | jne ->vmeta_istype + | ins_next + break; + case BC_ISNUM: + | ins_AD // RA = src, RD = -(TISNUM-1) + | sllg TMPR1, RA, 3 + | lg TMPR1, 0(TMPR1, BASE) + | checknumtp TMPR1, ->vmeta_istype + | ins_next + break; + case BC_MOV: + | ins_AD // RA = dst, RD = src + | sllg RD, RD, 3 + | lg RB, 0(RD, BASE) + | sllg RA, RA, 3 + | stg RB, 0(RA, BASE) + | ins_next_ + break; + case BC_NOT: + | ins_AD // RA = dst, RD = src + | sllg RD, RD, 3 + | sllg RA, RA, 3 + | lg RB, 0(RD, BASE) + | srag RB, RB, 47 + | load_false RC + | clfi RB, LJ_TISTRUECOND + | jl >1 + | load_true RC + |1: + | stg RC, 0(RA, BASE) + | ins_next + break; + case BC_UNM: + | ins_AD // RA = dst, RD = src + | sllg RA, RA, 3 + | sllg RD, RD, 3 + | lg RB, 0(RD, BASE) + | checkint RB, >3 + | lcr RB, RB; jo >2 + |1: + | stg RB, 0(RA, BASE) + | ins_next + |2: + | llihh RB, 0x41e0 // (double)2^31 + | j <1 + |3: + | jh ->vmeta_unm + | // Toggle sign bit. + | llihh TMPR0, 0x8000 + | xgr RB, TMPR0 + | j <1 + break; + case BC_LEN: + | ins_AD // RA = dst, RD = src + | sllg RD, RD, 3 + | lg RD, 0(RD, BASE) + | checkstr RD, >2 + | llgf RD, STR:RD->len + |1: + | sllg RA, RA, 3 + | setint RD + | stg RD, 0(RA, BASE) + | ins_next + |2: + | cghi ITYPE, LJ_TTAB; jne ->vmeta_len + | lgr TAB:CARG1, TAB:RD +#if LJ_52 + | lg TAB:RB, TAB:RD->metatable + | cghi TAB:RB, 0 + | jne >9 + |3: +#endif + |->BC_LEN_Z: + | brasl r14, extern lj_tab_len // (GCtab *t) + | // Length of table returned in r2 (CRET1). + | lgr RD, CRET1 + | llgc RA, PC_RA + | j <1 +#if LJ_52 + |9: // Check for __len. + | tm TAB:RB->nomm, 1<<MM_len + | jne <3 + | j ->vmeta_len // 'no __len' flag NOT set: check. 
+#endif + break; + + /* -- Binary ops -------------------------------------------------------- */ + + |.macro ins_arithpre + | ins_ABC + | sllg RB, RB, 3 + | sllg RC, RC, 3 + | sllg RA, RA, 3 + |.endmacro + | + |.macro ins_arithfp, ins + | ins_arithpre + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + ||switch (vk) { + ||case 0: + | ld f0, 0(RB, BASE) + | ld f1, 0(RC, KBASE) + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, KBASE) + | checknumtp RB, ->vmeta_arith_vno + | checknumtp RC, ->vmeta_arith_vno + | ins f0, f1 + || break; + ||case 1: + | ld f1, 0(RB, BASE) + | ld f0, 0(RC, KBASE) + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, KBASE) + | checknumtp RB, ->vmeta_arith_nvo + | checknumtp RC, ->vmeta_arith_nvo + | ins f0, f1 + || break; + ||default: + | ld f0, 0(RB, BASE) + | ld f1, 0(RC, BASE) + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, BASE) + | checknumtp RB, ->vmeta_arith_vvo + | checknumtp RC, ->vmeta_arith_vvo + | ins f0, f1 + || break; + ||} + | std f0, 0(RA, BASE) + | ins_next + |.endmacro + | + |.macro ins_arithdn, intins + | ins_arithpre + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + ||switch (vk) { + ||case 0: + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, KBASE) + | checkint RB, ->vmeta_arith_vno + | checkint RC, ->vmeta_arith_vno + | intins RB, RC; jo ->vmeta_arith_vno + || break; + ||case 1: + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, KBASE) + | checkint RB, ->vmeta_arith_nvo + | checkint RC, ->vmeta_arith_nvo + | intins RC, RB; jo ->vmeta_arith_nvo + || break; + ||default: + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, BASE) + | checkint RB, ->vmeta_arith_vvo + | checkint RC, ->vmeta_arith_vvo + | intins RB, RC; jo ->vmeta_arith_vvo + || break; + ||} + ||if (vk == 1) { + | // setint RC + | stg RC, 0(RA, BASE) + ||} else { + | // setint RB + | stg RB, 0(RA, BASE) + ||} + | ins_next + |.endmacro + + | // RA = dst, RB = src1 or num const, RC = src2 or num const + case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: + | ins_arithdn ar + break; + case BC_SUBVN: case BC_SUBNV: case 
BC_SUBVV: + | ins_arithdn sr + break; + case BC_MULVN: case BC_MULNV: case BC_MULVV: + | ins_arithpre + | // For multiplication we use msgfr and check if the result + | // fits in an int32_t. + switch(op) { + case BC_MULVN: + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, KBASE) + | checkint RB, ->vmeta_arith_vno + | checkint RC, ->vmeta_arith_vno + | lgfr RB, RB + | msgfr RB, RC + | lgfr RC, RB + | cgr RB, RC; jne ->vmeta_arith_vno + break; + case BC_MULNV: + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, KBASE) + | checkint RB, ->vmeta_arith_nvo + | checkint RC, ->vmeta_arith_nvo + | lgfr RB, RB + | msgfr RB, RC + | lgfr RC, RB + | cgr RB, RC; jne ->vmeta_arith_nvo + break; + default: + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, BASE) + | checkint RB, ->vmeta_arith_vvo + | checkint RC, ->vmeta_arith_vvo + | lgfr RB, RB + | msgfr RB, RC + | lgfr RC, RB + | cgr RB, RC; jne ->vmeta_arith_vvo + break; + } + | llgfr RB, RB + | setint RB + | stg RB, 0(RA, BASE) + | ins_next + break; + case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: + | ins_arithfp ddbr + break; + // TODO: implement fast mod operation. + // x86_64 does floating point mod, however it might be better to use integer mod. + case BC_MODVN: + | j ->vmeta_arith_vno + break; + case BC_MODNV: + | j ->vmeta_arith_nvo + break; + case BC_MODVV: + | j ->vmeta_arith_vvo + break; + case BC_POW: + | ins_ABC + | sllg RB, RB, 3 + | sllg RC, RC, 3 + | ld FARG1, 0(RB, BASE) + | ld FARG2, 0(RC, BASE) + | lg TMPR0, 0(RB, BASE) + | checknumtp TMPR0, ->vmeta_arith_vvo + | lg TMPR0, 0(RC, BASE) + | checknumtp TMPR0, ->vmeta_arith_vvo + | brasl r14, extern pow // double pow(double x, double y), result in f0. 
+ | llgc RA, PC_RA + | sllg RA, RA, 3 + | std f0, 0(RA, BASE) + | ins_next + break; + + case BC_CAT: + | ins_ABC // RA = dst, RB = src_start, RC = src_end + | lg L:CARG1, SAVE_L + | stg BASE, L:CARG1->base + | lgr CARG3, RC + | sgr CARG3, RB + | sllg RC, RC, 3 + | la CARG2, 0(RC, BASE) + |->BC_CAT_Z: + | lgr L:RB, L:CARG1 + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_cat // (lua_State *L, TValue *top, int left) + | // NULL (finished) or TValue * (metamethod) returned in r2 (CRET1). + | lg BASE, L:RB->base + | ltgr RC, CRET1 + | jne ->vmeta_binop + | llgc RB, PC_RB // Copy result to Stk[RA] from Stk[RB]. + | sllg RB, RB, 3 + | llgc RA, PC_RA + | sllg RA, RA, 3 + | lg RC, 0(RB, BASE) + | stg RC, 0(RA, BASE) + | ins_next + break; + + /* -- Constant ops ------------------------------------------------------ */ + + case BC_KSTR: + | ins_AND // RA = dst, RD = str const (~) + | sllg RD, RD, 3 + | lg RD, 0(RD, KBASE) + | settp RD, LJ_TSTR + | sllg RA, RA, 3 + | stg RD, 0(RA, BASE) + | ins_next + break; + case BC_KCDATA: + |.if FFI + | ins_AND // RA = dst, RD = cdata const (~) + | sllg RD, RD, 3 + | sllg RA, RA, 3 + | lg RD, 0(RD, KBASE) + | settp RD, LJ_TCDATA + | stg RD, 0(RA, BASE) + | ins_next + |.endif + break; + case BC_KSHORT: + | ins_AD // RA = dst, RD = signed int16 literal + | // Assumes DUALNUM. + | lhr RD, RD // Sign-extend literal to 32-bits. 
+ | setint RD + | sllg RA, RA, 3 + | stg RD, 0(RA, BASE) + | ins_next + break; + case BC_KNUM: + | ins_AD // RA = dst, RD = num const + | sllg RD, RD, 3 + | ld f0, 0(RD, KBASE) + | sllg RA, RA, 3 + | std f0, 0(RA, BASE) + | ins_next + break; + case BC_KPRI: + | ins_AD // RA = dst, RD = primitive type (~) + | sllg RA, RA, 3 + | sllg RD, RD, 47 + | lghi TMPR0, -1 + | xgr RD, TMPR0 // not + | stg RD, 0(RA, BASE) + | ins_next + break; + case BC_KNIL: + | ins_AD // RA = dst_start, RD = dst_end + | sllg RA, RA, 3 + | sllg RD, RD, 3 + | la RA, 8(RA, BASE) + | la RD, 0(RD, BASE) + | lghi RB, LJ_TNIL + | stg RB, -8(RA) // Sets minimum 2 slots. + |1: + | stg RB, 0(RA) + | la RA, 8(RA) + | clgr RA, RD + | jle <1 + | ins_next + break; + +/* -- Upvalue and function ops ------------------------------------------ */ + + case BC_UGET: + | ins_AD // RA = dst, RD = upvalue # + | sllg RA, RA, 3 + | sllg RD, RD, 3 + | lg LFUNC:RB, -16(BASE) + | cleartp LFUNC:RB + | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RD, LFUNC:RB) + | lg RB, UPVAL:RB->v + | lg RD, 0(RB) + | stg RD, 0(RA, BASE) + | ins_next + break; + case BC_USETV: +#define TV2MARKOFS \ + ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)) + | ins_AD // RA = upvalue #, RD = src + | lg LFUNC:RB, -16(BASE) + | cleartp LFUNC:RB + | sllg RA, RA, 3 + | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB) + | tm UPVAL:RB->closed, 0xff + | lg RB, UPVAL:RB->v + | sllg TMPR1, RD, 3 + | lg RA, 0(TMPR1, BASE) + | stg RA, 0(RB) + | je >1 + | // Check barrier for closed upvalue. + | tmy TV2MARKOFS(RB), LJ_GC_BLACK // isblack(uv) + | jne >2 + |1: + | ins_next + | + |2: // Upvalue is black. Check if new value is collectable and white. + | srag RD, RA, 47 + | ahi RD, -LJ_TISGCV + | clfi RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v) + | jle <1 + | cleartp GCOBJ:RA + | tm GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) + | je <1 + | // Crossed a write barrier. Move the barrier forward. 
+ | lgr CARG2, RB + | lay GL:CARG1, GG_DISP2G(DISPATCH) + | brasl r14, extern lj_gc_barrieruv // (global_State *g, TValue *tv) + | j <1 + break; +#undef TV2MARKOFS + case BC_USETS: + | ins_AND // RA = upvalue #, RD = str const (~) + | lg LFUNC:RB, -16(BASE) + | sllg RA, RA, 3 + | sllg RD, RD, 3 + | cleartp LFUNC:RB + | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB) + | lg STR:RA, 0(RD, KBASE) + | lg RD, UPVAL:RB->v + | settp STR:ITYPE, STR:RA, LJ_TSTR + | stg STR:ITYPE, 0(RD) + | tm UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) + | jne >2 + |1: + | ins_next + | + |2: // Check if string is white and ensure upvalue is closed. + | tm GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str) + | je <1 + | tm UPVAL:RB->closed, 0xff + | je <1 + | // Crossed a write barrier. Move the barrier forward. + | lgr CARG2, RD + | lay GL:CARG1, GG_DISP2G(DISPATCH) + | brasl r14, extern lj_gc_barrieruv // (global_State *g, TValue *tv) + | j <1 + break; + case BC_USETN: + | ins_AD // RA = upvalue #, RD = num const + | lg LFUNC:RB, -16(BASE) + | sllg RA, RA, 3 + | sllg RD, RD, 3 + | cleartp LFUNC:RB + | ld f0, 0(RD, KBASE) + | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB) + | lg RA, UPVAL:RB->v + | std f0, 0(RA) + | ins_next + break; + case BC_USETP: + | ins_AD // RA = upvalue #, RD = primitive type (~) + | lg LFUNC:RB, -16(BASE) + | sllg RA, RA, 3 + | cleartp LFUNC:RB + | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB) + | sllg RD, RD, 47 + | lghi TMPR0, -1 + | xgr RD, TMPR0 + | lg RA, UPVAL:RB->v + | stg RD, 0(RA) + | ins_next + break; + case BC_UCLO: + | ins_AD // RA = level, RD = target + | branchPC RD // Do this first to free RD. 
+ | lg L:RB, SAVE_L + | ltg TMPR0, L:RB->openupval + | je >1 + | stg BASE, L:RB->base + | sllg RA, RA, 3 + | la CARG2, 0(RA, BASE) + | lgr L:CARG1, L:RB + | brasl r14, extern lj_func_closeuv // (lua_State *L, TValue *level) + | lg BASE, L:RB->base + |1: + | ins_next + break; + + case BC_FNEW: + | ins_AND // RA = dst, RD = proto const (~) (holding function prototype) + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | lg CARG3, -16(BASE) + | cleartp CARG3 + | sllg RD, RD, 3 + | lg CARG2, 0(RD, KBASE) // Fetch GCproto *. + | lgr CARG1, L:RB + | stg PC, SAVE_PC + | // (lua_State *L, GCproto *pt, GCfuncL *parent) + | brasl r14, extern lj_func_newL_gc + | // GCfuncL * returned in r2 (CRET1). + | lg BASE, L:RB->base + | llgc RA, PC_RA + | sllg RA, RA, 3 + | settp LFUNC:CRET1, LJ_TFUNC + | stg LFUNC:CRET1, 0(RA, BASE) + | ins_next + break; + case BC_TNEW: + | ins_AD // RA = dst, RD = hbits|asize + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | lg RA, (DISPATCH_GL(gc.total))(DISPATCH) + | clg RA, (DISPATCH_GL(gc.threshold))(DISPATCH) + | stg PC, SAVE_PC + | jhe >5 + |1: + | srlg CARG3, RD, 11 + | llill TMPR0, 0x7ff + | nr RD, TMPR0 + | cr RD, TMPR0 + | je >3 + |2: + | lgr L:CARG1, L:RB + | llgfr CARG2, RD + | brasl r14, extern lj_tab_new // (lua_State *L, uint32_t asize, uint32_t hbits) + | // Table * returned in r2 (CRET1). + | lg BASE, L:RB->base + | llgc RA, PC_RA + | sllg RA, RA, 3 + | settp TAB:CRET1, LJ_TTAB + | stg TAB:CRET1, 0(RA, BASE) + | ins_next + |3: // Turn 0x7ff into 0x801. 
+ | llill RD, 0x801 + | j <2 + |5: + | lgr L:CARG1, L:RB + | brasl r14, extern lj_gc_step_fixtop // (lua_State *L) + | llgh RD, PC_RD + | j <1 + break; + case BC_TDUP: + | ins_AND // RA = dst, RD = table const (~) (holding template table) + | lg L:RB, SAVE_L + | lg RA, (DISPATCH_GL(gc.total))(DISPATCH) + | stg PC, SAVE_PC + | clg RA, (DISPATCH_GL(gc.threshold))(DISPATCH) + | stg BASE, L:RB->base + | jhe >3 + |2: + | sllg RD, RD, 3 + | lg TAB:CARG2, 0(RD, KBASE) + | lgr L:CARG1, L:RB + | brasl r14, extern lj_tab_dup // (lua_State *L, Table *kt) + | // Table * returned in r2 (CRET1). + | lg BASE, L:RB->base + | llgc RA, PC_RA + | settp TAB:CRET1, LJ_TTAB + | sllg RA, RA, 3 + | stg TAB:CRET1, 0(RA, BASE) + | ins_next + |3: + | lgr L:CARG1, L:RB + | brasl r14, extern lj_gc_step_fixtop // (lua_State *L) + | llgh RD, PC_RD // Need to reload RD. + | lghi TMPR0, -1 + | xgr RD, TMPR0 // not RD + | j <2 + break; + + case BC_GGET: + | ins_AND // RA = dst, RD = str const (~) + | lg LFUNC:RB, -16(BASE) + | cleartp LFUNC:RB + | lg TAB:RB, LFUNC:RB->env + | sllg TMPR1, RD, 3 + | lg STR:RC, 0(TMPR1, KBASE) + | j ->BC_TGETS_Z + break; + case BC_GSET: + | ins_AND // RA = src, RD = str const (~) + | lg LFUNC:RB, -16(BASE) + | cleartp LFUNC:RB + | lg TAB:RB, LFUNC:RB->env + | sllg TMPR1, RD, 3 + | lg STR:RC, 0(TMPR1, KBASE) + | j ->BC_TSETS_Z + break; + + case BC_TGETV: + | ins_ABC // RA = dst, RB = table, RC = key + | sllg RB, RB, 3 + | lg TAB:RB, 0(RB, BASE) + | sllg RC, RC, 3 + | lg RC, 0(RC, BASE) + | checktab TAB:RB, ->vmeta_tgetv + | + | // Integer key? + | checkint RC, >5 + | cl RC, TAB:RB->asize // Takes care of unordered, too. + | jhe ->vmeta_tgetv // Not in array part? Use fallback. + | llgfr RC, RC + | sllg RC, RC, 3 + | ag RC, TAB:RB->array + | // Get array slot. + | lg ITYPE, 0(RC) + | cghi ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath. + | je >2 + |1: + | sllg RA, RA, 3 + | stg ITYPE, 0(RA, BASE) + | ins_next + | + |2: // Check for __index if table value is nil. 
+ | lg TAB:TMPR1, TAB:RB->metatable + | cghi TAB:TMPR1, 0 + | je <1 + | tm TAB:TMPR1->nomm, 1<<MM_index + | je ->vmeta_tgetv // 'no __index' flag NOT set: check. + | j <1 + | + |5: // String key? + | cghi ITYPE, LJ_TSTR; jne ->vmeta_tgetv + | cleartp STR:RC + | j ->BC_TGETS_Z + break; + case BC_TGETS: + | ins_ABC + | sllg RB, RB, 3 + | lg TAB:RB, 0(RB, BASE) + | lghi TMPR1, -1 + | xgr RC, TMPR1 + | sllg RC, RC, 3 + | lg STR:RC, 0(RC, KBASE) + | checktab TAB:RB, ->vmeta_tgets + |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr * + | l TMPR1, TAB:RB->hmask + | n TMPR1, STR:RC->sid + | lgfr TMPR1, TMPR1 + | mghi TMPR1, #NODE + | ag NODE:TMPR1, TAB:RB->node + | settp ITYPE, STR:RC, LJ_TSTR + |1: + | cg ITYPE, NODE:TMPR1->key + | jne >4 + | // Get node value. + | lg ITYPE, NODE:TMPR1->val + | cghi ITYPE, LJ_TNIL + | je >5 // Key found, but nil value? + |2: + | sllg RA, RA, 3 + | stg ITYPE, 0(RA, BASE) + | ins_next + | + |4: // Follow hash chain. + | lg NODE:TMPR1, NODE:TMPR1->next + | cghi NODE:TMPR1, 0 + | jne <1 + | // End of hash chain: key not found, nil result. + | lghi ITYPE, LJ_TNIL + | + |5: // Check for __index if table value is nil. + | lg TAB:TMPR1, TAB:RB->metatable + | cghi TAB:TMPR1, 0 + | je <2 // No metatable: done. + | tm TAB:TMPR1->nomm, 1<<MM_index + | jne <2 // 'no __index' flag set: done. + | j ->vmeta_tgets // Caveat: preserve STR:RC. + break; + case BC_TGETB: + | ins_ABC // RA = dst, RB = table, RC = byte literal + | sllg RB, RB, 3 + | lg TAB:RB, 0(RB, BASE) + | checktab TAB:RB, ->vmeta_tgetb + | cl RC, TAB:RB->asize + | jhe ->vmeta_tgetb + | sllg RC, RC, 3 + | ag RC, TAB:RB->array + | // Get array slot. + | lg ITYPE, 0(RC) + | cghi ITYPE, LJ_TNIL + | je >2 + |1: + | sllg RA, RA, 3 + | stg ITYPE, 0(RA, BASE) + | ins_next + | + |2: // Check for __index if table value is nil. + | lg TAB:TMPR1, TAB:RB->metatable + | cghi TAB:TMPR1, 0 + | je <1 + | tm TAB:TMPR1->nomm, 1<<MM_index + | je ->vmeta_tgetb // 'no __index' flag NOT set: check. 
+ | j <1 + break; + case BC_TGETR: + | ins_ABC // RA = dst, RB = table, RC = key + | sllg RB, RB, 3 + | lg TAB:RB, 0(RB, BASE) + | cleartp TAB:RB + | sllg RC, RC, 3 + | llgf RC, 4(RC, BASE) // Load low word (big endian). + | cl RC, TAB:RB->asize + | jhe ->vmeta_tgetr // Not in array part? Use fallback. + | sllg RC, RC, 3 + | ag RC, TAB:RB->array + | // Get array slot. + |->BC_TGETR_Z: + | lg ITYPE, 0(RC) + |->BC_TGETR2_Z: + | sllg RA, RA, 3 + | stg ITYPE, 0(RA, BASE) + | ins_next + break; + + case BC_TSETV: + | ins_ABC // RA = src, RB = table, RC = key + | sllg RB, RB, 3 + | lg TAB:RB, 0(RB, BASE) + | sllg RC, RC, 3 + | lg RC, 0(RC, BASE) + | checktab TAB:RB, ->vmeta_tsetv + | + | // Integer key? + | checkint RC, >5 + | cl RC, TAB:RB->asize // Takes care of unordered, too. + | jhe ->vmeta_tsetv + | llgfr RC, RC + | sllg RC, RC, 3 + | ag RC, TAB:RB->array + | lghi TMPR0, LJ_TNIL + | cg TMPR0, 0(RC) + | je >3 // Previous value is nil? + |1: + | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | jne >7 + |2: // Set array slot. + | sllg RA, RA, 3 + | lg RB, 0(RA, BASE) + | stg RB, 0(RC) + | ins_next + | + |3: // Check for __newindex if previous value is nil. + | lg TAB:TMPR1, TAB:RB->metatable + | cghi TAB:TMPR1, 0 + | je <1 + | tm TAB:TMPR1->nomm, 1<<MM_newindex + | je ->vmeta_tsetv // 'no __newindex' flag NOT set: check. + | j <1 + | + |5: // String key? + | cghi ITYPE, LJ_TSTR; jne ->vmeta_tsetv + | cleartp STR:RC + | j ->BC_TSETS_Z + | + |7: // Possible table write barrier for the value. Skip valiswhite check. 
+ | barrierback TAB:RB, TMPR1 + | j <2 + break; + case BC_TSETS: + | ins_ABC // RA = src, RB = table, RC = str const (~) + | sllg RB, RB, 3 + | lg TAB:RB, 0(RB, BASE) + | lghi TMPR0, -1 + | xgr RC, TMPR0 // ~RC + | sllg RC, RC, 3 + | lg STR:RC, 0(RC, KBASE) + | checktab TAB:RB, ->vmeta_tsets + |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr * + | l TMPR1, TAB:RB->hmask + | n TMPR1, STR:RC->sid + | lgfr TMPR1, TMPR1 + | mghi TMPR1, #NODE + | mvi TAB:RB->nomm, 0 // Clear metamethod cache. + | ag NODE:TMPR1, TAB:RB->node + | settp ITYPE, STR:RC, LJ_TSTR + |1: + | cg ITYPE, NODE:TMPR1->key + | jne >5 + | // Ok, key found. Assumes: offsetof(Node, val) == 0 + | lghi TMPR0, LJ_TNIL + | cg TMPR0, 0(TMPR1) + | je >4 // Previous value is nil? + |2: + | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | jne >7 + |3: // Set node value. + | sllg RA, RA, 3 + | lg ITYPE, 0(RA, BASE) + | stg ITYPE, 0(TMPR1) + | ins_next + | + |4: // Check for __newindex if previous value is nil. + | lg TAB:ITYPE, TAB:RB->metatable + | cghi TAB:ITYPE, 0 + | je <2 + | tm TAB:ITYPE->nomm, 1<<MM_newindex + | je ->vmeta_tsets // 'no __newindex' flag NOT set: check. + | j <2 + | + |5: // Follow hash chain. + | lg NODE:TMPR1, NODE:TMPR1->next + | cghi NODE:TMPR1, 0 + | jne <1 + | // End of hash chain: key not found, add a new one. + | + | // But check for __newindex first. + | lg TAB:TMPR1, TAB:RB->metatable + | cghi TAB:TMPR1, 0 + | je >6 // No metatable: continue. + | tm TAB:TMPR1->nomm, 1<<MM_newindex + | je ->vmeta_tsets // 'no __newindex' flag NOT set: check. + |6: + | stg ITYPE, SAVE_TMP + | lg L:CARG1, SAVE_L + | stg BASE, L:CARG1->base + | la CARG3, SAVE_TMP + | lgr CARG2, TAB:RB + | stg PC, SAVE_PC + | brasl r14, extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) + | // Handles write barrier for the new key. TValue * returned in r2 (CRET1). + | lgr TMPR1, CRET1 + | lg L:CRET1, SAVE_L + | lg BASE, L:CRET1->base + | llgc RA, PC_RA + | j <2 // Must check write barrier for value. 
+ | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, ITYPE + | j <3 + break; + case BC_TSETB: + | ins_ABC // RA = src, RB = table, RC = byte literal + | sllg RB, RB, 3 + | lg TAB:RB, 0(RB, BASE) + | checktab TAB:RB, ->vmeta_tsetb + | cl RC, TAB:RB->asize + | jhe ->vmeta_tsetb + | sllg RC, RC, 3 + | ag RC, TAB:RB->array + | lghi TMPR0, LJ_TNIL + | cg TMPR0, 0(RC) + | je >3 // Previous value is nil? + |1: + | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | jne >7 + |2: // Set array slot. + | sllg RA, RA, 3 + | lg ITYPE, 0(RA, BASE) + | stg ITYPE, 0(RC) + | ins_next + | + |3: // Check for __newindex if previous value is nil. + | lg TAB:TMPR1, TAB:RB->metatable + | cghi TAB:TMPR1, 0 + | je <1 + | tm TAB:TMPR1->nomm, 1<<MM_newindex + | je ->vmeta_tsetb // 'no __newindex' flag NOT set: check. + | j <1 + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMPR1 + | j <2 + break; + case BC_TSETR: + | ins_ABC // RA = src, RB = table, RC = key + | sllg RB, RB, 3 + | lg TAB:RB, 0(RB, BASE) + | cleartp TAB:RB + | sllg RC, RC, 3 + | lg RC, 0(RC, BASE) + | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | jne >7 + |2: + | cl RC, TAB:RB->asize + | jhe ->vmeta_tsetr + | llgfr RC, RC + | sllg RC, RC, 3 + | ag RC, TAB:RB->array + | // Set array slot. + |->BC_TSETR_Z: + | sllg RA, RA, 3 + | lg ITYPE, 0(RA, BASE) + | stg ITYPE, 0(RC) + | ins_next + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMPR1 + | j <2 + break; + + case BC_TSETM: + | ins_AD // RA = base (table at base-1), RD = num const (start index) + |1: + | sllg RA, RA, 3 + | sllg TMPR1, RD, 3 + | llgf TMPR1, 4(TMPR1, KBASE) // Integer constant is in lo-word. + | la RA, 0(RA, BASE) + | lg TAB:RB, -8(RA) // Guaranteed to be a table. 
+ | cleartp TAB:RB + | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | jne >7 + |2: + | llgf RD, SAVE_MULTRES + | aghi RD, -1 + | je >4 // Nothing to copy? + | agr RD, TMPR1 // Compute needed size. + | clgf RD, TAB:RB->asize + | jh >5 // Doesn't fit into array part? + | sgr RD, TMPR1 + | sllg TMPR1, TMPR1, 3 + | ag TMPR1, TAB:RB->array + |3: // Copy result slots to table. + | lg RB, 0(RA) + | la RA, 8(RA) + | stg RB, 0(TMPR1) + | la TMPR1, 8(TMPR1) + | brctg RD, <3 + |4: + | ins_next + | + |5: // Need to resize array part. + | lg L:CARG1, SAVE_L + | stg BASE, L:CARG1->base + | lgr CARG2, TAB:RB + | lgfr CARG3, RD + | lgr L:RB, L:CARG1 + | stg PC, SAVE_PC + | brasl r14, extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) + | lg BASE, L:RB->base + | llgc RA, PC_RA // Restore RA. + | llgh RD, PC_RD // Restore RD. + | j <1 // Retry. + | + |7: // Possible table write barrier for any value. Skip valiswhite check. + | barrierback TAB:RB, RD + | j <2 + break; + + /* -- Calls and vararg handling ----------------------------------------- */ + + case BC_CALL: case BC_CALLM: + | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs + | sllg RA, RA, 3 + | lgr RD, RC + if (op == BC_CALLM) { + | agf NARGS:RD, SAVE_MULTRES + } + | lg LFUNC:RB, 0(RA, BASE) + | checkfunc LFUNC:RB, ->vmeta_call_ra + | la BASE, 16(RA, BASE) + | ins_call + break; + + case BC_CALLMT: + | ins_AD // RA = base, RD = extra_nargs + | a NARGS:RD, SAVE_MULTRES + | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op. + break; + case BC_CALLT: + | ins_AD // RA = base, RD = nargs+1 + | sllg RA, RA, 3 + | la RA, 16(RA, BASE) + | lgr KBASE, BASE // Use KBASE for move + vmeta_call hint. + | lg LFUNC:RB, -16(RA) + | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call + |->BC_CALLT_Z: + | lg PC, -8(BASE) + | tmll PC, FRAME_TYPE + | jne >7 + |1: + | stg LFUNC:RB, -16(BASE) // Copy func+tag down, reloaded below. 
+ | st NARGS:RD, SAVE_MULTRES + | aghi NARGS:RD, -1 + | je >3 + |2: // Move args down. + | lg RB, 0(RA) + | la RA, 8(RA) + | stg RB, 0(KBASE) + | la KBASE, 8(KBASE) + | brctg NARGS:RD, <2 + | + | lg LFUNC:RB, -16(BASE) + |3: + | cleartp LFUNC:RB + | llgf NARGS:RD, SAVE_MULTRES + | llgc TMPR1, LFUNC:RB->ffid + | cghi TMPR1, 1 // (> FF_C) Calling a fast function? + | jh >5 + |4: + | ins_callt + | + |5: // Tailcall to a fast function. + | tmll PC, FRAME_TYPE // Lua frame below? + | jne <4 + | llgc RA, PC_RA + | lcgr RA, RA + | sllg RA, RA, 3 + | lg LFUNC:KBASE, -32(RA, BASE) // Need to prepare KBASE. + | cleartp LFUNC:KBASE + | lg KBASE, LFUNC:KBASE->pc + | lg KBASE, (PC2PROTO(k))(KBASE) + | j <4 + | + |7: // Tailcall from a vararg function. + | aghi PC, -FRAME_VARG + | tmll PC, FRAME_TYPEP + | jne >8 // Vararg frame below? + | sgr BASE, PC // Need to relocate BASE/KBASE down. + | lgr KBASE, BASE + | lg PC, -8(BASE) + | j <1 + |8: + | aghi PC, FRAME_VARG + | j <1 + break; + + case BC_ITERC: + | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1) + | sllg RA, RA, 3 + | la RA, 16(RA, BASE) // fb = base+2 + | lg RB, -32(RA) // Copy state. fb[0] = fb[-4]. + | lg RC, -24(RA) // Copy control var. fb[1] = fb[-3]. + | stg RB, 0(RA) + | stg RC, 8(RA) + | lg LFUNC:RB, -40(RA) // Copy callable. fb[-2] = fb[-5] + | stg LFUNC:RB, -16(RA) + | lghi NARGS:RD, 2+1 // Handle like a regular 2-arg call. + | checkfunc LFUNC:RB, ->vmeta_call + | lgr BASE, RA + | ins_call + break; + + case BC_ITERN: + |.if JIT + | hotloop RB // NYI: add hotloop, record BC_ITERN. + |.endif + |->vm_IITERN: + | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) + | sllg RA, RA, 3 + | lg TAB:RB, -16(RA, BASE) + | cleartp TAB:RB + | llgf RC, -4(RA, BASE) // Get index from control var. + | llgf TMPR1, TAB:RB->asize + | la PC, 4(PC) + | lg ITYPE, TAB:RB->array + |1: // Traverse array part. + | clr RC, TMPR1; jhe >5 // Index points after array part? + | sllg RD, RC, 3 // Warning: won't work if RD==RC! 
+ | lg TMPR0, 0(RD, ITYPE) + | cghi TMPR0, LJ_TNIL; je >4 + | // Copy array slot to returned value. + | lgr RB, TMPR0 + | stg RB, 8(RA, BASE) + | // Return array index as a numeric key. + | setint ITYPE, RC + | stg ITYPE, 0(RA, BASE) + | ahi RC, 1 + | sty RC, -4(RA, BASE) // Update control var. + |2: + | llgh RD, PC_RD // Get target from ITERL. + | branchPC RD + |3: + | ins_next + | + |4: // Skip holes in array part. + | ahi RC, 1 + | j <1 + | + |5: // Traverse hash part. + | sr RC, TMPR1 + |6: + | cl RC, TAB:RB->hmask; jh <3 // End of iteration? Branch to ITERL+1. + | llgfr ITYPE, RC + | mghi ITYPE, #NODE + | ag NODE:ITYPE, TAB:RB->node + | lghi TMPR0, LJ_TNIL + | cg TMPR0, NODE:ITYPE->val; je >7 + | ar TMPR1, RC + | ahi TMPR1, 1 + | // Copy key and value from hash slot. + | lg RB, NODE:ITYPE->key + | lg RC, NODE:ITYPE->val + | stg RB, 0(RA, BASE) + | stg RC, 8(RA, BASE) + | sty TMPR1, -4(RA, BASE) + | j <2 + | + |7: // Skip holes in hash part. + | ahi RC, 1 + | j <6 + break; + + case BC_ISNEXT: + | ins_AD // RA = base, RD = target (points to ITERN) + | sllg RA, RA, 3 + | lg CFUNC:RB, -24(RA, BASE) + | checkfunc CFUNC:RB, >5 + | lg TMPR1, -16(RA, BASE) + | checktptp TMPR1, LJ_TTAB, >5 + | lghi TMPR0, LJ_TNIL + | cg TMPR0, -8(RA, BASE); jne >5 + | llgc TMPR1, CFUNC:RB->ffid + | clfi TMPR1, (uint8_t)FF_next_N; jne >5 + | branchPC RD + | llihl TMPR1, 0x7fff + | iihh TMPR1, 0xfffe + | stg TMPR1, -8(RA, BASE) // Initialize control var. + |1: + | ins_next + |5: // Despecialize bytecode if any of the checks fail. + | lghi TMPR0, BC_JMP + | stcy TMPR0, PC_OP + | branchPC RD + | mvi 3(PC), BC_ITERC + | j <1 + break; + + case BC_VARG: + | ins_ABC // RA = base, RB = nresults+1, RC = numparams + | sllg RA, RA, 3 + | sllg RB, RB, 3 + | sllg RC, RC, 3 + | la TMPR1, (16+FRAME_VARG)(RC, BASE) + | la RA, 0(RA, BASE) + | sg TMPR1, -8(BASE) + | // Note: TMPR1 may now be even _above_ BASE if nargs was < numparams. + | cghi RB, 0 + | je >5 // Copy all varargs? 
+ | lay RB, -8(RA, RB) + | clgr TMPR1, BASE // No vararg slots? + | lghi TMPR0, LJ_TNIL + | jnl >2 + |1: // Copy vararg slots to destination slots. + | lg RC, -16(TMPR1) + | la TMPR1, 8(TMPR1) + | stg RC, 0(RA) + | la RA, 8(RA) + | clgr RA, RB // All destination slots filled? + | jnl >3 + | clgr TMPR1, BASE // No more vararg slots? + | jl <1 + |2: // Fill up remainder with nil. + | stg TMPR0, 0(RA) + | la RA, 8(RA) + | clgr RA, RB + | jl <2 + |3: + | ins_next + | + |5: // Copy all varargs. + | lghi TMPR0, 1 + | st TMPR0, SAVE_MULTRES // MULTRES = 0+1 + | lgr RC, BASE + | slgr RC, TMPR1 + | jno <3 // No vararg slots? (borrow or zero) + | llgfr RB, RC + | srlg RB, RB, 3 + | ahi RB, 1 + | st RB, SAVE_MULTRES // MULTRES = #varargs+1 + | lg L:RB, SAVE_L + | agr RC, RA + | clg RC, L:RB->maxstack + | jh >7 // Need to grow stack? + |6: // Copy all vararg slots. + | lg RC, -16(TMPR1) + | la TMPR1, 8(TMPR1) + | stg RC, 0(RA) + | la RA, 8(RA) + | clgr TMPR1, BASE // No more vararg slots? + | jl <6 + | j <3 + | + |7: // Grow stack for varargs. + | stg BASE, L:RB->base + | stg RA, L:RB->top + | stg PC, SAVE_PC + | sgr TMPR1, BASE // Need delta, because BASE may change. + | st TMPR1, SAVE_TMP_HI + | llgf CARG2, SAVE_MULTRES + | aghi CARG2, -1 + | lgr CARG1, L:RB + | brasl r14, extern lj_state_growstack // (lua_State *L, int n) + | lg BASE, L:RB->base + | lgf TMPR1, SAVE_TMP_HI + | lg RA, L:RB->top + | agr TMPR1, BASE + | j <6 + break; + + /* -- Returns ----------------------------------------------------------- */ + + case BC_RETM: + | ins_AD // RA = results, RD = extra_nresults + | agf RD, SAVE_MULTRES // MULTRES >=1, so RD >=1. + | // Fall through. Assumes BC_RET follows and ins_AD is a no-op. + break; + + case BC_RET: case BC_RET0: case BC_RET1: + | ins_AD // RA = results, RD = nresults+1 + if (op != BC_RET0) { + | sllg RA, RA, 3 + } + |1: + | lg PC, -8(BASE) + | st RD, SAVE_MULTRES // Save nresults+1. + | tmll PC, FRAME_TYPE // Check frame type marker. 
+ | jne >7 // Not returning to a fixarg Lua func? + switch (op) { + case BC_RET: + |->BC_RET_Z: + | lgr KBASE, BASE // Use KBASE for result move. + | aghi RD, -1 + | je >3 + |2: // Move results down. + | lg RB, 0(KBASE, RA) + | stg RB, -16(KBASE) + | la KBASE, 8(KBASE) + | brctg RD, <2 + |3: + | llgf RD, SAVE_MULTRES // Note: MULTRES may be >256. + | llgc RB, PC_RB + |5: + | cgr RB, RD // More results expected? + | jh >6 + break; + case BC_RET1: + | lg RB, 0(BASE, RA) + | stg RB, -16(BASE) + /* fallthrough */ + case BC_RET0: + |5: + | llgc TMPR1, PC_RB + | cgr TMPR1, RD + | jh >6 + default: + break; + } + | llgc RA, PC_RA + | lcgr RA, RA + | sllg RA, RA, 3 + | lay BASE, -16(RA, BASE) // base = base - (RA+2)*8 + | lg LFUNC:KBASE, -16(BASE) + | cleartp LFUNC:KBASE + | lg KBASE, LFUNC:KBASE->pc + | lg KBASE, PC2PROTO(k)(KBASE) + | ins_next + | + |6: // Fill up results with nil. + | lghi TMPR1, LJ_TNIL + if (op == BC_RET) { + | stg TMPR1, -16(KBASE) // Note: relies on shifted base. + | la KBASE, 8(KBASE) + } else { + | sllg RC, RD, 3 // RC used as temp. + | stg TMPR1, -24(RC, BASE) + } + | la RD, 1(RD) + | j <5 + | + |7: // Non-standard return case. + | lay RB, -FRAME_VARG(PC) + | tmll RB, FRAME_TYPEP + | jne ->vm_return + | // Return from vararg function: relocate BASE down and RA up. + | sgr BASE, RB + if (op != BC_RET0) { + | agr RA, RB + } + | j <1 + break; + + /* -- Loops and branches ------------------------------------------------ */ + + |.define FOR_IDX, 0(RA) + |.define FOR_STOP, 8(RA) + |.define FOR_STEP, 16(RA) + |.define FOR_EXT, 24(RA) + + case BC_FORL: + |.if JIT + | hotloop RB + |.endif + | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. 
+ break; + + case BC_JFORI: + case BC_JFORL: +#if !LJ_HASJIT + break; +#endif + case BC_FORI: + case BC_IFORL: + vk = (op == BC_IFORL || op == BC_JFORL); + | ins_AJ // RA = base, RD = target (after end of loop or start of loop) + | sllg RA, RA, 3 + | la RA, 0(RA, BASE) + | lg RB, FOR_IDX + | checkint RB, >9 + | lg TMPR1, FOR_STOP + if (!vk) { + | checkint TMPR1, ->vmeta_for + | lg ITYPE, FOR_STEP + | chi ITYPE, 0; jl >5 + | srag ITYPE, ITYPE, 47 + | cghi ITYPE, LJ_TISNUM; jne ->vmeta_for + } else { +#ifdef LUA_USE_ASSERT + | // lg TMPR1, FOR_STOP + | checkinttp TMPR1, ->assert_bad_for_arg_type + | lg TMPR0, FOR_STEP + | checkinttp TMPR0, ->assert_bad_for_arg_type +#endif + | lg ITYPE, FOR_STEP + | chi ITYPE, 0; jl >5 + | ar RB, ITYPE; jo >1 + | setint RB + | stg RB, FOR_IDX + } + | cr RB, TMPR1 + | stg RB, FOR_EXT + if (op == BC_FORI) { + | jle >7 + |1: + |6: + | branchPC RD + } else if (op == BC_JFORI) { + | branchPC RD + | llgh RD, PC_RD + | jle =>BC_JLOOP + |1: + |6: + } else if (op == BC_IFORL) { + | jh >7 + |6: + | branchPC RD + |1: + } else { + | jle =>BC_JLOOP + |1: + |6: + } + |7: + | ins_next + | + |5: // Invert check for negative step. + if (!vk) { + | srag ITYPE, ITYPE, 47 + | cghi ITYPE, LJ_TISNUM; jne ->vmeta_for + } else { + | ar RB, ITYPE; jo <1 + | setint RB + | stg RB, FOR_IDX + } + | cr RB, TMPR1 + | stg RB, FOR_EXT + if (op == BC_FORI) { + | jhe <7 + } else if (op == BC_JFORI) { + | branchPC RD + | llgh RD, PC_RD + | jhe =>BC_JLOOP + } else if (op == BC_IFORL) { + | jl <7 + } else { + | jhe =>BC_JLOOP + } + | j <6 + |9: // Fallback to FP variant. 
+ if (!vk) { + | jhe ->vmeta_for + } + if (!vk) { + | lg TMPR0, FOR_STOP + | checknumtp TMPR0, ->vmeta_for + } else { +#ifdef LUA_USE_ASSERT + | lg TMPR0, FOR_STOP + | checknumtp TMPR0, ->assert_bad_for_arg_type + | lg TMPR0, FOR_STEP + | checknumtp TMPR0, ->assert_bad_for_arg_type +#endif + } + | lg RB, FOR_STEP + if (!vk) { + | checknum RB, ->vmeta_for + } + | ld f0, FOR_IDX + | ld f1, FOR_STOP + if (vk) { + | adb f0, FOR_STEP + | std f0, FOR_IDX + } + | cghi RB, 0; jl >3 + | cdbr f1, f0 + |1: + | std f0, FOR_EXT + if (op == BC_FORI) { + | jnl <7 + } else if (op == BC_JFORI) { + | branchPC RD + | llgh RD, PC_RD + | jnl =>BC_JLOOP + } else if (op == BC_IFORL) { + | jl <7 + } else { + | jnl =>BC_JLOOP + } + | j <6 + | + |3: // Invert comparison if step is negative. + | cdbr f0, f1 + | j <1 + break; + + case BC_ITERL: + |.if JIT + | hotloop RB + |.endif + | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. + break; + + case BC_JITERL: +#if !LJ_HASJIT + break; +#endif + case BC_IITERL: + | ins_AJ // RA = base, RD = target + | sllg RA, RA, 3 + | la RA, 0(RA, BASE) + | lg RB, 0(RA) + | cghi RB, LJ_TNIL; je >1 // Stop if iterator returned nil. + if (op == BC_JITERL) { + | stg RB, -8(RA) + | j =>BC_JLOOP + } else { + | branchPC RD // Otherwise save control var + branch. + | stg RB, -8(RA) + } + |1: + | ins_next + break; + + case BC_LOOP: + | ins_A // RA = base, RD = target (loop extent) + | // Note: RA/RD is only used by trace recorder to determine scope/extent + | // This opcode does NOT jump, it's only purpose is to detect a hot loop. + |.if JIT + | hotloop RB + |.endif + | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. 
+ break; + + case BC_ILOOP: + | ins_A // RA = base, RD = target (loop extent) + | ins_next + break; + + case BC_JLOOP: + | stg r0, 0 + | stg r0, 0 + break; + + case BC_JMP: + | ins_AJ // RA = unused, RD = target + | branchPC RD + | ins_next + break; + + /* -- Function headers -------------------------------------------------- */ + + /* + ** Reminder: A function may be called with func/args above L->maxstack, + ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot, + ** too. This means all FUNC* ops (including fast functions) must check + ** for stack overflow _before_ adding more slots! + */ + + case BC_FUNCF: + |.if JIT + | stg r0, 0 + |.endif + case BC_FUNCV: /* NYI: compiled vararg functions. */ + | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op. + break; + + case BC_JFUNCF: +#if !LJ_HASJIT + break; +#endif + case BC_IFUNCF: + | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 + | lg KBASE, (PC2PROTO(k)-4)(PC) + | lg L:RB, SAVE_L + | sllg RA, RA, 3 + | la RA, 0(RA, BASE) // Top of frame. + | clg RA, L:RB->maxstack + | jh ->vm_growstack_f + | llgc RA, (PC2PROTO(numparams)-4)(PC) + | clgr NARGS:RD, RA // Check for missing parameters. + | jle >3 + |2: + if (op == BC_JFUNCF) { + | llgh RD, PC_RD + | j =>BC_JLOOP + } else { + | ins_next + } + | + |3: // Clear missing parameters. + | sllg TMPR1, NARGS:RD, 3 + | lghi TMPR0, LJ_TNIL + |4: + | stg TMPR0, -8(TMPR1, BASE) + | la TMPR1, 8(TMPR1) + | la RD, 1(RD) + | clgr RD, RA + | jle <4 + | j <2 + break; + + case BC_JFUNCV: +#if !LJ_HASJIT + break; +#endif + | stg r0, 0 // NYI: compiled vararg functions + break; /* NYI: compiled vararg functions. */ + + case BC_IFUNCV: + | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 + | sllg TMPR1, NARGS:RD, 3 + | la RB, (FRAME_VARG+8)(TMPR1) + | la RD, 8(TMPR1, BASE) + | lg LFUNC:KBASE, -16(BASE) + | stg RB, -8(RD) // Store delta + FRAME_VARG. + | stg LFUNC:KBASE, -16(RD) // Store copy of LFUNC. 
+ | lg L:RB, SAVE_L + | sllg RA, RA, 3 + | la RA, 0(RA, RD) + | cg RA, L:RB->maxstack + | jh ->vm_growstack_v // Need to grow stack. + | lgr RA, BASE + | lgr BASE, RD + | llgc RB, (PC2PROTO(numparams)-4)(PC) + | cghi RB, 0 + | je >2 + | aghi RA, 8 + | lghi TMPR1, LJ_TNIL + |1: // Copy fixarg slots up to new frame. + | la RA, 8(RA) + | cgr RA, BASE + | jnl >3 // Less args than parameters? + | lg KBASE, -16(RA) + | stg KBASE, 0(RD) + | la RD, 8(RD) + | stg TMPR1, -16(RA) // Clear old fixarg slot (help the GC). + | brctg RB, <1 + |2: + if (op == BC_JFUNCV) { + | llgh RD, PC_RD + | j =>BC_JLOOP + } else { + | lg KBASE, (PC2PROTO(k)-4)(PC) + | ins_next + } + | + |3: // Clear missing parameters. + | stg TMPR1, 0(RD) // TMPR1=LJ_TNIL (-1) here. + | la RD, 8(RD) + | brctg RB, <3 + | j <2 + break; + + case BC_FUNCC: + case BC_FUNCCW: + | ins_AD // BASE = new base, RD = nargs+1 + | lg CFUNC:RB, -16(BASE) + | cleartp CFUNC:RB + | lg KBASE, CFUNC:RB->f + | lg L:RB, SAVE_L + | sllg RD, NARGS:RD, 3 + | lay RD, -8(RD,BASE) + | stg BASE, L:RB->base + | la RA, (8*LUA_MINSTACK)(RD) + | clg RA, L:RB->maxstack + | stg RD, L:RB->top + | lgr CARG1, L:RB + if (op != BC_FUNCC) { + | lgr CARG2, KBASE + } + | jh ->vm_growstack_c // Need to grow stack. + | set_vmstate C + if (op == BC_FUNCC) { + | basr r14, KBASE // (lua_State *L) + } else { + | // (lua_State *L, lua_CFunction f) + | lg TMPR1, (DISPATCH_GL(wrapf))(DISPATCH) + | basr r14, TMPR1 + } + | // nresults returned in r2 (CRET1). + | lgr RD, CRET1 + | lg BASE, L:RB->base + | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH) + | set_vmstate INTERP + | sllg TMPR1, RD, 3 + | la RA, 0(TMPR1, BASE) + | lcgr RA, RA + | ag RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 + | lg PC, -8(BASE) // Fetch PC of caller. 
+ | j ->vm_returnc + break; + + /* ---------------------------------------------------------------------- */ + + default: + fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); + exit(2); + break; + } +} + +static int build_backend(BuildCtx *ctx) /* Drives emission of the whole VM backend; returns BC__MAX. */ +{ + int op; + dasm_growpc(Dst, BC__MAX); /* DynASM call: size the PC-label table for BC__MAX labels. */ + build_subroutines(ctx); /* Subroutines are emitted ahead of the per-opcode code. */ + |.code_op + for (op = 0; op < BC__MAX; op++) /* One build_ins() call per opcode, in ascending order. */ + build_ins(ctx, (BCOp)op, op); + return BC__MAX; +} + +/* Emit pseudo frame-info for all assembler functions. Output is assembler text written to ctx->fp and is produced only in BUILD_elfasm mode: a .debug_frame CIE plus an FDE spanning fcofs bytes from .Lbegin, a second FDE for lj_vm_ffi_call (codesz - fcofs bytes) when LJ_HASFFI is set, and the matching .eh_frame records unless LJ_NO_UNWIND is set. */ +static void emit_asm_debug(BuildCtx *ctx) +{ + int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); /* Byte offset of the vm_ffi_call global from the start of ctx->code. */ + switch (ctx->mode) { + case BUILD_elfasm: + fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); + fprintf(ctx->fp, + ".Lframe0:\n" + "\t.long .LECIE0-.LSCIE0\n" + ".LSCIE0:\n" + "\t.long 0xffffffff\n" /* CIE id (.debug_frame) */ + "\t.byte 0x1\n" /* version */ + "\t.string \"\"\n" /* no augmentation */ + "\t.uleb128 1\n" /* code alignment factor */ + "\t.sleb128 -8\n" /* data alignment factor */ + "\t.byte 0xe\n" /* return address column: r14 (not an opcode here) */ + "\t.byte 0xc\n\t.uleb128 0xf\n\t.uleb128 160\n" /* def_cfa r15, 160 */ + "\t.align 8\n" + ".LECIE0:\n\n"); + fprintf(ctx->fp, + ".LSFDE0:\n" + "\t.long .LEFDE0-.LASFDE0\n" + ".LASFDE0:\n" + "\t.long .Lframe0\n" + "\t.quad .Lbegin\n" /* initial location */ + "\t.quad %d\n" /* address range: fcofs */ + "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ + "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */ + "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */ + "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */ + "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */ + "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */ + "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */ + "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */ + "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */ + "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */ + "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */ + "\t.align 8\n" + ".LEFDE0:\n\n", fcofs, CFRAME_SIZE+160); +#if LJ_HASFFI + fprintf(ctx->fp, + ".LSFDE1:\n" + "\t.long .LEFDE1-.LASFDE1\n" + ".LASFDE1:\n" + "\t.long .Lframe0\n" + "\t.quad lj_vm_ffi_call\n" + "\t.quad %d\n" + "\t.byte 0xe\n\t.uleb128 160\n" /* def_cfa_offset */ + 
"\t.byte 0xd\n\t.uleb128 0xd\n" /* def_cfa_register r13 (FP) */ + "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */ + "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */ + "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */ + "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */ + "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */ + "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */ + "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */ + "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */ + "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */ + "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */ + "\t.align 8\n" + ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); +#endif /* LJ_HASFFI */ +#if !LJ_NO_UNWIND + fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); + fprintf(ctx->fp, + ".Lframe1:\n" + "\t.long .LECIE1-.LSCIE1\n" + ".LSCIE1:\n" + "\t.long 0\n" /* CIE id (.eh_frame) */ + "\t.byte 0x1\n" + "\t.string \"zPR\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -8\n" + "\t.byte 0xe\n" /* return address column: r14 */ + "\t.uleb128 6\n" /* augmentation length */ + "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.long lj_err_unwind_dwarf-.\n" /* personality routine */ + "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.byte 0xc\n\t.uleb128 0xf\n\t.uleb128 160\n" /* def_cfa r15, 160 */ + "\t.align 8\n" + ".LECIE1:\n\n"); + fprintf(ctx->fp, + ".LSFDE2:\n" + "\t.long .LEFDE2-.LASFDE2\n" + ".LASFDE2:\n" + "\t.long .LASFDE2-.Lframe1\n" + "\t.long .Lbegin-.\n" + "\t.long %d\n" + "\t.uleb128 0\n" /* augmentation length */ + "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ + "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */ + "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */ + "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */ + "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */ + "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */ + "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */ + "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */ + "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */ + "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */ + "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */ + "\t.align 8\n" + ".LEFDE2:\n\n", fcofs, CFRAME_SIZE+160); 
+#if LJ_HASFFI + fprintf(ctx->fp, + ".Lframe2:\n" + "\t.long .LECIE2-.LSCIE2\n" + ".LSCIE2:\n" + "\t.long 0\n" /* CIE id (.eh_frame) */ + "\t.byte 0x1\n" + "\t.string \"zR\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -8\n" + "\t.byte 0xe\n" /* return address column: r14 */ + "\t.uleb128 1\n" /* augmentation length */ + "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.byte 0xc\n\t.uleb128 0xf\n\t.uleb128 160\n" /* def_cfa r15, 160 */ + "\t.align 8\n" + ".LECIE2:\n\n"); + fprintf(ctx->fp, + ".LSFDE3:\n" + "\t.long .LEFDE3-.LASFDE3\n" + ".LASFDE3:\n" + "\t.long .LASFDE3-.Lframe2\n" + "\t.long lj_vm_ffi_call-.\n" + "\t.long %d\n" + "\t.uleb128 0\n" /* augmentation length */ + "\t.byte 0xe\n\t.uleb128 160\n" /* def_cfa_offset */ + "\t.byte 0xd\n\t.uleb128 0xd\n" /* def_cfa_register r13 (FP) */ + "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */ + "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */ + "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */ + "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */ + "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */ + "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */ + "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */ + "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */ + "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */ + "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */ + "\t.align 8\n" + ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); +#endif /* LJ_HASFFI */ +#endif /* !LJ_NO_UNWIND */ + break; + default: /* No other modes. */ + break; + } +}
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor