Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
openSUSE:Leap:15.5:Update
luajit
luajit-s390x.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File luajit-s390x.patch of Package luajit
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/dynasm/dasm_s390x.h luajit-2.1.0~beta3+git.1624618403.e9577376/dynasm/dasm_s390x.h --- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/dynasm/dasm_s390x.h 1969-12-31 18:00:00.000000000 -0600 +++ luajit-2.1.0~beta3+git.1624618403.e9577376/dynasm/dasm_s390x.h 2021-10-05 12:36:23.421160526 -0500 @@ -0,0 +1,546 @@ +/* +** DynASM s390x encoding engine. +** Copyright (C) 2005-2016 Mike Pall. All rights reserved. +** Released under the MIT license. See dynasm.lua for full copyright notice. +*/ + +#include <stddef.h> +#include <stdarg.h> +#include <string.h> +#include <stdlib.h> + +#define DASM_ARCH "s390x" + +#ifndef DASM_EXTERN +#define DASM_EXTERN(a,b,c,d) 0 +#endif + +/* Action definitions. */ +enum { + DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, + /* The following actions need a buffer position. */ + DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, + /* The following actions also have an argument. */ + DASM_REL_PC, DASM_LABEL_PC, + DASM_DISP12, DASM_DISP20, + DASM_IMM8, DASM_IMM16, DASM_IMM32, + DASM_LEN8R,DASM_LEN4HR,DASM_LEN4LR, + DASM__MAX +}; + +/* Maximum number of section buffer positions for a single dasm_put() call. */ +#define DASM_MAXSECPOS 25 + +/* DynASM encoder status codes. Action list offset or number are or'ed in. */ +#define DASM_S_OK 0x00000000 +#define DASM_S_NOMEM 0x01000000 +#define DASM_S_PHASE 0x02000000 +#define DASM_S_MATCH_SEC 0x03000000 +#define DASM_S_RANGE_I 0x11000000 +#define DASM_S_RANGE_SEC 0x12000000 +#define DASM_S_RANGE_LG 0x13000000 +#define DASM_S_RANGE_PC 0x14000000 +#define DASM_S_RANGE_REL 0x15000000 +#define DASM_S_UNDEF_LG 0x21000000 +#define DASM_S_UNDEF_PC 0x22000000 + +/* Macros to convert positions (8 bit section + 24 bit index). 
*/ +#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) +#define DASM_POS2BIAS(pos) ((pos)&0xff000000) +#define DASM_SEC2POS(sec) ((sec)<<24) +#define DASM_POS2SEC(pos) ((pos)>>24) +#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) + +/* Action list type. */ +typedef const unsigned short *dasm_ActList; + +/* Per-section structure. */ +typedef struct dasm_Section { + int *rbuf; /* Biased buffer pointer (negative section bias). */ + int *buf; /* True buffer pointer. */ + size_t bsize; /* Buffer size in bytes. */ + int pos; /* Biased buffer position. */ + int epos; /* End of biased buffer position - max single put. */ + int ofs; /* Byte offset into section. */ +} dasm_Section; + +/* Core structure holding the DynASM encoding state. */ +struct dasm_State { + size_t psize; /* Allocated size of this structure. */ + dasm_ActList actionlist; /* Current actionlist pointer. */ + int *lglabels; /* Local/global chain/pos ptrs. */ + size_t lgsize; + int *pclabels; /* PC label chains/pos ptrs. */ + size_t pcsize; + void **globals; /* Array of globals (bias -10). */ + dasm_Section *section; /* Pointer to active section. */ + size_t codesize; /* Total size of all code sections. */ + int maxsection; /* 0 <= sectionidx < maxsection. */ + int status; /* Status code. */ + dasm_Section sections[1]; /* All sections. Alloc-extended. */ +}; + +/* The size of the core structure depends on the max. number of sections. */ +#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) + + +/* Initialize DynASM state. */ +void dasm_init(Dst_DECL, int maxsection) +{ + dasm_State *D; + size_t psz = 0; + int i; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; + D->psize = psz; + D->lglabels = NULL; + D->lgsize = 0; + D->pclabels = NULL; + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; + for (i = 0; i < maxsection; i++) { + D->sections[i].buf = NULL; /* Need this for pass3. 
*/ + D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); + D->sections[i].bsize = 0; + D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ + } +} + +/* Free DynASM state. */ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) + DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) + DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). */ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl - 10; /* Negative bias to compensate for locals. */ + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10 + maxgl) * sizeof(int)); +} + +/* Grow PC label array. Can be called after dasm_setup(), too. */ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc * sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels) + osz), 0, D->pcsize - osz); +} + +/* Setup encoder. 
*/ +void dasm_setup(Dst_DECL, const void *actionlist) +{ + dasm_State *D = Dst_REF; + int i; + D->actionlist = (dasm_ActList) actionlist; + D->status = DASM_S_OK; + D->section = &D->sections[0]; + memset((void *)D->lglabels, 0, D->lgsize); + if (D->pclabels) + memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].ofs = 0; + } +} + + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) { \ + D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) +#define CKPL(kind, st) \ + do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ + D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) +#else +#define CK(x, st) ((void)0) +#define CKPL(kind, st) ((void)0) +#endif + +/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ +void dasm_put(Dst_DECL, int start, ...) +{ + va_list ap; + dasm_State *D = Dst_REF; + dasm_ActList p = D->actionlist + start; + dasm_Section *sec = D->section; + int pos = sec->pos, ofs = sec->ofs; + int *b; + + if (pos >= sec->epos) { + DASM_M_GROW(Dst, int, sec->buf, sec->bsize, + sec->bsize + 2 * DASM_MAXSECPOS * sizeof(int)); + sec->rbuf = sec->buf - DASM_POS2BIAS(pos); + sec->epos = + (int)sec->bsize / sizeof(int) - DASM_MAXSECPOS + DASM_POS2BIAS(pos); + } + + b = sec->rbuf; + b[pos++] = start; + + va_start(ap, start); + while (1) { + unsigned short ins = *p++; + unsigned short action = ins; + if (action >= DASM__MAX) { + ofs += 2; + continue; + } + + int *pl, n = action >= DASM_REL_PC ? 
va_arg(ap, int) : 0; + switch (action) { + case DASM_STOP: + goto stop; + case DASM_SECTION: + n = *p++ & 255; + CK(n < D->maxsection, RANGE_SEC); + D->section = &D->sections[n]; + goto stop; + case DASM_ESC: + p++; + ofs += 2; + break; + case DASM_REL_EXT: + p++; + ofs += 4; + break; + case DASM_ALIGN: + ofs += *p++; + b[pos++] = ofs; + break; + case DASM_REL_LG: + if (p[-2] >> 12 == 0xc) { /* RIL instruction needs 32-bit immediate. */ + ofs += 2; + } + n = *p++ - 10; + pl = D->lglabels + n; + /* Bkwd rel or global. */ + if (n >= 0) { + CK(n >= 10 || *pl < 0, RANGE_LG); + CKPL(lg, LG); + goto putrel; + } + pl += 10; + n = *pl; + if (n < 0) + n = 0; /* Start new chain for fwd rel if label exists. */ + goto linkrel; + case DASM_REL_PC: + if (p[-2] >> 12 == 0xc) { /* RIL instruction needs 32-bit immediate. */ + ofs += 2; + } + pl = D->pclabels + n; + CKPL(pc, PC); + putrel: + n = *pl; + if (n < 0) { /* Label exists. Get label pos and store it. */ + b[pos] = -n; + } else { + linkrel: + b[pos] = n; /* Else link to rel chain, anchored at label. */ + *pl = pos; + } + ofs += 2; + pos++; + break; + case DASM_LABEL_LG: + pl = D->lglabels + *p++ - 10; + CKPL(lg, LG); + goto putlabel; + case DASM_LABEL_PC: + pl = D->pclabels + n; + CKPL(pc, PC); + putlabel: + n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ + while (n > 0) { + int *pb = DASM_POS2PTR(D, n); + n = *pb; + *pb = pos; + } + *pl = -pos; /* Label exists now. */ + b[pos++] = ofs; /* Store pass1 offset estimate. */ + break; + case DASM_IMM8: + b[pos++] = n; + break; + case DASM_IMM16: + CK(((short)n) == n || ((unsigned short)n) == n, RANGE_I); /* TODO: is this the right way to handle unsigned immediates? 
*/ + ofs += 2; + b[pos++] = n; + break; + case DASM_IMM32: + ofs += 4; + b[pos++] = n; + break; + case DASM_DISP20: + CK(-(1 << 19) <= n && n < (1 << 19), RANGE_I); + b[pos++] = n; + break; + case DASM_DISP12: + CK((n >> 12) == 0, RANGE_I); + b[pos++] = n; + break; + case DASM_LEN8R: + CK(n >= 1 && n <= 256, RANGE_I); + b[pos++] = n; + break; + case DASM_LEN4HR: + case DASM_LEN4LR: + CK(n >= 1 && n <= 128, RANGE_I); + b[pos++] = n; + break; + } + } +stop: + va_end(ap); + sec->pos = pos; + sec->ofs = ofs; +} + +#undef CK + +/* Pass 2: Link sections, shrink aligns, fix label offsets. */ +int dasm_link(Dst_DECL, size_t * szp) +{ + dasm_State *D = Dst_REF; + int secnum; + int ofs = 0; + +#ifdef DASM_CHECKS + *szp = 0; + if (D->status != DASM_S_OK) + return D->status; + { + int pc; + for (pc = 0; pc * sizeof(int) < D->pcsize; pc++) + if (D->pclabels[pc] > 0) + return DASM_S_UNDEF_PC | pc; + } +#endif + + { /* Handle globals not defined in this translation unit. */ + int idx; + for (idx = 20; idx * sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { + int *pb = DASM_POS2PTR(D, n); + n = *pb; + *pb = -idx; + } + } + } + + /* Combine all code sections. No support for data sections (yet). 
*/ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->rbuf; + int pos = DASM_SEC2POS(secnum); + int lastpos = sec->pos; + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; + while (1) { + unsigned short ins = *p++; + unsigned short action = ins; + switch (action) { + case DASM_STOP: + case DASM_SECTION: + goto stop; + case DASM_ESC: + p++; + break; + case DASM_REL_EXT: + p++; + break; + case DASM_ALIGN: + ofs -= (b[pos++] + ofs) & *p++; + break; + case DASM_REL_LG: + case DASM_REL_PC: + p++; + pos++; + break; + case DASM_LABEL_LG: + case DASM_LABEL_PC: + p++; + b[pos++] += ofs; + break; + case DASM_IMM8: + case DASM_IMM16: + case DASM_IMM32: + case DASM_DISP20: + case DASM_DISP12: + case DASM_LEN8R: + case DASM_LEN4HR: + case DASM_LEN4LR: + pos++; + break; + } + } + stop:(void)0; + } + ofs += sec->ofs; /* Next section starts right after current section. */ + } + + D->codesize = ofs; /* Total size of all code sections */ + *szp = ofs; + return DASM_S_OK; +} + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) +#else +#define CK(x, st) ((void)0) +#endif + +/* Pass 3: Encode sections. */ +int dasm_encode(Dst_DECL, void *buffer) +{ + dasm_State *D = Dst_REF; + char *base = (char *)buffer; + unsigned short *cp = (unsigned short *)buffer; + int secnum; + + /* Encode all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->buf; + int *endb = sec->rbuf + sec->pos; + + while (b != endb) { + dasm_ActList p = D->actionlist + *b++; + while (1) { + unsigned short ins = *p++; + unsigned short action = ins; + int n = (action >= DASM_ALIGN && action < DASM__MAX) ? 
*b++ : 0; + switch (action) { + case DASM_STOP: + case DASM_SECTION: + goto stop; + case DASM_ESC: + *cp++ = *p++; + break; + case DASM_REL_EXT: + n = DASM_EXTERN(Dst, (unsigned char *)cp, *p++, 1) - 4; + goto patchrel; + case DASM_ALIGN: + ins = *p++; + /* TODO: emit 4-byte noprs instead of 2-byte nops where possible. */ + while ((((char *)cp - base) & ins)) + *cp++ = 0x0700; /* nop */ + break; + case DASM_REL_LG: + CK(n >= 0, UNDEF_LG); + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base); + p++; /* skip argument */ + patchrel: + /* Offsets are halfword aligned (so need to be halved). */ + n += 2; /* Offset is relative to start of instruction. */ + if (cp[-1] >> 12 == 0xc) { + *cp++ = n >> 17; + } else { + CK(-(1 << 16) <= n && n < (1 << 16) && (n & 1) == 0, RANGE_LG); + } + *cp++ = n >> 1; + break; + case DASM_LABEL_LG: + ins = *p++; + if (ins >= 20) + D->globals[ins - 10] = (void *)(base + n); + break; + case DASM_LABEL_PC: + break; + case DASM_IMM8: + cp[-1] |= n & 0xff; + break; + case DASM_IMM16: + *cp++ = n; + break; + case DASM_IMM32: + *cp++ = n >> 16; + *cp++ = n; + break; + case DASM_DISP20: + cp[-2] |= n & 0xfff; + cp[-1] |= (n >> 4) & 0xff00; + break; + case DASM_DISP12: + cp[-1] |= n & 0xfff; + break; + case DASM_LEN8R: + cp[-1] |= (n - 1) & 0xff; + break; + case DASM_LEN4HR: + cp[-1] |= ((n - 1) << 4) & 0xf0; + break; + case DASM_LEN4LR: + cp[-1] |= (n - 1) & 0x0f; + break; + default: + *cp++ = ins; + break; + } + } + stop:(void)0; + } + } + + if (base + D->codesize != (char *)cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} + +#undef CK + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc * sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) + return *DASM_POS2PTR(D, -pos); + if (pos > 0) + return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. 
*/ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. */ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { + D->status = DASM_S_UNDEF_LG | i; + break; + } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC | (D->section - D->sections); + return D->status; +} +#endif diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/dynasm/dasm_s390x.lua luajit-2.1.0~beta3+git.1624618403.e9577376/dynasm/dasm_s390x.lua --- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/dynasm/dasm_s390x.lua 1969-12-31 18:00:00.000000000 -0600 +++ luajit-2.1.0~beta3+git.1624618403.e9577376/dynasm/dasm_s390x.lua 2021-10-05 12:36:23.461160737 -0500 @@ -0,0 +1,1633 @@ +------------------------------------------------------------------------------ +-- DynASM s390x module. +-- +-- Copyright (C) 2005-2016 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ + +-- Module information: +local _info = { + arch = "s390x", + description = "DynASM s390x module", + version = "1.4.0", + vernum = 10400, + release = "2015-10-18", + author = "Mike Pall", + license = "MIT", +} + +-- Exported glue functions for the arch-specific module. +local _M = { _info = _info } + +-- Cache library functions. 
+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs +local assert, setmetatable, rawget = assert, setmetatable, rawget +local _s = string +local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char +local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub +local concat, sort, insert = table.concat, table.sort, table.insert +local bit = bit or require("bit") +local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift +local ror, tohex = bit.ror, bit.tohex + +-- Inherited tables and callbacks. +local g_opt, g_arch +local wline, werror, wfatal, wwarn + +-- Action name list. +-- CHECK: Keep this in sync with the C code! +local action_names = { + "STOP", "SECTION", "ESC", "REL_EXT", + "ALIGN", "REL_LG", "LABEL_LG", + "REL_PC", "LABEL_PC", "DISP12", "DISP20", "IMM8", "IMM16", "IMM32", "LEN8R","LEN4HR","LEN4LR", +} + +-- Maximum number of section buffer positions for dasm_put(). +-- CHECK: Keep this in sync with the C code! +local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. + +-- Action name -> action number. +local map_action = {} +local max_action = 0 +for n, name in ipairs(action_names) do + map_action[name] = n-1 + max_action = n +end + +-- Action list buffer. +local actlist = {} + +-- Argument list for next dasm_put(). Start with offset 0 into action list. +local actargs = { 0 } + +-- Current number of section buffer positions for dasm_put(). +local secpos = 1 + +------------------------------------------------------------------------------ + +-- Dump action names and numbers. +local function dumpactions(out) + out:write("DynASM encoding engine action codes:\n") + for n, name in ipairs(action_names) do + local num = map_action[name] + out:write(format(" %-10s %02X %d\n", name, num, num)) + end + out:write("\n") +end + +local function havearg(a) + return a == "ESC" or + a == "SECTION" or + a == "REL_LG" or + a == "LABEL_LG" or + a == "REL_EXT" +end + +-- Write action list buffer as a huge static C array. 
+local function writeactions(out, name) + local nn = #actlist + if nn == 0 then nn = 1; actlist[0] = map_action.STOP end + out:write("static const unsigned short ", name, "[", nn, "] = {") + local esc = false -- also need to escape for action arguments + for i = 1, nn do + assert(out:write("\n 0x", sub(tohex(actlist[i]), 5, 8))) + if i ~= nn then assert(out:write(",")) end + local name = action_names[actlist[i]+1] + if not esc and name then + assert(out:write(" /* ", name, " */")) + esc = havearg(name) + else + esc = false + end + end + assert(out:write("\n};\n\n")) +end + +------------------------------------------------------------------------------ + +-- Add halfword to action list. +local function wputxhw(n) + assert(n >= 0 and n <= 0xffff, "halfword out of range") + actlist[#actlist+1] = n +end + +-- Add action to list with optional arg. Advance buffer pos, too. +local function waction(action, val, a, num) + local w = assert(map_action[action], "bad action name `"..action.."'") + wputxhw(w) + if val then wputxhw(val) end -- Not sure about this, do we always have one arg? + if a then actargs[#actargs+1] = a end + if val or a or num then secpos = secpos + (num or 1) end +end + +-- Flush action list (intervening C code or buffer pos overflow). +local function wflush(term) + if #actlist == actargs[1] then return end -- Nothing to flush. + if not term then waction("STOP") end -- Terminate action list. + wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) + actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). + secpos = 1 -- The actionlist offset occupies a buffer position, too. +end + +-- Put escaped halfword. +local function wputhw(n) + if n <= max_action then waction("ESC") end + wputxhw(n) +end + +-- Reserve position for halfword. 
+local function wpos() + local pos = #actlist+1 + actlist[pos] = "" + return pos +end + +------------------------------------------------------------------------------ + +-- Global label name -> global label number. With auto assignment on 1st use. +local next_global = 20 +local map_global = setmetatable({}, { __index = function(t, name) + if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end + local n = next_global + if n > 2047 then werror("too many global labels") end + next_global = n + 1 + t[name] = n + return n +end}) + +-- Dump global labels. +local function dumpglobals(out, lvl) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("Global labels:\n") + for i=20, next_global-1 do + out:write(format(" %s\n", t[i])) + end + out:write("\n") +end + +-- Write global label enum. +local function writeglobals(out, prefix) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("enum {\n") + for i=20, next_global-1 do + out:write(" ", prefix, t[i], ",\n") + end + out:write(" ", prefix, "_MAX\n};\n") +end + +-- Write global label names. +local function writeglobalnames(out, name) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("static const char *const ", name, "[] = {\n") + for i=20, next_global-1 do + out:write(" \"", t[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Extern label name -> extern label number. With auto assignment on 1st use. +local next_extern = 0 +local map_extern_ = {} +local map_extern = setmetatable({}, { __index = function(t, name) + -- No restrictions on the name for now. + local n = next_extern + if n > 2047 then werror("too many extern labels") end + next_extern = n + 1 + t[name] = n + map_extern_[n] = name + return n +end}) + +-- Dump extern labels. 
+local function dumpexterns(out, lvl) + out:write("Extern labels:\n") + for i=0, next_extern-1 do + out:write(format(" %s\n", map_extern_[i])) + end + out:write("\n") +end + +-- Write extern label names. +local function writeexternnames(out, name) + out:write("static const char *const ", name, "[] = {\n") + for i=0, next_extern-1 do + out:write(" \"", map_extern_[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Arch-specific maps. +-- Ext. register name -> int. name. +local map_archdef = { sp = "r15" } + +-- Int. register name -> ext. name. +local map_reg_rev = { r15 = "sp" } + +local map_type = {} -- Type name -> { ctype, reg } +local ctypenum = 0 -- Type number (for Dt... macros). + +-- Reverse defines for registers. +function _M.revdef(s) + return map_reg_rev[s] or s +end + +local map_cond = { + o = 1, h = 2, nle = 3, l = 4, + nhe = 5, lh = 6, ne = 7, e = 8, + nlh = 9, he = 10, nl = 11, le = 12, + nh = 13, no = 14, [""] = 15, +} + +------------------------------------------------------------------------------ + +local function parse_reg(expr) + if not expr then werror("expected register name") end + local tname, ovreg = match(expr, "^([%w_]+):(r1?%d)$") + local tp = map_type[tname or expr] + if tp then + local reg = ovreg or tp.reg + if not reg then + werror("type `"..(tname or expr).."' needs a register override") + end + expr = reg + end + local r = match(expr, "^[rf](1?%d)$") + if r then + r = tonumber(r) + if r <= 15 then return r, tp end + end + werror("bad register name `"..expr.."'") +end + +local parse_ctx = {} + +local loadenv = setfenv and function(s) + local code = loadstring(s, "") + if code then setfenv(code, parse_ctx) end + return code +end or function(s) + return load(s, "", nil, parse_ctx) +end + +-- Try to parse simple arithmetic, too, since some basic ops are aliases. 
+local function parse_number(n) + local x = tonumber(n) + if x then return x end + local code = loadenv("return "..n) + if code then + local ok, y = pcall(code) + if ok then return y end + end + return nil +end + +local function is_uint12(num) + return 0 <= num and num < 4096 +end + +local function is_int20(num) + return -shl(1, 19) <= num and num < shl(1, 19) +end + +local function is_int32(num) + return -2147483648 <= num and num < 2147483648 +end + +local function is_uint16(num) + return 0 <= num and num < 0xffff +end + +local function is_int16(num) + return -32768 <= num and num < 32768 +end + +local function is_int8(num) + return -128 <= num and num < 128 +end + +local function is_uint8(num) + return 0 <= num and num < 256 +end + +-- Split a memory operand of the form d(b) or d(x,b) into d, x and b. +-- If x is not specified then it is 0. +local function split_memop(arg) + local reg = "[%w_:]+" + local d, x, b = match(arg, "^(.*)%(%s*("..reg..")%s*,%s*("..reg..")%s*%)$") + if d then + return d, parse_reg(x), parse_reg(b) + end + local d, b = match(arg, "^(.*)%(%s*("..reg..")%s*%)$") + if d then + return d, 0, parse_reg(b) + end + -- Assume the two registers are passed as "(r1,r2)", and displacement(d) is not specified. TODO: not sure if we want to do this, GAS doesn't. + local x, b = match(arg,"%(%s*("..reg..")%s*,%s*("..reg..")%s*%)$") + if b then + return 0, parse_reg(x), parse_reg(b) + end + -- Accept a lone integer as a displacement. TODO: allow expressions/variables here? Interacts badly with the other rules currently. + local d = match(arg,"^(-?[%d]+)$") + if d then + return d, 0, 0 + end + local reg, tailr = match(arg, "^([%w_:]+)%s*(.*)$") + if reg then + local r, tp = parse_reg(reg) + if tp then + return format(tp.ctypefmt, tailr), 0, r + end + end + werror("bad memory operand: "..arg) + return nil +end + +-- Parse memory operand of the form d(x, b) where 0 <= d < 4096 and b and x +-- are GPRs. 
+-- If the fourth return value is not-nil then it needs to be called to +-- insert an action. +-- Encoded as: xbddd +local function parse_mem_bx(arg) + local d, x, b = split_memop(arg) + local dval = tonumber(d) + if dval then + if not is_uint12(dval) then + werror("displacement out of range: ", dval) + end + return dval, x, b, nil + end + if match(d, "^[rf]1?[0-9]?") then + werror("expected immediate operand, got register") + end + return 0, x, b, function() waction("DISP12", nil, d) end +end + +-- Parse memory operand of the form d(b) where 0 <= d < 4096 and b is a GPR. +-- Encoded as: bddd +local function parse_mem_b(arg) + local d, x, b, a = parse_mem_bx(arg) + if x ~= 0 then + werror("unexpected index register") + end + return d, b, a +end + +-- Parse memory operand of the form d(x, b) where -(2^20)/2 <= d < (2^20)/2 +-- and b and x are GPRs. +-- Encoded as: xblllhh (ls are the low-bits of d, and hs are the high bits). +local function parse_mem_bxy(arg) + local d, x, b = split_memop(arg) + local dval = tonumber(d) + if dval then + if not is_int20(dval) then + werror("displacement out of range: ", dval) + end + return dval, x, b, nil + end + if match(d, "^[rf]1?[0-9]?") then + werror("expected immediate operand, got register") + end + return 0, x, b, function() waction("DISP20", nil, d) end +end + +-- Parse memory operand of the form d(b) where -(2^20)/2 <= d < (2^20)/2 and +-- b is a GPR. +-- Encoded as: blllhh (ls are the low-bits of d, and hs are the high bits). +local function parse_mem_by(arg) + local d, x, b, a = parse_mem_bxy(arg) + if x ~= 0 then + werror("unexpected index register") + end + return d, b, a +end + +-- Parse memory operand of the form d(l, b) where 0 <= d < 4096, 1 <= l <= 256, +-- and b is a GPR. +local function parse_mem_lb(arg) + local reg = "r1?[0-9]" + local d, l, b = match(arg, "^(.*)%s*%(%s*(.*)%s*,%s*("..reg..")%s*%)$") + if not d then + -- TODO: handle values without registers? + -- TODO: handle registers without a displacement? 
+ werror("bad memory operand: "..arg) + return nil + end + local dval = tonumber(d) + local dact = nil + if dval then + if not is_uint12(dval) then + werror("displacement out of range: ", dval) + end + else + dval = 0 + dact = function() waction("DISP12", nil, d) end + end + local lval = tonumber(l) + local lact = nil + if lval then + if lval < 1 or lval > 256 then + werror("length out of range: ", dval) + end + lval = lval - 1 + else + lval = 0 + lact = function() waction("LEN8R", nil, l) end + end + return dval, lval, parse_reg(b), dact, lact +end + +local function parse_mem_l2b(arg, high_l) + local reg = "r1?[0-9]" + local d, l, b = match(arg, "^(.*)%s*%(%s*(.*)%s*,%s*("..reg..")%s*%)$") + if not d then + -- TODO: handle values without registers? + -- TODO: handle registers without a displacement? + werror("bad memory operand: "..arg) + return nil + end + local dval = tonumber(d) + local dact = nil + if dval then + if not is_uint12(dval) then + werror("displacement out of range: ", dval) + end + else + dval = 0 + dact = function() waction("DISP12", nil, d) end + end + local lval = tonumber(l) + local lact = nil + if lval then + if lval < 1 or lval > 128 then + werror("length out of range: ", dval) + end + lval = lval - 1 + else + lval = 0 + if high_l then + lact = function() waction("LEN4HR", nil, l) end + else + lact = function() waction("LEN4LR", nil, l) end + end + end + return dval, lval, parse_reg(b), dact, lact +end + +local function parse_imm32(imm) + local imm_val = tonumber(imm) + if imm_val then + if not is_int32(imm_val) then + werror("immediate value out of range: ", imm_val) + end + wputhw(band(shr(imm_val, 16), 0xffff)) + wputhw(band(imm_val, 0xffff)) + elseif match(imm, "^[rfv]([1-3]?[0-9])$") or + match(imm, "^([%w_]+):(r1?[0-9])$") then + werror("expected immediate operand, got register") + else + waction("IMM32", nil, imm) -- if we get label + end +end + +local function parse_imm16(imm) + local imm_val = tonumber(imm) + if imm_val then + if not 
is_int16(imm_val) and not is_uint16(imm_val) then + werror("immediate value out of range: ", imm_val) + end + wputhw(band(imm_val, 0xffff)) + elseif match(imm, "^[rfv]([1-3]?[0-9])$") or + match(imm, "^([%w_]+):(r1?[0-9])$") then + werror("expected immediate operand, got register") + else + waction("IMM16", nil, imm) + end +end + +local function parse_imm8(imm) + local imm_val = tonumber(imm) + if imm_val then + if not is_int8(imm_val) and not is_uint8(imm_val) then + werror("Immediate value out of range: ", imm_val) + end + return imm_val, nil + end + return 0, function() waction("IMM8", nil, imm) end +end + +local function parse_mask(mask) + local m3 = parse_number(mask) + if m3 then + if ((m3 == 1) or (m3 == 0) or ( m3 >=3 and m3 <=7)) then + return m3 + else + werror("Mask value should be 0,1 or 3-7: ", m3) + end + end +end + +local function parse_mask2(mask) + local m4 = parse_number(mask) + if ( m4 >=0 and m4 <=1) then + return m4 + else + werror("Mask value should be 0 or 1: ", m4) + end +end + +local function parse_label(label, def) + local prefix = sub(label, 1, 2) + -- =>label (pc label reference) + if prefix == "=>" then + return "PC", 0, sub(label, 3) + end + -- ->name (global label reference) + if prefix == "->" then + return "LG", map_global[sub(label, 3)] + end + if def then + -- [1-9] (local label definition) + if match(label, "^[1-9]$") then + return "LG", 10+tonumber(label) + end + else + -- [<>][1-9] (local label reference) + local dir, lnum = match(label, "^([<>])([1-9])$") + if dir then -- Fwd: 1-9, Bkwd: 11-19. 
+ return "LG", lnum + (dir == ">" and 0 or 10) + end + -- extern label (extern label reference) + local extname = match(label, "^extern%s+(%S+)$") + if extname then + return "EXT", map_extern[extname] + end + end + werror("bad label `"..label.."'") +end + +------------------------------------------------------------------------------ + +local map_op, op_template + +local function op_alias(opname, f) + return function(params, nparams) + if not params then return "-> "..opname:sub(1, -3) end + f(params, nparams) + op_template(params, map_op[opname], nparams) + end +end + +-- Template strings for s390x instructions. +map_op = { + a_2 = "00005a000000RX-a", + ad_2 = "00006a000000RX-a", + adb_2 = "ed000000001aRXE", + adbr_2 = "0000b31a0000RRE", + adr_2 = "000000002a00RR", + ae_2 = "00007a000000RX-a", + aeb_2 = "ed000000000aRXE", + aebr_2 = "0000b30a0000RRE", + aer_2 = "000000003a00RR", + afi_2 = "c20900000000RIL-a", + ag_2 = "e30000000008RXY-a", + agf_2 = "e30000000018RXY-a", + agfi_2 = "c20800000000RIL-a", + agfr_2 = "0000b9180000RRE", + aghi_2 = "0000a70b0000RI-a", + agr_2 = "0000b9080000RRE", + ah_2 = "00004a000000RX-a", + ahi_2 = "0000a70a0000RI-a", + ahy_2 = "e3000000007aRXY-a", + aih_2 = "cc0800000000RIL-a", + al_2 = "00005e000000RX-a", + alc_2 = "e30000000098RXY-a", + alcg_2 = "e30000000088RXY-a", + alcgr_2 = "0000b9880000RRE", + alcr_2 = "0000b9980000RRE", + alfi_2 = "c20b00000000RIL-a", + alg_2 = "e3000000000aRXY-a", + algf_2 = "e3000000001aRXY-a", + algfi_2 = "c20a00000000RIL-a", + algfr_2 = "0000b91a0000RRE", + algr_2 = "0000b90a0000RRE", + alr_2 = "000000001e00RR", + alsih_2 = "cc0a00000000RIL-a", + alsihn_2 = "cc0b00000000RIL-a", + aly_2 = "e3000000005eRXY-a", + ap_2 = "fa0000000000SS-b", + ar_2 = "000000001a00RR", + au_2 = "00007e000000RX-a", + aur_2 = "000000003e00RR", + aw_2 = "00006e000000RX-a", + awr_2 = "000000002e00RR", + axbr_2 = "0000b34a0000RRE", + axr_2 = "000000003600RR", + ay_2 = "e3000000005aRXY-a", + bakr_2 = "0000b2400000RRE", + bal_2 = 
"000045000000RX-a", + balr_2 = "000000000500RR", + bas_2 = "00004d000000RX-a", + basr_2 = "000000000d00RR", + bassm_2 = "000000000c00RR", + bc_2 = "000047000000RX-b", + bc_2 = "000047000000RX-b", + bcr_2 = "000000000700RR", + bct_2 = "000046000000RX-a", + bctg_2 = "e30000000046RXY-a", + bctgr_2 = "0000b9460000RRE", + bctr_2 = "000000000600RR", + bras_2 = "0000a7050000RI-b", + brasl_2 = "c00500000000RIL-b", + brc_2 = "0000a7040000RI-c", + brcl_2 = "c00400000000RIL-c", + brcl_2 = "c00400000000RIL-c", + brct_2 = "0000a7060000RI-b", + brctg_2 = "0000a7070000RI-b", + brcth_2 = "cc0600000000RIL-b", + brxh_3 = "000084000000RSI", + brxhg_3 = "ec0000000044RIE-e", + bsa_2 = "0000b25a0000RRE", + bsg_2 = "0000b2580000RRE", + bsm_2 = "000000000b00RR", + bxh_3 = "000086000000RS-a", + bxhg_3 = "eb0000000044RSY-a", + bxle_3 = "000087000000RS-a", + bxleg_3 = "eb0000000045RSY-a", + c_2 = "000059000000RX-a", + cd_2 = "000069000000RX-a", + cdb_2 = "ed0000000019RXE", + cdbr_2 = "0000b3190000RRE", + cdfbr_2 = "0000b3950000RRE", + cdfbra_4 = "0000b3950000RRF-e", + cdfr_2 = "0000b3b50000RRE", + cdftr_2 = "0000b9510000RRE", + cdgbr_2 = "0000b3a50000RRE", + cdgbra_4 = "0000b3a50000RRF-e", + cdgr_2 = "0000b3c50000RRE", + cdgtr_2 = "0000b3f10000RRE", + cdr_2 = "000000002900RR", + cds_3 = "0000bb000000RS-a", + cdsg_3 = "eb000000003eRSY-a", + cdstr_2 = "0000b3f30000RRE", + cdsy_3 = "eb0000000031RSY-a", + cdtr_2 = "0000b3e40000RRE", + cdutr_2 = "0000b3f20000RRE", + ce_2 = "000079000000RX-a", + ceb_2 = "ed0000000009RXE", + cebr_2 = "0000b3090000RRE", + cedtr_2 = "0000b3f40000RRE", + cefbr_2 = "0000b3940000RRE", + cefbra_4 = "0000b3940000RRF-e", + cefr_2 = "0000b3b40000RRE", + cegbr_2 = "0000b3a40000RRE", + cegbra_4 = "0000b3a40000RRF-e", + cegr_2 = "0000b3c40000RRE", + cer_2 = "000000003900RR", + cextr_2 = "0000b3fc0000RRE", + cfdbr_3 = "0000b3990000RRF-e", + cfdbra_4 = "0000b3990000RRF-e", + cfebr_3 = "0000b3980000RRF-e", + cfebra_4 = "0000b3980000RRF-e", + cfi_2 = "c20d00000000RIL-a", + cfxbr_3 
= "0000b39a0000RRF-e", + cfxbra_4 = "0000b39a0000RRF-e", + cg_2 = "e30000000020RXY-a", + cgdbr_3 = "0000b3a90000RRF-e", + cgdbra_4 = "0000b3a90000RRF-e", + cgebr_3 = "0000b3a80000RRF-e", + cgebra_4 = "0000b3a80000RRF-e", + cgf_2 = "e30000000030RXY-a", + cgfi_2 = "c20c00000000RIL-a", + cgfr_2 = "0000b9300000RRE", + cgfrl_2 = "c60c00000000RIL-b", + cgh_2 = "e30000000034RXY-a", + cghi_2 = "0000a70f0000RI-a", + cghrl_2 = "c60400000000RIL-b", + cgr_2 = "0000b9200000RRE", + cgrl_2 = "c60800000000RIL-b", + cgxbr_3 = "0000b3aa0000RRF-e", + cgxbra_4 = "0000b3aa0000RRF-e", + ch_2 = "000049000000RX-a", + chf_2 = "e300000000cdRXY-a", + chhr_2 = "0000b9cd0000RRE", + chi_2 = "0000a70e0000RI-a", + chlr_2 = "0000b9dd0000RRE", + chrl_2 = "c60500000000RIL-b", + chy_2 = "e30000000079RXY-a", + cih_2 = "cc0d00000000RIL-a", + cksm_2 = "0000b2410000RRE", + cl_2 = "000055000000RX-a", + clc_2 = "d50000000000SS-a", + clcl_2 = "000000000f00RR", + clcle_3 = "0000a9000000RS-a", + clclu_3 = "eb000000008fRSY-a", + clfi_2 = "c20f00000000RIL-a", + clg_2 = "e30000000021RXY-a", + clgf_2 = "e30000000031RXY-a", + clgfi_2 = "c20e00000000RIL-a", + clgfr_2 = "0000b9310000RRE", + clgfrl_2 = "c60e00000000RIL-b", + clghrl_2 = "c60600000000RIL-b", + clgr_2 = "0000b9210000RRE", + clgrl_2 = "c60a00000000RIL-b", + clhf_2 = "e300000000cfRXY-a", + clhhr_2 = "0000b9cf0000RRE", + clhlr_2 = "0000b9df0000RRE", + clhrl_2 = "c60700000000RIL-b", + cli_2 = "000095000000SI", + clih_2 = "cc0f00000000RIL-a", + clm_3 = "0000bd000000RS-b", + clmh_3 = "eb0000000020RSY-b", + clmy_3 = "eb0000000021RSY-b", + clr_2 = "000000001500RR", + clrl_2 = "c60f00000000RIL-b", + clst_2 = "0000b25d0000RRE", + cly_2 = "e30000000055RXY-a", + cmpsc_2 = "0000b2630000RRE", + cpya_2 = "0000b24d0000RRE", + cr_2 = "000000001900RR", + crl_2 = "c60d00000000RIL-b", + cs_3 = "0000ba000000RS-a", + csg_3 = "eb0000000030RSY-a", + csp_2 = "0000b2500000RRE", + cspg_2 = "0000b98a0000RRE", + csy_3 = "eb0000000014RSY-a", + cu41_2 = "0000b9b20000RRE", + cu42_2 = 
"0000b9b30000RRE", + cudtr_2 = "0000b3e20000RRE", + cuse_2 = "0000b2570000RRE", + cuxtr_2 = "0000b3ea0000RRE", + cvb_2 = "00004f000000RX-a", + cvbg_2 = "e3000000000eRXY-a", + cvby_2 = "e30000000006RXY-a", + cvd_2 = "00004e000000RX-a", + cvdg_2 = "e3000000002eRXY-a", + cvdy_2 = "e30000000026RXY-a", + cxbr_2 = "0000b3490000RRE", + cxfbr_2 = "0000b3960000RRE", + cxfbra_4 = "0000b3960000RRF-e", + cxfr_2 = "0000b3b60000RRE", + cxftr_2 = "0000b9590000RRE", + cxgbr_2 = "0000b3a60000RRE", + cxgbra_4 = "0000b3a60000RRF-e", + cxgr_2 = "0000b3c60000RRE", + cxgtr_2 = "0000b3f90000RRE", + cxr_2 = "0000b3690000RRE", + cxstr_2 = "0000b3fb0000RRE", + cxtr_2 = "0000b3ec0000RRE", + cxutr_2 = "0000b3fa0000RRE", + cy_2 = "e30000000059RXY-a", + d_2 = "00005d000000RX-a", + dd_2 = "00006d000000RX-a", + ddb_2 = "ed000000001dRXE", + ddbr_2 = "0000b31d0000RRE", + ddr_2 = "000000002d00RR", + de_2 = "00007d000000RX-a", + deb_2 = "ed000000000dRXE", + debr_2 = "0000b30d0000RRE", + der_2 = "000000003d00RR", + didbr_4 = "0000b35b0000RRF-b", + dl_2 = "e30000000097RXY-a", + dlg_2 = "e30000000087RXY-a", + dlgr_2 = "0000b9870000RRE", + dlr_2 = "0000b9970000RRE", + dr_2 = "000000001d00RR", + dsg_2 = "e3000000000dRXY-a", + dsgf_2 = "e3000000001dRXY-a", + dsgfr_2 = "0000b91d0000RRE", + dsgr_2 = "0000b90d0000RRE", + dxbr_2 = "0000b34d0000RRE", + dxr_2 = "0000b22d0000RRE", + ear_2 = "0000b24f0000RRE", + ecag_3 = "eb000000004cRSY-a", + ed_2 = "de0000000000SS-a", + edmk_2 = "df0000000000SS-a", + eedtr_2 = "0000b3e50000RRE", + eextr_2 = "0000b3ed0000RRE", + efpc_2 = "0000b38c0000RRE", + epair_2 = "0000b99a0000RRE", + epar_2 = "0000b2260000RRE", + epsw_2 = "0000b98d0000RRE", + ereg_2 = "0000b2490000RRE", + eregg_2 = "0000b90e0000RRE", + esair_2 = "0000b99b0000RRE", + esar_2 = "0000b2270000RRE", + esdtr_2 = "0000b3e70000RRE", + esea_2 = "0000b99d0000RRE", + esta_2 = "0000b24a0000RRE", + esxtr_2 = "0000b3ef0000RRE", + ex_2 = "000044000000RX-a", + exrl_2 = "c60000000000RIL-b", + fidr_2 = "0000b37f0000RRE", + 
fier_2 = "0000b3770000RRE", + fixr_2 = "0000b3670000RRE", + flogr_2 = "0000b9830000RRE", + hdr_2 = "000000002400RR", + her_2 = "000000003400RR", + iac_2 = "0000b2240000RRE", + ic_2 = "000043000000RX-a", + icm_3 = "0000bf000000RS-b", + icmh_3 = "eb0000000080RSY-b", + icmy_3 = "eb0000000081RSY-b", + icy_2 = "e30000000073RXY-a", + iihf_2 = "c00800000000RIL-a", + iihh_2 = "0000a5000000RI-a", + iihl_2 = "0000a5010000RI-a", + iilf_2 = "c00900000000RIL-a", + iilh_2 = "0000a5020000RI-a", + iill_2 = "0000a5030000RI-a", + ipm_2 = "0000b2220000RRE", + iske_2 = "0000b2290000RRE", + ivsk_2 = "0000b2230000RRE", + kdbr_2 = "0000b3180000RRE", + kdtr_2 = "0000b3e00000RRE", + kebr_2 = "0000b3080000RRE", + kimd_2 = "0000b93e0000RRE", + klmd_2 = "0000b93f0000RRE", + km_2 = "0000b92e0000RRE", + kmac_2 = "0000b91e0000RRE", + kmc_2 = "0000b92f0000RRE", + kmf_2 = "0000b92a0000RRE", + kmo_2 = "0000b92b0000RRE", + kxbr_2 = "0000b3480000RRE", + kxtr_2 = "0000b3e80000RRE", + l_2 = "000058000000RX-a", + la_2 = "000041000000RX-a", + laa_3 = "eb00000000f8RSY-a", + laag_3 = "eb00000000e8RSY-a", + laal_3 = "eb00000000faRSY-a", + laalg_3 = "eb00000000eaRSY-a", + lae_2 = "000051000000RX-a", + laey_2 = "e30000000075RXY-a", + lam_3 = "00009a000000RS-a", + lamy_3 = "eb000000009aRSY-a", + lan_3 = "eb00000000f4RSY-a", + lang_3 = "eb00000000e4RSY-a", + lao_3 = "eb00000000f6RSY-a", + laog_3 = "eb00000000e6RSY-a", + larl_2 = "c00000000000RIL-b", + lax_3 = "eb00000000f7RSY-a", + laxg_3 = "eb00000000e7RSY-a", + lay_2 = "e30000000071RXY-a", + lb_2 = "e30000000076RXY-a", + lbh_2 = "e300000000c0RXY-a", + lbr_2 = "0000b9260000RRE", + lcdbr_2 = "0000b3130000RRE", + lcdfr_2 = "0000b3730000RRE", + lcdr_2 = "000000002300RR", + lcebr_2 = "0000b3030000RRE", + lcer_2 = "000000003300RR", + lcgfr_2 = "0000b9130000RRE", + lcgr_2 = "0000b9030000RRE", + lcr_2 = "000000001300RR", + lctl_3 = "0000b7000000RS-a", + lctlg_3 = "eb000000002fRSY-a", + lcxbr_2 = "0000b3430000RRE", + lcxr_2 = "0000b3630000RRE", + ld_2 = 
"000068000000RX-a", + ldebr_2 = "0000b3040000RRE", + lder_2 = "0000b3240000RRE", + ldgr_2 = "0000b3c10000RRE", + ldr_2 = "000000002800RR", + ldxbr_2 = "0000b3450000RRE", + ldxr_2 = "000000002500RR", + ldy_2 = "ed0000000065RXY-a", + le_2 = "000078000000RX-a", + ledbr_2 = "0000b3440000RRE", + ledr_2 = "000000003500RR", + ler_2 = "000000003800RR", + lexbr_2 = "0000b3460000RRE", + lexr_2 = "0000b3660000RRE", + ley_2 = "ed0000000064RXY-a", + lfh_2 = "e300000000caRXY-a", + lg_2 = "e30000000004RXY-a", + lgb_2 = "e30000000077RXY-a", + lgbr_2 = "0000b9060000RRE", + lgdr_2 = "0000b3cd0000RRE", + lgf_2 = "e30000000014RXY-a", + lgfi_2 = "c00100000000RIL-a", + lgfr_2 = "0000b9140000RRE", + lgfrl_2 = "c40c00000000RIL-b", + lgh_2 = "e30000000015RXY-a", + lghi_2 = "0000a7090000RI-a", + lghr_2 = "0000b9070000RRE", + lghrl_2 = "c40400000000RIL-b", + lgr_2 = "0000b9040000RRE", + lgrl_2 = "c40800000000RIL-b", + lh_2 = "000048000000RX-a", + lhh_2 = "e300000000c4RXY-a", + lhi_2 = "0000a7080000RI-a", + lhr_2 = "0000b9270000RRE", + lhrl_2 = "c40500000000RIL-b", + lhy_2 = "e30000000078RXY-a", + llc_2 = "e30000000094RXY-a", + llch_2 = "e300000000c2RXY-a", + llcr_2 = "0000b9940000RRE", + llgc_2 = "e30000000090RXY-a", + llgcr_2 = "0000b9840000RRE", + llgf_2 = "e30000000016RXY-a", + llgfr_2 = "0000b9160000RRE", + llgfrl_2 = "c40e00000000RIL-b", + llgh_2 = "e30000000091RXY-a", + llghr_2 = "0000b9850000RRE", + llghrl_2 = "c40600000000RIL-b", + llgt_2 = "e30000000017RXY-a", + llgtr_2 = "0000b9170000RRE", + llh_2 = "e30000000095RXY-a", + llhh_2 = "e300000000c6RXY-a", + llhr_2 = "0000b9950000RRE", + llhrl_2 = "c40200000000RIL-b", + llihf_2 = "c00e00000000RIL-a", + llihh_2 = "0000a50c0000RI-a", + llihl_2 = "0000a50d0000RI-a", + llilf_2 = "c00f00000000RIL-a", + llilh_2 = "0000a50e0000RI-a", + llill_2 = "0000a50f0000RI-a", + lm_3 = "000098000000RS-a", + lmg_3 = "eb0000000004RSY-a", + lmh_3 = "eb0000000096RSY-a", + lmy_3 = "eb0000000098RSY-a", + lndbr_2 = "0000b3110000RRE", + lndfr_2 = 
"0000b3710000RRE", + lndr_2 = "000000002100RR", + lnebr_2 = "0000b3010000RRE", + lner_2 = "000000003100RR", + lngfr_2 = "0000b9110000RRE", + lngr_2 = "0000b9010000RRE", + lnr_2 = "000000001100RR", + lnxbr_2 = "0000b3410000RRE", + lnxr_2 = "0000b3610000RRE", + loc_3 = "eb00000000f2RSY-b", + locg_3 = "eb00000000e2RSY-b", + lpdbr_2 = "0000b3100000RRE", + lpdfr_2 = "0000b3700000RRE", + lpdr_2 = "000000002000RR", + lpebr_2 = "0000b3000000RRE", + lper_2 = "000000003000RR", + lpgfr_2 = "0000b9100000RRE", + lpgr_2 = "0000b9000000RRE", + lpq_2 = "e3000000008fRXY-a", + lpr_2 = "000000001000RR", + lpxbr_2 = "0000b3400000RRE", + lpxr_2 = "0000b3600000RRE", + lr_2 = "000000001800RR", + lra_2 = "0000b1000000RX-a", + lrag_2 = "e30000000003RXY-a", + lray_2 = "e30000000013RXY-a", + lrdr_2 = "000000002500RR", + lrer_2 = "000000003500RR", + lrl_2 = "c40d00000000RIL-b", + lrv_2 = "e3000000001eRXY-a", + lrvg_2 = "e3000000000fRXY-a", + lrvgr_2 = "0000b90f0000RRE", + lrvh_2 = "e3000000001fRXY-a", + lrvr_2 = "0000b91f0000RRE", + lt_2 = "e30000000012RXY-a", + ltdbr_2 = "0000b3120000RRE", + ltdr_2 = "000000002200RR", + ltdtr_2 = "0000b3d60000RRE", + ltebr_2 = "0000b3020000RRE", + lter_2 = "000000003200RR", + ltg_2 = "e30000000002RXY-a", + ltgf_2 = "e30000000032RXY-a", + ltgfr_2 = "0000b9120000RRE", + ltgr_2 = "0000b9020000RRE", + ltr_2 = "000000001200RR", + ltxbr_2 = "0000b3420000RRE", + ltxr_2 = "0000b3620000RRE", + ltxtr_2 = "0000b3de0000RRE", + lura_2 = "0000b24b0000RRE", + lurag_2 = "0000b9050000RRE", + lxdbr_2 = "0000b3050000RRE", + lxdr_2 = "0000b3250000RRE", + lxebr_2 = "0000b3060000RRE", + lxer_2 = "0000b3260000RRE", + lxr_2 = "0000b3650000RRE", + ly_2 = "e30000000058RXY-a", + lzdr_2 = "0000b3750000RRE", + lzer_2 = "0000b3740000RRE", + lzxr_2 = "0000b3760000RRE", + m_2 = "00005c000000RX-a", + madb_3 = "ed000000001eRXF", + maeb_3 = "ed000000000eRXF", + maebr_3 = "0000b30e0000RRD", + maer_3 = "0000b32e0000RRD", + md_2 = "00006c000000RX-a", + mdb_2 = "ed000000001cRXE", + mdbr_2 = 
"0000b31c0000RRE", + mde_2 = "00007c000000RX-a", + mdeb_2 = "ed000000000cRXE", + mdebr_2 = "0000b30c0000RRE", + mder_2 = "000000003c00RR", + mdr_2 = "000000002c00RR", + me_2 = "00007c000000RX-a", + meeb_2 = "ed0000000017RXE", + meebr_2 = "0000b3170000RRE", + meer_2 = "0000b3370000RRE", + mer_2 = "000000003c00RR", + mfy_2 = "e3000000005cRXY-a", + mghi_2 = "0000a70d0000RI-a", + mh_2 = "00004c000000RX-a", + mhi_2 = "0000a70c0000RI-a", + mhy_2 = "e3000000007cRXY-a", + ml_2 = "e30000000096RXY-a", + mlg_2 = "e30000000086RXY-a", + mlgr_2 = "0000b9860000RRE", + mlr_2 = "0000b9960000RRE", + mr_2 = "000000001c00RR", + ms_2 = "000071000000RX-a", + msfi_2 = "c20100000000RIL-a", + msg_2 = "e3000000000cRXY-a", + msgf_2 = "e3000000001cRXY-a", + msgfi_2 = "c20000000000RIL-a", + msgfr_2 = "0000b91c0000RRE", + msgr_2 = "0000b90c0000RRE", + msr_2 = "0000b2520000RRE", + msta_2 = "0000b2470000RRE", + msy_2 = "e30000000051RXY-a", + mvc_2 = "d20000000000SS-a", + mvcin_2 = "e80000000000SS-a", + mvcl_2 = "000000000e00RR", + mvcle_3 = "0000a8000000RS-a", + mvclu_3 = "eb000000008eRSY-a", + mvghi_2 = "e54800000000SIL", + mvhhi_2 = "e54400000000SIL", + mvhi_2 = "e54c00000000SIL", + mvi_2 = "000092000000SI", + mvn_2 = "d10000000000SS-a", + mvpg_2 = "0000b2540000RRE", + mvst_2 = "0000b2550000RRE", + mvz_2 = "d30000000000SS-a", + mxbr_2 = "0000b34c0000RRE", + mxd_2 = "000067000000RX-a", + mxdb_2 = "ed0000000007RXE", + mxdbr_2 = "0000b3070000RRE", + mxdr_2 = "000000002700RR", + mxr_2 = "000000002600RR", + n_2 = "000054000000RX-a", + nc_2 = "d40000000000SS-a", + ng_2 = "e30000000080RXY-a", + ngr_2 = "0000b9800000RRE", + ni_2 = "000094000000SI", + nihf_2 = "c00a00000000RIL-a", + nihh_2 = "0000a5040000RI-a", + nihl_2 = "0000a5050000RI-a", + nilf_2 = "c00b00000000RIL-a", + nilh_2 = "0000a5060000RI-a", + nill_2 = "0000a5070000RI-a", + nr_2 = "000000001400RR", + ny_2 = "e30000000054RXY-a", + o_2 = "000056000000RX-a", + oc_2 = "d60000000000SS-a", + og_2 = "e30000000081RXY-a", + ogr_2 = "0000b9810000RRE", 
+ oi_2 = "000096000000SI", + oihf_2 = "c00c00000000RIL-a", + oihh_2 = "0000a5080000RI-a", + oihl_2 = "0000a5090000RI-a", + oilf_2 = "c00d00000000RIL-a", + oilh_2 = "0000a50a0000RI-a", + oill_2 = "0000a50b0000RI-a", + or_2 = "000000001600RR", + oy_2 = "e30000000056RXY-a", + palb_2 = "0000b2480000RRE", + pcc_2 = "0000b92c0000RRE", + pckmo_2 = "0000b9280000RRE", + pfd_2 = "e30000000036m", + pfdrl_2 = "c60200000000RIL-c", + pfmf_2 = "0000b9af0000RRE", + pgin_2 = "0000b22e0000RRE", + pgout_2 = "0000b22f0000RRE", + popcnt_2 = "0000b9e10000RRE", + pt_2 = "0000b2280000RRE", + ptf_2 = "0000b9a20000RRE", + pti_2 = "0000b99e0000RRE", + rll_3 = "eb000000001dRSY-a", + rllg_3 = "eb000000001cRSY-a", + rrbe_2 = "0000b22a0000RRE", + rrbm_2 = "0000b9ae0000RRE", + s_2 = "00005b000000RX-a", + sar_2 = "0000b24e0000RRE", + sd_2 = "00006b000000RX-a", + sdb_2 = "ed000000001bRXE", + sdbr_2 = "0000b31b0000RRE", + sdr_2 = "000000002b00RR", + se_2 = "00007b000000RX-a", + seb_2 = "ed000000000bRXE", + sebr_2 = "0000b30b0000RRE", + ser_2 = "000000003b00RR", + sfasr_2 = "0000b3850000RRE", + sfpc_2 = "0000b3840000RRE", + sg_2 = "e30000000009RXY-a", + sgf_2 = "e30000000019RXY-a", + sgfr_2 = "0000b9190000RRE", + sgr_2 = "0000b9090000RRE", + sh_2 = "00004b000000RX-a", + shy_2 = "e3000000007bRXY-a", + sl_2 = "00005f000000RX-a", + sla_2 = "00008b000000RS-a", + slag_3 = "eb000000000bRSY-a", + slak_3 = "eb00000000ddRSY-a", + slb_2 = "e30000000099RXY-a", + slbg_2 = "e30000000089RXY-a", + slbgr_2 = "0000b9890000RRE", + slbr_2 = "0000b9990000RRE", + slda_2 = "00008f000000RS-a", + sldl_2 = "00008d000000RS-a", + slfi_2 = "c20500000000RIL-a", + slg_2 = "e3000000000bRXY-a", + slgf_2 = "e3000000001bRXY-a", + slgfi_2 = "c20400000000RIL-a", + slgfr_2 = "0000b91b0000RRE", + slgr_2 = "0000b90b0000RRE", + sll_2 = "000089000000RS-a", + sllg_3 = "eb000000000dRSY-a", + sllk_3 = "eb00000000dfRSY-a", + slr_2 = "000000001f00RR", + sly_2 = "e3000000005fRXY-a", + spm_2 = "000000000400RR", + sqdb_2 = "ed0000000015RXE", + 
sqdbr_2 = "0000b3150000RRE", + sqdr_2 = "0000b2440000RRE", + sqeb_2 = "ed0000000014RXE", + sqebr_2 = "0000b3140000RRE", + sqer_2 = "0000b2450000RRE", + sqxbr_2 = "0000b3160000RRE", + sqxr_2 = "0000b3360000RRE", + sr_2 = "000000001b00RR", + sra_2 = "00008a000000RS-a", + srag_3 = "eb000000000aRSY-a", + srak_3 = "eb00000000dcRSY-a", + srda_2 = "00008e000000RS-a", + srdl_2 = "00008c000000RS-a", + srl_2 = "000088000000RS-a", + srlg_3 = "eb000000000cRSY-a", + srlk_3 = "eb00000000deRSY-a", + srst_2 = "0000b25e0000RRE", + srstu_2 = "0000b9be0000RRE", + ssair_2 = "0000b99f0000RRE", + ssar_2 = "0000b2250000RRE", + st_2 = "000050000000RX-a", + stam_3 = "00009b000000RS-a", + stamy_3 = "eb000000009bRSY-a", + stc_2 = "000042000000RX-a", + stch_2 = "e300000000c3RXY-a", + stcm_3 = "0000be000000RS-b", + stcmh_3 = "eb000000002cRSY-b", + stcmy_3 = "eb000000002dRSY-b", + stctg_3 = "eb0000000025RSY-a", + stctl_3 = "0000b6000000RS-a", + stcy_2 = "e30000000072RXY-a", + std_2 = "000060000000RX-a", + stdy_2 = "ed0000000067RXY-a", + ste_2 = "000070000000RX-a", + stey_2 = "ed0000000066RXY-a", + stfh_2 = "e300000000cbRXY-a", + stfl_1 = "0000b2b10000S", + stg_2 = "e30000000024RXY-a", + stgrl_2 = "c40b00000000RIL-b", + sth_2 = "000040000000RX-a", + sthh_2 = "e300000000c7RXY-a", + sthrl_2 = "c40700000000RIL-b", + sthy_2 = "e30000000070RXY-a", + stm_3 = "000090000000RS-a", + stmg_3 = "eb0000000024RSY-a", + stmh_3 = "eb0000000026RSY-a", + stmy_3 = "eb0000000090RSY-a", + stoc_3 = "eb00000000f3RSY-b", + stocg_3 = "eb00000000e3RSY-b", + stpq_2 = "e3000000008eRXY-a", + strl_2 = "c40f00000000RIL-b", + strv_2 = "e3000000003eRXY-a", + strvg_2 = "e3000000002fRXY-a", + strvh_2 = "e3000000003fRXY-a", + stura_2 = "0000b2460000RRE", + sturg_2 = "0000b9250000RRE", + sty_2 = "e30000000050RXY-a", + su_2 = "00007f000000RX-a", + sur_2 = "000000003f00RR", + svc_1 = "000000000a00I", + sw_2 = "00006f000000RX-a", + swr_2 = "000000002f00RR", + sxbr_2 = "0000b34b0000RRE", + sxr_2 = "000000003700RR", + sy_2 = 
"e3000000005bRXY-a", + tar_2 = "0000b24c0000RRE", + tb_2 = "0000b22c0000RRE", + thder_2 = "0000b3580000RRE", + thdr_2 = "0000b3590000RRE", + tm_2 = "000091000000SI", + tmhh_2 = "0000a7020000RI-a", + tmhl_2 = "0000a7030000RI-a", + tmlh_2 = "0000a7000000RI-a", + tmll_2 = "0000a7010000RI-a", + tmy_2 = "eb0000000051SIY", + tr_2 = "dc0000000000SS-a", + trace_3 = "000099000000RS-a", + tracg_3 = "eb000000000fRSY-a", + tre_2 = "0000b2a50000RRE", + trt_2 = "dd0000000000SS-a", + trtr_2 = "d00000000000SS-a", + unpka_2 = "ea0000000000SS-a", + unpku_2 = "e20000000000SS-a", + x_2 = "000057000000RX-a", + xc_2 = "d70000000000SS-a", + xg_2 = "e30000000082RXY-a", + xgr_2 = "0000b9820000RRE", + xi_2 = "000097000000SI", + xihf_2 = "c00600000000RIL-a", + xilf_2 = "c00700000000RIL-a", + xr_2 = "000000001700RR", + xy_2 = "e30000000057RXY-a", +} +for cond, c in pairs(map_cond) do + -- Extended mnemonics for branches. + -- TODO: replace 'B' with correct encoding. + -- brc + map_op["j"..cond.."_1"] = "0000"..tohex(0xa7040000+shl(c, 20)).."RI-c" + -- brcl + map_op["jg"..cond.."_1"] = tohex(0xc0040000+shl(c, 20)).."0000".."RIL-c" + -- bc + map_op["b"..cond.."_1"] = "0000"..tohex(0x47000000+shl(c, 20)).."RX-b" + -- bcr + map_op["b"..cond.."r_1"] = "0000"..tohex(0x0700+shl(c, 4)).."RR" +end +------------------------------------------------------------------------------ +-- Handle opcodes defined with template strings. +local function parse_template(params, template, nparams, pos) + -- Read the template in 16-bit chunks. + -- Leading halfword zeroes should not be written out. + local op0 = tonumber(sub(template, 1, 4), 16) + local op1 = tonumber(sub(template, 5, 8), 16) + local op2 = tonumber(sub(template, 9, 12), 16) + + -- Process each character. 
+ local p = sub(template, 13) + if p == "I" then + local imm_val, a = parse_imm8(params[1]) + op2 = op2 + imm_val + wputhw(op2) + if a then a() end + elseif p == "RI-a" then + op1 = op1 + shl(parse_reg(params[1]), 4) + wputhw(op1) + parse_imm16(params[2]) + elseif p == "RI-b" then + op1 = op1 + shl(parse_reg(params[1]), 4) + wputhw(op1) + local mode, n, s = parse_label(params[2]) + waction("REL_"..mode, n, s) + elseif p == "RI-c" then + if #params > 1 then + op1 = op1 + shl(parse_num(params[1]), 4) + end + wputhw(op1) + local mode, n, s = parse_label(params[#params]) + waction("REL_"..mode, n, s) + elseif p == "RIE-e" then + op0 = op0 + shl(parse_reg(params[1]), 4) + parse_reg(params[2]) + wputhw1(op0) + local mode, n, s = parse_label(params[3]) + waction("REL_"..mode, n, s) + wputhw(op2) + elseif p == "RIL-a" then + op0 = op0 + shl(parse_reg(params[1]), 4) + wputhw(op0); + parse_imm32(params[2]) + elseif p == "RIL-b" then + op0 = op0 + shl(parse_reg(params[1]), 4) + wputhw(op0) + local mode, n, s = parse_label(params[2]) + waction("REL_"..mode, n, s) + elseif p == "RIL-c" then + if #params > 1 then + op0 = op0 + shl(parse_num(params[1]), 4) + end + wputhw(op0) + local mode, n, s = parse_label(params[#params]) + waction("REL_"..mode, n, s) + elseif p == "RR" then + if #params > 1 then + op2 = op2 + shl(parse_reg(params[1]), 4) + end + op2 = op2 + parse_reg(params[#params]) + wputhw(op2) + elseif p == "RRD" then + wputhw(op1) + op2 = op2 + shl(parse_reg(params[1]), 12) + shl(parse_reg(params[2]), 4) + parse_reg(params[3]) + wputhw(op2) + elseif p == "RRE" then + op2 = op2 + shl(parse_reg(params[1]), 4) + parse_reg(params[2]) + wputhw(op1); wputhw(op2) + elseif p == "RRF-b" then + wputhw(op1) + op2 = op2 + shl(parse_reg(params[1]), 4) + shl(parse_reg(params[2]), 12) + parse_reg(params[3]) + shl(parse_mask(params[4]), 8) + wputhw(op2) + elseif p == "RRF-e" then + wputhw(op1) + op2 = op2 + shl(parse_reg(params[1]), 4) + shl(parse_mask(params[2]), 12) + 
parse_reg(params[3])
+    if params[4] then
+      -- Optional fourth operand is a 0/1 mask placed at bit 8 of op2.
+      op2 = op2 + shl(parse_mask2(params[4]), 8)
+    end
+    wputhw(op2)
+  elseif p == "RS-a" then
+    -- Two-operand templates (e.g. sla_2) carry the memory operand in
+    -- params[2]; three-operand templates (e.g. lm_3) carry it in params[3].
+    -- d, b and a are declared once, outside the if/else, so that the deferred
+    -- action `a` is still in scope when it is run after the halfwords below.
+    -- Declaring them inside each arm made the final check read an unrelated,
+    -- undefined `a`, and the displacement action was never emitted.
+    local d, b, a
+    if (params[3]) then
+      d, b, a = parse_mem_b(params[3])
+      op1 = op1 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+      op2 = op2 + shl(b, 12) + d
+    else
+      d, b, a = parse_mem_b(params[2])
+      op1 = op1 + shl(parse_reg(params[1]), 4)
+      op2 = op2 + shl(b, 12) + d
+    end
+    wputhw(op1); wputhw(op2)
+    if a then a() end
+  elseif p == "RS-b" then
+    -- The second operand of the RS-b templates (clm_3, icm_3, stcm_3) is a
+    -- 4-bit mask stored in the low nibble of op1, so every value 0-15 is
+    -- accepted. parse_mask() is not used here because it only admits
+    -- 0, 1 and 3-7.
+    local m = parse_number(params[2])
+    if not m then
+      werror("bad mask `"..params[2].."'")
+    elseif m < 0 or m > 15 then
+      werror("Mask value should be 0-15: ", m)
+    end
+    local d, b, a = parse_mem_b(params[3])
+    op1 = op1 + shl(parse_reg(params[1]), 4) + m
+    op2 = op2 + shl(b, 12) + d
+    wputhw(op1); wputhw(op2)
+    if a then a() end
+  elseif p == "RSI" then
+    -- Two registers in op1, followed by a relative-label action.
+    op1 = op1 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+    wputhw(op1)
+    local mode, n, s = parse_label(params[3])
+    waction("REL_"..mode, n, s)
+  elseif p == "RSY-a" then
+    local d, b, a = parse_mem_by(params[3])
+    op0 = op0 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
+    -- The displacement is split: its low 12 bits go into op1 next to the
+    -- base register, and bits 12-19 land in the high byte of op2.
+    op1 = op1 + shl(b, 12) + band(d, 0xfff)
+    op2 = op2 + band(shr(d, 4), 0xff00)
+    wputhw(op0); wputhw(op1); wputhw(op2)
+    if a then a() end -- a() emits action.
+ elseif p == "RX-a" then + local d, x, b, a = parse_mem_bx(params[2]) + op1 = op1 + shl(parse_reg(params[1]), 4) + x + op2 = op2 + shl(b, 12) + d + wputhw(op1); wputhw(op2) + if a then a() end + elseif p == "RX-b" then + local d, x, b, a = parse_mem_bx(params[#params]) + if #params > 1 then + op1 = op1 + shl(parse_num(params[1]), 4) + end + op1 = op1 + x + op2 = op2 + shl(b, 12) + d + wputhw(op1); wputhw(op2) + if a then a() end + elseif p == "RXE" then + local d, x, b, a = parse_mem_bx(params[2]) + op0 = op0 + shl(parse_reg(params[1]), 4) + x + op1 = op1 + shl(b, 12) + d + wputhw(op0); wputhw(op1) + if a then a() end + wputhw(op2); + elseif p == "RXF" then + local d, x, b, a = parse_mem_bx(params[3]) + op0 = op0 + shl(parse_reg(params[2]), 4) + x + op1 = op1 + shl(b, 12) + d + wputhw(op0); wputhw(op1) + if a then a() end + op2 = op2 + shl(parse_reg(params[1]), 12) + wputhw(op2) + elseif p == "RXY-a" then + local d, x, b, a = parse_mem_bxy(params[2]) + op0 = op0 + shl(parse_reg(params[1]), 4) + x + op1 = op1 + shl(b, 12) + band(d, 0xfff) + op2 = op2 + band(shr(d, 4), 0xff00) + wputhw(op0); wputhw(op1); wputhw(op2) + if a then a() end + elseif p == "S" then + wputhw(op1); + local d, b, a = parse_mem_b(params[1]) + op2 = op2 + shl(b, 12) + d + wputhw(op2) + if a then a() end + elseif p == "SI" then + local imm_val, a = parse_imm8(params[2]) + op1 = op1 + imm_val + wputhw(op1) + if a then a() end + local d, b, a = parse_mem_b(params[1]) + op2 = op2 + shl(b, 12) + d + wputhw(op2) + if a then a() end + elseif p == "SIL" then + wputhw(op0) + local d, b, a = parse_mem_b(params[1]) + op1 = op1 + shl(b, 12) + d + wputhw(op1) + if a then a() end + parse_imm16(params[2]) + elseif p == "SIY" then + local imm8, iact = parse_imm8(params[2]) + op0 = op0 + shl(imm8, 8) + wputhw(op0) + if iact then iact() end + local d, b, a = parse_mem_by(params[1]) + op1 = op1 + shl(b, 12) + band(d, 0xfff) + op2 = op2 + band(shr(d, 4), 0xff00) + wputhw(op1); wputhw(op2) + if a then a() end + 
elseif p == "SS-a" then + local d1, l1, b1, d1a, l1a = parse_mem_lb(params[1]) + local d2, b2, d2a = parse_mem_b(params[2]) + op0 = op0 + l1 + op1 = op1 + shl(b1, 12) + d1 + op2 = op2 + shl(b2, 12) + d2 + wputhw(op0) + if l1a then l1a() end + wputhw(op1) + if d1a then d1a() end + wputhw(op2) + if d2a then d2a() end + elseif p == "SS-b" then + local high_l = true + local d1, l1, b1, d1a, l1a = parse_mem_l2b(params[1], high_l) + high_l = false + local d2, l2, b2, d2a, l2a = parse_mem_l2b(params[2], high_l) + op0 = op0 + shl(l1, 4) + l2 + op1 = op1 + shl(b1, 12) + d1 + op2 = op2 + shl(b2, 12) + d2 + wputhw(op0) + if l1a then l1a() end + if l2a then l2a() end + wputhw(op1) + if d1a then d1a() end + wputhw(op2) + if d2a then d2a() end + else + werror("unrecognized encoding") + end +end + +function op_template(params, template, nparams) + if not params then return template:gsub("%x%x%x%x%x%x%x%x%x%x%x%x", "") end + -- Limit number of section buffer positions used by a single dasm_put(). + -- A single opcode needs a maximum of 5 positions. + if secpos+5 > maxsecpos then wflush() end + local lpos, apos, spos = #actlist, #actargs, secpos + local ok, err + for t in gmatch(template, "[^|]+") do + ok, err = pcall(parse_template, params, t, nparams) + if ok then return end + secpos = spos + actlist[lpos+1] = nil + actlist[lpos+2] = nil + actlist[lpos+3] = nil + actargs[apos+1] = nil + actargs[apos+2] = nil + actargs[apos+3] = nil + end + error(err, 0) +end +map_op[".template__"] = op_template +------------------------------------------------------------------------------ +-- Pseudo-opcode to mark the position where the action list is to be emitted. +map_op[".actionlist_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeactions(out, name) end) +end +-- Pseudo-opcode to mark the position where the global enum is to be emitted. 
+map_op[".globals_1"] = function(params)
+  if params then
+    -- The prefix is used as given; it is not validated here.
+    local enum_prefix = params[1]
+    wline(function(out) writeglobals(out, enum_prefix) end)
+  else
+    return "prefix"
+  end
+end
+-- Pseudo-opcode marking where the table of global names is written.
+map_op[".globalnames_1"] = function(params)
+  if params then
+    -- The C variable name is used as given; it is not validated here.
+    local cvar = params[1]
+    wline(function(out) writeglobalnames(out, cvar) end)
+  else
+    return "cvar"
+  end
+end
+-- Pseudo-opcode marking where the table of extern names is written.
+map_op[".externnames_1"] = function(params)
+  if params then
+    -- The C variable name is used as given; it is not validated here.
+    local cvar = params[1]
+    wline(function(out) writeexternnames(out, cvar) end)
+  else
+    return "cvar"
+  end
+end
+------------------------------------------------------------------------------
+-- Label definition pseudo-opcode (the `name:` form is converted to this).
+map_op[".label_1"] = function(params)
+  if not params then return "[1-9] | ->global | =>pcexpr" end
+  -- A label definition takes one section buffer position.
+  if maxsecpos < secpos+1 then wflush() end
+  local kind, num, expr = parse_label(params[1], true)
+  -- Extern labels can be referenced but not defined.
+  if kind == "EXT" then werror("bad label definition") end
+  waction("LABEL_"..kind, num, expr, 1)
+end
+------------------------------------------------------------------------------
+-- Data storage pseudo-opcode: writes each operand as one word.
+map_op[".long_*"] = function(params)
+  if not params then return "imm..." end
+  for _, text in ipairs(params) do
+    local value = tonumber(text)
+    if not value then werror("bad immediate `"..text.."'") end
+    -- Negative values are wrapped by adding 2^32.
+    if value < 0 then value = value + 2^32 end
+    wputw(value)
+    if maxsecpos < secpos+2 then wflush() end
+  end
+end
+-- Alignment pseudo-opcode.
+map_op[".align_1"] = function(params)
+  if not params then return "numpow2" end
+  if maxsecpos < secpos+1 then wflush() end
+  local align = tonumber(params[1])
+  -- Halve up to eight times; reaching exactly 1 means align is one of
+  -- 2, 4, ..., 256. A non-numeric operand skips the loop entirely.
+  local x, steps = align, 0
+  while x and steps < 8 do
+    x = x / 2
+    steps = steps + 1
+    if x == 1 then
+      waction("ALIGN", align-1, nil, 1) -- Action argument is align-1.
+      return
+    end
+  end
+  werror("bad alignment")
+end
+------------------------------------------------------------------------------
+-- Pseudo-opcode for (primitive) type definitions (map to C types).
+map_op[".type_3"] = function(params, nparams)
+  if not params then
+    if nparams == 2 then return "name, ctype" end
+    return "name, ctype, reg"
+  end
+  local name = params[1]
+  local ctype = params[2]
+  local reg = params[3]
+  if not match(name, "^[%a_][%w_]*$") then
+    werror("bad type name `"..name.."'")
+  end
+  if map_type[name] then
+    werror("duplicate type `"..name.."'")
+  end
+  -- Expose #name as sizeof(ctype) through the arch defines.
+  map_archdef["#"..name] = "sizeof("..ctype..")"
+  -- Register the type, then emit its DtN shortcut macro.
+  local num = ctypenum + 1
+  local entry = { ctype = ctype, reg = reg }
+  entry.ctypefmt = format("Dt%X(%%s)", num)
+  map_type[name] = entry
+  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
+  ctypenum = num
+end
+map_op[".type_2"] = map_op[".type_3"]
+-- Write the sorted list of defined types to `out`.
+local function dumptypes(out, lvl)
+  local names = {}
+  for name in pairs(map_type) do names[#names+1] = name end
+  sort(names)
+  out:write("Type definitions:\n")
+  for i = 1, #names do
+    local name = names[i]
+    local tp = map_type[name]
+    out:write(format(" %-20s %-20s %s\n", name, tp.ctype, tp.reg or ""))
+  end
+  out:write("\n")
+end
+------------------------------------------------------------------------------
+-- Switch the current section.
+function _M.section(num)
+  waction("SECTION", num)
+  -- SECTION is a terminal action, so flush right away.
+  wflush(true)
+end
+------------------------------------------------------------------------------
+-- Write the architecture banner and the action list to `out`.
+function _M.dumparch(out)
+  local banner = format("DynASM %s version %s, released %s\n\n",
+    _info.arch, _info.version, _info.release)
+  out:write(banner)
+  dumpactions(out)
+end
+-- Dump all user defined elements.
+function _M.dumpdef(out, lvl)
+  -- Output order: types, then globals, then externs.
+  dumptypes(out, lvl)
+  dumpglobals(out, lvl)
+  dumpexterns(out, lvl)
+end
+------------------------------------------------------------------------------
+-- Pass callbacks from/to the DynASM core.
+-- Stores the four callbacks handed in by the caller in wline, werror, wfatal
+-- and wwarn, and hands wflush back to the caller.
+function _M.passcb(wl, we, wf, ww)
+  wline, werror, wfatal, wwarn = wl, we, wf, ww
+  return wflush
+end
+-- Setup the arch-specific module.
+-- Records the architecture and option values in g_arch and g_opt.
+function _M.setup(arch, opt)
+  g_arch, g_opt = arch, opt
+end
+-- Merge the core maps and the arch-specific maps.
+-- Lookups that miss in map_op fall back to map_coreop; lookups that miss in
+-- map_def fall back to map_archdef. Returns both tables.
+function _M.mergemaps(map_coreop, map_def)
+  setmetatable(map_op, { __index = map_coreop })
+  setmetatable(map_def, { __index = map_archdef })
+  return map_op, map_def
+end
+return _M
+------------------------------------------------------------------------------
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/dynasm/Examples/run.sh luajit-2.1.0~beta3+git.1624618403.e9577376/dynasm/Examples/run.sh
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/dynasm/Examples/run.sh 1969-12-31 18:00:00.000000000 -0600
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/dynasm/Examples/run.sh 2021-10-05 12:36:23.413160484 -0500
@@ -0,0 +1,13 @@
+#!/bin/bash
+# set -x
+
+# run test
+lua ../dynasm.lua test_z_inst.c | gcc -DDASM_CHECKS -std=gnu99 -Wall -Werror -g -x c -o test_z_inst -
+./test_z_inst
+ec=$?
+
+# cleanup
+rm -f ./test_z_inst
+
+# exit
+exit $ec
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/dynasm/Examples/test_z_inst.c luajit-2.1.0~beta3+git.1624618403.e9577376/dynasm/Examples/test_z_inst.c
--- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/dynasm/Examples/test_z_inst.c 1969-12-31 18:00:00.000000000 -0600
+++ luajit-2.1.0~beta3+git.1624618403.e9577376/dynasm/Examples/test_z_inst.c 2021-10-05 12:36:23.449160674 -0500
@@ -0,0 +1,487 @@
+#include <assert.h>
+#include <stdio.h>
+#include <sys/mman.h>
+
+#include "../dasm_proto.h"
+#include "../dasm_s390x.h"
+
+// DynASM directives.
+|.arch s390x +|.actionlist actions +|.globals lab_ + +static void add(dasm_State *state) +{ + dasm_State ** Dst = &state; + + | ar r2,r3 + | br r14 +} + +/* +static void add_rrd(dasm_State *state) +{ + dasm_State **Dst = &state; + + | lgfi r4 , 0x02 + | maer r2 , r3 , r4 + | br r14 +} +*/ + +static void sub(dasm_State *state) +{ + dasm_State **Dst = &state; + + | sr r2,r3 + | br r14 +} + +static void mul(dasm_State *state) +{ + dasm_State **Dst = &state; + + | msr r2 , r3 + | br r14 +} + +static void rx(dasm_State *state) +{ + dasm_State **Dst = &state; + + int x = 1; + int y = 4095; + + | la r4, 4095(r2, r3) + | la r5, 4095(r4) + | la r1, x(r5) + | la r2, y(r1, r0) + | br r14 +} + +static void rxy(dasm_State *state) +{ + dasm_State **Dst = &state; + + int x = -524287; + int y = 524286; + + | lay r4, -524288(r2, r3) + | lay r5, 524287(r4) + | lay r1, x(r5) + | lay r2, y(r1, r0) + | br r14 +} + +static void lab(dasm_State *state) +{ + dasm_State **Dst = &state; + + // r1 = 0; do { r2 += r2; r1 += 1; } while(r1 < r3); + | la r1, 0(r0) + |1: + | agr r2, r2 + | la r1, 1(r1) + | cgr r1, r3 + | jl <1 + | br r14 +} + +static void labg(dasm_State *state) +{ + dasm_State **Dst = &state; + + // r1 = 0; do { r2 += r2; r1 += 1; } while(r1 < r3); + | la r1, 0(r0) + |1: + | agr r2, r2 + | la r1, 1(r1) + | cgr r1, r3 + | jgl <1 + | jgnl >1 + | stg r0, 0(r0) + |1: + | br r14 +} + +static void jmp_fwd(dasm_State *state) +{ + dasm_State **Dst = &state; + + // while(r2!=r3){r2 += 2}; + | j >1 + |1: + | cgr r2 , r3 + | jne >2 + | je >3 + |2: + | afi r2, 0x2 + | j <1 + |3: + | br r14 + +} + +static void add_imm16(dasm_State *state) +{ + dasm_State **Dst = &state; + + | ahi r2 , 0xf + | br r14 +} + +static void add_imm32(dasm_State *state) +{ + dasm_State **Dst = &state; + + | afi r2 , 0xe + | br r14 +} + +static void save(dasm_State *state) +{ + dasm_State **Dst = &state; + + |.define CFRAME_SPACE, 224 // Delta for sp, 8 byte aligned. + | + |// Register save area. 
+ |.define SAVE_GPRS, 272(sp) // Save area for r6-r15 (10*8 bytes), as addressed once the frame is allocated. + |.define SAVE_GPRS_P, 48(sp) // The same save area as addressed in the prologue, before sp is lowered. + | + |// Argument save area, each slot is 8-bytes (32-bit types are sign/zero extended). + |.define RESERVED, 232(sp) // Reserved for compiler use. + |.define BACKCHAIN, 224(sp) + | + |// Current stack frame. + |.define SAVE_FPR15, 216(sp) + |.define SAVE_FPR14, 208(sp) + |.define SAVE_FPR13, 200(sp) + |.define SAVE_FPR12, 192(sp) + |.define SAVE_FPR11, 184(sp) + |.define SAVE_FPR10, 176(sp) + |.define SAVE_FPR9, 168(sp) + |.define SAVE_FPR8, 160(sp) + | + |// Callee save area. + |.define CALLEESAVE, 000(sp) + | + |.macro saveregs + | // Store the GPRs before lowering sp so that the r15 slot holds the caller's sp; + | // restoreregs reloads r15 from that slot, which is what releases the frame. + | stmg r6, r15, SAVE_GPRS_P + | lay sp, -CFRAME_SPACE(sp) // Allocate stack frame. + | std f8, SAVE_FPR8 // f8-f15 are callee-saved. + | std f9, SAVE_FPR9 + | std f10, SAVE_FPR10 + | std f11, SAVE_FPR11 + | std f12, SAVE_FPR12 + | std f13, SAVE_FPR13 + | std f14, SAVE_FPR14 + | std f15, SAVE_FPR15 + |.endmacro + | + |.macro restoreregs + | ld f8, SAVE_FPR8 // f8-f15 are callee-saved. + | ld f9, SAVE_FPR9 + | ld f10, SAVE_FPR10 + | ld f11, SAVE_FPR11 + | ld f12, SAVE_FPR12 + | ld f13, SAVE_FPR13 + | ld f14, SAVE_FPR14 + | ld f15, SAVE_FPR15 + | lmg r6, r15, SAVE_GPRS // Restores the stack pointer (r15 slot holds the caller's sp). + |.endmacro + | + | // Returns (r2 + r3) * (0x10 + 0x20); r7 and r8 are clobbered in between and + | // must come back intact through saveregs/restoreregs. + | saveregs + | lgfi r7, 0x10 // 16 + | lgfi r8, 0x20 // 32 + | agr r2, r3 + | agr r7, r8 + | msgr r2, r7 + | restoreregs + | br r14 +} + +static void labmul(dasm_State *state) +{ + dasm_State **Dst = &state; + + // Multiply using an add function. + // Only correct if input is positive.
+ |->mul_func: + | stmg r6, r14, 48(sp) + | lgr r6, r2 + | lgr r7, r3 + | cgfi r7, 0 + | je >3 + | cgfi r7, 1 + | je >2 + |1: + | lgr r3, r6 + | brasl r14, ->add_func + | lay r7, -1(r7) + | cgfi r7, 1 + | jh <1 + |2: + | lmg r6, r14, 48(sp) + | br r14 + |3: + | la r2, 0(r0) + | j <2 + + |->add_func: + | agr r2, r3 + | br r14 +} + +static void pc(dasm_State *state) { + dasm_State **Dst = &state; + int MAX = 10; + dasm_growpc(Dst, MAX+1); + + | j =>MAX + for (int i = 0; i <= MAX; i++) { + |=>i: + if (i == 0) { + | br r14 + } else { + | aghi r2, i + | j =>i-1 + } + } +} + +/* +static void load_test(dasm_State *state) +{ + dasm_State **Dst = &state; + + | ltdr r2 , r3 + | br r14 +} +*/ + + +static void test_mask(dasm_State *state) +{ + dasm_State **Dst = &state; + + // The doubleword store at 4(sp) covers bytes 4..11, so reserve 16 bytes + // (with only 8 it would spill into the caller's frame) and release them + // again on both exit paths: sp (r15) is callee-saved. + |lay sp , -16(sp) + |stg r2, 4(sp) + |tm 4(sp),0x04 // Tests the first byte of the stored doubleword. + |je >2 + |jne >1 +|1: + |la sp, 16(sp) + |ar r2,r3 + |br r14 +|2: + |la sp, 16(sp) + |sr r2,r3 + |br r14 +} + +static void ssa(dasm_State *state) { + dasm_State **Dst = &state; + + | lay sp, -16(sp) + | lay r0, -1(r0) + | stg r0, 8(sp) + | xc 8(8, sp), 8(sp) + | stg r2, 0(sp) + | mvc 13(2, sp), 6(sp) + | lg r2, 8(sp) + | la sp, 16(sp) + | br r14 +} + +static void ssa_act(dasm_State *state) { + dasm_State **Dst = &state; + + int xl = 8; + int d1 = 13; + int l1 = 2; + int d2 = 6; + + | lay sp, -16(sp) + | lay r0, -1(r0) + | stg r0, 8(sp) + | xc 8(xl, sp), 8(sp) + | stg r2, 0(sp) + | mvc d1(l1, sp), d2(sp) + | lg r2, 8(sp) + | la sp, 16(sp) + | br r14 +} + +typedef struct { + int a; + int b; +} SimpleStruct; + +static void type(dasm_State *state) { + dasm_State **Dst = &state; + + | .type SIMPLE, SimpleStruct + | lay sp, -8(sp) + | stg r2, 0(sp) + | xgr r2, r2 + | l r2, SIMPLE:sp->b + | la sp, 8(sp) + | br r14 +} + +static void sil(dasm_State *state) { + dasm_State **Dst = &state; + + | lay sp, -16(sp) + | xc 0(16, sp), 0(sp) + | mvghi 0(sp), 5 + | mvhi 8(sp), 7 + | mvhhi 12(sp), 11 + | lghi r2, 0 + | ag r2, 0(sp) // r2 += 5 + | a r2, 8(sp) // r2 += 7 + | ah r2, 12(sp) // r2 += 11 + | la sp, 
16(sp) + | br r14 +} + +static void rrfe_rrd(dasm_State *state) { + dasm_State ** Dst = &state; + + | cefbr f0,r2 + | cefbr f2,r3 + | cefbr f4,r4 + | maebr f0 ,f2 ,f4 + | cfebr r2, 0, f0 + | br r14 +} + +static void rre(dasm_State *state) { + + dasm_State **Dst = &state; + + | lay sp , -8(sp) + | cefbr f0 , r2 + | cefbr f1 , r3 + | fidr f0 , f1 + | cfebr r2 ,0,f0 + | la sp, 8(sp) + | br r14 +} + +static void rsb(dasm_State *state) { + dasm_State **Dst = &state; + + | lay sp, -4(sp) + | lghi r3, 0x0706 + | lghi r4, 0 + | iill r4, 6 + | iilh r4, 7 + | st r4, 0(sp) + | lghi r2, 0 + | clm r3, 5, 0(sp) + | jne >1 + | lghi r2, 1 + |1: + | la sp, 4(sp) + | br r14 +} + +static void sqrt_rxe(dasm_State *state) +{ + dasm_State **Dst = &state; + + | lay sp , -8(sp) + | cefbr f0 , r2 + | stdy f0 , 0(sp) + | sqeb f0 ,0(r4,sp) + | cfebr r2 ,0, f0 + | la sp, 8(sp) + | br r14 + +} + +static void rxf(dasm_State *state) { + dasm_State **Dst = &state; + + | lay sp , -8(sp) + | cegbra f1 ,0, r2,0 + | cegbra f2 ,0,r3,0 + | ste f2 ,0(sp) + | maeb f1, f2, 0(sp) + | cfebr r2 ,0, f1 + | la sp, 8(sp) + | br r14 + +} + +typedef struct { + int64_t arg1; + int64_t arg2; + int64_t arg3; + void (*fn)(dasm_State *); + int64_t want; + const char *testname; +} test_table; + +test_table test[] = { + { 1, 2, 0, add, 3, "add"}, + {10, 5, 0, sub, 5, "sub"}, + { 2, 3, 0, mul, 6, "mul"}, + { 5, 7, 0, rx, 12298, "rx"}, + { 5, 7, 0, rxy, 10, "rxy"}, + { 2, 4, 0, lab, 32, "lab"}, + { 2, 4, 0, labg, 32, "labg"}, + { 2, 0, 0, add_imm16, 17, "imm16"}, + { 2, 0, 0, add_imm32, 16, "imm32"}, + { 7, 3, 0, save, 480, "save"}, + { 7, 3, 0, labmul, 21, "labmul0"}, + { 7, 0, 0, labmul, 0, "labmul1"}, + { 0, 0, 0, pc, 55, "pc"}, + { 2,12, 0, jmp_fwd, 12, "jmp_fwd"}, +// { 9,8, 0, add_rrd, 25, "add_rrd"}, +// { 2,4, 0, load_test, 4,"load_test"}, + {-1, 0, 0, ssa, 65535<<8, "ssa"}, + {-1, 0, 0, ssa_act, 65535<<8, "ssa_act"}, + {27, 0, 0, type, 27, "type"}, + { 0, 0, 0, sil, 23, "sil"}, + {15, 3,10, rrfe_rrd, 45, 
"rrfe_rrd"}, + { 0, 0, 0, rsb, 0, "rsb"}, + {12,10, 0, rre, 10, "rre"}, + {16,10, 0, sqrt_rxe, 4,"sqrt_rxe"}, + {16,10, 0, rxf, 116, "rxf"}, + { 4, 3, 0, test_mask, 1,"test_mask"} +}; + +/* +** Link and encode the code assembled in *state into a fresh anonymous mapping, +** free the DynASM state and make the mapping executable. +** Stores the code size in *size and returns the mapping; the caller munmap()s it. +** An mmap/mprotect failure is reported on stderr and ends the test run. +*/ +static void *jitcode(dasm_State **state, size_t *size) +{ + int dasm_status = dasm_link(state, size); + assert(dasm_status == DASM_S_OK); + + void *ret = mmap(0, *size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (ret == MAP_FAILED) { /* Never hand MAP_FAILED to dasm_encode(). */ + perror("mmap"); + exit(1); + } + dasm_status = dasm_encode(state, ret); + assert(dasm_status == DASM_S_OK); + dasm_free(state); + + if (mprotect(ret, *size, PROT_READ | PROT_EXEC) != 0) { + perror("mprotect"); + exit(1); + } + return ret; +} + +/* Assemble, run and check every entry of test[]; exits with 1 on the first mismatch. */ +int main(int argc, char *argv[]) +{ + dasm_State *state; + for(size_t i = 0; i < sizeof(test)/sizeof(test[0]); i++) { + dasm_init(&state, 1); + void* labels[lab__MAX]; + dasm_setupglobal(&state, labels, lab__MAX); + dasm_setup(&state, actions); + test[i].fn(state); + size_t size; + int64_t (*fptr)(int64_t, int64_t, int64_t) = jitcode(&state, &size); + int64_t got = fptr(test[i].arg1, test[i].arg2, test[i].arg3); + + if (got != test[i].want) { + /* Cast for the format: int64_t is not `long` on every host. */ + fprintf(stderr, "FAIL: test %s: want %lld, got %lld\n", test[i].testname, (long long)test[i].want, (long long)got); + exit(1); + } + munmap(fptr, size); + } + printf("all tests passed\n"); + return 0; +} diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/host/buildvm_asm.c luajit-2.1.0~beta3+git.1624618403.e9577376/src/host/buildvm_asm.c --- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/host/buildvm_asm.c 2021-10-05 12:28:22.786623190 -0500 +++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/host/buildvm_asm.c 2021-10-05 12:36:23.421160526 -0500 @@ -87,6 +87,54 @@ err: } fprintf(ctx->fp, "\t%s %s\n", opname, sym); } +#elif LJ_TARGET_S390X +/* Emit halfwords piecewise as assembler text. 
*/ +static void emit_asm_halfwords(BuildCtx *ctx, uint8_t *p, int n) +{ + uint16_t *cp = (uint16_t*)p; + n /= 2; /* Byte count -> halfword count; an odd trailing byte is not emitted. */ + int i; + for (i = 0; i < n; i++) { + if ((i & 7) == 0) + fprintf(ctx->fp, "\t.hword 0x%hx", cp[i]); + else + fprintf(ctx->fp, ",0x%hx", cp[i]); + if ((i & 7) == 7) putc('\n', ctx->fp); /* Eight halfwords per .hword line. */ + } + if ((n & 7) != 0) putc('\n', ctx->fp); /* Terminate a partially filled last line. */ +} + +/* Emit s390x text relocations. +** cp[0..n) is the code preceding the relocated field. Its last halfword is +** taken as the opcode of the branch being relocated: everything before it is +** written as raw halfwords and the branch itself is written symbolically +** against sym. Exits with an error for an odd or too short n and for any +** opcode other than bras, brasl, brc or brcl. +*/ +static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n, + const char *sym) +{ + if (n & 1 || n < 2) { + fprintf(stderr, "Error: instruction stream length invalid: %d.\n", n); + exit(1); + } + n -= 2; /* cp[n] is now the opcode halfword; n bytes of plain code precede it. */ + const char *opname = NULL; + const char *argt = ""; /* Inserted before argument. */ + int opcode = *(uint16_t*)(&cp[n]); + int arg = (opcode>>4) & 0xf; /* Bits 4-7 of the opcode halfword: the first operand. */ + switch (opcode & 0xff0f) { /* Mask the operand bits out before matching. */ + case 0xa705: opname = "bras"; argt = "%r"; break; + case 0xc005: opname = "brasl"; argt = "%r"; break; + case 0xa704: opname = "brc"; break; + case 0xc004: opname = "brcl"; break; + default: + fprintf(stderr, "Error: unsupported opcode for %s symbol relocation.\n", + sym); + exit(1); + } + emit_asm_halfwords(ctx, cp, n); + if (strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) { + /* Various fixups for external symbols outside of our binary: sym (ignoring one leading underscore) does not start with LABEL_PREFIX, so it is referenced via @PLT. */ + fprintf(ctx->fp, "\t%s %s%d, %s@PLT\n", opname, argt, arg, sym); + return; + } + fprintf(ctx->fp, "\t%s %s%d, %s\n", opname, argt, arg, sym); +} #else /* Emit words piecewise as assembler text. 
*/ static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n) @@ -303,6 +351,9 @@ void emit_asm(BuildCtx *ctx) emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]); } ofs += n+4; +#elif LJ_TARGET_S390X + emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]); + ofs += n+4; #else emit_asm_wordreloc(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]); ofs += n; @@ -311,6 +362,8 @@ void emit_asm(BuildCtx *ctx) } #if LJ_TARGET_X86ORX64 emit_asm_bytes(ctx, ctx->code+ofs, next-ofs); +#elif LJ_TARGET_S390X + emit_asm_halfwords(ctx, ctx->code+ofs, next-ofs); #else emit_asm_words(ctx, ctx->code+ofs, next-ofs); #endif diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/host/buildvm.c luajit-2.1.0~beta3+git.1624618403.e9577376/src/host/buildvm.c --- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/host/buildvm.c 2021-06-25 05:53:23.000000000 -0500 +++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/host/buildvm.c 2021-10-05 12:36:23.377160294 -0500 @@ -65,6 +65,8 @@ static int collect_reloc(BuildCtx *ctx, #include "../dynasm/dasm_ppc.h" #elif LJ_TARGET_MIPS #include "../dynasm/dasm_mips.h" +#elif LJ_TARGET_S390X +#include "../dynasm/dasm_s390x.h" #else #error "No support for this architecture (yet)" #endif diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/jit/dis_s390x.lua luajit-2.1.0~beta3+git.1624618403.e9577376/src/jit/dis_s390x.lua --- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/jit/dis_s390x.lua 1969-12-31 18:00:00.000000000 -0600 +++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/jit/dis_s390x.lua 2021-10-05 12:36:23.445160653 -0500 @@ -0,0 +1 @@ +-- Not yet implemented. 
diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lib_jit.c luajit-2.1.0~beta3+git.1624618403.e9577376/src/lib_jit.c --- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lib_jit.c 2021-06-25 05:53:23.000000000 -0500 +++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lib_jit.c 2021-10-05 12:58:31.960174477 -0500 @@ -718,6 +718,8 @@ static uint32_t jit_cpudetect(void) if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */ } #endif +#elif LJ_TARGET_S390X + /* No optional CPU features to detect (for now). */ #else #error "Missing CPU detection for this architecture" diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_arch.h luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_arch.h --- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_arch.h 2021-10-05 12:28:22.786623190 -0500 +++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_arch.h 2021-10-05 12:36:23.457160716 -0500 @@ -31,6 +31,8 @@ #define LUAJIT_ARCH_mips32 6 #define LUAJIT_ARCH_MIPS64 7 #define LUAJIT_ARCH_mips64 7 +#define LUAJIT_ARCH_S390X 8 +#define LUAJIT_ARCH_s390x 8 /* Target OS. 
*/ #define LUAJIT_OS_OTHER 0 @@ -59,6 +61,8 @@ #define LUAJIT_TARGET LUAJIT_ARCH_ARM #elif defined(__aarch64__) #define LUAJIT_TARGET LUAJIT_ARCH_ARM64 +#elif defined(__s390x__) || defined(__s390x) +#define LUAJIT_TARGET LUAJIT_ARCH_S390X #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) #define LUAJIT_TARGET LUAJIT_ARCH_PPC #elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64) @@ -428,6 +432,21 @@ #define LJ_ARCH_VERSION 10 #endif +#elif LUAJIT_TARGET == LUAJIT_ARCH_S390X + +#define LJ_ARCH_NAME "s390x" +#define LJ_ARCH_BITS 64 +#define LJ_ARCH_ENDIAN LUAJIT_BE +#define LJ_TARGET_S390X 1 +#define LJ_TARGET_EHRETREG 0xe +#define LJ_TARGET_JUMPRANGE 32 /* +-2^32 = +-4GB (32-bit, halfword aligned) */ +#define LJ_TARGET_MASKSHIFT 1 +#define LJ_TARGET_MASKROT 1 +#define LJ_TARGET_UNALIGNED 1 +#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL +#define LJ_TARGET_GC64 1 +#define LJ_ARCH_NOJIT 1 /* NYI */ + #else #error "No target architecture defined" #endif diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_asm.c luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_asm.c --- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_asm.c 2021-06-25 05:53:23.000000000 -0500 +++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_asm.c 2021-10-05 12:36:23.389160358 -0500 @@ -1642,6 +1642,8 @@ static void asm_loop(ASMState *as) #include "lj_asm_ppc.h" #elif LJ_TARGET_MIPS #include "lj_asm_mips.h" +#elif LJ_TARGET_S390X +#include "lj_asm_s390x.h" #else #error "Missing assembler for target CPU" #endif diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_ccallback.c luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_ccallback.c --- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_ccallback.c 2021-10-25 16:52:59.684087182 -0500 +++ 
luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_ccallback.c 2021-10-25 15:40:46.949027757 -0500 @@ -566,6 +566,15 @@ void lj_ccallback_mcode_free(CTState *ct if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ((float *)dp)[1] = *(float *)dp; +#elif LJ_TARGET_S390X + +#define CALLBACK_HANDLE_REGARG \ + if (isfp) { \ + if (nfpr < CCALL_NARG_FPR) { sp = &cts->cb.fpr[nfpr++]; goto done; } \ + } else { \ + if (ngpr < maxgpr) { sp = &cts->cb.gpr[ngpr++]; goto done; } \ + } + #else #error "Missing calling convention definitions for this architecture" #endif diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_ccall.c luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_ccall.c --- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_ccall.c 2021-10-05 12:28:22.790623212 -0500 +++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_ccall.c 2021-10-05 12:36:23.449160674 -0500 @@ -653,6 +653,40 @@ goto done; \ } +#elif LJ_TARGET_S390X +/* -- POSIX/s390x calling conventions --------------------------------------- */ + +#define CCALL_HANDLE_STRUCTRET \ + cc->retref = 1; /* Return all structs by reference. */ \ + cc->gpr[ngpr++] = (GPRArg)dp; + +#define CCALL_HANDLE_COMPLEXRET \ + cc->retref = 1; /* Return all complex values by reference. */ \ + cc->gpr[ngpr++] = (GPRArg)dp; + +#define CCALL_HANDLE_COMPLEXRET2 \ + UNUSED(dp); /* Nothing to do. */ + +#define CCALL_HANDLE_STRUCTARG \ + /* Pass structs of size 1, 2, 4 or 8 in a GPR by value. */ \ + if (!(sz == 1 || sz == 2 || sz == 4 || sz == 8)) { \ + rp = cdataptr(lj_cdata_new(cts, did, sz)); \ + sz = CTSIZE_PTR; /* Pass all other structs by reference. */ \ + } + +#define CCALL_HANDLE_COMPLEXARG \ + /* Pass complex numbers by reference. */ \ + /* TODO: not sure why this is different to structs. 
*/ \ + rp = cdataptr(lj_cdata_new(cts, did, sz)); \ + sz = CTSIZE_PTR; \ + +#define CCALL_HANDLE_REGARG \ + if (isfp) { \ + if (nfpr < CCALL_NARG_FPR) { dp = &cc->fpr[nfpr++]; goto done; } \ + } else { \ + if (ngpr < maxgpr) { dp = &cc->gpr[ngpr++]; goto done; } \ + } + #else #error "Missing calling convention definitions for this architecture" #endif @@ -1111,7 +1145,7 @@ static int ccall_set_args(lua_State *L, CTypeID did; CType *d; CTSize sz; - MSize n, isfp = 0, isva = 0; + MSize n, isfp = 0, isva = 0, onstack = 0; void *dp, *rp = NULL; #if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 int isf32 = 0; @@ -1154,6 +1188,7 @@ static int ccall_set_args(lua_State *L, CCALL_HANDLE_REGARG /* Handle register arguments. */ /* Otherwise pass argument on stack. */ + onstack = 1; if (CCALL_ALIGN_STACKARG && !rp && (d->info & CTF_ALIGN) > CTALIGN_PTR) { MSize align = (1u << ctype_align(d->info-CTALIGN_PTR)) -1; nsp = (nsp + align) & ~align; /* Align argument on stack. */ @@ -1234,6 +1269,16 @@ static int ccall_set_args(lua_State *L, *(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */ } #endif +#if LJ_TARGET_S390X + /* Arguments need to be sign-/zero-extended to 64-bits. */ + if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) || + (isfp && onstack)) && d->size <= 4) { + if (d->info & CTF_UNSIGNED || isfp) + *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp; + else + *(int64_t *)dp = (int64_t)*(int32_t *)dp; + } +#endif #if LJ_TARGET_X64 && LJ_ABI_WIN if (isva) { /* Windows/x64 mirrors varargs in both register sets. 
*/ if (nfpr == ngpr) diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_ccall.h luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_ccall.h --- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_ccall.h 2021-10-05 12:28:22.790623212 -0500 +++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_ccall.h 2021-10-05 12:36:23.445160653 -0500 @@ -139,6 +139,21 @@ typedef union FPRArg { struct { LJ_ENDIAN_LOHI(float f; , float g;) }; } FPRArg; +#elif LJ_TARGET_S390X + +#define CCALL_NARG_GPR 5 /* GPR 2,3,4,5,6 */ +#define CCALL_NARG_FPR 4 /* FPR 0,2,4,6 */ +#define CCALL_NRET_GPR 1 /* GPR 2 */ +#define CCALL_NRET_FPR 1 /* FPR 0 */ +#define CCALL_SPS_EXTRA 20 /* 160-byte callee save area (not sure if this is the right place) */ +#define CCALL_SPS_FREE 0 + +typedef intptr_t GPRArg; +typedef union FPRArg { + double d; + float f; +} FPRArg; + #else #error "Missing calling convention definitions for this architecture" #endif diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_err.c luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_err.c --- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_err.c 2021-06-25 05:53:23.000000000 -0500 +++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_err.c 2021-10-05 12:36:23.457160716 -0500 @@ -419,6 +419,9 @@ LJ_FUNCA int lj_err_unwind_dwarf(int ver if (version != 1) return _URC_FATAL_PHASE1_ERROR; cf = (void *)_Unwind_GetCFA(ctx); +#ifdef LJ_TARGET_S390X + cf -= 160; /* CFA points 160 bytes above r15. 
*/ +#endif L = cframe_L(cf); if ((actions & _UA_SEARCH_PHASE)) { #if LJ_UNWIND_EXT diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_frame.h luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_frame.h --- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_frame.h 2021-10-05 12:28:22.794623233 -0500 +++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_frame.h 2021-10-05 12:36:23.453160695 -0500 @@ -273,6 +273,20 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CAL #endif #define CFRAME_OFS_MULTRES 0 #define CFRAME_SHIFT_MULTRES 3 +#elif LJ_TARGET_S390X +#define CFRAME_OFS_ERRF 280 +#define CFRAME_OFS_NRES 272 +#define CFRAME_OFS_PREV 264 +#define CFRAME_OFS_L 256 +#define CFRAME_OFS_PC 168 +#define CFRAME_OFS_MULTRES 160 +#define CFRAME_SIZE 240 +/* +** TODO: it would be good if we always decoded param*8 like +** the RISC architectures do. If so then SHIFT_MULTRES will +** need to change to 3. +*/ +#define CFRAME_SHIFT_MULTRES 0 #else #error "Missing CFRAME_* definitions for this architecture" #endif diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_target.h luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_target.h --- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_target.h 2021-06-25 05:53:23.000000000 -0500 +++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_target.h 2021-10-05 12:36:23.445160653 -0500 @@ -144,6 +144,8 @@ typedef uint32_t RegCost; #include "lj_target_ppc.h" #elif LJ_TARGET_MIPS #include "lj_target_mips.h" +#elif LJ_TARGET_S390X +#include "lj_target_s390x.h" #else #error "Missing include for target CPU" #endif diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_target_s390x.h luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_target_s390x.h --- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/lj_target_s390x.h 1969-12-31 18:00:00.000000000 -0600 +++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/lj_target_s390x.h 2021-10-05 12:36:23.445160653 -0500 @@ -0,0 +1,80 @@ +/* +** Definitions for IBM 
z/Architecture (s390x) CPUs. +** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_TARGET_S390X_H +#define _LJ_TARGET_S390X_H + +/* -- Registers IDs ------------------------------------------------------- */ + +#define GPRDEF(_) \ + _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \ + _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) +#define FPRDEF(_) \ + _(F0) _(F1) _(F2) _(F3) \ + _(F4) _(F5) _(F6) _(F7) \ + _(F8) _(F9) _(F10) _(F11) \ + _(F12) _(F13) _(F14) _(F15) +/* TODO: VREG? */ + +#define RIDENUM(name) RID_##name, + +enum { + GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ + FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ + RID_MAX, + + /* Calling conventions. */ + RID_SP = RID_R15, + RID_RET = RID_R2, + RID_FPRET = RID_F0, + + /* These definitions must match with the *.dasc file(s): */ + RID_BASE = RID_R13, /* Interpreter BASE (vm_s390x.dasc: BASE is r13; r7 is RB). */ + RID_LPC = RID_R9, /* Interpreter PC. */ + RID_DISPATCH = RID_R10, /* Interpreter DISPATCH table. */ + + /* Register ranges [min, max) and number of registers. */ + RID_MIN_GPR = RID_R0, + RID_MIN_FPR = RID_F0, + RID_MAX_GPR = RID_MIN_FPR, + RID_MAX_FPR = RID_MAX, + RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, + RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR, +}; + +/* -- Register sets ------------------------------------------------------- */ + +/* -- Spill slots --------------------------------------------------------- */ + +/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. +** +** SPS_FIXED: Available fixed spill slots in interpreter frame. +** This definition must match with the *.dasc file(s). +** +** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots. 
+*/ +#define SPS_FIXED 2 +#define SPS_FIRST 2 + +#define SPOFS_TMP 0 + +#define sps_scale(slot) (4 * (int32_t)(slot)) +#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1) + +/* -- Exit state ---------------------------------------------------------- */ + +/* This definition must match with the *.dasc file(s). */ +typedef struct { + lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ + int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ + int32_t spill[256]; /* Spill slots. */ +} ExitState; + +#define EXITSTUB_SPACING 4 +#define EXITSTUBS_PER_GROUP 32 + +/* -- Instructions -------------------------------------------------------- */ + +#endif diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/Makefile luajit-2.1.0~beta3+git.1624618403.e9577376/src/Makefile --- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/Makefile 2021-10-05 12:28:22.786623190 -0500 +++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/Makefile 2021-10-05 13:54:29.633902276 -0500 @@ -245,6 +245,9 @@ else ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) TARGET_LJARCH= arm else +ifneq (,$(findstring LJ_TARGET_S390X ,$(TARGET_TESTARCH))) + TARGET_LJARCH= s390x +else ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH))) TARGET_ARCH= -D__AARCH64EB__=1 @@ -273,6 +276,7 @@ else endif endif endif +endif endif endif endif diff -urpN luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/vm_s390x.dasc luajit-2.1.0~beta3+git.1624618403.e9577376/src/vm_s390x.dasc --- luajit-2.1.0~beta3+git.1624618403.e9577376.orig/src/vm_s390x.dasc 1969-12-31 18:00:00.000000000 -0600 +++ luajit-2.1.0~beta3+git.1624618403.e9577376/src/vm_s390x.dasc 2021-10-25 16:52:46.620017650 -0500 @@ -0,0 +1,4261 @@ +|// Low-level VM code for IBM z/Architecture (s390x) CPUs in LJ_GC64 mode. +|// Bytecode interpreter, fast functions and helper functions. +|// Copyright (C) 2005-2016 Mike Pall. 
See Copyright Notice in luajit.h +| +|// This assembly targets the instruction set available on z10 (and newer) +|// machines. +| +|// ELF ABI registers: +|// r0,r1 | | volatile | +|// r2 | parameter and return value | volatile | +|// r3-r5 | parameter | volatile | +|// r6 | parameter | saved | +|// r7-r11 | | saved | +|// r12 | GOT pointer (needed?) | saved | +|// r13 | literal pool (not needed) | saved | +|// r14 | return address | volatile | +|// r15 | stack pointer | saved | +|// f0,f2,f4,f6 | parameter and return value | volatile | +|// f1,f3,f5,f7 | | volatile | +|// f8-f15 | | saved | +|// ar0,ar1 | TLS | volatile | +|// ar2-ar15 | | volatile | +| +|.arch s390x +|.section code_op, code_sub +| +|.actionlist build_actionlist +|.globals GLOB_ +|.globalnames globnames +|.externnames extnames +| +|//----------------------------------------------------------------------- +| +|// Fixed register assignments for the interpreter, callee-saved. +|.define KBASE, r8 // Constants of current Lua function. +|.define PC, r9 // Next PC. +|.define DISPATCH, r10 // Opcode dispatch table. +|.define ITYPE, r11 // Temporary used for type information. +|.define BASE, r13 // Base of current Lua stack frame. +| +|// The following temporaries are not saved across C calls, except for RB. +|.define RA, r4 // Overlaps CARG3. +|.define RB, r7 // Must be callee-save. +|.define RC, r5 // Overlaps CARG4. +|.define RD, r6 // Overlaps CARG5. +| +|// Calling conventions. Also used as temporaries. +|.define CARG1, r2 +|.define CARG2, r3 +|.define CARG3, r4 +|.define CARG4, r5 +|.define CARG5, r6 +| +|.define FARG1, f0 +|.define FARG2, f2 +|.define FARG3, f4 +|.define FARG4, f6 +| +|.define CRET1, r2 +| +|.define TMPR0, r0 +|.define TMPR1, r1 +|.define OP, r2 +| +|// Stack layout while in interpreter. Must match with lj_frame.h. +|.define CFRAME_SPACE, 240 // Delta for sp, 8 byte aligned. +| +|// Register save area. +|.define SAVE_GPRS, 288(sp) // Save area for r6-r15 (10*8 bytes). 
+|.define SAVE_GPRS_P, 48(sp) // Save area for r6-r15 (10*8 bytes) in prologue (before stack frame is allocated). +| +|// Argument save area. +|.define SAVE_ERRF, 280(sp) // Argument 4, in r5. +|.define SAVE_NRES, 272(sp) // Argument 3, in r4. Size is 4-bytes. +|.define SAVE_CFRAME, 264(sp) // Argument 2, in r3. +|.define SAVE_L, 256(sp) // Argument 1, in r2. +|.define RESERVED, 248(sp) // Reserved for compiler use. +|.define BACKCHAIN, 240(sp) // <- sp entering interpreter. +| +|// Interpreter stack frame. +|.define SAVE_FPR15, 232(sp) +|.define SAVE_FPR14, 224(sp) +|.define SAVE_FPR13, 216(sp) +|.define SAVE_FPR12, 208(sp) +|.define SAVE_FPR11, 200(sp) +|.define SAVE_FPR10, 192(sp) +|.define SAVE_FPR9, 184(sp) +|.define SAVE_FPR8, 176(sp) +|.define SAVE_PC, 168(sp) +|.define SAVE_MULTRES, 160(sp) +|.define SAVE_TMP, 160(sp) // Overlaps SAVE_MULTRES +|.define SAVE_TMP_HI, 164(sp) // High 32-bits (to avoid SAVE_MULTRES). +| +|// Callee save area (allocated by interpreter). +|.define CALLEESAVE, 000(sp) // <- sp in interpreter. +| +|.macro saveregs +| stmg r6, r15, SAVE_GPRS_P +| lay sp, -CFRAME_SPACE(sp) // Allocate stack frame. +| std f8, SAVE_FPR8 // f8-f15 are callee-saved. +| std f9, SAVE_FPR9 +| std f10, SAVE_FPR10 +| std f11, SAVE_FPR11 +| std f12, SAVE_FPR12 +| std f13, SAVE_FPR13 +| std f14, SAVE_FPR14 +| std f15, SAVE_FPR15 +|.endmacro +| +|.macro restoreregs +| ld f8, SAVE_FPR8 // f8-f15 are callee-saved. +| ld f9, SAVE_FPR9 +| ld f10, SAVE_FPR10 +| ld f11, SAVE_FPR11 +| ld f12, SAVE_FPR12 +| ld f13, SAVE_FPR13 +| ld f14, SAVE_FPR14 +| ld f15, SAVE_FPR15 +| lmg r6, r15, SAVE_GPRS // Restores the stack pointer. +|.endmacro +| +|// Type definitions. Some of these are only used for documentation. 
+|.type L, lua_State +|.type GL, global_State +|.type TVALUE, TValue +|.type GCOBJ, GCobj +|.type STR, GCstr +|.type TAB, GCtab +|.type LFUNC, GCfuncL +|.type CFUNC, GCfuncC +|.type PROTO, GCproto +|.type UPVAL, GCupval +|.type NODE, Node +|.type NARGS, int +|.type TRACE, GCtrace +|.type SBUF, SBuf +| +|//----------------------------------------------------------------------- +| +|// Instruction headers. +|.macro ins_A; .endmacro +|.macro ins_AD; .endmacro +|.macro ins_AJ; .endmacro +|.macro ins_ABC; srlg RB, RD, 8; llgcr RC, RD; .endmacro +|.macro ins_AB_; srlg RB, RD, 8; .endmacro +|.macro ins_A_C; llgcr RC, RD; .endmacro +|.macro ins_AND; lghi TMPR1, -1; xgr RD, TMPR1; .endmacro // RD = ~RD +| +|// Instruction decode+dispatch. +|.macro ins_NEXT +| llgc OP, 3(PC) +| llgh RD, 0(PC) +| llgc RA, 2(PC) +| sllg TMPR1, OP, 3 +| lg TMPR1, 0(TMPR1, DISPATCH) +| la PC, 4(PC) +| br TMPR1 +|.endmacro +| +|// Instruction footer. +|.if 1 +| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. +| .define ins_next, ins_NEXT +| .define ins_next_, ins_NEXT +|.else +| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. +| .macro ins_next +| j ->ins_next +| .endmacro +| .macro ins_next_ +| ->ins_next: +| ins_NEXT +| .endmacro +|.endif +| +|// Call decode and dispatch. +|.macro ins_callt +| // BASE = new base, RB = LFUNC, RD = nargs+1, -8(BASE) = PC +| lg PC, LFUNC:RB->pc +| llgc OP, 3(PC) +| llgc RA, 2(PC) +| sllg TMPR1, OP, 3 +| la PC, 4(PC) +| lg TMPR1, 0(TMPR1, DISPATCH) +| br TMPR1 +|.endmacro +| +|.macro ins_call +| // BASE = new base, RB = LFUNC, RD = nargs+1 +| stg PC, -8(BASE) +| ins_callt +|.endmacro +| +|// Assumes DISPATCH is relative to GL. 
+#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) +#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) +| +#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) +| +|//----------------------------------------------------------------------- +| +|// Macros to clear or set tags. +|.macro cleartp, reg +| nihf reg, 0x7fff +|.endmacro +|.macro settp, reg, tp +| oihf reg, tp<<15 +|.endmacro +|.macro settp, dst, reg, tp +| llihf dst, tp<<15 +| ogr dst, reg +|.endmacro +|.macro setint, reg +| settp reg, LJ_TISNUM +|.endmacro +|.macro setint, dst, reg +| settp dst, reg, LJ_TISNUM +|.endmacro +| +|// Macros to test operand types. +|.macro checktp_nc, reg, tp, target +| srag ITYPE, reg, 47 +| clfi ITYPE, tp +| jne target +|.endmacro +|.macro checktp, reg, tp, target +| srag ITYPE, reg, 47 +| cleartp reg +| clfi ITYPE, tp +| jne target +|.endmacro +|.macro checktptp, src, tp, target +| srag ITYPE, src, 47 +| clfi ITYPE, tp +| jne target +|.endmacro +|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro +|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro +|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro +| +|.macro checknumx, reg, target, jump +| srag ITYPE, reg, 47 +| clfi ITYPE, LJ_TISNUM +| jump target +|.endmacro +|.macro checkint, reg, target; checknumx reg, target, jne; .endmacro +|.macro checkinttp, src, target; checknumx src, target, jne; .endmacro +|.macro checknum, reg, target; checknumx reg, target, jhe; .endmacro +|.macro checknumtp, src, target; checknumx src, target, jhe; .endmacro +|.macro checknumber, src, target; checknumx src, target, jh; .endmacro +| +|.macro load_false, reg; lghi reg, -1; iihl reg, 0x7fff; .endmacro // assumes LJ_TFALSE == ~(1<<47) +|.macro load_true, reg; lghi reg, -1; iihh reg, 0xfffe; .endmacro // assumes LJ_TTRUE == ~(2<<47) +| +|.define PC_OP, -1(PC) +|.define PC_RA, -2(PC) +|.define PC_RB, -4(PC) +|.define 
PC_RC, -3(PC) +|.define PC_RD, -4(PC) +| +|.macro branchPC, reg +| // Must not clobber condition code. +| sllg TMPR1, reg, 2 +| lay PC, (-BCBIAS_J*4)(TMPR1, PC) +|.endmacro +| +|// Set current VM state. +|.macro set_vmstate, st +| lghi TMPR1, ~LJ_VMST_..st +| stg TMPR1, DISPATCH_GL(vmstate)(DISPATCH) +|.endmacro +| +|// Synthesize binary floating-point constants. +|.macro bfpconst_tobit, reg, tmp // Synthesize 2^52 + 2^51. +| llihh tmp, 0x4338 +| ldgr reg, tmp +|.endmacro +| +|// Move table write barrier back. Overwrites reg. +|.macro barrierback, tab, reg +| ni tab->marked, ~LJ_GC_BLACK // black2gray(tab) +| lg reg, (DISPATCH_GL(gc.grayagain))(DISPATCH) +| stg tab, (DISPATCH_GL(gc.grayagain))(DISPATCH) +| stg reg, tab->gclist +|.endmacro + +#if !LJ_DUALNUM +#error "Only dual-number mode supported for s390x target" +#endif + +/* Generate subroutines used by opcodes and other parts of the VM. */ +/* The .code_sub section should be last to help static branch prediction. */ +static void build_subroutines(BuildCtx *ctx) +{ + |.code_sub + | + |//----------------------------------------------------------------------- + |//-- Return handling ---------------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_returnp: + | tmll PC, FRAME_P + | je ->cont_dispatch + | + | // Return from pcall or xpcall fast func. + | nill PC, -8 + | sgr BASE, PC // Restore caller base. + | lay RA, -8(RA, PC) // Rebase RA and prepend one result. + | lg PC, -8(BASE) // Fetch PC of previous frame. + | // Prepending may overwrite the pcall frame, so do it at the end. + | load_true ITYPE + | stg ITYPE, 0(RA, BASE) // Prepend true to results. + | + |->vm_returnc: + | aghi RD, 1 // RD = nresults+1 + | je ->vm_unwind_yield + | st RD, SAVE_MULTRES + | tmll PC, FRAME_TYPE + | je ->BC_RET_Z // Handle regular return to Lua. 
+ | + |->vm_return: + | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return + | lghi TMPR1, FRAME_C + | xgr PC, TMPR1 + | tmll PC, FRAME_TYPE + | jne ->vm_returnp + | + | // Return to C. + | set_vmstate C + | nill PC, -8 + | sgr PC, BASE + | lcgr PC, PC // Previous base = BASE - delta. + | + | aghi RD, -1 + | je >2 + |1: // Move results down. + | lg RB, 0(BASE, RA) + | stg RB, -16(BASE) + | la BASE, 8(BASE) + | aghi RD, -1 + | jne <1 + |2: + | lg L:RB, SAVE_L + | stg PC, L:RB->base + |3: + | llgf RD, SAVE_MULTRES + | lgf RA, SAVE_NRES // RA = wanted nresults+1 + |4: + | cgr RA, RD + | jne >6 // More/less results wanted? + |5: + | lay BASE, -16(BASE) + | stg BASE, L:RB->top + | + |->vm_leave_cp: + | lg RA, SAVE_CFRAME // Restore previous C frame. + | stg RA, L:RB->cframe + | lghi CRET1, 0 // Ok return status for vm_pcall. + | + |->vm_leave_unw: + | restoreregs + | br r14 + | + |6: + | jl >7 // Less results wanted? + | // More results wanted. Check stack size and fill up results with nil. + | cg BASE, L:RB->maxstack + | jh >8 + | lghi TMPR1, LJ_TNIL + | stg TMPR1, -16(BASE) + | la BASE, 8(BASE) + | aghi RD, 1 + | j <4 + | + |7: // Fewer results wanted. + | cghi RA, 0 + | je <5 // But check for LUA_MULTRET+1. + | sgr RA, RD // Negative result! + | sllg TMPR1, RA, 3 + | la BASE, 0(TMPR1, BASE) // Correct top. + | j <5 + | + |8: // Corner case: need to grow stack for filling up results. + | // This can happen if: + | // - A C function grows the stack (a lot). + | // - The GC shrinks the stack in between. + | // - A return back from a lua_call() with (high) nresults adjustment. + | stg BASE, L:RB->top // Save current top held in BASE (yes). + | st RD, SAVE_MULTRES // Need to fill only remainder with nil. + | lgr CARG2, RA + | lgr CARG1, L:RB + | brasl r14, extern lj_state_growstack // (lua_State *L, int n) + | lg BASE, L:RB->top // Need the (realloced) L->top in BASE. 
+ | j <3 + | + |->vm_unwind_yield: + | lghi CRET1, LUA_YIELD + | j ->vm_unwind_c_eh + | + |->vm_unwind_c: // Unwind C stack, return from vm_pcall. + | // (void *cframe, int errcode) + | // The status has to be in CRET1 when falling into vm_unwind_c_eh: that path + | // leaves through vm_leave_unw, and vm_unwind_yield enters it with + | // CRET1 = LUA_YIELD. Load sp from CARG1 first, since CARG1 is presumably + | // the same register as CRET1 (r2) and is overwritten by the next move. + | lgr sp, CARG1 + | lgfr CRET1, CARG2 // Error return status for vm_pcall. + |->vm_unwind_c_eh: // Landing pad for external unwinder. + | lg L:RB, SAVE_L + | lg GL:RB, L:RB->glref + | lghi TMPR1, ~LJ_VMST_C + | stg TMPR1, GL:RB->vmstate + | j ->vm_leave_unw + | + |->vm_unwind_ff: // Unwind C stack, return from ff pcall. + | // (void *cframe) + | nill CARG1, CFRAME_RAWMASK // Assumes high 48-bits set in CFRAME_RAWMASK. + | lgr sp, CARG1 + |->vm_unwind_ff_eh: // Landing pad for external unwinder. + | lg L:RB, SAVE_L + | lghi RD, 1+1 // Really 1+2 results, incr. later. + | lg BASE, L:RB->base + | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table. + | la DISPATCH, GG_G2DISP(DISPATCH) + | lg PC, -8(BASE) // Fetch PC of previous frame. + | load_false RA + | lg RB, 0(BASE) + | stg RA, -16(BASE) // Prepend false to error message. + | stg RB, -8(BASE) + | lghi RA, -16 // Results start at BASE+RA = BASE-16. + | set_vmstate INTERP + | j ->vm_returnc // Increments RD/MULTRES and returns. + | + |//----------------------------------------------------------------------- + |//-- Grow stack for calls ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_growstack_c: // Grow stack for C function. + | lghi CARG2, LUA_MINSTACK + | j >2 + | + |->vm_growstack_v: // Grow stack for vararg Lua function. + | aghi RD, -16 // LJ_FR2 + | j >1 + | + |->vm_growstack_f: // Grow stack for fixarg Lua function. + | // BASE = new base, RD = nargs+1, RB = L, PC = first PC + | sllg RD, NARGS:RD, 3 + | lay RD, -8(RD, BASE) + |1: + | llgc RA, (PC2PROTO(framesize)-4)(PC) + | la PC, 4(PC) // Must point after first instruction.
+ | stg BASE, L:RB->base + | stg RD, L:RB->top + | stg PC, SAVE_PC + | lgr CARG2, RA + |2: + | // RB = L, L->base = new base, L->top = top + | lgr CARG1, L:RB + | brasl r14, extern lj_state_growstack // (lua_State *L, int n) + | lg BASE, L:RB->base + | lg RD, L:RB->top + | lg LFUNC:RB, -16(BASE) + | cleartp LFUNC:RB + | sgr RD, BASE + | srlg RD, RD, 3 + | aghi NARGS:RD, 1 + | // BASE = new base, RB = LFUNC, RD = nargs+1 + | ins_callt // Just retry the call. + | + |//----------------------------------------------------------------------- + |//-- Entry points into the assembler VM --------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_resume: // Setup C frame and resume thread. + | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) + | saveregs + | lgr L:RB, CARG1 + | stg CARG1, SAVE_L + | lgr RA, CARG2 + | lghi PC, FRAME_CP + | lghi RD, 0 + | la KBASE, CFRAME_RESUME(sp) + | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table. + | aghi DISPATCH, GG_G2DISP + | stg RD, SAVE_PC // Any value outside of bytecode is ok. + | stg RD, SAVE_CFRAME + | st RD, SAVE_NRES + | stg RD, SAVE_ERRF + | stg KBASE, L:RB->cframe + | clm RD, 1, L:RB->status + | je >2 // Initial resume (like a call). + | + | // Resume after yield (like a return). + | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH) + | set_vmstate INTERP + | stc RD, L:RB->status + | lg BASE, L:RB->base + | lg RD, L:RB->top + | sgr RD, RA + | srlg RD, RD, 3 + | aghi RD, 1 // RD = nresults+1 + | sgr RA, BASE // RA = resultofs + | lg PC, -8(BASE) + | st RD, SAVE_MULTRES + | tmll PC, FRAME_TYPE + | je ->BC_RET_Z + | j ->vm_return + | + |->vm_pcall: // Setup protected C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) + | saveregs + | lghi PC, FRAME_CP + | llgfr CARG4, CARG4 + | stg CARG4, SAVE_ERRF + | j >1 + | + |->vm_call: // Setup C frame and enter VM. 
+ | // (lua_State *L, TValue *base, int nres1) + | saveregs + | lghi PC, FRAME_C + | + |1: // Entry point for vm_pcall above (PC = ftype). + | st CARG3, SAVE_NRES + | lgr L:RB, CARG1 + | stg CARG1, SAVE_L + | lgr RA, CARG2 // Caveat: RA = CARG3. + | + | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table. + | lg KBASE, L:RB->cframe // Add our C frame to cframe chain. + | stg KBASE, SAVE_CFRAME + | stg L:RB, SAVE_PC // Any value outside of bytecode is ok. + | aghi DISPATCH, GG_G2DISP + | stg sp, L:RB->cframe + | + |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype). + | stg L:RB, DISPATCH_GL(cur_L)(DISPATCH) + | set_vmstate INTERP + | lg BASE, L:RB->base // BASE = old base (used in vmeta_call). + | agr PC, RA + | sgr PC, BASE // PC = frame delta + frame type + | + | lg RD, L:RB->top + | sgr RD, RA + | srlg NARGS:RD, NARGS:RD, 3 + | aghi NARGS:RD, 1 // RD = nargs+1 + | + |->vm_call_dispatch: + | lg LFUNC:RB, -16(RA) + | checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE. + | + |->vm_call_dispatch_f: + | lgr BASE, RA + | ins_call + | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC + | + |->vm_cpcall: // Setup protected C frame, call C. + | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) + | saveregs + | lgr L:RB, CARG1 + | stg L:RB, SAVE_L + | stg L:RB, SAVE_PC // Any value outside of bytecode is ok. + | + | lg KBASE, L:RB->stack // Compute -savestack(L, L->top). + | sg KBASE, L:RB->top + | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table. + | lghi TMPR0, 0 + | stg TMPR0, SAVE_ERRF // No error function. + | st KBASE, SAVE_NRES // Neg. delta means cframe w/o frame. + | aghi DISPATCH, GG_G2DISP + | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). + | + | lg KBASE, L:RB->cframe // Add our C frame to cframe chain. 
+ | stg KBASE, SAVE_CFRAME + | stg sp, L:RB->cframe + | stg L:RB, DISPATCH_GL(cur_L)(DISPATCH) + | + | basr r14, CARG4 // (lua_State *L, lua_CFunction func, void *ud) + | // TValue * (new base) or NULL returned in r2 (CRET1/). + | cghi CRET1, 0 + | je ->vm_leave_cp // No base? Just remove C frame. + | lgr RA, CRET1 + | lghi PC, FRAME_CP + | j <2 // Else continue with the call. + | + |//----------------------------------------------------------------------- + |//-- Metamethod handling ------------------------------------------------ + |//----------------------------------------------------------------------- + | + |//-- Continuation dispatch ---------------------------------------------- + | + |->cont_dispatch: + | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES) + | agr RA, BASE + | nill PC, -8 + | lgr RB, BASE + | sgr BASE, PC // Restore caller BASE. + | sllg TMPR1, RD, 3 + | lghi TMPR0, LJ_TNIL + | stg TMPR0, -8(RA, TMPR1) // Ensure one valid arg. + | lgr RC, RA // ... in [RC] + | lg PC, -24(RB) // Restore PC from [cont|PC]. + | lg RA, -32(RB) + |.if FFI + | clfi RA, 1 + | jle >1 + |.endif + | lg LFUNC:KBASE, -16(BASE) + | cleartp LFUNC:KBASE + | lg KBASE, LFUNC:KBASE->pc + | lg KBASE, (PC2PROTO(k))(KBASE) + | // BASE = base, RC = result, RB = meta base + | br RA // Jump to continuation. + | + |.if FFI + |1: + | je ->cont_ffi_callback // cont = 1: return from FFI callback. + | // cont = 0: Tail call from C function. + | sgr RB, BASE + | srl RB, 3 + | ahi RB, -3 + | llgfr RD, RB + | j ->vm_call_tail + |.endif + | + |->cont_cat: // BASE = base, RC = result, RB = mbase + | llgc RA, PC_RB + | sllg RA, RA, 3 + | aghi RB, -32 + | la RA, 0(RA, BASE) + | sgr RA, RB + | je ->cont_ra + | lcgr RA, RA + | srlg RA, RA, 3 + | lg L:CARG1, SAVE_L + | stg BASE, L:CARG1->base + | lgfr CARG3, RA // Caveat: RA == CARG3. 
+ | lg TMPR0, 0(RC) + | stg TMPR0, 0(RB) + | lgr CARG2, RB + | j ->BC_CAT_Z + | + |//-- Table indexing metamethods ----------------------------------------- + | + |->vmeta_tgets: + | settp STR:RC, LJ_TSTR // STR:RC = GCstr * + | stg STR:RC, SAVE_TMP + | la RC, SAVE_TMP + | llgc TMPR1, PC_OP + | cghi TMPR1, BC_GGET + | jne >1 + | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab * + | lay RB, (DISPATCH_GL(tmptv))(DISPATCH) // Store fn->l.env in g->tmptv. + | stg TAB:RA, 0(RB) + | j >2 + | + |->vmeta_tgetb: + | llgc RC, PC_RC + | setint RC + | stg RC, SAVE_TMP + | la RC, SAVE_TMP + | j >1 + | + |->vmeta_tgetv: + | llgc RC, PC_RC // Reload TValue *k from RC. + | sllg RC, RC, 3 + | la RC, 0(RC, BASE) + |1: + | llgc RB, PC_RB // Reload TValue *t from RB. + | sllg RB, RB, 3 + | la RB, 0(RB, BASE) + |2: + | lg L:CARG1, SAVE_L + | stg BASE, L:CARG1->base + | lgr CARG2, RB + | lgr CARG3, RC + | lgr L:RB, L:CARG1 + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) + | // TValue * (finished) or NULL (metamethod) returned in r2 (CRET1). + | lg BASE, L:RB->base + | ltgr RC, CRET1 + | je >3 + |->cont_ra: // BASE = base, RC = result + | llgc RA, PC_RA + | sllg RA, RA, 3 + | lg RB, 0(RC) + | stg RB, 0(RA, BASE) + | ins_next + | + |3: // Call __index metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k + | lg RA, L:RB->top + | stg PC, -24(RA) // [cont|PC] + | la PC, FRAME_CONT(RA) + | sgr PC, BASE + | lg LFUNC:RB, -16(RA) // Guaranteed to be a function here. + | lghi NARGS:RD, 2+1 // 2 args for func(t, k). + | cleartp LFUNC:RB + | j ->vm_call_dispatch_f + | + |->vmeta_tgetr: + | lgr CARG1, TAB:RB + | lgfr CARG2, RC + | brasl r14, extern lj_tab_getinth // (GCtab *t, int32_t key) + | // cTValue * or NULL returned in r2 (CRET1). 
+ | llgc RA, PC_RA + | ltgr RC, CRET1 + | jne ->BC_TGETR_Z + | lghi ITYPE, LJ_TNIL + | j ->BC_TGETR2_Z + | + |//----------------------------------------------------------------------- + | + |->vmeta_tsets: + | settp STR:RC, LJ_TSTR // STR:RC = GCstr * + | stg STR:RC, SAVE_TMP + | la RC, SAVE_TMP + | llgc TMPR0, PC_OP + | cghi TMPR0, BC_GSET + | jne >1 + | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab * + | lay RB, (DISPATCH_GL(tmptv))(DISPATCH) // Store fn->l.env in g->tmptv. + | stg TAB:RA, 0(RB) + | j >2 + | + |->vmeta_tsetb: + | llgc RC, PC_RC + | setint RC + | stg RC, SAVE_TMP + | la RC, SAVE_TMP + | j >1 + | + |->vmeta_tsetv: + | llgc RC, PC_RC // Reload TValue *k from RC. + | sllg RC, RC, 3 + | la RC, 0(RC, BASE) + |1: + | llgc RB, PC_RB // Reload TValue *t from RB. + | sllg RB, RB, 3 + | la RB, 0(RB, BASE) + |2: + | lg L:CARG1, SAVE_L + | stg BASE, L:CARG1->base + | lgr CARG2, RB + | lgr CARG3, RC + | lgr L:RB, L:CARG1 + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) + | // TValue * (finished) or NULL (metamethod) returned in r2 (CRET1). + | lg BASE, L:RB->base + | ltgr RC, CRET1 + | je >3 + | // NOBARRIER: lj_meta_tset ensures the table is not black. + | llgc RA, PC_RA + | sllg RA, RA, 3 + | lg RB, 0(RA, BASE) + | stg RB, 0(RC) + |->cont_nop: // BASE = base, (RC = result) + | ins_next + | + |3: // Call __newindex metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) + | lg RA, L:RB->top + | stg PC, -24(RA) // [cont|PC] + | llgc RC, PC_RA + | // Copy value to third argument. + | sllg RB, RC, 3 + | lg RB, 0(RB, BASE) + | stg RB, 16(RA) + | la PC, FRAME_CONT(RA) + | sgr PC, BASE + | lg LFUNC:RB, -16(RA) // Guaranteed to be a function here. + | lghi NARGS:RD, 3+1 // 3 args for func(t, k, v). 
+ | cleartp LFUNC:RB + | j ->vm_call_dispatch_f + | + |->vmeta_tsetr: + | lg L:CARG1, SAVE_L + | lgr CARG2, TAB:RB + | stg BASE, L:CARG1->base + | lgfr CARG3, RC + | stg PC, SAVE_PC + | brasl r14, extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) + | // TValue * returned in r2 (CRET1). + | lgr RC, CRET1 + | llgc RA, PC_RA + | j ->BC_TSETR_Z + | + |//-- Comparison metamethods --------------------------------------------- + | + |->vmeta_comp: + | llgh RD, PC_RD + | sllg RD, RD, 3 + | llgc RA, PC_RA + | sllg RA, RA, 3 + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | la CARG2, 0(RA, BASE) + | la CARG3, 0(RD, BASE) // Caveat: RA == CARG3 + | lgr CARG1, L:RB + | llgc CARG4, PC_OP + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) + | // 0/1 or TValue * (metamethod) returned in r2 (CRET1). + |3: + | lgr RC, CRET1 + | lg BASE, L:RB->base + | clgfi RC, 1 + | jh ->vmeta_binop + |4: + | la PC, 4(PC) + | jl >6 + |5: + | llgh RD, PC_RD + | branchPC RD + |6: + | ins_next + | + |->cont_condt: // BASE = base, RC = result + | la PC, 4(PC) + | lg ITYPE, 0(RC) + | srag ITYPE, ITYPE, 47 + | lghi TMPR0, LJ_TISTRUECOND + | clr ITYPE, TMPR0 // Branch if result is true. + | jl <5 + | j <6 + | + |->cont_condf: // BASE = base, RC = result + | lg ITYPE, 0(RC) + | srag ITYPE, ITYPE, 47 + | lghi TMPR0, LJ_TISTRUECOND + | clr ITYPE, TMPR0 // Branch if result is false. + | j <4 + | + |->vmeta_equal: + | cleartp TAB:RD + | lay PC, -4(PC) + | lgr CARG2, RA + | lgfr CARG4, RB + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | lgr CARG3, RD + | lgr CARG1, L:RB + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) + | // 0/1 or TValue * (metamethod) returned in r2 (CRET1). 
+ | j <3 + | + |->vmeta_equal_cd: + |.if FFI + | lay PC, -4(PC) + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | lgr CARG1, L:RB + | llgf CARG2, -4(PC) + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_equal_cd // (lua_State *L, BCIns ins) + | // 0/1 or TValue * (metamethod) returned in r2 (CRET1). + | j <3 + |.endif + | + |->vmeta_istype: + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | llgfr CARG2, RA + | llgfr CARG3, RD // Caveat: CARG3 == RA. + | lgr L:CARG1, L:RB + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) + | lg BASE, L:RB->base + | j <6 + | + |//-- Arithmetic metamethods --------------------------------------------- + | + |->vmeta_arith_vno: + | llgc RB, PC_RB + | llgc RC, PC_RC + |->vmeta_arith_vn: + | sllg RB, RB, 3 + | sllg RC, RC, 3 + | la RB, 0(RB, BASE) + | la RC, 0(RC, KBASE) + | j >1 + | + |->vmeta_arith_nvo: + | llgc RC, PC_RC + | llgc RB, PC_RB + |->vmeta_arith_nv: + | sllg RC, RC, 3 + | sllg RB, RB, 3 + | la TMPR1, 0(RC, KBASE) + | la RC, 0(RB, BASE) + | lgr RB, TMPR1 + | j >1 + | + |->vmeta_unm: + | llgh RD, PC_RD + | sllg RD, RD, 3 + | la RC, 0(RD, BASE) + | lgr RB, RC + | j >1 + | + |->vmeta_arith_vvo: + | llgc RB, PC_RB + | llgc RC, PC_RC + |->vmeta_arith_vv: + | sllg RC, RC, 3 + | sllg RB, RB, 3 + | la RB, 0(RB, BASE) + | la RC, 0(RC, BASE) + |1: + | llgc RA, PC_RA + | sllg RA, RA, 3 + | la RA, 0(RA, BASE) + | llgc CARG5, PC_OP // Caveat: CARG5 == RD. + | lgr CARG2, RA + | lgr CARG3, RB // Caveat: CARG3 == RA. + | // lgr CARG4, RC // Caveat: CARG4 == RC (nop, so commented out). + | lg L:CARG1, SAVE_L + | stg BASE, L:CARG1->base + | lgr L:RB, L:CARG1 + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) + | // NULL (finished) or TValue * (metamethod) returned in r2 (CRET1). + | lg BASE, L:RB->base + | cghi CRET1, 0 + | lgr RC, CRET1 + | je ->cont_nop + | + | // Call metamethod for binary op. 
+ |->vmeta_binop: + | // BASE = base, RC = new base, stack = cont/func/o1/o2 + | lgr RA, RC + | sgr RC, BASE + | stg PC, -24(RA) // [cont|PC] + | la PC, FRAME_CONT(RC) + | lghi NARGS:RD, 2+1 // 2 args for func(o1, o2). + | j ->vm_call_dispatch + | + |->vmeta_len: + | llgh RD, PC_RD + | sllg RD, RD, 3 + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | la CARG2, 0(RD, BASE) + | lgr L:CARG1, L:RB + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_len // (lua_State *L, TValue *o) + | // NULL (retry) or TValue * (metamethod) returned in r2 (CRET1). + | lgr RC, CRET1 + | lg BASE, L:RB->base +#if LJ_52 + | cghi RC, 0 + | jne ->vmeta_binop // Binop call for compatibility. + | llgh RD, PC_RD + | sllg RD, RD, 3 + | lg TAB:CARG1, 0(RD, BASE) + | cleartp TAB:CARG1 + | j ->BC_LEN_Z +#else + | j ->vmeta_binop // Binop call for compatibility. +#endif + | + |//-- Call metamethod ---------------------------------------------------- + | + |->vmeta_call_ra: + | la RA, 16(RA, BASE) // RA previously set to RA*8. + |->vmeta_call: // Resolve and call __call metamethod. + | // BASE = old base, RA = new base, RC = nargs+1, PC = return + | stg NARGS:RD, SAVE_TMP // Save RA, RC for us (not sure about this). + | lgr RB, RA + | lg L:CARG1, SAVE_L + | stg BASE, L:CARG1->base + | lay CARG2, -16(RA) + | sllg RD, RD, 3 + | lay CARG3, -8(RA, RD) // Caveat: CARG3 == RA. + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) + | lgr RA, RB + | lg L:RB, SAVE_L + | lg BASE, L:RB->base + | lg NARGS:RD, SAVE_TMP + | lg LFUNC:RB, -16(RA) + | aghi NARGS:RD, 1 // 32-bit on x64. + | // This is fragile. L->base must not move, KBASE must always be defined. + | cgr KBASE, BASE // Continue with CALLT if flag set. + | je ->BC_CALLT_Z + | cleartp LFUNC:RB + | lgr BASE, RA + | ins_call // Otherwise call resolved metamethod. 
+ | + |//-- Argument coercion for 'for' statement ------------------------------ + | + |->vmeta_for: + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | lgr CARG2, RA + | lgr CARG1, RB + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_for // (lua_State *L, TValue *base) + | lg BASE, L:RB->base + | llgc OP, PC_OP + | llgc RA, PC_RA + | llgh RD, PC_RD + | sllg TMPR1, OP, 3 + | lg TMPR1, GG_DISP2STATIC(TMPR1, DISPATCH) // Retry FORI or JFORI. + | br TMPR1 + | + |//----------------------------------------------------------------------- + |//-- Fast functions ----------------------------------------------------- + |//----------------------------------------------------------------------- + | + |.macro .ffunc, name + |->ff_ .. name: + |.endmacro + | + |.macro .ffunc_1, name + |->ff_ .. name: + | clfi NARGS:RD, 1+1; jl ->fff_fallback + |.endmacro + | + |.macro .ffunc_2, name + |->ff_ .. name: + | clfi NARGS:RD, 2+1; jl ->fff_fallback + |.endmacro + | + |.macro .ffunc_n, name, op + | .ffunc_1 name + | lg TMPR0, 0(BASE) + | checknumtp TMPR0, ->fff_fallback + | op f0, 0(BASE) + |.endmacro + | + |.macro .ffunc_n, name + | .ffunc_n name, ld + |.endmacro + | + |.macro .ffunc_nn, name + | .ffunc_2 name + | lg TMPR1, 0(BASE) + | lg TMPR0, 8(BASE) + | ld FARG1, 0(BASE) + | ld FARG2, 8(BASE) + | checknumtp TMPR1, ->fff_fallback + | checknumtp TMPR0, ->fff_fallback + |.endmacro + | + |// Inlined GC threshold check. Caveat: uses label 1. 
+ |.macro ffgccheck + | lg RB, (DISPATCH_GL(gc.total))(DISPATCH) + | clg RB, (DISPATCH_GL(gc.threshold))(DISPATCH) + | jl >1 + | brasl r14, ->fff_gcstep + |1: + |.endmacro + | + |//-- Base library: checks ----------------------------------------------- + | + |.ffunc_1 assert + | lg RB, 0(BASE) + | srag ITYPE, RB, 47 + | clfi ITYPE, LJ_TISTRUECOND; jhe ->fff_fallback + | lg PC, -8(BASE) + | st RD, SAVE_MULTRES + | lg RB, 0(BASE) + | stg RB, -16(BASE) + | ahi RD, -2 + | je >2 + | lgr RA, BASE + |1: + | la RA, 8(RA) + | lg RB, 0(RA) + | stg RB, -16(RA) + | brct RD, <1 + |2: + | llgf RD, SAVE_MULTRES + | j ->fff_res_ + | + |.ffunc_1 type + | lg RC, 0(BASE) + | srag RC, RC, 47 + | lghi RB, LJ_TISNUM + | clgr RC, RB + | jnl >1 + | lgr RC, RB + |1: + | lghi TMPR0, -1 + | xgr RC, TMPR0 + |2: + | lg CFUNC:RB, -16(BASE) + | cleartp CFUNC:RB + | sllg RC, RC, 3 + | lg STR:RC, ((char *)(&((GCfuncC *)0)->upvalue))(RC, CFUNC:RB) + | lg PC, -8(BASE) + | settp STR:RC, LJ_TSTR + | stg STR:RC, -16(BASE) + | j ->fff_res1 + | + |//-- Base library: getters and setters --------------------------------- + | + |.ffunc_1 getmetatable + | lg TAB:RB, 0(BASE) + | lg PC, -8(BASE) + | checktab TAB:RB, >6 + |1: // Field metatable must be at same offset for GCtab and GCudata! + | lg TAB:RB, TAB:RB->metatable + |2: + | lghi TMPR0, LJ_TNIL + | stg TMPR0, -16(BASE) + | cghi TAB:RB, 0 + | je ->fff_res1 + | settp TAB:RC, TAB:RB, LJ_TTAB + | stg TAB:RC, -16(BASE) // Store metatable as default result. + | lg STR:RC, (DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable))(DISPATCH) + | llgf RA, TAB:RB->hmask + | n RA, STR:RC->hash + | settp STR:RC, LJ_TSTR + | mghi RA, #NODE + | ag NODE:RA, TAB:RB->node + |3: // Rearranged logic, because we expect _not_ to find the key. + | cg STR:RC, NODE:RA->key + | je >5 + |4: + | ltg NODE:RA, NODE:RA->next + | jne <3 + | j ->fff_res1 // Not found, keep default result. + |5: + | lg RB, NODE:RA->val + | cghi RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value. 
+ | stg RB, -16(BASE) // Return value of mt.__metatable. + | j ->fff_res1 + | + |6: + | clfi ITYPE, LJ_TUDATA; je <1 + | clfi ITYPE, LJ_TISNUM; jh >7 + | lhi ITYPE, LJ_TISNUM + |7: + | lhi TMPR0, -1 + | xr ITYPE, TMPR0 // not ITYPE + | llgfr ITYPE, ITYPE + | sllg ITYPE, ITYPE, 3 + | lg TAB:RB, (DISPATCH_GL(gcroot[GCROOT_BASEMT]))(ITYPE, DISPATCH) + | j <2 + | + |.ffunc_2 setmetatable + | lg TAB:RB, 0(BASE) + | lgr TAB:TMPR1, TAB:RB + | checktab TAB:RB, ->fff_fallback + | // Fast path: no mt for table yet and not clearing the mt. + | lghi TMPR0, 0 + | cg TMPR0, TAB:RB->metatable; jne ->fff_fallback + | lg TAB:RA, 8(BASE) + | checktab TAB:RA, ->fff_fallback + | stg TAB:RA, TAB:RB->metatable + | lg PC, -8(BASE) + | stg TAB:TMPR1, -16(BASE) // Return original table. + | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | je >1 + | // Possible write barrier. Table is black, but skip iswhite(mt) check. + | barrierback TAB:RB, RC + |1: + | j ->fff_res1 + | + |.ffunc_2 rawget + | lg TAB:CARG2, 0(BASE) + | checktab TAB:CARG2, ->fff_fallback + | la CARG3, 8(BASE) + | lg CARG1, SAVE_L + | brasl r14, extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) + | // cTValue * returned in r2 (CRET1). + | // Copy table slot. + | lg RB, 0(CRET1) + | lg PC, -8(BASE) + | stg RB, -16(BASE) + | j ->fff_res1 + | + |//-- Base library: conversions ------------------------------------------ + | + |.ffunc tonumber + | // Only handles the number case inline (without a base argument). + | clfi NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. + | lg RB, 0(BASE) + | checknumber RB, ->fff_fallback + | lg PC, -8(BASE) + | stg RB, -16(BASE) + | j ->fff_res1 + | + |.ffunc_1 tostring + | // Only handles the string or number case inline. + | lg PC, -8(BASE) + | lg STR:RB, 0(BASE) + | checktp_nc STR:RB, LJ_TSTR, >3 + | // A __tostring method in the string base metatable is ignored. 
+ |2: + | stg STR:RB, -16(BASE) + | j ->fff_res1 + |3: // Handle numbers inline, unless a number base metatable is present. + | clfi ITYPE, LJ_TISNUM; jh ->fff_fallback_1 + | lghi TMPR0, 0 + | cg TMPR0, (DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM]))(DISPATCH) + | jne ->fff_fallback + | ffgccheck // Caveat: uses label 1. + | lg L:RB, SAVE_L + | stg BASE, L:RB->base // Add frame since C call can throw. + | stg PC, SAVE_PC // Redundant (but a defined value). + | lgr CARG2, BASE // Otherwise: CARG2 == BASE + | lgr L:CARG1, L:RB + | brasl r14, extern lj_strfmt_number // (lua_State *L, cTValue *o) + | // GCstr returned in r2 (CRET1). + | lg BASE, L:RB->base + | settp STR:RB, CRET1, LJ_TSTR + | j <2 + | + |//-- Base library: iterators ------------------------------------------- + | + |.ffunc_1 next + | je >2 // Missing 2nd arg? + |1: + | lg CARG2, 0(BASE) + | checktab CARG2, ->fff_fallback + | lg L:RB, SAVE_L + | stg BASE, L:RB->base // Add frame since C call can throw. + | stg BASE, L:RB->top // Dummy frame length is ok. + | lg PC, -8(BASE) + | la CARG3, 8(BASE) + | lgr CARG1, L:RB + | stg PC, SAVE_PC // Needed for ITERN fallback. + | brasl r14, extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) + | // Flag returned in r2 (CRET1). + | lg BASE, L:RB->base + | ltr RD, CRET1; je >3 // End of traversal? + | // Copy key and value to results. + | lg RB, 8(BASE) + | lg RD, 16(BASE) + | stg RB, -16(BASE) + | stg RD, -8(BASE) + |->fff_res2: + | lghi RD, 1+2 + | j ->fff_res + |2: // Set missing 2nd arg to nil. + | lghi TMPR0, LJ_TNIL + | stg TMPR0, 8(BASE) + | j <1 + |3: // End of traversal: return nil. 
+ | lghi TMPR0, LJ_TNIL + | stg TMPR0, -16(BASE) + | j ->fff_res1 + | + |.ffunc_1 pairs + | lg TAB:RB, 0(BASE) + | lgr TMPR1, TAB:RB + | checktab TAB:RB, ->fff_fallback +#if LJ_52 + | ltg TMPR0, TAB:RB->metatable; jne ->fff_fallback +#endif + | lg CFUNC:RD, -16(BASE) + | cleartp CFUNC:RD + | lg CFUNC:RD, CFUNC:RD->upvalue[0] + | settp CFUNC:RD, LJ_TFUNC + | lg PC, -8(BASE) + | stg CFUNC:RD, -16(BASE) + | stg TMPR1, -8(BASE) + | lghi TMPR0, LJ_TNIL + | stg TMPR0, 0(BASE) + | lghi RD, 1+3 + | j ->fff_res + | + |.ffunc_2 ipairs_aux + | lg TAB:RB, 0(BASE) + | checktab TAB:RB, ->fff_fallback + | lg RA, 8(BASE) + | checkint RA, ->fff_fallback + | lg PC, -8(BASE) + | aghi RA, 1 + | setint ITYPE, RA + | stg ITYPE, -16(BASE) + | cl RA, TAB:RB->asize; jhe >2 // Not in array part? + | lg RD, TAB:RB->array + | lgfr TMPR1, RA + | sllg TMPR1, TMPR1, 3 + | la RD, 0(TMPR1, RD) + |1: + | lg TMPR0, 0(RD) + | cghi TMPR0, LJ_TNIL; je ->fff_res0 + | // Copy array slot. + | stg TMPR0, -8(BASE) + | j ->fff_res2 + |2: // Check for empty hash part first. Otherwise call C function. + | lt TMPR0, TAB:RB->hmask; je ->fff_res0 + | lgr CARG1, TAB:RB + | lgfr CARG2, RA + | brasl r14, extern lj_tab_getinth // (GCtab *t, int32_t key) + | // cTValue * or NULL returned in r2 (CRET1). 
+ | ltgr RD, CRET1 + | jne <1 + |->fff_res0: + | lghi RD, 1+0 + | j ->fff_res + | + |.ffunc_1 ipairs + | lg TAB:RB, 0(BASE) + | lgr TMPR1, TAB:RB + | checktab TAB:RB, ->fff_fallback +#if LJ_52 + | lghi TMPR0, 0 + | cg TMPR0, TAB:RB->metatable; jne ->fff_fallback +#endif + | lg CFUNC:RD, -16(BASE) + | cleartp CFUNC:RD + | lg CFUNC:RD, CFUNC:RD->upvalue[0] + | settp CFUNC:RD, LJ_TFUNC + | lg PC, -8(BASE) + | stg CFUNC:RD, -16(BASE) + | stg TMPR1, -8(BASE) + | llihf RD, LJ_TISNUM<<15 + | stg RD, 0(BASE) + | lghi RD, 1+3 + | j ->fff_res + | + |//-- Base library: catch errors ---------------------------------------- + | + |.ffunc_1 pcall + | la RA, 16(BASE) + | aghi NARGS:RD, -1 + | lghi PC, 16+FRAME_PCALL + |1: + | llgc RB, (DISPATCH_GL(hookmask))(DISPATCH) + | srlg RB, RB, HOOK_ACTIVE_SHIFT(r0) + | nill RB, 1 // High bits already zero (from load). + | agr PC, RB // Remember active hook before pcall. + | // Note: this does a (harmless) copy of the function to the PC slot, too. + | lgr KBASE, RD + |2: + | sllg TMPR1, KBASE, 3 + | lg RB, -24(TMPR1, RA) + | stg RB, -16(TMPR1, RA) + | aghi KBASE, -1 + | jh <2 + | j ->vm_call_dispatch + | + |.ffunc_2 xpcall + | lg LFUNC:RA, 8(BASE) + | checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback + | lg LFUNC:RB, 0(BASE) // Swap function and traceback. + | stg LFUNC:RA, 0(BASE) + | stg LFUNC:RB, 8(BASE) + | la RA, 24(BASE) + | aghi NARGS:RD, -2 + | lghi PC, 24+FRAME_PCALL + | j <1 + | + |//-- Coroutine library -------------------------------------------------- + | + |.macro coroutine_resume_wrap, resume + |.if resume + |.ffunc_1 coroutine_resume + | lg L:RB, 0(BASE) + | lgr L:TMPR0, L:RB // Save type for checktptp. 
+ | cleartp L:RB + |.else + |.ffunc coroutine_wrap_aux + | lg CFUNC:RB, -16(BASE) + | cleartp CFUNC:RB + | lg L:RB, CFUNC:RB->upvalue[0].gcr + | cleartp L:RB + |.endif + | lg PC, -8(BASE) + | stg PC, SAVE_PC + | stg L:RB, SAVE_TMP + |.if resume + | checktptp L:TMPR0, LJ_TTHREAD, ->fff_fallback + |.endif + | ltg TMPR0, L:RB->cframe; jne ->fff_fallback + | cli L:RB->status, LUA_YIELD; jh ->fff_fallback + | lg RA, L:RB->top + | je >1 // Status != LUA_YIELD (i.e. 0)? + | cg RA, L:RB->base // Check for presence of initial func. + | je ->fff_fallback + | lg PC, -8(RA) // Move initial function up. + | stg PC, 0(RA) + | la RA, 8(RA) + |1: + | sllg TMPR1, NARGS:RD, 3 + |.if resume + | lay PC, -16(TMPR1, RA) // Check stack space (-1-thread). + |.else + | lay PC, -8(TMPR1, RA) // Check stack space (-1). + |.endif + | clg PC, L:RB->maxstack; jh ->fff_fallback + | stg PC, L:RB->top + | + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + |.if resume + | la BASE, 8(BASE) // Keep resumed thread in stack for GC. + |.endif + | stg BASE, L:RB->top + |.if resume + | lay RB, -24(TMPR1, BASE) // RB = end of source for stack move. + |.else + | lay RB, -16(TMPR1, BASE) // RB = end of source for stack move. + |.endif + | sgr RB, PC // Relative to PC. + | + | cgr PC, RA + | je >3 + |2: // Move args to coroutine. + | lg RC, 0(RB, PC) + | stg RC, -8(PC) + | lay PC, -8(PC) + | cgr PC, RA + | jne <2 + |3: + | lgr CARG2, RA + | lg L:CARG1, SAVE_TMP + | lghi CARG3, 0 + | lghi CARG4, 0 + | brasl r14, ->vm_resume // (lua_State *L, TValue *base, 0, 0) + | + | lg L:RB, SAVE_L + | lg L:PC, SAVE_TMP + | lg BASE, L:RB->base + | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH) + | set_vmstate INTERP + | + | clfi CRET1, LUA_YIELD + | jh >8 + |4: + | lg RA, L:PC->base + | lg KBASE, L:PC->top + | stg RA, L:PC->top // Clear coroutine stack. + | lgr PC, KBASE + | sgr PC, RA + | je >6 // No results? + | la RD, 0(PC, BASE) + | llgfr PC, PC + | srlg PC, PC, 3 + | clg RD, L:RB->maxstack + | jh >9 // Need to grow stack? 
+ | + | lgr RB, BASE + | sgr RB, RA + |5: // Move results from coroutine. + | lg RD, 0(RA) + | stg RD, 0(RA, RB) + | la RA, 8(RA) + | cgr RA, KBASE + | jne <5 + |6: + |.if resume + | la RD, 2(PC) // nresults+1 = 1 + true + results. + | load_true ITYPE // Prepend true to results. + | stg ITYPE, -8(BASE) + |.else + | la RD, 1(PC) // nresults+1 = 1 + results. + |.endif + |7: + | lg PC, SAVE_PC + | st RD, SAVE_MULTRES + |.if resume + | lghi RA, -8 + |.else + | lghi RA, 0 + |.endif + | tmll PC, FRAME_TYPE + | je ->BC_RET_Z + | j ->vm_return + | + |8: // Coroutine returned with error (at co->top-1). + |.if resume + | load_false ITYPE // Prepend false to results. + | stg ITYPE, -8(BASE) + | lg RA, L:PC->top + | aghi RA, -8 + | stg RA, L:PC->top // Clear error from coroutine stack. + | // Copy error message. + | lg RD, 0(RA) + | stg RD, 0(BASE) + | lghi RD, 1+2 // nresults+1 = 1 + false + error. + | j <7 + |.else + | lgr CARG2, L:PC + | lgr CARG1, L:RB + | brasl r14, extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) + | // Error function does not return. + |.endif + | + |9: // Handle stack expansion on return from yield. + | lg L:RA, SAVE_TMP + | stg KBASE, L:RA->top // Undo coroutine stack clearing. + | lgr CARG2, PC + | lgr CARG1, L:RB + | brasl r14, extern lj_state_growstack // (lua_State *L, int n) + | lg L:PC, SAVE_TMP + | lg BASE, L:RB->base + | j <4 // Retry the stack move. 
+ |.endmacro + | + | coroutine_resume_wrap 1 // coroutine.resume + | coroutine_resume_wrap 0 // coroutine.wrap + | + |.ffunc coroutine_yield + | lg L:RB, SAVE_L + | lg TMPR0, L:RB->cframe + | tmll TMPR0, CFRAME_RESUME + | je ->fff_fallback + | stg BASE, L:RB->base + | sllg RD, NARGS:RD, 3 + | lay RD, -8(RD, BASE) + | stg RD, L:RB->top + | lghi RD, 0 + | stg RD, L:RB->cframe + | lghi CRET1, LUA_YIELD + | stc CRET1, L:RB->status + | j ->vm_leave_unw + | + |//-- Math library ------------------------------------------------------- + | + |.ffunc_1 math_abs + | lg RB, 0(BASE) + | checkint RB, >3 + | lpr RB, RB; jo >2 + |->fff_resbit: + |->fff_resi: + | setint RB + |->fff_resRB: + | lg PC, -8(BASE) + | stg RB, -16(BASE) + | j ->fff_res1 + |2: + | llihh RB, 0x41e0 // 2^31 + | j ->fff_resRB + |3: + | jh ->fff_fallback + | nihh RB, 0x7fff // Clear sign bit. + | lg PC, -8(BASE) + | stg RB, -16(BASE) + | j ->fff_res1 + | + |.ffunc_n math_sqrt, sqdb + |->fff_resf0: + | lg PC, -8(BASE) + | stdy f0, -16(BASE) + | // fallthrough + | + |->fff_res1: + | lghi RD, 1+1 + |->fff_res: + | st RD, SAVE_MULTRES + |->fff_res_: + | tmll PC, FRAME_TYPE + | jne >7 + |5: + | llgc TMPR1, PC_RB + | clgr TMPR1, RD // More results expected? + | jh >6 + | // Adjust BASE. KBASE is assumed to be set for the calling frame. + | llgc RA, PC_RA + | lcgr RA, RA + | sllg RA, RA, 3 + | lay BASE, -16(RA, BASE) // base = base - (RA+2)*8 + | ins_next + | + |6: // Fill up results with nil. + | sllg TMPR1, RD, 3 + | lghi TMPR0, LJ_TNIL + | stg TMPR0, -24(TMPR1, BASE) + | la RD, 1(RD) + | j <5 + | + |7: // Non-standard return case. + | lghi RA, -16 // Results start at BASE+RA = BASE-16. + | j ->vm_return + | + |.macro math_round, func + | .ffunc math_ .. func + | lg RB, 0(BASE) + | ld f0, 0(BASE) + | checknumx RB, ->fff_resRB, je + | jh ->fff_fallback + | brasl r14, ->vm_ .. 
func + | cfdbr RB, 0, f0 + | jo ->fff_resf0 + | llgfr RB, RB + | j ->fff_resi + |.endmacro + | + | math_round floor + | math_round ceil + | + |.ffunc math_log + | chi NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. + | lg TMPR0, 0(BASE) + | ld FARG1, 0(BASE) + | checknumtp TMPR0, ->fff_fallback + | brasl r14, extern log + | j ->fff_resf0 + | + |.macro math_extern, func + | .ffunc_n math_ .. func + | brasl r14, extern func + | j ->fff_resf0 + |.endmacro + | + |.macro math_extern2, func + | .ffunc_nn math_ .. func + | brasl r14, extern func + | j ->fff_resf0 + |.endmacro + | + | math_extern log10 + | math_extern exp + | math_extern sin + | math_extern cos + | math_extern tan + | math_extern asin + | math_extern acos + | math_extern atan + | math_extern sinh + | math_extern cosh + | math_extern tanh + | math_extern2 pow + | math_extern2 atan2 + | math_extern2 fmod + | + |.ffunc_2 math_ldexp + | lg TMPR0, 0(BASE) + | ld FARG1, 0(BASE) + | lg CARG1, 8(BASE) + | checknumtp TMPR0, ->fff_fallback + | checkinttp CARG1, ->fff_fallback + | lgfr CARG1, CARG1 + | brasl r14, extern ldexp // (double, int) + | j ->fff_resf0 + | + |.ffunc_n math_frexp + | la CARG1, SAVE_TMP + | brasl r14, extern frexp + | llgf RB, SAVE_TMP + | lg PC, -8(BASE) + | stdy f0, -16(BASE) + | setint RB + | stg RB, -8(BASE) + | lghi RD, 1+2 + | j ->fff_res + | + |.ffunc_n math_modf + | lay CARG1, -16(BASE) + | brasl r14, extern modf // (double, double*) + | lg PC, -8(BASE) + | stdy f0, -8(BASE) + | lghi RD, 1+2 + | j ->fff_res + | + |.macro math_minmax, name, cjmp + | .ffunc name + | lghi RA, 2*8 + | sllg TMPR1, RD, 3 + | lg RB, 0(BASE) + | ld f0, 0(BASE) + | checkint RB, >4 + |1: // Handle integers. + | clgr RA, TMPR1; jhe ->fff_resRB + | lg TMPR0, -8(RA, BASE) + | checkint TMPR0, >3 + | cr RB, TMPR0 + | cjmp >2 + | lgr RB, TMPR0 + |2: + | aghi RA, 8 + | j <1 + |3: + | jh ->fff_fallback + | // Convert intermediate result to number and continue below. 
+ | cdfbr f0, RB + | ldgr f1, TMPR0 + | j >6 + |4: + | jh ->fff_fallback + |5: // Handle numbers or integers. + | clgr RA, TMPR1; jhe ->fff_resf0 + | lg RB, -8(RA, BASE) + | ldy f1, -8(RA, BASE) + | checknumx RB, >6, jl + | jh ->fff_fallback + | cdfbr f1, RB + |6: + | cdbr f0, f1 + | cjmp >7 + | ldr f0, f1 + |7: + | aghi RA, 8 + | j <5 + |.endmacro + | + | math_minmax math_min, jnh + | math_minmax math_max, jnl + | + |//-- String library ----------------------------------------------------- + | + |.ffunc string_byte // Only handle the 1-arg case here. + | chi NARGS:RD, 1+1; jne ->fff_fallback + | lg STR:RB, 0(BASE) + | checkstr STR:RB, ->fff_fallback + | lg PC, -8(BASE) + | ltg TMPR0, STR:RB->len + | je ->fff_res0 // Return no results for empty string. + | llgc RB, STR:RB[1] + | j ->fff_resi + | + |.ffunc string_char // Only handle the 1-arg case here. + | ffgccheck + | chi NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg. + | lg RB, 0(BASE) + | checkint RB, ->fff_fallback + | clfi RB, 255; jh ->fff_fallback + | strvh RB, SAVE_TMP // Store [c,0]. + | lghi TMPR1, 1 + | la RD, SAVE_TMP // Points to the [c,0] bytes on the stack. The byte order comes from the strvh above; s390x itself is big-endian. + |->fff_newstr: + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | llgfr CARG3, TMPR1 // Zero-extended to size_t. + | lgr CARG2, RD + | lgr CARG1, L:RB + | stg PC, SAVE_PC + | brasl r14, extern lj_str_new // (lua_State *L, char *str, size_t l) + |->fff_resstr: + | // GCstr * returned in r2 (CRET1). + | lgr STR:RD, CRET1 + | lg BASE, L:RB->base + | lg PC, -8(BASE) + | settp STR:RD, LJ_TSTR + | stg STR:RD, -16(BASE) + | j ->fff_res1 + | + |.ffunc string_sub + | ffgccheck + | lghi TMPR1, -1 // Default end = -1, kept when only two arguments are passed. + | clfi NARGS:RD, 1+2; jl ->fff_fallback + | jnh >1 + | lg TMPR1, 16(BASE) + | checkint TMPR1, ->fff_fallback + |1: + | lg STR:RB, 0(BASE) + | checkstr STR:RB, ->fff_fallback + | lg ITYPE, 8(BASE) + | lgfr RA, ITYPE + | srag ITYPE, ITYPE, 47 + | cghi ITYPE, LJ_TISNUM + | jne ->fff_fallback + | llgf RC, STR:RB->len + | clr RC, TMPR1 // len < end?
(unsigned compare) + | jl >5 + |2: + | cghi RA, 0 // start <= 0? + | jle >7 + |3: + | sr TMPR1, RA // start > end? + | jnhe ->fff_emptystr + | la RD, (#STR-1)(RA, STR:RB) + | ahi TMPR1, 1 + |4: + | j ->fff_newstr + | + |5: // Negative end or overflow. + | chi TMPR1, 0 + | jnl >6 + | ahi TMPR1, 1 + | ar TMPR1, RC // end = end+(len+1) + | j <2 + |6: // Overflow. + | lr TMPR1, RC // end = len + | j <2 + | + |7: // Negative start or underflow. + | je >8 + | agr RA, RC // start = start+(len+1) + | aghi RA, 1 + | jh <3 // start > 0? + |8: // Underflow. + | lghi RA, 1 // start = 1 + | j <3 + | + |->fff_emptystr: // Range underflow. + | lghi TMPR1, 0 + | j <4 + | + |.macro ffstring_op, name + | .ffunc_1 string_ .. name + | ffgccheck + | lg STR:CARG2, 0(BASE) + | checkstr STR:CARG2, ->fff_fallback + | lg L:RB, SAVE_L + | lay SBUF:CARG1, (DISPATCH_GL(tmpbuf))(DISPATCH) + | stg BASE, L:RB->base + | lg RC, SBUF:CARG1->b + | stg L:RB, SBUF:CARG1->L + | stg RC, SBUF:CARG1->w + | stg PC, SAVE_PC + | brasl r14, extern lj_buf_putstr_ .. name + | // lgr CARG1, CRET1 (nop, CARG1==CRET1) + | brasl r14, extern lj_buf_tostr + | j ->fff_resstr + |.endmacro + | + |ffstring_op reverse + |ffstring_op lower + |ffstring_op upper + | + |//-- Bit library -------------------------------------------------------- + | + |.macro .ffunc_bit, name, kind, fdef + | fdef name + |.if kind == 2 + | bfpconst_tobit f1, RB + |.endif + | lg RB, 0(BASE) + | ld f0, 0(BASE) + | checkint RB, >1 + |.if kind > 0 + | j >2 + |.else + | j ->fff_resbit + |.endif + |1: + | jh ->fff_fallback + |.if kind < 2 + | bfpconst_tobit f1, RB + |.endif + | adbr f0, f1 + | lgdr RB, f0 + | llgfr RB, RB + |2: + |.endmacro + | + |.macro .ffunc_bit, name, kind + | .ffunc_bit name, kind, .ffunc_1 + |.endmacro + | + |.ffunc_bit bit_tobit, 0 + | j ->fff_resbit + | + |.macro .ffunc_bit_op, name, ins + | .ffunc_bit name, 2 + | lgr TMPR1, NARGS:RD // Save for fallback. 
+ | sllg RD, NARGS:RD, 3 + | lay RD, -16(RD, BASE) + |1: + | clgr RD, BASE + | jle ->fff_resbit + | lg RA, 0(RD) + | checkint RA, >2 + | ins RB, RA + | aghi RD, -8 + | j <1 + |2: + | jh ->fff_fallback_bit_op + | ldgr f0, RA + | adbr f0, f1 + | lgdr RA, f0 + | ins RB, RA + | aghi RD, -8 + | j <1 + |.endmacro + | + |.ffunc_bit_op bit_band, nr + |.ffunc_bit_op bit_bor, or + |.ffunc_bit_op bit_bxor, xr + | + |.ffunc_bit bit_bswap, 1 + | lrvr RB, RB + | j ->fff_resbit + | + |.ffunc_bit bit_bnot, 1 + | xilf RB, -1 + | j ->fff_resbit + | + |->fff_fallback_bit_op: + | lgr NARGS:RD, TMPR1 // Restore for fallback + | j ->fff_fallback + | + |.macro .ffunc_bit_sh, name, ins + | .ffunc_bit name, 1, .ffunc_2 + | // Note: no inline conversion from number for 2nd argument! + | lg RA, 8(BASE) + | checkint RA, ->fff_fallback + | nill RA, 0x1f // Limit shift to 5-bits. + | ins RB, 0(RA) + | j ->fff_resbit + |.endmacro + | + |.ffunc_bit_sh bit_lshift, sll + |.ffunc_bit_sh bit_rshift, srl + |.ffunc_bit_sh bit_arshift, sra + | + |.ffunc_bit bit_rol, 1, .ffunc_2 + | // Note: no inline conversion from number for 2nd argument! + | lg RA, 8(BASE) + | checkint RA, ->fff_fallback + | rll RB, RB, 0(RA) + | j ->fff_resbit + | + |.ffunc_bit bit_ror, 1, .ffunc_2 + | // Note: no inline conversion from number for 2nd argument! + | lg RA, 8(BASE) + | checkint RA, ->fff_fallback + | lcr RA, RA // Right rotate equivalent to negative left rotate. + | rll RB, RB, 0(RA) + | j ->fff_resbit + | + |//----------------------------------------------------------------------- + | + |->fff_fallback_2: + | lghi NARGS:RD, 1+2 // Other args are ignored, anyway. + | j ->fff_fallback + |->fff_fallback_1: + | lghi NARGS:RD, 1+1 // Other args are ignored, anyway. + |->fff_fallback: // Call fast function fallback handler. + | // BASE = new base, RD = nargs+1 + | lg L:RB, SAVE_L + | lg PC, -8(BASE) // Fallback may overwrite PC. + | stg PC, SAVE_PC // Redundant (but a defined value). 
+ | stg BASE, L:RB->base + | sllg RD, NARGS:RD, 3 + | lay RD, -8(RD, BASE) + | la RA, (8*LUA_MINSTACK)(RD) // Ensure enough space for handler. + | stg RD, L:RB->top + | lg CFUNC:RD, -16(BASE) + | cleartp CFUNC:RD + | clg RA, L:RB->maxstack + | jh >5 // Need to grow stack. + | lgr CARG1, L:RB + | lg TMPR1, CFUNC:RD->f + | basr r14, TMPR1 // (lua_State *L) + | lg BASE, L:RB->base + | // Either throws an error, or recovers and returns -1, 0 or nresults+1. + | lgr RD, CRET1 + | cghi RD, 0; jh ->fff_res // Returned nresults+1? + |1: + | lg RA, L:RB->top + | sgr RA, BASE + | srlg RA, RA, 3 + | cghi RD, 0 + | la NARGS:RD, 1(RA) + | lg LFUNC:RB, -16(BASE) + | jne ->vm_call_tail // Returned -1? + | cleartp LFUNC:RB + | ins_callt // Returned 0: retry fast path. + | + |// Reconstruct previous base for vmeta_call during tailcall. + |->vm_call_tail: + | lgr RA, BASE + | tmll PC, FRAME_TYPE + | jne >3 + | llgc RB, PC_RA + | lcgr RB, RB + | sllg RB, RB, 3 + | lay BASE, -16(RB, BASE) // base = base - (RB+2)*8 + | j ->vm_call_dispatch // Resolve again for tailcall. + |3: + | lgr RB, PC + | nill RB, -8 + | sgr BASE, RB + | j ->vm_call_dispatch // Resolve again for tailcall. + | + |5: // Grow stack for fallback handler. + | lghi CARG2, LUA_MINSTACK + | lgr CARG1, L:RB + | brasl r14, extern lj_state_growstack // (lua_State *L, int n) + | lg BASE, L:RB->base + | lghi RD, 0 // Simulate a return 0. + | j <1 // Dumb retry (goes through ff first). + | + |->fff_gcstep: // Call GC step function. + | // BASE = new base, RD = nargs+1 + | stg r14, SAVE_TMP // Save return address + | lg L:RB, SAVE_L + | stg PC, SAVE_PC // Redundant (but a defined value). + | stg BASE, L:RB->base + | sllg RD, NARGS:RD, 3 + | lay RD, -8(RD, BASE) + | lgr CARG1, L:RB + | stg RD, L:RB->top + | brasl r14, extern lj_gc_step // (lua_State *L) + | lg BASE, L:RB->base + | lg RD, L:RB->top + | sgr RD, BASE + | srlg RD, RD, 3 + | aghi NARGS:RD, 1 + | lg r14, SAVE_TMP // Restore return address. 
+ | br r14 + | + |//----------------------------------------------------------------------- + |//-- Special dispatch targets ------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_record: // Dispatch target for recording phase. + | stg r0, 0 + | stg r0, 0 + | + |->vm_rethook: // Dispatch target for return hooks. + | llgc RD, (DISPATCH_GL(hookmask))(DISPATCH) + | tmll RD, HOOK_ACTIVE + | jne >5 + | j >1 + | + |->vm_inshook: // Dispatch target for instr/line hooks. + | llgc RD, (DISPATCH_GL(hookmask))(DISPATCH) + | tmll RD, HOOK_ACTIVE // Hook already active? + | jne >5 + | + | tmll RD, LUA_MASKLINE|LUA_MASKCOUNT + | je >5 + | ly TMPR0, (DISPATCH_GL(hookcount))(DISPATCH) + | ahi TMPR0, -1 + | sty TMPR0, (DISPATCH_GL(hookcount))(DISPATCH) + | je >1 + | tmll RD, LUA_MASKLINE + | je >5 + |1: + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | lgr CARG2, PC + | lgr CARG1, L:RB + | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. + | brasl r14, extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) + |3: + | lg BASE, L:RB->base + |4: + | llgc RA, PC_RA + |5: + | llgc OP, PC_OP + | sllg TMPR1, OP, 3 + | llgh RD, PC_RD + | lg TMPR1, GG_DISP2STATIC(TMPR1, DISPATCH) + | br TMPR1 + | + |->cont_hook: // Continue from hook yield. + | stg r0, 0 + | stg r0, 0 + | + |->vm_hotloop: // Hot loop counter underflow. + | stg r0, 0 + | stg r0, 0 + | + |->vm_callhook: // Dispatch target for call hooks. + | stg PC, SAVE_PC + |.if JIT + | j >1 + |.endif + | + |->vm_hotcall: // Hot call counter underflow. + |.if JIT + | stg PC, SAVE_PC + | oill PC, 1 // Marker for hot call. + |1: + |.endif + | sllg RD, NARGS:RD, 3 + | lay RD, -8(RD, BASE) + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | stg RD, L:RB->top + | lgr CARG2, PC + | lgr CARG1, L:RB + | brasl r14, extern lj_dispatch_call // (lua_State *L, const BCIns *pc) + | // ASMFunction returned in r2 (CRET1). 
+ | lghi TMPR0, 0 + | stg TMPR0, SAVE_PC // Invalidate for subsequent line hook. + |.if JIT + | nill PC, -2 // Clear the hot call marker bit set by the oill in vm_hotcall. + |.endif + | lg BASE, L:RB->base + | lg RD, L:RB->top + | sgr RD, BASE + | lgr RB, CRET1 + | llgc RA, PC_RA + | srl RD, 3 + | ahi NARGS:RD, 1 // NARGS:RD = ((top - base) >> 3) + 1. + | llgfr RD, RD + | br RB // Jump to the ASMFunction returned by lj_dispatch_call. + | + |->cont_stitch: // Trace stitching. NOTE(review): body is only the stg r0, 0 pair also used for the NYI vm_mod stub -- presumably unimplemented here; confirm. + | stg r0, 0 + | stg r0, 0 + | + |->vm_profhook: // Dispatch target for profiler hook. NOTE(review): same stg r0, 0 stub as above. + | stg r0, 0 + | stg r0, 0 + | + |//----------------------------------------------------------------------- + |//-- Trace exit handler ------------------------------------------------- + |//----------------------------------------------------------------------- + | + |// Called from an exit stub with the exit number on the stack. + |// The 16 bit exit number is stored with two (sign-extended) push imm8. + |// NOTE(review): 'push imm8' is an x86 instruction, so the two lines above look + |// inherited from the x86/x64 VM. Both labels below contain only the stg r0, 0 stub. + |->vm_exit_handler: + | stg r0, 0 + | stg r0, 0 + |->vm_exit_interp: + | stg r0, 0 + | stg r0, 0 + | + |//----------------------------------------------------------------------- + |//-- Math helper functions ---------------------------------------------- + |//----------------------------------------------------------------------- + | + |// FP value rounding. Called by math.floor/math.ceil fast functions. + |// Value to round is in f0. May clobber f0-f7 and r0. Return address is r14. + |.macro vm_round, name, mask + |->name: + | lghi r0, 1 + | cdfbr f1, r0 // f1 = 1.0, the divisor for didbr. + | didbr f0, f2, f1, mask // f0=remainder, f2=quotient. + | jnle >1 + | ldr f0, f2 // Return the quotient (the rounded value) in f0. + | br r14 + |1: // partial remainder (sanity check) + | stg r0, 0 + |.endmacro + | + | vm_round vm_floor, 7 // Round towards -inf. + | vm_round vm_ceil, 6 // Round towards +inf. + | vm_round vm_trunc, 5 // Round towards 0. + | + |// FP modulo x%y. Called by BC_MOD* and vm_arith. + |->vm_mod: // NYI.
+ | stg r0, 0 + | stg r0, 0 + | + |//----------------------------------------------------------------------- + |//-- Assertions --------------------------------------------------------- + |//----------------------------------------------------------------------- + | + |->assert_bad_for_arg_type: // NOTE(review): the LUA_USE_ASSERT block below is empty, so this stub is emitted unconditionally. + | stg r0, 0 + | stg r0, 0 +#ifdef LUA_USE_ASSERT +#endif + | + |//----------------------------------------------------------------------- + |//-- FFI helper functions ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |// Handler for callback functions. Callback slot number in ah/al. + |// NOTE(review): ah/al are x86 register names, so the line above looks inherited + |// from the x86/x64 VM. The body below is only the stg r0, 0 stub. + |->vm_ffi_callback: + | stg r0, 0 + | stg r0, 0 + | + |->cont_ffi_callback: // Return from FFI callback. + | stg r0, 0 + | stg r0, 0 + | + |->vm_ffi_call: // Call C function via FFI. + | // Caveat: needs special frame unwinding, see below. + |.if FFI + | .type CCSTATE, CCallState, r8 + | stmg r6, r15, 48(sp) // Save r6-r15; restored by the lmg before br r14. + | lgr r13, sp // Use r13 as frame pointer. + | lgr CCSTATE, CARG1 + | lg r7, CCSTATE->func // Call target, used by the basr below. + | + | // Readjust stack. + | sgf sp, CCSTATE->spadj + | + | // Copy stack slots. + | llgc r1, CCSTATE->nsp + | chi r1, 0 + | jh >2 // nsp > 0: copy the slots at 2:, which jumps back to 1: when done. + |1: + | lmg CARG1, CARG5, CCSTATE->gpr[0] + | // TODO: conditionally load FPRs? + | ld FARG1, CCSTATE->fpr[0] + | ld FARG2, CCSTATE->fpr[1] + | ld FARG3, CCSTATE->fpr[2] + | ld FARG4, CCSTATE->fpr[3] + | basr r14, r7 // Call CCSTATE->func. + | + | stg CRET1, CCSTATE->gpr[0] // Hand CRET1 and f0 back through the CCallState. + | std f0, CCSTATE->fpr[0] + | + | lgr sp, r13 // Restore sp from the frame pointer, undoing the spadj adjustment. + | lmg r6, r15, 48(sp) + | br r14 + | + |2: + | sll r1, 3 // r1 = nsp * 8 (byte count for the mvc copy below). + | la r10, (offsetof(CCallState, stack))(CCSTATE) // Source. + | la r11, (CCALL_SPS_EXTRA*8)(sp) // Destination.
+ |3: // Copy full 256-byte chunks while at least 256 bytes remain in r1. + | chi r1, 256 + | jl >4 + | mvc 0(256, r11), 0(r10) + | la r10, 256(r10) + | la r11, 256(r11) + | ahi r1, -256 + | j <3 + | + |4: // Copy the remaining bytes, if any, then continue at 1:. + | ahi r1, -1 + | jl <1 + | larl r9, >5 + | ex r1, 0(r9) + | j <1 + | + |5: + | // Target of the ex above (the code uses larl + ex, not exrl): + | // this mvc is executed with the low byte of r1 ORed into its length field. + | mvc 0(1, r11), 0(r10) + |.endif + |// Note: vm_ffi_call must be the last function in this object file! + | + |//----------------------------------------------------------------------- +} + +/* Generate the code for a single instruction. */ +static void build_ins(BuildCtx *ctx, BCOp op, int defop) +{ + int vk = 0; + (void)vk; + |// Note: aligning all instructions does not pay off. + |=>defop: + + switch (op) { + + /* -- Comparison ops ---------------------------------------------------- */ + + /* Remember: all ops branch for a true comparison, fall through otherwise. */ + + |.macro jmp_comp, lt, ge, le, gt, target + ||switch (op) { + ||case BC_ISLT: + | lt target + ||break; + ||case BC_ISGE: + | ge target + ||break; + ||case BC_ISLE: + | le target + ||break; + ||case BC_ISGT: + | gt target + ||break; + ||default: break; /* Shut up GCC. */ + ||} + |.endmacro + + case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: + | // RA = src1, RD = src2, JMP with RD = target + | ins_AD + | sllg RA, RA, 3 + | sllg RD, RD, 3 + | ld f0, 0(RA, BASE) + | ld f1, 0(RD, BASE) + | lg RA, 0(RA, BASE) + | lg RD, 0(RD, BASE) + | srag ITYPE, RA, 47 + | srag RB, RD, 47 + | + | clfi ITYPE, LJ_TISNUM; jne >7 + | clfi RB, LJ_TISNUM; jne >8 + | // Both are integers. + | la PC, 4(PC) + | cr RA, RD + | jmp_comp jhe, jl, jh, jle, >9 + |6: + | llgh RD, PC_RD + | branchPC RD + |9: + | ins_next + | + |7: // RA is not an integer. + | jh ->vmeta_comp + | // RA is a number. + | clfi RB, LJ_TISNUM; jl >1; jne ->vmeta_comp + | // RA is a number, RD is an integer. + | cdfbr f1, RD + | j >1 + | + |8: // RA is an integer, RD is not an integer. + | jh ->vmeta_comp + | // RA is an integer, RD is a number.
+ | cdfbr f0, RA + |1: + | la PC, 4(PC) + | cdbr f0, f1 + | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. + | jmp_comp jnl, jl, jnle, jle, <9 + | j <6 + break; + + case BC_ISEQV: case BC_ISNEV: + vk = op == BC_ISEQV; + | ins_AD // RA = src1, RD = src2, JMP with RD = target + | sllg RD, RD, 3 + | ld f1, 0(RD, BASE) + | lg RD, 0(RD, BASE) + | sllg RA, RA, 3 + | ld f0, 0(RA, BASE) + | lg RA, 0(RA, BASE) + | la PC, 4(PC) + | srag RB, RD, 47 + | srag ITYPE, RA, 47 + | clfi RB, LJ_TISNUM; jne >7 + | clfi ITYPE, LJ_TISNUM; jne >8 + | cr RD, RA + if (vk) { + | jne >9 + } else { + | je >9 + } + | llgh RD, PC_RD + | branchPC RD + |9: + | ins_next + | + |7: // RD is not an integer. + | jh >5 + | // RD is a number. + | clfi ITYPE, LJ_TISNUM; jl >1; jne >5 + | // RD is a number, RA is an integer. + | cdfbr f0, RA + | j >1 + | + |8: // RD is an integer, RA is not an integer. + | jh >5 + | // RD is an integer, RA is a number. + | cdfbr f1, RD + | j >1 + | + |1: + | cdbr f0, f1 + |4: + iseqne_fp: + if (vk) { + | jne >2 // Unordered means not equal. + } else { + | je >1 // Unordered means not equal. + } + iseqne_end: + if (vk) { + |1: // EQ: Branch to the target. + | llgh RD, PC_RD + | branchPC RD + |2: // NE: Fallthrough to next instruction. + |.if not FFI + |3: + |.endif + } else { + |.if not FFI + |3: + |.endif + |2: // NE: Branch to the target. + | llgh RD, PC_RD + | branchPC RD + |1: // EQ: Fallthrough to next instruction. + } + if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV || + op == BC_ISEQN || op == BC_ISNEN)) { + | j <9 + } else { + | ins_next + } + | + if (op == BC_ISEQV || op == BC_ISNEV) { + |5: // Either or both types are not numbers. + |.if FFI + | clfi RB, LJ_TCDATA; je ->vmeta_equal_cd + | clfi ITYPE, LJ_TCDATA; je ->vmeta_equal_cd + |.endif + | cgr RA, RD + | je <1 // Same GCobjs or pvalues? + | cr RB, ITYPE + | jne <2 // Not the same type? + | clfi RB, LJ_TISTABUD + | jh <2 // Different objects and not table/ud? 
+ | + | // Different tables or userdatas. Need to check __eq metamethod. + | // Field metatable must be at same offset for GCtab and GCudata! + | cleartp TAB:RA + | lg TAB:RB, TAB:RA->metatable + | cghi TAB:RB, 0 + | je <2 // No metatable? + | tm TAB:RB->nomm, 1<<MM_eq + | jne <2 // Or 'no __eq' flag set? + if (vk) { + | lghi RB, 0 // ne = 0 + } else { + | lghi RB, 1 // ne = 1 + } + | j ->vmeta_equal // Handle __eq metamethod. + } else { + |.if FFI + |3: + | clfi ITYPE, LJ_TCDATA + if (LJ_DUALNUM && vk) { + | jne <9 + } else { + | jne <2 + } + | j ->vmeta_equal_cd + |.endif + } + break; + case BC_ISEQS: case BC_ISNES: + vk = op == BC_ISEQS; + | ins_AND // RA = src, RD = str const, JMP with RD = target + | sllg RA, RA, 3 + | sllg RD, RD, 3 + | lg RB, 0(RA, BASE) + | la PC, 4(PC) + | checkstr RB, >3 + | cg RB, 0(RD, KBASE) + iseqne_test: + if (vk) { + | jne >2 + } else { + | je >1 + } + goto iseqne_end; + case BC_ISEQN: case BC_ISNEN: + vk = op == BC_ISEQN; + | ins_AD // RA = src, RD = num const, JMP with RD = target + | sllg RA, RA, 3 + | sllg RD, RD, 3 + | ld f0, 0(RA, BASE) + | lg RB, 0(RA, BASE) + | ld f1, 0(RD, KBASE) + | lg RD, 0(RD, KBASE) + | la PC, 4(PC) + | checkint RB, >7 + | checkint RD, >8 + | cr RB, RD + if (vk) { + | jne >9 + } else { + | je >9 + } + | llgh RD, PC_RD + | branchPC RD + |9: + | ins_next + | + |7: // RA is not an integer. + | jh >3 + | // RA is a number. + | checkint RD, >1 + | // RA is a number, RD is an integer. + | cdfbr f1, RD + | j >1 + | + |8: // RA is an integer, RD is a number. 
+ | cdfbr f0, RB + | cdbr f0, f1 + | j >4 + |1: + | cdbr f0, f1 + |4: + goto iseqne_fp; + case BC_ISEQP: case BC_ISNEP: + vk = op == BC_ISEQP; + | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target + | sllg RA, RA, 3 + | lg RB, 0(RA, BASE) + | srag RB, RB, 47 + | la PC, 4(PC) + | cr RB, RD + if (!LJ_HASFFI) goto iseqne_test; + if (vk) { + | jne >3 + | llgh RD, PC_RD + | branchPC RD + |2: + | ins_next + |3: + | cghi RB, LJ_TCDATA; jne <2 + | j ->vmeta_equal_cd + } else { + | je >2 + | cghi RB, LJ_TCDATA; je ->vmeta_equal_cd + | llgh RD, PC_RD + | branchPC RD + |2: + | ins_next + } + break; + + /* -- Unary test and copy ops ------------------------------------------- */ + + case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: + | ins_AD // RA = dst or unused, RD = src, JMP with RD = target + | sllg RD, RD, 3 + | sllg RA, RA, 3 + | lg ITYPE, 0(RD, BASE) + | la PC, 4(PC) + if (op == BC_ISTC || op == BC_ISFC) { + | lgr RB, ITYPE + } + | srag ITYPE, ITYPE, 47 + | clfi ITYPE, LJ_TISTRUECOND + if (op == BC_IST || op == BC_ISTC) { + | jhe >1 + } else { + | jl >1 + } + if (op == BC_ISTC || op == BC_ISFC) { + | stg RB, 0(RA, BASE) + } + | llgh RD, PC_RD + | branchPC RD + |1: // Fallthrough to the next instruction. 
+ | ins_next + break; + + case BC_ISTYPE: + | ins_AD // RA = src, RD = -type + | lghr RD, RD + | sllg RA, RA, 3 + | lg RB, 0(RA, BASE) + | srag RB, RB, 47 + | agr RB, RD + | jne ->vmeta_istype + | ins_next + break; + case BC_ISNUM: + | ins_AD // RA = src, RD = -(TISNUM-1) + | sllg TMPR1, RA, 3 + | lg TMPR1, 0(TMPR1, BASE) + | checknumtp TMPR1, ->vmeta_istype + | ins_next + break; + case BC_MOV: + | ins_AD // RA = dst, RD = src + | sllg RD, RD, 3 + | lg RB, 0(RD, BASE) + | sllg RA, RA, 3 + | stg RB, 0(RA, BASE) + | ins_next_ + break; + case BC_NOT: + | ins_AD // RA = dst, RD = src + | sllg RD, RD, 3 + | sllg RA, RA, 3 + | lg RB, 0(RD, BASE) + | srag RB, RB, 47 + | load_false RC + | cghi RB, LJ_TTRUE + | je >1 + | load_true RC + |1: + | stg RC, 0(RA, BASE) + | ins_next + break; + case BC_UNM: + | ins_AD // RA = dst, RD = src + | sllg RA, RA, 3 + | sllg RD, RD, 3 + | lg RB, 0(RD, BASE) + | checkint RB, >3 + | lcr RB, RB; jo >2 + |1: + | stg RB, 0(RA, BASE) + | ins_next + |2: + | llihh RB, 0x41e0 // (double)2^31 + | j <1 + |3: + | jh ->vmeta_unm + | // Toggle sign bit. + | llihh TMPR0, 0x8000 + | xgr RB, TMPR0 + | j <1 + break; + case BC_LEN: + | ins_AD // RA = dst, RD = src + | sllg RD, RD, 3 + | lg RD, 0(RD, BASE) + | checkstr RD, >2 + | llgf RD, STR:RD->len + |1: + | sllg RA, RA, 3 + | setint RD + | stg RD, 0(RA, BASE) + | ins_next + |2: + | cghi ITYPE, LJ_TTAB; jne ->vmeta_len + | lgr TAB:CARG1, TAB:RD +#if LJ_52 + | lg TAB:RB, TAB:RD->metatable + | cghi TAB:RB, 0 + | jne >9 + |3: +#endif + |->BC_LEN_Z: + | brasl r14, extern lj_tab_len // (GCtab *t) + | // Length of table returned in r2 (CRET1). + | lgr RD, CRET1 + | llgc RA, PC_RA + | j <1 +#if LJ_52 + |9: // Check for __len. + | tm TAB:RB->nomm, 1<<MM_len + | jne <3 + | j ->vmeta_len // 'no __len' flag NOT set: check. 
+#endif + break; + + /* -- Binary ops -------------------------------------------------------- */ + + |.macro ins_arithpre + | ins_ABC + | sllg RB, RB, 3 + | sllg RC, RC, 3 + | sllg RA, RA, 3 + |.endmacro + | + |.macro ins_arithfp, ins + | ins_arithpre + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + ||switch (vk) { + ||case 0: + | ld f0, 0(RB, BASE) + | ld f1, 0(RC, KBASE) + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, KBASE) + | checknumtp RB, ->vmeta_arith_vno + | checknumtp RC, ->vmeta_arith_vno + | ins f0, f1 + || break; + ||case 1: + | ld f1, 0(RB, BASE) + | ld f0, 0(RC, KBASE) + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, KBASE) + | checknumtp RB, ->vmeta_arith_nvo + | checknumtp RC, ->vmeta_arith_nvo + | ins f0, f1 + || break; + ||default: + | ld f0, 0(RB, BASE) + | ld f1, 0(RC, BASE) + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, BASE) + | checknumtp RB, ->vmeta_arith_vvo + | checknumtp RC, ->vmeta_arith_vvo + | ins f0, f1 + || break; + ||} + | std f0, 0(RA, BASE) + | ins_next + |.endmacro + | + |.macro ins_arithdn, intins + | ins_arithpre + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + ||switch (vk) { + ||case 0: + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, KBASE) + | checkint RB, ->vmeta_arith_vno + | checkint RC, ->vmeta_arith_vno + | intins RB, RC; jo ->vmeta_arith_vno + || break; + ||case 1: + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, KBASE) + | checkint RB, ->vmeta_arith_nvo + | checkint RC, ->vmeta_arith_nvo + | intins RC, RB; jo ->vmeta_arith_nvo + || break; + ||default: + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, BASE) + | checkint RB, ->vmeta_arith_vvo + | checkint RC, ->vmeta_arith_vvo + | intins RB, RC; jo ->vmeta_arith_vvo + || break; + ||} + ||if (vk == 1) { + | // setint RC + | stg RC, 0(RA, BASE) + ||} else { + | // setint RB + | stg RB, 0(RA, BASE) + ||} + | ins_next + |.endmacro + + | // RA = dst, RB = src1 or num const, RC = src2 or num const + case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: + | ins_arithdn ar + break; + case BC_SUBVN: case BC_SUBNV: case 
BC_SUBVV: + | ins_arithdn sr + break; + case BC_MULVN: case BC_MULNV: case BC_MULVV: + | ins_arithpre + | // For multiplication we use msgfr and check if the result + | // fits in an int32_t. + switch(op) { + case BC_MULVN: + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, KBASE) + | checkint RB, ->vmeta_arith_vno + | checkint RC, ->vmeta_arith_vno + | lgfr RB, RB + | msgfr RB, RC + | lgfr RC, RB + | cgr RB, RC; jne ->vmeta_arith_vno + break; + case BC_MULNV: + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, KBASE) + | checkint RB, ->vmeta_arith_nvo + | checkint RC, ->vmeta_arith_nvo + | lgfr RB, RB + | msgfr RB, RC + | lgfr RC, RB + | cgr RB, RC; jne ->vmeta_arith_nvo + break; + default: + | lg RB, 0(RB, BASE) + | lg RC, 0(RC, BASE) + | checkint RB, ->vmeta_arith_vvo + | checkint RC, ->vmeta_arith_vvo + | lgfr RB, RB + | msgfr RB, RC + | lgfr RC, RB + | cgr RB, RC; jne ->vmeta_arith_vvo + break; + } + | llgfr RB, RB + | setint RB + | stg RB, 0(RA, BASE) + | ins_next + break; + case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: + | ins_arithfp ddbr + break; + // TODO: implement fast mod operation. + // x86_64 does floating point mod, however it might be better to use integer mod. + case BC_MODVN: + | j ->vmeta_arith_vno + break; + case BC_MODNV: + | j ->vmeta_arith_nvo + break; + case BC_MODVV: + | j ->vmeta_arith_vvo + break; + case BC_POW: + | ins_ABC + | sllg RB, RB, 3 + | sllg RC, RC, 3 + | ld FARG1, 0(RB, BASE) + | ld FARG2, 0(RC, BASE) + | lg TMPR0, 0(RB, BASE) + | checknumtp TMPR0, ->vmeta_arith_vvo + | lg TMPR0, 0(RC, BASE) + | checknumtp TMPR0, ->vmeta_arith_vvo + | brasl r14, extern pow // double pow(double x, double y), result in f0. 
+ | llgc RA, PC_RA + | sllg RA, RA, 3 + | std f0, 0(RA, BASE) + | ins_next + break; + + case BC_CAT: + | ins_ABC // RA = dst, RB = src_start, RC = src_end + | lg L:CARG1, SAVE_L + | stg BASE, L:CARG1->base + | lgr CARG3, RC + | sgr CARG3, RB + | sllg RC, RC, 3 + | la CARG2, 0(RC, BASE) + |->BC_CAT_Z: + | lgr L:RB, L:CARG1 + | stg PC, SAVE_PC + | brasl r14, extern lj_meta_cat // (lua_State *L, TValue *top, int left) + | // NULL (finished) or TValue * (metamethod) returned in r2 (CRET1). + | lg BASE, L:RB->base + | ltgr RC, CRET1 + | jne ->vmeta_binop + | llgc RB, PC_RB // Copy result to Stk[RA] from Stk[RB]. + | sllg RB, RB, 3 + | llgc RA, PC_RA + | sllg RA, RA, 3 + | lg RC, 0(RB, BASE) + | stg RC, 0(RA, BASE) + | ins_next + break; + + /* -- Constant ops ------------------------------------------------------ */ + + case BC_KSTR: + | ins_AND // RA = dst, RD = str const (~) + | sllg RD, RD, 3 + | lg RD, 0(RD, KBASE) + | settp RD, LJ_TSTR + | sllg RA, RA, 3 + | stg RD, 0(RA, BASE) + | ins_next + break; + case BC_KCDATA: + |.if FFI + | ins_AND // RA = dst, RD = cdata const (~) + | sllg RD, RD, 3 + | sllg RA, RA, 3 + | lg RD, 0(RD, KBASE) + | settp RD, LJ_TCDATA + | stg RD, 0(RA, BASE) + | ins_next + |.endif + break; + case BC_KSHORT: + | ins_AD // RA = dst, RD = signed int16 literal + | // Assumes DUALNUM. + | lhr RD, RD // Sign-extend literal to 32-bits. 
+ | setint RD + | sllg RA, RA, 3 + | stg RD, 0(RA, BASE) + | ins_next + break; + case BC_KNUM: + | ins_AD // RA = dst, RD = num const + | sllg RD, RD, 3 + | ld f0, 0(RD, KBASE) + | sllg RA, RA, 3 + | std f0, 0(RA, BASE) + | ins_next + break; + case BC_KPRI: + | ins_AD // RA = dst, RD = primitive type (~) + | sllg RA, RA, 3 + | sllg RD, RD, 47 + | lghi TMPR0, -1 + | xgr RD, TMPR0 // not + | stg RD, 0(RA, BASE) + | ins_next + break; + case BC_KNIL: + | ins_AD // RA = dst_start, RD = dst_end + | sllg RA, RA, 3 + | sllg RD, RD, 3 + | la RA, 8(RA, BASE) + | la RD, 0(RD, BASE) + | lghi RB, LJ_TNIL + | stg RB, -8(RA) // Sets minimum 2 slots. + |1: + | stg RB, 0(RA) + | la RA, 8(RA) + | clgr RA, RD + | jle <1 + | ins_next + break; + +/* -- Upvalue and function ops ------------------------------------------ */ + + case BC_UGET: + | ins_AD // RA = dst, RD = upvalue # + | sllg RA, RA, 3 + | sllg RD, RD, 3 + | lg LFUNC:RB, -16(BASE) + | cleartp LFUNC:RB + | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RD, LFUNC:RB) + | lg RB, UPVAL:RB->v + | lg RD, 0(RB) + | stg RD, 0(RA, BASE) + | ins_next + break; + case BC_USETV: +#define TV2MARKOFS \ + ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)) + | ins_AD // RA = upvalue #, RD = src + | lg LFUNC:RB, -16(BASE) + | cleartp LFUNC:RB + | sllg RA, RA, 3 + | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB) + | tm UPVAL:RB->closed, 0xff + | lg RB, UPVAL:RB->v + | sllg TMPR1, RD, 3 + | lg RA, 0(TMPR1, BASE) + | stg RA, 0(RB) + | je >1 + | // Check barrier for closed upvalue. + | tmy TV2MARKOFS(RB), LJ_GC_BLACK // isblack(uv) + | jne >2 + |1: + | ins_next + | + |2: // Upvalue is black. Check if new value is collectable and white. + | srag RD, RA, 47 + | ahi RD, -LJ_TISGCV + | clfi RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v) + | jle <1 + | cleartp GCOBJ:RA + | tm GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) + | je <1 + | // Crossed a write barrier. Move the barrier forward. 
+ | lgr CARG2, RB + | lay GL:CARG1, GG_DISP2G(DISPATCH) + | brasl r14, extern lj_gc_barrieruv // (global_State *g, TValue *tv) + | j <1 + break; +#undef TV2MARKOFS + case BC_USETS: + | ins_AND // RA = upvalue #, RD = str const (~) + | lg LFUNC:RB, -16(BASE) + | sllg RA, RA, 3 + | sllg RD, RD, 3 + | cleartp LFUNC:RB + | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB) + | lg STR:RA, 0(RD, KBASE) + | lg RD, UPVAL:RB->v + | settp STR:ITYPE, STR:RA, LJ_TSTR + | stg STR:ITYPE, 0(RD) + | tm UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) + | jne >2 + |1: + | ins_next + | + |2: // Check if string is white and ensure upvalue is closed. + | tm GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str) + | je <1 + | tm UPVAL:RB->closed, 0xff + | je <1 + | // Crossed a write barrier. Move the barrier forward. + | lgr CARG2, RD + | lay GL:CARG1, GG_DISP2G(DISPATCH) + | brasl r14, extern lj_gc_barrieruv // (global_State *g, TValue *tv) + | j <1 + break; + case BC_USETN: + | ins_AD // RA = upvalue #, RD = num const + | lg LFUNC:RB, -16(BASE) + | sllg RA, RA, 3 + | sllg RD, RD, 3 + | cleartp LFUNC:RB + | ld f0, 0(RD, KBASE) + | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB) + | lg RA, UPVAL:RB->v + | std f0, 0(RA) + | ins_next + break; + case BC_USETP: + | ins_AD // RA = upvalue #, RD = primitive type (~) + | lg LFUNC:RB, -16(BASE) + | sllg RA, RA, 3 + | cleartp LFUNC:RB + | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB) + | sllg RD, RD, 47 + | lghi TMPR0, -1 + | xgr RD, TMPR0 + | lg RA, UPVAL:RB->v + | stg RD, 0(RA) + | ins_next + break; + case BC_UCLO: + | ins_AD // RA = level, RD = target + | branchPC RD // Do this first to free RD. 
+ | lg L:RB, SAVE_L + | ltg TMPR0, L:RB->openupval + | je >1 + | stg BASE, L:RB->base + | sllg RA, RA, 3 + | la CARG2, 0(RA, BASE) + | lgr L:CARG1, L:RB + | brasl r14, extern lj_func_closeuv // (lua_State *L, TValue *level) + | lg BASE, L:RB->base + |1: + | ins_next + break; + + case BC_FNEW: + | ins_AND // RA = dst, RD = proto const (~) (holding function prototype) + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | lg CARG3, -16(BASE) + | cleartp CARG3 + | sllg RD, RD, 3 + | lg CARG2, 0(RD, KBASE) // Fetch GCproto *. + | lgr CARG1, L:RB + | stg PC, SAVE_PC + | // (lua_State *L, GCproto *pt, GCfuncL *parent) + | brasl r14, extern lj_func_newL_gc + | // GCfuncL * returned in r2 (CRET1). + | lg BASE, L:RB->base + | llgc RA, PC_RA + | sllg RA, RA, 3 + | settp LFUNC:CRET1, LJ_TFUNC + | stg LFUNC:CRET1, 0(RA, BASE) + | ins_next + break; + case BC_TNEW: + | ins_AD // RA = dst, RD = hbits|asize + | lg L:RB, SAVE_L + | stg BASE, L:RB->base + | lg RA, (DISPATCH_GL(gc.total))(DISPATCH) + | clg RA, (DISPATCH_GL(gc.threshold))(DISPATCH) + | stg PC, SAVE_PC + | jhe >5 + |1: + | srlg CARG3, RD, 11 + | llill TMPR0, 0x7ff + | nr RD, TMPR0 + | cr RD, TMPR0 + | je >3 + |2: + | lgr L:CARG1, L:RB + | llgfr CARG2, RD + | brasl r14, extern lj_tab_new // (lua_State *L, uint32_t asize, uint32_t hbits) + | // Table * returned in r2 (CRET1). + | lg BASE, L:RB->base + | llgc RA, PC_RA + | sllg RA, RA, 3 + | settp TAB:CRET1, LJ_TTAB + | stg TAB:CRET1, 0(RA, BASE) + | ins_next + |3: // Turn 0x7ff into 0x801. 
+ | llill RD, 0x801 + | j <2 + |5: + | lgr L:CARG1, L:RB + | brasl r14, extern lj_gc_step_fixtop // (lua_State *L) + | llgh RD, PC_RD + | j <1 + break; + case BC_TDUP: + | ins_AND // RA = dst, RD = table const (~) (holding template table) + | lg L:RB, SAVE_L + | lg RA, (DISPATCH_GL(gc.total))(DISPATCH) + | stg PC, SAVE_PC + | clg RA, (DISPATCH_GL(gc.threshold))(DISPATCH) + | stg BASE, L:RB->base + | jhe >3 + |2: + | sllg RD, RD, 3 + | lg TAB:CARG2, 0(RD, KBASE) + | lgr L:CARG1, L:RB + | brasl r14, extern lj_tab_dup // (lua_State *L, Table *kt) + | // Table * returned in r2 (CRET1). + | lg BASE, L:RB->base + | llgc RA, PC_RA + | settp TAB:CRET1, LJ_TTAB + | sllg RA, RA, 3 + | stg TAB:CRET1, 0(RA, BASE) + | ins_next + |3: + | lgr L:CARG1, L:RB + | brasl r14, extern lj_gc_step_fixtop // (lua_State *L) + | llgh RD, PC_RD // Need to reload RD. + | lghi TMPR0, -1 + | xgr RD, TMPR0 // not RD + | j <2 + break; + + case BC_GGET: + | ins_AND // RA = dst, RD = str const (~) + | lg LFUNC:RB, -16(BASE) + | cleartp LFUNC:RB + | lg TAB:RB, LFUNC:RB->env + | sllg TMPR1, RD, 3 + | lg STR:RC, 0(TMPR1, KBASE) + | j ->BC_TGETS_Z + break; + case BC_GSET: + | ins_AND // RA = src, RD = str const (~) + | lg LFUNC:RB, -16(BASE) + | cleartp LFUNC:RB + | lg TAB:RB, LFUNC:RB->env + | sllg TMPR1, RD, 3 + | lg STR:RC, 0(TMPR1, KBASE) + | j ->BC_TSETS_Z + break; + + case BC_TGETV: + | ins_ABC // RA = dst, RB = table, RC = key + | sllg RB, RB, 3 + | lg TAB:RB, 0(RB, BASE) + | sllg RC, RC, 3 + | lg RC, 0(RC, BASE) + | checktab TAB:RB, ->vmeta_tgetv + | + | // Integer key? + | checkint RC, >5 + | cl RC, TAB:RB->asize // Takes care of unordered, too. + | jhe ->vmeta_tgetv // Not in array part? Use fallback. + | llgfr RC, RC + | sllg RC, RC, 3 + | ag RC, TAB:RB->array + | // Get array slot. + | lg ITYPE, 0(RC) + | cghi ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath. + | je >2 + |1: + | sllg RA, RA, 3 + | stg ITYPE, 0(RA, BASE) + | ins_next + | + |2: // Check for __index if table value is nil. 
+ | lg TAB:TMPR1, TAB:RB->metatable + | cghi TAB:TMPR1, 0 + | je <1 + | tm TAB:TMPR1->nomm, 1<<MM_index + | je ->vmeta_tgetv // 'no __index' flag NOT set: check. + | j <1 + | + |5: // String key? + | cghi ITYPE, LJ_TSTR; jne ->vmeta_tgetv + | cleartp STR:RC + | j ->BC_TGETS_Z + break; + case BC_TGETS: + | ins_ABC + | sllg RB, RB, 3 + | lg TAB:RB, 0(RB, BASE) + | lghi TMPR1, -1 + | xgr RC, TMPR1 + | sllg RC, RC, 3 + | lg STR:RC, 0(RC, KBASE) + | checktab TAB:RB, ->vmeta_tgets + |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr * + | l TMPR1, TAB:RB->hmask + | n TMPR1, STR:RC->hash + | lgfr TMPR1, TMPR1 + | mghi TMPR1, #NODE + | ag NODE:TMPR1, TAB:RB->node + | settp ITYPE, STR:RC, LJ_TSTR + |1: + | cg ITYPE, NODE:TMPR1->key + | jne >4 + | // Get node value. + | lg ITYPE, NODE:TMPR1->val + | cghi ITYPE, LJ_TNIL + | je >5 // Key found, but nil value? + |2: + | sllg RA, RA, 3 + | stg ITYPE, 0(RA, BASE) + | ins_next + | + |4: // Follow hash chain. + | lg NODE:TMPR1, NODE:TMPR1->next + | cghi NODE:TMPR1, 0 + | jne <1 + | // End of hash chain: key not found, nil result. + | lghi ITYPE, LJ_TNIL + | + |5: // Check for __index if table value is nil. + | lg TAB:TMPR1, TAB:RB->metatable + | cghi TAB:TMPR1, 0 + | je <2 // No metatable: done. + | tm TAB:TMPR1->nomm, 1<<MM_index + | jne <2 // 'no __index' flag set: done. + | j ->vmeta_tgets // Caveat: preserve STR:RC. + break; + case BC_TGETB: + | ins_ABC // RA = dst, RB = table, RC = byte literal + | sllg RB, RB, 3 + | lg TAB:RB, 0(RB, BASE) + | checktab TAB:RB, ->vmeta_tgetb + | cl RC, TAB:RB->asize + | jhe ->vmeta_tgetb + | sllg RC, RC, 3 + | ag RC, TAB:RB->array + | // Get array slot. + | lg ITYPE, 0(RC) + | cghi ITYPE, LJ_TNIL + | je >2 + |1: + | sllg RA, RA, 3 + | stg ITYPE, 0(RA, BASE) + | ins_next + | + |2: // Check for __index if table value is nil. + | lg TAB:TMPR1, TAB:RB->metatable + | cghi TAB:TMPR1, 0 + | je <1 + | tm TAB:TMPR1->nomm, 1<<MM_index + | je ->vmeta_tgetb // 'no __index' flag NOT set: check. 
+ | j <1 + break; + case BC_TGETR: + | ins_ABC // RA = dst, RB = table, RC = key + | sllg RB, RB, 3 + | lg TAB:RB, 0(RB, BASE) + | cleartp TAB:RB + | sllg RC, RC, 3 + | llgf RC, 4(RC, BASE) // Load low word (big endian). + | cl RC, TAB:RB->asize + | jhe ->vmeta_tgetr // Not in array part? Use fallback. + | sllg RC, RC, 3 + | ag RC, TAB:RB->array + | // Get array slot. + |->BC_TGETR_Z: + | lg ITYPE, 0(RC) + |->BC_TGETR2_Z: + | sllg RA, RA, 3 + | stg ITYPE, 0(RA, BASE) + | ins_next + break; + + case BC_TSETV: + | ins_ABC // RA = src, RB = table, RC = key + | sllg RB, RB, 3 + | lg TAB:RB, 0(RB, BASE) + | sllg RC, RC, 3 + | lg RC, 0(RC, BASE) + | checktab TAB:RB, ->vmeta_tsetv + | + | // Integer key? + | checkint RC, >5 + | cl RC, TAB:RB->asize // Takes care of unordered, too. + | jhe ->vmeta_tsetv + | llgfr RC, RC + | sllg RC, RC, 3 + | ag RC, TAB:RB->array + | lghi TMPR0, LJ_TNIL + | cg TMPR0, 0(RC) + | je >3 // Previous value is nil? + |1: + | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | jne >7 + |2: // Set array slot. + | sllg RA, RA, 3 + | lg RB, 0(RA, BASE) + | stg RB, 0(RC) + | ins_next + | + |3: // Check for __newindex if previous value is nil. + | lg TAB:TMPR1, TAB:RB->metatable + | cghi TAB:TMPR1, 0 + | je <1 + | tm TAB:TMPR1->nomm, 1<<MM_newindex + | je ->vmeta_tsetv // 'no __newindex' flag NOT set: check. + | j <1 + | + |5: // String key? + | cghi ITYPE, LJ_TSTR; jne ->vmeta_tsetv + | cleartp STR:RC + | j ->BC_TSETS_Z + | + |7: // Possible table write barrier for the value. Skip valiswhite check. 
+ | barrierback TAB:RB, TMPR1 + | j <2 + break; + case BC_TSETS: + | ins_ABC // RA = src, RB = table, RC = str const (~) + | sllg RB, RB, 3 + | lg TAB:RB, 0(RB, BASE) + | lghi TMPR0, -1 + | xgr RC, TMPR0 // ~RC + | sllg RC, RC, 3 + | lg STR:RC, 0(RC, KBASE) + | checktab TAB:RB, ->vmeta_tsets + |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr * + | l TMPR1, TAB:RB->hmask + | n TMPR1, STR:RC->hash + | lgfr TMPR1, TMPR1 + | mghi TMPR1, #NODE + | mvi TAB:RB->nomm, 0 // Clear metamethod cache. + | ag NODE:TMPR1, TAB:RB->node + | settp ITYPE, STR:RC, LJ_TSTR + |1: + | cg ITYPE, NODE:TMPR1->key + | jne >5 + | // Ok, key found. Assumes: offsetof(Node, val) == 0 + | lghi TMPR0, LJ_TNIL + | cg TMPR0, 0(TMPR1) + | je >4 // Previous value is nil? + |2: + | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | jne >7 + |3: // Set node value. + | sllg RA, RA, 3 + | lg ITYPE, 0(RA, BASE) + | stg ITYPE, 0(TMPR1) + | ins_next + | + |4: // Check for __newindex if previous value is nil. + | lg TAB:ITYPE, TAB:RB->metatable + | cghi TAB:ITYPE, 0 + | je <2 + | tm TAB:ITYPE->nomm, 1<<MM_newindex + | je ->vmeta_tsets // 'no __newindex' flag NOT set: check. + | j <2 + | + |5: // Follow hash chain. + | lg NODE:TMPR1, NODE:TMPR1->next + | cghi NODE:TMPR1, 0 + | jne <1 + | // End of hash chain: key not found, add a new one. + | + | // But check for __newindex first. + | lg TAB:TMPR1, TAB:RB->metatable + | cghi TAB:TMPR1, 0 + | je >6 // No metatable: continue. + | tm TAB:TMPR1->nomm, 1<<MM_newindex + | je ->vmeta_tsets // 'no __newindex' flag NOT set: check. + |6: + | stg ITYPE, SAVE_TMP + | lg L:CARG1, SAVE_L + | stg BASE, L:CARG1->base + | la CARG3, SAVE_TMP + | lgr CARG2, TAB:RB + | stg PC, SAVE_PC + | brasl r14, extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) + | // Handles write barrier for the new key. TValue * returned in r2 (CRET1). + | lgr TMPR1, CRET1 + | lg L:CRET1, SAVE_L + | lg BASE, L:CRET1->base + | llgc RA, PC_RA + | j <2 // Must check write barrier for value. 
+ | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, ITYPE + | j <3 + break; + case BC_TSETB: + | ins_ABC // RA = src, RB = table, RC = byte literal + | sllg RB, RB, 3 + | lg TAB:RB, 0(RB, BASE) + | checktab TAB:RB, ->vmeta_tsetb + | cl RC, TAB:RB->asize + | jhe ->vmeta_tsetb + | sllg RC, RC, 3 + | ag RC, TAB:RB->array + | lghi TMPR0, LJ_TNIL + | cg TMPR0, 0(RC) + | je >3 // Previous value is nil? + |1: + | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | jne >7 + |2: // Set array slot. + | sllg RA, RA, 3 + | lg ITYPE, 0(RA, BASE) + | stg ITYPE, 0(RC) + | ins_next + | + |3: // Check for __newindex if previous value is nil. + | lg TAB:TMPR1, TAB:RB->metatable + | cghi TAB:TMPR1, 0 + | je <1 + | tm TAB:TMPR1->nomm, 1<<MM_newindex + | je ->vmeta_tsetb // 'no __newindex' flag NOT set: check. + | j <1 + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMPR1 + | j <2 + break; + case BC_TSETR: + | ins_ABC // RA = src, RB = table, RC = key + | sllg RB, RB, 3 + | lg TAB:RB, 0(RB, BASE) + | cleartp TAB:RB + | sllg RC, RC, 3 + | lg RC, 0(RC, BASE) + | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | jne >7 + |2: + | cl RC, TAB:RB->asize + | jhe ->vmeta_tsetr + | llgfr RC, RC + | sllg RC, RC, 3 + | ag RC, TAB:RB->array + | // Set array slot. + |->BC_TSETR_Z: + | sllg RA, RA, 3 + | lg ITYPE, 0(RA, BASE) + | stg ITYPE, 0(RC) + | ins_next + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMPR1 + | j <2 + break; + + case BC_TSETM: + | ins_AD // RA = base (table at base-1), RD = num const (start index) + |1: + | sllg RA, RA, 3 + | sllg TMPR1, RD, 3 + | llgf TMPR1, 4(TMPR1, KBASE) // Integer constant is in lo-word. + | la RA, 0(RA, BASE) + | lg TAB:RB, -8(RA) // Guaranteed to be a table. 
+ | cleartp TAB:RB + | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | jne >7 + |2: + | llgf RD, SAVE_MULTRES + | aghi RD, -1 + | je >4 // Nothing to copy? + | agr RD, TMPR1 // Compute needed size. + | clgf RD, TAB:RB->asize + | jh >5 // Doesn't fit into array part? + | sgr RD, TMPR1 + | sllg TMPR1, TMPR1, 3 + | ag TMPR1, TAB:RB->array + |3: // Copy result slots to table. + | lg RB, 0(RA) + | la RA, 8(RA) + | stg RB, 0(TMPR1) + | la TMPR1, 8(TMPR1) + | brctg RD, <3 + |4: + | ins_next + | + |5: // Need to resize array part. + | lg L:CARG1, SAVE_L + | stg BASE, L:CARG1->base + | lgr CARG2, TAB:RB + | lgfr CARG3, RD + | lgr L:RB, L:CARG1 + | stg PC, SAVE_PC + | brasl r14, extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) + | lg BASE, L:RB->base + | llgc RA, PC_RA // Restore RA. + | llgh RD, PC_RD // Restore RD. + | j <1 // Retry. + | + |7: // Possible table write barrier for any value. Skip valiswhite check. + | barrierback TAB:RB, RD + | j <2 + break; + + /* -- Calls and vararg handling ----------------------------------------- */ + + case BC_CALL: case BC_CALLM: + | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs + | sllg RA, RA, 3 + | lgr RD, RC + if (op == BC_CALLM) { + | agf NARGS:RD, SAVE_MULTRES + } + | lg LFUNC:RB, 0(RA, BASE) + | checkfunc LFUNC:RB, ->vmeta_call_ra + | la BASE, 16(RA, BASE) + | ins_call + break; + + case BC_CALLMT: + | ins_AD // RA = base, RD = extra_nargs + | a NARGS:RD, SAVE_MULTRES + | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op. + break; + case BC_CALLT: + | ins_AD // RA = base, RD = nargs+1 + | sllg RA, RA, 3 + | la RA, 16(RA, BASE) + | lgr KBASE, BASE // Use KBASE for move + vmeta_call hint. + | lg LFUNC:RB, -16(RA) + | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call + |->BC_CALLT_Z: + | lg PC, -8(BASE) + | tmll PC, FRAME_TYPE + | jne >7 + |1: + | stg LFUNC:RB, -16(BASE) // Copy func+tag down, reloaded below. 
+ | st NARGS:RD, SAVE_MULTRES + | aghi NARGS:RD, -1 + | je >3 + |2: // Move args down. + | lg RB, 0(RA) + | la RA, 8(RA) + | stg RB, 0(KBASE) + | la KBASE, 8(KBASE) + | brctg NARGS:RD, <2 + | + | lg LFUNC:RB, -16(BASE) + |3: + | cleartp LFUNC:RB + | llgf NARGS:RD, SAVE_MULTRES + | llgc TMPR1, LFUNC:RB->ffid + | cghi TMPR1, 1 // (> FF_C) Calling a fast function? + | jh >5 + |4: + | ins_callt + | + |5: // Tailcall to a fast function. + | tmll PC, FRAME_TYPE // Lua frame below? + | jne <4 + | llgc RA, PC_RA + | lcgr RA, RA + | sllg RA, RA, 3 + | lg LFUNC:KBASE, -32(RA, BASE) // Need to prepare KBASE. + | cleartp LFUNC:KBASE + | lg KBASE, LFUNC:KBASE->pc + | lg KBASE, (PC2PROTO(k))(KBASE) + | j <4 + | + |7: // Tailcall from a vararg function. + | aghi PC, -FRAME_VARG + | tmll PC, FRAME_TYPEP + | jne >8 // Vararg frame below? + | sgr BASE, PC // Need to relocate BASE/KBASE down. + | lgr KBASE, BASE + | lg PC, -8(BASE) + | j <1 + |8: + | aghi PC, FRAME_VARG + | j <1 + break; + + case BC_ITERC: + | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1) + | sllg RA, RA, 3 + | la RA, 16(RA, BASE) // fb = base+2 + | lg RB, -32(RA) // Copy state. fb[0] = fb[-4]. + | lg RC, -24(RA) // Copy control var. fb[1] = fb[-3]. + | stg RB, 0(RA) + | stg RC, 8(RA) + | lg LFUNC:RB, -40(RA) // Copy callable. fb[-2] = fb[-5] + | stg LFUNC:RB, -16(RA) + | lghi NARGS:RD, 2+1 // Handle like a regular 2-arg call. + | checkfunc LFUNC:RB, ->vmeta_call + | lgr BASE, RA + | ins_call + break; + + case BC_ITERN: + | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) + |.if JIT + | // NYI: add hotloop, record BC_ITERN. + |.endif + | sllg RA, RA, 3 + | lg TAB:RB, -16(RA, BASE) + | cleartp TAB:RB + | llgf RC, -4(RA, BASE) // Get index from control var. + | llgf TMPR1, TAB:RB->asize + | la PC, 4(PC) + | lg ITYPE, TAB:RB->array + |1: // Traverse array part. + | clr RC, TMPR1; jhe >5 // Index points after array part? + | sllg RD, RC, 3 // Warning: won't work if RD==RC! 
+ | lg TMPR0, 0(RD, ITYPE) + | cghi TMPR0, LJ_TNIL; je >4 + | // Copy array slot to returned value. + | lgr RB, TMPR0 + | stg RB, 8(RA, BASE) + | // Return array index as a numeric key. + | setint ITYPE, RC + | stg ITYPE, 0(RA, BASE) + | ahi RC, 1 + | sty RC, -4(RA, BASE) // Update control var. + |2: + | llgh RD, PC_RD // Get target from ITERL. + | branchPC RD + |3: + | ins_next + | + |4: // Skip holes in array part. + | ahi RC, 1 + | j <1 + | + |5: // Traverse hash part. + | sr RC, TMPR1 + |6: + | cl RC, TAB:RB->hmask; jh <3 // End of iteration? Branch to ITERL+1. + | llgfr ITYPE, RC + | mghi ITYPE, #NODE + | ag NODE:ITYPE, TAB:RB->node + | lghi TMPR0, LJ_TNIL + | cg TMPR0, NODE:ITYPE->val; je >7 + | ar TMPR1, RC + | ahi TMPR1, 1 + | // Copy key and value from hash slot. + | lg RB, NODE:ITYPE->key + | lg RC, NODE:ITYPE->val + | stg RB, 0(RA, BASE) + | stg RC, 8(RA, BASE) + | sty TMPR1, -4(RA, BASE) + | j <2 + | + |7: // Skip holes in hash part. + | ahi RC, 1 + | j <6 + break; + + case BC_ISNEXT: + | ins_AD // RA = base, RD = target (points to ITERN) + | sllg RA, RA, 3 + | lg CFUNC:RB, -24(RA, BASE) + | checkfunc CFUNC:RB, >5 + | lg TMPR1, -16(RA, BASE) + | checktptp TMPR1, LJ_TTAB, >5 + | lghi TMPR0, LJ_TNIL + | cg TMPR0, -8(RA, BASE); jne >5 + | llgc TMPR1, CFUNC:RB->ffid + | clfi TMPR1, (uint8_t)FF_next_N; jne >5 + | branchPC RD + | llihl TMPR1, 0x7fff + | iihh TMPR1, 0xfffe + | stg TMPR1, -8(RA, BASE) // Initialize control var. + |1: + | ins_next + |5: // Despecialize bytecode if any of the checks fail. + | lghi TMPR0, BC_JMP + | stcy TMPR0, PC_OP + | branchPC RD + | mvi 3(PC), BC_ITERC + | j <1 + break; + + case BC_VARG: + | ins_ABC // RA = base, RB = nresults+1, RC = numparams + | sllg RA, RA, 3 + | sllg RB, RB, 3 + | sllg RC, RC, 3 + | la TMPR1, (16+FRAME_VARG)(RC, BASE) + | la RA, 0(RA, BASE) + | sg TMPR1, -8(BASE) + | // Note: TMPR1 may now be even _above_ BASE if nargs was < numparams. + | cghi RB, 0 + | je >5 // Copy all varargs? 
+ | lay RB, -8(RA, RB) + | clgr TMPR1, BASE // No vararg slots? + | lghi TMPR0, LJ_TNIL + | jnl >2 + |1: // Copy vararg slots to destination slots. + | lg RC, -16(TMPR1) + | la TMPR1, 8(TMPR1) + | stg RC, 0(RA) + | la RA, 8(RA) + | clgr RA, RB // All destination slots filled? + | jnl >3 + | clgr TMPR1, BASE // No more vararg slots? + | jl <1 + |2: // Fill up remainder with nil. + | stg TMPR0, 0(RA) + | la RA, 8(RA) + | clgr RA, RB + | jl <2 + |3: + | ins_next + | + |5: // Copy all varargs. + | lghi TMPR0, 1 + | st TMPR0, SAVE_MULTRES // MULTRES = 0+1 + | lgr RC, BASE + | slgr RC, TMPR1 + | jno <3 // No vararg slots? (borrow or zero) + | llgfr RB, RC + | srlg RB, RB, 3 + | ahi RB, 1 + | st RB, SAVE_MULTRES // MULTRES = #varargs+1 + | lg L:RB, SAVE_L + | agr RC, RA + | clg RC, L:RB->maxstack + | jh >7 // Need to grow stack? + |6: // Copy all vararg slots. + | lg RC, -16(TMPR1) + | la TMPR1, 8(TMPR1) + | stg RC, 0(RA) + | la RA, 8(RA) + | clgr TMPR1, BASE // No more vararg slots? + | jl <6 + | j <3 + | + |7: // Grow stack for varargs. + | stg BASE, L:RB->base + | stg RA, L:RB->top + | stg PC, SAVE_PC + | sgr TMPR1, BASE // Need delta, because BASE may change. + | st TMPR1, SAVE_TMP_HI + | llgf CARG2, SAVE_MULTRES + | aghi CARG2, -1 + | lgr CARG1, L:RB + | brasl r14, extern lj_state_growstack // (lua_State *L, int n) + | lg BASE, L:RB->base + | lgf TMPR1, SAVE_TMP_HI + | lg RA, L:RB->top + | agr TMPR1, BASE + | j <6 + break; + + /* -- Returns ----------------------------------------------------------- */ + + case BC_RETM: + | ins_AD // RA = results, RD = extra_nresults + | agf RD, SAVE_MULTRES // MULTRES >=1, so RD >=1. + | // Fall through. Assumes BC_RET follows and ins_AD is a no-op. + break; + + case BC_RET: case BC_RET0: case BC_RET1: + | ins_AD // RA = results, RD = nresults+1 + if (op != BC_RET0) { + | sllg RA, RA, 3 + } + |1: + | lg PC, -8(BASE) + | st RD, SAVE_MULTRES // Save nresults+1. + | tmll PC, FRAME_TYPE // Check frame type marker. 
+ | jne >7 // Not returning to a fixarg Lua func? + switch (op) { + case BC_RET: + |->BC_RET_Z: + | lgr KBASE, BASE // Use KBASE for result move. + | aghi RD, -1 + | je >3 + |2: // Move results down. + | lg RB, 0(KBASE, RA) + | stg RB, -16(KBASE) + | la KBASE, 8(KBASE) + | brctg RD, <2 + |3: + | llgf RD, SAVE_MULTRES // Note: MULTRES may be >256. + | llgc RB, PC_RB + |5: + | cgr RB, RD // More results expected? + | jh >6 + break; + case BC_RET1: + | lg RB, 0(BASE, RA) + | stg RB, -16(BASE) + /* fallthrough */ + case BC_RET0: + |5: + | llgc TMPR1, PC_RB + | cgr TMPR1, RD + | jh >6 + default: + break; + } + | llgc RA, PC_RA + | lcgr RA, RA + | sllg RA, RA, 3 + | lay BASE, -16(RA, BASE) // base = base - (RA+2)*8 + | lg LFUNC:KBASE, -16(BASE) + | cleartp LFUNC:KBASE + | lg KBASE, LFUNC:KBASE->pc + | lg KBASE, PC2PROTO(k)(KBASE) + | ins_next + | + |6: // Fill up results with nil. + | lghi TMPR1, LJ_TNIL + if (op == BC_RET) { + | stg TMPR1, -16(KBASE) // Note: relies on shifted base. + | la KBASE, 8(KBASE) + } else { + | sllg RC, RD, 3 // RC used as temp. + | stg TMPR1, -24(RC, BASE) + } + | la RD, 1(RD) + | j <5 + | + |7: // Non-standard return case. + | lay RB, -FRAME_VARG(PC) + | tmll RB, FRAME_TYPEP + | jne ->vm_return + | // Return from vararg function: relocate BASE down and RA up. + | sgr BASE, RB + if (op != BC_RET0) { + | agr RA, RB + } + | j <1 + break; + + /* -- Loops and branches ------------------------------------------------ */ + + |.define FOR_IDX, 0(RA) + |.define FOR_STOP, 8(RA) + |.define FOR_STEP, 16(RA) + |.define FOR_EXT, 24(RA) + + case BC_FORL: + |.if JIT + | hotloop RB + |.endif + | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. 
+ break; + + case BC_JFORI: + case BC_JFORL: +#if !LJ_HASJIT + break; +#endif + case BC_FORI: + case BC_IFORL: + vk = (op == BC_IFORL || op == BC_JFORL); + | ins_AJ // RA = base, RD = target (after end of loop or start of loop) + | sllg RA, RA, 3 + | la RA, 0(RA, BASE) + | lg RB, FOR_IDX + | checkint RB, >9 + | lg TMPR1, FOR_STOP + if (!vk) { + | checkint TMPR1, ->vmeta_for + | lg ITYPE, FOR_STEP + | chi ITYPE, 0; jl >5 + | srag ITYPE, ITYPE, 47 + | cghi ITYPE, LJ_TISNUM; jne ->vmeta_for + } else { +#ifdef LUA_USE_ASSERT + | // lg TMPR1, FOR_STOP + | checkinttp TMPR1, ->assert_bad_for_arg_type + | lg TMPR0, FOR_STEP + | checkinttp TMPR0, ->assert_bad_for_arg_type +#endif + | lg ITYPE, FOR_STEP + | chi ITYPE, 0; jl >5 + | ar RB, ITYPE; jo >1 + | setint RB + | stg RB, FOR_IDX + } + | cr RB, TMPR1 + | stg RB, FOR_EXT + if (op == BC_FORI) { + | jle >7 + |1: + |6: + | branchPC RD + } else if (op == BC_JFORI) { + | branchPC RD + | llgh RD, PC_RD + | jle =>BC_JLOOP + |1: + |6: + } else if (op == BC_IFORL) { + | jh >7 + |6: + | branchPC RD + |1: + } else { + | jle =>BC_JLOOP + |1: + |6: + } + |7: + | ins_next + | + |5: // Invert check for negative step. + if (!vk) { + | srag ITYPE, ITYPE, 47 + | cghi ITYPE, LJ_TISNUM; jne ->vmeta_for + } else { + | ar RB, ITYPE; jo <1 + | setint RB + | stg RB, FOR_IDX + } + | cr RB, TMPR1 + | stg RB, FOR_EXT + if (op == BC_FORI) { + | jhe <7 + } else if (op == BC_JFORI) { + | branchPC RD + | llgh RD, PC_RD + | jhe =>BC_JLOOP + } else if (op == BC_IFORL) { + | jl <7 + } else { + | jhe =>BC_JLOOP + } + | j <6 + |9: // Fallback to FP variant. 
+ if (!vk) { + | jhe ->vmeta_for + } + if (!vk) { + | lg TMPR0, FOR_STOP + | checknumtp TMPR0, ->vmeta_for + } else { +#ifdef LUA_USE_ASSERT + | lg TMPR0, FOR_STOP + | checknumtp TMPR0, ->assert_bad_for_arg_type + | lg TMPR0, FOR_STEP + | checknumtp TMPR0, ->assert_bad_for_arg_type +#endif + } + | lg RB, FOR_STEP + if (!vk) { + | checknum RB, ->vmeta_for + } + | ld f0, FOR_IDX + | ld f1, FOR_STOP + if (vk) { + | adb f0, FOR_STEP + | std f0, FOR_IDX + } + | cghi RB, 0; jl >3 + | cdbr f1, f0 + |1: + | std f0, FOR_EXT + if (op == BC_FORI) { + | jnl <7 + } else if (op == BC_JFORI) { + | branchPC RD + | llgh RD, PC_RD + | jnl =>BC_JLOOP + } else if (op == BC_IFORL) { + | jl <7 + } else { + | jnl =>BC_JLOOP + } + | j <6 + | + |3: // Invert comparison if step is negative. + | cdbr f0, f1 + | j <1 + break; + + case BC_ITERL: + |.if JIT + | hotloop RB + |.endif + | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. + break; + + case BC_JITERL: +#if !LJ_HASJIT + break; +#endif + case BC_IITERL: + | ins_AJ // RA = base, RD = target + | sllg RA, RA, 3 + | la RA, 0(RA, BASE) + | lg RB, 0(RA) + | cghi RB, LJ_TNIL; je >1 // Stop if iterator returned nil. + if (op == BC_JITERL) { + | stg RB, -8(RA) + | j =>BC_JLOOP + } else { + | branchPC RD // Otherwise save control var + branch. + | stg RB, -8(RA) + } + |1: + | ins_next + break; + + case BC_LOOP: + | ins_A // RA = base, RD = target (loop extent) + | // Note: RA/RD is only used by trace recorder to determine scope/extent + | // This opcode does NOT jump, it's only purpose is to detect a hot loop. + |.if JIT + | hotloop RBd + |.endif + | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. 
+ break; + + case BC_ILOOP: + | ins_A // RA = base, RD = target (loop extent) + | ins_next + break; + + case BC_JLOOP: + | stg r0, 0 + | stg r0, 0 + break; + + case BC_JMP: + | ins_AJ // RA = unused, RD = target + | branchPC RD + | ins_next + break; + + /* -- Function headers -------------------------------------------------- */ + + /* + ** Reminder: A function may be called with func/args above L->maxstack, + ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot, + ** too. This means all FUNC* ops (including fast functions) must check + ** for stack overflow _before_ adding more slots! + */ + + case BC_FUNCF: + |.if JIT + | stg r0, 0 + |.endif + case BC_FUNCV: /* NYI: compiled vararg functions. */ + | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op. + break; + + case BC_JFUNCF: +#if !LJ_HASJIT + break; +#endif + case BC_IFUNCF: + | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 + | lg KBASE, (PC2PROTO(k)-4)(PC) + | lg L:RB, SAVE_L + | sllg RA, RA, 3 + | la RA, 0(RA, BASE) // Top of frame. + | clg RA, L:RB->maxstack + | jh ->vm_growstack_f + | llgc RA, (PC2PROTO(numparams)-4)(PC) + | clgr NARGS:RD, RA // Check for missing parameters. + | jle >3 + |2: + if (op == BC_JFUNCF) { + | llgh RD, PC_RD + | j =>BC_JLOOP + } else { + | ins_next + } + | + |3: // Clear missing parameters. + | sllg TMPR1, NARGS:RD, 3 + | lghi TMPR0, LJ_TNIL + |4: + | stg TMPR0, -8(TMPR1, BASE) + | la TMPR1, 8(TMPR1) + | la RD, 1(RD) + | clgr RD, RA + | jle <4 + | j <2 + break; + + case BC_JFUNCV: +#if !LJ_HASJIT + break; +#endif + | stg r0, 0 // NYI: compiled vararg functions + break; /* NYI: compiled vararg functions. */ + + case BC_IFUNCV: + | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 + | sllg TMPR1, NARGS:RD, 3 + | la RB, (FRAME_VARG+8)(TMPR1) + | la RD, 8(TMPR1, BASE) + | lg LFUNC:KBASE, -16(BASE) + | stg RB, -8(RD) // Store delta + FRAME_VARG. + | stg LFUNC:KBASE, -16(RD) // Store copy of LFUNC. 
+ | lg L:RB, SAVE_L + | sllg RA, RA, 3 + | la RA, 0(RA, RD) + | cg RA, L:RB->maxstack + | jh ->vm_growstack_v // Need to grow stack. + | lgr RA, BASE + | lgr BASE, RD + | llgc RB, (PC2PROTO(numparams)-4)(PC) + | cghi RB, 0 + | je >2 + | aghi RA, 8 + | lghi TMPR1, LJ_TNIL + |1: // Copy fixarg slots up to new frame. + | la RA, 8(RA) + | cgr RA, BASE + | jnl >3 // Less args than parameters? + | lg KBASE, -16(RA) + | stg KBASE, 0(RD) + | la RD, 8(RD) + | stg TMPR1, -16(RA) // Clear old fixarg slot (help the GC). + | brctg RB, <1 + |2: + if (op == BC_JFUNCV) { + | llgh RD, PC_RD + | j =>BC_JLOOP + } else { + | lg KBASE, (PC2PROTO(k)-4)(PC) + | ins_next + } + | + |3: // Clear missing parameters. + | stg TMPR1, 0(RD) // TMPR1=LJ_TNIL (-1) here. + | la RD, 8(RD) + | brctg RB, <3 + | j <2 + break; + + case BC_FUNCC: + case BC_FUNCCW: + | ins_AD // BASE = new base, RD = nargs+1 + | lg CFUNC:RB, -16(BASE) + | cleartp CFUNC:RB + | lg KBASE, CFUNC:RB->f + | lg L:RB, SAVE_L + | sllg RD, NARGS:RD, 3 + | lay RD, -8(RD,BASE) + | stg BASE, L:RB->base + | la RA, (8*LUA_MINSTACK)(RD) + | clg RA, L:RB->maxstack + | stg RD, L:RB->top + | lgr CARG1, L:RB + if (op != BC_FUNCC) { + | lgr CARG2, KBASE + } + | jh ->vm_growstack_c // Need to grow stack. + | set_vmstate C + if (op == BC_FUNCC) { + | basr r14, KBASE // (lua_State *L) + } else { + | // (lua_State *L, lua_CFunction f) + | lg TMPR1, (DISPATCH_GL(wrapf))(DISPATCH) + | basr r14, TMPR1 + } + | // nresults returned in r2 (CRET1). + | lgr RD, CRET1 + | lg BASE, L:RB->base + | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH) + | set_vmstate INTERP + | sllg TMPR1, RD, 3 + | la RA, 0(TMPR1, BASE) + | lcgr RA, RA + | ag RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 + | lg PC, -8(BASE) // Fetch PC of caller. 
+ | j ->vm_returnc + break; + + /* ---------------------------------------------------------------------- */ + + default: + fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); + exit(2); + break; + } +} +/* Build the complete backend: call dasm_growpc(Dst, BC__MAX), emit the subroutines with build_subroutines(), switch to the .code_op section and then call build_ins() once for every opcode in [0, BC__MAX). Returns BC__MAX. */ +static int build_backend(BuildCtx *ctx) +{ + int op; + dasm_growpc(Dst, BC__MAX); + build_subroutines(ctx); + |.code_op + for (op = 0; op < BC__MAX; op++) + build_ins(ctx, (BCOp)op, op); + return BC__MAX; +} + +/* Emit pseudo frame-info for all assembler functions. Only BUILD_elfasm mode produces output; every other mode emits nothing. The output is assembler text written to ctx->fp. A .debug_frame section is always written: one CIE (.Lframe0), an FDE that starts at .Lbegin with length fcofs and, with LJ_HASFFI, a second FDE for lj_vm_ffi_call with length ctx->codesz - fcofs. When LJ_NO_UNWIND is not enabled, an .eh_frame section follows with the equivalent records, using pc-relative references and naming lj_err_unwind_dwarf in its first CIE. */ +static void emit_asm_debug(BuildCtx *ctx) +{ + int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);/* Byte offset of vm_ffi_call from the start of ctx->code. */ + switch (ctx->mode) { + case BUILD_elfasm: + fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); + fprintf(ctx->fp,/* .debug_frame CIE referenced by the FDEs below. */ + ".Lframe0:\n" + "\t.long .LECIE0-.LSCIE0\n" + ".LSCIE0:\n" + "\t.long 0xffffffff\n" + "\t.byte 0x1\n" + "\t.string \"\"\n" + "\t.uleb128 1\n" + "\t.sleb128 -8\n" + "\t.byte 0xe\n" /* return address register r14 */+ "\t.byte 0xc\n\t.uleb128 0xf\n\t.uleb128 160\n" /* def_cfa r15, 160 */+ "\t.align 8\n" + ".LECIE0:\n\n"); + fprintf(ctx->fp,/* FDE starting at .Lbegin, length fcofs. */ + ".LSFDE0:\n" + "\t.long .LEFDE0-.LASFDE0\n" + ".LASFDE0:\n" + "\t.long .Lframe0\n" + "\t.quad .Lbegin\n" + "\t.quad %d\n" + "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ + "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */ + "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */ + "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */ + "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */ + "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */ + "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */ + "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */ + "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */ + "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */ + "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */ + "\t.align 8\n" + ".LEFDE0:\n\n", fcofs, CFRAME_SIZE+160); +#if LJ_HASFFI + fprintf(ctx->fp,/* FDE for lj_vm_ffi_call, length ctx->codesz - fcofs. */ + ".LSFDE1:\n" + "\t.long .LEFDE1-.LASFDE1\n" + ".LASFDE1:\n" + "\t.long .Lframe0\n" + "\t.quad lj_vm_ffi_call\n" + "\t.quad %d\n" + "\t.byte 0xe\n\t.uleb128 160\n" /* def_cfa_offset */ +
"\t.byte 0xd\n\t.uleb128 0xd\n" /* def_cfa_register r13 (FP) */ + "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */ + "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */ + "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */ + "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */ + "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */ + "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */ + "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */ + "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */ + "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */ + "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */ + "\t.align 8\n" + ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); +#endif +#if !LJ_NO_UNWIND + fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); + fprintf(ctx->fp,/* .eh_frame CIE with augmentation "zPR". */ + ".Lframe1:\n" + "\t.long .LECIE1-.LSCIE1\n" + ".LSCIE1:\n" + "\t.long 0\n" + "\t.byte 0x1\n" + "\t.string \"zPR\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -8\n" + "\t.byte 0xe\n" + "\t.uleb128 6\n" /* augmentation length */ + "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.long lj_err_unwind_dwarf-.\n" /* personality routine */+ "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.byte 0xc\n\t.uleb128 0xf\n\t.uleb128 160\n" + "\t.align 8\n" + ".LECIE1:\n\n"); + fprintf(ctx->fp,/* .eh_frame FDE starting at .Lbegin, length fcofs. */ + ".LSFDE2:\n" + "\t.long .LEFDE2-.LASFDE2\n" + ".LASFDE2:\n" + "\t.long .LASFDE2-.Lframe1\n" + "\t.long .Lbegin-.\n" + "\t.long %d\n" + "\t.uleb128 0\n" /* augmentation length */ + "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ + "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */ + "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */ + "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */ + "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */ + "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */ + "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */ + "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */ + "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */ + "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */ + "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */ + "\t.align 8\n" + ".LEFDE2:\n\n", fcofs, CFRAME_SIZE+160);
+#if LJ_HASFFI + fprintf(ctx->fp,/* .eh_frame CIE with augmentation "zR" (no personality entry). */ + ".Lframe2:\n" + "\t.long .LECIE2-.LSCIE2\n" + ".LSCIE2:\n" + "\t.long 0\n" + "\t.byte 0x1\n" + "\t.string \"zR\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -8\n" + "\t.byte 0xe\n" + "\t.uleb128 1\n" /* augmentation length */ + "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.byte 0xc\n\t.uleb128 0xf\n\t.uleb128 160\n" + "\t.align 8\n" + ".LECIE2:\n\n"); + fprintf(ctx->fp,/* .eh_frame FDE for lj_vm_ffi_call, length ctx->codesz - fcofs. */ + ".LSFDE3:\n" + "\t.long .LEFDE3-.LASFDE3\n" + ".LASFDE3:\n" + "\t.long .LASFDE3-.Lframe2\n" + "\t.long lj_vm_ffi_call-.\n" + "\t.long %d\n" + "\t.uleb128 0\n" /* augmentation length */ + "\t.byte 0xe\n\t.uleb128 160\n" /* def_cfa_offset */ + "\t.byte 0xd\n\t.uleb128 0xd\n" /* def_cfa_register r13 (FP) */ + "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */ + "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */ + "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */ + "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */ + "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */ + "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */ + "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */ + "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */ + "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */ + "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */ + "\t.align 8\n" + ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); +#endif +#endif + break; + default: /* No other modes. */ + break; + } +}
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor