Skip to content

JIT新增汇编指令生成

Posted on:October 18, 2023 at 04:06 AM

一些数据结构

INST3(fcvtas,      "fcvtas",       0,      IF_EN3F,   0x0E21C800,  0x5E21C800,  0x1E240000)

INST3(fcvtau,      "fcvtau",       0,      IF_EN3F,   0x2E21C800,  0x7E21C800,  0x1E250000)

INST3(fcvtms,      "fcvtms",       0,      IF_EN3F,   0x0E21B800,  0x5E21B800,  0x1E300000)

INST3(fcvtmu,      "fcvtmu",       0,      IF_EN3F,   0x2E21B800,  0x7E21B800,  0x1E310000)
/*
REGDEF(name, rnum,       mask, xname, wname) */
REGDEF(R0,      0,     0x0001, "x0" , "w0"   )
REGDEF(R1,      1,     0x0002, "x1" , "w1"   )
REGDEF(R2,      2,     0x0004, "x2" , "w2"   )
REGDEF(R3,      3,     0x0008, "x3" , "w3"   )
REGDEF(R4,      4,     0x0010, "x4" , "w4"   )
REGDEF(R5,      5,     0x0020, "x5" , "w5"   )
REGDEF(R6,      6,     0x0040, "x6" , "w6"   )
REGDEF(R7,      7,     0x0080, "x7" , "w7"   )
REGDEF(R8,      8,     0x0100, "x8" , "w8"   )
REGDEF(R9,      9,     0x0200, "x9" , "w9"   )
REGDEF(R10,    10,     0x0400, "x10", "w10"  )
REGDEF(R11,    11,     0x0800, "x11", "w11"  )
    // Instruction-format enumeration built by X-macro expansion: while
    // "emitfmts.h" is included, every IF_DEF(en, op1, op2) entry it contains
    // expands to one enumerator named IF_<en> (op1 and op2 are ignored by this
    // particular expansion).
    enum insFormat : unsigned
    {
#define IF_DEF(en, op1, op2) IF_##en,
#include "emitfmts.h"

        // Placed after the last expanded entry; presumably used as the number
        // of formats -- confirm against its users.
        IF_COUNT
    };
//       -- the first two characters are
//
//   DI  :: Data Processing - Immediate
//   DR  :: Data Processing - Register
//   DV  :: Data Processing - Vector Register
//   LS  :: Loads and Stores
//   BI  :: Branches - Immediate
//   BR  :: Branches - Register
//   SN  :: System - No Registers or Immediates
//   SI  :: System - Immediate
//   SR  :: System - Register
//
//   _   :: a separator char '_'
//
//       -- the next two characters are
//
//   #   :: number of registers in the encoding
//   ?   :: A unique letter A,B,C,...
//       -- optional third character
//   I   :: by element immediate
//
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

IF_DEF(BI_0A, IS_NONE, JMP)  // BI_0A   ......iiiiiiiiii iiiiiiiiiiiiiiii               simm26:00   b
IF_DEF(BI_0B, IS_NONE, JMP)  // BI_0B   ......iiiiiiiiii iiiiiiiiiii.....               simm19:00   b<cond>
IF_DEF(BI_0C, IS_NONE, CALL) // BI_0C   ......iiiiiiiiii iiiiiiiiiiiiiiii               simm26:00   bl
IF_DEF(BI_1A, IS_NONE, JMP)  // BI_1A   X.......iiiiiiii iiiiiiiiiiittttt      Rt       simm19:00   cbz cbnz
IF_DEF(BI_1B, IS_NONE, JMP)  // BI_1B   B.......bbbbbiii iiiiiiiiiiittttt      Rt imm6  simm14:00   tbz tbnz
IF_DEF(BR_1A, IS_NONE, CALL) // BR_1A   ................ ......nnnnn.....         Rn                ret
IF_DEF(BR_1B, IS_NONE, CALL) // BR_1B   ................ ......nnnnn.....         Rn                br blr

两类新增指令

CAST EXAMPLE

using System;
using System.Runtime.InteropServices;

namespace ConsoleApplication
{
    /// <summary>
    /// Minimal sample used to follow a double-to-int cast through the JIT.
    /// </summary>
    public class Program
    {
        /// <summary>
        /// Stores a double constant in a local and casts it to int; the cast is
        /// the conv.i4 in the IL listing and becomes a GT_CAST node in the JIT.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        public static void Main(string[] args)
        {
            // 10.5 matches the constant shown in the IL listing (ldc.r8 10.5)
            // and in the JIT disassembly (0x4025000000000000).
            double a = 10.5;
            int b = (int)a;
        }
    }
}
 .method private hidebysig static void  Main(string[] args) cil managed
  {
    .entrypoint
    // Code size       15 (0xf)
    .maxstack  1
    .locals init (float64 V_0,
             int32 V_1)
    IL_0000:  nop
    IL_0001:  ldc.r8     10.5
    IL_000a:  stloc.0
    IL_000b:  ldloc.0
    IL_000c:  conv.i4
    IL_000d:  stloc.1
    IL_000e:  ret
  } // end of method Program::Main

IMPORT

IL代码转换成HIR,不涉及新增指令的代码

MORPH

该阶段主要对HIR的每个节点进行变形 jit/morph.cpp
Compiler::fgMorphBlocks
   枚举 BasicBlock
    调用fgMorphStmts()根据oper做出postorder morphing
     判断节点类型如果节点为GTK_SMPOP类型(一些简单的算术运算节点)if(kind & GTK_SMPOP)调用fgMorphSmpOp
       case GT_CAST 调用fgMorphCast函数,将GT_CAST转换为Call helper(调用的是c++函数实现)代码见下
       case GT_DIV 转换为Call helper(调用的是c++函数实现)但是有的架构包含这种指令则不需要变形

            /* Note that if we need to use a helper call then we can not morph oper */
            // Only casts without overflow checking are handled by this branch.
            if (!tree->gtOverflow())
            {
#ifdef TARGET_ARM64 // On ARM64 All non-overflow checking conversions can be optimized
                goto OPTIMIZECAST;
#else
                switch (dstType)// per destination type: optimize the cast in place or fall back to a helper call
                {
                    case TYP_INT:
                        goto OPTIMIZECAST;

                    case TYP_UINT:
#if defined(TARGET_ARM) || defined(TARGET_AMD64)
                        goto OPTIMIZECAST;
#else  // TARGET_X86
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);// replace the cast with a call to the JIT helper
#endif // TARGET_X86

                    case TYP_LONG:
#ifdef TARGET_AMD64
                        // SSE2 has instructions to convert a float/double directly to a long
                        goto OPTIMIZECAST;
#else  // !TARGET_AMD64
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
#endif // !TARGET_AMD64

                    case TYP_ULONG:
                        // No target in this non-ARM64 path converts to ulong directly; always use the helper.
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
                    default:
                        break;
                }
#endif // TARGET_ARM64
}

会把一些cpu无法用指令表示的节点转换成GT_CALL helper节点,直接调用jithelper函数

中间步骤

主要跟新增指令无关

PHASE_LOWERING

这个阶段会做主要的Lowering(使LIR更接近机器代码)工作

///////////////////////////////////////////////////////////////////////////////
// Dominator and reachability sets are no longer valid. They haven't been
// maintained up to here, and shouldn't be used (unless recomputed).
///////////////////////////////////////////////////////////////////////////////
fgDomsComputed = false;

/* Create LSRA before Lowering, this way Lowering can initialize the TreeNode Map */
m_pLinearScan = getLinearScanAllocator(this);

/* Lower */
Lowering lower(this, m_pLinearScan); // PHASE_LOWERING
lower.DoPhase();

Lsra部分

寄存器分配

Codegen部分

计算帧布局

frame layout 指的是函数运行时需要在栈空间上分配多大的空间 jit/lclvars.cpp
  调用Compiler::lvaAssignFrameOffsets
  会分两步计算
  第一步设置一个虚拟的初始偏移值0, 然后以这个0为基准设置各个变量的偏移值, 参数为正数本地变量为负数
  第二步根据是否使用frame pointer调整各个偏移值
   调用 lvaAssignVirtualFrameOffsetsToArgs()
    本地变量 argOffs = 0, 这个变量记录当前参数的偏移值
   调用 lvaAssignVirtualFrameOffsetsToLocals()
    计算栈上的本地变量, 包括临时变量距离virtual 0的偏移值, 这里算出的偏移值都会是负数

根据LIR节点生成汇编指令

jit/codegencommon.cpp
  调用CodeGen::genGenerateMachineCode
  调用CodeGen::genCodeForBBlist jit/codegenarm64.cpp
  调用CodeGen::genCodeForTreeNode
  这里分析arm64的版本
  判断节点类型 treeNode->gtOper
  case GT_CAST
   如果是float=>float, 调用 genFloatToFloatCast(treeNode)
    添加转换的指令, 例如 float => double 在arm64上是 INS_fcvt (x64上对应的是 INS_cvtss2sd)
   如果是float=>int, 调用 genFloatToIntCast(treeNode)
    添加转换的指令, 例如 float => int 在arm64上是 INS_fcvtzs (x64上对应的是 INS_cvttss2si)
   如果是int=>float, 调用 genIntToFloatCast(treeNode)
    添加转换的指令, 例如 int => float 在arm64上是 INS_scvtf (x64上对应的是 INS_cvtsi2ss)
   如果是int=>int, 调用 genIntToIntCast(treeNode)
    添加转换的指令, 可以直接使用 mov 指令

//------------------------------------------------------------------------
// genFloatToIntCast: Emit the instruction for a float/double to int/long cast.
//
// Arguments:
//    treeNode - the GT_CAST node; its operand is a floating-point value held
//               in a float register and its own register is an int register.
//
// Notes:
//    Overflow-checking casts are not handled here (asserted). The destination
//    must be 4 or 8 bytes wide; conversions to small integer types are
//    expected to arrive as two levels of cast.
//
void CodeGen::genFloatToIntCast(GenTree* treeNode)
{
    // Overflow-detecting float/double --> int conversions should already have
    // been turned into helper calls by the front-end.
    assert(treeNode->OperGet() == GT_CAST);
    assert(!treeNode->gtOverflow());
    assert(genIsValidIntReg(treeNode->GetRegNum())); // The result must land in an int register.

    GenTree* const srcNode = treeNode->AsOp()->gtOp1;
    assert(!srcNode->isContained());                  // The operand cannot be contained.
    assert(genIsValidFloatReg(srcNode->GetRegNum())); // The operand must be in a float register.

    const var_types dstType = treeNode->CastToType();
    const var_types srcType = srcNode->TypeGet();
    assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType));

    // Only 4-byte and 8-byte destinations are supported; anything else is a
    // fatal error even in release builds.
    const emitAttr dstSize = EA_ATTR(genTypeSize(dstType));
    noway_assert((dstSize == EA_4BYTE) || (dstSize == EA_8BYTE));

    // Signed conversion unless the destination type is unsigned.
    const instruction ins = varTypeIsUnsigned(dstType) ? INS_fcvtzu : INS_fcvtzs;

    // The option encodes the source precision together with the destination width.
    const bool dstIs4Byte = (dstSize == EA_4BYTE);
    assert(dstIs4Byte || (dstSize == EA_8BYTE));

    insOpts cvtOption;
    if (srcType == TYP_DOUBLE)
    {
        cvtOption = dstIs4Byte ? INS_OPTS_D_TO_4BYTE : INS_OPTS_D_TO_8BYTE;
    }
    else
    {
        assert(srcType == TYP_FLOAT);
        cvtOption = dstIs4Byte ? INS_OPTS_S_TO_4BYTE : INS_OPTS_S_TO_8BYTE;
    }

    genConsumeOperands(treeNode->AsOp());

    // Emit the register-to-register conversion instruction.
    GetEmitter()->emitIns_R_R(ins, dstSize, treeNode->GetRegNum(), srcNode->GetRegNum(), cvtOption);

    genProduceReg(treeNode);
}


  调用emitIns_R_R寄存器到寄存器
  调用CodeGen::genCodeForTreeNode

IR树和汇编的对应关系

some

机器代码(Emitter)生成

jit/codegencommon.cpp
  调用CodeGen::genGenerateMachineCode jit/emit.cpp
  调用emitter::emitEndCodeGen jit/emitarm64.cpp
  调用emitter::emitIssue1Instr
  调用emitter::emitOutputInstr用来转换单条指令
  根据生成指令的格式生成机器码,新增指令这里基本不需要改动除非有SIMD

// DI :: Data Processing - Immediate
// DR :: Data Processing - Register
// DV :: Data Processing - Vector Register
// LS :: Loads and Stores
// BI :: Branches - Immediate
// BR :: Branches - Register
// SN :: System - No Registers or Immediates

size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)

    BYTE*       dst  = *dp;
    BYTE*       odst = dst;
    code_t      code = 0;
    size_t      sz   = emitGetInstrDescSize(id); // TODO-ARM64-Cleanup: on ARM, this is set in each case. why?
    instruction ins  = id->idIns();
    insFormat   fmt  = id->idInsFmt();
    emitAttr    size = id->idOpSize();

#ifdef DEBUG
#if DUMP_GC_TABLES
    bool dspOffs = emitComp->opts.dspGCtbls;
#else
    bool dspOffs = !emitComp->opts.disDiffable;
#endif
#endif // DEBUG

    assert(REG_NA == (int)REG_NA);

    /* What instruction format have we got? */

    switch (fmt)

        ssize_t  imm;
        ssize_t  index;
        ssize_t  index2;
        unsigned cmode;
        unsigned immShift;
        emitAttr elemsize;
        emitAttr datasize;

        case IF_BI_0A: // BI_0A   ......iiiiiiiiii iiiiiiiiiiiiiiii               simm26:00
        case IF_BI_0B: // BI_0B   ......iiiiiiiiii iiiiiiiiiii.....               simm19:00
        case IF_LARGEJMP:
            assert(id->idGCref() == GCT_NONE);
            assert(id->idIsBound());
            dst = emitOutputLJ(ig, dst, id);
            sz  = sizeof(instrDescJmp);
            break;

        case IF_BI_0C: // BI_0C   ......iiiiiiiiii iiiiiiiiiiiiiiii               simm26:00
            code = emitInsCode(ins, fmt);
            sz   = id->idIsLargeCall() ? sizeof(instrDescCGCA) : sizeof(instrDesc);
            dst += emitOutputCall(ig, dst, id, code);
            // Always call RecordRelocation so that we wire in a JumpStub when we don't reach
            emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_BRANCH26);
            break;

        case IF_BI_1A: // BI_1A   ......iiiiiiiiii iiiiiiiiiiittttt      Rt       simm19:00
            assert(insOptsNone(id->idInsOpt()));
            assert(id->idIsBound());

            dst = emitOutputLJ(ig, dst, id);
            sz  = sizeof(instrDescJmp);
            break;
IN0004: 000025 nop
IN0005: 000026 mov      rax, 0x4025000000000000
IN0006: 000030 mov      qword ptr [V01 rbp-0x10], rax
IN0007: 000034 vcvttsd2si eax, qword ptr [V01 rbp-0x10]
IN0008: 000039 mov      dword ptr [V02 rbp-0x14], eax
IN0009: 00003C nop