一些数据结构
Basic Block
:struct类型,包含一批IL指令,结尾是跳转或return
定义在jit/block.h
STMT
:一个完整的表达式,BB由一个或多个STMT组成
定义在jit/gentree.h
instruction
:enum类型,包含相关cpu的指令
ARM64指令定义在jit/instrsarm64.h
INST3(fcvtas, "fcvtas", 0, IF_EN3F, 0x0E21C800, 0x5E21C800, 0x1E240000)
INST3(fcvtau, "fcvtau", 0, IF_EN3F, 0x2E21C800, 0x7E21C800, 0x1E250000)
INST3(fcvtms, "fcvtms", 0, IF_EN3F, 0x0E21B800, 0x5E21B800, 0x1E300000)
INST3(fcvtmu, "fcvtmu", 0, IF_EN3F, 0x2E21B800, 0x7E21B800, 0x1E310000)
regNumber
:enum类型,包含相关cpu的寄存器的名称及别名
ARM64寄存器定义在jit/registerarm64.h (REGDEF列表见下)
InsGroup
:可以看作是保存汇编的BasicBlock
/*
REGDEF(name, rnum, mask, xname, wname) */
REGDEF(R0, 0, 0x0001, "x0" , "w0" )
REGDEF(R1, 1, 0x0002, "x1" , "w1" )
REGDEF(R2, 2, 0x0004, "x2" , "w2" )
REGDEF(R3, 3, 0x0008, "x3" , "w3" )
REGDEF(R4, 4, 0x0010, "x4" , "w4" )
REGDEF(R5, 5, 0x0020, "x5" , "w5" )
REGDEF(R6, 6, 0x0040, "x6" , "w6" )
REGDEF(R7, 7, 0x0080, "x7" , "w7" )
REGDEF(R8, 8, 0x0100, "x8" , "w8" )
REGDEF(R9, 9, 0x0200, "x9" , "w9" )
REGDEF(R10, 10, 0x0400, "x10", "w10" )
REGDEF(R11, 11, 0x0800, "x11", "w11" )
insFormat
:enum类型,用来区分生成的指令的格式
// insFormat: one enumerator per instruction encoding format, named IF_<format>.
// The enumerators are not written by hand; they are generated by expanding the
// IF_DEF entries pulled in from "emitfmts.h" (X-macro pattern).
enum insFormat : unsigned
{
// Each IF_DEF(en, op1, op2) entry contributes the enumerator IF_<en>;
// the op1/op2 arguments are ignored by this particular expansion.
#define IF_DEF(en, op1, op2) IF_##en,
#include "emitfmts.h"
// Sentinel placed after all generated enumerators
// (presumably equal to the number of formats -- assumes emitfmts.h assigns no explicit values).
IF_COUNT
};
// -- the first two characters are
//
// DI :: Data Processing - Immediate
// DR :: Data Processing - Register
// DV :: Data Processing - Vector Register
// LS :: Loads and Stores
// BI :: Branches - Immediate
// BR :: Branches - Register
// SN :: System - No Registers or Immediates
// SI :: System - Immediate
// SR :: System - Register
//
// _ :: a separator char '_'
//
// -- the next two characters are
//
// # :: number of registers in the encoding
// ? :: A unique letter A,B,C,...
// -- optional third character
// I :: by element immediate
//
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
IF_DEF(BI_0A, IS_NONE, JMP) // BI_0A ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00 b
IF_DEF(BI_0B, IS_NONE, JMP) // BI_0B ......iiiiiiiiii iiiiiiiiiii..... simm19:00 b<cond>
IF_DEF(BI_0C, IS_NONE, CALL) // BI_0C ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00 bl
IF_DEF(BI_1A, IS_NONE, JMP) // BI_1A X.......iiiiiiii iiiiiiiiiiittttt Rt simm19:00 cbz cbnz
IF_DEF(BI_1B, IS_NONE, JMP) // BI_1B B.......bbbbbiii iiiiiiiiiiittttt Rt imm6 simm14:00 tbz tbnz
IF_DEF(BR_1A, IS_NONE, CALL) // BR_1A ................ ......nnnnn..... Rn ret
IF_DEF(BR_1B, IS_NONE, CALL) // BR_1B ................ ......nnnnn..... Rn br blr
GTNODE
:enum类型(genTreeOps)的成员,表示IR树的节点,定义在jit/gtlist.h, 通过下面的宏展开
#define GTNODE(en,st,cm,ok) GT_##en,
CodeGen
:负责JIT后端(代码生成)的类
Emitter
:负责写入汇编代码的类
GenTree
:语法节点, 根据IL指令构建
成员包括
gtOper 运算符, 有 GT_NOP GT_ADDR 等
gtType 评价后的类型, 有 TYP_VOID TYP_INT 等
gtLsraInfo 使用LSRA分配寄存器时使用的信息
gtNext IR中下一个tree
gtPrev IR中上一个tree
gtTreeID tree的id, 在函数中唯一, 仅用于debug
gtSeqNum LIR中的tree的序列顺序, 仅用于debug
定义在jit/gentree.h
两类新增指令
- 之前没有该指令,调用的jithelper函数
GT_DIV/GT_MOD
- 之前有对应的指令,但是更新了
GT_CAST
CAST EXAMPLE
using System;
using System.Runtime.InteropServices;
namespace ConsoleApplication
{
    // Minimal sample used to observe how the JIT handles a double -> int cast (GT_CAST).
    public class Program
    {
        public static void Main(string[] args)
        {
            // 10.5 matches the `ldc.r8 10.5` in the IL listing of this example
            // and the 0x4025000000000000 constant in the generated assembly.
            double a = 10.5;
            // Explicit narrowing conversion; compiles to the `conv.i4` IL opcode.
            int b = (int)a;
        }
    }
}
.method private hidebysig static void Main(string[] args) cil managed
{
.entrypoint
// Code size 15 (0xf)
.maxstack 1
.locals init (float64 V_0,
int32 V_1)
IL_0000: nop
IL_0001: ldc.r8 10.5
IL_000a: stloc.0
IL_000b: ldloc.0
IL_000c: conv.i4
IL_000d: stloc.1
IL_000e: ret
} // end of method Program::Main
IMPORT
IL代码转换成HIR,不涉及新增指令的代码
MORPH
该阶段主要对HIR的每个节点进行变形
jit/morph.cpp
Compiler::fgMorphBlocks
枚举 BasicBlock
调用fgMorphStmts()根据oper做出postorder morphing
判断节点类型, 如果节点为GTK_SMPOP类型(一些简单的算术运算节点) if (kind & GTK_SMPOP)
调用fgMorphSmpOp
case GT_CAST 调用fgMorphCast函数,将GT_CAST转换为Call helper(调用的是c++函数实现)代码见下
case GT_DIV 转换为Call helper(调用的是c++函数实现)但是有的架构包含这种指令则不需要变形
// Excerpt from the cast-morphing logic: for casts that do not check for overflow,
// either jump to OPTIMIZECAST or rewrite the cast as a JIT helper call, depending on
// the target architecture macro and the destination type.
/* Note that if we need to use a helper call then we can not morph oper */
if (!tree->gtOverflow())
{
#ifdef TARGET_ARM64 // On ARM64 All non-overflow checking conversions can be optimized
goto OPTIMIZECAST;
#else
switch (dstType) // dispatch on the destination type of the cast
{
case TYP_INT:
goto OPTIMIZECAST;
case TYP_UINT:
#if defined(TARGET_ARM) || defined(TARGET_AMD64)
goto OPTIMIZECAST;
#else // TARGET_X86
return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper); // rewrite the cast as a JIT helper call
#endif // TARGET_X86
case TYP_LONG:
#ifdef TARGET_AMD64
// SSE2 has instructions to convert a float/double directly to a long
goto OPTIMIZECAST;
#else // !TARGET_AMD64
return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
#endif // !TARGET_AMD64
case TYP_ULONG:
// No target in this branch converts to ulong directly; always use the helper.
return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
default:
break;
}
#endif // TARGET_ARM64
}
会把一些cpu无法用指令表示的节点转换成GT_CALL helper节点,直接调用jithelper函数
中间步骤
主要跟新增指令无关
PHASE_LOWERING
这个阶段会做主要的Lowering(使LIR更接近机器代码)工作
///////////////////////////////////////////////////////////////////////////////
// Dominator and reachability sets are no longer valid. They haven't been
// maintained up to here, and shouldn't be used (unless recomputed).
///////////////////////////////////////////////////////////////////////////////
fgDomsComputed = false;
/* Create LSRA before Lowering, this way Lowering can initialize the TreeNode Map */
m_pLinearScan = getLinearScanAllocator(this);
/* Lower */
Lowering lower(this, m_pLinearScan); // PHASE_LOWERING
lower.DoPhase();
Lsra部分
寄存器分配
Codegen部分
计算帧布局
frame layout 指的是函数运行时需要在栈空间上分配多大的空间
jit/lclvars.cpp
调用Compiler::lvaAssignFrameOffsets
会分两步计算
第一步设置一个虚拟的初始偏移值0, 然后以这个0为基准设置各个变量的偏移值, 参数为正数本地变量为负数
第二步根据是否使用frame pointer调整各个偏移值
调用 lvaAssignVirtualFrameOffsetsToArgs()
本地变量 argOffs = 0, 这个变量记录当前参数的偏移值
调用 lvaAssignVirtualFrameOffsetsToLocals()
计算栈上的本地变量, 包括临时变量距离virtual 0的偏移值, 这里算出的偏移值都会是负数
根据LIR节点生成汇编指令
jit/codegencommon.cpp
调用CodeGen::genGenerateMachineCode
调用CodeGen::genCodeForBBlist
jit/codegenarm64.cpp
调用CodeGen::genCodeForTreeNode
这里分析arm64的版本
判断节点类型 treeNode->gtOper
case GT_CAST
如果是float=>float, 调用 genFloatToFloatCast(treeNode)
添加转换的指令, 例如 float => double 在arm64上是 INS_fcvt (x64上对应 INS_cvtss2sd)
如果是float=>int, 调用 genFloatToIntCast(treeNode)
添加转换的指令, 例如 float => int 在arm64上是 INS_fcvtzs, 目标为无符号类型时是 INS_fcvtzu (x64上对应 INS_cvttss2si), 代码见下
如果是int=>float, 调用 genIntToFloatCast(treeNode)
添加转换的指令, 例如 int => float 在arm64上是 INS_scvtf, 源为无符号类型时是 INS_ucvtf (x64上对应 INS_cvtsi2ss)
如果是int=>int, 调用 genIntToIntCast(treeNode)
添加转换的指令, 可以直接使用 mov 指令
//------------------------------------------------------------------------
// genFloatToIntCast: Emit the instruction for a GT_CAST from float/double
//    to a 4-byte or 8-byte integer type.
//
// Arguments:
//    treeNode - the GT_CAST node; must not be an overflow-checking cast
//
// Notes:
//    Overflow-detecting float/double --> int conversions are not expected here;
//    they should already have been turned into helper calls by the front-end.
//    Conversions to small types (byte/sbyte/int16/uint16) are expected to arrive
//    as two levels of cast, so the destination size is always 4 or 8 bytes.
//
void CodeGen::genFloatToIntCast(GenTree* treeNode)
{
    assert(treeNode->OperGet() == GT_CAST);
    assert(!treeNode->gtOverflow());

    // The result lives in an integer register.
    const regNumber dstReg = treeNode->GetRegNum();
    assert(genIsValidIntReg(dstReg));

    // The operand must be in a floating-point register and cannot be contained.
    GenTree* const srcNode = treeNode->AsOp()->gtOp1;
    assert(!srcNode->isContained());
    assert(genIsValidFloatReg(srcNode->GetRegNum()));

    const var_types dstType = treeNode->CastToType();
    const var_types srcType = srcNode->TypeGet();
    assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType));

    const emitAttr dstSize = EA_ATTR(genTypeSize(dstType));
    noway_assert((dstSize == EA_4BYTE) || (dstSize == EA_8BYTE));

    // Signed convert by default; unsigned convert when the destination type is unsigned.
    const instruction cvtIns = varTypeIsUnsigned(dstType) ? INS_fcvtzu : INS_fcvtzs;

    // The conversion option encodes the source precision and the destination width.
    const bool fromDouble = (srcType == TYP_DOUBLE);
    const bool to4Byte    = (dstSize == EA_4BYTE);
    assert(fromDouble || (srcType == TYP_FLOAT));
    assert(to4Byte || (dstSize == EA_8BYTE));

    insOpts cvtOpts;
    if (fromDouble)
    {
        cvtOpts = to4Byte ? INS_OPTS_D_TO_4BYTE : INS_OPTS_D_TO_8BYTE;
    }
    else
    {
        cvtOpts = to4Byte ? INS_OPTS_S_TO_4BYTE : INS_OPTS_S_TO_8BYTE;
    }

    genConsumeOperands(treeNode->AsOp());

    // Register-to-register form: integer destination, floating-point source.
    GetEmitter()->emitIns_R_R(cvtIns, dstSize, treeNode->GetRegNum(), srcNode->GetRegNum(), cvtOpts);

    genProduceReg(treeNode);
}
调用emitIns_R_R寄存器到寄存器
调用CodeGen::genCodeForTreeNode
IR树和汇编的对应关系
机器代码(Emitter)生成
jit/codegencommon.cpp
调用CodeGen::genGenerateMachineCode
jit/emit.cpp
调用emitter::emitEndCodeGen
jit/emitarm64.cpp
调用emitter::emitIssue1Instr
调用emitter::emitOutputInstr用来转换单条指令
根据生成指令的格式生成机器码,新增指令这里基本不需要改动除非有SIMD
// DI :: Data Processing - Immediate
// DR :: Data Processing - Register
// DV :: Data Processing - Vector Register
// LS :: Loads and Stores
// BI :: Branches - Immediate
// BR :: Branches - Register
// SN :: System - No Registers or Immediates
size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
BYTE* dst = *dp;
BYTE* odst = dst;
code_t code = 0;
size_t sz = emitGetInstrDescSize(id); // TODO-ARM64-Cleanup: on ARM, this is set in each case. why?
instruction ins = id->idIns();
insFormat fmt = id->idInsFmt();
emitAttr size = id->idOpSize();
#ifdef DEBUG
#if DUMP_GC_TABLES
bool dspOffs = emitComp->opts.dspGCtbls;
#else
bool dspOffs = !emitComp->opts.disDiffable;
#endif
#endif // DEBUG
assert(REG_NA == (int)REG_NA);
/* What instruction format have we got? */
switch (fmt)
ssize_t imm;
ssize_t index;
ssize_t index2;
unsigned cmode;
unsigned immShift;
emitAttr elemsize;
emitAttr datasize;
case IF_BI_0A: // BI_0A ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00
case IF_BI_0B: // BI_0B ......iiiiiiiiii iiiiiiiiiii..... simm19:00
case IF_LARGEJMP:
assert(id->idGCref() == GCT_NONE);
assert(id->idIsBound());
dst = emitOutputLJ(ig, dst, id);
sz = sizeof(instrDescJmp);
break;
case IF_BI_0C: // BI_0C ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00
code = emitInsCode(ins, fmt);
sz = id->idIsLargeCall() ? sizeof(instrDescCGCA) : sizeof(instrDesc);
dst += emitOutputCall(ig, dst, id, code);
// Always call RecordRelocation so that we wire in a JumpStub when we don't reach
emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_BRANCH26);
break;
case IF_BI_1A: // BI_1A ......iiiiiiiiii iiiiiiiiiiittttt Rt simm19:00
assert(insOptsNone(id->idInsOpt()));
assert(id->idIsBound());
dst = emitOutputLJ(ig, dst, id);
sz = sizeof(instrDescJmp);
break;
附: 上面 CAST EXAMPLE 在x64上的JIT汇编输出(double => int 对应的指令是 vcvttsd2si), 可与arm64的 fcvtzs 对照
IN0004: 000025 nop
IN0005: 000026 mov rax, 0x4025000000000000
IN0006: 000030 mov qword ptr [V01 rbp-0x10], rax
IN0007: 000034 vcvttsd2si eax, qword ptr [V01 rbp-0x10]
IN0008: 000039 mov dword ptr [V02 rbp-0x14], eax
IN0009: 00003C nop