Skip to content

Zeroing in JIT

Posted on:November 27, 2023 at 10:06 AM

zero in jit

struct example

using System;

// Demo struct built from fixed-size byte buffers plus one int.
// It is the return type of C.X1, the smaller of the two zero-initialization cases.
// The declared field sizes add up to 86 bytes; the original note gives 88 bytes in
// total, so the remaining 2 bytes are presumably alignment padding -- TODO confirm.
unsafe struct S1
{
    public fixed byte a[10]; // 10 bytes
    public int b; // 4 bytes
    public fixed byte c[23]; // 23 bytes
    public fixed byte d[24]; // 24 bytes
    public fixed byte e[25]; // 25 bytes
} // 88 bytes

// Demo struct with the same shape as S1 but using fixed-size short buffers.
// It is the return type of C.X2, the larger of the two zero-initialization cases.
// The declared field sizes add up to 168 bytes, matching the original size note.
unsafe struct S2
{
    public fixed short a[10]; // 10 shorts = 20 bytes
    public int b; // 4 bytes
    public fixed short c[23]; // 23 shorts = 46 bytes
    public fixed short d[24]; // 24 shorts = 48 bytes
    public fixed short e[25]; // 25 shorts = 50 bytes
} // 168 bytes

/// <summary>
/// Holds the two methods under inspection: each one zero-initializes a struct
/// local and returns it by value.
/// </summary>
class C
{
    /// <summary>Creates a zero-initialized <see cref="S1"/> in a local and returns it.</summary>
    /// <returns>An <see cref="S1"/> whose bytes are all zero.</returns>
    public S1 X1()
    {
        var zeroed = default(S1);
        return zeroed;
    }

    /// <summary>Creates a zero-initialized <see cref="S2"/> in a local and returns it.</summary>
    /// <returns>An <see cref="S2"/> whose bytes are all zero.</returns>
    public S2 X2()
    {
        var zeroed = default(S2);
        return zeroed;
    }
}

/// <summary>
/// Entry point for the struct example: calls <c>X1</c> and <c>X2</c> once each.
/// The returned structs are not used afterwards.
/// </summary>
class Program
{
    static void Main()
    {
        var subject = new C();

        // Size diagnostics, left disabled as in the original snippet:
        //   Console.WriteLine($"Size of S1: {sizeof(S1)} bytes");
        //   Console.WriteLine($"Size of S2: {sizeof(S2)} bytes");
        S1 first = subject.X1();
        S2 second = subject.X2();
    }
}

COMPlus_JitHWIntrinsic=0：未开启 SIMD 清零

X1的assembly
       // Scalar zeroing: rdx is cleared once, then written out with eleven
       // consecutive 8-byte stores covering [rbp-0x60, rbp-0x08) = 88 bytes.
       xor      edx, edx
       mov      qword ptr [rbp-0x60], rdx
       mov      qword ptr [rbp-0x58], rdx
       mov      qword ptr [rbp-0x50], rdx
       mov      qword ptr [rbp-0x48], rdx
       mov      qword ptr [rbp-0x40], rdx
       mov      qword ptr [rbp-0x38], rdx
       mov      qword ptr [rbp-0x30], rdx
       mov      qword ptr [rbp-0x28], rdx
       mov      qword ptr [rbp-0x20], rdx
       mov      qword ptr [rbp-0x18], rdx
       mov      qword ptr [rbp-0x10], rdx
X2的assembly
       // Zeroing through a helper call instead of inline stores: esi is cleared,
       // rdi receives the address rbp-0xB0 and edx the byte count 168 before the
       // call. (esi is presumably the fill-value argument -- TODO confirm.)
       xor      esi, esi
       lea      rdi, bword ptr [rbp-0xB0]
       mov      edx, 168
       call     CORINFO_HELP_MEMSET  // calls the system library's memset
       mov      rdi, bword ptr [rbp-0xD8]

如何调用memset [https://github.com/dotnet/runtime/blob/main/src/coreclr/vm/amd64/crthelpers.S]

// JIT_MemSet: thin wrapper placed in front of the C runtime memset.
// Returns immediately when the count in rdx is zero; otherwise it reads one
// byte at [rdi] and then jumps (not calls) to memset.
LEAF_ENTRY JIT_MemSet, _TEXT
        test    rdx, rdx                // check if count is zero
        jz      Exit_MemSet             // if zero, no bytes to set

        // NOTE(review): nothing after this compare branches on its flags, so the
        // memory read of [rdi] itself is presumably the point (fault on a null
        // dest here, before entering memset) -- confirm.
        cmp     byte ptr [rdi], 0       // check dest for null

        jmp     C_PLTFUNC(memset)       // forward to the CRT implementation

Exit_MemSet:
        ret

LEAF_END_MARKED JIT_MemSet, _TEXT

COMPlus_JitHWIntrinsic=1: 开启SIMD

X1的assembly
       // SIMD zeroing: ymm0 is cleared, then three 32-byte stores are issued at
       // rbp-0x60, rbp-0x40 and rbp-0x28. The third store begins 8 bytes before
       // the second one ends, so the two overlap and the run finishes at
       // rbp-0x08: 88 bytes covered in total.
       vxorps   ymm0, ymm0, ymm0
       vmovdqu  ymmword ptr [rbp-0x60], ymm0
       vmovdqu  ymmword ptr [rbp-0x40], ymm0
       vmovdqu  ymmword ptr [rbp-0x28], ymm0
X2的assembly
       // SIMD zeroing with inline stores only: five 32-byte ymm stores cover
       // [rbp-0xB0, rbp-0x10), and a final 16-byte xmm store at rbp-0x18 overlaps
       // the previous store by 8 bytes to finish at rbp-0x08: 168 bytes in total.
       vxorps   ymm0, ymm0, ymm0
       vmovdqu  ymmword ptr [rbp-0xB0], ymm0
       vmovdqu  ymmword ptr [rbp-0x90], ymm0
       vmovdqu  ymmword ptr [rbp-0x70], ymm0
       vmovdqu  ymmword ptr [rbp-0x50], ymm0
       vmovdqu  ymmword ptr [rbp-0x30], ymm0
       vmovdqu  xmmword ptr [rbp-0x18], xmm0

如果 CPU 支持 AVX-512，则会使用 zmm0。

针对 GT_STORE_BLK 的变换 [https://github.com/dotnet/runtime/blob/main/src/coreclr/jit/codegenxarch.cpp]

// Generates code for a block store node by dispatching on its gtBlkOpKind.
//
// Parameters:
//    storeBlkNode - the block store node; asserted to be GT_STORE_DYN_BLK or
//                   GT_STORE_BLK.
//
// Each kind maps to a "copy" or an "init" code generator depending on whether
// the node is a copy-block operation (OperIsCopyBlkOp). An unhandled kind hits
// unreached().
void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode)
{
    assert(storeBlkNode->OperIs(GT_STORE_DYN_BLK, GT_STORE_BLK));

    // Selects between the copy and init variants in the cases below.
    bool isCopyBlk = storeBlkNode->OperIsCopyBlkOp();

    switch (storeBlkNode->gtBlkOpKind)
    {
        // Both CpObj kinds share one generator.
        case GenTreeBlk::BlkOpKindCpObjRepInstr:
        case GenTreeBlk::BlkOpKindCpObjUnroll:
#ifndef JIT32_GCENCODER
            assert(!storeBlkNode->gtBlkOpGcUnsafe);
#endif
            genCodeForCpObj(storeBlkNode->AsBlk());
            break;

#ifdef TARGET_AMD64
        // Helper-call kind; this case is only compiled for TARGET_AMD64.
        case GenTreeBlk::BlkOpKindHelper:
            assert(!storeBlkNode->gtBlkOpGcUnsafe);
            if (isCopyBlk)
            {
                genCodeForCpBlkHelper(storeBlkNode);
            }
            else
            {
                genCodeForInitBlkHelper(storeBlkNode);
            }
            break;
#endif // TARGET_AMD64
        // Rep-instruction kind: RepMovs generator for copies, RepStos generator for init.
        case GenTreeBlk::BlkOpKindRepInstr:
#ifndef JIT32_GCENCODER
            assert(!storeBlkNode->gtBlkOpGcUnsafe);
#endif
            if (isCopyBlk)
            {
                genCodeForCpBlkRepMovs(storeBlkNode);
            }
            else
            {
                genCodeForInitBlkRepStos(storeBlkNode);
            }
            break;
        // Unrolled kinds. Only the copy path may be marked gtBlkOpGcUnsafe; the
        // init path asserts that it is not.
        case GenTreeBlk::BlkOpKindUnrollMemmove:
        case GenTreeBlk::BlkOpKindUnroll:
            if (isCopyBlk)
            {
#ifndef JIT32_GCENCODER
                // For a GC-unsafe copy, emitDisableGC() is called before the
                // copy code is generated and emitEnableGC() after it.
                if (storeBlkNode->gtBlkOpGcUnsafe)
                {
                    GetEmitter()->emitDisableGC();
                }
#endif
                if (storeBlkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)
                {
                    genCodeForCpBlkUnroll(storeBlkNode);
                }
                else
                {
                    assert(storeBlkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnrollMemmove);
                    genCodeForMemmove(storeBlkNode);
                }
#ifndef JIT32_GCENCODER
                if (storeBlkNode->gtBlkOpGcUnsafe)
                {
                    GetEmitter()->emitEnableGC();
                }
#endif
            }
            else
            {
#ifndef JIT32_GCENCODER
                assert(!storeBlkNode->gtBlkOpGcUnsafe);
#endif
                // Unrolled init path (no helper call, no rep instruction generator).
                genCodeForInitBlkUnroll(storeBlkNode);
            }
            break;
        default:
            unreached();
    }
}

stackalloc example

using System;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using BenchmarkDotNet.Diagnosers;
using System.Linq;

using System.Runtime.CompilerServices;

/// <summary>
/// Benchmarks that pass constant-size <c>stackalloc</c> byte buffers to a
/// non-inlined method, with the disassembly diagnoser enabled (MASM syntax,
/// instruction addresses printed).
/// </summary>
[DisassemblyDiagnoser(printInstructionAddresses: true, syntax: DisassemblySyntax.Masm)]
public class MyBenchmark
{
    /// <summary>Stack-allocates 256 bytes and passes them to <see cref="Use"/>.</summary>
    [Benchmark]
    public void Constant256()
    {
        Use(stackalloc byte[256]);
    }

    /// <summary>Stack-allocates 1024 bytes and passes them to <see cref="Use"/>.</summary>
    [Benchmark]
    public void Constant1024()
    {
        Use(stackalloc byte[1024]);
    }

    /// <summary>
    /// Receives the stack-allocated buffer. Marked <c>NoInlining</c>; the body is empty.
    /// </summary>
    /// <param name="span">The buffer handed over by the benchmark method.</param>
    [MethodImpl(MethodImplOptions.NoInlining)]
    private static void Use(Span<byte> span)
    {
        // Deliberately empty: this is the baseline measurement.
        // Add span-consuming work here (for example Console.WriteLine("Hello"))
        // to measure something other than the baseline.
    }
}

/// <summary>
/// Entry point for the stackalloc example: runs the <see cref="MyBenchmark"/>
/// benchmarks through <c>BenchmarkRunner</c>.
/// </summary>
class Program
{
    /// <param name="args">Command-line arguments; not read by this method.</param>
    public static void Main(string[] args)
    {
        // The returned summary is not inspected.
        _ = BenchmarkRunner.Run<MyBenchmark>();
    }
}