Saturday, January 3, 2015

Practical Reverse Engineering p. 35 #5

Question number 5 on page 35 of Practical Reverse Engineering is as follows:

Decompile the following kernel routines in Windows:
  • KeInitializeDpc 
  • KeInitializeApc 
  • ObFastDereferenceObject (explain its calling convention)
  • KeInitializeQueue 
  • KxWaitForLockChainValid 
  • KeReadyThread 
  • KiInitializeTSS 
  • RtlValidateUnicodeString

KeInitializeDpc

Inside ntoskrnl.exe, KeInitializeDpc has the following prototype:

VOID NTAPI KeInitializeDpc(
    PRKDPC Dpc, 
    PKDEFERRED_ROUTINE DeferredRoutine, 
    PVOID DeferredContext);

This has a parameter for the KDPC struct, which contains a LIST_ENTRY. These are defined as:

/* Doubly-linked list node. The offsets shown assume 8-byte (64-bit) pointers. */
typedef struct _LIST_ENTRY {
  struct _LIST_ENTRY  *Flink;   /* 0x0 */
  struct _LIST_ENTRY  *Blink;   /* 0x8 */
} LIST_ENTRY, *PLIST_ENTRY;

/*
 * DPC object as used by the KeInitializeDpc listing, with byte offsets for a
 * 64-bit build. Type, Importance and Number share the first dword, which is
 * why a single 32-bit store can initialize all three.
 */
typedef struct _KDPC
{
     UCHAR Type;                /* 0x0 */   
     UCHAR Importance;          /* 0x1 */
     WORD Number;               /* 0x2 */
     BYTE Unknown[4];           /* 0x4 */   /* fills bytes 4..7 so the list entry lands on 0x8 */
     LIST_ENTRY DpcListEntry;   /* 0x8 */   /* Flink at 0x8, Blink at 0x10 */
     PVOID DeferredRoutine;     /* 0x18 */
     PVOID DeferredContext;     /* 0x20 */
     PVOID SystemArgument1;     /* 0x28 */
     PVOID SystemArgument2;     /* 0x30 */
     PVOID DpcData;             /* 0x38 */
} KDPC, *PKDPC;

Here is the disassembly:

; VOID KeInitializeDpc(Dpc, DeferredRoutine, DeferredContext)
; Microsoft x64: rcx = Dpc, rdx = DeferredRoutine, r8 = DeferredContext.
; Leaf routine: no stack frame, only stores into *Dpc.
KeInitializeDpc:
    xor     eax, eax                    ; rax = 0 (a 32-bit write zero-extends)
    mov     dword ptr [rcx], 113h       ; bytes 13h,01h,00h,00h -> Type, Importance, Number
    mov     [rcx+18h], rdx              ; Dpc->DeferredRoutine = DeferredRoutine
    mov     [rcx+38h], rax              ; Dpc->DpcData = 0
    mov     [rcx+10h], rax              ; second pointer of DpcListEntry (Blink) = 0
    mov     [rcx+20h], r8               ; Dpc->DeferredContext = DeferredContext
    retn

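The first MOV is an optimization that initializes the first three fields of the struct with a single 32-bit store: 0x113 (0b100010011) is laid out in memory, little-endian, as the bytes 13 01 00 00, so Type = 0x13 (19), Importance = 1 and Number = 0. Everything else lines up easily enough. Here is the fully decompiled function.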

/*
 * Decompilation of KeInitializeDpc: fills in a caller-supplied KDPC.
 *
 *   Dpc             - DPC object to initialize
 *   DeferredRoutine - callback stored at offset 0x18
 *   DeferredContext - context pointer stored at offset 0x20
 *
 * The stores are grouped by purpose here rather than in instruction order;
 * they are independent writes to distinct fields.
 */
VOID NTAPI KeInitializeDpc(
    PRKDPC Dpc, 
    PKDEFERRED_ROUTINE DeferredRoutine, 
    PVOID DeferredContext)
{
    /* mov dword ptr [rcx], 113h -- one 32-bit store covers all three fields */
    Dpc->Type       = 0x13;
    Dpc->Importance = 0x01;
    Dpc->Number     = 0x0000;

    /* mov [rcx+18h], rdx / mov [rcx+20h], r8 -- the caller's arguments */
    Dpc->DeferredRoutine = DeferredRoutine;
    Dpc->DeferredContext = DeferredContext;

    /* xor eax, eax / mov [rcx+10h], rax / mov [rcx+38h], rax -- zeroed fields */
    Dpc->DpcListEntry.Blink = 0;
    Dpc->DpcData            = 0;
}



KeInitializeApc

Inside ntoskrnl.exe, KeInitializeApc has the following prototype:

/*
 * Prototype of KeInitializeApc. The routine pointer types are
 * PKKERNEL_ROUTINE / PKRUNDOWN_ROUTINE / PKNORMAL_ROUTINE and the optional
 * annotation is spelled _In_opt_.
 */
VOID NTAPI KeInitializeApc( 
    _In_ PKAPC  Apc,
    _In_ PKTHREAD   Thread,
    _In_ KAPC_ENVIRONMENT   TargetEnvironment,
    _In_ PKKERNEL_ROUTINE     KernelRoutine,
    _In_opt_ PKRUNDOWN_ROUTINE RundownRoutine,
    _In_ PKNORMAL_ROUTINE     NormalRoutine,
    _In_ KPROCESSOR_MODE    Mode,
    _In_ PVOID  Context);   

Here is the KAPC struct with added offsets:

/*
 * APC object as used by the KeInitializeApc listing, with byte offsets for a
 * 64-bit build (8-byte pointers). The last declared field ends at 0x53, while
 * KeInitializeApc stores 0x58 into Size -- presumably the structure size
 * rounded up to 8-byte alignment (TODO confirm).
 */
typedef struct _KAPC
{
     UCHAR Type;                /* 0x0 */
     UCHAR SpareByte0;          /* 0x1 */
     UCHAR Size;                /* 0x2 */
     UCHAR SpareByte1;          /* 0x3 */
     ULONG SpareLong0;          /* 0x4 */
     PKTHREAD Thread;           /* 0x8 */
     LIST_ENTRY ApcListEntry;   /* 0x10 */
     PVOID KernelRoutine;       /* 0x20 */
     PVOID RundownRoutine;      /* 0x28 */
     PVOID NormalRoutine;       /* 0x30 */
     PVOID NormalContext;       /* 0x38 */
     PVOID SystemArgument1;     /* 0x40 */
     PVOID SystemArgument2;     /* 0x48 */
     CHAR ApcStateIndex;        /* 0x50 */
     CHAR ApcMode;              /* 0x51 */
     UCHAR Inserted;            /* 0x52 */
} KAPC, *PKAPC;

And here is the disassembly:

; VOID KeInitializeApc(Apc, Thread, TargetEnvironment, KernelRoutine,
;                      RundownRoutine, NormalRoutine, Mode, Context)
; Microsoft x64: rcx = Apc, rdx = Thread, r8d = TargetEnvironment,
; r9 = KernelRoutine. The routine never moves rsp, so arguments 5-8 are read
; directly above the return address and the 32-byte shadow space:
; [rsp+28h], [rsp+30h], [rsp+38h], [rsp+40h].
KeInitializeApc:
    mov     byte ptr [rcx], 12h         ; Apc->Type = 12h
    mov     byte ptr [rcx+2], 58h       ; Apc->Size = 58h
    cmp     r8d, 2                      ; TargetEnvironment == 2 ?
    jz      short loc_1400BAAAF         ; yes: take the index from the thread instead
    mov     [rcx+50h], r8b              ; no: Apc->ApcStateIndex = low byte of TargetEnvironment

loc_1400BAA71:                         
    mov     rax, [rsp+28h]              ; rax = 5th argument (RundownRoutine)
    mov     [rcx+8], rdx                ; Apc->Thread = Thread
    xor     edx, edx                    ; rdx = 0; reused below as the zero source
    mov     [rcx+28h], rax              ; Apc->RundownRoutine
    mov     rax, [rsp+30h]              ; rax = 6th argument (NormalRoutine)
    mov     [rcx+20h], r9               ; Apc->KernelRoutine
    mov     [rcx+30h], rax              ; Apc->NormalRoutine
    test    rax, rax                    ; NormalRoutine == 0 ?
    jnz     short loc_1400BAA9D         ; non-zero: copy Mode and Context
    mov     [rcx+51h], dl               ; Apc->ApcMode = 0
    mov     [rcx+38h], rdx              ; Apc->NormalContext = 0

loc_1400BAA99:                          
    mov     [rcx+52h], dl               ; Apc->Inserted = 0 (rdx is zero on both paths)
    retn 

loc_1400BAA9D:                          
    mov     al, [rsp+38h]               ; al = 7th argument (Mode)
    mov     [rcx+51h], al               ; Apc->ApcMode = Mode
    mov     rax, [rsp+40h]              ; rax = 8th argument (Context)
    mov     [rcx+38h], rax              ; Apc->NormalContext = Context
    jmp     short loc_1400BAA99 

loc_1400BAAAF:                         
    mov     al, [rdx+242h]              ; al = byte at Thread+242h
    mov     [rcx+50h], al               ; Apc->ApcStateIndex = that byte
    jmp     short loc_1400BAA71

This routine contains a couple of if statements, but otherwise it's just writing the arguments and some constants to the struct.

/*
 * Decompilation of KeInitializeApc: fills in a caller-supplied KAPC.
 *
 *   Apc               - APC object to initialize
 *   Thread            - stored in Apc->Thread; also the source of the state
 *                       index when TargetEnvironment is CurrentApcEnvironment
 *   TargetEnvironment - otherwise its low byte becomes Apc->ApcStateIndex
 *   KernelRoutine     - stored unconditionally
 *   RundownRoutine    - stored unconditionally (may be NULL)
 *   NormalRoutine     - stored unconditionally; when it is NULL, Mode and
 *                       Context are ignored and zeros are stored instead
 *   Mode, Context     - stored only when NormalRoutine is non-NULL
 */
VOID NTAPI KeInitializeApc( 
    _In_ PKAPC  Apc,
    _In_ PKTHREAD   Thread,
    _In_ KAPC_ENVIRONMENT   TargetEnvironment,
    _In_ PKKERNEL_ROUTINE     KernelRoutine,
    _In_opt_ PKRUNDOWN_ROUTINE RundownRoutine,
    _In_ PKNORMAL_ROUTINE     NormalRoutine,
    _In_ KPROCESSOR_MODE    Mode,
    _In_ PVOID  Context) 
{
    Apc->Type = 0x12;        /* mov byte ptr [rcx], 12h */
    Apc->Size = 0x58;        /* mov byte ptr [rcx+2], 58h */

    /* cmp r8d, 2 */
    if ((DWORD)TargetEnvironment == CurrentApcEnvironment)
      Apc->ApcStateIndex = Thread->ApcStateIndex;/* mov al, [rdx+242h] / mov [rcx+50h], al */
    else
      Apc->ApcStateIndex = (CHAR)TargetEnvironment;  /* mov [rcx+50h], r8b (low byte only) */

    Apc->Thread = Thread;                    /* mov [rcx+8], rdx */
    Apc->RundownRoutine = RundownRoutine;    /* mov [rcx+28h], rax */
    Apc->KernelRoutine = KernelRoutine;      /* mov [rcx+20h], r9 */
    Apc->NormalRoutine = NormalRoutine;      /* mov [rcx+30h], rax */

    /* test rax, rax */
    if (NormalRoutine != 0)
    {
        Apc->ApcMode = Mode;                 /* mov [rcx+51h], al */
        Apc->NormalContext = Context;        /* mov [rcx+38h], rax */
    }
    else
    {
        Apc->ApcMode = 0;                    /* mov [rcx+51h], dl */
        Apc->NormalContext = 0;              /* mov [rcx+38h], rdx */    
    }

    Apc->Inserted = 0;                       /* mov [rcx+52h], dl */
}



ObFastDereferenceObject

Inside ntoskrnl.exe, ObFastDereferenceObject has the following prototype:

/* Prototype of ObFastDereferenceObject (declaration, hence the terminating ';'). */
void __fastcall ObFastDereferenceObject(
    _In_ PEX_FAST_REF FastRef,
    _In_ PVOID Object 
);

Here is the struct that is passed in the first argument:

/*
 * Fast reference: one pointer-sized slot viewed three ways.
 *   Object - the slot as a pointer
 *   RefCnt - the low 4 bits of the slot, used as a small counter
 *   Value  - the slot as a raw 64-bit integer
 * The three members overlay each other, so each needs its own name.
 */
typedef struct _EX_FAST_REF
{
    union
    {
        PVOID Object;
        ULONG RefCnt: 4;
        UINT64 Value;
    };    
} EX_FAST_REF, *PEX_FAST_REF;

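Here is the disassembly. On x64 there is only one register-based calling convention, so __fastcall here just means the standard one: FastRef arrives in rcx and Object in rdx. The listing also shows an optimization on the 1st parameter for processors that support it: prefetchw hints that the memory at FastRef is about to be written.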

; void ObFastDereferenceObject(FastRef, Object)
; Microsoft x64: rcx = FastRef, rdx = Object. Leaf routine apart from the
; tail jump to ObfDereferenceObject.
ObFastDereferenceObject:
    mov     r9, rcx                     ; r9 = FastRef, kept for the cmpxchg below
    prefetchw byte ptr [rcx]            ; cache hint: *FastRef is about to be written
    mov     rax, [rcx]                  ; rax = current 64-bit value of *FastRef
    mov     r8, rax
    xor     r8, rdx                     ; r8 = value ^ Object
    cmp     r8, 0Fh
    jnb     short loc_140062C29         ; unsigned >= 0Fh: take the slow path

loc_140062C1D:                          
    lea     r8, [rax+1]                 ; r8 = observed value + 1
    lock cmpxchg [r9], r8               ; *FastRef == rax ? store r8, ZF=1 : rax = *FastRef, ZF=0
    jnz     short loc_140062C31         ; someone else changed it: re-test with the fresh rax
    retn

loc_140062C29:                                              
    mov     rcx, rdx                    ; Object becomes the first argument
    jmp     ObfDereferenceObject        ; tail call; its return goes to our caller

loc_140062C31:                          
    mov     rcx, rax
    xor     rcx, rdx                    ; same test as on entry, on the reloaded value
    cmp     rcx, 0Fh
    jb      short loc_140062C1D         ; still below 0Fh: retry the exchange
    jmp     short loc_140062C29

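The function is one big compare-and-swap loop that tries to add 1 to the value stored in *FastRef, i.e. to the counter kept in the low bits of the FastRef->Object pointer. Before every attempt there is a precondition test: the stored value XOR Object must be below 0x0F. If that test fails, ObfDereferenceObject is called on Object instead.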

/*
 * Decompilation of ObFastDereferenceObject.
 *
 *   FastRef - slot whose 64-bit value is atomically incremented on the fast path
 *   Object  - pointer compared against the slot; handed to
 *             ObfDereferenceObject on the slow path
 *
 * Fast path: while (value ^ Object) is below 0x0F -- i.e. the slot differs
 * from Object only in its low four bits and that difference is not 0xF --
 * try to replace the slot with value + 1. A failed exchange reloads the
 * value and repeats the test. Slow path: call ObfDereferenceObject(Object).
 *
 * prefetchw byte ptr [rcx] is only a cache hint and has no counterpart here.
 */
void __fastcall ObFastDereferenceObject(
    _In_ PEX_FAST_REF FastRef,
    _In_ PVOID Object 
)
{
    UINT64 Expected;                        /* rax */
    UINT64 Desired;                         /* r8  */

    Expected = (UINT64)FastRef->Object;     /* mov rax, [rcx] */

    /* xor r8, rdx / cmp r8, 0Fh / jnb loc_140062C29          (first pass) */
    /* xor rcx, rdx / cmp rcx, 0Fh / jb loc_140062C1D         (retries)    */
    while ((Expected ^ (UINT64)Object) < 0x0F)
    {
        Desired = Expected + 1;             /* lea r8, [rax+1] */

        /* lock cmpxchg [r9], r8 */
        if (atomic_compare_exchange_strong((_Atomic UINT64 *)FastRef,
                                           &Expected, Desired))
            return;                         /* ZF set: exchange done, retn */

        /* Exchange failed: Expected now holds the value found in *FastRef,
           just as cmpxchg leaves it in rax. Loop back and re-test it. */
    }
                                        /* mov rcx, rdx */
    ObfDereferenceObject(Object);       /* jmp ObfDereferenceObject */
}



KeInitializeQueue

Inside ntoskrnl.exe, KeInitializeQueue has the following prototype:

VOID NTAPI KeInitializeQueue(
  _Out_  PRKQUEUE Queue,
  _In_   ULONG Count);

Here are the relevant structs which make up our Queue parameter:

/*
 * Common header at the start of a KQUEUE. The first four bytes can be viewed
 * either as four separate UCHARs or as one LONG (Lock). With 8-byte pointers
 * the header is 0x18 bytes, which is why KQUEUE.EntryListHead sits at 0x18.
 */
typedef struct _DISPATCHER_HEADER
{
     union
     {
          struct
          {
               UCHAR Type;                  /* 0x0 */
               union
               {
                    UCHAR Abandoned;        /* 0x1 (all four names share this byte) */
                    UCHAR Absolute;
                    UCHAR NpxIrql;
                    UCHAR Signalling;
               };
               union
               {
                    UCHAR Size;             /* 0x2 */
                    UCHAR Hand;
               };
               union
               {
                    UCHAR Inserted;         /* 0x3 */
                    UCHAR DebugActive;
                    UCHAR DpcActive;
               };
          };
          LONG Lock;                        /* 0x0, overlays the four bytes above */
     };
     LONG SignalState;                      /* 0x4 */
     LIST_ENTRY WaitListHead;               /* 0x8 */
} DISPATCHER_HEADER, *PDISPATCHER_HEADER;

/*
 * Queue object initialized by KeInitializeQueue, with byte offsets for a
 * 64-bit build. ThreadListHead is two 8-byte pointers starting at 0x30, so
 * the structure ends at 0x40.
 */
typedef struct _KQUEUE {
    DISPATCHER_HEADER Header;         /* 0x0 */
    LIST_ENTRY EntryListHead;         /* 0x18 */
    ULONG CurrentCount;               /* 0x28 */
    ULONG MaximumCount;               /* 0x2c */
    LIST_ENTRY ThreadListHead;        /* 0x30 */
} KQUEUE, *PKQUEUE, *RESTRICTED_POINTER PRKQUEUE;

The disassembly for the function is:

; VOID KeInitializeQueue(Queue, Count)
; Microsoft x64: rcx = Queue, edx = Count. Leaf routine, no stack frame.
; Each of the three list heads is made empty by pointing both of its links
; at the head itself.
KeInitializeQueue:
    mov     word ptr [rcx], 4           ; 16-bit store: byte 0 (Type) = 4, byte 1 = 0
    mov     byte ptr [rcx+2], 10h       ; Header.Size = 10h
    lea     rax, [rcx+8]                ; rax = &Header.WaitListHead
    xor     r8d, r8d                    ; r8 = 0, the zero source for the stores below
    mov     [rcx+4], r8d                ; Header.SignalState = 0
    mov     [rax+8], rax                ; WaitListHead.Blink = &WaitListHead
    mov     [rax], rax                  ; WaitListHead.Flink = &WaitListHead
    lea     rax, [rcx+18h]              ; rax = &EntryListHead
    mov     [rax+8], rax                ; EntryListHead.Blink = &EntryListHead
    mov     [rax], rax                  ; EntryListHead.Flink = &EntryListHead
    lea     rax, [rcx+30h]              ; rax = &ThreadListHead
    mov     [rax+8], rax                ; ThreadListHead.Blink = &ThreadListHead
    mov     [rax], rax                  ; ThreadListHead.Flink = &ThreadListHead
    mov     [rcx+28h], r8d              ; CurrentCount = 0
    test    edx, edx                    ; Count == 0 ?
    jz      short loc_1400DF8A9         ; yes: use the global instead
    mov     [rcx+2Ch], edx              ; MaximumCount = Count
    retn

loc_1400DF8A9:                        
    mov     eax, cs:KeNumberProcessors_0 ; eax = value of the KeNumberProcessors_0 global
    mov     [rcx+2Ch], eax              ; MaximumCount = that value
    retn

This function is again just basically filling in a struct with some constants.

/*
 * Decompilation of KeInitializeQueue: fills in a caller-supplied KQUEUE.
 *
 *   Queue - queue object to initialize
 *   Count - value for MaximumCount; 0 selects KeNumberProcessors instead
 *
 * The three LIST_ENTRY heads are embedded structs (accessed with '.'), and
 * each is made empty by pointing both links back at the head itself.
 */
VOID NTAPI KeInitializeQueue(
  _Out_  PRKQUEUE Queue,
  _In_   ULONG Count)
{
    /* mov word ptr [rcx], 4 -- the 16-bit store writes Type and the byte after it */
    Queue->Header.Type = 4;
    Queue->Header.Abandoned = FALSE;
    Queue->Header.Size = 0x10;           /* mov byte ptr [rcx+2], 10h */
    Queue->Header.SignalState = 0;       /* mov [rcx+4], r8d */

    /* lea rax, [rcx+8] / mov [rax+8], rax / mov [rax], rax */
    Queue->Header.WaitListHead.Blink = &Queue->Header.WaitListHead;
    Queue->Header.WaitListHead.Flink = &Queue->Header.WaitListHead;

    /* lea rax, [rcx+18h] / mov [rax+8], rax / mov [rax], rax */
    Queue->EntryListHead.Blink = &Queue->EntryListHead;
    Queue->EntryListHead.Flink = &Queue->EntryListHead;

    /* lea rax, [rcx+30h] / mov [rax+8], rax / mov [rax], rax */
    Queue->ThreadListHead.Blink = &Queue->ThreadListHead;
    Queue->ThreadListHead.Flink = &Queue->ThreadListHead;

    Queue->CurrentCount = 0;             /* mov [rcx+28h], r8d */

    /* test edx, edx */
    if (Count == 0)
        Queue->MaximumCount = KeNumberProcessors; /* mov eax, cs:KeNumberProcessors_0 / mov [rcx+2Ch], eax */
    else
        Queue->MaximumCount = Count;            /* mov [rcx+2Ch], edx */
}



KxWaitForLockChainValid

Inside ntoskrnl.exe, KxWaitForLockChainValid has the following prototype:

PKSPIN_LOCK_QUEUE KxWaitForLockChainValid(
       __inout PKSPIN_LOCK_QUEUE LockQueue);

Here is the definition for the struct parameter:

/*
 * Argument type of KxWaitForLockChainValid. Both members are volatile, so
 * every read in the wait loop goes to memory. Offsets assume 8-byte pointers.
 */
typedef struct _KSPIN_LOCK_QUEUE 
{
    struct _KSPIN_LOCK_QUEUE * volatile Next;   /* 0x0 */
    PKSPIN_LOCK volatile Lock;                  /* 0x8 */
} KSPIN_LOCK_QUEUE, *PKSPIN_LOCK_QUEUE;

Here is the disassembly of the function:

; PKSPIN_LOCK_QUEUE KxWaitForLockChainValid(LockQueue)
; Microsoft x64: rcx = LockQueue; result in rax.
; rdi = LockQueue and ebx = spin counter for the whole routine; both are
; callee-saved, so they are saved on entry and restored before returning.
KxWaitForLockChainValid:
    mov     [rsp+8], rbx                ; save rbx in the caller-provided slot above the return address
    push    rdi                         ; save rdi
    sub     rsp, 20h                    ; 32 bytes of shadow space for the call below
    mov     rdi, rcx                    ; rdi = LockQueue
    xor     ebx, ebx                    ; spin counter = 0

loc_1400DA7F7:                        
    inc     ebx                         ; count this spin
    test    cs:HvlLongSpinCountMask, ebx
    jz      loc_14019DCAC               ; (counter & mask) == 0: go consider notifying

loc_1400DA805:                       
    pause

loc_1400DA807:                        
    mov     rax, [rdi]                  ; rax = first qword of *LockQueue (Next)
    test    rax, rax
    jz      short loc_1400DA7F7         ; still zero: spin again
    mov     rbx, [rsp+28h+8]            ; restore rbx (same slot as the entry save, 28h bytes lower rsp)
    add     rsp, 20h
    pop     rdi
    retn                                ; returns the non-zero value in rax

loc_14019DCAC:                    
    mov     eax, cs:HvlEnlightenments
    test    al, 40h
    jz      loc_1400DA805               ; bit 40h clear: just pause as usual
    mov     ecx, ebx                    ; argument = spin counter
    call    HvlNotifyLongSpinWait
    nop
    jmp     loc_1400DA807               ; skip the pause and re-check the queue

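This is the wait loop of a spinlock implementation: it spins, pausing between polls, until LockQueue->Next becomes non-zero and then returns that value. It's interesting that the last label is in a distant memory area. This is usually an indication of an optimization by the compiler that the code is rarely used.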

/*
 * Decompilation of KxWaitForLockChainValid.
 *
 *   LockQueue - entry whose Next field is polled
 *   returns   - the first non-zero value read from LockQueue->Next
 *
 * Each pass increments a spin counter. When the counter has no bits in
 * common with HvlLongSpinCountMask and bit 0x40 of HvlEnlightenments is set,
 * HvlNotifyLongSpinWait(counter) is called in place of the pause; in every
 * other case the pass just executes pause. The loop repeats while Next is
 * still zero.
 */
PKSPIN_LOCK_QUEUE KxWaitForLockChainValid(
       __inout PKSPIN_LOCK_QUEUE LockQueue)
{
    UINT32 i = 0;               /* xor ebx, ebx */
    PKSPIN_LOCK_QUEUE Next;     /* rax */

    do                          /* loc_1400DA7F7 */
    {
        ++i;                    /* inc ebx */

        /* test cs:HvlLongSpinCountMask, ebx / jz loc_14019DCAC */
        /* test al, 40h / jz loc_1400DA805 */
        if ((i & HvlLongSpinCountMask) == 0 && (HvlEnlightenments & 0x40) != 0)
            HvlNotifyLongSpinWait(i);       /* mov ecx, ebx / call */
        else
            _mm_pause();                    /* pause */

        Next = LockQueue->Next;             /* mov rax, [rdi] */

    } while (Next == 0);        /* test rax, rax / jz loc_1400DA7F7 */

    return Next;                /* retn with rax = Next */
}



KeReadyThread

Inside ntoskrnl.exe, KeReadyThread has the following prototype:

VOID NTAPI KeReadyThread(_In_ PKTHREAD Thread);

Here is the disassembly:

; VOID KeReadyThread(Thread)
; Microsoft x64: rcx = Thread. rbx (callee-saved) keeps Thread alive across
; the call to KiInSwapSingleProcess.
KeReadyThread:              
    push    rbx                         ; save callee-saved rbx
    sub     rsp, 20h                    ; 32 bytes of shadow space for the calls below
    mov     rdx, [rcx+0B8h]             ; rdx = pointer stored at Thread+0B8h
    mov     rbx, rcx                    ; rbx = Thread
    mov     eax, [rdx+234h]             ; eax = dword at +234h of the pointed-to object
    test    al, 7
    jnz     short loc_1400F6684         ; any of the low three bits set: extra check first

loc_1400F6676:                         
    mov     rcx, rbx                    ; first argument = Thread
    call    KiFastReadyThread

loc_1400F667E:                          
    add     rsp, 20h
    pop     rbx
    retn

loc_1400F6684:                          
    call    KiInSwapSingleProcess       ; at this point rcx = Thread, rdx = [Thread+0B8h]
    test    al, al
    jnz     short loc_1400F667E         ; non-zero result: nothing more to do
    jmp     short loc_1400F6676         ; zero result: fall back to KiFastReadyThread

I have not yet matched the offsets 0xB8 and 0x234 to named struct members, so they appear as Unknown fields in the decompilation below.

/*
 * Decompilation of KeReadyThread.
 *
 *   Thread - thread to make ready
 *
 * Thread+0xB8 holds a pointer; the dword at +0x234 of the object it points
 * to is tested against the mask 7. If any of those three bits is set,
 * KiInSwapSingleProcess is tried first, and a non-zero result from it ends
 * the routine. Otherwise the thread is passed to KiFastReadyThread.
 */
VOID NTAPI KeReadyThread(_In_ PKTHREAD Thread)
{
    /* mov rdx, [rcx+0B8h] */
    /* mov eax, [rdx+234h] */
    /* test al, 7 / jnz loc_1400F6684 */
    if (((BYTE)Thread->UnknownB8->Unknown234 & 7) != 0)
    {
        /* NOTE(review): at the call rcx = Thread and rdx still holds
           Thread->UnknownB8; whether the callee consumes rdx as a second
           argument cannot be told from this listing. */
        if (KiInSwapSingleProcess(Thread))  /* call KiInSwapSingleProcess / test al, al */
            return;                         /* jnz loc_1400F667E */
    }

    KiFastReadyThread(Thread);              /* mov rcx, rbx / call KiFastReadyThread */
}



KiInitializeTSS

Inside ntoskrnl.exe, KiInitializeTSS has the following prototype:

VOID NTAPI KiInitializeTSS(_In_ PKTSS Tss);

This takes a PKTSS, i.e. a pointer to a KTSS struct. It is defined as:

/*
 * 32-bit task state segment as used by the KiInitializeTSS listing, with byte
 * offsets. IntDirectionMap starts at 0x208c and is 32 bytes long, so the
 * structure ends at 0x20AC -- the constant the routine stores in IoMapBase.
 */
typedef struct _KTSS
{
     WORD Backlink;             /* 0x0 */
     WORD Reserved0;            /* 0x2 */
     ULONG Esp0;                /* 0x4 */
     WORD Ss0;                  /* 0x8 */
     WORD Reserved1;            /* 0xa */
     ULONG NotUsed1[4];         /* 0xc */
     ULONG CR3;                 /* 0x1c */
     ULONG Eip;                 /* 0x20 */
     ULONG EFlags;              /* 0x24 */
     ULONG Eax;                 /* 0x28 */
     ULONG Ecx;                 /* 0x2c */
     ULONG Edx;                 /* 0x30 */
     ULONG Ebx;                 /* 0x34 */
     ULONG Esp;                 /* 0x38 */
     ULONG Ebp;                 /* 0x3c */
     ULONG Esi;                 /* 0x40 */
     ULONG Edi;                 /* 0x44 */
     WORD Es;                   /* 0x48 */
     WORD Reserved2;            /* 0x4a */
     WORD Cs;                   /* 0x4c */
     WORD Reserved3;            /* 0x4e */
     WORD Ss;                   /* 0x50 */
     WORD Reserved4;            /* 0x52 */
     WORD Ds;                   /* 0x54 */
     WORD Reserved5;            /* 0x56 */
     WORD Fs;                   /* 0x58 */
     WORD Reserved6;            /* 0x5a */
     WORD Gs;                   /* 0x5c */
     WORD Reserved7;            /* 0x5e */
     WORD LDT;                  /* 0x60 */
     WORD Reserved8;            /* 0x62 */
     WORD Flags;                /* 0x64 */
     WORD IoMapBase;            /* 0x66 */
     KiIoAccessMap IoMaps[1];   /* 0x68 */
     UCHAR IntDirectionMap[32]; /* 0x208c */
} KTSS, *PKTSS;

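Here is the disassembly. Unlike the other routines in this post, this one comes from a 32-bit build: the argument is read from the stack at [ebp+8] and the routine ends with ret 4.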

; VOID KiInitializeTSS(Tss) -- 32-bit x86 code.
; The single argument is on the stack ([ebp+8] after the frame is set up) and
; the routine removes it itself with "ret 4".
KiInitializeTSS:
     mov     edi, edi                   ; two-byte no-op at the entry point
     push    ebp
     mov     ebp, esp                   ; standard frame: [ebp+8] = first argument
     mov     eax, dword ptr [ebp+8]     ; eax = Tss
     and     word ptr [eax+64h], 0      ; Tss->Flags = 0 (AND with 0 clears the word)
     and     word ptr [eax+60h], 0      ; Tss->LDT = 0
     mov     word ptr [eax+66h], 20ACh  ; Tss->IoMapBase = 20ACh
     mov     word ptr [eax+8], 10h      ; Tss->Ss0 = 10h
     pop     ebp
     ret     4                          ; return and pop the 4-byte argument

This function fills in the structure with constants.

/*
 * Decompilation of KiInitializeTSS: stores four constants into *Tss.
 *
 *   Tss - task state segment to initialize
 *
 * The four stores hit distinct fields, so they are listed here by ascending
 * offset rather than in instruction order.
 */
VOID NTAPI KiInitializeTSS(_In_ PKTSS Tss)
{
     /* mov word ptr [eax+8], 10h */
     Tss->Ss0 = 0x10;

     /* and word ptr [eax+60h], 0 */
     Tss->LDT = 0;

     /* and word ptr [eax+64h], 0 */
     Tss->Flags = 0;

     /* mov word ptr [eax+66h], 20ACh -- 0x20AC is the size of the struct */
     Tss->IoMapBase = sizeof(KTSS);
}



RtlValidateUnicodeString

Inside ntoskrnl.exe, RtlValidateUnicodeString has the following prototype:

NTSTATUS NTAPI RtlValidateUnicodeString(   
    _In_ ULONG Flags,
    _In_ PCUNICODE_STRING UnicodeString);

The UNICODE_STRING struct in a 64-bit system context is defined as:

/*
 * Counted UTF-16 string descriptor, with byte offsets for a 64-bit build.
 * RtlValidateUnicodeString reads only Length (0x0), MaximumLength (0x2) and
 * Buffer (0x8).
 * NOTE(review): Reserved presumably stands in for the 4 bytes of alignment
 * padding in front of the 8-byte Buffer pointer -- confirm against the
 * official definition.
 */
typedef struct _UNICODE_STRING {
  USHORT Length;            /* 0x0 */
  USHORT MaximumLength;     /* 0x2 */
  DWORD  Reserved;          /* 0x4 */
  PWSTR  Buffer;            /* 0x8 */
} UNICODE_STRING, *PUNICODE_STRING;

Here's the disassembly of the function:

; NTSTATUS RtlValidateUnicodeString(Flags, UnicodeString)
; Microsoft x64: ecx = Flags, rdx = UnicodeString; result in eax.
; eax is cleared once at the top and serves both as the success return value
; and as the zero compared against Buffer. After the two movzx loads,
; r8w = Length and cx = MaximumLength.
RtlValidateUnicodeString:
    xor     eax, eax                    ; eax = 0 (success unless overwritten below)
    test    ecx, ecx
    jnz     short loc_1400D23BB         ; Flags != 0: reject
    test    rdx, rdx
    jz      short locret_1400D23BA      ; UnicodeString == 0: return 0
    movzx   r8d, word ptr [rdx]         ; r8w = Length
    test    r8b, 1
    jnz     short loc_1400D23BB         ; Length is odd: reject
    movzx   ecx, word ptr [rdx+2]       ; cx = MaximumLength (Flags no longer needed)
    test    cl, 1
    jnz     short loc_1400D23BB         ; MaximumLength is odd: reject
    cmp     r8w, cx
    ja      short loc_1400D23BB         ; Length > MaximumLength (unsigned): reject
    mov     r9d, 0FFFEh
    cmp     cx, r9w
    ja      short loc_1400D23BB         ; MaximumLength > 0FFFEh (unsigned): reject
    cmp     [rdx+8], rax                ; Buffer == 0 ?
    jz      loc_14019BAF4               ; yes: both lengths must be zero too

locret_1400D23BA:       
    retn 

loc_1400D23BB:    
    mov     eax, 0C000000Dh             ; failure status
    retn
    
loc_14019BAF4:
    test    r8w, r8w
    jnz     loc_1400D23BB               ; Buffer == 0 but Length != 0: reject
    test    cx, cx
    jz      locret_1400D23BA            ; MaximumLength == 0 as well: return 0
    jmp     loc_1400D23BB               ; Buffer == 0 but MaximumLength != 0: reject

The function, true to its name, follows the traditional validation pattern of executing tests and returning failure (STATUS_INVALID_PARAMETER, 0xC000000D) or success (STATUS_SUCCESS, 0) depending on whether the conditions are met. Note that the last test case in the main body of the function can jump to a distant memory space for more tests, an optimization that likely means it is rarely branched to.

/* test ecx, ecx */
if (Flags != 0)
    return STATUS_INVALID_PARAMETER; /* mov eax, 0C000000Dh */

/* test rdx, rdx */
if (!UnicodeString)
    return STATUS_SUCCESS;           /* xor eax, eax */

/* movzx r8d, word ptr [rdx] */
/* test r8b, 1 */
if (UnicodeString->Length & 1 != 0)
    return STATUS_INVALID_PARAMETER; 

/* movzx ecx, word ptr [rdx+2] */
/* test cl, 1 */
if (UnicodeString->MaximumLength & 1 != 0)
    return STATUS_INVALID_PARAMETER; 

/* cmp r8w, cx */
if (UnicodeString->Length > UnicodeString.MaximumLength)
    return STATUS_INVALID_PARAMETER; 

/* mov r9d, 0FFFEh */
/* cmp cx, r9w */
if (UnicodeString->MaximumLength > 65534)
    return STATUS_INVALID_PARAMETER; 

/* cmp [rdx+8], rax */
if (UnicodeString->Buffer == 0)
{
    /* test r8w, r8w */
    if (UnicodeString->Length != 0)
        return STATUS_INVALID_PARAMETER; 

    /* test cx, cx */
    if (UnicodeString->MaximumLength != 0)
        return STATUS_INVALID_PARAMETER; 
}

return STATUS_SUCCESS;

No comments :

Post a Comment