Introduction
This challenge is a reversing challenge from zer0pts 2023, that I played with The Flat Network Society
. We ranked 12th on this CTF which is not bad !
Here is the task:
Initial Analysis
In this challenge, we have a x86_binary. According to the task statement, it is a network topology.
Running file
on the challenge tells us it's a x86_64 binary, and it's not stripped.
$ file topology
topology: ELF 64-bit LSB pie executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=56f15fff39045953d06f6071987a75d57a64a860, for GNU/Linux 3.2.0, not stripped
Let's quickly fire up IDA and analyze its code. I often like to read IDA's pseudocode when I don't really need to read the assembly.
The main function is quite little, it calls 4 main functions that will be interesting.
int __fastcall main(int argc, const char **argv, const char **envp)
{
int v3; // ebx
const char *FLAG; // rbp
int flag_length; // eax
if ( argc <= 1 )
{
__printf_chk(1LL, "Usage: %s FLAG\n", *argv);
return 1;
}
else
{
v3 = setup_network();
if ( head == v3 )
{
FLAG = argv[1];
flag_length = strlen(FLAG);
network_main(FLAG, flag_length + 1);
}
else
{
handle_message();
}
if ( tail != v3 )
wait(0LL);
destroy_network(v3);
return 0;
}
}
First, we can see it call the setup_network
function. This function does some forks 100 times and gives each child a number from 0 to 100.
for ( i = 0; i != 100; ++i )
{
v5 = v0;
v6 = shmget(0, 0x60uLL, 384);
v0 = v6;
if ( v6 == -1 )
exit(1);
if ( i )
{
whoami = i;
if ( i == 99 )
{
tail = v6;
goto LABEL_15;
}
}
else
{
head = v6;
}
v4 = fork();
if ( v4 == -1 )
exit(1);
if ( v4 )
goto LABEL_15;
}
We can guess it's creating "machines" on the network and the parent process will act as a router.
Then, the parent will execute network_main
with the flag and its length as argument, whereas childs will simply execute handle_message
.
At the the end, the network is destroyed.
First, we can analyze the network_main
function, we will try to understand the handle_message
function later.
Here is the decompile function:
int __fastcall network_main(char *flag, int flag_length)
{
char *v2; // r12
int v3; // ebp
int fd; // ebx
int v5; // eax
const char **v6; // rdx
unsigned __int64 canary_result; // rax
int v9; // [rsp+Ch] [rbp-9Ch]
char flag_cpy[80]; // [rsp+10h] [rbp-98h] BYREF
char v11; // [rsp+60h] [rbp-48h] BYREF
unsigned __int64 canary; // [rsp+68h] [rbp-40h]
canary = __readfsqword(0x28u);
memset(flag_cpy, 0, sizeof(flag_cpy));
if ( flag_length > 80 )
flag_length = 80;
__memcpy_chk(flag_cpy, flag, flag_length, 80LL);
send_msg(0x1337CAFE, -1, 0LL, 0);
if ( !recv_msg() && *(prev + 2) == 322423550 )
{
v2 = flag_cpy;
v9 = 0;
LABEL_6:
v3 = 0;
fd = 1;
while ( 1 )
{
send_msg(0x1337F146, fd, v2, 8);
if ( recv_msg() || *(prev + 2) != 0x1337BEEF )
break;
v3 += strcmp(prev + 16, "OK") == 0;
if ( ++fd == 100 )
{
v5 = 1;
if ( v3 > 4 )
v5 = v9;
v9 = v5;
putc(0x2E, _bss_start);
fflush(_bss_start);
v2 += 8;
if ( &v11 == v2 )
{
if ( v9 )
puts("\nWrong...");
else
puts("\nCorrect!");
break;
}
goto LABEL_6;
}
}
}
send_msg(0x1337DEAD, -1, 0LL, 0); // game over
canary_result = canary - __readfsqword(0x28u);
if ( canary_result )
LODWORD(canary_result) = main(0x1337DEAD, 0xFFFFFFFFLL, v6);
return canary_result;
}
First, we can see there's flag validation inside that function. We can see that by looking at the string. Also, it appears the flag is 80 bytes long since it checks the flag_length and set it to 80 if its length is greater than 80.
The function starts by copying the flag to a local stack buffer. Then it sends a message with the code 0x1337CAFE
. It will then send another message with the code 0x1337F146
which looks like FLAG
in leet.
Then, it receives messages and handle them. Then, it checks if the message response is "OK". If yes, it will increment a buffer that is checked later to verify if the flag is correct.
At the end, it sends a message 0x1337DEAD
which tells the childs to stop executing.
We can now try to understand the handle_message
function, so we will know what each messages mean.
At the moment we've seen these messages:
- 0x1337CAFE
- 0x1337F146
- 0x1337DEAD
- 0x1337BEEF
The function is quite simple:
__int64 handle_message()
{
__int64 result; // rax
while ( 1 )
{
result = recv_msg();
if ( result )
break;
result = *(prev + 2);
switch ( result )
{
case 0x1337DEAD: // end
if ( whoami != 0x63 )
return send_msg(0x1337DEAD, -1, 0LL, 0);
return result;
case 0x1337F146: // verify flag
if ( (*(&function_table + whoami - 1))(prev + 16) )
send_msg(0x1337BEEF, 0, &aNg, 3); // NG\0
else
send_msg(0x1337BEEF, 0, "OK", 3); // OK
break;
case 0x1337CAFE:
send_msg(0x1337CAFE, -1, 0LL, 0); // start
break;
}
}
return result;
}
It's a switch case on each message type. It looks like 0x1337BEEF
only serves for childs to interact with the parent.
0x1337DEAD means it's the end of the loop, 0x1337CAFE serves to tell childs it's the beginning of the look.
The 0x1337F146 message is interesting. It calls a function from a function table with whoami
as offset. It looks like each child has its own function.
Looking at the function table we can see there's a lot of function with names having 8 characters.
That's interesting, while looking at these functions, we can see they all have the same structure:
A switch case from 0 to 9 (10 cases). With Mixed-Boolean-Arithmetics inside.
Since the flag is 80 bytes long, and there are 10 switch cases, we can guess the validation will be applied 8 char by 8 char until all the flag is validated.
The problem here is that we have a lot of MBA to solve: we need to automate it.
Here is an example of an equation from the binary:
result = __ROR8__(
__ROL8__(
__ROL8__(
(((_byteswap_uint64(__ROL8__(
((__ROR8__(
((__ROL8__(
(__ROR8__(
_byteswap_uint64(__ROR8__(
__ROL8__(
_byteswap_uint64(__ROL8__(
__ROL8__(
_byteswap_uint64((*a1 - 0x332E6CDA6567C801LL) ^ 0x99C03926A2D14CB8LL),
2) ^ 0xB166E33278625DLL,
25) ^ 0xBDC5F1C9986DD8BLL),
31) ^ 0xED38CE2E23CB4B6ELL,
23) ^ 0xCE1D10B09DE7489CLL),
13) ^ 0xD9B3819634D8D37BLL)
+ 0x5D5DA73146A6DF9CLL,
23) ^ 0x1229A782A7C5F6EELL)
- 0x23C713123221595LL) ^ 0xE574DFBAA50052EDLL,
10) ^ 0x622EA5FE347B52A0LL)
- 0x39AC3E8BB5877A31LL) ^ 0x34354D957F26BA45LL,
19) ^ 0x25AD977FED140F6ALL) ^ 0xA1385DACDAFE62DDLL)
- 0x2576019C67F76BBFLL) ^ 0x3283BB3581DD1BFFLL)
+ 0x298757D44042FA6BLL,
19) ^ 0x15D7206CE0FEBD39LL,
14) ^ 0xE149F731A1247171LL,
26) != 0x83E589CF9E9284DALL;
Yeah.. it looks bad.
At the beginning of the function, every function increments a variable from the bss. That variable match the index of the flag part it's verifying.
Now that we understood what the binary does, let's try to solve it !
Solution
My main choice for solving MBA is Triton. Triton is a library that helps with program emulation and equation solving, it runs z3 or bitwuzla under the hood and works like a charm.
Since extracting each function's code would be very long, we can start by loading the whole program in memory thanks to lief, then, we will be able to set the values of the counter variable for each function, and symbolize a small memory area that will be used to emulate the flag.
To get all the functions offset, I simply copy/pasted the functions in the Functions
panel from idea then used cut
and sed
to get only the data I wanted.
Let's start by initiating a triton context and the ast, which we will need in order to emulate the binary.
def loadBinary(ctx, binary):
# Map the binary into the memory
phdrs = binary.segments
for phdr in phdrs:
size = phdr.physical_size
vaddr = phdr.virtual_address
print('[+] Loading 0x%06x - 0x%06x' %(vaddr, vaddr+size))
ctx.setConcreteMemoryAreaValue(vaddr, list(phdr.content))
ctx = TritonContext()
ast = ctx.getAstContext()
ctx.setArchitecture(ARCH.X86_64)
ctx.setAstRepresentationMode(AST_REPRESENTATION.PYTHON)
for mode in [MODE.ALIGNED_MEMORY, MODE.AST_OPTIMIZATIONS, MODE.CONSTANT_FOLDING, MODE.ONLY_ON_SYMBOLIZED]:
ctx.setMode(mode, True)
binary = lief.parse("./topology")
loadBinary(ctx, binary)
Then we can quickly write a function to emulate the binary.
def emulate(ctx, pc = 0x1000):
while pc:
opcode = ctx.getConcreteMemoryAreaValue(pc, 16)
instruction = Instruction()
instruction.setOpcode(opcode)
instruction.setAddress(pc)
ctx.processing(instruction)
if instruction.getOpcode()== b'\xc3':
return
pc = ctx.getConcreteRegisterValue(ctx.registers.rip)
Then, we will try to solve each functions for each value of the function counter.
for j in range(10):
for i in range(392):
if i % 4 == 0:
ctx.setConcreteMemoryAreaValue(0xEA360 + i, [j])
else:
ctx.setConcreteMemoryAreaValue(0xEA360 + i, [0x00])
for idx, function in enumerate(functions):
# Here, I chose a small part of .data which wasn't used by the program (0xea000)
ctx.symbolizeMemory(MemoryAccess(0xea000, CPUSIZE.QWORD))
ctx.symbolizeRegister(ctx.registers.rax)
ctx.setConcreteRegisterValue(ctx.registers.rdi, 0xea000)
emulate(ctx, function) # emulate the function
rax = ctx.getSymbolicRegister(ctx.registers.rax) # then we retrieve the result of the equation
res = str(ctx.getModel(rax.getAst() == 0x0)).split("= ")[1][2:-1]
if bytes.fromhex(res).decode() in results.keys():
results[bytes.fromhex(res).decode()] += 1
else:
results[bytes.fromhex(res).decode()] = 1
print(sorted(results, key=lambda x: results[x])[-1][::-1], end='')
results = {}
We can then run the script with python. And after some time we get a flag ! \o/
$ python3 solve.py
[+] Loading 0x000040 - 0x000318
[+] Loading 0x000318 - 0x000334
[+] Loading 0x000000 - 0x001398
[+] Loading 0x002000 - 0x0e5ddd
[+] Loading 0x0e6000 - 0x0e8b78
[+] Loading 0x0e9d38 - 0x0ea338
[+] Loading 0x0e9d48 - 0x0e9f38
[+] Loading 0x000338 - 0x000368
[+] Loading 0x000368 - 0x0003ac
[+] Loading 0x000338 - 0x000368
[+] Loading 0x0e6fa8 - 0x0e732c
[+] Loading 0x000000 - 0x000000
[+] Loading 0x0e9d38 - 0x0ea000
zer0pts{kMo7UtDhqMfXhaUp0kP8MEPLPJFgKUx7YlWyyxB9POKUhegFqdNm5sXIfxk2FIfV}
Thoughts
I really enjoyed this challenge, since it made me learn more about Triton, and the handling of parents/childs. Also, the context was interesting ! Thanks to its author !
Full script:
import binascii
from triton import *
import lief
from rich import print
functions = [0x0000000000002349,
0x0000000000004879,
0x000000000000720F,
0x0000000000009B93,
0x000000000000B7F1,
0x000000000000D230,
0x000000000000FF1D,
0x0000000000012253,
0x0000000000014591,
0x0000000000016D79,
0x000000000001949D,
0x000000000001B7DE,
0x000000000001DD99,
0x000000000001FBF2,
0x00000000000220E1,
0x0000000000023D0B,
0x0000000000025892,
0x0000000000027757,
0x000000000002A192,
0x000000000002CB4F,
0x000000000002E879,
0x000000000003169C,
0x000000000003339F,
0x0000000000035445,
0x00000000000377DB,
0x0000000000039B9A,
0x000000000003C0CE,
0x000000000003ED75,
0x00000000000420AA,
0x00000000000447BF,
0x000000000004688D,
0x0000000000048B2D,
0x000000000004A98D,
0x000000000004C883,
0x000000000004F645,
0x0000000000051ABC,
0x0000000000054610,
0x00000000000570D6,
0x0000000000059372,
0x000000000005BD94,
0x000000000005E571,
0x00000000000607B1,
0x0000000000061C13,
0x0000000000063CC4,
0x00000000000666B4,
0x0000000000068CEE,
0x000000000006B326,
0x000000000006D526,
0x000000000006FC3C,
0x00000000000721F9,
0x0000000000073F7E,
0x0000000000076B82,
0x00000000000792A8,
0x000000000007C1A6,
0x000000000007E7E0,
0x000000000008058D,
0x00000000000826E7,
0x000000000008474B,
0x00000000000872DB,
0x000000000008A16E,
0x000000000008C37F,
0x000000000008E5A9,
0x00000000000913C6,
0x0000000000093198,
0x0000000000095978,
0x0000000000097827,
0x000000000009A2B6,
0x000000000009C8B5,
0x000000000009EF2D,
0x00000000000A118B,
0x00000000000A366B,
0x00000000000A5C37,
0x00000000000A7DBC,
0x00000000000AA152,
0x00000000000AC1F9,
0x00000000000AE63D,
0x00000000000AFBAF,
0x00000000000B25EF,
0x00000000000B4741,
0x00000000000B7083,
0x00000000000B8A9C,
0x00000000000BB3DC,
0x00000000000BDEAB,
0x00000000000C105C,
0x00000000000C3472,
0x00000000000C563F,
0x00000000000C7C0B,
0x00000000000C9D84,
0x00000000000CCD4A,
0x00000000000CF04E,
0x00000000000D0374,
0x00000000000D22F4,
0x00000000000D44AD,
0x00000000000D700D,
0x00000000000DA451,
0x00000000000DC40B,
0x00000000000DEEE4,
0x00000000000E13FA,
0x00000000000E37AE
]
def loadBinary(ctx, binary):
# Map the binary into the memory
phdrs = binary.segments
for phdr in phdrs:
size = phdr.physical_size
vaddr = phdr.virtual_address
print('[+] Loading 0x%06x - 0x%06x' %(vaddr, vaddr+size))
ctx.setConcreteMemoryAreaValue(vaddr, list(phdr.content))
return
def emulate(ctx, pc = 0x1000):
while pc:
opcode = ctx.getConcreteMemoryAreaValue(pc, 16)
instruction = Instruction()
instruction.setOpcode(opcode)
instruction.setAddress(pc)
ctx.processing(instruction)
if instruction.getOpcode()== b'\xc3':
return
pc = ctx.getConcreteRegisterValue(ctx.registers.rip)
ctx = TritonContext()
ast = ctx.getAstContext()
ctx.setArchitecture(ARCH.X86_64)
ctx.setAstRepresentationMode(AST_REPRESENTATION.PYTHON)
for mode in [MODE.ALIGNED_MEMORY, MODE.AST_OPTIMIZATIONS, MODE.CONSTANT_FOLDING, MODE.ONLY_ON_SYMBOLIZED]:
ctx.setMode(mode, True)
binary = lief.parse("./topology")
loadBinary(ctx, binary)
results = {}
for j in range(10):
for i in range(392):
if i % 4 == 0:
ctx.setConcreteMemoryAreaValue(0xEA360 + i, [j])
else:
ctx.setConcreteMemoryAreaValue(0xEA360 + i, [0x00])
for idx, function in enumerate(functions):
ctx.symbolizeMemory(MemoryAccess(0xea000, CPUSIZE.QWORD))
ctx.symbolizeRegister(ctx.registers.rax)
ctx.setConcreteRegisterValue(ctx.registers.rdi, 0xea000)
emulate(ctx, function)
rax = ctx.getSymbolicRegister(ctx.registers.rax)
res = str(ctx.getModel(rax.getAst() == 0x0)).split("= ")[1][2:-1]
if bytes.fromhex(res).decode() in results.keys():
results[bytes.fromhex(res).decode()] += 1
else:
results[bytes.fromhex(res).decode()] = 1
print(sorted(results, key=lambda x: results[x])[-1][::-1], end='')
results = {}