Topology - zer0pts 2023

Introduction

This challenge is a reversing challenge from zer0pts 2023, that I played with The Flat Network Society. We ranked 12th on this CTF which is not bad !

Here is the task:

Initial Analysis

In this challenge, we have a x86_binary. According to the task statement, it is a network topology.

Running file on the challenge tells us it's a x86_64 binary, and it's not stripped.

$ file topology
topology: ELF 64-bit LSB pie executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=56f15fff39045953d06f6071987a75d57a64a860, for GNU/Linux 3.2.0, not stripped

Let's quickly fire up IDA and analyze its code. I often like to read IDA's pseudocode when I don't really need to read the assembly.
The main function is quite little, it calls 4 main functions that will be interesting.

int __fastcall main(int argc, const char **argv, const char **envp)
{
  int v3; // ebx
  const char *FLAG; // rbp
  int flag_length; // eax

  if ( argc <= 1 )
  {
    __printf_chk(1LL, "Usage: %s FLAG\n", *argv);
    return 1;
  }
  else
  {
    v3 = setup_network();
    if ( head == v3 )
    {
      FLAG = argv[1];
      flag_length = strlen(FLAG);
      network_main(FLAG, flag_length + 1);
    }
    else
    {
      handle_message();
    }
    if ( tail != v3 )
      wait(0LL);
    destroy_network(v3);
    return 0;
  }
}

First, we can see it call the setup_network function. This function does some forks 100 times and gives each child a number from 0 to 100.

  for ( i = 0; i != 100; ++i )
  {
    v5 = v0;
    v6 = shmget(0, 0x60uLL, 384);
    v0 = v6;
    if ( v6 == -1 )
      exit(1);
    if ( i )
    {
      whoami = i;
      if ( i == 99 )
      {
        tail = v6;
        goto LABEL_15;
      }
    }
    else
    {
      head = v6;
    }
    v4 = fork();
    if ( v4 == -1 )
      exit(1);
    if ( v4 )
      goto LABEL_15;
  }

We can guess it's creating "machines" on the network and the parent process will act as a router.

Then, the parent will execute network_main with the flag and its length as argument, whereas childs will simply execute handle_message.

At the the end, the network is destroyed.
First, we can analyze the network_main function, we will try to understand the handle_message function later.

Here is the decompile function:

int __fastcall network_main(char *flag, int flag_length)
{
  char *v2; // r12
  int v3; // ebp
  int fd; // ebx
  int v5; // eax
  const char **v6; // rdx
  unsigned __int64 canary_result; // rax
  int v9; // [rsp+Ch] [rbp-9Ch]
  char flag_cpy[80]; // [rsp+10h] [rbp-98h] BYREF
  char v11; // [rsp+60h] [rbp-48h] BYREF
  unsigned __int64 canary; // [rsp+68h] [rbp-40h]

  canary = __readfsqword(0x28u);
  memset(flag_cpy, 0, sizeof(flag_cpy));
  if ( flag_length > 80 )
    flag_length = 80;
  __memcpy_chk(flag_cpy, flag, flag_length, 80LL);
  send_msg(0x1337CAFE, -1, 0LL, 0);
  if ( !recv_msg() && *(prev + 2) == 322423550 )
  {
    v2 = flag_cpy;
    v9 = 0;
LABEL_6:
    v3 = 0;
    fd = 1;
    while ( 1 )
    {
      send_msg(0x1337F146, fd, v2, 8);  
      if ( recv_msg() || *(prev + 2) != 0x1337BEEF )
        break;
      v3 += strcmp(prev + 16, "OK") == 0;
      if ( ++fd == 100 )
      {
        v5 = 1;
        if ( v3 > 4 )
          v5 = v9;
        v9 = v5;
        putc(0x2E, _bss_start);
        fflush(_bss_start);
        v2 += 8;
        if ( &v11 == v2 )
        {
          if ( v9 )
            puts("\nWrong...");
          else
            puts("\nCorrect!");
          break;
        }
        goto LABEL_6;
      }
    }
  }
  send_msg(0x1337DEAD, -1, 0LL, 0);             // game over
  canary_result = canary - __readfsqword(0x28u);
  if ( canary_result )
    LODWORD(canary_result) = main(0x1337DEAD, 0xFFFFFFFFLL, v6);
  return canary_result;
}

First, we can see there's flag validation inside that function. We can see that by looking at the string. Also, it appears the flag is 80 bytes long since it checks the flag_length and set it to 80 if its length is greater than 80.
The function starts by copying the flag to a local stack buffer. Then it sends a message with the code 0x1337CAFE. It will then send another message with the code 0x1337F146 which looks like FLAG in leet.

Then, it receives messages and handle them. Then, it checks if the message response is "OK". If yes, it will increment a buffer that is checked later to verify if the flag is correct.

At the end, it sends a message 0x1337DEAD which tells the childs to stop executing.

We can now try to understand the handle_message function, so we will know what each messages mean.

At the moment we've seen these messages:

  • 0x1337CAFE
  • 0x1337F146
  • 0x1337DEAD
  • 0x1337BEEF

The function is quite simple:

__int64 handle_message()
{
  __int64 result; // rax

  while ( 1 )
  {
    result = recv_msg();
    if ( result )
      break;
    result = *(prev + 2);
    switch ( result )
    {
      case 0x1337DEAD:                          // end
        if ( whoami != 0x63 )
          return send_msg(0x1337DEAD, -1, 0LL, 0);
        return result;
      case 0x1337F146:                          // verify flag
        if ( (*(&function_table + whoami - 1))(prev + 16) )
          send_msg(0x1337BEEF, 0, &aNg, 3);     // NG\0
        else
          send_msg(0x1337BEEF, 0, "OK", 3);     // OK
        break;
      case 0x1337CAFE:
        send_msg(0x1337CAFE, -1, 0LL, 0);       // start
        break;
    }
  }
  return result;
}

It's a switch case on each message type. It looks like 0x1337BEEF only serves for childs to interact with the parent.
0x1337DEAD means it's the end of the loop, 0x1337CAFE serves to tell childs it's the beginning of the look.

The 0x1337F146 message is interesting. It calls a function from a function table with whoami as offset. It looks like each child has its own function.

Looking at the function table we can see there's a lot of function with names having 8 characters.

That's interesting, while looking at these functions, we can see they all have the same structure:
A switch case from 0 to 9 (10 cases). With Mixed-Boolean-Arithmetics inside.

Since the flag is 80 bytes long, and there are 10 switch cases, we can guess the validation will be applied 8 char by 8 char until all the flag is validated.

The problem here is that we have a lot of MBA to solve: we need to automate it.
Here is an example of an equation from the binary:

      result = __ROR8__(
                 __ROL8__(
                   __ROL8__(
                     (((_byteswap_uint64(__ROL8__(
                                           ((__ROR8__(
                                               ((__ROL8__(
                                                   (__ROR8__(
                                                      _byteswap_uint64(__ROR8__(
                                                                         __ROL8__(
                                                                           _byteswap_uint64(__ROL8__(
                                                                                              __ROL8__(
                                                                                                _byteswap_uint64((*a1 - 0x332E6CDA6567C801LL) ^ 0x99C03926A2D14CB8LL),
                                                                                                2) ^ 0xB166E33278625DLL,
                                                                                              25) ^ 0xBDC5F1C9986DD8BLL),
                                                                           31) ^ 0xED38CE2E23CB4B6ELL,
                                                                         23) ^ 0xCE1D10B09DE7489CLL),
                                                      13) ^ 0xD9B3819634D8D37BLL)
                                                 + 0x5D5DA73146A6DF9CLL,
                                                   23) ^ 0x1229A782A7C5F6EELL)
                                              - 0x23C713123221595LL) ^ 0xE574DFBAA50052EDLL,
                                               10) ^ 0x622EA5FE347B52A0LL)
                                          - 0x39AC3E8BB5877A31LL) ^ 0x34354D957F26BA45LL,
                                           19) ^ 0x25AD977FED140F6ALL) ^ 0xA1385DACDAFE62DDLL)
                     - 0x2576019C67F76BBFLL) ^ 0x3283BB3581DD1BFFLL)
                   + 0x298757D44042FA6BLL,
                     19) ^ 0x15D7206CE0FEBD39LL,
                   14) ^ 0xE149F731A1247171LL,
                 26) != 0x83E589CF9E9284DALL;

Yeah.. it looks bad.

At the beginning of the function, every function increments a variable from the bss. That variable match the index of the flag part it's verifying.

Now that we understood what the binary does, let's try to solve it !

Solution

My main choice for solving MBA is Triton. Triton is a library that helps with program emulation and equation solving, it runs z3 or bitwuzla under the hood and works like a charm.

Since extracting each function's code would be very long, we can start by loading the whole program in memory thanks to lief, then, we will be able to set the values of the counter variable for each function, and symbolize a small memory area that will be used to emulate the flag.

To get all the functions offset, I simply copy/pasted the functions in the Functions panel from idea then used cut and sed to get only the data I wanted.

Let's start by initiating a triton context and the ast, which we will need in order to emulate the binary.

def loadBinary(ctx, binary):
	# Map the binary into the memory
	phdrs = binary.segments
	for phdr in phdrs:
		size = phdr.physical_size
		vaddr = phdr.virtual_address
		print('[+] Loading 0x%06x - 0x%06x' %(vaddr, vaddr+size))
		ctx.setConcreteMemoryAreaValue(vaddr, list(phdr.content))


ctx = TritonContext()
ast = ctx.getAstContext()

ctx.setArchitecture(ARCH.X86_64)
ctx.setAstRepresentationMode(AST_REPRESENTATION.PYTHON)

for mode in [MODE.ALIGNED_MEMORY, MODE.AST_OPTIMIZATIONS, MODE.CONSTANT_FOLDING, MODE.ONLY_ON_SYMBOLIZED]:
	ctx.setMode(mode, True)


binary = lief.parse("./topology")

loadBinary(ctx, binary)

Then we can quickly write a function to emulate the binary.

def emulate(ctx, pc = 0x1000):
	while pc:
		opcode = ctx.getConcreteMemoryAreaValue(pc, 16)
		instruction = Instruction()
		instruction.setOpcode(opcode)
		instruction.setAddress(pc)
		ctx.processing(instruction)
		if instruction.getOpcode()== b'\xc3':
			return
		pc = ctx.getConcreteRegisterValue(ctx.registers.rip)

Then, we will try to solve each functions for each value of the function counter.

for j in range(10):
	for i in range(392):
		if i % 4 == 0:
			ctx.setConcreteMemoryAreaValue(0xEA360 + i, [j])
	else:
		ctx.setConcreteMemoryAreaValue(0xEA360 + i, [0x00])

	for idx, function in enumerate(functions):
		# Here, I chose a small part of .data which wasn't used by the program (0xea000)
		ctx.symbolizeMemory(MemoryAccess(0xea000, CPUSIZE.QWORD))
		ctx.symbolizeRegister(ctx.registers.rax)
		ctx.setConcreteRegisterValue(ctx.registers.rdi, 0xea000)
		emulate(ctx, function) # emulate the function

		rax = ctx.getSymbolicRegister(ctx.registers.rax) # then we retrieve the result of the equation
		res = str(ctx.getModel(rax.getAst() == 0x0)).split("= ")[1][2:-1]
		if bytes.fromhex(res).decode() in results.keys():
			results[bytes.fromhex(res).decode()] += 1
		else:
			results[bytes.fromhex(res).decode()] = 1
		print(sorted(results, key=lambda x: results[x])[-1][::-1], end='')
		results = {}

We can then run the script with python. And after some time we get a flag ! \o/

$ python3 solve.py
[+] Loading 0x000040 - 0x000318
[+] Loading 0x000318 - 0x000334
[+] Loading 0x000000 - 0x001398
[+] Loading 0x002000 - 0x0e5ddd
[+] Loading 0x0e6000 - 0x0e8b78
[+] Loading 0x0e9d38 - 0x0ea338
[+] Loading 0x0e9d48 - 0x0e9f38
[+] Loading 0x000338 - 0x000368
[+] Loading 0x000368 - 0x0003ac
[+] Loading 0x000338 - 0x000368
[+] Loading 0x0e6fa8 - 0x0e732c
[+] Loading 0x000000 - 0x000000
[+] Loading 0x0e9d38 - 0x0ea000
zer0pts{kMo7UtDhqMfXhaUp0kP8MEPLPJFgKUx7YlWyyxB9POKUhegFqdNm5sXIfxk2FIfV}

Thoughts

I really enjoyed this challenge, since it made me learn more about Triton, and the handling of parents/childs. Also, the context was interesting ! Thanks to its author !

Full script:

import binascii
from triton import *
import lief 
from rich import print


functions = [0x0000000000002349,
0x0000000000004879,
0x000000000000720F,
0x0000000000009B93,
0x000000000000B7F1,
0x000000000000D230,
0x000000000000FF1D,
0x0000000000012253,
0x0000000000014591,
0x0000000000016D79,
0x000000000001949D,
0x000000000001B7DE,
0x000000000001DD99,
0x000000000001FBF2,
0x00000000000220E1,
0x0000000000023D0B,
0x0000000000025892,
0x0000000000027757,
0x000000000002A192,
0x000000000002CB4F,
0x000000000002E879,
0x000000000003169C,
0x000000000003339F,
0x0000000000035445,
0x00000000000377DB,
0x0000000000039B9A,
0x000000000003C0CE,
0x000000000003ED75,
0x00000000000420AA,
0x00000000000447BF,
0x000000000004688D,
0x0000000000048B2D,
0x000000000004A98D,
0x000000000004C883,
0x000000000004F645,
0x0000000000051ABC,
0x0000000000054610,
0x00000000000570D6,
0x0000000000059372,
0x000000000005BD94,
0x000000000005E571,
0x00000000000607B1,
0x0000000000061C13,
0x0000000000063CC4,
0x00000000000666B4,
0x0000000000068CEE,
0x000000000006B326,
0x000000000006D526,
0x000000000006FC3C,
0x00000000000721F9,
0x0000000000073F7E,
0x0000000000076B82,
0x00000000000792A8,
0x000000000007C1A6,
0x000000000007E7E0,
0x000000000008058D,
0x00000000000826E7,
0x000000000008474B,
0x00000000000872DB,
0x000000000008A16E,
0x000000000008C37F,
0x000000000008E5A9,
0x00000000000913C6,
0x0000000000093198,
0x0000000000095978,
0x0000000000097827,
0x000000000009A2B6,
0x000000000009C8B5,
0x000000000009EF2D,
0x00000000000A118B,
0x00000000000A366B,
0x00000000000A5C37,
0x00000000000A7DBC,
0x00000000000AA152,
0x00000000000AC1F9,
0x00000000000AE63D,
0x00000000000AFBAF,
0x00000000000B25EF,
0x00000000000B4741,
0x00000000000B7083,
0x00000000000B8A9C,
0x00000000000BB3DC,
0x00000000000BDEAB,
0x00000000000C105C,
0x00000000000C3472,
0x00000000000C563F,
0x00000000000C7C0B,
0x00000000000C9D84,
0x00000000000CCD4A,
0x00000000000CF04E,
0x00000000000D0374,
0x00000000000D22F4,
0x00000000000D44AD,
0x00000000000D700D,
0x00000000000DA451,
0x00000000000DC40B,
0x00000000000DEEE4,
0x00000000000E13FA,
0x00000000000E37AE
]

def loadBinary(ctx, binary):
    # Map the binary into the memory
    phdrs = binary.segments
    for phdr in phdrs:
        size   = phdr.physical_size
        vaddr  = phdr.virtual_address
        print('[+] Loading 0x%06x - 0x%06x' %(vaddr, vaddr+size))
        ctx.setConcreteMemoryAreaValue(vaddr, list(phdr.content))
    return

def emulate(ctx, pc = 0x1000):

  while pc:
    opcode = ctx.getConcreteMemoryAreaValue(pc, 16) 
    instruction = Instruction()
    instruction.setOpcode(opcode)
    instruction.setAddress(pc)
    ctx.processing(instruction)

    if instruction.getOpcode()== b'\xc3':
      return
    pc = ctx.getConcreteRegisterValue(ctx.registers.rip)

ctx = TritonContext()
ast = ctx.getAstContext()

ctx.setArchitecture(ARCH.X86_64)
ctx.setAstRepresentationMode(AST_REPRESENTATION.PYTHON)

for mode in [MODE.ALIGNED_MEMORY, MODE.AST_OPTIMIZATIONS, MODE.CONSTANT_FOLDING, MODE.ONLY_ON_SYMBOLIZED]:
    ctx.setMode(mode, True)

binary = lief.parse("./topology")
loadBinary(ctx, binary)

results = {}

for j in range(10):
  for i in range(392):
    if i % 4 == 0:
      ctx.setConcreteMemoryAreaValue(0xEA360 + i, [j])
    else: 
      ctx.setConcreteMemoryAreaValue(0xEA360 + i, [0x00])
  for idx, function in enumerate(functions):
      ctx.symbolizeMemory(MemoryAccess(0xea000, CPUSIZE.QWORD))
      ctx.symbolizeRegister(ctx.registers.rax) 
      ctx.setConcreteRegisterValue(ctx.registers.rdi, 0xea000)
      emulate(ctx, function)
      rax = ctx.getSymbolicRegister(ctx.registers.rax) 
      res = str(ctx.getModel(rax.getAst() == 0x0)).split("= ")[1][2:-1]
      if bytes.fromhex(res).decode() in results.keys():
        results[bytes.fromhex(res).decode()] += 1
      else:
        results[bytes.fromhex(res).decode()] = 1

  print(sorted(results, key=lambda x: results[x])[-1][::-1], end='')
  results = {}

Show Comments