Unpacking Themida #2

Ok, so after the decryption it seems that we now have 3 additional functions:

Let's start with the first one, sub_1357005 (sub_1357000 really), as it's what's going to be executed next. Cleaning up what Hex-Rays spews out leaves us with the following:

  
// Hex-Rays pseudocode for the stub at 0x1357005 (listing only, not compilable).
// It selects a source and a destination buffer, hands them to sub_13570CF,
// overwrites 5 bytes at loc_1357062 with E9 + 0x000001B6, calls sub_13570B2,
// increments the dword at a6 + 0xB8 and returns 0.
// The roles of a2, a4, a5 and a6 are not established by this listing.
int __usercall sub_1357005(int a1, int a2, unsigned int offset, int a4, int a5, int a6)  
{
  unsigned int i; // eax@3
  _BYTE *p_demangled_buf; // eax@8
  _BYTE *p_mangled_buf; // ecx@8

  if ( a1 ) // a1 == 0 from the "mov eax, 0" @ 0x1357000
  {
    // Only scan when the byte at loc_1357062 is not yet 0xE9, i.e. before the
    // patch performed at the bottom of this function has been applied.
    if ( loc_1357062 != 0xE9 )
    {
      // Walk down from 0x1357000 in 4096-byte steps until the word at i is
      // 0x5A4D ("MZ") and the dword at i + *(WORD *)(i + 0x3C) is 0x4550 ("PE").
      // 20279392 == 0x1357060, so loc_135709C + i - 20279392 is i + 0x3C.
      // NOTE(review): presumably this locates the module's image base - confirm.
      for ( i = 0x1357000u;
            *(_WORD *)i != 0x5A4D || *(_DWORD *)(i + *(_WORD *)((char *)loc_135709C + i - 20279392)) != 0x4550;
            i -= 4096 )
        ;
      offset = i;
    }
    // Buffers are addressed relative to offset in this branch.
    p_demangled_buf = (_BYTE *)(offset + 0xC3F014);
    p_mangled_buf = (_BYTE *)(offset + 0xF57222);
  }
  else
  {
    // a1 == 0: use the fixed addresses of the two buffers.
    p_demangled_buf = (_BYTE *)&b_demangled_0;
    p_mangled_buf = (_BYTE *)&b_mangled_0;
  }
  // Source buffer is the second argument, destination buffer the third.
  sub_13570CF(a2, p_mangled_buf, p_demangled_buf);
  // Self-patch: the 4-byte operand is written first, then the 0xE9 opcode byte,
  // giving the byte sequence E9 B6 01 00 00 at loc_1357062.
  *(_DWORD *)(&loc_1357062 + 1) = 0x1B6u;
  loc_1357062 = 0xE9u;
  sub_13570B2();
  ++*(_DWORD *)(a6 + 0xB8);
  return 0;
}

Looking at the Assembly, it seems that Hex-Rays doesn't get this exactly right, but it's good enough for our purposes.

I actually have no idea what the params to this function are. I haven't yet seen where it's called from or what's passed to it, but let's try to work out what's happening here from what it actually does.

The important thing to note is that it seems to pass two interesting pointers to sub_13570CF. The first location @ 0x1357222 has some binary data, the second @ 0x103f014 is empty. I can already hazard a guess that 0x1357222 is mangled and will be demangled into 0x103f014. So I've called them 'b_mangled_0' and 'b_demangled_0' respectively. Taking a look at the start of sub_13570CF confirms this:

  
    v5 = 0x80u;
LABEL_2:  
    v6 = *p_mangled_buf++;
    *p_demangled_buf++ = v6;
    for ( i = 2; ; i = 1 )
    {

So I'm going to go ahead and call sub_13570CF 'demangle_buffer_0'.

After demangling the buffer, sub_1357005 patches 5 bytes from loc_1357062 to E9 B6 01 00 00 (a relative JMP with displacement 0x1B6, i.e. to 0x1357062 + 5 + 0x1B6 = 0x135721D).
It then calls sub_13570B2 which simply jumps to the code we just demangled @ 0x103f014:

It also has an STI instruction (0xfb) which screws up debugging nicely. Maybe we'll get rid of this later.

So how do we demangle the mangled code at b_mangled_0? I simply ripped sub_13570CF, massaged it a bit to get it to compile, and ran it on the binary data @ b_mangled_0 (I could have cleaned it up, I just don't really care to):

  
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define _BYTE unsigned char
/* Carry out of the 8-bit addition X + Y. Both operands are truncated to a
   byte first so a sign-extended `char` cannot produce a spurious carry.
   Kept for compatibility; demangle_code() below no longer needs it. */
#define __CFADD__(X,Y) ((((unsigned int)(_BYTE)(X) + (unsigned int)(_BYTE)(Y)) >> 8) & 1u)
/* Low byte of the object X (little-endian layout assumed, as in the
   decompiler output this was taken from). Kept for compatibility. */
#define LOBYTE(X) (*((_BYTE*)&(X)))

/* State of the control-bit reader: control bits are taken MSB-first from
   "tag" bytes that are interleaved with the data bytes of the stream. */
struct dm_bits {
  const _BYTE *src;   /* next unread input byte */
  unsigned int tag;   /* remaining control bits followed by a 1 sentinel (8-bit value) */
};

/* Return the next control bit, loading a new tag byte when the current one
   is used up (only the sentinel was left, so doubling it yields zero). */
static unsigned int dm_getbit(struct dm_bits *b)
{
  unsigned int carry;

  b->tag <<= 1;                        /* add dl, dl */
  carry = (b->tag >> 8) & 1u;
  b->tag &= 0xFFu;
  if ( b->tag == 0 )
  {
    /* adc dl, dl on a fresh byte: its top bit becomes the result and the
       incoming carry becomes the new sentinel in bit 0. */
    b->tag = ((unsigned int)*b->src++ << 1) | carry;
    carry = (b->tag >> 8) & 1u;
    b->tag &= 0xFFu;
  }
  return carry;
}

/* Read a variable-length number >= 2: starting from 1, each round shifts in
   one data bit and is followed by one "more rounds follow" flag bit. */
static unsigned int dm_getgamma(struct dm_bits *b)
{
  unsigned int v = 1;

  do
    v = (v << 1) | dm_getbit(b);
  while ( dm_getbit(b) );
  return v;
}

/* Copy n bytes from dist bytes behind dst, strictly front to back, and return
   the advanced dst. Source and destination overlap whenever dist < n (the
   short-match path alone allows dist 1 with n 2..3); the stream then expects
   the bytes just written to be re-read, which memcpy() does not guarantee. */
static _BYTE *dm_copy(_BYTE *dst, long dist, unsigned int n)
{
  const _BYTE *from = dst - dist;

  while ( n-- )
    *dst++ = *from++;
  return dst;
}

//----- (013570CF) --------------------------------------------------------
/*
 * Demangle (decompress) the stream at p_mangled into p_demangled.
 *
 *   a1          initial "last match distance", used if the stream repeats a
 *               distance before defining one (callers here pass 0)
 *   p_mangled   input stream; reading stops at its end marker (a short-match
 *               byte whose upper 7 bits are zero)
 *   p_demangled output buffer; the caller must make it large enough, no bound
 *               is checked here
 *   l           receives the number of bytes written
 *
 * Stream layout as implemented below (control bits, MSB first):
 *   0    literal byte
 *   10   match: gamma-coded distance high part (or "reuse last distance"),
 *        distance low byte, gamma-coded length
 *   110  short match: one byte = distance << 1 | (length - 2); distance 0 ends
 *   111  4-bit distance, copies one byte (distance 0 writes a zero byte)
 * NOTE(review): this matches the layout of the aPLib depacker - confirm.
 */
void demangle_code(int a1, _BYTE *p_mangled, _BYTE *p_demangled, int *l)
{
  struct dm_bits bits;
  _BYTE *const out_start = p_demangled;
  long last_dist = a1;          /* most recently used match distance */
  unsigned int rep_bias = 2;    /* 2 right after a literal / 1-byte copy, else 1 */

  bits.src = p_mangled;
  bits.tag = 0x80u;             /* sentinel only: first dm_getbit() loads a tag byte */

  /* The stream always opens with one raw literal byte. */
  *p_demangled++ = *bits.src++;

  for ( ;; )
  {
    if ( !dm_getbit(&bits) )
    {
      /* 0: literal */
      *p_demangled++ = *bits.src++;
      rep_bias = 2;
      continue;
    }

    if ( !dm_getbit(&bits) )
    {
      /* 10: match with gamma-coded distance/length */
      unsigned int hi = dm_getgamma(&bits) - rep_bias;
      unsigned int len;

      rep_bias = 1;
      if ( hi == 0 )
      {
        /* Reuse the previous distance; the length is taken as-is. */
        len = dm_getgamma(&bits);
      }
      else
      {
        /* The distance low byte is read before the length bits. */
        unsigned int dist = ((hi - 1) << 8) | *bits.src++;

        len = dm_getgamma(&bits);
        /* Length bias depends on the distance range. */
        if ( dist >= 0x7D00u )
          len += 2;
        else if ( dist >= 0x500u )
          len += 1;
        else if ( dist <= 0x7Fu )
          len += 2;
        last_dist = (long)dist;
      }
      p_demangled = dm_copy(p_demangled, last_dist, len);
      continue;
    }

    if ( !dm_getbit(&bits) )
    {
      /* 110: short match, or end of stream when the distance is zero */
      unsigned int packed = *bits.src++;
      unsigned int dist = packed >> 1;

      if ( dist == 0 )
        break;
      last_dist = (long)dist;
      p_demangled = dm_copy(p_demangled, last_dist, 2u + (packed & 1u));
      rep_bias = 1;
      continue;
    }

    /* 111: single byte from a 4-bit distance; does not update last_dist */
    {
      unsigned int dist = 0;
      int k;

      for ( k = 0; k < 4; k++ )
        dist = (dist << 1) | dm_getbit(&bits);
      *p_demangled = dist ? p_demangled[-(long)dist] : 0;
      p_demangled++;
      rep_bias = 2;
    }
  }

  /* Pointer difference instead of casting both pointers to int, which would
     truncate them on 64-bit targets. */
  *l = (int)(p_demangled - out_start);
}

/*
 * Stand-alone driver: reads the mangled blob from "data1.bin", runs
 * demangle_code() over it and writes the result to "data1.bin.demangled".
 * Returns 0 on success and 1 on any I/O or allocation failure.
 */
int main()
{
    /* Zeroed slack kept in front of the output pointer, so a back-reference
       that reaches before the first output byte still reads allocated memory. */
    int offset = 0x10000;
    /* demangle_code() takes no output bound and unpacked data is normally
       larger than its packed form, so reserve a generous multiple of the
       input size. Raise this if the unpacked data is larger still. */
    const size_t expand = 16;

    int rc = 1;
    int out_len = 0;
    long len = 0;
    size_t cap = 0;
    _BYTE *p_mangled = NULL;
    _BYTE *p_demangled = NULL;
    FILE *f = NULL;
    FILE *fo = NULL;

    /* "rb": text mode on Windows translates CR/LF and stops at 0x1A, which
       silently corrupts binary data. */
    f = fopen("data1.bin", "rb");
    if (!f) {
        perror("data1.bin");
        goto done;
    }
    if (fseek(f, 0, SEEK_END) != 0 || (len = ftell(f)) <= 0 ||
        fseek(f, 0, SEEK_SET) != 0) {
        fprintf(stderr, "data1.bin: cannot determine size (or file is empty)\n");
        goto done;
    }

    /* The input buffer has to exist before it is read into. */
    p_mangled = (_BYTE*)malloc((size_t)len);
    if (!p_mangled) {
        fprintf(stderr, "out of memory\n");
        goto done;
    }
    if (fread(p_mangled, 1, (size_t)len, f) != (size_t)len) {
        fprintf(stderr, "data1.bin: short read\n");
        goto done;
    }
    fclose(f);
    f = NULL;

    if ((size_t)len > ((size_t)-1 - (size_t)offset) / expand) {
        fprintf(stderr, "data1.bin: too large\n");
        goto done;
    }
    cap = (size_t)offset + (size_t)len * expand;
    /* calloc zeroes the whole buffer, slack included. */
    p_demangled = (_BYTE*)calloc(cap, 1);
    if (!p_demangled) {
        fprintf(stderr, "out of memory\n");
        goto done;
    }

    demangle_code(0, p_mangled, p_demangled+offset, &out_len);

    fo = fopen("data1.bin.demangled", "wb");
    if (!fo) {
        perror("data1.bin.demangled");
        goto done;
    }
    if (fwrite(p_demangled+offset, 1, (size_t)out_len, fo) != (size_t)out_len) {
        fprintf(stderr, "data1.bin.demangled: short write\n");
        goto done;
    }
    /* fclose flushes, so its result matters for the output file. */
    if (fclose(fo) != 0) {
        fo = NULL;
        perror("data1.bin.demangled");
        goto done;
    }
    fo = NULL;
    rc = 0;

done:
    if (fo)
        fclose(fo);
    if (f)
        fclose(f);
    free(p_demangled);
    free(p_mangled);
    return rc;
}

EDIT: There's actually something wrong with my results, so I'm using the results generated by the packed executable, which I dumped from memory. I hope to look into why my code isn't producing the correct output at some point (it's slightly off).

Some more IDAPython magic to patch the demangled code back:

  
def patch_from_file(ea, filename):
    """Patch the database with the raw contents of a file.

    Reads `filename` in binary mode and calls PatchByte once per byte,
    writing byte i of the file to address `ea + i`.
    """
    # The context manager closes the file even if the read fails.
    with open(filename, "rb") as f:
        data = bytearray(f.read())
    for i, value in enumerate(data):
        PatchByte(ea + i, value)

# do the business: write the demangled bytes back at the destination buffer
patch_from_file(0x103f014,"data1.bin.demangled")

We've got some more code, looking good...