VM技术的原理

虚拟机保护是一种代码保护技术:开发者自定义一套opcode(字节码),由虚拟机的dispatcher逐条解释执行,从而起到代码混淆、增加逆向难度的作用。

img

VM_start:是对虚拟机的初始化。

VM_dispatcher: 调度器,解释op_code,并选择相应的处理函数执行,一般为switch语句,根据操作码(opcode)判断。

VM_code:程序可执行代码形成的操作码。

通过对HWS-2022的babyVM来初涉VM保护的逆向分析。

BabyVM

image-20220128224102355

首先是花指令:通过 jz 和 jnz 连用(字节序列 74 03 75 01 E8)实现 jmp 的功能,其后的 0xE8 是永远不会被执行的垃圾字节,用来干扰反汇编。本题多次运用该花指令,可以用下面的 IDAPython 脚本把这 5 个字节全部 patch 为 nop。

1
2
3
4
5
6
7
8
import idautils
import ida_bytes

# IDAPython script: remove the junk-instruction pattern from the protected
# code range so that the decompiler can process the function.
#
# Pattern bytes: 74 03 (jz +3) / 75 01 (jnz +1) / E8 (junk byte).
# Both conditional jumps land right after the E8 byte, so together they act as
# an unconditional jmp and the E8 is never executed; all five bytes can be
# replaced with NOPs.
JUNK_PATTERN = b'\x74\x03\x75\x01\xE8'

adr = 0x00412CC0  # first address of the range to clean
end = 0x00413991  # end address of the range (exclusive)

tmp = ida_bytes.get_bytes(adr, end - adr)
tmp = tmp.replace(JUNK_PATTERN, b'\x90' * len(JUNK_PATTERN))

# Write the cleaned bytes back into the database one byte at a time.
for i, value in enumerate(tmp):
    ida_bytes.patch_byte(adr + i, value)
print('ok')

VM_Dispatcher

修复好后可以直接看到VM_Dispatcher函数内容,它根据每条指令中的操作码(opcode),以switch和case语句选择对应的处理分支来执行。

通过分析代码来确定变量的特殊用途,并且定义相应结构体。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
// VM dispatcher (Hex-Rays pseudocode).
// a1 points to an array of 24-byte instructions: three 64-bit fields holding
// the opcode, the left operand (Lnum) and the right operand (Rnum).
// The loop fetches the instruction at index Reg.RIP, increments Reg.RIP and
// executes the matching case; it stops when it fetches opcode 0x19.
// Unknown opcodes fall into the default case and are skipped.
void __cdecl VM_Dispatch(_DWORD *a1)
{
__int64 *v1; // eax
unsigned __int64 v2; // kr00_8
int v3; // eax
int v4; // ecx
int v5; // eax
int v6; // esi
int v7; // eax
int v8; // ecx
int v9; // eax
int v10; // ecx
int v11; // esi
int v12; // ecx
int v13; // esi
int v14; // eax
int v15; // edx
unsigned int v16; // ecx
int v17; // edx
__int64 v18; // kr08_8
int v19; // eax
__int64 v20; // rdi
__int64 v21; // rdi
int v22; // eax
__int64 v23; // kr10_8
int v24; // eax
__int64 v25; // rdi
__int64 v26; // rdi
int v27; // eax
int v28; // eax
int v29; // eax
__int64 v30; // rax
__int64 v31; // rax
int v32; // esi
__int64 v33; // rax
__int64 v34; // rax
int v35; // esi
unsigned __int64 v36; // rax
unsigned __int64 v37; // rax
int v38; // esi
unsigned __int64 v39; // rax
unsigned __int64 v40; // rax
int v41; // esi
int v42; // edi
int v43; // eax
int v44; // esi
int v45; // eax
int v46; // edi
int v47; // eax
int v48; // esi
int v49; // eax
int v50; // edi
int v51; // eax
int v52; // esi
int v53; // eax
__int64 v54; // kr18_8
int v55; // eax
unsigned int v56; // eax
__int64 v57; // [esp-10h] [ebp-134h]
__int64 v58; // [esp-10h] [ebp-134h]
__int64 v59; // [esp-8h] [ebp-12Ch]
int v60; // [esp+10h] [ebp-114h]
__int64 Inst_op; // [esp+108h] [ebp-1Ch]
__int64 Lnum; // [esp+110h] [ebp-14h]
unsigned __int64 Rnum; // [esp+118h] [ebp-Ch]

LODWORD(Reg.RIP) = 0;
HIDWORD(Reg.RIP) = 0;
LABEL_2:
while ( a1[6 * LODWORD(Reg.RIP)] != 0x19 ) // opcode 0x19 ends execution; Reg.RIP is the VM instruction pointer
{
v1 = (__int64 *)&a1[6 * LODWORD(Reg.RIP)]; // 6 DWORDs = 24 bytes per instruction; the trailing 16 bytes are the two operands
Inst_op = *v1;
Lnum = v1[1];
Rnum = v1[2];
v2 = __PAIR64__(HIDWORD(Reg.RIP), LODWORD(Reg.RIP)++) + 1;// advance the VM program counter (RIP)
HIDWORD(Reg.RIP) = HIDWORD(v2);
switch ( (int)Inst_op )
{
case 0:
*(_QWORD *)(Mem + 8 * LODWORD(Reg.R[Lnum])) = Rnum;// mov Mem[Reg[Lnum]],Rnum
break;
case 1:
v5 = Lnum;
LODWORD(Reg.R[v5]) = Rnum;
HIDWORD(Reg.R[v5]) = HIDWORD(Rnum); // mov Reg[Lnum],Rnum
break;
case 2:
v9 = Lnum; // mov Reg[Lnum],Reg[Rnum]
LODWORD(Reg.R[v9]) = Reg.R[Rnum];
HIDWORD(Reg.R[v9]) = HIDWORD(Reg.R[Rnum]);
break;
case 3:
v6 = 8 * LODWORD(Reg.R[Rnum]); // mov Reg[Lnum],Mem[Reg[Rnum]]
v7 = Lnum;
v8 = Mem;
LODWORD(Reg.R[v7]) = *(_DWORD *)(Mem + v6);
HIDWORD(Reg.R[v7]) = *(_DWORD *)(v8 + v6 + 4);
break;
case 4:
v3 = 8 * LODWORD(Reg.R[Lnum]);
v4 = Mem;
*(_DWORD *)(Mem + v3) = Reg.R[Rnum];
*(_DWORD *)(v4 + v3 + 4) = HIDWORD(Reg.R[Rnum]);// mov Mem[Reg[Lnum]],Reg[Rnum]
break;
case 5:
v10 = Reg.R[4]; // Reg[4] acts as the stack pointer (ESP-like); it is set to 0x100 during VM initialisation
v60 = __CFADD__(v10, 1) + HIDWORD(Reg.R[4]);// push Reg[Lnum]: increment Reg[4] first, then store into Buffer
LODWORD(Reg.R[4]) = v10 + 1;
HIDWORD(Reg.R[4]) = v60;
v11 = 8 * (v10 + 1);
v12 = Buffer;
*(_DWORD *)(Buffer + v11) = Reg.R[Lnum];
*(_DWORD *)(v12 + v11 + 4) = HIDWORD(Reg.R[Lnum]);
break;
case 6:
v13 = 8 * LODWORD(Reg.R[4]);
v14 = Lnum;
v15 = Buffer;
LODWORD(Reg.R[v14]) = *(_DWORD *)(Buffer + v13);// pop Reg[Lnum]: load from Buffer first, then decrement Reg[4]
HIDWORD(Reg.R[v14]) = *(_DWORD *)(v15 + v13 + 4);
v16 = Reg.R[4];
v17 = (__PAIR64__(HIDWORD(Reg.R[4]), v16) - 1) >> 32;
LODWORD(Reg.R[4]) = v16 - 1;
HIDWORD(Reg.R[4]) = v17;
break;
case 7:
v18 = Rnum + __PAIR64__(HIDWORD(Reg.R[Lnum]), Reg.R[Lnum]);// add Reg[Lnum],Rnum
v19 = Lnum;
LODWORD(Reg.R[v19]) = Rnum + LODWORD(Reg.R[Lnum]);
HIDWORD(Reg.R[v19]) = HIDWORD(v18);
break;
case 8: // add Reg[Lnum],Reg[Rnum]
LODWORD(v20) = Reg.R[Lnum];
HIDWORD(v20) = HIDWORD(Reg.R[Lnum]);
v21 = __PAIR64__(HIDWORD(Reg.R[Rnum]), Reg.R[Rnum]) + v20;
v22 = Lnum;
LODWORD(Reg.R[v22]) = v21;
HIDWORD(Reg.R[v22]) = HIDWORD(v21);
break;
case 9:
v23 = __PAIR64__(HIDWORD(Reg.R[Lnum]), Reg.R[Lnum]) - Rnum;// sub Reg[Lnum],Rnum
v24 = Lnum;
LODWORD(Reg.R[v24]) = LODWORD(Reg.R[Lnum]) - Rnum;
HIDWORD(Reg.R[v24]) = HIDWORD(v23);
break;
case 0xA:
LODWORD(v25) = Reg.R[Lnum];
HIDWORD(v25) = HIDWORD(Reg.R[Lnum]);
v26 = v25 - __PAIR64__(HIDWORD(Reg.R[Rnum]), Reg.R[Rnum]);// sub Reg[Lnum],Reg[Rnum]
v27 = Lnum;
LODWORD(Reg.R[v27]) = v26;
HIDWORD(Reg.R[v27]) = HIDWORD(v26);
break;
case 0xB:
HIDWORD(v57) = HIDWORD(Reg.R[Lnum]); // mul Reg[Lnum],Rnum
LODWORD(v57) = Reg.R[Lnum];
v28 = Lnum;
LODWORD(Reg.R[v28]) = v57 * Rnum;
HIDWORD(Reg.R[v28]) = (v57 * Rnum) >> 32;
break;
case 0xC:
HIDWORD(v59) = HIDWORD(Reg.R[Rnum]);
LODWORD(v59) = Reg.R[Rnum];
HIDWORD(v58) = HIDWORD(Reg.R[Lnum]);
LODWORD(v58) = Reg.R[Lnum];
v29 = Lnum;
LODWORD(Reg.R[v29]) = v58 * v59;
HIDWORD(Reg.R[v29]) = (unsigned __int64)(v58 * v59) >> 32;// mul Reg[Lnum],Reg[Rnum]
break;
case 0xD:
LODWORD(v30) = Reg.R[Lnum];
HIDWORD(v30) = HIDWORD(Reg.R[Lnum]);
v31 = v30 << Rnum;
v32 = v31;
LODWORD(v31) = 8 * Lnum;
*(_DWORD *)((char *)Reg.R + v31) = v32;
*(_DWORD *)((char *)Reg.R + v31 + 4) = HIDWORD(v31);// shl Reg[Lnum],Rnum
break;
case 0xE:
LODWORD(v33) = Reg.R[Lnum];
HIDWORD(v33) = HIDWORD(Reg.R[Lnum]);
v34 = v33 << LODWORD(Reg.R[Rnum]);
v35 = v34;
LODWORD(v34) = 8 * Lnum;
*(_DWORD *)((char *)Reg.R + v34) = v35;
*(_DWORD *)((char *)Reg.R + v34 + 4) = HIDWORD(v34);// shl Reg[Lnum],Reg[Rnum]
break;
case 0xF:
LODWORD(v36) = Reg.R[Lnum]; // shr Reg[Lnum],Rnum
HIDWORD(v36) = HIDWORD(Reg.R[Lnum]);
v37 = v36 >> Rnum;
v38 = v37;
LODWORD(v37) = 8 * Lnum;
*(_DWORD *)((char *)Reg.R + v37) = v38;
*(_DWORD *)((char *)Reg.R + v37 + 4) = HIDWORD(v37);
break;
case 0x10: // shr Reg[Lnum],Reg[Rnum]
LODWORD(v39) = Reg.R[Lnum];
HIDWORD(v39) = HIDWORD(Reg.R[Lnum]);
v40 = v39 >> LODWORD(Reg.R[Rnum]);
v41 = v40;
LODWORD(v40) = 8 * Lnum;
*(_DWORD *)((char *)Reg.R + v40) = v41;
*(_DWORD *)((char *)Reg.R + v40 + 4) = HIDWORD(v40);
break;
case 0x11:
v42 = HIDWORD(Rnum) ^ HIDWORD(Reg.R[Lnum]);
v43 = Lnum;
LODWORD(Reg.R[v43]) = Rnum ^ LODWORD(Reg.R[Lnum]);// xor Reg[Lnum],Rnum
HIDWORD(Reg.R[v43]) = v42;
break;
case 0x12:
v44 = HIDWORD(Reg.R[Rnum]) ^ HIDWORD(Reg.R[Lnum]);// xor Reg[Lnum],Reg[Rnum]
v45 = Lnum;
LODWORD(Reg.R[v45]) = LODWORD(Reg.R[Rnum]) ^ LODWORD(Reg.R[Lnum]);
HIDWORD(Reg.R[v45]) = v44;
break;
case 0x13:
v46 = HIDWORD(Rnum) | HIDWORD(Reg.R[Lnum]);// or Reg[Lnum],Rnum
v47 = Lnum;
LODWORD(Reg.R[v47]) = Rnum | LODWORD(Reg.R[Lnum]);
HIDWORD(Reg.R[v47]) = v46;
break;
case 0x14:
v48 = HIDWORD(Reg.R[Rnum]) | HIDWORD(Reg.R[Lnum]);// or Reg[Lnum],Reg[Rnum]
v49 = Lnum;
LODWORD(Reg.R[v49]) = LODWORD(Reg.R[Rnum]) | LODWORD(Reg.R[Lnum]);
HIDWORD(Reg.R[v49]) = v48;
break;
case 0x15:
v50 = HIDWORD(Rnum) & HIDWORD(Reg.R[Lnum]);// and Reg[Lnum],Rnum
v51 = Lnum;
LODWORD(Reg.R[v51]) = Rnum & LODWORD(Reg.R[Lnum]);
HIDWORD(Reg.R[v51]) = v50;
break;
case 0x16:
v52 = HIDWORD(Reg.R[Rnum]) & HIDWORD(Reg.R[Lnum]);// and Reg[Lnum],Reg[Rnum]
v53 = Lnum;
LODWORD(Reg.R[v53]) = Reg.R[Rnum] & LODWORD(Reg.R[Lnum]);
HIDWORD(Reg.R[v53]) = v52;
break;
case 0x17:
v54 = getchar(); // Reg[Lnum]=getchar()
v55 = Lnum;
LODWORD(Reg.R[v55]) = v54;
HIDWORD(Reg.R[v55]) = HIDWORD(v54);
break;
case 0x18:
putchar(Reg.R[Lnum]); // putchar(Reg[Lnum])
break;
case 0x1A:
LOBYTE(cmp_bool) = __PAIR64__(HIDWORD(Reg.R[Lnum]), Reg.R[Lnum]) == Rnum;// cmp Reg[Lnum],Rnum: byte 0 of cmp_bool = "equal" flag
BYTE1(cmp_bool) = __PAIR64__(HIDWORD(Reg.R[Lnum]), Reg.R[Lnum]) < Rnum;// byte 1 of cmp_bool = "below" flag
break;
case 0x1B:
LOBYTE(cmp_bool) = LODWORD(Reg.R[Lnum]) == LODWORD(Reg.R[Rnum]) && HIDWORD(Reg.R[Lnum]) == HIDWORD(Reg.R[Rnum]);// cmp Reg[Lnum],Reg[Rnum]
v56 = HIDWORD(Reg.R[Lnum]);
BYTE1(cmp_bool) = v56 <= HIDWORD(Reg.R[Rnum])
&& (v56 < HIDWORD(Reg.R[Rnum]) || LODWORD(Reg.R[Lnum]) < LODWORD(Reg.R[Rnum]));
break;
case 0x1C:
if ( (unsigned __int8)cmp_bool == 1 ) // je Lnum
{
LODWORD(Reg.RIP) = Lnum;
HIDWORD(Reg.RIP) = HIDWORD(Lnum);
}
break;
case 0x1D:
LODWORD(Reg.RIP) = Lnum; // jmp Lnum
HIDWORD(Reg.RIP) = HIDWORD(Lnum);
break;
case 0x1E:
if ( BYTE1(cmp_bool) == 1 ) // jb Lnum
{
LODWORD(Reg.RIP) = Lnum;
HIDWORD(Reg.RIP) = HIDWORD(Lnum);
}
break;
case 0x1F:
if ( !(_BYTE)cmp_bool ) // jne Lnum
{
LODWORD(Reg.RIP) = Lnum;
HIDWORD(Reg.RIP) = HIDWORD(Lnum);
}
break;
default: // unknown opcode: skip it and fetch the next instruction
goto LABEL_2;
}
}
}

IDA定义结构体

观察各case块用到的变量和动作,因为opcode也是解释为x86的指令执行,故要熟悉常用的汇编指令。

1
2
3
4
5
// Fetch/decode step of the dispatcher, before the switch.
v1 = (__int64 *)&a1[6 * LODWORD(Reg.RIP)]; // a1 is a _DWORD array, so 6 entries = one 24-byte instruction
Inst_op = *v1; // first 8 bytes: opcode
Lnum = v1[1]; // next 8 bytes: left operand
Rnum = v1[2]; // last 8 bytes: right operand
v2 = __PAIR64__(HIDWORD(Reg.RIP), LODWORD(Reg.RIP)++) + 1; // increment the 64-bit instruction pointer

结合指令格式(操作码和两个操作数),从switch开始前的初始动作便能确定指令长度为24 Byte,并且了解寄存器数组中的哪个元素为IP。

结合分析结果定义如下结构体

VM中的寄存器,未定义前IDA识别为了大小为32的int数组。

1
2
3
4
/* Register file of the VM: sixteen 64-bit slots.
 * Before this type is applied, IDA shows the area as an int array of size 32. */
struct VM_REG{
_QWORD R[15]; /* general-purpose registers, named R to match the Reg.R[...] accesses in the
                 pseudocode; could be split further, e.g. R[4] is used as the stack pointer */
_QWORD RIP;   /* instruction pointer: index of the next 24-byte instruction */
};

opcode指令格式,指令长度为24字节。

1
2
3
4
5
/* One VM instruction: three 64-bit fields, 24 bytes in total. */
struct Inst{
_QWORD adr;  /* opcode selected by the dispatcher's switch */
_QWORD Lnum; /* left operand */
_QWORD Rnum; /* right operand */
};

way-1

IDA 具体操作,shift+F9 进入struct窗口,按Insert插入结构体,在自己定义结构体的end处按d就能增加元素,通过Y键可以修改数据的类型,例如数组,_DWORD,_QWORD等,最后将想定义为结构体的一片空间的首地址改为自己定义的结构体类型即可。

image-20220128232054510

way-2

另:导入自己定义好的结构体,shift + F1 ,之后insert,把写好的c语言的结构体复制进去即可。

image-20220130212048340

VM_Start

根据对VM_Dispatcher的交叉引用发现有四组调用,并且在追溯中发现VM初始化的函数。

1
2
3
4
5
6
7
8
9
10
11
12
// VM start routine (Hex-Rays pseudocode): allocates the two VM buffers, clears
// the register file and the compare flags, sets Reg.R[4] to 0x100 and then runs
// the bytecode programs code_3 and code_4 through the dispatcher.
void sub_412BB0()
{
__CheckForDebuggerJustMyCode(&unk_421002); // compiler-inserted debug hook (presumably MSVC /JMC); not part of the VM logic
Buffer = (int)malloc(0x8000u); // area addressed through Reg.R[4] by the push/pop handlers
Mem = malloc(0x800000u); // area addressed by the Mem[...] load/store handlers
j_memset(&Reg, 0, sizeof(Reg));
j_memset(&cmp_bool, 0, sizeof(cmp_bool));
LODWORD(Reg.R[4]) = 256; // initial stack pointer value (0x100)
HIDWORD(Reg.R[4]) = 0;
j_VM_Dispatch((int)&code_3);
j_VM_Dispatch((int)&code_4);
}

开辟了缓冲区和内存空间,返回了空间的指针,并且对特殊寄存器赋值,比如Reg.R[4],之后在操控动态空间中用到,类似esp。

VM_Parser

该题目的opcode比较多,并且有4处调用Dispatch,opcode被拆分成四组依次执行。在分析清楚虚拟机的每条解释指令之后,便可编写parser将opcode还原为类x86的汇编代码,进而分析出伪代码。

IDAPYTHON dump出opcode,简易脚本如下

1
2
3
4
5
6
7
8
9
10
11
import idautils
import ida_bytes

# IDAPython script: dump a VM bytecode program as a list of
# [opcode, Lnum, Rnum] triples. Every instruction is 24 bytes long
# (three little-endian 64-bit fields).
INST_SIZE = 24

adr = 0x0041E000  # address of the first instruction
end = 0x0041E378  # address of the last instruction to dump (inclusive)

op = []
# `<=` keeps the inclusive upper bound of the original loop, but also
# terminates when `end` is not an exact multiple of 24 bytes away from `adr`.
while adr <= end:
    op.append([ida_bytes.get_qword(adr + offset) for offset in (0, 8, 16)])
    adr += INST_SIZE
print(op)
print('yes')

根据VM_Dispatch写还原parser

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
code_1=[[18, 0, 0], [18, 1, 1], [18, 2, 2], [18, 3, 3], [18, 6, 6], [18, 7, 7], [1, 0, 105], [1, 1, 110], [1, 2, 112], [1, 3, 117], [1, 6, 116], [1, 7, 32], [24, 0, 18446744073709551615], [24, 1, 18446744073709551615], [24, 2, 18446744073709551615], [24, 3, 18446744073709551615], [24, 6, 18446744073709551615], [24, 7, 18446744073709551615], [1, 0, 102], [1, 1, 108], [1, 2, 97], [1, 3, 103], [1, 6, 58], [1, 7, 32], [24, 0, 18446744073709551615], [24, 1, 18446744073709551615], [24, 2, 18446744073709551615], [24, 3, 18446744073709551615], [24, 6, 18446744073709551615], [24, 7, 18446744073709551615], [18, 1, 1], [23, 0, 18446744073709551615], [5, 0, 18446744073709551615], [7, 1, 1], [26, 1, 38], [30, 31, 18446744073709551615], [25, 18446744073709551615, 18446744073709551615], [18, 2, 2]]
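# code_1 (above): prints "input flag: ", then reads 38 characters and pushes each one onto the VM stack (input + length check)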
code_2=[[6, 0, 18446744073709551615], [26, 0, 125], [28, 18, 18446744073709551615], [1, 0, 119], [1, 1, 114], [1, 2, 111], [1, 3, 110], [1, 6, 103], [1, 7, 33], [24, 0, 18446744073709551615], [24, 1, 18446744073709551615], [24, 2, 18446744073709551615], [24, 3, 18446744073709551615], [24, 6, 18446744073709551615], [24, 7, 18446744073709551615], [1, 0, 10], [24, 0, 18446744073709551615], [25, 18446744073709551615, 18446744073709551615], [1, 8, 256], [26, 8, 225], [30, 25, 18446744073709551615], [6, 0, 18446744073709551615], [4, 8, 0], [9, 8, 1], [29, 19, 18446744073709551615], [6, 0, 18446744073709551615], [26, 0, 123], [31, 3, 18446744073709551615], [6, 0, 18446744073709551615], [26, 0, 103], [31, 3, 18446744073709551615], [6, 0, 18446744073709551615], [26, 0, 97], [31, 3, 18446744073709551615], [6, 0, 18446744073709551615], [26, 0, 108], [31, 3, 18446744073709551615], [6, 0, 18446744073709551615], [26, 0, 102], [31, 3, 18446744073709551615], [18, 9, 9], [1, 10, 225], [3, 7, 9], [3, 6, 10], [17, 6, 66], [13, 6, 2], [27, 6, 7], [31, 3, 18446744073709551615], [7, 9, 1], [7, 10, 1], [26, 9, 32], [30, 42, 18446744073709551615], [1, 0, 99], [1, 1, 111], [1, 2, 114], [1, 3, 114], [1, 6, 101], [1, 7, 99], [24, 0, 18446744073709551615], [24, 1, 18446744073709551615], [24, 2, 18446744073709551615], [24, 3, 18446744073709551615], [24, 6, 18446744073709551615], [24, 7, 18446744073709551615], [1, 0, 116], [1, 1, 108], [1, 2, 121], [1, 3, 33], [1, 6, 10], [24, 0, 18446744073709551615], [24, 1, 18446744073709551615], [24, 2, 18446744073709551615], [24, 3, 18446744073709551615], [24, 6, 18446744073709551615], [25, 18446744073709551615, 18446744073709551615]]
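# code_2 (above): checks the trailing '}' and the leading "flag{", then compares (c ^ 0x42) << 2 of each of the 32 inner characters with Mem[0..31]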
code_3=[[18, 2, 2], [0, 2, 255], [7, 2, 1], [0, 2, 547], [7, 2, 1], [0, 2, 571], [7, 2, 1], [0, 2, 567], [7, 2, 1], [0, 2, 567], [7, 2, 1], [0, 2, 587], [7, 2, 1], [0, 2, 555], [7, 2, 1], [0, 2, 251], [7, 2, 1], [0, 2, 555], [7, 2, 1], [0, 2, 547], [7, 2, 1], [0, 2, 591], [7, 2, 1], [0, 2, 239], [7, 2, 1], [0, 2, 567], [7, 2, 1], [0, 2, 239], [7, 2, 1], [0, 2, 591], [7, 2, 1], [0, 2, 591], [7, 2, 1], [0, 2, 547], [7, 2, 1], [0, 2, 547], [7, 2, 1], [0, 2, 571], [7, 2, 1], [0, 2, 567], [7, 2, 1], [0, 2, 255], [7, 2, 1], [0, 2, 563], [7, 2, 1], [0, 2, 563], [7, 2, 1], [0, 2, 563], [7, 2, 1], [0, 2, 567], [7, 2, 1], [0, 2, 587], [7, 2, 1], [0, 2, 563], [7, 2, 1], [0, 2, 591], [7, 2, 1], [0, 2, 555], [7, 2, 1], [0, 2, 555], [7, 2, 1], [0, 2, 587], [7, 2, 1], [0, 2, 239], [7, 2, 1], [25, 18446744073709551615, 18446744073709551615]]
# code_3 (above): stores the 32 ciphertext values into Mem[0..31]
code_4=[[18, 2, 2], [3, 0, 2], [9, 0, 99], [4, 2, 0], [7, 2, 1], [26, 2, 32], [30, 1, 18446744073709551615], [25, 18446744073709551615, 18446744073709551615]]
# code_4 (above): subtracts 0x63 from each of the 32 ciphertext values in Mem[0..31]
def VM_Parser(op):
    """Print a readable disassembly of one VM bytecode program.

    Args:
        op: sequence of ``[opcode, Lnum, Rnum]`` triples, as produced by the
            dump script. Unused operands are typically 0xFFFFFFFFFFFFFFFF.

    Each instruction is printed as ``<index>: <mnemonic>``, with the index and
    the operands in hexadecimal. Register numbers lose their ``0x`` prefix
    (``Reg[0xa]`` is printed as ``Reg[a]``).
    """
    # Opcode -> mnemonic template, mirroring the cases of VM_Dispatch.
    VM_Inst = {
        0x00: "mov Mem[Reg[{Lnum}]] , {Rnum}",
        0x01: "mov Reg[{Lnum}] , {Rnum}",
        0x02: "mov Reg[{Lnum}] , Reg[{Rnum}]",
        0x03: "mov Reg[{Lnum}] , Mem[Reg[{Rnum}]]",
        0x04: "mov Mem[Reg[{Lnum}]] , Reg[{Rnum}]",
        0x05: "push Reg[{Lnum}]",
        0x06: "pop Reg[{Lnum}]",
        0x07: "add Reg[{Lnum}] , {Rnum}",
        0x08: "add Reg[{Lnum}] , Reg[{Rnum}]",
        0x09: "sub Reg[{Lnum}] , {Rnum}",
        0x0A: "sub Reg[{Lnum}] , Reg[{Rnum}]",
        0x0B: "mul Reg[{Lnum}] , {Rnum}",
        0x0C: "mul Reg[{Lnum}] , Reg[{Rnum}]",
        0x0D: "shl Reg[{Lnum}] , {Rnum}",
        0x0E: "shl Reg[{Lnum}] , Reg[{Rnum}]",
        0x0F: "shr Reg[{Lnum}] , {Rnum}",
        # 0x10 is handled by the dispatcher; it was missing from this table.
        0x10: "shr Reg[{Lnum}] , Reg[{Rnum}]",
        0x11: "xor Reg[{Lnum}] , {Rnum}",
        0x12: "xor Reg[{Lnum}] , Reg[{Rnum}]",
        0x13: "or Reg[{Lnum}] , {Rnum}",
        0x14: "or Reg[{Lnum}] , Reg[{Rnum}]",
        0x15: "and Reg[{Lnum}] , {Rnum}",
        0x16: "and Reg[{Lnum}] , Reg[{Rnum}]",
        0x17: "Reg[{Lnum}] = getchar()",
        0x18: "putchar() = Reg[{Lnum}]",
        0x19: "exit()",
        0x1A: "cmp Reg[{Lnum}] , {Rnum}",
        0x1B: "cmp Reg[{Lnum}] , Reg[{Rnum}]",
        0x1C: "je {Lnum}",
        0x1D: "jmp {Lnum}",
        0x1E: "jb {Lnum}",
        0x1F: "jne {Lnum}",
    }
    for index, inst in enumerate(op):
        opcode, Lnum, Rnum = inst[0], inst[1], inst[2]
        # To dump the ciphertext table, collect Rnum here whenever opcode == 0
        # ("mov Mem[Reg[..]] , imm").
        template = VM_Inst.get(opcode)
        if template is None:
            # The dispatcher's default case skips unknown opcodes, so report
            # them instead of aborting the whole listing with a KeyError.
            text = 'nop (unknown opcode %s)' % hex(opcode)
        else:
            text = template.format(Lnum=hex(Lnum), Rnum=hex(Rnum)).replace('[0x', '[')
        print('%s: ' % hex(index) + text)
# Disassemble the four bytecode programs in order, printing a separator line
# between consecutive listings (none after the last one).
for position, program in enumerate((code_1, code_2, code_3, code_4)):
    if position:
        print('------code_end------')
    VM_Parser(program)

分析算法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
0x0: xor Reg[0] , Reg[0]
0x1: xor Reg[1] , Reg[1]
0x2: xor Reg[2] , Reg[2]
0x3: xor Reg[3] , Reg[3]
0x4: xor Reg[6] , Reg[6]
0x5: xor Reg[7] , Reg[7]
0x6: mov Reg[0] , 0x69
0x7: mov Reg[1] , 0x6e
0x8: mov Reg[2] , 0x70
0x9: mov Reg[3] , 0x75
0xa: mov Reg[6] , 0x74
0xb: mov Reg[7] , 0x20
0xc: putchar() = Reg[0]
0xd: putchar() = Reg[1]
0xe: putchar() = Reg[2]
0xf: putchar() = Reg[3]
0x10: putchar() = Reg[6]
0x11: putchar() = Reg[7]
0x12: mov Reg[0] , 0x66
0x13: mov Reg[1] , 0x6c
0x14: mov Reg[2] , 0x61
0x15: mov Reg[3] , 0x67
0x16: mov Reg[6] , 0x3a
0x17: mov Reg[7] , 0x20
0x18: putchar() = Reg[0]
0x19: putchar() = Reg[1]
0x1a: putchar() = Reg[2]
0x1b: putchar() = Reg[3]
0x1c: putchar() = Reg[6]
0x1d: putchar() = Reg[7]
0x1e: xor Reg[1] , Reg[1]
0x1f: Reg[0] = getchar()
0x20: push Reg[0] //flag入栈
0x21: add Reg[1] , 0x1
0x22: cmp Reg[1] , 0x26 //check长度
0x23: jb 0x1f
0x24: exit()
0x25: xor Reg[2] , Reg[2]
------code_end------
0x0: pop Reg[0] //检测flag尾部是否为}
0x1: cmp Reg[0] , 0x7d
0x2: je 0x12
0x3: mov Reg[0] , 0x77
0x4: mov Reg[1] , 0x72
0x5: mov Reg[2] , 0x6f
0x6: mov Reg[3] , 0x6e
0x7: mov Reg[6] , 0x67
0x8: mov Reg[7] , 0x21
0x9: putchar() = Reg[0]
0xa: putchar() = Reg[1]
0xb: putchar() = Reg[2]
0xc: putchar() = Reg[3]
0xd: putchar() = Reg[6]
0xe: putchar() = Reg[7]
0xf: mov Reg[0] , 0xa
0x10: putchar() = Reg[0]
0x11: exit()
0x12: mov Reg[8] , 0x100
0x13: cmp Reg[8] , 0xe1
0x14: jb 0x19
0x15: pop Reg[0]
0x16: mov Mem[Reg[8]] , Reg[0]
0x17: sub Reg[8] , 0x1
0x18: jmp 0x13
0x19: pop Reg[0]
0x1a: cmp Reg[0] , 0x7b
0x1b: jne 0x3
0x1c: pop Reg[0]
0x1d: cmp Reg[0] , 0x67
0x1e: jne 0x3
0x1f: pop Reg[0]
0x20: cmp Reg[0] , 0x61
0x21: jne 0x3
0x22: pop Reg[0]
0x23: cmp Reg[0] , 0x6c
0x24: jne 0x3
0x25: pop Reg[0]
0x26: cmp Reg[0] , 0x66 //检测flag头
0x27: jne 0x3
0x28: xor Reg[9] , Reg[9]
0x29: mov Reg[a] , 0xe1
0x2a: mov Reg[7] , Mem[Reg[9]]
0x2b: mov Reg[6] , Mem[Reg[a]]
0x2c: xor Reg[6] , 0x42 //a[i]^=0x42 之后 a[i]<<=2
0x2d: shl Reg[6] , 0x2
0x2e: cmp Reg[6] , Reg[7]
0x2f: jne 0x3
0x30: add Reg[9] , 0x1
0x31: add Reg[a] , 0x1
0x32: cmp Reg[9] , 0x20 //flag的内容进行变换
0x33: jb 0x2a
0x34: mov Reg[0] , 0x63
0x35: mov Reg[1] , 0x6f
0x36: mov Reg[2] , 0x72
0x37: mov Reg[3] , 0x72
0x38: mov Reg[6] , 0x65
0x39: mov Reg[7] , 0x63
0x3a: putchar() = Reg[0]
0x3b: putchar() = Reg[1]
0x3c: putchar() = Reg[2]
0x3d: putchar() = Reg[3]
0x3e: putchar() = Reg[6]
0x3f: putchar() = Reg[7]
0x40: mov Reg[0] , 0x74
0x41: mov Reg[1] , 0x6c
0x42: mov Reg[2] , 0x79
0x43: mov Reg[3] , 0x21
0x44: mov Reg[6] , 0xa
0x45: putchar() = Reg[0]
0x46: putchar() = Reg[1]
0x47: putchar() = Reg[2]
0x48: putchar() = Reg[3]
0x49: putchar() = Reg[6]
0x4a: exit()
------code_end------
0x0: xor Reg[2] , Reg[2] //比对密文存入内存
0x1: mov Mem[Reg[2]] , 0xff
0x2: add Reg[2] , 0x1
0x3: mov Mem[Reg[2]] , 0x223
0x4: add Reg[2] , 0x1
0x5: mov Mem[Reg[2]] , 0x23b
0x6: add Reg[2] , 0x1
0x7: mov Mem[Reg[2]] , 0x237
0x8: add Reg[2] , 0x1
0x9: mov Mem[Reg[2]] , 0x237
0xa: add Reg[2] , 0x1
0xb: mov Mem[Reg[2]] , 0x24b
0xc: add Reg[2] , 0x1
0xd: mov Mem[Reg[2]] , 0x22b
0xe: add Reg[2] , 0x1
0xf: mov Mem[Reg[2]] , 0xfb
0x10: add Reg[2] , 0x1
0x11: mov Mem[Reg[2]] , 0x22b
0x12: add Reg[2] , 0x1
0x13: mov Mem[Reg[2]] , 0x223
0x14: add Reg[2] , 0x1
0x15: mov Mem[Reg[2]] , 0x24f
0x16: add Reg[2] , 0x1
0x17: mov Mem[Reg[2]] , 0xef
0x18: add Reg[2] , 0x1
0x19: mov Mem[Reg[2]] , 0x237
0x1a: add Reg[2] , 0x1
0x1b: mov Mem[Reg[2]] , 0xef
0x1c: add Reg[2] , 0x1
0x1d: mov Mem[Reg[2]] , 0x24f
0x1e: add Reg[2] , 0x1
0x1f: mov Mem[Reg[2]] , 0x24f
0x20: add Reg[2] , 0x1
0x21: mov Mem[Reg[2]] , 0x223
0x22: add Reg[2] , 0x1
0x23: mov Mem[Reg[2]] , 0x223
0x24: add Reg[2] , 0x1
0x25: mov Mem[Reg[2]] , 0x23b
0x26: add Reg[2] , 0x1
0x27: mov Mem[Reg[2]] , 0x237
0x28: add Reg[2] , 0x1
0x29: mov Mem[Reg[2]] , 0xff
0x2a: add Reg[2] , 0x1
0x2b: mov Mem[Reg[2]] , 0x233
0x2c: add Reg[2] , 0x1
0x2d: mov Mem[Reg[2]] , 0x233
0x2e: add Reg[2] , 0x1
0x2f: mov Mem[Reg[2]] , 0x233
0x30: add Reg[2] , 0x1
0x31: mov Mem[Reg[2]] , 0x237
0x32: add Reg[2] , 0x1
0x33: mov Mem[Reg[2]] , 0x24b
0x34: add Reg[2] , 0x1
0x35: mov Mem[Reg[2]] , 0x233
0x36: add Reg[2] , 0x1
0x37: mov Mem[Reg[2]] , 0x24f
0x38: add Reg[2] , 0x1
0x39: mov Mem[Reg[2]] , 0x22b
0x3a: add Reg[2] , 0x1
0x3b: mov Mem[Reg[2]] , 0x22b
0x3c: add Reg[2] , 0x1
0x3d: mov Mem[Reg[2]] , 0x24b
0x3e: add Reg[2] , 0x1
0x3f: mov Mem[Reg[2]] , 0xef
0x40: add Reg[2] , 0x1
0x41: exit()
------code_end------
0x0: xor Reg[2] , Reg[2] //将密文a[i]-=0x63
0x1: mov Reg[0] , Mem[Reg[2]]
0x2: sub Reg[0] , 0x63
0x3: mov Mem[Reg[2]] , Reg[0]
0x4: add Reg[2] , 0x1
0x5: cmp Reg[2] , 0x20
0x6: jb 0x1
0x7: exit()

实际执行时先存入密文并且处理,再输入flag经过处理后比对,逻辑如上,简单逆向算法即可。

1
2
3
4
5
6
# Solver: invert the check performed by the VM.
# The VM stores the table `enc`, subtracts 0x63 from every entry, and then
# requires (c ^ 0x42) << 2 == entry for each of the 32 inner flag characters.
# Inverting it: c = ((entry - 0x63) >> 2) ^ 0x42.
enc=[255, 547, 571, 567, 567, 587, 555, 251, 555, 547, 591, 239, 567, 239, 591, 591, 547, 547, 571, 567, 255, 563, 563, 563, 567, 587, 563, 591, 555, 555, 587, 239]
plain = ''.join(chr(((value - 0x63) >> 2) ^ 0x42) for value in enc)
# The VM checks the "flag{" prefix and the "}" suffix separately, so add them
# back to print the complete flag.
print('flag{%s}' % plain)
#flag{e247780d029a7a992247e6667869008a}

总结

在进行VM逆向分析时,首先要了解VM保护的运行流程,关键点在于Dispatcher中的指令解读和程序流程的分析,这其中的工作量一般是比较大的,并且比较吃汇编,同时要注意VM中指令格式,寄存器的结构体定义,这样可以优化伪码,加速分析。

Parser的编写就是用python将op译码为x86的汇编,再进行算法分析,这个过程应该需要刷题来练的,不过该题的parser比较简单。

IDA的宏定义和IDAPYTHON的使用还要进一步加深。分析VM时耐心十分重要,虚拟化程度高时,动态调试和静态分析结合效率会更高。

参考:

IDAPYTHON常用命令

https://blog.shi1011.cn/ctf/2077

https://www.cnblogs.com/nigacat/p/13039289.html