You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

479 lines
9.8 KiB

6 years ago
  ; My response to http://web2.clarkson.edu/projects/cosi/sp2015/students/daceyj/UrandomCounter/derp.cpp
  ; Remember: real programmers use assembly :P
  ;
  ; Build with:
  ;   $ yasm -f elf64 chc.s
  ;   $ ld -o chc chc.o
  ; ...obviously choose the appropriate ELF format for your architecture :P

  ; Build mode: BUILD_64 selects the 64-bit configuration below; without it the
  ; %else branch (BITS 32, 4-byte pointers) is used instead.
  %define BUILD_64

  %ifdef BUILD_64
  BITS 64
  ; Size in bytes of one pointer / stack slot in this mode.
  %define SZOF_PTR 8
  ; Kids, don't try this at home: every 32-bit register name written in the rest
  ; of the file is textually replaced by its 64-bit counterpart.
  %define esp rsp
  %define ebp rbp
  %define esi rsi
  %define edi rdi
  %define eax rax
  %define ebx rbx
  %define ecx rcx
  %define edx rdx
  %else
  BITS 32
  %define SZOF_PTR 4
  %endif
  ; Constants (on my system)

  ; File access modes passed to SYS_OPEN.
  O_RDONLY        equ 0
  O_WRONLY        equ 1
  O_RDWR          equ 2

  ; Signal numbers.
  SIGINT          equ 2

  ; Syscall numbers, handed to the linsys macros as their first parameter.
  %define SYS_READ        0
  %define SYS_WRITE       1
  %define SYS_OPEN        2
  %define SYS_CLOSE       3
  %define SYS_SIGACTION   13
  %define SYS_EXIT        60

  ; Configuration
  ; Size in bytes of the read buffer reserved in .bss (1.5 MiB).
  %define BUFFER_SZ 1572864
  ; Size in bytes of the digit buffer used by itoa.
  %define ITOABUF_SZ 64
  ; Macros

  ; print <string-data>
  ;
  ; Emits the given data (anything acceptable as a `db` operand list; wrap lists
  ; containing commas in braces) into .data and writes it to stdout via _write.
  ; eax and ebx are saved and restored around the call.
  %macro print 1
          ; Ensure we write the string to the data section.
          ; The bracketed form does not update the special macro __SECT__, which is
          ; what lets us switch back to the caller's section below.
          [section .data]
  %%msg:          db %1
          ; Measure the length BEFORE the terminator so the NUL is stored for
          ; convenience but never written to the output stream.
  %%msglen:       equ $ - %%msg
                  db 0
          ; Restore the section we were in (probably .text)
          __SECT__
          push    eax
          push    ebx
          mov     eax, %%msg              ; eax = address of the string
          mov     ebx, %%msglen           ; ebx = byte count, excluding the NUL
          call    _write
          pop     ebx
          pop     eax
  %endmacro
  ; write <address>, <length>
  ;
  ; Calls _write with eax = %1 and ebx = %2, preserving the caller's eax and ebx.
  ; %1 is loaded first, so %2 must not be an expression that reads eax.
  %macro write 2
          push    eax
          push    ebx
          mov     eax, %1                 ; buffer address
          mov     ebx, %2                 ; byte count
          call    _write
          pop     ebx
          pop     eax
  %endmacro
  ; _linsys_pusha
  ;
  ; Saves rdi, rsi, rdx, rcx, r8, r9, r10 and r11 into the static area that ends
  ; at _syscall_save_stack, leaving the real stack untouched. The stack pointer
  ; is parked in _old_esp while it is borrowed to do the pushes.
  %macro _linsys_pusha 0
          mov     [_old_esp], esp         ; remember the real stack pointer
          mov     esp, _syscall_save_stack ; pushes grow down into the save area
          push    rdi
          push    rsi
          push    rdx
          push    rcx
          push    r8
          push    r9
          push    r10
          push    r11
          mov     esp, [_old_esp]         ; back onto the real stack
  %endmacro
  ; _linsys_popa
  ;
  ; Counterpart of _linsys_pusha: reloads r11, r10, r9, r8, rcx, rdx, rsi and rdi
  ; from the static save area, in the reverse of the order they were pushed.
  %macro _linsys_popa 0
          mov     [_old_esp], esp         ; remember the real stack pointer
          mov     esp, _syscall_save_stack
          sub     esp, 8*SZOF_PTR         ; point at the last of the eight saved slots
          pop     r11
          pop     r10
          pop     r9
          pop     r8
          pop     rcx
          pop     rdx
          pop     rsi
          pop     rdi
          mov     esp, [_old_esp]         ; back onto the real stack
  %endmacro
  ; linsys <nr> [, arg1 [, arg2 [, arg3 [, arg4 [, arg5 [, arg6]]]]]]
  ;
  ; Issues a Linux `syscall` with number %1 and up to six arguments. The macro is
  ; overloaded on its parameter count (one definition per arity).
  ;
  ; Arguments are loaded in the kernel's x86-64 syscall order:
  ;   rdi, rsi, rdx, r10, r8, r9
  ; Note the fourth argument goes in r10, NOT rcx: the `syscall` instruction
  ; itself overwrites rcx (and r11), so a value placed there never reaches the
  ; kernel.
  ;
  ; Out:   eax = syscall return value
  ; Saved: rdi, rsi, rdx, rcx, r8, r9, r10, r11 (via _linsys_pusha/_linsys_popa)
  ; Operands are evaluated after the save and in left-to-right order, so a later
  ; operand must not read a register that an earlier one has already overwritten
  ; (eax, then rdi, rsi, ...).
  %macro linsys 1
          _linsys_pusha
          mov     eax, %1
          syscall
          _linsys_popa
  %endmacro

  %macro linsys 2
          _linsys_pusha
          mov     eax, %1
          mov     rdi, %2
          syscall
          _linsys_popa
  %endmacro

  %macro linsys 3
          _linsys_pusha
          mov     eax, %1
          mov     rdi, %2
          mov     rsi, %3
          syscall
          _linsys_popa
  %endmacro

  %macro linsys 4
          _linsys_pusha
          mov     eax, %1
          mov     rdi, %2
          mov     rsi, %3
          mov     rdx, %4
          syscall
          _linsys_popa
  %endmacro

  %macro linsys 5
          _linsys_pusha
          mov     eax, %1
          mov     rdi, %2
          mov     rsi, %3
          mov     rdx, %4
          mov     r10, %5                 ; 4th syscall argument lives in r10
          syscall
          _linsys_popa
  %endmacro

  %macro linsys 6
          _linsys_pusha
          mov     eax, %1
          mov     rdi, %2
          mov     rsi, %3
          mov     rdx, %4
          mov     r10, %5                 ; 4th syscall argument lives in r10
          mov     r8, %6
          syscall
          _linsys_popa
  %endmacro

  %macro linsys 7
          _linsys_pusha
          mov     eax, %1
          mov     rdi, %2
          mov     rsi, %3
          mov     rdx, %4
          mov     r10, %5                 ; 4th syscall argument lives in r10
          mov     r8, %6
          mov     r9, %7
          syscall
          _linsys_popa
  %endmacro
  ; R/W (not X) memory that's zeroed on startup
  section .bss

  ; Buffer storing our cardinalities so far
  ;
  ; One slot per possible byte value, so 256 slots. Each slot is SZOF_PTR bytes
  ; wide because the counting and printing code indexes this table as
  ; [counts + index*SZOF_PTR]; the slot width also bounds how high a single count
  ; can go, which matters for large inputs like /dev/zero.
  global counts
  counts:
  %ifdef BUILD_64
          resq    256                     ; 8-byte slots (SZOF_PTR == 8)
  %else
          resd    256                     ; 4-byte slots (SZOF_PTR == 4)
  %endif

  ; Buffer space
  ;
  ; For efficiency, we try to read this many bytes at a time from the input file.
  buffer:
          resb    BUFFER_SZ
  .len:   equ     $ - buffer              ; buffer.len = size of the buffer in bytes

  ; itoa buffer and size
  ;
  ; itoa writes its digit string here and stores the string length in
  ; itoa_buffer.sz, which sits immediately after the digit bytes.
  global itoa_buffer
  itoa_buffer:
          resb    ITOABUF_SZ
  .sz:
          resq    1

  ; A little non-stack swap space for syscall convenience:
  ; _old_esp parks the real stack pointer, and the eight slots below
  ; _syscall_save_stack receive the registers pushed by _linsys_pusha.
  _old_esp:
          resq    1
          resq    8
  _syscall_save_stack:
  ; R/W (but not X) memory
  section .data

  ; NUL-terminated digit alphabets for itoa; the number of characters before the
  ; terminator is the numeric base.
  itoa_base_10:   db "0123456789", 0
  itoa_base_16:   db "0123456789abcdef", 0
  ; R/X (executable) memory
  section .text

  ; _write: write an ebx-length string at eax to stdout (file descriptor 1).
  ;
  ; Custom register convention (screw CDECL, I have green hair):
  ;   In:    eax = address of the data, ebx = number of bytes
  ;   Saved: ecx, edx
  ;   Out:   eax/ebx are reloaded from the working copies kept on the stack, so
  ;          after a short write they hold the advanced address and the remaining
  ;          count rather than the original inputs. The `print` and `write`
  ;          macros save and restore both registers themselves.
  ;
  ; Short writes are retried until everything has been written. The loop stops
  ; as soon as the kernel reports an error (negative return) or makes no
  ; progress (zero return), so it cannot spin forever on a stalled descriptor.
  global _write
  _write:
          ; Store the registers we're about to clobber...
          push    ecx
          push    edx
          ; ...and keep the working state on the stack:
          push    eax                     ; [esp+SZOF_PTR] = next byte to write
          push    ebx                     ; [esp]          = bytes still to write
  .again:
          ; This is the Linux write syscall, using its own weird parameter passing.
          linsys  SYS_WRITE, 1, [esp+SZOF_PTR], [esp]
          cmp     eax, 0
          jle     .done                   ; error, or no progress: give up
          cmp     eax, [esp]
          jge     .done                   ; everything requested was written
          ; We didn't write the whole buffer; advance past what went out and retry.
          add     [esp+SZOF_PTR], eax
          sub     [esp], eax
          jmp     .again
  .done:
          ; Good or not, we're cleaning up
          pop     ebx
          pop     eax
          pop     edx
          pop     ecx
          ret
  ; exit <status>
  ;
  ; Terminates the program by issuing SYS_EXIT with the given status.
  ; Control is not expected to continue past this macro, so clobbered registers
  ; are of no concern.
  %macro exit 1
          linsys  SYS_EXIT, %1
          ; Halt.
  %endmacro
  ; itoa: helper to generate numeric strings
  ;
  ; In:    eax = unsigned number to convert
  ;        ebx = NUL-terminated string of digit characters; its length is the base
  ;              (sensible choices include itoa_base_10 and itoa_base_16 :P)
  ; Out:   itoa_buffer    = NUL-terminated digit string, most significant first
  ;        itoa_buffer.sz = length of that string, not counting the NUL
  ; Saved: eax, ecx, edx, esi, edi (ebx is only read)
  ;
  ; An alphabet with fewer than two digits cannot represent numbers positionally
  ; (and a one-digit alphabet would divide by 1 forever), so in that case the
  ; buffer receives "ERR" with a size of 3 instead.
  global itoa
  itoa:
          push    ecx
          push    edx
          push    edi
          push    esi
          push    eax

          ; ecx = strlen(ebx), i.e. the base.
          xor     ecx, ecx
  .baselen_loop:
          cmp     byte [ebx+ecx], 0
          je      .baselen_done
          inc     ecx
          jmp     .baselen_loop
  .baselen_done:
          ; Bases 0 and 1 are unusable, so stop here.
          cmp     ecx, 2
          jb      .error

          ; Begin the process: edi walks forward through the output buffer.
          mov     edi, itoa_buffer
  .convert_loop:
          ; Divide EAX by ECX. The quotient ends up in EAX (perfect for our
          ; recurrence), and the remainder (our index into the digits) in EDX.
          xor     edx, edx                ; high half of the dividend must be zero
          div     ecx
          mov     dl, [ebx+edx]           ; dl = digit character for the remainder
          mov     [edi], dl
          inc     edi
          test    eax, eax
          jnz     .convert_loop           ; more digits while the quotient is non-zero

          ; Write a NUL as promised
          mov     byte [edi], 0
          ; ...and store the size (also as promised): end pointer minus start.
          mov     [itoa_buffer.sz], edi
          sub     qword [itoa_buffer.sz], itoa_buffer

          ; The digits were produced least-significant first, so reverse them in
          ; place for our humans :P  esi walks up from the start, edi down from
          ; the last digit.
          dec     edi
          mov     esi, itoa_buffer
  .reverse_loop:
          ; Don't cross the streams! (Addresses are compared unsigned.)
          cmp     esi, edi
          jae     .reverse_done
          ; Shame on Intel for no M/M XCHG instruction. Shame!
          mov     cl, [esi]
          mov     dl, [edi]
          mov     [edi], cl
          mov     [esi], dl
          inc     esi
          dec     edi
          jmp     .reverse_loop

  .error:
          mov     dword [itoa_buffer], 0x525245 ; Encodes to 'ERR\0'
          mov     qword [itoa_buffer.sz], 3

  .reverse_done:
          ; That's all, blokes :P
          pop     eax
          pop     esi
          pop     edi
          pop     edx
          pop     ecx
          ret
  ; sigint_handler: abandon the read loop and go straight to printing results.
  ;
  ; Discards two stack slots -- intended to be the signal number and the return
  ; address -- and then jumps into _start at .read_done.
  ; NOTE(review): nothing visible in this file installs this handler, and the
  ; two-slot assumption should be confirmed against the signal-delivery ABI of
  ; the build in use before wiring it up.
  global sigint_handler
  sigint_handler:
          pop     eax                     ; word-size register, so this works in either build
          pop     eax
          jmp     _start.read_done
  ; The actual entry point for most executables (including this one)--usually
  ; part of the C runtime (and the default linker script for most platforms).
  ;
  ; Usage: <executable> <file>
  ; Reads <file> in buffer-sized chunks, counts how often each byte value
  ; occurs, then prints the 256 counts (eight per line) followed by their total.
  ;
  ; Exit status: 1 = missing argument, 2 = file could not be opened, 0 otherwise
  ; (including after a read error, whose partial counts are still printed).
  ;
  ; Define CHC_DEBUG at assembly time to also print the file descriptor and the
  ; size of every read.
  global _start ; export
  _start:
          ; At entry the stack holds argc, then argv[0], argv[1], ... in
          ; consecutive slots, so the arguments are simply popped off in order.
          pop     esi                     ; esi = argc
          cmp     esi, 2
          jge     .has_arg
          ; Nag about not enough arguments
          print   {"usage: <executable> <file>",10}
          exit    1

  .has_arg:
          pop     edi                     ; edi = argv[0] (unused)
          pop     ebx                     ; ebx = argv[1], the file to count
          ; ...and definitely try to open it.
          linsys  SYS_OPEN, ebx, O_RDONLY, 0
          cmp     eax, 0
          jge     .file_open
          print   {"Can't open file",10}
          exit    2

  .file_open:
          ; eax now holds our input file descriptor, which we ought to save
          ; somewhere--the stack is nice. From here on it lives at [esp].
          push    eax
  %ifdef CHC_DEBUG
          print   'FD: '
          mov     ebx, itoa_base_10
          call    itoa
          write   itoa_buffer, [itoa_buffer.sz]
          print   10
  %endif

  .file_ready:
          ; Set up a signal handler for if our user gets impatient. This will allow
          ; us to print out our cumulative results, even if the file is large.
          ; (Currently disabled; kept for reference.)
          ; mov eax, SYS_SIGNAL ; Syscall 48 -- signal
          ; mov ebx, SIGINT; ; Signal to define (SIGINT)
          ; mov ecx, sigint_handler ; Handler ("function" sigint_handler)
          ; int 0x80

  .read_loop:
          ; And now the fun begins: we need to read from the file until it's empty
          ; (we guess a zero-byte read means just that).
          linsys  SYS_READ, [esp], buffer, buffer.len
          cmp     eax, 0
          jl      .read_error
          je      .read_done
  %ifdef CHC_DEBUG
          print   'READ: '
          mov     ebx, itoa_base_10
          call    itoa
          write   itoa_buffer, [itoa_buffer.sz]
          print   {' bytes',10}
  %endif

  .count_ready:
          ; For every byte we just read (count in eax), bump the matching counter.
          xor     ecx, ecx                ; ecx = index into buffer
  .count_loop:
          cmp     ecx, eax
          jge     .count_done
          ; Zero-extend the byte so it can be used directly as a table index.
          movzx   ebx, byte [buffer+ecx]
          ; Actually increment the count
          inc     qword [counts+ebx*SZOF_PTR]
          ; ...and postincrement our counter
          inc     ecx
          ; Rinse, lather, repeat
          jmp     .count_loop
  .count_done:
          ; Well, there's nothing better to do than try again...
          jmp     .read_loop

  .read_error:
          print   'Error occured while reading: '
          ; The syscall returned a negative error code; flip the sign so the
          ; unsigned itoa prints the error number itself.
          neg     eax
          mov     ebx, itoa_base_10
          call    itoa
          write   itoa_buffer, [itoa_buffer.sz]
          print   10
          ; Fall through: whatever was counted so far is still reported.

  .read_done:
          ; We now need to write out our data. (This path may be branched to by
          ; our SIGINT signal handler, so it is divergent beginning now.)
          ; First, try to close our open file descriptor. We don't care if this fails.
          linsys  SYS_CLOSE, [esp]
          xor     ecx, ecx                ; ecx = byte value being reported
          xor     esi, esi                ; esi = running total of all counts
  .print_loop:
          ; First, of course, check the terminating condition.
          cmp     ecx, 256
          jge     .print_done
          ; Print a pretty hex leader :3
          print   '0x'
          ; Compute the hex of ecx
          mov     eax, ecx
          mov     ebx, itoa_base_16
          call    itoa
          ; ...and write it to the output :P
          write   itoa_buffer, [itoa_buffer.sz]
          ; Print out our separator
          print   {':',9}
          ; ...then grab the cardinality we're looking for
          mov     eax, [counts+ecx*SZOF_PTR]
          ; Sum it into the running total
          add     esi, eax
          ; Convert that (to decimal, this time)
          mov     ebx, itoa_base_10
          call    itoa
          ; ...and write that too :D
          write   itoa_buffer, [itoa_buffer.sz]
          ; End the line after every eighth entry (index & 7 == 7); otherwise
          ; separate entries with a tab.
          mov     edx, ecx
          and     edx, 0x7
          cmp     edx, 0x7
          jne     .print_space
          print   10
          jmp     .print_again
  .print_space:
          print   9
  .print_again:
          inc     ecx
          jmp     .print_loop

  .print_done:
          ; One more little thing: print out the running total
          print   {'total:',9}
          mov     eax, esi
          mov     ebx, itoa_base_10
          call    itoa
          write   itoa_buffer, [itoa_buffer.sz]
          print   10
          ; Our job here is done. Halt.
          exit    0