Browse Source

initial shit

master
Graham Northup 6 years ago
commit
d0630ae1a4
  1. 5
      Makefile
  2. 479
      chc.s
  3. 15
      fmd.py
  4. 88
      fmg.py
  5. 347
      fmv.s

5
Makefile

@ -0,0 +1,5 @@
# Build both tools from their single assembly source files.
# `all` is not a real file, so mark it phony to keep a stray file named
# "all" from suppressing the build.
.PHONY: all
all: fmv chc

# Assemble NAME.s into NAME.o, then link NAME as a static ELF64 executable.
# The object name is given explicitly so the link step does not depend on
# the assembler's default output naming.
%: %.s
	yasm -f elf64 -o $@.o $<
	ld -o $@ $@.o

479
chc.s

@ -0,0 +1,479 @@
; My response to http://web2.clarkson.edu/projects/cosi/sp2015/students/daceyj/UrandomCounter/derp.cpp
; Remember: real programmers use assembly :P
; build with:
; $ yasm -f elf64 chc.s
; $ ld -o chc chc.o
; ...obviously choose the appropriate ELF format for your architecture :P
; Build mode selection: BUILD_64 picks the x86-64 flavour; leaving it
; undefined selects the 32-bit branch below.
%define BUILD_64

%ifdef BUILD_64
BITS 64
; Size in bytes of a pointer / stack slot.
%define SZOF_PTR 8
; The code is written with 32-bit register names; in 64-bit mode every one
; of them is textually rewritten to its 64-bit counterpart. (Kids, don't try
; this at home.)
%define eax rax
%define ebx rbx
%define ecx rcx
%define edx rdx
%define esi rsi
%define edi rdi
%define ebp rbp
%define esp rsp
%else
; NOTE(review): the syscall macros in this file name rdi/rsi/r8-r11 and use
; `syscall` directly, so this branch presumably does not assemble as-is.
BITS 32
%define SZOF_PTR 4
%endif
; Constants (values as found on the author's system).

; open() access modes
O_RDONLY:       equ 0
O_WRONLY:       equ 1
O_RDWR:         equ 2

; Signal numbers
SIGINT:         equ 2

; System call numbers, in ascending order
%define SYS_READ        0
%define SYS_WRITE       1
%define SYS_OPEN        2
%define SYS_CLOSE       3
%define SYS_SIGACTION   13
%define SYS_EXIT        60

; Configuration
; Bytes requested per read (1.5 MiB).
%define BUFFER_SZ       1572864
; Capacity of the itoa conversion buffer, in bytes.
%define ITOABUF_SZ      64
; Macros
; print <bytes>
;   Emits a constant byte string through _write. The argument is anything
;   `db` accepts; wrap lists in braces, e.g. print {"text",10}.
;   eax and ebx are preserved. The string is stored in .data followed by a
;   NUL terminator, but the terminator is NOT part of the length written.
%macro print 1
; Ensure we write the string to the data section.
; This syntax does not unset the special macro __SECT__, which is used to restore
; the previous section below.
[section .data]
%%msg: db %1
; Measure the length before the terminator is emitted; measuring after it
; made every print write a stray NUL byte to the output.
%%msglen: equ $ - %%msg
        db 0
; Restore to the section we're in (probably .text)
__SECT__
        push eax
        push ebx
        mov eax, %%msg
        mov ebx, %%msglen
        call _write
        pop ebx
        pop eax
%endmacro
; write <ptr>, <len>
;   Passes <len> bytes starting at <ptr> to _write (pointer in eax, length in
;   ebx). eax and ebx are saved and restored around the call.
;   <ptr> is loaded into eax before <len> is evaluated, so <len> must not be
;   an expression that depends on the caller's eax.
%macro write 2
push eax
push ebx
mov eax, %1
mov ebx, %2
call _write
pop ebx
pop eax
%endmacro
; _linsys_pusha
;   Saves rdi, rsi, rdx, rcx, r8, r9, r10 and r11 into the static save area
;   that ends at _syscall_save_stack. The stack pointer is parked in _old_esp,
;   pointed at the save area for the pushes, then put back, so the caller's
;   real stack is left untouched and [esp+N] operands still resolve afterwards.
;   The save area and _old_esp are single static slots, so this cannot nest.
%macro _linsys_pusha 0
mov [_old_esp], esp
mov esp, _syscall_save_stack
push rdi
push rsi
push rdx
push rcx
push r8
push r9
push r10
push r11
mov esp, [_old_esp]
%endmacro
; _linsys_popa
;   Restores the eight registers saved by _linsys_pusha. The stack pointer is
;   parked in _old_esp, pointed at the bottom of the static save area
;   (_syscall_save_stack minus eight slots), the registers are popped in the
;   reverse of the push order, and the real stack pointer is put back.
;   eax is not touched, so a syscall result in eax survives.
%macro _linsys_popa 0
mov [_old_esp], esp
mov esp, _syscall_save_stack
; Step down to where the last of the eight pushes landed.
sub esp, 8*SZOF_PTR
pop r11
pop r10
pop r9
pop r8
pop rcx
pop rdx
pop rsi
pop rdi
mov esp, [_old_esp]
%endmacro
; linsys <nr> [, <arg1> [, <arg2> [, <arg3>]]]
;   Issues system call <nr> with up to three arguments (overloaded on the
;   parameter count). The result is left in eax; rdi, rsi, rdx, rcx and
;   r8-r11 are saved before and restored after the call by
;   _linsys_pusha/_linsys_popa.
;   Operands are loaded in order (eax, rdi, rsi, rdx), so a later operand
;   must not name a register that an earlier load has already overwritten.
%macro linsys 1
_linsys_pusha
mov eax, %1
syscall
_linsys_popa
%endmacro
; One argument: rdi.
%macro linsys 2
_linsys_pusha
mov eax, %1
mov rdi, %2
syscall
_linsys_popa
%endmacro
; Two arguments: rdi, rsi.
%macro linsys 3
_linsys_pusha
mov eax, %1
mov rdi, %2
mov rsi, %3
syscall
_linsys_popa
%endmacro
; Three arguments: rdi, rsi, rdx.
%macro linsys 4
_linsys_pusha
mov eax, %1
mov rdi, %2
mov rsi, %3
mov rdx, %4
syscall
_linsys_popa
%endmacro
; linsys <nr>, <arg1>, <arg2>, <arg3>, <arg4> [, <arg5> [, <arg6>]]
;   Four- to six-argument forms of the system call macro. The result is left
;   in eax; every argument register is saved and restored around the call by
;   _linsys_pusha/_linsys_popa.
;   The Linux x86-64 syscall convention passes arguments in
;   rdi, rsi, rdx, r10, r8, r9. The fourth argument goes in r10, NOT rcx:
;   the `syscall` instruction itself overwrites rcx (and r11), so a value
;   placed in rcx never reaches the kernel.
;   Operands are loaded in order, so a later operand must not name a register
;   that an earlier load has already overwritten.
%macro linsys 5
        _linsys_pusha
        mov eax, %1
        mov rdi, %2
        mov rsi, %3
        mov rdx, %4
        mov r10, %5
        syscall
        _linsys_popa
%endmacro
; Five arguments: rdi, rsi, rdx, r10, r8.
%macro linsys 6
        _linsys_pusha
        mov eax, %1
        mov rdi, %2
        mov rsi, %3
        mov rdx, %4
        mov r10, %5
        mov r8, %6
        syscall
        _linsys_popa
%endmacro
; Six arguments: rdi, rsi, rdx, r10, r8, r9.
%macro linsys 7
        _linsys_pusha
        mov eax, %1
        mov rdi, %2
        mov rsi, %3
        mov rdx, %4
        mov r10, %5
        mov r8, %6
        mov r9, %7
        syscall
        _linsys_popa
%endmacro
; R/W (not X) memory that's zeroed on startup
section .bss
; Per-byte-value occurrence counters.
;
; One slot per possible byte value (256). The counting and printing code
; indexes this table with a stride of SZOF_PTR bytes, so each slot must be
; SZOF_PTR bytes wide: a quadword in the 64-bit build, a doubleword in the
; 32-bit build (the previous `resw` reserved only 2-byte slots, half of what
; the 4-byte stride walks over). The slot width also bounds how many
; occurrences can be counted, which matters for endless inputs like /dev/zero.
global counts
counts:
%ifdef BUILD_64
        resq 256
%else
        resd 256
%endif
; Read buffer
;
; For efficiency, we try to read this many bytes at a time from the input file.
buffer:
        resb BUFFER_SZ
.len: equ $ - buffer
; itoa buffer and size
;
; itoa_buffer holds the conversion result of the itoa routine;
; itoa_buffer.sz receives the number of digits it produced.
global itoa_buffer
itoa_buffer:
        resb ITOABUF_SZ
.sz:
        resq 1
; A little non-stack swap space for syscall convenience: _old_esp parks the
; real stack pointer, and the eight slots below _syscall_save_stack form a
; tiny downward-growing stack for the registers saved around each syscall.
_old_esp: resq 1
        resq 8
_syscall_save_stack:
; Initialised data (read/write, not executable).
section .data
; NUL-terminated digit alphabets for itoa: the character at index d is the
; glyph for digit value d, and the string length is the radix.
itoa_base_10:   db '0123456789', 0
itoa_base_16:   db '0123456789abcdef', 0
; R/X (executable) memory
section .text
; _write: write a buffer to stdout (fd 1), retrying after partial writes.
;
; In:    eax = address of the first byte, ebx = number of bytes
; Out:   nothing meaningful. After a partial write, eax/ebx come back
;        advanced to the unwritten remainder (the print/write macros save
;        and restore both around the call).
; Saves: ecx, edx. The syscall macro preserves the syscall argument registers.
; Errors are deliberately ignored: on a negative result the routine returns.
; A result of 0 also ends the loop, since retrying a write that makes no
; progress would spin forever.
;
; Screw CDECL, I have green hair.
global _write
_write:
        ; Store the registers we promise to keep...
        push ecx
        push edx
        ; ...and keep the cursor on the stack: [esp+SZOF_PTR] = pointer, [esp] = remaining length.
        push eax
        push ebx
.again:
        linsys SYS_WRITE, 1, [esp+SZOF_PTR], [esp]
        cmp eax, 0
        jle .done               ; error (< 0) or no progress (0): give up
        cmp eax, [esp]
        jge .done               ; everything that remained was written
        ; Short write: advance the cursor past what was accepted and retry.
        add [esp+SZOF_PTR], eax
        sub [esp], eax
        jmp .again
.done:
        ; Good or not, we're cleaning up
        pop ebx
        pop eax
        pop edx
        pop ecx
        ret
; Exit the program
; exit <status>: issue the SYS_EXIT system call with <status> as its argument.
; No instruction is emitted after the call, so whatever follows the macro in
; the source would run if the call ever came back.
%macro exit 1
; This is a divergent path, so we don't need to worry about clobbers.
linsys SYS_EXIT, %1
; Halt.
%endmacro
; itoa: render an unsigned number as text in an arbitrary radix.
;
; In:    eax = value (treated as unsigned)
;        ebx = NUL-terminated string of digit characters; its length is the
;              radix and character i is the glyph for digit value i.
;              Sensible choices include itoa_base_10 and itoa_base_16 :P
; Out:   itoa_buffer    = digits, most significant first, NUL-terminated
;        itoa_buffer.sz = number of digits (terminator not counted)
;        Both live in .bss.
; Saves: every register, eax included.
; A digit string shorter than two characters cannot express a positional
; number: radix 0 would divide by zero, and radix 1 never reduces a nonzero
; value, so the conversion loop would run off the end of the buffer. Either
; case yields the text 'ERR' with size 3.
; A full-width value in radix 2 produces ITOABUF_SZ digits; its terminator
; then lands on the first byte of itoa_buffer.sz, which is rewritten with the
; size immediately afterwards.
global itoa
itoa:
        push ecx
        push edx
        push edi
        push esi
        push eax
        ; ecx = strlen(ebx) = radix
        mov ecx, 0
.baselen_loop:
        cmp byte [ebx+ecx], 0
        je .baselen_done
        inc ecx
        jmp .baselen_loop
.baselen_done:
        ; Refuse radix 0 and radix 1 (see header).
        cmp ecx, 2
        jl .error
        ; Begin the process.
        mov edi, itoa_buffer
.convert_loop:
        ; Divide EAX by ECX. The quotient ends up in EAX (perfect for our recurrence), and the
        ; remainder (which is our index into the digits) goes into EDX.
        mov edx, 0
        div ecx
        mov dl, [ebx+edx]
        mov [edi], dl
        inc edi
        cmp eax, 0
        jne .convert_loop
        ; Write a NUL as promised
        mov byte [edi], 0
        ; ...and store the size (also as promised): end pointer minus buffer start
        mov [itoa_buffer.sz], edi
        sub qword [itoa_buffer.sz], itoa_buffer
        ; The digits were produced least significant first, so reverse them in place.
        dec edi
        mov esi, itoa_buffer
.reverse_loop:
        ; Stop once the two cursors meet or cross.
        cmp esi, edi
        jge .reverse_done
        ; No memory-to-memory XCHG, so swap through two byte registers
        ; (ecx and edx are restored from the stack below).
        mov cl, [esi]
        mov dl, [edi]
        mov [edi], cl
        mov [esi], dl
        inc esi
        dec edi
        jmp .reverse_loop
.error:
        mov dword [itoa_buffer], 0x525245 ; Encodes to 'ERR\0'
        mov qword [itoa_buffer.sz], 3
.reverse_done:
        ; That's all, blokes :P
        pop eax
        pop esi
        pop edi
        pop edx
        pop ecx
        ret
; sigint_handler: meant to let an impatient user interrupt a long read and
; still get the counts printed, by jumping straight to _start.read_done.
; Nothing in the visible source installs it; the signal setup in _start is
; commented out.
; NOTE(review): the two pops assume the stack holds "signal number, return
; address" on entry. That looks like a 32-bit-style frame; on x86-64 Linux
; the signal number presumably arrives in a register instead - confirm
; before enabling. _start.read_done also reads the file descriptor from
; [esp], which would no longer be the top of the stack when entered from here.
global sigint_handler
sigint_handler:
; If we end up here (by a signal), we have two useless things on the stack: the signal number
; and the return address. Destroy them judiciously.
pop eax ; Using a word-size register to work with either build
pop eax
jmp _start.read_done
; The actual entry point for most executables (including this one)--usually part of the C runtime
; (and the default linker script for most platforms).
;
; _start: count how often each byte value occurs in the file named by the
; first command-line argument, then print a 256-entry table plus the total.
;
; Exit status: 0 after printing the table, 1 when no file argument was given,
; 2 when the file cannot be opened.
; Define DEBUG_TRACE at assembly time to also print the file descriptor and
; the size of every read. Those traces used to sit behind unconditional jumps
; and could never run.
global _start ; export
_start:
        ; The arguments are consumed straight off the entry stack with pops:
        ; first argc, then argv[0], then argv[1].
        pop esi                         ; esi = argc
        cmp esi, 2
        jge .has_arg
        ; Nag about not enough arguments
        print {"usage: <executable> <file>",10}
        exit 1
.has_arg:
        pop edi                         ; argv[0] (program name, unused)
        pop ebx                         ; argv[1] (path of the file to count)
        ; ...and definitely try to open it.
        linsys SYS_OPEN, ebx, O_RDONLY, 0
        cmp eax, 0
        jge .file_open
        print {"Can't open file",10}
        exit 2
.file_open:
        ; eax now holds our input file descriptor; it lives at [esp] from here on.
        push eax
%ifdef DEBUG_TRACE
        print 'FD: '
        mov ebx, itoa_base_10
        call itoa
        write itoa_buffer, [itoa_buffer.sz]
        print 10
%endif
.file_ready:
        ; A SIGINT handler (sigint_handler) was planned here so that an impatient user would
        ; still get cumulative results for a large file; it is not installed.
        ; Clear all of ebx once: the counting loop only ever writes its low byte.
        mov ebx, 0
.read_loop:
        ; Read until the file is empty (we take a zero-byte read to mean just that).
        linsys SYS_READ, [esp], buffer, buffer.len
        cmp eax, 0
        jl .read_error
        je .read_done
%ifdef DEBUG_TRACE
        print 'READ: '
        mov ebx, itoa_base_10
        call itoa
        write itoa_buffer, [itoa_buffer.sz]
        print {' bytes',10}
        mov ebx, 0                      ; ebx was borrowed for the digit table
%endif
.count_ready:
        ; For every byte we just read (count eax), increment the appropriate count.
        mov ecx, 0
.count_loop:
        cmp ecx, eax
        jge .count_done
        ; Single-byte load: the rest of ebx is still zero, so ebx = byte value.
        mov bl, [buffer+ecx]
        inc qword [counts+ebx*SZOF_PTR]
        inc ecx
        jmp .count_loop
.count_done:
        ; Well, there's nothing better to do than try again...
        jmp .read_loop
.read_error:
        print 'Error occured while reading: '
        ; The syscall result is a negated error number; flip the sign so the (unsigned)
        ; conversion prints the error number itself rather than a 20-digit value.
        neg eax
        mov ebx, itoa_base_10
        call itoa
        write itoa_buffer, [itoa_buffer.sz]
        print 10
        ; Fall through: report whatever was counted before the failure.
.read_done:
        ; We now need to write out our data. (sigint_handler also jumps here.)
        ; First, try to close our open file descriptor. We don't care if this fails.
        linsys SYS_CLOSE, [esp]
        mov ecx, 0                      ; ecx = byte value being reported
        mov esi, 0                      ; esi = running total of all counts
.print_loop:
        ; First, of course, check the terminating condition.
        cmp ecx, 256
        jge .print_done
        ; Print a pretty hex leader :3
        print '0x'
        ; Compute the hex of ecx
        mov eax, ecx
        mov ebx, itoa_base_16
        call itoa
        ; ...and write it to the output :P
        write itoa_buffer, [itoa_buffer.sz]
        ; Print out our separator
        print {':',9}
        ; ...then grab the cardinality we're looking for
        mov eax, [counts+ecx*SZOF_PTR]
        ; Sum it into the running total
        add esi, eax
        ; Convert that (to decimal, this time)
        mov ebx, itoa_base_10
        call itoa
        ; ...and write that too :D
        write itoa_buffer, [itoa_buffer.sz]
        ; Eight entries per row: end the line after every index that is a multiple of 8
        ; minus one, otherwise separate entries with a tab.
        mov edx, ecx
        and edx, 0x7
        cmp edx, 0x7
        jne .print_space
        print 10
        jmp .print_again
.print_space:
        print 9
.print_again:
        inc ecx
        jmp .print_loop
.print_done:
        ; One more little thing: print out the running total
        print {'total:',9}
        mov eax, esi
        mov ebx, itoa_base_10
        call itoa
        write itoa_buffer, [itoa_buffer.sz]
        print 10
        ; Our job here is done. Halt.
        exit 0

15
fmd.py

@ -0,0 +1,15 @@
import sys
# The 64-symbol alphabet: a symbol's position in ENC is its 6-bit code.
ENC = b'abcdefghijklmnopqrstuvwxyz0123456789 \n.,:;!"\'-=_[](){}<>@#$%^&*?'

# Reverse table: byte value -> 6-bit code. Any byte that is not in the
# alphabet falls back to code 63.
ENC_D = [63] * 256
for code, byte in enumerate(ENC):
    ENC_D[byte] = code
# Upper-case letters share the codes of their lower-case counterparts.
for code, byte in enumerate(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'):
    ENC_D[byte] = code

if __name__ == '__main__':
    # Alphabet size goes to stderr; the table goes to stdout as one `db` line.
    print(len(ENC), file=sys.stderr)
    print('db', ','.join(str(i) for i in ENC_D))

88
fmg.py

@ -0,0 +1,88 @@
import array, sys, random
from fmd import *
# When true, the generation loop reports on stderr each time a context has no
# observed successor and it retries with a shorter one.
DEBUG_RESTARTS = False
def to_byte_quad(n):
    """Encode the low 24 bits of ``n`` as four ENC symbols, most significant first."""
    symbols = []
    for shift in (18, 12, 6, 0):
        symbols.append(ENC[(n >> shift) & 0x3f])
    return bytes(symbols)
def from_byte_quad(b):
    """Decode a byte string into an integer, six bits per byte via ENC_D.

    The first byte ends up in the most significant position; an empty
    input decodes to 0.
    """
    value = 0
    for byte in b:
        value = (value << 6) | ENC_D[byte]
    return value
def falling_power(base, hi, lo):
    """Return base**hi + base**(hi-1) + ... + base**(lo+1); 0 when hi <= lo."""
    return sum(base ** exponent for exponent in range(hi, lo, -1))
# Round-trip self-checks of the 6-bit codec (diagnostics only, on stderr).
print(to_byte_quad(from_byte_quad(b'asdf')), file=sys.stderr)
print(to_byte_quad(0xdeadf00d), file=sys.stderr)
res = from_byte_quad(to_byte_quad(0xadfeed))
print(hex(res), file=sys.stderr)

# Load the frequency table named by argv[1]. It holds, back to back, a
# 64**4-entry table, a 64**3-entry table, a 64**2-entry table and a 64-entry
# table of unsigned counters.
print('Loading file...', file=sys.stderr)
frq = array.array('I')
with open(sys.argv[1], 'rb') as table_file:
    frq.fromfile(table_file, falling_power(64, 4, 0))
print('Calculating totals...', file=sys.stderr)
tot = sum(frq)
print('Total characters:', tot, file=sys.stderr)

# argv[2] is the number of characters to generate.
gen = int(sys.argv[2])
nz = [idx for idx, i in enumerate(frq) if i > 0]
print(len(nz), 'nonzero entries', file=sys.stderr)

# Seed the four-symbol window with a random entry that was actually observed.
sti = random.choice(nz)
buf = to_byte_quad(sti)
sm = 0             # total number of candidate successors seen
its = 0            # number of characters chosen
hits = [0] * 4     # hits[r - 1]: how often the table tried at radix r had a successor
for idx in range(gen):
    sys.stdout.buffer.write(buf[0:1])
    # Context = last three symbols, padded with code 0 so the decoded index is
    # the first of the 64 successor slots for that context.
    basebuf = buf[1:] + b'a'
    for radix in range(4, 0, -1):
        rbase = falling_power(64, 4, radix)
        base = rbase + from_byte_quad(basebuf)
        nlf = frq[base:base + 64]
        #print('NLF @', base, hex(base), ':', nlf)
        nznl = [slot for slot, count in enumerate(nlf) if count > 0]
        if nznl:
            hits[radix - 1] += 1
            break
        # NOTE(review): the '~' marker is assumed to belong to the debug
        # branch; the extracted source lost its indentation here - confirm.
        if DEBUG_RESTARTS:
            print('Wedge', 5 - radix, ', not found with', basebuf, 'at base', base, '(', hex(base), ') from', rbase, '(', hex(rbase), ')', file=sys.stderr)
            sys.stdout.buffer.write(b'~')
        # Back off: drop the oldest symbol and try the next, shorter table.
        basebuf = basebuf[1:]
    else:
        print('Wedged, no out branch from', buf, file=sys.stderr)
        break
    sm += len(nznl)
    its += 1
    nzrg = sum(nlf[i] for i in nznl)
    rn = random.randrange(nzrg)
    #print()
    #print('SEL:', rn, '/', nzrg)
    # Weighted pick: rn is uniform in [0, nzrg), so take the first slot whose
    # cumulative count exceeds it. Testing `rn <= 0` instead selected slot 0
    # on a draw of 0 even when its count was zero, and shifted one unit of
    # probability away from the last nonzero slot.
    ridx = 63
    for slot, f in enumerate(nlf):
        rn -= f
        if rn < 0:
            ridx = slot
            break
    else:
        # Unreachable while nzrg equals the sum of nlf; kept as a tripwire,
        # and reported on stderr so it cannot pollute the generated text.
        print('Bugged, selected out of range rn', rn, 'out of', nzrg, file=sys.stderr)
        break
    cp = ENC[ridx]
    bcp = bytes((cp,))
    #print('CP:', ridx, cp, bcp)
    buf = buf[1:] + bcp
print()
sys.stdout.flush()
if its:
    print('Average', sm, '/', its, '=', sm / its, 'branches', file=sys.stderr)
else:
    # Nothing was generated (gen == 0 or an immediate wedge): no average.
    print('Average', sm, '/', its, '= n/a branches', file=sys.stderr)
print('Hits:', hits, file=sys.stderr)

347
fmv.s

@ -0,0 +1,347 @@
; Build mode selection: BUILD_64 picks the x86-64 flavour; leaving it
; undefined selects the 32-bit branch below.
%define BUILD_64

%ifdef BUILD_64
BITS 64
; Size in bytes of a pointer / stack slot.
%define SZOF_PTR 8
; The code is written with 32-bit register names; in 64-bit mode every one
; of them is textually rewritten to its 64-bit counterpart.
%define eax rax
%define ebx rbx
%define ecx rcx
%define edx rdx
%define esi rsi
%define edi rdi
%define ebp rbp
%define esp rsp
%else
; NOTE(review): the macros in this file name r8-r12 and use `syscall`
; directly, so this branch presumably does not assemble as-is.
BITS 32
%define SZOF_PTR 4
%endif
; open() access mode
O_RDONLY:       equ 0
; Signal number
SIGINT:         equ 2

; System call numbers, in ascending order
%define SYS_READ        0
%define SYS_WRITE       1
%define SYS_OPEN        2
%define SYS_CLOSE       3
%define SYS_SIGACTION   13
%define SYS_EXIT        60

; Bytes requested per read of the input file (1.5 MiB).
%define BUFFER_SZ       1572864
; print <bytes>
;   Emits a constant byte string through _write. The argument is anything
;   `db` accepts; wrap lists in braces, e.g. print {"text",10}.
;   eax and ebx are preserved. The string is stored in .data followed by a
;   NUL terminator, but the terminator is NOT part of the length written.
%macro print 1
; Ensure we write the string to the data section.
; This syntax does not unset the special macro __SECT__, which is used to restore
; the previous section below.
[section .data]
%%msg: db %1
; Measure the length before the terminator is emitted; measuring after it
; made every message end with a stray NUL byte.
%%msglen: equ $ - %%msg
        db 0
; Restore to the section we're in (probably .text)
__SECT__
        push eax
        push ebx
        mov eax, %%msg
        mov ebx, %%msglen
        call _write
        pop ebx
        pop eax
%endmacro
; write <ptr>, <len>
;   Passes <len> bytes starting at <ptr> to _write (pointer in eax, length in
;   ebx). eax and ebx are saved and restored around the call.
;   <ptr> is loaded into eax before <len> is evaluated, so <len> must not be
;   an expression that depends on the caller's eax.
%macro write 2
push eax
push ebx
mov eax, %1
mov ebx, %2
call _write
pop ebx
pop eax
%endmacro
; _linsys_pusha
;   Saves rdi, rsi, rdx, rcx, r8, r9, r10 and r11 into the static save area
;   that ends at _syscall_save_stack. The stack pointer is parked in _old_esp,
;   pointed at the save area for the pushes, then put back, so the caller's
;   real stack is left untouched and [esp+N] operands still resolve afterwards.
;   The save area and _old_esp are single static slots, so this cannot nest.
%macro _linsys_pusha 0
mov [_old_esp], esp
mov esp, _syscall_save_stack
push rdi
push rsi
push rdx
push rcx
push r8
push r9
push r10
push r11
mov esp, [_old_esp]
%endmacro
; _linsys_popa
;   Restores the eight registers saved by _linsys_pusha. The stack pointer is
;   parked in _old_esp, pointed at the bottom of the static save area
;   (_syscall_save_stack minus eight slots), the registers are popped in the
;   reverse of the push order, and the real stack pointer is put back.
;   eax is not touched, so a syscall result in eax survives. Note that this
;   also puts r8-r11 back to their values from before the system call.
%macro _linsys_popa 0
mov [_old_esp], esp
mov esp, _syscall_save_stack
; Step down to where the last of the eight pushes landed.
sub esp, 8*SZOF_PTR
pop r11
pop r10
pop r9
pop r8
pop rcx
pop rdx
pop rsi
pop rdi
mov esp, [_old_esp]
%endmacro
; linsys <nr> [, <arg1> [, <arg2> [, <arg3>]]]
;   Issues system call <nr> with up to three arguments (overloaded on the
;   parameter count). The result is left in eax; rdi, rsi, rdx, rcx and
;   r8-r11 are saved before and restored after the call by
;   _linsys_pusha/_linsys_popa.
;   Operands are loaded in order (eax, rdi, rsi, rdx), so a later operand
;   must not name a register that an earlier load has already overwritten.
%macro linsys 1
_linsys_pusha
mov eax, %1
syscall
_linsys_popa
%endmacro
; One argument: rdi.
%macro linsys 2
_linsys_pusha
mov eax, %1
mov rdi, %2
syscall
_linsys_popa
%endmacro
; Two arguments: rdi, rsi.
%macro linsys 3
_linsys_pusha
mov eax, %1
mov rdi, %2
mov rsi, %3
syscall
_linsys_popa
%endmacro
; Three arguments: rdi, rsi, rdx.
%macro linsys 4
_linsys_pusha
mov eax, %1
mov rdi, %2
mov rsi, %3
mov rdx, %4
syscall
_linsys_popa
%endmacro
; linsys <nr>, <arg1>, <arg2>, <arg3>, <arg4> [, <arg5> [, <arg6>]]
;   Four- to six-argument forms of the system call macro. The result is left
;   in eax; every argument register is saved and restored around the call by
;   _linsys_pusha/_linsys_popa.
;   The Linux x86-64 syscall convention passes arguments in
;   rdi, rsi, rdx, r10, r8, r9. The fourth argument goes in r10, NOT rcx:
;   the `syscall` instruction itself overwrites rcx (and r11), so a value
;   placed in rcx never reaches the kernel.
;   Operands are loaded in order, so a later operand must not name a register
;   that an earlier load has already overwritten.
%macro linsys 5
        _linsys_pusha
        mov eax, %1
        mov rdi, %2
        mov rsi, %3
        mov rdx, %4
        mov r10, %5
        syscall
        _linsys_popa
%endmacro
; Five arguments: rdi, rsi, rdx, r10, r8.
%macro linsys 6
        _linsys_pusha
        mov eax, %1
        mov rdi, %2
        mov rsi, %3
        mov rdx, %4
        mov r10, %5
        mov r8, %6
        syscall
        _linsys_popa
%endmacro
; Six arguments: rdi, rsi, rdx, r10, r8, r9.
%macro linsys 7
        _linsys_pusha
        mov eax, %1
        mov rdi, %2
        mov rsi, %3
        mov rdx, %4
        mov r10, %5
        mov r8, %6
        mov r9, %7
        syscall
        _linsys_popa
%endmacro
; exit <status>: issue the SYS_EXIT system call with <status> as its argument.
; No instruction is emitted after the call, so whatever follows the macro in
; the source would run if the call ever came back.
%macro exit 1
; This is a divergent path, so we don't need to worry about clobbers.
linsys SYS_EXIT, %1
; Halt.
%endmacro
; push_char <code>
;   Feeds one 6-bit symbol code into the sliding four-symbol window kept in
;   r8 (oldest) .. r11 (newest) and, once the window is full, bumps one
;   counter in each table:
;     counts_b0[r8], counts_b1[r8:r9], counts_b2[r8:r9:r10],
;     counts[r8:r9:r10:r11]
;   where a:b means (a << 6) | b.
;   r12 counts the symbols seen so far, saturating at 3; the first three
;   symbols only fill the window.
;   Clobbers: esi, r8-r12, flags.
%macro push_char 1
        ; Slide the window along by one symbol.
        mov r8, r9
        mov r9, r10
        mov r10, r11
        mov r11, %1
        cmp r12, 3
        jge %%window_full
        ; Still warming up: just note that one more symbol has arrived.
        inc r12
        jmp %%finished
%%window_full:
        ; esi accumulates the index, growing by six bits per table.
        mov esi, r8
        inc dword [counts_b0 + esi * 4]
        shl esi, 6
        or esi, r9
        inc dword [counts_b1 + esi * 4]
        shl esi, 6
        or esi, r10
        inc dword [counts_b2 + esi * 4]
        shl esi, 6
        or esi, r11
        inc dword [counts + esi * 4]
%%finished:
%endmacro
; Zero-initialised storage.
section .bss
; The four frequency tables, stored back to back as 32-bit counters. The
; whole span from counts to counts.len is read and written as one flat blob,
; so the order of these reservations defines the table-file layout.
; Four-symbol table: 64^4 counters.
counts:
resd (64 * 64 * 64 * 64)
; Three-symbol table: 64^3 counters.
counts_b2:
resd (64 * 64 * 64)
; Two-symbol table: 64^2 counters.
counts_b1:
resd (64 * 64)
; Single-symbol table: 64 counters.
counts_b0:
resd 64
; Total size in bytes of all four tables.
counts.len: equ $ - counts
; Counter (not byte) offsets of counts_b0, counts_b1 and counts_b2 from the
; start of counts. Not referenced elsewhere in the visible source.
%define COUNT_BASE_0 ((64 * 64 * 64 * 64) + (64 * 64 * 64) + (64 * 64))
%define COUNT_BASE_1 ((64 * 64 * 64 * 64) + (64 * 64 * 64))
%define COUNT_BASE_2 (64 * 64 * 64 * 64)
; Read buffer for the input file; buffer.len is its size in bytes.
buffer:
resb BUFFER_SZ
.len: equ $ - buffer
; Scratch for the syscall macros: _old_esp parks the real stack pointer, and
; the eight slots below _syscall_save_stack hold the registers saved around
; each system call.
_old_esp: resq 1
resq 8
_syscall_save_stack:
section .data
; mapping: 256-entry table translating an input byte into its 6-bit symbol
; code. Letters map to 0-25 regardless of case, digits to 26-35, selected
; punctuation to 36-62, and every other byte to 63.
mapping:
        times 10 db 63                                  ; 0x00-0x09
        db 37                                           ; 0x0a (newline)
        times 21 db 63                                  ; 0x0b-0x1f
        db 36,42,43,57,58,59,61,44                      ; 0x20-0x27
        db 50,51,62,63,39,45,38,63                      ; 0x28-0x2f
        db 26,27,28,29,30,31,32,33,34,35                ; 0x30-0x39 (digits)
        db 40,41,54,46,55,63,56                         ; 0x3a-0x40
        db 0,1,2,3,4,5,6,7,8,9,10,11,12                 ; 0x41-0x4d (A-M)
        db 13,14,15,16,17,18,19,20,21,22,23,24,25       ; 0x4e-0x5a (N-Z)
        db 48,63,49,60,47,63                            ; 0x5b-0x60
        db 0,1,2,3,4,5,6,7,8,9,10,11,12                 ; 0x61-0x6d (a-m)
        db 13,14,15,16,17,18,19,20,21,22,23,24,25       ; 0x6e-0x7a (n-z)
        db 52,63,53,63,63                               ; 0x7b-0x7f
        times 128 db 63                                 ; 0x80-0xff
.len: equ $ - mapping
section .text
; _write: write a diagnostic message to stderr (fd 2), retrying after
; partial writes.
;
; In:    eax = address of the first byte, ebx = number of bytes
; Out:   nothing meaningful. After a partial write, eax/ebx come back
;        advanced to the unwritten remainder (the print/write macros save
;        and restore both around the call).
; Saves: ecx, edx. The syscall macro preserves the syscall argument registers.
; Every attempt targets fd 2. The retry used to go to fd 1, which in this
; program carries the binary table, so the tail of a partially written
; message would have been spliced into the table output.
; Errors are deliberately ignored: on a negative result the routine returns.
; A result of 0 also ends the loop, since retrying a write that makes no
; progress would spin forever.
global _write
_write:
        ; Store the registers we promise to keep...
        push ecx
        push edx
        ; ...and keep the cursor on the stack: [esp+SZOF_PTR] = pointer, [esp] = remaining length.
        push eax
        push ebx
.again:
        linsys SYS_WRITE, 2, [esp+SZOF_PTR], [esp]
        cmp eax, 0
        jle .done               ; error (< 0) or no progress (0): give up
        cmp eax, [esp]
        jge .done               ; everything that remained was written
        ; Short write: advance the cursor past what was accepted and retry.
        add [esp+SZOF_PTR], eax
        sub [esp], eax
        jmp .again
.done:
        ; Good or not, we're cleaning up
        pop ebx
        pop eax
        pop edx
        pop ecx
        ret
; _start: build or update the frequency tables from a text file and write
; the complete table blob to stdout.
;
; Usage: <executable> [<update file>] <file> > <output file>
;   With two arguments the first names a previously written table that is
;   loaded before counting, so the new counts are added on top of it.
; Diagnostics are emitted through the print macro.
; Exit status: 0 success, 1 usage, 2 input file cannot be opened,
;   3 the table could not be written completely,
;   7 previous table cannot be opened, read, or is too small.
global _start
_start:
        ; The arguments are consumed straight off the entry stack with pops.
        pop esi                         ; esi = argc
        pop edi                         ; argv[0] (program name, unused)
        cmp esi, 3
        jge .has_extra_arg
        cmp esi, 2
        jge .has_arg
        print {"usage: <executable> [<update file>] <file> > <output file>",10}
        exit 1
.has_extra_arg:
        print {'Reading in previous table...',10}
        pop ebx                         ; path of the previous table
        linsys SYS_OPEN, ebx, O_RDONLY, 0
        cmp eax, 0
        jge .table_open
        print {"Can't open table file",10}
        exit 7
.table_open:
        push eax                        ; [esp] = table file descriptor
        mov esi, counts                 ; esi = next byte of the tables to fill
.table_read_loop:
        cmp esi, counts+counts.len
        jge .table_read_done
        ; Ask for everything that is still missing.
        mov edx, counts+counts.len
        sub edx, esi
        linsys SYS_READ, [esp], esi, edx
        cmp eax, 0
        jl .table_read_error
        cmp eax, 0
        je .table_read_trunc            ; end of file before the tables were full
        add esi, eax
        jmp .table_read_loop
.table_read_error:
        print {"Error reading in table",10}
        exit 7
.table_read_trunc:
        print {"Table file too small",10}
        exit 7
.table_read_done:
        ; Done with the table file: close it (it used to be left open) and
        ; drop its descriptor from the stack. A failed close is ignored.
        linsys SYS_CLOSE, [esp]
        pop eax
.has_arg:
        print {'Updating tables...',10}
        pop ebx                         ; path of the text file to count
        linsys SYS_OPEN, ebx, O_RDONLY, 0
        cmp eax, 0
        jge .file_open
        print {"Can't open file",10}
        exit 2
.file_open:
        push eax                        ; [esp] = input file descriptor
        ; Empty symbol window (r8-r11) and symbol counter (r12) for push_char.
        xor r8, r8
        xor r9, r9
        xor r10, r10
        xor r11, r11
        xor r12, r12
        ; Clear all of ebx once: the loop below only ever writes its low byte.
        xor ebx, ebx
.read_loop:
        linsys SYS_READ, [esp], buffer, buffer.len
        cmp eax, 0
        jl .read_error
        je .read_done
        mov ecx, 0                      ; ecx = index into the bytes just read
.count_loop:
        cmp ecx, eax
        jge .count_done
        mov bl, [buffer + ecx]          ; raw input byte
        mov bl, [mapping + ebx]         ; -> 6-bit symbol code
        push_char ebx
        inc ecx
        jmp .count_loop
.count_done:
        jmp .read_loop
.read_error:
        print {'Error occured while reading',10}
        ; Fall through: whatever was counted before the failure is still written out.
.read_done:
        linsys SYS_CLOSE, [esp]
        mov esi, counts                 ; esi = next byte of the tables to write
.write_loop:
        cmp esi, counts+counts.len
        jge .write_done
        mov edx, counts+counts.len
        sub edx, esi
        linsys SYS_WRITE, 1, esi, edx
        cmp eax, 0
        ; A negative result is an error; a zero result means no progress and
        ; would otherwise repeat forever.
        jle .write_error
        add esi, eax
        jmp .write_loop
.write_error:
        print {'Error occured while writing',10}
        ; The output is incomplete: report failure instead of announcing success.
        exit 3
.write_done:
        print {'Finished.',10}
        exit 0
Loading…
Cancel
Save