andreasjhkarlsson / andreasjhkarlsson.github.io Goto Github PK
View Code? Open in Web Editor NEWBlabbing
Blabbing
I just read your article 4 billion if statements and I have to say I'm impressed by how blazingly efficient your program is - just 10 seconds per number, wow! However, I did see some room for improvement:
==
, but >=
(or similar). Even the number of total comparisons stays the same.I included all of these optimizations and came up with this:
#include <sys/mman.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
// Idea stolen from https://andreasjhkarlsson.github.io/jekyll/update/2023/12/27/4-billion-if-statements.html
#define die(msg) do { puts(msg); exit(EXIT_FAILURE); } while(0)
size_t page_size;
char *create_lazy_code_page(size_t num, size_t bit);
void fill_code_page(char *page, size_t num, size_t bit) {
fprintf(stderr, "\tFilling code page for num 0x%016lx, bit 0x%016lx at %p\n", num, bit, page);
size_t off = 0;
#define emit(b) (page[off++] = (b))
// should never happen - we should stop at bit 1, not bit 0.
if(bit == 0)
die("bit 0");
// The code we write will overwrite the jump to the end of the page, where the call to fill_code_page is.
// This means that future invocations will immediately start with the code we emit,
// without wasting any cycles on checking whether the page has already been filled or not!
emit(0x48); // mov rax, <num with bit set>
emit(0xb8);
for(char i = 0; i < 8; i ++)
emit((char) ((num | bit) >> (8*i)));
emit(0x48); // cmp rdi, rax
emit(0x39);
emit(0xc7);
if(bit == 1) {
emit(0xb8); // mov eax, 1
emit(0x01);
emit(0x00);
emit(0x00);
emit(0x00);
// Can't use xor esi, esi here: that would clobber the flags
emit(0xbe); // mov esi, 0
emit(0x00);
emit(0x00);
emit(0x00);
emit(0x00);
// Now move 0 into rax if argument was above or equal to (num|bit), which means bit was set, which means number is odd.
emit(0x48); // cmovae rax, rsi
emit(0x0f);
emit(0x43);
emit(0xc6);
emit(0xc3); // ret
}
else {
char *handler_bit0 = create_lazy_code_page(num , bit >> 1);
char *handler_bit1 = create_lazy_code_page(num | bit, bit >> 1);
emit(0x48); // mov rax, <handler for bit=0>
emit(0xb8);
for(char i = 0; i < 8; i ++)
emit((char) (((size_t)handler_bit0) >> (8*i)));
emit(0x48); // mov rsi, <handler for bit=1>
emit(0xbe);
for(char i = 0; i < 8; i ++)
emit((char) (((size_t)handler_bit1) >> (8*i)));
// Now move bit1 handler into rax if argument was above (num|bit) - which means bit was set
emit(0x48); // cmovae rax, rsi
emit(0x0f);
emit(0x43);
emit(0xc6);
// jump to the correct handler
emit(0xff); // jmp rax
emit(0xe0);
}
}
char *create_lazy_code_page(size_t num, size_t bit) {
char *page = mmap(0, page_size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
if(page == (char*) -1)
die("mmap");
fprintf(stderr, "\tNew lazy code page for num 0x%016lx, bit 0x%016lx at %p\n", num, bit, page);
size_t off = 0;
#define emit(b) (page[off++] = (b))
size_t fill_call_length = 10+10+10+10+1+2+1+5;
// Place the call to fill_code_page at the end of the page
// so that fill_code_page doesn't overwrite it.
// So, first, jump to fill_call_length bytes before the end of the page -
// that's exactly how long the call (and the jmp back to the start) is.
emit(0xe9); // jmp <fill_call_length bytes before end of page>
for(char i = 0; i < 4; i ++)
// additional -5 because that's how long this jmp itself is
emit((char) ((page_size - fill_call_length - 5) >> (8*i)));
off = page_size - fill_call_length;
// save rdi - this also aligns the stack correctly
emit(0x57); // push rdi
emit(0x48); // mov rdi, <page>
emit(0xbf);
for(char i = 0; i < 8; i ++)
emit((char) (((size_t)page) >> (8*i)));
emit(0x48); // mov rsi, <num>
emit(0xbe);
for(char i = 0; i < 8; i ++)
emit((char) (num >> (8*i)));
emit(0x48); // mov rdx, <bit>
emit(0xba);
for(char i = 0; i < 8; i ++)
emit((char) (bit >> (8*i)));
emit(0x48); // mov rax, fill_code_page
emit(0xb8);
for(char i = 0; i < 8; i ++)
emit((char) (((size_t) &fill_code_page) >> (8*i)));
emit(0xff); // call rax
emit(0xd0);
emit(0x5f); // pop rdi
emit(0xe9); // jmp <to start of page>
for(char i = 0; i < 4; i ++)
emit((char) ((-page_size) >> (8*i)));
return page;
}
int main(int argc, char **argv) {
page_size = sysconf(_SC_PAGE_SIZE);
if(page_size == -1)
die("sysconf");
char(*core_code_entry)(size_t) = (char(*)(size_t)) create_lazy_code_page(0, 1L << 63);
if(argc <= 1)
for(;;) {
size_t input;
scanf("%zu", &input);
char is_even = core_code_entry(input);
puts(is_even ? "even" : "odd");
}
else
for(int i = 1; i < argc; i ++) {
size_t input;
sscanf(argv[i], "%zu", &input);
char is_even = core_code_entry(input);
printf("%zu: %s\n", input, is_even ? "even" : "odd");
}
return EXIT_SUCCESS;
}
This program lazily allocates one RWX page per comparison to be made. At first, each such page doesn't even contain the comparison itself, but instead only bootstrap code. Once the comparison is actually needed the first time, the bootstrap code will run and generate the real comparison. Depending on the outcome, the generated code will jump to one of two new lazily filled-out RWX pages. Each of those will initially only contain bootstrap code as well and will get filled out with the correct subsequent comparison the first time they are actually needed.
Note that this filling out actually overwrites the bootstrap code, which means not a single cycle is wasted on checking whether a given comparison has already been generated or not! To be able to observe this speed bonus, the program supports checking multiple numbers for evenness in a single run.
Example:
$ time ./eighteen_quintillion_if_statements \
1234 12345 18446744073709551615 18446744073709551614 18446744073709551613 18446744073709551612
New lazy code page for num 0x0000000000000000, bit 0x8000000000000000 at 0x7f00268d7000
Filling code page for num 0x0000000000000000, bit 0x8000000000000000 at 0x7f00268d7000
New lazy code page for num 0x0000000000000000, bit 0x4000000000000000 at 0x7f00268d6000
New lazy code page for num 0x8000000000000000, bit 0x4000000000000000 at 0x7f00268d5000
Filling code page for num 0x0000000000000000, bit 0x4000000000000000 at 0x7f00268d6000
[... snip: 182 lines ...]
Filling code page for num 0x00000000000004d0, bit 0x0000000000000002 at 0x7f0026678000
New lazy code page for num 0x00000000000004d0, bit 0x0000000000000001 at 0x7f0026676000
New lazy code page for num 0x00000000000004d2, bit 0x0000000000000001 at 0x7f0026675000
Filling code page for num 0x00000000000004d2, bit 0x0000000000000001 at 0x7f0026675000
1234: even
Filling code page for num 0x0000000000002000, bit 0x0000000000001000 at 0x7f002668d000
[... snip: 35 lines ...]
Filling code page for num 0x0000000000003038, bit 0x0000000000000001 at 0x7f002665e000
12345: odd
Filling code page for num 0x8000000000000000, bit 0x4000000000000000 at 0x7f00268d5000
[... snip: 185 lines ...]
Filling code page for num 0xfffffffffffffffe, bit 0x0000000000000001 at 0x7f00265e1000
18446744073709551615: odd
18446744073709551614: even
Filling code page for num 0xfffffffffffffffc, bit 0x0000000000000001 at 0x7f00265e2000
18446744073709551613: odd
18446744073709551612: even
real 0m0.012s
user 0m0.001s
sys 0m0.008s
Hey, I just watched https://youtu.be/nlFjL0B43-w which "reviewed" a post you have. I came to the site, but disappointed to find no more. Nice style and informative too. Please write more!
Hello,
I am reading right now https://andreasjhkarlsson.github.io//jekyll/update/2023/12/27/4-billion-if-statements.html and it states that Python (thanks to the visionary genius of Ross van der Gussom).
But the guy's name is Guido van Rossum
.
Actually I wanted to fix this by PR, but can't find the file for your article. Enlighten me please.
Greetings from Munich, Marcel
A declarative, efficient, and flexible JavaScript library for building user interfaces.
๐ Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. ๐๐๐
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Some thing interesting about web. New door for the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Some thing interesting about visualization, use data art
Some thing interesting about game, make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google โค๏ธ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.