enzymead / rust Goto Github PK
View Code? Open in Web Editor NEW
This project forked from rust-lang/rust
A rust fork to work towards Enzyme integration
Home Page: https://www.rust-lang.org
License: Other
This project forked from rust-lang/rust
A rust fork to work towards Enzyme integration
Home Page: https://www.rust-lang.org
License: Other
I tried this code:
#![feature(bench_black_box)]
use autodiff::autodiff;
#[autodiff(dgrad, Reverse, Active, Active)]
fn get_output(amt: f64) -> (f64, f64) {
(amt, 1.0)
}
fn main() {
dgrad(1.0, (1.0, 1.0));
}
I expected to see this happen: Compile passed.
Instead, this happened: explanation
`rustc --version --verbose`:
rustc 1.62.0-nightly (b25946088 2023-04-04)
binary: rustc
commit-hash: b25946088ccf2899d20e880657f79b51f2f34a02
commit-date: 2023-04-04
host: x86_64-unknown-linux-gnu
release: 1.62.0-nightly
LLVM version: 14.0.0
$ cargo clean && cargo +enzyme run --release -Z unstable-options --config 'lto="fat"'
Compiling version_check v0.9.4
Compiling proc-macro2 v1.0.56
Compiling unicode-ident v1.0.8
Compiling quote v1.0.26
Compiling syn v1.0.109
Compiling proc-macro-error-attr v1.0.4
Compiling proc-macro-error v1.0.4
Compiling autodiff v0.1.0 (**mask**/Enzyme-rust/library/autodiff)
Compiling tstgrad v0.1.0 (/tmp/tstgrad)
[compiler/rustc_codegen_llvm/src/back/write.rs:672] &type_analysis = 0x00007f121983d450
thread '<unnamed>' panicked at 'Args len shouldn't differ. Please report this.', compiler/rustc_codegen_llvm/src/back/write.rs:610:9
stack backtrace:
0: 0x7f1235309d1d - <std::sys_common::backtrace::_print::DisplayBacktrace as core::fmt::Display>::fmt::hbc2fefb0f908071d
1: 0x7f12353644cc - core::fmt::write::hcfd8dbbc6cc2d112
2: 0x7f12352fb4f1 - std::io::Write::write_fmt::h1f8236ac48b73a6e
3: 0x7f123530ca35 - std::panicking::default_hook::{{closure}}::h0a320c281e2cf753
4: 0x7f123530c6a9 - std::panicking::default_hook::h77b8cf0f703442c9
5: 0x7f12359ee979 - rustc_driver[9b7ca9a5e0bfcdc7]::DEFAULT_HOOK::{closure#0}::{closure#0}
6: 0x7f123530d206 - std::panicking::rust_panic_with_hook::he2eed2d393b1bf6f
7: 0x7f123530cfc9 - std::panicking::begin_panic_handler::{{closure}}::h9092161ea341e9d6
8: 0x7f123530a1d4 - std::sys_common::backtrace::__rust_end_short_backtrace::h6596e6e3a8c8524c
9: 0x7f123530cd39 - rust_begin_unwind
10: 0x7f12352d1473 - core::panicking::panic_fmt::he089468669ab0c2e
11: 0x7f1235c379fe - <rustc_codegen_llvm[b7afac244365f53b]::LlvmCodegenBackend as rustc_codegen_ssa[fde655230d926ecd]::traits::write::WriteBackendMethods>::autodiff
12: 0x7f1235b6f8c0 - rustc_codegen_ssa[fde655230d926ecd]::back::write::start_executing_work::<rustc_codegen_llvm[b7afac244365f53b]::LlvmCodegenBackend>::{closure#4}
13: 0x7f1235b44236 - std[330feb74f2e265b7]::sys_common::backtrace::__rust_begin_short_backtrace::<<rustc_codegen_llvm[b7afac244365f53b]::LlvmCodegenBackend as rustc_codegen_ssa[fde655230d926ecd]::traits::backend::ExtraBackendMethods>::spawn_thread<rustc_codegen_ssa[fde655230d926ecd]::back::write::start_executing_work<rustc_codegen_llvm[b7afac244365f53b]::LlvmCodegenBackend>::{closure#4}, core[889d80731517ff03]::result::Result<rustc_codegen_ssa[fde655230d926ecd]::back::write::CompiledModules, ()>>::{closure#0}, core[889d80731517ff03]::result::Result<rustc_codegen_ssa[fde655230d926ecd]::back::write::CompiledModules, ()>>
14: 0x7f1235b794e4 - <<std[330feb74f2e265b7]::thread::Builder>::spawn_unchecked_<<rustc_codegen_llvm[b7afac244365f53b]::LlvmCodegenBackend as rustc_codegen_ssa[fde655230d926ecd]::traits::backend::ExtraBackendMethods>::spawn_thread<rustc_codegen_ssa[fde655230d926ecd]::back::write::start_executing_work<rustc_codegen_llvm[b7afac244365f53b]::LlvmCodegenBackend>::{closure#4}, core[889d80731517ff03]::result::Result<rustc_codegen_ssa[fde655230d926ecd]::back::write::CompiledModules, ()>>::{closure#0}, core[889d80731517ff03]::result::Result<rustc_codegen_ssa[fde655230d926ecd]::back::write::CompiledModules, ()>>::{closure#1} as core[889d80731517ff03]::ops::function::FnOnce<()>>::call_once::{shim:vtable#0}
15: 0x7f12353173e3 - std::sys::unix::thread::Thread::new::thread_start::hb9f16899a8e702ff
16: 0x7f12350c2bb5 - <unknown>
17: 0x7f1235144d90 - <unknown>
18: 0x0 - <unknown>
error: internal compiler error: unexpected panic
note: the compiler unexpectedly panicked. this is a bug.
note: we would appreciate a bug report: https://github.com/rust-lang/rust/issues/new?labels=C-bug%2C+I-ICE%2C+T-compiler&template=ice.md
note: rustc 1.62.0-nightly (b25946088 2023-04-04) running on x86_64-unknown-linux-gnu
note: compiler flags: --crate-type bin -C opt-level=3 -C panic=abort -C lto -C codegen-units=1 -C incremental
note: some of the compiler flags provided by cargo are hidden
query stack during panic:
end of query stack
thread 'rustc' panicked at '**mask**/Enzyme-rust/compiler/rustc_codegen_ssa/src/back/write.rs:1860:17: panic during codegen/LLVM phase', compiler/rustc_middle/src/util/bug.rs:36:26
stack backtrace:
0: 0x7f1235309d1d - <std::sys_common::backtrace::_print::DisplayBacktrace as core::fmt::Display>::fmt::hbc2fefb0f908071d
1: 0x7f12353644cc - core::fmt::write::hcfd8dbbc6cc2d112
2: 0x7f12352fb4f1 - std::io::Write::write_fmt::h1f8236ac48b73a6e
3: 0x7f123530ca35 - std::panicking::default_hook::{{closure}}::h0a320c281e2cf753
4: 0x7f123530c6a9 - std::panicking::default_hook::h77b8cf0f703442c9
5: 0x7f12359ee979 - rustc_driver[9b7ca9a5e0bfcdc7]::DEFAULT_HOOK::{closure#0}::{closure#0}
6: 0x7f123530d206 - std::panicking::rust_panic_with_hook::he2eed2d393b1bf6f
7: 0x7f12375f19f1 - std[330feb74f2e265b7]::panicking::begin_panic::<alloc[bf8cb5fae4f7b365]::string::String>::{closure#0}
8: 0x7f12375dfffc - std[330feb74f2e265b7]::sys_common::backtrace::__rust_end_short_backtrace::<std[330feb74f2e265b7]::panicking::begin_panic<alloc[bf8cb5fae4f7b365]::string::String>::{closure#0}, !>
9: 0x7f1235965184 - std[330feb74f2e265b7]::panicking::begin_panic::<alloc[bf8cb5fae4f7b365]::string::String>
10: 0x7f12375bf61c - std[330feb74f2e265b7]::panic::panic_any::<alloc[bf8cb5fae4f7b365]::string::String>
11: 0x7f12376e4e58 - rustc_middle[ecde786d3aeaed7c]::ty::context::tls::with_opt::<rustc_middle[ecde786d3aeaed7c]::util::bug::opt_span_bug_fmt<rustc_span[89f4c99c0bafe0ba]::span_encoding::Span>::{closure#0}, ()>
12: 0x7f12377128e9 - rustc_middle[ecde786d3aeaed7c]::util::bug::opt_span_bug_fmt::<rustc_span[89f4c99c0bafe0ba]::span_encoding::Span>
13: 0x7f123596ac55 - rustc_middle[ecde786d3aeaed7c]::util::bug::bug_fmt
14: 0x7f1235c3a366 - <rustc_codegen_llvm[b7afac244365f53b]::LlvmCodegenBackend as rustc_codegen_ssa[fde655230d926ecd]::traits::backend::CodegenBackend>::join_codegen
15: 0x7f1235b02ae7 - <rustc_interface[ca3972cb95dc44fe]::queries::Linker>::link
16: 0x7f12359ce16f - rustc_interface[ca3972cb95dc44fe]::interface::run_compiler::<core[889d80731517ff03]::result::Result<(), rustc_errors[a8e81527e98fa9a3]::ErrorGuaranteed>, rustc_driver[9b7ca9a5e0bfcdc7]::run_compiler::{closure#1}>::{closure#0}
17: 0x7f12359c4fe2 - std[330feb74f2e265b7]::sys_common::backtrace::__rust_begin_short_backtrace::<rustc_interface[ca3972cb95dc44fe]::util::run_in_thread_pool_with_globals<rustc_interface[ca3972cb95dc44fe]::interface::run_compiler<core[889d80731517ff03]::result::Result<(), rustc_errors[a8e81527e98fa9a3]::ErrorGuaranteed>, rustc_driver[9b7ca9a5e0bfcdc7]::run_compiler::{closure#1}>::{closure#0}, core[889d80731517ff03]::result::Result<(), rustc_errors[a8e81527e98fa9a3]::ErrorGuaranteed>>::{closure#0}, core[889d80731517ff03]::result::Result<(), rustc_errors[a8e81527e98fa9a3]::ErrorGuaranteed>>
18: 0x7f12359cf7c9 - <<std[330feb74f2e265b7]::thread::Builder>::spawn_unchecked_<rustc_interface[ca3972cb95dc44fe]::util::run_in_thread_pool_with_globals<rustc_interface[ca3972cb95dc44fe]::interface::run_compiler<core[889d80731517ff03]::result::Result<(), rustc_errors[a8e81527e98fa9a3]::ErrorGuaranteed>, rustc_driver[9b7ca9a5e0bfcdc7]::run_compiler::{closure#1}>::{closure#0}, core[889d80731517ff03]::result::Result<(), rustc_errors[a8e81527e98fa9a3]::ErrorGuaranteed>>::{closure#0}, core[889d80731517ff03]::result::Result<(), rustc_errors[a8e81527e98fa9a3]::ErrorGuaranteed>>::{closure#1} as core[889d80731517ff03]::ops::function::FnOnce<()>>::call_once::{shim:vtable#0}
19: 0x7f12353173e3 - std::sys::unix::thread::Thread::new::thread_start::hb9f16899a8e702ff
20: 0x7f12350c2bb5 - <unknown>
21: 0x7f1235144d90 - <unknown>
22: 0x0 - <unknown>
error: internal compiler error: unexpected panic
note: the compiler unexpectedly panicked. this is a bug.
note: we would appreciate a bug report: https://github.com/rust-lang/rust/issues/new?labels=C-bug%2C+I-ICE%2C+T-compiler&template=ice.md
note: rustc 1.62.0-nightly (b25946088 2023-04-04) running on x86_64-unknown-linux-gnu
note: compiler flags: --crate-type bin -C opt-level=3 -C panic=abort -C lto -C codegen-units=1 -C incremental
note: some of the compiler flags provided by cargo are hidden
query stack during panic:
end of query stack
error: could not compile `tstgrad` (bin "tstgrad")
Thanks for Enzyme + Rust! I was just trying it out, but running `../x.py build --stage 1 library/std` led to:
Updating submodule src/llvm-project
Submodule 'src/llvm-project' (https://github.com/rust-lang/llvm-project.git) registered for path 'src/llvm-project'
Cloning into '/Users/anand/.cargo/dev/rust/src/llvm-project'...
remote: Enumerating objects: 3, done.
remote: Counting objects: 100% (3/3), done.
remote: Compressing objects: 100% (2/2), done.
remote: Total 3 (delta 0), reused 2 (delta 0), pack-reused 0
Receiving objects: 100% (3/3), done.
remote: Total 0 (delta 0), reused 0 (delta 0), pack-reused 0
remote: Enumerating objects: 118284, done.
remote: Counting objects: 100% (118284/118284), done.
remote: Compressing objects: 100% (107763/107763), done.
remote: Total 118282 (delta 19733), reused 43593 (delta 6703), pack-reused 0
Receiving objects: 100% (118282/118282), 166.91 MiB | 6.11 MiB/s, done.
Resolving deltas: 100% (19733/19733), done.
From https://github.com/rust-lang/llvm-project
* branch 9168e236c548d1d0e9938ee6dd4cdbd308fdfd72 -> FETCH_HEAD
Submodule path 'src/llvm-project': checked out '9168e236c548d1d0e9938ee6dd4cdbd308fdfd72'
thread 'main' panicked at 'shared linking to LLVM is not currently supported on aarch64-apple-darwin', src/bootstrap/native.rs:159:17
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
Build completed unsuccessfully in 0:02:23
I know that it's probably too early to really get Rust + Enzyme working, but I'm really interested in trying it out at some point.
➜ src git:(main) time cargo +enzyme build --release
warning: `rust_x` (lib) generated 1 warning
Compiling rust_x v0.1.0 (/h/344/drehwald/prog/x_code)
Finished release [optimized] target(s) in 8m 29s
cargo +enzyme build --release 521.78s user 2.37s system 102% cpu 8:29.64 total
Better than C++ but still not great.
@bytesnake The Enzyme API recently needed some small breakage anyway, so Billy added the batching along the way.
I'll fix it on the rust side and probably add a wrapper for the pure (highly fragile) batching mode without AD,
by copying most from the autodiff macro.
We should sit together one day to decide how to update our original AD macro to allow vectorized AD.
Part of #29.
#[autodiff(cos_inplace, Reverse, Const)]
fn sin_inplace(#[dup] x: &f32, #[dup_noneed] y: &mut f32) {
*y = x.sin();
}
Results in:
rustc: /home/losch/Documents/rust-ml/rustc2/src/llvm-project/llvm/include/llvm/IR/Type.h:418: llvm::Type* llvm::Type::getNonOpaquePointerElementType() const: Assertion `NumContainedTys && "Attempting to get element type of opaque pointer"' failed.
I'm currently rebuilding LLVM with RelWithDebInfo for more information; here is a preliminary trace:
after simplification :
; Function Attrs: mustprogress nofree noinline nosync nounwind nonlazybind willreturn memory(argmem: readwrite) uwtable
define internal void @preprocess__ZN3sin11sin_inplace17h49beadd84ee4eb97E(ptr noalias nocapture noundef readonly align 4 dereferenceable(4) %0, ptr noalias nocapture noundef writeonly align 4 dereferenceable(4) %1) unnamed_addr EnzymeAD/Enzyme#1 {
%3 = load float, ptr %0, align 4, !noundef !4
%4 = tail call float @llvm.sin.f32(float %3) EnzymeAD/Enzyme#100
store float %4, ptr %1, align 4
ret void
}
in new function diffe_ZN3sin11sin_inplace17h49beadd84ee4eb97E nonconstant arg ptr %0
in new function diffe_ZN3sin11sin_inplace17h49beadd84ee4eb97E nonconstant arg ptr %2
analyzing function preprocess__ZN3sin11sin_inplace17h49beadd84ee4eb97E
+ knowndata: ptr %0 : {[-1]:Pointer, [-1,-1]:Float@float} - {}
+ knowndata: ptr %1 : {[-1]:Pointer, [-1,-1]:Float@float} - {}
+ retdata: {}
updating analysis of val: ptr %0 current: {} new {[-1]:Pointer, [-1,-1]:Float@float} from ptr %0 Changed=1 legal=1
updating analysis of val: ptr %1 current: {} new {[-1]:Pointer, [-1,-1]:Float@float} from ptr %1 Changed=1 legal=1
updating analysis of val: ptr %0 current: {[-1]:Pointer, [-1,-1]:Float@float} new {[-1]:Pointer, [-1,-1]:Float@float} from ptr %0 Changed=0 legal=1
updating analysis of val: ptr %1 current: {[-1]:Pointer, [-1,-1]:Float@float} new {[-1]:Pointer, [-1,-1]:Float@float} from ptr %1 Changed=0 legal=1
updating analysis of val: ptr %0 current: {[-1]:Pointer, [-1,-1]:Float@float} new {[-1]:Pointer} from %3 = load float, ptr %0, align 4, !noundef !4 Changed=0 legal=1
updating analysis of val: %3 = load float, ptr %0, align 4, !noundef !4 current: {} new {[-1]:Float@float} from %3 = load float, ptr %0, align 4, !noundef !4 Changed=1 legal=1
updating analysis of val: %4 = tail call float @llvm.sin.f32(float %3) EnzymeAD/Enzyme#100 current: {} new {[-1]:Float@float} from %4 = tail call float @llvm.sin.f32(float %3) EnzymeAD/Enzyme#100 Changed=1 legal=1
updating analysis of val: %3 = load float, ptr %0, align 4, !noundef !4 current: {[-1]:Float@float} new {[-1]:Float@float} from %4 = tail call float @llvm.sin.f32(float %3) EnzymeAD/Enzyme#100 Changed=0 legal=1
updating analysis of val: ptr %1 current: {[-1]:Pointer, [-1,-1]:Float@float} new {[-1]:Pointer, [-1,0]:Float@float} from store float %4, ptr %1, align 4 Changed=0 legal=1
updating analysis of val: %4 = tail call float @llvm.sin.f32(float %3) EnzymeAD/Enzyme#100 current: {[-1]:Float@float} new {[-1]:Float@float} from store float %4, ptr %1, align 4 Changed=0 legal=1
updating analysis of val: ptr %0 current: {[-1]:Pointer, [-1,-1]:Float@float} new {[-1]:Pointer, [-1,0]:Float@float} from %3 = load float, ptr %0, align 4, !noundef !4 Changed=0 legal=1
updating analysis of val: %3 = load float, ptr %0, align 4, !noundef !4 current: {[-1]:Float@float} new {[-1]:Float@float} from %3 = load float, ptr %0, align 4, !noundef !4 Changed=0 legal=1
checking if is constant[3] %3 = load float, ptr %0, align 4, !noundef !4
< UPSEARCH1> %3 = load float, ptr %0, align 4, !noundef !4
nonconstant(1) up-inst %3 = load float, ptr %0, align 4, !noundef !4 op ptr %0
<Value USESEARCH2> %3 = load float, ptr %0, align 4, !noundef !4 UA=None
considering use of %3 = load float, ptr %0, align 4, !noundef !4 - %4 = tail call float @llvm.sin.f32(float %3) EnzymeAD/Enzyme#100
considering use of %3 = load float, ptr %0, align 4, !noundef !4 - store float %4, ptr %1, align 4
Value nonconstant inst (uses): %3 = load float, ptr %0, align 4, !noundef !4 user store float %4, ptr %1, align 4
</Value USESEARCH2 const=0> %3 = load float, ptr %0, align 4, !noundef !4
Value nonconstant (couldn't disprove)[3] %3 = load float, ptr %0, align 4, !noundef !4
<Value USESEARCH2> %3 = load float, ptr %0, align 4, !noundef !4 UA=None
considering use of %3 = load float, ptr %0, align 4, !noundef !4 - %4 = tail call float @llvm.sin.f32(float %3) EnzymeAD/Enzyme#100
considering use of %3 = load float, ptr %0, align 4, !noundef !4 - store float %4, ptr %1, align 4
Value nonconstant inst (uses): %3 = load float, ptr %0, align 4, !noundef !4 user store float %4, ptr %1, align 4
</Value USESEARCH2 const=0> %3 = load float, ptr %0, align 4, !noundef !4
< UPSEARCH1> %3 = load float, ptr %0, align 4, !noundef !4
nonconstant(1) up-inst %3 = load float, ptr %0, align 4, !noundef !4 op ptr %0
couldnt decide fallback as nonconstant instruction(3): %3 = load float, ptr %0, align 4, !noundef !4
%3 = load float, ptr %0, align 4, !noundef !4 cv=0 ci=0
checking if is constant[3] %4 = tail call float @llvm.sin.f32(float %3) EnzymeAD/Enzyme#100
< UPSEARCH1> %4 = tail call float @llvm.sin.f32(float %3) EnzymeAD/Enzyme#100
nonconstant(1) up-call %4 = tail call float @llvm.sin.f32(float %3) EnzymeAD/Enzyme#100 op %3 = load float, ptr %0, align 4, !noundef !4
<Value USESEARCH2> %4 = tail call float @llvm.sin.f32(float %3) EnzymeAD/Enzyme#100 UA=None
considering use of %4 = tail call float @llvm.sin.f32(float %3) EnzymeAD/Enzyme#100 - store float %4, ptr %1, align 4
Value nonconstant inst (uses): %4 = tail call float @llvm.sin.f32(float %3) EnzymeAD/Enzyme#100 user store float %4, ptr %1, align 4
</Value USESEARCH2 const=0> %4 = tail call float @llvm.sin.f32(float %3) EnzymeAD/Enzyme#100
Value nonconstant (couldn't disprove)[3] %4 = tail call float @llvm.sin.f32(float %3) EnzymeAD/Enzyme#100
<Value USESEARCH2> %4 = tail call float @llvm.sin.f32(float %3) EnzymeAD/Enzyme#100 UA=None
considering use of %4 = tail call float @llvm.sin.f32(float %3) EnzymeAD/Enzyme#100 - store float %4, ptr %1, align 4
Value nonconstant inst (uses): %4 = tail call float @llvm.sin.f32(float %3) EnzymeAD/Enzyme#100 user store float %4, ptr %1, align 4
</Value USESEARCH2 const=0> %4 = tail call float @llvm.sin.f32(float %3) EnzymeAD/Enzyme#100
< UPSEARCH1> %4 = tail call float @llvm.sin.f32(float %3) EnzymeAD/Enzyme#100
nonconstant(1) up-call %4 = tail call float @llvm.sin.f32(float %3) EnzymeAD/Enzyme#100 op %3 = load float, ptr %0, align 4, !noundef !4
couldnt decide fallback as nonconstant instruction(3): %4 = tail call float @llvm.sin.f32(float %3) EnzymeAD/Enzyme#100
%4 = tail call float @llvm.sin.f32(float %3) EnzymeAD/Enzyme#100 cv=0 ci=0
checking if is constant[3] store float %4, ptr %1, align 4
< UPSEARCH1> store float %4, ptr %1, align 4
nonconstant(1) up-inst store float %4, ptr %1, align 4 op %4 = tail call float @llvm.sin.f32(float %3) EnzymeAD/Enzyme#100
couldnt decide fallback as nonconstant instruction(3): store float %4, ptr %1, align 4
store float %4, ptr %1, align 4 cv=1 ci=0
ret void cv=1 ci=1
; Function Attrs: mustprogress nofree noinline nosync nounwind nonlazybind willreturn memory(argmem: readwrite) uwtable
define internal void @diffe_ZN3sin11sin_inplace17h49beadd84ee4eb97E(ptr noalias nocapture noundef readonly align 4 dereferenceable(4) %0, ptr nocapture %1, ptr noalias nocapture writeonly align 4 dereferenceable(4) %2, ptr nocapture %3) unnamed_addr EnzymeAD/Enzyme#1 {
%5 = load float, ptr %0, align 4, !alias.scope !47843, !noalias !47846, !noundef !4
%6 = load float, ptr %3, align 4, !alias.scope !47848, !noalias !47851
store float 0.000000e+00, ptr %3, align 4, !alias.scope !47848, !noalias !47851
%7 = call fast float @llvm.cos.f32(float %5)
%8 = fmul fast float %6, %7
%9 = load float, ptr %1, align 4, !alias.scope !47846, !noalias !47843
%10 = fadd fast float %9, %8
store float %10, ptr %1, align 4, !alias.scope !47846, !noalias !47843
ret void
}
Throwing 200k LoC+ errors at users isn't productive.
Write an error handler that uses llvm-extract (or better the API equivalent https://llvm.org/doxygen/classllvm_1_1ExtractGVPass.html#a334e522b8b7f2944121de406ab0875fb) to write breaking input functions into files, so we can try to differentiate all functions first and abort compilation with a rustc level error when we wrote at least one error to disk.
This avoids the huge LLVM-IR modules from Rust's std library.
@_ZN4sin223primal_cos_inplace_fwd217h4372d8ca57db688fE = internal unnamed_addr alias void (ptr, ptr), ptr @_ZN4sin223primal_cos_inplace_fwd117h63b2ed0dfe283893E
!70222 = distinct !{!70222, !70219, !"_ZN4core5slice29_$LT$impl$u20$$u5b$T$u5d$$GT$15copy_from_slice17h85bc0a79623e6914E: argument 2"}
thread 'coordinator' panicked at compiler/rustc_codegen_llvm/src/back/write.rs:844:9:
assertion failed: res.is_ok()
stack backtrace:
0: 0x7f717d551f2b - <std::sys_common::backtrace::_print::DisplayBacktrace as core::fmt::Display>::fmt::h699c2eda1c25947d
1: 0x7f717d5f4c10 - core::fmt::write::hbd0a2102da3b23ad
2: 0x7f717d55b5be - std::io::Write::write_fmt::had1d9773e09f96e7
3: 0x7f717d551d04 - std::sys_common::backtrace::print::hea2679286c7cd7bf
4: 0x7f717d5b3e13 - std::panicking::default_hook::{{closure}}::hdf3eb728a3cfee15
5: 0x7f717d5b3b19 - std::panicking::default_hook::h43acb4c83e8b8570
error: failed to prepare AutoDiff: src: _ZN4sin223primal_cos_inplace_fwd217h4372d8ca57db688fE, target: _ZN4sin216cos_inplace_fwd217h7d9928179656fa54E, could not find src function
6: 0x7f717dec4c8a - std[6b4a5321ca999280]::panicking::update_hook::<alloc[8bd0fa26b0019945]::boxed::Box<rustc_driver_impl[3bb1b3f2e578b797]::install_ice_hook::{closure#0}>>::{closure#0}
7: 0x7f717d5b468d - std::panicking::rust_panic_with_hook::h850b58927fbd9732
8: 0x7f717d552686 - std::panicking::begin_panic_handler::{{closure}}::h24e1c0b6dc790f97
9: 0x7f717d552136 - std::sys_common::backtrace::__rust_end_short_backtrace::h932b911a72b93ea1
10: 0x7f717d5b4232 - rust_begin_unwind
11: 0x7f717d549e05 - core::panicking::panic_fmt::he50bb7b3440a7688
12: 0x7f717d549ea3 - core::panicking::panic::hdb059e898064af1e
13: 0x7f717e17e26c - rustc_codegen_llvm[e72c960f46b30f4d]::back::write::differentiate
14: 0x7f717e192516 - rustc_codegen_ssa[72bf011a4be81bab]::back::write::generate_lto_work::<rustc_codegen_llvm[e72c960f46b30f4d]::LlvmCodegenBackend>
15: 0x7f717e17036c - std[6b4a5321ca999280]::sys_common::backtrace::__rust_begin_short_backtrace::<<rustc_codegen_llvm[e72c960f46b30f4d]::LlvmCodegenBackend as rustc_codegen_ssa[72bf011a4be81bab]::traits::backend::ExtraBackendMethods>::spawn_named_thread<rustc_codegen_ssa[72bf011a4be81bab]::back::write::start_executing_work<rustc_codegen_llvm[e72c960f46b30f4d]::LlvmCodegenBackend>::{closure#5}, core[ab080d8868199e65]::result::Result<rustc_codegen_ssa[72bf011a4be81bab]::back::write::CompiledModules, ()>>::{closure#0}, core[ab080d8868199e65]::result::Result<rustc_codegen_ssa[72bf011a4be81bab]::back::write::CompiledModules, ()>>
16: 0x7f717e17396c - <<std[6b4a5321ca999280]::thread::Builder>::spawn_unchecked_<<rustc_codegen_llvm[e72c960f46b30f4d]::LlvmCodegenBackend as rustc_codegen_ssa[72bf011a4be81bab]::traits::backend::ExtraBackendMethods>::spawn_named_thread<rustc_codegen_ssa[72bf011a4be81bab]::back::write::start_executing_work<rustc_codegen_llvm[e72c960f46b30f4d]::LlvmCodegenBackend>::{closure#5}, core[ab080d8868199e65]::result::Result<rustc_codegen_ssa[72bf011a4be81bab]::back::write::CompiledModules, ()>>::{closure#0}, core[ab080d8868199e65]::result::Result<rustc_codegen_ssa[72bf011a4be81bab]::back::write::CompiledModules, ()>>::{closure#1} as core[ab080d8868199e65]::ops::function::FnOnce<()>>::call_once::{shim:vtable#0}
17: 0x7f717d553115 - std::sys::unix::thread::Thread::new::thread_start::he77f3e9c20282189
18: 0x7f7174bcf609 - start_thread
at /build/glibc-SzIz7B/glibc-2.31/nptl/pthread_create.c:477:8
19: 0x7f717d3fa133 - clone
at /build/glibc-SzIz7B/glibc-2.31/misc/../sysdeps/unix/sysv/linux/x86_64/clone.S:95
20: 0x0 - <unknown>
error: the compiler unexpectedly panicked. this is a bug.
note: we would appreciate a bug report: https://github.com/rust-lang/rust/issues/new?labels=C-bug%2C+I-ICE%2C+T-compiler&template=ice.md
note: please attach the file at `/h/344/drehwald/prog/rust/library/autodiff/rustc-ice-2023-11-07T04_12_42-2990359.txt` to your bug report
note: compiler flags: -C opt-level=3 -C lto=fat
note: some of the compiler flags provided by cargo are hidden
query stack during panic:
end of query stack
error: could not compile `autodiff` (example "sin2" test) due to previous error
Julia has an error handler, as inspiration.
We don't need to fully return since we can't try any other AD tools here,
but before dying we should at least print the module to a file and print a link to a debug guide.
Use proper rust error highlighting too.
Can we generate a unique hash based on function name, type of bug (AA, TA, adjointGenerator, ...), other info,
so automatic rust reducer can just minimize based on that hash?
https://github.com/model-checking/kani/
Closing as solved through another approach.
I installed Enzyme with the cargo package, but trying to build my project with `cargo enzyme` immediately fails with:
Running: `RUSTFLAGS="--emit=llvm-bc" "cargo" "+enzyme" "-Zbuild-std" "rustc" "--target" "x86_64-unknown-linux-gnu" "--" "--emit=llvm-bc" "-g" "-Copt-level=3" "-Zno-link"`
error: failed to run `rustc` to learn about target-specific information
Caused by:
process didn't exit successfully: `rustc - --crate-name ___ --print=file-names --crate-type bin --crate-type rlib --crate-type dylib --crate-type cdylib --crate-type staticlib --crate-type proc-macro --print=sysroot --print=split-debuginfo --print=crate-name --print=cfg` (exit status: 1)
--- stderr
error: unknown print request `split-debuginfo`
thread 'main' panicked at 'Failed: `RUSTFLAGS="--emit=llvm-bc" "cargo" "+enzyme" "-Zbuild-std" "rustc" "--target" "x86_64-unknown-linux-gnu" "--" "--emit=llvm-bc" "-g" "-Copt-level=3" "-Zno-link"` (exit status: 101)', /home/iternal/.cargo/registry/src/index.crates.io-6f17d22bba15001f/enzyme-0.4.0/src/bin/run.rs:8:17
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
OS ubuntu 22.04, rustc 1.70.0-nightly (0c61c7a 2023-03-25)
I then invoked similar command for my version of Rust: rustc --print=split-debuginfo
Output:
off
packed
unpacked
Enzyme, however, downloaded another Rust version, 1.57.0, which doesn't support this command-line argument.
cmd: ~/.cache/enzyme/rustc-1.57.0-src/build/x86_64-unknown-linux-gnu/stage2/bin/rustc --print=split-debuginfo
output: error: unknown print request split-debuginfo
We don't support globals on the rust side yet, but maybe the support is also something we could improve on the Enzyme core side. Credits to Ralf Jung for the alternative approach, bugs in the example are my own production ^^
#[autodiff_shadow(dx)]
global x: f32 = 2.0;
global dx: f32 = 0.0;
#[autodiff(...)]
fn foo (y: f32) -> f32 {
return x * y
}
fn main() {
for i in someArr.par_iter() {
let dy = dfoo(i);
...
}
// will we get the right result?
println!("{dx}");
}
Since we already modify function types, can't we instead require shadows of globals to be passed as extra input to dfoo?
WIP
rustc: /h/292/drehwald/prog/rust/src/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp:8582: llvm::ScalarEvolution::ExitLimit llvm::ScalarEvolution::computeExitLimitFromSingleExitSwitch(const llvm::Loop*, llvm::SwitchInst*, llvm::BasicBlock*, bool): Assertion `L->contains(Switch->getDefaultDest()) && "Default case must not exit the loop!"' failed.
The examples do not compile.
I followed the build instructions for Linux and attempted to run one of the examples: `cargo +enzyme run --example rosenbrock_fwd_iter`. There are four compilation errors:
error: cannot find attribute `dup` in this scope
--> examples/rosenbrock_fwd_iter.rs:4:17
|
4 | fn rosenbrock(#[dup] x: &[f64; 2]) -> f64 {
| ^^^
error[E0659]: `autodiff` is ambiguous
--> examples/rosenbrock_fwd_iter.rs:3:3
|
3 | #[autodiff(d_rosenbrock, Forward, DuplicatedNoNeed)]
| ^^^^^^^^ ambiguous name
|
= note: ambiguous because of a name conflict with a builtin attribute
= note: `autodiff` could refer to a built-in attribute
note: `autodiff` could also refer to the attribute macro imported here
--> examples/rosenbrock_fwd_iter.rs:1:5
|
1 | use autodiff::autodiff;
| ^^^^^^^^^^^^^^^^^^
= help: use `crate::autodiff` to refer to this attribute macro unambiguously
error[E0425]: cannot find function `d_rosenbrock` in this scope
--> examples/rosenbrock_fwd_iter.rs:18:17
|
4 | fn rosenbrock(#[dup] x: &[f64; 2]) -> f64 {
| ----------------------------------------- similarly named function `rosenbrock` defined here
...
18 | let df_dx = d_rosenbrock(&x, &[1.0, 0.0]);
| ^^^^^^^^^^^^ help: a function with a similar name exists: `rosenbrock`
error[E0425]: cannot find function `d_rosenbrock` in this scope
--> examples/rosenbrock_fwd_iter.rs:19:17
|
4 | fn rosenbrock(#[dup] x: &[f64; 2]) -> f64 {
| ----------------------------------------- similarly named function `rosenbrock` defined here
...
19 | let df_dy = d_rosenbrock(&x, &[0.0, 1.0]);
| ^^^^^^^^^^^^ help: a function with a similar name exists: `rosenbrock`
The second error states there are two definitions of the `autodiff` attribute:
- `autodiff` could refer to a built-in attribute
- the `autodiff` attribute is also defined in /src/lib.rs
Two competing definitions of the attribute suggest a configuration issue. Am I missing a step?
Thank you for all your excellent work. Rust has a bright future with Enzyme for automatic differentiation.
Does Enzyme for Rust support Windows, or is Windows support planned?
Experiment with https://github.com/EnzymeAD/Enzyme.jl/blob/main/src/api.jl#L347
to see if we can add some slow enum/union support that way.
Let's collect some opinions on our current macro design to see if there is some common design that people would rather use than our current options:
It's fine that we disagree, but to summarize my point (which may well be misguided):
[compiler/rustc_codegen_llvm/src/llvm/ffi.rs:936:5] &fnc = (ptr:; Function Attrs: mustprogress nofree noinline nosync nounwind nonlazybind sanitize_hwaddress willreturn memory(argmem: read) uwtable
define internal noundef double @_ZN2ad3foo17h1df3884f5e83c68dE(ptr noalias nocapture noundef readonly align 8 dereferenceable(8) %0) unnamed_addr #5 {
%2 = load double, ptr %0, align 8, !noundef !6
%3 = tail call double @llvm.sin.f64(double %2)
ret double %3
}
)
[compiler/rustc_codegen_llvm/src/llvm/ffi.rs:958:5] &primary_ret = true
[compiler/rustc_codegen_llvm/src/llvm/ffi.rs:959:5] &ret_activity = DFT_OUT_DIFF
[compiler/rustc_codegen_llvm/src/llvm/ffi.rs:960:5] &input_activity = [
DFT_DUP_ARG,
]
num_fnc_args: 1
input_activity.len(): 1
[compiler/rustc_codegen_llvm/src/llvm/ffi.rs:983:5] &res = (ptr:; Function Attrs: mustprogress nofree noinline nosync nounwind nonlazybind sanitize_hwaddress willreturn memory(argmem: readwrite) uwtable
define internal { double } @diffe_ZN2ad3foo17h1df3884f5e83c68dE(ptr noalias nocapture noundef readonly align 8 dereferenceable(8) %0, ptr nocapture align 8 %1, double %2) unnamed_addr #111 {
%4 = load double, ptr %0, align 8, !alias.scope !41888, !noalias !41891, !noundef !6
%5 = tail call double @llvm.sin.f64(double %4) #112
%6 = call fast double @llvm.cos.f64(double %4)
%7 = fmul fast double %2, %6
%8 = load double, ptr %1, align 8, !alias.scope !41891, !noalias !41888
%9 = fadd fast double %8, %7
store double %9, ptr %1, align 8, !alias.scope !41891, !noalias !41888
%10 = insertvalue { double } undef, double %5, 0
ret { double } %10
}
)
rustc: /h/344/drehwald/prog/rust/src/llvm-project/llvm/include/llvm/Support/Casting.h:578: decltype(auto) llvm::cast(From*) [with To = llvm::VectorType; From = llvm::Type]: Assertion `isa<To>(Val) && "cast<Ty>() argument of incompatible type!"' failed.
warning: `ad` (bin "ad") generated 2 warnings (run `cargo fix --bin "ad"` to apply 1 suggestion)
error: could not compile `ad` (bin "ad"); 2 warnings emitted
Hi,
Thanks for open sourcing this amazing project. I'm wondering what is currently the best approach to use Enzyme in Rust projects? If anyone can point me towards a minimal working example/tutorial that will be great. Thanks!
Hi,
I'm wondering if there's any timeline for this project to be integrated into the upstream Rust compiler? As I understand it, the Rust compiler needs to be modified somewhat to emit the information this project needs. Are these changes too specific to ever get merged? It would be great if this could be installed as a package simply using cargo, instead of having to compile the whole compiler.
Sorry if there is information about this somewhere, I couldn't find it!
#![feature(bench_black_box)]
use autodiff::autodiff;
// Plain data record with four `u128` fields. `get_output` reads every field
// and converts it to `f64` with `as` before doing arithmetic on it.
#[derive(Debug, Clone, PartialEq)]
struct Foo {
x: u128,
y: u128,
r: u128,
fee_denumerator: u128,
}
// Primal function of this reproduction. It pushes `amt_in` through two chained
// rational expressions: the first uses the fields of `a`, the second uses the
// fields of `b` with the roles of `x` and `y` swapped. The final value is both
// written through `amt_out` and returned.
//
// NOTE(review): the attribute names `jac`, `Reverse` and three activity
// annotations; presumably it asks the macro to generate a reverse-mode
// derivative called `jac`. How three annotations map onto four parameters plus
// a return value is not visible here — confirm against the macro.
#[autodiff(jac, Reverse, Const, Active, DuplicatedNoNeed)]
fn get_output(amt_in: f64, amt_out: &mut f64, a: &Foo, b: &Foo) -> f64 {
// Stage 1: scale the input by `a.r`, then apply the ratio built from `a`.
let amount_in_fee = amt_in * a.r as f64;
let output = {
amount_in_fee * a.y as f64 / (a.fee_denumerator as f64 * a.x as f64 + amount_in_fee)
};
// Stage 2: both names are shadowed; the stage-1 result is the new input.
let amount_in_fee = output * b.r as f64;
let output = {
amount_in_fee * b.x as f64 / (b.fee_denumerator as f64 * b.y as f64 + amount_in_fee)
};
// Publish the result through the out-parameter and as the return value.
*amt_out = output;
output
}
// Bodiless declaration annotated with `jac` and `Forward`; it declares an
// `f64` return type. The E0308 diagnostic quoted below this snippet
// ("expected `()`, found `f64`") is reported at this attribute.
// NOTE(review): presumably the macro is expected to supply the body as the
// forward-mode derivative of `jac` — confirm.
#[autodiff(jac, Forward, Const, Const, Duplicated)]
fn hessian(x: f64, y: &mut f64, dy: &mut f64, d2y: &mut f64, a: &Foo, b: &Foo) -> f64;
// Driver for the reproduction: builds the two `Foo` inputs, calls `hessian`
// once and prints the scalars involved.
fn main() {
// `a` and `b` are the two records passed to `hessian` by shared reference.
let mut b = Foo {
x: 4848051834,
y: 543430617,
r: 9970,
fee_denumerator: 10000
};
let mut a = Foo {
x: 9907918868,
y: 1127604104,
r: 9985,
fee_denumerator: 10000
};
// `b_d` and `a_d` hold the same values as `b` and `a`; nothing in this
// function reads them after construction.
let mut b_d = Foo {
x: 4848051834,
y: 543430617,
r: 9970,
fee_denumerator: 10000
};
let mut a_d = Foo {
x: 9907918868,
y: 1127604104,
r: 9985,
fee_denumerator: 10000
};
let mut amt_in = 1.0;
// let mut amt_in = 1.0;
// `amt_in_d` and `amt_out_d3` are only printed; they are not passed to `hessian`.
let mut amt_in_d = 0.00;
let mut amt_in_d2 = 1.00;
let mut amt_out = 1.0;
let mut amt_out_d = 1.0;
let mut amt_out_d3 = 1.00;
// Argument order follows the `hessian` declaration: (x, y, dy, d2y, a, b).
let dg = hessian(amt_in, &mut amt_in_d2, &mut amt_out, &mut amt_out_d, &a, &b);
// Earlier variant of the call, left disabled by the reporter:
// let dg = jac(&mut amt_in, &mut amt_in_d, &mut amt_out, &mut amt_out_d, &a, &b);
println!("amt_in: {amt_in:?} amt_out: {amt_out:?} dg: {dg:?}");
println!("amt_in_d: {amt_in_d:?} amt_out_d: {amt_out_d:?} amt_in_d2: {amt_in_d2:?} amt_out_d3: {amt_out_d3:?}");
}
$ RUSTFLAGS="-Z macro-backtrace" cargo +enzyme run --release -Z unstable-options --config 'lto="fat"'
Compiling tstgrad v0.1.0 (/tmp/tstgrad)
error[E0308]: mismatched types
--> src/main.rs:26:1
|
26 | #[autodiff(jac, Forward, Const, Const, Duplicated)]
| -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| |
| expected `()`, found `f64`
| help: try adding a return type: `-> f64`
| in this procedural macro expansion
|
::: /**mask**/Enzyme-rust/library/autodiff/src/lib.rs:10:1
|
10 | pub fn autodiff(args: TokenStream, input: TokenStream) -> TokenStream {
| --------------------------------------------------------------------- in this expansion of `#[autodiff]`
For more information about this error, try `rustc --explain E0308`.
error: could not compile `tstgrad` (bin "tstgrad") due to previous error
fn f_energy(inputs: &[f32; N_R], y: &mut f32) {..}
Our macro currently requires the shadow of input to be & instead of &mut, which is obviously wrong. We should update that check.
use autodiff::autodiff;
use std::io;
// Struct mixing one `f32` field with two `i16` fields; `sin` takes it by reference.
// Layout notes from the original report (not verifiable from this snippet alone):
// it will be represented as {f32, i16, i16} when passed by reference,
// and as i64 if passed by value.
struct Foo {
c1: i16,
a: f32,
c2: i16,
}
// Primal function: panics unless `x.c1 < x.c2`, then returns `f32::sin(x.a)`.
// Only the float field `a` contributes to the result; the two integer fields
// are used solely by the assertion.
// NOTE(review): the attribute names `cos` and `Reverse`; presumably it asks the
// macro to generate a reverse-mode derivative called `cos` — confirm.
#[autodiff(cos, Reverse, Active, Duplicated)]
fn sin(x: &Foo) -> f32 {
assert!(x.c1 < x.c2);
f32::sin(x.a)
}
// Driver: reads `c2` from standard input, then calls the generated `cos` and
// prints the `a` field of `df_dfoo` before and after the call, next to `f32::cos(foo.a)`.
fn main() {
// Read one line from stdin and parse it as `i16`; both steps panic on failure (`unwrap`).
let mut s = String::new();
io::stdin().read_line(&mut s).unwrap();
let c2 = s.trim_end().parse::<i16>().unwrap();
dbg!(c2);
// `foo` is the input; `df_dfoo` has the same integer fields but `a` starts at 0.0.
let foo = Foo { c1: 4, a: 3.14, c2 };
let mut df_dfoo = Foo { c1: 4, a: 0.0, c2 };
dbg!(df_dfoo.a);
// NOTE(review): `df_dfoo` is presumably the shadow that receives the gradient
// and `1.0` the seed for the return value — confirm against the macro.
dbg!(cos(&foo, &mut df_dfoo, 1.0));
dbg!(df_dfoo.a);
// Printed alongside `df_dfoo.a` for manual comparison.
dbg!(f32::cos(foo.a));
}
analyzing function preprocess__ZN6struct3sin17hc3800a79908d474dE
+ knowndata: ptr %0 : {[-1]:Pointer, [-1,0]:Float@float, [-1,4]:Integer, [-1,5]:Integer, [-1,6]:Integer, [-1,7]:Integer, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer} - {}
+ retdata: {[-1]:Float@float}
updating analysis of val: ptr %0 current: {} new {[-1]:Pointer, [-1,0]:Float@float, [-1,4]:Integer, [-1,5]:Integer, [-1,6]:Integer, [-1,7]:Integer, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer} from ptr %0 Changed=1 legal=1
updating analysis of val: ptr %0 current: {[-1]:Pointer, [-1,0]:Float@float, [-1,4]:Integer, [-1,5]:Integer, [-1,6]:Integer, [-1,7]:Integer, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer} new {[-1]:Pointer, [-1,0]:Float@float, [-1,4]:Integer, [-1,5]:Integer, [-1,6]:Integer, [-1,7]:Integer, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer} from ptr %0 Changed=0 legal=1
updating analysis of val: %9 = tail call float @llvm.sin.f32(float %8) #102 current: {} new {[-1]:Float@float} from %9 = tail call float @llvm.sin.f32(float %8) #102 Changed=1 legal=1
updating analysis of val: %9 = tail call float @llvm.sin.f32(float %8) #102 current: {[-1]:Float@float} new {[-1]:Float@float} from %9 = tail call float @llvm.sin.f32(float %8) #102 Changed=0 legal=1
updating analysis of val: %2 = getelementptr inbounds %2, ptr %0, i64 0, i32 1 current: {} new {[-1]:Pointer} from %2 = getelementptr inbounds %2, ptr %0, i64 0, i32 1 Changed=1 legal=1
updating analysis of val: %2 = getelementptr inbounds %2, ptr %0, i64 0, i32 1 current: {[-1]:Pointer} new {[-1]:Pointer} from %2 = getelementptr inbounds %2, ptr %0, i64 0, i32 1 Changed=0 legal=1
updating analysis of val: ptr %0 current: {[-1]:Pointer, [-1,0]:Float@float, [-1,4]:Integer, [-1,5]:Integer, [-1,6]:Integer, [-1,7]:Integer, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer} new {[-1]:Pointer} from %2 = getelementptr inbounds %2, ptr %0, i64 0, i32 1 Changed=0 legal=1
updating analysis of val: %2 = getelementptr inbounds %2, ptr %0, i64 0, i32 1 current: {[-1]:Pointer} new {[-1]:Pointer, [-1,0]:Integer, [-1,1]:Integer} from %2 = getelementptr inbounds %2, ptr %0, i64 0, i32 1 Changed=1 legal=1
updating analysis of val: ptr %0 current: {[-1]:Pointer, [-1,0]:Float@float, [-1,4]:Integer, [-1,5]:Integer, [-1,6]:Integer, [-1,7]:Integer, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer} new {[-1]:Pointer} from %2 = getelementptr inbounds %2, ptr %0, i64 0, i32 1 Changed=0 legal=1
updating analysis of val: %2 = getelementptr inbounds %2, ptr %0, i64 0, i32 1 current: {[-1]:Pointer, [-1,0]:Integer, [-1,1]:Integer} new {[-1]:Pointer} from %3 = load i16, ptr %2, align 4, !noundef !4 Changed=0 legal=1
updating analysis of val: %3 = load i16, ptr %2, align 4, !noundef !4 current: {} new {[-1]:Integer} from %3 = load i16, ptr %2, align 4, !noundef !4 Changed=1 legal=1
updating analysis of val: %4 = getelementptr inbounds %2, ptr %0, i64 0, i32 2 current: {} new {[-1]:Pointer} from %4 = getelementptr inbounds %2, ptr %0, i64 0, i32 2 Changed=1 legal=1
updating analysis of val: %4 = getelementptr inbounds %2, ptr %0, i64 0, i32 2 current: {[-1]:Pointer} new {[-1]:Pointer} from %4 = getelementptr inbounds %2, ptr %0, i64 0, i32 2 Changed=0 legal=1
updating analysis of val: ptr %0 current: {[-1]:Pointer, [-1,0]:Float@float, [-1,4]:Integer, [-1,5]:Integer, [-1,6]:Integer, [-1,7]:Integer, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer} new {[-1]:Pointer} from %4 = getelementptr inbounds %2, ptr %0, i64 0, i32 2 Changed=0 legal=1
updating analysis of val: %4 = getelementptr inbounds %2, ptr %0, i64 0, i32 2 current: {[-1]:Pointer} new {[-1]:Pointer, [-1,0]:Integer, [-1,1]:Integer} from %4 = getelementptr inbounds %2, ptr %0, i64 0, i32 2 Changed=1 legal=1
updating analysis of val: ptr %0 current: {[-1]:Pointer, [-1,0]:Float@float, [-1,4]:Integer, [-1,5]:Integer, [-1,6]:Integer, [-1,7]:Integer, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer} new {[-1]:Pointer} from %4 = getelementptr inbounds %2, ptr %0, i64 0, i32 2 Changed=0 legal=1
updating analysis of val: %4 = getelementptr inbounds %2, ptr %0, i64 0, i32 2 current: {[-1]:Pointer, [-1,0]:Integer, [-1,1]:Integer} new {[-1]:Pointer} from %5 = load i16, ptr %4, align 2, !noundef !4 Changed=0 legal=1
updating analysis of val: %5 = load i16, ptr %4, align 2, !noundef !4 current: {} new {[-1]:Integer} from %5 = load i16, ptr %4, align 2, !noundef !4 Changed=1 legal=1
updating analysis of val: %6 = icmp slt i16 %3, %5 current: {} new {[-1]:Integer} from %6 = icmp slt i16 %3, %5 Changed=1 legal=1
updating analysis of val: %3 = load i16, ptr %2, align 4, !noundef !4 current: {[-1]:Integer} new {[-1]:Integer} from %6 = icmp slt i16 %3, %5 Changed=0 legal=1
updating analysis of val: %5 = load i16, ptr %4, align 2, !noundef !4 current: {[-1]:Integer} new {[-1]:Integer} from %6 = icmp slt i16 %3, %5 Changed=0 legal=1
skipping update into ptr %0 of {[-1]:Pointer} from %8 = load float, ptr %0, align 4, !noundef !4
updating analysis of val: %8 = load float, ptr %0, align 4, !noundef !4 current: {} new {[-1]:Float@float} from %8 = load float, ptr %0, align 4, !noundef !4 Changed=1 legal=1
updating analysis of val: %9 = tail call float @llvm.sin.f32(float %8) #102 current: {[-1]:Float@float} new {[-1]:Float@float} from %9 = tail call float @llvm.sin.f32(float %8) #102 Changed=0 legal=1
updating analysis of val: %8 = load float, ptr %0, align 4, !noundef !4 current: {[-1]:Float@float} new {[-1]:Float@float} from %9 = tail call float @llvm.sin.f32(float %8) #102 Changed=0 legal=1
updating analysis of val: %2 = getelementptr inbounds %2, ptr %0, i64 0, i32 1 current: {[-1]:Pointer, [-1,0]:Integer, [-1,1]:Integer} new {[-1]:Pointer} from %2 = getelementptr inbounds %2, ptr %0, i64 0, i32 1 Changed=0 legal=1
updating analysis of val: %2 = getelementptr inbounds %2, ptr %0, i64 0, i32 1 current: {[-1]:Pointer, [-1,0]:Integer, [-1,1]:Integer} new {[-1]:Pointer} from %2 = getelementptr inbounds %2, ptr %0, i64 0, i32 1 Changed=0 legal=1
updating analysis of val: ptr %0 current: {[-1]:Pointer, [-1,0]:Float@float, [-1,4]:Integer, [-1,5]:Integer, [-1,6]:Integer, [-1,7]:Integer, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer} new {[-1]:Pointer} from %2 = getelementptr inbounds %2, ptr %0, i64 0, i32 1 Changed=0 legal=1
updating analysis of val: %2 = getelementptr inbounds %2, ptr %0, i64 0, i32 1 current: {[-1]:Pointer, [-1,0]:Integer, [-1,1]:Integer} new {[-1]:Pointer, [-1,0]:Integer, [-1,1]:Integer} from %2 = getelementptr inbounds %2, ptr %0, i64 0, i32 1 Changed=0 legal=1
updating analysis of val: ptr %0 current: {[-1]:Pointer, [-1,0]:Float@float, [-1,4]:Integer, [-1,5]:Integer, [-1,6]:Integer, [-1,7]:Integer, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer} new {[-1]:Pointer, [-1,4]:Integer, [-1,5]:Integer} from %2 = getelementptr inbounds %2, ptr %0, i64 0, i32 1 Changed=0 legal=1
updating analysis of val: %4 = getelementptr inbounds %2, ptr %0, i64 0, i32 2 current: {[-1]:Pointer, [-1,0]:Integer, [-1,1]:Integer} new {[-1]:Pointer} from %4 = getelementptr inbounds %2, ptr %0, i64 0, i32 2 Changed=0 legal=1
updating analysis of val: %4 = getelementptr inbounds %2, ptr %0, i64 0, i32 2 current: {[-1]:Pointer, [-1,0]:Integer, [-1,1]:Integer} new {[-1]:Pointer} from %4 = getelementptr inbounds %2, ptr %0, i64 0, i32 2 Changed=0 legal=1
updating analysis of val: ptr %0 current: {[-1]:Pointer, [-1,0]:Float@float, [-1,4]:Integer, [-1,5]:Integer, [-1,6]:Integer, [-1,7]:Integer, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer} new {[-1]:Pointer} from %4 = getelementptr inbounds %2, ptr %0, i64 0, i32 2 Changed=0 legal=1
updating analysis of val: %4 = getelementptr inbounds %2, ptr %0, i64 0, i32 2 current: {[-1]:Pointer, [-1,0]:Integer, [-1,1]:Integer} new {[-1]:Pointer, [-1,0]:Integer, [-1,1]:Integer} from %4 = getelementptr inbounds %2, ptr %0, i64 0, i32 2 Changed=0 legal=1
updating analysis of val: ptr %0 current: {[-1]:Pointer, [-1,0]:Float@float, [-1,4]:Integer, [-1,5]:Integer, [-1,6]:Integer, [-1,7]:Integer, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer} new {[-1]:Pointer, [-1,6]:Integer, [-1,7]:Integer} from %4 = getelementptr inbounds %2, ptr %0, i64 0, i32 2 Changed=0 legal=1
updating analysis of val: %2 = getelementptr inbounds %2, ptr %0, i64 0, i32 1 current: {[-1]:Pointer, [-1,0]:Integer, [-1,1]:Integer} new {[-1]:Pointer, [-1,0]:Integer, [-1,1]:Integer} from %3 = load i16, ptr %2, align 4, !noundef !4 Changed=0 legal=1
updating analysis of val: %3 = load i16, ptr %2, align 4, !noundef !4 current: {[-1]:Integer} new {[-1]:Integer} from %3 = load i16, ptr %2, align 4, !noundef !4 Changed=0 legal=1
updating analysis of val: %4 = getelementptr inbounds %2, ptr %0, i64 0, i32 2 current: {[-1]:Pointer, [-1,0]:Integer, [-1,1]:Integer} new {[-1]:Pointer, [-1,0]:Integer, [-1,1]:Integer} from %5 = load i16, ptr %4, align 2, !noundef !4 Changed=0 legal=1
updating analysis of val: %5 = load i16, ptr %4, align 2, !noundef !4 current: {[-1]:Integer} new {[-1]:Integer} from %5 = load i16, ptr %4, align 2, !noundef !4 Changed=0 legal=1
Finished release [optimized] target(s) in 0.28s
Here is the extra debug info:
30 [compiler/rustc_codegen_llvm/src/llvm/ffi.rs:942:5] &fnc = (ptr:; Function Attrs: mustprogress nofree noinline nosync nounwind nonlazybind sanitize_hwaddress willreturn memory(argmem: read) uwtable
29 define internal noundef double @_ZN2ad3foo17h901f7b61e40dfe72E(ptr noalias nocapture noundef readonly align 8 dereferenceable(8) %0) unnamed_addr #5 {
28 %2 = load double, ptr %0, align 8, !noundef !6
27 %3 = tail call double @llvm.sin.f64(double %2)
26 ret double %3
25 }
24 )
23 num_fnc_args: 1
22 input_activity.len(): 1
21 analyzing function preprocess__ZN2ad3foo17h901f7b61e40dfe72E
20 + knowndata: ptr %0 : {[-1]:Pointer, [-1,-1]:Float@double} - {}
19 + retdata: {[-1]:Float@double}
18 updating analysis of val: ptr %0 current: {} new {[-1]:Pointer, [-1,-1]:Float@double} from ptr %0 Changed=1 legal=1
17 updating analysis of val: ptr %0 current: {[-1]:Pointer, [-1,-1]:Float@double} new {[-1]:Pointer, [-1,-1]:Float@double} from ptr %0 Changed=0 legal=1
16 updating analysis of val: %3 = tail call double @llvm.sin.f64(double %2) #112 current: {} new {[-1]:Float@double} from %3 = tail call double @llvm.sin.f64(double %2) #112 Changed=1 legal=1
15 updating analysis of val: %3 = tail call double @llvm.sin.f64(double %2) #112 current: {[-1]:Float@double} new {[-1]:Float@double} from %3 = tail call double @llvm.sin.f64(double %2) #112 Changed=0 legal=1
14 updating analysis of val: ptr %0 current: {[-1]:Pointer, [-1,-1]:Float@double} new {[-1]:Pointer} from %2 = load double, ptr %0, align 8, !noundef !6 Changed=0 legal=1
13 updating analysis of val: %2 = load double, ptr %0, align 8, !noundef !6 current: {} new {[-1]:Float@double} from %2 = load double, ptr %0, align 8, !noundef !6 Changed=1 legal=1
12 updating analysis of val: %3 = tail call double @llvm.sin.f64(double %2) #112 current: {[-1]:Float@double} new {[-1]:Float@double} from %3 = tail call double @llvm.sin.f64(double %2) #112 Changed=0 legal=1
11 updating analysis of val: %2 = load double, ptr %0, align 8, !noundef !6 current: {[-1]:Float@double} new {[-1]:Float@double} from %3 = tail call double @llvm.sin.f64(double %2) #112 Changed=0 legal=1
And here is the error message:
21 fn:; Function Attrs: mustprogress nofree noinline nosync nounwind nonlazybind sanitize_hwaddress willreturn memory(argmem: readwrite) uwtable
20 define internal noundef { double, double } @diffe_ZN2ad3foo17h901f7b61e40dfe72E(ptr noalias nocapture noundef readonly align 8 dereferenceable(8) %0, ptr nocapture align 8 %1) unnamed_addr #111 {
19 %3 = alloca double, align 8
18 %4 = alloca double, align 8
17 %5 = load double, ptr %0, align 8, !noundef !6
16 %6 = tail call double @llvm.sin.f64(double %5) #112
15 store double %6, ptr %4, align 8
14 ret double %6
13
12 7: ; No predecessors!
11
10 8: ; No predecessors!
9 }
8
7 oval= %3 = tail call double @llvm.sin.f64(double %2) #112 icv=0
6 available inversion for ptr %0 of ptr %1
5 rustc: /h/344/drehwald/prog/rust/src/tools/enzyme/enzyme/Enzyme/GradientUtils.cpp:6147: llvm::Value* GradientUtils::invertPointerM(llvm::Value*, llvm::IRBuilder<>&, bool ): Assertion `0 && "cannot find deal with ptr that isnt arg"' failed.
Not reproducible on CE.
Now that we got basic functionality working, let's start tracking "Nice to have" features.
Optimization:
We want to run Enzyme on (O3) optimized input, which hasn't been unrolled or vectorized.
In rust-lang@ba6a9b1 I deactivated these three optimizations: unroll_loops, config.vectorize_slp, config.vectorize_loop.
In the next step we want to optimize the LLVMModule again, this time with those three parameters set to their original value. Eventually we then should tell Enzyme to not optimize generated functions anymore, since it will be done along with the rest of the module, to avoid optimizing 3x. A good example of why we need this is https://github.com/rust-ml/autodiff-examples/blob/f54363ab03a827ec2377f11c969f6084a9a4a135/examples/recursive.rs#L30, which by default gets unrolled by rustc, leading to a huge code bloat.
Documentation and testcases would be nice. Started in https://github.com/rust-ml/autodiff-examples
EnzymeRegisterAllocationHandler to handle Rust users that use #[global_allocator] (low priority).
Add flag to print information about allocations in the reverse pass (primal too?)
Use field_shuffle_seed to verify that we create correct TypeTrees.
Allow easier alternating between EnzymeDbg and EnzymeRel
Enable the Enzyme Dbg parser for Rust types unconditionally.
Even simple examples like sin(*x) segfault in release mode. Investigating.
[compiler/rustc_codegen_llvm/src/llvm/ffi.rs:936:5] &fnc = (ptr:; Function Attrs: mustprogress nofree noinline nosync nounwind nonlazybind sanitize_hwaddress willreturn memory(argmem: read) uwtable
define internal noundef double @_ZN2ad3foo17h962107b3e3405598E(ptr noalias nocapture noundef readonly align 8 dereferenceable(8) %0) unnamed_addr #5 {
%2 = load double, ptr %0, align 8, !noundef !6
%3 = tail call double @llvm.sin.f64(double %2)
ret double %3
}
)
[compiler/rustc_codegen_llvm/src/llvm/ffi.rs:958:5] &primary_ret = true
[compiler/rustc_codegen_llvm/src/llvm/ffi.rs:959:5] &ret_activity = DFT_OUT_DIFF
[compiler/rustc_codegen_llvm/src/llvm/ffi.rs:960:5] &input_activity = [
DFT_DUP_ARG,
]
[compiler/rustc_codegen_llvm/src/llvm/ffi.rs:983:5] &res = (ptr:; Function Attrs: mustprogress nofree noinline nosync nounwind nonlazybind sanitize_hwaddress willreturn memory(argmem: readwrite) uwtable
define internal { double } @diffe_ZN2ad3foo17h962107b3e3405598E(ptr noalias nocapture noundef readonly align 8 dereferenceable(8) %0, ptr nocapture align 8 %1, double %2) unnamed_addr #111 {
%4 = load double, ptr %0, align 8, !alias.scope !41888, !noalias !41891, !noundef !6
%5 = tail call double @llvm.sin.f64(double %4) #112
%6 = call fast double @llvm.cos.f64(double %4)
%7 = fmul fast double %2, %6
%8 = load double, ptr %1, align 8, !alias.scope !41891, !noalias !41888
%9 = fadd fast double %8, %7
store double %9, ptr %1, align 8, !alias.scope !41891, !noalias !41888
%10 = insertvalue { double } undef, double %5, 0
ret { double } %10
}
)
[compiler/rustc_codegen_llvm/src/back/write.rs:696:5] &outer_fnc = (ptr:define double @_ZN2ad3bar17h041e2a630fee488fE.852(ptr %0, ptr %1, double %2) {
%4 = call { double } @inner__ZN2ad3bar17h041e2a630fee488fE(ptr %0, ptr %1, double %2)
%5 = extractvalue { double } %4, 0
ret double %5
}
)
Running it results in crashing when calling the outer fnc.
warning: `ad` (bin "ad") generated 3 warnings (run `cargo fix --bin "ad"` to apply 1 suggestion)
Finished release [optimized] target(s) in 0.01s
Running `target/release/ad`
primal result: 5569.144408652917
[1] 316614 illegal hardware instruction (core dumped) cargo +enzyme run --release
https://github.com/EnzymeAD
Can you please unpin oxide-enzyme and pin this repo?
#![feature(bench_black_box)]
use autodiff::autodiff;
use std::ptr;
// Primal function: collects references to every element of `x` into a new
// `Vec<&f32>` and returns the first element by value. Indexing `res[0]`
// panics if `x` is empty.
// NOTE(review): the attribute names `sin_vec` and `Reverse`, and `x` carries a
// `#[dup]` marker; presumably this requests a reverse-mode derivative called
// `sin_vec` with a shadow argument for `x` — confirm against the macro.
#[autodiff(sin_vec, Reverse, Active)]
fn cos_vec(#[dup] x: &Vec<f32>) -> f32 {
// Reporter's note: the collect below uses an enum internally and breaks.
// uses enum internally and breaks
let res = x.into_iter().collect::<Vec<&f32>>();
*res[0]
}
// Driver: calls the generated `sin_vec` on a three-element vector and prints
// both vectors afterwards.
fn main() {
let x = vec![1.0, 1.0, 1.0];
// Same length as `x`, initialised to zero and passed mutably to `sin_vec`.
let mut d_x = vec![0.0; 3];
// NOTE(review): `d_x` is presumably the shadow of `x` and `1.0` the seed for
// the return value — confirm against the macro.
sin_vec(&x, &mut d_x, 1.0);
dbg!(&d_x, &x);
}
updating analysis of val: %37 = tail call noundef ptr @__rust_alloc(i64 noundef %2, i64 noundef %1) #105 current: {[-1]:Pointer} new {[-1]:Pointer} from %37 = tail call noundef ptr @__rust_alloc(i64 noundef %2, i64 noundef %1) #105 Changed=0 legal=1
analyzing function preprocess___rust_realloc
+ knowndata: ptr %0 : {} - {}
+ knowndata: i64 %1 : {} - {}
+ knowndata: i64 %2 : {} - {}
+ knowndata: i64 %3 : {} - {}
+ retdata: {[-1]:Pointer}
updating analysis of val: ptr %0 current: {} new {} from ptr %0 Changed=0 legal=1
updating analysis of val: i64 %1 current: {} new {} from i64 %1 Changed=0 legal=1
updating analysis of val: i64 %2 current: {} new {} from i64 %2 Changed=0 legal=1
updating analysis of val: i64 %3 current: {} new {} from i64 %3 Changed=0 legal=1
updating analysis of val: ptr %0 current: {} new {} from ptr %0 Changed=0 legal=1
updating analysis of val: i64 %1 current: {} new {} from i64 %1 Changed=0 legal=1
updating analysis of val: i64 %2 current: {} new {} from i64 %2 Changed=0 legal=1
updating analysis of val: i64 %3 current: {} new {} from i64 %3 Changed=0 legal=1
updating analysis of val: %5 = tail call ptr @__rdl_realloc(ptr %0, i64 %1, i64 %2, i64 %3) #105 current: {} new {[-1]:Pointer} from %5 = tail call ptr @__rdl_realloc(ptr %0, i64 %1, i64 %2, i64 %3) #105 Changed=1 legal=1
updating analysis of val: %5 = tail call ptr @__rdl_realloc(ptr %0, i64 %1, i64 %2, i64 %3) #105 current: {[-1]:Pointer} new {[-1]:Pointer} from %5 = tail call ptr @__rdl_realloc(ptr %0, i64 %1, i64 %2, i64 %3) #105 Changed=0 legal=1
starting IPO of %5 = tail call ptr @__rdl_realloc(ptr %0, i64 %1, i64 %2, i64 %3) #105
ending IPO of %5 = tail call ptr @__rdl_realloc(ptr %0, i64 %1, i64 %2, i64 %3) #105
updating ptr %0 = {} via IPO of %5 = tail call ptr @__rdl_realloc(ptr %0, i64 %1, i64 %2, i64 %3) #105 arg ptr %0
updating analysis of val: ptr %0 current: {} new {} from %5 = tail call ptr @__rdl_realloc(ptr %0, i64 %1, i64 %2, i64 %3) #105 Changed=0 legal=1
updating i64 %1 = {} via IPO of %5 = tail call ptr @__rdl_realloc(ptr %0, i64 %1, i64 %2, i64 %3) #105 arg i64 %1
updating analysis of val: i64 %1 current: {} new {} from %5 = tail call ptr @__rdl_realloc(ptr %0, i64 %1, i64 %2, i64 %3) #105 Changed=0 legal=1
updating i64 %2 = {[-1]:Integer} via IPO of %5 = tail call ptr @__rdl_realloc(ptr %0, i64 %1, i64 %2, i64 %3) #105 arg i64 %2
updating analysis of val: i64 %2 current: {} new {[-1]:Integer} from %5 = tail call ptr @__rdl_realloc(ptr %0, i64 %1, i64 %2, i64 %3) #105 Changed=1 legal=1
updating i64 %3 = {[-1]:Integer} via IPO of %5 = tail call ptr @__rdl_realloc(ptr %0, i64 %1, i64 %2, i64 %3) #105 arg i64 %3
updating analysis of val: i64 %3 current: {} new {[-1]:Integer} from %5 = tail call ptr @__rdl_realloc(ptr %0, i64 %1, i64 %2, i64 %3) #105 Changed=1 legal=1
updating analysis of val: %5 = tail call ptr @__rdl_realloc(ptr %0, i64 %1, i64 %2, i64 %3) #105 current: {[-1]:Pointer} new {[-1]:Pointer} from %5 = tail call ptr @__rdl_realloc(ptr %0, i64 %1, i64 %2, i64 %3) #105 Changed=0 legal=1
error: <unknown>:0:0: in function preprocess___rdl_realloc ptr (ptr, i64, i64, i64): Enzyme: could not statically determine size of realloc %23 = tail call noundef ptr @realloc(ptr noundef %0, i64 noundef %3) #107 - because of - ptr %0
We now use enzyme as a submodule rather than subtree.
As a consequence we need to manually init the enzyme submodule.
We should check where other submodules are registered, to allow x.py to handle that automatically when required.
@bytesnake Rust doesn't allow setting fast math yet due to UB concerns.
I'd be happy to expose it for testing purpose though?
// #![feature(panic_unwind)]
#![feature(bench_black_box)]
// extern crate panic_unwind;
use autodiff::autodiff;
// Plain data record with four `u128` fields. `get_output` reads every field
// and converts it to `f64` with `as` before doing arithmetic on it.
#[derive(Debug, Clone, PartialEq)]
struct Foo {
x: u128,
y: u128,
r: u128,
fee_denumerator: u128,
}
// Primal function of this reproduction. It reads `*amt_in`, pushes it through
// two chained rational expressions (first built from `a`, then from `b` with
// the roles of `x` and `y` swapped) and writes the result through `amt_out`.
// It returns nothing; the out-parameter is the only output.
fn get_output(amt_in: &f64, amt_out: &mut f64, a: &Foo, b: &Foo) {
// Stage 1: scale the input by `a.r`, then apply the ratio built from `a`.
let amount_in_fee = *amt_in * a.r as f64;
let output = {
amount_in_fee * a.y as f64 / (a.fee_denumerator as f64 * a.x as f64 + amount_in_fee)
};
// Stage 2: both names are shadowed; the stage-1 result is the new input.
let amount_in_fee = output * b.r as f64;
let output = {
amount_in_fee * b.x as f64 / (b.fee_denumerator as f64 * b.y as f64 + amount_in_fee)
};
*amt_out = output
}
// Bodiless declaration annotated with `get_output` and `Reverse`. Compared with
// `get_output`, the parameter list gains `d_x` after `x` and `y_t` after `y`.
// NOTE(review): presumably the macro fills this in as the reverse-mode
// derivative of `get_output`, with `d_x` and `y_t` as the shadows — confirm.
#[autodiff(get_output, Reverse, Const, Duplicated, Duplicated)]
fn jac(x: &f64, d_x: &mut f64, y: &mut f64, y_t: &f64, a: &Foo, b: &Foo);
// Bodiless declaration annotated with `jac` and `Forward`. Compared with `jac`,
// the parameter list gains `y_x` after `x`.
// NOTE(review): presumably the macro fills this in as the forward-mode
// derivative of `jac`, i.e. forward-over-reverse — confirm.
#[autodiff(jac, Forward, Const, Duplicated, Const, Const, Const)]
fn hessian(x: &f64, y_x: &f64, y: &mut f64, d_x: &mut f64, y_t: &f64, a: &Foo, b: &Foo);
// Driver for the reproduction: builds the `Foo` inputs, calls `hessian` once
// and prints every value involved.
fn main() {
// `a` and `b` are the two records passed to `hessian` by shared reference.
let mut b = Foo {
x: 4848051834,
y: 543430617,
r: 9970,
fee_denumerator: 10000
};
let mut a = Foo {
x: 9907918868,
y: 1127604104,
r: 9985,
fee_denumerator: 10000
};
// `b_d` and `a_d` hold the same values as `b` and `a`; they are only printed
// below and never passed to `hessian`.
let mut b_d = Foo {
x: 4848051834,
y: 543430617,
r: 9970,
fee_denumerator: 10000
};
let mut a_d = Foo {
x: 9907918868,
y: 1127604104,
r: 9985,
fee_denumerator: 10000
};
let mut amt_in = 0.0;
let mut amt_in_d = 0.0;
let mut amt_out = 1.0;
let mut amt_out_d = 0.0;
// Argument order follows the `hessian` declaration: (x, y_x, y, d_x, y_t, a, b).
// `hessian` declares no return type, so `dg` is bound to `()`.
let dg = hessian(&amt_in, &amt_in_d, &mut amt_out, &mut amt_out_d, &1.0, &a, &b);
println!("amt_in: {amt_in:?} amt_out: {amt_out:?} a: {a:?} b: {b:?}");
println!("amt_in_d: {amt_in_d:?} amt_out_d: {amt_out_d:?} a_d: {a_d:?} b_d: {b_d:?}");
}
!56060 = !{!56061, !56063}
!56061 = distinct !{!56061, !56062, !"_ZN4core9core_arch3x865cpuid13__cpuid_count17h37f6041374c32dcdE: argument 0"}
!56062 = distinct !{!56062, !"_ZN4core9core_arch3x865cpuid13__cpuid_count17h37f6041374c32dcdE"}
!56063 = distinct !{!56063, !56064, !"_ZN4core9core_arch3x865cpuid7__cpuid17h5847c8daa9f3b80fE: argument 0"}
!56064 = distinct !{!56064, !"_ZN4core9core_arch3x865cpuid7__cpuid17h5847c8daa9f3b80fE"}
!56065 = !{!56066}
!56066 = distinct !{!56066, !56067, !"_ZN4core9core_arch3x865cpuid13__cpuid_count17h37f6041374c32dcdE: argument 0"}
!56067 = distinct !{!56067, !"_ZN4core9core_arch3x865cpuid13__cpuid_count17h37f6041374c32dcdE"}
!56068 = !{!56069}
!56069 = distinct !{!56069, !56070, !"primal"}
!56070 = distinct !{!56070, !" diff: %"}
!56071 = !{!56072, !30828, !30830}
!56072 = distinct !{!56072, !56070, !"shadow_0"}
!56073 = !{!56072}
!56074 = !{!56069, !30828, !30830}
!56075 = !{!56076}
!56076 = distinct !{!56076, !56077, !"primal"}
!56077 = distinct !{!56077, !" diff: %"}
!56078 = !{!56079, !30828, !30830}
!56079 = distinct !{!56079, !56077, !"shadow_0"}
!56080 = !{!56079}
!56081 = !{!56076, !30828, !30830}
fn:; Function Attrs: mustprogress noinline nounwind nonlazybind willreturn
define internal void @fwddiffe_ZN7tstgrad3jac17hc199163f1b8f9085E(double* noalias nocapture noundef readonly align 8 dereferenceable(8) %0, double* nocapture %1, double* noalias noundef align 8 dereferenceable(8) %2, double* noalias nocapture noundef writeonly align 8 dereferenceable(8) %3, double* noalias noundef readonly align 8 dereferenceable(8) %4, %564* noalias nocapture noundef readonly align 8 dereferenceable(64) %5, %564* noalias nocapture noundef readonly align 8 dereferenceable(64) %6) unnamed_addr #63 {
%8 = alloca { i64*, i64* }, align 8
store { i64*, i64* } zeroinitializer, { i64*, i64* }* %8, align 8
%9 = alloca { i64*, i64* }, align 8
call void @fwddiffe_ZN7tstgrad10primal_jac17hb4b923b27533c244E(double* %0, double* %1, double* %3, %564* %5, %564* %6)
%10 = phi i8*
%11 = bitcast { i64*, i64* }* %8 to double**
%12 = bitcast { i64*, i64* }* %9 to double**
store double* %2, double** %12, align 8, !alias.scope !56068, !noalias !56071
store double* %2, double** %11, align 8, !alias.scope !56073, !noalias !56074
%13 = getelementptr inbounds { i64*, i64* }, { i64*, i64* }* %8, i64 0, i32 1
%14 = getelementptr inbounds { i64*, i64* }, { i64*, i64* }* %9, i64 0, i32 1
%15 = bitcast i64** %13 to double**
%16 = bitcast i64** %14 to double**
store double* %4, double** %16, align 8, !alias.scope !56075, !noalias !56078
store double* %4, double** %15, align 8, !alias.scope !56080, !noalias !56081
call void asm sideeffect "", "r,~{memory}"({ i64*, i64* }* nonnull %9) #64, !srcloc !30816
call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %10) #64, !noalias !30827
ret void
17: ; No predecessors!
}
oval=void ({ i64*, i64* }*)* asm sideeffect "", "r,~{memory}" icv=0
available inversion for %10 = getelementptr inbounds { i64*, i64* }, { i64*, i64* }* %7, i64 0, i32 1 of %13 = getelementptr inbounds { i64*, i64* }, { i64*, i64* }* %8, i64 0, i32 1
available inversion for %11 = bitcast i64** %10 to double** of %15 = bitcast i64** %13 to double**
available inversion for %9 = bitcast { i64*, i64* }* %7 to double** of %11 = bitcast { i64*, i64* }* %8 to double**
available inversion for double* %0 of double* %1
available inversion for %7 = alloca { i64*, i64* }, align 8 of %8 = alloca { i64*, i64* }, align 8
rustc: **mask**/Enzyme-rust/src/tools/enzyme/enzyme/Enzyme/GradientUtils.cpp:5071: llvm::Value* GradientUtils::invertPointerM(llvm::Value*, llvm::IRBuilder<>&, bool): Assertion `0 && "cannot find deal with ptr that isnt arg"' failed.
warning: `tstgrad` (bin "tstgrad") generated 7 warnings (run `cargo fix --bin "tstgrad"` to apply 7 suggestions)
error: could not compile `tstgrad` (bin "tstgrad"); 7 warnings emitted
Caused by:
process didn't exit successfully: `rustc --crate-name tstgrad --edition=2021 src/main.rs --error-format=json --json=diagnostic-rendered-ansi,artifacts,future-incompat --diagnostic-width=317 --crate-type bin --emit=dep-info,link -C opt-level=3 -C panic=abort -C lto -C codegen-units=1 -C metadata=2497671c077fe0c3 -C extra-filename=-2497671c077fe0c3 --out-dir /tmp/tstgrad/target/x86_64-unknown-linux-gnu/release/deps --target x86_64-unknown-linux-gnu -C incremental=/tmp/tstgrad/target/x86_64-unknown-linux-gnu/release/incremental -L dependency=/tmp/tstgrad/target/x86_64-unknown-linux-gnu/release/deps -L dependency=/tmp/tstgrad/target/release/deps --extern autodiff=/tmp/tstgrad/target/release/deps/libautodiff-6c483861fee42660.so` (signal: 6, SIGABRT: process abort signal)
Based on our HackMD, these are the steps needed in order to differentiate code using this Rust fork. First iteration:
Add enzyme flag to x.py. Propagate that flag through. We only request it at build time, not when running cargo/rustc.
Usage: ./configure --enable-llvm-link-shared --enable-llvm-plugins --enable-llvm-enzyme --release-channel=nightly --enable-llvm-assertions --enable-clang --enable-lld --enable-option-checking --enable-ninja
and x build --stage 1
Add enzyme as tree along with pre-build wrappers conditionally.
Add a trait file to rustc_codegen_ssa/src/traits/autodiff.rs
@ZuseZ4, @bytesnake
Write a special proc-macro which roughly follows this PoC here. Add a hidden node similar to the asm macro and propagate the macro "parameters" through. @bytesnake
Tighten macro assertions if possible, drop extern "C" generation, further improvements. @bytesnake
Add (cfg-gated) code in rustc_codegen_llvm/src/lib.rs and rustc_codegen_ssa/src/back/write.rs to differentiate full-LTO builds.
Testing
Notes / changes:
Here we go again. Small update to the latest faer version:
error: <unknown>:0:0: in function preprocess__ZN8rust_msa8f_energy17h679628f42d5cf744E float (ptr, ptr, ptr): Enzyme: Cannot deduce type of copy call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(264) %6, ptr noundef nonnull align 4 dereferenceable(264) %4, i64 264, i1 false) #127, !noalias !4
Let's give this one more special case for now?
error: <unknown>:0:0: in function preprocess__ZN5small8f_energy17h3bb4e9e5f821a19bE float ([12 x float]*, [51 x float]*): Enzyme: failed to deduce type of memset call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(24) %5, i8 0, i64 24, i1 false) #66, !alias.scope !3, !noalias !6
Small example, after commit 467171d, updating Enzyme.
We can change it later, but still should try to get it right in the first run.
The larger design limitation will anyway be the user-facing ad macro, which limits which information we get.
We can take some inspiration from other traits like CoverageInfo, AsmBuilderMethods, which are implemented in both gcc and llvm.
Motivation:
Allow adding AD support for different codegen backends.
cg_llvm will be the first user, cg_cuda probably next.
We should keep in mind other possible ad tools (e.g. gcc based), as well as a possible MIR based ad tool.
The last one would be, feature-wise, an awesome fallback, even if it can't compete performance-wise with backend-based AD tools.
Assumptions:
Some inspiration: https://jax.readthedocs.io/en/latest/notebooks/autodiff_cookbook.html
There are a few things to keep in mind for the Design:
@bytesnake Here we break main through inlining.
Before:
8 ; Function Attrs: nonlazybind uwtable
9 define internal void @_ZN3ref4main17hd7a078464f06f560E() unnamed_addr #2 {
10 %1 = alloca float, align 4
11 %2 = alloca float, align 4
12 %3 = bitcast float* %2 to i8*
13 call void @llvm.lifetime.start.p0i8(i64 4, i8* %3)
14 store float 0x40091EB860000000, float* %2, align 4
15 %4 = bitcast float* %1 to i8*
16 call void @llvm.lifetime.start.p0i8(i64 4, i8* %4)
17 store float 0.000000e+00, float* %1, align 4
18 call void @_ZN3ref7cos_ref17h6719ec9878d14346E(float* noalias noundef readonly align 4 dereferenceable(4) %2, float* noalias noundef align 4 dereferenceable(4) %1, float 1.000000e+00)
19 br label %5
20
21 5: ; preds = %0
22 %6 = load float, float* %1, align 4
23 %7 = load float, float* %2, align 4
24 %8 = call float @"_ZN3std3f3221_$LT$impl$u20$f32$GT$3cos17h814fb01877381cc3E"(float %7)
25 br label %9
26
27 9: ; preds = %5
28 %10 = fcmp oeq float %6, %8
29 %11 = xor i1 %10, true
30 br i1 %11, label %15, label %12
31
32 12: ; preds = %9
33 %13 = bitcast float* %1 to i8*
34 call void @llvm.lifetime.end.p0i8(i64 4, i8* %13)
35 %14 = bitcast float* %2 to i8*
36 call void @llvm.lifetime.end.p0i8(i64 4, i8* %14)
37 ret void
38
39 15: ; preds = %9
40 call void @_ZN4core9panicking5panic17h53e5711e5a6f3ca2E([0 x i8]* noalias noundef nonnull readonly align 1 bitcast (<{ [38 x i8] }>* @3 to [0 x i8]*), i64 38, %0* noalias noundef readonly align 8 dereferenceable(24) bitcast (<{ i8*, [16 x i8] }>* @4 to %0*)) #7
41 unreachable
42 }
After:
2 ; Function Attrs: nonlazybind uwtable
3 define internal void @_ZN3ref4main17hd7a078464f06f560E() unnamed_addr #2 {
4 %1 = alloca float, align 4
5 %2 = bitcast float* %1 to i8*
6 call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2)
7 store float 0x40091EB860000000, float* %1, align 4
8 call void @_ZN3ref7cos_ref17h6719ec9878d14346E(float* noalias noundef nonnull readonly align 4 dereferenceable(4) %1, float* noalias align 4 undef, float undef)
9 unreachable
10 }
Building LLVM from source takes a lot of resources, so we should add support for
download-ci-llvm
in x.py
https://rustc-dev-guide.rust-lang.org/building/bootstrapping.html?highlight=download#what-is-a-sysroot
We discussed this previously in a weekly meeting, but without getting to a conclusion.
This is orthogonal to MSVC support.
Overlaps with not using extern blocks anymore.
Design proposal as per our discussion:
fn foo<T: Add>(a: T) -> T {
a * a
}
#[autodiff(foo, dfoo, Active, Duplicated)]
fn dfoo<T>(a: T, b: T) -> T;
fn main() {
dbg!(dfoo<f32>(3.14, 1.0));
}
require ;
after the function declaration through the autodiff macro.
Automatically replace ;
with {unimplemented!();}
Required Checks:
dfoo<f32>
checks if foo<f32>
is a valid instantiation
input and output Activity must be valid for the given T (think of Output for Scalar types, Duplicated for ptr/ref/.. types).
Actions:
Instantiate foo in case no other code is using foo<f32>
.
Mark foo as used s.t. it won't be optimized away. Similar to our previous primary fncs.
Explanation:
Asking for the derivative dfoo<T>
must differentiate the source code of foo<T>
, so we must instantiate foo<T>
.
rustc: /h/292/drehwald/prog/rust/src/tools/enzyme/enzyme/Enzyme/GradientUtils.cpp:5071: llvm::Value* GradientUtils::invertPointerM(llvm::Value*, llvm::IRBuilder<>&, bool): Assertion `0 && "cannot find deal with ptr that isnt arg"' failed.
hessian example with latest main: b259460
Currently blocked on EnzymeAD/Enzyme#1044
FAILED: Enzyme/CMakeFiles/LLVMEnzyme-16.dir/FunctionUtils.cpp.o
FAILED: Enzyme/CMakeFiles/LLVMEnzyme-16.dir/MustExitScalarEvolution.cpp.o
FAILED: Enzyme/CMakeFiles/LLVMEnzyme-16.dir/CacheUtility.cpp.o
FAILED: Enzyme/CMakeFiles/LLVMEnzyme-16.dir/ActivityAnalysisPrinter.cpp.o
FAILED: Enzyme/CMakeFiles/LLVMEnzyme-16.dir/InstructionBatcher.cpp.o
FAILED: Enzyme/CMakeFiles/LLVMEnzyme-16.dir/ActivityAnalysis.cpp.o
FAILED: Enzyme/CMakeFiles/LLVMEnzyme-16.dir/DifferentialUseAnalysis.cpp.o
FAILED: Enzyme/CMakeFiles/LLVMEnzyme-16.dir/DiffeGradientUtils.cpp.o
FAILED: Enzyme/CMakeFiles/LLVMEnzyme-16.dir/CApi.cpp.o
FAILED: Enzyme/CMakeFiles/LLVMEnzyme-16.dir/Enzyme.cpp.o
FAILED: Enzyme/CMakeFiles/LLVMEnzyme-16.dir/GradientUtils.cpp.o
FAILED: Enzyme/CMakeFiles/LLVMEnzyme-16.dir/EnzymeLogic.cpp.o
// #![feature(panic_unwind)]
#![feature(bench_black_box)]
// extern crate panic_unwind;
use autodiff::autodiff;
#[derive(Debug, Clone, PartialEq)]
struct Foo {
x: f64,
y: f64,
r: f64
}
#[autodiff(dgrad, Reverse, Const)]
fn get_output(#[dup] amt: &mut f64, a: Foo, b: Foo) {
{
// let t_amt = *t_amt;
let ab = (*amt * a.r * a.y) / (a.x + *amt * a.r);
let ba = (ab * b.r * b.x) / (b.y + ab * b.r);
*amt = ba;
}
}
fn main() {
let b = Foo {
x: 4848051834.0,
y: 543430617.5,
r: 0.997,
};
let a = Foo {
x: 9907918868.0,
y: 1127604104.0,
r: 0.9985
};
let mut amt = 1.0;
let mut amt2 = 1.0;
let dg = dgrad(&mut amt, &mut amt2, a.clone(), b.clone());
println!("amt: {amt:?}, amt2: {amt2:?}, dg: {dg:?}");
let mut amt = 17321749.0;
let mut amt2 = 1.0;
let dg = dgrad(&mut amt, &mut amt2, a.clone(), b.clone());
println!("amt: {amt:?}, amt2: {amt2:?}, dg: {dg:?}");
}
amt: 1.0107426332676432, amt2: 1.0107426329550588, dg: ()
amt: 17414541.144841213, amt2: 1.0000000098377733, dg: ()
As you can see, when the input amt
is 1.0,
the function dgrad
returns the gradient 1.0107426329550588
. Is there any way to set amt2 = 1.0;
and then pass amt2
to the function dgrad
so that it returns something like 17414541.144841213
?
or should I do this?
#[autodiff(dgrad, Reverse, Active)]
fn get_output(#[dup] amt: &mut f64, a: Foo, b: Foo) -> f64 {
{
// let t_amt = *t_amt;
let ab = (*amt * a.r * a.y) / (a.x + *amt * a.r);
let ba = (ab * b.r * b.x) / (b.y + ab * b.r);
ba
}
}
let dg = dgrad(&mut amt, &mut amt2, a.clone(), b.clone(), 1.0);
println!("amt: {amt:?}, amt2: {amt2:?}, dg: {dg:?}");
let mut count = 0;
loop {
amt2 = 0.0;
let dg = dgrad(&mut amt, &mut amt2, a.clone(), b.clone(), 1.0);
println!("amt: {amt:?}, amt2: {amt2:?}, dg: {dg:?}");
amt = amt * amt2;
if amt2 == 1.0 {
break
}
count += 1;
}
println!("count: {count:?}");
Output:
...
amt: 17321764.990430593, amt2: 1.0000000000000004, dg: ()
amt: 17321764.9904306, amt2: 1.0000000000000004, dg: ()
amt: 17321764.99043061, amt2: 1.0000000000000004, dg: ()
amt: 17321764.990430616, amt2: 1.0000000000000004, dg: ()
amt: 17321764.990430623, amt2: 1.0000000000000004, dg: ()
amt: 17321764.99043063, amt2: 1.0000000000000004, dg: ()
amt: 17321764.99043064, amt2: 1.0000000000000004, dg: ()
amt: 17321764.990430646, amt2: 1.0000000000000004, dg: ()
amt: 17321764.990430653, amt2: 1.0000000000000004, dg: ()
amt: 17321764.99043066, amt2: 1.0000000000000004, dg: ()
amt: 17321764.990430668, amt2: 1.0000000000000004, dg: ()
amt: 17321764.990430675, amt2: 1.0000000000000004, dg: ()
amt: 17321764.990430683, amt2: 1.0000000000000004, dg: ()
amt: 17321764.99043069, amt2: 1.0000000000000004, dg: ()
amt: 17321764.990430698, amt2: 1.0000000000000004, dg: ()
amt: 17321764.990430705, amt2: 1.0000000000000004, dg: ()
amt: 17321764.990430713, amt2: 1.0000000000000004, dg: ()
amt: 17321764.99043072, amt2: 1.0000000000000004, dg: ()
amt: 17321764.990430728, amt2: 1.0000000000000004, dg: ()
amt: 17321764.990430735, amt2: 1.0000000000000004, dg: ()
amt: 17321764.990430743, amt2: 1.0000000000000004, dg: ()
amt: 17321764.99043075, amt2: 1.0000000000000004, dg: ()
amt: 17321764.990430757, amt2: 1.0000000000000004, dg: ()
amt: 17321764.990430765, amt2: 1.0000000000000004, dg: ()
amt: 17321764.990430772, amt2: 1.0000000000000004, dg: ()
amt: 17321764.99043078, amt2: 1.0000000000000004, dg: ()
amt: 17321764.990430787, amt2: 1.0000000000000004, dg: ()
amt: 17321764.990430795, amt2: 1.0000000000000004, dg: ()
amt: 17321764.990430802, amt2: 1.0000000000000004, dg: ()
amt: 17321764.99043081, amt2: 1.0000000000000004, dg: ()
amt: 17321764.990430817, amt2: 1.0000000000000004, dg: ()
amt: 17321764.990430824, amt2: 1.0000000000000004, dg: ()
amt: 17321764.990430832, amt2: 1.0000000000000004, dg: ()
amt: 17321764.99043084, amt2: 1.0000000000000004, dg: ()
amt: 17321764.990430847, amt2: 1.0000000000000002, dg: ()
amt: 17321764.99043085, amt2: 1.0000000000000002, dg: ()
amt: 17321764.990430854, amt2: 1.0000000000000002, dg: ()
amt: 17321764.990430858, amt2: 1.0000000000000002, dg: ()
amt: 17321764.99043086, amt2: 1.0000000000000002, dg: ()
amt: 17321764.990430865, amt2: 1.0000000000000002, dg: ()
amt: 17321764.99043087, amt2: 1.0000000000000002, dg: ()
amt: 17321764.990430873, amt2: 1.0000000000000002, dg: ()
amt: 17321764.990430877, amt2: 1.0000000000000002, dg: ()
amt: 17321764.99043088, amt2: 1.0000000000000002, dg: ()
amt: 17321764.990430884, amt2: 1.0000000000000002, dg: ()
amt: 17321764.990430888, amt2: 1.0, dg: ()
count: 4539
BTW, I am bad at math, pardon me. A graphing calculator for this is here: https://www.desmos.com/calculator/dd48dumucq
error: /h/344/drehwald/prog/rust/library/core/src/iter/range.rs:821:6: in function preprocess__ZN4core4iter5range101_$LT$impl$u20$core..iter..traits..iterator..Iterator$u20$for$u20$core..ops..range..Range$LT$A$GT$$GT$4next17h200cd38afbb96f54E { i64, i64 } (ptr): Enzyme: failed to deduce type of insertvalue %6 = insertvalue { i64, i64 } %5, i64 %4, 1, !dbg !200
failing example:
rust/library/autodiff/examples/rosenbrock_rev.rs
(without release mode)
A declarative, efficient, and flexible JavaScript library for building user interfaces.
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Something interesting about the web. A new door to the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Something interesting about visualization; use data art.
Something interesting about games; make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google ❤️ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.