Comments (15)
I've posted a PR that should fix this at rust-lang/compiler-builtins#593.
from rust.
Note that this bug affects stable Rust when C code that uses _Float16
is being compiled into a Rust binary. In this scenario the C code will also use the incorrectly-compiled builtin. Example to reproduce using the cc
crate:
// src/main.rs
fn main() {
extern "C" {
fn do_cast(ignored: u32, x: u16) -> f32;
}
dbg!(unsafe { do_cast(123456, 0x4900 /* 10.0_f16.to_bits() */) });
}
// src/code.c
float do_cast(unsigned int ignored, unsigned short x) {
union { _Float16 f; unsigned short bits; } u = { .bits = x };
return (float) u.f;
}
// build.rs
fn main() {
cc::Build::new().file("src/code.c").compile("code");
}
# Cargo.toml
[package]
name = "example"
version = "0.1.0"
edition = "2021"
[build-dependencies]
cc = "1.0.94"
This example should print 10.0, but doesn't on current stable on x86_64 due to the incorrectly-compiled builtin from Rust's build of compiler-rt
(as opposed to using the correctly-compiled builtin that ships with gcc
or clang
).
from rust.
related issue: #118813
from rust.
Seems like there isn't an easy way to force the integer ABI either, running with -Ctarget-feature=-sse
Disabling such basic ABI-relevant target features is anyway wildly unsafe and should probably be rejected by rustc. Lucky enough, on 64bit targets LLVM catches this rather than generating code with a non-standard ABI.
from rust.
@rustbot label -needs-triage
from rust.
sounds like something is not compiling correctly and it's just reading completely wrong data ...
from rust.
the full code
#![feature(f16,f128)]
use std::mem::transmute;
#[inline(never)]
pub fn add(a: f16) -> f16 {
10.0f16 + a
}
#[inline(never)]
pub fn x() -> u16 {
let a = add(10.0);
unsafe { transmute::<_, u16>(a) }
}
results in
.LCPI0_0:
.long 0x41200000
example::add:
push rax
call qword ptr [rip + __extendhfsf2@GOTPCREL]
addss xmm0, dword ptr [rip + .LCPI0_0]
call qword ptr [rip + __truncsfhf2@GOTPCREL]
pop rax
ret
.LCPI1_0:
.short 0x4900
example::x:
push rax
pinsrw xmm0, word ptr [rip + .LCPI1_0], 0
call qword ptr [rip + example::add@GOTPCREL]
pextrw eax, xmm0, 0
pop rcx
ret
The 10.0 for add
is stored as the full 0 padded 4 bytes and moved normally, while the 10.0 for x
is only stored as 2 bytes and pinsrw
is used, which, from what I understand, leaves the upper bits untouched. So it does seem like it just adds whatever garbage happens to be around in xmm0?
from rust.
The LLVM IR contains the exact same constants for both cases...
source_filename = "example.1b362aa86f0e2f27-cgu.0"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define noundef half @_ZN7example3add17hb3d3e297b1262388E(half noundef %a) unnamed_addr #0 {
%_0 = fadd half %a, 0xH4900
ret half %_0
}
define noundef i16 @_ZN7example1x17h714cc7590e57d450E() unnamed_addr #0 {
%a = tail call noundef half @_ZN7example3add17hb3d3e297b1262388E(half noundef 0xH4900)
%_0 = bitcast half %a to i16
ret i16 %_0
}
attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind nonlazybind willreturn memory(none) uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}
!0 = !{i32 8, !"PIC Level", i32 2}
!1 = !{i32 2, !"RtLibUseGOT", i32 1}
!2 = !{!"rustc version 1.79.0-nightly (a07f3eb43 2024-04-11)"}
from rust.
It seems like the bitcast is causing the weird codegen, the C version has similar IR but a much simpler (I was looking at the wrong thing) https://llvm.godbolt.org/z/s1boEjqn8:x
https://llvm.godbolt.org/z/8cafr6KT7:
define dso_local half @add(half noundef %a) {
entry:
%a.addr = alloca half, align 2
store half %a, ptr %a.addr, align 2
%0 = load half, ptr %a.addr, align 2
%ext = fpext half %0 to float
%add = fadd float 1.000000e+01, %ext
%unpromotion = fptrunc float %add to half
ret half %unpromotion
}
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
define dso_local signext i16 @x() {
entry:
%a = alloca half, align 2
%b = alloca half, align 2
store half 0xH4900, ptr %a, align 2
%0 = load half, ptr %a, align 2
%call = call half @add(half noundef %0)
store half %call, ptr %b, align 2
%1 = load i16, ptr %b, align 2
ret i16 %1
}
.LCPI0_0:
.long 0x41200000 # float 10
add: # @add
push rbp
mov rbp, rsp
sub rsp, 16
pextrw eax, xmm0, 0
mov word ptr [rbp - 2], ax
pinsrw xmm0, word ptr [rbp - 2], 0
call __extendhfsf2@PLT
movss xmm1, dword ptr [rip + .LCPI0_0] # xmm1 = [1.0E+1,0.0E+0,0.0E+0,0.0E+0]
addss xmm0, xmm1
call __truncsfhf2@PLT
add rsp, 16
pop rbp
ret
.LCPI1_0:
.short 0x4900 # half 10
x: # @x
push rbp
mov rbp, rsp
sub rsp, 16
pinsrw xmm0, word ptr [rip + .LCPI1_0], 0
pextrw eax, xmm0, 0
mov word ptr [rbp - 2], ax
pinsrw xmm0, word ptr [rbp - 2], 0
call add
pextrw eax, xmm0, 0
mov word ptr [rbp - 4], ax
movsx eax, word ptr [rbp - 4]
add rsp, 16
pop rbp
ret
.long 0x41200000
is the f32 value of 10.0, I guess it can do f16->f32 extension of the const operand at comptime
from rust.
@rustbot label +A-LLVM
from rust.
It's an ABI issue with __extendhfsf2
and __truncsfhf2
from compiler-rt. LLVM is expecting f16
s to be passed and returned in xmm0
, but compiler-rt appears to have been compiled to pass them like u16
s. In this example (playground link):
#![feature(f16)]
#[inline(never)]
pub fn f16_as_f32_cast(x: f16) -> f32 {
x as f32
}
#[inline(never)]
pub fn f16_as_f32_intrinsic(x: f16) -> f32 {
extern "C" {
fn __extendhfsf2(x: u16) -> f32;
}
unsafe { __extendhfsf2(std::mem::transmute(x)) }
}
fn main() {
dbg!(f16_as_f32_cast(10.0));
dbg!(f16_as_f32_intrinsic(10.0));
}
f16_as_f32_cast
will return garbage whereas f16_as_f32_intrinsic
will return the correct result.
from rust.
Oh, yuck, if I am understanding this correctly it seems like compiler-rt emits a different ABI based on whether or not SSE2 is enabled llvm/llvm-project#56854. I guess that Rust must be building compiler-rt without SSE since the integer ABI works, but LLVM is expecting the +SSE rt that uses the float ABI.
Seems like there isn't an easy way to force the integer ABI either, running with -Ctarget-feature=-sse
error: example.rs:6:5: in function _ZN7example3add17hb3d3e297b1262388E half (half): SSE register return with SSE disabled
Any idea what Rust could do here?
Tangentially related #114479
from rust.
Defining the COMPILER_RT_HAS_FLOAT16
macro when compiling the relevant C intrinsics in compiler-builtins
build.rs
will fix the issue (if that macro is not defined, compiler-rt
will use uint16_t
instead of _Float16
to define the intrinsics). Note that _Float16
has limited platform support that depends on the C compiler (clang, gcc), so the long-term solution is to port the intrinsics to Rust in compiler-builtins
.
from rust.
It seems to be worse, though, because the non-standard ABI here is the one the library is expecting.
This is just a plain bug in compiler-rt.
from rust.
@rustbot label -T-libs
from rust.
Related Issues (20)
- Doc for std contains broken links in the implementors sections HOT 12
- Japanese translation of Rust by Example is not built
- ICE when triggering a malformed `diagnostic::on_unimplemented` across crate boundaries (`DefId(20:3 ~ ice_lib_test[60d0]::Test)` isn't local) HOT 1
- Ludicrous build time for `keshvar` crate in `release` mode HOT 4
- Imports of legacy modules with numeric constants do not get flagged as unused when the corresponding type is used in the code HOT 1
- When using `Box<self>` in first method parameter, the error output provides the wrong type in help message HOT 1
- `IoSlice`: the `Deref` impl is too restrictive on the lifetime of the slice, an alternative is needed. HOT 1
- Trait coherency regression in 1.78 HOT 3
- Type inference not working well with ptr mutability HOT 2
- GetSystemTimePreciseAsFileTime HOT 3
- Subpar documentation for ptr-to-ref conversion methods HOT 5
- Compiler Panics ,when I use Ractor Framework HOT 5
- 1.78.0 & WASM - panicked at dlmalloc.rs: assertion failed: psize <= size + max_overhead HOT 5
- False positives from invalid_reference_casting HOT 6
- Nested argument mismatch could suggest moving extra parameter outside parenthesis HOT 3
- Explain the difference between Once(Lock|Cell) and Lazy(Lock|Cell) in the docs HOT 2
- ICE: `Result::unwrap()` on an `Err` value: Sorts(ExpectedFound { ... } HOT 1
- rustfmt eats attributes in inline const blogs HOT 2
- Sometimes 'Σ' is not handled correctly in str.to_lowercase() HOT 2
- Compiler Crash with "Hello world" on latest nightly (aarch64) HOT 2
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from rust.