Git Product home page Git Product logo

Comments (15)

beetrees avatar beetrees commented on August 19, 2024 1

I've posted a PR that should fix this at rust-lang/compiler-builtins#593.

from rust.

beetrees avatar beetrees commented on August 19, 2024 1

Note that this bug affects stable Rust when C code that uses _Float16 is being compiled into a Rust binary. In this scenario the C code will also use the incorrectly-compiled builtin. Example to reproduce using the cc crate:

// src/main.rs
fn main() {
    extern "C" {
        fn do_cast(ignored: u32, x: u16) -> f32;
    }
    dbg!(unsafe { do_cast(123456, 0x4900 /* 10.0_f16.to_bits() */) });
}
// src/code.c
float do_cast(unsigned int ignored, unsigned short x) {
	union { _Float16 f; unsigned short bits; } u = { .bits = x };
	return (float) u.f;
}
// build.rs
fn main() {
	cc::Build::new().file("src/code.c").compile("code");
}
# Cargo.toml
[package]
name = "example"
version = "0.1.0"
edition = "2021"

[build-dependencies]
cc = "1.0.94"

This example should print 10.0, but doesn't on current stable on x86_64 due to the incorrectly-compiled builtin from Rust's build of compiler-rt (as opposed to using the correctly-compiled builtin that ships with gcc or clang).

from rust.

usamoi avatar usamoi commented on August 19, 2024 1

related issue: #118813

from rust.

RalfJung avatar RalfJung commented on August 19, 2024 1

Seems like there isn't an easy way to force the integer ABI either, running with -Ctarget-feature=-sse

Disabling such basic ABI-relevant target features is anyway wildly unsafe and should probably be rejected by rustc. Lucky enough, on 64bit targets LLVM catches this rather than generating code with a non-standard ABI.

from rust.

tgross35 avatar tgross35 commented on August 19, 2024

@rustbot label -needs-triage

from rust.

Nilstrieb avatar Nilstrieb commented on August 19, 2024

sounds like something is not compiling correctly and it's just reading completely wrong data ...

from rust.

Nilstrieb avatar Nilstrieb commented on August 19, 2024

the full code

#![feature(f16,f128)]
use std::mem::transmute;

#[inline(never)]
pub fn add(a: f16) -> f16 {
    10.0f16 + a
}

#[inline(never)]
pub fn x() -> u16 {
    let a = add(10.0);
    unsafe { transmute::<_, u16>(a) }
}

results in

.LCPI0_0:
        .long   0x41200000
example::add:
        push    rax
        call    qword ptr [rip + __extendhfsf2@GOTPCREL]
        addss   xmm0, dword ptr [rip + .LCPI0_0]
        call    qword ptr [rip + __truncsfhf2@GOTPCREL]
        pop     rax
        ret

.LCPI1_0:
        .short  0x4900
example::x:
        push    rax
        pinsrw  xmm0, word ptr [rip + .LCPI1_0], 0
        call    qword ptr [rip + example::add@GOTPCREL]
        pextrw  eax, xmm0, 0
        pop     rcx
        ret

The 10.0 for add is stored as the full 0 padded 4 bytes and moved normally, while the 10.0 for x is only stored as 2 bytes and pinsrw is used, which, from what I understand, leaves the upper bits untouched. So it does seem like it just adds whatever garbage happens to be around in xmm0?

from rust.

Nilstrieb avatar Nilstrieb commented on August 19, 2024

The LLVM IR contains the exact same constants for both cases...

source_filename = "example.1b362aa86f0e2f27-cgu.0"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define noundef half @_ZN7example3add17hb3d3e297b1262388E(half noundef %a) unnamed_addr #0 {
  %_0 = fadd half %a, 0xH4900
  ret half %_0
}

define noundef i16 @_ZN7example1x17h714cc7590e57d450E() unnamed_addr #0 {
  %a = tail call noundef half @_ZN7example3add17hb3d3e297b1262388E(half noundef 0xH4900)
  %_0 = bitcast half %a to i16
  ret i16 %_0
}

attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind nonlazybind willreturn memory(none) uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }

!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

!0 = !{i32 8, !"PIC Level", i32 2}
!1 = !{i32 2, !"RtLibUseGOT", i32 1}
!2 = !{!"rustc version 1.79.0-nightly (a07f3eb43 2024-04-11)"}

from rust.

tgross35 avatar tgross35 commented on August 19, 2024

It seems like the bitcast is causing the weird codegen, the C version has similar IR but a much simpler x https://llvm.godbolt.org/z/8cafr6KT7: (I was looking at the wrong thing) https://llvm.godbolt.org/z/s1boEjqn8:

define dso_local half @add(half noundef %a) {
entry:
  %a.addr = alloca half, align 2
  store half %a, ptr %a.addr, align 2
  %0 = load half, ptr %a.addr, align 2
  %ext = fpext half %0 to float
  %add = fadd float 1.000000e+01, %ext
  %unpromotion = fptrunc float %add to half
  ret half %unpromotion
}

declare void @llvm.dbg.declare(metadata, metadata, metadata) #1

define dso_local signext i16 @x() {
entry:
  %a = alloca half, align 2
  %b = alloca half, align 2
  store half 0xH4900, ptr %a, align 2
  %0 = load half, ptr %a, align 2
  %call = call half @add(half noundef %0)
  store half %call, ptr %b, align 2
  %1 = load i16, ptr %b, align 2
  ret i16 %1
}
.LCPI0_0:
        .long   0x41200000                      # float 10
add:                                    # @add
        push    rbp
        mov     rbp, rsp
        sub     rsp, 16
        pextrw  eax, xmm0, 0
        mov     word ptr [rbp - 2], ax
        pinsrw  xmm0, word ptr [rbp - 2], 0
        call    __extendhfsf2@PLT
        movss   xmm1, dword ptr [rip + .LCPI0_0] # xmm1 = [1.0E+1,0.0E+0,0.0E+0,0.0E+0]
        addss   xmm0, xmm1
        call    __truncsfhf2@PLT
        add     rsp, 16
        pop     rbp
        ret
.LCPI1_0:
        .short  0x4900                          # half 10
x:                                      # @x
        push    rbp
        mov     rbp, rsp
        sub     rsp, 16
        pinsrw  xmm0, word ptr [rip + .LCPI1_0], 0
        pextrw  eax, xmm0, 0
        mov     word ptr [rbp - 2], ax
        pinsrw  xmm0, word ptr [rbp - 2], 0
        call    add
        pextrw  eax, xmm0, 0
        mov     word ptr [rbp - 4], ax
        movsx   eax, word ptr [rbp - 4]
        add     rsp, 16
        pop     rbp
        ret

.long 0x41200000 is the f32 value of 10.0, I guess it can do f16->f32 extension of the const operand at comptime

from rust.

tgross35 avatar tgross35 commented on August 19, 2024

@rustbot label +A-LLVM

from rust.

beetrees avatar beetrees commented on August 19, 2024

It's an ABI issue with __extendhfsf2 and __truncsfhf2 from compiler-rt. LLVM is expecting f16s to be passed and returned in xmm0, but compiler-rt appears to have been compiled to pass them like u16s. In this example (playground link):

#![feature(f16)]

#[inline(never)]
pub fn f16_as_f32_cast(x: f16) -> f32 {
    x as f32
}

#[inline(never)]
pub fn f16_as_f32_intrinsic(x: f16) -> f32 {
    extern "C" {
        fn __extendhfsf2(x: u16) -> f32;
    }
    unsafe { __extendhfsf2(std::mem::transmute(x)) } 
}

fn main() {
	dbg!(f16_as_f32_cast(10.0));
	dbg!(f16_as_f32_intrinsic(10.0));
}

f16_as_f32_cast will return garbage whereas f16_as_f32_intrinsic will return the correct result.

from rust.

tgross35 avatar tgross35 commented on August 19, 2024

Oh, yuck, if I am understanding this correctly it seems like compiler-rt emits a different ABI based on whether or not SSE2 is enabled llvm/llvm-project#56854. I guess that Rust must be building compiler-rt without SSE since the integer ABI works, but LLVM is expecting the +SSE rt that uses the float ABI.

Seems like there isn't an easy way to force the integer ABI either, running with -Ctarget-feature=-sse

error: example.rs:6:5: in function _ZN7example3add17hb3d3e297b1262388E half (half): SSE register return with SSE disabled

Any idea what Rust could do here?

Tangentially related #114479

from rust.

beetrees avatar beetrees commented on August 19, 2024

Defining the COMPILER_RT_HAS_FLOAT16 macro when compiling the relevant C intrinsics in compiler-builtins build.rs will fix the issue (if that macro is not defined, compiler-rt will use uint16_t instead of _Float16 to define the intrinsics). Note that _Float16 has limited platform support that depends on the C compiler (clang, gcc), so the long-term solution is to port the intrinsics to Rust in compiler-builtins.

from rust.

chorman0773 avatar chorman0773 commented on August 19, 2024

It seems to be worse, though, because the non-standard ABI here is the one the library is expecting.
This is just a plain bug in compiler-rt.

from rust.

tgross35 avatar tgross35 commented on August 19, 2024

@rustbot label -T-libs

from rust.

Related Issues (20)

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Some thing interesting about web. New door for the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Some thing interesting about game, make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.