Comments (23)
Here's the LLVM that I get -- I don't see any obvious Souper stoppers in there.
; Function Attrs: nounwind uwtable
define void @check_result(i64 %x, i64 %y) #0 {
entry:
ret void
}
; Function Attrs: noreturn nounwind
declare void @__assert_fail(i8*, i8*, i32, i8*) #1
; Function Attrs: nounwind uwtable
define void @check_overflow(i64 %x, i64 %y) #0 {
entry:
%conv = trunc i64 %x to i16
%conv1 = trunc i64 %y to i16
%add.i = add i16 %conv1, %conv
%0 = and i64 %y, %x
%1 = trunc i64 %0 to i16
%2 = icmp slt i16 %1, 0
%cmp10.i = icmp sgt i16 %add.i, -1
%or.cond28.i = and i1 %2, %cmp10.i
br i1 %or.cond28.i, label %checked_add_2.exit, label %lor.rhs.i
lor.rhs.i: ; preds = %entry
%3 = or i64 %y, %x
%4 = trunc i64 %3 to i16
%5 = icmp sgt i16 %4, -1
%cmp20.i = icmp slt i16 %add.i, 0
%cmp20..i = and i1 %5, %cmp20.i
br label %checked_add_2.exit
checked_add_2.exit: ; preds = %entry, %lor.rhs.i
%6 = phi i1 [ true, %entry ], [ %cmp20..i, %lor.rhs.i ]
%shr28.i = lshr i16 %add.i, 15
%shr629.i = lshr i16 %conv, 15
%shr930.i = lshr i16 %conv1, 15
%tobool.i = icmp eq i16 %shr629.i, 0
br i1 %tobool.i, label %checked_add_3.exit, label %land.lhs.true.i
land.lhs.true.i: ; preds = %checked_add_2.exit
%tobool13.i = icmp eq i16 %shr930.i, 0
%tobool15.i = icmp ne i16 %shr28.i, 0
%or.cond.i = or i1 %tobool13.i, %tobool15.i
br i1 %or.cond.i, label %checked_add_3.exit, label %land.lhs.true.i.checked_add_3.exit_crit_edge
land.lhs.true.i.checked_add_3.exit_crit_edge: ; preds = %land.lhs.true.i
br i1 %6, label %cond.end, label %cond.false
checked_add_3.exit: ; preds = %checked_add_2.exit, %land.lhs.true.i
%7 = or i16 %shr930.i, %shr629.i
%8 = icmp eq i16 %7, 0
%tobool20.i = icmp ne i16 %shr28.i, 0
%tobool20..i = and i1 %8, %tobool20.i
%cmptmp = xor i1 %6, %tobool20..i
br i1 %cmptmp, label %cond.false, label %cond.end
cond.false: ; preds = %land.lhs.true.i.checked_add_3.exit_crit_edge, %checked_add_3.exit
tail call void @__assert_fail(i8* getelementptr inbounds ([9 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([6 x i8]* @.str1, i64 0, i64 0), i32 34, i8* getelementptr inbounds ([32 x i8]* @__PRETTY_FUNCTION__.check_overflow, i64 0, i64 0)) #3
unreachable
cond.end: ; preds = %land.lhs.true.i.checked_add_3.exit_crit_edge, %checked_add_3.exit
ret void
}
from souper.
Actually it is LLVM, not Souper, that proves check_result() is empty.
from souper.
Here's the Souper in question:
%0 = block 3
%1:i64 = var ; x
%2:i16 = trunc %1 ; a
%3:i32 = sext %2 ; a promoted to int
%4:i1 = slt %3, 0:i32 ; a < 0
%5:i64 = var ; y
%6:i16 = trunc %5 ; b
%7:i32 = sext %6 ; b promoted to int
%8:i1 = slt %7, 0:i32 ; b < 0
%9:i1 = and %4, %8 ; a < 0 && b < 0
%10:i32 = zext %2 ; a promoted to int
%11:i32 = zext %6 ; b promoted to int
%12:i32 = addnsw %10, %11 ; a + b
%13:i16 = trunc %12 ; r
%14:i32 = sext %13 ; r promoted to int
%15:i1 = sle 0:i32, %14 ; 0 <= r
%16:i1 = and %9, %15 ; a < 0 && b < 0 && r >= 0
blockpc %0 1 %16 0:i1
blockpc %0 2 %16 0:i1
%17:i1 = sle 0:i32, %3 ; 0 <= a
%18:i1 = sle 0:i32, %7 ; 0 <= b
%19:i1 = and %17, %18 ; a >= 0 && b >= 0
blockpc %0 2 %19 1:i1
%20 = block 3
%21:i32 = ashr %10, 15:i32 ; a >> 15
%22:i16 = trunc %21 ; a >> 15
%23:i32 = zext %22 ; a >> 15 promoted to int
%24:i1 = ne 0:i32, %23 ; (a >> 15) != 0
%25:i32 = ashr %11, 15:i32 ; b >> 15
%26:i16 = trunc %25 ; b >> 15
%27:i32 = zext %26 ; b >> 15 promoted to int
%28:i1 = ne 0:i32, %27 ; (b >> 15) != 0
%29:i1 = and %24, %28 ; ((a >> 15) != 0 && (b >> 15) != 0
%30:i1 = xor 1:i1, %29 ; ((a >> 15) == 0 || (b >> 15) == 0
%31:i32 = zext %13 ; r promoted to int
%32:i32 = ashr %31, 15:i32 ; r >> 15
%33:i16 = trunc %32 ; r >> 15
%34:i1 = ne 0:i16, %33 ; (r >> 15) != 0
%35:i1 = or %30, %34 ; ((a >> 15) == 0 || (b >> 15) == 0 || (r >> 15) != 0
blockpc %20 1 %35 1:i1
blockpc %20 2 %35 1:i1
%36:i1 = ne 0:i16, %22 ; (a >> 15) != 0
%37:i1 = ne 0:i16, %26 ; (b >> 15) != 0
%38:i1 = or %36, %37 ; (a >> 15) != 0 || (b >> 15) != 0
blockpc %20 2 %38 0:i1
%39:i1 = slt %14, 0:i32 ; r < 0
%40:i1 = phi %0, 1:i1, 0:i1, %39
%41:i32 = zext %40
%42:i32 = zext %33
%43:i1 = ne 0:i32, %42
%44:i1 = phi %20, 1:i1, 0:i1, %43
%45:i32 = zext %44
%46:i1 = eq %41, %45
infer %46
from souper.
And below is the LLVM. I believe this particular LLVM is correct since I tested it with all 2^32 inputs and the assertion never fails. Thus I think it's safe to say that Souper is either buggy or else somehow not up to the task of seeing the meaning of this code.
; ModuleID = 'add_souper_test_2.bc'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@.str = private unnamed_addr constant [9 x i8] c"o1 == o2\00", align 1
@.str1 = private unnamed_addr constant [18 x i8] c"add_souper_test.c\00", align 1
@__PRETTY_FUNCTION__.check_overflow = private unnamed_addr constant [32 x i8] c"void check_overflow(long, long)\00", align 1
; Function Attrs: nounwind uwtable
define void @check_overflow(i64 %x, i64 %y) #0 {
entry:
%conv = trunc i64 %x to i16
%conv1 = trunc i64 %y to i16
%conv.i = zext i16 %conv to i32
%conv1.i = zext i16 %conv1 to i32
%add.i = add nsw i32 %conv.i, %conv1.i
%conv2.i = trunc i32 %add.i to i16
%conv3.i = sext i16 %conv to i32
%cmp.i = icmp slt i32 %conv3.i, 0
%conv5.i = sext i16 %conv1 to i32
%cmp6.i = icmp slt i32 %conv5.i, 0
%or.cond.i = and i1 %cmp.i, %cmp6.i
%conv9.i = sext i16 %conv2.i to i32
%cmp10.i = icmp sge i32 %conv9.i, 0
%or.cond7.i = and i1 %or.cond.i, %cmp10.i
br i1 %or.cond7.i, label %checked_add_2.exit, label %lor.rhs.i
lor.rhs.i: ; preds = %entry
%conv12.i = sext i16 %conv to i32
%cmp13.i = icmp sge i32 %conv12.i, 0
%conv16.i = sext i16 %conv1 to i32
%cmp17.i = icmp sge i32 %conv16.i, 0
%or.cond8.i = and i1 %cmp13.i, %cmp17.i
br i1 %or.cond8.i, label %land.rhs.i, label %checked_add_2.exit
land.rhs.i: ; preds = %lor.rhs.i
%conv19.i = sext i16 %conv2.i to i32
%cmp20.i = icmp slt i32 %conv19.i, 0
br label %checked_add_2.exit
checked_add_2.exit: ; preds = %land.rhs.i, %lor.rhs.i, %entry
%0 = phi i1 [ true, %entry ], [ false, %lor.rhs.i ], [ %cmp20.i, %land.rhs.i ]
%lor.ext.i = zext i1 %0 to i32
%conv2 = trunc i64 %x to i16
%conv3 = trunc i64 %y to i16
%conv.i3 = zext i16 %conv2 to i32
%conv1.i4 = zext i16 %conv3 to i32
%add.i5 = add nsw i32 %conv.i3, %conv1.i4
%conv2.i6 = trunc i32 %add.i5 to i16
%conv3.i7 = zext i16 %conv2.i6 to i32
%shr.i = ashr i32 %conv3.i7, 15
%conv4.i = trunc i32 %shr.i to i16
%conv5.i8 = zext i16 %conv2 to i32
%shr6.i = ashr i32 %conv5.i8, 15
%conv7.i = trunc i32 %shr6.i to i16
%conv8.i = zext i16 %conv3 to i32
%shr9.i = ashr i32 %conv8.i, 15
%conv10.i = trunc i32 %shr9.i to i16
%conv11.i = zext i16 %conv7.i to i32
%tobool.i = icmp ne i32 %conv11.i, 0
%conv12.i9 = zext i16 %conv10.i to i32
%tobool13.i = icmp ne i32 %conv12.i9, 0
%or.cond.i10 = and i1 %tobool.i, %tobool13.i
%or.cond.not.i = xor i1 %or.cond.i10, true
%tobool15.i = icmp ne i16 %conv4.i, 0
%or.cond7.i11 = or i1 %or.cond.not.i, %tobool15.i
br i1 %or.cond7.i11, label %lor.rhs.i13, label %checked_add_3.exit
lor.rhs.i13: ; preds = %checked_add_2.exit
%tobool16.i = icmp ne i16 %conv7.i, 0
%tobool18.i = icmp ne i16 %conv10.i, 0
%or.cond8.i12 = or i1 %tobool16.i, %tobool18.i
br i1 %or.cond8.i12, label %checked_add_3.exit, label %land.rhs.i15
land.rhs.i15: ; preds = %lor.rhs.i13
%conv19.i14 = zext i16 %conv4.i to i32
%tobool20.i = icmp ne i32 %conv19.i14, 0
br label %checked_add_3.exit
checked_add_3.exit: ; preds = %land.rhs.i15, %lor.rhs.i13, %checked_add_2.exit
%1 = phi i1 [ true, %checked_add_2.exit ], [ false, %lor.rhs.i13 ], [ %tobool20.i, %land.rhs.i15 ]
%lor.ext.i16 = zext i1 %1 to i32
%cmp = icmp eq i32 %lor.ext.i, %lor.ext.i16
br i1 %cmp, label %cond.end, label %cond.false
cond.false: ; preds = %checked_add_3.exit
call void @__assert_fail(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([18 x i8]* @.str1, i32 0, i32 0), i32 27, i8* getelementptr inbounds ([32 x i8]* @__PRETTY_FUNCTION__.check_overflow, i32 0, i32 0)) #2
unreachable
cond.end: ; preds = %checked_add_3.exit
ret void
}
; Function Attrs: noreturn nounwind
declare void @__assert_fail(i8*, i8*, i32, i8*) #1
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { noreturn nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { noreturn nounwind }
!llvm.ident = !{!0}
!0 = metadata !{metadata !"clang version 3.6.0 (trunk 223586)"}
from souper.
My guess is somehow we're asking too much of blockpc here but I haven't looked into it at that level.
from souper.
I'll have a look at the blockpc
code soon.
from souper.
John, Thanks for the detailed example! I will investigate it.
from souper.
What I'm not sure about is if the bug is in the blockpc harvester or the solving part.
from souper.
If I code the overflow checks in branch-free fashion, Souper optimizes everything properly:
static int checked_add_2(int16_t a, int16_t b, int16_t *rp) {
int16_t r = (uint16_t)a + (uint16_t)b;
*rp = r;
// return (a < 0 && b < 0 && r >= 0) || (a >= 0 && b >= 0 && r < 0);
return (a < 0 & b < 0 & r >= 0) | (a >= 0 & b >= 0 & r < 0);
}
static int checked_add_3(int16_t a, int16_t b, int16_t *rp) {
const int BITS = CHAR_BIT * sizeof(int16_t);
uint16_t ur = (uint16_t)a + (uint16_t)b;
uint16_t sr = ur >> (BITS - 1);
uint16_t sa = (uint16_t)a >> (BITS - 1);
uint16_t sb = (uint16_t)b >> (BITS - 1);
*rp = ur;
// return (sa && sb && !sr) || (!sa && !sb && sr);
return (sa & sb & !sr) | (!sa & !sb & sr);
}
from souper.
Here's a simpler example where Souper misses the obvious optimization. Something definitely is wrong here.
#include <assert.h>
int sign(int x) {
if (x < 0)
return -1;
if (x > 0)
return 1;
return 0;
}
int sign2(int x) {
int y = x >> 31;
unsigned z = 0 - (unsigned)x;
unsigned w = z >> 31;
return w | y;
}
void compare(int x) { assert(sign(x) == sign2(x)); }
#ifdef TEST
int main(void) {
unsigned x = 0;
do {
compare(x);
x++;
} while (x != 0);
return 0;
}
#endif
from souper.
@regehr @chenyang78 This looks rather bad. sign
is optimized to
define i32 @sign(i32 %x) #0 {
entry:
%cmp = icmp slt i32 %x, 0
br i1 %cmp, label %return, label %if.end
if.end: ; preds = %entry
%cmp1 = icmp sgt i32 %x, 0
%. = zext i1 %cmp1 to i32
br label %return
return: ; preds = %if.end, %entry
%retval.0 = phi i32 [ -1, %entry ], [ %., %if.end ]
ret i32 %retval.0
}
Souper takes this and simplifies this to ret i32 0
. This seems related to issue #166. The conditions generated from the if.end
block are not sufficiently guarded to faithfully represent the execution which skips that block and immediately jumps to the return
block.
from souper.
Thanks John and Jeroen, I will look at it more closely and try to fix #166 over the weekend.
from souper.
@jeroenk In my test, souper didn't optimize the code that you referred. Assume I saved the bitcode in the file simple2.ll:
$ llvm-as -o simple2.bc simple2.ll
$ souper -keep-solver-inputs -dump-klee-exprs -z3-path=/home/chenyang/other_programs/z3/build/z3 simple2.bc
; Listing valid replacements.
; Using solver: Z3 + internal cache
Solver input saved to /tmp/input-eb9b23.smt2
Solver input saved to /tmp/input-efb132.smt2
Solver input saved to /tmp/input-7ac65c.smt2
Solver input saved to /tmp/input-2a4427.smt2
Probably I am missing something?
from souper.
@chenyang78 try passing -souper-infer-iN
from souper.
@chenyang78 I'm a bit puzzled, I'm no longer able to reproduce the problem either. I'll keep digging a bit more.
from souper.
@chenyang78 I'm starting to suspect that my binaries were out-of-sync compared to the state of my source tree. Hence, my apologies for the spurious report.
from souper.
Back to John's last example. This gets optimized to:
entry:
%cmp.i = icmp slt i32 %x, 0
br i1 %cmp.i, label %sign.exit, label %if.end.i
if.end.i: ; preds = %entry
%cmp1.i = icmp sgt i32 %x, 0
%..i = zext i1 %cmp1.i to i32
br label %sign.exit
sign.exit: ; preds = %if.end.i, %entry
%retval.0.i = phi i32 [ -1, %entry ], [ %..i, %if.end.i ]
%shr.i = ashr i32 %x, 31
%sub.i = sub i32 0, %x
%shr1.i = lshr i32 %sub.i, 31
%or.i = or i32 %shr1.i, %shr.i
%lnot = icmp eq i32 %retval.0.i, %or.i
...
The candidate souper generates for the eq
instruction at the bottom is:
%0 = block 2
%1:i32 = var
%2:i1 = slt %1, 0:i32
blockpc %0 1 %2 0:i1
%3:i1 = slt 0:i32, %1
%4:i32 = zext %3
%5:i32 = phi %0, 4294967295:i32, %4
%6:i32 = sub 0:i32, %1
%7:i32 = lshr %6, 31:i32
%8:i32 = ashr %1, 31:i32
%9:i32 = or %7, %8
%10:i1 = eq %5, %9
infer %10
In case we follow the code path corresponding to the 4294967295:i32
(= -1) alternative in the phi node, the value of %1
is not getting constrained to a negative value. This means that the solver is allowed to pick a positive value, set %5
equal to -1 and then notice that %9
is either 0 or 1. Thus, no optimization is found. If I manually add blockpc %0 0 %2 1:i1
to the above candidate then the optimization is found. Hence, it seems that we're generating too few blockpc's.
from souper.
@jeroenk No problem at all. Thanks for digging into the issue!
The blockpc
that you manually added was not generated by Souper's blockpc harvesting algorithm, because the basicblock sign.exit
has multiple incoming branches. I am thinking that probably we shouldn't rule out the multiple-incoming-branches case. What do you think, Peter @pcc? Thanks.
from souper.
@pcc might be busy doing other stuff?
Yang, are the modifications difficult? Clearly this needs to wait until #166 is sorted out. I keep running into additional test cases where this fix will matter.
from souper.
Yes, it needs some modification. I think we should wait until #166 gets fixed.
from souper.
Sounds good-- we'll back burner this one until the no-UB version of Souper is totally solid.
My impression is that real support for for LLVM's (very nasty) UB model is going to take some time and thought.
from souper.
Yes. Thanks for running the test. Hopefully the fix to #166 would be good.
from souper.
Could souper simpilfy all int sign variants to:
int sign(int x) {
return (x > 0) + (x >> 31);
}
Which usually optimal for all platforms?
from souper.
Related Issues (20)
- Souper crash in codegen from valid transformation HOT 5
- Assertion failed: (EB.get(&I)->hasOrigin(&I)), function ExtractExprCandidates, file .../lib/Extractor/Candidates.cpp, line 1025 HOT 6
- ICE: Assertion `false && "Phi with muliple arguments unimplemented"' failed HOT 1
- Instruction does not dominate all uses!
- Not deterministic SMT query string HOT 1
- Build failure HOT 1
- use error HOT 5
- LLVM Test Suite test failure with NO_INFER
- Build failure HOT 2
- futur m1 support ?
- Better spreading SMT solver load?
- [alive2] `IfNonPoison` removed upstream HOT 1
- Souper fails to turn multiplication by a power of two into a shift? HOT 3
- building for m1 HOT 2
- Packaging Souper HOT 1
- Possible bugs when generating inputs for dataflow pruning HOT 3
- Rgiht way to use souper HOT 2
- Hi, Is this superoptimizer hardware irrelvant? HOT 4
- Question about Souper IR's advantage compare to LLVM IR? HOT 1
- not found klee and alive2
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from souper.