After updating to the latest version, GPU execution stopped working for me. I have not created a minimal reproducible example yet, but this is the error I'm facing:
Failed to compile PTX code (ptxas exited with code 255)
ptxas /tmp/jl_rh605v.ptx, line 313; error : Instruction 'atom.cas.b16.global' requires .target sm_70 or higher
ptxas fatal : Ptx assembly aborted due to errors
If you think this is a bug, please file an issue and attach /tmp/jl_rh605v.ptx
Stacktrace:
[1] error(s::String)
@ Base ./error.jl:33
[2] cufunction_compile(job::GPUCompiler.CompilerJob)
@ CUDA ~/.julia/packages/CUDA/bki2w/src/compiler/execution.jl:399
[3] cached_compilation(cache::Dict{UInt64, Any}, job::GPUCompiler.CompilerJob, compiler::typeof(CUDA.cufunction_compile), linker::typeof(CUDA.cufunction_link))
@ GPUCompiler ~/.julia/packages/GPUCompiler/1Ajz2/src/cache.jl:90
[4] cufunction(f::typeof(GraphNeuralNetworks.scatter_scalar_kernel!), tt::Type{Tuple{typeof(+), CUDA.CuDeviceVector{UInt16, 1}, Int64, CUDA.CuDeviceVector{Int64, 1}}}; name::Nothing, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ CUDA ~/.julia/packages/CUDA/bki2w/src/compiler/execution.jl:297
[5] cufunction(f::typeof(GraphNeuralNetworks.scatter_scalar_kernel!), tt::Type{Tuple{typeof(+), CUDA.CuDeviceVector{UInt16, 1}, Int64, CUDA.CuDeviceVector{Int64, 1}}})
@ CUDA ~/.julia/packages/CUDA/bki2w/src/compiler/execution.jl:291
[6] macro expansion
@ ~/.julia/packages/CUDA/bki2w/src/compiler/execution.jl:102 [inlined]
[7] scatter!(op::Function, dst::CUDA.CuArray{UInt16, 1, CUDA.Mem.DeviceBuffer}, src::Int64, idx::CUDA.CuArray{Int64, 1, CUDA.Mem.DeviceBuffer})
@ GraphNeuralNetworks ~/.julia/packages/GraphNeuralNetworks/Hv1up/src/utils.jl:52
[8] degree(g::GraphNeuralNetworks.GNNGraphs.GNNGraph{Tuple{CUDA.CuArray{Int64, 1, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Int64, 1, CUDA.Mem.DeviceBuffer}, Nothing}}, T::Type{UInt16}; dir::Symbol, edge_weight::Nothing)
@ GraphNeuralNetworks.GNNGraphs ~/.julia/packages/GraphNeuralNetworks/Hv1up/src/GNNGraphs/query.jl:212
[9] (::GraphNeuralNetworks.GCNConv{CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, typeof(NNlib.relu)})(g::GraphNeuralNetworks.GNNGraphs.GNNGraph{Tuple{CUDA.CuArray{Int64, 1, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Int64, 1, CUDA.Mem.DeviceBuffer}, Nothing}}, x::CUDA.CuArray{UInt16, 2, CUDA.Mem.DeviceBuffer}, edge_weight::Nothing)
@ GraphNeuralNetworks ~/.julia/packages/GraphNeuralNetworks/Hv1up/src/layers/conv.jl:95
[10] (::GraphNeuralNetworks.GCNConv{CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, typeof(NNlib.relu)})(g::GraphNeuralNetworks.GNNGraphs.GNNGraph{Tuple{CUDA.CuArray{Int64, 1, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Int64, 1, CUDA.Mem.DeviceBuffer}, Nothing}}, x::CUDA.CuArray{UInt16, 2, CUDA.Mem.DeviceBuffer})
@ GraphNeuralNetworks ~/.julia/packages/GraphNeuralNetworks/Hv1up/src/layers/conv.jl:80
[11] (::GraphNeuralNetworks.GCNConv{CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, typeof(NNlib.relu)})(g::GraphNeuralNetworks.GNNGraphs.GNNGraph{Tuple{CUDA.CuArray{Int64, 1, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Int64, 1, CUDA.Mem.DeviceBuffer}, Nothing}})
@ GraphNeuralNetworks ~/.julia/packages/GraphNeuralNetworks/Hv1up/src/layers/basic.jl:12
[12] applylayer
@ ~/.julia/packages/GraphNeuralNetworks/Hv1up/src/layers/basic.jl:121 [inlined]
[13] applychain
@ ~/.julia/packages/GraphNeuralNetworks/Hv1up/src/layers/basic.jl:133 [inlined]
[14] (::GraphNeuralNetworks.GNNChain{Tuple{GraphNeuralNetworks.GCNConv{CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, typeof(NNlib.relu)}, GraphNeuralNetworks.GCNConv{CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, typeof(NNlib.relu)}, GraphNeuralNetworks.GCNConv{CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, typeof(NNlib.relu)}, GraphNeuralNetworks.GCNConv{CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, typeof(NNlib.relu)}, GraphNeuralNetworks.GCNConv{CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, typeof(NNlib.relu)}, GraphNeuralNetworks.GCNConv{CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, typeof(NNlib.relu)}, GraphNeuralNetworks.GCNConv{CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, typeof(NNlib.relu)}, Flux.BatchNorm{typeof(NNlib.relu), CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Float32, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}})(g::GraphNeuralNetworks.GNNGraphs.GNNGraph{Tuple{CUDA.CuArray{Int64, 1, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Int64, 1, CUDA.Mem.DeviceBuffer}, Nothing}})
@ GraphNeuralNetworks ~/.julia/packages/GraphNeuralNetworks/Hv1up/src/layers/basic.jl:140