I've been working on a way to improve the performance of joins and have once again run into an issue with sorting data that contains nil values. I've worked out what I think is a generic solution for sorting data that includes nils. It does involve monkey-patching NilClass, but I think the patch is pretty tame. Do you see any issues if we monkey-patch NilClass and use the Sortable wrapper (defined below)
when sorting DataFrames & Vectors? (Alternatively, we could use refinements instead of monkey patches.)
# Monkey patch: gives nil an ordering in which it ties with another nil and
# sorts ahead of every other value.
class NilClass
  include Comparable

  # Three-way comparison with nil as the receiver.
  #
  # @param other [Object] the value nil is being compared against
  # @return [Integer] 0 when other is also nil, otherwise -1
  def <=>(other)
    return 0 if other.nil?

    -1
  end
end
# Wrapper that makes a value usable as a sort key even when its own <=> is
# only defined in one direction (for example a patched nil <=> 1 returns -1
# while 1 <=> nil returns nil).
class Sortable
  include Comparable

  attr_reader :value

  # @param value [Object] the value to wrap
  def initialize(value)
    @value = value
  end

  # Compares the wrapped values, falling back to the reversed comparison
  # (with its sign flipped) when the forward comparison is undefined.
  #
  # @param other [Object] normally another Sortable; anything without a
  #   #value reader is treated as incomparable
  # @return [Integer, nil] -1, 0 or 1, or nil when the two values cannot be
  #   ordered in either direction
  def <=>(other)
    # Follow the <=> convention: nil (not NoMethodError) for foreign objects.
    return nil unless other.respond_to?(:value)

    forward = @value <=> other.value
    return forward if forward

    # Forward comparison is undefined; ask the other side and invert it.
    backward = other.value <=> @value
    # Yield nil rather than negating nil when both directions are undefined.
    -backward if backward
  end
end
# Demo: sort collections that mix nils with other values by using a Sortable
# wrapper as the sort key. `p` (inspect output) is used instead of `puts` so
# the printed result has the same shape as the `# =>` annotations; `puts`
# would flatten the arrays and print each nil as a blank line.

# Integers mixed with nils.
numbers_and_nil = ((0..10).to_a + [nil] * 10).shuffle
p numbers_and_nil.sort_by { |v| Sortable.new(v) }
# => [nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# Strings mixed with nils (strings order lexicographically, so "10" < "2").
strings_and_nil = ((0..10).map(&:to_s) + [nil] * 10).shuffle
p strings_and_nil.sort_by { |v| Sortable.new(v) }
# => [nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, "0", "1", "10", "2", "3", "4", "5", "6", "7", "8", "9"]

# One-element arrays. Array#<=> compares element by element, so this case
# needs nil itself to respond to <=> (the NilClass patch).
arrays_and_nil = ((0..10).to_a + [nil] * 10).shuffle.map { |v| [v] }
p arrays_and_nil.sort_by { |v| Sortable.new(v) }
# => [[nil], [nil], [nil], [nil], [nil], [nil], [nil], [nil], [nil], [nil], [0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10]]

# Pairs whose nil sits in the second slot; the first slot is "1" for nils.
arrays_and_nil_in_2nd_position = ((0..10).to_a + [nil] * 10).shuffle.map { |v| [(v || 1).to_s, v] }
p arrays_and_nil_in_2nd_position.sort_by { |v| Sortable.new(v) }
# => [["0", 0], ["1", nil], ["1", nil], ["1", nil], ["1", nil], ["1", nil], ["1", nil], ["1", nil], ["1", nil], ["1", nil], ["1", nil], ["1", 1], ["10", 10], ["2", 2], ["3", 3], ["4", 4], ["5", 5], ["6", 6], ["7", 7], ["8", 8], ["9", 9]]