Histogram of the most significant digit (MSD)

An allocation-free function to compute MSD

function msd(x::Real)
    x = abs(x)
    d = x
    while true
        x < 1 && return floor(Int, d)
        d = x
        x ÷= 10
    end
end
msd(34513)
3
msd(-51334)
5
msd(2.76e19)
2

MSD of random numbers

using CUDA
using FLoops
using FoldsCUDA
using Setfield

function histogram_msd(xs, ex = nothing)
    zs = ntuple(_ -> 0, 9)  # a tuple of 9 zeros
    @floop ex for x in xs
        d = msd(x)
        1 <= d <= 9 || continue  # skip it if `msd` returns 0
        h2 = @set zs[d] = 1      # set `d`-th position of the tuple to 1
        @reduce(h1 = zs .+ h2)   # point-wise addition merges the histogram
    end
    return h1
end

Generate some random numbers

xs = let randn = has_cuda_gpu() ? CUDA.randn : randn
    exp.(10.0 .* (randn(10^8) .+ 6))
end
@assert all(isfinite, xs)
collect(view(xs, 1:10))  # preview
10-element Vector{Float64}:
 4.768048257452494e17
 3.3330491561352217e21
 2.497667162242791e27
 3.438125568833578e24
 3.2910232814316455e23
 4.1781109747367034e23
 3.483132831327238e21
 3.826799775495147e27
 2.192252258097073e21
 3.34028053094087e24

Pass an array of (real) numbers to histogram_msd to compute the histogram of MSD:

hist1 = histogram_msd(xs)
(30109078, 17606480, 12487189, 9687058, 7921170, 6693306, 5799620, 5118115, 4577984)

Frequency in percentage:

aspercentage(h) = pairs(round.((collect(h) ./ length(xs) .* 100); digits = 1))
aspercentage(hist1)
pairs(::Vector{Float64})(...):
  1 => 30.1
  2 => 17.6
  3 => 12.5
  4 => 9.7
  5 => 7.9
  6 => 6.7
  7 => 5.8
  8 => 5.1
  9 => 4.6

MSD of lazily computed numbers

FoldsCUDA supports running reduction over non-CuArray containers such as UnitRange and iterator transformation wrapping it. However, you need to explicitly specify to use CUDA by, e.g., passing CUDAEx to @floop:

hist2 = histogram_msd(x^2 for x in 1:10^8)
(19156360, 14699187, 12391984, 10917564, 9870226, 9076617, 8448304, 7934816, 7504942)

Frequency in percentage:

aspercentage(hist2)
pairs(::Vector{Float64})(...):
  1 => 19.2
  2 => 14.7
  3 => 12.4
  4 => 10.9
  5 => 9.9
  6 => 9.1
  7 => 8.4
  8 => 7.9
  9 => 7.5
hist3 = histogram_msd(exp(x) for x in range(1, 35, length=10^8))
(29897898, 16985662, 12691855, 9844565, 8043600, 6800762, 5891098, 5196305, 4648255)

Frequency in percentage:

aspercentage(hist3)
pairs(::Vector{Float64})(...):
  1 => 29.9
  2 => 17.0
  3 => 12.7
  4 => 9.8
  5 => 8.0
  6 => 6.8
  7 => 5.9
  8 => 5.2
  9 => 4.6

This page was generated using Literate.jl.