MD5のpure ruby版

SHA-1に比べてエンディアンの処理がある分ちょっと面倒でした。ネイティブライブラリに比べて相当遅いです。やっぱりソースは短いです。
(追記)
データが448bit(56byte)の時の処理を間違っていたので修正。プラスとマイナスが逆だった。

# -*- encoding: US-ASCII -*-
# MD5 implementation of pure ruby.

module Purerubymd5
  MASK = 0xffffffff
  def self.rotate_left_shift_32bit(i, n)
    i = MASK & i
    i = (MASK & (i << n)) | (i >> (32 - n))
  end

  def self.int64tobinstr_le(i64)
    [MASK & i64].pack("V") + [i64 >> 32].pack("V")
  end

  def self.binstr2int_le(str)
    str.unpack("V")[0]
  end

  def self.int2binstr_le(i)
    [MASK & i].pack("V")
  end

  def self.inttostr(i)
    sprintf("%08X", i)
  end

  def self.digest(str)
    [hexdigest(str)].pack("H*")
  end

  def self.hexdigest(str)
    # word A: 01 23 45 67
    # word B: 89 ab cd ef
    # word C: fe dc ba 98
    # word D: 76 54 32 10

    a = binstr2int_le([0x01, 0x23, 0x45, 0x67].pack("C4"))
    b = binstr2int_le([0x89, 0xab, 0xcd, 0xef].pack("C4"))
    c = binstr2int_le([0xfe, 0xdc, 0xba, 0x98].pack("C4"))
    d = binstr2int_le([0x76, 0x54, 0x32, 0x10].pack("C4"))

    # F(X,Y,Z) = XY v not(X) Z      (  1 <= i <= 16)
    # G(X,Y,Z) = XZ v Y not(Z)      ( 17 <= i <= 32)
    # H(X,Y,Z) = X xor Y xor Z      ( 33 <= i <= 48)
    # I(X,Y,Z) = Y xor (X v not(Z)) ( 49 <= i <= 64)
    f_a = []
    # T(i) = (4294967296 * abs(sin(i)).truncate
    t_a = []
    # k
    k_a = []
    # s
    s_a = []
    # n4 arg0, 1, 2, 3
    n4_a = []
    (1..64).each do |i|
      t_a[i] = (Math.sin(i).abs * 4294967296).truncate
      n4_a[i] = [(65 - i) % 4, (65 - i + 1) % 4, (65 - i + 2) % 4, (65 - i + 3) % 4]
      case i
        when 1 .. 16
          f_a[i] = lambda {|x, y, z| (x & y) | ((~x) & z)}
          k_a[i] = i - 1
          s_a[i] = [7, 12, 17, 22][(i - 1) % 4]
        when 17 .. 32
          f_a[i] = lambda {|x, y, z| (x & z) | (y & (~z))}
          k_a[i] = (1 + (i - 17) * 5) % 16
          s_a[i] = [5, 9, 14, 20][(i - 1) % 4]
        when 33 .. 48
          f_a[i] = lambda {|x, y, z| x ^ y ^ z}
          k_a[i] = (5 + (i - 33) * 3) % 16
          s_a[i] = [4, 11, 16, 23][(i - 1) % 4]
        when 49 .. 64
          f_a[i] = lambda {|x, y, z| y ^ (x | (~z))}
          k_a[i] = ((i - 49) * 7) % 16
          s_a[i] = [6, 10, 15, 21][(i - 1) % 4]
      end
    end

    # "1" is appended.
    buf = str + ["10000000"].pack("B8")

    if (56 - buf.size % 64) < 0
      (64 + (56 - buf.size % 64)).times do
        buf += [0].pack("C")
      end
    else
      (56 - buf.size % 64).times do
        buf += [0].pack("C")
      end
    end

    # set the number of bits in the original message. 64bit
    # little endian
    buf += int64tobinstr_le(str.size * 8)

    m_a = []
    (buf.size / 64).times do |i|
      m_a[i] = buf[i * 64, 64]
    end

    abcd = []
    abcd = [a, b, c, d]
    abcd2 = []

    m_a.each do |m|
      x = []

      # X0 to X15
      x = m.unpack("V16")

      abcd2 = abcd.dup

      (1..64).each do |n|
        abcd[n4_a[n][0]] = MASK & (abcd[n4_a[n][0]] + 
                      f_a[n].call(abcd[n4_a[n][1]], abcd[n4_a[n][2]], abcd[n4_a[n][3]]) + x[k_a[n]] + t_a[n])
        abcd[n4_a[n][0]] = rotate_left_shift_32bit(abcd[n4_a[n][0]], s_a[n])
        abcd[n4_a[n][0]] = MASK & (abcd[n4_a[n][0]] + abcd[n4_a[n][1]])
      end

      # A = A + AA, B = B + BB, C = C + CC, D = D + DD
      abcd.each_index do |i|
        abcd[i] = MASK & (abcd[i] + abcd2[i])
      end

    end

    int2binstr_le(abcd[0]).unpack("H*")[0] + 
      int2binstr_le(abcd[1]).unpack("H*")[0] + 
      int2binstr_le(abcd[2]).unpack("H*")[0] + 
      int2binstr_le(abcd[3]).unpack("H*")[0]
  end
end

# sample

require 'benchmark'
require 'digest/md5'

Benchmark.bm do |x|
p ""
  x.report { p Purerubymd5.hexdigest("") }
  x.report { p Digest::MD5.hexdigest("") }
p "d41d8cd98f00b204e9800998ecf8427e"

p "a"
  x.report { p Purerubymd5.hexdigest("a") }
  x.report { p Digest::MD5.hexdigest("a") }
p "0cc175b9c0f1b6a831c399e269772661"

p "abc"
  x.report { p Purerubymd5.hexdigest("abc") }
  x.report { p Digest::MD5.hexdigest("abc") }
p "900150983cd24fb0d6963f7d28e17f72"

p "message digest"
  x.report { p Purerubymd5.hexdigest("message digest") }
  x.report { p Digest::MD5.hexdigest("message digest") }
p "f96b697d7cb7938d525a2f31aaf161d0"

p "abcdefghijklmnopqrstuvwxyz"
  x.report { p Purerubymd5.hexdigest("abcdefghijklmnopqrstuvwxyz") }
  x.report { p Digest::MD5.hexdigest("abcdefghijklmnopqrstuvwxyz") }
p "c3fcd3d76192e4007dfb496cca67e13b"

p "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
  x.report { p Purerubymd5.hexdigest("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") }
  x.report { p Digest::MD5.hexdigest("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") }
p "d174ab98d277d9f5a5611c2c9f419d9f"

p "12345678901234567890123456789012345678901234567890123456789012345678901234567890"
  x.report { p Purerubymd5.hexdigest("12345678901234567890123456789012345678901234567890123456789012345678901234567890") }
  x.report { p Digest::MD5.hexdigest("12345678901234567890123456789012345678901234567890123456789012345678901234567890") }
p "57edf4a22be3c955ac49da2e2107b67a"
end