These are functions written in C that collectively will calculate an MD5 sum.

/* MD5 routines, after Ron Rivest */
/* Written by David Madore <david.madore@ens.fr>, with code taken in
 * part from Colin Plumb. */
/* Public domain (1999/11/24) */

/* Note: these routines do not depend on endianness. */

/* === The header === */

/* Put this in MD5.h if you don't like having everything in one big
 * file. */

#ifndef _DMADORE_MD5_H
#define _DMADORE_MD5_H

struct MD5_ctx {
  /* The four chaining variables */
  unsigned long buf[4];
  /* Count number of message bits */
  unsigned long bits[2];
  /* Data being fed in */
  unsigned long in[16];
  /* Our position within the 512 bits (always between 0 and 63) */
  int b;
};

void MD5_transform (unsigned long buf[4], const unsigned long in[16]);
void MD5_start (struct MD5_ctx *context);
void MD5_feed (struct MD5_ctx *context, unsigned char inb);
void MD5_stop (struct MD5_ctx *context, unsigned char digest[16]);

#endif /* not defined _DMADORE_MD5_H */

/* === The implementation === */

#define F1(x, y, z) (z ^ (x & (y ^ z)))
#define F2(x, y, z) F1(z, x, y)
#define F3(x, y, z) (x ^ y ^ z)
#define F4(x, y, z) (y ^ (x | ~z))

#define MD5STEP(f, w, x, y, z, data, s) \
        { w += f (x, y, z) + data;  w = w<<s | (w&0xffffffffUL)>>(32-s); \
          w += x; }

void
MD5_transform (unsigned long buf[4], const unsigned long in[16])
{
  register unsigned long a, b, c, d;

  a = buf[0];  b = buf[1];  c = buf[2];  d = buf[3];
  MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478UL, 7);
  MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756UL, 12);
  MD5STEP(F1, c, d, a, b, in[2] + 0x242070dbUL, 17);
  MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceeeUL, 22);
  MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0fafUL, 7);
  MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62aUL, 12);
  MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613UL, 17);
  MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501UL, 22);
  MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8UL, 7);
  MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7afUL, 12);
  MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1UL, 17);
  MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7beUL, 22);
  MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122UL, 7);
  MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193UL, 12);
  MD5STEP(F1, c, d, a, b, in[14] + 0xa679438eUL, 17);
  MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821UL, 22);
  MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562UL, 5);
  MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340UL, 9);
  MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51UL, 14);
  MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aaUL, 20);
  MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105dUL, 5);
  MD5STEP(F2, d, a, b, c, in[10] + 0x02441453UL, 9);
  MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681UL, 14);
  MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8UL, 20);
  MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6UL, 5);
  MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6UL, 9);
  MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87UL, 14);
  MD5STEP(F2, b, c, d, a, in[8] + 0x455a14edUL, 20);
  MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905UL, 5);
  MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8UL, 9);
  MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9UL, 14);
  MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8aUL, 20);
  MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942UL, 4);
  MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681UL, 11);
  MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122UL, 16);
  MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380cUL, 23);
  MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44UL, 4);
  MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9UL, 11);
  MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60UL, 16);
  MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70UL, 23);
  MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6UL, 4);
  MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127faUL, 11);
  MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085UL, 16);
  MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05UL, 23);
  MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039UL, 4);
  MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5UL, 11);
  MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8UL, 16);
  MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665UL, 23);
  MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244UL, 6);
  MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97UL, 10);
  MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7UL, 15);
  MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039UL, 21);
  MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3UL, 6);
  MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92UL, 10);
  MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47dUL, 15);
  MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1UL, 21);
  MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4fUL, 6);
  MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0UL, 10);
  MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314UL, 15);
  MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1UL, 21);
  MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82UL, 6);
  MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235UL, 10);
  MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bbUL, 15);
  MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391UL, 21);
  buf[0] += a;  buf[1] += b;  buf[2] += c;  buf[3] += d;
}

#undef F1
#undef F2
#undef F3
#undef F4
#undef MD5STEP

void
MD5_start (struct MD5_ctx *ctx)
{
  int i;

  ctx->buf[0] = 0x67452301UL;
  ctx->buf[1] = 0xefcdab89UL;
  ctx->buf[2] = 0x98badcfeUL;
  ctx->buf[3] = 0x10325476UL;
  ctx->bits[0] = 0;
  ctx->bits[1] = 0;
  for ( i=0 ; i<16 ; i++ )
    ctx->in[i] = 0;
  ctx->b = 0;
}

void
MD5_feed (struct MD5_ctx *ctx, unsigned char inb)
{
  int i;
  unsigned long temp;

  ctx->in[ctx->b/4] |= ((unsigned long)inb) << ((ctx->b%4)*8);
  if ( ++ctx->b >= 64 )
    {
      MD5_transform (ctx->buf, ctx->in);
      ctx->b = 0;
      for ( i=0 ; i<16 ; i++ )
        ctx->in[i] = 0;
    }
  temp = ctx->bits[0];
  ctx->bits[0] += 8;
  if ( (temp&0xffffffffUL) > (ctx->bits[0]&0xffffffffUL) )
    ctx->bits[1]++;
}

void
MD5_stop (struct MD5_ctx *ctx, unsigned char digest[16])
{
  int i;
  unsigned long bits[2];

  for ( i=0 ; i<2 ; i++ )
    bits[i] = ctx->bits[i];
  MD5_feed (ctx, 0x80);
  for ( ; ctx->b!=56 ; )
    MD5_feed (ctx, 0);
  for ( i=0 ; i<2 ; i++ )
    {
      MD5_feed (ctx, bits[i]&0xff);
      MD5_feed (ctx, (bits[i]>>8)&0xff);
      MD5_feed (ctx, (bits[i]>>16)&0xff);
      MD5_feed (ctx, (bits[i]>>24)&0xff);
    }
  for ( i=0 ; i<4 ; i++ )
    {
      digest[4*i] = ctx->buf[i]&0xff;
      digest[4*i+1] = (ctx->buf[i]>>8)&0xff;
      digest[4*i+2] = (ctx->buf[i]>>16)&0xff;
      digest[4*i+3] = (ctx->buf[i]>>24)&0xff;
    }
}

collisionS for the full MD5 hash function was published by Xiaoyun Wang, Dengguo Feng, Xuejia Lai and Hongbo Yu in 2004, in their paper "Collisions for Hash Functions MD4, MD5, HAVAL-128 and RIPEMD" (the other hash functions were considerably weaker, and of less interest). The novelty in their technique, from what I've been able to glean, uses differential cryptanalysis, but simultaneously examining differentials of 32-bit subtraction and XORs ("1-bit subtractions"). The generated collisions have very few (6) different bits -- these are not random collisions, in any form.

Here's one such collision:

d1  31  dd  02  c5  e6  ee  c4  69  3d  9a  06  98  af  f9  5c
2f  ca  b5  87  12  46  7e  ab  40  04  58  3e  b8  fb  7f  89
55  ad  34  06  09  f4  b3  02  83  e4  88  83  25  71  41  5a
08  51  25  e8  f7  cd  c9  9f  d9  1d  bd  f2  80  37  3c  5b
d8  82  3e  31  56  34  8f  5b  ae  6d  ac  d4  36  c9  19  c6
dd  53  e2  b4  87  da  03  fd  02  39  63  06  d2  48  cd  a0
e9  9f  33  42  0f  57  7e  e8  ce  54  b6  70  80  a8  0d  1e
c6  98  21  bc  b6  a8  83  93  96  f9  65  2b  6f  f7  2a  70
and
d1  31  dd  02  c5  e6  ee  c4  69  3d  9a  06  98  af  f9  5c
2f  ca  b5  07  12  46  7e  ab  40  04  58  3e  b8  fb  7f  89
55  ad  34  06  09  f4  b3  02  83  e4  88  83  25  f1  41  5a
08  51  25  e8  f7  cd  c9  9f  d9  1d  bd  72  80  37  3c  5b
d8  82  3e  31  56  34  8f  5b  ae  6d  ac  d4  36  c9  19  c6
dd  53  e2  34  87  da  03  fd  02  39  63  06  d2  48  cd  a0
e9  9f  33  42  0f  57  7e  e8  ce  54  b6  70  80  28  0d  1e
c6  98  21  bc  b6  a8  83  93  96  f9  65  ab  6f  f7  2a  70

Rather than bore you with number of just how quickly such collisions can be found today, I give an anecdote: The paper was given (in "extended abstract" form) at a conference. There was much buzz the day before -- an MD5 collision was going to be published! As soon as the abstract was seen in the conference proceedings, people rushed to check that the 2 texts had the same MD5 hash value. I did the same, actually.

Sure enough, they didn't. A series of quick talks with 3 of the authors revealed a problem: they had attacked the MD5 algorithm published in the Chinese language edition of Bruce Schneier's Applied Cryptography. And the translation had a minor endiannity bug!

Running the texts through the "bugged" algorithm yielded a collision. Very impressive, to be sure, but considerably less in terms of shock value.

Luckily, the 4th contributor to the paper had stayed home. A series of phone calls back to him explained the problem, and he fixed the attack programs. A collision for the real MD5 algorithm was ready the next morning, in time for the conference talk.

Finding MD5 collisions is that fast. Actually, it's been sped up since then.


A hash collision for the MD5 ("secure") hash function had been known since at least 1996, if we use different (nonstandard!) initial values. Hans Dobbertin of the German Information Security Agency published a collision: an initialization vector IV and two inputs x, x' for which MD5(IV;x)=MD5(IV;x'). Quoting (but reformatting from PostScript) from the paper "Cryptanalysis of MD5 Compress":

Collision for the compress function of MD5. Use the initial value


IV = 0x12AC2375 0x3B341042 0x5F62B97C 0x4BA763ED
and define the first input X = (Xi)i<16 by setting:

X0 = 0xAA1DDA5E X4 = 0x1006363E X8 = 0x98A1FB19 X12 = 0x1326ED65
X1 = 0xD97ABFF5 X5 = 0x7218209D X9 = 0x1FAE44B0 X13 = 0xD93E0972
X2 = 0x55F0E1C1 X6 = 0xE01C135D X10 = 0x236BB992 X14 = 0xD458C868
X3 = 0x32774244 X7 = 0x9DA64D0E X11 = 0x6B7A669B X15 = 0x6B72746A
The second input X'= (X'i)i<16 is defined by setting X'i = Xi (i < 16; i ≠ 14) and X'14 = X14 + 29.

Then we have a collision, i.e. MD5-compress(IV ; X) = MD5-compress(IV ; X'); and this common compress value is


0xBF90E670 0x752AF92B 0x9CE4E3E1 0xB12CF8DE.
The computation of such a collision takes about 10 hours on a Pentium PC.

The paper already recommended using SHA-1 or RIPEMD-160, instead.

Log in or register to write something here or to contact authors.