Some code written a long time ago, far far away, to bump map one pixel of a 2d surface.

```
; Excerpt of a NASM macro body; %1 is the macro's byte-offset parameter.
; Registers are set up by surrounding code that is not part of this excerpt.
movd mm1,[esi+eax+4+%1]         ; mm1 = dword at esi+eax+4+%1
movd edx,mm1                    ; copy it to edx so single bytes can be manipulated
sub dl,128                      ; bias the low byte by 128
mov dh,dl                       ; duplicate the biased byte into dh
sub dh,[esi+%1+eax+4-1+2]       ; dh -= byte at esi+eax+%1+5
sub dl,[esi+%1+4+3+2]           ; dl -= byte at esi+%1+9 (note: no eax term here)
movd mm2,edx
punpcklbw mm2,mm3               ; interleave bytes of mm2 with mm3 (widens bytes to words if mm3 is zero -- TODO confirm caller zeroes mm3)
psrlw mm2,1                     ; shift every word right by one (halve)
packuswb mm2,mm3                ; pack the words back down to bytes
movd edx,mm2
and edx,0x0000FFFF              ; keep only the two low bytes (the halved dl and dh)
movd mm2,[ebx+edx*4]            ; load the dword at ebx + edx*4
movd [edi+eax+4+%1],mm1         ; store mm1 (the dword loaded on the first line); mm2 is not written out here
```

Do not ask for an explanation; your puny humanoid brains cannot possibly fathom its subtle complexities.

OK OK, here's the stuff that puts it in context. Sorry about the tabs, the original source doesn't have the wonked out tabs. MSG me if you'd like the entire source and VC workspace, it's very old code of mine and I don't have much use for it anymore.

```-----------bump.cpp--------------------

#include <stdlib.h>
#include <stdio.h>
#include <conio.h>
#include <iostream.h>
#include <math.h>

/* 256 x 256 lookup table, four identical bytes per entry; allocated by bumpInit(). */
unsigned char* bumpMap;

/*
 * Allocates bumpMap and fills it with a radially symmetric highlight.
 *
 * For every (i, j) in [0,255] x [0,255]:
 *     intensity = 1 - (i/127.5 - 1)^2 - (j/127.5 - 1)^2
 *     temp      = clamp(256 * intensity^15, 0, 255)   (0 when intensity <= 0)
 * and the byte derived from (temp - 128 - 128) is written to all four bytes
 * of entry (i + j*256).
 *
 * The table is allocated with new[] on every call and is never released
 * inside this function.
 */
void bumpInit(void ) {
    bumpMap = new unsigned char[256 * 256 * 4];

    long i, j, channel;
    long double temp;
    long double intensity;

    for (i = 0; i < 256; i++) {
        /* The x contribution does not depend on j, so compute it once per column. */
        const double xTerm = pow(((i & 255) / 127.5) - 1, 2);

        for (j = 0; j < 256; j++) {
            intensity = 1 - xTerm
                          - pow(((j & 255) / 127.5) - 1, 2);

            if (intensity > 0) {
                temp = pow(intensity, 15);
                temp = temp * 256;
            } else {
                temp = 0;
            }
            if (temp > 255)
                temp = 255;

            /*
             * (temp - 256) is negative.  Converting a negative floating-point
             * value directly to unsigned char is undefined behaviour, so go
             * through int first: the value is truncated toward zero and then
             * reduced modulo 256 by the (well-defined) int -> unsigned char
             * conversion.
             */
            const unsigned char value =
                (unsigned char)(int)(temp - 128 - 128);

            unsigned char *entry = bumpMap + ((i & 255) + (j & 255) * 256) * 4;
            for (channel = 0; channel < 4; channel++)
                entry[channel] = value;
        }
    }
}

/*
 * C++ entry point for the assembly bump-mapping routine.
 *
 * width, height  - image dimensions passed through to doBumpASM
 * source, dest   - input and output pixel buffers
 * spotX1, spotY1 - first spot position; the low 16 bits of each are packed
 *                  into a single 32-bit argument (X in the low half, Y in
 *                  the high half), and spotY1 is also passed on its own
 * spotX2, spotY2 - second spot position, passed through unchanged
 *
 * Does nothing when a buffer (or the bumpMap table) is null, or when the
 * image is too small for the assembly loops: doBumpASM counts
 * (width / 4 - 1) groups per row and (height - 1) rows and only tests each
 * counter after decrementing it, so a starting count of zero would wrap
 * around instead of terminating.
 *
 * NOTE(review): doBumpASM is not declared in the visible part of this file;
 * presumably an extern "C" prototype exists elsewhere -- confirm.
 */
void doBump(int width, int height,
            unsigned char *source, unsigned char *dest,
            signed long spotX1, signed long spotY1,
            signed long spotX2, signed long spotY2) {
    //what to do with final row and final column? dunno right now.

    if (source == 0 || dest == 0 || bumpMap == 0)
        return;
    if (width < 8 || height < 2)
        return;

    // Pack in unsigned arithmetic: (unsigned short) promotes to int, and
    // shifting a value with bit 15 set left by 16 would overflow a signed int.
    const unsigned int packedSpot =
        (unsigned int)(unsigned short)spotX1 |
        ((unsigned int)(unsigned short)spotY1 << 16);

    doBumpASM(source, dest, width, height,
              packedSpot,
              spotY1, bumpMap, spotX2, spotY2);
    return;
}

-----------bump.nas--------------------
;-----------------------------------------------------------------------
; onePixel <byteOffset>
;
; Handles the dword at [esi + eax + 4 + byteOffset].
;
; In:    esi = source base, edi = destination base, ebx = lookup table base,
;        eax = byte index added to both esi and edi
; Out:   dword at [edi + eax + 4 + byteOffset] = the source dword (mm1)
;        edx = table index, mm2 = table dword at [ebx + edx*4]
; Clobb: edx, mm1, mm2, flags
;
; The table index is built from two byte differences, each halved:
;        low  byte = ((p - 128) - byte[esi + byteOffset + 7])       >> 1
;        high byte = ((p - 128) - byte[esi + eax + byteOffset + 3]) >> 1
; where p is the low byte of the source dword.
;
; NOTE(review): the looked-up dword ends up in mm2, but it is mm1 (the
; untouched source dword) that gets stored -- confirm whether that is intended.
;-----------------------------------------------------------------------
%macro onePixel 1
        mov     edx, [esi + eax + 4 + %1]       ; edx = source dword
        movd    mm1, edx                        ; keep a copy for the store below

        sub     dl, 128                         ; p - 128
        mov     dh, dl
        sub     dh, [esi + eax + 4 - 1 + %1]    ; minus the byte just before the dword
        sub     dl, [esi + 4 + 3 + %1]          ; minus the byte at offset 7 (no eax term)

        ; Halve both difference bytes at once: after the shift, bit 7 of each
        ; byte holds a bit from the byte above it, and the mask removes it
        ; together with the upper 16 bits.
        shr     edx, 1
        and     edx, 0x00007F7F

        movd    mm2, [ebx + edx*4]              ; table lookup
        movd    [edi + eax + 4 + %1], mm1       ; write the source dword to dest
%endmacro

bits 32

; Executable code goes in .text; a .data section is not guaranteed to be
; mapped executable.
section .text

GLOBAL  _doBumpASM
GLOBAL  doBumpASM

; Stack offsets relative to ebp once the prologue has pushed seven registers.
BUMP_SAVED_REGS equ 7*4
BUMP_ARG_SOURCE equ BUMP_SAVED_REGS + 4         ; +4 skips the return address
BUMP_ARG_DEST   equ BUMP_SAVED_REGS + 8
BUMP_ARG_WIDTH  equ BUMP_SAVED_REGS + 12
BUMP_ARG_HEIGHT equ BUMP_SAVED_REGS + 16
BUMP_ARG_TABLE  equ BUMP_SAVED_REGS + 28

;-----------------------------------------------------------------------
; doBumpASM(source, dest, width, height, packedSpot, spotY1,
;           bumpMap, spotX2, spotY2)
;
; ABI:   32-bit, all arguments on the stack, caller cleans up (plain ret).
; In:    source, dest  - pixel buffers
;        width, height - dimensions; width*4 is used as a byte index, which
;                        presumably means 4 bytes per pixel -- TODO confirm
;        bumpMap       - table of dwords indexed by onePixel
;        packedSpot, spotY1, spotX2, spotY2 - accepted but not read by this
;                        version of the routine
; Out:   nothing; every general-purpose register is restored.
; Clobb: mm1-mm3 and flags; MMX state is released with emms.
;
; Runs onePixel on four consecutive dwords (width/4 - 1) times per row, for
; (height - 1) rows.  Returns without touching memory when width < 8 or
; height < 2, because either loop counter would start at zero or below.
;
; NOTE(review): esi, edi and eax are never advanced inside either loop, so
; every iteration processes the same four dwords.  The listing looks
; abridged -- confirm against the full source before relying on it.
;-----------------------------------------------------------------------
doBumpASM:
_doBumpASM:
        push    ebp
        push    eax
        push    ecx
        push    esi
        push    ebx
        push    edi
        push    edx
        mov     ebp, esp

        pxor    mm3, mm3                        ; zero mm3 for any onePixel variant that unpacks against it

        mov     ebx, [ebp + BUMP_ARG_TABLE]
        mov     esi, [ebp + BUMP_ARG_SOURCE]
        mov     edi, [ebp + BUMP_ARG_DEST]
        mov     eax, [ebp + BUMP_ARG_WIDTH]
        shl     eax, 2                          ; eax = width*4, the index onePixel adds

        cmp     dword [ebp + BUMP_ARG_WIDTH], 8
        jl      near .done                      ; fewer than one full group per row

        mov     ecx, [ebp + BUMP_ARG_HEIGHT]
        dec     ecx                             ; rows to process = height - 1
        jle     near .done

.outerLoop:
        push    ecx                             ; ecx is reused as the inner counter
        push    esi
        push    edi

        mov     ecx, [ebp + BUMP_ARG_WIDTH]
        shr     ecx, 2
        dec     ecx                             ; groups per row = width/4 - 1 (>= 1 here)

.innerLoop:
        onePixel 0
        onePixel 4
        onePixel 8
        onePixel 12

        dec     ecx
        jnz     near .innerLoop

        pop     edi
        pop     esi
        pop     ecx
        dec     ecx
        jnz     near .outerLoop

.done:
        pop     edx
        pop     edi
        pop     ebx
        pop     esi
        pop     ecx
        pop     eax
        pop     ebp
        emms
        ret
align 16

```