The code below is something I wrote a while back for Linux - it finds the highest set bit, which I think is what you are asking for.  It doesn't follow your exact specs, but should be easily adaptable.
Further notes:
- A return value of 0 means that bit 0 is the highest (i.e. the only) set bit; if no bits are set at all, 64 is returned.
- This assembler is written for the calling convention used by GCC under Linux.  I don't know how this differs under Mac OS X - you need to check.
- Input is a pointer to a 64-bit unsigned int (each routine receives `&bb` in a register and loads the value itself).
- Each CPU architecture is written into a separate .S source file and selectively compiled using 'gcc' depending on the target being built.  I don't use inline assembler.
x86:
/*
 * x86_msb - find the highest set bit in a 64-bit bitboard.
 *
 * Syntax: AT&T (GAS), 32-bit x86.
 * In:     %eax = &bb (address of the 64-bit bitboard).  The caller has to
 *         pass the first argument in %eax, e.g. via GCC's regparm
 *         attribute - TODO confirm against the C prototype in use.
 * Out:    %eax = index (0..63) of the highest set bit, or 64 if bb == 0.
 * Clobb:  %edx, flags.
 */
.globl x86_msb;
.type x86_msb,@function;
x86_msb:
    /*
     * Keep the pointer in %edx.  BSR leaves its destination undefined when
     * the source is zero, so %eax cannot be both the pointer and the BSR
     * destination if the low dword still has to be loaded afterwards.
     */
    mov %eax, %edx
    bsrl 4(%edx), %eax          /* scan bb (high); ZF=1 if it is zero */
    jz .Lx86_msb_low
    add $32, %eax               /* bit index is relative to bit 32 */
    ret
.Lx86_msb_low:
    bsrl (%edx), %eax           /* scan bb (low); ZF=1 if it is zero */
    jz .Lx86_msb_empty
    ret
.Lx86_msb_empty:
    mov $64, %eax               /* bb is empty */
    ret
x86_64:
/*
 * x64_msb - index of the most significant set bit of a 64-bit bitmask.
 *
 * Syntax: AT&T (GAS), x86-64.
 * In:     %rdi = &bb (address of the 64-bit bitmask)
 * Out:    %eax = bit index 0..63, or 64 when the bitmask is zero
 * Clobb:  %rdx, flags
 */
.globl x64_msb
.type x64_msb,@function
x64_msb:
    movl $64, %edx              /* result to use when bb has no bits set */
    bsrq (%rdi), %rax           /* ZF=1 iff bb == 0 (then %rax is undefined) */
    cmovz %edx, %eax            /* bb == 0: replace the undefined index by 64 */
    ret
Here are the Windows implementations (.asm file):
x86:
;;
;; @x86_msb@4 - offset of the highest set bit in a 64-bit bitmask
;;
;; Syntax: Intel (MASM), 32-bit x86
;; In:     ECX = &bb (address of the 64-bit bitmask)
;; Out:    EAX = bit offset 0..63, or 64 when the bitmask is zero
;; Clobb:  flags
;;
public @x86_msb@4
@x86_msb@4:
    bsr eax, dword ptr [ecx + 4]    ; scan bb (high); ZF=1 if it is zero
    jnz x86_msb_high_hit
    bsr eax, dword ptr [ecx]        ; high half empty: scan bb (low)
    jnz x86_msb_done
    mov eax, 64                     ; both halves zero: bb is empty
x86_msb_done:
    ret
x86_msb_high_hit:
    add eax, 32                     ; offset is relative to bit 32
    ret
x86_64:
;;
;; x64_msb - offset of the highest set bit in a 64-bit bitmask
;;
;; Syntax: Intel (MASM), x86-64
;; In:     RCX = &bb (address of the 64-bit bitmask)
;; Out:    EAX = bit offset 0..63, or 64 when the bitmask is zero
;; Clobb:  flags
;;
x64_msb PROC
    bsr rax, qword ptr [rcx]    ; rax = msb(bb); ZF=1 if bb == 0
    jnz msb_found
    mov eax, 64                 ; bb was empty
msb_found:
    ret
x64_msb ENDP