Here is a portable solution. It will, however, be slower than solutions that take advantage of specialized instructions such as clz (count leading zeros). I added a comment to each step of the algorithm to explain how it works.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
/* Locate the single set bit of a.
   Contract: a is either zero or has exactly one bit set, and that bit is the
   most significant bit of one of its bytes (bit 7, 15, 23, ..., 63).
   Returns the index of that bit, or 0 when a is zero.
*/
int bit_pos (uint64_t a)
{
    const uint64_t byte_lsbs = 0x0101010101010101ULL; /* bit 0 of every byte */

    /* a - 1 sets every bit below the one set in a; for a == 0 it wraps to
       all ones, which leaves bit 63 set. */
    uint64_t below = a - 1;

    /* 1 for a zero input, 0 for every valid non-zero input. */
    uint64_t zero_fix = below >> 63;

    /* For a zero input, all-ones + 1 wraps to 0, so no byte gets marked.
       Otherwise, keep one marker bit for each byte the mask reaches into. */
    uint64_t markers = (below + zero_fix) & byte_lsbs;

    /* The multiplication adds up all marker bytes in the top byte. */
    uint64_t summed = markers * byte_lsbs;

    /* Shifting by 53 instead of 56 yields 8 * count; subtracting 1 gives the
       bit index. The zero correction turns the resulting -1 back into 0. */
    return (int)((summed >> 53) - 1 + zero_fix);
}
/* Test driver: prints the result of bit_pos() for a zero input and for each
   of the eight valid single-bit inputs (bits 7, 15, ..., 63), next to the
   expected position.
*/
int main (void)
{
    int i;
    uint64_t a;
    a = 0;
    /* uint64_t is not necessarily unsigned long long (it is unsigned long on
       LP64 systems), so cast explicitly to match the %llx conversion. */
    printf ("a=%016llx   bit_pos=%2d   reference_pos=%2d\n",
            (unsigned long long)a, bit_pos(a), 0);
    for (i = 7; i < 64; i += 8) {
        a = (1ULL << i);
        printf ("a=%016llx   bit_pos=%2d   reference_pos=%2d\n", 
                (unsigned long long)a, bit_pos(a), i);
    }
    return EXIT_SUCCESS;
}
The output of this code should look like this:
a=0000000000000000   bit_pos= 0   reference_pos= 0
a=0000000000000080   bit_pos= 7   reference_pos= 7
a=0000000000008000   bit_pos=15   reference_pos=15
a=0000000000800000   bit_pos=23   reference_pos=23
a=0000000080000000   bit_pos=31   reference_pos=31
a=0000008000000000   bit_pos=39   reference_pos=39
a=0000800000000000   bit_pos=47   reference_pos=47
a=0080000000000000   bit_pos=55   reference_pos=55
a=8000000000000000   bit_pos=63   reference_pos=63
On an x86_64 platform, my compiler translates bit_pos() into this machine code:
; Compiler output for the step-by-step C version of bit_pos() shown above.
; rcx is the only input register read, so it presumably carries the argument a
; (Microsoft x64 calling convention); the result is left in rax.
bit_pos PROC 
        lea       r8, QWORD PTR [-1+rcx]        ; r8 = a - 1 (mask)
        shr       r8, 63                        ; r8 = c, the correction for zero inputs
        mov       r9, 0101010101010101H         ; r9 = constant with bit 0 of every byte set
        lea       rdx, QWORD PTR [-1+r8+rcx]    ; rdx = (a - 1) + c
        and       rdx, r9                       ; keep one marker bit per byte covered by the mask
        imul      r9, rdx                       ; sum the byte markers in the uppermost byte
        shr       r9, 53                        ; r9 = product >> 53
        lea       rax, QWORD PTR [-1+r8+r9]     ; rax = (product >> 53) - 1 + c
        ret
[Later update]
The answer by duskwuff made it clear to me that my original thinking was unnecessarily convoluted. In fact, using duskwuff's approach, the desired functionality can be expressed much more concisely as follows:
/* Locate the single set bit of a.
   Contract: a is either zero or has exactly one bit set, and that bit is the
   most significant bit of one of its bytes (bit 7, 15, 23, ..., 63).
   Returns the index of that bit, or 0 when a is zero.
*/
int bit_pos (uint64_t a)
{
    /* Lookup table packed into one word: byte j (counted from the least
       significant byte) holds 63 - 8*j, i.e. the bytes read 7, 15, 23, 31,
       39, 47, 55, 63 from most to least significant. */
    const uint64_t position_table = 0x070f171f272f373fULL;

    /* For a valid input this is 0 or a single bit at position 8*k. */
    uint64_t selector = a >> 7;

    /* Multiplying by 1 << (8*k) shifts the table left by k bytes, which moves
       the entry 8*k + 7 into the top byte; a zero selector yields zero. */
    uint64_t product = selector * position_table;

    return (int)(product >> 56);
}
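Any reasonable compiler will precompute the magic multiplier, which is 0x070f171f272f373fULL. The trick works because, for a valid input, a >> 7 is either 0 or a single bit at position 8k (k in [0,7]); multiplying by it shifts the multiplier left by k bytes, so the byte holding the value 8k+7 ends up in the most significant byte, which the final shift by 56 extracts. The code emitted for an x86_64 target shrinks to: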
; Compiler output for the magic-multiplier C version of bit_pos() shown above.
; rcx is the only input register read, so it presumably carries the argument a
; (Microsoft x64 calling convention); the result is left in rax.
bit_pos PROC 
        mov       rax, 070f171f272f373fH        ; rax = precomputed magic multiplier
        shr       rcx, 7                        ; rcx = a >> 7
        imul      rax, rcx                      ; rax = (a >> 7) * magic multiplier
        shr       rax, 56                       ; keep only the top byte of the product
        ret