I use clang -rewrite-objc Block.m to generate c++ code of Block.m.
The code in Block.m is under ARC:
void func() {
    __block NSObject *obj = [[NSObject alloc] init];
    void (^blk)(void) = ^() {
        obj = nil;
    };
}
I believe when block is copied and move to the heap, the block in the heap will retain the obj. But after digging into the source code of block runtime, I got a opposite result.
The generated c++ code:
static void __Block_byref_id_object_copy_131(void *dst, void *src) {
 _Block_object_assign((char*)dst + 40, *(void * *) ((char*)src + 40), 131);
}
static void __Block_byref_id_object_dispose_131(void *src) {
 _Block_object_dispose(*(void * *) ((char*)src + 40), 131);
}
struct __Block_byref_obj_0 {
  void *__isa;
__Block_byref_obj_0 *__forwarding;
 int __flags;
 int __size;
 void (*__Block_byref_id_object_copy)(void*, void*);
 void (*__Block_byref_id_object_dispose)(void*);
 NSObject *obj;
};
struct __func_block_impl_0 {
  struct __block_impl impl;
  struct __func_block_desc_0* Desc;
  __Block_byref_obj_0 *obj; // by ref
  __func_block_impl_0(void *fp, struct __func_block_desc_0 *desc, __Block_byref_obj_0 *_obj, int flags=0) : obj(_obj->__forwarding) {
    impl.isa = &_NSConcreteStackBlock;
    impl.Flags = flags;
    impl.FuncPtr = fp;
    Desc = desc;
  }
};
static void __func_block_func_0(struct __func_block_impl_0 *__cself) {
  __Block_byref_obj_0 *obj = __cself->obj; // bound by ref
        (obj->__forwarding->obj) = __null;
    }
static void __func_block_copy_0(struct __func_block_impl_0*dst, struct __func_block_impl_0*src) {_Block_object_assign((void*)&dst->obj, (void*)src->obj, 8/*BLOCK_FIELD_IS_BYREF*/);}
static void __func_block_dispose_0(struct __func_block_impl_0*src) {_Block_object_dispose((void*)src->obj, 8/*BLOCK_FIELD_IS_BYREF*/);}
static struct __func_block_desc_0 {
  size_t reserved;
  size_t Block_size;
  void (*copy)(struct __func_block_impl_0*, struct __func_block_impl_0*);
  void (*dispose)(struct __func_block_impl_0*);
} __func_block_desc_0_DATA = { 0, sizeof(struct __func_block_impl_0), __func_block_copy_0, __func_block_dispose_0};
void func() {
    __attribute__((__blocks__(byref))) __Block_byref_obj_0 obj = {(void*)0,(__Block_byref_obj_0 *)&obj, 33554432, sizeof(__Block_byref_obj_0), __Block_byref_id_object_copy_131, __Block_byref_id_object_dispose_131, ((NSObject *(*)(id, SEL))(void *)objc_msgSend)((id)((NSObject *(*)(id, SEL))(void *)objc_msgSend)((id)objc_getClass("NSObject"), sel_registerName("alloc")), sel_registerName("init"))};
    void (*blk)(void) = ((void (*)())&__func_block_impl_0((void *)__func_block_func_0, &__func_block_desc_0_DATA, (__Block_byref_obj_0 *)&obj, 570425344));
}
NOTE: 33554432 is BLOCK_HAS_COPY_DISPOSE and 570425344 is BLOCK_HAS_COPY_DISPOSE|BLOCK_HAS_DESCRIPTOR.
When block is copied, __func_block_copy_0 is called to handle the variables it captured, in this case it makes a copy of (__Block_byref_obj_0)obj, changes obj->forwarding to the copy __Block_byref_obj_0 and so on, all of these operations happen in _Block_object_assign((void*)&dst->obj, (void*)src->obj, 8/*BLOCK_FIELD_IS_BYREF*/);.
The source code of _Block_object_assign:
void _Block_object_assign(void *destAddr, const void *object, const int flags) {
    //printf("_Block_object_assign(*%p, %p, %x)\n", destAddr, object, flags);
    if ((flags & BLOCK_BYREF_CALLER) == BLOCK_BYREF_CALLER) {
        if ((flags & BLOCK_FIELD_IS_WEAK) == BLOCK_FIELD_IS_WEAK) {
            _Block_assign_weak(object, destAddr);
        }
        else {
            // do *not* retain or *copy* __block variables whatever they are
            _Block_assign((void *)object, destAddr);
        }
    }
    else if ((flags & BLOCK_FIELD_IS_BYREF) == BLOCK_FIELD_IS_BYREF)  {
        // copying a __block reference from the stack Block to the heap
        // flags will indicate if it holds a __weak reference and needs a special isa
        _Block_byref_assign_copy(destAddr, object, flags);
    }
    // (this test must be before next one)
    else if ((flags & BLOCK_FIELD_IS_BLOCK) == BLOCK_FIELD_IS_BLOCK) {
        // copying a Block declared variable from the stack Block to the heap
        _Block_assign(_Block_copy_internal(object, flags), destAddr);
    }
    // (this test must be after previous one)
    else if ((flags & BLOCK_FIELD_IS_OBJECT) == BLOCK_FIELD_IS_OBJECT) {
        //printf("retaining object at %p\n", object);
        _Block_retain_object(object);
        //printf("done retaining object at %p\n", object);
        _Block_assign((void *)object, destAddr);
    }
}
static void _Block_byref_assign_copy(void *dest, const void *arg, const int flags) {
    struct Block_byref **destp = (struct Block_byref **)dest;
    struct Block_byref *src = (struct Block_byref *)arg;
    //printf("_Block_byref_assign_copy called, byref destp %p, src %p, flags %x\n", destp, src, flags);
    //printf("src dump: %s\n", _Block_byref_dump(src));
    if (src->forwarding->flags & BLOCK_IS_GC) {
        ;   // don't need to do any more work
    }
    else if ((src->forwarding->flags & BLOCK_REFCOUNT_MASK) == 0) {
        //printf("making copy\n");
        // src points to stack
        bool isWeak = ((flags & (BLOCK_FIELD_IS_BYREF|BLOCK_FIELD_IS_WEAK)) == (BLOCK_FIELD_IS_BYREF|BLOCK_FIELD_IS_WEAK));
        // if its weak ask for an object (only matters under GC)
        struct Block_byref *copy = (struct Block_byref *)_Block_allocator(src->size, false, isWeak);
        copy->flags = src->flags | _Byref_flag_initial_value; // non-GC one for caller, one for stack
        copy->forwarding = copy; // patch heap copy to point to itself (skip write-barrier)
        src->forwarding = copy;  // patch stack to point to heap copy
        copy->size = src->size;
        if (isWeak) {
            copy->isa = &_NSConcreteWeakBlockVariable;  // mark isa field so it gets weak scanning
        }
        if (src->flags & BLOCK_HAS_COPY_DISPOSE) {
            // Trust copy helper to copy everything of interest
            // If more than one field shows up in a byref block this is wrong XXX
            copy->byref_keep = src->byref_keep;
            copy->byref_destroy = src->byref_destroy;
            (*src->byref_keep)(copy, src);
        }
        else {
            // just bits.  Blast 'em using _Block_memmove in case they're __strong
            _Block_memmove(
                (void *)©->byref_keep,
                (void *)&src->byref_keep,
                src->size - sizeof(struct Block_byref_header));
        }
    }
    // already copied to heap
    else if ((src->forwarding->flags & BLOCK_NEEDS_FREE) == BLOCK_NEEDS_FREE) {
        latching_incr_int(&src->forwarding->flags);
    }
    // assign byref data block pointer into new Block
    _Block_assign(src->forwarding, (void **)destp);
}
As flag is BLOCK_FIELD_IS_BYREF, so the branch goes to _Block_byref_assign_copy, this function malloc memory for a copy of __Block_byref_obj_0 and do some assignments, finally it will call (*src->byref_keep)(copy, src) which points to __Block_byref_id_object_copy_131, as we can see in this function, only one line of code:
_Block_object_assign((char*)dst + 40, *(void * *) ((char*)src + 40), 131);, 131 is BLOCK_FIELD_IS_OBJECT|BLOCK_BYREF_CALLER, "(char*)dst+40" is the address of BLOCK_FIELD_IS_BYREF(copy), as a result it will call _Block_assign((void *)object, destAddr); and this function just do a assignment *destAddr = object;, no retain!!!
I believe obj should be retained but the source code seems that it doesn't retain it. I'm really confusing.
I got the source code of Blocks Runtime here, you can fetch it using svn co http://llvm.org/svn/llvm-project/compiler-rt/trunk compiler-rt.
Update 1:
static void _Block_assign_default(void *value, void **destptr) {
    *destptr = value;
}
static void (*_Block_assign)(void *value, void **destptr) = _Block_assign_default;
 
     
     
    