I'm trying to optimize the runtime of the code that I wrote for computing the number of iterations it takes to reach 1 in the collatz conjecture
Here is my psudeocode for this
int threexplusone(int x){
    if(x == 1){
      return 0;
    }else{
      if(x % 2 == 0){
        return (threexplusone(x/2)+1);
      }else{
        return (threexplusone(3*x+1)+1);
      }
    }
}
Here is my assembly code
threexplusone:
    push rbx        ;store the rbx to stack
    mov rax, 0      ;store the base case
    cmp rdi, 1      ;compare the input with 1
    je done         ;finished the loop if equal to 1
    jmp threexplusone_recursive ;jump to the recursion
threexplusone_recursive:
    push rax
    mov rdx,0
    mov rax,rdi
    mov rbx,2
    idiv rbx
    cmp rdx, 0      ;compare to check if the remainder is 0
    mov rbx, rax
    pop rax
    je  even        ;start the even instruction
odd:
    imul rdi,3      ;multiply x by 3
    add rdi, 1      ;add x by 1
    xor rax, rax
    call threexplusone  ;do the recursion
        
    inc rax     ;add the result by one
    jmp done
    
    
even:
    sar rdi,1       ;divided the input by 2
    xor rax,rax
    call threexplusone  ;do the recursion
    inc rax         ;add the result by one
    jmp done        ;
        ;
done:
    pop rbx
    ret
What could be the possible way of optimizing this code? Thanks
