Testing a C version first, using symbolic names rather than hard-coded values, with the help of musl-libc - you can retrieve syscall.h.in and syscall_arch.h from its git repository.
pthread_exit.c - calling the gettid system call only:
#include <stdio.h>
#include <signal.h>
#include "syscall.h.in"
#include "syscall_arch.h"
/*
 * Print the id returned by a raw gettid system call, then print a second
 * line. The tkill call is deliberately left out of this variant, so the
 * process keeps running and the second line is reached.
 */
int main(int argc, char** argv) {
  (void)argc;
  (void)argv;

  const long tid = __syscall0(__NR_gettid);

  printf("pid: %ld\n", tid);
  fputs("Kilroy should not be here.\n", stdout);
}
gcc -o pthread_exit pthread_exit.c
./pthread_exit
pid: 14494
Kilroy should not be here.
./pthread_exit
pid: 14495
Kilroy should not be here.
./pthread_exit
pid: 14496
Kilroy should not be here.
pthread_exit.c - calling both the gettid and tkill system calls:
#include <stdio.h>
#include <signal.h>
#include "syscall.h.in"
#include "syscall_arch.h"
/*
 * Print the id returned by a raw gettid system call, then send SIGKILL to
 * that same id through a raw tkill system call. The final message is only
 * printed if the signal did not terminate the process.
 */
int main(int argc, char** argv) {
  (void)argc;
  (void)argv;

  const long tid = __syscall0(__NR_gettid);

  printf("pid: %ld\n", tid);

  /* tkill(tid, SIGKILL): the result is ignored, as in a successful run we never get it. */
  __syscall2(__NR_tkill, tid, SIGKILL);

  fputs("Kilroy should not be here.\n", stdout);
}
gcc -o pthread_exit pthread_exit.c
./pthread_exit
pid: 14513
Killed
./pthread_exit
pid: 14514
Killed
./pthread_exit
pid: 14515
Killed
Looking at syscall_arch.h:
/*
 * Issue a system call that takes no arguments.
 *
 * n is the system call number; it is stored in the local r7, whose register
 * binding comes from the __ASM____R7__ macro. The local r0 is bound to
 * register r0 and is not given an initial value.
 *
 * NOTE(review): no return statement is visible here. Presumably the
 * __asm_syscall macro (defined elsewhere in syscall_arch.h) performs the
 * trap and returns r0 -- confirm against the macro definition.
 */
static inline long __syscall0(long n)
{
    register long r7 __ASM____R7__ = n; /* system call number */
    register long r0 __asm__("r0");     /* no argument to pass, so left unset */
    __asm_syscall(R7_OPERAND);
}
/*
 * Issue a system call that takes two arguments.
 *
 * n is the system call number; it is stored in the local r7, whose register
 * binding comes from the __ASM____R7__ macro. a and b are placed in locals
 * bound to registers r0 and r1 respectively.
 *
 * NOTE(review): no return statement is visible here. Presumably the
 * __asm_syscall macro (defined elsewhere in syscall_arch.h) performs the
 * trap and returns r0 -- confirm against the macro definition.
 */
static inline long __syscall2(long n, long a, long b)
{
    register long r7 __ASM____R7__ = n;  /* system call number */
    register long r0 __asm__("r0") = a;  /* first argument */
    register long r1 __asm__("r1") = b;  /* second argument */
    /* "0"(r0) ties r0 to operand 0 of the asm; r1 is a plain register input. */
    __asm_syscall(R7_OPERAND, "0"(r0), "r"(r1));
}
As you had figured out, the number for the desired system call should reside in r7, and parameters are in r0 to r5 - we just need r0 and r1 here.
So, yes, it works:
pthread_exit.s:
        .syntax unified
        .arch armv7-a
        .fpu vfpv3-d16

        @ Constants used below: system call numbers and the signal to send.
        .equ    SYS_gettid, 0xe0
        .equ    SYS_tkill,  0xee
        .equ    SIGKILL,    9

        .text
        .align  2
        .global main
        .thumb
        .thumb_func
        .type   main, %function
main:
        @ tid = gettid(): syscall number goes in r7, result comes back in r0
        mov     r7, #SYS_gettid
        svc     #0

        @ tkill(tid, SIGKILL): r0 still holds the tid returned above
        mov     r1, #SIGKILL
        mov     r7, #SYS_tkill
        svc     #0
        .end
Using qemu-user:
qemu-arm pthread_exit
Killed
On a Cortex-A7 system:
 ./pthread_exit
Killed
Not sure this is the correct way to go though - see this discussion.