June 1, 2017

Circular debugging using ptrace results in deadlock due to race condition?

Hi guys,

As part of my personal research I am facing a challenging problem.
I am trying to let two processes be each other's debuggers using the ptrace syscall. However, my proof-of-concept implementation always results in a deadlock: both processes get stuck in the 't+' state (tracing stop, foreground process group) as shown by 'ps aux'.

Here is my code, it's pretty simple:
 

/* C standard headers */
#include <errno.h>
#include <inttypes.h>
#include <setjmp.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Linux headers */
#include <dirent.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <sys/user.h>
/* Architecture-specific headers */
#include <asm/ptrace.h>
#include <asm/unistd.h>

#include <signal.h>
#include <assert.h>

/* Type of a function returning void with an unspecified parameter list;
 * nothing in the visible listing refers to it. */
typedef void fun_moved_from_context();
// using namespace std;
/*
 * Issue PTRACE_ATTACH against `pid` and log the raw return value.
 *
 * pid: process to attach to.
 * id:  label printed at the start of the log line.
 *
 * When ptrace() returns -1 the failure is additionally reported via perror().
 */
void attachTo(pid_t pid, char* id) {
  const long status = ptrace(PTRACE_ATTACH, pid, NULL, NULL);

  printf("\t%s\tattachTo: %ld\n", id, status);
  if (status != -1) {
    return;
  }
  perror("err: ");
}
/*
 * Issue PTRACE_SEIZE against `pid` and log the raw return value.
 *
 * pid: process to seize.
 * id:  label printed at the start of the log line.
 *
 * ptrace() returns 0 on success and -1 on failure for this request, so the
 * outcome is logged and a failure is reported via perror() rather than
 * asserting on a positive return value (which would abort on every call).
 */
void seizeTo(pid_t pid, char* id) {
  long ret = ptrace(PTRACE_SEIZE, pid, NULL, NULL);
  int saved_errno = errno;  /* printf() below may overwrite errno */

  printf("\t%s\tseizeTo: %ld\n", id, ret);
  if (ret == -1) {
    errno = saved_errno;
    perror("err: ");
  }
}
/*
 * Issue PTRACE_DETACH against `pid` and log the raw return value,
 * prefixed with the label `id`.
 */
void detachFrom(pid_t pid, char* id) {
  const long status = ptrace(PTRACE_DETACH, pid, NULL, NULL);

  printf("\t%s\tdetachFrom: %ld\n", id, status);
}
/*
 * Issue PTRACE_SETOPTIONS against `pid`, requesting the clone, exec, exit,
 * fork and vfork trace options, and log the raw return value prefixed with
 * the label `id`.
 */
void setOptions(pid_t pid, char* id) {
  const long trace_flags = PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC |
                           PTRACE_O_TRACEEXIT | PTRACE_O_TRACEFORK |
                           PTRACE_O_TRACEVFORK;
  const long status =
      ptrace(PTRACE_SETOPTIONS, pid, NULL, (void*)trace_flags);

  printf("\t%s\tsetOptions: %ld\n", id, status);
}
/*
 * Issue PTRACE_POKEDATA against `pid`, using `can_run` as the address
 * argument and `data` as the data argument, then log the raw return value
 * prefixed with the label `id`.
 */
void setVarData(pid_t pid, volatile bool* can_run, void* data, char* id) {
  void* target_addr = (void*)can_run;
  const long status = ptrace(PTRACE_POKEDATA, pid, target_addr, data);

  printf("\t%s\tsetVarData: %ld\n", id, status);
}
/*
 * Issue PTRACE_CONT against `pid` with a NULL data argument and log the raw
 * return value prefixed with the label `id`.
 */
void cont(pid_t pid, char* id) {
  const long status = ptrace(PTRACE_CONT, pid, NULL, NULL);

  printf("\t%s\tcont: %ld\n", id, status);
}
/*
 * Issue PTRACE_INTERRUPT against `pid` and log the raw return value
 * prefixed with the label `id`.
 */
void interrupt(pid_t pid, char* id) {
  const long status = ptrace(PTRACE_INTERRUPT, pid, NULL, NULL);

  printf("\t%s\tinterrupt: %ld\n", id, status);
}
/*
 * Tracer event loop: repeatedly wait()s for a child/tracee status change and
 * reacts to ptrace stops.
 *
 * id: numeric tag identifying the caller. It is rendered as decimal text once
 *     and that text is used as the label in every log line and in calls to
 *     cont(), which takes a string label.
 *
 * Returns when wait() fails (returns -1) or when a PTRACE_EVENT_EXIT stop is
 * observed.
 */
void debug(int id) {
  char label[16];
  snprintf(label, sizeof label, "%d", id);

  while (true) {
    int status;

    printf("\t%s\twhile\n", label);
    sleep(1);

    pid_t recv = wait(&status);
    if (recv == -1) {
      printf("\t%s\tDebugger exiting\n", label);
      return;
    }
    if (!WIFSTOPPED(status)) {
      continue;
    }

    if (WSTOPSIG(status) != SIGTRAP) {
      /* Resume the tracee so it is not left stopped forever; cont() passes
       * a NULL data argument, so the stopping signal is not re-injected. */
      cont(recv, label);
      continue;
    }

    /* For SIGTRAP stops, what remains after removing SIGTRAP from
     * (status >> 8) is the ptrace event number shifted left by 8. */
    int event_code = (status >> 8) ^ SIGTRAP;
    switch (event_code) {
      case PTRACE_EVENT_FORK << 8:
        printf("\t%s\tFORK EVENT.\n", label);
        cont(recv, label);
        break;
      case PTRACE_EVENT_EXIT << 8:
        printf("\t%s\t%li exited.\n", label, (long)recv);
        return;
      default: {
        struct user_regs regs;

        printf("\t%s\trecv: %i ; status: %i\n", label, (int)recv, status);
        if (ptrace(PTRACE_GETREGS, recv, NULL, &regs) == -1) {
          /* Never write back a register set that was not read. */
          printf("\t%s\tPTRACE_GETREGS failed\n", label);
        } else {
          /* uregs[15] is logged as the PC; presumably the +2 steps over a
           * 2-byte breakpoint instruction -- TODO confirm. */
          regs.uregs[15] += 2;
          printf("\t%s\tp: new PC: %lx\n", label, regs.uregs[15]);
          ptrace(PTRACE_SETREGS, recv, NULL, &regs);
        }
        cont(recv, label);
        break;
      }
    }
  }
}

/*
 * Proof of concept: parent (A) and child (B) try to become each other's
 * ptrace tracer.
 *
 * B attaches to A first, pokes A's `can_runA` flag and A's copy of `procB`,
 * then resumes A. A then attaches to B, pokes B's `can_runB` flag and resumes
 * B. The variables live at the same addresses in both processes because B is
 * created by fork().
 *
 * The poked variables are `long` because PTRACE_POKEDATA writes a whole
 * machine word; poking a 1-byte bool would also overwrite the bytes next to
 * it.
 *
 * NOTE(review): while A is traced by B, a signal delivered to A (presumably
 * including the SIGCHLD raised when B stops for A's attach) puts A into a
 * stop that only B can resume, while B is itself stopped -- this is a likely
 * cause of the reported hang; confirm against ptrace(2).
 */
int main() {
  volatile long can_runA = 0, can_runB = 0;
  pid_t procA = getpid();
  volatile long procB = 0;

  pid_t child = fork();
  if (child == -1) {
    /* Without this check a failed fork would run the child branch in the
     * only existing process and try to attach to itself. */
    perror("fork");
    return 1;
  }

  if (child > 0) {  // process A
    while (!can_runA) {
      printf("\tA\twaiting to continue...\n");
      sleep(1);
    }
    attachTo((pid_t)procB, "A");
    waitpid((pid_t)procB, NULL, __WALL);
    setOptions((pid_t)procB, "A");
    setVarData((pid_t)procB, (volatile bool*)&can_runB, (void*)1L, "A");
    cont((pid_t)procB, "A");
    printf("\tA\tfinished\n");
  } else {  // process B
    procB = getpid();
    attachTo(procA, "B");
    waitpid(procA, NULL, __WALL);
    setOptions(procA, "B");
    setVarData(procA, (volatile bool*)&can_runA, (void*)1L, "B");
    setVarData(procA, (volatile bool*)&procB, (void*)procB, "B");
    cont(procA, "B");
    while (!can_runB) {
      printf("\tB\twaiting to continue...\n");
      sleep(1);
    }
    printf("\tB\tfinished\n");
  }
  return 0;
}

I have compiled and run this on an ARMv7 developer board with kernel version 3.0.35 (Linaro 13.08).

The output of the above code is this:

  A    waiting to continue...
  B    attachTo: 0
  B    setOptions: 0
  B    setVarData: 0
  B    setVarData: 0
  B    cont: 0
  B    waiting to continue...
  B    waiting to continue...
  A    attachTo: 0 

As you can see, neither process ever reaches its "finished" printf: everything gets stuck as soon as process A attempts to attach to process B, which is already A's debugger.

I have done a similar experiment for 3 processes, such that each one attempts to attach to the other in a circular fashion: A -> B -> C -> A
The result in this case was exactly the same. However, here I was able to detect a race condition, because sometimes the code executed properly without getting stuck in a deadlock (but it's hard to reproduce).

If you wish, you can test this by using a lightweight debugger I've developed and three console terminals. Here's the code:

/* C standard headers */
#include <errno.h>
#include <inttypes.h>
#include <setjmp.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Linux headers */
#include <dirent.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <sys/user.h>
/* Architecture-specific headers */
#include <asm/ptrace.h>
#include <asm/unistd.h>

#include <signal.h>
#include <assert.h>

/* Type of a function returning void with an unspecified parameter list;
 * nothing in the visible listing refers to it. */
typedef void fun_moved_from_context();
//using namespace std;
/*
 * Issue PTRACE_ATTACH against `pid` and log the raw return value,
 * prefixed with the numeric tag `id`.
 */
void attachTo(pid_t pid, int id) {
    const long status = ptrace(PTRACE_ATTACH, pid, NULL, NULL);

    printf("%i  attachTo: %ld\n", id, status);
}
/*
 * Issue PTRACE_SEIZE against `pid` and log the raw return value,
 * prefixed with the numeric tag `id`. The result is only logged, not checked.
 */
void seizeTo(pid_t pid, int id) {
    const long status = ptrace(PTRACE_SEIZE, pid, NULL, NULL);

    printf("%i  seizeTo: %ld\n", id, status);
}
/*
 * Issue PTRACE_DETACH against `pid` and log the raw return value,
 * prefixed with the numeric tag `id`.
 */
void detachFrom(pid_t pid, int id) {
    const long status = ptrace(PTRACE_DETACH, pid, NULL, NULL);

    printf("%i  detachFrom: %ld\n", id, status);
}
/*
 * Issue PTRACE_SETOPTIONS against `pid`, requesting the clone, exec, exit,
 * fork and vfork trace options, and log the raw return value prefixed with
 * the numeric tag `id`.
 */
void setOptions(pid_t pid, int id) {
    const long trace_flags = PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC |
                             PTRACE_O_TRACEEXIT | PTRACE_O_TRACEFORK |
                             PTRACE_O_TRACEVFORK;
    const long status =
        ptrace(PTRACE_SETOPTIONS, pid, NULL, (void*)trace_flags);

    printf("%i  setOptions: %ld\n", id, status);
}
/*
 * Issue PTRACE_POKEDATA against `pid`, using `can_run` as the address
 * argument and `data` as the data argument, then log the raw return value
 * prefixed with the numeric tag `id`.
 */
void setVarData(pid_t pid, volatile bool* can_run, void* data, int id) {
    void* target_addr = (void*)can_run;
    const long status = ptrace(PTRACE_POKEDATA, pid, target_addr, data);

    printf("%i  setVarData: %ld\n", id, status);
}
/*
 * Issue PTRACE_CONT against `pid` with a NULL data argument and log the raw
 * return value prefixed with the numeric tag `id`.
 */
void cont(pid_t pid, int id) {
    const long status = ptrace(PTRACE_CONT, pid, NULL, NULL);

    printf("%i  cont: %ld\n", id, status);
}
/*
 * Issue PTRACE_INTERRUPT against `pid` and log the raw return value
 * prefixed with the numeric tag `id`.
 */
void interrupt(pid_t pid, int id) {
    const long status = ptrace(PTRACE_INTERRUPT, pid, NULL, NULL);

    printf("%i  interrupt: %ld\n", id, status);
}
/*
 * Tracer event loop: repeatedly wait()s for a child/tracee status change and
 * reacts to ptrace stops.
 *
 * id: numeric tag printed at the start of every log line and passed on to
 *     cont().
 *
 * Stops caused by a signal other than SIGTRAP are simply resumed. For SIGTRAP
 * stops the ptrace event is decoded: fork events are resumed, an exit event
 * ends the loop, and anything else has register 15 advanced by 2 before the
 * tracee is resumed.
 *
 * Returns when wait() fails (returns -1) or when a PTRACE_EVENT_EXIT stop is
 * observed.
 */
void debug(int id) {
    while (true) {
        int status;

        printf("%i  while\n", id);
        sleep(1);

        pid_t recv = wait(&status);
        if (recv == -1) {
            printf("%i  Debugger exiting\n", id);
            return;
        }
        if (!WIFSTOPPED(status)) {
            continue;
        }

        int signal = WSTOPSIG(status);
        printf("%i  signal: %i\n", id, signal);
        if (signal != SIGTRAP) {
            cont(recv, id);
            continue;
        }

        /* What remains after removing SIGTRAP from (status >> 8) is the
         * ptrace event number shifted left by 8. */
        int event_code = (status >> 8) ^ SIGTRAP;
        printf("%i  event_code: %i\n", id, event_code);
        switch (event_code) {
            case PTRACE_EVENT_FORK << 8:
                printf("%i  FORK EVENT.\n", id);
                cont(recv, id);
                break;
            case PTRACE_EVENT_EXIT << 8:
                printf("%i  %li exited.\n", id, (long)recv);
                return;
            default: {
                struct user_regs regs;

                printf("%i  recv: %i ; status: %i\n", id, (int)recv, status);
                if (ptrace(PTRACE_GETREGS, recv, NULL, &regs) == -1) {
                    /* Never write back a register set that was not read. */
                    printf("%i  PTRACE_GETREGS failed\n", id);
                } else {
                    /* uregs[15] is logged as the PC; presumably the +2 steps
                     * over a 2-byte breakpoint instruction -- TODO confirm. */
                    regs.uregs[15] += 2;
                    printf("%i  p: new PC: %lx\n", id, regs.uregs[15]);
                    ptrace(PTRACE_SETREGS, recv, NULL, &regs);
                }
                cont(recv, id);
                break;
            }
        }
    }
}

/*
 * Interactive mini-debugger.
 *
 * Prints its own PID, then reads a target PID from standard input:
 *   - 0 executes a `bkpt` instruction in this process;
 *   - any other value attaches to that PID, waits for it to stop, sets the
 *     trace options, resumes it and enters the debug() event loop.
 *
 * Returns 0 normally, or 1 if no number could be read from standard input.
 */
int main() {
    int pid = 0;
    int me = getpid();

    printf("Hello, I am %d\n", me);
    printf("pid:");
    fflush(stdout);  /* the prompt has no newline; make sure it is shown */

    /* Without this check a failed read would leave `pid` unset and the
     * program would act on an indeterminate value. */
    if (scanf("%d", &pid) != 1) {
        fprintf(stderr, "expected a numeric pid\n");
        return 1;
    }

    if (pid == 0) {
        printf("bkpt asm\n");
        asm("bkpt");
    } else {
        attachTo(pid, me);
        printf("start waitpid\n");
        waitpid(pid, NULL, __WALL);
        printf("end waitpid\n");
        setOptions(pid, me);
        cont(pid, me);
        debug(me);
    }
    return 0;
}

Once you've compiled the above code, you simply run the binary on each console and enter the PID of another process to establish a 3-way circle.

I am far from an expert on the kernel, but I did have a look at the ARM specific kernel implementation which left me puzzled. I couldn't find where/how/why this code does not work.
Now I'm wondering if it's possible at all to make this work without a deadlock occurring? Does anyone have any experience with this, or can provide some clues/feedback?

Thank you greatly for your time, attention and effort! :)
Ilya

Click Here!