Finding process tear-down duration

When something as simple as looking for a process gets complicated.

  ·  2 min read

while trying to investigate a suspicious TCP connection that failed fast

finding the process that initiated a TCP request

reading /proc/pid before it gets deleted

I asked myself: how long does it take to be deleted?

First PoC using C

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <time.h>
#include <errno.h>

/*
 * Reads CLOCK_MONOTONIC and returns the reading as a single nanosecond count
 * (seconds scaled to nanoseconds plus the sub-second nanosecond part).
 */
long long get_time_ns() {
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);

    long long total_ns = now.tv_sec * 1000000000LL;
    total_ns += now.tv_nsec;
    return total_ns;
}

/*
 * Runs <command> [args...] in a child process, waits for the child to be
 * reaped, then polls /proc/<pid> until stat() reports ENOENT and prints how
 * long that took, in milliseconds.
 *
 * Returns 0 after printing the measurement, and 1 on a usage error or when
 * fork, waitpid, or stat fails unexpectedly.
 */
int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <command> [args...]\n", argv[0]);
        return 1;
    }

    pid_t pid = fork();
    if (pid < 0) {
        perror("fork");
        return 1;
    }
    if (pid == 0) {
        execvp(argv[1], &argv[1]);
        // Only reached when the exec itself failed.
        perror("execvp");
        // _exit rather than exit: the child must not run atexit handlers or
        // flush stdio buffers it inherited from the parent.
        _exit(1);
    }

    // Build the path before waiting so formatting is not part of the
    // measured interval.
    char proc_path[64];
    int path_len = snprintf(proc_path, sizeof proc_path, "/proc/%d", (int)pid);
    if (path_len < 0 || (size_t)path_len >= sizeof proc_path) {
        fprintf(stderr, "could not build /proc path\n");
        return 1;
    }

    // 1 microsecond requested between polls; the actual pause may be longer.
    const struct timespec sleep_ts = { .tv_sec = 0, .tv_nsec = 1000 };

    // Retry when a signal interrupts the wait, otherwise the timer would
    // start before the child has actually been reaped.
    pid_t reaped;
    do {
        reaped = waitpid(pid, NULL, 0);
    } while (reaped == -1 && errno == EINTR);
    long long exit_time = get_time_ns();

    if (reaped == -1) {
        perror("waitpid");
        return 1;
    }

    for (;;) {
        struct stat st;
        if (stat(proc_path, &st) == -1) {
            if (errno == ENOENT) {
                break; // the /proc entry is gone
            }
            // Any other failure would never turn into ENOENT by itself;
            // report it instead of spinning forever.
            perror("stat");
            return 1;
        }
        nanosleep(&sleep_ts, NULL);
    }

    long long cleanup_time = get_time_ns();
    double teardown_ms = (cleanup_time - exit_time) / 1000000.0;

    printf("Teardown time: %.3f milliseconds\n", teardown_ms);

    return 0;
}

Let’s try compiling and running it:

$ gcc main.c -o teardown
$ ./teardown /bin/test 
Teardown time: 0.062 milliseconds

Why /bin/test? I was looking for a binary that prints nothing when called without arguments, and it was the first one that came to mind.

Let’s run this program many many times to have some stats:

# Run the measurement 10001 times (i = 0..10000) and append one value per run
# to ./out: cut keeps the third space-separated field (the number), and tr
# swaps the decimal point for a comma.
for ((i = 0; i <= 10000; i++)); do
    ./teardown /bin/test | cut  -d ' ' -f 3 | tr '.' ',' >> out
done

From the out dataset, I find that:

  • the average teardown time measured is 18 microseconds;
  • the median teardown time is the same as the average time.

eBPF for faster exit() detection