[PATCH] POC for fault injection

haris iqbal haris.phnx at gmail.com
Mon Mar 21 05:06:03 UTC 2016


Hello,

I have sent a patch which demonstrates the idea that I thought of for
failing system calls. It overwrites the return value of the system
call with -1 to make it fail. Obviously this would differ from one
system call to another, but I did this only as a proof of concept
(POC). It is implemented only for the x86_64 architecture and has no
error checks.

One thing you may notice is that the output printed by strace gets
mangled. This is because I could not work out where to do this failure
check in trace_syscall_entering() and trace_syscall_exiting(). The
code is a little complicated, and it is difficult to tell which
parts are still needed when the syscall has to be failed. Otherwise,
the code should work fine.

To demonstrate that the logic is correct, I have written a simple
program myself. For now it works only for the read system call and
only on the x86_64 architecture. You can check this repo for the code -->
https://github.com/harisphnx/fault_injection

On Mon, Mar 21, 2016 at 10:35 AM, student <haris.phnx at gmail.com> wrote:
> From: Md Haris Iqbal <haris.phnx at gmail.com>
>
> ---
>  strace.c  | 22 +++++++++++++++++++--
>  syscall.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 85 insertions(+), 3 deletions(-)
>
> diff --git a/strace.c b/strace.c
> index 49d6f3d..42cd9a6 100644
> --- a/strace.c
> +++ b/strace.c
> @@ -1487,6 +1487,11 @@ get_os_release(void)
>   * Don't want main() to inline us and defeat the reason
>   * we have a separate function.
>   */
> +
> +void fail_syscall(int);
> +void set_fail_flag(void);
> +void set_failing_parameters(char* str);
> +
>  static void ATTRIBUTE_NOINLINE
>  init(int argc, char *argv[])
>  {
> @@ -1523,7 +1528,7 @@ init(int argc, char *argv[])
>                 "k"
>  #endif
>                 "D"
> -               "a:e:o:O:p:s:S:u:E:P:I:")) != EOF) {
> +               "a:e:g:j:o:O:p:s:S:u:E:P:I:")) != EOF) {
>                 switch (c) {
>                 case 'b':
>                         if (strcmp(optarg, "execve") != 0)
> @@ -1600,6 +1605,13 @@ init(int argc, char *argv[])
>                 case 'e':
>                         qualify(optarg);
>                         break;
> +        case 'g':
> +            qualify(optarg);
> +            set_fail_flag();
> +            break;
> +        case 'j':
> +            set_failing_parameters(optarg);
> +            break;
>                 case 'o':
>                         outfname = xstrdup(optarg);
>                         break;
> @@ -2322,7 +2334,8 @@ show_stopsig:
>          * This should be syscall entry or exit.
>          * Handle it.
>          */
> -       if (trace_syscall(tcp) < 0) {
> +    int temp = trace_syscall(tcp);
> +    if (temp < 0) {
>                 /*
>                  * ptrace() failed in trace_syscall().
>                  * Likely a result of process disappearing mid-flight.
> @@ -2337,6 +2350,11 @@ show_stopsig:
>                 return true;
>         }
>
> +    if(temp == 3)
> +    {
> +        fail_syscall(pid);
> +    }
> +
>  restart_tracee_with_sig_0:
>         sig = 0;
>
> diff --git a/syscall.c b/syscall.c
> index 6efcde5..61186bc 100644
> --- a/syscall.c
> +++ b/syscall.c
> @@ -785,12 +785,54 @@ static void get_error(struct tcb *, const bool);
>  static int getregs_old(pid_t);
>  #endif
>
> +/* custom for POC of fault injection*/
> +
> +void fail_syscall(int);
> +void set_fail_flag(void);
> +void set_failing_parameters(const char*);
> +
> +
> +
> +unsigned int fail_flag = 0;
> +int probability = 100;
> +int failing_prob = 0;
> +void set_fail_flag()
> +{
> +    fail_flag = 1;
> +    return;
> +}
> +
> +void set_failing_parameters(const char* str)
> +{
> +    probability = atoi(str);
> +    srand(time(NULL));
> +    return;
> +}
> +
> +/* END */
> +
> +
>  static int
>  trace_syscall_entering(struct tcb *tcp)
>  {
>         int res, scno_good;
>
>         scno_good = res = get_scno(tcp);
> +
> +
> +    /* POC code for fault injection */
> +    if((rand() % 100) <= probability)
> +        failing_prob = 1;
> +
> +    if ((tcp->qual_flg & QUAL_TRACE) && fail_flag == 1 && failing_prob == 1)
> +    {
> +        // probablity needs to be added using randon number genaratiion
> +        tprintf("%s system called failed with probability %d\n", syscall_name(tcp->scno), probability);
> +        tcp->flags |= TCB_INSYSCALL;
> +        tcp->sys_func_rval = 0;
> +        return 0;
> +    }
> +
>         if (res == 0)
>                 return res;
>         if (res == 1)
> @@ -878,17 +920,28 @@ trace_syscall_entering(struct tcb *tcp)
>         /* Measure the entrance time as late as possible to avoid errors. */
>         if (Tflag || cflag)
>                 gettimeofday(&tcp->etime, NULL);
> -       return res;
> +    return res;
>  }
>
>  static int
>  trace_syscall_exiting(struct tcb *tcp)
>  {
> +
>         int sys_res;
>         struct timeval tv;
>         int res;
>         long u_error;
>
> +    if ((tcp->qual_flg & QUAL_TRACE) && fail_flag == 1 && failing_prob == 1)
> +    {
> +        // probablity needs to be added using randon number genaratiion
> +        tprintf("%s system called failed with probability %d\n", syscall_name(tcp->scno), probability);
> +        tcp->flags |= ~TCB_INSYSCALL;
> +        failing_prob = 0;
> +        return 3;
> +    }
> +
> +
>         /* Measure the exit time as early as possible to avoid errors. */
>         if (Tflag || cflag)
>                 gettimeofday(&tv, NULL);
> @@ -1241,6 +1294,17 @@ get_regset(pid_t pid)
>  }
>  #endif /* ARCH_REGS_FOR_GETREGSET */
>
> +void fail_syscall(pid)
> +{
> +    struct user_regs_struct regs;
> +
> +    ptrace(PTRACE_GETREGS, pid, NULL, &regs);
> +    regs.rax = -1;
> +    ptrace(PTRACE_SETREGS, pid, NULL, &regs);
> +
> +}
> +
> +
>  void
>  get_regs(pid_t pid)
>  {
> --
> 1.9.1
>



-- 

With regards,

Md Haris Iqbal,
Placement Coordinator, MTech IT
NITK Surathkal,
Contact: +91 8861996962




More information about the Strace-devel mailing list