[PATCH 2009-01-28] fix "to process" list corruption when the task appears twice
Denys Vlasenko
dvlasenk at redhat.com
Mon Feb 23 13:47:17 UTC 2009
diff -x CVS -urpN 2009-01-27/ChangeLog 2009-01-28/ChangeLog
--- 2009-01-27/ChangeLog 2009-01-26 20:09:35.000000000 +0100
+++ 2009-01-28/ChangeLog 2009-01-27 20:38:44.000000000 +0100
@@ -1,3 +1,8 @@
+2009-01-27 Denys Vlasenko <dvlasenk at redhat.com>
+
+ * strace.c (collect_stopped_tcbs): Guard against the case when
+ waitpid() reports the same task multiple times.
+
2009-01-26 Denys Vlasenko <dvlasenk at redhat.com>
* process.c (printwaitn): Add comment about wait4() pid expansion.
diff -x CVS -urpN 2009-01-27/strace.c 2009-01-28/strace.c
--- 2009-01-27/strace.c 2009-01-21 20:05:43.000000000 +0100
+++ 2009-01-28/strace.c 2009-01-27 20:38:44.000000000 +0100
@@ -27,7 +27,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
- * $Id: strace.c,v 1.103 2009/01/21 19:05:43 vda_linux Exp $
+ * $Id: strace.c,v 1.104 2009/01/27 19:38:44 vda_linux Exp $
*/
#include "defs.h"
@@ -2255,21 +2255,39 @@ handle_group_exit(struct tcb *tcp, int s
static struct tcb *
collect_stopped_tcbs(void)
{
+#ifdef LINUX
+ static int remembered_pid;
+ static int remembered_status;
+#endif
int pid;
int wait_errno;
int status;
struct tcb *tcp;
+ struct tcb *found_tcps;
#ifdef LINUX
+ struct tcb **nextp;
struct rusage ru;
struct rusage* ru_ptr = cflag ? &ru : NULL;
int wnohang = 0;
- struct tcb *found_tcps;
- struct tcb **nextp = &found_tcps;
#ifdef __WALL
int wait4_options = __WALL;
#endif
+
+ if (remembered_pid > 0) {
+ pid = remembered_pid;
+ remembered_pid = 0;
+ if (debug)
+ fprintf(stderr, " [remembered wait(%#x) = %u]\n",
+ remembered_status, pid);
+ tcp = pid2tcb(pid); /* can't be NULL */
+ tcp->wait_status = remembered_status;
+ tcp->next_need_service = NULL;
+ return tcp;
+ }
+ nextp = &found_tcps;
#endif /* LINUX */
+ found_tcps = NULL;
while (1) {
#ifdef LINUX
#ifdef __WALL
@@ -2338,6 +2356,14 @@ collect_stopped_tcbs(void)
if (debug)
fprintf(stderr, " [wait(%#x) = %u]\n", status, pid);
+ /* RHEL5 bug workaround.
+ * It can re-report stopped tasks. Happens on SIGSTOPs here.
+ * Second (bogus) report has signal# set to 0.
+ * Stop collecting and process what we have.
+ */
+ if (WIFSTOPPED(status) && WSTOPSIG(status) == 0)
+ break;
+
/* Look up `pid' in our table. */
if ((tcp = pid2tcb(pid)) == NULL) {
#ifdef LINUX
@@ -2384,31 +2410,58 @@ Process %d attached (waiting for parent)
* process has not been actually restarted.
* Since we have inspected the arguments of suspended
* processes we end up here testing for this case.
+ *
+ * We also end up here when we catch new pid of
+ * CLONE_PTRACEd process. Do not process/restart it
+ * until we see corresponding clone() syscall exit
+ * in its parent.
*/
continue;
}
- tcp->wait_status = status;
#ifdef LINUX
+ /* So far observed only on RHEL5 ia64, but I imagine this
+ * can legitimately happen elsewhere.
+ * If we waited and got a stopped task notification,
+ * subsequent wait may return the same pid again, for example,
+ * with SIGKILL notification. SIGKILL kills even stopped tasks.
+ * We must not add it to the list
+ * (one task can't be inserted twice in the list).
+ */
+ {
+ struct tcb *f = found_tcps;
+ while (f) {
+ if (f == tcp) {
+ remembered_pid = pid;
+ remembered_status = status;
+ return found_tcps;
+ }
+ f = f->next_need_service;
+ }
+ }
/* It is important to not invert the order of tasks
* to process. For one, alloc_tcb() above picks newly forked
* threads in some order, processing of them and their parent
* should be in the same order, otherwise bad things happen
* (misinterpreted SIGSTOPs and such).
*/
+ tcp->wait_status = status;
*nextp = tcp;
nextp = &tcp->next_need_service;
+ *nextp = NULL;
wnohang = WNOHANG;
#endif
#ifdef SUNOS4
/* Probably need to replace wait with waitpid
* and loop on Sun too, but I can't test it. Volunteers?
*/
+ tcp->wait_status = status;
+ tcp->next_need_service = NULL;
+ found_tcps = tcp;
break;
#endif
- }
+ } /* while (1) - collecting all stopped/exited tracees */
- *nextp = NULL;
return found_tcps;
}
More information about the Strace-devel
mailing list