[4/4] analyzer: purge state for unknown function calls

Message ID 20191217230137.31567-5-dmalcolm@redhat.com
State New
Headers show
Series
  • analyzer: Fixes for problems seen with CVE-2005-1689
Related show

Commit Message

David Malcolm Dec. 17, 2019, 11:01 p.m.
Whilst analyzing the reproducer for detecting CVE-2005-1689
(krb5-1.4.1's src/lib/krb5/krb/recvauth.c), the analyzer reports
a false double-free of the form:

  krb5_xfree(inbuf.data);
  krb5_read_message(..., &inbuf);
  krb5_xfree(inbuf.data); /* false diagnostic here.  */

where the call to krb5_read_message overwrites inbuf.data with
a freshly-malloced buffer.

This patch fixes the issue by purging state more thoroughly when
handling a call with unknown behavior, by walking the graph of
memory regions that are reachable from the call.

gcc/analyzer/ChangeLog:
	* analyzer.h (fndecl_has_gimple_body_p): New decl.
	* engine.cc (impl_region_model_context::on_unknown_change): New
	function.
	(fndecl_has_gimple_body_p): Make non-static.
	(exploded_node::on_stmt): Treat __analyzer_dump_exploded_nodes as
	known.  Track whether we have a call with unknown side-effects and
	pass it to on_call_post.
	* exploded-graph.h (impl_region_model_context::on_unknown_change):
	New decl.
	* program-state.cc (sm_state_map::on_unknown_change): New function.
	* program-state.h (sm_state_map::on_unknown_change): New decl.
	* region-model.cc: Include "bitmap.h".
	(region_model::on_call_pre): Return a bool, capturing whether the
	call has unknown side effects.
	(region_model::on_call_post): Add arg "bool unknown_side_effects"
	and if true, call handle_unrecognized_call.
	(class reachable_regions): New class.
	(region_model::handle_unrecognized_call): New function.
	* region-model.h (region_model::on_call_pre): Return a bool.
	(region_model::on_call_post): Add arg "bool unknown_side_effects".
	(region_model::handle_unrecognized_call): New decl.
	(region_model_context::on_unknown_change): New vfunc.
	(test_region_model_context::on_unknown_change): New function.

gcc/testsuite/ChangeLog:
	* gcc.dg/analyzer/data-model-1.c: Remove xfail.
	* gcc.dg/analyzer/data-model-5b.c: Likewise.
	* gcc.dg/analyzer/data-model-5c.c: Likewise.
	* gcc.dg/analyzer/setjmp-3.c: Mark "foo" as pure.
	* gcc.dg/analyzer/setjmp-4.c: Likewise.
	* gcc.dg/analyzer/setjmp-6.c: Likewise.
	* gcc.dg/analyzer/setjmp-7.c: Likewise.
	* gcc.dg/analyzer/setjmp-7a.c: Likewise.
	* gcc.dg/analyzer/setjmp-8.c: Likewise.
	* gcc.dg/analyzer/setjmp-9.c: Likewise.
	* gcc.dg/analyzer/unknown-fns.c: New test.
---
 gcc/analyzer/analyzer.h                       |   2 +
 gcc/analyzer/engine.cc                        |  28 ++-
 gcc/analyzer/exploded-graph.h                 |   2 +
 gcc/analyzer/program-state.cc                 |   8 +
 gcc/analyzer/program-state.h                  |   2 +
 gcc/analyzer/region-model.cc                  | 217 +++++++++++++++++-
 gcc/analyzer/region-model.h                   |  16 +-
 gcc/testsuite/gcc.dg/analyzer/data-model-1.c  |   4 +-
 gcc/testsuite/gcc.dg/analyzer/data-model-5b.c |   3 +-
 gcc/testsuite/gcc.dg/analyzer/data-model-5c.c |  10 +-
 gcc/testsuite/gcc.dg/analyzer/setjmp-3.c      |   2 +-
 gcc/testsuite/gcc.dg/analyzer/setjmp-4.c      |   2 +-
 gcc/testsuite/gcc.dg/analyzer/setjmp-6.c      |   2 +-
 gcc/testsuite/gcc.dg/analyzer/setjmp-7.c      |   2 +-
 gcc/testsuite/gcc.dg/analyzer/setjmp-7a.c     |   2 +-
 gcc/testsuite/gcc.dg/analyzer/setjmp-8.c      |   2 +-
 gcc/testsuite/gcc.dg/analyzer/setjmp-9.c      |   2 +-
 gcc/testsuite/gcc.dg/analyzer/unknown-fns.c   | 115 ++++++++++
 18 files changed, 383 insertions(+), 38 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/unknown-fns.c

-- 
2.21.0

Patch

diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h
index 987e2fe43f4c..17a24d813c62 100644
--- a/gcc/analyzer/analyzer.h
+++ b/gcc/analyzer/analyzer.h
@@ -82,6 +82,8 @@  extern void register_analyzer_pass ();
 
 extern label_text make_label_text (bool can_colorize, const char *fmt, ...);
 
+extern bool fndecl_has_gimple_body_p (tree fndecl);
+
 /* An RAII-style class for pushing/popping cfun within a scope.
    Doing so ensures we get "In function " announcements
    from the diagnostics subsystem.  */
diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc
index 93babf67a87b..162940a2bfa9 100644
--- a/gcc/analyzer/engine.cc
+++ b/gcc/analyzer/engine.cc
@@ -106,6 +106,15 @@  impl_region_model_context::on_svalue_purge (svalue_id first_unused_sid,
   return total;
 }
 
+void
+impl_region_model_context::on_unknown_change (svalue_id sid)
+{
+  int sm_idx;
+  sm_state_map *smap;
+  FOR_EACH_VEC_ELT (m_new_state->m_checker_states, sm_idx, smap)
+    smap->on_unknown_change (sid);
+}
+
 /* class setjmp_svalue : public svalue.  */
 
 /* Compare the fields of this setjmp_svalue with OTHER, returning true
@@ -846,7 +855,7 @@  exploded_node::dump (const extrinsic_state &ext_state) const
 /* Return true if FNDECL has a gimple body.  */
 // TODO: is there a pre-canned way to do this?
 
-static bool
+bool
 fndecl_has_gimple_body_p (tree fndecl)
 {
   if (fndecl == NULL_TREE)
@@ -905,6 +914,10 @@  exploded_node::on_stmt (exploded_graph &eg,
   if (const greturn *return_ = dyn_cast <const greturn *> (stmt))
     state->m_region_model->on_return (return_, &ctxt);
 
+  /* Track whether we have a gcall to a function that's not recognized by
+     anything, for which we don't have a function body, or for which we
+     don't know the fndecl.  */
+  bool unknown_side_effects = false;
   if (const gcall *call = dyn_cast <const gcall *> (stmt))
     {
       /* Debugging/test support.  */
@@ -947,6 +960,11 @@  exploded_node::on_stmt (exploded_graph &eg,
 	  /* TODO: is there a good cross-platform way to do this?  */
 	  raise (SIGINT);
 	}
+      else if (is_special_named_call_p (call, "__analyzer_dump_exploded_nodes",
+					1))
+	{
+	  /* This is handled elsewhere.  */
+	}
       else if (is_setjmp_call_p (stmt))
 	state->m_region_model->on_setjmp (call, this, &ctxt);
       else if (is_longjmp_call_p (call))
@@ -955,7 +973,7 @@  exploded_node::on_stmt (exploded_graph &eg,
 	  return on_stmt_flags::terminate_path ();
 	}
       else
-	state->m_region_model->on_call_pre (call, &ctxt);
+	unknown_side_effects = state->m_region_model->on_call_pre (call, &ctxt);
     }
 
   bool any_sm_changes = false;
@@ -971,7 +989,9 @@  exploded_node::on_stmt (exploded_graph &eg,
 			    change,
 			    old_smap, new_smap);
       /* Allow the state_machine to handle the stmt.  */
-      if (!sm.on_stmt (&ctxt, snode, stmt))
+      if (sm.on_stmt (&ctxt, snode, stmt))
+	unknown_side_effects = false;
+      else
 	{
 	  /* For those stmts that were not handled by the state machine.  */
 	  if (const gcall *call = dyn_cast <const gcall *> (stmt))
@@ -989,7 +1009,7 @@  exploded_node::on_stmt (exploded_graph &eg,
     }
 
   if (const gcall *call = dyn_cast <const gcall *> (stmt))
-    state->m_region_model->on_call_post (call, &ctxt);
+    state->m_region_model->on_call_post (call, unknown_side_effects, &ctxt);
 
   return on_stmt_flags (any_sm_changes);
 }
diff --git a/gcc/analyzer/exploded-graph.h b/gcc/analyzer/exploded-graph.h
index 8e3df18fadbf..5793f648b5f7 100644
--- a/gcc/analyzer/exploded-graph.h
+++ b/gcc/analyzer/exploded-graph.h
@@ -79,6 +79,8 @@  class impl_region_model_context : public region_model_context
 
   void on_condition (tree lhs, enum tree_code op, tree rhs) FINAL OVERRIDE;
 
+  void on_unknown_change (svalue_id sid ATTRIBUTE_UNUSED) FINAL OVERRIDE;
+
   exploded_graph *m_eg;
   log_user m_logger;
   const exploded_node *m_enode_for_diag;
diff --git a/gcc/analyzer/program-state.cc b/gcc/analyzer/program-state.cc
index 6222ce9cd627..3ed78b40928b 100644
--- a/gcc/analyzer/program-state.cc
+++ b/gcc/analyzer/program-state.cc
@@ -455,6 +455,14 @@  sm_state_map::on_cast (svalue_id src_sid,
   impl_set_state (dst_sid, state, get_origin (src_sid));
 }
 
+/* Purge state from SID (in response to a call to an unknown function).  */
+
+void
+sm_state_map::on_unknown_change (svalue_id sid)
+{
+  impl_set_state (sid, (state_machine::state_t)0, svalue_id::null ());
+}
+
 /* Assert that this object is sane.  */
 
 void
diff --git a/gcc/analyzer/program-state.h b/gcc/analyzer/program-state.h
index 5aab319a84c2..5a810158d417 100644
--- a/gcc/analyzer/program-state.h
+++ b/gcc/analyzer/program-state.h
@@ -186,6 +186,8 @@  public:
   void on_cast (svalue_id src_sid,
 		svalue_id dst_sid);
 
+  void on_unknown_change (svalue_id sid);
+
   void validate (const state_machine &sm, int num_svalues) const;
 
   iterator_t begin () const { return m_map.begin (); }
diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index 3bae0cffa691..d8960aeeda25 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -38,6 +38,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "diagnostic-color.h"
 #include "diagnostic-metadata.h"
 #include "tristate.h"
+#include "bitmap.h"
 #include "selftest.h"
 #include "analyzer/analyzer.h"
 #include "analyzer/analyzer-logging.h"
@@ -4103,9 +4104,13 @@  region_model::on_assignment (const gassign *assign, region_model_context *ctxt)
 
    Updates to the region_model that should be made *before* sm-states
    are updated are done here; other updates to the region_model are done
-   in region_model::on_call_post.  */
+   in region_model::on_call_post.
 
-void
+   Return true if the function call has unknown side effects (it wasn't
+   recognized and we don't have a body for it, or are unable to tell which
+   fndecl it is).  */
+
+bool
 region_model::on_call_pre (const gcall *call, region_model_context *ctxt)
 {
   region_id lhs_rid;
@@ -4123,6 +4128,8 @@  region_model::on_call_pre (const gcall *call, region_model_context *ctxt)
     for (unsigned i = 0; i < gimple_call_num_args (call); i++)
       check_for_poison (gimple_call_arg (call, i), ctxt);
 
+  bool unknown_side_effects = false;
+
   if (tree callee_fndecl = get_fndecl_for_call (call, ctxt))
     {
       if (is_named_call_p (callee_fndecl, "malloc", call, 1))
@@ -4135,7 +4142,7 @@  region_model::on_call_pre (const gcall *call, region_model_context *ctxt)
 		= get_or_create_ptr_svalue (lhs_type, new_rid);
 	      set_value (lhs_rid, ptr_sid, ctxt);
 	    }
-	  return;
+	  return false;
 	}
       else if (is_named_call_p (callee_fndecl, "__builtin_alloca", call, 1))
 	{
@@ -4148,7 +4155,7 @@  region_model::on_call_pre (const gcall *call, region_model_context *ctxt)
 		= get_or_create_ptr_svalue (lhs_type, new_rid);
 	      set_value (lhs_rid, ptr_sid, ctxt);
 	    }
-	  return;
+	  return false;
 	}
       else if (is_named_call_p (callee_fndecl, "strlen", call, 1))
 	{
@@ -4167,7 +4174,7 @@  region_model::on_call_pre (const gcall *call, region_model_context *ctxt)
 		  svalue_id result_sid
 		    = get_or_create_constant_svalue (t_cst);
 		  set_value (lhs_rid, result_sid, ctxt);
-		  return;
+		  return false;
 		}
 	    }
 	  /* Otherwise an unknown value.  */
@@ -4187,18 +4194,20 @@  region_model::on_call_pre (const gcall *call, region_model_context *ctxt)
 	  /* Use quotes to ensure the output isn't truncated.  */
 	  warning_at (call->location, 0,
 		      "num heap regions: %qi", num_heap_regions);
+	  return false;
 	}
+      else if (!fndecl_has_gimple_body_p (callee_fndecl)
+	       && !DECL_PURE_P (callee_fndecl))
+	unknown_side_effects = true;
     }
-
-  /* Unrecognized call.  */
+  else
+    unknown_side_effects = true;
 
   /* Unknown return value.  */
   if (!lhs_rid.null_p ())
     set_to_new_unknown_value (lhs_rid, lhs_type, ctxt);
 
-  /* TODO: also, any pointer arguments might have been written through,
-     or the things they point to (implying a graph traversal, which
-     presumably we need to do before overwriting the old value).  */
+  return unknown_side_effects;
 }
 
 /* Update this model for the CALL stmt, using CTXT to report any
@@ -4206,10 +4215,15 @@  region_model::on_call_pre (const gcall *call, region_model_context *ctxt)
 
    Updates to the region_model that should be made *after* sm-states
    are updated are done here; other updates to the region_model are done
-   in region_model::on_call_pre.  */
+   in region_model::on_call_pre.
+
+   If UNKNOWN_SIDE_EFFECTS is true, also call handle_unrecognized_call
+   to purge state.  */
 
 void
-region_model::on_call_post (const gcall *call, region_model_context *ctxt)
+region_model::on_call_post (const gcall *call,
+			    bool unknown_side_effects,
+			    region_model_context *ctxt)
 {
   /* Update for "free" here, after sm-handling.
 
@@ -4252,6 +4266,185 @@  region_model::on_call_post (const gcall *call, region_model_context *ctxt)
 	  }
 	return;
       }
+
+  if (unknown_side_effects)
+    handle_unrecognized_call (call, ctxt);
+}
+
+/* Helper class for region_model::handle_unrecognized_call, for keeping
+   track of all regions that are reachable, and, of those, which are
+   mutable.  */
+
+class reachable_regions
+{
+public:
+  reachable_regions (region_model *model)
+  : m_model (model), m_reachable_rids (), m_mutable_rids ()
+  {}
+
+  /* Lazily mark RID as being reachable, recursively adding regions
+     reachable from RID.  */
+  void add (region_id rid, bool is_mutable)
+  {
+    gcc_assert (!rid.null_p ());
+
+    unsigned idx = rid.as_int ();
+    /* Bail out if this region is already in the sets at the IS_MUTABLE
+       level of mutability.  */
+    if (!is_mutable && bitmap_bit_p (m_reachable_rids, idx))
+      return;
+    bitmap_set_bit (m_reachable_rids, idx);
+
+    if (is_mutable)
+      {
+	if (bitmap_bit_p (m_mutable_rids, idx))
+	  return;
+	else
+	  bitmap_set_bit (m_mutable_rids, idx);
+      }
+
+    /* If this region's value is a pointer, add the pointee.  */
+    region *reg = m_model->get_region (rid);
+    svalue_id sid = reg->get_value_direct ();
+    svalue *sval = m_model->get_svalue (sid);
+    if (sval)
+      if (region_svalue *ptr = sval->dyn_cast_region_svalue ())
+	{
+	  region_id pointee_rid = ptr->get_pointee ();
+	  /* Use const-ness of pointer type to affect mutability.  */
+	  bool ptr_is_mutable = true;
+	  if (ptr->get_type ()
+	      && TREE_CODE (ptr->get_type ()) == POINTER_TYPE
+	      && TYPE_READONLY (TREE_TYPE (ptr->get_type ())))
+	    ptr_is_mutable = false;
+	  add (pointee_rid, ptr_is_mutable);
+	}
+
+    /* Add descendents of this region.  */
+    region_id_set descendents (m_model);
+    m_model->get_descendents (rid, &descendents, region_id::null ());
+    for (unsigned i = 0; i < m_model->get_num_regions (); i++)
+      {
+	region_id iter_rid = region_id::from_int (i);
+	if (descendents.region_p (iter_rid))
+	  add (iter_rid, is_mutable);
+      }
+  }
+
+  bool mutable_p (region_id rid)
+  {
+    gcc_assert (!rid.null_p ());
+    return bitmap_bit_p (m_mutable_rids, rid.as_int ());
+  }
+
+private:
+  region_model *m_model;
+
+  /* The region ids already seen.  This has to be an auto_bitmap rather than
+     an auto_sbitmap as new regions can be created within the model during
+     the traversal.  */
+  auto_bitmap m_reachable_rids;
+
+  /* The region_ids that can be changed (accessed via non-const pointers).  */
+  auto_bitmap m_mutable_rids;
+};
+
+/* Handle a call CALL to a function with unknown behavior.
+
+   Traverse the regions in this model, determining what regions are
+   reachable from pointer arguments to CALL and from global variables,
+   recursively.
+
+   Set all reachable regions to new unknown values and purge sm-state
+   from their values, and from values that point to them.  */
+
+void
+region_model::handle_unrecognized_call (const gcall *call,
+					region_model_context *ctxt)
+{
+  tree fndecl = get_fndecl_for_call (call, ctxt);
+
+  reachable_regions reachable_regions (this);
+
+  /* Determine the reachable regions and their mutability.  */
+  {
+    /* Globals.  */
+    region_id globals_rid = get_globals_region_id ();
+    if (!globals_rid.null_p ())
+      reachable_regions.add (globals_rid, true);
+
+    /* Params that are pointers.  */
+    tree iter_param_types = NULL_TREE;
+    if (fndecl)
+      iter_param_types = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
+    for (unsigned arg_idx = 0; arg_idx < gimple_call_num_args (call); arg_idx++)
+      {
+	/* Track expected param type, where available.  */
+	tree param_type = NULL_TREE;
+	if (iter_param_types)
+	  {
+	    param_type = TREE_VALUE (iter_param_types);
+	    gcc_assert (param_type);
+	    iter_param_types = TREE_CHAIN (iter_param_types);
+	  }
+
+	tree parm = gimple_call_arg (call, arg_idx);
+	svalue_id parm_sid = get_rvalue (parm, NULL);
+	svalue *parm_sval = get_svalue (parm_sid);
+	if (parm_sval)
+	  if (region_svalue *parm_ptr = parm_sval->dyn_cast_region_svalue ())
+	    {
+	      region_id pointee_rid = parm_ptr->get_pointee ();
+	      bool is_mutable = true;
+	      if (param_type
+		  && TREE_CODE (param_type) == POINTER_TYPE
+		  &&  TYPE_READONLY (TREE_TYPE (param_type)))
+		is_mutable = false;
+	      reachable_regions.add (pointee_rid, is_mutable);
+	    }
+	// FIXME: what about compound parms that contain ptrs?
+      }
+  }
+
+  /* OK: we now have all reachable regions.
+     Set them all to new unknown values.  */
+  for (unsigned i = 0; i < get_num_regions (); i++)
+    {
+      region_id iter_rid = region_id::from_int (i);
+      if (reachable_regions.mutable_p (iter_rid))
+	{
+	  region *reg = get_region (iter_rid);
+
+	  /* Purge any sm-state for any underlying svalue.  */
+	  svalue_id curr_sid = reg->get_value_direct ();
+	  if (!curr_sid.null_p ())
+	    ctxt->on_unknown_change (curr_sid);
+
+	  set_to_new_unknown_value (iter_rid,
+				    reg->get_type (),
+				    ctxt);
+	}
+    }
+
+  /* Purge sm-state for any remaining svalues that point to regions that
+     were reachable.  This helps suppress leak false-positives.
+
+     For example, if we had a malloc call that was cast to a "foo *" type,
+     we could have a temporary void * for the result of malloc which has its
+     own svalue, not reachable from the function call, but for which the
+     "foo *" svalue was reachable.  If we don't purge it, the temporary will
+     be reported as a leak.  */
+  int i;
+  svalue *svalue;
+  FOR_EACH_VEC_ELT (m_svalues, i, svalue)
+    if (region_svalue *ptr = svalue->dyn_cast_region_svalue ())
+      {
+	region_id pointee_rid = ptr->get_pointee ();
+	if (reachable_regions.mutable_p (pointee_rid))
+	  ctxt->on_unknown_change (svalue_id::from_int (i));
+      }
+
+  validate ();
 }
 
 /* Update this model for the RETURN_STMT, using CTXT to report any
diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h
index 60911696be5a..56f400542616 100644
--- a/gcc/analyzer/region-model.h
+++ b/gcc/analyzer/region-model.h
@@ -1586,8 +1586,12 @@  class region_model
 
   void check_for_poison (tree expr, region_model_context *ctxt);
   void on_assignment (const gassign *stmt, region_model_context *ctxt);
-  void on_call_pre (const gcall *stmt, region_model_context *ctxt);
-  void on_call_post (const gcall *stmt, region_model_context *ctxt);
+  bool on_call_pre (const gcall *stmt, region_model_context *ctxt);
+  void on_call_post (const gcall *stmt,
+		     bool unknown_side_effects,
+		     region_model_context *ctxt);
+  void handle_unrecognized_call (const gcall *call,
+				 region_model_context *ctxt);
   void on_return (const greturn *stmt, region_model_context *ctxt);
   void on_setjmp (const gcall *stmt, const exploded_node *enode,
 		  region_model_context *ctxt);
@@ -1835,6 +1839,10 @@  class region_model_context
      to ptrs becoming known to be NULL or non-NULL, rather than just
      "unchecked") */
   virtual void on_condition (tree lhs, enum tree_code op, tree rhs) = 0;
+
+  /* Hooks for clients to be notified when an unknown change happens
+     to SID (in response to a call to an unknown function).  */
+  virtual void on_unknown_change (svalue_id sid) = 0;
 };
 
 /* A bundle of data for use when attempting to merge two region_model
@@ -2001,6 +2009,10 @@  public:
   {
   }
 
+  void on_unknown_change (svalue_id sid ATTRIBUTE_UNUSED) FINAL OVERRIDE
+  {
+  }
+
 private:
   /* Implicitly delete any diagnostics in the dtor.  */
   auto_delete_vec<pending_diagnostic> m_diagnostics;
diff --git a/gcc/testsuite/gcc.dg/analyzer/data-model-1.c b/gcc/testsuite/gcc.dg/analyzer/data-model-1.c
index 43d08474283e..d720a64a3f6e 100644
--- a/gcc/testsuite/gcc.dg/analyzer/data-model-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/data-model-1.c
@@ -325,9 +325,7 @@  void test_16e (int i)
   __analyzer_eval (j == i); /* { dg-warning "TRUE" } */
 
   might_write_to (&j);
-  __analyzer_eval (j == i); /* { dg-warning "UNKNOWN" "" { xfail *-*-* } } */
-  /* { dg-warning "TRUE" "" { target *-*-* } .-1 } */
-  // TODO(xfail)
+  __analyzer_eval (j == i); /* { dg-warning "UNKNOWN" } */
 }
 
 /* TODO: and more complicated graph-like examples, where anything that's
diff --git a/gcc/testsuite/gcc.dg/analyzer/data-model-5b.c b/gcc/testsuite/gcc.dg/analyzer/data-model-5b.c
index b0203af9975c..6866f5bf4696 100644
--- a/gcc/testsuite/gcc.dg/analyzer/data-model-5b.c
+++ b/gcc/testsuite/gcc.dg/analyzer/data-model-5b.c
@@ -87,5 +87,4 @@  void test_1 (const char *str)
   //__analyzer_dump();
   if (obj)
     unref (obj);
-} /* { dg-bogus "leak of 'obj'" "" { xfail *-*-* } } */
-// TODO (xfail): not sure why this is treated as leaking
+}
diff --git a/gcc/testsuite/gcc.dg/analyzer/data-model-5c.c b/gcc/testsuite/gcc.dg/analyzer/data-model-5c.c
index 1e52350c6c16..4dc559c1fcd6 100644
--- a/gcc/testsuite/gcc.dg/analyzer/data-model-5c.c
+++ b/gcc/testsuite/gcc.dg/analyzer/data-model-5c.c
@@ -66,19 +66,13 @@  string_obj *new_string_obj (const char *str)
 
 void unref (string_obj *obj)
 {
-  //__analyzer_dump();
   if (--obj->str_base.ob_refcnt == 0)
-    {
-      //__analyzer_dump();
-      free (obj);
-    }
+    free (obj);
 }
 
 void test_1 (const char *str)
 {
   string_obj *obj = new_string_obj (str);
-  //__analyzer_dump();
   if (obj)
     unref (obj);
-} /* { dg-bogus "leak of 'obj'" "" { xfail *-*-* } } */
-// TODO (xfail): not sure why this is treated as leaking
+}
diff --git a/gcc/testsuite/gcc.dg/analyzer/setjmp-3.c b/gcc/testsuite/gcc.dg/analyzer/setjmp-3.c
index 6f67ff532d34..36814a7d366c 100644
--- a/gcc/testsuite/gcc.dg/analyzer/setjmp-3.c
+++ b/gcc/testsuite/gcc.dg/analyzer/setjmp-3.c
@@ -3,7 +3,7 @@ 
 #include <setjmp.h>
 #include <stddef.h>
 
-extern void foo (int);
+extern int foo (int) __attribute__ ((__pure__));
 
 static jmp_buf env;
 
diff --git a/gcc/testsuite/gcc.dg/analyzer/setjmp-4.c b/gcc/testsuite/gcc.dg/analyzer/setjmp-4.c
index 21bb87e6476c..c45efd43e6ac 100644
--- a/gcc/testsuite/gcc.dg/analyzer/setjmp-4.c
+++ b/gcc/testsuite/gcc.dg/analyzer/setjmp-4.c
@@ -2,7 +2,7 @@ 
 
 #include <setjmp.h>
 
-extern int foo (int);
+extern int foo (int) __attribute__ ((__pure__));
 static jmp_buf buf;
 
 void inner (int x)
diff --git a/gcc/testsuite/gcc.dg/analyzer/setjmp-6.c b/gcc/testsuite/gcc.dg/analyzer/setjmp-6.c
index 84a6318ed3ca..d7319129070e 100644
--- a/gcc/testsuite/gcc.dg/analyzer/setjmp-6.c
+++ b/gcc/testsuite/gcc.dg/analyzer/setjmp-6.c
@@ -2,7 +2,7 @@ 
 #include <stddef.h>
 #include <stdlib.h>
 
-extern void foo (int);
+extern int foo (int) __attribute__ ((__pure__));
 
 static jmp_buf env;
 
diff --git a/gcc/testsuite/gcc.dg/analyzer/setjmp-7.c b/gcc/testsuite/gcc.dg/analyzer/setjmp-7.c
index ee4183dfb2a7..3a14534434d8 100644
--- a/gcc/testsuite/gcc.dg/analyzer/setjmp-7.c
+++ b/gcc/testsuite/gcc.dg/analyzer/setjmp-7.c
@@ -2,7 +2,7 @@ 
 #include <stddef.h>
 #include <stdlib.h>
 
-extern void foo (int);
+extern int foo (int) __attribute__ ((__pure__));
 
 static jmp_buf env;
 
diff --git a/gcc/testsuite/gcc.dg/analyzer/setjmp-7a.c b/gcc/testsuite/gcc.dg/analyzer/setjmp-7a.c
index 6f99df5126e8..a462f62649fc 100644
--- a/gcc/testsuite/gcc.dg/analyzer/setjmp-7a.c
+++ b/gcc/testsuite/gcc.dg/analyzer/setjmp-7a.c
@@ -3,7 +3,7 @@ 
 #include <setjmp.h>
 #include <stdlib.h>
 
-extern void foo (int);
+extern int foo (int) __attribute__ ((__pure__));
 
 static jmp_buf env;
 
diff --git a/gcc/testsuite/gcc.dg/analyzer/setjmp-8.c b/gcc/testsuite/gcc.dg/analyzer/setjmp-8.c
index ad3ef8fe991b..41f00a763395 100644
--- a/gcc/testsuite/gcc.dg/analyzer/setjmp-8.c
+++ b/gcc/testsuite/gcc.dg/analyzer/setjmp-8.c
@@ -3,7 +3,7 @@ 
 #include <setjmp.h>
 #include <stddef.h>
 
-extern void foo (int);
+extern int foo (int) __attribute__ ((__pure__));
 
 static jmp_buf env;
 
diff --git a/gcc/testsuite/gcc.dg/analyzer/setjmp-9.c b/gcc/testsuite/gcc.dg/analyzer/setjmp-9.c
index 081ae0479c60..b442f6355ae3 100644
--- a/gcc/testsuite/gcc.dg/analyzer/setjmp-9.c
+++ b/gcc/testsuite/gcc.dg/analyzer/setjmp-9.c
@@ -3,7 +3,7 @@ 
 #include <setjmp.h>
 #include <stddef.h>
 
-extern void foo (int);
+extern int foo (int) __attribute__ ((__pure__));
 
 static jmp_buf env;
 
diff --git a/gcc/testsuite/gcc.dg/analyzer/unknown-fns.c b/gcc/testsuite/gcc.dg/analyzer/unknown-fns.c
new file mode 100644
index 000000000000..76cb68eaa569
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/unknown-fns.c
@@ -0,0 +1,115 @@ 
+/* Verify that the analyzer correctly purges state when it sees a call to
+   an unknown function.  */
+
+#include <stdlib.h>
+
+/* Verify fix for false-positive when checking for CVE-2005-1689.  */
+
+typedef struct _krb5_data {
+  char *data;
+} krb5_data;
+
+extern void krb5_read_message(krb5_data *buf);
+
+void
+test_1 (krb5_data inbuf)
+{
+  free(inbuf.data);
+  krb5_read_message(&inbuf); 
+  free(inbuf.data); /* { dg-bogus "double-'free'" } */
+}
+
+/* Verify that __pure__ functions are treated as not having side-effects.  */
+
+extern int called_by_test_1a (void *)
+  __attribute__ ((__pure__));
+void test_1a (krb5_data inbuf)
+{
+  free (inbuf.data);
+  called_by_test_1a (&inbuf);
+  free (inbuf.data); /* { dg-warning "double-'free'" } */
+}
+
+/* Verify that global pointers can be affected by an unknown function.  */
+
+void *global_ptr;
+extern void unknown_side_effects (void);
+
+void test_2 (void)
+{
+  free (global_ptr);
+  unknown_side_effects ();
+  free (global_ptr);
+}
+
+extern void called_by_test_3 (void *);
+
+void test_3a (void)
+{
+  void *ptr = malloc (1024);
+  called_by_test_3 (ptr);
+}  /* { dg-bogus "leak" } */
+
+void test_3b (void)
+{
+  krb5_data k;
+  k.data = malloc (1024);
+  called_by_test_3 (&k);
+} /* { dg-bogus "leak" } */
+
+/* Verify that we traverse the graph of regions that are reachable from
+   the call.  */
+
+struct foo
+{
+  struct foo *next;
+  int *ptr;
+};
+
+/* First, without a call to an unknown function.  */
+
+void test_4a (void)
+{
+  struct foo node_a;
+  struct foo node_b;
+  node_a.next = &node_b;
+  node_b.ptr = malloc (sizeof (int));
+  global_ptr = &node_a;
+  *node_b.ptr = 42; /* { dg-warning "possibly-NULL" } */
+  /* { dg-warning "leak" "" { target *-*-* } .-1 } */
+  /* FIXME: the above leak report is correct, but is reported at the wrong
+     location.  */
+} /* { dg-warning "leak" } */
+
+/* With a call to an unknown function.  */
+
+void test_4b (void)
+{
+  struct foo node_a;
+  struct foo node_b;
+  node_a.next = &node_b;
+  node_b.ptr = malloc (sizeof (int));
+  global_ptr = &node_a;
+  unknown_side_effects (); /* everything potentially visible through global_ptr.  */
+  *node_b.ptr = 42; /* { dg-bogus "possibly-NULL" } */
+} /* { dg-bogus "leak" } */
+
+extern void called_by_test_5 (const char *);
+void test_5 (void)
+{
+  called_by_test_5 ("???");
+}
+
+extern void called_by_test_6 (const struct foo *);
+void test_6 (void)
+{
+  struct foo node;
+  node.next = NULL;
+  node.ptr = malloc (sizeof (int));
+
+  /* This is a const ptr, but struct foo's ptr is non-const,
+     so we ought to assume it could be written to.  */
+  called_by_test_6 (&node);
+} /* { dg-bogus "leak" } */
+
+/* TODO: things reachable from "outside" i.e. by params to caller to entrypoint.  */