[28/49] analyzer: new files: analyzer.{cc|h}

Message ID 1573867416-55618-29-git-send-email-dmalcolm@redhat.com
State New
Headers show
Series
  • RFC: Add a static analysis framework to GCC
Related show

Commit Message

David Malcolm Nov. 16, 2019, 1:23 a.m.
gcc/ChangeLog:
	* analyzer/analyzer.cc: New file.
	* analyzer/analyzer.h: New file.
---
 gcc/analyzer/analyzer.cc | 125 ++++++++++++++++++++++++++++++++++++++++++++++
 gcc/analyzer/analyzer.h  | 126 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 251 insertions(+)
 create mode 100644 gcc/analyzer/analyzer.cc
 create mode 100644 gcc/analyzer/analyzer.h

-- 
1.8.5.3

Comments

Eric Gallager Dec. 7, 2019, 3:38 a.m. | #1
On 11/15/19, David Malcolm <dmalcolm@redhat.com> wrote:
> gcc/ChangeLog:

> 	* analyzer/analyzer.cc: New file.

> 	* analyzer/analyzer.h: New file.

> ---

>  gcc/analyzer/analyzer.cc | 125

> ++++++++++++++++++++++++++++++++++++++++++++++

>  gcc/analyzer/analyzer.h  | 126

> +++++++++++++++++++++++++++++++++++++++++++++++

>  2 files changed, 251 insertions(+)

>  create mode 100644 gcc/analyzer/analyzer.cc

>  create mode 100644 gcc/analyzer/analyzer.h

>

> diff --git a/gcc/analyzer/analyzer.cc b/gcc/analyzer/analyzer.cc

> new file mode 100644

> index 0000000..399925c

> --- /dev/null

> +++ b/gcc/analyzer/analyzer.cc

> @@ -0,0 +1,125 @@

> +/* Utility functions for the analyzer.

> +   Copyright (C) 2019 Free Software Foundation, Inc.

> +   Contributed by David Malcolm <dmalcolm@redhat.com>.

> +

> +This file is part of GCC.

> +

> +GCC is free software; you can redistribute it and/or modify it

> +under the terms of the GNU General Public License as published by

> +the Free Software Foundation; either version 3, or (at your option)

> +any later version.

> +

> +GCC is distributed in the hope that it will be useful, but

> +WITHOUT ANY WARRANTY; without even the implied warranty of

> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

> +General Public License for more details.

> +

> +You should have received a copy of the GNU General Public License

> +along with GCC; see the file COPYING3.  If not see

> +<http://www.gnu.org/licenses/>.  */

> +

> +#include "config.h"

> +#include "gcc-plugin.h"

> +#include "system.h"

> +#include "coretypes.h"

> +#include "tree.h"

> +#include "gimple.h"

> +#include "diagnostic.h"

> +#include "intl.h"

> +#include "analyzer/analyzer.h"

> +

> +/* Helper function for checkers.  Is the CALL to the given function name?

> */

> +

> +bool

> +is_named_call_p (const gcall *call, const char *funcname)

> +{

> +  gcc_assert (funcname);

> +

> +  tree fndecl = gimple_call_fndecl (call);

> +  if (!fndecl)

> +    return false;

> +

> +  return 0 == strcmp (IDENTIFIER_POINTER (DECL_NAME (fndecl)), funcname);

> +}

> +

> +/* Helper function for checkers.  Is the CALL to the given function name,

> +   and with the given number of arguments?  */

> +

> +bool

> +is_named_call_p (const gcall *call, const char *funcname,

> +		 unsigned int num_args)

> +{

> +  gcc_assert (funcname);

> +

> +  if (!is_named_call_p (call, funcname))

> +    return false;

> +

> +  if (gimple_call_num_args (call) != num_args)

> +    return false;

> +

> +  return true;

> +}

> +

> +/* Return true if stmt is a setjmp call.  */

> +

> +bool

> +is_setjmp_call_p (const gimple *stmt)

> +{

> +  /* TODO: is there a less hacky way to check for "setjmp"?  */

> +  if (const gcall *call = dyn_cast <const gcall *> (stmt))

> +    if (is_named_call_p (call, "_setjmp", 1))

> +      return true;

> +

> +  return false;

> +}

> +

> +/* Return true if stmt is a longjmp call.  */

> +

> +bool

> +is_longjmp_call_p (const gcall *call)

> +{

> +  /* TODO: is there a less hacky way to check for "longjmp"?  */

> +  if (is_named_call_p (call, "longjmp", 2))

> +    return true;

> +

> +  return false;

> +}

> +

> +/* Generate a label_text instance by formatting FMT, using a

> +   temporary clone of the global_dc's printer (thus using its

> +   formatting callbacks).

> +

> +   Colorize if the global_dc supports colorization and CAN_COLORIZE is

> +   true.  */

> +

> +label_text

> +make_label_text (bool can_colorize, const char *fmt, ...)

> +{

> +  pretty_printer *pp = global_dc->printer->clone ();

> +  pp_clear_output_area (pp);

> +

> +  if (!can_colorize)

> +    pp_show_color (pp) = false;

> +

> +  text_info ti;

> +  rich_location rich_loc (line_table, UNKNOWN_LOCATION);

> +

> +  va_list ap;

> +

> +  va_start (ap, fmt);

> +

> +  ti.format_spec = _(fmt);

> +  ti.args_ptr = &ap;

> +  ti.err_no = 0;

> +  ti.x_data = NULL;

> +  ti.m_richloc = &rich_loc;

> +

> +  pp_format (pp, &ti);

> +  pp_output_formatted_text (pp);

> +

> +  va_end (ap);

> +

> +  label_text result = label_text::take (xstrdup (pp_formatted_text (pp)));

> +  delete pp;

> +  return result;

> +}

> diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h

> new file mode 100644

> index 0000000..ace8924

> --- /dev/null

> +++ b/gcc/analyzer/analyzer.h

> @@ -0,0 +1,126 @@

> +/* Utility functions for the analyzer.

> +   Copyright (C) 2019 Free Software Foundation, Inc.

> +   Contributed by David Malcolm <dmalcolm@redhat.com>.

> +

> +This file is part of GCC.

> +

> +GCC is free software; you can redistribute it and/or modify it

> +under the terms of the GNU General Public License as published by

> +the Free Software Foundation; either version 3, or (at your option)

> +any later version.

> +

> +GCC is distributed in the hope that it will be useful, but

> +WITHOUT ANY WARRANTY; without even the implied warranty of

> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

> +General Public License for more details.

> +

> +You should have received a copy of the GNU General Public License

> +along with GCC; see the file COPYING3.  If not see

> +<http://www.gnu.org/licenses/>.  */

> +

> +#ifndef GCC_ANALYZER_ANALYZER_H

> +#define GCC_ANALYZER_ANALYZER_H

> +

> +/* Forward decls of common types, with indentation to show inheritance. */


I'm wondering about the "with indentation to show inheritance" part...
does that require tweaking any editor configuration files or adding
/*INDENT-OFF*/ comments or anything to prevent automatic formatting
tools from "fixing" the indentation to go back to the normal style of
having everything be aligned?

> +

> +class graphviz_out;

> +class supergraph;

> +class supernode;

> +class superedge;

> +  class cfg_superedge;

> +    class switch_cfg_superedge;

> +  class callgraph_superedge;

> +    class call_superedge;

> +    class return_superedge;

> +class svalue;

> +  class region_svalue;

> +  class constant_svalue;

> +  class poisoned_svalue;

> +  class unknown_svalue;

> +  class setjmp_svalue;

> +class region;

> +  class map_region;

> +  class symbolic_region;

> +class region_model;

> +class region_model_context;

> +  class impl_region_model_context;

> +class constraint_manager;

> +class equiv_class;

> +struct model_merger;

> +struct svalue_id_merger_mapping;

> +struct canonicalization;

> +class pending_diagnostic;

> +class checker_path;

> +class extrinsic_state;

> +class sm_state_map;

> +class stmt_finder;

> +class program_point;

> +class program_state;

> +class exploded_graph;

> +class exploded_node;

> +class exploded_edge;

> +class exploded_cluster;

> +class exploded_path;

> +class analysis_plan;

> +class state_purge_map;

> +class state_purge_per_ssa_name;

> +class state_change;

> +

> +////////////////////////////////////////////////////////////////////////////

> +

> +extern bool is_named_call_p (const gcall *call, const char *funcname);

> +extern bool is_named_call_p (const gcall *call, const char *funcname,

> +			     unsigned int num_args);

> +extern bool is_setjmp_call_p (const gimple *stmt);

> +extern bool is_longjmp_call_p (const gcall *call);

> +

> +extern void register_analyzer_pass ();

> +

> +extern label_text make_label_text (bool can_colorize, const char *fmt,

> ...);

> +

> +////////////////////////////////////////////////////////////////////////////

> +

> +/* An RAII-style class for pushing/popping cfun within a scope.

> +   Doing so ensures we get "In function " announcements

> +   from the diagnostics subsystem.  */

> +

> +class auto_cfun

> +{

> +public:

> +  auto_cfun (function *fun) { push_cfun (fun); }

> +  ~auto_cfun () { pop_cfun (); }

> +};

> +

> +////////////////////////////////////////////////////////////////////////////

> +

> +/* Begin suppressing -Wformat and -Wformat-extra-args.  */

> +

> +#define PUSH_IGNORE_WFORMAT \

> +  _Pragma("GCC diagnostic push") \

> +  _Pragma("GCC diagnostic ignored \"-Wformat\"") \

> +  _Pragma("GCC diagnostic ignored \"-Wformat-extra-args\"")

> +

> +/* Finish suppressing -Wformat and -Wformat-extra-args.  */

> +

> +#define POP_IGNORE_WFORMAT \

> +  _Pragma("GCC diagnostic pop")

> +

> +////////////////////////////////////////////////////////////////////////////

> +

> +/* A template for creating hash traits for a POD type.  */

> +

> +template <typename Type>

> +struct pod_hash_traits : typed_noop_remove<Type>

> +{

> +  typedef Type value_type;

> +  typedef Type compare_type;

> +  static inline hashval_t hash (value_type);

> +  static inline bool equal (const value_type &existing,

> +			    const value_type &candidate);

> +  static inline void mark_deleted (Type &);

> +  static inline void mark_empty (Type &);

> +  static inline bool is_deleted (Type);

> +  static inline bool is_empty (Type);

> +};

> +

> +#endif /* GCC_ANALYZER_ANALYZER_H */

> --

> 1.8.5.3

>

>
Jeff Law Dec. 7, 2019, 3:01 p.m. | #2
On Fri, 2019-11-15 at 20:23 -0500, David Malcolm wrote:
> gcc/ChangeLog:

> 	* analyzer/analyzer.cc: New file.

> 	* analyzer/analyzer.h: New file.

> ---

>  gcc/analyzer/analyzer.cc | 125

> ++++++++++++++++++++++++++++++++++++++++++++++

>  gcc/analyzer/analyzer.h  | 126

> +++++++++++++++++++++++++++++++++++++++++++++++

>  2 files changed, 251 insertions(+)

>  create mode 100644 gcc/analyzer/analyzer.cc

>  create mode 100644 gcc/analyzer/analyzer.h

> 

> diff --git a/gcc/analyzer/analyzer.cc b/gcc/analyzer/analyzer.cc

> new file mode 100644

> index 0000000..399925c

> --- /dev/null

> +++ b/gcc/analyzer/analyzer.cc

> @@ -0,0 +1,125 @@

> +/* Utility functions for the analyzer.

> +   Copyright (C) 2019 Free Software Foundation, Inc.

> +   Contributed by David Malcolm <dmalcolm@redhat.com>.

> +

> +This file is part of GCC.

> +

> +GCC is free software; you can redistribute it and/or modify it

> +under the terms of the GNU General Public License as published by

> +the Free Software Foundation; either version 3, or (at your option)

> +any later version.

> +

> +GCC is distributed in the hope that it will be useful, but

> +WITHOUT ANY WARRANTY; without even the implied warranty of

> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

> +General Public License for more details.

> +

> +You should have received a copy of the GNU General Public License

> +along with GCC; see the file COPYING3.  If not see

> +<http://www.gnu.org/licenses/>;.  */

> +

> +#include "config.h"

> +#include "gcc-plugin.h"

> +#include "system.h"

> +#include "coretypes.h"

> +#include "tree.h"

> +#include "gimple.h"

> +#include "diagnostic.h"

> +#include "intl.h"

> +#include "analyzer/analyzer.h"

> +

> +/* Helper function for checkers.  Is the CALL to the given function

> name?  */

> +

> +bool

> +is_named_call_p (const gcall *call, const char *funcname)

> +{

> +  gcc_assert (funcname);

> +

> +  tree fndecl = gimple_call_fndecl (call);

> +  if (!fndecl)

> +    return false;

> +

> +  return 0 == strcmp (IDENTIFIER_POINTER (DECL_NAME (fndecl)),

> funcname);

> +}

> +

> +/* Helper function for checkers.  Is the CALL to the given function

> name,

> +   and with the given number of arguments?  */

> +

> +bool

> +is_named_call_p (const gcall *call, const char *funcname,

> +		 unsigned int num_args)

> +{

> +  gcc_assert (funcname);

> +

> +  if (!is_named_call_p (call, funcname))

> +    return false;

> +

> +  if (gimple_call_num_args (call) != num_args)

> +    return false;

> +

> +  return true;

These seem generic enough to live elsewhere.

> +}

> +

> +/* Return true if stmt is a setjmp call.  */

> +

> +bool

> +is_setjmp_call_p (const gimple *stmt)

> +{

> +  /* TODO: is there a less hacky way to check for "setjmp"?  */

> +  if (const gcall *call = dyn_cast <const gcall *> (stmt))

> +    if (is_named_call_p (call, "_setjmp", 1))

> +      return true;

> +

> +  return false;

> +}

> +

> +/* Return true if stmt is a longjmp call.  */

> +

> +bool

> +is_longjmp_call_p (const gcall *call)

> +{

> +  /* TODO: is there a less hacky way to check for "longjmp"?  */

> +  if (is_named_call_p (call, "longjmp", 2))

> +    return true;

> +

> +  return false;

> +}

I thought we already had routines to detect these.  We certainly have
*code* to detect them.   If it's the former we really just want one
implementation (that hands the various permutations we've seen through
the years).  If it's the latter, then we probably want to use these
routines to simplify those implementations.


> +

> +/* Generate a label_text instance by formatting FMT, using a

> +   temporary clone of the global_dc's printer (thus using its

> +   formatting callbacks).

> +

> +   Colorize if the global_dc supports colorization and CAN_COLORIZE

> is

> +   true.  */

> +

> +label_text

> +make_label_text (bool can_colorize, const char *fmt, ...)

> +{

> +  pretty_printer *pp = global_dc->printer->clone ();

> +  pp_clear_output_area (pp);

> +

> +  if (!can_colorize)

> +    pp_show_color (pp) = false;

> +

> +  text_info ti;

> +  rich_location rich_loc (line_table, UNKNOWN_LOCATION);

> +

> +  va_list ap;

> +

> +  va_start (ap, fmt);

> +

> +  ti.format_spec = _(fmt);

> +  ti.args_ptr = &ap;

> +  ti.err_no = 0;

> +  ti.x_data = NULL;

> +  ti.m_richloc = &rich_loc;

> +

> +  pp_format (pp, &ti);

> +  pp_output_formatted_text (pp);

> +

> +  va_end (ap);

> +

> +  label_text result = label_text::take (xstrdup (pp_formatted_text

> (pp)));

> +  delete pp;

> +  return result;

> +}

Is this better in the pretty-printer infrastructure?  

Jeff
David Malcolm Dec. 9, 2019, 11:22 p.m. | #3
On Fri, 2019-12-06 at 22:38 -0500, Eric Gallager wrote:
> On 11/15/19, David Malcolm <dmalcolm@redhat.com> wrote:

[...]
> > diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h

> > new file mode 100644

> > index 0000000..ace8924

> > --- /dev/null

> > +++ b/gcc/analyzer/analyzer.h

> > @@ -0,0 +1,126 @@

> > +/* Utility functions for the analyzer.

> > +   Copyright (C) 2019 Free Software Foundation, Inc.

> > +   Contributed by David Malcolm <dmalcolm@redhat.com>.

> > +

> > +This file is part of GCC.

> > +

> > +GCC is free software; you can redistribute it and/or modify it

> > +under the terms of the GNU General Public License as published by

> > +the Free Software Foundation; either version 3, or (at your

> > option)

> > +any later version.

> > +

> > +GCC is distributed in the hope that it will be useful, but

> > +WITHOUT ANY WARRANTY; without even the implied warranty of

> > +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

> > +General Public License for more details.

> > +

> > +You should have received a copy of the GNU General Public License

> > +along with GCC; see the file COPYING3.  If not see

> > +<http://www.gnu.org/licenses/>;.  */

> > +

> > +#ifndef GCC_ANALYZER_ANALYZER_H

> > +#define GCC_ANALYZER_ANALYZER_H

> > +

> > +/* Forward decls of common types, with indentation to show

> > inheritance. */

> 

> I'm wondering about the "with indentation to show inheritance"

> part...

> does that require tweaking any editor configuration files or adding

> /*INDENT-OFF*/ comments or anything to prevent automatic formatting

> tools from "fixing" the indentation to go back to the normal style of

> having everything be aligned?


If we had some kind of automatic formatting then I guess it would, but
I don't think we have such a system in place.

[...]
Martin Sebor Dec. 10, 2019, 5:17 p.m. | #4
On 11/15/19 6:23 PM, David Malcolm wrote:
> gcc/ChangeLog:

> 	* analyzer/analyzer.cc: New file.

> 	* analyzer/analyzer.h: New file.


Here are a few more comments/suggestions as I'm slowly making my way
through the patch, partly for my own benefit.  Hopefully they're at
least moderately useful.

> ---

>   gcc/analyzer/analyzer.cc | 125 ++++++++++++++++++++++++++++++++++++++++++++++

>   gcc/analyzer/analyzer.h  | 126 +++++++++++++++++++++++++++++++++++++++++++++++

>   2 files changed, 251 insertions(+)

>   create mode 100644 gcc/analyzer/analyzer.cc

>   create mode 100644 gcc/analyzer/analyzer.h

> 

> diff --git a/gcc/analyzer/analyzer.cc b/gcc/analyzer/analyzer.cc

> new file mode 100644

> index 0000000..399925c

> --- /dev/null

> +++ b/gcc/analyzer/analyzer.cc

> @@ -0,0 +1,125 @@

> +/* Utility functions for the analyzer.

> +   Copyright (C) 2019 Free Software Foundation, Inc.

> +   Contributed by David Malcolm <dmalcolm@redhat.com>.

> +

> +This file is part of GCC.

> +

> +GCC is free software; you can redistribute it and/or modify it

> +under the terms of the GNU General Public License as published by

> +the Free Software Foundation; either version 3, or (at your option)

> +any later version.

> +

> +GCC is distributed in the hope that it will be useful, but

> +WITHOUT ANY WARRANTY; without even the implied warranty of

> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

> +General Public License for more details.

> +

> +You should have received a copy of the GNU General Public License

> +along with GCC; see the file COPYING3.  If not see

> +<http://www.gnu.org/licenses/>.  */

> +

> +#include "config.h"

> +#include "gcc-plugin.h"

> +#include "system.h"

> +#include "coretypes.h"

> +#include "tree.h"

> +#include "gimple.h"

> +#include "diagnostic.h"

> +#include "intl.h"

> +#include "analyzer/analyzer.h"

> +

> +/* Helper function for checkers.  Is the CALL to the given function name?  */

> +

> +bool

> +is_named_call_p (const gcall *call, const char *funcname)

> +{

> +  gcc_assert (funcname);

> +

> +  tree fndecl = gimple_call_fndecl (call);

> +  if (!fndecl)

> +    return false;

> +

> +  return 0 == strcmp (IDENTIFIER_POINTER (DECL_NAME (fndecl)), funcname);

> +}

> +

> +/* Helper function for checkers.  Is the CALL to the given function name,

> +   and with the given number of arguments?  */

> +

> +bool

> +is_named_call_p (const gcall *call, const char *funcname,

> +		 unsigned int num_args)

> +{

> +  gcc_assert (funcname);

> +

> +  if (!is_named_call_p (call, funcname))

> +    return false;

> +

> +  if (gimple_call_num_args (call) != num_args)

> +    return false;

> +

> +  return true;

> +}

> +

> +/* Return true if stmt is a setjmp call.  */

> +

> +bool

> +is_setjmp_call_p (const gimple *stmt)

> +{

> +  /* TODO: is there a less hacky way to check for "setjmp"?  */

> +  if (const gcall *call = dyn_cast <const gcall *> (stmt))

> +    if (is_named_call_p (call, "_setjmp", 1))

> +      return true;

> +

> +  return false;

> +}

> +

> +/* Return true if stmt is a longjmp call.  */

> +

> +bool

> +is_longjmp_call_p (const gcall *call)

> +{

> +  /* TODO: is there a less hacky way to check for "longjmp"?  */

> +  if (is_named_call_p (call, "longjmp", 2))

> +    return true;

> +

> +  return false;

> +}


I haven't actually needed these helper functions myself but they seem
quite generic and might make good additions to gimple.h.

> +

> +/* Generate a label_text instance by formatting FMT, using a

> +   temporary clone of the global_dc's printer (thus using its

> +   formatting callbacks).

> +

> +   Colorize if the global_dc supports colorization and CAN_COLORIZE is

> +   true.  */

> +

> +label_text

> +make_label_text (bool can_colorize, const char *fmt, ...)

> +{

> +  pretty_printer *pp = global_dc->printer->clone ();

> +  pp_clear_output_area (pp);

> +

> +  if (!can_colorize)

> +    pp_show_color (pp) = false;

> +

> +  text_info ti;

> +  rich_location rich_loc (line_table, UNKNOWN_LOCATION);

> +

> +  va_list ap;

> +

> +  va_start (ap, fmt);

> +

> +  ti.format_spec = _(fmt);

> +  ti.args_ptr = &ap;

> +  ti.err_no = 0;

> +  ti.x_data = NULL;

> +  ti.m_richloc = &rich_loc;

> +

> +  pp_format (pp, &ti);

> +  pp_output_formatted_text (pp);

> +

> +  va_end (ap);

> +

> +  label_text result = label_text::take (xstrdup (pp_formatted_text (pp)));

> +  delete pp;

> +  return result;

> +}

> diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h

> new file mode 100644

> index 0000000..ace8924

> --- /dev/null

> +++ b/gcc/analyzer/analyzer.h

> @@ -0,0 +1,126 @@

> +/* Utility functions for the analyzer.

> +   Copyright (C) 2019 Free Software Foundation, Inc.

> +   Contributed by David Malcolm <dmalcolm@redhat.com>.

> +

> +This file is part of GCC.

> +

> +GCC is free software; you can redistribute it and/or modify it

> +under the terms of the GNU General Public License as published by

> +the Free Software Foundation; either version 3, or (at your option)

> +any later version.

> +

> +GCC is distributed in the hope that it will be useful, but

> +WITHOUT ANY WARRANTY; without even the implied warranty of

> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

> +General Public License for more details.

> +

> +You should have received a copy of the GNU General Public License

> +along with GCC; see the file COPYING3.  If not see

> +<http://www.gnu.org/licenses/>.  */

> +

> +#ifndef GCC_ANALYZER_ANALYZER_H

> +#define GCC_ANALYZER_ANALYZER_H

> +

> +/* Forward decls of common types, with indentation to show inheritance.  */

> +

> +class graphviz_out;

> +class supergraph;

> +class supernode;

> +class superedge;

> +  class cfg_superedge;

> +    class switch_cfg_superedge;

> +  class callgraph_superedge;

> +    class call_superedge;

> +    class return_superedge;

> +class svalue;

> +  class region_svalue;

> +  class constant_svalue;

> +  class poisoned_svalue;

> +  class unknown_svalue;

> +  class setjmp_svalue;

> +class region;

> +  class map_region;

> +  class symbolic_region;

> +class region_model;

> +class region_model_context;

> +  class impl_region_model_context;

> +class constraint_manager;

> +class equiv_class;

> +struct model_merger;

> +struct svalue_id_merger_mapping;

> +struct canonicalization;

> +class pending_diagnostic;

> +class checker_path;

> +class extrinsic_state;

> +class sm_state_map;

> +class stmt_finder;

> +class program_point;

> +class program_state;

> +class exploded_graph;

> +class exploded_node;

> +class exploded_edge;

> +class exploded_cluster;

> +class exploded_path;

> +class analysis_plan;

> +class state_purge_map;

> +class state_purge_per_ssa_name;

> +class state_change;

> +

> +////////////////////////////////////////////////////////////////////////////

> +

> +extern bool is_named_call_p (const gcall *call, const char *funcname);

> +extern bool is_named_call_p (const gcall *call, const char *funcname,

> +			     unsigned int num_args);

> +extern bool is_setjmp_call_p (const gimple *stmt);

> +extern bool is_longjmp_call_p (const gcall *call);


There are functions in GCC that match either is_xxx() and xxx_p() but
by in my experience (and now also by my count) the latter is far more
prevalent: 538 vs 3,172.  I count just 28 functions that combine
the two into is_xxx_p().

I think it would make sense to choose either of the two dominant forms
over the is_xxx_p() combination here.  (Ideally, there'd be just one
style to make it easy to remember.)

> +

> +extern void register_analyzer_pass ();

> +

> +extern label_text make_label_text (bool can_colorize, const char *fmt, ...);

> +

> +////////////////////////////////////////////////////////////////////////////

> +

> +/* An RAII-style class for pushing/popping cfun within a scope.

> +   Doing so ensures we get "In function " announcements

> +   from the diagnostics subsystem.  */

> +

> +class auto_cfun

> +{

> +public:

> +  auto_cfun (function *fun) { push_cfun (fun); }

> +  ~auto_cfun () { pop_cfun (); }

> +};


This also seems like a nice general-purpose utility that might be
worth adding someplace more central.  I thought I'd seen code like
this in the compiler already but all I can find now is a plenty of
calls to push_cfun and but just one to pop.

> +

> +////////////////////////////////////////////////////////////////////////////

> +

> +/* Begin suppressing -Wformat and -Wformat-extra-args.  */

> +

> +#define PUSH_IGNORE_WFORMAT \

> +  _Pragma("GCC diagnostic push") \

> +  _Pragma("GCC diagnostic ignored \"-Wformat\"") \

> +  _Pragma("GCC diagnostic ignored \"-Wformat-extra-args\"")

> +

> +/* Finish suppressing -Wformat and -Wformat-extra-args.  */

> +

> +#define POP_IGNORE_WFORMAT \

> +  _Pragma("GCC diagnostic pop")

> +

> +////////////////////////////////////////////////////////////////////////////

> +

> +/* A template for creating hash traits for a POD type.  */

> +

> +template <typename Type>

> +struct pod_hash_traits : typed_noop_remove<Type>


Would it make sense to add this to hash-traits.h?

Martin


> +{

> +  typedef Type value_type;

> +  typedef Type compare_type;

> +  static inline hashval_t hash (value_type);

> +  static inline bool equal (const value_type &existing,

> +			    const value_type &candidate);

> +  static inline void mark_deleted (Type &);

> +  static inline void mark_empty (Type &);

> +  static inline bool is_deleted (Type);

> +  static inline bool is_empty (Type);

> +};

> +

> +#endif /* GCC_ANALYZER_ANALYZER_H */

>
Eric Gallager Dec. 10, 2019, 7:59 p.m. | #5
On 12/9/19, David Malcolm <dmalcolm@redhat.com> wrote:
> On Fri, 2019-12-06 at 22:38 -0500, Eric Gallager wrote:

>> On 11/15/19, David Malcolm <dmalcolm@redhat.com> wrote:

> [...]

>> > diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h

>> > new file mode 100644

>> > index 0000000..ace8924

>> > --- /dev/null

>> > +++ b/gcc/analyzer/analyzer.h

>> > @@ -0,0 +1,126 @@

>> > +/* Utility functions for the analyzer.

>> > +   Copyright (C) 2019 Free Software Foundation, Inc.

>> > +   Contributed by David Malcolm <dmalcolm@redhat.com>.

>> > +

>> > +This file is part of GCC.

>> > +

>> > +GCC is free software; you can redistribute it and/or modify it

>> > +under the terms of the GNU General Public License as published by

>> > +the Free Software Foundation; either version 3, or (at your

>> > option)

>> > +any later version.

>> > +

>> > +GCC is distributed in the hope that it will be useful, but

>> > +WITHOUT ANY WARRANTY; without even the implied warranty of

>> > +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

>> > +General Public License for more details.

>> > +

>> > +You should have received a copy of the GNU General Public License

>> > +along with GCC; see the file COPYING3.  If not see

>> > +<http://www.gnu.org/licenses/>;.  */

>> > +

>> > +#ifndef GCC_ANALYZER_ANALYZER_H

>> > +#define GCC_ANALYZER_ANALYZER_H

>> > +

>> > +/* Forward decls of common types, with indentation to show

>> > inheritance. */

>>

>> I'm wondering about the "with indentation to show inheritance"

>> part...

>> does that require tweaking any editor configuration files or adding

>> /*INDENT-OFF*/ comments or anything to prevent automatic formatting

>> tools from "fixing" the indentation to go back to the normal style of

>> having everything be aligned?

>

> If we had some kind of automatic formatting then I guess it would, but

> I don't think we have such a system in place.

>


Check the contrib directory; there's a clang-format file and a vimrc
file in there that provide automatic formatting; do `make vimrc` and
`make clang-format` from the top-level to use them. There's also the
check_GNU_style scripts, but those just check & don't actually
reformat, AFAIK...

> [...]

>

>
David Malcolm Dec. 11, 2019, 7:48 p.m. | #6
On Sat, 2019-12-07 at 08:01 -0700, Jeff Law wrote:
> On Fri, 2019-11-15 at 20:23 -0500, David Malcolm wrote:

[...]
> > diff --git a/gcc/analyzer/analyzer.cc b/gcc/analyzer/analyzer.cc

> > new file mode 100644

> > index 0000000..399925c

> > --- /dev/null

> > +++ b/gcc/analyzer/analyzer.cc

[...]
> > +

> > +/* Helper function for checkers.  Is the CALL to the given

> > function

> > name?  */

> > +

> > +bool

> > +is_named_call_p (const gcall *call, const char *funcname)

> > +{

> > +  gcc_assert (funcname);

> > +

> > +  tree fndecl = gimple_call_fndecl (call);

> > +  if (!fndecl)

> > +    return false;

> > +

> > +  return 0 == strcmp (IDENTIFIER_POINTER (DECL_NAME (fndecl)),

> > funcname);

> > +}

> > +

> > +/* Helper function for checkers.  Is the CALL to the given

> > function

> > name,

> > +   and with the given number of arguments?  */

> > +

> > +bool

> > +is_named_call_p (const gcall *call, const char *funcname,

> > +		 unsigned int num_args)

> > +{

> > +  gcc_assert (funcname);

> > +

> > +  if (!is_named_call_p (call, funcname))

> > +    return false;

> > +

> > +  if (gimple_call_num_args (call) != num_args)

> > +    return false;

> > +

> > +  return true;

> These seem generic enough to live elsewhere.


There's a potential can of worms here: these functions are used by the
checkers to detect fndecls by name, so that checkers can associate
behavior with them.  Examples:

* sm-malloc.c recognizes "malloc" and "free" as being special

* sm-signal.c knows that fprintf is async-signal-unsafe (but currently
doesn't know about any other fns; it's a proof-of-concept)

* sm-file.c currently doesn't know anything about "__fsetlocking",
which is one of the reasons the analyzer currently doesn't detect the
leak in intl/localealias.c reported as PR 47170, as it assumes that the
FILE * might have been closed and stops tracking state for it.

etc.

So we're going to have a lot more "recognize this function by name"
just to finish the existing proof-of-concept checkers.


In a perfect world, perhaps we'd have attributes for all of this, and
the user's code and their system headers would have dutifully marked
the pertinent decls with attributes, and the use of is_named_call_p
could be thought of as a bug (or wart)...  but we don't live in that
perfect world, and a good static analyzer shouldn't need to rely on the
user marking their code.

There's also integration and chicken-and-egg issues with attributes
where if we rely e.g. on the user's libc headers having attributes for
the checker, then we need to coordinate with e.g. glibc to add the
attributes, and implement them, and then the checker doesn't work if
someone is using a different libc, etc, etc.

So I think we want a concept of "known functions" in the analyzer,
where the analyzer can have its own "on the side" model of APIs -
perhaps a mixture of baked-in (e.g. for malloc/free), perhaps from an
overridable config file, but I'm not quite sure what form it should
take.  Maybe even a pragma that lets us tag named functions with
attributes, or somesuch.

For now, however, I want to take the pragmatic approach, and use these
functions as needed (and to review this as we gain experience with the
analyzer).

So I think I prefer to keep them in the analyzer subdir (but I'm happy
to move them if you'd prefer that)

Does the above sound sane?


> > +}

> > +

> > +/* Return true if stmt is a setjmp call.  */

> > +

> > +bool

> > +is_setjmp_call_p (const gimple *stmt)

> > +{

> > +  /* TODO: is there a less hacky way to check for "setjmp"?  */

> > +  if (const gcall *call = dyn_cast <const gcall *> (stmt))

> > +    if (is_named_call_p (call, "_setjmp", 1))

> > +      return true;

> > +

> > +  return false;

> > +}

> > +

> > +/* Return true if stmt is a longjmp call.  */

> > +

> > +bool

> > +is_longjmp_call_p (const gcall *call)

> > +{

> > +  /* TODO: is there a less hacky way to check for "longjmp"?  */

> > +  if (is_named_call_p (call, "longjmp", 2))

> > +    return true;

> > +

> > +  return false;

> > +}

> I thought we already had routines to detect these.  We certainly have

> *code* to detect them.   If it's the former we really just want one

> implementation (that hands the various permutations we've seen

> through

> the years).  If it's the latter, then we probably want to use these

> routines to simplify those implementations.


We have several:

* calls.c has:
  * setjmp_call_p (const_tree fndecl)
  * special_function_p which does name matching on "setjmp",
"sigsetjmp", "savectx", "vfork", "getcontext" and sets
ECF_RETURNS_TWICE (dropping leading single and double underscores)
* except.c has "tree setjmp_fn;" used with #ifdef
DONT_USE_BUILTIN_SETJMP
* omp-low.c has: setjmp_or_longjmp_p (const_tree fndecl)

(and maybe more).

I reviewed where I'm using the two the patch proposed adding above:

* I use is_setjmp_call_p in diagnostic_manager::add_events_for_eedge
for PK_BEFORE_STMT on the gcall to capture recording the setjmp buf (so
that the event can be cross-referenced at the longjmp call), and in
engine.cc for identifying that initial "store" call

* I use is_longjmp_call_p in only one place, in engine.cc, for
detecting the fn and handling the non-standard control flow

As for the other functions identified in calls.c special_function_p:
* "sigsetjmp": looks like I can generalize my code to handle this
* "savectx": https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71876
suggests it's Solaris-specific.  I have no idea of the signature or semantics, so I'm inclined not to handle it in the analyzer.
* "vfork": appears to me to be an old performance hack from before COW
pages.  Semantics appear sufficiently different from setjmp/longjmp
that it would need its own special-casing (and isn't a priority for me)
* "getcontext"/"setcontext": have slightly different semantics to
setjmp/longjmp (e.g. no val is passed like for longjmp; they can fail
and set errno); again, would need their own special-casing, and again,
these are not a priority for me)

I think the requirements for the analyzer here differ from that of the
rest of the compiler: the analyzer needs to capture semantics of the
fns in terms of the effects on states and paths, which is rather
different to that needed by code generation.  For example the
analyzer's special-casing captures subtleties like "Passing 0 as the
val to longjmp leads to setjmp returning 1".

So I'm thinking that for the next iteration of the kit I'll drop these
two functions in favor of:

  setjmp_like_p
  longjmp_like_p

to detect setjmp/sigsetjmp  and longjmp/siglongjmp, specifically for
the analyzer, so that it can trigger the special-casing for these.

Does that sound reasonable?

> > +/* Generate a label_text instance by formatting FMT, using a

> > +   temporary clone of the global_dc's printer (thus using its

> > +   formatting callbacks).

> > +

> > +   Colorize if the global_dc supports colorization and

> > CAN_COLORIZE

> > is

> > +   true.  */

> > +

> > +label_text

> > +make_label_text (bool can_colorize, const char *fmt, ...)

> > +{

> > +  pretty_printer *pp = global_dc->printer->clone ();

> > +  pp_clear_output_area (pp);

> > +

> > +  if (!can_colorize)

> > +    pp_show_color (pp) = false;

> > +

> > +  text_info ti;

> > +  rich_location rich_loc (line_table, UNKNOWN_LOCATION);

> > +

> > +  va_list ap;

> > +

> > +  va_start (ap, fmt);

> > +

> > +  ti.format_spec = _(fmt);

> > +  ti.args_ptr = &ap;

> > +  ti.err_no = 0;

> > +  ti.x_data = NULL;

> > +  ti.m_richloc = &rich_loc;

> > +

> > +  pp_format (pp, &ti);

> > +  pp_output_formatted_text (pp);

> > +

> > +  va_end (ap);

> > +

> > +  label_text result = label_text::take (xstrdup (pp_formatted_text

> > (pp)));

> > +  delete pp;

> > +  return result;

> > +}

> Is this better in the pretty-printer infrastructure?  


This touches on another can of worms... it's rather clunky to be
cloning the printer and using it to generate a temporary string.  I've
been thinking that maybe diagnostic_event::get_desc should instead
print to a pp, which might avoid a lot of printer cloning, but I've
attempted that before, unsuccessfully.  I guess I'll try again; it
feels cleaner (I think it required cloning the printer if we're going
to use it from within a rich_location label, since otherwise the
printing of the description interferes with that of
diagnostic_show_locus, and we end up with garbled text).

Dave

Patch

diff --git a/gcc/analyzer/analyzer.cc b/gcc/analyzer/analyzer.cc
new file mode 100644
index 0000000..399925c
--- /dev/null
+++ b/gcc/analyzer/analyzer.cc
@@ -0,0 +1,125 @@ 
+/* Utility functions for the analyzer.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   Contributed by David Malcolm <dmalcolm@redhat.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "gcc-plugin.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+#include "gimple.h"
+#include "diagnostic.h"
+#include "intl.h"
+#include "analyzer/analyzer.h"
+
+/* Helper function for checkers.  Is the CALL to the given function name?  */
+
+bool
+is_named_call_p (const gcall *call, const char *funcname)
+{
+  gcc_assert (funcname);
+
+  tree fndecl = gimple_call_fndecl (call);
+  if (!fndecl)
+    return false;
+
+  return 0 == strcmp (IDENTIFIER_POINTER (DECL_NAME (fndecl)), funcname);
+}
+
+/* Helper function for checkers.  Is the CALL to the given function name,
+   and with the given number of arguments?  */
+
+bool
+is_named_call_p (const gcall *call, const char *funcname,
+		 unsigned int num_args)
+{
+  gcc_assert (funcname);
+
+  if (!is_named_call_p (call, funcname))
+    return false;
+
+  if (gimple_call_num_args (call) != num_args)
+    return false;
+
+  return true;
+}
+
+/* Return true if stmt is a setjmp call.  */
+
+bool
+is_setjmp_call_p (const gimple *stmt)
+{
+  /* TODO: is there a less hacky way to check for "setjmp"?  */
+  if (const gcall *call = dyn_cast <const gcall *> (stmt))
+    if (is_named_call_p (call, "_setjmp", 1))
+      return true;
+
+  return false;
+}
+
+/* Return true if stmt is a longjmp call.  */
+
+bool
+is_longjmp_call_p (const gcall *call)
+{
+  /* TODO: is there a less hacky way to check for "longjmp"?  */
+  if (is_named_call_p (call, "longjmp", 2))
+    return true;
+
+  return false;
+}
+
+/* Generate a label_text instance by formatting FMT, using a
+   temporary clone of the global_dc's printer (thus using its
+   formatting callbacks).
+
+   Colorize if the global_dc supports colorization and CAN_COLORIZE is
+   true.  */
+
+label_text
+make_label_text (bool can_colorize, const char *fmt, ...)
+{
+  pretty_printer *pp = global_dc->printer->clone ();
+  pp_clear_output_area (pp);
+
+  if (!can_colorize)
+    pp_show_color (pp) = false;
+
+  text_info ti;
+  rich_location rich_loc (line_table, UNKNOWN_LOCATION);
+
+  va_list ap;
+
+  va_start (ap, fmt);
+
+  ti.format_spec = _(fmt);
+  ti.args_ptr = &ap;
+  ti.err_no = 0;
+  ti.x_data = NULL;
+  ti.m_richloc = &rich_loc;
+
+  pp_format (pp, &ti);
+  pp_output_formatted_text (pp);
+
+  va_end (ap);
+
+  label_text result = label_text::take (xstrdup (pp_formatted_text (pp)));
+  delete pp;
+  return result;
+}
diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h
new file mode 100644
index 0000000..ace8924
--- /dev/null
+++ b/gcc/analyzer/analyzer.h
@@ -0,0 +1,126 @@ 
+/* Utility functions for the analyzer.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   Contributed by David Malcolm <dmalcolm@redhat.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_ANALYZER_ANALYZER_H
+#define GCC_ANALYZER_ANALYZER_H
+
+/* Forward decls of common types, with indentation to show inheritance.  */
+
+class graphviz_out;
+class supergraph;
+class supernode;
+class superedge;
+  class cfg_superedge;
+    class switch_cfg_superedge;
+  class callgraph_superedge;
+    class call_superedge;
+    class return_superedge;
+class svalue;
+  class region_svalue;
+  class constant_svalue;
+  class poisoned_svalue;
+  class unknown_svalue;
+  class setjmp_svalue;
+class region;
+  class map_region;
+  class symbolic_region;
+class region_model;
+class region_model_context;
+  class impl_region_model_context;
+class constraint_manager;
+class equiv_class;
+struct model_merger;
+struct svalue_id_merger_mapping;
+struct canonicalization;
+class pending_diagnostic;
+class checker_path;
+class extrinsic_state;
+class sm_state_map;
+class stmt_finder;
+class program_point;
+class program_state;
+class exploded_graph;
+class exploded_node;
+class exploded_edge;
+class exploded_cluster;
+class exploded_path;
+class analysis_plan;
+class state_purge_map;
+class state_purge_per_ssa_name;
+class state_change;
+
+////////////////////////////////////////////////////////////////////////////
+
+extern bool is_named_call_p (const gcall *call, const char *funcname);
+extern bool is_named_call_p (const gcall *call, const char *funcname,
+			     unsigned int num_args);
+extern bool is_setjmp_call_p (const gimple *stmt);
+extern bool is_longjmp_call_p (const gcall *call);
+
+extern void register_analyzer_pass ();
+
+extern label_text make_label_text (bool can_colorize, const char *fmt, ...);
+
+////////////////////////////////////////////////////////////////////////////
+
+/* An RAII-style class for pushing/popping cfun within a scope.
+   Doing so ensures we get "In function " announcements
+   from the diagnostics subsystem.  */
+
+class auto_cfun
+{
+public:
+  auto_cfun (function *fun) { push_cfun (fun); }
+  ~auto_cfun () { pop_cfun (); }
+};
+
+////////////////////////////////////////////////////////////////////////////
+
+/* Begin suppressing -Wformat and -Wformat-extra-args.  */
+
+#define PUSH_IGNORE_WFORMAT \
+  _Pragma("GCC diagnostic push") \
+  _Pragma("GCC diagnostic ignored \"-Wformat\"") \
+  _Pragma("GCC diagnostic ignored \"-Wformat-extra-args\"")
+
+/* Finish suppressing -Wformat and -Wformat-extra-args.  */
+
+#define POP_IGNORE_WFORMAT \
+  _Pragma("GCC diagnostic pop")
+
+////////////////////////////////////////////////////////////////////////////
+
+/* A template for creating hash traits for a POD type.  */
+
+template <typename Type>
+struct pod_hash_traits : typed_noop_remove<Type>
+{
+  typedef Type value_type;
+  typedef Type compare_type;
+  static inline hashval_t hash (value_type);
+  static inline bool equal (const value_type &existing,
+			    const value_type &candidate);
+  static inline void mark_deleted (Type &);
+  static inline void mark_empty (Type &);
+  static inline bool is_deleted (Type);
+  static inline bool is_empty (Type);
+};
+
+#endif /* GCC_ANALYZER_ANALYZER_H */