[15/49] Add ordered_hash_map

Message ID 1573867416-55618-16-git-send-email-dmalcolm@redhat.com
State Superseded
Headers show
Series
  • RFC: Add a static analysis framework to GCC
Related show

Commit Message

David Malcolm Nov. 16, 2019, 1:23 a.m.
This patch adds an ordered_hash_map template, which is similar to
hash_map, but preserves insertion order.

gcc/ChangeLog:
	* Makefile.in (OBJS): Add ordered-hash-map-tests.o.
	* ordered-hash-map-tests.cc: New file.
	* ordered-hash-map.h: New file.
	* selftest-run-tests.c (selftest::run_tests): Call
	selftest::ordered_hash_map_tests_cc_tests.
	* selftest.h (selftest::ordered_hash_map_tests_cc_tests): New
	decl.
---
 gcc/Makefile.in               |   1 +
 gcc/ordered-hash-map-tests.cc | 247 ++++++++++++++++++++++++++++++++++++++++++
 gcc/ordered-hash-map.h        | 184 +++++++++++++++++++++++++++++++
 gcc/selftest-run-tests.c      |   1 +
 gcc/selftest.h                |   1 +
 5 files changed, 434 insertions(+)
 create mode 100644 gcc/ordered-hash-map-tests.cc
 create mode 100644 gcc/ordered-hash-map.h

-- 
1.8.5.3

Comments

Martin Sebor Dec. 4, 2019, 5:59 p.m. | #1
On 11/15/19 6:23 PM, David Malcolm wrote:
> This patch adds an ordered_hash_map template, which is similar to

> hash_map, but preserves insertion order.

> 

> gcc/ChangeLog:

> 	* Makefile.in (OBJS): Add ordered-hash-map-tests.o.

> 	* ordered-hash-map-tests.cc: New file.

> 	* ordered-hash-map.h: New file.

> 	* selftest-run-tests.c (selftest::run_tests): Call

> 	selftest::ordered_hash_map_tests_cc_tests.

> 	* selftest.h (selftest::ordered_hash_map_tests_cc_tests): New

> 	decl.

> ---

>   gcc/Makefile.in               |   1 +

>   gcc/ordered-hash-map-tests.cc | 247 ++++++++++++++++++++++++++++++++++++++++++

>   gcc/ordered-hash-map.h        | 184 +++++++++++++++++++++++++++++++

>   gcc/selftest-run-tests.c      |   1 +

>   gcc/selftest.h                |   1 +

>   5 files changed, 434 insertions(+)

>   create mode 100644 gcc/ordered-hash-map-tests.cc

>   create mode 100644 gcc/ordered-hash-map.h

> 

> diff --git a/gcc/Makefile.in b/gcc/Makefile.in

> index c87b00f..5feef6a 100644

> --- a/gcc/Makefile.in

> +++ b/gcc/Makefile.in

> @@ -1443,6 +1443,7 @@ OBJS = \

>   	optinfo-emit-json.o \

>   	options-save.o \

>   	opts-global.o \

> +	ordered-hash-map-tests.o \

>   	passes.o \

>   	plugin.o \

>   	postreload-gcse.o \

> diff --git a/gcc/ordered-hash-map-tests.cc b/gcc/ordered-hash-map-tests.cc

> new file mode 100644

> index 0000000..266f60c

> --- /dev/null

> +++ b/gcc/ordered-hash-map-tests.cc

> @@ -0,0 +1,247 @@

> +/* Unit tests for ordered-hash-map.h.

> +   Copyright (C) 2015-2019 Free Software Foundation, Inc.

> +

> +This file is part of GCC.

> +

> +GCC is free software; you can redistribute it and/or modify it under

> +the terms of the GNU General Public License as published by the Free

> +Software Foundation; either version 3, or (at your option) any later

> +version.

> +

> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY

> +WARRANTY; without even the implied warranty of MERCHANTABILITY or

> +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

> +for more details.

> +

> +You should have received a copy of the GNU General Public License

> +along with GCC; see the file COPYING3.  If not see

> +<http://www.gnu.org/licenses/>.  */

> +

> +#include "config.h"

> +#include "system.h"

> +#include "coretypes.h"

> +#include "tm.h"

> +#include "opts.h"

> +#include "hash-set.h"

> +#include "fixed-value.h"

> +#include "alias.h"

> +#include "flags.h"

> +#include "symtab.h"

> +#include "tree-core.h"

> +#include "stor-layout.h"

> +#include "tree.h"

> +#include "stringpool.h"

> +#include "ordered-hash-map.h"

> +#include "selftest.h"

> +

> +#if CHECKING_P

> +

> +namespace selftest {

> +

> +/* Populate *OUT_KVS with the key/value pairs of M.  */

> +

> +template <typename HashMap, typename Key, typename Value>

> +static void

> +get_kv_pairs (const HashMap &m,

> +	      auto_vec<std::pair<Key, Value> > *out_kvs)

> +{

> +  for (typename HashMap::iterator iter = m.begin ();

> +       iter != m.end ();

> +       ++iter)

> +    out_kvs->safe_push (std::make_pair ((*iter).first, (*iter).second));

> +}

> +

> +/* Construct an ordered_hash_map <const char *, int> and verify that

> +   various operations work correctly.  */

> +

> +static void

> +test_map_of_strings_to_int ()

> +{

> +  ordered_hash_map <const char *, int> m;

> +

> +  const char *ostrich = "ostrich";

> +  const char *elephant = "elephant";

> +  const char *ant = "ant";

> +  const char *spider = "spider";

> +  const char *millipede = "Illacme plenipes";

> +  const char *eric = "half a bee";

> +

> +  /* A fresh hash_map should be empty.  */

> +  ASSERT_EQ (0, m.elements ());

> +  ASSERT_EQ (NULL, m.get (ostrich));

> +

> +  /* Populate the hash_map.  */

> +  ASSERT_EQ (false, m.put (ostrich, 2));

> +  ASSERT_EQ (false, m.put (elephant, 4));

> +  ASSERT_EQ (false, m.put (ant, 6));

> +  ASSERT_EQ (false, m.put (spider, 8));

> +  ASSERT_EQ (false, m.put (millipede, 750));

> +  ASSERT_EQ (false, m.put (eric, 3));

> +

> +  /* Verify that we can recover the stored values.  */

> +  ASSERT_EQ (6, m.elements ());

> +  ASSERT_EQ (2, *m.get (ostrich));

> +  ASSERT_EQ (4, *m.get (elephant));

> +  ASSERT_EQ (6, *m.get (ant));

> +  ASSERT_EQ (8, *m.get (spider));

> +  ASSERT_EQ (750, *m.get (millipede));

> +  ASSERT_EQ (3, *m.get (eric));

> +

> +  /* Verify that the order of insertion is preserved.  */

> +  auto_vec<std::pair<const char *, int> > kvs;

> +  get_kv_pairs (m, &kvs);

> +  ASSERT_EQ (kvs.length (), 6);

> +  ASSERT_EQ (kvs[0].first, ostrich);

> +  ASSERT_EQ (kvs[0].second, 2);

> +  ASSERT_EQ (kvs[1].first, elephant);

> +  ASSERT_EQ (kvs[1].second, 4);

> +  ASSERT_EQ (kvs[2].first, ant);

> +  ASSERT_EQ (kvs[2].second, 6);

> +  ASSERT_EQ (kvs[3].first, spider);

> +  ASSERT_EQ (kvs[3].second, 8);

> +  ASSERT_EQ (kvs[4].first, millipede);

> +  ASSERT_EQ (kvs[4].second, 750);

> +  ASSERT_EQ (kvs[5].first, eric);

> +  ASSERT_EQ (kvs[5].second, 3);

> +}

> +

> +/* Construct an ordered_hash_map using int_hash and verify that various

> +   operations work correctly.  */

> +

> +static void

> +test_map_of_int_to_strings ()

> +{

> +  const int EMPTY = -1;

> +  const int DELETED = -2;

> +  typedef int_hash <int, EMPTY, DELETED> int_hash_t;

> +  ordered_hash_map <int_hash_t, const char *> m;

> +

> +  const char *ostrich = "ostrich";

> +  const char *elephant = "elephant";

> +  const char *ant = "ant";

> +  const char *spider = "spider";

> +  const char *millipede = "Illacme plenipes";

> +  const char *eric = "half a bee";

> +

> +  /* A fresh hash_map should be empty.  */

> +  ASSERT_EQ (0, m.elements ());

> +  ASSERT_EQ (NULL, m.get (2));

> +

> +  /* Populate the hash_map.  */

> +  ASSERT_EQ (false, m.put (2, ostrich));

> +  ASSERT_EQ (false, m.put (4, elephant));

> +  ASSERT_EQ (false, m.put (6, ant));

> +  ASSERT_EQ (false, m.put (8, spider));

> +  ASSERT_EQ (false, m.put (750, millipede));

> +  ASSERT_EQ (false, m.put (3, eric));

> +

> +  /* Verify that we can recover the stored values.  */

> +  ASSERT_EQ (6, m.elements ());

> +  ASSERT_EQ (*m.get (2), ostrich);

> +  ASSERT_EQ (*m.get (4), elephant);

> +  ASSERT_EQ (*m.get (6), ant);

> +  ASSERT_EQ (*m.get (8), spider);

> +  ASSERT_EQ (*m.get (750), millipede);

> +  ASSERT_EQ (*m.get (3), eric);

> +

> +  /* Verify that the order of insertion is preserved.  */

> +  auto_vec<std::pair<int, const char *> > kvs;

> +  get_kv_pairs (m, &kvs);

> +  ASSERT_EQ (kvs.length (), 6);

> +  ASSERT_EQ (kvs[0].first, 2);

> +  ASSERT_EQ (kvs[0].second, ostrich);

> +  ASSERT_EQ (kvs[1].first, 4);

> +  ASSERT_EQ (kvs[1].second, elephant);

> +  ASSERT_EQ (kvs[2].first, 6);

> +  ASSERT_EQ (kvs[2].second, ant);

> +  ASSERT_EQ (kvs[3].first, 8);

> +  ASSERT_EQ (kvs[3].second, spider);

> +  ASSERT_EQ (kvs[4].first, 750);

> +  ASSERT_EQ (kvs[4].second, millipede);

> +  ASSERT_EQ (kvs[5].first, 3);

> +  ASSERT_EQ (kvs[5].second, eric);

> +}

> +

> +/* Verify that we can remove items from an ordered_hash_map.  */

> +

> +static void

> +test_removal ()

> +{

> +  ordered_hash_map <const char *, int> m;

> +

> +  const char *ostrich = "ostrich";

> +  ASSERT_EQ (false, m.put (ostrich, 2));

> +

> +  ASSERT_EQ (1, m.elements ());

> +  ASSERT_EQ (2, *m.get (ostrich));

> +

> +  {

> +    auto_vec<std::pair<const char *, int> > kvs;

> +    get_kv_pairs (m, &kvs);

> +    ASSERT_EQ (kvs.length (), 1);

> +    ASSERT_EQ (kvs[0].first, ostrich);

> +    ASSERT_EQ (kvs[0].second, 2);

> +  }

> +

> +  m.remove (ostrich);

> +

> +  ASSERT_EQ (0, m.elements ());

> +  {

> +    auto_vec<std::pair<const char *, int> > kvs;

> +    get_kv_pairs (m, &kvs);

> +    ASSERT_EQ (kvs.length (), 0);

> +  }

> +

> +  /* Reinsertion (with a different value).  */

> +  ASSERT_EQ (false, m.put (ostrich, 42));

> +  ASSERT_EQ (1, m.elements ());

> +  ASSERT_EQ (42, *m.get (ostrich));

> +  {

> +    auto_vec<std::pair<const char *, int> > kvs;

> +    get_kv_pairs (m, &kvs);

> +    ASSERT_EQ (kvs.length (), 1);

> +    ASSERT_EQ (kvs[0].first, ostrich);

> +    ASSERT_EQ (kvs[0].second, 42);

> +  }

> +}

> +

> +/* Verify that ordered_hash_map's copy-ctor works.  */

> +

> +static void

> +test_copy_ctor ()

> +{

> +  ordered_hash_map <const char *, int> m;

> +

> +  const char *ostrich = "ostrich";

> +  ASSERT_EQ (false, m.put (ostrich, 2));

> +

> +  ASSERT_EQ (1, m.elements ());

> +  ASSERT_EQ (2, *m.get (ostrich));

> +

> +  ordered_hash_map <const char *, int> copy (m);

> +  ASSERT_EQ (1, copy.elements ());

> +  ASSERT_EQ (2, *copy.get (ostrich));

> +

> +  /* Remove from source.  */

> +  m.remove (ostrich);

> +  ASSERT_EQ (0, m.elements ());

> +

> +  /* Copy should be unaffected.  */

> +  ASSERT_EQ (1, copy.elements ());

> +  ASSERT_EQ (2, *copy.get (ostrich));

> +}

> +

> +/* Run all of the selftests within this file.  */

> +

> +void

> +ordered_hash_map_tests_cc_tests ()

> +{

> +  test_map_of_strings_to_int ();

> +  test_map_of_int_to_strings ();

> +  test_removal ();

> +  test_copy_ctor ();

> +}

> +

> +} // namespace selftest

> +

> +#endif /* CHECKING_P */

> diff --git a/gcc/ordered-hash-map.h b/gcc/ordered-hash-map.h

> new file mode 100644

> index 0000000..4e35cad

> --- /dev/null

> +++ b/gcc/ordered-hash-map.h

> @@ -0,0 +1,184 @@

> +/* A type-safe hash map that retains the insertion order of keys.

> +   Copyright (C) 2019 Free Software Foundation, Inc.

> +

> +This file is part of GCC.

> +

> +GCC is free software; you can redistribute it and/or modify it under

> +the terms of the GNU General Public License as published by the Free

> +Software Foundation; either version 3, or (at your option) any later

> +version.

> +

> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY

> +WARRANTY; without even the implied warranty of MERCHANTABILITY or

> +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

> +for more details.

> +

> +You should have received a copy of the GNU General Public License

> +along with GCC; see the file COPYING3.  If not see

> +<http://www.gnu.org/licenses/>.  */

> +

> +

> +#ifndef GCC_ORDERED_HASH_MAP_H

> +#define GCC_ORDERED_HASH_MAP_H

> +

> +/* Notes:

> +   - The keys must be PODs, since vec<> uses assignment to populate slots

> +     without properly initializing them.

> +   - doesn't have GTY support.

> +   - supports removal, but retains order of original insertion.

> +     (Removal might be better handled by using a doubly-linked list

> +     of nodes, holding the values).  */

> +

> +template<typename KeyId, typename Value,

> +	 typename Traits>

> +class ordered_hash_map

> +{

> +  typedef typename Traits::key_type Key;

> +

> +public:

> +  ordered_hash_map () {}

> +

> +  ordered_hash_map (const ordered_hash_map &other)


The container defines a copy-constructor but no copy assignment.
Is it safely assignable? (I don't think auto_vec is safely copyable
or assignable due to PR 90904.  It looks like the copy ctor works
around it below.)

I don't think I've made use of the hash_map copy ctor or copy
assignment but if it's anything like other GCC containers I'd
worry about it not doing the right thing, especially for non-
PODs.  I spent too much time chasing down miscompilations and
other problems due to bugs (or design limitations) in these
classes.

I'd far prefer to see us use libstdc++ containers in new code
than introduce new ones of our own.  They are better designed
and much better tested than these.  (I realize we're still
hampered by targeting C++ 98.)

Martin

> +  : m_map (other.m_map),

> +    m_keys (other.m_keys.length ()),

> +    m_key_index (other.m_key_index)

> +  {

> +     unsigned i;

> +     Key key;

> +     FOR_EACH_VEC_ELT (other.m_keys, i, key)

> +       m_keys.quick_push (key);

> +  }

> +

> +  /* If key K isn't already in the map add key K with value V to the map, and

> +     return false.  Otherwise set the value of the entry for key K to be V and

> +     return true.  */

> +

> +  bool put (const Key &k, const Value &v)

> +  {

> +    bool existed = m_map.put (k, v);

> +    if (!existed)

> +      {

> +        bool key_present;

> +        int &slot = m_key_index.get_or_insert (k, &key_present);

> +        if (!key_present)

> +          {

> +             slot = m_keys.length ();

> +             m_keys.safe_push (k);

> +          }

> +      }

> +    return existed;

> +  }

> +

> +  /* If the passed in key is in the map return its value otherwise NULL.  */

> +

> +  Value *get (const Key &k)

> +  {

> +    return m_map.get (k);

> +  }

> +

> +  /* Removing a key removes it from the map, but retains the insertion

> +     order.  */

> +

> +  void remove (const Key &k)

> +  {

> +     m_map.remove (k);

> +  }

> +

> +  size_t elements () const { return m_map.elements (); }

> +

> +  class iterator

> +  {

> +  public:

> +    explicit iterator (const ordered_hash_map &map, unsigned idx) :

> +      m_ordered_hash_map (map), m_idx (idx) {}

> +

> +    iterator &operator++ ()

> +    {

> +       /* Increment m_idx until we find a non-deleted element, or go beyond

> +	  the end.  */

> +       while (1)

> +	 {

> +	   ++m_idx;

> +	   if (valid_index_p ())

> +	     break;

> +	}

> +      return *this;

> +    }

> +

> +    /* Can't use std::pair here, because GCC before 4.3 don't handle

> +       std::pair where template parameters are references well.

> +       See PR86739.  */

> +    struct reference_pair {

> +      const Key &first;

> +      Value &second;

> +

> +      reference_pair (const Key &key, Value &value)

> +      : first (key), second (value) {}

> +

> +      template <typename K, typename V>

> +      operator std::pair<K, V> () const { return std::pair<K, V> (first, second); }

> +    };

> +

> +    reference_pair operator* ()

> +    {

> +      const Key &k = m_ordered_hash_map.m_keys[m_idx];

> +      Value *slot

> +        = const_cast<ordered_hash_map &> (m_ordered_hash_map).get (k);

> +      gcc_assert (slot);

> +      return reference_pair (k, *slot);

> +    }

> +

> +    bool

> +    operator != (const iterator &other) const

> +    {

> +      return m_idx != other.m_idx;

> +    }

> +

> +    /* Treat one-beyond-the-end as valid, for handling the "end" case.  */

> +

> +    bool valid_index_p () const

> +    {

> +      if (m_idx > m_ordered_hash_map.m_keys.length ())

> +	return false;

> +      if (m_idx == m_ordered_hash_map.m_keys.length ())

> +	return true;

> +      const Key &k = m_ordered_hash_map.m_keys[m_idx];

> +      Value *slot

> +	= const_cast<ordered_hash_map &> (m_ordered_hash_map).get (k);

> +      return slot != NULL;

> +    }

> +

> +    const ordered_hash_map &m_ordered_hash_map;

> +    unsigned m_idx;

> +  };

> +

> +  /* Standard iterator retrieval methods.  */

> +

> +  iterator begin () const

> +  {

> +    iterator i = iterator (*this, 0);

> +    while (!i.valid_index_p () && i != end ())

> +      ++i;

> +    return i;

> +  }

> +  iterator end () const { return iterator (*this, m_keys.length ()); }

> +

> +private:

> +  /* The underlying map.  */

> +  hash_map<KeyId, Value, Traits> m_map;

> +

> +  /* The ordering of the keys.  */

> +  auto_vec<Key> m_keys;

> +

> +  /* For each key that's ever been in the map, its index within m_keys.  */

> +  hash_map<KeyId, int> m_key_index;

> +};

> +

> +/* Two-argument form.  */

> +

> +template<typename Key, typename Value,

> +	 typename Traits = simple_hashmap_traits<default_hash_traits<Key>,

> +						 Value> >

> +class ordered_hash_map;

> +

> +#endif /* GCC_ORDERED_HASH_MAP_H */

> diff --git a/gcc/selftest-run-tests.c b/gcc/selftest-run-tests.c

> index 92c8de7..f41244b 100644

> --- a/gcc/selftest-run-tests.c

> +++ b/gcc/selftest-run-tests.c

> @@ -76,6 +76,7 @@ selftest::run_tests ()

>     cgraph_c_tests ();

>     optinfo_emit_json_cc_tests ();

>     opt_problem_cc_tests ();

> +  ordered_hash_map_tests_cc_tests ();

>   

>     /* Mid-level data structures.  */

>     input_c_tests ();

> diff --git a/gcc/selftest.h b/gcc/selftest.h

> index c67b688..933d1cd 100644

> --- a/gcc/selftest.h

> +++ b/gcc/selftest.h

> @@ -242,6 +242,7 @@ extern void input_c_tests ();

>   extern void json_cc_tests ();

>   extern void opt_problem_cc_tests ();

>   extern void optinfo_emit_json_cc_tests ();

> +extern void ordered_hash_map_tests_cc_tests ();

>   extern void predict_c_tests ();

>   extern void pretty_print_c_tests ();

>   extern void read_rtl_function_c_tests ();

>
David Malcolm Jan. 8, 2020, 11:47 p.m. | #2
On Wed, 2019-12-04 at 10:59 -0700, Martin Sebor wrote:
> On 11/15/19 6:23 PM, David Malcolm wrote:

> > This patch adds an ordered_hash_map template, which is similar to

> > hash_map, but preserves insertion order.

> > 

> > gcc/ChangeLog:

> > 	* Makefile.in (OBJS): Add ordered-hash-map-tests.o.

> > 	* ordered-hash-map-tests.cc: New file.

> > 	* ordered-hash-map.h: New file.

> > 	* selftest-run-tests.c (selftest::run_tests): Call

> > 	selftest::ordered_hash_map_tests_cc_tests.

> > 	* selftest.h (selftest::ordered_hash_map_tests_cc_tests): New

> > 	decl.

> > ---

> >   gcc/Makefile.in               |   1 +

> >   gcc/ordered-hash-map-tests.cc | 247

> > ++++++++++++++++++++++++++++++++++++++++++

> >   gcc/ordered-hash-map.h        | 184

> > +++++++++++++++++++++++++++++++

> >   gcc/selftest-run-tests.c      |   1 +

> >   gcc/selftest.h                |   1 +

> >   5 files changed, 434 insertions(+)

> >   create mode 100644 gcc/ordered-hash-map-tests.cc

> >   create mode 100644 gcc/ordered-hash-map.h

> > 


[...]

> The container defines a copy-constructor but no copy assignment.

> Is it safely assignable? (I don't think auto_vec is safely copyable

> or assignable due to PR 90904.  It looks like the copy ctor works

> around it below.)


It's not safely assignable; I don't believe I'm using that (I am using
the copy ctor).

I can make it private or similar to ensure it's not used.

> I don't think I've made use of the hash_map copy ctor or copy

> assignment but if it's anything like other GCC containers I'd

> worry about it not doing the right thing, especially for non-

> PODs.  I spent too much time chasing down miscompilations and

> other problems due to bugs (or design limitations) in these

> classes.

> 

> I'd far prefer to see us use libstdc++ containers in new code

> than introduce new ones of our own.  They are better designed

> and much better tested than these.  (I realize we're still

> hampered by targeting C++ 98.)

> 

> Martin


[CCing Jonathan for libstdc++ expertise]

Is there such a container in libstdc++?

This patch implements a map that preserves insertion order when
iterating, without requiring the Key type to be comparable.

As far as I can tell, std::map instead is a map that requires the Key
type to be comparable, and uses that to implement a red-black tree
(rather than via hashing), and doesn't preserve insertion ordering.

I use this ordered_hash_map class later in various places in the
analyzer patch kit to ensure more deterministic results, so that
results aren't affected by hash values of possibly-changing pointer
values.

[...]

Dave
Jonathan Wakely Jan. 9, 2020, 11:56 a.m. | #3
On 08/01/20 18:47 -0500, David Malcolm wrote:
>On Wed, 2019-12-04 at 10:59 -0700, Martin Sebor wrote:

>> On 11/15/19 6:23 PM, David Malcolm wrote:

>> > This patch adds an ordered_hash_map template, which is similar to

>> > hash_map, but preserves insertion order.

>> >

>> > gcc/ChangeLog:

>> > 	* Makefile.in (OBJS): Add ordered-hash-map-tests.o.

>> > 	* ordered-hash-map-tests.cc: New file.

>> > 	* ordered-hash-map.h: New file.

>> > 	* selftest-run-tests.c (selftest::run_tests): Call

>> > 	selftest::ordered_hash_map_tests_cc_tests.

>> > 	* selftest.h (selftest::ordered_hash_map_tests_cc_tests): New

>> > 	decl.

>> > ---

>> >   gcc/Makefile.in               |   1 +

>> >   gcc/ordered-hash-map-tests.cc | 247

>> > ++++++++++++++++++++++++++++++++++++++++++

>> >   gcc/ordered-hash-map.h        | 184

>> > +++++++++++++++++++++++++++++++

>> >   gcc/selftest-run-tests.c      |   1 +

>> >   gcc/selftest.h                |   1 +

>> >   5 files changed, 434 insertions(+)

>> >   create mode 100644 gcc/ordered-hash-map-tests.cc

>> >   create mode 100644 gcc/ordered-hash-map.h

>> >

>

>[...]

>

>> The container defines a copy-constructor but no copy assignment.

>> Is it safely assignable? (I don't think auto_vec is safely copyable

>> or assignable due to PR 90904.  It looks like the copy ctor works

>> around it below.)

>

>It's not safely assignable; I don't believe I'm using that (I am using

>the copy ctor).

>

>I can make it private or similar to ensure it's not used.


Yes, if the compiler-generated assignment isn't safe, please declare
(but don't define) a private assignment operator, to suppress the
compiler-generated one.

>> I don't think I've made use of the hash_map copy ctor or copy

>> assignment but if it's anything like other GCC containers I'd

>> worry about it not doing the right thing, especially for non-

>> PODs.  I spent too much time chasing down miscompilations and

>> other problems due to bugs (or design limitations) in these

>> classes.

>>

>> I'd far prefer to see us use libstdc++ containers in new code

>> than introduce new ones of our own.  They are better designed

>> and much better tested than these.  (I realize we're still

>> hampered by targeting C++ 98.)

>>

>> Martin

>

>[CCing Jonathan for libstdc++ expertise]

>

>Is there such a container in libstdc++?



>This patch implements a map that preserves insertion order when

>iterating, without requiring the Key type to be comparable.

>

>As far as I can tell, std::map instead is a map that requires the Key

>type to be comparable,


Right.

>and uses that to implement a red-black tree

>(rather than via hashing),


Right.

>and doesn't preserve insertion ordering.


std::map has unique keys anyway, so insertion order is irrelevant.
Iteration is done by key order.

std::multimap is the non-unique version, and preserves insertion order
for equivalent keys, but only since C++11. Before that it was
unspecified whether or not a duplicate key was inserted after existing
elements with equivalent keys.

>I use this ordered_hash_map class later in various places in the

>analyzer patch kit to ensure more deterministic results, so that

>results aren't affected by hash values of possibly-changing pointer

>values.


Nothing in the standard library provides that functionality out of the
box. The C++11 hash containers don't preserve order at all, even
equivalent elements can be reordered when insertion triggers
rehashing.

You could build something using a sequence container (vector, deque or
list) that preserves a stable order according to insertion order, and
then manually maintain a separate index of hash values used for
lookup. That might not make things any simpler (I haven't looked at
your actual patch).

Patch

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index c87b00f..5feef6a 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1443,6 +1443,7 @@  OBJS = \
 	optinfo-emit-json.o \
 	options-save.o \
 	opts-global.o \
+	ordered-hash-map-tests.o \
 	passes.o \
 	plugin.o \
 	postreload-gcse.o \
diff --git a/gcc/ordered-hash-map-tests.cc b/gcc/ordered-hash-map-tests.cc
new file mode 100644
index 0000000..266f60c
--- /dev/null
+++ b/gcc/ordered-hash-map-tests.cc
@@ -0,0 +1,247 @@ 
+/* Unit tests for ordered-hash-map.h.
+   Copyright (C) 2015-2019 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "opts.h"
+#include "hash-set.h"
+#include "fixed-value.h"
+#include "alias.h"
+#include "flags.h"
+#include "symtab.h"
+#include "tree-core.h"
+#include "stor-layout.h"
+#include "tree.h"
+#include "stringpool.h"
+#include "ordered-hash-map.h"
+#include "selftest.h"
+
+#if CHECKING_P
+
+namespace selftest {
+
+/* Populate *OUT_KVS with the key/value pairs of M.  */
+
+template <typename HashMap, typename Key, typename Value>
+static void
+get_kv_pairs (const HashMap &m,
+	      auto_vec<std::pair<Key, Value> > *out_kvs)
+{
+  for (typename HashMap::iterator iter = m.begin ();
+       iter != m.end ();
+       ++iter)
+    out_kvs->safe_push (std::make_pair ((*iter).first, (*iter).second));
+}
+
+/* Construct an ordered_hash_map <const char *, int> and verify that
+   various operations work correctly.  */
+
+static void
+test_map_of_strings_to_int ()
+{
+  ordered_hash_map <const char *, int> m;
+
+  const char *ostrich = "ostrich";
+  const char *elephant = "elephant";
+  const char *ant = "ant";
+  const char *spider = "spider";
+  const char *millipede = "Illacme plenipes";
+  const char *eric = "half a bee";
+
+  /* A fresh hash_map should be empty.  */
+  ASSERT_EQ (0, m.elements ());
+  ASSERT_EQ (NULL, m.get (ostrich));
+
+  /* Populate the hash_map.  */
+  ASSERT_EQ (false, m.put (ostrich, 2));
+  ASSERT_EQ (false, m.put (elephant, 4));
+  ASSERT_EQ (false, m.put (ant, 6));
+  ASSERT_EQ (false, m.put (spider, 8));
+  ASSERT_EQ (false, m.put (millipede, 750));
+  ASSERT_EQ (false, m.put (eric, 3));
+
+  /* Verify that we can recover the stored values.  */
+  ASSERT_EQ (6, m.elements ());
+  ASSERT_EQ (2, *m.get (ostrich));
+  ASSERT_EQ (4, *m.get (elephant));
+  ASSERT_EQ (6, *m.get (ant));
+  ASSERT_EQ (8, *m.get (spider));
+  ASSERT_EQ (750, *m.get (millipede));
+  ASSERT_EQ (3, *m.get (eric));
+
+  /* Verify that the order of insertion is preserved.  */
+  auto_vec<std::pair<const char *, int> > kvs;
+  get_kv_pairs (m, &kvs);
+  ASSERT_EQ (kvs.length (), 6);
+  ASSERT_EQ (kvs[0].first, ostrich);
+  ASSERT_EQ (kvs[0].second, 2);
+  ASSERT_EQ (kvs[1].first, elephant);
+  ASSERT_EQ (kvs[1].second, 4);
+  ASSERT_EQ (kvs[2].first, ant);
+  ASSERT_EQ (kvs[2].second, 6);
+  ASSERT_EQ (kvs[3].first, spider);
+  ASSERT_EQ (kvs[3].second, 8);
+  ASSERT_EQ (kvs[4].first, millipede);
+  ASSERT_EQ (kvs[4].second, 750);
+  ASSERT_EQ (kvs[5].first, eric);
+  ASSERT_EQ (kvs[5].second, 3);
+}
+
+/* Construct an ordered_hash_map using int_hash and verify that various
+   operations work correctly.  */
+
+static void
+test_map_of_int_to_strings ()
+{
+  const int EMPTY = -1;
+  const int DELETED = -2;
+  typedef int_hash <int, EMPTY, DELETED> int_hash_t;
+  ordered_hash_map <int_hash_t, const char *> m;
+
+  const char *ostrich = "ostrich";
+  const char *elephant = "elephant";
+  const char *ant = "ant";
+  const char *spider = "spider";
+  const char *millipede = "Illacme plenipes";
+  const char *eric = "half a bee";
+
+  /* A fresh hash_map should be empty.  */
+  ASSERT_EQ (0, m.elements ());
+  ASSERT_EQ (NULL, m.get (2));
+
+  /* Populate the hash_map.  */
+  ASSERT_EQ (false, m.put (2, ostrich));
+  ASSERT_EQ (false, m.put (4, elephant));
+  ASSERT_EQ (false, m.put (6, ant));
+  ASSERT_EQ (false, m.put (8, spider));
+  ASSERT_EQ (false, m.put (750, millipede));
+  ASSERT_EQ (false, m.put (3, eric));
+
+  /* Verify that we can recover the stored values.  */
+  ASSERT_EQ (6, m.elements ());
+  ASSERT_EQ (*m.get (2), ostrich);
+  ASSERT_EQ (*m.get (4), elephant);
+  ASSERT_EQ (*m.get (6), ant);
+  ASSERT_EQ (*m.get (8), spider);
+  ASSERT_EQ (*m.get (750), millipede);
+  ASSERT_EQ (*m.get (3), eric);
+
+  /* Verify that the order of insertion is preserved.  */
+  auto_vec<std::pair<int, const char *> > kvs;
+  get_kv_pairs (m, &kvs);
+  ASSERT_EQ (kvs.length (), 6);
+  ASSERT_EQ (kvs[0].first, 2);
+  ASSERT_EQ (kvs[0].second, ostrich);
+  ASSERT_EQ (kvs[1].first, 4);
+  ASSERT_EQ (kvs[1].second, elephant);
+  ASSERT_EQ (kvs[2].first, 6);
+  ASSERT_EQ (kvs[2].second, ant);
+  ASSERT_EQ (kvs[3].first, 8);
+  ASSERT_EQ (kvs[3].second, spider);
+  ASSERT_EQ (kvs[4].first, 750);
+  ASSERT_EQ (kvs[4].second, millipede);
+  ASSERT_EQ (kvs[5].first, 3);
+  ASSERT_EQ (kvs[5].second, eric);
+}
+
+/* Verify that we can remove items from an ordered_hash_map.  */
+
+static void
+test_removal ()
+{
+  ordered_hash_map <const char *, int> m;
+
+  const char *ostrich = "ostrich";
+  ASSERT_EQ (false, m.put (ostrich, 2));
+
+  ASSERT_EQ (1, m.elements ());
+  ASSERT_EQ (2, *m.get (ostrich));
+
+  {
+    auto_vec<std::pair<const char *, int> > kvs;
+    get_kv_pairs (m, &kvs);
+    ASSERT_EQ (kvs.length (), 1);
+    ASSERT_EQ (kvs[0].first, ostrich);
+    ASSERT_EQ (kvs[0].second, 2);
+  }
+
+  m.remove (ostrich);
+
+  ASSERT_EQ (0, m.elements ());
+  {
+    auto_vec<std::pair<const char *, int> > kvs;
+    get_kv_pairs (m, &kvs);
+    ASSERT_EQ (kvs.length (), 0);
+  }
+
+  /* Reinsertion (with a different value).  */
+  ASSERT_EQ (false, m.put (ostrich, 42));
+  ASSERT_EQ (1, m.elements ());
+  ASSERT_EQ (42, *m.get (ostrich));
+  {
+    auto_vec<std::pair<const char *, int> > kvs;
+    get_kv_pairs (m, &kvs);
+    ASSERT_EQ (kvs.length (), 1);
+    ASSERT_EQ (kvs[0].first, ostrich);
+    ASSERT_EQ (kvs[0].second, 42);
+  }
+}
+
+/* Verify that ordered_hash_map's copy-ctor works.  */
+
+static void
+test_copy_ctor ()
+{
+  ordered_hash_map <const char *, int> m;
+
+  const char *ostrich = "ostrich";
+  ASSERT_EQ (false, m.put (ostrich, 2));
+
+  ASSERT_EQ (1, m.elements ());
+  ASSERT_EQ (2, *m.get (ostrich));
+
+  ordered_hash_map <const char *, int> copy (m);
+  ASSERT_EQ (1, copy.elements ());
+  ASSERT_EQ (2, *copy.get (ostrich));
+
+  /* Remove from source.  */
+  m.remove (ostrich);
+  ASSERT_EQ (0, m.elements ());
+
+  /* Copy should be unaffected.  */
+  ASSERT_EQ (1, copy.elements ());
+  ASSERT_EQ (2, *copy.get (ostrich));
+}
+
+/* Run all of the selftests within this file.  */
+
+void
+ordered_hash_map_tests_cc_tests ()
+{
+  test_map_of_strings_to_int ();
+  test_map_of_int_to_strings ();
+  test_removal ();
+  test_copy_ctor ();
+}
+
+} // namespace selftest
+
+#endif /* CHECKING_P */
diff --git a/gcc/ordered-hash-map.h b/gcc/ordered-hash-map.h
new file mode 100644
index 0000000..4e35cad
--- /dev/null
+++ b/gcc/ordered-hash-map.h
@@ -0,0 +1,184 @@ 
+/* A type-safe hash map that retains the insertion order of keys.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+
+#ifndef GCC_ORDERED_HASH_MAP_H
+#define GCC_ORDERED_HASH_MAP_H
+
+/* Notes:
+   - The keys must be PODs, since vec<> uses assignment to populate slots
+     without properly initializing them.
+   - doesn't have GTY support.
+   - supports removal, but retains order of original insertion.
+     (Removal might be better handled by using a doubly-linked list
+     of nodes, holding the values).  */
+
+template<typename KeyId, typename Value,
+	 typename Traits>
+class ordered_hash_map
+{
+  typedef typename Traits::key_type Key;
+
+public:
+  ordered_hash_map () {}
+
+  ordered_hash_map (const ordered_hash_map &other)
+  : m_map (other.m_map),
+    m_keys (other.m_keys.length ()),
+    m_key_index (other.m_key_index)
+  {
+     unsigned i;
+     Key key;
+     FOR_EACH_VEC_ELT (other.m_keys, i, key)
+       m_keys.quick_push (key);
+  }
+
+  /* If key K isn't already in the map add key K with value V to the map, and
+     return false.  Otherwise set the value of the entry for key K to be V and
+     return true.  */
+
+  bool put (const Key &k, const Value &v)
+  {
+    bool existed = m_map.put (k, v);
+    if (!existed)
+      {
+        bool key_present;
+        int &slot = m_key_index.get_or_insert (k, &key_present);
+        if (!key_present)
+          {
+             slot = m_keys.length ();
+             m_keys.safe_push (k);
+          }
+      }
+    return existed;
+  }
+
+  /* If the passed in key is in the map return its value otherwise NULL.  */
+
+  Value *get (const Key &k)
+  {
+    return m_map.get (k);
+  }
+
+  /* Removing a key removes it from the map, but retains the insertion
+     order.  */
+
+  void remove (const Key &k)
+  {
+     m_map.remove (k);
+  }
+
+  size_t elements () const { return m_map.elements (); }
+
+  class iterator
+  {
+  public:
+    explicit iterator (const ordered_hash_map &map, unsigned idx) :
+      m_ordered_hash_map (map), m_idx (idx) {}
+
+    iterator &operator++ ()
+    {
+       /* Increment m_idx until we find a non-deleted element, or go beyond
+	  the end.  */
+       while (1)
+	 {
+	   ++m_idx;
+	   if (valid_index_p ())
+	     break;
+	}
+      return *this;
+    }
+
+    /* Can't use std::pair here, because GCC before 4.3 don't handle
+       std::pair where template parameters are references well.
+       See PR86739.  */
+    struct reference_pair {
+      const Key &first;
+      Value &second;
+
+      reference_pair (const Key &key, Value &value)
+      : first (key), second (value) {}
+
+      template <typename K, typename V>
+      operator std::pair<K, V> () const { return std::pair<K, V> (first, second); }
+    };
+
+    reference_pair operator* ()
+    {
+      const Key &k = m_ordered_hash_map.m_keys[m_idx];
+      Value *slot
+        = const_cast<ordered_hash_map &> (m_ordered_hash_map).get (k);
+      gcc_assert (slot);
+      return reference_pair (k, *slot);
+    }
+
+    bool
+    operator != (const iterator &other) const
+    {
+      return m_idx != other.m_idx;
+    }
+
+    /* Treat one-beyond-the-end as valid, for handling the "end" case.  */
+
+    bool valid_index_p () const
+    {
+      if (m_idx > m_ordered_hash_map.m_keys.length ())
+	return false;
+      if (m_idx == m_ordered_hash_map.m_keys.length ())
+	return true;
+      const Key &k = m_ordered_hash_map.m_keys[m_idx];
+      Value *slot
+	= const_cast<ordered_hash_map &> (m_ordered_hash_map).get (k);
+      return slot != NULL;
+    }
+
+    const ordered_hash_map &m_ordered_hash_map;
+    unsigned m_idx;
+  };
+
+  /* Standard iterator retrieval methods.  */
+
+  iterator begin () const
+  {
+    iterator i = iterator (*this, 0);
+    while (!i.valid_index_p () && i != end ())
+      ++i;
+    return i;
+  }
+  iterator end () const { return iterator (*this, m_keys.length ()); }
+
+private:
+  /* The underlying map.  */
+  hash_map<KeyId, Value, Traits> m_map;
+
+  /* The ordering of the keys.  */
+  auto_vec<Key> m_keys;
+
+  /* For each key that's ever been in the map, its index within m_keys.  */
+  hash_map<KeyId, int> m_key_index;
+};
+
+/* Two-argument form.  */
+
+template<typename Key, typename Value,
+	 typename Traits = simple_hashmap_traits<default_hash_traits<Key>,
+						 Value> >
+class ordered_hash_map;
+
+#endif /* GCC_ORDERED_HASH_MAP_H */
diff --git a/gcc/selftest-run-tests.c b/gcc/selftest-run-tests.c
index 92c8de7..f41244b 100644
--- a/gcc/selftest-run-tests.c
+++ b/gcc/selftest-run-tests.c
@@ -76,6 +76,7 @@  selftest::run_tests ()
   cgraph_c_tests ();
   optinfo_emit_json_cc_tests ();
   opt_problem_cc_tests ();
+  ordered_hash_map_tests_cc_tests ();
 
   /* Mid-level data structures.  */
   input_c_tests ();
diff --git a/gcc/selftest.h b/gcc/selftest.h
index c67b688..933d1cd 100644
--- a/gcc/selftest.h
+++ b/gcc/selftest.h
@@ -242,6 +242,7 @@  extern void input_c_tests ();
 extern void json_cc_tests ();
 extern void opt_problem_cc_tests ();
 extern void optinfo_emit_json_cc_tests ();
+extern void ordered_hash_map_tests_cc_tests ();
 extern void predict_c_tests ();
 extern void pretty_print_c_tests ();
 extern void read_rtl_function_c_tests ();