Skip to content
Snippets Groups Projects
Unverified Commit 78bf415a authored by Yorick Peterse's avatar Yorick Peterse
Browse files

Merge Table and HashMap together

This simplifies the HashMap implementation, and reduces the amount of
objects that need to be allocated.
parent fa2b866c
No related branches found
No related tags found
No related merge requests found
Loading
Loading
@@ -7,7 +7,7 @@ import std::operators::Equal
import std::process
import std::random
 
## The load factor of a Table before it should be resized.
## The load factor of a `HashMap` before it should be resized.
let LOAD_FACTOR = 0.75
 
## The default `Hasher` used for a `HashMap`.
Loading
Loading
@@ -134,19 +134,23 @@ object Pair!(K: Hash + Equal, V) {
}
}
 
## A raw HashMap-like structure that can be used for building high level data
## structures such as HashMap itself.
## An unordered hash map using linear probing and Robin Hood bucket stealing.
##
## The keys in a `HashMap` can be any object that implements the `Hash` and
## `Equal` traits. The values can be of any type. It's possible to store keys
## (or values) of different types but this will require the use of `Dynamic`.
##
## A Table uses linear probing for finding values and Robin Hood hashing.
## Removals are performed using backwards shift deletion.
## A `HashMap` is unordered, meaning that keys can be returned in a (seemingly)
## random order.
##
## For more information on these algorithms you can refer to the following
## resources:
## `HashMap` uses linear probing for finding values and Robin Hood hashing.
## Removals are performed using backwards shift deletion. For more information
## on these algorithms you can refer to the following resources:
##
## * http://codecapsule.com/2013/11/11/robin-hood-hashing/
## * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
## * https://www.sebastiansylvan.com/post/robin-hood-hashing-should-be-your-default-hash-table-implementation/
object Table!(K: Hash + Equal, V) {
object HashMap!(K: Hash + Equal, V) {
## The state to use for creating hashers.
@random_state: RandomState
 
Loading
Loading
@@ -154,7 +158,7 @@ object Table!(K: Hash + Equal, V) {
## pair.
@buckets: Array!(?Pair!(K, V))
 
## The number of key-value pairs stored in this table.
## The number of key-value pairs stored in this `HashMap`.
@length: Integer
 
## The number of values that can be stored before resizing.
Loading
Loading
@@ -165,220 +169,6 @@ object Table!(K: Hash + Equal, V) {
## time we want to check if we need to resize.
@resize_threshold: Integer
 
## Creates a new, empty table.
##
## No buckets are allocated up front: the list of buckets starts out empty,
## while the capacity and resize threshold both start at 1.
def init {
  @length = 0
  @capacity = 1
  @resize_threshold = 1
  @buckets = []
  @random_state = RandomState.new
}
## Returns the buckets in this table.
##
## Every bucket is either a `Pair` or `Nil` when the bucket is not in use. The
## array returned is the one used internally, not a copy of it.
def buckets -> Array!(?Pair!(K, V)) {
@buckets
}
## Returns the number of key-value pairs stored in this table.
##
## This is the number of pairs inserted and not yet removed, not the number of
## buckets.
def length -> Integer {
@length
}
## Resizes and rehashes the table.
##
## The capacity is doubled, the resize threshold is recalculated using
## `LOAD_FACTOR`, and every pair from the old buckets is re-inserted into a new
## list of buckets. The number of stored pairs (`@length`) is not changed.
def resize {
let old_buckets = @buckets
@capacity = @capacity * 2
@resize_threshold = (LOAD_FACTOR * @capacity.to_float).to_integer
@buckets = []
# Assigning the last index sizes the new array to `@capacity` buckets.
# NOTE(review): this relies on an out-of-bounds assignment padding the array
# with Nil; confirm against the Array implementation.
@buckets[@capacity - 1] = Nil
old_buckets.each_with_index do (pair, index) {
pair.if_true {
# Distances are relative to the desired bucket, which changes along with the
# capacity, so counting has to start from zero again.
pair.reset_distance
rehash_pair(pair!)
}
}
}
## Returns `True` if this table should be resized.
##
## This is the case once the number of stored pairs has reached the resize
## threshold.
def resize? -> Boolean {
@length >= @resize_threshold
}
## Hashes `key` using a hasher obtained from this table's random state.
##
## The resulting hash is what `desired_bucket` expects as its input.
def hash_key(key: K) -> Integer {
  let key_hasher = @random_state.to_hasher

  key.hash(key_hasher)
  key_hasher.to_hash
}
## Returns the desired bucket index for the given hash.
##
## This is the result of `hash % @capacity`. The probing methods also use it to
## wrap `index + 1` around to the start of the buckets.
def desired_bucket(hash: Integer) -> Integer {
hash % @capacity
}
## Inserts a new pair into the table.
##
## Probing starts at the desired bucket of the pair's hash and moves forward one
## bucket at a time, wrapping around via `desired_bucket`. The distance of the
## pair being inserted is increased for every bucket it moves past.
##
## If a pair with an equal key is found it is overwritten and the length is left
## unchanged. The length is only increased when a pair ends up in an empty
## bucket.
##
## This method does not resize the table.
def insert_pair(mut pair: Pair!(K, V)) {
let mut index = desired_bucket(pair.hash)
{
let existing = @buckets[index]
existing.if true: {
# The bucket is in use: overwrite the pair if the keys are equal.
existing.key == pair.key
.if_true {
@buckets[index] = pair
return
}
# NOTE(review): `replace_with?` is defined on `Pair`, outside of this view;
# presumably it compares the probe distances of the two pairs; confirm.
existing.replace_with?(pair).if_true {
# The new pair takes over this bucket. Probing continues in order to find a
# bucket for the pair that was displaced.
@buckets[index] = pair
pair = existing!
}
}, false: {
# The bucket is empty: store the pair and stop probing.
@length += 1
@buckets[index] = pair
return
}
index = desired_bucket(index + 1)
pair.increase_distance
}.loop
}
## Rehashes an existing pair into the list of buckets.
##
## This follows the same probing steps as `insert_pair`, but it does not compare
## keys and does not change the length. `resize` uses it to move pairs that are
## already counted into the new list of buckets.
def rehash_pair(mut pair: Pair!(K, V)) {
let mut index = desired_bucket(pair.hash)
{
let existing = @buckets[index]
# The bucket is empty: store the pair and stop probing.
existing.if_false {
@buckets[index] = pair
return
}
existing.replace_with?(pair).if_true {
# The pair takes over this bucket. Probing continues in order to find a bucket
# for the pair that was displaced.
@buckets[index] = pair
pair = existing!
}
index = desired_bucket(index + 1)
pair.increase_distance
}.loop
}
## Returns the index of the bucket the key resides in.
##
## Probing starts at the desired bucket of the key's hash and moves forward one
## bucket at a time, wrapping around via `desired_bucket`. `Nil` is returned if
## an empty bucket is found, or if all buckets have been visited without finding
## the key.
def bucket_index(key: K) -> ?Integer {
let hash = hash_key(key)
let mut index = desired_bucket(hash)
# Remember where probing started so a full cycle can be detected.
let desired = index
let mut pair = @buckets[index]
{ pair.key != key }.while_true {
# Finding an empty bucket can mean two things:
#
# 1. The initial desired bucket is not used, meaning our key definitely
# does not exist.
# 2. The initial desired bucket is used, and we ran into the next
# available bucket. This means the key also does not exist, because it
# would otherwise use this available bucket.
#
# This early return ensures we don't iterate over all buckets if we are
# certain we won't be able to find the key.
pair == Nil
.if_true {
return
}
index = desired_bucket(index + 1)
index == desired
.if_true {
# We cycled through all buckets but didn't find a matching pair.
return
}
pair = @buckets[index]
}
index
}
## Removes the pair stored under `key` from this table.
##
## Returns the value of the removed pair, or `Nil` if the key is not present.
def remove(key: K) -> ?V {
  let position = bucket_index(key)

  position.if true: {
    let removed = @buckets[position!]

    @buckets[position!] = Nil
    @length -= 1

    # Close the gap that was left behind by moving the pairs that follow it
    # back by one bucket.
    backwards_shift(position! + 1)

    removed.value
  }, false: {
    Nil
  }
}
## Shifts all pairs to the left starting at the given bucket index.
##
## Starting at `index`, every pair that is not in its desired bucket (a distance
## greater than zero) is moved back by one bucket and has its distance reduced.
## This stops at the first empty bucket, or the first pair that is already in
## its desired bucket.
##
## Insertion probes wrap around from the last bucket to the first one, so the
## shift wraps around as well. Without this, removing the pair in the last
## bucket would leave displaced pairs at the start of the buckets sitting
## behind an empty bucket, and lookups for their keys would stop at that empty
## bucket.
def backwards_shift(mut index: Integer) {
  # The bucket to move the next pair into. Callers pass the index that directly
  # follows the bucket that was vacated.
  let mut vacant = index - 1

  index = desired_bucket(index)

  let mut pair = @buckets[index]

  { pair.and { pair!.distance.positive? } }.while_true {
    @buckets[vacant] = pair
    @buckets[index] = Nil
    pair.reduce_distance

    # The bucket we just emptied is the target for the next pair.
    vacant = index
    index = desired_bucket(index + 1)
    pair = @buckets[index]
  }
}
}
impl Index!(K, V) for Table!(K, V) {
  ## Returns the value stored under `key`, or `Nil` if the key is not present.
  def [](key: K) -> ?V {
    let position = bucket_index(key)

    position.if true: {
      @buckets[position!].value
    }, false: {
      Nil
    }
  }
}
impl SetIndex!(K, V) for Table!(K, V) {
  ## Stores `value` under `key`, returning the value that was stored.
  ##
  ## The table is resized first if the resize threshold has been reached.
  def []=(key: K, value: V) -> V {
    resize?.if_true {
      resize
    }

    let pair = Pair.new(key: key, value: value, hash: hash_key(key))

    insert_pair(pair)
    value
  }
}
## An unordered hash map using linear probing and Robin Hood bucket stealing.
##
## The keys in a `HashMap` can be any object that implements the `Hash` and
## `Equal` traits. The values can be of any type. It's possible to store keys
## (or values) of different types but this will require the use of `Dynamic`.
##
## A `HashMap` is unordered, meaning that keys can be returned in a (seemingly)
## random order.
object HashMap!(K: Hash + Equal, V) {
@table: Table!(K, V)
## Returns a `HashMap` using two arrays: one containing the keys and one
## containing the values.
##
Loading
Loading
@@ -431,7 +221,16 @@ object HashMap!(K: Hash + Equal, V) {
 
## Creates a new, empty `HashMap`.
def init {
@table = Table.new
@random_state = RandomState.new
@buckets = []
@length = 0
@capacity = 1
@resize_threshold = 1
}
## Returns the buckets of this `HashMap`.
def buckets -> Array!(?Pair!(K, V)) {
@buckets
}
 
## Removes the given key, returning its value if the key was present in the
Loading
Loading
@@ -453,28 +252,20 @@ object HashMap!(K: Hash + Equal, V) {
##
## map.remove('name') # => 'Alice'
def remove(key: K) -> ?V {
@table.remove(key)
}
let index = _bucket_index(key)
 
## Returns `True` if the map is empty.
##
## # Examples
##
## Using a map that is empty:
##
## let map = HashMap.new
##
## map.empty? # => True
##
## Using a map that is not empty:
##
## let mut map = HashMap.new
##
## map['name'] = 'Alice'
##
## map.empty? # => False
def empty? -> Boolean {
length.zero?
index.if true: {
let pair = @buckets[index!]
@buckets[index!] = Nil
@length -= 1
_backwards_shift(index! + 1)
pair.value
}, false: {
Nil
}
}
 
## Calls the supplied `Block` for every key value pair in this map.
Loading
Loading
@@ -517,7 +308,7 @@ object HashMap!(K: Hash + Equal, V) {
## }
def iter -> Iterator!(Pair!(K, V)) {
let mut index = 0
let max = @table.buckets.length
let max = @buckets.length
 
Enumerator.new(
while: { index < max },
Loading
Loading
@@ -528,7 +319,7 @@ object HashMap!(K: Hash + Equal, V) {
index < max
.and { found == Nil }
}.while_true {
found = @table.buckets[index]
found = @buckets[index]
index += 1
}
 
Loading
Loading
@@ -582,12 +373,152 @@ object HashMap!(K: Hash + Equal, V) {
## map.key?('name') # => True
## map.key?('city') # => False
def key?(key: K) -> Boolean {
@table.bucket_index(key).if true: {
_bucket_index(key).if true: {
True
}, false: {
False
}
}
## Resizes and rehashes `self`.
##
## The capacity is doubled, the resize threshold is recalculated using
## `LOAD_FACTOR`, and every pair from the old buckets is re-inserted into a new
## list of buckets. The number of stored pairs (`@length`) is not changed.
def rehash {
let old_buckets = @buckets
@capacity = @capacity * 2
@resize_threshold = (LOAD_FACTOR * @capacity.to_float).to_integer
@buckets = []
# Assigning the last index sizes the new array to `@capacity` buckets.
# NOTE(review): this relies on an out-of-bounds assignment padding the array
# with Nil; confirm against the Array implementation.
@buckets[@capacity - 1] = Nil
old_buckets.each_with_index do (pair, index) {
pair.if_true {
# Distances are relative to the desired bucket, which changes along with the
# capacity, so counting has to start from zero again.
pair.reset_distance
_rehash_pair(pair!)
}
}
}
## Hashes `key` using a hasher obtained from the random state of this `HashMap`.
##
## The resulting hash is what `_desired_bucket` expects as its input.
def _hash_key(key: K) -> Integer {
  let key_hasher = @random_state.to_hasher

  key.hash(key_hasher)
  key_hasher.to_hash
}
## Inserts a new pair into `self`.
##
## The `Pair` to insert must be pre-hashed using the `Hasher` used internally
## by this `HashMap`, otherwise it might not be retrieved later.
##
## Probing starts at the desired bucket of the pair's hash and moves forward one
## bucket at a time, wrapping around via `_desired_bucket`. The distance of the
## pair being inserted is increased for every bucket it moves past.
##
## If a pair with an equal key is found it is overwritten and the length is left
## unchanged. The length is only increased when a pair ends up in an empty
## bucket.
##
## This method does not resize the `HashMap`.
def _insert_pair(mut pair: Pair!(K, V)) {
let mut index = _desired_bucket(pair.hash)
{
let existing = @buckets[index]
existing.if true: {
# The bucket is in use: overwrite the pair if the keys are equal.
existing.key == pair.key
.if_true {
@buckets[index] = pair
return
}
# NOTE(review): `replace_with?` is defined on `Pair`, outside of this view;
# presumably it compares the probe distances of the two pairs; confirm.
existing.replace_with?(pair).if_true {
# The new pair takes over this bucket. Probing continues in order to find a
# bucket for the pair that was displaced.
@buckets[index] = pair
pair = existing!
}
}, false: {
# The bucket is empty: store the pair and stop probing.
@length += 1
@buckets[index] = pair
return
}
index = _desired_bucket(index + 1)
pair.increase_distance
}.loop
}
## Rehashes an existing pair into the list of buckets.
##
## This follows the same probing steps as `_insert_pair`, but it does not
## compare keys and does not change the length. `rehash` uses it to move pairs
## that are already counted into the new list of buckets.
def _rehash_pair(mut pair: Pair!(K, V)) {
let mut index = _desired_bucket(pair.hash)
{
let existing = @buckets[index]
# The bucket is empty: store the pair and stop probing.
existing.if_false {
@buckets[index] = pair
return
}
existing.replace_with?(pair).if_true {
# The pair takes over this bucket. Probing continues in order to find a bucket
# for the pair that was displaced.
@buckets[index] = pair
pair = existing!
}
index = _desired_bucket(index + 1)
pair.increase_distance
}.loop
}
## Returns the desired bucket index for the given hash.
##
## This is the result of `hash % @capacity`. The probing methods also use it to
## wrap `index + 1` around to the start of the buckets.
def _desired_bucket(hash: Integer) -> Integer {
hash % @capacity
}
## Returns the index of the bucket the key resides in.
##
## Probing starts at the desired bucket of the key's hash and moves forward one
## bucket at a time, wrapping around via `_desired_bucket`. `Nil` is returned if
## an empty bucket is found, or if all buckets have been visited without finding
## the key.
def _bucket_index(key: K) -> ?Integer {
let hash = _hash_key(key)
let mut index = _desired_bucket(hash)
# Remember where probing started so a full cycle can be detected.
let desired = index
let mut pair = @buckets[index]
{ pair.key != key }.while_true {
# Finding an empty bucket can mean two things:
#
# 1. The initial desired bucket is not used, meaning our key definitely
# does not exist.
# 2. The initial desired bucket is used, and we ran into the next
# available bucket. This means the key also does not exist, because it
# would otherwise use this available bucket.
#
# This early return ensures we don't iterate over all buckets if we are
# certain we won't be able to find the key.
pair == Nil
.if_true {
return
}
index = _desired_bucket(index + 1)
index == desired
.if_true {
# We cycled through all buckets but didn't find a matching pair.
return
}
pair = @buckets[index]
}
index
}
## Shifts all pairs to the left starting at the given bucket index.
##
## Starting at `index`, every pair that is not in its desired bucket (a distance
## greater than zero) is moved back by one bucket and has its distance reduced.
## This stops at the first empty bucket, or the first pair that is already in
## its desired bucket.
##
## Insertion probes wrap around from the last bucket to the first one, so the
## shift wraps around as well. Without this, removing the pair in the last
## bucket would leave displaced pairs at the start of the buckets sitting
## behind an empty bucket, and lookups for their keys would stop at that empty
## bucket.
def _backwards_shift(mut index: Integer) {
  # The bucket to move the next pair into. Callers pass the index that directly
  # follows the bucket that was vacated.
  let mut vacant = index - 1

  index = _desired_bucket(index)

  let mut pair = @buckets[index]

  { pair.and { pair!.distance.positive? } }.while_true {
    @buckets[vacant] = pair
    @buckets[index] = Nil
    pair.reduce_distance

    # The bucket we just emptied is the target for the next pair.
    vacant = index
    index = _desired_bucket(index + 1)
    pair = @buckets[index]
  }
}
}
 
impl Equal for HashMap!(K, V) {
Loading
Loading
@@ -639,7 +570,13 @@ impl Index!(K, V) for HashMap!(K, V) {
##
## map['name'] # => 'Alice'
def [](key: K) -> ?V {
@table[key]
let index = _bucket_index(key)
index.if true: {
@buckets[index!].value
}, false: {
Nil
}
}
}
 
Loading
Loading
@@ -655,7 +592,14 @@ impl SetIndex!(K, V) for HashMap!(K, V) {
##
## map['name'] = 'Alice' # => 'Alice'
def []=(key: K, value: V) -> V {
@table[key] = value
@length >= @resize_threshold
.if_true {
rehash
}
_insert_pair(Pair.new(key: key, value: value, hash: _hash_key(key)))
value
}
}
 
Loading
Loading
@@ -678,6 +622,6 @@ impl Length for HashMap!(K, V) {
##
## map.length # => 1
def length -> Integer {
@table.length
@length
}
}
import std::hash_map::(self, DefaultHasher, Pair, RandomState, Table)
import std::hash_map::(self, DefaultHasher, Pair, RandomState)
import std::test
import std::test::assert
 
Loading
Loading
@@ -173,307 +173,130 @@ test.group('std::hash_map::Pair.hash') do (g) {
}
}
 
test.group('std::hash_map::Table.buckets') do (g) {
g.test('Obtaining the buckets in a Table') {
let table = Table.new
test.group('std::hash_map::HashMap.rehash') do (g) {
g.test('Rehashing an empty HashMap') {
let map = HashMap.new
 
assert.equal(table.buckets, [])
}
}
test.group('std::hash_map::Table.length') do (g) {
g.test('Obtaining the number of pairs in a Table') {
let table = Table.new
let pair = Pair.new(key: 'key', value: 'value', hash: 0)
assert.equal(map.buckets, [])
 
table.insert_pair(pair)
map.rehash
 
assert.equal(table.length, 1)
assert.equal(map.buckets, [Nil, Nil])
}
}
test.group('std::hash_map::Table.resize') do (g) {
g.test('Resizing a Table') {
let table = Table.new
 
assert.equal(table.buckets, [])
table.resize
assert.equal(table.buckets, [Nil, Nil])
}
g.test('Rehashing a Table') {
let table = Table.new
g.test('Rehashing a HashMap with pairs') {
let map = HashMap.new
let pair1 = Pair.new(key: 'a', value: 'value', hash: 0)
let pair2 = Pair.new(key: 'b', value: 'value', hash: 1)
 
table.resize
table.insert_pair(pair: pair1)
table.insert_pair(pair: pair2)
map.rehash
map._insert_pair(pair: pair1)
map._insert_pair(pair: pair2)
 
# Moving the pair to a bucket it shouldn't be in allows us to test if
# `resize` ends up moving pairs or not.
pair2.increase_distance
 
table.buckets[1] = Nil
table.buckets[2] = pair2
map.buckets[1] = Nil
map.buckets[2] = pair2
 
table.resize
map.rehash
 
assert.equal(pair1.distance, 0)
assert.equal(pair2.distance, 0)
 
assert.equal(table.buckets[0], pair1)
assert.equal(table.buckets[1], pair2)
assert.equal(table.buckets[2], Nil)
assert.equal(map.buckets[0], pair1)
assert.equal(map.buckets[1], pair2)
assert.equal(map.buckets[2], Nil)
}
}
 
test.group('std::hash_map::Table.resize?') do (g) {
g.test('Checking if a table needs to be resized') {
let table = Table.new
let pair = Pair.new(key: 'a', value: 'value', hash: 0)
assert.false(table.resize?)
table.insert_pair(pair)
assert.true(table.resize?)
}
}
test.group('std::hash_map::Table.hash_key') do (g) {
g.test("Hashing a key using the Table's hasher") {
let table = Table.new
# We can't really make any guarantees about the exact value returned, all we
# can guarantee is that the same key should produce the same hash.
let hash1 = table.hash_key('foo')
let hash2 = table.hash_key('foo')
let hash3 = table.hash_key('bar')
assert.equal(hash1, hash2)
assert.not_equal(hash1, hash3)
}
}
test.group('std::hash_map::Table.desired_bucket') do (g) {
g.test('Obtaining the desired bucket index of a hash') {
let table = Table.new
table.resize
assert.equal(table.desired_bucket(0), 0)
assert.equal(table.desired_bucket(1), 1)
assert.equal(table.desired_bucket(5), 1)
}
}
test.group('std::hash_map::Table.insert_pair') do (g) {
g.test('Inserting a Pair into a Table') {
let table = Table.new
test.group('std::hash_map::HashMap._insert_pair') do (g) {
g.test('Inserting a Pair into a HashMap') {
let map = HashMap.new
let pair = Pair.new(key: 'key', value: 'value', hash: 0)
 
table.insert_pair(pair)
map._insert_pair(pair)
 
assert.equal(table.length, 1)
assert.equal(table.buckets[0], pair)
assert.equal(map.length, 1)
assert.equal(map.buckets[0], pair)
}
 
g.test('Inserting a Pair into an existing bucket in a Table') {
let table = Table.new
g.test('Inserting a Pair into an existing bucket in a HashMap') {
let map = HashMap.new
let pair1 = Pair.new(key: 'a', value: 'a', hash: 0)
let pair2 = Pair.new(key: 'b', value: 'b', hash: 0)
 
table.resize
table.insert_pair(pair1)
table.insert_pair(pair2)
map.rehash
map._insert_pair(pair1)
map._insert_pair(pair2)
 
assert.equal(table.buckets[0], pair1)
assert.equal(table.buckets[1], pair2)
assert.equal(map.buckets[0], pair1)
assert.equal(map.buckets[1], pair2)
 
assert.equal(pair1.distance, 0)
assert.equal(pair2.distance, 1)
}
 
g.test('Inserting a Pair using an already used key') {
let table = Table.new
let map = HashMap.new
let pair1 = Pair.new(key: 'a', value: 'a', hash: 0)
let pair2 = Pair.new(key: 'a', value: 'b', hash: 0)
 
table.resize
table.insert_pair(pair1)
table.insert_pair(pair2)
map.rehash
map._insert_pair(pair1)
map._insert_pair(pair2)
 
assert.equal(table.buckets[0], pair2)
assert.equal(map.buckets[0], pair2)
}
 
g.test('Inserting a Pair after an unused bucket') {
let table = Table.new
let map = HashMap.new
let pair1 = Pair.new(key: 'one', value: 1, hash: 4764096362064740795)
let pair2 = Pair.new(key: 'two', value: 2, hash: -9161411174267222279)
let pair3 = Pair.new(key: 'three', value: 3, hash: 902578265635837404)
 
table.insert_pair(pair1)
table.resize
map._insert_pair(pair1)
map.rehash
 
table.insert_pair(pair2)
table.resize
map._insert_pair(pair2)
map.rehash
 
table.insert_pair(pair3)
map._insert_pair(pair3)
 
assert.equal(table.buckets[0], pair3)
assert.equal(table.buckets[1], pair2)
assert.equal(table.buckets[2], Nil)
assert.equal(table.buckets[3], pair1)
assert.equal(map.buckets[0], pair3)
assert.equal(map.buckets[1], pair2)
assert.equal(map.buckets[2], Nil)
assert.equal(map.buckets[3], pair1)
}
}
 
test.group('std::hash_map::Table.rehash_pair') do (g) {
g.test('Rehashing a Pair') {
let table = Table.new
let pair = Pair.new(key: 'key', value: 'value', hash: 0)
table.rehash_pair(pair)
assert.equal(table.buckets[0], pair)
}
g.test('Rehashing a Pair into an existing bucket in a Table') {
let table = Table.new
let pair1 = Pair.new(key: 'a', value: 'a', hash: 0)
let pair2 = Pair.new(key: 'b', value: 'b', hash: 0)
table.resize
table.rehash_pair(pair1)
table.rehash_pair(pair2)
assert.equal(table.buckets[0], pair1)
assert.equal(table.buckets[1], pair2)
test.group('std::hash_map::HashMap.remove') do (g) {
g.test('Removing an existing key from a HashMap') {
let map = %['a': 'b']
 
assert.equal(pair1.distance, 0)
assert.equal(pair2.distance, 1)
assert.equal(map.remove('a'), 'b')
assert.equal(map.buckets[0], Nil)
}
}
 
test.group('std::hash_map::Table.bucket_index') do (g) {
g.test('Obtaining the bucket index of an existing key') {
let table = Table.new
let pair = Pair.new(key: 'a', value: 'a', hash: 0)
g.test('Removing a non-existing key from a HashMap') {
let map: HashMap!(String, String) = HashMap.new
 
table.insert_pair(pair)
assert.equal(table.bucket_index('a'), 0)
}
g.test('Obtaining the bucket index of a non existing key') {
let table = Table.new
assert.equal(table.bucket_index('a'), Nil)
}
}
test.group('std::hash_map::Table.remove') do (g) {
g.test('Removing an existing key from a Table') {
let table = Table.new
let pair = Pair.new(key: 'a', value: 'a', hash: 0)
table.insert_pair(pair)
assert.equal(table.remove(pair.key), 'a')
assert.equal(table.buckets[0], Nil)
}
g.test('Removing a non-existing key from a Table') {
let table: Table!(String, String) = Table.new
assert.equal(table.remove('a'), Nil)
assert.equal(map.remove('a'), Nil)
}
 
g.test('Backwards shifting Pairs that follow the removed Pair') {
let table = Table.new
let map = HashMap.new
let pair1 = Pair.new(key: 'a', value: 'a', hash: 0)
let pair2 = Pair.new(key: 'b', value: 'b', hash: 0)
 
table.resize
table.insert_pair(pair1)
table.insert_pair(pair2)
table.remove(pair1.key)
assert.equal(table.buckets[0], pair2)
assert.equal(pair2.distance, 0)
}
}
test.group('std::hash_map::Table.backwards_shift') do (g) {
g.test('Performing a backwards shift starting at a particular bucket') {
let table = Table.new
let pair1 = Pair.new(key: 'a', value: 'a', hash: 0)
let pair2 = Pair.new(key: 'b', value: 'b', hash: 0)
let pair3 = Pair.new(key: 'c', value: 'c', hash: 0)
table.resize
table.resize
table.insert_pair(pair1)
table.insert_pair(pair2)
table.insert_pair(pair3)
table.backwards_shift(index: 1)
assert.equal(table.buckets[0], pair2)
assert.equal(table.buckets[1], pair3)
assert.equal(table.buckets[2], Nil)
map.rehash
map._insert_pair(pair1)
map._insert_pair(pair2)
map.remove(pair1.key)
 
assert.equal(map.buckets[0], pair2)
assert.equal(pair2.distance, 0)
assert.equal(pair3.distance, 1)
}
}
test.group('std::hash_map::Table.[]') do (g) {
g.test('Obtaining the value of a Pair by its key') {
let table = Table.new
let pair = Pair.new(key: 'a', value: 'b', hash: 0)
table.insert_pair(pair)
assert.equal(table['a'], 'b')
}
g.test('Obtaining the value of a Pair using a non-existing key') {
let table: Table!(String, String) = Table.new
assert.equal(table['a'], Nil)
}
}
test.group('std::hash_map::Table.[]=') do (g) {
g.test('Creating an inserting a Pair') {
let table = Table.new
let val1 = table['a'] = 'foo'
let val2 = table['b'] = 'bar'
assert.equal(val1, 'foo')
assert.equal(val2, 'bar')
assert.equal(table['a'], 'foo')
assert.equal(table['b'], 'bar')
assert.equal(table.length, 2)
}
g.test('Overwriting an existing Pair') {
let table = Table.new
let val1 = table['a'] = 'foo'
let val2 = table['a'] = 'bar'
assert.equal(val1, 'foo')
assert.equal(val2, 'bar')
assert.equal(table['a'], 'bar')
assert.equal(table.length, 1)
}
}
 
Loading
Loading
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment