algorithms/data-structure/hash-table/hashtable.go at master · TTWShell/algorithms · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
package hashtable

import (
	"fmt"
	"math"
	"sync"
)

const (
	// A = (sqrt(5) -1)/2，来自算法导论第三版
	A       = 0.6180339887
	MinSize = 10
	// Maximum average load of a bucket that triggers growth.
	LoadFactor = 0.65
)

type KeyError struct {
	s string
}

func (e *KeyError) Error() string {
	return e.s
}

type item struct {
	key   interface{}
	value interface{}
}

// 散列函数使用乘法散列法，冲突处理使用开放定址法（线性探测）的哈希表实现。
type HashTable struct {
	items []*item
	size  int
	count int
	sync.RWMutex
}

func NewHT(size int) *HashTable {
	if size >= MinSize {
		return &HashTable{size: size, items: make([]*item, size, size)}
	}
	panic(fmt.Sprintf("size must > %d", MinSize))
}

// 散列函数，支持int、string类型的key
func (ht *HashTable) hash(key interface{}) int {
	var tmp int
	switch k := key.(type) {
	case int:
		tmp = int(k)
	case string:
		// http://algs4.cs.princeton.edu/lectures/34HashTables.pdf
		// Horner's Method to hash string of length L (O(L))
		hash := int32(0)
		for i := 0; i < len(k); i++ {
			// 31 * i == (i << 5) - i
			hash = int32(k[i]) + int32(hash<<5-hash)
		}
		tmp = int(math.Abs(float64(hash)))
	default:
		panic("Only support int、string type key")
	}

	// 乘法散列法，来自算法导论第三版
	multip := float64(tmp) * A
	decimalPart := multip - float64(int(multip))
	return int(math.Floor(float64(ht.size) * decimalPart))
}

// 线性探测
func (ht *HashTable) linearProbing(hash, i int) int {
	return (hash + i) % ht.size
}

func (ht *HashTable) rehash() {
	oldSize, newSize := ht.size, ht.size<<1
	if newSize > oldSize {
		ht.size = newSize
		newItems := make([]*item, newSize, newSize)
		for _, item := range ht.items {
			if item == nil {
				continue
			}
			hash := ht.hash(item.key)
			for i := 0; i < newSize; i++ {
				tmp := ht.linearProbing(hash, i)
				if res := newItems[tmp]; res == nil {
					newItems[tmp] = item
					break
				}
			}
		}
		ht.items = newItems
	}
}

// Update if key is exists else create. Auto rehash(oldSize * 2).
func (ht *HashTable) Put(key, value interface{}) {
	ht.Lock()
	defer ht.Unlock()

	if float64(ht.count) >= float64(ht.size)*LoadFactor {
		ht.rehash()
	}

	hash := ht.hash(key)
	for i := 0; i < ht.size; i++ {
		tmp := ht.linearProbing(hash, i)
		if res := ht.items[tmp]; res == nil {
			ht.items[tmp] = &item{key: key, value: value}
			ht.count++
			return
		} else if res.key == key {
			// update
			res.value = value
			return
		}
	}
	panic("hash table overflow")
}

// Return (value, error). If key not exists, return KeyError.
func (ht *HashTable) Get(key interface{}) (value interface{}, err error) {
	ht.RLock()
	defer ht.RUnlock()

	hash := ht.hash(key)
	for i := 0; i < ht.size; i++ {
		tmp := ht.linearProbing(hash, i)
		// 如果找不到则必定不存在，否则，逐个比较key
		if ht.items[tmp] == nil {
			break
		} else if ht.items[tmp].key == key {
			return ht.items[tmp].value, nil
		}
	}
	return nil, &KeyError{"key no exists"}
}

// Return (value, error). If key not exists, return KeyError.
func (ht *HashTable) Pop(key interface{}) (value interface{}, err error) {
	ht.Lock()
	defer ht.Unlock()

	hash := ht.hash(key)
	for i := 0; i < ht.size; i++ {
		tmp := ht.linearProbing(hash, i)
		if ht.items[tmp] == nil {
			break
		} else if ht.items[tmp].key == key {
			value = ht.items[tmp].value
			ht.items[tmp] = nil
			ht.count--
			return value, nil
		}
	}
	return nil, &KeyError{"key no exists"}
}