-
Notifications
You must be signed in to change notification settings - Fork 47
/
Copy pathequivalence.py
250 lines (216 loc) · 10.4 KB
/
equivalence.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at:
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
# OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the
# License.
"""Provides utilities for determining whether two objects are equivalent under the Ion data model."""
import struct
from datetime import datetime
from decimal import Decimal
from math import isnan
from amazon.ion.core import IonType, Timestamp, TimestampPrecision, MICROSECOND_PRECISION, OffsetTZInfo, Multimap
from amazon.ion.simple_types import IonPyList, IonPyDict, IonPyTimestamp, IonPyNull, IonPySymbol, \
IonPyText, IonPyDecimal, IonPyFloat
from amazon.ion.symbols import SymbolToken
def obj_has_ion_type_and_annotation(obj):
    """Reports whether ``obj`` exposes both an ``ion_type`` and an ``ion_annotations`` attribute."""
    required_attributes = ('ion_type', 'ion_annotations')
    return all(hasattr(obj, attribute) for attribute in required_attributes)
def ion_equals(a, b, timestamps_instants_only=False):
    """Determines whether two objects are equivalent under the Ion data model.

    The result depends on which operands expose Ion metadata (the `ion_type` and `ion_annotations` attributes):

    * Neither operand exposes it: True only when the two values are equivalent under the Ion data model.
    * Exactly one operand exposes it: True only when that operand has no annotations and its value is equivalent
        to the other operand under the Ion data model.
    * Both operands expose it: True only when their Ion types and annotations are the same and their values are
        equivalent under the Ion data model.

    Swapping the operands does not change the result.

    Args:
        a (object): The first operand.
        b (object): The second operand.
        timestamps_instants_only (Optional[bool]): False if timestamp objects (datetime and its subclasses) should
            be compared according to the Ion data model (instant, precision, and offset must all be equal); True
            if they should be considered equivalent whenever they represent the same instant.

    Returns:
        bool: True if the operands are equivalent under the selected timestamp semantics, otherwise False.
    """
    compare = _ion_equals_timestamps_instants if timestamps_instants_only else _ion_equals_timestamps_data_model
    return compare(a, b)
def _ion_equals_timestamps_instants(a, b):
    """Compares a and b, with timestamps compared by instant only; nested values recurse with the same rule."""
    return _ion_equals(
        a,
        b,
        timestamp_comparison_func=_timestamp_instants_eq,
        recursive_comparison_func=_ion_equals_timestamps_instants,
    )
def _ion_equals_timestamps_data_model(a, b):
    """Compares a and b, with timestamps compared per the Ion data model; nested values recurse with the same rule."""
    return _ion_equals(
        a,
        b,
        timestamp_comparison_func=_timestamps_eq,
        recursive_comparison_func=_ion_equals_timestamps_data_model,
    )
def _ion_equals(a, b, timestamp_comparison_func, recursive_comparison_func):
    """Carries out the comparison described by ion_equals.

    Args:
        a (object): The first operand.
        b (object): The second operand.
        timestamp_comparison_func (Callable): Two-argument predicate used to compare timestamp operands.
        recursive_comparison_func (Callable): Two-argument predicate handed to the sequence and struct
            comparisons for comparing nested values.

    Returns:
        bool: True if the operands are equivalent, otherwise False.
    """
    # First pass: operands that carry an Ion type and annotations. Each operand takes a turn on the left-hand
    # side so that the outcome does not depend on operand order.
    for lhs, rhs in ((a, b), (b, a)):
        if not obj_has_ion_type_and_annotation(lhs):
            continue
        if obj_has_ion_type_and_annotation(rhs):
            # Both sides carry Ion metadata: their Ion types and annotations must agree.
            metadata_matches = lhs.ion_type is rhs.ion_type and _annotations_eq(lhs, rhs)
        else:
            # Only the left side carries Ion metadata: it can match a plain value only if it is unannotated.
            metadata_matches = not lhs.ion_annotations
        if not metadata_matches:
            return False
        if isinstance(lhs, IonPyList):
            return _sequences_eq(lhs, rhs, recursive_comparison_func)
        if isinstance(lhs, IonPyDict):
            return _structs_eq(lhs, rhs, recursive_comparison_func)
        if isinstance(lhs, IonPyTimestamp):
            return timestamp_comparison_func(lhs, rhs)
        if isinstance(lhs, IonPyNull):
            return isinstance(rhs, IonPyNull) or (rhs is None and lhs.ion_type is IonType.NULL)
        if isinstance(lhs, IonPySymbol) or (isinstance(lhs, IonPyText) and lhs.ion_type is IonType.SYMBOL):
            return _symbols_eq(lhs, rhs)
        if isinstance(lhs, IonPyDecimal):
            return _decimals_eq(lhs, rhs)
        if isinstance(lhs, IonPyFloat):
            return _floats_eq(lhs, rhs)
        return lhs == rhs

    # Second pass: neither operand carries Ion metadata. Again each operand takes a turn on the left-hand side;
    # in this pass b is tried first.
    for lhs, rhs in ((b, a), (a, b)):
        if isinstance(lhs, list):
            return _sequences_eq(lhs, rhs, recursive_comparison_func)
        if isinstance(lhs, dict):
            return _structs_eq(lhs, rhs, recursive_comparison_func)
        if isinstance(lhs, datetime):
            return timestamp_comparison_func(lhs, rhs)
        if isinstance(lhs, SymbolToken):
            return _symbols_eq(lhs, rhs)
        if isinstance(lhs, Decimal):
            return _decimals_eq(lhs, rhs)
        if isinstance(lhs, float):
            return _floats_eq(lhs, rhs)

    # Neither operand needs special handling; fall back to ordinary equality.
    return a == b
def _annotations_eq(a, b):
    """Compares the `ion_annotations` of a and b as sequences, using symbol equivalence for each pair."""
    annotations_a = a.ion_annotations
    annotations_b = b.ion_annotations
    return _sequences_eq(annotations_a, annotations_b, _symbols_eq)
def _sequences_eq(a, b, comparison_func):
assert isinstance(a, (list, tuple))
if not isinstance(b, (list, tuple)):
return False
sequence_len = len(a)
if sequence_len != len(b):
return False
for i in range(sequence_len):
if not comparison_func(a[i], b[i]):
return False
return True
def _structs_eq(a, b, comparison_func):
    """Compares two struct-like operands field by field, ignoring field order.

    When both operands can hold several values under one field name (IonPyDict or Multimap), the values stored
    under each name are compared as unordered collections in which every value must be paired with a distinct
    equivalent value from the other operand. Otherwise the single values stored under each name are compared.

    Args:
        a (dict|Multimap|IonPyDict): The first operand (enforced by assertion).
        b (object): The second operand; anything that is not a dict, Multimap, or IonPyDict is never
            equivalent to a.
        comparison_func (Callable): Two-argument predicate used to compare field values.

    Returns:
        bool: True if both operands have the same length, the same field names, and equivalent values under
            each name; otherwise False.
    """
    struct_types = (dict, Multimap, IonPyDict)
    multi_value_types = (IonPyDict, Multimap)
    assert isinstance(a, struct_types)
    if not isinstance(b, struct_types):
        return False
    if len(a) != len(b):
        return False
    both_multi_valued = isinstance(a, multi_value_types) and isinstance(b, multi_value_types)
    # Walk the fields in both directions so that a name present in only one operand is always detected.
    for left, right in ((a, b), (b, a)):
        for key in left.keys():
            if key not in right:
                return False
            if both_multi_valued:
                values_left = left.get_all_values(key)
                unmatched = list(right.get_all_values(key))
                if len(values_left) != len(unmatched):
                    return False
                # Pair every left value with a distinct right value. A plain membership test would wrongly
                # accept [1, 1, 2] against [1, 2, 2], because each value has *some* counterpart in both
                # directions. Greedy pairing is sufficient as long as comparison_func behaves as an equivalence
                # relation (NOTE(review): assumed of the Ion comparison functions passed in; confirm).
                for value_left in values_left:
                    for index, value_right in enumerate(unmatched):
                        if comparison_func(value_left, value_right):
                            del unmatched[index]
                            break
                    else:
                        return False
            elif not comparison_func(left[key], right[key]):
                return False
    return True
def _timestamps_eq(a, b):
    """Compares two timestamp operands for equivalence under the Ion data model.

    Args:
        a (datetime): The first operand (enforced by assertion).
        b (object): The second operand; anything that is not a datetime is never equivalent to a.

    Returns:
        bool: False if exactly one operand lacks a tzinfo, if the UTC offsets differ, or if the precision rules
            below are not met; otherwise the result of comparing the two operands with `==`.
    """
    assert isinstance(a, datetime)
    if not isinstance(b, datetime):
        return False
    # Local offsets must be equivalent: either both are absent, or both yield the same UTC offset.
    if (a.tzinfo is None) ^ (b.tzinfo is None):
        return False
    if a.utcoffset() != b.utcoffset():
        return False
    for a, b in ((a, b), (b, a)):
        if isinstance(a, Timestamp):
            if isinstance(b, Timestamp):
                # Both operands declare their precisions. They are only equivalent if their precisions are the
                # same. The fractional precision is compared by value (==) rather than identity, because two
                # equal values are not guaranteed to be the same object.
                same_precision = (
                    a.precision is b.precision
                    and a.fractional_precision == b.fractional_precision
                    and a.fractional_seconds == b.fractional_seconds
                )
                if same_precision:
                    # Precisions agree; only the final `==` comparison remains.
                    break
                return False
            elif a.precision is not TimestampPrecision.SECOND or a.fractional_precision != MICROSECOND_PRECISION:
                # Only one of the operands declares its precision. It is only equivalent to the other (a plain
                # datetime) if it has full microseconds precision.
                return False
    return a == b
def _timestamp_instants_eq(a, b):
"""Compares two timestamp operands for point-in-time equivalence only."""
assert isinstance(a, datetime)
if not isinstance(b, datetime):
return False
# datetime's __eq__ can't compare a None offset and a non-None offset. For these equivalence semantics, a None
# offset (unknown local offset) is treated equivalently to a +00:00.
if a.tzinfo is None:
a = a.replace(tzinfo=OffsetTZInfo())
if b.tzinfo is None:
b = b.replace(tzinfo=OffsetTZInfo())
# datetime's __eq__ implementation compares instants; offsets and precision need not be equal.
return a == b
def _symbols_eq(a, b):
    """Compares two symbol operands (strings or SymbolTokens) for equivalence.

    Operands whose text is known are equivalent exactly when their text is equal. Operands whose text is unknown
    (None) are compared by import location and by whether their symbol IDs are zero.

    Args:
        a (str|SymbolToken): The first operand (enforced by assertion).
        b (object): The second operand; anything that is not a str or SymbolToken is never equivalent to a.

    Returns:
        bool: True if the operands are equivalent symbols, otherwise False.

    Raises:
        ValueError: If both operands have unknown text and either one lacks a symbol ID.
    """
    assert isinstance(a, (str, SymbolToken))
    if not isinstance(b, (str, SymbolToken)):
        return False

    # Plain strings have no `text` attribute, so the operand itself serves as its text.
    text_a = getattr(a, 'text', a)
    text_b = getattr(b, 'text', b)
    texts_match = text_a == text_b
    if not texts_match:
        return False
    if text_a is not None:
        return True

    # Both have unknown text. If they come from a local context, they are equivalent.
    location_a = getattr(a, 'location', None)
    location_b = getattr(b, 'location', None)
    if (location_a is None) != (location_b is None):
        return False
    if location_a is not None:
        # Both were imported from shared symbol tables. In this case, they are only equivalent if they were
        # imported from the same position in the same shared symbol table.
        if (location_a.name != location_b.name) or (location_a.position != location_b.position):
            return False

    sid_a = getattr(a, 'sid', None)
    sid_b = getattr(b, 'sid', None)
    if sid_a is None or sid_b is None:
        raise ValueError('Attempted to compare malformed symbols %s, %s.' % (a, b))
    # SID 0 is only equal to SID 0.
    return (sid_a == 0) == (sid_b == 0)
def _decimals_eq(a, b):
assert isinstance(a, Decimal)
if not isinstance(b, Decimal):
return False
if a.is_zero() and b.is_zero():
if a.is_signed() ^ b.is_signed():
# Negative-zero is not equivalent to positive-zero.
return False
# This ensures that both have equal precision.
return a.canonical().compare_total(b.canonical()) == 0
def _is_float_negative_zero(x):
return struct.pack('>d', x) == b'\x80\x00\x00\x00\x00\x00\x00\x00'
def _floats_eq(a, b):
assert isinstance(a, float)
if not isinstance(b, float):
return False
if a == 0 and b == 0:
# Negative-zero is not equivalent to positive-zero.
return not (_is_float_negative_zero(a) ^ _is_float_negative_zero(b))
# nan is always equivalent to nan.
return a == b or (isnan(a) and isnan(b))