-
Notifications
You must be signed in to change notification settings - Fork 47
/
Copy pathwriter_binary_raw.py
389 lines (328 loc) · 13.1 KB
/
writer_binary_raw.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at:
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
# OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the
# License.
"""Writer for raw binary Ion values, without symbol table management."""
from datetime import datetime
from decimal import Decimal, localcontext
from enum import IntEnum
from functools import partial
import struct
from amazon.ion.equivalence import _is_float_negative_zero
from amazon.ion.symbols import SymbolToken
from .core import IonEventType, IonType, DataEvent, Transition, TimestampPrecision, TIMESTAMP_FRACTION_PRECISION_FIELD, \
MICROSECOND_PRECISION, TIMESTAMP_PRECISION_FIELD, TIMESTAMP_FRACTIONAL_SECONDS_FIELD, Timestamp
from .util import coroutine, total_seconds
from .writer import NOOP_WRITER_EVENT, WriteEventType, \
writer_trampoline, partial_transition, serialize_scalar, \
validate_scalar_value, illegal_state_null
from .writer_binary_raw_fields import _write_varuint, _write_uint, _write_varint, _write_int
class _TypeIds(IntEnum):
    """Type descriptor octets for binary Ion values.

    Each member carries its type code in the high nibble. The serializers in
    this module OR a low nibble into these values (an inline length, the
    length-field indicator, or the null indicator) to form the final type
    descriptor octet.
    """
    NULL = 0x00
    BOOL_FALSE = 0x10
    BOOL_TRUE = 0x11  # Same high nibble as BOOL_FALSE; the low nibble of 1 encodes ``true``.
    POS_INT = 0x20  # Used for zero and positive ints.
    NEG_INT = 0x30  # Used for negative ints; the magnitude is written separately.
    FLOAT = 0x40
    DECIMAL = 0x50
    TIMESTAMP = 0x60
    SYMBOL = 0x70
    STRING = 0x80
    CLOB = 0x90
    BLOB = 0xA0
    LIST = 0xB0
    SEXP = 0xC0
    STRUCT = 0xD0
    ANNOTATION_WRAPPER = 0xE0
class _Zeros(IntEnum):
    """Single-octet encodings, represented by the type ID and a length nibble of zero.

    Notes:
        Blob, clob, list, and sexp also have single-octet encodings, but the current
        implementation handles these implicitly.
    """
    INT = _TypeIds.POS_INT  # Int zero is always encoded using the positive type ID.
    FLOAT = _TypeIds.FLOAT  # Represents zero.
    DECIMAL = _TypeIds.DECIMAL  # Represents 0d0.
    STRING = _TypeIds.STRING  # Represents the zero-length (empty) string.
    SYMBOL = _TypeIds.SYMBOL  # Represents symbol zero.
    STRUCT = _TypeIds.STRUCT  # Represents a struct with zero fields.
_VARINT_NEG_ZERO = 0xC0  # This refers to the variable-length signed integer subfield.
_INT_NEG_ZERO = 0x80  # This refers to the fixed-length signed integer subfield.
# Length nibble for floats; this writer always emits the 8-octet (64-bit) form.
_LENGTH_FLOAT_64 = 0x08
# Lengths strictly below this value are stored directly in the low nibble of the type descriptor.
_LENGTH_FIELD_THRESHOLD = 14
# Low-nibble value signalling that the length follows the type descriptor as a VarUInt.
_LENGTH_FIELD_INDICATOR = 0x0E
# Low-nibble value that, combined with a type ID, forms that type's null encoding.
_NULL_INDICATOR = 0x0F
def _null(tid):
    """Builds the one-octet null encoding for the given type ID.

    Args:
        tid: The type ID whose null form is wanted.

    Returns:
        bytearray: A single octet, ``tid`` with the null indicator in its low nibble.
    """
    descriptor = tid | _NULL_INDICATOR
    return bytearray((descriptor,))
# Pre-built null encodings, one per type ID, in the order listed here.
# NOTE(review): the order presumably mirrors the IonType ordinals used to index
# ``null_table`` inside ``serialize_scalar`` -- confirm before reordering.
_NULLS = list(map(_null, (
    _TypeIds.NULL,
    _TypeIds.BOOL_FALSE,
    _TypeIds.POS_INT,
    _TypeIds.FLOAT,
    _TypeIds.DECIMAL,
    _TypeIds.TIMESTAMP,
    _TypeIds.SYMBOL,
    _TypeIds.STRING,
    _TypeIds.CLOB,
    _TypeIds.BLOB,
    _TypeIds.LIST,
    _TypeIds.SEXP,
    _TypeIds.STRUCT,
)))
# Pre-built single-octet encodings of the two bool values; returned as-is by ``_serialize_bool``.
_BOOL_TRUE = bytearray([_TypeIds.BOOL_TRUE])
_BOOL_FALSE = bytearray([_TypeIds.BOOL_FALSE])
def _serialize_bool(ion_event):
    """Returns the encoding of a bool event.

    Args:
        ion_event: The event whose ``value`` is tested for truthiness.

    Returns:
        bytearray: The shared module-level ``_BOOL_TRUE`` or ``_BOOL_FALSE``
            buffer itself (not a copy).
    """
    return _BOOL_TRUE if ion_event.value else _BOOL_FALSE
def _write_length(buf, length, tid):
    """Appends the type descriptor (and, if needed, a length field) to ``buf``.

    Args:
        buf (bytearray): The buffer to append to.
        length (int): The length of the value's representation.
        tid: The type ID to combine with the length nibble.
    """
    if length >= _LENGTH_FIELD_THRESHOLD:
        # Too large for the nibble: flag it and follow with an explicit VarUInt length.
        buf.append(tid | _LENGTH_FIELD_INDICATOR)
        _write_varuint(buf, length)
        return
    # Small enough to live in the low nibble of the type descriptor itself.
    buf.append(tid | length)
def _write_int_value(buf, tid, value):
    """Appends a length-prefixed unsigned integer with the given type ID to ``buf``.

    Args:
        buf (bytearray): The buffer to append to.
        tid: The type ID for the type descriptor.
        value (int): The magnitude to write as a UInt.
    """
    # The magnitude is staged separately because its length must precede it.
    magnitude = bytearray()
    magnitude_length = _write_uint(magnitude, value)
    _write_length(buf, magnitude_length, tid)
    buf.extend(magnitude)
def _serialize_int(ion_event):
    """Serializes an int event.

    Args:
        ion_event: The event whose ``value`` is validated as an ``int``.

    Returns:
        bytearray: The single-octet zero encoding, or a type descriptor chosen
            by sign followed by the magnitude.
    """
    value = ion_event.value
    validate_scalar_value(value, int)
    if value == 0:
        return bytearray([_Zeros.INT])
    # The sign is carried by the type ID; only the magnitude is written.
    if value < 0:
        tid = _TypeIds.NEG_INT
        magnitude = -value
    else:
        tid = _TypeIds.POS_INT
        magnitude = value
    buf = bytearray()
    _write_int_value(buf, tid, magnitude)
    return buf
def _serialize_float(ion_event):
    """Serializes a float event.

    Args:
        ion_event: The event whose ``value`` is validated as a ``float``.

    Returns:
        bytearray: The single-octet zero encoding for positive zero; otherwise
            the type descriptor followed by the big-endian 64-bit representation.
    """
    float_value = ion_event.value
    validate_scalar_value(float_value, float)
    # TODO Assess whether abbreviated encoding of zero is beneficial; it's allowed by spec.
    is_positive_zero = (
        float_value.is_integer()
        and float_value == 0.0
        and not _is_float_negative_zero(float_value)
    )
    if is_positive_zero:
        return bytearray([_Zeros.FLOAT])
    # TODO Add an option for 32-bit representation (length=4) per the spec.
    buf = bytearray([_TypeIds.FLOAT | _LENGTH_FLOAT_64])
    buf.extend(struct.pack('>d', float_value))
    return buf
def _write_decimal_value(buf, exponent, coefficient, sign=0):
    """Appends a decimal's exponent and (when needed) coefficient subfields to ``buf``.

    Args:
        buf (bytearray): The buffer to append to.
        exponent (int): Written first, as a VarInt.
        coefficient (int): Written as an Int when non-zero.
        sign: Truthy when the value is negative; only consulted for a zero coefficient.

    Returns:
        int: The number of octets appended.
    """
    length = _write_varint(buf, exponent)
    if coefficient:
        # The coefficient is non-zero, so the coefficient field is required.
        return length + _write_int(buf, coefficient)
    if sign:
        # The coefficient is negative zero.
        buf.append(_INT_NEG_ZERO)
        return length + 1
    # The coefficient is positive zero and the field is omitted.
    return length
def _write_timestamp_fractional_seconds(buf, value):
sign, digits, exponent = value.as_tuple()
coefficient = int(value.scaleb(-exponent).to_integral_value())
if coefficient == 0 and exponent >= 0:
length = 0
else:
length = _write_decimal_value(buf, exponent, coefficient, sign)
return length
def _serialize_decimal(ion_event):
    """Serializes a decimal event.

    Args:
        ion_event: The event whose ``value`` is validated as a ``Decimal``.

    Returns:
        bytearray: The single-octet encoding for ``0d0``; otherwise the type
            descriptor followed by the exponent and coefficient subfields.
    """
    value = ion_event.value
    validate_scalar_value(value, Decimal)
    sign, digits, exponent = value.as_tuple()
    with localcontext() as context:
        # Adjusting precision for taking into account arbitrarily large/small
        # numbers
        context.prec = len(digits)
        unscaled = value.scaleb(-exponent)
        coefficient = int(unscaled.to_integral_value())
    if not (sign or exponent or coefficient):
        # The value is 0d0; other forms of zero will fall through.
        return bytearray([_Zeros.DECIMAL])
    # The body is staged separately because its length must precede it.
    body = bytearray()
    body_length = _write_decimal_value(body, exponent, coefficient, sign)
    buf = bytearray()
    _write_length(buf, body_length, _TypeIds.DECIMAL)
    buf.extend(body)
    return buf
def _serialize_string(ion_event):
    """Serializes a string event as length-prefixed UTF-8.

    Args:
        ion_event: The event whose ``value`` is validated as a ``str``.

    Returns:
        bytearray: The single-octet encoding for the empty string; otherwise
            the type descriptor followed by the UTF-8 octets.
    """
    value = ion_event.value
    validate_scalar_value(value, str)
    if not value:
        return bytearray([_Zeros.STRING])
    encoded = value.encode('utf-8')
    buf = bytearray()
    # The length is counted in encoded octets, not characters.
    _write_length(buf, len(encoded), _TypeIds.STRING)
    buf.extend(encoded)
    return buf
def _serialize_symbol(ion_event):
    """Serializes a symbol event by its symbol ID.

    Args:
        ion_event: The event whose ``value`` is validated as a ``SymbolToken``.

    Returns:
        bytearray: The single-octet encoding for symbol zero; otherwise the
            type descriptor followed by the symbol ID as an unsigned integer.
    """
    token = ion_event.value
    validate_scalar_value(token, SymbolToken)
    symbol_id = token.sid
    if symbol_id == 0:
        return bytearray([_Zeros.SYMBOL])
    buf = bytearray()
    _write_int_value(buf, _TypeIds.SYMBOL, symbol_id)
    return buf
def _serialize_lob_value(event, tid):
    """Serializes a blob or clob event as length-prefixed raw octets.

    Args:
        event: The event whose ``value`` holds the octets; it is not validated here.
        tid: The type ID to use for the type descriptor.

    Returns:
        bytearray: The type descriptor followed by the value's octets.
    """
    payload = event.value
    buf = bytearray()
    _write_length(buf, len(payload), tid)
    buf.extend(payload)
    return buf
# Blob and clob share one serializer; only the type ID in the descriptor differs.
_serialize_blob = partial(_serialize_lob_value, tid=_TypeIds.BLOB)
_serialize_clob = partial(_serialize_lob_value, tid=_TypeIds.CLOB)
def _serialize_timestamp(ion_event):
    """Serializes a timestamp event.

    The offset is written first, followed by the year and then each finer
    component the value's precision includes. Values with a known offset are
    normalized to UTC before their components are written.

    Args:
        ion_event: The event whose ``value`` is validated as a ``datetime``.
            A precision attribute is read from the value when present; values
            without one (or with ``None``) are treated as second precision.

    Returns:
        bytearray: The type descriptor followed by the timestamp subfields.
    """
    buf = bytearray()
    dt = ion_event.value
    precision = getattr(dt, TIMESTAMP_PRECISION_FIELD, TimestampPrecision.SECOND)
    if precision is None:  # TODO should this defaulting be pushed into Timestamp itself?
        precision = TimestampPrecision.SECOND
    validate_scalar_value(dt, datetime)
    value_buf = bytearray()
    # A datetime is only aware when its tzinfo actually yields an offset: a tzinfo
    # whose utcoffset() returns None is still naive. Deciding on the offset itself
    # (rather than on ``tzinfo is None``) avoids failing on ``dt -= None``.
    offset = dt.utcoffset()
    if offset is None:
        value_buf.append(_VARINT_NEG_ZERO)  # This signifies an unknown local offset.
        length = 1
    else:
        # Normalize to UTC and write the offset field (in minutes).
        dt -= offset
        length = _write_varint(value_buf, int(total_seconds(offset) // 60))
    length += _write_varuint(value_buf, dt.year)
    if precision.includes_month:
        length += _write_varuint(value_buf, dt.month)
    if precision.includes_day:
        length += _write_varuint(value_buf, dt.day)
    if precision.includes_minute:
        # Hour and minute are always written together.
        length += _write_varuint(value_buf, dt.hour)
        length += _write_varuint(value_buf, dt.minute)
    if precision.includes_second:
        length += _write_varuint(value_buf, dt.second)
        if isinstance(ion_event.value, Timestamp):
            fractional_seconds = getattr(ion_event.value, TIMESTAMP_FRACTIONAL_SECONDS_FIELD, None)
            if fractional_seconds is not None:
                length += _write_timestamp_fractional_seconds(value_buf, fractional_seconds)
        else:
            # This must be a normal datetime, which always has a range-validated microsecond value.
            length += _write_decimal_value(value_buf, -MICROSECOND_PRECISION, dt.microsecond)
    _write_length(buf, length, _TypeIds.TIMESTAMP)
    buf.extend(value_buf)
    return buf
# Maps each scalar Ion type to the function that serializes a non-null event of that type.
_SERIALIZE_SCALAR_JUMP_TABLE = {
    IonType.NULL: illegal_state_null,
    IonType.BOOL: _serialize_bool,
    IonType.INT: _serialize_int,
    IonType.FLOAT: _serialize_float,
    IonType.DECIMAL: _serialize_decimal,
    IonType.TIMESTAMP: _serialize_timestamp,
    IonType.SYMBOL: _serialize_symbol,
    IonType.STRING: _serialize_string,
    IonType.CLOB: _serialize_clob,
    IonType.BLOB: _serialize_blob,
}
# Binds the generic scalar serializer to this module's binary jump table and null encodings.
_serialize_scalar = partial(
    serialize_scalar, jump_table=_SERIALIZE_SCALAR_JUMP_TABLE, null_table=_NULLS
)
def _serialize_annotation_wrapper(output_buf, annotations):
    """Closes the current container of ``output_buf`` with an annotation wrapper header.

    The header consists of the wrapper's type descriptor (and length), the
    length of the annotation symbol IDs, and then the symbol IDs themselves.

    Args:
        output_buf: The buffer whose current container holds the wrapped value;
            its ``current_container_length`` is read and ``end_container`` is called.
        annotations: The annotation tokens; each token's ``sid`` is written.
    """
    value_length = output_buf.current_container_length
    # Encode every annotation symbol ID, tracking the total number of octets used.
    sids_buf = bytearray()
    sids_length = 0
    for annotation in annotations:
        sids_length += _write_varuint(sids_buf, annotation.sid)
    # The wrapper length covers the SID-length field, the SIDs, and the wrapped value.
    sids_length_buf = bytearray()
    total_length = _write_varuint(sids_length_buf, sids_length) + sids_length + value_length
    header = bytearray()
    _write_length(header, total_length, _TypeIds.ANNOTATION_WRAPPER)
    header.extend(sids_length_buf)
    header.extend(sids_buf)
    output_buf.end_container(header)
def _serialize_container(output_buf, ion_event):
    """Closes the current container of ``output_buf`` with the header for its type.

    Args:
        output_buf: The buffer whose current container is being closed; its
            ``current_container_length`` is read and ``end_container`` is called.
        ion_event: The event that opened the container; its ``ion_type``
            selects struct, sexp, or (otherwise) list encoding.
    """
    ion_type = ion_event.ion_type
    length = output_buf.current_container_length
    header = bytearray()
    if ion_type is not IonType.STRUCT:
        tid = _TypeIds.SEXP if ion_type is IonType.SEXP else _TypeIds.LIST
        _write_length(header, length, tid)
    elif length == 0:
        header.append(_Zeros.STRUCT)
    else:
        # Non-empty structs always use the explicit length field.
        # TODO Support sorted field name symbols, per the spec.
        header.append(_TypeIds.STRUCT | _LENGTH_FIELD_INDICATOR)
        _write_varuint(header, length)
    output_buf.end_container(header)
# Shared "needs more input, nothing to emit" event; the default result of each writer step.
_WRITER_EVENT_NEEDS_INPUT_EMPTY = DataEvent(WriteEventType.NEEDS_INPUT, b'')
@coroutine
def _raw_writer_coroutine(writer_buffer, depth=0, container_event=None,
                          whence=None, pending_annotations=None):
    """Co-routine that serializes Ion events into ``writer_buffer``.

    One instance handles a single nesting level; a new instance is created for
    every container that is started, and control returns to ``whence`` when
    that container ends.

    Args:
        writer_buffer: The buffer that receives the serialized values.
        depth (int): The container depth handled by this instance; zero is the top level.
        container_event: The event that started the container this instance
            is writing into, or None at the top level.
        whence: The co-routine to delegate back to when this container ends.
        pending_annotations: The annotations of the container this instance is
            writing; they are wrapped around it when it ends.

    Receives ``(ion_event, self)`` pairs and yields ``Transition`` instances.

    Raises:
        TypeError: If an event is not valid at the current depth, or a
            container start event does not carry a container type.
    """
    def fail():
        # ``ion_event`` is resolved at call time from the enclosing loop.
        raise TypeError('Invalid event: %s at depth %d' % (ion_event, depth))
    write_result = None
    while True:
        ion_event, self = (yield write_result)
        # By default, stay in this co-routine and ask for more input.
        delegate = self
        curr_annotations = ion_event.annotations
        writer_event = _WRITER_EVENT_NEEDS_INPUT_EMPTY
        if depth > 0 and container_event.ion_type is IonType.STRUCT \
                and ion_event.event_type.begins_value:
            # A field name symbol ID is required at this position.
            sid_buffer = bytearray()
            _write_varuint(sid_buffer, ion_event.field_name.sid)  # Write the field name's symbol ID.
            writer_buffer.add_scalar_value(sid_buffer)
        if ion_event.event_type.begins_value and curr_annotations:
            # Open a buffer container for the annotation wrapper, so the wrapped
            # value's length is known when the wrapper header is written.
            writer_buffer.start_container()
        if ion_event.event_type is IonEventType.SCALAR:
            scalar_buffer = _serialize_scalar(ion_event)
            writer_buffer.add_scalar_value(scalar_buffer)
            if curr_annotations:
                _serialize_annotation_wrapper(writer_buffer, curr_annotations)
        elif ion_event.event_type is IonEventType.STREAM_END:
            if depth != 0:
                fail()
            # Emit everything buffered so far, one partial transition per drained chunk.
            for partial_value in writer_buffer.drain():
                yield partial_transition(partial_value, self)
            writer_event = NOOP_WRITER_EVENT
        elif ion_event.event_type is IonEventType.CONTAINER_START:
            if not ion_event.ion_type.is_container:
                raise TypeError('Expected container type')
            writer_buffer.start_container()
            # Hand off to a child co-routine one level deeper; it carries this
            # container's annotations so they can be written when it ends.
            delegate = _raw_writer_coroutine(writer_buffer, depth + 1,
                                             ion_event, self, curr_annotations)
        elif ion_event.event_type is IonEventType.CONTAINER_END:
            if depth < 1:
                fail()
            _serialize_container(writer_buffer, container_event)
            if pending_annotations:
                # Close the annotation wrapper that was opened before the container started.
                _serialize_annotation_wrapper(writer_buffer, pending_annotations)
                pending_annotations = None
            # Return control to the co-routine of the enclosing level.
            delegate = whence
        else:
            fail()
        write_result = Transition(writer_event, delegate)
def _raw_binary_writer(writer_buffer):
    """Creates a raw binary writer co-routine backed by ``writer_buffer``.

    Args:
        writer_buffer: The buffer handed to the underlying raw writer co-routine.

    Yields:
        DataEvent: serialization events to write out

        Receives :class:`amazon.ion.core.IonEvent`.
    """
    raw_coroutine = _raw_writer_coroutine(writer_buffer)
    return writer_trampoline(raw_coroutine)