Skip to content

Commit 6bea26e

Browse files
ahilger authored and meta-codesync[bot] committed
Benchmark: unique-types nested init (cache-defeating)
Summary: A companion to `nested-benchmark` that addresses why its `Initialize` column understates the effect of the isset-byte-array removal:

- the immutable default-value cache (`getDefaultValueForImmutableField` returns a process-global default and just `Py_INCREF`s it), and
- `Nested1110` reusing only 4 unique types, so `Nested1110()` freshly builds only the top node and pulls cached child defaults.

`unique_struct.thrift` is a fanout-10, depth-3 tree of 1111 UNIQUE struct types. `unique-nested-benchmark` builds the 1110 children once in setup and times only the `Root` construction for `Initialize`, while `Deserialize` rebuilds all 1111 nodes fresh from the wire (no cache shortcut).

Findings (immutable, `fbcode//mode/opt`, base vs. the isset-disabled change):

- Initialize (Root only): 1.29 us -> 1.31 us (flat; construction is dominated by per-field value handling, not the isset bytes).
- Deserialize: 0.170 ms -> 0.128 ms (-25%; 0.153 -> 0.115 us/node).

___

Differential Revision: D110082460

fbshipit-source-id: ed604527ebf7e35c602c6a12f00e9e424f0c776a
1 parent 8b8113a commit 6bea26e

2 files changed

Lines changed: 6543 additions & 0 deletions

File tree

‎thrift/lib/python/benchmark/benchmark_struct.py‎

Lines changed: 76 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -658,6 +658,76 @@ def bench_deserialize(flavor: str, cls: str) -> str:
658658
print("\n")
659659

660660

661+
def benchmark_unique_nested():
    # Cache-defeating counterpart of `benchmark_nested`.
    #
    # `struct.thrift`'s Nested1110 is made of only 4 unique types, so
    # `Nested1110()` picks up process-global cached child defaults and freshly
    # builds just the top node. That hides the per-struct internal-data init
    # cost (e.g. the isset byte array). `unique_struct.thrift` instead defines
    # a fanout-10, depth-3 tree of 1111 UNIQUE types.
    #
    # Only the immutable (thrift-python) flavor is measured: it is the flavor
    # whose internal-data layout changed, and base-vs-diff is the comparison.
    #
    # Two numbers are reported:
    #   * Initialize: construction of the Root struct alone. The 1110 children
    #     and the Root kwargs are prepared in the timeit setup, so the timed
    #     statement covers the kwargs -> internal-data path for one node and
    #     not the Python iteration, f-string, and getattr work needed to
    #     materialize the whole tree.
    #   * Deserialize: all 1111 nodes rebuilt from the wire with no
    #     default-value cache shortcut, which makes it directly comparable to
    #     the Nested1110 Deserialize column of `nested-benchmark`.
    import_stmt = "import thrift.benchmark.unique_struct.thrift_types as U"

    # Setup-only code: leaves -> mids -> tops, then the kwargs for the Root.
    children_stmt = (
        "leaves = [getattr(U, f'L{i}')(val=i, str_val='x') for i in range(1000)]\n"
        "mids = [getattr(U, f'M{i}')(**{f'f{j+1}': leaves[i * 10 + j] for j in range(10)}) for i in range(100)]\n"
        "tops = [getattr(U, f'T{i}')(**{f'f{j+1}': mids[i * 10 + j] for j in range(10)}) for i in range(10)]\n"
        "root_kwargs = {f'f{j+1}': tops[j] for j in range(10)}"
    )
    root_stmt = "root = U.Root(**root_kwargs)"

    def measure(stmt: str, stmt_setup: str) -> float:
        # Best-of-5 wall time for a single execution of `stmt`, in
        # milliseconds; the loop count is whatever `autorange` settles on.
        bench = timeit.Timer(stmt=stmt, setup=stmt_setup)
        loops = bench.autorange()[0]
        best = min(bench.repeat(repeat=5, number=loops))
        return best * 1000.0 / loops

    # Initialize: the children are built in setup, only the Root is timed.
    init_setup = "\n".join([import_stmt, children_stmt])
    init_ms = measure(root_stmt, init_setup)

    # Deserialize: setup builds the tree once and serializes it; the timed
    # statement decodes the whole tree from those bytes.
    deser_setup = "\n".join(
        [
            import_stmt,
            "from thrift.python.serializer import serialize, deserialize",
            children_stmt,
            root_stmt,
            "serialized = serialize(root)",
        ]
    )
    deser_ms = measure("_ = deserialize(U.Root, serialized)", deser_setup)

    # `measure` reports milliseconds; the Root-only and per-node figures are
    # shown in microseconds.
    init_row = [
        "Initialize (Root only)",
        f"{init_ms * 1000.0:.4f} us",
        "(1 node)",
    ]
    deser_row = [
        "Deserialize (1111-node tree)",
        f"{deser_ms:.4f} ms",
        f"{deser_ms * 1000.0 / 1111:.6f} us/node",
    ]
    column_titles = [
        "Unique nested (unique types)",
        "time",
        "per node",
    ]
    report = tabulate(
        [init_row, deser_row],
        headers=column_titles,
        tablefmt="github",
    )
    print(report)
    print("\n")
729+
730+
661731
@click.group()
662732
def cli():
663733
pass
@@ -758,6 +828,11 @@ def nested_benchmark() -> None:
758828
benchmark_nested()
759829

760830

831+
@click.command()
832+
def unique_nested_benchmark() -> None:
833+
benchmark_unique_nested()
834+
835+
761836
@click.command()
762837
@click.pass_context
763838
def run_all(ctx) -> None:
@@ -792,6 +867,7 @@ def main() -> None:
792867
cli.add_command(comparison_benchmark)
793868
cli.add_command(call_benchmark)
794869
cli.add_command(nested_benchmark)
870+
cli.add_command(unique_nested_benchmark)
795871
cli()
796872

797873

0 commit comments

Comments
 (0)