I'm practicing problems for the ICPC competition, and one of the problems requires solving it by using an FFT to compute the product of two polynomials efficiently. Since this is for the ICPC competition, I have to implement the FFT from scratch and can only use the standard libraries (numpy
is not allowed). I've arrived at this solution so far, but it's still too slow. Any tips on how I can improve its performance?
@cache
def reverse(num, lg_n):
res = 0
for i in range(lg_n):
if num & (1 << i):
res |= 2 ** (lg_n - 1 - i)
return res
def fft_inplace(p: list[complex], inverse: bool = False) -> None:
n = len(p)
lg_n = ceil(log2(n))
for i in range(n):
rev = reverse(i, lg_n)
if i < rev:
p[i], p[rev] = p[rev], p[i]
i = 2
while i <= n:
ang = 2 * pi / i * (1 if inverse else -1)
w = exp(ang * 1j)
for j in range(0, n, i):
current_w = 1
for k in range(i // 2):
u = p[j + k]
v = p[j + k + i // 2] * current_w
p[j + k] = u + v
p[j + k + i // 2] = u - v
current_w *= w
i *= 2
if inverse:
for i in range(n):
p[i] /= n
def multiply_polynomials(p: list[float], q: list[float]) -> list[float]:
"""Calcula o produto de dois polinômios"""
fft_inplace(p) # type: ignore
fft_inplace(q) # type: ignore
r = [a * b for a, b in zip(p, q)]
fft_inplace(r, inverse=True) # type: ignore
return [val.real for val in r]
This is the flamegraph from running cProfile
: