题解 P4191 【[CTSC2010]性能优化】

· · 题解

题意:

给定长度为 n 的多项式 A, B

A * B^C 循环卷积的结果,模质数 n+1

首先要会 \mathrm{FFT},不仅要会写,而且要懂原理

F(x)=a_0+a_1x+a_2x^2+\dots+a_{n-1}x^{n-1} 的系数分成两部分

F^{[0]}(x)=a_0+a_2x+\dots+a_{n-2}x^{\frac{n-2}2} F^{[1]}(x)=a_1+a_3x+\dots+a_{n-1}x^{\frac{n-2}2} F(x)=F^{[0]}(x^2)+xF^{[1]}(x^2)

n 次单位根代入

F(\omega_n^i)=F^{[0]}(\omega_n^{2i})+\omega_n^iF^{[1]}(\omega_n^{2i})

由于单位根具有消去律

F(\omega_n^i)=F^{[0]}(\omega_{\frac n2}^i)+\omega_n^iF^{[1]}(\omega_{\frac n2}^i)

然后就可以分治了

这题也是类似,考虑将 n 分解质因数,然后将 n 不断地分成 p 块,这样才能保证复杂度

具体地:

F(x)=a_0+a_1x+a_2x^2+\dots+a_{n-1}x^{n-1} F^{[0]}=a_0+a_px+\dots+a_{n-p}x^{\frac {n-p}p} F^{[i]}=a_i+a_{p+i}x+\dots+a_{n-p+i}x^{\frac{n-p}p} F(\omega_n^i)=F^{[0]}(\omega_{\frac np}^i)+\omega_n^iF^{[1]}(\omega_{\frac np}^i)+\dots+\omega_n^{i(p-1)}F^{[p-1]}(\omega_{\frac np}^i)

可以开始分治

注意此时如果要写迭代版的 \mathrm{FFT},需要注意的是 \mathrm{rev} 数组可能比较特殊

代码: ```cpp // luogu-judger-enable-o2 #include <iostream> #include <algorithm> const int N = 500000; using LL = long long; int n, m, wn[N], primes[N], power[N], prime_tot, mod; int pow(int x, int y) { int ans = 1; for (; y; y >>= 1, x = (LL) x * x % mod) if (y & 1) ans = (LL) ans * x % mod; return ans; } void reduce(int &x) { x += x >> 31 & mod; } void factor(int n) { for (int i = 2; i * i <= n; ++i) if (n % i == 0) n /= i, primes[prime_tot++] = i--; if (n > 1) primes[prime_tot++] = n; } int primitive_root() { for (int i = 2; ; ++i) { bool flag = 1; for (int j = 0; j < prime_tot; ++j) if (pow(i, n / primes[j]) == 1) { flag = 0; break; } if (flag) return i; } } int tmp[N]; void reverse(int *A) { for (int i = prime_tot - 1, block = n; ~i; --i) { for (int idx = 0, k = 0; k < n; k += block) for (int j = 0; j < primes[i]; ++j) for (int l = 0; l < block; l += primes[i]) tmp[idx++] = A[k + j + l]; for (int k = 0; k < n; ++k) A[k] = tmp[k]; block /= primes[i]; } } void dft(int *A, int typ) { reverse(A); for (int i = 0, block = 1; i < prime_tot; ++i) { const int mid = block, wi = wn[n / (block *= primes[i])]; for (int j = 0; j < n; ++j) tmp[j] = 0; for (int j = 0; j < n; j += block) { int wk = 1; for (int k = 0; k < block; ++k) { for (int l = k % mid, w = 1; l < block; l += mid, w = (LL) w * wk % mod) reduce(tmp[j + k] += (LL) w * A[j + l] % mod - mod); wk = (LL) wk * wi % mod; } } for (int j = 0; j < n; ++j) A[j] = tmp[j]; } if (!typ) { std::reverse(A + 1, A + n); for (int i = 0; i < n; ++i) A[i] = (LL) A[i] * n % mod; } } int a[N], b[N], rk[N]; int main() { std::ios::sync_with_stdio(0), std::cin.tie(0); std::cin >> n >> m, mod = n + 1, factor(n); wn[0] = 1, wn[1] = primitive_root(); for (int i = 2; i < n; ++i) wn[i] = (LL) wn[i - 1] * wn[1] % mod; for (int i = 0; i < n; ++i) std::cin >> a[i]; for (int i = 0; i < n; ++i) std::cin >> b[i]; dft(a, 1), dft(b, 1); for (int i = 0; i < n; ++i) a[i] = (LL) a[i] * pow(b[i], m) % mod; dft(a, 0); for (int i = 0; i < n; ++i) std::cout << a[i] << '\n'; return 0; } ```