首先考虑在序列上是怎么做 KMP 的。→ 远古题解(写的什么破烂东西)。
大致就是记录一个 nxt 数组,表示前后缀最大匹配长度。对于该数组的计算,不断跳 nxt 找候选项,匹配同理。
考虑多串匹配要怎么做。
- 多串,首先想到建 Trie 树然后在上面搞一些什么科技。
- 匹配,联想到 KMP。
考虑在 Trie 树上每个节点 i 记一个 faili,含义和 nxt 类似:找到一个深度最深的节点 j 满足根到 j 的字符串是到 i 字符串的一段后缀。
那就类似于 KMP,在 Trie 上不断失配,不断跳 fail 匹配。
另外建 Trie 图会比较好写。
如果 v=sonu,i 存在,那么 failv=sonfailu,i;如果 v 不存在,那么 sonu,i=sonfailu,i。
另外要使用 BFS,因为求 u 的信息需要先得知 failu 的信息,然而 failu 深度一定小于 u(是一段后缀,深度对应字符串长度)。
洛谷 P3808 AC 自动机(简单版)
#include <bits/stdc++.h>
using namespace std;
const int N = 1e6 + 5;
int n;
char s[N];
int son[N][26], cnt[N], fail[N], tot = 0;
bool st[N];
void insert() {
int len = strlen(s + 1), u = 0;
for (int i = 1; i <= len; i++) {
if (!son[u][s[i] - 'a']) son[u][s[i] - 'a'] = ++tot;
u = son[u][s[i] - 'a'];
}
cnt[u]++;
}
queue<int> q;
void build() {
for (int i = 0; i < 26; i++)
if (son[0][i]) q.push(son[0][i]), fail[son[0][i]] = 0;
while (q.size()) {
int u = q.front(); q.pop();
for (int i = 0; i < 26; i++) {
int v = son[u][i];
if (v) fail[v] = son[fail[u]][i], q.push(v);
else son[u][i] = son[fail[u]][i];
}
}
}
int ans = 0;
void query() {
int len = strlen(s + 1), u = 0;
for (int i = 1; i <= len; i++) {
u = son[u][s[i] - 'a'];
for (int j = u; j && !st[j]; j = fail[j]) ans += cnt[j], st[j] = 1;
}
}
int main() {
scanf("%d", &n);
for (int i = 1; i <= n; i++) scanf("%s", s + 1), insert();
build();
scanf("%s", s + 1), query();
printf("%d\n", ans);
return 0;
}
洛谷 P3796 AC 自动机(简单版 II)
#include <bits/stdc++.h>
using namespace std;
const int N = 1e6 + 5;
int n, ap[155];
char s[N], S[155][75];
int son[N][26], cnt[N], fail[N], tot = 0;
void insert(int id) {
int len = strlen(S[id] + 1), u = 0;
for (int i = 1; i <= len; i++) {
if (!son[u][S[id][i] - 'a']) son[u][S[id][i] - 'a'] = ++tot;
u = son[u][S[id][i] - 'a'];
}
cnt[u] = id;
}
queue<int> q;
void build() {
for (int i = 0; i < 26; i++)
if (son[0][i]) q.push(son[0][i]), fail[son[0][i]] = 0;
while (q.size()) {
int u = q.front(); q.pop();
for (int i = 0; i < 26; i++) {
int v = son[u][i];
if (v) fail[v] = son[fail[u]][i], q.push(v);
else son[u][i] = son[fail[u]][i];
}
}
}
void query() {
int len = strlen(s + 1), u = 0;
for (int i = 1; i <= len; i++) {
u = son[u][s[i] - 'a'];
for (int j = u; j; j = fail[j]) if (cnt[j]) ap[cnt[j]]++;
}
}
void solve() {
for (int i = 0; i <= tot; i++) {
fail[i] = cnt[i] = 0;
for (int j = 0; j < 26; j++) son[i][j] = 0;
} tot = 0;
for (int i = 1; i <= n; i++) scanf("%s", S[i] + 1), insert(i);
build();
scanf("%s", s + 1), query();
int Max = 0;
for (int i = 1; i <= n; i++) Max = max(Max, ap[i]);
printf("%d\n", Max);
for (int i = 1; i <= n; i++)
if (Max == ap[i]) cout << (S[i] + 1) << endl;
for (int i = 1; i <= n; i++) ap[i] = 0;
}
int main() {
while (1) {
scanf("%d", &n);
if (!n) break;
solve();
}
return 0;
}
洛谷 P5357 【模板】AC 自动机
以此祭天。
void query() {
int len = strlen(s + 1), u = 0;
for (int i = 1; i <= len; i++) {
u = son[u][s[i] - 'a'];
for (int j = u; j; j = fail[j]) ap[j]++;
}
}
当然,你要这么写也没问题,但是这么跳 fail 遇到 aaaaaaaaaa
之类的会被卡掉。
所以需要在 fail 树上 dfs 一下求子树和。
#include <bits/stdc++.h>
using namespace std;
const int N = 2e5 + 5, M = 2e6 + 5;
int n, ap[N];
char s[M];
int son[N][26], fail[N], tot = 0;
int id[N];
void insert(int id) {
int len = strlen(s + 1), u = 0;
for (int i = 1; i <= len; i++) {
if (!son[u][s[i] - 'a']) son[u][s[i] - 'a'] = ++tot;
u = son[u][s[i] - 'a'];
}
::id[id] = u;
}
queue<int> q;
void build() {
for (int i = 0; i < 26; i++)
if (son[0][i]) q.push(son[0][i]), fail[son[0][i]] = 0;
while (q.size()) {
int u = q.front(); q.pop();
for (int i = 0; i < 26; i++) {
int v = son[u][i];
if (v) fail[v] = son[fail[u]][i], q.push(v);
else son[u][i] = son[fail[u]][i];
}
}
}
void query() {
int len = strlen(s + 1), u = 0;
for (int i = 1; i <= len; i++) u = son[u][s[i] - 'a'], ap[u]++;
}
int h[N], e[M], ne[M], idx = 0;
void add(int a, int b) { e[idx] = b, ne[idx] = h[a], h[a] = idx++; }
void dfs(int u) {
for (int i = h[u]; ~i; i = ne[i]) {
int v = e[i]; dfs(v), ap[u] += ap[v];
}
}
void solve() {
for (int i = 0; i <= tot; i++) {
fail[i] = ap[i] = 0;
for (int j = 0; j < 26; j++) son[i][j] = 0;
} tot = 0;
scanf("%d", &n);
for (int i = 1; i <= n; i++) scanf("%s", s + 1), insert(i);
build();
scanf("%s", s + 1), query();
for (int i = 0; i <= tot; i++) h[i] = -1; idx = 0;
for (int i = 1; i <= tot; i++) add(fail[i], i);
dfs(0);
for (int i = 1; i <= n; i++) printf("%d\n", ap[id[i]]), id[i] = 0;
}
int main() {
solve();
return 0;
}