数据结构之$\color{#32CD32}{AC自动机}$
解决的问题
- 插入给定的 n 个长度不超过 50 的由小写英文字母组成的单词
- 输出给定文章中包含单词的个数
$AC自动机相当于在trie树上对于每个节点预处理出next[i]$
$在对文章进行搜索时,每次失败就跳到next[i]的地方,直到匹配成功$
首先对给定的单词插入trie
$ Code$
void insert()
{
int p = 0;
for (int i = 0; i < str[i]; i ++ )
{
int c = str[i] - 'a';
if (!trie[p][c])trie[p][c] = ++tot;
p = trie[p][c];
}
cnt[p]++;
}
$然后,用bfs递推出所有next的值$
$next[i] 表示 i号节点结尾的字符串的最大后缀(后最必须在树上能表示出)$
$根据父亲节点的next值,就能顺利推出公式:next[j] = trie[next[i]][ch] (j是i沿字符指针ch指向的儿子节点) $
$特别的,因为next[i]指向的是i的对应后缀,trie[next[i]][ch]可能为空$
$这样导致next[i]只能指向第二大的值,所以在遍历trie[next[i]][ch]时$
$给他特定赋值为父节点的next值的ch指针$
$Code$
void build()
{
int hh = 0, tt = -1;
for (int i = 0; i < 26; i ++ )
if (trie[0][i])
q[++tt] = trie[0][i];
while (hh <= tt)
{
int t = q[hh++];
for (int i = 0; i < 26; i ++ )
{
int p = trie[t][i];
if (!p)trie[t][i] = trie[ne[t]][i];
else
{
ne[p] = trie[ne[t]][i];
q[++tt] = p;
}
}
}
}
$最后,在查询文章时,枚举每一位字母$
$查找以当前字母为后缀的单词数量,就能统计出文章中单词出现的总数$
$Code$
int res = 0;
for (int i = 0, j = 0; str[i]; i ++ )
{
int c = str[i] - 'a';
j = trie[j][c];
int p = j;
while (p)
{
res += cnt[p];
p = ne[p];
}
}
$\color{#32CD32}{AC自动机Code}$
#include<bits/stdc++.h>
using namespace std;
const int N = 10010, M = 1000010, S = 55;
int n;
char str[M];
int trie[N * S][26], cnt[N * S], tot;
int q[N * S], ne[N * S];
void insert()
{
int p = 0;
for (int i = 0; i < str[i]; i ++ )
{
int c = str[i] - 'a';
if (!trie[p][c])trie[p][c] = ++tot;
p = trie[p][c];
}
cnt[p]++;
}
void build()
{
int hh = 0, tt = -1;
for (int i = 0; i < 26; i ++ )
if (trie[0][i])
q[++tt] = trie[0][i];
while (hh <= tt)
{
int t = q[hh++];
for (int i = 0; i < 26; i ++ )
{
int p = trie[t][i];
if (!p)trie[t][i] = trie[ne[t]][i];
else
{
ne[p] = trie[ne[t]][i];
q[++tt] = p;
}
}
}
}
int main()
{
scanf("%d", &n);
for (int i = 1; i <= n; i ++ )
{
scanf("%s", str);
insert();
}
build();
scanf("%s", str);
int res = 0;
for (int i = 0, j = 0; str[i]; i ++ )
{
int c = str[i] - 'a';
j = trie[j][c];
int p = j;
while (p)
{
res += cnt[p];
p = ne[p];
}
}
printf("%d\n", res);
return 0;
}
刻师傅牛啊
看私信
qpzc