Extended Knuth-Morris-Pratt Algorithm
模板
- 参数调整:MAXN是字符串的理论最大长度
- 调用:exKMP_getextend(char* 母串,char* 模式串),会同时处理好next数组和extend数组,数据间无需初始化。
- exKMP_next[i]表示模式串mode[i~(len-1)]与整个模式串的最长公共前缀长度
- exKMP_extend[i]表示母串str[i~(len-1)]于整个模式串的最长公共前缀长度
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
#pragma mark - Extended Knuth-Morris-Pratt Algorithm (exKMP) const int MAXN = 100010; int exKMP_next[MAXN],exKMP_extend[MAXN]; void exKMP_getnext(char* str){ //Preproccess mode string //next array indicates the public prefix of str[0~n-1] and str[i~n-1] //相当于自己和自己匹配的exKMP int len = strlen(str); exKMP_next[0] = len; //get next[1] specially int pos = 0; while(pos+1 < len && str[pos] == str[pos+1]){ pos++; } exKMP_next[1] = pos; pos = 1; for(int i = 2; i < len; i++){ if(exKMP_next[i - pos] + i < exKMP_next[pos] + pos){ //situ. 1 exKMP_next[i] = exKMP_next[i - pos]; }else{ //situ. 2 int j = exKMP_next[pos] + pos - i; if(j < 0){ j = 0; } while(i + j < len && str[i+j] == str[j]){ j++; } exKMP_next[i] = j; pos = i; } } } void exKMP_getextend(char *str,char *mode){ int len = strlen(str),lenmode = strlen(mode); exKMP_getnext(mode); int pos = 0; while(pos < len && pos < lenmode && str[pos] == mode[pos]){ pos++; } exKMP_extend[0] = pos; pos = 0;//pos是最近一次成功匹配的开始位置 for(int i = 1; i < len; i++){ if(exKMP_next[i - pos] + i < exKMP_extend[pos] + pos){ //situ.1 模式串的等价前置状态中,等价区没有超过当前匹配范围 exKMP_extend[i] = exKMP_next[i - pos]; }else{ //situ.2 模式串的等价前置状态中等价去超出了当前已经确认的范围 int j = exKMP_extend[pos] + pos - i; if(j < 0){ j = 0; } while(i + j < len && j < lenmode && str[j+i] == mode[j]){ j++; } exKMP_extend[i] = j; pos = i; } } } #pragma mark - |
题目
Clairewd’s message
Aho-Corasick Automaton
模板
- 参数调整:
- SP:字符集大小
- MAXN:理论上Tire树的节点上限
- mark():用来将字符映射到SP规定的字符集大小内的函数
- ACA_match():需要按照题目要求更改匹配时行为
- 调用:
- 初始化:init()
- 插入模式串:insert()
- 构建fail指针:ACA_buildfail()
- 匹配母串:ACA_match()
- 活用注记:
- root表示没有匹配到任何字符,如果超出字符集,也要转移到root
- 每一个节点的fail指针对应节点是等价的更早状态(包括root状态)
- 每个节点指向失配的出边实际上指向的是其fail指针对应状态中对应出边指向的状态
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
#pragma mark - AC Automaton const int SP = 26; const int MAXN = 600005; struct Trie{ int nxt[MAXN][SP]; int fail[MAXN]; int freq[MAXN]; int last[MAXN];//优化,last指向的是当前状态的前缀里面能够配上字符串的状态,而不是所有前缀 int root,tot; inline int mark(char a){ return a - 'a'; } void init(){ tot = 0; root = newnode(); } int newnode(){ for(int i = 0; i < SP; i++){ nxt[tot][i] = -1; } freq[tot] = 0; fail[tot] = -1; return tot++; } void insert(char* str){ int len = (int)strlen(str); int now = root; for(int i = 0; i < len; i++){ if(nxt[now][mark(str[i])] == -1){ nxt[now][mark(str[i])] = newnode(); } now = nxt[now][mark(str[i])]; } freq[now]++; } void ACA_buildfail(){ queue<int> que; fail[root] = root; last[root] = root; for(int i = 0; i < SP; i++){ if(nxt[root][i] == -1){ nxt[root][i] = root; }else{ fail[nxt[root][i]] = root; last[nxt[root][i]] = root; que.push(nxt[root][i]); } } while(!que.empty()){ int now = que.front(); que.pop(); for(int i = 0; i < SP; i++){ if(nxt[now][i] == -1){ nxt[now][i] = nxt[fail[now]][i]; }else{ fail[nxt[now][i]] = nxt[fail[now]][i]; last[nxt[now][i]] = freq[nxt[fail[now]][i]]?nxt[fail[now]][i]:last[nxt[fail[now]][i]]; que.push(nxt[now][i]); } } } } int ACA_match(char* str){ int len = (int)strlen(str); int now = root; int res = 0; for(int i = 0; i < len; i++){ now = nxt[now][mark(str[i])]; int temp = now; while(temp != root){ //Proccess matching here //res += freq[temp]; //freq[temp] = 0;//防止重复,but this will damage aca,try alternative //temp = last[temp]; } } return res; } }; #pragma mark - |
题目
[Pending]
Suffix Automaton
模板1~指针实现,动态分配内存
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
struct SAM_State{ int mxn; SAM_State *to[26]; SAM_State *parent; SAM_State(int _mxn){ mxn = _mxn; memset(to, 0, sizeof(to)); parent = NULL; } }; SAM_State* SAM_Append(SAM_State *last,SAM_State *root,int ch){ SAM_State *p = last; SAM_State *np = new SAM_State(p -> mxn + 1); while (p && p -> to[ch] == NULL) { p -> to[ch] = np; p = p -> parent; } if (p != NULL) { SAM_State *q = p -> to[ch]; if (q -> mxn == p -> mxn + 1) { np -> parent = q; }else{ SAM_State *nq = new SAM_State(p -> mxn + 1); nq -> parent = q -> parent; q -> parent = nq; np -> parent = nq; memcpy(nq -> to, q -> to, sizeof(q -> to)); while (p && p -> to[ch] == q) { p -> to[ch] = nq; p = p -> parent; } } }else{ np -> parent = root; } last = np; return last; } SAM_State* SAM_Build(string str){ SAM_State *root = new SAM_State(0); SAM_State *last = root; for (int i = 0; i < str.size(); i++) { last = SAM_Append(last, root, str[i] - 'a'); } return root; } |
模板2~数组实现,支持删除后缀
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
const int MAXN = 510005; struct SAM_Node{ int mxn; int to[26]; int parent; int del; int pos; int endflg; void init(){ mxn = 0; parent = -1; del = 0; pos = -1; endflg = 0; memset(to,-1,sizeof(to)); } } pool[MAXN*2]; int stempos[MAXN]; bool delflag[MAXN]; int SAM_tot; int del_tot; int dfs_tot; void SAM_Init(){ pool[0].init(); SAM_tot = 1; del_tot = 1; dfs_tot = 0; delflag[0] = false; } int SAM_Append(int last,int ch){ int p = last; int np = SAM_tot++; pool[np].init(); pool[np].mxn = pool[p].mxn + 1; pool[np].del = del_tot++; delflag[pool[np].del] = false; pool[np].pos = pool[np].mxn - 1; stempos[pool[np].pos] = np; while(p != -1 && (pool[p].to[ch] == -1 || delflag[pool[pool[p].to[ch]].del])){ pool[p].to[ch] = np; p = pool[p].parent; } if(p != -1){ int q = pool[p].to[ch]; if(pool[q].mxn == pool[p].mxn + 1){ pool[np].parent = q; }else{ int nq = SAM_tot++; pool[nq] = pool[q]; pool[nq].mxn = pool[p].mxn + 1; pool[q].parent = nq; pool[np].parent = nq; while(p != -1 && pool[p].to[ch] == q){ pool[p].to[ch] = nq; p = pool[p].parent; } } }else{ pool[np].parent = 0; } return np; } int len,last,qlen; char str[MAXN]; void suffix_delete(int siz){ int i,j; for(i = len-1,j = 1; j <= siz; i--,j++){ delflag[pool[stempos[i]].del] = true; } len -= siz; last = stempos[i]; } void suffix_append(){ int apdlen = strlen(str); for(int i = 0; i < apdlen; i++){ last = SAM_Append(last,str[i]-'a'); } len += apdlen; } |