Clasele 11-12 lecția 4 - 23 oct 2015
From Algopedia
Jump to navigationJump to search
Aho-Corasick
#include <cstdio>
#include <queue>
const int SIGMA = 3; // {a, b, c}
const char FIRST_LETTER = 'a';
class TrieNode { // nod al trie-ului
public:
TrieNode* next[SIGMA];
//vector<pair<char, TrieNode*> > next;
int contor;
TrieNode* failLink;
TrieNode* outputLink;
int occurences;
void insert(char* word);
int query(char* word);
bool erase(char* word);
void dump(char letter, int depth);
};
void TrieNode::insert(char* word) {
if (*word == 0) {
this->contor++;
} else {
if (this->next[*word - FIRST_LETTER] == NULL) {
this->next[*word - FIRST_LETTER] = new TrieNode;
}
this->next[*word - FIRST_LETTER]->insert(word + 1);
}
}
int TrieNode::query(char* word) {
if (*word == 0) {
return this->contor;
} else {
if (this->next[*word - FIRST_LETTER] == NULL) {
return 0;
} else {
return this->next[*word - FIRST_LETTER]->query(word + 1);
}
}
}
bool TrieNode::erase(char* word) {
if (*word == 0) {
if (this->contor == 0) {
return false;
} else {
this->contor--;
return true;
}
} else {
if (this->next[*word - FIRST_LETTER] == NULL) {
return false;
} else {
bool answer = this->next[*word - FIRST_LETTER]->erase(word + 1);
if (answer) {
TrieNode* mySon = this->next[*word - FIRST_LETTER];
bool toDelete = mySon->contor == 0;
for (int i = 0; i < SIGMA; i++) {
toDelete &= mySon->next[i] == NULL;
}
if (toDelete) {
delete mySon;
this->next[*word - FIRST_LETTER] = NULL;
}
}
return answer;
}
}
}
void TrieNode::dump(char letter = '-', int depth = 0) {
for (int i = 0; i < depth; ++i) {
printf(" ");
}
printf("%c %d %p (Fail: %p, Output: %p)\n", letter, this->contor,
this, this->failLink, this->outputLink);
for (int i = 0; i < SIGMA; ++i) {
if (this->next[i] != NULL)
this->next[i]->dump(i + FIRST_LETTER, depth + 1);
}
}
class Trie {
private:
TrieNode* root;
public:
Trie() {
this->root = new TrieNode;
}
void insert(char* word) {
this->root->insert(word);
}
int query(char* word) {
return this->root->query(word);
}
bool erase(char* word) {
return this->root->erase(word);
}
void ahoPreprocessing() {
std::queue<TrieNode*> bfsQueue;
this->root->failLink = this->root;
this->root->outputLink = this->root;
for (int i = 0; i < SIGMA; i++) { // primul nivel
if (this->root->next[i] != NULL) {
this->root->next[i]->failLink = this->root;
this->root->next[i]->outputLink = this->root;
bfsQueue.push(this->root->next[i]);
}
}
while (!bfsQueue.empty()) {
TrieNode* node = bfsQueue.front();
bfsQueue.pop();
for (int i = 0; i < SIGMA; i++) {
if (node->next[i] != NULL) {
TrieNode* son = node->next[i];
TrieNode* tmp = node->failLink;
while (tmp->next[i] == NULL && tmp != this->root) {
tmp = tmp->failLink;
}
if (tmp->next[i] != NULL) {
son->failLink = tmp->next[i];
} else {
son->failLink = tmp; // this->root
}
if (son->failLink->contor > 0) {
son->outputLink = son->failLink;
} else {
son->outputLink = son->failLink->outputLink;
}
bfsQueue.push(son);
}
}
}
}
void dump() {
this->root->dump();
}
void findInPattern(char* word) {
TrieNode* currentNode = this->root;
while (*word != 0) {
while (currentNode->next[*word - FIRST_LETTER] == NULL
&& currentNode != this->root) {
currentNode = currentNode->failLink;
}
if (currentNode->next[*word - FIRST_LETTER] != NULL) {
currentNode = currentNode->next[*word - FIRST_LETTER];
} // else currentNode == this->root
word++;
//printf("%s == %p\n", word, currentNode);
currentNode->occurences++;
/*
if (currentNode->contor > 0) {
// aparitia cuvantului;
printf("%p == %s\n", currentNode, word);
}
TrieNode* outputNode = currentNode->outputLink;
while (outputNode != this->root) {
// aparitia cuvantului;
printf("%p == %s\n", outputNode, word);
outputNode = outputNode->outputLink;
}//*/
}
}
};
int main(void) {
Trie T;
T.insert("abc");
T.insert("bac");
T.insert("ab");
T.insert("acc");
T.insert("bcc");
T.insert("b");
T.ahoPreprocessing();
T.dump();
T.findInPattern("abccbacc");
//abccbacc
//O(N * SIGMA)
//O(N)
return 0;
}
Temă
Pentru data viitoare veți avea de rezolvat următoarele 4 probleme: