Difference between revisions of "Clasele 11-12 lecția 4 - 23 oct 2015"
From Algopedia
Jump to navigationJump to search (→Temă) |
(No difference)
|
Latest revision as of 15:11, 31 October 2015
Aho-Corasick
#include <cstdio> #include <queue> const int SIGMA = 3; // {a, b, c} const char FIRST_LETTER = 'a'; class TrieNode { // nod al trie-ului public: TrieNode* next[SIGMA]; //vector<pair<char, TrieNode*> > next; int contor; TrieNode* failLink; TrieNode* outputLink; int occurences; void insert(char* word); int query(char* word); bool erase(char* word); void dump(char letter, int depth); }; void TrieNode::insert(char* word) { if (*word == 0) { this->contor++; } else { if (this->next[*word - FIRST_LETTER] == NULL) { this->next[*word - FIRST_LETTER] = new TrieNode; } this->next[*word - FIRST_LETTER]->insert(word + 1); } } int TrieNode::query(char* word) { if (*word == 0) { return this->contor; } else { if (this->next[*word - FIRST_LETTER] == NULL) { return 0; } else { return this->next[*word - FIRST_LETTER]->query(word + 1); } } } bool TrieNode::erase(char* word) { if (*word == 0) { if (this->contor == 0) { return false; } else { this->contor--; return true; } } else { if (this->next[*word - FIRST_LETTER] == NULL) { return false; } else { bool answer = this->next[*word - FIRST_LETTER]->erase(word + 1); if (answer) { TrieNode* mySon = this->next[*word - FIRST_LETTER]; bool toDelete = mySon->contor == 0; for (int i = 0; i < SIGMA; i++) { toDelete &= mySon->next[i] == NULL; } if (toDelete) { delete mySon; this->next[*word - FIRST_LETTER] = NULL; } } return answer; } } } void TrieNode::dump(char letter = '-', int depth = 0) { for (int i = 0; i < depth; ++i) { printf(" "); } printf("%c %d %p (Fail: %p, Output: %p)\n", letter, this->contor, this, this->failLink, this->outputLink); for (int i = 0; i < SIGMA; ++i) { if (this->next[i] != NULL) this->next[i]->dump(i + FIRST_LETTER, depth + 1); } } class Trie { private: TrieNode* root; public: Trie() { this->root = new TrieNode; } void insert(char* word) { this->root->insert(word); } int query(char* word) { return this->root->query(word); } bool erase(char* word) { return this->root->erase(word); } void ahoPreprocessing() { std::queue<TrieNode*> bfsQueue; this->root->failLink = this->root; this->root->outputLink = this->root; for (int i = 0; i < SIGMA; i++) { // primul nivel if (this->root->next[i] != NULL) { this->root->next[i]->failLink = this->root; this->root->next[i]->outputLink = this->root; bfsQueue.push(this->root->next[i]); } } while (!bfsQueue.empty()) { TrieNode* node = bfsQueue.front(); bfsQueue.pop(); for (int i = 0; i < SIGMA; i++) { if (node->next[i] != NULL) { TrieNode* son = node->next[i]; TrieNode* tmp = node->failLink; while (tmp->next[i] == NULL && tmp != this->root) { tmp = tmp->failLink; } if (tmp->next[i] != NULL) { son->failLink = tmp->next[i]; } else { son->failLink = tmp; // this->root } if (son->failLink->contor > 0) { son->outputLink = son->failLink; } else { son->outputLink = son->failLink->outputLink; } bfsQueue.push(son); } } } } void dump() { this->root->dump(); } void findInPattern(char* word) { TrieNode* currentNode = this->root; while (*word != 0) { while (currentNode->next[*word - FIRST_LETTER] == NULL && currentNode != this->root) { currentNode = currentNode->failLink; } if (currentNode->next[*word - FIRST_LETTER] != NULL) { currentNode = currentNode->next[*word - FIRST_LETTER]; } // else currentNode == this->root word++; //printf("%s == %p\n", word, currentNode); currentNode->occurences++; /* if (currentNode->contor > 0) { // aparitia cuvantului; printf("%p == %s\n", currentNode, word); } TrieNode* outputNode = currentNode->outputLink; while (outputNode != this->root) { // aparitia cuvantului; printf("%p == %s\n", outputNode, word); outputNode = outputNode->outputLink; }//*/ } } }; int main(void) { Trie T; T.insert("abc"); T.insert("bac"); T.insert("ab"); T.insert("acc"); T.insert("bcc"); T.insert("b"); T.ahoPreprocessing(); T.dump(); T.findInPattern("abccbacc"); //abccbacc //O(N * SIGMA) //O(N) return 0; }
Temă
Pentru data viitoare veți avea de rezolvat următoarele 4 probleme: