Clasele 11-12 lecția 4 - 23 oct 2015

From Algopedia
Jump to navigationJump to search

Aho-Corasick

#include <cstdio>
#include <queue>

const int SIGMA = 3; // {a, b, c}
const char FIRST_LETTER = 'a';

class TrieNode { // nod al trie-ului
public:
  TrieNode* next[SIGMA];
  //vector<pair<char, TrieNode*> > next;

  int contor;
  TrieNode* failLink;
  TrieNode* outputLink;
  int occurences;

  void insert(char* word);
  int query(char* word);
  bool erase(char* word);
  void dump(char letter, int depth);
};

void TrieNode::insert(char* word) {
  if (*word == 0) {
    this->contor++;
  } else {
    if (this->next[*word - FIRST_LETTER] == NULL) {
      this->next[*word - FIRST_LETTER] = new TrieNode;
    }
    this->next[*word - FIRST_LETTER]->insert(word + 1);
  }
}

int TrieNode::query(char* word) {
  if (*word == 0) {
    return this->contor;
  } else {
    if (this->next[*word - FIRST_LETTER] == NULL) {
      return 0;
    } else {
      return this->next[*word - FIRST_LETTER]->query(word + 1);
    }
  }
}

bool TrieNode::erase(char* word) {
  if (*word == 0) {
    if (this->contor == 0) {
      return false;
    } else {
      this->contor--;
      return true;
    }
  } else {
    if (this->next[*word - FIRST_LETTER] == NULL) {
      return false;
    } else {
      bool answer = this->next[*word - FIRST_LETTER]->erase(word + 1);
      if (answer) {
        TrieNode* mySon = this->next[*word - FIRST_LETTER];
        bool toDelete = mySon->contor == 0;
        for (int i = 0; i < SIGMA; i++) {
          toDelete &= mySon->next[i] == NULL;
        }
        if (toDelete) {
          delete mySon;
          this->next[*word - FIRST_LETTER] = NULL;
        }
      }
      return answer;
    }
  }
}

void TrieNode::dump(char letter = '-', int depth = 0) {
  for (int i = 0; i < depth; ++i) {
    printf("  ");
  }
  printf("%c %d %p (Fail: %p, Output: %p)\n", letter, this->contor,
      this, this->failLink, this->outputLink);
  for (int i = 0; i < SIGMA; ++i) {
    if (this->next[i] != NULL)
      this->next[i]->dump(i + FIRST_LETTER, depth + 1);
  }
}


class Trie {
private:
  TrieNode* root;

public:
  Trie() {
    this->root = new TrieNode;
  }

  void insert(char* word) {
    this->root->insert(word);
  }

  int query(char* word) {
    return this->root->query(word);
  }

  bool erase(char* word) {
    return this->root->erase(word);
  }

  void ahoPreprocessing() {
    std::queue<TrieNode*> bfsQueue;
    this->root->failLink = this->root;
    this->root->outputLink = this->root;
    for (int i = 0; i < SIGMA; i++) { // primul nivel
      if (this->root->next[i] != NULL) {
        this->root->next[i]->failLink = this->root;
        this->root->next[i]->outputLink = this->root;
        bfsQueue.push(this->root->next[i]);
      }
    }

    while (!bfsQueue.empty()) {
      TrieNode* node = bfsQueue.front();
      bfsQueue.pop();
      for (int i = 0; i < SIGMA; i++) {
        if (node->next[i] != NULL) {
          TrieNode* son = node->next[i];
          TrieNode* tmp = node->failLink;
          while (tmp->next[i] == NULL && tmp != this->root) {
            tmp = tmp->failLink;
          }
          if (tmp->next[i] != NULL) {
            son->failLink = tmp->next[i];
          } else {
            son->failLink = tmp; // this->root
          }
          if (son->failLink->contor > 0) {
            son->outputLink = son->failLink;
          } else {
            son->outputLink = son->failLink->outputLink;
          }

          bfsQueue.push(son);
        }
      }

    }
  }

  void dump() {
    this->root->dump();
  }

  void findInPattern(char* word) {
    TrieNode* currentNode = this->root;

    while (*word != 0) {
      while (currentNode->next[*word - FIRST_LETTER] == NULL
          && currentNode != this->root) {
        currentNode = currentNode->failLink;
      }
      if (currentNode->next[*word - FIRST_LETTER] != NULL) {
        currentNode = currentNode->next[*word - FIRST_LETTER];
      } // else currentNode == this->root
      word++;

      //printf("%s == %p\n", word, currentNode);

      currentNode->occurences++;
      /*
      if (currentNode->contor > 0) {
        // aparitia cuvantului;
        printf("%p == %s\n", currentNode, word);
      }
      TrieNode* outputNode = currentNode->outputLink;
      while (outputNode != this->root) {
        // aparitia cuvantului;
        printf("%p == %s\n", outputNode, word);
        outputNode = outputNode->outputLink;
      }//*/
    }
  }
};

int main(void) {
  Trie T;
  T.insert("abc");
  T.insert("bac");
  T.insert("ab");
  T.insert("acc");
  T.insert("bcc");
  T.insert("b");
  T.ahoPreprocessing();
  T.dump();
  T.findInPattern("abccbacc");

  //abccbacc
  //O(N * SIGMA)
  //O(N)
  return 0;
}

Temă

Pentru data viitoare veți avea de rezolvat următoarele 4 probleme: