1. 猴子技术宅首页
  2. php干货教程

[PHP教程] AC自动机 分享


能高效地匹配字符串,具体原理就不搬了,这边给出PHP的实现代码:

<?php class AcAutomation {     private $root;      public function __construct($keywords = array())     {         $this->root = $this->createNode();         foreach ($keywords as $keyword) {             $this->addKeyword($keyword);         }         $this->buildFailIndex();     }      /**      * 创建节点      * @param string $value      * @return stdClass      */     private function createNode($value = "")     {         $node = new stdClass();         $node->value = $value;         $node->next  = array();         $node->fail  = NULL;         $node->len   = 0;         // keyword长度,0表示非末尾,非0表示keyword的长度         return $node;     }       /**      * 添加关键词      * @param $keyword      */     private function addKeyword($keyword)     {         $keyword = trim($keyword);         if (!$keyword) {             return;         }         $cur = $this->root;         $matches = unpack('N*',iconv('UTF-8', 'UCS-4', strtolower($keyword)));         for ($i = 1; isset($matches[$i]); $i++) {             $v = $matches[$i];             if (!isset($cur->next[$v])) {                 $node = $this->createNode($v);                 $cur->next[$v] = $node;             }             if (!isset($matches[$i+1])) {                 $cur->next[$v]->len = $i;             }             $cur = $cur->next[$v];         }     }      /**      * 构造fail指针      */     private function buildFailIndex()     {         $queue = array();         foreach ($this->root->next as $node) {             $node->fail = $this->root;             $queue[] = $node;         }         while ($queue) {             $node = array_shift($queue);             foreach ($node->next as $child_node) {                 $val = $child_node->value;                 $p = $node->fail;                 while ($p != NULL) {                     if (isset($p->next[$val])) {                         $child_node->fail = $p->next[$val];                         break;                     }                     $p = $p->fail;                 }                 if ($p === NULL) {                     $child_node->fail = $this->root;                 }                 $queue[] = $child_node;             }         }     }      /**      * 搜索      * @param $content      * @return array      */     public function search($content)     {         $pos_list = array();         $p = $this->root;          $matches = unpack('N*',iconv('UTF-8', 'UCS-4', strtolower($content)));         for ($i = 1; isset($matches[$i]); $i++) {             $val = $matches[$i];             while (!isset($p->next[$val]) && $p != $this->root) {                 $p = $p->fail;             }             $p = isset($p->next[$val]) ? $p->next[$val] : $this->root;             $temp = $p;             while ($temp != $this->root) {                 if ($temp->len) {                     $pos_list[] = array($i - $temp->len, $i);                 }                 $temp = $temp->fail;             }         }         return $pos_list;     } } 

Example:

<?php $content = file_get_contents(dirname(__FILE__).'/content.txt'); $keywords = array('复杂度','个数','返回','代表','性能','数据','GET','安全性','耗时','优化','文档','并发','类型'); $ac = new AcAutomation($keywords); $res = $ac->search($content); foreach ($res as $pos) {     echo mb_substr($content,$pos[0],$pos[1] - $pos[0],'UTF-8'),PHP_EOL; } 

发表评论

电子邮件地址不会被公开。 必填项已用*标注