本文实例讲述了php正则替换处理HTML页面的方法。分享给大家供大家参考。具体如下:
本文实例讲述了php正则替换处理HTML页面的方法。分享给大家供大家参考。具体如下:
<?php
// Direct-access guard: terminate with a message unless the BASEPATH constant has been defined.
if(!defined('BASEPATH')) exit('No direct script access allowed');
/**
 * Rewrites relative resource references inside an HTML fragment so that they
 * point at project-served URLs of the form
 * {base_url}cdms/{moudle}/{action}/{id}?pid={projectid}.
 *
 * References are looked up through the loaded model's
 * get_id_by_path_and_project(); anything that cannot be resolved is left
 * exactly as it appeared in the input.
 *
 * Constructs that are rewritten:
 *  1. <img src=...>
 *  2. <a href=...>
 *  3. <iframe src=...>
 *  4. <frame src=...>
 *  5. window.open(...) calls in inline JavaScript
 *  6. background ... url(...) declarations in CSS
 */
class Myreplace {
    /** Maps each accepted module identifier to the model that resolves its paths. */
    private $model_by_moudle = array(
        'udata'   => 'mupload_data',
        'tdata'   => 'taskdata',
        'tresult' => 'taskresult',
        'dresult' => 'dmsresult',
    );
    /** Framework instance returned by get_instance(); declared to avoid a dynamic property. */
    private $CI;
    private $content;
    private $relative_dirname;
    private $projectid;
    private $moudle;

    public function __construct() {
        $this->CI = &get_instance();
    }

    /**
     * Rewrite every resolvable reference in $content.
     *
     * @param string $content   HTML content to process
     * @param string $relative  directory of the document, used to resolve relative paths
     * @param int    $projectid project id, passed to the model and appended as ?pid=
     * @param string $moudle    module identifier: udata, tdata, tresult or dresult (case-insensitive)
     * @return string the content with resolvable references rewritten
     * @throws InvalidArgumentException when $moudle is not one of the accepted identifiers
     */
    public function my_replace($content, $relative, $projectid, $moudle) {
        // Normalise once so the validation, the model choice and the generated URLs all agree.
        $moudle = strtolower((string) $moudle);
        if (!isset($this->model_by_moudle[$moudle])) {
            throw new InvalidArgumentException(
                'Unknown moudle "' . $moudle . '"; expected one of: '
                . implode(', ', array_keys($this->model_by_moudle))
            );
        }

        $this->content = $content;
        $this->relative_dirname = $relative;
        $this->projectid = $projectid;
        $this->moudle = $moudle;

        // NOTE(review): every model is registered under the same alias 'model'; confirm the
        // loader replaces a previously loaded model when my_replace() is called with a
        // different $moudle within one request.
        $this->CI->load->model($this->model_by_moudle[$moudle], 'model');

        $stages = array(
            '/<img(.+?)src=([\'\" ])?(.+?)([ >]+?)/i'                       => 'image_replace',
            '/<a(.+?)href=([\'\" ])?(.+?)([ >]+?)/i'                        => 'html_replace',
            '/<iframe(.+?)src=([\'\" ])?(.+?)([ >]+?)/i'                    => 'iframe_replace',
            '/<frame(.+?)src=([\'\" ])?(.+?)([ >]+?)/i'                     => 'frame_replace',
            // The dot is escaped so only a literal "window.open" is matched.
            '/window\.open([( ]+?)([\'" ]+?)(.+?)([ )]+?)/i'                => 'js_replace',
            // Terminator is "space or )" — '+' and '?' must not end the URL (query strings).
            '/background(.+?)url([( ])([\'" ]+?)(.+?)([ )]+?)/i'            => 'css_replace',
        );

        foreach ($stages as $pattern => $method) {
            $result = preg_replace_callback($pattern, array($this, $method), $content);
            // preg_replace_callback() yields null on a PCRE error; keep the previous content then.
            if ($result !== null) {
                $content = $result;
            }
        }
        return $content;
    }

    /** Callback for <img src=...>: rewrites to the picture action. */
    private function image_replace($matches) {
        return $this->rewrite_tag_attribute($matches, 'img', 'src', true);
    }

    /** Callback for <a href=...>: rewrites to the file action. */
    private function html_replace($matches) {
        return $this->rewrite_tag_attribute($matches, 'a', 'href', false);
    }

    /** Callback for <iframe src=...>: rewrites to the file action. */
    private function iframe_replace($matches) {
        return $this->rewrite_tag_attribute($matches, 'iframe', 'src', false);
    }

    /** Callback for <frame src=...>: rewrites to the file action. */
    private function frame_replace($matches) {
        return $this->rewrite_tag_attribute($matches, 'frame', 'src', false);
    }

    /**
     * Callback for window.open(...): rewrites the first argument and keeps the
     * remaining arguments untouched.
     */
    private function js_replace($matches) {
        if (!isset($matches[4]) || $matches[3] === '') {
            return $matches[0];
        }
        // Split "url','name','features'" into the URL part and everything from the first comma on.
        $comma = strpos($matches[3], ',');
        if ($comma === false) {
            $raw_href = $matches[3];
            $rest = '';
        } else {
            $raw_href = substr($matches[3], 0, $comma);
            $rest = substr($matches[3], $comma);
        }
        $href = rtrim($raw_href, '\'"');

        // Targets carrying an anchor are deliberately left alone.
        if (strpos($href, '#') !== false) {
            return $matches[0];
        }
        $url = $this->resolve_url($href, false);
        if ($url === null) {
            return $matches[0];
        }
        return 'window.open' . $matches[1] . $matches[2] . $url . $matches[2] . $rest . $matches[4];
    }

    /** Callback for CSS background ... url(...): rewrites to the picture action. */
    private function css_replace($matches) {
        if (!isset($matches[5]) || $matches[4] === '') {
            return $matches[0];
        }
        $url = $this->resolve_url(rtrim($matches[4], '\'"/'), true);
        if ($url === null) {
            return $matches[0];
        }
        return 'background' . $matches[1] . 'url' . $matches[2] . $matches[3] . $url . $matches[3] . $matches[5];
    }

    /**
     * Shared implementation of the four tag callbacks.
     *
     * $matches layout: 1 = text between the tag name and the attribute, 2 = opening
     * quote (may be empty), 3 = attribute value including its closing quote,
     * 4 = the space or '>' that ended the value.
     *
     * @param array  $matches   match array from preg_replace_callback()
     * @param string $tag       tag name to emit
     * @param string $attribute attribute name to emit
     * @param bool   $is_image  true for picture targets, false for document targets
     * @return string replacement text; the original match when nothing can be resolved
     */
    private function rewrite_tag_attribute($matches, $tag, $attribute, $is_image) {
        if (!isset($matches[4]) || $matches[3] === '') {
            return $matches[0];
        }
        $target = rtrim($matches[3], '\'"/');

        // Documents may carry "#anchor": look the file up without it, then re-attach it.
        $fragment = '';
        if (!$is_image) {
            $hash = strrpos($target, '#');
            if ($hash !== false) {
                $fragment = substr($target, $hash);
                $target = (string) substr($target, 0, $hash);
            }
        }

        $url = $this->resolve_url($target, $is_image);
        if ($url === null) {
            // Unresolvable: hand back the untouched markup instead of a lossy rebuild.
            return $matches[0];
        }
        return '<' . $tag . $matches[1] . $attribute . '=' . $matches[2]
            . $url . $fragment . $matches[2] . $matches[4];
    }

    /**
     * Resolve a relative reference to its served URL.
     *
     * @param string $target   reference as written in the document (quotes already removed)
     * @param bool   $is_image selects the readpic/picfile actions instead of readfile/txtfile
     * @return string|null the rewritten URL, or null when the reference is empty,
     *                     absolute, or unknown to the model
     */
    private function resolve_url($target, $is_image) {
        $target = (string) $target;
        // Skip empty values and anything with a scheme (http:, mailto:, data:, ...) or "//host".
        if ($target === '' || preg_match('#^(?:[a-z][a-z0-9+.\-]*:|//)#i', $target)) {
            return null;
        }

        // Climb one directory for every "../" found in the reference.
        $dirname = (string) $this->relative_dirname;
        $levels = substr_count($target, '../');
        for ($i = 0; $i < $levels; $i++) {
            $slash = strrpos($dirname, '/');
            if ($slash === false) {
                $dirname = '';
                break;
            }
            $dirname = substr($dirname, 0, $slash);
        }
        $relativepath = rtrim($dirname, '/') . '/' . ltrim($target, './');

        $id = $this->CI->model->get_id_by_path_and_project($relativepath, $this->projectid);
        if (empty($id)) {
            return null;
        }

        if ($is_image) {
            $action = ($this->moudle === 'dresult') ? 'readpic' : 'picfile';
        } else {
            $action = ($this->moudle === 'dresult') ? 'readfile' : 'txtfile';
        }
        return $this->CI->config->item('base_url') . 'cdms/' . $this->moudle
            . '/' . $action . '/' . $id . '?pid=' . $this->projectid;
    }
}
/* End of Myreplace.php */
/* Location: /application/libraries/Myreplace.php */
PS:这里再为大家提供2款非常方便的正则表达式工具供大家参考使用:
JavaScript正则表达式在线测试工具: http://tools.jb51.net/regex/javascript
正则表达式在线生成工具: http://tools.jb51.net/regex/create_reg
希望本文所述对大家的php程序设计有所帮助。
php 正则替换 HTML页面