php切割页面div内容的实现代码分享

亮点：
1、利用php也能实现对页面div的切割处理。这里的做法抛砖引玉，希望读者能够提供更加完美的解决方案。
2、切割处理方法已经封装成一个方法，可以直接引用。
3、顺便加上标签云的截取示例：getWebDiv('id="taglist"', "http://www.jb51.net/tag/");
代码如下：
<?php
// Serve the script's output as UTF-8 encoded HTML.
header("Content-type: text/html; charset=utf-8");
/**
 * Cut one <div> element (including everything nested inside it) out of an
 * HTML page.
 *
 * The element is located by a plain substring search: the first occurrence
 * of $div_id in the page is found, and the nearest "<div" that starts before
 * that occurrence is taken as the target. No check is made that the
 * occurrence really lies inside that opening tag, so $div_id must be written
 * exactly as it appears in the page source, e.g. 'id="taglist"'.
 *
 * @param string       $div_id Attribute text identifying the div, e.g. 'id="taglist"'.
 * @param string|false $url    Page to download; false to search $data instead.
 * @param string|false $data   HTML to search when $url is false.
 * @return string|false The markup from the div's opening tag up to its
 *                      matching closing tag, terminated with "</div>".
 *                      False when the page is unavailable, $div_id is not
 *                      found, no div starts before it, or the div is never
 *                      closed.
 */
function getWebDiv($div_id, $url = false, $data = false)
{
    if ($url !== false) {
        $data = file_get_contents($url);
    }
    // A failed download yields false; there is nothing to search then.
    if (!is_string($data) || $data === '' || !is_string($div_id) || $div_id === '') {
        return false;
    }

    // Read the first declared charset and convert GB2312/GBK pages to UTF-8.
    // Only the declared value is inspected, so a stray "utf-8" elsewhere in
    // the page cannot mask a GBK declaration.
    if (preg_match('/charset\s*=\s*["\']?\s*([a-z0-9_\-]+)/i', $data, $charset_match)) {
        $charset = strtolower($charset_match[1]);
        if ($charset === 'gb2312' || $charset === 'gbk') {
            $converted = iconv($charset, 'utf-8', $data);
            if ($converted !== false) {
                $data = $converted;
            }
        }
    }

    // strpos() reports a miss with false, never with -1.
    $hit = strpos($data, $div_id);
    if ($hit === false) {
        return false; // not found
    }

    // Collect every opening and closing div tag with its byte offset. The
    // lookahead keeps tags that merely start with "div" from being counted.
    preg_match_all('/<div(?=[\s>\/])/i', $data, $pre_matches, PREG_OFFSET_CAPTURE);
    preg_match_all('/<\/div\s*>/i', $data, $suf_matches, PREG_OFFSET_CAPTURE);

    // Merge both lists into offset => kind ("p" = opening, "s" = closing).
    // Openers and closers are added independently so that a page with
    // unbalanced tags cannot cause an undefined-index access.
    $divs = array();
    $start = false;
    foreach ($pre_matches[0] as $pre_div) {
        $divs[$pre_div[1]] = 'p';
        // The target is the last div opened before the hit; this also
        // covers the final div of the page, which has no successor.
        if ($pre_div[1] < $hit) {
            $start = $pre_div[1];
        }
    }
    foreach ($suf_matches[0] as $suf_div) {
        $divs[$suf_div[1]] = 's';
    }
    if ($start === false) {
        return false; // the hit is not preceded by any div
    }

    // Walk the tags in document order after the target's opening tag.
    ksort($divs);
    $deeper = 0;
    foreach ($divs as $offset => $kind) {
        if ($offset <= $start) {
            continue;
        }
        if ($kind === 'p') {
            $deeper++; // a nested div opens: one level down
        } elseif ($deeper === 0) {
            // First closer at the target's own level: this ends the div.
            return substr($data, $start, $offset - $start) . '</div>';
        } else {
            $deeper--; // a nested div closes: one level up
        }
    }

    return false; // the div is never closed
}
// Demo: download the tag page and print the div located via id="taglist".
echo getWebDiv('id="taglist"', "http://www.jb51.net/tag/");
//End_php

考虑到不同页面中 id 属性的引号写法不一致（双引号、单引号或不加引号），用于匹配的字符串（例如 id="u"）需要由用户按照页面源码中的实际写法自行填写。
声明：此段php只针对带 id div内容的读取。
完善：匹配任意可闭合带id标签
代码如下：
<?php
// Serve the script's output as UTF-8 encoded HTML.
header("Content-type: text/html; charset=utf-8");
/**
 * Cut one closable element (including everything nested inside it) out of
 * an HTML page.
 *
 * The element is located by a plain substring search: the first occurrence
 * of $tag_id in the page is found, and the nearest "<$tag" that starts
 * before that occurrence is taken as the target. No check is made that the
 * occurrence really lies inside that opening tag, so $tag_id must be written
 * exactly as it appears in the page source, e.g. 'id="nav"'.
 *
 * @param string       $tag_id Attribute text identifying the element, e.g. 'id="nav"'.
 * @param string|false $url    Page to download; false to search $data instead.
 * @param string       $tag    Element name to cut out, e.g. "div", "ul", "section".
 * @param string|false $data   HTML to search when $url is false.
 * @return string|false The markup from the element's opening tag up to its
 *                      matching closing tag, terminated with "</$tag>".
 *                      False when the page is unavailable, $tag_id is not
 *                      found, no such element starts before it, or the
 *                      element is never closed.
 */
function getWebTag($tag_id, $url = false, $tag = "div", $data = false)
{
    if ($url !== false) {
        $data = file_get_contents($url);
    }
    // A failed download yields false; there is nothing to search then.
    if (!is_string($data) || $data === '' || !is_string($tag_id) || $tag_id === '') {
        return false;
    }
    if (!is_string($tag) || $tag === '') {
        return false;
    }

    // Read the first declared charset and convert GB2312/GBK pages to UTF-8.
    // Only the declared value is inspected, so a stray "utf-8" elsewhere in
    // the page cannot mask a GBK declaration.
    if (preg_match('/charset\s*=\s*["\']?\s*([a-z0-9_\-]+)/i', $data, $charset_match)) {
        $charset = strtolower($charset_match[1]);
        if ($charset === 'gb2312' || $charset === 'gbk') {
            $converted = iconv($charset, 'utf-8', $data);
            if ($converted !== false) {
                $data = $converted;
            }
        }
    }

    // strpos() reports a miss with false, never with -1.
    $hit = strpos($data, $tag_id);
    if ($hit === false) {
        return false; // not found
    }

    // Collect every opening and closing tag with its byte offset. The tag
    // name is quoted for use in the pattern, and the lookahead keeps longer
    // names from being counted (e.g. "<abbr" when $tag is "a").
    $tag_pattern = preg_quote($tag, '/');
    preg_match_all('/<' . $tag_pattern . '(?=[\s>\/])/i', $data, $pre_matches, PREG_OFFSET_CAPTURE);
    preg_match_all('/<\/' . $tag_pattern . '\s*>/i', $data, $suf_matches, PREG_OFFSET_CAPTURE);

    // Merge both lists into offset => kind ("p" = opening, "s" = closing).
    // Openers and closers are added independently so that a page with
    // unbalanced tags cannot cause an undefined-index access.
    $tags = array();
    $start = false;
    foreach ($pre_matches[0] as $pre_tag) {
        $tags[$pre_tag[1]] = 'p';
        // The target is the last element opened before the hit; this also
        // covers the final element of the page, which has no successor.
        if ($pre_tag[1] < $hit) {
            $start = $pre_tag[1];
        }
    }
    foreach ($suf_matches[0] as $suf_tag) {
        $tags[$suf_tag[1]] = 's';
    }
    if ($start === false) {
        return false; // the hit is not preceded by any such element
    }

    // Walk the tags in document order after the target's opening tag.
    ksort($tags);
    $deeper = 0;
    foreach ($tags as $offset => $kind) {
        if ($offset <= $start) {
            continue;
        }
        if ($kind === 'p') {
            $deeper++; // a nested element opens: one level down
        } elseif ($deeper === 0) {
            // First closer at the target's own level: this ends the element.
            return substr($data, $start, $offset - $start) . '</' . $tag . '>';
        } else {
            $deeper--; // a nested element closes: one level up
        }
    }

    return false; // the element is never closed
}
// Demo: print three elements of the same page, located via their id
// attributes: a <ul>, a <div> (the default tag) and a <section>.
echo getWebTag('id="nav"', "http://mail.163.com/html/mail_intro/", "ul");
echo getWebTag('id="homeBanners"', "http://mail.163.com/html/mail_intro/");
echo getWebTag('id="performance"', "http://mail.163.com/html/mail_intro/", "section");
//End_php

作者： Zjmainstay