以下为抓取到的页面,想通过正则表达式函数取出里边的关于教师课表安排的信息,希望各位大侠给帮忙,谢谢。
HTTP/1.1 200 OK
Connection: close
Date: Thu, 07 May 2015 07:44:28 GMT
Server: YxlinkWAF
X-Powered-By: ASP.NET
X-AspNet-Version: 1.1.4322
Cache-Control: private
Content-Type: text/html; charset=gb2312
Content-Length: 11062
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<HTML lang="gb2312">
<HEAD>
<title>现代教学管理信息系统</title><meta http-equiv="X-UA-Compatible" content="IE=EmulateIE7">
<meta http-equiv="Content-Type" content="text/html; charset=gb2312">
<meta http-equiv="Content-Language" content="gb2312">
<meta content="all" name="robots">
<meta content="作者信息" name="author">
<meta content="版权信息" name="Copyright">
<meta content="站点介绍" name="description">
<meta content="站点关键词" name="keywords">
<LINK href="style/base/favicon.ico" type="image/x-icon" rel="icon">
<link rel="stylesheet" href="style/base/jw.css" type="text/css" media="all">
<LINK media="all" href="style/standard/jw.css" type="text/css" rel="stylesheet">
<script src="js/xtwh.js" type="text/javascript"></script>
<script defer>
function PutSettings()
{ try{
factory.printing.header="";
factory.printing.footer="";
factory.printing.portrait=true;
}catch(e){}
}
</script>
<style> @media Print { .bgnoprint { }
.noprint { DISPLAY: none }}
</style>
</HEAD>
<BODY onload="PutSettings();">
<OBJECT id="factory" style="DISPLAY: none" codeBase="ScriptX.cab#Version=5,60,0,360" classid="clsid:1663ed61-23eb-11d2-b92f-008048fdd814"
VIEWASTEXT>
</OBJECT>
<form name="Form1" method="post" action="jstjkbcx.aspx?zgh=080591&xm=%CD%F5%C0%E8%B7%E5&gnmkdm=N122303" id="Form1">
<input type="hidden" name="__EVENTTARGET" value="" />
<input type="hidden" name="__EVENTARGUMENT" value="" />
<input type="hidden" name="__VIEWSTATE" value="。。。。。。" />
<script language="javascript" type="text/javascript">
<!--
function __doPostBack(eventTarget, eventArgument) {
var theform;
if (window.navigator.appName.toLowerCase().indexOf("microsoft") > -1) {
theform = document.Form1;
}
else {
theform = document.forms["Form1"];
}
theform.__EVENTTARGET.value = eventTarget.split("$").join(":");
theform.__EVENTARGUMENT.value = eventArgument;
theform.submit();
}
// -->
</script>
<div class="toolbox noprint">
<div class="buttonbox"></div>
<div class="searchbox">
<p class="search_con">
学年:
<select name="xn" onchange="__doPostBack('xn','')" language="javascript" id="xn">
<option selected="selected" value="2014-2015">2014-2015</option>
<option value="2013-2014">2013-2014</option>
<option value="2012-2013">2012-2013</option>
<option value="2011-2012">2011-2012</option>
<option value="2010-2011">2010-2011</option>
<option value="2009-2010">2009-2010</option>
<option value="2008-2009">2008-2009</option>
<option value=""></option>
</select>学期:
<select name="xq" onchange="__doPostBack('xq','')" language="javascript" id="xq">
<option selected="selected" value="2">2</option>
<option value="1">1</option>
<option value=""></option>
</select>
教师姓名:
<select name="js" onchange="__doPostBack('js','')" language="javascript" id="js">
<option selected="selected" value="080591">王峰</option>
<option value=""></option>
</select>
<input name="btnPrint" id="btnPrint" type="button" class="button" style="DISPLAY:none" value="打印课表" onclick="window.print();" />
</p>
<p class="search_title"><em></em></p>
</div>
<p class="toolbox_fot"><em></em></p>
</div>
<div class="main_box">
<div class="mid_box">
<div class="title">
<p>
</p>
</div>
<span class="formbox">
<!--startprint-->
<table id="Table6" cellspacing="0" cellpadding="4" class="blacktab" border="0" height="132" width="100%">
<tr>
<td colspan="2" rowspan="1" width="2%">时间</td><td align="Center" width="14%">星期一</td><td align="Center" width="14%">星期二</td><td align="Center" width="14%">星期三</td><td align="Center" width="14%">星期四</td><td align="Center" width="14%">星期五</td><td align="Center" width="14%">星期六</td><td align="Center" width="14%">星期日</td>
</tr><tr>
<td colspan="2">早晨</td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td>
</tr><tr>
<td rowspan="4" width="1%">上午</td><td width="1%">第1节</td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td>
</tr><tr>
<td>第2节</td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td>
</tr><tr>
<td>第3节</td><td align="Center" rowspan="2">线性代数B<br>1-17(3,4)<br>王黎峰<br>电教楼205<br>计科14(1)</td><td align="Center"> </td><td align="Center"> </td><td align="Center" rowspan="2">线性代数B<br>1-17(3,4)<br>王黎峰<br>电教楼205<br>计科14(1)</td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td>
</tr><tr>
<td>第4节</td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td>
</tr><tr>
<td rowspan="4" width="1%">下午</td><td>第5节</td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td>
</tr><tr>
<td>第6节</td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td>
</tr><tr>
<td>第7节</td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td>
</tr><tr>
<td>第8节</td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td>
</tr><tr>
<td rowspan="2" width="1%">晚上</td><td>第9节</td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td>
</tr><tr>
<td>第10节</td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td><td align="Center"> </td>
</tr>
</table>
<!--endprint-->
<TABLE id="Table3" width="100%" class="noprint">
<TR>
<TD align="left">实践课(或无上课时间)信息:</TD>
</TR>
<TR>
<TD colSpan="3"><table class="datelist" cellspacing="0" cellpadding="3" border="0" id="DataGrid1" width="100%">
<tr class="datelisthead">
<td>课程名称</td><td>教师</td><td>学分</td><td>起止周</td><td>上课时间</td><td>上课地点</td><td>教学班组成</td>
</tr>
</table></TD>
</TR>
</TABLE>
<div align="left" class="noprint">调、停(补)课信息:</div>
<table class="datelist noprint" cellspacing="0" cellpadding="3" border="0" id="DBGrid" width="100%">
<tr class="datelisthead">
<td>编号</td><td>课程名称</td><td>原上课时间地点</td><td>现上课时间地点</td><td>申请时间</td><td>申请原因</td>
</tr>
</table>
</span>
<div class="footbox">
<em class="footbox_con">
<span class="pagination"></span>
<span class="footbutton"></span>
</em>
</div>
</div>
</div>
</form>
</BODY>
</HTML>
------解决思路----------------------
include 'simple_html_dom.php'; // third-party parser; download a copy separately

/**
 * Expand table rows into a rectangular grid of cell texts.
 *
 * A cell with rowspan/colspan is copied into every grid position it covers,
 * so each row of the result has one entry per visual column. Without this, a
 * row that sits below a rowspan cell has fewer <td> elements and its values
 * would be attributed to the wrong columns.
 *
 * @param array $rows simple_html_dom <tr> nodes in document order
 * @return array list of rows, each a list of cell texts indexed by column
 */
function expandTimetableGrid($rows)
{
    $grid = [];
    // column index => ['rows' => rows still to cover, 'text' => cell text]
    $carried = [];

    foreach ($rows as $tr) {
        $line = [];
        $col = 0;

        foreach ($tr->find('td') as $td) {
            // Fill columns still covered by a rowspan cell from an earlier row.
            while (isset($carried[$col])) {
                $line[$col] = $carried[$col]['text'];
                if (--$carried[$col]['rows'] < 1) {
                    unset($carried[$col]);
                }
                $col++;
            }

            $text = $td->text();
            // A missing attribute yields false, i.e. 0 after the cast, hence max(1, ...).
            $rowspan = max(1, (int) $td->getAttribute('rowspan'));
            $colspan = max(1, (int) $td->getAttribute('colspan'));

            for ($i = 0; $i < $colspan; $i++, $col++) {
                $line[$col] = $text;
                if ($rowspan > 1) {
                    $carried[$col] = ['rows' => $rowspan - 1, 'text' => $text];
                } else {
                    unset($carried[$col]);
                }
            }
        }

        // Columns to the right of the last <td> may still be covered from above.
        // Spans opened in this row are all left of $col, so they are not touched.
        foreach ($carried as $c => $span) {
            if ($c >= $col) {
                $line[$c] = $span['text'];
                if ($span['rows'] <= 1) {
                    unset($carried[$c]);
                } else {
                    $carried[$c]['rows']--;
                }
            }
        }

        ksort($line);
        $grid[] = $line;
    }

    return $grid;
}

$dom = new simple_html_dom();
// The content must be converted to UTF-8 before it can be processed.
$dom->load($content);

$res = ['name' => null, 'info' => []];

// Teacher name: the selected <option> of the "js" <select>, if present.
$teacher = $dom->find('select[id=js] option[selected=selected]', 0);
if ($teacher) {
    $res['name'] = $teacher->text();
}

// In the expanded grid the first two columns are labels (time of day and
// period number); the remaining columns are the weekdays.
$labelColumns = 2;
$grid = expandTimetableGrid($dom->find('table[id=Table6] tr'));
$week = $grid ? array_slice($grid[0], $labelColumns) : [];

// Organise the data as weekday => list of cell texts, one entry per period.
foreach ($grid as $rowIndex => $line) {
    // Row 0 is the weekday header and row 1 is the "早晨" row; neither is a period.
    if ($rowIndex < 2) {
        continue;
    }
    foreach (array_slice($line, $labelColumns) as $dayIndex => $cell) {
        if (isset($week[$dayIndex])) {
            $res['info'][$week[$dayIndex]][] = $cell;
        }
    }
}

echo "<pre>";
print_r($res);
echo "</pre>";
/*
Expected output for the sample page (abridged). Index n holds period n+1;
blank entries are cells that hold no course text. The course spans periods
3 and 4 (rowspan="2"), so it appears at indexes 2 and 3.

Array
(
    [name] => 王峰
    [info] => Array
        (
            [星期一] => Array
                (
                    [0] =>
                    [1] =>
                    [2] => 线性代数B
1-17(3,4)
王黎峰
电教楼205
计科14(1)
                    [3] => 线性代数B
1-17(3,4)
王黎峰
电教楼205
计科14(1)
                    [4] =>
                    [5] =>
                    [6] =>
                    [7] =>
                    [8] =>
                    [9] =>
                )
            [星期二] => Array ( indexes 0-9, all blank )
            [星期三] => Array ( indexes 0-9, all blank )
            [星期四] => Array ( same content as 星期一 )
            [星期五] => Array ( indexes 0-9, all blank )
            [星期六] => Array ( indexes 0-9, all blank )
            [星期日] => Array ( indexes 0-9, all blank )
        )
)
*/
// Process the course information according to your own needs.
$txt = <<<EOF
线性代数B
1-17(3,4)
王黎峰
电教楼205
计科14(1)
EOF;
// Split on any newline style. PHP_EOL reflects the platform running PHP, not
// the line endings actually present in the text, so exploding on it can leave
// the text unsplit or leave stray "\r" characters. The explicit alternation is
// used instead of \R because \R without the /u flag also matches byte 0x85,
// which can occur inside a multi-byte UTF-8 character.
$arr = preg_split('/\r\n|\r|\n/', $txt);
echo "<pre>";
print_r($arr);
echo "</pre>";