我这段代码用于抓取当当网商品的名称、图片和价格。
我的正则每次只能匹配商品名称、图片、价格三项中的一项，我想一次把三项全部匹配出来，求指点。
- Java code
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Downloads a product page and prints the product name, picture URL and price
 * that its regular expressions find in the HTML.
 */
public class test {

    /** Product name: the text between {@code <h1>} and {@code </h1>}. */
    private static final Pattern NAME_PATTERN = Pattern.compile("<h1>(.*?)</h1>");

    /** Product picture: the value of a {@code src} attribute ending in {@code b.jpg}. */
    private static final Pattern PICTURE_PATTERN = Pattern.compile("src=\"(.*?b\\.jpg)\"");

    /** Price: the first decimal number that follows {@code class="num"} on the same line. */
    private static final Pattern PRICE_PATTERN =
            Pattern.compile("class=\"num\".*?(\\d+\\.\\d+)");

    public static void main(String[] args) {
        String url = "http://product.dangdang.com/product.aspx?product_id=20689512";
        new test().spiderProduct(url);
    }

    /**
     * Fetches the page at {@code url} (decoded as gb2312) and prints every match of the
     * name, picture and price patterns, in that order, one match per line.
     *
     * <p>Previously the three expressions were assigned to one variable in sequence, so only
     * the last one (the price) was ever applied. Each pattern is now applied in turn.
     *
     * @param url address of the product page; nothing is printed when it is null or blank
     */
    public void spiderProduct(String url) {
        String content = getURLContent(url, "gb2312");
        if (content == null) {
            // getURLContent signals a null/blank URL with null; there is nothing to scan.
            return;
        }
        printMatches(NAME_PATTERN, content);
        printMatches(PICTURE_PATTERN, content);
        printMatches(PRICE_PATTERN, content);
    }

    /** Prints capture group 1 of every match of {@code pattern} in {@code content}. */
    private static void printMatches(Pattern pattern, String content) {
        Matcher matcher = pattern.matcher(content);
        while (matcher.find()) {
            System.out.println(matcher.group(1));
        }
    }

    /**
     * Reads the whole resource at {@code url} into a string.
     *
     * @param url      address to read
     * @param encoding charset name used to decode the bytes; gb2312 is used when null
     * @return {@code null} when {@code url} is null or blank; otherwise the text read so far.
     *         A malformed URL or an I/O failure is reported on standard error and whatever
     *         was read before the failure (possibly the empty string) is returned.
     */
    public String getURLContent(String url, String encoding) {
        if (url == null || "".equals(url.trim())) {
            return null;
        }
        StringBuilder content = new StringBuilder();
        // try-with-resources closes the reader and the underlying stream even on failure.
        try (InputStream in = new BufferedInputStream(new URL(url).openStream());
                InputStreamReader theHTML =
                        new InputStreamReader(in, encoding != null ? encoding : "gb2312")) {
            int c;
            while ((c = theHTML.read()) != -1) {
                content.append((char) c);
            }
        } catch (MalformedURLException e) {
            System.err.println(e);
        } catch (IOException e) {
            System.err.println(e);
        }
        return content.toString();
    }
}
------解决方案--------------------
- Java code
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Name, picture URL and price scraped from a product page. Any field is {@code null} when
 * the corresponding pattern did not match (or the page could not be requested at all).
 */
class ProductItem {

    /** Product name: the text between {@code <h1>} and {@code </h1>}. */
    private static final Pattern NAME_PATTERN = Pattern.compile("<h1>(.*?)</h1>");

    /** Product picture: the value of a {@code src} attribute ending in {@code b.jpg}. */
    private static final Pattern PICTURE_PATTERN = Pattern.compile("src=\"(.*?b\\.jpg)\"");

    /** Price: the first decimal number that follows {@code class="num"} on the same line. */
    private static final Pattern PRICE_PATTERN =
            Pattern.compile("class=\"num\".*?(\\d+\\.\\d+)");

    /**
     * Name, picture and price in one expression (groups 1, 2 and 3). DOTALL lets the
     * {@code .*?} gaps between the three parts span line breaks.
     */
    private static final Pattern COMBINED_PATTERN = Pattern.compile(
            "<h1>(.*?)</h1>.*?src=\"(.*?b\\.jpg)\".*?num\".*?(\\d+\\.\\d+).*",
            Pattern.DOTALL);

    String name;
    String picture;
    String price;

    public ProductItem(String name, String picture, String price) {
        super();
        this.name = name;
        this.picture = picture;
        this.price = price;
    }

    public String getPrice() {
        return price;
    }

    public void setPrice(String price) {
        this.price = price;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getPicture() {
        return picture;
    }

    public void setPicture(String picture) {
        this.picture = picture;
    }

    /**
     * Builds an item by searching the page for name, picture and price independently,
     * taking the first match of each pattern.
     *
     * @param urlString address of the product page
     * @return an item whose fields are null where nothing matched; all fields are null when
     *         {@code urlString} is null or blank (this used to throw NullPointerException)
     */
    public static ProductItem createItem(String urlString) {
        String content = getURLContent(urlString, "gb2312");
        if (content == null) {
            return new ProductItem(null, null, null);
        }
        return new ProductItem(
                firstGroup(NAME_PATTERN, content),
                firstGroup(PICTURE_PATTERN, content),
                firstGroup(PRICE_PATTERN, content));
    }

    /**
     * Builds an item with a single combined expression that captures name, picture and
     * price in one pass. All three must appear in that order for anything to be captured.
     *
     * @param urlString address of the product page
     * @return an item with all three fields set on a match, otherwise all fields null
     *         (including when {@code urlString} is null or blank)
     */
    public static ProductItem getItem(String urlString) {
        String name = null, picture = null, price = null;
        String content = getURLContent(urlString, "gb2312");
        if (content != null) {
            Matcher matcher = COMBINED_PATTERN.matcher(content);
            // The trailing ".*" under DOTALL consumes the rest of the input, so there can be
            // at most one match; a single find() is enough.
            if (matcher.find()) {
                name = matcher.group(1);
                picture = matcher.group(2);
                price = matcher.group(3);
            }
        }
        return new ProductItem(name, picture, price);
    }

    /** Returns capture group 1 of the first match of {@code pattern}, or null if none. */
    private static String firstGroup(Pattern pattern, String content) {
        Matcher matcher = pattern.matcher(content);
        return matcher.find() ? matcher.group(1) : null;
    }

    /**
     * Reads the whole resource at {@code urlString} into a string.
     *
     * @param urlString address to read
     * @param encoding  charset name used to decode the bytes; gb2312 is used when null
     * @return {@code null} when {@code urlString} is null or blank; otherwise the text read
     *         so far. A malformed URL or an I/O failure is reported on standard error and
     *         whatever was read before the failure (possibly the empty string) is returned.
     */
    public static String getURLContent(String urlString, String encoding) {
        if (urlString == null || "".equals(urlString.trim())) {
            return null;
        }
        StringBuilder content = new StringBuilder();
        // try-with-resources closes the reader and the underlying stream even on failure.
        try (InputStream in = new BufferedInputStream(new URL(urlString).openStream());
                InputStreamReader theHTML =
                        new InputStreamReader(in, encoding != null ? encoding : "gb2312")) {
            int c;
            while ((c = theHTML.read()) != -1) {
                content.append((char) c);
            }
        } catch (MalformedURLException e) {
            System.err.println(e);
        } catch (IOException e) {
            System.err.println(e);
        }
        return content.toString();
    }

    @Override
    public String toString() {
        return "name = " + name + " \npicture = " + picture + " \nprice = " + price;
    }
}

/** Demo entry point: scrapes one product page with both extraction strategies. */
public class dsfdsf {
    public static void main(String[] args) {
        String url = "http://product.dangdang.com/product.aspx?product_id=20689512";
        ProductItem productItem = ProductItem.createItem(url);
        System.out.println(productItem);
        productItem = ProductItem.getItem(url);
        System.out.println(productItem);
    }
}