当前位置: 代码迷 >> J2EE >> 大家看看小弟我抓取当当网产品的正则表达式哪里不对
  详细解决方案

大家看看小弟我抓取当当网产品的正则表达式哪里不对

热度:62   发布时间:2016-04-22 03:03:26.0
大家看看我抓取当当网产品的正则表达式哪里不对
我这个代码是抓取当当网产品的产品名,图片,和价格
我的正则一次只能匹配产品名、图片、价格中的一个,我想一次把三个全部匹配出来,求指点
Java code
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Minimal page scraper: downloads a product page and prints the product
 * name, picture URL and price found in it.
 */
public class test {

    /** Charset used to decode the page when the caller passes {@code null}. */
    private static final String DEFAULT_ENCODING = "gb2312";

    /**
     * One pattern per field, applied in this order. Capture group 1 of each
     * pattern holds the value to print.
     */
    private static final Pattern[] FIELD_PATTERNS = {
        Pattern.compile("<h1>(.*?)</h1>"),               // product name
        Pattern.compile("src=\"(.*?b\\.jpg)\""),          // product picture
        Pattern.compile("class=\"num\".*?(\\d+\\.\\d+)") // price
    };

    public static void main(String[] args) {
        String url = "http://product.dangdang.com/product.aspx?product_id=20689512";
        new test().spiderProduct(url);
    }

    /**
     * Downloads the page at {@code url} and prints, one per line, every match
     * of the name pattern, then of the picture pattern, then of the price
     * pattern.
     *
     * <p>The original code overwrote a single {@code regStr} variable three
     * times, so only the last pattern (price) was ever applied. Each pattern
     * is now run against the page in turn.
     *
     * @param url address of the product page; nothing is printed when it is
     *            {@code null} or blank
     */
    public void spiderProduct(String url) {
        String content = getURLContent(url, "gb2312");
        if (content == null) {
            // getURLContent signals a null/blank URL with null; there is nothing to scan.
            return;
        }
        for (int i = 0; i < FIELD_PATTERNS.length; i++) {
            Matcher matcher = FIELD_PATTERNS[i].matcher(content);
            while (matcher.find()) {
                System.out.println(matcher.group(1));
            }
        }
    }

    /**
     * Reads the whole resource behind {@code url} as text.
     *
     * @param url      address to read
     * @param encoding charset name used to decode the bytes; {@code null}
     *                 selects gb2312
     * @return {@code null} when {@code url} is {@code null} or blank;
     *         otherwise the decoded text. If the URL is malformed or reading
     *         fails, the error is printed to {@code System.err} and whatever
     *         was read so far (possibly the empty string) is returned.
     */
    public String getURLContent(String url, String encoding) {
        if (url == null || "".equals(url.trim())) {
            return null;
        }
        StringBuffer content = new StringBuffer();
        InputStream in = null;
        try {
            URL u = new URL(url);
            in = new BufferedInputStream(u.openStream());
            InputStreamReader theHTML = new InputStreamReader(in,
                    encoding != null ? encoding : DEFAULT_ENCODING);
            // Read in chunks instead of one char per call.
            char[] buffer = new char[4096];
            int read;
            while ((read = theHTML.read(buffer)) != -1) {
                content.append(buffer, 0, read);
            }
        } catch (MalformedURLException e) {
            System.err.println(e);
        } catch (IOException e) {
            System.err.println(e);
        } finally {
            // The original never closed the stream; release it on every path.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if close itself fails.
                }
            }
        }
        return content.toString();
    }
}


------解决方案--------------------
Java code
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Name, picture URL and price scraped from a product page. Any field is
 * {@code null} when the corresponding pattern did not match.
 */
class ProductItem {

    /** Charset used to decode the page when the caller passes {@code null}. */
    private static final String DEFAULT_ENCODING = "gb2312";

    // Compiled once instead of on every call; the regex text is unchanged.
    private static final Pattern NAME_PATTERN = Pattern.compile("<h1>(.*?)</h1>");
    private static final Pattern PICTURE_PATTERN = Pattern.compile("src=\"(.*?b\\.jpg)\"");
    private static final Pattern PRICE_PATTERN = Pattern.compile("class=\"num\".*?(\\d+\\.\\d+)");

    /**
     * Name, picture and price in a single expression. DOTALL lets {@code .}
     * cross line breaks, so the three parts may sit on different lines, but
     * they must appear in this order in the page.
     */
    private static final Pattern COMBINED_PATTERN = Pattern.compile(
            "<h1>(.*?)</h1>.*?src=\"(.*?b\\.jpg)\".*?num\".*?(\\d+\\.\\d+).*",
            Pattern.DOTALL);

    String name;
    String picture;
    String price;

    public ProductItem(String name, String picture, String price) {
        this.name = name;
        this.picture = picture;
        this.price = price;
    }

    public String getPrice() {
        return price;
    }

    public void setPrice(String price) {
        this.price = price;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getPicture() {
        return picture;
    }

    public void setPicture(String picture) {
        this.picture = picture;
    }

    /**
     * Builds an item by running three independent patterns (name, picture,
     * price) over the page and taking the first match of each.
     *
     * @param urlString address of the product page
     * @return the scraped item; every field is {@code null} when
     *         {@code urlString} is {@code null} or blank (the original threw
     *         NullPointerException in that case)
     */
    public static ProductItem createItem(String urlString) {
        String content = getURLContent(urlString, "gb2312");
        if (content == null) {
            return new ProductItem(null, null, null);
        }
        String name = firstGroup(NAME_PATTERN, content);
        String picture = firstGroup(PICTURE_PATTERN, content);
        String price = firstGroup(PRICE_PATTERN, content);
        return new ProductItem(name, picture, price);
    }

    /**
     * Builds an item with one combined pattern that captures name, picture
     * and price in a single match.
     *
     * @param urlString address of the product page
     * @return the scraped item; every field is {@code null} when the combined
     *         pattern does not match or {@code urlString} is {@code null} or
     *         blank (the original threw NullPointerException for the latter)
     */
    public static ProductItem getItem(String urlString) {
        String name = null, picture = null, price = null;
        String content = getURLContent(urlString, "gb2312");
        if (content != null) {
            Matcher matcher = COMBINED_PATTERN.matcher(content);
            // The pattern ends in ".*" under DOTALL, so the first match runs to
            // the end of the input and a second match is impossible; a plain
            // "if" is equivalent to the original "while".
            if (matcher.find()) {
                name = matcher.group(1);
                picture = matcher.group(2);
                price = matcher.group(3);
            }
        }
        return new ProductItem(name, picture, price);
    }

    /** Returns capture group 1 of the first match, or {@code null} when there is no match. */
    private static String firstGroup(Pattern pattern, String content) {
        Matcher matcher = pattern.matcher(content);
        return matcher.find() ? matcher.group(1) : null;
    }

    /**
     * Reads the whole resource behind {@code urlString} as text.
     *
     * @param urlString address to read
     * @param encoding  charset name used to decode the bytes; {@code null}
     *                  selects gb2312
     * @return {@code null} when {@code urlString} is {@code null} or blank;
     *         otherwise the decoded text. If the URL is malformed or reading
     *         fails, the error is printed to {@code System.err} and whatever
     *         was read so far (possibly the empty string) is returned.
     */
    public static String getURLContent(String urlString, String encoding) {
        if (urlString == null || "".equals(urlString.trim())) {
            return null;
        }
        StringBuffer content = new StringBuffer();
        InputStream in = null;
        try {
            URL url = new URL(urlString);
            in = new BufferedInputStream(url.openStream());
            InputStreamReader theHTML = new InputStreamReader(in,
                    encoding != null ? encoding : DEFAULT_ENCODING);
            // Read in chunks instead of one char per call.
            char[] buffer = new char[4096];
            int read;
            while ((read = theHTML.read(buffer)) != -1) {
                content.append(buffer, 0, read);
            }
        } catch (MalformedURLException e) {
            System.err.println(e);
        } catch (IOException e) {
            System.err.println(e);
        } finally {
            // The original never closed the stream; release it on every path.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if close itself fails.
                }
            }
        }
        return content.toString();
    }

    /** Renders the three fields, one per line. */
    public String toString() {
        return "name = " + name + "  \npicture = " + picture + "  \nprice = "
                + price;
    }
}

/** Demo entry point: scrapes the same page with both extraction strategies. */
public class dsfdsf {
    public static void main(String[] args) {
        String url = "http://product.dangdang.com/product.aspx?product_id=20689512";
        // Three separate patterns.
        ProductItem productItem = ProductItem.createItem(url);
        System.out.println(productItem);
        // One combined pattern (downloads the page a second time).
        productItem = ProductItem.getItem(url);
        System.out.println(productItem);
    }
}