2009年7月10日 星期五

ReadHtml()

/**
 * Downloads the page at the hard-coded URL through an HTTP proxy, prints the
 * raw text, prints the last timestamp found in it, and then prints the page
 * content with whitespace and HTML tags stripped.
 *
 * <p>Output goes to {@code System.out} in three parts:
 * <ol>
 *   <li>the downloaded page, with all of its lines joined without separators;</li>
 *   <li>the last substring shaped like {@code yyyy/MM/dd HH:mm}
 *       (for example {@code 2009/01/28 14:52}), or an empty string if none is found;</li>
 *   <li>the page text after whitespace removal, with every HTML tag replaced
 *       by a comma, one non-empty line at a time.</li>
 * </ol>
 *
 * @throws Exception if the URL is malformed, the connection fails, or the
 *                   response cannot be read
 */
public static void ReadHtml() throws Exception {
    InetSocketAddress proxyAddress = new java.net.InetSocketAddress("192.168.1.5", 3082);
    Proxy proxy = new java.net.Proxy(java.net.Proxy.Type.HTTP, proxyAddress);

    URL srcUrl = new URL("http://rate.bot.com.tw/Pages/Static/UIP003.zh-TW.htm");

    // Go through the proxy. To bypass it, read from srcUrl.openStream() instead.
    HttpURLConnection connection = (HttpURLConnection) srcUrl.openConnection(proxy);

    // Lines are joined without separators, so the page becomes one long string.
    StringBuilder page = new StringBuilder();
    try {
        // The page is served as UTF-8.
        BufferedReader in = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), "UTF-8"));
        try {
            String thisLine;
            while ((thisLine = in.readLine()) != null) {
                page.append(thisLine);
            }
        } finally {
            in.close();
        }
    } finally {
        // Release the connection even when reading fails.
        connection.disconnect();
    }

    String html = page.toString();
    System.out.println("html \n"+html);

    // Keep the last timestamp on the page, e.g. 2009/01/28 14:52.
    String stime = "";
    Pattern pattern = Pattern.compile("[0-9]{4}/[0-1]{1}[0-9]{1}/[0-9]{2} [0-9]{2}:[0-9]{2}");
    Matcher matcher = pattern.matcher(html);
    while (matcher.find()) {
        stime = matcher.group();
    }
    System.out.println(" 異動時間 "+stime);

    // Strip all whitespace.
    html = html.replaceAll("\\s+", "");
    // NOTE(review): if this literal is a plain space it can never match, because
    // the previous step already removed every whitespace character. It was
    // presumably "&nbsp;" (or U+00A0) before the snippet was published; confirm
    // against the original source.
    html = html.replaceAll(" ", "\n");
    // Replace every HTML tag with a comma so the cell values are comma-separated.
    html = html.replaceAll("<[^>]+>", ",");

    // Split the string directly instead of round-tripping it through bytes in the
    // platform default charset, which would corrupt non-ASCII text on some systems.
    String[] lines = html.split("\n");
    for (int i = 0; i < lines.length; i++) {
        if (!lines[i].equals("")) {
            System.out.println(lines[i]);
        }
    }
}

沒有留言: