HtmlUnit 是一款开源的 Java 页面分析工具：读取页面后，可以有效地使用 HtmlUnit 分析页面上的内容。该项目可以模拟浏览器运行，被誉为 Java 浏览器的开源实现。这个浏览器没有界面，运行速度也非常快。
第一步:创建一个客户端
// Obtain the WebClient that is passed to the login and data-fetching methods below.
// HttpUtil is a project helper that is not shown in this article.
WebClient client = HttpUtil.getClient();
第二步:登录
/**
 * Logs in to xueqiu.com through the login form on its home page.
 *
 * <p>Loads {@code http://xueqiu.com}, fills the {@code username} and {@code password} inputs
 * of the form whose id is {@code form-login-index}, and clicks the first {@code <button>}
 * inside that form.
 *
 * @param client the web client used to load the page and submit the form
 * @return the page produced by clicking the form's first button
 * @throws IOException if loading the page or clicking the button fails, or if the login form
 *     or its button cannot be found on the page
 * @throws MalformedURLException kept in the signature for backward compatibility
 */
public static HtmlPage loginXueqiu(WebClient client)
        throws IOException, MalformedURLException {
    HtmlPage page = (HtmlPage) client.getPage("http://xueqiu.com");

    // getElementById yields null when the id is absent; fail with a clear message
    // instead of a NullPointerException on the next line.
    HtmlForm loginForm = (HtmlForm) page.getElementById("form-login-index");
    if (loginForm == null) {
        throw new IOException("Login form 'form-login-index' not found on http://xueqiu.com");
    }

    HtmlTextInput account = (HtmlTextInput) loginForm.getInputByName("username");
    HtmlPasswordInput password = (HtmlPasswordInput) loginForm.getInputByName("password");

    DomNodeList<?> buttons = loginForm.getElementsByTagName("button");
    if (buttons.isEmpty()) {
        throw new IOException("Login form 'form-login-index' contains no <button> element");
    }
    HtmlButton submit = (HtmlButton) buttons.get(0);

    // NOTE(review): the credentials are hard-coded; consider loading them from configuration.
    account.setValueAttribute("xueqiuclient@126.com");
    password.setValueAttribute("xueqiu");
    return (HtmlPage) submit.click();
}
登录之后即可抓取数据，以获取股票列表为例：
/**
 * Fetches a stock list from a JSON endpoint and converts each row into a {@code TStock}.
 *
 * <p>The response body is parsed as a JSON object, and every element of its {@code "data"}
 * array is passed to the {@code TStock(JSONArray)} constructor. A row that cannot be
 * converted is logged and skipped so that one bad record does not discard the whole list.
 *
 * @param client the web client used to request {@code url}
 * @param url the endpoint that returns the JSON stock list
 * @return the converted stocks; an empty list if the request or the top-level parsing fails
 */
private static List<TStock> getStockList(WebClient client, String url) {
    try {
        Page page = client.getPage(url);
        String body = page.getWebResponse().getContentAsString();
        // Parse the body once; the original parsed it twice and discarded the first result.
        JSONArray rows = JSONObject.fromObject(body).getJSONArray("data");

        List<TStock> stocks = new ArrayList<TStock>(rows.size());
        for (int i = 0; i < rows.size(); i++) {
            try {
                stocks.add(new TStock(rows.getJSONArray(i)));
            } catch (Exception e) {
                // Best effort: keep going, but leave a trace of the rejected row.
                log.warn("跳过无法解析的股票记录, index=" + i, e);
            }
        }
        return stocks;
    } catch (Exception e) {
        log.error("获取上证列表出错", e);
        return new ArrayList<TStock>();
    }
}
其中 url 是通过浏览器的开发者工具（调试模式）抓取到的接口地址，返回的是 JSON 数据。
/**
 * Shanghai stock list endpoint ({@code stockType=sha}): page 1, up to 2000 rows,
 * ordered by symbol ascending, returning the {@code symbol} and {@code name} columns.
 */
public static final String API_SHA="http://xueqiu.com/stock/quote_order.json?page=1&size=2000" +
"&order=asc&exchange=CN&stockType=sha&orderBy=symbol&column=symbol%2Cname";
/**
 * Shenzhen stock list endpoint ({@code stockType=sza}): page 1, up to 2000 rows,
 * ordered by symbol ascending, returning the {@code symbol} and {@code name} columns.
 */
public static final String API_SZA="http://xueqiu.com/stock/quote_order.json?page=1&size=2000" +
"&order=asc&orderBy=symbol&exchange=CN&stockType=sza&column=symbol%2Cname";
/**
 * Main financial indicators endpoint. The URL ends with {@code symbol=};
 * presumably the caller appends the stock symbol - confirm against usage.
 */
public static final String API_ZYCWZB="http://xueqiu.com/stock/f10/finmainindex.json?page=1&size=40&symbol=";
/**
 * Balance sheet endpoint. The URL ends with {@code symbol=};
 * presumably the caller appends the stock symbol - confirm against usage.
 */
public static final String API_BALSHEET="http://xueqiu.com/stock/f10/balsheet.json?page=1&size=40&symbol=";
下载代码:
http://pan.baidu.com/s/1sjHnrzv
提取码:wr9j