使用jsoup登录网站,关键是先弄清楚该网站在登录时需要提交哪些信息。
步骤:
(1) 使用Jsoup.connect(url)创建一个Connection对象。注意:这个方法只支持Web URL(http和https协议)。
(2) 设置User-Agent:conn.userAgent(user_agent);
(3) 设置请求参数:conn.data(params);
(4) 进行提交,并且获得返回值Connection.Response
(5) 从中解析cookies
package com.xing.jsoup;
import java.io.IOException;import java.util.HashMap;import java.util.Map;import org.jsoup.Connection;import org.jsoup.Connection.Method;import org.jsoup.Jsoup;public class FangdoCrawl {protected static int timeout = 1000*60;protected final static String user_agent = "Mozilla/5.0 (Windows NT 6.1; rv:12.0) Gecko/20100101 Firefox/12.0 AlexaToolbar/alxf-2.15";protected static String LOGIN_FLAG = "JSESSIONID";/** * @param args */public static void main(String[] args) {FangdoCrawl crawl = new FangdoCrawl();crawl.login();}public void login(){//将要登陆的地址String fangdo = "http://www.16msg.com/web/doLogin.action";//准备登陆参数HashMapparams = new HashMap (3);params.put("phone_id", "*******");params.put("password", "*******");//获取连接Connection httpCon = getConnection(fangdo,params);try {//使用post方式进行提交Connection.Response res = httpCon.method(Method.POST).execute();//解析cookieString jsessionid = res.cookie(LOGIN_FLAG);System.out.println("jsessionid="+jsessionid);} catch (IOException e) {e.printStackTrace();}}public Connection getConnection(String url,Map params){return getConnection(url, timeout, params);}public Connection getConnection(String url,int timeout, Map params){Connection conn = Jsoup.connect(url);//设置userAgentconn = conn.userAgent(user_agent);//设置超时conn = conn.timeout(timeout);//设置请求头的相应信息conn = conn.header("accept-encoding", "gzip, deflate");//if(params != null && params.size()>0){//设置请求参数conn = conn.data(params);}return conn;}}