[英]A Connection provides a convenient interface to fetch content from the web and parse it into Documents.
To get a new Connection, use org.jsoup.Jsoup#connect(String). Connections contain Connection.Request and Connection.Response objects. The request objects are reusable as prototype requests.
Request configuration can be made using either the shortcut methods in Connection (e.g. #userAgent(String)), or by methods in the Connection.Request object directly. All request configuration must be made before the request is executed.
代码示例来源:origin: deeplearning4j/dl4j-examples
* Get a list of all URLs in a page for zip files
public static List<String> getZipUrlsFromPage(String url) {
List<String> out = new ArrayList<>();
try {
Document doc = Jsoup.connect(url).get();
Elements links = doc.select("a[href]");
for (Element e : links) {
String s = e.attr("href");
if (s.endsWith(".zip")) {
if (s.startsWith("http")) {
//Absolute link
} else {
//Relative link
out.add(e.baseUri() + s);
} catch (IOException e) {
throw new RuntimeException(e);
return out;
代码示例来源:origin: RipMeApp/ripme
private static Document getDocument(String strUrl) throws IOException {
return Jsoup.connect(strUrl)
.timeout(10 * 1000)
代码示例来源:origin: RipMeApp/ripme
private String getImageLinkFromDLLink(String url) {
try {
Connection.Response response = Jsoup.connect(url)
String imageURL = response.header("Location");
return imageURL;
} catch (IOException e) {
LOGGER.info("Got error message " + e.getMessage() + " trying to download " + url);
return null;
代码示例来源:origin: ChinaSilence/any-video
private Document requestAPI(String keyword) {
try {
return Jsoup.connect(api).userAgent(ua).ignoreContentType(true).data("wd", keyword).get();
} catch (IOException e) {
throw new AnyException(ExceptionEnum.VIDEO_SEARCH_ERROR);
代码示例来源:origin: RipMeApp/ripme
private void defaultSettings() {
this.retries = Utils.getConfigInteger("download.retries", 1);
connection = Jsoup.connect(this.url);
代码示例来源:origin: indywidualny/FaceSlim
protected Void doInBackground(String... args) throws SQLException {
try {
final Document response = Jsoup.connect(args[0])
.header("Accept-Encoding", "gzip, deflate")
.cookie("https://m.facebook.com", CookieManager.getInstance().getCookie("https://m.facebook.com"))
base = "https://0.facebook.com";
Document doc = Jsoup.parse(response.toString(), base);
Elements select = doc.select("a");
dataSource.insertPage(args[0], doc.toString());
} catch (Exception e) {
Log.e("Offline", "Problem saving the current page", e);
代码示例来源:origin: wangdan/AisenWeiBo
js = js.replace("%username%", mAccount).replace("%password%", mPassword);
Document dom = Jsoup.connect(getLoginUrl()).get();
String html = dom.toString();
html = html.replace("<html>", "<html id='all' >").replace("</head>", js + "</head>")
.replace("action-type=\"submit\"", "action-type=\"submit\" id=\"submit\"");
dom = Jsoup.parse(html);
Element inputAccount = dom.select("input#userId").first();
inputAccount.attr("oninput", "getAccount()");
Element pwdAccount = dom.select("input#passwd").first();
pwdAccount.attr("oninput", "getAccount()");
代码示例来源:origin: RipMeApp/ripme
try {
Document doc = Jsoup.connect(url.toExternalForm())
for (Element el : doc.select("meta")) {
if (el.attr("property").equals("og:video")) {
result.add(new URL(el.attr("content")));
代码示例来源:origin: magefree/mage
public static Document downloadHtmlDocument(String urlString) throws NumberFormatException, IOException {
Preferences prefs = MageFrame.getPreferences();
Connection.ProxyType proxyType = Connection.ProxyType.valueByText(prefs.get("proxyType", "None"));
Document doc;
if (proxyType == ProxyType.NONE) {
doc = Jsoup.connect(urlString).timeout(60 * 1000).get();
} else {
String proxyServer = prefs.get("proxyAddress", "");
int proxyPort = Integer.parseInt(prefs.get("proxyPort", "0"));
URL url = new URL(urlString);
Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(proxyServer, proxyPort));
HttpURLConnection uc = (HttpURLConnection) url.openConnection(proxy);
String line;
StringBuffer tmp = new StringBuffer();
BufferedReader in = new BufferedReader(new InputStreamReader(uc.getInputStream()));
while ((line = in.readLine()) != null) {
doc = Jsoup.parse(String.valueOf(tmp));
return doc;
代码示例来源:origin: occidere/MMDownloader
* Jsoup을 이용한 HTML 코드 파싱.
* @param eachArchiveAddress 실제 만화가 담긴 아카이브 주소
* @return 성공하면 html 코드를 리턴
private String getHtmlPageJsoup(String eachArchiveAddress) throws Exception {
print.info("고속 연결 시도중...\n");
// pageSource = Html코드를 포함한 페이지 소스코드가 담길 스트링, domain = http://wasabisyrup.com <-마지막 / 안붙음!
String pageSource = null;
// POST방식으로 아예 처음부터 비밀번호를 body에 담아 전달
Response response = Jsoup.connect(eachArchiveAddress)
.header("charset", "utf-8")
.header("Accept-Encoding", "gzip") //20171126 gzip 추가
.timeout(MAX_WAIT_TIME) // timeout
.data("pass", PASSWORD) // 20180429 기준 마루마루에서 reCaptcha를 사용하기에 의미없음
Document preDoc = response.parse(); //받아온 HTML 코드를 저장
// <div class="gallery-template">이 만화 담긴 곳.
if (preDoc.select("div.gallery-template").isEmpty()) {
throw new RuntimeException("Jsoup Parsing Failed: No tag found");
} else { // 만약 Jsoup 파싱 시 내용 있으면 성공
pageSource = preDoc.toString();
print.info("고속 연결 성공!\n");
return pageSource; //성공 시 html코드 리턴
代码示例来源:origin: occidere/MMDownloader
Document doc = Jsoup.connect(rawAddress)
.header("charset", "utf-8")
Elements divContent = doc.select("div.content").select("[href*=/archives/]");
代码示例来源:origin: ysc/QuestionAnsweringSystem
private List<Evidence> searchBaidu(String url, String referer) {
List<Evidence> evidences = new ArrayList<>();
try {
Document document = Jsoup.connect(url)
.header("Accept", ACCEPT)
.header("Accept-Encoding", ENCODING)
.header("Accept-Language", LANGUAGE)
.header("Connection", CONNECTION)
.header("User-Agent", USER_AGENT)
.header("Host", HOST)
.header("Referer", referer)
String resultCssQuery = "html > body > div > div > div > div > div";
Elements elements = document.select(resultCssQuery);
for (Element element : elements) {
Elements subElements = element.select("h3 > a");
代码示例来源:origin: RipMeApp/ripme
try {
logger.debug("Retrieving " + UpdateUtils.updateJsonURL);
doc = Jsoup.connect(UpdateUtils.updateJsonURL)
.timeout(10 * 1000)
} catch (IOException e) {
logger.error("Error while fetching update: ", e);
logger.info("Current version: " + getThisJarVersion());
String jsonString = doc.body().html().replaceAll("&quot;", "\"");
ripmeJson = new JSONObject(jsonString);
代码示例来源:origin: crazyhitty/Munch
protected String doInBackground(String... strings) {
try {
Document rssDocument = Jsoup.connect(mUrl).ignoreContentType(true).parser(Parser.xmlParser()).get();
mItems = rssDocument.select("item");
} catch (IOException e) {
return "failure";
return "success";
代码示例来源:origin: ChinaSilence/any-video
* 调用腾讯接口,获取视频信息
private String videoInfo(String vid) {
try {
Document document = Jsoup.connect(VIDEO_API).header("Cookie", COOKIE)
.data("vids", vid).data("platform", PLATFORM)
.data("sdtfrom", SDTFROM)
.data("format", "10209")
.data("otype", "json").data("defn", "fhd")
.data("defaultfmt", "fhd").data("guid", GUID).ignoreContentType(true).get();
String result = document.text().replace("QZOutputJson=", "");
return result.substring(0, result.length() - 1);
} catch (IOException e) {
log.info("request tencent api error, vid : " + vid);
throw new AnyException("request tencent api error, vid : " + vid);
代码示例来源:origin: ChinaSilence/any-video
private String getOpenId(String accessToken) throws IOException{
String url = openIdUri + accessToken;
Document document = Jsoup.connect(url).get();
String resultText = document.text();
Matcher matcher = Pattern.compile("\"openid\":\"(.*?)\"").matcher(resultText);
if (matcher.find()){
return matcher.group(1);
return null;
代码示例来源:origin: RipMeApp/ripme
private JSONArray getPageUrls() {
String postURL = "http://www.tsumino.com/Read/Load";
try {
// This sessionId will expire and need to be replaced
Document doc = Jsoup.connect(postURL).data("q", getAlbumID()).userAgent(USER_AGENT).cookies(cookies).referrer("http://www.tsumino.com/Read/View/" + getAlbumID()).get();
String jsonInfo = doc.html().replaceAll("<html>","").replaceAll("<head></head>", "").replaceAll("<body>", "").replaceAll("</body>", "")
.replaceAll("</html>", "").replaceAll("\n", "");
JSONObject json = new JSONObject(jsonInfo);
return json.getJSONArray("reader_page_urls");
} catch (IOException e) {
sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_ERRORED, "Unable to download album, please compete the captcha at http://www.tsumino.com/Read/Auth/"
+ getAlbumID() + " and try again");
return null;
代码示例来源:origin: delthas/JavaSkype
private Response sendRequest(Method method, String apiPath, boolean absoluteApiPath, String... keyval) throws IOException {
String url = absoluteApiPath ? apiPath : SERVER_HOSTNAME + apiPath;
Connection conn = Jsoup.connect(url).maxBodySize(100 * 1024 * 1024).timeout(10000).method(method).ignoreContentType(true).ignoreHttpErrors(true);
logger.finest("Sending " + method + " request at " + url);
if (skypeToken != null) {
conn.header("X-Skypetoken", skypeToken);
} else {
logger.fine("No token sent for the request at: " + url);
return conn.execute();
代码示例来源:origin: JinBoy23520/CoderToDeveloperByTCLer
* 模拟浏览器行为的请求头获取Document
* @param url
* @return
* @throws IOException
public static Document getDoc(String url) throws IOException {
* 在爬之前最好看一下浏览器访问目标网站的Request Header信息,然后进行模仿
return Jsoup.connect(url)
// .header("accept", "application/json, text/plain,*/*")
// .header("Accept-Encoding", "gzip, deflate,br")
// .header("Accept-Language", "zh-CN,zh;q=0.8")//,en-US;q=0.5,en;q=0.3
// .header("Referer", "https://www.baidu.com/")
.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
.header("Accept-Encoding", "gzip, deflate")
.header("Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3")//,en-US;q=0.5,en;q=0.3
.header("Host", "www.cnblogs.com")
.header("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0")// "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:48.0) Gecko/20100101 Firefox/48.0
.header("Cookie", "_ga=GA1.2.727269871.1498415016")
代码示例来源:origin: bluetata/crawler-jsoup-maven
static Map<String,String> connect() throws IOException{
Connection.Response res = Jsoup.connect("https://www.facebook.com/login.php")
.data("username", "dietime1943@hotmail.com", "password", "password")
.timeout(30 * 1000)
Document doc = res.parse();
Map<String, String> loginCookies = res.cookies();
String sessionId = res.cookie("SESSIONID");
return loginCookies;