Java – How to get all links from a web page?
This example uses the jsoup HTML parser to fetch a web page and extract all of its hyperlinks:
pom.xml
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.12.1</version>
</dependency>
JsoupFindLinkSample.java
package com.mkyong;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
/**
 * Sample showing how to collect the hyperlinks of a web page with jsoup.
 */
public class JsoupFindLinkSample {

    /**
     * Fetches https://google.com and prints every link found, one per line.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the page cannot be fetched
     */
    public static void main(String[] args) throws IOException {
        Set<String> found = findLinks("https://google.com");
        found.forEach(System.out::println);
    }

    /**
     * Downloads the page at {@code url} and gathers the {@code href} value of
     * every anchor element that carries one.
     *
     * <p>The values are taken from the attribute as written in the markup and
     * duplicates collapse because the result is a {@link HashSet}; iteration
     * order is therefore unspecified.
     *
     * @param url address of the page to fetch
     * @return the distinct {@code href} values found on the page
     * @throws IOException if the request fails
     */
    private static Set<String> findLinks(String url) throws IOException {
        // Issue a GET request carrying a sample data parameter, user agent,
        // cookie and a timeout setting of 3000.
        Document page = Jsoup.connect(url)
                .data("query", "Java")
                .userAgent("Mozilla")
                .cookie("auth", "token")
                .timeout(3000)
                .get();

        Set<String> hrefs = new HashSet<>();
        // "a[href]" matches only anchors that actually have an href attribute.
        Elements anchors = page.select("a[href]");
        anchors.forEach(anchor -> hrefs.add(anchor.attr("href")));
        return hrefs;
    }
}
Output
https://play.google.com/?hl=en&tab=w8
https://www.google.com/calendar?tab=wc
/intl/en/about.html
https://photos.google.com/?tab=wq&pageId=none
https://drive.google.com/?tab=wo
//...
References
About Author
Comments
Subscribe
0 Comments