1236. Web Crawler
in Coding Interview on Medium, String, BFS
URL startUrl 및 인터페이스 HtmlParser가 있는 경우 웹 크롤러를 구현하여 startUrl과 동일한 호스트 이름에 있는 모든 링크를 크롤링
BFS
/**
* // This is the HtmlParser's API interface.
* // You should not implement it, or speculate about its implementation
* interface HtmlParser {
* public List<String> getUrls(String url) {}
* }
*/
class Solution {
public List<String> crawl(String startUrl, HtmlParser htmlParser) {
Set<String> set = new HashSet<>();
Queue<String> queue = new LinkedList<>();
String hostname = getHostname(startUrl);
queue.offer(startUrl);
set.add(startUrl);
while (!queue.isEmpty()) {
String currentUrl = queue.poll();
for (String url : htmlParser.getUrls(currentUrl)) {
if (url.contains(hostname) && !set.contains(url)) {
queue.offer(url);
set.add(url);
}
}
}
return new ArrayList<String>(set);
}
private String getHostname(String Url) {
String[] ss = Url.split("/");
return ss[2];
}
}
DFS
/**
* // This is the HtmlParser's API interface.
* // You should not implement it, or speculate about its implementation
* interface HtmlParser {
* public List<String> getUrls(String url) {}
* }
*/
class Solution {
private String startHostname;
private HashSet<String> visited = new HashSet<String>();
// T: O(ml), m: the number of edges, l: the maximum length of a URL
public List<String> crawl(String startUrl, HtmlParser htmlParser) {
startHostname = getHostname(startUrl);
dfs(startUrl, htmlParser);
return new ArrayList<>(visited);
}
private void dfs(String url, HtmlParser htmlParser) {
visited.add(url);
for (String nextUrl : htmlParser.getUrls(url)) {
if (getHostname(nextUrl).equals(startHostname) && !visited.contains(nextUrl)) {
dfs(nextUrl, htmlParser);
}
}
}
private String getHostname(String url) {
String[] ss = url.split("/");
return ss[2];
}
}