0) { for($i = 0; $i < 4 && $i < sizeof($SPIDER["temp"]); $i++) $urls[] = array_pop($SPIDER["temp"]); multiGetURL($urls); } $fp = fopen($CONFIG["sitemap_file"], "w+"); $xml_sitemap = genXmlSitemap(); fputs($fp, $xml_sitemap); fclose($fp); }

/**
 * Queue a discovered link for crawling when it is a new, internal http(s) URL.
 *
 * The link is appended to $SPIDER["temp"] only if it has not been crawled yet
 * (urlInSpider), is not external relative to $SPIDER["baseurl"]
 * (isLinkExternal) and is not already queued (urlInTemp).
 *
 * @param mixed $html Markup of the page the link was found on (not used here).
 * @param mixed $href Link target, as returned by correctUrl() in getAnchors().
 * @param mixed $url  URL of the page the link was found on (not used here).
 *
 * @return false|null false for "javascript:" links, null in every other case.
 */
function handleHref($html, $href, $url)
{
    global $SPIDER;

    // Nothing to parse or queue for an empty / non-string link.
    if (!is_string($href) || $href === "") {
        return;
    }

    $url_info = parse_url($href);

    // parse_url() returns false for seriously malformed URLs and leaves out
    // the "scheme" key for relative ones, so the key is read defensively.
    // URI schemes are case-insensitive, hence the normalisation.
    $scheme = "";
    if (is_array($url_info) && isset($url_info["scheme"])) {
        $scheme = strtolower($url_info["scheme"]);
    }

    if ($scheme == "javascript") {
        return false;
    }

    // Only web links are crawlable; https is accepted alongside http.
    if ($scheme != "http" && $scheme != "https") {
        return;
    }

    // Same checks, in the same order, as the original nested conditions.
    if (urlInSpider($href) || isLinkExternal($href, $SPIDER["baseurl"]) || urlInTemp($href)) {
        return;
    }

    $SPIDER["temp"][] = $href;
}

/**
 * Extract the links of a fetched page and hand each one to handleHref().
 *
 * Every tag returned by getTags() has its "href=" field read with
 * getTagField() and passed through correctUrl() together with
 * $SPIDER["baseurl"] before being handed over.
 *
 * @param mixed $url  URL of the page being processed.
 * @param mixed $html Markup of that page.
 *
 * @return void
 */
function getAnchors($url, $html)
{
    global $SPIDER;

    // NOTE(review): the tag selector passed to getTags() is an empty string;
    // presumably an anchor-tag selector was intended here - confirm against
    // getTags() and the original source of this file.
    $anchors = getTags($html, '');

    // Nothing to iterate if the helper did not produce a list of tags.
    if (!is_array($anchors)) {
        return;
    }

    foreach ($anchors as $anchor) {
        $href = getTagField($anchor, "href=");
        $href = correctUrl($href, $SPIDER["baseurl"]);
        handleHref($html, $href, $url);
    }
}

/**
 * Fetch a batch of URLs and process every page that returned content.
 *
 * Each URL is echoed as it is checked. URLs whose fetched content is
 * non-empty are recorded in $SPIDER["spider"] and scanned for further links
 * with getAnchors().
 *
 * @param array $urls URLs to fetch; results of curlMultiGetPage() are read at
 *                    the same numeric index as the URL they belong to.
 *
 * @return void
 */
function multiGetURL($urls)
{
    global $SPIDER;

    if (!is_array($urls)) {
        return;
    }

    $htmls = curlMultiGetPage($urls);
    $total = count($urls);

    for ($i = 0; $i < $total; $i++) {
        echo "Checking ".$urls[$i]." ...\n";

        // A missing or empty result means there is no page to process.
        if (!isset($htmls[$i]) || $htmls[$i] == "") {
            continue;
        }

        $SPIDER["spider"][] = $urls[$i];
        getAnchors($urls[$i], $htmls[$i]);
    }
}

/**
 * Tell whether a URL has already been recorded in $SPIDER["spider"].
 *
 * @param mixed $url URL to look up.
 *
 * @return bool true when the URL is present, false otherwise (including when
 *              the list has not been created yet).
 */
function urlInSpider($url)
{
    global $SPIDER;

    return isset($SPIDER["spider"])
        && is_array($SPIDER["spider"])
        && in_array($url, $SPIDER["spider"], true);
}

/**
 * Tell whether a URL is already queued in $SPIDER["temp"].
 *
 * @param mixed $url URL to look up.
 *
 * @return bool true when the URL is present, false otherwise (including when
 *              the queue has not been created yet).
 */
function urlInTemp($url)
{
    global $SPIDER;

    return isset($SPIDER["temp"])
        && is_array($SPIDER["temp"])
        && in_array($url, $SPIDER["temp"], true);
}
?>