net.func.php 4.55 KB
<?php
// PHP xml sitemap generator


// this function uses the cURL multi interface to fetch a list of pages concurrently
/**
 * Downloads several pages in parallel through the cURL multi interface.
 *
 * The user agent is taken from the global $CONFIG["agent"] (empty when it is
 * not set). Redirects are followed and each connection attempt is limited to
 * 10 seconds.
 *
 * @param array $urls URLs to fetch; the array keys are preserved in the result.
 * @return array Response bodies keyed like $urls. An entry is "" when the
 *               transfer failed, the handle could not be created, or the
 *               body was empty.
 */
function curlMultiGetPage($urls) {
        global $CONFIG;
        $htmls = array();
        if(!is_array($urls) || count($urls) == 0)
                return $htmls;

        $agent = isset($CONFIG["agent"]) ? $CONFIG["agent"] : "";
        $mc = curl_multi_init();
        $ch = array();
        foreach($urls as $i => $url) {
                // Pre-fill so the result keeps the order of $urls even when a
                // handle cannot be created for one of them.
                $htmls[$i] = "";
                $handle = curl_init($url);
                if($handle === false)
                        continue;
                curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
                curl_setopt($handle, CURLOPT_USERAGENT, $agent);
                curl_setopt($handle, CURLOPT_HEADER, 0);
                curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 10);
                curl_setopt($handle, CURLOPT_FOLLOWLOCATION, 1);
                curl_multi_add_handle($mc, $handle);
                $ch[$i] = $handle;
        }

        // Drive the transfers. curl_multi_select() blocks until there is
        // socket activity, so the loop does not spin the CPU while waiting.
        $active = 0;
        do {
                do {
                        $status = curl_multi_exec($mc, $active);
                } while($status == CURLM_CALL_MULTI_PERFORM);
                if($active && $status == CURLM_OK) {
                        // -1 means select could not wait on anything yet;
                        // back off briefly instead of looping hot.
                        if(curl_multi_select($mc, 1.0) == -1)
                                usleep(1000);
                }
        } while($active && $status == CURLM_OK);

        // Per-transfer results are only reported through the multi message
        // queue, so collect the failed handles from there.
        $failed = array();
        while(($info = curl_multi_info_read($mc)) !== false) {
                if($info["result"] != CURLE_OK) {
                        $key = array_search($info["handle"], $ch, true);
                        if($key !== false)
                                $failed[$key] = true;
                }
        }

        foreach($ch as $i => $handle) {
                $data = curl_multi_getcontent($handle);
                if(!isset($failed[$i]) && (string)$data !== "")
                        $htmls[$i] = $data;
                curl_multi_remove_handle($mc, $handle);
                curl_close($handle);
        }
        curl_multi_close($mc);
        return $htmls;
}

//returns domain without http:// and without ending slash
/**
 * Normalises a domain string: removes spaces, removes every "http://"
 * (also when written with backslashes) and drops one trailing slash.
 *
 * @param string $domain Raw domain or URL as typed by the user.
 * @return string The cleaned domain, e.g. "www.example.com".
 */
function formatDomain($domain) {
        $domain = str_replace(" ", "", $domain);
        // The two-backslash spelling is removed before the one-backslash
        // spelling, otherwise "http:\\host" would be left as "\host".
        $domain = str_replace(array("http://", "http:\\\\", "http:\\"), "", $domain);
        // Look at the LAST character: checking the first "/" missed
        // "host/dir/" and wiped one-character inputs that have no slash.
        if(substr($domain, -1) === "/")
                $domain = substr($domain, 0, strlen($domain) - 1);
        return $domain;
}

/**
 * Removes duplicate and empty terms from a query string.
 *
 * Terms are emitted in reverse order of appearance (last term first); that
 * ordering is kept so the URLs produced stay the same as before.
 *
 * @param string $query Query string without the leading "?", terms joined by "&".
 * @return string The de-duplicated query string.
 */
function rebuildQuery($query) {
        $newterms = array();
        foreach(array_reverse(explode("&", $query)) as $term) {
                // Skip empty terms ("a=1&&b=2", trailing "&") instead of
                // stopping at them, which used to drop every earlier term.
                if($term === "")
                        continue;
                if(!in_array($term, $newterms, true))
                        $newterms[] = $term;
        }
        return implode("&", $newterms);
}

// this function corrects a URL, rebuilding it on top of the base domain
/**
 * Turns a link found in a page into an absolute "http://" URL.
 *
 * - "//host/path" is treated as "http://host/path".
 * - Apostrophes are stripped from the link.
 * - Links whose scheme is http, mailto or javascript are returned as they are
 *   (after the two steps above).
 * - Links without a host are resolved against $domain: a path starting with
 *   "/" replaces the base path, any other path is appended to the directory
 *   of the base path. "../" segments are NOT resolved.
 * - Any other absolute link is rebuilt from its host and path with the
 *   scheme forced to http (its port is not kept).
 * - The fragment is dropped and the query string is passed through
 *   rebuildQuery().
 *
 * @param string $url    Link as found in the page.
 * @param string $domain Absolute URL (with http://) of the page holding the link.
 * @return string The corrected URL.
 */
function correctURL($url, $domain) {
        if(strncmp($url, "//", 2) == 0) {
                $url = "http://".substr($url, 2);
        }
        $url = str_replace("'", "", $url);
        $url_info = parse_url($url);
        // parse_url() returns false on seriously malformed input; treat that
        // like a link with no components.
        if(!is_array($url_info))
                $url_info = array();
        $scheme = isset($url_info["scheme"]) ? $url_info["scheme"] : "";
        $host = isset($url_info["host"]) ? $url_info["host"] : "";
        $path = isset($url_info["path"]) ? $url_info["path"] : "";
        $query = isset($url_info["query"]) ? $url_info["query"] : "";

        if($scheme == "http" || $scheme == "mailto" || $scheme == "javascript")
                return $url;

        if($host == "") {
                $cur_link = parse_url($domain);
                if(!is_array($cur_link))
                        $cur_link = array();
                $newurl = isset($cur_link["host"]) ? $cur_link["host"] : "";
                // Keep a non-default port of the base URL, otherwise relative
                // links would point at a different server.
                if(isset($cur_link["port"]))
                        $newurl .= ":".$cur_link["port"];
                if(strncmp($path, "./", 2) == 0)
                        $path = substr($path, 2);
                if($path != "") {
                        if($path[0] == "/")
                                $newurl .= $path;
                        else {
                                // Directory part of the base path; stays empty
                                // when the base page sits in the root.
                                $curpath = "";
                                $base_path = isset($cur_link["path"]) ? $cur_link["path"] : "";
                                if(($ps = strrpos($base_path, "/")) > 0)
                                        $curpath = substr($base_path, 0, $ps);
                                $newurl .= "/".$curpath."/".$path;
                        }
                }
        } else {
                $newurl = $host;
                if($path != "" && $path[0] == "/")
                        $newurl .= $path;
                else
                        $newurl .= "/".$path;
        }

        // Collapse the duplicate slashes created by the joins above. This is
        // done before the query is appended so that query values (for example
        // an embedded "http://..." URL) are left untouched.
        while(strpos($newurl, "//") !== false)
                $newurl = str_replace("//", "/", $newurl);
        $newurl = str_replace("&amp;", "&", $newurl);

        if($query != "") {
                // Decode the HTML-escaped separator BEFORE the query is split
                // on "&"; doing it afterwards left "amp;" glued to the terms.
                $query = str_replace("&amp;", "&", $query);
                $newurl .= "?".rebuildQuery($query);
        }
        return "http://".$newurl;
}

// is $url an external link? [both $url and $versus must include http://]
/**
 * Tells whether $url should be treated as external to the site $versus.
 *
 * A link is reported as external when any of these holds:
 * - its scheme is present and is not http;
 * - its host is present and differs from the host of $versus (the two may
 *   differ only by a leading "www.");
 * - its port differs from the port of $versus (a missing port counts as 80);
 * - the second character of its path is "~";
 * - the text after the last "." of its path is one of
 *   jpg, jpeg, png, gif, rar, db (case-insensitive).
 *
 * @param string $url    URL to test, including http://.
 * @param string $versus URL of the site being crawled, including http://.
 * @return bool True when the link is external, false otherwise.
 */
function isLinkExternal($url, $versus){
        $url_info = parse_url($url);
        $dom_info = parse_url($versus);
        // parse_url() returns false on malformed input; use empty components.
        if(!is_array($url_info))
                $url_info = array();
        if(!is_array($dom_info))
                $dom_info = array();

        $scheme = isset($url_info["scheme"]) ? strtolower($url_info["scheme"]) : "";
        if($scheme != "http" && $scheme != "")
                return true;

        // Host names are case-insensitive, so compare them lower-cased.
        $url_host = isset($url_info["host"]) ? strtolower($url_info["host"]) : "";
        $dom_host = isset($dom_info["host"]) ? strtolower($dom_info["host"]) : "";
        if($url_host !== "" && $url_host !== $dom_host
                && $url_host !== "www.".$dom_host
                && "www.".$url_host !== $dom_host)
                        return true;

        // An absent port means the http default, so ":80" and no port match.
        $url_port = isset($url_info["port"]) ? (int)$url_info["port"] : 80;
        $dom_port = isset($dom_info["port"]) ? (int)$dom_info["port"] : 80;
        if($url_port != $dom_port)
                return true;

        $path = isset($url_info["path"]) ? $url_info["path"] : "";
        if(strlen($path) > 1 && $path[1] == "~")
                return true;

        $dot = strrchr($path, ".");
        $type = ($dot === false) ? "" : strtolower(substr($dot, 1));
        return in_array($type, array("jpg", "jpeg", "png", "gif", "rar", "db"), true);
}

?>