';//first tag to look for to rip $get_last = '';// end tag. may have to be more specific. like(
// ) /////////////////////////////////////////////////
// Depending on your server you may have to use htmlentities() on the fetched contents:
//     $c = stripslashes(htmlentities($var));
//     $v = html_entity_decode($c);
// and on the variables below, then use html_entity_decode() on the echoed output.
//
// This script adds the external site url to the links.
// You can use HTML base instead and not add the url to the links
// (set the external url in the head of the page).
// Ripped Link: url href is now "html" in links.

// Settings are expected to be assigned above this point. Default any that are
// missing so the script degrades to "no output" instead of raising
// undefined-variable warnings.
$url       = isset($url) ? $url : '';
$link      = isset($link) ? $link : '';
$get_first = isset($get_first) ? $get_first : '';
$get_last  = isset($get_last) ? $get_last : '';

/**
 * Fetch a URL with the cURL extension.
 *
 * NOTE(review): peer certificate verification is disabled (as in the original
 * code), so HTTPS responses are not authenticated.
 *
 * @param string $URL Address to download.
 * @return string|false Response body, or FALSE when the transfer failed or
 *                      the body was empty.
 */
function curl_get_file_contents($URL)
{
    $c = curl_init();
    if ($c === false) {
        return FALSE;
    }

    curl_setopt($c, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($c, CURLOPT_URL, $URL);
    curl_setopt($c, CURLOPT_SSL_VERIFYPEER, false);
    $contents = curl_exec($c);
    curl_close($c);

    return $contents ? $contents : FALSE;
}

/**
 * Fetch a URL over a raw socket; used when cURL is not available.
 *
 * The request targets the path and query string of $url (not just "/") and
 * honours an explicit port. HTTP/1.0 is used on purpose: this function does
 * not decode chunked transfer encoding, and an HTTP/1.0 request keeps the
 * server from using it.
 *
 * @param string $url Absolute http:// or https:// address.
 * @return string|false Response body (may be an empty string), or false when
 *                      the URL has no host or the connection failed.
 */
function getURL($url)
{
    $parts = parse_url($url);
    if (!$parts || empty($parts['host'])) {
        return false;
    }

    $host   = $parts['host'];
    $scheme = isset($parts['scheme']) ? strtolower($parts['scheme']) : 'http';

    switch ($scheme) {
        case 'https':
            $transport = 'ssl://';
            $port = 443;
            break;
        case 'http':
        default:
            $transport = '';
            $port = 80;
    }

    $host_header = $host;
    if (isset($parts['port'])) {
        $port = (int) $parts['port'];
        $host_header .= ':' . $port;
    }

    $target = (isset($parts['path']) && $parts['path'] !== '') ? $parts['path'] : '/';
    if (isset($parts['query'])) {
        $target .= '?' . $parts['query'];
    }

    // Connection errors are reported through the false return value.
    $fp = @fsockopen($transport . $host, $port, $errno, $errstr, 30);
    if (!$fp) {
        return false;
    }

    stream_set_timeout($fp, 5);

    $out  = "GET $target HTTP/1.0\r\n";
    $out .= "Host: $host_header\r\n";
    $out .= "Connection: Close\r\n\r\n";
    fwrite($fp, $out);

    $in = '';
    $body = false;
    while (!feof($fp)) {
        $s = fgets($fp, 1024);
        if ($s === false) {
            // Read error or timeout: stop instead of spinning on the socket.
            break;
        }
        if ($body) {
            $in .= $s;
        }
        // The first blank line separates the response headers from the body.
        if ($s === "\r\n") {
            $body = true;
        }
    }
    fclose($fp);

    return $in;
}

/**
 * Download the remote page, preferring cURL and falling back to a raw socket.
 *
 * NOTE(review): stripslashes() is kept from the original code; it removes
 * backslashes from the fetched markup (e.g. inside inline scripts).
 *
 * @param string $url Address to download.
 * @return string|false Fetched markup, or false when nothing was retrieved.
 */
function rip_fetch_html($url)
{
    if ($url === '') {
        return false;
    }

    $raw = function_exists('curl_exec') ? curl_get_file_contents($url) : getURL($url);
    if ($raw === false || $raw === '') {
        return false;
    }

    return stripslashes($raw);
}

/**
 * Return every fragment of $string that starts with $start and ends with $end
 * (both markers included), matched case-insensitively and across newlines.
 *
 * @param string $start  Opening marker.
 * @param string $end    Closing marker.
 * @param string $string Text to scan.
 * @return array|false List of matched fragments, or false when none matched.
 */
function search($start, $end, $string)
{
    // Quote with the pattern delimiter so markers containing "!" stay literal.
    $reg = '!' . preg_quote($start, '!') . '(.*?)' . preg_quote($end, '!') . '!is';

    if (preg_match_all($reg, $string, $matches)) {
        return $matches[0];
    }

    return false;
}

// The ripped content is cached in a file. The remote site is fetched again
// only when the cache is missing (or nearly empty) or older than the refresh
// period, e.g. once a day or once a week.
$ripped_cache_file = 'ripped_content.txt';
$ripped_max_age    = 86400 * 7; // One day is 86400 seconds; refresh once a week.

$html = '';

if (file_exists($ripped_cache_file) && filesize($ripped_cache_file) > 25) {
    // Cache exists: refresh it once "last modified + max age" has passed.
    $delete_date = filemtime($ripped_cache_file) + $ripped_max_age;

    if (time() >= $delete_date) {
        $html = rip_fetch_html($url);
        if ($html) {
            // Write the fresh html to the cache file.
            file_put_contents($ripped_cache_file, $html);
        } else {
            // Refresh failed: keep serving the stale copy rather than nothing.
            $html = stripslashes(file_get_contents($ripped_cache_file));
        }
    } else {
        // Cache is still fresh: read the content from the file.
        $html = stripslashes(file_get_contents($ripped_cache_file));
    }
} else {
    // No usable cache yet: fetch the content and create the cache file.
    $html = rip_fetch_html($url);
    if ($html) {
        file_put_contents($ripped_cache_file, $html);
    }
}

// Extract the wanted fragments and print them.
// NOTE(review): the fragments are remote markup and are echoed unescaped.
if ($html) {
    $parts = search($get_first, $get_last, $html);

    if ($parts !== false) {
        foreach ($parts as $part) {
            if (!empty($link)) {
                // Sites sometimes quote attributes with ' and sometimes with ".
                // The str_replace calls below must be adapted to the link
                // format of the site you are ripping.
                if (strpos($part, 'href="/') !== false) {
                    echo str_replace('href="/', 'href="' . $link . '/', $part);
                } else {
                    echo str_replace("href='", "href='" . $link . "/", $part);
                }
            } else {
                // Not rewriting links: static content rip.
                echo $part;
            }
            echo "\n";
        }
    }
}
?>