';//first tag to look for to rip
$get_last = '';// end tag. May have to be more specific (e.g. include the markup that follows the closing tag).
/////////////////////////////////////////////////
// Depending on your server you may have to apply htmlentities() to the fetched contents, e.g.
// $c = stripslashes(htmlentities($var));
// $v = html_entity_decode($c);
// (and to the variables below), then use html_entity_decode() on the echoed output.
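// This script adds the external site URL to the links.
// Alternatively you can use an HTML <base> element and not add the URL to the links, e.g.:
//
// <head>
// <base href="http://external-site.example/"> (set external url in head)
// </head>
//
// <a href="html">Ripped Link</a> (url href is now "html" in links)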
/**
 * Download a URL with cURL and return the response body.
 *
 * @param string $URL Address to fetch.
 * @return string|false The body on success; FALSE when the cURL handle cannot
 *                      be created, the transfer fails, or the body is empty
 *                      (any falsy body is reported as FALSE, as before).
 */
function curl_get_file_contents($URL)//Curl content
{
    $c = curl_init();
    if ($c === false) {
        // No handle: nothing to configure or execute.
        return FALSE;
    }
    curl_setopt($c, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($c, CURLOPT_URL, $URL);
    // NOTE(review): peer certificate verification is switched off, so HTTPS
    // responses are not authenticated. Kept for backward compatibility;
    // enable it if the target site has a valid certificate.
    curl_setopt($c, CURLOPT_SSL_VERIFYPEER, false);
    // Bound the request so a stalled remote host cannot hang the page.
    // The 30 second connect limit mirrors the one used by getURL().
    curl_setopt($c, CURLOPT_CONNECTTIMEOUT, 30);
    curl_setopt($c, CURLOPT_TIMEOUT, 60);
    $contents = curl_exec($c);
    curl_close($c);
    // Truthiness check on purpose: callers treat an empty body as a failure.
    return $contents ? $contents : FALSE;
}
/**
 * Fetch a page over a raw socket (fallback used when cURL is unavailable).
 *
 * Sends a single GET request for the path and query contained in $url and
 * returns everything after the response headers. Redirects are not followed
 * and the status code is not inspected.
 *
 * @param string $url Absolute http:// or https:// URL.
 * @return string|false Response body (possibly an empty string), or false if
 *                      the URL cannot be parsed, has no host, or the
 *                      connection cannot be opened.
 */
function getURL($url) {
    $parts = parse_url($url);
    // parse_url() returns false for seriously malformed URLs; a URL without a
    // host gives us nothing to connect to.
    if (!$parts || empty($parts['host'])) {
        return false;
    }
    $host = $parts['host'];

    if (isset($parts['scheme']) && strtolower($parts['scheme']) === 'https') {
        $transport = 'ssl://';
        $port = 443;
    } else {
        // http and any other scheme fall back to a plain connection.
        $transport = '';
        $port = 80;
    }
    $host_header = $host;
    if (isset($parts['port'])) {
        // Honour an explicit port instead of always using the scheme default.
        if ((int) $parts['port'] !== $port) {
            $host_header .= ':' . $parts['port'];
        }
        $port = $parts['port'];
    }

    // Request the resource named by the URL, not just the site root.
    $path = (isset($parts['path']) && $parts['path'] !== '') ? $parts['path'] : '/';
    if (isset($parts['query'])) {
        $path .= '?' . $parts['query'];
    }

    //Fsock content
    $fp = @fsockopen($transport . $host, $port, $errno, $errstr, 30);
    if (!$fp) {
        return false;
    }
    stream_set_timeout($fp, 5);

    // HTTP/1.0 on purpose: a server must not answer a 1.0 request with a
    // chunked body, which this simple reader could not decode.
    $out = "GET $path HTTP/1.0\r\n";
    $out .= "Host: $host_header\r\n";
    $out .= "Connection: Close\r\n\r\n";
    fwrite($fp, $out);

    $in = '';
    $body = false;
    while (!feof($fp)) {
        $s = fgets($fp, 1024);
        if ($s === false) {
            // Read timeout or error: stop rather than spin on a stream that
            // never reports EOF.
            break;
        }
        if ($body) {
            $in .= $s;
        } elseif (rtrim($s, "\r\n") === '') {
            // The first blank line separates the headers from the body.
            $body = true;
        }
    }
    fclose($fp);
    return $in;
}
// Cache handling. The ripped page is kept in ripped_content.txt and is only
// downloaded again when that file is missing, too small to be useful
// (25 bytes or less), or older than the refresh interval.
// One day is 86400 seconds; 86400 * 7 means we rip once a week.
// $url is expected to have been set earlier in this file.
$html = '';
$have_cache = file_exists('ripped_content.txt') && filesize('ripped_content.txt') > 25;
$needs_fetch = true;
if ($have_cache) {
    // Last modified time (unix timestamp) plus one week gives the moment at
    // which the cached copy expires.
    $last_mod = filemtime('ripped_content.txt');
    $delete_date = $last_mod + 86400 * 7;
    $needs_fetch = time() >= $delete_date;
}
if ($needs_fetch) {
    // Prefer cURL; fall back to the raw socket reader when it is unavailable.
    if (function_exists('curl_exec')) {
        $fetched = curl_get_file_contents($url);
    } else {
        $fetched = getURL($url);
    }
    if ($fetched) {
        $html = stripslashes($fetched);
    }
    if ($html) {
        // Write html to file so later requests (and the output stage, which
        // requires the file to exist) can use it.
        file_put_contents('ripped_content.txt', $html);
    }
}
if (!$html && $have_cache) {
    // Either the cache is still fresh or the download failed: serve the
    // cached copy instead of showing nothing.
    $html = stripslashes(file_get_contents('ripped_content.txt'));
}
/**
 * Collect every fragment of $string that begins with $start and ends with
 * $end (both delimiters included in each fragment).
 *
 * Matching is case-insensitive, spans newlines and is non-greedy, so each
 * fragment stops at the nearest occurrence of $end.
 *
 * @param string $start  Literal opening text.
 * @param string $end    Literal closing text.
 * @param string $string Text to scan.
 * @return array|false List of matched fragments, or false when none is found.
 */
function search($start,$end,$string){
    $pattern = sprintf('!%s(.*?)%s!is', preg_quote($start), preg_quote($end));
    $found = array();
    $count = preg_match_all($pattern, $string, $found);
    if (!$count) {
        // Zero matches and regex failure are both reported as false.
        return false;
    }
    // Index 0 holds the full matches, delimiters included.
    return $found[0];
}
// Output stage: print every fragment found between $get_first and $get_last.
// When $link is set, root-relative links are prefixed with it so they point
// at the external site; when it is empty the fragments are echoed unchanged
// (static content rip).
if (file_exists('ripped_content.txt') && $html) {
    $parts = search($get_first, $get_last, $html);
    if (!is_array($parts)) {
        // search() returns false when nothing matched; print nothing instead
        // of handing a non-array to foreach.
        $parts = array();
    }
    foreach ($parts as $part) {
        if (!empty($link)) {
            // Sometimes sites quote attributes with " " and sometimes ' '.
            // The replacements below must be adapted to the link format of
            // the site you are ripping.
            if (strpos($part, 'href="/') !== false) {
                $part = str_replace('href="/', 'href="' . $link . '/', $part);
            } else {
                $part = str_replace("href='", "href='" . $link . "/", $part);
            }
        }
        echo $part;
        echo "
";
    }
}
?>