Get Images From Website Using PHP

<?php
// Form that posts the host name to scrape back to this same script.
// $_SERVER['PHP_SELF'] replaces the old register_globals variable $PHP_SELF
// (undefined on current PHP) and is escaped because the request path is
// attacker-controllable.
?>
<form method="POST" action="<?php echo htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'); ?>" enctype="multipart/form-data">
http://<input type="text" name="webaddress" id="webaddress" value="www.webaddress.com">/
<input type="submit" name="submit" value="Submit!">
</form>
<?php 
	 // Defining the basic cURL function
    /**
     * Fetch a URL with cURL and hand back the response body.
     *
     * Redirects are followed (up to 20), the referer is set automatically on
     * redirects, and both the connect and total timeouts are 320 seconds.
     *
     * @param string $url Address to request.
     * @return string|bool The response body, or false if curl_exec() fails.
     */
    function curl($url) {
        $handle = curl_init();

        curl_setopt_array($handle, array(
            CURLOPT_RETURNTRANSFER => true,  // return the body instead of printing it
            CURLOPT_FOLLOWLOCATION => true,  // follow "Location:" redirects
            CURLOPT_AUTOREFERER    => true,  // fill in the Referer header when redirected
            CURLOPT_CONNECTTIMEOUT => 320,   // seconds allowed for connecting
            CURLOPT_TIMEOUT        => 320,   // seconds allowed for the whole transfer
            CURLOPT_MAXREDIRS      => 20,    // give up after this many redirects
            CURLOPT_USERAGENT      => "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1a2pre) Gecko/2008073000 Shredder/3.0a2pre ThunderBrowse/3.2.1.8",
            CURLOPT_URL            => $url,  // the address passed in by the caller
        ));

        $response = curl_exec($handle);
        curl_close($handle);

        return $response;
    }
    
    /**
     * Print a readable dump of a value inside a <pre> element.
     *
     * The print_r() text is HTML-escaped before it is echoed, so values that
     * contain markup (for example strings scraped from another site) are shown
     * literally instead of being interpreted by the browser.
     *
     * @param mixed $aArray Value to dump; typically an array.
     * @return void
     */
    function print_array($aArray) {
        echo '<pre>';
        // ENT_SUBSTITUTE keeps the dump visible even if it holds invalid UTF-8.
        echo htmlspecialchars(print_r($aArray, true), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        echo '</pre>';
    }
    
      // Defining the basic scraping function
    /**
     * Return the text found between two markers, matched case-insensitively.
     *
     * The first occurrence of $start is located, then the first occurrence of
     * $end after it; the text strictly between the two is returned.
     *
     * @param string|bool|null $data  Text to search; non-strings are cast to string
     *                                so a failed download (false) is handled.
     * @param string           $start Marker that precedes the wanted text.
     * @param string           $end   Marker that follows the wanted text.
     * @return string The text between the markers, or '' when either marker is missing.
     */
    function scrape_between($data, $start, $end){
        $data = (string) $data;

        $from = stripos($data, $start);
        if ($from === false) {
            return '';  // start marker absent: nothing to scrape
        }
        $from += strlen($start);    // skip past the start marker itself

        $to = stripos($data, $end, $from);
        if ($to === false) {
            return '';  // end marker absent after the start marker
        }

        return substr($data, $from, $to - $from);
    }
    
    // In case you want to scrape a single item instead:
    
 // $scraped_page = curl("http://www.xenimus.com");    // Download the page into $scraped_page
  //  $scraped_data = scrape_between($scraped_page, "<span", "</span>");   // Scrape the downloaded data in $scraped_page for the content between "<span" and "</span>"
  //  echo $scraped_data; // Echo the scraped data
  
  
// Handle a submitted form: download the chosen site's front page, collect the
// src of every <img> found in its <body>, dump the list and render each image.
if (isset($_POST['submit']))
{
	// The address comes straight from the request, so it is untrusted: it is
	// escaped whenever it is echoed.
	// NOTE(review): the server will fetch whatever host is typed in; restrict
	// the allowed hosts if this page is reachable by untrusted users.
	$chosenurl = (isset($_POST['webaddress']) && is_string($_POST['webaddress']))
		? trim($_POST['webaddress'])
		: '';
	$newurl = "http://" . $chosenurl . "/";
	echo "You chose: " . htmlspecialchars($newurl, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

	$url = $newurl;                                 // the URL to scrape
	$urlexplode = scrape_between($url, "//", "/");  // host part, used to resolve relative image paths
	$results_page = curl($url);                     // download the page

	// Always defined, so the dump and the loop below work when nothing is found.
	$results_urls = array();

	if (is_string($results_page)) {
		// Only look at the part of the document between <body and </body>.
		$results_page = scrape_between($results_page, "<body", "</body>");

		$separate_results = explode("<img", $results_page);
		// The first chunk is whatever precedes the first <img, not an image tag.
		array_shift($separate_results);

		foreach ($separate_results as $separate_result) {
			$src = trim((string) scrape_between($separate_result, "src=\"", "\""));
			if ($src === '') {
				continue;   // tag without a double-quoted src attribute
			}

			if (preg_match('#^https?://#i', $src)) {
				// Already an absolute URL (image hosted anywhere): use as is.
				$results_urls[] = $src;
			} elseif (strpos($src, '//') === 0) {
				// Protocol-relative URL: give it the scheme the page was fetched with.
				$results_urls[] = "http:" . $src;
			} else {
				// Relative path: resolve it against the scraped host.
				$results_urls[] = "http://" . $urlexplode . "/" . ltrim($src, '/');
			}
		}
	} else {
		echo "<p>Could not download " . htmlspecialchars($newurl, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "</p>";
	}

	print_array($results_urls); // dump the list of image URLs that were found

	// Display the images one by one; the URL is scraped data, so escape it
	// before placing it in the attribute.
	foreach ($results_urls as $image) {
		echo "<img src=\"" . htmlspecialchars($image, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "\">";
	}
}
?>
Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

Powered by WordPress.com.

Up ↑

%d bloggers like this: