PHP Webbrowser simulieren

So, 26.04.2009 - 11:21 -- Daniel Espendiller

Einfache Seite

// Fetch a page with the Internet Explorer header profile and print the body.
$browser = new Browser("IE");
$browser->url = "http://www.golem.de";
$html = $browser->read();
echo $html;

Cookies

// Send two preset cookies along with the request and print the body.
$presetCookies = array(
	"cookie1" => "value1",
	"cookie2" => "value2"
);

$browser = new Browser("IE");
$browser->url = "http://www.golem.de";
$browser->cookies_set($presetCookies);
$html = $browser->read();
echo $html;

Loginfenster - POST

POST-Kommandos durchführen, z. B. für Logins. Die Cookies aus der ersten Antwort werden automatisch an den zweiten Seitenaufruf weitergereicht.

// Step 1: submit the login form as a POST request.
$browser = new Browser("Firefox");
$browser->url = "http://example.de/login.html";
$browser->post = array(
	"username" => "peter",
	"password" => "peter00"
);
$str = $browser->read();

// Step 2: reuse the same object so the cookies collected by the first
// read() are sent with the follow-up request.
$browser->url = "http://example.de/page.html";
$str = $browser->read();

Quellcode

/**
 * Minimal HTTP/1.0 client that imitates a desktop browser over a raw socket.
 *
 * Typical use: construct with a client profile ("Firefox" or "IE"), assign
 * $url (and optionally $post, cookies or a file upload), then call read().
 * Cookies found in each response are merged into $cookies and are sent again
 * on the next read() of the same object.
 *
 * Limitations visible in this class: only plain-text connections on the port
 * given in the URL (default 80) are opened; redirects are not followed; cookie
 * path/domain/secure attributes are not enforced.
 */
class Browser {
	/** @var string Absolute URL of the next request. */
	public $url = '';
	/** @var array Cookie jar, name => value. */
	public $cookies = array();
	/** @var array Request headers, name => value. */
	public $headers = array();
	/** @var string Raw status line and headers of the last response. */
	public $r_headers = '';
	/** @var string Response body buffer; only filled while read() is running. */
	public $r_string = '';
	/** @var int Connect timeout in seconds (property name kept as-is for compatibility). */
	public $timout = 5;
	/** @var string Optional HTTP proxy as "host:port". */
	public $proxy = '';
	/** @var array Form fields for the next request; cleared after every read(). */
	public $post = array();
	/** @var string Prepared multipart body from postfile(); cleared after every read(). */
	public $fileheader = '';

	/**
	 * Installs the request-header profile for the given client.
	 *
	 * Declared as __construct because PHP 8 no longer treats a method named
	 * like the class as a constructor; `new Browser("IE")` works unchanged.
	 *
	 * @param string $client "Firefox" or "IE"; any other value leaves the
	 *                       header list empty.
	 */
	public function __construct($client) {
		if ($client === "Firefox") {
			// The original User-Agent value had the Accept header text pasted onto its end.
			$this->headers['User-Agent'] = "Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.1.9) Gecko/20071025 Firefox/2.0.0.9";
			$this->headers['Accept'] = "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
			$this->headers['Accept-Language'] = "de-de,de;q=0.8,en-us;q=0.5,en;q=0.3";
			$this->headers['Accept-Charset'] = "ISO-8859-1,utf-8;q=0.7,*;q=0.7";
			$this->headers['Keep-Alive'] = "300";
			$this->headers['Connection'] = "keep-alive";
		} elseif ($client === "IE") {
			$this->headers['User-Agent'] = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; .NET CLR 1.1.4322)";
			$this->headers['Accept'] = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*";
			$this->headers['Accept-Language'] = "de";
			$this->headers['Connection'] = "keep-alive";
		}
	}

	/**
	 * Returns the value of a request header.
	 *
	 * @param string $value Header name.
	 * @return mixed The stored value, or null when the header is not set.
	 */
	public function header_show($value) {
		return isset($this->headers[$value]) ? $this->headers[$value] : null;
	}

	/**
	 * Sets or replaces a request header.
	 *
	 * @param string $key   Header name.
	 * @param mixed  $value Header value.
	 */
	public function header_set($key, $value) {
		$this->headers[$key] = $value;
	}

	/**
	 * Removes a request header if it is present.
	 *
	 * @param string $key Header name.
	 */
	public function header_remove($key) {
		unset($this->headers[$key]);
	}

	/**
	 * Replaces the whole cookie jar.
	 *
	 * @param array $arr Cookies as name => value.
	 */
	public function cookies_set($arr) {
		$this->cookies = $arr;
	}

	/**
	 * Adds or overwrites a single cookie.
	 *
	 * @param string $name  Cookie name.
	 * @param string $value Cookie value.
	 */
	public function cookies_add($name, $value) {
		if (!is_array($this->cookies)) {
			$this->cookies = array();
		}
		$this->cookies[$name] = $value;
	}

	/**
	 * Debug helper: prints the cookie jar with print_r().
	 */
	public function temp() {
		print_r($this->cookies);
	}

	/**
	 * Builds the complete request (request line, headers, blank line, body).
	 *
	 * Side effects on $headers: sets Content-Type/Content-Length for form
	 * posts and the Cookie header when the jar is not empty.
	 *
	 * @return string Raw request text to be written to the socket.
	 */
	public function read_header() {
		$hasPost = is_array($this->post) && count($this->post) > 0;
		$hasFile = strlen((string) $this->fileheader) > 0;

		if ($hasPost) {
			$body = $this->posts();
			$this->headers['Content-Type'] = "application/x-www-form-urlencoded";
			$this->headers['Content-Length'] = strlen($body);
		} elseif ($hasFile) {
			// postfile() has already stored the matching Content-* headers.
			$body = $this->fileheader;
		} else {
			$body = "";
		}

		if (is_array($this->cookies) && count($this->cookies) > 0) {
			$this->headers["Cookie"] = $this->keyer($this->cookies, "=", "; ");
		}

		// Parse before branching: the proxy branch needs the host as well.
		$url = parse_url((string) $this->url);
		if (!is_array($url)) {
			$url = array();
		}
		$host = isset($url['host']) ? $url['host'] : "";
		if (isset($url['port']) && (int) $url['port'] !== 80) {
			$host .= ":" . $url['port'];
		}

		if (strlen((string) $this->proxy) > 0) {
			// A proxy expects the absolute URL in the request line.
			$target = $this->url;
		} else {
			// "http://host" has no path component; an empty target would be an invalid request line.
			$target = (isset($url['path']) && $url['path'] !== "") ? $url['path'] : "/";
			if (isset($url['query'])) {
				$target .= "?" . $url['query'];
			}
		}

		$method = ($hasPost || $hasFile) ? "POST" : "GET";
		$request = $method . " " . $target . " HTTP/1.0\r\nHost: " . $host . "\r\n";
		if (is_array($this->headers) && count($this->headers) > 0) {
			$request .= $this->keyer($this->headers, ": ", "\r\n") . "\r\n";
		}

		// Exactly one blank line ends the header section, then the body (possibly empty).
		return $request . "\r\n" . $body;
	}

	/**
	 * Serialises $post as an application/x-www-form-urlencoded body.
	 *
	 * Keys and values are URL-encoded so that characters such as "&", "=" or
	 * spaces inside a value cannot corrupt the field list. Pass raw values,
	 * not pre-encoded ones.
	 *
	 * @return string Encoded "key=value&key=value" list, "" when $post is empty.
	 */
	public function posts() {
		$pairs = array();
		if (is_array($this->post)) {
			foreach ($this->post as $key => $value) {
				$pairs[] = urlencode((string) $key) . "=" . urlencode((string) $value);
			}
		}
		return implode("&", $pairs);
	}

	/**
	 * Sends the request for $url and returns the response body.
	 *
	 * After a successful request the response headers stay available in
	 * $r_headers, cookies from the response are merged into the jar, and the
	 * one-shot request state ($post, $fileheader, Content-* headers) is reset.
	 *
	 * @return string Response body (decompressed when the response declares a
	 *                gzip/deflate Content-Encoding), or the literal string
	 *                "error" when the URL has no host or the connection fails.
	 */
	public function read() {
		$url = parse_url((string) $this->url);
		if (!is_array($url) || !isset($url['host'])) {
			return "error";
		}
		$port = isset($url['port']) ? $url['port'] : 80;

		// Connection warnings are suppressed on purpose: failure is reported
		// to the caller through the "error" return value.
		if (strlen((string) $this->proxy) > 0) {
			$proxy = explode(":", $this->proxy, 2);
			// Fallback port when the proxy is given as a bare host name.
			$proxyPort = isset($proxy[1]) ? (int) $proxy[1] : 8080;
			$fp = @fsockopen($proxy[0], $proxyPort, $errno, $errstr, $this->timout);
		} else {
			$fp = @fsockopen($url['host'], $port, $errno, $errstr, $this->timout);
		}
		if (!$fp) {
			return "error";
		}

		// Start from clean buffers so headers of an earlier response cannot
		// leak into cookie parsing or compression detection.
		$this->r_headers = '';
		$this->r_string = '';

		fputs($fp, $this->read_header());

		// NOTE(review): both client profiles send "Connection: keep-alive", so
		// a server may hold the socket open; the loop then ends only when the
		// server closes it or fgets() fails.
		$inBody = false;
		while (!feof($fp)) {
			$line = fgets($fp);
			if ($line === false) {
				// Read error or timeout: stop instead of looping forever.
				break;
			}
			if ($inBody) {
				$this->r_string .= $line;
				continue;
			}
			$this->r_headers .= $line;
			// The first empty line separates headers from body.
			if (rtrim($line, "\r\n") === '') {
				$inBody = true;
			}
		}
		fclose($fp);

		$back = $this->def($this->r_string);
		$this->get_cookies();

		unset($this->headers['Content-Type'], $this->headers['Content-Length']);
		$this->post = array();
		$this->fileheader = '';
		$this->r_string = '';
		return $back;
	}

	/**
	 * Decodes a gzip-encoded string.
	 *
	 * @param string $string Complete gzip stream.
	 * @return string|false Decoded data, or false when decoding fails.
	 */
	public function my_gzdecode($string) {
		if (function_exists('gzdecode')) {
			return gzdecode($string);
		}
		// Fallback for old PHP versions: skip the fixed 10-byte gzip header
		// and inflate the rest (only valid when no optional header fields are present).
		return gzinflate(substr($string, 10));
	}

	/**
	 * Decompresses the buffered response body according to the
	 * Content-Encoding response header.
	 *
	 * @param string $str Unused; kept for backward compatibility. The method
	 *                    works on $r_string.
	 * @return string The (possibly decompressed) content of $r_string. When
	 *                decoding fails the body is left untouched.
	 */
	public function def($str) {
		$headers = (string) $this->r_headers;
		$decoded = null;

		if ($this->r_string !== '' && $this->r_string !== null) {
			// Only the Content-Encoding header counts; the word "gzip" elsewhere
			// in the headers must not trigger decompression.
			if (preg_match('/^Content-Encoding:\s*(x-)?gzip\b/mi', $headers)) {
				$decoded = $this->my_gzdecode($this->r_string);
			} elseif (preg_match('/^Content-Encoding:\s*deflate\b/mi', $headers)) {
				$decoded = gzuncompress($this->r_string);
			}
		}

		if (is_string($decoded)) {
			$this->r_string = $decoded;
		}
		return $this->r_string;
	}

	/**
	 * Joins an associative array into "key<mitte>value" items separated by $end.
	 *
	 * @param array  $array Key/value pairs.
	 * @param string $mitte Separator between key and value.
	 * @param string $end   Separator between items.
	 * @return string Joined string, "" for an empty array.
	 */
	public function keyer($array, $mitte, $end) {
		$items = array();
		if (is_array($array)) {
			foreach ($array as $key => $value) {
				$items[] = $key . $mitte . $value;
			}
		}
		return implode($end, $items);
	}

	/**
	 * Prepares a multipart/form-data upload for the next read().
	 *
	 * Stores the encoded body in $fileheader and sets the matching
	 * Content-Type and Content-Length request headers.
	 *
	 * @param array  $postdata Ordinary form fields, name => value.
	 * @param array  $filedata [0] form field name, [1] file name, [2] file contents.
	 * @param string $mimetype MIME type sent for the file part.
	 */
	public function postfile($postdata, $filedata, $mimetype) {
		$eol = "\r\n"; // multipart bodies use CRLF line endings
		$boundary = "---------------------" . substr(md5(uniqid(mt_rand(), true)), 0, 10);

		$data = "";
		if (is_array($postdata)) {
			foreach ($postdata as $key => $val) {
				$data .= "--" . $boundary . $eol;
				$data .= "Content-Disposition: form-data; name=\"" . $key . "\"" . $eol . $eol . $val . $eol;
			}
		}
		$data .= "--" . $boundary . $eol;
		$data .= "Content-Disposition: form-data; name=\"" . $filedata[0] . "\"; filename=\"" . $filedata[1] . "\"" . $eol;
		$data .= "Content-Type: " . $mimetype . $eol;
		$data .= "Content-Transfer-Encoding: binary" . $eol . $eol;
		$data .= $filedata[2] . $eol;
		$data .= "--" . $boundary . "--" . $eol;

		// Use the same header keys as read_header()/read() so the headers are
		// neither duplicated in another spelling nor left behind after the request.
		unset($this->headers['Content-type'], $this->headers['Content-length']);
		$this->headers['Content-Type'] = "multipart/form-data; boundary=" . $boundary;
		$this->headers['Content-Length'] = strlen($data);
		$this->fileheader = $data;
	}

	/**
	 * Merges the cookies announced in $r_headers into the cookie jar.
	 *
	 * Every Set-Cookie line is parsed by attribute name. A cookie whose
	 * Max-Age or Expires lies in the past is removed from the jar; all other
	 * cookies are added or overwritten. Path, Domain and Secure are ignored.
	 */
	public function get_cookies() {
		if (!is_array($this->cookies)) {
			$this->cookies = array();
		}
		$now = time();
		$removed = false;

		$lines = preg_split('/\r?\n/', (string) $this->r_headers);
		foreach ($lines as $line) {
			if (!preg_match('/^Set-Cookie:\s*(.*)$/i', $line, $match)) {
				continue;
			}

			$parts = explode(";", trim($match[1]));

			// The first segment is "name=value"; the value may itself contain "=".
			$pair = explode("=", array_shift($parts), 2);
			$name = trim($pair[0]);
			if ($name === '') {
				continue;
			}
			$value = isset($pair[1]) ? trim($pair[1]) : '';

			// Expiry is determined per cookie, so nothing carries over from the previous line.
			$expires = null;
			$hasMaxAge = false;
			foreach ($parts as $attribute) {
				$kv = explode("=", $attribute, 2);
				$attrName = strtolower(trim($kv[0]));
				$attrValue = isset($kv[1]) ? trim($kv[1]) : '';

				if ($attrName === 'max-age' && is_numeric($attrValue)) {
					// Max-Age takes precedence over Expires.
					$expires = $now + (int) $attrValue;
					$hasMaxAge = true;
				} elseif ($attrName === 'expires' && !$hasMaxAge) {
					$timestamp = strtotime($attrValue);
					if ($timestamp !== false) {
						$expires = $timestamp;
					}
				}
			}

			if ($expires !== null && $expires <= $now) {
				// An expiry in the past is how a server deletes a cookie.
				unset($this->cookies[$name]);
				$removed = true;
			} else {
				$this->cookies_add($name, $value);
			}
		}

		// read_header() only rewrites the Cookie header while the jar is
		// non-empty, so drop it once the last cookie has been deleted.
		if ($removed && count($this->cookies) === 0) {
			unset($this->headers['Cookie']);
		}
	}
}