<?php
// Base address of the paginated result listing (query c=203); the page number
// is appended to it when the listing pages are downloaded.
$url = 'http://b2binform.ru/result?c=203&page=';
// Site origin; getLinks() prepends it to the relative links it extracts.
$domen = 'http://b2binform.ru';
/**
 * Download a URL with cURL and return the raw response.
 *
 * The response headers are included in front of the body (CURLOPT_HEADER),
 * redirects are followed, and cookies are read from and saved to cookie.txt
 * located next to this script.
 *
 * @param string $url Address to request; it is also sent as the Referer.
 * @return string|false Response headers followed by the body, or false when
 *                      the transfer fails (the value comes from curl_exec()).
 */
function myCurl($url) {
    $cookieFile = dirname(__FILE__) . '/cookie.txt';

    $options = array(
        CURLOPT_URL            => $url,
        CURLOPT_HEADER         => 1,
        CURLOPT_REFERER        => $url,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_USERAGENT      => 'Opera/9.80 (Windows NT 5.1; U; ru) Presto/2.10.229 Version/11.61',
        // CURLOPT_ENCODING sets the Accept-Encoding request header, which takes
        // content-codings (gzip, deflate, ...), not a charset such as "utf-8".
        // An empty string advertises every coding cURL supports and lets it
        // decode the response transparently.
        CURLOPT_ENCODING       => '',
        CURLOPT_AUTOREFERER    => 1,
        CURLOPT_TIMEOUT        => 200,
        CURLOPT_COOKIEFILE     => $cookieFile,
        CURLOPT_COOKIEJAR      => $cookieFile,
        CURLOPT_HTTPAUTH       => CURLAUTH_ANY,
        CURLOPT_FOLLOWLOCATION => 1,
    );

    $ch = curl_init();
    // Options are applied one by one so that a rejected option does not
    // prevent the remaining ones from being set.
    foreach ($options as $option => $value) {
        curl_setopt($ch, $option, $value);
    }

    $data = curl_exec($ch);
    curl_close($ch);

    return $data;
}
// Download listing pages 1 through 42 up front; each raw response is stored
// under its page number.
$result = array();
$i = 1;
while ($i <= 42) {
    $result[$i] = myCurl($url . $i);
    $i++;
}
// Debug helpers: uncomment one of these to inspect the first downloaded page.
#echo '<pre>' . print_r($result[1], 1) . '</pre>'; exit;
#echo nl2br(htmlspecialchars($result[1])); exit;
// Filled further down with the links found on the downloaded pages.
$links = array();
/**
 * Extract the detail-page links from the markup of one listing page.
 *
 * Matches anchors whose href starts with "/c/" and ends with ".html" and that
 * directly wrap a "search_result_item" div, then turns each relative href into
 * an absolute URL using the global $domen.
 *
 * @param string $res Markup of a listing page.
 * @return array Absolute URLs in document order; empty when nothing matches.
 */
function getLinks($res) {
    global $domen;

    // [^"]* keeps every match inside a single href value: the previous greedy
    // ".*" ran on to the last link of the line and merged several links into
    // one bogus match. "\." matches the literal dot of ".html".
    preg_match_all('#<a href="(/c/[^"]*\.html)"><div class="search_result_item">#', $res, $matches);

    // Prepend the domain to every relative link.
    return array_map(function ($val) use ($domen) {
        return $domen . $val;
    }, $matches[1]);
}
// Pull the links out of every downloaded page. Each page's links are merged in
// FRONT of the ones gathered so far, so links from later pages come first.
foreach ($result as $link) { // $link holds the page markup here, not a URL
    $pageLinks = getLinks($link);
    $links = array_merge($pageLinks, $links);
}
// Alternative kept for reference (gives one sub-array per page, not a flat list):
#$links = array_map('getLinks', $result);
#echo '<pre>' . print_r($links, 1) . '</pre>';
#echo count($links, COUNT_RECURSIVE) - 42; // minus the keys
/**
 * Cut the information fragment(s) out of a page.
 *
 * A fragment is everything between an opening <div class="information_top">
 * and the next <div class="map">; matching is case-insensitive, spans line
 * breaks and is non-greedy.
 *
 * @param string $res Page markup.
 * @return array The captured fragments in document order; empty when the
 *               page contains no such section.
 */
function getInfo($res) {
    $pattern = '#<div class="information_top">(.*)<div class="map">#isU';
    $found = array();
    preg_match_all($pattern, $res, $found);

    $fragments = $found[1];
    return $fragments;
}
// Download every linked page and keep the first information fragment of each.
$info = array();
foreach ($links as $link) {
    $inf = getInfo(myCurl($link));
    // getInfo() returns an empty array when the download failed or the page
    // has no information block. Skip such pages: reading $inf[0] there raised
    // an undefined-index notice and queued a null entry, which later ended up
    // as a blank row in the CSV.
    if (!isset($inf[0])) {
        continue;
    }
    $info[] = $inf[0];
}
// Drop identical fragments; array_unique() keeps the first occurrence of each
// value and preserves the original keys.
$info = array_unique($info);
/**
 * Turn an information fragment into a "label => value" map.
 *
 * Every <div><span>LABEL</span>VALUE</div> element contributes one pair: the
 * label is taken verbatim, the value has its HTML tags stripped. When a label
 * repeats, the later value wins. The last pair found is always discarded.
 *
 * @param string $res Markup of one information fragment.
 * @return array Map of label to tag-free value, without the final pair.
 */
function getParam($res) {
    $found = array();
    preg_match_all('#<div><span>(.*)</span>(.*)</div>#isU', $res, $found);

    $pairs = array();
    foreach ($found[1] as $index => $label) {
        $pairs[$label] = strip_tags($found[2][$index]);
    }

    // The trailing element is dropped unconditionally.
    array_pop($pairs);

    return $pairs;
}
// Parse every information fragment into a "label => value" map.
$ress = array();
foreach ($info as $val) {
    $ress[] = getParam($val);
}

$file = 'result.csv';
$fields = array('id', 'Наименование', 'Адрес', 'Телефоны', 'Email', 'WWW');

// Converts a UTF-8 line to windows-1251 for the CSV file. Text that is not
// valid UTF-8 is passed through untouched. mb_convert_encoding() substitutes
// characters that have no windows-1251 equivalent, whereas iconv() returns
// false for them, which made the whole row be written as an empty string.
$toCsvBytes = function ($text) {
    if (mb_check_encoding($text, 'UTF-8')) {
        return mb_convert_encoding($text, 'Windows-1251', 'UTF-8');
    }
    return $text;
};

// Quotes a value only when it contains the delimiter, a double quote or a
// line break, so such values can no longer shift or split the columns.
// Embedded double quotes are doubled, as CSV readers expect.
$csvField = function ($value) {
    if (strpbrk($value, ";\"\r\n") === false) {
        return $value;
    }
    return '"' . str_replace('"', '""', $value) . '"';
};

// Header line. The file is appended to, never truncated.
$firstline = implode(';', $fields);
file_put_contents($file, $toCsvBytes($firstline) . PHP_EOL, FILE_APPEND | LOCK_EX);

foreach ($ress as $key => $array) {
    // The id column is the 1-based position of the record.
    $str = ($key + 1) . ';';
    foreach ($fields as $field) {
        if ($field === 'id') {
            continue;
        }
        // A missing value is written as a single tab, as before. Every field,
        // including the last one, is followed by the delimiter so that new
        // rows keep the layout of rows already present in the file.
        if (array_key_exists($field, $array)) {
            $str .= $csvField($array[$field]) . ';';
        } else {
            $str .= "\t;";
        }
    }
    $str .= PHP_EOL;
    file_put_contents($file, $toCsvBytes($str), FILE_APPEND | LOCK_EX);
}