250 lines
7.9 KiB
PHP
250 lines
7.9 KiB
PHP
<?php
|
|
declare(strict_types=1);
|
|
|
|
// Directory that receives one JSON response file per geo point.
$outDir = __DIR__ . '/dataset';

// Create the directory on first run. The trailing is_dir() re-check tolerates
// another process creating it between our check and mkdir(); any other failure
// aborts here instead of surfacing later as a series of failed writes.
if (!is_dir($outDir) && !mkdir($outDir, 0777, true) && !is_dir($outDir)) {
    throw new RuntimeException("Unable to create output directory: {$outDir}");
}
|
|
|
|
/**
 * Region + city (both are used to build the output file name), together with
 * the latitude/longitude recorded for each entry.
 */
$geoPoints = [
    // ap = Asia Pacific
    ['region' => 'ap', 'name' => 'hangzhou', 'lat' => 30.274086, 'lon' => 120.155071],
    ['region' => 'ap', 'name' => 'beijing', 'lat' => 39.9042, 'lon' => 116.4074],
    ['region' => 'ap', 'name' => 'guangzhou', 'lat' => 23.1291, 'lon' => 113.2644],
    ['region' => 'ap', 'name' => 'chengdu', 'lat' => 30.5728, 'lon' => 104.0668],
    ['region' => 'ap', 'name' => 'tokyo', 'lat' => 35.6762, 'lon' => 139.6503],
    ['region' => 'ap', 'name' => 'seoul', 'lat' => 37.5665, 'lon' => 126.9780],
    ['region' => 'ap', 'name' => 'bangkok', 'lat' => 13.7563, 'lon' => 100.5018],
    ['region' => 'ap', 'name' => 'singapore', 'lat' => 1.3521, 'lon' => 103.8198],
    ['region' => 'ap', 'name' => 'jakarta', 'lat' => -6.2088, 'lon' => 106.8456],
    ['region' => 'ap', 'name' => 'delhi', 'lat' => 28.6139, 'lon' => 77.2090],
    ['region' => 'ap', 'name' => 'mumbai', 'lat' => 19.0760, 'lon' => 72.8777],

    // me = Middle East / Africa
    ['region' => 'me', 'name' => 'dubai', 'lat' => 25.2048, 'lon' => 55.2708],
    ['region' => 'me', 'name' => 'istanbul', 'lat' => 41.0082, 'lon' => 28.9784],
    ['region' => 'me', 'name' => 'cairo', 'lat' => 30.0444, 'lon' => 31.2357],
    ['region' => 'me', 'name' => 'nairobi', 'lat' => -1.2921, 'lon' => 36.8219],
    ['region' => 'me', 'name' => 'johannesburg', 'lat' => -26.2041, 'lon' => 28.0473],

    // eu
    ['region' => 'eu', 'name' => 'london', 'lat' => 51.5072, 'lon' => -0.1276],
    ['region' => 'eu', 'name' => 'paris', 'lat' => 48.8566, 'lon' => 2.3522],
    ['region' => 'eu', 'name' => 'madrid', 'lat' => 40.4168, 'lon' => -3.7038],
    ['region' => 'eu', 'name' => 'rome', 'lat' => 41.9028, 'lon' => 12.4964],
    ['region' => 'eu', 'name' => 'berlin', 'lat' => 52.5200, 'lon' => 13.4050],
    ['region' => 'eu', 'name' => 'warsaw', 'lat' => 52.2297, 'lon' => 21.0122],
    ['region' => 'eu', 'name' => 'moscow', 'lat' => 55.7558, 'lon' => 37.6173],

    // na
    ['region' => 'na', 'name' => 'newyork', 'lat' => 40.7128, 'lon' => -74.0060],
    ['region' => 'na', 'name' => 'chicago', 'lat' => 41.8781, 'lon' => -87.6298],
    ['region' => 'na', 'name' => 'losangeles', 'lat' => 34.0522, 'lon' => -118.2437],
    ['region' => 'na', 'name' => 'dallas', 'lat' => 32.7767, 'lon' => -96.7970],
    ['region' => 'na', 'name' => 'toronto', 'lat' => 43.6532, 'lon' => -79.3832],
    ['region' => 'na', 'name' => 'mexicocity', 'lat' => 19.4326, 'lon' => -99.1332],

    // sa / oc
    ['region' => 'sa', 'name' => 'saopaulo', 'lat' => -23.5505, 'lon' => -46.6333],
    ['region' => 'sa', 'name' => 'santiago', 'lat' => -33.4489, 'lon' => -70.6693],
    ['region' => 'oc', 'name' => 'sydney', 'lat' => -33.8688, 'lon' => 151.2093],
];
|
|
|
|
// GraphQL endpoint that every request is POSTed to.
$url = 'https://apis.ihg.com/graphql/v1/hotels';

// Request headers, written as a name => value map and flattened (in the same
// order) into the "Name: value" lines that CURLOPT_HTTPHEADER takes.
// NOTE(review): the API key and the session/transaction IDs are hard-coded
// literals, presumably captured from a browser session; confirm they are meant
// to live in the source and to be reused for every request.
$headers = (static function (array $fields): array {
    $lines = [];
    foreach ($fields as $name => $value) {
        $lines[] = "{$name}: {$value}";
    }

    return $lines;
})([
    'sec-ch-ua-platform' => '"Windows"',
    'Referer'            => 'https://www.ihg.com.cn/',
    'sec-ch-ua'          => '"Microsoft Edge";v="147", "Not.A/Brand";v="8", "Chromium";v="147"',
    'ihg-language'       => 'zh-CN',
    'sec-ch-ua-mobile'   => '?0',
    'x-ihg-api-key'      => 'se9ym5iAzaW8pxfBjkmgbuGjJcr3Pj6Y',
    'IHG-SessionId'      => '6cc0bb0f-8850-4302-9d4b-80e2f2ef4860',
    'User-Agent'         => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/147.0.0.0 Safari/537.36 Edg/147.0.0.0',
    'Accept'             => 'application/json, text/plain, */*',
    'Content-Type'       => 'application/json; charset=UTF-8',
    'IHG-TransactionId'  => '50fef06e-2f3b-416c-a6af-69ba809c7666',
]);
|
|
|
|
/**
 * Build the JSON request body for the GetHotelDetails GraphQL operation,
 * searching around the given coordinates.
 *
 * @param float|int $lat Latitude placed in detailsInput.geoLocation.lat.
 * @param float|int $lon Longitude placed in detailsInput.geoLocation.lon.
 *
 * @return string JSON-encoded body (operationName, variables, query).
 *
 * @throws JsonException If the payload cannot be encoded (for example a NAN
 *                       or INF coordinate) instead of silently returning false.
 */
function buildPayload($lat, $lon): string
{
    // Nowdoc: the GraphQL "$variables" must not be interpolated by PHP.
    $query = <<<'GRAPHQL'
    query GetHotelDetails($detailsInput: HotelArgs, $mediaArgs: MediaArgs){
        getHotels(input:$detailsInput){
            hotelInfo{
                hotelCode
                callCenter{phoneNumber}
                address{
                    street1 street2 street3 city zip
                    state{code name}
                    country{name code}
                    checkInAddress{city country line1 state zipcode}
                }
                location{boardTypes{boardType}}
                distanceFrom{kilometers miles}
                marketing{optOutDateWeb optInDateWeb marketingText{welcomeMessage}}
                brandInfo{
                    SPBrandName brandCode brandName chainCode
                    futureBrandInfo{rebrandingDate hotelName chainCode brandName brandCode}
                    spTransitionalBrandIdentifier
                }
                greenEngage{
                    certificationPrograms{
                        certifiedByGloballyRecognizedSustainableProgram
                        environmentalCertificationProgram{listItem}
                    }
                    lowCarbon{lowCarbonHotelDescription isLowCarbonHotel}
                    lowCarbonReady{lowCarbonReadyHotelDescription isLowCarbonReadyHotel}
                }
                room{hotelHighlights{hotelDisclaimer}}
                badges{name id}
                facilities{name id}
                parking{
                    complimentaryDailySelfParking
                    parkingDescription
                    carParkingAvailable
                    valetParkingAvailable
                }
                policies{
                    pet{petsAllowed guideDogsOrServiceAnimalsAllowed description}
                }
                stripes{id name}
                renovationAlertsList{alertType flagEndDate flagStartDate other}
                profile{
                    name
                    webNonBrandedHotelLogo{url}
                    seoCity
                    nonIhgCrsUrl
                    independentNonIHGWebsiteURL
                    hotelLogo{originalUrl}
                    averageReview
                    tpiLevel2Violator
                    primaryImageUrl{originalUrl}
                    latLong{lon lat}
                    hotelStatus
                    preSellDate
                    dateOpened
                    totalReviews
                    vatIncluded
                }
                media(input:$mediaArgs){
                    primaryPhotos{
                        allPhotos{
                            type
                            primary
                            caption
                            originalUrl
                            formats{url aspectHeight aspectWidth}
                        }
                    }
                }
                foodAndBeverage{
                    complimentaryBreakfastDetails{complimentaryGrabAndGoBreakfast}
                }
                restaurant{onSiteRestaurantsCount}
                tax{
                    taxAndFeeDetail
                    serviceCharge{
                        startAndEndDate{startDate endDate}
                        description
                    }
                }
            }
        }
    }
    GRAPHQL;

    $body = [
        'operationName' => 'GetHotelDetails',
        'variables' => [
            'detailsInput' => [
                // NOTE(review): fixed date literal, presumably captured with the
                // original request; confirm whether it should follow the run date.
                'rebrandStartDate' => '2026-04-28',
                'geoLocation' => [
                    'lat' => $lat,
                    'lon' => $lon,
                    'radius' => 1336,
                ],
                'geoLocationDistance' => [
                    'distanceType' => 'STRAIGHT_LINE',
                    'distanceUnit' => 'KM',
                ],
                'size' => 3000,
                'fallbackSearch' => [
                    'minHotels' => 1,
                    'maxRadius' => 100,
                    'incrementRadiusBy' => 70,
                ],
                'sortBy' => 'DISTANCE',
            ],
            'mediaArgs' => [
                'formats' => [
                    ['aspectHeight' => '3', 'aspectWidth' => '4'],
                    ['aspectHeight' => '5', 'aspectWidth' => '16'],
                ],
            ],
        ],
        'query' => $query,
    ];

    // JSON_THROW_ON_ERROR turns an encoding failure into an exception, so a
    // boolean false can never be handed to the HTTP layer as the request body.
    return json_encode(
        $body,
        JSON_THROW_ON_ERROR | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES
    );
}
|
|
|
|
/**
 * POST a request body to the given URL and return the raw response body.
 *
 * @param string   $url     Endpoint to POST to.
 * @param string[] $headers Header lines in "Name: value" form.
 * @param string   $payload Request body.
 *
 * @return string|false Response body, or false when the handle cannot be
 *                      created, the transfer fails, or the server answers
 *                      with an HTTP status >= 400. The reason is written to
 *                      the PHP error log.
 */
function fetch($url, $headers, $payload)
{
    $ch = curl_init($url);
    if ($ch === false) {
        error_log("curl_init failed for {$url}");

        return false;
    }

    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_POST           => true,
        CURLOPT_HTTPHEADER     => $headers,
        CURLOPT_POSTFIELDS     => $payload,
        CURLOPT_ENCODING       => '',
        CURLOPT_TIMEOUT        => 20,
        CURLOPT_HTTP_VERSION   => CURL_HTTP_VERSION_2_0, // equivalent of curl's --http2
        // Treat HTTP 4xx/5xx as a failed transfer so an error page is never
        // returned to the caller as if it were data.
        CURLOPT_FAILONERROR    => true,
    ]);

    $res = curl_exec($ch);
    if ($res === false) {
        error_log(sprintf('Request to %s failed: [%d] %s', $url, curl_errno($ch), curl_error($ch)));
    }

    // curl_close() has no effect from PHP 8.0 on (handles are objects freed
    // automatically), so only call it where it still does something.
    if (PHP_VERSION_ID < 80000) {
        curl_close($ch);
    }

    return $res;
}
|
|
|
|
/**
 * Main loop: for every geo point, request the hotel list around it and store
 * the raw response as "<region>-<name>.json" in the output directory.
 */
foreach ($geoPoints as $p) {
    $filename = "{$p['region']}-{$p['name']}.json";
    $filepath = $outDir . '/' . $filename;

    echo "抓取 {$filename}...\n";

    $payload = buildPayload($p['lat'], $p['lon']);
    $res = fetch($url, $headers, $payload);

    if ($res === false || $res === '') {
        // Request failed or returned nothing: report it and move on.
        echo "失败: {$filename}\n";
    } elseif (file_put_contents($filepath, $res) === false) {
        // The response arrived but could not be written to disk.
        echo "写入失败: {$filepath}\n";
    }

    // Anti-ban throttle (very important). It runs after every request,
    // failed ones included, so errors never cause back-to-back requests.
    usleep(300000); // 0.3 s
}

echo "全部完成\n";