Validate PF project listings and sync own price
This commit is contained in:
@@ -119,6 +119,15 @@ def _extract_id_from_url(url: str) -> str | None:
|
||||
return m.group(1) if m else None
|
||||
|
||||
|
||||
def is_listing_url(url: str) -> bool:
    """Report whether *url* identifies one specific PF listing.

    The check passes only when a listing id can be extracted from the URL.
    PF search pages embed listing-shaped JSON too; parsing one of them as a
    detail page could attach monitoring to an arbitrary search result, so
    callers are expected to refuse any URL for which this returns False.
    """
    # A falsy url (None, "") is normalised to "" before id extraction.
    listing_id = _extract_id_from_url(url if url else "")
    return True if listing_id else False
|
||||
|
||||
|
||||
def _is_listing_dict(item: dict) -> bool:
|
||||
"""Heuristic: a listing dict contains a price plus an id-like field."""
|
||||
if not isinstance(item, dict):
|
||||
@@ -131,12 +140,22 @@ def _is_listing_dict(item: dict) -> bool:
|
||||
class PropertyFinderScraper:
|
||||
source = SOURCE
|
||||
|
||||
def is_listing_url(self, url: str) -> bool:
    """Instance-level entry point to the module-level ``is_listing_url`` check."""
    # The bare name resolves to the module-level function, not this method.
    verdict = is_listing_url(url)
    return verdict
|
||||
|
||||
def listing_id_from_url(self, url: str) -> str | None:
    """Return the listing id parsed out of *url*, or None when none is found."""
    listing_id = _extract_id_from_url(url)
    return listing_id
|
||||
|
||||
def fetch_listing(self, url: str) -> ScrapedListing | None:
|
||||
"""Refetch a known listing URL. Returns:
|
||||
- ScrapedListing(is_active=False) if the URL returns 404 (listing removed)
|
||||
- ScrapedListing with current data if alive
|
||||
- None on network/parse failure (we won't update the DB in that case)
|
||||
"""
|
||||
if not is_listing_url(url):
|
||||
logger.warning("PF fetch_listing rejected non-listing URL: %s", url)
|
||||
return None
|
||||
|
||||
try:
|
||||
html = fetch_html(url)
|
||||
except ScraperError as e:
|
||||
@@ -199,6 +218,9 @@ class PropertyFinderScraper:
|
||||
def get_permit(self, url: str) -> str | None:
|
||||
"""Fetch a listing page and return only its DLD permit number (or None).
|
||||
Used to compare candidates against our own permit during suggestions."""
|
||||
if not is_listing_url(url):
|
||||
logger.warning("PF get_permit rejected non-listing URL: %s", url)
|
||||
return None
|
||||
try:
|
||||
html = fetch_html(url)
|
||||
except ScraperError as e:
|
||||
@@ -300,6 +322,8 @@ class PropertyFinderScraper:
|
||||
agent_name, agency_name = _extract_broker(node)
|
||||
share = node.get("share_url") or node.get("path")
|
||||
cand_url = share if str(share).startswith("http") else urljoin(BASE_URL, str(share or ""))
|
||||
if not is_listing_url(cand_url):
|
||||
continue
|
||||
|
||||
results.append(
|
||||
ScrapedListing(
|
||||
|
||||
Reference in New Issue
Block a user