Validate PF project listings and sync own price
This commit is contained in:
@@ -119,6 +119,15 @@ def _extract_id_from_url(url: str) -> str | None:
|
|||||||
return m.group(1) if m else None
|
return m.group(1) if m else None
|
||||||
|
|
||||||
|
|
||||||
|
def is_listing_url(url: str) -> bool:
    """Tell whether ``url`` points at one specific PF listing.

    A URL qualifies only when ``_extract_id_from_url`` can pull a listing id
    out of it; a falsy ``url`` is treated as the empty string.

    Search pages on PF embed listing-like JSON as well. If one were handled
    as a detail page, monitoring could latch onto an arbitrary search result,
    so callers are expected to refuse anything this function rejects.
    """
    candidate = url or ""
    listing_id = _extract_id_from_url(candidate)
    return True if listing_id else False
|
||||||
|
|
||||||
|
|
||||||
def _is_listing_dict(item: dict) -> bool:
|
def _is_listing_dict(item: dict) -> bool:
|
||||||
"""Heuristic: a listing dict contains a price plus an id-like field."""
|
"""Heuristic: a listing dict contains a price plus an id-like field."""
|
||||||
if not isinstance(item, dict):
|
if not isinstance(item, dict):
|
||||||
@@ -131,12 +140,22 @@ def _is_listing_dict(item: dict) -> bool:
|
|||||||
class PropertyFinderScraper:
|
class PropertyFinderScraper:
|
||||||
source = SOURCE
|
source = SOURCE
|
||||||
|
|
||||||
|
def is_listing_url(self, url: str) -> bool:
    """Scraper-level entry point that defers to the module's ``is_listing_url``."""
    verdict = is_listing_url(url)
    return verdict
|
||||||
|
|
||||||
|
def listing_id_from_url(self, url: str) -> str | None:
    """Return whatever ``_extract_id_from_url`` yields for ``url`` (an id or ``None``)."""
    listing_id = _extract_id_from_url(url)
    return listing_id
|
||||||
|
|
||||||
def fetch_listing(self, url: str) -> ScrapedListing | None:
|
def fetch_listing(self, url: str) -> ScrapedListing | None:
|
||||||
"""Refetch a known listing URL. Returns:
|
"""Refetch a known listing URL. Returns:
|
||||||
- ScrapedListing(is_active=False) if the URL returns 404 (listing removed)
|
- ScrapedListing(is_active=False) if the URL returns 404 (listing removed)
|
||||||
- ScrapedListing with current data if alive
|
- ScrapedListing with current data if alive
|
||||||
- None on network/parse failure (we won't update the DB in that case)
|
- None on network/parse failure (we won't update the DB in that case)
|
||||||
"""
|
"""
|
||||||
|
if not is_listing_url(url):
|
||||||
|
logger.warning("PF fetch_listing rejected non-listing URL: %s", url)
|
||||||
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
html = fetch_html(url)
|
html = fetch_html(url)
|
||||||
except ScraperError as e:
|
except ScraperError as e:
|
||||||
@@ -199,6 +218,9 @@ class PropertyFinderScraper:
|
|||||||
def get_permit(self, url: str) -> str | None:
|
def get_permit(self, url: str) -> str | None:
|
||||||
"""Fetch a listing page and return only its DLD permit number (or None).
|
"""Fetch a listing page and return only its DLD permit number (or None).
|
||||||
Used to compare candidates against our own permit during suggestions."""
|
Used to compare candidates against our own permit during suggestions."""
|
||||||
|
if not is_listing_url(url):
|
||||||
|
logger.warning("PF get_permit rejected non-listing URL: %s", url)
|
||||||
|
return None
|
||||||
try:
|
try:
|
||||||
html = fetch_html(url)
|
html = fetch_html(url)
|
||||||
except ScraperError as e:
|
except ScraperError as e:
|
||||||
@@ -300,6 +322,8 @@ class PropertyFinderScraper:
|
|||||||
agent_name, agency_name = _extract_broker(node)
|
agent_name, agency_name = _extract_broker(node)
|
||||||
share = node.get("share_url") or node.get("path")
|
share = node.get("share_url") or node.get("path")
|
||||||
cand_url = share if str(share).startswith("http") else urljoin(BASE_URL, str(share or ""))
|
cand_url = share if str(share).startswith("http") else urljoin(BASE_URL, str(share or ""))
|
||||||
|
if not is_listing_url(cand_url):
|
||||||
|
continue
|
||||||
|
|
||||||
results.append(
|
results.append(
|
||||||
ScrapedListing(
|
ScrapedListing(
|
||||||
|
|||||||
@@ -14,7 +14,6 @@ Adding new competitors is done via the web UI (user pastes URLs) — not here.
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import re
|
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
@@ -61,6 +60,12 @@ def detect_source_from_url(url: str) -> Source | None:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _is_supported_listing_url(source: Source, url: str) -> bool:
    """Decide whether ``url`` may be handled as a single listing of ``source``.

    PropertyFinder URLs are checked with ``PF.is_listing_url``. For any other
    source the URL itself is not inspected: the answer is ``True`` for Bayut
    and ``False`` otherwise.
    """
    if source != Source.PROPERTYFINDER:
        return source == Source.BAYUT
    return PF.is_listing_url(url)
|
||||||
|
|
||||||
|
|
||||||
def _fmt_price(value: float | None, currency: str | None = "AED") -> str:
|
def _fmt_price(value: float | None, currency: str | None = "AED") -> str:
|
||||||
if value is None:
|
if value is None:
|
||||||
return "—"
|
return "—"
|
||||||
@@ -80,6 +85,21 @@ def _listing_key(source: Source | str, external_id: str) -> tuple[str, str]:
|
|||||||
return source_value, str(external_id or "")
|
return source_value, str(external_id or "")
|
||||||
|
|
||||||
|
|
||||||
|
def _project_own_listing_key(project: Project) -> tuple[str, str] | None:
    """Build the ``_listing_key`` of the project's own listing, if it has one.

    Returns ``None`` when ``project.our_url`` is empty, when the URL is not a
    PropertyFinder one, or when no listing id can be extracted from it.
    """
    own_url = project.our_url
    if not own_url:
        return None

    source = detect_source_from_url(own_url)
    if source != Source.PROPERTYFINDER:
        # Only PropertyFinder URLs are resolved to a key here.
        return None

    listing_id = PF.listing_id_from_url(own_url)
    if not listing_id:
        return None
    return _listing_key(source, listing_id)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_own_listing(project: Project, item: ScrapedListing) -> bool:
    """Return ``True`` when ``item`` has the same listing key as the project's own URL."""
    own_key = _project_own_listing_key(project)
    if not own_key:
        # No resolvable own listing, so nothing can match it.
        return False
    return own_key == _listing_key(item.source, item.external_id)
|
||||||
|
|
||||||
|
|
||||||
def _format_listing_added(project: Project, listing: CompetitorListing, *, auto: bool) -> str:
|
def _format_listing_added(project: Project, listing: CompetitorListing, *, auto: bool) -> str:
|
||||||
title = listing.title or "без названия"
|
title = listing.title or "без названия"
|
||||||
prefix = "✅ <b>Автоматически добавлен конкурент</b>" if auto else "✅ <b>Добавлен конкурент</b>"
|
prefix = "✅ <b>Автоматически добавлен конкурент</b>" if auto else "✅ <b>Добавлен конкурент</b>"
|
||||||
@@ -119,6 +139,8 @@ def add_competitor_url(db: Session, project: Project, url: str) -> tuple[Competi
|
|||||||
"Bayut временно не поддерживается — площадка перешла на защищённый "
|
"Bayut временно не поддерживается — площадка перешла на защищённый "
|
||||||
"рендеринг. Добавляйте ссылки PropertyFinder."
|
"рендеринг. Добавляйте ссылки PropertyFinder."
|
||||||
)
|
)
|
||||||
|
if not _is_supported_listing_url(source, url):
|
||||||
|
return None, "Укажите ссылку на конкретное объявление, а не на страницу поиска"
|
||||||
|
|
||||||
scraper = _scraper_for(source)
|
scraper = _scraper_for(source)
|
||||||
scraped = scraper.fetch_listing(url)
|
scraped = scraper.fetch_listing(url)
|
||||||
@@ -139,6 +161,8 @@ def add_competitor_url(db: Session, project: Project, url: str) -> tuple[Competi
|
|||||||
)
|
)
|
||||||
if existing:
|
if existing:
|
||||||
return None, "Это объявление уже добавлено в проект"
|
return None, "Это объявление уже добавлено в проект"
|
||||||
|
if _is_own_listing(project, scraped):
|
||||||
|
return None, "Это ссылка на наш объект, а не на конкурента"
|
||||||
|
|
||||||
now = datetime.utcnow()
|
now = datetime.utcnow()
|
||||||
listing = CompetitorListing(
|
listing = CompetitorListing(
|
||||||
@@ -258,6 +282,7 @@ def sync_permit_competitors(
|
|||||||
item
|
item
|
||||||
for item in suggestions["propertyfinder"]
|
for item in suggestions["propertyfinder"]
|
||||||
if _normalize_permit(item.permit_number) == normalized_permit
|
if _normalize_permit(item.permit_number) == normalized_permit
|
||||||
|
and not _is_own_listing(project, item)
|
||||||
]
|
]
|
||||||
|
|
||||||
matched_keys = {_listing_key(item.source, item.external_id) for item in matches}
|
matched_keys = {_listing_key(item.source, item.external_id) for item in matches}
|
||||||
@@ -306,6 +331,7 @@ def check_project(db: Session, project: Project) -> list[str]:
|
|||||||
"""Re-scan all tracked competitor listings for one project. Returns notification texts."""
|
"""Re-scan all tracked competitor listings for one project. Returns notification texts."""
|
||||||
changes: list[str] = []
|
changes: list[str] = []
|
||||||
now = datetime.utcnow()
|
now = datetime.utcnow()
|
||||||
|
changes.extend(refresh_our_listing(db, project, now=now))
|
||||||
sync_changes, _, _ = sync_permit_competitors(db, project)
|
sync_changes, _, _ = sync_permit_competitors(db, project)
|
||||||
changes.extend(sync_changes)
|
changes.extend(sync_changes)
|
||||||
|
|
||||||
@@ -372,6 +398,52 @@ def check_project(db: Session, project: Project) -> list[str]:
|
|||||||
return changes
|
return changes
|
||||||
|
|
||||||
|
|
||||||
|
def refresh_our_listing(db: Session, project: Project, *, now: datetime | None = None) -> list[str]:
    """Parse our own listing and keep ``project.our_price`` in sync.

    This never creates a competitor listing. It only updates project metadata
    from the concrete ``our_url``, so PF search pages are ignored.

    Args:
        db: Session that is flushed once the project fields have been updated.
        project: Project whose ``our_url`` is fetched; ``our_price``,
            ``dld_permit`` (only when currently empty) and ``last_checked_at``
            may be modified.
        now: When given, stored as ``project.last_checked_at``.

    Returns:
        Notification texts: one message when a previously known price changed,
        otherwise an empty list. Also empty when the URL is missing, belongs
        to an unknown or disabled source, is not a listing URL, or the fetch
        returned nothing / an inactive listing (nothing is written then).
    """
    url = (project.our_url or "").strip()
    if not url:
        return []

    source = detect_source_from_url(url)
    # Parenthesised for readability; ``and`` already bound tighter than ``or``.
    if source is None or (source == Source.BAYUT and not BAYUT_ENABLED):
        return []
    if not _is_supported_listing_url(source, url):
        logger.warning("Project %s has non-listing our_url: %s", project.id, url)
        return []

    scraped = _scraper_for(source).fetch_listing(url)
    if scraped is None or not scraped.is_active:
        return []

    changed: list[str] = []
    # Backfill the permit only; an existing permit is never overwritten.
    if scraped.permit_number and not project.dld_permit:
        project.dld_permit = scraped.permit_number

    old_price = project.our_price
    new_price = scraped.price
    if new_price is not None and old_price != new_price:
        project.our_price = new_price
        # The first price ever recorded is stored silently, without a message.
        if old_price is not None:
            delta = new_price - old_price
            pct = (delta / old_price * 100.0) if old_price else 0.0
            arrow = "📈" if delta > 0 else "📉"
            sign = "+" if delta > 0 else ""
            # Thousands separators become spaces in the numeric lines only.
            # Adjacent string literals are concatenated before a trailing
            # method call applies, so a ``.replace`` on the last literal would
            # also strip commas from the project title and from the URL.
            figures = (
                f"Было: {_fmt_price(old_price)}\n"
                f"Стало: {_fmt_price(new_price, scraped.currency or 'AED')} "
                f"({sign}{delta:,.0f} / {pct:+.1f}%)\n"
            ).replace(",", " ")
            changed.append(
                f"{arrow} <b>Наша цена скорректирована</b> — {_source_label(source.value)}\n"
                f"{project.title}\n"
                f"{figures}"
                f"{url}"
            )

    if now is not None:
        project.last_checked_at = now
    db.flush()
    return changed
|
||||||
|
|
||||||
|
|
||||||
def _notify_owner(project: Project, changes: list[str]) -> None:
|
def _notify_owner(project: Project, changes: list[str]) -> None:
|
||||||
if not changes:
|
if not changes:
|
||||||
return
|
return
|
||||||
@@ -463,9 +535,10 @@ def suggest_similar(
|
|||||||
excluded.update((l.source.value, l.external_id) for l in project.listings)
|
excluded.update((l.source.value, l.external_id) for l in project.listings)
|
||||||
if project.our_url:
|
if project.our_url:
|
||||||
own_src = detect_source_from_url(project.our_url)
|
own_src = detect_source_from_url(project.our_url)
|
||||||
m = re.search(r"(\d+)\.html", project.our_url)
|
if own_src == Source.PROPERTYFINDER:
|
||||||
if own_src and m:
|
listing_id = PF.listing_id_from_url(project.our_url)
|
||||||
excluded.add((own_src.value, m.group(1)))
|
if listing_id:
|
||||||
|
excluded.add((own_src.value, listing_id))
|
||||||
for src in out:
|
for src in out:
|
||||||
out[src] = [c for c in out[src] if (src, c.external_id) not in excluded]
|
out[src] = [c for c in out[src] if (src, c.external_id) not in excluded]
|
||||||
|
|
||||||
@@ -479,8 +552,9 @@ def suggest_similar(
|
|||||||
permits = list(ex.map(PF.get_permit, [c.url for c in pf]))
|
permits = list(ex.map(PF.get_permit, [c.url for c in pf]))
|
||||||
for cand, permit in zip(pf, permits):
|
for cand, permit in zip(pf, permits):
|
||||||
cand.permit_number = permit
|
cand.permit_number = permit
|
||||||
matches = [c for c in pf if c.permit_number == our_permit]
|
normalized = _normalize_permit(our_permit)
|
||||||
others = [c for c in pf if c.permit_number != our_permit]
|
matches = [c for c in pf if _normalize_permit(c.permit_number) == normalized]
|
||||||
|
others = [c for c in pf if _normalize_permit(c.permit_number) != normalized]
|
||||||
out["propertyfinder"] = matches + others[:_SUGGEST_OTHERS_LIMIT]
|
out["propertyfinder"] = matches + others[:_SUGGEST_OTHERS_LIMIT]
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception("PF permit enrichment failed: %s", e)
|
logger.exception("PF permit enrichment failed: %s", e)
|
||||||
|
|||||||
@@ -5,12 +5,15 @@ import (
|
|||||||
"database/sql"
|
"database/sql"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
var ErrNotFound = errors.New("not found")
|
var ErrNotFound = errors.New("not found")
|
||||||
var ErrTelegramRequired = errors.New("telegram required")
|
var ErrTelegramRequired = errors.New("telegram required")
|
||||||
|
|
||||||
|
// propertyFinderListingURLRe matches, case-insensitively, a string that
// contains "propertyfinder.ae/" followed by a path ending in "-<digits>.html",
// optionally followed by a query string or fragment, up to the end of input.
// The digits are captured as group 1.
// NOTE(review): the pattern is not anchored at the start and does not check
// the scheme or host position, so "propertyfinder.ae/" may appear anywhere in
// the string (for example inside another site's query string) — confirm that
// this is acceptable for our_url validation.
var propertyFinderListingURLRe = regexp.MustCompile(`(?i)propertyfinder\.ae/.+-(\d+)\.html(?:[?#].*)?$`)
|
||||||
|
|
||||||
func (a *App) CurrentEmployee(ctx context.Context, portalUserID string, required bool) (*Employee, error) {
|
func (a *App) CurrentEmployee(ctx context.Context, portalUserID string, required bool) (*Employee, error) {
|
||||||
if portalUserID == "" {
|
if portalUserID == "" {
|
||||||
if required {
|
if required {
|
||||||
@@ -301,6 +304,11 @@ func (a *App) CreateProject(ctx context.Context, ownerID int64, p ProjectPayload
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
p.Title = title
|
||||||
|
p.DealType = deal
|
||||||
|
if err := validateProjectRequired(p); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
res, err := a.DB.ExecContext(ctx, `
|
res, err := a.DB.ExecContext(ctx, `
|
||||||
INSERT INTO projects
|
INSERT INTO projects
|
||||||
(title, deal_type, owner_id, our_price, notes, dld_permit, building, bedrooms, size_sqft, our_url, created_at)
|
(title, deal_type, owner_id, our_price, notes, dld_permit, building, bedrooms, size_sqft, our_url, created_at)
|
||||||
@@ -334,6 +342,12 @@ func (a *App) UpdateProject(ctx context.Context, ownerID, projectID int64, p Pro
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
p = mergeProjectPayload(current, p)
|
||||||
|
p.Title = title
|
||||||
|
p.DealType = deal
|
||||||
|
if err := validateProjectRequired(p); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
_, err = a.DB.ExecContext(ctx, `
|
_, err = a.DB.ExecContext(ctx, `
|
||||||
UPDATE projects
|
UPDATE projects
|
||||||
SET title = ?, deal_type = ?, our_price = ?, notes = ?, dld_permit = ?,
|
SET title = ?, deal_type = ?, our_price = ?, notes = ?, dld_permit = ?,
|
||||||
@@ -348,6 +362,59 @@ func (a *App) UpdateProject(ctx context.Context, ownerID, projectID int64, p Pro
|
|||||||
return a.ProjectByID(ctx, ownerID, projectID, true)
|
return a.ProjectByID(ctx, ownerID, projectID, true)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func mergeProjectPayload(current *Project, p ProjectPayload) ProjectPayload {
|
||||||
|
if current == nil {
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
if p.OurPrice == nil {
|
||||||
|
p.OurPrice = current.OurPrice
|
||||||
|
}
|
||||||
|
if cleanPtr(p.DLDPermit) == nil {
|
||||||
|
p.DLDPermit = current.DLDPermit
|
||||||
|
}
|
||||||
|
if cleanPtr(p.Building) == nil {
|
||||||
|
p.Building = current.Building
|
||||||
|
}
|
||||||
|
if p.Bedrooms == nil {
|
||||||
|
p.Bedrooms = current.Bedrooms
|
||||||
|
}
|
||||||
|
if p.SizeSqft == nil {
|
||||||
|
p.SizeSqft = current.SizeSqft
|
||||||
|
}
|
||||||
|
if cleanPtr(p.OurURL) == nil {
|
||||||
|
p.OurURL = current.OurURL
|
||||||
|
}
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
|
func validateProjectRequired(p ProjectPayload) error {
|
||||||
|
if cleanString(p.Title) == "" {
|
||||||
|
return fmt.Errorf("title is required")
|
||||||
|
}
|
||||||
|
if p.OurPrice == nil || *p.OurPrice <= 0 {
|
||||||
|
return fmt.Errorf("our_price is required")
|
||||||
|
}
|
||||||
|
if cleanPtr(p.DLDPermit) == nil {
|
||||||
|
return fmt.Errorf("dld_permit is required")
|
||||||
|
}
|
||||||
|
if cleanPtr(p.Building) == nil {
|
||||||
|
return fmt.Errorf("building is required")
|
||||||
|
}
|
||||||
|
if p.Bedrooms == nil {
|
||||||
|
return fmt.Errorf("bedrooms is required")
|
||||||
|
}
|
||||||
|
if p.SizeSqft == nil || *p.SizeSqft <= 0 {
|
||||||
|
return fmt.Errorf("size_sqft is required")
|
||||||
|
}
|
||||||
|
if cleanPtr(p.OurURL) == nil {
|
||||||
|
return fmt.Errorf("our_url is required")
|
||||||
|
}
|
||||||
|
if !propertyFinderListingURLRe.MatchString(strings.TrimSpace(*p.OurURL)) {
|
||||||
|
return fmt.Errorf("our_url must be a concrete PropertyFinder listing URL")
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (a *App) DeleteProject(ctx context.Context, ownerID, projectID int64) error {
|
func (a *App) DeleteProject(ctx context.Context, ownerID, projectID int64) error {
|
||||||
tx, err := a.DB.BeginTx(ctx, nil)
|
tx, err := a.DB.BeginTx(ctx, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
Reference in New Issue
Block a user