Compare commits

..

4 Commits

Author SHA1 Message Date
Grendgi
6750722429 feat: parse project metadata from PF links
All checks were successful
CI / hygiene (push) Successful in 2s
Build and Deploy / build-and-deploy (push) Successful in 38s
CI / go (push) Successful in 26s
CI / python (push) Successful in 16s
2026-06-24 14:03:25 +03:00
Grendgi
31c498af39 fix: use python 3.9 compatible sqlalchemy annotations
All checks were successful
CI / hygiene (push) Successful in 2s
Build and Deploy / build-and-deploy (push) Successful in 13s
CI / go (push) Successful in 22s
CI / python (push) Successful in 14s
2026-06-18 11:18:16 +03:00
Grendgi
2648a74b8c fix: support python 3.9 model annotations
Some checks failed
CI / hygiene (push) Successful in 2s
Build and Deploy / build-and-deploy (push) Successful in 14s
CI / go (push) Successful in 20s
CI / python (push) Failing after 14s
2026-06-18 11:09:42 +03:00
Grendgi
47ac87bbc3 fix: bootstrap pip in ci
Some checks failed
CI / hygiene (push) Successful in 1s
Build and Deploy / build-and-deploy (push) Successful in 12s
CI / go (push) Successful in 27s
CI / python (push) Failing after 14s
2026-06-18 10:29:46 +03:00
9 changed files with 301 additions and 18 deletions

View File

@@ -32,6 +32,10 @@ jobs:
needs: hygiene
steps:
- uses: actions/checkout@v4
- run: |
if ! python3 -m pip --version; then
python3 -m ensurepip --upgrade || (apt-get update && apt-get install -y python3-pip)
fi
- run: python3 -m pip install -r requirements.txt
- run: python3 -m compileall app
- run: python3 -m unittest discover -s tests

View File

@@ -1,5 +1,8 @@
from __future__ import annotations
from datetime import datetime
from enum import Enum
from typing import Optional
from sqlalchemy import Boolean, DateTime, Enum as SAEnum, Float, ForeignKey, Integer, String, Text, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column, relationship
@@ -27,9 +30,9 @@ class Employee(Base):
id: Mapped[int] = mapped_column(Integer, primary_key=True)
name: Mapped[str] = mapped_column(String(200))
portal_user_id: Mapped[str | None] = mapped_column(String(100), unique=True, index=True, nullable=True)
tg_chat_id: Mapped[str | None] = mapped_column(String(64), unique=True, nullable=True)
tg_username: Mapped[str | None] = mapped_column(String(200), nullable=True)
portal_user_id: Mapped[Optional[str]] = mapped_column(String(100), unique=True, index=True, nullable=True)
tg_chat_id: Mapped[Optional[str]] = mapped_column(String(64), unique=True, nullable=True)
tg_username: Mapped[Optional[str]] = mapped_column(String(200), nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
projects: Mapped[list["Project"]] = relationship(back_populates="owner")
@@ -43,21 +46,21 @@ class Project(Base):
id: Mapped[int] = mapped_column(Integer, primary_key=True)
title: Mapped[str] = mapped_column(String(300))
deal_type: Mapped[DealType] = mapped_column(SAEnum(DealType))
our_price: Mapped[float | None] = mapped_column(Float, nullable=True)
notes: Mapped[str | None] = mapped_column(Text, nullable=True)
our_price: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
notes: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
# Опциональные параметры — используются для подсказок похожих объявлений
dld_permit: Mapped[str | None] = mapped_column(String(100), index=True, nullable=True)
building: Mapped[str | None] = mapped_column(String(300), nullable=True)
bedrooms: Mapped[int | None] = mapped_column(Integer, nullable=True)
size_sqft: Mapped[float | None] = mapped_column(Float, nullable=True)
our_url: Mapped[str | None] = mapped_column(Text, nullable=True)
dld_permit: Mapped[Optional[str]] = mapped_column(String(100), index=True, nullable=True)
building: Mapped[Optional[str]] = mapped_column(String(300), nullable=True)
bedrooms: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
size_sqft: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
our_url: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
owner_id: Mapped[int] = mapped_column(ForeignKey("employees.id"))
owner: Mapped[Employee] = relationship(back_populates="projects")
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
last_checked_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
last_checked_at: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True)
listings: Mapped[list["CompetitorListing"]] = relationship(
back_populates="project", cascade="all, delete-orphan"
@@ -77,15 +80,15 @@ class CompetitorListing(Base):
source: Mapped[Source] = mapped_column(SAEnum(Source))
external_id: Mapped[str] = mapped_column(String(100)) # ID на стороне PF/Bayut
url: Mapped[str] = mapped_column(Text)
title: Mapped[str | None] = mapped_column(String(500), nullable=True)
agent_name: Mapped[str | None] = mapped_column(String(300), nullable=True)
agency_name: Mapped[str | None] = mapped_column(String(300), nullable=True)
permit_number: Mapped[str | None] = mapped_column(String(100), nullable=True)
title: Mapped[Optional[str]] = mapped_column(String(500), nullable=True)
agent_name: Mapped[Optional[str]] = mapped_column(String(300), nullable=True)
agency_name: Mapped[Optional[str]] = mapped_column(String(300), nullable=True)
permit_number: Mapped[Optional[str]] = mapped_column(String(100), nullable=True)
auto_discovered: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
permit_missing_checks: Mapped[int] = mapped_column(Integer, default=0, nullable=False)
current_price: Mapped[float | None] = mapped_column(Float, nullable=True)
currency: Mapped[str | None] = mapped_column(String(10), nullable=True, default="AED")
current_price: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
currency: Mapped[Optional[str]] = mapped_column(String(10), nullable=True, default="AED")
status: Mapped[ListingStatus] = mapped_column(SAEnum(ListingStatus), default=ListingStatus.ACTIVE)
first_seen_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
@@ -103,5 +106,5 @@ class PriceHistory(Base):
listing_id: Mapped[int] = mapped_column(ForeignKey("competitor_listings.id"))
listing: Mapped[CompetitorListing] = relationship(back_populates="price_history")
price: Mapped[float | None] = mapped_column(Float, nullable=True)
price: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
recorded_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)

View File

@@ -35,6 +35,9 @@ class ScrapedListing:
permit_number: str | None
agent_name: str | None
agency_name: str | None
building: str | None = None
bedrooms: int | None = None
size_sqft: float | None = None
is_active: bool = True

View File

@@ -100,6 +100,110 @@ def _extract_permit(item: dict) -> str | None:
return None
def _parse_int(value) -> int | None:
if value is None:
return None
if isinstance(value, bool):
return None
if isinstance(value, (int, float)):
return int(value)
text = str(value).strip().lower()
if text in {"studio", "студия"}:
return 0
m = re.search(r"\d+", text)
return int(m.group(0)) if m else None
def _extract_bedrooms(item: dict) -> int | None:
    """Find the bedroom count of a listing payload.

    Looks at a fixed set of top-level keys first, then falls back to every
    dict produced by ``_walk(item)`` whose name/label/key mentions a bedroom.
    Returns the first value that ``_parse_int`` accepts, or ``None``.

    Candidate sub-keys are parsed one by one instead of being chained with
    ``or``: a chain treats ``0`` as missing, which would drop a studio
    (0 bedrooms) reported as ``{"value": 0}``.
    """

    def _first_parsed(mapping: dict, keys: tuple) -> int | None:
        # First key, in order, whose value parses to an integer (0 included).
        for key in keys:
            parsed = _parse_int(mapping.get(key))
            if parsed is not None:
                return parsed
        return None

    for key in ("bedrooms", "bedroom", "beds", "rooms", "bedroom_count", "bedrooms_count"):
        value = item.get(key)
        if isinstance(value, dict):
            parsed = _first_parsed(value, ("value", "count", "name"))
        else:
            parsed = _parse_int(value)
        if parsed is not None:
            return parsed

    for node in _walk(item):
        if not isinstance(node, dict):
            continue
        name = str(node.get("name") or node.get("label") or node.get("key") or "").lower()
        if "bed" not in name and "спал" not in name:
            continue
        parsed = _first_parsed(node, ("value", "count", "text"))
        if parsed is not None:
            return parsed
    return None
def _area_to_sqft(value, unit: str | None = None) -> float | None:
    """Parse an area and normalise it to square feet.

    ``value`` is parsed with ``parse_price`` (defined elsewhere in this
    module); ``None`` is returned when it cannot be parsed. When ``unit``
    names square metres the number is multiplied by 10.7639 and rounded to
    two decimals; otherwise it is returned unchanged.
    """
    parsed = parse_price(value)
    if parsed is None:
        return None
    unit_text = (unit or "").lower()
    # Never put an empty string in this tuple: `"" in s` is true for every
    # string, which would convert every value, square feet included.
    metric_tokens = ("sqm", "sq m", "m2", "m²", "㎡", "метр")
    if any(token in unit_text for token in metric_tokens):
        return round(parsed * 10.7639, 2)
    return parsed
def _extract_size_sqft(item: dict) -> float | None:
    """Return the listing area in square feet, or ``None`` if none is found.

    Well-known top-level keys are tried first; after that every dict yielded
    by ``_walk(item)`` whose name/label/key looks like a size field is tried.
    Conversion is delegated to ``_area_to_sqft``.
    """
    direct_keys = ("size", "area", "property_size", "built_up_area", "builtup_area", "plot_area")
    for field in direct_keys:
        raw = item.get(field)
        if isinstance(raw, dict):
            # Structured form: the unit and the amount live in sub-keys.
            unit_label = raw.get("unit") or raw.get("unit_label") or raw.get("unitLabel")
            amount = raw.get("value") or raw.get("amount") or raw.get("text")
        else:
            unit_label, amount = None, raw
        sqft = _area_to_sqft(amount, unit_label)
        if sqft is not None:
            return sqft

    name_hints = ("size", "area", "sqft", "sq ft", "площад")
    for entry in _walk(item):
        if not isinstance(entry, dict):
            continue
        label = str(entry.get("name") or entry.get("label") or entry.get("key") or "").lower()
        if any(hint in label for hint in name_hints):
            sqft = _area_to_sqft(
                entry.get("value") or entry.get("amount") or entry.get("text"),
                str(entry.get("unit") or entry.get("unit_label") or ""),
            )
            if sqft is not None:
                return sqft
    return None
def _location_candidate(node: dict) -> tuple[int, str] | None:
    """Turn a location node into a ``(priority, name)`` pair.

    The priority is looked up in ``_LOC_TYPE_PRIORITY`` by the upper-cased
    ``type`` of the node. ``None`` is returned when the type is not in that
    table or the node has no non-blank ``name``.
    """
    type_key = str(node.get("type", "")).upper()
    priority = _LOC_TYPE_PRIORITY.get(type_key, -1)
    label = str(node.get("name") or "").strip()
    return None if (priority < 0 or not label) else (priority, label)
def _extract_building_from(node) -> str | None:
    """Return the name of the highest-priority location found under ``node``.

    Every dict yielded by ``_walk(node)`` is offered to
    ``_location_candidate``. On equal priority the first one seen wins.
    Returns ``None`` when nothing qualifies.
    """
    top_rank = -1
    top_name: str | None = None
    for entry in _walk(node):
        found = _location_candidate(entry) if isinstance(entry, dict) else None
        if not found:
            continue
        rank, label = found
        # Strict comparison keeps the earliest candidate on ties.
        if rank > top_rank:
            top_rank, top_name = rank, label
    return top_name
def _extract_building(data: dict, item: dict) -> str | None:
    """Resolve the building name for a listing.

    The listing's own location-like fields are searched first, in a fixed
    order. If none of them yields a name, the whole page ``data`` is
    searched instead.
    """
    location_fields = ("location", "location_tree", "locations", "locationTree", "community")
    for field in location_fields:
        subtree = item.get(field)
        if not subtree:
            continue
        found = _extract_building_from(subtree)
        if found:
            return found
    # Nothing usable on the listing itself: fall back to the full payload.
    return _extract_building_from(data)
def _find_permit_on_page(data: dict) -> str | None:
"""The DLD permit number lives in a regulatory block rendered as an image,
but its plain value is still in __NEXT_DATA__: the dict that carries a
@@ -212,6 +316,9 @@ class PropertyFinderScraper:
permit_number=_find_permit_on_page(data) or _extract_permit(best),
agent_name=agent_name,
agency_name=agency_name,
building=_extract_building(data, best),
bedrooms=_extract_bedrooms(best),
size_sqft=_extract_size_sqft(best),
is_active=True,
)
@@ -336,6 +443,9 @@ class PropertyFinderScraper:
permit_number=_extract_permit(node),
agent_name=agent_name,
agency_name=agency_name,
building=_extract_building(data, node),
bedrooms=_extract_bedrooms(node),
size_sqft=_extract_size_sqft(node),
is_active=True,
)
)

View File

@@ -189,6 +189,43 @@ def add_competitor_url(db: Session, project: Project, url: str) -> tuple[Competi
return listing, ""
def parse_our_listing_url(url: str) -> dict:
    """Scrape one of our own listings and return its project metadata.

    The Go API calls this before validating a project, so a user can paste
    just the listing URL and have price/permit/building/area filled in.

    Returns a dict with the keys ``title``, ``our_price``, ``dld_permit``,
    ``building``, ``bedrooms``, ``size_sqft`` and ``currency`` (``"AED"``
    when the scraper reports none).

    Raises:
        ValueError: the URL is empty, is not from a recognised source, is a
            Bayut URL while Bayut is disabled, is not a concrete listing
            page, could not be fetched, or the listing is not active.
    """
    target = (url or "").strip()
    if not target:
        raise ValueError("URL пустой")

    source = detect_source_from_url(target)
    if source is None:
        raise ValueError("URL должен быть с propertyfinder.ae или bayut.com")
    if source == Source.BAYUT and not BAYUT_ENABLED:
        raise ValueError(
            "Bayut временно не поддерживается — площадка перешла на защищённый "
            "рендеринг. Используйте ссылку PropertyFinder."
        )
    if not _is_supported_listing_url(source, target):
        raise ValueError("Укажите ссылку на конкретное объявление, а не на страницу поиска")

    listing = _scraper_for(source).fetch_listing(target)
    if listing is None:
        raise ValueError("Не удалось загрузить страницу — сайт мог заблокировать запрос, попробуйте позже")
    if not listing.is_active:
        raise ValueError("Страница объявления вернула 404 — ссылка битая или объявление снято")

    metadata = dict(
        title=listing.title,
        our_price=listing.price,
        dld_permit=listing.permit_number,
        building=listing.building,
        bedrooms=listing.bedrooms,
        size_sqft=listing.size_sqft,
        currency=listing.currency or "AED",
    )
    return metadata
def add_competitor_urls(db: Session, project: Project, urls: list[str]) -> dict:
"""Add several pasted/selected URLs in one go (used by the suggest page's
multi-select). Processes them sequentially — each one re-fetches the page —
@@ -421,6 +458,12 @@ def refresh_our_listing(db: Session, project: Project, *, now: datetime | None =
changed: list[str] = []
if scraped.permit_number and not project.dld_permit:
project.dld_permit = scraped.permit_number
if scraped.building and not project.building:
project.building = scraped.building
if scraped.bedrooms is not None and project.bedrooms is None:
project.bedrooms = scraped.bedrooms
if scraped.size_sqft is not None and project.size_sqft is None:
project.size_sqft = scraped.size_sqft
old_price = project.our_price
new_price = scraped.price

View File

@@ -17,6 +17,8 @@ from app.models import Project
from app.services.monitor import (
BAYUT_ENABLED,
add_competitor_url,
notify_project_changes,
parse_our_listing_url,
run_check_all,
run_check_for_project,
sync_permit_competitors,
@@ -133,6 +135,14 @@ def cmd_suggest(payload: dict[str, Any]) -> None:
db.close()
def cmd_parse_own_listing(payload: dict[str, Any]) -> None:
    """Handle the ``parse-own-listing`` command.

    Reads ``payload["url"]`` (a missing or empty value becomes ``""``), parses
    the listing and hands the resulting metadata to ``_write``. A
    ``ValueError`` is reported through ``_fail`` with its message.
    """
    listing_url = str(payload.get("url") or "")
    try:
        metadata = parse_our_listing_url(listing_url)
        _write(metadata)
    except ValueError as err:
        _fail(str(err))
def cmd_health(_: dict[str, Any]) -> None:
db = SessionLocal()
try:
@@ -149,6 +159,7 @@ COMMANDS = {
"check-project": cmd_check_project,
"check-all": cmd_check_all,
"suggest": cmd_suggest,
"parse-own-listing": cmd_parse_own_listing,
}

View File

@@ -356,6 +356,10 @@ func (a *App) CreateProject(ctx context.Context, ownerID int64, p ProjectPayload
}
p.Title = title
p.DealType = deal
p, err = a.enrichProjectPayloadFromURL(ctx, p)
if err != nil {
return nil, err
}
if err := validateProjectRequired(p); err != nil {
return nil, err
}
@@ -395,6 +399,10 @@ func (a *App) UpdateProject(ctx context.Context, ownerID, projectID int64, p Pro
p = mergeProjectPayload(current, p)
p.Title = title
p.DealType = deal
p, err = a.enrichProjectPayloadFromURL(ctx, p)
if err != nil {
return nil, err
}
if err := validateProjectRequired(p); err != nil {
return nil, err
}
@@ -437,6 +445,51 @@ func mergeProjectPayload(current *Project, p ProjectPayload) ProjectPayload {
return p
}
// enrichProjectPayloadFromURL fills project metadata from the listing behind
// p.OurURL by asking the worker to parse it.
//
// The payload is returned untouched when no URL is set or no worker is
// configured. A parse failure is only reported when the payload still lacks
// one of the fields the parser could have supplied; otherwise it is ignored
// and the caller's values are kept.
func (a *App) enrichProjectPayloadFromURL(ctx context.Context, p ProjectPayload) (ProjectPayload, error) {
	listingURL := cleanPtr(p.OurURL)
	if listingURL == nil {
		return p, nil
	}
	if a.Worker == nil {
		return p, nil
	}

	parsed, err := a.Worker.ParseOwnListing(ctx, *listingURL)
	if err == nil {
		return applyParsedOwnListing(p, parsed), nil
	}
	if !projectMissingParsedFields(p) {
		// Everything was supplied by hand, so a scrape failure is not fatal.
		return p, nil
	}
	return p, fmt.Errorf("parse our_url: %w", err)
}
// projectMissingParsedFields reports whether p lacks at least one of the
// fields that listing parsing can provide: price, DLD permit, building,
// bedrooms or size.
func projectMissingParsedFields(p ProjectPayload) bool {
	switch {
	case p.OurPrice == nil:
		return true
	case cleanPtr(p.DLDPermit) == nil:
		return true
	case cleanPtr(p.Building) == nil:
		return true
	case p.Bedrooms == nil:
		return true
	case p.SizeSqft == nil:
		return true
	}
	return false
}
// applyParsedOwnListing copies the usable fields of parsed onto p and returns
// the result. A nil parsed leaves p unchanged. Price and size are taken only
// when positive; permit and building only when cleanPtr returns non-nil for
// them; bedrooms whenever set.
func applyParsedOwnListing(p ProjectPayload, parsed *ParsedOwnListing) ProjectPayload {
	if parsed == nil {
		return p
	}

	if price := parsed.OurPrice; price != nil && *price > 0 {
		p.OurPrice = price
	}
	if v := cleanPtr(parsed.DLDPermit); v != nil {
		p.DLDPermit = v
	}
	if v := cleanPtr(parsed.Building); v != nil {
		p.Building = v
	}
	if beds := parsed.Bedrooms; beds != nil {
		p.Bedrooms = beds
	}
	if size := parsed.SizeSqft; size != nil && *size > 0 {
		p.SizeSqft = size
	}
	return p
}
func validateProjectRequired(p ProjectPayload) error {
if cleanString(p.Title) == "" {
return fmt.Errorf("title is required")

View File

@@ -65,3 +65,41 @@ func TestValidateProjectRequiredRejectsListingLikeURLWithoutID(t *testing.T) {
t.Fatalf("unexpected error: %v", err)
}
}
// TestApplyParsedOwnListingFillsProjectMetadata checks that a payload carrying
// only title, deal type and listing URL passes validateProjectRequired once
// the parsed listing metadata has been applied, and that every parsed field
// ends up on the payload.
func TestApplyParsedOwnListingFillsProjectMetadata(t *testing.T) {
	const listingURL = "https://www.propertyfinder.ae/en/plp/buy/apartment-for-sale-dubai-dubai-creek-harbour-the-lagoons-harbour-gate-harbour-gate-tower-2-86176216.html"

	base := ProjectPayload{
		Title:    "Full Park View",
		DealType: "sale",
		OurURL:   strPtr(listingURL),
	}
	scraped := &ParsedOwnListing{
		OurPrice:  float64Ptr(3500000),
		DLDPermit: strPtr("7140504127"),
		Building:  strPtr("Harbour Gate Tower 2"),
		Bedrooms:  int64Ptr(2),
		SizeSqft:  float64Ptr(1081),
	}

	got := applyParsedOwnListing(base, scraped)

	if err := validateProjectRequired(got); err != nil {
		t.Fatalf("validateProjectRequired() after parsed metadata returned error: %v", err)
	}
	if v := got.OurPrice; v == nil || *v != 3500000 {
		t.Fatalf("our_price was not applied: %#v", got.OurPrice)
	}
	if v := got.DLDPermit; v == nil || *v != "7140504127" {
		t.Fatalf("dld_permit was not applied: %#v", got.DLDPermit)
	}
	if v := got.Building; v == nil || *v != "Harbour Gate Tower 2" {
		t.Fatalf("building was not applied: %#v", got.Building)
	}
	if v := got.Bedrooms; v == nil || *v != 2 {
		t.Fatalf("bedrooms was not applied: %#v", got.Bedrooms)
	}
	if v := got.SizeSqft; v == nil || *v != 1081 {
		t.Fatalf("size_sqft was not applied: %#v", got.SizeSqft)
	}
}

View File

@@ -51,6 +51,16 @@ type Suggestion struct {
IsActive bool `json:"is_active"`
}
// ParsedOwnListing is the result of the worker's "parse-own-listing" command:
// metadata scraped from one of our own listing pages. Every field is a
// pointer so that a value the scraper could not find stays nil.
type ParsedOwnListing struct {
	// Title is the listing title.
	Title *string `json:"title"`
	// OurPrice is the listing price.
	OurPrice *float64 `json:"our_price"`
	// DLDPermit is the permit number found on the listing.
	DLDPermit *string `json:"dld_permit"`
	// Building is the building name.
	Building *string `json:"building"`
	// Bedrooms is the bedroom count.
	Bedrooms *int64 `json:"bedrooms"`
	// SizeSqft is the area in square feet.
	SizeSqft *float64 `json:"size_sqft"`
	// Currency is the price currency code.
	Currency *string `json:"currency"`
}
type SuggestionsResponse struct {
OurPermit *string `json:"our_permit"`
BayutEnabled bool `json:"bayut_enabled"`
@@ -108,6 +118,14 @@ func (w *Worker) Suggest(ctx context.Context, projectID int64) (*SuggestionsResp
return &out, nil
}
// ParseOwnListing runs the worker's "parse-own-listing" command for url and
// returns the metadata it reports. On failure the error from the worker call
// is returned as is and the result is nil.
func (w *Worker) ParseOwnListing(ctx context.Context, url string) (*ParsedOwnListing, error) {
	result := new(ParsedOwnListing)
	request := map[string]any{"url": url}
	if err := w.call(ctx, "parse-own-listing", request, result); err != nil {
		return nil, err
	}
	return result, nil
}
func (w *Worker) Health(ctx context.Context) error {
var out HealthResult
if err := w.call(ctx, "health", map[string]any{}, &out); err != nil {