Compare commits

...

8 Commits

Author SHA1 Message Date
Grendgi
6750722429 feat: parse project metadata from PF links
All checks were successful
CI / hygiene (push) Successful in 2s
Build and Deploy / build-and-deploy (push) Successful in 38s
CI / go (push) Successful in 26s
CI / python (push) Successful in 16s
2026-06-24 14:03:25 +03:00
Grendgi
31c498af39 fix: use python 3.9 compatible sqlalchemy annotations
All checks were successful
CI / hygiene (push) Successful in 2s
Build and Deploy / build-and-deploy (push) Successful in 13s
CI / go (push) Successful in 22s
CI / python (push) Successful in 14s
2026-06-18 11:18:16 +03:00
Grendgi
2648a74b8c fix: support python 3.9 model annotations
Some checks failed
CI / hygiene (push) Successful in 2s
Build and Deploy / build-and-deploy (push) Successful in 14s
CI / go (push) Successful in 20s
CI / python (push) Failing after 14s
2026-06-18 11:09:42 +03:00
Grendgi
47ac87bbc3 fix: bootstrap pip in ci
Some checks failed
CI / hygiene (push) Successful in 1s
Build and Deploy / build-and-deploy (push) Successful in 12s
CI / go (push) Successful in 27s
CI / python (push) Failing after 14s
2026-06-18 10:29:46 +03:00
Grendgi
cb8e290d8f test: cover propertyfinder matching rules
Some checks failed
CI / hygiene (push) Successful in 2s
Build and Deploy / build-and-deploy (push) Successful in 35s
CI / go (push) Successful in 46s
CI / python (push) Failing after 2s
2026-06-17 17:12:49 +03:00
Grendgi
f73c9fba5f feat: expose monitoring pf health detail 2026-06-17 15:59:53 +03:00
Grendgi
ccd56165c7 chore: use common internal auth 2026-06-17 14:26:04 +03:00
Grendgi
ea2063ff40 chore: use common header parsing 2026-06-17 14:19:13 +03:00
13 changed files with 673 additions and 49 deletions

View File

@@ -32,4 +32,10 @@ jobs:
needs: hygiene
steps:
- uses: actions/checkout@v4
- run: |
if ! python3 -m pip --version; then
python3 -m ensurepip --upgrade || (apt-get update && apt-get install -y python3-pip)
fi
- run: python3 -m pip install -r requirements.txt
- run: python3 -m compileall app
- run: python3 -m unittest discover -s tests

View File

@@ -1,5 +1,8 @@
from __future__ import annotations
from datetime import datetime
from enum import Enum
from typing import Optional
from sqlalchemy import Boolean, DateTime, Enum as SAEnum, Float, ForeignKey, Integer, String, Text, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column, relationship
@@ -27,9 +30,9 @@ class Employee(Base):
id: Mapped[int] = mapped_column(Integer, primary_key=True)
name: Mapped[str] = mapped_column(String(200))
portal_user_id: Mapped[str | None] = mapped_column(String(100), unique=True, index=True, nullable=True)
tg_chat_id: Mapped[str | None] = mapped_column(String(64), unique=True, nullable=True)
tg_username: Mapped[str | None] = mapped_column(String(200), nullable=True)
portal_user_id: Mapped[Optional[str]] = mapped_column(String(100), unique=True, index=True, nullable=True)
tg_chat_id: Mapped[Optional[str]] = mapped_column(String(64), unique=True, nullable=True)
tg_username: Mapped[Optional[str]] = mapped_column(String(200), nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
projects: Mapped[list["Project"]] = relationship(back_populates="owner")
@@ -43,21 +46,21 @@ class Project(Base):
id: Mapped[int] = mapped_column(Integer, primary_key=True)
title: Mapped[str] = mapped_column(String(300))
deal_type: Mapped[DealType] = mapped_column(SAEnum(DealType))
our_price: Mapped[float | None] = mapped_column(Float, nullable=True)
notes: Mapped[str | None] = mapped_column(Text, nullable=True)
our_price: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
notes: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
# Опциональные параметры — используются для подсказок похожих объявлений
dld_permit: Mapped[str | None] = mapped_column(String(100), index=True, nullable=True)
building: Mapped[str | None] = mapped_column(String(300), nullable=True)
bedrooms: Mapped[int | None] = mapped_column(Integer, nullable=True)
size_sqft: Mapped[float | None] = mapped_column(Float, nullable=True)
our_url: Mapped[str | None] = mapped_column(Text, nullable=True)
dld_permit: Mapped[Optional[str]] = mapped_column(String(100), index=True, nullable=True)
building: Mapped[Optional[str]] = mapped_column(String(300), nullable=True)
bedrooms: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
size_sqft: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
our_url: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
owner_id: Mapped[int] = mapped_column(ForeignKey("employees.id"))
owner: Mapped[Employee] = relationship(back_populates="projects")
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
last_checked_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
last_checked_at: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True)
listings: Mapped[list["CompetitorListing"]] = relationship(
back_populates="project", cascade="all, delete-orphan"
@@ -77,15 +80,15 @@ class CompetitorListing(Base):
source: Mapped[Source] = mapped_column(SAEnum(Source))
external_id: Mapped[str] = mapped_column(String(100)) # ID на стороне PF/Bayut
url: Mapped[str] = mapped_column(Text)
title: Mapped[str | None] = mapped_column(String(500), nullable=True)
agent_name: Mapped[str | None] = mapped_column(String(300), nullable=True)
agency_name: Mapped[str | None] = mapped_column(String(300), nullable=True)
permit_number: Mapped[str | None] = mapped_column(String(100), nullable=True)
title: Mapped[Optional[str]] = mapped_column(String(500), nullable=True)
agent_name: Mapped[Optional[str]] = mapped_column(String(300), nullable=True)
agency_name: Mapped[Optional[str]] = mapped_column(String(300), nullable=True)
permit_number: Mapped[Optional[str]] = mapped_column(String(100), nullable=True)
auto_discovered: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
permit_missing_checks: Mapped[int] = mapped_column(Integer, default=0, nullable=False)
current_price: Mapped[float | None] = mapped_column(Float, nullable=True)
currency: Mapped[str | None] = mapped_column(String(10), nullable=True, default="AED")
current_price: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
currency: Mapped[Optional[str]] = mapped_column(String(10), nullable=True, default="AED")
status: Mapped[ListingStatus] = mapped_column(SAEnum(ListingStatus), default=ListingStatus.ACTIVE)
first_seen_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
@@ -103,5 +106,5 @@ class PriceHistory(Base):
listing_id: Mapped[int] = mapped_column(ForeignKey("competitor_listings.id"))
listing: Mapped[CompetitorListing] = relationship(back_populates="price_history")
price: Mapped[float | None] = mapped_column(Float, nullable=True)
price: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
recorded_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)

View File

@@ -35,6 +35,9 @@ class ScrapedListing:
permit_number: str | None
agent_name: str | None
agency_name: str | None
building: str | None = None
bedrooms: int | None = None
size_sqft: float | None = None
is_active: bool = True

View File

@@ -100,6 +100,110 @@ def _extract_permit(item: dict) -> str | None:
return None
def _parse_int(value) -> int | None:
if value is None:
return None
if isinstance(value, bool):
return None
if isinstance(value, (int, float)):
return int(value)
text = str(value).strip().lower()
if text in {"studio", "студия"}:
return 0
m = re.search(r"\d+", text)
return int(m.group(0)) if m else None
def _extract_bedrooms(item: dict) -> int | None:
for key in ("bedrooms", "bedroom", "beds", "rooms", "bedroom_count", "bedrooms_count"):
value = item.get(key)
if isinstance(value, dict):
value = value.get("value") or value.get("count") or value.get("name")
parsed = _parse_int(value)
if parsed is not None:
return parsed
for node in _walk(item):
if not isinstance(node, dict):
continue
name = str(node.get("name") or node.get("label") or node.get("key") or "").lower()
if "bed" not in name and "спал" not in name:
continue
parsed = _parse_int(node.get("value") or node.get("count") or node.get("text"))
if parsed is not None:
return parsed
return None
def _area_to_sqft(value, unit: str | None = None) -> float | None:
parsed = parse_price(value)
if parsed is None:
return None
unit_text = (unit or "").lower()
if any(token in unit_text for token in ("sqm", "sq m", "m2", "", "метр")):
return round(parsed * 10.7639, 2)
return parsed
def _extract_size_sqft(item: dict) -> float | None:
for key in ("size", "area", "property_size", "built_up_area", "builtup_area", "plot_area"):
value = item.get(key)
unit = None
if isinstance(value, dict):
unit = value.get("unit") or value.get("unit_label") or value.get("unitLabel")
value = value.get("value") or value.get("amount") or value.get("text")
parsed = _area_to_sqft(value, unit)
if parsed is not None:
return parsed
for node in _walk(item):
if not isinstance(node, dict):
continue
name = str(node.get("name") or node.get("label") or node.get("key") or "").lower()
if not any(token in name for token in ("size", "area", "sqft", "sq ft", "площад")):
continue
parsed = _area_to_sqft(
node.get("value") or node.get("amount") or node.get("text"),
str(node.get("unit") or node.get("unit_label") or ""),
)
if parsed is not None:
return parsed
return None
def _location_candidate(node: dict) -> tuple[int, str] | None:
rank = _LOC_TYPE_PRIORITY.get(str(node.get("type", "")).upper(), -1)
name = str(node.get("name") or "").strip()
if rank < 0 or not name:
return None
return rank, name
def _extract_building_from(node) -> str | None:
best_name: str | None = None
best_rank = -1
for item in _walk(node):
if not isinstance(item, dict):
continue
candidate = _location_candidate(item)
if not candidate:
continue
rank, name = candidate
if rank > best_rank:
best_rank, best_name = rank, name
return best_name
def _extract_building(data: dict, item: dict) -> str | None:
for key in ("location", "location_tree", "locations", "locationTree", "community"):
value = item.get(key)
if value:
building = _extract_building_from(value)
if building:
return building
return _extract_building_from(data)
def _find_permit_on_page(data: dict) -> str | None:
"""The DLD permit number lives in a regulatory block rendered as an image,
but its plain value is still in __NEXT_DATA__: the dict that carries a
@@ -212,6 +316,9 @@ class PropertyFinderScraper:
permit_number=_find_permit_on_page(data) or _extract_permit(best),
agent_name=agent_name,
agency_name=agency_name,
building=_extract_building(data, best),
bedrooms=_extract_bedrooms(best),
size_sqft=_extract_size_sqft(best),
is_active=True,
)
@@ -336,6 +443,9 @@ class PropertyFinderScraper:
permit_number=_extract_permit(node),
agent_name=agent_name,
agency_name=agency_name,
building=_extract_building(data, node),
bedrooms=_extract_bedrooms(node),
size_sqft=_extract_size_sqft(node),
is_active=True,
)
)

View File

@@ -189,6 +189,43 @@ def add_competitor_url(db: Session, project: Project, url: str) -> tuple[Competi
return listing, ""
def parse_our_listing_url(url: str) -> dict:
"""Parse our own PF listing for project metadata.
Used by the Go API before project validation, so users can paste only the
concrete object URL and let the service fill price/permit/building/area.
"""
url = (url or "").strip()
if not url:
raise ValueError("URL пустой")
source = detect_source_from_url(url)
if source is None:
raise ValueError("URL должен быть с propertyfinder.ae или bayut.com")
if source == Source.BAYUT and not BAYUT_ENABLED:
raise ValueError(
"Bayut временно не поддерживается — площадка перешла на защищённый "
"рендеринг. Используйте ссылку PropertyFinder."
)
if not _is_supported_listing_url(source, url):
raise ValueError("Укажите ссылку на конкретное объявление, а не на страницу поиска")
scraped = _scraper_for(source).fetch_listing(url)
if scraped is None:
raise ValueError("Не удалось загрузить страницу — сайт мог заблокировать запрос, попробуйте позже")
if not scraped.is_active:
raise ValueError("Страница объявления вернула 404 — ссылка битая или объявление снято")
return {
"title": scraped.title,
"our_price": scraped.price,
"dld_permit": scraped.permit_number,
"building": scraped.building,
"bedrooms": scraped.bedrooms,
"size_sqft": scraped.size_sqft,
"currency": scraped.currency or "AED",
}
def add_competitor_urls(db: Session, project: Project, urls: list[str]) -> dict:
"""Add several pasted/selected URLs in one go (used by the suggest page's
multi-select). Processes them sequentially — each one re-fetches the page —
@@ -421,6 +458,12 @@ def refresh_our_listing(db: Session, project: Project, *, now: datetime | None =
changed: list[str] = []
if scraped.permit_number and not project.dld_permit:
project.dld_permit = scraped.permit_number
if scraped.building and not project.building:
project.building = scraped.building
if scraped.bedrooms is not None and project.bedrooms is None:
project.bedrooms = scraped.bedrooms
if scraped.size_sqft is not None and project.size_sqft is None:
project.size_sqft = scraped.size_sqft
old_price = project.our_price
new_price = scraped.price

View File

@@ -10,11 +10,15 @@ import json
import sys
from typing import Any
from sqlalchemy import text
from app.db import SessionLocal, init_db
from app.models import Project
from app.services.monitor import (
BAYUT_ENABLED,
add_competitor_url,
notify_project_changes,
parse_our_listing_url,
run_check_all,
run_check_for_project,
sync_permit_competitors,
@@ -131,12 +135,31 @@ def cmd_suggest(payload: dict[str, Any]) -> None:
db.close()
def cmd_parse_own_listing(payload: dict[str, Any]) -> None:
url = str(payload.get("url") or "")
try:
_write(parse_our_listing_url(url))
except ValueError as exc:
_fail(str(exc))
def cmd_health(_: dict[str, Any]) -> None:
db = SessionLocal()
try:
db.execute(text("SELECT 1"))
_write({"status": "ok"})
finally:
db.close()
COMMANDS = {
"health": cmd_health,
"add-listing": cmd_add_listing,
"add-listings": cmd_add_listings,
"check-project": cmd_check_project,
"check-all": cmd_check_all,
"suggest": cmd_suggest,
"parse-own-listing": cmd_parse_own_listing,
}

7
go.mod
View File

@@ -1,8 +1,11 @@
module monitoring-pf
go 1.25.0
go 1.25.7
require modernc.org/sqlite v1.50.1
require (
gitea.estateliga.work/admin/portal-common v0.3.0
modernc.org/sqlite v1.50.1
)
require (
github.com/dustin/go-humanize v1.0.1 // indirect

2
go.sum
View File

@@ -1,3 +1,5 @@
gitea.estateliga.work/admin/portal-common v0.3.0 h1:xpr9UeLXk5pCcNXcTVGZzJZr0Ni7An7DV0OkuYv9qVM=
gitea.estateliga.work/admin/portal-common v0.3.0/go.mod h1:C860q6g38KVMsv+mKv6k1Vm7smVRCycl+N6r63TElnk=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=

View File

@@ -1,12 +1,16 @@
package pf
import (
"context"
"database/sql"
"encoding/json"
"errors"
"net/http"
"strconv"
"strings"
"time"
commonmw "gitea.estateliga.work/admin/portal-common/middleware"
)
type Server struct {
@@ -21,17 +25,32 @@ type bulkPayload struct {
URLs []string `json:"urls"`
}
type componentProbe struct {
Name string `json:"name"`
Status string `json:"status"`
LatencyMs int64 `json:"latency_ms"`
Error string `json:"error,omitempty"`
}
func (s Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
path := s.apiPath(r.URL.Path)
switch {
case path == "/healthz":
writeJSON(w, http.StatusOK, map[string]string{"status": "ok"})
case path == "/health/detail":
s.healthDetail(w, r)
case path == "/":
writeJSON(w, http.StatusOK, map[string]string{"service": "monitoring-pf", "ui": "portal", "api": "go"})
case !strings.HasPrefix(path, "/api/v1"):
writeError(w, http.StatusNotFound, "not found")
case !s.checkInternalAuth(w, r):
return
default:
commonmw.InternalAuth(s.App.Cfg.InternalAPIKey)(http.HandlerFunc(s.serveAPI)).ServeHTTP(w, r)
}
}
func (s Server) serveAPI(w http.ResponseWriter, r *http.Request) {
path := s.apiPath(r.URL.Path)
switch {
case path == "/api/v1/access/me" && r.Method == http.MethodGet:
s.accessMe(w, r)
case path == "/api/v1/summary" && r.Method == http.MethodGet:
@@ -53,18 +72,6 @@ func (s Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
}
}
func (s Server) checkInternalAuth(w http.ResponseWriter, r *http.Request) bool {
want := strings.TrimSpace(s.App.Cfg.InternalAPIKey)
if want == "" {
return true
}
if r.Header.Get("X-Internal-Key") != want {
writeError(w, http.StatusUnauthorized, "unauthorized")
return false
}
return true
}
func (s Server) apiPath(path string) string {
base := s.App.Cfg.PublicBasePath
if base != "" && path == base {
@@ -76,6 +83,104 @@ func (s Server) apiPath(path string) string {
return path
}
func (s Server) healthDetail(w http.ResponseWriter, r *http.Request) {
ctx, cancel := context.WithTimeout(r.Context(), 5*time.Second)
defer cancel()
components := []componentProbe{
s.probeDatabase(ctx),
s.probeWorker(ctx),
s.probeTelegram(ctx),
s.probeScheduler(ctx),
s.probeProjectIntegrity(ctx),
}
writeJSON(w, http.StatusOK, map[string]any{"components": components})
}
func (s Server) probeDatabase(ctx context.Context) componentProbe {
start := time.Now()
if err := s.App.DB.PingContext(ctx); err != nil {
return componentProbe{Name: "sqlite", Status: "down", LatencyMs: time.Since(start).Milliseconds(), Error: err.Error()}
}
return componentProbe{Name: "sqlite", Status: "ok", LatencyMs: time.Since(start).Milliseconds()}
}
func (s Server) probeWorker(ctx context.Context) componentProbe {
start := time.Now()
if err := s.App.Worker.Health(ctx); err != nil {
return componentProbe{Name: "python_worker", Status: "down", LatencyMs: time.Since(start).Milliseconds(), Error: err.Error()}
}
return componentProbe{Name: "python_worker", Status: "ok", LatencyMs: time.Since(start).Milliseconds()}
}
func (s Server) probeTelegram(ctx context.Context) componentProbe {
start := time.Now()
if !s.App.TG.Enabled() {
return componentProbe{Name: "telegram_bot", Status: "down", LatencyMs: time.Since(start).Milliseconds(), Error: "TG_BOT_TOKEN is not configured"}
}
if _, err := s.App.TG.BotUsername(ctx); err != nil {
return componentProbe{Name: "telegram_bot", Status: "down", LatencyMs: time.Since(start).Milliseconds(), Error: err.Error()}
}
return componentProbe{Name: "telegram_bot", Status: "ok", LatencyMs: time.Since(start).Milliseconds()}
}
func (s Server) probeScheduler(ctx context.Context) componentProbe {
start := time.Now()
staleAfter := max(1, s.App.Cfg.ScrapeIntervalHours*2) * 3600
staleModifier := "-" + strconv.Itoa(staleAfter) + " seconds"
var total, neverChecked, stale int64
err := s.App.DB.QueryRowContext(ctx, `
SELECT
COUNT(*) AS total,
COALESCE(SUM(CASE WHEN last_checked_at IS NULL THEN 1 ELSE 0 END), 0) AS never_checked,
COALESCE(SUM(CASE WHEN last_checked_at IS NOT NULL AND datetime(last_checked_at) < datetime('now', ?) THEN 1 ELSE 0 END), 0) AS stale
FROM projects`,
staleModifier,
).Scan(&total, &neverChecked, &stale)
if err != nil {
return componentProbe{Name: "scheduler", Status: "down", LatencyMs: time.Since(start).Milliseconds(), Error: err.Error()}
}
if total > 0 && (neverChecked > 0 || stale > 0) {
return componentProbe{
Name: "scheduler",
Status: "down",
LatencyMs: time.Since(start).Milliseconds(),
Error: "projects=" + strconv.FormatInt(total, 10) +
" never_checked=" + strconv.FormatInt(neverChecked, 10) +
" stale=" + strconv.FormatInt(stale, 10) +
" stale_after_sec=" + strconv.Itoa(staleAfter),
}
}
return componentProbe{Name: "scheduler", Status: "ok", LatencyMs: time.Since(start).Milliseconds()}
}
func (s Server) probeProjectIntegrity(ctx context.Context) componentProbe {
start := time.Now()
var projects, missingOwner, autoWithoutPermit, searchURLs int64
err := s.App.DB.QueryRowContext(ctx, `
SELECT
(SELECT COUNT(*) FROM projects),
(SELECT COUNT(*) FROM projects p LEFT JOIN employees e ON e.id = p.owner_id WHERE e.id IS NULL),
(SELECT COUNT(*) FROM competitor_listings WHERE auto_discovered = 1 AND (permit_number IS NULL OR trim(permit_number) = '')),
(SELECT COUNT(*) FROM competitor_listings WHERE url LIKE '%/search%' OR url LIKE '%?%' AND external_id = '')
`).Scan(&projects, &missingOwner, &autoWithoutPermit, &searchURLs)
if err != nil {
return componentProbe{Name: "project_integrity", Status: "down", LatencyMs: time.Since(start).Milliseconds(), Error: err.Error()}
}
if missingOwner > 0 || autoWithoutPermit > 0 || searchURLs > 0 {
return componentProbe{
Name: "project_integrity",
Status: "down",
LatencyMs: time.Since(start).Milliseconds(),
Error: "projects=" + strconv.FormatInt(projects, 10) +
" missing_owner=" + strconv.FormatInt(missingOwner, 10) +
" auto_without_permit=" + strconv.FormatInt(autoWithoutPermit, 10) +
" suspicious_search_urls=" + strconv.FormatInt(searchURLs, 10),
}
}
return componentProbe{Name: "project_integrity", Status: "ok", LatencyMs: time.Since(start).Milliseconds()}
}
func (s Server) accessMe(w http.ResponseWriter, r *http.Request) {
portalID := portalUserID(r)
emp, err := s.App.CurrentEmployee(r.Context(), portalID, false)
@@ -510,11 +615,11 @@ func portalUserID(r *http.Request) string {
}
func isAdmin(r *http.Request) bool {
return r.Header.Get("X-User-Is-Admin") == "1"
return commonmw.HeaderBool(r, "X-User-Is-Admin")
}
func canViewTeam(r *http.Request) bool {
return isAdmin(r) || r.Header.Get("X-User-Is-Department-Head") == "1"
return isAdmin(r) || commonmw.HeaderBool(r, "X-User-Is-Department-Head")
}
func canManagePortalUser(r *http.Request, targetPortalID string) bool {
@@ -534,19 +639,7 @@ func canManagePortalUser(r *http.Request, targetPortalID string) bool {
}
func subordinatePortalIDs(r *http.Request) []string {
raw := strings.TrimSpace(r.Header.Get("X-User-Subordinates"))
if raw == "" {
return []string{}
}
parts := strings.Split(raw, ",")
out := make([]string, 0, len(parts))
for _, part := range parts {
id := strings.TrimSpace(part)
if id != "" {
out = append(out, id)
}
}
return out
return commonmw.HeaderCSV(r, "X-User-Subordinates")
}
func ownerPortalIDFromQuery(r *http.Request) *string {

View File

@@ -356,6 +356,10 @@ func (a *App) CreateProject(ctx context.Context, ownerID int64, p ProjectPayload
}
p.Title = title
p.DealType = deal
p, err = a.enrichProjectPayloadFromURL(ctx, p)
if err != nil {
return nil, err
}
if err := validateProjectRequired(p); err != nil {
return nil, err
}
@@ -395,6 +399,10 @@ func (a *App) UpdateProject(ctx context.Context, ownerID, projectID int64, p Pro
p = mergeProjectPayload(current, p)
p.Title = title
p.DealType = deal
p, err = a.enrichProjectPayloadFromURL(ctx, p)
if err != nil {
return nil, err
}
if err := validateProjectRequired(p); err != nil {
return nil, err
}
@@ -437,6 +445,51 @@ func mergeProjectPayload(current *Project, p ProjectPayload) ProjectPayload {
return p
}
func (a *App) enrichProjectPayloadFromURL(ctx context.Context, p ProjectPayload) (ProjectPayload, error) {
url := cleanPtr(p.OurURL)
if url == nil || a.Worker == nil {
return p, nil
}
parsed, err := a.Worker.ParseOwnListing(ctx, *url)
if err != nil {
if projectMissingParsedFields(p) {
return p, fmt.Errorf("parse our_url: %w", err)
}
return p, nil
}
return applyParsedOwnListing(p, parsed), nil
}
func projectMissingParsedFields(p ProjectPayload) bool {
return p.OurPrice == nil ||
cleanPtr(p.DLDPermit) == nil ||
cleanPtr(p.Building) == nil ||
p.Bedrooms == nil ||
p.SizeSqft == nil
}
func applyParsedOwnListing(p ProjectPayload, parsed *ParsedOwnListing) ProjectPayload {
if parsed == nil {
return p
}
if parsed.OurPrice != nil && *parsed.OurPrice > 0 {
p.OurPrice = parsed.OurPrice
}
if permit := cleanPtr(parsed.DLDPermit); permit != nil {
p.DLDPermit = permit
}
if building := cleanPtr(parsed.Building); building != nil {
p.Building = building
}
if parsed.Bedrooms != nil {
p.Bedrooms = parsed.Bedrooms
}
if parsed.SizeSqft != nil && *parsed.SizeSqft > 0 {
p.SizeSqft = parsed.SizeSqft
}
return p
}
func validateProjectRequired(p ProjectPayload) error {
if cleanString(p.Title) == "" {
return fmt.Errorf("title is required")

105
internal/pf/store_test.go Normal file
View File

@@ -0,0 +1,105 @@
package pf
import (
"strings"
"testing"
)
func strPtr(v string) *string {
return &v
}
func int64Ptr(v int64) *int64 {
return &v
}
func float64Ptr(v float64) *float64 {
return &v
}
func validProjectPayload() ProjectPayload {
return ProjectPayload{
Title: "Full Park View",
DealType: "sale",
OurPrice: float64Ptr(2500000),
DLDPermit: strPtr("7140504127"),
Building: strPtr("Harbour Gate Tower 2"),
Bedrooms: int64Ptr(2),
SizeSqft: float64Ptr(1081),
OurURL: strPtr(
"https://www.propertyfinder.ae/en/plp/buy/apartment-for-sale-dubai-dubai-creek-harbour-the-lagoons-harbour-gate-harbour-gate-tower-2-86176216.html",
),
}
}
func TestValidateProjectRequiredAcceptsConcretePropertyFinderListingURL(t *testing.T) {
payload := validProjectPayload()
if err := validateProjectRequired(payload); err != nil {
t.Fatalf("validateProjectRequired() returned unexpected error: %v", err)
}
}
func TestValidateProjectRequiredRejectsSearchPageAsOurURL(t *testing.T) {
payload := validProjectPayload()
payload.OurURL = strPtr("https://www.propertyfinder.ae/en/search?c=1&l=12345")
err := validateProjectRequired(payload)
if err == nil {
t.Fatal("validateProjectRequired() accepted a search page as our_url")
}
if !strings.Contains(err.Error(), "concrete PropertyFinder listing URL") {
t.Fatalf("unexpected error: %v", err)
}
}
func TestValidateProjectRequiredRejectsListingLikeURLWithoutID(t *testing.T) {
payload := validProjectPayload()
payload.OurURL = strPtr("https://www.propertyfinder.ae/en/plp/buy/apartment-for-sale-dubai-dubai-creek-harbour.html")
err := validateProjectRequired(payload)
if err == nil {
t.Fatal("validateProjectRequired() accepted a listing-like URL without listing id")
}
if !strings.Contains(err.Error(), "concrete PropertyFinder listing URL") {
t.Fatalf("unexpected error: %v", err)
}
}
func TestApplyParsedOwnListingFillsProjectMetadata(t *testing.T) {
payload := ProjectPayload{
Title: "Full Park View",
DealType: "sale",
OurURL: strPtr(
"https://www.propertyfinder.ae/en/plp/buy/apartment-for-sale-dubai-dubai-creek-harbour-the-lagoons-harbour-gate-harbour-gate-tower-2-86176216.html",
),
}
parsed := &ParsedOwnListing{
OurPrice: float64Ptr(3500000),
DLDPermit: strPtr("7140504127"),
Building: strPtr("Harbour Gate Tower 2"),
Bedrooms: int64Ptr(2),
SizeSqft: float64Ptr(1081),
}
payload = applyParsedOwnListing(payload, parsed)
if err := validateProjectRequired(payload); err != nil {
t.Fatalf("validateProjectRequired() after parsed metadata returned error: %v", err)
}
if payload.OurPrice == nil || *payload.OurPrice != 3500000 {
t.Fatalf("our_price was not applied: %#v", payload.OurPrice)
}
if payload.DLDPermit == nil || *payload.DLDPermit != "7140504127" {
t.Fatalf("dld_permit was not applied: %#v", payload.DLDPermit)
}
if payload.Building == nil || *payload.Building != "Harbour Gate Tower 2" {
t.Fatalf("building was not applied: %#v", payload.Building)
}
if payload.Bedrooms == nil || *payload.Bedrooms != 2 {
t.Fatalf("bedrooms was not applied: %#v", payload.Bedrooms)
}
if payload.SizeSqft == nil || *payload.SizeSqft != 1081 {
t.Fatalf("size_sqft was not applied: %#v", payload.SizeSqft)
}
}

View File

@@ -34,6 +34,10 @@ type CheckResult struct {
Changes int `json:"changes"`
}
type HealthResult struct {
Status string `json:"status"`
}
type Suggestion struct {
Source string `json:"source"`
ExternalID string `json:"external_id"`
@@ -47,6 +51,16 @@ type Suggestion struct {
IsActive bool `json:"is_active"`
}
type ParsedOwnListing struct {
Title *string `json:"title"`
OurPrice *float64 `json:"our_price"`
DLDPermit *string `json:"dld_permit"`
Building *string `json:"building"`
Bedrooms *int64 `json:"bedrooms"`
SizeSqft *float64 `json:"size_sqft"`
Currency *string `json:"currency"`
}
type SuggestionsResponse struct {
OurPermit *string `json:"our_permit"`
BayutEnabled bool `json:"bayut_enabled"`
@@ -104,6 +118,25 @@ func (w *Worker) Suggest(ctx context.Context, projectID int64) (*SuggestionsResp
return &out, nil
}
func (w *Worker) ParseOwnListing(ctx context.Context, url string) (*ParsedOwnListing, error) {
var out ParsedOwnListing
if err := w.call(ctx, "parse-own-listing", map[string]any{"url": url}, &out); err != nil {
return nil, err
}
return &out, nil
}
func (w *Worker) Health(ctx context.Context) error {
var out HealthResult
if err := w.call(ctx, "health", map[string]any{}, &out); err != nil {
return err
}
if out.Status != "ok" {
return fmt.Errorf("worker status=%s", out.Status)
}
return nil
}
func (w *Worker) call(ctx context.Context, command string, payload any, out any) error {
ctx, cancel := context.WithTimeout(ctx, 15*time.Minute)
defer cancel()

View File

@@ -0,0 +1,147 @@
from __future__ import annotations
import unittest
from unittest.mock import patch
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from app.db import Base
from app.models import CompetitorListing, DealType, Employee, ListingStatus, Project, Source
from app.scrapers.base import ScrapedListing
from app.scrapers.propertyfinder import PropertyFinderScraper
from app.services import monitor
PF_OWN_URL = (
"https://www.propertyfinder.ae/en/plp/buy/apartment-for-sale-dubai-dubai-creek-harbour-"
"the-lagoons-harbour-gate-harbour-gate-tower-2-86176216.html"
)
PF_COMPETITOR_URL = (
"https://www.propertyfinder.ae/en/plp/buy/apartment-for-sale-dubai-dubai-creek-harbour-"
"the-lagoons-harbour-gate-harbour-gate-tower-2-86170000.html"
)
def _listing(external_id: str, permit: str | None, url: str = PF_COMPETITOR_URL) -> ScrapedListing:
return ScrapedListing(
source="propertyfinder",
external_id=external_id,
url=url,
title=f"Listing {external_id}",
price=2_500_000,
currency="AED",
permit_number=permit,
agent_name="Agent",
agency_name="Agency",
is_active=True,
)
class MonitoringRulesTest(unittest.TestCase):
def setUp(self) -> None:
engine = create_engine("sqlite:///:memory:", future=True)
Base.metadata.create_all(engine)
self.Session = sessionmaker(bind=engine, autoflush=False, autocommit=False, future=True)
self.db = self.Session()
owner = Employee(name="Agent", portal_user_id="agent-1", tg_chat_id="100")
self.db.add(owner)
self.db.flush()
self.project = Project(
title="Full Park View",
deal_type=DealType.SALE,
our_price=2_500_000,
dld_permit="7140504127",
building="Harbour Gate Tower 2",
bedrooms=2,
size_sqft=1081,
our_url=PF_OWN_URL,
owner_id=owner.id,
)
self.db.add(self.project)
self.db.commit()
def tearDown(self) -> None:
self.db.close()
def test_propertyfinder_rejects_search_pages(self) -> None:
scraper = PropertyFinderScraper()
self.assertFalse(scraper.is_listing_url("https://www.propertyfinder.ae/en/search?c=1&l=12345"))
self.assertIsNone(scraper.fetch_listing("https://www.propertyfinder.ae/en/search?c=1&l=12345"))
@patch.object(monitor.PF, "get_permit", side_effect=["7140504127"])
@patch.object(
monitor.PF,
"search_similar",
return_value=[
_listing("86176216", None, url=PF_OWN_URL),
_listing("86170000", None, url=PF_COMPETITOR_URL),
],
)
def test_suggest_similar_excludes_own_listing(self, _search, _permit) -> None:
suggestions = monitor.suggest_similar(self.project, our_permit="7140504127")
self.assertEqual(["86170000"], [item.external_id for item in suggestions["propertyfinder"]])
@patch.object(
monitor,
"suggest_similar",
return_value={
"propertyfinder": [
_listing("86170000", "7140504127"),
_listing("86170001", "DIFFERENT"),
],
"bayut": [],
},
)
def test_sync_permit_competitors_adds_only_exact_permit_matches(self, _suggest) -> None:
changes, suggestions, permit = monitor.sync_permit_competitors(self.db, self.project)
listings = self.db.query(CompetitorListing).order_by(CompetitorListing.external_id).all()
self.assertEqual("7140504127", permit)
self.assertEqual(1, len(listings))
self.assertEqual("86170000", listings[0].external_id)
self.assertTrue(listings[0].auto_discovered)
self.assertEqual(["86170001"], [item.external_id for item in suggestions["propertyfinder"]])
self.assertEqual(1, len(changes))
@patch.object(monitor, "suggest_similar", return_value={"propertyfinder": [], "bayut": []})
def test_auto_permit_listing_is_removed_only_after_three_misses(self, _suggest) -> None:
listing = CompetitorListing(
project_id=self.project.id,
source=Source.PROPERTYFINDER,
external_id="86170000",
url=PF_COMPETITOR_URL,
title="Competitor",
permit_number="7140504127",
auto_discovered=True,
permit_missing_checks=0,
current_price=2_500_000,
currency="AED",
status=ListingStatus.ACTIVE,
)
self.db.add(listing)
self.db.commit()
changes, _, _ = monitor.sync_permit_competitors(self.db, self.project)
self.db.flush()
self.assertEqual([], changes)
self.assertEqual(1, self.db.query(CompetitorListing).count())
self.assertEqual(1, listing.permit_missing_checks)
changes, _, _ = monitor.sync_permit_competitors(self.db, self.project)
self.db.flush()
self.assertEqual([], changes)
self.assertEqual(1, self.db.query(CompetitorListing).count())
self.assertEqual(2, listing.permit_missing_checks)
changes, _, _ = monitor.sync_permit_competitors(self.db, self.project)
self.db.flush()
self.assertEqual(1, len(changes))
self.assertEqual(0, self.db.query(CompetitorListing).count())
if __name__ == "__main__":
unittest.main()