Add monitoring TG service
This commit is contained in:
21
.dockerignore
Normal file
21
.dockerignore
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
.gitea/
|
||||||
|
.env
|
||||||
|
.venv/
|
||||||
|
venv/
|
||||||
|
__pycache__/
|
||||||
|
**/__pycache__/
|
||||||
|
*.pyc
|
||||||
|
*.pyo
|
||||||
|
*.egg-info/
|
||||||
|
.pytest_cache/
|
||||||
|
.mypy_cache/
|
||||||
|
.ruff_cache/
|
||||||
|
.idea/
|
||||||
|
.vscode/
|
||||||
|
.claude/
|
||||||
|
.DS_Store
|
||||||
|
data/
|
||||||
|
*.session
|
||||||
|
*.session-journal
|
||||||
54
.env.example
Normal file
54
.env.example
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
# Telegram MTProto credentials — get from https://my.telegram.org
|
||||||
|
TG_API_ID=
|
||||||
|
TG_API_HASH=
|
||||||
|
TG_PHONE=
|
||||||
|
|
||||||
|
# --- ONE OF THE TWO BELOW IS REQUIRED ---
|
||||||
|
# Preferred (no volumes, k8s-friendly): get the string by running
|
||||||
|
# docker compose run --rm -it app python -m parser_bot.auth
|
||||||
|
# It prints `TG_SESSION_STRING=...` — paste that line here.
|
||||||
|
TG_SESSION_STRING=
|
||||||
|
|
||||||
|
# Fallback (file-based): only used if TG_SESSION_STRING is empty.
|
||||||
|
# Requires mounting ./data/session as a volume.
|
||||||
|
TG_SESSION_PATH=/data/session/parser.session
|
||||||
|
|
||||||
|
# Postgres
|
||||||
|
POSTGRES_USER=parser
|
||||||
|
POSTGRES_PASSWORD=parser
|
||||||
|
POSTGRES_DB=parser
|
||||||
|
POSTGRES_HOST=db
|
||||||
|
POSTGRES_PORT=5432
|
||||||
|
|
||||||
|
# Polling
|
||||||
|
POLL_INTERVAL_SECONDS=60
|
||||||
|
POLL_HISTORY_LIMIT=50
|
||||||
|
|
||||||
|
# API
|
||||||
|
API_HOST=0.0.0.0
|
||||||
|
API_PORT=8000
|
||||||
|
|
||||||
|
# Media (downloaded photos / small videos / docs from parsed messages)
|
||||||
|
MEDIA_DIR=/data/media
|
||||||
|
MEDIA_MAX_BYTES=20971520
|
||||||
|
|
||||||
|
# Local LLM (Ollama) — runs Qwen 2.5 7B Q4 on CPU. Set LLM_ENABLED=false to disable.
|
||||||
|
LLM_ENABLED=true
|
||||||
|
LLM_BASE_URL=http://ollama:11434
|
||||||
|
LLM_MODEL=qwen2.5:7b-instruct-q4_K_M
|
||||||
|
LLM_TIMEOUT_SECONDS=120
|
||||||
|
LLM_MIN_TEXT_LENGTH=20
|
||||||
|
# How often the background classifier wakes up and how many messages it
|
||||||
|
# processes per tick. With 5/20s ≈ 900 messages/hour at ~3-6s per call.
|
||||||
|
LLM_CLASSIFY_INTERVAL_SECONDS=20
|
||||||
|
LLM_CLASSIFY_BATCH_SIZE=5
|
||||||
|
|
||||||
|
# Admin allowlist for /auth.html, /docs, /openapi.json, /redoc and the
|
||||||
|
# /api/v1/auth/* endpoints. Comma-separated list of client IPs.
|
||||||
|
# Empty = no restriction (everyone is admin) — convenient for local dev.
|
||||||
|
# Example: ADMIN_ALLOWED_IPS=89.110.109.221,127.0.0.1
|
||||||
|
ADMIN_ALLOWED_IPS=
|
||||||
|
# Honor X-Forwarded-For / X-Real-IP from a reverse proxy (Docker port-
|
||||||
|
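# forward, nginx, traefik) when resolving the client IP for the allowlist.
# Enable only when the app is reachable exclusively through a trusted proxy:
# otherwise any client can send these headers itself and spoof an allowed IP.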
|
||||||
|
TRUST_PROXY_HEADERS=true
|
||||||
|
|
||||||
58
.gitea/workflows/deploy.yaml
Normal file
58
.gitea/workflows/deploy.yaml
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
# CI/CD pipeline: builds the server image, pushes it to the Gitea registry and
# rolls the freshly built tag out to the `monitoring-tg` namespace.
name: Build and Deploy

on:
  push:
    branches: [main]

env:
  # Registry address used by this job for `docker login` / `docker push`.
  INTERNAL_REGISTRY: gitea-http.gitea.svc.cluster.local:3000
  # Registry address written into the Deployment image reference.
  # NOTE(review): presumably the same registry as seen from the cluster nodes — confirm.
  NODE_REGISTRY: localhost:30300

jobs:
  build-and-deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install Docker CLI
        # Explicit bash + pipefail: without it a failed download piped into tar
        # can go unnoticed.
        shell: bash
        run: |
          set -euo pipefail
          curl -fsSL https://download.docker.com/linux/static/stable/x86_64/docker-27.5.1.tgz \
            | tar xz --strip-components=1 -C /usr/local/bin docker/docker
          docker version

      - name: Install kubectl
        shell: bash
        run: |
          set -euo pipefail
          KUBECTL_VERSION="$(curl -fsSL https://dl.k8s.io/release/stable.txt)"
          curl -fsSLO "https://dl.k8s.io/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl"
          curl -fsSLO "https://dl.k8s.io/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl.sha256"
          # Refuse to install a binary that does not match the published checksum.
          echo "$(cat kubectl.sha256)  kubectl" | sha256sum --check
          install -m 0755 kubectl /usr/local/bin/kubectl
          rm -f kubectl kubectl.sha256
          kubectl version --client

      - name: Login to Gitea Registry
        # Secrets are passed through the environment instead of being
        # interpolated into the script text, so quotes or shell metacharacters
        # inside a secret cannot break or inject into the command line.
        env:
          REGISTRY_USERNAME: ${{ secrets.REGISTRY_USERNAME }}
          REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
        run: |
          printf '%s' "${REGISTRY_PASSWORD}" | \
            docker login "${INTERNAL_REGISTRY}" \
              -u "${REGISTRY_USERNAME}" --password-stdin

      - name: Build and push server
        run: |
          IMAGE="${INTERNAL_REGISTRY}/admin/monitoring-tg-server"
          docker build -f Dockerfile \
            -t "${IMAGE}:${{ github.sha }}" \
            -t "${IMAGE}:latest" \
            .
          docker push "${IMAGE}:${{ github.sha }}"
          docker push "${IMAGE}:latest"

      - name: Deploy to Kubernetes
        env:
          KUBECONFIG: /kubeconfig/config
        run: |
          kubectl apply -f k8s/namespace.yaml
          kubectl apply -f k8s/secrets.yaml
          kubectl apply -f k8s/configmap.yaml
          kubectl apply -f k8s/postgres.yaml
          kubectl apply -f k8s/server-deployment.yaml
          kubectl apply -f k8s/server-service.yaml
          # Pin the Deployment to the exact commit image that was just pushed.
          # NOTE(review): if server-deployment.yaml names a different tag, the
          # apply above plus this `set image` may cause two rollouts — confirm.
          kubectl -n monitoring-tg set image deployment/monitoring-tg-server \
            monitoring-tg-server="${NODE_REGISTRY}/admin/monitoring-tg-server:${{ github.sha }}"
          kubectl -n monitoring-tg rollout status deployment/monitoring-tg-server --timeout=180s
|
||||||
14
.gitignore
vendored
Normal file
14
.gitignore
vendored
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*.egg-info/
|
||||||
|
.venv/
|
||||||
|
venv/
|
||||||
|
.env
|
||||||
|
*.session
|
||||||
|
*.session-journal
|
||||||
|
.pytest_cache/
|
||||||
|
.mypy_cache/
|
||||||
|
.ruff_cache/
|
||||||
|
.idea/
|
||||||
|
.vscode/
|
||||||
|
data/
|
||||||
28
Dockerfile
Normal file
28
Dockerfile
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# Image for the parser_bot service (default command: `python -m parser_bot.main`).
# Only classic-builder syntax is used here (no `COPY --chmod`, no `RUN --mount`),
# because the CI job installs just the bare docker CLI binary.
FROM python:3.11-slim

# PYTHONUNBUFFERED      - write stdout/stderr straight through (no buffering)
# PYTHONDONTWRITEBYTECODE - do not create .pyc files inside the image
# PIP_NO_CACHE_DIR      - keep pip's download cache out of the image layers
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1

WORKDIR /app

# Compiler toolchain; presumably required to build wheels for some
# dependencies — TODO confirm, and drop it if every dependency ships wheels.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
    && rm -rf /var/lib/apt/lists/*

COPY pyproject.toml ./
COPY src ./src
COPY alembic.ini ./
COPY alembic ./alembic

# Editable install: the package is imported directly from /app/src.
RUN pip install --upgrade pip && pip install -e .

COPY docker/entrypoint.sh /usr/local/bin/entrypoint.sh

# Single layer: make the entrypoint executable and pre-create the default
# session/media directories.
RUN chmod +x /usr/local/bin/entrypoint.sh \
    && mkdir -p /data/session /data/media

# NOTE(review): the container runs as root. Switching to a dedicated USER needs
# a check that mounted /data volumes stay writable for that user.

EXPOSE 8000

# Probe the HTTP health endpoint with the interpreter already in the image
# (no curl/wget needed). Assumes the app listens on API_PORT (default 8000)
# and serves /healthz — confirm against the application config.
HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \
    CMD python -c "import os, urllib.request; urllib.request.urlopen('http://127.0.0.1:' + os.environ.get('API_PORT', '8000') + '/healthz', timeout=3)" || exit 1

ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
CMD ["python", "-m", "parser_bot.main"]
|
||||||
123
README.md
Normal file
123
README.md
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
# parser-tg-bot
|
||||||
|
|
||||||
|
Парсер публичных Telegram-каналов на Telethon (MTProto). Сохраняет сообщения в Postgres,
|
||||||
|
управляется через REST API. Период опроса настраивается через `.env`. На следующем шаге
|
||||||
|
легко перевести на realtime через `events.NewMessage`.
|
||||||
|
|
||||||
|
## Стек
|
||||||
|
|
||||||
|
- Python 3.11, Telethon, FastAPI, SQLAlchemy 2 (async) + Alembic, APScheduler, Postgres 16
|
||||||
|
|
||||||
|
## Структура
|
||||||
|
|
||||||
|
```text
|
||||||
|
src/parser_bot/
|
||||||
|
├── api/ # FastAPI роуты + Pydantic-схемы
|
||||||
|
├── db/ # SQLAlchemy модели + сессии
|
||||||
|
├── scheduler/ # APScheduler-воркер периодического опроса
|
||||||
|
├── telegram/ # Telethon-клиент (resolve, fetch)
|
||||||
|
├── web/static/ # SPA-странички (HTML/CSS/JS, без бандлера)
|
||||||
|
├── config.py # pydantic-settings
|
||||||
|
└── main.py # FastAPI lifespan + uvicorn
|
||||||
|
alembic/ # миграции
|
||||||
|
```
|
||||||
|
|
||||||
|
## Первый запуск (локально, через Docker)
|
||||||
|
|
||||||
|
1. Получить `api_id` и `api_hash` на [my.telegram.org](https://my.telegram.org) → API development tools.
|
||||||
|
2. Скопировать `.env.example` в `.env` и заполнить `TG_API_ID`, `TG_API_HASH`, `TG_PHONE`.
|
||||||
|
3. Поднять Postgres + накатить миграции:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose up -d db
|
||||||
|
docker compose run --rm app alembic upgrade head
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Запуск:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose up -d
|
||||||
|
docker compose logs app --tail=50
|
||||||
|
```
|
||||||
|
|
||||||
|
5. **Авторизация Telegram** — открыть [http://localhost:8000/auth.html](http://localhost:8000/auth.html)
|
||||||
|
и нажать «Отправить код». Telegram пришлёт код на номер из `TG_PHONE` →
|
||||||
|
ввести код (и 2FA-пароль, если включён). Готово, парсер начнёт опрос.
|
||||||
|
|
||||||
|
Сессия сохраняется в `./data/session/parser.session` — рестарты её переиспользуют,
|
||||||
|
повторно входить не нужно.
|
||||||
|
|
||||||
|
### Админ-доступ и коды подразделов
|
||||||
|
|
||||||
|
- `ADMIN_PASSWORD` — дополнительный пароль для админских функций. Если не задан,
|
||||||
|
остаётся прежний режим: доступ определяется только `ADMIN_ALLOWED_IPS`.
|
||||||
|
- [http://localhost:8000/admin.html](http://localhost:8000/admin.html) — вход по
|
||||||
|
админ-паролю. После входа доступны удаление и редактирование подразделов,
|
||||||
|
просмотр их кодов, управление каналами, ручной опрос, промпты, авторизация
|
||||||
|
Telegram и Swagger.
|
||||||
|
- При создании подраздела обязательно задаётся `Код доступа`. Пользователь вводит
|
||||||
|
этот код при первом открытии данных подраздела; после входа он может добавлять
|
||||||
|
каналы в этот подраздел. Админ видит код в списке подразделов.
|
||||||
|
|
||||||
|
### Прод-вариант: без UI и без volume (k8s-friendly)
|
||||||
|
|
||||||
|
Сделай интерактивный логин **один раз** на dev-машине и получи непрозрачную (opaque) строку сессии:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose run --rm -it app python -m parser_bot.auth
|
||||||
|
```
|
||||||
|
|
||||||
|
Скрипт напечатает строку вида `TG_SESSION_STRING=1AbcD...`. Положи её в
|
||||||
|
`.env` или k8s Secret — после этого приложение поднимается без UI и без
|
||||||
|
монтирования сессионного файла:
|
||||||
|
|
||||||
|
```ini
|
||||||
|
TG_SESSION_STRING=1AbcDef... # вместо TG_SESSION_PATH/volume
|
||||||
|
```
|
||||||
|
|
||||||
|
> ⚠️ **`ApiIdPublishedFloodError`** — Telegram заблокировал твою пару
|
||||||
|
> `api_id`/`api_hash` (попала в публичный доступ). Создай **новое** приложение
|
||||||
|
> на [my.telegram.org](https://my.telegram.org) и не публикуй креды нигде.
|
||||||
|
> Старый `api_id` восстановить нельзя.
|
||||||
|
|
||||||
|
## UI
|
||||||
|
|
||||||
|
После запуска доступны страницы:
|
||||||
|
|
||||||
|
- [Дашборд](http://localhost:8000/) — общая статистика, топ каналов, кнопка опросить всех
|
||||||
|
- [Каналы](http://localhost:8000/channels.html) — добавить / удалить / включить-выключить / опросить вручную
|
||||||
|
- [Сообщения](http://localhost:8000/messages.html) — фильтр по каналу, поиск по тексту, пагинация, raw JSON
|
||||||
|
- [Настройки](http://localhost:8000/settings.html) — текущая конфигурация и подсказки
|
||||||
|
- [Авторизация](http://localhost:8000/auth.html) — веб-логин в Telegram (код + 2FA)
|
||||||
|
- [Swagger UI](http://localhost:8000/docs) — интерактивный API
|
||||||
|
|
||||||
|
Глубокая ссылка `messages.html?channel_id=42` открывает ленту конкретного канала.
|
||||||
|
|
||||||
|
## API
|
||||||
|
|
||||||
|
- `GET /healthz` — health check
|
||||||
|
- `GET /api/v1/auth/status` — авторизован ли клиент
|
||||||
|
- `POST /api/v1/auth/send-code` — отправить код на `TG_PHONE`
|
||||||
|
- `POST /api/v1/auth/submit-code` `{"code": "12345"}` — подтвердить код
|
||||||
|
- `POST /api/v1/auth/submit-password` `{"password": "..."}` — 2FA-пароль
|
||||||
|
- `POST /api/v1/auth/logout` — завершить сессию
|
||||||
|
- `GET /api/v1/stats` — глобальные счётчики
|
||||||
|
- `GET /api/v1/settings` — read-only вид конфигурации
|
||||||
|
- `GET /api/v1/channels` — список каналов
|
||||||
|
- `POST /api/v1/channels` `{"identifier": "@durov"}` — добавить
|
||||||
|
- `GET /api/v1/channels/{id}` — карточка
|
||||||
|
- `PATCH /api/v1/channels/{id}` `{"is_active": false}` — включить/выключить
|
||||||
|
- `DELETE /api/v1/channels/{id}` — удалить
|
||||||
|
- `GET /api/v1/channels/{id}/stats` — счётчики по каналу
|
||||||
|
- `POST /api/v1/channels/{id}/poll` — форсировать опрос одного канала
|
||||||
|
- `POST /api/v1/poll` — форсировать опрос всех активных каналов
|
||||||
|
- `GET /api/v1/messages?channel_id=...&q=...&limit=50&offset=0` — лента
|
||||||
|
- `GET /api/v1/messages/{id}` — одно сообщение (с `raw` JSONB)
|
||||||
|
|
||||||
|
## Дальше
|
||||||
|
|
||||||
|
- **Realtime**: заменить APScheduler на `client.add_event_handler(handler, events.NewMessage)`,
|
||||||
|
оставив periodic poll как фоновый «доводчик» для пропущенных сообщений.
|
||||||
|
- **Go-микросервис**: контракт = таблицы `channels` / `messages` в Postgres.
|
||||||
|
Go-сервис может либо читать ту же БД, либо ходить в `/api/v1/messages`.
|
||||||
|
- **k8s**: добавить Helm-чарт; `data/session/` маппится на PVC, `.env` — в Secret.
|
||||||
39
alembic.ini
Normal file
39
alembic.ini
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
[alembic]
|
||||||
|
script_location = alembic
|
||||||
|
prepend_sys_path = src
|
||||||
|
version_path_separator = os
|
||||||
|
sqlalchemy.url = postgresql+asyncpg://parser:parser@db:5432/parser
|
||||||
|
|
||||||
|
[loggers]
|
||||||
|
keys = root,sqlalchemy,alembic
|
||||||
|
|
||||||
|
[handlers]
|
||||||
|
keys = console
|
||||||
|
|
||||||
|
[formatters]
|
||||||
|
keys = generic
|
||||||
|
|
||||||
|
[logger_root]
|
||||||
|
level = WARN
|
||||||
|
handlers = console
|
||||||
|
qualname =
|
||||||
|
|
||||||
|
[logger_sqlalchemy]
|
||||||
|
level = WARN
|
||||||
|
handlers =
|
||||||
|
qualname = sqlalchemy.engine
|
||||||
|
|
||||||
|
[logger_alembic]
|
||||||
|
level = INFO
|
||||||
|
handlers =
|
||||||
|
qualname = alembic
|
||||||
|
|
||||||
|
[handler_console]
|
||||||
|
class = StreamHandler
|
||||||
|
args = (sys.stderr,)
|
||||||
|
level = NOTSET
|
||||||
|
formatter = generic
|
||||||
|
|
||||||
|
[formatter_generic]
|
||||||
|
format = %(levelname)-5.5s [%(name)s] %(message)s
|
||||||
|
datefmt = %H:%M:%S
|
||||||
52
alembic/env.py
Normal file
52
alembic/env.py
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
import asyncio
|
||||||
|
from logging.config import fileConfig
|
||||||
|
|
||||||
|
from alembic import context
|
||||||
|
from sqlalchemy import pool
|
||||||
|
from sqlalchemy.engine import Connection
|
||||||
|
from sqlalchemy.ext.asyncio import async_engine_from_config
|
||||||
|
|
||||||
|
from parser_bot.config import settings
|
||||||
|
from parser_bot.db.models import Base
|
||||||
|
|
||||||
|
# Alembic configuration object for the current invocation.
config = context.config

# Override the static sqlalchemy.url from alembic.ini with the application's
# runtime database URL. The config is ConfigParser-backed, where "%" starts an
# interpolation, so a URL containing a literal "%" (e.g. a percent-encoded
# password) must have it doubled; it is un-escaped again when the option is read.
config.set_main_option("sqlalchemy.url", settings.database_url.replace("%", "%%"))

# Apply the logging sections of the ini file when one is in use.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# Model metadata handed to the migration context.
target_metadata = Base.metadata
|
||||||
|
|
||||||
|
|
||||||
|
def run_migrations_offline() -> None:
    """Run migrations with the context configured from the URL only (no connection)."""
    offline_options = {
        "url": settings.database_url,
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
|
||||||
|
|
||||||
|
|
||||||
|
def do_run_migrations(connection: Connection) -> None:
    """Bind the migration context to ``connection`` and run migrations in a transaction."""
    context.configure(
        target_metadata=target_metadata,
        connection=connection,
    )

    with context.begin_transaction():
        context.run_migrations()
|
||||||
|
|
||||||
|
|
||||||
|
async def run_migrations_online() -> None:
    """Build an async engine from the ini section and run migrations over one connection."""
    ini_section = config.get_section(config.config_ini_section, {})
    engine = async_engine_from_config(
        ini_section,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    # The migration code is synchronous, so it is driven through run_sync.
    async with engine.connect() as conn:
        await conn.run_sync(do_run_migrations)

    await engine.dispose()
|
||||||
|
|
||||||
|
|
||||||
|
# Entry point: choose the migration runner for the mode Alembic was invoked in.
if not context.is_offline_mode():
    asyncio.run(run_migrations_online())
else:
    run_migrations_offline()
|
||||||
25
alembic/script.py.mako
Normal file
25
alembic/script.py.mako
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
"""${message}
|
||||||
|
|
||||||
|
Revision ID: ${up_revision}
|
||||||
|
Revises: ${down_revision | comma,n}
|
||||||
|
Create Date: ${create_date}
|
||||||
|
|
||||||
|
"""
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
${imports if imports else ""}
|
||||||
|
|
||||||
|
revision: str = ${repr(up_revision)}
|
||||||
|
down_revision: Union[str, None] = ${repr(down_revision)}
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
|
||||||
|
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
|
||||||
|
${upgrades if upgrades else "pass"}
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
|
||||||
|
${downgrades if downgrades else "pass"}
|
||||||
71
alembic/versions/0001_initial.py
Normal file
71
alembic/versions/0001_initial.py
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
"""initial schema: channels + messages
|
||||||
|
|
||||||
|
Revision ID: 0001
|
||||||
|
Revises:
|
||||||
|
Create Date: 2026-05-05
|
||||||
|
|
||||||
|
"""
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
from sqlalchemy.dialects import postgresql
|
||||||
|
|
||||||
|
revision: str = "0001"
|
||||||
|
down_revision: Union[str, None] = None
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Create the ``channels`` and ``messages`` tables and the (channel_id, date) index."""

    def server_stamped(column_name: str) -> sa.Column:
        # Non-null timestamptz column whose default is now() on the server.
        return sa.Column(
            column_name,
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        )

    channel_columns = [
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column("tg_id", sa.BigInteger(), nullable=True, unique=True),
        sa.Column("identifier", sa.String(length=255), nullable=False, unique=True),
        sa.Column("title", sa.String(length=512), nullable=True),
        sa.Column("is_active", sa.Boolean(), nullable=False, server_default=sa.text("true")),
        sa.Column("last_message_id", sa.BigInteger(), nullable=True),
        sa.Column("last_polled_at", sa.DateTime(timezone=True), nullable=True),
        server_stamped("created_at"),
    ]
    op.create_table("channels", *channel_columns)

    # Rows in messages are removed together with their channel (ON DELETE CASCADE).
    channel_fk = sa.Column(
        "channel_id",
        sa.Integer(),
        sa.ForeignKey("channels.id", ondelete="CASCADE"),
        nullable=False,
    )
    message_columns = [
        sa.Column("id", sa.Integer(), primary_key=True),
        channel_fk,
        sa.Column("tg_message_id", sa.BigInteger(), nullable=False),
        sa.Column("date", sa.DateTime(timezone=True), nullable=False),
        sa.Column("text", sa.Text(), nullable=True),
        sa.Column("sender_id", sa.BigInteger(), nullable=True),
        sa.Column("has_media", sa.Boolean(), nullable=False, server_default=sa.text("false")),
        sa.Column("views", sa.Integer(), nullable=True),
        sa.Column("forwards", sa.Integer(), nullable=True),
        sa.Column("raw", postgresql.JSONB(), nullable=True),
        server_stamped("fetched_at"),
    ]
    # A given Telegram message id may be stored only once per channel.
    one_row_per_message = sa.UniqueConstraint(
        "channel_id", "tg_message_id", name="uq_channel_message"
    )
    op.create_table("messages", *message_columns, one_row_per_message)

    op.create_index(
        "ix_messages_channel_date",
        "messages",
        ["channel_id", "date"],
        unique=False,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Drop the index first, then ``messages``, then ``channels``."""
    op.drop_index("ix_messages_channel_date", table_name="messages")
    # messages references channels, so it is dropped before its parent.
    for table in ("messages", "channels"):
        op.drop_table(table)
|
||||||
28
alembic/versions/0002_add_media_files.py
Normal file
28
alembic/versions/0002_add_media_files.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
"""add media_files JSONB column to messages
|
||||||
|
|
||||||
|
Revision ID: 0002
|
||||||
|
Revises: 0001
|
||||||
|
Create Date: 2026-05-05
|
||||||
|
|
||||||
|
"""
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
from sqlalchemy.dialects import postgresql
|
||||||
|
|
||||||
|
revision: str = "0002"
|
||||||
|
down_revision: Union[str, None] = "0001"
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Add the nullable JSONB column ``messages.media_files``."""
    media_files = sa.Column("media_files", postgresql.JSONB(), nullable=True)
    op.add_column("messages", media_files)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Remove ``messages.media_files``."""
    table, column = "messages", "media_files"
    op.drop_column(table, column)
|
||||||
39
alembic/versions/0003_add_grouped_id.py
Normal file
39
alembic/versions/0003_add_grouped_id.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
"""add grouped_id to messages (Telegram album/media-group key)
|
||||||
|
|
||||||
|
Revision ID: 0003
|
||||||
|
Revises: 0002
|
||||||
|
Create Date: 2026-05-05
|
||||||
|
|
||||||
|
"""
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
revision: str = "0003"
|
||||||
|
down_revision: Union[str, None] = "0002"
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Add ``messages.grouped_id``, index it with ``channel_id`` and backfill it from ``raw``."""
    grouped_id = sa.Column("grouped_id", sa.BigInteger(), nullable=True)
    op.add_column("messages", grouped_id)

    indexed_columns = ["channel_id", "grouped_id"]
    op.create_index("ix_messages_grouped_id", "messages", indexed_columns)

    # Rows stored before this revision get grouped_id copied out of the raw
    # JSONB payload, so previously saved albums are grouped as well.
    backfill_sql = """
        UPDATE messages
        SET grouped_id = (raw->>'grouped_id')::bigint
        WHERE grouped_id IS NULL
          AND raw IS NOT NULL
          AND raw->>'grouped_id' IS NOT NULL
        """
    op.execute(backfill_sql)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Drop the grouped_id index, then the column itself."""
    table = "messages"
    op.drop_index("ix_messages_grouped_id", table_name=table)
    op.drop_column(table, "grouped_id")
|
||||||
34
alembic/versions/0004_add_extracted.py
Normal file
34
alembic/versions/0004_add_extracted.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
"""add extracted JSONB column to messages
|
||||||
|
|
||||||
|
Revision ID: 0004
|
||||||
|
Revises: 0003
|
||||||
|
Create Date: 2026-05-05
|
||||||
|
|
||||||
|
"""
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
from sqlalchemy.dialects import postgresql
|
||||||
|
|
||||||
|
revision: str = "0004"
|
||||||
|
down_revision: Union[str, None] = "0003"
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Add the nullable JSONB column ``messages.extracted`` and a GIN index on it."""
    extracted = sa.Column("extracted", postgresql.JSONB(), nullable=True)
    op.add_column("messages", extracted)

    # GIN index to serve JSON lookups such as extracted->'real_estate'->>'kind'.
    create_gin_index = (
        "CREATE INDEX IF NOT EXISTS ix_messages_extracted_gin "
        "ON messages USING GIN (extracted)"
    )
    op.execute(create_gin_index)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Drop the GIN index (if present), then ``messages.extracted``."""
    drop_gin_index = "DROP INDEX IF EXISTS ix_messages_extracted_gin"
    op.execute(drop_gin_index)
    op.drop_column("messages", "extracted")
|
||||||
30
alembic/versions/0005_add_sender_info.py
Normal file
30
alembic/versions/0005_add_sender_info.py
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
"""add sender_username and sender_name to messages
|
||||||
|
|
||||||
|
Revision ID: 0005
|
||||||
|
Revises: 0004
|
||||||
|
Create Date: 2026-05-06
|
||||||
|
|
||||||
|
"""
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
revision: str = "0005"
|
||||||
|
down_revision: Union[str, None] = "0004"
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Add nullable ``sender_username`` (64) and ``sender_name`` (255) to ``messages``."""
    sender_columns = (
        ("sender_username", 64),
        ("sender_name", 255),
    )
    for column_name, max_length in sender_columns:
        op.add_column(
            "messages",
            sa.Column(column_name, sa.String(length=max_length), nullable=True),
        )
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Remove the sender columns in the reverse order of their creation."""
    for column_name in ("sender_name", "sender_username"):
        op.drop_column("messages", column_name)
|
||||||
35
alembic/versions/0006_add_app_settings.py
Normal file
35
alembic/versions/0006_add_app_settings.py
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
"""key/value store for runtime-editable settings (LLM prompt, etc.)
|
||||||
|
|
||||||
|
Revision ID: 0006
|
||||||
|
Revises: 0005
|
||||||
|
Create Date: 2026-05-06
|
||||||
|
|
||||||
|
"""
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
from sqlalchemy.dialects import postgresql
|
||||||
|
|
||||||
|
revision: str = "0006"
|
||||||
|
down_revision: Union[str, None] = "0005"
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Create the ``app_settings`` key/value table (string key, JSONB value)."""
    key_column = sa.Column("key", sa.String(length=64), primary_key=True)
    value_column = sa.Column("value", postgresql.JSONB(), nullable=False)
    # Defaults to now() on the server when a row is inserted.
    updated_at_column = sa.Column(
        "updated_at",
        sa.DateTime(timezone=True),
        nullable=False,
        server_default=sa.func.now(),
    )
    op.create_table("app_settings", key_column, value_column, updated_at_column)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Drop the ``app_settings`` table."""
    settings_table = "app_settings"
    op.drop_table(settings_table)
|
||||||
37
alembic/versions/0007_add_channel_vertical.py
Normal file
37
alembic/versions/0007_add_channel_vertical.py
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
"""split channels into two verticals: real_estate / hr
|
||||||
|
|
||||||
|
Existing rows get `real_estate` per the migration decision — the service was
|
||||||
|
real-estate-only before this column existed.
|
||||||
|
|
||||||
|
Revision ID: 0007
|
||||||
|
Revises: 0006
|
||||||
|
Create Date: 2026-05-19
|
||||||
|
|
||||||
|
"""
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
revision: str = "0007"
|
||||||
|
down_revision: Union[str, None] = "0006"
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Add the indexed, non-null ``channels.vertical`` column."""
    # The server default makes every pre-existing row a `real_estate` channel.
    vertical = sa.Column(
        "vertical",
        sa.String(length=32),
        nullable=False,
        server_default="real_estate",
    )
    op.add_column("channels", vertical)
    op.create_index("ix_channels_vertical", "channels", ["vertical"])
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Drop the vertical index, then the ``channels.vertical`` column."""
    table = "channels"
    op.drop_index("ix_channels_vertical", table_name=table)
    op.drop_column(table, "vertical")
|
||||||
110
alembic/versions/0008_add_sections.py
Normal file
110
alembic/versions/0008_add_sections.py
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
"""sub-sections inside each vertical (e.g. Real Estate → Dubai / Moscow)
|
||||||
|
|
||||||
|
A channel now belongs to exactly one section, and each section to exactly
|
||||||
|
one vertical. The migration auto-creates a `Общий` section per vertical
|
||||||
|
that has at least one channel and pins all existing channels there, so the
|
||||||
|
service keeps working without manual reclassification after upgrade.
|
||||||
|
|
||||||
|
Revision ID: 0008
|
||||||
|
Revises: 0007
|
||||||
|
Create Date: 2026-05-20
|
||||||
|
|
||||||
|
"""
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
revision: str = "0008"
|
||||||
|
down_revision: Union[str, None] = "0007"
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Introduce ``sections``, attach every channel to one, and widen ``app_settings.key``.

    Order matters: the table and the per-vertical `default` rows are created
    first, ``channels.section_id`` is added as nullable, backfilled, and only
    then made NOT NULL.
    """
    section_columns = [
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column("vertical", sa.String(length=32), nullable=False),
        sa.Column("slug", sa.String(length=64), nullable=False),
        sa.Column("title", sa.String(length=255), nullable=False),
        sa.Column("emoji", sa.String(length=8), nullable=True),
        sa.Column("description", sa.Text(), nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
    ]
    # A slug has to be unique only inside its vertical.
    slug_per_vertical = sa.UniqueConstraint(
        "vertical", "slug", name="uq_section_vertical_slug"
    )
    op.create_table("sections", *section_columns, slug_per_vertical)
    op.create_index("ix_sections_vertical", "sections", ["vertical"])

    # One `default` section per vertical that already has channels, so the
    # backfill further down has a row to reference.
    seed_default_sections = """
        INSERT INTO sections (vertical, slug, title, emoji)
        SELECT DISTINCT c.vertical,
               'default',
               CASE c.vertical
                   WHEN 'hr' THEN 'Общий HR'
                   ELSE 'Общий'
               END,
               CASE c.vertical WHEN 'hr' THEN '👥' ELSE '🏠' END
        FROM channels c
        ON CONFLICT (vertical, slug) DO NOTHING
        """
    op.execute(seed_default_sections)

    # Nullable at first: existing rows have no section until the backfill runs.
    section_id = sa.Column("section_id", sa.Integer(), nullable=True)
    op.add_column("channels", section_id)
    op.create_foreign_key(
        "fk_channels_section",
        "channels",
        "sections",
        ["section_id"],
        ["id"],
        ondelete="RESTRICT",
    )
    op.create_index("ix_channels_section_id", "channels", ["section_id"])

    assign_default_section = """
        UPDATE channels c
        SET section_id = s.id
        FROM sections s
        WHERE s.vertical = c.vertical AND s.slug = 'default'
        """
    op.execute(assign_default_section)

    # Every channel now has a section, so the column can be made mandatory.
    op.alter_column("channels", "section_id", nullable=False)

    # Per-section LLM prompt keys (`llm_system_prompt:real_estate:some-long-slug`)
    # do not fit into 64 characters, so the key column is widened to 128.
    op.alter_column(
        "app_settings",
        "key",
        existing_type=sa.String(length=64),
        type_=sa.String(length=128),
        existing_nullable=False,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Undo this revision.

    Narrows ``app_settings.key`` back to 64 characters, removes the
    ``section_id`` column from ``channels`` (index, then foreign key, then the
    column itself) and finally drops the ``sections`` table with its index.
    """
    widened_key = sa.String(length=128)
    narrow_key = sa.String(length=64)

    # NOTE(review): presumably this fails if any stored key is longer than
    # 64 characters by now — confirm before downgrading real data.
    op.alter_column(
        "app_settings",
        "key",
        existing_type=widened_key,
        type_=narrow_key,
        existing_nullable=False,
    )

    # channels.section_id: the index and the foreign key go before the column.
    op.drop_index("ix_channels_section_id", table_name="channels")
    op.drop_constraint("fk_channels_section", "channels", type_="foreignkey")
    op.drop_column("channels", "section_id")

    # Nothing references `sections` any more, so it can be removed.
    op.drop_index("ix_sections_vertical", table_name="sections")
    op.drop_table("sections")
|
||||||
24
alembic/versions/0009_add_section_access_code.py
Normal file
24
alembic/versions/0009_add_section_access_code.py
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
"""add access code to sections
|
||||||
|
|
||||||
|
Revision ID: 0009
|
||||||
|
Revises: 0008
|
||||||
|
Create Date: 2026-05-29
|
||||||
|
|
||||||
|
"""
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
revision: str = "0009"
|
||||||
|
down_revision: Union[str, None] = "0008"
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Add the nullable ``access_code`` column (up to 255 chars) to ``sections``."""
    access_code_column = sa.Column("access_code", sa.String(length=255), nullable=True)
    op.add_column("sections", access_code_column)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Remove the ``access_code`` column from ``sections``."""
    table, column = "sections", "access_code"
    op.drop_column(table, column)
|
||||||
64
docker-compose.yml
Normal file
64
docker-compose.yml
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
services:
|
||||||
|
ollama:
|
||||||
|
image: ollama/ollama:latest
|
||||||
|
environment:
|
||||||
|
OLLAMA_HOST: 0.0.0.0:11434
|
||||||
|
OLLAMA_KEEP_ALIVE: 24h
|
||||||
|
OLLAMA_NUM_PARALLEL: "1"
|
||||||
|
OLLAMA_NUM_THREAD: "8"
|
||||||
|
volumes:
|
||||||
|
- ./data/ollama:/root/.ollama
|
||||||
|
ports:
|
||||||
|
- "11434:11434"
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "ollama", "list"]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 30
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
ollama-pull:
|
||||||
|
image: ollama/ollama:latest
|
||||||
|
depends_on:
|
||||||
|
ollama:
|
||||||
|
condition: service_healthy
|
||||||
|
environment:
|
||||||
|
OLLAMA_HOST: ollama:11434
|
||||||
|
entrypoint: ["/bin/sh", "-c"]
|
||||||
|
command: ["ollama list | grep -q qwen2.5:7b-instruct-q4_K_M || ollama pull qwen2.5:7b-instruct-q4_K_M"]
|
||||||
|
restart: "no"
|
||||||
|
|
||||||
|
db:
|
||||||
|
image: postgres:16-alpine
|
||||||
|
environment:
|
||||||
|
POSTGRES_USER: ${POSTGRES_USER:-parser}
|
||||||
|
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-parser}
|
||||||
|
POSTGRES_DB: ${POSTGRES_DB:-parser}
|
||||||
|
ports:
|
||||||
|
- "5432:5432"
|
||||||
|
volumes:
|
||||||
|
- pgdata:/var/lib/postgresql/data
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-parser}"]
|
||||||
|
interval: 5s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 10
|
||||||
|
|
||||||
|
app:
|
||||||
|
build: .
|
||||||
|
env_file: .env
|
||||||
|
depends_on:
|
||||||
|
db:
|
||||||
|
condition: service_healthy
|
||||||
|
ollama:
|
||||||
|
condition: service_healthy
|
||||||
|
ports:
|
||||||
|
- "80:8000"
|
||||||
|
volumes:
|
||||||
|
- ./data/session:/data/session
|
||||||
|
- ./data/media:/data/media
|
||||||
|
- ./src:/app/src
|
||||||
|
- ./alembic:/app/alembic
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
pgdata:
|
||||||
16
docker/entrypoint.sh
Normal file
16
docker/entrypoint.sh
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#!/bin/sh
set -e

# Run migrations on every container start. Idempotent: alembic skips
# revisions already applied. Skipped for one-shot commands like `alembic`
# itself (would deadlock when explicitly invoked), for interactive shells,
# and for the auth helper (`python -m parser_bot.auth`).
#
# "$1" is only the first word of the command, so the auth helper cannot be
# matched by a single case pattern containing spaces; it is recognised by
# inspecting $1, $2 and $3 separately.
case "$1" in
    alembic|/bin/sh|sh|bash)
        exec "$@"
        ;;
    python|python3)
        if [ "${2:-}" = "-m" ] && [ "${3:-}" = "parser_bot.auth" ]; then
            exec "$@"
        fi
        ;;
esac

echo "[entrypoint] running alembic upgrade head"
alembic upgrade head

# Replace the shell with the requested command so it becomes PID 1.
exec "$@"
|
||||||
20
k8s/configmap.yaml
Normal file
20
k8s/configmap.yaml
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: monitoring-tg-config
|
||||||
|
namespace: monitoring-tg
|
||||||
|
data:
|
||||||
|
API_HOST: "0.0.0.0"
|
||||||
|
API_PORT: "8000"
|
||||||
|
PUBLIC_BASE_PATH: "/api/monitoring-tg"
|
||||||
|
POSTGRES_HOST: "postgres.monitoring-tg.svc.cluster.local"
|
||||||
|
POSTGRES_PORT: "5432"
|
||||||
|
POSTGRES_USER: "parser"
|
||||||
|
POSTGRES_DB: "parser"
|
||||||
|
TG_SESSION_PATH: "/data/session/parser.session"
|
||||||
|
MEDIA_DIR: "/data/media"
|
||||||
|
POLL_INTERVAL_SECONDS: "60"
|
||||||
|
POLL_HISTORY_LIMIT: "50"
|
||||||
|
LLM_ENABLED: "1"
|
||||||
|
LLM_BASE_URL: "http://ollama.ollama.svc.cluster.local:11434"
|
||||||
|
LLM_MODEL: "qwen2.5:7b-instruct-q4_K_M"
|
||||||
12
k8s/kustomization.yaml
Normal file
12
k8s/kustomization.yaml
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
|
kind: Kustomization
|
||||||
|
|
||||||
|
namespace: monitoring-tg
|
||||||
|
|
||||||
|
resources:
|
||||||
|
- namespace.yaml
|
||||||
|
- configmap.yaml
|
||||||
|
- secrets.yaml
|
||||||
|
- postgres.yaml
|
||||||
|
- server-deployment.yaml
|
||||||
|
- server-service.yaml
|
||||||
4
k8s/namespace.yaml
Normal file
4
k8s/namespace.yaml
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: monitoring-tg
|
||||||
65
k8s/postgres.yaml
Normal file
65
k8s/postgres.yaml
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: postgres
|
||||||
|
namespace: monitoring-tg
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
app: postgres
|
||||||
|
ports:
|
||||||
|
- port: 5432
|
||||||
|
targetPort: 5432
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: StatefulSet
|
||||||
|
metadata:
|
||||||
|
name: postgres
|
||||||
|
namespace: monitoring-tg
|
||||||
|
spec:
|
||||||
|
serviceName: postgres
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: postgres
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: postgres
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: postgres
|
||||||
|
image: postgres:16-alpine
|
||||||
|
ports:
|
||||||
|
- containerPort: 5432
|
||||||
|
envFrom:
|
||||||
|
- secretRef:
|
||||||
|
name: postgres-secret
|
||||||
|
volumeMounts:
|
||||||
|
- name: pgdata
|
||||||
|
mountPath: /var/lib/postgresql/data
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 50m
|
||||||
|
memory: 128Mi
|
||||||
|
limits:
|
||||||
|
cpu: 500m
|
||||||
|
memory: 512Mi
|
||||||
|
livenessProbe:
|
||||||
|
exec:
|
||||||
|
command: ["pg_isready", "-U", "parser", "-d", "parser"]
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
periodSeconds: 10
|
||||||
|
readinessProbe:
|
||||||
|
exec:
|
||||||
|
command: ["pg_isready", "-U", "parser", "-d", "parser"]
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
|
volumeClaimTemplates:
|
||||||
|
- metadata:
|
||||||
|
name: pgdata
|
||||||
|
spec:
|
||||||
|
accessModes: ["ReadWriteOnce"]
|
||||||
|
storageClassName: local-path
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 5Gi
|
||||||
25
k8s/secrets.yaml
Normal file
25
k8s/secrets.yaml
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Secret
|
||||||
|
metadata:
|
||||||
|
name: monitoring-tg-secrets
|
||||||
|
namespace: monitoring-tg
|
||||||
|
type: Opaque
|
||||||
|
stringData:
|
||||||
|
TG_API_ID: "0"
|
||||||
|
TG_API_HASH: "CHANGE_ME"
|
||||||
|
TG_PHONE: "CHANGE_ME"
|
||||||
|
TG_SESSION_STRING: ""
|
||||||
|
POSTGRES_PASSWORD: "parser"
|
||||||
|
ADMIN_ALLOWED_IPS: ""
|
||||||
|
ADMIN_PASSWORD: "CHANGE_ME"
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Secret
|
||||||
|
metadata:
|
||||||
|
name: postgres-secret
|
||||||
|
namespace: monitoring-tg
|
||||||
|
type: Opaque
|
||||||
|
stringData:
|
||||||
|
POSTGRES_USER: "parser"
|
||||||
|
POSTGRES_PASSWORD: "parser"
|
||||||
|
POSTGRES_DB: "parser"
|
||||||
70
k8s/server-deployment.yaml
Normal file
70
k8s/server-deployment.yaml
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: monitoring-tg-data
|
||||||
|
namespace: monitoring-tg
|
||||||
|
spec:
|
||||||
|
accessModes: ["ReadWriteOnce"]
|
||||||
|
storageClassName: local-path
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 10Gi
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: monitoring-tg-server
|
||||||
|
namespace: monitoring-tg
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: monitoring-tg-server
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: monitoring-tg-server
|
||||||
|
spec:
|
||||||
|
terminationGracePeriodSeconds: 20
|
||||||
|
securityContext:
|
||||||
|
fsGroup: 1000
|
||||||
|
containers:
|
||||||
|
- name: monitoring-tg-server
|
||||||
|
image: localhost:30300/admin/monitoring-tg-server:latest
|
||||||
|
ports:
|
||||||
|
- containerPort: 8000
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: monitoring-tg-config
|
||||||
|
- secretRef:
|
||||||
|
name: monitoring-tg-secrets
|
||||||
|
volumeMounts:
|
||||||
|
- name: app-data
|
||||||
|
mountPath: /data
|
||||||
|
startupProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /healthz
|
||||||
|
port: 8000
|
||||||
|
periodSeconds: 5
|
||||||
|
failureThreshold: 30
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /healthz
|
||||||
|
port: 8000
|
||||||
|
periodSeconds: 10
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /healthz
|
||||||
|
port: 8000
|
||||||
|
periodSeconds: 5
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 100m
|
||||||
|
memory: 256Mi
|
||||||
|
limits:
|
||||||
|
cpu: 800m
|
||||||
|
memory: 1Gi
|
||||||
|
volumes:
|
||||||
|
- name: app-data
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: monitoring-tg-data
|
||||||
18
k8s/server-service.yaml
Normal file
18
k8s/server-service.yaml
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: monitoring-tg-server
|
||||||
|
namespace: monitoring-tg
|
||||||
|
annotations:
|
||||||
|
portal.estateliga.work/enabled: "true"
|
||||||
|
portal.estateliga.work/name: "Мониторинг TG"
|
||||||
|
portal.estateliga.work/description: "Парсер и анализ Telegram-каналов"
|
||||||
|
portal.estateliga.work/icon: "pulse"
|
||||||
|
portal.estateliga.work/path: "/api/monitoring-tg"
|
||||||
|
portal.estateliga.work/code: "monitoring_tg"
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
app: monitoring-tg-server
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
targetPort: 8000
|
||||||
44
pyproject.toml
Normal file
44
pyproject.toml
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
[project]
|
||||||
|
name = "parser-tg-bot"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "Telegram channel parser — periodic polling + storage, future Go microservice"
|
||||||
|
requires-python = ">=3.11"
|
||||||
|
dependencies = [
|
||||||
|
"telethon>=1.36",
|
||||||
|
"fastapi>=0.115",
|
||||||
|
"uvicorn[standard]>=0.32",
|
||||||
|
"sqlalchemy[asyncio]>=2.0",
|
||||||
|
"asyncpg>=0.30",
|
||||||
|
"alembic>=1.14",
|
||||||
|
"apscheduler>=3.10",
|
||||||
|
"pydantic>=2.9",
|
||||||
|
"pydantic-settings>=2.6",
|
||||||
|
"python-dotenv>=1.0",
|
||||||
|
"structlog>=24.4",
|
||||||
|
"httpx>=0.27",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.optional-dependencies]
|
||||||
|
dev = [
|
||||||
|
"ruff>=0.7",
|
||||||
|
"mypy>=1.13",
|
||||||
|
"pytest>=8.3",
|
||||||
|
"pytest-asyncio>=0.24",
|
||||||
|
]
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["setuptools>=68"]
|
||||||
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
|
[tool.setuptools.packages.find]
|
||||||
|
where = ["src"]
|
||||||
|
|
||||||
|
[tool.setuptools.package-data]
|
||||||
|
"parser_bot.web" = ["static/*", "static/**/*"]
|
||||||
|
|
||||||
|
[tool.ruff]
|
||||||
|
line-length = 100
|
||||||
|
target-version = "py311"
|
||||||
|
|
||||||
|
[tool.pytest.ini_options]
|
||||||
|
asyncio_mode = "auto"
|
||||||
0
src/parser_bot/__init__.py
Normal file
0
src/parser_bot/__init__.py
Normal file
116
src/parser_bot/access.py
Normal file
116
src/parser_bot/access.py
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
"""Admin access helpers for admin-only surfaces (auth, OpenAPI docs).
|
||||||
|
|
||||||
|
Resolution:
|
||||||
|
1. If `ADMIN_ALLOWED_IPS` is empty → no network restriction.
|
||||||
|
2. Otherwise the request's client IP must be in the allowlist.
|
||||||
|
3. When `TRUST_PROXY_HEADERS=true` (default) and one of the proxy headers
|
||||||
|
is present, the first IP in `X-Forwarded-For` (or `X-Real-IP`) is used.
|
||||||
|
Without this, behind a Docker port-forward the source IP is always the
|
||||||
|
gateway, which is useless for ACLs.
|
||||||
|
4. If `ADMIN_PASSWORD` is set, the request must also present a valid signed
|
||||||
|
admin cookie or the password in `X-Admin-Password`.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import hmac
|
||||||
|
import secrets
|
||||||
|
|
||||||
|
from fastapi import HTTPException, Request, Response
|
||||||
|
|
||||||
|
from parser_bot.config import settings
|
||||||
|
|
||||||
|
ADMIN_COOKIE = "parser_admin"
|
||||||
|
_ADMIN_TOKEN_MESSAGE = b"parser-tg-bot-admin-v1"
|
||||||
|
|
||||||
|
|
||||||
|
def client_ip(request: Request) -> str:
    """Return the best-effort source IP of ``request``.

    When ``settings.trust_proxy_headers`` is true, the first entry of
    ``X-Forwarded-For`` is preferred, then ``X-Real-IP``. Otherwise (or when
    neither header is present) the socket peer address is used, with
    ``"0.0.0.0"`` as the fallback when the request carries no client info.
    """
    if settings.trust_proxy_headers:
        forwarded_for = request.headers.get("x-forwarded-for")
        if forwarded_for:
            # Standard form: "client, proxy1, proxy2" — first is closest to user.
            # NOTE(review): the first entry is whatever the caller sent unless
            # the fronting proxy overwrites the header — confirm proxy config.
            leftmost, _, _ = forwarded_for.partition(",")
            return leftmost.strip()
        real_ip = request.headers.get("x-real-ip")
        if real_ip:
            return real_ip.strip()
    if request.client:
        return request.client.host
    return "0.0.0.0"
|
||||||
|
|
||||||
|
|
||||||
|
def is_admin_network_allowed(request: Request) -> bool:
    """Check the request's client IP against the admin allowlist.

    An empty allowlist means no network restriction, so every request passes.
    """
    allowlist = settings.admin_ip_set
    return not allowlist or client_ip(request) in allowlist
|
||||||
|
|
||||||
|
|
||||||
|
def admin_password_enabled() -> bool:
    """Return True when a non-empty ``settings.admin_password`` is configured."""
    return True if settings.admin_password else False
|
||||||
|
|
||||||
|
|
||||||
|
def verify_admin_password(password: str | None) -> bool:
    """Compare ``password`` with the configured admin password in constant time.

    Returns True when no admin password is configured (the check is disabled),
    False when ``password`` is None, and otherwise the result of a
    timing-safe comparison.

    Both sides are compared as UTF-8 bytes: ``secrets.compare_digest`` raises
    ``TypeError`` for ``str`` arguments containing non-ASCII characters, which
    would turn a wrong (or legitimately non-ASCII) password into a server
    error instead of a clean True/False.
    """
    expected = settings.admin_password
    if not expected:
        return True
    if password is None:
        return False
    return secrets.compare_digest(password.encode("utf-8"), expected.encode("utf-8"))
|
||||||
|
|
||||||
|
|
||||||
|
def admin_token() -> str:
    """Return the admin cookie value.

    The value is the hex HMAC-SHA256 of the fixed ``_ADMIN_TOKEN_MESSAGE``
    keyed with the UTF-8 encoded admin password, so it is the same for every
    call as long as the password does not change.
    """
    key = settings.admin_password.encode("utf-8")
    mac = hmac.new(key, _ADMIN_TOKEN_MESSAGE, hashlib.sha256)
    return mac.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def verify_admin_token(token: str | None) -> bool:
    """Check ``token`` against the expected admin cookie value in constant time.

    Returns True when no admin password is configured (the check is disabled),
    False when ``token`` is None, and otherwise the result of a timing-safe
    comparison with ``admin_token()``.

    The comparison is done on bytes: ``secrets.compare_digest`` raises
    ``TypeError`` for ``str`` arguments containing non-ASCII characters, and
    the token is client-supplied, so a crafted value must not cause a
    server error.
    """
    if not settings.admin_password:
        return True
    if token is None:
        return False
    return secrets.compare_digest(token.encode("utf-8"), admin_token().encode("utf-8"))
|
||||||
|
|
||||||
|
|
||||||
|
def set_admin_cookie(response: Response) -> None:
    """Attach the admin cookie to ``response``.

    The cookie is HTTP-only, ``SameSite=Lax``, not marked ``Secure``, and
    lives for 30 days.
    """
    thirty_days_in_seconds = 60 * 60 * 24 * 30
    response.set_cookie(
        key=ADMIN_COOKIE,
        value=admin_token(),
        max_age=thirty_days_in_seconds,
        secure=False,
        httponly=True,
        samesite="lax",
    )
|
||||||
|
|
||||||
|
|
||||||
|
def clear_admin_cookie(response: Response) -> None:
    """Instruct the client to drop the admin cookie."""
    response.delete_cookie(key=ADMIN_COOKIE)
|
||||||
|
|
||||||
|
|
||||||
|
def is_admin_request(request: Request) -> bool:
    """Decide whether ``request`` passes the admin checks.

    The network allowlist is checked first. If no admin password is
    configured, passing the allowlist is enough; otherwise the request must
    carry either a valid admin cookie or the password in ``X-Admin-Password``.
    """
    if not is_admin_network_allowed(request):
        return False
    if not settings.admin_password:
        return True
    # The cookie is tried first; the header is only consulted when it fails.
    if verify_admin_token(request.cookies.get(ADMIN_COOKIE)):
        return True
    return verify_admin_password(request.headers.get("x-admin-password"))
|
||||||
|
|
||||||
|
|
||||||
|
def require_admin_network(request: Request) -> None:
    """FastAPI dependency for the admin login page/API.

    Only the IP allowlist is enforced here, so it stays effective even before
    the password cookie exists. Raises ``HTTPException(404)`` when the client
    IP is not allowed.
    """
    if is_admin_network_allowed(request):
        return
    raise HTTPException(status_code=404)
|
||||||
|
|
||||||
|
|
||||||
|
def require_admin(request: Request) -> None:
    """FastAPI dependency: raise ``HTTPException(404)`` for non-admins.

    404 is used rather than 403 so that admin endpoints do not advertise
    their existence to clients outside the admin boundary.
    """
    if is_admin_request(request):
        return
    raise HTTPException(status_code=404)
|
||||||
0
src/parser_bot/api/__init__.py
Normal file
0
src/parser_bot/api/__init__.py
Normal file
1048
src/parser_bot/api/routes.py
Normal file
1048
src/parser_bot/api/routes.py
Normal file
File diff suppressed because it is too large
Load Diff
231
src/parser_bot/api/schemas.py
Normal file
231
src/parser_bot/api/schemas.py
Normal file
@@ -0,0 +1,231 @@
|
|||||||
|
import re
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Literal
|
||||||
|
|
||||||
|
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
||||||
|
|
||||||
|
Vertical = Literal["real_estate", "hr"]
|
||||||
|
|
||||||
|
# Section slugs are used as URL segments — keep them URL-safe.
|
||||||
|
_SLUG_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,62}[a-z0-9]$|^[a-z0-9]$")
|
||||||
|
|
||||||
|
|
||||||
|
class SectionCreate(BaseModel):
    # Request payload for creating a section inside a vertical.
    # (Kept as `#` comments: a class docstring would become the schema description.)
    vertical: Vertical
    # URL segment; validated against `_SLUG_RE` below.
    slug: str = Field(..., min_length=1, max_length=64)
    title: str = Field(..., min_length=1, max_length=255)
    emoji: str | None = Field(None, max_length=8)
    description: str | None = None
    access_code: str = Field(..., min_length=3, max_length=255)

    @field_validator("slug")
    @classmethod
    def _check_slug(cls, v: str) -> str:
        """Reject slugs that do not match ``_SLUG_RE`` in full.

        Raises:
            ValueError: if the slug contains anything outside the allowed
                lowercase letters, digits, ``-`` and ``_``.
        """
        # fullmatch, not match: `$` also matches just before a trailing
        # newline, so `match` would accept a slug such as "dubai\n".
        if not _SLUG_RE.fullmatch(v):
            raise ValueError(
                "slug must be lowercase letters/digits with '-' or '_' separators"
            )
        return v
|
||||||
|
|
||||||
|
|
||||||
|
class SectionUpdate(BaseModel):
    # Editable section fields; every field defaults to None.
    # `vertical` and `slug` are not part of this payload.
    title: str | None = Field(default=None, min_length=1, max_length=255)
    emoji: str | None = Field(default=None, max_length=8)
    description: str | None = None
    access_code: str | None = Field(default=None, min_length=3, max_length=255)
|
||||||
|
|
||||||
|
|
||||||
|
class SectionOut(BaseModel):
    # Section as returned by the API; `from_attributes` allows building it
    # directly from an object exposing these attributes.
    model_config = ConfigDict(from_attributes=True)

    id: int
    vertical: Vertical
    slug: str
    title: str
    emoji: str | None
    description: str | None
    # NOTE(review): the access code is part of the response payload —
    # confirm this model is only returned to callers allowed to see it.
    access_code: str | None = None
    created_at: datetime
|
||||||
|
|
||||||
|
|
||||||
|
class SectionWithStats(SectionOut):
    """Section payload enriched with rollup counts for the section chooser page."""

    # All counters default to zero.
    channels_total: int = 0
    channels_active: int = 0
    messages_total: int = 0
    leads_total: int = 0
|
||||||
|
|
||||||
|
|
||||||
|
class ChannelCreate(BaseModel):
    # Request payload for registering a channel in a section.
    identifier: str = Field(min_length=1, max_length=255, description="@username or t.me link")
    # Defaults to the real-estate vertical when omitted.
    vertical: Vertical = "real_estate"
    # Required section slug.
    section: str = Field(
        min_length=1,
        max_length=64,
        description="Slug of the section inside the vertical (e.g. 'dubai')",
    )
|
||||||
|
|
||||||
|
|
||||||
|
class ChannelUpdate(BaseModel):
    # Editable channel fields; every field defaults to None.
    is_active: bool | None = None
    vertical: Vertical | None = None
    section: str | None = Field(
        default=None,
        min_length=1,
        max_length=64,
        description="Move the channel to another section in the same vertical",
    )
|
||||||
|
|
||||||
|
|
||||||
|
class ChannelOut(BaseModel):
    # Channel as returned by the API; `from_attributes` allows building it
    # directly from an object exposing these attributes.
    model_config = ConfigDict(from_attributes=True)

    id: int
    tg_id: int | None
    identifier: str
    title: str | None
    vertical: Vertical
    section_id: int
    # Optional convenience field next to `section_id`; None unless supplied.
    section_slug: str | None = None
    is_active: bool
    last_message_id: int | None
    last_polled_at: datetime | None
    created_at: datetime
|
||||||
|
|
||||||
|
|
||||||
|
class ChannelStats(BaseModel):
    # Per-channel summary: identity and state fields plus message rollups.
    channel_id: int
    identifier: str
    title: str | None
    vertical: Vertical
    section_slug: str | None = None
    is_active: bool
    last_polled_at: datetime | None
    # Rollups over the channel's messages.
    message_count: int
    last_message_at: datetime | None
|
||||||
|
|
||||||
|
|
||||||
|
class MediaFile(BaseModel):
    # One media attachment; everything except `kind` is optional.
    kind: str  # photo | video | document | audio | sticker | unknown
    url: str | None = None
    mime: str | None = None
    size: int | None = None
    skipped: str | None = None  # set when not downloaded (e.g. "too_large")
|
||||||
|
|
||||||
|
|
||||||
|
class RealEstate(BaseModel):
    # Real-estate attributes attached to a message; every field is optional.
    kind: str | None = None
    property_type: str | None = None
    rooms: str | None = None
    area_m2: float | None = None
    price: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class Lead(BaseModel):
    # Real-estate lead record; only `is_listing` is required.
    is_listing: bool
    kind: str | None = None  # sale | rent | purchase
    property_type: str | None = None
    rooms: str | None = None
    area_m2: float | None = None
    # Price is carried both as text and as a number with a currency code.
    price_text: str | None = None
    price_value: float | None = None
    currency: str | None = None  # RUB | USD | EUR | AED | GBP | CNY | TRY | KZT | BYN | UAH
    location: str | None = None
    contact_phone: str | None = None
    contact_name: str | None = None
    summary: str | None = None
    confidence: float = 0.0
|
||||||
|
|
||||||
|
|
||||||
|
class HrLead(BaseModel):
    """LLM verdict for HR-vertical messages (jobs / resumes / bare contacts)."""

    # Only `is_lead` is required; everything else has a default.
    is_lead: bool
    kind: str | None = None  # vacancy | resume | contact
    title: str | None = None
    company: str | None = None
    candidate_name: str | None = None
    experience_years: float | None = None
    skills: list[str] = []
    location: str | None = None
    remote: bool | None = None
    employment_type: str | None = None
    # Salary is carried both as text and as a number with a currency code.
    salary_text: str | None = None
    salary_value: float | None = None
    currency: str | None = None
    contact_phone: str | None = None
    contact_name: str | None = None
    summary: str | None = None
    confidence: float = 0.0
|
||||||
|
|
||||||
|
|
||||||
|
class Extracted(BaseModel):
    # Data extracted from a message: contact lists plus optional structured
    # payloads (`real_estate`, `lead`, `hr_lead`), each None when absent.
    phones: list[str] = []
    names: list[str] = []
    tg_handles: list[str] = []
    real_estate: RealEstate | None = None
    lead: Lead | None = None
    hr_lead: HrLead | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class MessageOut(BaseModel):
    # Message as returned by the API; `from_attributes` allows building it
    # directly from an object exposing these attributes.
    model_config = ConfigDict(from_attributes=True)

    id: int
    channel_id: int
    # Channel context copied onto the message; None unless supplied.
    channel_vertical: Vertical | None = None
    channel_section_slug: str | None = None
    tg_message_id: int
    # NOTE(review): presumably Telegram's album grouping — confirm in routes.py.
    grouped_id: int | None = None
    group_size: int = 1
    date: datetime
    text: str | None
    sender_id: int | None
    has_media: bool
    media_files: list[MediaFile] | None = None
    extracted: Extracted | None = None
    sender_username: str | None = None
    sender_name: str | None = None
    post_url: str | None = None
    views: int | None
    forwards: int | None
    fetched_at: datetime
|
||||||
|
|
||||||
|
|
||||||
|
class GlobalStats(BaseModel):
    # Aggregate counters for a vertical, optionally narrowed to one section.
    vertical: Vertical
    section_slug: str | None = None
    channels_total: int
    channels_active: int
    messages_total: int
    messages_last_24h: int
    # Lead counters default to zero.
    leads_total: int = 0
    leads_last_24h: int = 0
    poll_interval_seconds: int
    last_poll_at: datetime | None
|
||||||
|
|
||||||
|
|
||||||
|
class AuthStatus(BaseModel):
    # Authorization state; `username` and `phone` are optional.
    authorized: bool
    username: str | None = None
    phone: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class AuthCode(BaseModel):
    # Login code, 3 to 12 characters.
    code: str = Field(min_length=3, max_length=12)
|
||||||
|
|
||||||
|
|
||||||
|
class AuthPassword(BaseModel):
    # Non-empty password string.
    password: str = Field(min_length=1)
|
||||||
|
|
||||||
|
|
||||||
|
class AuthCodeResult(BaseModel):
    # Result of submitting a login code: whether a password is still needed.
    needs_password: bool
|
||||||
|
|
||||||
|
|
||||||
|
class AdminLogin(BaseModel):
    # Admin login payload: a non-empty password.
    password: str = Field(min_length=1)
|
||||||
|
|
||||||
|
|
||||||
|
class SectionLogin(BaseModel):
    # Section login payload: the (vertical, section slug) pair plus a code.
    vertical: Vertical
    section: str = Field(min_length=1, max_length=64)
    code: str = Field(min_length=1, max_length=255)
|
||||||
51
src/parser_bot/auth.py
Normal file
51
src/parser_bot/auth.py
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
"""Interactive Telethon login. Run once on a dev machine, copy the printed
|
||||||
|
TG_SESSION_STRING into your .env / k8s Secret, then deploy without ever
|
||||||
|
touching auth again.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
docker compose run --rm -it app python -m parser_bot.auth
|
||||||
|
|
||||||
|
Telegram requires interactive code entry only for the very first login;
|
||||||
|
the resulting StringSession can be reused on any host until you log out
|
||||||
|
or someone invalidates the session in Telegram settings.
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from telethon import TelegramClient
|
||||||
|
from telethon.sessions import StringSession
|
||||||
|
|
||||||
|
from parser_bot.config import settings
|
||||||
|
|
||||||
|
|
||||||
|
async def main() -> int:
    """Run the interactive Telegram login and print the resulting session string.

    Returns:
        2 when stdin is not a TTY (the login needs interactive input),
        0 after the session string has been printed.

    The client is always disconnected, even when the login or the profile
    lookup raises, so a failed attempt does not leave the connection open.
    """
    if not sys.stdin.isatty():
        print(
            "ERROR: not a TTY. Re-run with: "
            "docker compose run --rm -it app python -m parser_bot.auth",
            file=sys.stderr,
        )
        return 2

    # An empty StringSession starts a fresh login; save() serialises it below.
    client = TelegramClient(StringSession(), settings.tg_api_id, settings.tg_api_hash)
    try:
        await client.start(phone=settings.tg_phone)
        me = await client.get_me()
        session_str = client.session.save()
    finally:
        await client.disconnect()

    print()
    print(f"authorized as {me.username or me.id}")
    print()
    print("Add this line to your .env (or k8s Secret) and never share it:")
    print()
    print(f"TG_SESSION_STRING={session_str}")
    print()
    print(
        "After saving, no further interactive auth is needed. Restarts, rebuilds,"
        " redeploys all reuse this string."
    )
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.exit(asyncio.run(main()))
|
||||||
64
src/parser_bot/config.py
Normal file
64
src/parser_bot/config.py
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
from pydantic import Field
|
||||||
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||||
|
|
||||||
|
|
||||||
|
class Settings(BaseSettings):
    # Application configuration loaded from environment variables and `.env`.
    # Each field is bound to its upper-case variable through `alias`;
    # variables that match no field are ignored (`extra="ignore"`).
    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore")

    # Telegram credentials — required, no defaults.
    tg_api_id: int = Field(..., alias="TG_API_ID")
    tg_api_hash: str = Field(..., alias="TG_API_HASH")
    tg_phone: str = Field(..., alias="TG_PHONE")
    tg_session_path: str = Field("/data/session/parser.session", alias="TG_SESSION_PATH")
    # Preferred for prod / k8s: opaque base64-ish string from `python -m parser_bot.auth`.
    # If set, takes priority over file-based session.
    tg_session_string: str | None = Field(None, alias="TG_SESSION_STRING")

    # Postgres connection parts; combined by `database_url`.
    postgres_user: str = Field("parser", alias="POSTGRES_USER")
    postgres_password: str = Field("parser", alias="POSTGRES_PASSWORD")
    postgres_db: str = Field("parser", alias="POSTGRES_DB")
    postgres_host: str = Field("db", alias="POSTGRES_HOST")
    postgres_port: int = Field(5432, alias="POSTGRES_PORT")

    # Polling
    poll_interval_seconds: int = Field(60, alias="POLL_INTERVAL_SECONDS")
    poll_history_limit: int = Field(50, alias="POLL_HISTORY_LIMIT")

    # HTTP API
    api_host: str = Field("0.0.0.0", alias="API_HOST")
    api_port: int = Field(8000, alias="API_PORT")
    public_base_path: str = Field("", alias="PUBLIC_BASE_PATH")

    # Media storage; the size default is 20 MiB.
    media_dir: str = Field("/data/media", alias="MEDIA_DIR")
    media_max_bytes: int = Field(20 * 1024 * 1024, alias="MEDIA_MAX_BYTES")

    # Local LLM via Ollama for lead classification & extraction
    llm_enabled: bool = Field(True, alias="LLM_ENABLED")
    llm_base_url: str = Field("http://ollama:11434", alias="LLM_BASE_URL")
    llm_model: str = Field("qwen2.5:7b-instruct-q4_K_M", alias="LLM_MODEL")
    llm_timeout_seconds: int = Field(120, alias="LLM_TIMEOUT_SECONDS")
    llm_min_text_length: int = Field(20, alias="LLM_MIN_TEXT_LENGTH")
    llm_classify_interval_seconds: int = Field(20, alias="LLM_CLASSIFY_INTERVAL_SECONDS")
    llm_classify_batch_size: int = Field(5, alias="LLM_CLASSIFY_BATCH_SIZE")

    # Admin allowlist for /auth.html, /docs, /openapi.json, /redoc and the
    # /auth/* API endpoints. Comma-separated IPv4/IPv6. Empty (default) means
    # no restriction — convenient for local dev. Set explicitly in prod.
    admin_allowed_ips: str = Field("", alias="ADMIN_ALLOWED_IPS")
    # Optional second factor for admin-only UI/API operations. Empty keeps the
    # previous IP-only behavior for local/dev deployments.
    admin_password: str = Field("", alias="ADMIN_PASSWORD")
    # When true, honor X-Forwarded-For / X-Real-IP set by a reverse proxy
    # in front of uvicorn (Docker port-forward, nginx, traefik, etc).
    trust_proxy_headers: bool = Field(True, alias="TRUST_PROXY_HEADERS")

    @property
    def admin_ip_set(self) -> set[str]:
        """Return ``admin_allowed_ips`` split on commas, trimmed, blanks dropped."""
        return {s.strip() for s in self.admin_allowed_ips.split(",") if s.strip()}

    @property
    def database_url(self) -> str:
        """Build the ``postgresql+asyncpg://`` connection URL.

        User and password are percent-encoded so that characters such as
        ``@``, ``:`` or ``/`` in the credentials cannot be mistaken for URL
        delimiters. Credentials without special characters produce exactly
        the same URL as before.
        """
        from urllib.parse import quote

        # NOTE(review): if this URL is ever passed through a configparser-based
        # config (e.g. Alembic's ini options), '%' needs doubling — confirm.
        user = quote(self.postgres_user, safe="")
        password = quote(self.postgres_password, safe="")
        return (
            f"postgresql+asyncpg://{user}:{password}"
            f"@{self.postgres_host}:{self.postgres_port}/{self.postgres_db}"
        )
|
||||||
|
|
||||||
|
|
||||||
|
settings = Settings()
|
||||||
0
src/parser_bot/db/__init__.py
Normal file
0
src/parser_bot/db/__init__.py
Normal file
119
src/parser_bot/db/models.py
Normal file
119
src/parser_bot/db/models.py
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from sqlalchemy import (
|
||||||
|
BigInteger,
|
||||||
|
DateTime,
|
||||||
|
ForeignKey,
|
||||||
|
Index,
|
||||||
|
String,
|
||||||
|
Text,
|
||||||
|
UniqueConstraint,
|
||||||
|
func,
|
||||||
|
)
|
||||||
|
from sqlalchemy.dialects.postgresql import JSONB
|
||||||
|
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
|
||||||
|
|
||||||
|
|
||||||
|
class Base(DeclarativeBase):
    """Declarative base class that the ORM models in this module derive from."""
|
||||||
|
|
||||||
|
|
||||||
|
class Section(Base):
    """Sub-section of a vertical, for example ``('real_estate', 'dubai')``.

    ``(vertical, slug)`` is unique and is what URLs and API calls use to
    address a section. Every channel belongs to exactly one section and the
    section records its vertical. The LLM prompt store may keep a
    per-section override that falls back to the vertical-level prompt.
    """

    __tablename__ = "sections"
    __table_args__ = (
        # One slug per vertical.
        UniqueConstraint("vertical", "slug", name="uq_section_vertical_slug"),
        # Lookup of all sections of a vertical.
        Index("ix_sections_vertical", "vertical"),
    )

    id: Mapped[int] = mapped_column(primary_key=True)
    vertical: Mapped[str] = mapped_column(String(32))
    slug: Mapped[str] = mapped_column(String(64))
    title: Mapped[str] = mapped_column(String(255))
    emoji: Mapped[str | None] = mapped_column(String(8), nullable=True)
    description: Mapped[str | None] = mapped_column(Text, nullable=True)
    access_code: Mapped[str | None] = mapped_column(String(255), nullable=True)
    # Filled in by the database at insert time.
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())

    # Reverse side of Channel.section.
    channels: Mapped[list["Channel"]] = relationship(back_populates="section")
|
||||||
|
|
||||||
|
|
||||||
|
class Channel(Base):
    """A monitored Telegram channel together with its polling bookmarks."""

    __tablename__ = "channels"

    id: Mapped[int] = mapped_column(primary_key=True)
    # Telegram numeric channel id (peer id); stays NULL until first resolved.
    tg_id: Mapped[int | None] = mapped_column(BigInteger, unique=True, nullable=True)
    # Username or t.me/joinchat link exactly as supplied by the user.
    identifier: Mapped[str] = mapped_column(String(255), unique=True)
    title: Mapped[str | None] = mapped_column(String(512), nullable=True)
    # 'real_estate' or 'hr' — selects the LLM prompt and the lead schema.
    vertical: Mapped[str] = mapped_column(
        String(32),
        default="real_estate",
        server_default="real_estate",
        index=True,
    )
    # RESTRICT: a section that still has channels cannot be deleted.
    section_id: Mapped[int] = mapped_column(
        ForeignKey("sections.id", ondelete="RESTRICT"),
        index=True,
    )
    is_active: Mapped[bool] = mapped_column(default=True, server_default="true")
    last_message_id: Mapped[int | None] = mapped_column(BigInteger, nullable=True)
    last_polled_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())

    section: Mapped[Section] = relationship(back_populates="channels")
    # passive_deletes leaves row removal to the ON DELETE CASCADE declared on
    # Message.channel_id instead of loading the messages first.
    messages: Mapped[list["Message"]] = relationship(
        back_populates="channel",
        cascade="all, delete-orphan",
        passive_deletes=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
class Message(Base):
    """One Telegram message fetched from a channel, plus derived data."""

    __tablename__ = "messages"
    __table_args__ = (
        # A Telegram message id is stored at most once per channel.
        UniqueConstraint("channel_id", "tg_message_id", name="uq_channel_message"),
        # Supports per-channel listing ordered or filtered by date.
        Index("ix_messages_channel_date", "channel_id", "date"),
    )

    id: Mapped[int] = mapped_column(primary_key=True)
    # Rows are removed by the database when the owning channel is deleted.
    channel_id: Mapped[int] = mapped_column(ForeignKey("channels.id", ondelete="CASCADE"))
    tg_message_id: Mapped[int] = mapped_column(BigInteger)
    date: Mapped[datetime] = mapped_column(DateTime(timezone=True))
    text: Mapped[str | None] = mapped_column(Text, nullable=True)

    # Sender details; all optional.
    sender_id: Mapped[int | None] = mapped_column(BigInteger, nullable=True)
    sender_username: Mapped[str | None] = mapped_column(String(64), nullable=True)
    sender_name: Mapped[str | None] = mapped_column(String(255), nullable=True)

    grouped_id: Mapped[int | None] = mapped_column(BigInteger, nullable=True)
    has_media: Mapped[bool] = mapped_column(default=False, server_default="false")
    views: Mapped[int | None] = mapped_column(nullable=True)
    forwards: Mapped[int | None] = mapped_column(nullable=True)

    # JSONB payloads.
    raw: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
    media_files: Mapped[list | None] = mapped_column(JSONB, nullable=True)
    extracted: Mapped[dict | None] = mapped_column(JSONB, nullable=True)

    fetched_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())

    channel: Mapped[Channel] = relationship(back_populates="messages")
|
||||||
|
|
||||||
|
|
||||||
|
class AppSetting(Base):
    """Runtime-editable settings, edited from the UI without a restart.

    Each row is one key with a JSONB value.
    """

    __tablename__ = "app_settings"

    key: Mapped[str] = mapped_column(String(128), primary_key=True)
    value: Mapped[dict | str | int | bool | None] = mapped_column(JSONB, nullable=False)
    # server_default covers INSERT; onupdate refreshes the timestamp on UPDATE
    # statements that do not set the column explicitly, so the value reflects
    # the last edit rather than the row's creation time.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
    )
|
||||||
25
src/parser_bot/db/session.py
Normal file
25
src/parser_bot/db/session.py
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
from collections.abc import AsyncIterator
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
|
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
|
||||||
|
|
||||||
|
from parser_bot.config import settings
|
||||||
|
|
||||||
|
# pool_pre_ping is enabled on the shared engine.
engine = create_async_engine(
    settings.database_url,
    pool_pre_ping=True,
)
# expire_on_commit=False is set on every session produced by this factory.
SessionFactory = async_sessionmaker(
    engine,
    expire_on_commit=False,
    class_=AsyncSession,
)


@asynccontextmanager
async def session_scope() -> AsyncIterator[AsyncSession]:
    """Transactional scope: commit when the body succeeds, roll back otherwise.

    Yields:
        A fresh ``AsyncSession``; it is closed when the scope exits.

    Raises:
        Re-raises any ``Exception`` from the body (or from the commit) after
        rolling the session back.
    """
    async with SessionFactory() as db:
        try:
            yield db
            await db.commit()
        except Exception:
            await db.rollback()
            raise
|
||||||
|
|
||||||
|
|
||||||
|
async def get_session() -> AsyncIterator[AsyncSession]:
    """Yield a session that is closed on exit.

    Unlike ``session_scope`` this generator neither commits nor rolls back;
    that is left to the consumer.
    NOTE(review): presumably used as a web-framework dependency — confirm.
    """
    async with SessionFactory() as db:
        yield db
|
||||||
334
src/parser_bot/extractors.py
Normal file
334
src/parser_bot/extractors.py
Normal file
@@ -0,0 +1,334 @@
|
|||||||
|
"""Heuristic extractors for Telegram message text.
|
||||||
|
|
||||||
|
Russian-first, regex/keyword based, no ML deps. Goal is to surface signals for
|
||||||
|
the UI: phone numbers, person names (FIO), and real-estate intent (sale/rent/
|
||||||
|
purchase). False positives are tolerable — operator triages in the UI.
|
||||||
|
|
||||||
|
Output shape (used as JSONB in messages.extracted):
|
||||||
|
{
|
||||||
|
"phones": ["+79123456789", ...],
|
||||||
|
"names": ["Иван Петров", ...],
|
||||||
|
"real_estate": {
|
||||||
|
"kind": "sale" | "rent" | "purchase" | null,
|
||||||
|
"property_type": str | null, # квартира, дом, ...
|
||||||
|
"rooms": str | null, # "2-к"
|
||||||
|
"area_m2": float | null,
|
||||||
|
"price": str | null, # raw matched string
|
||||||
|
} | null
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
# --- Telegram @handles ---------------------------------------------------
|
||||||
|
|
||||||
|
# Plain @username — Telegram allows 5–32 chars, letters/digits/_, no leading digit.
|
||||||
|
_TG_HANDLE_RE = re.compile(r"(?<![\w/])@([A-Za-z][A-Za-z0-9_]{4,31})\b")
|
||||||
|
# t.me / telegram.me links to a user/channel handle (not joinchat / +invite).
|
||||||
|
_TG_LINK_RE = re.compile(
|
||||||
|
r"(?:https?://)?(?:t|telegram)\.me/(?!joinchat/|\+)([A-Za-z][A-Za-z0-9_]{4,31})\b"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_tg_handles(text: str | None) -> list[str]:
|
||||||
|
if not text:
|
||||||
|
return []
|
||||||
|
out: list[str] = []
|
||||||
|
seen: set[str] = set()
|
||||||
|
for h in _TG_HANDLE_RE.findall(text):
|
||||||
|
key = h.lower()
|
||||||
|
if key in seen:
|
||||||
|
continue
|
||||||
|
seen.add(key)
|
||||||
|
out.append("@" + h)
|
||||||
|
for h in _TG_LINK_RE.findall(text):
|
||||||
|
key = h.lower()
|
||||||
|
if key in seen:
|
||||||
|
continue
|
||||||
|
seen.add(key)
|
||||||
|
out.append("@" + h)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
# --- Phones --------------------------------------------------------------
|
||||||
|
|
||||||
|
# Russian-format: starts with +7, 7, or 8 (no plus), 11 digits total.
|
||||||
|
_PHONE_RU_RE = re.compile(
|
||||||
|
r"(?<!\d)(?:\+?7|8)[\s\-().]*\d{3}[\s\-().]*\d{3}[\s\-().]*\d{2}[\s\-().]*\d{2}(?!\d)"
|
||||||
|
)
|
||||||
|
|
||||||
|
# International format: starts with `+<country code>` then 7–14 more digits
|
||||||
|
# with optional separators. Catches +971 (UAE), +1 (US), +44 (UK), etc.
|
||||||
|
_PHONE_INTL_RE = re.compile(
|
||||||
|
r"(?<![\w\d])\+\d{1,3}[\s\-().]*(?:\d[\s\-().]*){6,14}\d(?!\d)"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_phones(text: str | None) -> list[str]:
|
||||||
|
if not text:
|
||||||
|
return []
|
||||||
|
out: list[str] = []
|
||||||
|
seen: set[str] = set()
|
||||||
|
|
||||||
|
# Pass 1: Russian-style. Normalize to +7XXXXXXXXXX.
|
||||||
|
for raw in _PHONE_RU_RE.findall(text):
|
||||||
|
digits = re.sub(r"\D", "", raw)
|
||||||
|
if len(digits) == 11 and digits[0] in "78":
|
||||||
|
normalized = "+7" + digits[1:]
|
||||||
|
elif len(digits) == 10:
|
||||||
|
normalized = "+7" + digits
|
||||||
|
else:
|
||||||
|
continue
|
||||||
|
if normalized not in seen:
|
||||||
|
seen.add(normalized)
|
||||||
|
out.append(normalized)
|
||||||
|
|
||||||
|
# Pass 2: international "+<country>...". Keep raw plus-prefix; just
|
||||||
|
# collapse separators so the result is +<digits>.
|
||||||
|
for raw in _PHONE_INTL_RE.findall(text):
|
||||||
|
digits = re.sub(r"\D", "", raw)
|
||||||
|
if not (8 <= len(digits) <= 15):
|
||||||
|
continue
|
||||||
|
normalized = "+" + digits
|
||||||
|
# If it normalized to something we already captured (e.g. +7 number
|
||||||
|
# picked up by both passes), skip.
|
||||||
|
if normalized in seen:
|
||||||
|
continue
|
||||||
|
seen.add(normalized)
|
||||||
|
out.append(normalized)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
# --- Names (ФИО) ---------------------------------------------------------
|
||||||
|
|
||||||
|
# Two or three capitalized Cyrillic tokens in a row. Allows hyphens (Иванов-Петров).
|
||||||
|
_NAME_RE = re.compile(
|
||||||
|
r"\b([А-ЯЁ][а-яё]+(?:\-[А-ЯЁ][а-яё]+)?(?:\s+[А-ЯЁ][а-яё]+(?:\-[А-ЯЁ][а-яё]+)?){1,2})\b"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Common false positives — geo/places/orgs/etc. Skip exact matches.
|
||||||
|
_NAME_BLOCKLIST = {
|
||||||
|
"Российская Федерация",
|
||||||
|
"Санкт Петербург",
|
||||||
|
"Санкт-Петербург",
|
||||||
|
"Нижний Новгород",
|
||||||
|
"Великий Новгород",
|
||||||
|
"Ростов На Дону",
|
||||||
|
"Ростов-На-Дону",
|
||||||
|
"Москва Сити",
|
||||||
|
"Красная Площадь",
|
||||||
|
"Чёрное Море",
|
||||||
|
"Чёрного Моря",
|
||||||
|
"Без Депозита",
|
||||||
|
"Без Залога",
|
||||||
|
"Без Комиссии",
|
||||||
|
"Сдам Квартиру",
|
||||||
|
"Продам Квартиру",
|
||||||
|
"Куплю Квартиру",
|
||||||
|
"Сдам Студию",
|
||||||
|
"Продам Студию",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Words that look like names but rarely are (months, weekdays, common nouns).
|
||||||
|
_NAME_TOKEN_BLOCK = {
|
||||||
|
"Январь", "Февраль", "Март", "Апрель", "Май", "Июнь",
|
||||||
|
"Июль", "Август", "Сентябрь", "Октябрь", "Ноябрь", "Декабрь",
|
||||||
|
"Понедельник", "Вторник", "Среда", "Четверг", "Пятница", "Суббота", "Воскресенье",
|
||||||
|
"Москва", "Питер", "Россия", "Кремль", "Метро",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def extract_names(text: str | None) -> list[str]:
|
||||||
|
if not text:
|
||||||
|
return []
|
||||||
|
out: list[str] = []
|
||||||
|
seen: set[str] = set()
|
||||||
|
for match in _NAME_RE.findall(text):
|
||||||
|
candidate = match.strip()
|
||||||
|
if candidate in _NAME_BLOCKLIST:
|
||||||
|
continue
|
||||||
|
tokens = re.split(r"[\s\-]+", candidate)
|
||||||
|
if any(t in _NAME_TOKEN_BLOCK for t in tokens):
|
||||||
|
continue
|
||||||
|
# Heuristic: at least one token must have len >= 4 (rules out "Ул.")
|
||||||
|
if not any(len(t) >= 4 for t in tokens):
|
||||||
|
continue
|
||||||
|
if candidate not in seen:
|
||||||
|
seen.add(candidate)
|
||||||
|
out.append(candidate)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
# --- Real estate ---------------------------------------------------------
|
||||||
|
|
||||||
|
_DEAL_KEYWORDS: dict[str, tuple[str, ...]] = {
|
||||||
|
"rent": (
|
||||||
|
# ru
|
||||||
|
"сдаётся", "сдается", "сдаю", "сдадим", "сдам", "сдаём",
|
||||||
|
"аренда", "арендую", "арендуем", "снять",
|
||||||
|
"посуточно", "помесячно",
|
||||||
|
# en
|
||||||
|
"for rent", "to let", "rental", "renting", "lease", "leasing",
|
||||||
|
"per year", "per month", "/year", "/month", "/mo",
|
||||||
|
),
|
||||||
|
"sale": (
|
||||||
|
# ru
|
||||||
|
"продаётся", "продается", "продаю", "продадим", "продам", "продаём",
|
||||||
|
"продажа", "к продаже",
|
||||||
|
# en
|
||||||
|
"for sale", "#forsale", "selling", "selling price", "sale price",
|
||||||
|
),
|
||||||
|
"purchase": (
|
||||||
|
# ru
|
||||||
|
"куплю", "купим", "покупаю", "покупка", "ищу квартиру",
|
||||||
|
"ищу дом", "ищем квартиру", "рассматриваю покупку",
|
||||||
|
# en
|
||||||
|
"looking for", "want to buy", "wanted", "requirement", "wtb",
|
||||||
|
),
|
||||||
|
}
|
||||||
|
|
||||||
|
_PROPERTY_TYPES: tuple[tuple[str, str], ...] = (
|
||||||
|
# ru
|
||||||
|
("квартир", "квартира"),
|
||||||
|
("студи", "студия"),
|
||||||
|
("апартамент", "апартаменты"),
|
||||||
|
("комнат", "комната"),
|
||||||
|
("таунхаус", "таунхаус"),
|
||||||
|
("коттедж", "коттедж"),
|
||||||
|
("дача", "дача"),
|
||||||
|
("дом", "дом"),
|
||||||
|
("офис", "офис"),
|
||||||
|
("склад", "склад"),
|
||||||
|
("помещен", "помещение"),
|
||||||
|
("земельн", "земельный участок"),
|
||||||
|
("участок", "участок"),
|
||||||
|
("гараж", "гараж"),
|
||||||
|
("машиномест", "машиноместо"),
|
||||||
|
# en — kept as Russian labels for UI consistency
|
||||||
|
("villa", "дом"),
|
||||||
|
("townhouse", "таунхаус"),
|
||||||
|
("penthouse", "апартаменты"),
|
||||||
|
("apartment", "квартира"),
|
||||||
|
("studio", "студия"),
|
||||||
|
("plot", "участок"),
|
||||||
|
(" land ", "участок"),
|
||||||
|
("office", "офис"),
|
||||||
|
("warehouse", "склад"),
|
||||||
|
("retail", "помещение"),
|
||||||
|
("garage", "гараж"),
|
||||||
|
)
|
||||||
|
|
||||||
|
_AREA_M2_RE = re.compile(
|
||||||
|
r"(\d[\d\s,]*\d|\d)\s*(?:м[²2]|кв\.?\s*м|кв\.\s*метр)",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
_AREA_SQFT_RE = re.compile(
|
||||||
|
r"(\d[\d\s,]*\d|\d)\s*(?:sqft|sq\.?\s*ft|sq\s+ft|square\s+feet)",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_number(s: str) -> float | None:
|
||||||
|
cleaned = s.replace(" ", "").replace(",", "")
|
||||||
|
try:
|
||||||
|
return float(cleaned)
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
_ROOMS_RE = re.compile(
|
||||||
|
r"\b(\d)[\-\s]*(?:к\b|комн|комнатн|-комнат|br\b|bed\b|bedroom|-bed)",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
# Studio is a special-case "0 rooms" indicator; not extracted as rooms count.
|
||||||
|
_PRICE_RE = re.compile(
|
||||||
|
r"(\d[\d\s.,]*\d|\d)\s*(млн|млрд|тыс|тысяч|миллионов?|миллиардов?|руб(?:лей)?|₽|р/мес|/мес|р\b)",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_kind(low: str) -> str | None:
|
||||||
|
for kind, words in _DEAL_KEYWORDS.items():
|
||||||
|
for w in words:
|
||||||
|
if w in low:
|
||||||
|
return kind
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_property_type(low: str) -> str | None:
|
||||||
|
for stem, label in _PROPERTY_TYPES:
|
||||||
|
if stem in low:
|
||||||
|
return label
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_real_estate(text: str | None) -> dict[str, Any] | None:
|
||||||
|
if not text:
|
||||||
|
return None
|
||||||
|
low = text.lower()
|
||||||
|
kind = _detect_kind(low)
|
||||||
|
prop = _detect_property_type(low)
|
||||||
|
if kind is None and prop is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
rooms_m = _ROOMS_RE.search(low)
|
||||||
|
rooms = f"{rooms_m.group(1)}-к" if rooms_m else None
|
||||||
|
if rooms is None and ("студи" in low or "studio" in low):
|
||||||
|
rooms = "студия"
|
||||||
|
|
||||||
|
area: float | None = None
|
||||||
|
area_m = _AREA_M2_RE.search(text)
|
||||||
|
if area_m:
|
||||||
|
area = _parse_number(area_m.group(1))
|
||||||
|
if area is None:
|
||||||
|
sqft_m = _AREA_SQFT_RE.search(text)
|
||||||
|
if sqft_m:
|
||||||
|
sqft = _parse_number(sqft_m.group(1))
|
||||||
|
if sqft is not None:
|
||||||
|
area = round(sqft * 0.0929, 1)
|
||||||
|
|
||||||
|
price_m = _PRICE_RE.search(text)
|
||||||
|
price = price_m.group(0).strip() if price_m else None
|
||||||
|
|
||||||
|
return {
|
||||||
|
"kind": kind,
|
||||||
|
"property_type": prop,
|
||||||
|
"rooms": rooms,
|
||||||
|
"area_m2": area,
|
||||||
|
"price": price,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# --- Top-level analyzer --------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def analyze(text: str | None) -> dict[str, Any]:
    """Run every regex extractor over ``text`` and bundle the results.

    Purely synchronous and regex-only, so it is cheap enough to call at
    insert time. The result has the keys ``phones``, ``names``,
    ``tg_handles`` and ``real_estate``.
    """
    result: dict[str, Any] = {}
    result["phones"] = extract_phones(text)
    result["names"] = extract_names(text)
    result["tg_handles"] = extract_tg_handles(text)
    result["real_estate"] = extract_real_estate(text)
    return result
|
||||||
|
|
||||||
|
|
||||||
|
async def analyze_with_llm(
    text: str | None,
    vertical: str = "real_estate",
    section_slug: str | None = None,
) -> dict[str, Any]:
    """Regex extraction enriched with the local LLM verdict for ``vertical``.

    ``section_slug`` is forwarded to the classifier so that it can choose a
    section-specific system prompt (for example a Dubai-focused one for
    ``real_estate:dubai``), falling back to the vertical default. The verdict
    is stored under ``hr_lead`` for the HR vertical and under ``lead``
    otherwise. When the classifier returns ``None`` the regex-only result is
    returned unchanged.
    """
    result = analyze(text)
    # Lazy import to avoid hard dep on httpx in environments where LLM is off.
    from parser_bot.llm import classify

    verdict = await classify(text, vertical, section_slug)  # type: ignore[arg-type]
    if verdict is None:
        return result
    target_key = "hr_lead" if vertical == "hr" else "lead"
    result[target_key] = verdict
    return result
|
||||||
44
src/parser_bot/links.py
Normal file
44
src/parser_bot/links.py
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
"""Build Telegram URLs from stored channel metadata."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
_USERNAME_RE = re.compile(r"^@?([A-Za-z][A-Za-z0-9_]{4,31})$")
|
||||||
|
_TME_URL_RE = re.compile(
|
||||||
|
r"^(?:https?://)?(?:t|telegram)\.me/(?:s/)?([A-Za-z][A-Za-z0-9_]{4,31})(?:/.*)?$"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def channel_username(identifier: str | None) -> str | None:
|
||||||
|
"""Extract the public username from a channel identifier if any.
|
||||||
|
|
||||||
|
Returns None for private channels (joinchat, +invite, raw IDs).
|
||||||
|
"""
|
||||||
|
if not identifier:
|
||||||
|
return None
|
||||||
|
s = identifier.strip()
|
||||||
|
m = _USERNAME_RE.match(s)
|
||||||
|
if m:
|
||||||
|
return m.group(1)
|
||||||
|
m = _TME_URL_RE.match(s)
|
||||||
|
if m:
|
||||||
|
return m.group(1)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def post_url(identifier: str | None, tg_id: int | None, tg_message_id: int) -> str | None:
    """Build a deep link to a single Telegram post.

    Public channel: ``https://t.me/<username>/<msg_id>``.
    Channel without a public username: ``https://t.me/c/<short>/<msg_id>``,
    where ``<short>`` is the absolute value of ``tg_id`` with a leading
    ``100`` removed. Returns ``None`` when neither a username nor ``tg_id``
    is available.
    """
    name = channel_username(identifier)
    if name:
        return f"https://t.me/{name}/{tg_message_id}"
    if tg_id is None:
        return None
    digits = str(abs(tg_id))
    # Drop the "100" marker only when something is left after it.
    if digits.startswith("100") and len(digits) > 3:
        digits = digits[3:]
    return f"https://t.me/c/{digits}/{tg_message_id}"
|
||||||
363
src/parser_bot/llm.py
Normal file
363
src/parser_bot/llm.py
Normal file
@@ -0,0 +1,363 @@
|
|||||||
|
"""Local LLM (Ollama) client for lead classification & extraction.
|
||||||
|
|
||||||
|
Two verticals share one model and one process:
|
||||||
|
- real_estate: high recall on listings (sale/rent/purchase),
|
||||||
|
- hr: vacancies, resumes, bare contact leads.
|
||||||
|
|
||||||
|
The system prompt and JSON schema differ per vertical; the rest of the
|
||||||
|
plumbing (timeouts, single-lock concurrency, JSON-mode parsing) is shared.
|
||||||
|
On any error returns `None` and the caller falls back to regex-only extraction.
|
||||||
|
|
||||||
|
The model runs on CPU via Ollama (Qwen2.5 7B Q4_K_M). Each call ~3–6s on
|
||||||
|
i5-12400. Concurrency is 1 (Ollama already saturates CPU per call).
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
from typing import Any, Literal
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import structlog
|
||||||
|
|
||||||
|
from parser_bot.config import settings
|
||||||
|
|
||||||
|
log = structlog.get_logger()
|
||||||
|
|
||||||
|
|
||||||
|
# Single shared lock so we never run two LLM requests at once: the model runs
# on CPU via Ollama, so concurrent calls would only compete for the same cores
# and finish slower than sequential ones.
|
||||||
|
_lock = asyncio.Lock()
|
||||||
|
|
||||||
|
|
||||||
|
Vertical = Literal["real_estate", "hr"]
|
||||||
|
|
||||||
|
|
||||||
|
DEFAULT_RE_SYSTEM_PROMPT = """\
|
||||||
|
Ты — аналитик объявлений о недвижимости. Тебе дают текст из Telegram-канала.
|
||||||
|
Сообщение МОЖЕТ БЫТЬ НА ЛЮБОМ ЯЗЫКЕ — русский, английский, арабский, любой
|
||||||
|
другой. Обрабатывай его одинаково независимо от языка.
|
||||||
|
|
||||||
|
Задача: определить, является ли это РЕАЛЬНЫМ объявлением о покупке, продаже
|
||||||
|
или аренде НЕДВИЖИМОСТИ (квартира, дом/villa, студия/studio, апартаменты,
|
||||||
|
комната, таунхаус/townhouse, дача, коттедж, пентхаус/penthouse, офис,
|
||||||
|
склад, помещение, земельный участок/plot/land, гараж, машиноместо).
|
||||||
|
Учитывай намёки и нечёткие формулировки — лучше отметить сомнительный лид
|
||||||
|
как `is_listing=true` с низкой confidence, чем пропустить.
|
||||||
|
|
||||||
|
Сигналы что это ОБЪЯВЛЕНИЕ (kind):
|
||||||
|
— продажа/sale: «продаётся», «продаю», «продажа», «for sale», «#forsale»,
|
||||||
|
«selling price», «selling», «price», «AED 33M», ценник в любой валюте.
|
||||||
|
— аренда/rent: «сдаётся», «сдаю», «аренда», «for rent», «to let», «rental»,
|
||||||
|
«per year», «per month», «AED ... /year».
|
||||||
|
— покупка/purchase: «куплю», «куплю в», «looking for», «want to buy»,
|
||||||
|
«wanted», «requirement».
|
||||||
|
|
||||||
|
ОДНО сообщение может быть и про продажу, И про аренду одновременно
|
||||||
|
(«FOR SALE | RENT» / «продажа или аренда»). В таком случае выбирай
|
||||||
|
основное намерение по самому тексту; если равноценно — `kind="sale"`
|
||||||
|
и упомяни аренду в summary.
|
||||||
|
|
||||||
|
НЕ объявления (is_listing=false):
|
||||||
|
— общие новости / статьи / аналитика рынка;
|
||||||
|
— воспоминания и истории («когда-то продавал квартиру»);
|
||||||
|
— шутки, мемы, цитаты;
|
||||||
|
— реклама услуг агентств без конкретного объекта;
|
||||||
|
— чужие пересланные объявления без контактов и явного предложения от автора.
|
||||||
|
|
||||||
|
Отвечай СТРОГО валидным JSON по схеме (никаких комментариев, никакого markdown):
|
||||||
|
{
|
||||||
|
"is_listing": boolean,
|
||||||
|
"kind": "sale" | "rent" | "purchase" | null,
|
||||||
|
"property_type": "квартира" | "дом" | "студия" | "апартаменты" | "комната" | "таунхаус" | "дача" | "коттедж" | "офис" | "склад" | "помещение" | "участок" | "гараж" | "машиноместо" | null,
|
||||||
|
"rooms": "студия" | "1-к" | "2-к" | "3-к" | "4-к" | "5+к" | null,
|
||||||
|
"area_m2": number | null,
|
||||||
|
"price_text": string | null,
|
||||||
|
"price_value": number | null,
|
||||||
|
"currency": "RUB" | "USD" | "EUR" | "AED" | "GBP" | "CNY" | "TRY" | "KZT" | "BYN" | "UAH" | null,
|
||||||
|
"location": string | null,
|
||||||
|
"contact_phone": string | null,
|
||||||
|
"contact_name": string | null,
|
||||||
|
"summary": string,
|
||||||
|
"confidence": number
|
||||||
|
}
|
||||||
|
|
||||||
|
Поля:
|
||||||
|
- summary — ОДНО короткое предложение НА РУССКОМ языке (даже если исходный
|
||||||
|
текст на английском или другом). Это нужно для единообразного UI.
|
||||||
|
- property_type — пиши значение по-русски (villa→дом, apartment→квартира,
|
||||||
|
townhouse→таунхаус, plot/land→участок, studio→студия, penthouse→апартаменты,
|
||||||
|
house→дом, office→офис, warehouse→склад, retail→помещение).
|
||||||
|
- rooms — для англоязычного «3BR», «3 BR», «3 bed», «3-bedroom» возвращай
|
||||||
|
«3-к»; для «studio» → «студия».
|
||||||
|
- area_m2 — площадь В КВАДРАТНЫХ МЕТРАХ. Если в тексте sqft / sq.ft / sq ft /
|
||||||
|
square feet — переведи: m² = sqft × 0.0929. Округляй до целого.
|
||||||
|
- confidence ∈ [0, 1]: 0.9+ если явное объявление с ценой/контактом,
|
||||||
|
0.5–0.8 если правдоподобно, 0.2–0.4 если намёк.
|
||||||
|
- price_text — точная цитата из текста («2.5 млн ₽», «AED 850 000», «$320k»,
|
||||||
|
«300 тыс. дирхам», «د.إ 1.2M», «70,000,000 AED», «AED 4.3M», «AED 1.75M»).
|
||||||
|
- price_value — числовая величина цены В УКАЗАННОЙ ВАЛЮТЕ (не конвертируй).
|
||||||
|
Раскрывай сокращения: «AED 4.3M» → 4300000, «$320k» → 320000.
|
||||||
|
- currency — определяй гибко: ₽/руб/р/RUB/рублей → RUB; $/USD/долл/бакс → USD;
|
||||||
|
€/EUR/евро → EUR; AED/дирхам/дирхамов/дирхама/dh/dhs/د.إ/Dirhams → AED;
|
||||||
|
₺/TRY/лир/лира → TRY; ¥/CNY/юань → CNY; ₸/KZT/тенге → KZT;
|
||||||
|
Br/BYN/бел.руб → BYN; ₴/UAH/грн → UAH. Если не уверен — null.
|
||||||
|
- contact_phone — любой номер телефона в тексте (с + или без, российский,
|
||||||
|
ОАЭ, любой международный).
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
DEFAULT_HR_SYSTEM_PROMPT = """\
|
||||||
|
Ты — аналитик HR-объявлений. Тебе дают текст из Telegram-канала. Сообщение
|
||||||
|
МОЖЕТ БЫТЬ НА ЛЮБОМ ЯЗЫКЕ — обрабатывай одинаково.
|
||||||
|
|
||||||
|
Задача: определить, относится ли сообщение к рынку труда, и какого типа лид
|
||||||
|
это. Допускаются три типа (`kind`):
|
||||||
|
— vacancy — компания/наниматель ищет сотрудника («ищем разработчика»,
|
||||||
|
«hiring backend engineer», «требуется бухгалтер», «we are looking for»);
|
||||||
|
— resume — соискатель ищет работу («ищу работу», «open to work», «available
|
||||||
|
for hire», «рассматриваю предложения», «my CV», «резюме»);
|
||||||
|
— contact — короткое сообщение с именем/контактом и намёком на профессию,
|
||||||
|
без явной вакансии/резюме («Иван Петров, Python, +7…», «@nick — UI/UX,
|
||||||
|
Дубай»). Используй, когда vacancy и resume не подходят, но из текста ясно,
|
||||||
|
что это HR-контакт.
|
||||||
|
|
||||||
|
Лучше отметить сомнительный случай `is_lead=true` с низкой confidence,
|
||||||
|
чем пропустить. НО полностью исключай:
|
||||||
|
— общие новости и аналитика рынка труда без конкретной вакансии/резюме;
|
||||||
|
— реклама курсов, школ, маркетплейсов услуг (Profi.ru и т.п.);
|
||||||
|
— чужие пересланные посты без контактов и без явного предложения от автора;
|
||||||
|
— объявления о продаже/аренде недвижимости, услуг и товаров;
|
||||||
|
— мемы, шутки, цитаты.
|
||||||
|
|
||||||
|
Отвечай СТРОГО валидным JSON по схеме (никаких комментариев, никакого markdown):
|
||||||
|
{
|
||||||
|
"is_lead": boolean,
|
||||||
|
"kind": "vacancy" | "resume" | "contact" | null,
|
||||||
|
"title": string | null,
|
||||||
|
"company": string | null,
|
||||||
|
"candidate_name": string | null,
|
||||||
|
"experience_years": number | null,
|
||||||
|
"skills": string[],
|
||||||
|
"location": string | null,
|
||||||
|
"remote": boolean | null,
|
||||||
|
"employment_type": "full-time" | "part-time" | "contract" | "internship" | null,
|
||||||
|
"salary_text": string | null,
|
||||||
|
"salary_value": number | null,
|
||||||
|
"currency": "RUB" | "USD" | "EUR" | "AED" | "GBP" | "CNY" | "TRY" | "KZT" | "BYN" | "UAH" | null,
|
||||||
|
"contact_phone": string | null,
|
||||||
|
"contact_name": string | null,
|
||||||
|
"summary": string,
|
||||||
|
"confidence": number
|
||||||
|
}
|
||||||
|
|
||||||
|
Поля:
|
||||||
|
- title — должность/роль ОДНОЙ строкой («Senior Python Developer», «Бухгалтер»,
|
||||||
|
«UI/UX-дизайнер»). Для resume — желаемая роль. Для contact — то, что заявлено.
|
||||||
|
- company — название компании-нанимателя, если оно явно указано (vacancy).
|
||||||
|
- candidate_name — ФИО или ник кандидата (resume / contact).
|
||||||
|
- experience_years — стаж в годах числом. «5+ years» → 5. Если не указан — null.
|
||||||
|
- skills — короткий массив ключевых навыков/технологий (до ~10 элементов).
|
||||||
|
- remote — true для «удалёнка / remote / WFH / hybrid: remote», false для
|
||||||
|
«офис / on-site», null если не указано.
|
||||||
|
- employment_type — full-time для «полная занятость / full-time», part-time
|
||||||
|
для «частичная / part-time», contract для «договор/контракт/freelance»,
|
||||||
|
internship для «стажировка/internship». Иначе null.
|
||||||
|
- salary_text — точная цитата с зарплатой («200–300k ₽», «$5k/mo», «AED 18,000 per month»).
|
||||||
|
- salary_value — число В УКАЗАННОЙ ВАЛЮТЕ. Если диапазон — нижняя граница.
|
||||||
|
Раскрывай сокращения: «200k» → 200000, «1.5M» → 1500000.
|
||||||
|
- currency — определяй гибко: ₽/руб/RUB → RUB; $/USD/долл → USD; €/EUR/евро → EUR;
|
||||||
|
AED/дирхам/dh/dhs → AED; ₺/TRY/лир → TRY; ¥/CNY/юань → CNY; ₸/KZT/тенге → KZT;
|
||||||
|
Br/BYN/бел.руб → BYN; ₴/UAH/грн → UAH. Если не уверен — null.
|
||||||
|
- contact_phone — любой номер телефона (RU / международный, с + или без).
|
||||||
|
- contact_name — имя контактного лица (рекрутер / соискатель / автор).
|
||||||
|
- summary — ОДНО короткое предложение НА РУССКОМ языке.
|
||||||
|
- confidence ∈ [0, 1]: 0.9+ если явная вакансия/резюме с деталями, 0.5–0.8
|
||||||
|
если правдоподобно, 0.2–0.4 если намёк.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
# Back-compat alias — older imports referenced DEFAULT_SYSTEM_PROMPT.
|
||||||
|
DEFAULT_SYSTEM_PROMPT = DEFAULT_RE_SYSTEM_PROMPT
|
||||||
|
|
||||||
|
|
||||||
|
def _build_user_prompt(text: str) -> str:
|
||||||
|
return f"Текст сообщения:\n```\n{text}\n```\nВерни JSON."
|
||||||
|
|
||||||
|
|
||||||
|
_VALID_CURRENCIES = {
|
||||||
|
"RUB", "USD", "EUR", "AED", "GBP", "CNY", "TRY", "KZT", "BYN", "UAH"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_real_estate(payload: Any) -> dict | None:
    """Normalize a decoded model verdict for the real-estate vertical.

    Args:
        payload: The JSON value decoded from the model response.

    Returns:
        A dict with a fixed set of keys, where missing, empty or invalid
        values are replaced by None (``summary`` by an empty string and
        ``confidence`` by 0.0), or None when ``payload`` is not a dict.
    """
    if not isinstance(payload, dict):
        return None
    is_listing = bool(payload.get("is_listing"))
    currency = payload.get("currency")
    if currency is not None:
        currency = str(currency).upper()
        if currency not in _VALID_CURRENCIES:
            currency = None
    # Model output is untrusted JSON: a truthy non-string summary (number,
    # object) cannot be sliced, so stringify it before truncating.
    summary = payload.get("summary") or ""
    if not isinstance(summary, str):
        summary = str(summary)
    return {
        "is_listing": is_listing,
        "kind": payload.get("kind") if payload.get("kind") in ("sale", "rent", "purchase") else None,
        "property_type": payload.get("property_type") or None,
        "rooms": payload.get("rooms") or None,
        "area_m2": _as_float(payload.get("area_m2")),
        "price_text": payload.get("price_text") or None,
        "price_value": _as_float(payload.get("price_value")),
        "currency": currency,
        "location": payload.get("location") or None,
        "contact_phone": payload.get("contact_phone") or None,
        "contact_name": payload.get("contact_name") or None,
        "summary": summary[:300],
        # Clamp into [0, 1]; an unparsable or missing value counts as 0.0.
        "confidence": max(0.0, min(1.0, _as_float(payload.get("confidence")) or 0.0)),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_hr(payload: Any) -> dict | None:
    """Normalize a decoded model verdict for the HR vertical.

    Args:
        payload: The JSON value decoded from the model response.

    Returns:
        A dict with a fixed set of keys, where missing, empty or invalid
        values are replaced by None (``skills`` by an empty list,
        ``summary`` by an empty string and ``confidence`` by 0.0), or None
        when ``payload`` is not a dict.
    """
    if not isinstance(payload, dict):
        return None
    is_lead = bool(payload.get("is_lead"))
    currency = payload.get("currency")
    if currency is not None:
        currency = str(currency).upper()
        if currency not in _VALID_CURRENCIES:
            currency = None
    # Skills may arrive as a comma-separated string or as a list; anything
    # else is dropped. Blank entries are removed and the list is capped at 15.
    skills_raw = payload.get("skills") or []
    if isinstance(skills_raw, str):
        skills = [s.strip() for s in skills_raw.split(",") if s.strip()]
    elif isinstance(skills_raw, list):
        skills = [str(s).strip() for s in skills_raw if str(s).strip()]
    else:
        skills = []
    skills = skills[:15]
    employment = payload.get("employment_type")
    if employment is not None and employment not in (
        "full-time", "part-time", "contract", "internship"
    ):
        employment = None
    # Only a real JSON boolean is accepted; strings such as "yes" become None.
    remote_raw = payload.get("remote")
    remote = bool(remote_raw) if isinstance(remote_raw, bool) else None
    # Model output is untrusted JSON: a truthy non-string summary (number,
    # object) cannot be sliced, so stringify it before truncating.
    summary = payload.get("summary") or ""
    if not isinstance(summary, str):
        summary = str(summary)
    return {
        "is_lead": is_lead,
        "kind": payload.get("kind") if payload.get("kind") in ("vacancy", "resume", "contact") else None,
        "title": payload.get("title") or None,
        "company": payload.get("company") or None,
        "candidate_name": payload.get("candidate_name") or None,
        "experience_years": _as_float(payload.get("experience_years")),
        "skills": skills,
        "location": payload.get("location") or None,
        "remote": remote,
        "employment_type": employment,
        "salary_text": payload.get("salary_text") or None,
        "salary_value": _as_float(payload.get("salary_value")),
        "currency": currency,
        "contact_phone": payload.get("contact_phone") or None,
        "contact_name": payload.get("contact_name") or None,
        "summary": summary[:300],
        # Clamp into [0, 1]; an unparsable or missing value counts as 0.0.
        "confidence": max(0.0, min(1.0, _as_float(payload.get("confidence")) or 0.0)),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _as_float(v: Any) -> float | None:
|
||||||
|
if v is None or isinstance(v, bool):
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return float(v)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
async def is_ready() -> bool:
    """Report whether the LLM server responds and lists a matching model.

    Queries ``/api/tags`` on the configured base URL and looks for a listed
    model whose name starts with the part of ``settings.llm_model`` before the
    first ":". Any failure (network, HTTP status, unexpected payload) yields
    False.
    """
    try:
        async with httpx.AsyncClient(timeout=5) as client:
            response = await client.get(f"{settings.llm_base_url}/api/tags")
            response.raise_for_status()
            listed = {model.get("name") for model in response.json().get("models", [])}
            for name in listed:
                if name.startswith(settings.llm_model.split(":")[0]):
                    return True
            return False
    except Exception:
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def default_prompt(vertical: Vertical) -> str:
    """Return the built-in system prompt: the HR one for "hr", otherwise the
    real-estate one."""
    if vertical == "hr":
        return DEFAULT_HR_SYSTEM_PROMPT
    return DEFAULT_RE_SYSTEM_PROMPT
|
||||||
|
|
||||||
|
|
||||||
|
async def classify(
    text: str | None,
    vertical: Vertical = "real_estate",
    section_slug: str | None = None,
) -> dict | None:
    """Classify a message text under the given vertical/section.

    The system prompt is resolved with `section → vertical → built-in` fallback,
    so a per-section prompt can fine-tune extraction (e.g. AED/sqft for Dubai)
    while unconfigured sections keep using the vertical-wide prompt.

    Args:
        text: Message text; None, empty or shorter (after stripping) than
            `settings.llm_min_text_length` is not classified.
        vertical: Selects the default prompt and the coercer applied to the
            model output.
        section_slug: Optional section used for prompt resolution and logging.

    Returns:
        A vertical-specific structured dict, or None when the LLM is disabled,
        the text is too short, the request fails, or the response is not
        usable JSON.
    """
    if not settings.llm_enabled:
        return None
    if not text or len(text.strip()) < settings.llm_min_text_length:
        return None

    # Lazy import to avoid a circular: prompt_store -> db.session -> config.
    from parser_bot import prompt_store

    system = await prompt_store.resolve(vertical, section_slug, default_prompt(vertical))
    # Non-streaming generate request with JSON-formatted output requested.
    payload = {
        "model": settings.llm_model,
        "prompt": _build_user_prompt(text),
        "system": system,
        "format": "json",
        "stream": False,
        "options": {"temperature": 0.1, "num_ctx": 4096, "num_predict": 600},
    }
    # `_lock` is defined elsewhere in this module; holding it here means only
    # one generate request is in flight at a time.
    async with _lock:
        try:
            async with httpx.AsyncClient(timeout=settings.llm_timeout_seconds) as client:
                r = await client.post(
                    f"{settings.llm_base_url}/api/generate", json=payload
                )
                if r.status_code != 200:
                    # Surface the actual server message — most useful one is
                    # `model '...' not found`, which otherwise would just look
                    # like a generic HTTP error and leave the worker to spin.
                    log.warning(
                        "llm_request_failed",
                        status=r.status_code,
                        model=settings.llm_model,
                        vertical=vertical,
                        section=section_slug,
                        body=r.text[:300],
                    )
                    return None
                data = r.json()
        except Exception as exc:
            # Transport errors, timeouts and undecodable bodies all end here.
            log.warning(
                "llm_request_failed", error=str(exc), model=settings.llm_model, vertical=vertical
            )
            return None

    # The generated text is carried in the "response" field of the reply.
    raw = (data.get("response") or "").strip()
    if not raw:
        return None
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        # Best effort: extract first {...} block.
        start, end = raw.find("{"), raw.rfind("}")
        if start == -1 or end == -1:
            log.warning("llm_invalid_json", raw=raw[:200], vertical=vertical)
            return None
        try:
            parsed = json.loads(raw[start : end + 1])
        except json.JSONDecodeError:
            log.warning("llm_invalid_json", raw=raw[:200], vertical=vertical)
            return None

    # Validate/normalize with the coercer that matches the vertical.
    if vertical == "hr":
        return _coerce_hr(parsed)
    return _coerce_real_estate(parsed)
|
||||||
205
src/parser_bot/main.py
Normal file
205
src/parser_bot/main.py
Normal file
@@ -0,0 +1,205 @@
|
|||||||
|
from contextlib import asynccontextmanager
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import structlog
|
||||||
|
import uvicorn
|
||||||
|
from fastapi import Depends, FastAPI, HTTPException
|
||||||
|
from fastapi.openapi.docs import get_redoc_html, get_swagger_ui_html
|
||||||
|
from fastapi.openapi.utils import get_openapi
|
||||||
|
from fastapi.responses import FileResponse, JSONResponse
|
||||||
|
from fastapi.staticfiles import StaticFiles
|
||||||
|
from starlette.types import Scope
|
||||||
|
|
||||||
|
from parser_bot.access import require_admin, require_admin_network
|
||||||
|
from parser_bot.api.routes import router
|
||||||
|
from parser_bot.config import settings
|
||||||
|
from parser_bot.scheduler.poller import build_scheduler
|
||||||
|
from parser_bot.telegram.client import is_authorized, start_client, stop_client
|
||||||
|
|
||||||
|
# Structured logging: add an ISO timestamp and the level, then render JSON.
structlog.configure(
    processors=[
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.add_log_level,
        structlog.processors.JSONRenderer(),
    ]
)
log = structlog.get_logger()

# Directory holding the bundled web UI files, located next to this module.
STATIC_DIR = Path(__file__).parent / "web" / "static"
# Headers attached to page responses so browsers revalidate before reuse.
NOCACHE = {"Cache-Control": "no-cache, must-revalidate"}
|
||||||
|
|
||||||
|
|
||||||
|
class NoCacheStaticFiles(StaticFiles):
    """Static file app whose responses carry `Cache-Control: no-cache`.

    Conditional requests (ETag / Last-Modified, answered with 304) keep
    working; the header only stops the browser from reusing an old asset
    without revalidating it after a deploy.
    """

    async def get_response(self, path: str, scope: Scope):
        """Serve `path` as the parent class does, then stamp the cache header."""
        cache_policy = "no-cache, must-revalidate"
        served = await super().get_response(path, scope)
        served.headers["Cache-Control"] = cache_policy
        return served
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: start the Telegram client and the scheduler,
    and stop both on exit.

    Teardown also runs when startup fails after the client has been started
    (for example if the authorization check raises), so a failed boot does
    not leave the client or the scheduler running.
    """
    await start_client()
    scheduler = None  # set only once the scheduler is actually running
    try:
        pending = build_scheduler()
        pending.start()
        scheduler = pending
        authorized = await is_authorized()
        log.info(
            "startup", poll_interval=settings.poll_interval_seconds, authorized=authorized
        )
        if not authorized:
            log.warning("not_authorized", action="open /auth.html to log in")
        yield
    finally:
        if scheduler is not None:
            scheduler.shutdown(wait=False)
        await stop_client()
        log.info("shutdown")
|
||||||
|
|
||||||
|
|
||||||
|
def _serve_section_template(vertical_dir: str, page: str) -> FileResponse:
    """Map a section-scoped URL onto the single shared template file.

    Sections are created at runtime, so a URL such as
    `/real-estate/dubai/channels.html` has no file of its own. Every section
    slug is served from `web/static/<vertical_dir>/section/<page>`; the page
    script reads the section name from the URL.

    Raises:
        HTTPException: 404 when the page name contains "/" or starts with
            "..", or when the template file does not exist.
    """
    filename = page or "index.html"
    suspicious = filename.startswith("..") or "/" in filename
    if suspicious:
        raise HTTPException(404)
    template = STATIC_DIR / vertical_dir / "section" / filename
    if template.is_file():
        return FileResponse(template, headers=NOCACHE)
    raise HTTPException(404)
|
||||||
|
|
||||||
|
|
||||||
|
def create_app() -> FastAPI:
    """Build the FastAPI application.

    Registers, in order: the versioned API router, a health probe, the index
    page, admin-gated pages and OpenAPI endpoints, the `/static` and `/media`
    mounts, the per-vertical section routes, and finally a catch-all static
    mount at `/`. The order matters because routes are matched in
    registration order.
    """
    # Prefix used when the app is published under a sub-path; the trailing
    # slash is dropped so it can be concatenated with "/openapi.json".
    public_base = settings.public_base_path.rstrip("/")
    # Disable the default /docs, /redoc and /openapi.json — we serve our own
    # admin-gated versions below.
    app = FastAPI(
        title="parser-tg-bot",
        lifespan=lifespan,
        docs_url=None,
        redoc_url=None,
        openapi_url=None,
    )
    app.include_router(router, prefix="/api/v1")

    @app.get("/healthz")
    async def healthz() -> dict[str, str]:
        # Liveness probe: returns a constant payload without touching any dependency.
        return {"status": "ok"}

    @app.get("/", include_in_schema=False)
    async def index() -> FileResponse:
        return FileResponse(STATIC_DIR / "index.html", headers=NOCACHE)

    # Admin-only: Telegram login page. Registered BEFORE the static catch-all
    # so the static mount can't accidentally serve it to non-admin visitors.
    @app.get(
        "/admin.html",
        include_in_schema=False,
        dependencies=[Depends(require_admin_network)],
    )
    async def admin_page() -> FileResponse:
        return FileResponse(STATIC_DIR / "admin.html", headers=NOCACHE)

    @app.get(
        "/auth.html",
        include_in_schema=False,
        dependencies=[Depends(require_admin)],
    )
    async def auth_page() -> FileResponse:
        return FileResponse(STATIC_DIR / "auth.html", headers=NOCACHE)

    # Admin-only: OpenAPI surface. Custom routes so we can wrap them in
    # `require_admin`; the auto-generated ones from FastAPI bypass it.
    @app.get(
        "/openapi.json",
        include_in_schema=False,
        dependencies=[Depends(require_admin)],
    )
    async def openapi_json() -> JSONResponse:
        return JSONResponse(
            get_openapi(
                title=app.title,
                version=app.version,
                openapi_version=app.openapi_version,
                description=app.description,
                routes=app.routes,
            )
        )

    # NOTE(review): `docs` and `redoc` are annotated `-> FileResponse`, but
    # they return whatever get_swagger_ui_html / get_redoc_html produce —
    # presumably an HTML response, not a file response; confirm and correct
    # the annotation.
    @app.get(
        "/docs",
        include_in_schema=False,
        dependencies=[Depends(require_admin)],
    )
    async def docs() -> FileResponse:
        return get_swagger_ui_html(
            openapi_url=f"{public_base}/openapi.json" if public_base else "/openapi.json",
            title=app.title + " — docs",
        )

    @app.get(
        "/redoc",
        include_in_schema=False,
        dependencies=[Depends(require_admin)],
    )
    async def redoc() -> FileResponse:
        return get_redoc_html(
            openapi_url=f"{public_base}/openapi.json" if public_base else "/openapi.json",
            title=app.title + " — redoc",
        )

    # IMPORTANT: register /static and /media mounts BEFORE the dynamic
    # vertical/section routes. Starlette matches routes in registration order,
    # and a generic /{v}/{s}/{page} pattern would otherwise eat /static/*.
    # NOTE(review): this mount serves everything under STATIC_DIR, the same
    # directory admin.html and auth.html are read from above, so
    # /static/admin.html and /static/auth.html look reachable without the
    # admin dependencies — confirm and exclude them if so.
    app.mount("/static", NoCacheStaticFiles(directory=STATIC_DIR), name="static")
    media_dir = Path(settings.media_dir)
    # Make sure the media directory exists before it is mounted.
    media_dir.mkdir(parents=True, exist_ok=True)
    # /media is fine to cache — file names are content-stable.
    app.mount("/media", StaticFiles(directory=media_dir), name="media")

    # Section-templated dynamic routes, explicit per vertical so /static/*,
    # /api/*, /media/* (and any future top-level path) can't be captured.
    # The `section` path parameter is accepted but not used server-side.
    @app.get("/real-estate/{section}/", include_in_schema=False)
    async def re_section_root(section: str) -> FileResponse:
        return _serve_section_template("real-estate", "index.html")

    @app.get("/real-estate/{section}/{page}", include_in_schema=False)
    async def re_section_page(section: str, page: str) -> FileResponse:
        return _serve_section_template("real-estate", page)

    @app.get("/hr/{section}/", include_in_schema=False)
    async def hr_section_root(section: str) -> FileResponse:
        return _serve_section_template("hr", "index.html")

    @app.get("/hr/{section}/{page}", include_in_schema=False)
    async def hr_section_page(section: str, page: str) -> FileResponse:
        return _serve_section_template("hr", page)

    # Catch-all for top-level static pages (chooser, css, etc.). auth.html is
    # already handled above, so the static catch-all can't bypass the guard.
    app.mount("/", NoCacheStaticFiles(directory=STATIC_DIR, html=True), name="pages")
    return app
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level application instance; `main()` passes it to uvicorn by its
# import path "parser_bot.main:app".
app = create_app()
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Run the application under uvicorn on the host and port from settings.

    `log_config=None` is passed through so uvicorn does not apply its own
    logging configuration.
    """
    server_options = {
        "host": settings.api_host,
        "port": settings.api_port,
        "log_config": None,
    }
    uvicorn.run("parser_bot.main:app", **server_options)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
130
src/parser_bot/prompt_store.py
Normal file
130
src/parser_bot/prompt_store.py
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
"""Runtime-editable LLM system prompts, persisted in app_settings.
|
||||||
|
|
||||||
|
Three resolution levels with fallback (more specific → less specific):
|
||||||
|
1. `llm_system_prompt:<vertical>:<section_slug>` — section override
|
||||||
|
2. `llm_system_prompt:<vertical>` — vertical override
|
||||||
|
3. built-in DEFAULT_RE_SYSTEM_PROMPT / DEFAULT_HR_SYSTEM_PROMPT
|
||||||
|
|
||||||
|
The prompt is read on every classification call but cached for a short
|
||||||
|
window so the DB isn't hit per-message. Edits via the API invalidate the
|
||||||
|
cache for that level, so a save in the UI takes effect within seconds.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import time
|
||||||
|
from typing import Literal
|
||||||
|
|
||||||
|
from sqlalchemy import select
|
||||||
|
from sqlalchemy.dialects.postgresql import insert as pg_insert
|
||||||
|
|
||||||
|
from parser_bot.db.models import AppSetting
|
||||||
|
from parser_bot.db.session import session_scope
|
||||||
|
|
||||||
|
# The verticals a prompt can be stored for.
Vertical = Literal["real_estate", "hr"]

# Every stored prompt key in app_settings starts with this prefix.
_KEY_PREFIX = "llm_system_prompt:"
# How long, in seconds, a loaded prompt (or its absence) is served from memory.
_CACHE_TTL_S = 5.0
# key -> (time.monotonic() at load time, stored prompt or None when absent/blank).
_cache: dict[str, tuple[float, str | None]] = {}
|
||||||
|
|
||||||
|
|
||||||
|
def _key(vertical: Vertical, section_slug: str | None = None) -> str:
    """Build the app_settings key for a vertical, or for one of its sections
    when a non-empty `section_slug` is given."""
    base = f"{_KEY_PREFIX}{vertical}"
    return f"{base}:{section_slug}" if section_slug else base
|
||||||
|
|
||||||
|
|
||||||
|
async def _load(key: str) -> str | None:
    """Read a stored prompt by exact key, using a short-lived in-memory cache.

    Args:
        key: Full app_settings key, as produced by `_key`.

    Returns:
        The stored prompt, or None when the row is missing or its value is
        not a non-blank string. Misses are cached as well.
    """
    now = time.monotonic()
    # Test for the entry itself instead of defaulting its timestamp to 0.0:
    # the monotonic clock has an undefined reference point, so a small clock
    # value would make a never-loaded key look like a fresh cached miss.
    entry = _cache.get(key)
    if entry is not None:
        cached_at, cached_value = entry
        if now - cached_at < _CACHE_TTL_S:
            return cached_value

    async with session_scope() as session:
        row = await session.execute(
            select(AppSetting.value).where(AppSetting.key == key)
        )
        value = row.scalar_one_or_none()

    # Blank or non-string values are treated the same as "no override".
    text = value if isinstance(value, str) and value.strip() else None
    _cache[key] = (now, text)
    return text
|
||||||
|
|
||||||
|
|
||||||
|
async def resolve(
    vertical: Vertical, section_slug: str | None, default: str
) -> str:
    """Return the most specific prompt available for a message.

    Lookup order is section override, then vertical override, then `default`.
    The lookup itself is shared with `get`; only the prompt text is returned.
    """
    prompt, _source = await get(vertical, section_slug, default)
    return prompt
|
||||||
|
|
||||||
|
|
||||||
|
async def get(
    vertical: Vertical, section_slug: str | None, default: str
) -> tuple[str, str]:
    """Return `(text, source)` for the settings UI.

    `source` is 'section', 'vertical' or 'default' and names the level the
    text came from, so the editor can show which override is active without
    a second request.
    """
    # Candidate levels, most specific first.
    levels: list[tuple[str, str]] = []
    if section_slug:
        levels.append(("section", _key(vertical, section_slug)))
    levels.append(("vertical", _key(vertical)))

    for source, key in levels:
        stored = await _load(key)
        if stored is not None:
            return stored, source
    return default, "default"
|
||||||
|
|
||||||
|
|
||||||
|
async def set_prompt(
    vertical: Vertical, section_slug: str | None, text: str
) -> None:
    """Save a new prompt at the given level (section or vertical).

    Args:
        vertical: Vertical the prompt belongs to.
        section_slug: Section to override; falsy means the vertical level.
        text: Prompt text to store.

    Raises:
        ValueError: If `text` is not a string or is blank.
    """
    if not isinstance(text, str) or not text.strip():
        raise ValueError("prompt must be a non-empty string")
    key = _key(vertical, section_slug)
    async with session_scope() as session:
        # Upsert: insert the row, or overwrite `value` when the key exists.
        stmt = (
            pg_insert(AppSetting)
            .values(key=key, value=text)
            .on_conflict_do_update(
                index_elements=["key"], set_={"value": text}
            )
        )
        await session.execute(stmt)
    # Drop the cached entry so the next read goes back to the database.
    invalidate(key)
|
||||||
|
|
||||||
|
|
||||||
|
async def reset(vertical: Vertical, section_slug: str | None) -> None:
    """Drop the override at the given level.

    Deletes the stored row for the section (or for the vertical when
    `section_slug` is falsy) and clears its cache entry.
    """
    key = _key(vertical, section_slug)
    async with session_scope() as session:
        await session.execute(
            AppSetting.__table__.delete().where(AppSetting.key == key)
        )
    # Drop the cached entry so the next read goes back to the database.
    invalidate(key)
|
||||||
|
|
||||||
|
|
||||||
|
def invalidate(key: str | None = None) -> None:
    """Forget the cached prompt for `key`, or every cached prompt when `key`
    is None. Unknown keys are ignored."""
    if key is not None:
        _cache.pop(key, None)
        return
    _cache.clear()
|
||||||
|
|
||||||
|
|
||||||
|
async def is_overridden(
    vertical: Vertical, section_slug: str | None = None
) -> bool:
    """Tell whether a custom prompt is stored at exactly this level.

    Only the given level is checked; there is no fallback to the vertical.
    """
    stored = await _load(_key(vertical, section_slug))
    return stored is not None
|
||||||
0
src/parser_bot/scheduler/__init__.py
Normal file
0
src/parser_bot/scheduler/__init__.py
Normal file
349
src/parser_bot/scheduler/poller.py
Normal file
349
src/parser_bot/scheduler/poller.py
Normal file
@@ -0,0 +1,349 @@
|
|||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
import structlog
|
||||||
|
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||||
|
from sqlalchemy import func, select
|
||||||
|
from sqlalchemy.dialects.postgresql import insert as pg_insert
|
||||||
|
|
||||||
|
from parser_bot.config import settings
|
||||||
|
from parser_bot.db.models import Channel, Message, Section
|
||||||
|
from parser_bot.db.session import session_scope
|
||||||
|
from parser_bot.extractors import analyze, analyze_with_llm
|
||||||
|
from parser_bot.telegram.client import (
|
||||||
|
fetch_new_messages,
|
||||||
|
fetch_specific_messages_with_media,
|
||||||
|
is_authorized,
|
||||||
|
resolve_channel,
|
||||||
|
)
|
||||||
|
|
||||||
|
log = structlog.get_logger()
|
||||||
|
|
||||||
|
|
||||||
|
def _verdict_key(vertical: str) -> str:
|
||||||
|
"""JSONB key under `extracted` where the LLM verdict lives for this vertical."""
|
||||||
|
return "hr_lead" if vertical == "hr" else "lead"
|
||||||
|
|
||||||
|
|
||||||
|
def _needs_work_clause(vertical: str | None):
    """Build the SQL filter for rows that still need LLM classification.

    With a vertical, a row qualifies when `extracted` is NULL or that
    vertical's verdict key is missing. Without one, a row qualifies when
    `extracted` is NULL or neither the "lead" nor the "hr_lead" verdict is
    present; this form is used for the aggregate queue view.
    """
    never_analyzed = Message.extracted.is_(None)
    if vertical is not None:
        verdict_missing = Message.extracted[_verdict_key(vertical)].is_(None)
        return never_analyzed | verdict_missing
    no_re_verdict = Message.extracted["lead"].is_(None)
    no_hr_verdict = Message.extracted["hr_lead"].is_(None)
    return never_analyzed | (no_re_verdict & no_hr_verdict)
|
||||||
|
|
||||||
|
|
||||||
|
async def poll_channel(channel_id: int) -> int:
    """Poll one channel for new messages and store them.

    Args:
        channel_id: Primary key of the channel row.

    Returns:
        Number of newly inserted message rows; 0 when the channel does not
        exist or is inactive.
    """
    async with session_scope() as session:
        channel = await session.get(Channel, channel_id)
        if channel is None or not channel.is_active:
            return 0

        # First poll (or incomplete row): fill in the Telegram id and title.
        if channel.tg_id is None or channel.title is None:
            resolved = await resolve_channel(channel.identifier)
            channel.tg_id = resolved.tg_id
            channel.title = resolved.title

        msgs = await fetch_new_messages(
            channel.identifier,
            min_id=channel.last_message_id,
            limit=settings.poll_history_limit,
            download_media_for_channel_id=channel.id,
        )

        inserted = 0
        for m in msgs:
            # Only the cheap regex pass runs in the poll path. LLM classification
            # is handled by `classify_pending` in a background scheduler job so
            # that a poll request never blocks on a 5s/message LLM call.
            stmt = (
                pg_insert(Message)
                .values(
                    channel_id=channel.id,
                    tg_message_id=m.tg_message_id,
                    date=m.date,
                    text=m.text,
                    sender_id=m.sender_id,
                    sender_username=m.sender_username,
                    sender_name=m.sender_name,
                    grouped_id=m.grouped_id,
                    has_media=m.has_media,
                    views=m.views,
                    forwards=m.forwards,
                    raw=m.raw,
                    media_files=m.media_files or None,
                    extracted=analyze(m.text) if m.text else None,
                )
                # Re-fetched messages are skipped instead of raising.
                .on_conflict_do_nothing(index_elements=["channel_id", "tg_message_id"])
            )
            result = await session.execute(stmt)
            inserted += result.rowcount or 0

        if msgs:
            # Advance the cursor to the highest id in the batch rather than
            # to the last element, so the result does not depend on the order
            # in which the fetch helper returns messages.
            newest_id = max(m.tg_message_id for m in msgs)
            channel.last_message_id = max(channel.last_message_id or 0, newest_id)
        channel.last_polled_at = datetime.now(timezone.utc)

        log.info(
            "polled_channel",
            channel=channel.identifier,
            vertical=channel.vertical,
            fetched=len(msgs),
            inserted=inserted,
        )
        return inserted
|
||||||
|
|
||||||
|
|
||||||
|
async def poll_all() -> None:
    """Poll every active channel once.

    Does nothing when the Telegram client is not authorized. A failure in
    one channel is logged and does not stop the remaining channels.
    """
    authorized = await is_authorized()
    if not authorized:
        log.debug("poll_skipped_not_authorized")
        return

    active_ids_query = select(Channel.id).where(Channel.is_active.is_(True))
    async with session_scope() as session:
        fetched = await session.execute(active_ids_query)
        ids = [record[0] for record in fetched.all()]

    for channel_id in ids:
        try:
            await poll_channel(channel_id)
        except Exception as exc:
            log.error("poll_failed", channel_id=channel_id, error=str(exc))
|
||||||
|
|
||||||
|
|
||||||
|
async def backfill_media(channel_id: int, batch_size: int = 50) -> dict[str, int]:
    """Re-download media for messages with has_media=True but media_files IS NULL.

    Goes through one batch (oldest-first by tg_message_id) at a time so the call
    stays bounded; the UI can press the button repeatedly until 'pending' is 0.

    Args:
        channel_id: Primary key of the channel to backfill.
        batch_size: Maximum number of messages handled in this call.

    Returns:
        {"updated": rows that received media files in this call,
         "pending": rows counted as pending before the call minus `updated`}.

    Raises:
        RuntimeError: If the client is not authorized or the channel is missing.
    """
    if not await is_authorized():
        raise RuntimeError("not authorized")

    async with session_scope() as session:
        channel = await session.get(Channel, channel_id)
        if channel is None:
            raise RuntimeError("channel not found")

        # Total backlog for this channel, counted before this batch is processed.
        pending_q = select(func.count(Message.id)).where(
            Message.channel_id == channel_id,
            Message.has_media.is_(True),
            Message.media_files.is_(None),
        )
        pending_total = (await session.execute(pending_q)).scalar_one()

        rows = (
            await session.execute(
                select(Message.id, Message.tg_message_id)
                .where(
                    Message.channel_id == channel_id,
                    Message.has_media.is_(True),
                    Message.media_files.is_(None),
                )
                .order_by(Message.tg_message_id.asc())
                .limit(batch_size)
            )
        ).all()
        if not rows:
            return {"updated": 0, "pending": 0}

        tg_ids = [r.tg_message_id for r in rows]
        # The result is used below as a mapping from tg_message_id to files.
        results = await fetch_specific_messages_with_media(
            channel.identifier, tg_ids, channel_id
        )

        updated = 0
        for db_id, tg_id in rows:
            files = results.get(tg_id)
            # NOTE(review): rows without files stay NULL and, being the oldest,
            # would be selected again by the next call — confirm this cannot
            # keep later rows from ever being reached.
            if not files:
                continue
            msg = await session.get(Message, db_id)
            if msg is None:
                continue
            msg.media_files = files
            updated += 1

        log.info(
            "backfill_media",
            channel_id=channel_id,
            updated=updated,
            remaining=max(0, pending_total - updated),
        )
        return {"updated": updated, "pending": max(0, pending_total - updated)}
|
||||||
|
|
||||||
|
|
||||||
|
async def reanalyze_channel(channel_id: int, batch_size: int = 5) -> dict[str, int]:
    """Re-run extractors (regex + LLM) over messages missing this channel's verdict.

    Picks the vertical AND section from the channel row so the right LLM
    prompt is used. Only reanalyzes rows where the corresponding verdict key
    is missing. Newest first so fresh leads surface during long backfills.

    Args:
        channel_id: Primary key of the channel to reanalyze.
        batch_size: Maximum number of messages handled in this call.

    Returns:
        {"updated": rows rewritten in this call,
         "pending": rows counted as pending before the call minus `updated`}.
        Both are 0 when the channel (joined with its section) is not found.
    """
    async with session_scope() as session:
        # Inner join: a channel without a matching section row yields no result.
        result = await session.execute(
            select(Channel, Section.slug)
            .join(Section, Section.id == Channel.section_id)
            .where(Channel.id == channel_id)
        )
        row = result.one_or_none()
        if row is None:
            return {"updated": 0, "pending": 0}
        channel, section_slug = row
        vertical = channel.vertical
        needs_work = _needs_work_clause(vertical)

        # Backlog size for this channel, counted before this batch is processed.
        pending_total = (
            await session.execute(
                select(func.count(Message.id)).where(
                    Message.channel_id == channel_id,
                    Message.text.is_not(None),
                    needs_work,
                )
            )
        ).scalar_one()

        rows = (
            await session.execute(
                select(Message.id, Message.text)
                .where(
                    Message.channel_id == channel_id,
                    Message.text.is_not(None),
                    needs_work,
                )
                .order_by(Message.id.desc())
                .limit(batch_size)
            )
        ).all()
        if not rows:
            return {"updated": 0, "pending": 0}

        updated = 0
        for db_id, text in rows:
            # With the LLM disabled only the regex extractor runs.
            # NOTE(review): presumably that result carries no verdict key, so
            # such rows would keep matching `needs_work` — confirm.
            extracted = (
                await analyze_with_llm(text, vertical, section_slug)
                if settings.llm_enabled
                else analyze(text)
            )
            msg = await session.get(Message, db_id)
            if msg is None:
                continue
            msg.extracted = extracted
            updated += 1

        log.info(
            "reanalyzed_channel",
            channel_id=channel_id,
            vertical=vertical,
            section=section_slug,
            updated=updated,
            remaining=max(0, pending_total - updated),
        )
        return {"updated": updated, "pending": max(0, pending_total - updated)}
|
||||||
|
|
||||||
|
|
||||||
|
async def pending_llm_count(
    vertical: str | None = None, section_slug: str | None = None
) -> int:
    """How many text messages still need LLM classification.

    When `vertical` is set, only counts messages from channels of that vertical
    (and optionally that section) whose vertical-specific verdict is missing.

    Returns:
        The number of matching rows; always 0 when the LLM is disabled.
    """
    if not settings.llm_enabled:
        return 0
    needs_work = _needs_work_clause(vertical)
    async with session_scope() as session:
        stmt = select(func.count(Message.id)).where(
            Message.text.is_not(None),
            needs_work,
        )
        if vertical is not None:
            stmt = stmt.join(Channel, Channel.id == Message.channel_id).where(
                Channel.vertical == vertical
            )
            # The section filter joins through Channel, so it is applied only
            # together with the vertical filter.
            if section_slug is not None:
                stmt = stmt.join(Section, Section.id == Channel.section_id).where(
                    Section.slug == section_slug
                )
        return (await session.execute(stmt)).scalar_one()
|
||||||
|
|
||||||
|
|
||||||
|
async def classify_pending(batch_size: int = 5) -> int:
    """Classify one batch of pending messages with the LLM, across all channels.

    Candidates are taken newest-first. The vertical and section slug passed
    to the LLM come from each message's own channel, so channels of different
    verticals (and sections) are handled by the same worker.

    Args:
        batch_size: Maximum number of candidate messages selected per run.

    Returns:
        The number of messages whose `extracted` value was rewritten
        (0 when the LLM is disabled or nothing is pending).
    """
    if not settings.llm_enabled:
        return 0

    batch_query = (
        select(Message.id, Message.text, Channel.vertical, Section.slug)
        .join(Channel, Channel.id == Message.channel_id)
        .join(Section, Section.id == Channel.section_id)
        .where(Message.text.is_not(None), _needs_work_clause(None))
        .order_by(Message.id.desc())
        .limit(batch_size)
    )

    classified = 0
    async with session_scope() as session:
        candidates = (await session.execute(batch_query)).all()
        for message_id, body, vertical, section_slug in candidates:
            # The selection clause is not vertical-specific, so a row may be
            # selected although the verdict for its own vertical is already
            # stored. Re-read `extracted` and skip those rows.
            # NOTE(review): skipped rows presumably keep matching the clause
            # and can reappear in later batches — confirm.
            current = (
                await session.execute(
                    select(Message.extracted).where(Message.id == message_id)
                )
            ).scalar_one_or_none()
            verdict_key = _verdict_key(vertical)
            already_done = bool(current) and current.get(verdict_key) is not None
            if already_done:
                continue

            verdict = await analyze_with_llm(body, vertical, section_slug)
            record = await session.get(Message, message_id)
            if record is None:
                continue
            record.extracted = verdict
            classified += 1

    if classified:
        log.info("classify_pending_batch", updated=classified)
    return classified
|
||||||
|
|
||||||
|
|
||||||
|
def build_scheduler() -> AsyncIOScheduler:
    """Create the scheduler and register the periodic jobs.

    `poll_all` is always registered; `classify_pending` is added only when
    the LLM is enabled. Intervals and batch size are read from `settings`.
    The returned scheduler is not started here.
    """
    # Options shared by both jobs.
    job_options = {"max_instances": 1, "coalesce": True}

    scheduler = AsyncIOScheduler()
    scheduler.add_job(
        poll_all,
        "interval",
        seconds=settings.poll_interval_seconds,
        id="poll_all",
        **job_options,
    )
    if settings.llm_enabled:
        scheduler.add_job(
            classify_pending,
            "interval",
            seconds=settings.llm_classify_interval_seconds,
            id="classify_pending",
            kwargs={"batch_size": settings.llm_classify_batch_size},
            **job_options,
        )
    return scheduler
|
||||||
0
src/parser_bot/telegram/__init__.py
Normal file
0
src/parser_bot/telegram/__init__.py
Normal file
319
src/parser_bot/telegram/client.py
Normal file
319
src/parser_bot/telegram/client.py
Normal file
@@ -0,0 +1,319 @@
|
|||||||
|
import json
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import structlog
|
||||||
|
from telethon import TelegramClient
|
||||||
|
from telethon.sessions import StringSession
|
||||||
|
from telethon.tl.types import Channel as TgChannel
|
||||||
|
from telethon.tl.types import Message as TgMessage
|
||||||
|
from telethon.tl.types import (
|
||||||
|
MessageMediaDocument,
|
||||||
|
MessageMediaPhoto,
|
||||||
|
)
|
||||||
|
|
||||||
|
from parser_bot.config import settings
|
||||||
|
|
||||||
|
log = structlog.get_logger()
|
||||||
|
|
||||||
|
|
||||||
|
def _json_safe(value: Any) -> Any:
|
||||||
|
"""Coerce Telethon's to_dict() output into JSONB-safe primitives."""
|
||||||
|
return json.loads(json.dumps(value, default=str))
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
class ResolvedChannel:
    """Identity of a Telegram channel as returned by `resolve_channel`."""

    # Id of the resolved Telethon channel entity.
    tg_id: int
    # Channel title; `resolve_channel` falls back to the lookup identifier
    # when the entity has no title.
    title: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
class FetchedMessage:
    """One Telegram message flattened into plain fields by `fetch_new_messages`."""

    # Telethon `msg.id`.
    tg_message_id: int
    # Telethon `msg.date`.
    date: datetime
    # Telethon `msg.message`; may be None.
    text: str | None
    # Telethon `msg.sender_id`, or None when the message carries no sender id.
    sender_id: int | None
    # Username and display name resolved by `_sender_info`; either may be None.
    sender_username: str | None
    sender_name: str | None
    # Telethon `grouped_id` when present
    # (presumably shared by the messages of one album — TODO confirm).
    grouped_id: int | None
    # True when the message has media attached, whether or not it was downloaded.
    has_media: bool
    # Telethon `msg.views` / `msg.forwards`; may be None.
    views: int | None
    forwards: int | None
    # `msg.to_dict()` passed through `_json_safe`.
    raw: dict
    # Entries produced by `_download_message_media`; empty when the message
    # has no media or no download was requested.
    media_files: list[dict] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
def _sender_info(msg: TgMessage) -> tuple[str | None, str | None]:
|
||||||
|
"""Best-effort sender username + display name from a Telethon Message.
|
||||||
|
|
||||||
|
For public channel posts the "sender" is usually the channel itself —
|
||||||
|
in that case we fall back to `post_author` (the optional signature on
|
||||||
|
signed posts) so the operator at least knows who signed it.
|
||||||
|
"""
|
||||||
|
username: str | None = None
|
||||||
|
name: str | None = None
|
||||||
|
sender = msg.sender
|
||||||
|
if sender is not None:
|
||||||
|
username = getattr(sender, "username", None)
|
||||||
|
first = getattr(sender, "first_name", None)
|
||||||
|
last = getattr(sender, "last_name", None)
|
||||||
|
title = getattr(sender, "title", None)
|
||||||
|
if first or last:
|
||||||
|
name = " ".join(p for p in (first, last) if p)
|
||||||
|
elif title:
|
||||||
|
name = title
|
||||||
|
post_author = getattr(msg, "post_author", None)
|
||||||
|
if not name and post_author:
|
||||||
|
name = post_author
|
||||||
|
return username, name
|
||||||
|
|
||||||
|
|
||||||
|
def _media_kind(media: Any) -> str:
    """Map a Telethon media object to a coarse kind label.

    Returns "photo" for photo media. For document media the label is derived
    from the document's MIME type: "video", "audio", "sticker" (exactly
    `image/webp`) or "document". Any other media type yields "unknown".
    """
    if isinstance(media, MessageMediaPhoto):
        return "photo"
    if not isinstance(media, MessageMediaDocument):
        return "unknown"

    document = getattr(media, "document", None)
    # A missing document or MIME type is treated as an empty string.
    mime_type = (getattr(document, "mime_type", "") or "").lower()
    for prefix, label in (("video/", "video"), ("audio/", "audio")):
        if mime_type.startswith(prefix):
            return label
    return "sticker" if mime_type == "image/webp" else "document"
|
||||||
|
|
||||||
|
|
||||||
|
def _media_size(media: Any) -> int | None:
|
||||||
|
doc = getattr(media, "document", None)
|
||||||
|
if doc is not None:
|
||||||
|
return getattr(doc, "size", None)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _media_mime(media: Any) -> str | None:
|
||||||
|
doc = getattr(media, "document", None)
|
||||||
|
if doc is not None:
|
||||||
|
return getattr(doc, "mime_type", None)
|
||||||
|
if isinstance(media, MessageMediaPhoto):
|
||||||
|
return "image/jpeg"
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
async def _download_message_media(
    client: TelegramClient, msg: TgMessage, channel_id: int
) -> list[dict]:
    """Download the media of one message into `<media_dir>/<channel_id>/`.

    Returns an empty list when the message has no media, otherwise a
    one-element list with a dict of the form
    `{kind, mime, size, url?, skipped?}`:

    * `skipped="too_large"` — the reported size exceeds `media_max_bytes`;
      nothing is downloaded, but the metadata is kept so the UI can still
      show that media existed.
    * `skipped="download_error"` — the download raised; the error is logged.
    * `skipped="no_file"` — the download returned no path.
    * `url` — public URL of the saved file when the download succeeded.
    """
    media = msg.media
    if media is None:
        return []

    entry: dict = {
        "kind": _media_kind(media),
        "mime": _media_mime(media),
        "size": _media_size(media),
    }

    reported_size = entry["size"]
    if reported_size is not None and reported_size > settings.media_max_bytes:
        entry["skipped"] = "too_large"
        return [entry]

    channel_dir = Path(settings.media_dir) / str(channel_id)
    channel_dir.mkdir(parents=True, exist_ok=True)
    # The target path has no extension; the name of the file actually written
    # is taken from the path returned by `download_media`.
    destination = str(channel_dir / f"{msg.id}")
    try:
        saved_path = await client.download_media(msg, file=destination)
    except Exception as exc:
        log.warning("media_download_failed", msg_id=msg.id, error=str(exc))
        entry["skipped"] = "download_error"
        return [entry]

    if saved_path is None:
        entry["skipped"] = "no_file"
    else:
        base = settings.public_base_path.rstrip("/")
        entry["url"] = f"{base}/media/{channel_id}/{Path(saved_path).name}"
    return [entry]
|
||||||
|
|
||||||
|
|
||||||
|
# Module-wide Telethon client, created lazily by get_client().
_client: TelegramClient | None = None


def get_client() -> TelegramClient:
    """Return the shared Telethon client, building it on first use.

    A `StringSession` from `settings.tg_session_string` is preferred
    (k8s-friendly); when that setting is empty the file-based session at
    `settings.tg_session_path` is used instead (local dev).
    """
    global _client
    if _client is not None:
        return _client

    if settings.tg_session_string:
        session = StringSession(settings.tg_session_string)
    else:
        session = settings.tg_session_path
    _client = TelegramClient(session, settings.tg_api_id, settings.tg_api_hash)
    return _client
|
||||||
|
|
||||||
|
|
||||||
|
async def start_client() -> TelegramClient:
    """Return the shared client, connecting it first if necessary.

    Authorization is NOT required here: an unauthorized but connected client
    is what the web login flow starts from. Callers that need an authorized
    client must use `require_authorized()`.
    """
    client = get_client()
    if client.is_connected():
        return client
    await client.connect()
    return client
|
||||||
|
|
||||||
|
|
||||||
|
async def stop_client() -> None:
    """Disconnect the shared client (if connected) and forget it.

    After this call `get_client()` builds a fresh instance.
    """
    global _client
    client = _client
    if client is None or not client.is_connected():
        _client = None
        return
    await client.disconnect()
    _client = None
|
||||||
|
|
||||||
|
|
||||||
|
async def require_authorized() -> TelegramClient:
    """Return the connected shared client, insisting that it is authorized.

    Raises:
        RuntimeError: when the client's user is not authorized.
    """
    client = await start_client()
    authorized = await client.is_user_authorized()
    if authorized:
        return client
    raise RuntimeError("not authorized: complete login at /auth.html")
|
||||||
|
|
||||||
|
|
||||||
|
async def is_authorized() -> bool:
    """Connect the shared client if needed and report whether its user is authorized."""
    return await (await start_client()).is_user_authorized()
|
||||||
|
|
||||||
|
|
||||||
|
async def current_username() -> str | None:
    """Return a label for the logged-in account, or None when there is none.

    The label is the account's username, falling back to its numeric id
    rendered as a string. None is returned when the client is not authorized
    or `get_me()` yields nothing.
    """
    client = await start_client()
    if await client.is_user_authorized():
        me = await client.get_me()
        if me is not None:
            return me.username or str(me.id)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
# `phone_code_hash` of the most recent code request; None when no login is pending.
_pending_phone_code_hash: str | None = None


async def send_login_code() -> None:
    """Login step 1: ask Telegram to send a login code to `settings.tg_phone`.

    The returned `phone_code_hash` is remembered for `submit_login_code`.

    Raises:
        RuntimeError: when the client is already authorized.
    """
    global _pending_phone_code_hash
    client = await start_client()
    already_logged_in = await client.is_user_authorized()
    if already_logged_in:
        raise RuntimeError("already authorized")
    code_request = await client.send_code_request(settings.tg_phone)
    _pending_phone_code_hash = code_request.phone_code_hash
|
||||||
|
|
||||||
|
|
||||||
|
async def submit_login_code(code: str) -> bool:
    """Login step 2: submit the code received from Telegram.

    Returns:
        True when the account additionally requires its 2FA password (the
        pending login is kept), False when sign-in completed.

    Raises:
        RuntimeError: when no code was requested beforehand.
    """
    global _pending_phone_code_hash
    if _pending_phone_code_hash is None:
        raise RuntimeError("no pending login: call send-code first")

    client = await start_client()
    from telethon.errors import SessionPasswordNeededError

    try:
        await client.sign_in(
            phone=settings.tg_phone,
            code=code,
            phone_code_hash=_pending_phone_code_hash,
        )
    except SessionPasswordNeededError:
        # Code accepted, but the password step is still outstanding.
        return True
    else:
        _pending_phone_code_hash = None
        return False
|
||||||
|
|
||||||
|
|
||||||
|
async def submit_login_password(password: str) -> None:
    """Login step 3 (2FA accounts only): sign in with the cloud password.

    Clears the pending login once sign-in returns.
    """
    global _pending_phone_code_hash
    await (await start_client()).sign_in(password=password)
    _pending_phone_code_hash = None
|
||||||
|
|
||||||
|
|
||||||
|
async def logout() -> None:
    """Log the account out of Telegram and clear any pending login.

    Does nothing on the Telegram side when the client is not authorized.
    After a successful `log_out()` the cached client is dropped, so the next
    `get_client()` builds a fresh instance for a new login flow.
    """
    global _client, _pending_phone_code_hash
    client = await start_client()
    if await client.is_user_authorized():
        # Telethon documents the client as unusable after a successful
        # log_out(). Keeping it cached would hand a dead instance to
        # start_client() / send_login_code() on the next login attempt.
        if await client.log_out():
            _client = None
    _pending_phone_code_hash = None
|
||||||
|
|
||||||
|
|
||||||
|
async def resolve_channel(identifier: str) -> ResolvedChannel:
    """Resolve `identifier` to a Telegram channel.

    Returns:
        The channel's id and title (the identifier itself when the entity
        has no title).

    Raises:
        ValueError: when the resolved entity is not a channel.
    """
    client = await start_client()
    entity = await client.get_entity(identifier)
    if isinstance(entity, TgChannel):
        return ResolvedChannel(tg_id=entity.id, title=entity.title or identifier)
    raise ValueError(f"{identifier!r} is not a channel")
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_specific_messages_with_media(
    identifier: str, tg_message_ids: list[int], channel_id: int
) -> dict[int, list[dict]]:
    """Re-fetch specific messages by id and download their media.

    Used by the backfill flow for messages that were saved before media
    download was implemented. Requires an authorized client.

    Returns:
        `{tg_message_id: media_files}` for every fetched message that has
        media; the media entry may describe a download or a skip. Ids that
        do not resolve to a message with media are left out.
    """
    client = await require_authorized()
    entity = await client.get_entity(identifier)
    fetched = await client.get_messages(entity, ids=list(tg_message_ids))

    media_by_id: dict[int, list[dict]] = {}
    for item in fetched:
        # isinstance() also rejects the None placeholders in the result.
        if isinstance(item, TgMessage) and item.media is not None:
            media_by_id[item.id] = await _download_message_media(
                client, item, channel_id
            )
    return media_by_id
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_new_messages(
    identifier: str,
    min_id: int | None,
    limit: int,
    download_media_for_channel_id: int | None = None,
) -> list[FetchedMessage]:
    """Fetch up to `limit` messages of a channel, sorted by ascending message id.

    Args:
        identifier: Channel identifier passed to `get_entity`.
        min_id: Forwarded to `iter_messages` as `min_id` when not None.
        limit: Maximum number of messages requested from Telegram.
        download_media_for_channel_id: When set, media of each message is
            downloaded under this channel id; when None, media is only
            flagged through `has_media`.

    Returns:
        One `FetchedMessage` per regular message yielded by Telethon;
        other item types are ignored.
    """
    client = await start_client()
    entity = await client.get_entity(identifier)

    iter_kwargs = {"limit": limit}
    if min_id is not None:
        iter_kwargs["min_id"] = min_id

    collected: list[FetchedMessage] = []
    async for msg in client.iter_messages(entity, **iter_kwargs):
        if not isinstance(msg, TgMessage):
            continue

        wants_media = msg.media is not None and download_media_for_channel_id is not None
        if wants_media:
            media_files = await _download_message_media(
                client, msg, download_media_for_channel_id
            )
        else:
            media_files = []

        sender_username, sender_name = _sender_info(msg)
        raw_sender = msg.sender_id
        # Unwrap a peer-like object if one is ever returned; plain ids pass through.
        sender_id = None if raw_sender is None else getattr(raw_sender, "user_id", raw_sender)

        collected.append(
            FetchedMessage(
                tg_message_id=msg.id,
                date=msg.date,
                text=msg.message,
                sender_id=sender_id,
                sender_username=sender_username,
                sender_name=sender_name,
                grouped_id=getattr(msg, "grouped_id", None),
                has_media=msg.media is not None,
                views=msg.views,
                forwards=msg.forwards,
                raw=_json_safe(msg.to_dict()),
                media_files=media_files,
            )
        )
    return sorted(collected, key=lambda fetched: fetched.tg_message_id)
|
||||||
36
src/parser_bot/web/static/admin.html
Normal file
36
src/parser_bot/web/static/admin.html
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html lang="ru">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>Админ — parser-tg-bot</title>
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||||
|
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<header>
|
||||||
|
<h1>parser-tg-bot</h1>
|
||||||
|
<nav>
|
||||||
|
<a href="/api/monitoring-tg/">Разделы</a>
|
||||||
|
<a class="admin-login-link active" href="/api/monitoring-tg/admin.html">Админ</a>
|
||||||
|
<a class="admin-link" href="/api/monitoring-tg/auth.html">Авторизация</a>
|
||||||
|
<a class="admin-link" href="/api/monitoring-tg/docs" target="_blank">API</a>
|
||||||
|
</nav>
|
||||||
|
</header>
|
||||||
|
<main>
|
||||||
|
<h2>Админ-доступ</h2>
|
||||||
|
|
||||||
|
<div class="card" style="max-width:520px">
|
||||||
|
<div id="admin-status" class="muted" style="margin-bottom:12px">Проверка...</div>
|
||||||
|
<form id="admin-form" class="row">
|
||||||
|
<input type="password" id="admin-password" autocomplete="current-password"
|
||||||
|
placeholder="Админ пароль" required style="flex:1; min-width:220px" />
|
||||||
|
<button type="submit">Войти</button>
|
||||||
|
</form>
|
||||||
|
<div class="row" style="margin-top:12px">
|
||||||
|
<button id="admin-logout" class="secondary" type="button">Выйти</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</main>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/admin.js"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
85
src/parser_bot/web/static/auth.html
Normal file
85
src/parser_bot/web/static/auth.html
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html lang="ru">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>Авторизация — parser-tg-bot</title>
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||||
|
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<header>
|
||||||
|
<h1>parser-tg-bot</h1>
|
||||||
|
<nav>
|
||||||
|
<a href="/api/monitoring-tg/">Разделы</a>
|
||||||
|
<a href="/api/monitoring-tg/real-estate/">🏠 Недвижимость</a>
|
||||||
|
<a href="/api/monitoring-tg/hr/">👥 HR</a>
|
||||||
|
<a class="admin-login-link" href="/api/monitoring-tg/admin.html">Админ</a>
|
||||||
|
<a class="admin-link active" href="/api/monitoring-tg/auth.html">Авторизация</a>
|
||||||
|
<a class="admin-link" href="/api/monitoring-tg/docs" target="_blank">API</a>
|
||||||
|
</nav>
|
||||||
|
</header>
|
||||||
|
<main>
|
||||||
|
<h2>Авторизация Telegram</h2>
|
||||||
|
|
||||||
|
<div class="card" style="max-width:520px">
|
||||||
|
<div id="status-block">
|
||||||
|
<div class="empty">Проверка статуса...</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="step-idle" hidden>
|
||||||
|
<p>
|
||||||
|
Не авторизовано. Номер из конфигурации: <span class="mono" id="phone"></span>.
|
||||||
|
Нажми кнопку ниже — Telegram пришлёт одноразовый код на этот номер.
|
||||||
|
</p>
|
||||||
|
<button id="btn-send">Отправить код</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="step-code" hidden>
|
||||||
|
<p>Код отправлен на <span class="mono" id="phone-2"></span>. Введи его:</p>
|
||||||
|
<form id="form-code" class="row">
|
||||||
|
<input type="text" id="code" inputmode="numeric" autocomplete="one-time-code"
|
||||||
|
placeholder="12345" required style="flex:1; min-width:160px" />
|
||||||
|
<button type="submit">Подтвердить</button>
|
||||||
|
</form>
|
||||||
|
<button id="btn-resend" class="secondary" style="margin-top:8px">
|
||||||
|
Запросить код повторно
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="step-password" hidden>
|
||||||
|
<p>На аккаунте включён 2FA. Введи облачный пароль Telegram:</p>
|
||||||
|
<form id="form-password" class="row">
|
||||||
|
<input type="password" id="password" autocomplete="current-password"
|
||||||
|
required style="flex:1; min-width:200px" />
|
||||||
|
<button type="submit">Войти</button>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="step-done" hidden>
|
||||||
|
<p>
|
||||||
|
Авторизовано как <span class="mono" id="username"></span>.
|
||||||
|
Парсер начнёт опрашивать каналы согласно расписанию.
|
||||||
|
</p>
|
||||||
|
<div class="row">
|
||||||
|
<a id="return-link" href="/api/monitoring-tg/"><button>Перейти к разделам</button></a>
|
||||||
|
<button id="btn-logout" class="danger">Выйти</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card" style="max-width:520px; margin-top:16px">
|
||||||
|
<h3 style="margin-top:0">Прод-вариант (без UI)</h3>
|
||||||
|
<p class="muted">
|
||||||
|
Для деплоя в k8s удобнее заранее получить опаковую строку сессии и положить её
|
||||||
|
в Secret — тогда поды поднимаются без интерактива:
|
||||||
|
</p>
|
||||||
|
<pre>docker compose run --rm -it app python -m parser_bot.auth</pre>
|
||||||
|
<p class="muted">
|
||||||
|
Скрипт напечатает <span class="mono">TG_SESSION_STRING=...</span> — вставить
|
||||||
|
в <span class="mono">.env</span> или Secret и забыть про авторизацию.
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
</main>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/auth.js"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
241
src/parser_bot/web/static/css/app.css
Normal file
241
src/parser_bot/web/static/css/app.css
Normal file
@@ -0,0 +1,241 @@
|
|||||||
|
:root {
|
||||||
|
--bg: #0f1115;
|
||||||
|
--panel: #161a22;
|
||||||
|
--panel-2: #1d222c;
|
||||||
|
--border: #262c38;
|
||||||
|
--text: #e6e8ec;
|
||||||
|
--muted: #8a93a3;
|
||||||
|
--accent: #4f8cff;
|
||||||
|
--accent-hover: #6aa0ff;
|
||||||
|
--danger: #ff6464;
|
||||||
|
--ok: #2ecc71;
|
||||||
|
--warn: #f1c40f;
|
||||||
|
}
|
||||||
|
|
||||||
|
* { box-sizing: border-box; }
|
||||||
|
|
||||||
|
body {
|
||||||
|
margin: 0;
|
||||||
|
font: 14px/1.45 -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
|
||||||
|
background: var(--bg);
|
||||||
|
color: var(--text);
|
||||||
|
}
|
||||||
|
|
||||||
|
a { color: var(--accent); text-decoration: none; }
|
||||||
|
a:hover { color: var(--accent-hover); }
|
||||||
|
|
||||||
|
header {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 24px;
|
||||||
|
padding: 14px 24px;
|
||||||
|
background: var(--panel);
|
||||||
|
border-bottom: 1px solid var(--border);
|
||||||
|
}
|
||||||
|
header h1 {
|
||||||
|
font-size: 16px;
|
||||||
|
margin: 0;
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
nav { display: flex; gap: 6px; }
|
||||||
|
nav a {
|
||||||
|
padding: 6px 12px;
|
||||||
|
border-radius: 6px;
|
||||||
|
color: var(--muted);
|
||||||
|
}
|
||||||
|
nav a.active, nav a:hover {
|
||||||
|
color: var(--text);
|
||||||
|
background: var(--panel-2);
|
||||||
|
}
|
||||||
|
|
||||||
|
main { padding: 24px; max-width: 1200px; margin: 0 auto; }
|
||||||
|
h2 { font-size: 18px; margin: 0 0 16px; }
|
||||||
|
h3 { font-size: 14px; margin: 24px 0 12px; color: var(--muted); font-weight: 500; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||||
|
|
||||||
|
.row { display: flex; gap: 12px; align-items: center; flex-wrap: wrap; }
|
||||||
|
.spacer { flex: 1; }
|
||||||
|
|
||||||
|
.card {
|
||||||
|
background: var(--panel);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: 8px;
|
||||||
|
padding: 16px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.stats-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
|
||||||
|
gap: 12px;
|
||||||
|
margin-bottom: 24px;
|
||||||
|
}
|
||||||
|
.stat .label { color: var(--muted); font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||||
|
.stat .value { font-size: 24px; font-weight: 600; margin-top: 4px; }
|
||||||
|
|
||||||
|
input, select, textarea, button {
|
||||||
|
font: inherit;
|
||||||
|
color: var(--text);
|
||||||
|
background: var(--panel-2);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: 6px;
|
||||||
|
padding: 8px 10px;
|
||||||
|
outline: none;
|
||||||
|
}
|
||||||
|
input:focus, select:focus { border-color: var(--accent); }
|
||||||
|
|
||||||
|
button {
|
||||||
|
cursor: pointer;
|
||||||
|
background: var(--accent);
|
||||||
|
border-color: var(--accent);
|
||||||
|
color: white;
|
||||||
|
}
|
||||||
|
button:hover { background: var(--accent-hover); border-color: var(--accent-hover); }
|
||||||
|
button.secondary { background: var(--panel-2); color: var(--text); }
|
||||||
|
button.secondary:hover { background: var(--border); }
|
||||||
|
button.danger { background: transparent; color: var(--danger); border-color: var(--border); }
|
||||||
|
button.danger:hover { background: rgba(255, 100, 100, 0.1); }
|
||||||
|
button:disabled { opacity: 0.5; cursor: not-allowed; }
|
||||||
|
|
||||||
|
table { width: 100%; border-collapse: collapse; }
|
||||||
|
th, td { padding: 10px 12px; text-align: left; border-bottom: 1px solid var(--border); }
|
||||||
|
th { color: var(--muted); font-weight: 500; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||||
|
tr:hover td { background: var(--panel-2); }
|
||||||
|
|
||||||
|
.badge {
|
||||||
|
display: inline-block;
|
||||||
|
padding: 2px 8px;
|
||||||
|
border-radius: 999px;
|
||||||
|
font-size: 11px;
|
||||||
|
background: var(--panel-2);
|
||||||
|
color: var(--muted);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
}
|
||||||
|
.badge.ok { color: var(--ok); border-color: rgba(46, 204, 113, 0.4); }
|
||||||
|
.badge.off { color: var(--muted); }
|
||||||
|
.badge.warn { color: var(--warn); border-color: rgba(241, 196, 15, 0.4); }
|
||||||
|
|
||||||
|
.muted { color: var(--muted); }
|
||||||
|
.mono { font-family: ui-monospace, SFMono-Regular, Menlo, monospace; }
|
||||||
|
|
||||||
|
.message {
|
||||||
|
padding: 12px 16px;
|
||||||
|
border-bottom: 1px solid var(--border);
|
||||||
|
}
|
||||||
|
.message:last-child { border-bottom: none; }
|
||||||
|
.message-meta { display: flex; gap: 12px; color: var(--muted); font-size: 12px; margin-bottom: 6px; }
|
||||||
|
.message-text { white-space: pre-wrap; word-break: break-word; }
|
||||||
|
|
||||||
|
.message-tags {
|
||||||
|
display: flex; flex-wrap: wrap; gap: 6px;
|
||||||
|
margin-top: 8px;
|
||||||
|
}
|
||||||
|
.message-tags .badge.re { color: #2ecc71; border-color: rgba(46, 204, 113, 0.4); }
|
||||||
|
.message-tags .badge.phone { color: #4f8cff; border-color: rgba(79, 140, 255, 0.4); }
|
||||||
|
.message-tags .badge.name { color: #f1c40f; border-color: rgba(241, 196, 15, 0.4); }
|
||||||
|
.message-tags .badge.tg { color: #4f8cff; border-color: rgba(79, 140, 255, 0.4); }
|
||||||
|
.message-tags .badge.tg-link { color: #fff; background: rgba(79, 140, 255, 0.2); border-color: rgba(79, 140, 255, 0.6); }
|
||||||
|
.message-tags .badge.tg-link:hover { background: rgba(79, 140, 255, 0.35); }
|
||||||
|
|
||||||
|
.lead-card {
|
||||||
|
margin-top: 10px;
|
||||||
|
padding: 10px 14px;
|
||||||
|
border-radius: 8px;
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
background: rgba(46, 204, 113, 0.05);
|
||||||
|
}
|
||||||
|
.lead-card.lead-strong { border-color: rgba(46, 204, 113, 0.6); background: rgba(46, 204, 113, 0.1); }
|
||||||
|
.lead-card.lead-medium { border-color: rgba(241, 196, 15, 0.5); background: rgba(241, 196, 15, 0.06); }
|
||||||
|
.lead-card.lead-weak { border-color: rgba(138, 147, 163, 0.4); background: rgba(138, 147, 163, 0.05); }
|
||||||
|
.lead-head { display: flex; flex-wrap: wrap; align-items: center; gap: 10px; }
|
||||||
|
.lead-facts { color: var(--text); font-weight: 500; }
|
||||||
|
.lead-summary { margin-top: 4px; color: var(--muted); font-size: 13px; }
|
||||||
|
.lead-confidence {
|
||||||
|
margin-left: auto; padding: 2px 8px; border-radius: 999px;
|
||||||
|
background: var(--panel-2); border: 1px solid var(--border);
|
||||||
|
font-size: 11px; color: var(--muted); font-variant-numeric: tabular-nums;
|
||||||
|
}
|
||||||
|
.badge.lead { color: #2ecc71; border-color: rgba(46, 204, 113, 0.5); font-weight: 600; }
|
||||||
|
|
||||||
|
.message-media {
|
||||||
|
display: flex; flex-wrap: wrap; gap: 8px;
|
||||||
|
margin-top: 10px;
|
||||||
|
}
|
||||||
|
.media-thumb {
|
||||||
|
max-width: 240px; max-height: 240px;
|
||||||
|
border-radius: 6px; cursor: zoom-in;
|
||||||
|
background: var(--panel-2);
|
||||||
|
}
|
||||||
|
.media-video { max-width: 360px; max-height: 240px; border-radius: 6px; background: black; }
|
||||||
|
.media-doc {
|
||||||
|
display: inline-flex; align-items: center; gap: 8px;
|
||||||
|
padding: 8px 12px; background: var(--panel-2);
|
||||||
|
border: 1px solid var(--border); border-radius: 6px;
|
||||||
|
color: var(--text);
|
||||||
|
}
|
||||||
|
.media-doc:hover { border-color: var(--accent); }
|
||||||
|
.media-skipped {
|
||||||
|
display: inline-flex; align-items: center; gap: 8px;
|
||||||
|
padding: 6px 10px; background: var(--panel-2);
|
||||||
|
border-radius: 6px; font-size: 12px;
|
||||||
|
}
|
||||||
|
|
||||||
|
#lightbox {
|
||||||
|
position: fixed; inset: 0; z-index: 2000;
|
||||||
|
background: rgba(0,0,0,0.85);
|
||||||
|
display: flex; align-items: center; justify-content: center;
|
||||||
|
cursor: zoom-out;
|
||||||
|
}
|
||||||
|
#lightbox img { max-width: 95vw; max-height: 95vh; border-radius: 4px; }
|
||||||
|
|
||||||
|
.toolbar { display: flex; gap: 8px; align-items: center; margin-bottom: 16px; flex-wrap: wrap; }
|
||||||
|
.toolbar input[type="search"], .toolbar select { min-width: 200px; }
|
||||||
|
|
||||||
|
.toast {
|
||||||
|
position: fixed;
|
||||||
|
bottom: 20px;
|
||||||
|
right: 20px;
|
||||||
|
background: var(--panel);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: 8px;
|
||||||
|
padding: 10px 16px;
|
||||||
|
box-shadow: 0 6px 24px rgba(0,0,0,0.4);
|
||||||
|
animation: slideIn 0.18s ease-out;
|
||||||
|
z-index: 1000;
|
||||||
|
max-width: 360px;
|
||||||
|
}
|
||||||
|
.toast.error { border-color: var(--danger); }
|
||||||
|
.toast.success { border-color: var(--ok); }
|
||||||
|
@keyframes slideIn { from { transform: translateY(8px); opacity: 0; } to { transform: none; opacity: 1; } }
|
||||||
|
|
||||||
|
.empty { padding: 32px; text-align: center; color: var(--muted); }
|
||||||
|
|
||||||
|
.sections-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
|
||||||
|
gap: 16px;
|
||||||
|
margin-top: 16px;
|
||||||
|
}
|
||||||
|
.section-tile { padding: 16px; }
|
||||||
|
.section-tile-link { display: block; color: var(--text); }
|
||||||
|
.section-tile-link:hover { color: var(--text); }
|
||||||
|
.section-tile-head { display: flex; align-items: center; gap: 10px; margin-bottom: 8px; }
|
||||||
|
.section-emoji { font-size: 28px; }
|
||||||
|
.section-title { font-size: 16px; font-weight: 600; }
|
||||||
|
.section-stats { display: flex; flex-wrap: wrap; gap: 12px; color: var(--muted); font-size: 13px; }
|
||||||
|
.section-stats b { color: var(--text); }
|
||||||
|
.section-desc { margin-top: 8px; font-size: 13px; }
|
||||||
|
.section-code { margin-top: 8px; color: var(--warn); font-size: 12px; }
|
||||||
|
.section-slug { margin-top: 8px; font-size: 11px; }
|
||||||
|
.pagination { display: flex; gap: 8px; justify-content: center; margin-top: 16px; }
|
||||||
|
|
||||||
|
dialog {
|
||||||
|
background: var(--panel);
|
||||||
|
color: var(--text);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: 8px;
|
||||||
|
padding: 20px;
|
||||||
|
min-width: 400px;
|
||||||
|
max-width: 80vw;
|
||||||
|
max-height: 80vh;
|
||||||
|
}
|
||||||
|
dialog::backdrop { background: rgba(0,0,0,0.6); }
|
||||||
|
pre { background: var(--bg); padding: 12px; border-radius: 6px; overflow: auto; font-size: 12px; max-height: 60vh; }
|
||||||
99
src/parser_bot/web/static/hr/index.html
Normal file
99
src/parser_bot/web/static/hr/index.html
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html lang="ru">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>👥 HR — подразделы</title>
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||||
|
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<header>
|
||||||
|
<h1 id="page-title">parser-tg-bot · 👥 HR / Кадры</h1>
|
||||||
|
<nav id="nav-section"></nav>
|
||||||
|
</header>
|
||||||
|
<main>
|
||||||
|
<div class="row">
|
||||||
|
<h2>Подразделы HR</h2>
|
||||||
|
<div class="spacer"></div>
|
||||||
|
<button id="open-create">+ Новый подраздел</button>
|
||||||
|
</div>
|
||||||
|
<p class="muted">
|
||||||
|
Каждый подраздел — это собственный набор каналов, своя статистика и свой
|
||||||
|
LLM-промпт (с фоллбэком на промпт вертикали). Например: IT, продажи,
|
||||||
|
маркетинг, рабочие специальности.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<div id="sections-grid"></div>
|
||||||
|
</main>
|
||||||
|
|
||||||
|
<dialog id="create-dialog">
|
||||||
|
<h3 style="margin-top:0">Новый подраздел</h3>
|
||||||
|
<form id="create-form">
|
||||||
|
<label class="row" style="gap:8px; margin-bottom:8px">
|
||||||
|
<span style="min-width:120px" class="muted">Название</span>
|
||||||
|
<input type="text" id="new-title" required placeholder="IT" style="flex:1" />
|
||||||
|
</label>
|
||||||
|
<div class="row" style="gap:8px; margin-bottom:8px; font-size:12px">
|
||||||
|
<span style="min-width:120px" class="muted">URL-адрес</span>
|
||||||
|
<span class="muted mono">/hr/<span id="new-slug-preview">(введите название)</span>/</span>
|
||||||
|
<div class="spacer"></div>
|
||||||
|
<a href="#" id="new-slug-manual" class="muted">изменить вручную</a>
|
||||||
|
</div>
|
||||||
|
<label class="row slug-row" style="gap:8px; margin-bottom:8px" hidden>
|
||||||
|
<span style="min-width:120px" class="muted">Slug</span>
|
||||||
|
<!-- The hyphen is escaped because browsers compile `pattern` with the RegExp
     `v` flag, where a bare `-` inside a character class is a syntax error and
     the whole pattern is then silently ignored. The tail is grouped so a slug
     must end in a letter or digit; the previous trailing `[a-z0-9]?` never
     constrained anything. -->
<input type="text" id="new-slug" pattern="[a-z0-9]([a-z0-9_\-]*[a-z0-9])?"
       placeholder="it" style="flex:1" />
|
||||||
|
</label>
|
||||||
|
<label class="row" style="gap:8px; margin-bottom:8px">
|
||||||
|
<span style="min-width:120px" class="muted">Иконка</span>
|
||||||
|
<input type="text" id="new-emoji" maxlength="4" placeholder="💻" style="width:80px" />
|
||||||
|
</label>
|
||||||
|
<label class="row" style="gap:8px; margin-bottom:8px">
|
||||||
|
<span style="min-width:120px" class="muted">Код доступа</span>
|
||||||
|
<input type="text" id="new-access-code" required minlength="3"
|
||||||
|
autocomplete="new-password" style="flex:1" />
|
||||||
|
</label>
|
||||||
|
<label class="row" style="gap:8px; margin-bottom:8px; align-items:flex-start">
|
||||||
|
<span style="min-width:120px" class="muted">Описание</span>
|
||||||
|
<textarea id="new-description" rows="3" style="flex:1"></textarea>
|
||||||
|
</label>
|
||||||
|
<div class="row" style="justify-content:flex-end; gap:8px; margin-top:12px">
|
||||||
|
<button type="button" id="create-cancel" class="secondary">Отмена</button>
|
||||||
|
<button type="submit">Создать</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
</dialog>
|
||||||
|
|
||||||
|
<dialog id="edit-dialog">
|
||||||
|
<h3 style="margin-top:0">Редактировать подраздел</h3>
|
||||||
|
<form id="edit-form">
|
||||||
|
<input type="hidden" id="edit-slug" />
|
||||||
|
<label class="row" style="gap:8px; margin-bottom:8px">
|
||||||
|
<span style="min-width:120px" class="muted">Название</span>
|
||||||
|
<input type="text" id="edit-title" required style="flex:1" />
|
||||||
|
</label>
|
||||||
|
<label class="row" style="gap:8px; margin-bottom:8px">
|
||||||
|
<span style="min-width:120px" class="muted">Иконка</span>
|
||||||
|
<input type="text" id="edit-emoji" maxlength="4" style="width:80px" />
|
||||||
|
</label>
|
||||||
|
<label class="row" style="gap:8px; margin-bottom:8px">
|
||||||
|
<span style="min-width:120px" class="muted">Код доступа</span>
|
||||||
|
<input type="text" id="edit-access-code" required minlength="3"
|
||||||
|
autocomplete="new-password" style="flex:1" />
|
||||||
|
</label>
|
||||||
|
<label class="row" style="gap:8px; margin-bottom:8px; align-items:flex-start">
|
||||||
|
<span style="min-width:120px" class="muted">Описание</span>
|
||||||
|
<textarea id="edit-description" rows="3" style="flex:1"></textarea>
|
||||||
|
</label>
|
||||||
|
<div class="row" style="justify-content:flex-end; gap:8px; margin-top:12px">
|
||||||
|
<button type="button" id="edit-cancel" class="secondary">Отмена</button>
|
||||||
|
<button type="submit">Сохранить</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
</dialog>
|
||||||
|
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/sections-list.js"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
48
src/parser_bot/web/static/hr/section/channels.html
Normal file
48
src/parser_bot/web/static/hr/section/channels.html
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html lang="ru">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>👥 HR · Каналы — parser-tg-bot</title>
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||||
|
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<header>
|
||||||
|
<h1 id="page-title">parser-tg-bot</h1>
|
||||||
|
<nav id="nav-section"></nav>
|
||||||
|
</header>
|
||||||
|
<main>
|
||||||
|
<h2 id="page-heading">Каналы подраздела</h2>
|
||||||
|
|
||||||
|
<div class="card" style="margin-bottom:24px">
|
||||||
|
<form id="add-form" class="row">
|
||||||
|
<input type="text" id="identifier" placeholder="@channel или https://t.me/..." required style="flex:1; min-width:280px" />
|
||||||
|
<button type="submit">Добавить канал</button>
|
||||||
|
</form>
|
||||||
|
<div class="muted" style="margin-top:8px; font-size:12px">
|
||||||
|
Канал будет привязан к текущему подразделу.
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card">
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>ID</th>
|
||||||
|
<th>Канал</th>
|
||||||
|
<th>Telegram ID</th>
|
||||||
|
<th>Сообщ.</th>
|
||||||
|
<th>Последний опрос</th>
|
||||||
|
<th>Статус</th>
|
||||||
|
<th></th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody id="tbody"></tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</main>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/channels.js"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
43
src/parser_bot/web/static/hr/section/index.html
Normal file
43
src/parser_bot/web/static/hr/section/index.html
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html lang="ru">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>👥 HR · Дашборд — parser-tg-bot</title>
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||||
|
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<header>
|
||||||
|
<h1 id="page-title">parser-tg-bot</h1>
|
||||||
|
<nav id="nav-section"></nav>
|
||||||
|
</header>
|
||||||
|
<main>
|
||||||
|
<div class="row">
|
||||||
|
<h2 id="page-heading">Дашборд</h2>
|
||||||
|
<div class="spacer"></div>
|
||||||
|
<button id="poll-all">Опросить все каналы подраздела</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="stats-grid" id="stats"></div>
|
||||||
|
|
||||||
|
<h3>Каналы подраздела</h3>
|
||||||
|
<div class="card">
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Канал</th>
|
||||||
|
<th>Сообщений</th>
|
||||||
|
<th>Последнее сообщение</th>
|
||||||
|
<th>Последний опрос</th>
|
||||||
|
<th>Статус</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody id="channels-tbody"></tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</main>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/dashboard.js"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
78
src/parser_bot/web/static/hr/section/messages.html
Normal file
78
src/parser_bot/web/static/hr/section/messages.html
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html lang="ru">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>👥 HR · Сообщения — parser-tg-bot</title>
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||||
|
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<header>
|
||||||
|
<h1 id="page-title">parser-tg-bot</h1>
|
||||||
|
<nav id="nav-section"></nav>
|
||||||
|
</header>
|
||||||
|
<main>
|
||||||
|
<h2 id="page-heading">Сообщения подраздела</h2>
|
||||||
|
|
||||||
|
<div class="toolbar card">
|
||||||
|
<select id="channel-filter">
|
||||||
|
<option value="">Все каналы подраздела</option>
|
||||||
|
</select>
|
||||||
|
<input type="search" id="search" placeholder="Поиск по тексту..." />
|
||||||
|
<select id="hr-kind">
|
||||||
|
<option value="">Любой тип лида</option>
|
||||||
|
<option value="any">👥 HR (любой)</option>
|
||||||
|
<option value="vacancy">📢 Вакансия (наниматель)</option>
|
||||||
|
<option value="resume">📄 Резюме (соискатель)</option>
|
||||||
|
<option value="contact">📇 Лид-контакт</option>
|
||||||
|
</select>
|
||||||
|
<label class="row" style="gap:6px">
|
||||||
|
<input type="checkbox" id="leads-only" />
|
||||||
|
<span class="muted">🎯 Только лиды (ИИ)</span>
|
||||||
|
</label>
|
||||||
|
<select id="min-confidence" title="Минимальная уверенность ИИ">
|
||||||
|
<option value="0.3">0.3+</option>
|
||||||
|
<option value="0.5" selected>0.5+</option>
|
||||||
|
<option value="0.7">0.7+</option>
|
||||||
|
<option value="0.9">0.9+</option>
|
||||||
|
</select>
|
||||||
|
<label class="row" style="gap:6px">
|
||||||
|
<input type="checkbox" id="has-phone" />
|
||||||
|
<span class="muted">📞 С телефоном</span>
|
||||||
|
</label>
|
||||||
|
<select id="limit">
|
||||||
|
<option value="25">25</option>
|
||||||
|
<option value="50" selected>50</option>
|
||||||
|
<option value="100">100</option>
|
||||||
|
<option value="200">200</option>
|
||||||
|
</select>
|
||||||
|
<div class="spacer"></div>
|
||||||
|
<label class="row" style="gap:6px">
|
||||||
|
<input type="checkbox" id="autorefresh" />
|
||||||
|
<span class="muted">Автообновление</span>
|
||||||
|
</label>
|
||||||
|
<button id="refresh" class="secondary">Обновить</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card" id="list"></div>
|
||||||
|
|
||||||
|
<div class="pagination">
|
||||||
|
<button id="prev" class="secondary">← Назад</button>
|
||||||
|
<span class="muted" id="page-info" style="align-self:center"></span>
|
||||||
|
<button id="next" class="secondary">Вперёд →</button>
|
||||||
|
</div>
|
||||||
|
</main>
|
||||||
|
|
||||||
|
<dialog id="raw-dialog">
|
||||||
|
<h3 style="margin-top:0">Сообщение</h3>
|
||||||
|
<pre id="raw-content"></pre>
|
||||||
|
<div class="row" style="justify-content:flex-end; margin-top:12px">
|
||||||
|
<button class="secondary" id="raw-close">Закрыть</button>
|
||||||
|
</div>
|
||||||
|
</dialog>
|
||||||
|
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/messages.js"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
66
src/parser_bot/web/static/hr/section/settings.html
Normal file
66
src/parser_bot/web/static/hr/section/settings.html
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html lang="ru">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>👥 HR · Настройки — parser-tg-bot</title>
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||||
|
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<header>
|
||||||
|
<h1 id="page-title">parser-tg-bot</h1>
|
||||||
|
<nav id="nav-section"></nav>
|
||||||
|
</header>
|
||||||
|
<main>
|
||||||
|
<h2 id="page-heading">Настройки подраздела</h2>
|
||||||
|
|
||||||
|
<div class="card" style="margin-bottom:24px">
|
||||||
|
<h3 style="margin-top:0">Текущая конфигурация</h3>
|
||||||
|
<table>
|
||||||
|
<tbody id="config-tbody">
|
||||||
|
<tr><td colspan="2" class="empty">Загрузка...</td></tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
<div class="muted" style="font-size:12px; margin-top:12px">
|
||||||
|
Параметры задаются через переменные окружения (<span class="mono">.env</span>).
|
||||||
|
Для изменения отредактируйте <span class="mono">.env</span> и перезапустите контейнер:
|
||||||
|
<span class="mono">docker compose restart app</span>.
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card" style="margin-bottom:24px">
|
||||||
|
<h3 style="margin-top:0">Действия</h3>
|
||||||
|
<div class="row">
|
||||||
|
<button id="poll-all">Опросить все каналы подраздела сейчас</button>
|
||||||
|
<a href="/api/monitoring-tg/docs" target="_blank" class="badge">OpenAPI / Swagger</a>
|
||||||
|
<a href="/api/monitoring-tg/healthz" target="_blank" class="badge">Health check</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card" style="margin-bottom:24px">
|
||||||
|
<h3 style="margin-top:0">🤖 Промпт ИИ</h3>
|
||||||
|
<div class="row" style="margin-bottom:8px">
|
||||||
|
<span class="badge" id="prompt-status">—</span>
|
||||||
|
<span class="muted" id="prompt-length"></span>
|
||||||
|
<div class="spacer"></div>
|
||||||
|
<select id="prompt-level" title="Уровень редактирования промпта">
|
||||||
|
<option value="section" selected>Промпт подраздела</option>
|
||||||
|
<option value="vertical">Промпт вертикали</option>
|
||||||
|
</select>
|
||||||
|
<button id="prompt-reset" class="secondary">Сбросить уровень</button>
|
||||||
|
<button id="prompt-save">Сохранить</button>
|
||||||
|
</div>
|
||||||
|
<textarea id="prompt-editor" rows="22"
|
||||||
|
style="width:100%; font-family:ui-monospace, SFMono-Regular, Menlo, monospace; font-size:12px"></textarea>
|
||||||
|
<div class="muted" style="font-size:12px; margin-top:8px">
|
||||||
|
Каскад: <strong>section → vertical → default</strong>. Если промпта на
|
||||||
|
уровне подраздела нет, используется промпт вертикали; если и его нет —
|
||||||
|
встроенный по умолчанию. Сохранение применится в течение ~5 сек.
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</main>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/settings.js"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
76
src/parser_bot/web/static/index.html
Normal file
76
src/parser_bot/web/static/index.html
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html lang="ru">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>parser-tg-bot — выбор раздела</title>
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||||
|
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||||
|
<style>
|
||||||
|
.chooser {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
|
||||||
|
gap: 16px;
|
||||||
|
max-width: 880px;
|
||||||
|
margin: 32px auto 0;
|
||||||
|
}
|
||||||
|
.chooser .tile {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 8px;
|
||||||
|
padding: 28px 24px;
|
||||||
|
border-radius: 12px;
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
background: var(--panel);
|
||||||
|
color: var(--text);
|
||||||
|
text-decoration: none;
|
||||||
|
transition: transform 0.08s, border-color 0.1s;
|
||||||
|
}
|
||||||
|
.chooser .tile:hover {
|
||||||
|
border-color: var(--accent);
|
||||||
|
transform: translateY(-2px);
|
||||||
|
}
|
||||||
|
.chooser .tile .emoji { font-size: 40px; }
|
||||||
|
.chooser .tile .title { font-size: 18px; font-weight: 600; }
|
||||||
|
.chooser .tile .hint { color: var(--muted); font-size: 13px; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<header>
|
||||||
|
<h1>parser-tg-bot</h1>
|
||||||
|
<nav>
|
||||||
|
<a href="/api/monitoring-tg/" class="active">Разделы</a>
|
||||||
|
<a class="admin-login-link" href="/api/monitoring-tg/admin.html">Админ</a>
|
||||||
|
<a class="admin-link" href="/api/monitoring-tg/auth.html">Авторизация</a>
|
||||||
|
<a class="admin-link" href="/api/monitoring-tg/docs" target="_blank">API</a>
|
||||||
|
</nav>
|
||||||
|
</header>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/access.js"></script>
|
||||||
|
<main>
|
||||||
|
<h2>Выберите вертикаль</h2>
|
||||||
|
<p class="muted">
|
||||||
|
У каждой вертикали — свои подразделы (например, «Дубай», «Москва»
|
||||||
|
внутри Недвижимости, или «IT», «Продажи» внутри HR). Канал привязан
|
||||||
|
к одному подразделу одной вертикали.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<div class="chooser">
|
||||||
|
<a class="tile" href="/api/monitoring-tg/real-estate/">
|
||||||
|
<div class="emoji">🏠</div>
|
||||||
|
<div class="title">Недвижимость</div>
|
||||||
|
<div class="hint">
|
||||||
|
Объявления о покупке, продаже и аренде квартир, домов, апартаментов,
|
||||||
|
земли, коммерции. RU / EN / арабский — любой язык.
|
||||||
|
</div>
|
||||||
|
</a>
|
||||||
|
<a class="tile" href="/api/monitoring-tg/hr/">
|
||||||
|
<div class="emoji">👥</div>
|
||||||
|
<div class="title">HR / Кадры</div>
|
||||||
|
<div class="hint">
|
||||||
|
Вакансии (наниматели), резюме (соискатели) и короткие лиды-контакты
|
||||||
|
с указанием профессии и контактов.
|
||||||
|
</div>
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
</main>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
41
src/parser_bot/web/static/js/access.js
Normal file
41
src/parser_bot/web/static/js/access.js
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
// Ask the backend whether this client is on the admin allowlist and hide
|
||||||
|
// admin-only nav links if not. The backend independently enforces the
|
||||||
|
// allowlist on every admin endpoint, so this is purely cosmetic — it just
|
||||||
|
// removes dead controls from the UI for non-admin visitors.
|
||||||
|
|
||||||
|
// Memoised result of the first /access/me lookup; null until isAdmin() runs.
let _adminPromise = null;

/**
 * Tell whether the backend reports this client as an admin.
 *
 * The request is issued once; every later call returns the same promise.
 * A non-OK response or any failure resolves to `false`.
 *
 * @returns {Promise<boolean>}
 */
export function isAdmin() {
  if (_adminPromise) {
    return _adminPromise;
  }
  _adminPromise = fetch("/api/monitoring-tg/api/v1/access/me")
    .then((response) => (response.ok ? response.json() : { is_admin: false }))
    .then((payload) => Boolean(payload.is_admin))
    .catch(() => false);
  return _adminPromise;
}
|
||||||
|
|
||||||
|
/**
 * Fetch the raw access record for this client from /access/me.
 *
 * Every call issues a new request (no caching). A non-OK response or a
 * failed request resolves to a record with both flags set to `false`.
 *
 * @returns {Promise<object>} The response JSON, or the all-false fallback.
 */
export function adminStatus() {
  const denied = () => ({ is_admin: false, admin_ip_allowed: false });
  return fetch("/api/monitoring-tg/api/v1/access/me")
    .then((response) => (response.ok ? response.json() : denied()))
    .catch(() => denied());
}
|
||||||
|
|
||||||
|
// On module load: for non-admin clients, strip admin-only controls from the
// page. Admins keep everything, so nothing is observed or removed for them.
adminStatus().then((status) => {
  const isAdminClient = !!status.is_admin;
  const mayOpenAdminLogin = !!status.admin_ip_allowed;
  if (isAdminClient) return;

  // `.admin-login-link` survives only when the backend says this client is
  // allowed to open the admin login page.
  const doomed = [".admin-link", ".admin-only"];
  if (!mayOpenAdminLogin) doomed.push(".admin-login-link");

  const purge = () => {
    for (const selector of doomed) {
      document.querySelectorAll(selector).forEach((node) => node.remove());
    }
  };

  // Clean what is already in the DOM, then keep cleaning: navs built later by
  // other scripts are caught through the observer.
  purge();
  const observer = new MutationObserver(purge);
  observer.observe(document.body, { childList: true, subtree: true });
});
|
||||||
49
src/parser_bot/web/static/js/admin.js
Normal file
49
src/parser_bot/web/static/js/admin.js
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
import { api, toast } from "/api/monitoring-tg/static/js/api.js";
|
||||||
|
import "/api/monitoring-tg/static/js/access.js";
|
||||||
|
|
||||||
|
const form = document.getElementById("admin-form");
|
||||||
|
const password = document.getElementById("admin-password");
|
||||||
|
const statusEl = document.getElementById("admin-status");
|
||||||
|
const logoutBtn = document.getElementById("admin-logout");
|
||||||
|
|
||||||
|
/**
 * Resolve where to send the user after a successful admin login.
 *
 * Reads the `return` query parameter and accepts it only when it is a
 * same-origin relative path; anything else falls back to "/".
 *
 * @returns {string} A path that is safe to assign to `location.href`.
 */
function returnUrl() {
  const raw = new URLSearchParams(location.search).get("return");
  // Without this check ?return= is an open redirect. Besides absolute and
  // protocol-relative ("//host") targets, reject "/\host" and embedded
  // control characters: URL parsing turns the backslash into "/" and strips
  // tabs/newlines, which also yields "//host".
  const isLocalPath =
    !!raw && /^\/(?![/\\])/.test(raw) && !/[\x00-\x1f]/.test(raw);
  return isLocalPath ? raw : "/";
}
|
||||||
|
|
||||||
|
/**
 * Sync the admin page with the current access state from the backend.
 *
 * Three states are rendered: already admin (logout button shown), no admin
 * password configured (neither control shown), and password required
 * (login form shown and its field focused shortly after).
 *
 * @returns {Promise<void>}
 */
async function refresh() {
  const status = await api.accessMe();
  const isAdminNow = Boolean(status.is_admin);
  const needsPassword = !isAdminNow && Boolean(status.admin_password_enabled);

  let message;
  if (isAdminNow) {
    message = "Админ-доступ активен.";
  } else if (needsPassword) {
    message = "Введите админ пароль, чтобы открыть админские функции.";
  } else {
    message = "Админ пароль не задан. Доступ управляется IP-allowlist.";
  }

  statusEl.textContent = message;
  form.hidden = !needsPassword;
  logoutBtn.hidden = !isAdminNow;

  if (needsPassword) {
    // Focus is deferred briefly, after the form has been un-hidden.
    setTimeout(() => password.focus(), 30);
  }
}
|
||||||
|
|
||||||
|
// Submit the admin password; on success navigate to the validated return URL.
form.addEventListener("submit", async (e) => {
  e.preventDefault();
  try {
    await api.adminLogin(password.value);
    password.value = "";
    toast("Админ-доступ открыт", "success");
    location.href = returnUrl();
  } catch (err) {
    toast(err.message, "error");
  }
});

// Call the admin-logout endpoint, then reload so the page re-renders from the
// fresh access state.
logoutBtn.addEventListener("click", async () => {
  try {
    await api.adminLogout();
    location.reload();
  } catch (err) {
    // Surface the failure like the login handler does instead of leaving an
    // unhandled promise rejection and a page that silently does nothing.
    toast(err.message, "error");
  }
});

// Initial render of the page state.
refresh().catch(err => toast(err.message, "error"));
|
||||||
192
src/parser_bot/web/static/js/api.js
Normal file
192
src/parser_bot/web/static/js/api.js
Normal file
@@ -0,0 +1,192 @@
|
|||||||
|
import { getVertical, getSection } from "/api/monitoring-tg/static/js/vertical.js";
|
||||||
|
|
||||||
|
const BASE = "/api/monitoring-tg/api/v1";
|
||||||
|
let sectionLoginPromise = null;
|
||||||
|
|
||||||
|
/**
 * Prompt for the access code of the section in the current URL and post it
 * to /access/section-login.
 *
 * @returns {Promise<boolean>} `true` once the login request succeeded;
 *   `false` when there is no section in scope or the prompt was dismissed or
 *   left empty. A rejected login request propagates as an error.
 */
async function promptSectionLogin() {
  const vertical = getVertical();
  const section = getSection();
  if (!section) return false;

  const code = prompt(`Введите код подраздела "${section}"`);
  if (!code) return false;

  await request("/access/section-login", {
    method: "POST",
    body: JSON.stringify({ vertical, section, code }),
    // The login call itself must never trigger another unlock attempt.
    sectionRetry: false,
  });
  return true;
}

/**
 * Unlock the current section, sharing one in-flight attempt between callers.
 *
 * While an attempt is pending, later callers receive the same promise. The
 * shared slot is cleared when the attempt settles, so a subsequent call
 * starts a new prompt.
 *
 * @returns {Promise<boolean>} Whether the section was unlocked.
 */
async function unlockCurrentSection() {
  if (sectionLoginPromise) return sectionLoginPromise;

  sectionLoginPromise = promptSectionLogin();
  try {
    return await sectionLoginPromise;
  } finally {
    sectionLoginPromise = null;
  }
}
|
||||||
|
|
||||||
|
/**
 * Perform a JSON request against the API under BASE and return the parsed body.
 *
 * @param {string} path - Path appended to BASE (query string included).
 * @param {object} [options] - `fetch` init options, plus `sectionRetry`.
 * @param {boolean} [options.sectionRetry=true] - When the server answers
 *   401 "section code required", prompt for the section code once and replay
 *   the request. Pass `false` to disable this.
 * @returns {Promise<*>} Parsed JSON body, or `null` for a 204 response.
 * @throws {Error} With message `"<status>: <detail>"` on any non-OK response.
 */
async function request(path, options = {}) {
  const { sectionRetry = true, ...fetchOptions } = options;
  const res = await fetch(BASE + path, {
    headers: { "Content-Type": "application/json" },
    ...fetchOptions,
  });
  if (!res.ok) {
    let detail = res.statusText;
    try {
      detail = (await res.json()).detail || detail;
    } catch {
      // Body was missing or not JSON: keep the HTTP status text.
    }
    if (res.status === 401 && detail === "section code required" && sectionRetry) {
      if (await unlockCurrentSection()) {
        // Replay exactly once; the retry flag is cleared to prevent a loop.
        return request(path, { ...options, sectionRetry: false });
      }
    }
    // `detail` is not always a string (e.g. a list of validation errors);
    // interpolating an object directly would render as "[object Object]".
    const detailText = typeof detail === "string" ? detail : JSON.stringify(detail);
    throw new Error(`${res.status}: ${detailText}`);
  }
  if (res.status === 204) return null;
  return res.json();
}
|
||||||
|
|
||||||
|
/**
 * Serialise query parameters for the current (vertical, section) scope.
 *
 * `vertical` is always emitted and falls back to the one derived from the
 * URL. `section` is optional by design: leaving it `undefined` uses the
 * section from the URL, while an explicit `null` omits it so that
 * vertical-level pages can see every section.
 *
 * @param {object} [extra] - Additional parameters; entries whose value is
 *   `null`, `undefined` or `false` are skipped.
 * @param {{vertical?: string, section?: (string|null)}} [scope]
 * @returns {string} URL-encoded query string without the leading "?".
 */
function qs(extra = {}, { vertical, section } = {}) {
  const query = new URLSearchParams();
  query.set("vertical", vertical ?? getVertical());

  const scopedSection = section !== undefined ? section : getSection();
  if (scopedSection) query.set("section", scopedSection);

  Object.entries(extra)
    .filter(([, value]) => value != null && value !== false)
    .forEach(([key, value]) => query.set(key, String(value)));

  return query.toString();
}
|
||||||
|
|
||||||
|
/**
 * Thin client for the backend REST API. Every method returns the promise
 * produced by `request`. Methods taking a `scope` ({ vertical, section })
 * default to the vertical/section derived from the current URL.
 */
export const api = {
  // --- Access control (never triggers the section-code retry) ---
  accessMe() {
    return request("/access/me");
  },
  adminLogin(password) {
    return request("/access/admin-login", {
      method: "POST",
      body: JSON.stringify({ password }),
      sectionRetry: false,
    });
  },
  adminLogout() {
    return request("/access/admin-logout", { method: "POST", sectionRetry: false });
  },
  sectionLogin({ vertical, section, code }) {
    return request("/access/section-login", {
      method: "POST",
      body: JSON.stringify({ vertical, section, code }),
      sectionRetry: false,
    });
  },

  // --- Auth: section-agnostic ---
  authStatus() {
    return request("/auth/status");
  },
  authSendCode() {
    return request("/auth/send-code", { method: "POST" });
  },
  authSubmitCode(code) {
    return request("/auth/submit-code", { method: "POST", body: JSON.stringify({ code }) });
  },
  authSubmitPassword(password) {
    return request("/auth/submit-password", { method: "POST", body: JSON.stringify({ password }) });
  },
  authLogout() {
    return request("/auth/logout", { method: "POST" });
  },

  // --- Sections (sub-sections within a vertical) ---
  listSections(vertical) {
    // section: null keeps the listing vertical-wide.
    return request(`/sections?${qs({}, { vertical, section: null })}`);
  },
  createSection({ vertical, slug, title, emoji, description, accessCode }) {
    const payload = {
      vertical: vertical ?? getVertical(),
      slug, title, emoji, description, access_code: accessCode,
    };
    return request("/sections", { method: "POST", body: JSON.stringify(payload) });
  },
  updateSection(vertical, slug, patch) {
    const url = `/sections/${encodeURIComponent(vertical)}/${encodeURIComponent(slug)}`;
    return request(url, { method: "PATCH", body: JSON.stringify(patch) });
  },
  deleteSection(vertical, slug) {
    const url = `/sections/${encodeURIComponent(vertical)}/${encodeURIComponent(slug)}`;
    return request(url, { method: "DELETE" });
  },

  // --- Scoped reads: implicit (vertical, section) from URL ---
  globalStats(scope) {
    return request(`/stats?${qs({}, scope)}`);
  },

  listChannels(scope) {
    return request(`/channels?${qs({}, scope)}`);
  },
  getChannel(id, scope) {
    return request(`/channels/${id}?${qs({}, scope)}`);
  },
  channelStats(id, scope) {
    return request(`/channels/${id}/stats?${qs({}, scope)}`);
  },
  addChannel(identifier, scope = {}) {
    const vertical = scope.vertical ?? getVertical();
    const section = scope.section !== undefined ? scope.section : getSection();
    // A channel always belongs to a section; this throws synchronously.
    if (!section) {
      throw new Error("addChannel requires a section context");
    }
    return request("/channels", {
      method: "POST",
      body: JSON.stringify({ identifier, vertical, section }),
    });
  },
  updateChannel(id, patch, scope) {
    return request(`/channels/${id}?${qs({}, scope)}`, {
      method: "PATCH",
      body: JSON.stringify(patch),
    });
  },
  deleteChannel(id, scope) {
    return request(`/channels/${id}?${qs({}, scope)}`, { method: "DELETE" });
  },
  pollChannel(id, scope) {
    return request(`/channels/${id}/poll?${qs({}, scope)}`, { method: "POST" });
  },
  backfillMedia(id, batch = 50, scope) {
    return request(`/channels/${id}/backfill-media?${qs({ batch }, scope)}`, { method: "POST" });
  },
  reanalyze(id, batch = 500, scope) {
    return request(`/channels/${id}/reanalyze?${qs({ batch }, scope)}`, { method: "POST" });
  },

  pollAll(scope) {
    return request(`/poll?${qs({}, scope)}`, { method: "POST" });
  },

  // --- Messages ---
  listMessages({ channelId, q, realEstate, hrKind, hasPhone, leadsOnly,
                 minConfidence, limit = 50, offset = 0,
                 vertical, section } = {}) {
    // Unset filters are mapped to null, which `qs` leaves out of the query.
    // min_confidence is only forwarded together with leads_only.
    const filters = {
      limit,
      offset,
      channel_id: channelId || null,
      q: q || null,
      real_estate: realEstate || null,
      hr_kind: hrKind || null,
      has_phone: hasPhone ? "true" : null,
      leads_only: leadsOnly ? "true" : null,
      min_confidence: leadsOnly && minConfidence != null ? minConfidence : null,
    };
    return request(`/messages?${qs(filters, { vertical, section })}`);
  },
  getMessage(id, scope) {
    return request(`/messages/${id}?${qs({}, scope)}`);
  },

  // --- LLM ---
  llmStatus() {
    return request("/llm/status");
  },
  llmQueue(scope) {
    return request(`/llm/queue?${qs({}, scope)}`);
  },
  llmPromptGet(scope) {
    return request(`/llm/prompt?${qs({}, scope)}`);
  },
  llmPromptSave(prompt, scope) {
    return request(`/llm/prompt?${qs({}, scope)}`, {
      method: "PUT",
      body: JSON.stringify({ prompt }),
    });
  },
  llmPromptReset(scope) {
    return request(`/llm/prompt?${qs({}, scope)}`, { method: "DELETE" });
  },
};
|
||||||
|
|
||||||
|
/**
 * Show a transient notification appended to <body>.
 *
 * @param {string} message - Text to display (set as text, not HTML).
 * @param {string} [type="info"] - Added as a second CSS class next to `toast`.
 */
export function toast(message, type = "info") {
  const note = document.createElement("div");
  note.className = `toast ${type}`;
  note.textContent = message;
  document.body.appendChild(note);
  // The element is removed again after 3.5 seconds.
  setTimeout(() => {
    note.remove();
  }, 3500);
}
|
||||||
|
|
||||||
|
/**
 * Format a date value in the viewer's locale.
 *
 * @param {string|null|undefined} iso - Value accepted by the `Date` constructor.
 * @returns {string} Localised date-time, or an em dash when `iso` is falsy.
 */
export function fmtDate(iso) {
  return iso ? new Date(iso).toLocaleString() : "—";
}
|
||||||
|
|
||||||
|
/**
 * Format a timestamp as a coarse "time ago" label (s / m / h / d).
 *
 * @param {string|null|undefined} iso - Value accepted by the `Date` constructor.
 * @returns {string} E.g. "42s ago", "3h ago"; an em dash when `iso` is falsy
 *   or cannot be parsed.
 */
export function fmtRelative(iso) {
  if (!iso) return "—";
  const then = new Date(iso).getTime();
  // An unparseable value would otherwise fall through to "NaNd ago".
  if (Number.isNaN(then)) return "—";
  // Clamp at zero: a timestamp slightly ahead of the local clock (clock skew)
  // must not render as a negative age such as "-5s ago".
  const diff = Math.max(0, (Date.now() - then) / 1000);
  if (diff < 60) return `${Math.floor(diff)}s ago`;
  if (diff < 3600) return `${Math.floor(diff / 60)}m ago`;
  if (diff < 86400) return `${Math.floor(diff / 3600)}h ago`;
  return `${Math.floor(diff / 86400)}d ago`;
}
|
||||||
120
src/parser_bot/web/static/js/auth.js
Normal file
120
src/parser_bot/web/static/js/auth.js
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
import { api, toast } from "/api/monitoring-tg/static/js/api.js";
|
||||||
|
|
||||||
|
// Validated ?return= target, or null when it is absent or not acceptable.
const returnTo = (() => {
  const raw = new URLSearchParams(location.search).get("return");
  // Only allow same-origin relative paths to avoid open-redirect via ?return=.
  // "/\host" and embedded control characters are rejected as well: URL
  // parsing turns the backslash into "/" and strips tabs/newlines, so both
  // would end up as the protocol-relative "//host".
  const isLocalPath =
    !!raw && /^\/(?![/\\])/.test(raw) && !/[\x00-\x1f]/.test(raw);
  return isLocalPath ? raw : null;
})();

// Point the "return" link back at the page the user came from.
const returnLink = document.getElementById("return-link");
if (returnLink && returnTo) {
  returnLink.href = returnTo;
  // Relabel the nested button when present; its absence must not abort the
  // rest of the module.
  const returnButton = returnLink.querySelector("button");
  if (returnButton) returnButton.textContent = "← Вернуться";
}
|
||||||
|
|
||||||
|
// Names of the wizard panels; each maps to an element with id `step-<name>`.
const steps = ["idle", "code", "password", "done"];

/**
 * Reveal the panel for `step` and hide every other panel listed in `steps`.
 *
 * @param {string} step One of the names in `steps`.
 */
function show(step) {
  for (const name of steps) {
    const panel = document.getElementById(`step-${name}`);
    panel.hidden = name !== step;
  }
}
|
||||||
|
|
||||||
|
/**
 * Replace the contents of the #status-block element.
 *
 * @param {string} html Markup assigned to innerHTML as-is; callers must pass trusted markup.
 */
function setStatus(html) {
  const block = document.getElementById("status-block");
  block.innerHTML = html;
}
|
||||||
|
|
||||||
|
/**
 * Fetch the auth status from the API and bring the page in line with it:
 * fills both phone placeholders, updates the status badge and switches to the
 * "done" panel when authorized, otherwise to "idle".
 */
async function refresh() {
  const status = await api.authStatus();

  const phoneLabel = status.phone || "—";
  for (const id of ["phone", "phone-2"]) {
    document.getElementById(id).textContent = phoneLabel;
  }

  if (!status.authorized) {
    setStatus(`<div class="badge warn">Не авторизовано</div>`);
    show("idle");
    return;
  }

  setStatus(`<div class="badge ok">Авторизовано</div>`);
  document.getElementById("username").textContent = status.username || "(unnamed)";
  show("done");
}
|
||||||
|
|
||||||
|
// --- Event wiring for the auth wizard -------------------------------------
// Click handlers take the button from `e.currentTarget`, captured before the
// first await. `e.target` may be an element nested inside the button, and
// `currentTarget` is reset to null once event dispatch has finished.

// "Send code": request a login code, then move on to the code panel.
document.getElementById("btn-send").addEventListener("click", async (e) => {
  const btn = e.currentTarget;
  btn.disabled = true;
  try {
    await api.authSendCode();
    toast("Код отправлен в Telegram", "success");
    show("code");
    document.getElementById("code").focus();
  } catch (err) {
    toast(err.message, "error");
  } finally {
    btn.disabled = false;
  }
});

// "Resend": request another code while staying on the current panel.
document.getElementById("btn-resend").addEventListener("click", async (e) => {
  const btn = e.currentTarget;
  btn.disabled = true;
  try {
    await api.authSendCode();
    toast("Новый код отправлен", "success");
  } catch (err) {
    toast(err.message, "error");
  } finally {
    btn.disabled = false;
  }
});

// Code form: submit the code; the response tells us whether a 2FA password is needed.
document.getElementById("form-code").addEventListener("submit", async (e) => {
  e.preventDefault();
  const code = document.getElementById("code").value.trim();
  const btn = e.target.querySelector("button");
  btn.disabled = true;
  try {
    const res = await api.authSubmitCode(code);
    if (res.needs_password) {
      toast("Введи 2FA-пароль", "success");
      show("password");
      document.getElementById("password").focus();
    } else {
      toast("Готово", "success");
      await refresh();
    }
  } catch (err) {
    toast(err.message, "error");
  } finally {
    btn.disabled = false;
  }
});

// Password form: submit the 2FA password and clear the field on success.
document.getElementById("form-password").addEventListener("submit", async (e) => {
  e.preventDefault();
  const password = document.getElementById("password").value;
  const btn = e.target.querySelector("button");
  btn.disabled = true;
  try {
    await api.authSubmitPassword(password);
    toast("Авторизовано", "success");
    document.getElementById("password").value = "";
    await refresh();
  } catch (err) {
    toast(err.message, "error");
  } finally {
    btn.disabled = false;
  }
});

// "Logout": ask for confirmation, end the session, then re-read the status.
document.getElementById("btn-logout").addEventListener("click", async (e) => {
  const btn = e.currentTarget;
  if (!confirm("Выйти из Telegram-сессии?")) return;
  btn.disabled = true;
  try {
    await api.authLogout();
    toast("Сессия завершена", "success");
    await refresh();
  } catch (err) {
    toast(err.message, "error");
  } finally {
    btn.disabled = false;
  }
});

// Initial render.
refresh().catch(err => toast(err.message, "error"));
|
||||||
132
src/parser_bot/web/static/js/channels.js
Normal file
132
src/parser_bot/web/static/js/channels.js
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
import { api, toast, fmtRelative } from "/api/monitoring-tg/static/js/api.js";
|
||||||
|
import { isAdmin } from "/api/monitoring-tg/static/js/access.js";
|
||||||
|
import { getVertical, getSection, sectionBase, VERTICAL_META } from "/api/monitoring-tg/static/js/vertical.js";
|
||||||
|
|
||||||
|
const V = getVertical();
|
||||||
|
const section = getSection();
|
||||||
|
const sBase = sectionBase();
|
||||||
|
const meta = VERTICAL_META[V];
|
||||||
|
|
||||||
|
/**
 * HTML-escape a value for interpolation into markup or a quoted attribute.
 * Note: this intentionally shadows the legacy global `escape`.
 *
 * @param {*} s Value to escape; null/undefined become an empty string.
 * @returns {string} The value as a string with & < > " ' replaced by entities.
 */
function escape(s) {
  if (s == null) return "";
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  return String(s).replace(/[&<>"']/g, c => entities[c]);
}
|
||||||
|
|
||||||
|
/**
 * Fetch the channel list plus per-channel stats and render the table body.
 * Management buttons are rendered, and the toggle is enabled, only when
 * `isAdmin()` resolves truthy. A failed stats request for a single channel
 * falls back to an empty stats object instead of failing the whole table.
 */
async function load() {
  const admin = await isAdmin();
  const channels = await api.listChannels();
  const tbody = document.getElementById("tbody");

  if (!channels.length) {
    tbody.innerHTML = `<tr><td colspan="7" class="empty">Каналов пока нет</td></tr>`;
    return;
  }

  const stats = await Promise.all(
    channels.map(c => api.channelStats(c.id).catch(() => null)),
  );

  // Identical for every row, so build it once.
  const adminButtons = admin ? `
            <button class="secondary" data-action="poll">Опросить</button>
            <button class="secondary" data-action="backfill-media">Подкачать медиа</button>
            <button class="secondary" data-action="reanalyze">Переанализировать</button>
            <button class="danger" data-action="delete">Удалить</button>
            ` : "";

  const rows = [];
  channels.forEach((c, i) => {
    const s = stats[i] || {};
    rows.push(`
      <tr data-id="${c.id}">
        <td class="muted mono">${c.id}</td>
        <td>
          <div>${escape(c.title || "—")}</div>
          <div class="muted mono" style="font-size:12px">${escape(c.identifier)}</div>
        </td>
        <td class="mono muted">${c.tg_id ?? "—"}</td>
        <td>${(s.message_count ?? 0).toLocaleString()}</td>
        <td>${fmtRelative(c.last_polled_at)}</td>
        <td>
          <label class="row" style="gap:6px">
            <input type="checkbox" data-action="toggle" ${c.is_active ? "checked" : ""} ${admin ? "" : "disabled"} />
            <span class="badge ${c.is_active ? "ok" : "off"}">${c.is_active ? "on" : "off"}</span>
          </label>
        </td>
        <td>
          <div class="row" style="gap:6px">
            <a href="${sBase}/messages.html?channel_id=${c.id}" class="badge">сообщения</a>
            ${adminButtons}
          </div>
        </td>
      </tr>`);
  });
  tbody.innerHTML = rows.join("");
}
|
||||||
|
|
||||||
|
// Add-channel form: submit the trimmed identifier, report where the channel
// was added (vertical, plus section when one is selected), then reload the table.
document.getElementById("add-form").addEventListener("submit", async (e) => {
  e.preventDefault();
  const input = document.getElementById("identifier");
  const id = input.value.trim();
  if (!id) return;

  const submitBtn = e.target.querySelector("button");
  submitBtn.disabled = true;
  try {
    await api.addChannel(id);
    let where = meta.short;
    if (section) where = `${meta.short} / ${section}`;
    toast(`Канал добавлен в "${where}"`, "success");
    input.value = "";
    await load();
  } catch (err) {
    toast(err.message, "error");
  } finally {
    submitBtn.disabled = false;
  }
});
|
||||||
|
|
||||||
|
// Delegated handler for every row control in the channels table. The action
// comes from the nearest [data-action] element, the channel id from the
// enclosing <tr data-id>.
document.getElementById("tbody").addEventListener("click", async (e) => {
  const btn = e.target.closest("[data-action]");
  if (!btn) return;
  const tr = btn.closest("tr");
  const id = Number(tr.dataset.id);
  const action = btn.dataset.action;

  // Call `step` repeatedly until the server reports nothing pending or a batch
  // makes no progress, showing progress on the button. The button is always
  // restored and re-enabled afterwards: these actions do not re-render the
  // table on success, so without this it would stay disabled.
  const drain = async (idleLabel, progressLabel, step) => {
    let done = 0;
    let pending = Infinity;
    btn.disabled = true;
    try {
      while (pending > 0) {
        btn.textContent = progressLabel(done);
        const res = await step();
        done += res.updated;
        pending = res.pending;
        if (res.updated === 0) break;
      }
    } finally {
      btn.textContent = idleLabel;
      btn.disabled = false;
    }
    return { done, pending };
  };

  try {
    if (action === "delete") {
      if (!confirm("Удалить канал и все его сообщения?")) return;
      await api.deleteChannel(id);
      toast("Удалено", "success");
      await load();
    } else if (action === "poll") {
      // load() below replaces the row, so the button needs no manual re-enable.
      btn.disabled = true;
      const res = await api.pollChannel(id);
      toast(`Добавлено ${res.inserted} сообщений`, "success");
      await load();
    } else if (action === "backfill-media") {
      const { done, pending } = await drain(
        "Подкачать медиа",
        n => `Качаю... (готово: ${n})`,
        () => api.backfillMedia(id, 50),
      );
      toast(`Подкачано ${done}, осталось ${pending}`, "success");
    } else if (action === "reanalyze") {
      const { done, pending } = await drain(
        "Переанализировать",
        n => `Анализирую... (${n})`,
        () => api.reanalyze(id, 500),
      );
      toast(`Проанализировано ${done} сообщений, осталось ${pending}`, "success");
    } else if (action === "toggle") {
      const isActive = btn.checked;
      await api.updateChannel(id, { is_active: isActive });
      toast(isActive ? "Канал включён" : "Канал выключен", "success");
      await load();
    }
  } catch (err) {
    toast(err.message, "error");
    // Re-render so the table reflects server state after a failed action.
    await load();
  }
});
|
||||||
|
|
||||||
|
load().catch(err => toast(err.message, "error"));
|
||||||
87
src/parser_bot/web/static/js/dashboard.js
Normal file
87
src/parser_bot/web/static/js/dashboard.js
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
import { api, toast, fmtRelative } from "/api/monitoring-tg/static/js/api.js";
|
||||||
|
import { isAdmin } from "/api/monitoring-tg/static/js/access.js";
|
||||||
|
import { getVertical, getSection, sectionBase, VERTICAL_META } from "/api/monitoring-tg/static/js/vertical.js";
|
||||||
|
|
||||||
|
const V = getVertical();
|
||||||
|
const section = getSection();
|
||||||
|
const sBase = sectionBase();
|
||||||
|
const meta = VERTICAL_META[V];
|
||||||
|
|
||||||
|
/**
 * HTML-escape a value for interpolation into markup or a quoted attribute.
 * Note: this intentionally shadows the legacy global `escape`.
 *
 * @param {*} s Value to escape; null/undefined become an empty string.
 * @returns {string} The value as a string with & < > " ' replaced by entities.
 */
function escape(s) {
  if (s == null) return "";
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  return String(s).replace(/[&<>"']/g, c => entities[c]);
}
|
||||||
|
|
||||||
|
/**
 * Load global stats, LLM status and LLM queue size, then render the stat cards.
 * Failures of the two LLM endpoints are replaced with placeholder values; a
 * failure of `api.globalStats()` rejects the returned promise.
 */
async function loadStats() {
  const llmFallback = () => ({ enabled: false, ready: false, model: "—" });
  const queueFallback = () => ({ pending: null });
  const [stats, llm, queue] = await Promise.all([
    api.globalStats(),
    api.llmStatus().catch(llmFallback),
    api.llmQueue().catch(queueFallback),
  ]);

  let llmBadge;
  if (!llm.enabled) {
    llmBadge = `<span class="badge off">off</span>`;
  } else if (llm.ready) {
    llmBadge = `<span class="badge ok">ready</span>`;
  } else {
    llmBadge = `<span class="badge warn">загружается</span>`;
  }

  let queueValue = "—";
  if (queue.pending != null) queueValue = queue.pending.toLocaleString();

  document.getElementById("stats").innerHTML = `
    <div class="card stat"><div class="label">Каналы</div><div class="value">${stats.channels_active} / ${stats.channels_total}</div></div>
    <div class="card stat"><div class="label">Сообщений всего</div><div class="value">${stats.messages_total.toLocaleString()}</div></div>
    <div class="card stat"><div class="label">Сообщений за 24ч</div><div class="value">${stats.messages_last_24h.toLocaleString()}</div></div>
    <div class="card stat"><div class="label">🎯 Лидов всего</div><div class="value">${(stats.leads_total ?? 0).toLocaleString()}</div></div>
    <div class="card stat"><div class="label">🎯 Лидов за 24ч</div><div class="value"><a href="${sBase}/messages.html?leads_only=true">${(stats.leads_last_24h ?? 0).toLocaleString()}</a></div></div>
    <div class="card stat"><div class="label">⏳ В очереди ИИ</div><div class="value">${queueValue}</div></div>
    <div class="card stat"><div class="label">Период опроса</div><div class="value">${stats.poll_interval_seconds}s</div></div>
    <div class="card stat"><div class="label">Последний опрос</div><div class="value">${fmtRelative(stats.last_poll_at)}</div></div>
    <div class="card stat"><div class="label">Локальный ИИ</div><div class="value" style="font-size:14px">${llmBadge}<div class="muted mono" style="font-size:11px;margin-top:4px">${escape(llm.model || "")}</div></div></div>
  `;
}
|
||||||
|
|
||||||
|
/**
 * Render the dashboard's channel overview table: title (linking to the
 * channel's messages), message count, last message, last poll and on/off badge.
 * A failed stats request for a single channel falls back to empty stats.
 */
async function loadChannels() {
  const channels = await api.listChannels();
  const tbody = document.getElementById("channels-tbody");

  if (!channels.length) {
    tbody.innerHTML = `<tr><td colspan="5" class="empty">Каналов в этом подразделе пока нет — добавьте их на странице <a href="${sBase}/channels.html">Каналы</a></td></tr>`;
    return;
  }

  const stats = await Promise.all(
    channels.map(c => api.channelStats(c.id).catch(() => null)),
  );

  const rows = [];
  channels.forEach((c, i) => {
    const s = stats[i] || {};
    const activeBadge = c.is_active
      ? '<span class="badge ok">on</span>'
      : '<span class="badge off">off</span>';
    rows.push(`
      <tr>
        <td>
          <div><a href="${sBase}/messages.html?channel_id=${c.id}">${escape(c.title || c.identifier)}</a></div>
          <div class="muted mono" style="font-size:12px">${escape(c.identifier)}</div>
        </td>
        <td>${(s.message_count ?? 0).toLocaleString()}</td>
        <td>${fmtRelative(s.last_message_at)}</td>
        <td>${fmtRelative(c.last_polled_at)}</td>
        <td>${activeBadge}</td>
      </tr>`);
  });
  tbody.innerHTML = rows.join("");
}
|
||||||
|
|
||||||
|
// "Poll all": queue a poll for every channel in the current scope, report how
// many were queued, then refresh the dashboard.
document.getElementById("poll-all").addEventListener("click", async (e) => {
  // Capture the button up front: `e.target` may be an element nested inside
  // it, and `e.currentTarget` is reset to null once dispatch has finished.
  const btn = e.currentTarget;
  btn.disabled = true;
  try {
    const res = await api.pollAll();
    const scope = section ? `${meta.short} / ${section}` : meta.short;
    toast(`В очереди ${res.queued ?? 0} каналов (${scope}) — опрос идёт в фоне`, "success");
    await loadAll();
  } catch (err) {
    toast(err.message, "error");
  } finally {
    btn.disabled = false;
  }
});
|
||||||
|
|
||||||
|
/**
 * Refresh the whole dashboard: hide the "poll all" button for non-admins,
 * then reload stats and the channel table in parallel. Errors are reported
 * through a toast rather than thrown.
 */
async function loadAll() {
  try {
    const admin = await isAdmin();
    document.getElementById("poll-all").hidden = !admin;
    await Promise.all([loadStats(), loadChannels()]);
  } catch (err) {
    toast(err.message, "error");
  }
}
|
||||||
|
|
||||||
|
loadAll();
|
||||||
|
setInterval(loadAll, 15000);
|
||||||
433
src/parser_bot/web/static/js/messages.js
Normal file
433
src/parser_bot/web/static/js/messages.js
Normal file
@@ -0,0 +1,433 @@
|
|||||||
|
import { api, toast, fmtDate } from "/api/monitoring-tg/static/js/api.js";
|
||||||
|
import { getVertical, getSection, VERTICAL_META } from "/api/monitoring-tg/static/js/vertical.js";
|
||||||
|
|
||||||
|
const V = getVertical();
|
||||||
|
const section = getSection();
|
||||||
|
const meta = VERTICAL_META[V];
|
||||||
|
|
||||||
|
// Mutable page state for the messages view: paging, active filters, the
// cached channel list and the auto-refresh timer handle.
const state = {
  // Paging.
  offset: 0,
  limit: 50,

  // Filters.
  channelId: null,
  q: "",
  realEstate: "",
  hrKind: "",
  hasPhone: false,
  leadsOnly: false,
  minConfidence: 0.5,

  // Channels fetched by loadChannels().
  channels: [],

  // Auto-refresh toggle and its setInterval handle.
  autorefresh: false,
  timer: null,
};
|
||||||
|
|
||||||
|
/**
 * HTML-escape a value for interpolation into markup or a quoted attribute.
 * Note: this intentionally shadows the legacy global `escape`.
 *
 * @param {*} s Value to escape; null/undefined become an empty string.
 * @returns {string} The value as a string with & < > " ' replaced by entities.
 */
function escape(s) {
  if (s == null) return "";
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  return String(s).replace(/[&<>"']/g, c => entities[c]);
}
|
||||||
|
|
||||||
|
/**
 * HTML-escape `text` and wrap every case-insensitive occurrence of `q` in <mark>.
 *
 * Matching is done on the raw text and each piece is escaped afterwards.
 * Matching against already-escaped text would let a query such as "amp" or
 * "lt" hit the inside of an entity and emit broken markup like
 * `&<mark>amp</mark>;`.
 *
 * @param {string} text Raw message text.
 * @param {string} q Search query; when empty the text is only escaped.
 * @returns {string} Safe HTML.
 */
function highlight(text, q) {
  if (!q || !text) return escape(text);
  // Treat the query as a literal string, not as a pattern.
  const literal = String(q).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // With a capturing group, split() keeps the matches at the odd indices.
  const pieces = String(text).split(new RegExp("(" + literal + ")", "gi"));
  return pieces
    .map((piece, i) => (i % 2 === 1
      ? `<mark style="background:#f1c40f33;color:inherit">${escape(piece)}</mark>`
      : escape(piece)))
    .join("");
}
|
||||||
|
|
||||||
|
/**
 * Look up a display name for a channel id in `state.channels`.
 *
 * @param {number} id Channel id.
 * @returns {string} The channel's title, else its identifier, else "#<id>" when unknown.
 */
function channelTitle(id) {
  for (const channel of state.channels) {
    if (channel.id === id) return channel.title || channel.identifier;
  }
  return `#${id}`;
}
|
||||||
|
|
||||||
|
/**
 * Format a byte count as a short label in B, KB (no decimals) or MB (one decimal).
 *
 * @param {number|null|undefined} bytes Size in bytes.
 * @returns {string} Label such as "512B", "3KB" or "1.5MB"; "" when no size is given.
 */
function fmtSize(bytes) {
  const KIB = 1024;
  const MIB = KIB * KIB;
  if (bytes == null) return "";
  return bytes < KIB
    ? `${bytes}B`
    : bytes < MIB
      ? `${(bytes / KIB).toFixed(0)}KB`
      : `${(bytes / MIB).toFixed(1)}MB`;
}
|
||||||
|
|
||||||
|
// Display labels for real-estate listing kinds.
const REAL_ESTATE_LABELS = {
  sale: "продажа",
  rent: "аренда",
  purchase: "покупка",
};

// Display labels for HR lead kinds.
const HR_KIND_LABELS = {
  vacancy: "вакансия",
  resume: "резюме",
  contact: "контакт",
};
|
||||||
|
|
||||||
|
/**
 * Build contact badges for a message: a link to the original post, the sender
 * (username link, or plain name when there is no username) and every Telegram
 * handle listed in `m.extracted.tg_handles`.
 *
 * @param {object|null|undefined} m Message object.
 * @returns {string[]} HTML fragments, one per badge.
 */
function senderContacts(m) {
  const badges = [];
  if (m) {
    if (m.post_url) {
      badges.push(`<a class="badge tg-link" href="${escape(m.post_url)}" target="_blank">📬 Открыть в Telegram</a>`);
    }
    if (m.sender_username) {
      const name = m.sender_username;
      const shown = name.startsWith("@") ? name : "@" + name;
      badges.push(`<a class="badge tg" href="https://t.me/${escape(name.replace(/^@/, ""))}" target="_blank">✉️ ${escape(shown)}</a>`);
    } else if (m.sender_name) {
      badges.push(`<span class="badge name">✍️ ${escape(m.sender_name)}</span>`);
    }
  }
  const handles = (m && m.extracted && m.extracted.tg_handles) || [];
  handles.forEach((handle) => {
    // The link uses the handle without its leading "@"; the label keeps it as given.
    const bare = handle.replace(/^@/, "");
    badges.push(`<a class="badge tg" href="https://t.me/${escape(bare)}" target="_blank">✉️ ${escape(handle)}</a>`);
  });
  return badges;
}
|
||||||
|
|
||||||
|
/**
 * Render the lead card for a real-estate listing.
 *
 * @param {object|null|undefined} lead Lead object; nothing is rendered unless `is_listing` is truthy.
 * @param {object} m The message the lead belongs to (used for sender contacts).
 * @returns {string} Card HTML, or "" when there is no listing.
 */
function renderReLead(lead, m) {
  if (!lead || !lead.is_listing) return "";

  // Confidence bucket drives the card's colour class.
  let tone = "lead-weak";
  if (lead.confidence >= 0.7) tone = "lead-strong";
  else if (lead.confidence >= 0.4) tone = "lead-medium";

  // The textual price wins; otherwise format the numeric price with its currency.
  let priceBit = lead.price_text;
  if (!priceBit) {
    priceBit = lead.price_value != null
      ? `${lead.price_value.toLocaleString()}${lead.currency ? " " + lead.currency : ""}`
      : null;
  }

  // Short facts in display order; empty entries are dropped.
  const bits = [
    lead.kind ? (REAL_ESTATE_LABELS[lead.kind] || lead.kind) : null,
    lead.property_type,
    lead.rooms,
    lead.area_m2 ? `${lead.area_m2} м²` : null,
    priceBit || lead.currency,
    lead.location,
  ].filter(Boolean);

  let facts = "";
  if (bits.length) facts = `<div class="lead-facts">${escape(bits.join(" · "))}</div>`;

  let summary = "";
  if (lead.summary) summary = `<div class="lead-summary">${escape(lead.summary)}</div>`;

  const contacts = [];
  if (lead.contact_phone) {
    contacts.push(`<a class="badge phone" href="tel:${escape(lead.contact_phone)}">📞 ${escape(lead.contact_phone)}</a>`);
  }
  if (lead.contact_name) {
    contacts.push(`<span class="badge name">👤 ${escape(lead.contact_name)}</span>`);
  }
  for (const badge of senderContacts(m)) contacts.push(badge);

  const contactsBlock = contacts.length
    ? `<div class="message-tags">${contacts.join(" ")}</div>`
    : "";

  return `
    <div class="lead-card ${tone}">
      <div class="lead-head">
        <span class="badge lead">🎯 ЛИД · 🏠</span>
        ${facts}
        <span class="lead-confidence">${(lead.confidence * 100).toFixed(0)}%</span>
      </div>
      ${summary}
      ${contactsBlock}
    </div>`;
}
|
||||||
|
|
||||||
|
/**
 * Render the lead card for an HR lead (vacancy / resume / contact).
 *
 * @param {object|null|undefined} lead HR lead object; nothing is rendered unless `is_lead` is truthy.
 * @param {object} m The message the lead belongs to (used for sender contacts).
 * @returns {string} Card HTML, or "" when there is no lead.
 */
function renderHrLead(lead, m) {
  if (!lead || !lead.is_lead) return "";

  // Confidence bucket drives the card's colour class.
  let tone = "lead-weak";
  if (lead.confidence >= 0.7) tone = "lead-strong";
  else if (lead.confidence >= 0.4) tone = "lead-medium";

  // Only an explicit boolean produces a remote/office fact.
  let remoteBit = null;
  if (lead.remote === true) remoteBit = "удалёнка";
  else if (lead.remote === false) remoteBit = "офис";

  // The textual salary wins; otherwise format the numeric salary with its currency.
  let salaryBit = lead.salary_text;
  if (!salaryBit) {
    salaryBit = lead.salary_value != null
      ? `${lead.salary_value.toLocaleString()}${lead.currency ? " " + lead.currency : ""}`
      : null;
  }

  // Short facts in display order; empty entries are dropped.
  const bits = [
    lead.kind ? (HR_KIND_LABELS[lead.kind] || lead.kind) : null,
    lead.title,
    lead.company,
    lead.candidate_name,
    lead.experience_years != null ? `${lead.experience_years}+ лет опыта` : null,
    lead.employment_type,
    remoteBit,
    lead.location,
    salaryBit || lead.currency,
  ].filter(Boolean);

  let facts = "";
  if (bits.length) facts = `<div class="lead-facts">${escape(bits.join(" · "))}</div>`;

  let summary = "";
  if (lead.summary) summary = `<div class="lead-summary">${escape(lead.summary)}</div>`;

  // At most 12 skills are shown.
  const skills = (lead.skills || []).slice(0, 12);
  let skillsBlock = "";
  if (skills.length) {
    const skillBadges = skills.map(s => `<span class="badge">${escape(s)}</span>`);
    skillsBlock = `<div class="message-tags">${skillBadges.join(" ")}</div>`;
  }

  const contacts = [];
  if (lead.contact_phone) {
    contacts.push(`<a class="badge phone" href="tel:${escape(lead.contact_phone)}">📞 ${escape(lead.contact_phone)}</a>`);
  }
  if (lead.contact_name) {
    contacts.push(`<span class="badge name">👤 ${escape(lead.contact_name)}</span>`);
  }
  for (const badge of senderContacts(m)) contacts.push(badge);

  const contactsBlock = contacts.length
    ? `<div class="message-tags">${contacts.join(" ")}</div>`
    : "";

  return `
    <div class="lead-card ${tone}">
      <div class="lead-head">
        <span class="badge lead">🎯 ЛИД · 👥</span>
        ${facts}
        <span class="lead-confidence">${(lead.confidence * 100).toFixed(0)}%</span>
      </div>
      ${summary}
      ${skillsBlock}
      ${contactsBlock}
    </div>`;
}
|
||||||
|
|
||||||
|
/**
 * Render the regex-extracted badges that belong below a message: a
 * real-estate summary (only in the real_estate vertical and only when no
 * listing lead is shown), phones, up to three names and, when no lead card is
 * shown, Telegram handles.
 *
 * @param {object|null|undefined} ex The message's `extracted` payload.
 * @returns {string} A `.message-tags` wrapper, or "" when there is nothing to show.
 */
function renderExtracted(ex) {
  if (!ex) return "";
  const badges = [];

  const listingShown = Boolean(ex.lead && ex.lead.is_listing);
  const re = ex.real_estate;
  if (V === "real_estate" && re && !listingShown) {
    const bits = [
      re.kind ? (REAL_ESTATE_LABELS[re.kind] || re.kind) : null,
      re.property_type,
      re.rooms,
      re.area_m2 ? `${re.area_m2} м²` : null,
      re.price,
    ].filter(Boolean);
    if (bits.length) {
      badges.push(`<span class="badge re">🏠 regex: ${escape(bits.join(" · "))}</span>`);
    }
  }

  // Phones and names found by regex stay useful even when a lead exists, but
  // values already present on the active lead are not repeated.
  const activeLead = V === "hr" ? ex.hr_lead : ex.lead;
  const inLead = new Set();
  if (activeLead) {
    for (const value of [activeLead.contact_phone, activeLead.contact_name]) {
      if (value) inLead.add(value);
    }
  }

  (ex.phones || []).forEach((phone) => {
    if (!inLead.has(phone)) {
      badges.push(`<a class="badge phone" href="tel:${escape(phone)}">📞 ${escape(phone)}</a>`);
    }
  });

  const names = ex.names || [];
  names.slice(0, 3).forEach((name) => {
    if (!inLead.has(name)) {
      badges.push(`<span class="badge name">👤 ${escape(name)}</span>`);
    }
  });
  if (names.length > 3) {
    badges.push(`<span class="badge name muted">+${ex.names.length - 3}</span>`);
  }

  // When a lead card is rendered it already lists the handles via senderContacts().
  const leadShown = (V === "hr" && ex.hr_lead && ex.hr_lead.is_lead) ||
    (V === "real_estate" && ex.lead && ex.lead.is_listing);
  if (!leadShown) {
    (ex.tg_handles || []).forEach((handle) => {
      const bare = handle.replace(/^@/, "");
      badges.push(`<a class="badge tg" href="https://t.me/${escape(bare)}" target="_blank">✉️ ${escape(handle)}</a>`);
    });
  }

  if (!badges.length) return "";
  return `<div class="message-tags">${badges.join(" ")}</div>`;
}
|
||||||
|
|
||||||
|
/**
 * Render the media attachments of a message.
 *
 * Skipped files get a badge with the reason; photos and stickers become
 * lightbox thumbnails; video and audio get native players; anything else is
 * rendered as a download link. Files without a URL are omitted.
 *
 * @param {Array<object>|null|undefined} files The message's `media_files`.
 * @returns {string} A `.message-media` wrapper, or "" when there are no files.
 */
function renderMedia(files) {
  if (!files || !files.length) return "";
  return `<div class="message-media">${files.map(f => {
    if (f.skipped) {
      // Reasons other than "too_large" are shown verbatim, so they are escaped
      // like every other server-provided value in this module.
      const why = f.skipped === "too_large" ? "слишком большой" : escape(f.skipped);
      return `<div class="media-item media-skipped"><span class="badge warn">${escape(f.kind)}</span>
        <span class="muted">${why}${f.size ? `, ${fmtSize(f.size)}` : ""}</span></div>`;
    }
    if (!f.url) return "";
    if (f.kind === "photo" || f.kind === "sticker") {
      return `<a href="${escape(f.url)}" target="_blank" data-action="lightbox" data-url="${escape(f.url)}">
        <img class="media-thumb" src="${escape(f.url)}" loading="lazy" alt="" />
      </a>`;
    }
    if (f.kind === "video") {
      return `<video class="media-video" src="${escape(f.url)}" controls preload="metadata"></video>`;
    }
    if (f.kind === "audio") {
      return `<audio src="${escape(f.url)}" controls preload="none" style="width:100%"></audio>`;
    }
    return `<a class="media-doc" href="${escape(f.url)}" target="_blank" download>
      <span class="badge">${escape(f.kind)}</span>
      <span>${escape(f.mime || "файл")}</span>
      <span class="muted">${fmtSize(f.size)}</span>
    </a>`;
  }).join("")}</div>`;
}
|
||||||
|
|
||||||
|
/**
 * Seed `state` from the page's query string. Parameters that are absent leave
 * the corresponding default untouched; numeric parameters that are empty or
 * not finite numbers are ignored as well, so NaN never reaches the API.
 */
function readUrl() {
  const params = new URLSearchParams(location.search);

  // Parse a numeric parameter; null means "absent or unusable".
  const numeric = (key) => {
    const raw = params.get(key);
    if (raw === null || raw.trim() === "") return null;
    const value = Number(raw);
    return Number.isFinite(value) ? value : null;
  };

  const channelId = numeric("channel_id");
  if (channelId !== null) state.channelId = channelId;

  if (params.has("q")) state.q = params.get("q");
  if (params.has("real_estate")) state.realEstate = params.get("real_estate");
  if (params.has("hr_kind")) state.hrKind = params.get("hr_kind");

  // Boolean flags are only switched on by the literal value "true".
  if (params.get("has_phone") === "true") state.hasPhone = true;
  if (params.get("leads_only") === "true") state.leadsOnly = true;

  const minConfidence = numeric("min_confidence");
  if (minConfidence !== null) state.minConfidence = minConfidence;
}
|
||||||
|
|
||||||
|
/**
 * Push the current `state` into the filter controls. The real-estate and
 * HR-kind selects are optional and are skipped when absent from the page.
 */
function syncControls() {
  const byId = (id) => document.getElementById(id);

  byId("channel-filter").value = state.channelId ?? "";
  byId("search").value = state.q;

  const reSel = byId("real-estate");
  if (reSel) reSel.value = state.realEstate;

  const hrSel = byId("hr-kind");
  if (hrSel) hrSel.value = state.hrKind;

  byId("has-phone").checked = state.hasPhone;
  byId("leads-only").checked = state.leadsOnly;
  byId("min-confidence").value = String(state.minConfidence);
  byId("limit").value = state.limit;
}
|
||||||
|
|
||||||
|
/**
 * Fetch the channel list into `state.channels`, rebuild the channel filter
 * dropdown (with an "all channels" entry first) and re-sync the controls so
 * the current selection survives the rebuild.
 */
async function loadChannels() {
  state.channels = await api.listChannels();

  const options = [`<option value="">Все каналы (${meta.short})</option>`];
  for (const c of state.channels) {
    options.push(`<option value="${c.id}">${escape(c.title || c.identifier)}</option>`);
  }
  document.getElementById("channel-filter").innerHTML = options.join("");

  syncControls();
}
|
||||||
|
|
||||||
|
/**
 * Fetch one page of messages for the current filters and render it, then
 * update the page indicator and the prev/next buttons. Errors are shown both
 * as a toast and inline in the list.
 */
async function loadMessages() {
  const list = document.getElementById("list");
  list.innerHTML = `<div class="empty">Загрузка...</div>`;

  // Markup for a single message: meta row, text, lead card, badges, media.
  const renderMessage = (m) => `
      <div class="message" data-id="${m.id}">
        <div class="message-meta">
          <a href="?channel_id=${m.channel_id}">${escape(channelTitle(m.channel_id))}</a>
          <span>·</span>
          <span>${fmtDate(m.date)}</span>
          <span>·</span>
          <span class="mono">#${m.tg_message_id}</span>
          ${m.group_size > 1 ? `<span class="badge">альбом · ${m.group_size}</span>` : (m.has_media ? '<span class="badge">media</span>' : '')}
          ${m.views != null ? `<span>👁 ${m.views}</span>` : ''}
          ${m.forwards ? `<span>↗ ${m.forwards}</span>` : ''}
          <div class="spacer"></div>
          <a href="#" data-action="raw">json</a>
        </div>
        <div class="message-text">${m.text ? highlight(m.text, state.q) : '<span class="muted">(без текста)</span>'}</div>
        ${V === "hr"
          ? renderHrLead(m.extracted && m.extracted.hr_lead, m)
          : renderReLead(m.extracted && m.extracted.lead, m)}
        ${renderExtracted(m.extracted)}
        ${renderMedia(m.media_files)}
      </div>
    `;

  try {
    const msgs = await api.listMessages({
      channelId: state.channelId,
      q: state.q || undefined,
      realEstate: state.realEstate || undefined,
      hrKind: state.hrKind || undefined,
      hasPhone: state.hasPhone || undefined,
      leadsOnly: state.leadsOnly || undefined,
      // The confidence threshold is only sent together with the leads-only filter.
      minConfidence: state.leadsOnly ? state.minConfidence : undefined,
      limit: state.limit,
      offset: state.offset,
    });

    list.innerHTML = msgs.length
      ? msgs.map(renderMessage).join("")
      : `<div class="empty">Сообщений нет</div>`;

    // An empty page has no range to show; "1–0" or "51–50" would be misleading.
    document.getElementById("page-info").textContent = msgs.length
      ? `${state.offset + 1}–${state.offset + msgs.length}`
      : "—";
    document.getElementById("prev").disabled = state.offset === 0;
    // A short page means there is nothing after it.
    document.getElementById("next").disabled = msgs.length < state.limit;
  } catch (err) {
    toast(err.message, "error");
    list.innerHTML = `<div class="empty">Ошибка: ${escape(err.message)}</div>`;
  }
}
|
||||||
|
|
||||||
|
// --- Filter controls -------------------------------------------------------
// Each handler stores the new filter value in `state`; all of them except
// the confidence slider unconditionally rewind to the first page and reload.

// Channel dropdown: an empty option value clears the channel filter.
document.getElementById("channel-filter").addEventListener("change", (event) => {
  const raw = event.target.value;
  state.channelId = raw ? Number(raw) : null;
  state.offset = 0;
  loadMessages();
});

// Free-text search, debounced: the reload fires 250 ms after the last input.
let searchTimer;
document.getElementById("search").addEventListener("input", (event) => {
  clearTimeout(searchTimer);
  searchTimer = setTimeout(() => {
    state.q = event.target.value.trim();
    state.offset = 0;
    loadMessages();
  }, 250);
});

// Page-size selector.
document.getElementById("limit").addEventListener("change", (event) => {
  state.limit = Number(event.target.value);
  state.offset = 0;
  loadMessages();
});

// The two selects below are looked up defensively: a listener is attached
// only when the element exists on the page.
const reSelEl = document.getElementById("real-estate");
if (reSelEl) {
  reSelEl.addEventListener("change", (event) => {
    state.realEstate = event.target.value;
    state.offset = 0;
    loadMessages();
  });
}

const hrSelEl = document.getElementById("hr-kind");
if (hrSelEl) {
  hrSelEl.addEventListener("change", (event) => {
    state.hrKind = event.target.value;
    state.offset = 0;
    loadMessages();
  });
}

// Checkbox filters: element id -> `state` key that mirrors its checked flag.
for (const [elementId, stateKey] of [["has-phone", "hasPhone"], ["leads-only", "leadsOnly"]]) {
  document.getElementById(elementId).addEventListener("change", (event) => {
    state[stateKey] = event.target.checked;
    state.offset = 0;
    loadMessages();
  });
}

// The confidence threshold is always stored, but the list is reloaded only
// while the "leads only" filter is switched on.
document.getElementById("min-confidence").addEventListener("change", (event) => {
  state.minConfidence = Number(event.target.value);
  if (!state.leadsOnly) return;
  state.offset = 0;
  loadMessages();
});
|
||||||
|
|
||||||
|
// Manual refresh re-runs the current query.
document.getElementById("refresh").addEventListener("click", loadMessages);

// Pagination: step the offset by one page; "prev" never goes below zero.
document.getElementById("prev").addEventListener("click", () => {
  const steppedBack = state.offset - state.limit;
  state.offset = Math.max(0, steppedBack);
  loadMessages();
});
document.getElementById("next").addEventListener("click", () => {
  state.offset = state.offset + state.limit;
  loadMessages();
});

// Auto-refresh toggle: any running timer is always cancelled first, then a
// fresh 10-second interval is started only when the box is checked.
document.getElementById("autorefresh").addEventListener("change", (event) => {
  state.autorefresh = event.target.checked;
  if (state.timer) {
    clearInterval(state.timer);
    state.timer = null;
  }
  if (!state.autorefresh) return;
  state.timer = setInterval(loadMessages, 10000);
});
|
||||||
|
|
||||||
|
// Delegated click handling for the message list. Two actions are recognised
// via `data-action`: "lightbox" (open media preview) and "raw" (fetch the
// message by id and show it as pretty-printed JSON in the raw dialog).
document.getElementById("list").addEventListener("click", async (event) => {
  const preview = event.target.closest("[data-action='lightbox']");
  if (preview) {
    event.preventDefault();
    openLightbox(preview.dataset.url);
    return;
  }

  const rawLink = event.target.closest("[data-action='raw']");
  if (!rawLink) return;
  event.preventDefault();

  // The message id is stored on the enclosing `.message` element.
  const messageId = Number(rawLink.closest(".message").dataset.id);
  try {
    const message = await api.getMessage(messageId);
    const pretty = JSON.stringify(message, null, 2);
    document.getElementById("raw-content").textContent = pretty;
    document.getElementById("raw-dialog").showModal();
  } catch (err) {
    toast(err.message, "error");
  }
});
|
||||||
|
|
||||||
|
/**
 * Shows `url` as an image inside the `#lightbox` element.
 *
 * The element is created and appended to <body> on first use (or after a
 * click removed it); clicking it removes it from the document again.
 *
 * @param {string} url - Image URL; escaped before being placed into markup.
 */
function openLightbox(url) {
  let overlay = document.getElementById("lightbox");
  if (overlay === null) {
    overlay = document.createElement("div");
    overlay.id = "lightbox";
    overlay.addEventListener("click", () => overlay.remove());
    document.body.appendChild(overlay);
  }
  const safeUrl = escape(url);
  overlay.innerHTML = `<img src="${safeUrl}" alt="" />`;
}
|
||||||
|
// Close button of the raw-JSON dialog.
document.getElementById("raw-close").addEventListener("click", () => {
  const dialog = document.getElementById("raw-dialog");
  dialog.close();
});

// Page start-up: read state from the URL, then load channels and, once
// that has finished, the first page of messages.
readUrl();
(async () => {
  await loadChannels();
  await loadMessages();
})();
|
||||||
25
src/parser_bot/web/static/js/nav-status.js
Normal file
25
src/parser_bot/web/static/js/nav-status.js
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
import { api } from "/api/monitoring-tg/static/js/api.js";
|
||||||
|
import { isAdmin } from "/api/monitoring-tg/static/js/access.js";
|
||||||
|
import { appBase } from "/api/monitoring-tg/static/js/vertical.js";
|
||||||
|
|
||||||
|
// "Telegram not authorized" banner. Only useful for admins — non-admin
|
||||||
|
// visitors can't open /auth.html anyway, so showing the banner would be
|
||||||
|
// noise (and the /auth/status call itself 404s for non-admins).
|
||||||
|
// Inserts a warning banner as the first child of <main> when the current
// visitor is an admin and the API reports that Telegram is not authorized.
(async () => {
  if (!(await isAdmin())) return;
  try {
    const status = await api.authStatus();
    if (status.authorized) return;

    const banner = document.createElement("div");
    banner.className = "card";
    banner.style.cssText =
      "border-color: rgba(241, 196, 15, 0.5); background: rgba(241, 196, 15, 0.08); margin-bottom: 16px;";
    // The link carries the current path so the auth page can send the admin
    // back here; the path is URI-encoded before being placed in the query.
    banner.innerHTML = `
      <strong>Telegram не авторизован.</strong>
      Парсер не сможет ходить за сообщениями, пока вы не залогинитесь.
      <a href="${appBase()}/auth.html?return=${encodeURIComponent(location.pathname)}">Открыть страницу авторизации →</a>
    `;

    const main = document.querySelector("main");
    if (main) main.insertBefore(banner, main.firstChild);
  } catch (err) {
    // The banner is best-effort and must never break the page, but a
    // failing status check should leave a trace instead of vanishing.
    console.warn("nav-status: could not check Telegram auth status", err);
  }
})();
|
||||||
71
src/parser_bot/web/static/js/nav.js
Normal file
71
src/parser_bot/web/static/js/nav.js
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
import { api } from "/api/monitoring-tg/static/js/api.js";
|
||||||
|
// Import for side-effect: access.js hides .admin-link elements for non-admins.
|
||||||
|
import "/api/monitoring-tg/static/js/access.js";
|
||||||
|
import {
|
||||||
|
VERTICAL_META,
|
||||||
|
appBase,
|
||||||
|
getVertical,
|
||||||
|
getSection,
|
||||||
|
verticalBase,
|
||||||
|
sectionBase,
|
||||||
|
} from "/api/monitoring-tg/static/js/vertical.js";
|
||||||
|
|
||||||
|
// Current scope, resolved once at module load and re-exported at the bottom
// of this module.
const V = getVertical();
const section = getSection();
const meta = VERTICAL_META[V];

// Header title: "parser-tg-bot · <emoji> <vertical>", with the section slug
// appended as a third part when the page is inside a section.
const titleEl = document.getElementById("page-title");
if (titleEl) {
  const titleParts = ["parser-tg-bot", `${meta.emoji} ${meta.short}`];
  if (section) titleParts.push(section);
  titleEl.textContent = titleParts.join(" · ");
}
|
||||||
|
|
||||||
|
// Builds the navigation bar markup: an up-link, the per-section pages (only
// inside a section) and the admin/API links that are always emitted.
const navEl = document.getElementById("nav-section");
if (navEl) {
  const currentPath = location.pathname;
  // CSS class for a link: "active" only on an exact path match.
  const active = (href) => (currentPath === href ? "active" : "");

  // Up-link: chooser if we are inside a section, vertical-list otherwise.
  const upLink = section
    ? `<a href="${verticalBase()}/">← ${meta.short} (подразделы)</a>`
    : `<a href="${appBase()}/">← Разделы</a>`;

  let sectionLinks = [];
  if (section) {
    const root = sectionBase();
    sectionLinks = [
      `<a href="${root}/" class="${active(root + '/')}">Дашборд</a>`,
      `<a href="${root}/channels.html" class="${active(root + '/channels.html')}">Каналы</a>`,
      `<a href="${root}/messages.html" class="${active(root + '/messages.html')}">Сообщения</a>`,
      `<a href="${root}/settings.html" class="admin-only ${active(root + '/settings.html')}">Настройки</a>`,
    ];
  }

  const commonLinks = [
    `<a class="admin-login-link" href="${appBase()}/admin.html?return=${encodeURIComponent(location.pathname)}">Админ</a>`,
    `<a class="admin-link" href="${appBase()}/auth.html">Авторизация</a>`,
    `<a class="admin-link" href="${appBase()}/docs" target="_blank">API</a>`,
  ];

  navEl.innerHTML = [upLink, ...sectionLinks, ...commonLinks].join("");
}
|
||||||
|
|
||||||
|
// Best-effort: resolve section's display title from the API and update the
|
||||||
|
// page heading. Falls back to the raw slug if the network call fails.
|
||||||
|
const headingEl = document.getElementById("page-heading");
if (headingEl && section) {
  // Appends "· <emoji> <title>" of the matching section to the heading. The
  // untouched heading text is cached in data-base so a repeated run does not
  // append the suffix twice.
  const decorateHeading = (sections) => {
    const current = sections.find((candidate) => candidate.slug === section);
    if (!current) return;
    const baseText = headingEl.dataset.base || headingEl.textContent;
    headingEl.dataset.base = baseText;
    const emojiPrefix = current.emoji ? current.emoji + " " : "";
    headingEl.textContent = `${baseText} · ${emojiPrefix}${current.title}`;
  };

  // Failures are ignored on purpose: the heading keeps its static text.
  api.listSections(V).then(decorateHeading).catch(() => {});
}
|
||||||
|
|
||||||
|
export { section, V, meta };
|
||||||
202
src/parser_bot/web/static/js/sections-list.js
Normal file
202
src/parser_bot/web/static/js/sections-list.js
Normal file
@@ -0,0 +1,202 @@
|
|||||||
|
import { api, toast } from "/api/monitoring-tg/static/js/api.js";
|
||||||
|
import { isAdmin } from "/api/monitoring-tg/static/js/access.js";
|
||||||
|
import { getVertical, verticalBase, VERTICAL_META } from "/api/monitoring-tg/static/js/vertical.js";
|
||||||
|
import { slugify } from "/api/monitoring-tg/static/js/slugify.js";
|
||||||
|
|
||||||
|
const V = getVertical();
|
||||||
|
const base = verticalBase(V);
|
||||||
|
const meta = VERTICAL_META[V];
|
||||||
|
let sectionsBySlug = new Map();
|
||||||
|
|
||||||
|
/**
 * HTML-escapes a value for safe interpolation into markup, both in text
 * content and inside quoted attribute values.
 *
 * @param {*} s - Value to escape; `null` and `undefined` yield "".
 * @returns {string} `String(s)` with & < > " ' replaced by HTML entities.
 */
function escape(s) {
  if (s == null) return "";
  // Each special character maps to its entity; mapping a character to
  // itself would leave the output unescaped.
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  return String(s).replace(/[&<>"']/g, (c) => entities[c]);
}
|
||||||
|
|
||||||
|
/**
 * Loads the sections of the current vertical and draws them as tiles inside
 * `#sections-grid`. Also refreshes the module-level `sectionsBySlug` lookup
 * used by the edit dialog. Admins additionally see the access code and the
 * rename/delete buttons. Errors are shown as a toast and inside the grid.
 */
async function render() {
  const grid = document.getElementById("sections-grid");
  grid.innerHTML = `<div class="empty">Загрузка...</div>`;

  // Markup of one tile; every free-text field goes through escape().
  const tileHtml = (s, admin) => `
    <div class="card section-tile" data-slug="${escape(s.slug)}">
      <a href="${base}/${encodeURIComponent(s.slug)}/" class="section-tile-link">
        <div class="section-tile-head">
          <span class="section-emoji">${escape(s.emoji || meta.emoji)}</span>
          <span class="section-title">${escape(s.title)}</span>
        </div>
        <div class="section-stats">
          <span title="Каналов (активных/всего)"><b>${s.channels_active}</b> / ${s.channels_total} каналов</span>
          <span title="Сообщений всего">${s.messages_total.toLocaleString()} сообщ.</span>
          <span title="🎯 Лидов">${s.leads_total.toLocaleString()} лидов</span>
        </div>
        ${s.description ? `<div class="section-desc muted">${escape(s.description)}</div>` : ""}
        ${admin ? `<div class="section-code mono">Код: ${escape(s.access_code || "не задан")}</div>` : ""}
        <div class="section-slug muted mono">${escape(V)} / ${escape(s.slug)}</div>
      </a>
      ${admin ? `
        <div class="row admin-only" style="justify-content:flex-end; gap:8px; margin-top:8px">
          <button class="secondary" data-action="edit">Переименовать</button>
          <button class="danger" data-action="delete">Удалить</button>
        </div>
      ` : ""}
    </div>
  `;

  try {
    const admin = await isAdmin();
    const sections = await api.listSections(V);
    sectionsBySlug = new Map(sections.map((item) => [item.slug, item]));

    if (sections.length === 0) {
      grid.innerHTML = `<div class="empty">Подразделов пока нет — нажми «+ Новый подраздел»</div>`;
      return;
    }

    const tiles = sections.map((item) => tileHtml(item, admin)).join("");
    grid.innerHTML = `<div class="sections-grid">${tiles}</div>`;
  } catch (err) {
    toast(err.message, "error");
    grid.innerHTML = `<div class="empty">Ошибка: ${escape(err.message)}</div>`;
  }
}
|
||||||
|
|
||||||
|
// --- Create-section dialog with auto-slug -------------------------------
|
||||||
|
|
||||||
|
const titleInput = document.getElementById("new-title");
|
||||||
|
const slugInput = document.getElementById("new-slug");
|
||||||
|
const slugPreview = document.getElementById("new-slug-preview");
|
||||||
|
const slugManualToggle = document.getElementById("new-slug-manual");
|
||||||
|
|
||||||
|
// Track whether the user has taken manual control of the slug. As soon as
|
||||||
|
// they touch the slug field directly, stop auto-syncing it.
|
||||||
|
let slugIsAuto = true;
|
||||||
|
|
||||||
|
/**
 * Mirrors the title field into the slug field and the URL preview.
 * Does nothing once the user has taken manual control of the slug
 * (`slugIsAuto` is false).
 */
function syncSlugFromTitle() {
  if (slugIsAuto) {
    const candidate = slugify(titleInput.value);
    slugInput.value = candidate;
    if (slugPreview) slugPreview.textContent = candidate || "(введите название)";
  }
}
|
||||||
|
|
||||||
|
// Typing in the title keeps the auto-generated slug in sync.
if (titleInput) {
  titleInput.addEventListener("input", syncSlugFromTitle);
}

// Typing in the slug field switches to manual mode. The URL preview is
// refreshed here as well: syncSlugFromTitle() stops updating it in manual
// mode, so without this it would keep showing the stale auto-generated slug.
// An emptied field previews the slug the submit handler would fall back to.
if (slugInput) {
  slugInput.addEventListener("input", () => {
    slugIsAuto = false;
    if (slugPreview) {
      slugPreview.textContent =
        slugInput.value.trim() || slugify(titleInput?.value) || "(введите название)";
    }
  });
}

// "Edit manually" link: toggles the hidden slug row and focuses the field.
// slugInput is dereferenced defensively, matching the guards used elsewhere.
if (slugManualToggle) {
  slugManualToggle.addEventListener("click", (e) => {
    e.preventDefault();
    const row = slugInput?.closest(".slug-row");
    if (row) row.hidden = !row.hidden;
    slugInput?.focus();
  });
}
|
||||||
|
|
||||||
|
/**
 * Returns the create-section form to its pristine state: native form reset,
 * slug auto-sync switched back on, preview placeholder restored, slug field
 * emptied and the manual slug row hidden again.
 */
function resetForm() {
  const form = document.getElementById("create-form");
  form.reset();
  slugIsAuto = true;

  if (slugPreview) {
    slugPreview.textContent = "(введите название)";
  }
  if (slugInput) {
    slugInput.value = "";
  }

  const slugRow = slugInput?.closest(".slug-row");
  if (slugRow) {
    slugRow.hidden = true;
  }
}
|
||||||
|
|
||||||
|
// "+ New section": start from a clean form, open the dialog and move focus
// to the title field 50 ms later.
document.getElementById("open-create").addEventListener("click", () => {
  resetForm();
  const dialog = document.getElementById("create-dialog");
  dialog.showModal();
  setTimeout(() => titleInput?.focus(), 50);
});

// Cancel buttons simply close their dialog.
document.getElementById("create-cancel").addEventListener("click", () => {
  const dialog = document.getElementById("create-dialog");
  dialog.close();
});

document.getElementById("edit-cancel").addEventListener("click", () => {
  const dialog = document.getElementById("edit-dialog");
  dialog.close();
});
|
||||||
|
|
||||||
|
// Create-section submit: validate the fields client-side, call the API, then
// close the dialog, reset the form and redraw the grid. API errors are
// reported as a toast and leave the dialog open.
document.getElementById("create-form").addEventListener("submit", async (event) => {
  event.preventDefault();
  // Trimmed value of a form control, looked up by element id.
  const trimmed = (id) => document.getElementById(id).value.trim();

  const title = titleInput.value.trim();
  if (!title) return;

  // Re-sync once more in case `input` didn't fire before submit (autofill).
  if (slugIsAuto) syncSlugFromTitle();
  const slug = slugInput.value.trim() || slugify(title);
  if (!slug) {
    toast("Не удалось сформировать slug — введите его вручную", "error");
    return;
  }

  const emoji = trimmed("new-emoji") || null;
  const accessCode = trimmed("new-access-code");
  if (accessCode.length < 3) {
    toast("Код доступа должен быть не короче 3 символов", "error");
    return;
  }
  const description = trimmed("new-description") || null;

  try {
    const payload = { vertical: V, slug, title, emoji, description, accessCode };
    await api.createSection(payload);
    toast(`Подраздел "${title}" создан`, "success");
    document.getElementById("create-dialog").close();
    resetForm();
    await render();
  } catch (err) {
    toast(err.message, "error");
  }
});
|
||||||
|
|
||||||
|
// Delegated clicks on tile buttons. "edit" pre-fills and opens the edit
// dialog from the cached section data; "delete" asks for confirmation, calls
// the API and redraws the grid. Any other action is ignored.
document.getElementById("sections-grid").addEventListener("click", async (event) => {
  const control = event.target.closest("[data-action]");
  if (!control) return;

  const { slug } = control.closest(".section-tile").dataset;
  const { action } = control.dataset;

  if (action === "edit") {
    const current = sectionsBySlug.get(slug);
    if (!current) return;

    const fill = (id, value) => {
      document.getElementById(id).value = value;
    };
    fill("edit-slug", slug);
    fill("edit-title", current.title || "");
    fill("edit-emoji", current.emoji || "");
    fill("edit-access-code", current.access_code || "");
    fill("edit-description", current.description || "");

    document.getElementById("edit-dialog").showModal();
    setTimeout(() => document.getElementById("edit-title").focus(), 50);
    return;
  }

  if (action !== "delete") return;

  const confirmed = confirm(`Удалить подраздел "${slug}"? Удалить можно только пустой подраздел (без каналов).`);
  if (!confirmed) return;

  try {
    await api.deleteSection(V, slug);
    toast(`Подраздел "${slug}" удалён`, "success");
    await render();
  } catch (err) {
    toast(err.message, "error");
  }
});
|
||||||
|
|
||||||
|
// Edit-section submit: validate, send the update, close the dialog and
// redraw the grid. API errors are reported as a toast.
document.getElementById("edit-form").addEventListener("submit", async (event) => {
  event.preventDefault();
  // Raw value of a form control, looked up by element id.
  const valueOf = (id) => document.getElementById(id).value;

  const slug = valueOf("edit-slug");
  const title = valueOf("edit-title").trim();
  const emoji = valueOf("edit-emoji").trim() || null;
  const accessCode = valueOf("edit-access-code").trim();
  const description = valueOf("edit-description").trim() || null;

  if (!title) return;
  if (accessCode.length < 3) {
    toast("Код доступа должен быть не короче 3 символов", "error");
    return;
  }

  try {
    const changes = { title, emoji, description, access_code: accessCode };
    await api.updateSection(V, slug, changes);
    toast(`Подраздел "${title}" сохранён`, "success");
    document.getElementById("edit-dialog").close();
    await render();
  } catch (err) {
    toast(err.message, "error");
  }
});

// Initial draw of the grid.
render();
|
||||||
118
src/parser_bot/web/static/js/settings.js
Normal file
118
src/parser_bot/web/static/js/settings.js
Normal file
@@ -0,0 +1,118 @@
|
|||||||
|
import { api, toast, fmtDate } from "/api/monitoring-tg/static/js/api.js";
|
||||||
|
import { getVertical, getSection, VERTICAL_META } from "/api/monitoring-tg/static/js/vertical.js";
|
||||||
|
|
||||||
|
const V = getVertical();
|
||||||
|
const section = getSection();
|
||||||
|
const meta = VERTICAL_META[V];
|
||||||
|
|
||||||
|
// `level` decides which override layer the editor edits/saves/resets.
|
||||||
|
// "section" → store key llm_system_prompt:<vertical>:<section_slug>
|
||||||
|
// "vertical" → store key llm_system_prompt:<vertical>
|
||||||
|
// Effective resolution always goes section → vertical → default.
|
||||||
|
let level = section ? "section" : "vertical";

// Level selector: outside a section only the vertical layer exists, so the
// control is pinned to "vertical" and disabled; inside a section it starts
// on "section" and reloads the prompt whenever the user switches layer.
const levelEl = document.getElementById("prompt-level");
if (levelEl) {
  if (section) {
    levelEl.value = "section";
    levelEl.addEventListener("change", async (event) => {
      level = event.target.value;
      await loadPrompt();
    });
  } else {
    levelEl.value = "vertical";
    levelEl.disabled = true;
  }
}
|
||||||
|
|
||||||
|
/**
 * Scope object passed to the prompt API calls for the layer being edited.
 *
 * @returns {{vertical: string, section: (string|null)}} The current vertical
 *   plus the section slug when `level` is "section", otherwise `null`.
 */
function levelScope() {
  const editingSection = level === "section";
  return { vertical: V, section: editingSection ? section : null };
}
|
||||||
|
|
||||||
|
/**
 * Fetches the service settings and global stats and renders them as
 * label/value rows into `#config-tbody`.
 *
 * Rows are built with DOM nodes and `textContent` rather than an HTML
 * string, so config values and the URL-derived scope label can never be
 * interpreted as markup.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the settings request returns a non-OK status (the
 *   message falls back to "HTTP <status>" if the server sent no status
 *   text), or when either request rejects.
 */
async function loadConfig() {
  const res = await fetch("/api/monitoring-tg/api/v1/settings");
  // statusText can be empty, which would otherwise produce a blank error.
  if (!res.ok) throw new Error(res.statusText || `HTTP ${res.status}`);
  const cfg = await res.json();
  const stats = await api.globalStats();

  const scopeLabel = section ? `${meta.short} / ${section}` : meta.short;
  const rows = [
    ["Раздел", `${meta.emoji} ${scopeLabel}`],
    ["Период опроса", `${cfg.poll_interval_seconds}s`],
    ["Лимит истории за опрос", cfg.poll_history_limit],
    ["Telethon session", cfg.tg_session_path],
    ["Postgres host", `${cfg.postgres_host}:${cfg.postgres_port}/${cfg.postgres_db}`],
    ["API host", `${cfg.api_host}:${cfg.api_port}`],
    [`Каналов в ${scopeLabel}`, `${stats.channels_active} активных / ${stats.channels_total}`],
    [`Сообщений в ${scopeLabel}`, stats.messages_total.toLocaleString()],
    ["Последний опрос (scope)", fmtDate(stats.last_poll_at)],
  ];

  const fragment = document.createDocumentFragment();
  for (const [label, value] of rows) {
    const labelCell = document.createElement("td");
    labelCell.className = "muted";
    labelCell.textContent = label;

    const valueCell = document.createElement("td");
    valueCell.className = "mono";
    // null/undefined values are shown as a dash.
    valueCell.textContent = value ?? "—";

    const tr = document.createElement("tr");
    tr.append(labelCell, valueCell);
    fragment.append(tr);
  }
  document.getElementById("config-tbody").replaceChildren(fragment);
}
|
||||||
|
|
||||||
|
// "Poll all" button: disabled while the request is in flight, re-enabled
// afterwards regardless of the outcome.
document.getElementById("poll-all").addEventListener("click", async (event) => {
  const clicked = event.target;
  clicked.disabled = true;
  try {
    const result = await api.pollAll();
    const queued = result.queued ?? 0;
    toast(`В очереди ${queued} каналов — опрос идёт в фоне`, "success");
  } catch (err) {
    toast(err.message, "error");
  } finally {
    clicked.disabled = false;
  }
});
|
||||||
|
|
||||||
|
/**
 * Loads the prompt for the currently selected layer into the editor and
 * updates the status badge and the character counter.
 *
 * The badge text and CSS class are chosen from `data.source` ("section",
 * "vertical" or "default"); any other value shows a dash with the "off"
 * class.
 */
async function loadPrompt() {
  const data = await api.llmPromptGet(levelScope());

  document.getElementById("prompt-editor").value = data.prompt || "";
  const badge = document.getElementById("prompt-status");
  const counter = document.getElementById("prompt-length");

  // data.source -> [badge text, badge CSS class]
  const badgeBySource = {
    section: ["override · подраздел", "ok"],
    vertical: ["override · вертикаль", "ok"],
    default: ["встроенный по умолчанию", "off"],
  };
  const [text, cssClass] = badgeBySource[data.source] || ["—", "off"];
  badge.textContent = text;
  badge.className = `badge ${cssClass}`;

  const charCount = (data.prompt || "").length;
  counter.textContent = `${charCount.toLocaleString()} символов`;
}
|
||||||
|
|
||||||
|
// Save: store the editor text for the selected layer, then reload the
// editor. The button is disabled for the duration of the request.
document.getElementById("prompt-save").addEventListener("click", async (event) => {
  const button = event.target;
  const text = document.getElementById("prompt-editor").value;
  button.disabled = true;
  try {
    await api.llmPromptSave(text, levelScope());
    const where = level === "section" ? `${meta.short} / ${section}` : meta.short;
    toast(`Промпт ${where} сохранён, применится в течение 5 секунд`, "success");
    await loadPrompt();
  } catch (err) {
    toast(err.message, "error");
  } finally {
    button.disabled = false;
  }
});

// Reset: after confirmation, drop the override of the selected layer and
// reload the editor.
document.getElementById("prompt-reset").addEventListener("click", async (event) => {
  const button = event.target;
  const where = level === "section" ? `подраздела "${section}"` : `вертикали "${meta.short}"`;
  const confirmed = confirm(`Сбросить пользовательский промпт ${where} и вернуться к фоллбэку?`);
  if (!confirmed) return;

  button.disabled = true;
  try {
    await api.llmPromptReset(levelScope());
    toast(`Промпт ${where} сброшен`, "success");
    await loadPrompt();
  } catch (err) {
    toast(err.message, "error");
  } finally {
    button.disabled = false;
  }
});

// Initial page load; each loader reports its own failure as a toast.
const reportError = (err) => toast(err.message, "error");
loadConfig().catch(reportError);
loadPrompt().catch(reportError);
|
||||||
22
src/parser_bot/web/static/js/slugify.js
Normal file
22
src/parser_bot/web/static/js/slugify.js
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
// URL-safe slug from arbitrary text. Cyrillic → Latin so titles like
|
||||||
|
// "Дубай Marina" become "dubay-marina" without forcing the user to type
|
||||||
|
// a slug by hand.
|
||||||
|
|
||||||
|
// Lowercase Russian letter -> Latin transliteration. The hard and soft
// signs map to the empty string, i.e. they are dropped.
const RU_TO_LAT = {
  а: "a", б: "b", в: "v", г: "g", д: "d", е: "e", ё: "yo", ж: "zh",
  з: "z", и: "i", й: "y", к: "k", л: "l", м: "m", н: "n", о: "o",
  п: "p", р: "r", с: "s", т: "t", у: "u", ф: "f", х: "h", ц: "ts",
  ч: "ch", ш: "sh", щ: "sch", ъ: "", ы: "y", ь: "", э: "e", ю: "yu",
  я: "ya",
};

/**
 * Builds a URL-safe slug from arbitrary text.
 *
 * The text is lowercased, Russian letters are transliterated, every run of
 * characters outside [a-z0-9] becomes a single hyphen, and leading/trailing
 * hyphens are removed. The result is capped at 64 characters.
 *
 * @param {string|null|undefined} text - Source text; falsy values yield "".
 * @returns {string} Slug of at most 64 characters that never starts or ends
 *   with a hyphen (possibly empty).
 */
export function slugify(text) {
  const transliterated = Array.from(
    (text || "").toLowerCase(),
    (ch) => RU_TO_LAT[ch] ?? ch,
  ).join("");

  return transliterated
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "")
    .slice(0, 64)
    // Truncation can cut right after a separator; trim again so the slug
    // never ends with a hyphen.
    .replace(/-+$/, "");
}
|
||||||
76
src/parser_bot/web/static/js/vertical.js
Normal file
76
src/parser_bot/web/static/js/vertical.js
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
const APP_BASE = "/api/monitoring-tg";
|
||||||
|
|
||||||
|
// Detect the current scope from the URL path.
|
||||||
|
//
|
||||||
|
// / → vertical=null, section=null
|
||||||
|
// /real-estate/ → vertical=real_estate, section=null (section chooser)
|
||||||
|
// /real-estate/dubai/ → vertical=real_estate, section=dubai
|
||||||
|
// /real-estate/dubai/channels.html → same
|
||||||
|
// /hr/ → vertical=hr, section=null
|
||||||
|
// /hr/it/settings.html → vertical=hr, section=it
|
||||||
|
//
|
||||||
|
// Section slug comes from URL path[2] and is opaque (created via UI). The
|
||||||
|
// frontend treats it as a string and passes it to the API; the backend
|
||||||
|
// resolves slug→Section row at query time.
|
||||||
|
|
||||||
|
/**
 * Splits the current URL path into its non-empty segments, dropping the
 * leading APP_BASE segments when the path starts with them.
 *
 * @returns {string[]} Path segments relative to the app base, or all path
 *   segments when the path is not under APP_BASE.
 */
function _segments() {
  const nonEmpty = (part) => part !== "";
  const pathParts = location.pathname.split("/").filter(nonEmpty);
  const baseParts = APP_BASE.split("/").filter(nonEmpty);

  const underAppBase = baseParts.every((part, i) => pathParts[i] === part);
  return underAppBase ? pathParts.slice(baseParts.length) : pathParts;
}
|
||||||
|
|
||||||
|
/**
 * URL slug of the vertical named by the first path segment.
 *
 * @returns {("hr"|"real-estate"|null)} The lowercased first segment when it
 *   is a known vertical slug, otherwise `null`.
 */
export function getVerticalSlug() {
  const [first = ""] = _segments();
  const slug = first.toLowerCase();
  return slug === "hr" || slug === "real-estate" ? slug : null;
}
|
||||||
|
|
||||||
|
/**
 * Vertical key (as used in VERTICAL_META) for the current page.
 *
 * @returns {("hr"|"real_estate")} "hr" on HR pages; "real_estate" both on
 *   real-estate pages and, as a harmless default, on pages that belong to
 *   no vertical.
 */
export function getVertical() {
  return getVerticalSlug() === "hr" ? "hr" : "real_estate";
}
|
||||||
|
|
||||||
|
/**
 * Section slug taken from the second path segment.
 *
 * @returns {(string|null)} The lowercased segment, or `null` when the first
 *   segment is not a known vertical, when there is no second segment, or
 *   when that segment ends with ".html" (a page file, not a section).
 */
export function getSection() {
  const [, candidate] = _segments();
  // Only treat segment[1] as a section slug when segment[0] is a known vertical.
  if (!getVerticalSlug()) return null;

  const isSectionSlug = Boolean(candidate) && !candidate.endsWith(".html");
  return isSectionSlug ? candidate.toLowerCase() : null;
}
|
||||||
|
|
||||||
|
// Display metadata per vertical, keyed by the vertical key returned from
// getVertical(). `slug` is the URL path segment of the vertical.
export const VERTICAL_META = {
  real_estate: { slug: "real-estate", title: "Недвижимость", short: "Недвижимость", emoji: "🏠", leadLabel: "Объявление" },
  hr: { slug: "hr", title: "HR / Кадры", short: "HR", emoji: "👥", leadLabel: "HR-лид" },
};
|
||||||
|
|
||||||
|
/**
 * @returns {string} The URL prefix the app is mounted under (APP_BASE).
 */
export function appBase() {
  return APP_BASE;
}

/**
 * Base URL of a vertical, without a trailing slash.
 *
 * @param {string} [vertical] - Vertical key; defaults to the current one.
 * @returns {string} APP_BASE followed by the vertical's URL slug.
 */
export function verticalBase(vertical = getVertical()) {
  const { slug } = VERTICAL_META[vertical];
  return `${APP_BASE}/${slug}`;
}

/**
 * Base URL of a section, without a trailing slash.
 *
 * @param {string} [vertical] - Vertical key; defaults to the current one.
 * @param {(string|null)} [section] - Section slug; defaults to the current one.
 * @returns {string} The vertical base with the section slug appended, or
 *   just the vertical base when `section` is falsy.
 */
export function sectionBase(vertical = getVertical(), section = getSection()) {
  const root = verticalBase(vertical);
  if (!section) return root;
  return `${root}/${section}`;
}
|
||||||
99
src/parser_bot/web/static/real-estate/index.html
Normal file
99
src/parser_bot/web/static/real-estate/index.html
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html lang="ru">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>🏠 Недвижимость — подразделы</title>
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||||
|
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<header>
|
||||||
|
<h1 id="page-title">parser-tg-bot · 🏠 Недвижимость</h1>
|
||||||
|
<nav id="nav-section"></nav>
|
||||||
|
</header>
|
||||||
|
<main>
|
||||||
|
<div class="row">
|
||||||
|
<h2>Подразделы недвижимости</h2>
|
||||||
|
<div class="spacer"></div>
|
||||||
|
<button id="open-create">+ Новый подраздел</button>
|
||||||
|
</div>
|
||||||
|
<p class="muted">
|
||||||
|
Каждый подраздел — это собственный набор каналов, своя статистика и свой
|
||||||
|
LLM-промпт (с фоллбэком на промпт вертикали). Например: Дубай, Москва,
|
||||||
|
Сочи, коммерческая недвижимость.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<div id="sections-grid"></div>
|
||||||
|
</main>
|
||||||
|
|
||||||
|
<dialog id="create-dialog">
|
||||||
|
<h3 style="margin-top:0">Новый подраздел</h3>
|
||||||
|
<form id="create-form">
|
||||||
|
<label class="row" style="gap:8px; margin-bottom:8px">
|
||||||
|
<span style="min-width:120px" class="muted">Название</span>
|
||||||
|
<input type="text" id="new-title" required placeholder="Дубай" style="flex:1" />
|
||||||
|
</label>
|
||||||
|
<div class="row" style="gap:8px; margin-bottom:8px; font-size:12px">
|
||||||
|
<span style="min-width:120px" class="muted">URL-адрес</span>
|
||||||
|
<span class="muted mono">/real-estate/<span id="new-slug-preview">(введите название)</span>/</span>
|
||||||
|
<div class="spacer"></div>
|
||||||
|
<a href="#" id="new-slug-manual" class="muted">изменить вручную</a>
|
||||||
|
</div>
|
||||||
|
<label class="row slug-row" style="gap:8px; margin-bottom:8px" hidden>
|
||||||
|
<span style="min-width:120px" class="muted">Slug</span>
|
||||||
|
<input type="text" id="new-slug" pattern="[a-z0-9][a-z0-9_\-]*[a-z0-9]?"
|
||||||
|
placeholder="dubai" style="flex:1" />
|
||||||
|
</label>
|
||||||
|
<label class="row" style="gap:8px; margin-bottom:8px">
|
||||||
|
<span style="min-width:120px" class="muted">Иконка</span>
|
||||||
|
<input type="text" id="new-emoji" maxlength="4" placeholder="🌴" style="width:80px" />
|
||||||
|
</label>
|
||||||
|
<label class="row" style="gap:8px; margin-bottom:8px">
|
||||||
|
<span style="min-width:120px" class="muted">Код доступа</span>
|
||||||
|
<input type="text" id="new-access-code" required minlength="3"
|
||||||
|
autocomplete="new-password" style="flex:1" />
|
||||||
|
</label>
|
||||||
|
<label class="row" style="gap:8px; margin-bottom:8px; align-items:flex-start">
|
||||||
|
<span style="min-width:120px" class="muted">Описание</span>
|
||||||
|
<textarea id="new-description" rows="3" style="flex:1"></textarea>
|
||||||
|
</label>
|
||||||
|
<div class="row" style="justify-content:flex-end; gap:8px; margin-top:12px">
|
||||||
|
<button type="button" id="create-cancel" class="secondary">Отмена</button>
|
||||||
|
<button type="submit">Создать</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
</dialog>
|
||||||
|
|
||||||
|
<dialog id="edit-dialog">
|
||||||
|
<h3 style="margin-top:0">Редактировать подраздел</h3>
|
||||||
|
<form id="edit-form">
|
||||||
|
<input type="hidden" id="edit-slug" />
|
||||||
|
<label class="row" style="gap:8px; margin-bottom:8px">
|
||||||
|
<span style="min-width:120px" class="muted">Название</span>
|
||||||
|
<input type="text" id="edit-title" required style="flex:1" />
|
||||||
|
</label>
|
||||||
|
<label class="row" style="gap:8px; margin-bottom:8px">
|
||||||
|
<span style="min-width:120px" class="muted">Иконка</span>
|
||||||
|
<input type="text" id="edit-emoji" maxlength="4" style="width:80px" />
|
||||||
|
</label>
|
||||||
|
<label class="row" style="gap:8px; margin-bottom:8px">
|
||||||
|
<span style="min-width:120px" class="muted">Код доступа</span>
|
||||||
|
<input type="text" id="edit-access-code" required minlength="3"
|
||||||
|
autocomplete="new-password" style="flex:1" />
|
||||||
|
</label>
|
||||||
|
<label class="row" style="gap:8px; margin-bottom:8px; align-items:flex-start">
|
||||||
|
<span style="min-width:120px" class="muted">Описание</span>
|
||||||
|
<textarea id="edit-description" rows="3" style="flex:1"></textarea>
|
||||||
|
</label>
|
||||||
|
<div class="row" style="justify-content:flex-end; gap:8px; margin-top:12px">
|
||||||
|
<button type="button" id="edit-cancel" class="secondary">Отмена</button>
|
||||||
|
<button type="submit">Сохранить</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
</dialog>
|
||||||
|
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/sections-list.js"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
48
src/parser_bot/web/static/real-estate/section/channels.html
Normal file
48
src/parser_bot/web/static/real-estate/section/channels.html
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html lang="ru">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>🏠 Недвижимость · Каналы — parser-tg-bot</title>
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||||
|
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<header>
|
||||||
|
<h1 id="page-title">parser-tg-bot</h1>
|
||||||
|
<nav id="nav-section"></nav>
|
||||||
|
</header>
|
||||||
|
<main>
|
||||||
|
<h2 id="page-heading">Каналы подраздела</h2>
|
||||||
|
|
||||||
|
<div class="card" style="margin-bottom:24px">
|
||||||
|
<form id="add-form" class="row">
|
||||||
|
<input type="text" id="identifier" placeholder="@channel или https://t.me/..." required style="flex:1; min-width:280px" />
|
||||||
|
<button type="submit">Добавить канал</button>
|
||||||
|
</form>
|
||||||
|
<div class="muted" style="margin-top:8px; font-size:12px">
|
||||||
|
Канал будет привязан к текущему подразделу.
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card">
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>ID</th>
|
||||||
|
<th>Канал</th>
|
||||||
|
<th>Telegram ID</th>
|
||||||
|
<th>Сообщ.</th>
|
||||||
|
<th>Последний опрос</th>
|
||||||
|
<th>Статус</th>
|
||||||
|
<th></th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody id="tbody"></tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</main>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/channels.js"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
43
src/parser_bot/web/static/real-estate/section/index.html
Normal file
43
src/parser_bot/web/static/real-estate/section/index.html
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html lang="ru">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>🏠 Недвижимость · Дашборд — parser-tg-bot</title>
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||||
|
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<header>
|
||||||
|
<h1 id="page-title">parser-tg-bot</h1>
|
||||||
|
<nav id="nav-section"></nav>
|
||||||
|
</header>
|
||||||
|
<main>
|
||||||
|
<div class="row">
|
||||||
|
<h2 id="page-heading">Дашборд</h2>
|
||||||
|
<div class="spacer"></div>
|
||||||
|
<button id="poll-all">Опросить все каналы подраздела</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="stats-grid" id="stats"></div>
|
||||||
|
|
||||||
|
<h3>Каналы подраздела</h3>
|
||||||
|
<div class="card">
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Канал</th>
|
||||||
|
<th>Сообщений</th>
|
||||||
|
<th>Последнее сообщение</th>
|
||||||
|
<th>Последний опрос</th>
|
||||||
|
<th>Статус</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody id="channels-tbody"></tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</main>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/dashboard.js"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
78
src/parser_bot/web/static/real-estate/section/messages.html
Normal file
78
src/parser_bot/web/static/real-estate/section/messages.html
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html lang="ru">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>🏠 Недвижимость · Сообщения — parser-tg-bot</title>
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||||
|
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<header>
|
||||||
|
<h1 id="page-title">parser-tg-bot</h1>
|
||||||
|
<nav id="nav-section"></nav>
|
||||||
|
</header>
|
||||||
|
<main>
|
||||||
|
<h2 id="page-heading">Сообщения подраздела</h2>
|
||||||
|
|
||||||
|
<div class="toolbar card">
|
||||||
|
<!-- No visible <label> for this control; aria-label supplies its accessible name. -->
<select id="channel-filter" aria-label="Канал">
|
||||||
|
<option value="">Все каналы подраздела</option>
|
||||||
|
</select>
|
||||||
|
<input type="search" id="search" placeholder="Поиск по тексту..." />
|
||||||
|
<!-- No visible <label> for this control; aria-label supplies its accessible name. -->
<select id="real-estate" aria-label="Тема">
|
||||||
|
<option value="">Любая тема</option>
|
||||||
|
<option value="any">🏠 Недвижимость (любая)</option>
|
||||||
|
<option value="sale">🏠 Продажа</option>
|
||||||
|
<option value="rent">🏠 Аренда</option>
|
||||||
|
<option value="purchase">🏠 Покупка</option>
|
||||||
|
</select>
|
||||||
|
<label class="row" style="gap:6px">
|
||||||
|
<input type="checkbox" id="leads-only" />
|
||||||
|
<span class="muted">🎯 Только лиды (ИИ)</span>
|
||||||
|
</label>
|
||||||
|
<select id="min-confidence" title="Минимальная уверенность ИИ">
|
||||||
|
<option value="0.3">0.3+</option>
|
||||||
|
<option value="0.5" selected>0.5+</option>
|
||||||
|
<option value="0.7">0.7+</option>
|
||||||
|
<option value="0.9">0.9+</option>
|
||||||
|
</select>
|
||||||
|
<label class="row" style="gap:6px">
|
||||||
|
<input type="checkbox" id="has-phone" />
|
||||||
|
<span class="muted">📞 С телефоном</span>
|
||||||
|
</label>
|
||||||
|
<!-- No visible <label> for this control; aria-label supplies its accessible name. -->
<select id="limit" aria-label="Сообщений на странице">
|
||||||
|
<option value="25">25</option>
|
||||||
|
<option value="50" selected>50</option>
|
||||||
|
<option value="100">100</option>
|
||||||
|
<option value="200">200</option>
|
||||||
|
</select>
|
||||||
|
<div class="spacer"></div>
|
||||||
|
<label class="row" style="gap:6px">
|
||||||
|
<input type="checkbox" id="autorefresh" />
|
||||||
|
<span class="muted">Автообновление</span>
|
||||||
|
</label>
|
||||||
|
<button id="refresh" class="secondary">Обновить</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card" id="list"></div>
|
||||||
|
|
||||||
|
<div class="pagination">
|
||||||
|
<button id="prev" class="secondary">← Назад</button>
|
||||||
|
<span class="muted" id="page-info" style="align-self:center"></span>
|
||||||
|
<button id="next" class="secondary">Вперёд →</button>
|
||||||
|
</div>
|
||||||
|
</main>
|
||||||
|
|
||||||
|
<dialog id="raw-dialog">
|
||||||
|
<h3 style="margin-top:0">Сообщение</h3>
|
||||||
|
<pre id="raw-content"></pre>
|
||||||
|
<div class="row" style="justify-content:flex-end; margin-top:12px">
|
||||||
|
<button class="secondary" id="raw-close">Закрыть</button>
|
||||||
|
</div>
|
||||||
|
</dialog>
|
||||||
|
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/messages.js"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
66
src/parser_bot/web/static/real-estate/section/settings.html
Normal file
66
src/parser_bot/web/static/real-estate/section/settings.html
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html lang="ru">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>🏠 Недвижимость · Настройки — parser-tg-bot</title>
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||||
|
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<header>
|
||||||
|
<h1 id="page-title">parser-tg-bot</h1>
|
||||||
|
<nav id="nav-section"></nav>
|
||||||
|
</header>
|
||||||
|
<main>
|
||||||
|
<h2 id="page-heading">Настройки подраздела</h2>
|
||||||
|
|
||||||
|
<div class="card" style="margin-bottom:24px">
|
||||||
|
<h3 style="margin-top:0">Текущая конфигурация</h3>
|
||||||
|
<table>
|
||||||
|
<tbody id="config-tbody">
|
||||||
|
<tr><td colspan="2" class="empty">Загрузка...</td></tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
<!-- "docker compose restart" reuses the existing container and does not apply changed environment variables; "up -d" recreates it with the new values. -->
<div class="muted" style="font-size:12px; margin-top:12px">
|
||||||
|
Параметры задаются через переменные окружения (<span class="mono">.env</span>).
|
||||||
|
Для изменения отредактируйте <span class="mono">.env</span> и пересоздайте контейнер:
|
||||||
|
<span class="mono">docker compose up -d app</span>.
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card" style="margin-bottom:24px">
|
||||||
|
<h3 style="margin-top:0">Действия</h3>
|
||||||
|
<!-- Links opened in a new tab carry rel="noopener" so the new page gets no window.opener handle, including in older browsers that do not imply it. -->
<div class="row">
|
||||||
|
<button id="poll-all">Опросить все каналы подраздела сейчас</button>
|
||||||
|
<a href="/api/monitoring-tg/docs" target="_blank" rel="noopener" class="badge">OpenAPI / Swagger</a>
|
||||||
|
<a href="/api/monitoring-tg/healthz" target="_blank" rel="noopener" class="badge">Health check</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card" style="margin-bottom:24px">
|
||||||
|
<h3 style="margin-top:0">🤖 Промпт ИИ</h3>
|
||||||
|
<div class="row" style="margin-bottom:8px">
|
||||||
|
<span class="badge" id="prompt-status">—</span>
|
||||||
|
<span class="muted" id="prompt-length"></span>
|
||||||
|
<div class="spacer"></div>
|
||||||
|
<select id="prompt-level" title="Уровень редактирования промпта">
|
||||||
|
<option value="section" selected>Промпт подраздела</option>
|
||||||
|
<option value="vertical">Промпт вертикали</option>
|
||||||
|
</select>
|
||||||
|
<button id="prompt-reset" class="secondary">Сбросить уровень</button>
|
||||||
|
<button id="prompt-save">Сохранить</button>
|
||||||
|
</div>
|
||||||
|
<textarea id="prompt-editor" rows="22"
|
||||||
|
style="width:100%; font-family:ui-monospace, SFMono-Regular, Menlo, monospace; font-size:12px"></textarea>
|
||||||
|
<div class="muted" style="font-size:12px; margin-top:8px">
|
||||||
|
Каскад: <strong>section → vertical → default</strong>. Если промпта на
|
||||||
|
уровне подраздела нет, используется промпт вертикали; если и его нет —
|
||||||
|
встроенный по умолчанию. Сохранение применится в течение ~5 сек.
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</main>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
|
||||||
|
<script type="module" src="/api/monitoring-tg/static/js/settings.js"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
Reference in New Issue
Block a user