Add monitoring TG service
This commit is contained in:
21
.dockerignore
Normal file
21
.dockerignore
Normal file
@@ -0,0 +1,21 @@
|
||||
.git/
|
||||
.gitignore
|
||||
.gitea/
|
||||
.env
|
||||
.venv/
|
||||
venv/
|
||||
__pycache__/
|
||||
**/__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
*.egg-info/
|
||||
.pytest_cache/
|
||||
.mypy_cache/
|
||||
.ruff_cache/
|
||||
.idea/
|
||||
.vscode/
|
||||
.claude/
|
||||
.DS_Store
|
||||
data/
|
||||
*.session
|
||||
*.session-journal
|
||||
54
.env.example
Normal file
54
.env.example
Normal file
@@ -0,0 +1,54 @@
|
||||
# Telegram MTProto credentials — get from https://my.telegram.org
|
||||
TG_API_ID=
|
||||
TG_API_HASH=
|
||||
TG_PHONE=
|
||||
|
||||
# --- ONE OF THE TWO BELOW IS REQUIRED ---
|
||||
# Preferred (no volumes, k8s-friendly): get the string by running
|
||||
# docker compose run --rm -it app python -m parser_bot.auth
|
||||
# It prints `TG_SESSION_STRING=...` — paste that line here.
|
||||
TG_SESSION_STRING=
|
||||
|
||||
# Fallback (file-based): only used if TG_SESSION_STRING is empty.
|
||||
# Requires mounting ./data/session as a volume.
|
||||
TG_SESSION_PATH=/data/session/parser.session
|
||||
|
||||
# Postgres
|
||||
POSTGRES_USER=parser
|
||||
POSTGRES_PASSWORD=parser
|
||||
POSTGRES_DB=parser
|
||||
POSTGRES_HOST=db
|
||||
POSTGRES_PORT=5432
|
||||
|
||||
# Polling
|
||||
POLL_INTERVAL_SECONDS=60
|
||||
POLL_HISTORY_LIMIT=50
|
||||
|
||||
# API
|
||||
API_HOST=0.0.0.0
|
||||
API_PORT=8000
|
||||
|
||||
# Media (downloaded photos / small videos / docs from parsed messages)
|
||||
MEDIA_DIR=/data/media
|
||||
MEDIA_MAX_BYTES=20971520
|
||||
|
||||
# Local LLM (Ollama) — runs Qwen 2.5 7B Q4 on CPU. Set LLM_ENABLED=false to disable.
|
||||
LLM_ENABLED=true
|
||||
LLM_BASE_URL=http://ollama:11434
|
||||
LLM_MODEL=qwen2.5:7b-instruct-q4_K_M
|
||||
LLM_TIMEOUT_SECONDS=120
|
||||
LLM_MIN_TEXT_LENGTH=20
|
||||
# How often the background classifier wakes up and how many messages it
|
||||
# processes per tick. With 5/20s ≈ 900 messages/hour at ~3-6s per call.
|
||||
LLM_CLASSIFY_INTERVAL_SECONDS=20
|
||||
LLM_CLASSIFY_BATCH_SIZE=5
|
||||
|
||||
# Admin allowlist for /auth.html, /docs, /openapi.json, /redoc and the
|
||||
# /api/v1/auth/* endpoints. Comma-separated list of client IPs.
|
||||
# Empty = no restriction (everyone is admin) — convenient for local dev only; always set it in production.
|
||||
# Example: ADMIN_ALLOWED_IPS=89.110.109.221,127.0.0.1
|
||||
ADMIN_ALLOWED_IPS=
|
||||
# Honor X-Forwarded-For / X-Real-IP from a reverse proxy (Docker port-
|
||||
# forward, nginx, traefik) when resolving the client IP for the allowlist.
|
||||
TRUST_PROXY_HEADERS=true
|
||||
|
||||
58
.gitea/workflows/deploy.yaml
Normal file
58
.gitea/workflows/deploy.yaml
Normal file
@@ -0,0 +1,58 @@
|
||||
# CI/CD pipeline: on every push to `main`, build the server image, push it to
# the in-cluster Gitea registry and roll it out to the `monitoring-tg` namespace.
name: Build and Deploy

on:
  push:
    branches: [main]

env:
  # Registry address used by this job for `docker login` / `docker push`.
  INTERNAL_REGISTRY: gitea-http.gitea.svc.cluster.local:3000
  # Registry address written into the Deployment image reference.
  # NOTE(review): presumably the same registry as seen from the cluster nodes — confirm.
  NODE_REGISTRY: localhost:30300

jobs:
  build-and-deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install Docker CLI
        run: |
          curl -fsSL https://download.docker.com/linux/static/stable/x86_64/docker-27.5.1.tgz \
            | tar xz --strip-components=1 -C /usr/local/bin docker/docker
          docker version

      - name: Install kubectl
        run: |
          # Resolve the version in its own assignment and use --fail on both
          # requests: a failed lookup or an HTTP error now aborts the step
          # instead of installing an error page as the `kubectl` binary.
          KUBECTL_VERSION="$(curl -fsSL https://dl.k8s.io/release/stable.txt)"
          curl -fsSLO "https://dl.k8s.io/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl"
          chmod +x kubectl
          mv kubectl /usr/local/bin/
          kubectl version --client

      - name: Login to Gitea Registry
        run: |
          # The password is passed on stdin so it never appears in the process list.
          echo "${{ secrets.REGISTRY_PASSWORD }}" | \
            docker login ${{ env.INTERNAL_REGISTRY }} \
            -u ${{ secrets.REGISTRY_USERNAME }} --password-stdin

      - name: Build and push server
        run: |
          # Two tags: the immutable commit SHA (used for the rollout) and `latest`.
          docker build -f Dockerfile \
            -t ${{ env.INTERNAL_REGISTRY }}/admin/monitoring-tg-server:${{ github.sha }} \
            -t ${{ env.INTERNAL_REGISTRY }}/admin/monitoring-tg-server:latest \
            .
          docker push ${{ env.INTERNAL_REGISTRY }}/admin/monitoring-tg-server:${{ github.sha }}
          docker push ${{ env.INTERNAL_REGISTRY }}/admin/monitoring-tg-server:latest

      - name: Deploy to Kubernetes
        env:
          KUBECONFIG: /kubeconfig/config
        run: |
          kubectl apply -f k8s/namespace.yaml
          kubectl apply -f k8s/secrets.yaml
          kubectl apply -f k8s/configmap.yaml
          kubectl apply -f k8s/postgres.yaml
          kubectl apply -f k8s/server-deployment.yaml
          kubectl apply -f k8s/server-service.yaml
          # Pin the Deployment to the image built from this exact commit.
          kubectl -n monitoring-tg set image deployment/monitoring-tg-server \
            monitoring-tg-server=${{ env.NODE_REGISTRY }}/admin/monitoring-tg-server:${{ github.sha }}
          kubectl -n monitoring-tg rollout status deployment/monitoring-tg-server --timeout=180s
|
||||
14
.gitignore
vendored
Normal file
14
.gitignore
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*.egg-info/
|
||||
.venv/
|
||||
venv/
|
||||
.env
|
||||
*.session
|
||||
*.session-journal
|
||||
.pytest_cache/
|
||||
.mypy_cache/
|
||||
.ruff_cache/
|
||||
.idea/
|
||||
.vscode/
|
||||
data/
|
||||
28
Dockerfile
Normal file
28
Dockerfile
Normal file
@@ -0,0 +1,28 @@
|
||||
# Image for the parser_bot service; the default command starts `parser_bot.main`.
FROM python:3.11-slim

# Python/pip runtime switches: unbuffered output, no .pyc files, no pip cache.
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1
ENV PIP_NO_CACHE_DIR=1

WORKDIR /app

# Compiler toolchain; the apt lists are removed in the same layer to keep it small.
# NOTE(review): presumably needed to build wheels for some dependency — confirm.
RUN apt-get update \
    && apt-get install -y --no-install-recommends build-essential \
    && rm -rf /var/lib/apt/lists/*

# Project metadata, sources and migrations must be present before the
# editable install below.
COPY pyproject.toml alembic.ini ./
COPY src ./src
COPY alembic ./alembic

RUN pip install --upgrade pip \
    && pip install -e .

# Default locations for the Telegram session file and downloaded media.
RUN mkdir -p /data/session /data/media

COPY docker/entrypoint.sh /usr/local/bin/entrypoint.sh
RUN chmod +x /usr/local/bin/entrypoint.sh

EXPOSE 8000

# The entrypoint wraps the command below.
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
CMD ["python", "-m", "parser_bot.main"]
|
||||
123
README.md
Normal file
123
README.md
Normal file
@@ -0,0 +1,123 @@
|
||||
# parser-tg-bot
|
||||
|
||||
Парсер публичных Telegram-каналов на Telethon (MTProto). Сохраняет сообщения в Postgres,
|
||||
управляется через REST API. Период опроса настраивается через `.env`. На следующем шаге
|
||||
легко перевести на realtime через `events.NewMessage`.
|
||||
|
||||
## Стек
|
||||
|
||||
- Python 3.11, Telethon, FastAPI, SQLAlchemy 2 (async) + Alembic, APScheduler, Postgres 16
|
||||
|
||||
## Структура
|
||||
|
||||
```text
|
||||
src/parser_bot/
|
||||
├── api/ # FastAPI роуты + Pydantic-схемы
|
||||
├── db/ # SQLAlchemy модели + сессии
|
||||
├── scheduler/ # APScheduler-воркер периодического опроса
|
||||
├── telegram/ # Telethon-клиент (resolve, fetch)
|
||||
├── web/static/ # SPA-странички (HTML/CSS/JS, без бандлера)
|
||||
├── config.py # pydantic-settings
|
||||
└── main.py # FastAPI lifespan + uvicorn
|
||||
alembic/ # миграции
|
||||
```
|
||||
|
||||
## Первый запуск (локально, через Docker)
|
||||
|
||||
1. Получить `api_id` и `api_hash` на [my.telegram.org](https://my.telegram.org) → API development tools.
|
||||
2. Скопировать `.env.example` в `.env` и заполнить `TG_API_ID`, `TG_API_HASH`, `TG_PHONE`.
|
||||
3. Поднять Postgres + накатить миграции:
|
||||
|
||||
```bash
|
||||
docker compose up -d db
|
||||
docker compose run --rm app alembic upgrade head
|
||||
```
|
||||
|
||||
4. Запуск:
|
||||
|
||||
```bash
|
||||
docker compose up -d
|
||||
docker compose logs app --tail=50
|
||||
```
|
||||
|
||||
5. **Авторизация Telegram** — открыть [http://localhost:8000/auth.html](http://localhost:8000/auth.html)
|
||||
и нажать «Отправить код». Telegram пришлёт код на номер из `TG_PHONE` →
|
||||
ввести код (и 2FA-пароль, если включён). Готово, парсер начнёт опрос.
|
||||
|
||||
Сессия сохраняется в `./data/session/parser.session` — рестарты её переиспользуют,
|
||||
повторно входить не нужно.
|
||||
|
||||
### Админ-доступ и коды подразделов
|
||||
|
||||
- `ADMIN_PASSWORD` — дополнительный пароль для админских функций. Если не задан,
|
||||
остаётся прежний режим: доступ определяется только `ADMIN_ALLOWED_IPS`.
|
||||
- [http://localhost:8000/admin.html](http://localhost:8000/admin.html) — вход по
|
||||
админ-паролю. После входа доступны удаление и редактирование подразделов,
|
||||
просмотр их кодов, управление каналами, ручной опрос, промпты, авторизация
|
||||
Telegram и Swagger.
|
||||
- При создании подраздела обязательно задаётся `Код доступа`. Пользователь вводит
|
||||
этот код при первом открытии данных подраздела; после входа он может добавлять
|
||||
каналы в этот подраздел. Админ видит код в списке подразделов.
|
||||
|
||||
### Прод-вариант: без UI и без volume (k8s-friendly)
|
||||
|
||||
Сделай интерактивный логин **один раз** на dev-машине и получи непрозрачную (opaque) строку сессии:
|
||||
|
||||
```bash
|
||||
docker compose run --rm -it app python -m parser_bot.auth
|
||||
```
|
||||
|
||||
Скрипт напечатает строку вида `TG_SESSION_STRING=1AbcD...`. Положи её в
|
||||
`.env` или k8s Secret — после этого приложение поднимается без UI и без
|
||||
монтирования сессионного файла:
|
||||
|
||||
```ini
|
||||
TG_SESSION_STRING=1AbcDef... # вместо TG_SESSION_PATH/volume
|
||||
```
|
||||
|
||||
> ⚠️ **`ApiIdPublishedFloodError`** — Telegram заблокировал твою пару
|
||||
> `api_id`/`api_hash` (попала в публичный доступ). Создай **новое** приложение
|
||||
> на [my.telegram.org](https://my.telegram.org) и не публикуй креды нигде.
|
||||
> Старый `api_id` восстановить нельзя.
|
||||
|
||||
## UI
|
||||
|
||||
После запуска доступны страницы:
|
||||
|
||||
- [Дашборд](http://localhost:8000/) — общая статистика, топ каналов, кнопка опросить всех
|
||||
- [Каналы](http://localhost:8000/channels.html) — добавить / удалить / включить-выключить / опросить вручную
|
||||
- [Сообщения](http://localhost:8000/messages.html) — фильтр по каналу, поиск по тексту, пагинация, raw JSON
|
||||
- [Настройки](http://localhost:8000/settings.html) — текущая конфигурация и подсказки
|
||||
- [Авторизация](http://localhost:8000/auth.html) — веб-логин в Telegram (код + 2FA)
|
||||
- [Swagger UI](http://localhost:8000/docs) — интерактивный API
|
||||
|
||||
Глубокая ссылка `messages.html?channel_id=42` открывает ленту конкретного канала.
|
||||
|
||||
## API
|
||||
|
||||
- `GET /healthz` — health check
|
||||
- `GET /api/v1/auth/status` — авторизован ли клиент
|
||||
- `POST /api/v1/auth/send-code` — отправить код на `TG_PHONE`
|
||||
- `POST /api/v1/auth/submit-code` `{"code": "12345"}` — подтвердить код
|
||||
- `POST /api/v1/auth/submit-password` `{"password": "..."}` — 2FA-пароль
|
||||
- `POST /api/v1/auth/logout` — завершить сессию
|
||||
- `GET /api/v1/stats` — глобальные счётчики
|
||||
- `GET /api/v1/settings` — read-only вид конфигурации
|
||||
- `GET /api/v1/channels` — список каналов
|
||||
- `POST /api/v1/channels` `{"identifier": "@durov"}` — добавить
|
||||
- `GET /api/v1/channels/{id}` — карточка
|
||||
- `PATCH /api/v1/channels/{id}` `{"is_active": false}` — включить/выключить
|
||||
- `DELETE /api/v1/channels/{id}` — удалить
|
||||
- `GET /api/v1/channels/{id}/stats` — счётчики по каналу
|
||||
- `POST /api/v1/channels/{id}/poll` — форсировать опрос одного канала
|
||||
- `POST /api/v1/poll` — форсировать опрос всех активных каналов
|
||||
- `GET /api/v1/messages?channel_id=...&q=...&limit=50&offset=0` — лента
|
||||
- `GET /api/v1/messages/{id}` — одно сообщение (с `raw` JSONB)
|
||||
|
||||
## Дальше
|
||||
|
||||
- **Realtime**: заменить APScheduler на `client.add_event_handler(handler, events.NewMessage)`,
|
||||
оставив periodic poll как фоновый «доводчик» для пропущенных сообщений.
|
||||
- **Go-микросервис**: контракт = таблицы `channels` / `messages` в Postgres.
|
||||
Go-сервис может либо читать ту же БД, либо ходить в `/api/v1/messages`.
|
||||
- **k8s**: добавить Helm-чарт; `data/session/` маппится на PVC, `.env` — в Secret.
|
||||
39
alembic.ini
Normal file
39
alembic.ini
Normal file
@@ -0,0 +1,39 @@
|
||||
[alembic]
|
||||
script_location = alembic
|
||||
prepend_sys_path = src
|
||||
version_path_separator = os
|
||||
sqlalchemy.url = postgresql+asyncpg://parser:parser@db:5432/parser
|
||||
|
||||
[loggers]
|
||||
keys = root,sqlalchemy,alembic
|
||||
|
||||
[handlers]
|
||||
keys = console
|
||||
|
||||
[formatters]
|
||||
keys = generic
|
||||
|
||||
[logger_root]
|
||||
level = WARN
|
||||
handlers = console
|
||||
qualname =
|
||||
|
||||
[logger_sqlalchemy]
|
||||
level = WARN
|
||||
handlers =
|
||||
qualname = sqlalchemy.engine
|
||||
|
||||
[logger_alembic]
|
||||
level = INFO
|
||||
handlers =
|
||||
qualname = alembic
|
||||
|
||||
[handler_console]
|
||||
class = StreamHandler
|
||||
args = (sys.stderr,)
|
||||
level = NOTSET
|
||||
formatter = generic
|
||||
|
||||
[formatter_generic]
|
||||
format = %(levelname)-5.5s [%(name)s] %(message)s
|
||||
datefmt = %H:%M:%S
|
||||
52
alembic/env.py
Normal file
52
alembic/env.py
Normal file
@@ -0,0 +1,52 @@
|
||||
import asyncio
|
||||
from logging.config import fileConfig
|
||||
|
||||
from alembic import context
|
||||
from sqlalchemy import pool
|
||||
from sqlalchemy.engine import Connection
|
||||
from sqlalchemy.ext.asyncio import async_engine_from_config
|
||||
|
||||
from parser_bot.config import settings
|
||||
from parser_bot.db.models import Base
|
||||
|
||||
config = context.config
|
||||
config.set_main_option("sqlalchemy.url", settings.database_url)
|
||||
|
||||
if config.config_file_name is not None:
|
||||
fileConfig(config.config_file_name)
|
||||
|
||||
target_metadata = Base.metadata
|
||||
|
||||
|
||||
def run_migrations_offline() -> None:
    """Run migrations in Alembic's offline mode.

    The context is configured from the application's database URL instead of
    a live connection, with bound parameters rendered inline as literals.
    """
    offline_options = {
        "url": settings.database_url,
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
|
||||
|
||||
|
||||
def do_run_migrations(connection: Connection) -> None:
    """Run all pending migrations on an already-open synchronous connection.

    Passed to ``run_sync`` by ``run_migrations_online``.

    Args:
        connection: Connection the Alembic context is bound to.
    """
    context.configure(
        target_metadata=target_metadata,
        connection=connection,
    )
    with context.begin_transaction():
        context.run_migrations()
|
||||
|
||||
|
||||
async def run_migrations_online() -> None:
    """Run migrations against the live database through an async engine.

    The engine is built from the ``sqlalchemy.*`` options of the Alembic
    config section (the URL is overridden from application settings at module
    import) and uses ``NullPool``, so no connections are kept after use.
    """
    connectable = async_engine_from_config(
        config.get_section(config.config_ini_section, {}),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )
    try:
        async with connectable.connect() as connection:
            # Alembic's migration API is synchronous; run it via run_sync.
            await connection.run_sync(do_run_migrations)
    finally:
        # Dispose the engine even when connecting or a migration fails.
        await connectable.dispose()
|
||||
|
||||
|
||||
if context.is_offline_mode():
|
||||
run_migrations_offline()
|
||||
else:
|
||||
asyncio.run(run_migrations_online())
|
||||
25
alembic/script.py.mako
Normal file
25
alembic/script.py.mako
Normal file
@@ -0,0 +1,25 @@
|
||||
"""${message}
|
||||
|
||||
Revision ID: ${up_revision}
|
||||
Revises: ${down_revision | comma,n}
|
||||
Create Date: ${create_date}
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
${imports if imports else ""}
|
||||
|
||||
revision: str = ${repr(up_revision)}
|
||||
down_revision: Union[str, None] = ${repr(down_revision)}
|
||||
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
|
||||
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
${upgrades if upgrades else "pass"}
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
${downgrades if downgrades else "pass"}
|
||||
71
alembic/versions/0001_initial.py
Normal file
71
alembic/versions/0001_initial.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""initial schema: channels + messages
|
||||
|
||||
Revision ID: 0001
|
||||
Revises:
|
||||
Create Date: 2026-05-05
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
revision: str = "0001"
|
||||
down_revision: Union[str, None] = None
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Create the initial schema: `channels`, `messages` and the feed index."""
    channel_columns = [
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column("tg_id", sa.BigInteger(), nullable=True, unique=True),
        sa.Column("identifier", sa.String(length=255), nullable=False, unique=True),
        sa.Column("title", sa.String(length=512), nullable=True),
        sa.Column("is_active", sa.Boolean(), nullable=False, server_default=sa.text("true")),
        sa.Column("last_message_id", sa.BigInteger(), nullable=True),
        sa.Column("last_polled_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
    ]
    op.create_table("channels", *channel_columns)

    # Messages are removed together with their channel (ON DELETE CASCADE).
    channel_fk = sa.ForeignKey("channels.id", ondelete="CASCADE")
    message_columns = [
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column("channel_id", sa.Integer(), channel_fk, nullable=False),
        sa.Column("tg_message_id", sa.BigInteger(), nullable=False),
        sa.Column("date", sa.DateTime(timezone=True), nullable=False),
        sa.Column("text", sa.Text(), nullable=True),
        sa.Column("sender_id", sa.BigInteger(), nullable=True),
        sa.Column("has_media", sa.Boolean(), nullable=False, server_default=sa.text("false")),
        sa.Column("views", sa.Integer(), nullable=True),
        sa.Column("forwards", sa.Integer(), nullable=True),
        sa.Column("raw", postgresql.JSONB(), nullable=True),
        sa.Column(
            "fetched_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
    ]
    # A Telegram message id may be stored only once per channel.
    one_row_per_message = sa.UniqueConstraint(
        "channel_id", "tg_message_id", name="uq_channel_message"
    )
    op.create_table("messages", *message_columns, one_row_per_message)

    op.create_index(
        "ix_messages_channel_date",
        "messages",
        ["channel_id", "date"],
        unique=False,
    )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Remove the initial schema: the index first, then `messages`, then `channels`."""
    op.drop_index("ix_messages_channel_date", table_name="messages")
    # `messages` references `channels`, so it is dropped first.
    for table_name in ("messages", "channels"):
        op.drop_table(table_name)
|
||||
28
alembic/versions/0002_add_media_files.py
Normal file
28
alembic/versions/0002_add_media_files.py
Normal file
@@ -0,0 +1,28 @@
|
||||
"""add media_files JSONB column to messages
|
||||
|
||||
Revision ID: 0002
|
||||
Revises: 0001
|
||||
Create Date: 2026-05-05
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
revision: str = "0002"
|
||||
down_revision: Union[str, None] = "0001"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add the nullable JSONB column `messages.media_files`."""
    media_files = sa.Column("media_files", postgresql.JSONB(), nullable=True)
    op.add_column("messages", media_files)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop `messages.media_files`."""
    op.drop_column(table_name="messages", column_name="media_files")
|
||||
39
alembic/versions/0003_add_grouped_id.py
Normal file
39
alembic/versions/0003_add_grouped_id.py
Normal file
@@ -0,0 +1,39 @@
|
||||
"""add grouped_id to messages (Telegram album/media-group key)
|
||||
|
||||
Revision ID: 0003
|
||||
Revises: 0002
|
||||
Create Date: 2026-05-05
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
revision: str = "0003"
|
||||
down_revision: Union[str, None] = "0002"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add `messages.grouped_id`, index it per channel and backfill old rows."""
    grouped_id = sa.Column("grouped_id", sa.BigInteger(), nullable=True)
    op.add_column("messages", grouped_id)

    op.create_index(
        "ix_messages_grouped_id",
        "messages",
        ["channel_id", "grouped_id"],
    )

    # Existing rows already carry the value inside their stored `raw` JSONB;
    # copy it into the new column so albums saved before this migration are
    # grouped retroactively.
    backfill_sql = """
        UPDATE messages
        SET grouped_id = (raw->>'grouped_id')::bigint
        WHERE grouped_id IS NULL
          AND raw IS NOT NULL
          AND raw->>'grouped_id' IS NOT NULL
        """
    op.execute(backfill_sql)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the grouped-id index and then the `messages.grouped_id` column."""
    op.drop_index("ix_messages_grouped_id", table_name="messages")
    op.drop_column(table_name="messages", column_name="grouped_id")
|
||||
34
alembic/versions/0004_add_extracted.py
Normal file
34
alembic/versions/0004_add_extracted.py
Normal file
@@ -0,0 +1,34 @@
|
||||
"""add extracted JSONB column to messages
|
||||
|
||||
Revision ID: 0004
|
||||
Revises: 0003
|
||||
Create Date: 2026-05-05
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
revision: str = "0004"
|
||||
down_revision: Union[str, None] = "0003"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add the nullable JSONB column `messages.extracted` with a GIN index."""
    extracted = sa.Column("extracted", postgresql.JSONB(), nullable=True)
    op.add_column("messages", extracted)

    # GIN index for JSON queries (e.g. filtering by
    # extracted->'real_estate'->>'kind').
    create_gin_index = (
        "CREATE INDEX IF NOT EXISTS ix_messages_extracted_gin ON messages USING GIN (extracted)"
    )
    op.execute(create_gin_index)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the GIN index (if present) and then `messages.extracted`."""
    drop_gin_index = "DROP INDEX IF EXISTS ix_messages_extracted_gin"
    op.execute(drop_gin_index)
    op.drop_column(table_name="messages", column_name="extracted")
|
||||
30
alembic/versions/0005_add_sender_info.py
Normal file
30
alembic/versions/0005_add_sender_info.py
Normal file
@@ -0,0 +1,30 @@
|
||||
"""add sender_username and sender_name to messages
|
||||
|
||||
Revision ID: 0005
|
||||
Revises: 0004
|
||||
Create Date: 2026-05-06
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
revision: str = "0005"
|
||||
down_revision: Union[str, None] = "0004"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add nullable `sender_username` (64) and `sender_name` (255) to `messages`."""
    new_columns = (
        ("sender_username", 64),
        ("sender_name", 255),
    )
    for column_name, max_length in new_columns:
        op.add_column(
            "messages",
            sa.Column(column_name, sa.String(length=max_length), nullable=True),
        )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the sender columns in reverse order of their creation."""
    for column_name in ("sender_name", "sender_username"):
        op.drop_column("messages", column_name)
|
||||
35
alembic/versions/0006_add_app_settings.py
Normal file
35
alembic/versions/0006_add_app_settings.py
Normal file
@@ -0,0 +1,35 @@
|
||||
"""key/value store for runtime-editable settings (LLM prompt, etc.)
|
||||
|
||||
Revision ID: 0006
|
||||
Revises: 0005
|
||||
Create Date: 2026-05-06
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
revision: str = "0006"
|
||||
down_revision: Union[str, None] = "0005"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Create `app_settings`, a key/value table with JSONB values."""
    key_column = sa.Column("key", sa.String(length=64), primary_key=True)
    value_column = sa.Column("value", postgresql.JSONB(), nullable=False)
    updated_at_column = sa.Column(
        "updated_at",
        sa.DateTime(timezone=True),
        nullable=False,
        server_default=sa.func.now(),
    )
    op.create_table("app_settings", key_column, value_column, updated_at_column)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the `app_settings` table."""
    op.drop_table(table_name="app_settings")
|
||||
37
alembic/versions/0007_add_channel_vertical.py
Normal file
37
alembic/versions/0007_add_channel_vertical.py
Normal file
@@ -0,0 +1,37 @@
|
||||
"""split channels into two verticals: real_estate / hr
|
||||
|
||||
Existing rows get `real_estate` per the migration decision — the service was
|
||||
real-estate-only before this column existed.
|
||||
|
||||
Revision ID: 0007
|
||||
Revises: 0006
|
||||
Create Date: 2026-05-19
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
revision: str = "0007"
|
||||
down_revision: Union[str, None] = "0006"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add the indexed, non-null `channels.vertical` column.

    The server default `real_estate` also fills every pre-existing row.
    """
    vertical = sa.Column(
        "vertical",
        sa.String(length=32),
        nullable=False,
        server_default="real_estate",
    )
    op.add_column("channels", vertical)
    op.create_index("ix_channels_vertical", "channels", ["vertical"])
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the vertical index and then the `channels.vertical` column."""
    op.drop_index("ix_channels_vertical", table_name="channels")
    op.drop_column(table_name="channels", column_name="vertical")
|
||||
110
alembic/versions/0008_add_sections.py
Normal file
110
alembic/versions/0008_add_sections.py
Normal file
@@ -0,0 +1,110 @@
|
||||
"""sub-sections inside each vertical (e.g. Real Estate → Dubai / Moscow)
|
||||
|
||||
A channel now belongs to exactly one section, and each section to exactly
|
||||
one vertical. The migration auto-creates a `Общий` section per vertical
|
||||
that has at least one channel and pins all existing channels there, so the
|
||||
service keeps working without manual reclassification after upgrade.
|
||||
|
||||
Revision ID: 0008
|
||||
Revises: 0007
|
||||
Create Date: 2026-05-20
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
revision: str = "0008"
|
||||
down_revision: Union[str, None] = "0007"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Introduce `sections` and attach every channel to exactly one of them.

    Steps, in order: create the table, seed one `default` section per vertical
    that already has channels, add `channels.section_id` as nullable, backfill
    it, make it NOT NULL, and finally widen `app_settings.key` to 128 chars.
    """
    section_columns = [
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column("vertical", sa.String(length=32), nullable=False),
        sa.Column("slug", sa.String(length=64), nullable=False),
        sa.Column("title", sa.String(length=255), nullable=False),
        sa.Column("emoji", sa.String(length=8), nullable=True),
        sa.Column("description", sa.Text(), nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
    ]
    # A slug only has to be unique within its vertical.
    slug_per_vertical = sa.UniqueConstraint(
        "vertical", "slug", name="uq_section_vertical_slug"
    )
    op.create_table("sections", *section_columns, slug_per_vertical)
    op.create_index("ix_sections_vertical", "sections", ["vertical"])

    # Seed a `default` section for each vertical that already has channels so
    # the backfill further down has a row to point at.
    seed_default_sections = """
        INSERT INTO sections (vertical, slug, title, emoji)
        SELECT DISTINCT c.vertical,
               'default',
               CASE c.vertical
                   WHEN 'hr' THEN 'Общий HR'
                   ELSE 'Общий'
               END,
               CASE c.vertical WHEN 'hr' THEN '👥' ELSE '🏠' END
        FROM channels c
        ON CONFLICT (vertical, slug) DO NOTHING
        """
    op.execute(seed_default_sections)

    # The column starts out nullable; it is tightened only after the backfill.
    section_id = sa.Column("section_id", sa.Integer(), nullable=True)
    op.add_column("channels", section_id)
    op.create_foreign_key(
        "fk_channels_section",
        "channels",
        "sections",
        ["section_id"],
        ["id"],
        ondelete="RESTRICT",
    )
    op.create_index("ix_channels_section_id", "channels", ["section_id"])

    # Point every channel at the `default` section of its own vertical.
    assign_default_section = """
        UPDATE channels c
        SET section_id = s.id
        FROM sections s
        WHERE s.vertical = c.vertical AND s.slug = 'default'
        """
    op.execute(assign_default_section)

    # Every channel now has a section, so the column can be made mandatory.
    op.alter_column("channels", "section_id", nullable=False)

    # Per-section LLM prompt keys can exceed 64 chars
    # (`llm_system_prompt:real_estate:some-long-slug`), so widen the key column.
    op.alter_column(
        "app_settings",
        "key",
        existing_type=sa.String(length=64),
        type_=sa.String(length=128),
        existing_nullable=False,
    )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Undo the sections migration in reverse order of `upgrade`.

    NOTE(review): narrowing `app_settings.key` back to 64 chars presumably
    fails if any stored key is longer than that — confirm before downgrading.
    """
    wide_key = sa.String(length=128)
    narrow_key = sa.String(length=64)
    op.alter_column(
        "app_settings",
        "key",
        existing_type=wide_key,
        type_=narrow_key,
        existing_nullable=False,
    )

    # Detach channels from sections: index, then foreign key, then the column.
    op.drop_index("ix_channels_section_id", table_name="channels")
    op.drop_constraint("fk_channels_section", "channels", type_="foreignkey")
    op.drop_column("channels", "section_id")

    # Nothing references `sections` any more, so it can be removed.
    op.drop_index("ix_sections_vertical", table_name="sections")
    op.drop_table("sections")
|
||||
24
alembic/versions/0009_add_section_access_code.py
Normal file
24
alembic/versions/0009_add_section_access_code.py
Normal file
@@ -0,0 +1,24 @@
|
||||
"""add access code to sections
|
||||
|
||||
Revision ID: 0009
|
||||
Revises: 0008
|
||||
Create Date: 2026-05-29
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
revision: str = "0009"
|
||||
down_revision: Union[str, None] = "0008"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add the nullable `sections.access_code` column (up to 255 chars)."""
    access_code = sa.Column("access_code", sa.String(length=255), nullable=True)
    op.add_column("sections", access_code)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop `sections.access_code`."""
    op.drop_column(table_name="sections", column_name="access_code")
|
||||
64
docker-compose.yml
Normal file
64
docker-compose.yml
Normal file
@@ -0,0 +1,64 @@
|
||||
services:
|
||||
ollama:
|
||||
image: ollama/ollama:latest
|
||||
environment:
|
||||
OLLAMA_HOST: 0.0.0.0:11434
|
||||
OLLAMA_KEEP_ALIVE: 24h
|
||||
OLLAMA_NUM_PARALLEL: "1"
|
||||
OLLAMA_NUM_THREAD: "8"
|
||||
volumes:
|
||||
- ./data/ollama:/root/.ollama
|
||||
ports:
|
||||
- "11434:11434"
|
||||
healthcheck:
|
||||
test: ["CMD", "ollama", "list"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 30
|
||||
restart: unless-stopped
|
||||
|
||||
ollama-pull:
|
||||
image: ollama/ollama:latest
|
||||
depends_on:
|
||||
ollama:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
OLLAMA_HOST: ollama:11434
|
||||
entrypoint: ["/bin/sh", "-c"]
|
||||
command: ["ollama list | grep -q qwen2.5:7b-instruct-q4_K_M || ollama pull qwen2.5:7b-instruct-q4_K_M"]
|
||||
restart: "no"
|
||||
|
||||
db:
|
||||
image: postgres:16-alpine
|
||||
environment:
|
||||
POSTGRES_USER: ${POSTGRES_USER:-parser}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-parser}
|
||||
POSTGRES_DB: ${POSTGRES_DB:-parser}
|
||||
ports:
|
||||
- "5432:5432"
|
||||
volumes:
|
||||
- pgdata:/var/lib/postgresql/data
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-parser}"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
|
||||
app:
|
||||
build: .
|
||||
env_file: .env
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
ollama:
|
||||
condition: service_healthy
|
||||
ports:
|
||||
- "80:8000"
|
||||
volumes:
|
||||
- ./data/session:/data/session
|
||||
- ./data/media:/data/media
|
||||
- ./src:/app/src
|
||||
- ./alembic:/app/alembic
|
||||
|
||||
volumes:
|
||||
pgdata:
|
||||
16
docker/entrypoint.sh
Normal file
16
docker/entrypoint.sh
Normal file
@@ -0,0 +1,16 @@
|
||||
#!/bin/sh
set -e

# Run migrations on every container start. Idempotent: alembic skips
# revisions already applied. Skipped for one-shot commands like `alembic`
# itself (would deadlock when explicitly invoked), for interactive shells,
# and for the auth helper.
case "$1" in
    alembic|/bin/sh|sh|bash)
        exec "$@"
        ;;
esac

# "$1" holds only the first word of the command, so a multi-word pattern such
# as `python -m parser_bot.auth` can never match it. Compare against the
# joined argument list instead so the auth helper really skips migrations.
case "$*" in
    "python -m parser_bot.auth"|"python -m parser_bot.auth "*)
        exec "$@"
        ;;
esac

echo "[entrypoint] running alembic upgrade head"
alembic upgrade head

# Replace the shell so the real process becomes PID 1 and receives signals.
exec "$@"
|
||||
20
k8s/configmap.yaml
Normal file
20
k8s/configmap.yaml
Normal file
@@ -0,0 +1,20 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: monitoring-tg-config
|
||||
namespace: monitoring-tg
|
||||
data:
|
||||
API_HOST: "0.0.0.0"
|
||||
API_PORT: "8000"
|
||||
PUBLIC_BASE_PATH: "/api/monitoring-tg"
|
||||
POSTGRES_HOST: "postgres.monitoring-tg.svc.cluster.local"
|
||||
POSTGRES_PORT: "5432"
|
||||
POSTGRES_USER: "parser"
|
||||
POSTGRES_DB: "parser"
|
||||
TG_SESSION_PATH: "/data/session/parser.session"
|
||||
MEDIA_DIR: "/data/media"
|
||||
POLL_INTERVAL_SECONDS: "60"
|
||||
POLL_HISTORY_LIMIT: "50"
|
||||
LLM_ENABLED: "1"
|
||||
LLM_BASE_URL: "http://ollama.ollama.svc.cluster.local:11434"
|
||||
LLM_MODEL: "qwen2.5:7b-instruct-q4_K_M"
|
||||
12
k8s/kustomization.yaml
Normal file
12
k8s/kustomization.yaml
Normal file
@@ -0,0 +1,12 @@
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
|
||||
namespace: monitoring-tg
|
||||
|
||||
resources:
|
||||
- namespace.yaml
|
||||
- configmap.yaml
|
||||
- secrets.yaml
|
||||
- postgres.yaml
|
||||
- server-deployment.yaml
|
||||
- server-service.yaml
|
||||
4
k8s/namespace.yaml
Normal file
4
k8s/namespace.yaml
Normal file
@@ -0,0 +1,4 @@
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: monitoring-tg
|
||||
65
k8s/postgres.yaml
Normal file
65
k8s/postgres.yaml
Normal file
@@ -0,0 +1,65 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: postgres
|
||||
namespace: monitoring-tg
|
||||
spec:
|
||||
selector:
|
||||
app: postgres
|
||||
ports:
|
||||
- port: 5432
|
||||
targetPort: 5432
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
name: postgres
|
||||
namespace: monitoring-tg
|
||||
spec:
|
||||
serviceName: postgres
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: postgres
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: postgres
|
||||
spec:
|
||||
containers:
|
||||
- name: postgres
|
||||
image: postgres:16-alpine
|
||||
ports:
|
||||
- containerPort: 5432
|
||||
envFrom:
|
||||
- secretRef:
|
||||
name: postgres-secret
|
||||
volumeMounts:
|
||||
- name: pgdata
|
||||
mountPath: /var/lib/postgresql/data
|
||||
resources:
|
||||
requests:
|
||||
cpu: 50m
|
||||
memory: 128Mi
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 512Mi
|
||||
livenessProbe:
|
||||
exec:
|
||||
command: ["pg_isready", "-U", "parser", "-d", "parser"]
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
readinessProbe:
|
||||
exec:
|
||||
command: ["pg_isready", "-U", "parser", "-d", "parser"]
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
volumeClaimTemplates:
|
||||
- metadata:
|
||||
name: pgdata
|
||||
spec:
|
||||
accessModes: ["ReadWriteOnce"]
|
||||
storageClassName: local-path
|
||||
resources:
|
||||
requests:
|
||||
storage: 5Gi
|
||||
25
k8s/secrets.yaml
Normal file
25
k8s/secrets.yaml
Normal file
@@ -0,0 +1,25 @@
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: monitoring-tg-secrets
|
||||
namespace: monitoring-tg
|
||||
type: Opaque
|
||||
stringData:
|
||||
TG_API_ID: "0"
|
||||
TG_API_HASH: "CHANGE_ME"
|
||||
TG_PHONE: "CHANGE_ME"
|
||||
TG_SESSION_STRING: ""
|
||||
POSTGRES_PASSWORD: "parser"
|
||||
ADMIN_ALLOWED_IPS: ""
|
||||
ADMIN_PASSWORD: "CHANGE_ME"
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: postgres-secret
|
||||
namespace: monitoring-tg
|
||||
type: Opaque
|
||||
stringData:
|
||||
POSTGRES_USER: "parser"
|
||||
POSTGRES_PASSWORD: "parser"
|
||||
POSTGRES_DB: "parser"
|
||||
70
k8s/server-deployment.yaml
Normal file
70
k8s/server-deployment.yaml
Normal file
@@ -0,0 +1,70 @@
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: monitoring-tg-data
|
||||
namespace: monitoring-tg
|
||||
spec:
|
||||
accessModes: ["ReadWriteOnce"]
|
||||
storageClassName: local-path
|
||||
resources:
|
||||
requests:
|
||||
storage: 10Gi
|
||||
---
|
||||
apiVersion: apps/v1
kind: Deployment
metadata:
  name: monitoring-tg-server
  namespace: monitoring-tg
spec:
  replicas: 1
  # Single-instance service backed by a ReadWriteOnce volume mounted at /data.
  # Recreate stops the old pod before the new one starts, so a rollout never
  # runs two copies side by side (the default RollingUpdate would surge a
  # second pod first).
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: monitoring-tg-server
  template:
    metadata:
      labels:
        app: monitoring-tg-server
    spec:
      terminationGracePeriodSeconds: 20
      securityContext:
        # Makes the mounted volume group-writable for GID 1000.
        fsGroup: 1000
      containers:
        - name: monitoring-tg-server
          image: localhost:30300/admin/monitoring-tg-server:latest
          ports:
            - containerPort: 8000
          envFrom:
            - configMapRef:
                name: monitoring-tg-config
            - secretRef:
                name: monitoring-tg-secrets
          volumeMounts:
            - name: app-data
              mountPath: /data
          # Up to 30 x 5s = 150s for startup before liveness checks take over.
          startupProbe:
            httpGet:
              path: /healthz
              port: 8000
            periodSeconds: 5
            failureThreshold: 30
          livenessProbe:
            httpGet:
              path: /healthz
              port: 8000
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /healthz
              port: 8000
            periodSeconds: 5
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: 800m
              memory: 1Gi
      volumes:
        - name: app-data
          persistentVolumeClaim:
            claimName: monitoring-tg-data
|
||||
18
k8s/server-service.yaml
Normal file
18
k8s/server-service.yaml
Normal file
@@ -0,0 +1,18 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: monitoring-tg-server
|
||||
namespace: monitoring-tg
|
||||
annotations:
|
||||
portal.estateliga.work/enabled: "true"
|
||||
portal.estateliga.work/name: "Мониторинг TG"
|
||||
portal.estateliga.work/description: "Парсер и анализ Telegram-каналов"
|
||||
portal.estateliga.work/icon: "pulse"
|
||||
portal.estateliga.work/path: "/api/monitoring-tg"
|
||||
portal.estateliga.work/code: "monitoring_tg"
|
||||
spec:
|
||||
selector:
|
||||
app: monitoring-tg-server
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 8000
|
||||
44
pyproject.toml
Normal file
44
pyproject.toml
Normal file
@@ -0,0 +1,44 @@
|
||||
[project]
|
||||
name = "parser-tg-bot"
|
||||
version = "0.1.0"
|
||||
description = "Telegram channel parser — periodic polling + storage, future Go microservice"
|
||||
requires-python = ">=3.11"
|
||||
dependencies = [
|
||||
"telethon>=1.36",
|
||||
"fastapi>=0.115",
|
||||
"uvicorn[standard]>=0.32",
|
||||
"sqlalchemy[asyncio]>=2.0",
|
||||
"asyncpg>=0.30",
|
||||
"alembic>=1.14",
|
||||
"apscheduler>=3.10",
|
||||
"pydantic>=2.9",
|
||||
"pydantic-settings>=2.6",
|
||||
"python-dotenv>=1.0",
|
||||
"structlog>=24.4",
|
||||
"httpx>=0.27",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = [
|
||||
"ruff>=0.7",
|
||||
"mypy>=1.13",
|
||||
"pytest>=8.3",
|
||||
"pytest-asyncio>=0.24",
|
||||
]
|
||||
|
||||
[build-system]
|
||||
requires = ["setuptools>=68"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
where = ["src"]
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
"parser_bot.web" = ["static/*", "static/**/*"]
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 100
|
||||
target-version = "py311"
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
asyncio_mode = "auto"
|
||||
0
src/parser_bot/__init__.py
Normal file
0
src/parser_bot/__init__.py
Normal file
116
src/parser_bot/access.py
Normal file
116
src/parser_bot/access.py
Normal file
@@ -0,0 +1,116 @@
|
||||
"""Admin access helpers for admin-only surfaces (auth, OpenAPI docs).
|
||||
|
||||
Resolution:
|
||||
1. If `ADMIN_ALLOWED_IPS` is empty → no network restriction.
|
||||
2. Otherwise the request's client IP must be in the allowlist.
|
||||
3. When `TRUST_PROXY_HEADERS=true` (default) and one of the proxy headers
|
||||
is present, the first IP in `X-Forwarded-For` (or `X-Real-IP`) is used.
|
||||
Without this, behind a Docker port-forward the source IP is always the
|
||||
gateway, which is useless for ACLs.
|
||||
4. If `ADMIN_PASSWORD` is set, the request must also present a valid signed
|
||||
admin cookie or the password in `X-Admin-Password`.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
import secrets
|
||||
|
||||
from fastapi import HTTPException, Request, Response
|
||||
|
||||
from parser_bot.config import settings
|
||||
|
||||
ADMIN_COOKIE = "parser_admin"
|
||||
_ADMIN_TOKEN_MESSAGE = b"parser-tg-bot-admin-v1"
|
||||
|
||||
|
||||
def client_ip(request: Request) -> str:
    """Return the caller's IP address as this service sees it.

    Proxy headers are consulted only when ``settings.trust_proxy_headers`` is
    on: the left-most ``X-Forwarded-For`` entry wins, then ``X-Real-IP``.
    Otherwise, or when neither header carries a value, the socket peer address
    is used, with ``"0.0.0.0"`` standing in when the peer is unknown.
    """
    if settings.trust_proxy_headers:
        forwarded = request.headers.get("x-forwarded-for")
        if forwarded:
            # Standard form is "client, proxy1, proxy2"; keep the first entry.
            first_hop, _, _ = forwarded.partition(",")
            return first_hop.strip()
        real_ip = request.headers.get("x-real-ip")
        if real_ip:
            return real_ip.strip()
    peer = request.client
    return "0.0.0.0" if not peer else peer.host
|
||||
|
||||
|
||||
def is_admin_network_allowed(request: Request) -> bool:
    """Check the request's client IP against the admin allowlist.

    An empty allowlist means no network restriction, so every request passes.
    """
    allowlist = settings.admin_ip_set
    # Short-circuit: the client IP is resolved only when an allowlist exists.
    return not allowlist or client_ip(request) in allowlist
|
||||
|
||||
|
||||
def admin_password_enabled() -> bool:
    """Tell whether a non-empty admin password is configured."""
    return True if settings.admin_password else False
|
||||
|
||||
|
||||
def verify_admin_password(password: str | None) -> bool:
|
||||
if not settings.admin_password:
|
||||
return True
|
||||
if password is None:
|
||||
return False
|
||||
return secrets.compare_digest(password, settings.admin_password)
|
||||
|
||||
|
||||
def admin_token() -> str:
    """Derive the admin cookie value.

    The token is the hex HMAC-SHA256 of a fixed message, keyed by the
    UTF-8 encoded admin password.
    """
    key = settings.admin_password.encode("utf-8")
    mac = hmac.new(key, _ADMIN_TOKEN_MESSAGE, hashlib.sha256)
    return mac.hexdigest()
|
||||
|
||||
|
||||
def verify_admin_token(token: str | None) -> bool:
|
||||
if not settings.admin_password:
|
||||
return True
|
||||
if token is None:
|
||||
return False
|
||||
return secrets.compare_digest(token, admin_token())
|
||||
|
||||
|
||||
def set_admin_cookie(response: Response) -> None:
    """Attach the admin token cookie to ``response``.

    The cookie is HTTP-only, ``SameSite=Lax``, not marked ``Secure``, and
    lives for 30 days.
    """
    thirty_days_in_seconds = 60 * 60 * 24 * 30
    cookie_options = {
        "httponly": True,
        "samesite": "lax",
        "secure": False,
        "max_age": thirty_days_in_seconds,
    }
    response.set_cookie(ADMIN_COOKIE, admin_token(), **cookie_options)
|
||||
|
||||
|
||||
def clear_admin_cookie(response: Response) -> None:
    """Instruct the client to drop the admin cookie."""
    cookie_name = ADMIN_COOKIE
    response.delete_cookie(cookie_name)
|
||||
|
||||
|
||||
def is_admin_request(request: Request) -> bool:
    """Decide whether ``request`` comes from an admin.

    The network allowlist must pass first. If no admin password is
    configured that is sufficient; otherwise the request must carry either a
    valid admin cookie or the password in the ``X-Admin-Password`` header.
    """
    if not is_admin_network_allowed(request):
        return False
    if not settings.admin_password:
        return True
    # Cookie first; the header is only inspected when the cookie is not valid.
    if verify_admin_token(request.cookies.get(ADMIN_COOKIE)):
        return True
    return verify_admin_password(request.headers.get("x-admin-password"))
|
||||
|
||||
|
||||
def require_admin_network(request: Request) -> None:
    """FastAPI dependency guarding the admin login page/API by network only.

    Applying the IP allowlist here keeps it effective before the client has
    obtained the password cookie.

    Raises:
        HTTPException: 404 when the client IP is outside the allowlist.
    """
    if is_admin_network_allowed(request):
        return
    raise HTTPException(status_code=404)
|
||||
|
||||
|
||||
def require_admin(request: Request) -> None:
    """FastAPI dependency that hides admin endpoints from non-admins.

    A 404 is raised rather than a 403 so that clients outside the admin
    boundary cannot tell the endpoint exists.

    Raises:
        HTTPException: 404 when the request does not qualify as admin.
    """
    if is_admin_request(request):
        return
    raise HTTPException(status_code=404)
|
||||
0
src/parser_bot/api/__init__.py
Normal file
0
src/parser_bot/api/__init__.py
Normal file
1048
src/parser_bot/api/routes.py
Normal file
1048
src/parser_bot/api/routes.py
Normal file
File diff suppressed because it is too large
Load Diff
231
src/parser_bot/api/schemas.py
Normal file
231
src/parser_bot/api/schemas.py
Normal file
@@ -0,0 +1,231 @@
|
||||
import re
|
||||
from datetime import datetime
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
||||
|
||||
Vertical = Literal["real_estate", "hr"]
|
||||
|
||||
# Section slugs are used as URL segments — keep them URL-safe.
|
||||
_SLUG_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,62}[a-z0-9]$|^[a-z0-9]$")
|
||||
|
||||
|
||||
class SectionCreate(BaseModel):
    """Request body for creating a section inside a vertical."""

    vertical: Vertical
    # Used as a URL segment; format enforced by `_check_slug`.
    slug: str = Field(..., min_length=1, max_length=64)
    title: str = Field(..., min_length=1, max_length=255)
    emoji: str | None = Field(None, max_length=8)
    description: str | None = None
    access_code: str = Field(..., min_length=3, max_length=255)

    @field_validator("slug")
    @classmethod
    def _check_slug(cls, v: str) -> str:
        """Reject slugs that do not match ``_SLUG_RE`` in their entirety.

        Raises:
            ValueError: when the slug is not URL-safe.
        """
        # fullmatch(), not match(): the pattern ends in `$`, which also
        # matches just before a trailing newline, so match() would let a slug
        # such as "dubai\n" through.
        if not _SLUG_RE.fullmatch(v):
            raise ValueError(
                "slug must be lowercase letters/digits with '-' or '_' separators"
            )
        return v
|
||||
|
||||
|
||||
class SectionUpdate(BaseModel):
|
||||
title: str | None = Field(None, min_length=1, max_length=255)
|
||||
emoji: str | None = Field(None, max_length=8)
|
||||
description: str | None = None
|
||||
access_code: str | None = Field(None, min_length=3, max_length=255)
|
||||
|
||||
|
||||
class SectionOut(BaseModel):
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
|
||||
id: int
|
||||
vertical: Vertical
|
||||
slug: str
|
||||
title: str
|
||||
emoji: str | None
|
||||
description: str | None
|
||||
access_code: str | None = None
|
||||
created_at: datetime
|
||||
|
||||
|
||||
class SectionWithStats(SectionOut):
|
||||
"""Section payload enriched with rollup counts for the section chooser page."""
|
||||
|
||||
channels_total: int = 0
|
||||
channels_active: int = 0
|
||||
messages_total: int = 0
|
||||
leads_total: int = 0
|
||||
|
||||
|
||||
class ChannelCreate(BaseModel):
|
||||
identifier: str = Field(..., min_length=1, max_length=255, description="@username or t.me link")
|
||||
vertical: Vertical = "real_estate"
|
||||
section: str = Field(
|
||||
..., min_length=1, max_length=64,
|
||||
description="Slug of the section inside the vertical (e.g. 'dubai')",
|
||||
)
|
||||
|
||||
|
||||
class ChannelUpdate(BaseModel):
|
||||
is_active: bool | None = None
|
||||
vertical: Vertical | None = None
|
||||
section: str | None = Field(
|
||||
None, min_length=1, max_length=64,
|
||||
description="Move the channel to another section in the same vertical",
|
||||
)
|
||||
|
||||
|
||||
class ChannelOut(BaseModel):
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
|
||||
id: int
|
||||
tg_id: int | None
|
||||
identifier: str
|
||||
title: str | None
|
||||
vertical: Vertical
|
||||
section_id: int
|
||||
section_slug: str | None = None
|
||||
is_active: bool
|
||||
last_message_id: int | None
|
||||
last_polled_at: datetime | None
|
||||
created_at: datetime
|
||||
|
||||
|
||||
class ChannelStats(BaseModel):
|
||||
channel_id: int
|
||||
identifier: str
|
||||
title: str | None
|
||||
vertical: Vertical
|
||||
section_slug: str | None = None
|
||||
is_active: bool
|
||||
last_polled_at: datetime | None
|
||||
message_count: int
|
||||
last_message_at: datetime | None
|
||||
|
||||
|
||||
class MediaFile(BaseModel):
|
||||
kind: str # photo | video | document | audio | sticker | unknown
|
||||
url: str | None = None
|
||||
mime: str | None = None
|
||||
size: int | None = None
|
||||
skipped: str | None = None # set when not downloaded (e.g. "too_large")
|
||||
|
||||
|
||||
class RealEstate(BaseModel):
|
||||
kind: str | None = None
|
||||
property_type: str | None = None
|
||||
rooms: str | None = None
|
||||
area_m2: float | None = None
|
||||
price: str | None = None
|
||||
|
||||
|
||||
class Lead(BaseModel):
|
||||
is_listing: bool
|
||||
kind: str | None = None # sale | rent | purchase
|
||||
property_type: str | None = None
|
||||
rooms: str | None = None
|
||||
area_m2: float | None = None
|
||||
price_text: str | None = None
|
||||
price_value: float | None = None
|
||||
currency: str | None = None # RUB | USD | EUR | AED | GBP | CNY | TRY | KZT | BYN | UAH
|
||||
location: str | None = None
|
||||
contact_phone: str | None = None
|
||||
contact_name: str | None = None
|
||||
summary: str | None = None
|
||||
confidence: float = 0.0
|
||||
|
||||
|
||||
class HrLead(BaseModel):
|
||||
"""LLM verdict for HR-vertical messages (jobs / resumes / bare contacts)."""
|
||||
|
||||
is_lead: bool
|
||||
kind: str | None = None # vacancy | resume | contact
|
||||
title: str | None = None
|
||||
company: str | None = None
|
||||
candidate_name: str | None = None
|
||||
experience_years: float | None = None
|
||||
skills: list[str] = []
|
||||
location: str | None = None
|
||||
remote: bool | None = None
|
||||
employment_type: str | None = None
|
||||
salary_text: str | None = None
|
||||
salary_value: float | None = None
|
||||
currency: str | None = None
|
||||
contact_phone: str | None = None
|
||||
contact_name: str | None = None
|
||||
summary: str | None = None
|
||||
confidence: float = 0.0
|
||||
|
||||
|
||||
class Extracted(BaseModel):
|
||||
phones: list[str] = []
|
||||
names: list[str] = []
|
||||
tg_handles: list[str] = []
|
||||
real_estate: RealEstate | None = None
|
||||
lead: Lead | None = None
|
||||
hr_lead: HrLead | None = None
|
||||
|
||||
|
||||
class MessageOut(BaseModel):
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
|
||||
id: int
|
||||
channel_id: int
|
||||
channel_vertical: Vertical | None = None
|
||||
channel_section_slug: str | None = None
|
||||
tg_message_id: int
|
||||
grouped_id: int | None = None
|
||||
group_size: int = 1
|
||||
date: datetime
|
||||
text: str | None
|
||||
sender_id: int | None
|
||||
has_media: bool
|
||||
media_files: list[MediaFile] | None = None
|
||||
extracted: Extracted | None = None
|
||||
sender_username: str | None = None
|
||||
sender_name: str | None = None
|
||||
post_url: str | None = None
|
||||
views: int | None
|
||||
forwards: int | None
|
||||
fetched_at: datetime
|
||||
|
||||
|
||||
class GlobalStats(BaseModel):
|
||||
vertical: Vertical
|
||||
section_slug: str | None = None
|
||||
channels_total: int
|
||||
channels_active: int
|
||||
messages_total: int
|
||||
messages_last_24h: int
|
||||
leads_total: int = 0
|
||||
leads_last_24h: int = 0
|
||||
poll_interval_seconds: int
|
||||
last_poll_at: datetime | None
|
||||
|
||||
|
||||
class AuthStatus(BaseModel):
|
||||
authorized: bool
|
||||
username: str | None = None
|
||||
phone: str | None = None
|
||||
|
||||
|
||||
class AuthCode(BaseModel):
|
||||
code: str = Field(..., min_length=3, max_length=12)
|
||||
|
||||
|
||||
class AuthPassword(BaseModel):
|
||||
password: str = Field(..., min_length=1)
|
||||
|
||||
|
||||
class AuthCodeResult(BaseModel):
|
||||
needs_password: bool
|
||||
|
||||
|
||||
class AdminLogin(BaseModel):
|
||||
password: str = Field(..., min_length=1)
|
||||
|
||||
|
||||
class SectionLogin(BaseModel):
|
||||
vertical: Vertical
|
||||
section: str = Field(..., min_length=1, max_length=64)
|
||||
code: str = Field(..., min_length=1, max_length=255)
|
||||
51
src/parser_bot/auth.py
Normal file
51
src/parser_bot/auth.py
Normal file
@@ -0,0 +1,51 @@
|
||||
"""Interactive Telethon login. Run once on a dev machine, copy the printed
|
||||
TG_SESSION_STRING into your .env / k8s Secret, then deploy without ever
|
||||
touching auth again.
|
||||
|
||||
Usage:
|
||||
docker compose run --rm -it app python -m parser_bot.auth
|
||||
|
||||
Telegram requires interactive code entry only for the very first login;
|
||||
the resulting StringSession can be reused on any host until you log out
|
||||
or someone invalidates the session in Telegram settings.
|
||||
"""
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
from telethon import TelegramClient
|
||||
from telethon.sessions import StringSession
|
||||
|
||||
from parser_bot.config import settings
|
||||
|
||||
|
||||
async def main() -> int:
    """Run the interactive Telegram login and print the resulting session string.

    Returns:
        ``2`` when stdin is not a TTY (the login prompts need one), ``0``
        after a successful login.
    """
    if not sys.stdin.isatty():
        print(
            "ERROR: not a TTY. Re-run with: "
            "docker compose run --rm -it app python -m parser_bot.auth",
            file=sys.stderr,
        )
        return 2

    client = TelegramClient(StringSession(), settings.tg_api_id, settings.tg_api_hash)
    try:
        await client.start(phone=settings.tg_phone)
        me = await client.get_me()
        session_str = client.session.save()
    finally:
        # Close the connection even when the login is aborted or fails, so the
        # error propagates without leaving the client connected.
        await client.disconnect()

    print()
    print(f"authorized as {me.username or me.id}")
    print()
    print("Add this line to your .env (or k8s Secret) and never share it:")
    print()
    print(f"TG_SESSION_STRING={session_str}")
    print()
    print(
        "After saving, no further interactive auth is needed. Restarts, rebuilds,"
        " redeploys all reuse this string."
    )
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(asyncio.run(main()))
|
||||
64
src/parser_bot/config.py
Normal file
64
src/parser_bot/config.py
Normal file
@@ -0,0 +1,64 @@
|
||||
from pydantic import Field
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """Application configuration read from environment variables / ``.env``.

    Each field is populated from the environment variable named by its alias;
    unknown variables are ignored.
    """

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore")

    tg_api_id: int = Field(..., alias="TG_API_ID")
    tg_api_hash: str = Field(..., alias="TG_API_HASH")
    tg_phone: str = Field(..., alias="TG_PHONE")
    tg_session_path: str = Field("/data/session/parser.session", alias="TG_SESSION_PATH")
    # Preferred for prod / k8s: opaque base64-ish string from `python -m parser_bot.auth`.
    # If set, takes priority over file-based session.
    tg_session_string: str | None = Field(None, alias="TG_SESSION_STRING")

    postgres_user: str = Field("parser", alias="POSTGRES_USER")
    postgres_password: str = Field("parser", alias="POSTGRES_PASSWORD")
    postgres_db: str = Field("parser", alias="POSTGRES_DB")
    postgres_host: str = Field("db", alias="POSTGRES_HOST")
    postgres_port: int = Field(5432, alias="POSTGRES_PORT")

    poll_interval_seconds: int = Field(60, alias="POLL_INTERVAL_SECONDS")
    poll_history_limit: int = Field(50, alias="POLL_HISTORY_LIMIT")

    api_host: str = Field("0.0.0.0", alias="API_HOST")
    api_port: int = Field(8000, alias="API_PORT")
    public_base_path: str = Field("", alias="PUBLIC_BASE_PATH")

    media_dir: str = Field("/data/media", alias="MEDIA_DIR")
    media_max_bytes: int = Field(20 * 1024 * 1024, alias="MEDIA_MAX_BYTES")

    # Local LLM via Ollama for lead classification & extraction
    llm_enabled: bool = Field(True, alias="LLM_ENABLED")
    llm_base_url: str = Field("http://ollama:11434", alias="LLM_BASE_URL")
    llm_model: str = Field("qwen2.5:7b-instruct-q4_K_M", alias="LLM_MODEL")
    llm_timeout_seconds: int = Field(120, alias="LLM_TIMEOUT_SECONDS")
    llm_min_text_length: int = Field(20, alias="LLM_MIN_TEXT_LENGTH")
    llm_classify_interval_seconds: int = Field(20, alias="LLM_CLASSIFY_INTERVAL_SECONDS")
    llm_classify_batch_size: int = Field(5, alias="LLM_CLASSIFY_BATCH_SIZE")

    # Admin allowlist for /auth.html, /docs, /openapi.json, /redoc and the
    # /auth/* API endpoints. Comma-separated IPv4/IPv6. Empty (default) means
    # no restriction — convenient for local dev. Set explicitly in prod.
    admin_allowed_ips: str = Field("", alias="ADMIN_ALLOWED_IPS")
    # Optional second factor for admin-only UI/API operations. Empty keeps the
    # previous IP-only behavior for local/dev deployments.
    admin_password: str = Field("", alias="ADMIN_PASSWORD")
    # When true, honor X-Forwarded-For / X-Real-IP set by a reverse proxy
    # in front of uvicorn (Docker port-forward, nginx, traefik, etc).
    trust_proxy_headers: bool = Field(True, alias="TRUST_PROXY_HEADERS")

    @property
    def admin_ip_set(self) -> set[str]:
        """Parse ``admin_allowed_ips`` into a set, dropping blanks and padding."""
        return {s.strip() for s in self.admin_allowed_ips.split(",") if s.strip()}

    @property
    def database_url(self) -> str:
        """Build the asyncpg SQLAlchemy URL from the ``postgres_*`` fields.

        User name and password are percent-encoded so that characters such as
        ``@``, ``:``, ``/`` or ``#`` in a credential cannot be mistaken for URL
        delimiters. Credentials without special characters render unchanged.
        """
        from urllib.parse import quote

        # NOTE(review): the result can now contain '%' escapes. If this URL is
        # ever passed through configparser-style interpolation (e.g. Alembic's
        # set_main_option), '%' must be doubled there — confirm against the
        # migration environment.
        user = quote(self.postgres_user, safe="")
        password = quote(self.postgres_password, safe="")
        return (
            f"postgresql+asyncpg://{user}:{password}"
            f"@{self.postgres_host}:{self.postgres_port}/{self.postgres_db}"
        )
|
||||
|
||||
|
||||
settings = Settings()
|
||||
0
src/parser_bot/db/__init__.py
Normal file
0
src/parser_bot/db/__init__.py
Normal file
119
src/parser_bot/db/models.py
Normal file
119
src/parser_bot/db/models.py
Normal file
@@ -0,0 +1,119 @@
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import (
|
||||
BigInteger,
|
||||
DateTime,
|
||||
ForeignKey,
|
||||
Index,
|
||||
String,
|
||||
Text,
|
||||
UniqueConstraint,
|
||||
func,
|
||||
)
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
|
||||
|
||||
|
||||
class Base(DeclarativeBase):
|
||||
pass
|
||||
|
||||
|
||||
class Section(Base):
|
||||
"""A sub-section inside a vertical, e.g. ('real_estate', 'dubai').
|
||||
|
||||
The pair (vertical, slug) is unique and identifies a section in URLs
|
||||
and API calls. A channel belongs to exactly one section, the section
|
||||
knows its vertical, and the LLM prompt store can hold a per-section
|
||||
override that falls back to the vertical-level prompt.
|
||||
"""
|
||||
|
||||
__tablename__ = "sections"
|
||||
__table_args__ = (
|
||||
UniqueConstraint("vertical", "slug", name="uq_section_vertical_slug"),
|
||||
Index("ix_sections_vertical", "vertical"),
|
||||
)
|
||||
|
||||
id: Mapped[int] = mapped_column(primary_key=True)
|
||||
vertical: Mapped[str] = mapped_column(String(32))
|
||||
slug: Mapped[str] = mapped_column(String(64))
|
||||
title: Mapped[str] = mapped_column(String(255))
|
||||
emoji: Mapped[str | None] = mapped_column(String(8), nullable=True)
|
||||
description: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
access_code: Mapped[str | None] = mapped_column(String(255), nullable=True)
|
||||
created_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True), server_default=func.now()
|
||||
)
|
||||
|
||||
channels: Mapped[list["Channel"]] = relationship(back_populates="section")
|
||||
|
||||
|
||||
class Channel(Base):
|
||||
__tablename__ = "channels"
|
||||
|
||||
id: Mapped[int] = mapped_column(primary_key=True)
|
||||
# Telegram numeric channel id (peer id), nullable until first resolve
|
||||
tg_id: Mapped[int | None] = mapped_column(BigInteger, unique=True, nullable=True)
|
||||
# Username or t.me/joinchat link supplied by user
|
||||
identifier: Mapped[str] = mapped_column(String(255), unique=True)
|
||||
title: Mapped[str | None] = mapped_column(String(512), nullable=True)
|
||||
# 'real_estate' or 'hr' — picks which LLM prompt and lead schema is used
|
||||
vertical: Mapped[str] = mapped_column(
|
||||
String(32), default="real_estate", server_default="real_estate", index=True
|
||||
)
|
||||
section_id: Mapped[int] = mapped_column(
|
||||
ForeignKey("sections.id", ondelete="RESTRICT"), index=True
|
||||
)
|
||||
is_active: Mapped[bool] = mapped_column(default=True, server_default="true")
|
||||
last_message_id: Mapped[int | None] = mapped_column(BigInteger, nullable=True)
|
||||
last_polled_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
|
||||
created_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True), server_default=func.now()
|
||||
)
|
||||
|
||||
section: Mapped[Section] = relationship(back_populates="channels")
|
||||
messages: Mapped[list["Message"]] = relationship(
|
||||
back_populates="channel",
|
||||
cascade="all, delete-orphan",
|
||||
passive_deletes=True,
|
||||
)
|
||||
|
||||
|
||||
class Message(Base):
|
||||
__tablename__ = "messages"
|
||||
__table_args__ = (
|
||||
UniqueConstraint("channel_id", "tg_message_id", name="uq_channel_message"),
|
||||
Index("ix_messages_channel_date", "channel_id", "date"),
|
||||
)
|
||||
|
||||
id: Mapped[int] = mapped_column(primary_key=True)
|
||||
channel_id: Mapped[int] = mapped_column(ForeignKey("channels.id", ondelete="CASCADE"))
|
||||
tg_message_id: Mapped[int] = mapped_column(BigInteger)
|
||||
date: Mapped[datetime] = mapped_column(DateTime(timezone=True))
|
||||
text: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
sender_id: Mapped[int | None] = mapped_column(BigInteger, nullable=True)
|
||||
sender_username: Mapped[str | None] = mapped_column(String(64), nullable=True)
|
||||
sender_name: Mapped[str | None] = mapped_column(String(255), nullable=True)
|
||||
grouped_id: Mapped[int | None] = mapped_column(BigInteger, nullable=True)
|
||||
has_media: Mapped[bool] = mapped_column(default=False, server_default="false")
|
||||
views: Mapped[int | None] = mapped_column(nullable=True)
|
||||
forwards: Mapped[int | None] = mapped_column(nullable=True)
|
||||
raw: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
|
||||
media_files: Mapped[list | None] = mapped_column(JSONB, nullable=True)
|
||||
extracted: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
|
||||
fetched_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True), server_default=func.now()
|
||||
)
|
||||
|
||||
channel: Mapped[Channel] = relationship(back_populates="messages")
|
||||
|
||||
|
||||
class AppSetting(Base):
|
||||
"""Runtime-editable settings, edited from the UI without a restart."""
|
||||
|
||||
__tablename__ = "app_settings"
|
||||
|
||||
key: Mapped[str] = mapped_column(String(128), primary_key=True)
|
||||
value: Mapped[dict | str | int | bool | None] = mapped_column(JSONB, nullable=False)
|
||||
updated_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True), server_default=func.now()
|
||||
)
|
||||
25
src/parser_bot/db/session.py
Normal file
25
src/parser_bot/db/session.py
Normal file
@@ -0,0 +1,25 @@
|
||||
from collections.abc import AsyncIterator
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
|
||||
|
||||
from parser_bot.config import settings
|
||||
|
||||
engine = create_async_engine(settings.database_url, pool_pre_ping=True)
|
||||
SessionFactory = async_sessionmaker(engine, expire_on_commit=False, class_=AsyncSession)
|
||||
|
||||
|
||||
@asynccontextmanager
async def session_scope() -> AsyncIterator[AsyncSession]:
    """Provide a transactional session as an async context manager.

    The session is committed when the ``async with`` body finishes normally.
    Any ``Exception`` raised by the body or by the commit triggers a
    rollback and is then re-raised to the caller.
    """
    async with SessionFactory() as db:
        try:
            yield db
            # Commit stays inside the try so a failing commit is rolled back too.
            await db.commit()
        except Exception:
            await db.rollback()
            raise
|
||||
|
||||
|
||||
async def get_session() -> AsyncIterator[AsyncSession]:
    """Yield a session from ``SessionFactory``.

    Unlike ``session_scope`` this generator neither commits nor rolls back;
    it only opens the session and lets the context manager close it.
    """
    make_session = SessionFactory
    async with make_session() as db:
        yield db
|
||||
334
src/parser_bot/extractors.py
Normal file
334
src/parser_bot/extractors.py
Normal file
@@ -0,0 +1,334 @@
|
||||
"""Heuristic extractors for Telegram message text.
|
||||
|
||||
Russian-first, regex/keyword based, no ML deps. Goal is to surface signals for
|
||||
the UI: phone numbers, person names (FIO), and real-estate intent (sale/rent/
|
||||
purchase). False positives are tolerable — operator triages in the UI.
|
||||
|
||||
Output shape (used as JSONB in messages.extracted):
|
||||
{
|
||||
"phones": ["+79123456789", ...],
|
||||
"names": ["Иван Петров", ...],
|
||||
"real_estate": {
|
||||
"kind": "sale" | "rent" | "purchase" | null,
|
||||
"property_type": str | null, # квартира, дом, ...
|
||||
"rooms": str | null, # "2-к"
|
||||
"area_m2": float | null,
|
||||
"price": str | null, # raw matched string
|
||||
} | null
|
||||
}
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
# --- Telegram @handles ---------------------------------------------------
|
||||
|
||||
# Plain @username — Telegram allows 5–32 chars, letters/digits/_, no leading digit.
|
||||
_TG_HANDLE_RE = re.compile(r"(?<![\w/])@([A-Za-z][A-Za-z0-9_]{4,31})\b")
|
||||
# t.me / telegram.me links to a user/channel handle (not joinchat / +invite).
|
||||
_TG_LINK_RE = re.compile(
|
||||
r"(?:https?://)?(?:t|telegram)\.me/(?!joinchat/|\+)([A-Za-z][A-Za-z0-9_]{4,31})\b"
|
||||
)
|
||||
|
||||
|
||||
def extract_tg_handles(text: str | None) -> list[str]:
|
||||
if not text:
|
||||
return []
|
||||
out: list[str] = []
|
||||
seen: set[str] = set()
|
||||
for h in _TG_HANDLE_RE.findall(text):
|
||||
key = h.lower()
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
out.append("@" + h)
|
||||
for h in _TG_LINK_RE.findall(text):
|
||||
key = h.lower()
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
out.append("@" + h)
|
||||
return out
|
||||
|
||||
|
||||
# --- Phones --------------------------------------------------------------
|
||||
|
||||
# Russian-format: starts with +7, 7, or 8 (no plus), 11 digits total.
|
||||
_PHONE_RU_RE = re.compile(
|
||||
r"(?<!\d)(?:\+?7|8)[\s\-().]*\d{3}[\s\-().]*\d{3}[\s\-().]*\d{2}[\s\-().]*\d{2}(?!\d)"
|
||||
)
|
||||
|
||||
# International format: starts with `+<country code>` then 7–14 more digits
|
||||
# with optional separators. Catches +971 (UAE), +1 (US), +44 (UK), etc.
|
||||
_PHONE_INTL_RE = re.compile(
|
||||
r"(?<![\w\d])\+\d{1,3}[\s\-().]*(?:\d[\s\-().]*){6,14}\d(?!\d)"
|
||||
)
|
||||
|
||||
|
||||
def extract_phones(text: str | None) -> list[str]:
|
||||
if not text:
|
||||
return []
|
||||
out: list[str] = []
|
||||
seen: set[str] = set()
|
||||
|
||||
# Pass 1: Russian-style. Normalize to +7XXXXXXXXXX.
|
||||
for raw in _PHONE_RU_RE.findall(text):
|
||||
digits = re.sub(r"\D", "", raw)
|
||||
if len(digits) == 11 and digits[0] in "78":
|
||||
normalized = "+7" + digits[1:]
|
||||
elif len(digits) == 10:
|
||||
normalized = "+7" + digits
|
||||
else:
|
||||
continue
|
||||
if normalized not in seen:
|
||||
seen.add(normalized)
|
||||
out.append(normalized)
|
||||
|
||||
# Pass 2: international "+<country>...". Keep raw plus-prefix; just
|
||||
# collapse separators so the result is +<digits>.
|
||||
for raw in _PHONE_INTL_RE.findall(text):
|
||||
digits = re.sub(r"\D", "", raw)
|
||||
if not (8 <= len(digits) <= 15):
|
||||
continue
|
||||
normalized = "+" + digits
|
||||
# If it normalized to something we already captured (e.g. +7 number
|
||||
# picked up by both passes), skip.
|
||||
if normalized in seen:
|
||||
continue
|
||||
seen.add(normalized)
|
||||
out.append(normalized)
|
||||
return out
|
||||
|
||||
|
||||
# --- Names (ФИО) ---------------------------------------------------------
|
||||
|
||||
# Two or three capitalized Cyrillic tokens in a row. Allows hyphens (Иванов-Петров).
|
||||
_NAME_RE = re.compile(
|
||||
r"\b([А-ЯЁ][а-яё]+(?:\-[А-ЯЁ][а-яё]+)?(?:\s+[А-ЯЁ][а-яё]+(?:\-[А-ЯЁ][а-яё]+)?){1,2})\b"
|
||||
)
|
||||
|
||||
# Common false positives — geo/places/orgs/etc. Skip exact matches.
|
||||
_NAME_BLOCKLIST = {
|
||||
"Российская Федерация",
|
||||
"Санкт Петербург",
|
||||
"Санкт-Петербург",
|
||||
"Нижний Новгород",
|
||||
"Великий Новгород",
|
||||
"Ростов На Дону",
|
||||
"Ростов-На-Дону",
|
||||
"Москва Сити",
|
||||
"Красная Площадь",
|
||||
"Чёрное Море",
|
||||
"Чёрного Моря",
|
||||
"Без Депозита",
|
||||
"Без Залога",
|
||||
"Без Комиссии",
|
||||
"Сдам Квартиру",
|
||||
"Продам Квартиру",
|
||||
"Куплю Квартиру",
|
||||
"Сдам Студию",
|
||||
"Продам Студию",
|
||||
}
|
||||
|
||||
# Words that look like names but rarely are (months, weekdays, common nouns).
|
||||
_NAME_TOKEN_BLOCK = {
|
||||
"Январь", "Февраль", "Март", "Апрель", "Май", "Июнь",
|
||||
"Июль", "Август", "Сентябрь", "Октябрь", "Ноябрь", "Декабрь",
|
||||
"Понедельник", "Вторник", "Среда", "Четверг", "Пятница", "Суббота", "Воскресенье",
|
||||
"Москва", "Питер", "Россия", "Кремль", "Метро",
|
||||
}
|
||||
|
||||
|
||||
def extract_names(text: str | None) -> list[str]:
    """Return candidate person names (2–3 capitalized Cyrillic words).

    Candidates are dropped when the whole phrase is in ``_NAME_BLOCKLIST``,
    when any of its tokens is in ``_NAME_TOKEN_BLOCK``, or when no token is at
    least four characters long. Results are unique and keep first-seen order.
    """
    if not text:
        return []
    # A dict doubles as an insertion-ordered set of accepted candidates.
    accepted: dict[str, None] = {}
    for raw in _NAME_RE.findall(text):
        candidate = raw.strip()
        if candidate in accepted or candidate in _NAME_BLOCKLIST:
            continue
        parts = re.split(r"[\s\-]+", candidate)
        if not _NAME_TOKEN_BLOCK.isdisjoint(parts):
            continue
        # Heuristic: at least one token must have len >= 4 (rules out "Ул.")
        if max(map(len, parts)) < 4:
            continue
        accepted[candidate] = None
    return list(accepted)
|
||||
|
||||
|
||||
# --- Real estate ---------------------------------------------------------
|
||||
|
||||
_DEAL_KEYWORDS: dict[str, tuple[str, ...]] = {
|
||||
"rent": (
|
||||
# ru
|
||||
"сдаётся", "сдается", "сдаю", "сдадим", "сдам", "сдаём",
|
||||
"аренда", "арендую", "арендуем", "снять",
|
||||
"посуточно", "помесячно",
|
||||
# en
|
||||
"for rent", "to let", "rental", "renting", "lease", "leasing",
|
||||
"per year", "per month", "/year", "/month", "/mo",
|
||||
),
|
||||
"sale": (
|
||||
# ru
|
||||
"продаётся", "продается", "продаю", "продадим", "продам", "продаём",
|
||||
"продажа", "к продаже",
|
||||
# en
|
||||
"for sale", "#forsale", "selling", "selling price", "sale price",
|
||||
),
|
||||
"purchase": (
|
||||
# ru
|
||||
"куплю", "купим", "покупаю", "покупка", "ищу квартиру",
|
||||
"ищу дом", "ищем квартиру", "рассматриваю покупку",
|
||||
# en
|
||||
"looking for", "want to buy", "wanted", "requirement", "wtb",
|
||||
),
|
||||
}
|
||||
|
||||
_PROPERTY_TYPES: tuple[tuple[str, str], ...] = (
|
||||
# ru
|
||||
("квартир", "квартира"),
|
||||
("студи", "студия"),
|
||||
("апартамент", "апартаменты"),
|
||||
("комнат", "комната"),
|
||||
("таунхаус", "таунхаус"),
|
||||
("коттедж", "коттедж"),
|
||||
("дача", "дача"),
|
||||
("дом", "дом"),
|
||||
("офис", "офис"),
|
||||
("склад", "склад"),
|
||||
("помещен", "помещение"),
|
||||
("земельн", "земельный участок"),
|
||||
("участок", "участок"),
|
||||
("гараж", "гараж"),
|
||||
("машиномест", "машиноместо"),
|
||||
# en — kept as Russian labels for UI consistency
|
||||
("villa", "дом"),
|
||||
("townhouse", "таунхаус"),
|
||||
("penthouse", "апартаменты"),
|
||||
("apartment", "квартира"),
|
||||
("studio", "студия"),
|
||||
("plot", "участок"),
|
||||
(" land ", "участок"),
|
||||
("office", "офис"),
|
||||
("warehouse", "склад"),
|
||||
("retail", "помещение"),
|
||||
("garage", "гараж"),
|
||||
)
|
||||
|
||||
_AREA_M2_RE = re.compile(
|
||||
r"(\d[\d\s,]*\d|\d)\s*(?:м[²2]|кв\.?\s*м|кв\.\s*метр)",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
_AREA_SQFT_RE = re.compile(
|
||||
r"(\d[\d\s,]*\d|\d)\s*(?:sqft|sq\.?\s*ft|sq\s+ft|square\s+feet)",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
def _parse_number(s: str) -> float | None:
|
||||
cleaned = s.replace(" ", "").replace(",", "")
|
||||
try:
|
||||
return float(cleaned)
|
||||
except ValueError:
|
||||
return None
|
||||
_ROOMS_RE = re.compile(
|
||||
r"\b(\d)[\-\s]*(?:к\b|комн|комнатн|-комнат|br\b|bed\b|bedroom|-bed)",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
# Studio is a special-case "0 rooms" indicator; not extracted as rooms count.
|
||||
_PRICE_RE = re.compile(
|
||||
r"(\d[\d\s.,]*\d|\d)\s*(млн|млрд|тыс|тысяч|миллионов?|миллиардов?|руб(?:лей)?|₽|р/мес|/мес|р\b)",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
def _detect_kind(low: str) -> str | None:
    """Return the first deal kind whose keyword occurs in ``low``.

    ``low`` is expected to be lower-cased already; kinds are tried in the
    declaration order of ``_DEAL_KEYWORDS`` and matching is by substring.
    Returns ``None`` when no keyword is present.
    """
    return next(
        (
            kind
            for kind, keywords in _DEAL_KEYWORDS.items()
            if any(keyword in low for keyword in keywords)
        ),
        None,
    )
|
||||
|
||||
|
||||
def _detect_property_type(low: str) -> str | None:
    """Return the label of the first property stem found in ``low``.

    Stems are tried in the order they are listed in ``_PROPERTY_TYPES`` and
    matched as plain substrings of the lower-cased text. Returns ``None`` when
    nothing matches.
    """
    return next((label for stem, label in _PROPERTY_TYPES if stem in low), None)
|
||||
|
||||
|
||||
def extract_real_estate(text: str | None) -> dict[str, Any] | None:
    """Extract real-estate signals (deal kind, type, rooms, area, price).

    Returns ``None`` for empty text or when neither a deal keyword nor a
    property type is detected. Otherwise returns a dict with the keys
    ``kind``, ``property_type``, ``rooms``, ``area_m2`` and ``price``; any
    field that could not be determined is ``None``.
    """
    if not text:
        return None
    lowered = text.lower()
    deal = _detect_kind(lowered)
    property_type = _detect_property_type(lowered)
    if deal is None and property_type is None:
        return None

    # Room count: an explicit "<digit>-к"/"BR" marker wins, otherwise a studio
    # mention is reported as the special "студия" value.
    rooms_match = _ROOMS_RE.search(lowered)
    if rooms_match:
        rooms = f"{rooms_match.group(1)}-к"
    elif "студи" in lowered or "studio" in lowered:
        rooms = "студия"
    else:
        rooms = None

    # Area: prefer square metres; fall back to square feet converted to m².
    area: float | None = None
    m2_match = _AREA_M2_RE.search(text)
    if m2_match:
        area = _parse_number(m2_match.group(1))
    if area is None:
        sqft_match = _AREA_SQFT_RE.search(text)
        sqft = _parse_number(sqft_match.group(1)) if sqft_match else None
        if sqft is not None:
            area = round(sqft * 0.0929, 1)

    price_match = _PRICE_RE.search(text)

    return {
        "kind": deal,
        "property_type": property_type,
        "rooms": rooms,
        "area_m2": area,
        "price": price_match.group(0).strip() if price_match else None,
    }
|
||||
|
||||
|
||||
# --- Top-level analyzer --------------------------------------------------
|
||||
|
||||
|
||||
def analyze(text: str | None) -> dict[str, Any]:
    """Run every regex extractor over ``text`` and bundle the results.

    Synchronous and regex-only, so it is cheap enough to run at insert time.
    The returned dict always has the keys ``phones``, ``names``,
    ``tg_handles`` and ``real_estate``.
    """
    result: dict[str, Any] = {}
    result["phones"] = extract_phones(text)
    result["names"] = extract_names(text)
    result["tg_handles"] = extract_tg_handles(text)
    result["real_estate"] = extract_real_estate(text)
    return result
|
||||
|
||||
|
||||
async def analyze_with_llm(
    text: str | None,
    vertical: str = "real_estate",
    section_slug: str | None = None,
) -> dict[str, Any]:
    """Combine regex extraction with the local LLM verdict for ``vertical``.

    ``section_slug`` is forwarded to the classifier so it can pick a
    section-specific system prompt. When the classifier returns a verdict it
    is stored under ``hr_lead`` for the ``hr`` vertical and under ``lead``
    otherwise; when it returns ``None`` the regex-only result is returned
    unchanged.
    """
    result = analyze(text)
    # Lazy import to avoid hard dep on httpx in environments where LLM is off.
    from parser_bot.llm import classify

    verdict = await classify(text, vertical, section_slug)  # type: ignore[arg-type]
    if verdict is None:
        return result
    verdict_key = "hr_lead" if vertical == "hr" else "lead"
    result[verdict_key] = verdict
    return result
|
||||
44
src/parser_bot/links.py
Normal file
44
src/parser_bot/links.py
Normal file
@@ -0,0 +1,44 @@
|
||||
"""Build Telegram URLs from stored channel metadata."""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
_USERNAME_RE = re.compile(r"^@?([A-Za-z][A-Za-z0-9_]{4,31})$")
|
||||
_TME_URL_RE = re.compile(
|
||||
r"^(?:https?://)?(?:t|telegram)\.me/(?:s/)?([A-Za-z][A-Za-z0-9_]{4,31})(?:/.*)?$"
|
||||
)
|
||||
|
||||
|
||||
def channel_username(identifier: str | None) -> str | None:
|
||||
"""Extract the public username from a channel identifier if any.
|
||||
|
||||
Returns None for private channels (joinchat, +invite, raw IDs).
|
||||
"""
|
||||
if not identifier:
|
||||
return None
|
||||
s = identifier.strip()
|
||||
m = _USERNAME_RE.match(s)
|
||||
if m:
|
||||
return m.group(1)
|
||||
m = _TME_URL_RE.match(s)
|
||||
if m:
|
||||
return m.group(1)
|
||||
return None
|
||||
|
||||
|
||||
def post_url(identifier: str | None, tg_id: int | None, tg_message_id: int) -> str | None:
    """Build a deep link to a Telegram post.

    Public channel: https://t.me/<username>/<msg_id>
    Private channel (no public username, only tg_id): https://t.me/c/<short>/<msg_id>
    where <short> is the absolute id with the leading -100 stripped.

    Returns ``None`` when there is neither a public username nor a ``tg_id``.
    """
    public_name = channel_username(identifier)
    if public_name:
        return f"https://t.me/{public_name}/{tg_message_id}"
    if tg_id is None:
        return None
    digits = str(abs(tg_id))
    # Drop the "100" marker only when something remains after it.
    if digits.startswith("100") and len(digits) > 3:
        digits = digits[3:]
    return f"https://t.me/c/{digits}/{tg_message_id}"
|
||||
363
src/parser_bot/llm.py
Normal file
363
src/parser_bot/llm.py
Normal file
@@ -0,0 +1,363 @@
|
||||
"""Local LLM (Ollama) client for lead classification & extraction.
|
||||
|
||||
Two verticals share one model and one process:
|
||||
- real_estate: high recall on listings (sale/rent/purchase),
|
||||
- hr: vacancies, resumes, bare contact leads.
|
||||
|
||||
The system prompt and JSON schema differ per vertical; the rest of the
|
||||
plumbing (timeouts, single-lock concurrency, JSON-mode parsing) is shared.
|
||||
On any error returns `None` and the caller falls back to regex-only extraction.
|
||||
|
||||
The model runs on CPU via Ollama (Qwen2.5 7B Q4_K_M). Each call ~3–6s on
|
||||
i5-12400. Concurrency is 1 (Ollama already saturates CPU per call).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from typing import Any, Literal
|
||||
|
||||
import httpx
|
||||
import structlog
|
||||
|
||||
from parser_bot.config import settings
|
||||
|
||||
log = structlog.get_logger()
|
||||
|
||||
|
||||
# Single shared lock so we never run two LLM requests at once — the model runs
|
||||
# on CPU, so parallel calls would just contend for cores and finish slower.
|
||||
_lock = asyncio.Lock()
|
||||
|
||||
|
||||
Vertical = Literal["real_estate", "hr"]
|
||||
|
||||
|
||||
DEFAULT_RE_SYSTEM_PROMPT = """\
|
||||
Ты — аналитик объявлений о недвижимости. Тебе дают текст из Telegram-канала.
|
||||
Сообщение МОЖЕТ БЫТЬ НА ЛЮБОМ ЯЗЫКЕ — русский, английский, арабский, любой
|
||||
другой. Обрабатывай его одинаково независимо от языка.
|
||||
|
||||
Задача: определить, является ли это РЕАЛЬНЫМ объявлением о покупке, продаже
|
||||
или аренде НЕДВИЖИМОСТИ (квартира, дом/villa, студия/studio, апартаменты,
|
||||
комната, таунхаус/townhouse, дача, коттедж, пентхаус/penthouse, офис,
|
||||
склад, помещение, земельный участок/plot/land, гараж, машиноместо).
|
||||
Учитывай намёки и нечёткие формулировки — лучше отметить сомнительный лид
|
||||
как `is_listing=true` с низкой confidence, чем пропустить.
|
||||
|
||||
Сигналы что это ОБЪЯВЛЕНИЕ (kind):
|
||||
— продажа/sale: «продаётся», «продаю», «продажа», «for sale», «#forsale»,
|
||||
«selling price», «selling», «price», «AED 33M», ценник в любой валюте.
|
||||
— аренда/rent: «сдаётся», «сдаю», «аренда», «for rent», «to let», «rental»,
|
||||
«per year», «per month», «AED ... /year».
|
||||
— покупка/purchase: «куплю», «куплю в», «looking for», «want to buy»,
|
||||
«wanted», «requirement».
|
||||
|
||||
ОДНО сообщение может быть и про продажу, И про аренду одновременно
|
||||
(«FOR SALE | RENT» / «продажа или аренда»). В таком случае выбирай
|
||||
основное намерение по самому тексту; если равноценно — `kind="sale"`
|
||||
и упомяни аренду в summary.
|
||||
|
||||
НЕ объявления (is_listing=false):
|
||||
— общие новости / статьи / аналитика рынка;
|
||||
— воспоминания и истории («когда-то продавал квартиру»);
|
||||
— шутки, мемы, цитаты;
|
||||
— реклама услуг агентств без конкретного объекта;
|
||||
— чужие пересланные объявления без контактов и явного предложения от автора.
|
||||
|
||||
Отвечай СТРОГО валидным JSON по схеме (никаких комментариев, никакого markdown):
|
||||
{
|
||||
"is_listing": boolean,
|
||||
"kind": "sale" | "rent" | "purchase" | null,
|
||||
"property_type": "квартира" | "дом" | "студия" | "апартаменты" | "комната" | "таунхаус" | "дача" | "коттедж" | "офис" | "склад" | "помещение" | "участок" | "гараж" | "машиноместо" | null,
|
||||
"rooms": "студия" | "1-к" | "2-к" | "3-к" | "4-к" | "5+к" | null,
|
||||
"area_m2": number | null,
|
||||
"price_text": string | null,
|
||||
"price_value": number | null,
|
||||
"currency": "RUB" | "USD" | "EUR" | "AED" | "GBP" | "CNY" | "TRY" | "KZT" | "BYN" | "UAH" | null,
|
||||
"location": string | null,
|
||||
"contact_phone": string | null,
|
||||
"contact_name": string | null,
|
||||
"summary": string,
|
||||
"confidence": number
|
||||
}
|
||||
|
||||
Поля:
|
||||
- summary — ОДНО короткое предложение НА РУССКОМ языке (даже если исходный
|
||||
текст на английском или другом). Это нужно для единообразного UI.
|
||||
- property_type — пиши значение по-русски (villa→дом, apartment→квартира,
|
||||
townhouse→таунхаус, plot/land→участок, studio→студия, penthouse→апартаменты,
|
||||
house→дом, office→офис, warehouse→склад, retail→помещение).
|
||||
- rooms — для англоязычного «3BR», «3 BR», «3 bed», «3-bedroom» возвращай
|
||||
«3-к»; для «studio» → «студия».
|
||||
- area_m2 — площадь В КВАДРАТНЫХ МЕТРАХ. Если в тексте sqft / sq.ft / sq ft /
|
||||
square feet — переведи: m² = sqft × 0.0929. Округляй до целого.
|
||||
- confidence ∈ [0, 1]: 0.9+ если явное объявление с ценой/контактом,
|
||||
0.5–0.8 если правдоподобно, 0.2–0.4 если намёк.
|
||||
- price_text — точная цитата из текста («2.5 млн ₽», «AED 850 000», «$320k»,
|
||||
«300 тыс. дирхам», «د.إ 1.2M», «70,000,000 AED», «AED 4.3M», «AED 1.75M»).
|
||||
- price_value — числовая величина цены В УКАЗАННОЙ ВАЛЮТЕ (не конвертируй).
|
||||
Раскрывай сокращения: «AED 4.3M» → 4300000, «$320k» → 320000.
|
||||
- currency — определяй гибко: ₽/руб/р/RUB/рублей → RUB; $/USD/долл/бакс → USD;
|
||||
€/EUR/евро → EUR; AED/дирхам/дирхамов/дирхама/dh/dhs/د.إ/Dirhams → AED;
|
||||
₺/TRY/лир/лира → TRY; ¥/CNY/юань → CNY; ₸/KZT/тенге → KZT;
|
||||
Br/BYN/бел.руб → BYN; ₴/UAH/грн → UAH. Если не уверен — null.
|
||||
- contact_phone — любой номер телефона в тексте (с + или без, российский,
|
||||
ОАЭ, любой международный).
|
||||
"""
|
||||
|
||||
|
||||
DEFAULT_HR_SYSTEM_PROMPT = """\
|
||||
Ты — аналитик HR-объявлений. Тебе дают текст из Telegram-канала. Сообщение
|
||||
МОЖЕТ БЫТЬ НА ЛЮБОМ ЯЗЫКЕ — обрабатывай одинаково.
|
||||
|
||||
Задача: определить, относится ли сообщение к рынку труда, и какого типа лид
|
||||
это. Допускаются три типа (`kind`):
|
||||
— vacancy — компания/наниматель ищет сотрудника («ищем разработчика»,
|
||||
«hiring backend engineer», «требуется бухгалтер», «we are looking for»);
|
||||
— resume — соискатель ищет работу («ищу работу», «open to work», «available
|
||||
for hire», «рассматриваю предложения», «my CV», «резюме»);
|
||||
— contact — короткое сообщение с именем/контактом и намёком на профессию,
|
||||
без явной вакансии/резюме («Иван Петров, Python, +7…», «@nick — UI/UX,
|
||||
Дубай»). Используй, когда vacancy и resume не подходят, но из текста ясно,
|
||||
что это HR-контакт.
|
||||
|
||||
Лучше отметить сомнительный случай `is_lead=true` с низкой confidence,
|
||||
чем пропустить. НО полностью исключай:
|
||||
— общие новости и аналитика рынка труда без конкретной вакансии/резюме;
|
||||
— реклама курсов, школ, маркетплейсов услуг (Profi.ru и т.п.);
|
||||
— чужие пересланные посты без контактов и без явного предложения от автора;
|
||||
— объявления о продаже/аренде недвижимости, услуг и товаров;
|
||||
— мемы, шутки, цитаты.
|
||||
|
||||
Отвечай СТРОГО валидным JSON по схеме (никаких комментариев, никакого markdown):
|
||||
{
|
||||
"is_lead": boolean,
|
||||
"kind": "vacancy" | "resume" | "contact" | null,
|
||||
"title": string | null,
|
||||
"company": string | null,
|
||||
"candidate_name": string | null,
|
||||
"experience_years": number | null,
|
||||
"skills": string[],
|
||||
"location": string | null,
|
||||
"remote": boolean | null,
|
||||
"employment_type": "full-time" | "part-time" | "contract" | "internship" | null,
|
||||
"salary_text": string | null,
|
||||
"salary_value": number | null,
|
||||
"currency": "RUB" | "USD" | "EUR" | "AED" | "GBP" | "CNY" | "TRY" | "KZT" | "BYN" | "UAH" | null,
|
||||
"contact_phone": string | null,
|
||||
"contact_name": string | null,
|
||||
"summary": string,
|
||||
"confidence": number
|
||||
}
|
||||
|
||||
Поля:
|
||||
- title — должность/роль ОДНОЙ строкой («Senior Python Developer», «Бухгалтер»,
|
||||
«UI/UX-дизайнер»). Для resume — желаемая роль. Для contact — то, что заявлено.
|
||||
- company — название компании-нанимателя, если оно явно указано (vacancy).
|
||||
- candidate_name — ФИО или ник кандидата (resume / contact).
|
||||
- experience_years — стаж в годах числом. «5+ years» → 5. Если не указан — null.
|
||||
- skills — короткий массив ключевых навыков/технологий (до ~10 элементов).
|
||||
- remote — true для «удалёнка / remote / WFH / hybrid: remote», false для
|
||||
«офис / on-site», null если не указано.
|
||||
- employment_type — full-time для «полная занятость / full-time», part-time
|
||||
для «частичная / part-time», contract для «договор/контракт/freelance»,
|
||||
internship для «стажировка/internship». Иначе null.
|
||||
- salary_text — точная цитата с зарплатой («200–300k ₽», «$5k/mo», «AED 18,000 per month»).
|
||||
- salary_value — число В УКАЗАННОЙ ВАЛЮТЕ. Если диапазон — нижняя граница.
|
||||
Раскрывай сокращения: «200k» → 200000, «1.5M» → 1500000.
|
||||
- currency — определяй гибко: ₽/руб/RUB → RUB; $/USD/долл → USD; €/EUR/евро → EUR;
|
||||
AED/дирхам/dh/dhs → AED; ₺/TRY/лир → TRY; ¥/CNY/юань → CNY; ₸/KZT/тенге → KZT;
|
||||
Br/BYN/бел.руб → BYN; ₴/UAH/грн → UAH. Если не уверен — null.
|
||||
- contact_phone — любой номер телефона (RU / международный, с + или без).
|
||||
- contact_name — имя контактного лица (рекрутер / соискатель / автор).
|
||||
- summary — ОДНО короткое предложение НА РУССКОМ языке.
|
||||
- confidence ∈ [0, 1]: 0.9+ если явная вакансия/резюме с деталями, 0.5–0.8
|
||||
если правдоподобно, 0.2–0.4 если намёк.
|
||||
"""
|
||||
|
||||
|
||||
# Back-compat alias — older imports referenced DEFAULT_SYSTEM_PROMPT.
|
||||
DEFAULT_SYSTEM_PROMPT = DEFAULT_RE_SYSTEM_PROMPT
|
||||
|
||||
|
||||
def _build_user_prompt(text: str) -> str:
|
||||
return f"Текст сообщения:\n```\n{text}\n```\nВерни JSON."
|
||||
|
||||
|
||||
_VALID_CURRENCIES = {
|
||||
"RUB", "USD", "EUR", "AED", "GBP", "CNY", "TRY", "KZT", "BYN", "UAH"
|
||||
}
|
||||
|
||||
|
||||
def _coerce_real_estate(payload: Any) -> dict | None:
|
||||
if not isinstance(payload, dict):
|
||||
return None
|
||||
is_listing = bool(payload.get("is_listing"))
|
||||
currency = payload.get("currency")
|
||||
if currency is not None:
|
||||
currency = str(currency).upper()
|
||||
if currency not in _VALID_CURRENCIES:
|
||||
currency = None
|
||||
return {
|
||||
"is_listing": is_listing,
|
||||
"kind": payload.get("kind") if payload.get("kind") in ("sale", "rent", "purchase") else None,
|
||||
"property_type": payload.get("property_type") or None,
|
||||
"rooms": payload.get("rooms") or None,
|
||||
"area_m2": _as_float(payload.get("area_m2")),
|
||||
"price_text": payload.get("price_text") or None,
|
||||
"price_value": _as_float(payload.get("price_value")),
|
||||
"currency": currency,
|
||||
"location": payload.get("location") or None,
|
||||
"contact_phone": payload.get("contact_phone") or None,
|
||||
"contact_name": payload.get("contact_name") or None,
|
||||
"summary": (payload.get("summary") or "")[:300],
|
||||
"confidence": max(0.0, min(1.0, _as_float(payload.get("confidence")) or 0.0)),
|
||||
}
|
||||
|
||||
|
||||
def _coerce_hr(payload: Any) -> dict | None:
|
||||
if not isinstance(payload, dict):
|
||||
return None
|
||||
is_lead = bool(payload.get("is_lead"))
|
||||
currency = payload.get("currency")
|
||||
if currency is not None:
|
||||
currency = str(currency).upper()
|
||||
if currency not in _VALID_CURRENCIES:
|
||||
currency = None
|
||||
skills_raw = payload.get("skills") or []
|
||||
if isinstance(skills_raw, str):
|
||||
skills = [s.strip() for s in skills_raw.split(",") if s.strip()]
|
||||
elif isinstance(skills_raw, list):
|
||||
skills = [str(s).strip() for s in skills_raw if str(s).strip()]
|
||||
else:
|
||||
skills = []
|
||||
skills = skills[:15]
|
||||
employment = payload.get("employment_type")
|
||||
if employment is not None and employment not in (
|
||||
"full-time", "part-time", "contract", "internship"
|
||||
):
|
||||
employment = None
|
||||
remote_raw = payload.get("remote")
|
||||
remote = bool(remote_raw) if isinstance(remote_raw, bool) else None
|
||||
return {
|
||||
"is_lead": is_lead,
|
||||
"kind": payload.get("kind") if payload.get("kind") in ("vacancy", "resume", "contact") else None,
|
||||
"title": payload.get("title") or None,
|
||||
"company": payload.get("company") or None,
|
||||
"candidate_name": payload.get("candidate_name") or None,
|
||||
"experience_years": _as_float(payload.get("experience_years")),
|
||||
"skills": skills,
|
||||
"location": payload.get("location") or None,
|
||||
"remote": remote,
|
||||
"employment_type": employment,
|
||||
"salary_text": payload.get("salary_text") or None,
|
||||
"salary_value": _as_float(payload.get("salary_value")),
|
||||
"currency": currency,
|
||||
"contact_phone": payload.get("contact_phone") or None,
|
||||
"contact_name": payload.get("contact_name") or None,
|
||||
"summary": (payload.get("summary") or "")[:300],
|
||||
"confidence": max(0.0, min(1.0, _as_float(payload.get("confidence")) or 0.0)),
|
||||
}
|
||||
|
||||
|
||||
def _as_float(v: Any) -> float | None:
|
||||
if v is None or isinstance(v, bool):
|
||||
return None
|
||||
try:
|
||||
return float(v)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
async def is_ready() -> bool:
    """Check that Ollama is up and the configured model is pulled.

    The configured model must appear in Ollama's local model list under its
    full ``name:tag``; a name configured without a tag is looked up as
    ``name:latest``. Matching only the part before ``:`` would report ready
    when a different tag of the same family is installed, after which every
    generate call fails with "model not found".

    Returns:
        ``True`` when the model is listed; ``False`` when it is not, or when
        Ollama cannot be reached or answers with an error.
    """
    wanted = settings.llm_model
    if ":" not in wanted:
        wanted = f"{wanted}:latest"
    try:
        async with httpx.AsyncClient(timeout=5) as client:
            r = await client.get(f"{settings.llm_base_url}/api/tags")
            r.raise_for_status()
            models = r.json().get("models") or []
        # Skip malformed entries instead of failing the whole readiness check.
        names = {
            m["name"].lower()
            for m in models
            if isinstance(m, dict) and isinstance(m.get("name"), str)
        }
        return wanted.lower() in names
    except Exception:
        return False
|
||||
|
||||
|
||||
def default_prompt(vertical: Vertical) -> str:
    """Return the built-in system prompt for ``vertical``.

    The HR prompt is used for ``"hr"``; every other value falls back to the
    real-estate prompt.
    """
    if vertical == "hr":
        return DEFAULT_HR_SYSTEM_PROMPT
    return DEFAULT_RE_SYSTEM_PROMPT
|
||||
|
||||
|
||||
async def classify(
    text: str | None,
    vertical: Vertical = "real_estate",
    section_slug: str | None = None,
) -> dict | None:
    """Classify a message text under the given vertical/section.

    The system prompt is resolved with `section → vertical → built-in` fallback,
    so a per-section prompt can fine-tune extraction (e.g. AED/sqft for Dubai)
    while unconfigured sections keep using the vertical-wide prompt.

    Args:
        text: Message text; ``None`` or text shorter than
            ``settings.llm_min_text_length`` (after stripping) is skipped.
        vertical: Which schema/prompt family to use.
        section_slug: Optional section used for prompt resolution.

    Returns:
        A vertical-specific structured dict, or ``None`` when the LLM is
        disabled, the text is too short, the request fails, or the response
        cannot be turned into a verdict. Response-handling errors are logged
        and reported as ``None`` so the caller can fall back to regex-only
        extraction.
    """
    if not settings.llm_enabled:
        return None
    if not text or len(text.strip()) < settings.llm_min_text_length:
        return None

    # Lazy import to avoid a circular: prompt_store -> db.session -> config.
    from parser_bot import prompt_store

    system = await prompt_store.resolve(vertical, section_slug, default_prompt(vertical))
    payload = {
        "model": settings.llm_model,
        "prompt": _build_user_prompt(text),
        "system": system,
        "format": "json",
        "stream": False,
        "options": {"temperature": 0.1, "num_ctx": 4096, "num_predict": 600},
    }
    # Only the HTTP round-trip is serialized; parsing happens after release.
    async with _lock:
        try:
            async with httpx.AsyncClient(timeout=settings.llm_timeout_seconds) as client:
                r = await client.post(
                    f"{settings.llm_base_url}/api/generate", json=payload
                )
                if r.status_code != 200:
                    # Surface the actual server message — most useful one is
                    # `model '...' not found`, which otherwise would just look
                    # like a generic HTTP error and leave the worker to spin.
                    log.warning(
                        "llm_request_failed",
                        status=r.status_code,
                        model=settings.llm_model,
                        vertical=vertical,
                        section=section_slug,
                        body=r.text[:300],
                    )
                    return None
                data = r.json()
        except Exception as exc:
            log.warning(
                "llm_request_failed", error=str(exc), model=settings.llm_model, vertical=vertical
            )
            return None

    # The body must be an object with a string "response"; anything else
    # would raise below, so reject it explicitly.
    if not isinstance(data, dict):
        log.warning("llm_invalid_json", raw=repr(data)[:200], vertical=vertical)
        return None
    response = data.get("response") or ""
    if not isinstance(response, str):
        log.warning("llm_invalid_json", raw=repr(response)[:200], vertical=vertical)
        return None
    raw = response.strip()
    if not raw:
        return None
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        # Best effort: extract first {...} block.
        start, end = raw.find("{"), raw.rfind("}")
        if start == -1 or end == -1:
            log.warning("llm_invalid_json", raw=raw[:200], vertical=vertical)
            return None
        try:
            parsed = json.loads(raw[start : end + 1])
        except json.JSONDecodeError:
            log.warning("llm_invalid_json", raw=raw[:200], vertical=vertical)
            return None

    coerce = _coerce_hr if vertical == "hr" else _coerce_real_estate
    try:
        return coerce(parsed)
    except Exception as exc:
        # Model output is untrusted; a field of an unexpected type must not
        # propagate as an exception to the caller.
        log.warning("llm_invalid_payload", error=str(exc), vertical=vertical)
        return None
|
||||
205
src/parser_bot/main.py
Normal file
205
src/parser_bot/main.py
Normal file
@@ -0,0 +1,205 @@
|
||||
from contextlib import asynccontextmanager
|
||||
from pathlib import Path
|
||||
|
||||
import structlog
|
||||
import uvicorn
|
||||
from fastapi import Depends, FastAPI, HTTPException
|
||||
from fastapi.openapi.docs import get_redoc_html, get_swagger_ui_html
|
||||
from fastapi.openapi.utils import get_openapi
|
||||
from fastapi.responses import FileResponse, JSONResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from starlette.types import Scope
|
||||
|
||||
from parser_bot.access import require_admin, require_admin_network
|
||||
from parser_bot.api.routes import router
|
||||
from parser_bot.config import settings
|
||||
from parser_bot.scheduler.poller import build_scheduler
|
||||
from parser_bot.telegram.client import is_authorized, start_client, stop_client
|
||||
|
||||
structlog.configure(
|
||||
processors=[
|
||||
structlog.processors.TimeStamper(fmt="iso"),
|
||||
structlog.processors.add_log_level,
|
||||
structlog.processors.JSONRenderer(),
|
||||
]
|
||||
)
|
||||
log = structlog.get_logger()
|
||||
|
||||
STATIC_DIR = Path(__file__).parent / "web" / "static"
|
||||
NOCACHE = {"Cache-Control": "no-cache, must-revalidate"}
|
||||
|
||||
|
||||
class NoCacheStaticFiles(StaticFiles):
    """Static file mount that marks every response ``Cache-Control: no-cache``.

    Conditional requests (ETag / Last-Modified, answered with 304) keep
    working; the header only forces the browser to revalidate, so a stale JS
    bundle is not served silently after a deploy.
    """

    async def get_response(self, path: str, scope: Scope):
        """Delegate to ``StaticFiles`` and stamp the no-cache header on the result."""
        served = await super().get_response(path, scope)
        served.headers["Cache-Control"] = NOCACHE["Cache-Control"]
        return served
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: start the Telegram client and the poller, then tear down.

    Startup starts the client, builds and starts the scheduler, and logs
    whether the client is authorized (with a warning pointing at
    ``/auth.html`` when it is not). Shutdown stops the scheduler without
    waiting for running jobs and then stops the client.
    """
    await start_client()
    poller = build_scheduler()
    poller.start()
    logged_in = await is_authorized()
    log.info(
        "startup", poll_interval=settings.poll_interval_seconds, authorized=logged_in
    )
    if not logged_in:
        log.warning("not_authorized", action="open /auth.html to log in")
    try:
        yield
    finally:
        poller.shutdown(wait=False)
        await stop_client()
        log.info("shutdown")
|
||||
|
||||
|
||||
def _serve_section_template(vertical_dir: str, page: str) -> FileResponse:
    """Serve the shared per-section template for ``page``.

    Sections are created dynamically, so a URL such as
    ``/real-estate/dubai/channels.html`` has no file of its own. Every section
    slug is answered from ``web/static/<vertical_dir>/section/<page>``; an
    empty ``page`` means ``index.html``.

    Raises:
        HTTPException: 404 when ``page`` contains ``/`` or starts with ``..``,
            or when the template file does not exist.
    """
    filename = page or "index.html"
    escapes_directory = "/" in filename or filename.startswith("..")
    if escapes_directory:
        raise HTTPException(404)
    template = STATIC_DIR / vertical_dir / "section" / filename
    if template.is_file():
        return FileResponse(template, headers=NOCACHE)
    raise HTTPException(404)
|
||||
|
||||
|
||||
def create_app() -> FastAPI:
    """Assemble the FastAPI app: API router, guarded pages, docs and static mounts.

    Registration order is significant because Starlette matches routes in the
    order they were added: explicit guarded routes first, then the /static and
    /media mounts, then the per-vertical section routes, and finally the
    catch-all static mount at "/".

    Returns:
        The configured application, wired to the module's `lifespan` hook.
    """
    # Function-scope import: only the docs/redoc return annotations need it.
    from fastapi.responses import HTMLResponse

    public_base = settings.public_base_path.rstrip("/")
    # With an empty base path this is simply "/openapi.json".
    openapi_url = f"{public_base}/openapi.json"

    # Pages that must only be reachable through their guarded routes below.
    # Both static mounts serve STATIC_DIR itself, so without this filter a
    # request such as /static/auth.html would return the file without ever
    # running the admin dependency.
    guarded_pages = frozenset({"admin.html", "auth.html"})

    class _PublicStaticFiles(NoCacheStaticFiles):
        """NoCacheStaticFiles that answers 404 for the admin-gated pages."""

        async def get_response(self, path: str, scope: Scope):
            normalized = path.replace("\\", "/").strip("/").lower()
            if normalized in guarded_pages:
                raise HTTPException(404)
            return await super().get_response(path, scope)

    # Disable the default /docs, /redoc and /openapi.json — we serve our own
    # admin-gated versions below.
    app = FastAPI(
        title="parser-tg-bot",
        lifespan=lifespan,
        docs_url=None,
        redoc_url=None,
        openapi_url=None,
    )
    app.include_router(router, prefix="/api/v1")

    @app.get("/healthz")
    async def healthz() -> dict[str, str]:
        return {"status": "ok"}

    @app.get("/", include_in_schema=False)
    async def index() -> FileResponse:
        return FileResponse(STATIC_DIR / "index.html", headers=NOCACHE)

    # Guarded pages. Registered as explicit routes BEFORE the static mounts so
    # the dependency runs; the mounts themselves refuse these files (see
    # _PublicStaticFiles above).
    @app.get(
        "/admin.html",
        include_in_schema=False,
        dependencies=[Depends(require_admin_network)],
    )
    async def admin_page() -> FileResponse:
        return FileResponse(STATIC_DIR / "admin.html", headers=NOCACHE)

    @app.get(
        "/auth.html",
        include_in_schema=False,
        dependencies=[Depends(require_admin)],
    )
    async def auth_page() -> FileResponse:
        return FileResponse(STATIC_DIR / "auth.html", headers=NOCACHE)

    # Admin-only: OpenAPI surface. Custom routes so we can wrap them in
    # `require_admin`; the auto-generated ones from FastAPI bypass it.
    @app.get(
        "/openapi.json",
        include_in_schema=False,
        dependencies=[Depends(require_admin)],
    )
    async def openapi_json() -> JSONResponse:
        return JSONResponse(
            get_openapi(
                title=app.title,
                version=app.version,
                openapi_version=app.openapi_version,
                description=app.description,
                routes=app.routes,
            )
        )

    @app.get(
        "/docs",
        include_in_schema=False,
        dependencies=[Depends(require_admin)],
    )
    async def docs() -> HTMLResponse:
        return get_swagger_ui_html(
            openapi_url=openapi_url,
            title=app.title + " — docs",
        )

    @app.get(
        "/redoc",
        include_in_schema=False,
        dependencies=[Depends(require_admin)],
    )
    async def redoc() -> HTMLResponse:
        return get_redoc_html(
            openapi_url=openapi_url,
            title=app.title + " — redoc",
        )

    # IMPORTANT: register /static and /media mounts BEFORE the dynamic
    # vertical/section routes. Starlette matches routes in registration order,
    # and a generic /{v}/{s}/{page} pattern would otherwise eat /static/*.
    app.mount("/static", _PublicStaticFiles(directory=STATIC_DIR), name="static")
    media_dir = Path(settings.media_dir)
    media_dir.mkdir(parents=True, exist_ok=True)
    # /media is fine to cache — file names are content-stable.
    app.mount("/media", StaticFiles(directory=media_dir), name="media")

    # Section-templated dynamic routes, explicit per vertical so /static/*,
    # /api/*, /media/* (and any future top-level path) can't be captured.
    @app.get("/real-estate/{section}/", include_in_schema=False)
    async def re_section_root(section: str) -> FileResponse:
        return _serve_section_template("real-estate", "index.html")

    @app.get("/real-estate/{section}/{page}", include_in_schema=False)
    async def re_section_page(section: str, page: str) -> FileResponse:
        return _serve_section_template("real-estate", page)

    @app.get("/hr/{section}/", include_in_schema=False)
    async def hr_section_root(section: str) -> FileResponse:
        return _serve_section_template("hr", "index.html")

    @app.get("/hr/{section}/{page}", include_in_schema=False)
    async def hr_section_page(section: str, page: str) -> FileResponse:
        return _serve_section_template("hr", page)

    # Catch-all for top-level static pages (chooser, css, etc.). The guarded
    # pages are excluded by _PublicStaticFiles, so this mount cannot bypass
    # their dependencies.
    app.mount("/", _PublicStaticFiles(directory=STATIC_DIR, html=True), name="pages")
    return app
|
||||
|
||||
|
||||
# ASGI application instance; `main()` points uvicorn at it as "parser_bot.main:app".
app = create_app()
|
||||
|
||||
|
||||
def main() -> None:
    """Run the API under uvicorn on the configured host and port.

    `log_config=None` is passed through so uvicorn does not apply its own
    logging configuration.
    """
    server_options = {
        "host": settings.api_host,
        "port": settings.api_port,
        "log_config": None,
    }
    uvicorn.run("parser_bot.main:app", **server_options)
|
||||
|
||||
|
||||
# Start the server when this module is executed as a script.
if __name__ == "__main__":
    main()
|
||||
130
src/parser_bot/prompt_store.py
Normal file
130
src/parser_bot/prompt_store.py
Normal file
@@ -0,0 +1,130 @@
|
||||
"""Runtime-editable LLM system prompts, persisted in app_settings.
|
||||
|
||||
Three resolution levels with fallback (more specific → less specific):
|
||||
1. `llm_system_prompt:<vertical>:<section_slug>` — section override
|
||||
2. `llm_system_prompt:<vertical>` — vertical override
|
||||
3. built-in DEFAULT_RE_SYSTEM_PROMPT / DEFAULT_HR_SYSTEM_PROMPT
|
||||
|
||||
The prompt is read on every classification call but cached for a short
|
||||
window so the DB isn't hit per-message. Edits via the API invalidate the
|
||||
cache for that level, so a save in the UI takes effect within seconds.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from typing import Literal
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.dialects.postgresql import insert as pg_insert
|
||||
|
||||
from parser_bot.db.models import AppSetting
|
||||
from parser_bot.db.session import session_scope
|
||||
|
||||
# The two verticals a prompt can be stored for.
Vertical = Literal["real_estate", "hr"]
|
||||
|
||||
# Prefix of every app_settings key that holds a prompt override.
_KEY_PREFIX = "llm_system_prompt:"
|
||||
# Seconds a cached lookup stays fresh before `_load` queries the database again.
_CACHE_TTL_S = 5.0
|
||||
# key -> (time.monotonic() of the lookup, prompt text or None when nothing usable is stored).
_cache: dict[str, tuple[float, str | None]] = {}
|
||||
|
||||
|
||||
def _key(vertical: Vertical, section_slug: str | None = None) -> str:
    """Build the app_settings key for a prompt override.

    Returns `<prefix><vertical>:<section_slug>` when a non-empty slug is
    given, otherwise the vertical-level key `<prefix><vertical>`.
    """
    parts = [vertical]
    if section_slug:
        parts.append(section_slug)
    return _KEY_PREFIX + ":".join(parts)
|
||||
|
||||
|
||||
async def _load(key: str) -> str | None:
    """Read a stored prompt by exact key, going through the short-lived cache.

    Args:
        key: Full app_settings key (see `_key`).

    Returns:
        The stored prompt text, or None when the row is missing, is not a
        string, or contains only whitespace. Negative results are cached too.
    """
    now = time.monotonic()
    entry = _cache.get(key)
    # Check for presence explicitly instead of defaulting the timestamp to
    # 0.0: the reference point of time.monotonic() is undefined, so a small
    # clock value would make a never-cached key look fresh and skip the DB.
    if entry is not None:
        cached_at, cached_value = entry
        if now - cached_at < _CACHE_TTL_S:
            return cached_value

    async with session_scope() as session:
        result = await session.execute(
            select(AppSetting.value).where(AppSetting.key == key)
        )
        value = result.scalar_one_or_none()

    # Blank or non-string values count as "no override".
    text = value if isinstance(value, str) and value.strip() else None
    _cache[key] = (now, text)
    return text
|
||||
|
||||
|
||||
async def resolve(
    vertical: Vertical, section_slug: str | None, default: str
) -> str:
    """Return the most specific prompt available, or `default` if none is stored.

    Lookup order is section-level override, then vertical-level override,
    then `default` — the same walk `get` performs, minus the source label.
    """
    text, _source = await get(vertical, section_slug, default)
    return text
|
||||
|
||||
|
||||
async def get(
    vertical: Vertical, section_slug: str | None, default: str
) -> tuple[str, str]:
    """Return `(text, source)` for the settings UI.

    `source` is 'section', 'vertical' or 'default' and names the level the
    text came from, so the editor can show which override is active without
    a second round-trip.
    """
    # Most specific level first; the section level only exists with a slug.
    levels: list[tuple[str, str]] = []
    if section_slug:
        levels.append(("section", _key(vertical, section_slug)))
    levels.append(("vertical", _key(vertical)))

    for source, key in levels:
        stored = await _load(key)
        if stored is not None:
            return stored, source
    return default, "default"
|
||||
|
||||
|
||||
async def set_prompt(
    vertical: Vertical, section_slug: str | None, text: str
) -> None:
    """Store a prompt at the section level (slug given) or the vertical level.

    Raises:
        ValueError: if `text` is not a string or is blank.
    """
    is_blank = not isinstance(text, str) or not text.strip()
    if is_blank:
        raise ValueError("prompt must be a non-empty string")

    key = _key(vertical, section_slug)
    # Insert the row, or overwrite the value when the key already exists.
    upsert = pg_insert(AppSetting).values(key=key, value=text)
    upsert = upsert.on_conflict_do_update(
        index_elements=["key"], set_={"value": text}
    )
    async with session_scope() as session:
        await session.execute(upsert)
    # Drop the cached entry so the next lookup reads the new value.
    invalidate(key)
|
||||
|
||||
|
||||
async def reset(vertical: Vertical, section_slug: str | None) -> None:
    """Delete the stored override at exactly this level and clear its cache entry."""
    key = _key(vertical, section_slug)
    removal = AppSetting.__table__.delete().where(AppSetting.key == key)
    async with session_scope() as session:
        await session.execute(removal)
    invalidate(key)
|
||||
|
||||
|
||||
def invalidate(key: str | None = None) -> None:
    """Forget one cached prompt lookup, or all of them when `key` is None."""
    if key is not None:
        _cache.pop(key, None)
        return
    _cache.clear()
|
||||
|
||||
|
||||
async def is_overridden(
    vertical: Vertical, section_slug: str | None = None
) -> bool:
    """Tell whether a custom prompt is stored at exactly this level (no fallback)."""
    return (await _load(_key(vertical, section_slug))) is not None
|
||||
0
src/parser_bot/scheduler/__init__.py
Normal file
0
src/parser_bot/scheduler/__init__.py
Normal file
349
src/parser_bot/scheduler/poller.py
Normal file
349
src/parser_bot/scheduler/poller.py
Normal file
@@ -0,0 +1,349 @@
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import structlog
|
||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy.dialects.postgresql import insert as pg_insert
|
||||
|
||||
from parser_bot.config import settings
|
||||
from parser_bot.db.models import Channel, Message, Section
|
||||
from parser_bot.db.session import session_scope
|
||||
from parser_bot.extractors import analyze, analyze_with_llm
|
||||
from parser_bot.telegram.client import (
|
||||
fetch_new_messages,
|
||||
fetch_specific_messages_with_media,
|
||||
is_authorized,
|
||||
resolve_channel,
|
||||
)
|
||||
|
||||
# Module-level structlog logger for the polling / classification jobs.
log = structlog.get_logger()
|
||||
|
||||
|
||||
def _verdict_key(vertical: str) -> str:
|
||||
"""JSONB key under `extracted` where the LLM verdict lives for this vertical."""
|
||||
return "hr_lead" if vertical == "hr" else "lead"
|
||||
|
||||
|
||||
def _needs_work_clause(vertical: str | None):
    """SQL condition selecting messages that still need LLM classification.

    With a vertical: `extracted` is NULL, or that vertical's verdict key is
    NULL. Without one: `extracted` is NULL, or BOTH the "lead" and "hr_lead"
    keys are NULL (i.e. the row carries no verdict at all).
    """
    never_analyzed = Message.extracted.is_(None)
    if vertical is not None:
        verdict_missing = Message.extracted[_verdict_key(vertical)].is_(None)
        return never_analyzed | verdict_missing

    no_re_verdict = Message.extracted["lead"].is_(None)
    no_hr_verdict = Message.extracted["hr_lead"].is_(None)
    return never_analyzed | (no_re_verdict & no_hr_verdict)
|
||||
|
||||
|
||||
async def poll_channel(channel_id: int) -> int:
    """Fetch new messages for one channel and store them.

    Returns:
        Number of rows actually inserted; 0 when the channel does not exist
        or is inactive. Messages already stored (same channel and Telegram
        message id) are skipped by the conflict clause.
    """
    async with session_scope() as session:
        channel = await session.get(Channel, channel_id)
        if channel is None or not channel.is_active:
            return 0

        # Fill in the Telegram id / title the first time they are missing.
        needs_resolve = channel.tg_id is None or channel.title is None
        if needs_resolve:
            info = await resolve_channel(channel.identifier)
            channel.tg_id, channel.title = info.tg_id, info.title

        fetched = await fetch_new_messages(
            channel.identifier,
            min_id=channel.last_message_id,
            limit=settings.poll_history_limit,
            download_media_for_channel_id=channel.id,
        )

        inserted = 0
        for item in fetched:
            # Only the cheap regex pass runs in the poll path. LLM
            # classification is handled by `classify_pending` in a background
            # scheduler job so a poll never blocks on a slow LLM call.
            row = {
                "channel_id": channel.id,
                "tg_message_id": item.tg_message_id,
                "date": item.date,
                "text": item.text,
                "sender_id": item.sender_id,
                "sender_username": item.sender_username,
                "sender_name": item.sender_name,
                "grouped_id": item.grouped_id,
                "has_media": item.has_media,
                "views": item.views,
                "forwards": item.forwards,
                "raw": item.raw,
                "media_files": item.media_files or None,
                "extracted": analyze(item.text) if item.text else None,
            }
            upsert = (
                pg_insert(Message)
                .values(**row)
                .on_conflict_do_nothing(
                    index_elements=["channel_id", "tg_message_id"]
                )
            )
            outcome = await session.execute(upsert)
            inserted += outcome.rowcount or 0

        if fetched:
            # `fetch_new_messages` returns the batch sorted by message id, so
            # the last element carries the highest id.
            newest_id = fetched[-1].tg_message_id
            channel.last_message_id = max(channel.last_message_id or 0, newest_id)
        channel.last_polled_at = datetime.now(timezone.utc)

        log.info(
            "polled_channel",
            channel=channel.identifier,
            vertical=channel.vertical,
            fetched=len(fetched),
            inserted=inserted,
        )
        return inserted
|
||||
|
||||
|
||||
async def poll_all() -> None:
    """Poll every active channel once; a failure in one does not stop the rest.

    Does nothing (beyond a debug log) while the Telegram session is not
    authorized.
    """
    authorized = await is_authorized()
    if not authorized:
        log.debug("poll_skipped_not_authorized")
        return

    async with session_scope() as session:
        active = await session.execute(
            select(Channel.id).where(Channel.is_active.is_(True))
        )
        channel_ids = [found_id for (found_id,) in active.all()]

    for channel_id in channel_ids:
        try:
            await poll_channel(channel_id)
        except Exception as exc:
            # Keep going: one broken channel must not block the others.
            log.error("poll_failed", channel_id=channel_id, error=str(exc))
|
||||
|
||||
|
||||
async def backfill_media(channel_id: int, batch_size: int = 50) -> dict[str, int]:
    """Re-download media for messages with has_media=True but media_files NULL.

    Processes one batch (oldest first by tg_message_id) per call so the call
    stays bounded; the UI can invoke it repeatedly until 'pending' is 0.

    Rows whose media can no longer be resolved (the lookup returned nothing
    for that message id) are stamped with a `skipped: "unavailable"` entry.
    Without that they would stay NULL, be re-selected as the oldest pending
    rows on every call, and the backlog could never drain.

    Args:
        channel_id: Database id of the channel to backfill.
        batch_size: Maximum number of messages handled in this call.

    Returns:
        {"updated": rows that received media info,
         "pending": rows still waiting after this batch}.

    Raises:
        RuntimeError: if the Telegram session is not authorized or the
            channel does not exist.
    """
    if not await is_authorized():
        raise RuntimeError("not authorized")

    missing_media = (
        Message.channel_id == channel_id,
        Message.has_media.is_(True),
        Message.media_files.is_(None),
    )

    async with session_scope() as session:
        channel = await session.get(Channel, channel_id)
        if channel is None:
            raise RuntimeError("channel not found")

        pending_total = (
            await session.execute(
                select(func.count(Message.id)).where(*missing_media)
            )
        ).scalar_one()

        rows = (
            await session.execute(
                select(Message.id, Message.tg_message_id)
                .where(*missing_media)
                .order_by(Message.tg_message_id.asc())
                .limit(batch_size)
            )
        ).all()
        if not rows:
            return {"updated": 0, "pending": 0}

        tg_ids = [r.tg_message_id for r in rows]
        results = await fetch_specific_messages_with_media(
            channel.identifier, tg_ids, channel_id
        )

        updated = 0
        unavailable = 0
        for db_id, tg_id in rows:
            msg = await session.get(Message, db_id)
            if msg is None:
                continue
            files = results.get(tg_id)
            if files:
                msg.media_files = files
                updated += 1
            else:
                # Same shape as the entries built by the media downloader,
                # with a `skipped` reason, so the row leaves the queue.
                msg.media_files = [
                    {
                        "kind": "unknown",
                        "mime": None,
                        "size": None,
                        "skipped": "unavailable",
                    }
                ]
                unavailable += 1

        remaining = max(0, pending_total - updated - unavailable)
        log.info(
            "backfill_media",
            channel_id=channel_id,
            updated=updated,
            unavailable=unavailable,
            remaining=remaining,
        )
        return {"updated": updated, "pending": remaining}
|
||||
|
||||
|
||||
async def reanalyze_channel(channel_id: int, batch_size: int = 5) -> dict[str, int]:
    """Re-run the extractors over one batch of this channel's unclassified messages.

    The vertical and section slug come from the channel row, so the matching
    prompt is used. Only text messages whose verdict for that vertical is
    missing are touched, newest first. With the LLM disabled the regex-only
    `analyze` is used instead.

    Returns:
        {"updated": rows rewritten, "pending": rows still missing a verdict};
        both 0 when the channel (joined to its section) is not found.
    """
    async with session_scope() as session:
        found = (
            await session.execute(
                select(Channel, Section.slug)
                .join(Section, Section.id == Channel.section_id)
                .where(Channel.id == channel_id)
            )
        ).one_or_none()
        if found is None:
            return {"updated": 0, "pending": 0}

        channel, section_slug = found
        vertical = channel.vertical
        criteria = (
            Message.channel_id == channel_id,
            Message.text.is_not(None),
            _needs_work_clause(vertical),
        )

        pending_total = (
            await session.execute(select(func.count(Message.id)).where(*criteria))
        ).scalar_one()

        batch = (
            await session.execute(
                select(Message.id, Message.text)
                .where(*criteria)
                .order_by(Message.id.desc())
                .limit(batch_size)
            )
        ).all()
        if not batch:
            return {"updated": 0, "pending": 0}

        updated = 0
        for db_id, text in batch:
            if settings.llm_enabled:
                extracted = await analyze_with_llm(text, vertical, section_slug)
            else:
                extracted = analyze(text)
            msg = await session.get(Message, db_id)
            if msg is None:
                continue
            msg.extracted = extracted
            updated += 1

        remaining = max(0, pending_total - updated)
        log.info(
            "reanalyzed_channel",
            channel_id=channel_id,
            vertical=vertical,
            section=section_slug,
            updated=updated,
            remaining=remaining,
        )
        return {"updated": updated, "pending": remaining}
|
||||
|
||||
|
||||
async def pending_llm_count(
    vertical: str | None = None, section_slug: str | None = None
) -> int:
    """Count text messages that still need LLM classification.

    With `vertical` set, only messages from channels of that vertical are
    counted (further narrowed to `section_slug` when given) and the
    vertical-specific verdict is the one checked. Returns 0 when the LLM is
    disabled.
    """
    if not settings.llm_enabled:
        return 0

    query = select(func.count(Message.id)).where(
        Message.text.is_not(None),
        _needs_work_clause(vertical),
    )
    if vertical is not None:
        query = query.join(Channel, Channel.id == Message.channel_id).where(
            Channel.vertical == vertical
        )
        # The section filter hangs off the channel join above.
        if section_slug is not None:
            query = query.join(Section, Section.id == Channel.section_id).where(
                Section.slug == section_slug
            )

    async with session_scope() as session:
        return (await session.execute(query)).scalar_one()
|
||||
|
||||
|
||||
async def classify_pending(batch_size: int = 5) -> int:
    """Classify one batch of messages that carry no LLM verdict yet.

    Walks newest-first across all channels and takes the vertical and section
    slug from each message's channel, so RE and HR channels (and per-section
    prompt overrides) share one worker.

    Returns:
        Number of messages whose `extracted` was rewritten; 0 when the LLM is
        disabled or nothing is pending.
    """
    if not settings.llm_enabled:
        return 0

    async with session_scope() as session:
        batch = (
            await session.execute(
                select(Message.id, Message.text, Channel.vertical, Section.slug)
                .join(Channel, Channel.id == Message.channel_id)
                .join(Section, Section.id == Channel.section_id)
                .where(Message.text.is_not(None), _needs_work_clause(None))
                .order_by(Message.id.desc())
                .limit(batch_size)
            )
        ).all()
        if not batch:
            return 0

        updated = 0
        for db_id, text, vertical, section_slug in batch:
            # Re-read the row right before the LLM call and skip it when this
            # vertical's verdict is already present.
            current = (
                await session.execute(
                    select(Message.extracted).where(Message.id == db_id)
                )
            ).scalar_one_or_none()
            if current and current.get(_verdict_key(vertical)) is not None:
                continue

            extracted = await analyze_with_llm(text, vertical, section_slug)
            msg = await session.get(Message, db_id)
            if msg is None:
                continue
            msg.extracted = extracted
            updated += 1

        if updated:
            log.info("classify_pending_batch", updated=updated)
        return updated
|
||||
|
||||
|
||||
def build_scheduler() -> AsyncIOScheduler:
    """Create the (not yet started) scheduler with the recurring jobs.

    `poll_all` is always registered; `classify_pending` only when the LLM is
    enabled. Both are interval jobs limited to one running instance with
    coalescing turned on.
    """
    shared = {"max_instances": 1, "coalesce": True}

    scheduler = AsyncIOScheduler()
    scheduler.add_job(
        poll_all,
        "interval",
        seconds=settings.poll_interval_seconds,
        id="poll_all",
        **shared,
    )
    if settings.llm_enabled:
        scheduler.add_job(
            classify_pending,
            "interval",
            seconds=settings.llm_classify_interval_seconds,
            id="classify_pending",
            kwargs={"batch_size": settings.llm_classify_batch_size},
            **shared,
        )
    return scheduler
|
||||
0
src/parser_bot/telegram/__init__.py
Normal file
0
src/parser_bot/telegram/__init__.py
Normal file
319
src/parser_bot/telegram/client.py
Normal file
319
src/parser_bot/telegram/client.py
Normal file
@@ -0,0 +1,319 @@
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import structlog
|
||||
from telethon import TelegramClient
|
||||
from telethon.sessions import StringSession
|
||||
from telethon.tl.types import Channel as TgChannel
|
||||
from telethon.tl.types import Message as TgMessage
|
||||
from telethon.tl.types import (
|
||||
MessageMediaDocument,
|
||||
MessageMediaPhoto,
|
||||
)
|
||||
|
||||
from parser_bot.config import settings
|
||||
|
||||
# Module-level structlog logger for the Telegram client helpers.
log = structlog.get_logger()
|
||||
|
||||
|
||||
def _json_safe(value: Any) -> Any:
|
||||
"""Coerce Telethon's to_dict() output into JSONB-safe primitives."""
|
||||
return json.loads(json.dumps(value, default=str))
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ResolvedChannel:
|
||||
tg_id: int
|
||||
title: str
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class FetchedMessage:
|
||||
tg_message_id: int
|
||||
date: datetime
|
||||
text: str | None
|
||||
sender_id: int | None
|
||||
sender_username: str | None
|
||||
sender_name: str | None
|
||||
grouped_id: int | None
|
||||
has_media: bool
|
||||
views: int | None
|
||||
forwards: int | None
|
||||
raw: dict
|
||||
media_files: list[dict] = field(default_factory=list)
|
||||
|
||||
|
||||
def _sender_info(msg: TgMessage) -> tuple[str | None, str | None]:
|
||||
"""Best-effort sender username + display name from a Telethon Message.
|
||||
|
||||
For public channel posts the "sender" is usually the channel itself —
|
||||
in that case we fall back to `post_author` (the optional signature on
|
||||
signed posts) so the operator at least knows who signed it.
|
||||
"""
|
||||
username: str | None = None
|
||||
name: str | None = None
|
||||
sender = msg.sender
|
||||
if sender is not None:
|
||||
username = getattr(sender, "username", None)
|
||||
first = getattr(sender, "first_name", None)
|
||||
last = getattr(sender, "last_name", None)
|
||||
title = getattr(sender, "title", None)
|
||||
if first or last:
|
||||
name = " ".join(p for p in (first, last) if p)
|
||||
elif title:
|
||||
name = title
|
||||
post_author = getattr(msg, "post_author", None)
|
||||
if not name and post_author:
|
||||
name = post_author
|
||||
return username, name
|
||||
|
||||
|
||||
def _media_kind(media: Any) -> str:
    """Classify a media object as photo / video / audio / sticker / document / unknown.

    Documents are told apart by MIME type; "image/webp" documents are
    reported as stickers. Anything that is neither a photo nor a document is
    "unknown".
    """
    if isinstance(media, MessageMediaPhoto):
        return "photo"
    if not isinstance(media, MessageMediaDocument):
        return "unknown"

    document = getattr(media, "document", None)
    mime = (getattr(document, "mime_type", "") or "").lower()
    for prefix, kind in (("video/", "video"), ("audio/", "audio")):
        if mime.startswith(prefix):
            return kind
    return "sticker" if mime == "image/webp" else "document"
|
||||
|
||||
|
||||
def _media_size(media: Any) -> int | None:
|
||||
doc = getattr(media, "document", None)
|
||||
if doc is not None:
|
||||
return getattr(doc, "size", None)
|
||||
return None
|
||||
|
||||
|
||||
def _media_mime(media: Any) -> str | None:
|
||||
doc = getattr(media, "document", None)
|
||||
if doc is not None:
|
||||
return getattr(doc, "mime_type", None)
|
||||
if isinstance(media, MessageMediaPhoto):
|
||||
return "image/jpeg"
|
||||
return None
|
||||
|
||||
|
||||
async def _download_message_media(
    client: TelegramClient, msg: TgMessage, channel_id: int
) -> list[dict]:
    """Download a message's media into `<media_dir>/<channel_id>/`.

    Returns:
        An empty list when the message has no media, otherwise a one-element
        list with `{kind, mime, size}` plus either `url` (downloaded) or
        `skipped` ("too_large", "download_error" or "no_file"). Skipped media
        still keeps its metadata so the UI can show that it existed.
    """
    media = msg.media
    if media is None:
        return []

    info: dict = {
        "kind": _media_kind(media),
        "mime": _media_mime(media),
        "size": _media_size(media),
    }

    def _skipped(reason: str) -> list[dict]:
        info["skipped"] = reason
        return [info]

    # Only media with a known size can be rejected up front.
    declared_size = info["size"]
    if declared_size is not None and declared_size > settings.media_max_bytes:
        return _skipped("too_large")

    channel_dir = Path(settings.media_dir) / str(channel_id)
    channel_dir.mkdir(parents=True, exist_ok=True)
    destination = channel_dir / f"{msg.id}"
    try:
        saved_path = await client.download_media(msg, file=str(destination))
    except Exception as exc:
        log.warning("media_download_failed", msg_id=msg.id, error=str(exc))
        return _skipped("download_error")
    if saved_path is None:
        return _skipped("no_file")

    # Use the name of the file that was actually written.
    public_base = settings.public_base_path.rstrip("/")
    info["url"] = f"{public_base}/media/{channel_id}/{Path(saved_path).name}"
    return [info]
|
||||
|
||||
|
||||
# Lazily created shared client; built by get_client() and reset by stop_client().
_client: TelegramClient | None = None
|
||||
|
||||
|
||||
def get_client() -> TelegramClient:
    """Return the shared Telethon client, building it on first use.

    A StringSession from settings is preferred (k8s-friendly); when none is
    configured the file-based session at TG_SESSION_PATH is used (local dev).
    """
    global _client
    if _client is not None:
        return _client

    if settings.tg_session_string:
        session = StringSession(settings.tg_session_string)
    else:
        session = settings.tg_session_path
    _client = TelegramClient(session, settings.tg_api_id, settings.tg_api_hash)
    return _client
|
||||
|
||||
|
||||
async def start_client() -> TelegramClient:
    """Return the shared client, connecting it first if necessary.

    Authorization is NOT required here: connecting an unauthorized client is
    fine and is a prerequisite for the web login flow. Callers that need an
    authorized client must use `require_authorized()`.
    """
    client = get_client()
    if client.is_connected():
        return client
    await client.connect()
    return client
|
||||
|
||||
|
||||
async def stop_client() -> None:
    """Disconnect the shared client if it is connected, then forget it."""
    global _client
    active = _client
    if active is not None and active.is_connected():
        await active.disconnect()
    _client = None
|
||||
|
||||
|
||||
async def require_authorized() -> TelegramClient:
    """Return the connected client, insisting that the user is logged in.

    Raises:
        RuntimeError: when the session is not authorized.
    """
    client = await start_client()
    if await client.is_user_authorized():
        return client
    raise RuntimeError("not authorized: complete login at /auth.html")
|
||||
|
||||
|
||||
async def is_authorized() -> bool:
    """Connect if needed and report whether the session is logged in."""
    return await (await start_client()).is_user_authorized()
|
||||
|
||||
|
||||
async def current_username() -> str | None:
    """Username of the logged-in account, or its numeric id as a string.

    Returns None when the session is not authorized or Telegram reports no
    current user.
    """
    client = await start_client()
    if not await client.is_user_authorized():
        return None
    account = await client.get_me()
    return None if account is None else (account.username or str(account.id))
|
||||
|
||||
|
||||
# phone_code_hash returned by send_login_code(); needed again by submit_login_code().
_pending_phone_code_hash: str | None = None
|
||||
|
||||
|
||||
async def send_login_code() -> None:
    """Login step 1: have Telegram send a login code to TG_PHONE.

    The returned phone_code_hash is remembered for `submit_login_code`.

    Raises:
        RuntimeError: when the session is already authorized.
    """
    global _pending_phone_code_hash
    client = await start_client()
    already_in = await client.is_user_authorized()
    if already_in:
        raise RuntimeError("already authorized")
    request = await client.send_code_request(settings.tg_phone)
    _pending_phone_code_hash = request.phone_code_hash
|
||||
|
||||
|
||||
async def submit_login_code(code: str) -> bool:
    """Login step 2: submit the code received from Telegram.

    Returns:
        True when a 2FA password is still required (the pending hash is kept
        for that case), False when the sign-in completed.

    Raises:
        RuntimeError: when `send_login_code` was not called first.
    """
    global _pending_phone_code_hash
    if _pending_phone_code_hash is None:
        raise RuntimeError("no pending login: call send-code first")
    client = await start_client()
    from telethon.errors import SessionPasswordNeededError

    credentials = {
        "phone": settings.tg_phone,
        "code": code,
        "phone_code_hash": _pending_phone_code_hash,
    }
    try:
        await client.sign_in(**credentials)
    except SessionPasswordNeededError:
        return True
    _pending_phone_code_hash = None
    return False
|
||||
|
||||
|
||||
async def submit_login_password(password: str) -> None:
    """Login step 3 (2FA only): submit the cloud password and clear the pending login."""
    global _pending_phone_code_hash
    await (await start_client()).sign_in(password=password)
    _pending_phone_code_hash = None
|
||||
|
||||
|
||||
async def logout() -> None:
    """Log the account out if it is authorized and drop any pending login state."""
    global _pending_phone_code_hash
    client = await start_client()
    logged_in = await client.is_user_authorized()
    if logged_in:
        await client.log_out()
    _pending_phone_code_hash = None
|
||||
|
||||
|
||||
async def resolve_channel(identifier: str) -> ResolvedChannel:
    """Look up a channel by identifier and return its Telegram id and title.

    The title falls back to the identifier when the entity has none.

    Raises:
        ValueError: when the identifier resolves to something that is not a
            channel.
    """
    client = await start_client()
    entity = await client.get_entity(identifier)
    if isinstance(entity, TgChannel):
        return ResolvedChannel(tg_id=entity.id, title=entity.title or identifier)
    raise ValueError(f"{identifier!r} is not a channel")
|
||||
|
||||
|
||||
async def fetch_specific_messages_with_media(
    identifier: str, tg_message_ids: list[int], channel_id: int
) -> dict[int, list[dict]]:
    """Re-fetch specific messages by id and download their media.

    Used by the backfill flow for messages that were stored without media
    files.

    Returns:
        `{tg_message_id: media_files}` for every fetched message that still
        has media (downloaded or skipped). Ids that come back empty, are not
        regular messages, or carry no media are left out of the mapping.

    Raises:
        RuntimeError: when the session is not authorized.
    """
    client = await require_authorized()
    entity = await client.get_entity(identifier)
    fetched = await client.get_messages(entity, ids=list(tg_message_ids))

    media_by_id: dict[int, list[dict]] = {}
    for item in fetched:
        has_media = isinstance(item, TgMessage) and item.media is not None
        if has_media:
            media_by_id[item.id] = await _download_message_media(
                client, item, channel_id
            )
    return media_by_id
|
||||
|
||||
|
||||
async def fetch_new_messages(
    identifier: str,
    min_id: int | None,
    limit: int,
    download_media_for_channel_id: int | None = None,
) -> list[FetchedMessage]:
    """Fetch up to `limit` messages of a channel, optionally only newer than `min_id`.

    Args:
        identifier: Channel identifier understood by the client.
        min_id: When given, passed to the client as the lower id bound.
        limit: Maximum number of messages requested.
        download_media_for_channel_id: When given, media of each message is
            downloaded under that channel id's media directory.

    Returns:
        The fetched messages sorted by ascending Telegram message id.
    """
    # NOTE(review): iter_messages is called without `reverse`, which presumably
    # yields newest-first; if more than `limit` messages arrived since
    # `min_id`, the older ones may never be fetched — confirm this is intended.
    client = await start_client()
    entity = await client.get_entity(identifier)

    query = {"limit": limit}
    if min_id is not None:
        query["min_id"] = min_id

    collected: list[FetchedMessage] = []
    async for msg in client.iter_messages(entity, **query):
        if not isinstance(msg, TgMessage):
            continue

        wants_media = (
            msg.media is not None and download_media_for_channel_id is not None
        )
        media_files: list[dict] = []
        if wants_media:
            media_files = await _download_message_media(
                client, msg, download_media_for_channel_id
            )

        username, display_name = _sender_info(msg)
        if msg.sender_id is not None:
            sender_id = getattr(msg.sender_id, "user_id", msg.sender_id)
        else:
            sender_id = None

        collected.append(
            FetchedMessage(
                tg_message_id=msg.id,
                date=msg.date,
                text=msg.message,
                sender_id=sender_id,
                sender_username=username,
                sender_name=display_name,
                grouped_id=getattr(msg, "grouped_id", None),
                has_media=msg.media is not None,
                views=msg.views,
                forwards=msg.forwards,
                raw=_json_safe(msg.to_dict()),
                media_files=media_files,
            )
        )
    return sorted(collected, key=lambda fetched: fetched.tg_message_id)
|
||||
36
src/parser_bot/web/static/admin.html
Normal file
36
src/parser_bot/web/static/admin.html
Normal file
@@ -0,0 +1,36 @@
|
||||
<!doctype html>
|
||||
<html lang="ru">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>Админ — parser-tg-bot</title>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1>parser-tg-bot</h1>
|
||||
<nav>
|
||||
<a href="/api/monitoring-tg/">Разделы</a>
|
||||
<a class="admin-login-link active" href="/api/monitoring-tg/admin.html">Админ</a>
|
||||
<a class="admin-link" href="/api/monitoring-tg/auth.html">Авторизация</a>
|
||||
<a class="admin-link" href="/api/monitoring-tg/docs" target="_blank">API</a>
|
||||
</nav>
|
||||
</header>
|
||||
<main>
|
||||
<h2>Админ-доступ</h2>
|
||||
|
||||
<div class="card" style="max-width:520px">
|
||||
<div id="admin-status" class="muted" style="margin-bottom:12px">Проверка...</div>
|
||||
<form id="admin-form" class="row">
|
||||
<input type="password" id="admin-password" autocomplete="current-password"
|
||||
placeholder="Админ пароль" required style="flex:1; min-width:220px" />
|
||||
<button type="submit">Войти</button>
|
||||
</form>
|
||||
<div class="row" style="margin-top:12px">
|
||||
<button id="admin-logout" class="secondary" type="button">Выйти</button>
|
||||
</div>
|
||||
</div>
|
||||
</main>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/admin.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
85
src/parser_bot/web/static/auth.html
Normal file
85
src/parser_bot/web/static/auth.html
Normal file
@@ -0,0 +1,85 @@
|
||||
<!doctype html>
|
||||
<html lang="ru">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>Авторизация — parser-tg-bot</title>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1>parser-tg-bot</h1>
|
||||
<nav>
|
||||
<a href="/api/monitoring-tg/">Разделы</a>
|
||||
<a href="/api/monitoring-tg/real-estate/">🏠 Недвижимость</a>
|
||||
<a href="/api/monitoring-tg/hr/">👥 HR</a>
|
||||
<a class="admin-login-link" href="/api/monitoring-tg/admin.html">Админ</a>
|
||||
<a class="admin-link active" href="/api/monitoring-tg/auth.html">Авторизация</a>
|
||||
<a class="admin-link" href="/api/monitoring-tg/docs" target="_blank">API</a>
|
||||
</nav>
|
||||
</header>
|
||||
<main>
|
||||
<h2>Авторизация Telegram</h2>
|
||||
|
||||
<div class="card" style="max-width:520px">
|
||||
<div id="status-block">
|
||||
<div class="empty">Проверка статуса...</div>
|
||||
</div>
|
||||
|
||||
<div id="step-idle" hidden>
|
||||
<p>
|
||||
Не авторизовано. Номер из конфигурации: <span class="mono" id="phone"></span>.
|
||||
Нажми кнопку ниже — Telegram пришлёт одноразовый код на этот номер.
|
||||
</p>
|
||||
<button id="btn-send">Отправить код</button>
|
||||
</div>
|
||||
|
||||
<div id="step-code" hidden>
|
||||
<p>Код отправлен на <span class="mono" id="phone-2"></span>. Введи его:</p>
|
||||
<form id="form-code" class="row">
|
||||
<input type="text" id="code" inputmode="numeric" autocomplete="one-time-code"
|
||||
placeholder="12345" required style="flex:1; min-width:160px" />
|
||||
<button type="submit">Подтвердить</button>
|
||||
</form>
|
||||
<button id="btn-resend" class="secondary" style="margin-top:8px">
|
||||
Запросить код повторно
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div id="step-password" hidden>
|
||||
<p>На аккаунте включён 2FA. Введи облачный пароль Telegram:</p>
|
||||
<form id="form-password" class="row">
|
||||
<input type="password" id="password" autocomplete="current-password"
|
||||
required style="flex:1; min-width:200px" />
|
||||
<button type="submit">Войти</button>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<div id="step-done" hidden>
|
||||
<p>
|
||||
Авторизовано как <span class="mono" id="username"></span>.
|
||||
Парсер начнёт опрашивать каналы согласно расписанию.
|
||||
</p>
|
||||
<div class="row">
|
||||
<a id="return-link" href="/api/monitoring-tg/"><button>Перейти к разделам</button></a>
|
||||
<button id="btn-logout" class="danger">Выйти</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card" style="max-width:520px; margin-top:16px">
|
||||
<h3 style="margin-top:0">Прод-вариант (без UI)</h3>
|
||||
<p class="muted">
|
||||
Для деплоя в k8s удобнее заранее получить опаковую строку сессии и положить её
|
||||
в Secret — тогда поды поднимаются без интерактива:
|
||||
</p>
|
||||
<pre>docker compose run --rm -it app python -m parser_bot.auth</pre>
|
||||
<p class="muted">
|
||||
Скрипт напечатает <span class="mono">TG_SESSION_STRING=...</span> — вставить
|
||||
в <span class="mono">.env</span> или Secret и забыть про авторизацию.
|
||||
</p>
|
||||
</div>
|
||||
</main>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/auth.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
241
src/parser_bot/web/static/css/app.css
Normal file
241
src/parser_bot/web/static/css/app.css
Normal file
@@ -0,0 +1,241 @@
|
||||
:root {
|
||||
--bg: #0f1115;
|
||||
--panel: #161a22;
|
||||
--panel-2: #1d222c;
|
||||
--border: #262c38;
|
||||
--text: #e6e8ec;
|
||||
--muted: #8a93a3;
|
||||
--accent: #4f8cff;
|
||||
--accent-hover: #6aa0ff;
|
||||
--danger: #ff6464;
|
||||
--ok: #2ecc71;
|
||||
--warn: #f1c40f;
|
||||
}
|
||||
|
||||
* { box-sizing: border-box; }
|
||||
|
||||
body {
|
||||
margin: 0;
|
||||
font: 14px/1.45 -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
|
||||
background: var(--bg);
|
||||
color: var(--text);
|
||||
}
|
||||
|
||||
a { color: var(--accent); text-decoration: none; }
|
||||
a:hover { color: var(--accent-hover); }
|
||||
|
||||
header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 24px;
|
||||
padding: 14px 24px;
|
||||
background: var(--panel);
|
||||
border-bottom: 1px solid var(--border);
|
||||
}
|
||||
header h1 {
|
||||
font-size: 16px;
|
||||
margin: 0;
|
||||
font-weight: 600;
|
||||
}
|
||||
nav { display: flex; gap: 6px; }
|
||||
nav a {
|
||||
padding: 6px 12px;
|
||||
border-radius: 6px;
|
||||
color: var(--muted);
|
||||
}
|
||||
nav a.active, nav a:hover {
|
||||
color: var(--text);
|
||||
background: var(--panel-2);
|
||||
}
|
||||
|
||||
main { padding: 24px; max-width: 1200px; margin: 0 auto; }
|
||||
h2 { font-size: 18px; margin: 0 0 16px; }
|
||||
h3 { font-size: 14px; margin: 24px 0 12px; color: var(--muted); font-weight: 500; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||
|
||||
.row { display: flex; gap: 12px; align-items: center; flex-wrap: wrap; }
|
||||
.spacer { flex: 1; }
|
||||
|
||||
.card {
|
||||
background: var(--panel);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 8px;
|
||||
padding: 16px;
|
||||
}
|
||||
|
||||
.stats-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
|
||||
gap: 12px;
|
||||
margin-bottom: 24px;
|
||||
}
|
||||
.stat .label { color: var(--muted); font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||
.stat .value { font-size: 24px; font-weight: 600; margin-top: 4px; }
|
||||
|
||||
input, select, textarea, button {
|
||||
font: inherit;
|
||||
color: var(--text);
|
||||
background: var(--panel-2);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 6px;
|
||||
padding: 8px 10px;
|
||||
outline: none;
|
||||
}
|
||||
/* The shared form-control rule sets `outline: none` on input, select, textarea
   and button, so each of them needs its own visible focus cue. */
input:focus, select:focus, textarea:focus { border-color: var(--accent); }
/* A border-colour change is invisible on the accent-bordered default button,
   so keyboard focus on buttons is shown with a ring instead. */
button:focus-visible { outline: 2px solid var(--accent-hover); outline-offset: 2px; }
|
||||
|
||||
button {
|
||||
cursor: pointer;
|
||||
background: var(--accent);
|
||||
border-color: var(--accent);
|
||||
color: white;
|
||||
}
|
||||
button:hover { background: var(--accent-hover); border-color: var(--accent-hover); }
|
||||
button.secondary { background: var(--panel-2); color: var(--text); }
|
||||
button.secondary:hover { background: var(--border); }
|
||||
button.danger { background: transparent; color: var(--danger); border-color: var(--border); }
|
||||
button.danger:hover { background: rgba(255, 100, 100, 0.1); }
|
||||
button:disabled { opacity: 0.5; cursor: not-allowed; }
|
||||
|
||||
table { width: 100%; border-collapse: collapse; }
|
||||
th, td { padding: 10px 12px; text-align: left; border-bottom: 1px solid var(--border); }
|
||||
th { color: var(--muted); font-weight: 500; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||
tr:hover td { background: var(--panel-2); }
|
||||
|
||||
.badge {
|
||||
display: inline-block;
|
||||
padding: 2px 8px;
|
||||
border-radius: 999px;
|
||||
font-size: 11px;
|
||||
background: var(--panel-2);
|
||||
color: var(--muted);
|
||||
border: 1px solid var(--border);
|
||||
}
|
||||
.badge.ok { color: var(--ok); border-color: rgba(46, 204, 113, 0.4); }
|
||||
.badge.off { color: var(--muted); }
|
||||
.badge.warn { color: var(--warn); border-color: rgba(241, 196, 15, 0.4); }
|
||||
|
||||
.muted { color: var(--muted); }
|
||||
.mono { font-family: ui-monospace, SFMono-Regular, Menlo, monospace; }
|
||||
|
||||
.message {
|
||||
padding: 12px 16px;
|
||||
border-bottom: 1px solid var(--border);
|
||||
}
|
||||
.message:last-child { border-bottom: none; }
|
||||
.message-meta { display: flex; gap: 12px; color: var(--muted); font-size: 12px; margin-bottom: 6px; }
|
||||
.message-text { white-space: pre-wrap; word-break: break-word; }
|
||||
|
||||
.message-tags {
|
||||
display: flex; flex-wrap: wrap; gap: 6px;
|
||||
margin-top: 8px;
|
||||
}
|
||||
.message-tags .badge.re { color: #2ecc71; border-color: rgba(46, 204, 113, 0.4); }
|
||||
.message-tags .badge.phone { color: #4f8cff; border-color: rgba(79, 140, 255, 0.4); }
|
||||
.message-tags .badge.name { color: #f1c40f; border-color: rgba(241, 196, 15, 0.4); }
|
||||
.message-tags .badge.tg { color: #4f8cff; border-color: rgba(79, 140, 255, 0.4); }
|
||||
.message-tags .badge.tg-link { color: #fff; background: rgba(79, 140, 255, 0.2); border-color: rgba(79, 140, 255, 0.6); }
|
||||
.message-tags .badge.tg-link:hover { background: rgba(79, 140, 255, 0.35); }
|
||||
|
||||
.lead-card {
|
||||
margin-top: 10px;
|
||||
padding: 10px 14px;
|
||||
border-radius: 8px;
|
||||
border: 1px solid var(--border);
|
||||
background: rgba(46, 204, 113, 0.05);
|
||||
}
|
||||
.lead-card.lead-strong { border-color: rgba(46, 204, 113, 0.6); background: rgba(46, 204, 113, 0.1); }
|
||||
.lead-card.lead-medium { border-color: rgba(241, 196, 15, 0.5); background: rgba(241, 196, 15, 0.06); }
|
||||
.lead-card.lead-weak { border-color: rgba(138, 147, 163, 0.4); background: rgba(138, 147, 163, 0.05); }
|
||||
.lead-head { display: flex; flex-wrap: wrap; align-items: center; gap: 10px; }
|
||||
.lead-facts { color: var(--text); font-weight: 500; }
|
||||
.lead-summary { margin-top: 4px; color: var(--muted); font-size: 13px; }
|
||||
.lead-confidence {
|
||||
margin-left: auto; padding: 2px 8px; border-radius: 999px;
|
||||
background: var(--panel-2); border: 1px solid var(--border);
|
||||
font-size: 11px; color: var(--muted); font-variant-numeric: tabular-nums;
|
||||
}
|
||||
.badge.lead { color: #2ecc71; border-color: rgba(46, 204, 113, 0.5); font-weight: 600; }
|
||||
|
||||
.message-media {
|
||||
display: flex; flex-wrap: wrap; gap: 8px;
|
||||
margin-top: 10px;
|
||||
}
|
||||
.media-thumb {
|
||||
max-width: 240px; max-height: 240px;
|
||||
border-radius: 6px; cursor: zoom-in;
|
||||
background: var(--panel-2);
|
||||
}
|
||||
.media-video { max-width: 360px; max-height: 240px; border-radius: 6px; background: black; }
|
||||
.media-doc {
|
||||
display: inline-flex; align-items: center; gap: 8px;
|
||||
padding: 8px 12px; background: var(--panel-2);
|
||||
border: 1px solid var(--border); border-radius: 6px;
|
||||
color: var(--text);
|
||||
}
|
||||
.media-doc:hover { border-color: var(--accent); }
|
||||
.media-skipped {
|
||||
display: inline-flex; align-items: center; gap: 8px;
|
||||
padding: 6px 10px; background: var(--panel-2);
|
||||
border-radius: 6px; font-size: 12px;
|
||||
}
|
||||
|
||||
#lightbox {
|
||||
position: fixed; inset: 0; z-index: 2000;
|
||||
background: rgba(0,0,0,0.85);
|
||||
display: flex; align-items: center; justify-content: center;
|
||||
cursor: zoom-out;
|
||||
}
|
||||
#lightbox img { max-width: 95vw; max-height: 95vh; border-radius: 4px; }
|
||||
|
||||
.toolbar { display: flex; gap: 8px; align-items: center; margin-bottom: 16px; flex-wrap: wrap; }
|
||||
.toolbar input[type="search"], .toolbar select { min-width: 200px; }
|
||||
|
||||
.toast {
|
||||
position: fixed;
|
||||
bottom: 20px;
|
||||
right: 20px;
|
||||
background: var(--panel);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 8px;
|
||||
padding: 10px 16px;
|
||||
box-shadow: 0 6px 24px rgba(0,0,0,0.4);
|
||||
animation: slideIn 0.18s ease-out;
|
||||
z-index: 1000;
|
||||
max-width: 360px;
|
||||
}
|
||||
.toast.error { border-color: var(--danger); }
|
||||
.toast.success { border-color: var(--ok); }
|
||||
@keyframes slideIn { from { transform: translateY(8px); opacity: 0; } to { transform: none; opacity: 1; } }
|
||||
|
||||
.empty { padding: 32px; text-align: center; color: var(--muted); }
|
||||
|
||||
.sections-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
|
||||
gap: 16px;
|
||||
margin-top: 16px;
|
||||
}
|
||||
.section-tile { padding: 16px; }
|
||||
.section-tile-link { display: block; color: var(--text); }
|
||||
.section-tile-link:hover { color: var(--text); }
|
||||
.section-tile-head { display: flex; align-items: center; gap: 10px; margin-bottom: 8px; }
|
||||
.section-emoji { font-size: 28px; }
|
||||
.section-title { font-size: 16px; font-weight: 600; }
|
||||
.section-stats { display: flex; flex-wrap: wrap; gap: 12px; color: var(--muted); font-size: 13px; }
|
||||
.section-stats b { color: var(--text); }
|
||||
.section-desc { margin-top: 8px; font-size: 13px; }
|
||||
.section-code { margin-top: 8px; color: var(--warn); font-size: 12px; }
|
||||
.section-slug { margin-top: 8px; font-size: 11px; }
|
||||
.pagination { display: flex; gap: 8px; justify-content: center; margin-top: 16px; }
|
||||
|
||||
dialog {
|
||||
background: var(--panel);
|
||||
color: var(--text);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 8px;
|
||||
padding: 20px;
|
||||
min-width: 400px;
|
||||
max-width: 80vw;
|
||||
max-height: 80vh;
|
||||
}
|
||||
dialog::backdrop { background: rgba(0,0,0,0.6); }
|
||||
pre { background: var(--bg); padding: 12px; border-radius: 6px; overflow: auto; font-size: 12px; max-height: 60vh; }
|
||||
99
src/parser_bot/web/static/hr/index.html
Normal file
99
src/parser_bot/web/static/hr/index.html
Normal file
@@ -0,0 +1,99 @@
|
||||
<!doctype html>
|
||||
<html lang="ru">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>👥 HR — подразделы</title>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1 id="page-title">parser-tg-bot · 👥 HR / Кадры</h1>
|
||||
<nav id="nav-section"></nav>
|
||||
</header>
|
||||
<main>
|
||||
<div class="row">
|
||||
<h2>Подразделы HR</h2>
|
||||
<div class="spacer"></div>
|
||||
<button id="open-create">+ Новый подраздел</button>
|
||||
</div>
|
||||
<p class="muted">
|
||||
Каждый подраздел — это собственный набор каналов, своя статистика и свой
|
||||
LLM-промпт (с фоллбэком на промпт вертикали). Например: IT, продажи,
|
||||
маркетинг, рабочие специальности.
|
||||
</p>
|
||||
|
||||
<div id="sections-grid"></div>
|
||||
</main>
|
||||
|
||||
<dialog id="create-dialog">
|
||||
<h3 style="margin-top:0">Новый подраздел</h3>
|
||||
<form id="create-form">
|
||||
<label class="row" style="gap:8px; margin-bottom:8px">
|
||||
<span style="min-width:120px" class="muted">Название</span>
|
||||
<input type="text" id="new-title" required placeholder="IT" style="flex:1" />
|
||||
</label>
|
||||
<div class="row" style="gap:8px; margin-bottom:8px; font-size:12px">
|
||||
<span style="min-width:120px" class="muted">URL-адрес</span>
|
||||
<span class="muted mono">/hr/<span id="new-slug-preview">(введите название)</span>/</span>
|
||||
<div class="spacer"></div>
|
||||
<a href="#" id="new-slug-manual" class="muted">изменить вручную</a>
|
||||
</div>
|
||||
<label class="row slug-row" style="gap:8px; margin-bottom:8px" hidden>
|
||||
<span style="min-width:120px" class="muted">Slug</span>
|
||||
<!-- The hyphen is escaped on purpose: browsers that compile `pattern` with the RegExp "v" flag treat a bare "-" inside a character class as a syntax error and then ignore the pattern. -->
<input type="text" id="new-slug" pattern="[a-z0-9][a-z0-9_\-]*[a-z0-9]?"
       placeholder="it" style="flex:1" />
|
||||
</label>
|
||||
<label class="row" style="gap:8px; margin-bottom:8px">
|
||||
<span style="min-width:120px" class="muted">Иконка</span>
|
||||
<input type="text" id="new-emoji" maxlength="4" placeholder="💻" style="width:80px" />
|
||||
</label>
|
||||
<label class="row" style="gap:8px; margin-bottom:8px">
|
||||
<span style="min-width:120px" class="muted">Код доступа</span>
|
||||
<input type="text" id="new-access-code" required minlength="3"
|
||||
autocomplete="new-password" style="flex:1" />
|
||||
</label>
|
||||
<label class="row" style="gap:8px; margin-bottom:8px; align-items:flex-start">
|
||||
<span style="min-width:120px" class="muted">Описание</span>
|
||||
<textarea id="new-description" rows="3" style="flex:1"></textarea>
|
||||
</label>
|
||||
<div class="row" style="justify-content:flex-end; gap:8px; margin-top:12px">
|
||||
<button type="button" id="create-cancel" class="secondary">Отмена</button>
|
||||
<button type="submit">Создать</button>
|
||||
</div>
|
||||
</form>
|
||||
</dialog>
|
||||
|
||||
<dialog id="edit-dialog">
|
||||
<h3 style="margin-top:0">Редактировать подраздел</h3>
|
||||
<form id="edit-form">
|
||||
<input type="hidden" id="edit-slug" />
|
||||
<label class="row" style="gap:8px; margin-bottom:8px">
|
||||
<span style="min-width:120px" class="muted">Название</span>
|
||||
<input type="text" id="edit-title" required style="flex:1" />
|
||||
</label>
|
||||
<label class="row" style="gap:8px; margin-bottom:8px">
|
||||
<span style="min-width:120px" class="muted">Иконка</span>
|
||||
<input type="text" id="edit-emoji" maxlength="4" style="width:80px" />
|
||||
</label>
|
||||
<label class="row" style="gap:8px; margin-bottom:8px">
|
||||
<span style="min-width:120px" class="muted">Код доступа</span>
|
||||
<input type="text" id="edit-access-code" required minlength="3"
|
||||
autocomplete="new-password" style="flex:1" />
|
||||
</label>
|
||||
<label class="row" style="gap:8px; margin-bottom:8px; align-items:flex-start">
|
||||
<span style="min-width:120px" class="muted">Описание</span>
|
||||
<textarea id="edit-description" rows="3" style="flex:1"></textarea>
|
||||
</label>
|
||||
<div class="row" style="justify-content:flex-end; gap:8px; margin-top:12px">
|
||||
<button type="button" id="edit-cancel" class="secondary">Отмена</button>
|
||||
<button type="submit">Сохранить</button>
|
||||
</div>
|
||||
</form>
|
||||
</dialog>
|
||||
|
||||
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/sections-list.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
48
src/parser_bot/web/static/hr/section/channels.html
Normal file
48
src/parser_bot/web/static/hr/section/channels.html
Normal file
@@ -0,0 +1,48 @@
|
||||
<!doctype html>
|
||||
<html lang="ru">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>👥 HR · Каналы — parser-tg-bot</title>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1 id="page-title">parser-tg-bot</h1>
|
||||
<nav id="nav-section"></nav>
|
||||
</header>
|
||||
<main>
|
||||
<h2 id="page-heading">Каналы подраздела</h2>
|
||||
|
||||
<div class="card" style="margin-bottom:24px">
|
||||
<form id="add-form" class="row">
|
||||
<input type="text" id="identifier" placeholder="@channel или https://t.me/..." required style="flex:1; min-width:280px" />
|
||||
<button type="submit">Добавить канал</button>
|
||||
</form>
|
||||
<div class="muted" style="margin-top:8px; font-size:12px">
|
||||
Канал будет привязан к текущему подразделу.
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>ID</th>
|
||||
<th>Канал</th>
|
||||
<th>Telegram ID</th>
|
||||
<th>Сообщ.</th>
|
||||
<th>Последний опрос</th>
|
||||
<th>Статус</th>
|
||||
<th></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody id="tbody"></tbody>
|
||||
</table>
|
||||
</div>
|
||||
</main>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/channels.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
43
src/parser_bot/web/static/hr/section/index.html
Normal file
43
src/parser_bot/web/static/hr/section/index.html
Normal file
@@ -0,0 +1,43 @@
|
||||
<!doctype html>
|
||||
<html lang="ru">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>👥 HR · Дашборд — parser-tg-bot</title>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1 id="page-title">parser-tg-bot</h1>
|
||||
<nav id="nav-section"></nav>
|
||||
</header>
|
||||
<main>
|
||||
<div class="row">
|
||||
<h2 id="page-heading">Дашборд</h2>
|
||||
<div class="spacer"></div>
|
||||
<button id="poll-all">Опросить все каналы подраздела</button>
|
||||
</div>
|
||||
|
||||
<div class="stats-grid" id="stats"></div>
|
||||
|
||||
<h3>Каналы подраздела</h3>
|
||||
<div class="card">
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Канал</th>
|
||||
<th>Сообщений</th>
|
||||
<th>Последнее сообщение</th>
|
||||
<th>Последний опрос</th>
|
||||
<th>Статус</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody id="channels-tbody"></tbody>
|
||||
</table>
|
||||
</div>
|
||||
</main>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/dashboard.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
78
src/parser_bot/web/static/hr/section/messages.html
Normal file
78
src/parser_bot/web/static/hr/section/messages.html
Normal file
@@ -0,0 +1,78 @@
|
||||
<!doctype html>
|
||||
<html lang="ru">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>👥 HR · Сообщения — parser-tg-bot</title>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1 id="page-title">parser-tg-bot</h1>
|
||||
<nav id="nav-section"></nav>
|
||||
</header>
|
||||
<main>
|
||||
<h2 id="page-heading">Сообщения подраздела</h2>
|
||||
|
||||
<div class="toolbar card">
|
||||
<select id="channel-filter">
|
||||
<option value="">Все каналы подраздела</option>
|
||||
</select>
|
||||
<input type="search" id="search" placeholder="Поиск по тексту..." />
|
||||
<select id="hr-kind">
|
||||
<option value="">Любой тип лида</option>
|
||||
<option value="any">👥 HR (любой)</option>
|
||||
<option value="vacancy">📢 Вакансия (наниматель)</option>
|
||||
<option value="resume">📄 Резюме (соискатель)</option>
|
||||
<option value="contact">📇 Лид-контакт</option>
|
||||
</select>
|
||||
<label class="row" style="gap:6px">
|
||||
<input type="checkbox" id="leads-only" />
|
||||
<span class="muted">🎯 Только лиды (ИИ)</span>
|
||||
</label>
|
||||
<select id="min-confidence" title="Минимальная уверенность ИИ">
|
||||
<option value="0.3">0.3+</option>
|
||||
<option value="0.5" selected>0.5+</option>
|
||||
<option value="0.7">0.7+</option>
|
||||
<option value="0.9">0.9+</option>
|
||||
</select>
|
||||
<label class="row" style="gap:6px">
|
||||
<input type="checkbox" id="has-phone" />
|
||||
<span class="muted">📞 С телефоном</span>
|
||||
</label>
|
||||
<select id="limit">
|
||||
<option value="25">25</option>
|
||||
<option value="50" selected>50</option>
|
||||
<option value="100">100</option>
|
||||
<option value="200">200</option>
|
||||
</select>
|
||||
<div class="spacer"></div>
|
||||
<label class="row" style="gap:6px">
|
||||
<input type="checkbox" id="autorefresh" />
|
||||
<span class="muted">Автообновление</span>
|
||||
</label>
|
||||
<button id="refresh" class="secondary">Обновить</button>
|
||||
</div>
|
||||
|
||||
<div class="card" id="list"></div>
|
||||
|
||||
<div class="pagination">
|
||||
<button id="prev" class="secondary">← Назад</button>
|
||||
<span class="muted" id="page-info" style="align-self:center"></span>
|
||||
<button id="next" class="secondary">Вперёд →</button>
|
||||
</div>
|
||||
</main>
|
||||
|
||||
<dialog id="raw-dialog">
|
||||
<h3 style="margin-top:0">Сообщение</h3>
|
||||
<pre id="raw-content"></pre>
|
||||
<div class="row" style="justify-content:flex-end; margin-top:12px">
|
||||
<button class="secondary" id="raw-close">Закрыть</button>
|
||||
</div>
|
||||
</dialog>
|
||||
|
||||
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/messages.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
66
src/parser_bot/web/static/hr/section/settings.html
Normal file
66
src/parser_bot/web/static/hr/section/settings.html
Normal file
@@ -0,0 +1,66 @@
|
||||
<!doctype html>
|
||||
<html lang="ru">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>👥 HR · Настройки — parser-tg-bot</title>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1 id="page-title">parser-tg-bot</h1>
|
||||
<nav id="nav-section"></nav>
|
||||
</header>
|
||||
<main>
|
||||
<h2 id="page-heading">Настройки подраздела</h2>
|
||||
|
||||
<div class="card" style="margin-bottom:24px">
|
||||
<h3 style="margin-top:0">Текущая конфигурация</h3>
|
||||
<table>
|
||||
<tbody id="config-tbody">
|
||||
<tr><td colspan="2" class="empty">Загрузка...</td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<div class="muted" style="font-size:12px; margin-top:12px">
|
||||
Параметры задаются через переменные окружения (<span class="mono">.env</span>).
|
||||
Для изменения отредактируйте <span class="mono">.env</span> и перезапустите контейнер:
|
||||
<span class="mono">docker compose restart app</span>.
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card" style="margin-bottom:24px">
|
||||
<h3 style="margin-top:0">Действия</h3>
|
||||
<div class="row">
|
||||
<button id="poll-all">Опросить все каналы подраздела сейчас</button>
|
||||
<a href="/api/monitoring-tg/docs" target="_blank" class="badge">OpenAPI / Swagger</a>
|
||||
<a href="/api/monitoring-tg/healthz" target="_blank" class="badge">Health check</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card" style="margin-bottom:24px">
|
||||
<h3 style="margin-top:0">🤖 Промпт ИИ</h3>
|
||||
<div class="row" style="margin-bottom:8px">
|
||||
<span class="badge" id="prompt-status">—</span>
|
||||
<span class="muted" id="prompt-length"></span>
|
||||
<div class="spacer"></div>
|
||||
<select id="prompt-level" title="Уровень редактирования промпта">
|
||||
<option value="section" selected>Промпт подраздела</option>
|
||||
<option value="vertical">Промпт вертикали</option>
|
||||
</select>
|
||||
<button id="prompt-reset" class="secondary">Сбросить уровень</button>
|
||||
<button id="prompt-save">Сохранить</button>
|
||||
</div>
|
||||
<textarea id="prompt-editor" rows="22"
|
||||
style="width:100%; font-family:ui-monospace, SFMono-Regular, Menlo, monospace; font-size:12px"></textarea>
|
||||
<div class="muted" style="font-size:12px; margin-top:8px">
|
||||
Каскад: <strong>section → vertical → default</strong>. Если промпта на
|
||||
уровне подраздела нет, используется промпт вертикали; если и его нет —
|
||||
встроенный по умолчанию. Сохранение применится в течение ~5 сек.
|
||||
</div>
|
||||
</div>
|
||||
</main>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/settings.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
76
src/parser_bot/web/static/index.html
Normal file
76
src/parser_bot/web/static/index.html
Normal file
@@ -0,0 +1,76 @@
|
||||
<!doctype html>
|
||||
<html lang="ru">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>parser-tg-bot — выбор раздела</title>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||
<style>
|
||||
.chooser {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
|
||||
gap: 16px;
|
||||
max-width: 880px;
|
||||
margin: 32px auto 0;
|
||||
}
|
||||
.chooser .tile {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 8px;
|
||||
padding: 28px 24px;
|
||||
border-radius: 12px;
|
||||
border: 1px solid var(--border);
|
||||
background: var(--panel);
|
||||
color: var(--text);
|
||||
text-decoration: none;
|
||||
transition: transform 0.08s, border-color 0.1s;
|
||||
}
|
||||
.chooser .tile:hover {
|
||||
border-color: var(--accent);
|
||||
transform: translateY(-2px);
|
||||
}
|
||||
.chooser .tile .emoji { font-size: 40px; }
|
||||
.chooser .tile .title { font-size: 18px; font-weight: 600; }
|
||||
.chooser .tile .hint { color: var(--muted); font-size: 13px; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1>parser-tg-bot</h1>
|
||||
<nav>
|
||||
<a href="/api/monitoring-tg/" class="active">Разделы</a>
|
||||
<a class="admin-login-link" href="/api/monitoring-tg/admin.html">Админ</a>
|
||||
<a class="admin-link" href="/api/monitoring-tg/auth.html">Авторизация</a>
|
||||
<a class="admin-link" href="/api/monitoring-tg/docs" target="_blank">API</a>
|
||||
</nav>
|
||||
</header>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/access.js"></script>
|
||||
<main>
|
||||
<h2>Выберите вертикаль</h2>
|
||||
<p class="muted">
|
||||
У каждой вертикали — свои подразделы (например, «Дубай», «Москва»
|
||||
внутри Недвижимости, или «IT», «Продажи» внутри HR). Канал привязан
|
||||
к одному подразделу одной вертикали.
|
||||
</p>
|
||||
|
||||
<div class="chooser">
|
||||
<a class="tile" href="/api/monitoring-tg/real-estate/">
|
||||
<div class="emoji">🏠</div>
|
||||
<div class="title">Недвижимость</div>
|
||||
<div class="hint">
|
||||
Объявления о покупке, продаже и аренде квартир, домов, апартаментов,
|
||||
земли, коммерции. RU / EN / арабский — любой язык.
|
||||
</div>
|
||||
</a>
|
||||
<a class="tile" href="/api/monitoring-tg/hr/">
|
||||
<div class="emoji">👥</div>
|
||||
<div class="title">HR / Кадры</div>
|
||||
<div class="hint">
|
||||
Вакансии (наниматели), резюме (соискатели) и короткие лиды-контакты
|
||||
с указанием профессии и контактов.
|
||||
</div>
|
||||
</a>
|
||||
</div>
|
||||
</main>
|
||||
</body>
|
||||
</html>
|
||||
41
src/parser_bot/web/static/js/access.js
Normal file
41
src/parser_bot/web/static/js/access.js
Normal file
@@ -0,0 +1,41 @@
|
||||
// Ask the backend whether this client is on the admin allowlist and hide
|
||||
// admin-only nav links if not. The backend independently enforces the
|
||||
// allowlist on every admin endpoint, so this is purely cosmetic — it just
|
||||
// removes dead controls from the UI for non-admin visitors.
|
||||
|
||||
// Memoised admin check; stays null until the first isAdmin() call.
let _adminPromise = null;

/**
 * Resolve to true when the backend reports this client as an admin.
 *
 * The request is issued once and the same promise is handed to every later
 * caller. A non-OK response or a failed request resolves to false instead
 * of rejecting.
 */
export function isAdmin() {
  if (_adminPromise) return _adminPromise;

  const readPayload = (response) =>
    response.ok ? response.json() : { is_admin: false };
  const toFlag = (payload) => !!payload.is_admin;

  _adminPromise = fetch("/api/monitoring-tg/api/v1/access/me")
    .then(readPayload)
    .then(toFlag)
    .catch(() => false);
  return _adminPromise;
}
|
||||
|
||||
/**
 * Fetch the access status payload for this client (a new request per call).
 *
 * Resolves to the backend JSON on success; on a non-OK response or a failed
 * request it resolves to a payload with `is_admin` and `admin_ip_allowed`
 * both false.
 */
export function adminStatus() {
  const denied = () => ({ is_admin: false, admin_ip_allowed: false });
  return fetch("/api/monitoring-tg/api/v1/access/me")
    .then((response) => (response.ok ? response.json() : denied()))
    .catch(() => denied());
}
|
||||
|
||||
// On module load: strip admin-only controls from the page for non-admins.
adminStatus().then((status) => {
  if (status.is_admin) return;

  // `.admin-login-link` survives only when the client may open the admin page.
  const selectors = [".admin-link", ".admin-only"];
  if (!status.admin_ip_allowed) selectors.push(".admin-login-link");

  const prune = () => {
    for (const selector of selectors) {
      for (const node of document.querySelectorAll(selector)) node.remove();
    }
  };

  // Clean what is already in the DOM, then keep watching so that navs built
  // later by other scripts are cleaned as soon as they are inserted.
  prune();
  const observer = new MutationObserver(prune);
  observer.observe(document.body, { childList: true, subtree: true });
});
|
||||
49
src/parser_bot/web/static/js/admin.js
Normal file
49
src/parser_bot/web/static/js/admin.js
Normal file
@@ -0,0 +1,49 @@
|
||||
import { api, toast } from "/api/monitoring-tg/static/js/api.js";
|
||||
import "/api/monitoring-tg/static/js/access.js";
|
||||
|
||||
const form = document.getElementById("admin-form");
|
||||
const password = document.getElementById("admin-password");
|
||||
const statusEl = document.getElementById("admin-status");
|
||||
const logoutBtn = document.getElementById("admin-logout");
|
||||
|
||||
/**
 * Where to send the user after a successful admin login.
 *
 * Reads the `return` query parameter but honours it only when it is a
 * same-origin relative path. Absolute URLs, protocol-relative `//host`,
 * the `/\host` variant (browsers treat `\` as `/`) and `javascript:` URLs
 * are rejected so the login page cannot be used as an open redirect.
 *
 * @returns {string} the validated path, or "/" when absent or unsafe.
 */
function returnUrl() {
  const raw = new URLSearchParams(location.search).get("return");
  const isLocalPath =
    typeof raw === "string" &&
    raw.startsWith("/") &&
    !raw.startsWith("//") &&
    !raw.startsWith("/\\");
  return isLocalPath ? raw : "/";
}
|
||||
|
||||
/**
 * Query the access status and put the admin page into one of three states:
 * already admin (logout button only), password login disabled (message
 * only), or password required (login form shown and focused).
 */
async function refresh() {
  const status = await api.accessMe();

  let message;
  let showForm = false;
  let showLogout = false;
  if (status.is_admin) {
    message = "Админ-доступ активен.";
    showLogout = true;
  } else if (!status.admin_password_enabled) {
    message = "Админ пароль не задан. Доступ управляется IP-allowlist.";
  } else {
    message = "Введите админ пароль, чтобы открыть админские функции.";
    showForm = true;
  }

  statusEl.textContent = message;
  form.hidden = !showForm;
  logoutBtn.hidden = !showLogout;
  // Focus is deferred slightly so it happens after the form is un-hidden.
  if (showForm) setTimeout(() => password.focus(), 30);
}
|
||||
|
||||
/**
 * Submit handler for the admin password form: log in, clear the field and
 * navigate to the return URL; show the error as a toast on failure.
 */
async function onAdminSubmit(event) {
  event.preventDefault();
  try {
    await api.adminLogin(password.value);
  } catch (err) {
    toast(err.message, "error");
    return;
  }
  password.value = "";
  toast("Админ-доступ открыт", "success");
  location.href = returnUrl();
}

form.addEventListener("submit", onAdminSubmit);
|
||||
|
||||
// Logout button: drop the admin session and reload so the page re-renders
// in its logged-out state. A failed request is reported as a toast (same
// as the login handler) instead of surfacing as an unhandled rejection.
logoutBtn.addEventListener("click", async () => {
  try {
    await api.adminLogout();
  } catch (err) {
    toast(err.message, "error");
    return;
  }
  location.reload();
});
|
||||
|
||||
refresh().catch(err => toast(err.message, "error"));
|
||||
192
src/parser_bot/web/static/js/api.js
Normal file
192
src/parser_bot/web/static/js/api.js
Normal file
@@ -0,0 +1,192 @@
|
||||
import { getVertical, getSection } from "/api/monitoring-tg/static/js/vertical.js";
|
||||
|
||||
const BASE = "/api/monitoring-tg/api/v1";
|
||||
// In-flight section login shared by concurrent callers; null when idle.
let sectionLoginPromise = null;

/**
 * Ask the user for the current section's access code and submit it.
 *
 * Concurrent callers share a single prompt/login attempt. Resolves to true
 * after a successful login, false when there is no section in the URL or
 * the prompt was dismissed/empty; rejects if the login request fails.
 */
async function unlockCurrentSection() {
  if (sectionLoginPromise) return sectionLoginPromise;

  const promptAndLogin = async () => {
    const vertical = getVertical();
    const section = getSection();
    if (!section) return false;

    const code = prompt(`Введите код подраздела "${section}"`);
    if (!code) return false;

    // sectionRetry:false keeps a rejected code from re-opening the prompt.
    await request("/access/section-login", {
      method: "POST",
      body: JSON.stringify({ vertical, section, code }),
      sectionRetry: false,
    });
    return true;
  };

  sectionLoginPromise = promptAndLogin();
  try {
    return await sectionLoginPromise;
  } finally {
    // Always clear the slot so a later 401 can prompt again.
    sectionLoginPromise = null;
  }
}
|
||||
|
||||
/**
 * Turn an error payload's `detail` into readable text.
 * Strings pass through; a list (e.g. validation errors) is reduced to its
 * `msg` fields; anything else is JSON-encoded so it never renders as
 * "[object Object]".
 */
function describeErrorDetail(detail) {
  if (typeof detail === "string") return detail;
  if (Array.isArray(detail)) {
    return detail
      .map((item) => (item && typeof item.msg === "string" ? item.msg : JSON.stringify(item)))
      .join("; ");
  }
  return JSON.stringify(detail);
}

/**
 * Perform a JSON request against the API and return the parsed body.
 *
 * @param {string} path     Path relative to BASE (may include a query string).
 * @param {object} options  fetch() options plus `sectionRetry` (default true):
 *                          when a 401 "section code required" comes back, prompt
 *                          for the section code once and replay the request.
 * @returns {Promise<any>}  Parsed JSON, or null for a 204 response.
 * @throws {Error}          "<status>: <detail>" for any non-OK response.
 */
async function request(path, options = {}) {
  const { sectionRetry = true, ...fetchOptions } = options;
  const res = await fetch(BASE + path, {
    headers: { "Content-Type": "application/json" },
    ...fetchOptions,
  });

  if (!res.ok) {
    let detail = res.statusText;
    try {
      const reported = (await res.json()).detail;
      if (reported) detail = describeErrorDetail(reported);
    } catch {
      // Body was not JSON — keep the HTTP status text.
    }
    if (res.status === 401 && detail === "section code required" && sectionRetry) {
      if (await unlockCurrentSection()) {
        // Replay once; sectionRetry:false prevents a prompt loop.
        return request(path, { ...options, sectionRetry: false });
      }
    }
    throw new Error(`${res.status}: ${detail}`);
  }

  if (res.status === 204) return null;
  return res.json();
}
|
||||
|
||||
/**
 * Build a query string scoped to a (vertical, section) pair.
 *
 * `vertical` falls back to the one in the URL when null/undefined.
 * `section` falls back to the URL's section only when it is `undefined`;
 * passing `null` explicitly omits it, which is how vertical-level pages
 * ask for all sections. Entries of `extra` that are null, undefined or
 * `false` are dropped; everything else is stringified.
 */
function qs(extra = {}, { vertical, section } = {}) {
  const params = new URLSearchParams();
  params.set("vertical", vertical ?? getVertical());

  const scopedSection = section !== undefined ? section : getSection();
  if (scopedSection) params.set("section", scopedSection);

  Object.entries(extra)
    .filter(([, value]) => value != null && value !== false)
    .forEach(([key, value]) => params.set(key, String(value)));

  return params.toString();
}
|
||||
|
||||
// `path?query`, with the query scoped to the given (vertical, section).
const scopedUrl = (path, scope, extra = {}) => `${path}?${qs(extra, scope)}`;

// URL of a single section resource.
const sectionUrl = (vertical, slug) =>
  `/sections/${encodeURIComponent(vertical)}/${encodeURIComponent(slug)}`;

/**
 * Thin client for the backend REST API. Every method returns the promise
 * produced by `request`. Methods taking a `scope` ({ vertical, section })
 * default to the vertical/section encoded in the current URL.
 */
export const api = {
  // --- Access control: these never trigger the section-code prompt. ---
  accessMe() {
    return request("/access/me");
  },
  adminLogin(password) {
    return request("/access/admin-login", {
      method: "POST",
      body: JSON.stringify({ password }),
      sectionRetry: false,
    });
  },
  adminLogout() {
    return request("/access/admin-logout", { method: "POST", sectionRetry: false });
  },
  sectionLogin({ vertical, section, code }) {
    return request("/access/section-login", {
      method: "POST",
      body: JSON.stringify({ vertical, section, code }),
      sectionRetry: false,
    });
  },

  // --- Telegram auth: section-agnostic. ---
  authStatus() {
    return request("/auth/status");
  },
  authSendCode() {
    return request("/auth/send-code", { method: "POST" });
  },
  authSubmitCode(code) {
    return request("/auth/submit-code", { method: "POST", body: JSON.stringify({ code }) });
  },
  authSubmitPassword(password) {
    return request("/auth/submit-password", { method: "POST", body: JSON.stringify({ password }) });
  },
  authLogout() {
    return request("/auth/logout", { method: "POST" });
  },

  // --- Sections (sub-sections within a vertical). ---
  listSections(vertical) {
    return request(scopedUrl("/sections", { vertical, section: null }));
  },
  createSection({ vertical, slug, title, emoji, description, accessCode }) {
    const payload = {
      vertical: vertical ?? getVertical(),
      slug, title, emoji, description, access_code: accessCode,
    };
    return request("/sections", { method: "POST", body: JSON.stringify(payload) });
  },
  updateSection(vertical, slug, patch) {
    return request(sectionUrl(vertical, slug), {
      method: "PATCH",
      body: JSON.stringify(patch),
    });
  },
  deleteSection(vertical, slug) {
    return request(sectionUrl(vertical, slug), { method: "DELETE" });
  },

  // --- Scoped reads/writes: implicit (vertical, section) from the URL. ---
  globalStats(scope) {
    return request(scopedUrl("/stats", scope));
  },

  listChannels(scope) {
    return request(scopedUrl("/channels", scope));
  },
  getChannel(id, scope) {
    return request(scopedUrl(`/channels/${id}`, scope));
  },
  channelStats(id, scope) {
    return request(scopedUrl(`/channels/${id}/stats`, scope));
  },
  addChannel(identifier, scope = {}) {
    const vertical = scope.vertical ?? getVertical();
    const section = scope.section === undefined ? getSection() : scope.section;
    // A channel always belongs to exactly one section.
    if (!section) {
      throw new Error("addChannel requires a section context");
    }
    return request("/channels", {
      method: "POST",
      body: JSON.stringify({ identifier, vertical, section }),
    });
  },
  updateChannel(id, patch, scope) {
    return request(scopedUrl(`/channels/${id}`, scope), {
      method: "PATCH", body: JSON.stringify(patch),
    });
  },
  deleteChannel(id, scope) {
    return request(scopedUrl(`/channels/${id}`, scope), { method: "DELETE" });
  },
  pollChannel(id, scope) {
    return request(scopedUrl(`/channels/${id}/poll`, scope), { method: "POST" });
  },
  backfillMedia(id, batch = 50, scope) {
    return request(scopedUrl(`/channels/${id}/backfill-media`, scope, { batch }), { method: "POST" });
  },
  reanalyze(id, batch = 500, scope) {
    return request(scopedUrl(`/channels/${id}/reanalyze`, scope, { batch }), { method: "POST" });
  },

  pollAll(scope) {
    return request(scopedUrl("/poll", scope), { method: "POST" });
  },

  listMessages({ channelId, q, realEstate, hrKind, hasPhone, leadsOnly,
                 minConfidence, limit = 50, offset = 0,
                 vertical, section } = {}) {
    const extra = { limit, offset };
    if (channelId) extra.channel_id = channelId;
    if (q) extra.q = q;
    if (realEstate) extra.real_estate = realEstate;
    if (hrKind) extra.hr_kind = hrKind;
    if (hasPhone) extra.has_phone = "true";
    if (leadsOnly) {
      extra.leads_only = "true";
      // The confidence threshold is only sent together with leads_only.
      if (minConfidence != null) extra.min_confidence = minConfidence;
    }
    return request(scopedUrl("/messages", { vertical, section }, extra));
  },
  getMessage(id, scope) {
    return request(scopedUrl(`/messages/${id}`, scope));
  },

  // --- Local LLM. ---
  llmStatus() {
    return request("/llm/status");
  },
  llmQueue(scope) {
    return request(scopedUrl("/llm/queue", scope));
  },
  llmPromptGet(scope) {
    return request(scopedUrl("/llm/prompt", scope));
  },
  llmPromptSave(prompt, scope) {
    return request(scopedUrl("/llm/prompt", scope), {
      method: "PUT", body: JSON.stringify({ prompt }),
    });
  },
  llmPromptReset(scope) {
    return request(scopedUrl("/llm/prompt", scope), { method: "DELETE" });
  },
};
|
||||
|
||||
/**
 * Show a transient notification.
 * Appends a `div.toast.<type>` with the message as plain text to <body>
 * and removes it again after 3.5 seconds.
 */
export function toast(message, type = "info") {
  const note = Object.assign(document.createElement("div"), {
    className: `toast ${type}`,
    textContent: message,
  });
  document.body.appendChild(note);
  setTimeout(() => note.remove(), 3500);
}
|
||||
|
||||
/** Format an ISO timestamp in the viewer's locale; "—" for a missing value. */
export function fmtDate(iso) {
  return iso ? new Date(iso).toLocaleString() : "—";
}
|
||||
|
||||
/**
 * Format an ISO timestamp as a coarse "N{s,m,h,d} ago" string.
 *
 * Returns "—" for a missing or unparseable value. Timestamps slightly in
 * the future (client/server clock skew) are clamped to "0s ago" rather
 * than producing a negative age.
 */
export function fmtRelative(iso) {
  if (!iso) return "—";
  const timestamp = new Date(iso).getTime();
  if (Number.isNaN(timestamp)) return "—";

  const seconds = Math.max(0, (Date.now() - timestamp) / 1000);
  if (seconds < 60) return `${Math.floor(seconds)}s ago`;
  if (seconds < 3600) return `${Math.floor(seconds / 60)}m ago`;
  if (seconds < 86400) return `${Math.floor(seconds / 3600)}h ago`;
  return `${Math.floor(seconds / 86400)}d ago`;
}
|
||||
120
src/parser_bot/web/static/js/auth.js
Normal file
120
src/parser_bot/web/static/js/auth.js
Normal file
@@ -0,0 +1,120 @@
|
||||
import { api, toast } from "/api/monitoring-tg/static/js/api.js";
|
||||
|
||||
// Validated `?return=` target, or null. Only same-origin relative paths are
// accepted to avoid an open redirect; `//host` and `/\host` (browsers treat
// a backslash like a slash) are both rejected.
const returnTo = (() => {
  const raw = new URLSearchParams(location.search).get("return");
  const isLocalPath =
    typeof raw === "string" &&
    raw.startsWith("/") &&
    !raw.startsWith("//") &&
    !raw.startsWith("/\\");
  return isLocalPath ? raw : null;
})();

// Point the "return" link back at the page that sent the user here.
const returnLink = document.getElementById("return-link");
if (returnLink && returnTo) {
  returnLink.href = returnTo;
  // The label lives in a nested <button>; tolerate markup without one.
  const label = returnLink.querySelector("button");
  if (label) label.textContent = "← Вернуться";
}
|
||||
|
||||
// Wizard panels; each one corresponds to an element with id `step-<name>`.
const steps = ["idle", "code", "password", "done"];

/** Reveal the panel for `step` and hide every other panel. */
function show(step) {
  for (const name of steps) {
    const panel = document.getElementById(`step-${name}`);
    panel.hidden = name !== step;
  }
}
|
||||
|
||||
/** Replace the contents of #status-block with the given HTML markup. */
function setStatus(html) {
  const block = document.getElementById("status-block");
  block.innerHTML = html;
}
|
||||
|
||||
/**
 * Load the Telegram auth status and render it: fill in the phone number in
 * both places, then show the "done" panel when authorized and the "idle"
 * panel otherwise.
 */
async function refresh() {
  const status = await api.authStatus();

  const phoneText = status.phone || "—";
  for (const id of ["phone", "phone-2"]) {
    document.getElementById(id).textContent = phoneText;
  }

  if (!status.authorized) {
    setStatus(`<div class="badge warn">Не авторизовано</div>`);
    show("idle");
    return;
  }

  setStatus(`<div class="badge ok">Авторизовано</div>`);
  document.getElementById("username").textContent = status.username || "(unnamed)";
  show("done");
}
|
||||
|
||||
/**
 * "Send code" click: request a login code and move to the code-entry step.
 * The clicked element is disabled for the duration of the request.
 */
async function onSendCode(event) {
  const trigger = event.target;
  trigger.disabled = true;
  try {
    await api.authSendCode();
    toast("Код отправлен в Telegram", "success");
    show("code");
    document.getElementById("code").focus();
  } catch (err) {
    toast(err.message, "error");
  } finally {
    trigger.disabled = false;
  }
}

document.getElementById("btn-send").addEventListener("click", onSendCode);
|
||||
|
||||
/**
 * "Resend" click: request another login code without leaving the current
 * step. The clicked element is disabled for the duration of the request.
 */
async function onResendCode(event) {
  const trigger = event.target;
  trigger.disabled = true;
  try {
    await api.authSendCode();
    toast("Новый код отправлен", "success");
  } catch (err) {
    toast(err.message, "error");
  } finally {
    trigger.disabled = false;
  }
}

document.getElementById("btn-resend").addEventListener("click", onResendCode);
|
||||
|
||||
/**
 * Code form submit: send the entered login code. When the backend asks for
 * a 2FA password, switch to the password step; otherwise re-render status.
 */
async function onSubmitCode(event) {
  event.preventDefault();
  const code = document.getElementById("code").value.trim();
  const submitBtn = event.target.querySelector("button");
  submitBtn.disabled = true;
  try {
    const result = await api.authSubmitCode(code);
    if (!result.needs_password) {
      toast("Готово", "success");
      await refresh();
      return;
    }
    toast("Введи 2FA-пароль", "success");
    show("password");
    document.getElementById("password").focus();
  } catch (err) {
    toast(err.message, "error");
  } finally {
    submitBtn.disabled = false;
  }
}

document.getElementById("form-code").addEventListener("submit", onSubmitCode);
|
||||
|
||||
/**
 * Password form submit: send the 2FA password, clear the field on success
 * and re-render the auth status.
 */
async function onSubmitPassword(event) {
  event.preventDefault();
  const field = document.getElementById("password");
  const password = field.value;
  const submitBtn = event.target.querySelector("button");
  submitBtn.disabled = true;
  try {
    await api.authSubmitPassword(password);
    toast("Авторизовано", "success");
    document.getElementById("password").value = "";
    await refresh();
  } catch (err) {
    toast(err.message, "error");
  } finally {
    submitBtn.disabled = false;
  }
}

document.getElementById("form-password").addEventListener("submit", onSubmitPassword);
|
||||
|
||||
/**
 * "Logout" click: after confirmation, end the Telegram session and
 * re-render the auth status.
 */
async function onTelegramLogout(event) {
  const confirmed = confirm("Выйти из Telegram-сессии?");
  if (!confirmed) return;

  const trigger = event.target;
  trigger.disabled = true;
  try {
    await api.authLogout();
    toast("Сессия завершена", "success");
    await refresh();
  } catch (err) {
    toast(err.message, "error");
  } finally {
    trigger.disabled = false;
  }
}

document.getElementById("btn-logout").addEventListener("click", onTelegramLogout);
|
||||
|
||||
refresh().catch(err => toast(err.message, "error"));
|
||||
132
src/parser_bot/web/static/js/channels.js
Normal file
132
src/parser_bot/web/static/js/channels.js
Normal file
@@ -0,0 +1,132 @@
|
||||
import { api, toast, fmtRelative } from "/api/monitoring-tg/static/js/api.js";
|
||||
import { isAdmin } from "/api/monitoring-tg/static/js/access.js";
|
||||
import { getVertical, getSection, sectionBase, VERTICAL_META } from "/api/monitoring-tg/static/js/vertical.js";
|
||||
|
||||
const V = getVertical();
|
||||
const section = getSection();
|
||||
const sBase = sectionBase();
|
||||
const meta = VERTICAL_META[V];
|
||||
|
||||
/**
 * HTML-escape a value for interpolation into markup (text or a quoted
 * attribute). null/undefined become ""; other values are stringified.
 */
function escape(s) {
  if (s == null) return "";
  // Every character matched by the regex below has an entry here.
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  return String(s).replace(/[&<>"']/g, (ch) => entities[ch]);
}
|
||||
|
||||
/**
 * Fetch the channels of the current scope with their per-channel stats and
 * render the table body. Mutating controls (toggle, poll, backfill,
 * reanalyze, delete) are enabled or shown only for admins.
 */
async function load() {
  const admin = await isAdmin();
  const channels = await api.listChannels();
  const tbody = document.getElementById("tbody");

  if (!channels.length) {
    tbody.innerHTML = `<tr><td colspan="7" class="empty">Каналов пока нет</td></tr>`;
    return;
  }

  // A failed stats request degrades to an empty stats object for that row.
  const stats = await Promise.all(
    channels.map((channel) => api.channelStats(channel.id).catch(() => null))
  );

  const toggleLock = admin ? "" : "disabled";
  const adminButtons = admin ? `
          <button class="secondary" data-action="poll">Опросить</button>
          <button class="secondary" data-action="backfill-media">Подкачать медиа</button>
          <button class="secondary" data-action="reanalyze">Переанализировать</button>
          <button class="danger" data-action="delete">Удалить</button>
        ` : "";

  const renderRow = (c, s) => `
    <tr data-id="${c.id}">
      <td class="muted mono">${c.id}</td>
      <td>
        <div>${escape(c.title || "—")}</div>
        <div class="muted mono" style="font-size:12px">${escape(c.identifier)}</div>
      </td>
      <td class="mono muted">${c.tg_id ?? "—"}</td>
      <td>${(s.message_count ?? 0).toLocaleString()}</td>
      <td>${fmtRelative(c.last_polled_at)}</td>
      <td>
        <label class="row" style="gap:6px">
          <input type="checkbox" data-action="toggle" ${c.is_active ? "checked" : ""} ${toggleLock} />
          <span class="badge ${c.is_active ? "ok" : "off"}">${c.is_active ? "on" : "off"}</span>
        </label>
      </td>
      <td>
        <div class="row" style="gap:6px">
          <a href="${sBase}/messages.html?channel_id=${c.id}" class="badge">сообщения</a>
          ${adminButtons}
        </div>
      </td>
    </tr>`;

  tbody.innerHTML = channels
    .map((channel, index) => renderRow(channel, stats[index] || {}))
    .join("");
}
|
||||
|
||||
/**
 * Add-channel form submit: add the trimmed identifier to the current
 * section, clear the input and reload the table. Blank input is ignored.
 */
async function onAddChannel(event) {
  event.preventDefault();
  const input = document.getElementById("identifier");
  const identifier = input.value.trim();
  if (!identifier) return;

  const submitBtn = event.target.querySelector("button");
  submitBtn.disabled = true;
  try {
    await api.addChannel(identifier);
    const where = section ? `${meta.short} / ${section}` : meta.short;
    toast(`Канал добавлен в "${where}"`, "success");
    input.value = "";
    await load();
  } catch (err) {
    toast(err.message, "error");
  } finally {
    submitBtn.disabled = false;
  }
}

document.getElementById("add-form").addEventListener("submit", onAddChannel);
|
||||
|
||||
/**
 * Call `runBatch` repeatedly until the backend reports nothing pending or a
 * batch makes no progress. `onProgress(total)` is invoked before each batch.
 * Resolves to the accumulated `updated` count and the last `pending` value.
 */
async function drainInBatches(runBatch, onProgress) {
  let total = 0;
  let pending = Infinity;
  while (pending > 0) {
    onProgress(total);
    const res = await runBatch();
    total += res.updated;
    pending = res.pending;
    // Guard against spinning forever when nothing more can be processed.
    if (res.updated === 0) break;
  }
  return { total, pending };
}

// Delegated click handler for every per-row control in the channels table.
// The control's `data-action` selects the operation; the channel id comes
// from the enclosing row's `data-id`.
document.getElementById("tbody").addEventListener("click", async (e) => {
  const btn = e.target.closest("[data-action]");
  if (!btn) return;
  const tr = btn.closest("tr");
  const id = Number(tr.dataset.id);
  const action = btn.dataset.action;
  try {
    if (action === "delete") {
      if (!confirm("Удалить канал и все его сообщения?")) return;
      await api.deleteChannel(id);
      toast("Удалено", "success");
      await load();
    } else if (action === "poll") {
      // No explicit re-enable: load() re-renders the row on either outcome.
      btn.disabled = true;
      const res = await api.pollChannel(id);
      toast(`Добавлено ${res.inserted} сообщений`, "success");
      await load();
    } else if (action === "backfill-media") {
      btn.disabled = true;
      try {
        const { total, pending } = await drainInBatches(
          () => api.backfillMedia(id, 50),
          (done) => { btn.textContent = `Качаю... (готово: ${done})`; }
        );
        toast(`Подкачано ${total}, осталось ${pending}`, "success");
      } finally {
        // The table is not re-rendered on success, so restore the button
        // here; otherwise it would stay disabled until a page reload.
        btn.textContent = "Подкачать медиа";
        btn.disabled = false;
      }
    } else if (action === "reanalyze") {
      btn.disabled = true;
      try {
        const { total, pending } = await drainInBatches(
          () => api.reanalyze(id, 500),
          (done) => { btn.textContent = `Анализирую... (${done})`; }
        );
        toast(`Проанализировано ${total} сообщений, осталось ${pending}`, "success");
      } finally {
        btn.textContent = "Переанализировать";
        btn.disabled = false;
      }
    } else if (action === "toggle") {
      const isActive = btn.checked;
      await api.updateChannel(id, { is_active: isActive });
      toast(isActive ? "Канал включён" : "Канал выключен", "success");
      await load();
    }
  } catch (err) {
    toast(err.message, "error");
    // Re-render so the table reflects server state (e.g. reverts a toggle).
    await load();
  }
});
|
||||
|
||||
load().catch(err => toast(err.message, "error"));
|
||||
87
src/parser_bot/web/static/js/dashboard.js
Normal file
87
src/parser_bot/web/static/js/dashboard.js
Normal file
@@ -0,0 +1,87 @@
|
||||
import { api, toast, fmtRelative } from "/api/monitoring-tg/static/js/api.js";
|
||||
import { isAdmin } from "/api/monitoring-tg/static/js/access.js";
|
||||
import { getVertical, getSection, sectionBase, VERTICAL_META } from "/api/monitoring-tg/static/js/vertical.js";
|
||||
|
||||
const V = getVertical();
|
||||
const section = getSection();
|
||||
const sBase = sectionBase();
|
||||
const meta = VERTICAL_META[V];
|
||||
|
||||
/**
 * HTML-escape a value for interpolation into markup (text or a quoted
 * attribute). null/undefined become ""; other values are stringified.
 */
function escape(s) {
  if (s == null) return "";
  // Every character matched by the regex below has an entry here.
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  return String(s).replace(/[&<>"']/g, (ch) => entities[ch]);
}
|
||||
|
||||
/**
 * Fetch global stats plus LLM status and queue length, then render the stat
 * cards. LLM status and queue failures degrade to placeholders; a failure
 * of the stats request itself propagates to the caller.
 */
async function loadStats() {
  const llmFallback = () => ({ enabled: false, ready: false, model: "—" });
  const queueFallback = () => ({ pending: null });
  const [stats, llm, queue] = await Promise.all([
    api.globalStats(),
    api.llmStatus().catch(llmFallback),
    api.llmQueue().catch(queueFallback),
  ]);

  let llmBadge = `<span class="badge off">off</span>`;
  if (llm.enabled) {
    llmBadge = llm.ready
      ? `<span class="badge ok">ready</span>`
      : `<span class="badge warn">загружается</span>`;
  }
  const queueValue = queue.pending == null ? "—" : queue.pending.toLocaleString();
  const leadsLink = `<a href="${sBase}/messages.html?leads_only=true">${(stats.leads_last_24h ?? 0).toLocaleString()}</a>`;
  const llmValue = `${llmBadge}<div class="muted mono" style="font-size:11px;margin-top:4px">${escape(llm.model || "")}</div>`;

  // One stat card; `valueAttrs` carries optional extra attributes for the value div.
  const card = (label, value, valueAttrs = "") =>
    `<div class="card stat"><div class="label">${label}</div><div class="value"${valueAttrs}>${value}</div></div>`;

  const cards = [
    card("Каналы", `${stats.channels_active} / ${stats.channels_total}`),
    card("Сообщений всего", stats.messages_total.toLocaleString()),
    card("Сообщений за 24ч", stats.messages_last_24h.toLocaleString()),
    card("🎯 Лидов всего", (stats.leads_total ?? 0).toLocaleString()),
    card("🎯 Лидов за 24ч", leadsLink),
    card("⏳ В очереди ИИ", queueValue),
    card("Период опроса", `${stats.poll_interval_seconds}s`),
    card("Последний опрос", fmtRelative(stats.last_poll_at)),
    card("Локальный ИИ", llmValue, ` style="font-size:14px"`),
  ];
  document.getElementById("stats").innerHTML = `\n${cards.join("\n")}\n`;
}
|
||||
|
||||
/**
 * Render the read-only channel overview table for the dashboard, with
 * message count, last message time, last poll time and on/off state.
 */
async function loadChannels() {
  const channels = await api.listChannels();
  const tbody = document.getElementById("channels-tbody");

  if (!channels.length) {
    tbody.innerHTML = `<tr><td colspan="5" class="empty">Каналов в этом подразделе пока нет — добавьте их на странице <a href="${sBase}/channels.html">Каналы</a></td></tr>`;
    return;
  }

  // A failed stats request degrades to an empty stats object for that row.
  const stats = await Promise.all(
    channels.map((channel) => api.channelStats(channel.id).catch(() => null))
  );

  const renderRow = (c, s) => {
    const stateBadge = c.is_active
      ? '<span class="badge ok">on</span>'
      : '<span class="badge off">off</span>';
    return `
    <tr>
      <td>
        <div><a href="${sBase}/messages.html?channel_id=${c.id}">${escape(c.title || c.identifier)}</a></div>
        <div class="muted mono" style="font-size:12px">${escape(c.identifier)}</div>
      </td>
      <td>${(s.message_count ?? 0).toLocaleString()}</td>
      <td>${fmtRelative(s.last_message_at)}</td>
      <td>${fmtRelative(c.last_polled_at)}</td>
      <td>${stateBadge}</td>
    </tr>`;
  };

  tbody.innerHTML = channels
    .map((channel, index) => renderRow(channel, stats[index] || {}))
    .join("");
}
|
||||
|
||||
/**
 * "Poll all" click: queue a poll for every channel in the current scope,
 * report how many were queued and refresh the dashboard.
 */
async function onPollAll(event) {
  const trigger = event.target;
  trigger.disabled = true;
  try {
    const res = await api.pollAll();
    const scope = section ? `${meta.short} / ${section}` : meta.short;
    toast(`В очереди ${res.queued ?? 0} каналов (${scope}) — опрос идёт в фоне`, "success");
    await loadAll();
  } catch (err) {
    toast(err.message, "error");
  } finally {
    trigger.disabled = false;
  }
}

document.getElementById("poll-all").addEventListener("click", onPollAll);
|
||||
|
||||
/**
 * Refresh the whole dashboard: show the "poll all" button only to admins,
 * then reload stats and the channel table in parallel. Any failure is
 * reported as a toast; this function itself never rejects.
 */
async function loadAll() {
  try {
    const admin = await isAdmin();
    document.getElementById("poll-all").hidden = !admin;
    await Promise.all([loadStats(), loadChannels()]);
  } catch (err) {
    toast(err.message, "error");
  }
}
|
||||
|
||||
loadAll();
|
||||
setInterval(loadAll, 15000);
|
||||
433
src/parser_bot/web/static/js/messages.js
Normal file
433
src/parser_bot/web/static/js/messages.js
Normal file
@@ -0,0 +1,433 @@
|
||||
import { api, toast, fmtDate } from "/api/monitoring-tg/static/js/api.js";
|
||||
import { getVertical, getSection, VERTICAL_META } from "/api/monitoring-tg/static/js/vertical.js";
|
||||
|
||||
const V = getVertical();
|
||||
const section = getSection();
|
||||
const meta = VERTICAL_META[V];
|
||||
|
||||
const state = {
|
||||
offset: 0,
|
||||
limit: 50,
|
||||
channelId: null,
|
||||
q: "",
|
||||
realEstate: "",
|
||||
hrKind: "",
|
||||
hasPhone: false,
|
||||
leadsOnly: false,
|
||||
minConfidence: 0.5,
|
||||
channels: [],
|
||||
autorefresh: false,
|
||||
timer: null,
|
||||
};
|
||||
|
||||
/**
 * HTML-escape a value for interpolation into markup (text or a quoted
 * attribute). null/undefined become ""; other values are stringified.
 */
function escape(s) {
  if (s == null) return "";
  // Every character matched by the regex below has an entry here.
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  return String(s).replace(/[&<>"']/g, (ch) => entities[ch]);
}
|
||||
|
||||
/**
 * HTML-escape `text` and wrap every case-insensitive occurrence of `q` in
 * a <mark>.
 *
 * Matching runs on the raw text and each piece is escaped afterwards, so
 * the query can never match inside an entity such as `&amp;` and break it.
 *
 * @param {string} text  Raw message text (unescaped).
 * @param {string} q     Search query; empty means "no highlighting".
 * @returns {string}     Markup safe to assign to innerHTML.
 */
function highlight(text, q) {
  if (!q || !text) return escape(text);

  // Treat the query as a literal by escaping regex metacharacters.
  const literal = String(q).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // split() with a capture group yields [plain, match, plain, match, ...].
  const pieces = String(text).split(new RegExp(`(${literal})`, "gi"));

  return pieces
    .map((piece, index) =>
      index % 2 === 1
        ? `<mark style="background:#f1c40f33;color:inherit">${escape(piece)}</mark>`
        : escape(piece)
    )
    .join("");
}
|
||||
|
||||
/**
 * Display name for a channel id: the loaded channel's title, else its
 * identifier, else "#<id>" when the channel is not in `state.channels`.
 */
function channelTitle(id) {
  for (const channel of state.channels) {
    if (channel.id === id) return channel.title || channel.identifier;
  }
  return `#${id}`;
}
|
||||
|
||||
/**
 * Human-readable byte count: "" for null/undefined, whole bytes below
 * 1 KiB, whole kilobytes below 1 MiB, megabytes with one decimal above.
 */
function fmtSize(bytes) {
  if (bytes == null) return "";
  const KIB = 1024;
  const MIB = KIB * KIB;
  return bytes < KIB
    ? `${bytes}B`
    : bytes < MIB
      ? `${(bytes / KIB).toFixed(0)}KB`
      : `${(bytes / MIB).toFixed(1)}MB`;
}
|
||||
|
||||
const REAL_ESTATE_LABELS = { sale: "продажа", rent: "аренда", purchase: "покупка" };
|
||||
const HR_KIND_LABELS = { vacancy: "вакансия", resume: "резюме", contact: "контакт" };
|
||||
|
||||
/**
 * Build the contact badges (HTML strings) derived from a message itself:
 * a link to the original post, the sender (a t.me link when a username is
 * known, otherwise the plain name), and any Telegram handles listed in
 * `m.extracted.tg_handles`.
 *
 * Every link opens in a new tab with rel="noopener noreferrer", so the
 * target page gets neither a `window.opener` handle nor this app's URL as
 * referrer.
 *
 * @param {object|null} m  Message object; a null/undefined value yields [].
 * @returns {string[]}     Badge markup, in display order.
 */
function senderContacts(m) {
  const NEW_TAB = `target="_blank" rel="noopener noreferrer"`;
  // t.me link for a handle given with or without a leading "@".
  const tgBadge = (handle, label) =>
    `<a class="badge tg" href="https://t.me/${escape(handle.replace(/^@/, ""))}" ${NEW_TAB}>✉️ ${escape(label)}</a>`;

  const contacts = [];
  if (m && m.post_url) {
    contacts.push(`<a class="badge tg-link" href="${escape(m.post_url)}" ${NEW_TAB}>📬 Открыть в Telegram</a>`);
  }
  if (m && m.sender_username) {
    const shown = m.sender_username.startsWith("@") ? m.sender_username : "@" + m.sender_username;
    contacts.push(tgBadge(m.sender_username, shown));
  } else if (m && m.sender_name) {
    contacts.push(`<span class="badge name">✍️ ${escape(m.sender_name)}</span>`);
  }
  const handles = (m && m.extracted && m.extracted.tg_handles) || [];
  for (const handle of handles) {
    contacts.push(tgBadge(handle, handle));
  }
  return contacts;
}
|
||||
|
||||
/**
 * Render the lead card for a real-estate listing.
 *
 * Returns "" when there is no lead or it is not flagged as a listing.
 * The card shows a " · "-joined fact line (deal kind, property type,
 * rooms, area, price or bare currency, location), the summary, the
 * confidence as a percentage, and contact badges (lead phone and name
 * followed by the message's own sender contacts).
 */
function renderReLead(lead, m) {
  if (!lead || !lead.is_listing) return "";

  // Confidence bucket drives the card's colour class.
  let tone = "lead-weak";
  if (lead.confidence >= 0.7) tone = "lead-strong";
  else if (lead.confidence >= 0.4) tone = "lead-medium";

  // Prefer the free-text price; fall back to "<value> <currency>".
  let priceBit = lead.price_text;
  if (!priceBit) {
    priceBit = lead.price_value != null
      ? `${lead.price_value.toLocaleString()}${lead.currency ? " " + lead.currency : ""}`
      : null;
  }

  const bits = [
    lead.kind && (REAL_ESTATE_LABELS[lead.kind] || lead.kind),
    lead.property_type,
    lead.rooms,
    lead.area_m2 && `${lead.area_m2} м²`,
    priceBit || lead.currency,
    lead.location,
  ].filter(Boolean);

  const facts = bits.length
    ? `<div class="lead-facts">${escape(bits.join(" · "))}</div>` : "";
  const summary = lead.summary
    ? `<div class="lead-summary">${escape(lead.summary)}</div>` : "";

  const contacts = [
    lead.contact_phone
      ? `<a class="badge phone" href="tel:${escape(lead.contact_phone)}">📞 ${escape(lead.contact_phone)}</a>`
      : "",
    lead.contact_name
      ? `<span class="badge name">👤 ${escape(lead.contact_name)}</span>`
      : "",
    ...senderContacts(m),
  ].filter(Boolean);
  const contactsBlock = contacts.length
    ? `<div class="message-tags">${contacts.join(" ")}</div>` : "";

  const percent = (lead.confidence * 100).toFixed(0);

  return `
    <div class="lead-card ${tone}">
      <div class="lead-head">
        <span class="badge lead">🎯 ЛИД · 🏠</span>
        ${facts}
        <span class="lead-confidence">${percent}%</span>
      </div>
      ${summary}
      ${contactsBlock}
    </div>`;
}
|
||||
|
||||
/**
 * Render the lead card for an HR lead (vacancy, resume or contact).
 *
 * Returns "" when there is no lead or it is not flagged as a lead.
 * The card shows a " · "-joined fact line (kind, title, company, candidate,
 * experience, employment type, remote/office, location, salary or bare
 * currency), the summary, up to 12 skill badges, the confidence as a
 * percentage, and contact badges (lead phone and name followed by the
 * message's own sender contacts).
 */
function renderHrLead(lead, m) {
  if (!lead || !lead.is_lead) return "";

  // Confidence bucket drives the card's colour class.
  let tone = "lead-weak";
  if (lead.confidence >= 0.7) tone = "lead-strong";
  else if (lead.confidence >= 0.4) tone = "lead-medium";

  // `remote` is tri-state: true, false, or unknown (anything else).
  let remoteBit = "";
  if (lead.remote === true) remoteBit = "удалёнка";
  else if (lead.remote === false) remoteBit = "офис";

  // Prefer the free-text salary; fall back to "<value> <currency>".
  let salaryBit = lead.salary_text;
  if (!salaryBit) {
    salaryBit = lead.salary_value != null
      ? `${lead.salary_value.toLocaleString()}${lead.currency ? " " + lead.currency : ""}`
      : null;
  }

  const bits = [
    lead.kind && (HR_KIND_LABELS[lead.kind] || lead.kind),
    lead.title,
    lead.company,
    lead.candidate_name,
    lead.experience_years != null && `${lead.experience_years}+ лет опыта`,
    lead.employment_type,
    remoteBit,
    lead.location,
    salaryBit || lead.currency,
  ].filter(Boolean);

  const facts = bits.length
    ? `<div class="lead-facts">${escape(bits.join(" · "))}</div>` : "";
  const summary = lead.summary
    ? `<div class="lead-summary">${escape(lead.summary)}</div>` : "";

  const skillBadges = (lead.skills || [])
    .slice(0, 12)
    .map((skill) => `<span class="badge">${escape(skill)}</span>`);
  const skillsBlock = skillBadges.length
    ? `<div class="message-tags">${skillBadges.join(" ")}</div>`
    : "";

  const contacts = [
    lead.contact_phone
      ? `<a class="badge phone" href="tel:${escape(lead.contact_phone)}">📞 ${escape(lead.contact_phone)}</a>`
      : "",
    lead.contact_name
      ? `<span class="badge name">👤 ${escape(lead.contact_name)}</span>`
      : "",
    ...senderContacts(m),
  ].filter(Boolean);
  const contactsBlock = contacts.length
    ? `<div class="message-tags">${contacts.join(" ")}</div>` : "";

  const percent = (lead.confidence * 100).toFixed(0);

  return `
    <div class="lead-card ${tone}">
      <div class="lead-head">
        <span class="badge lead">🎯 ЛИД · 👥</span>
        ${facts}
        <span class="lead-confidence">${percent}%</span>
      </div>
      ${summary}
      ${skillsBlock}
      ${contactsBlock}
    </div>`;
}
|
||||
|
||||
/**
 * Render the row of "extracted" badges (regex real-estate facts, phones,
 * names, Telegram handles) for a message.
 *
 * Values already present in the active lead card are not repeated, and
 * Telegram handles are shown only when no lead card is displayed.
 *
 * @param {object|null|undefined} ex  The message's `extracted` payload.
 * @returns {string} HTML for the badge row, or "" when there is nothing to show.
 */
function renderExtracted(ex) {
  if (!ex) return "";

  const badges = [];
  const reLeadShown = Boolean(ex.lead && ex.lead.is_listing);
  const hrLeadShown = Boolean(ex.hr_lead && ex.hr_lead.is_lead);

  // Regex-derived real-estate facts are a fallback for when no listing card is shown.
  const re = ex.real_estate;
  if (V === "real_estate" && re && !reLeadShown) {
    const facts = [
      re.kind ? (REAL_ESTATE_LABELS[re.kind] || re.kind) : null,
      re.property_type,
      re.rooms,
      re.area_m2 ? `${re.area_m2} м²` : null,
      re.price,
    ].filter(Boolean);
    if (facts.length) {
      badges.push(`<span class="badge re">🏠 regex: ${escape(facts.join(" · "))}</span>`);
    }
  }

  // Phones/names from regex are still useful even when there's a lead — show
  // only those that aren't already inside the lead card.
  const activeLead = V === "hr" ? ex.hr_lead : ex.lead;
  const inLead = new Set(
    activeLead ? [activeLead.contact_phone, activeLead.contact_name].filter(Boolean) : []
  );

  (ex.phones || [])
    .filter(phone => !inLead.has(phone))
    .forEach(phone => {
      badges.push(`<a class="badge phone" href="tel:${escape(phone)}">📞 ${escape(phone)}</a>`);
    });

  const allNames = ex.names || [];
  allNames
    .slice(0, 3)
    .filter(name => !inLead.has(name))
    .forEach(name => {
      badges.push(`<span class="badge name">👤 ${escape(name)}</span>`);
    });
  if (allNames.length > 3) {
    badges.push(`<span class="badge name muted">+${allNames.length - 3}</span>`);
  }

  const leadShown = (V === "hr" && hrLeadShown) || (V === "real_estate" && reLeadShown);
  if (!leadShown) {
    for (const handle of (ex.tg_handles || [])) {
      const bare = handle.replace(/^@/, "");
      badges.push(
        `<a class="badge tg" href="https://t.me/${escape(bare)}" target="_blank">✉️ ${escape(handle)}</a>`
      );
    }
  }

  return badges.length ? `<div class="message-tags">${badges.join(" ")}</div>` : "";
}
|
||||
|
||||
/**
 * Render the media attachments of a message.
 *
 * Each file descriptor is rendered according to its state and `kind`:
 *   - `skipped` set  → a warning row with the reason (and size, if known);
 *   - no `url`       → nothing;
 *   - photo/sticker  → thumbnail that opens the lightbox;
 *   - video / audio  → native player;
 *   - anything else  → download link with kind, MIME type and size.
 *
 * All server-provided strings are HTML-escaped, including the skip reason.
 *
 * @param {Array<object>|null|undefined} files  Media descriptors of the message.
 * @returns {string} HTML for the media block, or "" when there are no files.
 */
function renderMedia(files) {
  if (!files || !files.length) return "";

  const renderFile = (f) => {
    if (f.skipped) {
      const why = f.skipped === "too_large" ? "слишком большой" : f.skipped;
      const sizeNote = f.size ? `, ${fmtSize(f.size)}` : "";
      // The reason comes from the API, so it is escaped like every other field.
      return `<div class="media-item media-skipped"><span class="badge warn">${escape(f.kind)}</span>
        <span class="muted">${escape(why)}${sizeNote}</span></div>`;
    }

    if (!f.url) return "";
    const url = escape(f.url);

    if (f.kind === "photo" || f.kind === "sticker") {
      return `<a href="${url}" target="_blank" data-action="lightbox" data-url="${url}">
        <img class="media-thumb" src="${url}" loading="lazy" alt="" />
      </a>`;
    }
    if (f.kind === "video") {
      return `<video class="media-video" src="${url}" controls preload="metadata"></video>`;
    }
    if (f.kind === "audio") {
      return `<audio src="${url}" controls preload="none" style="width:100%"></audio>`;
    }

    return `<a class="media-doc" href="${url}" target="_blank" download>
      <span class="badge">${escape(f.kind)}</span>
      <span>${escape(f.mime || "файл")}</span>
      <span class="muted">${fmtSize(f.size)}</span>
    </a>`;
  };

  return `<div class="message-media">${files.map(renderFile).join("")}</div>`;
}
|
||||
|
||||
/**
 * Initialise filter state from the page's query string.
 *
 * Recognised parameters: `channel_id`, `q`, `real_estate`, `hr_kind`,
 * `has_phone=true`, `leads_only=true`, `min_confidence`.
 *
 * Numeric parameters are applied only when they parse to a finite number, so
 * a hand-edited URL such as `?channel_id=abc` or `?channel_id=` leaves the
 * default in place instead of storing `NaN` / `0` in the state.
 */
function readUrl() {
  const params = new URLSearchParams(location.search);

  // Returns the parameter as a finite number, or null when absent/blank/invalid.
  const numberParam = (key) => {
    const raw = params.get(key);
    if (raw === null || raw.trim() === "") return null;
    const parsed = Number(raw);
    return Number.isFinite(parsed) ? parsed : null;
  };

  const channelId = numberParam("channel_id");
  if (channelId !== null) state.channelId = channelId;

  if (params.has("q")) state.q = params.get("q");
  if (params.has("real_estate")) state.realEstate = params.get("real_estate");
  if (params.has("hr_kind")) state.hrKind = params.get("hr_kind");
  if (params.get("has_phone") === "true") state.hasPhone = true;
  if (params.get("leads_only") === "true") state.leadsOnly = true;

  const minConfidence = numberParam("min_confidence");
  if (minConfidence !== null) state.minConfidence = minConfidence;
}
|
||||
|
||||
/**
 * Push the current filter state into the form controls.
 *
 * The `real-estate` and `hr-kind` selects are optional and are updated only
 * if they exist in the document; all other controls are required.
 */
function syncControls() {
  const byId = (id) => document.getElementById(id);

  byId("channel-filter").value = state.channelId ?? "";
  byId("search").value = state.q;

  const optionalSelects = [
    ["real-estate", state.realEstate],
    ["hr-kind", state.hrKind],
  ];
  for (const [id, value] of optionalSelects) {
    const select = byId(id);
    if (select) select.value = value;
  }

  byId("has-phone").checked = state.hasPhone;
  byId("leads-only").checked = state.leadsOnly;
  byId("min-confidence").value = String(state.minConfidence);
  byId("limit").value = state.limit;
}
|
||||
|
||||
/**
 * Fetch the channel list, store it in `state.channels`, rebuild the channel
 * filter `<select>` and re-apply the current state to the controls.
 */
async function loadChannels() {
  const channels = await api.listChannels();
  state.channels = channels;

  // First option means "no channel filter".
  const options = [`<option value="">Все каналы (${meta.short})</option>`];
  for (const c of channels) {
    options.push(`<option value="${c.id}">${escape(c.title || c.identifier)}</option>`);
  }
  document.getElementById("channel-filter").innerHTML = options.join("");

  syncControls();
}
|
||||
|
||||
/**
 * Fetch one page of messages for the current filter state and render it into
 * `#list`, then update the pager (`#page-info`, `#prev`, `#next`).
 *
 * While the request is in flight a "loading" placeholder is shown. Failures
 * are reported through `toast` and an inline error message.
 *
 * An empty page shows "—" in the pager instead of an inverted range such as
 * "1–0".
 */
async function loadMessages() {
  const list = document.getElementById("list");
  list.innerHTML = `<div class="empty">Загрузка...</div>`;

  // HTML for a single message row.
  const renderRow = (m) => {
    // Albums show their size; otherwise a generic media badge if there is media.
    const mediaBadge = m.group_size > 1
      ? `<span class="badge">альбом · ${m.group_size}</span>`
      : (m.has_media ? '<span class="badge">media</span>' : '');
    const leadCard = V === "hr"
      ? renderHrLead(m.extracted && m.extracted.hr_lead, m)
      : renderReLead(m.extracted && m.extracted.lead, m);
    return `
      <div class="message" data-id="${m.id}">
        <div class="message-meta">
          <a href="?channel_id=${m.channel_id}">${escape(channelTitle(m.channel_id))}</a>
          <span>·</span>
          <span>${fmtDate(m.date)}</span>
          <span>·</span>
          <span class="mono">#${m.tg_message_id}</span>
          ${mediaBadge}
          ${m.views != null ? `<span>👁 ${m.views}</span>` : ''}
          ${m.forwards ? `<span>↗ ${m.forwards}</span>` : ''}
          <div class="spacer"></div>
          <a href="#" data-action="raw">json</a>
        </div>
        <div class="message-text">${m.text ? highlight(m.text, state.q) : '<span class="muted">(без текста)</span>'}</div>
        ${leadCard}
        ${renderExtracted(m.extracted)}
        ${renderMedia(m.media_files)}
      </div>
    `;
  };

  try {
    const msgs = await api.listMessages({
      channelId: state.channelId,
      q: state.q || undefined,
      realEstate: state.realEstate || undefined,
      hrKind: state.hrKind || undefined,
      hasPhone: state.hasPhone || undefined,
      leadsOnly: state.leadsOnly || undefined,
      // The confidence threshold is only sent together with the leads-only filter.
      minConfidence: state.leadsOnly ? state.minConfidence : undefined,
      limit: state.limit,
      offset: state.offset,
    });

    list.innerHTML = msgs.length
      ? msgs.map(renderRow).join("")
      : `<div class="empty">Сообщений нет</div>`;

    document.getElementById("page-info").textContent = msgs.length
      ? `${state.offset + 1}–${state.offset + msgs.length}`
      : "—";
    document.getElementById("prev").disabled = state.offset === 0;
    // A short page means there is nothing after it.
    document.getElementById("next").disabled = msgs.length < state.limit;
  } catch (err) {
    toast(err.message, "error");
    list.innerHTML = `<div class="empty">Ошибка: ${escape(err.message)}</div>`;
  }
}
|
||||
|
||||
// Channel filter: an empty value means "all channels". Any filter change
// returns to the first page.
document.getElementById("channel-filter").addEventListener("change", ({ target }) => {
  const raw = target.value;
  state.channelId = raw ? Number(raw) : null;
  state.offset = 0;
  loadMessages();
});

// Search box: debounce typing by 250 ms before reloading.
let searchTimer;
document.getElementById("search").addEventListener("input", ({ target }) => {
  clearTimeout(searchTimer);
  searchTimer = setTimeout(() => {
    state.q = target.value.trim();
    state.offset = 0;
    loadMessages();
  }, 250);
});

// Page size selector.
document.getElementById("limit").addEventListener("change", ({ target }) => {
  state.limit = Number(target.value);
  state.offset = 0;
  loadMessages();
});
|
||||
|
||||
// Vertical-specific selects are optional: each listener is attached only if
// its control exists in the document.
const reSelEl = document.getElementById("real-estate");
reSelEl?.addEventListener("change", ({ target }) => {
  state.realEstate = target.value;
  state.offset = 0;
  loadMessages();
});

const hrSelEl = document.getElementById("hr-kind");
hrSelEl?.addEventListener("change", ({ target }) => {
  state.hrKind = target.value;
  state.offset = 0;
  loadMessages();
});
|
||||
|
||||
// "Has phone" checkbox.
document.getElementById("has-phone").addEventListener("change", ({ target }) => {
  state.hasPhone = target.checked;
  state.offset = 0;
  loadMessages();
});

// "Leads only" checkbox.
document.getElementById("leads-only").addEventListener("change", ({ target }) => {
  state.leadsOnly = target.checked;
  state.offset = 0;
  loadMessages();
});

// Minimum confidence: always remembered, but it only affects the query (and so
// only triggers a reload) while the leads-only filter is on.
document.getElementById("min-confidence").addEventListener("change", ({ target }) => {
  state.minConfidence = Number(target.value);
  if (!state.leadsOnly) return;
  state.offset = 0;
  loadMessages();
});
|
||||
|
||||
// Manual refresh re-runs the current query.
document.getElementById("refresh").addEventListener("click", loadMessages);

// Pager: step back (never below 0) or forward by one page.
document.getElementById("prev").addEventListener("click", () => {
  const previous = state.offset - state.limit;
  state.offset = previous > 0 ? previous : 0;
  loadMessages();
});
document.getElementById("next").addEventListener("click", () => {
  state.offset = state.offset + state.limit;
  loadMessages();
});

// Auto-refresh: always drop the previous timer, then start a 10 s one if enabled.
document.getElementById("autorefresh").addEventListener("change", ({ target }) => {
  state.autorefresh = target.checked;
  if (state.timer) {
    clearInterval(state.timer);
    state.timer = null;
  }
  if (state.autorefresh) {
    state.timer = setInterval(loadMessages, 10000);
  }
});
|
||||
|
||||
// Delegated clicks inside the message list:
//   [data-action='lightbox'] → open the image in the lightbox;
//   [data-action='raw']      → fetch the message and show its JSON in #raw-dialog.
document.getElementById("list").addEventListener("click", async (e) => {
  const thumb = e.target.closest("[data-action='lightbox']");
  if (thumb) {
    e.preventDefault();
    openLightbox(thumb.dataset.url);
    return;
  }

  const rawLink = e.target.closest("[data-action='raw']");
  if (!rawLink) return;
  e.preventDefault();

  // The message id lives on the enclosing .message element.
  const messageId = Number(rawLink.closest(".message").dataset.id);
  try {
    const payload = await api.getMessage(messageId);
    document.getElementById("raw-content").textContent = JSON.stringify(payload, null, 2);
    document.getElementById("raw-dialog").showModal();
  } catch (err) {
    toast(err.message, "error");
  }
});
|
||||
|
||||
/**
 * Show `url` as a full-size image in the `#lightbox` overlay.
 *
 * The overlay element is created on first use (or after it was dismissed)
 * and removes itself from the document when clicked.
 *
 * @param {string} url  Image URL; it is HTML-escaped before being inserted.
 */
function openLightbox(url) {
  const existing = document.getElementById("lightbox");
  const overlay = existing || document.createElement("div");

  if (!existing) {
    overlay.id = "lightbox";
    overlay.addEventListener("click", () => overlay.remove());
    document.body.appendChild(overlay);
  }

  overlay.innerHTML = `<img src="${escape(url)}" alt="" />`;
}
|
||||
// Close button of the raw-JSON dialog.
document.getElementById("raw-close").addEventListener("click", () => {
  const dialog = document.getElementById("raw-dialog");
  dialog.close();
});
|
||||
|
||||
// Page bootstrap: take filters from the URL, then load channels and the first
// page of messages.
readUrl();
(async () => {
  try {
    await loadChannels();
  } catch (err) {
    // A failed channel list must not leave the page blank: report it, still
    // reflect the URL-derived filters in the controls, and load messages.
    toast(err.message, "error");
    syncControls();
  }
  await loadMessages();
})();
|
||||
25
src/parser_bot/web/static/js/nav-status.js
Normal file
25
src/parser_bot/web/static/js/nav-status.js
Normal file
@@ -0,0 +1,25 @@
|
||||
import { api } from "/api/monitoring-tg/static/js/api.js";
|
||||
import { isAdmin } from "/api/monitoring-tg/static/js/access.js";
|
||||
import { appBase } from "/api/monitoring-tg/static/js/vertical.js";
|
||||
|
||||
// "Telegram not authorized" banner. Only useful for admins — non-admin
|
||||
// visitors can't open /auth.html anyway, so showing the banner would be
|
||||
// noise (and the /auth/status call itself 404s for non-admins).
|
||||
// Show a warning banner at the top of <main> when the current user is an admin
// and the API reports that Telegram is not authorized. Any failure while
// checking or rendering is ignored (best effort).
(async () => {
  const admin = await isAdmin();
  if (!admin) return;

  try {
    const { authorized } = await api.authStatus();
    if (authorized) return;

    const returnTo = encodeURIComponent(location.pathname);
    const banner = document.createElement("div");
    banner.className = "card";
    banner.style.cssText =
      "border-color: rgba(241, 196, 15, 0.5); background: rgba(241, 196, 15, 0.08); margin-bottom: 16px;";
    banner.innerHTML = `
      <strong>Telegram не авторизован.</strong>
      Парсер не сможет ходить за сообщениями, пока вы не залогинитесь.
      <a href="${appBase()}/auth.html?return=${returnTo}">Открыть страницу авторизации →</a>
    `;

    const main = document.querySelector("main");
    if (main) main.insertBefore(banner, main.firstChild);
  } catch {}
})();
|
||||
71
src/parser_bot/web/static/js/nav.js
Normal file
71
src/parser_bot/web/static/js/nav.js
Normal file
@@ -0,0 +1,71 @@
|
||||
import { api } from "/api/monitoring-tg/static/js/api.js";
|
||||
// Import for side-effect: access.js hides .admin-link elements for non-admins.
|
||||
import "/api/monitoring-tg/static/js/access.js";
|
||||
import {
|
||||
VERTICAL_META,
|
||||
appBase,
|
||||
getVertical,
|
||||
getSection,
|
||||
verticalBase,
|
||||
sectionBase,
|
||||
} from "/api/monitoring-tg/static/js/vertical.js";
|
||||
|
||||
// Current scope, derived from the URL, and the display metadata of the vertical.
const V = getVertical();
const section = getSection();
const meta = VERTICAL_META[V];

// Page title: "parser-tg-bot · <emoji> <vertical>[ · <section>]".
const titleEl = document.getElementById("page-title");
if (titleEl) {
  const titleParts = ["parser-tg-bot", `${meta.emoji} ${meta.short}`];
  if (section) titleParts.push(section);
  titleEl.textContent = titleParts.join(" · ");
}
|
||||
|
||||
// Build the navigation bar for the current scope.
const navEl = document.getElementById("nav-section");
if (navEl) {
  const currentPath = location.pathname;
  // A link is "active" only when its href equals the current path exactly.
  const activeClass = (href) => (currentPath === href ? "active" : "");
  const root = appBase();

  // Up-link: chooser if we are inside a section, vertical-list otherwise.
  const links = [
    section
      ? `<a href="${verticalBase()}/">← ${meta.short} (подразделы)</a>`
      : `<a href="${root}/">← Разделы</a>`,
  ];

  // Section pages exist only inside a section.
  if (section) {
    const sBase = sectionBase();
    const pages = [
      ["/", "Дашборд", ""],
      ["/channels.html", "Каналы", ""],
      ["/messages.html", "Сообщения", ""],
      ["/settings.html", "Настройки", "admin-only "],
    ];
    for (const [path, label, classPrefix] of pages) {
      const href = sBase + path;
      links.push(`<a href="${href}" class="${classPrefix}${activeClass(href)}">${label}</a>`);
    }
  }

  const returnTo = encodeURIComponent(currentPath);
  links.push(
    `<a class="admin-login-link" href="${root}/admin.html?return=${returnTo}">Админ</a>`,
    `<a class="admin-link" href="${root}/auth.html">Авторизация</a>`,
    `<a class="admin-link" href="${root}/docs" target="_blank">API</a>`,
  );

  navEl.innerHTML = links.join("");
}
|
||||
|
||||
// Best-effort: resolve section's display title from the API and update the
|
||||
// page heading. Falls back to the raw slug if the network call fails.
|
||||
const headingEl = document.getElementById("page-heading");
|
||||
if (headingEl && section) {
|
||||
api.listSections(V)
|
||||
.then(sections => {
|
||||
const s = sections.find(x => x.slug === section);
|
||||
if (s) {
|
||||
const baseText = headingEl.dataset.base || headingEl.textContent;
|
||||
headingEl.dataset.base = baseText;
|
||||
headingEl.textContent = `${baseText} · ${s.emoji ? s.emoji + " " : ""}${s.title}`;
|
||||
}
|
||||
})
|
||||
.catch(() => {});
|
||||
}
|
||||
|
||||
export { section, V, meta };
|
||||
202
src/parser_bot/web/static/js/sections-list.js
Normal file
202
src/parser_bot/web/static/js/sections-list.js
Normal file
@@ -0,0 +1,202 @@
|
||||
import { api, toast } from "/api/monitoring-tg/static/js/api.js";
|
||||
import { isAdmin } from "/api/monitoring-tg/static/js/access.js";
|
||||
import { getVertical, verticalBase, VERTICAL_META } from "/api/monitoring-tg/static/js/vertical.js";
|
||||
import { slugify } from "/api/monitoring-tg/static/js/slugify.js";
|
||||
|
||||
// Vertical of the current page, its URL base and its display metadata.
const V = getVertical();
const meta = VERTICAL_META[V];
const base = verticalBase(V);

// Sections from the last successful render(), keyed by slug; used to prefill
// the edit dialog.
let sectionsBySlug = new Map();
|
||||
|
||||
/**
 * HTML-escape a value for safe interpolation into markup and attributes.
 *
 * @param {*} s  Value to escape; `null`/`undefined` become "", anything else
 *   is converted with `String()`.
 * @returns {string} The text with `&`, `<`, `>`, `"` and `'` replaced by
 *   HTML entities.
 */
function escape(s) {
  if (s == null) return "";
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  return String(s).replace(/[&<>"']/g, c => entities[c]);
}
|
||||
|
||||
/**
 * Load the sections of the current vertical and render them as tiles into
 * `#sections-grid`.
 *
 * Admins additionally see each section's access code and the rename/delete
 * buttons. The loaded sections are cached in `sectionsBySlug`. Failures are
 * reported through `toast` and an inline error message.
 */
async function render() {
  const grid = document.getElementById("sections-grid");
  grid.innerHTML = `<div class="empty">Загрузка...</div>`;

  // HTML for one section tile.
  const tileHtml = (s, admin) => {
    const description = s.description
      ? `<div class="section-desc muted">${escape(s.description)}</div>`
      : "";
    const accessCode = admin
      ? `<div class="section-code mono">Код: ${escape(s.access_code || "не задан")}</div>`
      : "";
    const adminButtons = admin
      ? `
        <div class="row admin-only" style="justify-content:flex-end; gap:8px; margin-top:8px">
          <button class="secondary" data-action="edit">Переименовать</button>
          <button class="danger" data-action="delete">Удалить</button>
        </div>
      `
      : "";
    return `
      <div class="card section-tile" data-slug="${escape(s.slug)}">
        <a href="${base}/${encodeURIComponent(s.slug)}/" class="section-tile-link">
          <div class="section-tile-head">
            <span class="section-emoji">${escape(s.emoji || meta.emoji)}</span>
            <span class="section-title">${escape(s.title)}</span>
          </div>
          <div class="section-stats">
            <span title="Каналов (активных/всего)"><b>${s.channels_active}</b> / ${s.channels_total} каналов</span>
            <span title="Сообщений всего">${s.messages_total.toLocaleString()} сообщ.</span>
            <span title="🎯 Лидов">${s.leads_total.toLocaleString()} лидов</span>
          </div>
          ${description}
          ${accessCode}
          <div class="section-slug muted mono">${escape(V)} / ${escape(s.slug)}</div>
        </a>
        ${adminButtons}
      </div>
    `;
  };

  try {
    const admin = await isAdmin();
    const sections = await api.listSections(V);
    sectionsBySlug = new Map(sections.map(s => [s.slug, s]));

    if (sections.length === 0) {
      grid.innerHTML = `<div class="empty">Подразделов пока нет — нажми «+ Новый подраздел»</div>`;
      return;
    }

    const tiles = sections.map(s => tileHtml(s, admin)).join("");
    grid.innerHTML = `<div class="sections-grid">${tiles}</div>`;
  } catch (err) {
    toast(err.message, "error");
    grid.innerHTML = `<div class="empty">Ошибка: ${escape(err.message)}</div>`;
  }
}
|
||||
|
||||
// --- Create-section dialog with auto-slug -------------------------------
|
||||
|
||||
// Elements of the create-section dialog (any of them may be absent).
const titleInput = document.getElementById("new-title");
const slugInput = document.getElementById("new-slug");
const slugPreview = document.getElementById("new-slug-preview");
const slugManualToggle = document.getElementById("new-slug-manual");

// True while the slug mirrors the title. Cleared the first time the user types
// into the slug field, after which the slug is no longer auto-synced.
let slugIsAuto = true;
|
||||
|
||||
/**
 * Derive the slug from the current title and mirror it into the slug input
 * and the preview label. Does nothing once the user has edited the slug
 * manually (`slugIsAuto` is false).
 */
function syncSlugFromTitle() {
  if (slugIsAuto) {
    const candidate = slugify(titleInput.value);
    slugInput.value = candidate;
    if (slugPreview) {
      // Placeholder text is shown while the title produces an empty slug.
      slugPreview.textContent = candidate || "(введите название)";
    }
  }
}
|
||||
|
||||
// Typing a title keeps the slug in sync (while auto mode is on).
titleInput?.addEventListener("input", syncSlugFromTitle);

// Typing into the slug field switches to manual mode.
slugInput?.addEventListener("input", () => {
  slugIsAuto = false;
});

// The toggle shows/hides the manual slug row and focuses the slug field.
slugManualToggle?.addEventListener("click", (event) => {
  event.preventDefault();
  const slugRow = slugInput.closest(".slug-row");
  if (slugRow) slugRow.hidden = !slugRow.hidden;
  slugInput.focus();
});
|
||||
|
||||
/**
 * Return the create-section dialog to its initial state: clear the form,
 * re-enable slug auto-sync, reset the preview text and hide the manual
 * slug row.
 */
function resetForm() {
  document.getElementById("create-form").reset();
  slugIsAuto = true;

  if (slugPreview) slugPreview.textContent = "(введите название)";

  if (!slugInput) return;
  slugInput.value = "";
  const slugRow = slugInput.closest(".slug-row");
  if (slugRow) slugRow.hidden = true;
}
|
||||
|
||||
// Open the create dialog with a clean form and focus the title shortly after.
document.getElementById("open-create").addEventListener("click", () => {
  resetForm();
  const dialog = document.getElementById("create-dialog");
  dialog.showModal();
  setTimeout(() => titleInput?.focus(), 50);
});

// Cancel buttons simply close their dialog.
document.getElementById("create-cancel").addEventListener("click", () => {
  const dialog = document.getElementById("create-dialog");
  dialog.close();
});

document.getElementById("edit-cancel").addEventListener("click", () => {
  const dialog = document.getElementById("edit-dialog");
  dialog.close();
});
|
||||
|
||||
// Create-section form: validate title, slug and access code, create the
// section through the API, then close the dialog and re-render the grid.
document.getElementById("create-form").addEventListener("submit", async (event) => {
  event.preventDefault();
  const fieldValue = (id) => document.getElementById(id).value.trim();

  const title = titleInput.value.trim();
  if (!title) return;

  // Re-sync once more in case `input` didn't fire before submit (autofill).
  if (slugIsAuto) syncSlugFromTitle();

  const slug = slugInput.value.trim() || slugify(title);
  if (!slug) {
    toast("Не удалось сформировать slug — введите его вручную", "error");
    return;
  }

  const accessCode = fieldValue("new-access-code");
  if (accessCode.length < 3) {
    toast("Код доступа должен быть не короче 3 символов", "error");
    return;
  }

  // Optional fields are sent as null when left blank.
  const emoji = fieldValue("new-emoji") || null;
  const description = fieldValue("new-description") || null;

  try {
    await api.createSection({ vertical: V, slug, title, emoji, description, accessCode });
    toast(`Подраздел "${title}" создан`, "success");
    document.getElementById("create-dialog").close();
    resetForm();
    await render();
  } catch (err) {
    toast(err.message, "error");
  }
});
|
||||
|
||||
// Delegated clicks on tile buttons:
//   data-action="edit"   → prefill and open the edit dialog;
//   data-action="delete" → confirm, delete through the API, re-render.
document.getElementById("sections-grid").addEventListener("click", async (event) => {
  const button = event.target.closest("[data-action]");
  if (!button) return;

  const { slug } = button.closest(".section-tile").dataset;

  switch (button.dataset.action) {
    case "edit": {
      const current = sectionsBySlug.get(slug);
      if (!current) return;

      const prefill = {
        "edit-slug": slug,
        "edit-title": current.title || "",
        "edit-emoji": current.emoji || "",
        "edit-access-code": current.access_code || "",
        "edit-description": current.description || "",
      };
      for (const [id, value] of Object.entries(prefill)) {
        document.getElementById(id).value = value;
      }

      document.getElementById("edit-dialog").showModal();
      setTimeout(() => document.getElementById("edit-title").focus(), 50);
      return;
    }

    case "delete": {
      const confirmed = confirm(
        `Удалить подраздел "${slug}"? Удалить можно только пустой подраздел (без каналов).`
      );
      if (!confirmed) return;

      try {
        await api.deleteSection(V, slug);
        toast(`Подраздел "${slug}" удалён`, "success");
        await render();
      } catch (err) {
        toast(err.message, "error");
      }
      return;
    }

    default:
      return;
  }
});
|
||||
|
||||
// Edit-section form: validate, save through the API, close the dialog and
// re-render the grid.
document.getElementById("edit-form").addEventListener("submit", async (event) => {
  event.preventDefault();
  const trimmed = (id) => document.getElementById(id).value.trim();

  // The slug identifies the section being edited and is sent as-is.
  const slug = document.getElementById("edit-slug").value;
  const title = trimmed("edit-title");
  const accessCode = trimmed("edit-access-code");
  // Optional fields are sent as null when left blank.
  const emoji = trimmed("edit-emoji") || null;
  const description = trimmed("edit-description") || null;

  if (!title) return;
  if (accessCode.length < 3) {
    toast("Код доступа должен быть не короче 3 символов", "error");
    return;
  }

  const changes = { title, emoji, description, access_code: accessCode };
  try {
    await api.updateSection(V, slug, changes);
    toast(`Подраздел "${title}" сохранён`, "success");
    document.getElementById("edit-dialog").close();
    await render();
  } catch (err) {
    toast(err.message, "error");
  }
});

// Initial render of the sections grid.
render();
|
||||
118
src/parser_bot/web/static/js/settings.js
Normal file
118
src/parser_bot/web/static/js/settings.js
Normal file
@@ -0,0 +1,118 @@
|
||||
import { api, toast, fmtDate } from "/api/monitoring-tg/static/js/api.js";
|
||||
import { getVertical, getSection, VERTICAL_META } from "/api/monitoring-tg/static/js/vertical.js";
|
||||
|
||||
// Scope of the settings page, derived from the URL: the vertical, its display
// metadata, and the section slug (null on vertical-level pages).
const V = getVertical();
const meta = VERTICAL_META[V];
const section = getSection();
|
||||
|
||||
// `level` decides which override layer the editor edits/saves/resets.
|
||||
// "section" → store key llm_system_prompt:<vertical>:<section_slug>
|
||||
// "vertical" → store key llm_system_prompt:<vertical>
|
||||
// Effective resolution always goes section → vertical → default.
|
||||
// Override layer currently being edited. Starts at "section" when the page is
// inside a section, otherwise "vertical".
let level = section ? "section" : "vertical";

// The level selector mirrors `level`. Outside a section there is nothing to
// choose, so it is locked to "vertical"; inside a section switching the level
// reloads the prompt for the newly selected layer.
const levelEl = document.getElementById("prompt-level");
if (levelEl) {
  levelEl.value = level;
  if (!section) {
    levelEl.disabled = true;
  } else {
    levelEl.addEventListener("change", async (e) => {
      level = e.target.value;
      try {
        await loadPrompt();
      } catch (err) {
        // Report load failures the same way as the other prompt actions
        // instead of leaving an unhandled rejection.
        toast(err.message, "error");
      }
    });
  }
}
|
||||
|
||||
/**
 * Scope object passed to the prompt API for the currently selected level.
 *
 * @returns {{vertical: string, section: (string|null)}} The current vertical,
 *   plus the section slug when editing the section layer and `null` when
 *   editing the vertical layer.
 */
function levelScope() {
  const scopedSection = level === "section" ? section : null;
  return { vertical: V, section: scopedSection };
}
|
||||
|
||||
/**
 * Load the server settings and global stats and render them as key/value rows
 * into `#config-tbody`.
 *
 * Cells are filled through `textContent`, so values coming from the server
 * configuration or the URL are never interpreted as HTML.
 *
 * @throws {Error} When the settings request does not succeed; the message is
 *   the HTTP reason phrase, or "HTTP <status>" when the server sends none.
 */
async function loadConfig() {
  const res = await fetch("/api/monitoring-tg/api/v1/settings");
  if (!res.ok) {
    // statusText can be empty, which would produce a blank error toast.
    throw new Error(res.statusText || `HTTP ${res.status}`);
  }
  const cfg = await res.json();
  const stats = await api.globalStats();

  const scopeLabel = section ? `${meta.short} / ${section}` : meta.short;
  const rows = [
    ["Раздел", `${meta.emoji} ${scopeLabel}`],
    ["Период опроса", `${cfg.poll_interval_seconds}s`],
    ["Лимит истории за опрос", cfg.poll_history_limit],
    ["Telethon session", cfg.tg_session_path],
    ["Postgres host", `${cfg.postgres_host}:${cfg.postgres_port}/${cfg.postgres_db}`],
    ["API host", `${cfg.api_host}:${cfg.api_port}`],
    [`Каналов в ${scopeLabel}`, `${stats.channels_active} активных / ${stats.channels_total}`],
    [`Сообщений в ${scopeLabel}`, stats.messages_total.toLocaleString()],
    // NOTE(review): assumes fmtDate returns plain text, not markup — confirm.
    ["Последний опрос (scope)", fmtDate(stats.last_poll_at)],
  ];

  const rowElements = rows.map(([key, value]) => {
    const tr = document.createElement("tr");

    const keyCell = document.createElement("td");
    keyCell.className = "muted";
    keyCell.textContent = key;

    const valueCell = document.createElement("td");
    valueCell.className = "mono";
    // Missing values are shown as a dash.
    valueCell.textContent = value ?? "—";

    tr.append(keyCell, valueCell);
    return tr;
  });
  document.getElementById("config-tbody").replaceChildren(...rowElements);
}
|
||||
|
||||
// "Poll all" button: queue a poll through the API and report how many
// channels were queued. The clicked element is disabled while the request
// runs and re-enabled afterwards, whether it succeeded or not.
document.getElementById("poll-all").addEventListener("click", async ({ target }) => {
  target.disabled = true;
  try {
    const { queued } = await api.pollAll();
    toast(`В очереди ${queued ?? 0} каналов — опрос идёт в фоне`, "success");
  } catch (err) {
    toast(err.message, "error");
  } finally {
    target.disabled = false;
  }
});
|
||||
|
||||
/**
 * Load the prompt for the currently selected level and show it in the editor,
 * together with a badge describing where it came from (`data.source`) and
 * its length in characters.
 */
async function loadPrompt() {
  const data = await api.llmPromptGet(levelScope());
  const promptText = data.prompt || "";

  document.getElementById("prompt-editor").value = promptText;
  const statusEl = document.getElementById("prompt-status");
  const lengthEl = document.getElementById("prompt-length");

  // data.source → [badge label, badge CSS class]; unknown sources get a dash.
  const sourceBadges = {
    section: ["override · подраздел", "ok"],
    vertical: ["override · вертикаль", "ok"],
    default: ["встроенный по умолчанию", "off"],
  };
  const [label, cls] = sourceBadges[data.source] || ["—", "off"];

  statusEl.textContent = label;
  statusEl.className = `badge ${cls}`;
  lengthEl.textContent = `${promptText.length.toLocaleString()} символов`;
}
|
||||
|
||||
// "Save" button: store the editor text as the override for the selected level,
// then reload the prompt so the status badge reflects the new state.
document.getElementById("prompt-save").addEventListener("click", async ({ target }) => {
  const text = document.getElementById("prompt-editor").value;
  target.disabled = true;
  try {
    await api.llmPromptSave(text, levelScope());
    const where = level === "section" ? `${meta.short} / ${section}` : meta.short;
    toast(`Промпт ${where} сохранён, применится в течение 5 секунд`, "success");
    await loadPrompt();
  } catch (err) {
    toast(err.message, "error");
  } finally {
    target.disabled = false;
  }
});
|
||||
|
||||
// "Reset" button: after confirmation, drop the override for the selected level
// and reload the prompt.
document.getElementById("prompt-reset").addEventListener("click", async ({ target }) => {
  const where = level === "section"
    ? `подраздела "${section}"`
    : `вертикали "${meta.short}"`;

  const confirmed = confirm(`Сбросить пользовательский промпт ${where} и вернуться к фоллбэку?`);
  if (!confirmed) return;

  target.disabled = true;
  try {
    await api.llmPromptReset(levelScope());
    toast(`Промпт ${where} сброшен`, "success");
    await loadPrompt();
  } catch (err) {
    toast(err.message, "error");
  } finally {
    target.disabled = false;
  }
});
|
||||
|
||||
// Initial loads: both are started immediately and run independently; a
// failure in either is reported as an error toast.
for (const load of [loadConfig, loadPrompt]) {
  load().catch(err => toast(err.message, "error"));
}
|
||||
22
src/parser_bot/web/static/js/slugify.js
Normal file
22
src/parser_bot/web/static/js/slugify.js
Normal file
@@ -0,0 +1,22 @@
|
||||
// URL-safe slug from arbitrary text. Cyrillic → Latin so titles like
|
||||
// "Дубай Marina" become "dubay-marina" without forcing the user to type
|
||||
// a slug by hand.
|
||||
|
||||
// Transliteration table: lowercase Russian letter → Latin replacement.
// The hard and soft signs are dropped.
const RU_TO_LAT = {
  а: "a", б: "b", в: "v", г: "g", д: "d", е: "e", ё: "yo", ж: "zh",
  з: "z", и: "i", й: "y", к: "k", л: "l", м: "m", н: "n", о: "o",
  п: "p", р: "r", с: "s", т: "t", у: "u", ф: "f", х: "h", ц: "ts",
  ч: "ch", ш: "sh", щ: "sch", ъ: "", ы: "y", ь: "", э: "e", ю: "yu",
  я: "ya",
};

/**
 * Convert arbitrary text into a URL-safe slug.
 *
 * The text is lowercased, Russian letters are transliterated through
 * `RU_TO_LAT`, every run of characters outside `[a-z0-9]` becomes a single
 * hyphen, and the result is limited to 64 characters. The slug never starts
 * or ends with a hyphen, including after truncation.
 *
 * @param {string|null|undefined} text  Source text; a falsy value gives "".
 * @returns {string} The slug, possibly empty.
 */
export function slugify(text) {
  return (text || "")
    .toLowerCase()
    .split("")
    .map(c => RU_TO_LAT[c] ?? c)
    .join("")
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "")
    .slice(0, 64)
    // Cutting at 64 characters can land right after a separator.
    .replace(/-+$/, "");
}
|
||||
76
src/parser_bot/web/static/js/vertical.js
Normal file
76
src/parser_bot/web/static/js/vertical.js
Normal file
@@ -0,0 +1,76 @@
|
||||
// URL prefix of the app. It is stripped from location.pathname before the
// vertical/section are detected and is prepended to every generated link.
const APP_BASE = "/api/monitoring-tg";
|
||||
|
||||
// Detect the current scope from the URL path.
|
||||
//
|
||||
// / → vertical=null, section=null
|
||||
// /real-estate/ → vertical=real_estate, section=null (section chooser)
|
||||
// /real-estate/dubai/ → vertical=real_estate, section=dubai
|
||||
// /real-estate/dubai/channels.html → same
|
||||
// /hr/ → vertical=hr, section=null
|
||||
// /hr/it/settings.html → vertical=hr, section=it
|
||||
//
|
||||
// Section slug comes from URL path[2] and is opaque (created via UI). The
|
||||
// frontend treats it as a string and passes it to the API; the backend
|
||||
// resolves slug→Section row at query time.
|
||||
|
||||
/**
 * Non-empty path segments of the current URL, with the `APP_BASE` prefix
 * removed when the path starts with it.
 *
 * @returns {string[]} Path segments relative to the app base.
 */
function _segments() {
  const nonEmptyParts = (path) => path.split("/").filter(Boolean);

  const pathParts = nonEmptyParts(location.pathname);
  const baseParts = nonEmptyParts(APP_BASE);

  const underBase = baseParts.every((part, i) => pathParts[i] === part);
  return underBase ? pathParts.slice(baseParts.length) : pathParts;
}
|
||||
|
||||
// Return the vertical's URL slug ("hr" or "real-estate") taken from the first
// path segment, compared case-insensitively. Returns null when the first
// segment is missing or is not a known vertical.
export function getVerticalSlug() {
  const [first = ""] = _segments();
  const slug = first.toLowerCase();
  return slug === "hr" || slug === "real-estate" ? slug : null;
}
|
||||
|
||||
// Return the vertical key used in VERTICAL_META ("hr" or "real_estate").
// Pages whose URL has no vertical fall back to "real_estate" as a harmless
// default for section-less pages, so this never returns null.
export function getVertical() {
  return getVerticalSlug() === "hr" ? "hr" : "real_estate";
}
|
||||
|
||||
// Return the lowercased section slug from the URL, or null when there is none.
// The second path segment counts as a section only if the first segment is a
// known vertical and the second is not a page file name ending in ".html".
export function getSection() {
  if (getVerticalSlug() === null) return null;
  const second = _segments()[1];
  const isSection = Boolean(second) && !second.endsWith(".html");
  return isSection ? second.toLowerCase() : null;
}
|
||||
|
||||
// Display metadata per vertical, keyed by the value getVertical() returns.
//   slug      – URL path segment of the vertical (used by verticalBase())
//   title     – full display title
//   short     – short display title
//   emoji     – icon
//   leadLabel – label for a single lead of this vertical
export const VERTICAL_META = {
  real_estate: { slug: "real-estate", title: "Недвижимость", short: "Недвижимость", emoji: "🏠", leadLabel: "Объявление" },
  hr: { slug: "hr", title: "HR / Кадры", short: "HR", emoji: "👥", leadLabel: "HR-лид" },
};
|
||||
|
||||
// Return the app's base path (the APP_BASE constant) for other modules.
export function appBase() {
  return APP_BASE;
}
|
||||
|
||||
// Return the base URL path of a vertical: APP_BASE + "/" + the vertical's slug.
// `vertical` is a VERTICAL_META key and defaults to the vertical of the
// current page. Throws a TypeError for a key missing from VERTICAL_META.
export function verticalBase(vertical = getVertical()) {
  const { slug } = VERTICAL_META[vertical];
  return APP_BASE + "/" + slug;
}
|
||||
|
||||
// Return the base URL path of a section inside a vertical. Both arguments
// default to the values detected from the current URL. With no section (null
// or empty) the result is just the vertical's base path.
export function sectionBase(vertical = getVertical(), section = getSection()) {
  const base = verticalBase(vertical);
  if (!section) return base;
  return base + "/" + section;
}
|
||||
99
src/parser_bot/web/static/real-estate/index.html
Normal file
99
src/parser_bot/web/static/real-estate/index.html
Normal file
@@ -0,0 +1,99 @@
|
||||
<!doctype html>
|
||||
<html lang="ru">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>🏠 Недвижимость — подразделы</title>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1 id="page-title">parser-tg-bot · 🏠 Недвижимость</h1>
|
||||
<nav id="nav-section"></nav>
|
||||
</header>
|
||||
<main>
|
||||
<div class="row">
|
||||
<h2>Подразделы недвижимости</h2>
|
||||
<div class="spacer"></div>
|
||||
<button id="open-create">+ Новый подраздел</button>
|
||||
</div>
|
||||
<p class="muted">
|
||||
Каждый подраздел — это собственный набор каналов, своя статистика и свой
|
||||
LLM-промпт (с фоллбэком на промпт вертикали). Например: Дубай, Москва,
|
||||
Сочи, коммерческая недвижимость.
|
||||
</p>
|
||||
|
||||
<div id="sections-grid"></div>
|
||||
</main>
|
||||
|
||||
<dialog id="create-dialog">
|
||||
<h3 style="margin-top:0">Новый подраздел</h3>
|
||||
<form id="create-form">
|
||||
<label class="row" style="gap:8px; margin-bottom:8px">
|
||||
<span style="min-width:120px" class="muted">Название</span>
|
||||
<input type="text" id="new-title" required placeholder="Дубай" style="flex:1" />
|
||||
</label>
|
||||
<div class="row" style="gap:8px; margin-bottom:8px; font-size:12px">
|
||||
<span style="min-width:120px" class="muted">URL-адрес</span>
|
||||
<span class="muted mono">/real-estate/<span id="new-slug-preview">(введите название)</span>/</span>
|
||||
<div class="spacer"></div>
|
||||
<a href="#" id="new-slug-manual" class="muted">изменить вручную</a>
|
||||
</div>
|
||||
<label class="row slug-row" style="gap:8px; margin-bottom:8px" hidden>
|
||||
<span style="min-width:120px" class="muted">Slug</span>
|
||||
<!-- The hyphen inside the character class is escaped on purpose: browsers
     compile `pattern` with the RegExp `v` flag, where a bare "-" in a class is
     a syntax error, and an invalid pattern is silently ignored. -->
<input type="text" id="new-slug" pattern="[a-z0-9][a-z0-9_\-]*[a-z0-9]?"
       placeholder="dubai" style="flex:1" />
|
||||
</label>
|
||||
<label class="row" style="gap:8px; margin-bottom:8px">
|
||||
<span style="min-width:120px" class="muted">Иконка</span>
|
||||
<input type="text" id="new-emoji" maxlength="4" placeholder="🌴" style="width:80px" />
|
||||
</label>
|
||||
<label class="row" style="gap:8px; margin-bottom:8px">
|
||||
<span style="min-width:120px" class="muted">Код доступа</span>
|
||||
<input type="text" id="new-access-code" required minlength="3"
|
||||
autocomplete="new-password" style="flex:1" />
|
||||
</label>
|
||||
<label class="row" style="gap:8px; margin-bottom:8px; align-items:flex-start">
|
||||
<span style="min-width:120px" class="muted">Описание</span>
|
||||
<textarea id="new-description" rows="3" style="flex:1"></textarea>
|
||||
</label>
|
||||
<div class="row" style="justify-content:flex-end; gap:8px; margin-top:12px">
|
||||
<button type="button" id="create-cancel" class="secondary">Отмена</button>
|
||||
<button type="submit">Создать</button>
|
||||
</div>
|
||||
</form>
|
||||
</dialog>
|
||||
|
||||
<dialog id="edit-dialog">
|
||||
<h3 style="margin-top:0">Редактировать подраздел</h3>
|
||||
<form id="edit-form">
|
||||
<input type="hidden" id="edit-slug" />
|
||||
<label class="row" style="gap:8px; margin-bottom:8px">
|
||||
<span style="min-width:120px" class="muted">Название</span>
|
||||
<input type="text" id="edit-title" required style="flex:1" />
|
||||
</label>
|
||||
<label class="row" style="gap:8px; margin-bottom:8px">
|
||||
<span style="min-width:120px" class="muted">Иконка</span>
|
||||
<input type="text" id="edit-emoji" maxlength="4" style="width:80px" />
|
||||
</label>
|
||||
<label class="row" style="gap:8px; margin-bottom:8px">
|
||||
<span style="min-width:120px" class="muted">Код доступа</span>
|
||||
<input type="text" id="edit-access-code" required minlength="3"
|
||||
autocomplete="new-password" style="flex:1" />
|
||||
</label>
|
||||
<label class="row" style="gap:8px; margin-bottom:8px; align-items:flex-start">
|
||||
<span style="min-width:120px" class="muted">Описание</span>
|
||||
<textarea id="edit-description" rows="3" style="flex:1"></textarea>
|
||||
</label>
|
||||
<div class="row" style="justify-content:flex-end; gap:8px; margin-top:12px">
|
||||
<button type="button" id="edit-cancel" class="secondary">Отмена</button>
|
||||
<button type="submit">Сохранить</button>
|
||||
</div>
|
||||
</form>
|
||||
</dialog>
|
||||
|
||||
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/sections-list.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
48
src/parser_bot/web/static/real-estate/section/channels.html
Normal file
48
src/parser_bot/web/static/real-estate/section/channels.html
Normal file
@@ -0,0 +1,48 @@
|
||||
<!doctype html>
|
||||
<html lang="ru">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>🏠 Недвижимость · Каналы — parser-tg-bot</title>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1 id="page-title">parser-tg-bot</h1>
|
||||
<nav id="nav-section"></nav>
|
||||
</header>
|
||||
<main>
|
||||
<h2 id="page-heading">Каналы подраздела</h2>
|
||||
|
||||
<div class="card" style="margin-bottom:24px">
|
||||
<form id="add-form" class="row">
|
||||
<input type="text" id="identifier" placeholder="@channel или https://t.me/..." required style="flex:1; min-width:280px" />
|
||||
<button type="submit">Добавить канал</button>
|
||||
</form>
|
||||
<div class="muted" style="margin-top:8px; font-size:12px">
|
||||
Канал будет привязан к текущему подразделу.
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>ID</th>
|
||||
<th>Канал</th>
|
||||
<th>Telegram ID</th>
|
||||
<th>Сообщ.</th>
|
||||
<th>Последний опрос</th>
|
||||
<th>Статус</th>
|
||||
<th></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody id="tbody"></tbody>
|
||||
</table>
|
||||
</div>
|
||||
</main>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/channels.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
43
src/parser_bot/web/static/real-estate/section/index.html
Normal file
43
src/parser_bot/web/static/real-estate/section/index.html
Normal file
@@ -0,0 +1,43 @@
|
||||
<!doctype html>
|
||||
<html lang="ru">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>🏠 Недвижимость · Дашборд — parser-tg-bot</title>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1 id="page-title">parser-tg-bot</h1>
|
||||
<nav id="nav-section"></nav>
|
||||
</header>
|
||||
<main>
|
||||
<div class="row">
|
||||
<h2 id="page-heading">Дашборд</h2>
|
||||
<div class="spacer"></div>
|
||||
<button id="poll-all">Опросить все каналы подраздела</button>
|
||||
</div>
|
||||
|
||||
<div class="stats-grid" id="stats"></div>
|
||||
|
||||
<h3>Каналы подраздела</h3>
|
||||
<div class="card">
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Канал</th>
|
||||
<th>Сообщений</th>
|
||||
<th>Последнее сообщение</th>
|
||||
<th>Последний опрос</th>
|
||||
<th>Статус</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody id="channels-tbody"></tbody>
|
||||
</table>
|
||||
</div>
|
||||
</main>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/dashboard.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
78
src/parser_bot/web/static/real-estate/section/messages.html
Normal file
78
src/parser_bot/web/static/real-estate/section/messages.html
Normal file
@@ -0,0 +1,78 @@
|
||||
<!doctype html>
|
||||
<html lang="ru">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>🏠 Недвижимость · Сообщения — parser-tg-bot</title>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1 id="page-title">parser-tg-bot</h1>
|
||||
<nav id="nav-section"></nav>
|
||||
</header>
|
||||
<main>
|
||||
<h2 id="page-heading">Сообщения подраздела</h2>
|
||||
|
||||
<div class="toolbar card">
|
||||
<select id="channel-filter">
|
||||
<option value="">Все каналы подраздела</option>
|
||||
</select>
|
||||
<input type="search" id="search" placeholder="Поиск по тексту..." />
|
||||
<select id="real-estate">
|
||||
<option value="">Любая тема</option>
|
||||
<option value="any">🏠 Недвижимость (любая)</option>
|
||||
<option value="sale">🏠 Продажа</option>
|
||||
<option value="rent">🏠 Аренда</option>
|
||||
<option value="purchase">🏠 Покупка</option>
|
||||
</select>
|
||||
<label class="row" style="gap:6px">
|
||||
<input type="checkbox" id="leads-only" />
|
||||
<span class="muted">🎯 Только лиды (ИИ)</span>
|
||||
</label>
|
||||
<select id="min-confidence" title="Минимальная уверенность ИИ">
|
||||
<option value="0.3">0.3+</option>
|
||||
<option value="0.5" selected>0.5+</option>
|
||||
<option value="0.7">0.7+</option>
|
||||
<option value="0.9">0.9+</option>
|
||||
</select>
|
||||
<label class="row" style="gap:6px">
|
||||
<input type="checkbox" id="has-phone" />
|
||||
<span class="muted">📞 С телефоном</span>
|
||||
</label>
|
||||
<select id="limit">
|
||||
<option value="25">25</option>
|
||||
<option value="50" selected>50</option>
|
||||
<option value="100">100</option>
|
||||
<option value="200">200</option>
|
||||
</select>
|
||||
<div class="spacer"></div>
|
||||
<label class="row" style="gap:6px">
|
||||
<input type="checkbox" id="autorefresh" />
|
||||
<span class="muted">Автообновление</span>
|
||||
</label>
|
||||
<button id="refresh" class="secondary">Обновить</button>
|
||||
</div>
|
||||
|
||||
<div class="card" id="list"></div>
|
||||
|
||||
<div class="pagination">
|
||||
<button id="prev" class="secondary">← Назад</button>
|
||||
<span class="muted" id="page-info" style="align-self:center"></span>
|
||||
<button id="next" class="secondary">Вперёд →</button>
|
||||
</div>
|
||||
</main>
|
||||
|
||||
<dialog id="raw-dialog">
|
||||
<h3 style="margin-top:0">Сообщение</h3>
|
||||
<pre id="raw-content"></pre>
|
||||
<div class="row" style="justify-content:flex-end; margin-top:12px">
|
||||
<button class="secondary" id="raw-close">Закрыть</button>
|
||||
</div>
|
||||
</dialog>
|
||||
|
||||
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/messages.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
66
src/parser_bot/web/static/real-estate/section/settings.html
Normal file
66
src/parser_bot/web/static/real-estate/section/settings.html
Normal file
@@ -0,0 +1,66 @@
|
||||
<!doctype html>
|
||||
<html lang="ru">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>🏠 Недвижимость · Настройки — parser-tg-bot</title>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1 id="page-title">parser-tg-bot</h1>
|
||||
<nav id="nav-section"></nav>
|
||||
</header>
|
||||
<main>
|
||||
<h2 id="page-heading">Настройки подраздела</h2>
|
||||
|
||||
<div class="card" style="margin-bottom:24px">
|
||||
<h3 style="margin-top:0">Текущая конфигурация</h3>
|
||||
<table>
|
||||
<tbody id="config-tbody">
|
||||
<tr><td colspan="2" class="empty">Загрузка...</td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<div class="muted" style="font-size:12px; margin-top:12px">
|
||||
Параметры задаются через переменные окружения (<span class="mono">.env</span>).
|
||||
Для изменения отредактируйте <span class="mono">.env</span> и перезапустите контейнер:
|
||||
<span class="mono">docker compose restart app</span>.
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card" style="margin-bottom:24px">
|
||||
<h3 style="margin-top:0">Действия</h3>
|
||||
<div class="row">
|
||||
<button id="poll-all">Опросить все каналы подраздела сейчас</button>
|
||||
<a href="/api/monitoring-tg/docs" target="_blank" class="badge">OpenAPI / Swagger</a>
|
||||
<a href="/api/monitoring-tg/healthz" target="_blank" class="badge">Health check</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card" style="margin-bottom:24px">
|
||||
<h3 style="margin-top:0">🤖 Промпт ИИ</h3>
|
||||
<div class="row" style="margin-bottom:8px">
|
||||
<span class="badge" id="prompt-status">—</span>
|
||||
<span class="muted" id="prompt-length"></span>
|
||||
<div class="spacer"></div>
|
||||
<select id="prompt-level" title="Уровень редактирования промпта">
|
||||
<option value="section" selected>Промпт подраздела</option>
|
||||
<option value="vertical">Промпт вертикали</option>
|
||||
</select>
|
||||
<button id="prompt-reset" class="secondary">Сбросить уровень</button>
|
||||
<button id="prompt-save">Сохранить</button>
|
||||
</div>
|
||||
<textarea id="prompt-editor" rows="22"
|
||||
style="width:100%; font-family:ui-monospace, SFMono-Regular, Menlo, monospace; font-size:12px"></textarea>
|
||||
<div class="muted" style="font-size:12px; margin-top:8px">
|
||||
Каскад: <strong>section → vertical → default</strong>. Если промпта на
|
||||
уровне подраздела нет, используется промпт вертикали; если и его нет —
|
||||
встроенный по умолчанию. Сохранение применится в течение ~5 сек.
|
||||
</div>
|
||||
</div>
|
||||
</main>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
|
||||
<script type="module" src="/api/monitoring-tg/static/js/settings.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user