Add monitoring TG service

This commit is contained in:
Grendgi
2026-06-04 14:55:41 +03:00
commit f9e072774c
74 changed files with 7232 additions and 0 deletions

21
.dockerignore Normal file
View File

@@ -0,0 +1,21 @@
.git/
.gitignore
.gitea/
.env
.venv/
venv/
__pycache__/
**/__pycache__/
*.pyc
*.pyo
*.egg-info/
.pytest_cache/
.mypy_cache/
.ruff_cache/
.idea/
.vscode/
.claude/
.DS_Store
data/
*.session
*.session-journal

54
.env.example Normal file
View File

@@ -0,0 +1,54 @@
# Telegram MTProto credentials — get from https://my.telegram.org
TG_API_ID=
TG_API_HASH=
TG_PHONE=
# --- ONE OF THE TWO BELOW IS REQUIRED ---
# Preferred (no volumes, k8s-friendly): get the string by running
# docker compose run --rm -it app python -m parser_bot.auth
# It prints `TG_SESSION_STRING=...` — paste that line here.
TG_SESSION_STRING=
# Fallback (file-based): only used if TG_SESSION_STRING is empty.
# Requires mounting ./data/session as a volume.
TG_SESSION_PATH=/data/session/parser.session
# Postgres
POSTGRES_USER=parser
POSTGRES_PASSWORD=parser
POSTGRES_DB=parser
POSTGRES_HOST=db
POSTGRES_PORT=5432
# Polling
POLL_INTERVAL_SECONDS=60
POLL_HISTORY_LIMIT=50
# API
API_HOST=0.0.0.0
API_PORT=8000
# Media (downloaded photos / small videos / docs from parsed messages)
MEDIA_DIR=/data/media
MEDIA_MAX_BYTES=20971520
# Local LLM (Ollama) — runs Qwen 2.5 7B Q4 on CPU. Set LLM_ENABLED=false to disable.
LLM_ENABLED=true
LLM_BASE_URL=http://ollama:11434
LLM_MODEL=qwen2.5:7b-instruct-q4_K_M
LLM_TIMEOUT_SECONDS=120
LLM_MIN_TEXT_LENGTH=20
# How often the background classifier wakes up and how many messages it
# processes per tick. With 5/20s ≈ 900 messages/hour at ~3-6s per call.
LLM_CLASSIFY_INTERVAL_SECONDS=20
LLM_CLASSIFY_BATCH_SIZE=5
# Admin allowlist for /auth.html, /docs, /openapi.json, /redoc and the
# /api/v1/auth/* endpoints. Comma-separated list of client IPs.
# Empty = no network restriction — convenient for local dev.
# Example: ADMIN_ALLOWED_IPS=89.110.109.221,127.0.0.1
ADMIN_ALLOWED_IPS=
# Optional admin password. When set, admin requests must also present the
# signed admin cookie (issued after login) or the password in the
# X-Admin-Password header. Empty = access is decided by ADMIN_ALLOWED_IPS only.
ADMIN_PASSWORD=
# Honor X-Forwarded-For / X-Real-IP from a reverse proxy (Docker port-
# forward, nginx, traefik) when resolving the client IP for the allowlist.
# WARNING: these headers are supplied by the client unless a proxy in front
# of the app overwrites them. If the app is reachable directly, anyone can
# send "X-Forwarded-For: <allowlisted ip>" and pass the allowlist — set this
# to false in that case.
TRUST_PROXY_HEADERS=true

View File

@@ -0,0 +1,58 @@
# CI/CD pipeline: on every push to main, build the server image, push it to
# the Gitea registry and roll the new tag out to the monitoring-tg namespace.
name: Build and Deploy

on:
  push:
    branches: [main]

env:
  # Registry address used by this job for `docker login` / `docker push`.
  INTERNAL_REGISTRY: gitea-http.gitea.svc.cluster.local:3000
  # Registry address written into the Deployment image reference
  # (presumably the same registry as reachable from cluster nodes — confirm).
  NODE_REGISTRY: localhost:30300

jobs:
  build-and-deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install Docker CLI
        run: |
          curl -fsSL https://download.docker.com/linux/static/stable/x86_64/docker-27.5.1.tgz \
            | tar xz --strip-components=1 -C /usr/local/bin docker/docker
          docker version

      - name: Install kubectl
        run: |
          curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
          chmod +x kubectl
          mv kubectl /usr/local/bin/
          kubectl version --client

      - name: Login to Gitea Registry
        run: |
          echo "${{ secrets.REGISTRY_PASSWORD }}" | \
            docker login ${{ env.INTERNAL_REGISTRY }} \
            -u ${{ secrets.REGISTRY_USERNAME }} --password-stdin

      - name: Build and push server
        run: |
          docker build -f Dockerfile \
            -t ${{ env.INTERNAL_REGISTRY }}/admin/monitoring-tg-server:${{ github.sha }} \
            -t ${{ env.INTERNAL_REGISTRY }}/admin/monitoring-tg-server:latest \
            .
          docker push ${{ env.INTERNAL_REGISTRY }}/admin/monitoring-tg-server:${{ github.sha }}
          docker push ${{ env.INTERNAL_REGISTRY }}/admin/monitoring-tg-server:latest

      - name: Deploy to Kubernetes
        env:
          KUBECONFIG: /kubeconfig/config
        run: |
          kubectl apply -f k8s/namespace.yaml
          # k8s/secrets.yaml only carries placeholder values (CHANGE_ME).
          # Applying it on every deploy would overwrite the real credentials
          # stored in the cluster, so it is used solely to bootstrap the
          # Secrets when they do not exist yet.
          if ! kubectl -n monitoring-tg get secret monitoring-tg-secrets postgres-secret >/dev/null 2>&1; then
            kubectl apply -f k8s/secrets.yaml
          fi
          kubectl apply -f k8s/configmap.yaml
          kubectl apply -f k8s/postgres.yaml
          kubectl apply -f k8s/server-deployment.yaml
          kubectl apply -f k8s/server-service.yaml
          # Pin the Deployment to the image that was just built.
          kubectl -n monitoring-tg set image deployment/monitoring-tg-server \
            monitoring-tg-server=${{ env.NODE_REGISTRY }}/admin/monitoring-tg-server:${{ github.sha }}
          kubectl -n monitoring-tg rollout status deployment/monitoring-tg-server --timeout=180s

14
.gitignore vendored Normal file
View File

@@ -0,0 +1,14 @@
__pycache__/
*.py[cod]
*.egg-info/
.venv/
venv/
.env
*.session
*.session-journal
.pytest_cache/
.mypy_cache/
.ruff_cache/
.idea/
.vscode/
data/

28
Dockerfile Normal file
View File

@@ -0,0 +1,28 @@
# Image for the parser_bot service; the default command starts
# `python -m parser_bot.main` behind the migration-running entrypoint.
FROM python:3.11-slim
# Unbuffered Python output (logs show up immediately), no .pyc files,
# and no pip download cache kept in the image layers.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1
WORKDIR /app
# Compiler toolchain, presumably for dependencies without a prebuilt wheel —
# TODO confirm it is still required; apt lists are removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*
# Project metadata, sources and migration files needed by the install below
# and by `alembic upgrade head` at container start.
COPY pyproject.toml ./
COPY src ./src
COPY alembic.ini ./
COPY alembic ./alembic
# Editable install: the package is imported from /app/src, which
# docker-compose.yml bind-mounts from the host for local development.
RUN pip install --upgrade pip && pip install -e .
# Default locations for the Telegram session file and downloaded media
# (TG_SESSION_PATH / MEDIA_DIR); volumes are mounted over them at run time.
RUN mkdir -p /data/session /data/media
COPY docker/entrypoint.sh /usr/local/bin/entrypoint.sh
RUN chmod +x /usr/local/bin/entrypoint.sh
# Documentation only: the API listens on API_PORT (8000 by default).
EXPOSE 8000
# The entrypoint runs migrations and then exec's the command below.
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
CMD ["python", "-m", "parser_bot.main"]

123
README.md Normal file
View File

@@ -0,0 +1,123 @@
# parser-tg-bot
Парсер публичных Telegram-каналов на Telethon (MTProto). Сохраняет сообщения в Postgres,
управляется через REST API. Период опроса настраивается через `.env`. На следующем шаге
легко перевести на realtime через `events.NewMessage`.
## Стек
- Python 3.11, Telethon, FastAPI, SQLAlchemy 2 (async) + Alembic, APScheduler, Postgres 16
## Структура
```text
src/parser_bot/
├── api/ # FastAPI роуты + Pydantic-схемы
├── db/ # SQLAlchemy модели + сессии
├── scheduler/ # APScheduler-воркер периодического опроса
├── telegram/ # Telethon-клиент (resolve, fetch)
├── web/static/ # SPA-странички (HTML/CSS/JS, без бандлера)
├── config.py # pydantic-settings
└── main.py # FastAPI lifespan + uvicorn
alembic/ # миграции
```
## Первый запуск (локально, через Docker)
1. Получить `api_id` и `api_hash` на [my.telegram.org](https://my.telegram.org) → API development tools.
2. Скопировать `.env.example` в `.env` и заполнить `TG_API_ID`, `TG_API_HASH`, `TG_PHONE`.
3. Поднять Postgres + накатить миграции:
```bash
docker compose up -d db
docker compose run --rm app alembic upgrade head
```
4. Запуск:
```bash
docker compose up -d
docker compose logs app --tail=50
```
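5. **Авторизация Telegram** — открыть [http://localhost:8000/auth.html](http://localhost:8000/auth.html)
и нажать «Отправить код». Telegram пришлёт код на номер из `TG_PHONE` →
ввести код (и 2FA-пароль, если включён). Готово, парсер начнёт опрос.
Сессия сохраняется в `./data/session/parser.session` — рестарты её переиспользуют,
повторно входить не нужно.
Примечание: `docker-compose.yml` публикует приложение на порту 80 хоста (`80:8000`),
поэтому при запуске через Compose адреса из этого README открываются как
`http://localhost/...`, а не `http://localhost:8000/...`.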
### Админ-доступ и коды подразделов
- `ADMIN_PASSWORD` — дополнительный пароль для админских функций. Если не задан,
остаётся прежний режим: доступ определяется только `ADMIN_ALLOWED_IPS`.
- [http://localhost:8000/admin.html](http://localhost:8000/admin.html) — вход по
админ-паролю. После входа доступны удаление и редактирование подразделов,
просмотр их кодов, управление каналами, ручной опрос, промпты, авторизация
Telegram и Swagger.
- При создании подраздела обязательно задаётся `Код доступа`. Пользователь вводит
этот код при первом открытии данных подраздела; после входа он может добавлять
каналы в этот подраздел. Админ видит код в списке подразделов.
### Прод-вариант: без UI и без volume (k8s-friendly)
Выполни интерактивный логин **один раз** на dev-машине и получи непрозрачную (opaque) строку сессии:
```bash
docker compose run --rm -it app python -m parser_bot.auth
```
Скрипт напечатает строку вида `TG_SESSION_STRING=1AbcD...`. Положи её в
`.env` или k8s Secret — после этого приложение поднимается без UI и без
монтирования сессионного файла:
```ini
TG_SESSION_STRING=1AbcDef... # вместо TG_SESSION_PATH/volume
```
> ⚠️ **`ApiIdPublishedFloodError`** — Telegram заблокировал твою пару
> `api_id`/`api_hash` (попала в публичный доступ). Создай **новое** приложение
> на [my.telegram.org](https://my.telegram.org) и не публикуй креды нигде.
> Старый `api_id` восстановить нельзя.
## UI
После запуска доступны страницы:
- [Дашборд](http://localhost:8000/) — общая статистика, топ каналов, кнопка опросить всех
- [Каналы](http://localhost:8000/channels.html) — добавить / удалить / включить-выключить / опросить вручную
- [Сообщения](http://localhost:8000/messages.html) — фильтр по каналу, поиск по тексту, пагинация, raw JSON
- [Настройки](http://localhost:8000/settings.html) — текущая конфигурация и подсказки
- [Авторизация](http://localhost:8000/auth.html) — веб-логин в Telegram (код + 2FA)
- [Swagger UI](http://localhost:8000/docs) — интерактивный API
Глубокая ссылка `messages.html?channel_id=42` открывает ленту конкретного канала.
## API
- `GET /healthz` — health check
- `GET /api/v1/auth/status` — авторизован ли клиент
- `POST /api/v1/auth/send-code` — отправить код на `TG_PHONE`
- `POST /api/v1/auth/submit-code` `{"code": "12345"}` — подтвердить код
- `POST /api/v1/auth/submit-password` `{"password": "..."}` — 2FA-пароль
- `POST /api/v1/auth/logout` — завершить сессию
- `GET /api/v1/stats` — глобальные счётчики
- `GET /api/v1/settings` — read-only вид конфигурации
- `GET /api/v1/channels` — список каналов
- `POST /api/v1/channels` `{"identifier": "@durov"}` — добавить
- `GET /api/v1/channels/{id}` — карточка
- `PATCH /api/v1/channels/{id}` `{"is_active": false}` — включить/выключить
- `DELETE /api/v1/channels/{id}` — удалить
- `GET /api/v1/channels/{id}/stats` — счётчики по каналу
- `POST /api/v1/channels/{id}/poll` — форсировать опрос одного канала
- `POST /api/v1/poll` — форсировать опрос всех активных каналов
- `GET /api/v1/messages?channel_id=...&q=...&limit=50&offset=0` — лента
- `GET /api/v1/messages/{id}` — одно сообщение (с `raw` JSONB)
## Дальше
- **Realtime**: заменить APScheduler на `client.add_event_handler(handler, events.NewMessage)`,
оставив periodic poll как фоновый «доводчик» для пропущенных сообщений.
- **Go-микросервис**: контракт = таблицы `channels` / `messages` в Postgres.
Go-сервис может либо читать ту же БД, либо ходить в `/api/v1/messages`.
- **k8s**: добавить Helm-чарт; `data/session/` маппится на PVC, `.env` — в Secret.

39
alembic.ini Normal file
View File

@@ -0,0 +1,39 @@
[alembic]
# Directory containing env.py and script.py.mako.
script_location = alembic
# Put src/ on sys.path so env.py can import the parser_bot package.
prepend_sys_path = src
version_path_separator = os
# Fallback value only: alembic/env.py replaces this option with
# settings.database_url before any migration runs.
sqlalchemy.url = postgresql+asyncpg://parser:parser@db:5432/parser
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARN
handlers = console
qualname =
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

52
alembic/env.py Normal file
View File

@@ -0,0 +1,52 @@
import asyncio
from logging.config import fileConfig
from alembic import context
from sqlalchemy import pool
from sqlalchemy.engine import Connection
from sqlalchemy.ext.asyncio import async_engine_from_config
from parser_bot.config import settings
from parser_bot.db.models import Base
# Alembic Config object backed by alembic.ini.
config = context.config

# Override the URL from alembic.ini with the application's own settings.
# Config options go through ConfigParser interpolation, where a bare "%" is
# special; a URL with a percent-encoded password (e.g. "p%40ss") would raise
# an interpolation error when the section is read back, so "%" is escaped.
config.set_main_option("sqlalchemy.url", settings.database_url.replace("%", "%%"))

# Configure Python logging from the [loggers]/[handlers]/[formatters]
# sections of the ini file, when Alembic was started with one.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# Metadata that autogenerate compares the database against.
target_metadata = Base.metadata
def run_migrations_offline() -> None:
    """Run migrations in Alembic's offline mode.

    The context is configured from the settings URL alone (no connection is
    passed in), with literal parameter binding and the "named" paramstyle.
    """
    offline_options = {
        "url": settings.database_url,
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
def do_run_migrations(connection: Connection) -> None:
    """Run the migrations on an already-open synchronous connection.

    Called through ``run_sync`` by ``run_migrations_online``.
    """
    context.configure(target_metadata=target_metadata, connection=connection)

    with context.begin_transaction():
        context.run_migrations()
async def run_migrations_online() -> None:
    """Run migrations against a live database through an async engine.

    The engine is built from the ``sqlalchemy.*`` options of the main ini
    section, without pooling, and disposed once the migrations have run.
    """
    ini_section = config.get_section(config.config_ini_section, {})
    engine = async_engine_from_config(
        ini_section,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    async with engine.connect() as conn:
        # Alembic's migration API is synchronous; run_sync bridges into it.
        await conn.run_sync(do_run_migrations)

    await engine.dispose()
# Script entry point: Alembic executes this module and the mode decides
# which runner is used.
if not context.is_offline_mode():
    asyncio.run(run_migrations_online())
else:
    run_migrations_offline()

25
alembic/script.py.mako Normal file
View File

@@ -0,0 +1,25 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
${upgrades if upgrades else "pass"}
def downgrade() -> None:
${downgrades if downgrades else "pass"}

View File

@@ -0,0 +1,71 @@
"""initial schema: channels + messages
Revision ID: 0001
Revises:
Create Date: 2026-05-05
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
revision: str = "0001"
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the `channels` and `messages` tables and the per-channel date index."""
    channel_columns = [
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column("tg_id", sa.BigInteger(), nullable=True, unique=True),
        sa.Column("identifier", sa.String(length=255), nullable=False, unique=True),
        sa.Column("title", sa.String(length=512), nullable=True),
        sa.Column("is_active", sa.Boolean(), nullable=False, server_default=sa.text("true")),
        sa.Column("last_message_id", sa.BigInteger(), nullable=True),
        sa.Column("last_polled_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
    ]
    op.create_table("channels", *channel_columns)

    # Messages reference their channel; deleting a channel cascades to them.
    channel_fk = sa.ForeignKey("channels.id", ondelete="CASCADE")
    message_columns = [
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column("channel_id", sa.Integer(), channel_fk, nullable=False),
        sa.Column("tg_message_id", sa.BigInteger(), nullable=False),
        sa.Column("date", sa.DateTime(timezone=True), nullable=False),
        sa.Column("text", sa.Text(), nullable=True),
        sa.Column("sender_id", sa.BigInteger(), nullable=True),
        sa.Column("has_media", sa.Boolean(), nullable=False, server_default=sa.text("false")),
        sa.Column("views", sa.Integer(), nullable=True),
        sa.Column("forwards", sa.Integer(), nullable=True),
        sa.Column("raw", postgresql.JSONB(), nullable=True),
        sa.Column(
            "fetched_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
    ]
    # A Telegram message id is stored at most once per channel.
    one_row_per_message = sa.UniqueConstraint(
        "channel_id", "tg_message_id", name="uq_channel_message"
    )
    op.create_table("messages", *message_columns, one_row_per_message)

    op.create_index(
        "ix_messages_channel_date",
        "messages",
        ["channel_id", "date"],
        unique=False,
    )
def downgrade() -> None:
    """Drop the index and both tables, `messages` before `channels`."""
    op.drop_index("ix_messages_channel_date", table_name="messages")
    for table_name in ("messages", "channels"):
        op.drop_table(table_name)

View File

@@ -0,0 +1,28 @@
"""add media_files JSONB column to messages
Revision ID: 0002
Revises: 0001
Create Date: 2026-05-05
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
revision: str = "0002"
down_revision: Union[str, None] = "0001"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add the nullable JSONB column `messages.media_files`."""
    media_files = sa.Column("media_files", postgresql.JSONB(), nullable=True)
    op.add_column("messages", media_files)
def downgrade() -> None:
    """Remove `messages.media_files`."""
    table_name, column_name = "messages", "media_files"
    op.drop_column(table_name, column_name)

View File

@@ -0,0 +1,39 @@
"""add grouped_id to messages (Telegram album/media-group key)
Revision ID: 0003
Revises: 0002
Create Date: 2026-05-05
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
revision: str = "0003"
down_revision: Union[str, None] = "0002"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add `messages.grouped_id`, index it per channel and backfill it from `raw`."""
    grouped_id = sa.Column("grouped_id", sa.BigInteger(), nullable=True)
    op.add_column("messages", grouped_id)

    indexed_columns = ["channel_id", "grouped_id"]
    op.create_index("ix_messages_grouped_id", "messages", indexed_columns)

    # Rows saved before this migration keep the album key only inside the raw
    # JSONB payload; copy it into the new column so old albums group as well.
    backfill_sql = """
UPDATE messages
SET grouped_id = (raw->>'grouped_id')::bigint
WHERE grouped_id IS NULL
AND raw IS NOT NULL
AND raw->>'grouped_id' IS NOT NULL
"""
    op.execute(backfill_sql)
def downgrade() -> None:
    """Drop the grouped_id index, then the column itself."""
    table = "messages"
    op.drop_index("ix_messages_grouped_id", table_name=table)
    op.drop_column(table, "grouped_id")

View File

@@ -0,0 +1,34 @@
"""add extracted JSONB column to messages
Revision ID: 0004
Revises: 0003
Create Date: 2026-05-05
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
revision: str = "0004"
down_revision: Union[str, None] = "0003"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add the nullable JSONB column `messages.extracted` with a GIN index."""
    extracted = sa.Column("extracted", postgresql.JSONB(), nullable=True)
    op.add_column("messages", extracted)

    # GIN index for JSON queries (e.g. filter by extracted->'real_estate'->>'kind').
    create_gin_index = (
        "CREATE INDEX IF NOT EXISTS ix_messages_extracted_gin ON messages USING GIN (extracted)"
    )
    op.execute(create_gin_index)
def downgrade() -> None:
    """Drop the GIN index (if present) and then the `extracted` column."""
    drop_gin_index = "DROP INDEX IF EXISTS ix_messages_extracted_gin"
    op.execute(drop_gin_index)
    op.drop_column("messages", "extracted")

View File

@@ -0,0 +1,30 @@
"""add sender_username and sender_name to messages
Revision ID: 0005
Revises: 0004
Create Date: 2026-05-06
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
revision: str = "0005"
down_revision: Union[str, None] = "0004"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add nullable `sender_username` (64 chars) and `sender_name` (255 chars) to messages."""
    new_columns = (("sender_username", 64), ("sender_name", 255))
    for column_name, max_length in new_columns:
        op.add_column(
            "messages",
            sa.Column(column_name, sa.String(length=max_length), nullable=True),
        )
def downgrade() -> None:
    """Remove the sender columns in the reverse order of their creation."""
    for column_name in ("sender_name", "sender_username"):
        op.drop_column("messages", column_name)

View File

@@ -0,0 +1,35 @@
"""key/value store for runtime-editable settings (LLM prompt, etc.)
Revision ID: 0006
Revises: 0005
Create Date: 2026-05-06
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
revision: str = "0006"
down_revision: Union[str, None] = "0005"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the `app_settings` key/value table (string key, JSONB value)."""
    key = sa.Column("key", sa.String(length=64), primary_key=True)
    value = sa.Column("value", postgresql.JSONB(), nullable=False)
    updated_at = sa.Column(
        "updated_at",
        sa.DateTime(timezone=True),
        nullable=False,
        server_default=sa.func.now(),
    )
    op.create_table("app_settings", key, value, updated_at)
def downgrade() -> None:
    """Drop the `app_settings` table."""
    table_name = "app_settings"
    op.drop_table(table_name)

View File

@@ -0,0 +1,37 @@
"""split channels into two verticals: real_estate / hr
Existing rows get `real_estate` per the migration decision — the service was
real-estate-only before this column existed.
Revision ID: 0007
Revises: 0006
Create Date: 2026-05-19
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
revision: str = "0007"
down_revision: Union[str, None] = "0006"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add the indexed `channels.vertical` column.

    The server default `real_estate` also fills the column for rows that
    already exist, since it is added as NOT NULL.
    """
    vertical = sa.Column(
        "vertical",
        sa.String(length=32),
        nullable=False,
        server_default="real_estate",
    )
    op.add_column("channels", vertical)
    op.create_index("ix_channels_vertical", "channels", ["vertical"])
def downgrade() -> None:
    """Drop the vertical index, then the column."""
    table = "channels"
    op.drop_index("ix_channels_vertical", table_name=table)
    op.drop_column(table, "vertical")

View File

@@ -0,0 +1,110 @@
"""sub-sections inside each vertical (e.g. Real Estate → Dubai / Moscow)
A channel now belongs to exactly one section, and each section to exactly
one vertical. The migration auto-creates a `Общий` section per vertical
that has at least one channel and pins all existing channels there, so the
service keeps working without manual reclassification after upgrade.
Revision ID: 0008
Revises: 0007
Create Date: 2026-05-20
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
revision: str = "0008"
down_revision: Union[str, None] = "0007"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Introduce `sections` and attach every existing channel to one.

    The steps are order-dependent:
      1. create the `sections` table and its vertical index;
      2. insert one `default` section per vertical that already has channels;
      3. add `channels.section_id` as nullable, with FK and index;
      4. backfill `section_id` from the matching `default` section;
      5. make `section_id` NOT NULL;
      6. widen `app_settings.key` from 64 to 128 characters.
    """
    op.create_table(
        "sections",
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column("vertical", sa.String(length=32), nullable=False),
        sa.Column("slug", sa.String(length=64), nullable=False),
        sa.Column("title", sa.String(length=255), nullable=False),
        sa.Column("emoji", sa.String(length=8), nullable=True),
        sa.Column("description", sa.Text(), nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
        # A slug only has to be unique inside its own vertical.
        sa.UniqueConstraint("vertical", "slug", name="uq_section_vertical_slug"),
    )
    op.create_index("ix_sections_vertical", "sections", ["vertical"])
    # Auto-create a `default` section for each vertical that already has channels,
    # so the backfill below has somewhere to point. Verticals without channels
    # get no section; the ON CONFLICT clause makes the insert a no-op for a
    # (vertical, slug) pair that already exists.
    op.execute(
        """
INSERT INTO sections (vertical, slug, title, emoji)
SELECT DISTINCT c.vertical,
'default',
CASE c.vertical
WHEN 'hr' THEN 'Общий HR'
ELSE 'Общий'
END,
CASE c.vertical WHEN 'hr' THEN '👥' ELSE '🏠' END
FROM channels c
ON CONFLICT (vertical, slug) DO NOTHING
"""
    )
    # Add nullable section_id first so the backfill can populate it.
    op.add_column(
        "channels",
        sa.Column("section_id", sa.Integer(), nullable=True),
    )
    # RESTRICT: a section cannot be deleted while channels still point at it.
    op.create_foreign_key(
        "fk_channels_section",
        "channels",
        "sections",
        ["section_id"],
        ["id"],
        ondelete="RESTRICT",
    )
    op.create_index("ix_channels_section_id", "channels", ["section_id"])
    # Point every channel at the `default` section of its own vertical.
    op.execute(
        """
UPDATE channels c
SET section_id = s.id
FROM sections s
WHERE s.vertical = c.vertical AND s.slug = 'default'
"""
    )
    # Now we can safely require section_id.
    op.alter_column("channels", "section_id", nullable=False)
    # Per-section LLM prompt keys are longer than 64 chars
    # (`llm_system_prompt:real_estate:some-long-slug`), so widen the key column.
    op.alter_column(
        "app_settings",
        "key",
        existing_type=sa.String(length=64),
        type_=sa.String(length=128),
        existing_nullable=False,
    )
def downgrade() -> None:
    """Undo the sections migration in reverse order of upgrade()."""
    # NOTE(review): narrowing the key column presumably fails if any stored
    # key is longer than 64 characters — confirm before downgrading real data.
    wide_key = sa.String(length=128)
    narrow_key = sa.String(length=64)
    op.alter_column(
        "app_settings",
        "key",
        existing_type=wide_key,
        type_=narrow_key,
        existing_nullable=False,
    )

    # Detach channels from sections: index, FK constraint, then the column.
    op.drop_index("ix_channels_section_id", table_name="channels")
    op.drop_constraint("fk_channels_section", "channels", type_="foreignkey")
    op.drop_column("channels", "section_id")

    # Finally remove the sections table together with its index.
    op.drop_index("ix_sections_vertical", table_name="sections")
    op.drop_table("sections")

View File

@@ -0,0 +1,24 @@
"""add access code to sections
Revision ID: 0009
Revises: 0008
Create Date: 2026-05-29
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
revision: str = "0009"
down_revision: Union[str, None] = "0008"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add the nullable `sections.access_code` column (up to 255 characters)."""
    access_code = sa.Column("access_code", sa.String(length=255), nullable=True)
    op.add_column("sections", access_code)
def downgrade() -> None:
    """Remove `sections.access_code`."""
    table_name, column_name = "sections", "access_code"
    op.drop_column(table_name, column_name)

64
docker-compose.yml Normal file
View File

@@ -0,0 +1,64 @@
# Local development stack: Ollama (LLM), a one-shot model pull, Postgres and the app.
services:
  # Ollama server; model files persist in ./data/ollama on the host.
  ollama:
    image: ollama/ollama:latest
    environment:
      OLLAMA_HOST: 0.0.0.0:11434
      OLLAMA_KEEP_ALIVE: 24h
      OLLAMA_NUM_PARALLEL: "1"
      OLLAMA_NUM_THREAD: "8"
    volumes:
      - ./data/ollama:/root/.ollama
    # NOTE(review): published on the host; drop the mapping if only the app needs it.
    ports:
      - "11434:11434"
    healthcheck:
      test: ["CMD", "ollama", "list"]
      interval: 10s
      timeout: 5s
      retries: 30
    restart: unless-stopped

  # One-shot helper: once ollama is healthy, pull the model unless
  # `ollama list` already shows it, then exit (restart: "no").
  ollama-pull:
    image: ollama/ollama:latest
    depends_on:
      ollama:
        condition: service_healthy
    environment:
      OLLAMA_HOST: ollama:11434
    entrypoint: ["/bin/sh", "-c"]
    command: ["ollama list | grep -q qwen2.5:7b-instruct-q4_K_M || ollama pull qwen2.5:7b-instruct-q4_K_M"]
    restart: "no"

  # Postgres; credentials come from .env and fall back to parser/parser/parser.
  db:
    image: postgres:16-alpine
    environment:
      POSTGRES_USER: ${POSTGRES_USER:-parser}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-parser}
      POSTGRES_DB: ${POSTGRES_DB:-parser}
    # NOTE(review): published on the host with the default credentials above —
    # change the password or drop the mapping outside local development.
    ports:
      - "5432:5432"
    volumes:
      - pgdata:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-parser}"]
      interval: 5s
      timeout: 5s
      retries: 10

  # The parser service itself, built from the Dockerfile in this directory.
  # It waits for db and ollama to be healthy (not for ollama-pull to finish).
  app:
    build: .
    env_file: .env
    depends_on:
      db:
        condition: service_healthy
      ollama:
        condition: service_healthy
    # Host port 80 -> container port 8000.
    ports:
      - "80:8000"
    volumes:
      # Persisted Telegram session file and downloaded media.
      - ./data/session:/data/session
      - ./data/media:/data/media
      # Host sources and migrations are mounted over the copies baked into the image.
      - ./src:/app/src
      - ./alembic:/app/alembic

volumes:
  pgdata:

16
docker/entrypoint.sh Normal file
View File

@@ -0,0 +1,16 @@
#!/bin/sh
# Container entrypoint: apply pending Alembic migrations, then exec the
# requested command so that it replaces this shell as the main process.
set -e

# Migrations run on every container start. This is idempotent: alembic skips
# revisions that are already applied. One-shot commands bypass the step:
# `alembic` itself (migrations are being driven explicitly) and interactive
# shells.
case "$1" in
    alembic|/bin/sh|sh|bash)
        exec "$@"
        ;;
esac

# The auth helper is invoked as `python -m parser_bot.auth`. "$1" holds only
# the first word ("python"), so the whole command line has to be matched;
# "$*" joins all arguments with single spaces.
case "$*" in
    "python -m parser_bot.auth"|"python -m parser_bot.auth "*)
        exec "$@"
        ;;
esac

echo "[entrypoint] running alembic upgrade head"
alembic upgrade head

exec "$@"

20
k8s/configmap.yaml Normal file
View File

@@ -0,0 +1,20 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: monitoring-tg-config
namespace: monitoring-tg
data:
API_HOST: "0.0.0.0"
API_PORT: "8000"
PUBLIC_BASE_PATH: "/api/monitoring-tg"
POSTGRES_HOST: "postgres.monitoring-tg.svc.cluster.local"
POSTGRES_PORT: "5432"
POSTGRES_USER: "parser"
POSTGRES_DB: "parser"
TG_SESSION_PATH: "/data/session/parser.session"
MEDIA_DIR: "/data/media"
POLL_INTERVAL_SECONDS: "60"
POLL_HISTORY_LIMIT: "50"
LLM_ENABLED: "1"
LLM_BASE_URL: "http://ollama.ollama.svc.cluster.local:11434"
LLM_MODEL: "qwen2.5:7b-instruct-q4_K_M"

12
k8s/kustomization.yaml Normal file
View File

@@ -0,0 +1,12 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: monitoring-tg
resources:
- namespace.yaml
- configmap.yaml
- secrets.yaml
- postgres.yaml
- server-deployment.yaml
- server-service.yaml

4
k8s/namespace.yaml Normal file
View File

@@ -0,0 +1,4 @@
apiVersion: v1
kind: Namespace
metadata:
name: monitoring-tg

65
k8s/postgres.yaml Normal file
View File

@@ -0,0 +1,65 @@
apiVersion: v1
kind: Service
metadata:
name: postgres
namespace: monitoring-tg
spec:
selector:
app: postgres
ports:
- port: 5432
targetPort: 5432
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: postgres
namespace: monitoring-tg
spec:
serviceName: postgres
replicas: 1
selector:
matchLabels:
app: postgres
template:
metadata:
labels:
app: postgres
spec:
containers:
- name: postgres
image: postgres:16-alpine
ports:
- containerPort: 5432
envFrom:
- secretRef:
name: postgres-secret
volumeMounts:
- name: pgdata
mountPath: /var/lib/postgresql/data
resources:
requests:
cpu: 50m
memory: 128Mi
limits:
cpu: 500m
memory: 512Mi
livenessProbe:
exec:
command: ["pg_isready", "-U", "parser", "-d", "parser"]
initialDelaySeconds: 10
periodSeconds: 10
readinessProbe:
exec:
command: ["pg_isready", "-U", "parser", "-d", "parser"]
initialDelaySeconds: 5
periodSeconds: 5
volumeClaimTemplates:
- metadata:
name: pgdata
spec:
accessModes: ["ReadWriteOnce"]
storageClassName: local-path
resources:
requests:
storage: 5Gi

25
k8s/secrets.yaml Normal file
View File

@@ -0,0 +1,25 @@
# Secret templates for the monitoring-tg namespace.
# Every value below is a placeholder: set the real values in the cluster and
# never commit real credentials to this file.
apiVersion: v1
kind: Secret
metadata:
  name: monitoring-tg-secrets
  namespace: monitoring-tg
type: Opaque
stringData:
  # Telegram MTProto credentials (see .env.example for where to obtain them).
  TG_API_ID: "0"
  TG_API_HASH: "CHANGE_ME"
  TG_PHONE: "CHANGE_ME"
  # String session; when empty the file at TG_SESSION_PATH is used instead.
  TG_SESSION_STRING: ""
  # Must match POSTGRES_PASSWORD in postgres-secret below.
  POSTGRES_PASSWORD: "parser"
  # Comma-separated admin IP allowlist; empty means no network restriction.
  ADMIN_ALLOWED_IPS: ""
  ADMIN_PASSWORD: "CHANGE_ME"
---
# Environment for the postgres StatefulSet (consumed via envFrom).
apiVersion: v1
kind: Secret
metadata:
  name: postgres-secret
  namespace: monitoring-tg
type: Opaque
stringData:
  POSTGRES_USER: "parser"
  POSTGRES_PASSWORD: "parser"
  POSTGRES_DB: "parser"

View File

@@ -0,0 +1,70 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: monitoring-tg-data
namespace: monitoring-tg
spec:
accessModes: ["ReadWriteOnce"]
storageClassName: local-path
resources:
requests:
storage: 10Gi
---
# Single-replica Deployment of the parser service. State (Telegram session
# file and downloaded media) lives on the monitoring-tg-data PVC under /data.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: monitoring-tg-server
  namespace: monitoring-tg
spec:
  replicas: 1
  # The default RollingUpdate strategy starts the new pod before the old one
  # is stopped, so for a while two processes would share the same /data
  # volume and the same Telegram session. Recreate stops the old pod first,
  # at the cost of a short downtime per deploy.
  strategy:
    type: Recreate
    # Explicit null so that `kubectl apply` also clears the rollingUpdate
    # defaults stored on an already-existing Deployment; they are rejected
    # together with type Recreate.
    rollingUpdate: null
  selector:
    matchLabels:
      app: monitoring-tg-server
  template:
    metadata:
      labels:
        app: monitoring-tg-server
    spec:
      terminationGracePeriodSeconds: 20
      securityContext:
        fsGroup: 1000
      containers:
        - name: monitoring-tg-server
          # Placeholder tag; the CI pipeline pins the image to the commit SHA
          # with `kubectl set image` right after applying this manifest.
          image: localhost:30300/admin/monitoring-tg-server:latest
          ports:
            - containerPort: 8000
          envFrom:
            - configMapRef:
                name: monitoring-tg-config
            - secretRef:
                name: monitoring-tg-secrets
          volumeMounts:
            - name: app-data
              mountPath: /data
          # Up to 30 x 5s = 150s for startup (migrations run in the
          # entrypoint) before the liveness probe takes over.
          startupProbe:
            httpGet:
              path: /healthz
              port: 8000
            periodSeconds: 5
            failureThreshold: 30
          livenessProbe:
            httpGet:
              path: /healthz
              port: 8000
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /healthz
              port: 8000
            periodSeconds: 5
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: 800m
              memory: 1Gi
      volumes:
        - name: app-data
          persistentVolumeClaim:
            claimName: monitoring-tg-data

18
k8s/server-service.yaml Normal file
View File

@@ -0,0 +1,18 @@
apiVersion: v1
kind: Service
metadata:
name: monitoring-tg-server
namespace: monitoring-tg
annotations:
portal.estateliga.work/enabled: "true"
portal.estateliga.work/name: "Мониторинг TG"
portal.estateliga.work/description: "Парсер и анализ Telegram-каналов"
portal.estateliga.work/icon: "pulse"
portal.estateliga.work/path: "/api/monitoring-tg"
portal.estateliga.work/code: "monitoring_tg"
spec:
selector:
app: monitoring-tg-server
ports:
- port: 80
targetPort: 8000

44
pyproject.toml Normal file
View File

@@ -0,0 +1,44 @@
[project]
name = "parser-tg-bot"
version = "0.1.0"
description = "Telegram channel parser — periodic polling + storage, future Go microservice"
requires-python = ">=3.11"
dependencies = [
"telethon>=1.36",
"fastapi>=0.115",
"uvicorn[standard]>=0.32",
"sqlalchemy[asyncio]>=2.0",
"asyncpg>=0.30",
"alembic>=1.14",
"apscheduler>=3.10",
"pydantic>=2.9",
"pydantic-settings>=2.6",
"python-dotenv>=1.0",
"structlog>=24.4",
"httpx>=0.27",
]
[project.optional-dependencies]
dev = [
"ruff>=0.7",
"mypy>=1.13",
"pytest>=8.3",
"pytest-asyncio>=0.24",
]
[build-system]
requires = ["setuptools>=68"]
build-backend = "setuptools.build_meta"
[tool.setuptools.packages.find]
where = ["src"]
[tool.setuptools.package-data]
"parser_bot.web" = ["static/*", "static/**/*"]
[tool.ruff]
line-length = 100
target-version = "py311"
[tool.pytest.ini_options]
asyncio_mode = "auto"

View File

116
src/parser_bot/access.py Normal file
View File

@@ -0,0 +1,116 @@
"""Admin access helpers for admin-only surfaces (auth, OpenAPI docs).
Resolution:
1. If `ADMIN_ALLOWED_IPS` is empty → no network restriction.
2. Otherwise the request's client IP must be in the allowlist.
3. When `TRUST_PROXY_HEADERS=true` (default) and one of the proxy headers
is present, the first IP in `X-Forwarded-For` (or `X-Real-IP`) is used.
Without this, behind a Docker port-forward the source IP is always the
gateway, which is useless for ACLs.
4. If `ADMIN_PASSWORD` is set, the request must also present a valid signed
admin cookie or the password in `X-Admin-Password`.
"""
from __future__ import annotations
import hashlib
import hmac
import secrets
from fastapi import HTTPException, Request, Response
from parser_bot.config import settings
ADMIN_COOKIE = "parser_admin"
_ADMIN_TOKEN_MESSAGE = b"parser-tg-bot-admin-v1"
def client_ip(request: Request) -> str:
    """Return the best-effort source IP of ``request``.

    When ``settings.trust_proxy_headers`` is enabled, the first entry of
    ``X-Forwarded-For`` is preferred, then ``X-Real-IP``. Otherwise, or when
    neither header is present, the peer address of the connection is used,
    falling back to ``"0.0.0.0"`` if the request has no client information.

    NOTE(review): the header values are taken as sent and are not validated
    here, so they are only trustworthy behind a proxy that overwrites them.
    """
    if settings.trust_proxy_headers:
        forwarded_for = request.headers.get("x-forwarded-for")
        if forwarded_for:
            # Standard form: "client, proxy1, proxy2" — first is closest to user.
            first_hop, _, _ = forwarded_for.partition(",")
            return first_hop.strip()

        real_ip = request.headers.get("x-real-ip")
        if real_ip:
            return real_ip.strip()

    peer = request.client
    if peer:
        return peer.host
    return "0.0.0.0"
def is_admin_network_allowed(request: Request) -> bool:
    """Tell whether the request passes the admin IP allowlist.

    An empty allowlist disables the network restriction entirely.
    """
    allowlist = settings.admin_ip_set
    return not allowlist or client_ip(request) in allowlist
def admin_password_enabled() -> bool:
    """Return True when a non-empty admin password is configured."""
    return True if settings.admin_password else False
def verify_admin_password(password: str | None) -> bool:
if not settings.admin_password:
return True
if password is None:
return False
return secrets.compare_digest(password, settings.admin_password)
def admin_token() -> str:
    """Derive the admin cookie value from the configured admin password.

    Returns the hex HMAC-SHA256 of the fixed token message, keyed with the
    UTF-8 encoded password. Callers are expected to invoke this only when a
    password is configured.
    """
    key = settings.admin_password.encode("utf-8")
    mac = hmac.new(key, _ADMIN_TOKEN_MESSAGE, hashlib.sha256)
    return mac.hexdigest()
def verify_admin_token(token: str | None) -> bool:
    """Check a candidate admin cookie token in constant time.

    Args:
        token: Cookie value supplied by the client, or ``None`` when absent.

    Returns:
        ``True`` when no admin password is configured or when ``token``
        equals ``admin_token()``; ``False`` otherwise.
    """
    if not settings.admin_password:
        return True
    if token is None:
        return False
    # The cookie value is attacker-controlled: a non-ASCII str would make
    # compare_digest() raise TypeError, so compare encoded bytes instead.
    return secrets.compare_digest(
        token.encode("utf-8", "surrogatepass"),
        admin_token().encode("utf-8"),
    )
def set_admin_cookie(response: Response) -> None:
    """Attach the signed admin cookie to ``response`` (30-day lifetime)."""
    thirty_days = 60 * 60 * 24 * 30
    # NOTE(review): secure=False lets the cookie travel over plain HTTP —
    # confirm this is intended for deployments served over HTTPS.
    cookie_options = dict(
        httponly=True,
        samesite="lax",
        secure=False,
        max_age=thirty_days,
    )
    response.set_cookie(ADMIN_COOKIE, admin_token(), **cookie_options)
def clear_admin_cookie(response: Response) -> None:
    """Instruct the client to drop the admin cookie."""
    cookie_name = ADMIN_COOKIE
    response.delete_cookie(cookie_name)
def is_admin_request(request: Request) -> bool:
    """Decide whether ``request`` passes every configured admin check.

    The network allowlist is evaluated first. When an admin password is
    configured, the request must additionally carry either a valid admin
    cookie or the password in the ``X-Admin-Password`` header.
    """
    if not is_admin_network_allowed(request):
        return False
    if not settings.admin_password:
        return True
    if verify_admin_token(request.cookies.get(ADMIN_COOKIE)):
        return True
    return verify_admin_password(request.headers.get("x-admin-password"))
def require_admin_network(request: Request) -> None:
    """FastAPI dependency for the admin login page/API.

    Only the IP allowlist is enforced here, so the allowlist stays useful
    even before the password cookie exists. Rejected requests get a 404.
    """
    if is_admin_network_allowed(request):
        return
    raise HTTPException(status_code=404)
def require_admin(request: Request) -> None:
    """FastAPI dependency: respond with 404 to non-admin requests.

    404 is used instead of 403 so that admin endpoints do not advertise
    their existence to clients outside the admin boundary.
    """
    if is_admin_request(request):
        return
    raise HTTPException(status_code=404)

View File

1048
src/parser_bot/api/routes.py Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,231 @@
import re
from datetime import datetime
from typing import Literal
from pydantic import BaseModel, ConfigDict, Field, field_validator
Vertical = Literal["real_estate", "hr"]
# Section slugs are used as URL segments — keep them URL-safe.
_SLUG_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,62}[a-z0-9]$|^[a-z0-9]$")
# Request payload for creating a section inside a vertical.
# `slug` becomes a URL segment, so it is validated against _SLUG_RE.
class SectionCreate(BaseModel):
    vertical: Vertical
    slug: str = Field(..., min_length=1, max_length=64)
    title: str = Field(..., min_length=1, max_length=255)
    emoji: str | None = Field(None, max_length=8)
    description: str | None = None
    access_code: str = Field(..., min_length=3, max_length=255)

    @field_validator("slug")
    @classmethod
    def _check_slug(cls, v: str) -> str:
        """Reject slugs that are not URL-safe.

        Raises:
            ValueError: if ``v`` is not made of lowercase letters/digits with
                optional inner ``-`` / ``_`` separators.
        """
        # fullmatch() instead of match(): with match(), the trailing `$` of
        # the pattern also matches just before a final newline, so "abc\n"
        # used to pass validation and end up in URLs.
        if not _SLUG_RE.fullmatch(v):
            raise ValueError(
                "slug must be lowercase letters/digits with '-' or '_' separators"
            )
        return v
# Payload for editing a section; every field is optional and defaults to None.
# NOTE(review): presumably None means "leave unchanged" — confirm in the route
# handler. `vertical` and `slug` are not part of this payload.
class SectionUpdate(BaseModel):
    title: str | None = Field(None, min_length=1, max_length=255)
    emoji: str | None = Field(None, max_length=8)
    description: str | None = None
    access_code: str | None = Field(None, min_length=3, max_length=255)
# Response representation of a section.
class SectionOut(BaseModel):
    # from_attributes lets the model be validated from attribute-bearing
    # objects (e.g. ORM rows) instead of dicts only.
    model_config = ConfigDict(from_attributes=True)
    id: int
    vertical: Vertical
    slug: str
    title: str
    emoji: str | None
    description: str | None
    # Defaults to None, so the code can be left out of a response.
    access_code: str | None = None
    created_at: datetime
class SectionWithStats(SectionOut):
    """Section payload enriched with rollup counts for the section chooser page."""

    # All counters default to 0 when the caller supplies no value.
    channels_total: int = 0
    channels_active: int = 0
    messages_total: int = 0
    leads_total: int = 0
# Request payload for registering a channel under a section.
class ChannelCreate(BaseModel):
    identifier: str = Field(..., min_length=1, max_length=255, description="@username or t.me link")
    # Defaults to the real-estate vertical when omitted.
    vertical: Vertical = "real_estate"
    section: str = Field(
        ..., min_length=1, max_length=64,
        description="Slug of the section inside the vertical (e.g. 'dubai')",
    )
# Payload for editing a channel; every field is optional and defaults to None.
# NOTE(review): presumably None means "leave unchanged" — confirm in the route
# handler.
class ChannelUpdate(BaseModel):
    is_active: bool | None = None
    vertical: Vertical | None = None
    section: str | None = Field(
        None, min_length=1, max_length=64,
        description="Move the channel to another section in the same vertical",
    )
# Response representation of a channel.
class ChannelOut(BaseModel):
    # from_attributes lets the model be validated from attribute-bearing
    # objects (e.g. ORM rows) instead of dicts only.
    model_config = ConfigDict(from_attributes=True)
    id: int
    tg_id: int | None
    identifier: str
    title: str | None
    vertical: Vertical
    section_id: int
    # Optional convenience field; defaults to None when not supplied.
    section_slug: str | None = None
    is_active: bool
    last_message_id: int | None
    last_polled_at: datetime | None
    created_at: datetime
# Per-channel statistics row: channel identity plus message counters.
class ChannelStats(BaseModel):
    channel_id: int
    identifier: str
    title: str | None
    vertical: Vertical
    section_slug: str | None = None
    is_active: bool
    last_polled_at: datetime | None
    message_count: int
    last_message_at: datetime | None
# Description of one media attachment of a message.
class MediaFile(BaseModel):
    kind: str  # photo | video | document | audio | sticker | unknown
    url: str | None = None
    mime: str | None = None
    size: int | None = None
    skipped: str | None = None  # set when not downloaded (e.g. "too_large")
# Real-estate signals attached to a message; every field is optional.
# NOTE(review): the field set mirrors the dict returned by the regex extractor
# `extract_real_estate` — confirm that is the only producer.
class RealEstate(BaseModel):
    kind: str | None = None
    property_type: str | None = None
    rooms: str | None = None
    area_m2: float | None = None
    price: str | None = None
# Real-estate lead verdict; `is_listing` is the only required field.
# NOTE(review): the field set mirrors the dict built by
# `llm._coerce_real_estate` — confirm against the classifier.
class Lead(BaseModel):
    is_listing: bool
    kind: str | None = None  # sale | rent | purchase
    property_type: str | None = None
    rooms: str | None = None
    area_m2: float | None = None
    price_text: str | None = None
    price_value: float | None = None
    currency: str | None = None  # RUB | USD | EUR | AED | GBP | CNY | TRY | KZT | BYN | UAH
    location: str | None = None
    contact_phone: str | None = None
    contact_name: str | None = None
    summary: str | None = None
    confidence: float = 0.0
class HrLead(BaseModel):
    """LLM verdict for HR-vertical messages (jobs / resumes / bare contacts)."""

    # `is_lead` is the only required field; everything else is optional.
    is_lead: bool
    kind: str | None = None  # vacancy | resume | contact
    title: str | None = None
    company: str | None = None
    candidate_name: str | None = None
    experience_years: float | None = None
    skills: list[str] = []
    location: str | None = None
    remote: bool | None = None
    employment_type: str | None = None
    salary_text: str | None = None
    salary_value: float | None = None
    currency: str | None = None
    contact_phone: str | None = None
    contact_name: str | None = None
    summary: str | None = None
    confidence: float = 0.0
# Container for everything extracted from one message: contact lists plus the
# optional real-estate block and the optional per-vertical lead verdicts.
class Extracted(BaseModel):
    phones: list[str] = []
    names: list[str] = []
    tg_handles: list[str] = []
    real_estate: RealEstate | None = None
    lead: Lead | None = None
    hr_lead: HrLead | None = None
# Response representation of a stored message, including channel context,
# media descriptors and extraction results.
class MessageOut(BaseModel):
    # from_attributes lets the model be validated from attribute-bearing
    # objects (e.g. ORM rows) instead of dicts only.
    model_config = ConfigDict(from_attributes=True)
    id: int
    channel_id: int
    channel_vertical: Vertical | None = None
    channel_section_slug: str | None = None
    tg_message_id: int
    grouped_id: int | None = None
    # Defaults to 1.
    # NOTE(review): presumably the number of messages sharing `grouped_id` —
    # confirm against the code that fills it.
    group_size: int = 1
    date: datetime
    text: str | None
    sender_id: int | None
    has_media: bool
    media_files: list[MediaFile] | None = None
    extracted: Extracted | None = None
    sender_username: str | None = None
    sender_name: str | None = None
    post_url: str | None = None
    # `views` and `forwards` are nullable but have no default, so the source
    # object/dict must provide them (possibly as None).
    views: int | None
    forwards: int | None
    fetched_at: datetime
# Aggregate counters for a vertical, optionally narrowed to one section.
class GlobalStats(BaseModel):
    vertical: Vertical
    section_slug: str | None = None
    channels_total: int
    channels_active: int
    messages_total: int
    messages_last_24h: int
    # Lead counters default to 0 when not supplied.
    leads_total: int = 0
    leads_last_24h: int = 0
    poll_interval_seconds: int
    last_poll_at: datetime | None
# Authorization status payload: a flag plus optional account details.
# NOTE(review): presumably describes the Telegram session — confirm in routes.
class AuthStatus(BaseModel):
    authorized: bool
    username: str | None = None
    phone: str | None = None
# Request payload carrying a login code of 3–12 characters.
class AuthCode(BaseModel):
    code: str = Field(..., min_length=3, max_length=12)
# Request payload carrying a non-empty password.
# NOTE(review): presumably the Telegram two-step password — confirm in routes.
class AuthPassword(BaseModel):
    password: str = Field(..., min_length=1)
# Result of submitting a login code: tells whether a password is still needed.
class AuthCodeResult(BaseModel):
    needs_password: bool
# Request payload for the admin login: a non-empty password.
class AdminLogin(BaseModel):
    password: str = Field(..., min_length=1)
# Request payload for entering a section: the (vertical, section slug) pair
# plus the code supplied by the user.
class SectionLogin(BaseModel):
    vertical: Vertical
    section: str = Field(..., min_length=1, max_length=64)
    code: str = Field(..., min_length=1, max_length=255)

51
src/parser_bot/auth.py Normal file
View File

@@ -0,0 +1,51 @@
"""Interactive Telethon login. Run once on a dev machine, copy the printed
TG_SESSION_STRING into your .env / k8s Secret, then deploy without ever
touching auth again.
Usage:
docker compose run --rm -it app python -m parser_bot.auth
Telegram requires interactive code entry only for the very first login;
the resulting StringSession can be reused on any host until you log out
or someone invalidates the session in Telegram settings.
"""
import asyncio
import sys
from telethon import TelegramClient
from telethon.sessions import StringSession
from parser_bot.config import settings
async def main() -> int:
    """Run the interactive Telethon login and print the session string.

    Returns:
        The process exit code: ``2`` when stdin is not a TTY (the login needs
        interactive input), ``0`` after a successful login.
    """
    if not sys.stdin.isatty():
        print(
            "ERROR: not a TTY. Re-run with: "
            "docker compose run --rm -it app python -m parser_bot.auth",
            file=sys.stderr,
        )
        return 2
    client = TelegramClient(StringSession(), settings.tg_api_id, settings.tg_api_hash)
    try:
        await client.start(phone=settings.tg_phone)
        me = await client.get_me()
        session_str = client.session.save()
    finally:
        # Always release the connection, including when the login is aborted
        # or fails half-way; previously an exception skipped the disconnect.
        await client.disconnect()
    print()
    print(f"authorized as {me.username or me.id}")
    print()
    print("Add this line to your .env (or k8s Secret) and never share it:")
    print()
    print(f"TG_SESSION_STRING={session_str}")
    print()
    print(
        "After saving, no further interactive auth is needed. Restarts, rebuilds,"
        " redeploys all reuse this string."
    )
    return 0
if __name__ == "__main__":
sys.exit(asyncio.run(main()))

64
src/parser_bot/config.py Normal file
View File

@@ -0,0 +1,64 @@
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Application settings read from environment variables and ``.env``.

    Each field is bound to the environment variable named by its ``alias``;
    unknown variables are ignored (``extra="ignore"``).
    """

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore")

    tg_api_id: int = Field(..., alias="TG_API_ID")
    tg_api_hash: str = Field(..., alias="TG_API_HASH")
    tg_phone: str = Field(..., alias="TG_PHONE")
    tg_session_path: str = Field("/data/session/parser.session", alias="TG_SESSION_PATH")
    # Preferred for prod / k8s: opaque base64-ish string from `python -m parser_bot.auth`.
    # If set, takes priority over file-based session.
    tg_session_string: str | None = Field(None, alias="TG_SESSION_STRING")

    postgres_user: str = Field("parser", alias="POSTGRES_USER")
    postgres_password: str = Field("parser", alias="POSTGRES_PASSWORD")
    postgres_db: str = Field("parser", alias="POSTGRES_DB")
    postgres_host: str = Field("db", alias="POSTGRES_HOST")
    postgres_port: int = Field(5432, alias="POSTGRES_PORT")

    poll_interval_seconds: int = Field(60, alias="POLL_INTERVAL_SECONDS")
    poll_history_limit: int = Field(50, alias="POLL_HISTORY_LIMIT")

    api_host: str = Field("0.0.0.0", alias="API_HOST")
    api_port: int = Field(8000, alias="API_PORT")
    public_base_path: str = Field("", alias="PUBLIC_BASE_PATH")

    media_dir: str = Field("/data/media", alias="MEDIA_DIR")
    media_max_bytes: int = Field(20 * 1024 * 1024, alias="MEDIA_MAX_BYTES")

    # Local LLM via Ollama for lead classification & extraction
    llm_enabled: bool = Field(True, alias="LLM_ENABLED")
    llm_base_url: str = Field("http://ollama:11434", alias="LLM_BASE_URL")
    llm_model: str = Field("qwen2.5:7b-instruct-q4_K_M", alias="LLM_MODEL")
    llm_timeout_seconds: int = Field(120, alias="LLM_TIMEOUT_SECONDS")
    llm_min_text_length: int = Field(20, alias="LLM_MIN_TEXT_LENGTH")
    llm_classify_interval_seconds: int = Field(20, alias="LLM_CLASSIFY_INTERVAL_SECONDS")
    llm_classify_batch_size: int = Field(5, alias="LLM_CLASSIFY_BATCH_SIZE")

    # Admin allowlist for /auth.html, /docs, /openapi.json, /redoc and the
    # /auth/* API endpoints. Comma-separated IPv4/IPv6. Empty (default) means
    # no restriction — convenient for local dev. Set explicitly in prod.
    admin_allowed_ips: str = Field("", alias="ADMIN_ALLOWED_IPS")
    # Optional second factor for admin-only UI/API operations. Empty keeps the
    # previous IP-only behavior for local/dev deployments.
    admin_password: str = Field("", alias="ADMIN_PASSWORD")
    # When true, honor X-Forwarded-For / X-Real-IP set by a reverse proxy
    # in front of uvicorn (Docker port-forward, nginx, traefik, etc).
    trust_proxy_headers: bool = Field(True, alias="TRUST_PROXY_HEADERS")

    @property
    def admin_ip_set(self) -> set[str]:
        """Parse ``admin_allowed_ips`` into a set, dropping blank entries."""
        return {s.strip() for s in self.admin_allowed_ips.split(",") if s.strip()}

    @property
    def database_url(self) -> str:
        """Build the asyncpg SQLAlchemy URL from the ``postgres_*`` fields.

        User name and password are percent-encoded so that characters such as
        ``@``, ``:``, ``/`` or ``#`` in the credentials cannot be mistaken for
        URL delimiters. Credentials without such characters produce exactly
        the same URL as before.
        """
        from urllib.parse import quote

        user = quote(self.postgres_user, safe="")
        password = quote(self.postgres_password, safe="")
        return (
            f"postgresql+asyncpg://{user}:{password}"
            f"@{self.postgres_host}:{self.postgres_port}/{self.postgres_db}"
        )
settings = Settings()

View File

119
src/parser_bot/db/models.py Normal file
View File

@@ -0,0 +1,119 @@
from datetime import datetime
from sqlalchemy import (
BigInteger,
DateTime,
ForeignKey,
Index,
String,
Text,
UniqueConstraint,
func,
)
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
class Base(DeclarativeBase):
    """Declarative base class shared by the ORM models of this module."""

    pass
class Section(Base):
    """A sub-section inside a vertical, e.g. ('real_estate', 'dubai').

    The pair (vertical, slug) is unique and identifies a section in URLs
    and API calls. A channel belongs to exactly one section, the section
    knows its vertical, and the LLM prompt store can hold a per-section
    override that falls back to the vertical-level prompt.
    """

    __tablename__ = "sections"
    __table_args__ = (
        # Enforces the (vertical, slug) uniqueness described above.
        UniqueConstraint("vertical", "slug", name="uq_section_vertical_slug"),
        Index("ix_sections_vertical", "vertical"),
    )

    id: Mapped[int] = mapped_column(primary_key=True)
    vertical: Mapped[str] = mapped_column(String(32))
    slug: Mapped[str] = mapped_column(String(64))
    title: Mapped[str] = mapped_column(String(255))
    emoji: Mapped[str | None] = mapped_column(String(8), nullable=True)
    description: Mapped[str | None] = mapped_column(Text, nullable=True)
    # NOTE(review): stored as plain text in this column — confirm whether the
    # code is meant to be hashed before it is persisted.
    access_code: Mapped[str | None] = mapped_column(String(255), nullable=True)
    # Filled in by the database at insert time.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )

    channels: Mapped[list["Channel"]] = relationship(back_populates="section")
class Channel(Base):
    """A Telegram channel row: identity, owning section and polling state."""

    __tablename__ = "channels"

    id: Mapped[int] = mapped_column(primary_key=True)
    # Telegram numeric channel id (peer id), nullable until first resolve
    tg_id: Mapped[int | None] = mapped_column(BigInteger, unique=True, nullable=True)
    # Username or t.me/joinchat link supplied by user
    identifier: Mapped[str] = mapped_column(String(255), unique=True)
    title: Mapped[str | None] = mapped_column(String(512), nullable=True)
    # 'real_estate' or 'hr' — picks which LLM prompt and lead schema is used
    vertical: Mapped[str] = mapped_column(
        String(32), default="real_estate", server_default="real_estate", index=True
    )
    # RESTRICT: a section cannot be deleted while channels still reference it.
    section_id: Mapped[int] = mapped_column(
        ForeignKey("sections.id", ondelete="RESTRICT"), index=True
    )
    is_active: Mapped[bool] = mapped_column(default=True, server_default="true")
    last_message_id: Mapped[int | None] = mapped_column(BigInteger, nullable=True)
    last_polled_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    # Filled in by the database at insert time.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )

    section: Mapped[Section] = relationship(back_populates="channels")
    # passive_deletes=True leaves the removal of child rows to the database
    # (messages.channel_id is declared with ON DELETE CASCADE).
    messages: Mapped[list["Message"]] = relationship(
        back_populates="channel",
        cascade="all, delete-orphan",
        passive_deletes=True,
    )
class Message(Base):
    """A message fetched from a channel, with raw and extracted payloads."""

    __tablename__ = "messages"
    __table_args__ = (
        # One row per Telegram message id within a channel.
        UniqueConstraint("channel_id", "tg_message_id", name="uq_channel_message"),
        Index("ix_messages_channel_date", "channel_id", "date"),
    )

    id: Mapped[int] = mapped_column(primary_key=True)
    # Rows are removed by the database when the owning channel is deleted.
    channel_id: Mapped[int] = mapped_column(ForeignKey("channels.id", ondelete="CASCADE"))
    tg_message_id: Mapped[int] = mapped_column(BigInteger)
    date: Mapped[datetime] = mapped_column(DateTime(timezone=True))
    text: Mapped[str | None] = mapped_column(Text, nullable=True)
    sender_id: Mapped[int | None] = mapped_column(BigInteger, nullable=True)
    sender_username: Mapped[str | None] = mapped_column(String(64), nullable=True)
    sender_name: Mapped[str | None] = mapped_column(String(255), nullable=True)
    grouped_id: Mapped[int | None] = mapped_column(BigInteger, nullable=True)
    has_media: Mapped[bool] = mapped_column(default=False, server_default="false")
    views: Mapped[int | None] = mapped_column(nullable=True)
    forwards: Mapped[int | None] = mapped_column(nullable=True)
    # JSONB payloads.
    # NOTE(review): presumably `raw` is the serialized Telegram message,
    # `media_files` a list of MediaFile-shaped dicts and `extracted` the
    # extractor output — confirm against the writer code.
    raw: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
    media_files: Mapped[list | None] = mapped_column(JSONB, nullable=True)
    extracted: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
    # Filled in by the database at insert time.
    fetched_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )

    channel: Mapped[Channel] = relationship(back_populates="messages")
class AppSetting(Base):
    """Runtime-editable settings, edited from the UI without a restart."""

    __tablename__ = "app_settings"

    key: Mapped[str] = mapped_column(String(128), primary_key=True)
    value: Mapped[dict | str | int | bool | None] = mapped_column(JSONB, nullable=False)
    # NOTE(review): only a server-side insert default is declared here (no
    # onupdate), so the timestamp changes on UPDATE only if the writer sets it
    # explicitly — confirm the code that saves settings does so.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )

View File

@@ -0,0 +1,25 @@
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from parser_bot.config import settings
engine = create_async_engine(settings.database_url, pool_pre_ping=True)
SessionFactory = async_sessionmaker(engine, expire_on_commit=False, class_=AsyncSession)
@asynccontextmanager
async def session_scope() -> AsyncIterator[AsyncSession]:
    """Provide a session that commits on success and rolls back on error.

    Yields:
        A new ``AsyncSession`` from ``SessionFactory``.

    Raises:
        Exception: any exception raised inside the ``async with`` body, or by
            the commit itself, is re-raised after the session is rolled back.
    """
    async with SessionFactory() as session:
        try:
            yield session
            # Reached only when the caller's block finished without raising.
            await session.commit()
        except Exception:
            await session.rollback()
            raise
async def get_session() -> AsyncIterator[AsyncSession]:
    """Yield a fresh session that is closed when the generator finishes.

    Unlike ``session_scope`` this never commits: callers are responsible for
    committing their own changes.
    """
    async with SessionFactory() as db:
        yield db

View File

@@ -0,0 +1,334 @@
"""Heuristic extractors for Telegram message text.
Russian-first, regex/keyword based, no ML deps. Goal is to surface signals for
the UI: phone numbers, person names (FIO), and real-estate intent (sale/rent/
purchase). False positives are tolerable — operator triages in the UI.
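Output shape of `analyze()` (used as JSONB in messages.extracted):
    {
        "phones": ["+79123456789", ...],
        "names": ["Иван Петров", ...],
        "tg_handles": ["@username", ...],
        "real_estate": {
            "kind": "sale" | "rent" | "purchase" | null,
            "property_type": str | null,  # квартира, дом, ...
            "rooms": str | null,          # room-count digit ("2") or "студия"
            "area_m2": float | null,
            "price": str | null,          # raw matched string
        } | null
    }
`analyze_with_llm()` additionally stores the LLM verdict under "lead"
(real estate) or "hr_lead" (HR) when the classifier returns one.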
"""
from __future__ import annotations
import re
from typing import Any
# --- Telegram @handles ---------------------------------------------------
# Plain @username — Telegram allows 532 chars, letters/digits/_, no leading digit.
_TG_HANDLE_RE = re.compile(r"(?<![\w/])@([A-Za-z][A-Za-z0-9_]{4,31})\b")
# t.me / telegram.me links to a user/channel handle (not joinchat / +invite).
_TG_LINK_RE = re.compile(
r"(?:https?://)?(?:t|telegram)\.me/(?!joinchat/|\+)([A-Za-z][A-Za-z0-9_]{4,31})\b"
)
def extract_tg_handles(text: str | None) -> list[str]:
if not text:
return []
out: list[str] = []
seen: set[str] = set()
for h in _TG_HANDLE_RE.findall(text):
key = h.lower()
if key in seen:
continue
seen.add(key)
out.append("@" + h)
for h in _TG_LINK_RE.findall(text):
key = h.lower()
if key in seen:
continue
seen.add(key)
out.append("@" + h)
return out
# --- Phones --------------------------------------------------------------
# Russian-format: starts with +7, 7, or 8 (no plus), 11 digits total.
_PHONE_RU_RE = re.compile(
r"(?<!\d)(?:\+?7|8)[\s\-().]*\d{3}[\s\-().]*\d{3}[\s\-().]*\d{2}[\s\-().]*\d{2}(?!\d)"
)
# International format: starts with `+<country code>` then 714 more digits
# with optional separators. Catches +971 (UAE), +1 (US), +44 (UK), etc.
_PHONE_INTL_RE = re.compile(
r"(?<![\w\d])\+\d{1,3}[\s\-().]*(?:\d[\s\-().]*){6,14}\d(?!\d)"
)
def extract_phones(text: str | None) -> list[str]:
if not text:
return []
out: list[str] = []
seen: set[str] = set()
# Pass 1: Russian-style. Normalize to +7XXXXXXXXXX.
for raw in _PHONE_RU_RE.findall(text):
digits = re.sub(r"\D", "", raw)
if len(digits) == 11 and digits[0] in "78":
normalized = "+7" + digits[1:]
elif len(digits) == 10:
normalized = "+7" + digits
else:
continue
if normalized not in seen:
seen.add(normalized)
out.append(normalized)
# Pass 2: international "+<country>...". Keep raw plus-prefix; just
# collapse separators so the result is +<digits>.
for raw in _PHONE_INTL_RE.findall(text):
digits = re.sub(r"\D", "", raw)
if not (8 <= len(digits) <= 15):
continue
normalized = "+" + digits
# If it normalized to something we already captured (e.g. +7 number
# picked up by both passes), skip.
if normalized in seen:
continue
seen.add(normalized)
out.append(normalized)
return out
# --- Names (ФИО) ---------------------------------------------------------
# Two or three capitalized Cyrillic tokens in a row. Allows hyphens (Иванов-Петров).
_NAME_RE = re.compile(
r"\b([А-ЯЁ][а-яё]+(?:\-[А-ЯЁ][а-яё]+)?(?:\s+[А-ЯЁ][а-яё]+(?:\-[А-ЯЁ][а-яё]+)?){1,2})\b"
)
# Common false positives — geo/places/orgs/etc. Skip exact matches.
_NAME_BLOCKLIST = {
"Российская Федерация",
"Санкт Петербург",
"Санкт-Петербург",
"Нижний Новгород",
"Великий Новгород",
"Ростов На Дону",
"Ростов-На-Дону",
"Москва Сити",
"Красная Площадь",
"Чёрное Море",
"Чёрного Моря",
"Без Депозита",
"Без Залога",
"Без Комиссии",
"Сдам Квартиру",
"Продам Квартиру",
"Куплю Квартиру",
"Сдам Студию",
"Продам Студию",
}
# Words that look like names but rarely are (months, weekdays, common nouns).
_NAME_TOKEN_BLOCK = {
"Январь", "Февраль", "Март", "Апрель", "Май", "Июнь",
"Июль", "Август", "Сентябрь", "Октябрь", "Ноябрь", "Декабрь",
"Понедельник", "Вторник", "Среда", "Четверг", "Пятница", "Суббота", "Воскресенье",
"Москва", "Питер", "Россия", "Кремль", "Метро",
}
def extract_names(text: str | None) -> list[str]:
if not text:
return []
out: list[str] = []
seen: set[str] = set()
for match in _NAME_RE.findall(text):
candidate = match.strip()
if candidate in _NAME_BLOCKLIST:
continue
tokens = re.split(r"[\s\-]+", candidate)
if any(t in _NAME_TOKEN_BLOCK for t in tokens):
continue
# Heuristic: at least one token must have len >= 4 (rules out "Ул.")
if not any(len(t) >= 4 for t in tokens):
continue
if candidate not in seen:
seen.add(candidate)
out.append(candidate)
return out
# --- Real estate ---------------------------------------------------------
_DEAL_KEYWORDS: dict[str, tuple[str, ...]] = {
"rent": (
# ru
"сдаётся", "сдается", "сдаю", "сдадим", "сдам", "сдаём",
"аренда", "арендую", "арендуем", "снять",
"посуточно", "помесячно",
# en
"for rent", "to let", "rental", "renting", "lease", "leasing",
"per year", "per month", "/year", "/month", "/mo",
),
"sale": (
# ru
"продаётся", "продается", "продаю", "продадим", "продам", "продаём",
"продажа", "к продаже",
# en
"for sale", "#forsale", "selling", "selling price", "sale price",
),
"purchase": (
# ru
"куплю", "купим", "покупаю", "покупка", "ищу квартиру",
"ищу дом", "ищем квартиру", "рассматриваю покупку",
# en
"looking for", "want to buy", "wanted", "requirement", "wtb",
),
}
_PROPERTY_TYPES: tuple[tuple[str, str], ...] = (
# ru
("квартир", "квартира"),
("студи", "студия"),
("апартамент", "апартаменты"),
("комнат", "комната"),
("таунхаус", "таунхаус"),
("коттедж", "коттедж"),
("дача", "дача"),
("дом", "дом"),
("офис", "офис"),
("склад", "склад"),
("помещен", "помещение"),
("земельн", "земельный участок"),
("участок", "участок"),
("гараж", "гараж"),
("машиномест", "машиноместо"),
# en — kept as Russian labels for UI consistency
("villa", "дом"),
("townhouse", "таунхаус"),
("penthouse", "апартаменты"),
("apartment", "квартира"),
("studio", "студия"),
("plot", "участок"),
(" land ", "участок"),
("office", "офис"),
("warehouse", "склад"),
("retail", "помещение"),
("garage", "гараж"),
)
_AREA_M2_RE = re.compile(
r"(\d[\d\s,]*\d|\d)\s*(?:м[²2]|кв\.?\s*м|кв\.\s*метр)",
re.IGNORECASE,
)
_AREA_SQFT_RE = re.compile(
r"(\d[\d\s,]*\d|\d)\s*(?:sqft|sq\.?\s*ft|sq\s+ft|square\s+feet)",
re.IGNORECASE,
)
def _parse_number(s: str) -> float | None:
cleaned = s.replace(" ", "").replace(",", "")
try:
return float(cleaned)
except ValueError:
return None
_ROOMS_RE = re.compile(
r"\b(\d)[\-\s]*(?:к\b|комн|комнатн|-комнат|br\b|bed\b|bedroom|-bed)",
re.IGNORECASE,
)
# Studio is a special-case "0 rooms" indicator; not extracted as rooms count.
_PRICE_RE = re.compile(
r"(\d[\d\s.,]*\d|\d)\s*(млн|млрд|тыс|тысяч|миллионов?|миллиардов?|руб(?:лей)?|₽|р/мес|/мес|р\b)",
re.IGNORECASE,
)
def _detect_kind(low: str) -> str | None:
    """Return the first deal kind whose keyword occurs in ``low``.

    Kinds are tried in the order of ``_DEAL_KEYWORDS`` and keywords in the
    order listed for each kind.

    Args:
        low: The message text, already lowercased by the caller.

    Returns:
        The matching kind key, or ``None`` when no keyword matches.
    """
    for kind, words in _DEAL_KEYWORDS.items():
        for w in words:
            pattern = re.escape(w)
            if w[:1].isalnum():
                # A keyword that starts with a letter/digit must begin a word:
                # plain substring search made "lease" fire inside "please"
                # (and "rental" inside "parental"), tagging unrelated posts as
                # rent. "_" still counts as a separator so hashtags such as
                # "#квартира_аренда" keep matching; suffixes stay allowed
                # ("rentals").
                pattern = r"(?<![^\W_])" + pattern
            if re.search(pattern, low):
                return kind
    return None
def _detect_property_type(low: str) -> str | None:
    """Return the label of the first property stem found in ``low``.

    Stems are tried in the order of ``_PROPERTY_TYPES`` with plain substring
    search; ``None`` is returned when no stem occurs.
    """
    return next((label for stem, label in _PROPERTY_TYPES if stem in low), None)
def extract_real_estate(text: str | None) -> dict[str, Any] | None:
    """Extract real-estate signals from ``text``.

    Returns ``None`` for empty text or when neither a deal kind nor a
    property type is detected. Otherwise returns a dict with the keys
    ``kind``, ``property_type``, ``rooms``, ``area_m2`` and ``price``, each of
    which may be ``None``.
    """
    if not text:
        return None
    low = text.lower()
    kind, prop = _detect_kind(low), _detect_property_type(low)
    if kind is None and prop is None:
        return None

    # Rooms: the matched digit as text, or "студия" when only a studio is
    # mentioned.
    # NOTE(review): the LLM schema uses the "2-к" form for rooms while this
    # emits the bare digit — confirm which form the UI expects.
    rooms: str | None = None
    rooms_hit = _ROOMS_RE.search(low)
    if rooms_hit:
        rooms = f"{rooms_hit.group(1)}"
    elif "студи" in low or "studio" in low:
        rooms = "студия"

    # Area: square metres are preferred; square feet are converted to m².
    area: float | None = None
    m2_hit = _AREA_M2_RE.search(text)
    if m2_hit:
        area = _parse_number(m2_hit.group(1))
    if area is None:
        sqft_hit = _AREA_SQFT_RE.search(text)
        if sqft_hit:
            sqft = _parse_number(sqft_hit.group(1))
            if sqft is not None:
                area = round(sqft * 0.0929, 1)

    # Price: the raw matched substring, not parsed into a number.
    price_hit = _PRICE_RE.search(text)
    price = None if not price_hit else price_hit.group(0).strip()

    return {
        "kind": kind,
        "property_type": prop,
        "rooms": rooms,
        "area_m2": area,
        "price": price,
    }
# --- Top-level analyzer --------------------------------------------------
def analyze(text: str | None) -> dict[str, Any]:
    """Run every regex extractor on ``text`` (synchronous, no LLM involved).

    Returns a dict with the keys ``phones``, ``names``, ``tg_handles`` and
    ``real_estate``.
    """
    result: dict[str, Any] = {}
    result["phones"] = extract_phones(text)
    result["names"] = extract_names(text)
    result["tg_handles"] = extract_tg_handles(text)
    result["real_estate"] = extract_real_estate(text)
    return result
async def analyze_with_llm(
    text: str | None,
    vertical: str = "real_estate",
    section_slug: str | None = None,
) -> dict[str, Any]:
    """Combine regex extraction with the LLM lead verdict for ``vertical``.

    ``section_slug`` is forwarded to the classifier so that it can pick a
    section-specific system prompt (e.g. Dubai-focused for
    `real_estate:dubai`) with fallback to the vertical-default prompt. A
    verdict is stored under ``hr_lead`` for the HR vertical and under ``lead``
    otherwise; when the classifier returns ``None`` the regex-only result is
    returned unchanged.
    """
    result = analyze(text)
    # Imported lazily to avoid a hard dependency on httpx where the LLM is off.
    from parser_bot.llm import classify

    verdict = await classify(text, vertical, section_slug)  # type: ignore[arg-type]
    if verdict is None:
        return result
    verdict_key = "hr_lead" if vertical == "hr" else "lead"
    result[verdict_key] = verdict
    return result

44
src/parser_bot/links.py Normal file
View File

@@ -0,0 +1,44 @@
"""Build Telegram URLs from stored channel metadata."""
from __future__ import annotations
import re
_USERNAME_RE = re.compile(r"^@?([A-Za-z][A-Za-z0-9_]{4,31})$")
_TME_URL_RE = re.compile(
r"^(?:https?://)?(?:t|telegram)\.me/(?:s/)?([A-Za-z][A-Za-z0-9_]{4,31})(?:/.*)?$"
)
def channel_username(identifier: str | None) -> str | None:
"""Extract the public username from a channel identifier if any.
Returns None for private channels (joinchat, +invite, raw IDs).
"""
if not identifier:
return None
s = identifier.strip()
m = _USERNAME_RE.match(s)
if m:
return m.group(1)
m = _TME_URL_RE.match(s)
if m:
return m.group(1)
return None
def post_url(identifier: str | None, tg_id: int | None, tg_message_id: int) -> str | None:
"""Build a deep link to a Telegram post.
Public channel: https://t.me/<username>/<msg_id>
Private channel (no public username, only tg_id): https://t.me/c/<short>/<msg_id>
where <short> is the absolute id with the leading -100 stripped.
"""
username = channel_username(identifier)
if username:
return f"https://t.me/{username}/{tg_message_id}"
if tg_id is None:
return None
raw = abs(tg_id)
s = str(raw)
short = s[3:] if s.startswith("100") and len(s) > 3 else s
return f"https://t.me/c/{short}/{tg_message_id}"

363
src/parser_bot/llm.py Normal file
View File

@@ -0,0 +1,363 @@
"""Local LLM (Ollama) client for lead classification & extraction.
Two verticals share one model and one process:
- real_estate: high recall on listings (sale/rent/purchase),
- hr: vacancies, resumes, bare contact leads.
The system prompt and JSON schema differ per vertical; the rest of the
plumbing (timeouts, single-lock concurrency, JSON-mode parsing) is shared.
On any error returns `None` and the caller falls back to regex-only extraction.
The model runs on CPU via Ollama (Qwen2.5 7B Q4_K_M). Each call takes ~3–6 s
on an i5-12400. Concurrency is 1 (Ollama already saturates the CPU per call).
"""
from __future__ import annotations
import asyncio
import json
from typing import Any, Literal
import httpx
import structlog
from parser_bot.config import settings
log = structlog.get_logger()
# Single shared lock so we never run two LLM requests at once: a single call
# already saturates the CPU, so parallel requests would only finish slower
# than sequential ones.
_lock = asyncio.Lock()
Vertical = Literal["real_estate", "hr"]
DEFAULT_RE_SYSTEM_PROMPT = """\
Ты — аналитик объявлений о недвижимости. Тебе дают текст из Telegram-канала.
Сообщение МОЖЕТ БЫТЬ НА ЛЮБОМ ЯЗЫКЕ — русский, английский, арабский, любой
другой. Обрабатывай его одинаково независимо от языка.
Задача: определить, является ли это РЕАЛЬНЫМ объявлением о покупке, продаже
или аренде НЕДВИЖИМОСТИ (квартира, дом/villa, студия/studio, апартаменты,
комната, таунхаус/townhouse, дача, коттедж, пентхаус/penthouse, офис,
склад, помещение, земельный участок/plot/land, гараж, машиноместо).
Учитывай намёки и нечёткие формулировки — лучше отметить сомнительный лид
как `is_listing=true` с низкой confidence, чем пропустить.
Сигналы что это ОБЪЯВЛЕНИЕ (kind):
— продажа/sale: «продаётся», «продаю», «продажа», «for sale», «#forsale»,
«selling price», «selling», «price», «AED 33M», ценник в любой валюте.
— аренда/rent: «сдаётся», «сдаю», «аренда», «for rent», «to let», «rental»,
«per year», «per month», «AED ... /year».
— покупка/purchase: «куплю», «куплю в», «looking for», «want to buy»,
«wanted», «requirement».
ОДНО сообщение может быть и про продажу, И про аренду одновременно
(«FOR SALE | RENT» / «продажа или аренда»). В таком случае выбирай
основное намерение по самому тексту; если равноценно — `kind="sale"`
и упомяни аренду в summary.
НЕ объявления (is_listing=false):
— общие новости / статьи / аналитика рынка;
— воспоминания и истории («когда-то продавал квартиру»);
— шутки, мемы, цитаты;
— реклама услуг агентств без конкретного объекта;
— чужие пересланные объявления без контактов и явного предложения от автора.
Отвечай СТРОГО валидным JSON по схеме (никаких комментариев, никакого markdown):
{
"is_listing": boolean,
"kind": "sale" | "rent" | "purchase" | null,
"property_type": "квартира" | "дом" | "студия" | "апартаменты" | "комната" | "таунхаус" | "дача" | "коттедж" | "офис" | "склад" | "помещение" | "участок" | "гараж" | "машиноместо" | null,
"rooms": "студия" | "1-к" | "2-к" | "3-к" | "4-к" | "5+к" | null,
"area_m2": number | null,
"price_text": string | null,
"price_value": number | null,
"currency": "RUB" | "USD" | "EUR" | "AED" | "GBP" | "CNY" | "TRY" | "KZT" | "BYN" | "UAH" | null,
"location": string | null,
"contact_phone": string | null,
"contact_name": string | null,
"summary": string,
"confidence": number
}
Поля:
- summary — ОДНО короткое предложение НА РУССКОМ языке (даже если исходный
текст на английском или другом). Это нужно для единообразного UI.
- property_type — пиши значение по-русски (villa→дом, apartment→квартира,
townhouse→таунхаус, plot/land→участок, studio→студия, penthouse→апартаменты,
house→дом, office→офис, warehouse→склад, retail→помещение).
- rooms — для англоязычного «3BR», «3 BR», «3 bed», «3-bedroom» возвращай
«3-к»; для «studio» → «студия».
- area_m2 — площадь В КВАДРАТНЫХ МЕТРАХ. Если в тексте sqft / sq.ft / sq ft /
square feet — переведи: m² = sqft × 0.0929. Округляй до целого.
- confidence ∈ [0, 1]: 0.9+ если явное объявление с ценой/контактом,
0.50.8 если правдоподобно, 0.20.4 если намёк.
- price_text — точная цитата из текста («2.5 млн ₽», «AED 850 000», «$320k»,
«300 тыс. дирхам», «د.إ 1.2M», «70,000,000 AED», «AED 4.3M», «AED 1.75M»).
- price_value — числовая величина цены В УКАЗАННОЙ ВАЛЮТЕ (не конвертируй).
Раскрывай сокращения: «AED 4.3M» → 4300000, «$320k» → 320000.
- currency — определяй гибко: ₽/руб/р/RUB/рублей → RUB; $/USD/долл/бакс → USD;
€/EUR/евро → EUR; AED/дирхам/дирхамов/дирхама/dh/dhs/د.إ/Dirhams → AED;
₺/TRY/лир/лира → TRY; ¥/CNY/юань → CNY; ₸/KZT/тенге → KZT;
Br/BYN/бел.руб → BYN; ₴/UAH/грн → UAH. Если не уверен — null.
- contact_phone — любой номер телефона в тексте (с + или без, российский,
ОАЭ, любой международный).
"""
DEFAULT_HR_SYSTEM_PROMPT = """\
Ты — аналитик HR-объявлений. Тебе дают текст из Telegram-канала. Сообщение
МОЖЕТ БЫТЬ НА ЛЮБОМ ЯЗЫКЕ — обрабатывай одинаково.
Задача: определить, относится ли сообщение к рынку труда, и какого типа лид
это. Допускаются три типа (`kind`):
— vacancy — компания/наниматель ищет сотрудника («ищем разработчика»,
«hiring backend engineer», «требуется бухгалтер», «we are looking for»);
— resume — соискатель ищет работу («ищу работу», «open to work», «available
for hire», «рассматриваю предложения», «my CV», «резюме»);
— contact — короткое сообщение с именем/контактом и намёком на профессию,
без явной вакансии/резюме («Иван Петров, Python, +7…», «@nick — UI/UX,
Дубай»). Используй, когда vacancy и resume не подходят, но из текста ясно,
что это HR-контакт.
Лучше отметить сомнительный случай `is_lead=true` с низкой confidence,
чем пропустить. НО полностью исключай:
— общие новости и аналитика рынка труда без конкретной вакансии/резюме;
— реклама курсов, школ, маркетплейсов услуг (Profi.ru и т.п.);
— чужие пересланные посты без контактов и без явного предложения от автора;
— объявления о продаже/аренде недвижимости, услуг и товаров;
— мемы, шутки, цитаты.
Отвечай СТРОГО валидным JSON по схеме (никаких комментариев, никакого markdown):
{
"is_lead": boolean,
"kind": "vacancy" | "resume" | "contact" | null,
"title": string | null,
"company": string | null,
"candidate_name": string | null,
"experience_years": number | null,
"skills": string[],
"location": string | null,
"remote": boolean | null,
"employment_type": "full-time" | "part-time" | "contract" | "internship" | null,
"salary_text": string | null,
"salary_value": number | null,
"currency": "RUB" | "USD" | "EUR" | "AED" | "GBP" | "CNY" | "TRY" | "KZT" | "BYN" | "UAH" | null,
"contact_phone": string | null,
"contact_name": string | null,
"summary": string,
"confidence": number
}
Поля:
- title — должность/роль ОДНОЙ строкой («Senior Python Developer», «Бухгалтер»,
«UI/UX-дизайнер»). Для resume — желаемая роль. Для contact — то, что заявлено.
- company — название компании-нанимателя, если оно явно указано (vacancy).
- candidate_name — ФИО или ник кандидата (resume / contact).
- experience_years — стаж в годах числом. «5+ years» → 5. Если не указан — null.
- skills — короткий массив ключевых навыков/технологий (до ~10 элементов).
- remote — true для «удалёнка / remote / WFH / hybrid: remote», false для
«офис / on-site», null если не указано.
- employment_type — full-time для «полная занятость / full-time», part-time
для «частичная / part-time», contract для «договор/контракт/freelance»,
internship для «стажировка/internship». Иначе null.
- salary_text — точная цитата с зарплатой («200300k ₽», «$5k/mo», «AED 18,000 per month»).
- salary_value — число В УКАЗАННОЙ ВАЛЮТЕ. Если диапазон — нижняя граница.
Раскрывай сокращения: «200k» → 200000, «1.5M» → 1500000.
- currency — определяй гибко: ₽/руб/RUB → RUB; $/USD/долл → USD; €/EUR/евро → EUR;
AED/дирхам/dh/dhs → AED; ₺/TRY/лир → TRY; ¥/CNY/юань → CNY; ₸/KZT/тенге → KZT;
Br/BYN/бел.руб → BYN; ₴/UAH/грн → UAH. Если не уверен — null.
- contact_phone — любой номер телефона (RU / международный, с + или без).
- contact_name — имя контактного лица (рекрутер / соискатель / автор).
- summary — ОДНО короткое предложение НА РУССКОМ языке.
- confidence ∈ [0, 1]: 0.9+ если явная вакансия/резюме с деталями, 0.50.8
если правдоподобно, 0.20.4 если намёк.
"""
# Back-compat alias — older imports referenced DEFAULT_SYSTEM_PROMPT.
DEFAULT_SYSTEM_PROMPT = DEFAULT_RE_SYSTEM_PROMPT
def _build_user_prompt(text: str) -> str:
return f"Текст сообщения:\n```\n{text}\n```\nВерни JSON."
_VALID_CURRENCIES = {
"RUB", "USD", "EUR", "AED", "GBP", "CNY", "TRY", "KZT", "BYN", "UAH"
}
def _coerce_real_estate(payload: Any) -> dict | None:
    """Normalise a raw LLM JSON object into the real-estate verdict shape.

    The payload comes straight from the model, so every field is treated as
    untrusted: unknown enum values become None, numbers go through
    `_as_float`, and the confidence is clamped to [0, 1].

    Returns None when `payload` is not a dict.
    """
    if not isinstance(payload, dict):
        return None

    currency = payload.get("currency")
    if currency is not None:
        currency = str(currency).upper()
        if currency not in _VALID_CURRENCIES:
            currency = None

    kind = payload.get("kind")
    if kind not in ("sale", "rent", "purchase"):
        kind = None

    # The model occasionally returns a non-string summary (list, number, dict).
    # Slicing those either raises or leaks a non-string into the stored
    # verdict, so anything that is not a string is treated as "no summary".
    summary = payload.get("summary")
    summary = summary[:300] if isinstance(summary, str) else ""

    # NaN is truthy and survives `or 0.0`; min(1.0, nan) then yields 1.0, which
    # would turn a garbage value into full confidence. `x != x` detects NaN.
    confidence = _as_float(payload.get("confidence"))
    if confidence is None or confidence != confidence:
        confidence = 0.0
    confidence = max(0.0, min(1.0, confidence))

    return {
        "is_listing": bool(payload.get("is_listing")),
        "kind": kind,
        "property_type": payload.get("property_type") or None,
        "rooms": payload.get("rooms") or None,
        "area_m2": _as_float(payload.get("area_m2")),
        "price_text": payload.get("price_text") or None,
        "price_value": _as_float(payload.get("price_value")),
        "currency": currency,
        "location": payload.get("location") or None,
        "contact_phone": payload.get("contact_phone") or None,
        "contact_name": payload.get("contact_name") or None,
        "summary": summary,
        "confidence": confidence,
    }
def _coerce_hr(payload: Any) -> dict | None:
    """Normalise a raw LLM JSON object into the HR verdict shape.

    The payload comes straight from the model, so every field is treated as
    untrusted: unknown enum values become None, `skills` is accepted as either
    a comma-separated string or a list (capped at 15 entries), `remote` is kept
    only when it is a real boolean, and the confidence is clamped to [0, 1].

    Returns None when `payload` is not a dict.
    """
    if not isinstance(payload, dict):
        return None

    currency = payload.get("currency")
    if currency is not None:
        currency = str(currency).upper()
        if currency not in _VALID_CURRENCIES:
            currency = None

    skills_raw = payload.get("skills") or []
    if isinstance(skills_raw, str):
        skills = [s.strip() for s in skills_raw.split(",") if s.strip()]
    elif isinstance(skills_raw, list):
        # Skip JSON nulls: str(None) would otherwise store the skill "None".
        skills = [str(s).strip() for s in skills_raw if s is not None and str(s).strip()]
    else:
        skills = []
    skills = skills[:15]

    employment = payload.get("employment_type")
    if employment not in ("full-time", "part-time", "contract", "internship"):
        employment = None

    kind = payload.get("kind")
    if kind not in ("vacancy", "resume", "contact"):
        kind = None

    # Only a genuine boolean is meaningful; strings like "yes" become None.
    remote_raw = payload.get("remote")
    remote = remote_raw if isinstance(remote_raw, bool) else None

    # A non-string summary (list, number, dict) would raise on slicing or leak
    # a non-string into the stored verdict.
    summary = payload.get("summary")
    summary = summary[:300] if isinstance(summary, str) else ""

    # NaN is truthy and survives `or 0.0`; min(1.0, nan) then yields 1.0.
    confidence = _as_float(payload.get("confidence"))
    if confidence is None or confidence != confidence:
        confidence = 0.0
    confidence = max(0.0, min(1.0, confidence))

    return {
        "is_lead": bool(payload.get("is_lead")),
        "kind": kind,
        "title": payload.get("title") or None,
        "company": payload.get("company") or None,
        "candidate_name": payload.get("candidate_name") or None,
        "experience_years": _as_float(payload.get("experience_years")),
        "skills": skills,
        "location": payload.get("location") or None,
        "remote": remote,
        "employment_type": employment,
        "salary_text": payload.get("salary_text") or None,
        "salary_value": _as_float(payload.get("salary_value")),
        "currency": currency,
        "contact_phone": payload.get("contact_phone") or None,
        "contact_name": payload.get("contact_name") or None,
        "summary": summary,
        "confidence": confidence,
    }
def _as_float(v: Any) -> float | None:
if v is None or isinstance(v, bool):
return None
try:
return float(v)
except (TypeError, ValueError):
return None
async def is_ready() -> bool:
    """Check that Ollama is up and the configured model is pulled.

    Compares the full `name:tag` against the server's model list. Matching on
    the family prefix alone would report "ready" when only a different tag of
    the same family is installed, and generation would then fail with
    "model not found". A configured name without a tag is treated as
    `:latest`.

    Any network / HTTP / decoding problem is reported as "not ready".
    """
    wanted = settings.llm_model
    if ":" not in wanted:
        wanted = f"{wanted}:latest"
    try:
        async with httpx.AsyncClient(timeout=5) as client:
            r = await client.get(f"{settings.llm_base_url}/api/tags")
            r.raise_for_status()
            models = r.json().get("models") or []
            names = {m.get("name") for m in models if isinstance(m, dict)}
            return wanted in names
    except Exception:
        return False
def default_prompt(vertical: Vertical) -> str:
    """Return the built-in system prompt for `vertical` (HR, otherwise real estate)."""
    if vertical == "hr":
        return DEFAULT_HR_SYSTEM_PROMPT
    return DEFAULT_RE_SYSTEM_PROMPT
async def classify(
    text: str | None,
    vertical: Vertical = "real_estate",
    section_slug: str | None = None,
) -> dict | None:
    """Classify a message text under the given vertical/section.

    The system prompt is resolved with `section → vertical → built-in` fallback,
    so a per-section prompt can fine-tune extraction while unconfigured
    sections keep using the vertical-wide prompt.

    Returns a vertical-specific structured dict, or None when the LLM is
    disabled, the text is too short, the request fails, or the reply cannot be
    parsed as a JSON object.
    """
    if not settings.llm_enabled:
        return None
    if not text or len(text.strip()) < settings.llm_min_text_length:
        return None

    # Lazy import to avoid a circular: prompt_store -> db.session -> config.
    from parser_bot import prompt_store

    system = await prompt_store.resolve(vertical, section_slug, default_prompt(vertical))
    payload = {
        "model": settings.llm_model,
        "prompt": _build_user_prompt(text),
        "system": system,
        "format": "json",
        "stream": False,
        "options": {"temperature": 0.1, "num_ctx": 4096, "num_predict": 600},
    }

    # NOTE(review): the original indentation is not visible; the lock is
    # assumed to guard only the HTTP round-trip — confirm.
    async with _lock:
        try:
            async with httpx.AsyncClient(timeout=settings.llm_timeout_seconds) as client:
                r = await client.post(
                    f"{settings.llm_base_url}/api/generate", json=payload
                )
                if r.status_code != 200:
                    # Surface the actual server message — most useful one is
                    # `model '...' not found`, which otherwise would just look
                    # like a generic HTTP error and leave the worker to spin.
                    log.warning(
                        "llm_request_failed",
                        status=r.status_code,
                        model=settings.llm_model,
                        vertical=vertical,
                        section=section_slug,
                        body=r.text[:300],
                    )
                    return None
                data = r.json()
        except Exception as exc:
            # Some transport errors (timeouts in particular) stringify to "",
            # so fall back to the exception class name.
            log.warning(
                "llm_request_failed",
                error=str(exc) or type(exc).__name__,
                model=settings.llm_model,
                vertical=vertical,
                section=section_slug,
            )
            return None

    # Guard against a syntactically valid but unexpected body (list, string…)
    # and a non-string "response" field; both would raise outside the try.
    if not isinstance(data, dict):
        log.warning("llm_invalid_json", raw=str(data)[:200], vertical=vertical)
        return None
    raw = data.get("response")
    raw = raw.strip() if isinstance(raw, str) else ""
    if not raw:
        return None

    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        # Best effort: extract the outermost {...} block.
        start, end = raw.find("{"), raw.rfind("}")
        if start == -1 or end <= start:
            log.warning("llm_invalid_json", raw=raw[:200], vertical=vertical)
            return None
        try:
            parsed = json.loads(raw[start : end + 1])
        except json.JSONDecodeError:
            log.warning("llm_invalid_json", raw=raw[:200], vertical=vertical)
            return None

    if vertical == "hr":
        return _coerce_hr(parsed)
    return _coerce_real_estate(parsed)

205
src/parser_bot/main.py Normal file
View File

@@ -0,0 +1,205 @@
from contextlib import asynccontextmanager
from pathlib import Path
import structlog
import uvicorn
from fastapi import Depends, FastAPI, HTTPException
from fastapi.openapi.docs import get_redoc_html, get_swagger_ui_html
from fastapi.openapi.utils import get_openapi
from fastapi.responses import FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from starlette.types import Scope
from parser_bot.access import require_admin, require_admin_network
from parser_bot.api.routes import router
from parser_bot.config import settings
from parser_bot.scheduler.poller import build_scheduler
from parser_bot.telegram.client import is_authorized, start_client, stop_client
# Emit every log event as one JSON object carrying an ISO timestamp and level.
structlog.configure(
    processors=[
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.add_log_level,
        structlog.processors.JSONRenderer(),
    ]
)
log = structlog.get_logger()
# Directory with the bundled web UI, located next to this module.
STATIC_DIR = Path(__file__).parent / "web" / "static"
# Headers attached to page responses so browsers revalidate instead of
# silently reusing a cached copy.
NOCACHE = {"Cache-Control": "no-cache, must-revalidate"}
class NoCacheStaticFiles(StaticFiles):
    """StaticFiles with Cache-Control: no-cache and a deny-list for guarded pages.

    The browser still gets to validate via ETag/Last-Modified (304 is fine),
    but it will not silently serve a stale JS bundle after a deploy.

    Pages listed in GUARDED_PAGES are served by dedicated, access-controlled
    routes. This handler is mounted on the same directory, so without the
    deny-list the same files would be reachable through the mount (for example
    under the /static prefix) without passing the admin dependency.
    """

    # Top-level files that must only be served by their guarded routes.
    GUARDED_PAGES = frozenset({"admin.html", "auth.html"})

    async def get_response(self, path: str, scope: Scope):
        """Serve `path`, refusing guarded pages and forcing revalidation.

        Raises HTTPException(404) for guarded pages so their existence is not
        revealed through the static mount.
        """
        # Normalise separators and case so variants of the name on
        # case-insensitive filesystems are refused as well.
        normalized = path.replace("\\", "/").strip("/").casefold()
        if normalized in self.GUARDED_PAGES:
            raise HTTPException(404)
        response = await super().get_response(path, scope)
        response.headers["Cache-Control"] = "no-cache, must-revalidate"
        return response
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: connect Telegram, run the scheduler, clean up.

    Startup steps run inside the same try/finally as the serving phase, so a
    failure after the Telegram client has connected or the scheduler has
    started still disconnects the client and stops the scheduler instead of
    leaving them running.
    """
    scheduler = build_scheduler()
    scheduler_started = False
    try:
        await start_client()
        scheduler.start()
        scheduler_started = True
        authorized = await is_authorized()
        log.info(
            "startup", poll_interval=settings.poll_interval_seconds, authorized=authorized
        )
        if not authorized:
            log.warning("not_authorized", action="open /auth.html to log in")
        yield
    finally:
        # Only shut down a scheduler that actually started.
        if scheduler_started:
            scheduler.shutdown(wait=False)
        await stop_client()
        log.info("shutdown")
def _serve_section_template(vertical_dir: str, page: str) -> FileResponse:
    """Map a section-scoped URL onto the single shared template for its vertical.

    Sections are created at runtime, so a URL such as
    `/real-estate/dubai/channels.html` has no file of its own. Every section
    slug is answered from `STATIC_DIR / vertical_dir / "section" / page`.

    An empty `page` means `index.html`. Names containing a slash or starting
    with `..`, and names that do not resolve to a regular file, yield a 404.
    """
    filename = page or "index.html"
    looks_like_traversal = "/" in filename or filename.startswith("..")
    if looks_like_traversal:
        raise HTTPException(404)
    candidate = STATIC_DIR / vertical_dir / "section" / filename
    if candidate.is_file():
        return FileResponse(candidate, headers=NOCACHE)
    raise HTTPException(404)
def create_app() -> FastAPI:
    """Build the FastAPI application.

    Registers, in this order (Starlette matches routes in registration order):
    the JSON API under /api/v1, the health check, the index page, the guarded
    admin/auth pages and OpenAPI surface, the /static and /media mounts, the
    per-vertical section routes, and finally the catch-all static mount at /.
    """
    # Local import: the module-level import list does not include HTMLResponse.
    from fastapi.responses import HTMLResponse

    public_base = settings.public_base_path.rstrip("/")
    # URL the docs UIs use to fetch the schema, honouring the public prefix.
    openapi_url = f"{public_base}/openapi.json" if public_base else "/openapi.json"

    # Disable the default /docs, /redoc and /openapi.json — we serve our own
    # admin-gated versions below.
    app = FastAPI(
        title="parser-tg-bot",
        lifespan=lifespan,
        docs_url=None,
        redoc_url=None,
        openapi_url=None,
    )
    app.include_router(router, prefix="/api/v1")

    @app.get("/healthz")
    async def healthz() -> dict[str, str]:
        return {"status": "ok"}

    @app.get("/", include_in_schema=False)
    async def index() -> FileResponse:
        return FileResponse(STATIC_DIR / "index.html", headers=NOCACHE)

    # Guarded pages. Registered BEFORE the static catch-all so the catch-all
    # mount is never consulted for these exact paths.
    # /admin.html is gated by `require_admin_network`.
    @app.get(
        "/admin.html",
        include_in_schema=False,
        dependencies=[Depends(require_admin_network)],
    )
    async def admin_page() -> FileResponse:
        return FileResponse(STATIC_DIR / "admin.html", headers=NOCACHE)

    # /auth.html (the Telegram login page) is gated by `require_admin`.
    @app.get(
        "/auth.html",
        include_in_schema=False,
        dependencies=[Depends(require_admin)],
    )
    async def auth_page() -> FileResponse:
        return FileResponse(STATIC_DIR / "auth.html", headers=NOCACHE)

    # Admin-only: OpenAPI surface. Custom routes so we can wrap them in
    # `require_admin`; the auto-generated ones from FastAPI bypass it.
    @app.get(
        "/openapi.json",
        include_in_schema=False,
        dependencies=[Depends(require_admin)],
    )
    async def openapi_json() -> JSONResponse:
        return JSONResponse(
            get_openapi(
                title=app.title,
                version=app.version,
                openapi_version=app.openapi_version,
                description=app.description,
                routes=app.routes,
            )
        )

    # The docs helpers return HTML responses, not FileResponse; annotate them
    # accordingly.
    @app.get(
        "/docs",
        include_in_schema=False,
        dependencies=[Depends(require_admin)],
    )
    async def docs() -> HTMLResponse:
        return get_swagger_ui_html(
            openapi_url=openapi_url,
            title=app.title + " — docs",
        )

    @app.get(
        "/redoc",
        include_in_schema=False,
        dependencies=[Depends(require_admin)],
    )
    async def redoc() -> HTMLResponse:
        return get_redoc_html(
            openapi_url=openapi_url,
            title=app.title + " — redoc",
        )

    # IMPORTANT: register /static and /media mounts BEFORE the dynamic
    # vertical/section routes. Starlette matches routes in registration order,
    # and a generic /{v}/{s}/{page} pattern would otherwise eat /static/*.
    # NOTE(review): this mount exposes every file in STATIC_DIR under /static,
    # including admin.html and auth.html, unless the static handler itself
    # refuses them — confirm the guarded pages are not reachable this way.
    app.mount("/static", NoCacheStaticFiles(directory=STATIC_DIR), name="static")

    media_dir = Path(settings.media_dir)
    media_dir.mkdir(parents=True, exist_ok=True)
    # /media is fine to cache — file names are content-stable.
    app.mount("/media", StaticFiles(directory=media_dir), name="media")

    # Section-templated dynamic routes, explicit per vertical so /static/*,
    # /api/*, /media/* (and any future top-level path) can't be captured.
    @app.get("/real-estate/{section}/", include_in_schema=False)
    async def re_section_root(section: str) -> FileResponse:
        return _serve_section_template("real-estate", "index.html")

    @app.get("/real-estate/{section}/{page}", include_in_schema=False)
    async def re_section_page(section: str, page: str) -> FileResponse:
        return _serve_section_template("real-estate", page)

    @app.get("/hr/{section}/", include_in_schema=False)
    async def hr_section_root(section: str) -> FileResponse:
        return _serve_section_template("hr", "index.html")

    @app.get("/hr/{section}/{page}", include_in_schema=False)
    async def hr_section_page(section: str, page: str) -> FileResponse:
        return _serve_section_template("hr", page)

    # Catch-all for top-level static pages (chooser, css, etc.). The exact
    # paths /admin.html and /auth.html are matched by the guarded routes above
    # before this mount is reached.
    app.mount("/", NoCacheStaticFiles(directory=STATIC_DIR, html=True), name="pages")
    return app
# Module-level ASGI application; `main()` hands uvicorn the import string
# "parser_bot.main:app", which resolves to this object.
app = create_app()
def main() -> None:
    """Start uvicorn on the configured host/port, leaving logging setup untouched."""
    server_options = {
        "host": settings.api_host,
        "port": settings.api_port,
        # log_config=None keeps uvicorn from installing its own logging config.
        "log_config": None,
    }
    uvicorn.run("parser_bot.main:app", **server_options)
# Allow running this module directly as a script.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,130 @@
"""Runtime-editable LLM system prompts, persisted in app_settings.
Three resolution levels with fallback (more specific → less specific):
1. `llm_system_prompt:<vertical>:<section_slug>` — section override
2. `llm_system_prompt:<vertical>` — vertical override
3. built-in DEFAULT_RE_SYSTEM_PROMPT / DEFAULT_HR_SYSTEM_PROMPT
The prompt is read on every classification call but cached for a short
window so the DB isn't hit per-message. Edits via the API invalidate the
cache for that level, so a save in the UI takes effect within seconds.
"""
from __future__ import annotations
import time
from typing import Literal
from sqlalchemy import select
from sqlalchemy.dialects.postgresql import insert as pg_insert
from parser_bot.db.models import AppSetting
from parser_bot.db.session import session_scope
# The two verticals a prompt can be stored for.
Vertical = Literal["real_estate", "hr"]
# Prefix of every prompt key; the vertical (and optional section slug) follow it.
_KEY_PREFIX = "llm_system_prompt:"
# How long, in seconds, a value read by `_load` is reused before re-reading.
_CACHE_TTL_S = 5.0
# key -> (time.monotonic() at load, stored prompt or None when absent/blank).
_cache: dict[str, tuple[float, str | None]] = {}
def _key(vertical: Vertical, section_slug: str | None = None) -> str:
    """Build the settings key for a vertical, or for one section inside it."""
    suffix = f":{section_slug}" if section_slug else ""
    return f"{_KEY_PREFIX}{vertical}{suffix}"
async def _load(key: str) -> str | None:
    """Read a stored prompt by exact key. None if missing or empty.

    Results (including "nothing stored") are cached for `_CACHE_TTL_S` seconds
    so the database is not queried for every classified message.
    """
    now = time.monotonic()
    # A missing cache entry must always be a miss. The monotonic clock has an
    # undefined reference point, so comparing against a default timestamp of
    # 0.0 would treat "never loaded" as "fresh" whenever the clock value is
    # still below the TTL.
    entry = _cache.get(key)
    if entry is not None and now - entry[0] < _CACHE_TTL_S:
        return entry[1]

    async with session_scope() as session:
        row = await session.execute(
            select(AppSetting.value).where(AppSetting.key == key)
        )
        value = row.scalar_one_or_none()

    # Blank or non-string values count as "no override".
    text = value if isinstance(value, str) and value.strip() else None
    _cache[key] = (now, text)
    return text
async def resolve(
    vertical: Vertical, section_slug: str | None, default: str
) -> str:
    """Return the most specific stored prompt, or `default` when none exists.

    Lookup order: section-level key (only when a slug is given), then the
    vertical-level key, then `default`.
    """
    lookup_order = []
    if section_slug:
        lookup_order.append(_key(vertical, section_slug))
    lookup_order.append(_key(vertical))

    for key in lookup_order:
        stored = await _load(key)
        if stored is not None:
            return stored
    return default
async def get(
    vertical: Vertical, section_slug: str | None, default: str
) -> tuple[str, str]:
    """Return `(text, source)` for the settings UI.

    `source` is 'section', 'vertical' or 'default' and tells the editor which
    level the returned text came from, using the same lookup order as
    `resolve`.
    """
    levels = []
    if section_slug:
        levels.append((_key(vertical, section_slug), "section"))
    levels.append((_key(vertical), "vertical"))

    for key, source in levels:
        stored = await _load(key)
        if stored is not None:
            return stored, source
    return default, "default"
async def set_prompt(
    vertical: Vertical, section_slug: str | None, text: str
) -> None:
    """Store `text` as the prompt for the given level (section or vertical).

    Inserts the row or overwrites the existing value for that key, then drops
    the cached entry for the key.

    Raises:
        ValueError: if `text` is not a string or is blank.
    """
    if not (isinstance(text, str) and text.strip()):
        raise ValueError("prompt must be a non-empty string")

    key = _key(vertical, section_slug)
    upsert = pg_insert(AppSetting).values(key=key, value=text)
    upsert = upsert.on_conflict_do_update(
        index_elements=["key"], set_={"value": text}
    )
    async with session_scope() as session:
        await session.execute(upsert)
    # NOTE(review): assumes the cache is invalidated after the session block
    # exits — the original indentation is not visible; confirm.
    invalidate(key)
async def reset(vertical: Vertical, section_slug: str | None) -> None:
    """Delete the stored override for the given level and drop its cache entry."""
    key = _key(vertical, section_slug)
    removal = AppSetting.__table__.delete().where(AppSetting.key == key)
    async with session_scope() as session:
        await session.execute(removal)
    # NOTE(review): assumes invalidation happens after the session block exits
    # — the original indentation is not visible; confirm.
    invalidate(key)
def invalidate(key: str | None = None) -> None:
    """Forget one cached prompt, or the whole cache when `key` is None."""
    if key is not None:
        _cache.pop(key, None)
        return
    _cache.clear()
async def is_overridden(
    vertical: Vertical, section_slug: str | None = None
) -> bool:
    """Tell whether a custom prompt is stored at exactly this level."""
    stored = await _load(_key(vertical, section_slug))
    if stored is None:
        return False
    return True

View File

View File

@@ -0,0 +1,349 @@
from datetime import datetime, timezone
import structlog
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from sqlalchemy import func, select
from sqlalchemy.dialects.postgresql import insert as pg_insert
from parser_bot.config import settings
from parser_bot.db.models import Channel, Message, Section
from parser_bot.db.session import session_scope
from parser_bot.extractors import analyze, analyze_with_llm
from parser_bot.telegram.client import (
fetch_new_messages,
fetch_specific_messages_with_media,
is_authorized,
resolve_channel,
)
# Module logger used for the poll / backfill / classification events below.
log = structlog.get_logger()
def _verdict_key(vertical: str) -> str:
"""JSONB key under `extracted` where the LLM verdict lives for this vertical."""
return "hr_lead" if vertical == "hr" else "lead"
def _needs_work_clause(vertical: str | None):
    """Build the filter selecting rows that still lack an LLM verdict.

    With a vertical: `extracted` is NULL, or that vertical's verdict key is
    NULL. Without one: `extracted` is NULL, or BOTH the `lead` and `hr_lead`
    keys are NULL.
    """
    never_analyzed = Message.extracted.is_(None)
    if vertical is not None:
        verdict_missing = Message.extracted[_verdict_key(vertical)].is_(None)
        return never_analyzed | verdict_missing

    lead_missing = Message.extracted["lead"].is_(None)
    hr_lead_missing = Message.extracted["hr_lead"].is_(None)
    return never_analyzed | (lead_missing & hr_lead_missing)
async def poll_channel(channel_id: int) -> int:
    """Fetch new messages for one channel and store them.

    Returns the number of rows actually inserted; rows that already exist for
    the same (channel_id, tg_message_id) are skipped. Returns 0 for a missing
    or inactive channel.
    """
    async with session_scope() as session:
        channel = await session.get(Channel, channel_id)
        if channel is None or not channel.is_active:
            return 0

        # Fill in Telegram metadata the first time the channel is polled.
        metadata_missing = channel.tg_id is None or channel.title is None
        if metadata_missing:
            resolved = await resolve_channel(channel.identifier)
            channel.tg_id = resolved.tg_id
            channel.title = resolved.title

        fetched = await fetch_new_messages(
            channel.identifier,
            min_id=channel.last_message_id,
            limit=settings.poll_history_limit,
            download_media_for_channel_id=channel.id,
        )

        inserted = 0
        for item in fetched:
            # Only the cheap regex pass (`analyze`) runs here; LLM
            # classification is left to the background `classify_pending` job
            # so polling never waits on the model.
            row_values = {
                "channel_id": channel.id,
                "tg_message_id": item.tg_message_id,
                "date": item.date,
                "text": item.text,
                "sender_id": item.sender_id,
                "sender_username": item.sender_username,
                "sender_name": item.sender_name,
                "grouped_id": item.grouped_id,
                "has_media": item.has_media,
                "views": item.views,
                "forwards": item.forwards,
                "raw": item.raw,
                "media_files": item.media_files or None,
                "extracted": analyze(item.text) if item.text else None,
            }
            insert_stmt = (
                pg_insert(Message)
                .values(**row_values)
                .on_conflict_do_nothing(index_elements=["channel_id", "tg_message_id"])
            )
            outcome = await session.execute(insert_stmt)
            inserted += outcome.rowcount or 0

        if fetched:
            # Advance the cursor using the last fetched message's id.
            newest_id = fetched[-1].tg_message_id
            channel.last_message_id = max(channel.last_message_id or 0, newest_id)
        # NOTE(review): assumes the poll timestamp is refreshed on every poll,
        # not only when messages arrived — original indentation is not visible.
        channel.last_polled_at = datetime.now(timezone.utc)

        log.info(
            "polled_channel",
            channel=channel.identifier,
            vertical=channel.vertical,
            fetched=len(fetched),
            inserted=inserted,
        )
        return inserted
async def poll_all() -> None:
    """Poll every active channel once; one failing channel does not stop the rest."""
    authorized = await is_authorized()
    if not authorized:
        log.debug("poll_skipped_not_authorized")
        return

    async with session_scope() as session:
        active = await session.execute(
            select(Channel.id).where(Channel.is_active.is_(True))
        )
        channel_ids = list(active.scalars().all())

    for channel_id in channel_ids:
        try:
            await poll_channel(channel_id)
        except Exception as exc:
            log.error("poll_failed", channel_id=channel_id, error=str(exc))
async def backfill_media(channel_id: int, batch_size: int = 50) -> dict[str, int]:
    """Re-download media for messages with has_media=True but media_files IS NULL.

    Processes one batch (oldest-first by tg_message_id) per call so the call
    stays bounded; the UI can press the button repeatedly until 'pending' is 0.

    Messages whose media can no longer be resolved (the fetch returns nothing
    for them) are marked with a `skipped` placeholder instead of being left
    NULL. Otherwise the same unresolvable rows would be selected again on every
    call and, once they fill a whole batch, newer rows would never be reached.

    Returns:
        {"updated": rows written in this call, "pending": rows still missing}.

    Raises:
        RuntimeError: when the Telegram client is not authorized or the
            channel does not exist.
    """
    if not await is_authorized():
        raise RuntimeError("not authorized")

    async with session_scope() as session:
        channel = await session.get(Channel, channel_id)
        if channel is None:
            raise RuntimeError("channel not found")

        # Shared filter: media was present but nothing has been recorded yet.
        missing_media = (
            Message.channel_id == channel_id,
            Message.has_media.is_(True),
            Message.media_files.is_(None),
        )
        pending_total = (
            await session.execute(select(func.count(Message.id)).where(*missing_media))
        ).scalar_one()
        rows = (
            await session.execute(
                select(Message.id, Message.tg_message_id)
                .where(*missing_media)
                .order_by(Message.tg_message_id.asc())
                .limit(batch_size)
            )
        ).all()
        if not rows:
            return {"updated": 0, "pending": 0}

        tg_ids = [r.tg_message_id for r in rows]
        results = await fetch_specific_messages_with_media(
            channel.identifier, tg_ids, channel_id
        )

        updated = 0
        for db_id, tg_id in rows:
            files = results.get(tg_id)
            if not files:
                # Same {kind, skipped} shape the downloader uses for media it
                # could not store, so consumers need no special case.
                files = [{"kind": "unknown", "skipped": "unavailable"}]
            msg = await session.get(Message, db_id)
            if msg is None:
                continue
            msg.media_files = files
            updated += 1

        remaining = max(0, pending_total - updated)
        log.info(
            "backfill_media",
            channel_id=channel_id,
            updated=updated,
            remaining=remaining,
        )
        return {"updated": updated, "pending": remaining}
async def reanalyze_channel(channel_id: int, batch_size: int = 5) -> dict[str, int]:
    """Re-run the extractors over one batch of this channel's unclassified messages.

    The vertical and section slug are read from the channel row so the matching
    LLM prompt is used. Only rows whose verdict for that vertical is missing
    are touched, newest first. When the LLM is disabled only the regex pass
    (`analyze`) runs.

    Returns:
        {"updated": rows rewritten in this call, "pending": rows still missing}.
        Both are 0 when the channel (joined with its section) is not found or
        nothing is pending.
    """
    nothing_done = {"updated": 0, "pending": 0}

    async with session_scope() as session:
        lookup = await session.execute(
            select(Channel, Section.slug)
            .join(Section, Section.id == Channel.section_id)
            .where(Channel.id == channel_id)
        )
        found = lookup.one_or_none()
        if found is None:
            return nothing_done
        channel, section_slug = found
        vertical = channel.vertical

        # Text messages of this channel that still lack this vertical's verdict.
        criteria = (
            Message.channel_id == channel_id,
            Message.text.is_not(None),
            _needs_work_clause(vertical),
        )
        count_query = select(func.count(Message.id)).where(*criteria)
        pending_total = (await session.execute(count_query)).scalar_one()

        batch_query = (
            select(Message.id, Message.text)
            .where(*criteria)
            .order_by(Message.id.desc())
            .limit(batch_size)
        )
        batch = (await session.execute(batch_query)).all()
        if not batch:
            return nothing_done

        updated = 0
        for message_id, body in batch:
            if settings.llm_enabled:
                extracted = await analyze_with_llm(body, vertical, section_slug)
            else:
                extracted = analyze(body)
            row = await session.get(Message, message_id)
            if row is None:
                continue
            row.extracted = extracted
            updated += 1

        remaining = max(0, pending_total - updated)
        log.info(
            "reanalyzed_channel",
            channel_id=channel_id,
            vertical=vertical,
            section=section_slug,
            updated=updated,
            remaining=remaining,
        )
        return {"updated": updated, "pending": remaining}
async def pending_llm_count(
    vertical: str | None = None, section_slug: str | None = None
) -> int:
    """How many text messages still need LLM classification.

    Args:
        vertical: when set, only messages from channels of that vertical are
            counted, and "needs work" means that vertical's verdict is missing.
        section_slug: when set, only messages from channels in that section are
            counted. May be combined with `vertical` or used on its own.

    Returns:
        The number of matching messages; always 0 when the LLM is disabled.
    """
    if not settings.llm_enabled:
        return 0

    needs_work = _needs_work_clause(vertical)
    async with session_scope() as session:
        stmt = select(func.count(Message.id)).where(
            Message.text.is_not(None),
            needs_work,
        )
        # Both filters go through Channel, so join it whenever either is set.
        # The Section join condition references Channel; without this join a
        # section-only filter would not be tied to the message's own channel.
        if vertical is not None or section_slug is not None:
            stmt = stmt.join(Channel, Channel.id == Message.channel_id)
        if vertical is not None:
            stmt = stmt.where(Channel.vertical == vertical)
        if section_slug is not None:
            stmt = stmt.join(Section, Section.id == Channel.section_id).where(
                Section.slug == section_slug
            )
        return (await session.execute(stmt)).scalar_one()
async def classify_pending(batch_size: int = 5) -> int:
    """Classify one batch of messages that have no LLM verdict yet.

    Works across all channels, newest first. The vertical and section slug are
    taken from each message's channel, so every message is classified with the
    prompt that belongs to it.

    Returns the number of rows whose `extracted` was rewritten (0 when the LLM
    is disabled or nothing is pending).
    """
    if not settings.llm_enabled:
        return 0

    batch_query = (
        select(Message.id, Message.text, Channel.vertical, Section.slug)
        .join(Channel, Channel.id == Message.channel_id)
        .join(Section, Section.id == Channel.section_id)
        .where(Message.text.is_not(None), _needs_work_clause(None))
        .order_by(Message.id.desc())
        .limit(batch_size)
    )

    async with session_scope() as session:
        batch = (await session.execute(batch_query)).all()
        if not batch:
            return 0

        updated = 0
        for message_id, body, vertical, section_slug in batch:
            # Re-read the row right before classifying it and skip it when the
            # verdict for this channel's vertical is already present.
            current = (
                await session.execute(
                    select(Message.extracted).where(Message.id == message_id)
                )
            ).scalar_one_or_none()
            verdict_key = _verdict_key(vertical)
            if current and current.get(verdict_key) is not None:
                continue

            extracted = await analyze_with_llm(body, vertical, section_slug)
            row = await session.get(Message, message_id)
            if row is None:
                continue
            row.extracted = extracted
            updated += 1

        if updated:
            log.info("classify_pending_batch", updated=updated)
        return updated
def build_scheduler() -> AsyncIOScheduler:
    """Create the scheduler with the polling job and, if enabled, the LLM job.

    Both jobs run on a fixed interval, never overlap with themselves
    (`max_instances=1`) and collapse missed runs into one (`coalesce=True`).
    """
    shared_options = {"max_instances": 1, "coalesce": True}

    scheduler = AsyncIOScheduler()
    scheduler.add_job(
        poll_all,
        "interval",
        seconds=settings.poll_interval_seconds,
        id="poll_all",
        **shared_options,
    )
    if not settings.llm_enabled:
        return scheduler

    scheduler.add_job(
        classify_pending,
        "interval",
        seconds=settings.llm_classify_interval_seconds,
        id="classify_pending",
        kwargs={"batch_size": settings.llm_classify_batch_size},
        **shared_options,
    )
    return scheduler

View File

View File

@@ -0,0 +1,319 @@
import json
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any
import structlog
from telethon import TelegramClient
from telethon.sessions import StringSession
from telethon.tl.types import Channel as TgChannel
from telethon.tl.types import Message as TgMessage
from telethon.tl.types import (
MessageMediaDocument,
MessageMediaPhoto,
)
from parser_bot.config import settings
# Module logger; used below to report failed media downloads.
log = structlog.get_logger()
def _json_safe(value: Any) -> Any:
"""Coerce Telethon's to_dict() output into JSONB-safe primitives."""
return json.loads(json.dumps(value, default=str))
@dataclass(slots=True)
class ResolvedChannel:
tg_id: int
title: str
@dataclass(slots=True)
class FetchedMessage:
tg_message_id: int
date: datetime
text: str | None
sender_id: int | None
sender_username: str | None
sender_name: str | None
grouped_id: int | None
has_media: bool
views: int | None
forwards: int | None
raw: dict
media_files: list[dict] = field(default_factory=list)
def _sender_info(msg: TgMessage) -> tuple[str | None, str | None]:
"""Best-effort sender username + display name from a Telethon Message.
For public channel posts the "sender" is usually the channel itself —
in that case we fall back to `post_author` (the optional signature on
signed posts) so the operator at least knows who signed it.
"""
username: str | None = None
name: str | None = None
sender = msg.sender
if sender is not None:
username = getattr(sender, "username", None)
first = getattr(sender, "first_name", None)
last = getattr(sender, "last_name", None)
title = getattr(sender, "title", None)
if first or last:
name = " ".join(p for p in (first, last) if p)
elif title:
name = title
post_author = getattr(msg, "post_author", None)
if not name and post_author:
name = post_author
return username, name
def _media_kind(media: Any) -> str:
    """Classify message media as photo / video / audio / sticker / document / unknown."""
    if isinstance(media, MessageMediaPhoto):
        return "photo"
    if not isinstance(media, MessageMediaDocument):
        return "unknown"

    document = getattr(media, "document", None)
    mime_type = (getattr(document, "mime_type", "") or "").lower()
    for prefix, kind in (("video/", "video"), ("audio/", "audio")):
        if mime_type.startswith(prefix):
            return kind
    # WebP documents are reported as stickers.
    return "sticker" if mime_type == "image/webp" else "document"
def _media_size(media: Any) -> int | None:
doc = getattr(media, "document", None)
if doc is not None:
return getattr(doc, "size", None)
return None
def _media_mime(media: Any) -> str | None:
    """Return the media's MIME type.

    Documents report their own `mime_type`; photos are reported as
    `image/jpeg`; anything else yields None.
    """
    document = getattr(media, "document", None)
    if document is not None:
        return getattr(document, "mime_type", None)
    return "image/jpeg" if isinstance(media, MessageMediaPhoto) else None
async def _download_message_media(
    client: TelegramClient, msg: TgMessage, channel_id: int
) -> list[dict]:
    """Download one message's media into the per-channel folder under the media dir.

    Returns an empty list when the message has no media, otherwise a one-item
    list with a descriptor `{kind, mime, size}` plus either `url` (downloaded)
    or `skipped` (`too_large`, `download_error` or `no_file`). Oversized files
    are not downloaded, but their metadata is still returned.
    """
    media = msg.media
    if media is None:
        return []

    size = _media_size(media)
    descriptor: dict = {
        "kind": _media_kind(media),
        "mime": _media_mime(media),
        "size": size,
    }

    def skipped(reason: str) -> list[dict]:
        # Record why there is no file while keeping the metadata.
        descriptor["skipped"] = reason
        return [descriptor]

    if size is not None and size > settings.media_max_bytes:
        return skipped("too_large")

    channel_dir = Path(settings.media_dir) / str(channel_id)
    channel_dir.mkdir(parents=True, exist_ok=True)
    # The file is requested under the message id inside the channel folder; the
    # stored name is taken from the path Telethon returns.
    destination = channel_dir / f"{msg.id}"
    try:
        saved_path = await client.download_media(msg, file=str(destination))
    except Exception as exc:
        log.warning("media_download_failed", msg_id=msg.id, error=str(exc))
        return skipped("download_error")
    if saved_path is None:
        return skipped("no_file")

    base = settings.public_base_path.rstrip("/")
    descriptor["url"] = f"{base}/media/{channel_id}/{Path(saved_path).name}"
    return [descriptor]
# Process-wide Telethon client, created lazily by `get_client()`.
_client: TelegramClient | None = None
def get_client() -> TelegramClient:
    """Return the shared Telethon client, creating it on first use.

    A session string from settings is preferred; when it is empty the
    file-based session path is used instead.
    """
    global _client
    if _client is not None:
        return _client

    if settings.tg_session_string:
        session = StringSession(settings.tg_session_string)
    else:
        session = settings.tg_session_path
    _client = TelegramClient(session, settings.tg_api_id, settings.tg_api_hash)
    return _client
async def start_client() -> TelegramClient:
    """Return the shared client, connecting it first if needed.

    Connecting does not require authorization; an unauthorized but connected
    client is what the web login flow works with. Use `require_authorized()`
    when the caller needs a logged-in client.
    """
    client = get_client()
    if client.is_connected():
        return client
    await client.connect()
    return client
async def stop_client() -> None:
    """Disconnect the shared client, if connected, and forget it."""
    global _client
    client = _client
    if client is not None and client.is_connected():
        await client.disconnect()
    # NOTE(review): assumes the reference is cleared unconditionally — the
    # original indentation is not visible; confirm it was not meant to be
    # cleared only after an actual disconnect.
    _client = None
async def require_authorized() -> TelegramClient:
    """Return the connected client, raising RuntimeError when it is not logged in."""
    client = await start_client()
    authorized = await client.is_user_authorized()
    if authorized:
        return client
    raise RuntimeError("not authorized: complete login at /auth.html")
async def is_authorized() -> bool:
    """Connect if necessary and report whether the client is logged in."""
    connected = await start_client()
    authorized = await connected.is_user_authorized()
    return authorized
async def current_username() -> str | None:
    """Return the logged-in account's username, or its id as a string.

    Returns None when the client is not authorized or the account cannot be
    retrieved.
    """
    client = await start_client()
    if not await client.is_user_authorized():
        return None
    account = await client.get_me()
    if account is None:
        return None
    if account.username:
        return account.username
    return str(account.id)
# Hash returned by the last `send_login_code()` call; `submit_login_code()`
# needs it, and it is cleared once the login flow finishes or on logout.
_pending_phone_code_hash: str | None = None
async def send_login_code() -> None:
    """Login step 1: request a login code for the configured phone number.

    Stores the returned code hash for `submit_login_code`.

    Raises:
        RuntimeError: if the client is already authorized.
    """
    global _pending_phone_code_hash
    client = await start_client()
    already_in = await client.is_user_authorized()
    if already_in:
        raise RuntimeError("already authorized")
    request = await client.send_code_request(settings.tg_phone)
    _pending_phone_code_hash = request.phone_code_hash
async def submit_login_code(code: str) -> bool:
    """Login step 2: submit the received code.

    Returns True when the account additionally requires its 2FA password (the
    pending hash is kept in that case), False when the login is complete.

    Raises:
        RuntimeError: if no login code was requested beforehand.
    """
    global _pending_phone_code_hash
    pending_hash = _pending_phone_code_hash
    if pending_hash is None:
        raise RuntimeError("no pending login: call send-code first")

    client = await start_client()
    from telethon.errors import SessionPasswordNeededError

    try:
        await client.sign_in(
            phone=settings.tg_phone,
            code=code,
            phone_code_hash=pending_hash,
        )
    except SessionPasswordNeededError:
        return True
    else:
        _pending_phone_code_hash = None
        return False
async def submit_login_password(password: str) -> None:
    """Login step 3 (2FA accounts only): submit the cloud password."""
    global _pending_phone_code_hash
    connected = await start_client()
    await connected.sign_in(password=password)
    # The flow is finished; drop the pending code hash.
    _pending_phone_code_hash = None
async def logout() -> None:
    """Log the account out if it is logged in, and drop any pending login state."""
    global _pending_phone_code_hash
    client = await start_client()
    logged_in = await client.is_user_authorized()
    if logged_in:
        await client.log_out()
    # NOTE(review): assumes the pending hash is cleared regardless of whether a
    # logout happened — the original indentation is not visible; confirm.
    _pending_phone_code_hash = None
async def resolve_channel(identifier: str) -> ResolvedChannel:
    """Look up `identifier` on Telegram and return its id and title.

    Raises:
        ValueError: if the identifier resolves to something other than a channel.
    """
    client = await start_client()
    entity = await client.get_entity(identifier)
    if isinstance(entity, TgChannel):
        # Fall back to the identifier when the channel reports no title.
        return ResolvedChannel(tg_id=entity.id, title=entity.title or identifier)
    raise ValueError(f"{identifier!r} is not a channel")
async def fetch_specific_messages_with_media(
    identifier: str, tg_message_ids: list[int], channel_id: int
) -> dict[int, list[dict]]:
    """Re-fetch the given messages by id and download their media.

    Used by the backfill flow for messages that were stored before media
    download existed.

    Args:
        identifier: channel identifier passed to ``client.get_entity``.
        tg_message_ids: Telegram message ids to re-fetch.
        channel_id: forwarded to ``_download_message_media``.

    Returns:
        ``{tg_message_id: media_files}`` for every fetched message that is a
        ``TgMessage`` carrying media; other entries are left out.
    """
    tg = await require_authorized()
    peer = await tg.get_entity(identifier)
    fetched = await tg.get_messages(peer, ids=list(tg_message_ids))
    media_by_id: dict[int, list[dict]] = {}
    for message in fetched:
        # isinstance() is already False for None placeholders.
        carries_media = isinstance(message, TgMessage) and message.media is not None
        if carries_media:
            media_by_id[message.id] = await _download_message_media(tg, message, channel_id)
    return media_by_id
async def fetch_new_messages(
    identifier: str,
    min_id: int | None,
    limit: int,
    download_media_for_channel_id: int | None = None,
) -> list[FetchedMessage]:
    """Fetch up to ``limit`` messages of a channel, optionally with media.

    Args:
        identifier: channel identifier passed to ``client.get_entity``.
        min_id: when given, forwarded to ``iter_messages`` as ``min_id``.
        limit: maximum number of messages requested.
        download_media_for_channel_id: when not ``None``, media of messages
            that have any is downloaded via ``_download_message_media``.

    Returns:
        ``FetchedMessage`` records sorted by ascending ``tg_message_id``.
        Items yielded by Telethon that are not ``TgMessage`` are skipped.
    """
    tg = await start_client()
    peer = await tg.get_entity(identifier)

    iter_kwargs = {"limit": limit}
    if min_id is not None:
        iter_kwargs["min_id"] = min_id

    collected: list[FetchedMessage] = []
    async for msg in tg.iter_messages(peer, **iter_kwargs):
        if not isinstance(msg, TgMessage):
            continue

        wants_media = msg.media is not None and download_media_for_channel_id is not None
        media_files: list[dict] = (
            await _download_message_media(tg, msg, download_media_for_channel_id)
            if wants_media
            else []
        )

        sender_username, sender_name = _sender_info(msg)
        # sender_id may be a plain int or an object exposing ``user_id``.
        raw_sender = msg.sender_id
        sender_id = None if raw_sender is None else getattr(raw_sender, "user_id", raw_sender)

        record = FetchedMessage(
            tg_message_id=msg.id,
            date=msg.date,
            text=msg.message,
            sender_id=sender_id,
            sender_username=sender_username,
            sender_name=sender_name,
            grouped_id=getattr(msg, "grouped_id", None),
            has_media=msg.media is not None,
            views=msg.views,
            forwards=msg.forwards,
            raw=_json_safe(msg.to_dict()),
            media_files=media_files,
        )
        collected.append(record)

    return sorted(collected, key=lambda item: item.tg_message_id)

View File

@@ -0,0 +1,36 @@
<!doctype html>
<html lang="ru">
<head>
<meta charset="utf-8" />
<title>Админ — parser-tg-bot</title>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
</head>
<body>
<!-- Admin login page. static/js/admin.js toggles #admin-form and #admin-logout
     and writes the current state into #admin-status. -->
<header>
<h1>parser-tg-bot</h1>
<!-- Links marked admin-link / admin-login-link are removed by static/js/access.js
     for visitors who are not admins. -->
<nav>
<a href="/api/monitoring-tg/">Разделы</a>
<a class="admin-login-link active" href="/api/monitoring-tg/admin.html">Админ</a>
<a class="admin-link" href="/api/monitoring-tg/auth.html">Авторизация</a>
<a class="admin-link" href="/api/monitoring-tg/docs" target="_blank">API</a>
</nav>
</header>
<main>
<h2>Админ-доступ</h2>
<div class="card" style="max-width:520px">
<div id="admin-status" class="muted" style="margin-bottom:12px">Проверка...</div>
<!-- Submitted via fetch by admin.js (the submit event is prevented). -->
<form id="admin-form" class="row">
<input type="password" id="admin-password" autocomplete="current-password"
placeholder="Админ пароль" required style="flex:1; min-width:220px" />
<button type="submit">Войти</button>
</form>
<div class="row" style="margin-top:12px">
<button id="admin-logout" class="secondary" type="button">Выйти</button>
</div>
</div>
</main>
<script type="module" src="/api/monitoring-tg/static/js/admin.js"></script>
</body>
</html>

View File

@@ -0,0 +1,85 @@
<!doctype html>
<html lang="ru">
<head>
<meta charset="utf-8" />
<title>Авторизация — parser-tg-bot</title>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
</head>
<body>
<!-- Telegram account login page (send code, submit code, optional 2FA password). -->
<header>
<h1>parser-tg-bot</h1>
<nav>
<a href="/api/monitoring-tg/">Разделы</a>
<a href="/api/monitoring-tg/real-estate/">🏠 Недвижимость</a>
<a href="/api/monitoring-tg/hr/">👥 HR</a>
<a class="admin-login-link" href="/api/monitoring-tg/admin.html">Админ</a>
<a class="admin-link active" href="/api/monitoring-tg/auth.html">Авторизация</a>
<a class="admin-link" href="/api/monitoring-tg/docs" target="_blank">API</a>
</nav>
</header>
<main>
<h2>Авторизация Telegram</h2>
<div class="card" style="max-width:520px">
<div id="status-block">
<div class="empty">Проверка статуса...</div>
</div>
<!-- Every step-* block below starts hidden; presumably static/js/auth.js reveals
     the one matching the current login state (script not visible here). -->
<div id="step-idle" hidden>
<p>
Не авторизовано. Номер из конфигурации: <span class="mono" id="phone"></span>.
Нажми кнопку ниже — Telegram пришлёт одноразовый код на этот номер.
</p>
<button id="btn-send">Отправить код</button>
</div>
<div id="step-code" hidden>
<p>Код отправлен на <span class="mono" id="phone-2"></span>. Введи его:</p>
<form id="form-code" class="row">
<input type="text" id="code" inputmode="numeric" autocomplete="one-time-code"
placeholder="12345" required style="flex:1; min-width:160px" />
<button type="submit">Подтвердить</button>
</form>
<button id="btn-resend" class="secondary" style="margin-top:8px">
Запросить код повторно
</button>
</div>
<div id="step-password" hidden>
<p>На аккаунте включён 2FA. Введи облачный пароль Telegram:</p>
<form id="form-password" class="row">
<input type="password" id="password" autocomplete="current-password"
required style="flex:1; min-width:200px" />
<button type="submit">Войти</button>
</form>
</div>
<div id="step-done" hidden>
<p>
Авторизовано как <span class="mono" id="username"></span>.
Парсер начнёт опрашивать каналы согласно расписанию.
</p>
<div class="row">
<!-- NOTE(review): a button nested inside an anchor is not valid HTML (interactive
     content inside a link); consider styling the link itself as a button. -->
<a id="return-link" href="/api/monitoring-tg/"><button>Перейти к разделам</button></a>
<button id="btn-logout" class="danger">Выйти</button>
</div>
</div>
</div>
<!-- Static help card: non-interactive way to obtain TG_SESSION_STRING. -->
<div class="card" style="max-width:520px; margin-top:16px">
<h3 style="margin-top:0">Прод-вариант (без UI)</h3>
<p class="muted">
Для деплоя в k8s удобнее заранее получить опаковую строку сессии и положить её
в Secret — тогда поды поднимаются без интерактива:
</p>
<pre>docker compose run --rm -it app python -m parser_bot.auth</pre>
<p class="muted">
Скрипт напечатает <span class="mono">TG_SESSION_STRING=...</span> — вставить
в <span class="mono">.env</span> или Secret и забыть про авторизацию.
</p>
</div>
</main>
<script type="module" src="/api/monitoring-tg/static/js/auth.js"></script>
</body>
</html>

View File

@@ -0,0 +1,241 @@
/* Dark-theme palette shared by every page through var(--name). */
:root {
--bg: #0f1115;
--panel: #161a22;
--panel-2: #1d222c;
--border: #262c38;
--text: #e6e8ec;
--muted: #8a93a3;
--accent: #4f8cff;
--accent-hover: #6aa0ff;
--danger: #ff6464;
--ok: #2ecc71;
--warn: #f1c40f;
}
/* Base reset and typography. */
* { box-sizing: border-box; }
body {
margin: 0;
font: 14px/1.45 -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
background: var(--bg);
color: var(--text);
}
a { color: var(--accent); text-decoration: none; }
a:hover { color: var(--accent-hover); }
/* Page header bar with title and navigation links. */
header {
display: flex;
align-items: center;
gap: 24px;
padding: 14px 24px;
background: var(--panel);
border-bottom: 1px solid var(--border);
}
header h1 {
font-size: 16px;
margin: 0;
font-weight: 600;
}
nav { display: flex; gap: 6px; }
nav a {
padding: 6px 12px;
border-radius: 6px;
color: var(--muted);
}
nav a.active, nav a:hover {
color: var(--text);
background: var(--panel-2);
}
/* Centered content column and headings. */
main { padding: 24px; max-width: 1200px; margin: 0 auto; }
h2 { font-size: 18px; margin: 0 0 16px; }
h3 { font-size: 14px; margin: 24px 0 12px; color: var(--muted); font-weight: 500; text-transform: uppercase; letter-spacing: 0.05em; }
/* Flex helpers: .row lays children out horizontally, .spacer absorbs free space. */
.row { display: flex; gap: 12px; align-items: center; flex-wrap: wrap; }
.spacer { flex: 1; }
/* Bordered panel used as the generic content container. */
.card {
background: var(--panel);
border: 1px solid var(--border);
border-radius: 8px;
padding: 16px;
}
/* Responsive grid of statistic tiles. */
.stats-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
gap: 12px;
margin-bottom: 24px;
}
.stat .label { color: var(--muted); font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em; }
.stat .value { font-size: 24px; font-weight: 600; margin-top: 4px; }
/* Form controls share one look. The native outline is removed here, so every
   control needs its own focus indicator further down. */
input, select, textarea, button {
  font: inherit;
  color: var(--text);
  background: var(--panel-2);
  border: 1px solid var(--border);
  border-radius: 6px;
  padding: 8px 10px;
  outline: none;
}
/* Text-like controls signal focus through the border colour. */
input:focus, select:focus, textarea:focus { border-color: var(--accent); }
/* Buttons and checkboxes have no border change that would be noticeable, so
   keyboard focus gets an explicit ring instead. */
button:focus-visible,
input[type="checkbox"]:focus-visible {
  outline: 2px solid var(--accent-hover);
  outline-offset: 2px;
}
/* Primary button. */
button {
  cursor: pointer;
  background: var(--accent);
  border-color: var(--accent);
  color: white;
}
button:hover { background: var(--accent-hover); border-color: var(--accent-hover); }
/* Button variants. */
button.secondary { background: var(--panel-2); color: var(--text); }
button.secondary:hover { background: var(--border); }
button.danger { background: transparent; color: var(--danger); border-color: var(--border); }
button.danger:hover { background: rgba(255, 100, 100, 0.1); }
button:disabled { opacity: 0.5; cursor: not-allowed; }
/* Data tables. */
table { width: 100%; border-collapse: collapse; }
th, td { padding: 10px 12px; text-align: left; border-bottom: 1px solid var(--border); }
th { color: var(--muted); font-weight: 500; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em; }
tr:hover td { background: var(--panel-2); }
/* Pill-shaped label; the .ok / .off / .warn modifiers only change colours. */
.badge {
display: inline-block;
padding: 2px 8px;
border-radius: 999px;
font-size: 11px;
background: var(--panel-2);
color: var(--muted);
border: 1px solid var(--border);
}
.badge.ok { color: var(--ok); border-color: rgba(46, 204, 113, 0.4); }
.badge.off { color: var(--muted); }
.badge.warn { color: var(--warn); border-color: rgba(241, 196, 15, 0.4); }
/* Text helpers. */
.muted { color: var(--muted); }
.mono { font-family: ui-monospace, SFMono-Regular, Menlo, monospace; }
/* One entry of the message list; entries are separated by a bottom border. */
.message {
padding: 12px 16px;
border-bottom: 1px solid var(--border);
}
.message:last-child { border-bottom: none; }
.message-meta { display: flex; gap: 12px; color: var(--muted); font-size: 12px; margin-bottom: 6px; }
/* Keep the author's line breaks and wrap long unbroken strings. */
.message-text { white-space: pre-wrap; word-break: break-word; }
/* Row of tag badges under a message; each modifier picks a colour. */
.message-tags {
display: flex; flex-wrap: wrap; gap: 6px;
margin-top: 8px;
}
.message-tags .badge.re { color: #2ecc71; border-color: rgba(46, 204, 113, 0.4); }
.message-tags .badge.phone { color: #4f8cff; border-color: rgba(79, 140, 255, 0.4); }
.message-tags .badge.name { color: #f1c40f; border-color: rgba(241, 196, 15, 0.4); }
.message-tags .badge.tg { color: #4f8cff; border-color: rgba(79, 140, 255, 0.4); }
.message-tags .badge.tg-link { color: #fff; background: rgba(79, 140, 255, 0.2); border-color: rgba(79, 140, 255, 0.6); }
.message-tags .badge.tg-link:hover { background: rgba(79, 140, 255, 0.35); }
/* Lead summary card; lead-strong / lead-medium / lead-weak tint it green, yellow or grey. */
.lead-card {
margin-top: 10px;
padding: 10px 14px;
border-radius: 8px;
border: 1px solid var(--border);
background: rgba(46, 204, 113, 0.05);
}
.lead-card.lead-strong { border-color: rgba(46, 204, 113, 0.6); background: rgba(46, 204, 113, 0.1); }
.lead-card.lead-medium { border-color: rgba(241, 196, 15, 0.5); background: rgba(241, 196, 15, 0.06); }
.lead-card.lead-weak { border-color: rgba(138, 147, 163, 0.4); background: rgba(138, 147, 163, 0.05); }
.lead-head { display: flex; flex-wrap: wrap; align-items: center; gap: 10px; }
.lead-facts { color: var(--text); font-weight: 500; }
.lead-summary { margin-top: 4px; color: var(--muted); font-size: 13px; }
/* Confidence pill, pushed to the right edge of .lead-head by margin-left: auto. */
.lead-confidence {
margin-left: auto; padding: 2px 8px; border-radius: 999px;
background: var(--panel-2); border: 1px solid var(--border);
font-size: 11px; color: var(--muted); font-variant-numeric: tabular-nums;
}
.badge.lead { color: #2ecc71; border-color: rgba(46, 204, 113, 0.5); font-weight: 600; }
/* Attachments shown under a message. */
.message-media {
display: flex; flex-wrap: wrap; gap: 8px;
margin-top: 10px;
}
.media-thumb {
max-width: 240px; max-height: 240px;
border-radius: 6px; cursor: zoom-in;
background: var(--panel-2);
}
.media-video { max-width: 360px; max-height: 240px; border-radius: 6px; background: black; }
.media-doc {
display: inline-flex; align-items: center; gap: 8px;
padding: 8px 12px; background: var(--panel-2);
border: 1px solid var(--border); border-radius: 6px;
color: var(--text);
}
.media-doc:hover { border-color: var(--accent); }
.media-skipped {
display: inline-flex; align-items: center; gap: 8px;
padding: 6px 10px; background: var(--panel-2);
border-radius: 6px; font-size: 12px;
}
/* Full-viewport image overlay; z-index 2000 places it above .toast (1000). */
#lightbox {
position: fixed; inset: 0; z-index: 2000;
background: rgba(0,0,0,0.85);
display: flex; align-items: center; justify-content: center;
cursor: zoom-out;
}
#lightbox img { max-width: 95vw; max-height: 95vh; border-radius: 4px; }
/* Filter bar above lists. */
.toolbar { display: flex; gap: 8px; align-items: center; margin-bottom: 16px; flex-wrap: wrap; }
.toolbar input[type="search"], .toolbar select { min-width: 200px; }
/* Notification pinned to the bottom-right corner; .error / .success recolour the border. */
.toast {
position: fixed;
bottom: 20px;
right: 20px;
background: var(--panel);
border: 1px solid var(--border);
border-radius: 8px;
padding: 10px 16px;
box-shadow: 0 6px 24px rgba(0,0,0,0.4);
animation: slideIn 0.18s ease-out;
z-index: 1000;
max-width: 360px;
}
.toast.error { border-color: var(--danger); }
.toast.success { border-color: var(--ok); }
@keyframes slideIn { from { transform: translateY(8px); opacity: 0; } to { transform: none; opacity: 1; } }
/* Placeholder text for empty or loading states. */
.empty { padding: 32px; text-align: center; color: var(--muted); }
/* Grid of sub-section tiles. */
.sections-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
gap: 16px;
margin-top: 16px;
}
.section-tile { padding: 16px; }
/* The link keeps the normal text colour, also on hover. */
.section-tile-link { display: block; color: var(--text); }
.section-tile-link:hover { color: var(--text); }
.section-tile-head { display: flex; align-items: center; gap: 10px; margin-bottom: 8px; }
.section-emoji { font-size: 28px; }
.section-title { font-size: 16px; font-weight: 600; }
.section-stats { display: flex; flex-wrap: wrap; gap: 12px; color: var(--muted); font-size: 13px; }
.section-stats b { color: var(--text); }
.section-desc { margin-top: 8px; font-size: 13px; }
.section-code { margin-top: 8px; color: var(--warn); font-size: 12px; }
.section-slug { margin-top: 8px; font-size: 11px; }
/* Centered previous / next controls under a list. */
.pagination { display: flex; gap: 8px; justify-content: center; margin-top: 16px; }
/* Modal dialogs. min-width is capped at 80vw: a fixed 400px would override
   max-width on viewports narrower than 500px and push the dialog off-screen. */
dialog {
  background: var(--panel);
  color: var(--text);
  border: 1px solid var(--border);
  border-radius: 8px;
  padding: 20px;
  min-width: min(400px, 80vw);
  max-width: 80vw;
  max-height: 80vh;
}
dialog::backdrop { background: rgba(0,0,0,0.6); }
/* Scrollable preformatted block (raw message view, shell snippets). */
pre { background: var(--bg); padding: 12px; border-radius: 6px; overflow: auto; font-size: 12px; max-height: 60vh; }

View File

@@ -0,0 +1,99 @@
<!doctype html>
<html lang="ru">
<head>
  <meta charset="utf-8" />
  <title>👥 HR — подразделы</title>
  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
</head>
<body>
  <!-- HR vertical landing page: lists sub-sections and hosts the create / edit dialogs. -->
  <header>
    <h1 id="page-title">parser-tg-bot · 👥 HR / Кадры</h1>
    <!-- Empty placeholder; presumably filled by static/js/nav.js (not visible here). -->
    <nav id="nav-section"></nav>
  </header>
  <main>
    <div class="row">
      <h2>Подразделы HR</h2>
      <div class="spacer"></div>
      <button id="open-create">+ Новый подраздел</button>
    </div>
    <p class="muted">
      Каждый подраздел — это собственный набор каналов, своя статистика и свой
      LLM-промпт (с фоллбэком на промпт вертикали). Например: IT, продажи,
      маркетинг, рабочие специальности.
    </p>
    <div id="sections-grid"></div>
  </main>
  <dialog id="create-dialog">
    <h3 style="margin-top:0">Новый подраздел</h3>
    <form id="create-form">
      <label class="row" style="gap:8px; margin-bottom:8px">
        <span style="min-width:120px" class="muted">Название</span>
        <input type="text" id="new-title" required placeholder="IT" style="flex:1" />
      </label>
      <div class="row" style="gap:8px; margin-bottom:8px; font-size:12px">
        <span style="min-width:120px" class="muted">URL-адрес</span>
        <span class="muted mono">/hr/<span id="new-slug-preview">(введите название)</span>/</span>
        <div class="spacer"></div>
        <a href="#" id="new-slug-manual" class="muted">изменить вручную</a>
      </div>
      <label class="row slug-row" style="gap:8px; margin-bottom:8px" hidden>
        <span style="min-width:120px" class="muted">Slug</span>
        <!-- The hyphen inside the character class is escaped on purpose: browsers
             compile the pattern attribute with the RegExp "v" flag, where a bare "-"
             in a class is a syntax error and the whole pattern is silently ignored. -->
        <input type="text" id="new-slug" pattern="[a-z0-9][a-z0-9_\-]*[a-z0-9]?"
               placeholder="it" style="flex:1" />
      </label>
      <label class="row" style="gap:8px; margin-bottom:8px">
        <span style="min-width:120px" class="muted">Иконка</span>
        <input type="text" id="new-emoji" maxlength="4" placeholder="💻" style="width:80px" />
      </label>
      <label class="row" style="gap:8px; margin-bottom:8px">
        <span style="min-width:120px" class="muted">Код доступа</span>
        <input type="text" id="new-access-code" required minlength="3"
               autocomplete="new-password" style="flex:1" />
      </label>
      <label class="row" style="gap:8px; margin-bottom:8px; align-items:flex-start">
        <span style="min-width:120px" class="muted">Описание</span>
        <textarea id="new-description" rows="3" style="flex:1"></textarea>
      </label>
      <div class="row" style="justify-content:flex-end; gap:8px; margin-top:12px">
        <button type="button" id="create-cancel" class="secondary">Отмена</button>
        <button type="submit">Создать</button>
      </div>
    </form>
  </dialog>
  <dialog id="edit-dialog">
    <h3 style="margin-top:0">Редактировать подраздел</h3>
    <form id="edit-form">
      <!-- Slug of the section being edited; it has no visible field in this dialog. -->
      <input type="hidden" id="edit-slug" />
      <label class="row" style="gap:8px; margin-bottom:8px">
        <span style="min-width:120px" class="muted">Название</span>
        <input type="text" id="edit-title" required style="flex:1" />
      </label>
      <label class="row" style="gap:8px; margin-bottom:8px">
        <span style="min-width:120px" class="muted">Иконка</span>
        <input type="text" id="edit-emoji" maxlength="4" style="width:80px" />
      </label>
      <label class="row" style="gap:8px; margin-bottom:8px">
        <span style="min-width:120px" class="muted">Код доступа</span>
        <input type="text" id="edit-access-code" required minlength="3"
               autocomplete="new-password" style="flex:1" />
      </label>
      <label class="row" style="gap:8px; margin-bottom:8px; align-items:flex-start">
        <span style="min-width:120px" class="muted">Описание</span>
        <textarea id="edit-description" rows="3" style="flex:1"></textarea>
      </label>
      <div class="row" style="justify-content:flex-end; gap:8px; margin-top:12px">
        <button type="button" id="edit-cancel" class="secondary">Отмена</button>
        <button type="submit">Сохранить</button>
      </div>
    </form>
  </dialog>
  <script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
  <script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
  <script type="module" src="/api/monitoring-tg/static/js/sections-list.js"></script>
</body>
</html>

View File

@@ -0,0 +1,48 @@
<!doctype html>
<html lang="ru">
<head>
<meta charset="utf-8" />
<title>👥 HR · Каналы — parser-tg-bot</title>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
</head>
<body>
<!-- Channel management page of one HR sub-section: add form plus channel table. -->
<header>
<h1 id="page-title">parser-tg-bot</h1>
<!-- Empty placeholder; presumably filled by static/js/nav.js (not visible here). -->
<nav id="nav-section"></nav>
</header>
<main>
<h2 id="page-heading">Каналы подраздела</h2>
<div class="card" style="margin-bottom:24px">
<form id="add-form" class="row">
<input type="text" id="identifier" placeholder="@channel или https://t.me/..." required style="flex:1; min-width:280px" />
<button type="submit">Добавить канал</button>
</form>
<div class="muted" style="margin-top:8px; font-size:12px">
Канал будет привязан к текущему подразделу.
</div>
</div>
<div class="card">
<table>
<thead>
<tr>
<th>ID</th>
<th>Канал</th>
<th>Telegram ID</th>
<th>Сообщ.</th>
<th>Последний опрос</th>
<th>Статус</th>
<th></th>
</tr>
</thead>
<!-- Rows are presumably rendered by static/js/channels.js (not visible here). -->
<tbody id="tbody"></tbody>
</table>
</div>
</main>
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
<script type="module" src="/api/monitoring-tg/static/js/channels.js"></script>
</body>
</html>

View File

@@ -0,0 +1,43 @@
<!doctype html>
<html lang="ru">
<head>
<meta charset="utf-8" />
<title>👥 HR · Дашборд — parser-tg-bot</title>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
</head>
<body>
<!-- Dashboard of one HR sub-section: statistic tiles and a per-channel table. -->
<header>
<h1 id="page-title">parser-tg-bot</h1>
<!-- Empty placeholder; presumably filled by static/js/nav.js (not visible here). -->
<nav id="nav-section"></nav>
</header>
<main>
<div class="row">
<h2 id="page-heading">Дашборд</h2>
<div class="spacer"></div>
<button id="poll-all">Опросить все каналы подраздела</button>
</div>
<!-- #stats and #channels-tbody start empty; presumably static/js/dashboard.js
     fills them (script not visible here). -->
<div class="stats-grid" id="stats"></div>
<h3>Каналы подраздела</h3>
<div class="card">
<table>
<thead>
<tr>
<th>Канал</th>
<th>Сообщений</th>
<th>Последнее сообщение</th>
<th>Последний опрос</th>
<th>Статус</th>
</tr>
</thead>
<tbody id="channels-tbody"></tbody>
</table>
</div>
</main>
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
<script type="module" src="/api/monitoring-tg/static/js/dashboard.js"></script>
</body>
</html>

View File

@@ -0,0 +1,78 @@
<!doctype html>
<html lang="ru">
<head>
<meta charset="utf-8" />
<title>👥 HR · Сообщения — parser-tg-bot</title>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
</head>
<body>
<!-- Message browser of one HR sub-section: filter toolbar, list, pagination, raw view. -->
<header>
<h1 id="page-title">parser-tg-bot</h1>
<!-- Empty placeholder; presumably filled by static/js/nav.js (not visible here). -->
<nav id="nav-section"></nav>
</header>
<main>
<h2 id="page-heading">Сообщения подраздела</h2>
<!-- Filter controls; presumably read by static/js/messages.js (not visible here). -->
<div class="toolbar card">
<select id="channel-filter">
<option value="">Все каналы подраздела</option>
</select>
<input type="search" id="search" placeholder="Поиск по тексту..." />
<select id="hr-kind">
<option value="">Любой тип лида</option>
<option value="any">👥 HR (любой)</option>
<option value="vacancy">📢 Вакансия (наниматель)</option>
<option value="resume">📄 Резюме (соискатель)</option>
<option value="contact">📇 Лид-контакт</option>
</select>
<label class="row" style="gap:6px">
<input type="checkbox" id="leads-only" />
<span class="muted">🎯 Только лиды (ИИ)</span>
</label>
<!-- Minimum AI confidence; 0.5 is preselected. -->
<select id="min-confidence" title="Минимальная уверенность ИИ">
<option value="0.3">0.3+</option>
<option value="0.5" selected>0.5+</option>
<option value="0.7">0.7+</option>
<option value="0.9">0.9+</option>
</select>
<label class="row" style="gap:6px">
<input type="checkbox" id="has-phone" />
<span class="muted">📞 С телефоном</span>
</label>
<!-- Page size; 50 is preselected. -->
<select id="limit">
<option value="25">25</option>
<option value="50" selected>50</option>
<option value="100">100</option>
<option value="200">200</option>
</select>
<div class="spacer"></div>
<label class="row" style="gap:6px">
<input type="checkbox" id="autorefresh" />
<span class="muted">Автообновление</span>
</label>
<button id="refresh" class="secondary">Обновить</button>
</div>
<div class="card" id="list"></div>
<div class="pagination">
<button id="prev" class="secondary">← Назад</button>
<span class="muted" id="page-info" style="align-self:center"></span>
<button id="next" class="secondary">Вперёд →</button>
</div>
</main>
<!-- Modal showing a single message inside a pre block. -->
<dialog id="raw-dialog">
<h3 style="margin-top:0">Сообщение</h3>
<pre id="raw-content"></pre>
<div class="row" style="justify-content:flex-end; margin-top:12px">
<button class="secondary" id="raw-close">Закрыть</button>
</div>
</dialog>
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
<script type="module" src="/api/monitoring-tg/static/js/messages.js"></script>
</body>
</html>

View File

@@ -0,0 +1,66 @@
<!doctype html>
<html lang="ru">
<head>
<meta charset="utf-8" />
<title>👥 HR · Настройки — parser-tg-bot</title>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
</head>
<body>
<!-- Settings page of one HR sub-section: read-only config, actions, LLM prompt editor. -->
<header>
<h1 id="page-title">parser-tg-bot</h1>
<!-- Empty placeholder; presumably filled by static/js/nav.js (not visible here). -->
<nav id="nav-section"></nav>
</header>
<main>
<h2 id="page-heading">Настройки подраздела</h2>
<div class="card" style="margin-bottom:24px">
<h3 style="margin-top:0">Текущая конфигурация</h3>
<table>
<!-- Holds a loading row until replaced; presumably by static/js/settings.js
     (script not visible here). -->
<tbody id="config-tbody">
<tr><td colspan="2" class="empty">Загрузка...</td></tr>
</tbody>
</table>
<div class="muted" style="font-size:12px; margin-top:12px">
Параметры задаются через переменные окружения (<span class="mono">.env</span>).
Для изменения отредактируйте <span class="mono">.env</span> и перезапустите контейнер:
<span class="mono">docker compose restart app</span>.
</div>
</div>
<div class="card" style="margin-bottom:24px">
<h3 style="margin-top:0">Действия</h3>
<div class="row">
<button id="poll-all">Опросить все каналы подраздела сейчас</button>
<a href="/api/monitoring-tg/docs" target="_blank" class="badge">OpenAPI / Swagger</a>
<a href="/api/monitoring-tg/healthz" target="_blank" class="badge">Health check</a>
</div>
</div>
<div class="card" style="margin-bottom:24px">
<h3 style="margin-top:0">🤖 Промпт ИИ</h3>
<div class="row" style="margin-bottom:8px">
<span class="badge" id="prompt-status"></span>
<span class="muted" id="prompt-length"></span>
<div class="spacer"></div>
<!-- Chooses which prompt level the editor works on; section level is preselected. -->
<select id="prompt-level" title="Уровень редактирования промпта">
<option value="section" selected>Промпт подраздела</option>
<option value="vertical">Промпт вертикали</option>
</select>
<button id="prompt-reset" class="secondary">Сбросить уровень</button>
<button id="prompt-save">Сохранить</button>
</div>
<textarea id="prompt-editor" rows="22"
style="width:100%; font-family:ui-monospace, SFMono-Regular, Menlo, monospace; font-size:12px"></textarea>
<div class="muted" style="font-size:12px; margin-top:8px">
Каскад: <strong>section → vertical → default</strong>. Если промпта на
уровне подраздела нет, используется промпт вертикали; если и его нет —
встроенный по умолчанию. Сохранение применится в течение ~5 сек.
</div>
</div>
</main>
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
<script type="module" src="/api/monitoring-tg/static/js/settings.js"></script>
</body>
</html>

View File

@@ -0,0 +1,76 @@
<!doctype html>
<html lang="ru">
<head>
<meta charset="utf-8" />
<title>parser-tg-bot — выбор раздела</title>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
<!-- Page-local styles for the vertical chooser tiles; they reuse the palette
     variables from app.css. -->
<style>
.chooser {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
gap: 16px;
max-width: 880px;
margin: 32px auto 0;
}
.chooser .tile {
display: flex;
flex-direction: column;
gap: 8px;
padding: 28px 24px;
border-radius: 12px;
border: 1px solid var(--border);
background: var(--panel);
color: var(--text);
text-decoration: none;
transition: transform 0.08s, border-color 0.1s;
}
.chooser .tile:hover {
border-color: var(--accent);
transform: translateY(-2px);
}
.chooser .tile .emoji { font-size: 40px; }
.chooser .tile .title { font-size: 18px; font-weight: 600; }
.chooser .tile .hint { color: var(--muted); font-size: 13px; }
</style>
</head>
<body>
<!-- Landing page: choose between the real-estate and HR verticals. -->
<header>
<h1>parser-tg-bot</h1>
<!-- Links marked admin-link / admin-login-link are removed by static/js/access.js
     for visitors who are not admins. -->
<nav>
<a href="/api/monitoring-tg/" class="active">Разделы</a>
<a class="admin-login-link" href="/api/monitoring-tg/admin.html">Админ</a>
<a class="admin-link" href="/api/monitoring-tg/auth.html">Авторизация</a>
<a class="admin-link" href="/api/monitoring-tg/docs" target="_blank">API</a>
</nav>
</header>
<script type="module" src="/api/monitoring-tg/static/js/access.js"></script>
<main>
<h2>Выберите вертикаль</h2>
<p class="muted">
У каждой вертикали — свои подразделы (например, «Дубай», «Москва»
внутри Недвижимости, или «IT», «Продажи» внутри HR). Канал привязан
к одному подразделу одной вертикали.
</p>
<div class="chooser">
<a class="tile" href="/api/monitoring-tg/real-estate/">
<div class="emoji">🏠</div>
<div class="title">Недвижимость</div>
<div class="hint">
Объявления о покупке, продаже и аренде квартир, домов, апартаментов,
земли, коммерции. RU / EN / арабский — любой язык.
</div>
</a>
<a class="tile" href="/api/monitoring-tg/hr/">
<div class="emoji">👥</div>
<div class="title">HR / Кадры</div>
<div class="hint">
Вакансии (наниматели), резюме (соискатели) и короткие лиды-контакты
с указанием профессии и контактов.
</div>
</a>
</div>
</main>
</body>
</html>

View File

@@ -0,0 +1,41 @@
// Ask the backend whether this client is on the admin allowlist and hide
// admin-only nav links if not. The backend independently enforces the
// allowlist on every admin endpoint, so this is purely cosmetic — it just
// removes dead controls from the UI for non-admin visitors.
// Memoized result of the first /access/me lookup; null until isAdmin() runs.
let _adminPromise = null;

/**
 * Resolve to true when the backend reports this client as admin.
 * The request is made once per page load; later calls reuse the same promise.
 * Any HTTP or network failure resolves to false.
 */
export function isAdmin() {
  if (_adminPromise) return _adminPromise;
  _adminPromise = (async () => {
    try {
      const res = await fetch("/api/monitoring-tg/api/v1/access/me");
      const data = res.ok ? await res.json() : { is_admin: false };
      return Boolean(data.is_admin);
    } catch {
      return false;
    }
  })();
  return _adminPromise;
}
/**
 * Fetch the access status object from /access/me (not cached).
 * A non-2xx response, a network error or an unparsable body all resolve to
 * { is_admin: false, admin_ip_allowed: false }.
 */
export async function adminStatus() {
  try {
    const res = await fetch("/api/monitoring-tg/api/v1/access/me");
    if (res.ok) return await res.json();
  } catch {
    // fall through to the "no access" default below
  }
  return { is_admin: false, admin_ip_allowed: false };
}
// Remove admin-only controls for non-admin visitors. This is cosmetic only.
// Admins keep everything. Everyone else loses `.admin-link` and `.admin-only`,
// and additionally `.admin-login-link` when their IP may not open the admin
// login page at all.
function stripAdminControls(status) {
  if (status.is_admin) return;

  const selectors = [".admin-link", ".admin-only"];
  if (!status.admin_ip_allowed) selectors.push(".admin-login-link");

  const purge = () => {
    for (const selector of selectors) {
      for (const node of document.querySelectorAll(selector)) node.remove();
    }
  };

  // Clean what is already in the DOM, then keep watching: navs built by other
  // scripts may insert matching elements later.
  purge();
  new MutationObserver(purge).observe(document.body, { childList: true, subtree: true });
}

adminStatus().then(stripAdminControls);

View File

@@ -0,0 +1,49 @@
import { api, toast } from "/api/monitoring-tg/static/js/api.js";
import "/api/monitoring-tg/static/js/access.js";
// Elements of admin.html, looked up once when the module loads.
const form = document.getElementById("admin-form");
const password = document.getElementById("admin-password");
// Line of text describing the current admin-access state.
const statusEl = document.getElementById("admin-status");
const logoutBtn = document.getElementById("admin-logout");
/**
 * Where to send the user after a successful admin login.
 *
 * Reads the `return` query parameter but only honours it when it is a path
 * on this origin. The value comes from the URL and is attacker-controllable,
 * so absolute URLs, protocol-relative URLs ("//host") and `javascript:` URLs
 * must never reach `location.href`.
 *
 * @returns {string} same-origin path (with query and hash), or "/" as fallback.
 */
function returnUrl() {
  // NOTE(review): every other link in this UI points under /api/monitoring-tg/;
  // confirm that "/" is the intended fallback.
  const fallback = "/";
  const target = new URLSearchParams(location.search).get("return");
  if (!target || !target.startsWith("/")) return fallback;
  try {
    // Resolve like the browser would, so "//host" and "/\host" are caught too.
    const resolved = new URL(target, location.origin);
    if (resolved.origin !== location.origin) return fallback;
    return resolved.pathname + resolved.search + resolved.hash;
  } catch {
    return fallback;
  }
}
/**
 * Query the access status and switch the page between three states:
 * already admin, password login disabled, or password required.
 */
async function refresh() {
  const { is_admin: isAdmin, admin_password_enabled: passwordEnabled } = await api.accessMe();

  let message;
  let showForm = false;
  let showLogout = false;
  if (isAdmin) {
    message = "Админ-доступ активен.";
    showLogout = true;
  } else if (!passwordEnabled) {
    message = "Админ пароль не задан. Доступ управляется IP-allowlist.";
  } else {
    message = "Введите админ пароль, чтобы открыть админские функции.";
    showForm = true;
  }

  statusEl.textContent = message;
  form.hidden = !showForm;
  logoutBtn.hidden = !showLogout;
  // Focus the field shortly after the form is revealed.
  if (showForm) setTimeout(() => password.focus(), 30);
}
// Submit the admin password over the API instead of a native form post;
// on success clear the field and go to the return URL.
async function onAdminSubmit(event) {
  event.preventDefault();
  try {
    await api.adminLogin(password.value);
  } catch (err) {
    toast(err.message, "error");
    return;
  }
  password.value = "";
  toast("Админ-доступ открыт", "success");
  location.href = returnUrl();
}
form.addEventListener("submit", onAdminSubmit);
// Drop admin access and reload so the page re-renders for a non-admin.
// A failed request is reported with a toast, as the login handler does,
// rather than surfacing as an unhandled promise rejection.
logoutBtn.addEventListener("click", async () => {
  try {
    await api.adminLogout();
    location.reload();
  } catch (err) {
    toast(err.message, "error");
  }
});
// Initial render of the page state.
refresh().catch(err => toast(err.message, "error"));

View File

@@ -0,0 +1,192 @@
import { getVertical, getSection } from "/api/monitoring-tg/static/js/vertical.js";
const BASE = "/api/monitoring-tg/api/v1";
// In-flight section login, shared so that several requests failing at the
// same time do not each open their own prompt.
let sectionLoginPromise = null;

// Ask for the current section's access code and submit it.
// Resolves to false when there is no section or the user cancels the prompt.
async function promptAndSubmitSectionCode() {
  const vertical = getVertical();
  const section = getSection();
  if (!section) return false;
  const code = prompt(`Введите код подраздела "${section}"`);
  if (!code) return false;
  await request("/access/section-login", {
    method: "POST",
    body: JSON.stringify({ vertical, section, code }),
    sectionRetry: false,
  });
  return true;
}

/**
 * Unlock the section the current page belongs to.
 * @returns {Promise<boolean>} true once the backend accepted the code.
 */
async function unlockCurrentSection() {
  if (sectionLoginPromise !== null) return sectionLoginPromise;
  sectionLoginPromise = promptAndSubmitSectionCode();
  try {
    return await sectionLoginPromise;
  } finally {
    // Allow a fresh attempt next time, whether this one succeeded or failed.
    sectionLoginPromise = null;
  }
}
// Turn an error `detail` payload into readable text. Besides plain strings,
// the backend may send a list of validation errors ({ loc, msg, ... }) or an
// arbitrary object; interpolating those directly yields "[object Object]".
function formatErrorDetail(detail) {
  if (typeof detail === "string") return detail;
  if (Array.isArray(detail)) {
    return detail
      .map((item) => {
        if (typeof item === "string") return item;
        if (item && typeof item.msg === "string") {
          const where = Array.isArray(item.loc) ? item.loc.join(".") : "";
          return where ? `${where}: ${item.msg}` : item.msg;
        }
        return JSON.stringify(item);
      })
      .join("; ");
  }
  return JSON.stringify(detail);
}

/**
 * Perform a JSON API call relative to BASE.
 *
 * @param {string} path - API path, appended to BASE.
 * @param {object} [options] - fetch options; `sectionRetry` (default true)
 *   controls whether a 401 "section code required" triggers the unlock prompt
 *   followed by one retry.
 * @returns {Promise<any>} parsed JSON body, or null for 204 responses.
 * @throws {Error} "<status>: <detail>" for any non-2xx response.
 */
async function request(path, options = {}) {
  const { sectionRetry = true, ...fetchOptions } = options;
  const res = await fetch(BASE + path, {
    headers: { "Content-Type": "application/json" },
    ...fetchOptions,
  });
  if (!res.ok) {
    let detail = res.statusText;
    // The error body may be missing or not JSON; keep the status text then.
    try { detail = (await res.json()).detail || detail; } catch {}
    if (res.status === 401 && detail === "section code required" && sectionRetry) {
      if (await unlockCurrentSection()) {
        // Retry once with the prompt disabled to avoid a prompt loop.
        return request(path, { ...options, sectionRetry: false });
      }
    }
    throw new Error(`${res.status}: ${formatErrorDetail(detail)}`);
  }
  if (res.status === 204) return null;
  return res.json();
}
// Build the query string for a request scoped to (vertical, section).
// `vertical` falls back to the one derived from the URL. `section` does the
// same only when it is left undefined: passing null explicitly omits it,
// which is how chooser pages at /<vertical>/ see every section. Entries of
// `extra` that are null, undefined or false are left out.
function qs(extra = {}, { vertical, section } = {}) {
  const query = new URLSearchParams();
  query.set("vertical", vertical ?? getVertical());

  const scopedSection = section === undefined ? getSection() : section;
  if (scopedSection) query.set("section", scopedSection);

  Object.entries(extra)
    .filter(([, value]) => value != null && value !== false)
    .forEach(([key, value]) => query.set(key, String(value)));

  return query.toString();
}
// Thin client over the REST API. Every method returns the promise produced by
// request(); `scope` arguments are optional { vertical, section } overrides
// forwarded to qs().
export const api = {
  // --- Access (admin password / per-section codes) ---
  accessMe() {
    return request("/access/me");
  },
  adminLogin(password) {
    return request("/access/admin-login", {
      method: "POST",
      body: JSON.stringify({ password }),
      sectionRetry: false,
    });
  },
  adminLogout() {
    return request("/access/admin-logout", { method: "POST", sectionRetry: false });
  },
  sectionLogin({ vertical, section, code }) {
    return request("/access/section-login", {
      method: "POST",
      body: JSON.stringify({ vertical, section, code }),
      sectionRetry: false,
    });
  },

  // --- Auth endpoints: section-agnostic, no scope in the URL ---
  authStatus() {
    return request("/auth/status");
  },
  authSendCode() {
    return request("/auth/send-code", { method: "POST" });
  },
  authSubmitCode(code) {
    return request("/auth/submit-code", { method: "POST", body: JSON.stringify({ code }) });
  },
  authSubmitPassword(password) {
    return request("/auth/submit-password", { method: "POST", body: JSON.stringify({ password }) });
  },
  authLogout() {
    return request("/auth/logout", { method: "POST" });
  },

  // --- Sections (sub-sections within a vertical) ---
  listSections(vertical) {
    return request(`/sections?${qs({}, { vertical, section: null })}`);
  },
  createSection({ vertical, slug, title, emoji, description, accessCode }) {
    const payload = {
      vertical: vertical ?? getVertical(),
      slug, title, emoji, description, access_code: accessCode,
    };
    return request("/sections", { method: "POST", body: JSON.stringify(payload) });
  },
  updateSection(vertical, slug, patch) {
    const url = `/sections/${encodeURIComponent(vertical)}/${encodeURIComponent(slug)}`;
    return request(url, { method: "PATCH", body: JSON.stringify(patch) });
  },
  deleteSection(vertical, slug) {
    const url = `/sections/${encodeURIComponent(vertical)}/${encodeURIComponent(slug)}`;
    return request(url, { method: "DELETE" });
  },

  // --- Scoped reads/writes: implicit (vertical, section) from the URL ---
  globalStats(scope) {
    return request(`/stats?${qs({}, scope)}`);
  },
  listChannels(scope) {
    return request(`/channels?${qs({}, scope)}`);
  },
  getChannel(id, scope) {
    return request(`/channels/${id}?${qs({}, scope)}`);
  },
  channelStats(id, scope) {
    return request(`/channels/${id}/stats?${qs({}, scope)}`);
  },
  addChannel(identifier, scope = {}) {
    const vertical = scope.vertical ?? getVertical();
    const section = scope.section === undefined ? getSection() : scope.section;
    // Throws synchronously (not a rejected promise) when no section applies.
    if (!section) {
      throw new Error("addChannel requires a section context");
    }
    return request("/channels", {
      method: "POST",
      body: JSON.stringify({ identifier, vertical, section }),
    });
  },
  updateChannel(id, patch, scope) {
    return request(`/channels/${id}?${qs({}, scope)}`, {
      method: "PATCH", body: JSON.stringify(patch),
    });
  },
  deleteChannel(id, scope) {
    return request(`/channels/${id}?${qs({}, scope)}`, { method: "DELETE" });
  },
  pollChannel(id, scope) {
    return request(`/channels/${id}/poll?${qs({}, scope)}`, { method: "POST" });
  },
  backfillMedia(id, batch = 50, scope) {
    return request(`/channels/${id}/backfill-media?${qs({ batch }, scope)}`, { method: "POST" });
  },
  reanalyze(id, batch = 500, scope) {
    return request(`/channels/${id}/reanalyze?${qs({ batch }, scope)}`, { method: "POST" });
  },
  pollAll(scope) {
    return request(`/poll?${qs({}, scope)}`, { method: "POST" });
  },

  // --- Messages ---
  listMessages({ channelId, q, realEstate, hrKind, hasPhone, leadsOnly,
                 minConfidence, limit = 50, offset = 0,
                 vertical, section } = {}) {
    // Only truthy filters are sent; min_confidence is meaningful solely in
    // combination with leads_only.
    const filters = { limit, offset };
    if (channelId) filters.channel_id = channelId;
    if (q) filters.q = q;
    if (realEstate) filters.real_estate = realEstate;
    if (hrKind) filters.hr_kind = hrKind;
    if (hasPhone) filters.has_phone = "true";
    if (leadsOnly) {
      filters.leads_only = "true";
      if (minConfidence != null) filters.min_confidence = minConfidence;
    }
    return request(`/messages?${qs(filters, { vertical, section })}`);
  },
  getMessage(id, scope) {
    return request(`/messages/${id}?${qs({}, scope)}`);
  },

  // --- LLM status, queue and prompt management ---
  llmStatus() {
    return request("/llm/status");
  },
  llmQueue(scope) {
    return request(`/llm/queue?${qs({}, scope)}`);
  },
  llmPromptGet(scope) {
    return request(`/llm/prompt?${qs({}, scope)}`);
  },
  llmPromptSave(prompt, scope) {
    return request(`/llm/prompt?${qs({}, scope)}`, {
      method: "PUT", body: JSON.stringify({ prompt }),
    });
  },
  llmPromptReset(scope) {
    return request(`/llm/prompt?${qs({}, scope)}`, { method: "DELETE" });
  },
};
/**
 * Show a transient notification appended to <body>.
 *
 * @param {string} message  Text content (assigned via textContent, so it is
 *                          never parsed as HTML).
 * @param {string} type     Extra CSS class next to "toast"; defaults to "info".
 */
export function toast(message, type = "info") {
  const node = document.createElement("div");
  Object.assign(node, { className: `toast ${type}`, textContent: message });
  document.body.appendChild(node);
  // Remove the element again after 3.5 seconds.
  setTimeout(() => node.remove(), 3500);
}
/**
 * Format a timestamp with the browser's locale settings.
 * Falsy input (null, undefined, "") yields an em dash placeholder.
 */
export function fmtDate(iso) {
  return iso ? new Date(iso).toLocaleString() : "—";
}
/**
 * Format a timestamp as a coarse "N{s,m,h,d} ago" string.
 *
 * Returns an em dash for falsy or unparsable input. Timestamps that lie in
 * the future (e.g. server clock ahead of the browser) are clamped to
 * "0s ago" instead of producing a negative age.
 */
export function fmtRelative(iso) {
  if (!iso) return "—";
  const ts = new Date(iso).getTime();
  // An invalid date would otherwise fall through every comparison and
  // render as "NaNd ago".
  if (Number.isNaN(ts)) return "—";
  const diff = Math.max(0, (Date.now() - ts) / 1000);
  if (diff < 60) return `${Math.floor(diff)}s ago`;
  if (diff < 3600) return `${Math.floor(diff / 60)}m ago`;
  if (diff < 86400) return `${Math.floor(diff / 3600)}h ago`;
  return `${Math.floor(diff / 86400)}d ago`;
}

View File

@@ -0,0 +1,120 @@
import { api, toast } from "/api/monitoring-tg/static/js/api.js";
// Validated target for the "return" link, or null when ?return= is absent
// or unsafe.
//
// A plain prefix check ("/" but not "//") is not enough to prevent an open
// redirect: URL parsers treat "\" like "/" for http(s) and strip tabs and
// newlines, so values such as "/\evil.example" resolve to another origin.
// The value is therefore resolved against the current origin and accepted
// only if it stays on it; the normalized path + query + fragment is used.
const returnTo = (() => {
  const raw = new URLSearchParams(location.search).get("return");
  if (!raw || !raw.startsWith("/")) return null;
  try {
    const target = new URL(raw, location.origin);
    if (target.origin !== location.origin) return null;
    return target.pathname + target.search + target.hash;
  } catch {
    return null;
  }
})();
const returnLink = document.getElementById("return-link");
if (returnLink && returnTo) {
  returnLink.href = returnTo;
  returnLink.querySelector("button").textContent = "← Вернуться";
}
// Names of the wizard panels; each maps to an element with id "step-<name>".
const steps = ["idle", "code", "password", "done"];

// Reveal the panel for `step` and hide all the others.
function show(step) {
  for (const name of steps) {
    const panel = document.getElementById(`step-${name}`);
    panel.hidden = name !== step;
  }
}
// Replace the contents of #status-block with the given HTML. The argument is
// assigned to innerHTML, so callers must pass trusted markup only.
function setStatus(html) {
  const block = document.getElementById("status-block");
  block.innerHTML = html;
}
// Fetch the current auth status and bring the page in line with it: fill in
// the phone placeholders, update the status badge and switch to either the
// "done" panel (authorized) or the "idle" panel (not authorized).
async function refresh() {
  const status = await api.authStatus();
  const phoneText = status.phone || "—";
  document.getElementById("phone").textContent = phoneText;
  document.getElementById("phone-2").textContent = phoneText;

  if (!status.authorized) {
    setStatus(`<div class="badge warn">Не авторизовано</div>`);
    show("idle");
    return;
  }

  setStatus(`<div class="badge ok">Авторизовано</div>`);
  document.getElementById("username").textContent = status.username || "(unnamed)";
  show("done");
}
// "Send code": request a login code, then move on to the code-entry panel.
// The clicked element stays disabled while the request is in flight.
document.getElementById("btn-send").addEventListener("click", async ({ target: sendBtn }) => {
  sendBtn.disabled = true;
  try {
    await api.authSendCode();
    toast("Код отправлен в Telegram", "success");
    show("code");
    document.getElementById("code").focus();
  } catch (err) {
    toast(err.message, "error");
  } finally {
    sendBtn.disabled = false;
  }
});
// "Resend": request another code without leaving the current panel.
document.getElementById("btn-resend").addEventListener("click", async ({ target: resendBtn }) => {
  resendBtn.disabled = true;
  try {
    await api.authSendCode();
    toast("Новый код отправлен", "success");
  } catch (err) {
    toast(err.message, "error");
  } finally {
    resendBtn.disabled = false;
  }
});
// Code form: submit the entered code. When the response has `needs_password`
// set, continue to the password panel; otherwise re-render from the status.
document.getElementById("form-code").addEventListener("submit", async (e) => {
  e.preventDefault();
  const enteredCode = document.getElementById("code").value.trim();
  const submitBtn = e.target.querySelector("button");
  submitBtn.disabled = true;
  try {
    const outcome = await api.authSubmitCode(enteredCode);
    if (!outcome.needs_password) {
      toast("Готово", "success");
      await refresh();
    } else {
      toast("Введи 2FA-пароль", "success");
      show("password");
      document.getElementById("password").focus();
    }
  } catch (err) {
    toast(err.message, "error");
  } finally {
    submitBtn.disabled = false;
  }
});
// Password form: submit the password; on success clear the field and
// re-render from the fresh status.
document.getElementById("form-password").addEventListener("submit", async (e) => {
  e.preventDefault();
  const passwordInput = document.getElementById("password");
  const secret = passwordInput.value;
  const submitBtn = e.target.querySelector("button");
  submitBtn.disabled = true;
  try {
    await api.authSubmitPassword(secret);
    toast("Авторизовано", "success");
    document.getElementById("password").value = "";
    await refresh();
  } catch (err) {
    toast(err.message, "error");
  } finally {
    submitBtn.disabled = false;
  }
});
// Logout: ask for confirmation first, then end the session and re-render.
document.getElementById("btn-logout").addEventListener("click", async ({ target: logoutButton }) => {
  const confirmed = confirm("Выйти из Telegram-сессии?");
  if (!confirmed) return;
  logoutButton.disabled = true;
  try {
    await api.authLogout();
    toast("Сессия завершена", "success");
    await refresh();
  } catch (err) {
    toast(err.message, "error");
  } finally {
    logoutButton.disabled = false;
  }
});

// Initial render; a failing status request is shown as a toast.
refresh().catch(err => toast(err.message, "error"));

View File

@@ -0,0 +1,132 @@
import { api, toast, fmtRelative } from "/api/monitoring-tg/static/js/api.js";
import { isAdmin } from "/api/monitoring-tg/static/js/access.js";
import { getVertical, getSection, sectionBase, VERTICAL_META } from "/api/monitoring-tg/static/js/vertical.js";
// Page scope, resolved once at module load through the vertical.js helpers.
const V = getVertical();
const section = getSection();
// Prefix for links into the current section (used for the messages link).
const sBase = sectionBase();
// Display metadata of the current vertical; `meta.short` is shown in toasts.
const meta = VERTICAL_META[V];
// HTML-escape a value for interpolation into markup. null/undefined become
// the empty string; any other value is stringified first.
function escape(s) {
  if (s === null || s === undefined) return "";
  const entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  return String(s).replace(/[&<>"']/g, (ch) => entities[ch]);
}
/**
 * Fetch the channels of the current scope and render them into #tbody.
 *
 * Stats are requested per channel, all in parallel; a failed stats request
 * degrades to an empty object so the row still renders with a zero count.
 * The active checkbox is disabled and the action buttons are omitted unless
 * isAdmin() resolves truthy. Rows carry the channel id in `data-id` and the
 * controls carry `data-action`, which the delegated click handler reads.
 */
async function load() {
const admin = await isAdmin();
const channels = await api.listChannels();
const tbody = document.getElementById("tbody");
// Empty state: a single placeholder row, no stats requests.
if (!channels.length) {
tbody.innerHTML = `<tr><td colspan="7" class="empty">Каналов пока нет</td></tr>`;
return;
}
// stats[i] belongs to channels[i]; null marks a failed stats request.
const stats = await Promise.all(channels.map(c => api.channelStats(c.id).catch(() => null)));
tbody.innerHTML = channels.map((c, i) => {
const s = stats[i] || {};
return `
<tr data-id="${c.id}">
<td class="muted mono">${c.id}</td>
<td>
<div>${escape(c.title || "—")}</div>
<div class="muted mono" style="font-size:12px">${escape(c.identifier)}</div>
</td>
<td class="mono muted">${c.tg_id ?? "—"}</td>
<td>${(s.message_count ?? 0).toLocaleString()}</td>
<td>${fmtRelative(c.last_polled_at)}</td>
<td>
<label class="row" style="gap:6px">
<input type="checkbox" data-action="toggle" ${c.is_active ? "checked" : ""} ${admin ? "" : "disabled"} />
<span class="badge ${c.is_active ? "ok" : "off"}">${c.is_active ? "on" : "off"}</span>
</label>
</td>
<td>
<div class="row" style="gap:6px">
<a href="${sBase}/messages.html?channel_id=${c.id}" class="badge">сообщения</a>
${admin ? `
<button class="secondary" data-action="poll">Опросить</button>
<button class="secondary" data-action="backfill-media">Подкачать медиа</button>
<button class="secondary" data-action="reanalyze">Переанализировать</button>
<button class="danger" data-action="delete">Удалить</button>
` : ""}
</div>
</td>
</tr>`;
}).join("");
}
// Add-channel form: submit the trimmed identifier for the current scope,
// then clear the input and re-render the table. Blank input is ignored.
document.getElementById("add-form").addEventListener("submit", async (e) => {
  e.preventDefault();
  const field = document.getElementById("identifier");
  const identifier = field.value.trim();
  if (!identifier) return;

  const submitBtn = e.target.querySelector("button");
  submitBtn.disabled = true;
  try {
    await api.addChannel(identifier);
    const where = section ? `${meta.short} / ${section}` : meta.short;
    toast(`Канал добавлен в "${where}"`, "success");
    field.value = "";
    await load();
  } catch (err) {
    toast(err.message, "error");
  } finally {
    submitBtn.disabled = false;
  }
});
/**
 * Run a batched server-side job repeatedly until nothing is left to do.
 *
 * Calls `runBatch()` until it reports `pending` <= 0 or a batch with
 * `updated === 0` (no progress). The button is disabled and shows a progress
 * label while running; it is always restored and re-enabled afterwards, on
 * both success and failure.
 *
 * @returns {Promise<{total: number, pending: number}>} totals for the toast.
 */
async function runBatched(btn, idleLabel, progressLabel, runBatch) {
  let total = 0;
  let pending = Infinity;
  btn.disabled = true;
  try {
    while (pending > 0) {
      btn.textContent = progressLabel(total);
      const res = await runBatch();
      total += res.updated;
      pending = res.pending;
      if (res.updated === 0) break;
    }
  } finally {
    btn.textContent = idleLabel;
    btn.disabled = false;
  }
  return { total, pending };
}

// Delegated handler for every control inside the channel table. The action
// comes from `data-action`, the channel id from the enclosing row. Any
// failure is toasted and the table is re-rendered so the UI matches the
// server state again (e.g. a checkbox whose toggle request failed).
document.getElementById("tbody").addEventListener("click", async (e) => {
  const btn = e.target.closest("[data-action]");
  if (!btn) return;
  const tr = btn.closest("tr");
  const id = Number(tr.dataset.id);
  const action = btn.dataset.action;
  try {
    if (action === "delete") {
      if (!confirm("Удалить канал и все его сообщения?")) return;
      await api.deleteChannel(id);
      toast("Удалено", "success");
      await load();
    } else if (action === "poll") {
      // No explicit re-enable needed: load() re-renders the whole row.
      btn.disabled = true;
      const res = await api.pollChannel(id);
      toast(`Добавлено ${res.inserted} сообщений`, "success");
      await load();
    } else if (action === "backfill-media") {
      const { total, pending } = await runBatched(
        btn,
        "Подкачать медиа",
        done => `Качаю... (готово: ${done})`,
        () => api.backfillMedia(id, 50),
      );
      toast(`Подкачано ${total}, осталось ${pending}`, "success");
    } else if (action === "reanalyze") {
      const { total, pending } = await runBatched(
        btn,
        "Переанализировать",
        done => `Анализирую... (${done})`,
        () => api.reanalyze(id, 500),
      );
      toast(`Проанализировано ${total} сообщений, осталось ${pending}`, "success");
    } else if (action === "toggle") {
      const isActive = btn.checked;
      await api.updateChannel(id, { is_active: isActive });
      toast(isActive ? "Канал включён" : "Канал выключен", "success");
      await load();
    }
  } catch (err) {
    toast(err.message, "error");
    await load();
  }
});

// Initial render.
load().catch(err => toast(err.message, "error"));

View File

@@ -0,0 +1,87 @@
import { api, toast, fmtRelative } from "/api/monitoring-tg/static/js/api.js";
import { isAdmin } from "/api/monitoring-tg/static/js/access.js";
import { getVertical, getSection, sectionBase, VERTICAL_META } from "/api/monitoring-tg/static/js/vertical.js";
// Page scope, resolved once at module load through the vertical.js helpers.
const V = getVertical();
const section = getSection();
// Prefix for links into the current section's pages.
const sBase = sectionBase();
// Display metadata of the current vertical; `meta.short` is shown in toasts.
const meta = VERTICAL_META[V];
// HTML-escape a value for interpolation into markup. null/undefined become
// the empty string; any other value is stringified first.
function escape(s) {
  if (s === null || s === undefined) return "";
  const replacements = new Map([
    ["&", "&amp;"],
    ["<", "&lt;"],
    [">", "&gt;"],
    ['"', "&quot;"],
    ["'", "&#39;"],
  ]);
  return String(s).replace(/[&<>"']/g, (ch) => replacements.get(ch));
}
/**
 * Render the stat cards into #stats.
 *
 * Global stats are required (a failure rejects the whole call). The LLM
 * status and queue requests are best-effort: on failure they fall back to
 * "disabled" and an unknown ("—") queue size respectively.
 */
async function loadStats() {
const [stats, llm, queue] = await Promise.all([
api.globalStats(),
api.llmStatus().catch(() => ({ enabled: false, ready: false, model: "—" })),
api.llmQueue().catch(() => ({ pending: null })),
]);
const grid = document.getElementById("stats");
// Three LLM states: enabled+ready, enabled but not ready yet, disabled.
const llmBadge = llm.enabled
? (llm.ready ? `<span class="badge ok">ready</span>` : `<span class="badge warn">загружается</span>`)
: `<span class="badge off">off</span>`;
// null/undefined pending means the queue size is unknown.
const queueValue = queue.pending == null ? "—" : queue.pending.toLocaleString();
// NOTE(review): the lead counters default to 0 when missing, while
// messages_total / messages_last_24h are dereferenced directly — confirm
// the API always returns them.
grid.innerHTML = `
<div class="card stat"><div class="label">Каналы</div><div class="value">${stats.channels_active} / ${stats.channels_total}</div></div>
<div class="card stat"><div class="label">Сообщений всего</div><div class="value">${stats.messages_total.toLocaleString()}</div></div>
<div class="card stat"><div class="label">Сообщений за 24ч</div><div class="value">${stats.messages_last_24h.toLocaleString()}</div></div>
<div class="card stat"><div class="label">🎯 Лидов всего</div><div class="value">${(stats.leads_total ?? 0).toLocaleString()}</div></div>
<div class="card stat"><div class="label">🎯 Лидов за 24ч</div><div class="value"><a href="${sBase}/messages.html?leads_only=true">${(stats.leads_last_24h ?? 0).toLocaleString()}</a></div></div>
<div class="card stat"><div class="label">⏳ В очереди ИИ</div><div class="value">${queueValue}</div></div>
<div class="card stat"><div class="label">Период опроса</div><div class="value">${stats.poll_interval_seconds}s</div></div>
<div class="card stat"><div class="label">Последний опрос</div><div class="value">${fmtRelative(stats.last_poll_at)}</div></div>
<div class="card stat"><div class="label">Локальный ИИ</div><div class="value" style="font-size:14px">${llmBadge}<div class="muted mono" style="font-size:11px;margin-top:4px">${escape(llm.model || "")}</div></div></div>
`;
}
/**
 * Render the read-only channel overview into #channels-tbody.
 *
 * Stats are requested per channel, in parallel; a failed stats request
 * degrades to an empty object (zero messages, "—" for the last message).
 */
async function loadChannels() {
const channels = await api.listChannels();
const tbody = document.getElementById("channels-tbody");
// Empty state: point the user at the channels page of this section.
if (!channels.length) {
tbody.innerHTML = `<tr><td colspan="5" class="empty">Каналов в этом подразделе пока нет — добавьте их на странице <a href="${sBase}/channels.html">Каналы</a></td></tr>`;
return;
}
// stats[i] belongs to channels[i]; null marks a failed stats request.
const stats = await Promise.all(channels.map(c => api.channelStats(c.id).catch(() => null)));
tbody.innerHTML = channels.map((c, i) => {
const s = stats[i] || {};
return `
<tr>
<td>
<div><a href="${sBase}/messages.html?channel_id=${c.id}">${escape(c.title || c.identifier)}</a></div>
<div class="muted mono" style="font-size:12px">${escape(c.identifier)}</div>
</td>
<td>${(s.message_count ?? 0).toLocaleString()}</td>
<td>${fmtRelative(s.last_message_at)}</td>
<td>${fmtRelative(c.last_polled_at)}</td>
<td>${c.is_active ? '<span class="badge ok">on</span>' : '<span class="badge off">off</span>'}</td>
</tr>`;
}).join("");
}
// "Poll all": queue a poll for the channels of the current scope, report how
// many were queued, then refresh the dashboard.
document.getElementById("poll-all").addEventListener("click", async ({ target: pollBtn }) => {
  pollBtn.disabled = true;
  try {
    const outcome = await api.pollAll();
    const scope = section ? `${meta.short} / ${section}` : meta.short;
    toast(`В очереди ${outcome.queued ?? 0} каналов (${scope}) — опрос идёт в фоне`, "success");
    await loadAll();
  } catch (err) {
    toast(err.message, "error");
  } finally {
    pollBtn.disabled = false;
  }
});
// Refresh the whole dashboard: show the "poll all" button to admins only,
// then reload stats and the channel table in parallel. Errors are toasted,
// never thrown, so the periodic timer below keeps running.
async function loadAll() {
  try {
    const pollAllBtn = document.getElementById("poll-all");
    pollAllBtn.hidden = !(await isAdmin());
    await Promise.all([loadStats(), loadChannels()]);
  } catch (err) {
    toast(err.message, "error");
  }
}

// Render immediately, then refresh every 15 seconds.
loadAll();
setInterval(loadAll, 15000);

View File

@@ -0,0 +1,433 @@
import { api, toast, fmtDate } from "/api/monitoring-tg/static/js/api.js";
import { getVertical, getSection, VERTICAL_META } from "/api/monitoring-tg/static/js/vertical.js";
// Page scope, resolved once at module load through the vertical.js helpers.
const V = getVertical();
const section = getSection();
// Display metadata of the current vertical; `meta.short` labels the
// "all channels" option of the channel filter.
const meta = VERTICAL_META[V];
// Mutable view state for the message list. Filter fields are seeded from the
// URL by readUrl(), mirrored into the form controls by syncControls() and
// sent to the API by loadMessages().
const state = {
// Pagination window passed to api.listMessages().
offset: 0,
limit: 50,
// Selected channel id, or null for all channels.
channelId: null,
// Free-text search; also used to highlight matches in message text.
q: "",
// Values of the optional #real-estate and #hr-kind selects ("" = no filter).
realEstate: "",
hrKind: "",
hasPhone: false,
leadsOnly: false,
// Only sent to the API while leadsOnly is on.
minConfidence: 0.5,
// Channel list cached by loadChannels(); used to resolve titles by id.
channels: [],
// Auto-refresh toggle and the setInterval handle that backs it.
autorefresh: false,
timer: null,
};
// HTML-escape a value for interpolation into markup. null/undefined become
// the empty string; any other value is stringified first.
function escape(s) {
  if (s === null || s === undefined) return "";
  return String(s).replace(/[&<>"']/g, (ch) => {
    switch (ch) {
      case "&": return "&amp;";
      case "<": return "&lt;";
      case ">": return "&gt;";
      case '"': return "&quot;";
      default: return "&#39;"; // the only remaining character in the class is '
    }
  });
}
/**
 * Return `text` as HTML with every case-insensitive occurrence of `q`
 * wrapped in <mark>.
 *
 * Matching runs on the raw text and each piece is escaped separately.
 * Matching on already-escaped HTML would let queries such as "amp", "lt",
 * "quot" or "39" hit the inside of entities like "&amp;" and corrupt the
 * markup.
 *
 * @param {string} text  Raw, unescaped message text.
 * @param {string} q     Raw search query; regex metacharacters are literal.
 * @returns {string} Escaped HTML, safe to assign to innerHTML.
 */
function highlight(text, q) {
  if (!q || !text) return escape(text);
  const source = String(text);
  const re = new RegExp(String(q).replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), "gi");
  let html = "";
  let cursor = 0;
  for (const match of source.matchAll(re)) {
    html += escape(source.slice(cursor, match.index));
    html += `<mark style="background:#f1c40f33;color:inherit">${escape(match[0])}</mark>`;
    cursor = match.index + match[0].length;
  }
  return html + escape(source.slice(cursor));
}
// Human-readable label for a channel id: its title, else its identifier,
// else "#<id>" when the channel is not in the cached list.
function channelTitle(id) {
  const match = state.channels.find(ch => ch.id === id);
  if (!match) return `#${id}`;
  return match.title || match.identifier;
}
// Compact byte-size label: "B" below 1024, whole "KB" below 1024², otherwise
// "MB" with one decimal. null/undefined yields the empty string.
function fmtSize(bytes) {
  if (bytes == null) return "";
  const KIB = 1024;
  const MIB = 1024 * 1024;
  if (bytes < KIB) return `${bytes}B`;
  if (bytes < MIB) return `${(bytes / KIB).toFixed(0)}KB`;
  return `${(bytes / MIB).toFixed(1)}MB`;
}
// Display labels for a lead's `kind` value; unknown kinds are shown verbatim
// by the renderers below.
const REAL_ESTATE_LABELS = { sale: "продажа", rent: "аренда", purchase: "покупка" };
const HR_KIND_LABELS = { vacancy: "вакансия", resume: "резюме", contact: "контакт" };
/**
 * Build contact badges (HTML strings) for the author of a message.
 *
 * Order: link to the original post, then the sender (username link, or the
 * plain sender name when there is no username), then one link per extracted
 * Telegram handle. A falsy message yields an empty array.
 */
function senderContacts(m) {
  const badges = [];
  if (!m) return badges;

  if (m.post_url) {
    badges.push(`<a class="badge tg-link" href="${escape(m.post_url)}" target="_blank">📬 Открыть в Telegram</a>`);
  }

  const username = m.sender_username;
  if (username) {
    // Show the handle with exactly one leading "@", link to it without any.
    const shown = username.startsWith("@") ? username : "@" + username;
    const bare = username.replace(/^@/, "");
    badges.push(`<a class="badge tg" href="https://t.me/${escape(bare)}" target="_blank">✉️ ${escape(shown)}</a>`);
  } else if (m.sender_name) {
    badges.push(`<span class="badge name">✍️ ${escape(m.sender_name)}</span>`);
  }

  const handles = (m.extracted && m.extracted.tg_handles) || [];
  handles.forEach((handle) => {
    const bare = handle.replace(/^@/, "");
    badges.push(`<a class="badge tg" href="https://t.me/${escape(bare)}" target="_blank">✉️ ${escape(handle)}</a>`);
  });
  return badges;
}
/**
 * Render the real-estate lead card for a message.
 *
 * @param lead  Lead object taken from `m.extracted.lead`; nothing is rendered
 *              unless `lead.is_listing` is truthy.
 * @param m     The message itself, used only for the sender contact badges.
 * @returns {string} HTML for the card, or "" when there is no listing.
 */
function renderReLead(lead, m) {
if (!lead || !lead.is_listing) return "";
// CSS tone by confidence: >= 0.7 strong, >= 0.4 medium, otherwise weak.
const tone =
lead.confidence >= 0.7 ? "lead-strong" :
lead.confidence >= 0.4 ? "lead-medium" : "lead-weak";
// Facts line, in display order; only truthy fields are included.
const bits = [];
if (lead.kind) bits.push(REAL_ESTATE_LABELS[lead.kind] || lead.kind);
if (lead.property_type) bits.push(lead.property_type);
if (lead.rooms) bits.push(lead.rooms);
if (lead.area_m2) bits.push(`${lead.area_m2} м²`);
// Prefer the free-text price; otherwise format the numeric value with the
// currency appended when present.
const priceBit = lead.price_text
|| (lead.price_value != null
? `${lead.price_value.toLocaleString()}${lead.currency ? " " + lead.currency : ""}`
: null);
if (priceBit) bits.push(priceBit);
// No price at all: still show a bare currency if there is one.
else if (lead.currency) bits.push(lead.currency);
if (lead.location) bits.push(lead.location);
// The joined facts are escaped as one string.
const facts = bits.length
? `<div class="lead-facts">${escape(bits.join(" · "))}</div>` : "";
const summary = lead.summary
? `<div class="lead-summary">${escape(lead.summary)}</div>` : "";
// Lead-level contacts first, then badges derived from the message sender.
const contacts = [];
if (lead.contact_phone) {
contacts.push(`<a class="badge phone" href="tel:${escape(lead.contact_phone)}">📞 ${escape(lead.contact_phone)}</a>`);
}
if (lead.contact_name) {
contacts.push(`<span class="badge name">👤 ${escape(lead.contact_name)}</span>`);
}
contacts.push(...senderContacts(m));
return `
<div class="lead-card ${tone}">
<div class="lead-head">
<span class="badge lead">🎯 ЛИД · 🏠</span>
${facts}
<span class="lead-confidence">${(lead.confidence * 100).toFixed(0)}%</span>
</div>
${summary}
${contacts.length ? `<div class="message-tags">${contacts.join(" ")}</div>` : ""}
</div>`;
}
/**
 * Render the HR lead card for a message.
 *
 * @param lead  Lead object taken from `m.extracted.hr_lead`; nothing is
 *              rendered unless `lead.is_lead` is truthy.
 * @param m     The message itself, used only for the sender contact badges.
 * @returns {string} HTML for the card, or "" when there is no lead.
 */
function renderHrLead(lead, m) {
if (!lead || !lead.is_lead) return "";
// CSS tone by confidence: >= 0.7 strong, >= 0.4 medium, otherwise weak.
const tone =
lead.confidence >= 0.7 ? "lead-strong" :
lead.confidence >= 0.4 ? "lead-medium" : "lead-weak";
// Facts line, in display order; only present fields are included.
const bits = [];
if (lead.kind) bits.push(HR_KIND_LABELS[lead.kind] || lead.kind);
if (lead.title) bits.push(lead.title);
if (lead.company) bits.push(lead.company);
if (lead.candidate_name) bits.push(lead.candidate_name);
// `!= null` so that an explicit 0 years of experience is still shown.
if (lead.experience_years != null) bits.push(`${lead.experience_years}+ лет опыта`);
if (lead.employment_type) bits.push(lead.employment_type);
// `remote` is tri-state: true, false, or unknown (nothing is shown).
if (lead.remote === true) bits.push("удалёнка");
else if (lead.remote === false) bits.push("офис");
if (lead.location) bits.push(lead.location);
// Prefer the free-text salary; otherwise format the numeric value with the
// currency appended when present.
const salaryBit = lead.salary_text
|| (lead.salary_value != null
? `${lead.salary_value.toLocaleString()}${lead.currency ? " " + lead.currency : ""}`
: null);
if (salaryBit) bits.push(salaryBit);
// No salary at all: still show a bare currency if there is one.
else if (lead.currency) bits.push(lead.currency);
const facts = bits.length
? `<div class="lead-facts">${escape(bits.join(" · "))}</div>` : "";
const summary = lead.summary
? `<div class="lead-summary">${escape(lead.summary)}</div>` : "";
// At most 12 skill badges are shown.
const skills = (lead.skills || []).slice(0, 12);
const skillsBlock = skills.length
? `<div class="message-tags">${skills.map(s => `<span class="badge">${escape(s)}</span>`).join(" ")}</div>`
: "";
// Lead-level contacts first, then badges derived from the message sender.
const contacts = [];
if (lead.contact_phone) {
contacts.push(`<a class="badge phone" href="tel:${escape(lead.contact_phone)}">📞 ${escape(lead.contact_phone)}</a>`);
}
if (lead.contact_name) {
contacts.push(`<span class="badge name">👤 ${escape(lead.contact_name)}</span>`);
}
contacts.push(...senderContacts(m));
return `
<div class="lead-card ${tone}">
<div class="lead-head">
<span class="badge lead">🎯 ЛИД · 👥</span>
${facts}
<span class="lead-confidence">${(lead.confidence * 100).toFixed(0)}%</span>
</div>
${summary}
${skillsBlock}
${contacts.length ? `<div class="message-tags">${contacts.join(" ")}</div>` : ""}
</div>`;
}
/**
 * Render the badges for regex-extracted data that is not already covered by
 * the lead card of the current vertical.
 *
 * - The regex real-estate summary appears only in the real_estate vertical
 *   and only when no listing lead is shown.
 * - Phones and names are skipped when they equal the active lead's contact
 *   phone/name; at most three names are listed, followed by a "+N" badge.
 * - Telegram handles are listed only when no lead card is shown.
 *
 * @returns {string} A "message-tags" container, or "" when nothing applies.
 */
function renderExtracted(ex) {
  if (!ex) return "";

  const badges = [];
  const isHr = V === "hr";
  const isRealEstate = V === "real_estate";
  const listingShown = Boolean(ex.lead && ex.lead.is_listing);

  const regexRe = ex.real_estate;
  if (isRealEstate && regexRe && !listingShown) {
    const facts = [
      regexRe.kind ? (REAL_ESTATE_LABELS[regexRe.kind] || regexRe.kind) : null,
      regexRe.property_type,
      regexRe.rooms,
      regexRe.area_m2 ? `${regexRe.area_m2} м²` : null,
      regexRe.price,
    ].filter(Boolean);
    if (facts.length) {
      badges.push(`<span class="badge re">🏠 regex: ${escape(facts.join(" · "))}</span>`);
    }
  }

  // Contacts already displayed inside the lead card are not repeated here.
  const activeLead = isHr ? ex.hr_lead : ex.lead;
  const alreadyInLead = new Set(
    activeLead ? [activeLead.contact_phone, activeLead.contact_name].filter(Boolean) : []
  );

  for (const phone of ex.phones || []) {
    if (!alreadyInLead.has(phone)) {
      badges.push(`<a class="badge phone" href="tel:${escape(phone)}">📞 ${escape(phone)}</a>`);
    }
  }

  const names = ex.names || [];
  for (const name of names.slice(0, 3)) {
    if (!alreadyInLead.has(name)) {
      badges.push(`<span class="badge name">👤 ${escape(name)}</span>`);
    }
  }
  if (names.length > 3) {
    badges.push(`<span class="badge name muted">+${names.length - 3}</span>`);
  }

  const leadShown = (isHr && ex.hr_lead && ex.hr_lead.is_lead) ||
    (isRealEstate && listingShown);
  if (!leadShown) {
    for (const handle of (ex.tg_handles || [])) {
      const bare = handle.replace(/^@/, "");
      badges.push(`<a class="badge tg" href="https://t.me/${escape(bare)}" target="_blank">✉️ ${escape(handle)}</a>`);
    }
  }

  return badges.length ? `<div class="message-tags">${badges.join(" ")}</div>` : "";
}
/**
 * Render the media attachments of a message.
 *
 * Each entry of `files` is rendered by `kind`: photos and stickers become
 * thumbnails that open the lightbox, videos and audio use native players,
 * anything else becomes a download link. Entries flagged `skipped` show the
 * reason instead; entries without a `url` are omitted.
 *
 * Every server-provided string is escaped, including the free-form skip
 * reason, because the result is assigned to innerHTML.
 *
 * @returns {string} A "message-media" container, or "" for no files.
 */
function renderMedia(files) {
  if (!files || !files.length) return "";
  const items = files.map(f => {
    if (f.skipped) {
      // Known reason codes get a translated label; others are shown as-is.
      const why = f.skipped === "too_large" ? "слишком большой" : f.skipped;
      return `<div class="media-item media-skipped"><span class="badge warn">${escape(f.kind)}</span>
<span class="muted">${escape(why)}${f.size ? `, ${fmtSize(f.size)}` : ""}</span></div>`;
    }
    if (!f.url) return "";
    if (f.kind === "photo" || f.kind === "sticker") {
      return `<a href="${escape(f.url)}" target="_blank" data-action="lightbox" data-url="${escape(f.url)}">
<img class="media-thumb" src="${escape(f.url)}" loading="lazy" alt="" />
</a>`;
    }
    if (f.kind === "video") {
      return `<video class="media-video" src="${escape(f.url)}" controls preload="metadata"></video>`;
    }
    if (f.kind === "audio") {
      return `<audio src="${escape(f.url)}" controls preload="none" style="width:100%"></audio>`;
    }
    return `<a class="media-doc" href="${escape(f.url)}" target="_blank" download>
<span class="badge">${escape(f.kind)}</span>
<span>${escape(f.mime || "файл")}</span>
<span class="muted">${fmtSize(f.size)}</span>
</a>`;
  });
  return `<div class="message-media">${items.join("")}</div>`;
}
// Seed the filter state from the page's query string. Parameters that are
// absent leave the defaults untouched; the two boolean flags are switched on
// only by the literal value "true".
function readUrl() {
  const params = new URLSearchParams(location.search);
  const numberOf = (key) => Number(params.get(key));

  if (params.has("channel_id")) state.channelId = numberOf("channel_id");
  if (params.has("q")) state.q = params.get("q");
  if (params.has("real_estate")) state.realEstate = params.get("real_estate");
  if (params.has("hr_kind")) state.hrKind = params.get("hr_kind");
  if (params.get("has_phone") === "true") state.hasPhone = true;
  if (params.get("leads_only") === "true") state.leadsOnly = true;
  if (params.has("min_confidence")) state.minConfidence = numberOf("min_confidence");
}
// Push the current filter state into the form controls. The #real-estate and
// #hr-kind selects are optional and are skipped when the page lacks them.
function syncControls() {
  const byId = (id) => document.getElementById(id);

  byId("channel-filter").value = state.channelId ?? "";
  byId("search").value = state.q;

  const realEstateSelect = byId("real-estate");
  if (realEstateSelect) realEstateSelect.value = state.realEstate;
  const hrKindSelect = byId("hr-kind");
  if (hrKindSelect) hrKindSelect.value = state.hrKind;

  byId("has-phone").checked = state.hasPhone;
  byId("leads-only").checked = state.leadsOnly;
  byId("min-confidence").value = String(state.minConfidence);
  byId("limit").value = state.limit;
}
// Load the channel list into state, rebuild the channel <select> (an
// "all channels" option followed by one option per channel), then re-sync
// the controls so the selection reflects the current state again.
async function loadChannels() {
  state.channels = await api.listChannels();
  const dropdown = document.getElementById("channel-filter");
  const channelOptions = state.channels
    .map(c => `<option value="${c.id}">${escape(c.title || c.identifier)}</option>`)
    .join("");
  dropdown.innerHTML = `<option value="">Все каналы (${meta.short})</option>` + channelOptions;
  syncControls();
}
/**
 * Fetch one page of messages for the current filter state and render it
 * into #list, then update the pager.
 *
 * The pager shows "<first>–<last>" for the visible range ("0" for an empty
 * page). "Next" is disabled when the page came back shorter than the limit.
 * Errors are toasted and shown in place of the list; they are not rethrown.
 */
async function loadMessages() {
  const list = document.getElementById("list");
  list.innerHTML = `<div class="empty">Загрузка...</div>`;
  try {
    const msgs = await api.listMessages({
      channelId: state.channelId,
      q: state.q || undefined,
      realEstate: state.realEstate || undefined,
      hrKind: state.hrKind || undefined,
      hasPhone: state.hasPhone || undefined,
      leadsOnly: state.leadsOnly || undefined,
      // The confidence threshold only applies to the leads-only view.
      minConfidence: state.leadsOnly ? state.minConfidence : undefined,
      limit: state.limit,
      offset: state.offset,
    });
    if (!msgs.length) {
      list.innerHTML = `<div class="empty">Сообщений нет</div>`;
    } else {
      list.innerHTML = msgs.map(m => `
<div class="message" data-id="${m.id}">
<div class="message-meta">
<a href="?channel_id=${m.channel_id}">${escape(channelTitle(m.channel_id))}</a>
<span>·</span>
<span>${fmtDate(m.date)}</span>
<span>·</span>
<span class="mono">#${m.tg_message_id}</span>
${m.group_size > 1 ? `<span class="badge">альбом · ${m.group_size}</span>` : (m.has_media ? '<span class="badge">media</span>' : '')}
${m.views != null ? `<span>👁 ${m.views}</span>` : ''}
${m.forwards ? `<span>↗ ${m.forwards}</span>` : ''}
<div class="spacer"></div>
<a href="#" data-action="raw">json</a>
</div>
<div class="message-text">${m.text ? highlight(m.text, state.q) : '<span class="muted">(без текста)</span>'}</div>
${V === "hr"
? renderHrLead(m.extracted && m.extracted.hr_lead, m)
: renderReLead(m.extracted && m.extracted.lead, m)}
${renderExtracted(m.extracted)}
${renderMedia(m.media_files)}
</div>
`).join("");
    }
    // Separate the two bounds with a dash; without a separator they would
    // read as a single number (1 and 50 becoming "150").
    document.getElementById("page-info").textContent = msgs.length
      ? `${state.offset + 1}–${state.offset + msgs.length}`
      : "0";
    document.getElementById("prev").disabled = state.offset === 0;
    document.getElementById("next").disabled = msgs.length < state.limit;
  } catch (err) {
    toast(err.message, "error");
    list.innerHTML = `<div class="empty">Ошибка: ${escape(err.message)}</div>`;
  }
}
// Any filter change restarts pagination from the first page and reloads.
function reloadFromFirstPage() {
  state.offset = 0;
  loadMessages();
}

document.getElementById("channel-filter").addEventListener("change", ({ target }) => {
  state.channelId = target.value ? Number(target.value) : null;
  reloadFromFirstPage();
});

// Search box: debounced by 250 ms so typing does not fire one request per key.
let searchTimer;
document.getElementById("search").addEventListener("input", (e) => {
  clearTimeout(searchTimer);
  searchTimer = setTimeout(() => {
    state.q = e.target.value.trim();
    reloadFromFirstPage();
  }, 250);
});

document.getElementById("limit").addEventListener("change", ({ target }) => {
  state.limit = Number(target.value);
  reloadFromFirstPage();
});

// The two selects below are optional; they are wired up only when present.
const reSelEl = document.getElementById("real-estate");
if (reSelEl) {
  reSelEl.addEventListener("change", ({ target }) => {
    state.realEstate = target.value;
    reloadFromFirstPage();
  });
}
const hrSelEl = document.getElementById("hr-kind");
if (hrSelEl) {
  hrSelEl.addEventListener("change", ({ target }) => {
    state.hrKind = target.value;
    reloadFromFirstPage();
  });
}

document.getElementById("has-phone").addEventListener("change", ({ target }) => {
  state.hasPhone = target.checked;
  reloadFromFirstPage();
});
document.getElementById("leads-only").addEventListener("change", ({ target }) => {
  state.leadsOnly = target.checked;
  reloadFromFirstPage();
});

// The threshold is always stored, but only triggers a reload while the
// leads-only filter is active (it is not sent to the API otherwise).
document.getElementById("min-confidence").addEventListener("change", ({ target }) => {
  state.minConfidence = Number(target.value);
  if (state.leadsOnly) reloadFromFirstPage();
});

// Manual refresh and paging keep the remaining state as-is.
document.getElementById("refresh").addEventListener("click", loadMessages);
document.getElementById("prev").addEventListener("click", () => {
  state.offset = Math.max(0, state.offset - state.limit);
  loadMessages();
});
document.getElementById("next").addEventListener("click", () => {
  state.offset += state.limit;
  loadMessages();
});

// Auto-refresh: (re)arm a 10-second interval, always clearing the old one.
document.getElementById("autorefresh").addEventListener("change", ({ target }) => {
  state.autorefresh = target.checked;
  if (state.timer) {
    clearInterval(state.timer);
    state.timer = null;
  }
  if (state.autorefresh) {
    state.timer = setInterval(loadMessages, 10000);
  }
});
// Delegated clicks inside the message list: thumbnails open the lightbox,
// the "json" link fetches the full message and shows it in the raw dialog.
document.getElementById("list").addEventListener("click", async (e) => {
  const thumb = e.target.closest("[data-action='lightbox']");
  if (thumb) {
    e.preventDefault();
    openLightbox(thumb.dataset.url);
    return;
  }

  const rawLink = e.target.closest("[data-action='raw']");
  if (!rawLink) return;
  e.preventDefault();

  const messageId = Number(rawLink.closest(".message").dataset.id);
  try {
    const payload = await api.getMessage(messageId);
    document.getElementById("raw-content").textContent = JSON.stringify(payload, null, 2);
    document.getElementById("raw-dialog").showModal();
  } catch (err) {
    toast(err.message, "error");
  }
});
// Show `url` in the #lightbox overlay. The overlay is created on demand and
// removes itself from the document when clicked.
function openLightbox(url) {
  const existing = document.getElementById("lightbox");
  const overlay = existing || document.createElement("div");
  if (!existing) {
    overlay.id = "lightbox";
    overlay.addEventListener("click", () => overlay.remove());
    document.body.appendChild(overlay);
  }
  overlay.innerHTML = `<img src="${escape(url)}" alt="" />`;
}
// Close button of the raw-JSON dialog.
document.getElementById("raw-close").addEventListener("click", () => {
  document.getElementById("raw-dialog").close();
});

// Startup: read the filters from the URL, then load the channel list (which
// also syncs the controls) followed by the first page of messages. A failure
// while loading channels is toasted, as on the other pages, instead of being
// left as an unhandled promise rejection.
readUrl();
(async () => {
  await loadChannels();
  await loadMessages();
})().catch(err => toast(err.message, "error"));

View File

@@ -0,0 +1,25 @@
import { api } from "/api/monitoring-tg/static/js/api.js";
import { isAdmin } from "/api/monitoring-tg/static/js/access.js";
import { appBase } from "/api/monitoring-tg/static/js/vertical.js";
// "Telegram not authorized" banner. It is only relevant for admins:
// non-admin visitors cannot open /auth.html, and the /auth/status call
// itself 404s for them, so they never see the banner.
(async () => {
  const admin = await isAdmin();
  if (!admin) return;
  try {
    const { authorized } = await api.authStatus();
    if (authorized) return;

    const banner = document.createElement("div");
    banner.className = "card";
    banner.style.cssText =
      "border-color: rgba(241, 196, 15, 0.5); background: rgba(241, 196, 15, 0.08); margin-bottom: 16px;";
    // The link carries the current path so the auth page can offer a way back.
    banner.innerHTML = `
<strong>Telegram не авторизован.</strong>
Парсер не сможет ходить за сообщениями, пока вы не залогинитесь.
<a href="${appBase()}/auth.html?return=${encodeURIComponent(location.pathname)}">Открыть страницу авторизации →</a>
`;
    const host = document.querySelector("main");
    if (host) host.insertBefore(banner, host.firstChild);
  } catch {
    // Best effort: a failing status check simply shows no banner.
  }
})();

View File

@@ -0,0 +1,71 @@
import { api } from "/api/monitoring-tg/static/js/api.js";
// Import for side-effect: access.js hides .admin-link elements for non-admins.
import "/api/monitoring-tg/static/js/access.js";
import {
VERTICAL_META,
appBase,
getVertical,
getSection,
verticalBase,
sectionBase,
} from "/api/monitoring-tg/static/js/vertical.js";
// Scope of the current page, derived from the URL by vertical.js.
const V = getVertical();
const section = getSection();
const meta = VERTICAL_META[V];

// Header title: product name plus vertical, and the section slug when present.
const titleEl = document.getElementById("page-title");
if (titleEl) {
  const verticalLabel = `parser-tg-bot · ${meta.emoji} ${meta.short}`;
  titleEl.textContent = section ? `${verticalLabel} · ${section}` : verticalLabel;
}
// Build the navigation bar markup for #nav-section.
const navEl = document.getElementById("nav-section");
if (navEl) {
  const currentPath = location.pathname;
  const root = appBase();
  // Returns "active" only on an exact path match, otherwise an empty class.
  const activeClass = (href) => (currentPath === href ? "active" : "");

  // Up-link: back to the section chooser when inside a section, otherwise
  // back to the list of verticals.
  const upLink = section
    ? `<a href="${verticalBase()}/">← ${meta.short} (подразделы)</a>`
    : `<a href="${root}/">← Разделы</a>`;

  // Per-section pages are only linked when a section is selected.
  let sectionLinks = [];
  if (section) {
    const sBase = sectionBase();
    sectionLinks = [
      `<a href="${sBase}/" class="${activeClass(`${sBase}/`)}">Дашборд</a>`,
      `<a href="${sBase}/channels.html" class="${activeClass(`${sBase}/channels.html`)}">Каналы</a>`,
      `<a href="${sBase}/messages.html" class="${activeClass(`${sBase}/messages.html`)}">Сообщения</a>`,
      `<a href="${sBase}/settings.html" class="admin-only ${activeClass(`${sBase}/settings.html`)}">Настройки</a>`,
    ];
  }

  // Admin entry points; the login link carries the current path in `return`.
  const adminLinks = [
    `<a class="admin-login-link" href="${root}/admin.html?return=${encodeURIComponent(currentPath)}">Админ</a>`,
    `<a class="admin-link" href="${root}/auth.html">Авторизация</a>`,
    `<a class="admin-link" href="${root}/docs" target="_blank">API</a>`,
  ];

  navEl.innerHTML = [upLink, ...sectionLinks, ...adminLinks].join("");
}
// Best-effort: look up the current section in the API's section list and
// append its emoji and title to the page heading. On any failure the heading
// is left as it is.
const headingEl = document.getElementById("page-heading");
if (headingEl && section) {
  const applySectionTitle = (sections) => {
    const match = sections.find((candidate) => candidate.slug === section);
    if (!match) return;
    // Remember the untouched heading text in data-base so a repeated update
    // does not append the title twice.
    const baseText = headingEl.dataset.base || headingEl.textContent;
    headingEl.dataset.base = baseText;
    const emojiPrefix = match.emoji ? match.emoji + " " : "";
    headingEl.textContent = `${baseText} · ${emojiPrefix}${match.title}`;
  };
  api.listSections(V).then(applySectionTitle).catch(() => {});
}
export { section, V, meta };

View File

@@ -0,0 +1,202 @@
import { api, toast } from "/api/monitoring-tg/static/js/api.js";
import { isAdmin } from "/api/monitoring-tg/static/js/access.js";
import { getVertical, verticalBase, VERTICAL_META } from "/api/monitoring-tg/static/js/vertical.js";
import { slugify } from "/api/monitoring-tg/static/js/slugify.js";
// Vertical key for this page, the URL prefix of that vertical, and its
// display metadata (emoji, labels).
const V = getVertical();
const base = verticalBase(V);
const meta = VERTICAL_META[V];
// Sections from the most recent render(), keyed by slug. The edit handler
// reads it to pre-fill the edit dialog.
let sectionsBySlug = new Map();
/**
 * HTML-escape a value for interpolation into markup.
 *
 * @param {*} s - Any value; null and undefined yield an empty string,
 *   everything else is converted with String().
 * @returns {string} The text with & < > " ' replaced by HTML entities.
 */
function escape(s) {
  if (s === null || s === undefined) {
    return "";
  }
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  return String(s).replace(/[&<>"']/g, (ch) => entities[ch]);
}
/**
 * Load the sections of the current vertical and draw them as tiles inside
 * #sections-grid.
 *
 * Shows a loading placeholder first. Admins additionally see each section's
 * access code and the edit/delete buttons. Any error is reported through
 * toast() and rendered in place of the grid.
 */
async function render() {
  const container = document.getElementById("sections-grid");
  container.innerHTML = `<div class="empty">Загрузка...</div>`;
  try {
    const admin = await isAdmin();
    const sections = await api.listSections(V);
    // Keep the fetched rows around for the edit dialog.
    sectionsBySlug = new Map(sections.map((row) => [row.slug, row]));

    if (sections.length === 0) {
      container.innerHTML = `<div class="empty">Подразделов пока нет — нажми «+ Новый подраздел»</div>`;
      return;
    }

    // Markup for one tile. Text fields go through escape(); the slug in the
    // link is URL-encoded; the count fields are interpolated as-is.
    const renderTile = (s) => `
<div class="card section-tile" data-slug="${escape(s.slug)}">
<a href="${base}/${encodeURIComponent(s.slug)}/" class="section-tile-link">
<div class="section-tile-head">
<span class="section-emoji">${escape(s.emoji || meta.emoji)}</span>
<span class="section-title">${escape(s.title)}</span>
</div>
<div class="section-stats">
<span title="Каналов (активных/всего)"><b>${s.channels_active}</b> / ${s.channels_total} каналов</span>
<span title="Сообщений всего">${s.messages_total.toLocaleString()} сообщ.</span>
<span title="🎯 Лидов">${s.leads_total.toLocaleString()} лидов</span>
</div>
${s.description ? `<div class="section-desc muted">${escape(s.description)}</div>` : ""}
${admin ? `<div class="section-code mono">Код: ${escape(s.access_code || "не задан")}</div>` : ""}
<div class="section-slug muted mono">${escape(V)} / ${escape(s.slug)}</div>
</a>
${admin ? `
<div class="row admin-only" style="justify-content:flex-end; gap:8px; margin-top:8px">
<button class="secondary" data-action="edit">Переименовать</button>
<button class="danger" data-action="delete">Удалить</button>
</div>
` : ""}
</div>
`;

    const tiles = sections.map(renderTile).join("");
    container.innerHTML = `<div class="sections-grid">${tiles}</div>`;
  } catch (err) {
    toast(err.message, "error");
    container.innerHTML = `<div class="empty">Ошибка: ${escape(err.message)}</div>`;
  }
}
// --- Create-section dialog with auto-slug -------------------------------
// Form controls of the create dialog. Each lookup may return null; the code
// below guards most uses with `if (...)` or `?.`.
const titleInput = document.getElementById("new-title");
const slugInput = document.getElementById("new-slug");
// Read-only preview of the slug shown next to the URL prefix.
const slugPreview = document.getElementById("new-slug-preview");
// Link that shows or hides the manual slug input row.
const slugManualToggle = document.getElementById("new-slug-manual");
// Track whether the user has taken manual control of the slug. As soon as
// they touch the slug field directly, stop auto-syncing it. resetForm()
// switches auto-sync back on.
let slugIsAuto = true;
/**
 * Derive the slug from the current title and mirror it into the slug input
 * and the preview. Does nothing once the user has edited the slug manually.
 */
function syncSlugFromTitle() {
  if (slugIsAuto) {
    const candidate = slugify(titleInput.value);
    slugInput.value = candidate;
    if (slugPreview) {
      // An empty slug shows the placeholder hint instead.
      slugPreview.textContent = candidate ? candidate : "(введите название)";
    }
  }
}
// Typing in the title keeps the slug in sync while auto mode is on.
titleInput?.addEventListener("input", syncSlugFromTitle);

// Any direct edit of the slug field switches auto-sync off.
slugInput?.addEventListener("input", () => {
  slugIsAuto = false;
});

// "Edit manually" link: toggle the visibility of the slug row and focus the input.
slugManualToggle?.addEventListener("click", (event) => {
  event.preventDefault();
  const slugRow = slugInput.closest(".slug-row");
  if (slugRow) {
    slugRow.hidden = !slugRow.hidden;
  }
  slugInput.focus();
});
/**
 * Return the create dialog to its initial state: clear the form, switch slug
 * auto-sync back on, restore the preview placeholder and hide the manual
 * slug row.
 */
function resetForm() {
  const form = document.getElementById("create-form");
  form.reset();
  slugIsAuto = true;
  if (slugPreview) {
    slugPreview.textContent = "(введите название)";
  }
  if (slugInput) {
    slugInput.value = "";
    const slugRow = slugInput.closest(".slug-row");
    if (slugRow) {
      slugRow.hidden = true;
    }
  }
}
// "+ New section": open the create dialog with a clean form. The title field
// is focused after a 50 ms delay.
document.getElementById("open-create").addEventListener("click", function openCreateDialog() {
  resetForm();
  const dialog = document.getElementById("create-dialog");
  dialog.showModal();
  setTimeout(() => titleInput?.focus(), 50);
});

// Cancel buttons close their dialog.
document.getElementById("create-cancel").addEventListener("click", function cancelCreate() {
  const dialog = document.getElementById("create-dialog");
  dialog.close();
});
document.getElementById("edit-cancel").addEventListener("click", function cancelEdit() {
  const dialog = document.getElementById("edit-dialog");
  dialog.close();
});
// Create-section form: validate the inputs, call the API, then close the
// dialog and redraw the grid. Failures are shown through toast().
document.getElementById("create-form").addEventListener("submit", async (event) => {
  event.preventDefault();
  const trimmedValue = (id) => document.getElementById(id).value.trim();

  const title = titleInput.value.trim();
  if (title === "") return;

  // Re-sync once more in case `input` didn't fire before submit (autofill).
  if (slugIsAuto) {
    syncSlugFromTitle();
  }
  const slug = slugInput.value.trim() || slugify(title);
  if (!slug) {
    toast("Не удалось сформировать slug — введите его вручную", "error");
    return;
  }

  const emoji = trimmedValue("new-emoji") || null;
  const accessCode = trimmedValue("new-access-code");
  if (accessCode.length < 3) {
    toast("Код доступа должен быть не короче 3 символов", "error");
    return;
  }
  const description = trimmedValue("new-description") || null;

  try {
    // NOTE(review): the code is passed as `accessCode` here while the edit
    // form sends `access_code` — confirm api.createSection maps the key.
    await api.createSection({ vertical: V, slug, title, emoji, description, accessCode });
    toast(`Подраздел "${title}" создан`, "success");
    document.getElementById("create-dialog").close();
    resetForm();
    await render();
  } catch (err) {
    toast(err.message, "error");
  }
});
// Delegated handling of the per-tile action buttons ([data-action]).
//   edit   → pre-fill and open the edit dialog from the cached section row
//   delete → confirm, delete through the API, redraw the grid
document.getElementById("sections-grid").addEventListener("click", async (event) => {
  const trigger = event.target.closest("[data-action]");
  if (!trigger) return;

  const { slug } = trigger.closest(".section-tile").dataset;
  const { action } = trigger.dataset;

  if (action === "edit") {
    const record = sectionsBySlug.get(slug);
    if (!record) return;
    // Field id → initial value; missing values become empty strings.
    const initialValues = {
      "edit-slug": slug,
      "edit-title": record.title || "",
      "edit-emoji": record.emoji || "",
      "edit-access-code": record.access_code || "",
      "edit-description": record.description || "",
    };
    for (const [fieldId, value] of Object.entries(initialValues)) {
      document.getElementById(fieldId).value = value;
    }
    document.getElementById("edit-dialog").showModal();
    setTimeout(() => document.getElementById("edit-title").focus(), 50);
    return;
  }

  if (action !== "delete") return;

  const confirmed = confirm(`Удалить подраздел "${slug}"? Удалить можно только пустой подраздел (без каналов).`);
  if (!confirmed) {
    return;
  }
  try {
    await api.deleteSection(V, slug);
    toast(`Подраздел "${slug}" удалён`, "success");
    await render();
  } catch (err) {
    toast(err.message, "error");
  }
});
// Edit-section form: validate, send the update, close the dialog and redraw.
// The slug comes from the hidden #edit-slug field and is not editable here.
document.getElementById("edit-form").addEventListener("submit", async (event) => {
  event.preventDefault();
  const fieldValue = (id) => document.getElementById(id).value;

  const slug = fieldValue("edit-slug");
  const title = fieldValue("edit-title").trim();
  const emoji = fieldValue("edit-emoji").trim() || null;
  const accessCode = fieldValue("edit-access-code").trim();
  const description = fieldValue("edit-description").trim() || null;

  if (title === "") return;
  if (accessCode.length < 3) {
    toast("Код доступа должен быть не короче 3 символов", "error");
    return;
  }

  const changes = { title, emoji, description, access_code: accessCode };
  try {
    await api.updateSection(V, slug, changes);
    toast(`Подраздел "${title}" сохранён`, "success");
    document.getElementById("edit-dialog").close();
    await render();
  } catch (err) {
    toast(err.message, "error");
  }
});

// Initial draw of the section grid.
render();

View File

@@ -0,0 +1,118 @@
import { api, toast, fmtDate } from "/api/monitoring-tg/static/js/api.js";
import { getVertical, getSection, VERTICAL_META } from "/api/monitoring-tg/static/js/vertical.js";
// Scope of this settings page: vertical key, optional section slug (null on
// vertical-level pages) and the vertical's display metadata.
const V = getVertical();
const section = getSection();
const meta = VERTICAL_META[V];
// `level` decides which override layer the editor edits/saves/resets.
// "section" → store key llm_system_prompt:<vertical>:<section_slug>
// "vertical" → store key llm_system_prompt:<vertical>
// Effective resolution always goes section → vertical → default.
// Currently edited override layer: "section" when the page has a section,
// otherwise "vertical". Read by levelScope() and the save/reset handlers.
let level = section ? "section" : "vertical";
const levelEl = document.getElementById("prompt-level");
if (levelEl) {
  if (!section) {
    // No section in the URL: only the vertical-level prompt can be edited,
    // so the selector is pinned and disabled.
    levelEl.value = "vertical";
    levelEl.disabled = true;
  } else {
    levelEl.value = "section";
    levelEl.addEventListener("change", async (e) => {
      level = e.target.value;
      // Report a failed reload through toast(), like the other loadPrompt()
      // call sites, instead of leaving an unhandled promise rejection.
      try {
        await loadPrompt();
      } catch (err) {
        toast(err.message, "error");
      }
    });
  }
}
/**
 * Scope object for the prompt API calls at the currently selected level.
 *
 * @returns {{vertical: string, section: (string|null)}} The section slug is
 *   included only when editing the section-level prompt.
 */
function levelScope() {
  const scope = { vertical: V, section: null };
  if (level === "section") {
    scope.section = section;
  }
  return scope;
}
/**
 * Fetch the service settings and the stats, then render them as key/value
 * rows into #config-tbody.
 *
 * @throws {Error} When the settings request returns a non-OK status. The
 *   message is the HTTP status text, or "HTTP <code>" when the status text
 *   is empty.
 */
async function loadConfig() {
  const res = await fetch("/api/monitoring-tg/api/v1/settings");
  // statusText is an empty string over HTTP/2, which would produce an empty
  // error toast; fall back to the numeric status code.
  if (!res.ok) throw new Error(res.statusText || `HTTP ${res.status}`);
  const cfg = await res.json();
  const stats = await api.globalStats();
  const scopeLabel = section ? `${meta.short} / ${section}` : meta.short;
  const rows = [
    ["Раздел", `${meta.emoji} ${scopeLabel}`],
    ["Период опроса", `${cfg.poll_interval_seconds}s`],
    ["Лимит истории за опрос", cfg.poll_history_limit],
    ["Telethon session", cfg.tg_session_path],
    ["Postgres host", `${cfg.postgres_host}:${cfg.postgres_port}/${cfg.postgres_db}`],
    ["API host", `${cfg.api_host}:${cfg.api_port}`],
    [`Каналов в ${scopeLabel}`, `${stats.channels_active} активных / ${stats.channels_total}`],
    [`Сообщений в ${scopeLabel}`, stats.messages_total.toLocaleString()],
    ["Последний опрос (scope)", fmtDate(stats.last_poll_at)],
  ];
  // NOTE(review): keys and values are interpolated into markup unescaped —
  // confirm the settings payload and fmtDate() never contain HTML metacharacters.
  // null/undefined values are shown as an em dash.
  document.getElementById("config-tbody").innerHTML = rows.map(([k, v]) =>
    `<tr><td class="muted">${k}</td><td class="mono">${v ?? "—"}</td></tr>`
  ).join("");
}
// "Poll all" button: trigger polling through the API and report how many
// channels were queued. The button is disabled while the request is running.
document.getElementById("poll-all").addEventListener("click", async (event) => {
  const button = event.target;
  button.disabled = true;
  try {
    const { queued } = await api.pollAll();
    toast(`В очереди ${queued ?? 0} каналов — опрос идёт в фоне`, "success");
  } catch (err) {
    toast(err.message, "error");
  } finally {
    button.disabled = false;
  }
});
/**
 * Load the prompt for the currently selected level into the editor and
 * update the status badge and the length counter.
 *
 * The badge reflects `data.source` ("section", "vertical" or "default");
 * any other value shows a dash with the "off" style.
 */
async function loadPrompt() {
  const data = await api.llmPromptGet(levelScope());
  const promptText = data.prompt || "";

  document.getElementById("prompt-editor").value = promptText;
  const badge = document.getElementById("prompt-status");
  const counter = document.getElementById("prompt-length");

  // source → [badge label, badge CSS class]
  const badgeBySource = {
    section: ["override · подраздел", "ok"],
    vertical: ["override · вертикаль", "ok"],
    default: ["встроенный по умолчанию", "off"],
  };
  const [label, cls] = badgeBySource[data.source] || ["—", "off"];

  badge.textContent = label;
  badge.className = `badge ${cls}`;
  counter.textContent = `${promptText.length.toLocaleString()} символов`;
}
// "Save": store the editor content as the prompt for the selected level and
// reload it. The button is disabled while the request is running.
document.getElementById("prompt-save").addEventListener("click", async (event) => {
  const button = event.target;
  const text = document.getElementById("prompt-editor").value;
  button.disabled = true;
  try {
    await api.llmPromptSave(text, levelScope());
    let where = meta.short;
    if (level === "section") {
      where = `${meta.short} / ${section}`;
    }
    toast(`Промпт ${where} сохранён, применится в течение 5 секунд`, "success");
    await loadPrompt();
  } catch (err) {
    toast(err.message, "error");
  } finally {
    button.disabled = false;
  }
});
// "Reset level": after confirmation, drop the override stored for the
// selected level and reload the prompt.
document.getElementById("prompt-reset").addEventListener("click", async (event) => {
  const button = event.target;
  let where;
  if (level === "section") {
    where = `подраздела "${section}"`;
  } else {
    where = `вертикали "${meta.short}"`;
  }
  const confirmed = confirm(`Сбросить пользовательский промпт ${where} и вернуться к фоллбэку?`);
  if (!confirmed) return;

  button.disabled = true;
  try {
    await api.llmPromptReset(levelScope());
    toast(`Промпт ${where} сброшен`, "success");
    await loadPrompt();
  } catch (err) {
    toast(err.message, "error");
  } finally {
    button.disabled = false;
  }
});

// Initial load of both panels; each failure is reported on its own.
for (const load of [loadConfig, loadPrompt]) {
  load().catch((err) => toast(err.message, "error"));
}

View File

@@ -0,0 +1,22 @@
// URL-safe slug from arbitrary text. Cyrillic → Latin so titles like
// "Дубай Marina" become "dubay-marina" without forcing the user to type
// a slug by hand.
// Per-letter transliteration of the lowercase Russian alphabet. The hard and
// soft signs map to an empty string, so they are dropped.
const RU_TO_LAT = {
  а: "a", б: "b", в: "v", г: "g", д: "d", е: "e", ё: "yo", ж: "zh",
  з: "z", и: "i", й: "y", к: "k", л: "l", м: "m", н: "n", о: "o",
  п: "p", р: "r", с: "s", т: "t", у: "u", ф: "f", х: "h", ц: "ts",
  ч: "ch", ш: "sh", щ: "sch", ъ: "", ы: "y", ь: "", э: "e", ю: "yu",
  я: "ya",
};

/**
 * Build a URL-safe slug from arbitrary text.
 *
 * The text is lowercased, Russian letters are transliterated, every run of
 * characters outside [a-z0-9] becomes a single "-", and the result is limited
 * to 64 characters with no leading or trailing "-".
 *
 * @param {string} text - Source text; falsy input yields "".
 * @returns {string} The slug, possibly empty.
 */
export function slugify(text) {
  return (text || "")
    .toLowerCase()
    .split("")
    .map(c => RU_TO_LAT[c] ?? c)
    .join("")
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+/, "")
    .slice(0, 64)
    // Trim trailing hyphens only AFTER truncating: cutting at 64 characters
    // can itself land on a separator and leave a dangling "-".
    .replace(/-+$/, "");
}

View File

@@ -0,0 +1,76 @@
// URL prefix under which the app is served. _segments() strips it from the
// path, and the *Base() helpers prepend it to generated links.
const APP_BASE = "/api/monitoring-tg";
// Detect the current scope from the URL path.
//
// / → vertical=null, section=null
// /real-estate/ → vertical=real_estate, section=null (section chooser)
// /real-estate/dubai/ → vertical=real_estate, section=dubai
// /real-estate/dubai/channels.html → same
// /hr/ → vertical=hr, section=null
// /hr/it/settings.html → vertical=hr, section=it
//
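// Paths above are shown relative to APP_BASE. The section slug is the second
// path segment after that prefix (index 1 of `_segments()`) and is opaque
// (created via UI). The frontend treats it as a string and passes it to the
// API; the backend resolves slug→Section row at query time.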
/**
 * Non-empty segments of the current path, with the APP_BASE prefix removed
 * when the path starts with it.
 *
 * @returns {string[]} Path segments relative to the app base.
 */
function _segments() {
  const nonEmpty = (part) => Boolean(part);
  const pathParts = location.pathname.split("/").filter(nonEmpty);
  const baseParts = APP_BASE.split("/").filter(nonEmpty);
  const underBase = baseParts.every((part, idx) => pathParts[idx] === part);
  return underBase ? pathParts.slice(baseParts.length) : pathParts;
}
/**
 * URL slug of the vertical named by the first path segment.
 *
 * @returns {("hr"|"real-estate"|null)} The slug (matched case-insensitively),
 *   or null when the first segment is not a known vertical.
 */
export function getVerticalSlug() {
  const [first = ""] = _segments();
  const seg = first.toLowerCase();
  return seg === "hr" || seg === "real-estate" ? seg : null;
}
/**
 * Vertical key (as used in VERTICAL_META) for the current URL.
 *
 * @returns {("hr"|"real_estate")} "hr" for the HR vertical; "real_estate"
 *   otherwise, including pages with no vertical in the path, where it serves
 *   as a harmless default.
 */
export function getVertical() {
  return getVerticalSlug() === "hr" ? "hr" : "real_estate";
}
/**
 * Section slug from the current URL.
 *
 * @returns {(string|null)} The lowercased second path segment, or null when
 *   the first segment is not a known vertical, the second segment is missing,
 *   or it is a page name ending in ".html".
 */
export function getSection() {
  // A section only exists underneath a recognised vertical.
  if (getVerticalSlug() === null) return null;
  const [, candidate] = _segments();
  if (candidate === undefined || candidate.endsWith(".html")) {
    return null;
  }
  return candidate.toLowerCase();
}
// Display metadata per vertical, keyed by the vertical key that getVertical()
// returns. `slug` is the URL path segment used by verticalBase(); the other
// fields are UI labels.
export const VERTICAL_META = {
real_estate: {
slug: "real-estate",
title: "Недвижимость",
short: "Недвижимость",
emoji: "🏠",
leadLabel: "Объявление",
},
hr: {
slug: "hr",
title: "HR / Кадры",
short: "HR",
emoji: "👥",
leadLabel: "HR-лид",
},
};
// Returns the app's URL prefix (APP_BASE), without a trailing slash.
export function appBase() {
return APP_BASE;
}
/**
 * Base URL of a vertical, without a trailing slash.
 *
 * @param {string} [vertical] - Vertical key from VERTICAL_META; defaults to
 *   the vertical of the current URL.
 * @returns {string} APP_BASE followed by the vertical's URL slug.
 */
export function verticalBase(vertical = getVertical()) {
  const { slug } = VERTICAL_META[vertical];
  return `${APP_BASE}/${slug}`;
}
/**
 * Base URL of a section, without a trailing slash.
 *
 * @param {string} [vertical] - Vertical key; defaults to the current one.
 * @param {(string|null)} [section] - Section slug; defaults to the current one.
 * @returns {string} The vertical base, followed by "/<section>" when a
 *   section is given.
 */
export function sectionBase(vertical = getVertical(), section = getSection()) {
  const root = verticalBase(vertical);
  if (!section) {
    return root;
  }
  return `${root}/${section}`;
}

View File

@@ -0,0 +1,99 @@
<!doctype html>
<html lang="ru">
<head>
<meta charset="utf-8" />
<title>🏠 Недвижимость — подразделы</title>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
</head>
<body>
<header>
<h1 id="page-title">parser-tg-bot · 🏠 Недвижимость</h1>
<nav id="nav-section"></nav>
</header>
<main>
<div class="row">
<h2>Подразделы недвижимости</h2>
<div class="spacer"></div>
<button id="open-create">+ Новый подраздел</button>
</div>
<p class="muted">
Каждый подраздел — это собственный набор каналов, своя статистика и свой
LLM-промпт (с фоллбэком на промпт вертикали). Например: Дубай, Москва,
Сочи, коммерческая недвижимость.
</p>
<div id="sections-grid"></div>
</main>
<dialog id="create-dialog">
<h3 style="margin-top:0">Новый подраздел</h3>
<form id="create-form">
<label class="row" style="gap:8px; margin-bottom:8px">
<span style="min-width:120px" class="muted">Название</span>
<input type="text" id="new-title" required placeholder="Дубай" style="flex:1" />
</label>
<div class="row" style="gap:8px; margin-bottom:8px; font-size:12px">
<span style="min-width:120px" class="muted">URL-адрес</span>
<span class="muted mono">/real-estate/<span id="new-slug-preview">(введите название)</span>/</span>
<div class="spacer"></div>
<a href="#" id="new-slug-manual" class="muted">изменить вручную</a>
</div>
<label class="row slug-row" style="gap:8px; margin-bottom:8px" hidden>
<span style="min-width:120px" class="muted">Slug</span>
<!-- The hyphen in the character class is escaped on purpose: browsers that
     compile `pattern` with the RegExp `v` flag reject a bare "-" there and
     then ignore the whole pattern, leaving the slug unvalidated. -->
<input type="text" id="new-slug" pattern="[a-z0-9][a-z0-9_\-]*[a-z0-9]?"
placeholder="dubai" style="flex:1" />
</label>
<label class="row" style="gap:8px; margin-bottom:8px">
<span style="min-width:120px" class="muted">Иконка</span>
<input type="text" id="new-emoji" maxlength="4" placeholder="🌴" style="width:80px" />
</label>
<label class="row" style="gap:8px; margin-bottom:8px">
<span style="min-width:120px" class="muted">Код доступа</span>
<input type="text" id="new-access-code" required minlength="3"
autocomplete="new-password" style="flex:1" />
</label>
<label class="row" style="gap:8px; margin-bottom:8px; align-items:flex-start">
<span style="min-width:120px" class="muted">Описание</span>
<textarea id="new-description" rows="3" style="flex:1"></textarea>
</label>
<div class="row" style="justify-content:flex-end; gap:8px; margin-top:12px">
<button type="button" id="create-cancel" class="secondary">Отмена</button>
<button type="submit">Создать</button>
</div>
</form>
</dialog>
<dialog id="edit-dialog">
<h3 style="margin-top:0">Редактировать подраздел</h3>
<form id="edit-form">
<input type="hidden" id="edit-slug" />
<label class="row" style="gap:8px; margin-bottom:8px">
<span style="min-width:120px" class="muted">Название</span>
<input type="text" id="edit-title" required style="flex:1" />
</label>
<label class="row" style="gap:8px; margin-bottom:8px">
<span style="min-width:120px" class="muted">Иконка</span>
<input type="text" id="edit-emoji" maxlength="4" style="width:80px" />
</label>
<label class="row" style="gap:8px; margin-bottom:8px">
<span style="min-width:120px" class="muted">Код доступа</span>
<input type="text" id="edit-access-code" required minlength="3"
autocomplete="new-password" style="flex:1" />
</label>
<label class="row" style="gap:8px; margin-bottom:8px; align-items:flex-start">
<span style="min-width:120px" class="muted">Описание</span>
<textarea id="edit-description" rows="3" style="flex:1"></textarea>
</label>
<div class="row" style="justify-content:flex-end; gap:8px; margin-top:12px">
<button type="button" id="edit-cancel" class="secondary">Отмена</button>
<button type="submit">Сохранить</button>
</div>
</form>
</dialog>
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
<script type="module" src="/api/monitoring-tg/static/js/sections-list.js"></script>
</body>
</html>

View File

@@ -0,0 +1,48 @@
<!doctype html>
<html lang="ru">
<head>
<meta charset="utf-8" />
<title>🏠 Недвижимость · Каналы — parser-tg-bot</title>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
</head>
<body>
<header>
<h1 id="page-title">parser-tg-bot</h1>
<nav id="nav-section"></nav>
</header>
<main>
<h2 id="page-heading">Каналы подраздела</h2>
<div class="card" style="margin-bottom:24px">
<form id="add-form" class="row">
<input type="text" id="identifier" placeholder="@channel или https://t.me/..." required style="flex:1; min-width:280px" />
<button type="submit">Добавить канал</button>
</form>
<div class="muted" style="margin-top:8px; font-size:12px">
Канал будет привязан к текущему подразделу.
</div>
</div>
<div class="card">
<table>
<thead>
<tr>
<th>ID</th>
<th>Канал</th>
<th>Telegram ID</th>
<th>Сообщ.</th>
<th>Последний опрос</th>
<th>Статус</th>
<th></th>
</tr>
</thead>
<tbody id="tbody"></tbody>
</table>
</div>
</main>
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
<script type="module" src="/api/monitoring-tg/static/js/channels.js"></script>
</body>
</html>

View File

@@ -0,0 +1,43 @@
<!doctype html>
<html lang="ru">
<head>
<meta charset="utf-8" />
<title>🏠 Недвижимость · Дашборд — parser-tg-bot</title>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
</head>
<body>
<header>
<h1 id="page-title">parser-tg-bot</h1>
<nav id="nav-section"></nav>
</header>
<main>
<div class="row">
<h2 id="page-heading">Дашборд</h2>
<div class="spacer"></div>
<button id="poll-all">Опросить все каналы подраздела</button>
</div>
<div class="stats-grid" id="stats"></div>
<h3>Каналы подраздела</h3>
<div class="card">
<table>
<thead>
<tr>
<th>Канал</th>
<th>Сообщений</th>
<th>Последнее сообщение</th>
<th>Последний опрос</th>
<th>Статус</th>
</tr>
</thead>
<tbody id="channels-tbody"></tbody>
</table>
</div>
</main>
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
<script type="module" src="/api/monitoring-tg/static/js/dashboard.js"></script>
</body>
</html>

View File

@@ -0,0 +1,78 @@
<!doctype html>
<html lang="ru">
<head>
<meta charset="utf-8" />
<title>🏠 Недвижимость · Сообщения — parser-tg-bot</title>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
</head>
<body>
<header>
<h1 id="page-title">parser-tg-bot</h1>
<nav id="nav-section"></nav>
</header>
<main>
<h2 id="page-heading">Сообщения подраздела</h2>
<div class="toolbar card">
<select id="channel-filter">
<option value="">Все каналы подраздела</option>
</select>
<input type="search" id="search" placeholder="Поиск по тексту..." />
<select id="real-estate">
<option value="">Любая тема</option>
<option value="any">🏠 Недвижимость (любая)</option>
<option value="sale">🏠 Продажа</option>
<option value="rent">🏠 Аренда</option>
<option value="purchase">🏠 Покупка</option>
</select>
<label class="row" style="gap:6px">
<input type="checkbox" id="leads-only" />
<span class="muted">🎯 Только лиды (ИИ)</span>
</label>
<select id="min-confidence" title="Минимальная уверенность ИИ">
<option value="0.3">0.3+</option>
<option value="0.5" selected>0.5+</option>
<option value="0.7">0.7+</option>
<option value="0.9">0.9+</option>
</select>
<label class="row" style="gap:6px">
<input type="checkbox" id="has-phone" />
<span class="muted">📞 С телефоном</span>
</label>
<select id="limit">
<option value="25">25</option>
<option value="50" selected>50</option>
<option value="100">100</option>
<option value="200">200</option>
</select>
<div class="spacer"></div>
<label class="row" style="gap:6px">
<input type="checkbox" id="autorefresh" />
<span class="muted">Автообновление</span>
</label>
<button id="refresh" class="secondary">Обновить</button>
</div>
<div class="card" id="list"></div>
<div class="pagination">
<button id="prev" class="secondary">← Назад</button>
<span class="muted" id="page-info" style="align-self:center"></span>
<button id="next" class="secondary">Вперёд →</button>
</div>
</main>
<dialog id="raw-dialog">
<h3 style="margin-top:0">Сообщение</h3>
<pre id="raw-content"></pre>
<div class="row" style="justify-content:flex-end; margin-top:12px">
<button class="secondary" id="raw-close">Закрыть</button>
</div>
</dialog>
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
<script type="module" src="/api/monitoring-tg/static/js/messages.js"></script>
</body>
</html>

View File

@@ -0,0 +1,66 @@
<!doctype html>
<html lang="ru">
<head>
<meta charset="utf-8" />
<title>🏠 Недвижимость · Настройки — parser-tg-bot</title>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link rel="stylesheet" href="/api/monitoring-tg/static/css/app.css" />
</head>
<body>
<header>
<h1 id="page-title">parser-tg-bot</h1>
<nav id="nav-section"></nav>
</header>
<main>
<h2 id="page-heading">Настройки подраздела</h2>
<div class="card" style="margin-bottom:24px">
<h3 style="margin-top:0">Текущая конфигурация</h3>
<table>
<tbody id="config-tbody">
<tr><td colspan="2" class="empty">Загрузка...</td></tr>
</tbody>
</table>
<div class="muted" style="font-size:12px; margin-top:12px">
Параметры задаются через переменные окружения (<span class="mono">.env</span>).
Для изменения отредактируйте <span class="mono">.env</span> и перезапустите контейнер:
<span class="mono">docker compose restart app</span>.
</div>
</div>
<div class="card" style="margin-bottom:24px">
<h3 style="margin-top:0">Действия</h3>
<div class="row">
<button id="poll-all">Опросить все каналы подраздела сейчас</button>
<a href="/api/monitoring-tg/docs" target="_blank" class="badge">OpenAPI / Swagger</a>
<a href="/api/monitoring-tg/healthz" target="_blank" class="badge">Health check</a>
</div>
</div>
<div class="card" style="margin-bottom:24px">
<h3 style="margin-top:0">🤖 Промпт ИИ</h3>
<div class="row" style="margin-bottom:8px">
<span class="badge" id="prompt-status"></span>
<span class="muted" id="prompt-length"></span>
<div class="spacer"></div>
<select id="prompt-level" title="Уровень редактирования промпта">
<option value="section" selected>Промпт подраздела</option>
<option value="vertical">Промпт вертикали</option>
</select>
<button id="prompt-reset" class="secondary">Сбросить уровень</button>
<button id="prompt-save">Сохранить</button>
</div>
<textarea id="prompt-editor" rows="22"
style="width:100%; font-family:ui-monospace, SFMono-Regular, Menlo, monospace; font-size:12px"></textarea>
<div class="muted" style="font-size:12px; margin-top:8px">
Каскад: <strong>section → vertical → default</strong>. Если промпта на
уровне подраздела нет, используется промпт вертикали; если и его нет —
встроенный по умолчанию. Сохранение применится в течение ~5 сек.
</div>
</div>
</main>
<script type="module" src="/api/monitoring-tg/static/js/nav.js"></script>
<script type="module" src="/api/monitoring-tg/static/js/nav-status.js"></script>
<script type="module" src="/api/monitoring-tg/static/js/settings.js"></script>
</body>
</html>