diff --git a/README.md b/README.md index ffb78a5..de31246 100644 --- a/README.md +++ b/README.md @@ -350,12 +350,21 @@ docker run -d -p 6379:6379 --name redis redis:alpine celery -A config beat -l info ``` -```markdown +```bash -A config - указывает где находится Celery-приложение beat - запускает Celery Beat — компонент, который периодически отправляет задачи в очередь -l info - уровень логирования (DEBUG, INFO, WARNING, ERROR) ``` +## Команды для приложения + +### в командной строке: +```bash +python manage.py create_default_tags # Заполнить БД стандартными тэгами +python manage.py assign_tags_to_books # Назначить тэги для книг +python manage.py parse_books # Запустить парсер книг +``` + ## 📄 Лицензия Этот проект лицензирован под MIT License - см. файл [LICENSE](LICENSE) для деталей. diff --git a/apps/books/api/v1/filters.py b/apps/books/api/v1/filters.py index fb1d0e6..a4d1711 100644 --- a/apps/books/api/v1/filters.py +++ b/apps/books/api/v1/filters.py @@ -1,4 +1,4 @@ -from django_filters import CharFilter, DateFilter, FilterSet +from django_filters import CharFilter, DateFilter, FilterSet, NumberFilter from ...models import Book @@ -11,13 +11,16 @@ class BookFilter(FilterSet): field_name="author__last_name", lookup_expr="icontains", ) - publisher = CharFilter( - field_name="publisher__name", - lookup_expr="icontains", + publisher = NumberFilter( + field_name="publisher__id", + ) + tag = NumberFilter( + field_name="tags__id", + lookup_expr="exact", ) - tag = CharFilter( + tag_name = CharFilter( field_name="tags__name", - lookup_expr="iexact", + lookup_expr="icontains", ) language = CharFilter( lookup_expr="iexact", diff --git a/apps/books/api/v1/serializers.py b/apps/books/api/v1/serializers.py index 2db297e..a609640 100644 --- a/apps/books/api/v1/serializers.py +++ b/apps/books/api/v1/serializers.py @@ -40,6 +40,10 @@ class Meta: "publisher", "published_at", "cover_image", + "description", + "isbn_code", + "total_pages", + "language", ] @@ -49,11 +53,6 @@ class 
BookDetailSerializer(BookSerializer): class Meta(BookSerializer.Meta): fields = BookSerializer.Meta.fields + [ - "description", - "isbn_code", - "total_pages", - "cover_image", - "language", "tags", "comments", ] diff --git a/apps/books/api/v1/views.py b/apps/books/api/v1/views.py index e1da3f1..0fa2045 100644 --- a/apps/books/api/v1/views.py +++ b/apps/books/api/v1/views.py @@ -1,5 +1,7 @@ from django_filters.rest_framework import DjangoFilterBackend from rest_framework import viewsets +from rest_framework.filters import OrderingFilter +from rest_framework.permissions import AllowAny from ...models import ( Author, @@ -22,24 +24,36 @@ class PublisherViewSet(viewsets.ModelViewSet): queryset = Publisher.objects.all() serializer_class = PublisherSerializer + permission_classes = [AllowAny] class AuthorViewSet(viewsets.ModelViewSet): queryset = Author.objects.all() serializer_class = AuthorSerializer + permission_classes = [AllowAny] class TagViewSet(viewsets.ReadOnlyModelViewSet): queryset = Tag.objects.all() serializer_class = TagSerializer + permission_classes = [AllowAny] class BookViewSet(viewsets.ModelViewSet): queryset = Book.objects.select_related("publisher").prefetch_related( "author__books", "tags" ) - filter_backends = [DjangoFilterBackend] + filter_backends = [DjangoFilterBackend, OrderingFilter] filterset_class = BookFilter + ordering_fields = [ + "title", + "published_at", + "created", + "publisher__name", + "author__last_name", + ] + ordering = ["-created"] + permission_classes = [AllowAny] def get_serializer_class(self): if self.action == "retrieve": @@ -50,6 +64,7 @@ def get_serializer_class(self): class CommentViewSet(viewsets.ModelViewSet): queryset = Comment.objects.select_related("user", "book") serializer_class = CommentSerializer + permission_classes = [AllowAny] def perform_create(self, serializer): serializer.save(user=self.request.user) diff --git a/apps/books/management/commands/assign_tags_to_books.py 
class Command(BaseCommand):
    """Management command that synchronises book tags with book titles.

    Without arguments every book in the database is processed. With
    ``--book-id`` only the book with that primary key is processed.
    """

    help = "сопоставляет существующие тэги с названиями всех книг в базе данных"

    def add_arguments(self, parser):
        """Register the optional ``--book-id`` integer argument."""
        parser.add_argument(
            "--book-id",
            type=int,
            help="ID конкретной книги для обработки",
        )

    def handle(self, *args, **options):
        """Dispatch to single-book or all-books processing.

        A missing book is reported through the logger; nothing is raised.
        """
        book_id = options.get("book_id")

        # Compare against None explicitly: a plain truthiness test would treat
        # ``--book-id 0`` as "no id given" and re-tag the whole catalogue.
        if book_id is None:
            self.assign_tags_to_all_books()
            return

        # Only the lookup can raise Book.DoesNotExist, so only it is guarded.
        try:
            book = Book.objects.get(id=book_id)
        except Book.DoesNotExist:
            logger.error(f"book with id {book_id} not found")
            return

        self.assign_tags_to_book(book)
        logger.success(f"tags assigned successfully for book id {book_id}")

    @transaction.atomic
    def assign_tags_to_all_books(self):
        """Assign tags to every book inside a single database transaction."""
        # Prefetch the current tags so assign_tags_to_book() does not issue a
        # separate ``book.tags.all()`` query for each book.
        books = Book.objects.prefetch_related("tags")
        total_books = books.count()

        logger.info(f"start matching tags for {total_books} books")

        updated_count = 0
        for book in books:
            if self.assign_tags_to_book(book):
                updated_count += 1

        logger.success(
            f"processed {total_books} books, updated tags for {updated_count} books"
        )

    def assign_tags_to_book(self, book):
        """Replace the tags of ``book`` with the tags matching its title.

        Args:
            book: The ``Book`` instance to update.

        Returns:
            ``True`` if the tag set was changed, ``False`` if the book already
            had exactly the matching tags.
        """
        matching_tags = find_matching_tags(book.title)

        current_tags = set(book.tags.all())
        new_tags = set(matching_tags)

        if current_tags == new_tags:
            logger.debug(f'no tag changes for book "{book.title}"')
            return False

        book.tags.set(matching_tags)
        tag_names = [tag.name for tag in matching_tags]
        logger.info(f'updated tags for book "{book.title}": {tag_names}')
        return True
class Command(BaseCommand):
    """Management command that seeds the database with a fixed set of tags."""

    help = "Создает предопределенные тэги для книг"

    def handle(self, *args, **options):
        """Create each predefined tag that is not present yet.

        Tags are looked up by name; slug and colour are only applied when a
        new row is created. Every outcome is reported through the logger.
        """
        columns = ("name", "slug", "color")
        rows = (
            # Core programming languages
            ("Python", "python", "#3776ab"),
            ("JavaScript", "javascript", "#f7df1e"),
            ("Java", "java", "#ed8b00"),
            # Machine-learning topics
            ("Машинное обучение", "machine-learning", "#ff6b6b"),
            ("Искусственный интеллект", "ai", "#feca57"),
            ("Data Science", "data-science", "#96ceb4"),
            ("Нейронные сети", "neural-networks", "#a55eea"),
            # Software development
            ("Алгоритмы", "algorithms", "#4ecdc4"),
            ("Веб-разработка", "web-development", "#45b7d1"),
            ("Базы данных", "databases", "#ff9ff3"),
            ("DevOps", "devops", "#54a0ff"),
            ("Тестирование", "testing", "#5f27cd"),
            ("Архитектура", "architecture", "#00d2d3"),
        )

        new_tags = 0
        for row in rows:
            payload = dict(zip(columns, row))
            tag, was_created = Tag.objects.get_or_create(
                name=payload["name"], defaults=payload
            )
            if not was_created:
                logger.info(f"{tag.name} already exist!")
                continue
            new_tags += 1
            logger.success(f"{tag.name} tag created")

        logger.success(f"Successfully created {new_tags} new tags")
5cb478c..6441f39 100644 --- a/apps/books/management/commands/parse_books.py +++ b/apps/books/management/commands/parse_books.py @@ -45,14 +45,23 @@ async def scrape_book(self, url: str): return None parser = BookParser(html) + # Extract parameters first + params = parser.extract_all_params() + book_data = { "url": url, "book_title": parser.extract_book_name().get("book_title", ""), "author": parser.extract_authors(), - "price": parser.extract_price(), - "details": parser.extract_all_params(), "description": parser.extract_description().get("description", ""), - "cover": parser.extract_cover_image(), + "cover": { + "cover_image": parser.extract_cover_image().get("cover_image", "") + }, + "details": { + "ISBN": params.get("ISBN", ""), + "Год": params.get("Год", ""), + "Страниц": int(params.get("Страниц", "0")) or 0, + }, + "price": parser.extract_price(), } logger.debug(f"parsed book data for: {book_data['book_title']}") return book_data diff --git a/apps/books/migrations/0002_book_electronic_price_book_price_book_url_and_more.py b/apps/books/migrations/0002_book_electronic_price_book_price_book_url_and_more.py new file mode 100644 index 0000000..2a78b1b --- /dev/null +++ b/apps/books/migrations/0002_book_electronic_price_book_price_book_url_and_more.py @@ -0,0 +1,46 @@ +# Generated by Django 5.1.7 on 2025-09-19 08:25 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("books", "0001_initial"), + ] + + operations = [ + migrations.AddField( + model_name="book", + name="electronic_price", + field=models.DecimalField( + blank=True, + decimal_places=2, + max_digits=10, + null=True, + verbose_name="Цена электронной версии", + ), + ), + migrations.AddField( + model_name="book", + name="price", + field=models.DecimalField( + blank=True, + decimal_places=2, + max_digits=10, + null=True, + verbose_name="Цена", + ), + ), + migrations.AddField( + model_name="book", + name="url", + field=models.URLField(blank=True, 
def extract_authors(self) -> List[Dict[str, str]]:
    """Collect author records from every ``.autor-wrapper`` block of the page.

    For each block the author's name is read from its ``<h2>`` heading,
    falling back to another heading level (``h1``, ``h3``-``h6``) and finally
    to the first line of the block's first non-empty text fragment. The text
    left in the block after removing the name is returned as the biography.

    Returns:
        A list of dicts with ``first_name``, ``last_name`` and ``bio`` keys,
        one per block that yielded a non-empty name. An empty list is
        returned when nothing matches or when parsing raises.
    """
    try:
        # "#tab-2 .autor-wrapper" selects a subset of ".autor-wrapper", so a
        # second query with it can never find blocks the first one missed.
        author_blocks = self.soup.select(".autor-wrapper")
        logger.info(f"found {len(author_blocks)} author blocks")

        authors = []
        for block in author_blocks:
            heading = block.select_one("h2")
            if heading is None:
                heading = block.find(["h1", "h3", "h4", "h5", "h6"])

            if heading is not None:
                # Use the same separator as the full block text below. Without
                # it, text from nested tags is glued together ("ИванИванов"),
                # which breaks both the name split and the bio extraction.
                full_name = heading.get_text(separator=" ", strip=True)
            else:
                # No heading at all: take the first line of the first
                # non-empty text fragment rather than the whole block text.
                first_fragment = next(block.stripped_strings, "")
                full_name = first_fragment.split("\n")[0].strip()

            if not full_name:
                continue

            parts = full_name.split()
            if len(parts) >= 2:
                first_name = parts[0]
                last_name = " ".join(parts[1:])
            else:
                # A single word is stored as the last name.
                first_name = ""
                last_name = " ".join(parts)

            # The biography is whatever text remains once the name is removed.
            full_text = block.get_text(separator=" ", strip=True)
            bio = full_text.replace(full_name, "", 1).strip()

            authors.append(
                {
                    "first_name": first_name.strip("."),
                    "last_name": last_name,
                    "bio": bio,
                }
            )

        logger.info(f"parsed {len(authors)} authors")
        return authors

    except Exception as e:
        # Best-effort parsing: report the failure and return no authors.
        logger.error(f"failed to parse authors: {str(e)}")
        logger.exception("author parsing error details")
        return []
def find_matching_tags(book_title: str) -> "List[Tag]":
    """Return the stored tags whose names occur in ``book_title``.

    Loads all ``Tag`` rows and delegates the comparison to
    ``match_title_with_tags``.
    """
    return match_title_with_tags(book_title, Tag.objects.all())


def match_title_with_tags(title: str, tags: "QuerySet") -> "List[Tag]":
    """Return the tags whose name occurs in ``title`` as a whole term.

    Matching is case-insensitive. A tag name only matches when it is not
    directly preceded or followed by a letter, so the tag "Java" does not
    match the title "JavaScript ...", while "Python" still matches "Python3".

    Args:
        title: Book title to search in. An empty or ``None`` title matches
            nothing.
        tags: Iterable of objects exposing a ``name`` attribute.

    Returns:
        The matching tags, in the order they were yielded by ``tags``.
    """
    import re  # local import keeps this block independent of file-level imports

    if not title:
        return []

    haystack = title.casefold()
    letter = r"[^\W\d_]"  # any Unicode letter (word char that is not digit/_)

    matching_tags = []
    for tag in tags:
        needle = (tag.name or "").strip().casefold()
        if not needle:
            # An empty name is a substring of every title; never match it.
            continue
        pattern = rf"(?<!{letter}){re.escape(needle)}(?!{letter})"
        if re.search(pattern, haystack):
            matching_tags.append(tag)

    return matching_tags