# Source code for class_factory.beamer_bot.BeamerBot

"""
**BeamerBot Module**
--------------------

The `BeamerBot` module provides a framework for generating structured LaTeX Beamer slides based on lesson objectives, readings, and prior lesson presentations. By using a language model (LLM), `BeamerBot` automates the process of slide creation, ensuring a consistent slide structure while allowing for custom guidance and validation.

Key Functionalities
~~~~~~~~~~~~~~~~~~~

1. **Automated Slide Generation**:
   - `BeamerBot` generates a LaTeX Beamer presentation for each lesson, incorporating:
     - A title page with consistent author and institution information
     - "Where We Came From" and "Where We Are Going" slides
     - Lesson objectives with highlighted action verbs (e.g., `\\textbf{Analyze} key events`)
     - Discussion questions and in-class exercises
     - Summary slides with key takeaways

2. **Previous Lesson Integration**:
   - Retrieves and references prior lesson presentations to maintain consistent formatting and flow
   - Preserves author and institution information across presentations

3. **Prompt Customization and Validation**:
   - Supports custom prompts and specific guidance for tailored slide content
   - Validates generated LaTeX for correct formatting and content quality
   - Provides multiple retry attempts if validation fails

Dependencies
~~~~~~~~~~~~~

This module requires:

- `langchain_core`: For LLM chain creation and prompt handling
- `pathlib`: For file path management
- Custom utility modules for:
  - Document loading (`load_documents`)
  - LaTeX validation (`llm_validator`)
  - Response parsing (`response_parsers`)
  - Slide pipeline utilities (`slide_pipeline_utils`)

Usage
~~~~~~~

1. **Initialize BeamerBot**:
   ```python
   beamer_bot = BeamerBot(
       lesson_no=10,
       llm=llm,
       course_name="Political Science",
       lesson_loader=lesson_loader,
       output_dir=output_dir
   )
   ```

2. **Generate Slides**:
   ```python
   # Optional specific guidance
   guidance = "Focus on comparing democratic and authoritarian systems"
   slides = beamer_bot.generate_slides(specific_guidance=guidance)
   ```

3. **Save the Slides**:
   ```python
   beamer_bot.save_slides(slides)
   ```
"""

import logging
from pathlib import Path
from typing import Any, Dict, Union

# third-party: LangChain message, parser, and prompt components
from langchain_core.messages import SystemMessage
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_core.prompts import (ChatPromptTemplate,
                                    HumanMessagePromptTemplate)

# project-local modules
from class_factory.beamer_bot.slide_preamble import preamble
from class_factory.utils.base_model import BaseModel
from class_factory.utils.llm_validator import Validator
from class_factory.utils.load_documents import LessonLoader
from class_factory.utils.response_parsers import ValidatorResponse
from class_factory.utils.slide_pipeline_utils import (
    clean_latex_content, comment_out_includegraphics, validate_latex)

# %%


class BeamerBot(BaseModel):
    """
    Generate LaTeX Beamer slides for a specified lesson using a language model (LLM).

    BeamerBot builds a chat prompt from the lesson objectives, the lesson readings, and
    (when a slide directory is configured) the prior lesson's presentation. The LLM output
    is checked by an LLM-based validator and then by a LaTeX compile check before it is
    returned.

    Attributes:
        lesson_no (int): Lesson number for which to generate slides.
        llm: Language model instance for generating slides.
        course_name (str): Name of the course for slide context.
        lesson_loader (LessonLoader): Loader for accessing lesson readings and objectives.
        output_dir (Path): Directory to save the generated Beamer slides.
        slide_dir (Optional[Path]): Directory containing existing Beamer slides
            (mirrors ``lesson_loader.slide_dir``).
        llm_response (str): Most recent full LaTeX document (preamble + cleaned LLM output).
        prompt (ChatPromptTemplate): Prompt template used for generation and validation.
        chain: ``prompt | llm | StrOutputParser()`` runnable used to produce slides.
        validator (Validator): LLM-backed validator for the generated slides.
        user_objectives (dict): User-provided lesson objectives, if any.
        prior_lesson (int): Lesson number treated as the prior lesson (defaults to ``lesson_no - 1``).
        beamer_output (Path): Destination ``.tex`` file, ``output_dir / L{lesson_no}.tex``.

    Methods:
        generate_slides(specific_guidance=None, lesson_objectives=None, latex_compiler="pdflatex") -> str:
            Generates Beamer slides as LaTeX code for the specified lesson.
        save_slides(latex_content: str) -> None:
            Saves the generated LaTeX content to a .tex file.
        set_user_objectives(...):
            Inherited from BaseModel; stores user-defined lesson objectives.

    Internal Methods:
        _format_readings_for_prompt() -> str
        _find_prior_lesson(lesson_no: int, max_attempts: int = 3) -> Path
        _load_prior_lesson() -> str
        _generate_prompt(human_prompt: str = None) -> ChatPromptTemplate
        _validate_llm_response(...) -> Dict[str, Any]
    """

    def __init__(self, lesson_no: int, llm, course_name: str, lesson_loader: LessonLoader,
                 output_dir: Union[Path, str] = None, verbose: bool = False,
                 slide_dir: Union[Path, str] = None, lesson_objectives: dict = None):
        """
        Set up the prompt, LLM chain, and validator for one lesson.

        Args:
            lesson_no (int): Lesson number to generate slides for.
            llm: Language model used both for generation and validation.
            course_name (str): Course name inserted into the system prompt.
            lesson_loader (LessonLoader): Source of readings, objectives, and prior slides.
            output_dir (Union[Path, str], optional): Where ``L{lesson_no}.tex`` is written.
            verbose (bool): Forwarded to ``BaseModel``.
            slide_dir (Union[Path, str], optional): Overrides ``lesson_loader.slide_dir`` when given.
            lesson_objectives (dict, optional): User-defined objectives for this lesson.
        """
        super().__init__(lesson_no=lesson_no, course_name=course_name,
                         lesson_loader=lesson_loader, output_dir=output_dir, verbose=verbose)
        self.llm = llm
        self.llm_response = None

        # Determine slide directory: an explicit argument overrides the loader's setting.
        if slide_dir:
            self.lesson_loader.slide_dir = slide_dir
        # Always expose the effective slide directory so the attribute exists even when
        # it was configured only on the loader (or not at all).
        self.slide_dir = self.lesson_loader.slide_dir
        if not self.slide_dir:
            self.logger.warning(
                "No slide directory provided directly or through lesson loader. "
                "Some functionality, such as loading prior presentations, may be limited."
            )

        self.readings = self._format_readings_for_prompt()  # Adjust reading formatting
        if lesson_objectives:
            self.user_objectives = self.set_user_objectives(
                lesson_objectives, range(self.lesson_no, self.lesson_no + 1))
        else:
            self.user_objectives = {}

        # Initialize chain and validator
        self.prompt = self._generate_prompt()
        parser = StrOutputParser()
        self.chain = self.prompt | self.llm | parser
        self.validator = Validator(llm=self.llm,
                                   parser=JsonOutputParser(pydantic_object=ValidatorResponse),
                                   log_level=self.logger.level)

        # Default prior lesson; _find_prior_lesson updates it when it locates a Beamer file.
        self.prior_lesson = self.lesson_no - 1
        self.beamer_output = self.output_dir / f'L{self.lesson_no}.tex'

    def _format_readings_for_prompt(self) -> str:
        """
        Format readings as a single string for use in the LLM prompt.

        Returns:
            str: One "Lesson N: reading, reading, ..." entry per lesson returned by
            ``_load_readings``, separated by blank lines.
        """
        all_readings_dict = self._load_readings(self.lesson_no)
        return "\n\n".join(
            f"Lesson {lesson}: {', '.join(readings)}"
            for lesson, readings in all_readings_dict.items()
        )

    def _find_prior_lesson(self, lesson_no: int, max_attempts: int = 3) -> Path:
        """
        Find the most recent prior lesson's Beamer file to use as a template.

        Args:
            lesson_no (int): The current lesson number.
            max_attempts (int): Number of prior lessons to attempt to retrieve. Defaults to 3.

        Returns:
            Path: The path to the located Beamer file.

        Raises:
            FileNotFoundError: If no slide directory is configured, or no prior lesson file
                is found within max_attempts.
        """
        slide_dir = self.lesson_loader.slide_dir
        if not slide_dir:
            # Without this guard `None / "L3.tex"` raises TypeError instead of the documented error.
            raise FileNotFoundError("No slide directory configured; cannot locate a prior Beamer file.")
        slide_dir = Path(slide_dir)  # tolerate a str slide_dir

        for i in range(1, max_attempts + 1):
            prior_lesson = lesson_no - i
            beamer_file = slide_dir / f'L{prior_lesson}.tex'

            # Check if the Beamer file exists for this prior lesson
            if beamer_file.is_file():
                self.prior_lesson = int(prior_lesson)
                self.logger.info(f"Found prior lesson: Lesson {prior_lesson}")
                return beamer_file

        # Raise error if no valid prior Beamer file is found within the attempts
        raise FileNotFoundError(f"No prior Beamer file found within the last {max_attempts} lessons.")

    def _load_prior_lesson(self) -> str:
        """
        Load the previous lesson's Beamer presentation as a string.

        Delegates both the lookup and the loading to the lesson loader.
        """
        beamer_example = self.lesson_loader.find_prior_beamer_presentation(self.lesson_no)
        return self.lesson_loader.load_beamer_presentation(beamer_example)

    def _generate_prompt(self, human_prompt: str = None) -> ChatPromptTemplate:
        """
        Build the chat prompt that guides LaTeX Beamer slide creation.

        Args:
            human_prompt (str, optional): Replacement template for the human message. It is
                formatted with the same variables the chain supplies (objectives, information,
                last_presentation, lesson_no, prior_lesson, specific_guidance, additional_guidance).

        Returns:
            ChatPromptTemplate: System message (already formatted with the course name)
            followed by the human message template.
        """
        slide_system_prompt = """You are a LaTeX Beamer specialist and a political scientist with expertise in {course_name}.
            Your task is to create content for a college-level lesson using the Beamer presentation format.
            Focus on clarity, relevance, and adherence to LaTeX standards."""

        # Literal LaTeX braces are doubled ({{ }}) because this text is a format template.
        slide_human_prompt = """
            ## Create a LaTeX Beamer presentation following the below guidelines:

            ### Source Documents and Examples

            1. **Lesson Objectives**:
                - We are on lesson {lesson_no}.
                - Ensure each slide works toward the following lesson objectives:
                {objectives}

            2. **Lesson Readings**:
                - Use these readings to guide your slide content:
                {information}

            ---

            ### General Format to Follow:

            1. **Title Slide**:
                - Copy the prior lesson's title slide, **include author and institution from the last presentation**.

            2. **Where We Came From**
                - The subject of last lesson
                - The readings from last lesson (Lesson {prior_lesson}).

            3. **Where We Are Going**
                - The subject of the current lesson
                - The readings for the current lesson (Lesson {lesson_no}).

            4. **Lesson Objectives**:
                - The action in each lesson objective should be bolded (e.g. '\\textbf{{Understand}} the role of government.')

            5. **Discussion Question**:
                - Add a thought-provoking question based on lesson material to initiate conversation.

            6. **Lecture Slides**:
                - Cover key points from the lesson objectives and readings.
                - Ensure logical flow and alignment with the objectives.

            7. **In-Class Exercise**:
                - Add an interactive exercise to engage and re-energize students.
                - This exercise should occur about halfway through the lecture slides, to get students re-engaged.

            8. **Key Takeaways**:
                - Conclude with three primary takeaways from the lesson. These should emphasize the most critical points.

            ---

            ### Specific guidance for this lesson:
            {specific_guidance}

            ---

            ### Example of Expected Output:
                % This is an example format only. Use the provided last lesson as your primary source.
                % Replace the example \\author{{}} and \\institute{{}} below with the corresponding values from last lesson's presentation
                \\title{{Lesson 5: Interest Groups}}
                \\author{{}}
                \\institute[]{{}}
                \\date{{\\today}}
                \\begin{{document}}
                \\section{{Introduction}}
                \\begin{{frame}}
                \\titlepage
                \\end{{frame}}
                ...
                \\end{{document}}

            {additional_guidance}

            ---

            ### Example of previous presentation:
            - Use the presentation from last lesson as an example for formatting and structure:
            {last_presentation}

            ---

            ### IMPORTANT:
            - Use valid LaTeX syntax.
            - The output should contain **only** LaTeX code, with no extra explanations.
            - Start at the point in the preamble where we call \\title.
            - Failure to follow the format and style of the last lesson's presentation may result in the output being rejected.
            - Use the **same author and institute** as provided in the last lesson’s presentation. Do not invent new names or institutions. Copy these values exactly from the prior lesson.
            - If unable to identify the author and institute from the last lesson, just leave them blank.
            - Failure to follow these instructions will result in the output being rejected.
            """

        prompt = ChatPromptTemplate.from_messages(
            [
                SystemMessage(
                    content=(
                        slide_system_prompt.format(course_name=self.course_name)
                    )
                ),
                HumanMessagePromptTemplate.from_template(
                    slide_human_prompt if not human_prompt else human_prompt),
            ]
        )
        return prompt

    def generate_slides(self, specific_guidance: str = None, lesson_objectives: dict = None,
                        latex_compiler: str = "pdflatex") -> str:
        """
        Generate LaTeX Beamer slides for the lesson using the language model.

        Args:
            specific_guidance (str, optional): Custom instructions for slide content and structure.
            lesson_objectives (dict, optional): Override stored objectives with custom ones.
                Format: {lesson_number: "objective text"}. When omitted, objectives supplied
                at construction time are kept.
            latex_compiler (str, optional): LaTeX compiler to use for validation. Defaults to "pdflatex".

        Returns:
            str: Complete LaTeX content for the presentation, including preamble.

        Raises:
            ValueError: If validation fails after maximum retry attempts.
            FileNotFoundError: If required prior lesson files cannot be located.

        Note:
            Each attempt runs two checks, and a failure of either consumes one of the
            3 available attempts:
                1. Content quality validation through the LLM validator
                2. LaTeX syntax validation using the specified compiler
        """
        # Only replace stored objectives when new ones are supplied; otherwise objectives
        # passed to __init__ would be silently discarded.
        if lesson_objectives:
            self.user_objectives = self.set_user_objectives(
                lesson_objectives, range(self.lesson_no, self.lesson_no + 1))

        # Load objectives (last, current, next), readings, and previous lesson slides
        objectives_text = "\n\n".join(
            self._get_lesson_objectives(lesson)
            for lesson in range(self.lesson_no - 1, self.lesson_no + 2)
        )
        combined_readings_text = self.readings

        if self.lesson_loader.slide_dir:
            prior_lesson = self._load_prior_lesson()
        else:
            prior_lesson = "Not Provided"
            self.logger.warning(
                "No slide_dir provided. Prior slides will not be referenced during generation. "
                "If this is unintentional, please check LessonLoader configuration for slide_dir."
            )

        self.logger.info(f"{self.prompt=}")

        guidance_text = specific_guidance if specific_guidance else "Not provided."

        # Generate Beamer slides via the chain
        additional_guidance = ""
        retries, MAX_RETRIES = 0, 3

        while retries < MAX_RETRIES:
            response = self.chain.invoke({
                "objectives": objectives_text,
                "information": combined_readings_text,
                # Pass the loaded slide content, not the prior lesson *number*.
                "last_presentation": prior_lesson,
                "lesson_no": self.lesson_no,
                "prior_lesson": int(self.lesson_no) - 1,
                'specific_guidance': guidance_text,
                "additional_guidance": additional_guidance
            })

            # Validate raw LLM response for quality
            val_response = self._validate_llm_response(generated_slides=response,
                                                       objectives=objectives_text,
                                                       readings=combined_readings_text,
                                                       last_presentation=prior_lesson,
                                                       prompt_specific_guidance=guidance_text)

            self.validator.logger.info(f"Validation output: {val_response}")
            if int(val_response['status']) != 1:
                retries += 1
                # Feed the validator's suggestions into the next generation attempt.
                additional_guidance = val_response.get("additional_guidance", "")
                self.validator.logger.warning(
                    f"Response validation failed on attempt {retries}. "
                    f"Guidance for improvement: {additional_guidance}"
                )
                continue  # Retry LLM generation

            # Clean and format the LaTeX output
            cleaned_latex = clean_latex_content(response)
            full_latex = preamble + "\n\n" + comment_out_includegraphics(cleaned_latex)
            self.llm_response = full_latex

            # Validate the generated LaTeX code; a validator crash counts as a failed attempt.
            is_valid_latex = False
            try:
                is_valid_latex = validate_latex(full_latex, latex_compiler=latex_compiler)
            except Exception as e:
                self.logger.error(f"LaTeX validation encountered an error: {e}")

            if is_valid_latex:
                return full_latex

            retries += 1  # Increment retries only if validation fails
            self.logger.warning(
                "\nLaTeX code is invalid. Attempting a second model run. "
                "If the error persists, please review the LLM output for potential causes. "
                "You can inspect the model output via the 'llm_response' object (BeamerBot.llm_response). "
                "\n\nNote: Compilation issues may stem from syntax errors in the example LaTeX code provided to the model."
            )

        # Every attempt failed one of the two checks.
        raise ValueError("Validation failed after max retries. Ensure correct prompt and input data. "
                         "Consider trying a different LLM.")

    def _validate_llm_response(self, generated_slides: str, objectives: str, readings: str,
                               last_presentation: str, prompt_specific_guidance: str = "",
                               additional_guidance: str = "") -> Dict[str, Any]:
        """
        Validate the generated LaTeX slides for content quality and formatting accuracy.

        Args:
            generated_slides (str): LaTeX content generated by the LLM.
            objectives (str): Formatted string of lesson objectives for validation.
            readings (str): Formatted string of lesson readings for content verification.
            last_presentation (str): Content from prior lesson's presentation for format consistency.
            prompt_specific_guidance (str, optional): Custom guidance provided during generation.
            additional_guidance (str, optional): Supplementary guidance for validation refinement.

        Returns:
            Dict[str, Any]: Validator output. ``generate_slides`` reads the ``status`` key
            (1 means pass) and the optional ``additional_guidance`` key.

        Note:
            The task description handed to the validator is the generation prompt itself,
            formatted with the same inputs, so requirements and validation criteria match.
        """
        # Validate slide quality and accuracy
        response_str = str(generated_slides)
        validation_prompt = self.prompt.format(
            objectives=objectives,
            information=readings,
            last_presentation=last_presentation,
            lesson_no=self.lesson_no,
            prior_lesson=self.prior_lesson,
            additional_guidance=additional_guidance,
            specific_guidance=prompt_specific_guidance
        )

        val_response = self.validator.validate(
            task_description=validation_prompt,
            generated_response=response_str,
            min_eval_score=8,
            specific_guidance="Pay attention to the concepts introduced and their accuracy with respect to the texts."
        )
        return val_response

    def save_slides(self, latex_content: str):
        """
        Save the generated LaTeX content to a .tex file.

        Writes to ``self.beamer_output`` as UTF-8, creating the parent directory if needed.

        Args:
            latex_content (str): The LaTeX content to save.
        """
        # Create the output directory on demand so a fresh output_dir does not fail the write.
        self.beamer_output.parent.mkdir(parents=True, exist_ok=True)
        with open(self.beamer_output, 'w', encoding='utf-8') as f:
            f.write(latex_content)
        self.logger.info(f"Slides saved to {self.beamer_output}")
if __name__ == "__main__":
    # Manual smoke test: build a BeamerBot from paths and API keys in the environment
    # (.env supported) and generate slides for one lesson.
    import os

    from dotenv import load_dotenv
    from langchain_community.llms import Ollama
    from langchain_google_genai import ChatGoogleGenerativeAI
    from langchain_openai import ChatOpenAI
    from pyprojroot.here import here

    from class_factory.utils.tools import reset_loggers

    wd = here()
    load_dotenv()

    user_home = Path.home()

    def _env_path(var_name: str) -> Path:
        """Return the home-relative path stored in environment variable *var_name*."""
        relative = os.getenv(var_name)
        if not relative:
            # Fail with a clear message instead of a TypeError from `Path / None`.
            raise RuntimeError(f"Environment variable '{var_name}' is not set.")
        return user_home / relative

    reset_loggers(log_level=logging.INFO)

    OPENAI_KEY = os.getenv('openai_key')
    OPENAI_ORG = os.getenv('openai_org')
    GEMINI_KEY = os.getenv('gemini_api_key')

    # Paths for readings, slides, and syllabus
    reading_dir = _env_path('readingsDir')
    slide_dir = _env_path('slideDir')
    syllabus_path = _env_path('syllabus_path')

    llm = ChatOpenAI(
        model="gpt-4o-mini",
        temperature=0.4,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        api_key=OPENAI_KEY,
        organization=OPENAI_ORG,
    )
    # llm = ChatGoogleGenerativeAI(
    #     model="gemini-1.5-flash-8b",
    #     temperature=0.4,
    #     max_tokens=None,
    #     timeout=None,
    #     max_retries=2,
    #     api_key=GEMINI_KEY
    # )

    # Single source of truth for the lesson number used below.
    lsn = 3

    # llm = Ollama(
    #     model="llama3.1",
    #     temperature=0.2
    # )

    # NOTE(review): this guidance is defined but not passed to generate_slides below;
    # pass `specific_guidance=specific_guidance` if it is meant to apply to this lesson.
    specific_guidance = """
    The lesson should be structured in a way that discusses big picture ideas about political parties and their influence.
    The slides will, at a minimum, cover the following:
    - The role of parties in government and society
    - How parties have changed over time
    - Relating parties to Tocqueville's notion of associations
    - Have a slide for each of the 5 functions of parties: Recruit Candidates, Nominate Candidates, Get Out the Vote (GOTV), Facilitate Electoral Choice, Influence National Government
    """

    loader = LessonLoader(syllabus_path=syllabus_path,
                          reading_dir=reading_dir,
                          slide_dir=slide_dir)

    # Initialize the BeamerBot
    beamer_bot = BeamerBot(
        lesson_no=lsn,
        lesson_loader=loader,
        llm=llm,
        course_name="American Government",
        verbose=True
    )

    # Generate slides for the selected lesson, with objectives keyed to that same lesson
    slides = beamer_bot.generate_slides(lesson_objectives={str(lsn): "do nothing today"})

    # Save the generated LaTeX slides
    # beamer_bot.save_slides(slides)

# %%