import os from openai import OpenAI import markdown2 import zipfile from weasyprint import HTML import unicodedata from pygments.formatters import HtmlFormatter from dependencies.helper import * from dotenv import load_dotenv from PyPDF2 import PdfReader, PdfWriter from io import BytesIO from reportlab.pdfgen import canvas from reportlab.lib.colors import Color from reportlab.lib.pagesizes import letter load_dotenv() client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o") OPENAI_MINI_MODEL = os.getenv("OPENAI_MINI_MODEL", "gpt-4o-mini") OUTPUT_FOLDER = os.getenv("OUTPUT_FOLDER", "./result") working_dir = "imscc_temp" content_dir = "wiki_content" MATHJAX_CDN = """ """ class CourseCreator: def __init__(self, course ): self.topic = course.topic self.skills = course.skills self.tone = course.tone self.degree = course.degree self.taxonomy = course.taxonomy self.allocated_time = course.allocated_time self.learning_approach = course.learning_approach self.no_of_modules = course.no_of_modules self.mode_of_delivery = course.mode_of_delivery self.outline = course.outline self.language = course.language self.content_outcomes = course.content_outcomes self.content_description = course.content_description self.level_of_difficulty = course.level_of_difficulty if course.level_of_difficulty else "easy" self.type_of_assessment = course.type_of_assessment if course.type_of_assessment else "MCQ" self.num_of_questions = course.no_of_questions if course.no_of_questions else 5 self.max_paragraph_length = 12000 self.base_filename = self.topic.replace(" ", "_") self.course_path = os.path.join(OUTPUT_FOLDER, f"{self.base_filename}.md") if not os.path.exists(OUTPUT_FOLDER): os.makedirs(OUTPUT_FOLDER, exist_ok=True) # Set Degree if self.degree == "bachelor": self.degree_name = "Bachelor's Degree" elif self.degree == "masters": self.degree_name = "Master's Degree" elif self.degree == "doctoral": self.degree_name = "Doctoral and Post-Graduate Degree" else: self.degree_name = "Bachelor's Degree" # Set Taxonomy if self.taxonomy == "bloom": self.taxonomy_desc = "Revised Bloom's Taxonomy" elif self.taxonomy == "solo": self.taxonomy_desc = "Solo Taxonomy" elif self.taxonomy == "webb": self.taxonomy_desc = "Webb's Depth of Knowledge (DOK)" else: self.taxonomy_desc = "Revised Bloom's Taxonomy" # Set Learning Approach if self.learning_approach == "design": self.learning_approach_desc = "Learning Approach based on Design Thinking Process" elif self.learning_approach == "project": self.learning_approach_desc = "Project-based Learning (PBL) Approach" elif self.learning_approach == "competency": self.learning_approach_desc = "Competency-based Learning (CBL) Approach" elif self.learning_approach == "21century": self.learning_approach_desc = "21st Century Learning Approach" elif self.learning_approach == "interdisciplinary": self.learning_approach_desc = "Interdisciplinary Learning Approach" else: self.learning_approach_desc = "Learning Approach based on Design Thinking Process" # Initialize Glossay self.glossary_list = [] def __repr__(self): return f"CourseCreator(topic={self.topic}, skills={self.skills}, tone={self.tone}, degree={self.degree}, taxonomy={self.taxonomy}, allocated_time={self.allocated_time}, learning_approach={self.learning_approach}, no_of_modules={self.no_of_modules}, mode_of_delivery={self.mode_of_delivery})" ## Currently it is not used def generate_course_summary(self): """Generate a course summary based on the topic.""" if not self.topic: raise ValueError("Course topic is required to generate summary.") system_prompt = f"You are an experienced teacher and an expert in designing courses for students and learners of {self.degree_name} on {self.taxonomy_desc} in a {self.tone} tone.\nYou are good in creating logical flow of courses in which you break down the course into modules and arrange them in sequential flow for students to understand the subject." user_prompt = f"You are an experienced course creator and an expert in {self.topic}.\nYou are planning to create a course to teach students of {self.degree_name} about === {self.topic} ===.\nIn a {self.tone} tone, you will be creating a course for {self.skills} level skills development of the students.\nThe student will spend not more than {self.allocated_time} hours in studying the course including the completion of exercises.\nWhen planning and organizing the content, you will be following {self.taxonomy_desc} definition as much as possible.\nYour task now is to generate a summary of this course that you are going to create. The summary should include the main topics that will be covered in the course and the overall learning objectives." # print(f"System Prompt: {system_prompt}") # print(f"User Prompt: {user_prompt}") response = self.clientCall(system_prompt, user_prompt, 0.4, 1200, 0.9) return response ## Currently it is not used def generate_course_objectives(self): """Generate course objectives based on the topic.""" if not self.topic: raise ValueError("Course topic is required to generate objectives.") system_prompt = f"You are an experienced teacher and an expert in designing courses for students and learners of {self.degree_name} on {self.taxonomy_desc} in a {self.tone} tone.\nYou are good in creating logical flow of courses in which you break down the course into modules and arrange them in sequential flow for students to understand the subject." user_prompt = f"You are an experienced course creator and an expert in {self.topic}.\nYou are going to create a course to teach students of {self.degree_name} about === {self.topic} ===.\nIn a {self.tone} tone, you will be creating a course for {self.skills} level skills development of the students.\nThe student will be spending a maximum of {self.allocated_time} hours in studying this course including the completion of exercises.\nWhen planning and organizing the content, you will be following {self.taxonomy_desc} definition as much as possible.\nYour task now is to create a set of clear and measurable objectives of this course that you are going to create. The objectives should be specific, measurable, achievable, relevant, and time bound." # print(f"System Prompt: {system_prompt}") # print(f"User Prompt: {user_prompt}") response = self.clientCall(system_prompt, user_prompt, 0.4, 1200, 0.9) return response # Generate the Course Outcomes def generate_course_outcomes(self): """Generate course outcomes based on the topic.""" if not self.topic: raise ValueError("Course topic is required to generate outcomes.") system_prompt = f"You are an experienced teacher and an expert in designing courses for students and learners. Follow and adhere to the following definitions:" degree_prompt = get_prompt(self.degree, prompt_type='prompt-degree') tone_prompt = get_prompt(self.tone, prompt_type='prompt-tone') skills_prompt = get_prompt(self.skills, prompt_type='prompt-skills') taxonomy_prompt = get_prompt(self.taxonomy, prompt_type='prompt-taxonomy') system_prompt += f"\n\n{degree_prompt}\n{skills_prompt}\n{tone_prompt}\n{taxonomy_prompt}" # print(f"System Prompt: {system_prompt}") # print(f"User Prompt: {user_prompt}") user_prompt = f"You are an experienced course creator and an expert in {self.topic}.\nYou are going to create a course to teach students of {self.degree_name} about === {self.topic} ===.\nIn a {self.tone} tone, you will be creating a course for {self.skills} level skills development of the students.\nThe student will be spending a maximum of {self.allocated_time} hours in studying this course including the completion of exercises.\nWhen planning and organizing the content, you will be following {self.taxonomy_desc} definition as much as possible.\nFirst, give an overview of the content of the course for the topic === {self.topic} ===.\nAnd then define in no more than 5-7 bullet points about the outcome of this course that you have planned and designed.\nThese points should be clear and specific statements of what the learner will be able to achieve upon the successful study and completion of this course.\n\nThe format is:" user_prompt += f"\n\n### Course Overview:\n" user_prompt += f"### Course Outcomes:\n" user_prompt += f"\n\nThe course overview and outcomes should be in {self.language} language." response = self.clientCall(system_prompt, user_prompt, 0.5, 1200, 0.9) # print(response) response = response.split("### Course Overview:")[1].strip() overview, outcomes = response.split("### Course Outcomes:") overview = overview.strip() outcomes = outcomes.strip().split("\n") outcomes = [outcome.strip() for outcome in outcomes if outcome.strip()] outcomes = "\n".join(outcomes) return { "overview": overview, "outcomes": outcomes } def generate_course_modules(self, course_overview, course_outcomes): """Generate course modules based on the topic and outcomes.""" if not self.topic or not course_overview or not course_outcomes: raise ValueError("Topic, overview, and outcomes are required to generate modules.") system_prompt = f"You are an experienced teacher and an expert in designing courses for students and learners of {self.degree_name} on {self.taxonomy_desc} in a {self.tone} tone.\nYou are very good in creating logical flow of courses in which you break down the course into modules and arrange them in sequential flow for a student to understand the subject.\n\nYou are already designing and working on a course, the description of which is {course_overview}.\nYou have carefully defined the course outcome which is {course_outcomes}." user_prompt = f"You are now seriously working on the teaching structure of a course, the title of which is === {self.topic} ===.\nYour goal is to create a course layout for students of {self.degree_name} on {self.topic}, so that they can fully understand the subject for their level.\nYour task now is to create a maximum of {self.no_of_modules} for this course on === {self.topic} ===. Think of the modules in a sequential and logical order of understanding and learning. \nRemember that you are following {self.taxonomy_desc} framework. \nFor each module, think of the subtopics in that module which you will cover and summarize them in not more than 3 lines on what the module will cover. Then list down all the modules and the clear description of what will be covered in each of the module along with the subtopics. Then provide the estimated time for each module in minutes. \n\nThe course module should be in {self.language} language. \n\nThe course layout should be in the following format:\n\n ## Course Outline\n ### Module 1: [Module Title]\n**Description:** [Brief description of what will be covered in this module]\n**Subtopics:**\n- Subtopic 1\n- Subtopic 2\n- Subtopic 3\n**Estimated Time:** [Time in minutes]\n\n### Module 2: [Module Title]\n**Description:** [Brief description of what will be covered in this module]\n**Subtopics:**\n- Subtopic 1\n- Subtopic 2\n- Subtopic 3\n**Estimated Time:** [Time in minutes]\n\n...\n\n### Module N: [Module Title]\n**Description:** [Brief description of what will be covered in this module]\n**Subtopics:**\n- Subtopic 1\n- Subtopic 2\n- Subtopic 3\n**Estimated Time:** [Time in minutes]\n\nRemember to use {self.tone} tone" # print(f"System Prompt: {system_prompt}") # print(f"User Prompt: {user_prompt}") response = self.clientCall(system_prompt, user_prompt, 0.3, 1500, 0.9) return response def generate_module_detailed_content(self, modules, course_outcomes): """Generate detailed content for each module.""" # if not modules or not course_outcomes['outcomes']: # raise ValueError("Module titles and course outcomes are required to generate detailed content.") md = "" if modules: prompt_learning_approach = get_prompt(self.learning_approach, prompt_type='prompt-learning-approach') mode_description = get_prompt(self.mode_of_delivery, prompt_type='prompt-mode-of-delivery') # template_of_outline = get_prompt(self.outline, prompt_type='template-outline') system_prompt = f"You are an experienced teacher and an expert in designing courses for students and learners. You are a specialist in {mode_description}. Follow and adhere to the following definitions:" degree_prompt = get_prompt(self.degree, prompt_type='prompt-degree') tone_prompt = get_prompt(self.tone, prompt_type='prompt-tone') skills_prompt = get_prompt(self.skills, prompt_type='prompt-skills') learning_approach_prompt = get_prompt(self.learning_approach, prompt_type='prompt-learning-approach') template_prompt = get_prompt(self.outline, prompt_type='template-outline') system_prompt += f"\n\n{degree_prompt}\n{skills_prompt}\n{tone_prompt}\n{learning_approach_prompt}" system_prompt += f"\n\nYou are creating content for a course for {self.skills} level skills of students and learners of {self.degree_name} in a {self.tone} tone.\nIt is important to use the {self.learning_approach_desc} when generating the content." overview = course_outcomes['overview'] outcomes = course_outcomes['outcomes'] for index, module in enumerate(modules, start=1): title = module["module_title"] subtopics = module.get("subtopics", []) print("Subtopics:", subtopics) # Format subtopics as bullet points subtopic_list_text = "\n".join(f"- {st}" for st in subtopics) if subtopics else "No specific subtopics provided." if index == 1: milestone = f"You are writing the first module of the course, the title of which is {title}. The outcome of this module is {subtopic_list_text}." else: milestone = f"Now you are writing the module of the course, the title of which is {title}. The outcome of this module is {subtopic_list_text}. Remember that the last module which you have already written was {last_module}." last_module = title user_prompt = f"You are already working on detailed content for a course, the overview of which is {overview} and the course outcome is {outcomes}.\nYou have planned {self.no_of_modules} modules within this course.\n{milestone}\n\n\nYour task is to create a detailed content for this module, keeping in mind the outcome.\n No need to include module title in content.\nYou will start with a springboard to introduce each module and then add minimum 4 paragraphs of content to achieve the outcome of the module covering all the key objectives.\n\nUse the following format:\n" user_prompt += f"{template_prompt}\n\n" user_prompt += f"Add suggested reading links and exercises/activities at the end of the module content.\n Include the instructional videos and links wherever necessary.\n" user_prompt += f"The content should be in {self.language} language." # user_prompt +="The output should be given in the following format:\n\n#### Introduction:\n#### Key Concepts:\n#### Learning Objectives:\n#### Suggested Readings:\n#### Exercises/Activities:\n" # print(f"System Prompt: {system_prompt}") # print(f"User Prompt: {user_prompt}") response = self.clientCall(system_prompt, user_prompt, 0.4, 1400, 0.9) md += f"### Module {index}: {title}\n{response}\n\n" # md += f"### Module {index}:{title}\n\n" # md += f"{response}\n\n" mcq_content = f"{response}\n\n" # Generate content for each subtopic if subtopics: md += f"#### Subtopic:\n" for subtopic in subtopics: subtopic = subtopic.strip() if not subtopic: continue print(subtopic) # Generate content for each subtopic subtopic_list_text = f"- {subtopic}" if subtopic else "No specific subtopics provided." system_prompt = f"You are an expert in generating comprehensive content blocks for each subtopic of a module. As a content writer, you are creating content for a course for {self.skills} level skills of students and learners of {self.degree_name} in a {self.tone} tone.\nIt is important to use the {self.learning_approach_desc} when generating the content. Now you are going to write a content block for the subtopic '{subtopic}' of the module '{title}'." # Note for Manish - This needs to be optimized further. subtopic_prompt = f"You are now writing a comprehensive content block covering the following subtopic of the module '{title}':\n{subtopic_list_text}\n\nWrite minimum 6 paragraphs in total." subtopic_response = self.clientCall(system_prompt, subtopic_prompt, 0.4, 1300, 0.9, OPENAI_MODEL) md += f"{subtopic_response}\n\n" mcq_content += f"{subtopic_response}\n\n" # subtopic_prompt = ( # f"Now write a comprehensive content block covering the following subtopics of the module '{title}':\n" # f"{subtopic_list_text}\n\n" # f"Write minimum 6 paragraphs in total." # ) # subtopic_response = self.clientCall(system_prompt, subtopic_prompt, 0.4, 600, 0.9) # md += f"#### Subtopics Overview: {subtopic_response}\n\n" # mcq_content += f"{subtopic_response}\n\n" # md += self.generate_module_questions(title) # Generate Module glossary # module_glossary = f"{self.generate_ai_content('glossary', mcq_content)}\n\n" # self.glossary_list.append(module_glossary) md += f"#### Questions:\n\n" md += self.generate_mcq_questions(mcq_content) return md def generate_mcq_questions(self, content): """Generate MCQ questions based on the course content.""" if not content: raise ValueError("Content is required to generate assessment questions.") incoming_text = content.strip() if len(incoming_text) > self.max_paragraph_length: incoming_text = incoming_text[:self.max_paragraph_length] question_guidelines = get_prompt(f"{self.taxonomy}-mixed", prompt_type='question_guidelines') user_prompt = f"""Generate {self.num_of_questions} unambiguous, unbiased, and verifiable multiple-choice questions based on the following paragraph with answers at the end. Ensure the questions and options are unbiased, free from any cultural, racial, or gender bias, and are appropriate for diverse audiences. Each question MUST be unique, and have exactly 4 options (A, B, C, D), with only one correct answer. Format each question as follows: The format will be in markdown format: Question 1: [Question text] A. [Option A] B. [Option B] C. [Option C] D. [Option D] Correct Answer: [A/B/C/D] Ensure that the options are plausible and avoid trivial or obviously incorrect answers. The paragraph is \n\n=== {incoming_text}\n\n===\n\nQuestion:""" system_prompt = f"You are an expert in generating multiple choice questions within the scope of a given text. You are now given a text and are going to generate {self.num_of_questions} from it. But you are going to follow these guidelines when generating the questions: {question_guidelines} " # print(f"System Prompt: {system_prompt}") # print(f"User Prompt: {user_prompt}") response = self.clientCall(system_prompt, user_prompt, 0.3, 3500, 0.9, OPENAI_MINI_MODEL) return f"{response}\n\n" def generate_course_description(self): """Generate a course description based on the topic.""" if not self.topic: raise ValueError("Course topic is required to generate description.") system_prompt = f"You are an experienced teacher and an expert in designing courses for students and learners of {self.degree_name} on {self.taxonomy_desc} in a {self.tone} tone.\nYou are very good in creating logical flow of courses in which you break down the course into modules and arrange them in sequential flow for a student to understand the subject." user_prompt = f"You are an experienced course creator and an expert in {self.topic}.\nYou are going to create a course to teach students of {self.degree_name} about === {self.topic} ===.\nIn a {self.tone} tone, you will be creating a course for {self.skills} level skills development of the students.\nThe student will be spending a maximum of {self.allocated_time} hours in studying this course including the completion of exercises.\nWhen planning and organizing the content, you will be following {self.taxonomy_desc} definition as much as possible.\nYour task now is to create a brief description of the course that you are going to plan and design. The description should include the main topics that will be covered in the course and the overall learning objectives. \nThe description should be engaging and informative, suitable for attracting students to enroll in the course.\n \nThis course on {self.topic} is designed for {self.degree_name} students who are looking to enhance their understanding and skills in this subject. The course will cover the following main topics:\n1. Topic 1\n2. Topic 2\n3. Topic 3\n\nBy the end of this course, students will be able to achieve the following goals:\n- Objective 1\n- Objective 2\n- Objective 3\n\nThe course description should be in {self.language} language.\nRemember to use {self.tone} tone" # print(f"System Prompt: {system_prompt}") # print(f"User Prompt: {user_prompt}") response = self.clientCall(system_prompt, user_prompt, 0.4, 1200, 0.9) return response def generate_course_glossary(self): """Generate a course glossary based on the topic.""" if not self.topic: raise ValueError("Course topic is required to generate glossary.") system_prompt = f"You are an experienced teacher and good in understanding words and researching their meaning and explaining them in a {self.tone} tone. Your audience are students and learners of {self.degree_name} and have {self.skills} level skills." user_prompt = f"You are an experienced course creator and an expert in {self.topic}.\nYour task now is to create a glossary of key terms and concepts related to the course topic. The glossary should include definitions and explanations of important terms that will be used throughout the course.\nEach explanation should be in as plain and clear language as possible.\nThe glossary should be in {self.language} language and it must be in alphabetical order." # print(f"System Prompt: {system_prompt}") # print(f"User Prompt: {user_prompt}") response = self.clientCall(system_prompt, user_prompt, 0.5, 1200, 0.9) return response def generate_ai_content(self, content_type): """Generate AI content based on the content type and content.""" if not content_type: raise ValueError("Content type is required to generate AI content.") if content_type == 'content_outcomes': course_outcomes = self.generate_course_outcomes() return course_outcomes['outcomes'] else: """Generate content description of content based on the content type.""" user_prompt = f"You are an expert in generating {content_type}. Your task is to create {content_type} based on the following topic:\n\n{self.topic}\n\nPlease provide the {content_type} in a clear and concise manner. The {content_type} should be in {self.language} language. Remember to use {self.tone} tone. The {content_type} should be suitable for students and learners of {self.degree_name} with {self.skills} level skills." system_prompt = f"You are an expert in generating {content_type}. You have a deep understanding of how to create engaging and informative {content_type} based on the provided input. Your audience are students and learners of {self.degree_name} and have {self.skills} level skills." print(f"System Prompt: {system_prompt}") print(f"User Prompt: {user_prompt}") response = self.clientCall(system_prompt, user_prompt, 0.5, 1200, 0.9) return response def generate_course_materials(self): """Generate course materials based on the topic.""" if not self.topic: raise ValueError("Course topic is required to generate content.") md = f"# Course: {self.topic}\n\n" # course_summary = self.generate_course_summary() # md += f"## Course Summary\n{course_summary}\n\n" print(f"Generating course Description for topic: {self.topic}") if self.content_description: print("Using provided course description.") course_description = self.content_description else: course_description = self.generate_course_description() print(f"Course Description: {course_description}") md += f"## Course Description\n{course_description}\n\n" # course_objectives = self.generate_course_objectives() # md += f"## Course Objectives\n{course_objectives}\n\n" # Generate outcomes print(f"Generating course outcomes for topic: {self.topic}") course_outcomes = self.generate_course_outcomes() print(f"Generating course modules for topic: {self.topic}") # Override outcomes if provided from 2nd step of course creation if self.content_outcomes: course_outcomes['outcomes'] = self.content_outcomes print(f"Course Outcomes: {course_outcomes['outcomes']}") # course_modules = self.generate_course_modules(course_outcomes['overview'], course_outcomes['outcomes']) course_modules = self.generate_course_modules(course_description, course_outcomes['outcomes']) module_titles = self.extract_module_titles(course_modules) print(f"Module Titles: {module_titles}") module_detail = self.generate_module_detailed_content(module_titles, course_outcomes) # md += f"## Course Overview\n{course_outcomes['overview']}\n\n" md += f"## Course Outcomes\n{course_outcomes['outcomes']}\n\n" md += f"{course_modules}\n\n" md += f"## Module Details\n{module_detail}\n\n" # md += f"{module_detail}\n\n" md += f"{self.generate_course_glossary()}\n\n" # md += f"## Course Glossary\n" # self.glossary_list = list(set(self.glossary_list)) # Remove duplicates # for glossary in self.glossary_list: # md += f"{glossary}\n\n" # Save markdown with open(f"{self.course_path}", "w", encoding="utf-8") as f: f.write(md) print(f"✅ Course materials generated and saved to {self.course_path}") self.export_to_html_and_pdf(md) # self.generate_imscc(md) return { "course_path": self.course_path, "module_titles": module_titles, "course_overview": course_outcomes['overview'], "course_outcomes": course_outcomes['outcomes'], "markdown_content": md, "module_detail": module_detail, "course_description": course_description } def extract_module_titles(self,course_outline): """Extract module titles from the course outline.""" # lines = course_outline.strip().split("\n") current_module = None collecting_subtopics = False modules = [] for line in course_outline.splitlines(): line = line.strip() module_match = ( re.match(r"(?:#+\s*)?Module\s*\d+:\s*(.+)", line, re.IGNORECASE) or re.match(r"\*\*Module\s*\d+:\s*(.+?)\*\*", line, re.IGNORECASE) or re.match(r"^#{3,4}\s*Module\s+\d+:\s*(.+)", line, re.IGNORECASE) or re.match(r"^#{3,4}\s*Module\s+\d+\s*:\s*(.+)", line, re.IGNORECASE) or re.match(r"^#{3,4}\s*Module\s+\d+:\s+(.+)", line, re.IGNORECASE) ) if module_match: if current_module: modules.append(current_module) current_module = { "module_title": module_match.group(1).strip(), "subtopics": [] } collecting_subtopics = False continue # Match start of subtopics section like "**Subtopics:**" (bolded, no dash) if re.match(r"^\*\*Subtopics:?\*\*$", line, re.IGNORECASE): collecting_subtopics = True continue # Match inline subtopics: - **Subtopics:** item1, item2 # inline_subtopics_match = re.match(r"-\s*\*\*Subtopics:\*\*\s*(.+)", line) inline_subtopics_match = re.match(r"-\s*\*\*Subtopics\*\*\s*(.*)", line, re.IGNORECASE) estimated_time_match = re.match(r"[*]{1,2}Estimated Time:[:]*[*]{0,2}\s*(.+)", line, re.IGNORECASE) if inline_subtopics_match: subtopic_str = inline_subtopics_match.group(1).strip() if subtopic_str and not subtopic_str in [":", "**"]: subtopics = [t.strip() for t in subtopic_str.split(",") if t.strip()] if current_module: current_module["subtopics"].extend(subtopics) collecting_subtopics = False continue # Match block start for subtopics: - **Subtopics:** (no items on the line) # if re.match(r"-\s*\*\*Subtopics:\*\*\s*$", line, re.IGNORECASE) or re.match(r"^#{3,4}\s*Subtopics:?\s*$", line, re.IGNORECASE): if re.match(r"-\s*(?:\*\*)?Subtopics:?(\*\*)?\s*$", line, re.IGNORECASE) or re.match(r"^#{3,4}\s*Subtopics:?$", line, re.IGNORECASE): collecting_subtopics = True continue # Collecting subtopics if collecting_subtopics: subtopic_item = re.match(r"^- (.+)", line) if subtopic_item: topic = subtopic_item.group(1).strip() if current_module and topic not in (":", "**", ""): current_module["subtopics"].append(topic) elif not line: # End collecting on blank line collecting_subtopics = False else: # Stop collecting on non-bullet line collecting_subtopics = False # Match estimated time if estimated_time_match: estimated_time = estimated_time_match.group(1).strip() if current_module and estimated_time: current_module["estimated_time"] = estimated_time if current_module: modules.append(current_module) # current_module = None return modules def get_module_titles(self,markdown_text=None): """Get the module titles from the course markdown file.""" if not os.path.exists(self.course_path): raise HTTPException(status_code=404, detail="Course markdown file not found.") if markdown_text is not None: content = markdown_text else: content = read_txt_file(self.course_path) # Extract module titles from the content module_titles = self.extract_module_titles(content) print(f"Module Titles: {module_titles}") return module_titles def get_course_content(self): """Get the course content from the markdown file.""" if not os.path.exists(self.course_path): raise HTTPException(status_code=404, detail="Course markdown file not found.") content = read_txt_file(self.course_path) return content def export_to_html_and_pdf(self, markdown_content, tenent_name=None): """Export the course content to HTML and PDF formats.""" # Convert markdown to HTML with fenced code and syntax highlight extras = ["fenced-code-blocks", "code-friendly", "tables", "cuddled-lists", "break-on-newline", "smarty-pants", "footnotes", "toc", "target-blank-links"] html_content = markdown2.markdown(markdown_content, extras=extras) # Add Pygments CSS for code styling style = HtmlFormatter().get_style_defs('.codehilite') style += """ body { font-family: Arial, sans-serif; line-height: 1.5; margin: 10px; } h1, h2, h3, h4, h5, h6 { margin-top: 1em; margin-bottom: 0.5em; } table { border-collapse: collapse; margin: 1em 0; width: 100%; } table, th, td { border: 1px solid #888; padding: 8px; text-align: left; } code, pre { background: #f8f8f8; padding: 4px; border-radius: 4px; } pre { overflow-x: auto; padding: 10px; } """ ## add watermark to pdf katex_css = '' full_html = f""" \n{katex_css}\n{MATHJAX_CDN} {html_content} """ output_base = os.path.join(OUTPUT_FOLDER, self.base_filename) # Save HTML html_file = f"{output_base}.html" with open(html_file, "w", encoding="utf-8") as f: f.write(full_html) # Save PDF pdf_file = f"{output_base}.pdf" # HTML(string=full_html).write_pdf(pdf_file) # Create watermark overlay for PDF if tenent_name: watermark_text = f"{tenent_name}" else: watermark_text = "CONFIDENTIAL" # Convert markdown to PDF with watermark on each page html_obj = HTML(string=full_html) pdf_bytes = html_obj.write_pdf() # Add watermark to each page using PyPDF2 pdf_reader = PdfReader(BytesIO(pdf_bytes)) pdf_writer = PdfWriter() page_width, page_height = letter # Default to letter size if not available for page in pdf_reader.pages: # Create watermark page and merge watermark_buffer = BytesIO() c = canvas.Canvas(watermark_buffer, pagesize=(page_width, page_height)) # Font c.setFont("Helvetica-Bold", 50) c.setFillColor(Color(0.6, 0.6, 0.6, alpha=0.2)) # Draw rotated watermark image at center c.saveState() c.translate(page_width / 2, page_height / 2) c.rotate(45) c.drawCentredString(0, 0, watermark_text) c.restoreState() c.save() watermark_buffer.seek(0) watermark_page = PdfReader(watermark_buffer).pages[0] # type: ignore page.merge_page(watermark_page) pdf_writer.add_page(page) with open(pdf_file, 'wb') as f: pdf_writer.write(f) print(f"✅ Exported to {html_file} and {pdf_file}") return { "html_file": html_file, "pdf_file": pdf_file } def safe_filename(self, s, max_len=100): """Make a filesystem-safe, reasonably short filename from a string.""" # Normalize unicode and remove accents s = unicodedata.normalize("NFKD", s) # Replace non-alphanum with underscores s = re.sub(r"[^\w\s-]", "", s, flags=re.U) s = re.sub(r"[-\s]+", "_", s).strip("_") # Truncate if len(s) > max_len: s = s[:max_len].rstrip("_") # fallback return s or "untitled" def generate_imscc(self, content): """ Generate an IMSCC package from the article content. """ article_title = truncate_filename( self.topic.replace(" ", "_").replace("/", "_") ) article_dir = os.path.join(working_dir, article_title) wiki_content_dir = os.path.join(article_dir, content_dir) os.makedirs(working_dir, exist_ok=True) os.makedirs(wiki_content_dir, exist_ok=True) output_path = os.path.join(OUTPUT_FOLDER, f"{article_title}.imscc") markdown_path = os.path.join(OUTPUT_FOLDER, f"{article_title}.md") if not os.path.exists(markdown_path) and content is None: raise HTTPException(status_code=404, detail="Markdown file not found. Please generate the course first.") self.save_sections_as_html(content,article_dir,wiki_content_dir) # Check if the polished article file exists # self.prepare_article_question() # Create the manifest create_manifest( course_title=article_title, content_dir=article_dir,#wiki_content_dir wiki_dir=content_dir, output_file=os.path.join(article_dir, "imsmanifest.xml") ) with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zf: for root, _, files in os.walk(working_dir): for file in files: full_path = os.path.join(root, file) arcname = os.path.relpath(full_path, start=article_dir) zf.write(full_path, arcname) # Clean up (optional) # shutil.rmtree(working_dir) return output_path def save_sections_as_html(self,content,content_dir,wiki_content_dir): """ Reads a markdown file, splits it into sections, and saves each section as an HTML file. """ # Extract sections for main course pages sections = { "course-description.html": r"##\s*Course Description\s*\n(.*?)(?=\n##\s*Course Outcomes)", "course-outcomes.html": r"##\s*Course Outcomes\s*\n(.*?)(?=\n##\s*Course Outline)", "course-outline.html": r"##\s*Course Outline[^\n]*\n(.*?)(?=\n##\s*Module Details)", } for filename, pattern in sections.items(): match = re.search(pattern, content, re.S | re.IGNORECASE) if match and match is not None: html = markdown2.markdown(match.group(1)) self.save_html(os.path.join(content_dir, filename), html) else: print(f"[WARN] Section not found for {filename}") # Extract modules module_pattern = re.compile( r"^#{2,6}\s*Module\s+(\d+):\s*([^\r\n]+)\r?\n+" # Module heading and title (title is single line) r"^#{2,6}\s*Module Details\s*\r?\n+" # the "Module Details" heading that follows r"(.*?)(?=^#{2,6}\s*Module\s+\d+:|\Z)", # body until next "Module N:" or EOF re.S | re.M | re.IGNORECASE ) modules = module_pattern.findall(content) for num, raw_title, content in modules: folder_name = f"module{num}" module_dir = os.path.join(wiki_content_dir, folder_name) os.makedirs(module_dir, exist_ok=True) # --- Remove Questions section (### Questions ... until next heading or end) --- content_no_questions = re.sub( r"^#{2,6}\s*Questions\s*[:\-]?\s*\r?\n.*?(?=^#{1,6}\s|\Z)", "", content, flags=re.S | re.M | re.IGNORECASE ) file_title = self.safe_filename(raw_title, max_len=80) + ".html" # Convert to HTML html_page = self.markdown_to_html(raw_title.strip(), content_no_questions) # Extract MCQs self.extract_mcq_questions(raw_title.strip(), content, module_dir) # Save HTML with module title as filename file_title = replace_special_characters(raw_title.strip()) + ".html" out_path = os.path.join(module_dir, file_title) try: self.save_html(out_path, html_page) except Exception as e: print(f"[ERROR] Failed saving file for module {num} with title '{raw_title[:120]}': {e}") # fallback: use numeric filename fallback = os.path.join(module_dir, f"module{num}.html") print(f"[INFO] Attempting fallback filename: {fallback}") self.save_html(fallback, html_page) # sections = split_markdown_sections(content) # for section in sections: # lines = section.splitlines() # if not lines: # continue # Skip empty sections # title_line = next((line for line in lines if re.match(r'^#+\s', line)), None) # if not title_line: # continue # Skip sections without a title # title = title_line.strip("# ").strip() # filename = f"{re.sub(r'[^a-zA-Z0-9_]', '_', title.lower())}.html" # # filename = f"{title_line.lower().replace(' ', '_')}.html" # html = self.markdown_to_html(title, section) # output_path = os.path.join(wiki_content_dir, filename) # self.save_html(output_path,html) print(f"✅ Exported {len(sections)} sections to: {wiki_content_dir}") def extract_mcq_questions(self, title, content, module_dir): """Extract MCQ questions from the content and save them as a separate file.""" # Match MCQ questions in the content mcq_pattern = re.compile( r"Question\s*\d+:\s*(.*?)\nA\.\s*(.*?)\nB\.\s*(.*?)\nC\.\s*(.*?)\nD\.\s*(.*?)\nCorrect Answer:\s*([A-D])", re.S | re.M | re.IGNORECASE ) matches = mcq_pattern.findall(content) if not matches: print("[WARN] No MCQ questions found in the content.") return module_data = { "module_title": title, "mcq_questions": [] } mcq_content = "" for i, (question, option_a, option_b, option_c, option_d, correct_answer) in enumerate(matches): question_text = f"**{i+1}. {question.strip()}**" options = [ f"A. {option_a.strip()}", f"B. {option_b.strip()}", f"C. {option_c.strip()}", f"D. {option_d.strip()}" ] correct_option = f"Correct Answer: {correct_answer.strip()}" mcq_content += f"{question_text}\n" + "\n".join(options) + "\n" + correct_option + "\n\n" module_data["mcq_questions"].append({ "question": question.strip(), "options": [option_a.strip(), option_b.strip(), option_c.strip(), option_d.strip()], "answer": correct_answer.strip() }) create_qti_for_module(title, module_data["mcq_questions"], module_dir) print(f"✅ Extracted and saved {len(matches)} MCQ questions for module '{title}' to {module_dir}") def save_html(self,filename, html_content): """Save HTML file with UTF-8 encoding.""" with open(filename, "w", encoding="utf-8") as f: f.write(html_content) def markdown_to_html(self,title: str, content: str) -> str: """Convert markdown content to HTML with MathJax support.""" extras = ["fenced-code-blocks", "code-friendly", "tables", "cuddled-lists", "break-on-newline", "smarty-pants", "footnotes", "toc", "target-blank-links"] html_content = markdown2.markdown(content, extras=extras) # Add Pygments CSS for code styling style = HtmlFormatter().get_style_defs('.codehilite') style += """ body, h1, h2, h3, h4, h5, h6, pre, code { word-wrap: break-word; overflow-wrap: break-word; white-space: normal; } pre, code { white-space: pre-wrap; } """ full_html = f""" \n{MATHJAX_CDN}\n{html_content} """ return full_html # clientCall method to interact with OpenAI API def clientCall(self,system_prompt,user_prompt, temperature=0.7, max_tokens=1500, top_p=1.0, model=OPENAI_MINI_MODEL): """ Call the OpenAI API with the provided prompts and parameters. Args: system_prompt (str): The system prompt to guide the model. user_prompt (str): The user prompt to provide context. temperature (float): The temperature for response variability. max_tokens (int): The maximum number of tokens for the response. Returns: str: The response from the model. """ if not system_prompt or not user_prompt: raise ValueError("System prompt and user prompt cannot be empty.") response = client.chat.completions.create( model=model, messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt} ], temperature=temperature, max_tokens=max_tokens, top_p=top_p, ) return response.choices[0].message.content.strip() # if model == 'gpt-4o-mini': # response = client.chat.completions.create( # model=model, # messages=[ # {"role": "system", "content": system_prompt}, # {"role": "user", "content": user_prompt} # ], # temperature=temperature, # max_tokens=max_tokens, # top_p=top_p, # ) # return response.choices[0].message.content.strip() # else: # response = client.responses.create( # model=model, # input=[ # {"role": "system", "content": system_prompt}, # {"role": "user", "content": user_prompt} # ], # max_output_tokens=max_tokens, # ✅ correct name # ) # # print(response) # return response.output_text