Add base code
internal/llm/structure_builder.go (new file, 199 lines added)
@@ -0,0 +1,199 @@
/*
Copyright © 2024 Matteo Schiff <matteo@underdesk.net>
*/

package llm

import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"sort"
	"strconv"
	"strings"

	"github.com/sashabaranov/go-openai"
)

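// StructureBuilder derives a titled paragraph structure from a raw
// transcription by asking an OpenAI-compatible chat model where new topics
// begin. Results can be cached on disk to avoid repeated LLM calls.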
type StructureBuilder struct {
	client   *openai.Client
	useCache bool
}

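// NewStructureBuilder returns a StructureBuilder that talks to the given
// client and reuses cached split results when they are available.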
func NewStructureBuilder(client *openai.Client) *StructureBuilder {
	return &StructureBuilder{
		client:   client,
		useCache: true,
	}
}

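// cacheFileName returns the path of the split cache associated with an input
// audio file.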
func (wt *StructureBuilder) cacheFileName(inputAudio string) string {
	return inputAudio + ".splitted.txt"
}

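// loadCache reads a previously saved split structure from disk.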
func (wt *StructureBuilder) loadCache(name string) (map[string]ParagraphItem, error) {
	contentJson, err := os.ReadFile(name)
	if err != nil {
		return nil, err
	}

	var content map[string]ParagraphItem
	err = json.Unmarshal(contentJson, &content)

	return content, err
}

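// saveCache writes the split structure to disk so later runs can skip the
// LLM calls.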
func (wt *StructureBuilder) saveCache(name string, content map[string]ParagraphItem) error {
	contentJson, err := json.Marshal(content)
	if err != nil {
		return err
	}

	return os.WriteFile(name, contentJson, 0666)
}

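// ParagraphItem is a single outline entry: a title, a heading type
// ("heading1", "heading2" or "heading3") and the paragraph text it covers.
// Content is filled in locally and excluded from the JSON exchanged with the
// model and stored in the cache.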
type ParagraphItem struct {
	Title   string
	Type    string
	Content string `json:"-"`
}

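// splitUsingLLM feeds the numbered transcription sentences to the model in
// fixed-size blocks, asks for the line numbers where new topics start, and
// merges each response into the accumulated structure.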
func (wt *StructureBuilder) splitUsingLLM(transcriptionSentences []string) map[string]ParagraphItem {
	systemPrompt := openai.ChatCompletionMessage{
		Role: openai.ChatMessageRoleSystem,
		Content: `
You are given a transcription of a conversation or speech. Your task is to identify where new topics or arguments begin within the text. A new topic is marked by a shift in subject, a new argument being introduced, or a clear change in focus. Additionally, ensure that each identified segment (paragraph) contains more than just a few sentences.
For each identified new topic, write a brief title or summary describing it. Additionally, assign a heading type based on the importance or hierarchy of the topic. Use the following rules for heading types:

"heading1" for main sections or major topics.
"heading2" for subtopics or important subsections within a main topic.
"heading3" for smaller, more detailed sections within a subtopic.

Input: The transcription will be provided as a series of lines. Each line is numbered for easy reference. Your task is to read through the transcription and identify the points where a new topic or argument begins.

The previous output will be provided as input to guide you on where new sections begin and their existing structure. Use this to extend the structure as needed.
Output: The output should be formatted as a JSON object, where each key is the line number where a new topic starts, and the corresponding value is an object containing the title of that topic and the heading type. For example:
Example:
{
  "0": {
    "Title": "Example title",
    "Type": "heading1"
  },
  "4": {
    "Title": "Example subtitle",
    "Type": "heading2"
  },
  "9": {
    "Title": "Another example subtitle",
    "Type": "heading2"
  },
  "14": {
    "Title": "A third example subtitle",
    "Type": "heading2"
  }
}

Instruction: Please read the transcription carefully and list the line numbers where each new topic or argument starts, along with a brief title summarizing that topic. Use the previous output to guide your extensions, adding titles and types ("heading1", "heading2", "heading3") as necessary.`,
	}

	currentStart := 0
	blockSize := 200

	currentStructure := map[string]ParagraphItem{}

	for currentStart < len(transcriptionSentences) {
		// Clamp the block so the final, shorter block still covers the last sentence.
		blockEnd := currentStart + blockSize
		if blockEnd > len(transcriptionSentences) {
			blockEnd = len(transcriptionSentences)
		}

		currentSlice := transcriptionSentences[currentStart:blockEnd]

		// Number every sentence so the model can answer with line indices.
		userPromptText := ""
		for i, sentence := range currentSlice {
			userPromptText += fmt.Sprintf("%d: %s\n", i+currentStart, sentence)
		}

		// Pass the structure built so far so the model extends it instead of restarting.
		currentJson, _ := json.Marshal(currentStructure)
		currentPrompt := openai.ChatCompletionMessage{
			Role:    openai.ChatMessageRoleUser,
			Content: "Current structure is: " + string(currentJson),
		}

		userPrompt := openai.ChatCompletionMessage{
			Role:    openai.ChatMessageRoleUser,
			Content: "Current transcription slice: " + userPromptText,
		}

		resp1, err := wt.client.CreateChatCompletion(
			context.Background(),
			openai.ChatCompletionRequest{
				Model:    "mistral-small-instruct",
				Messages: []openai.ChatCompletionMessage{systemPrompt, currentPrompt, userPrompt},
				ResponseFormat: &openai.ChatCompletionResponseFormat{
					Type: openai.ChatCompletionResponseFormatTypeJSONObject,
				},
			},
		)
		if err != nil {
			fmt.Printf("Splitting error: %v\n", err)
			return map[string]ParagraphItem{}
		}

		// Merge the new headings into the structure accumulated so far.
		var data map[string]ParagraphItem
		if err := json.Unmarshal([]byte(resp1.Choices[0].Message.Content), &data); err != nil {
			fmt.Printf("Could not parse model response: %v\n", err)
		}
		for k, v := range data {
			currentStructure[k] = v
		}

		currentStart += blockSize
	}
	return currentStructure
}

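// Split breaks the transcription into sentences, loads the topic structure
// from the cache file or computes it with the LLM, and returns the paragraphs
// with their titles, heading types and reassembled text.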
func (wt *StructureBuilder) Split(splitFile string, transcription string) ([]ParagraphItem, error) {
	transcriptionSentences := strings.Split(transcription, ".")

	cache, err := wt.loadCache(splitFile)

	var paragraphs map[string]ParagraphItem
	if wt.useCache && err == nil {
		paragraphs = cache
	} else {
		paragraphs = wt.splitUsingLLM(transcriptionSentences)
		wt.saveCache(splitFile, paragraphs)
	}

	// Collect the starting sentence indices and sort them so the paragraphs
	// come out in order.
	var startLines []int
	for k := range paragraphs {
		q, err := strconv.Atoi(k)
		if err != nil {
			continue
		}
		startLines = append(startLines, q)
	}
	sort.Ints(startLines)

	// Rebuild each paragraph from its first sentence up to (but not including)
	// the first sentence of the next paragraph.
	var items []ParagraphItem
	for i := range startLines {
		end := len(transcriptionSentences)
		if i < len(startLines)-1 {
			end = startLines[i+1]
		}

		var currentParagraph string
		for j := startLines[i]; j < end; j++ {
			currentParagraph += transcriptionSentences[j] + "."
		}
		newItem := paragraphs[strconv.Itoa(startLines[i])]
		newItem.Content = currentParagraph
		items = append(items, newItem)
	}
	return items, nil
}
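A minimal usage sketch, not part of this commit, showing how the builder might be wired to a go-openai client and applied to a transcription. The module import path, API key, base URL, cache file name and sample transcription below are placeholders, and the assumption is that an OpenAI-compatible endpoint serving mistral-small-instruct is available.

package main

import (
	"fmt"

	openai "github.com/sashabaranov/go-openai"

	// Placeholder module path; adjust to the repository's actual module name.
	"example.com/yourmodule/internal/llm"
)

func main() {
	// Assumed: a local OpenAI-compatible server hosting mistral-small-instruct.
	cfg := openai.DefaultConfig("sk-placeholder")
	cfg.BaseURL = "http://localhost:8080/v1"
	client := openai.NewClientWithConfig(cfg)

	builder := llm.NewStructureBuilder(client)

	transcription := "First we review the budget. Costs went up. Then we move on to hiring. We need two engineers."
	items, err := builder.Split("meeting.wav.splitted.txt", transcription)
	if err != nil {
		fmt.Println("split failed:", err)
		return
	}
	for _, item := range items {
		fmt.Printf("[%s] %s\n%s\n\n", item.Type, item.Title, item.Content)
	}
}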