Add base code

This commit is contained in:
2024-11-09 22:57:47 +01:00
parent 967f80d734
commit c7b40276cf
14 changed files with 975 additions and 0 deletions

View File

@@ -0,0 +1,234 @@
/*
Copyright © 2024 Matteo Schiff <matteo@underdesk.net>
*/
package llm
import (
"context"
"encoding/json"
"fmt"
"os"
"sort"
"strconv"
"strings"
"github.com/sashabaranov/go-openai"
)
// ParagraphDetector locates topic boundaries in a transcription by querying a
// chat-completion model and caches the detected start lines on disk.
type ParagraphDetector struct {
// client is the go-openai client used to issue chat-completion requests.
client *openai.Client
// useCache makes Split reuse start lines that were previously written by saveCache.
useCache bool
}
// NewParagraphDetector builds a ParagraphDetector that talks to the model
// through client. Caching of detected start lines is enabled by default.
func NewParagraphDetector(client *openai.Client) *ParagraphDetector {
	detector := new(ParagraphDetector)
	detector.client = client
	detector.useCache = true
	return detector
}
// cacheFileName derives the path of the cache file that stores the detected
// start lines for inputAudio by appending a fixed suffix to it.
func (wt *ParagraphDetector) cacheFileName(inputAudio string) string {
	const cacheSuffix = ".splitted.txt"
	return inputAudio + cacheSuffix
}
// loadCache reads the start lines previously stored for name.
//
// The cache file path is derived with cacheFileName and its content is decoded
// as a JSON array of integers. A read failure (for example a missing file) is
// reported as such instead of being masked by a JSON decoding error, and the
// returned slice is nil whenever an error is returned.
func (wt *ParagraphDetector) loadCache(name string) ([]int, error) {
	contentJson, err := os.ReadFile(wt.cacheFileName(name))
	if err != nil {
		return nil, fmt.Errorf("reading split cache for %q: %w", name, err)
	}

	var content []int
	if err := json.Unmarshal(contentJson, &content); err != nil {
		return nil, fmt.Errorf("decoding split cache for %q: %w", name, err)
	}
	return content, nil
}
// saveCache stores content as a JSON array in the cache file derived from name.
//
// Both the encoding step and the file write are checked; the returned error
// says which of the two failed.
func (wt *ParagraphDetector) saveCache(name string, content []int) error {
	contentJson, err := json.Marshal(content)
	if err != nil {
		return fmt.Errorf("encoding split cache for %q: %w", name, err)
	}
	if err := os.WriteFile(wt.cacheFileName(name), contentJson, 0666); err != nil {
		return fmt.Errorf("writing split cache for %q: %w", name, err)
	}
	return nil
}
// SplitResponse is the value passed as the JSON schema of the chat-completion
// request made by the paragraph detector. It holds a map of topic entries.
type SplitResponse struct {
	Topics map[string]string `json:"Topics"`
}

// MarshalJSON encodes the response as a JSON object with a single "Topics" key.
func (s SplitResponse) MarshalJSON() ([]byte, error) {
	// Converting to a method-less type with identical fields keeps
	// json.Marshal from calling this method recursively.
	type wireFormat SplitResponse
	return json.Marshal(wireFormat(s))
}
// splitUsingLLM asks the chat model where new topics start in
// transcriptionSentences and returns the matching sentence indices in
// ascending order.
//
// The sentences are sent in chunks of blockSize. Inside each chunk the lines
// are numbered from 0, so every line number returned by the model is shifted
// by the chunk offset. Keys that are not integers (optionally followed by
// "-...") or that fall outside the chunk are ignored.
//
// If a request fails or the response carries no choices, the error is printed
// and an empty slice is returned, discarding results from earlier chunks.
func (wt *ParagraphDetector) splitUsingLLM(transcriptionSentences []string) []int {
	systemPrompt := openai.ChatCompletionMessage{
		Role: openai.ChatMessageRoleSystem,
		//Content: "Identify topics in the following transcription. Topics are a group of sentences about the same argument. Format the output using a JSON integer array, where each item is the line number (integer). Each time you see one or two new topics, add the integer to the array.",
		Content: `
You are given a transcription of a conversation or speech. Your task is to identify where new topics or arguments begin within the text. A new topic is marked by a shift in subject, a new argument being introduced, or a clear change in focus. Additionally, ensure that each identified segment (paragraph) contains more than just a few sentences. If necessary, group smaller sections together so that each segment is substantial.
Input: The transcription will be provided as a series of lines. Each line is numbered for easy reference. Your task is to read through the transcription and identify the points where a new topic or argument begins. If a segment is too short (fewer than three sentences), it should be grouped with an adjacent segment. For each identified new topic, write a brief title or summary describing it.
Output: The output should be formatted as a JSON object, where each key is the line number where a new topic starts, and the corresponding value is the title of that topic. For example:
json
{
"0": "Title of First Topic",
"19": "Title of Second Topic"
}
Example:
Transcription:
vbnet
0: Hello, everyone. Today we're going to talk about climate change.
1: It's a complex issue, but I'll try to break it down.
2: Climate change refers to long-term shifts in temperatures and weather patterns.
3: In this discussion, we will cover the causes, effects, and possible solutions.
4: First, let's discuss the causes of climate change.
5: There are several factors, including greenhouse gas emissions.
6: Most emissions come from burning fossil fuels like coal, oil, and gas.
7: Another key cause is deforestation, which reduces the number of trees that can absorb CO2.
8: Deforestation not only affects CO2 levels but also disrupts ecosystems.
9: Next, let's move on to the effects of climate change.
10: Rising temperatures are one of the most obvious effects.
11: This leads to melting ice caps, rising sea levels, and extreme weather events.
12: We are already seeing more frequent and intense heatwaves, hurricanes, and floods.
13: The impact on wildlife is also severe, with many species facing habitat loss.
14: Finally, what can we do to address this issue?
15: One solution is to reduce our carbon footprint by using energy more efficiently.
16: Renewable energy sources, like wind and solar, play a big role here.
17: Governments and organizations worldwide are investing in clean energy technologies.
18: Individual actions, such as reducing waste and conserving water, also make a difference.
19: That's all for today's discussion on climate change. Thank you for listening.
Expected Output:
json
{
"0": "Introduction to Climate Change",
"4": "Causes of Climate Change",
"9": "Effects of Climate Change",
"14": "Solutions to Climate Change"
}
Instruction: Please read the transcription carefully and list the line numbers where each new topic or argument starts, along with a brief title summarizing that topic. Ensure that shorter segments are grouped with neighboring content to form more substantial paragraphs before marking the start of a new topic.`,
	}

	// blockSize is the number of sentences sent to the model per request.
	const blockSize = 200

	var startLines []int
	for currentStart := 0; currentStart < len(transcriptionSentences); currentStart += blockSize {
		// The slice upper bound is exclusive, so clamping to len (not len-1)
		// keeps the final sentence in the last chunk.
		blockEnd := currentStart + blockSize
		if blockEnd > len(transcriptionSentences) {
			blockEnd = len(transcriptionSentences)
		}
		currentSlice := transcriptionSentences[currentStart:blockEnd]

		var userPromptText strings.Builder
		for i, sentence := range currentSlice {
			fmt.Fprintf(&userPromptText, "%d: %s\n", i, sentence)
		}

		userPrompt := openai.ChatCompletionMessage{
			Role:    openai.ChatMessageRoleUser,
			Content: userPromptText.String(),
		}

		// NOTE(review): JSONSchema is supplied while Type is json_object, and
		// SplitResponse{} does not encode to a JSON schema document; confirm
		// that the backend in use accepts this combination.
		resp1, err := wt.client.CreateChatCompletion(
			context.Background(),
			openai.ChatCompletionRequest{
				Model:    "mistral-7b-instruct-v0.3",
				Messages: []openai.ChatCompletionMessage{systemPrompt, userPrompt},
				ResponseFormat: &openai.ChatCompletionResponseFormat{
					Type: openai.ChatCompletionResponseFormatTypeJSONObject,
					JSONSchema: &openai.ChatCompletionResponseFormatJSONSchema{
						Name:   "splitter",
						Strict: true,
						Schema: SplitResponse{},
					},
				},
			},
		)
		if err != nil {
			fmt.Printf("Splitting error: %v\n", err)
			return []int{}
		}
		if len(resp1.Choices) == 0 {
			fmt.Println("Splitting error: response contains no choices")
			return []int{}
		}

		content := resp1.Choices[0].Message.Content
		// Print, not Printf: the model output must not be used as a format string.
		fmt.Print(content)

		var data map[string]string
		if err := json.Unmarshal([]byte(content), &data); err != nil {
			// Keep going: entries decoded before the error are still usable.
			fmt.Printf("Splitting error: invalid JSON in response: %v\n", err)
		}

		for k := range data {
			// The model sometimes answers with ranges such as "4-8"; only the
			// first number marks the start of the topic.
			before, _, _ := strings.Cut(k, "-")
			q, err := strconv.Atoi(before)
			if err != nil || q < 0 || q >= len(currentSlice) {
				continue
			}
			startLines = append(startLines, currentStart+q)
		}
		fmt.Println(startLines)
	}

	sort.Ints(startLines)
	return startLines
}
// Split cuts transcription into paragraphs, one per detected topic.
//
// The transcription is split into sentences on "." and the indices of the
// sentences that open a new topic are taken from the cache stored for name
// (when caching is enabled and the cache can be read) or, otherwise, from
// splitUsingLLM. A freshly computed, non-empty result is written back to the
// cache; an empty result is not cached so that a failed model call does not
// stick forever.
//
// Each paragraph runs from its start line up to, but not including, the next
// start line; the last one runs to the end of the transcription. Start lines
// outside the transcription and duplicates are ignored. Sentences before the
// first start line are not part of any paragraph. Every sentence is re-joined
// with a trailing ".". The returned error is always nil.
func (wt *ParagraphDetector) Split(name string, transcription string) ([]string, error) {
	transcriptionSentences := strings.Split(transcription, ".")

	var startLines []int
	cacheHit := false
	if wt.useCache {
		if cached, err := wt.loadCache(name); err == nil {
			startLines = cached
			cacheHit = true
		}
	}
	if !cacheHit {
		startLines = wt.splitUsingLLM(transcriptionSentences)
		if len(startLines) > 0 {
			// Caching is best effort: a failed write must not fail the split.
			if err := wt.saveCache(name, startLines); err != nil {
				fmt.Printf("Saving split cache error: %v\n", err)
			}
		}
	}

	// Work on a sorted copy and drop indices that would fall outside the
	// transcription (stale cache, bad model output) as well as duplicates.
	sorted := append([]int(nil), startLines...)
	sort.Ints(sorted)
	starts := sorted[:0]
	for _, s := range sorted {
		if s < 0 || s >= len(transcriptionSentences) {
			continue
		}
		if n := len(starts); n > 0 && starts[n-1] == s {
			continue
		}
		starts = append(starts, s)
	}

	var splittedTranscription []string
	for i, start := range starts {
		// end is exclusive so the sentence that opens the next paragraph is
		// not repeated at the end of this one.
		end := len(transcriptionSentences)
		if i+1 < len(starts) {
			end = starts[i+1]
		}

		var currentParagraph strings.Builder
		for _, sentence := range transcriptionSentences[start:end] {
			currentParagraph.WriteString(sentence)
			currentParagraph.WriteString(".")
		}
		splittedTranscription = append(splittedTranscription, currentParagraph.String())
	}
	return splittedTranscription, nil
}

View File

@@ -0,0 +1,199 @@
/*
Copyright © 2024 Matteo Schiff <matteo@underdesk.net>
*/
package llm
import (
"context"
"encoding/json"
"fmt"
"os"
"sort"
"strconv"
"strings"
"github.com/sashabaranov/go-openai"
)
// StructureBuilder derives a titled, hierarchical outline of a transcription
// by querying a chat-completion model and caches the outline on disk.
type StructureBuilder struct {
// client is the go-openai client used to issue chat-completion requests.
client *openai.Client
// useCache makes Split reuse an outline that was previously written by saveCache.
useCache bool
}
// NewStructureBuilder builds a StructureBuilder that talks to the model
// through client. Caching of the generated outline is enabled by default.
func NewStructureBuilder(client *openai.Client) *StructureBuilder {
	builder := new(StructureBuilder)
	builder.client = client
	builder.useCache = true
	return builder
}
// cacheFileName derives a cache file path for inputAudio by appending a fixed
// suffix to it.
func (wt *StructureBuilder) cacheFileName(inputAudio string) string {
	const cacheSuffix = ".splitted.txt"
	return inputAudio + cacheSuffix
}
// loadCache reads a previously stored outline from the file called name.
//
// The file content is decoded as a JSON object that maps line numbers to
// paragraph items. A read failure (for example a missing file) is reported as
// such instead of being masked by a JSON decoding error, and the returned map
// is nil whenever an error is returned.
func (wt *StructureBuilder) loadCache(name string) (map[string]ParagraphItem, error) {
	contentJson, err := os.ReadFile(name)
	if err != nil {
		return nil, fmt.Errorf("reading structure cache %q: %w", name, err)
	}

	var content map[string]ParagraphItem
	if err := json.Unmarshal(contentJson, &content); err != nil {
		return nil, fmt.Errorf("decoding structure cache %q: %w", name, err)
	}
	return content, nil
}
// saveCache stores content as a JSON object in the file called name.
//
// Both the encoding step and the file write are checked; the returned error
// says which of the two failed.
func (wt *StructureBuilder) saveCache(name string, content map[string]ParagraphItem) error {
	contentJson, err := json.Marshal(content)
	if err != nil {
		return fmt.Errorf("encoding structure cache %q: %w", name, err)
	}
	if err := os.WriteFile(name, contentJson, 0666); err != nil {
		return fmt.Errorf("writing structure cache %q: %w", name, err)
	}
	return nil
}
// ParagraphItem is one entry of the document outline: a titled section and,
// once Split has filled it in, the transcription text belonging to it.
type ParagraphItem struct {
// Title is the section title, decoded from the model's JSON answer.
Title string
// Type is the heading level; the prompt asks the model for "heading1",
// "heading2" or "heading3".
Type string
// Content is the section's transcription text. It is excluded from JSON, so
// it is neither cached nor sent as part of the document structure.
Content string `json:"-"`
}
// splitUsingLLM asks the chat model to build an outline of
// transcriptionSentences and returns it keyed by the line number (as a
// string) at which each section starts.
//
// The sentences are sent in chunks of blockSize, numbered with their absolute
// index. Each request also carries the outline collected so far so the model
// can extend it; the entries of every answer are merged into that outline,
// with later answers overwriting entries that share a key.
//
// If a request fails or the response carries no choices, the error is printed
// and an empty map is returned, discarding results from earlier chunks.
func (wt *StructureBuilder) splitUsingLLM(transcriptionSentences []string) map[string]ParagraphItem {
	systemPrompt := openai.ChatCompletionMessage{
		Role: openai.ChatMessageRoleSystem,
		Content: `
You are given a transcription of a conversation or speech. Your task is to identify where new topics or arguments begin within the text. A new topic is marked by a shift in subject, a new argument being introduced, or a clear change in focus. Additionally, ensure that each identified segment (paragraph) contains more than just a few sentences.
For each identified new topic, write a brief title or summary describing it. Additionally, assign a heading type based on the importance or hierarchy of the topic. Use the following rules for heading types:
"heading1" for main sections or major topics.
"heading2" for subtopics or important subsections within a main topic.
"heading3" for smaller, more detailed sections within a subtopic.
Input: The transcription will be provided as a series of lines. Each line is numbered for easy reference. Your task is to read through the transcription and identify the points where a new topic or argument begins.
The previous output will be provided as input to guide you on where new sections begin and their existing structure. Use this to extend the structure as needed.
Output: The output should be formatted as a JSON object, where each key is the line number where a new topic starts, and the corresponding value is an object containing the title of that topic and the heading type. For example:
Example:
{
"0": {
"Title": "Example title",
"Type": "heading1"
},
"4": {
"Title": "Example subtitle",
"Type": "heading2"
},
"9": {
"Title": "Another example subtitle",
"Type": "heading2"
},
"14": {
"Title": "A third example subtitle",
"Type": "heading2"
}
}
Instruction: Please read the transcription carefully and list the line numbers where each new topic or argument starts, along with a brief title summarizing that topic. Use the previous output to guide your extensions, adding titles and types ("heading1", "heading2", "heading3") as necessary.`,
	}

	// blockSize is the number of sentences sent to the model per request.
	const blockSize = 200

	currentStructure := map[string]ParagraphItem{}
	for currentStart := 0; currentStart < len(transcriptionSentences); currentStart += blockSize {
		// The slice upper bound is exclusive, so clamping to len (not len-1)
		// keeps the final sentence in the last chunk.
		blockEnd := currentStart + blockSize
		if blockEnd > len(transcriptionSentences) {
			blockEnd = len(transcriptionSentences)
		}
		currentSlice := transcriptionSentences[currentStart:blockEnd]

		var userPromptText strings.Builder
		for i, sentence := range currentSlice {
			fmt.Fprintf(&userPromptText, "%d: %s\n", i+currentStart, sentence)
		}

		currentJson, err := json.Marshal(currentStructure)
		if err != nil {
			fmt.Printf("Splitting error: %v\n", err)
			return map[string]ParagraphItem{}
		}

		currentPrompt := openai.ChatCompletionMessage{
			Role:    openai.ChatMessageRoleUser,
			Content: "Current structure is: " + string(currentJson),
		}
		userPrompt := openai.ChatCompletionMessage{
			Role:    openai.ChatMessageRoleUser,
			Content: "Current transcription slice: " + userPromptText.String(),
		}

		resp1, err := wt.client.CreateChatCompletion(
			context.Background(),
			openai.ChatCompletionRequest{
				Model:    "mistral-small-instruct",
				Messages: []openai.ChatCompletionMessage{systemPrompt, currentPrompt, userPrompt},
				ResponseFormat: &openai.ChatCompletionResponseFormat{
					Type: openai.ChatCompletionResponseFormatTypeJSONObject,
				},
			},
		)
		if err != nil {
			fmt.Printf("Splitting error: %v\n", err)
			return map[string]ParagraphItem{}
		}
		if len(resp1.Choices) == 0 {
			fmt.Println("Splitting error: response contains no choices")
			return map[string]ParagraphItem{}
		}

		var data map[string]ParagraphItem
		if err := json.Unmarshal([]byte(resp1.Choices[0].Message.Content), &data); err != nil {
			// Keep going: entries decoded before the error are still usable.
			fmt.Printf("Splitting error: invalid JSON in response: %v\n", err)
		}

		for k, v := range data {
			currentStructure[k] = v
		}
	}

	return currentStructure
}
// Split turns transcription into a list of outline items in document order,
// each carrying its title, heading type and text.
//
// The transcription is split into sentences on "." and the outline is taken
// from the cache file splitFile (when caching is enabled and the file can be
// read) or, otherwise, from splitUsingLLM. A freshly computed, non-empty
// outline is written back to splitFile; an empty one is not cached so that a
// failed model call does not stick forever.
//
// Outline keys that are not integers or that point outside the transcription
// are ignored. Each item's Content runs from its start line up to, but not
// including, the next start line; the last one runs to the end of the
// transcription. Sentences before the first start line are not part of any
// item. Every sentence is re-joined with a trailing ".". The returned error is
// always nil.
func (wt *StructureBuilder) Split(splitFile string, transcription string) ([]ParagraphItem, error) {
	transcriptionSentences := strings.Split(transcription, ".")

	var paragraphs map[string]ParagraphItem
	cacheHit := false
	if wt.useCache {
		if cached, err := wt.loadCache(splitFile); err == nil {
			paragraphs = cached
			cacheHit = true
		}
	}
	if !cacheHit {
		paragraphs = wt.splitUsingLLM(transcriptionSentences)
		if len(paragraphs) > 0 {
			// Caching is best effort: a failed write must not fail the split.
			if err := wt.saveCache(splitFile, paragraphs); err != nil {
				fmt.Printf("Saving structure cache error: %v\n", err)
			}
		}
	}

	// Index the outline by numeric start line, skipping unusable keys. When
	// several keys parse to the same number (e.g. "4" and "04"), the canonical
	// spelling wins so the result does not depend on map iteration order.
	byLine := make(map[int]ParagraphItem, len(paragraphs))
	startLines := make([]int, 0, len(paragraphs))
	for k, item := range paragraphs {
		q, err := strconv.Atoi(k)
		if err != nil || q < 0 || q >= len(transcriptionSentences) {
			continue
		}
		if _, seen := byLine[q]; !seen {
			startLines = append(startLines, q)
			byLine[q] = item
		} else if k == strconv.Itoa(q) {
			byLine[q] = item
		}
	}
	sort.Ints(startLines)

	var items []ParagraphItem
	for i, start := range startLines {
		// end is exclusive so the sentence that opens the next section is not
		// repeated at the end of this one.
		end := len(transcriptionSentences)
		if i+1 < len(startLines) {
			end = startLines[i+1]
		}

		var currentParagraph strings.Builder
		for _, sentence := range transcriptionSentences[start:end] {
			currentParagraph.WriteString(sentence)
			currentParagraph.WriteString(".")
		}

		newItem := byLine[start]
		newItem.Content = currentParagraph.String()
		items = append(items, newItem)
	}

	return items, nil
}

View File

@@ -0,0 +1,99 @@
/*
Copyright © 2024 Matteo Schiff <matteo@underdesk.net>
*/
package llm
import (
"context"
"encoding/json"
"fmt"
"log"
"os"
"time"
"git.underdesk.net/Matte23/transcriber/internal/utils"
"github.com/sashabaranov/go-openai"
)
// RewriteText rewrites every paragraph with the chat model and writes the
// result to finalFile as a Markdown document.
//
// For each paragraph a heading is emitted ("#" for "heading1", "##" for
// "heading2", "###" for anything else) followed by the model's rewrite of the
// paragraph content. Every request carries the JSON document structure, the
// document written so far and the paragraph to rewrite.
//
// The function panics if finalFile cannot be created or closed. If a model
// call fails, returns no choices, or the output cannot be written, the error
// is printed and the function returns, leaving the file with the sections
// produced up to that point.
func RewriteText(client *openai.Client, paragraphs []ParagraphItem, finalFile string) {
	outputFile, err := os.Create(finalFile)
	if err != nil {
		panic(err)
	}
	// close output file on exit and check for its returned error
	defer func() {
		if err := outputFile.Close(); err != nil {
			panic(err)
		}
	}()

	systemPrompt := openai.ChatCompletionMessage{
		Role: openai.ChatMessageRoleSystem,
		Content: `You are provided with:
A transcription paragraph to rewrite, aimed at improving clarity, grammar, and flow while preserving the original meaning and details.
A JSON structure of the document that lists section and subsection titles, showing how this paragraph fits within the larger structure.
The current built document, containing all previously written sections up to this point.
Your task: Rewrite the transcription paragraph using clear and polished language, while keeping all key information intact. Format the paragraph using Markdown for improved readability (e.g., bold for emphasis, bullet points if applicable, etc.). Do not add any new information or leave out any critical details. Focus solely on rewriting the paragraph provided in the transcription, and do not add headers, titles, extra context, or explanations beyond the paragraph's Markdown-formatted text.
Only write the Markdown-formatted paragraph text in your response.`,
	}

	structureJson, err := json.Marshal(paragraphs)
	if err != nil {
		fmt.Printf("LLM process error: %v\n", err)
		return
	}

	documentStructure := openai.ChatCompletionMessage{
		Role:    openai.ChatMessageRoleUser,
		Content: "Document structure is: " + string(structureJson),
	}

	// currentText accumulates the document written so far; it is sent back to
	// the model with every request.
	currentText := ""

	log.Println("Starting rewriting text")
	for _, currentMessage := range paragraphs {
		startTime := time.Now()

		heading := ""
		switch currentMessage.Type {
		case "heading1":
			heading += "# "
		case "heading2":
			heading += "## "
		default:
			heading += "### "
		}
		heading += currentMessage.Title + "\n"

		currentText += heading
		if _, err := outputFile.WriteString(heading); err != nil {
			fmt.Printf("Output write error: %v\n", err)
			return
		}

		currentTranscription := openai.ChatCompletionMessage{
			Role:    openai.ChatMessageRoleUser,
			Content: "Paragraph to rewrite is: '" + currentMessage.Content + "'",
		}
		currentDocument := openai.ChatCompletionMessage{
			Role:    openai.ChatMessageRoleUser,
			Content: "Current rewritten document is: \n\n" + currentText,
		}

		resp1, err := client.CreateChatCompletion(
			context.Background(),
			openai.ChatCompletionRequest{
				Model:    "mistral-small-instruct",
				Messages: []openai.ChatCompletionMessage{systemPrompt, documentStructure, currentDocument, currentTranscription},
			},
		)
		if err != nil {
			fmt.Printf("LLM process error: %v\n", err)
			return
		}
		if len(resp1.Choices) == 0 {
			fmt.Println("LLM process error: response contains no choices")
			return
		}

		result := resp1.Choices[0].Message.Content
		if _, err := outputFile.WriteString(result + "\n\n"); err != nil {
			fmt.Printf("Output write error: %v\n", err)
			return
		}
		currentText += result + "\n\n"

		utils.MeasureTime(startTime, "Text rewrite iteration for "+currentMessage.Title)
	}
}

View File

@@ -0,0 +1,86 @@
/*
Copyright © 2024 Matteo Schiff <matteo@underdesk.net>
*/
package llm
/*import (
"context"
"fmt"
"log"
"os"
"time"
"git.underdesk.net/Matte23/transcriber/utils"
"github.com/sashabaranov/go-openai"
)
func RewriteTextLarge(client *openai.Client, transcription string) {
splittedTranscription = transcription
outputFile, err := os.Create("output.md")
if err != nil {
panic(err)
}
// close output file on exit and check for its returned error
defer func() {
if err := outputFile.Close(); err != nil {
panic(err)
}
}()
systemPrompt := openai.ChatCompletionMessage{
Role: openai.ChatMessageRoleSystem,
Content: `You will receive a part of a transcription in English that you have to rewrite. Your task is to:
Rewrite the transcription, ensuring that the content is clearly presented and properly structured.
Correct any English errors, including grammar, spelling, and punctuation.
Organize the discourse by adding appropriate headings, subheadings, and bullet points where needed. Use titles and subtitles to logically separate sections and make the content easy to follow.
Do not add any new information that is not present in the original transcription (for example don't insert a conclusion paragraph if it's not present in the original text). Do not change the meaning of the text.
Each response will be formatted in Markdown. Use the following guidelines:
Use #, ##, ### for titles and subtitles.
Use bullet points, numbered lists, and other Markdown formatting to present information clearly.
Use ** for bold.
Use paragraphs to separate distinct ideas or topics.
Each message you receive will be a part of a larger transcription, so please ensure that the content flows naturally and coherently. You should revise the transcription as if it were a section of a longer document, but avoid duplicating any content.`,
}
var messages []openai.ChatCompletionMessage
log.Println("Starting rewriting text")
for _, currentMessage := range splittedTranscription {
startTime := time.Now()
messages = append(messages, openai.ChatCompletionMessage{
Role: openai.ChatMessageRoleUser,
Content: currentMessage,
})
if len(messages) > 4 {
messages = messages[2:]
}
resp1, err := client.CreateChatCompletion(
context.Background(),
openai.ChatCompletionRequest{
Model: "mistral-small-instruct",
Messages: append([]openai.ChatCompletionMessage{systemPrompt}, messages...),
},
)
if err != nil {
fmt.Printf("LLM process error: %v\n", err)
return
}
result := resp1.Choices[0].Message.Content
outputFile.Write([]byte(result + "\n"))
messages = append(messages, openai.ChatCompletionMessage{
Role: openai.ChatMessageRoleAssistant,
Content: result,
})
utils.MeasureTime(startTime, "Text rewrite iteration")
}
} */