Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Execute retrieval processes #286

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Things we are currently working on:
- [ ] App: Implement the process to vectorize one local file using embeddings
- [ ] Runtime: Integration of the vector database [LanceDB](https://github.com/lancedb/lancedb)
- [ ] App: Implement the continuous process of vectorizing data
- [x] ~~App: Define a common retrieval context interface for the integration of RAG processes in chats (PR [#281](https://github.com/MindWorkAI/AI-Studio/pull/281), [#284](https://github.com/MindWorkAI/AI-Studio/pull/284))~~
- [x] ~~App: Define a common retrieval context interface for the integration of RAG processes in chats (PR [#281](https://github.com/MindWorkAI/AI-Studio/pull/281), [#284](https://github.com/MindWorkAI/AI-Studio/pull/284), [#286](https://github.com/MindWorkAI/AI-Studio/pull/286))~~
- [ ] App: Define a common augmentation interface for the integration of RAG processes in chats
- [x] ~~App: Integrate data sources in chats (PR [#282](https://github.com/MindWorkAI/AI-Studio/pull/282))~~

Expand Down
43 changes: 43 additions & 0 deletions app/MindWork AI Studio/Chat/ChatThread.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using AIStudio.Components;
using AIStudio.Settings;
using AIStudio.Settings.DataModel;
using AIStudio.Tools.ERIClient.DataModel;

namespace AIStudio.Chat;

Expand Down Expand Up @@ -150,4 +151,46 @@ public void Remove(IContent content, bool removeForRegenerate = false)
// Remove the block from the chat thread:
this.Blocks.Remove(block);
}

/// <summary>
/// Transforms this chat thread to an ERI chat thread.
/// </summary>
/// <param name="token">The cancellation token.</param>
/// <returns>The ERI chat thread.</returns>
public async Task<Tools.ERIClient.DataModel.ChatThread> ToERIChatThread(CancellationToken token = default)
{
//
// Transform the content blocks:
//
var contentBlocks = new List<Tools.ERIClient.DataModel.ContentBlock>(this.Blocks.Count);
foreach (var block in this.Blocks)
{
var (contentData, contentType) = block.Content switch
{
ContentImage image => (await image.AsBase64(token), Tools.ERIClient.DataModel.ContentType.IMAGE),
ContentText text => (text.Text, Tools.ERIClient.DataModel.ContentType.TEXT),

_ => (string.Empty, Tools.ERIClient.DataModel.ContentType.UNKNOWN),
};

contentBlocks.Add(new Tools.ERIClient.DataModel.ContentBlock
{
Role = block.Role switch
{
ChatRole.AI => Role.AI,
ChatRole.USER => Role.USER,
ChatRole.AGENT => Role.AGENT,
ChatRole.SYSTEM => Role.SYSTEM,
ChatRole.NONE => Role.NONE,

_ => Role.UNKNOW,
},

Content = contentData,
Type = contentType,
});
}

return new Tools.ERIClient.DataModel.ChatThread { ContentBlocks = contentBlocks };
}
}
24 changes: 23 additions & 1 deletion app/MindWork AI Studio/Chat/ContentText.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using AIStudio.Components;
using AIStudio.Provider;
using AIStudio.Settings;
using AIStudio.Tools.RAG;
using AIStudio.Tools.Services;

namespace AIStudio.Chat;
Expand Down Expand Up @@ -199,9 +200,30 @@ public async Task CreateFromProviderAsync(IProvider provider, Model chatModel, I
//
// Trigger the retrieval part of the (R)AG process:
//
var dataContexts = new List<IRetrievalContext>();
if (proceedWithRAG)
{

//
// We kick off the retrieval process for each data source in parallel:
//
var retrievalTasks = new List<Task<IReadOnlyList<IRetrievalContext>>>(selectedDataSources.Count);
foreach (var dataSource in selectedDataSources)
retrievalTasks.Add(dataSource.RetrieveDataAsync(lastPrompt, chatThread, token));

//
// Wait for all retrieval tasks to finish:
//
foreach (var retrievalTask in retrievalTasks)
{
try
{
dataContexts.AddRange(await retrievalTask);
}
catch (Exception e)
{
logger.LogError(e, "An error occurred during the retrieval process.");
}
}
}

//
Expand Down
11 changes: 11 additions & 0 deletions app/MindWork AI Studio/Chat/IContent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,15 @@ public interface IContent
/// Uses the provider to create the content.
/// </summary>
public Task CreateFromProviderAsync(IProvider provider, Model chatModel, IContent? lastPrompt, ChatThread? chatChatThread, CancellationToken token = default);

/// <summary>
/// Returns the corresponding ERI content type.
/// </summary>
public Tools.ERIClient.DataModel.ContentType ToERIContentType => this switch
{
ContentText => Tools.ERIClient.DataModel.ContentType.TEXT,
ContentImage => Tools.ERIClient.DataModel.ContentType.IMAGE,

_ => Tools.ERIClient.DataModel.ContentType.UNKNOWN,
};
}
88 changes: 88 additions & 0 deletions app/MindWork AI Studio/Settings/DataModel/DataSourceERI_V1.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
// ReSharper disable InconsistentNaming

using AIStudio.Assistants.ERI;
using AIStudio.Chat;
using AIStudio.Tools.ERIClient;
using AIStudio.Tools.ERIClient.DataModel;
using AIStudio.Tools.RAG;
using AIStudio.Tools.Services;

using ChatThread = AIStudio.Chat.ChatThread;
using ContentType = AIStudio.Tools.ERIClient.DataModel.ContentType;

namespace AIStudio.Settings.DataModel;

Expand Down Expand Up @@ -43,4 +50,85 @@ public DataSourceERI_V1()

/// <inheritdoc />
public ERIVersion Version { get; init; } = ERIVersion.V1;

/// <inheritdoc />
public async Task<IReadOnlyList<IRetrievalContext>> RetrieveDataAsync(IContent lastPrompt, ChatThread thread, CancellationToken token = default)
{
// Important: Do not dispose the RustService here, as it is a singleton.
var rustService = Program.SERVICE_PROVIDER.GetRequiredService<RustService>();
var logger = Program.SERVICE_PROVIDER.GetRequiredService<ILogger<DataSourceERI_V1>>();

using var eriClient = ERIClientFactory.Get(this.Version, this)!;
var authResponse = await eriClient.AuthenticateAsync(this, rustService, token);
if (authResponse.Successful)
{
var retrievalRequest = new RetrievalRequest
{
LatestUserPromptType = lastPrompt.ToERIContentType,
LatestUserPrompt = lastPrompt switch
{
ContentText text => text.Text,
ContentImage image => await image.AsBase64(token),
_ => string.Empty
},

Thread = await thread.ToERIChatThread(token),
MaxMatches = 10,
RetrievalProcessId = null, // The ERI server selects the retrieval process when multiple processes are available
Parameters = null, // The ERI server selects useful default parameters
};

var retrievalResponse = await eriClient.ExecuteRetrievalAsync(retrievalRequest, token);
if(retrievalResponse is { Successful: true, Data: not null })
{
//
// Next, we have to transform the ERI context back to our generic retrieval context:
//
var genericRetrievalContexts = new List<IRetrievalContext>(retrievalResponse.Data.Count);
foreach (var eriContext in retrievalResponse.Data)
{
switch (eriContext.Type)
{
case ContentType.TEXT:
genericRetrievalContexts.Add(new RetrievalTextContext
{
Path = eriContext.Path ?? string.Empty,
Type = eriContext.ToRetrievalContentType(),
Links = eriContext.Links,
Category = RetrievalContentCategory.TEXT,
MatchedText = eriContext.MatchedContent,
DataSourceName = eriContext.Name,
SurroundingContent = eriContext.SurroundingContent,
});
break;

case ContentType.IMAGE:
genericRetrievalContexts.Add(new RetrievalImageContext
{
Path = eriContext.Path ?? string.Empty,
Type = eriContext.ToRetrievalContentType(),
Links = eriContext.Links,
Source = eriContext.MatchedContent,
Category = RetrievalContentCategory.IMAGE,
SourceType = ContentImageSource.BASE64,
DataSourceName = eriContext.Name,
});
break;

default:
logger.LogWarning($"The ERI context type '{eriContext.Type}' is not supported yet.");
break;
}
}

return genericRetrievalContexts;
}

logger.LogWarning($"Was not able to retrieve data from the ERI data source '{this.Name}'. Message: {retrievalResponse.Message}");
return [];
}

logger.LogWarning($"Was not able to authenticate with the ERI data source '{this.Name}'. Message: {authResponse.Message}");
return [];
}
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
using AIStudio.Chat;
using AIStudio.Tools.RAG;

namespace AIStudio.Settings.DataModel;

/// <summary>
Expand Down Expand Up @@ -27,6 +30,13 @@ public DataSourceLocalDirectory()
/// <inheritdoc />
public DataSourceSecurity SecurityPolicy { get; init; } = DataSourceSecurity.NOT_SPECIFIED;

/// <inheritdoc />
public Task<IReadOnlyList<IRetrievalContext>> RetrieveDataAsync(IContent lastPrompt, ChatThread thread, CancellationToken token = default)
{
IReadOnlyList<IRetrievalContext> retrievalContext = new List<IRetrievalContext>();
return Task.FromResult(retrievalContext);
}

/// <summary>
/// The path to the directory.
/// </summary>
Expand Down
10 changes: 10 additions & 0 deletions app/MindWork AI Studio/Settings/DataModel/DataSourceLocalFile.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
using AIStudio.Chat;
using AIStudio.Tools.RAG;

namespace AIStudio.Settings.DataModel;

/// <summary>
Expand Down Expand Up @@ -27,6 +30,13 @@ public DataSourceLocalFile()
/// <inheritdoc />
public DataSourceSecurity SecurityPolicy { get; init; } = DataSourceSecurity.NOT_SPECIFIED;

/// <inheritdoc />
public Task<IReadOnlyList<IRetrievalContext>> RetrieveDataAsync(IContent lastPrompt, ChatThread thread, CancellationToken token = default)
{
IReadOnlyList<IRetrievalContext> retrievalContext = new List<IRetrievalContext>();
return Task.FromResult(retrievalContext);
}

/// <summary>
/// The path to the file.
/// </summary>
Expand Down
11 changes: 11 additions & 0 deletions app/MindWork AI Studio/Settings/IDataSource.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
using System.Text.Json.Serialization;

using AIStudio.Chat;
using AIStudio.Settings.DataModel;
using AIStudio.Tools.RAG;

namespace AIStudio.Settings;

Expand Down Expand Up @@ -37,4 +39,13 @@ public interface IDataSource
/// Which data security policy is applied to this data source?
/// </summary>
public DataSourceSecurity SecurityPolicy { get; init; }

/// <summary>
/// Perform the data retrieval process.
/// </summary>
/// <param name="lastPrompt">The last prompt from the chat.</param>
/// <param name="thread">The chat thread.</param>
/// <param name="token">The cancellation token.</param>
/// <returns>The retrieved data context.</returns>
public Task<IReadOnlyList<IRetrievalContext>> RetrieveDataAsync(IContent lastPrompt, ChatThread thread, CancellationToken token = default);
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ namespace AIStudio.Tools.RAG;

public static class RetrievalContentTypeExtensions
{
public static RetrievalContentType ToRetrievalContentType(Context eriContext)
public static RetrievalContentType ToRetrievalContentType(this Context eriContext)
{
//
// Right now, we have to parse the category string along the type enum to
Expand Down