I am using AWS Bedrock for the first time. I have configured an S3 data source along with an OpenSearch Serverless cluster for the embeddings. However, I do not have any control over the mappings of that cluster. I want to add S3 object metadata as mappings and run RAG on them. Does anybody know the proper way to either create a new data source in which the metadata is included, or update the existing data source to include the mappings? This would help a lot! Thank you.
Add a comment
|
2 Answers
Here's how you can handle S3 object metadata mappings:
{
  "name": "your-data-source-name",
  "dataSourceConfiguration": {
    "s3Configuration": {
      "bucketName": "your-bucket",
      "inclusionPrefixes": ["your/prefix/"],
      "inclusionPatterns": ["*.pdf", "*.txt"],
      "metadata": {
        "fields": ["author", "creation-date", "custom-metadata-field"]
      }
    }
  },
  "vectorConfiguration": {
    "vectorizer": "amazon.titan-embed-text-v1",
    "dimensions": 1536
  }
}
Using the AWS CLI, update the data source:
# Data sources are modified with `update-data-source`; `update-knowledge-base`
# does not accept a --data-source-configuration option. Besides the knowledge
# base ID, the call needs the data source ID and the data source name.
#
# The S3 configuration identifies the bucket by ARN and has no field for listing
# metadata keys. Per-document metadata is supplied through a sidecar file named
# "<document-file-name>.metadata.json" stored next to each document in the bucket
# (see the Bedrock user guide), and is picked up on the next ingestion job.
aws bedrock-agent update-data-source \
  --knowledge-base-id your-kb-id \
  --data-source-id your-data-source-id \
  --name your-data-source-name \
  --data-source-configuration '{
    "type": "S3",
    "s3Configuration": {
      "bucketArn": "arn:aws:s3:::your-bucket"
    }
  }'
Also, because OpenSearch Serverless doesn't allow the mappings to be modified directly, we need to create a custom mapping.
Example:
import boto3
import json
def lambda_handler(event, context):
    """Index one document together with the metadata of its S3 object.

    Args:
        event: Mapping that is read for the keys 'bucket', 'key' and 'content'.
        context: Lambda context object; not used by this handler.

    Returns:
        None.
    """
    s3_client = boto3.client('s3')
    search_client = boto3.client('opensearchserverless')

    # A HEAD request is enough here: only the object's 'Metadata' entry is read.
    head = s3_client.head_object(Bucket=event['bucket'], Key=event['key'])
    object_metadata = head['Metadata']

    # The full metadata dict is stored as-is; two keys are additionally copied
    # under '_meta' (.get() yields None when a key is absent).
    payload = {
        'content': event['content'],
        'metadata': object_metadata,
        '_meta': {
            'author': object_metadata.get('author'),
            'creation_date': object_metadata.get('creation-date'),
        },
    }

    # NOTE(review): confirm that the boto3 'opensearchserverless' client really
    # exposes a `post` operation; it may only cover collection management, in
    # which case indexing has to go through the collection's HTTP endpoint.
    search_client.post(
        collection_name='your-collection',
        body=json.dumps(payload),
    )
Hope this helps.
-
Thank you for your answer. Could you elaborate more on the first code snippet? Which JavaScript API exactly should I use? – Makarand Commented Apr 14 at 2:22
-
1 Sorry, I was not able to add the code in the comment. I posted another answer. Let me know if you have more questions. – swawge Commented Apr 14 at 2:55
-
@swawge You shouldn't need to add code in comments. Just edit the existing answer and add the code. – jarmod Commented Apr 16 at 14:24
To create or update a Bedrock knowledge base data source, you can use the AWS SDK for JavaScript (v3):
const { BedrockAgentClient, CreateKnowledgeBaseCommand, UpdateKnowledgeBaseCommand } = require("@aws-sdk/client-bedrock-agent");
// Bedrock Agent client used for every request sent by this script.
// Replace the placeholder with a real AWS region, e.g., "us-east-1".
const client = new BedrockAgentClient({ region: "your-region" });
// Request settings that are spread into both the create and the update command.
// All values are placeholders to be replaced before running the script.
// NOTE(review): confirm these field names against the Bedrock Agent API
// reference for the command they are sent with.
const dataSourceConfig = {
  name: "your-data-source-name",
  dataSourceConfiguration: {
    s3Configuration: {
      bucketName: "your-bucket",
      inclusionPrefixes: ["your/prefix/"],
      inclusionPatterns: ["*.pdf", "*.txt"],
      metadata: {
        fields: ["author", "creation-date", "custom-metadata-field"],
      },
    },
  },
  vectorConfiguration: {
    vectorizer: "amazon.titan-embed-text-v1",
    dimensions: 1536,
  },
};
/**
 * Create a new knowledge base from the shared `dataSourceConfig` settings.
 *
 * @returns {Promise<object>} The response resolved by `client.send`.
 * @throws Re-throws any error raised while building or sending the command,
 *   after logging it.
 */
async function createKnowledgeBase() {
  try {
    const command = new CreateKnowledgeBaseCommand({
      ...dataSourceConfig,
      // The knowledge base is named through `name`. It is listed after the
      // spread so that `dataSourceConfig.name` (the data source name) cannot
      // overwrite it.
      // NOTE(review): the CreateKnowledgeBase request likely needs further
      // fields (role and storage settings) — confirm against the API reference.
      name: "your-kb-name",
    });
    const response = await client.send(command);
    console.log("Knowledge base created:", response);
    return response;
  } catch (error) {
    console.error("Error creating knowledge base:", error);
    throw error;
  }
}
/**
 * Send an update for an existing knowledge base using the shared
 * `dataSourceConfig` settings.
 *
 * @param {string} knowledgeBaseId - ID placed in the update request.
 * @returns {Promise<object>} The response resolved by `client.send`.
 * @throws Re-throws any error raised while building or sending the command,
 *   after logging it.
 */
async function updateKnowledgeBase(knowledgeBaseId) {
  try {
    // The ID comes first so the spread keeps the same precedence as before.
    const input = { knowledgeBaseId, ...dataSourceConfig };
    const result = await client.send(new UpdateKnowledgeBaseCommand(input));
    console.log("Knowledge base updated:", result);
    return result;
  } catch (err) {
    console.error("Error updating knowledge base:", err);
    throw err;
  }
}
/**
 * Example driver: creates a knowledge base, then updates the one identified by
 * the placeholder ID "your-kb-id". The two steps run one after the other; if
 * either rejects, the error is logged and not re-thrown.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    await createKnowledgeBase();
    await updateKnowledgeBase("your-kb-id");
  } catch (err) {
    console.error("Main error:", err);
  }
}

// Kick off the example as soon as the script is loaded.
main();