Serverless Call Summarization Pipeline with AWS Lambda and Amazon Bedrock¶

by Grayson Adkins, updated May 29, 2024

In this notebook, I create a serverless, event-driven data processing pipeline for automatically summarizing customer support calls and classifying their sentiment. This end-to-end pipeline is built with AWS Lambda and Amazon Bedrock services and includes steps for downloading, transcribing, summarizing, and storing summaries. I also configure monitoring and logging for troubleshooting and compliance.

Open In Colab

Technical Summary¶

  • Transcriptions are performed via the Amazon Transcribe service, an automatic speech recognition (ASR) API.
  • Summarization is handled by a large language model (LLM) served from Amazon Bedrock.
  • Bedrock model invocations are logged with Amazon CloudWatch and S3 with security hardening via a custom IAM Role.
  • The whole process is configured to be event-driven via AWS Lambda functions for reduced setup time and cost efficiency.

Pre-requisites¶

  • An AWS account (preferably a non-root IAM user)
  • The following IAM policies attached to your user: AmazonS3FullAccess, AmazonTranscribeFullAccess, and AmazonBedrockFullAccess (See AWS docs for instructions)
  • AWS Access Key and Secret Key for connecting to AWS services from this notebook (See AWS docs for instructions on creating keys).

Attribution¶

This notebook is based on the course Serverless LLM apps with Amazon Bedrock from DeepLearning.AI, taught by Mike Chambers, Developer Advocate for Generative AI at AWS.

Source Code¶

The source code for this project, as well as the dataset used, is available in my ai-cookbook repo on GitHub.

Table of Contents¶

  1. Import packages and load audio file
  2. Set up S3 client and Transcribe client
  3. Upload audio file to S3
  4. Create the unique job name
  5. Build the transcription response
  6. Access the needed parts of the transcript
  7. Set up Bedrock runtime
  8. Create the prompt template
  9. Configure the model response
  10. Generate a summary of the audio transcript
  11. Set up model invocation logging for Amazon Bedrock
  12. Create a bucket to store logs
  13. Define CloudFormation configuration
  14. Create the CloudFormation stack
  15. Create a log group
  16. Enable logging for Bedrock models
  17. Verify logs exist in CloudWatch & S3
  18. Define an IAM Role for Lambda
  19. Create the CloudFormation stack
  20. Create a Lambda layer for dependencies
  21. Create the (advanced) prompt template
  22. Define the Lambda function
  23. Deploy the Lambda function
  24. Upload a test transcript to S3
  25. Confirm results were stored in S3

Install dependencies and import packages¶

In [1]:
!pip install -qU boto3 # Python SDK for AWS
!pip install -qU awscli # AWS command-line we'll use with CloudFormation later
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 139.3/139.3 kB 1.8 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 12.3/12.3 MB 17.3 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 82.2/82.2 kB 2.7 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.5/4.5 MB 14.0 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 548.2/548.2 kB 21.2 MB/s eta 0:00:00
In [2]:
import boto3
import json
import os
import uuid
import time
from jinja2 import Template

Configure this notebook to authenticate with AWS services¶

First we need to authorize this notebook to use AWS services.

‼️ Do not set your AWS credentials in your notebook explicitly (i.e. don't do aws_access_key_id = AR3T4DLU...).

There are three options:

  1. (Recommended for Google Colab) Set AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, and AWS_REGION as secrets in Google Colab
  2. Set the access key, secret key, and region interactively via getpass
  3. Configure credentials with the AWS CLI (aws configure)

Option 1: Add secrets in Google Colab¶

  1. Go to the secrets option on the left side panel (key icon)
  2. Create your secret
  3. Switch on "Notebook Access" for each secret you want this notebook to be able to read
In [3]:
# Turn on access to these secrets, then set them as environment variables
from google.colab import userdata
os.environ['AWS_ACCESS_KEY_ID'] = userdata.get('AWS_ACCESS_KEY_ID')
os.environ['AWS_SECRET_ACCESS_KEY'] = userdata.get('AWS_SECRET_ACCESS_KEY')
os.environ['AWS_REGION'] = userdata.get('AWS_REGION')
In [ ]:
# # Verify environment variables were set properly
# region = os.getenv('AWS_REGION')
# region
Out[ ]:
'us-east-1'
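To confirm that the credentials resolve to the account you expect (not just that the region variable is set), you can call STS GetCallerIdentity, which works with any valid key pair and needs no extra IAM permissions. A minimal sketch; the helper name `caller_identity` is ours, and you pass in `boto3.client('sts')`:

```python
def caller_identity(sts_client):
    """Return the account ID and ARN behind the active credentials."""
    # GetCallerIdentity cannot be denied by IAM policy, so any valid
    # key pair will answer; a failure here means the keys are wrong.
    resp = sts_client.get_caller_identity()
    return {"account": resp["Account"], "arn": resp["Arn"]}
```

For example, `caller_identity(boto3.client('sts'))` should return your account ID and user ARN.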

Option 2: Set environment variables interactively with getpass¶

In [ ]:
# # Set AWS_ACCESS_KEY, AWS_SECRET_KEY, AWS_REGION interactively via getpass
# import boto3
# import os
# from getpass import getpass

# default_region = "us-east-1"
# AWS_ACCESS_KEY = getpass("AWS Access key: ")
# AWS_SECRET_KEY = getpass("AWS Secret key: ")
# AWS_REGION = input(f"AWS Region [default: {default_region}]: ") or default_region

# bedrock_client = boto3.client(
#     service_name="bedrock-runtime",
#     region_name=AWS_REGION,
#     aws_access_key_id=AWS_ACCESS_KEY,
#     aws_secret_access_key=AWS_SECRET_KEY,
# )
AWS Access key: ··········
AWS Secret key: ··········
AWS Region [default: us-east-1]: 

Option 3: AWS CLI¶

‼️ Warning: With this option you'll need to scrub your credentials from this notebook before committing it to version control.

In [ ]:
# Install the AWS command-line tool
# !pip install -qU awscli
In [ ]:
# # Configure credentials
# !aws configure

Download sample audio data¶

Here we're using a sample call between a customer support representative and a customer. The voices are computer-generated.

In [4]:
# Download the sample audio file
%cd /content/
!wget https://huggingface.co/datasets/gadkins/call-center-dialog/resolve/main/dialog.mp3
/content
--2024-06-10 14:49:48--  https://huggingface.co/datasets/gadkins/call-center-dialog/resolve/main/dialog.mp3
Resolving huggingface.co (huggingface.co)... 3.163.189.37, 3.163.189.74, 3.163.189.114, ...
Connecting to huggingface.co (huggingface.co)|3.163.189.37|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs-us-1.huggingface.co/... (signed CDN URL elided) [following]
--2024-06-10 14:49:48--  https://cdn-lfs-us-1.huggingface.co/... (signed CDN URL elided)
Resolving cdn-lfs-us-1.huggingface.co (cdn-lfs-us-1.huggingface.co)... 3.163.189.28, 3.163.189.91, 3.163.189.127, ...
Connecting to cdn-lfs-us-1.huggingface.co (cdn-lfs-us-1.huggingface.co)|3.163.189.28|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1030356 (1006K) [audio/mpeg]
Saving to: ‘dialog.mp3’

dialog.mp3          100%[===================>]   1006K  --.-KB/s    in 0.04s   

2024-06-10 14:49:48 (27.6 MB/s) - ‘dialog.mp3’ saved [1030356/1030356]

In [5]:
# Display audio widget
from IPython.display import Audio
audio = Audio(filename="dialog.mp3")
display(audio)

Upload audio sample to Amazon S3¶

In [7]:
# Create an S3 client using the boto3 Python SDK for AWS
s3_client = boto3.client('s3', region_name='us-east-1')
In [ ]:
# Define a bucket
# bucket_name = os.environ['BucketName']
bucket_name = "call-center-dialog"
file_name = "dialog.mp3"
In [ ]:
# Create the bucket to store audio sample
s3_client.create_bucket(Bucket=bucket_name)
Out[ ]:
{'ResponseMetadata': {'RequestId': 'WH14B0P0ZZZ4BJHV',
  'HostId': 'kgcwsfl2oylbU/wJKNLPVW+upp2O7QPYu5E/O0Em91nBW41Esr9mdoRSE5bdrlvowZMzvliwodsyhktW17peU0fsUwUJSUOk',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'kgcwsfl2oylbU/wJKNLPVW+upp2O7QPYu5E/O0Em91nBW41Esr9mdoRSE5bdrlvowZMzvliwodsyhktW17peU0fsUwUJSUOk',
   'x-amz-request-id': 'WH14B0P0ZZZ4BJHV',
   'date': 'Mon, 03 Jun 2024 20:24:46 GMT',
   'location': '/call-center-dialog',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'Location': '/call-center-dialog'}
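One caveat worth flagging: `create_bucket` with no extra arguments only works because this notebook uses us-east-1. In any other region, S3 requires an explicit LocationConstraint. A small wrapper that handles both cases (the name `create_bucket_in_region` is ours):

```python
def create_bucket_in_region(s3_client, bucket_name, region):
    """Create an S3 bucket, handling the us-east-1 special case.

    us-east-1 rejects an explicit LocationConstraint; every other
    region requires one.
    """
    if region == "us-east-1":
        return s3_client.create_bucket(Bucket=bucket_name)
    return s3_client.create_bucket(
        Bucket=bucket_name,
        CreateBucketConfiguration={"LocationConstraint": region},
    )
```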
In [ ]:
# Upload file to bucket
s3_client.upload_file(file_name, bucket_name, file_name) # Creates s3://{bucket_name}/{file_name}

Transcribe the audio dialog¶

Amazon Transcribe is a speech-to-text transcription service. We'll use it to convert the audio dialog in our sample file into a text transcription.

In [ ]:
# Create a client for the AWS Transcribe API
default_region = os.getenv('AWS_REGION')
transcribe_client = boto3.client('transcribe', region_name=default_region)
In [ ]:
job_name = 'transcription-job-' + str(uuid.uuid4())
job_name
Out[ ]:
'transcription-job-31b5a7a4-df92-473e-b83d-7d4600f1fb6f'
In [ ]:
# Recall that the audio file we're transcribing is in Amazon S3, so we'll pass
# this URI as the value of the Media parameter here
response = transcribe_client.start_transcription_job(
    TranscriptionJobName=job_name,
    Media={'MediaFileUri': f's3://{bucket_name}/{file_name}'},
    MediaFormat='mp3',
    LanguageCode='en-US',
    OutputBucketName=bucket_name,
    Settings={
        'ShowSpeakerLabels': True,
        'MaxSpeakerLabels': 2
    }
)
In [ ]:
# Check status of transcription job
# (This step is really only necessary for very long audio samples)
while True:
    status = transcribe_client.get_transcription_job(TranscriptionJobName=job_name)
    if status['TranscriptionJob']['TranscriptionJobStatus'] in ['COMPLETED', 'FAILED']:
        break
    time.sleep(2)
print(status['TranscriptionJob']['TranscriptionJobStatus'])
COMPLETED
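The polling loop above can be hardened with a timeout so a stuck job can't spin the notebook forever. A sketch (the helper name `wait_for_transcription` is ours; pass the `transcribe_client` created earlier):

```python
import time

def wait_for_transcription(transcribe_client, job_name,
                           poll_seconds=2, max_wait_seconds=600):
    """Poll a Transcribe job until it reaches a terminal state.

    Same loop as above, plus a timeout; returns 'COMPLETED' or 'FAILED',
    or raises TimeoutError if the job never finishes.
    """
    elapsed = 0.0
    while True:
        status = transcribe_client.get_transcription_job(
            TranscriptionJobName=job_name)
        state = status['TranscriptionJob']['TranscriptionJobStatus']
        if state in ('COMPLETED', 'FAILED'):
            return state
        if elapsed >= max_wait_seconds:
            raise TimeoutError(
                f"Job {job_name} still {state} after {max_wait_seconds}s")
        time.sleep(poll_seconds)
        elapsed += poll_seconds
```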

Load transcript from S3¶

In [ ]:
if status['TranscriptionJob']['TranscriptionJobStatus'] == 'COMPLETED':

    # Load the transcript from S3
    transcript_key = f"{job_name}.json"
    transcript_obj = s3_client.get_object(Bucket=bucket_name, Key=transcript_key)
    transcript_text = transcript_obj['Body'].read().decode('utf-8')
    transcript_json = json.loads(transcript_text)

    output_text = ""
    current_speaker = None

    # JSON object containing speaker labels, content, confidence scores, etc.
    items = transcript_json['results']['items']

    # Format transcription into dialog format, e.g. {speaker_label}: {content}\n
    for item in items:

        speaker_label = item.get('speaker_label', None)
        content = item['alternatives'][0]['content']

        # Start the line with the speaker label:
        if speaker_label is not None and speaker_label != current_speaker:
            current_speaker = speaker_label
            output_text += f"\n{current_speaker}: "

        # Add the speech content:
        if item['type'] == 'punctuation':
            output_text = output_text.rstrip()

        output_text += f"{content} "

    # Save the transcript to a text file
    with open(f'{job_name}.txt', 'w') as f:
        f.write(output_text)
In [ ]:
# Display final formatted transcription
with open(f'{job_name}.txt', "r") as file:
    transcript = file.read()
print(transcript)
spk_0: Hi, is this the Crystal Heights Hotel in Singapore? 
spk_1: Yes, it is. Good afternoon. How may I assist you today? 
spk_0: Fantastic, good afternoon. I was looking to book a room for my 10th wedding anniversary. Ive heard your hotel offers exceptional views and services. Could you tell me more? 
spk_1: Absolutely, Alex and congratulations on your upcoming anniversary. Thats a significant milestone and wed be honored to make it a special occasion for you. We have several room types that offer stunning views of the city skyline and the fictional Sapphire Bay. Our special diamond suite even comes with exclusive access to the moonlit pool and star deck. We also have in house spa services, world class dining options and a shopping arcade. 
spk_0: That sounds heavenly. I think my spouse would love the moonlit pool. Can you help me make a reservation for one of your diamond suites with a sapphire bay view? 
spk_1: Of course. May I know the dates you planning to visit? 
spk_0: Sure. It would be from October 10th to 17th. 
spk_1: Excellent. Let me check the availability. Ah It looks like we have a diamond suite available for those dates. Would you like to proceed with the reservation? 
spk_0: Definitely. Whats included in the package? 
spk_1: Wonderful. The package includes breakfast, complimentary access to the moonlit pool and star deck. A one time spa treatment for two and a special romantic dinner at our cloud nine restaurant. 
spk_0: You making it impossible to resist. Lets go ahead with the booking. 
spk_1: Great. I'll need some personal information for the reservation. Can I get your full name, contact details and a credit card for the preauthorizations? 
spk_0: Certainly. My full name is Alexander Thompson. My contact number is 12345678910. And the credit card is, wait, did you say pre authorization? How much would that be? 
spk_1: Ah, I should have mentioned that earlier. My apologies. A pre authorization. A mt of $1000 will be held on your card which would be released upon checkout 
spk_0: $1000. That seems a bit excessive. Don't you think 
spk_1: I understand your concern, Alex. The pre authorization is a standard procedure to cover any incidental expenses you may incur during your stay. However, I assure you its only a hold and not an actual charge. 
spk_0: Thats still a lot. Are there any additional charges that I should know about? 
spk_1: Well, there is a 10% service charge and a 7% fantasy tax applied to the room rate. 
spk_0: Mm. You know what its a special occasion. So lets go ahead. 
spk_1: Thank you, Alex for understanding. Will ensure that your experience at Crystal Heights is well worth it. 
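The speaker-grouping loop above can also be factored into a pure function, which makes it easy to sanity-check against hand-built Transcribe items (same schema as `transcript_json['results']['items']`):

```python
def format_dialog(items):
    """Collapse Amazon Transcribe result items into 'speaker: text' lines."""
    output = ""
    current_speaker = None
    for item in items:
        speaker = item.get('speaker_label')  # punctuation items may lack this
        content = item['alternatives'][0]['content']
        # Start a new line whenever the speaker changes
        if speaker is not None and speaker != current_speaker:
            current_speaker = speaker
            output += f"\n{current_speaker}: "
        # Attach punctuation directly to the preceding word
        if item['type'] == 'punctuation':
            output = output.rstrip()
        output += f"{content} "
    return output
```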

Summarize transcript with an LLM¶

In this section, we use language models on Amazon Bedrock to provide summarization capabilities.

In [ ]:
# Create a client for Amazon Bedrock
bedrock_runtime = boto3.client('bedrock-runtime', region_name=default_region)

Here we use a common prompt-engineering technique: a prompt template that injects the transcript into the prompt sent to the LLM.

In [ ]:
%%writefile prompt_template.txt
I need to summarize a conversation. The transcript of the
conversation is between the <data> XML like tags.

<data>
{{transcript}}
</data>

The summary must contain a one word sentiment analysis, and
a list of issues, problems or causes of friction
during the conversation. The output must be provided in
JSON format shown in the following example.

Example output:
{
    "sentiment": <sentiment>,
    "issues": [
        {
            "topic": <topic>,
            "summary": <issue_summary>,
        }
    ]
}

Write the JSON output and nothing more.

Here is the JSON output:
Writing prompt_template.txt
In [ ]:
with open('prompt_template.txt', "r") as file:
    template_string = file.read()
In [ ]:
data = {
    'transcript' : transcript
}
In [ ]:
from jinja2 import Template
template = Template(template_string)
In [ ]:
prompt = template.render(data)
In [ ]:
print(prompt)
I need to summarize a conversation. The transcript of the
conversation is between the <data> XML like tags.

<data>

spk_0: Hi, is this the Crystal Heights Hotel in Singapore? 
spk_1: Yes, it is. Good afternoon. How may I assist you today? 
spk_0: Fantastic, good afternoon. I was looking to book a room for my 10th wedding anniversary. Ive heard your hotel offers exceptional views and services. Could you tell me more? 
spk_1: Absolutely, Alex and congratulations on your upcoming anniversary. Thats a significant milestone and wed be honored to make it a special occasion for you. We have several room types that offer stunning views of the city skyline and the fictional Sapphire Bay. Our special diamond suite even comes with exclusive access to the moonlit pool and star deck. We also have in house spa services, world class dining options and a shopping arcade. 
spk_0: That sounds heavenly. I think my spouse would love the moonlit pool. Can you help me make a reservation for one of your diamond suites with a sapphire bay view? 
spk_1: Of course. May I know the dates you planning to visit? 
spk_0: Sure. It would be from October 10th to 17th. 
spk_1: Excellent. Let me check the availability. Ah It looks like we have a diamond suite available for those dates. Would you like to proceed with the reservation? 
spk_0: Definitely. Whats included in the package? 
spk_1: Wonderful. The package includes breakfast, complimentary access to the moonlit pool and star deck. A one time spa treatment for two and a special romantic dinner at our cloud nine restaurant. 
spk_0: You making it impossible to resist. Lets go ahead with the booking. 
spk_1: Great. I'll need some personal information for the reservation. Can I get your full name, contact details and a credit card for the preauthorizations? 
spk_0: Certainly. My full name is Alexander Thompson. My contact number is 12345678910. And the credit card is, wait, did you say pre authorization? How much would that be? 
spk_1: Ah, I should have mentioned that earlier. My apologies. A pre authorization. A mt of $1000 will be held on your card which would be released upon checkout 
spk_0: $1000. That seems a bit excessive. Don't you think 
spk_1: I understand your concern, Alex. The pre authorization is a standard procedure to cover any incidental expenses you may incur during your stay. However, I assure you its only a hold and not an actual charge. 
spk_0: Thats still a lot. Are there any additional charges that I should know about? 
spk_1: Well, there is a 10% service charge and a 7% fantasy tax applied to the room rate. 
spk_0: Mm. You know what its a special occasion. So lets go ahead. 
spk_1: Thank you, Alex for understanding. Will ensure that your experience at Crystal Heights is well worth it. 
</data>

The summary must contain a one word sentiment analysis, and
a list of issues, problems or causes of friction
during the conversation. The output must be provided in
JSON format shown in the following example.

Example output:
{
    "sentiment": <sentiment>,
    "issues": [
        {
            "topic": <topic>,
            "summary": <issue_summary>,
        }
    ]
}

Write the JSON output and nothing more.

Here is the JSON output:
In [ ]:
kwargs = {
    "modelId": "amazon.titan-text-express-v1",
    "contentType": "application/json",
    "accept": "*/*",
    "body": json.dumps(
        {
            "inputText": prompt,
            "textGenerationConfig": {
                "maxTokenCount": 512,
                "temperature": 0,
                "topP": 0.9
            }
        }
    )
}
In [ ]:
response = bedrock_runtime.invoke_model(**kwargs)
In [ ]:
response_body = json.loads(response.get('body').read())
generation = response_body['results'][0]['outputText']
In [ ]:
print(json.dumps(response_body, indent=4))
{
    "inputTextTokenCount": 824,
    "results": [
        {
            "tokenCount": 291,
            "outputText": "\n\n{\n    \"sentiment\": \"Positive\",\n    \"issues\": [\n        {\n            \"topic\": \"Hotel services\",\n            \"summary\": \"The hotel offers exceptional views and services.\"\n        },\n        {\n            \"topic\": \"Room booking\",\n            \"summary\": \"The hotel has several room types that offer stunning views of the city skyline and the fictional Sapphire Bay.\"\n        },\n        {\n            \"topic\": \"Diamond suite\",\n            \"summary\": \"The diamond suite comes with exclusive access to the moonlit pool and star deck.\"\n        },\n        {\n            \"topic\": \"Spa services\",\n            \"summary\": \"The hotel has in-house spa services, world-class dining options, and a shopping arcade.\"\n        },\n        {\n            \"topic\": \"Reservation process\",\n            \"summary\": \"The reservation process includes breakfast, complimentary access to the moonlit pool and star deck, a one-time spa treatment for two, and a special romantic dinner at the cloud nine restaurant.\"\n        },\n        {\n            \"topic\": \"Pre-authorization\",\n            \"summary\": \"A pre-authorization of $1000 is held on the credit card, which is released upon checkout.\"\n        },\n        {\n            \"topic\": \"Additional charges\",\n            \"summary\": \"There is a 10% service charge and a 7% fantasy tax applied to the room rate.\"\n        }\n    ]\n}",
            "completionReason": "FINISH"
        }
    ]
}
In [ ]:
print(generation)

{
    "sentiment": "Positive",
    "issues": [
        {
            "topic": "Hotel services",
            "summary": "The hotel offers exceptional views and services."
        },
        {
            "topic": "Room booking",
            "summary": "The hotel has several room types that offer stunning views of the city skyline and the fictional Sapphire Bay."
        },
        {
            "topic": "Diamond suite",
            "summary": "The diamond suite comes with exclusive access to the moonlit pool and star deck."
        },
        {
            "topic": "Spa services",
            "summary": "The hotel has in-house spa services, world-class dining options, and a shopping arcade."
        },
        {
            "topic": "Reservation process",
            "summary": "The reservation process includes breakfast, complimentary access to the moonlit pool and star deck, a one-time spa treatment for two, and a special romantic dinner at the cloud nine restaurant."
        },
        {
            "topic": "Pre-authorization",
            "summary": "A pre-authorization of $1000 is held on the credit card, which is released upon checkout."
        },
        {
            "topic": "Additional charges",
            "summary": "There is a 10% service charge and a 7% fantasy tax applied to the room rate."
        }
    ]
}
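Downstream code will likely want the summary as a dict rather than a string, so it's worth parsing the model output defensively: Titan may pad the JSON with leading whitespace, and an LLM can always ignore format instructions. A hedged sketch (`parse_summary` is our name):

```python
import json

def parse_summary(generation):
    """Parse the model's JSON output into a dict, failing loudly."""
    text = generation.strip()  # Titan often prepends blank lines
    try:
        return json.loads(text)
    except json.JSONDecodeError as e:
        # The model ignored the "JSON output and nothing more" instruction
        raise ValueError(f"Model did not return valid JSON: {e}") from e
```

For example, `parse_summary(generation)['sentiment']` would yield the one-word sentiment.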

Set up model invocation logging for Amazon Bedrock¶

In this section, the goal is to enable detailed logging and monitoring of Amazon Bedrock model invocations using Amazon CloudWatch and Amazon S3 (alternatively, you could use just one of the two). This allows for effective tracking, debugging, and analysis of model performance and behavior.

See Amazon Bedrock documentation for detailed instructions.

Here's a breakdown of what we'll do:

  1. Create an IAM Role - Define an IAM role in a CloudFormation template and attach a permissions policy granting it the ability to write logs to CloudWatch and store data in S3.
  2. Trust Relationship - Specify the Bedrock service as the trusted entity in the IAM role's trust policy, allowing it to assume the role.
  3. Permissions Policy - Grant the IAM role permissions to create log streams, put log events in CloudWatch, and write data to S3.
In [ ]:
# Install the AWS command-line tool
!pip install -qU awscli
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.5/4.5 MB 9.9 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 548.2/548.2 kB 15.2 MB/s eta 0:00:00

‼️ Warning: Running the aws configure command here means you'll need to scrub your credentials from this notebook before committing it to version control.

In [ ]:
# Configure credentials if you haven't already
!aws configure

Create a bucket to store logs¶

In [ ]:
s3_client = boto3.client('s3', region_name='us-east-1')
In [ ]:
bucket_name = 'bedrock-logging-' + str(uuid.uuid4())
bucket_name
Out[ ]:
'bedrock-logging-865b1f2b-e32c-4da0-8b3d-b143445d7637'
In [ ]:
# Create a bucket to store invocation logs from Amazon Bedrock
s3_client.create_bucket(Bucket=bucket_name)
Out[ ]:
{'ResponseMetadata': {'RequestId': 'QNH2Y1X6800FQGGN',
  'HostId': 'ELgpccHlb4VhpWHH8i+TZ3teipU4K3t8Ls/NRPQV5NUzRKIxsdTyll/t7h74s29xZkKb6fNsGOQ=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'ELgpccHlb4VhpWHH8i+TZ3teipU4K3t8Ls/NRPQV5NUzRKIxsdTyll/t7h74s29xZkKb6fNsGOQ=',
   'x-amz-request-id': 'QNH2Y1X6800FQGGN',
   'date': 'Tue, 04 Jun 2024 02:56:26 GMT',
   'location': '/bedrock-logging-865b1f2b-e32c-4da0-8b3d-b143445d7637',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'Location': '/bedrock-logging-865b1f2b-e32c-4da0-8b3d-b143445d7637'}

CloudFormation configuration¶

Here we create a CloudFormation configuration file cfn_role.yaml that we use to create an IAM Role with a Trust Relationship and Permissions Policy, so that the Amazon Bedrock service can log invocations to CloudWatch and S3.

You can also apply this configuration in the AWS Console, via Terraform instead of CloudFormation, from a locally running AWS CLI, etc.

In [ ]:
%%writefile cfn_role.yaml
AWSTemplateFormatVersion: '2010-09-09'
Description: 'CloudFormation Template to Create an IAM Role with a Trust Relationship and Permissions Policy'

Resources:
  MyIAMRole:
    Type: 'AWS::IAM::Role'
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: 'Allow'
            Principal:
              Service: 'bedrock.amazonaws.com'
            Action: 'sts:AssumeRole'
            Condition:
              StringEquals:
                'aws:SourceAccount': !Ref 'AWS::AccountId'
              ArnLike:
                'aws:SourceArn': !Sub 'arn:aws:bedrock:us-east-1:${AWS::AccountId}:*'

  MyRolePolicy:
    Type: 'AWS::IAM::Policy'
    Properties:
      PolicyName: 'MyIAMRolePolicy'
      Roles:
        - !Ref MyIAMRole
      PolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: 'Allow'
            Action:
              - 'logs:CreateLogStream'
              - 'logs:PutLogEvents'
            Resource: !Sub 'arn:aws:logs:us-east-1:${AWS::AccountId}:log-group:my-amazon-bedrock-logs:log-stream:aws/bedrock/modelinvocations'
          - Effect: 'Allow'
            Action:
              - 'logs:DescribeLogGroups'
              - 'logs:DescribeLogStreams'
              - 'logs:CreateLogGroup'
            Resource: !Sub 'arn:aws:logs:us-east-1:${AWS::AccountId}:log-group:my-amazon-bedrock-logs'

Outputs:
  RoleArn:
    Description: 'ARN of the IAM role Bedrock assumes for logging'
    Value: !GetAtt MyIAMRole.Arn
Overwriting cfn_role.yaml

Create the CloudFormation stack¶

In [ ]:
!aws cloudformation create-stack --stack-name MyAmazonBedrockIAMRoleStack --template-body file://cfn_role.yaml --capabilities CAPABILITY_NAMED_IAM
arn:aws:cloudformation:us-east-1:128035544350:stack/MyAmazonBedrockIAMRoleStack/01f2bab0-221f-11ef-b712-0e03c24809dd
In [ ]:
# Or use the boto3 client instead of the above aws CLI command

# import boto3

# # Initialize a session using Amazon CloudFormation
# cloudformation_client = boto3.client('cloudformation')

# # Define the stack parameters
# stack_name = 'MyAmazonBedrockIAMRoleStack'
# template_body = ''
# capabilities = ['CAPABILITY_NAMED_IAM']

# # Load the CloudFormation template from a file
# with open('cfn_role.yaml', 'r') as file:
#     template_body = file.read()

# # Create the CloudFormation stack
# response = cloudformation_client.create_stack(
#     StackName=stack_name,
#     TemplateBody=template_body,
#     Capabilities=capabilities
# )

# print(response)
In [ ]:
# Get the ARN for the IAM Role
!aws cloudformation describe-stacks --stack-name MyAmazonBedrockIAMRoleStack --query "Stacks[0].Outputs[?OutputKey=='RoleArn'].OutputValue" --output text
arn:aws:iam::128035544350:role/MyAmazonBedrockIAMRoleStack-MyIAMRole-i11dyuABwd9f
In [ ]:
# Store the output in an environment variable
role_arn = !aws cloudformation describe-stacks --stack-name MyAmazonBedrockIAMRoleStack --query "Stacks[0].Outputs[?OutputKey=='RoleArn'].OutputValue" --output text
os.environ['LOGGING_ROLE_ARN'] = role_arn[0]
os.environ['LOGGING_BUCKETNAME'] = bucket_name
In [ ]:
# Verify environment variables are set
print("LOGGING_ROLE_ARN:", os.environ.get('LOGGING_ROLE_ARN'))
print("LOGGING_BUCKETNAME:", os.environ.get('LOGGING_BUCKETNAME'))
LOGGING_ROLE_ARN: arn:aws:iam::128035544350:role/MyAmazonBedrockIAMRoleStack-MyIAMRole-i11dyuABwd9f
LOGGING_BUCKETNAME: bedrock-logging-865b1f2b-e32c-4da0-8b3d-b143445d7637

Create a log group¶

We'll define a helper function for creating the CloudWatch Logs log group.

In [ ]:
from botocore.exceptions import ClientError

cloudwatch_logs_client = boto3.client('logs', region_name=default_region)

def create_log_group(log_group_name):
  try:
      response = cloudwatch_logs_client.create_log_group(logGroupName=log_group_name)
      print(f"Log group '{log_group_name}' created successfully.")
  except ClientError as e:
      if e.response['Error']['Code'] == 'ResourceAlreadyExistsException':
          print(f"Log group '{log_group_name}' already exists.")
      else:
          print(f"Failed to create log group '{log_group_name}'. Error: {e}")
In [ ]:
log_group_name = 'my-amazon-bedrock-logs'
In [ ]:
create_log_group(log_group_name)
Log group 'my-amazon-bedrock-logs' created successfully.

Alternately, you can create this log group in the AWS Console.

Create log group

Enable logging for Bedrock models¶

In [ ]:
# Create a bedrock client (i.e. NOT 'bedrock_runtime' as before!)
bedrock_client = boto3.client('bedrock', region_name=default_region)
In [ ]:
# Apply the logging configuration for Bedrock
response = bedrock_client.put_model_invocation_logging_configuration(
    loggingConfig = {
        'cloudWatchConfig': {
            'logGroupName': log_group_name,
            'roleArn': os.environ['LOGGING_ROLE_ARN'],
            'largeDataDeliveryS3Config': {
                'bucketName': os.environ['LOGGING_BUCKETNAME'],
                'keyPrefix': 'amazon_bedrock_large_data_delivery',
            }
        },
        's3Config': {
            'bucketName': os.environ['LOGGING_BUCKETNAME'],
            'keyPrefix': 'amazon_bedrock_logs',
        },
        'textDataDeliveryEnabled': True,
    }
)
In [ ]:
print(response)
{'ResponseMetadata': {'RequestId': 'dffba53e-ad7a-4880-952f-0c840d2c996f', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Tue, 04 Jun 2024 03:45:00 GMT', 'content-type': 'application/json', 'content-length': '2', 'connection': 'keep-alive', 'x-amzn-requestid': 'dffba53e-ad7a-4880-952f-0c840d2c996f'}, 'RetryAttempts': 0}}
In [ ]:
# Verify configuration is set correctly
bedrock_client.get_model_invocation_logging_configuration()
Out[ ]:
{'ResponseMetadata': {'RequestId': '5b2bc5b7-24b8-49e8-8ce4-5f39dafefd7e',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Tue, 04 Jun 2024 03:46:41 GMT',
   'content-type': 'application/json',
   'content-length': '535',
   'connection': 'keep-alive',
   'x-amzn-requestid': '5b2bc5b7-24b8-49e8-8ce4-5f39dafefd7e'},
  'RetryAttempts': 0},
 'loggingConfig': {'cloudWatchConfig': {'logGroupName': 'my-amazon-bedrock-logs',
   'roleArn': 'arn:aws:iam::128035544350:role/MyAmazonBedrockIAMRoleStack-MyIAMRole-i11dyuABwd9f',
   'largeDataDeliveryS3Config': {'bucketName': 'bedrock-logging-865b1f2b-e32c-4da0-8b3d-b143445d7637',
    'keyPrefix': 'amazon_bedrock_large_data_delivery'}},
  's3Config': {'bucketName': 'bedrock-logging-865b1f2b-e32c-4da0-8b3d-b143445d7637',
   'keyPrefix': 'amazon_bedrock_logs'},
  'textDataDeliveryEnabled': True,
  'imageDataDeliveryEnabled': True,
  'embeddingDataDeliveryEnabled': True}}
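For scripted checks (for example in a setup script), you can validate the returned loggingConfig programmatically. The sketch below operates on a hand-written dict shaped like the response above; the values are illustrative, not from a live account:

```python
# Hand-written loggingConfig shaped like the response above (values illustrative)
logging_config = {
    "cloudWatchConfig": {"logGroupName": "my-amazon-bedrock-logs"},
    "s3Config": {"bucketName": "example-logging-bucket",
                 "keyPrefix": "amazon_bedrock_logs"},
    "textDataDeliveryEnabled": True,
}

def logging_enabled(cfg):
    """True if text delivery is on and at least one destination is configured."""
    has_destination = "cloudWatchConfig" in cfg or "s3Config" in cfg
    return bool(cfg.get("textDataDeliveryEnabled")) and has_destination

print(logging_enabled(logging_config))  # True
```

A check like this is handy in pipelines where logging is required for compliance and you want a hard failure if it is ever switched off.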

Invoke a Bedrock model to test logging¶

In [ ]:
bedrock_runtime = boto3.client('bedrock-runtime', region_name=default_region)
In [ ]:
prompt = "Write a summary of Star Wars: Episode IV - A New Hope"

kwargs = {
    "modelId": "amazon.titan-text-express-v1",
    "contentType": "application/json",
    "accept": "*/*",
    "body": json.dumps(
        {
            "inputText": prompt,
            "textGenerationConfig": {
                "maxTokenCount": 512,
                "temperature": 0.7,
                "topP": 0.9
            }
        }
    )
}

response = bedrock_runtime.invoke_model(**kwargs)
response_body = json.loads(response.get('body').read())

generation = response_body['results'][0]['outputText']

print(generation)
Here is a summary of Star Wars: Episode IV - A New Hope:

In a galaxy far, far away, the evil Empire is tightening its grip on the planet, and the Rebel Alliance is fighting back. A young farm boy named Luke Skywalker is drawn into the conflict when he discovers the powerful force that can bring balance to the universe. With the help of Han Solo, Princess Leia, and the wise Jedi Master Obi-Wan Kenobi, Luke sets out on a mission to destroy the Empire's newest weapon, the Death Star. Along the way, he learns the truth about his family and the true nature of the force.

Star Wars: Episode IV - A New Hope is a classic tale of good versus evil, heroism, and self-discovery. It is the first film in the original Star Wars trilogy and has become one of the most beloved and influential movies of all time.

Verify logs exist in CloudWatch and S3¶

Here we write a helper function to print recent logs from the CloudWatch log group.

In [ ]:
# Create an instance of the CloudWatch Logs client
cloudwatch_logs_client = boto3.client('logs', region_name=default_region)
In [ ]:
from botocore.exceptions import ClientError

def print_recent_logs(log_group_name, minutes=5):
  try:
    # Calculate the time range
    end_time = int(datetime.datetime.now().timestamp() * 1000)  # Current time in milliseconds
    start_time = end_time - (minutes * 60 * 1000)  # `minutes` minutes ago in milliseconds

    # Fetch log streams (assumes logs are stored in streams within the log group)
    streams = cloudwatch_logs_client.describe_log_streams(
        logGroupName=log_group_name,
        orderBy='LastEventTime',
        descending=True
    )

    for stream in streams.get('logStreams', []):
        # Fetch log events from each stream
        events = cloudwatch_logs_client.get_log_events(
            logGroupName=log_group_name,
            logStreamName=stream['logStreamName'],
            startTime=start_time,
            endTime=end_time
        )

        for event in events.get('events', []):
            try:
                # Try to load the string as JSON
                json_data = json.loads(event['message'])
                # Pretty print the JSON data
                print(json.dumps(json_data, indent=4))
            except json.JSONDecodeError:
                # If it's not valid JSON, print the original string
                print(event['message'])
            print(f'{"-"*25}\n')

  except ClientError as e:
      print(f"Error fetching logs: {e}")
In [ ]:
print_recent_logs(log_group_name, minutes=30)
{
    "schemaType": "ModelInvocationLog",
    "schemaVersion": "1.0",
    "timestamp": "2024-06-04T04:54:24Z",
    "accountId": "128035544350",
    "identity": {
        "arn": "arn:aws:iam::128035544350:user/grayson"
    },
    "region": "us-east-1",
    "requestId": "bc4a49c0-5710-4daa-b456-95415b4078f5",
    "operation": "InvokeModel",
    "modelId": "amazon.titan-text-express-v1",
    "input": {
        "inputContentType": "application/json",
        "inputBodyJson": {
            "inputText": "Write a summary of Star Wars: Episode IV - A New Hope",
            "textGenerationConfig": {
                "maxTokenCount": 512,
                "temperature": 0.7,
                "topP": 0.9
            }
        },
        "inputTokenCount": 13
    },
    "output": {
        "outputContentType": "application/json",
        "outputBodyJson": {
            "inputTextTokenCount": 13,
            "results": [
                {
                    "tokenCount": 188,
                    "outputText": "\nHere is a summary of Star Wars: Episode IV - A New Hope:\n\nIn a galaxy far, far away, the evil Empire is tightening its grip on the planet, and the Rebel Alliance is fighting back. A young farm boy named Luke Skywalker is drawn into the conflict when he discovers the powerful force that can bring balance to the universe. With the help of Han Solo, Princess Leia, and the wise Jedi Master Obi-Wan Kenobi, Luke sets out on a mission to destroy the Empire's newest weapon, the Death Star. Along the way, he learns the truth about his family and the true nature of the force.\n\nStar Wars: Episode IV - A New Hope is a classic tale of good versus evil, heroism, and self-discovery. It is the first film in the original Star Wars trilogy and has become one of the most beloved and influential movies of all time.",
                    "completionReason": "FINISH"
                }
            ]
        },
        "outputTokenCount": 188
    }
}
-------------------------

View logs in the AWS Console¶

You can also see the logs in the AWS Console by navigating to CloudWatch > Log groups > Log streams. There you will see each invocation of your Bedrock model.

AWS CloudWatch Console

Set up AWS Lambda to trigger pipeline on events¶

In this section, I'll configure an AWS Lambda function that triggers the language model to perform summarization each time a new transcription is uploaded to S3.

Define an IAM Role for Lambda¶

In [ ]:
%%writefile lambda_role.yaml
AWSTemplateFormatVersion: "2010-09-09"
Description: "CloudFormation Template to Create an IAM Role with a Trust Relationship and Permissions Policy"

Resources:
  MyIAMRole:
    Type: "AWS::IAM::Role"
    Properties:
      RoleName: "LambdaRoleCallSummarization"
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: "Allow"
            Principal:
              Service: "lambda.amazonaws.com"
            Action: "sts:AssumeRole"

  MyRolePolicy:
    Type: "AWS::IAM::Policy"
    Properties:
      PolicyName: "MyIAMRolePolicy"
      Roles:
        - !Ref MyIAMRole
      PolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: "Allow"
            Action:
              - "bedrock:InvokeModel"
            Resource: arn:aws:bedrock:*::foundation-model/*
          - Effect: "Allow"
            Action:
              - "s3:GetObject"
              - "s3:PutObject"
            Resource: "*"
          - Effect: "Allow"
            Action:
              - "transcribe:StartTranscriptionJob"
            Resource: "*"
          - Effect: "Allow"
            Action:
              - "logs:CreateLogGroup"
              - "logs:CreateLogStream"
              - "logs:PutLogEvents"
            Resource: "*"

Outputs:
  RoleArn:
    Description: "The ARN of the created IAM Role"
    Value: !GetAtt MyIAMRole.Arn
Writing lambda_role.yaml

Create the CloudFormation stack¶

In [ ]:
# Create a role with the above CloudFormation
role_arn = !aws cloudformation create-stack --stack-name LambdaRoleCallSummarization --template-body file://lambda_role.yaml --capabilities CAPABILITY_NAMED_IAM
arn:aws:cloudformation:us-east-1:128035544350:stack/LambdaRoleCallSummarization/b7d93c60-235e-11ef-b596-122a212bd651
In [ ]:
# # Save the ARN for the created role
# !aws cloudformation describe-stacks --stack-name LambdaRoleCallSummarization
# role_arn = !aws cloudformation describe-stacks --stack-name LambdaRoleCallSummarization --query "Stacks[0].Outputs[?OutputKey=='RoleArn'].OutputValue" --output text

Create a Lambda layer for dependencies¶

In [ ]:
!aws lambda publish-layer-version --layer-name bedrock-jinja-layer --zip-file fileb://bedrock-jinja-layer.zip
2024-06-05T17:08:16.033+0000		arn:aws:lambda:us-east-1:128035544350:layer:bedrock-jinja-layer	arn:aws:lambda:us-east-1:128035544350:layer:bedrock-jinja-layer:1	1
CONTENT	M2CO2rpfz+qGSR3EpOlgbnMCBz/VWt7L9LBXaP8QqB0=	14796771	https://prod-iad-c1-djusa-layers.s3.us-east-1.amazonaws.com/snapshots/128035544350/bedrock-jinja-layer-56f31076-035f-4878-97bf-a4f1d2c6be27?versionId=dB9pK0o51YbXdr3HHCUn0Z6SxsEDHJ.t&X-Amz-Security-Token=IQoJb3JpZ2luX2VjEDkaCXVzLWVhc3QtMSJHMEUCIBLzJ8mMPX9H14zHtdH9Om%2B4WdIS5aZ8eF319HwHlZe%2BAiEA0%2FHgzacWhNTeTaOTkaecradmdvjqKzb%2FD8LgO3MZAfcqwwUIwv%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FARAEGgw0NzkyMzMwMjUzNzkiDAeM5od7174UcYU9wCqXBdlTEPMf1DhnAJpdHGEpx7bbfvSgaSuPTWVinLJmq6Ub5MWKvIBZjDR7%2FYgjy%2BF6REzoKRM3vLLAQKInvPudey8u34hL1eCSHttA4aODODUszkMChfoRZGKpNHWj3jLtre6a%2Bw2WerBISZ6li0BmRWnjFwJlyfCOFIh1bJK%2BXFXgSH8AguknIwaeSBkaROxSNAZG50cTEJcwJoWGO63MyVlElmAVdwPgr2o%2F6X0FgOJTGhVedn%2FcAZCrxkoPryJtIdyf9eS1%2Bb1RjdRXXyGdPNLLYDwt3NcrjgAW8mp0XGyx6Y5Qv4U44DjRNxvEXG2SH%2BwbjVWL%2FAXsp2tnnuyZ8GEPxnTBrzUo6M3ick%2FdiyhyvBORoRVCdXyQD%2FmRNb%2FvXZAtRplHwGFz07RbdJqHvHkC07D421E4YshmqNkJRFib3WR679nY5y%2FfnhrRIkL5miXbC%2FCHx5uZ%2F8%2FNycGolaih1WvycCh2BTEEUKX%2BCc1h8FjPRNwaZtB26tNN4SOiV11ju83sMrseRtDyUjBN5E3q%2FJGG4mv%2BaZTHkY165GJ2NEliXptlhQDwG8LHJ6RlE2q%2BhnN7y3LbLgJty7QWGZypUcxZptwK89o%2FO6w7qKeCuoXDs8gnKbVPWoBj%2BbAIpk9Wm8u077Y%2FT7I6HwKdO9LyjOyTr0JbfF%2FEqjR9K3Dp2wboppS96%2BLPEYfL3cXBAcHNAUi1ETQuO6Eq9OtRpF5lAruWuEEgXiSqM1KTwn%2F9V8zWEOs9jFIq3VCKU516Yw2VbMNElhr%2BWn336p%2FJhz247%2BgnjXh%2FDBYGYkbEzjyxM6jpc0dqC5NVZmRL1l6DstwBk8j2y2%2F4nIvWb6Gwfp94iBfuq9cmf0ynAHkIlEQ2EWKScDTt5zDbrIKzBjqxAVlZjX1grLpbkwrPoD%2F7lVmRhqtikeW7nrNp8dzZJn6xEaVPufFcJL4UOJLFT9k7uxA3aJuEJCn8aKOVO4ANnjk05DslfSn%2FeM8%2BeCja76uAcLNR51ipWZvxwFNjJEnVaRfXSUQMoM8gxR7Oxar5nWZ83f7wrxTKXuwFrHNyuhxaGF78AzQZJHkNHngTPTcGCkfmsThoQZmIQ7imyt%2FHfqy%2BhWJY9%2BCePfJ9YW8xh%2F9YLQ%3D%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20240605T170810Z&X-Amz-SignedHeaders=host&X-Amz-Expires=600&X-Amz-Credential=ASIAW7FEDUVRRNHTSZNE%2F20240605%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Signature=1017ab6d9206705a38ac639761d12d50dfdd69b75e895b907b6e79a948977a2c
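If you need to build the layer zip yourself, Lambda expects Python dependencies to live under a top-level python/ folder inside the archive. The sketch below demonstrates that layout with a placeholder module standing in for the real `pip install jinja2 --target layer/python` step; the directory and archive names are illustrative:

```python
import os
import zipfile

# Lambda Python layers must place packages under a top-level "python/" folder
# inside the zip. A placeholder module stands in here for the real
# `pip install jinja2 --target layer/python` step (names are illustrative).
os.makedirs("layer/python", exist_ok=True)
with open("layer/python/placeholder.py", "w") as f:
    f.write("# site-packages would go here\n")

with zipfile.ZipFile("bedrock-jinja-layer-demo.zip", "w") as zf:
    for root, _, files in os.walk("layer/python"):
        for name in files:
            path = os.path.join(root, name)
            # Arcnames are relative to "layer/", so entries start with "python/"
            zf.write(path, os.path.relpath(path, "layer"))

print(zipfile.ZipFile("bedrock-jinja-layer-demo.zip").namelist())
```

If the `python/` prefix is missing from the archive entries, the layer will publish successfully but imports will fail at runtime, which is a common gotcha.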

Set environment variables used by the Lambda function¶

In [ ]:
# Set environment variables
# import os
os.environ['LEARNER_S3_BUCKETNAME_TEXT'] = bucket_name
os.environ['LAMBDALAYERVERSIONARN'] = 'arn:aws:lambda:us-east-1:128035544350:layer:bedrock-jinja-layer:1'
In [ ]:
%cd /content/ai-cookbook/data-processing/call-summarization
/content/ai-cookbook/data-processing/call-summarization

Download helper functions¶

We'll use some helper classes for creating and managing Lambda functions and S3 buckets, and for displaying logs and files.

In [ ]:
# rm -rf /content/ai-cookbook
In [10]:
# Clone this repo if you haven't already done so
%cd /content/
!git clone https://github.com/gadkins/ai-cookbook.git
/content
Cloning into 'ai-cookbook'...
remote: Enumerating objects: 217, done.
remote: Counting objects: 100% (217/217), done.
remote: Compressing objects: 100% (167/167), done.
remote: Total 217 (delta 65), reused 191 (delta 39), pack-reused 0
Receiving objects: 100% (217/217), 18.80 MiB | 32.63 MiB/s, done.
Resolving deltas: 100% (65/65), done.
In [11]:
%cd /content/ai-cookbook/data-processing/call-summarization
/content/ai-cookbook/data-processing/call-summarization
In [12]:
!pwd
/content/ai-cookbook/data-processing/call-summarization
In [13]:
from helpers.lambda_helper import Lambda_Helper
from helpers.s3_helper import S3_Helper
from helpers.display_helper import Display_Helper
In [14]:
lambda_helper = Lambda_Helper()
# Includes functions:
# deploy_function()
# add_lambda_trigger()
In [15]:
s3_helper = S3_Helper()
# Includes functions:
# upload_file()
# download_object()
# list_objects()
In [16]:
display_helper = Display_Helper()
# Includes functions:
# text_file()
# json_file()

Create (advanced) prompt template¶

Similar to the previous prompt template we created, this one uses the jinja2 templating engine to inject the transcript between the <data> XML tags. However, here we additionally configure it to receive a list of topics.

In [33]:
%%writefile prompt_template.txt
I need to summarize a conversation. The transcript of the conversation is between the <data> XML like tags.

<data>
{{transcript}}
</data>

The summary must contain a one word sentiment analysis, and a list of issues, problems or causes of friction
during the conversation. The output must be provided in JSON format shown in the following example.

Example output:
{
    "version": 0.1,
    "sentiment": <sentiment>,
    "issues": [
        {
            "topic": <topic>,
            "summary": <issue_summary>,
        }
    ]
}

An `issue_summary` must only be one of:
{%- for topic in topics %}
 - `{{topic}}`
{% endfor %}

Write the JSON output and nothing more.

Here is the JSON output:
Overwriting prompt_template.txt
In [18]:
display_helper.text_file('prompt_template.txt')
prompt_template.txt:
I need to summarize a conversation. The transcript of the conversation is between the <data> XML like tags.

<data>
{{transcript}}
</data>

The summary must contain a one word sentiment analysis, and a list of issues, problems or causes of friction
during the conversation. The output must be provided in JSON format shown in the following example.

Example output:
{
"version": 0.1,
"sentiment": <sentiment>,
"issues": [
{
"topic": <topic>,
"summary": <issue_summary>,
}
]
}

An `issue_summary` must only be one of:
{%- for topic in topics %}
- `{{topic}}`
{% endfor %}

Write the JSON output and nothing more.

Here is the JSON output:
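Rendering a trimmed-down version of the template locally is a cheap way to sanity-check the jinja2 whitespace-control tags (`{%- ... %}`) before deploying. This sketch assumes jinja2 is installed in the notebook environment; the template string is a shortened stand-in for prompt_template.txt:

```python
from jinja2 import Template  # the same engine the Lambda layer provides

# Shortened stand-in for prompt_template.txt, keeping the whitespace-control tag
template_string = (
    "An `issue_summary` must only be one of:"
    "{%- for topic in topics %}\n - `{{topic}}`{% endfor %}"
)
rendered = Template(template_string).render(topics=["charges", "location"])
print(rendered)
```

The `{%-` variant strips whitespace before the tag, which keeps the rendered topic list tight instead of introducing stray blank lines into the prompt.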

Define the Lambda Function¶

The file we create here will be uploaded to the AWS Lambda service. The lambda_handler() function is the entry point that runs each time the function is triggered. In this case, the trigger fires any time a file matching *-transcript.json is posted to our S3 bucket.

In [34]:
%%writefile lambda_function.py


#############################################################
#
# This Lambda function is written to a file by the notebook
# It does not run in the notebook!
#
#############################################################

import boto3
import json
from jinja2 import Template

s3_client = boto3.client('s3')
bedrock_runtime = boto3.client('bedrock-runtime', 'us-east-1')

def lambda_handler(event, context):

    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']

    # Since we're posting both the transcript and summaries to the same S3 bucket,
    # we need to check that "-transcript.json" is included in the file name, otherwise
    # we'd get stuck in a loop when the summaries get posted to the bucket.
    if "-transcript.json" not in key:
        print("This demo only works with *-transcript.json.")
        return

    try:
        file_content = ""

        response = s3_client.get_object(Bucket=bucket, Key=key)

        file_content = response['Body'].read().decode('utf-8')

        transcript = extract_transcript_from_textract(file_content)

        # Note this will be printed in the Lambda environment logs
        print(f"Successfully read file {key} from bucket {bucket}.")

        print(f"Transcript: {transcript}")

        summary = bedrock_summarisation(transcript)

        s3_client.put_object(
            Bucket=bucket,
            Key='results.txt',
            Body=summary,
            ContentType='text/plain'
        )

    except Exception as e:
        print(f"Error occurred: {e}")
        return {
            'statusCode': 500,
            'body': json.dumps(f"Error occurred: {e}")
        }

    return {
        'statusCode': 200,
        'body': json.dumps(f"Successfully summarized {key} from bucket {bucket}. Summary: {summary}")
    }


# Unpack the nested Amazon Transcribe output and format the transcript as {speaker}: {dialog}\n
def extract_transcript_from_textract(file_content):

    transcript_json = json.loads(file_content)

    output_text = ""
    current_speaker = None

    items = transcript_json['results']['items']

    # Iterate through the content word by word:
    for item in items:
        speaker_label = item.get('speaker_label', None)
        content = item['alternatives'][0]['content']

        # Start the line with the speaker label:
        if speaker_label is not None and speaker_label != current_speaker:
            current_speaker = speaker_label
            output_text += f"\n{current_speaker}: "

        # Add the speech content:
        if item['type'] == 'punctuation':
            output_text = output_text.rstrip()  # Remove the last space

        output_text += f"{content} "

    return output_text


def bedrock_summarisation(transcript):

    with open('prompt_template.txt', "r") as file:
        template_string = file.read()

    data = {
        'transcript': transcript,
        'topics': ['charges', 'location', 'availability']
    }

    template = Template(template_string)
    prompt = template.render(data)

    print(prompt)

    kwargs = {
        "modelId": "amazon.titan-text-express-v1",
        "contentType": "application/json",
        "accept": "*/*",
        "body": json.dumps(
            {
                "inputText": prompt,
                "textGenerationConfig": {
                    "maxTokenCount": 2048,
                    "stopSequences": [],
                    "temperature": 0,
                    "topP": 0.9
                }
            }
        )
    }

    response = bedrock_runtime.invoke_model(**kwargs)

    summary = json.loads(response.get('body').read()).get('results')[0].get('outputText')
    return summary
Overwriting lambda_function.py
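To check the speaker-label formatting without deploying anything, the same extraction logic can be exercised locally against a hand-written payload that mirrors the structure of Amazon Transcribe output (the sample items below are illustrative):

```python
import json

# Hypothetical minimal Transcribe output, mirroring the structure that
# extract_transcript_from_textract() in the Lambda function expects.
sample = {
    "results": {
        "items": [
            {"type": "pronunciation", "speaker_label": "spk_0",
             "alternatives": [{"content": "Hello"}]},
            {"type": "punctuation",
             "alternatives": [{"content": "."}]},
            {"type": "pronunciation", "speaker_label": "spk_1",
             "alternatives": [{"content": "Hi"}]},
        ]
    }
}

def extract_transcript(file_content):
    """Same word-by-word logic as the Lambda, runnable locally."""
    transcript_json = json.loads(file_content)
    output_text = ""
    current_speaker = None
    for item in transcript_json["results"]["items"]:
        speaker_label = item.get("speaker_label")
        content = item["alternatives"][0]["content"]
        # Start a new line whenever the speaker changes
        if speaker_label is not None and speaker_label != current_speaker:
            current_speaker = speaker_label
            output_text += f"\n{current_speaker}: "
        # Attach punctuation directly to the preceding word
        if item["type"] == "punctuation":
            output_text = output_text.rstrip()
        output_text += f"{content} "
    return output_text

print(extract_transcript(json.dumps(sample)))
```

Walking through a toy payload like this makes it easy to confirm that punctuation is glued to the preceding word and that speaker turns each start on a new line.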

Deploy Lambda function¶

In [35]:
lambda_helper.deploy_function(
    ["lambda_function.py", "prompt_template.txt"],
    function_name="LambdaFunctionSummarize"
)
Zipping function...
Looking for existing function...
Function LambdaFunctionSummarize exists. Updating code...
Function LambdaFunctionSummarize code updated: 2024-06-10T15:30:29.000+0000
Done.
In [28]:
# s3_client = boto3.client('s3', region_name=default_region)
# bucket_name_text = 'call-center-text' + str(uuid.uuid4())
# s3_client.create_bucket(Bucket=bucket_name_text)
In [37]:
lambda_helper.filter_rules_suffix = "json"
lambda_helper.add_lambda_trigger(bucket_name_text)
Using function name of deployed function: LambdaFunctionSummarize
Removed existing permission: s3-trigger-permission
Permission added with Statement: {
    "Sid": "s3-trigger-permission",
    "Effect": "Allow",
    "Principal": {
        "Service": "s3.amazonaws.com"
    },
    "Action": "lambda:InvokeFunction",
    "Resource": "arn:aws:lambda:us-east-1:128035544350:function:LambdaFunctionSummarize",
    "Condition": {
        "ArnLike": {
            "AWS:SourceArn": "arn:aws:s3:::call-center-text-78179769-0fbf-4bce-90a1-927777b4cfb9"
        }
    }
}
Trigger added for call-center-text-78179769-0fbf-4bce-90a1-927777b4cfb9 -> LambdaFunctionSummarize

Upload a test transcript to S3¶

In [40]:
s3_helper.upload_file(bucket_name_text, 'demo-transcript.json')
Object 'demo-transcript.json' uploaded to bucket 'call-center-text-78179769-0fbf-4bce-90a1-927777b4cfb9'

Confirm results were stored in S3¶

In [ ]:
s3_helper.list_objects(bucket_name)
Object: amazon_bedrock_large_data_delivery/AWSLogs/128035544350/BedrockModelInvocationLogs/us-east-1/2024/06/04/03/data/amazon-bedrock-logs-permission-check, Created on: 2024-06-04 03:45:01+00:00
Object: amazon_bedrock_logs/AWSLogs/128035544350/BedrockModelInvocationLogs/us-east-1/2024/06/04/03/amazon-bedrock-logs-permission-check, Created on: 2024-06-04 03:45:01+00:00
Object: amazon_bedrock_logs/AWSLogs/128035544350/BedrockModelInvocationLogs/us-east-1/2024/06/04/03/data/amazon-bedrock-logs-permission-check, Created on: 2024-06-04 03:45:01+00:00
Object: amazon_bedrock_logs/AWSLogs/128035544350/BedrockModelInvocationLogs/us-east-1/2024/06/04/04/20240604T041355005Z_e25fe09f85843b65.json.gz, Created on: 2024-06-04 04:13:56+00:00
Object: amazon_bedrock_logs/AWSLogs/128035544350/BedrockModelInvocationLogs/us-east-1/2024/06/04/04/20240604T045524315Z_205869554e9b5e13.json.gz, Created on: 2024-06-04 04:55:25+00:00
Object: amazon_bedrock_logs/AWSLogs/128035544350/BedrockModelInvocationLogs/us-east-1/2024/06/06/00/20240606T002115598Z_d9f003f7ef6c3f15.json.gz, Created on: 2024-06-06 00:21:16+00:00
Object: demo-transcript.json, Created on: 2024-06-06 00:20:09+00:00
Object: results.txt, Created on: 2024-06-06 00:20:16+00:00
In [ ]:
s3_helper.download_object(bucket_name, "results.txt")
Object 'results.txt' from bucket 'bedrock-logging-865b1f2b-e32c-4da0-8b3d-b143445d7637' to './results.txt'
In [ ]:
display_helper.text_file('results.txt')
results.txt:

{
"version": 0.1,
"sentiment": "positive",
"issues": [
{
"topic": "charges",
"summary": "pre authorization is a standard procedure to cover any incidental expenses you may incur during your stay"
}
]
}
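Since the summary is emitted as JSON, downstream consumers can parse it directly. A minimal sketch, using an illustrative payload shaped like the results.txt shown above:

```python
import json

# Illustrative payload shaped like the results.txt output above
summary_text = """
{
    "version": 0.1,
    "sentiment": "positive",
    "issues": [
        {"topic": "charges",
         "summary": "pre authorization is a standard procedure"}
    ]
}
"""
summary = json.loads(summary_text)
topics = [issue["topic"] for issue in summary["issues"]]
print(summary["sentiment"], topics)
```

In practice you may want to wrap the `json.loads` in a try/except, since an LLM can occasionally emit malformed JSON despite the prompt's instructions.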

Putting it all together: Event-driven, end-to-end pipeline¶

Until now, we've progressively added more complexity to our pipeline. Now we'll put it all together such that each time a new audio file is uploaded to S3, an AWS Lambda function is triggered to initiate transcription, summarization, storing, and logging jobs.

In [38]:
# Create an S3 client to store audio files
s3_client = boto3.client('s3', region_name='us-east-1')
In [16]:
# Use two buckets: one for uploading audio, and another for storing the text
# transcriptions and summaries
bucket_name_audio = 'call-center-audio-' + str(uuid.uuid4())
bucket_name_text = 'call-center-text-' + str(uuid.uuid4())
In [41]:
print(bucket_name_audio)
print(bucket_name_text)
call-center-audio-5259cfae-3d68-4b80-a02a-39ed7679c445
call-center-text-78179769-0fbf-4bce-90a1-927777b4cfb9
In [18]:
s3_client.create_bucket(Bucket=bucket_name_audio)
s3_client.create_bucket(Bucket=bucket_name_text)
Out[18]:
{'ResponseMetadata': {'RequestId': 'MF0QG8PXH5GTWPAH',
  'HostId': 'W2Bxi0Vp+yhU2fDuG7kNTcecFJgKS/0PhJ41VApJkNNW3nAceA41F3qhk1wxFDZ/IIaLfojbJxI=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'W2Bxi0Vp+yhU2fDuG7kNTcecFJgKS/0PhJ41VApJkNNW3nAceA41F3qhk1wxFDZ/IIaLfojbJxI=',
   'x-amz-request-id': 'MF0QG8PXH5GTWPAH',
   'date': 'Fri, 07 Jun 2024 20:11:08 GMT',
   'location': '/call-center-dialog-summary-119eb663-03d8-41c5-9e30-618bd08dfcd2',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'Location': '/call-center-dialog-summary-119eb663-03d8-41c5-9e30-618bd08dfcd2'}
In [8]:
# import os

os.environ['LEARNER_S3_BUCKETNAME_AUDIO'] = 'call-center-audio-5259cfae-3d68-4b80-a02a-39ed7679c445'
os.environ['LEARNER_S3_BUCKETNAME_TEXT'] = 'call-center-text-78179769-0fbf-4bce-90a1-927777b4cfb9'
os.environ['LAMBDALAYERVERSIONARN'] = 'arn:aws:lambda:us-east-1:128035544350:layer:bedrock-jinja-layer:1'
In [47]:
%%writefile lambda_function.py

#############################################################
#
# This Lambda function is written to a file by the notebook
# It does not run in the notebook!
#
#############################################################

import json
import boto3
import uuid
import os

s3_client = boto3.client('s3')
transcribe_client = boto3.client('transcribe', region_name='us-east-1')

def lambda_handler(event, context):
    # Extract the bucket name and key from the incoming event
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']

    # Only process the demo audio file. The S3 trigger already filters on the
    # ".mp3" suffix, but checking the exact key keeps other audio files from
    # kicking off unintended transcription jobs.
    if key != "dialog.mp3":
        print("This demo only works with dialog.mp3.")
        return

    try:

        job_name = 'transcription-job-' + str(uuid.uuid4()) # Needs to be a unique name

        response = transcribe_client.start_transcription_job(
            TranscriptionJobName=job_name,
            Media={'MediaFileUri': f's3://{bucket}/{key}'},
            MediaFormat='mp3',
            LanguageCode='en-US',
            OutputBucketName= os.environ['S3_BUCKET_NAME_TEXT'],  # specify the output bucket
            OutputKey=f'{job_name}-transcript.json',
            Settings={
                'ShowSpeakerLabels': True,
                'MaxSpeakerLabels': 2
            }
        )

    except Exception as e:
        print(f"Error occurred: {e}")
        return {
            'statusCode': 500,
            'body': json.dumps(f"Error occurred: {e}")
        }

    return {
        'statusCode': 200,
        'body': json.dumps(f"Submitted transcription job for {key} from bucket {bucket}.")
    }
Overwriting lambda_function.py
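The handler above only reads a few fields from the S3 event. A hand-written event like the following (bucket name illustrative) is enough to trace the field extraction and the dialog.mp3 guard locally:

```python
# Hand-written S3 "ObjectCreated" event, trimmed to the fields the handler reads
event = {
    "Records": [
        {"s3": {"bucket": {"name": "call-center-audio-example"},
                "object": {"key": "dialog.mp3"}}}
    ]
}

bucket = event["Records"][0]["s3"]["bucket"]["name"]
key = event["Records"][0]["s3"]["object"]["key"]
should_transcribe = (key == "dialog.mp3")  # same guard as in lambda_handler
print(bucket, key, should_transcribe)
```

Payloads like this are also what you would paste into the Lambda console's test-event feature to exercise the function without uploading a file.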
In [48]:
# Set environment variables needed by our Lambda function and deploy the function
# up to the AWS Lambda cloud service
import boto3, os

from helpers.lambda_helper import Lambda_Helper
from helpers.s3_helper import S3_Helper

lambda_helper = Lambda_Helper()
s3_helper = S3_Helper()

lambda_helper.lambda_environ_variables = {'S3_BUCKET_NAME_TEXT' : bucket_name_text}
lambda_helper.deploy_function(["lambda_function.py"], function_name="LambdaFunctionTranscribe")
Zipping function...
Looking for existing function...
Function LambdaFunctionTranscribe exists. Updating code...
Function LambdaFunctionTranscribe code updated: 2024-06-10T17:53:29.000+0000
Done.
In [49]:
# Ensure this function only runs for .mp3 files
lambda_helper.filter_rules_suffix = "mp3"
lambda_helper.add_lambda_trigger(bucket_name_audio, function_name="LambdaFunctionTranscribe")
Removed existing permission: s3-trigger-permission
Permission added with Statement: {
    "Sid": "s3-trigger-permission",
    "Effect": "Allow",
    "Principal": {
        "Service": "s3.amazonaws.com"
    },
    "Action": "lambda:InvokeFunction",
    "Resource": "arn:aws:lambda:us-east-1:128035544350:function:LambdaFunctionTranscribe",
    "Condition": {
        "ArnLike": {
            "AWS:SourceArn": "arn:aws:s3:::call-center-audio-5259cfae-3d68-4b80-a02a-39ed7679c445"
        }
    }
}
Trigger added for call-center-audio-5259cfae-3d68-4b80-a02a-39ed7679c445 -> LambdaFunctionTranscribe

Test the end-to-end pipeline¶

Once we upload a test audio file, we should see a transcription file appear in our text bucket, followed shortly by a results.txt summary in the same bucket.
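Transcription is asynchronous, so in practice you may want to poll until the job finishes rather than repeatedly listing the bucket. In a live session the status would come from `transcribe_client.get_transcription_job(TranscriptionJobName=job_name)` via `TranscriptionJob.TranscriptionJobStatus`; here a stub supplies the statuses so the loop logic can run anywhere:

```python
import time

def wait_for_job(get_status, poll_seconds=0, max_polls=10):
    """Poll until the job reaches a terminal state or we give up."""
    for _ in range(max_polls):
        status = get_status()  # live version: read TranscriptionJobStatus
        if status in ("COMPLETED", "FAILED"):
            return status
        time.sleep(poll_seconds)
    return "TIMED_OUT"

# Stubbed status sequence standing in for successive API responses
statuses = iter(["IN_PROGRESS", "IN_PROGRESS", "COMPLETED"])
result = wait_for_job(lambda: next(statuses))
print(result)
```

With a real client, you would pass a small lambda that makes the API call and returns the status string, and set `poll_seconds` to something like 5–10 seconds.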

In [50]:
# Upload a test audio file
s3_helper.upload_file(bucket_name_audio, 'dialog.mp3')
Object 'dialog.mp3' uploaded to bucket 'call-center-audio-5259cfae-3d68-4b80-a02a-39ed7679c445'
In [51]:
# Verify audio file was successfully uploaded to S3
s3_helper.list_objects(bucket_name_audio)
Object: dialog.mp3, Created on: 2024-06-10 17:54:56+00:00
In [53]:
# Verify transcription and results.txt were successfully executed and saved to S3
s3_helper.list_objects(bucket_name_text)
Object: .write_access_check_file.temp, Created on: 2024-06-10 17:54:59+00:00
Object: results.txt, Created on: 2024-06-10 17:55:25+00:00
Object: transcription-job-cd231453-897a-4c6b-9970-5df2faa971a9-transcript.json, Created on: 2024-06-10 17:55:20+00:00
In [54]:
# Download the results.txt file to view the call summary
s3_helper.download_object(bucket_name_text, 'results.txt')
Object 'results.txt' from bucket 'call-center-text-78179769-0fbf-4bce-90a1-927777b4cfb9' to './results.txt'
In [56]:
# Display result.txt in this notebook
from helpers.display_helper import Display_Helper
display_helper = Display_Helper()
display_helper.text_file('results.txt')
results.txt:


{
"version": 0.1,
"sentiment": "positive",
"issues": [
{
"topic": "charges",
"summary": "pre authorization is a standard procedure to cover any incidental expenses you may incur during your stay"
}
]
}

That's it! You should see the transcription JSON and results.txt files in the text bucket.