# Deployment settings: replace both placeholders before running the commands
# below; they are used to build the ECR registry host and the IAM/Lambda ARNs.
REGION="<your-region>"
AWS_ACCOUNT_ID="<your-account_id>"
# Build the arm64 image from ./Dockerfile. The tag is `phi3` because the local
# `docker run` test and the later `docker tag` (LOCAL_TAG_NAME="phi3") both
# look up llm_on_lambda:phi3; the previous `7May` tag matched neither.
sudo docker buildx build --platform linux/arm64 -f Dockerfile . -t llm_on_lambda:phi3
% cat test_event.json
{
"body": {"prompt": "You are a helpful agent who does Zero Shot Classification on the `Input_Text`\n Input_Text: `Interstellar move is my favorite`. Classify if `Input_Text` is Positive or Negative. You can answer in one word."}
}
# Run the image locally; host port 9000 is mapped to container port 8080.
% docker run --rm -ti --platform linux/arm64 -p 9000:8080 llm_on_lambda:phi3
# from the same directory opened in another terminal
# Send test_event.json to the local invocation endpoint and save the reply.
% curl -X PUT -d @test_event.json "http://localhost:9000/2015-03-31/functions/function/invocations" > output.json
# Pull the `body` field out of the saved reply and pretty-print it with jq.
% jp -f output.json -u 'body' | jq .
{
"main_output": " Positive",
"full_output": {
"id": "cmpl-554fd015-69ed-4df5-9f25-06567db98678",
"object": "text_completion",
"created": 1715431836,
"model": "./model/Phi-3-mini-4k-instruct-q4.gguf",
"choices": [
{
"text": " Positive",
"index": 0,
"logprobs": null,
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 60,
"completion_tokens": 2,
"total_tokens": 62
}
}
}
# Log Docker in to the account's private ECR registry. The region is passed
# explicitly so the token is requested for the same region as the registry
# host below, rather than for whatever region the CLI profile defaults to.
aws ecr get-login-password --region "${REGION}" \
  | docker login --username AWS --password-stdin "${AWS_ACCOUNT_ID}.dkr.ecr.${REGION}.amazonaws.com"
ECR_REPO_NAME="phi3_model"
LOCAL_TAG_NAME="phi3"
ECR_TAG_NAME="phi3_mini"
# Run the next command only once, when first creating the repository in ECR.
aws ecr create-repository --region "${REGION}" --repository-name "${ECR_REPO_NAME}"
# Re-tag the local image with its ECR URI, then push it.
docker tag "llm_on_lambda:${LOCAL_TAG_NAME}" "${AWS_ACCOUNT_ID}.dkr.ecr.${REGION}.amazonaws.com/${ECR_REPO_NAME}:${ECR_TAG_NAME}"
docker push "${AWS_ACCOUNT_ID}.dkr.ecr.${REGION}.amazonaws.com/${ECR_REPO_NAME}:${ECR_TAG_NAME}"
# URI of the image in ECR (same repository and tag that were pushed).
DOCKER_IMAGE_URI="${AWS_ACCOUNT_ID}.dkr.ecr.${REGION}.amazonaws.com/${ECR_REPO_NAME}:${ECR_TAG_NAME}"
# Names of the IAM policy and role, and the JSON documents that define them.
POLICY_NAME="container_lambda_iam_policy"
ROLE_NAME="container_lambda_role"
location_of_iam_policy_json_file="container_iam_policy.json"
location_of_trust_policy_json_file="trust_policy.json"
# The JSON documents are referenced relative to ./codes.
cd ./codes
# Create the policy and the role, then attach one to the other.
aws iam create-policy \
  --policy-name "${POLICY_NAME}" \
  --policy-document "file://${location_of_iam_policy_json_file}"
aws iam create-role \
  --role-name "${ROLE_NAME}" \
  --assume-role-policy-document "file://${location_of_trust_policy_json_file}"
aws iam attach-role-policy \
  --role-name "${ROLE_NAME}" \
  --policy-arn "arn:aws:iam::${AWS_ACCOUNT_ID}:policy/${POLICY_NAME}"
# Keep the role ARN; it is passed to `aws lambda create-function --role`.
IAM_ROLE_ARN="$(aws iam get-role --role-name "${ROLE_NAME}" --query "Role.Arn" --output text)"
# Settings for the container-image Lambda function.
FUNCTION_NAME="container_lambda_to_run_slm"
MEMORY_SIZE=10240
TIMEOUT=600
ARCHITECTURE=arm64
# Create the function from the ECR image. --region is explicit because the
# API Gateway integration URI further down addresses this function in
# ${REGION}; without it the function is created in the CLI's default region.
% aws lambda create-function \
--region "${REGION}" \
--function-name "${FUNCTION_NAME}" \
--package-type Image \
--code "ImageUri=${DOCKER_IMAGE_URI}" \
--role "${IAM_ROLE_ARN}" \
--memory-size "${MEMORY_SIZE}" \
--timeout "${TIMEOUT}" \
--architectures "${ARCHITECTURE}"
# Point the function at the image in DOCKER_IMAGE_URI (use after re-pushing).
% aws lambda update-function-code \
--region "${REGION}" \
--function-name "${FUNCTION_NAME}" \
--image-uri "${DOCKER_IMAGE_URI}"
# Lambda rejects a new update while the previous one is still in progress,
# so wait for each update to finish before issuing the next one.
% aws lambda wait function-updated --region "${REGION}" --function-name "${FUNCTION_NAME}"
% EPHEMERAL_STORAGE=5120 #changed from default 512 MB to 5120 MB
% aws lambda update-function-configuration \
--region "${REGION}" \
--function-name "${FUNCTION_NAME}" \
--ephemeral-storage "Size=${EPHEMERAL_STORAGE}"
% aws lambda wait function-updated --region "${REGION}" --function-name "${FUNCTION_NAME}"
# Reuse MEMORY_SIZE rather than repeating the literal 10240, so the value
# cannot drift from the one passed to create-function.
% aws lambda update-function-configuration \
--region "${REGION}" \
--function-name "${FUNCTION_NAME}" \
--memory-size "${MEMORY_SIZE}"
# Invoke the function with test_event_manual.json and write the formatted
# response to formatted_output_aws.json.
# fileb:// passes the payload bytes through unchanged; AWS CLI v2 otherwise
# expects a file:// payload to be base64-encoded and rejects raw JSON.
% aws lambda invoke \
--region "${REGION}" \
--function-name "${FUNCTION_NAME}" \
--invocation-type 'RequestResponse' \
--payload fileb://test_event_manual.json output_aws.json \
&& jq . output_aws.json > formatted_output_aws.json && rm output_aws.json
# Pull the `body` field out of the formatted response and pretty-print it.
% jp -f formatted_output_aws.json -u 'body' | jq .
{
"main_output": " Positive",
"full_output": {
"id": "cmpl-554fd015-69ed-4df5-9f25-06567db98678",
"object": "text_completion",
"created": 1715431836,
"model": "./model/Phi-3-mini-4k-instruct-q4.gguf",
"choices": [
{
"text": " Positive",
"index": 0,
"logprobs": null,
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 60,
"completion_tokens": 2,
"total_tokens": 62
}
}
}
# Names and policy documents for the role API Gateway will use as integration
# credentials. The policy name is kept exactly as originally spelled.
APIGW_IAM_POLICY_JSON=apigw_iam_policy.json
APIGW_POLICY_NAME=API_GW_POLICY_TO_INVOKE_LAMDA
APIGW_ROLE_NAME=API_GW_ROLE_TO_INVOKE_LAMBDA
APIGW_TRUST_POLICY_JSON=apigw_trust_policy.json
# Create the policy and the role, then attach one to the other.
aws iam create-policy \
  --policy-name "${APIGW_POLICY_NAME}" \
  --policy-document "file://${APIGW_IAM_POLICY_JSON}"
aws iam create-role \
  --role-name "${APIGW_ROLE_NAME}" \
  --assume-role-policy-document "file://${APIGW_TRUST_POLICY_JSON}"
aws iam attach-role-policy \
  --role-name "${APIGW_ROLE_NAME}" \
  --policy-arn "arn:aws:iam::${AWS_ACCOUNT_ID}:policy/${APIGW_POLICY_NAME}"
# Keep the role ARN; it is passed to put-integration as --credentials.
APIGW_ROLE_ARN="$(aws iam get-role --role-name "${APIGW_ROLE_NAME}" --query "Role.Arn" --output text)"
API_NAME=PHI3_MINI
# Create a regional REST API and keep its id.
REST_API_ID=$(aws apigateway create-rest-api --name "${API_NAME}" --region "${REGION}" --endpoint-configuration types=REGIONAL --query 'id' --output text)
# Render the OpenAPI template: each literal ${NAME} placeholder in the
# template is replaced by the value of the shell variable of the same name.
# `\$` keeps the shell from expanding the placeholder side of each pattern.
# The closing brace is deliberately unescaped: `\}` is an interval delimiter
# in basic regular expressions, and an unmatched one is not portable.
sed \
  -e "s|\${API_NAME}|${API_NAME}|g" \
  -e "s|\${REST_API_ID}|${REST_API_ID}|g" \
  -e "s|\${REGION}|${REGION}|g" \
  -e "s|\${FUNCTION_NAME}|${FUNCTION_NAME}|g" \
  -e "s|\${AWS_ACCOUNT_ID}|${AWS_ACCOUNT_ID}|g" \
  lambda_openapi_spec_template.yml > lambda_openapi_spec.yml
# Import the rendered spec into the API. fileb:// sends the file bytes as-is
# (AWS CLI v2 treats a file:// blob as base64), and --region matches the
# region the API was created in.
aws apigateway put-rest-api --region "${REGION}" --rest-api-id "${REST_API_ID}" --body fileb://lambda_openapi_spec.yml
# put-integration: connect the API resource to the Lambda function
# Lambda invocation URI that API Gateway calls for this function.
API_GW_URI="arn:aws:apigateway:${REGION}:lambda:path/2015-03-31/functions/arn:aws:lambda:${REGION}:${AWS_ACCOUNT_ID}:function:${FUNCTION_NAME}/invocations"
# Look up the id of the API resource whose path is /${FUNCTION_NAME}.
RESOURCE_ID=$(aws apigateway get-resources \
--rest-api-id "${REST_API_ID}" \
--query "items[?path=='/${FUNCTION_NAME}'].id" \
--output text \
--region "${REGION}")
# Attach the Lambda proxy integration to the ANY method of that resource.
# API Gateway always calls the Lambda Invoke API with POST, whatever method
# the client used, so --integration-http-method must be POST even though the
# API method itself is ANY.
aws apigateway put-integration \
--region "${REGION}" \
--rest-api-id "${REST_API_ID}" \
--resource-id "${RESOURCE_ID}" \
--http-method ANY \
--type AWS_PROXY \
--integration-http-method POST \
--uri "${API_GW_URI}" \
--credentials "${APIGW_ROLE_ARN}"
# NOTE(review): with an AWS_PROXY integration the Lambda response is normally
# passed through unchanged; confirm this integration response is still needed.
aws apigateway put-integration-response --region "${REGION}" --rest-api-id "${REST_API_ID}" --resource-id "${RESOURCE_ID}" --http-method ANY --status-code 200
STAGE_NAME="v1"
# Deploy the API to the stage. --region matches the region the API was
# created in, instead of relying on the CLI's default region.
aws apigateway create-deployment --region "${REGION}" --rest-api-id "${REST_API_ID}" --stage-name "${STAGE_NAME}"
# Build the invoke URL from STAGE_NAME so it always names the stage that was
# just deployed, rather than a hard-coded "v1".
% API_GW_URL="https://${REST_API_ID}.execute-api.${REGION}.amazonaws.com/${STAGE_NAME}/${FUNCTION_NAME}"
# POST test_event_2.json to the deployed endpoint and save the reply.
% curl -X POST -H "Content-Type: application/json" -d @test_event_2.json "${API_GW_URL}" > output_via_apigw.json
% jq . output_via_apigw.json
{
"main_output": " Positive",
"full_output": {
"id": "cmpl-554fd015-69ed-4df5-9f25-06567db98678",
"object": "text_completion",
"created": 1715431836,
"model": "./model/Phi-3-mini-4k-instruct-q4.gguf",
"choices": [
{
"text": " Positive",
"index": 0,
"logprobs": null,
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 60,
"completion_tokens": 2,
"total_tokens": 62
}
}
}
Limitations with API Gateway: