2021-04-15 11:45:07 -07:00
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
2021-10-28 15:43:09 -07:00
2021-04-15 11:45:07 -07:00
"""
2021-10-28 15:43:09 -07:00
Purpose
Amazon Lookout for Vision dataset code examples used in the service documentation:
2021-04-15 11:45:07 -07:00
https://docs.aws.amazon.com/lookout-for-vision/latest/developer-guide/model-create-dataset.html
Shows how to create and manage datasets. Also shows how to create a manifest file
and upload it to an Amazon S3 bucket.
2021-04-15 11:45:07 -07:00
"""
2021-10-28 15:43:09 -07:00
2021-04-15 11:45:07 -07:00
import logging
import time
from datetime import datetime
import os
import json
2022-05-19 07:09:24 -07:00
2021-04-15 11:45:07 -07:00
from botocore . exceptions import ClientError
# Module-level logger named after this module; used by the Datasets methods below.
logger = logging . getLogger ( __name__ )
2021-10-28 15:43:09 -07:00
# snippet-start:[python.example_code.lookoutvision.Datasets]
2021-04-15 11:45:07 -07:00
class Datasets:
    # snippet-end:[python.example_code.lookoutvision.Datasets]
    """
    Provides example functions for creating, describing, updating, and deleting
    Lookout for Vision datasets. Also shows how to create a manifest file and
    upload it to an Amazon S3 bucket.

    Every method is a static method, so no instance state is kept; the Boto3
    client or resource to use is passed in on each call.
    """

    # snippet-start:[python.example_code.lookoutvision.CreateDataset]
    @staticmethod
    def create_dataset(lookoutvision_client, project_name, manifest_file, dataset_type):
        """
        Creates a new Lookout for Vision dataset and waits until creation
        either completes or fails.

        :param lookoutvision_client: A Lookout for Vision Boto3 client.
        :param project_name: The name of the project in which you want to
                             create a dataset.
        :param manifest_file: The Amazon S3 path (s3://bucket/key) of the
                              manifest file.
        :param dataset_type: The type of the dataset (train or test).
        :raises ValueError: If manifest_file doesn't contain a bucket and a key.
        :raises ClientError: If a Lookout for Vision call fails.
        :raises Exception: If the dataset finishes in any status other than
                           CREATE_COMPLETE.
        """
        # Validate the path up front so a malformed value produces a clear
        # message instead of a tuple-unpacking error.
        bucket, _, key = manifest_file.replace("s3://", "").partition("/")
        if not bucket or not key:
            raise ValueError(
                "The manifest file must be an Amazon S3 path in the form "
                f"s3://bucket/key. Got: {manifest_file}"
            )

        try:
            logger.info("Creating %s dataset type...", dataset_type)
            dataset = {
                "GroundTruthManifest": {"S3Object": {"Bucket": bucket, "Key": key}}
            }
            response = lookoutvision_client.create_dataset(
                ProjectName=project_name,
                DatasetType=dataset_type,
                DatasetSource=dataset,
            )
            metadata = response["DatasetMetadata"]
            logger.info("Dataset Status: %s", metadata["Status"])
            # Read the status message defensively; it may not be present.
            logger.info(
                "Dataset Status Message: %s", metadata.get("StatusMessage", "")
            )
            logger.info("Dataset Type: %s", metadata["DatasetType"])

            # Wait until either created or failed.
            while True:
                description = lookoutvision_client.describe_dataset(
                    ProjectName=project_name, DatasetType=dataset_type
                )["DatasetDescription"]
                status = description["Status"]
                if status != "CREATE_IN_PROGRESS":
                    break
                logger.info("Dataset creation in progress...")
                time.sleep(2)

            if status != "CREATE_COMPLETE":
                message = description.get("StatusMessage", "")
                # No exception is being handled here, so use error() rather
                # than exception(), which would log an empty traceback.
                logger.error("Couldn't create dataset: %s", message)
                raise Exception(f"Couldn't create dataset: {message}")

            logger.info("Dataset created.")
        except ClientError:
            logger.exception("Service error: Couldn't create dataset.")
            raise

    # snippet-end:[python.example_code.lookoutvision.CreateDataset]

    # snippet-start:[python.example_code.lookoutvision.Scenario_CreateManifestFile]
    @staticmethod
    def create_manifest_file_s3(s3_resource, image_s3_path, manifest_s3_path):
        """
        Creates a manifest file and uploads to Amazon S3.

        The manifest is first written to a local file named temp.manifest in
        the current directory; that file is removed before the function exits.

        :param s3_resource: A Boto3 Amazon S3 resource.
        :param image_s3_path: The Amazon S3 path to the images referenced by the
                              manifest file. The images must be in an Amazon S3 bucket
                              with the following folder structure.
                                s3://amzn-s3-demo-bucket/<train or test>/
                                    normal/
                                    anomaly/
                              Place normal images in the normal folder and anomalous
                              images in the anomaly folder.
        :param manifest_s3_path: The Amazon S3 location in which to store the created
                                 manifest file.
        :raises ValueError: If image_s3_path has no bucket, or manifest_s3_path
                            doesn't contain a bucket and a key.
        """
        output_manifest_file = "temp.manifest"

        # Get bucket and folder from image and manifest file paths.
        bucket, _, prefix = image_s3_path.replace("s3://", "").partition("/")
        manifest_bucket, _, manifest_key = manifest_s3_path.replace(
            "s3://", ""
        ).partition("/")
        if not bucket:
            raise ValueError(
                f"The image path must be in the form s3://bucket/folder/. "
                f"Got: {image_s3_path}"
            )
        if not manifest_bucket or not manifest_key:
            raise ValueError(
                f"The manifest path must be in the form s3://bucket/key. "
                f"Got: {manifest_s3_path}"
            )
        # An empty prefix means the normal/ and anomaly/ folders sit at the
        # bucket root, so only add a separator when there is a folder name.
        if prefix and not prefix.endswith("/"):
            prefix += "/"

        try:
            # Current date and time in manifest file format.
            dttm = datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%f")
            src_bucket = s3_resource.Bucket(bucket)

            with open(output_manifest_file, "w", encoding="utf-8") as mfile:
                logger.info("Creating manifest file")
                # Anomalous images are written first, then normal images.
                for class_name in ("anomaly", "normal"):
                    for obj in src_bucket.objects.filter(
                        Prefix=f"{prefix}{class_name}/", Delimiter="/"
                    ):
                        # Skip "folder" placeholder objects; they aren't images.
                        if obj.key.endswith("/"):
                            continue
                        image_path = f"s3://{src_bucket.name}/{obj.key}"
                        manifest = Datasets.create_json_line(
                            image_path, class_name, dttm
                        )
                        mfile.write(json.dumps(manifest) + "\n")

            # Upload only after the file is closed so all lines are flushed.
            logger.info("Uploading manifest file to %s", manifest_s3_path)
            s3_resource.Bucket(manifest_bucket).upload_file(
                output_manifest_file, manifest_key
            )
        except Exception:
            # Service errors and local errors are logged the same way and
            # passed on to the caller unchanged.
            logger.exception("Error uploading manifest.")
            raise
        else:
            logger.info("Completed manifest file creation and upload.")
        finally:
            # Best-effort cleanup; the file may never have been created.
            try:
                os.remove(output_manifest_file)
            except FileNotFoundError:
                pass

    @staticmethod
    def create_json_line(image, class_name, dttm):
        """
        Creates a single JSON line for an image.

        :param image: The S3 location for the image.
        :param class_name: The class of the image (normal or anomaly)
        :param dttm: The date and time that the JSON is created.
        :return: A dictionary that holds the manifest entry for the image.
        :raises Exception: If class_name is neither normal nor anomaly.
        """
        if class_name == "normal":
            label = 0
        elif class_name == "anomaly":
            label = 1
        else:
            # Report the offending class name, not the numeric label.
            logger.error("Unexpected label value: %s for %s", class_name, image)
            raise Exception(f"Unexpected label value: {class_name} for {image}")

        manifest = {
            "source-ref": image,
            "anomaly-label": label,
            "anomaly-label-metadata": {
                "confidence": 1,
                "job-name": "labeling-job/anomaly-label",
                "class-name": class_name,
                "human-annotated": "yes",
                "creation-date": dttm,
                "type": "groundtruth/image-classification",
            },
        }
        return manifest

    # snippet-end:[python.example_code.lookoutvision.Scenario_CreateManifestFile]

    # snippet-start:[python.example_code.lookoutvision.DeleteDataset]
    @staticmethod
    def delete_dataset(lookoutvision_client, project_name, dataset_type):
        """
        Deletes a Lookout for Vision dataset

        :param lookoutvision_client: A Boto3 Lookout for Vision client.
        :param project_name: The name of the project that contains the dataset that
                             you want to delete.
        :param dataset_type: The type (train or test) of the dataset that you
                             want to delete.
        :raises ClientError: If the delete call fails.
        """
        try:
            logger.info(
                "Deleting the %s dataset for project %s.", dataset_type, project_name
            )
            lookoutvision_client.delete_dataset(
                ProjectName=project_name, DatasetType=dataset_type
            )
            logger.info("Dataset deleted.")
        except ClientError:
            logger.exception("Service error: Couldn't delete dataset.")
            raise

    # snippet-end:[python.example_code.lookoutvision.DeleteDataset]

    # snippet-start:[python.example_code.lookoutvision.DescribeDataset]
    @staticmethod
    def describe_dataset(lookoutvision_client, project_name, dataset_type):
        """
        Gets information about a Lookout for Vision dataset and prints it.

        :param lookoutvision_client: A Boto3 Lookout for Vision client.
        :param project_name: The name of the project that contains the dataset that
                             you want to describe.
        :param dataset_type: The type (train or test) of the dataset that you want
                             to describe.
        :raises ClientError: If the describe call fails.
        """
        try:
            response = lookoutvision_client.describe_dataset(
                ProjectName=project_name, DatasetType=dataset_type
            )
            description = response["DatasetDescription"]
            print(f"Name: {description['ProjectName']}")
            print(f"Type: {description['DatasetType']}")
            print(f"Status: {description['Status']}")
            print(f"Message: {description['StatusMessage']}")
            print(f"Images: {description['ImageStats']['Total']}")
            print(f"Labeled: {description['ImageStats']['Labeled']}")
            print(f"Normal: {description['ImageStats']['Normal']}")
            print(f"Anomaly: {description['ImageStats']['Anomaly']}")
        except ClientError:
            logger.exception("Service error: problem describing dataset.")
            raise
        print("Done.")

    # snippet-end:[python.example_code.lookoutvision.DescribeDataset]

    # snippet-start:[python.example_code.lookoutvision.UpdateDatasetEntries]
    @staticmethod
    def update_dataset_entries(
        lookoutvision_client, project_name, dataset_type, updates_file
    ):
        """
        Adds dataset entries to an Amazon Lookout for Vision dataset and waits
        until the update (or its rollback) finishes.

        :param lookoutvision_client: The Amazon Lookout for Vision Boto3 client.
        :param project_name: The project that contains the dataset that you want to update.
        :param dataset_type: The type of the dataset that you want to update (train or test).
        :param updates_file: The manifest file of JSON lines that contains the updates.
        :return: A tuple of the final dataset status and status message. The
                 status is UPDATE_COMPLETE on success, or
                 UPDATE_FAILED_ROLLBACK_COMPLETE if the update was rolled back.
        :raises ClientError: If a Lookout for Vision call fails.
        :raises Exception: If the dataset reaches an unexpected status.
        """
        try:
            # Update dataset entries.
            logger.info(
                "Updating %s dataset for project %s with entries from %s.",
                dataset_type,
                project_name,
                updates_file,
            )

            with open(updates_file, encoding="utf-8") as f:
                manifest_file = f.read()

            lookoutvision_client.update_dataset_entries(
                ProjectName=project_name,
                DatasetType=dataset_type,
                Changes=manifest_file,
            )

            # Poll while the update, or the rollback of a failed update, is running.
            while True:
                description = lookoutvision_client.describe_dataset(
                    ProjectName=project_name, DatasetType=dataset_type
                )["DatasetDescription"]
                status = description["Status"]
                status_message = description.get("StatusMessage", "")

                if status == "UPDATE_IN_PROGRESS":
                    logger.info(
                        "Updating %s dataset for project %s.",
                        dataset_type,
                        project_name,
                    )
                elif status == "UPDATE_FAILED_ROLLBACK_IN_PROGRESS":
                    logger.info(
                        "Update failed, rolling back %s dataset for project %s.",
                        dataset_type,
                        project_name,
                    )
                else:
                    break
                time.sleep(5)

            if status == "UPDATE_COMPLETE":
                logger.info(
                    "Dataset updated: %s : %s : %s dataset for project %s.",
                    status,
                    status_message,
                    dataset_type,
                    project_name,
                )
                logger.info("Added entries to dataset.")
            elif status == "UPDATE_FAILED_ROLLBACK_COMPLETE":
                # The rollback status is returned rather than raised, so the
                # caller decides how to react to the failed update.
                logger.warning(
                    "Rollback completed after update failure: "
                    "%s : %s : %s dataset for project %s.",
                    status,
                    status_message,
                    dataset_type,
                    project_name,
                )
            else:
                message = (
                    f"Failed. Unexpected state for dataset update: {status} : "
                    f"{status_message} : {dataset_type} dataset for project "
                    f"{project_name}."
                )
                # No exception is active yet, so log with error() rather than
                # exception().
                logger.error("%s", message)
                raise Exception(message)

            return status, status_message

        except ClientError as err:
            logger.exception(
                f"Couldn't update dataset: {err.response['Error']['Message']}"
            )
            raise
2023-10-18 10:35:05 -07:00
2022-05-19 07:09:24 -07:00
# snippet-end:[python.example_code.lookoutvision.UpdateDatasetEntries]