from __future__ import print_function
import zipfile
import boto3
import io
import json
import urllib.parse
print('Loading function')
s3 = boto3.client('s3')
def lambda_handler(event, context):
    # Get the object from the event and show its content type
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = urllib.parse.unquote_plus(event['Records'][0]['s3']['object']['key'], encoding='utf-8')
    try:
        response = s3.get_object(Bucket=bucket, Key=key)
        print("CONTENT TYPE: " + response['ContentType'])

        putObjects = []
        # Load the downloaded object into memory and open it as a zip archive
        with io.BytesIO(response["Body"].read()) as tf:
            # rewind the buffer
            tf.seek(0)
            # Read the buffer as a zipfile and process the members
            with zipfile.ZipFile(tf, mode='r') as zipf:
                for file in zipf.infolist():
                    fileName = file.filename
                    print(fileName)
                    # Each member is itself a zip archive; open it in memory as well
                    with io.BytesIO(zipf.read(file)) as otherTf:
                        # rewind the buffer
                        otherTf.seek(0)
                        with zipfile.ZipFile(otherTf, mode='r') as otherZipf:
                            for otherFile in otherZipf.infolist():
                                otherFileName = otherFile.filename
                                print(otherFileName)
                                # Upload only the analysis-target files (names starting with '2021'),
                                # decoding the euc-kr bytes to text before the upload
                                if otherFileName.startswith('2021'):
                                    putFile = s3.put_object(
                                        Bucket=bucket,
                                        Key='new/' + otherFile.filename,
                                        Body=otherZipf.read(otherFile).decode('euc-kr'))
                                    putObjects.append(putFile)
        # Delete the original zip file after it has been unzipped and its members uploaded
        if len(putObjects) > 0:
            deletedObj = s3.delete_object(Bucket=bucket, Key=key)
            print('deleted file:')
            print(deletedObj)
    except Exception as e:
        print(e)
        print('Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'.format(key, bucket))
        raise e
This sample code is an example that unzips the original downloaded public-data file, decodes the utf-8-bom format CSV files with euc-kr, and uploads only the files targeted for analysis to S3.
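Before wiring the function to an S3 event notification, the handler can be exercised with a hand-built event. The sketch below is a minimal local driver, assuming the code above is saved as lambda_function.py and that my-input-bucket and uploads/data.zip are placeholder names for a real bucket and nested zip object reachable with your AWS credentials; only the event fields the handler actually reads are included.

# test_invoke.py - hypothetical local driver for the handler above
# (module name, bucket name, and key are assumptions, not part of the original sample)
from lambda_function import lambda_handler

# Shape of an S3 put-event record, reduced to the fields the handler reads:
# Records[0].s3.bucket.name and Records[0].s3.object.key
sample_event = {
    "Records": [
        {
            "s3": {
                "bucket": {"name": "my-input-bucket"},   # placeholder bucket name
                "object": {"key": "uploads/data.zip"}    # placeholder key of the uploaded zip
            }
        }
    ]
}

# The handler never touches the context argument, so None is enough for a local run
lambda_handler(sample_event, None)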