# lambda_function.py

from __future__ import print_function

import zipfile
import boto3
import io
import json 
import urllib.parse

# Emitted once, when the module is imported (not on every handler call).
print('Loading function')

# Module-level S3 client; lambda_handler below uses this single instance for
# all of its get/put/delete calls.
s3 = boto3.client('s3')


def lambda_handler(event, context):
    """Unpack a nested zip stored in S3 and upload selected members as text.

    The object named by the first record of ``event`` is downloaded and
    opened as a zip archive. Each member that is itself a zip archive is
    opened in turn, and every inner file whose name starts with ``'2021'``
    is decoded from EUC-KR and uploaded to the same bucket under ``'new/'``.
    If at least one file was uploaded, the source object is deleted.

    Args:
        event: S3 notification event. Only ``event['Records'][0]`` is read
            (bucket name and object key).
        context: Lambda context object; not used.

    Returns:
        None.

    Raises:
        Exception: Any error from S3, zip parsing or decoding is printed
            and then re-raised unchanged.
    """
    s3_record = event['Records'][0]['s3']
    bucket = s3_record['bucket']['name']
    # The key is URL-decoded ('+' becomes a space) before being used.
    key = urllib.parse.unquote_plus(s3_record['object']['key'], encoding='utf-8')

    try:
        response = s3.get_object(Bucket=bucket, Key=key)
    except Exception as e:
        print(e)
        print('Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'.format(key, bucket))
        raise

    # Failures past this point are not "get" failures, so they are reported
    # with their own message instead of the misleading one above.
    try:
        print("CONTENT TYPE: " + response['ContentType'])
        put_responses = _upload_matching_members(bucket, response["Body"].read())

        # Delete the source zip only when something was actually extracted.
        if put_responses:
            deleted_obj = s3.delete_object(Bucket=bucket, Key=key)
            print('deleted file:')
            print(deleted_obj)
    except Exception as e:
        print(e)
        print('Error processing object {} from bucket {}.'.format(key, bucket))
        raise


def _upload_matching_members(bucket, archive_bytes, name_prefix='2021',
                             dest_prefix='new/', source_encoding='euc-kr'):
    """Upload matching files found in the zips nested inside ``archive_bytes``.

    Args:
        bucket: Name of the destination bucket.
        archive_bytes: Raw bytes of the outer zip archive.
        name_prefix: Only inner files whose name starts with this are uploaded.
        dest_prefix: String prepended to the inner file name to form the key.
        source_encoding: Encoding used to decode each inner file's bytes.

    Returns:
        list: The ``put_object`` responses, one per uploaded file.
    """
    put_responses = []
    with io.BytesIO(archive_bytes) as outer_buffer, \
            zipfile.ZipFile(outer_buffer, mode='r') as outer_zip:
        for outer_member in outer_zip.infolist():
            print(outer_member.filename)
            with io.BytesIO(outer_zip.read(outer_member)) as inner_buffer:
                # Directory entries and plain files are not archives; skip
                # them rather than letting BadZipFile abort the whole run.
                if not zipfile.is_zipfile(inner_buffer):
                    print('Skipping non-zip member: ' + outer_member.filename)
                    continue
                # is_zipfile moved the read position; rewind before reuse.
                inner_buffer.seek(0)
                with zipfile.ZipFile(inner_buffer, mode='r') as inner_zip:
                    for inner_member in inner_zip.infolist():
                        inner_name = inner_member.filename
                        print(inner_name)
                        if not inner_name.startswith(name_prefix):
                            continue
                        text = inner_zip.read(inner_member).decode(source_encoding)
                        # Encode explicitly so the uploaded bytes are UTF-8
                        # by construction rather than by client defaults.
                        put_responses.append(s3.put_object(
                            Bucket=bucket,
                            Key=dest_prefix + inner_name,
                            Body=text.encode('utf-8'),
                        ))
    return put_responses

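# 샘플코드는 공공데이터 다운로드 원본 파일에 대한 압축 해제 및 utf-8-bom 형식 csv 파일들을 euc-kr로 디코딩 후 분석 대상파일들만 S3에 업로드 해주는 예시입니다.
# (English) This sample code is an example that unzips the original public-data download file, decodes the CSV files (described as UTF-8-BOM format) as EUC-KR, and uploads only the files targeted for analysis to S3.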