src/blogc-github-lambda/lambda_function.py.in


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198

# coding: utf-8
#
# blogc: A blog compiler.
# Copyright (C) 2014-2017 Rafael G. Martins <rafael@rafaelmartins.eng.br>
#
# This program can be distributed under the terms of the BSD License.
# See the license for details.
#

from contextlib import closing
from StringIO import StringIO

import base64
import boto3
import hashlib
import json
import mimetypes
import os
import subprocess
import tarfile
import urllib2
import shutil

# Version string printed at the start of every build. The value is a
# placeholder -- presumably substituted by the build system when this
# template is processed (TODO confirm).
BLOGC_VERSION = '@PACKAGE_VERSION@'

# Directory that contains this module. It is prepended to PATH so that
# executables located next to this file can be found by child processes.
cwd = os.path.dirname(os.path.abspath(__file__))
os.environ['PATH'] = '%s:%s' % (cwd, os.environ.get('PATH', ''))

# S3 resource created once at import time and used by sync_s3.
s3 = boto3.resource('s3')

# Optional GitHub credentials, read from the environment. get_tarball sends
# this value base64-encoded in a "Basic" Authorization header.
# A value without a ':' is treated as a base64-encoded KMS ciphertext and is
# decrypted here at import time; a value containing ':' is used unchanged.
GITHUB_AUTH = os.environ.get('GITHUB_AUTH')
if GITHUB_AUTH is not None and ':' not in GITHUB_AUTH:
    GITHUB_AUTH = boto3.client('kms').decrypt(
        CiphertextBlob=base64.b64decode(GITHUB_AUTH))['Plaintext']


def get_tarball(repo_name):
    """Download and unpack the ``master`` tarball of a GitHub repository.

    The archive is requested from the GitHub API, sending ``GITHUB_AUTH`` as
    HTTP Basic credentials when it is set, and extracted below ``/tmp/``.
    A directory already present at the destination is removed first.

    :param repo_name: repository identifier interpolated into the API URL
        (``sns_handler`` passes the payload's ``full_name``).
    :return: path of the extracted top-level directory (``/tmp/<name>``).
    :raises RuntimeError: if the archive has no member whose name is free
        of ``/``.
    """
    url = 'https://api.github.com/repos/%s/tarball/master' % repo_name
    req = urllib2.Request(url)

    if GITHUB_AUTH is not None:
        req.add_header("Authorization",
                       "Basic %s" % base64.b64encode(GITHUB_AUTH))

    with closing(urllib2.urlopen(req)) as response:
        payload = response.read()

    with closing(StringIO(payload)) as buf:
        with tarfile.open(fileobj=buf, mode='r:gz') as archive:
            # the first member name without a slash is taken as the
            # archive's top-level directory
            toplevel = next(
                (name for name in archive.getnames() if '/' not in name),
                None)
            if toplevel is None:
                raise RuntimeError('Failed to find a directory in tarball')

            destination = '/tmp/%s' % toplevel

            # drop whatever a previous run left at the same location
            if os.path.isdir(destination):
                shutil.rmtree(destination)

            archive.extractall('/tmp/')

    return destination


def translate_filename(filename):
    """Return the ``index.html`` alias for an ``index.$EXT`` file name.

    s3 only allows users to declare one directory index file name, so a
    file whose base name is exactly ``index.<something>`` is mapped to
    ``index.html`` in the same directory.

    The alias is only returned when no file exists at that path; the check
    is done on the path as given, so a relative path is resolved against
    the current working directory of the process.

    :param filename: ``/``-separated file name.
    :return: the translated name, or ``filename`` unchanged when no
        translation applies.
    """
    directory, sep, basename = filename.rpartition('/')

    pieces = basename.split('.')
    if len(pieces) != 2 or pieces[0] != 'index':
        return filename

    candidate = directory + sep + 'index.html'
    if os.path.exists(candidate):
        return filename

    return candidate


def sync_s3(src, dest, settings_file):
    """Mirror the local directory ``src`` into an S3 bucket.

    Every file below ``src`` is uploaded under its path relative to ``src``
    when it is missing from the bucket or when its SHA-1 digest differs from
    the remote object's. Remote objects without a local counterpart are
    deleted. Remote keys ending in ``/`` are ignored entirely.

    For every ``index.$EXT`` file an additional ``index.html`` copy is
    pushed (see ``translate_filename``). Such a copy never replaces a real
    file that exists under the same key in ``src``.

    :param src: local directory to publish.
    :param dest: default bucket name, used unless the settings file has a
        ``bucket`` entry.
    :param settings_file: path of an optional JSON file. Its ``bucket``
        entry overrides ``dest``; its ``content-type`` entry maps relative
        file names to the content type to upload them with.
    """
    settings = {}
    if os.path.exists(settings_file):
        with open(settings_file, 'r') as fp:
            settings = json.load(fp)

    content_types = settings.get('content-type', {})
    bucket = s3.Bucket(settings.get('bucket', dest))

    remote_files = {}
    for obj in bucket.objects.all():
        if not obj.key.endswith('/'):
            remote_files[obj.key] = obj

    local_files = {}
    index_copies = {}
    for root, dirs, files in os.walk(src):
        real_root = root[len(src):].lstrip('/')
        for filename in files:
            real_filename = os.path.join(real_root, filename)
            data = {'Key': real_filename}

            # an explicit content type from the settings wins over guessing
            mime = content_types.get(real_filename,
                                     mimetypes.guess_type(real_filename)[0])
            if mime is not None:
                data['ContentType'] = mime

            with open(os.path.join(root, filename), 'rb') as fp:
                data['Body'] = fp.read()

            # always push the original file to its place
            local_files[real_filename] = data

            # if we need a copy on s3 for index or something, remember it.
            # it is merged below, once all the real files are known.
            translated_filename = translate_filename(real_filename)
            if translated_filename != real_filename:
                translated_data = data.copy()
                translated_data['Key'] = translated_filename
                index_copies[translated_filename] = translated_data

    # a generated copy must never clobber a real file with the same key,
    # whatever order os.walk listed the files in.
    for key, data in index_copies.items():
        local_files.setdefault(key, data)

    to_upload = [data for key, data in local_files.items()
                 if key not in remote_files]

    to_delete = []
    for key, obj in remote_files.items():
        if key not in local_files:
            to_delete.append(key)
            continue

        local_digest = hashlib.sha1(local_files[key]['Body']).hexdigest()
        with closing(obj.get()['Body']) as fp:
            remote_digest = hashlib.sha1(fp.read()).hexdigest()

        if local_digest != remote_digest:
            to_upload.append(local_files[key])

    for data in to_upload:
        print('Uploading file: %s; content-type: "%s"' % (
            data['Key'],
            data.get('ContentType'),
        ))
        bucket.put_object(**data)

    for key in to_delete:
        print('Deleting file: %s' % key)
        remote_files[key].delete()


def sns_handler(message):
    """Build and publish a site in response to a GitHub push notification.

    Pushes to any ref other than ``refs/heads/master`` are only logged.
    Otherwise the repository tarball is downloaded, the site is built with
    blogc-make when the repository has a ``blogcfile`` (falling back to
    ``make`` when it has not), and the ``_build_lambda`` output directory is
    synchronized to S3. The default bucket name is the repository name.

    When the ``DEBUG`` environment variable is set, blogc-make is run with
    ``-V`` and the output of ``make`` is not suppressed.

    :param message: JSON text of the GitHub push payload.
    :raises RuntimeError: if the build tool exits with a non-zero status.
    """
    print('blogc-github-lambda %s' % BLOGC_VERSION)
    payload = json.loads(message)

    if payload['ref'] != 'refs/heads/master':
        print("Commit not for master branch, skipping: %s" % payload['ref'])
        return

    repository = payload['repository']
    print('Building: %s' % repository['full_name'])
    debug = 'DEBUG' in os.environ

    env = os.environ.copy()
    env['BLOGC'] = os.path.join(cwd, 'blogc')
    env['OUTPUT_DIR'] = '_build_lambda'

    rootdir = get_tarball(repository['full_name'])
    blogcfile = os.path.join(rootdir, 'blogcfile')

    if os.path.isfile(blogcfile):
        # deploy using blogc-make
        args = [os.path.join(cwd, 'blogc'), '-m', '-f', blogcfile,
                'all']
        if debug:
            args.append('-V')
        rv = subprocess.call(args, env=env)
    else:
        # fallback to using make. please note that this will break if
        # amazon removes gnu make from lambda images
        make_args = ['make', '-C', rootdir]
        if debug:
            rv = subprocess.call(make_args, env=env)
        else:
            # discard the output through os.devnull instead of a pipe:
            # nobody reads a pipe here, so make would block forever as soon
            # as it filled the pipe buffer.
            with open(os.devnull, 'w') as devnull:
                rv = subprocess.call(make_args, env=env,
                                     stdout=devnull, stderr=devnull)
    if rv != 0:
        raise RuntimeError('Failed to run the build tool.')

    sync_s3(os.path.join(rootdir, env['OUTPUT_DIR']),
            repository['name'],
            os.path.join(rootdir, 's3.json'))


def lambda_handler(event, context):
    """Lambda entry point: hand every SNS message in ``event`` to sns_handler.

    Records without an ``Sns`` entry are skipped. ``context`` is unused.

    :param event: mapping with a ``Records`` list.
    :param context: not read by this function.
    """
    sns_entries = (rec['Sns'] for rec in event['Records'] if 'Sns' in rec)
    for sns in sns_entries:
        sns_handler(sns['Message'])