aboutsummaryrefslogtreecommitdiffstats
path: root/src/blogc-github-lambda/lambda_function.py.in
blob: a6cdc6f7cfd756bb93e7184c5336d956dd85d799 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
# coding: utf-8
#
# blogc: A blog compiler.
# Copyright (C) 2014-2017 Rafael G. Martins <rafael@rafaelmartins.eng.br>
#
# This program can be distributed under the terms of the BSD License.
# See the license for details.
#

from __future__ import print_function

from contextlib import closing
try:
    from io import StringIO
except ImportError:
    from StringIO import StringIO

import base64
import boto3
import hmac
import hashlib
import json
import mimetypes
import os
import subprocess
import tarfile
import shutil
try:
    import urllib.request as urllib2
except ImportError:
    import urllib2

# Version string printed by blogc_handler. The '@...@' value looks like a
# placeholder substituted at build time -- TODO confirm against the build.
BLOGC_VERSION = '@PACKAGE_VERSION@'

# Directory containing this file; it is prepended to PATH so that executables
# shipped next to this module are found first by child processes.
cwd = os.path.dirname(os.path.abspath(__file__))
os.environ['PATH'] = '%s:%s' % (cwd, os.environ.get('PATH', ''))

# Shared S3 resource, created once at import time and used by sync_s3.
s3 = boto3.resource('s3')

# Optional credentials for GitHub API requests (sent as HTTP Basic auth by
# get_tarball). A value containing ':' is used as-is; any other value is
# treated as base64-encoded KMS ciphertext and decrypted at import time.
# NOTE(review): the decrypted 'Plaintext' is presumably bytes on Python 3,
# while the as-is value is text -- confirm that consumers handle both.
GITHUB_AUTH = os.environ.get('GITHUB_AUTH')
if GITHUB_AUTH is not None and ':' not in GITHUB_AUTH:
    GITHUB_AUTH = boto3.client('kms').decrypt(
        CiphertextBlob=base64.b64decode(GITHUB_AUTH))['Plaintext']

# Optional secret used by api_gateway_handler to verify the X-Hub-Signature
# header. When set, it is always treated as base64-encoded KMS ciphertext.
GITHUB_SECRET = os.environ.get('GITHUB_SECRET')
if GITHUB_SECRET is not None:
    GITHUB_SECRET = boto3.client('kms').decrypt(
        CiphertextBlob=base64.b64decode(GITHUB_SECRET))['Plaintext']


def get_tarball(repo_name):
    """Download and unpack the master-branch tarball of a GitHub repository.

    The archive is fetched from the GitHub API (authenticated with HTTP Basic
    auth when GITHUB_AUTH is set) and extracted under ``/tmp/``. Any previous
    extraction of the same top-level directory is removed first.

    :param repo_name: repository in ``owner/name`` form.
    :returns: path of the extracted top-level directory (``/tmp/<dir>``).
    :raises RuntimeError: if the archive has no top-level entry.
    """
    # Imported locally: the tarball is binary, so it needs a bytes buffer
    # (io.StringIO only accepts text, on both Python 2 and Python 3).
    from io import BytesIO

    tarball_url = 'https://api.github.com/repos/%s/tarball/master' % repo_name
    request = urllib2.Request(tarball_url)

    if GITHUB_AUTH is not None:
        # GITHUB_AUTH may be text (taken verbatim from the environment) or
        # bytes (decrypted by KMS); b64encode needs bytes on Python 3.
        credentials = GITHUB_AUTH
        if not isinstance(credentials, bytes):
            credentials = credentials.encode('utf-8')
        token = base64.b64encode(credentials)
        # On Python 3 the encoded token is bytes; convert it to text so the
        # header is not rendered as "Basic b'...'".
        if not isinstance(token, str):
            token = token.decode('ascii')
        request.add_header("Authorization", "Basic %s" % token)

    with closing(urllib2.urlopen(request)) as fp:
        tarball = fp.read()

    with closing(BytesIO(tarball)) as fp:
        with tarfile.open(fileobj=fp, mode='r:gz') as tar:
            # the first member without a '/' is the archive's root directory
            rootdir = next(
                (name for name in tar.getnames() if '/' not in name), None)
            if rootdir is None:
                raise RuntimeError('Failed to find a directory in tarball')
            rootdir = '/tmp/%s' % rootdir

            # start from a clean tree if a previous run left one behind
            if os.path.isdir(rootdir):
                shutil.rmtree(rootdir)

            # NOTE(review): members are extracted without path validation;
            # this relies on the archive coming from the GitHub API.
            tar.extractall('/tmp/')

    return rootdir


def translate_filename(filename):
    """Return the ``index.html`` alias for an ``index.<ext>`` file, if any.

    S3 only allows one directory index file name, so a file whose basename is
    ``index.<ext>`` (exactly one dot) is mapped to ``index.html`` in the same
    directory. The alias is only returned when no file already exists at that
    path; the check uses the path as given, so a relative path is resolved
    against the current working directory.

    :param filename: '/'-separated file path.
    :returns: the aliased path, or ``filename`` unchanged when no alias
        applies.
    """
    directory, separator, basename = filename.rpartition('/')

    is_index = basename.count('.') == 1 and basename.startswith('index.')
    if not is_index:
        return filename

    candidate = directory + separator + 'index.html'
    return filename if os.path.exists(candidate) else candidate


def sync_s3(src, dest, settings_file):
    """Mirror the local directory ``src`` into an S3 bucket.

    New and changed files are uploaded, and remote objects with no local
    counterpart are deleted. Changes are detected by comparing SHA-1 digests
    of the local and remote contents.

    :param src: local directory whose files are pushed.
    :param dest: default bucket name, used unless the settings file provides
        a ``bucket`` entry.
    :param settings_file: path to an optional JSON file; its ``content-type``
        mapping overrides the guessed MIME type per file, and its ``bucket``
        entry overrides ``dest``.
    """
    if os.path.exists(settings_file):
        with open(settings_file, 'r') as fp:
            settings = json.load(fp)
    else:
        settings = {}

    content_types = settings.get('content-type', {})
    bucket = s3.Bucket(settings.get('bucket', dest))

    # keys ending with '/' are directory placeholders, not files
    remote_files = dict(
        (obj.key, obj)
        for obj in bucket.objects.all()
        if not obj.key.endswith('/')
    )

    local_files = {}
    for root, _dirs, names in os.walk(src):
        prefix = root[len(src):].lstrip('/')
        for name in names:
            key = os.path.join(prefix, name)

            guessed = mimetypes.guess_type(key)[0]
            mime = content_types.get(key, guessed)

            entry = {'Key': key}
            if mime is not None:
                entry['ContentType'] = mime
            with open(os.path.join(src, key), 'rb') as fp:
                entry['Body'] = fp.read()

            # the file itself always goes to its own key
            local_files[key] = entry

            # an index.<ext> file may additionally be pushed as index.html
            alias = translate_filename(key)
            if alias != key:
                local_files[alias] = dict(entry, Key=alias)

    # files missing remotely come first, then files whose contents differ
    to_upload = [
        local_files[key] for key in local_files if key not in remote_files
    ]
    to_delete = []
    for key, remote in remote_files.items():
        if key not in local_files:
            to_delete.append(key)
            continue

        local_digest = hashlib.sha1(local_files[key]['Body']).hexdigest()
        with closing(remote.get()['Body']) as fp:
            remote_digest = hashlib.sha1(fp.read()).hexdigest()

        if local_digest != remote_digest:
            to_upload.append(local_files[key])

    for entry in to_upload:
        print('Uploading file: %s; content-type: "%s"' % (
            entry['Key'],
            entry.get('ContentType'),
        ))
        bucket.put_object(**entry)

    for key in to_delete:
        print('Deleting file:', key)
        remote_files[key].delete()


def blogc_handler(message):
    """Build a website from a GitHub push event and publish it to S3.

    Pushes to any ref other than ``refs/heads/master`` are logged and
    skipped. Otherwise the repository tarball is downloaded, the site is
    built with blogc-make (when a ``blogcfile`` exists in the repository
    root) or with ``make``, and the ``_build_lambda`` output directory is
    synced to the bucket named after the repository (or the one configured in
    ``s3.json``).

    :param message: JSON-encoded GitHub push payload.
    :raises RuntimeError: if the build tool exits with a non-zero status.
    """
    print('blogc-github-lambda %s' % BLOGC_VERSION)
    payload = json.loads(message)

    if payload['ref'] != 'refs/heads/master':
        print("Commit not for master branch, skipping: %s" % payload['ref'])
        return

    print('Building: %s' % payload['repository']['full_name'])
    debug = 'DEBUG' in os.environ

    env = os.environ.copy()
    env['BLOGC'] = os.path.join(cwd, 'blogc')
    env['OUTPUT_DIR'] = '_build_lambda'

    rootdir = get_tarball(payload['repository']['full_name'])
    blogcfile = os.path.join(rootdir, 'blogcfile')

    if os.path.isfile(blogcfile):
        # deploy using blogc-make
        args = [os.path.join(cwd, 'blogc'), '-m', '-f', blogcfile,
                'all']
        if debug:
            args.append('-V')
        rv = subprocess.call(args, env=env)
    else:
        # fallback to using make. please note that this will break if
        # amazon removes gnu make from lambda images
        make_args = ['make', '-C', rootdir]
        if debug:
            rv = subprocess.call(make_args, env=env)
        else:
            # Discard the output through os.devnull instead of
            # subprocess.PIPE: call() never reads the pipes, so a build that
            # prints enough to fill the pipe buffer would block forever.
            with open(os.devnull, 'wb') as devnull:
                rv = subprocess.call(make_args, env=env,
                                     stdout=devnull, stderr=devnull)
    if rv != 0:
        raise RuntimeError('Failed to run the build tool.')

    sync_s3(os.path.join(rootdir, env['OUTPUT_DIR']),
            payload['repository']['name'],
            os.path.join(rootdir, 's3.json'))


def api_gateway_response(code, message):
    """Build a response dict with a status code and a JSON message body.

    :param code: value stored under ``statusCode``.
    :param message: text wrapped as ``{"message": ...}`` and JSON-encoded
        into ``body``.
    :returns: dict with the ``statusCode`` and ``body`` keys.
    """
    encoded = json.dumps({'message': message})
    return dict(statusCode=code, body=encoded)


def api_gateway_handler(event):
    """Handle a GitHub webhook delivered through API Gateway.

    Only ``push`` events are accepted. When GITHUB_SECRET is configured, the
    ``X-Hub-Signature`` header must carry a valid ``sha1=<hexdigest>``
    HMAC-SHA1 of the request body. The body is then passed to blogc_handler.

    :param event: API Gateway event dict; its ``headers`` and ``body`` keys
        are read.
    :returns: a response dict built by api_gateway_response: 400 for a
        rejected request, 500 if the build raised, 201 on success.
    """
    def to_bytes(value):
        # hmac needs bytes on Python 3; values may arrive as text or bytes
        return value if isinstance(value, bytes) else value.encode('utf-8')

    headers = event.get('headers')
    if headers is None:
        return api_gateway_response(400, 'NO_HEADERS')

    if headers.get('X-GitHub-Event') != 'push':
        return api_gateway_response(400, 'UNSUPPORTED_EVENT')

    # a 'body' key holding None must be treated like a missing body
    body = event.get('body') or ''

    if GITHUB_SECRET is not None:
        sig = headers.get('X-Hub-Signature')
        if sig is None:
            return api_gateway_response(400, 'NO_SIGNATURE')

        pieces = sig.split('=')
        if len(pieces) != 2 or pieces[0] != 'sha1':
            return api_gateway_response(400, 'INVALID_SIGNATURE')

        digest = hmac.new(to_bytes(GITHUB_SECRET), to_bytes(body),
                          hashlib.sha1)

        # Compare as bytes: compare_digest raises TypeError on mixed
        # text/bytes operands and on non-ASCII text, which would turn a bad
        # signature into an unhandled error instead of a 400 response.
        if not hmac.compare_digest(to_bytes(digest.hexdigest()),
                                   to_bytes(pieces[1])):
            return api_gateway_response(400, 'BAD_SIGNATURE')

    # top-level boundary: report any build failure to the caller as a 500
    try:
        blogc_handler(body)
    except Exception as err:
        return api_gateway_response(500, 'ERROR: %s' % err)

    return api_gateway_response(201, 'ACCEPTED')


def lambda_handler(event, context):
    """Entry point: dispatch an incoming event to the matching handler.

    An event with a ``Records`` key is treated as an SNS notification: the
    message of every record that has an ``Sns`` entry is passed to
    blogc_handler, and nothing is returned. Otherwise an event with a
    ``body`` key is treated as an API Gateway request and the response from
    api_gateway_handler is returned. Any other event is ignored.

    :param event: the event dict.
    :param context: unused.
    """
    if 'Records' not in event:
        if 'body' in event:  # api-gateway
            return api_gateway_handler(event)
        return None

    # sns
    for record in event['Records']:
        if 'Sns' not in record:
            continue
        blogc_handler(record['Sns']['Message'])
    return None