*.terraform/
*tfstate*
.*.sw[a-z]
*.plan
# frozen_string_literal: true
MISSING_DESCRIPTION_MESSAGE = <<~END_OF_MESSAGE
Please provide a merge request description.
END_OF_MESSAGE
SKIPPING_VALIDATION_MESSAGE = <<~END_OF_MESSAGE
Skipping validation of merge request description because of ~backstage label.
END_OF_MESSAGE
MISSING_ISSUE_LINK_MESSAGE = <<~END_OF_MESSAGE
**Please provide an issue link in the merge request description to the gitlab.com issue related to this change.**
For version bumps, typos and other small changes, or in time-limited situations (e.g. during an incident),
add the ~backstage label to exempt this change from this requirement.
END_OF_MESSAGE
def validate_mr
# The cookbook-publisher user automatically creates MRs that are backstage in nature, so skip those MRs.
return if gitlab.mr_author == 'cookbook-publisher'
validate_mr_description
end
def validate_mr_description
fail MISSING_DESCRIPTION_MESSAGE if gitlab.mr_body.empty?
# The `backstage` label marks smaller changes like typo and docs fixes, so we can safely skip those MRs.
if gitlab.mr_labels.include? 'backstage'
message SKIPPING_VALIDATION_MESSAGE
return
end
fail MISSING_ISSUE_LINK_MESSAGE unless gitlab.mr_body.match?(%r{https://gitlab.com/[\w+-/]+/issues/\d+})
end
validate_mr
# Makefile for installing various utilities during CI
# Copyright 2017
# Licence MIT
# Maintainer Ilya Frolov <ilya@gitlab.com>
# Variables
UNAME := $(shell uname -s)
KEY_SERVERS := pool.sks-keyservers.net \
subkeys.pgp.net \
pgp.mit.edu \
keyserver.ubuntu.com \
keys.gnupg.net
#
TF_URL := https://releases.hashicorp.com/terraform
HASHICORP_KEY := 0x51852D87348FFC4C
# These can be overridden, e.g. with either `TF_ARCH=i386 make tfinstall` or `make TF_ARCH=i386 tfinstall`
TF_VERSION ?= 0.9.11
TF_DISTRO ?= linux
TF_ARCH ?= amd64
TF_INSTALL_TO ?= /
# These are just for readability
TF_ZIP := terraform_$(TF_VERSION)_$(TF_DISTRO)_$(TF_ARCH).zip
TF_SHA256 := terraform_$(TF_VERSION)_SHA256SUMS
TF_SHA256SIG := terraform_$(TF_VERSION)_SHA256SUMS.sig
TF_URL_ZIP := $(TF_URL)/$(TF_VERSION)/$(TF_ZIP)
TF_URL_SHA256 := $(TF_URL)/$(TF_VERSION)/$(TF_SHA256)
TF_URL_SHA256SIG := $(TF_URL)/$(TF_VERSION)/$(TF_SHA256SIG)
# Vault
VA_URL := https://releases.hashicorp.com/vault
# These can be overridden with either `VA_ARCH=i386 make vainstall` or `make VA_ARCH=i386 vainstall`
VA_VERSION ?= 0.7.3
VA_DISTRO ?= linux
VA_ARCH ?= amd64
VA_INSTALL_TO ?= /
# These are just for readability
VA_ZIP := vault_$(VA_VERSION)_$(VA_DISTRO)_$(VA_ARCH).zip
VA_SHA256 := vault_$(VA_VERSION)_SHA256SUMS
VA_SHA256SIG := vault_$(VA_VERSION)_SHA256SUMS.sig
VA_URL_ZIP := $(VA_URL)/$(VA_VERSION)/$(VA_ZIP)
VA_URL_SHA256 := $(VA_URL)/$(VA_VERSION)/$(VA_SHA256)
VA_URL_SHA256SIG := $(VA_URL)/$(VA_VERSION)/$(VA_SHA256SIG)
# Packer
PK_URL := https://releases.hashicorp.com/packer
# These can be overridden with either `PK_ARCH=i386 make pkinstall` or `make PK_ARCH=i386 pkinstall`
PK_VERSION ?= 1.0.3
PK_DISTRO ?= linux
PK_ARCH ?= amd64
PK_INSTALL_TO ?= /
# These are just for readability
PK_ZIP := packer_$(PK_VERSION)_$(PK_DISTRO)_$(PK_ARCH).zip
PK_SHA256 := packer_$(PK_VERSION)_SHA256SUMS
PK_SHA256SIG := packer_$(PK_VERSION)_SHA256SUMS.sig
PK_URL_ZIP := $(PK_URL)/$(PK_VERSION)/$(PK_ZIP)
PK_URL_SHA256 := $(PK_URL)/$(PK_VERSION)/$(PK_SHA256)
PK_URL_SHA256SIG := $(PK_URL)/$(PK_VERSION)/$(PK_SHA256SIG)
# Consul
CO_URL := https://releases.hashicorp.com/consul
# These can be overridden with either `CO_ARCH=i386 make coinstall` or `make CO_ARCH=i386 coinstall`
CO_VERSION ?= 0.9.0
CO_DISTRO ?= linux
CO_ARCH ?= amd64
CO_INSTALL_TO ?= /
# These are just for readability
CO_ZIP := consul_$(CO_VERSION)_$(CO_DISTRO)_$(CO_ARCH).zip
CO_SHA256 := consul_$(CO_VERSION)_SHA256SUMS
CO_SHA256SIG := consul_$(CO_VERSION)_SHA256SUMS.sig
CO_URL_ZIP := $(CO_URL)/$(CO_VERSION)/$(CO_ZIP)
CO_URL_SHA256 := $(CO_URL)/$(CO_VERSION)/$(CO_SHA256)
CO_URL_SHA256SIG := $(CO_URL)/$(CO_VERSION)/$(CO_SHA256SIG)
# DO ctl
DO_URL := https://github.com/digitalocean/doctl/releases/download
# These can be overridden with either `DO_ARCH=i386 make doinstall` or `make DO_ARCH=i386 doinstall`
DO_VERSION ?= 1.7.0
DO_DISTRO ?= linux
DO_ARCH ?= amd64
DO_INSTALL_TO ?= /
# These are just for readability
DO_TGZ := doctl-$(DO_VERSION)-$(DO_DISTRO)-$(DO_ARCH).tar.gz
DO_SHA256 := doctl-$(DO_VERSION)-$(DO_DISTRO)-$(DO_ARCH).sha256
DO_URL_TGZ := $(DO_URL)/v$(DO_VERSION)/$(DO_TGZ)
DO_URL_SHA256 := $(DO_URL)/v$(DO_VERSION)/$(DO_SHA256)
# this is godly
# https://news.ycombinator.com/item?id=11939200
.PHONY: help
help: ### This screen. Keep this as the first target so it is the default
ifeq ($(UNAME), Linux)
@grep -P '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'
else
@# this is not tested, but prepared in advance for you, Mac users
@awk -F ':.*###' '$$0 ~ FS {printf "%15s%s\n", $$1 ":", $$2}' $(MAKEFILE_LIST) | grep -v '@awk' | sort
endif
# Targets
#
.PHONY: debug
debug: ### Debug Makefile itself placeholder
@echo $(UNAME)
.PHONY: gpgkey
gpgkey: ### Get HashiCorp's GPG key from a list of keyservers
@gpg --list-keys $(HASHICORP_KEY); \
if [ $$? -eq 0 ]; then \
echo "Key $(HASHICORP_KEY) is already in keystore"; \
else \
for ksrv in $(KEY_SERVERS); do \
echo -n "Getting key $(HASHICORP_KEY) from server $$ksrv ... "; \
gpg --keyserver $$ksrv --recv-keys $(HASHICORP_KEY); \
if [ $$? -eq 0 ]; then \
echo "Success!"; \
exit 0 ; \
else \
echo "Fail"; \
fi; \
done; \
fi
.PHONY: tfinstall
tfinstall: ### Download, verify checksum and unpack a specific terraform version
tfinstall: gpgkey
@# First, we download into temporary dir
$(eval $@_TMP := $(shell mktemp -d "/tmp/tfinstall.tmp.XXXXXX"))
test -n "$($@_TMP)" || exit 1
wget --quiet --continue --directory-prefix "$($@_TMP)" \
"$(TF_URL_ZIP)" \
"$(TF_URL_SHA256)" \
"$(TF_URL_SHA256SIG)"
@# Then, we verify signature on hashsums
gpg --verbose --verify "$($@_TMP)/$(TF_SHA256SIG)" "$($@_TMP)/$(TF_SHA256)"
@# Then, we verify hashsum on our zip archive, using only its line as stdin
cd $($@_TMP) && grep "$(TF_ZIP)" "$(TF_SHA256)" | sha256sum -c -w
@# Finally, we are confident file is legitimate
unzip -o "$($@_TMP)/$(TF_ZIP)" -d "$(TF_INSTALL_TO)"
@# Cleanup
rm -rf "$($@_TMP)"
.PHONY: vainstall
vainstall: ### Download, verify checksum and unpack a specific vault version
vainstall: gpgkey
@# First, we download into temporary dir
$(eval $@_TMP := $(shell mktemp -d "/tmp/vainstall.tmp.XXXXXX"))
test -n "$($@_TMP)" || exit 1
wget --quiet --continue --directory-prefix "$($@_TMP)" \
"$(VA_URL_ZIP)" \
"$(VA_URL_SHA256)" \
"$(VA_URL_SHA256SIG)"
@# Then, we verify signature on hashsums
gpg --verbose --verify "$($@_TMP)/$(VA_SHA256SIG)" "$($@_TMP)/$(VA_SHA256)"
@# Then, we verify hashsum on our zip archive, using only its line as stdin
cd $($@_TMP) && grep "$(VA_ZIP)" "$(VA_SHA256)" | sha256sum -c -w
@# Finally, we are confident file is legitimate
unzip -o "$($@_TMP)/$(VA_ZIP)" -d "$(VA_INSTALL_TO)"
@# Cleanup
rm -rf "$($@_TMP)"
.PHONY: pkinstall
pkinstall: ### Download, verify checksum and unpack a specific packer version
pkinstall: gpgkey
@# First, we download into temporary dir
$(eval $@_TMP := $(shell mktemp -d "/tmp/pkinstall.tmp.XXXXXX"))
test -n "$($@_TMP)" || exit 1
wget --quiet --continue --directory-prefix "$($@_TMP)" \
"$(PK_URL_ZIP)" \
"$(PK_URL_SHA256)" \
"$(PK_URL_SHA256SIG)"
@# Then, we verify signature on hashsums
gpg --verbose --verify "$($@_TMP)/$(PK_SHA256SIG)" "$($@_TMP)/$(PK_SHA256)"
@# Then, we verify hashsum on our zip archive, using only its line as stdin
cd $($@_TMP) && grep "$(PK_ZIP)" "$(PK_SHA256)" | sha256sum -c -w
@# Finally, we are confident file is legitimate
unzip -o "$($@_TMP)/$(PK_ZIP)" -d "$(PK_INSTALL_TO)"
@# Cleanup
rm -rf "$($@_TMP)"
.PHONY: coinstall
coinstall: ### Download, verify checksum and unpack a specific consul version
coinstall: gpgkey
@# First, we download into temporary dir
$(eval $@_TMP := $(shell mktemp -d "/tmp/coinstall.tmp.XXXXXX"))
test -n "$($@_TMP)" || exit 1
wget --quiet --continue --directory-prefix "$($@_TMP)" \
"$(CO_URL_ZIP)" \
"$(CO_URL_SHA256)" \
"$(CO_URL_SHA256SIG)"
@# Then, we verify signature on hashsums
gpg --verbose --verify "$($@_TMP)/$(CO_SHA256SIG)" "$($@_TMP)/$(CO_SHA256)"
@# Then, we verify hashsum on our zip archive, using only its line as stdin
cd $($@_TMP) && grep "$(CO_ZIP)" "$(CO_SHA256)" | sha256sum -c -w
@# Finally, we are confident file is legitimate
unzip -o "$($@_TMP)/$(CO_ZIP)" -d "$(CO_INSTALL_TO)"
@# Cleanup
rm -rf "$($@_TMP)"
.PHONY: doinstall
doinstall: ### Download, verify checksum and unpack a specific doctl version
@# First, we download into temporary dir
$(eval $@_TMP := $(shell mktemp -d "/tmp/doinstall.tmp.XXXXXX"))
test -n "$($@_TMP)" || exit 1
wget --continue --directory-prefix "$($@_TMP)" \
"$(DO_URL_TGZ)" \
"$(DO_URL_SHA256)"
@# Then, unpack and verify sum
cd $($@_TMP) && tar zxf $(DO_TGZ) && cat $(DO_SHA256) | sha256sum -c -w
@# Finally, install file if it is legitimate
mv $($@_TMP)/doctl $(DO_INSTALL_TO)/doctl.real
@# Cleanup
rm -rf "$($@_TMP)"
.PHONY: tfmt
tfmt:
@find . -name "*.tf" | xargs -I{} terraform fmt {}
#!/usr/bin/env bash
set -euo pipefail
main() {
if [ "$#" -ne 2 ]; then
help_and_bail
fi
module="$(sanitise_module_name "$1")"
case "$2" in
major)
# shellcheck disable=SC2016
substitution='${1}.(${2}+1).".0.0"'
;;
minor)
# shellcheck disable=SC2016
substitution='${1}.${2}.".".(${3}+1).".0"'
;;
patch)
# shellcheck disable=SC2016
substitution='${1}.${2}.".".${3}.".".(${4}+1)'
;;
*)
help_and_bail
;;
esac
perl -i -pe \
"s/($module\.git\?ref=v)(\d+)\.(\d+)\.(\d+)/$substitution/e" \
environments/**/*.tf
}
sanitise_module_name() {
# Hack: escape hyphens, since that's the only sanitisation we currently need.
#
# Since we're already using perl later, avoid sed because of the differences
# between BSD and GNU sed.
echo "$1" | perl -pe 's/\-/\\-/g'
}
help_and_bail() {
echo "usage: bump_module <module_name> <major|minor|patch>"
exit 1
}
main "$@"
set -e
key_name="$1"
if [[ -z $key_name ]]; then
echo "Please provide a key name"
exit 1
echo "Please provide a key name"
exit 1
fi
dir=$(dirname "$0")
key_dir="${dir}/../private/ssh"
key_path="${key_dir}/${key_name}"
if [[ -f "$key_path" ]]; then
echo "$key_path already exists, not doing anything"
exit 0
echo "$key_path already exists, not doing anything"
exit 0
fi
if [[ -n "${TF_VAR_ssh_key:-""}" ]]; then
echo "TF_VAR_ssh_key set, not doing anything"
exit 0
echo "TF_VAR_ssh_key set, not doing anything"
exit 0
fi
mkdir -p "${key_dir}"
environment=$(basename "$PWD")
source "$dir/tf-set-env"
# shellcheck source=/dev/null
source "$dir/tf-version-check"
if [[ $environment =~ "production" || $environment =~ "gprd" ]];then
echo -ne "${_BRED}You are about to run terraform in the production environment, press enter to continue.${_NORM}"
read -rp ""
echo -e "${_CYN}continuing...${_NORM}"
if [[ $environment =~ "production" || $environment =~ "gprd" ]]; then
echo -ne "${_BRED}You are about to run terraform in the production environment, press enter to continue.${_NORM}" >&2
read -rp ""
echo -e "${_CYN}continuing...${_NORM}" >&2
fi
# shellcheck disable=SC2086
terraform "$@" $TF_EXTRA_OPTS
#!/usr/bin/env bash
#
# Description: Fetch TF secrets from 1password
#
# Depends on 1password CLI tool: https://1password.com/downloads/command-line/
set -u -o pipefail
if ! type op >/dev/null 2>&1; then
echo 'ERROR: Missing 1password CLI tool.'
echo 'See: https://1password.com/downloads/command-line/'
exit 1
fi
if ! type jq >/dev/null 2>&1; then
echo 'ERROR: Missing jq.'
echo 'Try apt/brew install jq'
exit 1
fi
uuid_jq='.[] | select(.overview.title|test("terraform-private/env_vars/.+.env")) | .uuid'
uuid_list="$(op list items --vault=Production | jq --raw-output "${uuid_jq}")"
if [[ -z "${uuid_list}" ]]; then
echo 'ERROR: Unable to fetch items from Production vault'
exit 1
fi
fetch_secret() {
uuid="$1"
echo "Fetching ${uuid}"
item="$(op get item "${uuid}")"
if [[ -z "${item}" ]]; then
echo "ERROR: Unable to fetch uuid=${uuid} from 1password"
return
fi
title="$(echo "${item}" | jq --raw-output '.overview.title')"
secrets="$(echo "${item}" | jq --raw-output '.details.notesPlain')"
secret_file="${title#terraform-}"
if [[ -f "${secret_file}" ]]; then
echo "Updated ${secret_file}"
else
echo "New ${secret_file}"
fi
echo -e "${secrets}" >"${secret_file}"
}
for uuid in ${uuid_list}; do
fetch_secret "$uuid"
done
wait
#!/usr/bin/env bash
set -e
dir="$(cd "$(dirname "${0}")"; pwd)"
# shellcheck disable=SC2034
environment=$(basename "$PWD")
# shellcheck source=/dev/null
source "$dir/tf-version-check"
# shellcheck source=/dev/null
source "$dir/tf-set-env"
terraform init -backend-config "bucket=${STATE_S3_BUCKET}" -backend-config "key=${STATE_S3_KEY}" -backend-config "region=${STATE_S3_REGION}"
_NORM="\033[0m"
_CYN="\033[0;36m"
_YELLOW="\033[0;33m"
# shellcheck disable=SC2034
_BRED="\033[1;31m"
if [[ -z ${environment:-""} ]]; then
echo "environment variable not set"
exit 1
echo "environment variable not set"
exit 1
fi
if [[ $environment == "common" ]]; then
echo "Invalid environment name 'common'"
exit 1
echo "Invalid environment name 'common'"
exit 1
fi
env_file="${dir:-.}/../private/env_vars/${environment}.env"
env_file_common="${dir:-.}/../private/env_vars/common.env"
if [[ -f "$env_file_common" ]]; then
# shellcheck source=/dev/null
source "$env_file_common"
fi
if [[ -f "$env_file" ]]; then
# shellcheck source=/dev/null
source "$env_file"
else
echo -e "${_CYN}WARNING: No TF env variables are set for environment \"$environment\"!${_NORM}"
echo -e "${_CYN}WARNING: No TF env variables are set for environment \"$environment\"!${_NORM}"
fi
vars_file="${dir:-.}/../private/env_vars/${environment}.tfvars"
if [[ -f "$vars_file" ]]; then
# shellcheck source=/dev/null
# shellcheck disable=SC2034
TF_EXTRA_OPTS="-var-file=$vars_file"
fi
env_tf_version_file="${dir:-.}/../environments/${environment:-.}/.terraform-version"
repo_tf_version_file="${dir:-.}/../.terraform-version"
if [[ -n $TF_VERSION ]]; then
required_version="$TF_VERSION"
required_version="$TF_VERSION"
elif [[ -r "$env_tf_version_file" ]]; then
required_version=$(cat "$env_tf_version_file")
required_version=$(cat "$env_tf_version_file")
elif [[ -r "$repo_tf_version_file" ]]; then
required_version=$(cat "$repo_tf_version_file")
required_version=$(cat "$repo_tf_version_file")
else
echo -e "${_BRED}ERROR: ${_NORM}No suitable .terraform-version file found; exiting."
exit 1
echo -e "${_BRED}ERROR: ${_NORM}No suitable .terraform-version file found; exiting."
exit 1
fi
if [[ "$installed_version" != "$required_version" ]]; then
if [[ -x "$(which tfenv)" ]]; then
echo -e "${_YELLOW}!!! NOTE !!!${_NORM}"
echo "Setting terraform to use version ${required_version}."
echo "Update the `.terraform-version` file(s) to use a different version"
tfenv use "${required_version}"
else
echo -e "${_BRED}!!! WARNING !!!${_NORM}"
echo "You are using the wrong version ($installed_version) of Terraform."
echo "Please install version $required_version or proceed at your own risk, press enter to continue."
read -rp ""
if [[ -x "$(command -v tfenv)" ]]; then
echo -e "${_YELLOW}!!! NOTE !!!${_NORM}"
echo "Setting terraform to use version ${required_version}."
# shellcheck disable=SC2016
echo 'Update the `.terraform-version` file(s) to use a different version'
tfenv use "${required_version}"
installed_version=$(terraform -version | cut -d v -f 2 | head -1)
if [[ "$installed_version" != "$required_version" ]]; then
echo -e "${_BRED}!!! ERROR !!!"
echo -e '`tfenv use '"${required_version}"'` failed. Quitting.'"${_NORM}"
exit 1
fi
else
echo -e "${_BRED}!!! WARNING !!!${_NORM}"
echo "You are using the wrong version ($installed_version) of Terraform."
echo "Please install version $required_version or proceed at your own risk, press enter to continue."
read -rp ""
fi
fi
data "aws_iam_policy_document" "datateam-greenhouse-extract" {
statement {
sid = "1"
actions = [
"s3:ListBucket",
]
resources = [
aws_s3_bucket.datateam-greenhouse-extract.arn,
]
}
statement {
sid = "2"
actions = [
"s3:GetObject",
"s3:PutObject",
]
resources = [
aws_s3_bucket.datateam-greenhouse-extract.arn,
"${aws_s3_bucket.datateam-greenhouse-extract.arn}/*",
]
}
}
resource "aws_iam_user" "datateam-greenhouse-extract" {
name = "datateam-greenhouse-extract"
}
resource "aws_iam_policy" "datateam-greenhouse-extract" {
name = "datateam-greenhouse-extract"
path = "/"
policy = data.aws_iam_policy_document.datateam-greenhouse-extract.json
}
resource "aws_iam_user_policy_attachment" "datateam-greenhouse-extract" {
user = aws_iam_user.datateam-greenhouse-extract.name
policy_arn = aws_iam_policy.datateam-greenhouse-extract.arn
}
resource "aws_s3_bucket" "datateam-greenhouse-extract" {
bucket = "datateam-greenhouse-extract"
acl = "private"
}
// Configure remote state
terraform {
backend "s3" {}
backend "s3" {
bucket = "gitlab-com-infrastructure"
key = "terraform/aws-account/terraform.tfstate"
region = "us-east-1"
}
}
// Use credentials from environment or shared credentials file
provider "aws" {
region = "us-east-1"
version = "~> 1.41"
version = "~> 2.27"
}
module "cloudtrail" {
source = "git::https://github.com/cloudposse/terraform-aws-cloudtrail.git?ref=0.5.0"
source = "git::https://github.com/cloudposse/terraform-aws-cloudtrail.git?ref=0.9.0"
namespace = "gitlab"
stage = "aws"
name = "cloudtrail-default"
enable_log_file_validation = "true"
include_global_service_events = "true"
is_multi_region_trail = "true"
s3_bucket_name = "${module.cloudtrail_s3_bucket.bucket_id}"
s3_bucket_name = module.cloudtrail_s3_bucket.bucket_id
}
module "cloudtrail_s3_bucket" {
name = "cloudtrail-default"
region = "us-east-1"
}
resource "aws_s3_bucket" "forum_backup" {
bucket = "gitlab-discourse-forum-backup"
acl = "private"
}
resource "aws_iam_user" "forum_backup" {
name = "forum-backup"
}
resource "aws_iam_access_key" "forum_backup" {
user = aws_iam_user.forum_backup.name
}
resource "aws_iam_policy" "write_forum_backup" {
name = "write-forum-backup"
policy = <<EOF
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": "s3:*",
"Resource": [
"arn:aws:s3:::${aws_s3_bucket.forum_backup.bucket}",
"arn:aws:s3:::${aws_s3_bucket.forum_backup.bucket}/*"
]
}
]
}
EOF
}
resource "aws_iam_user_policy_attachment" "forum_backup" {
user = aws_iam_user.forum_backup.name
policy_arn = aws_iam_policy.write_forum_backup.arn
}
terraform {
required_version = ">= 0.12"
}
# Terraform for the AWS SnowPlow Pipeline
This configuration uses the following AWS services to host SnowPlow. There
may be more in use, but these are the primary services.
1. EC2 (Auto Scaling Groups, Launch Configurations, ELB, Target Groups,
Security Groups)
1. Kinesis (Streams and Firehose)
1. Lambda
1. DynamoDB
1. IAM (Policies and Roles)
1. S3
1. VPC (Subnets, VPC, Internet Gateways, Routes, Routing Tables)
## Design Document
If you want to know more about the SnowPlow infrastructure, please consult the
[design document](https://about.gitlab.com/handbook/engineering/infrastructure/library/snowplow/).
## SnowPlow Installs and Configs
There are two types of SnowPlow nodes (Collectors and Enrichers) and they are
all configured and installed via user-data in the launch configurations.
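As a rough illustration (not the actual resources in this configuration), a collector launch configuration wired to its user-data script might look like the sketch below; the AMI variable, instance type, file path, and resource names are assumptions:

```
resource "aws_launch_configuration" "snowplow_collector" {
  name_prefix          = "snowplow-collector-"                            # placeholder
  image_id             = var.collector_ami                                # Amazon Linux 2, per the user-data scripts
  instance_type        = "t3.medium"                                      # assumed
  security_groups      = [aws_security_group.snowplow_collector.id]       # placeholder
  iam_instance_profile = aws_iam_instance_profile.snowplow_collector.name # placeholder

  # The collector install/config script shown later in this MR is passed in here.
  user_data = file("${path.module}/files/collector-user-data.sh") # assumed path

  lifecycle {
    create_before_destroy = true
  }
}
```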
## Kinesis Streams
Kinesis is how SnowPlow hands off data from collector to enricher to s3loader. The pipeline uses the following streams; a Terraform sketch follows the list.
* snowplow-raw-good
* snowplow-raw-bad
* snowplow-enriched-good
* snowplow-enriched-bad
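A minimal sketch of one of these streams; the shard count and retention period are assumptions, not values taken from this configuration:

```
resource "aws_kinesis_stream" "snowplow_raw_good" {
  name             = "snowplow-raw-good"
  shard_count      = 1  # assumed; sized for collector throughput
  retention_period = 24 # hours; assumed default
}
```

The other three streams follow the same pattern.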
## Kinesis Firehose and Lambda
Kinesis Firehose takes events from a stream, applies a Lambda function to each
event, and then writes the result into the S3 bucket.
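As a sketch of that wiring, assuming an extended S3 destination and the Lambda formatter described in the next section (the resource and IAM role names here are placeholders):

```
resource "aws_kinesis_firehose_delivery_stream" "snowplow_enriched_good" {
  name        = "SnowPlowEnrichedGood" # placeholder name
  destination = "extended_s3"

  # The source Kinesis stream would be attached via kinesis_source_configuration; omitted for brevity.

  extended_s3_configuration {
    role_arn   = aws_iam_role.firehose.arn # placeholder role
    bucket_arn = "arn:aws:s3:::gitlab-com-snowplow-events"

    processing_configuration {
      enabled = true

      processors {
        type = "Lambda"

        parameters {
          parameter_name  = "LambdaArn"
          parameter_value = "${aws_lambda_function.snowplow_event_formatter.arn}:$LATEST"
        }
      }
    }
  }
}
```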
## Lambda Function
Firehose uses a Lambda function to format events written to S3. The Lambda
function code is in the file ```lambda/lambda_function.py```. As of this
writing, all this function does is append a newline to the end of each event
before it is written to S3.
The AWS provider for Terraform requires a zip file of this code to update or
create the Lambda function. There is a data archive object in the config with
the name ```snowplow_lambda_event_formatter_archive``` that builds the zip file
from the function's Python script. For now, the zip contains a single file (the
lambda_function.py file) with no directory structure.
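A rough sketch of how the archive and function might fit together; the function name matches the ARN granted in the Firehose IAM policy, while the paths, role, and runtime are assumptions:

```
data "archive_file" "snowplow_lambda_event_formatter_archive" {
  type        = "zip"
  source_file = "${path.module}/lambda/lambda_function.py"
  output_path = "${path.module}/lambda/lambda_function.zip" # assumed output path
}

resource "aws_lambda_function" "snowplow_event_formatter" {
  function_name    = "SnowPlowFirehoseFormatter"
  filename         = data.archive_file.snowplow_lambda_event_formatter_archive.output_path
  source_code_hash = data.archive_file.snowplow_lambda_event_formatter_archive.output_base64sha256
  handler          = "lambda_function.lambda_handler"
  runtime          = "python2.7"                      # assumed
  role             = aws_iam_role.snowplow_lambda.arn # placeholder role
}
```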
If the hash of that zip changes, Terraform will try to update the function.
The hash can change even when no code changes were made; in that case it is
safe to let Terraform replace the function code on the fly.
## DynamoDB
The enricher and s3loader nodes use DynamoDB to track Kinesis state. Normally
these tables would be created by Terraform, but the pipeline did not work
reliably unless the nodes created the tables themselves. Therefore, access to
the tables is controlled by roles and policies, while the tables are managed by
the SnowPlow nodes that need them: if a table does not exist yet, the node that
needs it will create it on its own. A policy sketch follows the list below.
* SnowplowEnrich-gitlab-us-east-1
* SnowplowS3Loader-gitlab-us-east-1
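For illustration, the piece of the enricher's role policy that scopes DynamoDB access to its own state table could be expressed roughly as below; the policy document name is a placeholder, and the table ARN matches the JSON policy shown later:

```
data "aws_iam_policy_document" "snowplow_enrich_dynamodb" {
  statement {
    actions = [
      "dynamodb:*",
    ]

    resources = [
      "arn:aws:dynamodb:us-east-1:855262394183:table/SnowplowEnrich-gitlab-us-east-1",
    ]
  }
}
```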
## Launch Config Changes and Production Instances
Updating the launch config will apply to new systems coming up in the
auto-scaling group. But existing EC2 instances won't be changed. You will
have to rotate them manually to have them replaced.
### SSL Certificate for Load Balancer
The certificate is referenced by its ARN in AWS. We're not going to put the
private key in Terraform, so this will have to remain an ARN reference.
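For illustration, an HTTPS listener consuming the certificate by ARN might look roughly like this; the load balancer, target group, and variable names are placeholders:

```
resource "aws_lb_listener" "snowplow_https" {
  load_balancer_arn = aws_lb.snowplow.arn # placeholder
  port              = 443
  protocol          = "HTTPS"
  certificate_arn   = var.ssl_certificate_arn # only the ARN lives in TF, never the private key

  default_action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.snowplow_collector.arn # placeholder
  }
}
```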
from __future__ import print_function
import base64
print('Loading function')
def lambda_handler(event, context):
    output = []

    for record in event['records']:
        print(record['recordId'])
        # Firehose hands us base64-encoded bytes.
        payload = base64.b64decode(record['data'])

        # Append a newline so the objects written to S3 are line-delimited events.
        processed = payload + b"\n"

        output_record = {
            'recordId': record['recordId'],
            'result': 'Ok',
            # Re-encode for Firehose; decode so the response is JSON-serializable.
            'data': base64.b64encode(processed).decode('utf-8')
        }
        output.append(output_record)

    print('Successfully processed {} records.'.format(len(event['records'])))
    return {'records': output}
#!/bin/bash
## This has, so far, been written to run on Amazon Linux 2 AMI.
## Install Java 1.8
yum -y install java-1.8.0-openjdk
## Set up user, group, and install location
groupadd snowplow
adduser --system --gid snowplow snowplow
mkdir -p /snowplow
mkdir -p /snowplow/config
mkdir -p /snowplow/logs
## Install SnowPlow Kinesis Collector
mkdir -p /tmp/build
cd /tmp/build
wget -q http://dl.bintray.com/snowplow/snowplow-generic/snowplow_scala_stream_collector_kinesis_0.15.0.zip
unzip -d /snowplow/bin snowplow_scala_stream_collector_kinesis_0.15.0.zip
cd /tmp
rm -rf /tmp/build
## Need to copy in a config
cat > /snowplow/config/collector.hocon <<EOF
collector {
interface = "0.0.0.0"
port = "8000"
production = true
p3p {
policyRef = "/w3c/p3p.xml"
CP = "NOI DSP COR NID PSA OUR IND COM NAV STA"
}
crossDomain {
enabled = true
domains = [ "*" ]
secure = true
}
cookie {
enabled = false
expiration = "365 days"
name = "snwplw"
domain = "gitlab.sinter-collect.com"
}
doNotTrackCookie {
enabled = false
name = "COLLECTOR_DO_NOT_TRACK_COOKIE_NAME"
value = "COLLECTOR_DO_NOT_TRACK_COOKIE_VALUE"
}
cookieBounce {
enabled = false
name = "n3pc"
fallbackNetworkUserId = "00000000-0000-4000-A000-000000000000"
forwardedProtocolHeader = "X-Forwarded-Proto"
}
redirectMacro {
enabled = false
}
cors {
accessControlMaxAge = 5 seconds
}
rootResponse {
enabled = false
statusCode = 302
}
prometheusMetrics {
enabled = false
}
streams {
good = "snowplow-raw-good"
bad = "snowplow-raw-bad"
useIpAddressAsPartitionKey = true
sink {
enabled = kinesis
region = "us-east-1"
threadPoolSize = 10
aws {
accessKey = iam
secretKey = iam
}
backoffPolicy {
minBackoff = 10
maxBackoff = 300000
}
}
buffer {
byteLimit = 16384
recordLimit = 1000
timeLimit = 10000
}
}
}
akka {
loglevel = OFF
loggers = ["akka.event.slf4j.Slf4jLogger"]
http.server {
remote-address-header = on
raw-request-uri-header = on
parsing {
max-uri-length = 32768
uri-parsing-mode = relaxed
}
}
}
EOF
chown -R snowplow:snowplow /snowplow
## Start the collector service
su snowplow -g snowplow -c 'nohup /usr/bin/java -jar /snowplow/bin/snowplow-stream-collector-kinesis-0.15.0.jar --config /snowplow/config/collector.hocon > /snowplow/logs/out.log 2>&1 &'
#!/bin/bash
## This has, so far, been written to run on Amazon Linux 2 AMI.
## Install Java 1.8
yum -y install java-1.8.0-openjdk
## Set up user, group, and install location
groupadd snowplow
adduser --system --gid snowplow snowplow
mkdir -p /snowplow
mkdir -p /snowplow/config
mkdir -p /snowplow/enrichments
mkdir -p /snowplow/logs
## Install SnowPlow Stream Enrich for Kinesis
mkdir -p /tmp/build
cd /tmp/build
wget -q http://dl.bintray.com/snowplow/snowplow-generic/snowplow_stream_enrich_kinesis_0.21.0.zip
unzip -d /snowplow/bin snowplow_stream_enrich_kinesis_0.21.0.zip
cd /tmp
rm -rf /tmp/build
## We need an IGLU Resolver config
cat > /snowplow/config/iglu_resolver.json <<EOJ
{
"schema": "iglu:com.snowplowanalytics.iglu/resolver-config/jsonschema/1-0-1",
"data": {
"cacheSize": 500,
"repositories": [
{
"name": "Iglu Central",
"priority": 0,
"vendorPrefixes": [ "com.snowplowanalytics" ],
"connection": {
"http": {
"uri": "http://iglucentral.com"
}
}
},
{
"name": "Iglu Central - GCP Mirror",
"priority": 1,
"vendorPrefixes": [ "com.snowplowanalytics" ],
"connection": {
"http": {
"uri": "http://mirror01.iglucentral.com"
}
}
}
]
}
}
EOJ
## IP Lookups Enrichment
cat > /snowplow/enrichments/ip_lookups.json <<EOR
{
"schema": "iglu:com.snowplowanalytics.snowplow/ip_lookups/jsonschema/2-0-0",
"data": {
"name": "ip_lookups",
"vendor": "com.snowplowanalytics.snowplow",
"enabled": true,
"parameters": {
"geo": {
"database": "GeoLite2-City.mmdb",
"uri": "http://snowplow-hosted-assets.s3.amazonaws.com/third-party/maxmind"
}
}
}
}
EOR
## user_agent_utils_config Enrichment
cat > /snowplow/enrichments/user_agent_utils_config.json <<EOA
{
"schema": "iglu:com.snowplowanalytics.snowplow/user_agent_utils_config/jsonschema/1-0-0",
"data": {
"vendor": "com.snowplowanalytics.snowplow",
"name": "user_agent_utils_config",
"enabled": true,
"parameters": {}
}
}
EOA
## Need to copy in a config
cat > /snowplow/config/enricher.hocon <<EOF
enrich {
production = true
streams {
in {
raw = "snowplow-raw-good"
}
out {
enriched = "snowplow-enriched-good"
bad = "snowplow-enriched-bad"
partitionKey = "user_ipaddress"
}
sourceSink {
enabled = kinesis
aws {
accessKey = iam
secretKey = iam
}
region = "us-east-1"
maxRecords = 10000
initialPosition = TRIM_HORIZON
backoffPolicy {
minBackoff = 10
maxBackoff = 300000
}
}
buffer {
byteLimit = 16384
recordLimit = 1000
timeLimit = 10000
}
appName = "SnowplowEnrich-gitlab-us-east-1"
}
}
EOF
chown -R snowplow:snowplow /snowplow
## Start the enricher service
su snowplow -g snowplow -c 'nohup /usr/bin/java -jar /snowplow/bin/snowplow-stream-enrich-kinesis-0.21.0.jar --config /snowplow/config/enricher.hocon --enrichments file:/snowplow/enrichments --resolver file:/snowplow/config/iglu_resolver.json > /snowplow/logs/out.log 2>&1 &'
{
"Version" : "2012-10-17",
"Id" : "Policy1560181228695",
"Statement" : [
{
"Sid" : "Stmt1560181207940",
"Effect" : "Allow",
"Principal" : {
"AWS" : "arn:aws:iam::855262394183:user/datateam-snowplow-ro"
},
"Action" : "s3:ListBucket",
"Resource" : "arn:aws:s3:::gitlab-com-snowplow-events"
},
{
"Sid" : "Stmt1560181227007",
"Effect" : "Allow",
"Principal" : {
"AWS" : "arn:aws:iam::855262394183:user/datateam-snowplow-ro"
},
"Action" : "s3:GetObject",
"Resource" : "arn:aws:s3:::gitlab-com-snowplow-events/*"
}
]
}
{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "",
"Effect": "Allow",
"Action": [
"cloudwatch:PutMetricData"
],
"Resource": [
"*"
]
},
{
"Sid": "",
"Effect": "Allow",
"Action": [
"kinesis:*"
],
"Resource": [
"*"
]
}
]
}
{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "",
"Effect": "Allow",
"Action": [
"cloudwatch:PutMetricData"
],
"Resource": [
"*"
]
},
{
"Sid": "",
"Effect": "Allow",
"Action": [
"dynamodb:*"
],
"Resource": [
"arn:aws:dynamodb:us-east-1:855262394183:table/SnowplowEnrich-gitlab-us-east-1"
]
},
{
"Sid": "",
"Effect": "Allow",
"Action": [
"kinesis:*"
],
"Resource": [
"*"
]
}
]
}
{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "",
"Effect": "Allow",
"Action": [
"glue:GetTableVersions"
],
"Resource": "*"
},
{
"Sid": "",
"Effect": "Allow",
"Action": [
"s3:AbortMultipartUpload",
"s3:GetBucketLocation",
"s3:GetObject",
"s3:ListBucket",
"s3:ListBucketMultipartUploads",
"s3:PutObject"
],
"Resource": [
"arn:aws:s3:::gitlab-com-snowplow-events",
"arn:aws:s3:::gitlab-com-snowplow-events/*",
"arn:aws:s3:::%FIREHOSE_BUCKET_NAME%",
"arn:aws:s3:::%FIREHOSE_BUCKET_NAME%/*"
]
},
{
"Sid": "",
"Effect": "Allow",
"Action": [
"lambda:InvokeFunction",
"lambda:GetFunctionConfiguration"
],
"Resource": "arn:aws:lambda:us-east-1:855262394183:function:SnowPlowFirehoseFormatter:$LATEST"
},
{
"Sid": "",
"Effect": "Allow",
"Action": [
"logs:PutLogEvents"
],
"Resource": [
"arn:aws:logs:us-east-1:855262394183:log-group:/aws/kinesisfirehose/SnowPlowEnrichedBad:log-stream:*"
]
},