Commit 57f04888 authored by Cameron McFarland

Learned a better way to manage the user-data scripts.

parent 2164d6eb
@@ -42,11 +42,8 @@ Going to probably need an elastic IP for the snowplow endpoint. Revisit this!
IAM Policies and Roles:
Need roles/policies so the collectors, enrichers, and S3 loaders have proper access to the AWS services they use.
ECS tasks need an IAM role to allow access to those services? Yes.
Making a bunch of policies is great, but they were written for EC2, not ECS.
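A quick way to sanity-check which policies actually ended up attached to a role (a sketch with the AWS CLI; the role name below is a hypothetical placeholder, not necessarily what we created):
```
# List role names, then show managed and inline policies on one of them.
# "snowplow-collector-role" is a hypothetical name for illustration.
aws iam list-roles --query 'Roles[].RoleName'
aws iam list-attached-role-policies --role-name snowplow-collector-role
aws iam list-role-policies --role-name snowplow-collector-role
```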
Collector:
I made two Kinesis streams with one shard each to start: snowplow-good and snowplow-bad (see the sketch below).
OMG it worked.
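For reference, streams like these can be created and verified from the AWS CLI (a sketch using the names above, not necessarily the exact commands used):
```
# One shard each to start; resharding can come later.
aws kinesis create-stream --stream-name snowplow-good --shard-count 1
aws kinesis create-stream --stream-name snowplow-bad --shard-count 1
# Wait for ACTIVE, then confirm the shard count.
aws kinesis wait stream-exists --stream-name snowplow-good
aws kinesis describe-stream-summary --stream-name snowplow-good
```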
## Runbook Material
```
[ec2-user@ip-10-32-0-206 ~]$ curl http://localhost:8000/health
OK[ec2-user@ip-10-32-0-206 ~]$
```
@@ -62,14 +59,22 @@ Testing an event: curl http://34.227.92.217:8000/i\?e\=pv
S3Loader:
"I realize now my folly: the app name needs to be different between the enricher and loader ergo the 2 dynamoDB tables were conflicting. Everything makes so much sense now…"
ECS:
ECS needs us to run EC2 instances to run Docker on, unless we use Fargate. WTF.
https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ECS_GetStarted_Fargate.html
EC2 Autoscaling Stuff:
Need three target groups. These define the health checks used to determine whether an instance is healthy (see the sketch below).
Need three auto-scaling groups.
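As a sketch of what one of the three target groups could look like (the name and VPC ID are placeholders; the health check path comes from the collector's /health endpoint above):
```
# Hypothetical collector target group; vpc-00000000 is a placeholder.
aws elbv2 create-target-group \
  --name snowplow-collector-tg \
  --protocol HTTP --port 8000 \
  --vpc-id vpc-00000000 \
  --health-check-path /health \
  --health-check-port 8000
```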
Logging:
-Dlog4j.configuration=file:/snowplow/bin/log4j.properties
```
log4j.rootLogger=INFO, FILE
log4j.appender.FILE=org.apache.log4j.FileAppender
log4j.appender.FILE.File=/snowplow/logs/log.out
log4j.appender.FILE.ImmediateFlush=true
log4j.appender.FILE.Threshold=debug
log4j.appender.FILE.Append=false
log4j.appender.FILE.layout=org.apache.log4j.PatternLayout
```
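For example, the flag would be added to the java launch line used in the user-data scripts below, something like this (a sketch; paths as configured above):
```
# Run the collector with file-based log4j output instead of the default stdout logging.
su snowplow -g snowplow -c '/usr/bin/java -Dlog4j.configuration=file:/snowplow/bin/log4j.properties -jar /snowplow/bin/snowplow-stream-collector-kinesis-0.15.0.jar --config /snowplow/config/collector.hocon'
```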
Last steps:
Are we using the right SSH key?
Did we clean up everything we made for testing?
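A couple of quick CLI checks for the questions above (a sketch; not exhaustive):
```
# Which key pair did the instances actually launch with?
aws ec2 describe-instances --query 'Reservations[].Instances[].[InstanceId,KeyName]'
# Anything left over from testing?
aws kinesis list-streams
aws s3 ls
```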
#!/bin/bash
## This has, so far, been written to run on Amazon Linux 2 AMI.
## Install Java 1.8
yum -y install java-1.8.0-openjdk
## Set up user, group, and install location
groupadd snowplow
adduser --system --gid snowplow snowplow
mkdir -p /snowplow
mkdir -p /snowplow/config
## Install Snowplow Scala Stream Collector (Kinesis)
mkdir -p /tmp/build
cd /tmp/build
wget -q http://dl.bintray.com/snowplow/snowplow-generic/snowplow_scala_stream_collector_kinesis_0.15.0.zip
unzip -d /snowplow/bin snowplow_scala_stream_collector_kinesis_0.15.0.zip
cd /tmp
rm -rf /tmp/build
## Need to copy in a config
cat > /snowplow/config/collector.hocon <<EOF
collector {
interface = "0.0.0.0"
port = "8000"
production = true
p3p {
policyRef = "/w3c/p3p.xml"
CP = "NOI DSP COR NID PSA OUR IND COM NAV STA"
}
crossDomain {
enabled = true
domains = [ "*" ]
secure = true
}
cookie {
enabled = false
expiration = "365 days"
name = "snwplw"
domain = "gitlab.sinter-collect.com"
}
doNotTrackCookie {
enabled = false
name = "COLLECTOR_DO_NOT_TRACK_COOKIE_NAME"
value = "COLLECTOR_DO_NOT_TRACK_COOKIE_VALUE"
}
cookieBounce {
enabled = false
name = "n3pc"
fallbackNetworkUserId = "00000000-0000-4000-A000-000000000000"
forwardedProtocolHeader = "X-Forwarded-Proto"
}
redirectMacro {
enabled = false
}
cors {
accessControlMaxAge = 5 seconds
}
rootResponse {
enabled = false
statusCode = 302
}
prometheusMetrics {
enabled = false
}
streams {
good = "snowplow-raw-good"
bad = "snowplow-raw-bad"
useIpAddressAsPartitionKey = true
sink {
enabled = kinesis
region = "us-east-1"
threadPoolSize = 10
aws {
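# "iam" tells the app to use the EC2 instance profile credentials rather than static keys (per Snowplow's documented credential options).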
accessKey = iam
secretKey = iam
}
backoffPolicy {
minBackoff = 10
maxBackoff = 300000
}
}
buffer {
byteLimit = 16384
recordLimit = 1000
timeLimit = 10000
}
}
}
akka {
loglevel = OFF
loggers = ["akka.event.slf4j.Slf4jLogger"]
http.server {
remote-address-header = on
raw-request-uri-header = on
parsing {
max-uri-length = 32768
uri-parsing-mode = relaxed
}
}
}
EOF
chown -R snowplow:snowplow /snowplow
## Start the collector service
su snowplow -g snowplow -c '/usr/bin/java -jar /snowplow/bin/snowplow-stream-collector-kinesis-0.15.0.jar --config /snowplow/config/collector.hocon'
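After boot, the health check from the runbook notes above is the quickest way to confirm the collector came up (a sketch, run on the instance):
```
curl http://localhost:8000/health        # expect "OK"
curl 'http://localhost:8000/i?e=pv'      # expect HTTP 200 and a tracking pixel
```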
#!/bin/bash
## This has, so far, been written to run on Amazon Linux 2 AMI.
## Install Java 1.8
yum -y install java-1.8.0-openjdk
## Set up user, group, and install location
groupadd snowplow
adduser --system --gid snowplow snowplow
mkdir -p /snowplow
mkdir -p /snowplow/config
## Install Snowplow Stream Enrich (Kinesis)
mkdir -p /tmp/build
cd /tmp/build
wget -q http://dl.bintray.com/snowplow/snowplow-generic/snowplow_stream_enrich_kinesis_0.21.0.zip
unzip -d /snowplow/bin snowplow_stream_enrich_kinesis_0.21.0.zip
cd /tmp
rm -rf /tmp/build
## We need an IGLU Resolver config
cat > /snowplow/config/iglu_resolver.json <<EOJ
{
"schema": "iglu:com.snowplowanalytics.iglu/resolver-config/jsonschema/1-0-1",
"data": {
"cacheSize": 500,
"repositories": [
{
"name": "Iglu Central",
"priority": 0,
"vendorPrefixes": [ "com.snowplowanalytics" ],
"connection": {
"http": {
"uri": "http://iglucentral.com"
}
}
},
{
"name": "Iglu Central - GCP Mirror",
"priority": 1,
"vendorPrefixes": [ "com.snowplowanalytics" ],
"connection": {
"http": {
"uri": "http://mirror01.iglucentral.com"
}
}
}
]
}
}
EOJ
## Need to copy in a config
cat > /snowplow/config/enricher.hocon <<EOF
enrich {
production = true
streams {
in {
raw = "snowplow-raw-good"
}
out {
enriched = "snowplow-enriched-good"
bad = "snowplow-enriched-bad"
partitionKey = "user_ipaddress"
}
sourceSink {
enabled = kinesis
aws {
accessKey = iam
secretKey = iam
}
region = "us-east-1"
maxRecords = 10000
initialPosition = TRIM_HORIZON
backoffPolicy {
minBackoff = 10
maxBackoff = 300000
}
}
buffer {
byteLimit = 16384
recordLimit = 1000
timeLimit = 10000
}
appName = "SnowplowEnrich-gitlab-us-east-1"
}
}
EOF
chown -R snowplow:snowplow /snowplow
## Start the enricher service
su snowplow -g snowplow -c '/usr/bin/java -jar /snowplow/bin/snowplow-stream-enrich-kinesis-0.21.0.jar --config /snowplow/config/enricher.hocon --resolver file:/snowplow/config/iglu_resolver.json'
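Once the enricher is running, a couple of quick checks that records are flowing and that the KCL lease table was created (a sketch; names from the config above):
```
aws kinesis describe-stream-summary --stream-name snowplow-enriched-good
aws dynamodb describe-table --table-name SnowplowEnrich-gitlab-us-east-1
```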
@@ -13,6 +13,19 @@ provider "aws" {
version = "~> 1.41"
}
// User Data Templates
data "template_file" "user_data_collector" {
template = "${file("${path.module}/collector-user-data.sh")}"
}
data "template_file" "user_data_enricher" {
template = "${file("${path.module}/enricher-user-data.sh")}"
}
data "template_file" "user_data_s3loader" {
template = "${file("${path.module}/s3loader-user-data.sh")}"
}
// Policies
resource "aws_iam_policy" "snowplow_collector_policy" {
description = "Policy the allows the collector to access other AWS services such as Kinesis."
@@ -404,126 +417,7 @@ resource "aws_launch_configuration" "snowplow_collector_launch_config" {
"${aws_security_group.snowplow_security_group.id}",
]
user_data = <<EOUD
#!/bin/bash
## This has, so far, been written to run on Amazon Linux 2 AMI.
## Install Java 1.8
yum -y install java-1.8.0-openjdk
## Set up user, group, and install location
groupadd snowplow
adduser --system --gid snowplow snowplow
mkdir -p /snowplow
mkdir -p /snowplow/config
## Install SnowPlow Kinesis Collector
mkdir -p /tmp/build
cd /tmp/build
wget -q http://dl.bintray.com/snowplow/snowplow-generic/snowplow_scala_stream_collector_kinesis_0.15.0.zip
unzip -d /snowplow/bin snowplow_scala_stream_collector_kinesis_0.15.0.zip
cd /tmp
rm -rf /tmp/build
## Need to copy in a config
cat > /snowplow/config/collector.hocon <<EOF
collector {
interface = "0.0.0.0"
port = "8000"
production = true
p3p {
policyRef = "/w3c/p3p.xml"
CP = "NOI DSP COR NID PSA OUR IND COM NAV STA"
}
crossDomain {
enabled = true
domains = [ "*" ]
secure = true
}
cookie {
enabled = false
expiration = "365 days"
name = "snwplw"
domain = "gitlab.sinter-collect.com"
}
doNotTrackCookie {
enabled = false
name = "COLLECTOR_DO_NOT_TRACK_COOKIE_NAME"
value = "COLLECTOR_DO_NOT_TRACK_COOKIE_VALUE"
}
cookieBounce {
enabled = false
name = "n3pc"
fallbackNetworkUserId = "00000000-0000-4000-A000-000000000000"
forwardedProtocolHeader = "X-Forwarded-Proto"
}
redirectMacro {
enabled = false
}
cors {
accessControlMaxAge = 5 seconds
}
rootResponse {
enabled = false
statusCode = 302
}
prometheusMetrics {
enabled = false
}
streams {
good = "snowplow-raw-good"
bad = "snowplow-raw-bad"
useIpAddressAsPartitionKey = true
sink {
enabled = kinesis
region = "us-east-1"
threadPoolSize = 10
aws {
accessKey = iam
secretKey = iam
}
backoffPolicy {
minBackoff = 10
maxBackoff = 300000
}
}
buffer {
byteLimit = 16384
recordLimit = 1000
timeLimit = 10000
}
}
}
akka {
loglevel = OFF
loggers = ["akka.event.slf4j.Slf4jLogger"]
http.server {
remote-address-header = on
raw-request-uri-header = on
parsing {
max-uri-length = 32768
uri-parsing-mode = relaxed
}
}
}
EOF
chown -R snowplow:snowplow /snowplow
## Star the collector service
su snowplow -g snowplow -c '/usr/bin/java -jar /snowplow/bin/snowplow-stream-collector-kinesis-0.15.0.jar --config /snowplow/config/collector.hocon'
EOUD
user_data = "${data.template_file.user_data_collector.rendered}"
lifecycle {
create_before_destroy = true
@@ -541,101 +435,7 @@ resource "aws_launch_configuration" "snowplow_enricher_launch_config" {
"${aws_security_group.snowplow_security_group.id}",
]
user_data = <<EOUD
#!/bin/bash
## This has, so far, been written to run on Amazon Linux 2 AMI.
## Install Java 1.8
yum -y install java-1.8.0-openjdk
## Set up user, group, and install location
groupadd snowplow
adduser --system --gid snowplow snowplow
mkdir -p /snowplow
mkdir -p /snowplow/config
## Install SnowPlow Kinesis Collector
mkdir -p /tmp/build
cd /tmp/build
wget -q http://dl.bintray.com/snowplow/snowplow-generic/snowplow_stream_enrich_kinesis_0.21.0.zip
unzip -d /snowplow/bin snowplow_stream_enrich_kinesis_0.21.0.zip
cd /tmp
rm -rf /tmp/build
## We need an IGLU Resolver config
cat > /snowplow/config/iglu_resolver.json <<EOJ
{
"schema": "iglu:com.snowplowanalytics.iglu/resolver-config/jsonschema/1-0-1",
"data": {
"cacheSize": 500,
"repositories": [
{
"name": "Iglu Central",
"priority": 0,
"vendorPrefixes": [ "com.snowplowanalytics" ],
"connection": {
"http": {
"uri": "http://iglucentral.com"
}
}
},
{
"name": "Iglu Central - GCP Mirror",
"priority": 1,
"vendorPrefixes": [ "com.snowplowanalytics" ],
"connection": {
"http": {
"uri": "http://mirror01.iglucentral.com"
}
}
}
]
}
}
EOJ
## Need to copy in a config
cat > /snowplow/config/enricher.hocon <<EOF
enrich {
production = true
streams {
in {
raw = "snowplow-raw-good"
}
out {
enriched = "snowplow-enriched-good"
bad = "snowplow-enriched-bad"
partitionKey = "user_ipaddress"
}
sourceSink {
enabled = kinesis
aws {
accessKey = iam
secretKey = iam
}
region = "us-east-1"
maxRecords = 10000
initialPosition = TRIM_HORIZON
backoffPolicy {
minBackoff = 10
maxBackoff = 300000
}
}
buffer {
byteLimit = 16384
recordLimit = 1000
timeLimit = 10000
}
appName = "SnowplowEnrich-gitlab-us-east-1"
}
}
EOF
chown -R snowplow:snowplow /snowplow
## Star the collector service
su snowplow -g snowplow -c '/usr/bin/java -jar /snowplow/bin/snowplow-stream-enrich-kinesis-0.21.0.jar --config /snowplow/config/enricher.hocon --resolver file:/snowplow/config/iglu_resolver.json'
EOUD
user_data = "${data.template_file.user_data_enricher.rendered}"
lifecycle {
create_before_destroy = true
@@ -653,69 +453,7 @@ resource "aws_launch_configuration" "snowplow_s3loader_launch_config" {
"${aws_security_group.snowplow_security_group.id}",
]
user_data = <<EOUD
#!/bin/bash
## This has, so far, been written to run on Amazon Linux 2 AMI.
## Install Java 1.8
yum -y install java-1.8.0-openjdk lzop lzo-devel
## Set up user, group, and install location
groupadd snowplow
adduser --system --gid snowplow snowplow
mkdir -p /snowplow
mkdir -p /snowplow/config
## Install SnowPlow Kinesis Collector
mkdir -p /tmp/build
cd /tmp/build
wget -q http://dl.bintray.com/snowplow/snowplow-generic/snowplow_s3_loader_0.6.0.zip
unzip -d /snowplow/bin snowplow_s3_loader_0.6.0.zip
cd /tmp
rm -rf /tmp/build
## Need to copy in a config
cat > /snowplow/config/s3loader.hocon <<EOF
source = "kinesis"
sink = "kinesis"
aws {
accessKey = "iam"
secretKey = "iam"
}
nsq {
channelName = "noidea"
host = "snowplownsq.gitlab.com"
port = 8085
lookupPort = 8090
}
kinesis {
initialPosition = "LATEST"
maxRecords = 5
region = "us-east-1"
appName = "SnowplowS3Loader-gitlab-us-east-1"
}
streams {
inStreamName = "snowplow-enriched-good"
outStreamName = "snowplow-s3loader-bad"
buffer {
byteLimit = 16384
recordLimit = 1000
timeLimit = 10000
}
}
s3 {
region = "us-east-1"
bucket = "gitlab-com-snowplow-test-one"
format = "gzip"
maxTimeout = 120000000
}
EOF
chown -R snowplow:snowplow /snowplow
## Star the collector service
su snowplow -g snowplow -c '/usr/bin/java -jar /snowplow/bin/snowplow-s3-loader-0.6.0.jar --config /snowplow/config/s3loader.hocon'
EOUD
user_data = "${data.template_file.user_data_s3loader.rendered}"
lifecycle {
create_before_destroy = true
#!/bin/bash
## This has, so far, been written to run on Amazon Linux 2 AMI.
## Install Java 1.8
yum -y install java-1.8.0-openjdk lzop lzo-devel
## Set up user, group, and install location
groupadd snowplow
adduser --system --gid snowplow snowplow
mkdir -p /snowplow
mkdir -p /snowplow/config
## Install Snowplow S3 Loader
mkdir -p /tmp/build
cd /tmp/build
wget -q http://dl.bintray.com/snowplow/snowplow-generic/snowplow_s3_loader_0.6.0.zip
unzip -d /snowplow/bin snowplow_s3_loader_0.6.0.zip
cd /tmp
rm -rf /tmp/build
## Need to copy in a config
cat > /snowplow/config/s3loader.hocon <<EOF
source = "kinesis"
sink = "kinesis"
aws {
accessKey = "iam"
secretKey = "iam"
}
nsq {
channelName = "noidea"
host = "snowplownsq.gitlab.com"
port = 8085
lookupPort = 8090
}
kinesis {
initialPosition = "LATEST"
maxRecords = 5
region = "us-east-1"
appName = "SnowplowS3Loader-gitlab-us-east-1"
}
streams {
inStreamName = "snowplow-enriched-good"
outStreamName = "snowplow-s3loader-bad"
buffer {
byteLimit = 16384
recordLimit = 1000
timeLimit = 10000
}
}
s3 {
region = "us-east-1"
bucket = "gitlab-com-snowplow-test-one"
format = "gzip"
maxTimeout = 120000000
}
EOF
chown -R snowplow:snowplow /snowplow
## Start the S3 loader service
su snowplow -g snowplow -c '/usr/bin/java -jar /snowplow/bin/snowplow-s3-loader-0.6.0.jar --config /snowplow/config/s3loader.hocon'
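And a quick way to confirm the loader is actually writing to the bucket (a sketch; bucket and table names from the config above):
```
aws s3 ls s3://gitlab-com-snowplow-test-one/ --recursive | tail
aws dynamodb describe-table --table-name SnowplowS3Loader-gitlab-us-east-1
```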