# Makefile.orig
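# Typical usage (illustrative):
#   make             # run all remaining steps against default.cfg
#   make reset       # clear the step markers and start over
#   make teardown    # delete the created resources and disable GCS audit logging
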
CONFIG_FILE=$(shell if [ -z "$$SMART_ARCHIVE_CONFIG" ]; then echo default.cfg; else echo $$SMART_ARCHIVE_CONFIG; fi)
PROJECT=$(shell grep '^PROJECT=' $(CONFIG_FILE) | cut -d '=' -f 2)
DATASET_NAME=$(shell grep '^DATASET_NAME=' $(CONFIG_FILE) | cut -d '=' -f 2)
SCHEDULING_TOPIC=$(shell grep '^SCHEDULING_TOPIC=' $(CONFIG_FILE) | cut -d '=' -f 2)
SCHEDULED_JOB_NAME=$(shell grep '^SCHEDULED_JOB_NAME=' $(CONFIG_FILE) | cut -d '=' -f 2)
SCHEDULE_CRON=$(shell grep '^SCHEDULE_CRON=' $(CONFIG_FILE) | cut -d '=' -f 2)
FUNCTION_NAME=$(shell grep '^FUNCTION_NAME=' $(CONFIG_FILE) | cut -d '=' -f 2)
FUNCTION_MEMORY=$(shell grep '^FUNCTION_MEMORY=' $(CONFIG_FILE) | cut -d '=' -f 2)
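
# The config file is plain KEY=value lines (no spaces, no quoting). A minimal
# example with illustrative values only:
#
#   PROJECT=my-gcp-project
#   DATASET_NAME=gcs_access_logs
#   SCHEDULING_TOPIC=smart-archiver-topic
#   SCHEDULED_JOB_NAME=smart-archiver-job
#   SCHEDULE_CRON=0 3 * * *
#   FUNCTION_NAME=smart_archiver
#   FUNCTION_MEMORY=2048MB
#
# Point SMART_ARCHIVE_CONFIG at another file to use it instead of default.cfg:
#   SMART_ARCHIVE_CONFIG=prod.cfg make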

.PHONY: default reset teardown

default: step_explain step_set_up_audit_logging step_set_up_bq_log_sink step_set_up_cloud_function step_set_up_cloud_scheduler

CHECK_CONTINUE = \
	read -p "Continue? (Y/n) " continue; \
	case "$$continue" in \
		n|N ) echo "Stopping." && exit 1 ;; \
		* ) echo -n ;; \
	esac

MESSAGE = \
	echo ========================================== ;\
	echo $1 ;\
	echo ========================================== ;

# Macro for a comma in arguments. This gets expanded after the arguments are parsed.
, := ,
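# Example: $(call MESSAGE, First$(,) we ...) prints "First, we ..."; a literal
# comma would otherwise be taken as an argument separator by $(call).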

reset:
	@$(call MESSAGE, Erase all step markers and start from the beginning?)
	@$(CHECK_CONTINUE)
	rm -f step_*

step_explain:
	@echo Using config file: $(CONFIG_FILE)
	@echo ==========================================
	@echo This Makefile will set up logging of GCS object access into a BigQuery dataset.
	@echo
	@echo "As it works, it will create step_* files in this directory to save progress. If a step fails, you can fix the underlying issue and resume by running make again. Run 'make reset' to start over."
	@echo
	@echo First, it will set up read and write audit logging on all GCS buckets in the active project.
	@echo Next, it will set up a sink of those logs into BigQuery tables.
	@echo ==========================================
	@$(CHECK_CONTINUE)
	@touch step_explain

step_set_up_audit_logging:
	@$(call MESSAGE, First$(,) we will patch your project-level IAM policy to turn on DATA_READ and DATA_WRITE audit logging for all storage.googleapis.com requests.)
	@$(CHECK_CONTINUE)
# stash the iam policy
	gcloud projects get-iam-policy $(PROJECT) --format json \
		| jq '. | if has("auditConfigs") then . else . += {"auditConfigs":[]} end' \
		> /tmp/projectiampolicy
	@echo
# patch the iam policy
	cat /tmp/projectiampolicy | jq '.auditConfigs += [{"service":"storage.googleapis.com","auditLogConfigs":[{"logType": "DATA_READ"},{"logType": "DATA_WRITE"}]}]' | jq '. + {"auditConfigs":.auditConfigs|unique}' > /tmp/projectiampolicy_patched
	@echo
# set the iam policy to the patched one
	gcloud projects set-iam-policy $(PROJECT) --format json /tmp/projectiampolicy_patched
	@echo
	@$(call MESSAGE, Success!)
	@touch step_set_up_audit_logging
	@$(call MESSAGE, Note: Your old and patched IAM policies are stored in /tmp/projectiampolicy*$(,) if you need them.)
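# After this step, the project policy's auditConfigs contains an entry shaped
# like the jq patch above:
#   {"service": "storage.googleapis.com",
#    "auditLogConfigs": [{"logType": "DATA_READ"}, {"logType": "DATA_WRITE"}]}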

step_set_up_bq_log_sink:
	@$(call MESSAGE, Next$(,) we will set up a log sink to BigQuery.)
	@$(CHECK_CONTINUE)
# make the dataset
	bq --location=US mk --dataset $(PROJECT):$(DATASET_NAME)
	@echo
# stash dataset info
	bq show --format=prettyjson $(PROJECT):$(DATASET_NAME) > /tmp/dsinfo
	@echo
# make the sink
	gcloud logging sinks create \
		test_sink 'bigquery.googleapis.com/projects/$(PROJECT)/datasets/$(DATASET_NAME)' \
		--log-filter 'resource.type="gcs_bucket" (protoPayload.methodName="storage.objects.get" OR protoPayload.methodName="storage.objects.create")'
	@echo
# stash sink info
	gcloud logging sinks describe test_sink --format json > /tmp/sinkinfo
	cat /tmp/sinkinfo | jq -r .writerIdentity | awk '{split($$0,arr,":"); print arr[2]}' > /tmp/logwriteridentity
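# writerIdentity comes back as "serviceAccount:<email>", so the split on ":"
# keeps just the service account email for the dataset ACL below.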
	@echo
# modify dataset access info
	cat /tmp/dsinfo | jq '.access += [{"role":"WRITER","userByEmail":"'$$(cat /tmp/logwriteridentity)'"}]' | jq '. + {"access":.access|unique}' > /tmp/dsinfo_patched
	@echo
# modify dataset with new access info
	bq update --source /tmp/dsinfo_patched $(PROJECT):$(DATASET_NAME)
	@echo
	@$(call MESSAGE, Success!)
	@rm /tmp/dsinfo /tmp/sinkinfo /tmp/logwriteridentity /tmp/dsinfo_patched
	@touch step_set_up_bq_log_sink
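# Note: the sink writes dated tables into the dataset (named after the audit
# log, e.g. cloudaudit_googleapis_com_data_access_YYYYMMDD), which the archive
# function is assumed to query.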

step_set_up_cloud_function:
	@$(call MESSAGE, Next$(,) we deploy a cloud function to evaluate objects for archive when it gets a scheduled message.)
	@$(CHECK_CONTINUE)
	@echo
# make topic (ignore the error if it already exists so this step can be re-run)
	-gcloud pubsub topics create $(SCHEDULING_TOPIC)
# deploy function
	gcloud functions deploy $(FUNCTION_NAME) --entry-point=archive_cold_objects --runtime python37 --trigger-topic $(SCHEDULING_TOPIC) --timeout 540s --memory $(FUNCTION_MEMORY) --max-instances 1
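# With no --source flag, gcloud deploys the current directory as the function
# source; archive_cold_objects is expected in its main.py (python37 runtime).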
	@$(call MESSAGE, Success! Run make again to deploy new code or configuration.)

step_set_up_cloud_scheduler:
	@$(call MESSAGE, Finally$(,) we will set up a cloud scheduler job to run the archive job periodically.)
	@$(CHECK_CONTINUE)
	@echo
# make scheduled job (quote the cron expression so its spaces and * survive the shell)
	gcloud scheduler jobs create pubsub $(SCHEDULED_JOB_NAME) --schedule="$(SCHEDULE_CRON)" --topic=$(SCHEDULING_TOPIC) --message-body="Time to archive objects!"
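# SCHEDULE_CRON uses unix-cron syntax, e.g. 0 3 * * * to run daily at 03:00.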
	@$(call MESSAGE, Success!)
	@touch step_set_up_cloud_scheduler

teardown:
	@$(call MESSAGE, This will remove the smart archiver and supporting resources.)
	@$(CHECK_CONTINUE)
	yes | gcloud scheduler jobs delete $(SCHEDULED_JOB_NAME)
	yes | gcloud functions delete $(FUNCTION_NAME)
	yes | gcloud pubsub topics delete $(SCHEDULING_TOPIC)
	yes | gcloud logging sinks delete test_sink
	yes | bq --location=US rm -r --dataset $(PROJECT):$(DATASET_NAME)
	@$(call MESSAGE, If you continue$(,) audit logging for GCS will be turned off. Stop now if you use audit logging for other applications.)
	@$(CHECK_CONTINUE)
# stash the iam policy
	gcloud projects get-iam-policy $(PROJECT) --format json \
		| jq '. | if has("auditConfigs") then . else . += {"auditConfigs":[]} end' \
		> /tmp/projectiampolicy
	@echo
# patch the iam policy
	cat /tmp/projectiampolicy | jq '.auditConfigs -= [{"service":"storage.googleapis.com","auditLogConfigs":[{"logType": "DATA_READ"},{"logType": "DATA_WRITE"}]}]' | jq '. + {"auditConfigs":.auditConfigs|unique}' > /tmp/projectiampolicy_patched
	@echo
# set the iam policy to the patched one
	gcloud projects set-iam-policy $(PROJECT) --format json /tmp/projectiampolicy_patched
	@echo
	@$(call MESSAGE, Teardown complete.)