Skip to content

Commit 1bc508a

Browse files
authored
Merge pull request #154 from boegel/ingestion_staging_buckets
update ingestion script to support multiple S3 buckets
2 parents 77435b1 + ddef0c7 commit 1bc508a

File tree

3 files changed

+22
-22
lines changed

3 files changed

+22
-22
lines changed

scripts/automated_ingestion/automated_ingestion.cfg.example

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ ingestion_script = /absolute/path/to/ingest-tarball.sh
1010
metadata_file_extension = .meta.txt
1111

1212
[aws]
13-
staging_bucket = eessi-staging
13+
staging_buckets = eessi-staging, eessi-staging-2023.06
1414

1515
[cvmfs]
1616
ingest_as_root = yes

scripts/automated_ingestion/automated_ingestion.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
REQUIRED_CONFIG = {
1818
'secrets': ['aws_secret_access_key', 'aws_access_key_id', 'github_pat'],
1919
'paths': ['download_dir', 'ingestion_script', 'metadata_file_extension'],
20-
'aws': ['staging_bucket'],
20+
'aws': ['staging_buckets'],
2121
'github': ['staging_repo', 'failed_ingestion_issue_body', 'pr_body'],
2222
}
2323

@@ -39,10 +39,9 @@ def error(msg, code=1):
3939
def find_tarballs(s3, bucket, extension='.tar.gz', metadata_extension='.meta.txt'):
4040
"""Return a list of all tarballs in an S3 bucket that have a metadata file with the given extension (and same filename)."""
4141
# TODO: list_objects_v2 only returns up to 1000 objects
42-
files = [
43-
object['Key']
44-
for object in s3.list_objects_v2(Bucket=bucket)['Contents']
45-
]
42+
s3_objects = s3.list_objects_v2(Bucket=bucket).get('Contents', [])
43+
files = [obj['Key'] for obj in s3_objects]
44+
4645
tarballs = [
4746
file
4847
for file in files
@@ -100,15 +99,16 @@ def main():
10099
aws_secret_access_key=config['secrets']['aws_secret_access_key'],
101100
)
102101

103-
tarballs = find_tarballs(s3, config['aws']['staging_bucket'])
104-
if args.list_only:
105-
for num, tarball in enumerate(tarballs):
106-
print(f'{num}: {tarball}')
107-
sys.exit(0)
108-
109-
for tarball in tarballs:
110-
tar = EessiTarball(tarball, config, gh, s3)
111-
tar.run_handler()
102+
buckets = [x.strip() for x in config['aws']['staging_buckets'].split(',')]
103+
for bucket in buckets:
104+
tarballs = find_tarballs(s3, bucket)
105+
if args.list_only:
106+
for num, tarball in enumerate(tarballs):
107+
print(f'{num}: {tarball}')
108+
else:
109+
for tarball in tarballs:
110+
tar = EessiTarball(tarball, config, gh, s3, bucket)
111+
tar.run_handler()
112112

113113

114114
if __name__ == '__main__':

scripts/automated_ingestion/eessitarball.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,18 @@ class EessiTarball:
1919
for which it interfaces with the S3 bucket, GitHub, and CVMFS.
2020
"""
2121

22-
def __init__(self, object_name, config, github, s3):
22+
def __init__(self, object_name, config, github, s3, bucket):
2323
"""Initialize the tarball object."""
2424
self.config = config
2525
self.github = github
2626
self.git_repo = github.get_repo(config['github']['staging_repo'])
2727
self.metadata_file = object_name + config['paths']['metadata_file_extension']
2828
self.object = object_name
2929
self.s3 = s3
30+
self.bucket = bucket
3031
self.local_path = os.path.join(config['paths']['download_dir'], os.path.basename(object_name))
3132
self.local_metadata_path = self.local_path + config['paths']['metadata_file_extension']
32-
self.url = f'https://{config["aws"]["staging_bucket"]}.s3.amazonaws.com/{object_name}'
33+
self.url = f'https://{bucket}.s3.amazonaws.com/{object_name}'
3334

3435
self.states = {
3536
'new': {'handler': self.mark_new_tarball_as_staged, 'next_state': 'staged'},
@@ -47,21 +48,20 @@ def download(self, force=False):
4748
"""
4849
Download this tarball and its corresponding metadata file, if this hasn't been already done.
4950
"""
50-
bucket = self.config['aws']['staging_bucket']
5151
if force or not os.path.exists(self.local_path):
5252
try:
53-
self.s3.download_file(bucket, self.object, self.local_path)
53+
self.s3.download_file(self.bucket, self.object, self.local_path)
5454
except:
5555
logging.error(
56-
f'Failed to download tarball {self.object} from {bucket} to {self.local_path}.'
56+
f'Failed to download tarball {self.object} from {self.bucket} to {self.local_path}.'
5757
)
5858
self.local_path = None
5959
if force or not os.path.exists(self.local_metadata_path):
6060
try:
61-
self.s3.download_file(bucket, self.metadata_file, self.local_metadata_path)
61+
self.s3.download_file(self.bucket, self.metadata_file, self.local_metadata_path)
6262
except:
6363
logging.error(
64-
f'Failed to download metadata file {self.metadata_file} from {bucket} to {self.local_metadata_path}.'
64+
f'Failed to download metadata file {self.metadata_file} from {self.bucket} to {self.local_metadata_path}.'
6565
)
6666
self.local_metadata_path = None
6767

0 commit comments

Comments (0)