From dda7ed7db820ec367af08469ddcc5f2432e43a08 Mon Sep 17 00:00:00 2001 From: Sheldan <5037282+Sheldan@users.noreply.github.com> Date: Sat, 13 Jul 2024 00:50:45 +0200 Subject: [PATCH] [SIS-xxx] making quote attachment URL column larger changing migration script for starboard/quotes --- .../migrations/1.4.57/collection.xml | 6 +++++ .../1.4.57/tables/quote_attachment.xml | 11 +++++++++ .../migrations/1.4.57/tables/tables.xml | 6 +++++ .../resources/migrations/quotes-changeLog.xml | 1 + .../quotes-starboard-import/main.py | 24 +++++++++++-------- .../quotes-starboard-import/post_loader.py | 2 +- .../quotes-starboard-import/quote_importer.py | 1 + .../starboard_loader.py | 21 ++++++++-------- 8 files changed, 51 insertions(+), 21 deletions(-) create mode 100644 application/sissi-modules/quotes/src/main/resources/migrations/1.4.57/collection.xml create mode 100644 application/sissi-modules/quotes/src/main/resources/migrations/1.4.57/tables/quote_attachment.xml create mode 100644 application/sissi-modules/quotes/src/main/resources/migrations/1.4.57/tables/tables.xml diff --git a/application/sissi-modules/quotes/src/main/resources/migrations/1.4.57/collection.xml b/application/sissi-modules/quotes/src/main/resources/migrations/1.4.57/collection.xml new file mode 100644 index 00000000..bf192f5d --- /dev/null +++ b/application/sissi-modules/quotes/src/main/resources/migrations/1.4.57/collection.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/application/sissi-modules/quotes/src/main/resources/migrations/1.4.57/tables/quote_attachment.xml b/application/sissi-modules/quotes/src/main/resources/migrations/1.4.57/tables/quote_attachment.xml new file mode 100644 index 00000000..792eff35 --- /dev/null +++ b/application/sissi-modules/quotes/src/main/resources/migrations/1.4.57/tables/quote_attachment.xml @@ -0,0 +1,11 @@ + + + + + + + \ No newline at end of file diff --git a/application/sissi-modules/quotes/src/main/resources/migrations/1.4.57/tables/tables.xml b/application/sissi-modules/quotes/src/main/resources/migrations/1.4.57/tables/tables.xml new file mode 100644 index 00000000..43af58e7 --- /dev/null +++ b/application/sissi-modules/quotes/src/main/resources/migrations/1.4.57/tables/tables.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/application/sissi-modules/quotes/src/main/resources/migrations/quotes-changeLog.xml b/application/sissi-modules/quotes/src/main/resources/migrations/quotes-changeLog.xml index c516c477..8d94656a 100644 --- a/application/sissi-modules/quotes/src/main/resources/migrations/quotes-changeLog.xml +++ b/application/sissi-modules/quotes/src/main/resources/migrations/quotes-changeLog.xml @@ -4,4 +4,5 @@ xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog https://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-4.26.xsd" > + \ No newline at end of file diff --git a/python/tools/migrations/quotes-starboard-import/main.py b/python/tools/migrations/quotes-starboard-import/main.py index 9a6b0afc..821ce28d 100644 --- a/python/tools/migrations/quotes-starboard-import/main.py +++ b/python/tools/migrations/quotes-starboard-import/main.py @@ -14,13 +14,17 @@ engine = db.create_engine('postgresql://%s:%s@%s:%s/%s' % (db_user, db_password, with engine.connect() as con: posts = load_all_starboard_posts(con) - print(posts) - print(f'Loaded {len(posts)}') - enriched_posts = enrich_posts(posts) - print(f'Enriched posts') - import_quotes(enriched_posts, con) - print(f'Done storing quotes') - con.commit() - fix_quote_created(enriched_posts, con) - con.commit() - print('Done.') \ No newline at end of file + sub_posts = chunks = [posts[x:x+100] for x in range(0, len(posts), 100)] + print(f'Loaded {len(posts)} into {len(sub_posts)} partitions') + counter = 0 + for sub_post in sub_posts: + print(f'Partition size {len(sub_post)}') + enriched_posts = enrich_posts(sub_post) + print(f'Enriched posts') + import_quotes(enriched_posts, con) + print(f'Done storing quotes') + con.commit() + fix_quote_created(enriched_posts, con) + con.commit() + counter += 1 + print(f'Done. {counter}') \ No newline at end of file diff --git a/python/tools/migrations/quotes-starboard-import/post_loader.py b/python/tools/migrations/quotes-starboard-import/post_loader.py index ca41cd56..34df319e 100644 --- a/python/tools/migrations/quotes-starboard-import/post_loader.py +++ b/python/tools/migrations/quotes-starboard-import/post_loader.py @@ -12,7 +12,7 @@ def enrich_posts(posts): print(f"Loading post {post['message_id']}") url = f"https://discord.com/api/v10/channels/{post['channel_id']}/messages/{post['message_id']}" message = requests.get(url, headers={'Authorization': token}) - time.sleep(5) + time.sleep(0.5) if message.status_code == 200: message_obj = json.loads(message.content) post['content'] = message_obj['content'] diff --git a/python/tools/migrations/quotes-starboard-import/quote_importer.py b/python/tools/migrations/quotes-starboard-import/quote_importer.py index c429a894..77ae68a7 100644 --- a/python/tools/migrations/quotes-starboard-import/quote_importer.py +++ b/python/tools/migrations/quotes-starboard-import/quote_importer.py @@ -4,6 +4,7 @@ def import_quotes(posts, con): for post in posts: if 'content' not in post: print(f"Skipping {post['message_id']} because no content, did it fail?") + continue print(f"Inserting {post['message_id']}") statement = text("""INSERT INTO quote(author_user_in_server_id, adder_user_in_server_id, source_channel_id, server_id, message_id, text, created) diff --git a/python/tools/migrations/quotes-starboard-import/starboard_loader.py b/python/tools/migrations/quotes-starboard-import/starboard_loader.py index 4e6212b7..fff86d76 100644 --- a/python/tools/migrations/quotes-starboard-import/starboard_loader.py +++ b/python/tools/migrations/quotes-starboard-import/starboard_loader.py @@ -2,17 +2,18 @@ from sqlalchemy.sql import text def load_all_starboard_posts(conn): - squery = text("""select sp.id, sp.author_user_in_server_id, sp.source_channel_id, sp.server_id, sp.post_message_id, spr.reactor_user_in_server_id, sp.created + squery = text("""select distinct sp.id, sp.author_user_in_server_id, sp.source_channel_id, sp.server_id, sp.post_message_id, spr.reactor_user_in_server_id, sp.created from starboard_post sp -inner join starboard_post_reaction spr -on sp.id = spr.post_id -and spr.created = ( -select spr.created -from starboard_post_reaction spr2 -where spr2.post_id = sp.id -order by created limit 1 - ) - where sp.ignored = false + inner join starboard_post_reaction spr + on sp.id = spr.post_id + and spr.reactor_user_in_server_id = ( + select reactor_user_in_server_id + from starboard_post_reaction spr2 + where spr2.post_id = sp.id + order by created limit 1 + ) +where sp.ignored = false +and sp.post_message_id not in (select message_id from quote) """) rs = conn.execute(squery) found_posts = []