[SIS-xxx] adding sync of starboard posts and quotes, including migration script

This commit is contained in:
Sheldan
2024-07-12 22:45:17 +02:00
parent 89d743987d
commit 7bc04a7906
21 changed files with 412 additions and 27 deletions

View File

@@ -0,0 +1,26 @@
import sqlalchemy as db
import os
from starboard_loader import load_all_starboard_posts
from post_loader import enrich_posts
from quote_importer import import_quotes, fix_quote_created
# One-off migration: load starboard posts from the database, enrich them with
# message content from the Discord API and store them as quotes.
# Connection settings are read from the environment.
db_host = os.getenv('DB_HOST')
db_port = os.getenv('DB_PORT')
db_database = os.getenv('DB_NAME')
db_user = os.getenv('DB_USER')
db_password = os.getenv('DB_PASS')

# Build the URL from its components instead of string interpolation so that
# special characters (e.g. '@', ':' or '/') in the user name or password are
# escaped correctly, and an unset DB_PORT falls back to the driver default
# instead of becoming the literal string 'None'.
connection_url = db.engine.URL.create(
    drivername='postgresql',
    username=db_user,
    password=db_password,
    host=db_host,
    port=int(db_port) if db_port else None,
    database=db_database,
)
engine = db.create_engine(connection_url)

with engine.connect() as con:
    posts = load_all_starboard_posts(con)
    print(posts)
    print(f'Loaded {len(posts)}')
    enriched_posts = enrich_posts(posts)
    print('Enriched posts')
    import_quotes(enriched_posts, con)
    print('Done storing quotes')
    con.commit()
    # Second pass: re-apply the original creation timestamps to the quotes
    # that were just inserted.
    fix_quote_created(enriched_posts, con)
    con.commit()
print('Done.')

View File

@@ -0,0 +1,32 @@
import requests
import os
import json
import time
# Value sent as the Authorization header on Discord API requests;
# None when the TOKEN environment variable is not set.
token = os.environ.get('TOKEN')

# Lower-case file extensions (without the dot) that mark an attachment as an image.
image_extension = [
    "jpg",
    "jpeg",
    "png",
    "gif",
    "webp",
    "tiff",
    "svg",
    "apng",
]
def _file_extension(filename):
    """Return the text after the last '.' in *filename*, or '' if there is no dot."""
    _, dot, extension = filename.rpartition('.')
    return extension if dot else ''


def enrich_posts(posts):
    """Add message content and attachments to each post via the Discord API.

    For every post the message identified by ``post['channel_id']`` and
    ``post['message_id']`` is fetched. On an HTTP 200 response the post gains
    two keys:

    * ``'content'``: the message's ``content`` field
    * ``'attachments'``: a list of ``{'url': ..., 'is_image': ...}`` dicts,
      one per attachment of the message

    Posts whose request does not return 200 are left untouched (no
    ``'content'`` key) and a diagnostic line is printed.

    The dicts in *posts* are modified in place; the same list is returned.
    """
    for post in posts:
        print(f"Loading post {post['message_id']}")
        url = f"https://discord.com/api/v10/channels/{post['channel_id']}/messages/{post['message_id']}"
        message = requests.get(url, headers={'Authorization': token})
        # Pause after every request to throttle the calls against the API.
        time.sleep(5)
        if message.status_code == 200:
            message_obj = json.loads(message.content)
            post['content'] = message_obj['content']
            attachments = []
            for attachment_obj in message_obj['attachments']:
                # Compare the whole extension; the previous code indexed a
                # single character, so no attachment was ever flagged as image.
                extension = _file_extension(attachment_obj['filename'])
                attachments.append({
                    'url': attachment_obj['proxy_url'],
                    'is_image': extension.lower() in image_extension,
                })
            post['attachments'] = attachments
        else:
            print(f"{post['message_id']}: Didnt find post {url}: {message.status_code}")
    return posts

View File

@@ -0,0 +1,25 @@
from sqlalchemy.sql import text
def import_quotes(posts, con):
    """Insert one quote (plus its attachments) for every enriched post.

    Posts without a ``'content'`` key are skipped with a message. For every
    other post a row is inserted into ``quote`` and one row per entry of
    ``post['attachments']`` into ``quote_attachment``. The generated quote id
    is stored on the post as ``post['quote_id']``.

    Nothing is committed here; statements are executed on *con*.
    """
    for post in posts:
        if 'content' not in post:
            print(f"Skipping {post['message_id']} because no content, did it fail?")
            # Actually skip: falling through raised KeyError on post['content'].
            continue
        print(f"Inserting {post['message_id']}")
        statement = text("""INSERT INTO quote(author_user_in_server_id, adder_user_in_server_id, source_channel_id,
            server_id, message_id, text, created)
            VALUES(:author_id, :adder_id, :channel_id, :server_id, :message_id, :content, :created) returning id""")
        quote_id = con.execute(statement, {
            'author_id': post['author_id'],
            'adder_id': post['adder_id'],
            'channel_id': post['channel_id'],
            'server_id': post['server_id'],
            'message_id': post['message_id'],
            'content': post['content'],
            'created': post['created'],
        }).fetchone()[0]
        print(f'Created quote {quote_id}')
        for attachment in post['attachments']:
            statement = text("""INSERT INTO quote_attachment(quote_id, server_id, url, is_image)
                VALUES(:quote_id, :server_id, :url, :is_image)""")
            con.execute(statement, {
                'quote_id': quote_id,
                'server_id': post['server_id'],
                'url': attachment['url'],
                'is_image': attachment['is_image'],
            })
        # Remembered so the created timestamp can be corrected afterwards.
        post['quote_id'] = quote_id
def fix_quote_created(posts, con):
    """Re-apply the original ``created`` timestamp to every imported quote.

    The insert trigger always overwrites ``created``, so it has to be set
    again after the insert (will be changed, but not for now). Posts without
    a ``'quote_id'`` key are ignored.
    """
    for entry in posts:
        if 'quote_id' not in entry:
            continue
        update_stmt = text("""update quote set created = :created where id = :quote_id""")
        params = {'created': entry['created'], 'quote_id': entry['quote_id']}
        con.execute(update_stmt, params)

View File

@@ -0,0 +1,29 @@
from sqlalchemy.sql import text
def load_all_starboard_posts(conn):
    """Load every non-ignored starboard post together with its earliest reaction.

    Each post is joined with the reaction whose ``created`` equals the
    smallest ``created`` among that post's reactions; that reaction's user is
    reported as ``'adder_id'``.

    Returns a list of dicts with the keys ``'post_id'``, ``'channel_id'``,
    ``'message_id'``, ``'adder_id'``, ``'author_id'``, ``'server_id'`` and
    ``'created'``.
    """
    # The subquery must read from its own alias (spr2). Selecting and ordering
    # by the outer spr.created made the comparison trivially true, so every
    # reaction produced a row and posts were returned multiple times.
    squery = text("""select sp.id, sp.author_user_in_server_id, sp.source_channel_id, sp.server_id, sp.post_message_id, spr.reactor_user_in_server_id, sp.created
        from starboard_post sp
        inner join starboard_post_reaction spr
        on sp.id = spr.post_id
        and spr.created = (
            select spr2.created
            from starboard_post_reaction spr2
            where spr2.post_id = sp.id
            order by spr2.created limit 1
        )
        where sp.ignored = false
        """)
    rs = conn.execute(squery)
    # Column order follows the select list above.
    return [
        {
            'post_id': post[0],
            'channel_id': post[2],
            'message_id': post[4],
            'adder_id': post[5],
            'author_id': post[1],
            'server_id': post[3],
            'created': post[6],
        }
        for post in rs
    ]