diff options
Diffstat (limited to 'import_issues.py')
-rwxr-xr-x | import_issues.py | 595 |
1 files changed, 595 insertions, 0 deletions
#!/usr/bin/env python3

# Copyright 2024 Bryan Gardiner <bog@khumba.net>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.


# Imports issues from an exported Gitlab project into a Sourcehut tracker.
#
# Reads *.ndjson files from an exported Gitlab project, and recreates tickets
# and their histories in a new, empty Sourcehut tracker.  Preserves notes
# attached to each issue; generally this includes comments, status changes,
# labels, milestones, and anything else that is included as a plain text
# note, but definitely doesn't include all available metadata.
#
# Tickets are created in sr.ht via SMTP, so a working mail setup is required.
# Surely using Sourcehut's API would be better.
#
# There are a few big caveats:
#
# 1. If all issue IDs from 1 to the max ID are available in your export, and the
# tracker you import into is a new tracker, then your Gitlab and Sourcehut issue
# IDs will match up one-to-one.  If not, then all bets are off.  The ticket
# descriptions in Sourcehut will indicate the original Gitlab ID, but no
# rewriting of text is done, so issue references will be incorrect.  By default,
# the script tries to avoid this by checking that all IDs from 1 to the number
# of issues are in the export, and aborting if not.  To override this, pass
# --allow-missing-issues.
#
# 2. Because emails are used to create tickets, we also assume that emails are
# processed in the order that they are sent, so that tickets don't get created
# out of order.  This program has no way of knowing if that happens, however,
# there is a configurable delay between sending each email, for this reason.
#
# 3. Gitlab project exports are missing some crucial information, in particular
# they don't include ticket author names or label IDs.  For best results,
# appropriate mappings for your project can be filled in manually in the LABELS
# and USERS dicts below, if desired, or these features can be disabled.  See the
# documentation for these variables.
#
# 4. The projects I have tested this on are small, and don't make use of many of
# Gitlab's features.  This may bork on more complex projects.


# Still here?  Here's how to use this:
#
# First take an export of your Gitlab project from its settings area, then
# extract the archive.  The important files are tree/project/*.ndjson.
#
# Let's generate a report of all the emails that would be sent.  Preview the
# output to make sure things look right, and ensure that the command completes
# without error:
#
#     ./import_issues.py \
#         --srht-owner=MY_SRHT_USER \
#         --srht-tracker=MY_SRHT_TRACKER \
#         --gitlab-project-url=https://gitlab.com/ME/PROJECT/ \
#         --from='Moi <me@email.com>' \
#         .../gitlab-export/tree/project \
#         >issue-emails.txt
#
# You may get errors if you are missing label or user mappings, and you haven't
# disabled these; see the LABELS and USERS variables below.
#
# If this file looks correct, then you can proceed with sending emails.
# Double-check that your tracker is empty to start with, then rerun the command
# with "--mode=send" and with your SMTP parameters.  SMTP options can be
# specified either via parameters --smtp-{host,port,user,password} or the
# equivalent SMTP_{HOST,PORT,USER,PASSWORD} environment variables.  Pass
# --smtp-ssl to enable SSL.  Also by default there is a five-second delay
# between sending emails, that you may wish to change with --smtp-delay.
#
#     ./import_issues.py \
#         --srht-owner=MY_SRHT_USER \
#         --srht-tracker=MY_SRHT_TRACKER \
#         --gitlab-project-url=https://gitlab.com/ME/PROJECT/ \
#         --from='Moi <me@email.com>' \
#         --smtp-host=SMTP_HOSTNAME \
#         --smtp-ssl \
#         --smtp-user=SMTP_USERNAME \
#         --smtp-password=SMTP_PASSWORD \
#         .../gitlab-export/tree/project


import argparse
import json
import os
import re
import smtplib
import time
from datetime import datetime, timezone
from email.message import EmailMessage
from email.utils import format_datetime, make_msgid
from pathlib import Path
from typing import Any, Dict, List, Optional


# Mapping from label IDs to names for the project.  This info is unfortunately
# not included in the Gitlab project export, and it's needed to transform raw
# label IDs into label names in issue notes.
#
# Any missing labels that are referenced from issues will cause an exception to
# be thrown.  Run with --mode=print first to make sure no labels are missing,
# before using --mode=send.
#
# Alternatively, set this to None to disable translation of label IDs to names.
LABELS: Optional[Dict[int, str]] = {
    # 123456: "Bug",
    # 232323: "Feature",
    # ...
}


# Mapping from user IDs to strings to use for their names when recording who
# created each ticket.  Gitlab exports user full names (but not necessarily
# IDs) for each note on an issue, but for the creator of an issue, only exports
# the user ID, no name.
#
# Any missing users that created issues will cause an exception to be thrown.
# Run with --mode=print first to make sure no users are missing, before using
# --mode=send.
#
# Alternatively, set this to None to disable recording issue creators.
#
# TODO Might be able to automatically map note.events.author_id to note.author.name.
USERS: Optional[Dict[int, str]] = {
    # 1234000: "John Joe (@jdoe)",
    # ...
}


# Running totals, updated by do_mail() and open_ticket() respectively.
email_count = 0
issue_count = 0


def do_mail(
    *,
    smtp,
    smtp_delay: float,
    mode: str,
    frm: str,
    to: str,
    body: str,
    subject: Optional[str] = None,
):
    """Print or send a single email, depending on *mode*.

    Args:
        smtp: A connected smtplib client; only used when mode is "send".
        smtp_delay: Seconds to sleep after sending (mode "send" only).
        mode: "print" to dump the message to stdout, "send" to deliver it.
        frm: Value of the From header.
        to: Value of the To header.
        body: Plain-text message body.
        subject: Optional Subject header; omitted when empty or None.

    Raises:
        RuntimeError: If *mode* is neither "print" nor "send".
    """
    global email_count
    email_count += 1
    print(f"---- #{email_count}")

    # Use an aware UTC timestamp: a naive datetime makes format_datetime()
    # emit a "-0000" zone, which means "no timezone information".
    date = format_datetime(datetime.now(timezone.utc))
    msg_id = make_msgid()

    if mode == "print":
        print(f"From: {frm}")
        print(f"To: {to}")
        print(f"Date: {date}")
        if subject:
            print(f"Subject: {subject}")
        print(f"Message-ID: {msg_id}")
        print()
        print(body)

    elif mode == "send":
        msg = EmailMessage()
        msg.set_content(body)
        msg["From"] = frm
        msg["To"] = to
        msg["Date"] = date
        if subject:
            msg["Subject"] = subject

        # Message-ID is required, unless you want this error message from the
        # sr.ht mail server:
        #
        # 500 Error: (AttributeError) 'NoneType' object has no attribute
        # 'removeprefix' (in reply to end of DATA command)
        msg["Message-ID"] = msg_id

        smtp.send_message(msg)

        # Give the receiving side time to process mails in order.
        time.sleep(smtp_delay)

    else:
        raise RuntimeError(f"Unknown mode: {mode!r}")


def open_ticket(
    *,
    smtp,
    smtp_delay: float,
    mode: str,
    srht_owner: str,
    srht_tracker: str,
    frm: str,
    title: str,
    body: str,
    created_by: Optional[str],
    created_at: str,
    closed_at: Optional[str],
    is_closed: bool,
    label_names: List[str],
    milestone_name: Optional[str],
    gitlab_ticket_url: str,
) -> int:
    """Email the tracker to create a ticket and return its assumed ID.

    The message body starts with a block of "pseudoheaders" (origin URL,
    creator, timestamps, milestone, labels) followed by a blank line and the
    original description.

    Returns:
        The number of tickets opened so far by this process, which equals the
        new sr.ht ticket ID only if the tracker started out empty and mails
        are processed in order.
    """
    global issue_count

    pheaders = [f"Migrated from: {gitlab_ticket_url}"]

    if created_by:
        pheaders.append(f"Created by: {created_by}")
    pheaders.append(f"Created at: {created_at}")

    if closed_at is not None:
        pheaders.append(f"Closed at: {closed_at}")
    elif is_closed:
        pheaders.append("State: closed")

    if milestone_name:
        pheaders.append(f"Milestone: {milestone_name}")

    if label_names:
        pheaders.append("Labels: " + ", ".join(sorted(label_names)))

    # Each pseudoheader ends in " \" so the lines are joined with that
    # separator, exactly as the message format has always been emitted.
    lines = [" \\\n".join(pheaders), "", body]

    do_mail(
        smtp=smtp,
        smtp_delay=smtp_delay,
        mode=mode,
        frm=frm,
        to=f"~{srht_owner}/{srht_tracker}@todo.sr.ht",
        subject=title,
        body="\n".join(lines),
    )

    issue_count += 1
    return issue_count


def send_comment(
    *,
    smtp,
    smtp_delay: float,
    mode: str,
    srht_owner: str,
    srht_tracker: str,
    frm: str,
    issue_id: int,
    body: str,
    author_name: str,
    created_at: str,
    last_edited_at: Optional[str],
    is_system: bool,
):
    """Email a comment onto an existing ticket.

    System notes are prefixed with "Changed at: ..."; user notes with
    "On <time>, <author> wrote:".  A trailing "(Last edited at ...)" line is
    added when *last_edited_at* is set and differs from *created_at*.
    """
    lines = []

    if is_system:
        # (Skipping pseudoheaders array here, only have one.)
        lines.append(f"Changed at: {created_at}")
    else:
        lines.append(f"On {created_at}, {author_name} wrote:")

    lines.append("")
    lines.append(body)

    if last_edited_at and last_edited_at != created_at:
        lines.append("")
        lines.append(f"(Last edited at {last_edited_at}.)")

    do_mail(
        smtp=smtp,
        smtp_delay=smtp_delay,
        mode=mode,
        frm=frm,
        to=f"~{srht_owner}/{srht_tracker}/{issue_id}@todo.sr.ht",
        body="\n".join(lines),
    )


def close_ticket(
    *,
    smtp,
    smtp_delay: float,
    mode: str,
    srht_owner: str,
    srht_tracker: str,
    frm: str,
    issue_id: int,
    closed_at: Optional[str],
    is_closed: bool,
):
    """Email a "!resolve fixed" command to an existing ticket.

    The command is preceded by "Closed at: ..." when a close time is known,
    or by "Ticket closed." when only the closed state is known.
    """
    lines = []

    if closed_at is not None:
        # (Skipping pseudoheaders array here, only have one.)
        lines.append(f"Closed at: {closed_at}")
    elif is_closed:
        lines.append("Ticket closed.")

    lines.append("")
    lines.append("!resolve fixed")

    do_mail(
        smtp=smtp,
        smtp_delay=smtp_delay,
        mode=mode,
        frm=frm,
        to=f"~{srht_owner}/{srht_tracker}/{issue_id}@todo.sr.ht",
        body="\n".join(lines),
    )


def _read_ndjson(path: Path) -> List[Dict[str, Any]]:
    """Parse a newline-delimited JSON file, ignoring blank lines."""
    records = []
    with open(path, encoding="utf-8") as ndjson_file:
        for line in ndjson_file:
            if line.strip():
                records.append(json.loads(line))
    return records


def _expand_refs(body: str, pattern: str, table: Dict[int, str], unknown_msg: str) -> str:
    """Replace each numeric reference matched by *pattern* using *table*.

    *pattern* must capture the number in group 1.  *unknown_msg* is a
    str.format template receiving ``ref_num`` for references not in *table*.
    """
    def expand(ref):
        ref_num = int(ref.group(1))
        if ref_num not in table:
            raise RuntimeError(unknown_msg.format(ref_num=ref_num))
        return table[ref_num]

    return re.sub(pattern, expand, body)


def _rewrite_note_body(note_json: Dict[str, Any], milestone_ids_to_titles: Dict[int, str]) -> str:
    """Return the note text with label and milestone IDs replaced by names."""
    # The key may be present with a null value, so a .get() default is not
    # enough to guarantee a dict here.
    system_action = (note_json.get('system_note_metadata') or {}).get('action', None)

    body = note_json['note'] or ""

    # The "Removed" part is a guess here, don't know if that actually shows up.
    if LABELS is not None and (
        system_action == 'label' or re.search(r'^(Added|Removed) ~[0-9]+ label', body)
    ):
        body = _expand_refs(
            body, r'~([0-9]+)', LABELS,
            "Unknown label #{ref_num}, please add to LABELS.",
        )

    if system_action == 'milestone' or re.search(r'^Milestone changed to %[0-9]+$', body):
        body = _expand_refs(
            body, r'%([0-9]+)', milestone_ids_to_titles,
            "Unknown milestone #{ref_num}.",
        )

    return body


def run(
    *,
    smtp,
    smtp_delay: float,
    mode: str,
    srht_owner: str,
    srht_tracker: str,
    frm: str,
    export_dir_path: Path,
    gitlab_project_url: str,
    allow_missing_issues: bool,
):
    """Read the Gitlab export and emit all ticket, comment and close emails.

    Works in three passes over the issues sorted by ``iid``: create every
    ticket, then send every note as a comment, then resolve closed tickets.

    Args:
        smtp: Connected smtplib client, or None when mode is "print".
        smtp_delay: Seconds to wait after each sent email.
        mode: "print" or "send"; see do_mail().
        srht_owner: Owner of the target tracker.
        srht_tracker: Name of the target tracker.
        frm: From address for every email.
        export_dir_path: Directory holding milestones.ndjson and issues.ndjson.
        gitlab_project_url: Project base URL without a trailing slash.
        allow_missing_issues: Skip the check that issue IDs are 1..N.

    Raises:
        RuntimeError: On gaps in issue IDs (unless allowed), or on unknown
            authors, labels or milestones.
    """
    milestone_jsons = _read_ndjson(export_dir_path / 'milestones.ndjson')
    milestone_ids_to_titles = {m['iid']: m['title'] for m in milestone_jsons}

    issue_jsons = _read_ndjson(export_dir_path / 'issues.ndjson')
    issue_jsons.sort(key=lambda x: x['iid'])

    # Explicit raise rather than assert: this safety check must survive -O.
    if not allow_missing_issues:
        expected_iids = list(range(1, len(issue_jsons) + 1))
        if [x['iid'] for x in issue_jsons] != expected_iids:
            raise RuntimeError(
                f"Don't have all issues from 1 to {len(issue_jsons)}, cannot proceed."
            )

    for issue_json in issue_jsons:
        issue_json['notes'] = sorted(
            issue_json.get('notes') or [], key=lambda x: x['created_at']
        )

    print("-------- CREATING TICKETS")

    issue_id_map: Dict[int, int] = {}

    for issue_json in issue_jsons:
        gitlab_issue_id = issue_json['iid']
        author_id = issue_json['author_id']
        created_by: Optional[str]
        if USERS is None:
            created_by = None
        else:
            if author_id not in USERS:
                raise RuntimeError(
                    f"Unknown author #{author_id} of ticket #{gitlab_issue_id}, "
                    f"please add to USERS."
                )
            created_by = USERS[author_id]

        srht_issue_id = open_ticket(
            smtp=smtp,
            smtp_delay=smtp_delay,
            mode=mode,
            srht_owner=srht_owner,
            srht_tracker=srht_tracker,
            frm=frm,
            title=issue_json['title'],
            # Description and milestone may be null in the export.
            body=issue_json.get('description') or "",
            created_by=created_by,
            created_at=issue_json['created_at'],
            closed_at=issue_json['closed_at'],
            is_closed=(issue_json['state'] == 'closed'),
            label_names=[x['label']['title'] for x in issue_json.get('label_links') or []],
            milestone_name=(issue_json.get('milestone') or {}).get('title') or None,
            gitlab_ticket_url=f"{gitlab_project_url}/-/issues/{gitlab_issue_id}",
        )

        if not allow_missing_issues and srht_issue_id != gitlab_issue_id:
            raise RuntimeError(
                f"Internal error, srht_issue_id {srht_issue_id} != "
                f"gitlab_issue_id {gitlab_issue_id}."
            )

        issue_id_map[gitlab_issue_id] = srht_issue_id

    print("-------- CREATING COMMENTS")

    for issue_json in issue_jsons:
        for note_json in issue_json['notes']:
            send_comment(
                smtp=smtp,
                smtp_delay=smtp_delay,
                mode=mode,
                srht_owner=srht_owner,
                srht_tracker=srht_tracker,
                frm=frm,
                issue_id=issue_id_map[issue_json['iid']],
                body=_rewrite_note_body(note_json, milestone_ids_to_titles),
                author_name=note_json['author']['name'],
                created_at=note_json['created_at'],
                last_edited_at=note_json.get('last_edited_at'),
                is_system=note_json['system'],
            )

    print("-------- CLOSING CLOSED ISSUES")

    for issue_json in issue_jsons:
        if issue_json['state'] == 'closed':
            close_ticket(
                smtp=smtp,
                smtp_delay=smtp_delay,
                mode=mode,
                srht_owner=srht_owner,
                srht_tracker=srht_tracker,
                frm=frm,
                issue_id=issue_id_map[issue_json['iid']],
                closed_at=issue_json['closed_at'],
                is_closed=True,
            )


def _connect_smtp(args: Dict[str, Any], parser: argparse.ArgumentParser):
    """Open and authenticate the SMTP connection described by *args*.

    Command-line values take precedence over the SMTP_* environment
    variables.  Exits via parser.error() on missing or invalid settings.
    """
    smtp_ssl = args['smtp_ssl']
    smtp_host = args['smtp_host'] or os.environ.get('SMTP_HOST', 'localhost')
    smtp_user = args['smtp_user'] or os.environ.get('SMTP_USER', None)
    smtp_password = args['smtp_password'] or os.environ.get('SMTP_PASSWORD', None)

    smtp_port = args['smtp_port']
    if smtp_port is None:
        env_port = os.environ.get('SMTP_PORT')
        if env_port:
            try:
                smtp_port = int(env_port)
            except ValueError:
                parser.error(f"SMTP_PORT is not an integer: {env_port!r}")
        else:
            smtp_port = 465 if smtp_ssl else 25

    if not smtp_user:
        parser.error("No SMTP user given.")
    if not smtp_password:
        parser.error("No SMTP password given.")

    print(f"Connecting to {smtp_host}:{smtp_port}, user {smtp_user!r}.")

    if smtp_ssl:
        smtp = smtplib.SMTP_SSL(host=smtp_host, port=smtp_port)
    else:
        smtp = smtplib.SMTP(host=smtp_host, port=smtp_port)

    # If SMTP isn't working: smtp.set_debuglevel(2)

    try:
        smtp.login(smtp_user, smtp_password)
    except Exception:
        # Don't leak the socket when authentication fails.
        smtp.close()
        raise

    return smtp


def main():
    """Parse command-line arguments and run the import."""
    parser = argparse.ArgumentParser(
        prog='import_issues.py',
        description='Import Gitlab issues into Sourcehut via SMTP.',
    )

    parser.add_argument(
        '--srht-owner',
        required=True,
        help='Owner of the Sourcehut tracker.',
    )

    parser.add_argument(
        '--srht-tracker',
        required=True,
        help='Name of Sourcehut tracker to submit to.',
    )

    parser.add_argument(
        '--gitlab-project-url',
        required=True,
        help="The base URL of the project on Gitlab.",
    )

    parser.add_argument(
        '--mode',
        default='print',
        choices=['print', 'send'],
        help="Action to take, 'print' or 'send'.",
    )

    parser.add_argument(
        '--from',
        help="From address if mode is 'send'.",
    )

    parser.add_argument(
        '--smtp-host',
        help="SMTP host to use.",
    )

    parser.add_argument(
        '--smtp-port',
        type=int,
        default=None,
        help="SMTP port to use.",
    )

    parser.add_argument(
        '--smtp-ssl',
        action='store_true',
        help="Use SMTP over SSL.",
    )

    parser.add_argument(
        '--smtp-user',
        help="SMTP username.",
    )

    parser.add_argument(
        '--smtp-password',
        help="SMTP password.",
    )

    parser.add_argument(
        '--smtp-delay',
        type=float,
        default=5.0,
        help="Decimal number of seconds to wait after sending each email.",
    )

    parser.add_argument(
        '--allow-missing-issues',
        action='store_true',
        help="Don't abort if there are missing issue IDs in the export.",
    )

    parser.add_argument(
        'export_dir',
        help='Exported Gitlab tree/project/ directory containing ndjson files.',
    )

    args = vars(parser.parse_args())

    export_dir = args['export_dir']
    # Path("") is ".", which would pass is_dir(), so reject it explicitly.
    if not export_dir:
        parser.error("Must have an exported project directory.")
    export_dir_path = Path(export_dir)
    if not export_dir_path.is_dir():
        parser.error(f"Project directory is not a directory: {export_dir_path}")

    mode = args['mode']
    frm = args['from']

    smtp = None
    if mode == 'send':
        if not frm:
            parser.error("--from is required when --mode=send.")
        smtp = _connect_smtp(args, parser)

    try:
        run(
            smtp=smtp,
            smtp_delay=args['smtp_delay'],
            mode=mode,
            srht_owner=args['srht_owner'],
            srht_tracker=args['srht_tracker'],
            frm=frm,
            export_dir_path=export_dir_path,
            gitlab_project_url=args['gitlab_project_url'].rstrip('/'),
            allow_missing_issues=args['allow_missing_issues'],
        )
    finally:
        if smtp is not None:
            try:
                smtp.quit()
            except smtplib.SMTPServerDisconnected:
                # Connection already gone; nothing left to close politely.
                pass


if __name__ == '__main__':
    main()