aboutsummaryrefslogtreecommitdiffstats
path: root/import_issues.py
diff options
context:
space:
mode:
Diffstat (limited to 'import_issues.py')
-rwxr-xr-ximport_issues.py595
1 files changed, 595 insertions, 0 deletions
diff --git a/import_issues.py b/import_issues.py
new file mode 100755
index 0000000..245cb55
--- /dev/null
+++ b/import_issues.py
@@ -0,0 +1,595 @@
+#!/usr/bin/env python3
+
+# Copyright 2024 Bryan Gardiner <bog@khumba.net>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+
+# Imports issues from an exported Gitlab project into a Sourcehut tracker.
+#
+# Reads *.ndjson files from an exported Gitlab project, and recreates tickets
+# and their histories in a new, empty Sourcehut tracker. Preserves notes
+# attached to each issue; generally this includes comments, status changes,
+# labels, milestones, and anything else that is included as a plain text
+# note, but definitely doesn't include all available metadata.
+#
+# Tickets are created in sr.ht via SMTP, so a working mail setup is required.
+# Surely using Sourcehut's API would be better.
+#
+# There are a few big caveats:
+#
+# 1. If all issue IDs from 1 to the max ID are available in your export, and the
+# tracker you import into is a new tracker, then your Gitlab and Sourcehut issue
+# IDs will match up one-to-one. If not, then all bets are off. The ticket
+# descriptions in Sourcehut will indicate the original Gitlab ID, but no
+# rewriting of text is done, so issue references will be incorrect. By default,
+# the script tries to avoid this by checking that all IDs from 1 to the number
+# of issues are in the export, and aborting if not. To override this, pass
+# --allow-missing-issues.
+#
+# 2. Because emails are used to create tickets, we also assume that emails are
+# processed in the order that they are sent, so that tickets don't get created
+# out of order. This program has no way of knowing if that happens; for this
+# reason, there is a configurable delay between sending each email.
+#
+# 3. Gitlab project exports are missing some crucial information, in particular
+# they don't include ticket author names or label IDs. For best results,
+# appropriate mappings for your project can be filled in manually in the LABELS
+# and USERS dicts below, if desired, or these features can be disabled. See the
+# documentation for these variables.
+#
+# 4. The projects I have tested this on are small, and don't make use of many of
+# Gitlab's features. This may bork on more complex projects.
+
+
+# Still here? Here's how to use this:
+#
+# First take an export of your Gitlab project from its settings area, then
+# extract the archive. The important files are tree/project/*.ndjson.
+#
+# Let's generate a report of all the emails that would be sent. Preview the
+# output to make sure things look right, and ensure that the command completes
+# without error:
+#
+# ./import_issues.py \
+# --srht-owner=MY_SRHT_USER \
+# --srht-tracker=MY_SRHT_TRACKER \
+# --gitlab-project-url=https://gitlab.com/ME/PROJECT/ \
+# --from='Moi <me@email.com>' \
+# .../gitlab-export/tree/project \
+# >issue-emails.txt
+#
+# You may get errors if you are missing label or user mappings, and you haven't
+# disabled these; see the LABELS and USERS variables below.
+#
+# If this file looks correct, then you can proceed with sending emails.
+# Double-check that your tracker is empty to start with, then rerun the command
+# with "--mode=send" and with your SMTP parameters. SMTP options can be
+# specified either via parameters --smtp-{host,port,user,password} or the
+# equivalent SMTP_{HOST,PORT,USER,PASSWORD} environment variables. Pass
+# --smtp-ssl to enable SSL. Also by default there is a five-second delay
+# between sending emails, that you may wish to change with --smtp-delay.
+#
+# ./import_issues.py \
+# --srht-owner=MY_SRHT_USER \
+# --srht-tracker=MY_SRHT_TRACKER \
+# --gitlab-project-url=https://gitlab.com/ME/PROJECT/ \
+# --from='Moi <me@email.com>' \
+# --smtp-host=SMTP_HOSTNAME \
+# --smtp-ssl \
+# --smtp-user=SMTP_USERNAME \
+# --smtp-password=SMTP_PASSWORD \
+# .../gitlab-export/tree/project
+
+
import argparse
import json
import os
import re
import smtplib
import time
from datetime import datetime, timezone
from email.message import EmailMessage
from email.utils import format_datetime, make_msgid
from pathlib import Path
from typing import Dict, List, Optional
+
+
# Mapping from Gitlab label IDs to label names for the project.  This info is
# unfortunately not included in the Gitlab project export, and it's needed to
# transform raw label references (e.g. "~123456") into label names in issue
# notes.
#
# Any label that is referenced from an issue note but missing here will cause
# an AssertionError to be raised.  Run with --mode=print first to make sure no
# labels are missing, before using --mode=send.
#
# Alternatively, set this to None to disable translation of label IDs to names.
LABELS: Optional[Dict[int, str]] = {
    # 123456: "Bug",
    # 232323: "Feature",
    # ...
}
+
+
# Mapping from user IDs to strings to use for their names when recording who
# created each ticket.  Gitlab exports user full names (but not necessarily
# IDs) for each note on an issue, but for the creator of an issue, it only
# exports the user ID, no name.
#
# Any user that created an issue but is missing here will cause an
# AssertionError to be raised.  Run with --mode=print first to make sure no
# users are missing, before using --mode=send.
#
# Alternatively, set this to None to disable recording issue creators.
#
# TODO Might be able to automatically map note.events.author_id to note.author.name.
USERS: Optional[Dict[int, str]] = {
    # 1234000: "John Joe (@jdoe)",
    # ...
}
+
+
# Module-level running totals.  email_count is incremented by do_mail() for
# every message printed or sent; issue_count is incremented by open_ticket()
# and doubles as the ticket ID that open_ticket() returns.
email_count = 0
issue_count = 0
+
+
def do_mail(
    *,
    smtp,
    smtp_delay: float,
    mode: str,
    frm: str,
    to: str,
    body: str,
    subject: Optional[str] = None,
):
    """Print or send a single email, depending on ``mode``.

    Every call increments the global ``email_count`` and prints a
    ``---- #N`` separator line, whatever the mode.

    Args:
        smtp: SMTP client object providing ``send_message``.  Only used when
            ``mode`` is ``"send"``; may be ``None`` otherwise.
        smtp_delay: Seconds to sleep after sending a message.  Not used in
            print mode.
        mode: ``"print"`` to write the message to stdout, or ``"send"`` to
            deliver it through ``smtp``.
        frm: Value for the From header.
        to: Value for the To header.
        body: Plain-text message body.
        subject: Value for the Subject header; the header is omitted when
            this is empty or ``None``.

    Raises:
        RuntimeError: If ``mode`` is neither ``"print"`` nor ``"send"``.
    """
    global email_count
    email_count += 1
    print(f"---- #{email_count}")

    # Use a timezone-aware UTC timestamp.  datetime.utcnow() is deprecated
    # and returns a naive value, which format_datetime() renders with the
    # "-0000" offset instead of an explicit "+0000".
    date = format_datetime(datetime.now(timezone.utc))
    msg_id = make_msgid()

    if mode == "print":
        print(f"From: {frm}")
        print(f"To: {to}")
        print(f"Date: {date}")
        if subject:
            print(f"Subject: {subject}")
        print(f"Message-ID: {msg_id}")
        print()
        print(body)

    elif mode == "send":
        msg = EmailMessage()
        msg.set_content(body)
        msg["From"] = frm
        msg["To"] = to
        msg["Date"] = date
        if subject:
            msg["Subject"] = subject

        # Message-ID is required, unless you want this error message from the
        # sr.ht mail server:
        #
        #     500 Error: (AttributeError) 'NoneType' object has no attribute
        #     'removeprefix' (in reply to end of DATA command)
        msg["Message-ID"] = msg_id

        smtp.send_message(msg)

        # Pause so that consecutive messages are less likely to be processed
        # out of order by the receiving side.
        time.sleep(smtp_delay)

    else:
        raise RuntimeError(f"Unknown mode: {mode!r}")
+
+
def open_ticket(
    *,
    smtp,
    smtp_delay: float,
    mode: str,
    srht_owner: str,
    srht_tracker: str,
    frm: str,
    title: str,
    body: str,
    created_by: Optional[str],
    created_at: str,
    closed_at: Optional[str],
    is_closed: bool,
    label_names: List[str],
    milestone_name: Optional[str],
    gitlab_ticket_url: str,
) -> int:
    """Create a ticket by mailing ``~owner/tracker@todo.sr.ht``.

    The message body starts with a block of "pseudo-headers" (origin URL,
    creator, timestamps, milestone, labels) followed by a blank line and the
    original issue description.

    Args:
        smtp, smtp_delay, mode, frm: Passed straight through to ``do_mail``.
        srht_owner: Owner part of the tracker address.
        srht_tracker: Tracker name part of the tracker address.
        title: Ticket title, used as the email subject.
        body: Issue description.  ``None`` is treated as an empty
            description, since an export may carry a null description.
        created_by: Display name of the issue creator, or ``None`` to omit
            the "Created by" line.
        created_at: Creation timestamp, included verbatim.
        closed_at: Closing timestamp, or ``None`` if not available.
        is_closed: Whether the issue is closed; only consulted when
            ``closed_at`` is ``None``.
        label_names: Label names to list (sorted in the output).
        milestone_name: Milestone title, or ``None``/empty to omit it.
        gitlab_ticket_url: URL of the original issue.

    Returns:
        The updated global ``issue_count``, i.e. the number of tickets opened
        so far.  On a fresh tracker this is expected to equal the new
        ticket's ID.
    """
    global issue_count

    pheaders = [f"Migrated from: {gitlab_ticket_url}"]

    if created_by:
        pheaders.append(f"Created by: {created_by}")
    pheaders.append(f"Created at: {created_at}")

    if closed_at is not None:
        pheaders.append(f"Closed at: {closed_at}")
    elif is_closed:
        pheaders.append("State: closed")

    if milestone_name:
        pheaders.append(f"Milestone: {milestone_name}")

    if label_names:
        pheaders.append("Labels: " + ", ".join(sorted(label_names)))

    lines = [
        # Each pseudo-header ends in " \" before the newline; presumably a
        # Markdown hard line break so they render one per line.
        " \\\n".join(pheaders),
        "",
        # A missing (None) description would make str.join() raise TypeError.
        body if body is not None else "",
    ]

    do_mail(
        smtp=smtp,
        smtp_delay=smtp_delay,
        mode=mode,
        frm=frm,
        to=f"~{srht_owner}/{srht_tracker}@todo.sr.ht",
        subject=title,
        body="\n".join(lines),
    )

    # Only count the ticket once the mail has gone out without error.
    issue_count += 1
    return issue_count
+
+
def send_comment(
    *,
    smtp,
    smtp_delay: float,
    mode: str,
    srht_owner: str,
    srht_tracker: str,
    frm: str,
    issue_id: int,
    body: str,
    author_name: str,
    created_at: str,
    last_edited_at: str,
    is_system: bool,
):
    """Add one note to an existing ticket by mailing the ticket's address.

    System notes are introduced with a "Changed at" line, user notes with an
    "On <date>, <author> wrote:" line.  If the note was edited after it was
    created, a trailing "(Last edited at ...)" remark is appended.
    """
    # A single introductory line; no pseudo-header block is needed here.
    if is_system:
        heading = f"Changed at: {created_at}"
    else:
        heading = f"On {created_at}, {author_name} wrote:"

    parts = [heading, "", body]

    was_edited = bool(last_edited_at) and last_edited_at != created_at
    if was_edited:
        parts.extend(["", f"(Last edited at {last_edited_at}.)"])

    recipient = f"~{srht_owner}/{srht_tracker}/{issue_id}@todo.sr.ht"
    do_mail(
        smtp=smtp,
        smtp_delay=smtp_delay,
        mode=mode,
        frm=frm,
        to=recipient,
        body="\n".join(parts),
    )
+
+
def close_ticket(
    *,
    smtp,
    smtp_delay: float,
    mode: str,
    srht_owner: str,
    srht_tracker: str,
    frm: str,
    issue_id: int,
    closed_at: Optional[str],
    is_closed: bool,
):
    """Resolve a ticket by mailing a "!resolve fixed" command to it.

    The command is preceded by a "Closed at" line when a closing timestamp
    is known, or by a generic "Ticket closed." line when only the closed
    state is known.
    """
    if closed_at is not None:
        # Only one pseudo-header here, so no separate header block.
        preamble = [f"Closed at: {closed_at}"]
    elif is_closed:
        preamble = ["Ticket closed."]
    else:
        preamble = []

    message = "\n".join([*preamble, "", "!resolve fixed"])
    recipient = f"~{srht_owner}/{srht_tracker}/{issue_id}@todo.sr.ht"

    do_mail(
        smtp=smtp,
        smtp_delay=smtp_delay,
        mode=mode,
        frm=frm,
        to=recipient,
        body=message,
    )
+
+
def _read_ndjson(path: Path) -> List[dict]:
    """Load every JSON object from a newline-delimited JSON file."""
    # Decode explicitly as UTF-8 instead of the locale's default encoding,
    # and skip blank lines, which json.loads() would reject.
    with open(path, encoding="utf-8") as ndjson_file:
        return [json.loads(line) for line in ndjson_file if line.strip()]


def _require(condition, message: str) -> None:
    """Raise AssertionError(message) unless ``condition`` is truthy.

    Used in place of ``assert`` so the checks still run under ``python -O``,
    while raising the same exception type an ``assert`` would.
    """
    if not condition:
        raise AssertionError(message)


def run(
    *,
    smtp,
    smtp_delay: float,
    mode: str,
    srht_owner: str,
    srht_tracker: str,
    frm: str,
    export_dir_path: Path,
    gitlab_project_url: str,
    allow_missing_issues: bool,
):
    """Replay all exported issues as emails, in three passes.

    Reads ``milestones.ndjson`` and ``issues.ndjson`` from
    ``export_dir_path``, then:

    1. opens one ticket per issue, in ascending ``iid`` order;
    2. sends every note of every issue as a comment, oldest first, expanding
       ``~<id>`` label references (when ``LABELS`` is not ``None``) and
       ``%<id>`` milestone references in label/milestone notes;
    3. resolves every ticket whose issue state is ``closed``.

    Args:
        smtp, smtp_delay, mode, frm: Passed through to the mail helpers.
        srht_owner: Owner of the target tracker.
        srht_tracker: Name of the target tracker.
        export_dir_path: Directory containing the ``*.ndjson`` files.
        gitlab_project_url: Project base URL without a trailing slash; used
            to build the "Migrated from" link of each ticket.
        allow_missing_issues: When false, abort unless the issue ``iid``
            values are exactly 1..N and every new ticket gets the same
            number as its original issue.

    Raises:
        AssertionError: On gaps in the issue IDs (unless allowed), on an
            issue author missing from ``USERS``, or on a referenced label
            or milestone that cannot be resolved.
    """
    milestone_ids_to_titles = {
        milestone_json['iid']: milestone_json['title']
        for milestone_json in _read_ndjson(export_dir_path / 'milestones.ndjson')
    }

    issue_jsons = _read_ndjson(export_dir_path / 'issues.ndjson')
    issue_jsons.sort(key=lambda x: x['iid'])
    if not allow_missing_issues:
        _require(
            [x['iid'] for x in issue_jsons] == list(range(1, len(issue_jsons) + 1)),
            f"Don't have all issues from 1 to {len(issue_jsons)}, cannot proceed.",
        )

    for issue_json in issue_jsons:
        issue_json['notes'].sort(key=lambda x: x['created_at'])

    # Keyword arguments shared by every mail helper call below.
    mail_args = {
        'smtp': smtp,
        'smtp_delay': smtp_delay,
        'mode': mode,
        'srht_owner': srht_owner,
        'srht_tracker': srht_tracker,
        'frm': frm,
    }

    # The "Removed" part is a guess here, don't know if that actually shows up.
    label_note_re = re.compile(r'^(Added|Removed) ~[0-9]+ label')
    label_ref_re = re.compile(r'~([0-9]+)')
    milestone_note_re = re.compile(r'^Milestone changed to %[0-9]+$')
    milestone_ref_re = re.compile(r'%([0-9]+)')

    def expand_label(ref):
        ref_num = int(ref.group(1))
        _require(ref_num in LABELS, f"Unknown label #{ref_num}, please add to LABELS.")
        return LABELS[ref_num]

    def expand_milestone(ref):
        ref_num = int(ref.group(1))
        _require(ref_num in milestone_ids_to_titles, f"Unknown milestone #{ref_num}.")
        return milestone_ids_to_titles[ref_num]

    print("-------- CREATING TICKETS")

    issue_id_map: Dict[int, int] = {}

    for issue_json in issue_jsons:
        gitlab_issue_id = issue_json['iid']
        author_id = issue_json['author_id']
        created_by: Optional[str]
        if USERS is None:
            created_by = None
        else:
            _require(
                author_id in USERS,
                f"Unknown author #{author_id} of ticket #{gitlab_issue_id}, "
                "please add to USERS.",
            )
            created_by = USERS[author_id]

        # "milestone" may be absent or present with a null value.
        milestone_json = issue_json.get('milestone') or {}

        srht_issue_id = open_ticket(
            **mail_args,
            title=issue_json['title'],
            # A null description must not reach str.join() in open_ticket.
            body=issue_json['description'] or "",
            created_by=created_by,
            created_at=issue_json['created_at'],
            closed_at=issue_json['closed_at'],
            is_closed=(issue_json['state'] == 'closed'),
            label_names=[x['label']['title'] for x in issue_json['label_links']],
            milestone_name=milestone_json.get('title') or None,
            gitlab_ticket_url=f"{gitlab_project_url}/-/issues/{gitlab_issue_id}",
        )

        if not allow_missing_issues:
            _require(
                srht_issue_id == gitlab_issue_id,
                f"Internal error, srht_issue_id {srht_issue_id} != "
                f"gitlab_issue_id {gitlab_issue_id}.",
            )

        issue_id_map[gitlab_issue_id] = srht_issue_id

    print("-------- CREATING COMMENTS")

    for issue_json in issue_jsons:
        srht_issue_id = issue_id_map[issue_json['iid']]

        for note_json in issue_json['notes']:
            # "system_note_metadata" may be absent or present with a null value.
            system_action = (note_json.get('system_note_metadata') or {}).get('action')

            body = note_json['note']

            if LABELS is not None and (
                system_action == 'label' or label_note_re.search(body)
            ):
                body = label_ref_re.sub(expand_label, body)

            if system_action == 'milestone' or milestone_note_re.search(body):
                body = milestone_ref_re.sub(expand_milestone, body)

            send_comment(
                **mail_args,
                issue_id=srht_issue_id,
                body=body,
                author_name=note_json['author']['name'],
                created_at=note_json['created_at'],
                last_edited_at=note_json['last_edited_at'],
                is_system=note_json['system'],
            )

    print("-------- CLOSING CLOSED ISSUES")

    for issue_json in issue_jsons:
        if issue_json['state'] == 'closed':
            close_ticket(
                **mail_args,
                issue_id=issue_id_map[issue_json['iid']],
                closed_at=issue_json['closed_at'],
                is_closed=True,
            )
+
+
def main():
    """Parse the command line and run the import in print or send mode.

    SMTP settings come from ``--smtp-*`` options, falling back to the
    ``SMTP_HOST``, ``SMTP_PORT``, ``SMTP_USER`` and ``SMTP_PASSWORD``
    environment variables.  Invalid or missing arguments are reported via
    ``argparse`` (usage message and exit status 2).

    In send mode the SMTP connection is opened with a context manager, so it
    is shut down even if the import fails part-way through.
    """
    parser = argparse.ArgumentParser(
        prog='import_issues.py',
        description='Import Gitlab issues into Sourcehut via SMTP.',
    )

    parser.add_argument(
        '--srht-owner',
        required=True,
        help='Owner of the Sourcehut tracker.',
    )

    parser.add_argument(
        '--srht-tracker',
        required=True,
        help='Name of Sourcehut tracker to submit to.',
    )

    parser.add_argument(
        '--gitlab-project-url',
        required=True,
        help="The base URL of the project on Gitlab.",
    )

    parser.add_argument(
        '--mode',
        default='print',
        # Reject unknown modes up front; otherwise no SMTP object would be
        # set up and the failure would surface much later.
        choices=('print', 'send'),
        help="Action to take, 'print' or 'send'.",
    )

    parser.add_argument(
        '--from',
        help="From address if mode is 'send'.",
    )

    parser.add_argument(
        '--smtp-host',
        help="SMTP host to use.",
    )

    parser.add_argument(
        '--smtp-port',
        type=int,
        default=None,
        help="SMTP port to use.",
    )

    parser.add_argument(
        '--smtp-ssl',
        action='store_true',
        help="Use SMTP over SSL.",
    )

    parser.add_argument(
        '--smtp-user',
        help="SMTP username.",
    )

    parser.add_argument(
        '--smtp-password',
        help="SMTP password.",
    )

    parser.add_argument(
        '--smtp-delay',
        type=float,
        default=5.0,
        help="Decimal number of seconds to wait after sending each email.",
    )

    parser.add_argument(
        '--allow-missing-issues',
        action='store_true',
        help="Don't abort if there are missing issue IDs in the export.",
    )

    parser.add_argument(
        'export_dir',
        help='Exported Gitlab tree/project/ directory containing ndjson files.',
    )

    args = vars(parser.parse_args())

    export_dir = args['export_dir']
    # An empty string would otherwise silently mean the current directory.
    if not export_dir:
        parser.error("Must have an exported project directory.")
    export_dir_path = Path(export_dir)
    if not export_dir_path.is_dir():
        parser.error(f"Project directory is not a directory: {export_dir_path}")

    mode = args['mode']
    frm = args['from']

    run_args = {
        'smtp_delay': args['smtp_delay'],
        'mode': mode,
        'srht_owner': args['srht_owner'],
        'srht_tracker': args['srht_tracker'],
        'frm': frm,
        'export_dir_path': export_dir_path,
        'gitlab_project_url': args['gitlab_project_url'].rstrip('/'),
        'allow_missing_issues': args['allow_missing_issues'],
    }

    if mode == 'print':
        run(smtp=None, **run_args)
        return

    # mode == 'send' from here on.
    if not frm:
        parser.error("--from is required when --mode=send.")

    smtp_ssl = args['smtp_ssl']
    smtp_host = args['smtp_host'] or os.environ.get('SMTP_HOST', 'localhost')

    smtp_port = args['smtp_port']
    if smtp_port is None:
        env_port = os.environ.get('SMTP_PORT')
        if env_port:
            try:
                smtp_port = int(env_port)
            except ValueError:
                parser.error(f"SMTP_PORT is not an integer: {env_port!r}")
        else:
            smtp_port = 465 if smtp_ssl else 25

    smtp_user = args['smtp_user'] or os.environ.get('SMTP_USER')
    smtp_password = args['smtp_password'] or os.environ.get('SMTP_PASSWORD')

    if not smtp_user:
        parser.error("No SMTP user given.")
    if not smtp_password:
        parser.error("No SMTP password given.")

    print(f"Connecting to {smtp_host}:{smtp_port}, user {smtp_user!r}.")

    smtp_class = smtplib.SMTP_SSL if smtp_ssl else smtplib.SMTP
    with smtp_class(host=smtp_host, port=smtp_port) as smtp:
        # If SMTP isn't working: smtp.set_debuglevel(2)
        smtp.login(smtp_user, smtp_password)
        run(smtp=smtp, **run_args)
+
+
+if __name__ == '__main__':
+ main()