# cgit page navigation: about | summary | refs | log | blame | commit | diff | stats
# path: root/insert_criminal_records.py
# blob: f4b761be5f54ffeecf943d769000f2a0f6ace991 (plain) (tree)

































































































                                                                                                        



                                        
                       



                                  

                                                                                                                                                                                                                                         
import sqlite3
import xml.etree.ElementTree as ET
import os
from update_db import get_primary_sql_key

def insert_criminal_records(DB_name):
    """Load the criminal-register XML export into the SQLite database.

    Parses ``data/criminal_records.xml`` (resolved against the current
    working directory), passes every child element of the XML root to
    ``process_individual_extracts`` and commits once after all of them
    have been processed.

    Args:
        DB_name: Database path handed to ``sqlite3.connect``.

    Raises:
        Any exception raised while parsing the XML file or processing an
        extract. The connection is closed without committing in that case.
    """
    print("Inserting the criminal register information")
    conn = sqlite3.connect(DB_name)
    try:
        c = conn.cursor()
        file_address = os.path.join(os.getcwd(), "data", "criminal_records.xml")
        root = ET.parse(file_address).getroot()
        for child in root:
            process_individual_extracts(child, c)
        # Single commit at the end: either the whole file is imported or nothing.
        conn.commit()
    finally:
        # Always release the connection, even when parsing or an insert fails.
        conn.close()
    return
    
def process_individual_extracts(extract, c):
    """Store the data of one register extract, if it can be matched.

    The extract is ignored when ``get_ICO`` reports -1 (no usable ICO).
    Otherwise court records, paragraphs and penalties are collected and
    inserted, unless ``get_primary_sql_key`` returns 0 for the ICO
    (presumably "no matching row" - confirm in update_db).

    Args:
        extract: XML element holding one extract.
        c: Database cursor used for the lookup and the insert.
    """
    company_ico = get_ICO(extract)
    if company_ico == -1:
        return

    # Everything is gathered before the key lookup, as in the original flow.
    records = find_court_records(extract)
    paragraphs = find_relevant_paragraphs(extract)
    sanctions = find_penalties(extract)

    company_key = get_primary_sql_key(c, company_ico)
    if company_key == 0:
        return

    insert_crimnal_data_to_DB(c, company_key, records, paragraphs, sanctions)
    return

def get_ICO(extract):
    """Return the ICO stored in an extract as an int, or -1 if unusable.

    The value is read from the text of ``extract[0][0][2]``.

    Args:
        extract: XML element holding one extract.

    Returns:
        The ICO converted with ``int``, or -1 when the element has no text
        or the text does not consist solely of decimal digits.
    """
    temp_ico = extract[0][0][2].text
    # An empty element has text None; treat it like any other invalid value.
    if temp_ico is None:
        return -1
    # isdecimal() (unlike isnumeric()) only accepts characters int() can parse,
    # so values such as a superscript "2" no longer raise ValueError.
    if temp_ico.isdecimal():
        return int(temp_ico)
    return -1

def find_court_records(extract):
    """Collect court file references and court names from an extract.

    Walks the children of ``extract[1][0][0]`` and appends the text of every
    child whose tag contains "spisZnacka" or "organizace". For a child whose
    tag contains "odvolaci" the same two checks are applied to its own
    children. The result is a flat list in document order.

    Args:
        extract: XML element holding one extract.

    Returns:
        List of the collected element texts.
    """
    # TODO --- Make sure that the records are extracted correctly based on the tags
    wanted_markers = ("spisZnacka", "organizace")
    collected = []
    for node in extract[1][0][0]:
        # Each marker is tested independently, so one tag may match more than once.
        for marker in wanted_markers:
            if marker in node.tag:
                collected.append(node.text)
        if "odvolaci" not in node.tag:
            continue
        for nested in node:
            for marker in wanted_markers:
                if marker in nested.tag:
                    collected.append(nested.text)
    return collected

def find_relevant_paragraphs(extract):
    """Describe every paragraph entry listed in an extract.

    Args:
        extract: XML element holding one extract; the entries are the
            children of ``extract[1][0][1]``.

    Returns:
        List with one ``extract_paragraph_info`` result per entry, in
        document order.
    """
    return [extract_paragraph_info(entry) for entry in extract[1][0][1]]

def extract_paragraph_info(individual_paragraph):
    """Build a one-line text description of a paragraph entry.

    Iterates the children of ``individual_paragraph[0]`` in document order
    and assembles a string starting with "§": text of tags containing
    "Cislo" is appended directly, text of tags containing "Pismeno" is
    prefixed with ", odst. " and text of tags containing "zakon" with ", ".

    Args:
        individual_paragraph: XML element of a single paragraph entry.

    Returns:
        The assembled description string.
    """
    pieces = ["§"]
    for field in individual_paragraph[0]:
        tag = field.tag
        if "Cislo" in tag:
            pieces.append(field.text)
        if "Pismeno" in tag:
            pieces.append(", odst. ")
            pieces.append(field.text)
        if "zakon" in tag:
            # NOTE: the law name is used verbatim; its first letter is not lower-cased.
            pieces.append(", ")
            pieces.append(field.text)
    return "".join(pieces)

def find_penalties(extract):
    """Return the penalties listed in an extract, if it has any.

    Looks through the children of ``extract[1][0]`` for the first one whose
    tag contains "tresty" and hands it to ``extract_penalties_info``.

    Args:
        extract: XML element holding one extract.

    Returns:
        The result of ``extract_penalties_info`` for the first matching
        element, or None when no child tag contains "tresty".
    """
    penalties_node = next(
        (section for section in extract[1][0] if "tresty" in section.tag),
        None,
    )
    if penalties_node is None:
        return None
    return extract_penalties_info(penalties_node)

def extract_penalties_info(extract):
    """Collect the penalty kinds from a penalties element.

    For every child of ``extract`` the first sub-element is inspected; when
    its tag contains "druh" its text is collected.

    Entries without any sub-element, and "druh" elements without text, are
    skipped instead of raising IndexError or putting None into the result
    (the result is later joined into a single string).

    Args:
        extract: XML element whose children are the individual penalties.

    Returns:
        List of penalty-kind strings in document order.
    """
    penalties = []
    for child in extract:
        # len() of an element is its number of sub-elements.
        if len(child) == 0:
            continue
        kind = child[0]
        if "druh" in kind.tag and kind.text is not None:
            penalties.append(kind.text)
    return penalties

def insert_crimnal_data_to_DB(c, primary_sql_key, court_records, relevant_paragraphs, penalties):
    """Insert one row into the ``criminal_records`` table.

    Args:
        c: Database cursor used to execute the INSERT.
        primary_sql_key: Value stored in the ``company_id`` column.
        court_records: Flat list read as
            ``[file_ref_1, court_1, file_ref_2, court_2]``; the second pair
            is optional.
        relevant_paragraphs: Iterable of paragraph descriptions, or None.
        penalties: Iterable of penalty descriptions, or None.

    Returns:
        Always 0.

    Raises:
        IndexError: If ``court_records`` has fewer than two items.
    """
    first_instance = court_records[1] + ", sp. zn. " + court_records[0]
    # The second instance needs both a court and a file reference; an
    # incomplete pair (3 items) is stored as NULL instead of raising IndexError.
    if len(court_records) >= 4:
        second_instance = court_records[3] + ", sp. zn. " + court_records[2]
    else:
        second_instance = None
    text_paragraphs = ", ".join(relevant_paragraphs) if relevant_paragraphs is not None else ""
    text_penalties = ", ".join(penalties) if penalties is not None else ""
    c.execute(
        "INSERT INTO criminal_records "
        "(company_id, first_instance, second_instance, paragraphs, penalties) "
        "VALUES (?, ?, ?, ?, ?)",
        (primary_sql_key, first_instance, second_instance, text_paragraphs, text_penalties),
    )
    return 0