1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
|
from datetime import datetime
import os
import requests
import gzip
import shutil
import send2trash
from lxml import etree
def get_valid_filenames():
FILENAME = "justice_files.txt"
my_file = download_list_filenames()
save_file(my_file, FILENAME)
valid_files = get_files_list(FILENAME)
return valid_files
def download_list_filenames():
source = "https://dataor.justice.cz/api/3/action/package_list"
download = requests.get(source, stream = True)
try:
# print("Downloading file ", source)
download.raise_for_status()
except Exception as exc:
print("There was a problem: %s. Please check whether https://dataor.justice.cz is online. If not, try again later." % (exc))
return None
return download
def save_file(download, temp_file):
temp_file = open(temp_file, "wb")
for chunk in download.iter_content(1000):
temp_file.write(chunk)
temp_file.close()
return 0
def get_files_list(my_file):
f = open(my_file, "r")
valid_files = []
for line in f:
l1 = line[1:-2].split("[")[1].split(",")
for elem in l1:
if is_valid_file(elem[1:-1]) == True:
valid_files.append(elem[1:-1])
valid_files.sort()
return valid_files
def is_valid_file(tested_file):
if tested_file.split("-")[1] == "full" and tested_file.split("-")[3] == str(datetime.now().year):
return True
else:
return False
def download_data(filename):
source = "https://dataor.justice.cz/api/file/" + filename + ".xml.gz"
# temp_file = "D:\\Programovani\\Moje vymysly\\Justice\\data\\temp-" + filename
temp_file = os.path.join(str(os.getcwd()), "data", "temp-" + filename + ".xml.gz")
# temp_file = str(os.getcwd()) + "\\data\\temp-" + filename
downloaded_OR = downloadOR(source)
if downloaded_OR != None:
save_temp_file(downloaded_OR, temp_file)
unzip_file(filename, temp_file)
delete_archive(temp_file)
parse_check = parseOR(temp_file[:-3])
if parse_check == True:
update_main_file(filename + ".xml", temp_file[:-3])
# delete_archive(temp_file[:-3])
else:
delete_archive(temp_file)
return 0
def downloadOR(source):
download = requests.get(source, stream = True)
try:
print("Downloading file ", source)
download.raise_for_status()
except Exception as exc:
print("There was a problem: %s" % (exc))
return None
return download
def parseOR(download):
print("Parsing the file!")
try:
for event, element in etree.iterparse(download):
element.clear()
print("Parsing succsessful!")
except Exception as f:
print(f)
return False
return True
def save_temp_file(download, temp_file):
temp_file = open(temp_file, "wb")
for chunk in download.iter_content(1000000):
temp_file.write(chunk)
temp_file.close()
return 0
def update_main_file(filename, temp_file):
shutil.move(temp_file, os.path.join(str(os.getcwd()), "data", filename))
return 0
def delete_temp_file(temp_file):
temp_file = open(temp_file, "w")
temp_file.write("0")
temp_file.close()
return 0
def unzip_file(filename, temp_file):
with gzip.open(temp_file, 'rb') as f_in:
with open(os.path.join(str(os.getcwd()), "data", "temp-" + filename + ".xml"), "wb") as f_out:
# with open(str(os.getcwd()) + "\\data\\temp-" + filename, 'wb') as f_out:
shutil.copyfileobj(f_in, f_out)
return 0
def delete_archive(file):
send2trash.send2trash(file)
return 0
def download_criminal_records():
source = "https://eservice-po.rejtr.justice.cz/public/odsouzeni_xml"
file_address = os.path.join(str(os.getcwd()), "data", "criminal_records.xml")
downloaded_criminal_extracts = downloadOR(source)
if downloaded_criminal_extracts != None:
save_temp_file(downloaded_criminal_extracts, file_address)
return 0
|