about | summary | refs | log | tree | commit | diff | stats
path: root/download_files.py
diff options
context:
space:
mode:
Diffstat (limited to 'download_files.py')
-rw-r--r-- download_files.py 72
1 files changed, 57 insertions, 15 deletions
diff --git a/download_files.py b/download_files.py
index 16b9be3..ea433e7 100644
--- a/download_files.py
+++ b/download_files.py
@@ -6,28 +6,65 @@ import shutil
import send2trash
from lxml import etree
-def download_data(typy_po, soudy):
- rok = str(datetime.now().year)
- for osoba in typy_po:
- for soud in soudy:
- update_data(osoba + "-full-" + soud + "-" + rok + ".xml.gz")
-def update_data(filename):
- source = "https://dataor.justice.cz/api/file/" + filename
+def get_valid_filenames():
+ FILENAME = "justice_files.txt"
+ my_file = download_list_filenames()
+ save_file(my_file, FILENAME)
+ valid_files = get_files_list(FILENAME)
+ return valid_files
+
+def download_list_filenames():
+ source = "https://dataor.justice.cz/api/3/action/package_list"
+ download = requests.get(source, stream = True)
+ try:
+ # print("Downloading file ", source)
+ download.raise_for_status()
+ except Exception as exc:
+ print("There was a problem: %s" % (exc))
+ return None
+ return download
+
+def save_file(download, temp_file):
+ temp_file = open(temp_file, "wb")
+ for chunk in download.iter_content(1000):
+ temp_file.write(chunk)
+ temp_file.close()
+ return 0
+
+def get_files_list(my_file):
+ f = open(my_file, "r")
+ valid_files = []
+ for line in f:
+ l1 = line[1:-2].split("[")[1].split(",")
+ for elem in l1:
+ if is_valid_file(elem[1:-1]) == True:
+ valid_files.append(elem[1:-1])
+ return valid_files
+
+def is_valid_file(tested_file):
+ if tested_file.split("-")[1] == "full" and tested_file.split("-")[3] == str(datetime.now().year):
+ return True
+ else:
+ return False
+
+def download_data(filename):
+ source = "https://dataor.justice.cz/api/file/" + filename + ".xml.gz"
# temp_file = "D:\\Programovani\\Moje vymysly\\Justice\\data\\temp-" + filename
- temp_file = os.path.join(str(os.getcwd()), "data", "temp-" + filename)
+ temp_file = os.path.join(str(os.getcwd()), "data", "temp-" + filename + ".xml.gz")
# temp_file = str(os.getcwd()) + "\\data\\temp-" + filename
downloaded_OR = downloadOR(source)
if downloaded_OR != None:
save_temp_file(downloaded_OR, temp_file)
- unzip_file(filename[:-3], temp_file)
+ unzip_file(filename, temp_file)
delete_archive(temp_file)
parse_check = parseOR(temp_file[:-3])
if parse_check == True:
- update_main_file(filename[:-3], temp_file[:-3])
+ update_main_file(filename + ".xml", temp_file[:-3])
# delete_archive(temp_file[:-3])
else:
- delete_archive(temp_file[:-3])
+ delete_archive(temp_file)
+ return 0
def downloadOR(source):
download = requests.get(source, stream = True)
@@ -45,8 +82,8 @@ def parseOR(download):
for event, element in etree.iterparse(download):
element.clear()
print("Parsing succsessful!")
- except:
- print("Parsing failed!")
+ except Exception as f:
+ print(f)
return False
return True
@@ -55,20 +92,25 @@ def save_temp_file(download, temp_file):
for chunk in download.iter_content(1000000):
temp_file.write(chunk)
temp_file.close()
+ return 0
def update_main_file(filename, temp_file):
shutil.move(temp_file, os.path.join(str(os.getcwd()), "data", filename))
+ return 0
def delete_temp_file(temp_file):
temp_file = open(temp_file, "w")
temp_file.write("0")
temp_file.close()
+ return 0
def unzip_file(filename, temp_file):
with gzip.open(temp_file, 'rb') as f_in:
- with open(os.path.join(str(os.getcwd()), "data", "temp-" + filename), "wb") as f_out:
+ with open(os.path.join(str(os.getcwd()), "data", "temp-" + filename + ".xml"), "wb") as f_out:
# with open(str(os.getcwd()) + "\\data\\temp-" + filename, 'wb') as f_out:
shutil.copyfileobj(f_in, f_out)
+ return 0
def delete_archive(file):
- send2trash.send2trash(file)
\ No newline at end of file
+ send2trash.send2trash(file)
+ return 0
\ No newline at end of file