remove database connection, instead write back to csv file
This commit is contained in:
parent d1229cad84
commit b38af8dee5
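Summary: this change drops the PostgreSQL layer (psycopg / psycopg_pool, the AUTHTOKEN scaffolding, and the db_setup stub) and instead appends validated rows to a CSV file through csv.DictWriter. A new -o/--output argument (default export_cleaned.csv) names the target file, get_total() now takes the file to count as a parameter, and comp_import()/Company are handed the writer via a new out parameter so records can be written back out.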
@@ -3,14 +3,12 @@ import argparse
 import csv
 import secrets
 import sys
-import time
 import json
 import logging
 import requests
 import hashlib
 import io
-import psycopg as ps
-import psycopg_pool as ps_pool
+import datetime
 from rich import progress
 from rich.logging import RichHandler
 from rich.console import Console
@@ -18,8 +16,9 @@ from rich.traceback import install
 install(show_locals=True, locals_max_length=150, locals_max_string=300)
 
 class Company:
-    def __init__(self, data, report):
+    def __init__(self, data, report, out):
         self.data = data
+        self.out = out
         self.bvdid = data["BvD ID Nummer"]
         self.name = data["Unternehmensname"]
         self.gv2020 = None
@@ -179,27 +178,31 @@ class Company:
     def validate(self):
         #fallback, in case tax wasn't already calculated
         self.calculate_tax()
-        if self.st2020 and self.ek2020:
-            self.report.valid_data += 1
-            return True
-        self.report.invalid_data +=1
-        if self.st2021 and self.ek2021:
-            self.report.valid_data += 1
-            return True
-        self.report.invalid_data +=1
-        if self.st2022 and self.ek2022:
-            self.report.valid_data += 1
-            return True
-        if self.st2024 and self.ek2024:
-            self.report.valid_data += 1
-            return True
-        self.report.invalid_data +=1
-        return False
+        if True:
+            if self.st2020 and self.ek2020:
+                self.report.valid_data += 1
+            else:
+                self.report.invalid_data +=1
+            if self.st2021 and self.ek2021:
+                self.report.valid_data += 1
+            else:
+                self.report.invalid_data +=1
+            if self.st2022 and self.ek2022:
+                self.report.valid_data += 1
+            else:
+                self.report.invalid_data +=1
+            if self.st2023 and self.ek2023:
+                self.report.valid_data += 1
+            else:
+                self.report.invalid_data +=1
+            if self.st2024 and self.ek2024:
+                self.report.valid_data += 1
+            else:
+                self.report.invalid_data +=1
 
 
 class dataimport:
-    def __init__(self, filename, logfile, seek=0):
+    def __init__(self, filename, logfile, output, seek=0):
         self.seek = seek
         self.progress = progress.Progress(
             *progress.Progress.get_default_columns(),
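The rewritten validate() now tallies each year independently instead of returning on the first hit, and 2023 (skipped in the old version) is included. The five near-identical blocks could also be driven by a loop; a minimal sketch, not part of the commit, assuming only the year-suffixed st/ek attributes visible in this diff:

    # Sketch: same tallies as the new validate(), generated with getattr
    # over st2020/ek2020 ... st2024/ek2024 rather than copy-pasted blocks.
    def validate(self):
        self.calculate_tax()  # fallback, in case tax wasn't already calculated
        for year in range(2020, 2025):
            if getattr(self, f"st{year}") and getattr(self, f"ek{year}"):
                self.report.valid_data += 1
            else:
                self.report.invalid_data += 1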
@@ -210,6 +213,7 @@ class dataimport:
         self.filename = filename
         FORMAT = "%(message)s"
         self.logfile = open(logfile, 'a')
+        self.output = output
         if self.logfile != "NONE":
             self.logconsole = Console(file=self.logfile)
             logging.basicConfig(
@@ -225,54 +229,38 @@ class dataimport:
                 show_path=False, show_time=False, level="NOTSET")])
 
         self.log = logging.getLogger("import")
-        self.total_rows = self.get_total()
+        self.total_rows = self.get_total(self.filename)
         self.errors = 0
         self.data = {}
         self.duplicate_database_id = None
-        self.task = self.progress.add_task(f"Importing {self.filename.split('/')[-1]}", total=self.get_total())
+        self.task = self.progress.add_task(f"Importing {self.filename.split('/')[-1]}", total=self.get_total(self.filename))
         self.progress.update(self.task, advance=self.seek)
-        global AUTHTOKEN
-        AUTHTOKEN = None
         self.valid_data = 0
         self.invalid_data = 0
-        #with ps_pool.ConnectionPool(conninfo="postgresql:///bachelorarbeit?sslmode=require&port=5432&host=denkena-consulting.com&passfile=/home/user/bachelorarbeit_importer/pgpass&user=bachelorarbeit_w&hostaddr=94.16.116.86", min_size=4, max_size=10, open=True, ) as pool:
-        # with pool.connection() as conn:
-        #self.db_setup()
         self.importer()
-        #AUTHTOKEN = self.authtoken
-        #self.log.info('AUTHTOKEN SET!')
 
-    def db_setup(self, conn):
-        with conn.cursor() as cur:
-            cur.execute("CREATE TABLE IF NOT EXISTS test( bvd_id serial PRIMARY KEY)")
-        pass
-
-
     def importer(self):
         with self.progress:
-            if AUTHTOKEN is not None:
-                self.authtoken = AUTHTOKEN
-                self.log.info('AUTHTOKEN obtained!')
-            else:
-                pass
             with open(self.filename, mode='r', encoding='utf-8-sig', newline='') as csv_file:
-                csv_reader = csv.DictReader(csv_file, delimiter=',')
-                rownum = 0
-                for row in csv_reader:
-                    if rownum < self.seek:
-                        rownum += 1
-                        continue
-                    for key in csv_reader.fieldnames:
-                        self.data[key] = row[key]
-                    self.comp_import(self.data)
-                    #if self.check_duplicate(data):
-                    #    self.patch_record(data)
-                    #    self.duplicate_database_id = None
-                    #else:
-                    #    self.create_record(data)
-                    self.data = {}
-                    rownum += 1
-                    self.progress.update(self.task, advance=1)
+                with open(self.output, mode='a+', encoding='utf-8-sig', newline='') as output_csv:
+                    csv_reader = csv.DictReader(csv_file, delimiter=',')
+                    out_names = []
+                    output_writer = csv.DictWriter(output_csv, fieldnames=out_names)
+                    self.log.warning(self.get_total(self.output))
+                    if self.get_total(self.output) <= 0:
+                        self.log.warning(f"WRITING HEADER FOR FILE {self.output}!")
+                        output_writer.writeheader()
+                    rownum = 0
+                    for row in csv_reader:
+                        if rownum < self.seek:
+                            rownum += 1
+                            continue
+                        for key in csv_reader.fieldnames:
+                            self.data[key] = row[key]
+                        self.comp_import(self.data, output_writer)
+                        self.data = {}
+                        rownum += 1
+                        self.progress.update(self.task, advance=1)
             self.progress.console.rule()
            self.log.info(f"Rows: {self.total_rows}")
             self.log.info(f"Valid: {self.valid_data}")
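Note that out_names starts as an empty list, so the DictWriter is created with no columns; writeheader() and any row written through out only become useful once fieldnames are populated. One way to do that, assuming the output columns should simply mirror the input header (an assumption, not something this commit does):

    # Hypothetical variant: reuse the input header as the output schema.
    out_names = csv_reader.fieldnames            # columns read from the source CSV
    output_writer = csv.DictWriter(output_csv, fieldnames=out_names)
    if self.get_total(self.output) <= 0:         # output file still empty -> write header once
        output_writer.writeheader()

Opening the output in 'a+' mode pairs with the existing --seek option: an interrupted run can be resumed without truncating rows already written.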
@@ -286,24 +274,23 @@ class dataimport:
             else:
                 self.log.critical("ERROR CALCULATION EXCEPTION")
 
-    def get_total(self):
-        return sum(1 for _ in open(self.filename, mode='r')) - 1
+    def get_total(self, file):
+        return sum(1 for _ in open(file, mode='r')) - 1
 
-    def comp_import(self, data):
-        current = Company(data, report=self)
+    def comp_import(self, data, out):
+        current = Company(data, report=self, out=out)
         current.validate()
 
 
 parser = argparse.ArgumentParser(description='Import data from ORBIS', epilog='Copyright Denkena Consulting')
 parser.add_argument('filename', nargs="+")
 parser.add_argument('-l', '--logfile', default="log_importer", nargs="?")
+parser.add_argument('-o', '--output', default="export_cleaned.csv", nargs="?")
 parser.add_argument('-s', '--seek', type=int, default=0)
 args = parser.parse_args()
 if len(args.filename) > 1 and args.seek > 0:
     parser.error("Seek combined with multiple files is a bad idea!")
 for filename in args.filename:
-    dataimport(filename, args.logfile, args.seek)
+    dataimport(filename, args.logfile, args.output, args.seek)
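With the new flag, an invocation looks something like this (script name and file paths hypothetical):

    python importer.py orbis_export.csv -l log_importer -o export_cleaned.csv -s 0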