# Coverage for lib/batch/lib_batch.py: 23% (1383 statements)

import datetime
import os
import types

import pandas as pd


def prepare_ioput_col_datou(inputs):
    if inputs is None or inputs == "":
        return [], []
    inputs = inputs.split(",")
    input_column_name = []
    input_datou_name = []
    for i in inputs:
        if ":" in i:
            input_column_name.append(i.split(":")[0])
            input_datou_name.append(i.split(":")[1])
        else:
            input_column_name.append(i)
            input_datou_name.append(i)
    return input_column_name, input_datou_name
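
# Example (illustrative): each comma-separated entry is either "col" or
# "col:datou"; the single form maps the same name to both lists.
#   >>> prepare_ioput_col_datou("url:input_url,text")
#   (['url', 'text'], ['input_url', 'text'])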


def create_pandas_table_from_list_column_and_input(input_column_name, output_column_name):
    df = pd.DataFrame(columns=input_column_name + output_column_name)
    return df


def append_data_to_df(df, input_arg_by_name, verbose=False):
    if verbose:
        print(" DF in append_data_to_df : ")
        print(df)
    # df.append() was removed from pandas; pd.concat is the general replacement:
    # df = pd.concat([df, pd.DataFrame([input_arg_by_name])], ignore_index=True)
    # As noted by @cottontail, it is also possible to use loc, although this only
    # works if the new index is not already present in the DataFrame (typically
    # the case if the index is a RangeIndex):
    # https://stackoverflow.com/questions/75956209/error-dataframe-object-has-no-attribute-append
    df.loc[len(df)] = input_arg_by_name  # only use with a RangeIndex!
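
# Example (illustrative): appending one row keyed by column name; the dict
# keys are aligned to the columns, which assumes a default RangeIndex.
#   >>> df = create_pandas_table_from_list_column_and_input(["a"], ["b"])
#   >>> append_data_to_df(df, {"a": 1, "b": 2})
#   >>> len(df)
#   1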


def find_first_missing_output(df, output_column_name, input_column_name):
    # TODO VR 27-12-23 modularize this part
    if output_column_name is not None and input_column_name is not None:
        # Find the first row with no output data
        if output_column_name[0] not in df.columns:
            # Add the column to the dataframe, initialized with empty strings
            df[output_column_name[0]] = ""
        list_no_data = df[output_column_name[0]] == ""
        iloc_missing = df.index[list_no_data]
        if iloc_missing.shape == (0,):
            print("All DONE")
            return None, None
        first_missing_output = iloc_missing[0]
        missing_row_index = iloc_missing[0]

        # Input data for this row.
        # I suspect a different behavior when len(input_column_name) == 1
        # (although that has always been the case so far).
        # Besides, input_data is rebuilt later on, so this may not be necessary.
        print("About to get some first_missing_output :" + str(first_missing_output))
        print("Using input_column_name : " + str(input_column_name))
        print(" available cols : " + str(df.columns.values.tolist()))
        print(" number data : " + str(len(df.values.tolist())))

        if len(input_column_name) == 1:
            input_data = df[input_column_name[0]][first_missing_output]
        else:
            input_data = [df[k][first_missing_output] for k in input_column_name]
    else:
        input_data = None
        missing_row_index = None

    return input_data, missing_row_index


def create_pandas_table_from_text(text,
                                  input_column_name=None,
                                  output_column_name=None,
                                  separator="|",
                                  verbose=False):
    # Earlier attempts parsed the text with the markdown / md2py libraries instead.
    if verbose:
        print(text)
    if text is None:
        return None

    # Drop lines that start with a hash or that do not contain the separator
    lines = [line for line in text.strip().split('\n') if (not line.startswith("#") and separator in line)]

    if len(lines) == 0:
        print("ERROR should exit : we have no input data, maybe the markdown is not well formatted ?")
        print(text)
        return None

    # The column headers are on the first kept line
    headers = [d.strip(" ") for d in lines[0].split(separator)]
    # Start at index 2 because the markdown format has a data-less separator
    # line between the column names and the data rows
    # TODO but do better
    data = [line.split(separator) for line in lines[2:]]

    data = [[d.strip(" ") for d in row] for row in data]

    max_nb_columns = max([len(row) for row in data]) if len(data) > 0 else -1
    headers = [headers[i].strip(" ") if i < len(headers) else "Column " + str(i) for i in range(max_nb_columns)]

    # Make sure every row has as many columns as the header
    data = [row if len(row) == len(headers) else row + [''] * (len(headers) - len(row)) for row in data]

    if verbose:
        print(" data to build pd " + str(len(data)))

    # Build the DataFrame
    df = pd.DataFrame(data, columns=headers)
    if '' in df.columns:
        df = df.drop('', axis=1)

    return df
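
# Example (illustrative): a minimal markdown table this parser accepts
# (a header line, one "---" separator line, then data rows).
#   >>> text = "col_a | col_b\n--- | ---\n1 | 2"
#   >>> create_pandas_table_from_text(text).shape
#   (1, 2)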


css_button_class = "btn btn-primary text-l bg-blue-500 text-white p-2 rounded"


def make_replace_upload_button_manax_hc(hit):
    return '<button type="button" class="button_replace {}" id="{}" ><i class="bi bi-upload"></i>Replace</button>'.format(css_button_class, hit)


def make_audit_button_manax_hc(id_line, href):
    return '<a href="{}"><button type="button" id="button-launch-{}" class="{}"><i class="bi bi-rocket-takeoff-fill float-left"></i> Launch</button></a>'.format(href, id_line, css_button_class)


def make_launch_button(endpoint, args, id_line, name="Launch"):
    return '<a href="{}?{}"><button type="button" id="button-launch-create-{}" class="{}"><i class="bi bi-info-circle-fill"></i>{}</button></a>'.format(endpoint, args, id_line, css_button_class, name)


def make_downloadable(url, id_line, name="File"):
    return '<a href="{}"><button type="button" id="button-downloadable-{}-{}" class="{}"><i class="bi bi-info-circle-fill"></i>{}</button></a>'.format(url, name, id_line, css_button_class, name)


from flask import url_for


def append_action_steps_button(table,
                               safia_suivi_prod_doc_id,
                               safia_suivi_prod_proj_id,
                               saxia_steps={}):
    # table.rename(columns={"caffemodel_name": "description", "svm_number_of_descriptors": "dimension", "main_photo_desc_type": "main_pdt"}, inplace=True)
    try:
        for step in saxia_steps:
            if "col" in saxia_steps[step]:
                col = saxia_steps[step]["col"]
            else:
                import sys
                sys.stdout.write("&")
                # print("This action is not meant to be a button in a column : " + (saxia_steps[step]["name"] if "name" in saxia_steps[step] else "no name"))
    except Exception as e:
        print("Several problems in configuration ! " + str(e))
    for x in table.index:
        mtd_id = 40
        out_folder = "/"  # "static/saxia"
        suffix = ""  # "/static/saxia"
        doc_audit = "non:0"
        hash_id_treatment = "dummy"
        file = None
        anon_file = None
        out_file = None
        try:
            mtd_id = table.loc[x, 'datou_id'] if "datou_id" in table.columns else mtd_id
            out_folder = table.loc[x, 'out_folder'] if "out_folder" in table.columns else out_folder
            suffix = table.loc[x, 'suffix'] if "suffix" in table.columns else suffix
            hash_id_treatment = table.loc[x, 'hash_id_treatment'] if "hash_id_treatment" in table.columns else hash_id_treatment
            file = table.loc[x, 'file'] if "file" in table.columns else file
            anon_file = table.loc[x, 'anon_file'] if "anon_file" in table.columns else anon_file
            out_file = table.loc[x, 'out_file'] if "out_file" in table.columns else str(hash_id_treatment) + ".docx"
            doc_audit = table.loc[x, 'doc_audit'] if "doc_audit" in table.columns else doc_audit
            audit_output = table.loc[x, 'audit_output'] if "audit_output" in table.columns else None
        except Exception as e:
            print("Error in append_action_steps_button : " + str(e))

        if out_folder.startswith("/home/safia/OneDrive/Test Safia expertise"):
            out_folder = out_folder.replace("/home/safia/OneDrive/Test Safia expertise", "/static/onedrive/") + "/"
        elif out_folder.startswith("/home/safia/workarea/git/Safia/prompt/python/server/static/onedrive"):
            out_folder = out_folder.replace("/home/safia/workarea/git/Safia/prompt/python/server/static/onedrive", "/static/onedrive") + "/"
        filename = os.path.basename(file) if file is not None else file

        out_folder_hc = "static/onedrive/output"
        out_folder_hc_http = "static%2Fonedrive%2Foutput%2F"

        audit_button_aux = make_audit_button_manax_hc(mtd_id, href=url_for('manax', id=str(mtd_id), suffix=out_folder, hash_id_treatment=hash_id_treatment, safia_suivi_prod_doc_id=safia_suivi_prod_doc_id, safia_suivi_prod_proj_id=safia_suivi_prod_proj_id, file=file, out_file=out_file))  # , audit_file=doc_audit
        table.loc[x, "Audit"] = audit_button_aux

        replace_button_aux = make_replace_upload_button_manax_hc(hash_id_treatment)
        table.loc[x, "Replace"] = replace_button_aux
        # Here I try to lay out the launch button, which should be built from the
        # row data and the configuration parameters stored in the project data
        # (data already created in project 70).
        col_name = "Launch Extract"
        endpoint = "/api/v1/safia/query"
        datou_id = "datou-40"
        if file is not None:
            out_folder_hc = "static/onedrive/output"
            out_folder_hc_http = "static%2Fonedrive%2Foutput%2F"
            args = "hash_id_treatment=" + hash_id_treatment + "&input_csv=hash_id_treatment_input%3D" + hash_id_treatment + "%2Cout_folder%3D" + out_folder_hc_http + "%2Cinput_col_cr%3Dcr_correct_typo%2Cinput_col_intro%3Dintro_correct_typo%2Cload_df_from_db_and_correct%3Dtrue%2Cfile%3D" + file.replace("/", "%2F") + "%2Cwith_audit%3D1"
        else:
            # TODO args to set up for anon file : migrate file / filename, filename is better, ok
            args = ""
        args += "&with_audit=1"
        args += "&" + datou_id + "=true&object=simple_text_query&load_df_from_db_and_correct=true"
        id_line = x
        args += "&safia_suivi_prod_doc_id=" + str(safia_suivi_prod_doc_id)
        args += "&safia_suivi_prod_proj_id=" + str(safia_suivi_prod_proj_id)
        args += "&col_index_treatment=file"
        if file is not None:
            args += "&file=" + file
        audit_button_aux = make_launch_button(endpoint, args, id_line)
        table.loc[x, col_name] = audit_button_aux
        # At some point there was a consolidate button here; instead we now loop
        # over the saxia_steps below.
        # conf_consolidate = saxia_steps["consolidate"] if "consolidate" in saxia_steps else {}
        # output_datou_to_col = conf_consolidate["output_datou_to_col"] if "output_datou_to_col" in conf_consolidate else "nb_page:nb_page,nb_modif:nb_modif_manual,nb_modif_class:nb_modif_class_manual,nb_doc:nb_doc,nb_word:nb_word_result"
        # args += "&output_datou_to_col=" + output_datou_to_col  # VR 10-4-24 : I don't know what this is for!
        datou_id = "datou-44"
        if file is not None:
            args = "hash_id_treatment=" + hash_id_treatment + "&input_csv=hash_id_treatment_input%3D" + hash_id_treatment + "%2Cout_folder%3Dstatic%2Fonedrive%2Foutput%2F%2Cfile%3D" + file.replace("/", "%2F") + "%2Cwith_audit%3D1"
        else:
            # WIP : need to align file and filename
            args = ""
        inputs = ["hash_id_treatment", "object=simple_text_query", "load_df_from_db_and_correct=true"]

        if file is not None:
            url = os.path.join(suffix, file)
        else:
            url = ""
        if 'static' in url:
            url = "/static" + url.split("static")[1]
        col_name = "Input File Anon"
        name = "File Input"
        audit_button_aux = make_downloadable(url, id_line, name)
        table.loc[x, col_name] = audit_button_aux
        col = ""
        try:
            for step in saxia_steps:
                if "col" in saxia_steps[step]:
                    col = saxia_steps[step]["col"]
                    inputs = saxia_steps[step]["inputs"] if "inputs" in saxia_steps[step] else []
                    mtd_id = saxia_steps[step]["datou-id"] if "datou-id" in saxia_steps[step] else None
                    output_datou_to_col = saxia_steps[step]["output_datou_to_col"] if "output_datou_to_col" in saxia_steps[step] else ""  # e.g. for anon "anon_filename,nom,prenom"

                    # Force to datou for now
                    type = saxia_steps[step]["type"] if "type" in saxia_steps[step] else "datou"

                    list_args = []

                    if type == "datou":
                        list_args.append("object=simple_text_query")
                        list_args.append("with_audit=true")
                        url = "/api/v1/safia/query"
                    elif type == "endpoint":
                        url = saxia_steps[step]["url"]
                        list_args_from_json = saxia_steps[step]["args"].split(",") if "args" in saxia_steps[step] else []
                        for args in list_args_from_json:
                            if args in table.columns:
                                list_args.append(args + "=" + table.loc[x, args])

                    # not used
                    special = None

                    duplicate = {
                        "col": "Duplicate Treatment",
                        "type": "endpoint",
                        "endpoint": "api/v1/saxia/duplicate_hash_id_treatment",
                        "args": "hash_id_treatment",
                        "ret": "new_hash_id_treatment",
                        "name": "duplicate"
                    }

                    list_input_csv = []
                    for input in inputs:
                        if input in table.columns:
                            input_val = table.loc[x, input]
                            one_input = input + "%3D" + input_val.replace("/", "%2F")
                            list_input_csv.append(one_input)

                    if len(list_input_csv) > 0:
                        input_csv_val = "%2C".join(list_input_csv)
                        input_csv_var_and_val = "input_csv=" + input_csv_val
                        list_args.append(input_csv_var_and_val)

                    list_args.append("safia_suivi_prod_doc_id=" + str(safia_suivi_prod_doc_id))
                    list_args.append("safia_suivi_prod_proj_id=" + str(safia_suivi_prod_proj_id))
                    list_args.append("col_index_treatment=file")  # to remove
                    if filename is not None:
                        list_args.append("value_index=" + filename)
                        list_args.append("col_index=filename")
                    list_args.append("output_datou_to_col=" + output_datou_to_col)

                    if mtd_id is not None:
                        list_args.append("datou-" + str(mtd_id) + "=true")

                    if len(list_args) > 0:
                        args = "&".join(list_args)
                    else:
                        args = ""

                    action_button_aux = make_launch_button(url, args, x)
                    table.loc[x, col] = action_button_aux
                else:
                    import sys
                    sys.stdout.write("&")
                    # This action is not meant to be a button in a column
        except Exception as e:
            print("Several problems in configuration for col : " + str(col) + " " + str(e))

    # the table is modified in place
    # return table


def set_custom_display(all_result, endpoint_df_conf_type_suivi, col_csv=""):
    list_col_from_input = col_csv.split(",") if col_csv is not None and col_csv != "" else []
    list_col_display = endpoint_df_conf_type_suivi["list_col_display"] if "list_col_display" in endpoint_df_conf_type_suivi else []
    list_col_display_init = list_col_display.copy()
    list_col_virtual = endpoint_df_conf_type_suivi["list_col_virtual"] if "list_col_virtual" in endpoint_df_conf_type_suivi else []
    list_col_action = endpoint_df_conf_type_suivi["list_col_action"] if "list_col_action" in endpoint_df_conf_type_suivi else []

    for col in list_col_virtual:
        all_result[col] = "tofill"
        if "col_source" not in list_col_virtual[col]:
            print(" Missing col_source info in " + str(list_col_virtual[col]))
            continue
        col_source = list_col_virtual[col]["col_source"]
        key = list_col_virtual[col]["key"] if "key" in list_col_virtual[col] else None
        type = list_col_virtual[col]["type"] if "type" in list_col_virtual[col] else "text"
        if type == "text" or type == "humanize_size":
            if col_source in all_result.columns:
                all_result[col] = "init"
                if key is None:
                    continue
                elif "/" in key:
                    from lib.manaudit.lib_datou_audit import load_sub_json
                    all_result[col] = all_result[col_source].apply(lambda x: load_sub_json(x, key))
                else:
                    all_result[col] = all_result[col_source].apply(lambda x: x[key] if key in x else None)
                if type == "humanize_size":
                    from lib.lib_util import humanize_size_file
                    all_result[col] = all_result[col].apply(lambda x: humanize_size_file(x))
                list_col_display.append(col)
        elif type == "link_format" or type == "date_link_format":
            if "format" not in list_col_virtual[col]:
                print(" Missing format info in " + str(list_col_virtual[col]))
                continue
            if "variables" not in list_col_virtual[col]:
                print(" Missing variables info in " + str(list_col_virtual[col]))
                continue
            all_result[col] = col
            for index, row in all_result.iterrows():
                format = list_col_virtual[col]["format"]
                for var in list_col_virtual[col]["variables"]:
                    key = list_col_virtual[col]["variables"][var]["key"]
                    col_source = list_col_virtual[col]["variables"][var]["col_source"]
                    try:
                        value = all_result.loc[index, col_source]
                        if key is not None:
                            value = value.get(key, "dummy")
                    except Exception as e:
                        print(" Pb in getting value for a virtual column name " + str(e))
                        value = "dummy"
                    format = format.replace("{" + var + "}", str(value))
                # VR 16-5-24 TODO hack to have the result auto-download in suivi?type=lab
                format = format.replace("/home/safia/workarea/git/Safia/prompt/python/server/static", "/static")
                format = format.replace("//", "/")
                if type == "link_format":
                    all_result.loc[index, col] = "<a href='" + format + "'>" + col + "</a>"
                elif type == "date_link_format":
                    format_loc = format.replace("/home/safia/workarea/git/Safia/prompt/python/server/static", "static")
                    format_loc = format_loc.replace("//", "/")
                    format_loc = format_loc.lstrip("/")
                    try:
                        datetime_from_stat = os.stat(format_loc).st_ctime
                        dt = datetime.datetime.fromtimestamp(datetime_from_stat)
                        date_str = dt.strftime("%Y-%m-%d %H:%M:%S")
                    except Exception as e:
                        print(" Pb in date_link_format " + str(e))
                        date_str = "No date, possibly no file"
                    all_result.loc[index, col] = "<a href='" + format + "'>" + date_str + "</a>"
                else:
                    print("Unexpected type " + str(type))
                    all_result.loc[index, col] = "unexpected"
            list_col_display.append(col)
        else:
            print(" Unexpected type " + str(type))
    for col in list_col_action:
        value = list_col_action[col]["value"] if "value" in list_col_action[col] else None
        col_source = list_col_action[col]["col_source"] if "col_source" in list_col_action[col] else None
        key = list_col_action[col]["key"] if "key" in list_col_action[col] else None
        action_type = list_col_action[col]["action_type"] if "action_type" in list_col_action[col] else "input"
        if col_source in all_result.columns:
            if action_type == "launch":
                print("TODO and refacto by the way")
            elif action_type == "download":
                print("TODO and refacto by the way")
            elif action_type == "checkbox":
                all_result[col] = "<input type=checkbox data-key=" + str(key) + " data-col=" + str(col_source) + " ></input>"
                for index, row in all_result.iterrows():
                    # ready_to_deliver
                    if row[col_source] is not None:
                        all_result.loc[index, col] = "<input type=checkbox checked data-key=" + str(key) + " data-col=" + str(col_source) + " ></input>"
            elif action_type == "input":
                begin_input = "<input type=text class='input_dyn' data-key=" + str(key) + " data-col=" + str(col_source)
                if "typedata" in list_col_action[col]:
                    begin_input += " data-typedata=" + list_col_action[col]["typedata"]
                end_input = " ><div class='status' ></div></input>"
                all_result[col] = begin_input + end_input
                for index, row in all_result.iterrows():
                    value = row[col_source]
                    if key is not None:
                        if key in value:
                            value = value[key]
                    if row[col_source] is not None:
                        all_result.loc[index, col] = begin_input + " value='" + str(value) + "' " + end_input
            else:
                print("Unknown action_type : " + str(action_type))
            list_col_display.append(col)
        elif value is not None:
            all_result[col] = value
            list_col_display.append(col)

    # Actually it does not matter that this is not unique => YES it does, and I don't want that!
    if list_col_display_init == []:
        for l in list_col_from_input:
            if l not in list_col_display:
                list_col_display.append(l)

    return list_col_display


def create_markdown_table_from_df(df):
    # Build the table from the df column names, add a line made only of "---",
    # then join cells with " | " and rows with "\n"
    list_col_names = df.columns.values.tolist()
    list_col_names = [str(i) for i in list_col_names]
    list_col_names = " | ".join(list_col_names)
    list_col_names = list_col_names + "\n" + "--- | " * len(df.columns.values.tolist())
    df_list = df.values.tolist()
    body_content = "\n".join(map(lambda x: " | ".join(map(str, x)), df_list))
    return list_col_names + "\n" + body_content
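
# Example (illustrative): a DataFrame with columns a, b and one row [1, 2]
# yields "a | b\n--- | --- | \n1 | 2" (the trailing separator on the "---"
# line is an artifact of the string repetition above).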


def load_safia_doc_input_list(safia_document_id, safia_project_id, lpgss, limit=10000):
    info_project = lpgss.get_project_info(safia_project_id)
    table_documents = info_project["table_documents"] if "table_documents" in info_project else "table_documents"

    # lpgss.load_document(safia_project_id, safia_document_id)
    check_table_exists = lpgss.check_table_exists(table_documents)
    if not check_table_exists:
        print("ERROR treated as warning : table " + table_documents + " does not exist, please create it first !")
        documents = []
    else:
        documents = lpgss.get_documents(table_documents, safia_document_id, chunk_id=None, limit=limit)
    print("Number of documents : " + str(len(documents)))
    if len(documents) == 0:
        print(" We will need to create the df and document")
        return None

    total_content = ""
    for d in documents:
        total_content += d["content"]

    return total_content


def fill_df_from_datou_result(df, output_datou_name,
                              output_column_name, output,
                              value_index,
                              col_index="line_number"):
    if value_index is None:
        print("Missing value_index in fill_df_from_datou_result")
        return
    line_number = None
    if col_index == "line_number":
        line_number = value_index
    else:
        if col_index not in df.columns:
            print(" Missing columns " + str(col_index) + " in df, data not saved from datou in df ")
        else:
            line_number = df[col_index] == value_index
            if len(df.index[line_number]) == 0:
                print(" No matching row for value_index " + str(value_index))
                line_number = 0
            elif len(df.index[line_number]) > 1:
                print(" Unexpected multiple line_number, too bad, we will try to update the first !")
                line_number = df.index[line_number][0]
            else:
                line_number = df.index[line_number][0]

    if line_number is None:
        print("Missing line to update in df : value_index : " + str(value_index))
        line_number = 0

    info_store = {}

    for j in range(len(output_datou_name)):
        if output_datou_name[j] in output:
            val = output[output_datou_name[j]]

            info_store[output_column_name[j]] = val

            val = str(val).replace("\n", "<br>")

            df.loc[line_number, output_column_name[j]] = val
        else:
            print(" Missing output " + str(output_datou_name[j]) + " keys available are : " + str(output.keys()))

    return info_store


def aux_fill_load_df(audit_info):
    from lib.manaudit.lib_datou_audit import load_sub_json
    key = "io_exec/9/output/df_complet_as_json"
    try:
        df_from_json = load_sub_json(audit_info, key)
        print("df_from_json : " + str(df_from_json))
    except Exception as e:
        print("Error in loading df_as_json : " + str(e))
        return None

    try:
        df = pd.read_json(df_from_json, convert_dates=["datet", "date_entree_hospitalisationt", "date_sortie_hospitalisationt", "date_fin_arret_travailt", "date_debut_arret_travailt"])
    except Exception as e:
        print("Error in read_json df_as_json : " + str(e))
        return None

    return df


# Not used VR 4/3/35
def fill_info_list_page(audit_info, hash_id_treatment, document_type="ordonnance"):
    df = aux_fill_load_df(audit_info)

    list_page = []

    if df is not None:
        print(df.columns)
    else:
        return []

    key_list_images = "io_exec/0/output/images"
    from lib.manaudit.lib_datou_audit import load_sub_json
    list_pages = load_sub_json(audit_info, key_list_images)

    for i in range(len(df)):
        dt = str(df["document_type"][i])
        if document_type == "all" or dt == document_type:
            list_des_pages = df["Liste des pages"][i]
            try:
                if type(list_des_pages) == str:
                    list_des_pages_as_int = map(int, list_des_pages.split(","))
                else:
                    list_des_pages_as_int = [list_des_pages]
            except Exception as e:
                print(str(e))
                list_des_pages_as_int = []
            for page in list_des_pages_as_int:
                if page < len(list_pages):
                    list_page.append(list_pages[page])

    return list_page


def fill_info_stat_audit(audit_info, hash_id_treatment):
    print(" TODO lots of things, and adapt the outputs")

    df = aux_fill_load_df(audit_info)
    if df is None:
        return {}

    nb_page = 0
    map_doc_type_nb = {}
    for i in range(len(df)):
        lp = str(df["Liste des pages"][i])
        # number of commas + 1, e.g. "4,5,6" counts as 3 pages
        nb_page_this_doc = len(lp) - len(lp.replace(",", "")) + 1
        nb_page += nb_page_this_doc
        if df["document_type"][i] not in map_doc_type_nb:
            map_doc_type_nb[df["document_type"][i]] = nb_page_this_doc
        else:
            map_doc_type_nb[df["document_type"][i]] += nb_page_this_doc

    info_stat = {"nb_doc": len(df), "count": map_doc_type_nb}

    return info_stat


# [ ] TODO VR 11-1-23 refactor : first modularize the loading,
# probably also modularize the loop.
# When we have an in_file, we should rather run a single datou instead of looping over the df.
# We could build a df from param_json, with the **args ??
# Otherwise we need an option to run on a single item.
# Or else modularize and then have, for example, a context_datou_exec.
# We could also handle the case where the inputs of a datou are loaded from a json in a file instead.
def run_batch_datou(mtr_datou_id, safia_project_id=0,
                    safia_document_id=None,
                    input_as_csv=None, output_as_csv=None,
                    verbose=False,
                    with_audit=False,
                    lpgss=None,
                    nb_lines=10,
                    in_file=None, offset=None,
                    out_folder="temp",
                    datou_func="",
                    saxia_step_name=None,
                    special_op="default"):  # TODO VR 14-5-24 : saxia_step_name is the name of a step in the saxia project; inside this function it is just the suffix of a column name
    print("# LOAD DATA NAKED")
    # TODO 22-1-24 : rather than input_from_df_or_arg we would prefer load_input_from_missing and insert_input

    if output_as_csv is None:
        output_as_csv = ""

    from lib.lib_util import count_and_display_elapsed_time

    if "hash_id_treatment" not in output_as_csv:
        if output_as_csv != "":
            output_as_csv += ","
        output_as_csv += "hash_id_treatment:hash_id_treatment"

    input_from_df_or_arg = True
    if in_file is not None and in_file != "":
        nb_lines = 1
        # input_from_df_or_arg = False

    input_column_name, input_datou_name = prepare_ioput_col_datou(input_as_csv)
    output_column_name, output_datou_name = prepare_ioput_col_datou(output_as_csv)

    output_column_name.append("datetime")
    output_datou_name.append("datetime")
    if mtr_datou_id != 0:
        output_column_name.append("datou_id")
        output_datou_name.append("datou_id")

    from auth.lib_auth import create_id
    hash_id_treatment = create_id()

    if safia_document_id == "pg_audit":
        condition = [
            {
                "value": mtr_datou_id,
                "variable": "mtr_datou_id",
                "data_type": "int"
            }
        ]
        mtr_datou_id = None
        df = lpgss.load_data_audit(output_type="pd",
                                   limit=nb_lines,
                                   offset=offset,
                                   condition=condition,
                                   col_csv="*")
        input_from_df_or_arg = True
    else:
        if safia_document_id is None or safia_document_id == "":
            print(" Unexpected behavior TODO safia_document_id should be initialized by default with hash_id_treatment ! ")
            safia_document_id = "tab_log_treatment_" + hash_id_treatment

        total_content = None
        if input_from_df_or_arg:
            if safia_project_id != 0:
                total_content = load_safia_doc_input_list(safia_document_id, safia_project_id, lpgss, limit=10000)
            if total_content is None:
                input_from_df_or_arg = False
            else:
                print("# PREPARE DATA : total_content : " + str(len(total_content)) + " characters")

        if input_from_df_or_arg:
            df = create_pandas_table_from_text(total_content, input_column_name, output_column_name)
        else:
            df = create_pandas_table_from_list_column_and_input(input_column_name, output_column_name)
    if in_file is not None and in_file != "":
        input_arg_by_name = {}
        # TODO 22-1-24 This looks better: https://stackoverflow.com/questions/218616/how-to-get-method-parameter-names but I could not get it to work
        # TODO 22-1-24 And this logic should also go to lines 223 and following, when the useful input is not found
        for name in input_column_name:
            # if name in locals():
            if name == "file":
                input_arg_by_name[name] = in_file
            elif name == "out_folder":
                input_arg_by_name[name] = out_folder
            else:
                print(" name : " + str(name) + " is not an accessible variable ! ")
                input_arg_by_name[name] = ""

        append_data_to_df(df, input_arg_by_name)
        input_data, missing_row_index = input_arg_by_name, len(df.values) - 1
    elif safia_document_id == "pg_audit":
        offset = 0  # because the data was already loaded with the offset applied
        missing_row_index = offset
    else:
        input_data, missing_row_index = find_first_missing_output(df, output_column_name, input_column_name)

    if offset is None or offset == 0:
        offset = missing_row_index
    # Treat data
    # lpgss.load_datou(mtr_datou_id)

    # To be done earlier; and it is a "hack" for now, a matter of access rights !
    from lib.lib_safia_system import LibSafiaSystem
    from server.safia import lpgss_singleton, lib_external_info_from_apia_at, lib_auth_now_from_at, lib_right_singleton
    lss = LibSafiaSystem(lib_user_data_internal=lpgss_singleton,
                         lib_user_data_external=lib_external_info_from_apia_at,
                         lib_auth_user_otp=lib_auth_now_from_at,
                         lib_right=lib_right_singleton)
    otp = "0a76f14b131682eaa36fbef63d725f9352cfb85d"
    info, is_valid = lss.connect_with_otp(otp)

    # Totally crazy !
    lss.user_id = 1

    # lss = LibSafiaSystem(lib_user_data_internal=lpgss, lib_user_data_external=lib_external_info_from_apia_at)

    datous = lss.get_datou(mtr_datou_id)

    datou = None
    if len(datous) == 1:
        datou = datous[0]
        if str(datou["id"]) != str(mtr_datou_id):
            print("Wrong datou id !")
    else:
        print("Unexpected number of datous for this id !")

    if datou is None:
        print("ERROR Missing datou")
        # return

    from auth.lib_auth import get_datou_exec_context_as_complete_param_json
    user = "info@opio.fr"
    user = "victor@reutenauer.eu"
    privacy = False
    from auth.lib_conf_system import lcs_global_singleton
    OPENAI_API_KEY = lcs_global_singleton.get_openai_api_key()
    complete_param_json = get_datou_exec_context_as_complete_param_json(user, verbose, privacy,
                                                                        openai_token=OPENAI_API_KEY, lss=lss,
                                                                        project_id=safia_project_id)
    # VR TODO : damn it, complete_param_json must use the datou's param_json ! grrr
    list_datou_step = list(map(lambda x: x["name"], datou["steps"])) if datou is not None else []  # And I have to do this too ! grrr
    list_param_json_steps = list(map(lambda x: x["param_json"], datou["steps"])) if datou is not None else []

    list_datou_func = []

    if datou_func != "":
        list_datou_func = parse_directive(datou_func)
    else:
        list_datou_func = []

    map_modif_hash_id_treatment_data = {}

    list_results = []

    from lib.datou.datou_exec import datou_exec
    for i in range(nb_lines):
        input_datou = {}

        if input_from_df_or_arg:
            if i + missing_row_index >= len(df.values.tolist()):
                print(" All the tab should be completed by now or we have a problem !")
                break
            if len(input_datou_name) != len(input_column_name) or len(output_datou_name) != len(output_column_name):
                print(" Internal error mismatch datou column io : exiting ")
                exit(1)
            for j in range(len(input_datou_name)):
                print("About to get some input value for : j :" + str(j) + " row : " + str(offset + i))
                print("Using input_datou_name[j] : " + str(input_datou_name[j]))
                print(" available cols : " + str(df.columns.values.tolist()))
                print(" number data : " + str(len(df.values.tolist())))

                # Fetch the element at row offset + i, column input_column_name[j], of df and put it in val
                val = df[input_column_name[j]][offset + i]  # Hack for now: I cannot seem to fetch the right row, it looks like the columns are not found for the first line
                # bug : crashes when url is empty; other cases probably need handling too
                input_datou[input_datou_name[j]] = val
        else:
            if in_file is not None:
                if nb_lines != 1:
                    print(" Unmanaged behavior ")
                input_datou["file"] = in_file
                # used to avoid the OCR's default carbon-impact preprompt; grr, I don't understand the spec that would do the job
                # input_datou["preprompt"] = ""

        if verbose:
            print(str(input_datou))

        if datou is not None:
            input_datou["datou_exec_info"] = {"project_id": safia_project_id,
                                              "safia_doc_id": safia_document_id,
                                              "mtr_datou_id": mtr_datou_id,
                                              "launched_at": datetime.datetime.now()}
        output, audit_json = datou_exec(list_datou_step, input_datou, complete_param_json,
                                        verbose=verbose,
                                        with_audit=with_audit,
                                        privacy=privacy,
                                        list_param_json_steps=list_param_json_steps)
        output["datou_id"] = str(mtr_datou_id)

        if len(list_datou_func) > 0:
            output = call_functions(list_datou_func, input_datou)

        output["datetime"] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        info_store = fill_df_from_datou_result(df, output_datou_name,
                                               output_column_name, output, missing_row_index + i)

        hash_id_treatment = output["hash_id_treatment"] if "hash_id_treatment" in output else input_datou["hash_id_treatment"] if "hash_id_treatment" in input_datou else "POURRI_" + hash_id_treatment
        map_modif_hash_id_treatment_data[hash_id_treatment] = info_store

        if special_op == "accumulate":
            list_results.append(output)

        if saxia_step_name is not None:
            name_info_step = "info_" + saxia_step_name

            saxia_step_name_at = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            info_store[saxia_step_name + "_at"] = saxia_step_name_at

            lss.lib_user_data_internal.upsert_audit_info(hash_id_treatment,
                                                         list_json_update=[
                                                             {"variable": name_info_step, "type": "json"}
                                                         ],
                                                         input_values={name_info_step: info_store},
                                                         verbose=verbose)
    new_text = create_markdown_table_from_df(df)
    # TODO VR test to_markdown

    if safia_document_id != "pg_audit":
        print(" NOW SAVING INDEX FILE BATCH TREATMENT !")
        input_save = {"json_to_save": [{"id": safia_document_id, "text": new_text}]}
        output_save, audit_json = datou_exec(["import_json"], input_save, complete_param_json, verbose=verbose, privacy=privacy)
        output_final = output_save

        print(str(output_final))

        return output_final
    if special_op == "accumulate":
        print(list_results)
        list_pages = []
        for l in list_results:
            if "list_page" in l:
                list_pages.extend(l["list_page"])
        print(list_pages)
        return list_pages
    return "TO check"


# Dynamically load a function from a Python module.
def load_module(module_path, function_name):
    import importlib
    module = importlib.import_module(module_path)
    function = getattr(module, function_name)
    return function


# Dynamically call a function with a list of arguments.
def call_function(function, args):
    return function(*args)
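
# Example (illustrative, using only the standard library):
#   fn = load_module("os.path", "basename")
#   call_function(fn, ["/tmp/x.txt"])  # -> "x.txt"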


# parse datou_function
def parse_directive(chain):
    import re

    # The string to parse looks like:
    # "path_to_file1:function1(arg1,arg2):output1;path_to_file2:function2(arg1,arg2):output2"

    # 1. Split the string on semicolons.
    directives = chain.split(';')

    list_function = []

    # Main loop over each directive.
    for directive in directives:
        # 2. Split each directive on the colons.
        dir_split = directive.split(':')
        if len(dir_split) != 3:
            print(f"Error in directive : {directive}")
            continue
        (path_to_file, function_and_args, output) = tuple(dir_split)

        # Use a regular expression to extract the function name and the arguments.
        function_name, args_str = re.match(r'(\w+)\((.*)\)', function_and_args).groups()

        # Extract the argument names by splitting on commas, ignoring whitespace.
        arg_names = re.split(r'\s*,\s*', args_str)

        one_function = {"path_to_file": path_to_file, "function_name": function_name, "arguments": arg_names, "output": output}
        list_function.append(one_function)

    return list_function
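
# Example (illustrative): one "module:function(args):output" directive.
#   >>> parse_directive("os.path:basename(file):name")
#   [{'path_to_file': 'os.path', 'function_name': 'basename', 'arguments': ['file'], 'output': 'name'}]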


def call_functions(list_function, arguments_json):
    map_results = {}
    for function in list_function:
        path_to_file = function["path_to_file"]
        function_name = function["function_name"]
        arguments = function["arguments"]
        output = function["output"]
        # 4. Load the specified library.
        function = load_module(path_to_file, function_name)

        args_to_pass_as_list = []
        missing_arg = False
        for arg in arguments:
            if arg in arguments_json:
                args_to_pass_as_list.append(arguments_json[arg])
            else:
                # missing_arg = True
                print(f"No argument data available for argument: {arg} expecting fixed value")
                args_to_pass_as_list.append(arg)

        # 5. Call the function with the arguments.
        if not missing_arg:
            result = call_function(function, args_to_pass_as_list)
            print(f"Result of {function_name}: {result}")
            map_results[output] = result
        else:
            print(f"No argument data available for function: {function_name}")
    return map_results
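
# Example (illustrative): parse then call, resolving arguments from a dict
# (names missing from the dict are passed through as literal values).
#   funcs = parse_directive("os.path:basename(file):name")
#   call_functions(funcs, {"file": "/tmp/x.txt"})  # -> {'name': 'x.txt'}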


file_column = "file"
size_column = "size"
created_at_column = "created_at"
indexed_at_column = "indexed_at"
last_indexed_at_column = "last_indexed_at"
modified_at_column = "modified_at"
deleted_at_column = "deleted_at"


def init_df_synchronize():
    input_column_name = [file_column, "sub_folder", created_at_column, modified_at_column, deleted_at_column, indexed_at_column, last_indexed_at_column, size_column]
    df = pd.DataFrame(columns=input_column_name)
    return df


def synchronize_df_folder(df=None, folder="", verbose=False):
    from lib.import_util.lib_path_to_vec import list_files
    if df is None:
        df = init_df_synchronize()

    now_as_string = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    index, file_list = list_files(folder, verbose=False,
                                  managed_zip_extension_to_avoid=[".zip", ".tar", ".tar.gz", ".tgz", ".tar.bz2", ".tbz"])

    for file_dict in file_list:
        file = file_dict["path"]
        # count the occurrences of the file path in the df "file" column
        find_occurence = df[file_column] == file
        nb_occurence = int(find_occurence.sum())
        if nb_occurence == 0:
            print("Indexing one file : " + file)
            size = os.stat(file).st_size
            created_at_float = os.stat(file).st_birthtime  # st_birthtime is only available on some platforms (e.g. macOS / BSD)
            modified_at_float = os.stat(file).st_mtime
            created_at_as_string = datetime.datetime.fromtimestamp(created_at_float).strftime('%Y-%m-%d %H:%M:%S')
            modified_at_as_string = datetime.datetime.fromtimestamp(modified_at_float).strftime('%Y-%m-%d %H:%M:%S')
            df.loc[len(df)] = [file, folder, created_at_as_string, modified_at_as_string, "", now_as_string, now_as_string, size]
        elif nb_occurence > 1:
            print("ERROR : internal inconsistency error : multiple occurence")
        else:
            iloc_file = df.index[find_occurence]
            pass


from abc import abstractmethod


class Cdn():
    @abstractmethod
    def get_file(self, file_id):
        pass

# do we need a onedrive option ?


class CdnLocal(Cdn):
    def __init__(self, root_dir="temp",
                 lcs=None,
                 lib_data_internal=None,
                 project_id=None):
        self.root_dir = root_dir
        if lcs is not None:
            root_dir_conf = lcs.get_root_dir_local_cdn()
            if root_dir_conf is not None:
                self.root_dir = root_dir_conf
        self.lib_data_internal = lib_data_internal
        self.project_id = project_id

    # set project name
    # set date
    # the relative page-location convention must also be implemented in javascript; or how do we handle it ?

    def store_file_get_path(self, temp_file):
        import shutil
        from auth.lib_auth import create_id
        hash_id = create_id()
        folder_as_YMD = datetime.datetime.now().strftime('%Y/%m/%d')
        # keep the original file name, appending a default extension only when it has none
        basename = os.path.basename(temp_file)
        if os.path.splitext(basename)[1] == "":
            basename += ".dat"
        path_file_local_cdn = os.path.join(self.root_dir, str(self.project_id), folder_as_YMD, hash_id + "_" + basename)
        os.makedirs(os.path.dirname(path_file_local_cdn), exist_ok=True)  # the dated target folder may not exist yet
        shutil.move(temp_file, path_file_local_cdn)
        return path_file_local_cdn
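
# Example (illustrative): files land under <root_dir>/<project_id>/<Y/m/d>/,
# prefixed with a fresh hash id.
#   cdn = CdnLocal(root_dir="temp", project_id=7)
#   cdn.store_file_get_path("/tmp/upload.pdf")  # -> "temp/7/2026/02/18/<hash>_upload.pdf"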


def get_class_quali_from_worst_prediag(worst_prediag):
    # 3 classes : BON // EVERYTHING ELSE // MANUSCRIT OR TABLEAUX
    if worst_prediag in ["OK", "BON"]:
        return "PRICE_LOW"
    elif worst_prediag in ["TABLEAUX", "MANUSCRIT", "MAUVAIS"]:
        return "PRICE_HIGH"
    else:  # worst_prediag in ["PRESQUEBON", "MISSING"]
        return "PRICE_MEDIUM"


def get_worst_page_prediag(list_of_page, map_id_page_prediag):
    # import time
    # time.sleep(1)
    worst_prediag = "INIT"
    complete_prediag = ""
    if len(list_of_page) == 0:
        return worst_prediag, complete_prediag

    if len(list_of_page) == 1 and list_of_page[0] in map_id_page_prediag:
        return map_id_page_prediag[list_of_page[0]], map_id_page_prediag[list_of_page[0]]

    # Severity order: OK,BON => PRESQUEBON => {ALL} => MISSING => MANUSCRIT => MAUVAIS
    for page in list_of_page:
        if str(page) in map_id_page_prediag:
            prediag = map_id_page_prediag[str(page)]
            if prediag == "MISSING":
                continue
            if complete_prediag != "":
                complete_prediag += ","
            complete_prediag += prediag

            if worst_prediag == "INIT":
                worst_prediag = prediag
            elif worst_prediag in ["OK", "BON"] and prediag not in ["OK", "BON"]:
                worst_prediag = prediag
            elif prediag == "PRESQUEBON" and worst_prediag in ["OK", "BON"]:
                worst_prediag = "PRESQUEBON"
            elif worst_prediag in ["OK", "BON", "PRESQUEBON"] and prediag not in ["OK", "BON", "PRESQUEBON"]:
                worst_prediag = prediag
            elif prediag == "MISSING" and worst_prediag not in ["MANUSCRIT", "MAUVAIS"]:
                worst_prediag = "MISSING"
            elif prediag == "MANUSCRIT" and worst_prediag != "MAUVAIS":
                worst_prediag = "MANUSCRIT"
            elif prediag == "MAUVAIS":
                worst_prediag = "MAUVAIS"

    return worst_prediag, complete_prediag
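
# Example (illustrative): the worst page verdict wins.
#   >>> get_worst_page_prediag([0, 1], {"0": "BON", "1": "MANUSCRIT"})
#   ('MANUSCRIT', 'BON,MANUSCRIT')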


def get_nb_modif_nb_word_change_or_not_per_doc(info_consolidate_raw, map_id_page_prediag):
    map_count_modif_per_doc = info_consolidate_raw["map_count_modif_per_doc"] if "map_count_modif_per_doc" in info_consolidate_raw else {}
    map_nb_word_per_doc = info_consolidate_raw["audit_info_write"]["map_nb_word_per_doc"] if "audit_info_write" in info_consolidate_raw and "map_nb_word_per_doc" in info_consolidate_raw["audit_info_write"] else {}
    map_type_document_per_doc = info_consolidate_raw["audit_info_write"]["map_type_document_per_doc"] if "audit_info_write" in info_consolidate_raw and "map_type_document_per_doc" in info_consolidate_raw["audit_info_write"] else {}
    list_of_pages_as_sccsv = info_consolidate_raw["audit_info_write"]["list_of_pages_as_sccsv"] if "audit_info_write" in info_consolidate_raw and "list_of_pages_as_sccsv" in info_consolidate_raw["audit_info_write"] else ""

    from lib.lib_util import from_list_page_per_doc_ccsv_to_list_of_list_of_page
    list_of_list_of_page, nb_page, max_page = from_list_page_per_doc_ccsv_to_list_of_list_of_page(list_of_pages_as_sccsv)

    list_doc_nb_modif_nb_word = []
    for id_doc in map_type_document_per_doc:
        id_doc_int = int(id_doc)
        doc_type = map_type_document_per_doc[id_doc]
        if doc_type == "":
            continue
        nb_word = map_nb_word_per_doc[id_doc] if id_doc in map_nb_word_per_doc else 0
        nb_modif = map_count_modif_per_doc[id_doc] if id_doc in map_count_modif_per_doc else 0
        corr_type_manual = "map_modif_type_document" in info_consolidate_raw and id_doc in info_consolidate_raw["map_modif_type_document"]

        if list_of_list_of_page is not None and len(list_of_list_of_page) > id_doc_int:
            list_of_page = list_of_list_of_page[id_doc_int]
            worst_prediag, complete_prediag = get_worst_page_prediag(list_of_page, map_id_page_prediag)
        else:
            worst_prediag = "MISSING"
            complete_prediag = "MISSING"

        one_doc = {"doc_type": doc_type, "nb_word": nb_word, "nb_modif": nb_modif, "corr_type_manual": corr_type_manual, "worst_prediag": worst_prediag, "complete_prediag": complete_prediag}  # "id_doc": id_doc_int,

        list_doc_nb_modif_nb_word.append(one_doc)

    return list_doc_nb_modif_nb_word


def assoc_doc_type_int(list_doc_type):
    map_doc_type_int = {}
    for i in range(len(list_doc_type)):
        map_doc_type_int[list_doc_type[i]] = i
    return map_doc_type_int


def print_nb_word_nb_modif_type_doc(list_doc_nb_modif_nb_word, map_doc_type_int):
    print("# nb_word nb_modif doc_type")
    for doc in list_doc_nb_modif_nb_word:
        print(str(doc["nb_word"]) + " " + str(doc["nb_modif"]) + " " + str(map_doc_type_int[doc["doc_type"]]))


taux_tva = 20


def reset_saxia_fact_after_including(month_start=None, year_start=None,
                                     # month_end=None, year_end=None,
                                     lpgss=None, root_folder_fact="temp"):
    # Find the previous month.
    # How do we get the list of months ?
    # mtr => select id, month, TO_DATE(month, 'TMmonth YYYY')
    #        from mtruser.mtr_factures;
    #  id |    month     |  to_date
    # ----+--------------+------------
    #  52 | janvier 2025 | 2025-01-01
    #  53 | janvier 2025 | 2025-01-01
    from datetime import date
    date_start = date(int(year_start), int(month_start), 1)

    map_validated_nb, map_months = lpgss.get_bill_validate(date_start)
    if map_validated_nb[True] != 0:
        print("Can't regenerate since bills were already validated")
        return False, None
    nb_to_delete = map_validated_nb[False]
    min_bill_id = lpgss.get_min_bill_id_after_or_equal_date(date_start)
    if min_bill_id is not None and isinstance(min_bill_id, int) and min_bill_id > 0:
        autocommit_prev_val = lpgss.client.autocommit
        try:
            lpgss.client.autocommit = False
            ret_reset = lpgss.reset_facture(min_bill_id, nb_to_delete)
        except Exception as e:
            print("Error in resetting the bill")
            print(str(e))
            lpgss.client.autocommit = autocommit_prev_val
            return False, None
        lpgss.client.autocommit = autocommit_prev_val
        if ret_reset == False:
            print("Error in resetting the bill")
            return False, None

    # Get all files
    # Delete files
    list_month_csv = map_months[False]
    if list_month_csv != "":
        list_month = list_month_csv.split(",")
        if len(list_month) > 1:
            print(" Many months to delete : " + str(list_month) + " only the first one deleted will be recreated, so the job should run !")
        for month in list_month:
            month_subfolder_suffix = month.lower().replace(" ", "_")
            subfolder_facture = "facture_" + month_subfolder_suffix
            folder_facture_www = os.path.join(root_folder_fact, subfolder_facture)
            folder_facture_internal_server = folder_facture_www.lstrip("/")
            import shutil
            ret = shutil.rmtree(folder_facture_internal_server, ignore_errors=True)
            print(" ret : " + str(ret))
    else:
        print("Unexpected, something is inconsistent; TODO add a check before and cancel this reset")
        subfolder_facture = "mois_en_lettre_sans_accent_year_en_chiffre"

    # Delete all lines in the table
    print("TODO")
    return True, min_bill_id


def generate_saxia_fact(id_fact=0, month=None, year=None,
                        type_fact="debug",
                        project_id=None,
                        root_folder_fact="temp",
                        only_correct_file=False,
                        verbose=False):
    from server.safia import lpgss_singleton

    # TODO VR 21/4/25 should be fetched from the app configuration !
    mtr_datou_id_fact = 40

    import locale
    locale.setlocale(locale.LC_ALL, 'fr_FR.UTF-8')

    user = None
    # compute today minus one month and get the month and the year
    today = datetime.datetime.now()
    from dateutil.relativedelta import relativedelta
    last_month_date = today + relativedelta(months=-1)
    two_month_before_present = today + relativedelta(months=-2)

    # While debugging !
    last_month = 8
    year_last_month = 2024
    mois_last_month_string = "Aout"

    last_month = 10
    year_last_month = 2024
    mois_last_month_string = "Octobre"

    last_month = 11
    year_last_month = 2024
    mois_last_month_string = "Novembre"

    if type_fact == "deprecated_no_db":  # TODO and what about the behavior that generates the previous month from now without saving to the DB, do we want to keep it ? gasp => I DO NOT KNOW !
        if month is None or year is None or month == "" or year == "":
            mois_last_month_string = last_month_date.strftime("%B")
            mois_last_month_string = mois_last_month_string[0].upper() + mois_last_month_string[1:]
            last_month = last_month_date.month
            year_last_month = last_month_date.year
        else:
            last_month = month
            year_last_month = year

    # Check the consistency of the generation
    res_max_id, res_last_value, res_month_argmax = lpgss_singleton.get_last_fact_id_month()
    inconsistent_data_last_id_stop = False
    if res_max_id != res_last_value:
        print("Inconsistency in the generation of the fact : " + str(res_max_id) + " != " + str(res_last_value))
        inconsistent_data_last_id_stop = True
        return
    else:
        if id_fact == 0 or id_fact is None:
            id_fact = res_max_id + 1
    try:
        datetime_last_info = datetime.datetime.strptime(res_month_argmax.lower(), "%B %Y")
    except Exception as e:
        print("What to do with : res_month_argmax")
        print(str(e))
        print("ERROR EXITING")
        exit(1)
    print(" datetime_last_info : " + str(datetime_last_info))

    if type_fact == "force":
        print("We don't care about the check !")

    month_to_treat = datetime_last_info + relativedelta(months=1)
    last_month = month_to_treat.month
    year_last_month = month_to_treat.year

    if type_fact == "redo":
        if not only_correct_file:
            success_reset, min_bill_id = reset_saxia_fact_after_including(month_start=month, year_start=year,
                                                                          lpgss=lpgss_singleton,
                                                                          root_folder_fact=root_folder_fact)
        else:
            min_bill_id = None
            success_reset = True
        if not success_reset:
            print("Error in resetting the bill, nothing should have been done !")
            return
        last_month = month
        year_last_month = year
        month_to_treat = datetime.datetime(int(year_last_month), int(last_month), 1)
        if min_bill_id is not None:
            id_fact = min_bill_id  # - 1  # TODO VR not sure
    elif ((datetime_last_info.month == last_month_date.month
           and datetime_last_info.year == month_to_treat.year) or
          (datetime_last_info.month == two_month_before_present.month
           and datetime_last_info.year == two_month_before_present.year)) and not inconsistent_data_last_id_stop:
        print("We just generate the next month data")
    elif type_fact == "force":
        print("Force needed since the fact was done with too much delay")
    else:
        print("Nothing done since we would need to do a lot of work and checks !")
        return
    list_project_id = []
    if project_id is not None and project_id != "" and project_id != 0:
        list_project_id = [project_id]
    # VR TODO rename last_month and year_last_month to month_used
    all_results = lpgss_singleton.get_treatment_done(last_month, year_last_month, project_id=list_project_id)

    if only_correct_file:
        print("More protection (third ? )")

    # VR TODO : we need the last month data of course !
    subfolder_facture = "facture_" + month_to_treat.strftime("%B_%Y")
    mois_last_month_string = month_to_treat.strftime("%B")
    if mois_last_month_string != "":
        mois_last_month_string = mois_last_month_string[0].upper() + mois_last_month_string[1:]
    month_string_for_data = month_to_treat.strftime("%B %Y")
    if len(month_string_for_data) > 0:
        month_string_for_data = month_string_for_data[0].upper() + month_string_for_data[1:]
    else:
        print("Unexpected behavior : month_string_for_data is empty !")
    print(" subfolder_facture : " + str(subfolder_facture))
    from unidecode import unidecode
    subfolder_facture = unidecode(subfolder_facture)
    print(" subfolder_facture : " + str(subfolder_facture))

    folder_facture = os.path.join(root_folder_fact, subfolder_facture)
    if not os.path.exists(folder_facture.lstrip("/")):
        os.makedirs(folder_facture.lstrip("/"))

    map_pid_results = {}
    count_pid_nb_page = {}
    for data in all_results:
        pid = data["project_id"]
        id_file = data["id_file"]
        if pid not in map_pid_results:
            map_pid_results[pid] = []
            count_pid_nb_page[pid] = 0
        map_pid_results[pid].append(data)
        if data["nb_page"] is not None:
            count_pid_nb_page[pid] += data["nb_page"]

    print(count_pid_nb_page)
    map_list_nb_modif_per_pid = {}
    map_list_folder_to_fact_per_pid = {}

    list_doc_nb_modif_nb_word_acc = []

    map_prediag_id_file_page_ccsv = {}
    for pid in map_pid_results:
        list_doc_nb_modif_nb_word_one_pid = []
        list_folder_to_fact = []
        for data in map_pid_results[pid]:

            id_file = None
            from lib.sandbox.migration.append_prediag_df_complet_as_json import build_map_from_prediag_id_page_c_csv
            data_prediag = data["info_date"]["prediag_csv"] if "info_date" in data and "prediag_csv" in data["info_date"] else ""
            if ":" in data_prediag:
                map_id_page_prediag = build_map_from_prediag_id_page_c_csv(data_prediag)
            elif data_prediag == "":
                map_id_page_prediag = {}
            else:
                map_id_page_prediag = {str(i): data_prediag.split(",")[i] for i in range(len(data_prediag.split(",")))}

            if map_id_page_prediag == {}:
                print("Missing prediag for pid : " + str(pid) + " id_file : " + str(data["id_file"]) + " data_prediag : " + str(data_prediag) + " hit : " + data["hash_id_treatment"])
#                continue
            else:
                if "id_file" in data:
                    id_file = str(data["id_file"])
                    print("Present prediag for " + str(id_file) + " : " + str(data["id_file"]))
                else:
                    print("Wrong: prediag present but no id_file in data")

            if "info_consolidate" not in data or "audit_info_write" not in data["info_consolidate"]:
                print("Missing audit_info_write for pid : " + str(pid) + " id_file : " + str(data["id_file"]) + " data_prediag : " + str(data_prediag) + " hit : " + data["hash_id_treatment"])
#                continue

            for id_page in map_id_page_prediag:
                id_page = str(id_page)
                id_file_id_page_ccsv = id_file + ":" + id_page
                prediag = map_id_page_prediag[id_page]
                if prediag not in map_prediag_id_file_page_ccsv:
                    map_prediag_id_file_page_ccsv[prediag] = []
                if id_file_id_page_ccsv not in map_prediag_id_file_page_ccsv[prediag]:
                    map_prediag_id_file_page_ccsv[prediag].append(id_file_id_page_ccsv)
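            # map_prediag_id_file_page_ccsv accumulates across iterations into
            # (assumed shape) {prediag_class: ["<id_file>:<id_page>", ...]},
            # i.e. an inverted index from prediagnostic class to the pages it covers.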
            list_doc_nb_modif_nb_word = get_nb_modif_nb_word_change_or_not_per_doc(data["info_consolidate"], map_id_page_prediag)
            list_doc_nb_modif_nb_word_acc.extend(list_doc_nb_modif_nb_word)
            list_doc_nb_modif_nb_word_one_pid.extend(list_doc_nb_modif_nb_word)

            nb_modif = data["info_consolidate"]["nb_modif"] if "info_consolidate" in data and "nb_modif" in data["info_consolidate"] else 0
            input_file_at = data["info_date"]["input_file_available_at"] if "info_date" in data and "input_file_available_at" in data["info_date"] else None
            output_file_at = data["info_consolidate"]["consolidate_at"] if "info_consolidate" in data and "consolidate_at" in data["info_consolidate"] else None
            time_treatmeant = None
            import dateparser
            if output_file_at != None and input_file_at != None:
                try:
                    in_date = dateparser.parse(input_file_at)  # , settings={'DATE_ORDER': 'DMY'})
                    out_date = dateparser.parse(output_file_at)  # , settings={'DATE_ORDER': 'DMY'})
                    time_treatmeant = out_date - in_date
                    print(" time_treatmeant : " + str(time_treatmeant))
                    time_treatmeant = int(time_treatmeant.total_seconds() / 3600)
                except Exception as e:
                    print(str(e))
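            # time_treatmeant now holds the treatment duration truncated to whole
            # hours (total_seconds / 3600), or None when either timestamp is missing.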
            nb_page = data["nb_page"] if "nb_page" in data else 0
            worst_prediag_doc, complete_prediag = get_worst_page_prediag(list(range(0, nb_page)), map_id_page_prediag)
            list_folder_to_fact.append({"nb_page": data["nb_page"], "id_file": data["id_file"], "nb_modif": nb_modif, "time_treatmeant": time_treatmeant, "map_id_page_prediag": map_id_page_prediag, "worst_prediag": worst_prediag_doc})

        map_list_nb_modif_per_pid[pid] = list_doc_nb_modif_nb_word_one_pid
        map_list_folder_to_fact_per_pid[pid] = list_folder_to_fact

    print("""
    $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
    map_prediag_id_file_page_ccsv :
    $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
    """)
    print(str(map_prediag_id_file_page_ccsv))
    print("""
    $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
    """)

    print(map_list_folder_to_fact_per_pid)
    print(map_list_nb_modif_per_pid)
    list_prediag_class = []
    for doc in list_doc_nb_modif_nb_word_acc:
        if "worst_prediag" not in doc:
            print("ERROR : worst_prediag not found in doc : " + str(doc))
            continue
        if doc["worst_prediag"] not in list_prediag_class:
            list_prediag_class.append(doc["worst_prediag"])

    map_prediag_class_int = assoc_doc_type_int(list_prediag_class)
    print(" map_prediag_class_int : ")
    print(str(map_prediag_class_int))

    list_doc_type = []
    for doc in list_doc_nb_modif_nb_word_acc:
        if doc["doc_type"] not in list_doc_type:
            list_doc_type.append(doc["doc_type"])

    map_doc_type_int = assoc_doc_type_int(list_doc_type)
    print(" map_doc_type_int : ")
    print(str(map_doc_type_int))
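    # assoc_doc_type_int is assumed to map each distinct label to a dense integer
    # index, e.g. (hypothetical labels) ["CR", "ORDONNANCE"] -> {"CR": 0,
    # "ORDONNANCE": 1}; those indices address the count arrays below.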
    for pid in map_list_nb_modif_per_pid:
        print_nb_word_nb_modif_type_doc(map_list_nb_modif_per_pid[pid], map_doc_type_int)

    print(" AND ALL ")
    print_nb_word_nb_modif_type_doc(list_doc_nb_modif_nb_word_acc, map_doc_type_int)

    unit_price = 0.5
    unit_price_default = 0.5
    unit_price_high = 0.8
    unit_price_low = 0.4
    map_class_price_unit_price = {"PRICE_LOW": unit_price_low,
                                  "PRICE_MEDIUM": unit_price,
                                  "PRICE_HIGH": unit_price_high}
    map_class_price_unit_price = {}
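    # Note: the immediate reassignment to {} disables per-class pricing;
    # map_class_price_unit_price.get(...) below then always falls back to
    # unit_price_default, so every page is billed at the flat default rate.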
    import numpy as np
    for pid in map_list_folder_to_fact_per_pid:
        if only_correct_file:
            print("We should only modify the input pid")
            print(f" pid : {pid} should be the one given as argument {project_id}")
            if project_id != pid:
                continue

        data_count = {
            "document_type": map_doc_type_int.keys(),
            "count": len(map_doc_type_int) * [0]
        }
        data_count_class = {
            "prediag_class": map_prediag_class_int.keys(),
            "count": len(map_prediag_class_int) * [0]
        }
        # TO BE USED
        # Count table of document types crossed with prediagnostic classes
        np_array_count_class_type = np.zeros((len(map_prediag_class_int), len(map_doc_type_int)), dtype=int)
        for data in map_list_nb_modif_per_pid[pid]:
            data_count["count"][map_doc_type_int[data["doc_type"]]] += 1
            data_count_class["count"][map_prediag_class_int[data["worst_prediag"]]] += 1
            np_array_count_class_type[map_prediag_class_int[data["worst_prediag"]], map_doc_type_int[data["doc_type"]]] += 1

        data_folder_price = {
            'Dossier': [],
            'Nb de Page': [],
            'Prix HT\npar page': [],
            'Prix total HT': [],
            'TVA': [],
            'Prix total TTC': [],
            'Temps (h)\ntraitement': [],
        }
        print("pid : " + str(pid))
        sum_total = 0
        time_treatmeant_total = 0
        for data in map_list_folder_to_fact_per_pid[pid]:
            unit_price = unit_price_default
            worst_prediag = data["worst_prediag"] if "worst_prediag" in data else "MISSING"
            price_class = get_class_quali_from_worst_prediag(worst_prediag)
            unit_price = map_class_price_unit_price.get(price_class, unit_price_default)
            print(data)
            data_folder_price["Dossier"].append(data["id_file"])
            data_folder_price["Nb de Page"].append(data["nb_page"])
            data_folder_price["Prix HT\npar page"].append(str(round(unit_price, 2)))
            prix_total_ht = round(unit_price * data["nb_page"], 2)
            data_folder_price["Prix total HT"].append(str(prix_total_ht))
            prix_total_ttc = round(prix_total_ht * (1. + float(taux_tva) / 100.), 2)
            data_folder_price["TVA"].append("{:.2f}".format(prix_total_ttc - prix_total_ht))
            data_folder_price["Prix total TTC"].append(str(prix_total_ttc))
            sum_total += unit_price * data["nb_page"] * (1. + float(taux_tva) / 100.)

            time_treatmeant = data["time_treatmeant"]
            if time_treatmeant != -1 and time_treatmeant != None:
                time_treatmeant_total += time_treatmeant
            else:
                print("ERROR : time_treatmeant is -1 or missing")
            data_folder_price["Temps (h)\ntraitement"].append(str(time_treatmeant))

        sum_total = round(sum_total, 2)

        sum_total_wo_tva = sum_total / (1. + float(taux_tva) / 100.)
        sum_total_wo_tva = round(sum_total_wo_tva, 2)
        nb_page_total = int(sum_total_wo_tva / unit_price)
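        # nb_page_total is back-computed from the rounded totals using the last
        # unit_price seen in the loop; with flat pricing this equals the page sum,
        # e.g. taux_tva=20, 42 pages at 0.5 €: sum_total=25.2, wo_tva=21.0, 21.0/0.5=42.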
        data_folder_price["Dossier"].append("Total " + str(mois_last_month_string) + " " + str(year_last_month) + " en EUROS ")
        data_folder_price["Nb de Page"].append(nb_page_total)  # str(nb_page_total))
        data_folder_price["Prix HT\npar page"].append(str(unit_price))
        data_folder_price["Prix total HT"].append(str(sum_total_wo_tva))
        data_folder_price["TVA"].append("{:.2f}".format(sum_total - sum_total_wo_tva))
        data_folder_price["Prix total TTC"].append(str(sum_total) + " € TTC")
        temps_traitement_moyen = -1
        if len(map_list_folder_to_fact_per_pid[pid]) > 0:
            temps_traitement_moyen = time_treatmeant_total / len(map_list_folder_to_fact_per_pid[pid])
        data_folder_price["Temps (h)\ntraitement"].append(str(round(temps_traitement_moyen, 1)) + " (moyenne)")
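        # The row appended above is the totals line; create_pdf below relies on it
        # being the last row (table_data[-1]) when applying the flat-fee override.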
#        data = {
#            'Dossier': ['Item A', 'Item B', 'Item C'],
#            'Nb de Page': [4, 5, 2],
#            'Prix par page': [7.5, 8.0, 9.5],
#            'Prix total': [30.0, 40.0, 19.0]
#        }
        df = pd.DataFrame(data_folder_price)
        df_data_count = pd.DataFrame(data_count)
        df_data_class = pd.DataFrame(data_count_class)

        info_project = lpgss_singleton.get_project_info(pid)
        owner_id = info_project["owner_id"]
        map_id_email = lpgss_singleton.get_email_from_user_ids([owner_id])
        info_user = lpgss_singleton.select_user(owner_id)
        if only_correct_file:
            print("Fourth protection to be downgraded as third")
            print("We should only generate the pdf for the input pid and not modify the DB")
            lpgss_singleton = None

        siret = info_user["miscinfo"]["siret"] if "miscinfo" in info_user and info_user["miscinfo"] != None and "siret" in info_user["miscinfo"] else ""
        adresse = info_user["miscinfo"]["adresse"] if "miscinfo" in info_user and info_user["miscinfo"] != None and "adresse" in info_user["miscinfo"] else ""
        email = info_user["email"] if "email" in info_user and info_user["email"] != None else ""
        first_name = info_user["firstname"] if "firstname" in info_user and info_user["firstname"] != None else ""
        last_name = info_user["lastname"] if "lastname" in info_user and info_user["lastname"] != None else ""
        found_user = False
        email_client_expert = "email@expert"
        for data in map_id_email:
            data_dict = dict(data)
            if "id" in data_dict and owner_id == data_dict["id"]:
                found_user = True
                email_client_expert = data_dict["mail"]
                break
        name = f"""Docteur {first_name} {last_name}"""
        print(" TODO add name ")
        if not found_user:
            email_client_expert = "anonymous@user"
            print(" INTERNAL ERROR GENERATING id_fact " + str(id_fact) + " for pid " + str(pid) + " owner_id " + str(owner_id) + " email_client_expert " + email_client_expert + " sum_total : " + str(sum_total) + " df_data_count : " + str(df_data_count))
        pdf_path = create_pdf(datas=df,
                              sum_total=sum_total,
                              df_data_count=df_data_count,
                              id_fact=id_fact,
                              email_client_expert=email_client_expert,
                              name=name,
                              adresse=adresse,
                              siret=siret,
                              project_id=pid,
                              sub_folder=folder_facture,
                              df_data_class=df_data_class,
                              np_array_count_class_type=np_array_count_class_type)

        if only_correct_file:
            print("We should only generate the pdf for the input pid and not modify the DB")
            exit(2)

        if lpgss_singleton != None:
            print("Record please !")
            query_id = lpgss_singleton.insert_new_bill(id_fact, pdf_path, pid, month_string_for_data, sum_total_wo_tva, sum_total, nb_page_total)
            if query_id != id_fact:
                print("ERROR ERROR id fact We should cancel all !")

        id_fact = id_fact + 1
# To remove or to use!
en_tete_gauche = """
SAS FOTONOWER FRANCE
30 Rue CHARLOT
75003 PARIS-3E-ARRONDISSEMENT
FRANCE
compta@fotonower.com
https://www.fotonower.com/
N° TVA Intracommunautaire : FR80804468197
N° SIRET : 80446819700012
"""

# info_banque is reassigned several times below; only the last value is used.
info_banque = """
Coordonnées bancaires :
IBAN : FR76 1695 8000 0165 7288 7296 040
BIC/SWIFT : QNTOFRP1XXX
"""

info_banque = """
Coordonnées bancaires :
IBAN : FR76 1287 9000 0111 2329 2200 123
BIC/SWIFT : DELUFR22XXX
"""

info_banque = """
Coordonnées bancaires :
IBAN : FR76 3000 3024 2400 1500 3117 325
BIC/SWIFT : SOGEFRPP
"""

info_banque_lines = info_banque.strip().split("\n")

info_legal = info_banque
condition_paiement_1 = """
Conditions de paiement :
• 100 % soit """

condition_paiement_2 = """ € TTC à payer le :
"""

condition_paiement_3 = """ (à réception).
"""
def create_pdf(datas=None,
               sum_total=0,
               df_data_count=None,
               id_fact=0,
               email_client_expert="email@expert",
               name="Docteur Expert",
               adresse="30 Rue Charlot",
               siret="FR56573",
               project_id=-1,
               sub_folder="temp",
               df_data_class=None,
               np_array_count_class_type=None):
    suffix = "_p_" + str(project_id) + "_id_" + str(id_fact)

    en_tete_droite = email_client_expert + """
    project_id : """ + str(project_id)

    if len(datas) == 0:
        print("No invoice generated for an empty table")
        return
    customer_info = [
        "Adresse de facturation :",
        name,
        adresse,
        "SIRET : " + siret,
        email_client_expert,
        "project_id=" + str(project_id)
    ]

    pdf_file = 'facture_saxia_' + suffix + '.pdf'

    table_data = []
    table_data.append(list(datas.columns))
    for index, row in datas.iterrows():
        table_data.append(list(row))

    if float(table_data[-1][3]) >= 500 or project_id in [327]:
        print("TRIGGER FORFAIT 500")
        table_data[-1][2] = "FORFAIT500"
        table_data[-1][3] = "500"
        table_data[-1][4] = "100"
        table_data[-1][5] = "600 € TTC"
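    # Flat-fee override on the totals row (columns by position in data_folder_price):
    # index 2 = 'Prix HT\npar page', 3 = 'Prix total HT', 4 = 'TVA',
    # 5 = 'Prix total TTC'; any invoice over 500 € HT is capped at the
    # 500 € HT / 600 € TTC package.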
    pdf_path = os.path.join(sub_folder, pdf_file)
    create_first_page_fact(id_fact=id_fact, pdf_file=pdf_path.strip("/"), customer_info=customer_info, table_data=table_data)
#    pdf_path = pdf_path.strip("/")

    import matplotlib.pyplot as plt
    from matplotlib.backends.backend_pdf import PdfPages
    from datetime import datetime

    import matplotlib
    matplotlib.use('agg')  # headless backend: no display needed on the server

    pdf_file_graph = 'facture_saxia_' + suffix + '_graph.pdf'
    pdf_path_graph = os.path.join(sub_folder, pdf_file_graph)

    pdf_path_graph = pdf_path_graph.strip("/")
#    os.path.copy(pdf_path, pdf_path_graph)

    try:
        # Create the PDF file
        with PdfPages(pdf_path_graph) as pdf:
            plt.clf()
#            plt.close()

            fontsize = 4
#            plt.rcParams.update({'font.size': fontsize})
#            plt.rc('font', size=fontsize)        # controls default text size
#            plt.rc('axes', titlesize=fontsize)   # fontsize of the title
#            plt.rc('axes', labelsize=fontsize)   # fontsize of the x and y labels

            # Probably the only useful one! => but the ones below work too!
#            plt.rc('font', size=fontsize)
            plt.rc('legend', fontsize=4*fontsize)  # fontsize of the legend

            plt.rc('xtick', labelsize=fontsize)  # fontsize of the x tick labels
            plt.rc('ytick', labelsize=fontsize)  # fontsize of the y tick labels

            # Plot the charts (simple examples here)
            plt.subplot(2, 2, 3)
            datas['Nb de Page'][:-1].plot(kind='bar', legend=False)
            plt.title('Nombre de pages\npar dossiers', size=5*fontsize, fontweight="bold")

            index_null = df_data_count["count"] == 0
            # df_data_count["document_type"][index_null] = ""
            if all(index_null):
                df_data_count["document_type"] = ""
            else:
                # only blank the labels of document types whose count is zero
                df_data_count.loc[index_null, "document_type"] = ""
            try:
                plt.subplot(2, 2, 2)
                plt.imshow(np_array_count_class_type, cmap='hot', interpolation='nearest')
                plt.title('Répartition des prédiagnostics\npar type de document')
                import numpy as np
                plt.xticks(ticks=np.arange(len(list(df_data_count["document_type"]))), labels=list(df_data_count["document_type"]), rotation=45)
                plt.yticks(ticks=np.arange(len(list(df_data_class["prediag_class"]))), labels=list(df_data_class["prediag_class"]))
                for i in range(len(list(df_data_class["prediag_class"]))):
                    for j in range(len(list(df_data_count["document_type"]))):
                        text = plt.text(j, i, np_array_count_class_type[i, j],
                                        ha="center", va="center", color="pink", fontsize=2)

            except Exception as e:
                print("Error in heatmap chart : " + str(e))

            # If we want two pages:
#            plt.savefig(pdf, format='pdf')
#            plt.clf()
            try:
                import numpy as np
                if np.sum(df_data_count["count"]) > 0:
                    plt.subplot(2, 2, 1)

                    plt.pie(df_data_count["count"], labels=(df_data_count["document_type"] + " " + df_data_count["count"].apply(str)))
                    plt.title('Répartition des types\nde documents')
                else:
                    plt.subplot(2, 2, 1)
                    plt.text(0.5, 0.5, 'Aucune information sur les documents', horizontalalignment='center', verticalalignment='center')
                    plt.title('Répartition des types de documents')
            except Exception as e:
                print("Error in pie chart doc_type : " + str(e))

#            plt.subplots_adjust(wspace=0.2)

            try:
                import numpy as np
                if np.sum(df_data_count["count"]) > 0:
                    plt.subplot(2, 2, 4)
                    plt.pie(df_data_class["count"], labels=(df_data_class["prediag_class"] + " " + df_data_class["count"].apply(str)))
                    plt.title('Répartition des prédiagnostics')
                else:
                    plt.subplot(2, 2, 4)
                    plt.text(0.5, 0.5, 'Aucune information sur les prédiagnostics', horizontalalignment='center', verticalalignment='center')
                    plt.title('Répartition des prédiagnostics')
            except Exception as e:
                print("Error in pie chart prediag : " + str(e))
            # datas['Prix total'].plot(kind='bar', color='green')
            # plt.title('Prix total')

            plt.subplots_adjust(wspace=0.2, hspace=0.4, left=0.1, right=0.9, top=0.9, bottom=0.1)

            # Adjust spacing between subplots
#            plt.subplots_adjust(bottom=0.1, right=0.8, top=0.9)

            # plt.tight_layout()
            plt.savefig(pdf, format='pdf')

            # Equivalent to the following, but that does not work inside an
            # existing pdf, at least not here!
#            pdf.savefig(plt.gcf())

            plt.close()
    except Exception as e:
        print("Error building graphs for invoice:")
        print(str(e))
    # Merge the invoice page and the graphs page into a single PDF
    try:
        if os.path.exists(pdf_path.strip("/")) and os.path.exists(pdf_path_graph):
            from PyPDF2 import PdfReader, PdfWriter

            # Create a PDF writer object
            pdf_writer = PdfWriter()

            # Read the first page (invoice details)
            with open(pdf_path.strip("/"), 'rb') as f:
                pdf_reader = PdfReader(f)
                for page in pdf_reader.pages:
                    pdf_writer.add_page(page)

            # Read the second page (graphs)
            with open(pdf_path_graph, 'rb') as f:
                pdf_reader = PdfReader(f)
                for page in pdf_reader.pages:
                    pdf_writer.add_page(page)

            print(" WRITE in " + str(pdf_path.strip("/")))

            # Write to a new PDF file
            with open(pdf_path.strip("/"), 'wb') as f:
                pdf_writer.write(f)
    except Exception as e:
        print("Error in merge")
        print(str(e))
    return pdf_path
def test_voila():

    from matplotlib.backends.backend_pdf import PdfPages
    import matplotlib.pyplot as plt
    # scipy.misc.imread was removed from SciPy; imageio provides an equivalent reader
    from imageio import imread
    import os
    import numpy as np

    files = ["Column0_Line16.jpg", "Column0_Line47.jpg"]

    def plotImage(f):
        folder = "temp/"
        im = imread(os.path.join(folder, f)).astype(np.float32) / 255
        plt.imshow(im)
        a = plt.gca()
        a.get_xaxis().set_visible(False)  # We don't need axis ticks
        a.get_yaxis().set_visible(False)

    pp = PdfPages("temp/page1.pdf")
    plt.subplot(121)
    plotImage(files[0])
    plt.subplot(122)
    plotImage(files[1])
    pp.savefig(plt.gcf())  # This generates page 1
    pp.savefig(plt.gcf())  # This generates page 2
    pp.close()
import os

# Module-level demo values (mirroring the defaults of create_first_page_fact below)
id_fact = 1666
# Create a PDF document
pdf_file = "output_invoice.pdf"

customer_info = [
    "Docteur DUGUET",
    "drduguet.tiphaine@gmail.com",
    "project_id=112"
]

# Table of costs
table_data = [
    ["Nbs de pages", "Prix HT par pages (€)", "Prix total HT (€)", "Prix total TTC (€)"],
    ["42", "0.5", "21", "25.2"]
]
def create_first_page_fact(id_fact=1666, pdf_file="output_invoice.pdf", customer_info=[
    "Docteur DUGUET",
    "drduguet.tiphaine@gmail.com",
    "project_id=112"
], table_data=[
    ["What", "Nbs de pages (€)", "Prix HT par pages (€)", "Prix total HT (€)", "Prix total TTC (€)"],
    ["id_trucmuche", "42", "0.5", "21", "25.2 € TTC"]
], montant_total=None):

    if montant_total == None:
        montant_total = table_data[-1][-2].strip(" € TTC")
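    # str.strip(" € TTC") strips a *set* of characters from both ends, not the
    # literal suffix; it works here since digits and '.' are not in the set,
    # e.g. "25.2 € TTC" -> "25.2". The amount is read from the next-to-last
    # cell of the totals row.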
    from reportlab.lib.pagesizes import A4
    from reportlab.lib import colors
    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
    from reportlab.lib.units import mm
    from reportlab.lib.enums import TA_RIGHT, TA_LEFT, TA_CENTER
    from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, Image
    from reportlab.pdfgen import canvas

    def add_background(canvas, doc):
        git_safia_root = os.getenv("GITSAFIA")
        img_folder = os.path.join(git_safia_root, "prompt/python/server/static/image_static/saxia")
        background_image_path = os.path.join(img_folder, "Logo_Saxia_Picto_Vert_calque.png")  # Replace with the path to your background image
        canvas.drawImage(background_image_path, 0, -100, width=A4[0], preserveAspectRatio=True)  # , mask=[0,100,0,100,0,100])  # , height=A4[1]
    document = SimpleDocTemplate(pdf_file, pagesize=A4,
                                 rightMargin=20*mm, leftMargin=20*mm, topMargin=20*mm, bottomMargin=20*mm)

    # Define styles
    styles = getSampleStyleSheet()
    styleN = styles["Normal"]
    styleH = styles["Heading1"]
    styleTitle = ParagraphStyle(
        name="Title",
        fontSize=16,
        alignment=TA_CENTER,
        spaceAfter=12
    )
    styleSubtitle = ParagraphStyle(
        name="Subtitle",
        fontSize=12,
        alignment=TA_CENTER,
        spaceAfter=12
    )
    styleRight = ParagraphStyle(
        name="RightAlign",
        fontSize=10,
        alignment=TA_RIGHT,
    )

    styleLeft = ParagraphStyle(
        name="LeftAlign",
        fontSize=10,
        alignment=TA_LEFT,
    )

    styleCenter = ParagraphStyle(
        name="CenterAlign",
        fontSize=10,
        alignment=TA_CENTER,
    )
    import datetime
    date_today = datetime.datetime.now()

    # Title
    elements = []
    elements.append(Paragraph("SAXIA", styleTitle))
    name_fact = "FTN-SAXIA " + date_today.strftime("%Y-%m") + "-" + str(id_fact)

    elements.append(Paragraph("Facture " + name_fact, styleH))
    elements.append(Spacer(1, 12))

    # e.g. "Le 4 août 2024"
    date_facturation = date_today.strftime("Le %d %B %Y")

    # Date and logo (Placeholder for Logo)
    elements.append(Paragraph(date_facturation, styleRight))

    # Add company and customer details
    company_info = [
        "SAS FOTONOWER FRANCE",
        "30 RUE CHARLOT",
        "75003 PARIS",
        "compta@fotonower.com",
        "https://www.fotonower.com",
        "N TVA intracommunautaire : FR804468197",
        "N Siret 80446819700012",
    ]
    company_paragraphs = [Paragraph(line, styleLeft) for line in company_info]
    customer_paragraphs = [Paragraph(line, styleLeft) for line in customer_info]

    # Organize them in a table
    data = [[
        Paragraph("<br/>".join(company_info), styleLeft),
        Paragraph("<br/>".join(customer_info), styleLeft)
    ]]

    table = Table(data)
    elements.append(table)
    elements.append(Spacer(1, 12))

    table = Table(table_data, colWidths=[48*mm, 20*mm, 17*mm, 20*mm, 15*mm, 30*mm, 25*mm])
    table.setStyle(TableStyle([
        ("BACKGROUND", (0, 0), (-1, 0), colors.lightgreen),
        ("BACKGROUND", (-2, -1), (-2, -1), colors.red),
        ("TEXTCOLOR", (0, 0), (-1, 0), colors.black),
        ("ALIGN", (0, 0), (-1, -1), "CENTER"),
        ("GRID", (0, 0), (-1, -1), 1, colors.black),
    ]))
    elements.append(table)

    # Payment conditions and bank details
    elements.append(Spacer(1, 12))
    elements.append(Paragraph("Conditions de paiement : à réception soit " + date_facturation.lower(), styleLeft))
    elements.append(Paragraph("Montant total : " + montant_total.lower() + " € TTC ", styleLeft))
    elements.append(Spacer(1, 12))
    elements.append(Paragraph(info_banque_lines[0], styleLeft))
    elements.append(Paragraph(info_banque_lines[1], styleLeft))
    elements.append(Paragraph(info_banque_lines[2], styleLeft))

    # Build the document
    document.build(elements, onFirstPage=add_background)

    print(f"PDF '{pdf_file}' created successfully.")
def retrieve_and_finish(mtd_upload_id=51,
                        mtd_complete_map=46,
                        id_step_finish=1,
                        list_project_void=[121],
                        lpgss=None,
                        project_id=0,
                        nb_day=2,
                        verbose=False,
                        filo_or_fifo=True,
                        min_time=7200,
                        smart_relaunch=False):
    runnings = retrieve_missed_folder(mtd_upload_id=mtd_upload_id,
                                      mtd_complete_map=mtd_complete_map,
                                      list_project_void=list_project_void,
                                      lpgss=lpgss,
                                      project_id=project_id,
                                      nb_day=nb_day,
                                      verbose=verbose,
                                      filo_or_fifo=filo_or_fifo)

    project_id_param_relaunch = 91
    key_param_relaunch = "saxia/steps/relaunch"
    smart_param_relaunch = lpgss.load_conf_project(project_id_param_relaunch, key_param_relaunch)
    from lib.manaudit.lib_datou_audit import load_sub_json
    smart_param_relaunch = load_sub_json(smart_param_relaunch, key_param_relaunch)
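    # smart_param_relaunch is assumed to be a dict after load_sub_json, e.g.
    # {"only_with_manual_split_above_nb_page": 50}; that key gates the relaunch
    # of large folders that have no manual split CSV (see below).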
    import datetime
    now = datetime.datetime.now(datetime.timezone.utc)
    map_running_by_id_file = {}
    for running in runnings:
        if "id_file" not in running:
            print("ERROR missing id_file for running " + str(running))
            continue
        id_file = running["id_file"]
        if running["project_id"] in list_project_void:
            print("Ignoring running from void project " + str(list_project_void) + " " + str(running))
            continue
        if id_file not in map_running_by_id_file:
            map_running_by_id_file[id_file] = []
        map_running_by_id_file[id_file].append(running)
    selected_to_run = None
    for id_file in map_running_by_id_file:
        max_created_at = max(list(map(lambda x: x["created_at"], map_running_by_id_file[id_file])))

        delai = now - max_created_at
        if delai.total_seconds() < min_time:
            print(" Not launching id_file : " + str(id_file) + " because it was launched " + str(delai.total_seconds()) + " seconds ago (min_time = " + str(min_time) + ")")
            continue

        for running in map_running_by_id_file[id_file]:
            created_at = running["created_at"]
            nb_page = running["nb_page"] if "nb_page" in running else 100
            # compute the time elapsed since the job was created
            delai = now - created_at
            print(" time running : " + str(delai) + " " + str(delai.total_seconds()))
            if verbose:
                print(" one running or failed " + str(running))

            split_csv_empty = True
            if "info_lab" in running and "split_csv_lab" in running["info_lab"]:
                split_csv_lab = running["info_lab"]["split_csv_lab"]
                if split_csv_lab != "" and type(split_csv_lab) == str:
                    split_csv_empty = False
                else:
                    split_csv_empty = True

            all_doc_separated = False
            if "info_lab" in running and "all_doc_separated" in running["info_lab"]:  # and .lower() == "true":
                print(str(running["info_lab"]["all_doc_separated"]))
                all_doc_separated = True
            else:
                all_doc_separated = False

            if split_csv_empty and ("only_with_manual_split_above_nb_page" in smart_param_relaunch
                                    and smart_param_relaunch["only_with_manual_split_above_nb_page"] < nb_page):
                print(str(running["id_file"]) + " split csv empty, we avoid launching this one!")
                continue
            print(str(running["id_file"]) + " candidate")

            if delai.total_seconds() > min_time and running["project_id"] not in list_project_void:
                if selected_to_run != None:
                    if filo_or_fifo:
                        if delai > selected_to_run["delai"]:
                            selected_to_run = running
                            selected_to_run["delai"] = delai
                    else:
                        if delai < selected_to_run["delai"]:
                            selected_to_run = running
                            selected_to_run["delai"] = delai
                else:
                    selected_to_run = running
                    selected_to_run["delai"] = delai
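    # Selection rule as written: with filo_or_fifo=True the candidate with the
    # largest delai (the oldest eligible job) wins; with False the smallest
    # delai (the most recent eligible job) wins.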
    # min_time

    # Now relaunch the selected job
    if selected_to_run != None:
        print("Relaunching job : " + str(selected_to_run))
        from lib.datou.datou_exec import datou_exec

        from lib.lib_safia_system import LibSafiaSystem
        lss = LibSafiaSystem(lib_user_data_internal=lpgss)
        project_id = selected_to_run["project_id"] if "project_id" in selected_to_run else 0
        datou_to_run = lss.get_datou(mtd_complete_map, project_id=project_id)
        if len(datou_to_run) != 1:
            print("Unexpected size datou_to_run")
        datou_to_run = datou_to_run[0]

        input = {}
        # - [ ] TODO : fetch the input parameters from the audit : file
        # - [ ] TODO : check that the file exists : otherwise we may loop forever; we could update created_at or use modified_at?
        # - [ ] TODO : fetch the complete_param_json from the audit

        hash_id_treatment = selected_to_run["hash_id_treatment"]
        # NB: the guard used to test "hit_output" while reading "output_hit"; the key is now consistent
        output_hit = selected_to_run["info_date"]["output_hit"] if "info_date" in selected_to_run and "output_hit" in selected_to_run["info_date"] else None
        all_data = lpgss.load_data_audit(hash_id_treatment)
        audit_info = all_data["audit_info"] if "audit_info" in all_data else {}

        print(" We don't use id_step_finish for now, or id_step_finish - 1!")
        input = audit_info["io_exec"]['0']["input"] if "io_exec" in audit_info and '0' in audit_info["io_exec"] else {}
        if output_hit != None:
            input["output_hit"] = output_hit

        if "id_file" not in selected_to_run:
            print("Problem: missing id_file")
            exit(1)

        from lib.lib_util import parse_id_date_nb_page_folder
        nb, id, date = parse_id_date_nb_page_folder(selected_to_run["id_file"])

        if date == None:
            print("Problem with date")
            exit(1)
        date = selected_to_run["uploaded_at"].strftime("%Y%m%d")
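        # Note: the parsed date is unconditionally replaced by uploaded_at here;
        # the parse above effectively only sanity-checks that the folder name
        # carries a date at all.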
        try:
            date_parsed = datetime.datetime.strptime(date, "%Y%m%d")
        except Exception as e:
            print("Problem parsing date")
            print(str(e))
            date_parsed = datetime.datetime.now()

        input["date"] = date_parsed

        file = input["file"] if "file" in input else ""
        if isinstance(file, list):
            test_file = file[0]
        elif isinstance(file, str):
            test_file = file
        else:
            print("Unexpected type for file : " + str(type(file)))
            print(" We don't do anything ! ")
            return
        if not os.path.exists(test_file):
            print("ERROR : file not found : " + str(file))
            print(" We don't do anything ! ")
            return
        if "info_lab" in all_data and "split_csv_lab" in all_data["info_lab"]:
            input["saxia_split_end_csv"] = all_data["info_lab"]["split_csv_lab"]
        # NB: all_doc_separated comes from the last candidate examined in the
        # loop above, which is not necessarily the selected run (TODO?)
        input["saxia_all_doc_separated"] = all_doc_separated

        complete_param_json = audit_info["config"]["complete_param_json"] if "config" in audit_info and "complete_param_json" in audit_info["config"] else {}

        from auth.lib_auth import build_layer_from_configuration
        map_type_layer_inst = build_layer_from_configuration(lss, complete_param_json)
        # Datou treatment execution context without PG, VR 2023
        complete_param_json["map_type_layer_inst"] = map_type_layer_inst

        datou_linear_list_steps = list(map(lambda x: x["name"], datou_to_run["steps"]))
        list_param_json_steps = list(map(lambda x: x["param_json"], datou_to_run["steps"]))

        ret = datou_exec(datou_linear_list_steps=datou_linear_list_steps,
                         input=input,
                         complete_param_json=complete_param_json,
                         verbose=verbose,
                         with_audit=True,
                         privacy=False,
                         # map_type_layer_inst: dict = {},
                         list_param_json_steps=list_param_json_steps,
                         id_step_incomplete_args=0)  # id_step_finish)

        return ret
    else:
        print(" ALL DONE !")
        return None
def retrieve_missed_folder(mtd_upload_id=51,
                           mtd_complete_map=46,
                           list_project_void=[121],
                           lpgss=None,
                           project_id=0,
                           nb_day=5,
                           verbose=False,
                           filo_or_fifo=True):

    ret = lpgss.running_job(project_id=project_id, verbose=verbose, nb_day=nb_day)

    print(str(ret))
    count_running = len(ret["running_or_failed"]) if "running_or_failed" in ret else 0
    print(" count_running : " + str(count_running))

    return ret["running_or_failed"]

def datou_exec_partial(id_step, mtd_id, input_datou,
                       project_id, user_id,
                       verbose=False):
    pass