Coverage for lib/batch/lib_batch.py: 23%

1383 statements

coverage.py v7.9.1, created at 2026-02-18 01:49 +0100

import datetime
import os
import types

import pandas as pd

def prepare_ioput_col_datou(inputs):
    if inputs == "" or inputs is None:
        return [], []
    inputs = inputs.split(",")
    input_column_name = []
    input_datou_name = []
    for i in inputs:
        if ":" in i:
            input_column_name.append(i.split(":")[0])
            input_datou_name.append(i.split(":")[1])
        else:
            input_column_name.append(i)
            input_datou_name.append(i)

    return input_column_name, input_datou_name
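
# Example: a "column:datou_name" CSV spec maps to two parallel name lists;
# entries without a ":" use the same name on both sides.
#   >>> prepare_ioput_col_datou("file:in_file,out_folder")
#   (['file', 'out_folder'], ['in_file', 'out_folder'])
#   >>> prepare_ioput_col_datou("")
#   ([], [])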

def create_pandas_table_from_list_column_and_input(input_column_name, output_column_name):
    df = pd.DataFrame(columns=input_column_name + output_column_name)
    return df

def append_data_to_df(df, input_arg_by_name, verbose=False):
    if verbose:
        print(" DF in append_data_to_df : ")
        print(df)
    # DataFrame.append was removed in recent pandas; loc-based insertion works as long as
    # the new index is not already present, which is the case with a RangeIndex.
    # https://stackoverflow.com/questions/75956209/error-dataframe-object-has-no-attribute-append
    df.loc[len(df)] = input_arg_by_name  # only use with a RangeIndex!
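
# Example (illustrative): on a fresh RangeIndex the new row lands at the end.
#   >>> df = pd.DataFrame(columns=["a", "b"])
#   >>> append_data_to_df(df, {"a": 1, "b": 2})
#   >>> len(df)
#   1
# With a non-contiguous index, df.loc[len(df)] could overwrite an existing row
# instead of appending, hence the RangeIndex warning above.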

def find_first_missing_output(df, output_column_name, input_column_name):

    # TODO VR 27-12-23 modularize this part
    if output_column_name is not None and input_column_name is not None:
        # Find the first row without output data
        if output_column_name[0] not in df.columns:
            # Add the column to the dataframe, initialized with empty strings
            df[output_column_name[0]] = ""

        list_no_data = df[output_column_name[0]] == ""
        if len(list_no_data) == 0:
            print("All DONE")
        iloc_missing = df.index[list_no_data]
        if iloc_missing.shape == (0,):
            print("All DONE")
            return None, None
        first_missing_output = iloc_missing[0]
        missing_row_index = df.index[list_no_data][0]

        # Input data of this row.
        # NB: behavior may differ when len(input_column_name) == 1 (well, that was
        # always the case anyway), and the input data is rebuilt further down, so
        # this part may not even be necessary.
        print("About to get some first_missing_output :" + str(first_missing_output))
        print("Using input_column_name : " + str(input_column_name))

        print(" available cols : " + str(df.columns.values.tolist()))
        print(" number data : " + str(len(df.values.tolist())))

        if len(input_column_name) == 1:
            input_data = df[input_column_name[0]][first_missing_output]
        else:
            input_data = [df[k][first_missing_output] for k in input_column_name]
    else:
        input_data = None
        missing_row_index = None

    return input_data, missing_row_index
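
# Example (illustrative): for df = pd.DataFrame({"in": ["x", "y"], "out": ["done", ""]}),
# find_first_missing_output(df, ["out"], ["in"]) prints some diagnostics and returns
# ("y", 1): the input value and the index of the first row whose "out" column is empty.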

def create_pandas_table_from_text(text,
                                  input_column_name=None,
                                  output_column_name=None,
                                  separator="|",
                                  verbose=False):
    if verbose:
        print(text)
    if text is None:
        return None

    # Drop lines that start with a hash or that do not contain the separator
    lines = [line for line in text.strip().split('\n') if (not line.startswith("#") and separator in line)]

    if len(lines) == 0:
        print("ERROR should exit We have no input data, maybe the markdown is not well formatted ? ")
        print(text)
        return None

    # Column titles are on the first retained line
    headers = [d.strip(" ") for d in lines[0].split(separator)]
    # Start at index 2: the markdown format has a separator line without data between
    # the column names and the rows. TODO handle this more robustly.
    data = [line.split(separator) for line in lines[2:]]

    data = [[d.strip(" ") for d in row] for row in data]

    max_nb_columns = max([len(row) for row in data]) if len(data) > 0 else -1
    headers = [headers[i].strip(" ") if i < len(headers) else "Column " + str(i) for i in range(max_nb_columns)]

    # Make sure every row has the same number of columns as the header
    data = [row if len(row) == len(headers) else row + [''] * (len(headers) - len(row)) for row in data]

    if verbose:
        print(" data to build pd " + str(len(data)))

    # Build the DataFrame
    df = pd.DataFrame(data, columns=headers)
    if '' in df.columns:
        df = df.drop('', axis=1)

    return df
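
# Example (illustrative): parse a small markdown table; the "---" separator row is skipped
# and the empty edge columns produced by the leading/trailing "|" are dropped.
#   >>> md = "| a | b |\n|---|---|\n| 1 | 2 |"
#   >>> create_pandas_table_from_text(md)
#      a  b
#   0  1  2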

css_button_class = "btn btn-primary text-l bg-blue-500 text-white p-2 rounded"


def make_replace_upload_button_manax_hc(hit):
    return '<button type="button" class="button_replace {}" id="{}" ><i class="bi bi-upload"></i>Replace</button>'.format(css_button_class, hit)


def make_audit_button_manax_hc(id_line, href):
    return '<a href="{}"><button type="button" id="button-launch-{}" class="{}"><i class="bi bi-rocket-takeoff-fill float-left"></i> Launch</button></a>'.format(href, id_line, css_button_class)


def make_launch_button(endpoint, args, id_line, name="Launch"):
    return '<a href="{}?{}"><button type="button" id="button-launch-create-{}" class="{}"><i class="bi bi-info-circle-fill"></i>{}</button></a>'.format(endpoint, args, id_line, css_button_class, name)


def make_downloadable(url, id_line, name="File"):
    return '<a href="{}"><button type="button" id="button-downloadable-{}-{}" class="{}"><i class="bi bi-info-circle-fill"></i>{}</button></a>'.format(url, name, id_line, css_button_class, name)


from flask import url_for

def append_action_steps_button(table,
                               safia_suivi_prod_doc_id,
                               safia_suivi_prod_proj_id,
                               saxia_steps={}):
    # First pass: sanity-check that each configured step carries a "col" entry.
    try:
        for step in saxia_steps:
            if "col" in saxia_steps[step]:
                col = saxia_steps[step]["col"]
            else:
                import sys
                sys.stdout.write("&")
                # This action is not meant to be a button in a column
    except Exception as e:
        print("Many problems in configuration! " + str(e))

    for x in table.index:
        mtd_id = 40
        out_folder = "/"  # "static/saxia"
        suffix = ""  # "/static/saxia"
        doc_audit = "non:0"
        hash_id_treatment = "dummy"
        file = None
        anon_file = None
        out_file = None
        try:
            mtd_id = table.loc[x, 'datou_id'] if "datou_id" in table.columns else mtd_id
            out_folder = table.loc[x, 'out_folder'] if "out_folder" in table.columns else out_folder
            suffix = table.loc[x, 'suffix'] if "suffix" in table.columns else suffix
            hash_id_treatment = table.loc[x, 'hash_id_treatment'] if "hash_id_treatment" in table.columns else hash_id_treatment
            file = table.loc[x, 'file'] if "file" in table.columns else file
            anon_file = table.loc[x, 'anon_file'] if "anon_file" in table.columns else file
            out_file = table.loc[x, 'out_file'] if "out_file" in table.columns else str(hash_id_treatment) + ".docx"
            doc_audit = table.loc[x, 'doc_audit'] if "doc_audit" in table.columns else doc_audit
            audit_output = table.loc[x, 'audit_output'] if "audit_output" in table.columns else None
        except Exception as e:
            print("Error in append_action_steps_button : " + str(e))

        if out_folder.startswith("/home/safia/OneDrive/Test Safia expertise"):
            out_folder = out_folder.replace("/home/safia/OneDrive/Test Safia expertise", "/static/onedrive/") + "/"
        elif out_folder.startswith("/home/safia/workarea/git/Safia/prompt/python/server/static/onedrive"):
            out_folder = out_folder.replace("/home/safia/workarea/git/Safia/prompt/python/server/static/onedrive", "/static/onedrive") + "/"
        filename = os.path.basename(file) if file is not None else file

        out_folder_hc = "static/onedrive/output"
        out_folder_hc_http = "static%2Fonedrive%2Foutput%2F"

        audit_button_aux = make_audit_button_manax_hc(mtd_id, href=url_for('manax', id=str(mtd_id), suffix=out_folder, hash_id_treatment=hash_id_treatment, safia_suivi_prod_doc_id=safia_suivi_prod_doc_id, safia_suivi_prod_proj_id=safia_suivi_prod_proj_id, file=file, out_file=out_file))  # , audit_file = doc_audit
        table.loc[x, "Audit"] = audit_button_aux

        replace_button_aux = make_replace_upload_button_manax_hc(hash_id_treatment)  # , audit_file = doc_audit
        table.loc[x, "Replace"] = replace_button_aux

        # Here I try to lay out the launch button; it should really be built from the row
        # data and from the configuration parameters stored in the project data.
        # Data already created in project 70.
        col_name = "Launch Extract"
        endpoint = "/api/v1/safia/query"
        datou_id = "datou-40"
        if file is not None:
            out_folder_hc = "static/onedrive/output"
            out_folder_hc_http = "static%2Fonedrive%2Foutput%2F"
            args = "hash_id_treatment=" + hash_id_treatment + "&input_csv=hash_id_treatment_input%3D" + hash_id_treatment + "%2Cout_folder%3D" + out_folder_hc_http + "%2Cinput_col_cr%3Dcr_correct_typo%2Cinput_col_intro%3Dintro_correct_typo%2Cload_df_from_db_and_correct%3Dtrue%2Cfile%3D" + file.replace("/", "%2F") + "%2Cwith_audit%3D1"
        else:
            # TODO args to set up for the anon file: align file / filename (filename is better)
            args = ""
            args += "&with_audit=1"
        args += "&" + datou_id + "=true&object=simple_text_query&load_df_from_db_and_correct=true"
        id_line = x
        args += "&safia_suivi_prod_doc_id=" + str(safia_suivi_prod_doc_id)
        args += "&safia_suivi_prod_proj_id=" + str(safia_suivi_prod_proj_id)
        args += "&col_index_treatment=file"
        if file is not None:
            args += "&file=" + file
        audit_button_aux = make_launch_button(endpoint, args, id_line)
        table.loc[x, col_name] = audit_button_aux

        # At some point there was a consolidate button here; now we loop over the saxia_steps instead.
        # conf_consolidate = saxia_steps["consolidate"] if "consolidate" in saxia_steps else {}
        # output_datou_to_col = conf_consolidate["output_datou_to_col"] if "output_datou_to_col" in conf_consolidate else "nb_page:nb_page,nb_modif:nb_modif_manual,nb_modif_class:nb_modif_class_manual,nb_doc:nb_doc,nb_word:nb_word_result"
        # args += "&output_datou_to_col=" + output_datou_to_col  # VR 10-4-24: I don't know what this is for!
        datou_id = "datou-44"
        if file is not None:
            args = "hash_id_treatment=" + hash_id_treatment + "&input_csv=hash_id_treatment_input%3D" + hash_id_treatment + "%2Cout_folder%3Dstatic%2Fonedrive%2Foutput%2F%2Cfile%3D" + file.replace("/", "%2F") + "%2Cwith_audit%3D1"
        else:
            # WIP: need to align file and filename
            args = ""
        inputs = ["hash_id_treatment", "object=simple_text_query", "load_df_from_db_and_correct=true"]

        if file is not None:
            url = os.path.join(suffix, file)
        else:
            url = ""
        if 'static' in url:
            url = "/static" + url.split("static")[1]
        col_name = "Input File Anon"
        name = "File Input"
        audit_button_aux = make_downloadable(url, id_line, name)
        table.loc[x, col_name] = audit_button_aux

        col = ""
        try:
            for step in saxia_steps:
                if "col" in saxia_steps[step]:
                    col = saxia_steps[step]["col"] if "col" in saxia_steps[step] else "nocol"
                    inputs = saxia_steps[step]["inputs"] if "inputs" in saxia_steps[step] else []
                    mtd_id = saxia_steps[step]["datou-id"] if "datou-id" in saxia_steps[step] else None
                    output_datou_to_col = saxia_steps[step]["output_datou_to_col"] if "output_datou_to_col" in saxia_steps[step] else ""  # e.g. for anon: "anon_filename,nom,prenom"

                    # Forced to "datou" for now
                    step_type = saxia_steps[step]["type"] if "type" in saxia_steps[step] else "datou"

                    list_args = []

                    if step_type == "datou":
                        list_args.append("object=simple_text_query")
                        list_args.append("with_audit=true")
                        url = "/api/v1/safia/query"
                    elif step_type == "endpoint":
                        url = saxia_steps[step]["url"]
                        list_args_from_json = saxia_steps[step]["args"].split(",") if "args" in saxia_steps[step] else []
                        for arg in list_args_from_json:
                            if arg in table.columns:
                                list_args.append(arg + "=" + table.loc[x, arg])

                    # not used
                    special = None

                    duplicate = {
                        "col": "Duplicate Treatment",
                        "type": "endpoint",
                        "endpoint": "api/v1/saxia/duplicate_hash_id_treatment",
                        "args": "hash_id_treatment",
                        "ret": "new_hash_id_treatment",
                        "name": "duplicate"
                    }

                    list_input_csv = []
                    for input in inputs:
                        if input in table.columns:
                            input_val = table.loc[x, input]
                            one_input = input + "%3D" + input_val.replace("/", "%2F")
                            list_input_csv.append(one_input)

                    if len(list_input_csv) > 0:
                        input_csv_val = "%2C".join(list_input_csv)
                        input_csv_var_and_val = "input_csv=" + input_csv_val
                        list_args.append(input_csv_var_and_val)

                    list_args.append("safia_suivi_prod_doc_id=" + str(safia_suivi_prod_doc_id))
                    list_args.append("safia_suivi_prod_proj_id=" + str(safia_suivi_prod_proj_id))
                    list_args.append("col_index_treatment=file")  # to remove
                    if filename is not None:
                        list_args.append("value_index=" + filename)
                        list_args.append("col_index=filename")
                    list_args.append("output_datou_to_col=" + output_datou_to_col)

                    if mtd_id is not None:
                        list_args.append("datou-" + str(mtd_id) + "=true")

                    if len(list_args) > 0:
                        args = "&".join(list_args)
                    else:
                        args = ""

                    action_button_aux = make_launch_button(url, args, x)
                    table.loc[x, col] = action_button_aux
                else:
                    import sys
                    sys.stdout.write("&")
                    # This action is not meant to be a button in a column
        except Exception as e:
            print("Many problems in configuration for col : " + str(col) + " " + str(e))

    print(" table")  # useless
    # return table
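
# Example saxia_steps entry (hedged sketch; the keys mirror the lookups above, the
# values are illustrative):
#   saxia_steps = {"anon": {"col": "Launch Anon", "type": "datou", "datou-id": 42,
#                           "inputs": ["hash_id_treatment", "out_folder"],
#                           "output_datou_to_col": "anon_filename,nom,prenom"}}
# Each such step adds a launch button in column "Launch Anon" for every row of the table.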

def set_custom_display(all_result, endpoint_df_conf_type_suivi, col_csv=""):
    list_col_from_input = col_csv.split(",") if col_csv is not None and col_csv != "" else []
    list_col_display = endpoint_df_conf_type_suivi["list_col_display"] if "list_col_display" in endpoint_df_conf_type_suivi else []
    list_col_display_init = list_col_display.copy()
    list_col_virtual = endpoint_df_conf_type_suivi["list_col_virtual"] if "list_col_virtual" in endpoint_df_conf_type_suivi else []
    list_col_action = endpoint_df_conf_type_suivi["list_col_action"] if "list_col_action" in endpoint_df_conf_type_suivi else []

    for col in list_col_virtual:
        all_result[col] = "tofill"
        if "col_source" not in list_col_virtual[col]:
            print(" Missing col_source info in " + str(list_col_virtual[col]))
            continue
        col_source = list_col_virtual[col]["col_source"]
        key = list_col_virtual[col]["key"] if "key" in list_col_virtual[col] else None
        col_type = list_col_virtual[col]["type"] if "type" in list_col_virtual[col] else "text"
        if col_type == "text" or col_type == "humanize_size":
            if col_source in all_result.columns:
                all_result[col] = "init"
                if key is None:
                    continue
                elif "/" in key:
                    from lib.manaudit.lib_datou_audit import load_sub_json
                    all_result[col] = all_result[col_source].apply(lambda x: load_sub_json(x, key))
                else:
                    all_result[col] = all_result[col_source].apply(lambda x: x[key] if key in x else None)
                if col_type == "humanize_size":
                    from lib.lib_util import humanize_size_file
                    all_result[col] = all_result[col].apply(lambda x: humanize_size_file(x))
                list_col_display.append(col)
        elif col_type == "link_format" or col_type == "date_link_format":
            if "format" not in list_col_virtual[col]:
                print(" Missing format info in " + str(list_col_virtual[col]))
                continue
            if "variables" not in list_col_virtual[col]:
                print(" Missing variables info in " + str(list_col_virtual[col]))
                continue
            all_result[col] = col
            for index, row in all_result.iterrows():
                fmt = list_col_virtual[col]["format"]
                for var in list_col_virtual[col]["variables"]:
                    key = list_col_virtual[col]["variables"][var]["key"]
                    col_source = list_col_virtual[col]["variables"][var]["col_source"]
                    try:
                        value = all_result.loc[index, col_source]
                        if key is not None:
                            value = value.get(key, "dummy")
                    except Exception as e:
                        print(" Pb in getting value for a virtual column name " + str(e))
                        value = "dummy"
                    fmt = fmt.replace("{" + var + "}", str(value))
                # VR 16-5-24 TODO: hack so the result auto-downloads in suivi?type=lab
                fmt = fmt.replace("/home/safia/workarea/git/Safia/prompt/python/server/static", "/static")
                fmt = fmt.replace("//", "/")
                if col_type == "link_format":
                    all_result.loc[index, col] = "<a href='" + fmt + "'>" + col + "</a>"
                elif col_type == "date_link_format":
                    fmt_loc = fmt.replace("/home/safia/workarea/git/Safia/prompt/python/server/static", "static")
                    fmt_loc = fmt_loc.replace("//", "/")
                    fmt_loc = fmt_loc.lstrip("/")
                    try:
                        datetime_from_stat = os.stat(fmt_loc).st_ctime
                        dt = datetime.datetime.fromtimestamp(datetime_from_stat)
                        date_str = dt.strftime("%Y-%m-%d %H:%M:%S")
                    except Exception as e:
                        print(" Pb in date_link_format " + str(e))
                        date_str = "No date, possibly no file"
                    all_result.loc[index, col] = "<a href='" + fmt + "'>" + date_str + "</a>"
                else:
                    print("Unexpected type " + str(col_type))
                    all_result.loc[index, col] = "unexpected"
            list_col_display.append(col)
        else:
            print(" Unexpected type " + str(col_type))

    for col in list_col_action:
        value = list_col_action[col]["value"] if "value" in list_col_action[col] else None
        col_source = list_col_action[col]["col_source"] if "col_source" in list_col_action[col] else None
        key = list_col_action[col]["key"] if "key" in list_col_action[col] else None
        action_type = list_col_action[col]["action_type"] if "action_type" in list_col_action[col] else "input"
        if col_source in all_result.columns:
            if action_type == "launch":
                print("TODO and refacto by the way")
            elif action_type == "download":
                print("TODO and refacto by the way")
            elif action_type == "checkbox":
                all_result[col] = "<input type=checkbox data-key=" + str(key) + " data-col=" + str(col_source) + " ></input>"
                for index, row in all_result.iterrows():
                    # ready_to_deliver
                    if row[col_source] is not None:
                        all_result.loc[index, col] = "<input type=checkbox checked data-key=" + str(key) + " data-col=" + str(col_source) + " ></input>"

            elif action_type == "input":
                begin_input = "<input type=text class='input_dyn' data-key=" + str(key) + " data-col=" + str(col_source)
                if "typedata" in list_col_action[col]:
                    begin_input += " data-typedata=" + list_col_action[col]["typedata"]
                end_input = " ><div class='status' ></div></input>"
                all_result[col] = begin_input + end_input
                for index, row in all_result.iterrows():
                    value = row[col_source]
                    if key is not None:
                        if key in value:
                            value = value[key]
                    if row[col_source] is not None:
                        all_result.loc[index, col] = begin_input + " value='" + str(value) + "' " + end_input

            else:
                print("Unknown action_type : " + str(action_type))
            list_col_display.append(col)
        elif value is not None:
            all_result[col] = value
            list_col_display.append(col)

    # Actually it would not matter if this were not unique => YES it does, and I don't want duplicates anyway!
    if list_col_display_init == []:
        for l in list_col_from_input:
            if l not in list_col_display:
                list_col_display.append(l)

    return list_col_display
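
# Example configuration (hedged sketch; the keys follow the lookups above, the values
# are illustrative):
#   endpoint_df_conf_type_suivi = {
#       "list_col_display": ["file"],
#       "list_col_virtual": {"Size": {"col_source": "stat_json", "key": "size",
#                                     "type": "humanize_size"}},
#       "list_col_action": {"Ready": {"col_source": "info", "key": "ready_to_deliver",
#                                     "action_type": "checkbox"}},
#   }
# Assuming "stat_json" and "info" exist in all_result, set_custom_display would fill the
# "Size" and "Ready" columns and return ["file", "Size", "Ready"] as the columns to display.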

def create_markdown_table_from_df(df):

    # Build the table: take the df column names, add a row containing only "---",
    # then join cells with " | " and rows with "\n".
    list_col_names = df.columns.values.tolist()
    list_col_names = [str(i) for i in list_col_names]
    list_col_names = " | ".join(list_col_names)
    list_col_names = list_col_names + "\n" + "--- | " * len(df.columns.values.tolist())
    df_list = df.values.tolist()
    body_content = "\n".join(map(lambda x: " | ".join(map(str, x)), df_list))
    return list_col_names + "\n" + body_content
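
# Example (illustrative): for a df with columns a, b and a single row [1, 2] this yields
#   a | b
#   --- | --- |
#   1 | 2
# (note the extra trailing "--- | " produced by the repeated separator string).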

def load_safia_doc_input_list(safia_document_id, safia_project_id, lpgss, limit=10000):

    info_project = lpgss.get_project_info(safia_project_id)
    table_documents = info_project["table_documents"] if "table_documents" in info_project else "table_documents"

    check_table_exists = lpgss.check_table_exists(table_documents)
    if not check_table_exists:
        print("l465 ERROR treated as warning Table " + table_documents + " does not exist, please create it first!")
        documents = []
    else:
        documents = lpgss.get_documents(table_documents, safia_document_id, chunk_id=None, limit=limit)
    print("Number of documents : " + str(len(documents)))
    if len(documents) == 0:
        print(" We will need to create the df and document")
        return None

    total_content = ""
    for d in documents:
        total_content += d["content"]

    return total_content

def fill_df_from_datou_result(df, output_datou_name,
                              output_column_name, output,
                              value_index,
                              col_index="line_number"):
    if value_index is None:
        print("Missing value_index in fill_df_from_datou_result")
        return
    line_number = None
    if col_index == "line_number":
        line_number = value_index
    else:
        if col_index not in df.columns:
            print(" Missing column " + str(col_index) + " in df, data not saved from datou in df ")
        else:
            line_number = df[col_index] == value_index
            matching_index = df.index[line_number]
            if len(matching_index) == 0:
                print(" No row matching value_index")
                line_number = 0
            else:
                if len(matching_index) > 1:
                    print(" Unexpected multiple line_number, never mind, we will try to update the first!")
                line_number = matching_index[0]

    if line_number is None:
        print("Missing line to update in df : value_index : " + str(value_index))
        line_number = 0

    info_store = {}

    for j in range(len(output_datou_name)):
        if output_datou_name[j] in output:
            val = output[output_datou_name[j]]

            info_store[output_column_name[j]] = val

            val = str(val).replace("\n", "<br>")

            df.loc[line_number, output_column_name[j]] = val
        else:
            print(" Missing output " + str(output_datou_name[j]) + " keys available are : " + str(output.keys()))

    return info_store
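
# Example (illustrative sketch): copy one datou output into the matching df column.
#   >>> df = pd.DataFrame({"file": ["a.pdf"], "result": [""]})
#   >>> fill_df_from_datou_result(df, ["res"], ["result"], {"res": "ok"}, 0)
#   {'result': 'ok'}
#   >>> df.loc[0, "result"]
#   'ok'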

def aux_fill_load_df(audit_info):

    from lib.manaudit.lib_datou_audit import load_sub_json
    key = "io_exec/9/output/df_complet_as_json"
    try:
        df_from_json = load_sub_json(audit_info, key)
        print("df_from_json : " + str(df_from_json))
    except Exception as e:
        print("Error in loading df_as_json : " + str(e))
        return None

    try:
        df = pd.read_json(df_from_json, convert_dates=["datet", "date_entree_hospitalisationt", "date_sortie_hospitalisationt", "date_fin_arret_travailt", "date_debut_arret_travailt"])
    except Exception as e:
        print("Error in read_json df_as_json : " + str(e))
        return None

    return df

# Not used VR 4/3/35
def fill_info_list_page(audit_info, hash_id_treatment, document_type="ordonnance"):
    df = aux_fill_load_df(audit_info)

    list_page = []

    if df is not None:
        print(df.columns)
    else:
        return []

    key_list_images = "io_exec/0/output/images"
    from lib.manaudit.lib_datou_audit import load_sub_json
    list_pages = load_sub_json(audit_info, key_list_images)

    for i in range(len(df)):
        dt = str(df["document_type"][i])
        if document_type == "all" or dt == document_type:

            list_des_pages = df["Liste des pages"][i]
            try:
                if isinstance(list_des_pages, str):
                    list_des_pages_as_int = map(int, list_des_pages.split(","))
                else:
                    list_des_pages_as_int = [list_des_pages]
            except Exception as e:
                print(str(e))
                list_des_pages_as_int = []
            for page in list_des_pages_as_int:
                if page < len(list_pages):
                    list_page.append(list_pages[page])

    return list_page

def fill_info_stat_audit(audit_info, hash_id_treatment):
    print(" TODO lots of things, and adapt the outputs")

    df = aux_fill_load_df(audit_info)
    if df is None:
        return {}

    nb_page = 0
    map_doc_type_nb = {}
    for i in range(len(df)):
        lp = str(df["Liste des pages"][i])
        nb_page_this_doc = len(lp) - len(lp.replace(",", "")) + 1  # number of commas + 1
        nb_page += nb_page_this_doc
        if df["document_type"][i] not in map_doc_type_nb:
            map_doc_type_nb[df["document_type"][i]] = nb_page_this_doc
        else:
            map_doc_type_nb[df["document_type"][i]] += nb_page_this_doc

    info_stat = {"nb_doc": len(df), "count": map_doc_type_nb}

    return info_stat

# [ ] TODO VR 11-1-23 refactor: first modularize the loading,
# probably also modularize the loop.
# When we have an in_file, we should rather run a single datou without looping over the df.
# We could build a df from param_json, with **args??
# Otherwise we need an option to run on a single row,
# or modularize and have, for instance, a context_datou_exec.
# We could also handle the case where the datou inputs are loaded from a JSON file instead.
def run_batch_datou(mtr_datou_id, safia_project_id=0,
                    safia_document_id=None,
                    input_as_csv=None, output_as_csv=None,
                    verbose=False,
                    with_audit=False,
                    lpgss=None,
                    nb_lines=10,
                    in_file=None, offset=None,
                    out_folder="temp",
                    datou_func="",
                    saxia_step_name=None,
                    special_op="default"):  # TODO VR 14-5-24 : saxia_step_name is the name of a step in the saxia project; inside this function it is just the suffix of a column name
    print("# LOAD DATA NAKED")
    # TODO 22-1-24 : instead of input_from_df_or_arg we would rather have load_input_from_missing and insert_input

    if output_as_csv is None:
        output_as_csv = ""

    from lib.lib_util import count_and_display_elapsed_time

    if "hash_id_treatment" not in output_as_csv:
        if output_as_csv != "":
            output_as_csv += ","
        output_as_csv += "hash_id_treatment:hash_id_treatment"

    input_from_df_or_arg = True
    if in_file is not None and in_file != "":
        nb_lines = 1

    input_column_name, input_datou_name = prepare_ioput_col_datou(input_as_csv)
    output_column_name, output_datou_name = prepare_ioput_col_datou(output_as_csv)

    output_column_name.append("datetime")
    output_datou_name.append("datetime")
    if mtr_datou_id != 0:
        output_column_name.append("datou_id")
        output_datou_name.append("datou_id")

    from auth.lib_auth import create_id
    hash_id_treatment = create_id()

    if safia_document_id == "pg_audit":
        condition = [
            {
                "value": mtr_datou_id,
                "variable": "mtr_datou_id",
                "data_type": "int"
            }
        ]
        mtr_datou_id = None
        df = lpgss.load_data_audit(output_type="pd",
                                   limit=nb_lines,
                                   offset=offset,
                                   condition=condition,
                                   col_csv="*")
        input_from_df_or_arg = True
    else:
        if safia_document_id is None or safia_document_id == "":
            print(" Unexpected behavior TODO safia_document_id should be initialized by default with hash_id_treatment ! ")
            safia_document_id = "tab_log_treatment_" + hash_id_treatment

        total_content = None
        if input_from_df_or_arg:
            if safia_project_id != 0:
                total_content = load_safia_doc_input_list(safia_document_id, safia_project_id, lpgss, limit=10000)

            if total_content is None:
                input_from_df_or_arg = False
            else:
                print("# PREPARE DATA : total_content : " + str(len(total_content)) + " characters")

        if input_from_df_or_arg:
            df = create_pandas_table_from_text(total_content, input_column_name, output_column_name)
        else:
            df = create_pandas_table_from_list_column_and_input(input_column_name, output_column_name)

    if in_file is not None and in_file != "":
        input_arg_by_name = {}
        # TODO 22-1-24 this looks better: https://stackoverflow.com/questions/218616/how-to-get-method-parameter-names but I could not make it work
        # TODO 22-1-24 this logic should also go around line 223 and following, when the useful input is not found
        for name in input_column_name:
            if name == "file":
                input_arg_by_name[name] = in_file
            elif name == "out_folder":
                input_arg_by_name[name] = out_folder
            else:
                print(" name : " + str(name) + " is not an accessible variable ! ")
                input_arg_by_name[name] = ""

        append_data_to_df(df, input_arg_by_name)
        input_data, missing_row_index = input_arg_by_name, len(df.values) - 1
    elif safia_document_id == "pg_audit":
        offset = 0  # the data was already loaded above
        missing_row_index = offset
    else:
        input_data, missing_row_index = find_first_missing_output(df, output_column_name, input_column_name)

    if offset is None or offset == 0:
        offset = missing_row_index

    # Treat data

    # Should be done earlier; for now this is a "hack", a matter of permissions!
    from lib.lib_safia_system import LibSafiaSystem
    from server.safia import lpgss_singleton, lib_external_info_from_apia_at, lib_auth_now_from_at, lib_right_singleton
    lss = LibSafiaSystem(lib_user_data_internal=lpgss_singleton,
                         lib_user_data_external=lib_external_info_from_apia_at,
                         lib_auth_user_otp=lib_auth_now_from_at,
                         lib_right=lib_right_singleton)
    otp = "0a76f14b131682eaa36fbef63d725f9352cfb85d"
    info, is_valid = lss.connect_with_otp(otp)

    # Totally crazy!
    lss.user_id = 1

    datous = lss.get_datou(mtr_datou_id)

    datou = None
    if len(datous) == 1:
        datou = datous[0]
        if str(datou["id"]) != str(mtr_datou_id):
            print("Wrong datou id!")
    else:
        print("Wrong datou id!")

    if datou is None:
        print("ERROR Missing datou")
        # return

    from auth.lib_auth import get_datou_exec_context_as_complete_param_json
    user = "info@opio.fr"
    user = "victor@reutenauer.eu"
    privacy = False
    from auth.lib_conf_system import lcs_global_singleton
    OPENAI_API_KEY = lcs_global_singleton.get_openai_api_key()
    complete_param_json = get_datou_exec_context_as_complete_param_json(user, verbose, privacy,
                                                                        openai_token=OPENAI_API_KEY, lss=lss,
                                                                        project_id=safia_project_id)

    # VR TODO: complete_param_json must use the datou's param_json!
    list_datou_step = list(map(lambda x: x["name"], datou["steps"])) if datou is not None else []  # And I have to do this too!
    list_param_json_steps = list(map(lambda x: x["param_json"], datou["steps"])) if datou is not None else []

    list_datou_func = []

    if datou_func != "":
        list_datou_func = parse_directive(datou_func)
    else:
        list_datou_func = []

    map_modif_hash_id_treatment_data = {}

    list_results = []

    from lib.datou.datou_exec import datou_exec
    for i in range(nb_lines):
        input_datou = {}

        if input_from_df_or_arg:
            if i + missing_row_index >= len(df.values.tolist()):
                print(" All the tab should be completed by now or we have a problem !")
                break
            if len(input_datou_name) != len(input_column_name) or len(output_datou_name) != len(output_column_name):
                print(" Internal error mismatch datou column io : exiting ")
                exit(1)
            for j in range(len(input_datou_name)):
                print("About to get some input value for : j :" + str(j) + " col : " + str(offset + i))
                print("Using input_datou_name[j] : " + str(input_datou_name[j]))

                print(" available cols : " + str(df.columns.values.tolist()))
                print(" number data : " + str(len(df.values.tolist())))

                # Fetch the element at row missing_row_index + i, column input_column_name[j], and store it in val
                val = df[input_column_name[j]][offset + i]  # Hack for now: it looks like the first row does not find the columns otherwise
                # bug: crashes when url is empty; other cases probably need handling too
                input_datou[input_datou_name[j]] = val
        else:
            if in_file is not None:
                if nb_lines != 1:
                    print(" Unmanaged behavior ")
                input_datou["file"] = in_file
                # avoids the OCR's default carbon-impact preprompt; grr, I don't understand the CdC that would do the job
                # input_datou["preprompt"] = ""

        if verbose:
            print(str(input_datou))

        if datou is not None:
            input_datou["datou_exec_info"] = {"project_id": safia_project_id,
                                              "safia_doc_id": safia_document_id,
                                              "mtr_datou_id": mtr_datou_id,
                                              "launched_at": datetime.datetime.now()}
            output, audit_json = datou_exec(list_datou_step, input_datou, complete_param_json,
                                            verbose=verbose,
                                            with_audit=with_audit,
                                            privacy=privacy,
                                            list_param_json_steps=list_param_json_steps)
            output["datou_id"] = str(mtr_datou_id)

        if len(list_datou_func) > 0:
            output = call_functions(list_datou_func, input_datou)

        output["datetime"] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        info_store = fill_df_from_datou_result(df, output_datou_name,
                                               output_column_name, output, missing_row_index + i)

        hash_id_treatment = output["hash_id_treatment"] if "hash_id_treatment" in output else input_datou["hash_id_treatment"] if "hash_id_treatment" in input_datou else "POURRI_" + hash_id_treatment
        map_modif_hash_id_treatment_data[hash_id_treatment] = info_store

        if special_op == "accumulate":
            list_results.append(output)

        if saxia_step_name is not None:
            name_info_step = "info_" + saxia_step_name

            saxia_step_name_at = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            info_store[saxia_step_name + "_at"] = saxia_step_name_at

            lss.lib_user_data_internal.upsert_audit_info(hash_id_treatment,
                                                         list_json_update=[
                                                             {"variable": name_info_step, "type": "json"}
                                                         ],
                                                         input_values={name_info_step: info_store},
                                                         verbose=verbose)

    new_text = create_markdown_table_from_df(df)
    # TODO VR try to_markdown

    if safia_document_id != "pg_audit":
        print(" NOW SAVING INDEX FILE BATCH TREATMENT !")
        input_save = {"json_to_save": [{"id": safia_document_id, "text": new_text}]}
        output_save, audit_json = datou_exec(["import_json"], input_save, complete_param_json, verbose=verbose, privacy=privacy)
        output_final = output_save

        print(str(output_final))

        return output_final
    if special_op == "accumulate":
        print(list_results)
        list_pages = []
        for l in list_results:
            if "list_page" in l:
                list_pages.extend(l["list_page"])
        print(list_pages)
        return list_pages
    return "TO check"
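
# Example invocation (hedged sketch; the ids, csv specs and path are illustrative, the
# parameter names come from the signature above):
#   run_batch_datou(40, safia_project_id=70,
#                   safia_document_id="tab_suivi_prod",
#                   input_as_csv="file:file,out_folder:out_folder",
#                   output_as_csv="result:result",
#                   in_file="static/onedrive/input/doc.pdf",
#                   lpgss=lpgss_singleton, with_audit=True)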

# Helper to dynamically load a function from a module.
def load_module(module_path, function_name):
    import importlib

    module = importlib.import_module(module_path)

    function = getattr(module, function_name)
    return function

# Helper to dynamically call a function with arguments.
def call_function(function, args):
    return function(*args)
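
# Example (hedged sketch; "os.path" / "join" are illustrative targets):
#   >>> fn = load_module("os.path", "join")
#   >>> call_function(fn, ["a", "b"])
#   'a/b'
# (output shown for a POSIX os.path)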

# parse datou_function
def parse_directive(chain):
    import re

    # The string to parse, e.g.:
    # chain = "path_to_file1:function1(arg1,arg2):output1;path_to_file2:function2(arg1,arg2):output2"

    # 1. Split the string on semicolons.
    directives = chain.split(';')

    list_function = []

    # Main loop over each directive.
    for directive in directives:
        # 2. Split each directive on the colons.
        dir_split = directive.split(':')
        if len(dir_split) != 3:
            print(f"Error in directive : {directive}")
            continue
        (path_to_file, function_and_args, output) = tuple(dir_split)

        # Use a regular expression to extract the function name and the arguments.
        function_name, args_str = re.match(r'(\w+)\((.*)\)', function_and_args).groups()

        # Extract the argument names, separated by commas, ignoring whitespace.
        arg_names = re.split(r'\s*,\s*', args_str)

        one_function = {"path_to_file": path_to_file, "function_name": function_name, "arguments": arg_names, "output": output}
        list_function.append(one_function)

    return list_function
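
# Example (illustrative directive; module/function names are hypothetical):
#   >>> parse_directive("lib.mymod:myfunc(a,b):out")
#   [{'path_to_file': 'lib.mymod', 'function_name': 'myfunc', 'arguments': ['a', 'b'], 'output': 'out'}]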

def call_functions(list_function, arguments_json):
    map_results = {}
    for function in list_function:
        path_to_file = function["path_to_file"]
        function_name = function["function_name"]
        arguments = function["arguments"]
        output = function["output"]
        # 4. Load the specified library.
        func = load_module(path_to_file, function_name)

        args_to_pass_as_list = []
        missing_arg = False
        for arg in arguments:
            if arg in arguments_json:
                args_to_pass_as_list.append(arguments_json[arg])
            else:
                # missing_arg = True
                print(f"No argument data available for argument: {arg}, expecting a fixed value")
                args_to_pass_as_list.append(arg)

        # 5. Call the function with the arguments.
        if not missing_arg:
            result = call_function(func, args_to_pass_as_list)
            print(f"Result of {function_name}: {result}")
            map_results[output] = result
        else:
            print(f"No argument data available for function: {function_name}")
    return map_results

file_column = "file"
size_column = "size"
created_at_column = "created_at"
indexed_at_column = "indexed_at"
last_indexed_at_column = "last_indexed_at"
modified_at_column = "modified_at"
deleted_at_column = "deleted_at"


def init_df_synchronize():
    input_column_name = [file_column, "sub_folder", created_at_column, modified_at_column, deleted_at_column, indexed_at_column, last_indexed_at_column, size_column]
    df = pd.DataFrame(columns=input_column_name)
    return df
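
# Example:
#   >>> list(init_df_synchronize().columns)
#   ['file', 'sub_folder', 'created_at', 'modified_at', 'deleted_at', 'indexed_at', 'last_indexed_at', 'size']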

def synchronize_df_folder(df=None, folder="", verbose=False):
    from lib.import_util.lib_path_to_vec import list_files
    if df is None:
        df = init_df_synchronize()

    now_as_string = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    index, file_list = list_files(folder, verbose=False,
                                  managed_zip_extension_to_avoid=[".zip", ".tar", ".tar.gz", ".tgz", ".tar.bz2", ".tbz"])

    for file_dict in file_list:
        file = file_dict["path"]
        # look for occurrences of the file path in the df "file" column;
        # count the matches rather than the rows
        find_occurrence = df[file_column] == file
        nb_occurrence = int(find_occurrence.sum())
        if nb_occurrence == 0:
            print("Indexing one file : " + file)
            size = os.stat(file).st_size
            created_at_float = os.stat(file).st_birthtime  # st_birthtime is macOS/BSD only
            modified_at_float = os.stat(file).st_mtime
            created_at_as_string = datetime.datetime.fromtimestamp(created_at_float).strftime('%Y-%m-%d %H:%M:%S')
            modified_at_as_string = datetime.datetime.fromtimestamp(modified_at_float).strftime('%Y-%m-%d %H:%M:%S')
            df.loc[len(df)] = [file, folder, created_at_as_string, modified_at_as_string, "", now_as_string, now_as_string, size]
        elif nb_occurrence > 1:
            print("ERROR : internal inconsistency error : multiple occurrences")
        else:
            iloc_file = df.index[find_occurrence]
            pass

from abc import abstractmethod


class Cdn:
    @abstractmethod
    def get_file(self, file_id):
        pass

# do we need a OneDrive option?


class CdnLocal(Cdn):
    def __init__(self, root_dir="temp",
                 lcs=None,
                 lib_data_internal=None,
                 project_id=None):
        self.root_dir = root_dir
        if lcs is not None:
            root_dir_conf = lcs.get_root_dir_local_cdn()
            if root_dir_conf is not None:
                self.root_dir = root_dir_conf
        self.lib_data_internal = lib_data_internal
        self.project_id = project_id

        # set project name
        # set date
        # the relative page-location convention must be implemented in JavaScript too; or how else do we do it?

    def store_file_get_path(self, temp_file):
        import shutil
        from auth.lib_auth import create_id
        hash_id = create_id()
        folder_as_YMD = datetime.datetime.now().strftime('%Y/%m/%d')
        # get the extension of temp_file (splitext keeps the leading dot, so strip it)
        ext = os.path.splitext(temp_file)[1].lstrip(".")
        if ext == "":
            ext = "dat"
        basename = os.path.basename(temp_file)
        path_file_local_cdn = os.path.join(self.root_dir, str(self.project_id), folder_as_YMD, hash_id + "_" + basename + "." + ext)
        # make sure the dated destination folder exists before moving
        os.makedirs(os.path.dirname(path_file_local_cdn), exist_ok=True)
        shutil.move(temp_file, path_file_local_cdn)
        return path_file_local_cdn
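
# Example (illustrative sketch): with root_dir "temp" and project_id 7, a call on
# "/tmp/upload/report.pdf" moves the file to something like
#   temp/7/2026/02/18/<hash_id>_report.pdf.pdf
# (the basename already carries the extension, which is then re-appended).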

def get_class_quali_from_worst_prediag(worst_prediag):
    # 3 classes: GOOD // THE REST // HANDWRITTEN OR TABLES
    if worst_prediag in ["OK", "BON"]:
        return "PRICE_LOW"
    elif worst_prediag in ["TABLEAUX", "MANUSCRIT", "MAUVAIS"]:
        return "PRICE_HIGH"
    else:  # worst_prediag in ["PRESQUEBON", "MISSING"], etc.
        return "PRICE_MEDIUM"

def get_worst_page_prediag(list_of_page, map_id_page_prediag):
    worst_prediag = "INIT"
    complete_prediag = ""
    if len(list_of_page) == 0:
        return worst_prediag, complete_prediag

    if len(list_of_page) == 1 and list_of_page[0] in map_id_page_prediag:
        return map_id_page_prediag[list_of_page[0]], map_id_page_prediag[list_of_page[0]]

    # Severity order: OK,BON => PRESQUEBON => {ALL} => MISSING => MANUSCRIT => MAUVAIS
    for page in list_of_page:
        if str(page) in map_id_page_prediag:
            prediag = map_id_page_prediag[str(page)]
            if prediag == "MISSING":
                continue
            if complete_prediag != "":
                complete_prediag += ","
            complete_prediag += prediag

            if worst_prediag == "INIT":
                worst_prediag = prediag
            elif worst_prediag in ["OK", "BON"] and prediag not in ["OK", "BON"]:
                worst_prediag = prediag
            elif prediag == "PRESQUEBON" and worst_prediag in ["OK", "BON"]:
                worst_prediag = "PRESQUEBON"
            elif worst_prediag in ["OK", "BON", "PRESQUEBON"] and prediag not in ["OK", "BON", "PRESQUEBON"]:
                worst_prediag = prediag
            elif prediag == "MISSING" and worst_prediag not in ["MANUSCRIT", "MAUVAIS"]:
                worst_prediag = "MISSING"
            elif prediag == "MANUSCRIT" and worst_prediag != "MAUVAIS":
                worst_prediag = "MANUSCRIT"
            elif prediag == "MAUVAIS":
                worst_prediag = "MAUVAIS"

    return worst_prediag, complete_prediag
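
# Example (illustrative): a handwritten page dominates a good one.
#   >>> get_worst_page_prediag(["1", "2"], {"1": "BON", "2": "MANUSCRIT"})
#   ('MANUSCRIT', 'BON,MANUSCRIT')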

def get_nb_modif_nb_word_change_or_not_per_doc(info_consolidate_raw, map_id_page_prediag):
    map_count_modif_per_doc = info_consolidate_raw["map_count_modif_per_doc"] if "map_count_modif_per_doc" in info_consolidate_raw else {}
    map_nb_word_per_doc = info_consolidate_raw["audit_info_write"]["map_nb_word_per_doc"] if "audit_info_write" in info_consolidate_raw and "map_nb_word_per_doc" in info_consolidate_raw["audit_info_write"] else {}
    map_type_document_per_doc = info_consolidate_raw["audit_info_write"]["map_type_document_per_doc"] if "audit_info_write" in info_consolidate_raw and "map_type_document_per_doc" in info_consolidate_raw["audit_info_write"] else {}
    list_of_pages_as_sccsv = info_consolidate_raw["audit_info_write"]["list_of_pages_as_sccsv"] if "audit_info_write" in info_consolidate_raw and "list_of_pages_as_sccsv" in info_consolidate_raw["audit_info_write"] else ""

    from lib.lib_util import from_list_page_per_doc_ccsv_to_list_of_list_of_page
    list_of_list_of_page, nb_page, max_page = from_list_page_per_doc_ccsv_to_list_of_list_of_page(list_of_pages_as_sccsv)

    list_doc_nb_modif_nb_word = []
    for id_doc in map_type_document_per_doc:
        id_doc_int = int(id_doc)
        doc_type = map_type_document_per_doc[id_doc]
        if doc_type == "":
            continue
        nb_word = map_nb_word_per_doc[id_doc] if id_doc in map_nb_word_per_doc else 0
        nb_modif = map_count_modif_per_doc[id_doc] if id_doc in map_count_modif_per_doc else 0
        corr_type_manual = "map_modif_type_document" in info_consolidate_raw and id_doc in info_consolidate_raw["map_modif_type_document"]

        if list_of_list_of_page is not None and len(list_of_list_of_page) > id_doc_int:
            list_of_page = list_of_list_of_page[id_doc_int]
            worst_prediag, complete_prediag = get_worst_page_prediag(list_of_page, map_id_page_prediag)
        else:
            worst_prediag = "MISSING"
            complete_prediag = "MISSING"

        one_doc = {"doc_type": doc_type, "nb_word": nb_word, "nb_modif": nb_modif, "corr_type_manual": corr_type_manual, "worst_prediag": worst_prediag, "complete_prediag": complete_prediag}  # "id_doc": id_doc_int,

        list_doc_nb_modif_nb_word.append(one_doc)

    return list_doc_nb_modif_nb_word

def assoc_doc_type_int(list_doc_type):
    map_doc_type_int = {}
    for i in range(len(list_doc_type)):
        map_doc_type_int[list_doc_type[i]] = i
    return map_doc_type_int


def print_nb_word_nb_modif_type_doc(list_doc_nb_modif_nb_word, map_doc_type_int):
    print("# nb_word nb_modif doc_type")
    for doc in list_doc_nb_modif_nb_word:
        print(str(doc["nb_word"]) + " " + str(doc["nb_modif"]) + " " + str(map_doc_type_int[doc["doc_type"]]))
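
# Example:
#   >>> assoc_doc_type_int(["ordonnance", "certif_at"])
#   {'ordonnance': 0, 'certif_at': 1}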

taux_tva = 20

def reset_saxia_fact_after_including(month_start=None, year_start=None,
                                     # month_end=None, year_end=None,
                                     lpgss=None, root_folder_fact="temp"):
    # Find the previous month.
    # How to get the list of months?
    # mtr=> select id, month, TO_DATE(month, 'TMmonth YYYY') from mtruser.mtr_factures;
    #  id |    month     |  to_date
    # ----+--------------+------------
    #  52 | janvier 2025 | 2025-01-01
    #  53 | janvier 2025 | 2025-01-01

    from datetime import date
    date_start = date(int(year_start), int(month_start), 1)

    map_validated_nb, map_months = lpgss.get_bill_validate(date_start)
    if map_validated_nb[True] != 0:
        print("Can't regenerate since bills were already validated")
        return False, None
    nb_to_delete = map_validated_nb[False]
    min_bill_id = lpgss.get_min_bill_id_after_or_equal_date(date_start)
    if min_bill_id is not None and isinstance(min_bill_id, int) and min_bill_id > 0:

        autocommit_prev_val = lpgss.client.autocommit
        try:
            lpgss.client.autocommit = False
            ret_reset = lpgss.reset_facture(min_bill_id, nb_to_delete)
        except Exception as e:
            print("Error in resetting the bill")
            print(str(e))
            lpgss.client.autocommit = autocommit_prev_val
            return False, None
        lpgss.client.autocommit = autocommit_prev_val
        if ret_reset == False:
            print("Error in resetting the bill")
            return False, None

    # Get all the files, then delete them
    list_month_csv = map_months[False]
    if list_month_csv != "":
        list_month = list_month_csv.split(",")
        if len(list_month) > 1:
            print(" Many months to delete : " + str(list_month) + " only the first one deleted will be recreated, so the job should run !")
        for month in list_month:
            month_subfolder_suffix = month.lower().replace(" ", "_")
            subfolder_facture = "facture_" + month_subfolder_suffix
            folder_facture_www = os.path.join(root_folder_fact, subfolder_facture)
            folder_facture_internal_server = folder_facture_www.lstrip("/")
            import shutil
            ret = shutil.rmtree(folder_facture_internal_server, ignore_errors=True)
            print(" ret : " + str(ret))
    else:
        print("Unexpected: something is inconsistent, TODO add a check before and cancel this reset")
        subfolder_facture = "mois_en_lettre_sans_accent_year_en_chiffre"

    # Delete all lines in the table
    print("TODO")
    return True, min_bill_id

1247def generate_saxia_fact(id_fact = 0, month = None, year = None, 

1248 type_fact = "debug", 

1249 project_id = None, 

1250 root_folder_fact = "temp", 

1251 only_correct_file = False, 

1252 verbose = False): 

1253 from server.safia import lpgss_singleton 

1254 

1255 # TODO VR 21/4/25 A choper de la configuration de l'app ! 

1256 mtr_datou_id_fact = 40 

1257 

1258 import locale 

1259# locale.setlocale(locale.LC_ALL, 'fr_FR.utf8') 

1260 locale.setlocale(locale.LC_ALL, 'fr_FR.UTF-8') 

1261 

1262 user = None 

1263 import datetime 

1264 # calculer aujourd'hui moins un mois et récupérer le mois et l'année 

1265 today = datetime.datetime.now() 

1266 # enlever un mois à aujourd'hui 

1267# last_month_date = today - datetime.timedelta(month=1) 

1268 

1269 from dateutil.relativedelta import relativedelta 

1270 last_month_date = today + relativedelta(months=-1) 

1271 two_month_before_present = today + relativedelta(months=-2) 

1272 

1273 # While debugging ! 

1274 

1275 last_month = 8 

1276 year_last_month = 2024 

1277 mois_last_month_string = "Aout" 

1278 

1279 last_month = 10 

1280 year_last_month = 2024 

1281 mois_last_month_string = "Octobre" 

1282 

1283 last_month = 11 

1284 year_last_month = 2024 

1285 mois_last_month_string = "Novembre" 

1286 

1287 if type_fact == "deprecated_no_db": # TODO Et quid du comportement qui génère le mois précédent à partir de now sans enregistrer dans la DB, veut-on le garder, gasp grrr plop => JE NE SAIS PAS ! 

1288 if month == None or year == None or month == "" or year == "": 

1289 mois_last_month_string = last_month_date.strftime("%B") 

1290 mois_last_month_string = mois_last_month_string[0].upper() + mois_last_month_string[1:] 

1291 last_month = last_month_date.month 

1292 year_last_month = last_month_date.year 

1293 else: 

1294 last_month = month 

1295 year_last_month = year 

1296 

1297 # We check the consistency of the generation with voilà plop ! 

1298 res_max_id, res_last_value, res_month_argmax = lpgss_singleton.get_last_fact_id_month() 

1299 inconsistent_data_last_id_stop = False 

1300 if res_max_id != res_last_value: 

1301 print("Inconsistency in the generation of the fact : " + str(res_max_id) + " != " + str(res_last_value)) 

1302 inconsistent_data_last_id_stop = True 

1303 return 

1304 else: 

1305 if id_fact == 0 or id_fact == None: 

1306 id_fact = res_max_id + 1 

1307# import dateparser 

1308 import datetime 

1309 # res_month_argmax = last_month_date.strftime("%B %Y") 

1310 try : 

1311 datetime_last_info = datetime.datetime.strptime(res_month_argmax.lower(), "%B %Y")# dateparser.parse(month_year, settings={'DATE_ORDER': 'DMY'}) 

1312 except Exception as e: 

1313 print("What to do with : res_month_argmax") 

1314 print(str(e)) 

1315 print("Padam plop !") 

1316 print("ERROR EXITING") 

1317 exit(1) 

1318 

1319 print(" datetime_last_info : " + str(datetime_last_info)) 

1320 

1321 if type_fact == "force": 

1322 print("We don't care about check !") 

1323 

1324 month_to_treat = datetime_last_info + relativedelta(months=1) 

1325 last_month = month_to_treat.month 

1326 year_last_month = month_to_treat.year 

1327 

1328 if type_fact == "redo": 

1329 if not only_correct_file: 

1330 success_reset, min_bill_id = reset_saxia_fact_after_including(month_start=month, year_start=year, 

1331 lpgss=lpgss_singleton, 

1332 root_folder_fact=root_folder_fact) 

1333 else: 

1334 min_bill_id = None 

1335 success_reset = True 

1336 if not success_reset: 

1337 print("Error in resetting the bill, nothing should have been done !") 

1338 return 

1339 last_month = month 

1340 year_last_month = year 

1341 month_to_treat = datetime.datetime(int(year_last_month), int(last_month), 1) 

1342 if min_bill_id != None: 

1343 id_fact = min_bill_id # - 1 # TODO VR not sure 

1344 elif ((datetime_last_info.month == last_month_date.month 

1345 and datetime_last_info.year == month_to_treat.year) or # NOTE: month_to_treat.year next to last_month_date.month looks suspicious; last_month_date.year was probably intended

1346 (datetime_last_info.month == two_month_before_present.month 

1347 and datetime_last_info.year == two_month_before_present.year)) and not inconsistent_data_last_id_stop: 

1348 print("We just generate the next month data") 

1349 elif type_fact == "force": 

1350 print("Force neede since fact done with too much delay") 

1351 else: 

1352 print("Nothing done since we need to make a lot of stuff and check !") 

1353 return 

1354 

1355 list_project_id = [] 

1356 if project_id != None and project_id != "" and project_id != 0: 

1357 list_project_id = [project_id] 

1358 # VR TODO rename last_month and year_last_month to month_used

1359 all_results = lpgss_singleton.get_treatment_done(last_month, year_last_month, project_id=list_project_id) 

1360 

1361 if only_correct_file: 

1362 print("More protection (third ? )") 

1363# lpgss_singleton = None 

1364 

1365 # VR TODO : we need the last month data of course ! 

1366 subfolder_facture = "facture_" + month_to_treat.strftime("%B_%Y") 

1367 mois_last_month_string = month_to_treat.strftime("%B") 

1368 if mois_last_month_string != "": 

1369 mois_last_month_string = mois_last_month_string[0].upper() + mois_last_month_string[1:] 

1370 month_string_for_data = month_to_treat.strftime("%B %Y") 

1371 if len(month_string_for_data) > 0: 

1372 month_string_for_data = month_string_for_data[0].upper() + month_string_for_data[1:] 

1373 else: 

1374 print("Unexpected behavior : month_string_for_data is empty !") 

1375 print(" subfolder_facture : " + str(subfolder_facture)) 

1376 from unidecode import unidecode 

1377 subfolder_facture = unidecode(subfolder_facture) 

1378 print(" subfolder_facture : " + str(subfolder_facture)) 

1379 

1380# folder_facture = os.path.join(root_folder_fact, "factures", subfolder_facture) 

1381 folder_facture = os.path.join(root_folder_fact, subfolder_facture) 

1382 if not os.path.exists(folder_facture.lstrip("/")): 

1383 os.makedirs(folder_facture.lstrip("/")) 

1384 

1385 map_pid_results = {} 

1386 count_pid_nb_page = {} 

1387 for data in all_results: 

1388 pid = data["project_id"] 

1389 id_file = data["id_file"] 

1390 if pid not in map_pid_results: 

1391 map_pid_results[pid] = [] 

1392 count_pid_nb_page[pid] = 0 

1393 map_pid_results[pid].append(data) 

1394 if data["nb_page"] != None: 

1395 count_pid_nb_page[pid] += data["nb_page"] 

1396 

1397 print(count_pid_nb_page) 

1398 

1399 map_list_nb_modif_per_pid = {} 

1400 map_list_folder_to_fact_per_pid = {} 

1401 

1402 list_doc_nb_modif_nb_word_acc = [] 

1403 

1404 map_prediag_id_file_page_ccsv = {} 

1405 for pid in map_pid_results: 

1406 list_doc_nb_modif_nb_word_one_pid = [] 

1407 list_folder_to_fact = [] 

1408 for data in map_pid_results[pid]: 

1409 

1410 id_file = None 

1411 from lib.sandbox.migration.append_prediag_df_complet_as_json import build_map_from_prediag_id_page_c_csv 

1412 data_prediag = data["info_date"]["prediag_csv"] if "info_date" in data and "prediag_csv" in data["info_date"] else "" 

1413 if ":" in data_prediag: 

1414 map_id_page_prediag = build_map_from_prediag_id_page_c_csv(data_prediag) 

1415 elif data_prediag == "": 

1416 map_id_page_prediag = {} 

1417 else : 

1418 map_id_page_prediag = {str(i): v for i, v in enumerate(data_prediag.split(","))}

1419 
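            # Illustrative sketch (formats assumed, not confirmed): data_prediag is either a

            # "page:class" CSV such as "0:OK,1:BAD" (handled by the external helper

            # build_map_from_prediag_id_page_c_csv) or a bare class list such as

            # "OK,BAD", mapped positionally as in the comprehension above:

            assert {str(i): v for i, v in enumerate("OK,BAD".split(","))} == {"0": "OK", "1": "BAD"}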

1420 if map_id_page_prediag == {}: 

1421 print("Missing prediag for pid : " + str(pid) + " id_file : " + str(data["id_file"]) + " data_prediag : " + str(data_prediag) + " hit : " + data["hash_id_treatment"]) 

1422# continue 

1423 else : 

1424 if "id_file" in data: 

1425 id_file = str(data["id_file"]) 

1426 print("Present prediag for " + str(id_file) + " : " + str(data["id_file"])) 

1427 else : 

1428 print("Wrong") 

1429 

1430 if "info_consolidate" not in data or "audit_info_write" not in data["info_consolidate"]: 

1431 print("Missing audit_info_write for pid : " + str(pid) + " id_file : " + str(data["id_file"]) + " data_prediag : " + str(data_prediag) + " hit : " + data["hash_id_treatment"]) 

1432# continue 

1433 

1434 for id_page in map_id_page_prediag: 

1435 id_page = str(id_page) 

1436 id_file_id_page_ccsv = id_file + ":" + id_page 

1437 prediag = map_id_page_prediag[id_page] 

1438 if prediag not in map_prediag_id_file_page_ccsv: 

1439 map_prediag_id_file_page_ccsv[prediag] = [] 

1440 if id_file_id_page_ccsv not in map_prediag_id_file_page_ccsv[prediag]: 

1441 map_prediag_id_file_page_ccsv[prediag].append(id_file_id_page_ccsv) 

1442 

1443 list_doc_nb_modif_nb_word = get_nb_modif_nb_word_change_or_not_per_doc(data["info_consolidate"], map_id_page_prediag) 

1444 list_doc_nb_modif_nb_word_acc.extend(list_doc_nb_modif_nb_word) 

1445 list_doc_nb_modif_nb_word_one_pid.extend(list_doc_nb_modif_nb_word) 

1446 

1447 nb_modif = data["info_consolidate"]["nb_modif"] if "info_consolidate" in data and "nb_modif" in data["info_consolidate"] else 0 

1448 input_file_at = data["info_date"]["input_file_available_at"] if "info_date" in data and "input_file_available_at" in data["info_date"] else None 

1449 output_file_at = data["info_consolidate"]["consolidate_at"] if "info_consolidate" in data and "consolidate_at" in data["info_consolidate"] else None 

1450 time_treatment = None

1451 import dateparser 

1452 if output_file_at != None and input_file_at != None: 

1453 try: 

1454 in_date = dateparser.parse(input_file_at) #, settings={'DATE_ORDER': 'DMY'}) 

1455 out_date = dateparser.parse(output_file_at) #, settings={'DATE_ORDER': 'DMY'}) 

1456 time_treatment = out_date - in_date

1457 print(" time_treatment : " + str(time_treatment))

1458 time_treatment = int(time_treatment.total_seconds() / 3600) # whole hours between input and consolidation

1459 except Exception as e: 

1460 print(str(e)) 

1461 
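            # Illustrative sketch: the treatment time above is the whole number of

            # hours between the two parsed timestamps, e.g. 2h30 -> 2:

            _demo_delta = datetime.datetime(2024, 11, 5, 12, 30) - datetime.datetime(2024, 11, 5, 10, 0)

            assert int(_demo_delta.total_seconds() / 3600) == 2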

1462 nb_page = data.get("nb_page") or 0 # guard: nb_page can be absent or None

1463 worst_prediag_doc, complete_prediag = get_worst_page_prediag(list(range(0, nb_page)), map_id_page_prediag) 

1464 list_folder_to_fact.append({"nb_page" : data["nb_page"], "id_file" : data["id_file"], "nb_modif" : nb_modif, "time_treatment" : time_treatment, "map_id_page_prediag" : map_id_page_prediag, "worst_prediag" : worst_prediag_doc})

1465 

1466 map_list_nb_modif_per_pid[pid] = list_doc_nb_modif_nb_word_one_pid 

1467 map_list_folder_to_fact_per_pid[pid] = list_folder_to_fact 

1468 

1469 print(""" 

1470 

1471 $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ 

1472 

1473 map_prediag_id_file_page_ccsv : 

1474 

1475 $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ 

1476 

1477 """) 

1478 print(str(map_prediag_id_file_page_ccsv)) 

1479 print(""" 

1480  

1481 $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ 

1482  

1483 """) 

1484 

1485 print(map_list_folder_to_fact_per_pid) 

1486 

1487 print(map_list_nb_modif_per_pid) 

1488 

1489 list_prediag_class = [] 

1490 for doc in list_doc_nb_modif_nb_word_acc: 

1491 if "worst_prediag" not in doc: 

1492 print("ERROR : worst_prediag not found in doc : " + str(doc)) 

1493 continue 

1494 if doc["worst_prediag"] not in list_prediag_class: 

1495 list_prediag_class.append(doc["worst_prediag"]) 

1496 

1497 map_prediag_class_int = assoc_doc_type_int(list_prediag_class) 

1498 print(" map_prediag_class_int : ") 

1499 print(str(map_prediag_class_int)) 

1500 

1501 list_doc_type = [] 

1502 for doc in list_doc_nb_modif_nb_word_acc: 

1503 if doc["doc_type"] not in list_doc_type: 

1504 list_doc_type.append(doc["doc_type"]) 

1505 

1506 map_doc_type_int = assoc_doc_type_int(list_doc_type) 

1507 print(" map_doc_type_int : ") 

1508 print(str(map_doc_type_int)) 
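    # Illustrative sketch (loudly hypothetical): assoc_doc_type_int is defined

    # elsewhere in this module; from its use here it plausibly maps each label to

    # a dense integer index, i.e. something like:

    assert {label: i for i, label in enumerate(["CR", "BIO"])} == {"CR": 0, "BIO": 1}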

1509 for pid in map_list_nb_modif_per_pid: 

1510 print_nb_word_nb_modif_type_doc(map_list_nb_modif_per_pid[pid], map_doc_type_int) 

1511 

1512 print(" AND ALL ") 

1513 

1514 print_nb_word_nb_modif_type_doc(list_doc_nb_modif_nb_word_acc, map_doc_type_int) 

1515 

1516 unit_price = 0.5 

1517 unit_price_default = 0.5 

1518 unit_price_high = 0.8 

1519 unit_price_low = 0.4 

1520 map_class_price_unit_price = {"PRICE_LOW" : unit_price_low, 

1521 "PRICE_MEDIUM" : unit_price, 

1522 "PRICE_HIGH" : unit_price_high} 

1523 map_class_price_unit_price = {} # NOTE: deliberately(?) emptied, so the LOW/MEDIUM/HIGH mapping above is unused and unit_price_default always applies

1524 

1525 import numpy as np 

1526 for pid in map_list_folder_to_fact_per_pid: 

1527 if only_correct_file: 

1528 print("We should only modify the input pid") 

1529 print(f" pid : {pid} should be the one given as argument {project_id}") 

1530 if project_id != pid: 

1531 continue 

1532 

1533 data_count = { 

1534 "document_type" : map_doc_type_int.keys(), 

1535 "count" : len(map_doc_type_int) * [0] 

1536 } 

1537 data_count_class = { 

1538 "prediag_class" : map_prediag_class_int.keys(), 

1539 "count" : len(map_prediag_class_int) * [0] 

1540 } 

1541 # TO BE USED 

1542 # Count table: document types crossed with prediagnostic classes

1543 np_array_count_class_type = np.zeros((len(map_prediag_class_int), len(map_doc_type_int)), dtype=int) 

1544 for data in map_list_nb_modif_per_pid[pid]: 

1545 data_count["count"][map_doc_type_int[data["doc_type"]]] += 1 

1546 data_count_class["count"][map_prediag_class_int[data["worst_prediag"]]] += 1 

1547 np_array_count_class_type[map_prediag_class_int[data["worst_prediag"]],map_doc_type_int[data["doc_type"]]] += 1 

1548 
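        # Illustrative sketch: np_array_count_class_type is a contingency table

        # (rows = prediag classes, columns = document types); a 2x2 miniature:

        _demo_ct = np.zeros((2, 2), dtype=int)

        _demo_ct[0, 1] += 1 # one document of type index 1 observed with class index 0

        assert _demo_ct.tolist() == [[0, 1], [0, 0]]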

1549 data_folder_price = { 

1550 'Dossier': [], 

1551 'Nb de Page': [], 

1552 'Prix HT\npar page': [], 

1553 'Prix total HT': [], 

1554 'TVA': [], 

1555 'Prix total TTC': [], 

1556 'Temps (h)\ntraitement': [], 

1557 } 

1558 print("pid : " + str(pid)) 

1559 sum_total = 0 

1560 time_treatment_total = 0

1561 for data in map_list_folder_to_fact_per_pid[pid]: 

1562 unit_price = unit_price_default 

1563 worst_prediag = data["worst_prediag"] if "worst_prediag" in data else "MISSING" 

1564 price_class = get_class_quali_from_worst_prediag(worst_prediag) 

1565 unit_price = map_class_price_unit_price.get(price_class, unit_price_default) 

1566 print(data) 

1567 data_folder_price["Dossier"].append(data["id_file"]) 

1568 data_folder_price["Nb de Page"].append(data["nb_page"]) 

1569 data_folder_price["Prix HT\npar page"].append(str(round(unit_price, 2))) 

1570 prix_total_ht = round(unit_price * data["nb_page"], 2) 

1571 data_folder_price["Prix total HT"].append(str(prix_total_ht)) 

1572 prix_total_ttc = round(prix_total_ht * (1. + float(taux_tva) / 100.), 2) # taux_tva is assumed to be defined at module level (a VAT rate in percent, e.g. 20)

1573 data_folder_price["TVA"].append("{:.2f}".format(prix_total_ttc - prix_total_ht)) 

1574 data_folder_price["Prix total TTC"].append(str(prix_total_ttc)) 

1575 sum_total += unit_price * data["nb_page"] * (1. + float(taux_tva) / 100.) 

1576 

1577 time_treatment = data["time_treatment"]

1578 if time_treatment != -1 and time_treatment != None:

1579 time_treatment_total += time_treatment

1580 else:

1581 print("ERROR : time_treatment is -1 or missing")

1582 data_folder_price["Temps (h)\ntraitement"].append(str(time_treatment))

1583 

1584 sum_total = round(sum_total, 2) 

1585 

1586 sum_total_wo_tva = sum_total / (1. + float(taux_tva) / 100.) 

1587 sum_total_wo_tva = round(sum_total_wo_tva, 2) 

1588 nb_page_total = int(sum_total_wo_tva / unit_price) # NOTE: assumes a single unit_price; wrong if per-class prices are ever enabled

1589 
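        # Worked example (hedged; assumes taux_tva == 20): 42 pages at 0.5 EUR/page

        # gives 21.00 EUR HT and 25.20 EUR TTC, matching the sample table further

        # down in this file:

        _demo_ht = round(0.5 * 42, 2)

        assert (_demo_ht, round(_demo_ht * (1. + 20. / 100.), 2)) == (21.0, 25.2)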

1590 data_folder_price["Dossier"].append("Total " + str(mois_last_month_string) + " " + str(year_last_month) + " en EUROS ") 

1591 data_folder_price["Nb de Page"].append(nb_page_total) #str(nb_page_total)) 

1592 data_folder_price["Prix HT\npar page"].append(str(unit_price)) 

1593 data_folder_price["Prix total HT"].append(str(sum_total_wo_tva)) 

1594 data_folder_price["TVA"].append("{:.2f}".format(sum_total - sum_total_wo_tva)) 

1595 data_folder_price["Prix total TTC"].append(str(sum_total) + " € TTC") 

1596 temps_traitement_moyen = -1 

1597 if len(map_list_folder_to_fact_per_pid[pid]) > 0: 

1598 temps_traitement_moyen = time_treatment_total / len(map_list_folder_to_fact_per_pid[pid])

1599 data_folder_price["Temps (h)\ntraitement"].append(str(round(temps_traitement_moyen, 1)) + "(moyenne)") 

1600 

1601 # data = { 

1602 # 'Dossier': ['Item A', 'Item B', 'Item C'], 

1603 # 'Nb de Page': [4, 5, 2], 

1604 # 'Prix par page': [7.5, 8.0, 9.5], 

1605 # 'Prix total': [30.0, 40.0, 19.0] 

1606 # } 

1607 df = pd.DataFrame(data_folder_price) 

1608 df_data_count = pd.DataFrame(data_count) 

1609 df_data_class = pd.DataFrame(data_count_class) 

1610 

1611 info_project = lpgss_singleton.get_project_info(pid) 

1612 owner_id = info_project["owner_id"] 

1613 map_id_email = lpgss_singleton.get_email_from_user_ids([owner_id]) 

1614 info_user = lpgss_singleton.select_user(owner_id) 

1615 

1616 if only_correct_file: 

1617 print("Fourth protection to be downgraded as third") 

1618 print("We should only generate the pdf for the input pid and nnot modify the DB") 

1619 lpgss_singleton = None 

1620 

1621 siret = info_user["miscinfo"]["siret"] if "miscinfo" in info_user and info_user["miscinfo"] != None and "siret" in info_user["miscinfo"] else "" 

1622 adresse = info_user["miscinfo"]["adresse"] if "miscinfo" in info_user and info_user["miscinfo"] != None and "adresse" in info_user["miscinfo"] else "" 

1623 email = info_user["email"] if "email" in info_user and info_user["email"] != None else "" 

1624 first_name = info_user["firstname"] if "firstname" in info_user and info_user["firstname"] != None else "" 

1625 last_name = info_user["lastname"] if "lastname" in info_user and info_user["lastname"] != None else "" 

1626 found_user = False 

1627 email_client_expert = "email@expert" 

1628 for data in map_id_email: 

1629 data_dict = dict(data) 

1630 if "id" in data_dict and owner_id == data_dict["id"]: 

1631 found_user = True 

1632 email_client_expert = data_dict["mail"] 

1633 break 

1634 name = f"""Docteur {first_name} {last_name}""" 

1635 print(" TODO add name ") 

1636 if not found_user: 

1637 email_client_expert = "anonymous@user" 

1638 print(" INTERNAL ERROR GENERATING id_fact " + str(id_fact) + " for pid " + str(pid) + " owner_id " + str(owner_id) + " email_client_expert " + email_client_expert + " and project_id : " + str(pid) + " sum_total : " + str(sum_total) + " df_data_count : " + str(df_data_count) + " id_fact : " + str(id_fact) + " email_client_expert : " + email_client_expert + " project_id : " + str(pid)) 

1639 

1640 pdf_path = create_pdf(datas = df, 

1641 sum_total = sum_total, 

1642 df_data_count = df_data_count, 

1643 id_fact = id_fact, 

1644 email_client_expert = email_client_expert, 

1645 name = name, 

1646 adresse=adresse, 

1647 siret=siret, 

1648 project_id = pid, 

1649 sub_folder = folder_facture, 

1650 df_data_class = df_data_class, 

1651 np_array_count_class_type = np_array_count_class_type) 

1652 

1653 if only_correct_file: 

1654 print ("We should only generate the pdf for the input pid and nnot modify the DB") 

1655 exit(2) 

1656 

1657 if lpgss_singleton != None: 

1658 print("Record please !") 

1659 query_id = lpgss_singleton.insert_new_bill(id_fact, pdf_path, pid, month_string_for_data, sum_total_wo_tva, sum_total, nb_page_total) 

1660 if query_id != id_fact: 

1661 print("ERROR ERROR id fact We should cancel all !") 

1662 

1663 id_fact = id_fact + 1 

1664 

1665 

1666# TODO: remove or actually use the module-level blocks below!

1667en_tete_gauche = """ 

1668SAS FOTONOWER FRANCE 

166930 Rue CHARLOT 

167075003 PARIS-3E-ARRONDISSEMENT 

1671FRANCE 

1672compta@fotonower.com 

1673https://www.fotonower.com/ 

1674N° TVA Intracommunautaire : FR80804468197 

1675N° SIRET : 80446819700012 

1676""" 

1677 

# NOTE: info_banque is assigned three times below; only the last assignment takes effect.

1678info_banque = """

1679Coordonnées bancaires : 

1680IBAN : FR76 1695 8000 0165 7288 7296 040 

1681BIC/SWIFT : QNTOFRP1XXX 

1682""" 

1683 

1684info_banque = """ 

1685Coordonnées bancaires : 

1686IBAN : FR76 1287 9000 0111 2329 2200 123  

1687BIC/SWIFT : DELUFR22XXX 

1688""" 

1689 

1690info_banque = """ 

1691Coordonnées bancaires : 

1692IBAN : FR76 3000 3024 2400 1500 3117 325  

1693BIC/SWIFT : SOGEFRPP 

1694""" 

1695 

1696info_banque_lines = info_banque.strip().split("\n") 

1697 

1698info_legal = info_banque 

1699 

1700 

1701 

1702condition_paiement_1 = """ 

1703Conditions de paiement : 

1704• 100 % soit """ 

1705 

1706condition_paiement_2 = """ € TTC à payer le : 

1707""" 

1708 

1709condition_paiement_3 = """ (à réception). 

1710""" 

1711 

1712 

1713 

1714def create_pdf(datas = None, 

1715 sum_total = 0, 

1716 df_data_count = None, 

1717 id_fact = 0, 

1718 email_client_expert = "email@expert", 

1719 name = "Docteur Expert", 

1720 adresse = "30 Rue Charlot", 

1721 siret = "FR56573", 

1722 project_id = -1, 

1723 sub_folder = "temp", 

1724 df_data_class = None, 

1725 np_array_count_class_type = None): 

1726 suffix = "_p_" + str(project_id) + "_id_" + str(id_fact) 

1727 

1728 en_tete_droite = email_client_expert + """ 

1729 project_id : """ + str(project_id) 

1730 

1731 if len(datas) == 0: 

1732 print("On ne génére pas de facture à 0") 

1733 return 

1734 

1735 customer_info = [ 

1736 "Adresse de facturation :", 

1737 name, 

1738 adresse, 

1739 "SIRET : " + siret, 

1740 email_client_expert, 

1741 "project_id=" + str(project_id) 

1742 ] 

1743 

1744 pdf_file = 'facture_saxia_' + suffix + '.pdf' 

1745 

1746 table_data = [] 

1747 table_data.append(list(datas.columns)) 

1748 for index, row in datas.iterrows(): 

1749 table_data.append(list(row)) 

1750 

1751 if float(table_data[-1][3]) >= 500 or project_id in [327]: 

1752 print("TRIGGER FORFAIT 500") 

1753 table_data[-1][2] = "FORFAIT500" 

1754 table_data[-1][3] = "500" 

1755 table_data[-1][4] = "100" 

1756 table_data[-1][5] = "600 € TTC" 

1757 
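    # Note (hedged): the cap above rewrites the totals row in place with a flat

    # rate; the hard-coded values stay mutually consistent at a 20% VAT rate:

    assert round(500 * (1. + 20. / 100.), 2) == 600.0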

1758 pdf_path = os.path.join(sub_folder, pdf_file) 

1759 create_first_page_fact(id_fact = id_fact, pdf_file=pdf_path.strip("/"), customer_info=customer_info, table_data=table_data) 

1760# pdf_path = pdf_path.strip("/") 

1761 

1762 

1763 

1764 import matplotlib.pyplot as plt 

1765 from matplotlib.backends.backend_pdf import PdfPages 

1766 from datetime import datetime 

1767 

1768 import matplotlib 

1769 matplotlib.use('agg') 

1770 

1771 pdf_file_graph = 'facture_saxia_' + suffix + '_graph.pdf' 

1772 pdf_path_graph = os.path.join(sub_folder, pdf_file_graph) 

1773 

1774 pdf_path_graph = pdf_path_graph.strip("/") 

1775# os.path.copy(pdf_path, pdf_path_graph) 

1776 

1777 try: 

1778 # Create the PDF file

1779 with PdfPages(pdf_path_graph) as pdf: 

1780 plt.clf() 

1781 # plt.close() 

1782 

1783 fontsize = 4 

1784 

1785# plt.rcParams.update({'font.size': fontsize}) 

1786# plt.rc('font', size=fontsize) # controls default text size 

1787# plt.rc('axes', titlesize=fontsize) # fontsize of the title 

1788# plt.rc('axes', labelsize=fontsize) # fontsize of the x and y labels 

1789 

1790 

1791 

1792 # Probably the only useful one! => but the ones below work as well!

1793# plt.rc('font', size=fontsize) 

1794 plt.rc('legend', fontsize=4*fontsize) # fontsize of the legend 

1795 

1796 plt.rc('xtick', labelsize=fontsize) # fontsize of the x tick labels 

1797 plt.rc('ytick', labelsize=fontsize) # fontsize of the y tick labels 

1798 

1799 # Plot the charts (simple examples here)

1800 plt.subplot(2, 2, 3) 

1801 datas['Nb de Page'][:-1].plot(kind='bar', legend=False) 

1802 plt.title('Nombre de pages\npar dossiers', size=5*fontsize, fontweight="bold") 

1803 

1804 index_null = df_data_count["count"] == 0 

1805 # df_data_count["document_type"][index_null] = "" 

1806 if all(index_null): 

1807 df_data_count["document_type"] = "" 

1808 else: 

1809 # only update the document_type label for rows whose count is zero

1810 df_data_count.loc[index_null, "document_type"] = "" 

1811 

1812 

1813 

1814 try : 

1815 plt.subplot(2, 2, 2) 

1816 plt.imshow(np_array_count_class_type, cmap='hot', interpolation='nearest') 

1817 plt.title('Répartition des prédiagnostics\npar type de document') 

1818 import numpy as np 

1819 plt.xticks(ticks=np.arange(len(list(df_data_count["document_type"]))), labels=list(df_data_count["document_type"]), rotation=45) 

1820 plt.yticks(ticks=np.arange(len(list(df_data_class["prediag_class"]))), labels=list(df_data_class["prediag_class"])) 

1821 for i in range(len(list(df_data_class["prediag_class"]))): 

1822 for j in range(len(list(df_data_count["document_type"]))): 

1823 text = plt.text(j, i, np_array_count_class_type[i, j], 

1824 ha="center", va="center", color="pink", fontsize=2) 

1825 

1826 except Exception as e: 

1827 print("Error in heatmap chart : " + str(e)) 

1828 

1829# If we want two pages

1830# plt.savefig(pdf, format='pdf') 

1831# plt.clf() 

1832 

1833 

1834 

1835 try: 

1836 import numpy as np 

1837 if np.sum(df_data_count["count"]) > 0: 

1838 plt.subplot(2, 2, 1) 

1839 

1840 plt.pie(df_data_count["count"], labels=(df_data_count["document_type"] + " " + df_data_count["count"].apply(str))) 

1841 plt.title('Répartition des types\nde documents') 

1842 else: 

1843 plt.subplot(2, 2, 1) 

1844 plt.text(0.5, 0.5, 'Aucune information sur les documents', horizontalalignment='center', verticalalignment='center')

1845 plt.title('Répartition des types de documents') 

1846 except Exception as e: 

1847 print("Error in pie chart doc_type : " + str(e)) 

1848 

1849# plt.subplots_adjust(wspace=0.2) 

1850 

1851 try: 

1852 import numpy as np 

1853 if np.sum(df_data_count["count"]) > 0: 

1854 plt.subplot(2, 2, 4) 

1855 plt.pie(df_data_class["count"], labels=(df_data_class["prediag_class"] + " " + df_data_class["count"].apply(str))) 

1856 plt.title('Répartition des prédiagnostics') 

1857 else: 

1858 plt.subplot(2, 2, 4) 

1859 plt.text(0.5, 0.5, 'Aucune information sur les prédiagnostics', horizontalalignment='center', verticalalignment='center')

1860 plt.title('Répartition des prédiagnostics') 

1861 except Exception as e: 

1862 print("Error in pie chart prediag : " + str(e)) 

1863 

1864 

1865 

1866 

1867 

1868 # datas['Prix total'].plot(kind='bar', color='green') 

1869 # plt.title('Prix total') 

1870 

1871 plt.subplots_adjust(wspace=0.2, hspace=0.4, left=0.1, right=0.9, top=0.9, bottom=0.1) 

1872 

1873 # Adjust the spacing between subplots

1874# plt.subplots_adjust(bottom=0.1, right=0.8, top=0.9) 

1875 

1876 # plt.tight_layout() 

1877 plt.savefig(pdf, format='pdf') 

1878 

1879 # Equivalent to this, but it does not work with an existing pdf, at least not here!

1880# pdf.savefig(plt.gcf()) 

1881 

1882 plt.close() 

1883 except Exception as e: 

1884 print("Error building graphs for invoice:") 

1885 print(str(e)) 

1886 

1887 

1888 # Merge both 

1889 if True: # kept as a manual toggle for the merge step

1890 try : 

1891 if os.path.exists(pdf_path.strip("/")) and os.path.exists(pdf_path_graph): 

1892 from PyPDF2 import PdfReader, PdfWriter 

1893 

1894 # Create a PDF writer object 

1895 pdf_writer = PdfWriter() 

1896 

1897 # Read the first page (invoice details) 

1898 with open(pdf_path.strip("/"), 'rb') as f: 

1899 pdf_reader = PdfReader(f) 

1900 for page in pdf_reader.pages: 

1901 pdf_writer.add_page(page) 

1902 

1903 # Read the second page (graphs) 

1904 with open(pdf_path_graph, 'rb') as f: 

1905 pdf_reader = PdfReader(f) 

1906 for page in pdf_reader.pages: 

1907 pdf_writer.add_page(page) 

1908 

1909 print(" WRITE in " + str(pdf_path.strip("/"))) 

1910 

1911 # Write to a new PDF file 

1912 with open(pdf_path.strip("/"), 'wb') as f: 

1913 pdf_writer.write(f) 

1914 except Exception as e: 

1915 print("Error in merge") 

1916 print(str(e)) 

1917 return pdf_path 

1918 

1919def test_voila(): 

1920 

1921 from matplotlib.backends.backend_pdf import PdfPages 

1922 import matplotlib.pyplot as plt 

1923 from matplotlib.pyplot import imread # scipy.misc.imread was removed from SciPy; matplotlib's imread reads JPEGs as uint8 arrays

1924 import os 

1925 import numpy as np 

1926 

1927 files = [ "Column0_Line16.jpg", "Column0_Line47.jpg" ] 

1928 def plotImage(f): 

1929 folder = "temp/" 

1930 im = imread(os.path.join(folder, f)).astype(np.float32) / 255 

1931 plt.imshow(im) 

1932 a = plt.gca() 

1933 a.get_xaxis().set_visible(False) # We don't need axis ticks 

1934 a.get_yaxis().set_visible(False) 

1935 

1936 pp = PdfPages("temp/page1.pdf") 

1937 plt.subplot(121) 

1938 plotImage(files[0]) 

1939 plt.subplot(122) 

1940 plotImage(files[1]) 

1941 pp.savefig(plt.gcf()) # This generates page 1 

1942 pp.savefig(plt.gcf()) # This generates page 2 

1943 pp.close() 

1944 

1945import os 

1946 

1947id_fact = 1666 

1948# Create a PDF document 

1949pdf_file = "output_invoice.pdf" 

1950 

1951customer_info = [ 

1952 "Docteur DUGUET", 

1953 "drduguet.tiphaine@gmail.com", 

1954 "project_id=112" 

1955] 

1956 

1957 

1958# Table of costs 

1959table_data = [ 

1960 ["Nbs de pages", "Prix HT par pages (€)", "Prix total HT (€)", "Prix total TTC (€)"], 

1961 ["42", "0.5", "21", "25.2"] 

1962] 

1963 

1964def create_first_page_fact(id_fact = 1666, pdf_file = "output_invoice.pdf", customer_info = [ 

1965 "Docteur DUGUET", 

1966 "drduguet.tiphaine@gmail.com", 

1967 "project_id=112" 

1968], table_data = [ 

1969 ["What", "Nbs de pages (€)", "Prix HT par pages (€)", "Prix total HT (€)", "Prix total TTC (€)"], 

1970 ["id_trucmuche", "42", "0.5", "21", "25.2 € TTC"] 

1971], montant_total = None): 

1972 

1973 if montant_total is None:

1974 montant_total = table_data[-1][-2].replace(" € TTC", "").strip() # str.strip(" € TTC") would strip characters, not the suffix

1975 
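    # Illustrative sketch: str.strip(" € TTC") strips any of those characters from

    # both ends instead of removing the suffix, hence the replace() used above:

    assert "25.2 € TTC".strip(" € TTC") == "25.2" # works here only by luck

    assert "25.2 € TTC".replace(" € TTC", "") == "25.2" # explicit and safe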

1976 from reportlab.lib.pagesizes import A4 

1977 from reportlab.lib import colors 

1978 from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle 

1979 from reportlab.lib.units import mm 

1980 from reportlab.lib.enums import TA_RIGHT, TA_LEFT, TA_CENTER 

1981 from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, Image 

1982 from reportlab.pdfgen import canvas 

1983 

1984 def add_background(canvas, doc): 

1985 git_safia_root = os.getenv("GITSAFIA") 

1986 img_folder = os.path.join(git_safia_root, "prompt/python/server/static/image_static/saxia") 

1987 background_image_path = os.path.join(img_folder, "Logo_Saxia_Picto_Vert_calque.png") # Replace with the path to your background image 

1988 canvas.drawImage(background_image_path, 0, -100, width = A4[0], preserveAspectRatio = True)#, mask = [0,100,0,100,0,100]) # preserveAspectRatio = True, , height=A4[1]) 

1989 

1990 

1991 document = SimpleDocTemplate(pdf_file, pagesize=A4, 

1992 rightMargin=20*mm, leftMargin=20*mm, topMargin=20*mm, bottomMargin=20*mm) 

1993 

1994 # Define styles 

1995 styles = getSampleStyleSheet() 

1996 styleN = styles["Normal"] 

1997 styleH = styles["Heading1"] 

1998 styleTitle = ParagraphStyle( 

1999 name="Title", 

2000 fontSize=16, 

2001 alignment=TA_CENTER, 

2002 spaceAfter=12 

2003 ) 

2004 styleSubtitle = ParagraphStyle( 

2005 name="Subtitle", 

2006 fontSize=12, 

2007 alignment=TA_CENTER, 

2008 spaceAfter=12 

2009 ) 

2010 styleRight = ParagraphStyle( 

2011 name="RightAlign", 

2012 fontSize=10, 

2013 alignment=TA_RIGHT, 

2014 ) 

2015 

2016 styleLeft = ParagraphStyle( 

2017 name="LeftAlign", 

2018 fontSize=10, 

2019 alignment=TA_LEFT, 

2020 ) 

2021 

2022 styleCenter = ParagraphStyle( 

2023 name="CenterAlign", 

2024 fontSize=10, 

2025 alignment=TA_CENTER, 

2026 ) 

2027 

2028 import datetime 

2029 date_today = datetime.datetime.now() 

2030 

2031 # Title 

2032 elements = [] 

2033 elements.append(Paragraph("SAXIA", styleTitle)) 

2034 name_fact = "FTN-SAXIA " + date_today.strftime("%Y-%m") + "-" + str(id_fact) 

2035 

2036 elements.append(Paragraph("Facture " + name_fact, styleH)) 

2037 elements.append(Spacer(1, 12)) 

2038 

2039 

2040 # "Le 4 août 2024" 

2041 date_facturation = date_today.strftime("Le %d %B %Y") 

2042 

2043 # Date and logo (Placeholder for Logo) 

2044 elements.append(Paragraph(date_facturation, styleRight)) 

2045 

2046 # Add company and customer details 

2047 company_info = [ 

2048 "SAS FOTONOWER FRANCE", 

2049 "30 RUE CHARLOT", 

2050 "75003 PARIS", 

2051 "compta@fotonower.com", 

2052 "https://www.fotonower.com", 

2053 "N TVA intracommunautaire : FR804468197", 

2054 "N Siret 80446819700012", 

2055 ] 

2056 

2057 company_paragraphs = [Paragraph(line, styleLeft) for line in company_info] # NOTE: unused below

2058 customer_paragraphs = [Paragraph(line, styleLeft) for line in customer_info] # NOTE: unused below

2059 

2060 # Organize them in a table 

2061 data = [[ 

2062 Paragraph("<br/>".join(company_info), styleLeft), 

2063 Paragraph("<br/>".join(customer_info), styleLeft) 

2064 ]] 

2065 

2066 table = Table(data) 

2067 elements.append(table) 

2068 elements.append(Spacer(1, 12)) 

2069 

2070 table = Table(table_data, colWidths=[48*mm, 20*mm, 17*mm, 20*mm, 15*mm, 30*mm, 25*mm]) 

2071 table.setStyle(TableStyle([ 

2072 ("BACKGROUND", (0, 0), (-1, 0), colors.lightgreen), 

2073 ("BACKGROUND", (-2, -1), (-2, -1), colors.red), 

2074 ("TEXTCOLOR", (0, 0), (-1, 0), colors.black), 

2075 ("ALIGN", (0, 0), (-1, -1), "CENTER"), 

2076 ("GRID", (0, 0), (-1, -1), 1, colors.black), 

2077 ])) 

2078 elements.append(table) 

2079 

2080 # Payment conditions and bank details 

2081 elements.append(Spacer(1, 12)) 

2082 elements.append(Paragraph("Conditions de paiement : à réception soit " + date_facturation.lower(), styleLeft)) 

2083 elements.append(Paragraph("Montant total : " + montant_total.lower() + " € TTC ", styleLeft)) 

2084 elements.append(Spacer(1, 12)) 

2085 elements.append(Paragraph(info_banque_lines[0], styleLeft)) 

2086 elements.append(Paragraph(info_banque_lines[1], styleLeft)) 

2087 elements.append(Paragraph(info_banque_lines[2], styleLeft)) 

2088 

2089 # Build the document 

2090 document.build(elements, onFirstPage=add_background) 

2091 

2092 print(f"PDF '{pdf_file}' created successfully.") 

2093 
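# Illustrative sketch: the invoice reference built inside create_first_page_fact

# follows "FTN-SAXIA YYYY-MM-<id_fact>":

assert "FTN-SAXIA " + datetime.datetime(2024, 11, 5).strftime("%Y-%m") + "-1666" == "FTN-SAXIA 2024-11-1666"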

2094 

2095def retrieve_and_finish(mtd_upload_id = 51, 

2096 mtd_complete_map = 46, 

2097 id_step_finish = 1, 

2098 list_project_void = [121], 

2099 lpgss = None, 

2100 project_id = 0, 

2101 nb_day = 2, 

2102 verbose = False, 

2103 filo_or_fifo = True, 

2104 min_time = 7200, 

2105 smart_relaunch = False): 

2106 runnings = retrieve_missed_folder(mtd_upload_id = mtd_upload_id, 

2107 mtd_complete_map = mtd_complete_map, 

2108 list_project_void = list_project_void, 

2109 lpgss = lpgss, 

2110 project_id = project_id, 

2111 nb_day = nb_day, 

2112 verbose = verbose, 

2113 filo_or_fifo = filo_or_fifo) 

2114 

2115 project_id_param_relaunch = 91 

2116 key_param_relaunch = "saxia/steps/relaunch" 

2117 smart_param_relaunch = lpgss.load_conf_project(project_id_param_relaunch, key_param_relaunch) 

2118 from lib.manaudit.lib_datou_audit import load_sub_json 

2119 smart_param_relaunch = load_sub_json(smart_param_relaunch, key_param_relaunch) 

2120 

2121 import datetime 

2122 now = datetime.datetime.now(datetime.timezone.utc) 

2123 map_running_by_id_file = {} 

2124 for running in runnings: 

2125 if "id_file" not in running: 

2126 print("ERROR missing id_file for running " + str(running)) 

2127 continue 

2128 id_file = running["id_file"] 

2129 if running["project_id"] in list_project_void: 

2130 print("Ignoring running from void project (121) " + str(running)) 

2131 continue 

2132 if id_file not in map_running_by_id_file: 

2133 map_running_by_id_file[id_file] = [] 

2134 map_running_by_id_file[id_file].append(running) 

2135 
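    # Illustrative sketch: the loop above is a plain group-by on id_file,

    # equivalent to a collections.defaultdict(list) accumulation:

    from collections import defaultdict

    _demo_groups = defaultdict(list)

    for _r in [{"id_file": "a"}, {"id_file": "a"}, {"id_file": "b"}]:

        _demo_groups[_r["id_file"]].append(_r)

    assert [len(_demo_groups["a"]), len(_demo_groups["b"])] == [2, 1]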

2136 selected_to_run = None 

2137 for id_file in map_running_by_id_file: 

2138 max_created_at = max(list(map(lambda x: x["created_at"], map_running_by_id_file[id_file]))) 

2139 

2140 delai = now - max_created_at 

2141 if delai.total_seconds() < min_time: 

2142 print(" Not launching id_file : " + str(id_file) + " because it was launched " + str(delai.total_seconds()) + " seconds ago (min_time = " + str(min_time) + ")" ) 

2143 continue 

2144 

2145 for running in map_running_by_id_file[id_file]: 

2146 created_at = running["created_at"] 

2147 nb_page = running["nb_page"] if "nb_page" in running else 100 

2148 # compute the time difference between the two dates

2149 delai = now - created_at 

2150 print(" time running : " + str(delai) + " " + str(delai.total_seconds())) 

2151 if verbose: 

2152 print(" one running or failed " + str(running)) 

2153 

2154 split_csv_empty = True 

2155 if "info_lab" in running and "split_csv_lab" in running["info_lab"]: 

2156 split_csv_lab = running["info_lab"]["split_csv_lab"] 

2157 if isinstance(split_csv_lab, str) and split_csv_lab != "":

2158 split_csv_empty = False 

2159 else : 

2160 split_csv_empty = True 

2161 

2162 all_doc_separated = False 

2163 if "info_lab" in running and "all_doc_separated" in running["info_lab"]: # and .lower() == "true": 

2164 print(str(running["info_lab"]["all_doc_separated"])) 

2165 all_doc_separated = True 

2166 else : 

2167 all_doc_separated = False 

2168 

2169 if split_csv_empty and ("only_with_manual_split_above_nb_page" in smart_param_relaunch \ 

2170 and smart_param_relaunch["only_with_manual_split_above_nb_page"] < nb_page): 

2171 print(str(running["id_file"]) + " : split csv empty, we avoid launching this one!")

2172 continue 

2173 print(str(running["id_file"]) + " candidate") 

2174 

2175 if delai.total_seconds() > min_time and running["project_id"] not in list_project_void: 

2176 if selected_to_run != None: 

2177 if filo_or_fifo : 

2178 if delai > selected_to_run["delai"]: 

2179 selected_to_run = running 

2180 selected_to_run["delai"] = delai 

2181 else: 

2182 if delai < selected_to_run["delai"]: 

2183 selected_to_run = running 

2184 selected_to_run["delai"] = delai 

2185 else : 

2186 selected_to_run = running 

2187 selected_to_run["delai"] = delai 

2188 

2189 # min_time 

2190 
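    # Illustrative sketch (hedged): the loop above is an arg-max / arg-min over the

    # waiting delay per candidate; filo_or_fifo == True keeps the job that has

    # waited the longest (oldest first), False keeps the most recent one:

    _demo_delais = [7200, 3600, 10800]

    assert (max(_demo_delais), min(_demo_delais)) == (10800, 3600)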

2191 # Now relaunch the selected one

2192 if selected_to_run != None: 

2193 print("Relaunching job : " + str(selected_to_run)) 

2194 from lib.datou.datou_exec import datou_exec 

2195 

2196 from lib.lib_safia_system import LibSafiaSystem 

2197 lss = LibSafiaSystem(lib_user_data_internal=lpgss) 

2198 project_id = selected_to_run["project_id"] if "project_id" in selected_to_run else 0 

2199 datou_to_run = lss.get_datou(mtd_complete_map, project_id = project_id) 

2200 if len(datou_to_run) != 1: 

2201 print("Unexpected size datou_to_run") 

2202 datou_to_run = datou_to_run[0] 

2203 

2204 input = {} 

2205 # - [ ] TODO : fetch the input parameters from the audit : file

2206 # - [ ] TODO : check that the file exists; otherwise we may loop forever (we could update created_at or use modified_at?)

2207 # - [ ] TODO : fetch the complete_param_json from the audit

2208 

2209 

2210 hash_id_treatment = selected_to_run["hash_id_treatment"] 

2211 output_hit = selected_to_run["info_date"]["output_hit"] if "info_date" in selected_to_run and "output_hit" in selected_to_run["info_date"] else None # the guard must test the same key ("output_hit") that is read

2212 all_data = lpgss.load_data_audit(hash_id_treatment) 

2213 audit_info = all_data["audit_info"] if "audit_info" in all_data else {} 

2214 

2215 print(" We don't use id_step_finish for now or id_step_finish - 1!") 

2216 input = audit_info["io_exec"]['0']["input"] if "io_exec" in audit_info and '0' in audit_info["io_exec"] else {} # NOTE: shadows the builtin input()

2217 if output_hit != None: 

2218 input["output_hit"] = output_hit 

2219 

2220 if "id_file" not in selected_to_run: 

2221 print("Pb date") 

2222 exit(1) 

2223 

2224 from lib.lib_util import parse_id_date_nb_page_folder 

2225 nb, id, date = parse_id_date_nb_page_folder(selected_to_run["id_file"]) 

2226 

2227 if date == None: 

2228 print("Pb date") 

2229 exit(1) 

2230 date = selected_to_run["uploaded_at"].strftime("%Y%m%d") 

2231 

2232 try : 

2233 date_parsed = datetime.datetime.strptime(date, "%Y%m%d") 

2234 except Exception as e: 

2235 print("Pb date") 

2236 print(str(e)) 

2237 date_parsed = datetime.datetime.now() 

2238 
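        # Illustrative sketch: the "%Y%m%d" round-trip used above:

        assert datetime.datetime.strptime("20241105", "%Y%m%d") == datetime.datetime(2024, 11, 5)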

2239 input["date"] = date_parsed 

2240 

2241 file = input["file"] if "file" in input else "" 

2242 if isinstance(file, list): 

2243 test_file = file[0] 

2244 elif isinstance(file, str): 

2245 test_file = file 

2246 else: 

2247 print("Unexpected type for file : " + str(type(file))) 

2248 print(" We don't do anything ! ") 

2249 return 

2250 if not os.path.exists(test_file): 

2251 print("ERROR : file not found : " + file) 

2252 print(" We don't do anything ! ") 

2253 return 

2254 

2255 if "info_lab" in all_data and "split_csv_lab" in all_data["info_lab"]: 

2256 input["saxia_split_end_csv"] = all_data["info_lab"]["split_csv_lab"] 

2257 input["saxia_all_doc_separated"] = all_doc_separated 

2258 

2259 complete_param_json = audit_info["config"]["complete_param_json"] if "config" in audit_info and "complete_param_json" in audit_info["config"] else {} 

2260 

2261 from auth.lib_auth import build_layer_from_configuration 

2262 map_type_layer_inst = build_layer_from_configuration(lss, complete_param_json) 

2263 # Execution context for datou processing without PG, VR 2023

2264 complete_param_json["map_type_layer_inst"] = map_type_layer_inst 

2265 

2266 datou_linear_list_steps = list(map(lambda x : x["name"], datou_to_run["steps"])) 

2267 list_param_json_steps = list(map(lambda x : x["param_json"], datou_to_run["steps"])) 

2268 

2269 ret = datou_exec(datou_linear_list_steps = datou_linear_list_steps, 

2270 input = input, 

2271 complete_param_json = complete_param_json, 

2272 verbose = verbose, 

2273 with_audit = True, 

2274 privacy = False, 

2275 # map_type_layer_inst: dict = {}, 

2276 list_param_json_steps = list_param_json_steps, 

2277 id_step_incomplete_args = 0) # id_step_finish) 

2278 

2279 return ret 

2280 else : 

2281 print(" ALL DONE !") 

2282 return None 

2283 

2284def retrieve_missed_folder(mtd_upload_id = 51, 

2285 mtd_complete_map = 46, 

2286 list_project_void = [121], 

2287 lpgss = None, 

2288 project_id = 0, 

2289 nb_day = 5, 

2290 verbose = False, 

2291 filo_or_fifo = True): 

2292 

2293 ret = lpgss.running_job(project_id = project_id, verbose = verbose, nb_day = nb_day) 

2294 

2295 print(str(ret)) 

2296 count_running = len(ret["running_or_failed"]) if "running_or_failed" in ret else 0 

2297 print(" count_running : " + str(count_running)) 

2298 

2299 return ret["running_or_failed"] 

2300 

2301def datou_exec_partial(id_step, mtd_id, input_datou, 

2302 project_id, user_id, 

2303 verbose = False): 

2304 pass # TODO VR: not implemented yet

2305