# Coverage for lib/batch/lib_batch.py: 23% (1383 statements)

import datetime
import os
import types

import pandas as pd


def prepare_ioput_col_datou(inputs):
    if inputs is None or inputs == "":
        return [], []
    inputs = inputs.split(",")
    input_column_name = []
    input_datou_name = []
    for i in inputs:
        if ":" in i:
            input_column_name.append(i.split(":")[0])
            input_datou_name.append(i.split(":")[1])
        else:
            input_column_name.append(i)
            input_datou_name.append(i)
    return input_column_name, input_datou_name
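
# Example (illustrative): each comma-separated entry is either "col" or
# "col:datou"; the single form maps the same name to both lists.
#   >>> prepare_ioput_col_datou("url:input_url,text")
#   (['url', 'text'], ['input_url', 'text'])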


def create_pandas_table_from_list_column_and_input(input_column_name, output_column_name):
    df = pd.DataFrame(columns=input_column_name + output_column_name)
    return df


def append_data_to_df(df, input_arg_by_name, verbose=False):
    if verbose:
        print(" DF in append_data_to_df : ")
        print(df)
    # df.append() was removed from pandas; pd.concat is the general replacement:
    # df = pd.concat([df, pd.DataFrame([input_arg_by_name])], ignore_index=True)
    # As noted by @cottontail, it is also possible to use loc, although this only
    # works if the new index is not already present in the DataFrame (typically
    # the case if the index is a RangeIndex):
    # https://stackoverflow.com/questions/75956209/error-dataframe-object-has-no-attribute-append
    df.loc[len(df)] = input_arg_by_name  # only use with a RangeIndex!
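
# Example (illustrative): appending one row keyed by column name; the dict
# keys are aligned to the columns, which assumes a default RangeIndex.
#   >>> df = create_pandas_table_from_list_column_and_input(["a"], ["b"])
#   >>> append_data_to_df(df, {"a": 1, "b": 2})
#   >>> len(df)
#   1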


def find_first_missing_output(df, output_column_name, input_column_name):
    # TODO VR 27-12-23 modularize this part
    if output_column_name is not None and input_column_name is not None:
        # Find the first row with no output data
        if output_column_name[0] not in df.columns:
            # Add the column to the dataframe, initialized with empty strings
            df[output_column_name[0]] = ""
        list_no_data = df[output_column_name[0]] == ""
        iloc_missing = df.index[list_no_data]
        if iloc_missing.shape == (0,):
            print("All DONE")
            return None, None
        first_missing_output = iloc_missing[0]
        missing_row_index = iloc_missing[0]

        # Input data for this row.
        # I suspect a different behavior when len(input_column_name) == 1
        # (although that has always been the case so far).
        # Besides, input_data is rebuilt later on, so this may not be necessary.
        print("About to get some first_missing_output :" + str(first_missing_output))
        print("Using input_column_name : " + str(input_column_name))
        print(" available cols : " + str(df.columns.values.tolist()))
        print(" number data : " + str(len(df.values.tolist())))

        if len(input_column_name) == 1:
            input_data = df[input_column_name[0]][first_missing_output]
        else:
            input_data = [df[k][first_missing_output] for k in input_column_name]
    else:
        input_data = None
        missing_row_index = None

    return input_data, missing_row_index


def create_pandas_table_from_text(text,
                                  input_column_name=None,
                                  output_column_name=None,
                                  separator="|",
                                  verbose=False):
    # Earlier attempts parsed the text with the markdown / md2py libraries instead.
    if verbose:
        print(text)
    if text is None:
        return None

    # Drop lines that start with a hash or that do not contain the separator
    lines = [line for line in text.strip().split('\n') if (not line.startswith("#") and separator in line)]

    if len(lines) == 0:
        print("ERROR should exit : we have no input data, maybe the markdown is not well formatted ?")
        print(text)
        return None

    # The column headers are on the first kept line
    headers = [d.strip(" ") for d in lines[0].split(separator)]
    # Start at index 2 because the markdown format has a data-less separator
    # line between the column names and the data rows
    # TODO but do better
    data = [line.split(separator) for line in lines[2:]]

    data = [[d.strip(" ") for d in row] for row in data]

    max_nb_columns = max([len(row) for row in data]) if len(data) > 0 else -1
    headers = [headers[i].strip(" ") if i < len(headers) else "Column " + str(i) for i in range(max_nb_columns)]

    # Make sure every row has as many columns as the header
    data = [row if len(row) == len(headers) else row + [''] * (len(headers) - len(row)) for row in data]

    if verbose:
        print(" data to build pd " + str(len(data)))

    # Build the DataFrame
    df = pd.DataFrame(data, columns=headers)
    if '' in df.columns:
        df = df.drop('', axis=1)

    return df
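
# Example (illustrative): a minimal markdown table this parser accepts
# (a header line, one "---" separator line, then data rows).
#   >>> text = "col_a | col_b\n--- | ---\n1 | 2"
#   >>> create_pandas_table_from_text(text).shape
#   (1, 2)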


css_button_class = "btn btn-primary text-l bg-blue-500 text-white p-2 rounded"


def make_replace_upload_button_manax_hc(hit):
    return '<button type="button" class="button_replace {}" id="{}" ><i class="bi bi-upload"></i>Replace</button>'.format(css_button_class, hit)


def make_audit_button_manax_hc(id_line, href):
    return '<a href="{}"><button type="button" id="button-launch-{}" class="{}"><i class="bi bi-rocket-takeoff-fill float-left"></i> Launch</button></a>'.format(href, id_line, css_button_class)


def make_launch_button(endpoint, args, id_line, name="Launch"):
    return '<a href="{}?{}"><button type="button" id="button-launch-create-{}" class="{}"><i class="bi bi-info-circle-fill"></i>{}</button></a>'.format(endpoint, args, id_line, css_button_class, name)


def make_downloadable(url, id_line, name="File"):
    return '<a href="{}"><button type="button" id="button-downloadable-{}-{}" class="{}"><i class="bi bi-info-circle-fill"></i>{}</button></a>'.format(url, name, id_line, css_button_class, name)


from flask import url_for


def append_action_steps_button(table,
                               safia_suivi_prod_doc_id,
                               safia_suivi_prod_proj_id,
                               saxia_steps={}):
    # table.rename(columns={"caffemodel_name": "description", "svm_number_of_descriptors": "dimension", "main_photo_desc_type": "main_pdt"}, inplace=True)
    try:
        for step in saxia_steps:
            if "col" in saxia_steps[step]:
                col = saxia_steps[step]["col"]
            else:
                import sys
                sys.stdout.write("&")
                # print("This action is not meant to be a button in a column : " + (saxia_steps[step]["name"] if "name" in saxia_steps[step] else "no name"))
    except Exception as e:
        print("Several problems in configuration ! " + str(e))
    for x in table.index:
        mtd_id = 40
        out_folder = "/"  # "static/saxia"
        suffix = ""  # "/static/saxia"
        doc_audit = "non:0"
        hash_id_treatment = "dummy"
        file = None
        anon_file = None
        out_file = None
        try:
            mtd_id = table.loc[x, 'datou_id'] if "datou_id" in table.columns else mtd_id
            out_folder = table.loc[x, 'out_folder'] if "out_folder" in table.columns else out_folder
            suffix = table.loc[x, 'suffix'] if "suffix" in table.columns else suffix
            hash_id_treatment = table.loc[x, 'hash_id_treatment'] if "hash_id_treatment" in table.columns else hash_id_treatment
            file = table.loc[x, 'file'] if "file" in table.columns else file
            anon_file = table.loc[x, 'anon_file'] if "anon_file" in table.columns else anon_file
            out_file = table.loc[x, 'out_file'] if "out_file" in table.columns else str(hash_id_treatment) + ".docx"
            doc_audit = table.loc[x, 'doc_audit'] if "doc_audit" in table.columns else doc_audit
            audit_output = table.loc[x, 'audit_output'] if "audit_output" in table.columns else None
        except Exception as e:
            print("Error in append_action_steps_button : " + str(e))

        if out_folder.startswith("/home/safia/OneDrive/Test Safia expertise"):
            out_folder = out_folder.replace("/home/safia/OneDrive/Test Safia expertise", "/static/onedrive/") + "/"
        elif out_folder.startswith("/home/safia/workarea/git/Safia/prompt/python/server/static/onedrive"):
            out_folder = out_folder.replace("/home/safia/workarea/git/Safia/prompt/python/server/static/onedrive", "/static/onedrive") + "/"
        filename = os.path.basename(file) if file is not None else file

        out_folder_hc = "static/onedrive/output"
        out_folder_hc_http = "static%2Fonedrive%2Foutput%2F"

        audit_button_aux = make_audit_button_manax_hc(mtd_id, href=url_for('manax', id=str(mtd_id), suffix=out_folder, hash_id_treatment=hash_id_treatment, safia_suivi_prod_doc_id=safia_suivi_prod_doc_id, safia_suivi_prod_proj_id=safia_suivi_prod_proj_id, file=file, out_file=out_file))  # , audit_file=doc_audit
        table.loc[x, "Audit"] = audit_button_aux

        replace_button_aux = make_replace_upload_button_manax_hc(hash_id_treatment)
        table.loc[x, "Replace"] = replace_button_aux
        # Here I try to lay out the launch button, which should be built from the
        # row data and the configuration parameters stored in the project data
        # (data already created in project 70).
        col_name = "Launch Extract"
        endpoint = "/api/v1/safia/query"
        datou_id = "datou-40"
        if file is not None:
            out_folder_hc = "static/onedrive/output"
            out_folder_hc_http = "static%2Fonedrive%2Foutput%2F"
            args = "hash_id_treatment=" + hash_id_treatment + "&input_csv=hash_id_treatment_input%3D" + hash_id_treatment + "%2Cout_folder%3D" + out_folder_hc_http + "%2Cinput_col_cr%3Dcr_correct_typo%2Cinput_col_intro%3Dintro_correct_typo%2Cload_df_from_db_and_correct%3Dtrue%2Cfile%3D" + file.replace("/", "%2F") + "%2Cwith_audit%3D1"
        else:
            # TODO args to set up for anon file : migrate file / filename, filename is better, ok
            args = ""
        args += "&with_audit=1"
        args += "&" + datou_id + "=true&object=simple_text_query&load_df_from_db_and_correct=true"
        id_line = x
        args += "&safia_suivi_prod_doc_id=" + str(safia_suivi_prod_doc_id)
        args += "&safia_suivi_prod_proj_id=" + str(safia_suivi_prod_proj_id)
        args += "&col_index_treatment=file"
        if file is not None:
            args += "&file=" + file
        audit_button_aux = make_launch_button(endpoint, args, id_line)
        table.loc[x, col_name] = audit_button_aux
        # At some point there was a consolidate button here; instead we now loop
        # over the saxia_steps below.
        # conf_consolidate = saxia_steps["consolidate"] if "consolidate" in saxia_steps else {}
        # output_datou_to_col = conf_consolidate["output_datou_to_col"] if "output_datou_to_col" in conf_consolidate else "nb_page:nb_page,nb_modif:nb_modif_manual,nb_modif_class:nb_modif_class_manual,nb_doc:nb_doc,nb_word:nb_word_result"
        # args += "&output_datou_to_col=" + output_datou_to_col  # VR 10-4-24 : I don't know what this is for!
        datou_id = "datou-44"
        if file is not None:
            args = "hash_id_treatment=" + hash_id_treatment + "&input_csv=hash_id_treatment_input%3D" + hash_id_treatment + "%2Cout_folder%3Dstatic%2Fonedrive%2Foutput%2F%2Cfile%3D" + file.replace("/", "%2F") + "%2Cwith_audit%3D1"
        else:
            # WIP : need to align file and filename
            args = ""
        inputs = ["hash_id_treatment", "object=simple_text_query", "load_df_from_db_and_correct=true"]

        if file is not None:
            url = os.path.join(suffix, file)
        else:
            url = ""
        if 'static' in url:
            url = "/static" + url.split("static")[1]
        col_name = "Input File Anon"
        name = "File Input"
        audit_button_aux = make_downloadable(url, id_line, name)
        table.loc[x, col_name] = audit_button_aux
        col = ""
        try:
            for step in saxia_steps:
                if "col" in saxia_steps[step]:
                    col = saxia_steps[step]["col"]
                    inputs = saxia_steps[step]["inputs"] if "inputs" in saxia_steps[step] else []
                    mtd_id = saxia_steps[step]["datou-id"] if "datou-id" in saxia_steps[step] else None
                    output_datou_to_col = saxia_steps[step]["output_datou_to_col"] if "output_datou_to_col" in saxia_steps[step] else ""  # e.g. for anon "anon_filename,nom,prenom"

                    # Force to datou for now
                    type = saxia_steps[step]["type"] if "type" in saxia_steps[step] else "datou"

                    list_args = []

                    if type == "datou":
                        list_args.append("object=simple_text_query")
                        list_args.append("with_audit=true")
                        url = "/api/v1/safia/query"
                    elif type == "endpoint":
                        url = saxia_steps[step]["url"]
                        list_args_from_json = saxia_steps[step]["args"].split(",") if "args" in saxia_steps[step] else []
                        for args in list_args_from_json:
                            if args in table.columns:
                                list_args.append(args + "=" + table.loc[x, args])

                    # not used
                    special = None

                    duplicate = {
                        "col": "Duplicate Treatment",
                        "type": "endpoint",
                        "endpoint": "api/v1/saxia/duplicate_hash_id_treatment",
                        "args": "hash_id_treatment",
                        "ret": "new_hash_id_treatment",
                        "name": "duplicate"
                    }

                    list_input_csv = []
                    for input in inputs:
                        if input in table.columns:
                            input_val = table.loc[x, input]
                            one_input = input + "%3D" + input_val.replace("/", "%2F")
                            list_input_csv.append(one_input)

                    if len(list_input_csv) > 0:
                        input_csv_val = "%2C".join(list_input_csv)
                        input_csv_var_and_val = "input_csv=" + input_csv_val
                        list_args.append(input_csv_var_and_val)

                    list_args.append("safia_suivi_prod_doc_id=" + str(safia_suivi_prod_doc_id))
                    list_args.append("safia_suivi_prod_proj_id=" + str(safia_suivi_prod_proj_id))
                    list_args.append("col_index_treatment=file")  # to remove
                    if filename is not None:
                        list_args.append("value_index=" + filename)
                        list_args.append("col_index=filename")
                    list_args.append("output_datou_to_col=" + output_datou_to_col)

                    if mtd_id is not None:
                        list_args.append("datou-" + str(mtd_id) + "=true")

                    if len(list_args) > 0:
                        args = "&".join(list_args)
                    else:
                        args = ""

                    action_button_aux = make_launch_button(url, args, x)
                    table.loc[x, col] = action_button_aux
                else:
                    import sys
                    sys.stdout.write("&")
                    # This action is not meant to be a button in a column
        except Exception as e:
            print("Several problems in configuration for col : " + str(col) + " " + str(e))

    # the table is modified in place
    # return table


def set_custom_display(all_result, endpoint_df_conf_type_suivi, col_csv=""):
    list_col_from_input = col_csv.split(",") if col_csv is not None and col_csv != "" else []
    list_col_display = endpoint_df_conf_type_suivi["list_col_display"] if "list_col_display" in endpoint_df_conf_type_suivi else []
    list_col_display_init = list_col_display.copy()
    list_col_virtual = endpoint_df_conf_type_suivi["list_col_virtual"] if "list_col_virtual" in endpoint_df_conf_type_suivi else []
    list_col_action = endpoint_df_conf_type_suivi["list_col_action"] if "list_col_action" in endpoint_df_conf_type_suivi else []

    for col in list_col_virtual:
        all_result[col] = "tofill"
        if "col_source" not in list_col_virtual[col]:
            print(" Missing col_source info in " + str(list_col_virtual[col]))
            continue
        col_source = list_col_virtual[col]["col_source"]
        key = list_col_virtual[col]["key"] if "key" in list_col_virtual[col] else None
        type = list_col_virtual[col]["type"] if "type" in list_col_virtual[col] else "text"
        if type == "text" or type == "humanize_size":
            if col_source in all_result.columns:
                all_result[col] = "init"
                if key is None:
                    continue
                elif "/" in key:
                    from lib.manaudit.lib_datou_audit import load_sub_json
                    all_result[col] = all_result[col_source].apply(lambda x: load_sub_json(x, key))
                else:
                    all_result[col] = all_result[col_source].apply(lambda x: x[key] if key in x else None)
                if type == "humanize_size":
                    from lib.lib_util import humanize_size_file
                    all_result[col] = all_result[col].apply(lambda x: humanize_size_file(x))
                list_col_display.append(col)
        elif type == "link_format" or type == "date_link_format":
            if "format" not in list_col_virtual[col]:
                print(" Missing format info in " + str(list_col_virtual[col]))
                continue
            if "variables" not in list_col_virtual[col]:
                print(" Missing variables info in " + str(list_col_virtual[col]))
                continue
            all_result[col] = col
            for index, row in all_result.iterrows():
                format = list_col_virtual[col]["format"]
                for var in list_col_virtual[col]["variables"]:
                    key = list_col_virtual[col]["variables"][var]["key"]
                    col_source = list_col_virtual[col]["variables"][var]["col_source"]
                    try:
                        value = all_result.loc[index, col_source]
                        if key is not None:
                            value = value.get(key, "dummy")
                    except Exception as e:
                        print(" Pb in getting value for a virtual column name " + str(e))
                        value = "dummy"
                    format = format.replace("{" + var + "}", str(value))
                # VR 16-5-24 TODO hack to have the result auto-download in suivi?type=lab
                format = format.replace("/home/safia/workarea/git/Safia/prompt/python/server/static", "/static")
                format = format.replace("//", "/")
                if type == "link_format":
                    all_result.loc[index, col] = "<a href='" + format + "'>" + col + "</a>"
                elif type == "date_link_format":
                    format_loc = format.replace("/home/safia/workarea/git/Safia/prompt/python/server/static", "static")
                    format_loc = format_loc.replace("//", "/")
                    format_loc = format_loc.lstrip("/")
                    try:
                        datetime_from_stat = os.stat(format_loc).st_ctime
                        dt = datetime.datetime.fromtimestamp(datetime_from_stat)
                        date_str = dt.strftime("%Y-%m-%d %H:%M:%S")
                    except Exception as e:
                        print(" Pb in date_link_format " + str(e))
                        date_str = "No date, possibly no file"
                    all_result.loc[index, col] = "<a href='" + format + "'>" + date_str + "</a>"
                else:
                    print("Unexpected type " + str(type))
                    all_result.loc[index, col] = "unexpected"
            list_col_display.append(col)
        else:
            print(" Unexpected type " + str(type))
    for col in list_col_action:
        value = list_col_action[col]["value"] if "value" in list_col_action[col] else None
        col_source = list_col_action[col]["col_source"] if "col_source" in list_col_action[col] else None
        key = list_col_action[col]["key"] if "key" in list_col_action[col] else None
        action_type = list_col_action[col]["action_type"] if "action_type" in list_col_action[col] else "input"
        if col_source in all_result.columns:
            if action_type == "launch":
                print("TODO and refacto by the way")
            elif action_type == "download":
                print("TODO and refacto by the way")
            elif action_type == "checkbox":
                all_result[col] = "<input type=checkbox data-key=" + str(key) + " data-col=" + str(col_source) + " ></input>"
                for index, row in all_result.iterrows():
                    # ready_to_deliver
                    if row[col_source] is not None:
                        all_result.loc[index, col] = "<input type=checkbox checked data-key=" + str(key) + " data-col=" + str(col_source) + " ></input>"
            elif action_type == "input":
                begin_input = "<input type=text class='input_dyn' data-key=" + str(key) + " data-col=" + str(col_source)
                if "typedata" in list_col_action[col]:
                    begin_input += " data-typedata=" + list_col_action[col]["typedata"]
                end_input = " ><div class='status' ></div></input>"
                all_result[col] = begin_input + end_input
                for index, row in all_result.iterrows():
                    value = row[col_source]
                    if key is not None:
                        if key in value:
                            value = value[key]
                    if row[col_source] is not None:
                        all_result.loc[index, col] = begin_input + " value='" + str(value) + "' " + end_input
            else:
                print("Unknown action_type : " + str(action_type))
            list_col_display.append(col)
        elif value is not None:
            all_result[col] = value
            list_col_display.append(col)

    # Actually it does not matter that this is not unique => YES it does, and I don't want that!
    if list_col_display_init == []:
        for l in list_col_from_input:
            if l not in list_col_display:
                list_col_display.append(l)

    return list_col_display


def create_markdown_table_from_df(df):
    # Build the table from the df column names, add a line made only of "---",
    # then join cells with " | " and rows with "\n"
    list_col_names = df.columns.values.tolist()
    list_col_names = [str(i) for i in list_col_names]
    list_col_names = " | ".join(list_col_names)
    list_col_names = list_col_names + "\n" + "--- | " * len(df.columns.values.tolist())
    df_list = df.values.tolist()
    body_content = "\n".join(map(lambda x: " | ".join(map(str, x)), df_list))
    return list_col_names + "\n" + body_content
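
# Example (illustrative): a DataFrame with columns a, b and one row [1, 2]
# yields "a | b\n--- | --- | \n1 | 2" (the trailing separator on the "---"
# line is an artifact of the string repetition above).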


def load_safia_doc_input_list(safia_document_id, safia_project_id, lpgss, limit=10000):
    info_project = lpgss.get_project_info(safia_project_id)
    table_documents = info_project["table_documents"] if "table_documents" in info_project else "table_documents"

    # lpgss.load_document(safia_project_id, safia_document_id)
    check_table_exists = lpgss.check_table_exists(table_documents)
    if not check_table_exists:
        print("ERROR treated as warning : table " + table_documents + " does not exist, please create it first !")
        documents = []
    else:
        documents = lpgss.get_documents(table_documents, safia_document_id, chunk_id=None, limit=limit)
    print("Number of documents : " + str(len(documents)))
    if len(documents) == 0:
        print(" We will need to create the df and document")
        return None

    total_content = ""
    for d in documents:
        total_content += d["content"]

    return total_content


def fill_df_from_datou_result(df, output_datou_name,
                              output_column_name, output,
                              value_index,
                              col_index="line_number"):
    if value_index is None:
        print("Missing value_index in fill_df_from_datou_result")
        return
    line_number = None
    if col_index == "line_number":
        line_number = value_index
    else:
        if col_index not in df.columns:
            print(" Missing columns " + str(col_index) + " in df, data not saved from datou in df ")
        else:
            line_number = df[col_index] == value_index
            if len(df.index[line_number]) == 0:
                print(" No matching row for value_index " + str(value_index))
                line_number = 0
            elif len(df.index[line_number]) > 1:
                print(" Unexpected multiple line_number, too bad, we will try to update the first !")
                line_number = df.index[line_number][0]
            else:
                line_number = df.index[line_number][0]

    if line_number is None:
        print("Missing line to update in df : value_index : " + str(value_index))
        line_number = 0

    info_store = {}

    for j in range(len(output_datou_name)):
        if output_datou_name[j] in output:
            val = output[output_datou_name[j]]

            info_store[output_column_name[j]] = val

            val = str(val).replace("\n", "<br>")

            df.loc[line_number, output_column_name[j]] = val
        else:
            print(" Missing output " + str(output_datou_name[j]) + " keys available are : " + str(output.keys()))

    return info_store


def aux_fill_load_df(audit_info):
    from lib.manaudit.lib_datou_audit import load_sub_json
    key = "io_exec/9/output/df_complet_as_json"
    try:
        df_from_json = load_sub_json(audit_info, key)
        print("df_from_json : " + str(df_from_json))
    except Exception as e:
        print("Error in loading df_as_json : " + str(e))
        return None

    try:
        df = pd.read_json(df_from_json, convert_dates=["datet", "date_entree_hospitalisationt", "date_sortie_hospitalisationt", "date_fin_arret_travailt", "date_debut_arret_travailt"])
    except Exception as e:
        print("Error in read_json df_as_json : " + str(e))
        return None

    return df


# Not used VR 4/3/35
def fill_info_list_page(audit_info, hash_id_treatment, document_type="ordonnance"):
    df = aux_fill_load_df(audit_info)

    list_page = []

    if df is not None:
        print(df.columns)
    else:
        return []

    key_list_images = "io_exec/0/output/images"
    from lib.manaudit.lib_datou_audit import load_sub_json
    list_pages = load_sub_json(audit_info, key_list_images)

    for i in range(len(df)):
        dt = str(df["document_type"][i])
        if document_type == "all" or dt == document_type:
            list_des_pages = df["Liste des pages"][i]
            try:
                if type(list_des_pages) == str:
                    list_des_pages_as_int = map(int, list_des_pages.split(","))
                else:
                    list_des_pages_as_int = [list_des_pages]
            except Exception as e:
                print(str(e))
                list_des_pages_as_int = []
            for page in list_des_pages_as_int:
                if page < len(list_pages):
                    list_page.append(list_pages[page])

    return list_page


def fill_info_stat_audit(audit_info, hash_id_treatment):
    print(" TODO lots of things, and adapt the outputs")

    df = aux_fill_load_df(audit_info)
    if df is None:
        return {}

    nb_page = 0
    map_doc_type_nb = {}
    for i in range(len(df)):
        lp = str(df["Liste des pages"][i])
        # number of commas + 1, e.g. "4,5,6" counts as 3 pages
        nb_page_this_doc = len(lp) - len(lp.replace(",", "")) + 1
        nb_page += nb_page_this_doc
        if df["document_type"][i] not in map_doc_type_nb:
            map_doc_type_nb[df["document_type"][i]] = nb_page_this_doc
        else:
            map_doc_type_nb[df["document_type"][i]] += nb_page_this_doc

    info_stat = {"nb_doc": len(df), "count": map_doc_type_nb}

    return info_stat


# [ ] TODO VR 11-1-23 refactor : first modularize the loading,
# probably also modularize the loop.
# When we have an in_file, we should rather run a single datou instead of looping over the df.
# We could build a df from param_json, with the **args ??
# Otherwise we need an option to run on a single item.
# Or else modularize and then have, for example, a context_datou_exec.
# We could also handle the case where the inputs of a datou are loaded from a json in a file instead.
def run_batch_datou(mtr_datou_id, safia_project_id=0,
                    safia_document_id=None,
                    input_as_csv=None, output_as_csv=None,
                    verbose=False,
                    with_audit=False,
                    lpgss=None,
                    nb_lines=10,
                    in_file=None, offset=None,
                    out_folder="temp",
                    datou_func="",
                    saxia_step_name=None,
                    special_op="default"):  # TODO VR 14-5-24 : saxia_step_name is the name of a step in the saxia project; inside this function it is just the suffix of a column name
    print("# LOAD DATA NAKED")
    # TODO 22-1-24 : rather than input_from_df_or_arg we would prefer load_input_from_missing and insert_input

    if output_as_csv is None:
        output_as_csv = ""

    from lib.lib_util import count_and_display_elapsed_time

    if "hash_id_treatment" not in output_as_csv:
        if output_as_csv != "":
            output_as_csv += ","
        output_as_csv += "hash_id_treatment:hash_id_treatment"

    input_from_df_or_arg = True
    if in_file is not None and in_file != "":
        nb_lines = 1
        # input_from_df_or_arg = False

    input_column_name, input_datou_name = prepare_ioput_col_datou(input_as_csv)
    output_column_name, output_datou_name = prepare_ioput_col_datou(output_as_csv)

    output_column_name.append("datetime")
    output_datou_name.append("datetime")
    if mtr_datou_id != 0:
        output_column_name.append("datou_id")
        output_datou_name.append("datou_id")

    from auth.lib_auth import create_id
    hash_id_treatment = create_id()

    if safia_document_id == "pg_audit":
        condition = [
            {
                "value": mtr_datou_id,
                "variable": "mtr_datou_id",
                "data_type": "int"
            }
        ]
        mtr_datou_id = None
        df = lpgss.load_data_audit(output_type="pd",
                                   limit=nb_lines,
                                   offset=offset,
                                   condition=condition,
                                   col_csv="*")
        input_from_df_or_arg = True
    else:
        if safia_document_id is None or safia_document_id == "":
            print(" Unexpected behavior TODO safia_document_id should be initialized by default with hash_id_treatment ! ")
            safia_document_id = "tab_log_treatment_" + hash_id_treatment

        total_content = None
        if input_from_df_or_arg:
            if safia_project_id != 0:
                total_content = load_safia_doc_input_list(safia_document_id, safia_project_id, lpgss, limit=10000)
            if total_content is None:
                input_from_df_or_arg = False
            else:
                print("# PREPARE DATA : total_content : " + str(len(total_content)) + " characters")

        if input_from_df_or_arg:
            df = create_pandas_table_from_text(total_content, input_column_name, output_column_name)
        else:
            df = create_pandas_table_from_list_column_and_input(input_column_name, output_column_name)
    if in_file is not None and in_file != "":
        input_arg_by_name = {}
        # TODO 22-1-24 This looks better: https://stackoverflow.com/questions/218616/how-to-get-method-parameter-names but I could not get it to work
        # TODO 22-1-24 And this logic should also go to lines 223 and following, when the useful input is not found
        for name in input_column_name:
            # if name in locals():
            if name == "file":
                input_arg_by_name[name] = in_file
            elif name == "out_folder":
                input_arg_by_name[name] = out_folder
            else:
                print(" name : " + str(name) + " is not an accessible variable ! ")
                input_arg_by_name[name] = ""

        append_data_to_df(df, input_arg_by_name)
        input_data, missing_row_index = input_arg_by_name, len(df.values) - 1
    elif safia_document_id == "pg_audit":
        offset = 0  # because the data was already loaded with the offset applied
        missing_row_index = offset
    else:
        input_data, missing_row_index = find_first_missing_output(df, output_column_name, input_column_name)

    if offset is None or offset == 0:
        offset = missing_row_index
    # Treat data
    # lpgss.load_datou(mtr_datou_id)

    # To be done earlier; and it is a "hack" for now, a matter of access rights !
    from lib.lib_safia_system import LibSafiaSystem
    from server.safia import lpgss_singleton, lib_external_info_from_apia_at, lib_auth_now_from_at, lib_right_singleton
    lss = LibSafiaSystem(lib_user_data_internal=lpgss_singleton,
                         lib_user_data_external=lib_external_info_from_apia_at,
                         lib_auth_user_otp=lib_auth_now_from_at,
                         lib_right=lib_right_singleton)
    otp = "0a76f14b131682eaa36fbef63d725f9352cfb85d"
    info, is_valid = lss.connect_with_otp(otp)

    # Totally crazy !
    lss.user_id = 1

    # lss = LibSafiaSystem(lib_user_data_internal=lpgss, lib_user_data_external=lib_external_info_from_apia_at)

    datous = lss.get_datou(mtr_datou_id)

    datou = None
    if len(datous) == 1:
        datou = datous[0]
        if str(datou["id"]) != str(mtr_datou_id):
            print("Wrong datou id !")
    else:
        print("Unexpected number of datous for this id !")

    if datou is None:
        print("ERROR Missing datou")
        # return

    from auth.lib_auth import get_datou_exec_context_as_complete_param_json
    user = "info@opio.fr"
    user = "victor@reutenauer.eu"
    privacy = False
    from auth.lib_conf_system import lcs_global_singleton
    OPENAI_API_KEY = lcs_global_singleton.get_openai_api_key()
    complete_param_json = get_datou_exec_context_as_complete_param_json(user, verbose, privacy,
                                                                        openai_token=OPENAI_API_KEY, lss=lss,
                                                                        project_id=safia_project_id)
    # VR TODO : damn it, complete_param_json must use the datou's param_json ! grrr
    list_datou_step = list(map(lambda x: x["name"], datou["steps"])) if datou is not None else []  # And I have to do this too ! grrr
    list_param_json_steps = list(map(lambda x: x["param_json"], datou["steps"])) if datou is not None else []

    list_datou_func = []

    if datou_func != "":
        list_datou_func = parse_directive(datou_func)
    else:
        list_datou_func = []

    map_modif_hash_id_treatment_data = {}

    list_results = []

    from lib.datou.datou_exec import datou_exec
    for i in range(nb_lines):
        input_datou = {}

        if input_from_df_or_arg:
            if i + missing_row_index >= len(df.values.tolist()):
                print(" All the tab should be completed by now or we have a problem !")
                break
            if len(input_datou_name) != len(input_column_name) or len(output_datou_name) != len(output_column_name):
                print(" Internal error mismatch datou column io : exiting ")
                exit(1)
            for j in range(len(input_datou_name)):
                print("About to get some input value for : j :" + str(j) + " row : " + str(offset + i))
                print("Using input_datou_name[j] : " + str(input_datou_name[j]))
                print(" available cols : " + str(df.columns.values.tolist()))
                print(" number data : " + str(len(df.values.tolist())))

                # Fetch the element at row offset + i, column input_column_name[j], of df and put it in val
                val = df[input_column_name[j]][offset + i]  # Hack for now: I cannot seem to fetch the right row, it looks like the columns are not found for the first line
                # bug : crashes when url is empty; other cases probably need handling too
                input_datou[input_datou_name[j]] = val
        else:
            if in_file is not None:
                if nb_lines != 1:
                    print(" Unmanaged behavior ")
                input_datou["file"] = in_file
                # used to avoid the OCR's default carbon-impact preprompt; grr, I don't understand the spec that would do the job
                # input_datou["preprompt"] = ""

        if verbose:
            print(str(input_datou))

        if datou is not None:
            input_datou["datou_exec_info"] = {"project_id": safia_project_id,
                                              "safia_doc_id": safia_document_id,
                                              "mtr_datou_id": mtr_datou_id,
                                              "launched_at": datetime.datetime.now()}
        output, audit_json = datou_exec(list_datou_step, input_datou, complete_param_json,
                                        verbose=verbose,
                                        with_audit=with_audit,
                                        privacy=privacy,
                                        list_param_json_steps=list_param_json_steps)
        output["datou_id"] = str(mtr_datou_id)

        if len(list_datou_func) > 0:
            output = call_functions(list_datou_func, input_datou)

        output["datetime"] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        info_store = fill_df_from_datou_result(df, output_datou_name,
                                               output_column_name, output, missing_row_index + i)

        hash_id_treatment = output["hash_id_treatment"] if "hash_id_treatment" in output else input_datou["hash_id_treatment"] if "hash_id_treatment" in input_datou else "POURRI_" + hash_id_treatment
        map_modif_hash_id_treatment_data[hash_id_treatment] = info_store

        if special_op == "accumulate":
            list_results.append(output)

        if saxia_step_name is not None:
            name_info_step = "info_" + saxia_step_name

            saxia_step_name_at = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            info_store[saxia_step_name + "_at"] = saxia_step_name_at

            lss.lib_user_data_internal.upsert_audit_info(hash_id_treatment,
                                                         list_json_update=[
                                                             {"variable": name_info_step, "type": "json"}
                                                         ],
                                                         input_values={name_info_step: info_store},
                                                         verbose=verbose)
    new_text = create_markdown_table_from_df(df)
    # TODO VR test to_markdown

    if safia_document_id != "pg_audit":
        print(" NOW SAVING INDEX FILE BATCH TREATMENT !")
        input_save = {"json_to_save": [{"id": safia_document_id, "text": new_text}]}
        output_save, audit_json = datou_exec(["import_json"], input_save, complete_param_json, verbose=verbose, privacy=privacy)
        output_final = output_save

        print(str(output_final))

        return output_final
    if special_op == "accumulate":
        print(list_results)
        list_pages = []
        for l in list_results:
            if "list_page" in l:
                list_pages.extend(l["list_page"])
        print(list_pages)
        return list_pages
    return "TO check"


# Dynamically load a function from a Python module.
def load_module(module_path, function_name):
    import importlib
    module = importlib.import_module(module_path)
    function = getattr(module, function_name)
    return function


# Dynamically call a function with a list of arguments.
def call_function(function, args):
    return function(*args)
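
# Example (illustrative, using only the standard library):
#   fn = load_module("os.path", "basename")
#   call_function(fn, ["/tmp/x.txt"])  # -> "x.txt"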


# parse datou_function
def parse_directive(chain):
    import re

    # The string to parse looks like:
    # "path_to_file1:function1(arg1,arg2):output1;path_to_file2:function2(arg1,arg2):output2"

    # 1. Split the string on semicolons.
    directives = chain.split(';')

    list_function = []

    # Main loop over each directive.
    for directive in directives:
        # 2. Split each directive on the colons.
        dir_split = directive.split(':')
        if len(dir_split) != 3:
            print(f"Error in directive : {directive}")
            continue
        (path_to_file, function_and_args, output) = tuple(dir_split)

        # Use a regular expression to extract the function name and the arguments.
        function_name, args_str = re.match(r'(\w+)\((.*)\)', function_and_args).groups()

        # Extract the argument names by splitting on commas, ignoring whitespace.
        arg_names = re.split(r'\s*,\s*', args_str)

        one_function = {"path_to_file": path_to_file, "function_name": function_name, "arguments": arg_names, "output": output}
        list_function.append(one_function)

    return list_function
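
# Example (illustrative): one "module:function(args):output" directive.
#   >>> parse_directive("os.path:basename(file):name")
#   [{'path_to_file': 'os.path', 'function_name': 'basename', 'arguments': ['file'], 'output': 'name'}]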


def call_functions(list_function, arguments_json):
    map_results = {}
    for function in list_function:
        path_to_file = function["path_to_file"]
        function_name = function["function_name"]
        arguments = function["arguments"]
        output = function["output"]
        # 4. Load the specified library.
        function = load_module(path_to_file, function_name)

        args_to_pass_as_list = []
        missing_arg = False
        for arg in arguments:
            if arg in arguments_json:
                args_to_pass_as_list.append(arguments_json[arg])
            else:
                # missing_arg = True
                print(f"No argument data available for argument: {arg} expecting fixed value")
                args_to_pass_as_list.append(arg)

        # 5. Call the function with the arguments.
        if not missing_arg:
            result = call_function(function, args_to_pass_as_list)
            print(f"Result of {function_name}: {result}")
            map_results[output] = result
        else:
            print(f"No argument data available for function: {function_name}")
    return map_results
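
# Example (illustrative): parse then call, resolving arguments from a dict
# (names missing from the dict are passed through as literal values).
#   funcs = parse_directive("os.path:basename(file):name")
#   call_functions(funcs, {"file": "/tmp/x.txt"})  # -> {'name': 'x.txt'}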


file_column = "file"
size_column = "size"
created_at_column = "created_at"
indexed_at_column = "indexed_at"
last_indexed_at_column = "last_indexed_at"
modified_at_column = "modified_at"
deleted_at_column = "deleted_at"


def init_df_synchronize():
    input_column_name = [file_column, "sub_folder", created_at_column, modified_at_column, deleted_at_column, indexed_at_column, last_indexed_at_column, size_column]
    df = pd.DataFrame(columns=input_column_name)
    return df


def synchronize_df_folder(df=None, folder="", verbose=False):
    from lib.import_util.lib_path_to_vec import list_files
    if df is None:
        df = init_df_synchronize()

    now_as_string = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    index, file_list = list_files(folder, verbose=False,
                                  managed_zip_extension_to_avoid=[".zip", ".tar", ".tar.gz", ".tgz", ".tar.bz2", ".tbz"])

    for file_dict in file_list:
        file = file_dict["path"]
        # count the occurrences of the file path in the df "file" column
        find_occurence = df[file_column] == file
        nb_occurence = int(find_occurence.sum())
        if nb_occurence == 0:
            print("Indexing one file : " + file)
            size = os.stat(file).st_size
            created_at_float = os.stat(file).st_birthtime  # st_birthtime is only available on some platforms (e.g. macOS / BSD)
            modified_at_float = os.stat(file).st_mtime
            created_at_as_string = datetime.datetime.fromtimestamp(created_at_float).strftime('%Y-%m-%d %H:%M:%S')
            modified_at_as_string = datetime.datetime.fromtimestamp(modified_at_float).strftime('%Y-%m-%d %H:%M:%S')
            df.loc[len(df)] = [file, folder, created_at_as_string, modified_at_as_string, "", now_as_string, now_as_string, size]
        elif nb_occurence > 1:
            print("ERROR : internal inconsistency error : multiple occurence")
        else:
            iloc_file = df.index[find_occurence]
            pass


from abc import abstractmethod


class Cdn():
    @abstractmethod
    def get_file(self, file_id):
        pass

# do we need a onedrive option ?


class CdnLocal(Cdn):
    def __init__(self, root_dir="temp",
                 lcs=None,
                 lib_data_internal=None,
                 project_id=None):
        self.root_dir = root_dir
        if lcs is not None:
            root_dir_conf = lcs.get_root_dir_local_cdn()
            if root_dir_conf is not None:
                self.root_dir = root_dir_conf
        self.lib_data_internal = lib_data_internal
        self.project_id = project_id

    # set project name
    # set date
    # the relative page-location convention must also be implemented in javascript; or how do we handle it ?

    def store_file_get_path(self, temp_file):
        import shutil
        from auth.lib_auth import create_id
        hash_id = create_id()
        folder_as_YMD = datetime.datetime.now().strftime('%Y/%m/%d')
        # keep the original file name, appending a default extension only when it has none
        basename = os.path.basename(temp_file)
        if os.path.splitext(basename)[1] == "":
            basename += ".dat"
        path_file_local_cdn = os.path.join(self.root_dir, str(self.project_id), folder_as_YMD, hash_id + "_" + basename)
        os.makedirs(os.path.dirname(path_file_local_cdn), exist_ok=True)  # the dated target folder may not exist yet
        shutil.move(temp_file, path_file_local_cdn)
        return path_file_local_cdn
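
# Example (illustrative): files land under <root_dir>/<project_id>/<Y/m/d>/,
# prefixed with a fresh hash id.
#   cdn = CdnLocal(root_dir="temp", project_id=7)
#   cdn.store_file_get_path("/tmp/upload.pdf")  # -> "temp/7/2026/02/18/<hash>_upload.pdf"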


def get_class_quali_from_worst_prediag(worst_prediag):
    # 3 classes : BON // EVERYTHING ELSE // MANUSCRIT OR TABLEAUX
    if worst_prediag in ["OK", "BON"]:
        return "PRICE_LOW"
    elif worst_prediag in ["TABLEAUX", "MANUSCRIT", "MAUVAIS"]:
        return "PRICE_HIGH"
    else:  # worst_prediag in ["PRESQUEBON", "MISSING"]
        return "PRICE_MEDIUM"


def get_worst_page_prediag(list_of_page, map_id_page_prediag):
    # import time
    # time.sleep(1)
    worst_prediag = "INIT"
    complete_prediag = ""
    if len(list_of_page) == 0:
        return worst_prediag, complete_prediag

    if len(list_of_page) == 1 and list_of_page[0] in map_id_page_prediag:
        return map_id_page_prediag[list_of_page[0]], map_id_page_prediag[list_of_page[0]]

    # Severity order: OK,BON => PRESQUEBON => {ALL} => MISSING => MANUSCRIT => MAUVAIS
    for page in list_of_page:
        if str(page) in map_id_page_prediag:
            prediag = map_id_page_prediag[str(page)]
            if prediag == "MISSING":
                continue
            if complete_prediag != "":
                complete_prediag += ","
            complete_prediag += prediag

            if worst_prediag == "INIT":
                worst_prediag = prediag
            elif worst_prediag in ["OK", "BON"] and prediag not in ["OK", "BON"]:
                worst_prediag = prediag
            elif prediag == "PRESQUEBON" and worst_prediag in ["OK", "BON"]:
                worst_prediag = "PRESQUEBON"
            elif worst_prediag in ["OK", "BON", "PRESQUEBON"] and prediag not in ["OK", "BON", "PRESQUEBON"]:
                worst_prediag = prediag
            elif prediag == "MISSING" and worst_prediag not in ["MANUSCRIT", "MAUVAIS"]:
                worst_prediag = "MISSING"
            elif prediag == "MANUSCRIT" and worst_prediag != "MAUVAIS":
                worst_prediag = "MANUSCRIT"
            elif prediag == "MAUVAIS":
                worst_prediag = "MAUVAIS"

    return worst_prediag, complete_prediag
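
# Example (illustrative): the worst page verdict wins.
#   >>> get_worst_page_prediag([0, 1], {"0": "BON", "1": "MANUSCRIT"})
#   ('MANUSCRIT', 'BON,MANUSCRIT')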


def get_nb_modif_nb_word_change_or_not_per_doc(info_consolidate_raw, map_id_page_prediag):
    map_count_modif_per_doc = info_consolidate_raw["map_count_modif_per_doc"] if "map_count_modif_per_doc" in info_consolidate_raw else {}
    map_nb_word_per_doc = info_consolidate_raw["audit_info_write"]["map_nb_word_per_doc"] if "audit_info_write" in info_consolidate_raw and "map_nb_word_per_doc" in info_consolidate_raw["audit_info_write"] else {}
    map_type_document_per_doc = info_consolidate_raw["audit_info_write"]["map_type_document_per_doc"] if "audit_info_write" in info_consolidate_raw and "map_type_document_per_doc" in info_consolidate_raw["audit_info_write"] else {}
    list_of_pages_as_sccsv = info_consolidate_raw["audit_info_write"]["list_of_pages_as_sccsv"] if "audit_info_write" in info_consolidate_raw and "list_of_pages_as_sccsv" in info_consolidate_raw["audit_info_write"] else ""

    from lib.lib_util import from_list_page_per_doc_ccsv_to_list_of_list_of_page
    list_of_list_of_page, nb_page, max_page = from_list_page_per_doc_ccsv_to_list_of_list_of_page(list_of_pages_as_sccsv)

    list_doc_nb_modif_nb_word = []
    for id_doc in map_type_document_per_doc:
        id_doc_int = int(id_doc)
        doc_type = map_type_document_per_doc[id_doc]
        if doc_type == "":
            continue
        nb_word = map_nb_word_per_doc[id_doc] if id_doc in map_nb_word_per_doc else 0
        nb_modif = map_count_modif_per_doc[id_doc] if id_doc in map_count_modif_per_doc else 0
        corr_type_manual = "map_modif_type_document" in info_consolidate_raw and id_doc in info_consolidate_raw["map_modif_type_document"]

        if list_of_list_of_page is not None and len(list_of_list_of_page) > id_doc_int:
            list_of_page = list_of_list_of_page[id_doc_int]
            worst_prediag, complete_prediag = get_worst_page_prediag(list_of_page, map_id_page_prediag)
        else:
            worst_prediag = "MISSING"
            complete_prediag = "MISSING"

        one_doc = {"doc_type": doc_type, "nb_word": nb_word, "nb_modif": nb_modif, "corr_type_manual": corr_type_manual, "worst_prediag": worst_prediag, "complete_prediag": complete_prediag}  # "id_doc": id_doc_int,

        list_doc_nb_modif_nb_word.append(one_doc)

    return list_doc_nb_modif_nb_word


def assoc_doc_type_int(list_doc_type):
    map_doc_type_int = {}
    for i in range(len(list_doc_type)):
        map_doc_type_int[list_doc_type[i]] = i
    return map_doc_type_int


def print_nb_word_nb_modif_type_doc(list_doc_nb_modif_nb_word, map_doc_type_int):
    print("# nb_word nb_modif doc_type")
    for doc in list_doc_nb_modif_nb_word:
        print(str(doc["nb_word"]) + " " + str(doc["nb_modif"]) + " " + str(map_doc_type_int[doc["doc_type"]]))


taux_tva = 20


def reset_saxia_fact_after_including(month_start=None, year_start=None,
                                     # month_end=None, year_end=None,
                                     lpgss=None, root_folder_fact="temp"):
    # Find the previous month.
    # How do we get the list of months ?
    # mtr => select id, month, TO_DATE(month, 'TMmonth YYYY')
    #        from mtruser.mtr_factures;
    #  id |    month     |  to_date
    # ----+--------------+------------
    #  52 | janvier 2025 | 2025-01-01
    #  53 | janvier 2025 | 2025-01-01
    from datetime import date
    date_start = date(int(year_start), int(month_start), 1)

    map_validated_nb, map_months = lpgss.get_bill_validate(date_start)
    if map_validated_nb[True] != 0:
        print("Can't regenerate since bills were already validated")
        return False, None
    nb_to_delete = map_validated_nb[False]
    min_bill_id = lpgss.get_min_bill_id_after_or_equal_date(date_start)
    if min_bill_id is not None and isinstance(min_bill_id, int) and min_bill_id > 0:
        autocommit_prev_val = lpgss.client.autocommit
        try:
            lpgss.client.autocommit = False
            ret_reset = lpgss.reset_facture(min_bill_id, nb_to_delete)
        except Exception as e:
            print("Error in resetting the bill")
            print(str(e))
            lpgss.client.autocommit = autocommit_prev_val
            return False, None
        lpgss.client.autocommit = autocommit_prev_val
        if ret_reset == False:
            print("Error in resetting the bill")
            return False, None

    # Get all files
    # Delete files
    list_month_csv = map_months[False]
    if list_month_csv != "":
        list_month = list_month_csv.split(",")
        if len(list_month) > 1:
            print(" Many months to delete : " + str(list_month) + " only the first one deleted will be recreated, so the job should run !")
        for month in list_month:
            month_subfolder_suffix = month.lower().replace(" ", "_")
            subfolder_facture = "facture_" + month_subfolder_suffix
            folder_facture_www = os.path.join(root_folder_fact, subfolder_facture)
            folder_facture_internal_server = folder_facture_www.lstrip("/")
            import shutil
            ret = shutil.rmtree(folder_facture_internal_server, ignore_errors=True)
            print(" ret : " + str(ret))
    else:
        print("Unexpected, something is inconsistent; TODO add a check before and cancel this reset")
        subfolder_facture = "mois_en_lettre_sans_accent_year_en_chiffre"

    # Delete all lines in the table
    print("TODO")
    return True, min_bill_id


def generate_saxia_fact(id_fact=0, month=None, year=None,
                        type_fact="debug",
                        project_id=None,
                        root_folder_fact="temp",
                        only_correct_file=False,
                        verbose=False):
    from server.safia import lpgss_singleton

    # TODO VR 21/4/25 should be fetched from the app configuration !
    mtr_datou_id_fact = 40

    import locale
    locale.setlocale(locale.LC_ALL, 'fr_FR.UTF-8')

    user = None
    # compute today minus one month and get the month and the year
    today = datetime.datetime.now()
    from dateutil.relativedelta import relativedelta
    last_month_date = today + relativedelta(months=-1)
    two_month_before_present = today + relativedelta(months=-2)

    # While debugging !
    last_month = 8
    year_last_month = 2024
    mois_last_month_string = "Aout"

    last_month = 10
    year_last_month = 2024
    mois_last_month_string = "Octobre"

    last_month = 11
    year_last_month = 2024
    mois_last_month_string = "Novembre"

    if type_fact == "deprecated_no_db":  # TODO and what about the behavior that generates the previous month from now without saving to the DB, do we want to keep it ? gasp => I DO NOT KNOW !
        if month is None or year is None or month == "" or year == "":
            mois_last_month_string = last_month_date.strftime("%B")
            mois_last_month_string = mois_last_month_string[0].upper() + mois_last_month_string[1:]
            last_month = last_month_date.month
            year_last_month = last_month_date.year
        else:
            last_month = month
            year_last_month = year

    # Check the consistency of the generation
    res_max_id, res_last_value, res_month_argmax = lpgss_singleton.get_last_fact_id_month()
    inconsistent_data_last_id_stop = False
    if res_max_id != res_last_value:
        print("Inconsistency in the generation of the fact : " + str(res_max_id) + " != " + str(res_last_value))
        inconsistent_data_last_id_stop = True
        return
    else:
        if id_fact == 0 or id_fact is None:
            id_fact = res_max_id + 1
    try:
        datetime_last_info = datetime.datetime.strptime(res_month_argmax.lower(), "%B %Y")
    except Exception as e:
        print("What to do with : res_month_argmax")
        print(str(e))
        print("ERROR EXITING")
        exit(1)
    print(" datetime_last_info : " + str(datetime_last_info))

    if type_fact == "force":
        print("We don't care about the check !")

    month_to_treat = datetime_last_info + relativedelta(months=1)
    last_month = month_to_treat.month
    year_last_month = month_to_treat.year

    if type_fact == "redo":
        if not only_correct_file:
            success_reset, min_bill_id = reset_saxia_fact_after_including(month_start=month, year_start=year,
                                                                          lpgss=lpgss_singleton,
                                                                          root_folder_fact=root_folder_fact)
        else:
            min_bill_id = None
            success_reset = True
        if not success_reset:
            print("Error in resetting the bill, nothing should have been done !")
            return
        last_month = month
        year_last_month = year
        month_to_treat = datetime.datetime(int(year_last_month), int(last_month), 1)
        if min_bill_id is not None:
            id_fact = min_bill_id  # - 1  # TODO VR not sure
    elif ((datetime_last_info.month == last_month_date.month
           and datetime_last_info.year == month_to_treat.year) or
          (datetime_last_info.month == two_month_before_present.month
           and datetime_last_info.year == two_month_before_present.year)) and not inconsistent_data_last_id_stop:
        print("We just generate the next month data")
    elif type_fact == "force":
        print("Force needed since the fact was done with too much delay")
    else:
        print("Nothing done since we would need to do a lot of work and checks !")
        return
    list_project_id = []
    if project_id is not None and project_id != "" and project_id != 0:
        list_project_id = [project_id]
    # VR TODO rename last_month and year_last_month to month_used
    all_results = lpgss_singleton.get_treatment_done(last_month, year_last_month, project_id=list_project_id)

    if only_correct_file:
        print("More protection (third ? )")

    # VR TODO : we need the last month data of course !
    subfolder_facture = "facture_" + month_to_treat.strftime("%B_%Y")
    mois_last_month_string = month_to_treat.strftime("%B")
    if mois_last_month_string != "":
        mois_last_month_string = mois_last_month_string[0].upper() + mois_last_month_string[1:]
    month_string_for_data = month_to_treat.strftime("%B %Y")
    if len(month_string_for_data) > 0:
        month_string_for_data = month_string_for_data[0].upper() + month_string_for_data[1:]
    else:
        print("Unexpected behavior : month_string_for_data is empty !")
    print(" subfolder_facture : " + str(subfolder_facture))
    from unidecode import unidecode
    subfolder_facture = unidecode(subfolder_facture)
    print(" subfolder_facture : " + str(subfolder_facture))

    folder_facture = os.path.join(root_folder_fact, subfolder_facture)
    if not os.path.exists(folder_facture.lstrip("/")):
        os.makedirs(folder_facture.lstrip("/"))

    map_pid_results = {}
    count_pid_nb_page = {}
    for data in all_results:
        pid = data["project_id"]
        id_file = data["id_file"]
        if pid not in map_pid_results:
            map_pid_results[pid] = []
            count_pid_nb_page[pid] = 0
        map_pid_results[pid].append(data)
        if data["nb_page"] is not None:
            count_pid_nb_page[pid] += data["nb_page"]

    print(count_pid_nb_page)
    map_list_nb_modif_per_pid = {}
    map_list_folder_to_fact_per_pid = {}

    list_doc_nb_modif_nb_word_acc = []

    map_prediag_id_file_page_ccsv = {}
    for pid in map_pid_results:
        list_doc_nb_modif_nb_word_one_pid = []
        list_folder_to_fact = []
        for data in map_pid_results[pid]:

            id_file = None
            from lib.sandbox.migration.append_prediag_df_complet_as_json import build_map_from_prediag_id_page_c_csv
            data_prediag = data["info_date"]["prediag_csv"] if "info_date" in data and "prediag_csv" in data["info_date"] else ""
            if ":" in data_prediag:
                map_id_page_prediag = build_map_from_prediag_id_page_c_csv(data_prediag)
            elif data_prediag == "":
                map_id_page_prediag = {}
            else:
                map_id_page_prediag = {str(i): data_prediag.split(",")[i] for i in range(len(data_prediag.split(",")))}

            if map_id_page_prediag == {}:
                print("Missing prediag for pid : " + str(pid) + " id_file : " + str(data["id_file"]) + " data_prediag : " + str(data_prediag) + " hit : " + data["hash_id_treatment"])
#                continue
            else:
                if "id_file" in data:
                    id_file = str(data["id_file"])
                    print("Present prediag for " + str(id_file) + " : " + str(data["id_file"]))
                else:
                    print("Wrong: prediag present but no id_file in data")

            if "info_consolidate" not in data or "audit_info_write" not in data["info_consolidate"]:
                print("Missing audit_info_write for pid : " + str(pid) + " id_file : " + str(data["id_file"]) + " data_prediag : " + str(data_prediag) + " hit : " + data["hash_id_treatment"])
#                continue

            for id_page in map_id_page_prediag:
                id_page = str(id_page)
                id_file_id_page_ccsv = id_file + ":" + id_page
                prediag = map_id_page_prediag[id_page]
                if prediag not in map_prediag_id_file_page_ccsv:
                    map_prediag_id_file_page_ccsv[prediag] = []
                if id_file_id_page_ccsv not in map_prediag_id_file_page_ccsv[prediag]:
                    map_prediag_id_file_page_ccsv[prediag].append(id_file_id_page_ccsv)
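            # map_prediag_id_file_page_ccsv accumulates across iterations into
            # (assumed shape) {prediag_class: ["<id_file>:<id_page>", ...]},
            # i.e. an inverted index from prediagnostic class to the pages it covers.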
            list_doc_nb_modif_nb_word = get_nb_modif_nb_word_change_or_not_per_doc(data["info_consolidate"], map_id_page_prediag)
            list_doc_nb_modif_nb_word_acc.extend(list_doc_nb_modif_nb_word)
            list_doc_nb_modif_nb_word_one_pid.extend(list_doc_nb_modif_nb_word)

            nb_modif = data["info_consolidate"]["nb_modif"] if "info_consolidate" in data and "nb_modif" in data["info_consolidate"] else 0
            input_file_at = data["info_date"]["input_file_available_at"] if "info_date" in data and "input_file_available_at" in data["info_date"] else None
            output_file_at = data["info_consolidate"]["consolidate_at"] if "info_consolidate" in data and "consolidate_at" in data["info_consolidate"] else None
            time_treatmeant = None
            import dateparser
            if output_file_at != None and input_file_at != None:
                try:
                    in_date = dateparser.parse(input_file_at)  # , settings={'DATE_ORDER': 'DMY'})
                    out_date = dateparser.parse(output_file_at)  # , settings={'DATE_ORDER': 'DMY'})
                    time_treatmeant = out_date - in_date
                    print(" time_treatmeant : " + str(time_treatmeant))
                    time_treatmeant = int(time_treatmeant.total_seconds() / 3600)
                except Exception as e:
                    print(str(e))
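            # time_treatmeant now holds the treatment duration truncated to whole
            # hours (total_seconds / 3600), or None when either timestamp is missing.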
            nb_page = data["nb_page"] if "nb_page" in data else 0
            worst_prediag_doc, complete_prediag = get_worst_page_prediag(list(range(0, nb_page)), map_id_page_prediag)
            list_folder_to_fact.append({"nb_page": data["nb_page"], "id_file": data["id_file"], "nb_modif": nb_modif, "time_treatmeant": time_treatmeant, "map_id_page_prediag": map_id_page_prediag, "worst_prediag": worst_prediag_doc})

        map_list_nb_modif_per_pid[pid] = list_doc_nb_modif_nb_word_one_pid
        map_list_folder_to_fact_per_pid[pid] = list_folder_to_fact

    print("""
    $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
    map_prediag_id_file_page_ccsv :
    $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
    """)
    print(str(map_prediag_id_file_page_ccsv))
    print("""
    $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
    """)

    print(map_list_folder_to_fact_per_pid)
    print(map_list_nb_modif_per_pid)
    list_prediag_class = []
    for doc in list_doc_nb_modif_nb_word_acc:
        if "worst_prediag" not in doc:
            print("ERROR : worst_prediag not found in doc : " + str(doc))
            continue
        if doc["worst_prediag"] not in list_prediag_class:
            list_prediag_class.append(doc["worst_prediag"])

    map_prediag_class_int = assoc_doc_type_int(list_prediag_class)
    print(" map_prediag_class_int : ")
    print(str(map_prediag_class_int))

    list_doc_type = []
    for doc in list_doc_nb_modif_nb_word_acc:
        if doc["doc_type"] not in list_doc_type:
            list_doc_type.append(doc["doc_type"])

    map_doc_type_int = assoc_doc_type_int(list_doc_type)
    print(" map_doc_type_int : ")
    print(str(map_doc_type_int))
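    # assoc_doc_type_int is assumed to map each distinct label to a dense integer
    # index, e.g. (hypothetical labels) ["CR", "ORDONNANCE"] -> {"CR": 0,
    # "ORDONNANCE": 1}; those indices address the count arrays below.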
    for pid in map_list_nb_modif_per_pid:
        print_nb_word_nb_modif_type_doc(map_list_nb_modif_per_pid[pid], map_doc_type_int)

    print(" AND ALL ")
    print_nb_word_nb_modif_type_doc(list_doc_nb_modif_nb_word_acc, map_doc_type_int)

    unit_price = 0.5
    unit_price_default = 0.5
    unit_price_high = 0.8
    unit_price_low = 0.4
    map_class_price_unit_price = {"PRICE_LOW": unit_price_low,
                                  "PRICE_MEDIUM": unit_price,
                                  "PRICE_HIGH": unit_price_high}
    map_class_price_unit_price = {}
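    # Note: the immediate reassignment to {} disables per-class pricing;
    # map_class_price_unit_price.get(...) below then always falls back to
    # unit_price_default, so every page is billed at the flat default rate.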
    import numpy as np
    for pid in map_list_folder_to_fact_per_pid:
        if only_correct_file:
            print("We should only modify the input pid")
            print(f" pid : {pid} should be the one given as argument {project_id}")
            if project_id != pid:
                continue

        data_count = {
            "document_type": map_doc_type_int.keys(),
            "count": len(map_doc_type_int) * [0]
        }
        data_count_class = {
            "prediag_class": map_prediag_class_int.keys(),
            "count": len(map_prediag_class_int) * [0]
        }
        # TO BE USED
        # Count table of document types crossed with prediagnostic classes
        np_array_count_class_type = np.zeros((len(map_prediag_class_int), len(map_doc_type_int)), dtype=int)
        for data in map_list_nb_modif_per_pid[pid]:
            data_count["count"][map_doc_type_int[data["doc_type"]]] += 1
            data_count_class["count"][map_prediag_class_int[data["worst_prediag"]]] += 1
            np_array_count_class_type[map_prediag_class_int[data["worst_prediag"]], map_doc_type_int[data["doc_type"]]] += 1

        data_folder_price = {
            'Dossier': [],
            'Nb de Page': [],
            'Prix HT\npar page': [],
            'Prix total HT': [],
            'TVA': [],
            'Prix total TTC': [],
            'Temps (h)\ntraitement': [],
        }
        print("pid : " + str(pid))
        sum_total = 0
        time_treatmeant_total = 0
        for data in map_list_folder_to_fact_per_pid[pid]:
            unit_price = unit_price_default
            worst_prediag = data["worst_prediag"] if "worst_prediag" in data else "MISSING"
            price_class = get_class_quali_from_worst_prediag(worst_prediag)
            unit_price = map_class_price_unit_price.get(price_class, unit_price_default)
            print(data)
            data_folder_price["Dossier"].append(data["id_file"])
            data_folder_price["Nb de Page"].append(data["nb_page"])
            data_folder_price["Prix HT\npar page"].append(str(round(unit_price, 2)))
            prix_total_ht = round(unit_price * data["nb_page"], 2)
            data_folder_price["Prix total HT"].append(str(prix_total_ht))
            prix_total_ttc = round(prix_total_ht * (1. + float(taux_tva) / 100.), 2)
            data_folder_price["TVA"].append("{:.2f}".format(prix_total_ttc - prix_total_ht))
            data_folder_price["Prix total TTC"].append(str(prix_total_ttc))
            sum_total += unit_price * data["nb_page"] * (1. + float(taux_tva) / 100.)

            time_treatmeant = data["time_treatmeant"]
            if time_treatmeant != -1 and time_treatmeant != None:
                time_treatmeant_total += time_treatmeant
            else:
                print("ERROR : time_treatmeant is -1 or missing")
            data_folder_price["Temps (h)\ntraitement"].append(str(time_treatmeant))

        sum_total = round(sum_total, 2)

        sum_total_wo_tva = sum_total / (1. + float(taux_tva) / 100.)
        sum_total_wo_tva = round(sum_total_wo_tva, 2)
        nb_page_total = int(sum_total_wo_tva / unit_price)
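        # nb_page_total is back-computed from the rounded totals using the last
        # unit_price seen in the loop; with flat pricing this equals the page sum,
        # e.g. taux_tva=20, 42 pages at 0.5 €: sum_total=25.2, wo_tva=21.0, 21.0/0.5=42.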
        data_folder_price["Dossier"].append("Total " + str(mois_last_month_string) + " " + str(year_last_month) + " en EUROS ")
        data_folder_price["Nb de Page"].append(nb_page_total)  # str(nb_page_total))
        data_folder_price["Prix HT\npar page"].append(str(unit_price))
        data_folder_price["Prix total HT"].append(str(sum_total_wo_tva))
        data_folder_price["TVA"].append("{:.2f}".format(sum_total - sum_total_wo_tva))
        data_folder_price["Prix total TTC"].append(str(sum_total) + " € TTC")
        temps_traitement_moyen = -1
        if len(map_list_folder_to_fact_per_pid[pid]) > 0:
            temps_traitement_moyen = time_treatmeant_total / len(map_list_folder_to_fact_per_pid[pid])
        data_folder_price["Temps (h)\ntraitement"].append(str(round(temps_traitement_moyen, 1)) + " (moyenne)")
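        # The row appended above is the totals line; create_pdf below relies on it
        # being the last row (table_data[-1]) when applying the flat-fee override.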
#        data = {
#            'Dossier': ['Item A', 'Item B', 'Item C'],
#            'Nb de Page': [4, 5, 2],
#            'Prix par page': [7.5, 8.0, 9.5],
#            'Prix total': [30.0, 40.0, 19.0]
#        }
        df = pd.DataFrame(data_folder_price)
        df_data_count = pd.DataFrame(data_count)
        df_data_class = pd.DataFrame(data_count_class)

        info_project = lpgss_singleton.get_project_info(pid)
        owner_id = info_project["owner_id"]
        map_id_email = lpgss_singleton.get_email_from_user_ids([owner_id])
        info_user = lpgss_singleton.select_user(owner_id)
        if only_correct_file:
            print("Fourth protection to be downgraded as third")
            print("We should only generate the pdf for the input pid and not modify the DB")
            lpgss_singleton = None

        siret = info_user["miscinfo"]["siret"] if "miscinfo" in info_user and info_user["miscinfo"] != None and "siret" in info_user["miscinfo"] else ""
        adresse = info_user["miscinfo"]["adresse"] if "miscinfo" in info_user and info_user["miscinfo"] != None and "adresse" in info_user["miscinfo"] else ""
        email = info_user["email"] if "email" in info_user and info_user["email"] != None else ""
        first_name = info_user["firstname"] if "firstname" in info_user and info_user["firstname"] != None else ""
        last_name = info_user["lastname"] if "lastname" in info_user and info_user["lastname"] != None else ""
        found_user = False
        email_client_expert = "email@expert"
        for data in map_id_email:
            data_dict = dict(data)
            if "id" in data_dict and owner_id == data_dict["id"]:
                found_user = True
                email_client_expert = data_dict["mail"]
                break
        name = f"""Docteur {first_name} {last_name}"""
        print(" TODO add name ")
        if not found_user:
            email_client_expert = "anonymous@user"
            print(" INTERNAL ERROR GENERATING id_fact " + str(id_fact) + " for pid " + str(pid) + " owner_id " + str(owner_id) + " email_client_expert " + email_client_expert + " sum_total : " + str(sum_total) + " df_data_count : " + str(df_data_count))
        pdf_path = create_pdf(datas=df,
                              sum_total=sum_total,
                              df_data_count=df_data_count,
                              id_fact=id_fact,
                              email_client_expert=email_client_expert,
                              name=name,
                              adresse=adresse,
                              siret=siret,
                              project_id=pid,
                              sub_folder=folder_facture,
                              df_data_class=df_data_class,
                              np_array_count_class_type=np_array_count_class_type)

        if only_correct_file:
            print("We should only generate the pdf for the input pid and not modify the DB")
            exit(2)

        if lpgss_singleton != None:
            print("Record please !")
            query_id = lpgss_singleton.insert_new_bill(id_fact, pdf_path, pid, month_string_for_data, sum_total_wo_tva, sum_total, nb_page_total)
            if query_id != id_fact:
                print("ERROR ERROR id fact We should cancel all !")

        id_fact = id_fact + 1
# To remove or to use!
en_tete_gauche = """
SAS FOTONOWER FRANCE
30 Rue CHARLOT
75003 PARIS-3E-ARRONDISSEMENT
FRANCE
compta@fotonower.com
https://www.fotonower.com/
N° TVA Intracommunautaire : FR80804468197
N° SIRET : 80446819700012
"""

# info_banque is reassigned several times below; only the last value is used.
info_banque = """
Coordonnées bancaires :
IBAN : FR76 1695 8000 0165 7288 7296 040
BIC/SWIFT : QNTOFRP1XXX
"""

info_banque = """
Coordonnées bancaires :
IBAN : FR76 1287 9000 0111 2329 2200 123
BIC/SWIFT : DELUFR22XXX
"""

info_banque = """
Coordonnées bancaires :
IBAN : FR76 3000 3024 2400 1500 3117 325
BIC/SWIFT : SOGEFRPP
"""

info_banque_lines = info_banque.strip().split("\n")

info_legal = info_banque
condition_paiement_1 = """
Conditions de paiement :
• 100 % soit """

condition_paiement_2 = """ € TTC à payer le :
"""

condition_paiement_3 = """ (à réception).
"""
def create_pdf(datas=None,
               sum_total=0,
               df_data_count=None,
               id_fact=0,
               email_client_expert="email@expert",
               name="Docteur Expert",
               adresse="30 Rue Charlot",
               siret="FR56573",
               project_id=-1,
               sub_folder="temp",
               df_data_class=None,
               np_array_count_class_type=None):
    suffix = "_p_" + str(project_id) + "_id_" + str(id_fact)

    en_tete_droite = email_client_expert + """
    project_id : """ + str(project_id)

    if len(datas) == 0:
        print("No invoice generated for an empty table")
        return
    customer_info = [
        "Adresse de facturation :",
        name,
        adresse,
        "SIRET : " + siret,
        email_client_expert,
        "project_id=" + str(project_id)
    ]

    pdf_file = 'facture_saxia_' + suffix + '.pdf'

    table_data = []
    table_data.append(list(datas.columns))
    for index, row in datas.iterrows():
        table_data.append(list(row))

    if float(table_data[-1][3]) >= 500 or project_id in [327]:
        print("TRIGGER FORFAIT 500")
        table_data[-1][2] = "FORFAIT500"
        table_data[-1][3] = "500"
        table_data[-1][4] = "100"
        table_data[-1][5] = "600 € TTC"
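    # Flat-fee override on the totals row (columns by position in data_folder_price):
    # index 2 = 'Prix HT\npar page', 3 = 'Prix total HT', 4 = 'TVA',
    # 5 = 'Prix total TTC'; any invoice over 500 € HT is capped at the
    # 500 € HT / 600 € TTC package.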
    pdf_path = os.path.join(sub_folder, pdf_file)
    create_first_page_fact(id_fact=id_fact, pdf_file=pdf_path.strip("/"), customer_info=customer_info, table_data=table_data)
#    pdf_path = pdf_path.strip("/")

    import matplotlib.pyplot as plt
    from matplotlib.backends.backend_pdf import PdfPages
    from datetime import datetime

    import matplotlib
    matplotlib.use('agg')  # headless backend: no display needed on the server

    pdf_file_graph = 'facture_saxia_' + suffix + '_graph.pdf'
    pdf_path_graph = os.path.join(sub_folder, pdf_file_graph)

    pdf_path_graph = pdf_path_graph.strip("/")
#    os.path.copy(pdf_path, pdf_path_graph)

    try:
        # Create the PDF file
        with PdfPages(pdf_path_graph) as pdf:
            plt.clf()
#            plt.close()

            fontsize = 4
#            plt.rcParams.update({'font.size': fontsize})
#            plt.rc('font', size=fontsize)        # controls default text size
#            plt.rc('axes', titlesize=fontsize)   # fontsize of the title
#            plt.rc('axes', labelsize=fontsize)   # fontsize of the x and y labels

            # Probably the only useful one! => but the ones below work too!
#            plt.rc('font', size=fontsize)
            plt.rc('legend', fontsize=4*fontsize)  # fontsize of the legend

            plt.rc('xtick', labelsize=fontsize)  # fontsize of the x tick labels
            plt.rc('ytick', labelsize=fontsize)  # fontsize of the y tick labels

            # Plot the charts (simple examples here)
            plt.subplot(2, 2, 3)
            datas['Nb de Page'][:-1].plot(kind='bar', legend=False)
            plt.title('Nombre de pages\npar dossiers', size=5*fontsize, fontweight="bold")

            index_null = df_data_count["count"] == 0
            # df_data_count["document_type"][index_null] = ""
            if all(index_null):
                df_data_count["document_type"] = ""
            else:
                # only blank the labels of document types whose count is zero
                df_data_count.loc[index_null, "document_type"] = ""
            try:
                plt.subplot(2, 2, 2)
                plt.imshow(np_array_count_class_type, cmap='hot', interpolation='nearest')
                plt.title('Répartition des prédiagnostics\npar type de document')
                import numpy as np
                plt.xticks(ticks=np.arange(len(list(df_data_count["document_type"]))), labels=list(df_data_count["document_type"]), rotation=45)
                plt.yticks(ticks=np.arange(len(list(df_data_class["prediag_class"]))), labels=list(df_data_class["prediag_class"]))
                for i in range(len(list(df_data_class["prediag_class"]))):
                    for j in range(len(list(df_data_count["document_type"]))):
                        text = plt.text(j, i, np_array_count_class_type[i, j],
                                        ha="center", va="center", color="pink", fontsize=2)

            except Exception as e:
                print("Error in heatmap chart : " + str(e))

            # If we want two pages:
#            plt.savefig(pdf, format='pdf')
#            plt.clf()
            try:
                import numpy as np
                if np.sum(df_data_count["count"]) > 0:
                    plt.subplot(2, 2, 1)

                    plt.pie(df_data_count["count"], labels=(df_data_count["document_type"] + " " + df_data_count["count"].apply(str)))
                    plt.title('Répartition des types\nde documents')
                else:
                    plt.subplot(2, 2, 1)
                    plt.text(0.5, 0.5, 'Aucune information sur les documents', horizontalalignment='center', verticalalignment='center')
                    plt.title('Répartition des types de documents')
            except Exception as e:
                print("Error in pie chart doc_type : " + str(e))

#            plt.subplots_adjust(wspace=0.2)

            try:
                import numpy as np
                if np.sum(df_data_count["count"]) > 0:
                    plt.subplot(2, 2, 4)
                    plt.pie(df_data_class["count"], labels=(df_data_class["prediag_class"] + " " + df_data_class["count"].apply(str)))
                    plt.title('Répartition des prédiagnostics')
                else:
                    plt.subplot(2, 2, 4)
                    plt.text(0.5, 0.5, 'Aucune information sur les prédiagnostics', horizontalalignment='center', verticalalignment='center')
                    plt.title('Répartition des prédiagnostics')
            except Exception as e:
                print("Error in pie chart prediag : " + str(e))
            # datas['Prix total'].plot(kind='bar', color='green')
            # plt.title('Prix total')

            plt.subplots_adjust(wspace=0.2, hspace=0.4, left=0.1, right=0.9, top=0.9, bottom=0.1)

            # Adjust spacing between subplots
#            plt.subplots_adjust(bottom=0.1, right=0.8, top=0.9)

            # plt.tight_layout()
            plt.savefig(pdf, format='pdf')

            # Equivalent to the following, but that does not work inside an
            # existing pdf, at least not here!
#            pdf.savefig(plt.gcf())

            plt.close()
    except Exception as e:
        print("Error building graphs for invoice:")
        print(str(e))
    # Merge the invoice page and the graphs page into a single PDF
    try:
        if os.path.exists(pdf_path.strip("/")) and os.path.exists(pdf_path_graph):
            from PyPDF2 import PdfReader, PdfWriter

            # Create a PDF writer object
            pdf_writer = PdfWriter()

            # Read the first page (invoice details)
            with open(pdf_path.strip("/"), 'rb') as f:
                pdf_reader = PdfReader(f)
                for page in pdf_reader.pages:
                    pdf_writer.add_page(page)

            # Read the second page (graphs)
            with open(pdf_path_graph, 'rb') as f:
                pdf_reader = PdfReader(f)
                for page in pdf_reader.pages:
                    pdf_writer.add_page(page)

            print(" WRITE in " + str(pdf_path.strip("/")))

            # Write to a new PDF file
            with open(pdf_path.strip("/"), 'wb') as f:
                pdf_writer.write(f)
    except Exception as e:
        print("Error in merge")
        print(str(e))
    return pdf_path
def test_voila():

    from matplotlib.backends.backend_pdf import PdfPages
    import matplotlib.pyplot as plt
    # scipy.misc.imread was removed from SciPy; imageio provides an equivalent reader
    from imageio import imread
    import os
    import numpy as np

    files = ["Column0_Line16.jpg", "Column0_Line47.jpg"]

    def plotImage(f):
        folder = "temp/"
        im = imread(os.path.join(folder, f)).astype(np.float32) / 255
        plt.imshow(im)
        a = plt.gca()
        a.get_xaxis().set_visible(False)  # We don't need axis ticks
        a.get_yaxis().set_visible(False)

    pp = PdfPages("temp/page1.pdf")
    plt.subplot(121)
    plotImage(files[0])
    plt.subplot(122)
    plotImage(files[1])
    pp.savefig(plt.gcf())  # This generates page 1
    pp.savefig(plt.gcf())  # This generates page 2
    pp.close()
import os

# Module-level demo values (mirroring the defaults of create_first_page_fact below)
id_fact = 1666
# Create a PDF document
pdf_file = "output_invoice.pdf"

customer_info = [
    "Docteur DUGUET",
    "drduguet.tiphaine@gmail.com",
    "project_id=112"
]

# Table of costs
table_data = [
    ["Nbs de pages", "Prix HT par pages (€)", "Prix total HT (€)", "Prix total TTC (€)"],
    ["42", "0.5", "21", "25.2"]
]
def create_first_page_fact(id_fact=1666, pdf_file="output_invoice.pdf", customer_info=[
    "Docteur DUGUET",
    "drduguet.tiphaine@gmail.com",
    "project_id=112"
], table_data=[
    ["What", "Nbs de pages (€)", "Prix HT par pages (€)", "Prix total HT (€)", "Prix total TTC (€)"],
    ["id_trucmuche", "42", "0.5", "21", "25.2 € TTC"]
], montant_total=None):

    if montant_total == None:
        montant_total = table_data[-1][-2].strip(" € TTC")
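    # str.strip(" € TTC") strips a *set* of characters from both ends, not the
    # literal suffix; it works here since digits and '.' are not in the set,
    # e.g. "25.2 € TTC" -> "25.2". The amount is read from the next-to-last
    # cell of the totals row.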
    from reportlab.lib.pagesizes import A4
    from reportlab.lib import colors
    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
    from reportlab.lib.units import mm
    from reportlab.lib.enums import TA_RIGHT, TA_LEFT, TA_CENTER
    from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, Image
    from reportlab.pdfgen import canvas

    def add_background(canvas, doc):
        git_safia_root = os.getenv("GITSAFIA")
        img_folder = os.path.join(git_safia_root, "prompt/python/server/static/image_static/saxia")
        background_image_path = os.path.join(img_folder, "Logo_Saxia_Picto_Vert_calque.png")  # Replace with the path to your background image
        canvas.drawImage(background_image_path, 0, -100, width=A4[0], preserveAspectRatio=True)  # , mask=[0,100,0,100,0,100])  # , height=A4[1]
    document = SimpleDocTemplate(pdf_file, pagesize=A4,
                                 rightMargin=20*mm, leftMargin=20*mm, topMargin=20*mm, bottomMargin=20*mm)

    # Define styles
    styles = getSampleStyleSheet()
    styleN = styles["Normal"]
    styleH = styles["Heading1"]
    styleTitle = ParagraphStyle(
        name="Title",
        fontSize=16,
        alignment=TA_CENTER,
        spaceAfter=12
    )
    styleSubtitle = ParagraphStyle(
        name="Subtitle",
        fontSize=12,
        alignment=TA_CENTER,
        spaceAfter=12
    )
    styleRight = ParagraphStyle(
        name="RightAlign",
        fontSize=10,
        alignment=TA_RIGHT,
    )

    styleLeft = ParagraphStyle(
        name="LeftAlign",
        fontSize=10,
        alignment=TA_LEFT,
    )

    styleCenter = ParagraphStyle(
        name="CenterAlign",
        fontSize=10,
        alignment=TA_CENTER,
    )
    import datetime
    date_today = datetime.datetime.now()

    # Title
    elements = []
    elements.append(Paragraph("SAXIA", styleTitle))
    name_fact = "FTN-SAXIA " + date_today.strftime("%Y-%m") + "-" + str(id_fact)

    elements.append(Paragraph("Facture " + name_fact, styleH))
    elements.append(Spacer(1, 12))

    # e.g. "Le 4 août 2024"
    date_facturation = date_today.strftime("Le %d %B %Y")

    # Date and logo (Placeholder for Logo)
    elements.append(Paragraph(date_facturation, styleRight))

    # Add company and customer details
    company_info = [
        "SAS FOTONOWER FRANCE",
        "30 RUE CHARLOT",
        "75003 PARIS",
        "compta@fotonower.com",
        "https://www.fotonower.com",
        "N TVA intracommunautaire : FR804468197",
        "N Siret 80446819700012",
    ]
    company_paragraphs = [Paragraph(line, styleLeft) for line in company_info]
    customer_paragraphs = [Paragraph(line, styleLeft) for line in customer_info]

    # Organize them in a table
    data = [[
        Paragraph("<br/>".join(company_info), styleLeft),
        Paragraph("<br/>".join(customer_info), styleLeft)
    ]]

    table = Table(data)
    elements.append(table)
    elements.append(Spacer(1, 12))

    table = Table(table_data, colWidths=[48*mm, 20*mm, 17*mm, 20*mm, 15*mm, 30*mm, 25*mm])
    table.setStyle(TableStyle([
        ("BACKGROUND", (0, 0), (-1, 0), colors.lightgreen),
        ("BACKGROUND", (-2, -1), (-2, -1), colors.red),
        ("TEXTCOLOR", (0, 0), (-1, 0), colors.black),
        ("ALIGN", (0, 0), (-1, -1), "CENTER"),
        ("GRID", (0, 0), (-1, -1), 1, colors.black),
    ]))
    elements.append(table)

    # Payment conditions and bank details
    elements.append(Spacer(1, 12))
    elements.append(Paragraph("Conditions de paiement : à réception soit " + date_facturation.lower(), styleLeft))
    elements.append(Paragraph("Montant total : " + montant_total.lower() + " € TTC ", styleLeft))
    elements.append(Spacer(1, 12))
    elements.append(Paragraph(info_banque_lines[0], styleLeft))
    elements.append(Paragraph(info_banque_lines[1], styleLeft))
    elements.append(Paragraph(info_banque_lines[2], styleLeft))

    # Build the document
    document.build(elements, onFirstPage=add_background)

    print(f"PDF '{pdf_file}' created successfully.")
def retrieve_and_finish(mtd_upload_id=51,
                        mtd_complete_map=46,
                        id_step_finish=1,
                        list_project_void=[121],
                        lpgss=None,
                        project_id=0,
                        nb_day=2,
                        verbose=False,
                        filo_or_fifo=True,
                        min_time=7200,
                        smart_relaunch=False):
    runnings = retrieve_missed_folder(mtd_upload_id=mtd_upload_id,
                                      mtd_complete_map=mtd_complete_map,
                                      list_project_void=list_project_void,
                                      lpgss=lpgss,
                                      project_id=project_id,
                                      nb_day=nb_day,
                                      verbose=verbose,
                                      filo_or_fifo=filo_or_fifo)

    project_id_param_relaunch = 91
    key_param_relaunch = "saxia/steps/relaunch"
    smart_param_relaunch = lpgss.load_conf_project(project_id_param_relaunch, key_param_relaunch)
    from lib.manaudit.lib_datou_audit import load_sub_json
    smart_param_relaunch = load_sub_json(smart_param_relaunch, key_param_relaunch)
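    # smart_param_relaunch is assumed to be a dict after load_sub_json, e.g.
    # {"only_with_manual_split_above_nb_page": 50}; that key gates the relaunch
    # of large folders that have no manual split CSV (see below).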
    import datetime
    now = datetime.datetime.now(datetime.timezone.utc)
    map_running_by_id_file = {}
    for running in runnings:
        if "id_file" not in running:
            print("ERROR missing id_file for running " + str(running))
            continue
        id_file = running["id_file"]
        if running["project_id"] in list_project_void:
            print("Ignoring running from void project " + str(list_project_void) + " " + str(running))
            continue
        if id_file not in map_running_by_id_file:
            map_running_by_id_file[id_file] = []
        map_running_by_id_file[id_file].append(running)
    selected_to_run = None
    for id_file in map_running_by_id_file:
        max_created_at = max(list(map(lambda x: x["created_at"], map_running_by_id_file[id_file])))

        delai = now - max_created_at
        if delai.total_seconds() < min_time:
            print(" Not launching id_file : " + str(id_file) + " because it was launched " + str(delai.total_seconds()) + " seconds ago (min_time = " + str(min_time) + ")")
            continue

        for running in map_running_by_id_file[id_file]:
            created_at = running["created_at"]
            nb_page = running["nb_page"] if "nb_page" in running else 100
            # compute the time elapsed since the job was created
            delai = now - created_at
            print(" time running : " + str(delai) + " " + str(delai.total_seconds()))
            if verbose:
                print(" one running or failed " + str(running))

            split_csv_empty = True
            if "info_lab" in running and "split_csv_lab" in running["info_lab"]:
                split_csv_lab = running["info_lab"]["split_csv_lab"]
                if split_csv_lab != "" and type(split_csv_lab) == str:
                    split_csv_empty = False
                else:
                    split_csv_empty = True

            all_doc_separated = False
            if "info_lab" in running and "all_doc_separated" in running["info_lab"]:  # and .lower() == "true":
                print(str(running["info_lab"]["all_doc_separated"]))
                all_doc_separated = True
            else:
                all_doc_separated = False

            if split_csv_empty and ("only_with_manual_split_above_nb_page" in smart_param_relaunch
                                    and smart_param_relaunch["only_with_manual_split_above_nb_page"] < nb_page):
                print(str(running["id_file"]) + " split csv empty, we avoid launching this one!")
                continue
            print(str(running["id_file"]) + " candidate")

            if delai.total_seconds() > min_time and running["project_id"] not in list_project_void:
                if selected_to_run != None:
                    if filo_or_fifo:
                        if delai > selected_to_run["delai"]:
                            selected_to_run = running
                            selected_to_run["delai"] = delai
                    else:
                        if delai < selected_to_run["delai"]:
                            selected_to_run = running
                            selected_to_run["delai"] = delai
                else:
                    selected_to_run = running
                    selected_to_run["delai"] = delai
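    # Selection rule as written: with filo_or_fifo=True the candidate with the
    # largest delai (the oldest eligible job) wins; with False the smallest
    # delai (the most recent eligible job) wins.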
    # min_time

    # Now relaunch the selected job
    if selected_to_run != None:
        print("Relaunching job : " + str(selected_to_run))
        from lib.datou.datou_exec import datou_exec

        from lib.lib_safia_system import LibSafiaSystem
        lss = LibSafiaSystem(lib_user_data_internal=lpgss)
        project_id = selected_to_run["project_id"] if "project_id" in selected_to_run else 0
        datou_to_run = lss.get_datou(mtd_complete_map, project_id=project_id)
        if len(datou_to_run) != 1:
            print("Unexpected size datou_to_run")
        datou_to_run = datou_to_run[0]

        input = {}
        # - [ ] TODO : fetch the input parameters from the audit : file
        # - [ ] TODO : check that the file exists : otherwise we may loop forever; we could update created_at or use modified_at?
        # - [ ] TODO : fetch the complete_param_json from the audit

        hash_id_treatment = selected_to_run["hash_id_treatment"]
        # NB: the guard used to test "hit_output" while reading "output_hit"; the key is now consistent
        output_hit = selected_to_run["info_date"]["output_hit"] if "info_date" in selected_to_run and "output_hit" in selected_to_run["info_date"] else None
        all_data = lpgss.load_data_audit(hash_id_treatment)
        audit_info = all_data["audit_info"] if "audit_info" in all_data else {}

        print(" We don't use id_step_finish for now, or id_step_finish - 1!")
        input = audit_info["io_exec"]['0']["input"] if "io_exec" in audit_info and '0' in audit_info["io_exec"] else {}
        if output_hit != None:
            input["output_hit"] = output_hit

        if "id_file" not in selected_to_run:
            print("Problem: missing id_file")
            exit(1)

        from lib.lib_util import parse_id_date_nb_page_folder
        nb, id, date = parse_id_date_nb_page_folder(selected_to_run["id_file"])

        if date == None:
            print("Problem with date")
            exit(1)
        date = selected_to_run["uploaded_at"].strftime("%Y%m%d")
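        # Note: the parsed date is unconditionally replaced by uploaded_at here;
        # the parse above effectively only sanity-checks that the folder name
        # carries a date at all.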
        try:
            date_parsed = datetime.datetime.strptime(date, "%Y%m%d")
        except Exception as e:
            print("Problem parsing date")
            print(str(e))
            date_parsed = datetime.datetime.now()

        input["date"] = date_parsed

        file = input["file"] if "file" in input else ""
        if isinstance(file, list):
            test_file = file[0]
        elif isinstance(file, str):
            test_file = file
        else:
            print("Unexpected type for file : " + str(type(file)))
            print(" We don't do anything ! ")
            return
        if not os.path.exists(test_file):
            print("ERROR : file not found : " + str(file))
            print(" We don't do anything ! ")
            return
        if "info_lab" in all_data and "split_csv_lab" in all_data["info_lab"]:
            input["saxia_split_end_csv"] = all_data["info_lab"]["split_csv_lab"]
        # NB: all_doc_separated comes from the last candidate examined in the
        # loop above, which is not necessarily the selected run (TODO?)
        input["saxia_all_doc_separated"] = all_doc_separated

        complete_param_json = audit_info["config"]["complete_param_json"] if "config" in audit_info and "complete_param_json" in audit_info["config"] else {}

        from auth.lib_auth import build_layer_from_configuration
        map_type_layer_inst = build_layer_from_configuration(lss, complete_param_json)
        # Datou treatment execution context without PG, VR 2023
        complete_param_json["map_type_layer_inst"] = map_type_layer_inst

        datou_linear_list_steps = list(map(lambda x: x["name"], datou_to_run["steps"]))
        list_param_json_steps = list(map(lambda x: x["param_json"], datou_to_run["steps"]))

        ret = datou_exec(datou_linear_list_steps=datou_linear_list_steps,
                         input=input,
                         complete_param_json=complete_param_json,
                         verbose=verbose,
                         with_audit=True,
                         privacy=False,
                         # map_type_layer_inst: dict = {},
                         list_param_json_steps=list_param_json_steps,
                         id_step_incomplete_args=0)  # id_step_finish)

        return ret
    else:
        print(" ALL DONE !")
        return None
def retrieve_missed_folder(mtd_upload_id=51,
                           mtd_complete_map=46,
                           list_project_void=[121],
                           lpgss=None,
                           project_id=0,
                           nb_day=5,
                           verbose=False,
                           filo_or_fifo=True):

    ret = lpgss.running_job(project_id=project_id, verbose=verbose, nb_day=nb_day)

    print(str(ret))
    count_running = len(ret["running_or_failed"]) if "running_or_failed" in ret else 0
    print(" count_running : " + str(count_running))

    return ret["running_or_failed"]

def datou_exec_partial(id_step, mtd_id, input_datou,
                       project_id, user_id,
                       verbose=False):
    pass