Coverage for lib/lib_safia_system.py: 65%

672 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2026-02-18 02:40 +0100

1# -*- coding: utf-8 -*- 

2__author__ = 'moilerat' 

3# 

4# # Safia copyright 2022-2023 VR - LICENSETOBEDEFINED : on premise treatment custom 

5# 

#! This file defines LibSafiaSystem, a class for a chatbot application that can access most Safia services; it encapsulates the calls that currently live as functions in lib_safia. As an object-oriented system, it is initialized with parameters related to user data and authentication, and provides methods for setting up, managing and connecting users, retrieving and managing projects, estimating cost, logging, resuming projects, configuring and updating setups, managing documents, and processing summaries.

7# 

8 

9# - [ ] TODO move in lib_util 

def parsed_title(content):
    """Return the first Markdown H1 title found in *content*.

    Searches for a line of the form ``# <title>`` (a '#' at start of line,
    an optional single space, then text up to a required newline) and
    returns the captured title, or ``"No Title Found"`` when absent.
    """
    import re

    # An H1 heading: '#' at line start, optional single space, title text,
    # terminated by a newline (re.MULTILINE makes ^ match every line start).
    h1_pattern = re.compile(r'^#\s?(?P<title>[^\n]*)\n', re.MULTILINE)

    found = h1_pattern.search(content)
    return found.group('title') if found else "No Title Found"

27 

28 

29 

30import sys 

31 

32# TODO VR 6-6-44+79 : replace info@opio.fr by anonymous user 

33 

# Hard-coded fallback: for each layer type, the implementing class and its
# module, used when no per-project configuration row provides one.
hc_default_map_conf_layer_class_name = {'nlp_chat': {'class_name': 'LayerPromptOpenAI', 'module_name': 'lib.lib_openai'}, 'compute_embedding': {'class_name': 'LayerGeneric', 'module_name': 'lib.brick_layers.lib_abstract_generic_layer'}, 'image_to_text': {'class_name': 'LayerGeneric', 'module_name': 'lib.brick_layers.lib_abstract_generic_layer'}, 'speech_to_text': {'class_name': 'LayerGeneric', 'module_name': 'lib.brick_layers.lib_abstract_generic_layer'}, 'vector_search_engine': {'class_name': 'LayerGeneric', 'module_name': 'lib.brick_layers.lib_abstract_generic_layer'}}
# Hard-coded fallback: default configuration option selected per layer type
# (used by load_configuration when the user/project has no stored selection).
hc_default_map_conf_selected = {'nlp_chat': 'openai_gpt_api', 'compute_embedding': 'openai_ada', 'image_to_text': 'google_ocr', 'speech_to_text': 'openai_whisper', 'vector_search_engine': 'pg_vector'}

36 

37# Object-Oriented system to call safia 

38# - can access most of safia service 

39# - TODO VR 14-6-23 : should encapsulate all the call today in function in lib_safia 

40# - lib_Safia should only build it, and maybe treat special cas (non-connected and freemium case) 

41class LibSafiaSystem(): 

42 def __iter__(self) : 

43 #return iter(self) 

44 # raise StopIteration 

45 return iter(self) 

46 def __next__(self) : 

47# raise StopIteration 

48 raise StopIteration 

49 def __init__(self, 

50 lib_user_data_internal = None, 

51 lib_user_data_external = None, 

52 lib_auth_user_otp = None, 

53 lib_right = None): 

54 

55 self.lib_user_data_internal = lib_user_data_internal 

56 self.lib_user_data_external = lib_user_data_external 

57 self.lib_auth_user_otp = lib_auth_user_otp 

58 self.lib_right = lib_right 

59 

60 # marrant à garder ! 

61 if not hasattr(self, "project_id"): 

62 self.project_id = None 

63 # Could be used sometime or not (merci very clear #7 7-7-23) 

64 self.user_id = None 

65# self.schema_user_documents = "" 

66# self.user_id = user_id 

67# self.user = user 

68# self.map_projects = {} 

69 

70 from auth.lib_cost import CostEstimation as CE 

71 self.ce = CE() 

72 

    # Called once the user's e-mail address has been validated by an OTP.
    def setup_user(self, user, description, auth_type, otp):
        """Ensure a user record exists for *user* and attach a project to it.

        :param user: the user's e-mail address.
        :param description: project description; an existing project with the
            same description for this user is reused instead of created.
        :param auth_type: authentication type forwarded to the user store.
        :param otp: one-time password stored with a newly created project.
        :return: dict holding ``"project"`` (and ``"setup_user"`` when the
            user was created during this call).

        Side effects: sets ``self.user_id`` and ``self.schema_user_documents``
        and (best-effort) creates the user's personal rights group.
        """
        all_info = {}
        info_user_from_mail = self.lib_user_data_internal.get_user_from_mail(user)
        if info_user_from_mail == None:
            # Unknown mail: create the user, then re-read the record to learn
            # its documents schema.
            info_setup_user = self.lib_user_data_internal.setup_user(user, auth_type)
            # TODO(VR): these few lines could be hoisted out of the condition
            self.user_id = info_setup_user

            all_info["setup_user"] = info_setup_user

            user_data_ret = self.lib_user_data_internal.select_user(self.user_id)

            self.schema_user_documents = user_data_ret["schema_user_documents"]
        else :
            # Known user: fall back to the shared "info_opio_fr" schema when
            # the record carries none.
            self.schema_user_documents = info_user_from_mail["schema_user_documents"] if "schema_user_documents" in info_user_from_mail else "info_opio_fr"
            self.user_id = info_user_from_mail["id"]

        # Reuse a project with the same description when one already exists.
        already_existing_project = self.lib_user_data_internal.get_project_with_description_and_user(self.user_id, description)

        if already_existing_project != None:
            all_info["project"] = already_existing_project
        else :
            all_info["project"] = self.lib_user_data_internal.create_project(self.user_id, self.schema_user_documents, description, otp)

        # Best-effort: group creation failures are logged, not raised.
        try:
            self.lib_right.create_self_group(self.user_id)
        except Exception as e:
            print("Internal Error in create : " + str(e))

        return all_info

105 

106 def set_user_id(self, user_id): 

107 self.user_id = user_id 

108 info = self.lib_user_data_internal.get_user_from_mail(self.user) 

109 

110 def get_user_info(self): 

111 info = self.lib_user_data_internal.get_user_info_from_id(self.user_id) 

112 return info 

113 

114 def connect_with_otp(self, otp): 

115 # TODO VR 5-7-23 la on pourrait faire fait setup_user qui peut etre mis dans lib_auth_user_otp 

116 # et est actuellement une partie de register_user_get_data 

117 

118 info_connexion, is_valid = self.lib_auth_user_otp.connect_with_otp(otp) 

119 

120 self.user = info_connexion['Email_User'] if 'Email_User' in info_connexion else 'anonymous@opio.fr' 

121 

122 if self.lib_user_data_internal != None: 

123 info = self.lib_user_data_internal.get_user_from_mail(self.user) 

124 self.user_id = info["id"] if info != None and "id" in info else -1 

125 else : # this must be used by APIA 

126 self.user_id = -1 

127 

128# self.user_id = info_connexion["user_id"] 

129 

130 return info_connexion, is_valid 

131 

132 # - [ ] TODO VR temporary 23-7-23 to get rid of user_id from safia.py 

133 def get_user_id(self): 

134 return self.user_id 

135 

136 def select_one_project_id(self): 

137 list_pids = self.lib_right.get_projects_with_access(self.get_user_id()) 

138 project_id = None 

139 if len(list_pids) > 0 and "id" in list_pids[0]: 

140 project_id = list_pids[0]["id"] 

141 return project_id, len(list_pids) 

142 

143 def has_access_to_this_facture(self, path = "/static/factures", filename = "facture.pdf"): 

144 import os 

145 path_facture = os.path.join(path, filename) 

146 project_id = self.lib_user_data_internal.select_project_id_from_facture(path_facture) 

147 role = self.lib_right.get_role_on_project(self.user_id, project_id) 

148 return role != None 

149 

150 def has_access_to_this_document(self, filepath): 

151 import os 

152 # 192.168.1.33 - - [05/Nov/2025 18:53:50] "GET /static/temp/workarea_anon/output_wfrom_api_d_0511_nov_16/id_0_nb_1_d_20251104_4c4d85e630e8524e16cf3bda36cdf90e8dff168b986dc178dfaf776a4ddec6ee.docx HTTP/1.1" 304 - 

153 import re 

154 # parse hit from filepath : ie 64 hexadecimal caracter before .docx 

155 pattern = r'([a-f0-9]{64})\.docx' 

156 if filepath[-5:] != ".docx": 

157 return False 

158 if len(filepath) < 64 + 5: 

159 return False 

160 potential_hex = filepath[-(64 + 5):-5] 

161 all_info = self.lib_user_data_internal.load_data_audit(col_csv = "project_id", hash_id_treatment=potential_hex) 

162 project_id = all_info["project_id"] if "project_id" in all_info else -1 

163 role = self.lib_right.get_role_on_project(self.user_id, project_id) 

164 return role != None 

165 

166 def get_user(self): 

167 return self.user 

168 

169 def delete_user(self): 

170 return self.lib_user_data_internal.audit_or_delete_user(self.user_id, also_delete=True) 

171 

172 def audit_user(self): 

173 return self.lib_user_data_internal.audit_or_delete_user(self.user_id) 

174 

    def connect_with_mail_validated_by_password(self, password):
        """Placeholder for password-based connection; not implemented yet."""
        pass

177 

178 def get_project(self, user = None, verbose = False): 

179 info_user_from_mail = self.lib_user_data_internal.get_user_from_mail(user) 

180 if info_user_from_mail != None: 

181# deprecated on 27-10-24 

182# message = "No user, no project, please login on openpromptia.com !" 

183# print(message) 

184# return [] 

185# else : 

186 self.schema_user_documents = info_user_from_mail["schema_user_documents"] 

187 self.user_id = info_user_from_mail["id"] 

188 

189 list_projects = self.lib_user_data_internal.get_projects_from_owner(self.user_id) 

190 # map_id_table_documents = {x["id"] : x["table_documents"] for x in list_projects} 

191 # map_project_metatable_info = self.lib_data.get_meta_info_table_documents(map_id_table_documents) 

192 if verbose: 

193 print(str(list_projects)) 

194 else : 

195 print("Loaded config of " + str(len(list_projects)) + " ! ") 

196 return list_projects 

197 

198 # - [ ] TODO VR ne porte pas bien son nom ! et oui 18-8-23 

199 # et depend de configuration => a sans doute disparu 

200 def create_projects(self, description, otp_as_hash): 

201 

202 info_setup_project = self.lib_user_data_internal.create_project(self.user_id, self.schema_user_documents, description, otp_as_hash) 

203 

204 return info_setup_project 

205 

206 def truncate_project_documents(self, project_id, kill_all = False, verbose = False): 

207 info_project = self.lib_user_data_internal.get_project_info(project_id) 

208 self.user_id = info_project["owner_id"] if "owner_id" in info_project else -1 

209 if "table_documents" not in info_project: 

210 return None 

211 return self.lib_user_data_internal.truncate(info_project["table_documents"], kill_all=kill_all, verbose=verbose) 

212 

    def get_project_info(self, project_id, with_facture = False, is_validated = None):
        """Assemble a display-ready info dict for *project_id*.

        Enriches the raw project record with document counts, a sample import
        command, formatted timestamps and (optionally) its invoices.

        :param project_id: numeric id; non-numeric values yield ``{}``.
        :param with_facture: when True, attach invoices under ``"facture"``
            (empty list on lookup failure).
        :param is_validated: forwarded as a filter to the invoice lookup.
        :return: the enriched project-info dict, or ``{}`` on bad input.
        """
        import logging
        logger = logging.getLogger()
        logger.info("Before getting self.lib_data.get_project_info ")

        # Guard: no data layer configured, or a non-numeric project id.
        if self.lib_user_data_internal == None or not str(project_id).isnumeric():
            print(" Wrong configuration or pid : " + str(project_id))
            return {}

        info_project = self.lib_user_data_internal.get_project_info(project_id)
        logger.info(" info_project : " + str(info_project))

        # Meta info (document count, last update) is best-effort only; a
        # missing/broken documents table must not break the page.
        meta_info = {}
        try:
            meta_info = self.lib_user_data_internal.get_meta_info_project(info_project["table_documents"])
        except Exception as e:
            print(str(e))
            print("Too much voilà ?")

        logger.info(" nb_documents : " + str(meta_info))

        info_project["number_documents"] = meta_info["number_documents"] if "number_documents" in meta_info else 0
        info_project["latest_update"] = meta_info["latest_update"] if "latest_update" in meta_info else None

        info_project["size"] = "TO ESTIMATE"

        # info_project["cost"] = "TO ESTIMATE"

        # Sample shell command to import a JSON dump into this project's table.
        if "table_documents" in info_project:
            info_project["command"] = """
            cd $GITSAFIA/prompt/python/lib/import_util/lib_import_retrieval;
            export PYTHONPATH=`pwd`/../../..:`pwd`;
            python3.10 scripts/process_json/process_json.py --project_id=""" + str(project_id) + """ --pg_table=""" + info_project["table_documents"] + """ --filepath=../../../io/Fotonower_Issues.json
            """

        # Render timestamps for display; note created_at falls back to ""
        # while modified_at falls back to None.
        info_project["created_at"] = info_project["created_at"].strftime("%m/%d/%Y %H:%M:%S") if "created_at" in info_project else ""
        info_project["modified_at"] = info_project["modified_at"].strftime("%m/%d/%Y %H:%M:%S") if "modified_at" in info_project else None

        if with_facture:
            try :
                info_project["facture"] = self.lib_user_data_internal.get_facture_from_project(project_id, is_validated)
            except Exception as e:
                print("Error getting facture : " + str(e))
                info_project["facture"] = []

        # Columns rendered by the projects HTML table:
        # Project Name</th>
        # <th>Number of Documents</th>
        # <th>Size</th>
        # <th>Cost</th>
        # <th>Command

        return info_project

266 

267 def update_project_info(self, project_id, info_json): 

268 print("TODO check right ") 

269 return self.lib_user_data_internal.update_project_info(project_id, info_json) 

270 

271 

272 def update_project_costs(self, project_id, model, nb_new_token): 

273 add_cost = self.ce.compute_cost_search(nb_new_token, model) 

274 

275 self.lib_user_data_internal.add_cost_to_project(project_id, add_cost) 

276 

277 def create_project(self, input, otp_as_hash = None): 

278 project_name = input["project_name"] 

279 print(str(input)) 

280 

281 if otp_as_hash == None: 

282 import uuid 

283 otp_as_hash = str(uuid.uuid4()).replace("-", "") 

284 

285 # - [ ] TODO VR refacto 18-8-23 : one want's to use the project 

286 info_user_from_mail = self.lib_user_data_internal.get_user_from_mail(self.user) 

287 if info_user_from_mail != None: 

288 self.schema_user_documents = info_user_from_mail["schema_user_documents"] #"schema_user_documents" 

289 else : 

290 print("Internal error") 

291 

292 return self.lib_user_data_internal.create_project(self.user_id, self.schema_user_documents, project_name, otp_as_hash) 

293 

294 def insert_query_log(self, user_id = -1, project_id = -1, endpoint = "/", request_method = "GET", 

295 request_body = "", ip_address = "0.0.0.0", elapsed_time = 0): 

296 import logging 

297 logger = logging.getLogger() 

298 logger.info("inside insert_query_log from lss ") 

299 logger.info(str(request_body)) 

300 query_id = self.lib_user_data_internal.insert_query_log(project_id, user_id, endpoint, request_method = request_method, 

301 request_body = request_body, ip_address = ip_address, 

302 elapsed_time = elapsed_time) 

303 return query_id 

304 

305 def update_query_end(self, query_id, response_status = 200, response_body = "", 

306 elapsed_time = 0, cost = 0, detailed_time = {}): 

307 if query_id == -1: 

308 return {"nothing" : "recorded"} 

309 self.lib_user_data_internal.update_query_end(query_id, 

310 response_status = response_status, 

311 response_body = response_body, 

312 elapsed_time = elapsed_time, 

313 cost = cost, 

314 detailed_time = detailed_time) 

315 return {} 

316 

317 def get_queries(self, project_id): 

318 return self.lib_user_data_internal.get_queries(project_id) 

319 

    def resume_all_project(self, project_id,
                           limit = 10000,
                           nb_cluster_input = 10,
                           metric = "euclidean",
                           verbose = True,
                           strat = "default"):
        """Prepare a GPT summary ("résumé") request over a whole project.

        Loads the project's document chunks, groups them by source document,
        runs a 2-D k-means over their embeddings and builds the prompt inputs
        from the chunks closest to the centroids.

        :return: ``(audit, list_inputs, list_datou_step)`` — a human-readable
            audit note, the prompt payloads and the pipeline steps to run.

        NOTE(review): ``strat`` is currently unused, and the single-document
        path after the unconditional return below looks unreachable — confirm
        the intended control flow (formatting was reconstructed).
        """
        print("NEED RIGHT")

        info_project = self.get_project_info(project_id)

        table_documents = info_project["table_documents"]

        # A missing table is treated as an empty project, not a hard error.
        check_table_exists = self.lib_user_data_internal.check_table_exists(table_documents)
        if not check_table_exists:
            print("l310 ERROR treated as warning Table " + table_documents + " does not exists, please create it first !")
            document_datas = []
        else:
            document_datas = self.lib_user_data_internal.get_documents(table_documents)

        print(" How many documents : " + str(len(document_datas)))

        map_group_by_documents = self.group_by_documents(document_datas)

        if len(map_group_by_documents) == 1:
            print("We will use one document strategy (potentially long) ")
        else :
            # Multi-document resume is not implemented; warn only.
            message_error = """
            Can't resume this project with multiple documents, please delete all the element not associated to one document before requiring again a resume (until this is developped !) .
            Maybe you have a query and an empty result that is useless, delete it in documents.html page.
            """
            print("ERROR : we need to develop the multiple document resume strategy returning ! Please use stat button first, because this can occurs some costs ! ")

        print("NOW WE GO THROUGH STRAT KMEAN !")

        # Cluster the chunk embeddings; the documents nearest to each centroid
        # become the representatives fed to the summary prompt.
        filename, list_nn_centroids, list_doc_centroids, list_example_media, misc_info = self.kmean_2d(list_project_ids = [project_id],
                                                                                                       nb_project = 1,
                                                                                                       nb_cluster_input = nb_cluster_input,
                                                                                                       verbose = verbose,
                                                                                                       limit = limit,
                                                                                                       filename = "temp/img.png",
                                                                                                       metric = metric)

        # - [ ] TODO VR 28-7-23 : prepare the prompt
        # (preprompt is a runtime string sent to the model — kept in French)
        preprompt = "Merci de faire un résumé des éléments suivant en y faisant référence "
        input_text = ""
        for doc in list_doc_centroids:
            content = doc["content"]
            id = doc["id"]
            input_text += "-----\n" + id + "------\n" + content

        input = {}
        input["preprompt"] = preprompt
        input["text"] = input_text
        list_datou_step = ["request_gpt"]
        list_inputs = [input]

#        from lib.datou.datou_exec import datou_exec
#        from auth.lib_auth import get_datou_exec_context_as_complete_param_json
#        from auth.lib_conf_system import lcs_global_singleton
#        privacy = False
#        user = self.get_user()
#        complete_param_json = get_datou_exec_context_as_complete_param_json(user, verbose, privacy,
#                                                                            openai_token=lcs_global_singleton.get_openai_api_key(),
#                                                                            lss=self,
#                                                                            project_id=project_id)
#        result = datou_exec(list_datou_step, complete_param_json, input)

        audit = "We have multiple documents so we use kmean strategy on chunk !"

        # return result
        return audit, list_inputs, list_datou_step

        # NOTE(review): dead code — unreachable after the return above.
        document = list(map_group_by_documents.keys())[0]
        return self.resume_one_document(map_group_by_documents[document])

395 

    def stat_all_one_project(self, project_id, limit = 10000, offset = 0,
                             order_by_recent = True):
        """Build per-document statistics for one project.

        For every source document: chunk count, token count, size, a title
        parsed from its first chunk and a humanized creation date.  Also
        stamps the project info with the most recent document date.

        :return: ``(map_audit_doc, info_project)`` where *map_audit_doc* is a
            list of audit dicts sorted by ``created_at`` (newest first).
        """

        info_project = self.get_project_info(project_id)

        table_documents = info_project["table_documents"] if "table_documents" in info_project else None

        check_table_exists = self.lib_user_data_internal.check_table_exists(table_documents)

        # A missing table is demoted to a warning and an empty document list.
        if not check_table_exists:
            print("l382 ERROR treated as warning Table " + str(table_documents) + " does not exists, please create it first !")
            document_datas = []
        else :
            document_datas = self.lib_user_data_internal.get_documents(table_documents,
                                                                       limit = limit, offset = offset,
                                                                       order_by_recent = order_by_recent)

        print(" How many documents : " + str(len(document_datas)))

        map_group_by_documents = self.group_by_documents(document_datas)

        map_audit_doc = []

        if len(map_group_by_documents) == 1:
            print("We will use one document strategy (potentially long) ")
        else :
            # Informational only: the loop below computes stats per document
            # regardless of how many documents there are.
            message_error = """
            Can't resume this project with multiple documents, please delete all the element not associated to one document before requiring again a resume (until this is developped !) .
            Maybe you have a query and an empty result that is useless, delete it in documents.html page.
            """
            print("ERROR : we need to develop the multiple document resume strategy returning ! Please use stat button first, because this can occurs some costs ! ")

        for k in map_group_by_documents:
            audit, list_inputs, list_datou_step = self.parse_document_prepare_input(map_group_by_documents[k])
            # TODO remove the three following lines
            kunslash = k.replace("/", "")
            kunslash = kunslash.replace("@", "")
            kunslash = kunslash.replace(".", "")
            from lib.lib_util import replace_non_alpha_with_underscore
            # URL/filename-safe variant of the document id.
            kunslash = replace_non_alpha_with_underscore(kunslash)
            audit["document_id"] = k
            audit["kunslash"] = kunslash
            from lib.lib_util import humanize_modified_time
            audit["created_at"] = audit["created_at"]
            # NOTE(review): assumes created_at is a datetime here (str would
            # break .replace(tzinfo=None)) — confirm against the store.
            audit["created_at_h"] = humanize_modified_time(audit["created_at"].replace(tzinfo=None))
            map_audit_doc.append(audit)
            audit["title"] = parsed_title(audit["begin_content"])

        # created_at may come back as str or datetime depending on the store;
        # normalize to datetime before comparing.
        from dateutil import parser
        dates = [parser.parse(item['created_at']) if isinstance(item['created_at'], str) else item['created_at'] for
                 item in map_audit_doc]
        # Find the most recent date (arbitrary old default when empty).
        import datetime
        most_recent_date = max(dates) if len(dates) > 0 else datetime.datetime.strptime("2014-09-06 00:00:00", '%Y-%m-%d %H:%M:%S')
        print(f"La date la plus récente est : {most_recent_date.strftime('%Y-%m-%d %H:%M:%S')}")

        from lib.lib_util import humanize_modified_time
        most_recent_date_h = humanize_modified_time(most_recent_date.replace(tzinfo=None))
        info_project["most_recent_date_h"] = most_recent_date_h
        info_project["most_recent_date"] = most_recent_date

        map_audit_doc = sorted(map_audit_doc, key=lambda x: x['created_at'],reverse=True)
        return map_audit_doc, info_project

#        document = list(map_group_by_documents.keys())[0]
#        return "TODO"

463 

464# def group_by_documents(documents: List[Document]) -> Dict[str, List[Document]]: 

465 def group_by_documents(self, documents): 

466 """ 

467 Cette fonction prend une liste de documents et les regroupe par un certain critère, par exemple par sujet ou par auteur. 

468 Elle renvoie un dictionnaire où les clés sont les critères de regroupement et les valeurs sont des listes de documents. 

469 """ 

470 map_doc_chunk = {} 

471 for document in documents: 

472 document_source = document["document_id"] 

473 document_id = document["id"] 

474 if document_source not in map_doc_chunk: 

475 map_doc_chunk[document_source] = {} 

476 if document_id.startswith(document_source): 

477 chunk_id_str = document_id[len(document_source) + 1:] 

478 if not chunk_id_str.isnumeric() or document_id != document_source + "_" + chunk_id_str : 

479 print("Unexpected internal error doc 2, can't manage this document !") 

480 continue 

481 

482 chunk_id = int(chunk_id_str) 

483 if chunk_id in map_doc_chunk[document_source]: 

484 print("Unexpected internal error doc 3, can't manage this document !") 

485 continue 

486 

487 map_doc_chunk[document_source][chunk_id] = document 

488 

489 else : 

490 print("Unexpected internal error doc 1, can't manage this document !") 

491 continue 

492 

493 return map_doc_chunk 

494 

495 def resume_one_document(self, map_one_document_id_chunk): 

496 audit, list_inputs, list_datou_step = self.parse_document_prepare_input(map_one_document_id_chunk) 

497 

498 return audit, list_inputs, list_datou_step 

499 # return "WIP " + str(audit) + " " + str(len(list_inputs)) + "\n " + str(list_inputs) 

500 

    # Pretreat docs
    def parse_document_prepare_input(self, map_doc_chunk, max_nb_token_per_input = 100000) : #8000): #4096 8192):
        """Pack one document's chunks into prompt-sized inputs.

        Walks the chunks, counts tokens with tiktoken's ``cl100k_base``
        encoding and greedily concatenates chunk contents into inputs of at
        most *max_nb_token_per_input* tokens each.

        :param map_doc_chunk: mapping ``chunk_id -> chunk row`` (rows carry
            at least ``content`` and ``created_at``).
        :return: ``(audit, list_inputs, list_datou_step)`` — *audit* holds
            size/token statistics plus the first chunk's content,
            *list_inputs* the ``{"text", "preprompt"}`` payloads, and
            *list_datou_step* the pipeline steps (``["request_gpt"]``).
        """

        # Runtime prompt sent to the model — kept in French on purpose.
        preprompt = "Merci de faire un résumé en listant les points importants et les taches à accomplir de la transcription de l'AG de Fotonower :"

        import tiktoken
        tokenizer = tiktoken.get_encoding("cl100k_base") # The encoding scheme to use for tokenization

        list_inputs = []
        list_datou_step = []
        list_nb_token_inputs = []

        current_input = ""
        current_input_nb_token = 0
        nb_tokens = 0
        size_content_total = 0
        begin_content = ""
        begin_chunk = None
        created_at = None
        for chunk_id in map_doc_chunk:
            created_at = map_doc_chunk[chunk_id]["created_at"]
            one_content = map_doc_chunk[chunk_id]["content"]
            # Remember the lowest-numbered chunk: its content opens the document.
            if begin_content == "" or chunk_id < begin_chunk:
                begin_content = one_content
                begin_chunk = chunk_id
            size_content_total += len(one_content)
            tokens = tokenizer.encode(one_content, disallowed_special=())
#            print(str(len(tokens)))
            nb_tokens += len(tokens)
            # Greedy packing: start a new input once the token budget would
            # overflow.
            if current_input_nb_token + len(tokens) < max_nb_token_per_input:
                current_input += one_content
                current_input_nb_token += len(tokens)
            else :
#                list_inputs.append(current_input)
                list_inputs.append({"text" : current_input, "preprompt" : preprompt})
                list_nb_token_inputs.append(current_input_nb_token)
                current_input = one_content
                current_input_nb_token = len(tokens)

        # Flush the last (possibly empty, when map_doc_chunk is empty) input.
        list_inputs.append({"text" : current_input, "preprompt" : preprompt})
        list_nb_token_inputs.append(current_input_nb_token)

        # print(str(list_nb_token_inputs))

        audit = {"size_document" : size_content_total,
                 "nb_chunks" : len(map_doc_chunk),
                 "nb_tokens" : nb_tokens,
                 "nb_input" : len(list_inputs),
                 "created_at" : created_at,
                 "begin_content" : begin_content}

        list_datou_step.append("request_gpt")
#        list_datou_step.append("result_to_json")
#        list_datou_step.append("import_json")

        return audit, list_inputs, list_datou_step

562 

    # Builds the k-means data in 2-D and in higher dimensions (to refactor later, Q1-2024)
    def kmean_2d(self, list_project_ids,
                 nb_project = 0,
                 nb_cluster_input = 0,
                 verbose = False, limit = 0,
                 filename = "temp/img.png",
                 metric = "",
                 dim = 2,
                 gpu_enable = False,
                 nb_step = 10,
                 keep_fvs_running = False,
                 port = None,
                 pdt = None,
                 build_fvs_desc = True,
                 save_cluster = "",
                 load_cluster = "",
                 launch_fvs = True,
                 fvs_interface = None):
        """Cluster document embeddings across projects and plot/collect centroids.

        Two modes:

        * ``dim == 2`` — load embeddings per project, truncate them to their
          first two components, run sklearn ``KMeans`` (when ``metric == ""``)
          or the in-house ``KmeanGB``, and plot the classification regions.
        * otherwise — delegate to ``kmean_fvs_dim`` (FVS backend) with the
          full parameter set.

        :return: ``(outfile, list_legends, list_doc_centroids,
            list_cluster_and_inertia, misc_info)``; all None/empty when no
            embedding data could be loaded in 2-D mode.
        """

        def cast_vector(row):
            # Embeddings may arrive as sequences of numpy scalars; coerce
            # each component to float64.
            return np.array(list(map(lambda x: x.astype('double'), row)))

        import numpy as np
        import random

        if dim == 2:
            # (project_id, doc_id, content) tuples aligned with the embedding rows.
            list_id_local_desc_to_project_id_doc = []

            data_to_concat = ()
            label_to_concat = ()
            nb_data_total = 0
            idx = 0
            for project_id in list_project_ids:
                map_descs = self.load_descs(project_id=project_id, limit = limit)

                for d in map_descs:
                    list_id_local_desc_to_project_id_doc.append((project_id, d["id"], d["content"]))

                if len(map_descs) == 0:
                    continue

                list_desc_str = list(map(lambda x : x["embedding"], map_descs))
                # list_desc = list(map(lambda x : list(map(float, x.lstrip("[").rstrip("]").split(","))), list_desc_str))

                # Embeddings are stored either as "[f1,f2,...]" strings or as
                # numeric vectors; normalize both forms to float sequences.
                list_desc = []
                for emb in list_desc_str:
                    type_list_desc = str(type(emb))
                    if type_list_desc == "<class 'str'>":
                        sys.stdout.write("s")  # progress marker: string-encoded embedding
                        list_desc.append(list(map(float, emb.lstrip("[").rstrip("]").split(","))))
                    else :
                        sys.stdout.write("n")  # progress marker: numeric embedding
                        list_desc.append(cast_vector(emb))

                # - [ ] Having kmean in multi-dimension Bouh :-(
                # Only the first two embedding components are clustered here.
                list_desc_2d = list(map(lambda x : x[:2], list_desc))

                nb_data_one_proj = len(list_desc_2d)

                data_to_concat += (list_desc_2d,)
                label_to_concat += ([idx] * nb_data_one_proj,)
                nb_data_total += nb_data_one_proj

                idx += 1
                if nb_project > 0 and idx > nb_project:
                    break

            # One cluster per loaded project unless an explicit count is given.
            nb_clusters = len(list_project_ids)
            if nb_cluster_input == 0:
                nb_clusters = idx
            else :
                nb_clusters = nb_cluster_input
            from lib.lib_ml.lib_kmean_pyfvs import plot_classif_region, KmeanGB
            # model = KmeanGB(nb_cluster= nb_clusters, metric= "KL", nb_sample= 100)

            if len(data_to_concat) == 0:
                return None, None, None, None, {}

            data = np.concatenate(data_to_concat, axis=0)
            label = np.concatenate(label_to_concat, axis=0)
            index_shuffle = np.arange(0, nb_data_total)

            np.random.shuffle(index_shuffle)

            data = data[index_shuffle, :]

            np.random.seed(9)

            # NOTE(review): centroids/assigned_centroids are computed but not
            # passed to either model below — confirm they are still needed.
            shuffle = random.sample(range(data.shape[0]), nb_clusters)

            centroids = data[shuffle]

            assigned_centroids = np.zeros(len(data), dtype=np.int32)

            print(centroids)

            from sklearn.cluster import KMeans

            # (reslt_cent, reslt_label, _) = kmeans(data, k=nb_clusters, nb_sample=100)
            # labels = np.unique(reslt_label)

            if metric == "":
                # Default: plain euclidean sklearn KMeans.
                c_model = KMeans(n_clusters= nb_clusters)
                outfile, list_legends, list_doc_centroids = plot_classif_region(data, c_model, label[index_shuffle], filename,
                                                                                list_ids_proj_doc_id=list_id_local_desc_to_project_id_doc)
            else :
                # Custom metric ("euclidean", "KL", "IS", "EX", "LX"): use the
                # in-house KmeanGB model.
                new_model = KmeanGB(nb_cluster= nb_clusters, metric= metric, nb_sample= 100)

                # new_model = KmeanGB(nb_cluster=nb_clusters, nb_sample=100)
                outfile, list_legends, list_doc_centroids = plot_classif_region(data, new_model, label[index_shuffle], filename,
                                                                                list_ids_proj_doc_id=list_id_local_desc_to_project_id_doc)

            # 2-D mode does not compute per-cluster inertia; emit placeholders.
            list_cluster_and_inertia = [{"nb" : 0, "inertia" : 0, "pdt_dyn" : 0}] * len(list_doc_centroids)
            misc_info = {}

        else :
            # The FVS backend needs a photo-desc-type id; 1888 is the default.
            if pdt != None:
                pdt = int(pdt)
            else :
                pdt = 1888 # Using default value !
#            print(" We have dim of : " + str(dim) + " with this data of this shape : " + str(data.shape))
            from lib.lib_ml.lib_kmean_pyfvs import kmean_fvs_dim
            res = kmean_fvs_dim(data = None, nb_clusters = nb_cluster_input,
                                list_project_ids = list_project_ids,
                                limit = limit,
                                dim_input = dim,
                                lpgss = self.lib_user_data_internal,
                                hostname="/",
                                gpu_enable = gpu_enable,
                                nb_step = nb_step,
                                keep_fvs_running = keep_fvs_running,
                                port = port,
                                photo_desc_type = pdt,
                                build_fvs_desc = build_fvs_desc,
                                save_cluster = save_cluster,
                                load_cluster = load_cluster,
                                launch_fvs = launch_fvs,
                                verbose = verbose,
                                fvs_interface = fvs_interface)
            outfile, list_legends, list_doc_centroids, list_cluster_and_inertia, misc_info = res

        return outfile, list_legends, list_doc_centroids, list_cluster_and_inertia, misc_info

# Very useless !
#        from lib.lib_safia import safia_desc_export
#        for project_id in list_project_ids:
#            offset_media_id, offset_data_file_id = safia_desc_export(project_id=project_id, lpgss=self.lib_user_data_internal,
#                                                                     verbose = verbose, out_folder=out_file,
#                                                                     photo_desc_type=photo_desc_type,
#                                                                     offset_media_id = offset_media_id,
#                                                                     offset_data_file_id = offset_data_file_id)
#            offset_data_file_id += 1
#            nb_char = len(str(offset_media_id))
#            first_digit = int(str(offset_media_id)[0])
#            offset_media_id_str = str(first_digit) + ('0' * nb_char)
#            offset_media_id = int(offset_media_id_str)
#            print("- [ ] TODO would be good to offset offset_media_id to a next round number")

#        from lib.lib_ml.lib_kmean_pyfvs import kmean_all_projects
#        print(" Now connect csv ")
#        create_csv(out_file)

746 def get_datou_step_user(self, #id = None, 

747 list_datou_ids = []): 

748 return self.lib_user_data_internal.get_datou_step_user(self.user_id, list_datou_ids) 

749 

    # TODO VR could be in datou lib ?
    def update_datou(self, datou):
        """Create or update a datou (pipeline definition) and its steps.

        Accepts either a bare datou dict or the complete output format
        ``{"datou": [<datou>]}``.  Upserts the datou row for the current
        user, then re-parents and upserts every step onto it.

        :return: dict with the new step ids, the datou id and its name.
        """
        # - [ ] TODO VR 23-7-23 : check right

        # Do we want to manage DETAIL: Key (owner_id, filename)=(1, extract_expert_just_prompt_internal_datou_one_document_json) already exists.
        # for save datou

        # Unwrap the complete output format {"datou": [ ... ]}.
        if "datou" in datou and len(datou["datou"]) == 1:
            datou = datou["datou"][0]

        # An empty-string id means "create", like a missing one.
        datou_id_to_update = datou["id"] if "id" in datou and datou["id"] != "" else None
        datou_name = datou["name"] if "name" in datou else datou["filename"] # TODO VR backward compatibility while changing filename to name
        datou_is_public = datou["public"] if "public" in datou else None
        datou_id = self.lib_user_data_internal.update_datou(self.user_id, datou_name, datou_id_to_update, datou_is_public)

        print(" datou_id : " + str(datou_id))

        steps = datou["list_steps"] if "list_steps" in datou else datou["steps"] # TODO VR backward compatibility while changing steps to list_steps

        # Re-parent every step onto the (possibly newly created) datou id.
        for step in steps:
            step["datou_id"] = datou_id

        steps_ids = self.lib_user_data_internal.update_datou_step(steps)

        print("TODO")
        return {"log" : "Inserted", "new_step_ids" : steps_ids, "mtr_datou_id" : datou_id, "mtr_datou_name" : datou_name}

777 

    # - [ ] TODO rename load configuration project
    def load_configuration(self, with_class_name = False):
        """Load the per-layer configuration selected for the current user/project.

        Args:
            with_class_name: when True, also resolve each selected option to
                its layer class/module name via get_configuration_option().

        Returns:
            (map_conf_selected, map_conf_layer_class_name) where the first
            maps layer_type -> selected configuration, and the second maps
            layer_type -> {'class_name', 'module_name'} (empty dicts fall
            back to the hardcoded module-level defaults).
        """
        if self.lib_user_data_internal != None:
            list_configuration = self.lib_user_data_internal.load_configuration(self.user_id, self.project_id)
        else :
            list_configuration = []
        map_conf_selected = {}
        for l in list_configuration:
            map_conf_selected[l["layer_type"]] = l["configuration"]

        # No stored configuration: fall back to the hardcoded defaults.
        if map_conf_selected == {}:
            map_conf_selected = hc_default_map_conf_selected

        # TODO refactor: a join could be used here, but that is not certain either
        map_conf_layer_class_name = {}
        if with_class_name:
            if self.lib_user_data_internal != None:
                try :
                    list_layer_type_options_and_class_names = self.lib_user_data_internal.get_configuration_option()
                    for one_conf_options in list_layer_type_options_and_class_names:
                        layer_type = one_conf_options["layer_type"]
                        option_selected = map_conf_selected[layer_type]
                        # The options and their class/module names are stored as
                        # parallel comma-separated lists; the index of the selected
                        # option picks the matching class/module.
                        list_options_this_layer = one_conf_options['options'].split(",")
                        id_option_selected = list_options_this_layer.index(option_selected)
                        list_class_name_this_layer = one_conf_options['list_class_name'].split(",")
                        list_module_name_this_layer = one_conf_options['list_module_name'].split(",")
                        if id_option_selected < len(list_class_name_this_layer) and id_option_selected >= 0 :
                            class_name = list_class_name_this_layer[id_option_selected]
                            module_name = list_module_name_this_layer[id_option_selected]
                        else :
                            class_name = None
                            module_name = None
                        map_conf_layer_class_name[layer_type] = {'class_name' : class_name, 'module_name' : module_name}
                except Exception as e:
                    print(" ERROR in load_configuration for layer " + str(e))
                    map_conf_layer_class_name = hc_default_map_conf_layer_class_name

            else : # we are in the case where map_conf_layer_class_name == {}
                map_conf_layer_class_name = hc_default_map_conf_layer_class_name

        # NOTE(review): when with_class_name is False the map is always {}
        # here, so this warning prints on every such call — looks unintended;
        # confirm before changing.
        if map_conf_layer_class_name == {}:
            print("Error unexpected(since error was catched above) treated as warning !")
            map_conf_layer_class_name = hc_default_map_conf_layer_class_name


        return map_conf_selected, map_conf_layer_class_name

824 

825 def update_conf_project(self, project_id, data_json, replace = False): 

826 # - [ ] TODO VR check right 

827 ret = self.lib_user_data_internal.update_configuration_project(project_id, data_json, replace = replace) 

828 return ret 

829 

830 def load_conf_project(self, project_id, key = None): 

831 # - [ ] TODO VR check right 

832 if project_id == None: 

833 print("Error project_id is None !") 

834 return {} 

835 ret = self.lib_user_data_internal.load_conf_project(project_id, key = key) 

836 if type(ret) == dict: 

837 ret["project_id"] = project_id 

838 return ret 

839 

    def save_document(self, save_document_data, project_id, openai_token = None, verbose = False):
        """Save (re-embed) one document into the project's document table.

        Args:
            save_document_data: dict with "document_id" and "document_content".
            project_id: project whose document table receives the document.
            openai_token: token forwarded to the embedding pipeline.
            verbose: forwarded to the pipeline.

        Returns:
            (total_nb_token, used_model) on success, or
            (0, "no_access_c_pas_un_model") when the user has no role on the
            project.
        """
        print(" CHECK RIGHT OF " + str(self.user_id) + " on " + str(project_id))
        # VR 22-11-23 : better implementation of the rights
        has_access = self.lib_right.get_role_on_project(self.get_user_id(), project_id)
        if has_access :
            info = self.get_project_info(project_id)
            table_documents = info["table_documents"]
            json_to_save = [{
                "id" : save_document_data["document_id"],
                "text" : save_document_data["document_content"]
            }]

            # TODO VR 13-12 : not ideal but a bug that keeps following me:
            # delete first so the re-insert below cannot hit a duplicate key.
            self.lib_user_data_internal.delete_document(table_documents, save_document_data["document_id"])

            import asyncio
            from lib.import_util.lib_import_retrieval.scripts.process_json.process_json import process_json_dump_aux
            # Re-chunk/embed and insert the document via the async pipeline.
            total_nb_token, used_model = asyncio.run(process_json_dump_aux(json_to_save, {}, False, False, None, self.lib_user_data_internal, openai_token, table_documents, verbose = verbose))

            return total_nb_token, used_model
        else :
            return 0, "no_access_c_pas_un_model" # VR 22-11-23 : TODO better error handling on rights

862 

863# def get_chunk_id(self): 

864 

    # TODO rename load_document_content
    def load_document(self, document_id_input, project_id, chunk_id, verbose = False):
        """Return the concatenated content of a document's chunks.

        Args:
            document_id_input: document to load; when None, chunk_id drives
                the lookup in the data layer.
            project_id: project whose document table is read.
            chunk_id: forwarded to the data layer's get_documents.
            verbose: unused here.

        Returns:
            str: the joined chunk contents (empty when nothing is found), or
            a "No access" message string when the user has no role on the
            project.
        """
        print(" CHECK RIGHT OF " + str(self.user_id) + " on " + str(project_id))
        # VR 22-11-23 : better implementation of the rights
        has_access = self.lib_right.get_role_on_project(self.get_user_id(), project_id)
        if has_access :
            info = self.get_project_info(project_id)
            table_documents = info["table_documents"]
            print("TODO verify right access")
            check_table_exists = self.lib_user_data_internal.check_table_exists(table_documents)
            if not check_table_exists:
                print("l849 ERROR treated as warning Table " + str(table_documents) + " does not exists, please create it first !")
                documents = []
            else:
                documents = self.lib_user_data_internal.get_documents(table_documents, document_id_input, chunk_id)
            total_content = ""
            if len(documents) > 0:
                document_id = documents[0]["document_id"]
                if document_id_input != None:
                    # Sort chunks numerically by the suffix of their id.
                    # Assumes chunk ids look like "<document_id><sep><n>"
                    # (the +1 skips one separator char) — TODO confirm format.
                    documents = sorted(documents, key=lambda x : int(x["id"][len(document_id) + 1:]))
            for d in documents:
                total_content += d["content"]
            return total_content
        else :
            return "No access : user " + str(self.get_user_id()) + " on pid " + str(project_id)

890 

891 def load_descs(self, project_id, limit = 0, offset = 0): 

892 

893 info_project = self.get_project_info(project_id) 

894 table_documents = info_project["table_documents"] 

895 print("TODO verify right access") 

896 list_descs = self.lib_user_data_internal.get_descs(table_documents, limit = limit, offset = offset) 

897 

898 if len(list_descs) > 0: 

899 print(" Let's check the type : !") 

900 from lib.lib_util import display_real_dict_row_shorten 

901 print(" nb desc : " + str(len(list_descs))) 

902 display_real_dict_row_shorten(list_descs[:2]) 

903 

904 return list_descs 

905 

    from typing import List
    # Placeholder types for the summary pipeline sketched below; their real
    # shape is not defined yet.
    class Document():
        pass
    class Group():
        pass
    class Summary():
        pass

913 

    def group_for_one_request(document: Document, number: int) -> List[Group]:
        """Create a number of groups for a single request.

        Takes a document and the number of groups to create, and returns a
        list of groups. Not implemented yet.
        """
        # NOTE(review): defined in the class body without `self` — calling it
        # on an instance would misbind `document`; presumably design scratch
        # code, confirm before wiring it up.
        pass

920 

921 def recursive_summary(group: Group, depth: int = 0) -> Summary: 

922 """ 

923 Cette fonction crée un résumé d'un groupe de documents en utilisant une méthode récursive. 

924 Elle prend un groupe de documents et une profondeur de récursion, et renvoie un résumé. 

925 """[l] 

926 pass 

927 

    def parallel_summary(groups: List[Group]) -> List[Summary]:
        """Create summaries for several groups in parallel.

        Takes a list of groups and returns a list of summaries.
        Not implemented yet.
        """
        pass

934 

    def merge_summaries(summaries: List[Summary]) -> Summary:
        """Merge several summaries into a single one.

        Takes a list of summaries and returns one summary.
        Not implemented yet.
        """
        pass

941 

942 def get_data_user(self, user, user_id_hack = None, verbose = False): 

943 

944# user_id_hack = self.user_id 

945 

946 # Non, il faut se connecter d'abord et avoir du coup un user_id sous la main ! 

947 

948 if user_id_hack != None: 

949 projects = self.get_projects(user_id_hack) 

950 else : 

951 projects = self.get_project(user) # renommer en get_projects je crois 

952 

953 from lib.lib_util import humanize_modified_time 

954 for p in projects: 

955 p["modified_at_h"] = humanize_modified_time(p["modified_at"]) 

956 

957 print(" projects : " + str(len(projects))) 

958 if len(projects) > 0: 

959 print(" the first project is : " + str(projects[0])[:100]) 

960 

961 return projects 

962 

963 # should be private, or somewhere else ? 

964 def append_step_to_datou(self, list_datou, list_step, verbose = False): 

965 

966 map_datou = {datou['id']: datou for datou in list_datou} 

967 

968 for s in list_step: 

969 mtd_id = s["mtr_datou_id"] 

970 if mtd_id not in map_datou: 

971 print("Unexpected behavior, internal error") 

972 else : 

973 if "steps" not in map_datou[mtd_id]: 

974 map_datou[mtd_id]["steps"] = [] 

975 map_datou[mtd_id]["steps"].append(s) 

976 for mtd_id in map_datou: 

977 list_step_to_sort = map_datou[mtd_id]["steps"] if "steps" in map_datou[mtd_id] else [] 

978 if verbose : 

979 print(" list_step_to_sort : " + str(list_step_to_sort)) 

980 list_step_sorted = list(sorted(list_step_to_sort, key=lambda x: x['order_step'], reverse=False)) 

981 if verbose : 

982 print(" list_step_sorted : " + str(list_step_sorted)) 

983 map_datou[mtd_id]["steps"] = list_step_sorted 

984 

985 import json #, asc # didin't manage to install 

986 # TODO vr valider ces tests pour intégration des data de datous dans l'interface html 

987 for k in map_datou: 

988 # map_datou[k]["data_str"] = ast.literal_eval(json.dumps(map_datou[k])) 

989 map_datou[k]["data_str"] = json.dumps(map_datou[k], indent=4, sort_keys=True, default=str) 

990 

991 list_datou_with_step = [map_datou[k] for k in map_datou] 

992 

993 return list_datou_with_step 

994 

995 def get_datou_step_template(self): 

996 # - [ ] TODO VR 23-7-23 : check right 

997 return self.lib_user_data_internal.get_datou_step_template() 

998 

999 def get_datou(self, id = None, list_datou_ids = None, 

1000 instantiate_config_prepare = False, 

1001 dont_instantiate_config_prepare = False, # VR TODO audit-refacto True for editing of the configuration => why the hell do we want another default value ? 

1002 load_recursively_map_reduce = False, 

1003 project_id = None): # This is for execution 

1004 # - [ ] TODO VR 23-7-23 : check right : for datou owner it is in fact by default enforced ! 

1005 

1006 # Avoid modification for others invocation of this function https://stackoverflow.com/questions/1132941/least-astonishment-and-the-mutable-default-argument 

1007 if list_datou_ids == None: 

1008 list_datou_ids = [] 

1009 

1010 print(" list_datou_ids " + str(list_datou_ids) + " id : " + str(id)) 

1011 

1012 if id != None and int(id) not in list_datou_ids: 

1013 list_datou_ids.append(int(id)) 

1014 

1015 datou_user = self.lib_user_data_internal.get_datou_user(self.user_id, list_datou_ids) 

1016 datou_step_user = self.lib_user_data_internal.get_datou_step_user(self.user_id, list_datou_ids) 

1017 if id != None and not dont_instantiate_config_prepare: # on a pas vraiment envie de se taper toutes les instanciations si on charge tous les datous 

1018 # par contre par défaut on instancie ! 

1019# instantiate_config_prepare and 

1020 list_param_json = list(map(lambda x : x["param_json"], datou_step_user)) 

1021 from lib.util.lib_formal_conf import formal_conf_prepare 

1022 list_param_json_configured = formal_conf_prepare(list_param_json, lss = self, project_id = project_id) 

1023 for i, sub_json in enumerate(list_param_json_configured): 

1024 datou_step_user[i]["param_json"] = list_param_json_configured[i] 

1025 list_datou_with_step = self.append_step_to_datou(datou_user, datou_step_user) 

1026 return list_datou_with_step 

1027 

1028 def delete_datou(self, mtr_datou_id): 

1029 # - [ ] TODO VR 23-7-23 : check right : for datou owner it is in fact by default enforced ! 

1030 

1031 return self.lib_user_data_internal.delete_datou(mtr_datou_id, self.user_id) 

1032 

    def get_projects(self, user_id = 0):
        """List the projects a user can access, enriched with role and users.

        Args:
            user_id: user whose accessible projects are listed.

        Returns:
            list of project-info dicts, each with "role", a JSON dump under
            "proj_data_str", and — for owners/admins — "users"/"nb_users".
        """
        list_projects = self.lib_right.get_projects_with_access(user_id)
        map_id_projects_with_role = {x["id"] : x for x in list_projects}
        list_ids = list(map(lambda x : x["id"], list_projects))
        list_project_with_name_and_role = []
        for pid in list_ids:
            # Defensive: skip ids that are not purely numeric.
            if not str(pid).isnumeric():
                print("Unexpected non numeric pid : " + str(pid))
                continue
            info = self.lib_user_data_internal.get_project_info(pid)
            role = None
            if "role" in map_id_projects_with_role[pid]:
                role = map_id_projects_with_role[pid]["role"]
            info["role"] = role

            # - [ ] TODO VR question Should we do this with ajax ? grrr just for VR grrr get_role_access_to_project
            # Owners/admins also see the member list (with emails when known).
            if role == "owner" or role == "admin":
                list_user_ids_role = self.lib_right.get_all_role_access_to_project(project_id=pid)
                list_user_ids = list(map(lambda x:x["user_id"], list_user_ids_role))
                # - [ ] TODO VR verify we take this from user ids
                map_user_id_email = self.lib_right.get_email_from_user_ids(list_user_ids)
                for d in list_user_ids_role:
                    # NOTE(review): this rebinds the `user_id` parameter inside
                    # the loop — harmless after this point but confusing;
                    # confirm before refactoring.
                    user_id = d["user_id"]
                    if user_id in map_user_id_email:
                        d["mail"] = map_user_id_email[user_id]
                    else :
                        print("Missing email or hidden from user_id")
                        d["mail"] = "Undisclosed Email"
                info["users"] = list_user_ids_role
                info["nb_users"] = len(list_user_ids_role)

                # - [ ] Should we also get the active invitation ?

            import json
            # default=str so datetimes and other non-JSON types are stringified.
            info["proj_data_str"] = json.dumps(info, default=str)

            list_project_with_name_and_role.append(info)

        #get_projects_from_owner(list_ids)
        print("TODO, how to get the user_id, grrr !")

        return list_project_with_name_and_role
#        return [{"id":777, "description":"Not Arsene Lupin", "role":"test_read"}]

1076 

1077 def create_group(self, name): 

1078 return self.lib_right.create_group(self.user_id, name) 

1079 

1080 def get_group(self, role = "owner"): 

1081 list_group = self.lib_right.get_group(self.user_id, role = role) 

1082 return list_group 

1083 

1084 def remove_right_group(self, user_id_that_query, groupId, userId_to_remove): 

1085 which_right = self.lib_right.get_role_on_group(user_id_that_query, groupId) 

1086 if which_right == "admin" or which_right == "owner": 

1087 self.lib_user_data_internal.remove_role_group_to_user(groupId, userId_to_remove) 

1088 

1089 def remove_right(self, project_id, user_id_to_change): 

1090 role = self.lib_right.get_role_on_project(self.user_id, project_id) 

1091 if role == "owner" or role == "admin": 

1092 self.lib_right.remove_role_to_user(user_id_to_change, project_id) 

1093 else : 

1094 return "Not Admin Cant Change Role" 

1095 

1096 def create_invitation(self, project_id, group_id, 

1097 role, mail = None, host = "https://safia.rubbia.fr"): 

1098 if project_id != None: 

1099 is_admin = self.lib_right.is_project_admin(self.user_id, project_id) 

1100 if is_admin : 

1101 link = self.lib_right.create_invitation(self.user_id, project_id, role, mail, host) 

1102 if mail != None: 

1103 print("TODO need to send mail sometime !") 

1104 return {"link":link} 

1105 else : 

1106 return {} 

1107 else: 

1108 is_admin = self.lib_right.is_group_admin(self.user_id, group_id) 

1109 if is_admin : 

1110 link = self.lib_right.create_invitation(self.user_id, group_id, role, mail, host, 

1111 action="grant_group_role") 

1112 if mail != None: 

1113 print("TODO need to send mail sometime !") 

1114 return {"link": link} 

1115 else: 

1116 return {} 

1117 

1118 def use_invitation(self, token): 

1119 one_record = self.lib_right.get_invitation_info_from_token(token) 

1120 

1121 # TODO : need to check validity or at least not used 

1122 # TODO il faut en fait check le valid_until ainsi que le used_at et user_id 

1123 if one_record != None and one_record["user_id"] == None: 

1124 role = one_record["param_json"]["role"] 

1125 data_id = one_record["param_json"]["data_id"] 

1126 owner_id = one_record["owner_id"] 

1127 action = one_record["action"] 

1128 if action == "grant_project_role": 

1129 self.lib_right.add_role_project_to_user(self.user_id, data_id, role) 

1130 self.lib_right.update_invitation_used(one_record["id"], self.user_id) 

1131 elif action == "grant_group_role": 

1132 print(" TODO voila ") 

1133 self.lib_right.add_role_group_to_user(self.user_id, data_id, role) 

1134 self.lib_right.update_invitation_used(one_record["id"], self.user_id) 

1135 return("Please check your project list on page projects !") 

1136 else : 

1137 if one_record == None or "used_at" not in one_record: 

1138 return("Invitation already used, but no date ?") 

1139 else: 

1140 return("Invitation already used on " + str(one_record["used_at"])) 

1141 

1142 print("Hello !") 

1143 

1144 def get_project_by_shortname(self, shortlink): 

1145 return self.lib_user_data_internal.get_project_by_shortname(shortlink) 

1146 

    def get_conso(self, hash_id_treatment = None, output_type = "dict", # other type : pd
                  col_csv = "sum(nb_page) as nb_page, 0 as id", limit = 0, offset = 0,
                  condition_csv = "previous_month,gliding_month,current_month,previous_year,gliding_year,current_year",
                  project_id = None):
        """Aggregate consumption (page counts) over several time windows.

        For each window name in *condition_csv*, a list of SQL-like condition
        dicts is assembled and passed to the data layer's load_data_audit.

        Args:
            hash_id_treatment: optional treatment filter forwarded as-is.
            output_type: "dict" or "pd", forwarded to load_data_audit.
            col_csv: aggregate columns to select.
            limit / offset: pagination forwarded to load_data_audit.
            condition_csv: comma-separated window names.
            project_id: project filter; falls back to self.project_id.

        Returns:
            dict mapping each window name to its load_data_audit result.
        """
# col_csv = "id,id_file,mtr_datou_id,created_at,launched_at,modified_at,deleted_at,valid_until,user_id,project_id,safia_doc_id,hash_id_treatment,cdn,input_data,nb_page,nb_modif,input_file,audit_resume,info_anon,info_customer,info_lab,info_cdn,info_back,info_date,info_suivi,info_action,info_usage_integration,info_ml,info_init,info_extract,info_consolidate,info_result,info_exec,info_page,info_stat",

        if project_id == None:
            project_id = self.project_id


        list_condition_csv = list(condition_csv.split(","))

        res = {}
        for one_cond_csv in list_condition_csv:

            # In my opinion all of this should be wrapped in a saxia util
            # function, to reuse it from the endpoints as well as from ajax.
            condition = []
            if one_cond_csv != "":
                if one_cond_csv == "gliding_month":
                    one_condition = {"type" : "GREATER",
                                     "variable" : "created_at",
                                     "value" : "formula",
                                     "formula" : "now() - interval '1 month'"}
                    condition.append(one_condition)
                    # condition = [" created_at > "]
                elif one_cond_csv == "previous_month":
                    one_condition = {"type" : "EQUAL",
                                     "variable" : "EXTRACT(MONTH FROM created_at)",
                                     "value" : "formula",
                                     "formula" : "EXTRACT(MONTH FROM now() - interval '1 month')"}
                    condition.append(one_condition)
                    one_condition = {"type" : "EQUAL",
                                     "variable" : "EXTRACT(YEAR FROM created_at)",
                                     "value" : "formula",
                                     "formula" : "EXTRACT(YEAR FROM now() - interval '1 month')"}
                    condition.append(one_condition)
                elif one_cond_csv == "current_month":
                    one_condition = {"type" : "EQUAL",
                                     "variable" : "EXTRACT(MONTH FROM created_at)",
                                     "value" : "formula",
                                     "formula" : "EXTRACT(MONTH FROM now())"}
                    condition.append(one_condition)
                    one_condition = {"type" : "EQUAL",
                                     "variable" : "EXTRACT(YEAR FROM created_at)",
                                     "value" : "formula",
                                     "formula" : "EXTRACT(YEAR FROM now())"}
                    condition.append(one_condition)
                elif one_cond_csv == "previous_year":
                    one_condition = {"type" : "EQUAL",
                                     "variable" : "EXTRACT(YEAR FROM created_at)",
                                     "value" : "formula",
                                     "formula" : "EXTRACT(YEAR FROM now() - interval '1 year')"}
                    condition.append(one_condition)
                elif one_cond_csv == "current_year":
                    one_condition = {"type" : "EQUAL",
                                     "variable" : "EXTRACT(YEAR FROM created_at)",
                                     "value" : "formula",
                                     "formula" : "EXTRACT(YEAR FROM now())"}
                    condition.append(one_condition)
                else:
                    # NOTE(review): "gliding_year" falls through to this else
                    # and prints "Condition not supported", yet it is still
                    # handled by the guard just below — looks unintended;
                    # confirm.
                    print("Condition not supported")
                # Extra time bound: limits the scan to the last year (also
                # added redundantly for previous_month/current_month).
                if one_cond_csv == "gliding_year" or one_cond_csv == "previous_month" or one_cond_csv == "current_month":
                    one_condition = {"type" : "GREATER",
                                     "variable" : "created_at",
                                     "value" : "formula",
                                     "formula" : "now() - interval '1 year'"}
                    condition.append(one_condition)

            if project_id != None:
                one_condition = {"type" : "EQUAL",
                                 "variable" : "project_id",
                                 "value" : project_id}
                condition.append(one_condition)
            else :
                # Impossible match: we do not want any data without a project.
                one_condition = {"type" : "EQUAL",
                                 "variable" : "project_id",
                                 "value" : -1}
                condition.append(one_condition)

            # 40 is presumably the mtr_datou_id of the "extract" treatment —
            # hardcoded magic number, TODO confirm and name it.
            mtr_datou_id_extract = 40
            one_condition = {"type" : "EQUAL",
                             "variable" : "mtr_datou_id",
                             "value" : mtr_datou_id_extract}
            condition.append(one_condition)

            one_res = self.lib_user_data_internal.load_data_audit(hash_id_treatment = hash_id_treatment,
                                                                  output_type = output_type,
                                                                  col_csv = col_csv,
                                                                  limit = limit,
                                                                  offset = offset,
                                                                  condition = condition)

            res[one_cond_csv] = one_res

        return res #.to_html(classes="table pdt-table table-striped sortable") # if output_type == pd