Coverage for lib/lib_safia

1# -*- coding: utf-8 -*-

2__author__ = 'moilerat'

6#! This file is a class definition for a system in a chatbot application, named LibSafiaSystem, that can access most of safia services, encapsulate all the calls today in functions in lib_safia. As an object-oriented system, it initializes with parameters related to user data and authentication. The system is then equipped with multiple methods for setting up, managing, and connecting users, getting and managing projects, estimating cost, logging, resuming projects, configuring and updating setups, managing documents, and processing summaries.

9# - [ ] TODO move in lib_util

10def parsed_title(content):

11 import re

12 # définir l'expression régulière

13 pattern = r'^#\s?(?P<title>[^\n]*)\n'

14 # compiler l'expression régulière

15 pattern = re.compile(pattern, re.MULTILINE)

17 # chercher cette expression dans le texte

18 match = pattern.search(content)

20 # retourner le titre trouvé ou None si aucun titre

21 if match :

22 title = match.group('title')

23 else :

24 title = "No Title Found"

26 return title

30import sys

32# TODO VR 6-6-44+79 : replace info@opio.fr by anonymous user

34hc_default_map_conf_layer_class_name = {'nlp_chat': {'class_name': 'LayerPromptOpenAI', 'module_name': 'lib.lib_openai'}, 'compute_embedding': {'class_name': 'LayerGeneric', 'module_name': 'lib.brick_layers.lib_abstract_generic_layer'}, 'image_to_text': {'class_name': 'LayerGeneric', 'module_name': 'lib.brick_layers.lib_abstract_generic_layer'}, 'speech_to_text': {'class_name': 'LayerGeneric', 'module_name': 'lib.brick_layers.lib_abstract_generic_layer'}, 'vector_search_engine': {'class_name': 'LayerGeneric', 'module_name': 'lib.brick_layers.lib_abstract_generic_layer'}}

35hc_default_map_conf_selected = {'nlp_chat': 'openai_gpt_api', 'compute_embedding': 'openai_ada', 'image_to_text': 'google_ocr', 'speech_to_text': 'openai_whisper', 'vector_search_engine': 'pg_vector'}

37# Object-Oriented system to call safia

38# - can access most of safia service

39# - TODO VR 14-6-23 : should encapsulate all the call today in function in lib_safia

40# - lib_Safia should only build it, and maybe treat special cas (non-connected and freemium case)

41class LibSafiaSystem():

42 def __iter__(self) :

43 #return iter(self)

44 # raise StopIteration

45 return iter(self)

46 def __next__(self) :

47# raise StopIteration

48 raise StopIteration

49 def __init__(self,

50 lib_user_data_internal = None,

51 lib_user_data_external = None,

52 lib_auth_user_otp = None,

53 lib_right = None):

55 self.lib_user_data_internal = lib_user_data_internal

56 self.lib_user_data_external = lib_user_data_external

57 self.lib_auth_user_otp = lib_auth_user_otp

58 self.lib_right = lib_right

60 # marrant à garder !

61 if not hasattr(self, "project_id"):

62 self.project_id = None

63 # Could be used sometime or not (merci very clear #7 7-7-23)

64 self.user_id = None

65# self.schema_user_documents = ""

66# self.user_id = user_id

67# self.user = user

68# self.map_projects = {}

70 from auth.lib_cost import CostEstimation as CE

71 self.ce = CE()

73 # Once the mail has been validated by an otp

74 def setup_user(self, user, description, auth_type, otp):

75 all_info = {}

76 info_user_from_mail = self.lib_user_data_internal.get_user_from_mail(user)

77 if info_user_from_mail == None:

78 info_setup_user = self.lib_user_data_internal.setup_user(user, auth_type)

79# VR TODO on pourrait eviter ces qqs lignes dans la condition

80 self.user_id = info_setup_user

82 all_info["setup_user"] = info_setup_user

84 user_data_ret = self.lib_user_data_internal.select_user(self.user_id)

86 self.schema_user_documents = user_data_ret["schema_user_documents"] #"schema_user_documents"

87 else :

88 self.schema_user_documents = info_user_from_mail["schema_user_documents"] if "schema_user_documents" in info_user_from_mail else "info_opio_fr"

89 self.user_id = info_user_from_mail["id"]

91 already_existing_project = self.lib_user_data_internal.get_project_with_description_and_user(self.user_id, description)

93 if already_existing_project != None:

94 all_info["project"] = already_existing_project

95 else :

96 all_info["project"] = self.lib_user_data_internal.create_project(self.user_id, self.schema_user_documents, description, otp)

99 try:

100 self.lib_right.create_self_group(self.user_id)

101 except Exception as e:

102 print("Internal Error in create : " + str(e))

103

104 return all_info

105

106 def set_user_id(self, user_id):

107 self.user_id = user_id

108 info = self.lib_user_data_internal.get_user_from_mail(self.user)

109

110 def get_user_info(self):

111 info = self.lib_user_data_internal.get_user_info_from_id(self.user_id)

112 return info

113

114 def connect_with_otp(self, otp):

115 # TODO VR 5-7-23 la on pourrait faire fait setup_user qui peut etre mis dans lib_auth_user_otp

116 # et est actuellement une partie de register_user_get_data

117

118 info_connexion, is_valid = self.lib_auth_user_otp.connect_with_otp(otp)

119

120 self.user = info_connexion['Email_User'] if 'Email_User' in info_connexion else 'anonymous@opio.fr'

121

122 if self.lib_user_data_internal != None:

123 info = self.lib_user_data_internal.get_user_from_mail(self.user)

124 self.user_id = info["id"] if info != None and "id" in info else -1

125 else : # this must be used by APIA

126 self.user_id = -1

127

128# self.user_id = info_connexion["user_id"]

129

130 return info_connexion, is_valid

131

132 # - [ ] TODO VR temporary 23-7-23 to get rid of user_id from safia.py

133 def get_user_id(self):

134 return self.user_id

135

136 def select_one_project_id(self):

137 list_pids = self.lib_right.get_projects_with_access(self.get_user_id())

138 project_id = None

139 if len(list_pids) > 0 and "id" in list_pids[0]:

140 project_id = list_pids[0]["id"]

141 return project_id, len(list_pids)

142

143 def has_access_to_this_facture(self, path = "/static/factures", filename = "facture.pdf"):

144 import os

145 path_facture = os.path.join(path, filename)

146 project_id = self.lib_user_data_internal.select_project_id_from_facture(path_facture)

147 role = self.lib_right.get_role_on_project(self.user_id, project_id)

148 return role != None

149

150 def has_access_to_this_document(self, filepath):

151 import os

152 # 192.168.1.33 - - [05/Nov/2025 18:53:50] "GET /static/temp/workarea_anon/output_wfrom_api_d_0511_nov_16/id_0_nb_1_d_20251104_4c4d85e630e8524e16cf3bda36cdf90e8dff168b986dc178dfaf776a4ddec6ee.docx HTTP/1.1" 304 -

153 import re

154 # parse hit from filepath : ie 64 hexadecimal caracter before .docx

155 pattern = r'([a-f0-9]{64})\.docx'

156 if filepath[-5:] != ".docx":

157 return False

158 if len(filepath) < 64 + 5:

159 return False

160 potential_hex = filepath[-(64 + 5):-5]

161 all_info = self.lib_user_data_internal.load_data_audit(col_csv = "project_id", hash_id_treatment=potential_hex)

162 project_id = all_info["project_id"] if "project_id" in all_info else -1

163 role = self.lib_right.get_role_on_project(self.user_id, project_id)

164 return role != None

165

166 def get_user(self):

167 return self.user

168

169 def delete_user(self):

170 return self.lib_user_data_internal.audit_or_delete_user(self.user_id, also_delete=True)

171

172 def audit_user(self):

173 return self.lib_user_data_internal.audit_or_delete_user(self.user_id)

174

175 def connect_with_mail_validated_by_password(self, password):

176 pass

177

178 def get_project(self, user = None, verbose = False):

179 info_user_from_mail = self.lib_user_data_internal.get_user_from_mail(user)

180 if info_user_from_mail != None:

181# deprecated on 27-10-24

182# message = "No user, no project, please login on openpromptia.com !"

183# print(message)

184# return []

185# else :

186 self.schema_user_documents = info_user_from_mail["schema_user_documents"]

187 self.user_id = info_user_from_mail["id"]

188

189 list_projects = self.lib_user_data_internal.get_projects_from_owner(self.user_id)

190 # map_id_table_documents = {x["id"] : x["table_documents"] for x in list_projects}

191 # map_project_metatable_info = self.lib_data.get_meta_info_table_documents(map_id_table_documents)

192 if verbose:

193 print(str(list_projects))

194 else :

195 print("Loaded config of " + str(len(list_projects)) + " ! ")

196 return list_projects

197

198 # - [ ] TODO VR ne porte pas bien son nom ! et oui 18-8-23

199 # et depend de configuration => a sans doute disparu

200 def create_projects(self, description, otp_as_hash):

201

202 info_setup_project = self.lib_user_data_internal.create_project(self.user_id, self.schema_user_documents, description, otp_as_hash)

203

204 return info_setup_project

205

206 def truncate_project_documents(self, project_id, kill_all = False, verbose = False):

207 info_project = self.lib_user_data_internal.get_project_info(project_id)

208 self.user_id = info_project["owner_id"] if "owner_id" in info_project else -1

209 if "table_documents" not in info_project:

210 return None

211 return self.lib_user_data_internal.truncate(info_project["table_documents"], kill_all=kill_all, verbose=verbose)

212

213 def get_project_info(self, project_id, with_facture = False, is_validated = None):

214

215 import logging

216 logger = logging.getLogger()

217 logger.info("Before getting self.lib_data.get_project_info ")

218

219 if self.lib_user_data_internal == None or not str(project_id).isnumeric():

220 print(" Wrong configuration or pid : " + str(project_id))

221 return {}

222

223 info_project = self.lib_user_data_internal.get_project_info(project_id)

224 logger.info(" info_project : " + str(info_project))

225

226 meta_info = {}

227 try:

228 meta_info = self.lib_user_data_internal.get_meta_info_project(info_project["table_documents"])

229 except Exception as e:

230 print(str(e))

231 print("Too much voilà ?")

232

233 logger.info(" nb_documents : " + str(meta_info))

234

235 info_project["number_documents"] = meta_info["number_documents"] if "number_documents" in meta_info else 0

236 info_project["latest_update"] = meta_info["latest_update"] if "latest_update" in meta_info else None

237

238 info_project["size"] = "TO ESTIMATE"

239

240 # info_project["cost"] = "TO ESTIMATE"

241

242 if "table_documents" in info_project:

243 info_project["command"] = """

244 cd $GITSAFIA/prompt/python/lib/import_util/lib_import_retrieval;

245 export PYTHONPATH=`pwd`/../../..:`pwd`;

246 python3.10 scripts/process_json/process_json.py --project_id=""" + str(project_id) + """ --pg_table=""" + info_project["table_documents"] + """ --filepath=../../../io/Fotonower_Issues.json

247 """

248

249 info_project["created_at"] = info_project["created_at"].strftime("%m/%d/%Y %H:%M:%S") if "created_at" in info_project else ""

250 info_project["modified_at"] = info_project["modified_at"].strftime("%m/%d/%Y %H:%M:%S") if "modified_at" in info_project else None

251

252 if with_facture:

253 try :

254 info_project["facture"] = self.lib_user_data_internal.get_facture_from_project(project_id, is_validated)

255 except Exception as e:

256 print("Error getting facture : " + str(e))

257 info_project["facture"] = []

258

259# Project Name</th>

260# <th>Number of Documents</th>

261# <th>Size</th>

262# <th>Cost</th>

263# <th>Command

264

265 return info_project

266

267 def update_project_info(self, project_id, info_json):

268 print("TODO check right ")

269 return self.lib_user_data_internal.update_project_info(project_id, info_json)

270

271

272 def update_project_costs(self, project_id, model, nb_new_token):

273 add_cost = self.ce.compute_cost_search(nb_new_token, model)

274

275 self.lib_user_data_internal.add_cost_to_project(project_id, add_cost)

276

277 def create_project(self, input, otp_as_hash = None):

278 project_name = input["project_name"]

279 print(str(input))

280

281 if otp_as_hash == None:

282 import uuid

283 otp_as_hash = str(uuid.uuid4()).replace("-", "")

284

285 # - [ ] TODO VR refacto 18-8-23 : one want's to use the project

286 info_user_from_mail = self.lib_user_data_internal.get_user_from_mail(self.user)

287 if info_user_from_mail != None:

288 self.schema_user_documents = info_user_from_mail["schema_user_documents"] #"schema_user_documents"

289 else :

290 print("Internal error")

291

292 return self.lib_user_data_internal.create_project(self.user_id, self.schema_user_documents, project_name, otp_as_hash)

293

294 def insert_query_log(self, user_id = -1, project_id = -1, endpoint = "/", request_method = "GET",

295 request_body = "", ip_address = "0.0.0.0", elapsed_time = 0):

296 import logging

297 logger = logging.getLogger()

298 logger.info("inside insert_query_log from lss ")

299 logger.info(str(request_body))

300 query_id = self.lib_user_data_internal.insert_query_log(project_id, user_id, endpoint, request_method = request_method,

301 request_body = request_body, ip_address = ip_address,

302 elapsed_time = elapsed_time)

303 return query_id

304

305 def update_query_end(self, query_id, response_status = 200, response_body = "",

306 elapsed_time = 0, cost = 0, detailed_time = {}):

307 if query_id == -1:

308 return {"nothing" : "recorded"}

309 self.lib_user_data_internal.update_query_end(query_id,

310 response_status = response_status,

311 response_body = response_body,

312 elapsed_time = elapsed_time,

313 cost = cost,

314 detailed_time = detailed_time)

315 return {}

316

317 def get_queries(self, project_id):

318 return self.lib_user_data_internal.get_queries(project_id)

319

320 def resume_all_project(self, project_id,

321 limit = 10000,

322 nb_cluster_input = 10,

323 metric = "euclidean",

324 verbose = True,

325 strat = "default"):

326 print("NEED RIGHT")

327

328 info_project = self.get_project_info(project_id)

329

330 table_documents = info_project["table_documents"]

331

332 check_table_exists = self.lib_user_data_internal.check_table_exists(table_documents)

333 if not check_table_exists:

334 print("l310 ERROR treated as warning Table " + table_documents + " does not exists, please create it first !")

335 document_datas = []

336 else:

337 document_datas = self.lib_user_data_internal.get_documents(table_documents)

338

339 print(" How many documents : " + str(len(document_datas)))

340

341 map_group_by_documents = self.group_by_documents(document_datas)

342

343 if len(map_group_by_documents) == 1:

344 print("We will use one document strategy (potentially long) ")

345 else :

346 message_error = """

347 Can't resume this project with multiple documents, please delete all the element not associated to one document before requiring again a resume (until this is developped !) .

348 Maybe you have a query and an empty result that is useless, delete it in documents.html page.

349 """

350 print("ERROR : we need to develop the multiple document resume strategy returning ! Please use stat button first, because this can occurs some costs ! ")

351

352 print("NOW WE GO THROUGH STRAT KMEAN !")

353

354 filename, list_nn_centroids, list_doc_centroids, list_example_media, misc_info = self.kmean_2d(list_project_ids = [project_id],

355 nb_project = 1,

356 nb_cluster_input = nb_cluster_input,

357 verbose = verbose,

358 limit = limit,

359 filename = "temp/img.png",

360 metric = metric)

361

362 # - [ ] TODO VR 28-7-23 : prepare the prompt

363 preprompt = "Merci de faire un résumé des éléments suivant en y faisant référence "

364 input_text = ""

365 for doc in list_doc_centroids:

366 content = doc["content"]

367 id = doc["id"]

368 input_text += "-----\n" + id + "------\n" + content

369

370

371 input = {}

372 input["preprompt"] = preprompt

373 input["text"] = input_text

374 list_datou_step = ["request_gpt"]

375 list_inputs = [input]

376

377# from lib.datou.datou_exec import datou_exec

378# from auth.lib_auth import get_datou_exec_context_as_complete_param_json

379# from auth.lib_conf_system import lcs_global_singleton

380# privacy = False

381# user = self.get_user()

382# complete_param_json = get_datou_exec_context_as_complete_param_json(user, verbose, privacy,

383# openai_token=lcs_global_singleton.get_openai_api_key(),

384# lss=self,

385# project_id=project_id)

386# result = datou_exec(list_datou_step, complete_param_json, input)

387

388 audit = "We have multiple documents so we use kmean strategy on chunk !"

389

390 # return result

391 return audit, list_inputs, list_datou_step

392

393 document = list(map_group_by_documents.keys())[0]

394 return self.resume_one_document(map_group_by_documents[document])

395

396 def stat_all_one_project(self, project_id, limit = 10000, offset = 0,

397 order_by_recent = True):

398

399 info_project = self.get_project_info(project_id)

400

401 table_documents = info_project["table_documents"] if "table_documents" in info_project else None

402

403 check_table_exists = self.lib_user_data_internal.check_table_exists(table_documents)

404

405 if not check_table_exists:

406 print("l382 ERROR treated as warning Table " + str(table_documents) + " does not exists, please create it first !")

407 document_datas = []

408 else :

409 document_datas = self.lib_user_data_internal.get_documents(table_documents,

410 limit = limit, offset = offset,

411 order_by_recent = order_by_recent)

412

413 print(" How many documents : " + str(len(document_datas)))

414

415 map_group_by_documents = self.group_by_documents(document_datas)

416

417 map_audit_doc = []

418

419 if len(map_group_by_documents) == 1:

420 print("We will use one document strategy (potentially long) ")

421 else :

422 message_error = """

423 Can't resume this project with multiple documents, please delete all the element not associated to one document before requiring again a resume (until this is developped !) .

424 Maybe you have a query and an empty result that is useless, delete it in documents.html page.

425 """

426 print("ERROR : we need to develop the multiple document resume strategy returning ! Please use stat button first, because this can occurs some costs ! ")

427

428 for k in map_group_by_documents:

429 audit, list_inputs, list_datou_step = self.parse_document_prepare_input(map_group_by_documents[k])

430 # TODO remove the three following lines

431 kunslash = k.replace("/", "")

432 kunslash = kunslash.replace("@", "")

433 kunslash = kunslash.replace(".", "")

434 from lib.lib_util import replace_non_alpha_with_underscore

435 kunslash = replace_non_alpha_with_underscore(kunslash)

436 audit["document_id"] = k

437 audit["kunslash"] = kunslash

438 from lib.lib_util import humanize_modified_time

439 audit["created_at"] = audit["created_at"]

440 audit["created_at_h"] = humanize_modified_time(audit["created_at"].replace(tzinfo=None))

441 map_audit_doc.append(audit)

442 audit["title"] = parsed_title(audit["begin_content"])

443

444

445 from dateutil import parser

446 dates = [parser.parse(item['created_at']) if isinstance(item['created_at'], str) else item['created_at'] for

447 item in map_audit_doc]

448 # Trouver la date la plus récente

449 import datetime

450 most_recent_date = max(dates) if len(dates) > 0 else datetime.datetime.strptime("2014-09-06 00:00:00", '%Y-%m-%d %H:%M:%S')

451 print(f"La date la plus récente est : {most_recent_date.strftime('%Y-%m-%d %H:%M:%S')}")

452

453 from lib.lib_util import humanize_modified_time

454 most_recent_date_h = humanize_modified_time(most_recent_date.replace(tzinfo=None))

455 info_project["most_recent_date_h"] = most_recent_date_h

456 info_project["most_recent_date"] = most_recent_date

457

458 map_audit_doc = sorted(map_audit_doc, key=lambda x: x['created_at'],reverse=True)

459 return map_audit_doc, info_project

460

461# document = list(map_group_by_documents.keys())[0]

462# return "TODO"

463

464# def group_by_documents(documents: List[Document]) -> Dict[str, List[Document]]:

465 def group_by_documents(self, documents):

466 """

467 Cette fonction prend une liste de documents et les regroupe par un certain critère, par exemple par sujet ou par auteur.

468 Elle renvoie un dictionnaire où les clés sont les critères de regroupement et les valeurs sont des listes de documents.

469 """

470 map_doc_chunk = {}

471 for document in documents:

472 document_source = document["document_id"]

473 document_id = document["id"]

474 if document_source not in map_doc_chunk:

475 map_doc_chunk[document_source] = {}

476 if document_id.startswith(document_source):

477 chunk_id_str = document_id[len(document_source) + 1:]

478 if not chunk_id_str.isnumeric() or document_id != document_source + "_" + chunk_id_str :

479 print("Unexpected internal error doc 2, can't manage this document !")

480 continue

481

482 chunk_id = int(chunk_id_str)

483 if chunk_id in map_doc_chunk[document_source]:

484 print("Unexpected internal error doc 3, can't manage this document !")

485 continue

486

487 map_doc_chunk[document_source][chunk_id] = document

488

489 else :

490 print("Unexpected internal error doc 1, can't manage this document !")

491 continue

492

493 return map_doc_chunk

494

495 def resume_one_document(self, map_one_document_id_chunk):

496 audit, list_inputs, list_datou_step = self.parse_document_prepare_input(map_one_document_id_chunk)

497

498 return audit, list_inputs, list_datou_step

499 # return "WIP " + str(audit) + " " + str(len(list_inputs)) + "\n " + str(list_inputs)

500

501 # Pretreat docs

502 def parse_document_prepare_input(self, map_doc_chunk, max_nb_token_per_input = 100000) : #8000): #4096 8192):

503 """

504 Cette fonction estime le nombre de groupes nécessaires pour un seul document.

505 Elle prend un document et renvoie un nombre entier.

506 """

507

508 preprompt = "Merci de faire un résumé en listant les points importants et les taches à accomplir de la transcription de l'AG de Fotonower :"

509

510 import tiktoken

511 tokenizer = tiktoken.get_encoding("cl100k_base") # The encoding scheme to use for tokenization

512

513 list_inputs = []

514 list_datou_step = []

515 list_nb_token_inputs = []

516

517 current_input = ""

518 current_input_nb_token = 0

519 nb_tokens = 0

520 size_content_total = 0

521 begin_content = ""

522 begin_chunk = None

523 created_at = None

524 for chunk_id in map_doc_chunk:

525 created_at = map_doc_chunk[chunk_id]["created_at"]

526 one_content = map_doc_chunk[chunk_id]["content"]

527 if begin_content == "" or chunk_id < begin_chunk:

528 begin_content = one_content

529 begin_chunk = chunk_id

530 size_content_total += len(one_content)

531 tokens = tokenizer.encode(one_content, disallowed_special=())

532# print(str(len(tokens)))

533 nb_tokens += len(tokens)

534 if current_input_nb_token + len(tokens) < max_nb_token_per_input:

535 current_input += one_content

536 current_input_nb_token += len(tokens)

537 else :

538# list_inputs.append(current_input)

539 list_inputs.append({"text" : current_input, "preprompt" : preprompt})

540 list_nb_token_inputs.append(current_input_nb_token)

541 current_input = one_content

542 current_input_nb_token = len(tokens)

543

544 list_inputs.append({"text" : current_input, "preprompt" : preprompt})

545 list_nb_token_inputs.append(current_input_nb_token)

546

547 # print(str(list_nb_token_inputs))

548

549 audit = {"size_document" : size_content_total,

550 "nb_chunks" : len(map_doc_chunk),

551 "nb_tokens" : nb_tokens,

552 "nb_input" : len(list_inputs),

553 "created_at" : created_at,

554 "begin_content" : begin_content}

555

556

557 list_datou_step.append("request_gpt")

558# list_datou_step.append("result_to_json")

559# list_datou_step.append("import_json")

560

561 return audit, list_inputs, list_datou_step

562

563 # Fonction de construction de données kmeans 2D et multidimensionnels (à refactorer plus tard Q1-2024)

564 def kmean_2d(self, list_project_ids,

565 nb_project = 0,

566 nb_cluster_input = 0,

567 verbose = False, limit = 0,

568 filename = "temp/img.png",

569 metric = "",

570 dim = 2,

571 gpu_enable = False,

572 nb_step = 10,

573 keep_fvs_running = False,

574 port = None,

575 pdt = None,

576 build_fvs_desc = True,

577 save_cluster = "",

578 load_cluster = "",

579 launch_fvs = True,

580 fvs_interface = None):

581

582 def cast_vector(row):

583 return np.array(list(map(lambda x: x.astype('double'), row)))

584

585 import numpy as np

586 import random

587

588 if dim == 2:

589 list_id_local_desc_to_project_id_doc = []

590

591 data_to_concat = ()

592 label_to_concat = ()

593 nb_data_total = 0

594 idx = 0

595 for project_id in list_project_ids:

596 map_descs = self.load_descs(project_id=project_id, limit = limit)

597

598 for d in map_descs:

599 list_id_local_desc_to_project_id_doc.append((project_id, d["id"], d["content"]))

600

601 if len(map_descs) == 0:

602 continue

603

604 list_desc_str = list(map(lambda x : x["embedding"], map_descs))

605 # list_desc = list(map(lambda x : list(map(float, x.lstrip("[").rstrip("]").split(","))), list_desc_str))

606

607 list_desc = []

608 for emb in list_desc_str:

609 # print(" WTF type ")

610 type_list_desc = str(type(emb))

611 # print(" type_list_desc : " + str(type_list_desc))

612 if type_list_desc == "<class 'str'>":

613 sys.stdout.write("s")

614 list_desc.append(list(map(float, emb.lstrip("[").rstrip("]").split(","))))

615 else :

616 sys.stdout.write("n")

617 # print(" emb : " + str(emb))

618 list_desc.append(cast_vector(emb))

619

620 # - [ ] Having kmean in multi-dimension Bouh :-(

621 list_desc_2d = list(map(lambda x : x[:2], list_desc))

622

623 nb_data_one_proj = len(list_desc_2d)

624

625 data_to_concat += (list_desc_2d,)

626 label_to_concat += ([idx] * nb_data_one_proj,)

627 nb_data_total += nb_data_one_proj

628

629 idx += 1

630 if nb_project > 0 and idx > nb_project:

631 break

632

633 nb_clusters = len(list_project_ids)

634 if nb_cluster_input == 0:

635 nb_clusters = idx

636 else :

637 nb_clusters = nb_cluster_input

638 from lib.lib_ml.lib_kmean_pyfvs import plot_classif_region, KmeanGB

639 # model = KmeanGB(nb_cluster= nb_clusters, metric= "KL", nb_sample= 100)

640

641 if len(data_to_concat) == 0:

642 return None, None, None, None, {}

643

644 data = np.concatenate(data_to_concat, axis=0)

645 label = np.concatenate(label_to_concat, axis=0)

646 index_shuffle = np.arange(0, nb_data_total)

647

648

649 # Bouh :-(

650 # data = data[:2]

651

652 # data = np.concatenate((data1, data2, data3), axis=0)

653 # label = np.concatenate(([0]*data_size, [1]*data_size, [2]*data_size, [3]*data_size), axis=0)

654 # index_shuffle = np.arange(0, nb_clusters*data_size)

655

656 np.random.shuffle(index_shuffle)

657

658 data = data[index_shuffle, :]

659

660 np.random.seed(9)

661

662 shuffle = random.sample(range(data.shape[0]), nb_clusters)

663

664 centroids = data[shuffle]

665

666 assigned_centroids = np.zeros(len(data), dtype=np.int32)

667

668 print(centroids)

669

670 from sklearn.cluster import KMeans

671

672

673 # (reslt_cent, reslt_label, _) = kmeans(data, k=nb_clusters, nb_sample=100)

674 # labels = np.unique(reslt_label)

675

676 if metric == "":

677 c_model = KMeans(n_clusters= nb_clusters)

678 outfile, list_legends, list_doc_centroids = plot_classif_region(data, c_model, label[index_shuffle], filename,

679 list_ids_proj_doc_id=list_id_local_desc_to_project_id_doc)

680 else :

681

682

683 #"euclidean", "KL", "IS", "EX", "LX"

684

685 new_model = KmeanGB(nb_cluster= nb_clusters, metric= metric, nb_sample= 100)

686

687 # new_model = KmeanGB(nb_cluster=nb_clusters, nb_sample=100)

688 outfile, list_legends, list_doc_centroids = plot_classif_region(data, new_model, label[index_shuffle], filename,

689 list_ids_proj_doc_id=list_id_local_desc_to_project_id_doc)

690

691 # list_indexes

692 # list_indexes

693

694 list_cluster_and_inertia = [{"nb" : 0, "inertia" : 0, "pdt_dyn" : 0}] * len(list_doc_centroids)

695 misc_info = {}

696

697 else :

698 if pdt != None:

699 pdt = int(pdt)

700 else :

701 pdt = 1888 # Using default value !

702# print(" We have dim of : " + str(dim) + " with this data of this shape : " + str(data.shape))

703 from lib.lib_ml.lib_kmean_pyfvs import kmean_fvs_dim

704 res = kmean_fvs_dim(data = None, nb_clusters = nb_cluster_input,

705 list_project_ids = list_project_ids,

706 limit = limit,

707 dim_input = dim,

708 lpgss = self.lib_user_data_internal,

709 hostname="/",

710 gpu_enable = gpu_enable,

711 nb_step = nb_step,

712 keep_fvs_running = keep_fvs_running,

713 port = port,

714 photo_desc_type = pdt,

715 build_fvs_desc = build_fvs_desc,

716 save_cluster = save_cluster,

717 load_cluster = load_cluster,

718 launch_fvs = launch_fvs,

719 verbose = verbose,

720 fvs_interface = fvs_interface)

721 outfile, list_legends, list_doc_centroids, list_cluster_and_inertia, misc_info = res

722

723 return outfile, list_legends, list_doc_centroids, list_cluster_and_inertia, misc_info

724

725

726

727# Very useless !

728# from lib.lib_safia import safia_desc_export

729# for project_id in list_project_ids:

730# offset_media_id, offset_data_file_id = safia_desc_export(project_id=project_id, lpgss=self.lib_user_data_internal,

731# verbose = verbose, out_folder=out_file,

732# photo_desc_type=photo_desc_type,

733# offset_media_id = offset_media_id,

734# offset_data_file_id = offset_data_file_id)

735# offset_data_file_id += 1

736# nb_char = len(str(offset_media_id))

737# first_digit = int(str(offset_media_id)[0])

738# offset_media_id_str = str(first_digit) + ('0' * nb_char)

739# offset_media_id = int(offset_media_id_str)

740# print("- [ ] TODO would be good to offset offset_media_id to a next round number")

741

742# from lib.lib_ml.lib_kmean_pyfvs import kmean_all_projects

743# print(" Now connect csv ")

744# create_csv(out_file)

745

746 def get_datou_step_user(self, #id = None,

747 list_datou_ids = []):

748 return self.lib_user_data_internal.get_datou_step_user(self.user_id, list_datou_ids)

749

750 # TODO VR could be in datou lib ?

751 def update_datou(self, datou):

752 # - [ ] TODO VR 23-7-23 : check right

753

754 # Do we want to manage DETAIL: Key (owner_id, filename)=(1, extract_expert_just_prompt_internal_datou_one_document_json) already exists.

755 # for save datou

756

757 # Managing format output complet

758 if "datou" in datou and len(datou["datou"]) == 1:

759 datou = datou["datou"][0]

760

761 datou_id_to_update = datou["id"] if "id" in datou and datou["id"] != "" else None

762 datou_name = datou["name"] if "name" in datou else datou["filename"] # TODO VR backward compatibility while chaning filename to name

763 datou_is_public = datou["public"] if "public" in datou else None

764 datou_id = self.lib_user_data_internal.update_datou(self.user_id, datou_name, datou_id_to_update, datou_is_public)

765

766 print(" datou_id : " + str(datou_id))

767

768 steps = datou["list_steps"] if "list_steps" in datou else datou["steps"] # TODO VR backward compatibility while chaning filename to name

769

770 for step in steps:

771 step["datou_id"] = datou_id

772

773 steps_ids = self.lib_user_data_internal.update_datou_step(steps)

774

775 print("TODO")

776 return {"log" : "Inserted", "new_step_ids" : steps_ids, "mtr_datou_id" : datou_id, "mtr_datou_name" : datou_name}

777

778 # - [ ] TODO rename load configuration project

779 def load_configuration(self, with_class_name = False):

780 if self.lib_user_data_internal != None:

781 list_configuration = self.lib_user_data_internal.load_configuration(self.user_id, self.project_id)

782 else :

783 list_configuration = []

784 map_conf_selected = {}

785 for l in list_configuration:

786 map_conf_selected[l["layer_type"]] = l["configuration"]

787

788 if map_conf_selected == {}:

789 map_conf_selected = hc_default_map_conf_selected

790

791 # TODO refacto on pourrait faire un join, mais ce n'est pas sur non plus

792 map_conf_layer_class_name = {}

793 if with_class_name:

794 if self.lib_user_data_internal != None:

795 try :

796 list_layer_type_options_and_class_names = self.lib_user_data_internal.get_configuration_option()

797 for one_conf_options in list_layer_type_options_and_class_names:

798 layer_type = one_conf_options["layer_type"]

799 option_selected = map_conf_selected[layer_type]

800 list_options_this_layer = one_conf_options['options'].split(",")

801 id_option_selected = list_options_this_layer.index(option_selected)

802 list_class_name_this_layer = one_conf_options['list_class_name'].split(",")

803 list_module_name_this_layer = one_conf_options['list_module_name'].split(",")

804 if id_option_selected < len(list_class_name_this_layer) and id_option_selected >= 0 :

805 class_name = list_class_name_this_layer[id_option_selected]

806 module_name = list_module_name_this_layer[id_option_selected]

807 else :

808 class_name = None

809 module_name = None

810 map_conf_layer_class_name[layer_type] = {'class_name' : class_name, 'module_name' : module_name}

811 except Exception as e:

812 print(" ERROR in load_configuration for layer " + str(e))

813 map_conf_layer_class_name = hc_default_map_conf_layer_class_name

814

815 else : # on est dans le cas ou map_conf_layer_class_name == {}:

816 map_conf_layer_class_name = hc_default_map_conf_layer_class_name

817

818 if map_conf_layer_class_name == {}:

819 print("Error unexpected(since error was catched above) treated as warning !")

820 map_conf_layer_class_name = hc_default_map_conf_layer_class_name

821

822

823 return map_conf_selected, map_conf_layer_class_name

824

825 def update_conf_project(self, project_id, data_json, replace = False):

826 # - [ ] TODO VR check right

827 ret = self.lib_user_data_internal.update_configuration_project(project_id, data_json, replace = replace)

828 return ret

829

830 def load_conf_project(self, project_id, key = None):

831 # - [ ] TODO VR check right

832 if project_id == None:

833 print("Error project_id is None !")

834 return {}

835 ret = self.lib_user_data_internal.load_conf_project(project_id, key = key)

836 if type(ret) == dict:

837 ret["project_id"] = project_id

838 return ret

839

840 def save_document(self, save_document_data, project_id, openai_token = None, verbose = False):

841 print(" CHECK RIGHT OF " + str(self.user_id) + " on " + str(project_id))

842 # VR 22-11-23 : implémentation meilleur des droits

843 has_access = self.lib_right.get_role_on_project(self.get_user_id(), project_id)

844 if has_access :

845 info = self.get_project_info(project_id)

846 table_documents = info["table_documents"]

847 json_to_save = [{

848 "id" : save_document_data["document_id"],

849 "text" : save_document_data["document_content"]

850 }]

851

852 # TODO VR 13-12 : pas ideal mais un bug qui me suit

853 self.lib_user_data_internal.delete_document(table_documents, save_document_data["document_id"])

854

855 import asyncio

856 from lib.import_util.lib_import_retrieval.scripts.process_json.process_json import process_json_dump_aux

857 total_nb_token, used_model = asyncio.run(process_json_dump_aux(json_to_save, {}, False, False, None, self.lib_user_data_internal, openai_token, table_documents, verbose = verbose))

858

859 return total_nb_token, used_model

860 else :

861 return 0, "no_access_c_pas_un_model" # VR 22-11-23 : tODO meilleure gestion des erreurs sur les droits

862

863# def get_chunk_id(self):

864

865 # TODO rename load_document_content

866 def load_document(self, document_id_input, project_id, chunk_id, verbose = False):

867 print(" CHECK RIGHT OF " + str(self.user_id) + " on " + str(project_id))

868 # VR 22-11-23 : implémentation meilleur des droits

869 has_access = self.lib_right.get_role_on_project(self.get_user_id(), project_id)

870 if has_access :

871 info = self.get_project_info(project_id)

872 table_documents = info["table_documents"]

873 print("TODO verify right access")

874 check_table_exists = self.lib_user_data_internal.check_table_exists(table_documents)

875 if not check_table_exists:

876 print("l849 ERROR treated as warning Table " + str(table_documents) + " does not exists, please create it first !")

877 documents = []

878 else:

879 documents = self.lib_user_data_internal.get_documents(table_documents, document_id_input, chunk_id)

880 total_content = ""

881 if len(documents) > 0:

882 document_id = documents[0]["document_id"]

883 if document_id_input != None:

884 documents = sorted(documents, key=lambda x : int(x["id"][len(document_id) + 1:]))

885 for d in documents:

886 total_content += d["content"]

887 return total_content

888 else :

889 return "No access : user " + str(self.get_user_id()) + " on pid " + str(project_id)

890

891 def load_descs(self, project_id, limit = 0, offset = 0):

892

893 info_project = self.get_project_info(project_id)

894 table_documents = info_project["table_documents"]

895 print("TODO verify right access")

896 list_descs = self.lib_user_data_internal.get_descs(table_documents, limit = limit, offset = offset)

897

898 if len(list_descs) > 0:

899 print(" Let's check the type : !")

900 from lib.lib_util import display_real_dict_row_shorten

901 print(" nb desc : " + str(len(list_descs)))

902 display_real_dict_row_shorten(list_descs[:2])

903

904 return list_descs

905

906 from typing import List

907 class Document():

908 pass

909 class Group():

910 pass

911 class Summary():

912 pass

913

914 def group_for_one_request(document: Document, number: int) -> List[Group]:

915 """

916 Cette fonction crée un certain nombre de groupes pour une seule requête.

917 Elle prend un document et un nombre de groupes à créer, et renvoie une liste de groupes.

918 """

919 pass

920

921 def recursive_summary(group: Group, depth: int = 0) -> Summary:

922 """

923 Cette fonction crée un résumé d'un groupe de documents en utilisant une méthode récursive.

924 Elle prend un groupe de documents et une profondeur de récursion, et renvoie un résumé.

925 """[l]

926 pass

927

928 def parallel_summary(groups: List[Group]) -> List[Summary]:

929 """

930 Cette fonction crée des résumés pour plusieurs groupes en parallèle.

931 Elle prend une liste de groupes et renvoie une liste de résumés.

932 """

933 pass

934

935 def merge_summaries(summaries: List[Summary]) -> Summary:

936 """

937 Cette fonction fusionne plusieurs résumés en un seul.

938 Elle prend une liste de résumés et renvoie un résumé.

939 """

940 pass

941

942 def get_data_user(self, user, user_id_hack = None, verbose = False):

943

944# user_id_hack = self.user_id

945

946 # Non, il faut se connecter d'abord et avoir du coup un user_id sous la main !

947

948 if user_id_hack != None:

949 projects = self.get_projects(user_id_hack)

950 else :

951 projects = self.get_project(user) # renommer en get_projects je crois

952

953 from lib.lib_util import humanize_modified_time

954 for p in projects:

955 p["modified_at_h"] = humanize_modified_time(p["modified_at"])

956

957 print(" projects : " + str(len(projects)))

958 if len(projects) > 0:

959 print(" the first project is : " + str(projects[0])[:100])

960

961 return projects

962

963 # should be private, or somewhere else ?

964 def append_step_to_datou(self, list_datou, list_step, verbose = False):

965

966 map_datou = {datou['id']: datou for datou in list_datou}

967

968 for s in list_step:

969 mtd_id = s["mtr_datou_id"]

970 if mtd_id not in map_datou:

971 print("Unexpected behavior, internal error")

972 else :

973 if "steps" not in map_datou[mtd_id]:

974 map_datou[mtd_id]["steps"] = []

975 map_datou[mtd_id]["steps"].append(s)

976 for mtd_id in map_datou:

977 list_step_to_sort = map_datou[mtd_id]["steps"] if "steps" in map_datou[mtd_id] else []

978 if verbose :

979 print(" list_step_to_sort : " + str(list_step_to_sort))

980 list_step_sorted = list(sorted(list_step_to_sort, key=lambda x: x['order_step'], reverse=False))

981 if verbose :

982 print(" list_step_sorted : " + str(list_step_sorted))

983 map_datou[mtd_id]["steps"] = list_step_sorted

984

985 import json #, asc # didin't manage to install

986 # TODO vr valider ces tests pour intégration des data de datous dans l'interface html

987 for k in map_datou:

988 # map_datou[k]["data_str"] = ast.literal_eval(json.dumps(map_datou[k]))

989 map_datou[k]["data_str"] = json.dumps(map_datou[k], indent=4, sort_keys=True, default=str)

990

991 list_datou_with_step = [map_datou[k] for k in map_datou]

992

993 return list_datou_with_step

994

995 def get_datou_step_template(self):

996 # - [ ] TODO VR 23-7-23 : check right

997 return self.lib_user_data_internal.get_datou_step_template()

998

999 def get_datou(self, id = None, list_datou_ids = None,

1000 instantiate_config_prepare = False,

1001 dont_instantiate_config_prepare = False, # VR TODO audit-refacto True for editing of the configuration => why the hell do we want another default value ?

1002 load_recursively_map_reduce = False,

1003 project_id = None): # This is for execution

1004 # - [ ] TODO VR 23-7-23 : check right : for datou owner it is in fact by default enforced !

1005

1006 # Avoid modification for others invocation of this function https://stackoverflow.com/questions/1132941/least-astonishment-and-the-mutable-default-argument

1007 if list_datou_ids == None:

1008 list_datou_ids = []

1009

1010 print(" list_datou_ids " + str(list_datou_ids) + " id : " + str(id))

1011

1012 if id != None and int(id) not in list_datou_ids:

1013 list_datou_ids.append(int(id))

1014

1015 datou_user = self.lib_user_data_internal.get_datou_user(self.user_id, list_datou_ids)

1016 datou_step_user = self.lib_user_data_internal.get_datou_step_user(self.user_id, list_datou_ids)

1017 if id != None and not dont_instantiate_config_prepare: # on a pas vraiment envie de se taper toutes les instanciations si on charge tous les datous

1018 # par contre par défaut on instancie !

1019# instantiate_config_prepare and

1020 list_param_json = list(map(lambda x : x["param_json"], datou_step_user))

1021 from lib.util.lib_formal_conf import formal_conf_prepare

1022 list_param_json_configured = formal_conf_prepare(list_param_json, lss = self, project_id = project_id)

1023 for i, sub_json in enumerate(list_param_json_configured):

1024 datou_step_user[i]["param_json"] = list_param_json_configured[i]

1025 list_datou_with_step = self.append_step_to_datou(datou_user, datou_step_user)

1026 return list_datou_with_step

1027

1028 def delete_datou(self, mtr_datou_id):

1029 # - [ ] TODO VR 23-7-23 : check right : for datou owner it is in fact by default enforced !

1030

1031 return self.lib_user_data_internal.delete_datou(mtr_datou_id, self.user_id)

1032

1033 def get_projects(self, user_id = 0):

1034 list_projects = self.lib_right.get_projects_with_access(user_id)

1035 map_id_projects_with_role = {x["id"] : x for x in list_projects}

1036 list_ids = list(map(lambda x : x["id"], list_projects))

1037 list_project_with_name_and_role = []

1038 for pid in list_ids:

1039 if not str(pid).isnumeric():

1040 print("Unexpected non numeric pid : " + str(pid))

1041 continue

1042 info = self.lib_user_data_internal.get_project_info(pid)

1043 role = None

1044 if "role" in map_id_projects_with_role[pid]:

1045 role = map_id_projects_with_role[pid]["role"]

1046 info["role"] = role

1047

1048 # - [ ] TODO VR question Should we do this with ajax ? grrr just for VR grrr get_role_access_to_project

1049 if role == "owner" or role == "admin":

1050 list_user_ids_role = self.lib_right.get_all_role_access_to_project(project_id=pid)

1051 list_user_ids = list(map(lambda x:x["user_id"], list_user_ids_role))

1052 # - [ ] TODO VR verify we take this from user ids

1053 map_user_id_email = self.lib_right.get_email_from_user_ids(list_user_ids)

1054 for d in list_user_ids_role:

1055 user_id = d["user_id"]

1056 if user_id in map_user_id_email:

1057 d["mail"] = map_user_id_email[user_id]

1058 else :

1059 print("Missing email or hidden from user_id")

1060 d["mail"] = "Undisclosed Email"

1061 info["users"] = list_user_ids_role

1062 info["nb_users"] = len(list_user_ids_role)

1063

1064 # - [ ] Should we also get the active invitation ?

1065

1066 import json

1067 info["proj_data_str"] = json.dumps(info, default=str)

1068

1069 list_project_with_name_and_role.append(info)

1070

1071 #get_projects_from_owner(list_ids)

1072 print("TODO, how to get the user_id, grrr !")

1073

1074 return list_project_with_name_and_role

1075# return [{"id":777, "description":"Not Arsene Lupin", "role":"test_read"}]

1076

1077 def create_group(self, name):

1078 return self.lib_right.create_group(self.user_id, name)

1079

1080 def get_group(self, role = "owner"):

1081 list_group = self.lib_right.get_group(self.user_id, role = role)

1082 return list_group

1083

1084 def remove_right_group(self, user_id_that_query, groupId, userId_to_remove):

1085 which_right = self.lib_right.get_role_on_group(user_id_that_query, groupId)

1086 if which_right == "admin" or which_right == "owner":

1087 self.lib_user_data_internal.remove_role_group_to_user(groupId, userId_to_remove)

1088

1089 def remove_right(self, project_id, user_id_to_change):

1090 role = self.lib_right.get_role_on_project(self.user_id, project_id)

1091 if role == "owner" or role == "admin":

1092 self.lib_right.remove_role_to_user(user_id_to_change, project_id)

1093 else :

1094 return "Not Admin Cant Change Role"

1095

1096 def create_invitation(self, project_id, group_id,

1097 role, mail = None, host = "https://safia.rubbia.fr"):

1098 if project_id != None:

1099 is_admin = self.lib_right.is_project_admin(self.user_id, project_id)

1100 if is_admin :

1101 link = self.lib_right.create_invitation(self.user_id, project_id, role, mail, host)

1102 if mail != None:

1103 print("TODO need to send mail sometime !")

1104 return {"link":link}

1105 else :

1106 return {}

1107 else:

1108 is_admin = self.lib_right.is_group_admin(self.user_id, group_id)

1109 if is_admin :

1110 link = self.lib_right.create_invitation(self.user_id, group_id, role, mail, host,

1111 action="grant_group_role")

1112 if mail != None:

1113 print("TODO need to send mail sometime !")

1114 return {"link": link}

1115 else:

1116 return {}

1117

1118 def use_invitation(self, token):

1119 one_record = self.lib_right.get_invitation_info_from_token(token)

1120

1121 # TODO : need to check validity or at least not used

1122 # TODO il faut en fait check le valid_until ainsi que le used_at et user_id

1123 if one_record != None and one_record["user_id"] == None:

1124 role = one_record["param_json"]["role"]

1125 data_id = one_record["param_json"]["data_id"]

1126 owner_id = one_record["owner_id"]

1127 action = one_record["action"]

1128 if action == "grant_project_role":

1129 self.lib_right.add_role_project_to_user(self.user_id, data_id, role)

1130 self.lib_right.update_invitation_used(one_record["id"], self.user_id)

1131 elif action == "grant_group_role":

1132 print(" TODO voila ")

1133 self.lib_right.add_role_group_to_user(self.user_id, data_id, role)

1134 self.lib_right.update_invitation_used(one_record["id"], self.user_id)

1135 return("Please check your project list on page projects !")

1136 else :

1137 if one_record == None or "used_at" not in one_record:

1138 return("Invitation already used, but no date ?")

1139 else:

1140 return("Invitation already used on " + str(one_record["used_at"]))

1141

1142 print("Hello !")

1143

1144 def get_project_by_shortname(self, shortlink):

1145 return self.lib_user_data_internal.get_project_by_shortname(shortlink)

1146

1147 def get_conso(self, hash_id_treatment = None, output_type = "dict", # other type : pd

1148 col_csv = "sum(nb_page) as nb_page, 0 as id", limit = 0, offset = 0,

1149 condition_csv = "previous_month,gliding_month,current_month,previous_year,gliding_year,current_year",

1150 project_id = None):

1151# col_csv = "id,id_file,mtr_datou_id,created_at,launched_at,modified_at,deleted_at,valid_until,user_id,project_id,safia_doc_id,hash_id_treatment,cdn,input_data,nb_page,nb_modif,input_file,audit_resume,info_anon,info_customer,info_lab,info_cdn,info_back,info_date,info_suivi,info_action,info_usage_integration,info_ml,info_init,info_extract,info_consolidate,info_result,info_exec,info_page,info_stat",

1152

1153 if project_id == None:

1154 project_id = self.project_id

1155

1156

1157 list_condition_csv = list(condition_csv.split(","))

1158

1159 res = {}

1160 for one_cond_csv in list_condition_csv:

1161

1162 # A mon avis faut encapsuler tout cela dans une fonction util saxia ? pour l'utiliser dans les endpoint ainsi qu'en ajax

1163 condition = []

1164 if one_cond_csv != "":

1165 if one_cond_csv == "gliding_month":

1166 one_condition = {"type" : "GREATER",

1167 "variable" : "created_at",

1168 "value" : "formula",

1169 "formula" : "now() - interval '1 month'"}

1170 condition.append(one_condition)

1171 # condition = [" created_at > "]

1172 elif one_cond_csv == "previous_month":

1173 one_condition = {"type" : "EQUAL",

1174 "variable" : "EXTRACT(MONTH FROM created_at)",

1175 "value" : "formula",

1176 "formula" : "EXTRACT(MONTH FROM now() - interval '1 month')"}

1177 condition.append(one_condition)

1178 one_condition = {"type" : "EQUAL",

1179 "variable" : "EXTRACT(YEAR FROM created_at)",

1180 "value" : "formula",

1181 "formula" : "EXTRACT(YEAR FROM now() - interval '1 month')"}

1182 condition.append(one_condition)

1183 elif one_cond_csv == "current_month":

1184 one_condition = {"type" : "EQUAL",

1185 "variable" : "EXTRACT(MONTH FROM created_at)",

1186 "value" : "formula",

1187 "formula" : "EXTRACT(MONTH FROM now())"}

1188 condition.append(one_condition)

1189 one_condition = {"type" : "EQUAL",

1190 "variable" : "EXTRACT(YEAR FROM created_at)",

1191 "value" : "formula",

1192 "formula" : "EXTRACT(YEAR FROM now())"}

1193 condition.append(one_condition)

1194 elif one_cond_csv == "previous_year":

1195 one_condition = {"type" : "EQUAL",

1196 "variable" : "EXTRACT(YEAR FROM created_at)",

1197 "value" : "formula",

1198 "formula" : "EXTRACT(YEAR FROM now() - interval '1 year')"}

1199 condition.append(one_condition)

1200 elif one_cond_csv == "current_year":

1201 one_condition = {"type" : "EQUAL",

1202 "variable" : "EXTRACT(YEAR FROM created_at)",

1203 "value" : "formula",

1204 "formula" : "EXTRACT(YEAR FROM now())"}

1205 condition.append(one_condition)

1206 else:

1207 print("Condition not supported")

1208 if one_cond_csv == "gliding_year" or one_cond_csv == "previous_month" or one_cond_csv == "current_month":

1209 one_condition = {"type" : "GREATER",

1210 "variable" : "created_at",

1211 "value" : "formula",

1212 "formula" : "now() - interval '1 year'"}

1213 condition.append(one_condition)

1214

1215 if project_id != None:

1216 one_condition = {"type" : "EQUAL",

1217 "variable" : "project_id",

1218 "value" : project_id}

1219 condition.append(one_condition)

1220 else :

1221 # Impossible : on ne veut pas de données

1222 one_condition = {"type" : "EQUAL",

1223 "variable" : "project_id",

1224 "value" : -1}

1225 condition.append(one_condition)

1226

1227 mtr_datou_id_extract = 40

1228 one_condition = {"type" : "EQUAL",

1229 "variable" : "mtr_datou_id",

1230 "value" : mtr_datou_id_extract}

1231 condition.append(one_condition)

1232

1233 one_res = self.lib_user_data_internal.load_data_audit(hash_id_treatment = hash_id_treatment,

1234 output_type = output_type,

1235 col_csv = col_csv,

1236 limit = limit,

1237 offset = offset,

1238 condition = condition)

1239

1240 res[one_cond_csv] = one_res

1241

1242 return res #.to_html(classes="table pdt-table table-striped sortable") # if output_type == pd

Coverage for lib/lib_safia_system.py: 65%

672 statements