Coverage for lib/lib_safia_system.py: 65%
672 statements
« prev ^ index » next coverage.py v7.9.1, created at 2026-02-18 02:40 +0100
« prev ^ index » next coverage.py v7.9.1, created at 2026-02-18 02:40 +0100
1# -*- coding: utf-8 -*-
2__author__ = 'moilerat'
3#
4# # Safia copyright 2022-2023 VR - LICENSETOBEDEFINED : on premise treatment custom
5#
6#! This file defines LibSafiaSystem, a class for a chatbot-application system that can access most Safia services and encapsulates the calls that previously lived as standalone functions in lib_safia. It is initialized with user-data and authentication parameters, and provides methods for setting up, managing, and connecting users; getting and managing projects; estimating cost; logging; resuming projects; configuring and updating setups; managing documents; and processing summaries.
7#
9# - [ ] TODO move in lib_util
def parsed_title(content):
    """Return the first Markdown H1 title ('# ...') found in *content*.

    Falls back to the literal string "No Title Found" when no H1 line is
    present.  Fix over the previous version: the old pattern required a
    trailing newline after the title ('[^\\n]*\\n'), so a title sitting on
    the very last, unterminated line was never recognised; '$' with
    re.MULTILINE matches both before a newline and at end-of-string.
    """
    import re
    # '^#' anchors at any line start (MULTILINE); '\s?' tolerates the
    # conventional single space after the '#'.
    pattern = re.compile(r'^#\s?(?P<title>[^\n]*)$', re.MULTILINE)
    match = pattern.search(content)
    if match:
        return match.group('title')
    return "No Title Found"
30import sys
32# TODO VR 6-6-44+79 : replace info@opio.fr by anonymous user
# Fallback mapping: layer type -> implementing class and module, used by
# LibSafiaSystem.load_configuration when the configuration store provides no
# class information.  Only 'nlp_chat' points at a concrete implementation
# here; every other layer falls back to the generic abstract layer.
hc_default_map_conf_layer_class_name = {'nlp_chat': {'class_name': 'LayerPromptOpenAI', 'module_name': 'lib.lib_openai'}, 'compute_embedding': {'class_name': 'LayerGeneric', 'module_name': 'lib.brick_layers.lib_abstract_generic_layer'}, 'image_to_text': {'class_name': 'LayerGeneric', 'module_name': 'lib.brick_layers.lib_abstract_generic_layer'}, 'speech_to_text': {'class_name': 'LayerGeneric', 'module_name': 'lib.brick_layers.lib_abstract_generic_layer'}, 'vector_search_engine': {'class_name': 'LayerGeneric', 'module_name': 'lib.brick_layers.lib_abstract_generic_layer'}}
# Fallback mapping: layer type -> name of the selected configuration option,
# used when the user/project has no stored configuration rows.
hc_default_map_conf_selected = {'nlp_chat': 'openai_gpt_api', 'compute_embedding': 'openai_ada', 'image_to_text': 'google_ocr', 'speech_to_text': 'openai_whisper', 'vector_search_engine': 'pg_vector'}
37# Object-Oriented system to call safia
38# - can access most of safia service
39# - TODO VR 14-6-23 : should encapsulate all the call today in function in lib_safia
40# - lib_Safia should only build it, and maybe treat special cas (non-connected and freemium case)
41class LibSafiaSystem():
42 def __iter__(self) :
43 #return iter(self)
44 # raise StopIteration
45 return iter(self)
    def __next__(self) :
        """Iterator protocol: this object never yields any item."""
        raise StopIteration
    def __init__(self,
                 lib_user_data_internal = None,
                 lib_user_data_external = None,
                 lib_auth_user_otp = None,
                 lib_right = None):
        """Build the Safia facade around the injected service objects.

        Any collaborator may be None (e.g. APIA runs without an internal
        user-data store); methods guard for that where needed.
        """
        self.lib_user_data_internal = lib_user_data_internal
        self.lib_user_data_external = lib_user_data_external
        self.lib_auth_user_otp = lib_auth_user_otp
        self.lib_right = lib_right
        # Do not clobber a project_id that may already have been set
        # (e.g. by a subclass) before this constructor runs.
        if not hasattr(self, "project_id"):
            self.project_id = None
        # Resolved later by set_user_id()/connect_with_otp()/setup_user().
        self.user_id = None
        from auth.lib_cost import CostEstimation as CE
        self.ce = CE()
    # Once the mail has been validated by an otp
    def setup_user(self, user, description, auth_type, otp):
        """Register *user* (an e-mail address) and ensure a project named
        *description* exists for them.

        Side effects: sets ``self.user_id`` and ``self.schema_user_documents``.
        Returns a dict with a "project" entry and, for brand-new users,
        also a "setup_user" entry.
        """
        all_info = {}
        info_user_from_mail = self.lib_user_data_internal.get_user_from_mail(user)
        if info_user_from_mail == None:
            # Unknown e-mail: create the user, then re-read the record to
            # learn which documents schema was assigned to it.
            info_setup_user = self.lib_user_data_internal.setup_user(user, auth_type)
            # TODO(VR): these few lines inside the condition could be avoided
            self.user_id = info_setup_user
            all_info["setup_user"] = info_setup_user
            user_data_ret = self.lib_user_data_internal.select_user(self.user_id)
            self.schema_user_documents = user_data_ret["schema_user_documents"]
        else :
            # Existing user: fall back to the shared "info_opio_fr" schema
            # when none is recorded on the user row.
            self.schema_user_documents = info_user_from_mail["schema_user_documents"] if "schema_user_documents" in info_user_from_mail else "info_opio_fr"
            self.user_id = info_user_from_mail["id"]
        # Reuse a project with the same description if one exists, else create it.
        already_existing_project = self.lib_user_data_internal.get_project_with_description_and_user(self.user_id, description)
        if already_existing_project != None:
            all_info["project"] = already_existing_project
        else :
            all_info["project"] = self.lib_user_data_internal.create_project(self.user_id, self.schema_user_documents, description, otp)
        # Best effort: failing to create the self group must not abort setup.
        try:
            self.lib_right.create_self_group(self.user_id)
        except Exception as e:
            print("Internal Error in create : " + str(e))
        return all_info
106 def set_user_id(self, user_id):
107 self.user_id = user_id
108 info = self.lib_user_data_internal.get_user_from_mail(self.user)
110 def get_user_info(self):
111 info = self.lib_user_data_internal.get_user_info_from_id(self.user_id)
112 return info
114 def connect_with_otp(self, otp):
115 # TODO VR 5-7-23 la on pourrait faire fait setup_user qui peut etre mis dans lib_auth_user_otp
116 # et est actuellement une partie de register_user_get_data
118 info_connexion, is_valid = self.lib_auth_user_otp.connect_with_otp(otp)
120 self.user = info_connexion['Email_User'] if 'Email_User' in info_connexion else 'anonymous@opio.fr'
122 if self.lib_user_data_internal != None:
123 info = self.lib_user_data_internal.get_user_from_mail(self.user)
124 self.user_id = info["id"] if info != None and "id" in info else -1
125 else : # this must be used by APIA
126 self.user_id = -1
128# self.user_id = info_connexion["user_id"]
130 return info_connexion, is_valid
    # - [ ] TODO VR temporary 23-7-23 to get rid of user_id from safia.py
    def get_user_id(self):
        """Return the id of the currently connected user (None or -1 when unresolved)."""
        return self.user_id
136 def select_one_project_id(self):
137 list_pids = self.lib_right.get_projects_with_access(self.get_user_id())
138 project_id = None
139 if len(list_pids) > 0 and "id" in list_pids[0]:
140 project_id = list_pids[0]["id"]
141 return project_id, len(list_pids)
143 def has_access_to_this_facture(self, path = "/static/factures", filename = "facture.pdf"):
144 import os
145 path_facture = os.path.join(path, filename)
146 project_id = self.lib_user_data_internal.select_project_id_from_facture(path_facture)
147 role = self.lib_right.get_role_on_project(self.user_id, project_id)
148 return role != None
150 def has_access_to_this_document(self, filepath):
151 import os
152 # 192.168.1.33 - - [05/Nov/2025 18:53:50] "GET /static/temp/workarea_anon/output_wfrom_api_d_0511_nov_16/id_0_nb_1_d_20251104_4c4d85e630e8524e16cf3bda36cdf90e8dff168b986dc178dfaf776a4ddec6ee.docx HTTP/1.1" 304 -
153 import re
154 # parse hit from filepath : ie 64 hexadecimal caracter before .docx
155 pattern = r'([a-f0-9]{64})\.docx'
156 if filepath[-5:] != ".docx":
157 return False
158 if len(filepath) < 64 + 5:
159 return False
160 potential_hex = filepath[-(64 + 5):-5]
161 all_info = self.lib_user_data_internal.load_data_audit(col_csv = "project_id", hash_id_treatment=potential_hex)
162 project_id = all_info["project_id"] if "project_id" in all_info else -1
163 role = self.lib_right.get_role_on_project(self.user_id, project_id)
164 return role != None
    def get_user(self):
        """Return the e-mail of the connected user (set by connect_with_otp)."""
        return self.user
169 def delete_user(self):
170 return self.lib_user_data_internal.audit_or_delete_user(self.user_id, also_delete=True)
172 def audit_user(self):
173 return self.lib_user_data_internal.audit_or_delete_user(self.user_id)
    def connect_with_mail_validated_by_password(self, password):
        """Not implemented yet: password-based connection.  Currently returns None."""
        pass
178 def get_project(self, user = None, verbose = False):
179 info_user_from_mail = self.lib_user_data_internal.get_user_from_mail(user)
180 if info_user_from_mail != None:
181# deprecated on 27-10-24
182# message = "No user, no project, please login on openpromptia.com !"
183# print(message)
184# return []
185# else :
186 self.schema_user_documents = info_user_from_mail["schema_user_documents"]
187 self.user_id = info_user_from_mail["id"]
189 list_projects = self.lib_user_data_internal.get_projects_from_owner(self.user_id)
190 # map_id_table_documents = {x["id"] : x["table_documents"] for x in list_projects}
191 # map_project_metatable_info = self.lib_data.get_meta_info_table_documents(map_id_table_documents)
192 if verbose:
193 print(str(list_projects))
194 else :
195 print("Loaded config of " + str(len(list_projects)) + " ! ")
196 return list_projects
198 # - [ ] TODO VR ne porte pas bien son nom ! et oui 18-8-23
199 # et depend de configuration => a sans doute disparu
200 def create_projects(self, description, otp_as_hash):
202 info_setup_project = self.lib_user_data_internal.create_project(self.user_id, self.schema_user_documents, description, otp_as_hash)
204 return info_setup_project
206 def truncate_project_documents(self, project_id, kill_all = False, verbose = False):
207 info_project = self.lib_user_data_internal.get_project_info(project_id)
208 self.user_id = info_project["owner_id"] if "owner_id" in info_project else -1
209 if "table_documents" not in info_project:
210 return None
211 return self.lib_user_data_internal.truncate(info_project["table_documents"], kill_all=kill_all, verbose=verbose)
    def get_project_info(self, project_id, with_facture = False, is_validated = None):
        """Return the project row enriched with document metadata.

        Adds "number_documents", "latest_update", a placeholder "size", a
        ready-to-run import "command", formatted timestamps and, when
        *with_facture* is set, the list of invoices.  Returns {} when the
        internal store is missing or *project_id* is not numeric.
        """
        import logging
        logger = logging.getLogger()
        logger.info("Before getting self.lib_data.get_project_info ")
        # Guard: no store configured, or a non-numeric project id.
        if self.lib_user_data_internal == None or not str(project_id).isnumeric():
            print(" Wrong configuration or pid : " + str(project_id))
            return {}
        info_project = self.lib_user_data_internal.get_project_info(project_id)
        logger.info(" info_project : " + str(info_project))
        # Best effort: missing table / oversized projects must not abort.
        meta_info = {}
        try:
            meta_info = self.lib_user_data_internal.get_meta_info_project(info_project["table_documents"])
        except Exception as e:
            print(str(e))
            print("Too much voilà ?")
        logger.info(" nb_documents : " + str(meta_info))
        info_project["number_documents"] = meta_info["number_documents"] if "number_documents" in meta_info else 0
        info_project["latest_update"] = meta_info["latest_update"] if "latest_update" in meta_info else None
        info_project["size"] = "TO ESTIMATE"
        # Shell snippet a developer can paste to re-import documents into
        # this project's table.
        if "table_documents" in info_project:
            info_project["command"] = """
        cd $GITSAFIA/prompt/python/lib/import_util/lib_import_retrieval;
        export PYTHONPATH=`pwd`/../../..:`pwd`;
        python3.10 scripts/process_json/process_json.py --project_id=""" + str(project_id) + """ --pg_table=""" + info_project["table_documents"] + """ --filepath=../../../io/Fotonower_Issues.json
        """
        # NOTE(review): assumes created_at/modified_at are datetime objects
        # when present — a present-but-None value would raise; confirm with
        # the store's schema.
        info_project["created_at"] = info_project["created_at"].strftime("%m/%d/%Y %H:%M:%S") if "created_at" in info_project else ""
        info_project["modified_at"] = info_project["modified_at"].strftime("%m/%d/%Y %H:%M:%S") if "modified_at" in info_project else None
        if with_facture:
            try :
                info_project["facture"] = self.lib_user_data_internal.get_facture_from_project(project_id, is_validated)
            except Exception as e:
                print("Error getting facture : " + str(e))
                info_project["facture"] = []
        return info_project
267 def update_project_info(self, project_id, info_json):
268 print("TODO check right ")
269 return self.lib_user_data_internal.update_project_info(project_id, info_json)
272 def update_project_costs(self, project_id, model, nb_new_token):
273 add_cost = self.ce.compute_cost_search(nb_new_token, model)
275 self.lib_user_data_internal.add_cost_to_project(project_id, add_cost)
277 def create_project(self, input, otp_as_hash = None):
278 project_name = input["project_name"]
279 print(str(input))
281 if otp_as_hash == None:
282 import uuid
283 otp_as_hash = str(uuid.uuid4()).replace("-", "")
285 # - [ ] TODO VR refacto 18-8-23 : one want's to use the project
286 info_user_from_mail = self.lib_user_data_internal.get_user_from_mail(self.user)
287 if info_user_from_mail != None:
288 self.schema_user_documents = info_user_from_mail["schema_user_documents"] #"schema_user_documents"
289 else :
290 print("Internal error")
292 return self.lib_user_data_internal.create_project(self.user_id, self.schema_user_documents, project_name, otp_as_hash)
294 def insert_query_log(self, user_id = -1, project_id = -1, endpoint = "/", request_method = "GET",
295 request_body = "", ip_address = "0.0.0.0", elapsed_time = 0):
296 import logging
297 logger = logging.getLogger()
298 logger.info("inside insert_query_log from lss ")
299 logger.info(str(request_body))
300 query_id = self.lib_user_data_internal.insert_query_log(project_id, user_id, endpoint, request_method = request_method,
301 request_body = request_body, ip_address = ip_address,
302 elapsed_time = elapsed_time)
303 return query_id
305 def update_query_end(self, query_id, response_status = 200, response_body = "",
306 elapsed_time = 0, cost = 0, detailed_time = {}):
307 if query_id == -1:
308 return {"nothing" : "recorded"}
309 self.lib_user_data_internal.update_query_end(query_id,
310 response_status = response_status,
311 response_body = response_body,
312 elapsed_time = elapsed_time,
313 cost = cost,
314 detailed_time = detailed_time)
315 return {}
    def get_queries(self, project_id):
        """Return the logged queries of a project from the internal store."""
        return self.lib_user_data_internal.get_queries(project_id)
    def resume_all_project(self, project_id,
                           limit = 10000,
                           nb_cluster_input = 10,
                           metric = "euclidean",
                           verbose = True,
                           strat = "default"):
        """Prepare summarisation inputs for a whole project via k-means clustering.

        Loads the project's documents, clusters their embeddings with
        kmean_2d, concatenates the centroid documents into one prompt and
        returns (audit message, list of prompt inputs, list of datou steps).
        Rights are not checked yet.
        """
        print("NEED RIGHT")
        info_project = self.get_project_info(project_id)
        table_documents = info_project["table_documents"]
        check_table_exists = self.lib_user_data_internal.check_table_exists(table_documents)
        if not check_table_exists:
            print("l310 ERROR treated as warning Table " + table_documents + " does not exists, please create it first !")
            document_datas = []
        else:
            document_datas = self.lib_user_data_internal.get_documents(table_documents)
        print(" How many documents : " + str(len(document_datas)))
        map_group_by_documents = self.group_by_documents(document_datas)
        if len(map_group_by_documents) == 1:
            print("We will use one document strategy (potentially long) ")
        else :
            # NOTE(review): message_error is built but never shown/returned.
            message_error = """
            Can't resume this project with multiple documents, please delete all the element not associated to one document before requiring again a resume (until this is developped !) .
            Maybe you have a query and an empty result that is useless, delete it in documents.html page.
            """
            print("ERROR : we need to develop the multiple document resume strategy returning ! Please use stat button first, because this can occurs some costs ! ")
        print("NOW WE GO THROUGH STRAT KMEAN !")
        # Cluster the project's embeddings; the centroid documents become
        # the material for the summary prompt.
        filename, list_nn_centroids, list_doc_centroids, list_example_media, misc_info = self.kmean_2d(list_project_ids = [project_id],
                                                                                                       nb_project = 1,
                                                                                                       nb_cluster_input = nb_cluster_input,
                                                                                                       verbose = verbose,
                                                                                                       limit = limit,
                                                                                                       filename = "temp/img.png",
                                                                                                       metric = metric)
        # - [ ] TODO VR 28-7-23 : prepare the prompt
        preprompt = "Merci de faire un résumé des éléments suivant en y faisant référence "
        input_text = ""
        for doc in list_doc_centroids:
            content = doc["content"]
            id = doc["id"]
            input_text += "-----\n" + id + "------\n" + content
        input = {}
        input["preprompt"] = preprompt
        input["text"] = input_text
        list_datou_step = ["request_gpt"]
        list_inputs = [input]
        audit = "We have multiple documents so we use kmean strategy on chunk !"
        return audit, list_inputs, list_datou_step
        # NOTE(review): everything below is unreachable — the return above
        # is unconditional, so the single-document branch is never taken.
        document = list(map_group_by_documents.keys())[0]
        return self.resume_one_document(map_group_by_documents[document])
    def stat_all_one_project(self, project_id, limit = 10000, offset = 0,
                             order_by_recent = True):
        """Build per-document statistics (audits) for a project.

        Returns (list of audit dicts sorted by creation date, newest first;
        project info enriched with most_recent_date / most_recent_date_h).
        """
        info_project = self.get_project_info(project_id)
        table_documents = info_project["table_documents"] if "table_documents" in info_project else None
        check_table_exists = self.lib_user_data_internal.check_table_exists(table_documents)
        if not check_table_exists:
            print("l382 ERROR treated as warning Table " + str(table_documents) + " does not exists, please create it first !")
            document_datas = []
        else :
            document_datas = self.lib_user_data_internal.get_documents(table_documents,
                                                                       limit = limit, offset = offset,
                                                                       order_by_recent = order_by_recent)
        print(" How many documents : " + str(len(document_datas)))
        map_group_by_documents = self.group_by_documents(document_datas)
        map_audit_doc = []
        if len(map_group_by_documents) == 1:
            print("We will use one document strategy (potentially long) ")
        else :
            # NOTE(review): message_error is built but never shown/returned.
            message_error = """
            Can't resume this project with multiple documents, please delete all the element not associated to one document before requiring again a resume (until this is developped !) .
            Maybe you have a query and an empty result that is useless, delete it in documents.html page.
            """
            print("ERROR : we need to develop the multiple document resume strategy returning ! Please use stat button first, because this can occurs some costs ! ")
        # One audit entry per source document.
        for k in map_group_by_documents:
            audit, list_inputs, list_datou_step = self.parse_document_prepare_input(map_group_by_documents[k])
            # TODO remove the three following lines
            kunslash = k.replace("/", "")
            kunslash = kunslash.replace("@", "")
            kunslash = kunslash.replace(".", "")
            from lib.lib_util import replace_non_alpha_with_underscore
            kunslash = replace_non_alpha_with_underscore(kunslash)
            audit["document_id"] = k
            audit["kunslash"] = kunslash
            from lib.lib_util import humanize_modified_time
            audit["created_at"] = audit["created_at"]
            audit["created_at_h"] = humanize_modified_time(audit["created_at"].replace(tzinfo=None))
            map_audit_doc.append(audit)
            audit["title"] = parsed_title(audit["begin_content"])
        # Determine the most recent creation date across all documents;
        # created_at may be either a string or a datetime.
        from dateutil import parser
        dates = [parser.parse(item['created_at']) if isinstance(item['created_at'], str) else item['created_at'] for
                 item in map_audit_doc]
        import datetime
        # Arbitrary old default when the project holds no document.
        most_recent_date = max(dates) if len(dates) > 0 else datetime.datetime.strptime("2014-09-06 00:00:00", '%Y-%m-%d %H:%M:%S')
        print(f"La date la plus récente est : {most_recent_date.strftime('%Y-%m-%d %H:%M:%S')}")
        from lib.lib_util import humanize_modified_time
        most_recent_date_h = humanize_modified_time(most_recent_date.replace(tzinfo=None))
        info_project["most_recent_date_h"] = most_recent_date_h
        info_project["most_recent_date"] = most_recent_date
        map_audit_doc = sorted(map_audit_doc, key=lambda x: x['created_at'],reverse=True)
        return map_audit_doc, info_project
464# def group_by_documents(documents: List[Document]) -> Dict[str, List[Document]]:
465 def group_by_documents(self, documents):
466 """
467 Cette fonction prend une liste de documents et les regroupe par un certain critère, par exemple par sujet ou par auteur.
468 Elle renvoie un dictionnaire où les clés sont les critères de regroupement et les valeurs sont des listes de documents.
469 """
470 map_doc_chunk = {}
471 for document in documents:
472 document_source = document["document_id"]
473 document_id = document["id"]
474 if document_source not in map_doc_chunk:
475 map_doc_chunk[document_source] = {}
476 if document_id.startswith(document_source):
477 chunk_id_str = document_id[len(document_source) + 1:]
478 if not chunk_id_str.isnumeric() or document_id != document_source + "_" + chunk_id_str :
479 print("Unexpected internal error doc 2, can't manage this document !")
480 continue
482 chunk_id = int(chunk_id_str)
483 if chunk_id in map_doc_chunk[document_source]:
484 print("Unexpected internal error doc 3, can't manage this document !")
485 continue
487 map_doc_chunk[document_source][chunk_id] = document
489 else :
490 print("Unexpected internal error doc 1, can't manage this document !")
491 continue
493 return map_doc_chunk
495 def resume_one_document(self, map_one_document_id_chunk):
496 audit, list_inputs, list_datou_step = self.parse_document_prepare_input(map_one_document_id_chunk)
498 return audit, list_inputs, list_datou_step
499 # return "WIP " + str(audit) + " " + str(len(list_inputs)) + "\n " + str(list_inputs)
    # Pretreat docs
    def parse_document_prepare_input(self, map_doc_chunk, max_nb_token_per_input = 100000) : #8000): #4096 8192):
        """Pack a document's chunks into prompt inputs bounded by a token budget.

        *map_doc_chunk* maps chunk number -> chunk dict (with "content" and
        "created_at").  Chunks are concatenated, in the map's iteration
        order, into inputs of fewer than *max_nb_token_per_input* tokens
        (counted with tiktoken's cl100k_base encoding).  Returns
        (audit dict, list of {"text", "preprompt"} inputs, datou step list).
        """
        preprompt = "Merci de faire un résumé en listant les points importants et les taches à accomplir de la transcription de l'AG de Fotonower :"
        import tiktoken
        tokenizer = tiktoken.get_encoding("cl100k_base")  # The encoding scheme to use for tokenization
        list_inputs = []
        list_datou_step = []
        list_nb_token_inputs = []
        current_input = ""
        current_input_nb_token = 0
        nb_tokens = 0
        size_content_total = 0
        begin_content = ""
        begin_chunk = None
        created_at = None
        # NOTE(review): chunks are concatenated in dict-insertion order, not
        # sorted by chunk number — confirm callers insert them in order.
        for chunk_id in map_doc_chunk:
            created_at = map_doc_chunk[chunk_id]["created_at"]
            one_content = map_doc_chunk[chunk_id]["content"]
            # Track the content of the lowest-numbered chunk for the audit.
            if begin_content == "" or chunk_id < begin_chunk:
                begin_content = one_content
                begin_chunk = chunk_id
            size_content_total += len(one_content)
            tokens = tokenizer.encode(one_content, disallowed_special=())
            nb_tokens += len(tokens)
            if current_input_nb_token + len(tokens) < max_nb_token_per_input:
                current_input += one_content
                current_input_nb_token += len(tokens)
            else :
                # Budget exceeded: close the current input, start a new one.
                list_inputs.append({"text" : current_input, "preprompt" : preprompt})
                list_nb_token_inputs.append(current_input_nb_token)
                current_input = one_content
                current_input_nb_token = len(tokens)
        # Flush the last (possibly empty) input.
        list_inputs.append({"text" : current_input, "preprompt" : preprompt})
        list_nb_token_inputs.append(current_input_nb_token)
        audit = {"size_document" : size_content_total,
                 "nb_chunks" : len(map_doc_chunk),
                 "nb_tokens" : nb_tokens,
                 "nb_input" : len(list_inputs),
                 "created_at" : created_at,
                 "begin_content" : begin_content}
        list_datou_step.append("request_gpt")
        return audit, list_inputs, list_datou_step
    # Builds 2D and multi-dimensional k-means data (to refactor later, Q1-2024)
    def kmean_2d(self, list_project_ids,
                 nb_project = 0,
                 nb_cluster_input = 0,
                 verbose = False, limit = 0,
                 filename = "temp/img.png",
                 metric = "",
                 dim = 2,
                 gpu_enable = False,
                 nb_step = 10,
                 keep_fvs_running = False,
                 port = None,
                 pdt = None,
                 build_fvs_desc = True,
                 save_cluster = "",
                 load_cluster = "",
                 launch_fvs = True,
                 fvs_interface = None):
        """Cluster project embeddings and plot the classification regions.

        dim == 2: loads each project's descriptors, keeps the first two
        embedding components, clusters with sklearn KMeans (metric == "")
        or the project's KmeanGB, and plots via plot_classif_region.
        dim != 2: delegates everything to kmean_fvs_dim (FVS backend).
        Returns (outfile, legends, doc centroids, cluster/inertia list,
        misc info); (None, None, None, None, {}) when no data was found.
        """
        # Embeddings may arrive as numpy rows; coerce them to double arrays.
        def cast_vector(row):
            return np.array(list(map(lambda x: x.astype('double'), row)))
        import numpy as np
        import random
        if dim == 2:
            list_id_local_desc_to_project_id_doc = []
            data_to_concat = ()
            label_to_concat = ()
            nb_data_total = 0
            idx = 0
            for project_id in list_project_ids:
                map_descs = self.load_descs(project_id=project_id, limit = limit)
                for d in map_descs:
                    list_id_local_desc_to_project_id_doc.append((project_id, d["id"], d["content"]))
                if len(map_descs) == 0:
                    continue
                list_desc_str = list(map(lambda x : x["embedding"], map_descs))
                # Embeddings can be stored either as "[f1,f2,...]" strings or
                # as numeric sequences; normalise both to float lists/arrays.
                list_desc = []
                for emb in list_desc_str:
                    type_list_desc = str(type(emb))
                    if type_list_desc == "<class 'str'>":
                        sys.stdout.write("s")
                        list_desc.append(list(map(float, emb.lstrip("[").rstrip("]").split(","))))
                    else :
                        sys.stdout.write("n")
                        list_desc.append(cast_vector(emb))
                # TODO: k-means in full dimensionality; only the first two
                # components are used here.
                list_desc_2d = list(map(lambda x : x[:2], list_desc))
                nb_data_one_proj = len(list_desc_2d)
                data_to_concat += (list_desc_2d,)
                label_to_concat += ([idx] * nb_data_one_proj,)
                nb_data_total += nb_data_one_proj
                idx += 1
                if nb_project > 0 and idx > nb_project:
                    break
            # One cluster per project by default, overridable by the caller.
            nb_clusters = len(list_project_ids)
            if nb_cluster_input == 0:
                nb_clusters = idx
            else :
                nb_clusters = nb_cluster_input
            from lib.lib_ml.lib_kmean_pyfvs import plot_classif_region, KmeanGB
            if len(data_to_concat) == 0:
                return None, None, None, None, {}
            data = np.concatenate(data_to_concat, axis=0)
            label = np.concatenate(label_to_concat, axis=0)
            index_shuffle = np.arange(0, nb_data_total)
            np.random.shuffle(index_shuffle)
            data = data[index_shuffle, :]
            # Fixed seed so the centroid sampling below is reproducible.
            np.random.seed(9)
            shuffle = random.sample(range(data.shape[0]), nb_clusters)
            centroids = data[shuffle]
            assigned_centroids = np.zeros(len(data), dtype=np.int32)
            print(centroids)
            from sklearn.cluster import KMeans
            if metric == "":
                c_model = KMeans(n_clusters= nb_clusters)
                outfile, list_legends, list_doc_centroids = plot_classif_region(data, c_model, label[index_shuffle], filename,
                                                                                list_ids_proj_doc_id=list_id_local_desc_to_project_id_doc)
            else :
                # Supported metrics include "euclidean", "KL", "IS", "EX", "LX".
                new_model = KmeanGB(nb_cluster= nb_clusters, metric= metric, nb_sample= 100)
                outfile, list_legends, list_doc_centroids = plot_classif_region(data, new_model, label[index_shuffle], filename,
                                                                                list_ids_proj_doc_id=list_id_local_desc_to_project_id_doc)
            # No per-cluster inertia is computed in the 2D path.
            list_cluster_and_inertia = [{"nb" : 0, "inertia" : 0, "pdt_dyn" : 0}] * len(list_doc_centroids)
            misc_info = {}
        else :
            if pdt != None:
                pdt = int(pdt)
            else :
                pdt = 1888 # Using default value !
            from lib.lib_ml.lib_kmean_pyfvs import kmean_fvs_dim
            res = kmean_fvs_dim(data = None, nb_clusters = nb_cluster_input,
                                list_project_ids = list_project_ids,
                                limit = limit,
                                dim_input = dim,
                                lpgss = self.lib_user_data_internal,
                                hostname="/",
                                gpu_enable = gpu_enable,
                                nb_step = nb_step,
                                keep_fvs_running = keep_fvs_running,
                                port = port,
                                photo_desc_type = pdt,
                                build_fvs_desc = build_fvs_desc,
                                save_cluster = save_cluster,
                                load_cluster = load_cluster,
                                launch_fvs = launch_fvs,
                                verbose = verbose,
                                fvs_interface = fvs_interface)
            outfile, list_legends, list_doc_centroids, list_cluster_and_inertia, misc_info = res
        return outfile, list_legends, list_doc_centroids, list_cluster_and_inertia, misc_info
727# Very useless !
728# from lib.lib_safia import safia_desc_export
729# for project_id in list_project_ids:
730# offset_media_id, offset_data_file_id = safia_desc_export(project_id=project_id, lpgss=self.lib_user_data_internal,
731# verbose = verbose, out_folder=out_file,
732# photo_desc_type=photo_desc_type,
733# offset_media_id = offset_media_id,
734# offset_data_file_id = offset_data_file_id)
735# offset_data_file_id += 1
736# nb_char = len(str(offset_media_id))
737# first_digit = int(str(offset_media_id)[0])
738# offset_media_id_str = str(first_digit) + ('0' * nb_char)
739# offset_media_id = int(offset_media_id_str)
740# print("- [ ] TODO would be good to offset offset_media_id to a next round number")
742# from lib.lib_ml.lib_kmean_pyfvs import kmean_all_projects
743# print(" Now connect csv ")
744# create_csv(out_file)
746 def get_datou_step_user(self, #id = None,
747 list_datou_ids = []):
748 return self.lib_user_data_internal.get_datou_step_user(self.user_id, list_datou_ids)
750 # TODO VR could be in datou lib ?
751 def update_datou(self, datou):
752 # - [ ] TODO VR 23-7-23 : check right
754 # Do we want to manage DETAIL: Key (owner_id, filename)=(1, extract_expert_just_prompt_internal_datou_one_document_json) already exists.
755 # for save datou
757 # Managing format output complet
758 if "datou" in datou and len(datou["datou"]) == 1:
759 datou = datou["datou"][0]
761 datou_id_to_update = datou["id"] if "id" in datou and datou["id"] != "" else None
762 datou_name = datou["name"] if "name" in datou else datou["filename"] # TODO VR backward compatibility while chaning filename to name
763 datou_is_public = datou["public"] if "public" in datou else None
764 datou_id = self.lib_user_data_internal.update_datou(self.user_id, datou_name, datou_id_to_update, datou_is_public)
766 print(" datou_id : " + str(datou_id))
768 steps = datou["list_steps"] if "list_steps" in datou else datou["steps"] # TODO VR backward compatibility while chaning filename to name
770 for step in steps:
771 step["datou_id"] = datou_id
773 steps_ids = self.lib_user_data_internal.update_datou_step(steps)
775 print("TODO")
776 return {"log" : "Inserted", "new_step_ids" : steps_ids, "mtr_datou_id" : datou_id, "mtr_datou_name" : datou_name}
    # - [ ] TODO rename load configuration project
    def load_configuration(self, with_class_name = False):
        """Load the layer configuration of the current user/project.

        Returns (map layer_type -> selected option, map layer_type ->
        {class_name, module_name}).  Hard-coded defaults are used whenever
        the store is missing, empty, or fails.  The second map is only
        populated when *with_class_name* is True.
        """
        if self.lib_user_data_internal != None:
            list_configuration = self.lib_user_data_internal.load_configuration(self.user_id, self.project_id)
        else :
            list_configuration = []
        map_conf_selected = {}
        for l in list_configuration:
            map_conf_selected[l["layer_type"]] = l["configuration"]
        # Nothing stored: fall back to the module-level defaults.
        if map_conf_selected == {}:
            map_conf_selected = hc_default_map_conf_selected
        # TODO refacto: a join might work here, though it is not certain.
        map_conf_layer_class_name = {}
        if with_class_name:
            if self.lib_user_data_internal != None:
                try :
                    list_layer_type_options_and_class_names = self.lib_user_data_internal.get_configuration_option()
                    for one_conf_options in list_layer_type_options_and_class_names:
                        layer_type = one_conf_options["layer_type"]
                        option_selected = map_conf_selected[layer_type]
                        # The options and their class/module names are stored
                        # as parallel comma-separated lists; match by index.
                        list_options_this_layer = one_conf_options['options'].split(",")
                        id_option_selected = list_options_this_layer.index(option_selected)
                        list_class_name_this_layer = one_conf_options['list_class_name'].split(",")
                        list_module_name_this_layer = one_conf_options['list_module_name'].split(",")
                        if id_option_selected < len(list_class_name_this_layer) and id_option_selected >= 0 :
                            class_name = list_class_name_this_layer[id_option_selected]
                            module_name = list_module_name_this_layer[id_option_selected]
                        else :
                            class_name = None
                            module_name = None
                        map_conf_layer_class_name[layer_type] = {'class_name' : class_name, 'module_name' : module_name}
                except Exception as e:
                    print(" ERROR in load_configuration for layer " + str(e))
                    map_conf_layer_class_name = hc_default_map_conf_layer_class_name
            else : # no internal store: use the hard-coded class map
                map_conf_layer_class_name = hc_default_map_conf_layer_class_name
            # Safety net: never hand back an empty class map.
            if map_conf_layer_class_name == {}:
                print("Error unexpected(since error was catched above) treated as warning !")
                map_conf_layer_class_name = hc_default_map_conf_layer_class_name
        return map_conf_selected, map_conf_layer_class_name
def update_conf_project(self, project_id, data_json, replace = False):
    """Persist configuration JSON for a project; replace=True overwrites it.

    Delegates to the internal user-data store and returns its result.
    """
    # - [ ] TODO VR check right
    return self.lib_user_data_internal.update_configuration_project(project_id, data_json, replace = replace)
def load_conf_project(self, project_id, key = None):
    """Load the stored configuration of a project, optionally a single key.

    Returns {} (and logs) when project_id is None. When the store returns a
    dict (or dict subclass, e.g. a DB row type), the project_id is stamped
    into it so callers can round-trip the record.
    """
    # - [ ] TODO VR check right
    if project_id is None:  # fix: identity check instead of == None
        print("Error project_id is None !")
        return {}
    ret = self.lib_user_data_internal.load_conf_project(project_id, key = key)
    # fix: isinstance instead of type(...) == dict, so dict subclasses
    # (DictRow-like DB results) are stamped too
    if isinstance(ret, dict):
        ret["project_id"] = project_id
    return ret
def save_document(self, save_document_data, project_id, openai_token = None, verbose = False):
    """Save (re-embed) one document into the project's document table.

    save_document_data: dict with keys "document_id" and "document_content".
    Returns (total_nb_token, used_model) on success, or
    (0, "no_access_c_pas_un_model") when the user has no role on the project.
    """
    print(" CHECK RIGHT OF " + str(self.user_id) + " on " + str(project_id))
    # VR 22-11-23: better implementation of the rights check
    has_access = self.lib_right.get_role_on_project(self.get_user_id(), project_id)
    if has_access :
        info = self.get_project_info(project_id)
        table_documents = info["table_documents"]
        # Reshape the payload into the [{"id", "text"}] list the import
        # pipeline (process_json_dump_aux) expects.
        json_to_save = [{
            "id" : save_document_data["document_id"],
            "text" : save_document_data["document_content"]
        }]

        # TODO VR 13-12: not ideal, but works around a recurring bug —
        # delete any previous version before re-inserting.
        self.lib_user_data_internal.delete_document(table_documents, save_document_data["document_id"])

        import asyncio
        from lib.import_util.lib_import_retrieval.scripts.process_json.process_json import process_json_dump_aux
        # Run the async chunk/embed/insert pipeline synchronously.
        total_nb_token, used_model = asyncio.run(process_json_dump_aux(json_to_save, {}, False, False, None, self.lib_user_data_internal, openai_token, table_documents, verbose = verbose))

        return total_nb_token, used_model
    else :
        return 0, "no_access_c_pas_un_model" # VR 22-11-23 : TODO better handling of rights errors
863# def get_chunk_id(self):
# TODO rename load_document_content
def load_document(self, document_id_input, project_id, chunk_id, verbose = False):
    """Return the concatenated content of a document's chunks.

    Checks the caller's role on the project first; without access, returns a
    "No access ..." string (historic contract, kept as-is). With access,
    fetches the chunk rows and joins their "content" fields; when a specific
    document_id_input is given, chunks are sorted numerically by the suffix
    of their id (chunk ids look like "<document_id>_<n>").
    """
    print(" CHECK RIGHT OF " + str(self.user_id) + " on " + str(project_id))
    # VR 22-11-23: better implementation of the rights check
    has_access = self.lib_right.get_role_on_project(self.get_user_id(), project_id)
    if not has_access:
        return "No access : user " + str(self.get_user_id()) + " on pid " + str(project_id)

    info = self.get_project_info(project_id)
    table_documents = info["table_documents"]
    print("TODO verify right access")
    if not self.lib_user_data_internal.check_table_exists(table_documents):
        print("l849 ERROR treated as warning Table " + str(table_documents) + " does not exists, please create it first !")
        documents = []
    else:
        documents = self.lib_user_data_internal.get_documents(table_documents, document_id_input, chunk_id)

    if not documents:
        return ""
    document_id = documents[0]["document_id"]
    if document_id_input is not None:  # fix: identity check instead of != None
        # hoist the prefix length out of the sort key
        prefix_len = len(document_id) + 1
        documents = sorted(documents, key=lambda d: int(d["id"][prefix_len:]))
    # fix: join instead of quadratic += concatenation
    return "".join(d["content"] for d in documents)
888 else :
889 return "No access : user " + str(self.get_user_id()) + " on pid " + str(project_id)
def load_descs(self, project_id, limit = 0, offset = 0):
    """List document descriptors of a project's document table (paged)."""
    table_documents = self.get_project_info(project_id)["table_documents"]
    print("TODO verify right access")
    list_descs = self.lib_user_data_internal.get_descs(table_documents, limit = limit, offset = offset)

    if list_descs:
        # Debug aid: show a shortened preview of the first two descriptors.
        print(" Let's check the type : !")
        from lib.lib_util import display_real_dict_row_shorten
        print(" nb desc : " + str(len(list_descs)))
        display_real_dict_row_shorten(list_descs[:2])

    return list_descs
from typing import List

# Placeholder domain types for the (not yet implemented) summary pipeline
# below. NOTE(review): they appear to be declared mid-class — consider moving
# them to module level or a dedicated module once the pipeline is built.
class Document():
    # A source document to be summarized (no fields yet).
    pass
class Group():
    # A batch of documents handled in one summarization request (no fields yet).
    pass
class Summary():
    # The result of summarizing a group (no fields yet).
    pass
def group_for_one_request(document: Document, number: int) -> List[Group]:
    """
    Create a given number of groups for a single request.

    Takes a document and the number of groups to create, and returns a
    list of groups.
    """
    # NOTE(review): unimplemented placeholder of the summary pipeline.
    pass
def recursive_summary(group: Group, depth: int = 0) -> Summary:
    """
    Build a summary of a group of documents using a recursive method.

    Takes a group of documents and a recursion depth, and returns a summary.
    """
    # Fix: removed a stray "[l]" indexing expression that trailed the
    # docstring (it would have raised NameError the moment the function
    # was called). The function remains an unimplemented placeholder.
    pass
def parallel_summary(groups: List[Group]) -> List[Summary]:
    """
    Create summaries for several groups in parallel.

    Takes a list of groups and returns a list of summaries.
    """
    # NOTE(review): unimplemented placeholder of the summary pipeline.
    pass
def merge_summaries(summaries: List[Summary]) -> Summary:
    """
    Merge several summaries into a single one.

    Takes a list of summaries and returns one summary.
    """
    # NOTE(review): unimplemented placeholder of the summary pipeline.
    pass
def get_data_user(self, user, user_id_hack = None, verbose = False):
    """Return the projects visible to a user, with humanized timestamps.

    When user_id_hack is given it is used directly; otherwise the lookup
    falls back to the user record (one must be connected and hold a user_id).
    Each project gets a "modified_at_h" human-readable timestamp.
    """
    if user_id_hack is not None:
        projects = self.get_projects(user_id_hack)
    else:
        projects = self.get_project(user)  # TODO: probably should be renamed get_projects

    from lib.lib_util import humanize_modified_time
    for project in projects:
        project["modified_at_h"] = humanize_modified_time(project["modified_at"])

    print(" projects : " + str(len(projects)))
    if projects:
        print(" the first project is : " + str(projects[0])[:100])

    return projects
# should be private, or somewhere else ?
def append_step_to_datou(self, list_datou, list_step, verbose = False):
    """Attach each step to its parent datou, sort steps by order_step,
    and add a pretty-printed JSON dump of each datou as "data_str".

    Returns the list of datous, each now carrying a "steps" list
    (empty when no step referenced it).
    """
    by_id = {d['id']: d for d in list_datou}

    for step in list_step:
        parent_id = step["mtr_datou_id"]
        if parent_id not in by_id:
            # Orphan step: its parent datou was not loaded.
            print("Unexpected behavior, internal error")
            continue
        by_id[parent_id].setdefault("steps", []).append(step)

    for datou in by_id.values():
        pending = datou.get("steps", [])
        if verbose:
            print(" list_step_to_sort : " + str(pending))
        ordered = sorted(pending, key=lambda s: s['order_step'])
        if verbose:
            print(" list_step_sorted : " + str(ordered))
        datou["steps"] = ordered

    import json
    # TODO vr validate these tests for integrating datou data into the html interface
    for datou in by_id.values():
        datou["data_str"] = json.dumps(datou, indent=4, sort_keys=True, default=str)

    return list(by_id.values())
def get_datou_step_template(self):
    """Fetch the datou-step template rows from the internal user-data store."""
    # - [ ] TODO VR 23-7-23 : check right
    templates = self.lib_user_data_internal.get_datou_step_template()
    return templates
def get_datou(self, id = None, list_datou_ids = None,
              instantiate_config_prepare = False,
              dont_instantiate_config_prepare = False, # VR TODO audit-refacto True for editing of the configuration => why the hell do we want another default value ?
              load_recursively_map_reduce = False,
              project_id = None): # This is for execution
    """Load the current user's datous with their steps.

    When a single `id` is given (and dont_instantiate_config_prepare is
    False), each step's "param_json" is additionally run through
    formal_conf_prepare so the configuration is instantiated for execution.
    Returns the list of datous, each with its sorted "steps" attached.
    """
    # - [ ] TODO VR 23-7-23 : check right : for datou owner it is in fact by default enforced !

    # Avoid modification for others invocation of this function https://stackoverflow.com/questions/1132941/least-astonishment-and-the-mutable-default-argument
    if list_datou_ids == None:
        list_datou_ids = []

    print(" list_datou_ids " + str(list_datou_ids) + " id : " + str(id))

    if id != None and int(id) not in list_datou_ids:
        list_datou_ids.append(int(id))

    datou_user = self.lib_user_data_internal.get_datou_user(self.user_id, list_datou_ids)
    datou_step_user = self.lib_user_data_internal.get_datou_step_user(self.user_id, list_datou_ids)
    if id != None and not dont_instantiate_config_prepare: # we don't really want to run all the instantiations when loading every datou
        # but by default (single datou) we do instantiate!
#        instantiate_config_prepare and
        list_param_json = list(map(lambda x : x["param_json"], datou_step_user))
        from lib.util.lib_formal_conf import formal_conf_prepare
        list_param_json_configured = formal_conf_prepare(list_param_json, lss = self, project_id = project_id)
        # Write the prepared configs back in place, preserving row order.
        for i, sub_json in enumerate(list_param_json_configured):
            datou_step_user[i]["param_json"] = list_param_json_configured[i]
    list_datou_with_step = self.append_step_to_datou(datou_user, datou_step_user)
    return list_datou_with_step
def delete_datou(self, mtr_datou_id):
    """Delete one datou belonging to the current user."""
    # - [ ] TODO VR 23-7-23 : check right : for datou owner it is in fact by default enforced !
    outcome = self.lib_user_data_internal.delete_datou(mtr_datou_id, self.user_id)
    return outcome
def get_projects(self, user_id = 0):
    """Return every project the user can access, enriched with the user's
    role, a JSON dump ("proj_data_str"), and — for owners/admins — the
    member list with emails ("users", "nb_users").
    """
    import json  # fix: hoisted out of the per-project loop (was re-imported every iteration)

    list_projects = self.lib_right.get_projects_with_access(user_id)
    role_by_pid = {p["id"]: p for p in list_projects}

    list_project_with_name_and_role = []
    for pid in [p["id"] for p in list_projects]:
        if not str(pid).isnumeric():
            print("Unexpected non numeric pid : " + str(pid))
            continue
        info = self.lib_user_data_internal.get_project_info(pid)
        info["role"] = role_by_pid[pid].get("role")

        # - [ ] TODO VR question Should we do this with ajax ? grrr just for VR grrr get_role_access_to_project
        if info["role"] in ("owner", "admin"):
            members = self.lib_right.get_all_role_access_to_project(project_id=pid)
            # - [ ] TODO VR verify we take this from user ids
            map_user_id_email = self.lib_right.get_email_from_user_ids([m["user_id"] for m in members])
            for member in members:
                member_uid = member["user_id"]  # fix: no longer shadows the user_id parameter
                if member_uid in map_user_id_email:
                    member["mail"] = map_user_id_email[member_uid]
                else:
                    print("Missing email or hidden from user_id")
                    member["mail"] = "Undisclosed Email"
            info["users"] = members
            info["nb_users"] = len(members)

        # - [ ] Should we also get the active invitation ?
        info["proj_data_str"] = json.dumps(info, default=str)
        list_project_with_name_and_role.append(info)

    print("TODO, how to get the user_id, grrr !")
    return list_project_with_name_and_role
def create_group(self, name):
    """Create a named group owned by the current user."""
    new_group = self.lib_right.create_group(self.user_id, name)
    return new_group
def get_group(self, role = "owner"):
    """Return the groups on which the current user holds the given role."""
    return self.lib_right.get_group(self.user_id, role = role)
def remove_right_group(self, user_id_that_query, groupId, userId_to_remove):
    """Remove a user from a group, provided the requester is admin or owner
    of that group. Silently does nothing otherwise."""
    requester_role = self.lib_right.get_role_on_group(user_id_that_query, groupId)
    if requester_role in ("admin", "owner"):
        self.lib_user_data_internal.remove_role_group_to_user(groupId, userId_to_remove)
def remove_right(self, project_id, user_id_to_change):
    """Revoke a user's role on a project when the caller is owner or admin.

    Returns "Not Admin Cant Change Role" when the caller lacks the right;
    returns None after a successful removal (historic contract).
    """
    caller_role = self.lib_right.get_role_on_project(self.user_id, project_id)
    if caller_role not in ("owner", "admin"):
        return "Not Admin Cant Change Role"
    self.lib_right.remove_role_to_user(user_id_to_change, project_id)
def create_invitation(self, project_id, group_id,
                      role, mail = None, host = "https://safia.rubbia.fr"):
    """Create an invitation link for a project (or, when project_id is None,
    for a group). Only admins of the target may create invitations.

    Returns {"link": <url>} on success, {} when the caller is not admin.
    """
    if project_id is not None:
        if not self.lib_right.is_project_admin(self.user_id, project_id):
            return {}
        link = self.lib_right.create_invitation(self.user_id, project_id, role, mail, host)
    else:
        if not self.lib_right.is_group_admin(self.user_id, group_id):
            return {}
        link = self.lib_right.create_invitation(self.user_id, group_id, role, mail, host,
                                                action="grant_group_role")
    if mail is not None:
        print("TODO need to send mail sometime !")
    return {"link": link}
def use_invitation(self, token):
    """Redeem an invitation token for the current user.

    Grants the role carried by the invitation (project or group) and marks
    the invitation as used. Returns a human-readable status string.
    """
    one_record = self.lib_right.get_invitation_info_from_token(token)

    # TODO: still need to check validity (valid_until) in addition to
    # used_at and user_id before accepting the token.
    if one_record is not None and one_record["user_id"] is None:
        role = one_record["param_json"]["role"]
        data_id = one_record["param_json"]["data_id"]
        action = one_record["action"]
        if action == "grant_project_role":
            self.lib_right.add_role_project_to_user(self.user_id, data_id, role)
            self.lib_right.update_invitation_used(one_record["id"], self.user_id)
        elif action == "grant_group_role":
            print(" TODO voila ")
            self.lib_right.add_role_group_to_user(self.user_id, data_id, role)
            self.lib_right.update_invitation_used(one_record["id"], self.user_id)
        return("Please check your project list on page projects !")
    else :
        if one_record is None or "used_at" not in one_record:
            return("Invitation already used, but no date ?")
        else:
            return("Invitation already used on " + str(one_record["used_at"]))
    # fix: removed an unreachable print("Hello !") after both return paths,
    # and the unused local owner_id; == None comparisons replaced by identity checks
def get_project_by_shortname(self, shortlink):
    """Resolve a project record from its short name / short link."""
    project = self.lib_user_data_internal.get_project_by_shortname(shortlink)
    return project
1147 def get_conso(self, hash_id_treatment = None, output_type = "dict", # other type : pd
1148 col_csv = "sum(nb_page) as nb_page, 0 as id", limit = 0, offset = 0,
1149 condition_csv = "previous_month,gliding_month,current_month,previous_year,gliding_year,current_year",
1150 project_id = None):
1151# col_csv = "id,id_file,mtr_datou_id,created_at,launched_at,modified_at,deleted_at,valid_until,user_id,project_id,safia_doc_id,hash_id_treatment,cdn,input_data,nb_page,nb_modif,input_file,audit_resume,info_anon,info_customer,info_lab,info_cdn,info_back,info_date,info_suivi,info_action,info_usage_integration,info_ml,info_init,info_extract,info_consolidate,info_result,info_exec,info_page,info_stat",
1153 if project_id == None:
1154 project_id = self.project_id
1157 list_condition_csv = list(condition_csv.split(","))
1159 res = {}
1160 for one_cond_csv in list_condition_csv:
1162 # A mon avis faut encapsuler tout cela dans une fonction util saxia ? pour l'utiliser dans les endpoint ainsi qu'en ajax
1163 condition = []
1164 if one_cond_csv != "":
1165 if one_cond_csv == "gliding_month":
1166 one_condition = {"type" : "GREATER",
1167 "variable" : "created_at",
1168 "value" : "formula",
1169 "formula" : "now() - interval '1 month'"}
1170 condition.append(one_condition)
1171 # condition = [" created_at > "]
1172 elif one_cond_csv == "previous_month":
1173 one_condition = {"type" : "EQUAL",
1174 "variable" : "EXTRACT(MONTH FROM created_at)",
1175 "value" : "formula",
1176 "formula" : "EXTRACT(MONTH FROM now() - interval '1 month')"}
1177 condition.append(one_condition)
1178 one_condition = {"type" : "EQUAL",
1179 "variable" : "EXTRACT(YEAR FROM created_at)",
1180 "value" : "formula",
1181 "formula" : "EXTRACT(YEAR FROM now() - interval '1 month')"}
1182 condition.append(one_condition)
1183 elif one_cond_csv == "current_month":
1184 one_condition = {"type" : "EQUAL",
1185 "variable" : "EXTRACT(MONTH FROM created_at)",
1186 "value" : "formula",
1187 "formula" : "EXTRACT(MONTH FROM now())"}
1188 condition.append(one_condition)
1189 one_condition = {"type" : "EQUAL",
1190 "variable" : "EXTRACT(YEAR FROM created_at)",
1191 "value" : "formula",
1192 "formula" : "EXTRACT(YEAR FROM now())"}
1193 condition.append(one_condition)
1194 elif one_cond_csv == "previous_year":
1195 one_condition = {"type" : "EQUAL",
1196 "variable" : "EXTRACT(YEAR FROM created_at)",
1197 "value" : "formula",
1198 "formula" : "EXTRACT(YEAR FROM now() - interval '1 year')"}
1199 condition.append(one_condition)
1200 elif one_cond_csv == "current_year":
1201 one_condition = {"type" : "EQUAL",
1202 "variable" : "EXTRACT(YEAR FROM created_at)",
1203 "value" : "formula",
1204 "formula" : "EXTRACT(YEAR FROM now())"}
1205 condition.append(one_condition)
1206 else:
1207 print("Condition not supported")
1208 if one_cond_csv == "gliding_year" or one_cond_csv == "previous_month" or one_cond_csv == "current_month":
1209 one_condition = {"type" : "GREATER",
1210 "variable" : "created_at",
1211 "value" : "formula",
1212 "formula" : "now() - interval '1 year'"}
1213 condition.append(one_condition)
1215 if project_id != None:
1216 one_condition = {"type" : "EQUAL",
1217 "variable" : "project_id",
1218 "value" : project_id}
1219 condition.append(one_condition)
1220 else :
1221 # Impossible : on ne veut pas de données
1222 one_condition = {"type" : "EQUAL",
1223 "variable" : "project_id",
1224 "value" : -1}
1225 condition.append(one_condition)
1227 mtr_datou_id_extract = 40
1228 one_condition = {"type" : "EQUAL",
1229 "variable" : "mtr_datou_id",
1230 "value" : mtr_datou_id_extract}
1231 condition.append(one_condition)
1233 one_res = self.lib_user_data_internal.load_data_audit(hash_id_treatment = hash_id_treatment,
1234 output_type = output_type,
1235 col_csv = col_csv,
1236 limit = limit,
1237 offset = offset,
1238 condition = condition)
1240 res[one_cond_csv] = one_res
1242 return res #.to_html(classes="table pdt-table table-striped sortable") # if output_type == pd