diff --git a/src/app.py b/src/app.py index 9ef3b201..5b36a1d0 100644 --- a/src/app.py +++ b/src/app.py @@ -32,7 +32,7 @@ from schema import schema_triggers from schema import schema_validators from schema import schema_neo4j_queries -from schema.schema_constants import SchemaConstants +from schema.schema_constants import SchemaConstants, ReindexPriorityLevelEnum from schema.schema_constants import DataVisibilityEnum from schema.schema_constants import MetadataScopeEnum from schema.schema_constants import TriggerTypeEnum @@ -1185,9 +1185,12 @@ def create_entity(entity_type): # Check URL parameters before proceeding to any CRUD operations, halting on validation failures. # - # Check if re-indexing is to be suppressed after entity creation. try: - supress_reindex = schema_manager.suppress_reindex(request.args) + # Check if re-indexing is to be suppressed after entity creation. + suppress_reindex = schema_manager.suppress_reindex(request_args=request.args) + # Determine valid re-indexing priority using Request parameters. + reindex_priority = schema_manager.get_reindex_priority(request_args=request.args + , calc_suppress_reindex=suppress_reindex) except Exception as e: bad_request_error(e) @@ -1312,7 +1315,7 @@ def create_entity(entity_type): # Will also filter the result based on schema normalized_complete_dict = schema_manager.normalize_entity_result_for_response(complete_dict) - if supress_reindex: + if suppress_reindex: logger.log(level=logging.INFO , msg=f"Re-indexing suppressed during creation of {complete_dict['entity_type']}" f" with UUID {complete_dict['uuid']}") @@ -1321,7 +1324,9 @@ def create_entity(entity_type): logger.log(level=logging.INFO , msg=f"Re-indexing for creation of {complete_dict['entity_type']}" f" with UUID {complete_dict['uuid']}") - reindex_entity(complete_dict['uuid'], user_token) + reindex_entity(uuid=complete_dict['uuid'] + , user_token=user_token + , priority_level=reindex_priority) return jsonify(normalized_complete_dict) @@ -1494,9 +1499,12 @@ def update_entity(id): # Check URL parameters before proceeding to any CRUD operations, halting on validation failures. # - # Check if re-indexing is to be suppressed after entity creation. try: - suppress_reindex = schema_manager.suppress_reindex(request.args) + # Check if re-indexing is to be suppressed after entity creation. + suppress_reindex = schema_manager.suppress_reindex(request_args=request.args) + # Determine valid re-indexing priority using Request parameters. + reindex_priority = schema_manager.get_reindex_priority(request_args=request.args + , calc_suppress_reindex=suppress_reindex) except Exception as e: bad_request_error(e) @@ -1594,7 +1602,9 @@ def update_entity(id): logger.log(level=logging.INFO , msg=f"Re-indexing for modification of {normalized_entity_type}" f" with UUID {entity_uuid}") - reindex_entity(entity_uuid, user_token) + reindex_entity(uuid=entity_uuid + , user_token=user_token + , priority_level=reindex_priority) # Do not return the updated dict to avoid computing overhead - 7/14/2023 by Zhou message_returned = f"The update request on {normalized_entity_type} of {id} has been accepted, the backend may still be processing" @@ -4167,9 +4177,12 @@ def multiple_components(): # Check URL parameters before proceeding to any CRUD operations, halting on validation failures. # - # Check if re-indexing is to be suppressed after entity creation. try: - suppress_reindex = schema_manager.suppress_reindex(request.args) + # Check if re-indexing is to be suppressed after entity creation. + suppress_reindex = schema_manager.suppress_reindex(request_args=request.args) + # Determine valid re-indexing priority using Request parameters. + reindex_priority = schema_manager.get_reindex_priority(request_args=request.args + , calc_suppress_reindex=suppress_reindex) except Exception as e: bad_request_error(e) @@ -4215,7 +4228,9 @@ def multiple_components(): logger.log(level=logging.INFO , msg=f"Re-indexing for multiple component creation of {complete_dict['entity_type']}" f" with UUID {complete_dict['uuid']}") - reindex_entity(complete_dict['uuid'], user_token) + reindex_entity(uuid=complete_dict['uuid'] + , user_token=user_token + , priority_level=reindex_priority) # Add back in dataset_link_abs_dir one last time normalized_complete_dict['dataset_link_abs_dir'] = dataset_link_abs_dir normalized_complete_entity_list.append(normalized_complete_dict) @@ -4304,6 +4319,28 @@ def entity_bulk_update(): return jsonify(list(uuids)), 202 +""" +Retrieve ids (uuid, hubmap_id) for a given id + +Parameters +---------- +id : str + The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of target entity (Dataset/Sample) + +Returns +------- +json array + Each item in the array is a json object containing the uuid and hubmap_id for the given entity. +""" +@app.route('/entities/batch-ids', methods = ['POST']) +def get_batch_ids(): + validate_token_if_auth_header_exists(request) + require_json(request) + json_data_dict = request.get_json() + ids = app_neo4j_queries.get_batch_ids(neo4j_driver_instance, json_data_dict) + return jsonify(ids) + + #################################################################################################### ## Internal Functions #################################################################################################### @@ -5426,19 +5463,23 @@ def delete_cache(entity_uuid, entity_type): Parameters ---------- -uuid : str +uuid : The uuid of the target entity -user_token: str +user_token: The user's globus groups token +priority_level: + Value from the enumeration ReindexPriorityLevelEnum """ -def reindex_entity(uuid, user_token): +def reindex_entity(uuid:str, user_token:str, priority_level:int = ReindexPriorityLevelEnum.HIGH.value) -> None: headers = { 'Authorization': f'Bearer {user_token}' } logger.info(f"Making a call to search-api to reindex uuid: {uuid}") - response = requests.put(f"{app.config['SEARCH_API_URL']}/reindex/{uuid}", headers = headers) + response = requests.put(f"{app.config['SEARCH_API_URL']}/reindex/{uuid}" + , headers=headers + , params={'priority': priority_level}) # The reindex takes time, so 202 Accepted response status code indicates that # the request has been accepted for processing, but the processing has not been completed diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py index 2576307d..8fc130b0 100644 --- a/src/app_neo4j_queries.py +++ b/src/app_neo4j_queries.py @@ -1181,3 +1181,36 @@ def get_entities_by_uuid(neo4j_driver, return None return records + +""" +Get the uuid and hubmap_id for each entity in a list of ids. + +Parameters +---------- +neo4j_driver : neo4j.Driver object + The neo4j database connection pool +id_list : list + The list of ids + +Returns +------- +Dictionary containing the uuid and hubmap_id of each entity in the list, keyed by that entities original id given +""" +def get_batch_ids(neo4j_driver, id_list): + query = """ + MATCH (e) + WHERE e.uuid IN $id_list OR e.hubmap_id IN $id_list + WITH e, [id IN $id_list WHERE id = e.uuid OR id = e.hubmap_id][0] AS original_id + RETURN original_id, e.uuid AS uuid, e.hubmap_id AS hubmap_id + """ + + result_map = {} + + with neo4j_driver.session() as session: + result = session.run(query, id_list=id_list) + for record in result: + result_map[record['original_id']] = { + "uuid": record['uuid'], + "hubmap_id": record['hubmap_id'] + } + return result_map \ No newline at end of file diff --git a/src/schema/schema_constants.py b/src/schema/schema_constants.py index b8d6efda..99bfd8fc 100644 --- a/src/schema/schema_constants.py +++ b/src/schema/schema_constants.py @@ -64,3 +64,11 @@ class Neo4jRelationshipEnum(Enum): REVISION_OF = 'REVISION_OF' USES_DATA = 'USES_DATA' +# Define an enumeration of re-index priority level types. +# N.B. These are the same values maintained in ingest-api app.py _get_reindex_priority(), which +# must be the same levels defined for the enqueue() method at +# https://github.com/x-atlas-consortia/jobq/blob/main/src/atlas_consortia_jobq/queue.py +class ReindexPriorityLevelEnum(Enum): + HIGH = 1 + MEDIUM = 2 + LOW = 3 diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index afca2757..075b5fea 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -9,6 +9,7 @@ # Don't confuse urllib (Python native library) with urllib3 (3rd-party library, requests also uses urllib3) from requests.packages.urllib3.exceptions import InsecureRequestWarning +from werkzeug.datastructures import ImmutableMultiDict # Local modules from schema import schema_errors @@ -18,6 +19,7 @@ from schema.schema_constants import SchemaConstants from schema.schema_constants import MetadataScopeEnum from schema.schema_constants import TriggerTypeEnum +from schema.schema_constants import ReindexPriorityLevelEnum # HuBMAP commons from hubmap_commons.hm_auth import AuthHelper @@ -2392,7 +2394,7 @@ def get_organ_types(): """ See if 'reindex' is a URL parameter passed in with the request and -if it indicates reindexing should be supressed. Default to reindexing in all other cases. +if it indicates reindexing should be suppressed. Default to reindexing in all other cases. Parameters ---------- @@ -2404,6 +2406,9 @@ def get_organ_types(): bool """ def suppress_reindex(request_args) -> bool: + # N.B. This logic should be the same as that used by + # ingest-api app.py _suppress_reindex() + # https://github.com/hubmapconsortium/ingest-api/blob/main/src/app.py if 'reindex' not in request_args: return False reindex_str = request_args.get('reindex').lower() @@ -2414,6 +2419,38 @@ def suppress_reindex(request_args) -> bool: raise Exception(f"The value of the 'reindex' parameter must be True or False (case-insensitive)." f" '{request_args.get('reindex')}' is not recognized.") +""" +See if 'reindex-priority' is a URL parameter passed in with the request, if it is valid, and +if it is compatible with the calculated suppress_reindex() result. Default to 1 when not specified. + +Parameters +---------- +request_args: + The Flask request.args passed in from application request + +calc_suppress_reindex: + The value returned from the suppress_reindex() method, if previously called. +Returns +------- +int value from the enumeration ReindexPriorityLevelEnum +""" +def get_reindex_priority(request_args:ImmutableMultiDict, calc_suppress_reindex:bool) -> int: + # N.B. This logic should be the same as that used by + # ingest-api app.py _get_reindex_priority() + # https://github.com/hubmapconsortium/ingest-api/blob/main/src/app.py + if calc_suppress_reindex and 'reindex-priority' in request_args: + raise Exception("Specifying a re-index priority is incompatible with suppressing re-indexing.") + if 'reindex-priority' not in request_args: + return ReindexPriorityLevelEnum.HIGH.value + try: + priority_int = int(request_args.get('reindex-priority')) + except ValueError as ve: + raise Exception("The value of the 'reindex-priority' parameter must be an integer.") + if priority_int not in ReindexPriorityLevelEnum: + raise Exception(f"The value of the 'reindex-priority' parameter must be" + f" greater than or equal to {ReindexPriorityLevelEnum.HIGH.value} (high priority)" + f" and less than or equal to {ReindexPriorityLevelEnum.LOW.value} (low priority).") + return priority_int #################################################################################################### ## Internal functions