In [ ]:
 

This notebook loads the data into the MongoDB database¶

In [1]:
import pymongo
import json
import stk
In [5]:
# Load the reference database (the data to learn from) into the local MongoDB instance.
# This only needs to be done once; loading all the data back into your database can take a while.
def add_files_to_mongo_db(client,database_name,collection_name,data_base_folder):
    """Load a JSON file into a MongoDB collection, upserting documents by InChIKey.

    Parameters
    ----------
    client : str
        MongoDB connection URI (e.g. ``"mongodb://localhost:27017/"``).
    database_name : str
        Name of the database to write into.
    collection_name : str
        Name of the collection to add; this needs to match the name of the
        JSON file (without the ``.json`` extension) in ``data_base_folder``.
    data_base_folder : str
        Path to the folder where the JSON file is located.
    """  # noqa: D401
    import json
    import os

    from pymongo import MongoClient

    # Read the exported documents first, so no connection is opened if the
    # file is missing or malformed.
    json_file = os.path.join(data_base_folder, collection_name + ".json")
    with open(json_file) as f:
        file_data = json.load(f)

    mongo_client = MongoClient(client)
    try:
        collection = mongo_client[database_name][collection_name]
        for document in file_data:
            # Drop the exported _id so it cannot conflict with an existing
            # document's _id in the target collection.
            document.pop("_id", None)
            collection.update_many(
                filter={"InChIKey": document["InChIKey"]},
                update={"$set": document},
                upsert=True,
            )
    finally:
        # Always release the connection, even if an update fails part-way.
        mongo_client.close()
In [6]:
# Add the fragment data to the local database.
# Run once; each call upserts the documents of one collection, keyed by InChIKey.
client = "mongodb://localhost:27017/"
data_base_folder = "data_example/fragment/files_for_database/"
data_base_name = "stk_precursor"

add_files_to_mongo_db(client, data_base_name, "molecules", data_base_folder)
add_files_to_mongo_db(client, data_base_name, "building_block_position_matrices", data_base_folder)
# add_files_to_mongo_db(client, data_base_name, "position_matrices", data_base_folder)
add_files_to_mongo_db(client, data_base_name, "Precursors_IPEA", data_base_folder)
add_files_to_mongo_db(client, data_base_name, "Precursors_opt", data_base_folder)
add_files_to_mongo_db(client, data_base_name, "Precursors_Stda", data_base_folder)
In [4]:
# Add the constructed molecules to the local database.
# NOTE(review): data_base_folder is a machine-specific absolute path — point it
# at your own copy of the stk_constructed JSON exports before running.
data_base_folder = "/media/mohammed/Work/Work/data_precurosr_Jelfs1/stk_constructed/"
data_base_name = "stk_constructed"
client = "mongodb://localhost:27017/"

# One call per collection; each upserts the collection's documents keyed by InChIKey.
add_files_to_mongo_db(client,data_base_name,"building_block_position_matrices",data_base_folder)
add_files_to_mongo_db(client,data_base_name,"constructed_molecules",data_base_folder)
add_files_to_mongo_db(client,data_base_name,"molecules",data_base_folder)
add_files_to_mongo_db(client,data_base_name,"position_matrices",data_base_folder)
add_files_to_mongo_db(client,data_base_name,"BO_exp1_IPEA",data_base_folder)
add_files_to_mongo_db(client,data_base_name,"BO_exp1_opt",data_base_folder)
add_files_to_mongo_db(client,data_base_name,"BO_exp1_Stda",data_base_folder)
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[4], line 7
      4 client = "mongodb://localhost:27017/"
      6 add_files_to_mongo_db(client,data_base_name,"building_block_position_matrices",data_base_folder)
----> 7 add_files_to_mongo_db(client,data_base_name,"constructed_molecules",data_base_folder)
      8 add_files_to_mongo_db(client,data_base_name,"molecules",data_base_folder)
      9 add_files_to_mongo_db(client,data_base_name,"position_matrices",data_base_folder)

Cell In[2], line 26, in add_files_to_mongo_db(client, database_name, collection_name, data_base_folder)
     24 for file in file_data:
     25     file.pop("_id", None)
---> 26     collection.update_many(
     27             filter={"InChIKey": file["InChIKey"]},
     28             update={"$set": file},
     29             upsert=True,
     30         )
     31 client.close()

File /media/mohammed/Work/anaconda3/envs/stk_search/lib/python3.8/site-packages/pymongo/collection.py:1177, in Collection.update_many(self, filter, update, upsert, array_filters, bypass_document_validation, collation, hint, session, let, comment)
   1173 common.validate_list_or_none("array_filters", array_filters)
   1175 write_concern = self._write_concern_for(session)
   1176 return UpdateResult(
-> 1177     self._update_retryable(
   1178         filter,
   1179         update,
   1180         _Op.UPDATE,
   1181         upsert,
   1182         multi=True,
   1183         write_concern=write_concern,
   1184         bypass_doc_val=bypass_document_validation,
   1185         collation=collation,
   1186         array_filters=array_filters,
   1187         hint=hint,
   1188         session=session,
   1189         let=let,
   1190         comment=comment,
   1191     ),
   1192     write_concern.acknowledged,
   1193 )

File /media/mohammed/Work/anaconda3/envs/stk_search/lib/python3.8/site-packages/pymongo/collection.py:872, in Collection._update_retryable(self, criteria, document, operation, upsert, multi, write_concern, op_id, ordered, bypass_doc_val, collation, array_filters, hint, session, let, comment)
    850 def _update(
    851     session: Optional[ClientSession], conn: Connection, retryable_write: bool
    852 ) -> Optional[Mapping[str, Any]]:
    853     return self._update(
    854         conn,
    855         criteria,
   (...)
    869         comment=comment,
    870     )
--> 872 return self.__database.client._retryable_write(
    873     (write_concern or self.write_concern).acknowledged and not multi,
    874     _update,
    875     session,
    876     operation,
    877 )

File /media/mohammed/Work/anaconda3/envs/stk_search/lib/python3.8/site-packages/pymongo/mongo_client.py:1575, in MongoClient._retryable_write(self, retryable, func, session, operation, bulk, operation_id)
   1561 """Execute an operation with consecutive retries if possible
   1562 
   1563 Returns func()'s return value on success. On error retries the same
   (...)
   1572 :param bulk: bulk abstraction to execute operations in bulk, defaults to None
   1573 """
   1574 with self._tmp_session(session) as s:
-> 1575     return self._retry_with_session(retryable, func, s, bulk, operation, operation_id)

File /media/mohammed/Work/anaconda3/envs/stk_search/lib/python3.8/site-packages/pymongo/mongo_client.py:1461, in MongoClient._retry_with_session(self, retryable, func, session, bulk, operation, operation_id)
   1456 # Ensure that the options supports retry_writes and there is a valid session not in
   1457 # transaction, otherwise, we will not support retry behavior for this txn.
   1458 retryable = bool(
   1459     retryable and self.options.retry_writes and session and not session.in_transaction
   1460 )
-> 1461 return self._retry_internal(
   1462     func=func,
   1463     session=session,
   1464     bulk=bulk,
   1465     operation=operation,
   1466     retryable=retryable,
   1467     operation_id=operation_id,
   1468 )

File /media/mohammed/Work/anaconda3/envs/stk_search/lib/python3.8/site-packages/pymongo/_csot.py:108, in apply.<locals>.csot_wrapper(self, *args, **kwargs)
    106         with _TimeoutContext(timeout):
    107             return func(self, *args, **kwargs)
--> 108 return func(self, *args, **kwargs)

File /media/mohammed/Work/anaconda3/envs/stk_search/lib/python3.8/site-packages/pymongo/mongo_client.py:1496, in MongoClient._retry_internal(self, func, session, bulk, operation, is_read, address, read_pref, retryable, operation_id)
   1470 @_csot.apply
   1471 def _retry_internal(
   1472     self,
   (...)
   1481     operation_id: Optional[int] = None,
   1482 ) -> T:
   1483     """Internal retryable helper for all client transactions.
   1484 
   1485     :param func: Callback function we want to retry
   (...)
   1494     :return: Output of the calling func()
   1495     """
-> 1496     return _ClientConnectionRetryable(
   1497         mongo_client=self,
   1498         func=func,
   1499         bulk=bulk,
   1500         operation=operation,
   1501         is_read=is_read,
   1502         session=session,
   1503         read_pref=read_pref,
   1504         address=address,
   1505         retryable=retryable,
   1506         operation_id=operation_id,
   1507     ).run()

File /media/mohammed/Work/anaconda3/envs/stk_search/lib/python3.8/site-packages/pymongo/mongo_client.py:2353, in _ClientConnectionRetryable.run(self)
   2351 self._check_last_error(check_csot=True)
   2352 try:
-> 2353     return self._read() if self._is_read else self._write()
   2354 except ServerSelectionTimeoutError:
   2355     # The application may think the write was never attempted
   2356     # if we raise ServerSelectionTimeoutError on the retry
   2357     # attempt. Raise the original exception instead.
   2358     self._check_last_error()

File /media/mohammed/Work/anaconda3/envs/stk_search/lib/python3.8/site-packages/pymongo/mongo_client.py:2470, in _ClientConnectionRetryable._write(self)
   2468             self._check_last_error()
   2469             self._retryable = False
-> 2470         return self._func(self._session, conn, self._retryable)  # type: ignore
   2471 except PyMongoError as exc:
   2472     if not self._retryable:

File /media/mohammed/Work/anaconda3/envs/stk_search/lib/python3.8/site-packages/pymongo/collection.py:853, in Collection._update_retryable.<locals>._update(session, conn, retryable_write)
    850 def _update(
    851     session: Optional[ClientSession], conn: Connection, retryable_write: bool
    852 ) -> Optional[Mapping[str, Any]]:
--> 853     return self._update(
    854         conn,
    855         criteria,
    856         document,
    857         upsert=upsert,
    858         multi=multi,
    859         write_concern=write_concern,
    860         op_id=op_id,
    861         ordered=ordered,
    862         bypass_doc_val=bypass_doc_val,
    863         collation=collation,
    864         array_filters=array_filters,
    865         hint=hint,
    866         session=session,
    867         retryable_write=retryable_write,
    868         let=let,
    869         comment=comment,
    870     )

File /media/mohammed/Work/anaconda3/envs/stk_search/lib/python3.8/site-packages/pymongo/collection.py:806, in Collection._update(self, conn, criteria, document, upsert, multi, write_concern, op_id, ordered, bypass_doc_val, collation, array_filters, hint, session, retryable_write, let, comment)
    802     command["bypassDocumentValidation"] = True
    804 # The command result has to be published for APM unmodified
    805 # so we make a shallow copy here before adding updatedExisting.
--> 806 result = conn.command(
    807     self.__database.name,
    808     command,
    809     write_concern=write_concern,
    810     codec_options=self.__write_response_codec_options,
    811     session=session,
    812     client=self.__database.client,
    813     retryable_write=retryable_write,
    814 ).copy()
    815 _check_write_command_response(result)
    816 # Add the updatedExisting field for compatibility.

File /media/mohammed/Work/anaconda3/envs/stk_search/lib/python3.8/site-packages/pymongo/helpers.py:342, in _handle_reauth.<locals>.inner(*args, **kwargs)
    339 from pymongo.pool import Connection
    341 try:
--> 342     return func(*args, **kwargs)
    343 except OperationFailure as exc:
    344     if no_reauth:

File /media/mohammed/Work/anaconda3/envs/stk_search/lib/python3.8/site-packages/pymongo/pool.py:1017, in Connection.command(self, dbname, spec, read_preference, codec_options, check, allowable_errors, read_concern, write_concern, parse_write_concern_error, collation, session, client, retryable_write, publish_events, user_fields, exhaust_allowed)
   1015 # Catch socket.error, KeyboardInterrupt, etc. and close ourselves.
   1016 except BaseException as error:
-> 1017     self._raise_connection_failure(error)

File /media/mohammed/Work/anaconda3/envs/stk_search/lib/python3.8/site-packages/pymongo/pool.py:989, in Connection.command(self, dbname, spec, read_preference, codec_options, check, allowable_errors, read_concern, write_concern, parse_write_concern_error, collation, session, client, retryable_write, publish_events, user_fields, exhaust_allowed)
    987     self._raise_if_not_writable(unacknowledged)
    988 try:
--> 989     return command(
    990         self,
    991         dbname,
    992         spec,
    993         self.is_mongos,
    994         read_preference,
    995         codec_options,
    996         session,
    997         client,
    998         check,
    999         allowable_errors,
   1000         self.address,
   1001         listeners,
   1002         self.max_bson_size,
   1003         read_concern,
   1004         parse_write_concern_error=parse_write_concern_error,
   1005         collation=collation,
   1006         compression_ctx=self.compression_context,
   1007         use_op_msg=self.op_msg_enabled,
   1008         unacknowledged=unacknowledged,
   1009         user_fields=user_fields,
   1010         exhaust_allowed=exhaust_allowed,
   1011         write_concern=write_concern,
   1012     )
   1013 except (OperationFailure, NotPrimaryError):
   1014     raise

File /media/mohammed/Work/anaconda3/envs/stk_search/lib/python3.8/site-packages/pymongo/network.py:202, in command(conn, dbname, spec, is_mongos, read_preference, codec_options, session, client, check, allowable_errors, address, listeners, max_bson_size, read_concern, parse_write_concern_error, collation, compression_ctx, use_op_msg, unacknowledged, user_fields, exhaust_allowed, write_concern)
    200     response_doc: _DocumentOut = {"ok": 1}
    201 else:
--> 202     reply = receive_message(conn, request_id)
    203     conn.more_to_come = reply.more_to_come
    204     unpacked_docs = reply.unpack_response(
    205         codec_options=codec_options, user_fields=user_fields
    206     )

File /media/mohammed/Work/anaconda3/envs/stk_search/lib/python3.8/site-packages/pymongo/network.py:317, in receive_message(conn, request_id, max_message_size)
    315         deadline = None
    316 # Ignore the response's request id.
--> 317 length, _, response_to, op_code = _UNPACK_HEADER(_receive_data_on_socket(conn, 16, deadline))
    318 # No request_id for exhaust cursor "getMore".
    319 if request_id is not None:

File /media/mohammed/Work/anaconda3/envs/stk_search/lib/python3.8/site-packages/pymongo/network.py:394, in _receive_data_on_socket(conn, length, deadline)
    392 while bytes_read < length:
    393     try:
--> 394         wait_for_read(conn, deadline)
    395         # CSOT: Update timeout. When the timeout has expired perform one
    396         # final non-blocking recv. This helps avoid spurious timeouts when
    397         # the response is actually already buffered on the client.
    398         if _csot.get_timeout() and deadline is not None:

File /media/mohammed/Work/anaconda3/envs/stk_search/lib/python3.8/site-packages/pymongo/network.py:375, in wait_for_read(conn, deadline)
    373     else:
    374         timeout = _POLL_TIMEOUT
--> 375     readable = conn.socket_checker.select(sock, read=True, timeout=timeout)
    376 if conn.cancel_context.cancelled:
    377     raise _OperationCancelled("operation cancelled")

File /media/mohammed/Work/anaconda3/envs/stk_search/lib/python3.8/site-packages/pymongo/socket_checker.py:56, in SocketChecker.select(self, sock, read, write, timeout)
     54 try:
     55     if self._poller:
---> 56         mask = select.POLLERR | select.POLLHUP
     57         if read:
     58             mask = mask | select.POLLIN | select.POLLPRI

KeyboardInterrupt: 
In [ ]: