Predicting polymerization reactions via transfer learning using chemical language models


JSON Export

{
  "id": "1780", 
  "updated": "2024-02-29T15:30:05.287721+00:00", 
  "metadata": {
    "version": 1, 
    "contributors": [
      {
        "givennames": "Brenda", 
        "affiliations": [
          "IBM Research Brazil - Avenida Rep\u00fablica do Chile, 330 - 11o. e 12. andares Rio De Janeiro, RJ 20031-170, Brazil"
        ], 
        "email": "bferrari@ibm.com", 
        "familyname": "S. Ferrari"
      }, 
      {
        "givennames": "Matteo", 
        "affiliations": [
          "IBM Research Europe - S\u00e4umerstrasse 4, 8803 R\u00fcschlikon, Switzerland"
        ], 
        "email": "TTE@zurich.ibm.com", 
        "familyname": "Manica"
      }, 
      {
        "givennames": "Ronaldo", 
        "affiliations": [
          "IBM Research Brazil - Avenida Rep\u00fablica do Chile, 330 - 11o. e 12. andares Rio De Janeiro, RJ 20031-170, Brazil"
        ], 
        "email": "rgiro@br.ibm.com", 
        "familyname": "Giro"
      }, 
      {
        "givennames": "Teodoro", 
        "affiliations": [
          "IBM Research Europe - S\u00e4umerstrasse 4, 8803 R\u00fcschlikon, Switzerland", 
          "National Center for Competence in Research-Catalysis (NCCR-Catalysis), Switzerland"
        ], 
        "email": "TEO@zurich.ibm.com", 
        "familyname": "Laino"
      }, 
      {
        "givennames": "Mathias", 
        "affiliations": [
          "IBM Research Brazil - Avenida Rep\u00fablica do Chile, 330 - 11o. e 12. andares Rio De Janeiro, RJ 20031-170, Brazil"
        ], 
        "email": "mathiast@br.ibm.com", 
        "familyname": "B. Steiner"
      }
    ], 
    "title": "Predicting polymerization reactions via transfer learning using chemical language models", 
    "_oai": {
      "id": "oai:materialscloud.org:1780"
    }, 
    "keywords": [
      "polymerization reaction", 
      "machine learning", 
      "homopolymers", 
      "co-polymers", 
      "reactants", 
      "reagents (solvents, catalysts)", 
      "products"
    ], 
    "publication_date": "Sep 06, 2023, 09:31:05", 
    "_files": [
      {
        "key": "hta_dataset_all_combinations.csv", 
        "description": "polymerization reactions with head and tail atoms assigned by a Python script based on polymerization mechanisms and a nucleophilic index obtained from the quantum-chemistry atomic population of the Highest Occupied Molecular Orbital (HOMO)", 
        "checksum": "md5:e98e53c67b90a7e2ec049a93a17dd78f", 
        "size": 2153792
      }, 
      {
        "key": "m2p_dataset_all_combinations.csv", 
        "description": "polymerization reactions with head and tail atoms assigned by a modified version of a Python tool called Monomers to Polymers (M2P)", 
        "checksum": "md5:def01c2f8f848ab7ad329372cb37898c", 
        "size": 1589715
      }, 
      {
        "key": "trained_models.zip", 
        "description": "zip file containing the trained machine learning models (forward and retrosynthesis); the config files are those with the .yml extension and the model weights are those with the .pt extension (pickled binary, readable with PyTorch)", 
        "checksum": "md5:f43e79c28fec01b7bf665b853b6359a3", 
        "size": 4120577643
      }
    ], 
    "references": [], 
    "description": "Polymers are candidate materials for a wide range of sustainability applications such as carbon capture and energy storage. However, computational polymer discovery lacks automated analysis of reaction pathways and stability assessment through retrosynthesis. Here, we report the first extension of transformer-based language models to polymerization reactions for both forward and retrosynthesis tasks. We curated a polymerization dataset for vinyl polymers covering reactions and retrosynthesis for representative homo-polymers and co-polymers. Overall, we report a forward model accuracy of 80% and a backward model accuracy of 60%. We further analyse the model performance on a set of case studies by providing polymerization and retrosynthesis examples and evaluating the quality of the model\u2019s predictions from a materials science perspective.", 
    "status": "published", 
    "license": "MIT License", 
    "conceptrecid": "1779", 
    "is_last": false, 
    "mcid": "2023.137", 
    "edited_by": 98, 
    "id": "1780", 
    "owner": 726, 
    "license_addendum": "CDLA-Permissive-2.0", 
    "doi": "10.24435/materialscloud:zw-be"
  }, 
  "revision": 8, 
  "created": "2023-05-30T12:47:22.223477+00:00"
}