# FeatureListAsync.py
  1. import os
  2. import json
  3. import time
  4. import asyncio
  5. import warnings
  6. from dotenv import load_dotenv
  7. from langchain_community.chat_models import ChatOpenAI
  8. from langchain import LLMChain, PromptTemplate
  9. import nest_asyncio
  10. # Apply the nest_asyncio patch to allow nested event loops
  11. nest_asyncio.apply()
  12. # Suppress all warnings
  13. warnings.filterwarnings("ignore")
  14. # Load environment variables
  15. load_dotenv()
  16. openai_api_key = os.getenv('OPENAI_API_KEY')
  17. # Step 1: Load the JSON file synchronously
  18. def load_json_file(file_path):
  19. with open(file_path, 'r') as json_file:
  20. data = json.load(json_file)
  21. print('Loaded data')
  22. return data
  23. # Define an asynchronous function to run Langchain on an individual invention
  24. async def process_invention(featureListExtractionChain, key, value):
  25. print(f'Generating Feature List for {key}')
  26. invention_text = value["invention"]
  27. # Process the invention text with the Langchain chain asynchronously
  28. result = await featureListExtractionChain.arun({"invention": invention_text})
  29. # Clean the result to remove code block markers and extra formatting
  30. cleaned_result = result.replace("```json", "").replace("```", "").strip()
  31. # Try to parse the cleaned result as JSON
  32. try:
  33. parsed_result = json.loads(cleaned_result)
  34. except json.JSONDecodeError as e:
  35. print(f"Error parsing result for {key}: {e}")
  36. parsed_result = cleaned_result # Fallback to raw string if parsing fails
  37. # Return the key and result for later collection
  38. return key, {"invention": invention_text, "result": parsed_result}
  39. # Step 2: Run Langchain prompt on each invention asynchronously
  40. async def run_langchain_on_inventions(data, model_name='gpt-4o-mini'):
  41. # Prompt template
  42. prompt_template = PromptTemplate(
  43. input_variables=["invention"],
  44. template = """
  45. Break down the provided text, which may describe a technical solution, invention claim, or methodology, into distinct and well-defined technical features. Each feature must adhere to the following guidelines:
  46. 1. **Technical Precision**:
  47. - Capture the structural, functional, or process-related elements described in the text.
  48. - Each feature should focus on one unique aspect or functionality of the described solution.
  49. 2. **Completeness**:
  50. - Write each feature as a complete, standalone sentence that specifies the component, configuration, or function clearly.
  51. - Avoid vague language or incomplete descriptions. Each feature must include enough context to be meaningful on its own.
  52. 3. **Clarity and Consistency**:
  53. - Exclude phrases like "the present invention" or narrative elements that do not contribute directly to the technical details.
  54. - Focus on unique features and avoid unnecessary repetition.
  55. 4. **Fallback Instructions**:
  56. - If the provided text is abstract or lacks distinct technical elements, break it down into general purposes, key objectives, and any identifiable components or methodologies. Each feature should focus on specific technical attributes or intended functions.
  57. - If the text does not explicitly list components or steps, infer features based on the described purpose or functionality, ensuring each feature is precise and self-contained.
  58. ### Example Inputs and Outputs:
  59. #### Example 1:
  60. **Input**:
  61. The solution proposes a hanger bracket (Fishtail/Y-support bracket) for an exhaust system that includes a fin design to give structural rigidity and allows support for a hanger rod. A fin/plate is attached to the rod and then it will engage a slot in the bracket for tolerance control. The hanger bracket (Fishtail/Y-support bracket) is supported on a square tube or on a triangular tube with adjustment slots, which allows adjustment fore/aft, and up/down for tolerance stacks. The hanger bracket can be provided with a chamfer.
  62. **Output**:
  63. {{
  64. "F1": "A hanger bracket (Fishtail/Y-support bracket) includes a fin design to give structural rigidity and allows support for a hanger rod.",
  65. "F2": "A fin/plate is attached to the rod, then it will engage a slot in the bracket for tolerance control.",
  66. "F3": "The hanger bracket (Fishtail/Y-support bracket) supports on a square tube or on a triangular tube with adjustment slots, which allows adjustment fore/aft, and up/down for tolerance stacks.",
  67. "F4": "The hanger bracket includes a chamfer."
  68. }}
  69. #### Example 2:
  70. **Input**:
  71. A toilet seat and a cover member disposed behind the toilet seat to cover a rear part of a toilet bowl, the cover member comprising: a first standing part formed on the toilet seat side; an inclined part connected to a rear end of the first standing part and extending upward and downward; A second rising portion connected to a rear end of the inclined portion; and an extending portion connected to a rear end of the second rising portion and extending rearward, wherein the inclined portion is formed to be inclined at a steeper inclination than the extending portion.
  72. **Output**:
  73. {{
  74. "F1": "A toilet seat and a cover member disposed behind the toilet seat to cover a rear part of a toilet bowl.",
  75. "F2": "The cover member comprising a first standing part formed on the toilet seat side.",
  76. "F3": "An inclined part connected to a rear end of the first standing part and extending upward and downward.",
  77. "F4": "A second rising portion connected to a rear end of the inclined portion.",
  78. "F5": "An extending portion connected to a rear end of the second rising portion and extending rearward.",
  79. "F6": "The inclined portion is formed to be inclined at a steeper inclination than the extending portion."
  80. }}
  81. ### Analysis Process:
  82. 1. Identify distinct components, configurations, or processes described in the text.
  83. 2. For each unique aspect, create a feature that captures the key technical detail, ensuring it is specific and complete.
  84. 3. Where variations or optional features are described, include them as separate features (e.g., a chamfer being present or absent).
  85. 4. If no clear features can be extracted, revert to the fallback approach by breaking down the text line-by-line and using each line as a feature. If the provided text is abstract or lacks distinct technical elements, break it down into general purposes, key objectives, and any identifiable components or methodologies.
  86. Input Text: {invention}
  87. Strictly follow the JSON Output Structure below, No extra content
  88. Output:
  89. {{
  90. "F1": "First technical feature...",
  91. "F2": "Second technical feature...",
  92. ...
  93. }}
  94. """
  95. )
  96. # Initialize the Langchain LLM with the desired model
  97. featureListExtractionChain = LLMChain(
  98. llm=ChatOpenAI(model=model_name),
  99. prompt=prompt_template
  100. )
  101. # Create a list to hold the asynchronous tasks
  102. tasks = []
  103. # Create tasks for each invention
  104. for key, value in data.items():
  105. tasks.append(process_invention(featureListExtractionChain, key, value))
  106. # Run all the tasks concurrently
  107. results = await asyncio.gather(*tasks)
  108. # Convert results into a dictionary
  109. results_dict = {key: value for key, value in results}
  110. return results_dict
  111. # Step 3: Save the results to a new JSON file synchronously
  112. def save_results_to_json(results, output_file):
  113. with open(output_file, 'w') as outfile:
  114. json.dump(results, outfile, indent=4)
  115. # Main function to tie everything together
  116. def main(input_file_path, output_file_path):
  117. # Start timing
  118. start_time = time.time()
  119. # Step 1: Load the JSON file
  120. data = load_json_file(input_file_path)
  121. if data is None:
  122. print("Error: Data not loaded.")
  123. return
  124. # Step 2: Process the inventions asynchronously using asyncio.run()
  125. processed_results = asyncio.run(run_langchain_on_inventions(data))
  126. # Step 3: Save the processed results to a new JSON file
  127. save_results_to_json(processed_results, output_file_path)
  128. # End timing
  129. end_time = time.time()
  130. # Calculate and print the total execution time
  131. execution_time = end_time - start_time
  132. print(f"Script executed in: {execution_time:.2f} seconds")
  133. # Run the script as a standalone program
  134. if __name__ == "__main__":
  135. input_file = 'FTO_inventions.json' # Set the path to your input JSON file
  136. output_file = 'FTO_GPT_FeatureList3.json' # Set the path to your output JSON file
  137. main(input_file, output_file)