123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180 |
import asyncio
import json
import os
import time
import warnings
from typing import Any, Dict, Optional, Tuple

import nest_asyncio
from dotenv import load_dotenv
from langchain import LLMChain, PromptTemplate
from langchain_community.chat_models import ChatOpenAI
# Patch asyncio so that event loops may be nested.
nest_asyncio.apply()

# Silence every warning for the remainder of the process.
warnings.filterwarnings("ignore")

# Load environment variables, then read the OpenAI credential from them.
# os.getenv returns None when the variable is unset. The value is not
# referenced by the visible code below (ChatOpenAI is built without it).
load_dotenv()
openai_api_key = os.getenv('OPENAI_API_KEY')
# Step 1: Load the JSON file synchronously
def load_json_file(file_path: str) -> Any:
    """Read and parse the JSON document stored at *file_path*.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the platform's default text encoding.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON value (whatever ``json.load`` produces for the file).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as json_file:
        data = json.load(json_file)
    print('Loaded data')
    return data
def _strip_code_fences(text: str) -> str:
    """Remove Markdown code-fence markers and surrounding whitespace."""
    return text.replace("```json", "").replace("```", "").strip()


def _loads_lenient(text: str) -> Any:
    """Parse *text* as JSON, retrying on its outermost ``{...}`` span.

    Raises:
        json.JSONDecodeError: The error from parsing the full text, when
            neither the full text nor the brace-delimited span is valid JSON.
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError as first_error:
        start, end = text.find("{"), text.rfind("}")
        if start == -1 or end <= start:
            raise
        try:
            # The reply may carry prose before/after the JSON object.
            return json.loads(text[start:end + 1])
        except json.JSONDecodeError:
            # Report the error for the full text, not for the guessed span.
            raise first_error from None


# Define an asynchronous function to run Langchain on an individual invention
async def process_invention(
    featureListExtractionChain: Any,
    key: str,
    value: Dict[str, Any],
) -> Tuple[str, Dict[str, Any]]:
    """Run the feature-extraction chain on one invention and parse its reply.

    Args:
        featureListExtractionChain: Object exposing an awaitable
            ``arun(inputs)`` that returns the model reply as a string.
        key: Identifier of the invention; echoed back in the result.
        value: Mapping that must contain the text under ``"invention"``.

    Returns:
        ``(key, {"invention": <text>, "result": <parsed>})`` where ``<parsed>``
        is the decoded JSON reply, or the cleaned reply string when the reply
        cannot be decoded as JSON.

    Raises:
        KeyError: If *value* has no ``"invention"`` entry.
    """
    print(f'Generating Feature List for {key}')
    invention_text = value["invention"]

    # Process the invention text with the Langchain chain asynchronously
    raw_reply = await featureListExtractionChain.arun({"invention": invention_text})

    # Drop code block markers, then decode; keep the raw text on failure so
    # the reply is never lost.
    cleaned_reply = _strip_code_fences(raw_reply)
    try:
        parsed_result = _loads_lenient(cleaned_reply)
    except json.JSONDecodeError as e:
        print(f"Error parsing result for {key}: {e}")
        parsed_result = cleaned_reply  # Fallback to raw string if parsing fails

    # Return the key and result for later collection
    return key, {"invention": invention_text, "result": parsed_result}
# Prompt sent to the model for every invention. Doubled braces are literal
# braces in the rendered prompt; {invention} is the only placeholder.
FEATURE_EXTRACTION_TEMPLATE = """
Break down the provided text, which may describe a technical solution, invention claim, or methodology, into distinct and well-defined technical features. Each feature must adhere to the following guidelines:
1. **Technical Precision**:
- Capture the structural, functional, or process-related elements described in the text.
- For apparatus/technical claims, identify each structural component, its configuration, and its specific role within the system.
- For methodology claims, outline each step in the exact sequence presented, ensuring that dependencies and technical details are preserved.
- Each feature should focus on one unique aspect or functionality of the described solution.
2. **Completeness**:
- Write each feature as a complete, standalone sentence that specifies the component, configuration, or function clearly.
- Avoid vague language or incomplete descriptions. Each feature must include enough context to be meaningful on its own.
3. **Clarity and Consistency**:
- Exclude phrases like "the present invention" or narrative elements that do not contribute directly to the technical details.
- Focus on unique features and avoid unnecessary repetition.
4. **Fallback Instructions**:
- If the provided text is abstract or lacks distinct technical elements, break it down into general purposes, key objectives, and any identifiable components or methodologies. Each feature should focus on specific technical attributes or intended functions.
- If the text does not explicitly list components or steps, infer features based on the described purpose or functionality, ensuring each feature is precise and self-contained.


### Example Inputs and Outputs:
#### Example 1:
**Input**:
The solution proposes a hanger bracket (Fishtail/Y-support bracket) for an exhaust system that includes a fin design to give structural rigidity and allows support for a hanger rod. A fin/plate is attached to the rod and then it will engage a slot in the bracket for tolerance control. The hanger bracket (Fishtail/Y-support bracket) is supported on a square tube or on a triangular tube with adjustment slots, which allows adjustment fore/aft, and up/down for tolerance stacks. The hanger bracket can be provided with a chamfer.
**Output**:
{{
"F1": "A hanger bracket (Fishtail/Y-support bracket) includes a fin design to give structural rigidity and allows support for a hanger rod.",
"F2": "A fin/plate is attached to the rod, then it will engage a slot in the bracket for tolerance control.",
"F3": "The hanger bracket (Fishtail/Y-support bracket) supports on a square tube or on a triangular tube with adjustment slots, which allows adjustment fore/aft, and up/down for tolerance stacks.",
"F4": "The hanger bracket includes a chamfer."
}}
#### Example 2:
**Input**:
A toilet seat and a cover member disposed behind the toilet seat to cover a rear part of a toilet bowl, the cover member comprising: a first standing part formed on the toilet seat side; an inclined part connected to a rear end of the first standing part and extending upward and downward; A second rising portion connected to a rear end of the inclined portion; and an extending portion connected to a rear end of the second rising portion and extending rearward, wherein the inclined portion is formed to be inclined at a steeper inclination than the extending portion.
**Output**:
{{
"F1": "A toilet seat and a cover member disposed behind the toilet seat to cover a rear part of a toilet bowl.",
"F2": "The cover member comprising a first standing part formed on the toilet seat side.",
"F3": "An inclined part connected to a rear end of the first standing part and extending upward and downward.",
"F4": "A second rising portion connected to a rear end of the inclined portion.",
"F5": "An extending portion connected to a rear end of the second rising portion and extending rearward.",
"F6": "The inclined portion is formed to be inclined at a steeper inclination than the extending portion."
}}
### Analysis Process:
1. Identify distinct components, configurations, or processes described in the text.
2. For each unique aspect, create a feature that captures the key technical detail, ensuring it is specific and complete.
3. Where variations or optional features are described, include them as separate features (e.g., a chamfer being present or absent).
4. If no clear features can be extracted, revert to the fallback approach by breaking down the text line-by-line and using each line as a feature. If the provided text is abstract or lacks distinct technical elements, break it down into general purposes, key objectives, and any identifiable components or methodologies.
Input Text: {invention}
Strictly follow the JSON Output Structure below, No extra content
Output:
{{
"F1": "First technical feature...",
"F2": "Second technical feature...",
...
}}
"""


# Step 2: Run Langchain prompt on each invention asynchronously
async def run_langchain_on_inventions(
    data: Dict[str, Any],
    model_name: str = 'gpt-4o-mini',
    *,
    max_concurrency: Optional[int] = None,
) -> Dict[str, Any]:
    """Extract a feature list for every invention in *data* concurrently.

    One chain is built for the whole batch and ``process_invention`` is
    scheduled once per entry of *data*.

    Args:
        data: Mapping of invention key to a mapping holding the text under
            ``"invention"`` (the shape ``process_invention`` reads).
        model_name: Model identifier passed to ``ChatOpenAI``.
        max_concurrency: Upper bound on chain calls in flight at the same
            time. ``None`` (the default) starts all calls at once.

    Returns:
        Mapping of each key to the result dict returned by
        ``process_invention``. Empty when *data* is empty.

    Raises:
        ValueError: If *max_concurrency* is given and is smaller than 1.
        Exception: Whatever a chain call raises is propagated by
            ``asyncio.gather``.
    """
    # Validate before any coroutine object is created so nothing is left
    # un-awaited when the argument is rejected.
    if max_concurrency is not None and max_concurrency < 1:
        raise ValueError("max_concurrency must be a positive integer or None")

    prompt_template = PromptTemplate(
        input_variables=["invention"],
        template=FEATURE_EXTRACTION_TEMPLATE,
    )

    # Initialize the Langchain LLM with the desired model
    featureListExtractionChain = LLMChain(
        llm=ChatOpenAI(model=model_name),
        prompt=prompt_template,
    )

    # One coroutine per invention
    pending = [
        process_invention(featureListExtractionChain, key, value)
        for key, value in data.items()
    ]

    if max_concurrency is not None:
        gate = asyncio.Semaphore(max_concurrency)

        async def _throttled(coro):
            """Await *coro* only while holding a slot of the semaphore."""
            async with gate:
                return await coro

        pending = [_throttled(coro) for coro in pending]

    # Run all the tasks concurrently; each yields a (key, result) pair
    results = await asyncio.gather(*pending)
    return dict(results)
# Step 3: Save the results to a new JSON file synchronously
def save_results_to_json(results: Any, output_file: str) -> None:
    """Write *results* to *output_file* as JSON indented by four spaces.

    The file is opened explicitly as UTF-8 so the written bytes do not depend
    on the platform's default text encoding. An existing file is overwritten.

    Args:
        results: JSON-serialisable value to store.
        output_file: Path of the file to create or overwrite.

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If *results* contains a value ``json.dump`` cannot encode.
    """
    with open(output_file, 'w', encoding='utf-8') as outfile:
        json.dump(results, outfile, indent=4)
# Main function to tie everything together
def main(input_file_path: str, output_file_path: str) -> None:
    """Load inventions, extract their feature lists and save the results.

    Prints the total run time at the end. Returns early, without writing
    anything, when the input file does not hold a JSON object.

    Args:
        input_file_path: Path of the JSON file passed to ``load_json_file``.
        output_file_path: Path handed to ``save_results_to_json``.

    Raises:
        Exception: Errors raised while loading, processing or saving are not
            caught here and propagate to the caller.
    """
    # perf_counter is monotonic, so the reported duration cannot be skewed
    # by system clock adjustments during the run.
    started = time.perf_counter()

    # Step 1: Load the JSON file
    data = load_json_file(input_file_path)
    if data is None:
        print("Error: Data not loaded.")
        return
    if not isinstance(data, dict):
        # The processing step iterates data.items(); reject other JSON
        # values here instead of failing later with an AttributeError.
        print(
            "Error: expected a JSON object mapping keys to inventions, "
            f"got {type(data).__name__}."
        )
        return

    # Step 2: Process the inventions asynchronously using asyncio.run()
    processed_results = asyncio.run(run_langchain_on_inventions(data))

    # Step 3: Save the processed results to a new JSON file
    save_results_to_json(processed_results, output_file_path)

    # Calculate and print the total execution time
    execution_time = time.perf_counter() - started
    print(f"Script executed in: {execution_time:.2f} seconds")
# Entry point when the file is executed directly rather than imported.
if __name__ == "__main__":
    # Input JSON path first, output JSON path second; edit both as needed.
    input_file, output_file = 'FTO_inventions.json', 'FTO_GPT_FeatureList3.json'
    main(input_file, output_file)
|