ayush
/
FeatureTests


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
							import os
import json
import time
import asyncio
import warnings
from dotenv import load_dotenv
from langchain_community.chat_models import ChatOpenAI
from langchain import LLMChain, PromptTemplate
import nest_asyncio

# Patch asyncio via nest_asyncio so an event loop can be started while another
# one is already running (the original intent: "allow nested event loops").
# This runs at import time, before any coroutine in this file is scheduled.
nest_asyncio.apply()

# Silence every warning category process-wide. NOTE(review): this also hides
# deprecation warnings emitted by the imported libraries.
warnings.filterwarnings("ignore")

# Load environment variables (python-dotenv), then read the OpenAI key.
# os.getenv returns None when OPENAI_API_KEY is not set.
# NOTE(review): openai_api_key is not referenced anywhere else in the visible
# code; presumably ChatOpenAI picks the key up from the environment itself --
# confirm before removing.
load_dotenv()
openai_api_key = os.getenv('OPENAI_API_KEY')

# Step 1: Load the JSON file synchronously
def load_json_file(file_path):
    """Read and parse the JSON document stored at ``file_path``.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON value, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        ValueError: If the content is not valid UTF-8 or not valid JSON
            (``json.JSONDecodeError`` and ``UnicodeDecodeError`` are both
            subclasses of ``ValueError``).
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale default encoding (which breaks non-ASCII text on
    # systems whose default is not UTF-8).
    with open(file_path, 'r', encoding='utf-8') as json_file:
        data = json.load(json_file)
    print('Loaded data')
    return data

# Define an asynchronous function to run Langchain on an individual invention
async def process_invention(featureListExtractionChain, key, value):
    """Generate the feature list for a single invention.

    Args:
        featureListExtractionChain: Object exposing an awaitable
            ``arun(inputs)`` that returns the model's reply as a string.
        key: Identifier of the invention; echoed back in the return value.
        value: Mapping that must contain the invention text under
            ``"invention"``.

    Returns:
        A ``(key, record)`` tuple where ``record`` holds the original text
        under ``"invention"`` and, under ``"result"``, either the parsed JSON
        reply or the cleaned reply string when it is not valid JSON.
    """
    print(f'Generating Feature List for {key}')
    source_text = value["invention"]

    reply = await featureListExtractionChain.arun({"invention": source_text})

    # Drop Markdown code-fence markers; the longer marker must go first so
    # that "```json" is not left behind as a stray "json".
    stripped = reply
    for fence in ("```json", "```"):
        stripped = stripped.replace(fence, "")
    stripped = stripped.strip()

    try:
        features = json.loads(stripped)
    except json.JSONDecodeError as e:
        print(f"Error parsing result for {key}: {e}")
        # Keep the unparsed text so the caller still gets the model output.
        features = stripped

    return key, {"invention": source_text, "result": features}

# Step 2: Run Langchain prompt on each invention asynchronously
async def run_langchain_on_inventions(data, model_name='gpt-4o-mini'):
    """Extract a feature list for every invention in ``data`` concurrently.

    Args:
        data: Mapping of invention key -> record; each record is handed to
            ``process_invention``, which reads its ``"invention"`` entry.
        model_name: Model identifier passed to ``ChatOpenAI``.

    Returns:
        A dict keyed by the same invention keys, whose values are the records
        returned by ``process_invention``.
    """
    # The template text is part of the prompt sent to the model, so it is kept
    # verbatim (including its indentation). Doubled braces render as literal
    # braces; ``{invention}`` is the only substituted variable.
    feature_prompt = PromptTemplate(
        input_variables=["invention"],
        template="""
    Break down the provided text, which may describe a technical solution, invention claim, or methodology, into distinct and well-defined technical features. Each feature must adhere to the following guidelines:

    1. **Technical Precision**:
    - Capture the structural, functional, or process-related elements described in the text.
    - For apparatus/technical claims, identify each structural component, its configuration, and its specific role within the system.
    - For methodology claims, outline each step in the exact sequence presented, ensuring that dependencies and technical details are preserved.
    - Each feature should focus on one unique aspect or functionality of the described solution.


    2. **Completeness**:
    - Write each feature as a complete, standalone sentence that specifies the component, configuration, or function clearly.
    - Avoid vague language or incomplete descriptions. Each feature must include enough context to be meaningful on its own.

    3. **Clarity and Consistency**:
    - Exclude phrases like "the present invention" or narrative elements that do not contribute directly to the technical details.
    - Focus on unique features and avoid unnecessary repetition.

    4. **Fallback Instructions**:
    - If the provided text is abstract or lacks distinct technical elements, break it down into general purposes, key objectives, and any identifiable components or methodologies. Each feature should focus on specific technical attributes or intended functions.
    - If the text does not explicitly list components or steps, infer features based on the described purpose or functionality, ensuring each feature is precise and self-contained.
    
    
    ### Example Inputs and Outputs:

    #### Example 1:
    **Input**:
    The solution proposes a hanger bracket (Fishtail/Y-support bracket) for an exhaust system that includes a fin design to give structural rigidity and allows support for a hanger rod. A fin/plate is attached to the rod and then it will engage a slot in the bracket for tolerance control. The hanger bracket (Fishtail/Y-support bracket) is supported on a square tube or on a triangular tube with adjustment slots, which allows adjustment fore/aft, and up/down for tolerance stacks. The hanger bracket can be provided with a chamfer.

    **Output**:
    {{
        "F1": "A hanger bracket (Fishtail/Y-support bracket) includes a fin design to give structural rigidity and allows support for a hanger rod.",
        "F2": "A fin/plate is attached to the rod, then it will engage a slot in the bracket for tolerance control.",
        "F3": "The hanger bracket (Fishtail/Y-support bracket) supports on a square tube or on a triangular tube with adjustment slots, which allows adjustment fore/aft, and up/down for tolerance stacks.",
        "F4": "The hanger bracket includes a chamfer."
    }}

    #### Example 2:
    **Input**:
    A toilet seat and a cover member disposed behind the toilet seat to cover a rear part of a toilet bowl, the cover member comprising: a first standing part formed on the toilet seat side; an inclined part connected to a rear end of the first standing part and extending upward and downward; A second rising portion connected to a rear end of the inclined portion; and an extending portion connected to a rear end of the second rising portion and extending rearward, wherein the inclined portion is formed to be inclined at a steeper inclination than the extending portion.

    **Output**:
    {{
        "F1": "A toilet seat and a cover member disposed behind the toilet seat to cover a rear part of a toilet bowl.",
        "F2": "The cover member comprising a first standing part formed on the toilet seat side.",
        "F3": "An inclined part connected to a rear end of the first standing part and extending upward and downward.",
        "F4": "A second rising portion connected to a rear end of the inclined portion.",
        "F5": "An extending portion connected to a rear end of the second rising portion and extending rearward.",
        "F6": "The inclined portion is formed to be inclined at a steeper inclination than the extending portion."
    }}

    ### Analysis Process:
    1. Identify distinct components, configurations, or processes described in the text.
    2. For each unique aspect, create a feature that captures the key technical detail, ensuring it is specific and complete.
    3. Where variations or optional features are described, include them as separate features (e.g., a chamfer being present or absent).
    4. If no clear features can be extracted, revert to the fallback approach by breaking down the text line-by-line and using each line as a feature. If the provided text is abstract or lacks distinct technical elements, break it down into general purposes, key objectives, and any identifiable components or methodologies.

    Input Text: {invention}

    Strictly follow the JSON Output Structure below, No extra content


    Output:
    {{
        "F1": "First technical feature...",
        "F2": "Second technical feature...",
        ...
    }}
    """,
    )

    # One chain instance is shared by every invention.
    extraction_chain = LLMChain(
        llm=ChatOpenAI(model=model_name),
        prompt=feature_prompt,
    )

    # Build one coroutine per invention; gather runs them concurrently and
    # returns their (key, record) tuples in the same order.
    pending = [
        process_invention(extraction_chain, key, value)
        for key, value in data.items()
    ]
    key_record_pairs = await asyncio.gather(*pending)

    return dict(key_record_pairs)

# Step 3: Save the results to a new JSON file synchronously
def save_results_to_json(results, output_file):
    """Serialize ``results`` as indented JSON into ``output_file``.

    Args:
        results: JSON-serializable object to store.
        output_file: Destination path; an existing file is overwritten.
    """
    with open(output_file, mode='w') as sink:
        json.dump(results, fp=sink, indent=4)

# Main function to tie everything together
def main(input_file_path, output_file_path):
    """Run the whole pipeline: load inventions, extract features, save them.

    Args:
        input_file_path: Path of the JSON file read by ``load_json_file``.
        output_file_path: Path of the JSON file written by
            ``save_results_to_json``.

    Returns:
        None. Prints the elapsed time on success, or an error message and
        returns early when the loaded data is ``None``.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # wall-clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()

    # Step 1: Load the JSON file
    data = load_json_file(input_file_path)

    # json.load yields None when the file holds a bare JSON `null`; there is
    # nothing to process in that case.
    if data is None:
        print("Error: Data not loaded.")
        return

    # Step 2: Process the inventions; asyncio.run drives the coroutine to
    # completion from this synchronous entry point.
    processed_results = asyncio.run(run_langchain_on_inventions(data))

    # Step 3: Save the processed results to a new JSON file
    save_results_to_json(processed_results, output_file_path)

    # Report the total execution time
    execution_time = time.perf_counter() - start_time
    print(f"Script executed in: {execution_time:.2f} seconds")

# Script entry point: only runs when the file is executed directly.
# Edit the two paths below to point at your own files.
if __name__ == "__main__":
    main(
        input_file_path='FTO_inventions.json',  # input JSON file
        output_file_path='FTO_GPT_FeatureList3.json',  # output JSON file
    )