Skip to content

Commit aef3489

Browse files
feat: Include citations in response when using prompt flow (#1089)
Co-authored-by: Ross Smith <ross-p-smith@users.noreply.github.com>
1 parent ab670cb commit aef3489

File tree

9 files changed

+53
-105
lines changed

9 files changed

+53
-105
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ python-test: ## 🧪 Run Python unit + functional tests
3636

3737
unittest: ## 🧪 Run the unit tests
3838
@echo -e "\e[34m$@\e[0m" || true
39-
@poetry run pytest -m "not azure and not functional" $(optional_args)
39+
@poetry run pytest -vvv -m "not azure and not functional" $(optional_args)
4040

4141
unittest-frontend: build-frontend ## 🧪 Unit test the Frontend webapp
4242
@echo -e "\e[34m$@\e[0m" || true

code/backend/batch/utilities/orchestrator/prompt_flow.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from .orchestrator_base import OrchestratorBase
77
from ..common.answer import Answer
8+
from ..common.source_document import SourceDocument
89
from ..helpers.llm_helper import LLMHelper
910
from ..helpers.env_helper import EnvHelper
1011

@@ -50,7 +51,13 @@ async def orchestrate(
5051
raise RuntimeError(f"The request failed: {error}") from error
5152

5253
# Transform response into answer for further processing
53-
answer = Answer(question=user_message, answer=result["chat_output"])
54+
answer = Answer(
55+
question=user_message,
56+
answer=result["chat_output"],
57+
source_documents=self.transform_citations_into_source_documents(
58+
result["citations"]
59+
),
60+
)
5461

5562
# Call Content Safety tool on answer
5663
if self.config.prompts.enable_content_safety:
@@ -91,3 +98,18 @@ def transform_data_into_file(self, user_message, chat_history):
9198
with tempfile.NamedTemporaryFile(delete=False) as file:
9299
file.write(body)
93100
return file.name
101+
102+
def transform_citations_into_source_documents(self, citations):
103+
source_documents = []
104+
105+
for _, doc_id in enumerate(citations):
106+
citation = citations[doc_id]
107+
source_documents.append(
108+
SourceDocument(
109+
id=doc_id,
110+
content=citation.get("content"),
111+
source=citation.get("filepath"),
112+
chunk_id=str(citation.get("chunk_id", 0)),
113+
)
114+
)
115+
return source_documents

code/tests/utilities/orchestrator/test_prompt_flow.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -95,16 +95,30 @@ async def test_orchestrate_returns_expected_chat_response(
9595
expected_result = [
9696
{
9797
"role": "tool",
98-
"content": '{"citations": [], "intent": "question"}',
98+
"content": '{"citations": [{"content": "[None](some-filepath)\\n\\n\\nsome-content", "id": "[doc1]", "chunk_id": "1", "title": null, "filepath": "some-filepath", "url": "[None](some-filepath)", "metadata": {"offset": null, "source": "some-filepath", "markdown_url": "[None](some-filepath)", "title": null, "original_url": "some-filepath", "chunk": null, "key": "[doc1]", "filename": "some-filepath"}}, {"content": "[None](some-other-filepath)\\n\\n\\nsome-other-content", "id": "[doc2]", "chunk_id": "2", "title": null, "filepath": "some-other-filepath", "url": "[None](some-other-filepath)", "metadata": {"offset": null, "source": "some-other-filepath", "markdown_url": "[None](some-other-filepath)", "title": null, "original_url": "some-other-filepath", "chunk": null, "key": "[doc2]", "filename": "some-other-filepath"}}], "intent": "question"}',
9999
"end_turn": False,
100100
},
101101
{
102102
"role": "assistant",
103-
"content": "answer",
103+
"content": "answer[doc1][doc2]",
104104
"end_turn": True,
105105
},
106106
]
107-
chat_output = {"chat_output": "answer", "citations": ["", []]}
107+
chat_output = {
108+
"chat_output": "answer[doc1][doc2]",
109+
"citations": {
110+
"[doc1]": {
111+
"content": "some-content",
112+
"filepath": "some-filepath",
113+
"chunk_id": 1,
114+
},
115+
"[doc2]": {
116+
"content": "some-other-content",
117+
"filepath": "some-other-filepath",
118+
"chunk_id": 2,
119+
},
120+
},
121+
}
108122

109123
orchestrator.transform_chat_history = MagicMock(return_value=[])
110124
orchestrator.ml_client.online_endpoints.invoke = AsyncMock(return_value=chat_output)
@@ -142,7 +156,7 @@ async def test_orchestrate_returns_content_safety_response_for_unsafe_output(
142156
):
143157
# given
144158
user_message = "question"
145-
chat_output = {"chat_output": "bad-response", "citations": ["", []]}
159+
chat_output = {"chat_output": "bad-response", "citations": {}}
146160
content_safety_response = [
147161
{
148162
"role": "tool",

infra/prompt-flow/create-prompt-flow.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ az account set --subscription "$subscription_id"
7575

7676
set +e
7777
tries=1
78-
pfazure flow create --subscription "$subscription_id" --resource-group "$resource_group" \
78+
poetry run pfazure flow create --subscription "$subscription_id" --resource-group "$resource_group" \
7979
--workspace-name "$aml_workspace" --flow "$flow_dir" --set type=chat
8080
while [ $? -ne 0 ]; do
8181
tries=$((tries+1))
@@ -86,7 +86,7 @@ while [ $? -ne 0 ]; do
8686

8787
echo "Failed to create flow, will retry in 30 seconds"
8888
sleep 30
89-
pfazure flow create --subscription "$subscription_id" --resource-group "$resource_group" \
89+
poetry run pfazure flow create --subscription "$subscription_id" --resource-group "$resource_group" \
9090
--workspace-name "$aml_workspace" --flow "$flow_dir" --set type=chat
9191
done
9292
set -e

infra/prompt-flow/cwyd/answer_output.py

Lines changed: 0 additions & 6 deletions
This file was deleted.

infra/prompt-flow/cwyd/citation_output.py

Lines changed: 0 additions & 6 deletions
This file was deleted.

infra/prompt-flow/cwyd/flow.dag.template.yaml

Lines changed: 2 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,11 @@ inputs:
1515
outputs:
1616
chat_output:
1717
type: string
18-
reference: ${answer_output.output}
18+
reference: ${chat_with_context.output}
1919
is_chat_output: true
2020
citations:
2121
type: string
22-
reference: ${output_parser.output}
22+
reference: ${generate_prompt_context.output}
2323
nodes:
2424
- name: lookup
2525
type: python
@@ -95,31 +95,6 @@ nodes:
9595
api: chat
9696
module: promptflow.tools.aoai
9797
use_variants: false
98-
- name: output_parser
99-
type: python
100-
source:
101-
type: code
102-
path: output_parser.py
103-
inputs:
104-
answer: ${chat_with_context.output}
105-
sources: ${generate_prompt_context.output}
106-
use_variants: false
107-
- name: answer_output
108-
type: python
109-
source:
110-
type: code
111-
path: answer_output.py
112-
inputs:
113-
output: ${output_parser.output}
114-
use_variants: false
115-
- name: citation_output
116-
type: python
117-
source:
118-
type: code
119-
path: citation_output.py
120-
inputs:
121-
output: ${output_parser.output}
122-
use_variants: false
12398
node_variants: {}
12499
environment:
125100
python_requirements_txt: requirements.txt

infra/prompt-flow/cwyd/generate_prompt_context.py

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
@tool
77
def generate_prompt_context(search_result: List[dict]) -> str:
8-
retrieved_docs = []
8+
retrieved_docs = {}
99
for index, item in enumerate(search_result):
1010

1111
entity = SearchResultEntity.from_dict(item)
@@ -14,14 +14,10 @@ def generate_prompt_context(search_result: List[dict]) -> str:
1414
filepath = additional_fields.get("source")
1515
chunk_id = additional_fields.get("chunk_id", additional_fields.get("chunk", ""))
1616

17-
retrieved_docs.append(
18-
{
19-
f"[doc{index+1}]": {
20-
"content": content,
21-
"filepath": filepath,
22-
"chunk_id": chunk_id,
23-
}
24-
}
25-
)
17+
retrieved_docs[f"[doc{index+1}]"] = {
18+
"content": content,
19+
"filepath": filepath,
20+
"chunk_id": chunk_id,
21+
}
2622

27-
return {"retrieved_documents": retrieved_docs}
23+
return retrieved_docs

infra/prompt-flow/cwyd/output_parser.py

Lines changed: 0 additions & 47 deletions
This file was deleted.

0 commit comments

Comments
 (0)