Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
.idea/
.vscode/
__pycache__
.DS_Store
.mypy_cache
.pytest_cache
bin/
dist/
lib/
output/
token.bin
pip-wheel-metadata/
lib64/
lib64
pyvenv.cfg
simple_ado.egg-info/
coverage.xml
htmlcov
.coverage
test-results.xml
.env
.env
114 changes: 114 additions & 0 deletions auth_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import os
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This file should be in the simple_ado package. It should probably be something like device_flow_helper to be as explicit as possible.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm fine with that change. Would we consider leaving it as auth_helper as it could potentially be the base for other authentication flows as well?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't hugely mind the name, but I don't think it should be exposed to the users either way. I'd rather see two constructors for ADOClient, one which takes a PAT, one which takes a Tenant ID, App ID, etc. and calls into this class.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you take the pull request as is and then I can look to make that change for the future?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think that's the right move. Temporary fixes like this shouldn't happen. If we merge and someone ends up depending on it, then fixing it would break things for them. We should do it right from the start.

import pathlib
import json
from typing import Optional, Union

import msal
import logging


class AuthError(RuntimeError):

"""Handle Auth Errors when attempting device_flow or recovery from cached token.
:param result: The result of the failed attempt
"""

result: Union[dict[str, Optional[int]], dict, dict[str, str], dict[str, Union[int, dict[str, str], str]], None]

def __init__(self, result: Union[
dict[str, Optional[int]],
dict,
dict[str, str],
dict[str, Union[int, dict[str, str], str]],
None
]
) -> None:
super().__init__()
self.result = result

def __str__(self) -> str:
"""Generate and return the string representation of the object.
:return: A string representation of the object
"""
if not self.result.get("error_message"):
return f"AuthError: Fatal error in authentication - {self.result.__str__()}"
else:
return f"AuthError: Fatal error in authentication - {self.result.get('error_message')}"


class AuthHelper:
def __init__(self, scope: Optional[str], app_id: Optional[str], log: Optional[logging.Logger]):
if not scope:
self.scope = os.environ["scope"]
else:
self.scope = scope
if not app_id:
self.app_id = os.environ["appId"]
else:
self.app_id = app_id
if not log:
self.log = logging.getLogger("ado")
self.log.getChild("AuthHelper")
else:
self.log = log
self.log.getChild("AuthHelper")

def device_flow_auth(self) -> \
Union[
dict[str, Optional[int]],
dict,
dict[str, str],
dict[str, Union[int, dict[str, str], str]],
None
]:
mscache = msal.SerializableTokenCache()
output = pathlib.Path.home() / pathlib.Path("tokens") / pathlib.Path("token.bin")
if os.path.exists(output):
self.log.debug("Deserializing cached credentials.")
mscache.deserialize(open(output, "r", encoding="utf-8").read())

app = msal.PublicClientApplication(
client_id=self.app_id,
token_cache=mscache
)
accounts = app.get_accounts()
if accounts:
result = app.acquire_token_silent(
scopes=[self.scope],
account=accounts[0]
)
if mscache.has_state_changed:
with open(output, "w", encoding='utf-8') as cache_file:
self.log.debug("Caching credentials.")
cache_file.write(mscache.serialize())
cache_file.close()
if result is not None:
if "access_token" in result:
self.log.debug("Found access token.")
return result
else:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unnecessary else.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The else should be handling the case when the access_token isn't returned in the result, which indicates an error.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The if returns a result, so you don't need the else. You can just straight up raise the AuthError

raise AuthError(result=result)
self.log.debug("Initiating device flow.")
flow = app.initiate_device_flow(scopes=[self.scope])
if "user_code" not in flow:
raise ValueError("User code not in result. Error: %s" % json.dumps(flow, indent=5))
print(flow["message"]) # Think this one needs to stay as a print so user sees the prompt
result = app.acquire_token_by_device_flow(flow)
if "access_token" in result:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Invert this condition for early exit.

self.log.debug("Access token acquired.")
if not pathlib.Path.is_dir(output.parent):
pathlib.Path.mkdir(output.parent)
if not pathlib.Path.exists(output):
with open(output, "w", encoding='utf-8') as cache_file:
self.log.debug("Writing cached credentials.")
cache_file.write(mscache.serialize())
cache_file.close()
else:
with open(output, "w+", encoding='utf-8') as cache_file:
self.log.debug("Writing cached credentials.")
cache_file.write(mscache.serialize())
cache_file.close()
return result
else:
self.log.debug("No access token found.")
raise AuthError(result)
76 changes: 76 additions & 0 deletions device_flow_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import os
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be in a test class.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed, however, I think that's more of a bit of a lift for me to get sorted out on how you are handling tests. If you can take the commit of this as-is for this piece, I can commit to moving it to a test.

import pathlib
import sys
import simple_ado
import argparse
import logging
from auth_helper import AuthHelper, AuthError
from simple_ado import ADOException
from simple_ado.exceptions import ADOHTTPException

def main():
logging.basicConfig(level=logging.DEBUG,handlers=[logging.StreamHandler(sys.stdout)])
logger = logging.getLogger("ado.device_flow_test")
app_id = os.environ["appId"]
scope = os.environ["scope"]
project_id = os.environ["SIMPLE_ADO_PROJECT_ID"]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where are these coming from?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean where are the os environment variables coming from? I'm not following.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct. I'd like to see a test that's reproducible.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm... ok, so the challenge is I'm not sure what I can share there. All of my tests are using values I've setup for internal usage at Microsoft. Getting this to work for project/repo/org etc. should be easy enough, but the authentication layer requires appId and a variety of other values that I'm not sure can be publicly provided. Let me look into that... are you by chance Microsoft-based? We could discuss this in a teams chat and I could show you there. Then we can figure out how we want to address it for the public.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I think I can do this, based on conversations I've had internally, nothing of these are considered secrets. If you can approve PR, I can re-pull, move this all to a test case, and add it there.

repo_id = os.environ["SIMPLE_ADO_REPO_ID"]
tenant = os.environ["SIMPLE_ADO_TENANT"]
output = pathlib.Path.home() / pathlib.Path(outputDir) / pathlib.Path(repo_id + ".zip")
if not pathlib.Path.is_dir(pathlib.Path(output).parent):
pathlib.Path.mkdir(pathlib.Path(output).parent)
logger.debug("* Fetching the repo: " + repoUrlStr)

try:
ah = AuthHelper(scope=scope,app_id=app_id,log=logger)
token = ah.device_flow_auth()

logger.debug("** Setting up ADOHTTPClient with " + tenant)
http = simple_ado.http_client.ADOHTTPClient(tenant=tenant,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We shouldn't be constructing anything other than ADOClient for public use. It's fine for tests, but we need to be able to ensure it works in real world situations.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can commit to working on that in next iteration if that's ok... I can move this whole thing to a test case in your tests folder and address it there.

credentials=token,
user_agent="test",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use a valid user agent. We want to be respectful when using the service.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you have one in mind? Or just leave it unset so we get some kind of default?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something like simple_ado/tests would be fine.

log = logger
)
git_client = simple_ado.git.ADOGitClient(http_client=http, log=logger)
logger.debug("** Getting Repository: " + repo_id + " from " + project_id)
repo = git_client.get_repository(repository_id=repo_id, project_id=project_id)
branch = repo["defaultBranch"]
branch = branch.split("/")[-1]
zip = git_client.download_zip(output_path=output, repository_id=repo["id"], branch=branch, project_id=project_id)
logger.debug("Completed.")
except ADOHTTPException as e:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In a test you want to re-throw these exceptions (or don't even catch them)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will address.

logger.critical("ADOHTTPException " + str(e.response.status_code) + " on: ")
logger.critical("e.message = " + e.message)
logger.critical("e.response = " + str(e.response.content))
logger.critical("e.request.url = " + e.response.request.url + " path: " + e.response.request.path_url)
if e.response.request.body:
logger.critical("e.request.body = " + e.response.request.body)
except ADOException as e:
if "The output path already exists" in str(e):
logger.debug("Skipping for " + repoUrlStr + " it already exists.")
pass
else:
logger.critical("ADOException " + str(e) + " on: ")
except AuthError as e:
logger.debug(str(e))

if __name__=="__main__":
parser = argparse.ArgumentParser(
prog = 'python progress_callback_test.py',
description = '''Notes.''',
epilog = '''Notes.'''
)
parser.add_argument('-r','--repo',type=str,dest='repoUrlStr',help="REQUIRED: Output directory.")
parser.add_argument('-o','--output',type=str,dest='output',help="REQUIRED: Output directory.")
parser.add_argument('-p','--progress',action='store_true',dest='progress',help="Turn on callbacks to show progress of long-running operations.")
args = parser.parse_args()
if not args.output:
print("\n\n*** No output directory provided. See help below. ***\n\n")
parser.print_help()
exit(1)
else:
outputDir = args.output
progress = args.progress
repoUrlStr = args.repoUrlStr
main()

2 changes: 1 addition & 1 deletion simple_ado/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import enum
import logging
import os
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Callable
import urllib.parse

from simple_ado.base_client import ADOBaseClient
Expand Down
37 changes: 27 additions & 10 deletions simple_ado/http_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from simple_ado.exceptions import ADOException, ADOHTTPException
from simple_ado.models import PatchOperation


# pylint: disable=invalid-name
ADOThread = Dict[str, Any]
ADOResponse = Any
Expand Down Expand Up @@ -59,15 +58,15 @@ class ADOHTTPClient:
log: logging.Logger
tenant: str
extra_headers: Dict[str, str]
credentials: Tuple[str, str]
credentials = None # Modify away from tuple to expand for device_flow
_not_before: Optional[datetime.datetime]
_session: requests.Session

def __init__(
self,
*,
tenant: str,
credentials: Tuple[str, str],
credentials: None, # Modify away from tuple to support device_flow
user_agent: str,
log: logging.Logger,
extra_headers: Optional[Dict[str, str]] = None,
Expand All @@ -78,6 +77,7 @@ def __init__(

self.tenant = tenant
self.credentials = credentials

self._not_before = None

self._session = requests.Session()
Expand Down Expand Up @@ -200,12 +200,29 @@ def get(
:returns: The raw response object from the API
"""
self._wait()

headers = self.construct_headers(additional_headers=additional_headers)
response = self._session.get(
request_url, auth=self.credentials, headers=headers, stream=stream
)

response = None
# Modified to support PAT-based Authentication
if isinstance(self.credentials, tuple) and len(self.credentials) == 2:
headers = self.construct_headers(additional_headers=additional_headers)
response = self._session.get(
request_url, auth=self.credentials, headers=headers, stream=stream
)
# Since requests does not support BearerAuth, this is a hacky way to do it.
elif isinstance(self.credentials, dict) and len(self.credentials) == 3:
if additional_headers:
additional_headers['Authorization'] = 'Bearer ' + self.credentials["access_token"]
else:
additional_headers = {"Authorization":"Bearer " + self.credentials["access_token"]}
headers = self.construct_headers(additional_headers=additional_headers)
response = self._session.get(
request_url, auth=None, headers=headers, stream=stream
)
else:
self.log.critical("len(self.credentials) == " + str(len(self.credentials)))
self.log.critical("type(self.credentials) == " + str(type(self.credentials)))
raise ValueError("Unknown authentication type. Modify simple_ado/http_client.py to support.")
if not response:
raise ADOHTTPException("Unable to get a response from authentication phase. See simple_ado/http_client.py.")
self._track_rate_limit(response)

if response.status_code == 429:
Expand Down Expand Up @@ -448,4 +465,4 @@ def construct_headers(
for header_name, header_value in additional_headers.items():
headers[header_name] = header_value

return headers
return headers
32 changes: 16 additions & 16 deletions simple_ado/security.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,22 @@
class ADOBranchPermission(enum.IntEnum):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These changes should be undone.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Believe these came from running the style checks.

"""Possible types of git branch permissions."""

ADMINISTER: int = 2 ** 0
READ: int = 2 ** 1
CONTRIBUTE: int = 2 ** 2
FORCE_PUSH: int = 2 ** 3
CREATE_BRANCH: int = 2 ** 4
CREATE_TAG: int = 2 ** 5
MANAGE_NOTES: int = 2 ** 6
BYPASS_PUSH_POLICIES: int = 2 ** 7
CREATE_REPOSITORY: int = 2 ** 8
DELETE_REPOSITORY: int = 2 ** 9
RENAME_REPOSITORY: int = 2 ** 10
EDIT_POLICIES: int = 2 ** 11
REMOVE_OTHERS_LOCKS: int = 2 ** 12
MANAGE_PERMISSIONS: int = 2 ** 13
CONTRIBUTE_TO_PULL_REQUESTS: int = 2 ** 14
BYPASS_PULL_REQUEST_POLICIES: int = 2 ** 15
ADMINISTER: int = 2**0
READ: int = 2**1
CONTRIBUTE: int = 2**2
FORCE_PUSH: int = 2**3
CREATE_BRANCH: int = 2**4
CREATE_TAG: int = 2**5
MANAGE_NOTES: int = 2**6
BYPASS_PUSH_POLICIES: int = 2**7
CREATE_REPOSITORY: int = 2**8
DELETE_REPOSITORY: int = 2**9
RENAME_REPOSITORY: int = 2**10
EDIT_POLICIES: int = 2**11
REMOVE_OTHERS_LOCKS: int = 2**12
MANAGE_PERMISSIONS: int = 2**13
CONTRIBUTE_TO_PULL_REQUESTS: int = 2**14
BYPASS_PULL_REQUEST_POLICIES: int = 2**15


class ADOBranchPermissionLevel(enum.IntEnum):
Expand Down